Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .github/workflows/claude-cache-stability.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Runs the Claude prompt cache stability test on every pull request and push.
name: Claude Cache Stability

on: [pull_request, push]

# Least-privilege token: the job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  claude-cache-stability:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 22
          cache: npm

      - name: Install dependencies
        run: npm ci

      - name: Run Claude cache stability test
        run: npm run test:cache
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
"smoke:btc": "tsx scripts/real-btc-smoke.ts",
"smoke:nested-market": "npm run build && tsx scripts/nested-market-smoke.ts",
"test": "vitest run --config vitest.config.ts",
"typecheck": "tsc -p tsconfig.json --noEmit"
"typecheck": "tsc -p tsconfig.json --noEmit",
"test:cache": "vitest run --config vitest.config.ts tests/claude-cache-stability.test.ts"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.23.0",
Expand Down
49 changes: 36 additions & 13 deletions src/agents/workflow-context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,28 @@ export function promptWithWorkflowContext(prompt: string, context: WorkflowAgent
return prompt;
}
return [
"Humanize2 workflow agent instructions:",
"- You are running as a Humanize2-managed agent.",
"- Read the workflow context block after the task before acting.",
"- vertexId is the workflow node identity for artifact ownership and routing;",
"- shortName is the human-facing agent/session alias and should not replace vertexId in workflow state.",
"Deliver expected artifacts back to Humanize2 through the listed MCP tools or JSON-RPC endpoint.",
"Do not inspect, signal, attach to, or mutate the Humanize2 hub process or its in-memory runtime state.",
"Do not repair workflow state directly; use Humanize2 artifact, board, event, message, or view APIs.",
"",
"Task:",
prompt,
"",
"Humanize2 workflow context:",
`- workflowRunId: ${context.workflowRunId}`,
`- vertexId: ${context.vertexId}`,
`- shortName: ${context.shortName}`,
"- vertexId is the workflow node identity for artifact ownership and routing;",
"- shortName is the human-facing agent/session alias and should not replace vertexId in workflow state.",
`- jsonRpcUrl: ${context.jsonRpcUrl}`,
`- expectedArtifacts: ${JSON.stringify(context.expectedArtifacts)}`,
`- inputs: ${JSON.stringify(context.inputs ?? [])}`,
`- expectedArtifacts: ${stableJson(context.expectedArtifacts)}`,
`- inputs: ${stableJson(context.inputs ?? [])}`,
`- mcpToolNames: ${context.mcpToolNames.join(", ")}`,
"",
...inputSnapshotSection(context),
"Deliver expected artifacts back to Humanize2 through the listed MCP tools or JSON-RPC endpoint.",
"Do not inspect, signal, attach to, or mutate the Humanize2 hub process or its in-memory runtime state.",
"Do not repair workflow state directly; use Humanize2 artifact, board, event, message, or view APIs.",
"",
prompt
...inputSnapshotSection(context)
].join("\n");
}

Expand All @@ -38,8 +43,8 @@ export function environmentWithWorkflowContext(
HUMANIZE2_WORKFLOW_VERTEX_ID: context.vertexId,
HUMANIZE2_WORKFLOW_SHORT_NAME: context.shortName,
HUMANIZE2_WORKFLOW_JSONRPC_URL: context.jsonRpcUrl,
HUMANIZE2_WORKFLOW_EXPECTED_ARTIFACTS: JSON.stringify(context.expectedArtifacts),
HUMANIZE2_WORKFLOW_INPUTS: JSON.stringify(context.inputs ?? []),
HUMANIZE2_WORKFLOW_EXPECTED_ARTIFACTS: stableJson(context.expectedArtifacts),
HUMANIZE2_WORKFLOW_INPUTS: stableJson(context.inputs ?? []),
HUMANIZE2_WORKFLOW_MCP_TOOLS: context.mcpToolNames.join(",")
};
}
Expand All @@ -50,8 +55,26 @@ function inputSnapshotSection(context: WorkflowAgentLaunchContext): string[] {
}
return [
"Declared workflow input snapshots:",
JSON.stringify(context.inputs, null, 2),
stableJson(context.inputs, 2),
"Treat these input snapshots as part of the current task contract.",
""
];
}

/**
 * Serializes `value` to JSON after normalizing it with `stableJsonValue`,
 * which rebuilds plain objects with their keys in sorted order.
 *
 * @param value - Any value to serialize.
 * @param space - Optional indentation width forwarded to `JSON.stringify`.
 * @returns The JSON text produced from the normalized value.
 */
function stableJson(value: unknown, space?: number): string {
  const normalized = stableJsonValue(value);
  return JSON.stringify(normalized, null, space);
}

/**
 * Returns a copy of `value` in which every nested object has its keys inserted
 * in sorted order, so that a later `JSON.stringify` emits the same text
 * regardless of the key order of the input.
 *
 * Values that define a `toJSON` method (for example `Date`) are converted
 * through that method first, mirroring what `JSON.stringify` does. Without this
 * step such values would be rebuilt from their own enumerable keys and a `Date`
 * would serialize as `{}` instead of its ISO string.
 *
 * @param value - Any JSON-serializable value.
 * @returns The normalized value; primitives and `null` are returned unchanged.
 */
function stableJsonValue(value: unknown): unknown {
  const normalize = (current: unknown, key: string): unknown => {
    let resolved: unknown = current;

    // JSON.stringify calls toJSON(key) exactly once per value; do the same here
    // so the normalized tree matches what plain JSON.stringify would emit.
    if (resolved !== null && typeof resolved === "object") {
      const toJson = (resolved as { toJSON?: unknown }).toJSON;
      if (typeof toJson === "function") {
        resolved = toJson.call(resolved, key);
      }
    }

    if (Array.isArray(resolved)) {
      // Array order is meaningful and is preserved; only element contents are normalized.
      return resolved.map((item, index) => normalize(item, String(index)));
    }
    if (resolved === null || typeof resolved !== "object") {
      return resolved;
    }

    const record = resolved as Record<string, unknown>;
    return Object.fromEntries(
      Object.keys(record)
        .sort()
        .map((name) => [name, normalize(record[name], name)])
    );
  };

  return normalize(value, "");
}
130 changes: 130 additions & 0 deletions tests/claude-cache-stability.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import { describe, expect, it } from "vitest";

import { promptWithWorkflowContext } from "../src/agents/workflow-context.js";
import type { WorkflowAgentLaunchContext } from "../src/agents/types.js";

// Builds 100 prompts whose workflow context changes every turn and checks that
// the shared prefix between consecutive prompts stays above 90% of the prompt size.
describe("Claude workflow prompt cache stability", () => {
  it("keeps more than 90 percent of prompt bytes reusable across 100 dynamic workflow turns", () => {
    const expectedVersion = "2.1.143";
    const turnCount = 100;
    const task = `Implement the requested workflow task using the declared artifacts.\n\n${stableTaskBody()}`;

    const envelopedPrompts: string[] = [];
    for (let turn = 0; turn < turnCount; turn += 1) {
      const workflowPrompt = promptWithWorkflowContext(task, contextForTurn(turn));
      envelopedPrompts.push(withSameClaudeCodeVersionEnvelope(expectedVersion, workflowPrompt));
    }

    const estimate = estimateCacheStability(envelopedPrompts);

    expect(estimate.claudeCodeVersion).toBe(expectedVersion);
    expect(estimate.rounds).toBe(turnCount);
    expect(estimate.averagePromptBytes).toBeGreaterThan(10_000);
    expect(estimate.averageReusablePrefixBytes).toBeGreaterThan(10_000);
    expect(estimate.cacheHitRatio).toBeGreaterThan(0.9);
  });
});

/** Summary returned by `estimateCacheStability` for a sequence of prompts. */
interface CacheEstimate {
  /** Version parsed from the "Claude Code version:" line of the first prompt, or "unknown". */
  claudeCodeVersion: string;
  /** Number of prompts that were analyzed. */
  rounds: number;
  /** Mean prompt size, measured with `String.prototype.length` (equals bytes only for ASCII text). */
  averagePromptBytes: number;
  /** Mean length of the common prefix shared by each pair of consecutive prompts. */
  averageReusablePrefixBytes: number;
  /** `averageReusablePrefixBytes` divided by `averagePromptBytes`. */
  cacheHitRatio: number;
}

/**
 * Prefixes `prompt` with a fixed four-line header followed by a blank line.
 * Only the version line varies with the arguments; the other header lines are
 * constant.
 *
 * @param claudeCodeVersion - Version string written into the first header line.
 * @param prompt - Prompt text appended after the header and blank line.
 * @returns The header, a blank line, and the prompt joined with newlines.
 */
function withSameClaudeCodeVersionEnvelope(claudeCodeVersion: string, prompt: string): string {
  const header =
    `Claude Code version: ${claudeCodeVersion}\n` +
    "Model: gpt-5.5\n" +
    "Permission mode: bypassPermissions\n" +
    "Output format: stream-json\n";
  // The extra "\n" produces the blank separator line between header and prompt.
  return `${header}\n${prompt}`;
}

/**
 * Estimates how much of each prompt could be reused from the previous one by
 * measuring the common prefix of every pair of consecutive prompts.
 *
 * Sizes are taken from `String.prototype.length`, so they equal byte counts
 * only for ASCII text.
 *
 * @param prompts - Prompts in the order they would be sent.
 * @returns Averages over the prompts plus the version parsed from the first prompt.
 */
function estimateCacheStability(prompts: string[]): CacheEstimate {
  // One entry per consecutive pair: (prompts[0], prompts[1]), (prompts[1], prompts[2]), ...
  const sharedPrefixLengths: number[] = [];
  for (let turn = 1; turn < prompts.length; turn += 1) {
    sharedPrefixLengths.push(commonPrefixLength(prompts[turn - 1], prompts[turn]));
  }

  const promptLengths: number[] = [];
  for (const prompt of prompts) {
    promptLengths.push(prompt.length);
  }

  const averagePromptBytes = average(promptLengths);
  const averageReusablePrefixBytes = average(sharedPrefixLengths);

  const versionMatch = /^Claude Code version: (.+)$/m.exec(prompts[0]);
  const claudeCodeVersion = versionMatch?.[1] ?? "unknown";

  return {
    claudeCodeVersion,
    rounds: prompts.length,
    averagePromptBytes,
    averageReusablePrefixBytes,
    cacheHitRatio: averageReusablePrefixBytes / averagePromptBytes
  };
}

/**
 * Builds the workflow launch context for one turn. Identifiers, port, producer,
 * timestamps, and board values all vary with `index`, while the expected
 * artifacts and MCP tool names are the same on every turn.
 *
 * @param index - Zero-based turn number.
 * @returns The launch context for that turn.
 */
function contextForTurn(index: number): WorkflowAgentLaunchContext {
  const runSuffix = index.toString().padStart(3, "0");
  const minute = String(index % 60).padStart(2, "0");
  const port = 4772 + index;
  const isEvenTurn = index % 2 === 0;

  // The object literal stays inline so it keeps the contextual typing of the return type.
  return {
    workflowRunId: `workflow-run-${runSuffix}`,
    vertexId: `reviewer-${index % 7}`,
    shortName: `reviewer-${index % 5}`,
    jsonRpcUrl: `http://127.0.0.1:${port}/jsonrpc`,
    expectedArtifacts: [
      {
        schema: "rlcr.verdict.v1",
        name: "verdict"
      }
    ],
    inputs: [
      {
        kind: "artifact",
        name: "draft",
        schema: "draft.v1",
        label: "Current draft",
        optional: false,
        producer: `builder-${index}`,
        iteration: index + 1,
        createdAt: `2026-05-16T10:${minute}:00.000Z`,
        // Keys are deliberately not in alphabetical order.
        content: {
          b: 2,
          a: 1,
          turn: index
        }
      },
      {
        kind: "board",
        id: "loop-status",
        label: "Loop status",
        optional: true,
        updatedAt: `2026-05-16T11:${minute}:00.000Z`,
        value: {
          status: isEvenTurn ? "revise" : "review",
          requiredFollowUp: [`Fix-${index}`]
        }
      }
    ],
    mcpToolNames: ["artifact_deliver", "workflow_get", "board_patch", "event_emit"]
  };
}

/**
 * Produces a fixed 120-line block of text used as the unchanging part of the
 * task prompt. Lines are numbered 001 through 120.
 *
 * @returns The 120 lines joined with newlines.
 */
function stableTaskBody(): string {
  const lines: string[] = [];
  for (let lineNumber = 1; lineNumber <= 120; lineNumber += 1) {
    const label = String(lineNumber).padStart(3, "0");
    lines.push(
      `STABLE_TASK_LINE_${label}: This deterministic task body represents reusable workflow instructions and stays unchanged across turns.`
    );
  }
  return lines.join("\n");
}

/**
 * Computes the arithmetic mean of `values`.
 *
 * @param values - Numbers to average.
 * @returns The mean, or 0 when `values` is empty (instead of the NaN that 0/0 would produce).
 */
function average(values: number[]): number {
  // Guard the empty case so callers never receive NaN from a 0 / 0 division.
  if (values.length === 0) {
    return 0;
  }
  let total = 0;
  for (const value of values) {
    total += value;
  }
  return total / values.length;
}

/**
 * Counts how many leading UTF-16 code units `left` and `right` have in common.
 *
 * @param left - First string.
 * @param right - Second string.
 * @returns The index of the first differing position, or the length of the
 *   shorter string when one is a prefix of the other.
 */
function commonPrefixLength(left: string, right: string): number {
  const shorter = Math.min(left.length, right.length);
  let matched = 0;
  while (matched < shorter && left[matched] === right[matched]) {
    matched += 1;
  }
  return matched;
}