PostHog · Twixes · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts
@@ -1656,12 +1656,24 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     // needs so the session doesn't pin the whole meta object.
     const baseBranch = meta?.baseBranch;
     const environment = meta?.environment;
+    // Snapshot the external MCP servers (before the in-process local server is
+    // mixed in below) so run_mcp_script can dial them with inherited auth.
+    const externalMcpServers: Record<string, McpServerConfig> =
+      supportsMcpInjection(earlyModelId)
+        ? parseMcpServers(params, this.logger)
+        : {};
     const buildInProcessMcpServers = (): Record<
       string,
       McpSdkServerConfigWithInstance
     > => {
       const server = createLocalToolsMcpServer(
-        { cwd, token: resolveGithubToken(), taskId, baseBranch },
+        {
+          cwd,
+          token: resolveGithubToken(),
+          taskId,
+          baseBranch,
+          scriptableMcpServers: externalMcpServers,
+        },
         { environment },
       );
       return server ? { [LOCAL_TOOLS_MCP_NAME]: server } : {};
@@ -1676,9 +1688,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     }
 
     const mcpServers: Record<string, McpServerConfig> = {
-      ...(supportsMcpInjection(earlyModelId)
-        ? parseMcpServers(params, this.logger)
-        : {}),
+      ...externalMcpServers,
       ...initialInProcess,
     };
 

diff --git a/packages/agent/src/adapters/codex/codex-agent.test.ts b/packages/agent/src/adapters/codex/codex-agent.test.ts
@@ -424,16 +424,20 @@ describe("CodexAcpAgent", () => {
         _meta: { systemPrompt: string };
       };
 
-      // Existing MCP server is preserved; ours is appended.
-      expect(forwarded.mcpServers).toHaveLength(2);
+      // Existing MCP server is preserved; the structured-output server is
+      // appended. The local-tools server is also present because the existing
+      // server makes run_mcp_script/list_mcp_tools available.
       expect(forwarded.mcpServers[0].name).toBe("existing");
-      expect(forwarded.mcpServers[1].name).toBe("posthog_output");
-      expect(forwarded.mcpServers[1].command).toBe(process.execPath);
+      const outputServer = forwarded.mcpServers.find(
+        (s) => s.name === "posthog_output",
+      );
+      expect(outputServer).toBeDefined();
+      expect(outputServer?.command).toBe(process.execPath);
 
       // The schema is forwarded base64-encoded so codex-acp doesn't have
       // to escape it through a shell.
       const envEntry = (
-        forwarded.mcpServers[1].env as Array<{ name: string; value: string }>
+        outputServer?.env as Array<{ name: string; value: string }>
       ).find((e) => e.name === "POSTHOG_OUTPUT_SCHEMA");
       expect(envEntry).toBeDefined();
       const decoded = JSON.parse(

diff --git a/packages/agent/src/adapters/codex/codex-agent.ts b/packages/agent/src/adapters/codex/codex-agent.ts
@@ -71,6 +71,7 @@ import {
   emptyBaseline,
   estimateTokens,
 } from "../claude/context-breakdown";
+import { parseMcpServers } from "../claude/session/mcp-config";
 import { classifyAgentError } from "../error-classification";
 import {
   enabledLocalTools,
@@ -646,6 +647,10 @@ export class CodexAcpAgent extends BaseAcpAgent {
       token: resolveGithubToken(),
       taskId: resolveTaskId(meta),
       baseBranch: meta?.baseBranch,
+      // Reuse the ACP MCP servers so run_mcp_script can dial them (auth inherited).
+      scriptableMcpServers: parseMcpServers({
+        mcpServers: request.mcpServers ?? [],
+      }),
     };
     const tools = enabledLocalTools(ctx, meta);
     if (tools.length === 0) {

diff --git a/packages/agent/src/adapters/local-tools/index.ts b/packages/agent/src/adapters/local-tools/index.ts
@@ -1,3 +1,4 @@
+import { listMcpToolsTool, runMcpScriptTool } from "../../mcp-scripting/tools";
 import type { LocalTool, LocalToolCtx, LocalToolGateMeta } from "./registry";
 import { signedCommitTool } from "./tools/signed-commit";
 import { signedMergeTool } from "./tools/signed-merge";
@@ -17,6 +18,8 @@ export const LOCAL_TOOLS: LocalTool[] = [
   signedCommitTool,
   signedMergeTool,
   signedRewriteTool,
+  runMcpScriptTool,
+  listMcpToolsTool,
 ];
 
 /** Tools whose gate passes for the given context — the set to actually expose. */

diff --git a/packages/agent/src/adapters/local-tools/registry.ts b/packages/agent/src/adapters/local-tools/registry.ts
@@ -1,3 +1,4 @@
+import type { McpServerConfig } from "@anthropic-ai/claude-agent-sdk";
 import type { z } from "zod";
 
 /**
@@ -20,6 +21,15 @@ export interface LocalToolCtx {
    * back to origin/HEAD detection when unset.
    */
   baseBranch?: string;
+  /**
+   * The session's external MCP servers, keyed by name — the same
+   * `McpServerConfig` map handed to the Claude SDK `query()`. The MCP-scripting
+   * tools (`run_mcp_script` / `list_mcp_tools`) open their own clients against
+   * these configs, inheriting auth (stdio `env`, http/sse `headers`). In-process
+   * `sdk` servers are skipped — they have no transport to dial. When absent or
+   * empty, the scripting tools self-disable.
+   */
+  scriptableMcpServers?: Record<string, McpServerConfig>;
 }
 
 /** Minimal session-meta shape needed to gate tools (e.g. cloud-only). */

diff --git a/packages/agent/src/mcp-scripting/README.md b/packages/agent/src/mcp-scripting/README.md
@@ -0,0 +1,111 @@
+# MCP tools as scripts
+
+Lets the agent write **one JavaScript script** that calls the connected MCP
+tools as ordinary async functions, instead of orchestrating them one tool-call
+at a time. The classic pain it removes: a task that needs 100 sequential calls
+(list, then act per item) becomes a single script with a loop.
+
+```js
+const issues = await tools.linear.listIssues({ teamId })
+const stale = issues.filter((i) => i.status === "backlog")
+for (const i of stale) {
+  await tools.linear.createComment({ issueId: i.id, body: "bump" })
+}
+return { bumped: stale.length }
+```
+
+Exposed to the model as two local tools (registered in
+`../adapters/local-tools/index.ts`):
+
+- **`list_mcp_tools`** — returns `.d.ts`-style signatures for every
+  `tools.<server>.<tool>(args)` call available, generated from each tool's MCP
+  input schema. Call it first to discover what to call.
+- **`run_mcp_script`** — takes `{ script, timeoutMs? }`, runs the script with
+  `tools` injected, returns `{ result, logs, error? }`.
+
+## Pieces
+
+| File | Responsibility |
+| --- | --- |
+| `client-pool.ts` | Opens/caches one MCP `Client` per server from the session's `McpServerConfig` map; `listTools` / `callTool`. |
+| `proxy.ts` | Builds the lazy `tools.<server>.<tool>(args)` proxy that forwards to the pool. |
+| `runner.ts` | Runs the script in a constrained `node:vm` context with a wall-clock timeout. |
+| `signatures.ts` | Renders connected tools as TypeScript-style signatures. |
+| `tools.ts` | The `run_mcp_script` / `list_mcp_tools` local-tool definitions. |
+
+## Credential flow — no new auth path
+
+The proxy dials the **exact same MCP server configs** the agent's own MCP tools
+use, so authentication is inherited verbatim. The chain:
+
+1. The ACP client sends MCP servers in the `newSession` params. `parseMcpServers`
+   (`../adapters/claude/session/mcp-config.ts`) turns them into a
+   `Record<string, McpServerConfig>` — **stdio** entries carry `env`, **http/sse**
+   entries carry `headers`. This map is the single credential source.
+2. Both adapters snapshot that map into `LocalToolCtx.scriptableMcpServers`:
+   `claude-agent.ts` passes it *before* the in-process local-tools server is mixed
+   in (so scripts never try to dial an in-process `sdk` server — those have no
+   transport), and `codex-agent.ts` derives it from the same ACP `mcpServers` via
+   `parseMcpServers`. The scripting tools self-disable when no external servers
+   are present.
+3. On a `run_mcp_script` / `list_mcp_tools` call, `McpClientPool` reads a config
+   and constructs the matching MCP SDK transport:
+   - `stdio` → `StdioClientTransport` with `command`/`args`/`env` (the session env
+     is inherited too, so stdio servers keep ambient credentials).
+   - `http` → `StreamableHTTPClientTransport` with `requestInit.headers`.
+   - `sse` → `SSEClientTransport` with `requestInit.headers`.
+
+There is no separate token store, no re-auth, and nothing the model can set: a
+script can only reach servers the session was already authorized for, with the
+same credentials those tools already had.
+
+## Sandbox model
+
+`runner.ts` executes the script in a `node:vm` context whose globals are an
+explicit allowlist:
+
+- **Granted:** `tools`, a captured `console`, and pure stateless helpers
+  (`JSON`, `Math`, `Date`, `Array`/`Object`/`Map`/`Set`/…, `structuredClone`,
+  `TextEncoder`/`TextDecoder`, `URL`/`URLSearchParams`, `setTimeout`/`clearTimeout`).
+- **Denied:** `require`, `import`, `process`, `global`/`globalThis` ambient
+  authority, `Buffer`, `fetch`, filesystem — so the **only** way out is `tools.*`.
+- **No dynamic code:** the context is created with
+  `codeGeneration: { strings: false, wasm: false }`, so `new Function(...)` /
+  `eval` throw — closing the most common `vm` escape via the `Function`
+  constructor.
+- **Wall-clock timeout:** default 30s, capped at 120s. `node:vm` can't interrupt a
+  pending Promise (e.g. a hung tool call), so the timeout *races* script
+  completion to bound total time; the per-server MCP tool timeout still applies to
+  each individual call.
+
+`node:vm` is **not** a hard security boundary against hostile code sharing the
+process — but here the script author is the same agent that already calls these
+tools directly. The goal is to **remove ambient authority** (fs/net/env) and
+funnel every side effect through the audited `tools` path, not to contain an
+adversary. Cloud runs additionally execute the whole agent inside a sandbox,
+which is the real isolation layer.
+
+## Adopt vs build
+
+We researched the "code mode for MCP" ecosystem (Cloudflare *Code Mode*,
+`@utcp/code-mode` / `code-mode-mcp`, `mcpac`). They all run as a **separate MCP
+server or standalone process** that connects to MCP clients via its own config
+(or target Cloudflare `workerd` isolates), and several add a second abstraction
+(UTCP) on top of MCP. None reuse an existing in-process `McpServerConfig` map
+with already-resolved credentials — which is the entire integration we need.
+
+Adopting one would mean standing up another process, re-plumbing auth into it,
+and taking a heavier dependency (some MPL-2.0) for what is a ~5-file thin layer
+over the MCP SDK `Client` we already depend on. **Decision: build.** The layer is
+small, has no new runtime dependencies (only `@modelcontextprotocol/sdk` and
+`zod`, both already present), and inherits auth for free.
+
+## Tests
+
+`mcp-scripting.test.ts` covers proxy generation, a script calling a tool,
+looping/batching, timeout enforcement, error surfacing, signature rendering, and
+sandbox-escape attempts (`require`/`process`/`global`/`Buffer`/`fetch`/`new
+Function` denied). `client-pool.integration.test.ts` spins up a real stdio MCP
+server (`fixtures/echo-mcp-server.mjs`) and drives it end-to-end through a
+script, including asserting that stdio `env` reaches the server (the credential
+path).
diff --git a/packages/agent/src/mcp-scripting/client-pool.integration.test.ts b/packages/agent/src/mcp-scripting/client-pool.integration.test.ts
@@ -0,0 +1,121 @@
+import { fileURLToPath } from "node:url";
+import type { McpServerConfig } from "@anthropic-ai/claude-agent-sdk";
+import { afterEach, describe, expect, it } from "vitest";
+import { McpClientPool } from "./client-pool";
+import { buildToolsProxy } from "./proxy";
+import { runScript } from "./runner";
+import { listMcpToolsTool, runMcpScriptTool } from "./tools";
+
+const ECHO_SERVER = fileURLToPath(
+  new URL("./fixtures/echo-mcp-server.mjs", import.meta.url),
+);
+
+describe("McpClientPool (real stdio MCP server)", () => {
+  let pool: McpClientPool | undefined;
+
+  afterEach(async () => {
+    await pool?.close();
+    pool = undefined;
+  });
+
+  it("lists tools and calls them over a real stdio transport", async () => {
+    pool = new McpClientPool({
+      echo: { type: "stdio", command: process.execPath, args: [ECHO_SERVER] },
+    });
+
+    const tools = await pool.listTools("echo");
+    expect(tools.map((t) => t.name).sort()).toEqual(["add", "whoami"]);
+
+    const result = await pool.callTool("echo", "add", { a: 2, b: 3 });
+    expect(result.isError).toBe(false);
+    expect(result.value).toEqual({ sum: 5 });
+  });
+
+  it("inherits stdio env as the credential path", async () => {
+    pool = new McpClientPool({
+      echo: {
+        type: "stdio",
+        command: process.execPath,
+        args: [ECHO_SERVER],
+        env: { ECHO_SECRET: "s3cr3t-token" },
+      },
+    });
+
+    const result = await pool.callTool("echo", "whoami", {});
+    expect(result.value).toBe("s3cr3t-token");
+  });
+
+  it("drives the real server end-to-end through a script", async () => {
+    pool = new McpClientPool({
+      echo: { type: "stdio", command: process.execPath, args: [ECHO_SERVER] },
+    });
+    const tools = buildToolsProxy(pool, pool.serverNames());
+
+    const { result, error } = await runScript({
+      tools,
+      script: `
+        let total = 0
+        for (let i = 1; i <= 3; i++) {
+          const r = await tools.echo.add({ a: total, b: i })
+          total = r.sum
+        }
+        return total
+      `,
+    });
+
+    expect(error).toBeUndefined();
+    expect(result).toBe(6);
+  }, 15_000);
+
+  it("excludes in-process sdk servers from serverNames", () => {
+    pool = new McpClientPool({
+      echo: { type: "stdio", command: process.execPath, args: [ECHO_SERVER] },
+      // sdk servers have no dialable transport; cast to satisfy the union.
+      inproc: { type: "sdk", name: "inproc" } as never,
+    });
+    expect(pool.serverNames()).toEqual(["echo"]);
+  });
+});
+
+describe("scripting local tools (real stdio MCP server)", () => {
+  const echoConfig: Record<string, McpServerConfig> = {
+    echo: { type: "stdio", command: process.execPath, args: [ECHO_SERVER] },
+  };
+
+  it("run_mcp_script gates on having scriptable servers", () => {
+    expect(runMcpScriptTool.isEnabled({ cwd: "/r" }, undefined)).toBe(false);
+    expect(
+      runMcpScriptTool.isEnabled(
+        { cwd: "/r", scriptableMcpServers: echoConfig },
+        undefined,
+      ),
+    ).toBe(true);
+  });
+
+  it("list_mcp_tools renders real signatures and notes unreachable servers", async () => {
+    const result = await listMcpToolsTool.handler(
+      {
+        cwd: "/r",
+        scriptableMcpServers: {
+          ...echoConfig,
+          broken: { type: "stdio", command: "definitely-not-a-real-binary" },
+        },
+      },
+      {},
+    );
+    const text = result.content[0].text;
+    expect(text).toContain("echo");
+    expect(text).toContain("add(args: {");
+    expect(text).toContain("Unreachable servers");
+    expect(text).toContain("broken");
+  }, 15_000);
+
+  it("run_mcp_script executes against the real server end-to-end", async () => {
+    const result = await runMcpScriptTool.handler(
+      { cwd: "/r", scriptableMcpServers: echoConfig },
+      { script: "return (await tools.echo.add({ a: 40, b: 2 })).sum" },
+    );
+    expect(result.isError).toBeUndefined();
+    expect(result.content[0].text).toContain('"result": 42');
+  }, 15_000);
+});