From e9ca8d9409cc2d2de054bea129b3310ecc442e37 Mon Sep 17 00:00:00 2001
From: Michael Matloka <michael@matloka.com>
Date: Fri, 19 Jun 2026 03:48:51 +0200
Subject: [PATCH 1/3] feat(agent): add MCP-scripting core (tools proxy, vm
 runner, signatures)

Lets the agent write one JS script that calls connected MCP tools as
async functions instead of one tool-call at a time. Adds:

- McpClientPool: opens MCP clients from the session's McpServerConfig
  map, inheriting auth (stdio env, http/sse headers) verbatim
- buildToolsProxy: lazy tools.<server>.<tool>(args) proxy
- runScript: constrained node:vm sandbox with wall-clock timeout,
  captured console, and no ambient fs/net/process authority
- renderToolsetSignatures: JSON Schema to TS-style signatures
---
 packages/agent/src/mcp-scripting/README.md    | 111 +++++++
 .../client-pool.integration.test.ts           | 121 +++++++
 .../agent/src/mcp-scripting/client-pool.ts    | 220 +++++++++++++
 .../fixtures/echo-mcp-server.mjs              |  28 ++
 packages/agent/src/mcp-scripting/index.ts     |   9 +
 .../src/mcp-scripting/mcp-scripting.test.ts   | 301 ++++++++++++++++++
 packages/agent/src/mcp-scripting/proxy.ts     | 104 ++++++
 packages/agent/src/mcp-scripting/runner.ts    | 178 +++++++++++
 .../agent/src/mcp-scripting/signatures.ts     | 110 +++++++
 packages/agent/src/mcp-scripting/tools.ts     | 134 ++++++++
 10 files changed, 1316 insertions(+)
 create mode 100644 packages/agent/src/mcp-scripting/README.md
 create mode 100644 packages/agent/src/mcp-scripting/client-pool.integration.test.ts
 create mode 100644 packages/agent/src/mcp-scripting/client-pool.ts
 create mode 100644 packages/agent/src/mcp-scripting/fixtures/echo-mcp-server.mjs
 create mode 100644 packages/agent/src/mcp-scripting/index.ts
 create mode 100644 packages/agent/src/mcp-scripting/mcp-scripting.test.ts
 create mode 100644 packages/agent/src/mcp-scripting/proxy.ts
 create mode 100644 packages/agent/src/mcp-scripting/runner.ts
 create mode 100644 packages/agent/src/mcp-scripting/signatures.ts
 create mode 100644 packages/agent/src/mcp-scripting/tools.ts
diff --git a/packages/agent/src/mcp-scripting/README.md b/packages/agent/src/mcp-scripting/README.md
new file mode 100644
index 000000000..597f41997
--- /dev/null
+++ b/packages/agent/src/mcp-scripting/README.md
@@ -0,0 +1,111 @@
+# MCP tools as scripts
+
+Lets the agent write **one JavaScript script** that calls the connected MCP
+tools as ordinary async functions, instead of orchestrating them one tool-call
+at a time. The classic pain it removes: a task that needs 100 sequential calls
+(list, then act per item) becomes a single script with a loop.
+
+```js
+const issues = await tools.linear.listIssues({ teamId })
+const stale = issues.filter((i) => i.status === "backlog")
+for (const i of stale) {
+  await tools.linear.createComment({ issueId: i.id, body: "bump" })
+}
+return { bumped: stale.length }
+```
+
+Exposed to the model as two local tools (registered in
+`../adapters/local-tools/index.ts`):
+
+- **`list_mcp_tools`** — returns `.d.ts`-style signatures for every
+  `tools.<server>.<tool>(args)` call available, generated from each tool's MCP
+  input schema. Call it first to discover what to call.
+- **`run_mcp_script`** — takes `{ script, timeoutMs? }`, runs the script with
+  `tools` injected, returns `{ result, logs, error? }`.
+
+## Pieces
+
+| File | Responsibility |
+| --- | --- |
+| `client-pool.ts` | Opens/caches one MCP `Client` per server from the session's `McpServerConfig` map; `listTools` / `callTool`. |
+| `proxy.ts` | Builds the lazy `tools.<server>.<tool>(args)` proxy that forwards to the pool. |
+| `runner.ts` | Runs the script in a constrained `node:vm` context with a wall-clock timeout. |
+| `signatures.ts` | Renders connected tools as TypeScript-style signatures. |
+| `tools.ts` | The `run_mcp_script` / `list_mcp_tools` local-tool definitions. |
+
+## Credential flow — no new auth path
+
+The proxy dials the **exact same MCP server configs** the agent's own MCP tools
+use, so authentication is inherited verbatim. The chain:
+
+1. The ACP client sends MCP servers in the `newSession` params. `parseMcpServers`
+   (`../adapters/claude/session/mcp-config.ts`) turns them into a
+   `Record<string, McpServerConfig>` — **stdio** entries carry `env`, **http/sse**
+   entries carry `headers`. This map is the single credential source.
+2. Both adapters snapshot that map into `LocalToolCtx.scriptableMcpServers`:
+   `claude-agent.ts` passes it *before* the in-process local-tools server is mixed
+   in (so scripts never try to dial an in-process `sdk` server — those have no
+   transport), and `codex-agent.ts` derives it from the same ACP `mcpServers` via
+   `parseMcpServers`. The scripting tools self-disable when no external servers
+   are present.
+3. On a `run_mcp_script` / `list_mcp_tools` call, `McpClientPool` reads a config
+   and constructs the matching MCP SDK transport:
+   - `stdio` → `StdioClientTransport` with `command`/`args`/`env` (merged over the
+     agent's own `process.env`, so stdio servers keep ambient credentials).
+   - `http` → `StreamableHTTPClientTransport` with `requestInit.headers`.
+   - `sse` → `SSEClientTransport` with `requestInit.headers`.
+
+There is no separate token store, no re-auth, and nothing the model can set: a
+script can only reach servers the session was already authorized for, with the
+same credentials those tools already had.
+
+## Sandbox model
+
+`runner.ts` executes the script in a `node:vm` context whose globals are an
+explicit allowlist:
+
+- **Granted:** `tools`, a captured `console`, and pure stateless helpers
+  (`JSON`, `Math`, `Date`, `Array`/`Object`/`Map`/`Set`/…, `structuredClone`,
+  `TextEncoder`/`TextDecoder`, `URL`/`URLSearchParams`, `setTimeout`/`clearTimeout`).
+- **Denied:** `require`, `import`, `process`, `Buffer`, `fetch`, the filesystem,
+  and ambient authority via `global`/`globalThis` — so the **only** way out is `tools.*`.
+- **No dynamic code:** the context is created with
+  `codeGeneration: { strings: false, wasm: false }`, so `new Function(...)` /
+  `eval` throw — closing the most common `vm` escape via the `Function`
+  constructor.
+- **Wall-clock timeout:** default 30s, capped at 120s. `node:vm` can't interrupt a
+  pending Promise (e.g. a hung tool call), so the timeout *races* script
+  completion to bound total time; the per-server MCP tool timeout still applies to
+  each individual call.
+
+`node:vm` is **not** a hard security boundary against hostile code sharing the
+process — but here the script author is the same agent that already calls these
+tools directly. The goal is to **remove ambient authority** (fs/net/env) and
+funnel every side effect through the audited `tools` path, not to contain an
+adversary. Cloud runs additionally execute the whole agent inside a sandbox,
+which is the real isolation layer.
+
+## Adopt vs build
+
+Researched the "code mode for MCP" ecosystem (Cloudflare *Code Mode*,
+`@utcp/code-mode` / `code-mode-mcp`, `mcpac`). They all run as a **separate MCP
+server or standalone process** that connects to MCP servers via its own config
+(or target Cloudflare `workerd` isolates), and several add a second abstraction
+(UTCP) on top of MCP. None reuse an existing in-process `McpServerConfig` map
+with already-resolved credentials — which is the entire integration we need.
+
+Adopting one would mean standing up another process, re-plumbing auth into it,
+and taking a heavier dependency (some MPL-2.0) for what is a ~5-file thin layer
+over the MCP SDK `Client` we already depend on. **Decision: build.** The layer is
+small, has no new runtime dependencies (only `@modelcontextprotocol/sdk` and
+`zod`, both already present), and inherits auth for free.
+
+## Tests
+
+`mcp-scripting.test.ts` covers proxy generation, a script calling a tool,
+looping/batching, timeout enforcement, error surfacing, signature rendering, and
+sandbox-escape attempts (`require`/`process`/`global`/`Buffer`/`fetch`/`new
+Function` denied). `client-pool.integration.test.ts` spins up a real stdio MCP
+server (`fixtures/echo-mcp-server.mjs`) and drives it end-to-end through a
+script, including asserting that stdio `env` reaches the server (the credential
+path).
diff --git a/packages/agent/src/mcp-scripting/client-pool.integration.test.ts b/packages/agent/src/mcp-scripting/client-pool.integration.test.ts
new file mode 100644
index 000000000..4c30bcbdf
--- /dev/null
+++ b/packages/agent/src/mcp-scripting/client-pool.integration.test.ts
@@ -0,0 +1,121 @@
+import { fileURLToPath } from "node:url";
+import type { McpServerConfig } from "@anthropic-ai/claude-agent-sdk";
+import { afterEach, describe, expect, it } from "vitest";
+import { McpClientPool } from "./client-pool";
+import { buildToolsProxy } from "./proxy";
+import { runScript } from "./runner";
+import { listMcpToolsTool, runMcpScriptTool } from "./tools";
+
+const ECHO_SERVER = fileURLToPath(
+  new URL("./fixtures/echo-mcp-server.mjs", import.meta.url),
+);
+
+describe("McpClientPool (real stdio MCP server)", () => {
+  let pool: McpClientPool | undefined;
+
+  afterEach(async () => {
+    await pool?.close();
+    pool = undefined;
+  });
+
+  it("lists tools and calls them over a real stdio transport", async () => {
+    pool = new McpClientPool({
+      echo: { type: "stdio", command: process.execPath, args: [ECHO_SERVER] },
+    });
+
+    const tools = await pool.listTools("echo");
+    expect(tools.map((t) => t.name).sort()).toEqual(["add", "whoami"]);
+
+    const result = await pool.callTool("echo", "add", { a: 2, b: 3 });
+    expect(result.isError).toBe(false);
+    expect(result.value).toEqual({ sum: 5 });
+  });
+
+  it("inherits stdio env as the credential path", async () => {
+    pool = new McpClientPool({
+      echo: {
+        type: "stdio",
+        command: process.execPath,
+        args: [ECHO_SERVER],
+        env: { ECHO_SECRET: "s3cr3t-token" },
+      },
+    });
+
+    const result = await pool.callTool("echo", "whoami", {});
+    expect(result.value).toBe("s3cr3t-token");
+  });
+
+  it("drives the real server end-to-end through a script", async () => {
+    pool = new McpClientPool({
+      echo: { type: "stdio", command: process.execPath, args: [ECHO_SERVER] },
+    });
+    const tools = buildToolsProxy(pool, pool.serverNames());
+
+    const { result, error } = await runScript({
+      tools,
+      script: `
+        let total = 0
+        for (let i = 1; i <= 3; i++) {
+          const r = await tools.echo.add({ a: total, b: i })
+          total = r.sum
+        }
+        return total
+      `,
+    });
+
+    expect(error).toBeUndefined();
+    expect(result).toBe(6);
+  }, 15_000);
+
+  it("excludes in-process sdk servers from serverNames", () => {
+    pool = new McpClientPool({
+      echo: { type: "stdio", command: process.execPath, args: [ECHO_SERVER] },
+      // sdk servers have no dialable transport; cast to satisfy the union.
+      inproc: { type: "sdk", name: "inproc" } as never,
+    });
+    expect(pool.serverNames()).toEqual(["echo"]);
+  });
+});
+
+describe("scripting local tools (real stdio MCP server)", () => {
+  const echoConfig: Record<string, McpServerConfig> = {
+    echo: { type: "stdio", command: process.execPath, args: [ECHO_SERVER] },
+  };
+
+  it("run_mcp_script gates on having scriptable servers", () => {
+    expect(runMcpScriptTool.isEnabled({ cwd: "/r" }, undefined)).toBe(false);
+    expect(
+      runMcpScriptTool.isEnabled(
+        { cwd: "/r", scriptableMcpServers: echoConfig },
+        undefined,
+      ),
+    ).toBe(true);
+  });
+
+  it("list_mcp_tools renders real signatures and notes unreachable servers", async () => {
+    const result = await listMcpToolsTool.handler(
+      {
+        cwd: "/r",
+        scriptableMcpServers: {
+          ...echoConfig,
+          broken: { type: "stdio", command: "definitely-not-a-real-binary" },
+        },
+      },
+      {},
+    );
+    const text = result.content[0].text;
+    expect(text).toContain("echo");
+    expect(text).toContain("add(args: {");
+    expect(text).toContain("Unreachable servers");
+    expect(text).toContain("broken");
+  }, 15_000);
+
+  it("run_mcp_script executes against the real server end-to-end", async () => {
+    const result = await runMcpScriptTool.handler(
+      { cwd: "/r", scriptableMcpServers: echoConfig },
+      { script: "return (await tools.echo.add({ a: 40, b: 2 })).sum" },
+    );
+    expect(result.isError).toBeUndefined();
+    expect(result.content[0].text).toContain('"result": 42');
+  }, 15_000);
+});
diff --git a/packages/agent/src/mcp-scripting/client-pool.ts b/packages/agent/src/mcp-scripting/client-pool.ts
new file mode 100644
index 000000000..79eb1698a
--- /dev/null
+++ b/packages/agent/src/mcp-scripting/client-pool.ts
@@ -0,0 +1,220 @@
+import type { McpServerConfig } from "@anthropic-ai/claude-agent-sdk";
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import { SSEClientTransport } from "@modelcontextprotocol/sdk/client/sse.js";
+import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
+import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
+import type { Transport } from "@modelcontextprotocol/sdk/shared/transport.js";
+import type { Logger } from "../utils/logger";
+
+/** A tool as advertised by a connected MCP server. */
+export interface McpToolDescriptor {
+  name: string;
+  description?: string;
+  /** The tool's JSON Schema for arguments (MCP `inputSchema`). */
+  inputSchema?: Record<string, unknown>;
+}
+
+/** Result of a single MCP tool call, normalized for scripts. */
+export interface McpCallResult {
+  /** `structuredContent` when the server sent it; else the text blocks joined
+   *  with "\n" (JSON-parsed when that parses); else the raw `content` blocks. */
+  value: unknown;
+  /** Raw `content` blocks the server returned (`[]` when it is not an array). */
+  content: unknown[];
+  isError: boolean; // true only when the server's `isError` is exactly `true`
+}
+
+/**
+ * Opens and caches one MCP `Client` per configured server from the session's
+ * `McpServerConfig` map, so auth is inherited verbatim (stdio: `env`, http/sse:
+ * `headers`) and a script dials the exact transport the agent's own tools use.
+ *
+ * Connections open lazily on first use and are torn down by {@link close}. A
+ * pool is meant to live for the duration of one script run.
+ */
+export class McpClientPool {
+  private readonly configs: Record<string, McpServerConfig>;
+  private readonly logger?: Logger;
+  private readonly clients = new Map<string, Promise<Client>>();
+
+  constructor(configs: Record<string, McpServerConfig>, logger?: Logger) {
+    this.configs = configs;
+    this.logger = logger;
+  }
+
+  /** Server names this pool can dial (in-process `sdk` servers excluded). */
+  serverNames(): string[] {
+    return scriptableServerNames(this.configs);
+  }
+
+  /** Lists all of a server's tools (every page). Connects on first use. */
+  async listTools(serverName: string): Promise<McpToolDescriptor[]> {
+    const client = await this.getClient(serverName);
+    let page = await client.listTools();
+    const tools = [...page.tools];
+    while (page.nextCursor) {
+      page = await client.listTools({ cursor: page.nextCursor });
+      tools.push(...page.tools);
+    }
+    return tools.map((t) => ({
+      name: t.name,
+      description: t.description,
+      inputSchema: t.inputSchema as Record<string, unknown> | undefined,
+    }));
+  }
+
+  /** Calls a tool on a server, returning a normalized result. */
+  async callTool(
+    serverName: string,
+    toolName: string,
+    args: Record<string, unknown>,
+  ): Promise<McpCallResult> {
+    const client = await this.getClient(serverName);
+    const raw = await client.callTool({ name: toolName, arguments: args });
+    const content = Array.isArray(raw.content) ? raw.content : [];
+    return {
+      value: extractValue(raw.structuredContent, content),
+      content,
+      isError: raw.isError === true,
+    };
+  }
+
+  /** Disconnects every open client. Safe to call more than once. */
+  async close(): Promise<void> {
+    const pending = [...this.clients.values()];
+    this.clients.clear();
+    await Promise.allSettled(
+      pending.map(async (p) => {
+        try {
+          await (await p).close();
+        } catch (err) {
+          this.logger?.debug("Error closing MCP client", {
+            error: err instanceof Error ? err.message : String(err),
+          });
+        }
+      }),
+    );
+  }
+
+  private getClient(serverName: string): Promise<Client> {
+    let pending = this.clients.get(serverName);
+    if (!pending) {
+      pending = this.connect(serverName);
+      this.clients.set(serverName, pending);
+      // Don't cache a rejected connection — let the next call retry.
+      pending.catch(() => this.clients.delete(serverName));
+    }
+    return pending;
+  }
+
+  private async connect(serverName: string): Promise<Client> {
+    const config = this.configs[serverName];
+    if (!config) {
+      throw new Error(`Unknown MCP server: ${serverName}`);
+    }
+    const client = new Client({
+      name: "posthog-mcp-scripting",
+      version: "1.0.0",
+    });
+    await client.connect(this.createTransport(serverName, config));
+    return client;
+  }
+
+  private createTransport(
+    serverName: string,
+    config: McpServerConfig,
+  ): Transport {
+    const type = transportableType(config);
+    if (type === "stdio") {
+      const stdio = config as {
+        command: string;
+        args?: string[];
+        env?: Record<string, string>;
+      };
+      return new StdioClientTransport({
+        command: stdio.command,
+        args: stdio.args,
+        // Inherit the session env so stdio servers keep their credentials.
+        env: { ...filterUndefined(process.env), ...(stdio.env ?? {}) },
+      });
+    }
+    if (type === "http" || type === "sse") {
+      const remote = config as {
+        url: string;
+        headers?: Record<string, string>;
+      };
+      const url = new URL(remote.url);
+      const opts = remote.headers
+        ? { requestInit: { headers: remote.headers } }
+        : undefined;
+      return type === "http"
+        ? new StreamableHTTPClientTransport(url, opts)
+        : new SSEClientTransport(url, opts);
+    }
+    throw new Error(
+      `MCP server "${serverName}" is in-process (sdk) and cannot be scripted`,
+    );
+  }
+}
+
+/** Dialable transport for a config, or `undefined` for in-process `sdk`.
+ *  A missing or explicitly `undefined` `type` counts as stdio. */
+function transportableType(
+  config: McpServerConfig,
+): "stdio" | "http" | "sse" | undefined {
+  // `"type" in config` is true for `{ type: undefined }`, so compare the value.
+  const type = (config as { type?: string }).type;
+  if (type === undefined || type === "stdio") {
+    return "stdio";
+  }
+  if (type === "http" || type === "sse") {
+    return type;
+  }
+  return undefined; // sdk (in-process) — no dialable transport
+}
+
+/**
+ * Lists the servers a script is able to dial: every configured entry except
+ * in-process `sdk` ones. Lets the scripting tools gate without opening a pool.
+ */
+export function scriptableServerNames(
+  configs: Record<string, McpServerConfig>,
+): string[] {
+  return Object.entries(configs).flatMap(([name, cfg]) =>
+    transportableType(cfg) === undefined ? [] : [name],
+  );
+}
+
+/** Copies `env` into a string map, omitting variables that are `undefined`. */
+function filterUndefined(env: NodeJS.ProcessEnv): Record<string, string> {
+  const defined: Record<string, string> = {};
+  for (const [name, value] of Object.entries(env)) {
+    if (value === undefined) continue;
+    defined[name] = value;
+  }
+  return defined;
+}
+
+/** Picks `structuredContent`, else joined text (JSON if valid), else `content`. */
+function extractValue(structuredContent: unknown, content: unknown[]): unknown {
+  if (structuredContent !== undefined) {
+    return structuredContent;
+  }
+  const texts: string[] = [];
+  for (const block of content) {
+    const part = typeof block === "object" ? block : null;
+    const { type, text } = (part ?? {}) as Record<string, unknown>;
+    if (type === "text" && typeof text === "string") {
+      texts.push(text);
+    }
+  }
+  if (texts.length === 0) {
+    return content;
+  }
+  const joined = texts.join("\n");
+  try {
+    return JSON.parse(joined);
+  } catch {
+    return joined;
+  }
+}
diff --git a/packages/agent/src/mcp-scripting/fixtures/echo-mcp-server.mjs b/packages/agent/src/mcp-scripting/fixtures/echo-mcp-server.mjs
new file mode 100644
index 000000000..3f2216ff3
--- /dev/null
+++ b/packages/agent/src/mcp-scripting/fixtures/echo-mcp-server.mjs
@@ -0,0 +1,28 @@
+// A minimal stdio MCP server used by the McpClientPool integration test. It
+// exposes two tools and echoes an env var back, so the test can assert that
+// stdio `env` (the credential path) reaches the server process.
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+import { z } from "zod";
+
+const server = new McpServer({ name: "echo", version: "1.0.0" });
+
+server.tool(
+  "add",
+  "Add two numbers",
+  { a: z.number(), b: z.number() },
+  async ({ a, b }) => ({
+    content: [{ type: "text", text: JSON.stringify({ sum: a + b }) }],
+  }),
+);
+
+server.tool(
+  "whoami",
+  "Return the ECHO_SECRET env var the server was launched with",
+  {},
+  async () => ({
+    content: [{ type: "text", text: process.env.ECHO_SECRET ?? "(unset)" }],
+  }),
+);
+
+await server.connect(new StdioServerTransport());
diff --git a/packages/agent/src/mcp-scripting/index.ts b/packages/agent/src/mcp-scripting/index.ts
new file mode 100644
index 000000000..acc51e17f
--- /dev/null
+++ b/packages/agent/src/mcp-scripting/index.ts
@@ -0,0 +1,9 @@
+export type { McpCallResult, McpToolDescriptor } from "./client-pool";
+export { McpClientPool, scriptableServerNames } from "./client-pool";
+export type { ToolsProxy } from "./proxy";
+export { buildToolsProxy } from "./proxy";
+export type { RunScriptOptions, RunScriptResult } from "./runner";
+export { runScript } from "./runner";
+export type { ServerToolset } from "./signatures";
+export { renderToolsetSignatures } from "./signatures";
+export { listMcpToolsTool, runMcpScriptTool } from "./tools";
diff --git a/packages/agent/src/mcp-scripting/mcp-scripting.test.ts b/packages/agent/src/mcp-scripting/mcp-scripting.test.ts
new file mode 100644
index 000000000..0ba017e78
--- /dev/null
+++ b/packages/agent/src/mcp-scripting/mcp-scripting.test.ts
@@ -0,0 +1,301 @@
+import { describe, expect, it } from "vitest";
+import type { McpClientPool, McpToolDescriptor } from "./client-pool";
+import { buildToolsProxy } from "./proxy";
+import { runScript } from "./runner";
+import { renderToolsetSignatures } from "./signatures";
+
+/**
+ * Test double for {@link McpClientPool}. Each `callTool` is appended to `calls`
+ * and answered by `opts.call` (`null` without one); an answer object carrying
+ * `__error: true` is reported with `isError`. `listTools` serves the canned
+ * descriptors in `opts.servers`. Only the methods the proxy uses exist.
+ */
+function fakePool(opts: {
+  servers: Record<string, McpToolDescriptor[]>;
+  call?: (
+    server: string,
+    tool: string,
+    args: Record<string, unknown>,
+  ) => unknown;
+}): McpClientPool & { calls: Array<[string, string, unknown]> } {
+  type Recorded = [string, string, unknown];
+  const calls: Recorded[] = [];
+  const respond = opts.call ?? (() => null);
+  const flagsError = (value: unknown): boolean =>
+    typeof value === "object" &&
+    value !== null &&
+    (value as { __error?: boolean }).__error === true;
+  const pool = {
+    calls,
+    serverNames: () => Object.keys(opts.servers),
+    listTools: async (server: string) => opts.servers[server] ?? [],
+    callTool: async (
+      server: string,
+      tool: string,
+      args: Record<string, unknown>,
+    ) => {
+      calls.push([server, tool, args]);
+      const value = respond(server, tool, args);
+      return { value, content: [], isError: flagsError(value) };
+    },
+    close: async () => {},
+  };
+  return pool as unknown as McpClientPool & { calls: Recorded[] };
+}
+
+describe("mcp-scripting", () => {
+  describe("buildToolsProxy", () => {
+    it("forwards tools.<server>.<tool>(args) to the pool and returns the value", async () => {
+      const pool = fakePool({
+        servers: { linear: [] },
+        call: (_s, tool, args) =>
+          tool === "createIssue" ? { id: "ISS-1", ...args } : null,
+      });
+      const tools = buildToolsProxy(pool, pool.serverNames());
+
+      const result = await tools.linear.createIssue({ title: "Bug" });
+
+      expect(result).toEqual({ id: "ISS-1", title: "Bug" });
+      expect(pool.calls).toEqual([["linear", "createIssue", { title: "Bug" }]]);
+    });
+
+    it("defaults args to {} when called without arguments", async () => {
+      const pool = fakePool({ servers: { linear: [] }, call: () => "ok" });
+      const tools = buildToolsProxy(pool, pool.serverNames());
+
+      await tools.linear.listIssues();
+
+      expect(pool.calls).toEqual([["linear", "listIssues", {}]]);
+    });
+
+    it("returns undefined for unknown servers", () => {
+      const pool = fakePool({ servers: { linear: [] } });
+      const tools = buildToolsProxy(pool, pool.serverNames());
+      expect((tools as Record<string, unknown>).github).toBeUndefined();
+      expect(Object.keys(tools)).toEqual(["linear"]);
+    });
+
+    it("rejects when a tool reports isError so scripts can try/catch", async () => {
+      const pool = fakePool({
+        servers: { linear: [] },
+        call: () => ({ __error: true, message: "boom" }),
+      });
+      const tools = buildToolsProxy(pool, pool.serverNames());
+
+      await expect(tools.linear.failing({})).rejects.toThrow(/boom/);
+    });
+  });
+
+  describe("runScript", () => {
+    it("runs a script, returns its value, and captures console output", async () => {
+      const pool = fakePool({
+        servers: { linear: [] },
+        call: (_s, _t, args) => (args as { n: number }).n * 2,
+      });
+      const tools = buildToolsProxy(pool, pool.serverNames());
+
+      const { result, logs, error } = await runScript({
+        tools,
+        script: `
+          console.log("starting")
+          const doubled = await tools.linear.double({ n: 21 })
+          return { doubled }
+        `,
+      });
+
+      expect(error).toBeUndefined();
+      expect(result).toEqual({ doubled: 42 });
+      expect(logs).toContain("starting");
+    });
+
+    it("supports looping and batching over results", async () => {
+      const pool = fakePool({
+        servers: { linear: [] },
+        call: (_s, tool, args) => {
+          if (tool === "listIssues") {
+            return [
+              { id: "A", done: false },
+              { id: "B", done: true },
+              { id: "C", done: false },
+            ];
+          }
+          return { closed: (args as { id: string }).id };
+        },
+      });
+      const tools = buildToolsProxy(pool, pool.serverNames());
+
+      const { result, error } = await runScript({
+        tools,
+        script: `
+          const issues = await tools.linear.listIssues({})
+          const open = issues.filter((i) => !i.done)
+          const closed = []
+          for (const i of open) {
+            const r = await tools.linear.closeIssue({ id: i.id })
+            closed.push(r.closed)
+          }
+          return closed
+        `,
+      });
+
+      expect(error).toBeUndefined();
+      expect(result).toEqual(["A", "C"]);
+      // 1 list + 2 closes
+      expect(pool.calls).toHaveLength(3);
+    });
+
+    it("surfaces script errors as a message, not a throw", async () => {
+      const pool = fakePool({ servers: { linear: [] } });
+      const tools = buildToolsProxy(pool, pool.serverNames());
+
+      const { result, error } = await runScript({
+        tools,
+        script: `throw new Error("explicit failure")`,
+      });
+
+      expect(result).toBeUndefined();
+      expect(error).toMatch(/explicit failure/);
+    });
+
+    it("surfaces a tool error thrown inside the script", async () => {
+      const pool = fakePool({
+        servers: { linear: [] },
+        call: () => ({ __error: true, message: "rate limited" }),
+      });
+      const tools = buildToolsProxy(pool, pool.serverNames());
+
+      const { error } = await runScript({
+        tools,
+        script: `await tools.linear.create({})`,
+      });
+
+      expect(error).toMatch(/rate limited/);
+    });
+
+    it("enforces a wall-clock timeout", async () => {
+      const pool = fakePool({ servers: {} });
+      const tools = buildToolsProxy(pool, []);
+
+      const { error } = await runScript({
+        tools,
+        timeoutMs: 50,
+        script: `await new Promise((resolve) => setTimeout(resolve, 5000))`,
+      });
+
+      expect(error).toMatch(/timed out/i);
+    });
+
+    describe("sandbox isolation", () => {
+      const pool = fakePool({ servers: {} });
+      const tools = buildToolsProxy(pool, []);
+
+      it.each([
+        ["require", `return typeof require`],
+        ["process", `return typeof process`],
+        ["global", `return typeof global`],
+        ["globalThis.process", `return typeof globalThis.process`],
+        ["Buffer", `return typeof Buffer`],
+        ["fetch", `return typeof fetch`],
+      ])("denies access to %s", async (_name, script) => {
+        const { result, error } = await runScript({ tools, script });
+        // Either the symbol is absent (typeof "undefined") or referencing it throws.
+        if (error) {
+          expect(error).toMatch(/is not defined|undefined/i);
+        } else {
+          expect(result).toBe("undefined");
+        }
+      });
+
+      it("blocks dynamic code generation (new Function)", async () => {
+        const { error } = await runScript({
+          tools,
+          script: `return new Function("return 1")()`,
+        });
+        expect(error).toBeTruthy();
+      });
+
+      it("blocks process access via constructor escape attempt", async () => {
+        const { result, error } = await runScript({
+          tools,
+          script: `
+            try {
+              return (function(){}).constructor("return process")()
+            } catch (e) {
+              return "blocked: " + e.message
+            }
+          `,
+        });
+        // codeGeneration.strings:false makes the Function constructor throw.
+        if (!error) {
+          expect(String(result)).toMatch(/blocked/);
+        } else {
+          expect(error).toBeTruthy();
+        }
+      });
+    });
+  });
+
+  describe("renderToolsetSignatures", () => {
+    it("renders tools.<server>.<tool>(args) signatures from JSON schemas", () => {
+      const text = renderToolsetSignatures([
+        {
+          serverName: "linear",
+          tools: [
+            {
+              name: "createIssue",
+              description: "Create an issue",
+              inputSchema: {
+                type: "object",
+                properties: {
+                  title: { type: "string" },
+                  teamId: { type: "string" },
+                  priority: { type: "number" },
+                },
+                required: ["title", "teamId"],
+              },
+            },
+          ],
+        },
+      ]);
+
+      expect(text).toContain("linear");
+      expect(text).toContain("createIssue(args: {");
+      expect(text).toContain("title: string");
+      expect(text).toContain("teamId: string");
+      expect(text).toContain("priority?: number");
+      expect(text).toContain("Create an issue");
+    });
+
+    it("handles enums, arrays, and empty schemas", () => {
+      const text = renderToolsetSignatures([
+        {
+          serverName: "x",
+          tools: [
+            {
+              name: "noArgs",
+              inputSchema: { type: "object", properties: {} },
+            },
+            {
+              name: "withEnum",
+              inputSchema: {
+                type: "object",
+                properties: {
+                  status: { enum: ["open", "closed"] },
+                  tags: { type: "array", items: { type: "string" } },
+                },
+                required: ["status"],
+              },
+            },
+          ],
+        },
+      ]);
+
+      expect(text).toContain("noArgs()");
+      expect(text).toContain(`status: "open" | "closed"`);
+      expect(text).toContain("tags?: string[]");
+    });
+
+    it("reports the empty case", () => {
+      expect(renderToolsetSignatures([])).toMatch(/No external MCP servers/);
+    });
+  });
+});
diff --git a/packages/agent/src/mcp-scripting/proxy.ts b/packages/agent/src/mcp-scripting/proxy.ts
new file mode 100644
index 000000000..0e0fdeb94
--- /dev/null
+++ b/packages/agent/src/mcp-scripting/proxy.ts
@@ -0,0 +1,104 @@
+import type { McpClientPool } from "./client-pool";
+
+/** The `tools` global a script sees; call it as `await tools.<server>.<tool>(args)`. */
+export type ToolsProxy = Record<
+  string,
+  Record<string, (args?: Record<string, unknown>) => Promise<unknown>>
+>;
+
+/**
+ * Creates the top-level `tools` object handed to a script. Reading
+ * `tools.<server>` yields that server's tool map, whose functions forward to
+ * the live MCP client through the pool and resolve to the parsed call value
+ * (`structuredContent` when present, else parsed text); a tool reporting
+ * `isError` rejects, so ordinary try/catch works in scripts.
+ *
+ * Nothing is enumerated up front: tool names are resolved lazily, so any tool
+ * the server really exposes is callable. A name outside `serverNames` reads as
+ * `undefined`, exactly like a missing key on a plain object.
+ */
+export function buildToolsProxy(
+  pool: McpClientPool,
+  serverNames: readonly string[],
+): ToolsProxy {
+  const known = new Set(serverNames);
+  const perServer = new Map<string, ToolsProxy[string]>();
+  const isServer = (key: string | symbol): key is string =>
+    typeof key === "string" && known.has(key);
+
+  const handler: ProxyHandler<ToolsProxy> = {
+    get(_target, key): unknown {
+      if (!isServer(key)) {
+        return undefined;
+      }
+      // One proxy per server, built on first access and reused afterwards.
+      let serverProxy = perServer.get(key);
+      if (!serverProxy) {
+        serverProxy = buildServerProxy(pool, key);
+        perServer.set(key, serverProxy);
+      }
+      return serverProxy;
+    },
+    has(_target, key): boolean {
+      return isServer(key);
+    },
+    ownKeys(): string[] {
+      return Array.from(known);
+    },
+    getOwnPropertyDescriptor(_target, key): PropertyDescriptor | undefined {
+      return isServer(key)
+        ? { enumerable: true, configurable: true }
+        : undefined;
+    },
+  };
+  return new Proxy({} as ToolsProxy, handler);
+}
+
+/**
+ * Lazy tool map for one server: every property read yields a cached async
+ * function that calls the tool of that name and rejects on `isError`.
+ */
+function buildServerProxy(
+  pool: McpClientPool,
+  serverName: string,
+): ToolsProxy[string] {
+  type ToolFn = ToolsProxy[string][string];
+  // Keys the runtime probes on its own (`await`, JSON.stringify, coercion) are
+  // not tools: answering would fire a stray call, and `then` hangs the awaiter.
+  const reserved = new Set(["then", "toJSON", "toString", "valueOf"]);
+  const toolCache = new Map<string, ToolFn>();
+
+  return new Proxy({} as ToolsProxy[string], {
+    get(_target, prop): unknown {
+      if (typeof prop !== "string" || reserved.has(prop)) {
+        return undefined;
+      }
+      const cached = toolCache.get(prop);
+      if (cached) {
+        return cached;
+      }
+      const fn: ToolFn = async (args = {}) => {
+        const result = await pool.callTool(serverName, prop, args);
+        if (result.isError) {
+          throw new Error(
+            `tools.${serverName}.${prop} failed: ${stringifyError(result.value)}`,
+          );
+        }
+        return result.value;
+      };
+      toolCache.set(prop, fn);
+      return fn;
+    },
+  });
+}
+
+/** Renders a tool error payload as text; the result is always a string. */
+function stringifyError(value: unknown): string {
+  try {
+    // JSON.stringify yields undefined for undefined, functions and symbols.
+    const text = typeof value === "string" ? value : JSON.stringify(value);
+    return text ?? String(value);
+  } catch {
+    return String(value); // circular structure or BigInt
+  }
+}
diff --git a/packages/agent/src/mcp-scripting/runner.ts b/packages/agent/src/mcp-scripting/runner.ts
new file mode 100644
index 000000000..d5e975971
--- /dev/null
+++ b/packages/agent/src/mcp-scripting/runner.ts
@@ -0,0 +1,178 @@
+import * as vm from "node:vm";
+import type { ToolsProxy } from "./proxy";
+
+export interface RunScriptOptions {
+  script: string; // Body of an async function: may use `await` and `return`.
+  tools: ToolsProxy; // Exposed to the script as the global `tools`.
+  /** Wall-clock budget for the whole script. Default 30s, capped at 120s. */
+  timeoutMs?: number;
+}
+
+export interface RunScriptResult {
+  /** The value the script `return`ed (undefined if none), made JSON-safe. */
+  result: unknown;
+  /** One entry per console.log/info/warn/error/debug call during the run. */
+  logs: string[];
+  /** Set only when the script failed to compile, threw, or timed out. */
+  error?: string;
+}
+
+const DEFAULT_TIMEOUT_MS = 30_000;
+const MAX_TIMEOUT_MS = 120_000;
+
+/**
+ * Runs agent-authored JavaScript in a constrained `node:vm` context with the
+ * `tools` proxy injected. The sandbox boundary:
+ *
+ * - Globals are an explicit allowlist (`tools`, captured `console`, timers,
+ *   JSON, Math, Date, encoders, structured-data constructors). There is no
+ *   `require`, `import`, `process`, `global`, `Buffer`, `fetch`, or filesystem —
+ *   so a script reaches the outside world ONLY through `tools.*`.
+ * - A wall-clock timeout aborts a runaway script. `node:vm` cannot interrupt a
+ *   pending Promise (e.g. a never-resolving tool call), so the timeout races
+ *   the script's completion, bounding total time even with async work in flight.
+ * - Script failures (compile error, throw, timeout) come back in `error`.
+ *
+ * `node:vm` is not a security sandbox against a determined attacker sharing the
+ * process (prototype-chain escapes exist), but here the script author is the
+ * same agent that already runs tools directly — the goal is to remove ambient
+ * authority (fs/net/env) and force all side effects through the audited `tools`
+ * path, not to contain hostile code. For stronger isolation, run the agent
+ * itself in its sandbox (which cloud runs already do).
+ */
+export async function runScript(
+  options: RunScriptOptions,
+): Promise<RunScriptResult> {
+  const timeoutMs = Math.min(
+    Math.max(options.timeoutMs ?? DEFAULT_TIMEOUT_MS, 1),
+    MAX_TIMEOUT_MS,
+  );
+  const logs: string[] = [];
+  const sandboxConsole = makeCapturingConsole(logs);
+
+  const context = vm.createContext(
+    Object.assign(Object.create(null), {
+      tools: options.tools,
+      console: sandboxConsole,
+      // Pure, stateless helpers — no ambient authority granted by these.
+      JSON,
+      Math,
+      Date,
+      Promise,
+      Array,
+      Object,
+      String,
+      Number,
+      Boolean,
+      Map,
+      Set,
+      RegExp,
+      Error,
+      TypeError,
+      RangeError,
+      Symbol,
+      BigInt,
+      structuredClone,
+      TextEncoder,
+      TextDecoder,
+      URL,
+      URLSearchParams,
+      setTimeout,
+      clearTimeout,
+    }),
+    { name: "mcp-script", codeGeneration: { strings: false, wasm: false } },
+  );
+
+  // Wrap as an async IIFE so the script may use top-level await and `return`.
+  const wrapped = `(async () => {\n${options.script}\n})()`;
+
+  let script: vm.Script;
+  try {
+    script = new vm.Script(wrapped, { filename: "mcp-script.js" });
+  } catch (err) {
+    return { result: undefined, logs, error: formatError(err) };
+  }
+
+  const run = (async (): Promise<unknown> => {
+    // `timeout` here guards synchronous spin; async work is bounded by the race.
+    const completion = script.runInContext(context, { timeout: timeoutMs });
+    return await completion;
+  })();
+
+  try {
+    const result = await withTimeout(run, timeoutMs);
+    return { result: toJsonSafe(result), logs };
+  } catch (err) {
+    return { result: undefined, logs, error: formatError(err) };
+  }
+}
+
+function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
+  return new Promise<T>((resolve, reject) => {
+    const timer = setTimeout(() => {
+      reject(new Error(`Script timed out after ${ms}ms`));
+    }, ms);
+    promise.then(
+      (value) => {
+        clearTimeout(timer);
+        resolve(value);
+      },
+      (err) => {
+        clearTimeout(timer);
+        reject(err);
+      },
+    );
+  });
+}
+
+/** Console stand-in: log-like methods append one formatted line to `logs`. */
+function makeCapturingConsole(logs: string[]): Console {
+  const captured: ReadonlySet<unknown> = new Set([
+    "log",
+    "info",
+    "warn",
+    "error",
+    "debug",
+  ]);
+  // All captured levels share one sink; the level itself is not recorded.
+  const append = (...parts: unknown[]): void => {
+    const line = parts.map(formatLogArg).join(" ");
+    logs.push(line);
+  };
+  // Any other member (console.table, console.group, …) reads as a fresh no-op,
+  // so a script calling it does not throw.
+  const handler: ProxyHandler<Console> = {
+    get: (_target, prop): unknown => (captured.has(prop) ? append : () => {}),
+  };
+  return new Proxy({} as Console, handler);
+}
+
+/** Formats one console argument: strings verbatim, everything else as JSON. */
+function formatLogArg(arg: unknown): string {
+  try {
+    // Errors would stringify to "{}", undefined/functions to no value at all.
+    const json = arg instanceof Error ? undefined : JSON.stringify(arg);
+    return typeof arg === "string" ? arg : (json ?? String(arg));
+  } catch {
+    return String(arg); // circular structure or BigInt
+  }
+}
+
+/** Normalizes a script result to what survives the tool result's JSON encoding. */
+function toJsonSafe(value: unknown): unknown {
+  let safe: unknown;
+  try {
+    // undefined passes through untouched; everything else is round-tripped.
+    safe = value === undefined ? undefined : JSON.parse(JSON.stringify(value));
+  } catch {
+    safe = String(value); // not serializable (e.g. cycle, BigInt, function)
+  }
+  return safe;
+}
+
+/**
+ * Text for a thrown value: an Error's message, otherwise String(value).
+ */
+function formatError(err: unknown): string {
+  return err instanceof Error ? err.message : String(err);
+}
diff --git a/packages/agent/src/mcp-scripting/signatures.ts b/packages/agent/src/mcp-scripting/signatures.ts
new file mode 100644
index 000000000..6f2278ec6
--- /dev/null
+++ b/packages/agent/src/mcp-scripting/signatures.ts
@@ -0,0 +1,110 @@
+import type { McpToolDescriptor } from "./client-pool";
+
+/** A server and the tools it exposes, ready to render as signatures. */
+export interface ServerToolset {
+  serverName: string; // rendered as the `tools.<serverName>` member key
+  tools: McpToolDescriptor[]; // an empty list renders as "no tools advertised"
+}
+
+/**
+ * Produces a `.d.ts`-flavoured listing of every connected MCP server so the
+ * model can read which `tools.<server>.<tool>(args)` calls exist and what
+ * arguments each takes. That every call returns a Promise is stated once, in
+ * the leading comment, instead of being repeated on every signature; with no
+ * servers at all a single explanatory comment is returned.
+ */
+export function renderToolsetSignatures(toolsets: ServerToolset[]): string {
+  if (toolsets.length === 0) {
+    return "// No external MCP servers are connected, so `tools` is empty.";
+  }
+  const header = [
+    "// Each method is async — `await tools.<server>.<tool>(args)`.",
+    "// Args are validated against the server's schema before the call runs.",
+    "declare const tools: {",
+  ];
+  const body = toolsets.map((toolset) => renderServerBlock(toolset));
+  return [...header, ...body, "}"].join("\n");
+}
+
+/** Renders one server as an indented `name: { …tool signatures… }` member. */
+function renderServerBlock(toolset: ServerToolset): string {
+  const key = propertyKey(toolset.serverName);
+  const signatures = toolset.tools.map(renderToolSignature);
+  return signatures.length === 0
+    ? `  ${key}: {} // no tools advertised`
+    : [`  ${key}: {`, ...signatures, "  }"].join("\n");
+}
+
+/** Renders `name(args): unknown`, preceded by a JSDoc line when described. */
+function renderToolSignature(tool: McpToolDescriptor): string {
+  const { name, description, inputSchema } = tool;
+  const signature = `    ${propertyKey(name)}(${renderParams(inputSchema)}): unknown`;
+  const comment = description ? `    /** ${oneLine(description)} */\n` : "";
+  return comment + signature;
+}
+
+/** Renders the lone `args: { … }` parameter; "" when there are no properties. */
+function renderParams(schema: McpToolDescriptor["inputSchema"]): string {
+  const props: Record<string, unknown> = isRecord(schema?.properties)
+    ? schema.properties
+    : {};
+  const names = Object.keys(props);
+  if (names.length === 0) {
+    return "";
+  }
+  // Names listed in `required` render bare; all others get a `?` suffix.
+  const requiredList: unknown[] = Array.isArray(schema?.required)
+    ? schema.required
+    : [];
+  const required = new Set(requiredList);
+  const fields = names.map((name) => {
+    const marker = required.has(name) ? "" : "?";
+    return `${propertyKey(name)}${marker}: ${jsonSchemaToTs(props[name])}`;
+  });
+  return `args: { ${fields.join("; ")} }`;
+}
+
+/** Best-effort JSON-Schema → TS type for a single field, kept shallow. */
+function jsonSchemaToTs(raw: unknown): string {
+  if (!isRecord(raw)) {
+    return "unknown";
+  }
+  if (Array.isArray(raw.enum) && raw.enum.length > 0) {
+    return raw.enum.map((v) => JSON.stringify(v)).join(" | ");
+  }
+  const type = raw.type;
+  if (type === "array") {
+    // `[]` binds tighter than `|`, so a union element type needs parentheses:
+    // `("a" | "b")[]`, not `"a" | "b"[]`.
+    const item = jsonSchemaToTs(raw.items);
+    return item.includes(" | ") ? `(${item})[]` : `${item}[]`;
+  }
+  if (type === "object" || isRecord(raw.properties)) {
+    return "Record<string, unknown>";
+  }
+  if (type === "string" || type === "boolean" || type === "null") {
+    return type; // these JSON Schema type names are also the TS type names
+  }
+  if (type === "number" || type === "integer") {
+    return "number";
+  }
+  if (Array.isArray(type)) {
+    return (
+      type.map((t) => jsonSchemaToTs({ type: t })).join(" | ") || "unknown"
+    );
+  }
+  return "unknown";
+}
+
+/** A bare identifier when it's a valid one, else a quoted key. */
+function propertyKey(name: string): string {
+  return /^[A-Za-z_$][A-Za-z0-9_$]*$/.test(name) ? name : JSON.stringify(name);
+}
+
+function oneLine(text: string): string {
+  return text.replace(/\s+/g, " ").trim();
+}
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
diff --git a/packages/agent/src/mcp-scripting/tools.ts b/packages/agent/src/mcp-scripting/tools.ts
new file mode 100644
index 000000000..106204ba0
--- /dev/null
+++ b/packages/agent/src/mcp-scripting/tools.ts
@@ -0,0 +1,134 @@
+import { z } from "zod";
+import {
+  defineLocalTool,
+  type LocalTool,
+  type LocalToolCtx,
+  type LocalToolResult,
+} from "../adapters/local-tools/registry";
+import { McpClientPool, scriptableServerNames } from "./client-pool";
+import { buildToolsProxy } from "./proxy";
+import { runScript } from "./runner";
+import { renderToolsetSignatures, type ServerToolset } from "./signatures";
+
+const RUN_MCP_SCRIPT_DESCRIPTION = `Run one JavaScript script that calls the connected MCP tools as async functions, so you can orchestrate many tool calls with normal control flow (loops, filtering, batching) in a single step instead of one tool call at a time.
+
+Inside the script, every connected MCP server is exposed as \`tools.<server>.<tool>(args)\` and returns a Promise of the tool's parsed result:
+
+  const issues = await tools.linear.listIssues({ teamId })
+  const stale = issues.filter((i) => i.status === "backlog")
+  for (const i of stale) {
+    await tools.linear.createComment({ issueId: i.id, body: "bump" })
+  }
+  return { closed: stale.length }
+
+Rules:
+- Call \`list_mcp_tools\` first to see which \`tools.*\` calls exist and their argument schemas.
+- The script body runs as an async function: use \`await\` freely and \`return\` the value you want back.
+- A tool that errors throws — wrap calls in try/catch if you want to continue.
+- Loops and batching over results are encouraged; that's the whole point.
+- Only \`tools\`, \`console\`, JSON/Math/Date and similar pure helpers are available — no filesystem, network, \`require\`, or \`process\`. Reach the outside world only through \`tools.*\`.
+- The return value and any \`console.log\` output are sent back to you.`;
+
+const LIST_MCP_TOOLS_DESCRIPTION = `List the MCP tools available to \`run_mcp_script\`, rendered as TypeScript-style signatures (\`tools.<server>.<tool>(args)\`) with argument schemas. Call this before writing a script so you know what to call.`;
+
+/** `run_mcp_script`: executes one script with a pool created just for this call. */
+export const runMcpScriptTool: LocalTool = defineLocalTool({
+  name: "run_mcp_script",
+  description: RUN_MCP_SCRIPT_DESCRIPTION,
+  schema: {
+    script: z
+      .string()
+      .describe(
+        "JavaScript to run. Runs as an async function body; use await and return.",
+      ),
+    timeoutMs: z
+      .number()
+      .int()
+      .positive()
+      .max(120_000)
+      .optional()
+      .describe("Wall-clock budget in ms (default 30000, max 120000)."),
+  },
+  alwaysLoad: true,
+  isEnabled: (ctx) => hasScriptableServers(ctx),
+  handler: async (ctx, args): Promise<LocalToolResult> => {
+    // The pool lives only for this call and is closed on every exit path.
+    const pool = new McpClientPool(ctx.scriptableMcpServers ?? {});
+    try {
+      const outcome = await runScript({
+        script: args.script as string,
+        tools: buildToolsProxy(pool, pool.serverNames()),
+        timeoutMs: args.timeoutMs as number | undefined,
+      });
+      // `error`, when present, marks the tool result as failed.
+      return toToolResult(outcome);
+    } finally {
+      await pool.close();
+    }
+  },
+});
+
+/** `list_mcp_tools`: renders every reachable server's tools as signatures. */
+export const listMcpToolsTool: LocalTool = defineLocalTool({
+  name: "list_mcp_tools",
+  description: LIST_MCP_TOOLS_DESCRIPTION,
+  schema: {},
+  alwaysLoad: true,
+  isEnabled: (ctx) => hasScriptableServers(ctx),
+  handler: async (ctx): Promise<LocalToolResult> => {
+    const pool = new McpClientPool(ctx.scriptableMcpServers ?? {});
+    try {
+      const { toolsets, failed } = await collectToolsets(pool);
+      const sections = [renderToolsetSignatures(toolsets)];
+      if (failed.length > 0) {
+        // Surface unreachable servers instead of silently omitting them.
+        sections.push(
+          `// Unreachable servers (failed to connect): ${failed.join(", ")}`,
+        );
+      }
+      return { content: [{ type: "text", text: sections.join("\n\n") }] };
+    } finally {
+      await pool.close();
+    }
+  },
+});
+
+function hasScriptableServers(ctx: LocalToolCtx): boolean {
+  const servers = ctx.scriptableMcpServers; // absent → scripting tools stay off
+  return !!servers && scriptableServerNames(servers).length > 0;
+}
+
+/** Lists every server's tools concurrently; unreachable servers go to `failed`. */
+async function collectToolsets(
+  pool: McpClientPool,
+): Promise<{ toolsets: ServerToolset[]; failed: string[] }> {
+  const names = pool.serverNames();
+  const attempts = names.map(async (serverName) => pool.listTools(serverName));
+  const outcomes = await Promise.allSettled(attempts);
+
+  const toolsets: ServerToolset[] = [];
+  const failed: string[] = [];
+  // One failing server shouldn't fail the whole listing; report it instead.
+  // allSettled keeps input order, so outcome i belongs to names[i].
+  for (const [i, outcome] of outcomes.entries()) {
+    const serverName = names[i];
+    if (outcome.status === "fulfilled") {
+      toolsets.push({ serverName, tools: outcome.value });
+    } else {
+      failed.push(serverName);
+    }
+  }
+  return { toolsets, failed };
+}
+
+/** Wraps a script outcome as pretty-printed JSON text; `error` flags failure. */
+function toToolResult(payload: {
+  result: unknown;
+  logs: string[];
+  error?: string;
+}): LocalToolResult {
+  const content = [
+    { type: "text" as const, text: JSON.stringify(payload, null, 2) },
+  ];
+  return payload.error ? { content, isError: true } : { content };
+}

From 49f7c8526f729b661dce7588efea51bf8c719ad7 Mon Sep 17 00:00:00 2001
From: Michael Matloka <michael@matloka.com>
Date: Fri, 19 Jun 2026 03:50:19 +0200
Subject: [PATCH 2/3] feat(agent): expose run_mcp_script / list_mcp_tools to
 both adapters

Registers the scripting tools in the local-tools registry and threads
the session's external MCP server map into LocalToolCtx from both the
Claude (claude-agent.ts) and Codex (codex-agent.ts) adapters, so a
script dials the same servers with inherited auth. Tools self-disable
when no external MCP servers are connected.
---
 .../agent/src/adapters/claude/claude-agent.ts  | 18 ++++++++++++++----
 .../src/adapters/codex/codex-agent.test.ts     | 14 +++++++++-----
 .../agent/src/adapters/codex/codex-agent.ts    |  5 +++++
 .../agent/src/adapters/local-tools/index.ts    |  3 +++
 .../agent/src/adapters/local-tools/registry.ts | 10 ++++++++++
 5 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts
index 7c6d8220f..5ca227aa3 100644
--- a/packages/agent/src/adapters/claude/claude-agent.ts
+++ b/packages/agent/src/adapters/claude/claude-agent.ts
@@ -1656,12 +1656,24 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     // needs so the session doesn't pin the whole meta object.
     const baseBranch = meta?.baseBranch;
     const environment = meta?.environment;
+    // Snapshot the external MCP servers (before the in-process local server is
+    // mixed in below) so run_mcp_script can dial them with inherited auth.
+    const externalMcpServers: Record<string, McpServerConfig> =
+      supportsMcpInjection(earlyModelId)
+        ? parseMcpServers(params, this.logger)
+        : {};
     const buildInProcessMcpServers = (): Record<
       string,
       McpSdkServerConfigWithInstance
     > => {
       const server = createLocalToolsMcpServer(
-        { cwd, token: resolveGithubToken(), taskId, baseBranch },
+        {
+          cwd,
+          token: resolveGithubToken(),
+          taskId,
+          baseBranch,
+          scriptableMcpServers: externalMcpServers,
+        },
         { environment },
       );
       return server ? { [LOCAL_TOOLS_MCP_NAME]: server } : {};
@@ -1676,9 +1688,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent {
     }
 
     const mcpServers: Record<string, McpServerConfig> = {
-      ...(supportsMcpInjection(earlyModelId)
-        ? parseMcpServers(params, this.logger)
-        : {}),
+      ...externalMcpServers,
       ...initialInProcess,
     };
 
diff --git a/packages/agent/src/adapters/codex/codex-agent.test.ts b/packages/agent/src/adapters/codex/codex-agent.test.ts
index 382fe6131..23e209299 100644
--- a/packages/agent/src/adapters/codex/codex-agent.test.ts
+++ b/packages/agent/src/adapters/codex/codex-agent.test.ts
@@ -424,16 +424,20 @@ describe("CodexAcpAgent", () => {
         _meta: { systemPrompt: string };
       };
 
-      // Existing MCP server is preserved; ours is appended.
-      expect(forwarded.mcpServers).toHaveLength(2);
+      // Existing MCP server is preserved; the structured-output server is
+      // appended. The local-tools server is also present because the existing
+      // server makes run_mcp_script/list_mcp_tools available.
       expect(forwarded.mcpServers[0].name).toBe("existing");
-      expect(forwarded.mcpServers[1].name).toBe("posthog_output");
-      expect(forwarded.mcpServers[1].command).toBe(process.execPath);
+      const outputServer = forwarded.mcpServers.find(
+        (s) => s.name === "posthog_output",
+      );
+      expect(outputServer).toBeDefined();
+      expect(outputServer?.command).toBe(process.execPath);
 
       // The schema is forwarded base64-encoded so codex-acp doesn't have
       // to escape it through a shell.
       const envEntry = (
-        forwarded.mcpServers[1].env as Array<{ name: string; value: string }>
+        outputServer?.env as Array<{ name: string; value: string }>
       ).find((e) => e.name === "POSTHOG_OUTPUT_SCHEMA");
       expect(envEntry).toBeDefined();
       const decoded = JSON.parse(
diff --git a/packages/agent/src/adapters/codex/codex-agent.ts b/packages/agent/src/adapters/codex/codex-agent.ts
index a047f23dc..45b6eb188 100644
--- a/packages/agent/src/adapters/codex/codex-agent.ts
+++ b/packages/agent/src/adapters/codex/codex-agent.ts
@@ -71,6 +71,7 @@ import {
   emptyBaseline,
   estimateTokens,
 } from "../claude/context-breakdown";
+import { parseMcpServers } from "../claude/session/mcp-config";
 import { classifyAgentError } from "../error-classification";
 import {
   enabledLocalTools,
@@ -646,6 +647,10 @@ export class CodexAcpAgent extends BaseAcpAgent {
       token: resolveGithubToken(),
       taskId: resolveTaskId(meta),
       baseBranch: meta?.baseBranch,
+      // Reuse the ACP MCP servers so run_mcp_script can dial them (auth inherited).
+      scriptableMcpServers: parseMcpServers({
+        mcpServers: request.mcpServers ?? [],
+      }),
     };
     const tools = enabledLocalTools(ctx, meta);
     if (tools.length === 0) {
diff --git a/packages/agent/src/adapters/local-tools/index.ts b/packages/agent/src/adapters/local-tools/index.ts
index 1272e18a7..74bfd057f 100644
--- a/packages/agent/src/adapters/local-tools/index.ts
+++ b/packages/agent/src/adapters/local-tools/index.ts
@@ -1,3 +1,4 @@
+import { listMcpToolsTool, runMcpScriptTool } from "../../mcp-scripting/tools";
 import type { LocalTool, LocalToolCtx, LocalToolGateMeta } from "./registry";
 import { signedCommitTool } from "./tools/signed-commit";
 import { signedMergeTool } from "./tools/signed-merge";
@@ -17,6 +18,8 @@ export const LOCAL_TOOLS: LocalTool[] = [
   signedCommitTool,
   signedMergeTool,
   signedRewriteTool,
+  runMcpScriptTool,
+  listMcpToolsTool,
 ];
 
 /** Tools whose gate passes for the given context — the set to actually expose. */
diff --git a/packages/agent/src/adapters/local-tools/registry.ts b/packages/agent/src/adapters/local-tools/registry.ts
index 97e3da62b..a52a33481 100644
--- a/packages/agent/src/adapters/local-tools/registry.ts
+++ b/packages/agent/src/adapters/local-tools/registry.ts
@@ -1,3 +1,4 @@
+import type { McpServerConfig } from "@anthropic-ai/claude-agent-sdk";
 import type { z } from "zod";
 
 /**
@@ -20,6 +21,15 @@ export interface LocalToolCtx {
    * back to origin/HEAD detection when unset.
    */
   baseBranch?: string;
+  /**
+   * The session's external MCP servers, keyed by name — the same
+   * `McpServerConfig` map handed to the Claude SDK `query()`. The MCP-scripting
+   * tools (`run_mcp_script` / `list_mcp_tools`) open their own clients against
+   * these configs, inheriting auth (stdio `env`, http/sse `headers`). In-process
+   * `sdk` servers are skipped — they have no transport to dial. Absent or empty
+   * means scripting tools self-disable.
+   */
+  scriptableMcpServers?: Record<string, McpServerConfig>;
 }
 
 /** Minimal session-meta shape needed to gate tools (e.g. cloud-only). */

From f3a43fa2f129877244125a9dd00a6f56b10a2e20 Mon Sep 17 00:00:00 2001
From: Michael Matloka <michael@matloka.com>
Date: Fri, 19 Jun 2026 09:41:48 +0200
Subject: [PATCH 3/3] fix(agent): single timeout budget and JSDoc-safe tool
 descriptions

- runScript now enforces timeoutMs as one shared wall-clock deadline across
  the synchronous and async phases (previously up to 2x the budget)
- signature rendering neutralizes */ in tool descriptions so a description
  can't close the generated JSDoc block early
---
 .../src/mcp-scripting/mcp-scripting.test.ts   | 45 +++++++++++++++++++
 packages/agent/src/mcp-scripting/runner.ts    | 23 +++++++---
 .../agent/src/mcp-scripting/signatures.ts     |  4 +-
 3 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/packages/agent/src/mcp-scripting/mcp-scripting.test.ts b/packages/agent/src/mcp-scripting/mcp-scripting.test.ts
index 0ba017e78..7076045dd 100644
--- a/packages/agent/src/mcp-scripting/mcp-scripting.test.ts
+++ b/packages/agent/src/mcp-scripting/mcp-scripting.test.ts
@@ -184,6 +184,29 @@ describe("mcp-scripting", () => {
       expect(error).toMatch(/timed out/i);
     });
 
+    it("treats timeoutMs as one shared budget across sync and async phases", async () => {
+      const pool = fakePool({ servers: {} });
+      const tools = buildToolsProxy(pool, []);
+
+      // A brief synchronous spin followed by an async wait that would, on its
+      // own, fit inside timeoutMs — but combined must trip the single deadline.
+      const start = Date.now();
+      const { error } = await runScript({
+        tools,
+        timeoutMs: 200,
+        script: `
+          const until = Date.now() + 120;
+          while (Date.now() < until) {}
+          await new Promise((resolve) => setTimeout(resolve, 5000));
+        `,
+      });
+      const elapsed = Date.now() - start;
+
+      expect(error).toMatch(/timed out/i);
+      // Single budget: total stays near timeoutMs, never approaching 2×.
+      expect(elapsed).toBeLessThan(400);
+    });
+
     describe("sandbox isolation", () => {
       const pool = fakePool({ servers: {} });
       const tools = buildToolsProxy(pool, []);
@@ -297,5 +320,27 @@ describe("mcp-scripting", () => {
     it("reports the empty case", () => {
       expect(renderToolsetSignatures([])).toMatch(/No external MCP servers/);
     });
+
+    it("neutralizes `*/` in a description so the JSDoc block stays valid", () => {
+      const text = renderToolsetSignatures([
+        {
+          serverName: "math",
+          tools: [
+            {
+              name: "divide",
+              description: "Computes a*/b",
+              inputSchema: { type: "object", properties: {} },
+            },
+          ],
+        },
+      ]);
+
+      // The raw `*/` must not survive, or it would close the comment early.
+      const jsdocLine = text
+        .split("\n")
+        .find((l) => l.includes("/**") && l.includes("Computes"));
+      expect(jsdocLine).toBeDefined();
+      expect(jsdocLine).toBe("    /** Computes a* /b */");
+    });
   });
 });
diff --git a/packages/agent/src/mcp-scripting/runner.ts b/packages/agent/src/mcp-scripting/runner.ts
index d5e975971..67aa6b518 100644
--- a/packages/agent/src/mcp-scripting/runner.ts
+++ b/packages/agent/src/mcp-scripting/runner.ts
@@ -93,25 +93,38 @@ export async function runScript(
     return { result: undefined, logs, error: formatError(err) };
   }
 
+  // A single wall-clock deadline governs the whole run. The synchronous
+  // `runInContext` phase and the async tool-call phase draw from the same
+  // budget: the sync `timeout` is capped at the time left, and the async race
+  // keys off the same absolute deadline. Without this, the two phases would be
+  // independent and a sync-then-async script could run for nearly 2× timeoutMs.
+  const deadline = Date.now() + timeoutMs;
+
   const run = (async (): Promise<unknown> => {
+    const syncBudget = Math.max(deadline - Date.now(), 1);
     // `timeout` here guards synchronous spin; async work is bounded by the race.
-    const completion = script.runInContext(context, { timeout: timeoutMs });
+    const completion = script.runInContext(context, { timeout: syncBudget });
     return await completion;
   })();
 
   try {
-    const result = await withTimeout(run, timeoutMs);
+    const result = await withDeadline(run, deadline, timeoutMs);
     return { result: toJsonSafe(result), logs };
   } catch (err) {
     return { result: undefined, logs, error: formatError(err) };
   }
 }
 
-function withTimeout<T>(promise: Promise<T>, ms: number): Promise<T> {
+function withDeadline<T>(
+  promise: Promise<T>,
+  deadline: number,
+  budgetMs: number,
+): Promise<T> {
   return new Promise<T>((resolve, reject) => {
+    const remaining = Math.max(deadline - Date.now(), 0);
     const timer = setTimeout(() => {
-      reject(new Error(`Script timed out after ${ms}ms`));
-    }, ms);
+      reject(new Error(`Script timed out after ${budgetMs}ms`));
+    }, remaining);
     promise.then(
       (value) => {
         clearTimeout(timer);
diff --git a/packages/agent/src/mcp-scripting/signatures.ts b/packages/agent/src/mcp-scripting/signatures.ts
index 6f2278ec6..b9b3061b6 100644
--- a/packages/agent/src/mcp-scripting/signatures.ts
+++ b/packages/agent/src/mcp-scripting/signatures.ts
@@ -102,7 +102,9 @@ function propertyKey(name: string): string {
 }
 
 function oneLine(text: string): string {
-  return text.replace(/\s+/g, " ").trim();
+  // Collapse whitespace and neutralize `*/` so a tool description can't close
+  // the surrounding JSDoc block early and emit malformed TypeScript.
+  return text.replace(/\s+/g, " ").trim().replace(/\*\//g, "* /");
 }
 
 function isRecord(value: unknown): value is Record<string, unknown> {