From e9ca8d9409cc2d2de054bea129b3310ecc442e37 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Fri, 19 Jun 2026 03:48:51 +0200 Subject: [PATCH 1/3] feat(agent): add MCP-scripting core (tools proxy, vm runner, signatures) Lets the agent write one JS script that calls connected MCP tools as async functions instead of one tool-call at a time. Adds: - McpClientPool: opens MCP clients from the session's McpServerConfig map, inheriting auth (stdio env, http/sse headers) verbatim - buildToolsProxy: lazy tools..(args) proxy - runScript: constrained node:vm sandbox with wall-clock timeout, captured console, and no ambient fs/net/process authority - renderToolsetSignatures: JSON Schema to TS-style signatures --- packages/agent/src/mcp-scripting/README.md | 111 +++++++ .../client-pool.integration.test.ts | 121 +++++++ .../agent/src/mcp-scripting/client-pool.ts | 220 +++++++++++++ .../fixtures/echo-mcp-server.mjs | 28 ++ packages/agent/src/mcp-scripting/index.ts | 9 + .../src/mcp-scripting/mcp-scripting.test.ts | 301 ++++++++++++++++++ packages/agent/src/mcp-scripting/proxy.ts | 104 ++++++ packages/agent/src/mcp-scripting/runner.ts | 178 +++++++++++ .../agent/src/mcp-scripting/signatures.ts | 110 +++++++ packages/agent/src/mcp-scripting/tools.ts | 134 ++++++++ 10 files changed, 1316 insertions(+) create mode 100644 packages/agent/src/mcp-scripting/README.md create mode 100644 packages/agent/src/mcp-scripting/client-pool.integration.test.ts create mode 100644 packages/agent/src/mcp-scripting/client-pool.ts create mode 100644 packages/agent/src/mcp-scripting/fixtures/echo-mcp-server.mjs create mode 100644 packages/agent/src/mcp-scripting/index.ts create mode 100644 packages/agent/src/mcp-scripting/mcp-scripting.test.ts create mode 100644 packages/agent/src/mcp-scripting/proxy.ts create mode 100644 packages/agent/src/mcp-scripting/runner.ts create mode 100644 packages/agent/src/mcp-scripting/signatures.ts create mode 100644 packages/agent/src/mcp-scripting/tools.ts diff --git 
a/packages/agent/src/mcp-scripting/README.md b/packages/agent/src/mcp-scripting/README.md new file mode 100644 index 000000000..597f41997 --- /dev/null +++ b/packages/agent/src/mcp-scripting/README.md @@ -0,0 +1,111 @@ +# MCP tools as scripts + +Lets the agent write **one JavaScript script** that calls the connected MCP +tools as ordinary async functions, instead of orchestrating them one tool-call +at a time. The classic pain it removes: a server that needs 100 sequential calls +(list, then act per item) becomes a single script with a loop. + +```js +const issues = await tools.linear.listIssues({ teamId }) +const stale = issues.filter((i) => i.status === "backlog") +for (const i of stale) { + await tools.linear.createComment({ issueId: i.id, body: "bump" }) +} +return { bumped: stale.length } +``` + +Exposed to the model as two local tools (registered in +`../adapters/local-tools/index.ts`): + +- **`list_mcp_tools`** — returns `.d.ts`-style signatures for every + `tools.<server>.<tool>(args)` call available, generated from each tool's MCP + input schema. Call it first to discover what to call. +- **`run_mcp_script`** — takes `{ script, timeoutMs? }`, runs the script with + `tools` injected, returns `{ result, logs, error? }`. + +## Pieces + +| File | Responsibility | +| --- | --- | +| `client-pool.ts` | Opens/caches one MCP `Client` per server from the session's `McpServerConfig` map; `listTools` / `callTool`. | +| `proxy.ts` | Builds the lazy `tools.<server>.<tool>(args)` proxy that forwards to the pool. | +| `runner.ts` | Runs the script in a constrained `node:vm` context with a wall-clock timeout. | +| `signatures.ts` | Renders connected tools as TypeScript-style signatures. | +| `tools.ts` | The `run_mcp_script` / `list_mcp_tools` local-tool definitions. | + +## Credential flow — no new auth path + +The proxy dials the **exact same MCP server configs** the agent's own MCP tools +use, so authentication is inherited verbatim. The chain: + +1. 
The ACP client sends MCP servers in the `newSession` params. `parseMcpServers` + (`../adapters/claude/session/mcp-config.ts`) turns them into a + `Record<string, McpServerConfig>` — **stdio** entries carry `env`, **http/sse** + entries carry `headers`. This map is the single credential source. +2. Both adapters snapshot that map into `LocalToolCtx.scriptableMcpServers`: + `claude-agent.ts` passes it *before* the in-process local-tools server is mixed + in (so scripts never try to dial an in-process `sdk` server — those have no + transport), and `codex-agent.ts` derives it from the same ACP `mcpServers` via + `parseMcpServers`. The scripting tools self-disable when no external servers + are present. +3. On a `run_mcp_script` / `list_mcp_tools` call, `McpClientPool` reads a config + and constructs the matching MCP SDK transport: + - `stdio` → `StdioClientTransport` with `command`/`args`/`env` (the session env + is inherited too, so stdio servers keep ambient credentials). + - `http` → `StreamableHTTPClientTransport` with `requestInit.headers`. + - `sse` → `SSEClientTransport` with `requestInit.headers`. + +There is no separate token store, no re-auth, and nothing the model can set: a +script can only reach servers the session was already authorized for, with the +same credentials those tools already had. + +## Sandbox model + +`runner.ts` executes the script in a `node:vm` context whose globals are an +explicit allowlist: + +- **Granted:** `tools`, a captured `console`, and pure stateless helpers + (`JSON`, `Math`, `Date`, `Array`/`Object`/`Map`/`Set`/…, `structuredClone`, + `TextEncoder`/`TextDecoder`, `URL`/`URLSearchParams`, `setTimeout`/`clearTimeout`). +- **Denied:** `require`, `import`, `process`, `global`/`globalThis` ambient + authority, `Buffer`, `fetch`, filesystem — so the **only** way out is `tools.*`. 
+- **No dynamic code:** the context is created with + `codeGeneration: { strings: false, wasm: false }`, so `new Function(...)` / + `eval` throw — closing the most common `vm` escape via the `Function` + constructor. +- **Wall-clock timeout:** default 30s, capped at 120s. `node:vm` can't interrupt a + pending Promise (e.g. a hung tool call), so the timeout *races* script + completion to bound total time; the per-server MCP tool timeout still applies to + each individual call. + +`node:vm` is **not** a hard security boundary against hostile code sharing the +process — but here the script author is the same agent that already calls these +tools directly. The goal is to **remove ambient authority** (fs/net/env) and +funnel every side effect through the audited `tools` path, not to contain an +adversary. Cloud runs additionally execute the whole agent inside a sandbox, +which is the real isolation layer. + +## Adopt vs build + +Researched the "code mode for MCP" ecosystem (Cloudflare *Code Mode*, +`@utcp/code-mode` / `code-mode-mcp`, `mcpac`). They all run as a **separate MCP +server or standalone process** that connects to MCP clients via its own config +(or target Cloudflare `workerd` isolates), and several add a second abstraction +(UTCP) on top of MCP. None reuse an existing in-process `McpServerConfig` map +with already-resolved credentials — which is the entire integration we need. + +Adopting one would mean standing up another process, re-plumbing auth into it, +and taking a heavier dependency (some MPL-2.0) for what is a ~5-file thin layer +over the MCP SDK `Client` we already depend on. **Decision: build.** The layer is +small, has no new runtime dependencies (only `@modelcontextprotocol/sdk` and +`zod`, both already present), and inherits auth for free. 
+ +## Tests + +`mcp-scripting.test.ts` covers proxy generation, a script calling a tool, +looping/batching, timeout enforcement, error surfacing, signature rendering, and +sandbox-escape attempts (`require`/`process`/`global`/`Buffer`/`fetch`/`new +Function` denied). `client-pool.integration.test.ts` spins up a real stdio MCP +server (`fixtures/echo-mcp-server.mjs`) and drives it end-to-end through a +script, including asserting that stdio `env` reaches the server (the credential +path). diff --git a/packages/agent/src/mcp-scripting/client-pool.integration.test.ts b/packages/agent/src/mcp-scripting/client-pool.integration.test.ts new file mode 100644 index 000000000..4c30bcbdf --- /dev/null +++ b/packages/agent/src/mcp-scripting/client-pool.integration.test.ts @@ -0,0 +1,121 @@ +import { fileURLToPath } from "node:url"; +import type { McpServerConfig } from "@anthropic-ai/claude-agent-sdk"; +import { afterEach, describe, expect, it } from "vitest"; +import { McpClientPool } from "./client-pool"; +import { buildToolsProxy } from "./proxy"; +import { runScript } from "./runner"; +import { listMcpToolsTool, runMcpScriptTool } from "./tools"; + +const ECHO_SERVER = fileURLToPath( + new URL("./fixtures/echo-mcp-server.mjs", import.meta.url), +); + +describe("McpClientPool (real stdio MCP server)", () => { + let pool: McpClientPool | undefined; + + afterEach(async () => { + await pool?.close(); + pool = undefined; + }); + + it("lists tools and calls them over a real stdio transport", async () => { + pool = new McpClientPool({ + echo: { type: "stdio", command: process.execPath, args: [ECHO_SERVER] }, + }); + + const tools = await pool.listTools("echo"); + expect(tools.map((t) => t.name).sort()).toEqual(["add", "whoami"]); + + const result = await pool.callTool("echo", "add", { a: 2, b: 3 }); + expect(result.isError).toBe(false); + expect(result.value).toEqual({ sum: 5 }); + }); + + it("inherits stdio env as the credential path", async () => { + pool = new McpClientPool({ 
+ echo: { + type: "stdio", + command: process.execPath, + args: [ECHO_SERVER], + env: { ECHO_SECRET: "s3cr3t-token" }, + }, + }); + + const result = await pool.callTool("echo", "whoami", {}); + expect(result.value).toBe("s3cr3t-token"); + }); + + it("drives the real server end-to-end through a script", async () => { + pool = new McpClientPool({ + echo: { type: "stdio", command: process.execPath, args: [ECHO_SERVER] }, + }); + const tools = buildToolsProxy(pool, pool.serverNames()); + + const { result, error } = await runScript({ + tools, + script: ` + let total = 0 + for (let i = 1; i <= 3; i++) { + const r = await tools.echo.add({ a: total, b: i }) + total = r.sum + } + return total + `, + }); + + expect(error).toBeUndefined(); + expect(result).toBe(6); + }, 15_000); + + it("excludes in-process sdk servers from serverNames", () => { + pool = new McpClientPool({ + echo: { type: "stdio", command: process.execPath, args: [ECHO_SERVER] }, + // sdk servers have no dialable transport; cast to satisfy the union. 
+ inproc: { type: "sdk", name: "inproc" } as never, + }); + expect(pool.serverNames()).toEqual(["echo"]); + }); +}); + +describe("scripting local tools (real stdio MCP server)", () => { + const echoConfig: Record = { + echo: { type: "stdio", command: process.execPath, args: [ECHO_SERVER] }, + }; + + it("run_mcp_script gates on having scriptable servers", () => { + expect(runMcpScriptTool.isEnabled({ cwd: "/r" }, undefined)).toBe(false); + expect( + runMcpScriptTool.isEnabled( + { cwd: "/r", scriptableMcpServers: echoConfig }, + undefined, + ), + ).toBe(true); + }); + + it("list_mcp_tools renders real signatures and notes unreachable servers", async () => { + const result = await listMcpToolsTool.handler( + { + cwd: "/r", + scriptableMcpServers: { + ...echoConfig, + broken: { type: "stdio", command: "definitely-not-a-real-binary" }, + }, + }, + {}, + ); + const text = result.content[0].text; + expect(text).toContain("echo"); + expect(text).toContain("add(args: {"); + expect(text).toContain("Unreachable servers"); + expect(text).toContain("broken"); + }, 15_000); + + it("run_mcp_script executes against the real server end-to-end", async () => { + const result = await runMcpScriptTool.handler( + { cwd: "/r", scriptableMcpServers: echoConfig }, + { script: "return (await tools.echo.add({ a: 40, b: 2 })).sum" }, + ); + expect(result.isError).toBeUndefined(); + expect(result.content[0].text).toContain('"result": 42'); + }, 15_000); +}); diff --git a/packages/agent/src/mcp-scripting/client-pool.ts b/packages/agent/src/mcp-scripting/client-pool.ts new file mode 100644 index 000000000..79eb1698a --- /dev/null +++ b/packages/agent/src/mcp-scripting/client-pool.ts @@ -0,0 +1,220 @@ +import type { McpServerConfig } from "@anthropic-ai/claude-agent-sdk"; +import { Client } from "@modelcontextprotocol/sdk/client/index.js"; +import { SSEClientTransport } from "@modelcontextprotocol/sdk/client/sse.js"; +import { StdioClientTransport } from 
"@modelcontextprotocol/sdk/client/stdio.js"; +import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js"; +import type { Transport } from "@modelcontextprotocol/sdk/shared/transport.js"; +import type { Logger } from "../utils/logger"; + +/** A tool as advertised by a connected MCP server. */ +export interface McpToolDescriptor { + name: string; + description?: string; + /** The tool's JSON Schema for arguments (MCP `inputSchema`). */ + inputSchema?: Record; +} + +/** Result of a single MCP tool call, normalized for scripts. */ +export interface McpCallResult { + /** Structured payload when the server returns `structuredContent`, else the + * text blocks joined (JSON-parsed when they look like JSON). */ + value: unknown; + /** Raw `content` blocks the server returned. */ + content: unknown[]; + isError: boolean; +} + +/** + * Opens and caches one MCP `Client` per configured server, reusing the + * session's `McpServerConfig` map so authentication is inherited verbatim: + * stdio servers carry credentials in `env`, http/sse servers in `headers`. + * There is no separate auth path — a script call dials the exact transport the + * agent's own MCP tools use. + * + * Connections are established lazily on first use of a server and torn down by + * {@link close}. A pool is meant to live for the duration of one script run. + */ +export class McpClientPool { + private readonly configs: Record; + private readonly logger?: Logger; + private readonly clients = new Map>(); + + constructor(configs: Record, logger?: Logger) { + this.configs = configs; + this.logger = logger; + } + + /** Server names this pool can dial (in-process `sdk` servers excluded). */ + serverNames(): string[] { + return scriptableServerNames(this.configs); + } + + /** Lists the tools a server advertises. Connects on first use. 
*/ + async listTools(serverName: string): Promise { + const client = await this.getClient(serverName); + const { tools } = await client.listTools(); + return tools.map((t) => ({ + name: t.name, + description: t.description, + inputSchema: t.inputSchema as Record | undefined, + })); + } + + /** Calls a tool on a server, returning a normalized result. */ + async callTool( + serverName: string, + toolName: string, + args: Record, + ): Promise { + const client = await this.getClient(serverName); + const raw = await client.callTool({ name: toolName, arguments: args }); + const content = Array.isArray(raw.content) ? raw.content : []; + return { + value: extractValue(raw.structuredContent, content), + content, + isError: raw.isError === true, + }; + } + + /** Disconnects every open client. Safe to call more than once. */ + async close(): Promise { + const pending = [...this.clients.values()]; + this.clients.clear(); + await Promise.allSettled( + pending.map(async (p) => { + try { + const client = await p; + await client.close(); + } catch (err) { + this.logger?.debug("Error closing MCP client", { + error: err instanceof Error ? err.message : String(err), + }); + } + }), + ); + } + + private getClient(serverName: string): Promise { + const existing = this.clients.get(serverName); + if (existing) { + return existing; + } + const connecting = this.connect(serverName); + this.clients.set(serverName, connecting); + // Don't cache a rejected connection — let the next call retry. 
+ connecting.catch(() => this.clients.delete(serverName)); + return connecting; + } + + private async connect(serverName: string): Promise { + const config = this.configs[serverName]; + if (!config) { + throw new Error(`Unknown MCP server: ${serverName}`); + } + const transport = this.createTransport(serverName, config); + const client = new Client({ + name: "posthog-mcp-scripting", + version: "1.0.0", + }); + await client.connect(transport); + return client; + } + + private createTransport( + serverName: string, + config: McpServerConfig, + ): Transport { + const type = transportableType(config); + if (type === "stdio") { + const stdio = config as { + command: string; + args?: string[]; + env?: Record; + }; + return new StdioClientTransport({ + command: stdio.command, + args: stdio.args, + // Inherit the session env so stdio servers keep their credentials. + env: { ...filterUndefined(process.env), ...(stdio.env ?? {}) }, + }); + } + if (type === "http" || type === "sse") { + const remote = config as { + url: string; + headers?: Record; + }; + const url = new URL(remote.url); + const opts = remote.headers + ? { requestInit: { headers: remote.headers } } + : undefined; + return type === "http" + ? new StreamableHTTPClientTransport(url, opts) + : new SSEClientTransport(url, opts); + } + throw new Error( + `MCP server "${serverName}" is in-process (sdk) and cannot be scripted`, + ); + } +} + +/** The dialable transport for a config, or `undefined` for in-process `sdk`. */ +function transportableType( + config: McpServerConfig, +): "stdio" | "http" | "sse" | undefined { + if (!("type" in config) || config.type === "stdio") { + return "stdio"; + } + if (config.type === "http") { + return "http"; + } + if (config.type === "sse") { + return "sse"; + } + return undefined; // sdk (in-process) — no dialable transport +} + +/** + * Names of servers a script can dial — every config except in-process `sdk` + * ones. 
/**
 * Picks the value a script sees for a tool result.
 *
 * - `structuredContent`, when the server sent one, is returned as-is.
 * - Otherwise the `text` blocks are joined with newlines and JSON-parsed when
 *   that succeeds; text that is not valid JSON is returned as the joined string.
 * - With no text blocks at all, the raw `content` array is returned.
 *
 * A parsed number is only used when it prints back to exactly the original
 * text. Otherwise (e.g. an 18+ digit ID, which a double cannot hold) the text
 * is kept as a string rather than being silently turned into a different
 * value.
 *
 * @param structuredContent The result's `structuredContent`, if any.
 * @param content The result's raw `content` blocks.
 * @returns The structured payload, parsed JSON, joined text, or raw blocks.
 */
function extractValue(structuredContent: unknown, content: unknown[]): unknown {
  if (structuredContent !== undefined) {
    return structuredContent;
  }
  const texts: string[] = [];
  for (const block of content) {
    if (
      typeof block === "object" &&
      block !== null &&
      (block as { type?: unknown }).type === "text" &&
      typeof (block as { text?: unknown }).text === "string"
    ) {
      texts.push((block as { text: string }).text);
    }
  }
  if (texts.length === 0) {
    return content;
  }
  const joined = texts.join("\n");
  let parsed: unknown;
  try {
    parsed = JSON.parse(joined);
  } catch {
    return joined;
  }
  // Guard against lossy numeric coercion ("12345678901234567890", "1.50", ...).
  if (typeof parsed === "number" && String(parsed) !== joined.trim()) {
    return joined;
  }
  return parsed;
}
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; +import { z } from "zod"; + +const server = new McpServer({ name: "echo", version: "1.0.0" }); + +server.tool( + "add", + "Add two numbers", + { a: z.number(), b: z.number() }, + async ({ a, b }) => ({ + content: [{ type: "text", text: JSON.stringify({ sum: a + b }) }], + }), +); + +server.tool( + "whoami", + "Return the ECHO_SECRET env var the server was launched with", + {}, + async () => ({ + content: [{ type: "text", text: process.env.ECHO_SECRET ?? "(unset)" }], + }), +); + +await server.connect(new StdioServerTransport()); diff --git a/packages/agent/src/mcp-scripting/index.ts b/packages/agent/src/mcp-scripting/index.ts new file mode 100644 index 000000000..acc51e17f --- /dev/null +++ b/packages/agent/src/mcp-scripting/index.ts @@ -0,0 +1,9 @@ +export type { McpCallResult, McpToolDescriptor } from "./client-pool"; +export { McpClientPool, scriptableServerNames } from "./client-pool"; +export type { ToolsProxy } from "./proxy"; +export { buildToolsProxy } from "./proxy"; +export type { RunScriptOptions, RunScriptResult } from "./runner"; +export { runScript } from "./runner"; +export type { ServerToolset } from "./signatures"; +export { renderToolsetSignatures } from "./signatures"; +export { listMcpToolsTool, runMcpScriptTool } from "./tools"; diff --git a/packages/agent/src/mcp-scripting/mcp-scripting.test.ts b/packages/agent/src/mcp-scripting/mcp-scripting.test.ts new file mode 100644 index 000000000..0ba017e78 --- /dev/null +++ b/packages/agent/src/mcp-scripting/mcp-scripting.test.ts @@ -0,0 +1,301 @@ +import { describe, expect, it } from "vitest"; +import type { McpClientPool, McpToolDescriptor } from "./client-pool"; +import { buildToolsProxy } from "./proxy"; +import { runScript } from "./runner"; +import { renderToolsetSignatures } from "./signatures"; + +/** + * A fake pool standing in for {@link 
/**
 * Test double for the client pool. Every `callTool` invocation is appended to
 * `calls` as `[server, tool, args]` and answered by the optional `call`
 * callback (or `null` without one). A returned object carrying
 * `__error: true` is reported with `isError: true`. `listTools` serves the
 * canned descriptors from `servers`, or an empty list for unknown names.
 */
function fakePool(opts: {
  servers: Record<string, McpToolDescriptor[]>;
  call?: (
    server: string,
    tool: string,
    args: Record<string, unknown>,
  ) => unknown;
}): McpClientPool & { calls: Array<[string, string, unknown]> } {
  const recorded: Array<[string, string, unknown]> = [];

  // A canned value flags failure by being an object with `__error: true`.
  const flagsError = (candidate: unknown): boolean =>
    typeof candidate === "object" &&
    candidate !== null &&
    (candidate as { __error?: boolean }).__error === true;

  const stub = {
    calls: recorded,
    serverNames(): string[] {
      return Object.keys(opts.servers);
    },
    async listTools(server: string) {
      return opts.servers[server] ?? [];
    },
    async callTool(
      server: string,
      tool: string,
      args: Record<string, unknown>,
    ) {
      // Record before answering so a throwing callback is still logged.
      recorded.push([server, tool, args]);
      const value = opts.call ? opts.call(server, tool, args) : null;
      return { value, content: [], isError: flagsError(value) };
    },
    async close(): Promise<void> {},
  };

  return stub as unknown as McpClientPool & {
    calls: Array<[string, string, unknown]>;
  };
}
{ id: "ISS-1", ...args } : null, + }); + const tools = buildToolsProxy(pool, pool.serverNames()); + + const result = await tools.linear.createIssue({ title: "Bug" }); + + expect(result).toEqual({ id: "ISS-1", title: "Bug" }); + expect(pool.calls).toEqual([["linear", "createIssue", { title: "Bug" }]]); + }); + + it("defaults args to {} when called without arguments", async () => { + const pool = fakePool({ servers: { linear: [] }, call: () => "ok" }); + const tools = buildToolsProxy(pool, pool.serverNames()); + + await tools.linear.listIssues(); + + expect(pool.calls).toEqual([["linear", "listIssues", {}]]); + }); + + it("returns undefined for unknown servers", () => { + const pool = fakePool({ servers: { linear: [] } }); + const tools = buildToolsProxy(pool, pool.serverNames()); + expect((tools as Record).github).toBeUndefined(); + expect(Object.keys(tools)).toEqual(["linear"]); + }); + + it("rejects when a tool reports isError so scripts can try/catch", async () => { + const pool = fakePool({ + servers: { linear: [] }, + call: () => ({ __error: true, message: "boom" }), + }); + const tools = buildToolsProxy(pool, pool.serverNames()); + + await expect(tools.linear.failing({})).rejects.toThrow(/boom/); + }); + }); + + describe("runScript", () => { + it("runs a script, returns its value, and captures console output", async () => { + const pool = fakePool({ + servers: { linear: [] }, + call: (_s, _t, args) => (args as { n: number }).n * 2, + }); + const tools = buildToolsProxy(pool, pool.serverNames()); + + const { result, logs, error } = await runScript({ + tools, + script: ` + console.log("starting") + const doubled = await tools.linear.double({ n: 21 }) + return { doubled } + `, + }); + + expect(error).toBeUndefined(); + expect(result).toEqual({ doubled: 42 }); + expect(logs).toContain("starting"); + }); + + it("supports looping and batching over results", async () => { + const pool = fakePool({ + servers: { linear: [] }, + call: (_s, tool, args) => { + if (tool 
=== "listIssues") { + return [ + { id: "A", done: false }, + { id: "B", done: true }, + { id: "C", done: false }, + ]; + } + return { closed: (args as { id: string }).id }; + }, + }); + const tools = buildToolsProxy(pool, pool.serverNames()); + + const { result, error } = await runScript({ + tools, + script: ` + const issues = await tools.linear.listIssues({}) + const open = issues.filter((i) => !i.done) + const closed = [] + for (const i of open) { + const r = await tools.linear.closeIssue({ id: i.id }) + closed.push(r.closed) + } + return closed + `, + }); + + expect(error).toBeUndefined(); + expect(result).toEqual(["A", "C"]); + // 1 list + 2 closes + expect(pool.calls).toHaveLength(3); + }); + + it("surfaces script errors as a message, not a throw", async () => { + const pool = fakePool({ servers: { linear: [] } }); + const tools = buildToolsProxy(pool, pool.serverNames()); + + const { result, error } = await runScript({ + tools, + script: `throw new Error("explicit failure")`, + }); + + expect(result).toBeUndefined(); + expect(error).toMatch(/explicit failure/); + }); + + it("surfaces a tool error thrown inside the script", async () => { + const pool = fakePool({ + servers: { linear: [] }, + call: () => ({ __error: true, message: "rate limited" }), + }); + const tools = buildToolsProxy(pool, pool.serverNames()); + + const { error } = await runScript({ + tools, + script: `await tools.linear.create({})`, + }); + + expect(error).toMatch(/rate limited/); + }); + + it("enforces a wall-clock timeout", async () => { + const pool = fakePool({ servers: {} }); + const tools = buildToolsProxy(pool, []); + + const { error } = await runScript({ + tools, + timeoutMs: 50, + script: `await new Promise((resolve) => setTimeout(resolve, 5000))`, + }); + + expect(error).toMatch(/timed out/i); + }); + + describe("sandbox isolation", () => { + const pool = fakePool({ servers: {} }); + const tools = buildToolsProxy(pool, []); + + it.each([ + ["require", `return typeof require`], + 
["process", `return typeof process`], + ["global", `return typeof global`], + ["globalThis.process", `return typeof globalThis.process`], + ["Buffer", `return typeof Buffer`], + ["fetch", `return typeof fetch`], + ])("denies access to %s", async (_name, script) => { + const { result, error } = await runScript({ tools, script }); + // Either the symbol is absent (typeof "undefined") or referencing it throws. + if (error) { + expect(error).toMatch(/is not defined|undefined/i); + } else { + expect(result).toBe("undefined"); + } + }); + + it("blocks dynamic code generation (new Function)", async () => { + const { error } = await runScript({ + tools, + script: `return new Function("return 1")()`, + }); + expect(error).toBeTruthy(); + }); + + it("blocks process access via constructor escape attempt", async () => { + const { result, error } = await runScript({ + tools, + script: ` + try { + return (function(){}).constructor("return process")() + } catch (e) { + return "blocked: " + e.message + } + `, + }); + // codeGeneration.strings:false makes the Function constructor throw. 
+ if (!error) { + expect(String(result)).toMatch(/blocked/); + } else { + expect(error).toBeTruthy(); + } + }); + }); + }); + + describe("renderToolsetSignatures", () => { + it("renders tools..(args) signatures from JSON schemas", () => { + const text = renderToolsetSignatures([ + { + serverName: "linear", + tools: [ + { + name: "createIssue", + description: "Create an issue", + inputSchema: { + type: "object", + properties: { + title: { type: "string" }, + teamId: { type: "string" }, + priority: { type: "number" }, + }, + required: ["title", "teamId"], + }, + }, + ], + }, + ]); + + expect(text).toContain("linear"); + expect(text).toContain("createIssue(args: {"); + expect(text).toContain("title: string"); + expect(text).toContain("teamId: string"); + expect(text).toContain("priority?: number"); + expect(text).toContain("Create an issue"); + }); + + it("handles enums, arrays, and empty schemas", () => { + const text = renderToolsetSignatures([ + { + serverName: "x", + tools: [ + { + name: "noArgs", + inputSchema: { type: "object", properties: {} }, + }, + { + name: "withEnum", + inputSchema: { + type: "object", + properties: { + status: { enum: ["open", "closed"] }, + tags: { type: "array", items: { type: "string" } }, + }, + required: ["status"], + }, + }, + ], + }, + ]); + + expect(text).toContain("noArgs()"); + expect(text).toContain(`status: "open" | "closed"`); + expect(text).toContain("tags?: string[]"); + }); + + it("reports the empty case", () => { + expect(renderToolsetSignatures([])).toMatch(/No external MCP servers/); + }); + }); +}); diff --git a/packages/agent/src/mcp-scripting/proxy.ts b/packages/agent/src/mcp-scripting/proxy.ts new file mode 100644 index 000000000..0e0fdeb94 --- /dev/null +++ b/packages/agent/src/mcp-scripting/proxy.ts @@ -0,0 +1,104 @@ +import type { McpClientPool } from "./client-pool"; + +/** The `tools` object injected into a script: `tools..(args)`. 
/** The `tools` object injected into a script: `tools.<server>.<tool>(args)`. */
export type ToolsProxy = Record<
  string,
  Record<string, (args?: Record<string, unknown>) => Promise<unknown>>
>;

/**
 * Property names the language itself probes on arbitrary objects. The server
 * proxy must answer `undefined` for these instead of minting a tool function:
 *
 * - `then`: promise resolution (`await x`, returning `x` from an async
 *   function) calls `x.then(resolve, reject)` when it is callable.
 * - `toJSON`: `JSON.stringify` calls it when it is callable.
 * - `toString` / `valueOf`: string/number coercion calls them.
 *
 * Treating them as tools would dispatch bogus calls to the server, and for
 * `then` it would leave the awaiting script hanging.
 */
const RESERVED_TOOL_NAMES: ReadonlySet<string> = new Set([
  "then",
  "toJSON",
  "toString",
  "valueOf",
]);

/**
 * Builds the `tools` proxy a script sees. Each `tools.<server>.<tool>(args)`
 * forwards to `pool.callTool` and resolves to the call's `value`. A tool that
 * reports `isError` rejects, so scripts can use ordinary try/catch.
 *
 * Access is lazy and name-driven: tools are not pre-enumerated, so a script can
 * call any tool name and the server decides whether it exists. Unknown servers
 * surface as `undefined`, matching plain object access (`tools.nope` is
 * `undefined`).
 *
 * @param pool Pool used to dispatch the calls.
 * @param serverNames Names exposed as `tools.<server>`; all others are hidden.
 */
export function buildToolsProxy(
  pool: McpClientPool,
  serverNames: readonly string[],
): ToolsProxy {
  const known = new Set(serverNames);
  // One server proxy per name, so `tools.x === tools.x` holds.
  const serverCache = new Map<
    string,
    Record<string, (args?: Record<string, unknown>) => Promise<unknown>>
  >();

  return new Proxy({} as ToolsProxy, {
    get(_target, prop): unknown {
      if (typeof prop !== "string" || !known.has(prop)) {
        return undefined;
      }
      let serverProxy = serverCache.get(prop);
      if (!serverProxy) {
        serverProxy = buildServerProxy(pool, prop);
        serverCache.set(prop, serverProxy);
      }
      return serverProxy;
    },
    has(_target, prop): boolean {
      return typeof prop === "string" && known.has(prop);
    },
    // ownKeys + getOwnPropertyDescriptor make Object.keys(tools) list servers.
    ownKeys(): string[] {
      return [...known];
    },
    getOwnPropertyDescriptor(_target, prop): PropertyDescriptor | undefined {
      if (typeof prop === "string" && known.has(prop)) {
        return { enumerable: true, configurable: true };
      }
      return undefined;
    },
  });
}

/**
 * Builds the per-server namespace: reading any string property yields an async
 * function that calls the tool of that name. Symbol keys and the names in
 * `RESERVED_TOOL_NAMES` read as `undefined`.
 */
function buildServerProxy(
  pool: McpClientPool,
  serverName: string,
): Record<string, (args?: Record<string, unknown>) => Promise<unknown>> {
  // Cache the functions so `tools.s.t === tools.s.t` holds.
  const toolCache = new Map<
    string,
    (args?: Record<string, unknown>) => Promise<unknown>
  >();

  return new Proxy(
    {} as Record<string, (args?: Record<string, unknown>) => Promise<unknown>>,
    {
      get(_target, prop): unknown {
        if (typeof prop !== "string" || RESERVED_TOOL_NAMES.has(prop)) {
          return undefined;
        }
        const cached = toolCache.get(prop);
        if (cached) {
          return cached;
        }
        const fn = async (
          args: Record<string, unknown> = {},
        ): Promise<unknown> => {
          const result = await pool.callTool(serverName, prop, args);
          if (result.isError) {
            throw new Error(
              `tools.${serverName}.${prop} failed: ${stringifyError(result.value)}`,
            );
          }
          return result.value;
        };
        toolCache.set(prop, fn);
        return fn;
      },
    },
  );
}

/** Renders a tool's error payload for an exception message. */
function stringifyError(value: unknown): string {
  if (typeof value === "string") {
    return value;
  }
  try {
    // JSON.stringify returns undefined for undefined/functions/symbols.
    return JSON.stringify(value) ?? String(value);
  } catch {
    return String(value);
  }
}
+ * + * `node:vm` is not a security sandbox against a determined attacker sharing the + * process (prototype-chain escapes exist), but here the script author is the + * same agent that already runs tools directly — the goal is to remove ambient + * authority (fs/net/env) and force all side effects through the audited `tools` + * path, not to contain hostile code. For stronger isolation, run the agent + * itself in its sandbox (which cloud runs already do). + */ +export async function runScript( + options: RunScriptOptions, +): Promise { + const timeoutMs = Math.min( + Math.max(options.timeoutMs ?? DEFAULT_TIMEOUT_MS, 1), + MAX_TIMEOUT_MS, + ); + const logs: string[] = []; + const sandboxConsole = makeCapturingConsole(logs); + + const context = vm.createContext( + Object.assign(Object.create(null), { + tools: options.tools, + console: sandboxConsole, + // Pure, stateless helpers — no ambient authority granted by these. + JSON, + Math, + Date, + Promise, + Array, + Object, + String, + Number, + Boolean, + Map, + Set, + RegExp, + Error, + TypeError, + RangeError, + Symbol, + BigInt, + structuredClone, + TextEncoder, + TextDecoder, + URL, + URLSearchParams, + setTimeout, + clearTimeout, + }), + { name: "mcp-script", codeGeneration: { strings: false, wasm: false } }, + ); + + // Wrap as an async IIFE so the script may use top-level await and `return`. + const wrapped = `(async () => {\n${options.script}\n})()`; + + let script: vm.Script; + try { + script = new vm.Script(wrapped, { filename: "mcp-script.js" }); + } catch (err) { + return { result: undefined, logs, error: formatError(err) }; + } + + const run = (async (): Promise => { + // `timeout` here guards synchronous spin; async work is bounded by the race. 
+ const completion = script.runInContext(context, { timeout: timeoutMs }); + return await completion; + })(); + + try { + const result = await withTimeout(run, timeoutMs); + return { result: toJsonSafe(result), logs }; + } catch (err) { + return { result: undefined, logs, error: formatError(err) }; + } +} + +function withTimeout(promise: Promise, ms: number): Promise { + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { + reject(new Error(`Script timed out after ${ms}ms`)); + }, ms); + promise.then( + (value) => { + clearTimeout(timer); + resolve(value); + }, + (err) => { + clearTimeout(timer); + reject(err); + }, + ); + }); +} + +function makeCapturingConsole(logs: string[]): Console { + const record = (...args: unknown[]): void => { + logs.push(args.map(formatLogArg).join(" ")); + }; + // Only log-shaped methods are wired; everything else is a no-op so a script + // calling e.g. console.table doesn't throw. + return new Proxy({} as Console, { + get(_target, prop): unknown { + if ( + prop === "log" || + prop === "info" || + prop === "warn" || + prop === "error" || + prop === "debug" + ) { + return record; + } + return () => {}; + }, + }); +} + +function formatLogArg(arg: unknown): string { + if (typeof arg === "string") { + return arg; + } + try { + return JSON.stringify(arg); + } catch { + return String(arg); + } +} + +/** Ensures the returned value survives the JSON round-trip the tool result uses. 
*/ +function toJsonSafe(value: unknown): unknown { + if (value === undefined) { + return undefined; + } + try { + return JSON.parse(JSON.stringify(value)); + } catch { + return String(value); + } +} + +function formatError(err: unknown): string { + if (err instanceof Error) { + return err.message; + } + return String(err); +} diff --git a/packages/agent/src/mcp-scripting/signatures.ts b/packages/agent/src/mcp-scripting/signatures.ts new file mode 100644 index 000000000..6f2278ec6 --- /dev/null +++ b/packages/agent/src/mcp-scripting/signatures.ts @@ -0,0 +1,110 @@ +import type { McpToolDescriptor } from "./client-pool"; + +/** A server and the tools it exposes, ready to render as signatures. */ +export interface ServerToolset { + serverName: string; + tools: McpToolDescriptor[]; +} + +/** + * Renders the connected MCP toolset as a `.d.ts`-style hint so the model can + * see exactly what `tools..(args)` calls are available and what + * each argument is. Every call returns a Promise; the doc says so once at the + * top rather than repeating `Promise<...>` on every line. 
+ */ +export function renderToolsetSignatures(toolsets: ServerToolset[]): string { + if (toolsets.length === 0) { + return "// No external MCP servers are connected, so `tools` is empty."; + } + const blocks = toolsets.map(renderServerBlock); + return [ + "// Each method is async — `await tools..(args)`.", + "// Args are validated against the server's schema before the call runs.", + "declare const tools: {", + ...blocks, + "}", + ].join("\n"); +} + +function renderServerBlock(toolset: ServerToolset): string { + const member = propertyKey(toolset.serverName); + if (toolset.tools.length === 0) { + return ` ${member}: {} // no tools advertised`; + } + const lines = toolset.tools.map((tool) => renderToolSignature(tool)); + return [` ${member}: {`, ...lines, " }"].join("\n"); +} + +function renderToolSignature(tool: McpToolDescriptor): string { + const params = renderParams(tool.inputSchema); + const doc = tool.description + ? ` /** ${oneLine(tool.description)} */\n` + : ""; + return `${doc} ${propertyKey(tool.name)}(${params}): unknown`; +} + +function renderParams(schema: McpToolDescriptor["inputSchema"]): string { + const properties = isRecord(schema?.properties) + ? schema.properties + : undefined; + if (!properties || Object.keys(properties).length === 0) { + return ""; + } + const required = new Set( + Array.isArray(schema?.required) + ? (schema.required as unknown[]).filter( + (r): r is string => typeof r === "string", + ) + : [], + ); + const fields = Object.entries(properties).map(([name, raw]) => { + const optional = required.has(name) ? "" : "?"; + return `${propertyKey(name)}${optional}: ${jsonSchemaToTs(raw)}`; + }); + return `args: { ${fields.join("; ")} }`; +} + +/** Best-effort JSON-Schema → TS type for a single field, kept shallow. 
*/ +function jsonSchemaToTs(raw: unknown): string { + if (!isRecord(raw)) { + return "unknown"; + } + if (Array.isArray(raw.enum) && raw.enum.length > 0) { + return raw.enum.map((v) => JSON.stringify(v)).join(" | "); + } + const type = raw.type; + if (type === "array") { + return `${jsonSchemaToTs(raw.items)}[]`; + } + if (type === "object" || isRecord(raw.properties)) { + return "Record"; + } + if (type === "string") { + return "string"; + } + if (type === "number" || type === "integer") { + return "number"; + } + if (type === "boolean") { + return "boolean"; + } + if (Array.isArray(type)) { + return ( + type.map((t) => jsonSchemaToTs({ type: t })).join(" | ") || "unknown" + ); + } + return "unknown"; +} + +/** A bare identifier when it's a valid one, else a quoted key. */ +function propertyKey(name: string): string { + return /^[A-Za-z_$][A-Za-z0-9_$]*$/.test(name) ? name : JSON.stringify(name); +} + +function oneLine(text: string): string { + return text.replace(/\s+/g, " ").trim(); +} + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} diff --git a/packages/agent/src/mcp-scripting/tools.ts b/packages/agent/src/mcp-scripting/tools.ts new file mode 100644 index 000000000..106204ba0 --- /dev/null +++ b/packages/agent/src/mcp-scripting/tools.ts @@ -0,0 +1,134 @@ +import { z } from "zod"; +import { + defineLocalTool, + type LocalTool, + type LocalToolCtx, + type LocalToolResult, +} from "../adapters/local-tools/registry"; +import { McpClientPool, scriptableServerNames } from "./client-pool"; +import { buildToolsProxy } from "./proxy"; +import { runScript } from "./runner"; +import { renderToolsetSignatures, type ServerToolset } from "./signatures"; + +const RUN_MCP_SCRIPT_DESCRIPTION = `Run one JavaScript script that calls the connected MCP tools as async functions, so you can orchestrate many tool calls with normal control flow (loops, filtering, batching) in a single step instead 
of one tool call at a time. + +Inside the script, every connected MCP server is exposed as \`tools..(args)\` and returns a Promise of the tool's parsed result: + + const issues = await tools.linear.listIssues({ teamId }) + const stale = issues.filter((i) => i.status === "backlog") + for (const i of stale) { + await tools.linear.createComment({ issueId: i.id, body: "bump" }) + } + return { closed: stale.length } + +Rules: +- Call \`list_mcp_tools\` first to see which \`tools.*\` calls exist and their argument schemas. +- The script body runs as an async function: use \`await\` freely and \`return\` the value you want back. +- A tool that errors throws — wrap calls in try/catch if you want to continue. +- Loops and batching over results are encouraged; that's the whole point. +- Only \`tools\`, \`console\`, JSON/Math/Date and similar pure helpers are available — no filesystem, network, \`require\`, or \`process\`. Reach the outside world only through \`tools.*\`. +- The return value and any \`console.log\` output are sent back to you.`; + +const LIST_MCP_TOOLS_DESCRIPTION = `List the MCP tools available to \`run_mcp_script\`, rendered as TypeScript-style signatures (\`tools..(args)\`) with argument schemas. Call this before writing a script so you know what to call.`; + +export const runMcpScriptTool: LocalTool = defineLocalTool({ + name: "run_mcp_script", + description: RUN_MCP_SCRIPT_DESCRIPTION, + schema: { + script: z + .string() + .describe( + "JavaScript to run. Runs as an async function body; use await and return.", + ), + timeoutMs: z + .number() + .int() + .positive() + .max(120_000) + .optional() + .describe("Wall-clock budget in ms (default 30000, max 120000)."), + }, + alwaysLoad: true, + isEnabled: (ctx) => hasScriptableServers(ctx), + handler: async (ctx, args): Promise => { + const configs = ctx.scriptableMcpServers ?? 
{}; + const pool = new McpClientPool(configs); + try { + const serverNames = pool.serverNames(); + const tools = buildToolsProxy(pool, serverNames); + const { result, logs, error } = await runScript({ + script: args.script as string, + tools, + timeoutMs: args.timeoutMs as number | undefined, + }); + return toToolResult({ result, logs, error }); + } finally { + await pool.close(); + } + }, +}); + +export const listMcpToolsTool: LocalTool = defineLocalTool({ + name: "list_mcp_tools", + description: LIST_MCP_TOOLS_DESCRIPTION, + schema: {}, + alwaysLoad: true, + isEnabled: (ctx) => hasScriptableServers(ctx), + handler: async (ctx): Promise => { + const configs = ctx.scriptableMcpServers ?? {}; + const pool = new McpClientPool(configs); + try { + const { toolsets, failed } = await collectToolsets(pool); + const signatures = renderToolsetSignatures(toolsets); + // Tell the agent about servers that wouldn't connect rather than silently + // dropping them — otherwise an expected server just looks absent. + const text = + failed.length > 0 + ? `${signatures}\n\n// Unreachable servers (failed to connect): ${failed.join(", ")}` + : signatures; + return { content: [{ type: "text", text }] }; + } finally { + await pool.close(); + } + }, +}); + +function hasScriptableServers(ctx: LocalToolCtx): boolean { + const configs = ctx.scriptableMcpServers; + return configs ? scriptableServerNames(configs).length > 0 : false; +} + +async function collectToolsets( + pool: McpClientPool, +): Promise<{ toolsets: ServerToolset[]; failed: string[] }> { + const names = pool.serverNames(); + const settled = await Promise.allSettled( + names.map(async (serverName) => ({ + serverName, + tools: await pool.listTools(serverName), + })), + ); + // One failing server shouldn't fail the whole listing; report it instead. 
+ const toolsets: ServerToolset[] = []; + const failed: string[] = []; + settled.forEach((result, i) => { + if (result.status === "fulfilled") { + toolsets.push(result.value); + } else { + failed.push(names[i]); + } + }); + return { toolsets, failed }; +} + +function toToolResult(payload: { + result: unknown; + logs: string[]; + error?: string; +}): LocalToolResult { + const text = JSON.stringify(payload, null, 2); + return { + content: [{ type: "text", text }], + ...(payload.error ? { isError: true as const } : {}), + }; +} From 49f7c8526f729b661dce7588efea51bf8c719ad7 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Fri, 19 Jun 2026 03:50:19 +0200 Subject: [PATCH 2/3] feat(agent): expose run_mcp_script / list_mcp_tools to both adapters Registers the scripting tools in the local-tools registry and threads the session's external MCP server map into LocalToolCtx from both the Claude (claude-agent.ts) and Codex (codex-agent.ts) adapters, so a script dials the same servers with inherited auth. Tools self-disable when no external MCP servers are connected. --- .../agent/src/adapters/claude/claude-agent.ts | 18 ++++++++++++++---- .../src/adapters/codex/codex-agent.test.ts | 14 +++++++++----- .../agent/src/adapters/codex/codex-agent.ts | 5 +++++ .../agent/src/adapters/local-tools/index.ts | 3 +++ .../agent/src/adapters/local-tools/registry.ts | 10 ++++++++++ 5 files changed, 41 insertions(+), 9 deletions(-) diff --git a/packages/agent/src/adapters/claude/claude-agent.ts b/packages/agent/src/adapters/claude/claude-agent.ts index 7c6d8220f..5ca227aa3 100644 --- a/packages/agent/src/adapters/claude/claude-agent.ts +++ b/packages/agent/src/adapters/claude/claude-agent.ts @@ -1656,12 +1656,24 @@ export class ClaudeAcpAgent extends BaseAcpAgent { // needs so the session doesn't pin the whole meta object. 
const baseBranch = meta?.baseBranch; const environment = meta?.environment; + // Snapshot the external MCP servers (before the in-process local server is + // mixed in below) so run_mcp_script can dial them with inherited auth. + const externalMcpServers: Record = + supportsMcpInjection(earlyModelId) + ? parseMcpServers(params, this.logger) + : {}; const buildInProcessMcpServers = (): Record< string, McpSdkServerConfigWithInstance > => { const server = createLocalToolsMcpServer( - { cwd, token: resolveGithubToken(), taskId, baseBranch }, + { + cwd, + token: resolveGithubToken(), + taskId, + baseBranch, + scriptableMcpServers: externalMcpServers, + }, { environment }, ); return server ? { [LOCAL_TOOLS_MCP_NAME]: server } : {}; @@ -1676,9 +1688,7 @@ export class ClaudeAcpAgent extends BaseAcpAgent { } const mcpServers: Record = { - ...(supportsMcpInjection(earlyModelId) - ? parseMcpServers(params, this.logger) - : {}), + ...externalMcpServers, ...initialInProcess, }; diff --git a/packages/agent/src/adapters/codex/codex-agent.test.ts b/packages/agent/src/adapters/codex/codex-agent.test.ts index 382fe6131..23e209299 100644 --- a/packages/agent/src/adapters/codex/codex-agent.test.ts +++ b/packages/agent/src/adapters/codex/codex-agent.test.ts @@ -424,16 +424,20 @@ describe("CodexAcpAgent", () => { _meta: { systemPrompt: string }; }; - // Existing MCP server is preserved; ours is appended. - expect(forwarded.mcpServers).toHaveLength(2); + // Existing MCP server is preserved; the structured-output server is + // appended. The local-tools server is also present because the existing + // server makes run_mcp_script/list_mcp_tools available. 
expect(forwarded.mcpServers[0].name).toBe("existing"); - expect(forwarded.mcpServers[1].name).toBe("posthog_output"); - expect(forwarded.mcpServers[1].command).toBe(process.execPath); + const outputServer = forwarded.mcpServers.find( + (s) => s.name === "posthog_output", + ); + expect(outputServer).toBeDefined(); + expect(outputServer?.command).toBe(process.execPath); // The schema is forwarded base64-encoded so codex-acp doesn't have // to escape it through a shell. const envEntry = ( - forwarded.mcpServers[1].env as Array<{ name: string; value: string }> + outputServer?.env as Array<{ name: string; value: string }> ).find((e) => e.name === "POSTHOG_OUTPUT_SCHEMA"); expect(envEntry).toBeDefined(); const decoded = JSON.parse( diff --git a/packages/agent/src/adapters/codex/codex-agent.ts b/packages/agent/src/adapters/codex/codex-agent.ts index a047f23dc..45b6eb188 100644 --- a/packages/agent/src/adapters/codex/codex-agent.ts +++ b/packages/agent/src/adapters/codex/codex-agent.ts @@ -71,6 +71,7 @@ import { emptyBaseline, estimateTokens, } from "../claude/context-breakdown"; +import { parseMcpServers } from "../claude/session/mcp-config"; import { classifyAgentError } from "../error-classification"; import { enabledLocalTools, @@ -646,6 +647,10 @@ export class CodexAcpAgent extends BaseAcpAgent { token: resolveGithubToken(), taskId: resolveTaskId(meta), baseBranch: meta?.baseBranch, + // Reuse the ACP MCP servers so run_mcp_script can dial them (auth inherited). + scriptableMcpServers: parseMcpServers({ + mcpServers: request.mcpServers ?? 
[], + }), }; const tools = enabledLocalTools(ctx, meta); if (tools.length === 0) { diff --git a/packages/agent/src/adapters/local-tools/index.ts b/packages/agent/src/adapters/local-tools/index.ts index 1272e18a7..74bfd057f 100644 --- a/packages/agent/src/adapters/local-tools/index.ts +++ b/packages/agent/src/adapters/local-tools/index.ts @@ -1,3 +1,4 @@ +import { listMcpToolsTool, runMcpScriptTool } from "../../mcp-scripting/tools"; import type { LocalTool, LocalToolCtx, LocalToolGateMeta } from "./registry"; import { signedCommitTool } from "./tools/signed-commit"; import { signedMergeTool } from "./tools/signed-merge"; @@ -17,6 +18,8 @@ export const LOCAL_TOOLS: LocalTool[] = [ signedCommitTool, signedMergeTool, signedRewriteTool, + runMcpScriptTool, + listMcpToolsTool, ]; /** Tools whose gate passes for the given context — the set to actually expose. */ diff --git a/packages/agent/src/adapters/local-tools/registry.ts b/packages/agent/src/adapters/local-tools/registry.ts index 97e3da62b..a52a33481 100644 --- a/packages/agent/src/adapters/local-tools/registry.ts +++ b/packages/agent/src/adapters/local-tools/registry.ts @@ -1,3 +1,4 @@ +import type { McpServerConfig } from "@anthropic-ai/claude-agent-sdk"; import type { z } from "zod"; /** @@ -20,6 +21,15 @@ export interface LocalToolCtx { * back to origin/HEAD detection when unset. */ baseBranch?: string; + /** + * The session's external MCP servers, keyed by name — the same + * `McpServerConfig` map handed to the Claude SDK `query()`. The MCP-scripting + * tools (`run_mcp_script` / `list_mcp_tools`) open their own clients against + * these configs, inheriting auth (stdio `env`, http/sse `headers`). In-process + * `sdk` servers are skipped — they have no transport to dial. Absent or empty + * means scripting tools self-disable. + */ + scriptableMcpServers?: Record; } /** Minimal session-meta shape needed to gate tools (e.g. cloud-only). 
*/ From f3a43fa2f129877244125a9dd00a6f56b10a2e20 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Fri, 19 Jun 2026 09:41:48 +0200 Subject: [PATCH 3/3] fix(agent): single timeout budget and JSDoc-safe tool descriptions - runScript now enforces timeoutMs as one shared wall-clock deadline across the synchronous and async phases (previously up to 2x the budget) - signature rendering neutralizes */ in tool descriptions so a description can't close the generated JSDoc block early --- .../src/mcp-scripting/mcp-scripting.test.ts | 45 +++++++++++++++++++ packages/agent/src/mcp-scripting/runner.ts | 23 +++++++--- .../agent/src/mcp-scripting/signatures.ts | 4 +- 3 files changed, 66 insertions(+), 6 deletions(-) diff --git a/packages/agent/src/mcp-scripting/mcp-scripting.test.ts b/packages/agent/src/mcp-scripting/mcp-scripting.test.ts index 0ba017e78..7076045dd 100644 --- a/packages/agent/src/mcp-scripting/mcp-scripting.test.ts +++ b/packages/agent/src/mcp-scripting/mcp-scripting.test.ts @@ -184,6 +184,29 @@ describe("mcp-scripting", () => { expect(error).toMatch(/timed out/i); }); + it("treats timeoutMs as one shared budget across sync and async phases", async () => { + const pool = fakePool({ servers: {} }); + const tools = buildToolsProxy(pool, []); + + // A brief synchronous spin followed by an async wait that would, on its + // own, fit inside timeoutMs — but combined must trip the single deadline. + const start = Date.now(); + const { error } = await runScript({ + tools, + timeoutMs: 200, + script: ` + const until = Date.now() + 120; + while (Date.now() < until) {} + await new Promise((resolve) => setTimeout(resolve, 5000)); + `, + }); + const elapsed = Date.now() - start; + + expect(error).toMatch(/timed out/i); + // Single budget: total stays near timeoutMs, never approaching 2×. 
+ expect(elapsed).toBeLessThan(400); + }); + describe("sandbox isolation", () => { const pool = fakePool({ servers: {} }); const tools = buildToolsProxy(pool, []); @@ -297,5 +320,27 @@ describe("mcp-scripting", () => { it("reports the empty case", () => { expect(renderToolsetSignatures([])).toMatch(/No external MCP servers/); }); + + it("neutralizes `*/` in a description so the JSDoc block stays valid", () => { + const text = renderToolsetSignatures([ + { + serverName: "math", + tools: [ + { + name: "divide", + description: "Computes a*/b", + inputSchema: { type: "object", properties: {} }, + }, + ], + }, + ]); + + // The raw `*/` must not survive, or it would close the comment early. + const jsdocLine = text + .split("\n") + .find((l) => l.includes("/**") && l.includes("Computes")); + expect(jsdocLine).toBeDefined(); + expect(jsdocLine).toBe(" /** Computes a* /b */"); + }); }); }); diff --git a/packages/agent/src/mcp-scripting/runner.ts b/packages/agent/src/mcp-scripting/runner.ts index d5e975971..67aa6b518 100644 --- a/packages/agent/src/mcp-scripting/runner.ts +++ b/packages/agent/src/mcp-scripting/runner.ts @@ -93,25 +93,38 @@ export async function runScript( return { result: undefined, logs, error: formatError(err) }; } + // A single wall-clock deadline governs the whole run. The synchronous + // `runInContext` phase and the async tool-call phase draw from the same + // budget: the sync `timeout` is capped at the time left, and the async race + // keys off the same absolute deadline. Without this, the two phases would be + // independent and a sync-then-async script could run for nearly 2× timeoutMs. + const deadline = Date.now() + timeoutMs; + const run = (async (): Promise => { + const syncBudget = Math.max(deadline - Date.now(), 1); // `timeout` here guards synchronous spin; async work is bounded by the race. 
- const completion = script.runInContext(context, { timeout: timeoutMs }); + const completion = script.runInContext(context, { timeout: syncBudget }); return await completion; })(); try { - const result = await withTimeout(run, timeoutMs); + const result = await withDeadline(run, deadline, timeoutMs); return { result: toJsonSafe(result), logs }; } catch (err) { return { result: undefined, logs, error: formatError(err) }; } } -function withTimeout(promise: Promise, ms: number): Promise { +function withDeadline( + promise: Promise, + deadline: number, + budgetMs: number, +): Promise { return new Promise((resolve, reject) => { + const remaining = Math.max(deadline - Date.now(), 0); const timer = setTimeout(() => { - reject(new Error(`Script timed out after ${ms}ms`)); - }, ms); + reject(new Error(`Script timed out after ${budgetMs}ms`)); + }, remaining); promise.then( (value) => { clearTimeout(timer); diff --git a/packages/agent/src/mcp-scripting/signatures.ts b/packages/agent/src/mcp-scripting/signatures.ts index 6f2278ec6..b9b3061b6 100644 --- a/packages/agent/src/mcp-scripting/signatures.ts +++ b/packages/agent/src/mcp-scripting/signatures.ts @@ -102,7 +102,9 @@ function propertyKey(name: string): string { } function oneLine(text: string): string { - return text.replace(/\s+/g, " ").trim(); + // Collapse whitespace and neutralize `*/` so a tool description can't close + // the surrounding JSDoc block early and emit malformed TypeScript. + return text.replace(/\s+/g, " ").trim().replace(/\*\//g, "* /"); } function isRecord(value: unknown): value is Record {