diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts
index 3e158f2..38d26f5 100644
--- a/packages/agent/src/agent.ts
+++ b/packages/agent/src/agent.ts
@@ -136,6 +136,11 @@ class CuaRuntimeController {
     this.translator = this.createTranslator();
   }
 
+  /** Set the `playwright` runtime option that the `playwright_execute` toggle drives. */
+  setPlaywright(enabled: boolean): void {
+    this.options.playwright = enabled;
+  }
+
   tools(): AgentTool[] {
     return [
       ...buildCuaComputerTools(
@@ -312,6 +317,16 @@ export class CuaAgent extends Agent {
       state.systemPrompt = this.runtime.systemPrompt;
     }
   }
+
+  /**
+   * Toggle the `playwright_execute` tool mid-session. Refreshes
+   * `state.tools` so the next turn sees the updated set.
+   */
+  setPlaywright(enabled: boolean): void {
+    this.runtime.setPlaywright(enabled);
+    this.runtimeDirty = true;
+    super.state.tools = this.runtime.tools();
+  }
 }
 
 /**
@@ -390,6 +405,18 @@ export class CuaAgentHarness<
     await super.setActiveTools(toolNames);
     this.requestedActiveToolNames = [...toolNames];
   }
+
+  /**
+   * Toggle the `playwright_execute` tool mid-session. Re-resolves the CUA
+   * tool set and pushes it through `setTools` so the next turn sees it.
+   */
+  async setPlaywright(enabled: boolean): Promise<void> {
+    this.runtime.setPlaywright(enabled);
+    const tools = this.runtime.tools();
+    // NOTE(review): an explicit `setActiveTools` selection is forwarded unchanged, so it
+    // will not contain `playwright_execute` unless the caller already asked for it — confirm.
+    await super.setTools(tools, this.requestedActiveToolNames ?? tools.map((tool) => tool.name));
+  }
 }
 
 function composeOnPayload(first: AgentOptions["onPayload"], second: AgentOptions["onPayload"]): AgentOptions["onPayload"] {
diff --git a/packages/agent/test/agent.test.ts b/packages/agent/test/agent.test.ts
index dfc7525..b6e1bd8 100644
--- a/packages/agent/test/agent.test.ts
+++ b/packages/agent/test/agent.test.ts
@@ -161,6 +161,26 @@ describe("CuaAgent", () => {
     ]);
   });
 
+  it("toggles playwright_execute on and off through setPlaywright", () => {
+    const runtime = resolveCuaRuntimeSpec("openai:gpt-5.5");
+    const baseNames = runtime.toolExecutors.map((tool) => tool.definition.name);
+    const agent = new CuaAgent({
+      browser,
+      client,
+      initialState: {
+        model: "openai:gpt-5.5",
+      },
+    });
+
+    expect(agent.state.tools.map((tool) => tool.name)).toEqual(baseNames);
+
+    agent.setPlaywright(true);
+    expect(agent.state.tools.map((tool) => tool.name)).toEqual([...baseNames, "playwright_execute"]);
+
+    agent.setPlaywright(false);
+    expect(agent.state.tools.map((tool) => tool.name)).toEqual(baseNames);
+  });
+
   it("refreshes CUA runtime state when state.model changes", () => {
     const runtime = resolveCuaRuntimeSpec("google:gemini-3-flash-preview");
     const agent = new CuaAgent({
@@ -376,6 +396,25 @@ describe("CuaAgentHarness", () => {
     expect(harness.getTools().length).toBeGreaterThan(0);
   });
 
+  it("toggles playwright_execute on and off through harness.setPlaywright", async () => {
+    const runtime = resolveCuaRuntimeSpec("openai:gpt-5.5");
+    const baseNames = runtime.toolExecutors.map((tool) => tool.definition.name);
+    const harness = new CuaAgentHarness({
+      ...(await createHarnessServices()),
+      browser,
+      client,
+      model: "openai:gpt-5.5",
+    });
+
+    expect(harness.getTools().map((tool) => tool.name)).toEqual(baseNames);
+
+    await harness.setPlaywright(true);
+    expect(harness.getTools().map((tool) => tool.name)).toEqual([...baseNames, "playwright_execute"]);
+
+    await harness.setPlaywright(false);
+    expect(harness.getTools().map((tool) => tool.name)).toEqual(baseNames);
+  });
+
   it("refreshes CUA runtime state through setModel", async () => {
     const runtime = resolveCuaRuntimeSpec("google:gemini-3-flash-preview");
     const harness = new CuaAgentHarness({
diff --git a/packages/cli/README.md b/packages/cli/README.md
index 2e12229..07c5d0e 100644
--- a/packages/cli/README.md
+++ b/packages/cli/README.md
@@ -106,7 +106,8 @@ model run Playwright/TypeScript directly against the live browser
 session for steps that are awkward as raw pointer/keyboard actions
 (precise DOM reads, form fills, data extraction, waiting on selectors).
 `page`, `context`, and `browser` are in scope; the code may `return` a
-JSON-serializable value. Off by default. Verified e2e with Anthropic,
+JSON-serializable value. Off by default. Toggle mid-session with
+`/playwright on` or `/playwright off`. Verified e2e with Anthropic,
 Tzafon, and Yutori CUA models.
 
 ## Output formats
diff --git a/packages/cli/src/tui/main.ts b/packages/cli/src/tui/main.ts
index 67111d3..d817cde 100644
--- a/packages/cli/src/tui/main.ts
+++ b/packages/cli/src/tui/main.ts
@@ -311,6 +311,10 @@ export async function runInteractive(opts: InteractiveOptions): Promise
       await applyCompactCommand(opts, messages);
       return;
     }
+    if (parsed?.command === "playwright") {
+      await applyPlaywrightCommand(opts, messages, parsed.argument);
+      return;
+    }
     if (parsed?.command === "skill") {
       const skill = (opts.skills ?? []).find((s) => s.name === parsed.name);
       if (!skill) {
@@ -511,6 +515,29 @@ function isThinkingLevel(value: string): value is ThinkingLevel {
   return ["off", "minimal", "low", "medium", "high", "xhigh"].includes(value);
 }
 
+/**
+ * Handle `/playwright <on|off>`: validate the argument, toggle the tool on the
+ * harness, and report the outcome (or the failure) in the message list.
+ */
+async function applyPlaywrightCommand(
+  opts: InteractiveOptions,
+  messages: MessageList,
+  argument: string,
+): Promise<void> {
+  const value = argument.trim().toLowerCase();
+  if (value !== "on" && value !== "off") {
+    messages.addError("usage: /playwright <on|off>");
+    return;
+  }
+  try {
+    await opts.harness.setPlaywright(value === "on");
+    messages.addNotice(`playwright → ${value}`);
+  } catch (err) {
+    // A thrown value is not guaranteed to be an Error; avoid reporting `undefined`.
+    messages.addError(err instanceof Error ? err.message : String(err));
+  }
+}
+
 async function applyCompactCommand(opts: InteractiveOptions, messages: MessageList): Promise<void> {
   messages.addNotice("compacting…");
   try {
diff --git a/packages/cli/src/tui/slash-commands.ts b/packages/cli/src/tui/slash-commands.ts
index 8d4ac47..5cb462d 100644
--- a/packages/cli/src/tui/slash-commands.ts
+++ b/packages/cli/src/tui/slash-commands.ts
@@ -39,6 +39,13 @@ export function buildAutocompleteProvider(
     description: "Summarize older turns to free context budget",
   });
 
+  commands.push({
+    name: "playwright",
+    description: "Toggle the playwright_execute tool",
+    argumentHint: "<on|off>",
+    getArgumentCompletions: (prefix: string) => playwrightCompletions(prefix),
+  });
+
   for (const skill of skills) {
     commands.push({
       name: `skill:${skill.name}`,
@@ -73,10 +80,24 @@ function thinkingCompletions(prefix: string): AutocompleteItem[] {
   return filtered.map((t) => ({ value: t.value, label: t.value, description: t.description }));
 }
 
+/** Argument values offered for `/playwright`, with the description used for each completion. */
+const PLAYWRIGHT_TOGGLES: ReadonlyArray<{ value: string; description: string }> = [
+  { value: "on", description: "Enable the playwright_execute tool" },
+  { value: "off", description: "Disable the playwright_execute tool" },
+];
+
+/** Completions for `/playwright`: values starting with `prefix` (trimmed, lower-cased), or all when it is blank. */
+function playwrightCompletions(prefix: string): AutocompleteItem[] {
+  const trimmed = prefix.trim().toLowerCase();
+  const filtered = trimmed ? PLAYWRIGHT_TOGGLES.filter((t) => t.value.startsWith(trimmed)) : PLAYWRIGHT_TOGGLES;
+  return filtered.map((t) => ({ value: t.value, label: t.value, description: t.description }));
+}
+
 export type ParsedSlashCommand =
   | { command: "model"; argument: string }
   | { command: "thinking"; argument: string }
   | { command: "compact"; argument: string }
+  | { command: "playwright"; argument: string }
   | { command: "skill"; name: string; remainder: string };
 
 /**
@@ -91,11 +112,11 @@ export function parseSlashCommand(text: string): ParsedSlashCommand | undefined
     const [, name, rest] = skillMatch;
     return { command: "skill", name: name ?? "", remainder: (rest ?? "").trim() };
   }
-  const builtinMatch = trimmed.match(/^\/(model|thinking|compact)\s*(.*)$/);
+  const builtinMatch = trimmed.match(/^\/(model|thinking|compact|playwright)\s*(.*)$/);
   if (builtinMatch) {
     const [, name, rest] = builtinMatch;
     return {
-      command: name as "model" | "thinking" | "compact",
+      command: name as "model" | "thinking" | "compact" | "playwright",
       argument: (rest ?? "").trim(),
     };
   }
diff --git a/packages/cli/test/slash-commands.test.ts b/packages/cli/test/slash-commands.test.ts
index 84caa1a..6779537 100644
--- a/packages/cli/test/slash-commands.test.ts
+++ b/packages/cli/test/slash-commands.test.ts
@@ -26,6 +26,12 @@ describe("parseSlashCommand", () => {
     expect(parseSlashCommand("/compact")).toEqual({ command: "compact", argument: "" });
   });
 
+  it("parses /playwright with on|off arguments", () => {
+    expect(parseSlashCommand("/playwright on")).toEqual({ command: "playwright", argument: "on" });
+    expect(parseSlashCommand("/playwright off")).toEqual({ command: "playwright", argument: "off" });
+    expect(parseSlashCommand("/playwright")).toEqual({ command: "playwright", argument: "" });
+  });
+
   it("parses /skill: with optional remainder", () => {
     expect(parseSlashCommand("/skill:hello")).toEqual({
       command: "skill",