diff --git a/.gitignore b/.gitignore index 9de7b0d..7a6c94e 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,9 @@ web_modules/ # TypeScript cache *.tsbuildinfo +# TypeScript compilation output +*.js + # Optional npm cache directory .npm @@ -149,3 +152,10 @@ package-lock.json .chunkhound.json .chunkhound/ .mcp.json + +# macOS +.DS_Store + +# PR review artifacts +AGENT_REVIEW.md +HUMAN_REVIEW.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..231fa84 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,5 @@ +# TUI Safety + +**Never use `console.debug/warn/error/log`** — writes to stdout/stderr corrupt pi's TUI ANSI rendering. Extension host runs in the same process. + +Use `ctx.ui.notify()` / `setStatus()` / `setWidget()` instead. For diagnostics, remove entirely. diff --git a/agenticoding.test.ts b/agenticoding.test.ts index 5468314..3491468 100644 --- a/agenticoding.test.ts +++ b/agenticoding.test.ts @@ -1,9 +1,12 @@ import test, { after } from "node:test"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; import assert from "node:assert/strict"; import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { AuthStorage, ModelRegistry, type Theme } from "@earendil-works/pi-coding-agent"; +import { AuthStorage, ModelRegistry, createEditTool, createWriteTool, type Theme } from "@earendil-works/pi-coding-agent"; import { Text } from "@earendil-works/pi-tui"; import { registerHandoffCommand } from "./handoff/command.js"; import { registerHandoffTool } from "./handoff/tool.js"; @@ -109,6 +112,8 @@ class MockPi { toolSources = new Map(); sentUserMessages: Array<{ content: string; options: any }> = []; appendedEntries: Array<{ customType: string; data: any }> = []; + flags = new Map(); + shortcuts = new Map(); registerCommand(name: string, definition: { description?: string; handler: Handler }) { this.commands.set(name, definition); 
@@ -175,6 +180,18 @@ class MockPi { appendEntry(customType: string, data: any) { this.appendedEntries.push({ customType, data }); } + + registerFlag(name: string, definition: { description?: string; type: string; default: any }) { + if (!this.flags.has(name)) this.flags.set(name, definition.default); + } + + getFlag(name: string): any { + return this.flags.get(name); + } + + registerShortcut(key: string, definition: { description?: string; handler: Handler }) { + this.shortcuts.set(key, definition); + } } const EMPTY_USAGE = { @@ -262,6 +279,18 @@ test("updateIndicators uses error tone at 70%+ context", () => { assert.ok(w?.[0]?.includes("85%"), "warning widget shown at 85%"); }); +test("updateIndicators uses readonly-specific high-context guidance", () => { + const state = createState(); + state.readonlyEnabled = true; + const record = { statuses: new Map(), widgets: new Map() }; + const ctx = makeTUICtx({ percent: 85, record }); + + updateIndicators(ctx, state); + const w = record.widgets.get("agenticoding-warning"); + assert.ok(w?.[0]?.includes("readonly: same topic → spawn")); + assert.ok(w?.[0]?.includes("disable readonly, then handoff")); +}); + test("updateIndicators uses warning tone at 50-69% context", () => { const state = createState(); const record = { statuses: new Map(), widgets: new Map() }; @@ -378,6 +407,29 @@ test("/handoff requires a direction", async () => { assert.deepEqual(pi.sentUserMessages, []); }); +test("/handoff is gated at command entry in readonly mode", async () => { + const pi = new MockPi(); + const state = createState(); + state.readonlyEnabled = true; + registerHandoffCommand(pi as any, state); + + const notifications: Array<{ message: string; level: string }> = []; + await pi.commands.get("handoff")!.handler("implement auth", { + hasUI: true, + isIdle: () => true, + ui: { + notify: (message: string, level: string) => notifications.push({ message, level }), + }, + }); + + assert.deepEqual(pi.sentUserMessages, []); + 
assert.equal(state.pendingRequestedHandoff, null); + assert.equal(notifications.length, 1); + assert.match(notifications[0].message, /Readonly mode blocks \/handoff/); + assert.match(notifications[0].message, /disable readonly with \/readonly/); + assert.equal(notifications[0].level, "warning"); +}); + test("handoff tool triggers compaction and resumes with the compacted task", async () => { const pi = new MockPi(); const state = createState(); @@ -660,17 +712,40 @@ test("buildNudge handles null percent and boundary hints before topic guidance", { activeNotebookTopic: "oauth", pendingTopicBoundaryHint: { from: "oauth", to: "billing", source: "human" }, + readonlyEnabled: false, }, null, ); assert.match(boundary, /Notebook topic changed from oauth to billing/); assert.doesNotMatch(boundary, /Active notebook topic: oauth/); - const noTopic = buildNudge({ activeNotebookTopic: null, pendingTopicBoundaryHint: null }, null); + const noTopic = buildNudge({ activeNotebookTopic: null, pendingTopicBoundaryHint: null, readonlyEnabled: false }, null); assert.match(noTopic, /Topic-aware context reminder/); assert.match(noTopic, /No active notebook topic is set/); }); +test("context throttles watchdog nudges within the same band", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + const [handler] = pi.handlers.get("context")!; + + // First call: 75% → band 2, should inject watchdog + const first = await handler( + { messages: [{ role: "user", content: "hi", timestamp: 1 }] }, + { getContextUsage: () => ({ percent: 75 }) }, + ); + assert.notEqual(first, undefined); + assert.equal(first.messages[1].customType, "agenticoding-watchdog"); + + // Second call: 78% → same band 2, should be throttled + const second = await handler( + { messages: [{ role: "user", content: "hi", timestamp: 2 }] }, + { getContextUsage: () => ({ percent: 78 }) }, + ); + assert.equal(second, undefined); +}); + + test("watchdog stays advisory when a requested handoff is not 
completed", async () => { const pi = new MockPi(); const state = createState(); @@ -1230,44 +1305,31 @@ test("spawn execute marks stats unavailable when stats collection throws", async pi.setActiveTools(["read", "bash", "spawn"]); const state = createState(); - const warnings: any[] = []; - const originalWarn = console.warn; - console.warn = (...args: any[]) => { - warnings.push(args); - }; - - try { - const mockFactory = async () => { - const session = { - messages: [] as any[], - prompt: async () => { - session.messages = [{ role: "assistant", content: [{ type: "text", text: "child result" }] }]; - }, - abort: async () => {}, - getSessionStats: () => { - throw new Error("stats failed"); - }, - }; - return { session: session as any }; + const mockFactory = async () => { + const session = { + messages: [] as any[], + prompt: async () => { + session.messages = [{ role: "assistant", content: [{ type: "text", text: "child result" }] }]; + }, + abort: async () => {}, + getSessionStats: () => { + throw new Error("stats failed"); + }, }; + return { session: session as any }; + }; - registerSpawnTool(pi as any, state, mockFactory as any); - const result = await pi.tools.get("spawn").execute( - "spawn-1", - { prompt: "Do the task" }, - undefined, - undefined, - { model: { id: "mock-model" }, cwd: "/tmp" }, - ); + registerSpawnTool(pi as any, state, mockFactory as any); + const result = await pi.tools.get("spawn").execute( + "spawn-1", + { prompt: "Do the task" }, + undefined, + undefined, + { model: { id: "mock-model" }, cwd: "/tmp" }, + ); - assert.equal(result.details.stats, undefined); - assert.equal(result.details.statsUnavailable, true); - assert.equal(warnings.length, 1); - assert.match(String(warnings[0][1]), /stats failed/); - assert.equal(warnings[0][2], "spawn-1"); - } finally { - console.warn = originalWarn; - } + assert.equal(result.details.stats, undefined); + assert.equal(result.details.statsUnavailable, true); }); test("spawn execute throws when child 
produces no output", async () => { @@ -1610,41 +1672,32 @@ test("nested spawn live action tracks tool execution events", () => { state.childSessions.set("tool-call-1", session); state.liveChildSessions.set("tool-call-1", session); - // Mock console.warn to suppress any expected-but-harmless warnings - // (e.g., streaming component errors in headless test env). - const originalWarn = console.warn; - console.warn = () => {}; + const component = childSpawnTool.renderResult( + { content: [{ type: "text", text: "ignored" }], details: { model: "m", thinking: "low", truncated: false } }, + { expanded: false }, + theme, + createRenderContext(), + ) as any; - try { - const component = childSpawnTool.renderResult( - { content: [{ type: "text", text: "ignored" }], details: { model: "m", thinking: "low", truncated: false } }, - { expanded: false }, - theme, - createRenderContext(), - ) as any; - - // message_start → thinking - emit({ type: "message_start", message: { role: "assistant", content: [] } }); - let lines = component.render(120); - assert.ok(lines.some((l: string) => l.includes("thinking")), `expected thinking, got: ${lines.join("\n")}`); - - // message_update with text → live preview - emit({ type: "message_update", message: { role: "assistant", content: [{ type: "text", text: "writing code now" }] } }); - lines = component.render(120); - assert.ok(lines.some((l: string) => l.includes("writing code now")), `expected live text preview, got: ${lines.join("\n")}`); - - // message_end → success marker in identity line - emit({ type: "message_end", message: { role: "assistant", content: [{ type: "text", text: "summary" }], stopReason: "end_turn" } }); - lines = component.render(120); - assert.ok(lines.some((l: string) => l.includes("✅")), `expected success marker, got: ${lines.join("\n")}`); - - // Tool events degrade gracefully in minimal test env and still update live action - emit({ type: "tool_execution_start", toolCallId: "tc-1", toolName: "bash", args: { command: 
"ls" } }); - lines = component.render(120); - assert.ok(lines.some((l: string) => l.includes("[bash]")), `expected tool live action, got: ${lines.join("\n")}`); - } finally { - console.warn = originalWarn; - } + // message_start → thinking + emit({ type: "message_start", message: { role: "assistant", content: [] } }); + let lines = component.render(120); + assert.ok(lines.some((l: string) => l.includes("thinking")), `expected thinking, got: ${lines.join("\n")}`); + + // message_update with text → live preview + emit({ type: "message_update", message: { role: "assistant", content: [{ type: "text", text: "writing code now" }] } }); + lines = component.render(120); + assert.ok(lines.some((l: string) => l.includes("writing code now")), `expected live text preview, got: ${lines.join("\n")}`); + + // message_end → success marker in identity line + emit({ type: "message_end", message: { role: "assistant", content: [{ type: "text", text: "summary" }], stopReason: "end_turn" } }); + lines = component.render(120); + assert.ok(lines.some((l: string) => l.includes("✅")), `expected success marker, got: ${lines.join("\n")}`); + + // Tool events degrade gracefully in minimal test env and still update live action + emit({ type: "tool_execution_start", toolCallId: "tc-1", toolName: "bash", args: { command: "ls" } }); + lines = component.render(120); + assert.ok(lines.some((l: string) => l.includes("[bash]")), `expected tool live action, got: ${lines.join("\n")}`); }); test("nested spawn handleEvent recovers from malformed events", () => { @@ -1654,30 +1707,20 @@ test("nested spawn handleEvent recovers from malformed events", () => { state.childSessions.set("tool-call-1", session); state.liveChildSessions.set("tool-call-1", session); - const warnings: any[] = []; - const originalWarn = console.warn; - console.warn = (...args: any[]) => warnings.push(args); + const component = childSpawnTool.renderResult( + { content: [{ type: "text", text: "ignored" }], details: { model: "m", 
thinking: "low", truncated: false } }, + { expanded: false }, + theme, + createRenderContext(), + ) as any; + + // Emit a malformed event that will throw inside handleEvent + emit({ type: "message_start", message: null }); - try { - const component = childSpawnTool.renderResult( - { content: [{ type: "text", text: "ignored" }], details: { model: "m", thinking: "low", truncated: false } }, - { expanded: false }, - theme, - createRenderContext(), - ) as any; - - // Emit a malformed event that will throw inside handleEvent - emit({ type: "message_start", message: null }); - assert.equal(warnings.length, 1); - assert.match(String(warnings[0][1]), /message_start/); - - // Subsequent valid events still process - emit({ type: "message_start", message: { role: "assistant", content: [] } }); - const lines = component.render(120); - assert.ok(lines.some((l: string) => l.includes("thinking")), `expected thinking after recovery, got: ${lines.join("\n")}`); - } finally { - console.warn = originalWarn; - } + // Subsequent valid events still process + emit({ type: "message_start", message: { role: "assistant", content: [] } }); + const lines = component.render(120); + assert.ok(lines.some((l: string) => l.includes("thinking")), `expected thinking after recovery, got: ${lines.join("\n")}`); }); test("nested spawn message_end with aborted stopReason clears pending tools", () => { @@ -2141,6 +2184,32 @@ test("notebook rehydration clears stale in-memory notebook state when persisted }); +test("notebook rehydration handles null and malformed entries in branch", async () => { + const pi = new MockPi(); + const state = createState(); + registerNotebookRehydration(pi as any, state); + const [handler] = pi.handlers.get("session_start")!; + + await handler( + {}, + { + sessionManager: { + getBranch: () => [ + null, + undefined, + "bad-string", + { type: "custom", customType: "notebook-entry", data: { epoch: 1, name: "keep", content: "valid" } }, + null, + { customType: "notebook-entry" }, 
// missing type: "custom" + ], + }, + }, + ); + + assert.equal(state.epoch, 1); + assert.deepEqual(Array.from(state.notebookPages.entries()), [["keep", "valid"]]); +}); + test("session_start rehydrates the latest persisted notebook state through the full hook chain", async () => { resetNotebookWriteLock(); const pi = new MockPi(); @@ -2376,6 +2445,30 @@ test("/notebook notifies with info on first set and warning on boundary assert.equal(widgets.get(WIDGET_KEY_WARNING), undefined); }); +test("/notebook warns with readonly-safe guidance on boundary change", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + const notifications: Array<{ message: string; level: string }> = []; + const ctx = { + hasUI: true, + getContextUsage: () => ({ percent: 20 }), + ui: { + theme: { fg: (_name: string, text: string) => text }, + notify: (message: string, level: string) => { notifications.push({ message, level }); }, + setStatus: () => {}, + setWidget: () => {}, + }, + }; + + await pi.commands.get("readonly")!.handler("", ctx as any); + await pi.commands.get("notebook")!.handler("oauth", ctx as any); + await pi.commands.get("notebook")!.handler("billing", ctx as any); + + assert.match(notifications[2].message, /use spawn only for same-topic delegation/); + assert.match(notifications[2].message, /disable readonly with \/readonly before handoff/); + assert.equal(notifications[2].level, "warning"); +}); + test("/notebook empty overlay renders empty state and closes on input", async () => { const pi = new MockPi(); registerAgenticoding(pi as any); @@ -2685,26 +2778,17 @@ test("nested spawn rebuildFromSession quietly tolerates missing tool definitions } as any; state.childSessions.set("tool-call-1", session); - const warnings: any[] = []; - const originalWarn = console.warn; - console.warn = (...args: any[]) => warnings.push(args); + const component = childSpawnTool.renderResult( + { content: [], details: { model: "m", thinking: "low", truncated: false, outcome: 
"error" } }, + { expanded: false }, + theme, + createRenderContext(), + ) as any; - try { - const component = childSpawnTool.renderResult( - { content: [], details: { model: "m", thinking: "low", truncated: false, outcome: "error" } }, - { expanded: false }, - theme, - createRenderContext(), - ) as any; - - const lines = component.render(120); - assert.ok(lines.some((l: string) => l.includes("⚠ m • low"))); - assert.ok(lines.some((l: string) => l.includes("error"))); - assert.equal(state.childSessions.has("tool-call-1"), false); - assert.deepEqual(warnings, []); - } finally { - console.warn = originalWarn; - } + const lines = component.render(120); + assert.ok(lines.some((l: string) => l.includes("⚠ m • low"))); + assert.ok(lines.some((l: string) => l.includes("error"))); + assert.equal(state.childSessions.has("tool-call-1"), false); }); test("nested spawn attachSession recovers from subscribe throwing", () => { @@ -2721,29 +2805,19 @@ test("nested spawn attachSession recovers from subscribe throwing", () => { } as any; state.childSessions.set("tool-call-1", throwingSession); - const warnings: any[] = []; - const originalWarn = console.warn; - console.warn = (...args: any[]) => warnings.push(args); - - try { - const component = childSpawnTool.renderResult( - { content: [], details: { model: "m", thinking: "low", truncated: false } }, - { expanded: false }, - theme, - createRenderContext(), - ) as any; + const component = childSpawnTool.renderResult( + { content: [], details: { model: "m", thinking: "low", truncated: false } }, + { expanded: false }, + theme, + createRenderContext(), + ) as any; - // Should not crash, session attached, ownership transferred - assert.equal(state.childSessions.has("tool-call-1"), false); - assert.equal(warnings.length, 1); - assert.match(String(warnings[0][0]), /Failed to subscribe/); + // Should not crash, session attached, ownership transferred + assert.equal(state.childSessions.has("tool-call-1"), false); - // Should still render 
from session messages despite subscribe failure - const lines = component.render(120); - assert.ok(lines.some((l: string) => l.includes("hello"))); - } finally { - console.warn = originalWarn; - } + // Should still render from session messages despite subscribe failure + const lines = component.render(120); + assert.ok(lines.some((l: string) => l.includes("hello"))); }); test("nested spawn rapid events collapse to last state", () => { @@ -3066,25 +3140,16 @@ test("nested spawn recovers batching state after event handler error", async () createRenderContext(), ) as any; - const warnings: any[] = []; - const originalWarn = console.warn; - console.warn = (...args: any[]) => warnings.push(args); - try { - // Bad event triggers an error in handleMessageStart (null message) - // catch block must call resetRenderBatching() so the flag resets - emit({ type: "message_start", message: null } as any); - - // Good event after error — should still schedule and render - emit({ type: "message_start", message: { role: "assistant", content: [] } }); - flushSpawnFrameScheduler(); - const lines = component.render(120); - assert.ok(lines.some((l: string) => l.includes("thinking")), - "error recovery should allow subsequent events to render"); - assert.equal(warnings.length, 1); - assert.match(String(warnings[0][0]), /Event handler error/); - } finally { - console.warn = originalWarn; - } + // Bad event triggers an error in handleMessageStart (null message) + // catch block must call resetRenderBatching() so the flag resets + emit({ type: "message_start", message: null } as any); + + // Good event after error — should still schedule and render + emit({ type: "message_start", message: { role: "assistant", content: [] } }); + flushSpawnFrameScheduler(); + const lines = component.render(120); + assert.ok(lines.some((l: string) => l.includes("thinking")), + "error recovery should allow subsequent events to render"); }); test("nested spawn processes stale-state events without invalidating 
the parent", async () => { @@ -3479,8 +3544,37 @@ test("notebook_topic_set preserves human authority, stays idempotent for equal t ); }); +test("buildNudge readonly with topic suggests same-topic spawn and readonly disable for handoff", () => { + const nudge = buildNudge( + { readonlyEnabled: true, activeNotebookTopic: "my-topic", pendingTopicBoundaryHint: null }, + 50, + ); + assert.match(nudge, /my-topic/); + assert.match(nudge, /same-topic delegation/); + assert.match(nudge, /disable readonly with \/readonly/i); +}); + +test("buildNudge readonly without topic suggests notebook_topic_set", () => { + const nudge = buildNudge( + { readonlyEnabled: true, activeNotebookTopic: null, pendingTopicBoundaryHint: null }, + 50, + ); + assert.match(nudge, /disable readonly with \/readonly/i); + assert.match(nudge, /notebook_topic_set/); +}); + +test("buildNudge readonly with boundary hint points to spawn vs disable readonly", () => { + const nudge = buildNudge( + { readonlyEnabled: true, activeNotebookTopic: null, pendingTopicBoundaryHint: { from: "old", to: "new", source: "agent" } }, + null, + ); + assert.match(nudge, /Readonly blocks handoff/); + assert.match(nudge, /current topic/); + assert.match(nudge, /disable readonly with \/readonly/i); +}); + test("buildNudge no longer emits the old percent-only handoff text", () => { - const old = buildNudge({ activeNotebookTopic: "oauth", pendingTopicBoundaryHint: null }, 46); + const old = buildNudge({ activeNotebookTopic: "oauth", pendingTopicBoundaryHint: null, readonlyEnabled: false }, 46); assert.doesNotMatch(old, /One context, one job\.|If you're mid-job and still clear|consider a handoff and draft a clear brief/i); assert.match(old, /Active notebook topic: oauth/); assert.match(old, /prefer spawn/i); @@ -3738,6 +3832,24 @@ test("truncateText respects line limit before byte limit", async () => { assert.ok(result.content[0].text.includes("[Result truncated")); }); + +test("truncateText handles multi-byte boundary correctly", 
async () => { + const { truncateText } = await import("./spawn/index.js"); + + // Mid-multi-byte boundary: 4-byte emoji truncated at byte 2 — should shrink to 0 bytes + assert.equal(truncateText("🙂", 10, 2), ""); + + // Exact boundary at multi-byte start: 4-byte emoji, maxBytes=4 — should keep full emoji + assert.equal(truncateText("🙂", 10, 4), "🙂"); + + // Empty input: returns empty string + assert.equal(truncateText("", 10, 1024), ""); + + // Under-limit text: returns unchanged + assert.equal(truncateText("hello", 10, 1024), "hello"); +}); + + test("nested spawn setExpanded and setShowImages no-op when value matches", () => { const state = createState(); const childSpawnTool = createChildSpawnTool(state); @@ -3823,6 +3935,1963 @@ test("registerSpawnTool registers a tool with correct name and metadata", () => assert.equal(tool.executionMode, undefined, "spawn should not be sequential"); }); + +// ── Readonly mode: bash safety tests ─────────────────────────────── + + +// ── classifyBashCommand: readonly contract tests ─────────────────── + +import { classifyBashCommand, applyReadonlyBashGuard } from "./readonly-bash.js"; +import { canUseOsSandbox, buildMacProfile, wrapWithSandboxExec, wrapWithBwrap, wrapCommandWithOsSandbox } from "./os-sandbox.js"; +import { resolveRealPath } from "./resolve-path.js"; + +function isDirect(cmd: string, cwd = "/workspace"): boolean { + return classifyBashCommand(cmd, cwd).ok === true; +} + +function isBlocked(cmd: string, cwd = "/workspace"): boolean { + return classifyBashCommand(cmd, cwd).ok === false; +} + + +test("classifyBashCommand allows non-mutating and unknown commands", () => { + assert.equal(isDirect("ls -la"), true); + assert.equal(isDirect("python3 script.py"), true); + assert.equal(isDirect("curl https://example.com"), true); + assert.equal(isDirect("docker ps"), true); + assert.equal(isDirect("env FOO=bar node --version"), true); + assert.equal(isDirect("export FOO=bar; echo $FOO"), true); +}); + 
+test("classifyBashCommand blocks writes outside temp but allows temp redirects", () => { + const tempFile = `${os.tmpdir()}/pi-readonly-test.txt`; + assert.equal(isBlocked("echo hello > file.txt"), true); + assert.equal(isBlocked("cat > ./out.txt"), true); + assert.equal(isDirect(`echo hello > ${tempFile}`), true); + assert.equal(isDirect(`cat > ${tempFile}`), true); + assert.equal(isDirect("ls >/dev/null"), true); +}); + +test("classifyBashCommand blocks explicit filesystem mutation outside temp", () => { + assert.equal(isBlocked("rm file.txt"), true); + assert.equal(isBlocked("mv a b"), true); + assert.equal(isBlocked("cp a b"), true); + assert.equal(isBlocked("mkdir newdir"), true); + assert.equal(isBlocked("touch file"), true); + assert.equal(isBlocked("chmod 755 file"), true); + assert.equal(isBlocked("tee file"), true); +}); + +test("classifyBashCommand allows explicit filesystem mutation inside temp", () => { + const tmp = os.tmpdir(); + assert.equal(isDirect(`rm ${tmp}/x`), true); + assert.equal(isDirect(`mkdir ${tmp}/newdir`), true); + assert.equal(isDirect(`touch ${tmp}/file`), true); + assert.equal(isDirect(`cp ${tmp}/a ${tmp}/b`), true); + assert.equal(isDirect(`mv ${tmp}/a ${tmp}/b`), true); +}); + +test("classifyBashCommand blocks rm -r outside temp (no -r value-skip bypass)", () => { + // Critical fix: rm -r must not be treated as "-r consumes target as value" + assert.equal(isBlocked("rm -rf /etc/passwd"), true, "rm -rf outside temp"); + assert.equal(isBlocked("rm -r /etc/passwd"), true, "rm -r with standalone -r"); + assert.equal(isBlocked("rm -fr /etc/passwd"), true, "rm -fr combined flags"); + // Inside temp, rm -r should be allowed + const tmp = os.tmpdir(); + assert.equal(isDirect(`rm -r ${tmp}/x`), true, "rm -r inside temp"); + assert.equal(isDirect(`rm -rf ${tmp}/x`), true, "rm -rf inside temp"); +}); + +test("classifyBashCommand blocks truncate --no-create outside temp", () => { + // Fix: --no-create is boolean, not value-consuming — must 
not skip the target + assert.equal(isBlocked("truncate -s 0 --no-create /etc/config"), true, "truncate --no-create outside temp"); + const tmp = os.tmpdir(); + assert.equal(isDirect(`truncate -s 0 --no-create ${tmp}/config`), true, "truncate --no-create inside temp"); + // touch --no-create must also be correctly classified + assert.equal(isBlocked("touch --no-create /etc/config"), true, "touch --no-create outside temp"); + assert.equal(isDirect(`touch --no-create ${tmp}/config`), true, "touch --no-create inside temp"); +}); + +test("classifyBashCommand blocks mutable git commands and allows readonly git", () => { + assert.equal(isDirect("git status"), true); + assert.equal(isDirect("git log --oneline"), true); + assert.equal(isDirect("git branch --list"), true); + assert.equal(isDirect("git config --get user.name"), true); + assert.equal(isBlocked("git add ."), true); + assert.equal(isBlocked("git commit -m 'msg'"), true); + assert.equal(isBlocked("git fetch"), true); + assert.equal(isBlocked("git branch feature"), true); + assert.equal(isBlocked("git tag v1"), true); +}); + +test("classifyBashCommand checks command substitutions for writes", () => { + assert.equal(isBlocked("echo $(rm file.txt)"), true); + assert.equal(isBlocked("echo `touch file.txt`"), true); + assert.equal(isDirect("echo $(printf hi)"), true); +}); + + +// ── Readonly mode: toggle + TUI indicator tests ──────────────────── + +test("readonly toggle command enables and disables readonly mode", () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const state = createState(); + const notifications: string[] = []; + const statuses = new Map(); + + const ctx = { + hasUI: true, + ui: { + notify: (msg: string, _type: string) => notifications.push(msg), + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + getContextUsage: () => null, + sessionManager: { getBranch: () => [] }, + }; 
+ + // First toggle: ON + pi.commands.get("readonly")!.handler("", ctx); + assert.equal(notifications.pop(), "Readonly mode enabled \u2014 write/edit/handoff and non-temp bash writes blocked"); + assert.ok(statuses.get("agenticoding-readonly")?.includes("readonly")); + + // Second toggle: OFF + pi.commands.get("readonly")!.handler("", ctx); + assert.equal(notifications.pop(), "Readonly mode disabled \u2014 write/edit/handoff and non-temp bash writes unblocked"); + assert.equal(statuses.get("agenticoding-readonly"), undefined); +}); + +test("readonly toggle is a no-op in headless mode", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const state = createState(); + const ctx = { + hasUI: false, + ui: { + notify: () => { throw new Error("should not be called in headless"); }, + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => { throw new Error("should not be called in headless"); }, + setWidget: () => { throw new Error("should not be called in headless"); }, + }, + getContextUsage: () => null, + }; + + // Toggle in headless mode should not crash and should not change state + pi.commands.get("readonly")!.handler("", ctx); + // Verify readonly was NOT enabled — write should not be blocked + const [toolCallHandler] = pi.handlers.get("tool_call")!; + const result = await toolCallHandler( + { toolName: "write", input: { path: "/tmp/test", content: "" } }, + { cwd: "/workspace" }, + ); + assert.equal(result, undefined, "write is not blocked after headless readonly toggle"); +}); + +test("readonly TUI indicator shows warning tone when enabled", () => { + const state = createState(); + state.readonlyEnabled = true; + const record = { statuses: new Map(), widgets: new Map() }; + const ctx = makeTUICtx({ percent: null, record }); + + updateIndicators(ctx, state); + const s = record.statuses.get("agenticoding-readonly"); + assert.ok(s?.includes("🔒 readonly"), `expected readonly indicator, got: ${s}`); +}); + +test("readonly TUI 
indicator is cleared when disabled", () => { + const state = createState(); + state.readonlyEnabled = false; + const record = { statuses: new Map(), widgets: new Map() }; + const ctx = makeTUICtx({ percent: null, record }); + + updateIndicators(ctx, state); + assert.equal(record.statuses.get("agenticoding-readonly"), undefined); +}); + +// ── Readonly mode: tool_call blocking tests ──────────────────────── + +test("readonly tool_call blocks write, edit, and handoff", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + // Toggle readonly ON via command (modifies internal state) + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => {}, + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + + const [toolCallHandler] = pi.handlers.get("tool_call")!; + + // Block write + const writeResult = await toolCallHandler({ toolName: "write", input: { path: "/tmp/test" } }, {}); + assert.equal(writeResult.block, true); + assert.match(writeResult.reason, /write\/edit\/handoff disabled/); + + // Block edit + const editResult = await toolCallHandler({ toolName: "edit", input: { path: "/tmp/test" } }, {}); + assert.equal(editResult.block, true); + + // Block handoff + const handoffResult = await toolCallHandler({ toolName: "handoff", input: { task: "test" } }, {}); + assert.equal(handoffResult.block, true); + + // Allow read + const readResult = await toolCallHandler({ toolName: "read", input: { path: "/tmp/test" } }, {}); + assert.equal(readResult, undefined); +}); + +test("normal tool_call does not block ordinary write/edit calls", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const [toolCallHandler] = pi.handlers.get("tool_call")!; + + const writeResult = await toolCallHandler( + { toolName: "write", input: { path: "/tmp/test.txt", content: "hello" } }, + {}, + ); + assert.equal(writeResult, undefined, 
"write should pass through when readonly is off"); + + const editResult = await toolCallHandler( + { toolName: "edit", input: { path: "/tmp/test.txt", edits: [] } }, + {}, + ); + assert.equal(editResult, undefined, "edit should pass through when readonly is off"); +}); + + +test("readonly tool_call does not block bash when readonly is off", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const [toolCallHandler] = pi.handlers.get("tool_call")!; + + // Bash not blocked when readonly is off + const safeResult = await toolCallHandler({ toolName: "bash", input: { command: "rm -rf /" } }, {}); + assert.equal(safeResult, undefined, "should not block when readonly is off"); +}); + +test("readonly tool_call blocks non-temp bash writes when readonly is on", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const [toolCallHandler] = pi.handlers.get("tool_call")!; + + // Toggle readonly ON via command + const notifications: string[] = []; + const statuses = new Map(); + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: (msg: string) => notifications.push(msg), + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + + const blockedInput = { command: "rm -rf /" }; + const blockedResult = await toolCallHandler({ toolName: "bash", input: blockedInput }, { cwd: "/workspace" }); + + if (canUseOsSandbox()) { + // OS-level sandbox is available, but classifyBashCommand pre-blocks + // known dangerous commands (rm, mv, etc.) before the sandbox wraps. + // The sandbox only handles commands with unrecognized file-target paths. 
+ assert.equal(blockedResult.block, true); + assert.match(blockedResult.reason, /outside temp dir/); + } else { + // Fallback: classifyBashCommand blocks + assert.equal(blockedResult.block, true); + assert.match(blockedResult.reason, /outside temp dir/); + } + + const tempAllowedInput = { command: `rm ${os.tmpdir()}/x` }; + const tempAllowed = await toolCallHandler({ toolName: "bash", input: tempAllowedInput }, { cwd: "/workspace" }); + assert.equal(tempAllowed, undefined); + + const safeInput = { command: "ls -la" }; + const safeResult = await toolCallHandler({ toolName: "bash", input: safeInput }, { cwd: "/workspace" }); + assert.equal(safeResult, undefined); + + const blankInput = { command: " " }; + const blankResult = await toolCallHandler({ toolName: "bash", input: blankInput }, { cwd: "/workspace" }); + assert.equal(blankResult, undefined); +}); + +test("readonly tool_call blocks malformed bash input", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const [toolCallHandler] = pi.handlers.get("tool_call")!; + const notifications: string[] = []; + const statuses = new Map(); + + // Toggle readonly ON via command + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: (msg: string) => notifications.push(msg), + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + + // Missing command property + const missingCmd = await toolCallHandler({ toolName: "bash", input: {} }, { cwd: "/workspace" }); + assert.ok(missingCmd, "should block bash with missing command"); + assert.equal(missingCmd.block, true); + assert.match(missingCmd.reason, /invalid bash command input/); + + // Non-string command input + const numCmd = await toolCallHandler({ toolName: "bash", input: { command: 42 } }, { cwd: "/workspace" }); + assert.ok(numCmd, "should block bash with non-string command"); + 
assert.equal(numCmd.block, true); + assert.match(numCmd.reason, /invalid bash command input/); + + // Null command + const nullCmd = await toolCallHandler({ toolName: "bash", input: { command: null } }, { cwd: "/workspace" }); + assert.ok(nullCmd, "should block bash with null command"); + assert.equal(nullCmd.block, true); + assert.match(nullCmd.reason, /invalid bash command input/); +}); + +// ── Readonly mode: spawn child filtering ─────────────────────────── + +test("spawn filters write and edit from child tools when readonly is on", async () => { + const pi = new MockPi(); + pi.setActiveTools(["read", "bash", "write", "edit", "spawn"]); + const state = createState(); + state.readonlyEnabled = true; + + let seenTools: string[] = []; + const mockFactory = async (config: any) => { + seenTools = config.tools; + const session = { + messages: [] as any[], + prompt: async (prompt: string) => { + session.messages = [{ role: "assistant", content: [{ type: "text", text: "done" }] }]; + }, + abort: async () => {}, + getSessionStats: () => undefined, + }; + return { session: session as any }; + }; + + registerSpawnTool(pi as any, state, mockFactory as any); + await pi.tools.get("spawn").execute( + "spawn-1", + { prompt: "Do the task" }, + undefined, + undefined, + { model: { id: "mock-model" }, cwd: "/tmp" }, + ); + + assert.equal(seenTools.includes("write"), false, "write should be filtered"); + assert.equal(seenTools.includes("edit"), false, "edit should be filtered"); + assert.equal(seenTools.includes("read"), true, "read should be inherited"); + assert.equal(seenTools.includes("bash"), true, "bash should be inherited"); +}); + +test("spawn adds a readonly bash override that mirrors parent readonly bash policy", async () => { + const pi = new MockPi(); + pi.setActiveTools(["read", "bash", "spawn"]); + const state = createState(); + state.readonlyEnabled = true; + + let seenTools: string[] = []; + let seenCustomTools: any[] = []; + const mockFactory = async (config: any) 
=> { + seenTools = config.tools; + seenCustomTools = config.customTools; + const session = { + messages: [] as any[], + prompt: async () => { + session.messages = [{ role: "assistant", content: [{ type: "text", text: "done" }] }]; + }, + abort: async () => {}, + getSessionStats: () => undefined, + }; + return { session: session as any }; + }; + + registerSpawnTool(pi as any, state, mockFactory as any); + await pi.tools.get("spawn").execute( + "spawn-1", + { prompt: "Do the task" }, + undefined, + undefined, + { model: { id: "mock-model" }, cwd: "/tmp" }, + ); + + assert.equal(seenTools.includes("bash"), true, "bash should still be available"); + const bashTool = seenCustomTools.find((tool) => tool.name === "bash"); + assert.ok(bashTool, "readonly child should override bash"); + if (canUseOsSandbox()) { + // OS-level sandbox is available, but classifyBashCommand pre-blocks + // known dangerous commands at the spawnHook before the sandbox wraps. + await assert.rejects( + bashTool.execute("bash-1", { command: "sudo rm -rf /" }, undefined, undefined, {}), + /Readonly mode: command blocked/, + ); + } else { + // Fallback: classifyBashCommand blocks at the spawnHook + await assert.rejects( + bashTool.execute("bash-1", { command: "sudo rm -rf /" }, undefined, undefined, {}), + /Readonly mode: command blocked/, + ); + } + + // Also verify that a safe command is ALLOWED through the child bash tool + await assert.doesNotReject( + bashTool.execute("bash-2", { command: "ls -la" }, undefined, undefined, {}), + /Readonly mode: command blocked/, + ); + await assert.doesNotReject( + bashTool.execute("bash-3", { command: " " }, undefined, undefined, {}), + /Readonly mode: command blocked/, + ); +}); + +test("spawn non-readonly child can use inherited builtin write/edit", async () => { + const pi = new MockPi(); + pi.setActiveTools(["read", "bash", "write", "edit", "spawn"]); + const state = createState(); + state.readonlyEnabled = false; + + const tmpDir = 
fs.mkdtempSync(path.join(os.tmpdir(), "pi-spawn-write-edit-")); + const childFile = path.join(tmpDir, "child.txt"); + + const mockFactory = async (config: any) => { + const session = { + messages: [] as any[], + prompt: async () => { + assert.equal(config.tools.includes("write"), true, "child should inherit builtin write"); + assert.equal(config.tools.includes("edit"), true, "child should inherit builtin edit"); + assert.equal(config.customTools.some((t: any) => t.name === "write"), false, "write should stay builtin"); + assert.equal(config.customTools.some((t: any) => t.name === "edit"), false, "edit should stay builtin"); + + const childWrite = createWriteTool(config.cwd); + const childEdit = createEditTool(config.cwd); + await childWrite.execute("child-write", { path: childFile, content: "alpha\nbeta\n" }, undefined, undefined, {}); + await childEdit.execute( + "child-edit", + { path: childFile, edits: [{ oldText: "beta", newText: "gamma" }] }, + undefined, + undefined, + {}, + ); + session.messages = [{ role: "assistant", content: [{ type: "text", text: fs.readFileSync(childFile, "utf8") }] }]; + }, + abort: async () => {}, + getSessionStats: () => undefined, + }; + return { session: session as any }; + }; + + registerSpawnTool(pi as any, state, mockFactory as any); + try { + const result = await pi.tools.get("spawn").execute( + "spawn-1", + { prompt: "Write then edit the file" }, + undefined, + undefined, + { model: { id: "mock-model" }, cwd: tmpDir }, + ); + + assert.equal(fs.readFileSync(childFile, "utf8"), "alpha\ngamma\n"); + assert.equal(result.content[0].text, "alpha\ngamma"); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } +}); + +test("spawn prompt includes readonly notice when enabled", async () => { + const pi = new MockPi(); + pi.setActiveTools(["read", "bash", "spawn"]); + const state = createState(); + state.readonlyEnabled = true; + + let seenPrompt = ""; + const mockFactory = async () => { + const session = { + messages: 
[] as any[], + prompt: async (prompt: string) => { + seenPrompt = prompt; + session.messages = [{ role: "assistant", content: [{ type: "text", text: "done" }] }]; + }, + abort: async () => {}, + getSessionStats: () => undefined, + }; + return { session: session as any }; + }; + + registerSpawnTool(pi as any, state, mockFactory as any); + await pi.tools.get("spawn").execute( + "spawn-1", + { prompt: "Do the task" }, + undefined, + undefined, + { model: { id: "mock-model" }, cwd: "/tmp" }, + ); + + assert.match(seenPrompt, /readonly authority/); + assert.match(seenPrompt, /Readonly restrictions apply/); + assert.doesNotMatch(seenPrompt, /same authority as the parent/); +}); + +test("spawn prompt uses standard authority wording when readonly is off", async () => { + const pi = new MockPi(); + pi.setActiveTools(["read", "bash", "spawn"]); + const state = createState(); + state.readonlyEnabled = false; + + let seenPrompt = ""; + const mockFactory = async () => { + const session = { + messages: [] as any[], + prompt: async (prompt: string) => { + seenPrompt = prompt; + session.messages = [{ role: "assistant", content: [{ type: "text", text: "done" }] }]; + }, + abort: async () => {}, + getSessionStats: () => undefined, + }; + return { session: session as any }; + }; + + registerSpawnTool(pi as any, state, mockFactory as any); + await pi.tools.get("spawn").execute( + "spawn-1", + { prompt: "Do the task" }, + undefined, + undefined, + { model: { id: "mock-model" }, cwd: "/tmp" }, + ); + + assert.match(seenPrompt, /same authority as the parent/); + // The readonly-on test asserts the wording "readonly authority" (no hyphen); + // accept either spelling so this negative check can actually fail. + assert.doesNotMatch(seenPrompt, /read-?only authority/); + assert.doesNotMatch(seenPrompt, /Readonly restrictions apply/); +}); + + + +// ── Readonly mode: session rehydration ───────────────────────────── + +test("session_start rehydrates readonly from branch entries", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const statuses = new Map(); + const branch = [ + { type: "custom", customType: 
"agenticoding-readonly", data: { enabled: false } }, + { type: "custom", customType: "agenticoding-readonly", data: { enabled: true } }, + ]; + + const sessionStartHandlers = pi.handlers.get("session_start")!; + for (const handler of sessionStartHandlers) { + await handler({ reason: "resume" }, { + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + sessionManager: { getBranch: () => branch }, + getContextUsage: () => null, + }); + } + + const s = statuses.get("agenticoding-readonly"); + assert.ok(s?.includes("readonly"), "readonly indicator should be shown after rehydrating true"); +}); + +test("session_start rehydrate handles null entries in branch", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const statuses = new Map(); + // null entries between valid entries should not crash or affect rehydration + const branch = [ + null, + undefined, + { type: "custom", customType: "agenticoding-readonly", data: { enabled: true } }, + null, + ]; + + const sessionStartHandlers = pi.handlers.get("session_start")!; + for (const handler of sessionStartHandlers) { + await handler({ reason: "resume" }, { + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + sessionManager: { getBranch: () => branch }, + getContextUsage: () => null, + }); + } + + const s = statuses.get("agenticoding-readonly"); + assert.ok(s?.includes("readonly"), "readonly should be rehydrated past null entries"); +}); + +test("session_start rehydrate handles string entries in branch", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const statuses = new Map(); + const branch = ["bad-entry", { type: "custom", customType: "agenticoding-readonly", data: { enabled: true } }]; + + const sessionStartHandlers 
= pi.handlers.get("session_start")!; + for (const handler of sessionStartHandlers) { + await handler({ reason: "resume" }, { + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + sessionManager: { getBranch: () => branch }, + getContextUsage: () => null, + }); + } + + const s = statuses.get("agenticoding-readonly"); + assert.ok(s?.includes("readonly"), "readonly should be rehydrated past string entries"); +}); + +test("--readonly CLI flag takes precedence when branch has only malformed entries", async () => { + const pi = new MockPi(); + pi.flags.set("readonly", true); + registerAgenticoding(pi as any); + + const statuses = new Map(); + // Entry has customType but missing type:"custom" — should not count as a valid branch entry + const branch = [ + { customType: "agenticoding-readonly" }, + ]; + + const sessionStartHandlers = pi.handlers.get("session_start")!; + for (const handler of sessionStartHandlers) { + await handler({ reason: "resume" }, { + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + sessionManager: { getBranch: () => branch }, + getContextUsage: () => null, + }); + } + + const s = statuses.get("agenticoding-readonly"); + assert.ok(s?.includes("readonly"), "CLI flag should win when branch has only malformed entries"); +}); + +test("session_start clears readonly indicator on /new", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const statuses = new Map(); + + // First: enable readonly via command + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + getContextUsage: () => null, + 
}); + + assert.ok(statuses.get("agenticoding-readonly")?.includes("readonly")); + + // Now: /new should clear it + const sessionStartHandlers = pi.handlers.get("session_start")!; + for (const handler of sessionStartHandlers) { + await handler({ reason: "new" }, { + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + sessionManager: { getBranch: () => [] }, + getContextUsage: () => null, + }); + } + + assert.equal(statuses.get("agenticoding-readonly"), undefined, "readonly indicator should be cleared on /new"); +}); + +test("--readonly CLI flag does not override branch state when branch has entries", async () => { + const pi = new MockPi(); + pi.flags.set("readonly", true); + registerAgenticoding(pi as any); + + const statuses = new Map(); + const branch = [ + { type: "custom", customType: "agenticoding-readonly", data: { enabled: false } }, + ]; + + const sessionStartHandlers = pi.handlers.get("session_start")!; + for (const handler of sessionStartHandlers) { + await handler({ reason: "resume" }, { + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + sessionManager: { getBranch: () => branch }, + getContextUsage: () => null, + }); + } + + // Branch has an explicit OFF entry; CLI flag only applies when no entries exist. 
+ const s = statuses.get("agenticoding-readonly"); + assert.equal(s, undefined, "branch state should win over CLI flag"); +}); + +test("--readonly CLI flag applies on session_start for new sessions", async () => { + const pi = new MockPi(); + pi.flags.set("readonly", true); + registerAgenticoding(pi as any); + + const statuses = new Map(); + const sessionStartHandlers = pi.handlers.get("session_start")!; + for (const handler of sessionStartHandlers) { + await handler({ reason: "new" }, { + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + sessionManager: { getBranch: () => [] }, + getContextUsage: () => null, + }); + } + + assert.ok(statuses.get("agenticoding-readonly")?.includes("readonly")); +}); + +test("session_start clears stale readonly state on resume when the branch has no readonly entry", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const statuses = new Map(); + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + assert.ok(statuses.get("agenticoding-readonly")?.includes("readonly")); + + const sessionStartHandlers = pi.handlers.get("session_start")!; + for (const handler of sessionStartHandlers) { + await handler({ reason: "resume" }, { + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + sessionManager: { getBranch: () => [] }, + getContextUsage: () => null, + }); + } + + assert.equal(statuses.get("agenticoding-readonly"), undefined); +}); + +// ── Readonly mode: context hook nudges ───────────────────────────── + +test("readonly ON nudge is 
delivered via context hook", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + // Toggle readonly ON + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => {}, + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + + const [contextHandler] = pi.handlers.get("context")!; + const result = await contextHandler( + { messages: [{ role: "user", content: "hi", timestamp: 1 }] }, + { getContextUsage: () => ({ percent: 10 }), sessionManager: { getBranch: () => [] } }, + ); + + assert.equal(result.messages.length, 2); + assert.equal(result.messages[1].customType, "agenticoding-readonly-nudge"); + assert.match(result.messages[1].content, /Readonly mode is active/); +}); + +test("readonly OFF nudge is delivered when the current tree has a prior ON entry", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + // Toggle ON then OFF + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => {}, + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => {}, + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + + const branch = [ + { type: "custom", customType: "agenticoding-readonly", data: { enabled: true } }, + { type: "custom", customType: "agenticoding-readonly", data: { enabled: false } }, + ]; + const [contextHandler] = pi.handlers.get("context")!; + const result = await contextHandler( + { messages: [{ role: "user", content: "hi", timestamp: 1 }] }, + { getContextUsage: () => ({ percent: 10 }), sessionManager: { getBranch: () => branch } }, + ); + + assert.equal(result.messages[1].customType, "agenticoding-readonly-nudge"); + 
assert.match(result.messages[1].content, /turned off/); +}); + +test("readonly OFF nudge is delivered after an explicit disable", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => {}, + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => {}, + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + + const [contextHandler] = pi.handlers.get("context")!; + const result = await contextHandler( + { messages: [{ role: "user", content: "hi", timestamp: 1 }] }, + { getContextUsage: () => ({ percent: 10 }), sessionManager: { getBranch: () => [] } }, + ); + + assert.ok(result && "messages" in result); + assert.match((result as any).messages.at(-1).content, /turned off/); +}); + +test("readonly OFF nudge includes a handoff hint after high-context disable", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => {}, + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => {}, + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + + const branch = [ + { type: "custom", customType: "agenticoding-readonly", data: { enabled: true } }, + { type: "custom", customType: "agenticoding-readonly", data: { enabled: false } }, + ]; + const [contextHandler] = pi.handlers.get("context")!; + const result = await contextHandler( + { messages: [{ role: "user", content: "hi", 
timestamp: 1 }] }, + { getContextUsage: () => ({ percent: 61 }), sessionManager: { getBranch: () => branch } }, + ); + + assert.match(result.messages[1].content, /Context was at 61%/); + assert.match(result.messages[1].content, /if the work changed topics, you can handoff now/); +}); + +test("readonly nudge is one-shot — not re-delivered on subsequent calls", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + // Toggle readonly ON + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => {}, + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + + const [contextHandler] = pi.handlers.get("context")!; + + // First call: delivers ON nudge + await contextHandler( + { messages: [{ role: "user", content: "hi", timestamp: 1 }] }, + { getContextUsage: () => ({ percent: 10 }), sessionManager: { getBranch: () => [] } }, + ); + + // Second call: no nudge + const result = await contextHandler( + { messages: [{ role: "user", content: "hi", timestamp: 2 }] }, + { getContextUsage: () => ({ percent: 10 }), sessionManager: { getBranch: () => [] } }, + ); + + assert.equal(result, undefined, "nudge should not be re-delivered"); +}); + +test("session_tree rehydrates readonly from branch", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const statuses = new Map(); + const branch = [ + { type: "custom", customType: "agenticoding-readonly", data: { enabled: true } }, + ]; + + const [sessionTreeHandler] = pi.handlers.get("session_tree")!; + await sessionTreeHandler({}, { + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + sessionManager: { getBranch: () => branch }, + getContextUsage: () => null, + }); + + const s = statuses.get("agenticoding-readonly"); + assert.ok(s?.includes("readonly"), 
"session_tree should rehydrate readonly"); +}); + +test("session_tree rehydrates readonly-off nudge after branch change", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const statuses = new Map(); + const [sessionTreeHandler] = pi.handlers.get("session_tree")!; + const [contextHandler] = pi.handlers.get("context")!; + + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + getContextUsage: () => ({ percent: 12 }), + }); + + await sessionTreeHandler({}, { + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + sessionManager: { getBranch: () => [] }, + getContextUsage: () => ({ percent: 12 }), + }); + assert.equal(statuses.get("agenticoding-readonly"), undefined); + + const result = await contextHandler( + { messages: [{ role: "user", content: "hi", timestamp: 1 }] }, + { getContextUsage: () => ({ percent: 12 }), sessionManager: { getBranch: () => [] } }, + ); + assert.ok(result && "messages" in result); + assert.match((result as any).messages.at(-1).content, /turned off/); +}); + +test("session_tree reapplies --readonly and clears stale readonly on no-entry branches", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + const statuses = new Map(); + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + assert.ok(statuses.get("agenticoding-readonly")?.includes("readonly")); + + const [sessionTreeHandler] = pi.handlers.get("session_tree")!; + await sessionTreeHandler({}, 
{ + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + sessionManager: { getBranch: () => [] }, + getContextUsage: () => null, + }); + assert.equal(statuses.get("agenticoding-readonly"), undefined, "no-entry branch should clear stale readonly"); + + pi.flags.set("readonly", true); + await sessionTreeHandler({}, { + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + sessionManager: { getBranch: () => [] }, + getContextUsage: () => null, + }); + assert.ok(statuses.get("agenticoding-readonly")?.includes("readonly"), "CLI flag should win during session_tree rehydration"); +}); + +test("--readonly rehydration does not append synthetic history entries", async () => { + const pi = new MockPi(); + pi.flags.set("readonly", true); + registerAgenticoding(pi as any); + + const ctx = { + hasUI: true, + ui: { + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => {}, + setWidget: () => {}, + }, + sessionManager: { getBranch: () => [] }, + getContextUsage: () => null, + }; + + for (const handler of pi.handlers.get("session_start")!) 
{ + await handler({ reason: "resume" }, ctx as any); + } + const [sessionTreeHandler] = pi.handlers.get("session_tree")!; + await sessionTreeHandler({}, ctx as any); + + assert.equal(pi.appendedEntries.length, 0); +}); + +test("resetState clears readonly fields", () => { + const state = createState(); + state.readonlyEnabled = true; + state.readonlyNudgePending = true; + resetState(state); + assert.equal(state.readonlyEnabled, false); + assert.equal(state.readonlyNudgePending, false); +}); + +test("readonly shortcut is registered and gated on isIdle", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + assert.ok(pi.shortcuts.has("ctrl+shift+r"), "shortcut should be registered"); + + const shortcut = pi.shortcuts.get("ctrl+shift+r")!; + + // isIdle = false: should not toggle + const statuses = new Map(); + await shortcut.handler({ + isIdle: () => false, + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + assert.equal(statuses.get("agenticoding-readonly"), undefined, "should not toggle when not idle"); + + // isIdle = true: should toggle + await shortcut.handler({ + isIdle: () => true, + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: (key: string, val: string | undefined) => statuses.set(key, val), + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + assert.ok(statuses.get("agenticoding-readonly")?.includes("readonly"), "should toggle when idle"); +}); + +test("readonly toggle persists entry via appendEntry", async () => { + const pi = new MockPi(); + registerAgenticoding(pi as any); + + // Await the command handler, as the other readonly tests do, so the + // assertions below run only after it has finished and a rejection fails this test. + await pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => {}, + setWidget: () => {}, + }, + getContextUsage: () => null, 
+ }); + + assert.equal(pi.appendedEntries.length, 1); + assert.equal(pi.appendedEntries[0].customType, "agenticoding-readonly"); + assert.equal(pi.appendedEntries[0].data.enabled, true); +}); + + + +test("classifyBashCommand pipes and shell chaining stay direct for non-mutating commands", () => { + assert.equal(isDirect("cat file | sort"), true, "cat | sort is safe"); + assert.equal(isDirect("ls -la | head -5"), true, "ls | head is safe"); + assert.equal(isDirect("export PATH=/tmp:$PATH; ls"), true, "shell state changes are not blocked by readonly"); +}); + +test("classifyBashCommand block reasons stay mutation-focused", () => { + const check = (cmd: string, expected: string) => { + const v = classifyBashCommand(cmd, "/workspace"); + assert.equal(v.ok, false, `${cmd} should be blocked`); + if (!v.ok) { + assert.match(v.reason, new RegExp(expected, "i"), `reason for ${cmd}`); + } + }; + + check("echo hi > out.txt", "write redirect"); + check("rm file.txt", "outside temp"); + check("git add .", "mutable git"); + check("echo $(rm file.txt)", "command substitution"); +}); + +test("classifyBashCommand blocks find mutation and allows readonly find", () => { + assert.equal(isBlocked("find . -exec rm {} +"), true, "find -exec rm is blocked"); + assert.equal(isBlocked("find . -delete"), true, "find -delete is blocked outside temp"); + assert.equal(isBlocked("find . -fprint out.txt"), true, "find -fprint is blocked outside temp"); + assert.equal(isDirect(`find ${os.tmpdir()} -delete`, "/workspace"), true, "temp-only delete is allowed"); + assert.equal(isDirect("find . 
-name \"*.ts\""), true, "find -name is direct"); +}); + +test("classifyBashCommand allows cd and heredocs when they do not write outside temp", () => { + assert.equal(isDirect("cd /tmp"), true, "cd is direct"); + assert.equal(isDirect("cd /var/log && ls"), true, "cd && ls is direct"); + assert.equal(isDirect("cat < { + assert.equal(isBlocked("sudo rm /etc/passwd"), true, "sudo rm is blocked"); + assert.equal(isBlocked("sudo -u root rm /etc/passwd"), true, "sudo -u root rm is blocked"); +}); + +test("classifyBashCommand blocks sudo with interpreter -c inline script", () => { + assert.equal(isBlocked("sudo bash -c 'rm /etc/passwd'"), true, "sudo bash -c rm is blocked"); + assert.equal(isBlocked("sudo sh -c 'echo hi > /etc/config'"), true, "sudo sh -c with redirect blocked"); + assert.equal(isBlocked("sudo -u root bash -c \"rm -rf /etc\""), true, "sudo -u root bash -c rm blocked"); +}); + +test("classifyBashCommand allows sudo with safe interpreter -c inline script", () => { + assert.equal(isDirect("sudo bash -c 'echo hello'"), true, "sudo bash -c echo is safe"); +}); + +test("classifyBashCommand blocks sed -i in-place mutation", () => { + assert.equal(isBlocked("sed -i 's/a/b/g' file.txt"), true, "sed -i is blocked outside temp"); + assert.equal(isBlocked("sed -i '' 's/a/b/g' /etc/config"), true, "sed -i '' (macOS) is blocked outside temp"); + assert.equal(isBlocked("sed -i \"\" 's/a/b/g' /etc/config"), true, 'sed -i "" (macOS) is blocked outside temp'); + assert.equal(isBlocked("sed -i.bak 's/a/b/' /etc/config"), true, "sed -i.bak is blocked"); +}); + +test("classifyBashCommand blocks dd output mutation", () => { + assert.equal(isBlocked("dd if=/dev/zero of=/etc/passwd bs=1 count=1"), true, "dd of= outside temp is blocked"); + assert.equal(isDirect("dd if=/dev/zero of=" + os.tmpdir() + "/test bs=1 count=0"), true, "dd of= inside temp is allowed"); +}); + +test("classifyBashCommand blocks perl in-place mutation", () => { + assert.equal(isBlocked("perl -pi -e 
's/a/b/g' file.txt"), true, "perl -pi is blocked outside temp"); +}); + +test("classifyBashCommand blocks ruby in-place mutation", () => { + assert.equal(isBlocked("ruby -pi -e 's/a/b/g' file.txt"), true, "ruby -pi is blocked outside temp"); +}); + +test("classifyBashCommand blocks sed -i with multiple -e expressions outside temp", () => { + // H3 fix: expression values from -e flags should not leak as false targets + assert.equal(isBlocked("sed -i '' -e 's/foo/g' -e 's/bar/g' /etc/config"), true, "multi -e outside temp"); + const tmp = os.tmpdir(); + assert.equal(isDirect(`sed -i '' -e 's/foo/g' -e 's/bar/g' ${tmp}/config`), true, "multi -e inside temp"); + assert.equal(isDirect(`sed -i.bak -e 's/foo/g' ${tmp}/config`), true, "sed -i with backup ext inside temp"); + assert.equal(isBlocked("sed -i 's/foo/g' /etc/config"), true, "single expression outside temp"); + // --expression combined form (--expression=SCRIPT) must be detected + assert.equal(isBlocked("sed -i '' --expression='s/foo/g' /etc/config"), true, "--expression= combined form outside temp"); + assert.equal(isDirect(`sed -i '' --expression='s/foo/g' ${tmp}/config`), true, "--expression= combined form inside temp"); + // --expression long form (separate arg) + assert.equal(isBlocked("sed -i '' --expression 's/foo/g' /etc/config"), true, "--expression long form outside temp"); + assert.equal(isDirect(`sed -i '' --expression 's/foo/g' ${tmp}/config`), true, "--expression long form inside temp"); + // --expression combined form without backup extension + assert.equal(isBlocked("sed -i --expression='s/foo/g' /etc/config"), true, "--expression= no backup ext outside temp"); + assert.equal(isDirect(`sed -i --expression='s/foo/g' ${tmp}/config`), true, "--expression= no backup ext inside temp"); +}); + +test("classifyBashCommand blocks env prefix with mutation command", () => { + assert.equal(isBlocked("env VAR=value rm file.txt"), true, "env rm is blocked"); + assert.equal(isBlocked("env -i PATH=/tmp rm 
file.txt"), true, "env -i rm is blocked"); +}); + +test("classifyBashCommand blocks command prefix with mutation", () => { + assert.equal(isBlocked("command rm file.txt"), true, "command rm is blocked"); +}); + +test("classifyBashCommand blocks >> append redirect to unsafe target", () => { + assert.equal(isBlocked("echo hi >> /etc/config"), true, ">> append to outside temp is blocked"); + const tmpFile = os.tmpdir() + "/test-append.txt"; + assert.equal(isDirect("echo hi >> " + tmpFile), true, ">> append to temp is allowed"); +}); + +test("classifyBashCommand blocks >| noclobber redirect to unsafe target", () => { + assert.equal(isBlocked("echo hi >| /etc/config"), true, ">| noclobber override to outside temp is blocked"); +}); + +test("classifyBashCommand blocks quoted paths with spaces outside temp", () => { + assert.equal(isBlocked("rm 'My File.txt'"), true, "rm with quoted space path is blocked outside temp"); + assert.equal(isBlocked("touch \"My File.txt\""), true, "touch with quoted space path is blocked outside temp"); + const tmpFile = "\"" + os.tmpdir() + "/My File.txt\""; + assert.equal(isDirect("rm " + tmpFile), true, "rm with quoted space path in temp is allowed"); +}); + +test("classifyBashCommand blocks path traversal attacks", () => { + assert.equal(isBlocked("rm /tmp/../etc/passwd"), true, "path traversal outside temp is blocked"); + assert.equal(isBlocked("rm /private/var/tmp/../../../etc/passwd"), true, "relative traversal outside temp is blocked"); +}); + +// ── classifyBashCommand: fd redirect passthrough ───────────────────── + +test("classifyBashCommand allows fd redirect passthrough", () => { + assert.equal(isDirect("echo hi 2>&1"), true, "fd redirect 2>&1 is passthrough"); + assert.equal(isDirect("echo hi 2>/dev/null"), true, "fd redirect to /dev/null is safe"); + assert.equal(isDirect("exec 3>&1"), true, "exec fd redirect is safe"); +}); + +// ── classifyBashCommand: empty/bare commands ───────────────────────── + +test("classifyBashCommand 
handles empty and bare commands", () => { + assert.equal(isDirect(""), true, "empty string should be allowed"); + assert.equal(isDirect(" "), true, "whitespace should be allowed"); + assert.equal(isBlocked("git"), true, "bare git without subcommand should be blocked"); +}); + +test("classifyBashCommand allows npm run build inside temp", () => { + // H1 fix: 'build' removed from package mutation regex. 'npm run build' is not + // a package installation — it runs a build script. Package installations are + // still caught by install/uninstall/add/remove/etc. + const tmp = os.tmpdir(); + assert.equal(isDirect(`cd ${tmp} && npm run build`), true, "npm run build inside temp"); + // npm run build outside temp should also be allowed (not a package mutation) + assert.equal(isDirect("npm run build"), true, "npm run build allowed anywhere"); + assert.equal(isDirect(`cd ${tmp} && yarn build`), true, "yarn build inside temp"); + assert.equal(isDirect(`cd ${tmp} && npm build`), true, "npm build (old-style) inside temp"); + // Actual package mutations should still be blocked + assert.equal(isBlocked("npm install lodash"), true, "npm install still blocked"); + assert.equal(isBlocked("pip install requests"), true, "pip install still blocked"); + // apt build-dep is a package mutation (not a script build) + assert.equal(isBlocked("apt build-dep nginx"), true, "apt build-dep still blocked"); + assert.equal(isBlocked("dnf build-dep nginx"), true, "dnf build-dep still blocked"); +}); + +test("classifyBashCommand resolves glob patterns inside temp", () => { + // H2 fix: glob patterns like *.log should be resolved and checked per-target + const tmp = os.tmpdir(); + // Empty glob (no matches) should be allowed — no files to mutate + assert.equal(isDirect(`rm ${tmp}/*.nonexistent`), true, "empty glob is allowed"); + // Empty glob outside temp is also allowed (no files to mutate) + assert.equal(isDirect("rm *.log"), true, "empty glob to non-existent files is allowed"); + // Glob to 
explicitly non-temp paths is blocked + assert.equal(isBlocked("rm /etc/*.conf"), true, "glob to /etc is blocked"); + // Non-mutating globs should pass + assert.equal(isDirect("ls *.ts"), true, "ls with glob is allowed"); + // Glob with actual matches inside temp should be allowed + const testFile = path.join(tmp, "readonly-test-glob-match.tmp"); + try { fs.writeFileSync(testFile, ""); } catch { /* best-effort */ } + try { + assert.equal(isDirect(`rm ${tmp}/*.tmp`), true, "glob matches inside temp is allowed"); + } finally { + try { fs.unlinkSync(testFile); } catch { /* best-effort cleanup */ } + } +}); + +test("classifyBashCommand resolves ~ paths", () => { + // ~ expands via os.homedir() — homedir is outside temp, so mutations blocked. + // This verifies the expansion code path runs (vs. old blanket-block on ~ chars). + assert.equal(isBlocked("rm ~/test-file"), true, "rm ~/file blocked (home outside temp)"); + assert.equal(isBlocked("touch ~/test-file"), true, "touch ~/file blocked (home outside temp)"); + + // ~user/path blocked conservatively (cannot resolve without getpwuid) + assert.equal(isBlocked("rm ~other/file"), true, "rm ~user/file blocked (unresolvable user)"); + + // Non-mutating commands with ~ are allowed + assert.equal(isDirect("ls ~"), true, "ls ~ allowed"); + assert.equal(isDirect("ls ~/Documents"), true, "ls ~/Documents allowed"); + assert.equal(isDirect("echo ~"), true, "echo ~ allowed"); + + // Mutating command where target happens to be inside temp after tilde expansion + // Use a temp-relative path — tilde expands to homedir, which is outside temp, + // so a path like ~/tmp/... still resolves outside temp. This assertion confirms + // tilde expansion happened correctly and the temp check runs on the result. 
+ const tmp = os.tmpdir(); + assert.equal(isDirect(`ls ${tmp}`), true, "non-mutating ls to temp is allowed"); +}); + +// ── classifyBashCommand: exact-string contract tests ───────────────── + +test("classifyBashCommand exact reason: git mutable block", () => { + const v = classifyBashCommand("git add .", "/workspace"); + assert.equal(v.ok, false); + if (!v.ok) { + assert.match(v.reason, /mutable git/); + } +}); + +test("classifyBashCommand exact reason: command substitution block", () => { + const v = classifyBashCommand("echo \$(rm file.txt)", "/workspace"); + assert.equal(v.ok, false); + if (!v.ok) { + assert.match(v.reason, /command substitution/); + } +}); + +test("classifyBashCommand exact reason: write redirect block", () => { + const v = classifyBashCommand("echo hi > out.txt", "/workspace"); + assert.equal(v.ok, false); + if (!v.ok) { + assert.match(v.reason, /write redirect blocked outside temp dir/); + } +}); + + +// ── classifyBashCommand: sudo -h fix (F1) ──────────────────────────── + +test("classifyBashCommand blocks sudo -h with mutating command", () => { + assert.equal(isBlocked("sudo -h localhost rm /etc/passwd"), true, "sudo -h localhost rm should be blocked"); + assert.equal(isBlocked("sudo -h host apt-get install nginx"), true, "sudo -h host apt-get should be blocked"); +}); + +// ── classifyBashCommand: env -u fix (F2) ───────────────────────────── + +test("classifyBashCommand blocks env -u with mutating command", () => { + assert.equal(isBlocked("env -u HOME rm /etc/passwd"), true, "env -u HOME rm blocked"); + assert.equal(isBlocked("env --unset HOME rm /etc/passwd"), true, "env --unset HOME rm blocked"); +}); + +// ── classifyBashCommand: touch -t/-d/-r (H1) ───────────────────────── + +test("classifyBashCommand allows touch with -t/-d/-r flags inside temp", () => { + const tmp = os.tmpdir(); + assert.equal(isDirect(`touch -t 202001010000 ${tmp}/safe`), true, "touch -t timestamp inside temp"); + assert.equal(isDirect(`touch -d '2020-01-01' 
${tmp}/safe`), true, "touch -d date inside temp"); + assert.equal(isDirect(`touch -r ${tmp}/ref ${tmp}/target`), true, "touch -r ref file inside temp"); +}); + +// ── classifyBashCommand: additional command coverage ───────────────── + +test("classifyBashCommand blocks install, ln, truncate, unlink, rmdir outside temp", () => { + assert.equal(isBlocked("install /tmp/foo /etc/bar"), true, "install to outside temp"); + assert.equal(isBlocked("ln /tmp/foo /etc/bar"), true, "ln hard link to outside temp"); + assert.equal(isBlocked("truncate -s 0 /etc/config"), true, "truncate outside temp"); + assert.equal(isBlocked("unlink /etc/file"), true, "unlink outside temp"); + assert.equal(isBlocked("rmdir /etc/empty-dir"), true, "rmdir outside temp"); + assert.equal(isBlocked("chown root /etc/file"), true, "chown outside temp"); + assert.equal(isBlocked("chgrp root /etc/file"), true, "chgrp outside temp"); +}); + +// ── classifyBashCommand: env fix (env -S bypass) ────────────────── + +test("classifyBashCommand blocks env -S bypass for mutating commands and redirects", () => { + assert.equal(isBlocked('env -S "rm -rf /"'), true, "env -S with rm is blocked"); + assert.equal(isBlocked('env -u HOME -S "touch /etc/passwd"'), true, "env -u HOME -S with touch is blocked"); + assert.equal(isBlocked('env -S "git add ."'), true, "env -S with git add is blocked"); + assert.equal(isBlocked('env -S "echo hi > /etc/config"'), true, "env -S with redirect is blocked"); + assert.equal(isBlocked('env KEY=value rm file.txt'), true, "env KEY=value with rm is blocked"); +}); + +test("classifyBashCommand allows non-mutating env -S inline commands", () => { + assert.equal(isDirect('env -S "echo hi"'), true, "env -S with echo is allowed"); +}); + +test("classifyBashCommand blocks env --split-string bypass for mutating commands", () => { + assert.equal(isBlocked('env --split-string "rm -rf /"'), true, "env --split-string rm blocked"); + assert.equal(isBlocked('env -u HOME --split-string "touch 
/etc/passwd"'), true, "env -u HOME --split-string touch blocked"); + assert.equal(isBlocked('env --split-string "git add ."'), true, "env --split-string git add blocked"); + assert.equal(isBlocked('env --split-string "echo hi > /etc/config"'), true, "env --split-string redirect blocked"); +}); + +test("classifyBashCommand allows non-mutating env --split-string inline commands", () => { + assert.equal(isDirect('env --split-string "echo hi"'), true, "env --split-string echo allowed"); +}); + +test("classifyBashCommand blocks env without -S with mutating direct commands", () => { + assert.equal(isBlocked('env rm /etc/passwd'), true, "env rm is blocked"); + assert.equal(isBlocked('env -i rm /etc/passwd'), true, "env -i rm is blocked"); + assert.equal(isDirect('env - PATH=/tmp ls'), true, "env - PATH=/tmp ls is allowed"); +}); + +test("classifyBashCommand extracts and classifies process substitution <()", () => { + assert.equal(isBlocked("cat <(rm /etc/passwd)"), true, "<() rm outside temp blocked"); + assert.equal(isBlocked("cat <(git add .)"), true, "<() git add blocked"); + assert.equal(isBlocked("cat <(bash -c 'rm /etc/passwd')"), true, "<() bash -c rm blocked"); + assert.equal(isDirect("cat <(echo hi)"), true, "<() echo allowed"); + assert.equal(isDirect("diff <(git diff) <(git status)"), true, "<() git immutable in diff allowed"); +}); + +// ── classifyBashCommand: git readonly subcommand regressions ───────── + +test("classifyBashCommand allows git stash read-only subcommands", () => { + assert.equal(isDirect("git stash list"), true, "git stash list is allowed"); + assert.equal(isDirect("git stash show"), true, "git stash show is allowed"); +}); + +test("classifyBashCommand blocks git stash mutable subcommands", () => { + assert.equal(isBlocked("git stash push"), true, "git stash push is blocked"); + assert.equal(isBlocked("git stash drop"), true, "git stash drop is blocked"); +}); + +test("classifyBashCommand allows git tag read-only subcommands", () => { + 
assert.equal(isDirect("git tag"), true, "bare git tag is allowed"); + assert.equal(isDirect("git tag --list"), true, "git tag --list is allowed"); + assert.equal(isDirect("git tag -l"), true, "git tag -l is allowed"); +}); + +test("classifyBashCommand blocks git tag mutable subcommands", () => { + assert.equal(isBlocked("git tag v1.0"), true, "git tag v1.0 is blocked"); +}); + +test("classifyBashCommand allows git submodule read-only subcommands", () => { + assert.equal(isDirect("git submodule status"), true, "git submodule status is allowed"); +}); + +test("classifyBashCommand blocks git submodule mutable subcommands", () => { + assert.equal(isBlocked("git submodule add"), true, "git submodule add is blocked"); +}); + +test("classifyBashCommand allows git worktree read-only subcommands", () => { + assert.equal(isDirect("git worktree list"), true, "git worktree list is allowed"); +}); + +test("classifyBashCommand blocks git worktree mutable subcommands", () => { + assert.equal(isBlocked("git worktree add"), true, "git worktree add is blocked"); +}); + +test("classifyBashCommand allows git bisect read-only subcommands and bare bisect", () => { + assert.equal(isDirect("git bisect log"), true, "git bisect log is allowed"); + assert.equal(isDirect("git bisect view"), true, "git bisect view is allowed"); + assert.equal(isDirect("git bisect"), true, "bare git bisect is allowed"); +}); + +test("classifyBashCommand blocks git bisect mutable subcommands", () => { + assert.equal(isBlocked("git bisect start"), true, "git bisect start is blocked"); + assert.equal(isBlocked("git bisect reset"), true, "git bisect reset is blocked"); +}); + + +test("classifyBashCommand blocks node -e with dangerous code", () => { + assert.equal(isBlocked('node -e "rm file.txt"'), true); +}); + +test("classifyBashCommand allows node -e with safe code", () => { + assert.equal(isDirect('node -e "console.log(1)"'), true); +}); + +test("classifyBashCommand blocks python3 -c with dangerous code", () => 
{ + assert.equal(isBlocked('python3 -c "rm file.txt"'), true); +}); + +test("classifyBashCommand blocks perl -e with dangerous code", () => { + assert.equal(isBlocked('perl -e "rm file.txt"'), true); +}); + +test("classifyBashCommand blocks ruby -e with dangerous code", () => { + assert.equal(isBlocked('ruby -e "rm file.txt"'), true); +}); + +test("classifyBashCommand allows node -c (syntax check only)", () => { + assert.equal(isDirect('node -c "const x = 1"'), true); +}); + +// ── S3: eval/exec/subshell handling ──────────────────────────────── + +test("classifyBashCommand blocks eval with dangerous command", () => { + assert.equal(isBlocked("eval 'rm -rf /'"), true); +}); + +test("classifyBashCommand allows eval with safe command", () => { + assert.equal(isDirect("eval 'echo hi'"), true); +}); + +test("classifyBashCommand blocks exec with dangerous command", () => { + assert.equal(isBlocked("exec rm file.txt"), true); +}); + +test("classifyBashCommand allows exec with safe command", () => { + assert.equal(isDirect("exec ls"), true); +}); + +test("classifyBashCommand blocks subshell parens with mutation", () => { + assert.equal(isBlocked("(rm file.txt)"), true); +}); + +test("classifyBashCommand allows subshell parens with safe command", () => { + assert.equal(isDirect("(echo hi)"), true); +}); + +test("classifyBashCommand blocks curl -o outside temp", () => { + assert.equal(isBlocked("curl -o /etc/passwd http://example.com"), true); +}); + +test("classifyBashCommand allows curl -o inside temp", () => { + const tmp = os.tmpdir(); + assert.equal(isDirect(`curl -o ${tmp}/out.html http://example.com`), true); +}); + +test("classifyBashCommand blocks curl --output outside temp", () => { + assert.equal(isBlocked("curl --output /tmp/../outside.txt http://example.com"), true); +}); + +test("classifyBashCommand blocks curl -O (remote-name) outside temp", () => { + assert.equal(isBlocked("curl -O http://example.com/evil.sh"), true, "-O writes to cwd"); + 
assert.equal(isBlocked("curl --remote-name http://example.com/evil.sh"), true, "--remote-name writes to cwd"); + assert.equal(isBlocked("curl -OJ http://example.com/evil.sh"), true, "-OJ combined form"); +}); + +test("classifyBashCommand allows curl -O (remote-name) inside temp cwd", () => { + const tmp = os.tmpdir(); + assert.equal(isDirect("curl -O http://example.com/evil.sh", tmp), true, "-O allowed when cwd is temp"); + assert.equal(isDirect("curl --remote-name http://example.com/evil.sh", tmp), true, "--remote-name allowed when cwd is temp"); +}); + +test("classifyBashCommand documents current curl L2 limitation forms", () => { + const tmp = os.tmpdir(); + assert.equal(isDirect("curl -JO http://example.com/evil.sh"), true, "-JO currently slips past L2"); + assert.equal(isDirect("curl -sJO http://example.com/evil.sh"), true, "-sJO currently slips past L2"); + assert.equal(isDirect("curl --remote-name-all http://example.com/evil.sh"), true, "--remote-name-all currently slips past L2"); + assert.equal(isDirect("curl -JO http://example.com/evil.sh", tmp), true, "limitation also remains allowed in temp cwd"); +}); + +test("classifyBashCommand blocks curl -O even with explicit -o temp path", () => { + const tmp = os.tmpdir(); + // -O still writes URL basename to cwd, even when -o targets temp dir + assert.equal(isBlocked("curl -O -o " + tmp + "/out.html http://example.com"), true, "-O cwd write still blocked despite -o temp"); + assert.equal(isBlocked("curl -o " + tmp + "/out.html -O http://example.com"), true, "-O cwd write still blocked when -o before -O"); +}); + +test("classifyBashCommand blocks curl -O combined with -o outside temp", () => { + // -O writes URL basename to cwd even when -o is present — curl uses both cumulatively + assert.equal(isBlocked("curl -o /etc/passwd -O http://example.com"), true, "-O cwd write blocked despite -o outside temp"); + assert.equal(isBlocked("curl -O -o /etc/passwd http://example.com"), true, "-O cwd write blocked when -o is 
before -O"); +}); + +test("classifyBashCommand blocks curl -O combined with -o inside temp", () => { + const tmp = os.tmpdir(); + // -o points to temp dir, but -O still writes to cwd — must be blocked + assert.equal(isBlocked("curl -o " + tmp + "/out -O http://example.com"), true, "-O cwd write blocked even when -o targets temp"); + assert.equal(isBlocked("curl -O -o " + tmp + "/out http://example.com"), true, "-O cwd write blocked regardless of flag order"); +}); + +test("classifyBashCommand allows curl -O combined with -o when cwd and output are both temp", () => { + const tmp = os.tmpdir(); + assert.equal(isDirect("curl -o " + tmp + "/out -O http://example.com", tmp), true, "-O and -o both allowed when both writes stay in temp"); + assert.equal(isDirect("curl -O -o " + tmp + "/out http://example.com", tmp), true, "flag order does not matter when both writes stay in temp"); +}); + +test("classifyBashCommand blocks curl --output=VALUE outside temp", () => { + assert.equal(isBlocked("curl --output=/etc/passwd http://example.com"), true, "--output=/etc/passwd writes to disk"); +}); + +test("classifyBashCommand allows curl --output=VALUE inside temp", () => { + const tmp = os.tmpdir(); + assert.equal(isDirect(`curl --output=${tmp}/out http://example.com`), true, "--output=/tmp/... 
writes to temp"); +}); + +test("classifyBashCommand blocks curl -o/path combined form outside temp", () => { + assert.equal(isBlocked("curl -o/etc/passwd http://example.com"), true, "-o/etc/passwd combined short form writes to disk"); +}); + +test("classifyBashCommand allows curl -o/path combined form inside temp", () => { + const tmp = os.tmpdir(); + assert.equal(isDirect(`curl -o${tmp}/out http://example.com`), true, "-o/tmp/out combined short form writes to temp"); +}); + +test("classifyBashCommand blocks curl -O (remote-name) outside temp (error message)", () => { + const verdict = classifyBashCommand("curl -O http://example.com/evil.sh"); + assert.equal(verdict.ok, false); + assert.match(verdict.reason, /curl blocked/, "error message mentions curl"); +}); + +test("classifyBashCommand allows curl -- -O (-- ends options, -O is a URL arg)", () => { + assert.equal(isDirect("curl -- -O"), true, "-O after -- is a URL, not a flag"); +}); + +test("classifyBashCommand blocks curl -O before -- (flag before end-of-options)", () => { + assert.equal(isBlocked("curl -O -- http://example.com/evil.sh"), true, "-O before -- is still a flag"); +}); + +test("classifyBashCommand blocks curl with multiple -o flags where first is unsafe", () => { + assert.equal(isBlocked("curl -o /etc/passwd -o /tmp/f http://example.com"), true, "first -o outside temp blocked"); +}); + +test("classifyBashCommand allows curl with multiple -o flags both inside temp", () => { + const tmp = os.tmpdir(); + assert.equal(isDirect(`curl -o ${tmp}/f1 -o ${tmp}/f2 http://example.com`, tmp), true, "both -o in temp allowed"); +}); + +test("classifyBashCommand allows curl -o - (stdout)", () => { + assert.equal(isDirect("curl -o - http://example.com"), true, "-o - writes to stdout"); + assert.equal(isDirect("curl --output - http://example.com"), true, "--output - writes to stdout"); +}); + +test("classifyBashCommand allows wget -O inside temp", () => { + const tmp = os.tmpdir(); + assert.equal(isDirect(`wget -O 
${tmp}/out.html http://example.com`), true); +}); + +test("classifyBashCommand blocks wget -O outside temp", () => { + assert.equal(isBlocked("wget -O /etc/passwd http://example.com"), true); +}); + +test("classifyBashCommand allows wget --output-document inside temp", () => { + const tmp = os.tmpdir(); + assert.equal(isDirect(`wget --output-document ${tmp}/out.html http://example.com`), true); +}); + +test("classifyBashCommand blocks wget --output-document outside temp", () => { + assert.equal(isBlocked("wget --output-document /etc/passwd http://example.com"), true); +}); + +test("classifyBashCommand blocks wget without output flags", () => { + assert.equal(isBlocked("wget http://example.com"), true, "wget without -O writes to disk by default"); +}); + +test("classifyBashCommand allows curl without output flags", () => { + assert.equal(isDirect("curl http://example.com"), true, "curl without -o outputs to stdout"); +}); + +// ── classifyBashCommand: wget -O- stdout ──────────────────────────── + +test("classifyBashCommand allows wget -O- stdout output", () => { + assert.equal(isDirect("wget -O- http://example.com"), true, "-O- combined token writes to stdout"); + assert.equal(isDirect("wget -O - http://example.com"), true, "-O separate token writes to stdout"); + assert.equal(isDirect("wget --output-document=- http://example.com"), true, "--output-document=- writes to stdout"); +}); + +test("classifyBashCommand uses the last wget output flag", () => { + const tmp = os.tmpdir(); + assert.equal(isBlocked("wget -O- -O /etc/passwd http://example.com"), true, "later file output should win over stdout"); + assert.equal(isBlocked("wget --output-document=- --output-document=/etc/passwd http://example.com"), true, "later long output flag should win over stdout"); + assert.equal(isDirect(`wget -O /etc/passwd -O ${tmp}/out.html http://example.com`), true, "later temp output should win over earlier unsafe path"); + assert.equal(isDirect(`wget -O ${tmp}/out.html -O- 
http://example.com`), true, "later stdout output should win over earlier temp path"); +}); + +// ── N4: xargs command classification ─────────────────────────────── + +test("classifyBashCommand blocks xargs with mutation command and concrete target", () => { + assert.equal(isBlocked("echo /etc/passwd | xargs rm"), true, "xargs rm outside temp blocked"); + assert.equal(isBlocked("echo . | xargs git add"), true, "xargs git add blocked"); + assert.equal(isBlocked("echo '/etc/passwd' | xargs bash -c 'rm /etc/passwd'"), true, "xargs bash -c rm blocked"); + assert.equal(isBlocked("echo install | xargs npm install"), true, "xargs npm install blocked"); +}); + +test("classifyBashCommand allows xargs with safe command", () => { + assert.equal(isDirect("echo file.txt | xargs echo"), true); +}); + +test("classifyBashCommand blocks xargs with flags and mutation", () => { + assert.equal(isBlocked("echo /etc/passwd | xargs -I {} rm {}"), true); +}); + +test("classifyBashCommand allows xargs with flags and safe command", () => { + assert.equal(isDirect("echo file.txt | xargs -I {} echo {}"), true); +}); + +// ── os-sandbox: OS-level sandbox tests ───────────────────────────── + +test("os-sandbox: buildMacProfile includes deny file-write* and allow /dev/null", () => { + const tempDir = os.tmpdir(); + const profile = buildMacProfile(tempDir); + assert.ok(profile.includes("(allow default)"), "profile should allow default"); + assert.ok(profile.includes("(deny file-write*)"), "profile should deny all file-write*"); + assert.ok(profile.includes('/dev/null'), "profile should allow /dev/null"); + assert.ok(profile.includes('(allow file-write* (subpath'), "profile should allow subpath writes"); +}); + +test("os-sandbox: buildMacProfile rejects paths containing single or double quotes", () => { + assert.throws( + () => buildMacProfile("/tmp/evil'path"), + /quote/, + "should reject single quote in path", + ); + assert.throws( + () => buildMacProfile('/tmp/evil"path'), + /quote/, + "should 
reject double quote in path", + ); +}); + +test("os-sandbox: wrapWithSandboxExec uses heredoc", () => { + const cmd = "echo hello"; + const result = wrapWithSandboxExec(cmd); + assert.ok(result.startsWith("sandbox-exec -p '"), "should start with sandbox-exec -p"); + assert.ok(result.includes("PI_SANDBOX_INNER_"), "should include heredoc delimiter"); + assert.ok(result.includes(cmd), "should contain original command"); + assert.ok(result.includes("/bin/bash << '"), "should use heredoc with bash"); +}); + +test("os-sandbox: wrapWithBwrap includes ro-bind and tmpfs", () => { + const cmd = "echo hello"; + const result = wrapWithBwrap(cmd); + assert.ok(result.startsWith("bwrap"), "should start with bwrap"); + assert.ok(result.includes("--ro-bind / /"), "should include ro-bind root"); + assert.ok(result.includes("--tmpfs /tmp"), "should include tmpfs /tmp"); + assert.ok(result.includes(cmd), "should contain original command"); + assert.ok(result.includes("/bin/bash << '"), "should use heredoc with bash"); +}); + +test("os-sandbox: wrapCommandWithOsSandbox returns sandbox-exec on darwin", () => { + const origPlatform = Object.getOwnPropertyDescriptor(process, "platform"); + Object.defineProperty(process, "platform", { value: "darwin", configurable: true }); + try { + const result = wrapCommandWithOsSandbox("echo hello"); + assert.ok(result.startsWith("sandbox-exec"), "should use sandbox-exec on darwin"); + } finally { + if (origPlatform) { + Object.defineProperty(process, "platform", origPlatform); + } + } +}); + +test("os-sandbox: wrapWithSandboxExec handles multiline command", () => { + const cmd = "echo line1\necho line2\necho line3"; + const result = wrapWithSandboxExec(cmd); + assert.ok(result.includes("echo line1"), "should preserve first line"); + assert.ok(result.includes("echo line2"), "should preserve second line"); + assert.ok(result.includes("echo line3"), "should preserve third line"); + // All lines should be after heredoc open and before heredoc close + 
const delimIndex = result.indexOf("PI_SANDBOX_INNER_"); + const innerEnd = result.indexOf("\n", delimIndex); // skip to end of delimiter name + const cmdStart = result.indexOf("\n", innerEnd + 1); + const lastDelim = result.lastIndexOf("PI_SANDBOX_INNER_"); + assert.ok(cmdStart > 0 && lastDelim > cmdStart, "command should be inside heredoc"); +}); + +test("os-sandbox: wrapWithSandboxExec generates unique delimiters", () => { + const cmd = "echo hello"; + const result1 = wrapWithSandboxExec(cmd); + const result2 = wrapWithSandboxExec(cmd); + const delim1 = result1.match(/PI_SANDBOX_INNER_\w+/)?.[0] || ""; + const delim2 = result2.match(/PI_SANDBOX_INNER_\w+/)?.[0] || ""; + assert.notEqual(delim1, delim2, "two calls should produce different delimiters"); +}); + +// ── resolveRealPath tests ───────────────────────────────────────────── + +test("resolveRealPath: existing path returns unchanged", () => { + const result = resolveRealPath(os.tmpdir()); + assert.ok(result.length > 0, "should resolve to a non-empty path"); +}); + +test("resolveRealPath: root returns root", () => { + assert.equal(resolveRealPath("/"), "/"); +}); + +test("resolveRealPath: existing file resolves", () => { + const result = resolveRealPath(new URL(".", import.meta.url).pathname); + assert.ok(result.length > 0, "should resolve to a non-empty path"); +}); + +test("resolveRealPath: non-existent path inside temp dir preserves full path", () => { + const tmp = os.tmpdir(); + const nonExistent = `${tmp}/__pi_test_deep/a/b/c`; + const result = resolveRealPath(nonExistent); + // Should contain the full path including all intermediate components + assert.ok(result.includes("__pi_test_deep/a/b/c"), "should preserve all path components"); +}); + +// ── I6: Missing test scenarios ──────────────────────────────────────── + +test("classifyBashCommand blocks package manager mutations directly", () => { + assert.equal(isBlocked("npm install lodash"), true); + assert.equal(isBlocked("pip install requests"), 
true); + assert.equal(isBlocked("brew install node"), true); + assert.equal(isBlocked("apt-get install ripgrep"), true); +}); + +test("applyReadonlyBashGuard fallback mirrors classifyBashCommand on unsupported platforms", () => { + const origPlatform = Object.getOwnPropertyDescriptor(process, "platform"); + Object.defineProperty(process, "platform", { value: "win32", configurable: true }); + try { + const blocked = applyReadonlyBashGuard("npm install lodash", "/workspace"); + assert.deepEqual(blocked.action, "block"); + if (blocked.action === "block") { + assert.match(blocked.reason, /npm install lodash is blocked in readonly mode/i); + } + + const wrapped = applyReadonlyBashGuard('env -S "pip install requests"', "/workspace"); + assert.deepEqual(wrapped.action, "block"); + if (wrapped.action === "block") { + assert.match(wrapped.reason, /pip install requests is blocked in readonly mode/i); + } + + assert.deepEqual(applyReadonlyBashGuard("ls -la", "/workspace"), { action: "allow" }); + } finally { + if (origPlatform) Object.defineProperty(process, "platform", origPlatform); + } +}); + +test("classifyBashCommand: deep recursion triggers depth limit", () => { + // Build a deeply nested eval chain with safe commands to exceed the depth limit. + // eval always recurses, so each level increments depth. We need 11+ levels. 
// Exercises the context handler's band throttle: bands are 30–49% (0),
// 50–69% (1) and 70%+ (2); a watchdog nudge is only injected when the
// usage enters a higher band than the last one that was nudged.
test("watchdog nudges when crossing from band 0 to band 1 (45%→55%)", async () => {
  const pi = new MockPi();
  registerAgenticoding(pi as any);
  const [handler] = pi.handlers.get("context")!;

  // Fresh event/context per call so no call can observe another's objects.
  const makeEvent = () => ({ messages: [{ role: "user", content: "hi", timestamp: 1 }] });
  const usageAt = (percent: number) => ({ getContextUsage: () => ({ percent }) });

  // First call: 45% → band 0, should inject watchdog
  const first = await handler(makeEvent(), usageAt(45));
  assert.notEqual(first, undefined);
  assert.equal(first.messages[1].customType, "agenticoding-watchdog");

  // Second call: 55% → band 1, a higher band, so the watchdog fires again
  const second = await handler(makeEvent(), usageAt(55));
  assert.notEqual(second, undefined);
  assert.equal(second.messages[1].customType, "agenticoding-watchdog");

  // Third call: still 55% → same band, throttled; the handler returns nothing
  const third = await handler(makeEvent(), usageAt(55));
  assert.equal(third, undefined);
});
pi.commands.get("readonly")!.handler("", { + hasUI: true, + ui: { + notify: () => {}, + theme: { fg: (_n: string, t: string) => t }, + setStatus: () => {}, + setWidget: () => {}, + }, + getContextUsage: () => null, + }); + + const [contextHandler] = pi.handlers.get("context")!; + const result = await contextHandler( + { messages: [{ role: "user", content: "hi", timestamp: 1 }] }, + { getContextUsage: () => ({ percent: 70 }), sessionManager: { getBranch: () => [] } }, + ); + + // Both nudges should be present in the result + assert.ok(result.messages.length >= 3, `expected >= 3 messages, got ${result.messages.length}`); + const customTypes = result.messages + .filter((m: any) => m.role === "custom") + .map((m: any) => m.customType); + assert.ok(customTypes.includes("agenticoding-readonly-nudge"), "should include readonly nudge"); + assert.ok(customTypes.includes("agenticoding-watchdog"), "should include watchdog nudge"); +}); + test("spawn docs document active registered inheritance", async () => { const readme = await readFile("README.md", "utf8"); const changelog = await readFile("CHANGELOG.md", "utf8"); diff --git a/handoff/command.ts b/handoff/command.ts index 314a389..a387649 100644 --- a/handoff/command.ts +++ b/handoff/command.ts @@ -21,6 +21,15 @@ export function registerHandoffCommand(pi: ExtensionAPI, state: AgenticodingStat if (ctx.hasUI) ctx.ui.notify("Usage: /handoff ", "error"); return; } + if (state.readonlyEnabled) { + if (ctx.hasUI) { + ctx.ui.notify( + "Readonly mode blocks /handoff. 
Use spawn only for same-topic delegation, or disable readonly with /readonly before a real handoff.", + "warning", + ); + } + return; + } state.pendingRequestedHandoff = { direction, diff --git a/index.ts b/index.ts index f6506f0..267dbfb 100644 --- a/index.ts +++ b/index.ts @@ -13,7 +13,7 @@ */ import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent"; -import { DynamicBorder } from "@earendil-works/pi-coding-agent"; +import { DynamicBorder, isToolCallEventType } from "@earendil-works/pi-coding-agent"; import { Container, type SelectItem, @@ -33,10 +33,12 @@ import { registerHandoffCompaction } from "./handoff/compact.js"; import { registerSpawnTool } from "./spawn/index.js"; import { STATUS_KEY_HANDOFF, + STATUS_KEY_READONLY, STATUS_KEY_TOPIC, WIDGET_KEY_WARNING, updateIndicators, } from "./tui.js"; +import { applyReadonlyBashGuard } from "./readonly-bash.js"; import { formatPagePreview } from "./notebook/store.js"; export default function (pi: ExtensionAPI): void { @@ -56,6 +58,116 @@ export default function (pi: ExtensionAPI): void { // ── Register commands ─────────────────────────────────────────── registerHandoffCommand(pi, state); + // ── Readonly mode ─────────────────────────────────────────────── + + pi.registerFlag("readonly", { + description: "Start in readonly mode", + type: "boolean", + default: false, + }); + + function toggleReadonly(ctx: ExtensionContext): void { + if (!ctx.hasUI) return; // Toggle is a UI-only command, no-op in headless. + state.readonlyEnabled = !state.readonlyEnabled; + state.readonlyNudgePending = true; + pi.appendEntry("agenticoding-readonly", { enabled: state.readonlyEnabled }); + updateIndicators(ctx, state); + ctx.ui.notify( + state.readonlyEnabled + ? 
"Readonly mode enabled \u2014 write/edit/handoff and non-temp bash writes blocked" + : "Readonly mode disabled \u2014 write/edit/handoff and non-temp bash writes unblocked", + "info", + ); + } + + pi.registerCommand("readonly", { + description: "Toggle readonly mode (blocks write/edit/handoff and bash writes outside the OS temp dir)", + handler: async (_args, ctx) => toggleReadonly(ctx), + }); + + pi.registerShortcut("ctrl+shift+r", { + description: "Toggle readonly mode", + handler: async (ctx) => { + if (ctx.isIdle()) toggleReadonly(ctx); + }, + }); + + function rehydrateReadonlyState(ctx: ExtensionContext): void { + const wasEnabled = state.readonlyEnabled; + const branch = ctx.sessionManager?.getBranch?.() ?? []; + state.readonlyEnabled = false; + for (let i = branch.length - 1; i >= 0; i--) { + const entry = branch[i] as unknown; + if (!entry || typeof entry !== "object") continue; + const e = entry as Record; + if (e.type !== "custom" || e.customType !== "agenticoding-readonly") continue; + const d = e.data as Record | undefined; + state.readonlyEnabled = d?.enabled === true; + break; + } + // CLI flag sets initial default, but branch state takes precedence after any toggle. + if (pi.getFlag("readonly") === true) { + const hasBranchEntry = branch.some( + (e) => { + const entry = e as unknown; + return entry !== null && typeof entry === "object" && + (entry as Record).type === "custom" && + (entry as Record).customType === "agenticoding-readonly"; + } + ); + if (!hasBranchEntry) { + state.readonlyEnabled = true; + } + } + // Nudge on any rehydrated readonly authority change. 
+ if (state.readonlyEnabled !== wasEnabled) { + state.readonlyNudgePending = true; + } + } + + // ── Readonly: tool_call blocking ──────────────────────────────── + pi.on("tool_call", async (event, ctx) => { + // ── Readonly mode ─────────────────────────────────────────── + // Guardrail for a coding agent (not a security boundary): + // write/edit/handoff stay in the tool list but are blocked at + // call time with { block: true }. Keeping them advertised + // avoids context-cache invalidation from tools disappearing + // mid-session. Children use the opposite approach (remove + // from tool list entirely) because they start with a fresh + // context — see spawn/index.ts. + if (!state.readonlyEnabled) return; + + if (event.toolName === "write" || event.toolName === "edit" || event.toolName === "handoff") { + return { + block: true as const, + reason: + "Readonly mode: write/edit/handoff disabled. " + + "Toggle with /readonly. Use spawn for same-topic delegation.", + }; + } + + if (isToolCallEventType("bash", event)) { + const cmd = event.input.command; + // Defensive guard: malformed bash input (null/undefined/object) blocks cleanly. + // Whitespace-only strings pass through to classifyBashCommand. + if (typeof cmd !== "string") { + return { + block: true as const, + reason: `Readonly mode: invalid bash command input (expected string, got ${typeof cmd}: ${String(cmd).slice(0, 100)})`, + }; + } + const result = applyReadonlyBashGuard(cmd, ctx.cwd); + if (result.action === "block") { + return { block: true as const, reason: result.reason }; + } + if (result.action === "sandbox") { + // Mutate input.command in-place — SDK has no transform return type. + // Other tool_call hooks will see the sandbox-wrapped command. 
+ event.input.command = result.sandboxedCommand; + } + } + }); + // ── /notebook command — interactive page selector ──────────────── pi.registerCommand("notebook", { description: "Select a notebook page to preview, or set the active notebook topic with /notebook ", @@ -65,7 +177,9 @@ export default function (pi: ExtensionAPI): void { const result = setActiveNotebookTopic(state, topicArg, "human"); if (ctx.hasUI) { const message = result.boundaryHint - ? `Active notebook topic changed: ${result.boundaryHint.from} → ${result.boundaryHint.to}. This is a likely task boundary; handoff is recommended before continuing.` + ? state.readonlyEnabled + ? `Active notebook topic changed: ${result.boundaryHint.from} → ${result.boundaryHint.to}. This is a likely task boundary; use spawn only for same-topic delegation, or disable readonly with /readonly before handoff.` + : `Active notebook topic changed: ${result.boundaryHint.from} → ${result.boundaryHint.to}. This is a likely task boundary; handoff is recommended before continuing.` : `Active notebook topic: ${result.current}`; ctx.ui.notify(message, result.boundaryHint ? "warning" : "info"); } @@ -201,22 +315,66 @@ export default function (pi: ExtensionAPI): void { return { systemPrompt: parts.join("\n\n") }; }); - // ── context: inject primacy-zone nudge before each LLM call ──── + // ── context: inject primacy-zone nudge + readonly ON/OFF nudges ────── + // ON: nudge once on toggle. OFF: checks --readonly CLI flag and prior + // branch entries to detect session-level un-toggle before nudging. pi.on("context", async (event, ctx: ExtensionContext) => { const usage = ctx.getContextUsage(); const percent = usage?.percent ?? null; if (usage && usage.percent !== null) { state.lastContextPercent = usage.percent; } + + // Build the readonly nudge message (if pending) — don't early-return so + // it can merge with the watchdog nudge when both are needed in the same turn. 
+ let readonlyNudgeMsg: { role: string; customType: string; content: string; display: boolean; timestamp: number } | null = null; + if (state.readonlyNudgePending) { + state.readonlyNudgePending = false; + readonlyNudgeMsg = { + role: "custom" as const, + customType: "agenticoding-readonly-nudge", + content: state.readonlyEnabled + ? "Readonly mode is active. write, edit, handoff, and bash filesystem writes/deletions outside the OS temp dir are blocked. " + + "Allowed: read, notebook, env inheritance, and non-mutating bash." + : "Readonly mode has been turned off. You may now use write, edit, handoff, and bash freely." + + (percent !== null && percent >= 30 + ? " Context was at " + Math.round(percent) + "% — if the work changed topics, you can handoff now." + : ""), + display: false, + timestamp: Date.now(), + }; + } + + // Below primacy-zone threshold (~30%), skip watchdog unless a boundary + // hint is pending — context is still fresh enough that nudges add noise. if (!state.pendingTopicBoundaryHint && (percent === null || percent < 30)) { + state.lastWatchdogBand = null; + if (readonlyNudgeMsg) { + return { messages: [...event.messages, readonlyNudgeMsg] }; + } return; } + // Throttle: only nudge when crossing into a higher context-percentage band. + // Bands: null (<30), 0 (30-49), 1 (50-69), 2 (70+). This prevents nudging + // every turn once past 30%. + if (!state.pendingTopicBoundaryHint) { + const band = percent! < 50 ? 0 : percent! < 70 ? 1 : 2; + if (state.lastWatchdogBand !== null && band <= state.lastWatchdogBand) { + if (readonlyNudgeMsg) { + return { messages: [...event.messages, readonlyNudgeMsg] }; + } + return; + } + state.lastWatchdogBand = band; + } + const nudge = buildNudge(state, percent); state.pendingTopicBoundaryHint = null; return { messages: [ ...event.messages, + ...(readonlyNudgeMsg ? 
[readonlyNudgeMsg] : []), { role: "custom", customType: "agenticoding-watchdog", @@ -228,7 +386,7 @@ export default function (pi: ExtensionAPI): void { }; }); - // ── session_start: reset state + update indicators ───────────── + // ── session_start: reset state + readonly rehydration + indicators ── pi.on("session_start", async (event, ctx: ExtensionContext) => { if (event.reason === "new") { resetState(state); @@ -236,9 +394,17 @@ export default function (pi: ExtensionAPI): void { if (ctx.hasUI) { ctx.ui.setStatus(STATUS_KEY_HANDOFF, undefined); ctx.ui.setStatus(STATUS_KEY_TOPIC, undefined); + ctx.ui.setStatus(STATUS_KEY_READONLY, undefined); ctx.ui.setWidget(WIDGET_KEY_WARNING, undefined); } } + rehydrateReadonlyState(ctx); + updateIndicators(ctx, state); + }); + + // ── session_tree: rehydrate readonly state on tree changes ───── + pi.on("session_tree", async (_event, ctx: ExtensionContext) => { + rehydrateReadonlyState(ctx); updateIndicators(ctx, state); }); diff --git a/notebook/rehydration.ts b/notebook/rehydration.ts index 08e19e2..d20b4df 100644 --- a/notebook/rehydration.ts +++ b/notebook/rehydration.ts @@ -42,15 +42,17 @@ export function registerNotebookRehydration( for (let i = branch.length - 1; i >= 0; i--) { const entry = branch[i]; + if (!entry || typeof entry !== "object") continue; + const e = entry as unknown as Record; if ( - entry.type !== "custom" || - !ENTRY_TYPES.has((entry as Record).customType as string) + e.type !== "custom" || + !ENTRY_TYPES.has(e.customType as string) ) { continue; } - const data = (entry as Record).data as NotebookEntryData | undefined; + const data = e.data as NotebookEntryData | undefined; if (!data?.name || typeof data.content !== "string") continue; // Skip if we already have a newer version of this name diff --git a/os-sandbox.ts b/os-sandbox.ts new file mode 100644 index 0000000..709d43b --- /dev/null +++ b/os-sandbox.ts @@ -0,0 +1,272 @@ +/** + * OS-level sandboxing for readonly-mode bash commands. 
/**
 * Shape of a command name that is safe to splice into the probe below:
 * starts with a letter, digit or underscore, followed only by letters,
 * digits, `.`, `_`, `+` or `-`. Anything else (spaces, `;`, `$`, quotes,
 * a leading `-`) is rejected before a shell ever sees it.
 */
const SAFE_COMMAND_NAME = /^[A-Za-z0-9_][A-Za-z0-9._+-]*$/;

/**
 * Report whether `command` resolves to something runnable in the shell.
 *
 * Runs `command -v <name>` via `execSync`, which executes the string through
 * a shell. Because the name is interpolated into that string, it is validated
 * first; an empty or non-conforming name is reported as "not available"
 * instead of being executed (otherwise `"sh; true"` would both run extra
 * shell code and report a bogus `true`).
 *
 * @param command - Bare executable name, e.g. `"bwrap"`.
 * @returns `true` when the probe exits with status 0, `false` when the name
 *          is rejected or the probe fails for any reason.
 */
function hasCommand(command: string): boolean {
  if (!SAFE_COMMAND_NAME.test(command)) return false;
  try {
    execSync(`command -v ${command}`, { stdio: "ignore" });
    return true;
  } catch {
    // Non-zero exit or spawn failure: treat both as "not available".
    return false;
  }
}
/**
 * Build a Seatbelt sandbox profile string for readonly mode.
 *
 * Pattern: allow everything by default, then deny all file writes except
 * to the temp dir and /dev/null.
 *
 * Using (allow default) + write denies (permissive pattern) because
 * (deny default) + explicit read allows is fragile — system library
 * reads, dyld, and process execution are complex to enumerate and
 * vary across macOS versions.
 *
 * @param tempDir - Directory that must stay writable inside the sandbox.
 * @returns The profile as one string with no separators between rules.
 * @throws Error when any writable path contains a single or double quote,
 *         because neither can be embedded safely (see below).
 */
export function buildMacProfile(tempDir: string): string {
  // Collect unique paths — both canonical and unresolved (symlink) forms.
  // Seatbelt subpath does NOT resolve symlinks, so we must include both.
  // The unresolved form is taken from the argument itself as well as from
  // os.tmpdir() (e.g. /var -> /private/var): callers may pass an already
  // canonical path, in which case only os.tmpdir() still carries the alias.
  // Also include /tmp and /private/tmp because bash (on macOS) creates
  // heredoc temp files in /tmp regardless of $TMPDIR.
  const writePaths = new Set<string>([
    resolveRealPath(tempDir),
    path.resolve(tempDir),
    path.resolve(os.tmpdir()),
    "/private/tmp",
    "/tmp",
  ]);

  // Two distinct injection risks in the profile string:
  //  - Single quotes (') break out of the outer shell wrapper: sandbox-exec -p '${profile}'
  //  - Double quotes (") break Seatbelt (subpath "...") literal syntax
  for (const p of writePaths) {
    if (p.includes("'") || p.includes('"')) {
      throw new Error(`[readonly] Sandbox profile path contains quote — cannot safely escape: ${p}`);
    }
  }

  const rules = [
    "(version 1)",
    "(allow default)",
    "(deny file-write*)",
    '(allow file-write* (literal "/dev/null"))',
    ...Array.from(writePaths, (p) => `(allow file-write* (subpath "${p}"))`),
  ];
  return rules.join("");
}
/** Quote `value` as one POSIX-shell word; embedded single quotes become `'\''`. */
function shellSingleQuote(value: string): string {
  return `'${value.replace(/'/g, `'\\''`)}'`;
}

/**
 * Pick a heredoc delimiter that does not occur anywhere in `command`, so a
 * line of the command can never terminate the heredoc early.
 */
function pickHeredocDelimiter(command: string): string {
  let delim = generateDelimiter();
  while (command.includes(delim)) delim = generateDelimiter();
  return delim;
}

/**
 * Build `<launcher> /bin/bash << 'DELIM' … DELIM`, feeding `command` verbatim
 * to an inner bash started by `launcher` (the sandbox program and its flags).
 *
 * The inner script captures the command's combined stdout/stderr, remembers
 * its exit status, prints a readonly-mode hint when a failure looks like a
 * denied write, then replays the captured output and exits with the
 * command's own status.
 */
function buildSandboxHeredoc(launcher: string, command: string): string {
  const delim = pickHeredocDelimiter(command);
  // Notes on the script text:
  //  - The delimiter is quoted, so the outer shell expands nothing inside.
  //  - ":" keeps the { } group non-empty when `command` is empty.
  //  - stdin comes from /dev/null: the inner bash reads THIS script from its
  //    stdin, so a command that reads stdin (cat, grep, read, …) would
  //    otherwise consume the remaining script lines and lose the exit status.
  //  - printf is used for the replay because echo would treat output such as
  //    "-n" as an option.
  return `${launcher} /bin/bash << '${delim}'
output=\$({
:
${command}
} 2>&1 </dev/null)
rc=\$?
if [ \$rc -ne 0 ]; then
  case "\$output" in
    *"Operation not permitted"*|*"Permission denied"*|*"denying file-write"*)
      echo ""
      echo "[readonly mode] The OS sandbox blocked a filesystem write outside the OS temp dir."
      echo "Use /readonly to disable, or write within the OS temp dir."
      echo ""
      ;;
  esac
fi
[ -n "\$output" ] && printf '%s\\n' "\$output"
exit \$rc
${delim}`;
}

/**
 * Wrap a bash command with sandbox-exec on macOS.
 *
 * Produces:
 *
 *   sandbox-exec -p '<profile>' /bin/bash << 'DELIM'
 *   <wrapper script containing the command>
 *   DELIM
 *
 * The profile comes from buildMacProfile, which rejects paths containing
 * quotes, so it can sit inside single quotes unescaped.
 */
export function wrapWithSandboxExec(command: string): string {
  const profile = buildMacProfile(getCanonicalTempDir());
  return buildSandboxHeredoc(`sandbox-exec -p '${profile}'`, command);
}

// ── Linux: bubblewrap ────────────────────────────────────────────

/**
 * Wrap a bash command with bubblewrap on Linux.
 *
 * Uses the same heredoc approach as sandbox-exec for consistent behavior.
 *
 *   --ro-bind / /         makes entire root read-only
 *   --tmpfs /tmp          then mounts writable tmpfs at /tmp (overrides ro-bind)
 *   --bind <tmp> <tmp>    binds the real temp dir writable at its own path
 *   --proc /proc, --dev /dev   for proper /proc and /dev
 *   --unshare-all --share-net  for isolation while allowing network
 *   --die-with-parent --new-session  for clean termination
 *
 * The temp dir is single-quoted so spaces or shell metacharacters in the
 * path cannot alter the command line.
 */
export function wrapWithBwrap(command: string): string {
  const tempDir = shellSingleQuote(getCanonicalTempDir());
  const flags = [
    "--ro-bind / /",
    "--tmpfs /tmp",
    `--bind ${tempDir} ${tempDir}`,
    "--proc /proc",
    "--dev /dev",
    "--unshare-all",
    "--share-net",
    "--die-with-parent",
    "--new-session",
  ];
  return buildSandboxHeredoc(`bwrap ${flags.join(" ")}`, command);
}

// ── Unified dispatch ─────────────────────────────────────────────

/**
 * Wrap a bash command string to run inside an OS-level filesystem sandbox.
 *
 * On macOS: wraps with sandbox-exec.
 * On Linux: wraps with bubblewrap when the bwrap probe succeeds.
 * On other platforms / when unavailable: returns command unchanged.
 *
 * The returned string is itself a shell command line and must be executed
 * by a shell (e.g. /bin/bash -c).
 */
export function wrapCommandWithOsSandbox(command: string): string {
  const platform = process.platform;
  if (platform === "darwin") {
    return wrapWithSandboxExec(command);
  }
  if (platform === "linux" && _hasBwrap()) {
    return wrapWithBwrap(command);
  }
  // No OS sandbox available — command unchanged, classifyBashCommand
  // fallback will handle it at the call site.
  return command;
}
+ * + * This is a best-effort command inspection layer, not a security sandbox. + * + * ## Known L2 limitations (no OS sandbox available) + * + * These bypasses are mitigated by L1 (OS sandbox) on macOS and Linux but + * are effective on Windows or when sandbox tools are missing: + * + * - **Interpreters with programmatic code** — `node -e`, `python3 -c`, etc. + * running code like `require('fs').writeFileSync(...)` are not checked. + * The classifier only parses shell command tokens, not JS/Python/Perl code. + * - **xargs with stdin-fed package managers** — `printf install | xargs npm` + * bypasses because `xargs npm` alone has no verb args. The pipe feeds + * `install` at runtime via stdin; only the OS sandbox blocks the writes. + * - **curl combined-flag permutations** — `-JO`, `-sJO` (where `-O` is not + * the first character after `-`) pass through undetected because the + * classifier only checks `startsWith("-O")`, not substring presence. + * The natural form `-OJ` (now detected) should be used, or separate flags. + * - **curl --remote-name-all** — implicitly applies `-O` to every URL but + * has no `-O` token for the classifier to detect. + */ + +type Verdict = + | { ok: true } + | { ok: false; reason: string }; + +// TEMP_DIR is resolved in temp-dir.ts — imported above so both +// readonly-bash and os-sandbox use the same canonical temp dir. 
// Git subcommands that never modify the repository or working tree.
const GIT_IMMUTABLE = new Set([
  "diff", "log", "show", "status", "blame", "grep",
  "ls-files", "ls-tree", "merge-tree", "format-patch",
  "rev-parse", "rev-list", "cat-file", "for-each-ref",
  "merge-base", "fsck", "range-diff", "shortlog", "name-rev",
  "describe", "var", "version",
]);

// Git subcommands that always count as mutations.
const GIT_MUTABLE = new Set([
  "add", "am", "apply", "checkout", "cherry-pick", "clean",
  "clone", "commit", "fetch", "init", "merge", "mv", "pull", "push",
  "rebase", "reset", "restore", "revert", "rm", "switch",
]);

/**
 * True when `sub` is one of `verbs`, either alone or followed by a space and
 * further arguments ("show" and "show origin" both match "show").
 */
function isVerbInvocation(sub: string, ...verbs: string[]): boolean {
  return verbs.some((verb) => sub === verb || sub.startsWith(`${verb} `));
}

// Git subcommands that are read-only only for some argument forms. Each
// predicate receives the text after the subcommand name and returns true
// when that form is read-only.
// NOTE(review): `sub` is assumed to be the full remainder string (the reflog
// and config predicates already test for "show " / "--get "); confirm against
// isSafeGitCommand. Verb checks therefore accept trailing arguments —
// `git remote get-url` always takes a remote name, so an exact match on
// "get-url" could never succeed.
const GIT_MIXED: Record<string, (sub: string) => boolean> = {
  reflog: (sub) => sub === "" || isVerbInvocation(sub, "show"),
  branch: (sub) =>
    sub === "" || sub === "-l" || sub === "--show-current" ||
    /^--?[a-zA-Z-]*list(?:[=\s]|$)/.test(sub),
  tag: (sub) => sub === "" || sub === "-l" || /^--?[a-zA-Z-]*list(?:[=\s]|$)/.test(sub),
  remote: (sub) => sub === "" || sub === "-v" || isVerbInvocation(sub, "show", "get-url"),
  config: (sub) =>
    sub === "" || sub === "-l" || sub === "--list" ||
    sub === "--get" || sub.startsWith("--get ") || sub.startsWith("--get="),
  notes: (sub) => isVerbInvocation(sub, "list", "show"),
  // Bare `git stash` is deliberately not listed: it stashes changes.
  stash: (sub) => isVerbInvocation(sub, "list", "show"),
  bisect: (sub) => sub === "log" || sub === "view" || sub === "",
  worktree: (sub) => isVerbInvocation(sub, "list"),
  submodule: (sub) => isVerbInvocation(sub, "status"),
};

// Interpreters whose inline-execution flag is recursively classified.
// node -c = syntax check only (non-executing); node -e executes code.
const INTERPRETER_EXEC_FLAGS: Record<string, string[]> = {
  node: ["-e"],
  bash: ["-c"], sh: ["-c"], zsh: ["-c"], dash: ["-c"], ksh: ["-c"],
  python3: ["-c"], python: ["-c"],
  perl: ["-e"],
  ruby: ["-e"],
};

// Every command name that has an entry in INTERPRETER_EXEC_FLAGS.
const INTERPRETERS = new Set(Object.keys(INTERPRETER_EXEC_FLAGS));
/**
 * Decide whether a bash command line may run in readonly mode.
 *
 * The line is cut into segments by splitUnquotedShellSegments; blank segments
 * are ignored and every other segment is inspected in order. The first
 * violation found ends the scan and becomes the verdict.
 *
 * @param cmd - Raw bash command string; may hold several segments.
 * @param cwd - Directory passed to the path checks (defaults to process.cwd()).
 * @param depth - Current nesting level of recursive classification; callers
 *                outside this module leave it at 0. Beyond 10 the command is
 *                rejected rather than inspected further.
 * @returns `{ ok: true }` when nothing objectionable was found, otherwise
 *          `{ ok: false, reason }`.
 */
export function classifyBashCommand(cmd: string, cwd: string = process.cwd(), depth: number = 0): Verdict {
  if (depth > 10) {
    return { ok: false, reason: "recursion depth exceeded in command classification" };
  }

  for (const rawSegment of splitUnquotedShellSegments(cmd)) {
    const segment = rawSegment.trim();
    if (segment.length === 0) continue;

    const violation = findSegmentViolation(segment, cwd, depth);
    if (violation !== null) return { ok: false, reason: violation };
  }

  return { ok: true };
}

/**
 * Inspect one trimmed segment and return the reason it is blocked, or null.
 * Checks run in a fixed order: nested command substitutions first, then
 * write redirects, then the segment's own command.
 */
function findSegmentViolation(segment: string, cwd: string, depth: number): string | null {
  // Anything inside $(...) or backticks is classified as a command of its own.
  for (const subcommand of extractCommandSubstitutions(segment)) {
    const nested = classifyBashCommand(subcommand, cwd, depth + 1);
    if (!nested.ok) {
      return `command substitution blocked: ${nested.reason}`;
    }
  }

  const redirectTarget = getUnsafeWriteRedirectTarget(segment, cwd);
  if (redirectTarget) {
    return `write redirect blocked outside temp dir: ${redirectTarget}`;
  }

  // A falsy reason means "no mutation detected" and is normalised to null.
  return getFilesystemMutationReason(segment, cwd, depth) || null;
}
/** Shells whose `-c` may be bundled with other short options (`bash -lc '…'`). */
const SHELL_INTERPRETERS = new Set(["bash", "sh", "zsh", "dash", "ksh"]);

/**
 * Classify a shell segment's filesystem mutation risk.
 *
 * Extracts the command and its targets, then blocks if any target resolves
 * outside the OS temp dir. Handles subshell parens, eval/exec, sudo, env,
 * git, interpreter inline scripts, dd of=, package managers, wget, curl,
 * xargs, and the generic commands known to getMutationTargets.
 * Command names are compared case-insensitively.
 *
 * @param segment - One operator-free shell segment
 * @param cwd - Directory used to resolve relative target paths
 * @param depth - Recursion depth forwarded to classifyBashCommand
 * @returns A human-readable block reason, or null when the segment is allowed
 *          (unknown commands are allowed)
 */
function getFilesystemMutationReason(segment: string, cwd: string, depth: number = 0): string | null {
  const tokens = getCommandTokens(segment);
  const command = tokens[0]?.toLowerCase();
  if (!command) return null;

  // Strip subshell parens: (rm file) → rm file
  if (command.startsWith("(") && segment.endsWith(")")) {
    const inner = segment.slice(1, -1).trim();
    return inner ? getFilesystemMutationReason(inner, cwd, depth) : null;
  }

  // eval/exec: recursively classify the remaining argument string
  if (command === "eval" || command === "exec") {
    const inner = tokens.slice(1).map(stripMatchingQuotes).join(" ");
    const nested = classifyBashCommand(inner, cwd, depth + 1);
    return nested.ok ? null : nested.reason;
  }

  if (command === "sudo") {
    const nested = classifyBashCommand(tokens.slice(findSudoCommandIndex(tokens)).join(" "), cwd, depth + 1);
    return nested.ok ? null : nested.reason;
  }

  if (command === "env") {
    // getCommandTokens strips env flags and assignments; when a real command
    // remains it follows "env" in the token list and is classified directly.
    if (tokens.length > 1) {
      const nested = classifyBashCommand(tokens.slice(1).join(" "), cwd, depth + 1);
      return nested.ok ? null : nested.reason;
    }
    // env with only flags (e.g., env -S "cmd") — the -S value was consumed as
    // a flag value, so recover it from the raw segment and classify it.
    const sMatch = segment.match(/\benv\b.*?(?:-S|--split-string)\s+/);
    if (sMatch) {
      const afterS = segment.slice(sMatch.index! + sMatch[0].length).trim();
      const stripped = stripMatchingQuotes(afterS);
      const nested = classifyBashCommand(stripped, cwd, depth + 1);
      return nested.ok ? null : nested.reason;
    }
    return null;
  }

  if (command === "git") {
    return isSafeGitCommand(tokens.slice(1).join(" "))
      ? null
      : "mutable git command blocked outside temp dir";
  }

  // Interpreters with inline-execution flags — check inline code, then fall through
  // so perl/ruby -pi, python3 script.py, etc. still reach getMutationTargets.
  if (INTERPRETERS.has(command)) {
    const args = tokens.slice(1);
    // Shells accept bundled short options, so "-lc" / "-ec" carry the exec
    // flag too; an exact match alone would let `bash -lc 'rm …'` through.
    const acceptsClusters = SHELL_INTERPRETERS.has(command);
    for (const flag of INTERPRETER_EXEC_FLAGS[command] ?? []) {
      const letter = flag.slice(1);
      const idx = args.findIndex(
        (arg) => arg === flag || (acceptsClusters && /^-[A-Za-z]+$/.test(arg) && arg.includes(letter)),
      );
      if (idx !== -1 && idx + 1 < args.length) {
        const inlineScript = stripMatchingQuotes(args[idx + 1]);
        const nested = classifyBashCommand(inlineScript, cwd, depth + 1);
        if (!nested.ok) {
          return `${command} ${flag} blocked: ${nested.reason}`;
        }
      }
    }
  }

  // Checked for every command (not only dd), so wrappers around dd are covered too.
  const ddMatch = segment.match(/\bof=([^\s]+)/);
  if (ddMatch && !isTempPath(ddMatch[1], cwd)) {
    return `dd output blocked outside temp dir: ${stripMatchingQuotes(ddMatch[1])}`;
  }

  const packageManagerReason = getPackageManagerMutationReason(segment);
  if (packageManagerReason) return packageManagerReason;

  // wget without -O/--output-document writes to disk (URL basename in cwd) — block.
  // Must be checked before getMutationTargets since there is no explicit target path
  // to feed into the generic path check.
  if (command === "wget") {
    const wArgs = tokens.slice(1);
    const hasOutputFlag = wArgs.some(
      (a) => a === "-O" || a.startsWith("-O") || a === "--output-document" || a.startsWith("--output-document="),
    );
    if (!hasOutputFlag) {
      return "wget blocked outside temp dir: current directory (use -O /tmp/... to write to temp)";
    }
  }

  // curl -O/--remote-name writes to disk (URL basename in cwd). Allow it only
  // when cwd itself is inside temp; when -o and -O are combined, both writes
  // remain cumulative and must be allowed.
  if (command === "curl") {
    const { hasRemoteName } = getCurlWriteTargets(tokens);
    if (hasRemoteName && !isTempPath(".", cwd)) {
      return "curl blocked outside temp dir: current directory (use -o /tmp/... to write to temp)";
    }
  }

  // xargs: classify the command xargs would run.
  // xargs feeds stdin as args, so any mutation command is blocked even
  // without explicit targets — the targets come from the pipe.
  if (command === "xargs") {
    const xArgs = tokens.slice(1);
    const XARGS_FLAGS_WITH_VALUE = new Set(["-I", "-L", "-n", "-P", "-d", "-E", "-s"]);
    let cmdStart = 0;
    while (cmdStart < xArgs.length) {
      if (XARGS_FLAGS_WITH_VALUE.has(xArgs[cmdStart])) { cmdStart += 2; continue; }
      if (xArgs[cmdStart].startsWith("-")) { cmdStart++; continue; }
      break;
    }
    if (cmdStart < xArgs.length) {
      const xTokens = xArgs.slice(cmdStart);
      // L1: Full classifier check (catches git, interpreters, package managers, etc.)
      const inner = xTokens.join(" ");
      const nested = classifyBashCommand(inner, cwd, depth + 1);
      if (!nested.ok) return nested.reason;
      // L2: xargs feeds stdin as arguments, so even targetless mutation commands
      // (rm, mv, sed -i, etc.) are dangerous — the targets come from the pipe.
      // Block if getMutationTargets recognizes the command (returns non-null).
      const xCmd = xTokens[0]?.toLowerCase();
      if (xCmd && getMutationTargets(xCmd, xTokens) !== null) {
        return `xargs ${xCmd} blocked: mutation command via xargs`;
      }
      return null;
    }
    return null;
  }

  // Generic path check: every target the command would write must be in temp.
  const paths = getMutationTargets(command, tokens);
  if (!paths) return null;
  for (const target of paths) {
    if (!isTempPath(target, cwd)) {
      return `${command} blocked outside temp dir: ${stripMatchingQuotes(target)}`;
    }
  }
  return null;
}
/**
 * Report the first package-manager mutation found in a command line.
 *
 * The line is split into shell segments; a segment is rejected when its
 * command (lower-cased) is a known package manager and its arguments contain
 * a mutation verb according to isPackageMutation.
 *
 * @param cmd - Command line (one or more segments)
 * @returns The block message, or null when no segment mutates packages
 */
function getPackageManagerMutationReason(cmd: string): string | null {
  for (const piece of splitUnquotedShellSegments(cmd)) {
    const trimmed = piece.trim();
    if (trimmed === "") continue;

    const [head, ...rest] = getCommandTokens(trimmed);
    const manager = head?.toLowerCase();
    if (!manager || !PACKAGE_MANAGERS.has(manager)) continue;

    if (isPackageMutation(rest)) {
      return `${manager} ${rest.join(" ")} is blocked in readonly mode`;
    }
  }
  return null;
}

/**
 * Drop every flag listed in `flagsWithValues` together with the argument that
 * follows it; all other arguments are kept in order.
 *
 * @param args - Argument list to filter
 * @param flagsWithValues - Flags that consume the next argument as their value
 * @returns The remaining arguments
 */
function skipFlagValues(args: string[], flagsWithValues: Set<string>): string[] {
  const kept: string[] = [];
  for (let idx = 0; idx < args.length; idx++) {
    if (flagsWithValues.has(args[idx])) {
      idx++; // also step over the flag's value
      continue;
    }
    kept.push(args[idx]);
  }
  return kept;
}
/**
 * curl short options that take a value. Inside a bundled cluster the value is
 * the remainder of the token; otherwise it is the next argument.
 */
const CURL_SHORT_OPTIONS_WITH_VALUE = new Set("AbcCdDeEFHKmoPQrtTuUwxXyYz".split(""));

/**
 * Find the files a curl invocation writes.
 *
 * Recognises -o FILE, -oFILE, --output FILE, --output=FILE, -O,
 * --remote-name and --remote-name-all, including short options bundled into a
 * cluster (`-fsSLo FILE`, `-sSLO`). Values of other value-taking short
 * options are skipped so that e.g. `-XPOST` or `-H "-O"` are not mistaken for
 * output flags. Parsing stops at `--`.
 *
 * @param tokens - Command tokens, with `tokens[0]` being "curl"
 * @returns `outputs`: explicit output paths in order; `hasRemoteName`: whether
 *          curl will also write a URL-derived file into the current directory
 */
function getCurlWriteTargets(tokens: string[]): { hasRemoteName: boolean; outputs: string[] } {
  const args = tokens.slice(1);
  const outputs: string[] = [];
  let hasRemoteName = false;

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === "--") break; // end of options; remaining args are URLs

    if (arg.startsWith("--")) {
      if (arg === "--output") {
        if (args[i + 1]) {
          outputs.push(args[i + 1]);
          i++;
        }
      } else if (arg.startsWith("--output=")) {
        outputs.push(arg.slice("--output=".length));
      } else if (arg === "--remote-name" || arg === "--remote-name-all") {
        hasRemoteName = true;
      }
      continue;
    }

    if (!arg.startsWith("-") || arg.length < 2) continue; // URL, quoted value, or bare "-"

    // Walk a short-option cluster letter by letter.
    for (let c = 1; c < arg.length; c++) {
      const option = arg[c];
      if (option === "O") {
        hasRemoteName = true;
        continue;
      }
      if (!CURL_SHORT_OPTIONS_WITH_VALUE.has(option)) continue; // boolean switch

      const inlineValue = arg.slice(c + 1);
      if (option === "o") {
        if (inlineValue) {
          outputs.push(inlineValue);
        } else if (args[i + 1]) {
          outputs.push(args[i + 1]);
          i++;
        }
      } else if (!inlineValue) {
        i++; // the value is the next argument — never parse it as a flag
      }
      break; // a value-taking option ends the cluster
    }
  }
  return { hasRemoteName, outputs };
}
/** Flags of cp/mv/install/ln that name the destination directory explicitly. */
const TARGET_DIRECTORY_FLAGS = new Set(["-t", "--target-directory"]);

/** Return the directory given via -t/--target-directory, or null if absent. */
function findTargetDirectory(args: string[]): string | null {
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === "--") break;
    if (TARGET_DIRECTORY_FLAGS.has(arg) && args[i + 1] !== undefined) return args[i + 1];
    if (arg.startsWith("--target-directory=")) return arg.slice("--target-directory=".length);
  }
  return null;
}

/** True when the token after a bare `sed -i` looks like a BSD backup suffix ('' or .ext). */
function isSedBackupSuffix(token: string): boolean {
  const bare = stripMatchingQuotes(token);
  return bare === "" || /^\.[A-Za-z0-9._~-]{1,15}$/.test(bare);
}

/**
 * Files edited by an in-place sed invocation, or null when sed is not run
 * in-place (no -i / --in-place / cluster such as -Ei).
 */
function getSedInPlaceTargets(sedArgs: string[]): string[] | null {
  const isInPlaceFlag = (arg: string): boolean =>
    arg === "--in-place" || arg.startsWith("--in-place=") || /^-[nErsuz]*i/.test(arg);
  if (!sedArgs.some(isInPlaceFlag)) return null;

  let scriptGivenByFlag = false;
  let suffixCandidate: string | null = null;
  const remaining: string[] = [];
  for (let i = 0; i < sedArgs.length; i++) {
    const arg = sedArgs[i];
    if (arg === "-e" || arg === "--expression" || arg === "-f" || arg === "--file") {
      // Script (or script file) supplied by flag: its value is not a target.
      scriptGivenByFlag = true;
      i++;
      continue;
    }
    if (arg.startsWith("--expression=") || arg.startsWith("--file=") || /^-[ef]./.test(arg)) {
      scriptGivenByFlag = true;
      continue;
    }
    if (arg === "-i") {
      // BSD/macOS form: `sed -i '' expr file` / `sed -i .bak expr file`.
      const next = sedArgs[i + 1];
      if (next !== undefined && isSedBackupSuffix(next)) {
        suffixCandidate = next;
        i++;
      }
      continue;
    }
    remaining.push(arg);
  }

  const operands = nonOptionArgs(remaining);
  // Without -e/-f the first operand is the script itself.
  const files = scriptGivenByFlag ? operands : operands.slice(1);
  // GNU form `sed -e … -i .bashrc`: a "suffix" with no file after it is the file.
  if (files.length === 0 && suffixCandidate !== null && stripMatchingQuotes(suffixCandidate) !== "") {
    return [suffixCandidate];
  }
  return files;
}

/**
 * List the paths a known mutating command would write, delete or modify.
 *
 * @param command - Lower-cased command name
 * @param tokens - Full token list, `tokens[0]` being the command
 * @returns The paths to vet against the temp dir (possibly empty), or null
 *          when the command is not recognised as a mutation
 */
function getMutationTargets(command: string, tokens: string[]): string[] | null {
  const args = tokens.slice(1);
  switch (command) {
    case "rm":
    case "rmdir":
    case "unlink":
      return nonOptionArgs(args);
    case "mkdir":
      // -m/--mode takes a value that must not be mistaken for a directory.
      return nonOptionArgs(skipFlagValues(args, new Set(["-m", "--mode"])));
    case "truncate":
      return nonOptionArgs(skipFlagValues(args, new Set(["-s", "--size", "-r", "--reference"])));
    case "touch":
      return nonOptionArgs(skipFlagValues(args, new Set(["-t", "-d", "-r"])));
    case "chmod":
    case "chown":
    case "chgrp":
      // First operand is the mode/owner/group; the rest are files.
      return nonOptionArgs(args).slice(1);
    case "cp":
    case "mv":
    case "install":
    case "ln": {
      const targetDir = findTargetDirectory(args);
      const operands = nonOptionArgs(skipFlagValues(args, TARGET_DIRECTORY_FLAGS));
      if (command === "mv") {
        // mv removes its sources, so every operand is mutated.
        return targetDir !== null ? [...operands, targetDir] : operands;
      }
      if (targetDir !== null) return [targetDir];
      return operands.length > 0 ? [operands[operands.length - 1]] : [];
    }
    case "tee":
      return nonOptionArgs(args);
    case "sed":
      return getSedInPlaceTargets(args);
    case "perl":
    case "ruby":
      if (args.some((arg) => /^-p?i/.test(arg))) {
        // The -e value is code, not a file to edit.
        return nonOptionArgs(skipFlagValues(args, new Set(["-e"])));
      }
      return null;
    case "find":
      return getFindMutationTargets(args);
    case "wget": {
      let outputTarget: string | null = null;
      for (let i = 0; i < args.length; i++) {
        if (args[i] === "-O" && args[i + 1]) {
          outputTarget = args[i + 1];
          i++;
          continue;
        }
        if (args[i].startsWith("-O") && args[i].length > 2) {
          outputTarget = args[i].slice(2);
          continue;
        }
        if (args[i] === "--output-document" && args[i + 1]) {
          outputTarget = args[i + 1];
          i++;
          continue;
        }
        if (args[i].startsWith("--output-document=")) {
          outputTarget = args[i].slice("--output-document=".length);
        }
      }
      if (outputTarget !== null) {
        // -O - streams to stdout, not a file — map to /dev/null (safe).
        return stripMatchingQuotes(outputTarget) === "-" ? ["/dev/null"] : [outputTarget];
      }
      // wget without -O/--output-document writes the URL basename into cwd.
      return ["."];
    }
    case "curl": {
      const { hasRemoteName, outputs } = getCurlWriteTargets(tokens);
      // -o - and --output - write to stdout, not a file — map to /dev/null (safe)
      const mapped = outputs.map((o) => (stripMatchingQuotes(o) === "-" ? "/dev/null" : o));
      if (mapped.length > 0) {
        return hasRemoteName ? [...mapped, "."] : mapped;
      }
      if (hasRemoteName) return ["."];
      return null;
    }
    default:
      return null;
  }
}
const FIND_EXEC_ACTIONS = new Set(["-exec", "-execdir", "-ok", "-okdir"]);
const FIND_FILE_OUTPUT_ACTIONS = new Set(["-fprintf", "-fprint", "-fprint0", "-fls"]);

/**
 * List the paths a `find` invocation may mutate.
 *
 * - Starting points are only the operands that precede the expression
 *   (after the leading -H/-L/-P/-D/-O options); values of tests such as
 *   `-name foo` or `-type f` are not starting points. Starting points that
 *   contain `~` or glob characters are kept so the caller can vet them.
 * - `-delete` and the -exec family mutate the starting points (default ".").
 * - For the -exec family, path-like arguments of the executed command are
 *   reported too (`-exec cp {} /etc \;` reports /etc).
 * - `-fprint`-style actions report their output file.
 * All actions are accumulated; the scan does not stop at the first one.
 *
 * @param args - find's arguments (without the command name)
 * @returns Paths to vet, or null when the invocation performs no mutation
 */
function getFindMutationTargets(args: string[]): string[] | null {
  // Removes one leading backslash or a pair of matching quotes: \; → ;  '(' → (
  const unquote = (token: string): string =>
    token.replace(/^\\/, "").replace(/^(['"])(.*)\1$/, "$2");

  let i = 0;
  // Leading global options come before the starting points.
  while (i < args.length) {
    const arg = args[i];
    if (arg === "-D") { i += 2; continue; } // -D takes debug options as a value
    if (arg === "-H" || arg === "-L" || arg === "-P" || /^-O\d*$/.test(arg)) { i++; continue; }
    break;
  }

  // Starting points run until the first expression token.
  const roots: string[] = [];
  for (; i < args.length; i++) {
    const arg = args[i];
    const bare = unquote(arg);
    if (arg.startsWith("-") || bare === "(" || bare === "!" || bare === ",") break;
    if (arg) roots.push(arg);
  }

  const extraTargets: string[] = [];
  let mutatesTree = false;
  for (; i < args.length; i++) {
    const arg = args[i];
    if (arg === "-delete") {
      mutatesTree = true;
      continue;
    }
    if (FIND_FILE_OUTPUT_ACTIONS.has(arg)) {
      extraTargets.push(args[i + 1] || ".");
      i++;
      continue;
    }
    if (FIND_EXEC_ACTIONS.has(arg)) {
      mutatesTree = true;
      // Skip the action and the command name, then collect the command's
      // operands up to the `;` / `+` terminator.
      for (i += 2; i < args.length; i++) {
        const execArg = args[i];
        const bare = unquote(execArg);
        if (bare === ";" || bare === "+") break;
        if (execArg.startsWith("-") || bare.includes("{}")) continue;
        extraTargets.push(execArg);
      }
    }
  }

  if (!mutatesTree && extraTargets.length === 0) return null;
  const treeTargets = mutatesTree ? (roots.length > 0 ? roots : ["."]) : [];
  return [...treeTargets, ...extraTargets];
}
/**
 * Package-manager verbs that change installed state. "i" is the npm/pnpm
 * shorthand for "install".
 */
const PACKAGE_MUTATION_VERBS = new Set([
  "install", "i", "uninstall", "update", "upgrade", "ci", "link", "publish",
  "add", "remove", "reinstall", "tap", "untap", "download", "build-dep",
]);

/**
 * True when any argument is a package-mutation verb.
 *
 * Matching is per token (case-insensitive), not on the joined string, so a
 * path or argument that merely contains a verb (install-sh) does not match.
 */
function isPackageMutation(args: string[]): boolean {
  return args.some((arg) => PACKAGE_MUTATION_VERBS.has(arg.toLowerCase()));
}

/**
 * Index of the command that sudo will run.
 *
 * Skips sudo's own options, including the value of options that take one, and
 * honours `--`. Returns `tokens.length` when no command follows.
 *
 * @param tokens - Token list with `tokens[0]` being "sudo"
 */
function findSudoCommandIndex(tokens: string[]): number {
  const FLAGS_WITH_VALUE = new Set(["-u", "-g", "-p", "-C", "-T", "-h", "-U", "-r", "-t", "-D", "-R"]);
  let i = 1;
  while (i < tokens.length) {
    const token = tokens[i];
    if (token === "--") return i + 1;
    if (!token.startsWith("-")) return i;
    i += FLAGS_WITH_VALUE.has(token) ? 2 : 1;
  }
  return tokens.length;
}

/** env options that consume the following token as their value. */
const ENV_FLAGS_WITH_VALUE = new Set([
  "-u", "--unset", "-S", "--split-string", "-g", "--group", "-C", "--chdir",
]);

/** Prefix words that run the following command unchanged. */
const TRANSPARENT_COMMAND_WRAPPERS = new Set(["command", "builtin", "nohup", "time"]);

const ENV_ASSIGNMENT_RE = /^[A-Za-z_][A-Za-z0-9_]*=/;

/**
 * Extract the command tokens from a shell segment, stripping an `env` prefix,
 * env-var assignments, and transparent wrappers (`command`, `builtin`,
 * `nohup`, `time`) so the first returned token is the command that actually
 * runs.
 *
 * For `env`, options with values consume the next token, KEY=value
 * assignments and an optional `--` are skipped. When nothing follows (for
 * example `env -S "cmd"`, where the string was consumed as the -S value) the
 * result is `["env"]` so the caller can handle that form itself.
 *
 * @param segment - One shell segment
 * @returns The tokens starting at the effective command (possibly empty)
 */
function getCommandTokens(segment: string): string[] {
  const tokens = segment.match(/"[^"]*"|'[^']*'|\S+/g) ?? [];
  let i = 0;

  if (tokens[i] === "env") {
    i++;
    while (i < tokens.length && tokens[i].startsWith("-")) {
      i += ENV_FLAGS_WITH_VALUE.has(tokens[i]) ? 2 : 1;
    }
    while (i < tokens.length && ENV_ASSIGNMENT_RE.test(tokens[i])) i++;
    // Skip -- separator between env assignments and the command
    if (i < tokens.length && tokens[i] === "--") i++;
    if (i >= tokens.length) return ["env"];
  }

  while (i < tokens.length && ENV_ASSIGNMENT_RE.test(tokens[i])) i++;
  while (i < tokens.length && TRANSPARENT_COMMAND_WRAPPERS.has(tokens[i])) i++;
  return tokens.slice(i);
}
/**
 * Return the operands of an argument list.
 *
 * Arguments starting with "-" are dropped until a "--" is seen; "--" itself
 * is dropped and everything after it is kept. A bare "-" counts as an operand.
 */
function nonOptionArgs(args: string[]): string[] {
  const result: string[] = [];
  let stopOptions = false;
  for (const arg of args) {
    if (!stopOptions && arg === "--") {
      stopOptions = true;
      continue;
    }
    if (!stopOptions && arg.startsWith("-") && arg !== "-") continue;
    result.push(arg);
  }
  return result;
}

/**
 * Decide whether a git invocation is read-only.
 *
 * Global options (and the values of -C, -c, --git-dir, --work-tree,
 * --namespace) are skipped to find the subcommand. Subcommands in
 * GIT_IMMUTABLE are allowed, those in GIT_MUTABLE are blocked, and those in
 * GIT_MIXED are decided by their predicate, which receives the arguments that
 * follow the subcommand joined by single spaces. Anything else is blocked.
 *
 * @param rest - Everything after the word "git"
 * @returns true when the invocation is considered safe
 */
function isSafeGitCommand(rest: string): boolean {
  const trimmed = rest.trim();
  if (!trimmed) return false;

  const tokens = trimmed.split(/\s+/);
  const FLAGS_WITH_VALUE = new Set(["-C", "-c", "--git-dir", "--work-tree", "--namespace"]);

  let subIndex = -1;
  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    if (FLAGS_WITH_VALUE.has(token)) { i++; continue; }
    if (token.startsWith("-")) continue;
    subIndex = i;
    break;
  }
  if (subIndex === -1) return false;

  const subcommand = tokens[subIndex];
  if (GIT_IMMUTABLE.has(subcommand)) return true;
  if (GIT_MUTABLE.has(subcommand)) return false;

  // Own-property check: names like "constructor" must not resolve to
  // Object.prototype members.
  if (!Object.prototype.hasOwnProperty.call(GIT_MIXED, subcommand)) return false;
  const mixed = GIT_MIXED[subcommand];

  // Take the arguments by token position. Searching the raw string for the
  // subcommand text would match it inside an earlier flag value
  // (git -C /tmp/stash-repo stash list).
  return mixed(tokens.slice(subIndex + 1).join(" ")) === true;
}
/**
 * Remove one pair of matching surrounding quotes ('…' or "…").
 * Tokens that are not fully quoted — including a lone quote character — are
 * returned unchanged.
 */
function stripMatchingQuotes(token: string): string {
  if (token.length >= 2) {
    const first = token[0];
    if ((first === '"' || first === "'") && token.endsWith(first)) {
      return token.slice(1, -1);
    }
  }
  return token;
}

/**
 * True when `candidate` (resolved against `cwd`, symlinks followed via
 * resolveRealPath) is TEMP_DIR itself or lies beneath it.
 */
function isInsideTempDir(candidate: string, cwd: string): boolean {
  const real = resolveRealPath(path.resolve(cwd, candidate));
  const relative = path.relative(TEMP_DIR, real);
  if (relative === "") return true;
  if (path.isAbsolute(relative)) return false;
  // Only a real ".." component escapes; names such as "..cache" do not.
  return relative !== ".." && !relative.startsWith(".." + path.sep);
}

/**
 * Decide whether a write target is acceptable in readonly mode, i.e. whether
 * it resolves inside the OS temp dir.
 *
 * - Empty targets, /dev/null and fd references (&1) are always acceptable.
 * - `~` and `~/…` expand to the home directory; `~user/…` is rejected because
 *   it cannot be resolved here.
 * - Patterns containing glob/brace/backtick characters are expanded against
 *   `cwd`; every match must be inside temp. When nothing matches, the literal
 *   directory prefix of the pattern decides, because the shell may still
 *   create new entries there (`mkdir /etc/{a,b}`).
 * - Anything else is resolved against `cwd` with symlinks followed.
 *
 * @param rawPath - Target as written on the command line (may be quoted)
 * @param cwd - Directory used to resolve relative paths
 */
function isTempPath(rawPath: string, cwd: string): boolean {
  const normalized = stripMatchingQuotes(rawPath);
  if (!normalized || normalized === "/dev/null" || /^&\d+$/.test(normalized)) return true;

  if (normalized.startsWith("~")) {
    if (normalized === "~" || normalized.startsWith("~/")) {
      // Function replacer: the home path must not be read as a "$&" pattern.
      return isTempPath(normalized.replace(/^~/, () => os.homedir()), cwd);
    }
    return false; // ~user/path cannot be resolved safely
  }

  const firstMeta = normalized.search(/[*?`{}()\[\]]/);
  if (firstMeta !== -1) {
    let matches: string[];
    try {
      matches = globSync(normalized, { cwd, dot: true });
    } catch {
      return false;
    }
    if (matches.length > 0) return matches.every((m) => isInsideTempDir(m, cwd));

    // Nothing matched: vet the directory the pattern points into.
    const literal = normalized.slice(0, firstMeta);
    const directory = literal === "" ? "." : literal.endsWith("/") ? literal : path.dirname(literal);
    return isInsideTempDir(directory, cwd);
  }

  return isInsideTempDir(normalized, cwd);
}
+ * + * Handles quoted targets (single/double quotes) and backslash escapes. + * Scope: > (write), >> (append), >| (noclobber override). Heredoc redirects + * (<= cmd.length) return { target: "", end: i }; + + const first = cmd[i]; + if (first === '"' || first === "'") { + const quote = first; + let target = quote; + i++; + while (i < cmd.length) { + const ch = cmd[i]; + target += ch; + if (ch === "\\" && quote === '"' && i + 1 < cmd.length) { + i++; + target += cmd[i]; + continue; + } + if (ch === quote) { + i++; + break; + } + i++; + } + return { target, end: i }; + } + + let target = ""; + while (i < cmd.length) { + const ch = cmd[i]; + if (/\s/.test(ch) || ch === ";" || ch === "|" || ch === "\n") break; + if (ch === "&" && target !== "") break; + target += ch; + i++; + } + return { target, end: i }; +} + +/** + * Detect write redirects (>) to unsafe targets outside the temp dir. + * + * Scope: > (write), >> (append), >| (noclobber override), 2> (stderr), &> (combined). + * Heredoc redirect targets (<") continue; + + const next = cmd[i + 1]; + // >&N = fd redirect (e.g., 2>&1) — not a file write, skip + if (next === "&" && /^[\d-]$/.test(cmd[i + 2] ?? "")) continue; + // >& = combined stdout+stderr redirect to a file, treat as 2-char operator + const opLen = next === ">" || next === "|" || next === "&" ? 2 : 1; + const { target, end } = readRedirectTarget(cmd, i + opLen); + if (!isTempPath(target, cwd)) return stripMatchingQuotes(target) || "(unknown target)"; + i = Math.max(i, end - 1); + } + + return null; +} + +/** + * Split a shell command string into segments separated by shell operators. + * + * Handles quoted strings (single/double quotes) and backslash escapes. 
/**
 * Split a shell command string into segments separated by shell operators.
 *
 * Quoting rules:
 *   - inside single quotes nothing is special, including backslash;
 *   - inside double quotes and outside quotes a backslash escapes the next
 *     character.
 * Shell operator handling (only outside quotes):
 *   ;   — sequential (segment boundary)
 *   |   — pipe (segment boundary)
 *   &   — background (segment boundary, but >&, <& and &> are redirects)
 *   &&  — AND (segment boundary)
 *   ||  — OR (segment boundary)
 *   \n  — newline (segment boundary)
 * The >| and >& operators stay part of the current segment.
 *
 * @param cmd - Raw command line
 * @returns The segments in order, untrimmed; always at least one element
 */
function splitUnquotedShellSegments(cmd: string): string[] {
  const segments: string[] = [];
  let current = "";
  let quote: '"' | "'" | null = null;
  let escaped = false;

  for (let i = 0; i < cmd.length; i++) {
    const ch = cmd[i];
    const next = cmd[i + 1];

    if (escaped) {
      current += ch;
      escaped = false;
      continue;
    }
    // A backslash inside '…' is literal. Treating it as an escape would keep
    // the quote open past its real end and hide the commands that follow.
    if (ch === "\\" && quote !== "'") {
      current += ch;
      escaped = true;
      continue;
    }
    if (quote) {
      current += ch;
      if (ch === quote) quote = null;
      continue;
    }
    if (ch === '"' || ch === "'") {
      quote = ch;
      current += ch;
      continue;
    }
    if ((ch === "&" && next === "&") || (ch === "|" && next === "|")) {
      segments.push(current);
      current = "";
      i++; // consume the second operator character
      continue;
    }
    const prev = current[current.length - 1];
    if (ch === "|" && prev === ">") {
      current += ch; // >| noclobber override
      continue;
    }
    if (ch === "&" && (prev === ">" || prev === "<" || next === ">")) {
      current += ch; // >&, <&, &> redirects
      continue;
    }
    if (ch === ";" || ch === "|" || ch === "&" || ch === "\n") {
      segments.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  segments.push(current);
  return segments;
}
/**
 * Extract the commands embedded in a shell line via command substitution
 * (`$(...)`, backticks) and process substitution (`<(...)`, `>(...)`).
 *
 * Best-effort matching: `$(...)` uses a depth counter, so a nested `$(...)`
 * stays inside its parent's text; quotes and backslash escapes are not
 * tracked. This is a guardrail, not a security boundary.
 *
 * @param line - Shell text to scan
 * @returns Trimmed inner commands: backticks first, then `$()`, then process
 *          substitutions; empty ones are omitted
 */
function extractCommandSubstitutions(line: string): string[] {
  const commands: string[] = [];

  // Backtick substitutions: `cmd`
  const backtickRe = /`([^`]*)`/g;
  let match: RegExpExecArray | null;
  while ((match = backtickRe.exec(line)) !== null) {
    if (match[1].trim()) commands.push(match[1].trim());
  }

  // $() substitutions: depth counter handles nested $()
  for (let i = 0; i < line.length; i++) {
    if (line[i] !== "$" || line[i + 1] !== "(") continue;
    let depth = 1;
    let cmd = "";
    let j = i + 2;
    for (; j < line.length && depth > 0; j++) {
      if (line[j] === "(" && line[j - 1] === "$") depth++;
      else if (line[j] === ")") depth--;
      if (depth > 0) cmd += line[j];
    }
    if (cmd.trim()) commands.push(cmd.trim());
    // j is one past the closing paren; the loop's i++ moves to exactly that
    // position, so an adjacent "$(" (as in "$(a)$(b)") is not skipped.
    i = j - 1;
  }

  // <() and >() process substitutions, with one level of nested parentheses.
  const procSubRe = /[<>]\(([^()]*(?:\([^()]*\)[^()]*)*)\)/g;
  let procMatch: RegExpExecArray | null;
  while ((procMatch = procSubRe.exec(line)) !== null) {
    if (procMatch[1].trim()) commands.push(procMatch[1].trim());
  }

  return commands;
}

// ── Shared readonly bash guard (consumed by parent tool_call hook and child spawnHook) ──

export type ReadonlyBashGuardResult =
  | { action: "allow" }
  | { action: "block"; reason: string }
  | { action: "sandbox"; sandboxedCommand: string };
/**
 * Apply the readonly bash guard to a command.
 *
 * The command is always inspected with classifyBashCommand; a rejected
 * command is blocked whether or not a sandbox exists. An accepted command is
 * wrapped for the OS sandbox when one is available and allowed as-is
 * otherwise.
 *
 * @param cmd - Raw bash command string
 * @param cwd - Working directory for path resolution
 * @returns Structured result: allow, block (with reason), or sandbox (with wrapped command)
 */
export function applyReadonlyBashGuard(cmd: string, cwd: string): ReadonlyBashGuardResult {
  const sandboxAvailable = canUseOsSandbox();

  const verdict = classifyBashCommand(cmd, cwd);
  if (!verdict.ok) {
    const reason = `Readonly mode: command blocked.\nReason: ${verdict.reason}\nCommand: ${cmd}`;
    return { action: "block", reason };
  }

  if (sandboxAvailable) {
    return { action: "sandbox", sandboxedCommand: wrapCommandWithOsSandbox(cmd) };
  }
  return { action: "allow" };
}
/** Upper bound on symlink hops followed manually (guards against link cycles). */
const MAX_SYMLINK_HOPS = 40;

/**
 * Resolve `p` like resolveRealPath, counting manually followed symlinks.
 */
function resolveWithHops(p: string, hops: number): string {
  try {
    return fs.realpathSync(p);
  } catch {
    // realpathSync also fails for a dangling symlink. A write through such a
    // link lands at the link's target, so follow the link by hand.
    if (hops < MAX_SYMLINK_HOPS) {
      let linkTarget: string | undefined;
      try {
        if (fs.lstatSync(p).isSymbolicLink()) linkTarget = fs.readlinkSync(p);
      } catch {
        // p does not exist at all — fall through to the ancestor walk.
      }
      if (linkTarget !== undefined) {
        return resolveWithHops(path.resolve(path.dirname(p), linkTarget), hops + 1);
      }
    }

    const parent = path.dirname(p);
    if (parent === p) return p; // hit root
    return path.join(resolveWithHops(parent, hops), path.basename(p));
  }
}

/**
 * Resolve a path's real location, following symlinks.
 *
 * If the path doesn't exist, walk up to the nearest existing ancestor,
 * resolve that, then append the remaining components. This handles a new file
 * created inside a symlinked temp dir (/tmp -> /private/tmp on macOS).
 * A symlink whose target does not exist yet resolves to that target, not to
 * the link's own location.
 *
 * @param p - Absolute path to resolve
 * @returns The resolved path; never throws
 */
export function resolveRealPath(p: string): string {
  return resolveWithHops(p, 0);
}
/**
 * Truncate text to at most `maxLines` lines and `maxBytes` UTF-8 bytes.
 *
 * The line-count limit is applied first, then the byte limit. The result may
 * end mid-line if the byte limit is the tighter constraint, but never in the
 * middle of a multi-byte character. A negative byte limit is treated as 0.
 *
 * @param text - Text to truncate
 * @param maxLines - Maximum number of lines to keep
 * @param maxBytes - Maximum size of the result in UTF-8 bytes
 * @returns The truncated text
 */
export function truncateText(text: string, maxLines: number, maxBytes: number): string {
  const truncated = text.split("\n").slice(0, maxLines).join("\n");
  const encoded = new TextEncoder().encode(truncated);
  if (!(encoded.length > maxBytes)) return truncated;

  // Move the cut back to a character boundary: bytes of the form 10xxxxxx are
  // continuation bytes, so a cut that lands on one would split a character.
  let end = Math.max(0, Math.floor(maxBytes));
  while (end > 0 && (encoded[end] & 0xc0) === 0x80) end--;
  return new TextDecoder().decode(encoded.subarray(0, end));
}
"You inherit readonly authority in this session." + : "You have the same authority as the parent."; const fullPrompt = `You are a focused child agent spawned by a parent agent. ` + - `You have the same authority as the parent. ` + + `${authorityNote} ` + `Children cannot spawn further children. ` + `Your result will be read by the parent, so be concise and complete.\n\n` + `${notebookListing}\n\n` + `If you write notebook pages, store only durable grounding knowledge for future contexts. ` + `Keep transient task state in your final reply to the parent.\n\n` + - `## Task\n\n${params.prompt}\n\n` + + `## Task\n\n${params.prompt}${readonlyNotice}\n\n` + `When complete, provide a concise summary of findings. ` + `Keep the result under ${CHILD_MAX_LINES} lines / ${(CHILD_MAX_BYTES / 1024).toFixed(0)}KB.`; @@ -246,14 +291,36 @@ export async function executeSpawn( const childTools = createChildTools(pi, state, { isStale }); const parentToolNames = pi.getActiveTools(); const childToolNames = buildChildToolNames(parentToolNames, childTools, pi.getAllTools()); + // Children: readonly vs non-readonly tool strategy differs from the parent. + // Parent keeps write/edit in the tool list and blocks at call time to avoid + // context-cache misses (index.ts). Children start with a fresh context — no + // cache to preserve — so we remove write/edit from the tool list entirely + // (cleaner than advertising tools that always error). The readonly bash guard + // (sandbox-exec/bwrap or classifyBashCommand fallback) still propagates to + // children via createReadonlyChildBashTool below. + // + // This is a guardrail for a coding agent, not a security boundary. + const effectiveChildTools = [ + ...childTools, + ...(state.readonlyEnabled && childToolNames.includes("bash") + ? [createReadonlyChildBashTool(ctx.cwd)] + : []), + ]; + + // Readonly: remove write/edit from child tool list entirely (fresh context, + // no cache to invalidate). 
The readonly bash guard overrides the built-in + // bash tool — no name exclusion needed. + const effectiveToolNames = state.readonlyEnabled + ? childToolNames.filter((name) => name !== "write" && name !== "edit") + : childToolNames; const { session } = await sessionFactory({ sessionManager: SessionManager.inMemory(), model: childModel, thinkingLevel: childThinking, cwd: ctx.cwd, - tools: childToolNames, - customTools: childTools, + tools: effectiveToolNames, + customTools: effectiveChildTools, authStorage, modelRegistry, }); @@ -262,7 +329,7 @@ export async function executeSpawn( let wasAborted = false; const abortChild = () => { wasAborted = true; - session.abort().catch(e => console.error("[spawn] abort failed:", toolCallId, e)); + session.abort().catch(() => {}); }; const clearChildSession = () => { if (state.childSessions.get(toolCallId) === session) { @@ -274,7 +341,7 @@ export async function executeSpawn( }; const abortAndInvalidate = async () => { clearChildSession(); - await session.abort().catch(e => console.error("[spawn] abort failed:", toolCallId, e)); + await session.abort().catch(() => {}); throw invalidatedError; }; @@ -358,7 +425,6 @@ export async function executeSpawn( } } catch (error: unknown) { statsUnavailable = true; - console.warn("[spawn] Failed to collect child session stats:", error, toolCallId); } if (isStale()) { @@ -421,7 +487,17 @@ export function registerSpawnTool( ctx: ExtensionContext, ) { const parentThinking: ThinkingValue = pi.getThinkingLevel(); - return executeSpawn(_toolCallId, pi, ctx, state, params, signal, onUpdate, parentThinking, sessionFactory); + return executeSpawn( + _toolCallId, + pi, + ctx, + state, + params, + signal, + onUpdate, + parentThinking, + sessionFactory, + ); }, renderCall: renderSpawnCall, diff --git a/spawn/renderer.ts b/spawn/renderer.ts index 00e92e7..d6a531b 100644 --- a/spawn/renderer.ts +++ b/spawn/renderer.ts @@ -505,7 +505,6 @@ class NestedAgentSessionComponent extends Container implements 
SpawnFrameTarget : undefined; } catch (error) { this.unsubscribe = undefined; - console.warn("[spawn] Failed to subscribe to child session events:", this.ownedToolCallId, error); } } @@ -575,7 +574,7 @@ class NestedAgentSessionComponent extends Container implements SpawnFrameTarget this.state = undefined; this.attachedChildSessionEpoch = undefined; if (session && ownedToolCallId && liveChildSessions?.get(ownedToolCallId) === session) { - session.abort().catch(e => console.error("[spawn] abort failed:", ownedToolCallId, e)); + session.abort().catch(() => {}); liveChildSessions.delete(ownedToolCallId); } } @@ -665,11 +664,6 @@ class NestedAgentSessionComponent extends Container implements SpawnFrameTarget if (isExpectedToolComponentFailure(error)) { return undefined; } - const failureKey = `${toolCallId}:${toolName}`; - if (!this.toolComponentFailures.has(failureKey)) { - this.toolComponentFailures.add(failureKey); - console.warn("[spawn] Failed to create tool component:", toolCallId, toolName, error); - } return undefined; } } @@ -896,7 +890,6 @@ class NestedAgentSessionComponent extends Container implements SpawnFrameTarget if (isExpectedToolComponentFailure(error)) { return; } - console.warn(`[spawn] streaming component error (${eventType}):`, this.ownedToolCallId, error); } // ── Event handlers ─────────────────────────────────────────────── @@ -1111,7 +1104,6 @@ class NestedAgentSessionComponent extends Container implements SpawnFrameTarget this.resetRenderBatching(); // Prevent a single bad event from killing the subscription. // The TUI degrades gracefully — stale content until next successful event. - console.warn("[spawn] Event handler error:", event.type, this.ownedToolCallId, error); } } } diff --git a/state.ts b/state.ts index 626c696..345d8ad 100644 --- a/state.ts +++ b/state.ts @@ -63,6 +63,20 @@ export interface AgenticodingState { * Increment on /new so stale child updates/results cannot touch fresh state. 
*/ childSessionEpoch: number; + + /** Whether readonly mode is active — blocks write/edit/handoff and bash writes outside temp. */ + readonlyEnabled: boolean; + + /** One-shot flag: deliver a readonly ON or OFF nudge via context hook, then clear. */ + readonlyNudgePending: boolean; + + /** + * Last context-percentage band at which the watchdog nudge was delivered. + * null = never delivered. Bands: null (<30), 0 (30-49), 1 (50-69), 2 (70+). + * Used to throttle nudges — only nudge when crossing into a higher band. + */ + lastWatchdogBand: number | null; + } /** Create a fresh state instance. Call reset() on /new. */ @@ -81,6 +95,9 @@ export function createState(): AgenticodingState { childSessions, liveChildSessions, childSessionEpoch: 0, + readonlyEnabled: false, + readonlyNudgePending: false, + lastWatchdogBand: null, }; // Prevent replacement — spawn lifecycle code and renderer ownership checks // depend on stable map identity. Only .clear() and .delete() are valid — @@ -111,6 +128,9 @@ export function resetState(state: AgenticodingState): void { state.lastContextPercent = null; state.pendingHandoff = null; state.pendingRequestedHandoff = null; + state.readonlyEnabled = false; + state.readonlyNudgePending = false; + state.lastWatchdogBand = null; abortAndClearChildSessions(state); } @@ -123,6 +143,6 @@ export function abortAndClearChildSessions(state: AgenticodingState): void { state.childSessions.clear(); state.liveChildSessions.clear(); for (const [session, id] of seen) { - session.abort().catch((e: unknown) => console.warn("[spawn] abort failed:", id, e)); + session.abort().catch(() => {}); } } diff --git a/system-prompt.ts b/system-prompt.ts index ae7b809..5d9c726 100644 --- a/system-prompt.ts +++ b/system-prompt.ts @@ -11,18 +11,20 @@ export const CONTEXT_PRIMER = ` One context, one job. Research is one job. Planning is one job. Execution is one job. When the job changes, call the handoff tool. 
-### The primacy-zone heuristic +### Plan then execute +Before acting, deliberate internally. Does the work still fit the +current topic? If yes, break it into phases, size each sub-task, +and delegate >10k-token sub-tasks via spawn. If no, prefer handoff. +Consider spawn for verification. End by presenting the concise plan. + +### The primacy-zone You use long context unevenly. Performance can degrade as context grows — -even far from the window limit. Treat the first ~30% as a practical heuristic -for keeping the current job near the front of attention. The system tells you -exact context usage after each turn, and watchdog reminders may be injected -before LLM calls when context is past the heuristic. Watchdog reminders are -advisory only. +even far from the window limit. Treat the first ~30% as the optimal working zone. ### Spawn — isolate noise Delegate isolated work to child agents. They are trusted extensions of you, with their own context and the same authority. You receive only condensed -results. Parent context stays at orchestration level. Siblings run in parallel. +results. Your context stays at orchestration level. Siblings run in parallel. ### Notebook — durable cross-context grounding Treat the notebook as durable grounding for future contexts. Each page covers diff --git a/temp-dir.ts b/temp-dir.ts new file mode 100644 index 0000000..b8ae0ad --- /dev/null +++ b/temp-dir.ts @@ -0,0 +1,17 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +/** + * Canonical (symlink-resolved) OS temp dir path. + * + * Resolved at module import time. Shared by readonly-bash.ts and os-sandbox.ts + * so both modules agree on the same temp directory. + * + * This lives in its own module to avoid a cyclic dependency between + * readonly-bash.ts (imports from os-sandbox.ts) and os-sandbox.ts. 
+ */ +export const TEMP_DIR = (() => { + const resolved = path.resolve(os.tmpdir()); + try { return fs.realpathSync(resolved); } catch { return resolved; } +})(); diff --git a/tui.ts b/tui.ts index 9205b2c..22b2a58 100644 --- a/tui.ts +++ b/tui.ts @@ -25,6 +25,9 @@ export const STATUS_KEY_NOTEBOOK = "agenticoding-notebook"; /** Status bar key for the active notebook topic. */ export const STATUS_KEY_TOPIC = "agenticoding-topic"; +/** Status bar key for the readonly mode indicator. */ +export const STATUS_KEY_READONLY = "agenticoding-readonly"; + /** Update TUI indicators: context usage, notebook count, topic, warning widget. */ export function updateIndicators(ctx: ExtensionContext, state: AgenticodingState): void { if (!ctx.hasUI) return; @@ -48,6 +51,12 @@ export function updateIndicators(ctx: ExtensionContext, state: AgenticodingState : theme.fg("dim", "\u{1F4D2} 0"), ); + // Readonly mode indicator + ctx.ui.setStatus( + STATUS_KEY_READONLY, + state.readonlyEnabled ? theme.fg("warning", "\u{1F512} readonly") : undefined, + ); + // Active notebook topic — show a dim placeholder when unset so the frame is discoverable ctx.ui.setStatus( STATUS_KEY_TOPIC, @@ -58,9 +67,12 @@ export function updateIndicators(ctx: ExtensionContext, state: AgenticodingState // High-context warning widget (above editor) if (usage && usage.percent !== null && usage.percent >= 70) { - const warning = state.activeNotebookTopic - ? `Context at ${Math.round(usage.percent)}% — use topic fit: same topic → spawn, different topic → handoff` - : `Context at ${Math.round(usage.percent)}% — no active topic; handoff soon unless you can assign one cleanly`; + const pct = Math.round(usage.percent); + const warning = state.readonlyEnabled + ? `Context at ${pct}% — readonly: same topic → spawn; different topic → disable readonly, then handoff` + : state.activeNotebookTopic + ? 
`Context at ${pct}% — use topic fit: same topic → spawn, different topic → handoff` + : `Context at ${pct}% — no active topic; handoff soon unless you can assign one cleanly`; ctx.ui.setWidget(WIDGET_KEY_WARNING, [ theme.fg("error", "\u26A0 ") + theme.fg("warning", warning), ]); diff --git a/watchdog.ts b/watchdog.ts index 2800817..0e832cc 100644 --- a/watchdog.ts +++ b/watchdog.ts @@ -12,12 +12,20 @@ import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-a import type { AgenticodingState } from "./state.js"; import { STATUS_KEY_HANDOFF } from "./tui.js"; -export function buildNudge(state: Pick, percent: number | null): string { +export function buildNudge( + state: Pick, + percent: number | null, +): string { const pct = percent === null ? null : Math.round(percent); const topic = state.activeNotebookTopic; const boundary = state.pendingTopicBoundaryHint; + const readonly = state.readonlyEnabled; if (boundary) { + if (readonly) { + return `Notebook topic changed from ${boundary.from ?? "(unset)"} to ${boundary.to}. +Readonly blocks handoff. Use spawn only for subtasks that still fit the current topic. Disable readonly with /readonly before a real handoff.`; + } return `Notebook topic changed from ${boundary.from ?? "(unset)"} to ${boundary.to}. Treat this as a strong task-boundary signal. Prefer a deliberate handoff before continuing under the new topic: save durable findings to the notebook, draft a @@ -25,6 +33,24 @@ concise situational brief, and call handoff. Only continue inline if this was merely a rename rather than a real pivot.`; } + if (readonly) { + const contextLead = pct === null + ? "Readonly mode is active." + : `Context at ${pct}% — readonly mode is active.`; + + const readonlyAdvice = "Use spawn only for same-topic delegation. Disable readonly with /readonly before a real handoff."; + if (topic) { + return `${contextLead} +Active notebook topic: ${topic}. 
+${readonlyAdvice} +Save durable findings to the notebook before moving on.`; + } + return `${contextLead} +${readonlyAdvice} +Assign a short stable topic with notebook_topic_set to track the current frame.`; + } + + // ── Not readonly — existing logic unchanged ────────────────────── const contextLead = pct === null ? "Topic-aware context reminder." : pct >= 70