diff --git a/.gitignore b/.gitignore index 83631f8..5aec6b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ dist/ node_modules/ *.tsbuildinfo +.pnpm-store/ diff --git a/README.md b/README.md index 93d098b..949b619 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ The local embedding model downloads automatically on install. For API-based embe - **CLI Tools** - Export, import, list, stats, cleanup, upgrade, status, and cancel commands via `ocm-mem` binary - **Dimension Mismatch Detection** - Detects embedding model changes and guides recovery via reindex - **Iterative Development Loops** - Autonomous coding/auditing loop with worktree isolation, session rotation, stall detection, and review finding persistence +- **Docker Sandbox** - Run loops inside isolated Docker containers with bind-mounted project directory, automatic container lifecycle, and selective tool routing (bash, glob, grep) ## Agents @@ -269,6 +270,10 @@ You can edit this file to customize settings. The file is created only if it doe "minAudits": 1, "stallTimeoutMs": 60000 }, + "sandbox": { + "mode": "off", + "image": "ocm-sandbox:latest" + }, "tui": { "sidebar": true, "showLoops": true, @@ -344,6 +349,10 @@ When enabled, logs are written to the specified file with timestamps. 
The log fi - `loop.stallTimeoutMs` - Watchdog stall detection timeout in milliseconds (default: `60000`) - `loop.minAudits` - Minimum audit iterations required before completion (default: `1`) +#### Sandbox +- `sandbox.mode` - Sandbox mode: `"off"` or `"docker"` (default: `"off"`) +- `sandbox.image` - Docker image for sandbox containers (default: `"ocm-sandbox:latest"`) + #### Top-level - `defaultKvTtlMs` - Default TTL for KV store entries in milliseconds (default: `604800000` / 7 days) @@ -397,8 +406,8 @@ After the architect presents a plan, the user approves via one of four execution - **New session** — Creates a new Code session via `memory-plan-execute` - **Execute here** — Executes the plan in the current session (code agent takes over immediately) -- **Loop (worktree)** — Runs the plan in an isolated git worktree with iterative coding/auditing via `memory-loop` -- **Loop** — Same as loop (worktree) but runs in the current directory (no worktree isolation) +- **Loop (worktree)** — Runs the plan in an isolated git worktree with iterative coding/auditing via `memory-loop`. When `config.sandbox.mode` is `"docker"`, the loop automatically uses Docker sandbox. +- **Loop** — Same as loop (worktree) but runs in the current directory (no worktree isolation, no sandbox) Set `executionModel` in your config to a fast model (e.g., Haiku) and use a smart model (e.g., Opus) for the architect session. @@ -460,6 +469,84 @@ By default, loops run in an isolated git worktree. Set `inPlace: true` to run in See the [full documentation](https://chriswritescode-dev.github.io/opencode-memory/features/memory/#loop) for details on worktree management, model configuration, and termination conditions. +## Docker Sandbox + +Run loop iterations inside an isolated Docker container. Three tools (`bash`, `glob`, `grep`) execute inside the container via `docker exec`, while `read`/`write`/`edit` operate on the host filesystem. 
Your project directory is bind-mounted at `/workspace` for instant file sharing. + +### Prerequisites + +- Docker running on your machine + +### Setup + +**1. Build the sandbox image:** + +```bash +docker build -t ocm-sandbox:latest container/ +``` + +The image includes Node.js 24, pnpm, Bun, Python 3 + uv, ripgrep, git, and jq. + +**2. Enable sandbox mode in your config** (`~/.config/opencode/memory-config.jsonc`): + +```jsonc +{ + "sandbox": { + "mode": "docker", + "image": "ocm-sandbox:latest" + } +} +``` + +**3. Restart OpenCode.** + +### Usage + +Start a sandbox loop via the architect plan approval flow (select "Loop (worktree)") or directly with the `memory-loop` tool: + +``` +memory-loop with worktree: true +``` + +The sandbox is automatically enabled when `config.sandbox.mode` is set to `"docker"` and the loop uses `worktree: true`. The loop: +1. Creates a git worktree +2. Starts a Docker container with the worktree directory bind-mounted at `/workspace` +3. Redirects `bash`, `glob`, and `grep` tool calls into the container +4. Cleans up the container on loop completion or cancellation + +### How It Works + +- **Bind mount** -- the project directory is mounted directly into the container at `/workspace`. No sync daemon, no file copying. Changes are visible instantly on both sides. +- **Tool redirection** -- `bash`, `glob`, and `grep` route through `docker exec` when a session belongs to a sandbox loop. The `read`/`write`/`edit` tools operate on the host filesystem directly (compatible with host LSP). +- **Git blocking** -- `bash` commands that begin with `git` are rejected by the sandbox hook before they reach the container. All git operations (commit, push, branch management) are handled by the loop system on the host. +- **Host LSP** -- since files are shared via the bind mount, OpenCode's LSP servers on the host read the same files and provide diagnostics after writes and edits. +- **Container lifecycle** -- one container per loop, automatically started and stopped. 
Container names use the `ocm-sandbox-` prefix followed by a per-loop identifier. + +### Configuration + +| Option | Default | Description | +|--------|---------|-------------| +| `sandbox.mode` | `"off"` | Set to `"docker"` to enable sandbox support | +| `sandbox.image` | `"ocm-sandbox:latest"` | Docker image to use for sandbox containers | + +### Customizing the Image + +The `container/Dockerfile` is included in the project. To add project-specific tools (e.g., Go, Rust, additional language servers), edit the Dockerfile and rebuild: + +```bash +docker build -t ocm-sandbox:latest container/ +``` + +### Caveats + +- **Worktree required** -- sandbox only works with `worktree: true`. In-place loops (`worktree: false`) never use the sandbox. +- **Git blocked** -- `bash` commands that begin with `git` are rejected by the sandbox hook before they reach the container. All git operations are handled by the loop system on the host. +- **No `tsc` global** -- the TypeScript compiler is not globally available in the container. Use `pnpm tsc` or add it to your project dependencies. +- **pnpm install caution** -- running `pnpm install` in the container writes `node_modules` to the host via the bind mount, potentially bloating worktree diffs. +- **No network isolation** -- the container has full network access (no `--network=none` flag). +- **No resource limits** -- no `--memory`, `--cpus`, or `--pids-limit` flags are applied. +- **Orphan cleanup** -- orphaned containers from previous runs are automatically cleaned up on plugin startup. 
+ ## Documentation Full documentation available at [chriswritescode-dev.github.io/opencode-memory/features/memory](https://chriswritescode-dev.github.io/opencode-memory/features/memory/) diff --git a/config.jsonc b/config.jsonc index a00a7d3..cfc8746 100644 --- a/config.jsonc +++ b/config.jsonc @@ -44,6 +44,10 @@ "minAudits": 1, "stallTimeoutMs": 60000 }, + "sandbox": { + "mode": "off", + "image": "ocm-sandbox:latest" + }, "tui": { "sidebar": true, "showLoops": true, diff --git a/container/.dockerignore b/container/.dockerignore new file mode 100644 index 0000000..8bb7566 --- /dev/null +++ b/container/.dockerignore @@ -0,0 +1,4 @@ +node_modules +.git +dist +*.log diff --git a/container/Dockerfile b/container/Dockerfile new file mode 100644 index 0000000..b27f8c8 --- /dev/null +++ b/container/Dockerfile @@ -0,0 +1,35 @@ +FROM node:24-slim + +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + curl \ + jq \ + python3 \ + python3-venv \ + sudo \ + ca-certificates \ + unzip \ + && rm -rf /var/lib/apt/lists/* + +RUN npm install -g pnpm + +ENV PNPM_HOME="/home/devuser/.local/share/pnpm" +ENV npm_config_store_dir="/home/devuser/.local/share/pnpm/store" + +RUN ARCH="$(uname -m)" && \ + curl -LsSf "https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep-14.1.1-${ARCH}-unknown-linux-gnu.tar.gz" | tar xz && \ + mv "ripgrep-14.1.1-${ARCH}-unknown-linux-gnu/rg" /usr/local/bin/ && \ + rm -rf "ripgrep-14.1.1-${ARCH}-unknown-linux-gnu" + +RUN useradd -m -s /bin/bash -u 1001 devuser && \ + echo "devuser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers + +USER devuser + +RUN curl -LsSf https://astral.sh/uv/install.sh | sh +RUN curl -fsSL https://bun.sh/install | bash + +ENV PATH="/home/devuser/.local/bin:/home/devuser/.bun/bin:/home/devuser/.cargo/bin:${PATH}" + +WORKDIR /workspace +CMD ["sleep", "infinity"] diff --git a/package.json b/package.json index a36af2a..17db975 100644 --- a/package.json +++ b/package.json @@ -51,7 +51,6 @@ 
"@huggingface/transformers": "^3.8.1", "@opencode-ai/plugin": "^1.3.5", "@opencode-ai/sdk": "^1.2.26", - "jsonc-parser": "^3.3.1", "sqlite-vec": "0.1.7-alpha.2" }, "peerDependencies": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index facd6b9..8bcca35 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -17,9 +17,6 @@ importers: '@opencode-ai/sdk': specifier: ^1.2.26 version: 1.3.7 - jsonc-parser: - specifier: ^3.3.1 - version: 3.3.1 sqlite-vec: specifier: 0.1.7-alpha.2 version: 0.1.7-alpha.2 @@ -238,105 +235,89 @@ packages: resolution: {integrity: sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==} cpu: [arm64] os: [linux] - libc: [glibc] '@img/sharp-libvips-linux-arm@1.2.4': resolution: {integrity: sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==} cpu: [arm] os: [linux] - libc: [glibc] '@img/sharp-libvips-linux-ppc64@1.2.4': resolution: {integrity: sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==} cpu: [ppc64] os: [linux] - libc: [glibc] '@img/sharp-libvips-linux-riscv64@1.2.4': resolution: {integrity: sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==} cpu: [riscv64] os: [linux] - libc: [glibc] '@img/sharp-libvips-linux-s390x@1.2.4': resolution: {integrity: sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==} cpu: [s390x] os: [linux] - libc: [glibc] '@img/sharp-libvips-linux-x64@1.2.4': resolution: {integrity: sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==} cpu: [x64] os: [linux] - libc: [glibc] '@img/sharp-libvips-linuxmusl-arm64@1.2.4': resolution: {integrity: sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==} cpu: [arm64] os: [linux] - libc: [musl] '@img/sharp-libvips-linuxmusl-x64@1.2.4': resolution: {integrity: 
sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==} cpu: [x64] os: [linux] - libc: [musl] '@img/sharp-linux-arm64@0.34.5': resolution: {integrity: sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [arm64] os: [linux] - libc: [glibc] '@img/sharp-linux-arm@0.34.5': resolution: {integrity: sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [arm] os: [linux] - libc: [glibc] '@img/sharp-linux-ppc64@0.34.5': resolution: {integrity: sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [ppc64] os: [linux] - libc: [glibc] '@img/sharp-linux-riscv64@0.34.5': resolution: {integrity: sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [riscv64] os: [linux] - libc: [glibc] '@img/sharp-linux-s390x@0.34.5': resolution: {integrity: sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [s390x] os: [linux] - libc: [glibc] '@img/sharp-linux-x64@0.34.5': resolution: {integrity: sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [x64] os: [linux] - libc: [glibc] '@img/sharp-linuxmusl-arm64@0.34.5': resolution: {integrity: sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [arm64] os: [linux] - libc: [musl] '@img/sharp-linuxmusl-x64@0.34.5': resolution: {integrity: sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==} 
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} cpu: [x64] os: [linux] - libc: [musl] '@img/sharp-wasm32@0.34.5': resolution: {integrity: sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==} @@ -846,9 +827,6 @@ packages: engines: {node: '>=6'} hasBin: true - jsonc-parser@3.3.1: - resolution: {integrity: sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ==} - locate-path@3.0.0: resolution: {integrity: sha512-7AO748wWnIhNqAuaty2ZWHkQHRSNfPVIsPIfwEOWO22AmaoVrWavlOcMR5nzTLNYvp36X220/maaRsrec1G65A==} engines: {node: '>=6'} @@ -2053,8 +2031,6 @@ snapshots: json5@2.2.3: {} - jsonc-parser@3.3.1: {} - locate-path@3.0.0: dependencies: p-locate: 3.0.0 diff --git a/src/agents/architect.ts b/src/agents/architect.ts index 24aec02..3030531 100644 --- a/src/agents/architect.ts +++ b/src/agents/architect.ts @@ -99,11 +99,50 @@ KV entries are scoped to the current project and expire after 7 days. Use this f Present plans with: - **Objective**: What we're building and why - **Phases**: Ordered implementation steps, each with specific files to create/modify, what changes to make, and acceptance criteria -- **Verification**: Concrete, runnable commands that prove the plan is complete. Every plan MUST include at least one verification step. Examples: - - Test commands: \`pnpm test\`, \`vitest run src/path/to/test.ts\` - - Type checking: \`pnpm tsc --noEmit\`, \`pnpm lint\` - - Runtime checks: curl commands, specific assertions about output - Plans without verification steps are incomplete. If no existing tests cover the changes, the plan MUST include a phase to write tests. +- **Verification**: Concrete criteria the code agent can validate automatically inside the loop. Every plan MUST include verification. Plans without verification are incomplete. 
+ + **Verification tiers (prefer higher tiers):** + + | Tier | Type | Example | Why | + |---|---|---|---| + | 1 | Targeted tests | \`vitest run src/services/loop.test.ts\` | Directly exercises the new code paths | + | 2 | Type/lint checks | \`pnpm tsc --noEmit\`, \`pnpm lint\` | Catches structural and convention errors | + | 3 | File assertions | "src/services/auth.ts exports \`validateToken(token: string): boolean\`" | Auditor can verify by reading code | + | 4 | Behavioral assertions | "Calling \`parseConfig({})\` returns default config, not throws" | Should be captured in a test | + + **Do NOT use these as verification — they cannot be validated in an automated loop:** + - \`pnpm build\` — tests bundling, not correctness; slow and opaque + - \`curl\` / HTTP requests — requires a running server + - \`pnpm test\` (full suite without path) — too broad, may fail for unrelated reasons + - Manual checks ("verify the UI", "check the output looks right") + - External service dependencies (APIs, databases that may not be running) + + **Test requirements for new code:** + When a plan adds new functions, modules, or significant logic, verification MUST include either: + - Existing tests that already cover the new code paths (cite the specific test file) + - A dedicated phase to write targeted tests, specifying: what function/behavior to test, happy path, error cases, and edge cases + + When tests are required, they must actually exercise the code — not just exist. The auditor will verify test quality. + + **Per-phase acceptance criteria:** + Each phase MUST have its own acceptance criteria, not just a global verification section. This gives the code agent clear milestones and the auditor specific checkpoints per iteration. + + **Good verification example:** + \`\`\` + ## Verification + 1. \`vitest run test/loop.test.ts\` — all tests pass + 2. \`pnpm tsc --noEmit\` — no type errors + 3. 
\`src/services/loop.ts\` exports \`buildAuditPrompt\` accepting \`LoopState\`, returning \`string\` + 4. New tests in \`test/loop.test.ts\` cover: empty state, state with findings, long prompt truncation + \`\`\` + + **Bad verification example:** + \`\`\` + ## Verification + 1. Run \`pnpm build\` — builds successfully + 2. Start the server and test manually + 3. Everything should work + \`\`\` - **Decisions**: Architectural choices made during planning with rationale - **Conventions**: Existing project conventions that must be followed - **Key Context**: Relevant code patterns, file locations, integration points, and dependencies discovered during research diff --git a/src/agents/auditor.ts b/src/agents/auditor.ts index 4fe91e6..54351dc 100644 --- a/src/agents/auditor.ts +++ b/src/agents/auditor.ts @@ -57,9 +57,12 @@ Diffs alone are not enough. After getting the diff: **Behavior Changes** — If a behavioral change is introduced, raise it (especially if possibly unintentional). -**Plan Compliance** — When reviewing loop iterations, check whether the implementation satisfies the plan's stated acceptance criteria and verification steps. -- If the task context includes verification commands (test, lint, type check), check whether they were run and passed -- If acceptance criteria from the plan are not met, report as a **warning** with the specific unmet criterion +**Plan Compliance** — When reviewing loop iterations, rigorously verify the implementation against the plan's stated acceptance criteria and verification steps. +- Check **per-phase acceptance criteria**: each plan phase should have its own criteria. Verify every phase that has been implemented so far. +- If verification commands are listed (targeted tests, type check, lint), confirm they were run AND passed. If you can't confirm, run them yourself. +- If the plan required tests to be written, verify the tests actually exercise the stated scenarios — not just that they exist. 
Tests that pass trivially (empty assertions, mocked everything) do not satisfy the requirement. +- If file-level assertions are listed (e.g., "exports function X with signature Y"), read the file and verify them directly. +- Report **unmet acceptance criteria as bug severity** — they block loop completion. Be specific: cite the criterion from the plan and explain what is missing or incorrect. ## Before You Flag Something diff --git a/src/hooks/loop.ts b/src/hooks/loop.ts index d06217c..359e069 100644 --- a/src/hooks/loop.ts +++ b/src/hooks/loop.ts @@ -5,7 +5,8 @@ import { MAX_RETRIES, MAX_CONSECUTIVE_STALLS } from '../services/loop' import type { Logger, PluginConfig, LoopConfig } from '../types' import { parseModelString, retryWithModelFallback } from '../utils/model-fallback' import { execSync, spawnSync } from 'child_process' -import { resolve } from 'path' +import { resolve, join } from 'path' +import type { createSandboxManager } from '../sandbox/manager' export interface LoopEventHandler { onEvent(input: { event: { type: string; properties?: Record } }): Promise @@ -22,12 +23,14 @@ export function createLoopEventHandler( v2Client: OpencodeClient, logger: Logger, getConfig: () => PluginConfig, + sandboxManager?: ReturnType, ): LoopEventHandler { const minAudits = loopService.getMinAudits() const retryTimeouts = new Map() const lastActivityTime = new Map() const stallWatchdogs = new Map() const consecutiveStalls = new Map() + const watchdogRunning = new Map() const stateLocks = new Map>() function withStateLock(worktreeName: string, fn: () => Promise): Promise { @@ -51,6 +54,15 @@ export function createLoopEventHandler( let cleaned = false try { + // Remove the opencode.jsonc file we wrote for permissions - don't commit it + try { + const { unlinkSync } = await import('fs') + unlinkSync(join(state.worktreeDir, 'opencode.jsonc')) + logger.log(`Loop: removed opencode.jsonc before commit`) + } catch { + // File may not exist, ignore + } + const addResult = 
spawnSync('git', ['add', '-A'], { cwd: state.worktreeDir, encoding: 'utf-8' }) if (addResult.status !== 0) { throw new Error(addResult.stderr || 'git add failed') @@ -103,6 +115,7 @@ export function createLoopEventHandler( } lastActivityTime.delete(worktreeName) consecutiveStalls.delete(worktreeName) + watchdogRunning.delete(worktreeName) } function startWatchdog(worktreeName: string): void { @@ -113,56 +126,68 @@ export function createLoopEventHandler( const stallTimeout = loopService.getStallTimeoutMs() const interval = setInterval(async () => { - const lastActivity = lastActivityTime.get(worktreeName) - if (!lastActivity) return - - const elapsed = Date.now() - lastActivity - if (elapsed < stallTimeout) return - - const state = loopService.getActiveState(worktreeName) - if (!state?.active) { - stopWatchdog(worktreeName) - return - } - - const sessionId = state.sessionId + if (watchdogRunning.get(worktreeName)) return + watchdogRunning.set(worktreeName, true) try { - const statusResult = await v2Client.session.status({ directory: state.worktreeDir }) - const statuses = (statusResult.data ?? {}) as Record + const lastActivity = lastActivityTime.get(worktreeName) + if (!lastActivity) return - const status = statuses[sessionId]?.type - const hasActiveWork = status === 'busy' || status === 'retry' || status === 'compact' + const elapsed = Date.now() - lastActivity + if (elapsed < stallTimeout) return - if (hasActiveWork) { - lastActivityTime.set(worktreeName, Date.now()) - logger.log(`Loop watchdog: worktree ${worktreeName} has active work, resetting timer`) + const state = loopService.getActiveState(worktreeName) + if (!state?.active) { + stopWatchdog(worktreeName) return } - } catch (err) { - logger.error(`Loop watchdog: failed to check session status`, err) - return - } - const stallCount = (consecutiveStalls.get(worktreeName) ?? 
0) + 1 - consecutiveStalls.set(worktreeName, stallCount) - lastActivityTime.set(worktreeName, Date.now()) + const sessionId = state.sessionId + let statusCheckFailed = false + try { + const statusResult = await v2Client.session.status({ directory: state.worktreeDir }) + const statuses = (statusResult.data ?? {}) as Record - if (stallCount >= MAX_CONSECUTIVE_STALLS) { - logger.error(`Loop watchdog: worktree ${worktreeName} exceeded max consecutive stalls (${MAX_CONSECUTIVE_STALLS}), terminating`) - await terminateLoop(worktreeName, state, 'stall_timeout') - return - } + const status = statuses[sessionId]?.type + const hasActiveWork = status === 'busy' || status === 'retry' + + if (hasActiveWork) { + lastActivityTime.set(worktreeName, Date.now()) + logger.log(`Loop watchdog: worktree ${worktreeName} has active work (${status}), resetting timer`) + return + } + } catch (err) { + logger.error(`Loop watchdog: failed to check session status, treating as stall`, err) + statusCheckFailed = true + } - logger.log(`Loop watchdog: stall detected for worktree ${worktreeName} (${stallCount}/${MAX_CONSECUTIVE_STALLS}), re-triggering ${state.phase} phase`) + const stallCount = (consecutiveStalls.get(worktreeName) ?? 
0) + 1 + consecutiveStalls.set(worktreeName, stallCount) + lastActivityTime.set(worktreeName, Date.now()) - try { - if (state.phase === 'auditing') { - await handleAuditingPhase(worktreeName, state) - } else { - await handleCodingPhase(worktreeName, state) + if (stallCount >= MAX_CONSECUTIVE_STALLS) { + logger.error(`Loop watchdog: worktree ${worktreeName} exceeded max consecutive stalls (${MAX_CONSECUTIVE_STALLS}), terminating`) + await terminateLoop(worktreeName, state, 'stall_timeout') + return } - } catch (err) { - await handlePromptError(worktreeName, state, `watchdog recovery in ${state.phase} phase`, err) + + logger.log(`Loop watchdog: stall #${stallCount}/${MAX_CONSECUTIVE_STALLS} for ${worktreeName} (phase=${state.phase}, elapsed=${elapsed}ms, statusCheckFailed=${statusCheckFailed}), re-triggering`) + + await withStateLock(worktreeName, async () => { + const freshState = loopService.getActiveState(worktreeName) + if (!freshState?.active) return + + try { + if (freshState.phase === 'auditing') { + await handleAuditingPhase(worktreeName, freshState) + } else { + await handleCodingPhase(worktreeName, freshState) + } + } catch (err) { + await handlePromptError(worktreeName, freshState, `watchdog recovery in ${freshState.phase} phase`, err) + } + }) + } finally { + watchdogRunning.set(worktreeName, false) } }, stallTimeout) @@ -239,6 +264,15 @@ export function createLoopEventHandler( if (reason === 'completed' || reason === 'cancelled') { commitResult = await commitAndCleanupWorktree(state) } + + if (state.sandbox && state.sandboxContainerName && sandboxManager) { + try { + await sandboxManager.stop(state.worktreeName) + logger.log(`Loop: stopped sandbox container for ${state.worktreeName}`) + } catch (err) { + logger.error(`Loop: failed to stop sandbox container`, err) + } + } } async function handlePromptError(worktreeName: string, state: LoopState, context: string, err: unknown, retryFn?: () => Promise): Promise { @@ -388,7 +422,7 @@ export function 
createLoopEventHandler( logger.log(`Loop: completion promise detected but outstanding review findings remain, continuing`) } else { await terminateLoop(worktreeName, currentState, 'completed') - logger.log(`Loop completed: detected ${currentState.completionPromise} at iteration ${currentState.iteration} (${currentAuditCount}/${minAudits} audits)`) + logger.log(`Loop completed: detected ${currentState.completionPromise} at iteration ${currentState.iteration} (${currentAuditCount}/${minAudits} audits)`) return } } else { @@ -581,7 +615,7 @@ export function createLoopEventHandler( logger.log(`Loop: completion promise detected but outstanding review findings remain, continuing`) } else { await terminateLoop(worktreeName, currentState, 'completed') - logger.log(`Loop completed: detected ${currentState.completionPromise} in audit at iteration ${currentState.iteration} (${newAuditCount}/${minAudits} audits)`) + logger.log(`Loop completed: detected ${currentState.completionPromise} in audit at iteration ${currentState.iteration} (${newAuditCount}/${minAudits} audits)`) return } } else { @@ -752,20 +786,18 @@ export function createLoopEventHandler( const state = loopService.getActiveState(worktreeName) if (!state || !state.active) return + if (state.sessionId !== sessionId) { + logger.log(`Loop: ignoring stale idle event for session ${sessionId} (current: ${state.sessionId})`) + return + } + try { - // Re-check state right before calling phase handler as extra safety - const freshState = loopService.getActiveState(worktreeName) - if (!freshState?.active) { - logger.log(`Loop: loop ${worktreeName} was terminated, skipping phase handler`) - return - } - startWatchdog(worktreeName) - if (freshState.phase === 'auditing') { - await handleAuditingPhase(worktreeName, freshState) + if (state.phase === 'auditing') { + await handleAuditingPhase(worktreeName, state) } else { - await handleCodingPhase(worktreeName, freshState) + await handleCodingPhase(worktreeName, state) } } catch 
(err) { const freshState = loopService.getActiveState(worktreeName) @@ -789,6 +821,7 @@ export function createLoopEventHandler( } lastActivityTime.clear() consecutiveStalls.clear() + watchdogRunning.clear() stateLocks.clear() logger.log('Loop: cleared all retry timeouts') } diff --git a/src/hooks/sandbox-tools.ts b/src/hooks/sandbox-tools.ts new file mode 100644 index 0000000..436444b --- /dev/null +++ b/src/hooks/sandbox-tools.ts @@ -0,0 +1,98 @@ +import type { Hooks } from '@opencode-ai/plugin' +import type { Logger } from '../types' +import type { createLoopService } from '../services/loop' +import type { createSandboxManager } from '../sandbox/manager' +import { toContainerPath, rewriteOutput } from '../sandbox/path' + +interface SandboxToolHookDeps { + loopService: ReturnType + sandboxManager: ReturnType | null + logger: Logger +} + +const pendingResults = new Map() + +function getSandboxContext(deps: SandboxToolHookDeps, sessionId: string) { + if (!deps.sandboxManager) return null + + const worktreeName = deps.loopService.resolveWorktreeName(sessionId) + if (!worktreeName) return null + + const state = deps.loopService.getActiveState(worktreeName) + if (!state?.active || !state.sandbox) return null + + const active = deps.sandboxManager.getActive(worktreeName) + if (!active) return null + + return { + docker: deps.sandboxManager.docker, + containerName: active.containerName, + hostDir: active.projectDir, + } +} + +export function createSandboxToolBeforeHook(deps: SandboxToolHookDeps): Hooks['tool.execute.before'] { + return async ( + input: { tool: string; sessionID: string; callID: string }, + output: { args: any }, + ) => { + if (input.tool !== 'bash') return + + const sandbox = getSandboxContext(deps, input.sessionID) + if (!sandbox) return + + const { docker, containerName, hostDir } = sandbox + const args = output.args + const cwd = args.workdir ? toContainerPath(args.workdir, hostDir) : undefined + + const cmd = (args.command ?? 
'').trimStart() + if (cmd === 'git' || cmd.startsWith('git ')) { + pendingResults.set(input.callID, 'Git is not available in sandbox mode. The worktree is managed by the loop system on the host.') + output.args = { ...args, command: 'true' } + return + } + + deps.logger.log(`[sandbox-hook] intercepting bash: ${args.command?.slice(0, 100)}`) + + try { + const result = await docker.exec(containerName, args.command, { + timeout: args.timeout, + cwd, + }) + + let dockerOutput = rewriteOutput(result.stdout, hostDir) + if (result.stderr && result.exitCode !== 0) { + dockerOutput += rewriteOutput(result.stderr, hostDir) + } + if (result.exitCode === 124) { + const timeoutMs = args.timeout ?? 120000 + dockerOutput += `\n\n\nbash tool terminated command after exceeding timeout ${timeoutMs} ms\n` + } else if (result.exitCode !== 0) { + dockerOutput += `\n\n[Exit code: ${result.exitCode}]` + } + + pendingResults.set(input.callID, dockerOutput.trim()) + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err) + pendingResults.set(input.callID, `Command failed: ${message}`) + } + + output.args = { ...args, command: 'true' } + } +} + +export function createSandboxToolAfterHook(deps: SandboxToolHookDeps): Hooks['tool.execute.after'] { + return async ( + input: { tool: string; sessionID: string; callID: string; args: any }, + output: { title: string; output: string; metadata: any }, + ) => { + if (input.tool !== 'bash') return + + const dockerResult = pendingResults.get(input.callID) + if (dockerResult === undefined) return + + pendingResults.delete(input.callID) + deps.logger.log(`[sandbox-hook] replacing bash output for callID ${input.callID}`) + output.output = dockerResult + } +} diff --git a/src/index.ts b/src/index.ts index 9944e15..978ba8b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -14,8 +14,12 @@ import { createLoopService, migrateRalphKeys } from './services/loop' import { loadPluginConfig } from './setup' import { resolveLogPath } from './storage' import { createLogger } from './utils/logger' +import { createDockerService } from './sandbox/docker' +import { createSandboxManager } from './sandbox/manager' +import { join } from 'path' import type { PluginConfig, CompactionConfig } from './types' import { createTools, createToolExecuteBeforeHook, createToolExecuteAfterHook, autoValidateOnLoad, scopeEnum } from './tools' +import { createSandboxToolBeforeHook, createSandboxToolAfterHook } from './hooks/sandbox-tools' import type { DimensionMismatchState, InitState, ToolContext } from './tools' import type { VecService } from './storage/vec-types' @@ -78,7 +82,27 @@ export function createMemoryPlugin(config: PluginConfig): Plugin { if (reconciledCount > 0) { logger.log(`Reconciled ${reconciledCount} stale loop(s) from previous session`) } - const loopHandler = createLoopEventHandler(loopService, client, v2, logger, () => config) + + let sandboxManager: ReturnType | null = null + if (config.sandbox?.mode === 'docker') { + const 
dockerService = createDockerService(logger) + try { + sandboxManager = createSandboxManager(dockerService, { + image: config.sandbox.image || 'ocm-sandbox:latest', + }, logger) + logger.log('Docker sandbox manager initialized') + } catch (err) { + logger.error('Failed to initialize Docker sandbox manager', err) + } + } + + if (sandboxManager) { + sandboxManager.cleanupOrphans().then((count) => { + if (count > 0) logger.log(`Cleaned up ${count} orphaned sandbox container(s)`) + }).catch((err) => logger.error('Failed to cleanup orphaned containers', err)) + } + + const loopHandler = createLoopEventHandler(loopService, client, v2, logger, () => config, sandboxManager || undefined) const mismatchState: DimensionMismatchState = { detected: false, @@ -152,6 +176,29 @@ export function createMemoryPlugin(config: PluginConfig): Plugin { cleaned = true logger.log('Cleaning up plugin resources...') + if (sandboxManager) { + const activeLoops = loopService.listActive() + for (const state of activeLoops) { + if (state.sandbox && sandboxManager) { + try { + await sandboxManager.stop(state.worktreeName) + logger.log(`Cleanup: stopped sandbox for ${state.worktreeName}`) + } catch (err) { + logger.error(`Cleanup: failed to stop sandbox for ${state.worktreeName}`, err) + } + } + } + const sandboxAny = sandboxManager as any + if (sandboxAny.isGlobalActive?.()) { + try { + await sandboxAny.stopGlobal?.() + logger.log('Cleanup: stopped global sandbox container') + } catch (err) { + logger.error('Cleanup: failed to stop global sandbox container', err) + } + } + } + loopHandler.terminateAll() logger.log('Memory loop: all active loops terminated') @@ -188,11 +235,22 @@ export function createMemoryPlugin(config: PluginConfig): Plugin { getCurrentVec: () => currentVec, cleanup, input, + sandboxManager, } const tools = createTools(ctx) const toolExecuteBeforeHook = createToolExecuteBeforeHook(ctx) const toolExecuteAfterHook = createToolExecuteAfterHook(ctx) + const sandboxBeforeHook = 
createSandboxToolBeforeHook({ + loopService, + sandboxManager, + logger, + }) + const sandboxAfterHook = createSandboxToolAfterHook({ + loopService, + sandboxManager, + logger, + }) return { getCleanup, @@ -215,10 +273,24 @@ export function createMemoryPlugin(config: PluginConfig): Plugin { await loopHandler.onEvent(eventInput) await sessionHooks.onEvent(eventInput) }, - 'tool.execute.before': toolExecuteBeforeHook, - 'tool.execute.after': toolExecuteAfterHook, + 'tool.execute.before': async (input, output) => { + const worktree = loopService.resolveWorktreeName(input.sessionID) + if (worktree) { + logger.log(`[tool-before] ${input.tool} callID=${input.callID} session=${input.sessionID} worktree=${worktree}`) + } + await toolExecuteBeforeHook!(input, output) + await sandboxBeforeHook!(input, output) + }, + 'tool.execute.after': async (input, output) => { + const worktree = loopService.resolveWorktreeName(input.sessionID) + if (worktree) { + logger.log(`[tool-after] ${input.tool} callID=${input.callID} output=${output.output?.slice(0, 200)}`) + } + await sandboxAfterHook!(input, output) + await toolExecuteAfterHook!(input, output) + }, 'permission.ask': async (input, output) => { - const req = input as unknown as { sessionID: string; patterns: string[] } + const req = input as unknown as { sessionID: string; permission: string; patterns: string[] } const worktreeName = loopService.resolveWorktreeName(req.sessionID) const state = worktreeName ? 
/** Options for running a command inside a container. */
export interface DockerExecOpts {
  /** Milliseconds before the docker CLI process is terminated (default 120000). */
  timeout?: number
  /** Directory inside the container to `cd` into before running the command. */
  cwd?: string
  /** Signal that terminates the docker CLI process when aborted. */
  abort?: AbortSignal
}

/** Captured result of a docker CLI invocation. */
export interface DockerExecResult {
  stdout: string
  stderr: string
  /** Process exit code; 124 when the call timed out, 1 when no code was reported. */
  exitCode: number
}

/** Thin wrapper around the `docker` CLI used by the sandbox manager and tool hooks. */
export interface DockerService {
  checkDocker(): Promise<boolean>
  imageExists(image: string): Promise<boolean>
  buildImage(dockerfilePath: string, tag: string): Promise<void>
  createContainer(name: string, projectDir: string, image: string): Promise<void>
  removeContainer(name: string): Promise<void>
  exec(name: string, command: string, opts?: DockerExecOpts): Promise<DockerExecResult>
  execPipe(name: string, command: string, stdin: string, opts?: { timeout?: number; abort?: AbortSignal }): Promise<DockerExecResult>
  isRunning(name: string): Promise<boolean>
  containerName(worktreeName: string): string
  listContainersByPrefix(prefix: string): Promise<string[]>
}

/**
 * Creates a {@link DockerService} that shells out to the `docker` binary.
 *
 * Every call spawns a short-lived `docker` process, collects its stdout and
 * stderr, and resolves with the exit code. Timeouts resolve (not reject) with
 * exit code 124; only a failure to spawn rejects.
 *
 * @param logger - Receives diagnostic messages about timeouts, aborts and spawn errors.
 */
export function createDockerService(logger: Logger): DockerService {
  const DEFAULT_TIMEOUT = 120000
  const TIMEOUT_EXIT_CODE = 124
  const TIMEOUT_KILL_GRACE_MS = 5000
  const ABORT_KILL_GRACE_MS = 3000

  interface RunOptions {
    timeout?: number
    abort?: AbortSignal
    /** When set, the child gets a stdin pipe and this text is written to it. */
    stdin?: string
  }

  /** Derives the container name used for a worktree. */
  function containerName(worktreeName: string): string {
    return `ocm-sandbox-${worktreeName}`
  }

  /** Resolves true when `docker info` succeeds within 5 seconds. */
  async function checkDocker(): Promise<boolean> {
    try {
      const result = await run('docker', ['info'], { timeout: 5000 })
      return result.exitCode === 0
    } catch {
      return false
    }
  }

  /** Resolves true when `docker image inspect` finds the image locally. */
  async function imageExists(image: string): Promise<boolean> {
    try {
      const result = await run('docker', ['image', 'inspect', image], { timeout: 5000 })
      return result.exitCode === 0
    } catch {
      return false
    }
  }

  /**
   * Runs `docker build -t <tag> <dockerfilePath>` with no timeout.
   *
   * @throws Error containing the collected stderr when the build exits non-zero.
   */
  async function buildImage(dockerfilePath: string, tag: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      const child = spawn('docker', ['build', '-t', tag, dockerfilePath], {
        stdio: ['ignore', 'pipe', 'pipe'],
      })

      const stderrChunks: string[] = []
      child.stderr.on('data', (data) => {
        stderrChunks.push(data.toString())
      })

      child.on('close', (code) => {
        if (code === 0) {
          resolve()
        } else {
          reject(new Error(`Docker build failed: ${stderrChunks.join('')}`))
        }
      })

      child.on('error', reject)
    })
  }

  /**
   * Starts a detached container that idles on `sleep infinity`, with
   * `projectDir` bind-mounted at `/workspace` (also the working directory).
   *
   * @throws Error with docker's stderr when `docker run` exits non-zero.
   */
  async function createContainer(name: string, projectDir: string, image: string): Promise<void> {
    const args = [
      'run',
      '-d',
      '--name',
      name,
      '-v',
      `${projectDir}:/workspace`,
      '-w',
      '/workspace',
      image,
      'sleep',
      'infinity',
    ]

    const result = await run('docker', args, { timeout: 30000 })
    if (result.exitCode !== 0) {
      throw new Error(`Failed to create container: ${result.stderr}`)
    }
  }

  /**
   * Force-removes a container. A missing container is not treated as an error.
   *
   * @throws Error with docker's stderr for any other failure.
   */
  async function removeContainer(name: string): Promise<void> {
    const result = await run('docker', ['rm', '-f', name], { timeout: 30000 })
    if (result.exitCode !== 0 && !result.stderr.includes('No such container')) {
      throw new Error(`Failed to remove container: ${result.stderr}`)
    }
  }

  /** Runs `sh -c <command>` inside the container, optionally after `cd <cwd>`. */
  async function exec(
    name: string,
    command: string,
    opts?: DockerExecOpts,
  ): Promise<DockerExecResult> {
    let fullCommand = command
    if (opts?.cwd) {
      // Close the quote, emit an escaped quote, reopen: the POSIX way to embed ' in '...'.
      const safeCwd = opts.cwd.replace(/'/g, "'\\''")
      fullCommand = `cd '${safeCwd}' && ${command}`
    }

    return run('docker', ['exec', name, 'sh', '-c', fullCommand], {
      timeout: opts?.timeout,
      abort: opts?.abort,
    })
  }

  /** Like {@link exec} (without `cwd`), but feeds `stdin` to the command via `docker exec -i`. */
  async function execPipe(
    name: string,
    command: string,
    stdin: string,
    opts?: { timeout?: number; abort?: AbortSignal },
  ): Promise<DockerExecResult> {
    return run('docker', ['exec', '-i', name, 'sh', '-c', command], {
      timeout: opts?.timeout,
      abort: opts?.abort,
      stdin,
    })
  }

  /** Resolves true when `docker inspect` reports `.State.Running` as `true`. */
  async function isRunning(name: string): Promise<boolean> {
    try {
      const result = await run('docker', ['inspect', '--format={{.State.Running}}', name], {
        timeout: 5000,
      })
      return result.stdout.trim() === 'true'
    } catch {
      return false
    }
  }

  /**
   * Lists containers (running or not) whose name starts with `prefix`.
   * Resolves to an empty list when docker fails.
   */
  async function listContainersByPrefix(prefix: string): Promise<string[]> {
    try {
      const result = await run(
        'docker',
        ['ps', '-a', '--filter', `name=${prefix}`, '--format', '{{.Names}}'],
        { timeout: 5000 },
      )
      if (result.exitCode !== 0) return []
      // docker's name filter matches anywhere in the name, so enforce the prefix here.
      return result.stdout
        .trim()
        .split('\n')
        .filter((name) => name.length > 0 && name.startsWith(prefix))
    } catch {
      return []
    }
  }

  /**
   * Spawns a process, buffers its output and resolves once it closes.
   *
   * On timeout or abort the child receives SIGTERM, followed by SIGKILL if it
   * has not exited after a grace period. All timers and the abort listener
   * are released when the child closes or fails to spawn.
   */
  function run(command: string, args: string[], options: RunOptions = {}): Promise<DockerExecResult> {
    return new Promise<DockerExecResult>((resolve, reject) => {
      const timeout = options.timeout ?? DEFAULT_TIMEOUT
      const stdinMode: 'pipe' | 'ignore' = options.stdin !== undefined ? 'pipe' : 'ignore'
      const child = spawn(command, args, { stdio: [stdinMode, 'pipe', 'pipe'] })
      const cmdPreview = (args[args.length - 1] ?? '').slice(0, 80)

      let stdout = ''
      let stderr = ''
      let timedOut = false
      let killTimer: ReturnType<typeof setTimeout> | undefined

      const terminate = (graceMs: number): void => {
        child.kill('SIGTERM')
        killTimer = setTimeout(() => {
          // exitCode stays null after a signal exit, so check signalCode as well.
          if (child.exitCode === null && child.signalCode === null) {
            logger.log(`[docker] SIGKILL after SIGTERM for: ${cmdPreview}`)
            child.kill('SIGKILL')
          }
        }, graceMs)
      }

      const timeoutId = setTimeout(() => {
        timedOut = true
        logger.log(`[docker] timeout (${timeout}ms) for: ${cmdPreview}`)
        terminate(TIMEOUT_KILL_GRACE_MS)
      }, timeout)

      const onAbort = (): void => {
        clearTimeout(timeoutId)
        logger.log(`[docker] abort signal for: ${cmdPreview}`)
        terminate(ABORT_KILL_GRACE_MS)
      }

      const release = (): void => {
        clearTimeout(timeoutId)
        if (killTimer !== undefined) clearTimeout(killTimer)
        options.abort?.removeEventListener('abort', onAbort)
      }

      if (options.abort) {
        if (options.abort.aborted) {
          onAbort()
        } else {
          options.abort.addEventListener('abort', onAbort, { once: true })
        }
      }

      child.stdout?.on('data', (data) => {
        stdout += data.toString()
      })

      child.stderr?.on('data', (data) => {
        stderr += data.toString()
      })

      if (options.stdin !== undefined && child.stdin) {
        // Without a listener, EPIPE from an early-exiting child is an uncaught exception.
        child.stdin.on('error', (err: Error) => {
          logger.log(`[docker] stdin error: ${err.message} for: ${cmdPreview}`)
        })
        child.stdin.end(options.stdin)
      }

      child.on('close', (code) => {
        release()
        if (timedOut) {
          logger.log(`[docker] close after timeout, code=${code} for: ${cmdPreview}`)
        }
        resolve({
          stdout,
          stderr,
          exitCode: timedOut ? TIMEOUT_EXIT_CODE : (code ?? 1),
        })
      })

      child.on('error', (err) => {
        release()
        logger.log(`[docker] spawn error: ${err.message} for: ${cmdPreview}`)
        reject(err)
      })
    })
  }

  return {
    checkDocker,
    imageExists,
    buildImage,
    createContainer,
    removeContainer,
    exec,
    execPipe,
    isRunning,
    containerName,
    listContainersByPrefix,
  }
}
/** Settings for the sandbox manager. */
export interface SandboxManagerConfig {
  /** Docker image used for every sandbox container. */
  image: string
}

/** Bookkeeping entry for a sandbox container tracked by this process. */
export interface ActiveSandbox {
  containerName: string
  /** Absolute host directory mounted into the container. */
  projectDir: string
  /** ISO-8601 timestamp of when this process registered the sandbox. */
  startedAt: string
}

interface SandboxManager {
  docker: DockerService
  start(worktreeName: string, projectDir: string): Promise<{ containerName: string }>
  stop(worktreeName: string): Promise<void>
  getActive(worktreeName: string): ActiveSandbox | null
  isActive(worktreeName: string): boolean
  cleanupOrphans(): Promise<number>
}

// Module-level registry: shared by every manager created in this process.
const activeSandboxes = new Map<string, ActiveSandbox>()

/**
 * Creates the manager that owns sandbox container lifecycle, keyed by worktree name.
 *
 * @param docker - Docker CLI wrapper used for all container operations.
 * @param config - Image to run.
 * @param logger - Receives lifecycle messages.
 */
export function createSandboxManager(
  docker: DockerService,
  config: SandboxManagerConfig,
  logger: Logger,
): SandboxManager {
  /** Records a sandbox in the registry. */
  function register(worktreeName: string, containerName: string, projectDir: string): void {
    activeSandboxes.set(worktreeName, {
      containerName,
      projectDir,
      startedAt: new Date().toISOString(),
    })
  }

  /**
   * Ensures a sandbox container is running for the worktree and registered.
   *
   * @throws Error when docker is unavailable, the image is missing, or the
   *   container cannot be created.
   */
  async function start(worktreeName: string, projectDir: string): Promise<{ containerName: string }> {
    const dockerAvailable = await docker.checkDocker()
    if (!dockerAvailable) {
      throw new Error('Docker is not available. Please ensure Docker is running.')
    }

    const imageExists = await docker.imageExists(config.image)
    if (!imageExists) {
      throw new Error(
        `Docker image "${config.image}" not found. Build it first:\n` +
        `  docker build -t ${config.image} container/`
      )
    }

    const containerName = docker.containerName(worktreeName)
    const absoluteProjectDir = resolve(projectDir)

    const running = await docker.isRunning(containerName)
    if (running) {
      logger.log(`Sandbox container ${containerName} already running`)
      // Register the reused container; otherwise getActive()/isActive() would
      // report no sandbox even though one is running.
      if (!activeSandboxes.has(worktreeName)) {
        register(worktreeName, containerName, absoluteProjectDir)
      }
      return { containerName }
    }

    // A stopped container with the same name would make `docker run --name` fail.
    try {
      await docker.removeContainer(containerName)
    } catch (err) {
      const errMsg = err instanceof Error ? err.message : String(err)
      logger.log(`Sandbox container ${containerName} stale removal: ${errMsg}`)
    }

    logger.log(`Creating sandbox container ${containerName} for ${absoluteProjectDir}`)
    await docker.createContainer(containerName, absoluteProjectDir, config.image)

    register(worktreeName, containerName, absoluteProjectDir)
    logger.log(`Sandbox container ${containerName} started`)

    return { containerName }
  }

  /**
   * Removes the worktree's container (best effort) and always unregisters it.
   * Removal failures are logged, not thrown.
   */
  async function stop(worktreeName: string): Promise<void> {
    const active = activeSandboxes.get(worktreeName)
    const containerName = active?.containerName || docker.containerName(worktreeName)

    try {
      await docker.removeContainer(containerName)
      logger.log(`Sandbox container ${containerName} removed`)
    } catch (err) {
      const errMsg = err instanceof Error ? err.message : String(err)
      logger.log(`Sandbox container ${containerName} removal: ${errMsg}`)
    } finally {
      activeSandboxes.delete(worktreeName)
    }
  }

  /** Returns the registry entry for the worktree, or null when none exists. */
  function getActive(worktreeName: string): ActiveSandbox | null {
    return activeSandboxes.get(worktreeName) ?? null
  }

  /** True when the worktree has a registry entry. */
  function isActive(worktreeName: string): boolean {
    return activeSandboxes.has(worktreeName)
  }

  /**
   * Removes every container whose name starts with `ocm-sandbox-` and clears
   * the registry.
   *
   * @returns Number of containers that were removed successfully.
   */
  async function cleanupOrphans(): Promise<number> {
    const containers = await docker.listContainersByPrefix('ocm-sandbox-')
    let removed = 0
    for (const name of containers) {
      try {
        await docker.removeContainer(name)
        removed++
      } catch (err) {
        // Keep going, but leave a trace so a stuck container can be diagnosed.
        const errMsg = err instanceof Error ? err.message : String(err)
        logger.log(`Orphaned sandbox container ${name} removal: ${errMsg}`)
      }
    }
    activeSandboxes.clear()
    return removed
  }

  return {
    docker,
    start,
    stop,
    getActive,
    isActive,
    cleanupOrphans,
  }
}
const CONTAINER_WORKSPACE = '/workspace'

// Matches the workspace root only where it stands as a path of its own: not
// preceded by another path character (so `/opt/workspace/` is left alone) and
// followed by `/`, end of line, or a non-name character (so `/workspace2` is
// left alone).
const WORKSPACE_REFERENCE = new RegExp(`(^|[^\\w./-])${CONTAINER_WORKSPACE}(?=/|$|[^\\w.-])`, 'gm')

/** Drops trailing slashes so `${dir}/` comparisons land on a path boundary. */
function trimTrailingSlashes(dir: string): string {
  return dir.replace(/\/+$/, '')
}

/**
 * Maps a host path inside `hostDir` to its location under `/workspace`.
 *
 * @param hostPath - Path as seen on the host.
 * @param hostDir - Host directory that is mounted at `/workspace`.
 * @returns The container path, or `hostPath` unchanged when it is not inside
 *   `hostDir` (relative paths and paths that only share a name prefix, such
 *   as `/proj-extra` for `/proj`, are returned as-is).
 */
export function toContainerPath(hostPath: string, hostDir: string): string {
  if (!hostDir) {
    return hostPath
  }
  const base = trimTrailingSlashes(hostDir)
  if (hostPath === base || hostPath === hostDir) {
    return CONTAINER_WORKSPACE
  }
  if (hostPath.startsWith(`${base}/`)) {
    return CONTAINER_WORKSPACE + hostPath.slice(base.length)
  }
  return hostPath
}

/**
 * Maps a container path to the host.
 *
 * @param containerPath - Path as seen inside the container. Relative paths
 *   are resolved against `/workspace`.
 * @param hostDir - Host directory that is mounted at `/workspace`.
 * @returns The host path; absolute paths outside `/workspace` are returned unchanged.
 */
export function toHostPath(containerPath: string, hostDir: string): string {
  const base = trimTrailingSlashes(hostDir)
  if (containerPath === CONTAINER_WORKSPACE) {
    // `base` is empty only for '' or the filesystem root; keep the caller's value then.
    return base === '' ? hostDir : base
  }
  if (containerPath.startsWith(`${CONTAINER_WORKSPACE}/`)) {
    return base + containerPath.slice(CONTAINER_WORKSPACE.length)
  }
  if (containerPath.startsWith('/')) {
    return containerPath
  }
  return `${base}/${containerPath}`
}

/**
 * Rewrites `/workspace` references in command output to the host directory.
 *
 * @param output - Raw text produced inside the container.
 * @param hostDir - Host directory that is mounted at `/workspace`.
 * @returns `output` with stand-alone `/workspace` paths replaced; returned
 *   unchanged when `hostDir` is empty.
 */
export function rewriteOutput(output: string, hostDir: string): string {
  if (!hostDir) {
    return output
  }
  const base = trimTrailingSlashes(hostDir)
  // A replacer function keeps `$&`, `$1`, ... in hostDir from being expanded.
  return output.replace(
    WORKSPACE_REFERENCE,
    (match: string, lead: string, offset: number, whole: string) => {
      const followedBySlash = whole.charAt(offset + match.length) === '/'
      // With hostDir at the filesystem root a bare `/workspace` maps to `/`.
      return lead + (base === '' && !followedBySlash ? '/' : base)
    },
  )
}
/**
 * Reports whether `text` contains the completion signal.
 *
 * @param text - Text to scan.
 * @param promise - Completion phrase to look for.
 * @returns True when `promise` is non-empty and occurs in `text`. An empty
 *   phrase never matches (a plain `includes('')` would always be true).
 */
function checkCompletionPromise(text: string, promise: string): boolean {
  if (!promise) {
    return false
  }
  return text.includes(promise)
}

/**
 * Replaces every occurrence of the completion signal in `text` with
 * `[SIGNAL_REDACTED]`, so quoted text cannot be mistaken for the signal.
 *
 * Both the phrase as given and the phrase with `<promise>` / `</promise>`
 * tags stripped are redacted.
 *
 * @param text - Text to clean.
 * @param promise - Completion phrase to redact.
 * @returns The cleaned text; `text` unchanged when `promise` is empty.
 */
function redactCompletionSignal(text: string, promise: string): string {
  if (!promise) {
    return text
  }
  const marker = '[SIGNAL_REDACTED]'
  // Redact the full phrase first so a tagged signal is replaced as one unit.
  let result = text.split(promise).join(marker)
  const inner = promise.replace(/<\/?promise>/g, '').trim()
  if (inner && inner !== promise) {
    result = result.split(inner).join(marker)
  }
  return result
}
/**
 * Returns the index just past the JSON string literal that opens at `start`
 * (which must point at a `"`). Backslash escapes are skipped. An unterminated
 * string runs to the end of the input.
 */
function endOfJsonString(content: string, start: number): number {
  let i = start + 1
  while (i < content.length) {
    const ch = content.charAt(i)
    if (ch === '\\') {
      i += 2
      continue
    }
    if (ch === '"') {
      return i + 1
    }
    i++
  }
  return content.length
}

/**
 * Removes `//` line comments and block comments from JSONC text.
 *
 * String literals are copied verbatim, so values such as `"a//b"` survive.
 * A block comment becomes a single space. An unterminated block comment is
 * left in place so the later `JSON.parse` reports the error.
 */
function stripComments(content: string): string {
  let result = ''
  let i = 0
  while (i < content.length) {
    const ch = content.charAt(i)
    const next = content.charAt(i + 1)
    if (ch === '"') {
      const end = endOfJsonString(content, i)
      result += content.slice(i, end)
      i = end
    } else if (ch === '/' && next === '/') {
      // Drop up to, but not including, the line break.
      i += 2
      while (i < content.length && content.charAt(i) !== '\n' && content.charAt(i) !== '\r') {
        i++
      }
    } else if (ch === '/' && next === '*') {
      const close = content.indexOf('*/', i + 2)
      if (close === -1) {
        result += content.slice(i)
        i = content.length
      } else {
        result += ' '
        i = close + 2
      }
    } else {
      result += ch
      i++
    }
  }
  return result
}

/**
 * Removes commas that are followed only by whitespace and a closing `}` or
 * `]`. Commas inside string literals are never touched.
 */
function stripTrailingCommas(content: string): string {
  const whitespace = /\s/
  let result = ''
  let i = 0
  while (i < content.length) {
    const ch = content.charAt(i)
    if (ch === '"') {
      const end = endOfJsonString(content, i)
      result += content.slice(i, end)
      i = end
      continue
    }
    if (ch === ',') {
      let j = i + 1
      while (j < content.length && whitespace.test(content.charAt(j))) {
        j++
      }
      const closer = content.charAt(j)
      if (closer === '}' || closer === ']') {
        i++
        continue
      }
    }
    result += ch
    i++
  }
  return result
}

/**
 * Parses JSON that may contain comments and trailing commas.
 *
 * @param content - JSONC source text.
 * @returns The parsed value, typed as `T` without runtime validation.
 * @throws SyntaxError when the cleaned text is not valid JSON.
 */
function parseJsonc<T>(content: string): T {
  const cleaned = stripComments(content)
  const normalized = stripTrailingCommas(cleaned)
  return JSON.parse(normalized) as T
}
function normalizeConfig(config: PluginConfig): PluginConfig { loop: config.loop ?? config.ralph, tui: config.tui, agents: config.agents, + sandbox: config.sandbox, } if (config.ralph && !config.loop) { console.warn('[memory] Config key "ralph" is deprecated, use "loop" instead') } + if (normalized.sandbox) { + normalized.sandbox.mode = normalized.sandbox.mode || 'off' + } + if (normalized.embedding) { const embedding = { ...normalized.embedding } diff --git a/src/tools/index.ts b/src/tools/index.ts index 07a92ea..1ec1e13 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -4,6 +4,7 @@ import { createKvTools } from './kv' import { createHealthTools } from './health' import { createPlanExecuteTools } from './plan-execute' import { createLoopTools } from './loop' +import { createSandboxFsTools } from './sandbox-fs' import type { ToolContext } from './types' export { autoValidateOnLoad } from './health' @@ -12,11 +13,13 @@ export { scopeEnum } from './types' export type { ToolContext, DimensionMismatchState, InitState } from './types' export function createTools(ctx: ToolContext): Record> { + const sandboxEnabled = ctx.config.sandbox?.mode === 'docker' && !!ctx.sandboxManager return { ...createMemoryTools(ctx), ...createKvTools(ctx), ...createHealthTools(ctx), ...createPlanExecuteTools(ctx), ...createLoopTools(ctx), + ...(sandboxEnabled ? 
createSandboxFsTools(ctx) : {}), } } diff --git a/src/tools/loop.ts b/src/tools/loop.ts index 587a140..6d9b408 100644 --- a/src/tools/loop.ts +++ b/src/tools/loop.ts @@ -1,6 +1,7 @@ import { tool } from '@opencode-ai/plugin' import { execSync, spawnSync } from 'child_process' -import { existsSync } from 'fs' +import { existsSync, writeFileSync } from 'fs' +import { join } from 'path' import { resolve } from 'path' import type { ToolContext } from './types' import { withDimensionWarning } from './types' @@ -11,7 +12,7 @@ import { formatSessionOutput, formatAuditResult } from '../utils/loop-format' import { fetchSessionOutput, MAX_RETRIES, type LoopState, type LoopSessionOutput } from '../services/loop' const z = tool.schema -const DEFAULT_PLAN_COMPLETION_PROMISE = 'ALL_PHASES_COMPLETE' +const DEFAULT_PLAN_COMPLETION_PROMISE = 'ALL_PHASES_COMPLETE' interface LoopSetupOptions { prompt: string @@ -30,7 +31,7 @@ async function setupLoop( ctx: ToolContext, options: LoopSetupOptions, ): Promise { - const { v2, directory, config, loopService, loopHandler, logger } = ctx + const { v2, directory, config, loopService, loopHandler, logger, sandboxManager } = ctx const autoWorktreeName = options.worktreeName ?? `loop-${slugify(options.sessionTitle.replace(/^Loop:\s*/i, ''))}` const projectDir = directory const maxIter = options.maxIterations ?? config.loop?.defaultMaxIterations ?? 
0 @@ -104,6 +105,36 @@ async function setupLoop( } } + if (loopContext.worktree) { + try { + const loopConfig = JSON.stringify({ + permission: { + bash: { '*': 'allow', 'git push *': 'deny' }, + external_directory: { '*': 'deny' }, + }, + }, null, 2) + writeFileSync(join(loopContext.directory, 'opencode.jsonc'), loopConfig) + logger.log(`loop: wrote loop opencode.jsonc to ${loopContext.directory}`) + } catch (err) { + logger.error(`loop: failed to write opencode.jsonc`, err) + } + } + + let sandboxContainerName: string | undefined + const sandboxEnabled = config.sandbox?.mode === 'docker' && !!sandboxManager && !!options.worktree + + if (sandboxEnabled) { + try { + const result = await sandboxManager!.start(autoWorktreeName, loopContext.directory) + sandboxContainerName = result.containerName + logger.log(`Sandbox container ${sandboxContainerName} started for loop ${autoWorktreeName}`) + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + logger.error(`loop: failed to start sandbox container`, err) + return `Failed to start sandbox container: ${message}` + } + } + const state: LoopState = { active: true, sessionId: loopContext.sessionId, @@ -120,6 +151,8 @@ async function setupLoop( errorCount: 0, auditCount: 0, worktree: options.worktree, + sandbox: sandboxEnabled, + sandboxContainerName, } loopService.setState(autoWorktreeName, state) @@ -128,7 +161,7 @@ async function setupLoop( let promptText = options.prompt if (options.completionPromise) { - promptText += `\n\n---\n\n**IMPORTANT - Completion Signal:** When you have completed ALL phases of this plan successfully, you MUST output the following tag exactly: ${options.completionPromise}\n\nDo NOT output this tag until every phase is truly complete. 
The loop will continue until this signal is detected.` + promptText += `\n\n---\n\n**IMPORTANT - Completion Signal:** When you have completed ALL phases of this plan successfully, you MUST output the following phrase exactly: ${options.completionPromise}\n\nBefore outputting the completion signal, you MUST:\n1. Verify each phase's acceptance criteria are met\n2. Run all verification commands listed in the plan and confirm they pass\n3. If tests were required, confirm they exist AND pass\n\nDo NOT output this phrase until every phase is truly complete and all verification steps pass. The loop will continue until this signal is detected.` } const { result: promptResult, usedModel: actualModel } = await retryWithModelFallback( @@ -152,6 +185,13 @@ async function setupLoop( if (promptResult.error) { logger.error(`loop: failed to send prompt`, promptResult.error) loopService.deleteState(autoWorktreeName) + if (sandboxEnabled) { + try { + await sandboxManager!.stop(autoWorktreeName) + } catch (sbxErr) { + logger.error(`loop: failed to stop sandbox container on prompt failure`, sbxErr) + } + } if (options.worktree) { try { await v2.worktree.remove({ worktreeRemoveInput: { directory: loopContext.directory } }) @@ -358,6 +398,17 @@ export function createLoopTools(ctx: ToolContext): Record${stoppedState.completionPromise}\n\nDo NOT output this tag until every phase is truly complete. The loop will continue until this signal is detected.` + promptText += `\n\n---\n\n**IMPORTANT - Completion Signal:** When you have completed ALL phases of this plan successfully, you MUST output the following phrase exactly: ${stoppedState.completionPromise}\n\nDo NOT output this phrase until every phase is truly complete. The loop will continue until this signal is detected.` } const loopModel = parseModelString(config.loop?.model) ?? 
parseModelString(config.executionModel) @@ -407,6 +462,13 @@ export function createLoopTools(ctx: ToolContext): Record> { + return { + glob: tool({ + description: [ + '- Fast file pattern matching tool that works with any codebase size', + '- Supports glob patterns like "**/*.js" or "src/**/*.ts"', + '- Returns matching file paths sorted by modification time', + '- Use this tool when you need to find files by name patterns', + '- When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the Task tool instead', + '- You have the capability to call multiple tools in a single response. It is always better to speculatively perform multiple searches as a batch that are potentially useful.', + ].join('\n'), + args: { + pattern: z.string().describe('The glob pattern to match files against'), + path: z.string().optional().describe( + 'The directory to search in. If not specified, the current working directory will be used. IMPORTANT: Omit this field to use the default directory. DO NOT enter "undefined" or "null" - simply omit it for the default behavior. Must be a valid directory path if provided.' + ), + }, + execute: async (args, context) => { + const sandbox = getSandboxForSession(ctx, context.sessionID) + if (!sandbox) return 'Glob tool requires sandbox context.' + + const { docker, containerName, hostDir } = sandbox + const searchPath = args.path + ? 
toContainerPath(args.path, hostDir) + : '/workspace' + + const safePattern = args.pattern.replace(/'/g, "'\\''") + const cmd = `rg --files --glob '${safePattern}' '${searchPath}' 2>/dev/null | head -100` + + try { + const result = await docker.exec(containerName, cmd, { timeout: 30000 }) + + if (!result.stdout.trim()) return 'No files found' + + const lines = result.stdout.trim().split('\n').filter(Boolean) + const rewritten = lines.map(l => rewriteOutput(l, hostDir)) + + let output = rewritten.join('\n') + if (lines.length >= 100) { + output += '\n\n(Results are truncated: showing first 100 results. Consider using a more specific path or pattern.)' + } + return output + } catch (err) { + const message = err instanceof Error ? err.message : String(err) + return `Glob failed: ${message}` + } + }, + }), + + grep: tool({ + description: [ + '- Fast content search tool that works with any codebase size', + '- Searches file contents using regular expressions', + '- Supports full regex syntax (eg. "log.*Error", "function\\s+\\w+", etc.)', + '- Filter files by pattern with the include parameter (eg. "*.js", "*.{ts,tsx}")', + '- Returns file paths and line numbers with at least one match sorted by modification time', + '- Use this tool when you need to find files containing specific patterns', + '- If you need to identify/count the number of matches within files, use the Bash tool with `rg` (ripgrep) directly. Do NOT use `grep`.', + '- When you are doing an open-ended search that may require multiple rounds of globbing and grepping, use the Task tool instead', + ].join('\n'), + args: { + pattern: z.string().describe('The regex pattern to search for in file contents'), + path: z.string().optional().describe('The directory to search in. Defaults to the current working directory.'), + include: z.string().optional().describe('File pattern to include in the search (e.g. 
"*.js", "*.{ts,tsx}")'), + }, + execute: async (args, context) => { + const sandbox = getSandboxForSession(ctx, context.sessionID) + if (!sandbox) return 'Grep tool requires sandbox context.' + + const { docker, containerName, hostDir } = sandbox + const searchPath = args.path + ? toContainerPath(args.path, hostDir) + : '/workspace' + + const safePattern = args.pattern.replace(/'/g, "'\\''") + let cmd = `rg -nH --hidden --no-messages --field-match-separator='|' --regexp '${safePattern}'` + if (args.include) { + const safeInclude = args.include.replace(/'/g, "'\\''") + cmd += ` --glob '${safeInclude}'` + } + cmd += ` '${searchPath}' 2>/dev/null | head -100` + + try { + const result = await docker.exec(containerName, cmd, { timeout: 30000 }) + + if (!result.stdout.trim()) return 'No files found' + + const lines = result.stdout.trim().split('\n').filter(Boolean) + const grouped = new Map>() + + for (const line of lines) { + const parts = line.split('|') + if (parts.length < 3) continue + const filePath = rewriteOutput(parts[0], hostDir) + const lineNum = parseInt(parts[1], 10) + const text = parts.slice(2).join('|') + const truncatedText = text.length > 2000 ? text.slice(0, 1997) + '...' : text + if (!grouped.has(filePath)) grouped.set(filePath, []) + grouped.get(filePath)!.push({ line: lineNum, text: truncatedText }) + } + + let totalMatches = 0 + const outputParts: string[] = [] + outputParts.push(`Found ${lines.length} matches`) + + for (const [filePath, matches] of grouped) { + outputParts.push(`${filePath}:`) + for (const m of matches) { + outputParts.push(` Line ${m.line}: ${m.text}`) + totalMatches++ + } + outputParts.push('') + } + + if (lines.length >= 100) { + outputParts.push('(Results truncated: showing 100 of possibly more matches. Consider using a more specific path or pattern.)') + } + + return outputParts.join('\n') + } catch (err) { + const message = err instanceof Error ? 
/**
 * Formats a token count for display: values of 1000 and above are shown in
 * thousands with one decimal (`1500` becomes `1.5k`), smaller values as-is.
 */
function formatTokens(n: number): string {
  if (n >= 1000) {
    return `${(n / 1000).toFixed(1)}k`
  }
  return `${n}`
}

/**
 * Formats a duration as `Xh Ym Zs`, `Ym Zs` or `Zs`, omitting leading zero units.
 *
 * @param ms - Duration in milliseconds. Negative or non-finite values are
 *   shown as `0s` rather than `-1s` / `NaNs`.
 */
function formatDuration(ms: number): string {
  const MS_PER_SECOND = 1000
  const MS_PER_MINUTE = 60 * MS_PER_SECOND
  const MS_PER_HOUR = 60 * MS_PER_MINUTE

  const safeMs = Number.isFinite(ms) && ms > 0 ? ms : 0
  const hours = Math.floor(safeMs / MS_PER_HOUR)
  const minutes = Math.floor((safeMs % MS_PER_HOUR) / MS_PER_MINUTE)
  const seconds = Math.floor((safeMs % MS_PER_MINUTE) / MS_PER_SECOND)

  if (hours > 0) {
    return `${hours}h ${minutes}m ${seconds}s`
  }
  if (minutes > 0) {
    return `${minutes}m ${seconds}s`
  }
  return `${seconds}s`
}

/**
 * Shortens `text` to at most `maxLength` characters, ending in `...` when cut.
 *
 * @param text - Text to shorten.
 * @param maxLength - Maximum length of the result. Limits below 3 leave no
 *   room for the ellipsis, so the text is hard-cut instead.
 */
function truncate(text: string, maxLength: number): string {
  if (text.length <= maxLength) return text
  if (maxLength < 3) {
    // slice(0, maxLength - 3) would count from the end and overshoot the limit.
    return text.slice(0, Math.max(0, maxLength))
  }
  return text.slice(0, maxLength - 3) + '...'
}
`/${loop.maxIterations}` : '' - opts.push({ - title: `Status: ${loop.phase} · iteration ${loop.iteration}${max}`, - value: 'status', - }) opts.push({ title: 'Cancel loop', value: 'cancel', @@ -187,33 +216,137 @@ function LoopDetailsDialog(props: { api: TuiPluginApi; loop: LoopInfo }) { }) }, }) - } else if (loop.terminationReason === 'completed') { - opts.push({ - title: `Completed: ${loop.iteration} iteration${loop.iteration !== 1 ? 's' : ''}`, - value: 'completed', - }) - } else { - opts.push({ - title: `Ended: ${loop.terminationReason?.replace(/_/g, ' ') ?? 'unknown'}`, - value: 'ended', - }) } + opts.push({ + title: 'Close', + value: 'close', + }) + return opts } + const statusBadge = () => { + if (loop.active) return { text: loop.phase, color: loop.phase === 'auditing' ? theme().warning : theme().success } + if (loop.terminationReason === 'completed') return { text: 'completed', color: theme().success } + if (loop.terminationReason === 'cancelled' || loop.terminationReason === 'user_aborted') return { text: 'cancelled', color: theme().textMuted } + return { text: 'ended', color: theme().error } + } + return ( - { - if (opt.onSelect) { - opt.onSelect() - return - } - props.api.ui.dialog.clear() - }} - /> + + + + + {loop.name} + + + [{statusBadge().text}] + + + · + {loop.worktreeBranch} + + + + + Iteration {loop.iteration}{loop.maxIterations > 0 ? `/${loop.maxIterations}` : ''} + + + + + + + Loading stats... + + + + + + + Session stats unavailable + + }> + + + + Session: + {loop.sessionId.slice(0, 8)}... 
+ + + + + Phase: + {loop.phase} + + + + + Messages: + {stats()!.messages.total} total ({stats()!.messages.assistant} assistant) + + + + + Tokens: + {formatTokens(stats()!.tokens.input)} in / {formatTokens(stats()!.tokens.output)} out / {formatTokens(stats()!.tokens.reasoning)} reasoning + + + + + Cost: + ${stats()!.cost.toFixed(4)} + + + + + + Files: + {stats()!.fileChanges!.files} changed (+{stats()!.fileChanges!.additions}/-{stats()!.fileChanges!.deletions}) + + + + + + + Duration: + {formatDuration(stats()!.timing!.durationMs)} + + + + + + + + + + + + Latest Output + + + + {truncate(stats()!.lastAssistantMessage!.text, 300)} + + + + + + + { + if (opt.onSelect) { + opt.onSelect() + return + } + props.api.ui.dialog.clear() + }} + /> + + ) } diff --git a/src/types.ts b/src/types.ts index 546f082..749dd9d 100644 --- a/src/types.ts +++ b/src/types.ts @@ -69,6 +69,11 @@ export interface LoopConfig { minAudits?: number } +export interface SandboxConfig { + mode: 'off' | 'docker' + image?: string +} + export interface ListMemoriesFilter { scope?: MemoryScope limit?: number @@ -120,6 +125,7 @@ export interface PluginConfig { defaultKvTtlMs?: number tui?: TuiConfig agents?: Record + sandbox?: SandboxConfig } export interface HealthStatus { diff --git a/src/utils/session-stats.ts b/src/utils/session-stats.ts new file mode 100644 index 0000000..fc51e75 --- /dev/null +++ b/src/utils/session-stats.ts @@ -0,0 +1,148 @@ +import type { TuiPluginApi } from '@opencode-ai/plugin/tui' + +export interface SessionStats { + tokens: { + input: number + output: number + reasoning: number + cacheRead: number + cacheWrite: number + total: number + } + cost: number + messages: { + total: number + assistant: number + } + fileChanges: { + additions: number + deletions: number + files: number + } | null + timing: { + created: string + updated: string + durationMs: number + } | null + lastAssistantMessage: { + text: string + parts: Array<{ type: string; text?: string }> + } | null +} + +export 
async function fetchSessionStats( + api: TuiPluginApi, + sessionId: string, + directory: string, +): Promise { + if (!directory || !sessionId) { + return null + } + + try { + const messagesResult = await api.client.session.messages({ + sessionID: sessionId, + directory, + }) + + const messages = (messagesResult.data ?? []) as Array<{ + info: { + role: string + cost?: number + tokens?: { + input: number + output: number + reasoning: number + cache: { read: number; write: number } + } + } + parts: Array<{ type: string; text?: string }> + }> + + const assistantMessages = messages.filter((m) => m.info.role === 'assistant') + const lastAssistantMessage = + assistantMessages.length > 0 + ? { + text: assistantMessages[assistantMessages.length - 1].parts + .filter((p) => p.type === 'text' && typeof p.text === 'string') + .map((p) => p.text as string) + .join('\n'), + parts: assistantMessages[assistantMessages.length - 1].parts, + } + : null + + let totalInputTokens = 0 + let totalOutputTokens = 0 + let totalReasoningTokens = 0 + let totalCacheRead = 0 + let totalCacheWrite = 0 + let totalCost = 0 + + for (const msg of messages) { + totalCost += msg.info.cost ?? 0 + const tokens = msg.info.tokens + if (tokens) { + totalInputTokens += tokens.input ?? 0 + totalOutputTokens += tokens.output ?? 0 + totalReasoningTokens += tokens.reasoning ?? 0 + totalCacheRead += tokens.cache?.read ?? 0 + totalCacheWrite += tokens.cache?.write ?? 0 + } + } + + const sessionResult = await api.client.session.get({ + sessionID: sessionId, + directory, + }) + const session = sessionResult.data as + | { + summary?: { additions: number; deletions: number; files: number } + time?: { created: string; updated: string } + } + | undefined + + const fileChanges = session?.summary + ? { + additions: session.summary.additions, + deletions: session.summary.deletions, + files: session.summary.files, + } + : null + + const timing = session?.time?.created && session?.time?.updated + ? 
{ + created: session.time.created, + updated: session.time.updated, + durationMs: + new Date(session.time.updated).getTime() - + new Date(session.time.created).getTime(), + } + : null + + return { + tokens: { + input: totalInputTokens, + output: totalOutputTokens, + reasoning: totalReasoningTokens, + cacheRead: totalCacheRead, + cacheWrite: totalCacheWrite, + total: + totalInputTokens + + totalOutputTokens + + totalReasoningTokens + + totalCacheRead + + totalCacheWrite, + }, + cost: totalCost, + messages: { + total: messages.length, + assistant: assistantMessages.length, + }, + fileChanges, + timing, + lastAssistantMessage, + } + } catch { + return null + } +} diff --git a/test/loop.test.ts b/test/loop.test.ts index 0617010..b9a163a 100644 --- a/test/loop.test.ts +++ b/test/loop.test.ts @@ -56,7 +56,7 @@ describe('LoopService', () => { worktreeBranch: 'opencode/loop-test', iteration: 1, maxIterations: 5, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const, @@ -106,35 +106,24 @@ describe('LoopService', () => { expect(retrieved).toBeNull() }) - test('checkCompletionPromise matches exact promise', () => { + test('checkCompletionPromise matches exact phrase', () => { const text = 'Some response text ALL_PHASES_COMPLETE more text' - expect(loopService.checkCompletionPromise(text, 'ALL_PHASES_COMPLETE')).toBe(true) + expect(loopService.checkCompletionPromise(text, 'ALL_PHASES_COMPLETE')).toBe(true) }) - test('checkCompletionPromise returns false when no promise tags', () => { - const text = 'Some response text without promise tags' - expect(loopService.checkCompletionPromise(text, 'ALL_PHASES_COMPLETE')).toBe(false) + test('checkCompletionPromise returns false when phrase not present', () => { + const text = 'Some response text without the phrase' + expect(loopService.checkCompletionPromise(text, 'ALL_PHASES_COMPLETE')).toBe(false) }) - 
test('checkCompletionPromise returns false when promise does not match', () => { + test('checkCompletionPromise returns false when phrase does not match', () => { const text = 'Some response NOT_COMPLETE text' - expect(loopService.checkCompletionPromise(text, 'ALL_PHASES_COMPLETE')).toBe(false) + expect(loopService.checkCompletionPromise(text, 'ALL_PHASES_COMPLETE')).toBe(false) }) - test('checkCompletionPromise handles whitespace normalization', () => { - const text = 'Response ALL_PHASES_COMPLETE WITH SPACES text' - expect(loopService.checkCompletionPromise(text, 'ALL_PHASES_COMPLETE WITH SPACES')).toBe(true) - }) - - test('checkCompletionPromise matches first promise tag when multiple present', () => { - const text = 'First FIRST second SECOND' - expect(loopService.checkCompletionPromise(text, 'FIRST')).toBe(true) - expect(loopService.checkCompletionPromise(text, 'SECOND')).toBe(false) - }) - - test('checkCompletionPromise handles multiline promise', () => { - const text = 'Response \n MULTI\n LINE\n text' - expect(loopService.checkCompletionPromise(text, 'MULTI LINE')).toBe(true) + test('checkCompletionPromise requires exact match', () => { + const text = 'Response ALL_PHASES_COMPLETE text' + expect(loopService.checkCompletionPromise(text, 'NOT_COMPLETE')).toBe(false) }) test('buildContinuationPrompt includes iteration number', () => { @@ -169,7 +158,7 @@ describe('LoopService', () => { worktreeBranch: 'opencode/loop-test', iteration: 1, maxIterations: 0, - completionPromise: 'COMPLETE_TASK', + completionPromise: 'COMPLETE_TASK', startedAt: new Date().toISOString(), prompt: 'My test prompt', phase: 'coding' as const, @@ -179,7 +168,7 @@ describe('LoopService', () => { } const prompt = loopService.buildContinuationPrompt(state) - expect(prompt).toContain('[Loop iteration 1 | To stop: output COMPLETE_TASK (ONLY after all verification steps pass)]') + expect(prompt).toContain('[Loop iteration 1 | To stop: output COMPLETE_TASK (ONLY after all verification commands 
pass AND all phase acceptance criteria are met)]') }) test('buildContinuationPrompt includes max iterations when no promise', () => { @@ -235,7 +224,7 @@ describe('LoopService', () => { worktreeBranch: 'opencode/loop-test', iteration: 5, maxIterations: 10, - completionPromise: 'PERSIST_TEST', + completionPromise: 'PERSIST_TEST', startedAt: new Date().toISOString(), prompt: 'Persistence test', phase: 'coding' as const, @@ -338,7 +327,7 @@ describe('LoopService', () => { worktreeBranch: 'opencode/loop-test', iteration: 2, maxIterations: 0, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const, @@ -452,7 +441,7 @@ describe('LoopService', () => { worktreeBranch: 'opencode/loop-test', iteration: 1, maxIterations: 5, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const, @@ -499,7 +488,7 @@ describe('LoopService', () => { worktreeBranch: 'main', iteration: 1, maxIterations: 5, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'In-place test prompt', phase: 'coding' as const, @@ -547,7 +536,7 @@ describe('LoopService', () => { worktreeBranch: 'main', iteration: 3, maxIterations: 0, - completionPromise: 'COMPLETE', + completionPromise: 'COMPLETE', startedAt: new Date().toISOString(), prompt: 'In-place prompt test', phase: 'coding' as const, @@ -939,7 +928,7 @@ describe('reconcileStale', () => { worktreeBranch: 'main', iteration: 3, maxIterations: 10, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const, @@ -1067,7 +1056,7 @@ describe('buildContinuationPrompt with outstanding findings', () => { worktreeBranch: 'opencode/loop-test', 
iteration: 3, maxIterations: 0, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const, @@ -1095,7 +1084,7 @@ describe('buildContinuationPrompt with outstanding findings', () => { worktreeBranch: 'opencode/loop-test', iteration: 2, maxIterations: 0, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const, @@ -1117,7 +1106,7 @@ describe('buildContinuationPrompt with outstanding findings', () => { worktreeBranch: 'opencode/loop-test', iteration: 3, maxIterations: 0, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const, @@ -1144,7 +1133,7 @@ describe('buildContinuationPrompt with outstanding findings', () => { worktreeBranch: 'opencode/loop-test', iteration: 2, maxIterations: 0, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const, @@ -1459,7 +1448,7 @@ describe('Assistant Error Detection', () => { worktreeBranch: 'main', iteration: 1, maxIterations: 5, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const, @@ -1879,7 +1868,7 @@ describe('Assistant Error Detection', () => { worktreeBranch: 'main', iteration: 1, maxIterations: 5, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const, diff --git a/test/plan-approval.test.ts b/test/plan-approval.test.ts index 3715fae..d5e31a2 100644 --- a/test/plan-approval.test.ts +++ b/test/plan-approval.test.ts @@ -97,7 +97,7 @@ Do NOT 
output text without also making this tool call. worktreeBranch: 'opencode/loop-test', iteration: 1, maxIterations: 5, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const, diff --git a/test/test-errors.ts b/test/test-errors.ts new file mode 100644 index 0000000..e8b3e1c --- /dev/null +++ b/test/test-errors.ts @@ -0,0 +1,95 @@ +// Test file with intentional TypeScript errors for testing + +import { NonExistentModule } from 'non-existent-package'; +import { readFile } from 'fs/promises'; + +// Error 1: Type mismatch +const count: number = "not a number"; + +// Error 2: Missing return type +function add(a: number, b: number) { + return a + b; +} + +// Error 3: Unused variable +const unusedVar = "this is never used"; + +// Error 4: Any type usage +function processAnything(data: any): any { + return data; +} + +// Error 5: Missing parameter type +function multiply(x, y: number): number { + return x * y; +} + +// Error 6: Accessing non-existent property +interface User { + id: number; + name: string; +} + +const user: User = { id: 1, name: "Test" }; +console.log(user.email); // Property 'email' does not exist + +// Error 7: Async/await misuse +async function fetchData() { + const result = readFile('missing-file.txt'); // Missing await + return result; +} + +// Error 8: Null/undefined issue +let maybeString: string | null = null; +const length = maybeString.length; // Object is possibly 'null' + +// Error 9: Wrong number of arguments +const arr = [1, 2, 3]; +arr.push(4, 5, 6); // This is actually valid, but let's add another error + +// Error 10: Enum mismatch +enum Status { + Active = "ACTIVE", + Inactive = "INACTIVE" +} + +function getStatus(): Status { + return "UNKNOWN" as Status; // Type 'string' is not assignable +} + +// Error 11: Missing required property +interface Config { + host: string; + port: number; + enabled: boolean; +} + +const config: 
Config = { + host: "localhost", + // Missing port and enabled +}; + +// Error 12: Incorrect generic type +const numbers: Array = [1, 2, 3]; +numbers.push("not a number"); + +// Error 13: Duplicate identifier +const duplicate = "first"; +const duplicate = "second"; + +// Error 14: Export not found +export { NonExistentExport } from './non-existent-file'; + +// Error 15: Circular reference potential +interface Node { + value: number; + next: Node | null; +} + +function createCircular(): Node { + const node: Node = { value: 1, next: null }; + node.next = node; // Creates circular reference + return node; +} + +export { add, processAnything, multiply, fetchData, getStatus, createCircular }; diff --git a/test/tool-blocking.test.ts b/test/tool-blocking.test.ts index 391d414..8bd79e2 100644 --- a/test/tool-blocking.test.ts +++ b/test/tool-blocking.test.ts @@ -57,7 +57,7 @@ describe('Tool Blocking Logic', () => { worktreeBranch: 'opencode/loop-test', iteration: 1, maxIterations: 5, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const, @@ -87,7 +87,7 @@ describe('Tool Blocking Logic', () => { worktreeBranch: 'opencode/loop-test', iteration: 1, maxIterations: 5, - completionPromise: 'ALL_PHASES_COMPLETE', + completionPromise: 'ALL_PHASES_COMPLETE', startedAt: new Date().toISOString(), prompt: 'Test prompt', phase: 'coding' as const,