diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index ec617987..30512cf6 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -1,17 +1,17 @@ { - "name": "openai-codex", + "name": "dragon-cc-codex", "owner": { "name": "OpenAI" }, "metadata": { "description": "Codex plugins to use in Claude Code for delegation and code review.", - "version": "1.0.4" + "version": "1.4.0" }, "plugins": [ { "name": "codex", "description": "Use Codex from Claude Code to review code or delegate tasks.", - "version": "1.0.4", + "version": "1.4.0", "author": { "name": "OpenAI" }, diff --git a/.githooks/pre-push b/.githooks/pre-push new file mode 100755 index 00000000..b3762aa5 --- /dev/null +++ b/.githooks/pre-push @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# +# Git pre-push hook +# Validates CHANGELOG, version bump, and README consistency before pushing. +# +# Installed via: npm run setup-hooks +# Bypass with: git push --no-verify + +set -e + +HOOK_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_ROOT="$(cd "$HOOK_DIR/.." && pwd)" + +exec node "$REPO_ROOT/scripts/pre-push-check.mjs" diff --git a/.gitignore b/.gitignore index 3d573eec..3a96fd9e 100644 --- a/.gitignore +++ b/.gitignore @@ -148,3 +148,11 @@ vite.config.ts.timestamp-* output/ plugins/codex/.generated/ + +# Local tool state (Claude Code, Codex plugin runtime) +.claude/ +.codex/ + +# Local Claude Code plugin state (but track marketplace.json manifest) +.claude-plugin/* +!.claude-plugin/marketplace.json diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..91feb7c2 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,162 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [1.4.0] - 2026-05-23 + +### Added + +- **`/codex:observe` now auto-spawns the observer inside Ghostty and iTerm2 on macOS** (previously tmux-only, shipped in 1.3.0). The single hardcoded tmux branch in `plugins/codex/scripts/lib/spawner.mjs` was replaced with a small backend strategy table `{ kind, detect, build, cmd, classifyFailure }` driving three backends: + - `tmux` — unchanged (`split-window -h -c `); detection via `$TMUX`. + - `ghostty-mac` — detection via `process.platform === 'darwin'` AND `process.env.TERM_PROGRAM === 'ghostty'` AND no `$TMUX`. Always opens a new Ghostty window (Ghostty 1.3's terminal object exposes only `id`, `name`, `working directory` — no `tty`, so reliable session targeting is impossible until upstream adds it). Drills `set newWin to new window` → `set newTerm to terminal 1 of selected tab of newWin` → `input text "\n" to newTerm`. + - `iterm2-mac` — detection via `process.platform === 'darwin'` AND `process.env.TERM_PROGRAM === 'iTerm.app'` AND no `$TMUX`. Walks the process ancestry to discover the caller shell's controlling tty, then iterates `windows → tabs of w → sessions of tb` (sessions are nested under tabs in iTerm2's object model, NOT directly under windows) comparing `tty of s` to the caller tty. On match: `split vertically with default profile` in that session. On no match (or tty discovery failure): `create window with default profile` and `current session of newWindow`. +- Detection precedence is `tmux > ghostty-mac > iterm2-mac > none`; users running tmux inside Ghostty/iTerm2 still get the tmux split. 
+- Two-layer command quoting pipeline for osascript backends: `composeShellInvocation({ cwd, command })` (shell-quotes cwd, leaves the already-shell-quoted argv tokens of `command` untouched) → `rejectControlChars(composed)` (early-returns `unsafe-command` on any byte in `0x00–0x1F` minus tab/space, so embedded newlines / NUL / CR can never reach `input text` / `write text`) → `escapeAppleScriptLiteral(composed)` (doubles `\` and `"`). +- Automation-permission denial classified separately from generic spawn failure: when `osascript` stderr contains `(-1743)` or `not authorized to send Apple events` (case-insensitive), the spawner returns `reason: 'automation-permission-denied'` and `handleObserveSpawn` prints a single dedicated line directing the user to System Settings → Privacy & Security → Automation, instead of the generic copy-paste fallback hint. +- `discoverCallerTty()` walks up the process tree via `ps -o tty=,ppid= -p ` (depth-capped at 10, per-probe timeout 250ms) until it finds an ancestor with a real controlling tty, and resolves to `/dev/ttysNN`. Sandboxed shells / detached daemons that lack `ps` access fall through to the new-window branch with no AppleScript `repeat` loop emitted. + +### Fixed + +- **Three HIGH-severity AppleScript object-model bugs from the initial 1.4.0 implementation**, surfaced by `/codex:adversarial-review` and confirmed against the published Ghostty 1.3 and iTerm2 dictionaries via Context7: + - The Ghostty backend used `repeat with t in terminals … if tty of t is targetTty …`, but Ghostty 1.3's terminal has no `tty` property — that branch would have thrown at runtime in real Ghostty. Removed the `repeat` entirely; Ghostty backend always opens a new window. + - The Ghostty backend treated `new window`'s return value as a terminal and called `input text … to newWin` directly. AppleScript would have refused the cast at runtime. Now drills via `set newTerm to terminal 1 of selected tab of newWin` before `input text`. 
+ - The iTerm2 backend traversed `repeat with w in windows / repeat with s in sessions of w / …`. That AppleScript-compiles, but `sessions` is NOT a direct element of `window` in iTerm2's object model — at runtime the inner loop iterated an empty collection, so the tty-match branch never fired and every observer fell through to the new-window path. Now correctly nests `repeat with w in windows → repeat with tb in tabs of w → repeat with s in sessions of tb`. A spec scenario locks the source order so the contract cannot regress. +- Tests strengthened with contract-shape assertions (Ghostty: `set newWin to new window`, `set newTerm to terminal 1 of selected tab of newWin`, `input text "…" to newTerm`, plus negative `tty of t` / `repeat with t in terminals`; iTerm2: `repeat with w in windows`, `repeat with tb in tabs of w`, `repeat with s in sessions of tb`, plus source-order assertion). Added a dedicated 8-test `discoverCallerTty` unit suite covering immediate-parent hit, walk-past-`??`-ancestor, `/dev/` prefix preservation, `ppid<=1` termination, runProbe throws, malformed output, depth-10 cap, and invalid `startPid`. + +## [1.3.0] - 2026-05-22 + +### Added + +- **`/codex:observe` auto-spawns the observer in a tmux split when invoked from inside a tmux session.** Previously the slash command always printed the observe invocation as a copy-paste hint and required the user to open a new pane manually. The new flow detects `$TMUX` and shells out to `tmux split-window -h -c `, then prints `✓ Observer launched in tmux pane (job )`. Outside tmux the command falls back to the existing copy-paste hint, so non-tmux users see no behavior change. +- New `plugins/codex/scripts/lib/spawner.mjs` module with `detectTerminal`, `buildTmuxSplitArgs`, and `spawnObserverInTerminal({ cwd, command, env, runner })`. The runner is injected, so unit tests pass a fake without invoking real `tmux`. 
+- New `tests/spawner.test.mjs` covers tmux detection, split-args shape, success / non-zero-exit / runner-throw classification, and the no-tmux pass-through path. + +## [1.2.6] - 2026-05-22 + +### Fixed + +- **Cross-session "Job not found" still bit users in 1.2.5** because the per-process `CLAUDE_PLUGIN_DATA` env var fragmented state across multiple roots that no single scan covered. Two real-world triggers exposed it: (a) the marketplace rename `openai-codex → dragon-cc-codex` (commit `e6ef383`) moved Claude Code's plugin data path from `~/.claude/plugins/data/codex-openai-codex/` to `~/.claude/plugins/data/codex-dragon-cc-codex/`, orphaning jobs created before the rename; (b) running `codex-companion.mjs` from a shell without `CLAUDE_PLUGIN_DATA` set silently fell through to `$TMPDIR/codex-companion/`, which is volatile and disjoint from the plugin data dir. `findJobByIdAcrossWorkspaces` introduced in 1.2.5 only scanned `resolveStateRoot()`, so it could not see across these roots. Users worked around the symptom by exporting `CLAUDE_PLUGIN_DATA` manually before each invocation. + - `plugins/codex/scripts/lib/state.mjs`: default fallback root moved from `$TMPDIR/codex-companion/` (volatile, per-user-launchd) to `~/.codex-companion/state/` (stable, HOME-anchored). `CLAUDE_PLUGIN_DATA` is still honored when the plugin host sets it, so we remain a good plugin citizen. + - `findJobByIdAcrossWorkspaces` now iterates `collectCandidateStateRoots()`: the current `resolveStateRoot()`, the HOME default, `$TMPDIR/codex-companion/` (legacy), and every `~/.claude/plugins/data/codex-*/state/` directory (handles slug renames). Scan order keeps the current root first so test fixtures are not shadowed by leftover legacy data. + - New `collectWorkspaceJobsAcrossRoots(workspaceRoot)` reads `state.json` for the workspace's slug-hash across every candidate root and merges jobs by id (newer `updatedAt` wins on conflict). 
+ - `lib/job-control.mjs` `buildStatusSnapshot`: with `--all`, jobs are now collected across roots **and** the per-Claude-session filter is bypassed, so users in a fresh session can recover the id of a job they created in an earlier session via `/codex:status --all`. The default (no flag) still scopes to the current session in the current root — explicit opt-in to the wider view. + - Test isolation: cleaned 246 leaked `codex-plugin-test-*` and `broker-test-*` state directories under `~/.claude/plugins/data/codex-dragon-cc-codex/state/` and 167 under `$TMPDIR/codex-companion/` that prior `npm test` runs deposited into the real user plugin data dir. `tests/helpers.mjs` now rewrites `CLAUDE_PLUGIN_DATA` to a per-suite `mkdtemp` path if it points outside `os.tmpdir()`, sets `CODEX_COMPANION_LEGACY_ROOTS=""` so the multi-root scan stays sandboxed for ordinary tests, and strips `CODEX_COMPANION_SESSION_ID` from the test process so fixture jobs without a `sessionId` are not session-filtered out by status/result subprocesses. + - New env knob `CODEX_COMPANION_LEGACY_ROOTS` (path-separated): empty string disables legacy scanning (test isolation default); non-empty replaces the legacy scan list with the supplied roots so regression tests can exercise the cross-root fallback without polluting real directories. + - Tests: `tests/state.test.mjs` swaps the old "temp-backed per-workspace directory" assertion for a HOME-anchored fallback test that explicitly unsets `CLAUDE_PLUGIN_DATA`. `tests/job-control.test.mjs` gains a `multi-root state scan` suite that proves `findJobByIdAcrossWorkspaces` falls through to a legacy root and `buildStatusSnapshot({ all: true })` merges jobs from primary + legacy state files for the same workspace slug. 
+ +## [1.2.5] - 2026-05-22 + +### Fixed + +- **`/codex:observe`, `/codex:status `, `/codex:result `, `/codex:cancel ` returned "Job not found" when invoked from a Claude Code session whose git workspace differed from the one in which the job was created.** State is partitioned per workspace under `$CLAUDE_PLUGIN_DATA/state/-/`, and every command resolved the workspace from `process.cwd()` only. So a user who saw a job in `/codex:status` from workspace A and then copied the job id into a slash command running in workspace B hit a hard miss even though the job record was still on disk. + - Added `findJobByIdAcrossWorkspaces(jobId)` in `plugins/codex/scripts/lib/state.mjs`: scans every `state.json` under the configured state root and returns `{ stateDir, job }` for an exact id match (corrupted state files are skipped, not propagated). + - `lib/observe.mjs`: when the local workspace does not contain the requested job id, fall back to the cross-workspace lookup. The header prints a one-line note showing which state dir was used so the cross-boundary read is auditable, and the tail continues to use the absolute `eventFile` recorded on the job. + - `lib/job-control.mjs`: `buildSingleJobSnapshot`, `resolveResultJob`, and `resolveCancelableJob` each fall back to the cross-workspace match when an explicit reference misses locally. The returned `workspaceRoot` is the job's original workspace, so all subsequent `readStoredJob` / `writeJobFile` / `upsertJob` calls land in the correct state dir without further plumbing. Active/finished predicates are still honored across the boundary — e.g., `/codex:result` on a still-running cross-workspace job surfaces a "still running in another workspace" error instead of silently picking it up. + - `codex-companion.mjs` `handleCancel`: passes the resolved `workspaceRoot` (not the invocation `cwd`) to `interruptAppServerTurn`, so the broker interrupt targets the workspace that actually owns the running Codex turn. 
+ - Tests: `tests/observe.test.mjs` adds coverage for `findJobByIdAcrossWorkspaces` (stateRoot missing, empty id, hit, miss, corrupted state.json). `tests/job-control.test.mjs` (new file) covers cross-workspace fallback for the three resolvers, including predicate rejection paths. + +## [1.2.4] - 2026-05-22 + +### Fixed + +- **`/codex:observe` slash command produced no output in Claude Code** — removed the inline `` !`...` `` shell-exec fallback from `plugins/codex/commands/observe.md`. The fallback invoked the long-running live tail (`handleObserveCommand` waits indefinitely for `COMPLETED` or `SIGINT`), and because Claude Code's slash-exec model buffers stdout until the child process exits, a never-returning process gated the entire slash-command body — including the 36 lines of "open a new terminal" guidance that preceded it. Users typing `/codex:observe` saw nothing at all. + - The slash command body is now a pure static guidance document: it tells the user to open a new terminal and shows the copy-paste `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" observe …` snippet. It renders immediately regardless of job state. + - The CLI `observe` subcommand in `codex-companion.mjs` / `lib/observe.mjs` is **unchanged** — running `node codex-companion.mjs observe` in a terminal works exactly as before (live ANSI tail, `Ctrl+C` detach, completes on `COMPLETED` event). + - Structural rule captured in `openspec/changes/fix-observe-slash-command-hang/specs/observe-slash-command/spec.md`: slash command bodies MUST NOT contain inline shell-exec blocks that invoke processes which do not terminate in bounded time. One-shot subprocesses (`/codex:cancel`, `/codex:result`, `/codex:status`) may continue to use inline exec. 
+ +## [1.2.3] - 2026-05-21 + +### Changed + +- **Marketplace renamed `openai-codex` → `dragon-cc-codex`** to disambiguate this fork from the upstream OpenAI marketplace and avoid the name collision that would prevent a user from having both installed side by side. + - `.claude-plugin/marketplace.json`: `name` field updated. + - `README.md` / `README.zh-CN.md`: install instructions now point at `dragon84867/codex-plugin-cc` (fork repo) and `codex@dragon-cc-codex` (renamed marketplace). + - **Migration for existing users**: run `/plugin marketplace remove openai-codex` once, then re-add via `/plugin marketplace add dragon84867/codex-plugin-cc` and `/plugin install codex@dragon-cc-codex`. + +## [1.2.2] - 2026-05-21 + +### Fixed + +- **Marketplace install failure** — restored the `plugins/codex/` subdirectory layout so the Claude Code marketplace installer can discover the plugin. + - Reverts the flatten refactor (af88f38) and its follow-up `source: "./"` patch (cf2917c). The Claude Code marketplace spec requires `source` to point at a subdirectory of the marketplace repo (e.g. `./plugins/codex`); pointing it at the marketplace root itself is unsupported and caused installs to fail with "unsupported source type" / undiscoverable commands. + - Plugin runtime, commands, hooks, agents, skills, prompts, schemas, and `.claude-plugin/plugin.json` are back under `plugins/codex/`. `marketplace.json` stays at the repo root and now points at `./plugins/codex` again, matching the OpenAI upstream layout. +- **Align with current codex protocol** — restoring the subdirectory layout also restored correct `.d.ts` relative imports for the generated app-server types, which had been silently broken by the flatten refactor (making TypeScript treat the imported types as `any` and skip checking). 
With type-check re-enabled, two long-standing protocol-drift bugs surfaced and are now fixed: + - `app-server.mjs`: `DEFAULT_CAPABILITIES` now includes `requestAttestation: false`, matching the required `InitializeCapabilities` shape. + - `codex.mjs`: removed the obsolete `experimentalRawEvents: false` field from `buildThreadParams`; it is no longer part of `ThreadStartParams` in the current codex protocol. + - Runtime behavior is unchanged — codex's JSON-RPC tolerated the missing/extra fields, so existing installs continue to work. This change just unblocks `npm run build` / CI type-checking. + +## [1.2.0] - 2026-05-20 + +### Added + +- **Pre-push git hook** — validates CHANGELOG, version bump, and README consistency before pushing + - Blocks push if plugin source changed without version bump + - Blocks push if version bumped without matching CHANGELOG entry + - Warns if version bumped without README update + - Auto-detects suggested bump type (major / minor / patch) from changed files and commit messages + - Install: `npm run setup-hooks` | Bypass: `git push --no-verify` + +### Fixed + +- **Broker process leak** — stale broker processes were never killed, accumulating hundreds of orphans + - `ensureBrokerSession` now defaults `killProcess` to `terminateProcessTree` so stale brokers are actually terminated + - Broker auto-exits after 5 seconds of idle (no connected clients) +- **marketplace.json version sync** — `.claude-plugin/marketplace.json` was accidentally gitignored, causing version to silently fall behind. 
Now properly tracked with `.claude-plugin/*` + `!.claude-plugin/marketplace.json` pattern + +## [1.1.0] - 2026-05-20 + +### Added + +- **`/codex:observe`** — Real-time live observer for Codex tasks with ANSI color output + - Watch tool calls, file changes, commands, messages, and reasoning as they happen + - Color-coded output: cyan (tools), blue (commands), green (success), red (failure), yellow (file changes) + - Read-only mode — observer never affects the running Codex task + - `Ctrl+C` to detach without stopping the Codex task + - Works in a separate terminal window alongside your Claude Code session + - Automatically renders full history for completed jobs +- **JSONL event stream** — Structured event logging (`.events.jsonl`) for each job + - Append-only format for safe concurrent reads + - Integrated with existing progress reporter pipeline + - Automatic cleanup with job pruning +- **26 unit tests** covering event stream writer and observer functionality + +### Changed + +- Job records now include `eventFile` field alongside `logFile` +- `createProgressReporter` accepts `eventStream` parameter for structured event emission + +### Documentation + +- Added `/codex:observe` usage examples and color legend to README +- Added Chinese translation for observer documentation + +## [1.0.4] - 2026-05-20 + +### Added + +- **`--worktree` flag** for `/codex:rescue` — Creates isolated git worktree for Codex work + - Codex works in `.claude/worktrees//` on a separate branch + - Leaves main working directory untouched + - Mutually exclusive with `--resume` +- **`sandbox_mode` config** — Reads from `~/.codex/config.toml` or `.codex/config.toml` + - Falls back to `workspace-write` (with `--write`) or `read-only` + +### Fixed + +- Thread exclusivity warning — Users cannot manually `codex resume` an active thread +- Signal file + Monitor/PushNotification callback for background tasks +- Route `/codex:rescue` through Agent tool to stop Skill recursion + +### Documentation + +- 
Added Chinese README (`README.zh-CN.md`) +- Documented `--worktree` and sandbox_mode configuration + +[1.2.3]: https://github.com/dragon84867/codex-plugin-cc/compare/v1.2.2...v1.2.3 +[1.2.2]: https://github.com/dragon84867/codex-plugin-cc/compare/v1.2.0...v1.2.2 +[1.2.0]: https://github.com/dragon84867/codex-plugin-cc/compare/v1.1.0...v1.2.0 +[1.1.0]: https://github.com/dragon84867/codex-plugin-cc/compare/v1.0.4...v1.1.0 +[1.0.4]: https://github.com/dragon84867/codex-plugin-cc/compare/v1.0.3...v1.0.4 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..f880d620 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,70 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +This is `@openai/codex-plugin-cc` — a Claude Code plugin that wraps the [Codex app server](https://developers.openai.com/codex/app-server) and Codex CLI, exposing slash commands (`/codex:review`, `/codex:adversarial-review`, `/codex:rescue`, `/codex:status`, `/codex:result`, `/codex:cancel`, `/codex:observe`, `/codex:setup`) and a `codex:codex-rescue` subagent. The plugin lives in `plugins/codex/`. + +## Commands + +```bash +# Generate app-server TypeScript types (requires `codex` binary on PATH) +npm run prebuild + +# Type-check (no emit, checkJs over .mjs sources) +npm run build + +# Run all tests (Node.js built-in test runner, no framework) +npm test + +# Run a single test file +node --test tests/<name>.test.mjs + +# Version bump +node scripts/bump-version.mjs [--check] +``` + +There is no bundler, no runtime transpiler, and no lint step. Tests run the `.mjs` sources directly with `node --test`. + +## Architecture + +The runtime is a single CLI entry point, `plugins/codex/scripts/codex-companion.mjs`, dispatched by subcommand (`setup`, `review`, `adversarial-review`, `task`, `status`, `result`, `cancel`, `observe`).
Slash commands in `plugins/codex/commands/*.md` shell out to this script via `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" "$ARGUMENTS"`. + +Key modules under `plugins/codex/scripts/lib/`: + +- `codex.mjs` — high-level Codex operations: auth/availability checks, `runAppServerTurn`, `runAppServerReview`, structured output parsing, session runtime status +- `app-server.mjs` — low-level Codex app server stdio protocol client +- `app-server-protocol.d.ts` + `.generated/app-server-types/` — generated types consumed by the build +- `broker-endpoint.mjs` / `broker-lifecycle.mjs` — manage a persistent app-server broker process shared across commands +- `job-control.mjs` / `tracked-jobs.mjs` — background job records, progress updates, cancellation +- `state.mjs` — per-workspace state dir (hashed slug under `CLAUDE_PLUGIN_DATA`, or the HOME-anchored `~/.codex-companion/state/` fallback when that variable is unset; `$TMPDIR/codex-companion/` is scanned only as a legacy root), `state.json` + `jobs/` directory, capped at 50 jobs +- `git.mjs` — review target resolution (`auto` / `working-tree` / `branch`), context collection +- `render.mjs` — all user-facing output formatting +- `args.mjs` — argument parsing; flags like `--wait`, `--background`, `--resume-last`, `--model`, `--effort` are routing controls stripped before the task text is forwarded +- `prompts.mjs` — loads templates from `plugins/codex/prompts/` and interpolates them +- `codex-config.mjs` — reads `sandbox_mode` from user's Codex config (`~/.codex/config.toml` / `.codex/config.toml`) +- `process.mjs` — process tree termination, binary availability checks +- `workspace.mjs` — resolves the workspace root (honoring `CLAUDE_WORKSPACE_ROOT`) + +The `codex:codex-rescue` subagent (`plugins/codex/agents/codex-rescue.md`) is a thin forwarding wrapper: it does exactly one `Bash` call to `codex-companion.mjs task ...` and returns stdout verbatim. It must not read the repo, reason about the problem, or do any independent work.
+ +Hooks are declared in `plugins/codex/hooks/hooks.json`: +- `SessionStart` / `SessionEnd` → `session-lifecycle-hook.mjs` (bookkeeping) +- `Stop` → `stop-review-gate-hook.mjs` (optional review gate; opt-in via `/codex:setup --enable-review-gate`) + +Skills in `plugins/codex/skills/` (`codex-cli-runtime`, `codex-result-handling`, `gpt-5-4-prompting`) are loaded by the subagent, not by the main Claude thread. + +## Conventions + +- ESM only (`"type": "module"` in `package.json`). All sources are `.mjs` except the generated `.ts` types and the `.d.ts` protocol file. +- TypeScript is used purely for type-checking via `checkJs` + `noEmit`; `strict` is off. Don't add `.ts` source files. +- Node.js ≥ 18.18. Use only Node built-ins; there are no runtime npm dependencies (devDeps are `typescript` and `@types/node` only). +- The plugin picks up the user's existing `codex` binary, auth state, and `~/.codex/config.toml` / `.codex/config.toml`. Don't hardcode models or endpoints; `MODEL_ALIASES` in `codex-companion.mjs` is the only alias map (`spark` → `gpt-5.3-codex-spark`). +- Task mode (`/codex:rescue`) reads `sandbox_mode` from the user's Codex config via `codex-config.mjs`. If not configured, falls back to `workspace-write` (when `--write` is set) or `read-only`. Review commands always use `read-only` regardless of config. +- Tests use temp git repos (`tests/helpers.mjs` → `initGitRepo`) and a fake codex fixture (`tests/fake-codex-fixture.mjs`) to drive the companion script without a real Codex install. +- `CLAUDE_PLUGIN_ROOT` is set by Claude Code at hook/command invocation time and points at `plugins/codex/`. Scripts resolve paths relative to `import.meta.url`, not `process.cwd()`. + +## Version + +Plugin version is declared in both `package.json` and `plugins/codex/.claude-plugin/plugin.json` and must stay in sync. `npm run check-version` enforces this (run in CI). `scripts/bump-version.mjs` updates both. 
diff --git a/README.md b/README.md index 458c39fb..077bd50a 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Codex plugin for Claude Code +**[中文文档](README.zh-CN.md)** + Use Codex from inside Claude Code for code reviews or to delegate tasks to Codex. This plugin is for Claude Code users who want an easy way to start using Codex from the workflow @@ -12,6 +14,7 @@ they already have. - `/codex:review` for a normal read-only Codex review - `/codex:adversarial-review` for a steerable challenge review - `/codex:rescue`, `/codex:status`, `/codex:result`, and `/codex:cancel` to delegate work and manage background jobs +- `/codex:observe` for real-time live observation of running Codex tasks with ANSI color output ## Requirements @@ -24,13 +27,13 @@ they already have. Add the marketplace in Claude Code: ```bash -/plugin marketplace add openai/codex-plugin-cc +/plugin marketplace add dragon84867/codex-plugin-cc ``` Install the plugin: ```bash -/plugin install codex@openai-codex +/plugin install codex@dragon-cc-codex ``` Reload plugins: @@ -137,7 +140,9 @@ Use it when you want Codex to: > [!NOTE] > Depending on the task and the model you choose these tasks might take a long time and it's generally recommended to force the task to be in the background or move the agent to the background. -It supports `--background`, `--wait`, `--resume`, and `--fresh`. If you omit `--resume` and `--fresh`, the plugin can offer to continue the latest rescue thread for this repo. +It supports `--background`, `--wait`, `--worktree`, `--resume`, and `--fresh`. If you omit `--resume` and `--fresh`, the plugin can offer to continue the latest rescue thread for this repo. + +**Sandbox mode.** Task mode reads `sandbox_mode` from your Codex config (`~/.codex/config.toml` or `.codex/config.toml`). If not configured, it falls back to `workspace-write` (when `--write` is set) or `read-only`. 
Examples: @@ -148,6 +153,7 @@ Examples: /codex:rescue --model gpt-5.4-mini --effort medium investigate the flaky integration test /codex:rescue --model spark fix the issue quickly /codex:rescue --background investigate the regression +/codex:rescue --worktree investigate and fix the failing integration test ``` You can also just ask for a task to be delegated to Codex: @@ -161,6 +167,10 @@ Ask Codex to redesign the database connection to be more resilient. - if you do not pass `--model` or `--effort`, Codex chooses its own defaults. - if you say `spark`, the plugin maps that to `gpt-5.3-codex-spark` - follow-up rescue requests can continue the latest Codex task in the repo +- `--worktree` creates an isolated git worktree under `.claude/worktrees//` on a dedicated branch so Codex can work without touching your main working directory. `--worktree` and `--resume` are mutually exclusive. + +> [!WARNING] +> **Thread exclusivity**: While a Codex task is running, do not manually run `codex resume` on the same thread from a terminal. The Codex backend enforces single-turn exclusivity per thread, and attempting to resume an active thread will block or pause your CLI session. Wait for the task to complete (check `/codex:status`), or use `/codex:cancel` to stop the task first. If you need to run Codex in parallel, start a fresh thread with `codex` (without `--resume`). ### `/codex:status` @@ -171,6 +181,7 @@ Examples: ```bash /codex:status /codex:status task-abc123 +/codex:status --all ``` Use it to: @@ -179,6 +190,8 @@ Use it to: - see the latest completed job - confirm whether a task is still running +`--all` widens the listing to include jobs created in other Claude Code sessions and in legacy state directories (e.g., earlier plugin slugs, `$TMPDIR/codex-companion/`). Use it to recover the id of a task you started in an earlier session — by id, every other slash command (`/codex:result`, `/codex:cancel`, `/codex:observe`) already works across sessions and roots. 
+ ### `/codex:result` Shows the final stored Codex output for a finished job. @@ -202,6 +215,41 @@ Examples: /codex:cancel task-abc123 ``` +### `/codex:observe` + +Opens a real-time live observer for a running Codex job. Shows tool calls, file changes, commands, messages, and reasoning with ANSI color output. + +The observer is **read-only** and does not affect the running Codex task. Press `Ctrl+C` to detach — the Codex task continues running. + +**Auto-spawn (1.3.0+):** when invoked from inside a supported terminal, `/codex:observe` opens the observer in a new pane / window automatically — no copy-paste required. Detection precedence is `tmux > Ghostty > iTerm2 > none`: + +- **Inside tmux** — `tmux split-window -h -c ` opens a vertical split running the observer. +- **Inside Ghostty on macOS** (1.4.0+) — opens a new Ghostty window via AppleScript and feeds the observer command into it. Always opens a new window because Ghostty 1.3's terminal object exposes no `tty` property to target the calling session reliably. +- **Inside iTerm2 on macOS** (1.4.0+) — discovers the calling shell's tty by walking the process ancestry and splits *that* session vertically (`split vertically with default profile`). When no matching session is found (different window, sandboxed shell, etc.), opens a new iTerm2 window instead of splitting an unrelated front window. +- **First run on macOS** triggers the standard Automation permission prompt (System Settings → Privacy & Security → Automation). The plugin recognises the permission-denied error and prints a one-line "grant access and retry" hint instead of the generic copy-paste fallback. +- **Outside any supported terminal** — the slash command prints the exact `node /path/to/companion.mjs observe …` invocation for you to paste into a separate terminal yourself. 
+ +Examples: + +```bash +/codex:observe +/codex:observe task-abc123 +/codex:observe --cwd /path/to/project +``` + +**Color legend:** + +| Color | Event Type | +|-------|-----------| +| Cyan | Tool calls (`→ Read src/foo.ts`) | +| Blue | Commands (`$ npm test`) | +| Green | Success (`exit 0`, `● completed`) | +| Red | Failure (`exit 1`) | +| Yellow | File changes (`✎ src/auth.ts (modify)`) | +| Dim | Messages and reasoning | + +If the target job is already completed, the observer renders the full event history and exits immediately. + ### `/codex:setup` Checks whether Codex is installed and authenticated. @@ -242,6 +290,24 @@ When the review gate is enabled, the plugin uses a `Stop` hook to run a targeted /codex:rescue --background investigate the flaky test ``` +### Watch Codex Work in Real-Time + +In a separate terminal: + +```bash +/codex:observe +``` + +This gives you a live, color-coded view of what Codex is doing — tool calls, file edits, test runs, and its final answer — without blocking your Claude Code session. + +### Isolated Work With `--worktree` + +```bash +/codex:rescue --worktree fix the broken auth middleware +``` + +Codex works in `.claude/worktrees//` on a separate branch, leaving your main working directory untouched. This is useful when you want Codex to make changes without affecting your current branch. + Then check in with: ```bash @@ -276,6 +342,32 @@ Delegated tasks and any [stop gate](#what-does-the-review-gate-do) run can also This way you can review the Codex work or continue the work there. 
+## Development + +### Pre-push Hook + +Install the git pre-push hook to validate releases before pushing: + +```bash +npm run setup-hooks +``` + +The hook checks every push for: +- **Version bump required** — blocks if plugin source files changed without a version bump +- **CHANGELOG entry required** — blocks if version was bumped but CHANGELOG.md has no matching entry +- **README update warning** — warns if version was bumped without updating README.md +- **Bump type validation** — warns if the actual bump (major/minor/patch) doesn't match what the changes suggest + +Bypass with `git push --no-verify` if needed. + +### Version Bumping + +```bash +node scripts/bump-version.mjs <version> +``` + +Updates all version manifests: `package.json`, `package-lock.json`, `plugin.json`, and `marketplace.json`. + ## FAQ ### Do I need a separate Codex account for this plugin? @@ -303,3 +395,14 @@ Yes. If you already use Codex, the plugin picks up the same [configuration](#com Yes. Because the plugin uses your local Codex CLI, your existing sign-in method and config still apply. If you need to point the built-in OpenAI provider at a different endpoint, set `openai_base_url` in your [Codex config](https://developers.openai.com/codex/config-advanced/#config-and-state-locations). + +### Where is my job state stored? + +Job records, logs, and event streams live under a per-workspace state directory. The plugin picks the root in this order: + +1. `$CLAUDE_PLUGIN_DATA/state/` — set by Claude Code when the slash command runs through the plugin host. Honored when present. +2. `~/.codex-companion/state/` — stable HOME-anchored fallback. Used when the env var is not set (e.g., running `node codex-companion.mjs` directly from a shell). + +Inside that root, each workspace gets `<slug>-<hash>/` (slug + hash of the canonical workspace path), with `state.json`, per-job `<job-id>.json`, `<job-id>.log`, and `<job-id>.events.jsonl` files. 
+ +When you look up a job by id (`/codex:status `, `/codex:result `, `/codex:cancel `, `/codex:observe `), the plugin also scans legacy locations — `$TMPDIR/codex-companion/` (older fallback) and every `~/.claude/plugins/data/codex-*/state/` directory (handles marketplace plugin renames) — so jobs created before an upgrade remain findable. `/codex:status --all` extends the same multi-root view to the no-arg listing and bypasses the per-Claude-session filter, which is the recommended way to recover the id of a task started in an earlier session. The advanced env knob `CODEX_COMPANION_LEGACY_ROOTS` (path-separated) lets you replace the legacy scan list explicitly; an empty value disables legacy scanning entirely. Real users should not need to touch it. diff --git a/README.zh-CN.md b/README.zh-CN.md new file mode 100644 index 00000000..12860d33 --- /dev/null +++ b/README.zh-CN.md @@ -0,0 +1,398 @@ +# Claude Code 的 Codex 插件 + +**[English](README.md)** + +在 Claude Code 中使用 Codex 进行代码审查或将任务委派给 Codex。 + +本插件面向 Claude Code 用户,提供一种便捷方式,让你在现有工作流中轻松使用 Codex。 + + + +## 功能一览 + +- `/codex:review` — 常规只读 Codex 代码审查 +- `/codex:adversarial-review` — 可引导的对抗性审查 +- `/codex:rescue`、`/codex:status`、`/codex:result`、`/codex:cancel` — 委派任务和管理后台作业 +- `/codex:observe` — 实时观察运行中的 Codex 任务,支持 ANSI 彩色输出 + +## 环境要求 + +- **ChatGPT 订阅(含免费版)或 OpenAI API Key。** + - 使用量将计入你的 Codex 用量配额。[了解更多](https://developers.openai.com/codex/pricing)。 +- **Node.js 18.18 或更高版本** + +## 安装 + +在 Claude Code 中添加插件市场: + +```bash +/plugin marketplace add dragon84867/codex-plugin-cc +``` + +安装插件: + +```bash +/plugin install codex@dragon-cc-codex +``` + +重新加载插件: + +```bash +/reload-plugins +``` + +然后运行: + +```bash +/codex:setup +``` + +`/codex:setup` 会检测 Codex 是否就绪。如果 Codex 未安装且 npm 可用,它会提示你自动安装。 + +如果你想手动安装 Codex: + +```bash +npm install -g @openai/codex +``` + +如果 Codex 已安装但尚未登录,运行: + +```bash +!codex login +``` + +安装完成后,你应该能看到: + +- 下方列出的斜杠命令 +- `/agents` 中的 `codex:codex-rescue` 子代理 + +最简单的首次运行方式: + +```bash 
+/codex:review --background +/codex:status +/codex:result +``` + +## 用法 + +### `/codex:review` + +对当前代码运行常规的 Codex 审查。审查质量与直接在 Codex 中运行 `/review` 相同。 + +> [!NOTE] +> 多文件变更的代码审查可能耗时较长,通常建议在后台运行。 + +适用场景: + +- 审查当前未提交的变更 +- 审查当前分支与基础分支(如 `main`)的差异 + +使用 `--base ` 进行分支对比审查。支持 `--wait` 和 `--background`。该命令不可引导,不接受自定义关注文本。如需针对特定决策或风险区域进行挑战,请使用 [`/codex:adversarial-review`](#codexadversarial-review)。 + +示例: + +```bash +/codex:review +/codex:review --base main +/codex:review --background +``` + +该命令为只读,不会执行任何修改。在后台运行时,可使用 [`/codex:status`](#codexstatus) 查看进度,使用 [`/codex:cancel`](#codexcancel) 取消正在进行的任务。 + +### `/codex:adversarial-review` + +运行**可引导的**对抗性审查,质疑所选实现和设计。 + +可用于压力测试假设、权衡取舍、故障模式,以及是否有更安全或更简单的替代方案。 + +使用与 `/codex:review` 相同的审查目标选择方式,包括 `--base ` 进行分支审查。支持 `--wait` 和 `--background`。与 `/codex:review` 不同,它可以在标志后附加额外的关注文本。 + +适用场景: + +- 发布前审查,挑战方向而不仅仅是代码细节 +- 聚焦于设计选择、权衡、隐含假设和替代方案的审查 +- 针对特定风险区域的压力测试,如认证、数据丢失、回滚、竞态条件或可靠性 + +示例: + +```bash +/codex:adversarial-review +/codex:adversarial-review --base main challenge whether this was the right caching and retry design +/codex:adversarial-review --background look for race conditions and question the chosen approach +``` + +该命令为只读,不会修复代码。 + +### `/codex:rescue` + +通过 `codex:codex-rescue` 子代理将任务交给 Codex。 + +适用场景: + +- 调查 bug +- 尝试修复 +- 继续之前的 Codex 任务 +- 使用更小的模型进行更快或更经济的处理 + +> [!NOTE] +> 根据任务和所选模型的不同,这些任务可能耗时较长,通常建议在后台运行或将代理移至后台。 + +支持 `--background`、`--wait`、`--worktree`、`--resume` 和 `--fresh`。如果省略 `--resume` 和 `--fresh`,插件会提示是否继续该仓库最近的 rescue 线程。 + +**沙箱模式。** 任务模式会从你的 Codex 配置文件(`~/.codex/config.toml` 或 `.codex/config.toml`)中读取 `sandbox_mode`。如果未配置,则回退到 `workspace-write`(当设置了 `--write` 时)或 `read-only`。 + +示例: + +```bash +/codex:rescue investigate why the tests started failing +/codex:rescue fix the failing test with the smallest safe patch +/codex:rescue --resume apply the top fix from the last run +/codex:rescue --model gpt-5.4-mini --effort medium investigate the flaky integration test +/codex:rescue --model spark fix 
the issue quickly +/codex:rescue --background investigate the regression +/codex:rescue --worktree investigate and fix the failing integration test +``` + +你也可以直接用自然语言将任务委派给 Codex: + +```text +Ask Codex to redesign the database connection to be more resilient. +``` + +**说明:** + +- 如果不传 `--model` 或 `--effort`,Codex 会自行选择默认值。 +- 如果使用 `spark`,插件会映射到 `gpt-5.3-codex-spark`。 +- 后续 rescue 请求可以继续该仓库中最近的 Codex 任务。 +- `--worktree` 会在 `.claude/worktrees//` 下创建一个隔离的 git worktree,使用独立分支,让 Codex 在不影响你主工作目录的情况下工作。`--worktree` 和 `--resume` 互斥。 + +> [!WARNING] +> **线程独占性**:Codex 任务运行期间,不要在终端中手动对同一线程执行 `codex resume`。Codex 后端对每个线程强制执行单轮独占,尝试 resume 一个活跃线程会阻塞或暂停你的 CLI 会话。请等待任务完成(通过 `/codex:status` 查看),或先使用 `/codex:cancel` 停止任务。如需并行运行 Codex,请用 `codex`(不带 `--resume`)启动一个新线程。 + +### `/codex:status` + +显示当前仓库中正在运行和近期的 Codex 作业。 + +示例: + +```bash +/codex:status +/codex:status task-abc123 +/codex:status --all +``` + +用途: + +- 查看后台任务的进度 +- 查看最近完成的作业 +- 确认任务是否仍在运行 + +`--all` 会把列表扩展到其他 Claude Code 会话以及历史 state 目录里的作业(例如旧 plugin slug、`$TMPDIR/codex-companion/`)。如果你在新会话里需要找回旧会话启动的任务 id,用这个;其它带 id 的 slash command(`/codex:result`、`/codex:cancel`、`/codex:observe`)已经默认跨会话、跨 root。 + +### `/codex:result` + +显示已完成作业的最终 Codex 输出。如果可用,还会包含 Codex 会话 ID,你可以通过 `codex resume ` 直接在 Codex 中重新打开该次运行。 + +示例: + +```bash +/codex:result +/codex:result task-abc123 +``` + +### `/codex:cancel` + +取消正在运行的后台 Codex 作业。 + +示例: + +```bash +/codex:cancel +/codex:cancel task-abc123 +``` + +### `/codex:observe` + +为运行中的 Codex 任务开启实时观察。以 ANSI 彩色输出显示工具调用、文件变更、命令执行、消息和推理过程。 + +观察器为**只读**模式,不会影响正在运行的 Codex 任务。按 `Ctrl+C` 可断开观察 — Codex 任务会继续运行。 + +**建议在单独的终端窗口中使用**,这样你可以在继续 Claude Code 会话的同时观察 Codex 的工作。 + +示例: + +```bash +/codex:observe +/codex:observe task-abc123 +/codex:observe --cwd /path/to/project +``` + +**颜色说明:** + +| 颜色 | 事件类型 | +|------|---------| +| 青色 | 工具调用(`→ Read src/foo.ts`) | +| 蓝色 | 命令执行(`$ npm test`) | +| 绿色 | 成功(`exit 0`、`● completed`) | +| 红色 | 失败(`exit 1`) | +| 黄色 | 文件变更(`✎ src/auth.ts (modify)`) | +| 暗色 | 
消息和推理 | + +如果目标任务已完成,观察器会渲染完整的事件历史后立即退出。 + +### `/codex:setup` + +检查 Codex 是否已安装并完成认证。如果 Codex 未安装且 npm 可用,它会提示你自动安装。 + +你也可以用 `/codex:setup` 管理可选的审查门控。 + +#### 启用审查门控 + +```bash +/codex:setup --enable-review-gate +/codex:setup --disable-review-gate +``` + +启用审查门控后,插件会使用 `Stop` 钩子对 Claude 的响应运行定向 Codex 审查。如果审查发现问题,停止操作会被阻止,让 Claude 先处理这些问题。 + +> [!WARNING] +> 审查门控可能会产生长时间运行的 Claude/Codex 循环,并快速消耗用量配额。仅在计划主动监控会话时启用。 + +## 典型工作流 + +### 发布前审查 + +```bash +/codex:review +``` + +### 将问题交给 Codex + +```bash +/codex:rescue investigate why the build is failing in CI +``` + +### 启动长时间运行的任务 + +```bash +/codex:adversarial-review --background +/codex:rescue --background investigate the flaky test +``` + +### 实时观察 Codex 工作 + +在单独的终端中: + +```bash +/codex:observe +``` + +这会给你一个实时、彩色的视图,显示 Codex 正在做什么 — 工具调用、文件编辑、测试运行和最终答案 — 而不会阻塞你的 Claude Code 会话。 + +### 使用 `--worktree` 隔离工作 + +```bash +/codex:rescue --worktree fix the broken auth middleware +``` + +Codex 在 `.claude/worktrees//` 的独立分支上工作,不会影响你的主工作目录。当你希望 Codex 进行修改但不影响当前分支时非常有用。 + +然后查看进度: + +```bash +/codex:status +/codex:result +``` + +## Codex 集成 + +Codex 插件封装了 [Codex app server](https://developers.openai.com/codex/app-server)。它使用你环境中已安装的全局 `codex` 二进制文件,并[应用相同的配置](https://developers.openai.com/codex/config-basic)。 + +### 常用配置 + +如果你想修改插件使用的默认推理强度或默认模型,可以在用户级或项目级的 `config.toml` 中定义。例如,要在特定项目中始终使用 `gpt-5.4-mini` 并将强度设为 `high`,可以在你启动 Claude 的目录根下创建 `.codex/config.toml` 文件并添加: + +```toml +model = "gpt-5.4-mini" +model_reasoning_effort = "high" +``` + +配置的加载顺序: + +- 用户级配置:`~/.codex/config.toml` +- 项目级覆盖:`.codex/config.toml` +- 项目级覆盖仅在[项目被信任](https://developers.openai.com/codex/config-advanced#project-config-files-codexconfigtoml)时才会加载 + +更多[配置选项](https://developers.openai.com/codex/config-reference)请查阅 Codex 文档。 + +### 将工作转移到 Codex + +委派的任务和任何[停止门控](#启用审查门控)运行也可以直接在 Codex 中恢复,只需运行 `codex resume`,并指定从 `/codex:result` 或 `/codex:status` 获取的会话 ID,或从列表中选择。 + +这样你可以审查 Codex 的工作或在那里继续工作。 + +## 开发 + +### Pre-push Hook + +安装 git pre-push 
hook,在推送前验证发布质量: + +```bash +npm run setup-hooks +``` + +Hook 在每次推送时检查: +- **版本必须 bump** — 插件源码改了但没 bump 版本则阻止推送 +- **CHANGELOG 必须有对应条目** — 版本 bump 了但 CHANGELOG.md 没有匹配条目则阻止推送 +- **README 更新提醒** — 版本 bump 了但没更新 README.md 则警告(不阻止) +- **Bump 类型校验** — 实际 bump 类型(major/minor/patch)与变更内容不匹配时警告 + +需要跳过时可用 `git push --no-verify`。 + +### 版本 Bump + +```bash +node scripts/bump-version.mjs +``` + +同步更新所有版本清单:`package.json`、`package-lock.json`、`plugin.json` 和 `marketplace.json`。 + +## 常见问题 + +### 使用此插件需要单独的 Codex 账号吗? + +如果你已经在此机器上登录了 Codex,该账号应该可以直接使用。本插件使用你本地的 Codex CLI 认证状态。 + +如果你目前只使用 Claude Code 而从未使用过 Codex,你还需要使用 ChatGPT 账号或 API Key 登录 Codex。[Codex 可通过 ChatGPT 订阅使用](https://developers.openai.com/codex/pricing/),[`codex login`](https://developers.openai.com/codex/cli/reference/#codex-login) 同时支持 ChatGPT 和 API Key 登录。运行 `/codex:setup` 检查 Codex 是否就绪,如果未就绪则使用 `!codex login`。 + +### 插件是否使用独立的 Codex 运行时? + +不是。本插件通过你本地的 [Codex CLI](https://developers.openai.com/codex/cli/) 和同一台机器上的 [Codex app server](https://developers.openai.com/codex/app-server/) 进行委派。 + +这意味着: + +- 使用与你直接使用相同的 Codex 安装 +- 使用相同的本地认证状态 +- 使用相同的仓库检出和本地机器环境 + +### 会使用我现有的 Codex 配置吗? + +是的。如果你已经在使用 Codex,插件会读取相同的[配置](#常用配置)。 + +### 可以继续使用我现有的 API Key 或 Base URL 配置吗? + +可以。由于插件使用你本地的 Codex CLI,你现有的登录方式和配置都会继续生效。 + +如果你需要将内置的 OpenAI Provider 指向不同的端点,请在 [Codex 配置](https://developers.openai.com/codex/config-advanced/#config-and-state-locations)中设置 `openai_base_url`。 + +### 作业的 state 存在哪里? + +Job 记录、日志和事件流按 workspace 写入 state 目录。插件按下面顺序选根目录: + +1. `$CLAUDE_PLUGIN_DATA/state/` —— Claude Code 通过 plugin host 调起 slash command 时会设置这个 env,存在时优先使用。 +2. 
`~/.codex-companion/state/` —— HOME 锚定的稳定 fallback。env 未设置时(例如直接在 shell 里跑 `node codex-companion.mjs`)走这里。 + +每个 workspace 会在 root 下生成 `-/`(slug + 规范化路径哈希)目录,存 `state.json`、`.json`、`.log` 和 `.events.jsonl`。 + +当你用 id 查作业(`/codex:status `、`/codex:result `、`/codex:cancel `、`/codex:observe `),插件还会顺带扫描历史位置:`$TMPDIR/codex-companion/`(更早的 fallback)以及所有 `~/.claude/plugins/data/codex-*/state/` 目录(兼容 marketplace 改名),所以升级前创建的旧 job 仍能找到。`/codex:status --all` 把这个跨 root 的视图扩展到无参数列表,并跳过 per-Claude-session 过滤 —— 这是新会话里捞回旧任务 id 的推荐方式。高级 env 开关 `CODEX_COMPANION_LEGACY_ROOTS`(路径分隔符隔开)可以显式替换历史扫描列表;设为空字符串则完全关闭历史扫描。普通用户不需要碰它。 diff --git a/openspec/changes/add-osascript-spawn-backends/.openspec.yaml b/openspec/changes/add-osascript-spawn-backends/.openspec.yaml new file mode 100644 index 00000000..68948146 --- /dev/null +++ b/openspec/changes/add-osascript-spawn-backends/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-24 diff --git a/openspec/changes/add-osascript-spawn-backends/HANDOFF.md b/openspec/changes/add-osascript-spawn-backends/HANDOFF.md new file mode 100644 index 00000000..b7d0dea7 --- /dev/null +++ b/openspec/changes/add-osascript-spawn-backends/HANDOFF.md @@ -0,0 +1,82 @@ +# HANDOFF: add-osascript-spawn-backends + +## What was implemented + +- §1: Added RED-first coverage in `tests/spawner.test.mjs` and `tests/observe.test.mjs` for detection, precedence, osascript dispatch, AppleScript escaping, shell composition, control-character rejection, tty-targeting, permission-denied classification, and observe permission UX. +- §2: Refactored `spawner.mjs` to a backend strategy table for `tmux`, `ghostty-mac`, and `iterm2-mac`, keeping tmux cwd/command as separate exec args. +- §3: Added shared helpers: `composeShellInvocation`, `rejectControlChars`, `discoverCallerTty`, `escapeAppleScriptLiteral`, and `osascriptArgsFromLines`. +- §4: Added `ghostty-mac` osascript backend with permission-denied classification. It initially shipped with tty-match split and new-window fallback; the fix-forward described below replaced that with an always-new-window path because Ghostty 1.3 terminals expose no `tty` property. 
+- §5: Added `iterm2-mac` osascript backend with tty-match split, new-window fallback, and permission-denied classification. +- §6: Updated observe spawn reporting with per-backend success labels, Automation permission messaging without copy-paste fallback, and unsafe-command messaging with fallback. +- §7: Ran build, targeted tests, tmux smoke, and attempted full suite twice; see verification notes. +- §8: Updated observe docs, bumped version metadata to 1.4.0, and ran `npm run check-version`. + +## What was tested and passed + +- RED proof before implementation: `node --test tests/spawner.test.mjs tests/observe.test.mjs` failed on missing exports (`buildGhosttyMacArgs`, `handleObserveSpawn`) before implementation. +- `npm run build`: passed (`tsc -p tsconfig.app-server.json` completed with exit 0). +- `node --test tests/spawner.test.mjs tests/observe.test.mjs`: passed, 57 tests / 10 suites / 0 failures. +- `node scripts/bump-version.mjs 1.4.0 && npm run check-version`: passed, all version metadata matches 1.4.0. +- §7.4 tmux regression smoke: passed from inside a detached tmux session; output was `✓ Observer launched in tmux pane (job task-fake)`. + +## Adversarial review findings + fix-forward (post-Codex review) + +`/codex:adversarial-review` against the initial implementation surfaced three HIGH-severity AppleScript-object-model bugs. Confirmed against the published Ghostty 1.3 + iTerm2 dictionaries via Context7, then fixed in-place rather than rolling back: + +1. **Ghostty terminal has no `tty` property** (Ghostty 1.3 dictionary documents `id`, `name`, `working directory` only). The first implementation generated `repeat with t in terminals … if tty of t is targetTty …`, which would throw at runtime in real Ghostty. Fix: `buildGhosttyMacArgs` no longer iterates terminals or matches by `tty`; it always opens a new window via `set newWin to new window` → `set newTerm to terminal 1 of selected tab of newWin` → `input text "\n" to newTerm`. 
`callerTty` is accepted-but-ignored by the Ghostty builder. +2. **Ghostty `new window` returns a window, not a terminal.** The first implementation wrote `input text "…" to newWin` directly. AppleScript would refuse the cast at runtime. Fix: the script drills `set newTerm to terminal 1 of selected tab of newWin` before `input text … to newTerm`. +3. **iTerm2 `sessions` is NOT a direct element of `window`.** The first implementation generated `repeat with w in windows / repeat with s in sessions of w / …`. That AppleScript-compiles, but iterates an empty collection at runtime — the tty match would never fire, and every observer would fall through to the new-window path. Fix: `buildIterm2MacArgs` now nests `repeat with w in windows` → `repeat with tb in tabs of w` → `repeat with s in sessions of tb`, comparing `tty of s` to `targetTty`. Spec scenario asserts `tabs of w` appears before `sessions of tb` in the script source so the contract cannot regress. + +Tests added/strengthened in `tests/spawner.test.mjs`: + +- Ghostty contract assertions: `set newWin to new window`, `set newTerm to terminal 1 of selected tab of newWin`, `input text "…" to newTerm`, AND negative assertions `tty of t` and `repeat with t in terminals` MUST NOT appear. +- iTerm2 contract assertions: `repeat with w in windows`, `repeat with tb in tabs of w`, `repeat with s in sessions of tb`, plus a source-order assertion that `tabs of w` precedes `sessions of tb`. +- "Ghostty script does not embed caller tty" — ensures the discovered tty does not leak into the Ghostty AppleScript when discovery succeeds. +- New `describe("discoverCallerTty", ...)` block with 8 tests covering immediate-parent hit, walk past `??` ancestor, `/dev/` prefix preservation, ppid≤1 termination, runProbe throws, malformed output, depth-10 cap, and invalid `startPid` (closes Claude code-reviewer MEDIUM #1: "discoverCallerTty has no direct unit tests"). 
+ +Spec + design synced: + +- `specs/observer-spawner/spec.md` — Backend dispatch scenarios for both osascript backends rewritten to reflect the new contracts. Caller-terminal targeting requirement renamed `(iterm2-mac only)` and a new "Ghostty always uses new-window path" scenario added. +- `design.md` — Decision 7 split into iTerm2 (tty-match-or-new-window) and Ghostty (always new-window) paths with explicit reasoning. §Risks rows updated. New §Resolved Questions section captures: Ghostty `tty` confirmed absent in 1.3, iTerm2 `sessions` confirmed nested under `tab`, Ghostty `new window` confirmed returns a window not a terminal. + +Test count after fix-forward: `node --test tests/spawner.test.mjs` reports 41/41 passing (was 25 from the initial implementation, +8 contract-shape tests, +8 discoverCallerTty tests). + +Full `npm test` status: +- Attempt 1 was terminated after it stopped producing output with only `tests/runtime.test.mjs` active. +- Attempt 2 used a 90-second watchdog. It reached 38 passing top-level tests, then timed out with `__TIMEOUT__` and no final TAP summary. Isolated `node --test tests/runtime.test.mjs` also hung without emitting subtest results. No full-suite pass count is available from this environment. + +## What was SKIPPED and why + +- §0 spike: skipped as requested; validating Ghostty/iTerm2 AppleScript dictionaries requires real terminal apps. +- §7.5 Ghostty Mac smoke: skipped as requested; requires a human at a real Ghostty/macOS Automation environment. +- §7.6 iTerm2 Mac smoke: skipped as requested; requires a human at a real iTerm2/macOS Automation environment. + +## Open items for Claude to handle + +- §9.1: Run final scoped diff/stat review. +- §9.2: Cross-check tasks/spec scenarios against implementation diff. +- §9.3: Run dual-model review (`/codex:review` and `/ai-code-review` or code-reviewer). +- §9.4: Run implementation-level adversarial review. +- §9.6: Archive with `/opsx:archive add-osascript-spawn-backends` after merge. 
+- Investigate the existing `tests/runtime.test.mjs` hang or rerun `npm test` in a known-good environment; this implementation did not touch runtime code, but full-suite verification could not complete here. + +## Ghostty/iTerm2 version assumptions (post-fix-forward) + +- Ghostty backend relies on AppleScript verbs only: `tell application "Ghostty"`, `activate`, `new window`, `selected tab of `, `terminal 1 of `, `input text "…" to `. It does NOT use `terminals`, `tty of `, or any `split` verb — Ghostty 1.3's terminal exposes no `tty` property, so reliable identity-based split is impossible. Ghostty backend always opens a new window. +- iTerm2 backend relies on `tell application "iTerm"`, `activate`, the nested traversal `windows → tabs of w → sessions of tb`, `tty of s`, `split vertically with default profile`, `create window with default profile`, `current session of `, `write text "…" to `. The traversal nesting is mandatory — iTerm2's object model puts `sessions` under `tab`, not directly under `window`. +- Ghostty AppleScript reference: https://ghostty.org/docs/features/applescript (Ghostty 1.3 — terminal properties documented as `id`, `name`, `working directory`). +- iTerm2 AppleScript reference verified via Context7 (`/websites/iterm2`). +- No real Ghostty or iTerm2 version was probed *in CI*; the AppleScript shape is locked by spec scenarios + unit tests rather than runtime smoke. §7.5 (Ghostty Mac smoke) and §7.6 (iTerm2 Mac smoke) remain as user-driven smoke gates. + +## Scenario-to-test self-pass + +- Terminal detection: `detects tmux when $TMUX is set`, `detects ghostty-mac on macOS Ghostty without tmux`, `detects iterm2-mac on macOS iTerm2 without tmux`, `returns none for mac terminal names on non-darwin platforms`, existing none tests. +- Detection precedence: `selects tmux before Ghostty when both signals are present`, `selects tmux before iTerm2 when both signals are present`. 
+- Backend dispatch: tmux existing runner test; Ghostty/iTerm2 osascript runner tests. +- Spawn success/failure: tmux success and failure tests; osascript permission tests; existing runner-error test. +- No-supported-terminal fallback: existing no-runner test. +- AppleScript escaping: Ghostty/iTerm2 escaping tests and layer-order test. +- Shell-safe composition: compose helper tests for spaces, single quotes, metacharacters, unicode, token preservation, and command metacharacter preservation. +- Caller-terminal targeting: discovered-tty embedding test and null-tty new-window-only test. +- Control-character rejection: newline, NUL, carriage return, and tab/space allowed tests. +- Automation-permission messaging: spawner permission classification tests and observe dedicated-message test. diff --git a/openspec/changes/add-osascript-spawn-backends/design.md b/openspec/changes/add-osascript-spawn-backends/design.md new file mode 100644 index 00000000..daf53cb8 --- /dev/null +++ b/openspec/changes/add-osascript-spawn-backends/design.md @@ -0,0 +1,156 @@ +## Context + +The 1.3.0 MVP introduced `plugins/codex/scripts/lib/spawner.mjs` with a single hardcoded tmux branch and a small detector that only reads `$TMUX`. The runner (default `spawnSync`) is injected as a parameter, which made the 8 unit tests trivial — every test passes a fake runner and a synthetic env. + +This change adds two more backends (Ghostty on macOS, iTerm2 on macOS) without giving up the test ergonomics or changing the call site in `observe.mjs`. Both new backends share `osascript` as their transport, but their AppleScript dictionaries differ enough that they each need their own builder. + +A first-pass version of this document was reviewed adversarially by Codex (verdict: `needs-attention`) before any implementation. 
Four findings drove material changes that this document reflects: (1) cwd needs **shell** quoting separate from AppleScript escaping; (2) silently splitting the *front* Ghostty/iTerm2 window is unsafe — the spawner must target the calling shell's terminal or fall back to opening a new window; (3) the first-run Automation-permission denial needs a dedicated UX, not a generic red-error + copy-paste hint; (4) the "arbitrary string safety" escaping claim was over-broad and must either narrow its input domain or reject control characters explicitly. + +## Goals / Non-Goals + +**Goals:** +- Add `ghostty-mac` and `iterm2-mac` backends behind the same `spawnObserverInTerminal(...)` entrypoint. +- Define detection precedence so users running tmux *inside* Ghostty still get the tmux split. +- Keep the runner/env injection pattern; every new branch must be unit-testable without invoking `osascript`. +- Surface the actual backend name in `handleObserveSpawn`'s success message so users know which path fired. +- Guarantee the spawned observer lands in the terminal that owns the calling shell — and when that cannot be proven, open a fresh window instead of splitting an unrelated front window. +- Compose `cd && ` shell-safely first, AppleScript-escape second. Cwd paths with spaces, single quotes, unicode, or shell metacharacters must work. +- Treat first-run Automation-permission denial as an expected onboarding state with its own message ("grant access and retry"), not as a generic backend failure. +- Reject embedded newlines / control characters in the composed command before building AppleScript — better a structured `spawned: false` than a half-formed script. + +**Non-Goals:** +- Linux Ghostty (`ghostty +new-window -e ...` — `-e` semantics need real-machine testing). +- WezTerm CLI, kitty remote-control, Terminal.app, generic `xdg-terminal-exec`. +- Configurable split direction, backend override flags (`--backend=...`), or `--no-spawn` opt-out. 
+- Touching the observer itself or `handleObserveCommand` outside the message strings. + +## Decisions + +### 1. Strategy table over if/elif chain + +Each backend is a small record `{ detect, build, cmd, classifyFailure }`. `detectTerminal(env)` walks the table in priority order and returns the first hit. `spawnObserverInTerminal({ cwd, command, env, runner })` then drives a per-kind pipeline: + +- **tmux backend** — receives `build({ cwd, command })`. No shell composition (tmux takes `-c ` as a separate `execve` arg and the command as another arg, so there is no shell-injection vector). Runner invoked with `{ stdio: 'ignore' }`. `classifyFailure` returns only generic errors. +- **osascript backends (`ghostty-mac`, `iterm2-mac`)** — dispatcher first calls `composed = composeShellInvocation({ cwd, command })`, then `rejectControlChars(composed)` (early-return `unsafe-command` on hit), then `callerTty = discoverCallerTty()`. The builder receives `{ composed, callerTty }` — never raw `cwd`/`command` — so composition cannot drift between dispatcher and backend. The Ghostty builder ignores `callerTty` (see Decision 7 — Ghostty 1.3's terminal has no `tty` property); the iTerm2 builder uses it to choose between the match-and-split path and the new-window path. Runner invoked with `{ stdio: ['ignore', 'ignore', 'pipe'] }` so stderr is captured for `classifyFailure`, which can return `automation-permission-denied` in addition to generic errors. + +Both branches share the same outer success/failure return shape (`{ spawned, kind, reason?, error? }`). + +Trade-off considered: an if/elif chain is two lines shorter for two backends, but it forces a duplicate switch in tests. The table makes "add the third backend" a one-record diff. Per-kind build-input shapes (tmux vs osascript) are deliberate — keeping them uniform would force tmux through `composeShellInvocation` for no benefit, or smuggle composition into the builder where it can drift. + +### 2. 
Detection precedence: tmux > ghostty-mac > iterm2-mac > none + +Reason: developers commonly run tmux *inside* a Ghostty or iTerm2 window. If we checked `$TERM_PROGRAM` first, we'd open a Ghostty/iTerm split next to a tmux pane — wrong window, wrong context. `$TMUX` being set is an explicit signal that the user has opted into a multiplexer, so it wins. + +Alternative considered: prefer host emulator if both signals are present, because the host has more screen area. Rejected — context (cwd, env, ssh session) lives in the tmux pane, not at the emulator level. + +### 3. Two-layer quoting: shell-safe first, AppleScript-safe second + +The osascript backends end up running a shell command (`cd && `) in a freshly opened pane / window. There are *two* separate quoting domains: (a) POSIX shell parses the `cd ... && ...` string, and (b) AppleScript parses the surrounding `"..."` literal. They have different metacharacters; collapsing them into a single escape pass is what Codex flagged as unsafe. + +**Pipeline (exact order, no exceptions):** + +``` +composeShellInvocation({ cwd, command }) ── Layer 1: shell-safe + │ + ▼ +rejectControlChars() ── Guard: reject 0x00–0x1F minus 0x09/0x20 + │ (return spawned:false, reason:'unsafe-command' on hit) + ▼ +escapeAppleScriptLiteral() ── Layer 2: AppleScript-safe + │ + ▼ +buildGhosttyMacArgs / buildIterm2MacArgs ── interpolate into osascript -e ... +``` + +**Layer 1 — `composeShellInvocation({ cwd, command })`.** Returns exactly `cd ${shellQuote(cwd)} && ${command}`. Only `cwd` is shell-quoted at this layer; `command` is interpolated verbatim because it is **already** a space-joined sequence of individually `shellQuote`-ed argv tokens, produced by `buildObserverCommand` in `observe.mjs`. Re-wrapping `command` with `shellQuote` here would collapse the four argv tokens into a single literal string and break execution — a regression scenario in the spec asserts the token preservation. 
+ +Concretely: + +``` +cd '/Users/dragon.cl/work projects/codex-plugin-cc' && '/abs/path/node' '/abs/path/companion.mjs' 'observe' 'task-abc' +``` + +**Guard — `rejectControlChars(composed)`.** Runs on the composed string (after Layer 1, before Layer 2) so that control bytes embedded in `cwd` — which are only visible *after* shell quoting wraps them inside a single-quoted literal — are caught before they reach `input text` / `write text`. Scanning the raw `cwd` or raw `command` separately would miss the position-in-final-string information and risk subtle gaps. On any hit, the spawner returns `{ spawned: false, kind: <kind>, reason: 'unsafe-command', error: <message> }` and the runner is not invoked. + +**Layer 2 — `escapeAppleScriptLiteral(composed)`.** Escapes `\` → `\\` and `"` → `\"` in the composed string, then the backend interpolates it into `input text "<escaped>\n" to newTerm` (Ghostty) or `write text "<escaped>" to newSession` (iTerm2, no trailing `\n` since iTerm2 adds Enter on `write text`). + +Alternative considered: pass the command as an `osascript` positional argument and read it inside the script via `do shell script "echo " & quoted form of argv...`. Rejected — adds a third escaping layer (osascript's own argv handling) without removing either of the two above. Direct interpolation with explicit named layers + pipeline ordering is auditable. + +### 4. One `-e` per logical AppleScript line + +Easier to log (`runner` calls show each line), easier to compare in tests (`assert.deepEqual` on the args array), and stays portable across `osascript` versions. + +### 5. Runner contract unchanged + +`runner(cmd, args, opts) → { status, error? }` is the same signature the tmux backend uses. New backends just produce different `cmd` ('osascript') and `args` (the `-e` flags). The default runner (`spawnSync`) doesn't need to know which backend ran. + +### 6. 
Generic backend failure → fall through to copy-paste hint + +For non-permission failures the existing tmux shape stays: if the runner returns non-zero status or throws, the spawner returns `{ spawned: false, kind: , error: }`, and `handleObserveSpawn` prints the red "Failed to drive " line followed by the existing fallback hint. Users always get a working path out. The two carve-outs below (Decision 7 target-window + Decision 8 permission-denied) take precedence when they apply. + +### 7. Target the calling shell's terminal (iTerm2 only); fall back to a new window, not the front one + +**Problem (from Codex review).** `/codex:observe` may be invoked from a Claude Code session running in a *different* window than the currently frontmost Ghostty / iTerm2 window. Naively running `tell application "Ghostty" to split focused terminal of front window …` then drops the observer into an unrelated project's terminal — wrong cwd, wrong context, confusing. + +**Strategy.** Per-backend, because the two AppleScript dictionaries differ in what they expose: + +1. **Discover the caller's tty.** The companion script walks up the process tree (`ps -o tty=,ppid= -p ` repeatedly) until it finds the first ancestor with a real controlling tty (not `?` / `??`), and resolves that to `/dev/ttysNN`. This catches the common case where Claude Code spawns `bash` which spawns `node` — none of them own a tty, but the user's shell ancestor does. The walk is depth-capped at 10 ancestors and times out per-probe, so a stuck `ps` cannot block the spawner. + +2. **iTerm2-targeted AppleScript.** The script is parameterised with the discovered tty. iTerm2's object model is `application → windows → tabs → sessions`; `sessions` is **NOT** a direct element of `window`, it lives on `tab`. The traversal therefore nests: `repeat with w in windows` → `repeat with tb in tabs of w` → `repeat with s in sessions of tb`, comparing `tty of s` to the discovered value. 
On match: `tell matched … set newSession to split vertically with default profile`. On no match: `set newWindow to create window with default profile` → `set newSession to current session of newWindow`. + +3. **Ghostty: always new-window.** Ghostty 1.3's AppleScript dictionary lists per-terminal properties as `id`, `name`, `working directory` only — there is **no `tty` property** on the terminal object. Without `tty`, no reliable identity check is possible (matching by name/id would silently re-target on rename), so the Ghostty backend always opens a fresh window: `set newWin to new window` → `set newTerm to terminal 1 of selected tab of newWin` → `input text "\n" to newTerm`. The drill-down through `selected tab` is required because `new window` returns a *window* object and `input text` requires a *terminal*. When upstream Ghostty adds a `tty` property, this exemption gets revisited and Ghostty rejoins the iTerm2-style match-or-new-window flow. + +**Why new-window instead of "best effort split front window".** Splitting the wrong window is a silent failure with confusing output. Opening a new window is visibly different and never wrong — the observer just lives in its own window instead of next to the caller. The user can always grab it. + +**When iTerm2 tty discovery itself fails** (uncommon: detached sessions, sandboxed shells without `ps` access), the spawner skips straight to the new-window branch with no `repeat` loop emitted. Same safety guarantee. Ghostty is unaffected — it never attempted matching to begin with. + +### 8. Permission-denied is an onboarding state, not a failure + +**Problem (from Codex review).** First-run `osascript` against Ghostty / iTerm2 triggers a macOS Automation permission prompt **and** returns non-zero. A naive failure handler shows the red "Failed to drive ghostty-mac: exited 1" line + copy-paste fallback at exactly the moment the user is being asked to click "Allow" — looks broken when it isn't. 
+ +**Detection.** Parse `osascript` stderr (which we now capture instead of piping to `/dev/null`) for either: + +- `(-1743)` — the documented "user denied access to send AppleEvents" error number, OR +- the literal substring `not authorized to send Apple events` (case-insensitive) + +**Behavior on match.** + +- Spawner returns `{ spawned: false, kind: , reason: 'automation-permission-denied' }`. +- `handleObserveSpawn` prints a single dedicated line: `! macOS Automation permission needed for . Open System Settings → Privacy & Security → Automation → Terminal/Claude Code, enable , then rerun /codex:observe.` +- Does NOT print the generic copy-paste hint — retrying after permission is granted will succeed, and the copy-paste hint would imply "this is your only option." + +**On non-permission failure** the original Decision 6 path applies (red error + copy-paste hint). + +## Risks / Trade-offs + +| Risk | Mitigation | +|---|---| +| Caller invokes `/codex:observe` from a non-frontmost Ghostty / iTerm2 window | Decision 7: tty-discovery + targeted AppleScript walks all terminals/sessions for a match. On no match, open a new window. Never split an unrelated front window. | +| First-run macOS Automation permission denial returns non-zero + shows a system prompt | Decision 8: detect `(-1743)` / "not authorized" in stderr, print a dedicated "grant access and retry" message, skip the generic copy-paste fallback (retry will work). | +| `tty` discovery itself fails (sandboxed shell, no `ps` access, detached daemon) | Skip straight to the new-window branch; no attempt to split. The observer always lands in a fresh, visible window with the right cwd. | +| Cwd contains spaces, single quotes, unicode, or shell metacharacters | Decision 3 Layer 1: `shellQuote` wraps cwd before composing `cd ... && ...`. Unit tests cover spaces, single quotes, unicode, and `;`/`$`/`` ` ``. 
| +| Caller-supplied command contains embedded newline / control chars | Decision 3 Control-char guard: reject before building AppleScript, return `{ spawned: false, kind: <kind>, reason: 'unsafe-command', error: <message> }`, fall through to copy-paste hint. Avoids the half-formed-script failure mode. | +| Ghostty AppleScript dictionary changes in a future release | Use only the documented stable verbs (`new window`, `selected tab`, `terminal`, `input text`, `activate`). Avoid `perform action "<action>"` (more brittle). Pin a "tested with Ghostty 1.3" note in tasks §6.5. | +| Ghostty's terminal object has no `tty` property (1.3 confirmed via Codex adversarial review + Context7) | Ghostty drops to new-window-only for this change. Decision 7 documents the reasoning; iTerm2 retains the tty-match path. Upstream feature request tracked separately; once `tty` lands, Ghostty rejoins match-or-new-window. | +| iTerm2 object model nests sessions under tabs, not windows directly | Decision 7: traversal nests `windows → tabs of w → sessions of tb`. A spec scenario asserts `tabs of w` appears before `sessions of tb` in the script source so the contract cannot regress to the (broken) `sessions of w` shape. | +| iTerm2 stable vs nightly dictionary differences | Test against the stable GA build. The verbs we use (`current session`, `split vertically with default profile`, `write text`, `tty of current session`) have been stable since iTerm2 3.x. | +| `tell application "<App>" to activate` steals focus | Accept it. The new split / window needs to be visible for the spawn to be useful; stealing focus into the target app is the documented AppleScript pattern. We only activate when we are about to spawn — never on probe / detection. | +| Job-id is interpolated into a shell+AppleScript string | Two-layer quoting (Decision 3) handles both domains. Job IDs are companion-generated (`task-[a-z0-9]+`); the quoting + control-char rejection are defense-in-depth in case that invariant ever loosens. 
| + +## Migration Plan + +Additive change. Tmux users see byte-identical behavior. Non-tmux macOS users on Ghostty/iTerm2 start getting auto-splits. The copy-paste fallback hint remains as the last line of defense for every other environment. + +Rollback: revert the spawner.mjs diff. The 1.3.0 tmux MVP is preserved in git history; reverting only this change leaves tmux working. + +## Resolved Questions + +- **Does Ghostty expose a `tty` property on the terminal object?** No (Ghostty 1.3, confirmed by Codex adversarial review + Context7 dictionary lookup). The terminal object exposes `id`, `name`, `working directory` only. Decision 7 was updated to drop the Ghostty tty-match path; Ghostty always opens a new window. Reverts when upstream adds `tty`. +- **Does iTerm2 expose `sessions` as a direct element of `window`?** No. The object model is `windows → tabs → sessions`; the traversal must nest through `tabs of w` before `sessions of tb`. The earlier draft used `sessions of w`, which AppleScript-compiles fine but iterates an empty collection at runtime. Fixed in implementation; locked by spec scenario. +- **Does Ghostty's `new window` return a window or a terminal?** A *window*. `input text` requires a *terminal*, so the script drills via `set newTerm to terminal 1 of selected tab of newWin` before calling `input text "…" to newTerm`. + +## Open Questions + +- Should we add `--backend=` to force a specific backend (debugging aid)? Defer until someone asks. +- Should the split direction be configurable (`--split=down|right`)? Defer; `right` mirrors the tmux `-h` default and is the most common preference. +- Should we surface a one-time hint the first time we detect an unsupported terminal we *could* support later (e.g., WezTerm)? Out of scope for this change. 
diff --git a/openspec/changes/add-osascript-spawn-backends/proposal.md b/openspec/changes/add-osascript-spawn-backends/proposal.md new file mode 100644 index 00000000..f63fd308 --- /dev/null +++ b/openspec/changes/add-osascript-spawn-backends/proposal.md @@ -0,0 +1,35 @@ +## Why + +The 1.3.0 MVP made `/codex:observe` auto-launch a live observer inside a tmux split, but only for users already running tmux. Many developers on macOS use Ghostty or iTerm2 as their daily terminal without tmux, and they still see the old "copy this command into a new terminal" hint. Both terminals expose a rich AppleScript dictionary that supports programmatic splits, so we can give those users the same one-keystroke experience. + +## What Changes + +- Extend `spawner.mjs` from a single tmux branch into a small strategy table mapping detected terminal kind → backend implementation. +- Add a `ghostty-mac` backend that drives Ghostty via `osascript`. It MUST target the terminal that owns the calling shell's tty when discoverable, and fall back to opening a new Ghostty window — never silently split a random front window. +- Add an `iterm2-mac` backend with the same tty-match-or-new-window contract for iTerm2. +- Update terminal detection to recognize `$TERM_PROGRAM=ghostty` and `$TERM_PROGRAM=iTerm.app` (only when `process.platform === 'darwin'`). +- Define detection precedence so users running tmux *inside* Ghostty/iTerm2 still get the tmux split (the multiplexer wins). +- Introduce a **two-layer quoting contract** for osascript backends: shell-quote `cwd` and `command` first (re-using the existing `shellQuote` helper to compose `cd && `), then AppleScript-escape the resulting string for the `"..."` literal. AppleScript escaping alone is not shell-safe. +- Reject unsafe characters in the composed command (embedded newlines, NUL, other control chars) before building AppleScript — return a structured `spawned: false, error: 'unsafe-command'` instead of silently producing a broken script. 
+- Add a dedicated **Automation-permission UX**: when osascript fails with the macOS "not authorized" pattern, show a one-line "grant access and retry" message instead of the generic red error + copy-paste fallback (retry will work once permission is granted). +- Update the success and fallback messages in `handleObserveSpawn` to name the actual backend used. +- Add unit tests mirroring the existing tmux pattern (env + runner injection, AppleScript string assertion) plus new coverage for: cwd with spaces / single-quotes / unicode, control-char rejection, tty-match-vs-new-window dispatch, permission-denied messaging. + +Out of scope (deferred to later changes): Linux Ghostty `+new-window` mode, WezTerm `wezterm cli`, kitty remote-control, Terminal.app, generic xdg-terminal-exec. + +## Capabilities + +### New Capabilities +- `observer-spawner`: Terminal-detection + split-launch contract for `/codex:observe --spawn`. Defines the supported backends (tmux, ghostty-mac, iterm2-mac), the detection precedence, and the fallback behavior when no supported terminal is found. Backfills the contract for the 1.3.0 tmux MVP while adding the two new backends. + +### Modified Capabilities + +(none — the existing `observe-command` capability is unchanged; only the launcher path changes.) + +## Impact + +- **Modified files**: `plugins/codex/scripts/lib/spawner.mjs` (refactor to strategy table, add tty-discovery helper, two-layer quoting, control-char guard, permission-denied detection), `plugins/codex/scripts/lib/observe.mjs` (per-kind success/failure messages, dedicated permission-denied path), `tests/spawner.test.mjs` (extend coverage). +- **New files**: none expected; backends, tty discovery, and quoting helpers live as small modules inside `spawner.mjs`. +- **Dependencies**: none (uses `osascript` and POSIX `ps`/`tty`, all available on macOS). +- **Breaking changes**: none. Tmux users see identical behavior. 
Non-tmux users on macOS+Ghostty/iTerm2 now get tty-targeted splits or new-window fallback instead of the copy-paste hint; the copy-paste hint remains as the final fallback for unsupported terminals. +- **Verification**: unit tests for the new backends and helpers (cwd quoting, control-char rejection, tty-match dispatch, permission-denied), plus manual smoke on a real macOS box for both Ghostty and iTerm2 — explicitly covering (a) first-run Automation permission prompt, (b) a project path containing spaces, (c) invocation from a non-frontmost window. diff --git a/openspec/changes/add-osascript-spawn-backends/specs/observer-spawner/spec.md b/openspec/changes/add-osascript-spawn-backends/specs/observer-spawner/spec.md new file mode 100644 index 00000000..8eda6bbf --- /dev/null +++ b/openspec/changes/add-osascript-spawn-backends/specs/observer-spawner/spec.md @@ -0,0 +1,230 @@ +## ADDED Requirements + +### Requirement: Terminal detection + +The spawner SHALL inspect the process environment to determine whether a supported terminal multiplexer or emulator hosts the current shell, returning a tagged kind that the dispatcher uses to select a backend. 
+ +#### Scenario: tmux is detected when $TMUX is set + +- **WHEN** `process.env.TMUX` is a non-empty string +- **THEN** detection returns `{ kind: 'tmux' }` + +#### Scenario: ghostty-mac is detected on macOS Ghostty + +- **WHEN** `process.platform === 'darwin'` AND `process.env.TERM_PROGRAM === 'ghostty'` AND `process.env.TMUX` is unset or empty +- **THEN** detection returns `{ kind: 'ghostty-mac' }` + +#### Scenario: iterm2-mac is detected on macOS iTerm2 + +- **WHEN** `process.platform === 'darwin'` AND `process.env.TERM_PROGRAM === 'iTerm.app'` AND `process.env.TMUX` is unset or empty +- **THEN** detection returns `{ kind: 'iterm2-mac' }` + +#### Scenario: none is returned when no supported terminal matches + +- **WHEN** no detection condition holds (e.g., running in plain Terminal.app, Alacritty, an SSH session, or a non-macOS shell) +- **THEN** detection returns `{ kind: 'none' }` + +### Requirement: Detection precedence + +When multiple terminal signals are present simultaneously, the spawner SHALL prefer the multiplexer over the host emulator so that users running tmux inside Ghostty or iTerm2 still get the tmux split. + +#### Scenario: tmux inside Ghostty selects tmux + +- **WHEN** `process.env.TMUX` is set AND `process.env.TERM_PROGRAM === 'ghostty'` +- **THEN** detection returns `{ kind: 'tmux' }` (Ghostty is ignored) + +#### Scenario: tmux inside iTerm2 selects tmux + +- **WHEN** `process.env.TMUX` is set AND `process.env.TERM_PROGRAM === 'iTerm.app'` +- **THEN** detection returns `{ kind: 'tmux' }` (iTerm2 is ignored) + +### Requirement: Backend dispatch + +The spawner SHALL select the backend matching the detected kind and invoke it through the injectable runner so that all backends remain unit-testable without invoking real `tmux` or `osascript`. 
+ +#### Scenario: tmux backend calls tmux split-window + +- **WHEN** kind is `tmux` +- **THEN** the runner is invoked with `cmd === 'tmux'` and `args` starts with `['split-window', '-h', '-c', <cwd>, <command>]` + +#### Scenario: ghostty-mac backend calls osascript + +- **WHEN** kind is `ghostty-mac` +- **THEN** the runner is invoked with `cmd === 'osascript'` +- **AND** `args` is a sequence of `-e <line>` pairs whose concatenated script contains `tell application "Ghostty"`, `set newWin to new window`, `set newTerm to terminal 1 of selected tab of newWin` (because Ghostty's `new window` returns a window — `input text` requires a terminal), and an `input text "..." to newTerm` call carrying the supplied command. The script MUST NOT reference `tty of <terminal>` (Ghostty 1.3's terminal object exposes `id`, `name`, `working directory` only — no `tty` property; tty-targeted split is deferred until upstream adds it). + +#### Scenario: iterm2-mac backend calls osascript + +- **WHEN** kind is `iterm2-mac` +- **THEN** the runner is invoked with `cmd === 'osascript'` +- **AND** `args` is a sequence of `-e <line>` pairs whose concatenated script contains `tell application "iTerm"`, a nested `repeat with w in windows` / `repeat with tb in tabs of w` / `repeat with s in sessions of tb` traversal (iTerm2's object model is window → tabs → sessions; `sessions` is NOT a direct element of `window`) comparing `tty of s` to the caller-tty argument, a `split vertically with default profile` branch when a match is found, a `create window with default profile` plus `current session of newWindow` branch when no match is found, and a `write text` call carrying the supplied command + +### Requirement: Spawn success reporting + +On a successful spawn, the spawner SHALL return `{ spawned: true, kind: <kind> }` so that `handleObserveSpawn` can name the actual backend in its success message. 
+ +#### Scenario: backend exits zero + +- **WHEN** the runner returns `{ status: 0 }` for any detected backend +- **THEN** the spawner result is `{ spawned: true, kind: }` (no `error` field) + +### Requirement: Spawn failure reporting + +On a non-zero runner status or a thrown runner error that is NOT one of the carved-out classes (`automation-permission-denied`, `unsafe-command`), the spawner SHALL return `{ spawned: false, kind: , error: }` so that `handleObserveSpawn` can show the error and fall through to the copy-paste hint. + +#### Scenario: backend exits non-zero with no recognized reason + +- **WHEN** the runner returns `{ status: 1 }` for any detected backend AND stderr does not match the permission-denied pattern +- **THEN** the spawner result is `{ spawned: false, kind: , error: }` (no `reason` field) + +#### Scenario: backend binary missing or runner throws + +- **WHEN** the runner returns `{ status: null, error: }` (e.g., `ENOENT` for `osascript` on a non-macOS system that was mis-detected) +- **THEN** the spawner result is `{ spawned: false, kind: , error: }` + +### Requirement: No-supported-terminal fallback + +When detection returns `{ kind: 'none' }`, the spawner MUST NOT invoke any runner and SHALL return `{ spawned: false, kind: 'none' }` so the caller knows to print only the copy-paste hint (no per-backend failure line). 
+ +#### Scenario: outside any supported terminal + +- **WHEN** detection returns `{ kind: 'none' }` +- **THEN** the runner is not called +- **AND** the spawner result is `{ spawned: false, kind: 'none' }` (no `error` field) + +### Requirement: AppleScript literal escaping (osascript backends only) + +The `ghostty-mac` and `iterm2-mac` backends SHALL escape backslash and double-quote characters in the interpolated shell command (the output of `composeShellInvocation`, after the control-character guard) so that the observer command — as constructed by the spawner's own `buildObserverCommand` helper plus the shell-quoting layer in the requirement above — cannot break the surrounding `"..."` literal. The escape function MUST NOT be claimed safe against arbitrary user-controlled strings; the control-character rejection requirement is what enforces that input domain. The `tmux` backend does not build AppleScript and is exempt from this requirement. + +#### Scenario: command contains a double-quote + +- **WHEN** the composed shell command (after shell-quoting) contains `"` +- **THEN** the produced AppleScript contains `\"` at each occurrence inside its `"..."` literal + +#### Scenario: command contains a backslash + +- **WHEN** the composed shell command (after shell-quoting) contains `\` +- **THEN** the produced AppleScript contains `\\` at each occurrence inside its `"..."` literal + +### Requirement: Shell-safe composition of cwd and command (osascript backends only) + +For the `ghostty-mac` and `iterm2-mac` backends, the spawner SHALL produce the final shell invocation via a single `composeShellInvocation({ cwd, command })` helper that returns exactly `cd ${shellQuote(cwd)} && ${command}` — `cwd` is shell-quoted *here* (single-quote escaping, doubling internal `'` as `'\''`), and `command` MUST be the output of `buildObserverCommand` in `observe.mjs`, which is a space-joined sequence of already-individually-shell-quoted argv tokens. 
`command` MUST NOT be re-quoted by this layer; doing so would collapse the argv tokens into a single literal string and break execution. The `tmux` backend passes `cwd` and `command` as separate `execve` args to `tmux` and does not call `composeShellInvocation`; it is exempt from this requirement. + +The two quoting layers MUST run in this exact order, with no intervening transformation: + +1. `composeShellInvocation({ cwd, command })` — shell-safe composition (Layer 1). +2. `escapeAppleScriptLiteral()` — AppleScript-literal-safe (Layer 2). + +The control-character guard (separate requirement below) runs *between* Layers 1 and 2. + +#### Scenario: cwd contains spaces + +- **WHEN** `cwd` is `/Users/dragon.cl/work projects/codex-plugin-cc` +- **THEN** the composed string starts with `cd '/Users/dragon.cl/work projects/codex-plugin-cc' && ` (cwd wrapped in single quotes) + +#### Scenario: cwd contains a single quote + +- **WHEN** `cwd` is `/tmp/it's-a-trap` +- **THEN** the composed string starts with `cd '/tmp/it'\''s-a-trap' && ` (single quote escaped as `'\''`) + +#### Scenario: cwd contains shell metacharacters + +- **WHEN** `cwd` is `/tmp/foo;rm -rf /;` +- **THEN** the composed string is `cd '/tmp/foo;rm -rf /;' && ` and the embedded `;`/space/etc. 
are inside the single-quoted literal and have no shell effect + +#### Scenario: cwd contains unicode + +- **WHEN** `cwd` is `/Users/田中/プロジェクト` +- **THEN** the composed string contains the original unicode bytes verbatim inside the single-quoted literal + +#### Scenario: command argv tokens are preserved as separate tokens + +- **WHEN** `command` is `'/abs/node' '/abs/companion.mjs' 'observe' 'task-abc'` (four already-shell-quoted argv tokens as produced by `buildObserverCommand`) +- **THEN** the composed string ends with ` && '/abs/node' '/abs/companion.mjs' 'observe' 'task-abc'` — the four tokens are preserved verbatim with their original single quotes, NOT re-quoted into a single literal + +#### Scenario: command containing apparent shell metacharacters in a quoted token is unchanged + +- **WHEN** `command` is `'/abs/node' '/abs/companion.mjs' 'observe' 'task with$weird;chars'` +- **THEN** the composed string ends with that exact string verbatim — the spawner does NOT add another layer of `shellQuote` around `command` + +#### Scenario: layer order — composeShellInvocation runs before escapeAppleScriptLiteral + +- **WHEN** the spawner builds an osascript backend's argv +- **THEN** the input to `escapeAppleScriptLiteral` is exactly the output of `composeShellInvocation` — there is no path that escapes raw `cwd` or raw `command` for AppleScript before shell composition has produced the final invocation + +### Requirement: Caller-terminal targeting with new-window fallback (iterm2-mac only) + +The `iterm2-mac` backend SHALL discover the caller shell's controlling tty (walking the process ancestry until an ancestor with a real tty is found) and pass that path into the AppleScript. The AppleScript SHALL iterate `windows -> tabs -> sessions` and split the session whose `tty` matches; when no match is found OR tty discovery itself fails, the script SHALL open a brand-new window for the observer. It MUST NOT silently split an unrelated front window. 
+ +The `ghostty-mac` backend is exempt from caller-tty targeting because Ghostty 1.3's `terminal` object exposes no `tty` property; this backend always uses the new-window path. When upstream Ghostty adds a `tty` property, this exemption SHALL be revisited. + +The `tmux` backend uses tmux's own client-context split (`split-window -h -c `) and is also exempt. + +#### Scenario: caller tty matches an open iTerm2 session + +- **WHEN** the caller-tty argument equals the `tty` of one of iTerm2's open sessions +- **THEN** the AppleScript splits *that* session vertically and runs the command in the new session + +#### Scenario: no matching iTerm2 session found + +- **WHEN** no open iTerm2 session has a `tty` matching the caller-tty argument +- **THEN** the AppleScript opens a new iTerm2 window via `create window with default profile` (NOT a split of the front window), assigns `current session of newWindow`, and writes the command into that session + +#### Scenario: caller tty cannot be discovered (iTerm2) + +- **WHEN** process-ancestry discovery returns no tty (e.g., sandboxed shell, `ps` unavailable) +- **THEN** the spawner builds iTerm2 AppleScript that goes directly to the `create window` branch with no split attempt and no `repeat` loop + +#### Scenario: Ghostty always uses new-window path + +- **WHEN** the kind is `ghostty-mac`, regardless of whether tty discovery succeeded +- **THEN** the AppleScript always executes `set newWin to new window` followed by `set newTerm to terminal 1 of selected tab of newWin`, then `input text` to that terminal — the script does not embed the caller tty and does not contain a `repeat with` loop + +### Requirement: Reject control characters in the composed shell invocation (osascript backends only) + +For the `ghostty-mac` and `iterm2-mac` backends, after `composeShellInvocation({ cwd, command })` has produced the final `cd && ` string and BEFORE `escapeAppleScriptLiteral` runs, the spawner SHALL scan that exact composed string for 
ASCII control bytes in the range `0x00`–`0x1F` other than `0x09` (tab); `0x20` (space) lies outside that range and is always allowed. On any hit, the spawner SHALL return `{ spawned: false, kind: <kind>, reason: 'unsafe-command', error: <message> }` and MUST NOT invoke the runner or proceed to AppleScript escaping. Scanning the composed string (not the raw `cwd` or raw `command` in isolation) is mandatory so that control bytes embedded in `cwd` are caught before they reach `input text` / `write text`. The `tmux` backend is exempt because it passes `cwd` via `-c <cwd>` and the command as a separate `execve` arg — no AppleScript-literal injection vector exists. + +#### Scenario: cwd contains an embedded newline + +- **WHEN** `cwd` is `/tmp/foo\nbar` (literal newline in the path) and `command` is well-formed +- **THEN** `composeShellInvocation` produces a composed string whose single-quoted cwd literal contains `\n`, the control-char scan detects it, the spawner returns `{ spawned: false, kind: <kind>, reason: 'unsafe-command', error: <message> }`, and the runner is not called + +#### Scenario: command contains an embedded newline + +- **WHEN** the composed shell invocation contains `\n` originating from `command` +- **THEN** the spawner returns `{ spawned: false, kind: <kind>, reason: 'unsafe-command', error: <message> }` and the runner is not called + +#### Scenario: composed string contains a NUL byte + +- **WHEN** the composed string contains `\0` (from either `cwd` or `command`) +- **THEN** the spawner returns `{ spawned: false, kind: <kind>, reason: 'unsafe-command', error: <message> }` and the runner is not called + +#### Scenario: composed string contains a carriage return + +- **WHEN** the composed string contains `\r` (from either `cwd` or `command`) +- **THEN** the spawner returns `{ spawned: false, kind: <kind>, reason: 'unsafe-command', error: <message> }` and the runner is not called + +#### Scenario: tab and space are allowed + +- **WHEN** the composed string contains `\t` (0x09) or `\x20` (space) +- **THEN** the scan does NOT trigger; the spawner proceeds to 
`escapeAppleScriptLiteral` and the runner is invoked normally + +### Requirement: Automation-permission-denied messaging + +When an osascript backend fails because the user has not granted Automation permission, the spawner SHALL classify the failure as `automation-permission-denied` (distinct from generic spawn failure), and `handleObserveSpawn` SHALL print a single dedicated message instructing the user to grant access and retry — without printing the generic copy-paste fallback hint. + +#### Scenario: osascript stderr contains the documented permission error number + +- **WHEN** the runner returns `{ status: 1, stderr: <text containing "(-1743)"> }` +- **THEN** the spawner result is `{ spawned: false, kind: <kind>, reason: 'automation-permission-denied', error: <message> }` + +#### Scenario: osascript stderr contains the "not authorized" phrase + +- **WHEN** the runner returns `{ status: 1, stderr: <text containing "not authorized to send Apple events", any letter case> }` +- **THEN** the spawner result is `{ spawned: false, kind: <kind>, reason: 'automation-permission-denied', error: <message> }` + +#### Scenario: handleObserveSpawn prints the dedicated permission message + +- **WHEN** the spawner returns a result with `reason === 'automation-permission-denied'` +- **THEN** `handleObserveSpawn` prints a single line of the form `! macOS Automation permission needed for <app>. Open System Settings → Privacy & Security → Automation, enable <app>, then rerun /codex:observe.` +- **AND** does NOT print the generic copy-paste fallback hint diff --git a/openspec/changes/add-osascript-spawn-backends/tasks.md b/openspec/changes/add-osascript-spawn-backends/tasks.md new file mode 100644 index 00000000..c912a162 --- /dev/null +++ b/openspec/changes/add-osascript-spawn-backends/tasks.md @@ -0,0 +1,88 @@ +## 0. Spike: validate Ghostty AppleScript surface + +- [ ] 0.1 In a real Ghostty window, run a 10-line `osascript` that opens two windows and prints `tty of` each `terminal in terminals` to confirm the property exists and returns `/dev/ttysNN`. Note the Ghostty version under test. 
+- [ ] 0.2 Run the same probe against iTerm2 using `tty of current session` and `tty of session 1 of window N`. Note the iTerm2 version under test. +- [ ] 0.3 If Ghostty's `tty of` is unavailable on the pinned version, narrow this change: Ghostty backend ships with new-window-only behavior (still safe), and an upstream feature request is filed. Update design §Risks accordingly. Otherwise proceed with the full tty-match path. + +## 1. Tests First (RED) + +- [ ] 1.1 Extend `tests/spawner.test.mjs` `detectTerminal` block with cases for `ghostty-mac` (darwin + `TERM_PROGRAM=ghostty`, no `TMUX`) and `iterm2-mac` (darwin + `TERM_PROGRAM=iTerm.app`, no `TMUX`), and a non-darwin case that returns `none` even when `TERM_PROGRAM` matches. +- [ ] 1.2 Add a `Detection precedence` describe block asserting that `tmux` wins when both `$TMUX` and `$TERM_PROGRAM=ghostty` (or `iTerm.app`) are set. +- [ ] 1.3 Add `spawnObserverInTerminal` cases for the new backends using the existing injected-runner pattern: assert `cmd === 'osascript'` and inspect the `-e` arg sequence for the required AppleScript verbs (`tell application "Ghostty"` / `tell application "iTerm"`, the `repeat with` loop comparing `tty of`, the `split` and `new window` / `create window` branches, `input text` / `write text` carrying the composed command). +- [ ] 1.4 Add escape tests asserting that `"` in the composed shell command becomes `\"` and `\` becomes `\\` in the AppleScript literal. +- [ ] 1.5 Add **shell-quoting** tests for the new `composeShellInvocation({ cwd, command })` helper: cwd containing spaces → wrapped in `'...'`; cwd containing a single quote → escaped as `'\''`; cwd containing `;`/`$`/space → metacharacters appear inside the quoted literal with no shell effect; cwd containing unicode → bytes preserved verbatim. 
Add **command-token preservation** tests: when `command` is a four-token pre-quoted string like `'/abs/node' '/abs/companion.mjs' 'observe' 'task-abc'`, the composed output ends with that string byte-for-byte (i.e. the helper does NOT call `shellQuote(command)` again). Add a **layer-order** test asserting that the input to `escapeAppleScriptLiteral` (call it via spy or via inspecting backend builder output) equals `composeShellInvocation`'s output exactly. +- [ ] 1.6 Add **control-char rejection** tests: cwd containing `\n` → `spawnObserverInTerminal` returns `{ spawned: false, kind, reason: 'unsafe-command', error }` mentioning newline AND a cwd location; cwd containing `\0` → same shape mentioning NUL; command containing `\r` → same shape; cwd or command containing `\t` (0x09) or `\x20` (space) → guard does NOT trigger and runner IS invoked. For every rejection case assert the injected runner is NOT called. +- [ ] 1.7 Add **tty-match dispatch** tests: stub the tty-discovery helper to return a known `/dev/ttysNN`, then assert the produced AppleScript embeds that tty inside the `repeat` loop's comparison. Add a second test where the discovery helper returns `null` → assert the produced AppleScript goes straight to the `new window` branch (no `repeat`/`split`). +- [ ] 1.8 Add **permission-denied** tests: stub the runner to return `{ status: 1, stderr: '(-1743) Not authorized to send Apple events to ...' }` → assert `{ spawned: false, kind, reason: 'automation-permission-denied', error }`. Repeat with the lowercase phrase variant. +- [ ] 1.9 Extend `tests/observe.test.mjs` wiring tests with a `handleObserveSpawn` case that injects a fake spawner returning `{ spawned: false, reason: 'automation-permission-denied' }` and asserts the printed output contains "Automation permission needed" and does NOT contain the copy-paste fallback hint. 
+- [ ] 1.10 Run `node --test tests/spawner.test.mjs tests/observe.test.mjs` and confirm the new cases fail before any implementation lands. + +## 2. Refactor spawner.mjs to a strategy table + +- [ ] 2.1 Introduce a backends table with three entries (`tmux`, `ghostty-mac`, `iterm2-mac`), each `{ detect(env), build(buildInput), cmd, classifyFailure(result) }`. Order entries in priority sequence (tmux first). The `buildInput` shape differs per backend kind: tmux receives `{ cwd, command }` (no shell composition — tmux takes `-c <cwd>` as a separate arg), osascript backends receive `{ composed, callerTty }` (already-composed shell string + discovered tty, see §2.3). +- [ ] 2.2 Rewrite `detectTerminal(env)` to walk the table and return the first hit, falling back to `{ kind: 'none' }`. Keep the current return shape (`{ kind }`). +- [ ] 2.3 Rewrite `spawnObserverInTerminal({ cwd, command, env, runner })` to follow the design's pipeline order exactly: (a) detect — early-return `{ spawned: false, kind: 'none' }` when no backend matches; (b) **for osascript backends only:** `const composed = composeShellInvocation({ cwd, command })` then `const guard = rejectControlChars(composed)` — on hit, early-return `{ spawned: false, kind, reason: 'unsafe-command', error }`; (c) discover `callerTty` (see §3.2) — `null` is fine; (d) call `runner(backend.cmd, backend.build(buildInput), { stdio: ['ignore', 'ignore', 'pipe'] })` (stderr captured for classification), where `buildInput` is the per-backend shape from §2.1; (e) on non-zero status, ask `backend.classifyFailure({ status, stderr, error })` → may return `automation-permission-denied` or a generic error string. `composeShellInvocation` MUST run exactly once per spawn, in the dispatcher, so the guard, the backend builder, and any test asserting the composed string all see the same bytes. +- [ ] 2.4 Keep `buildTmuxSplitArgs` exported (the existing tmux test depends on it). Keep `shellQuote` exported. Tmux backend's `classifyFailure` only returns generic errors (no permission concept). + +## 3. 
Shared helpers (live in spawner.mjs) + +- [ ] 3.1 Add `composeShellInvocation({ cwd, command })` that returns `cd ${shellQuote(cwd)} && ${command}` (command is already shell-safe from `buildObserverCommand`). Add a `rejectControlChars(value)` helper that scans for control bytes in `0x00–0x1F` except `0x09` (tab) — `0x20` (space) lies outside that range and is always allowed — and returns `{ ok: false, byte }` on hit, `{ ok: true }` otherwise. +- [ ] 3.2 Add `discoverCallerTty()` that walks the process ancestry via `ps -o tty=,ppid= -p <pid>` (use `execFileSync` with a 250ms timeout, wrapped in a `try`/`catch` that returns `null` — `execFileSync` is synchronous and throws on timeout or non-zero exit, so there is no promise to `.catch`), returning `/dev/ttysNN` for the first ancestor with a non-`?` tty, or `null` after walking 10 levels / hitting pid 1 / a `ps` error. Pure function input: starting pid (default `process.pid`); pure dependency: an injectable `runProbe(cmd, args)` for tests. +- [ ] 3.3 Add `escapeAppleScriptLiteral(value)` that backslash-escapes `\` and `"` (`\` → `\\`, `"` → `\"`; nothing else). Pure function, no env access. +- [ ] 3.4 Add `osascriptArgsFromLines(lines)` that returns `lines.flatMap(line => ['-e', line])` so backends build their script as an array of lines and the runner-args composition is a single line. + +## 4. Implement ghostty-mac backend + +- [ ] 4.1 Add `buildGhosttyMacArgs({ composed, callerTty })`. `composed` is the already-shell-quoted output of `composeShellInvocation` (the dispatcher in §2.3 ran the control-char guard on it before calling this builder, so the builder treats it as safe). Body: + 1. `const literal = escapeAppleScriptLiteral(composed)` — Layer 2 (AppleScript-safe). + 2. Build script lines: + - `tell application "Ghostty"` + - `activate` + - if `callerTty`: `set targetTty to "${escapeAppleScriptLiteral(callerTty)}"` then `set matched to missing value` then a `repeat with t in terminals` block that sets `matched` to the first `t` whose `tty` equals `targetTty`; an `if matched is not missing value then set newTerm to split matched direction right` branch and an `else` branch that does `new window`. 
+ - if no `callerTty`: skip the repeat, go straight to `new window`. + - `input text "${literal}\n" to newTerm` (final line uses whichever variable the active branch set). + - `end tell` + 3. Return `osascriptArgsFromLines(scriptLines)`. +- [ ] 4.2 Add `classifyGhosttyFailure({ status, stderr, error })`: if `stderr.includes('(-1743)') || /not authorized to send apple events/i.test(stderr)` → return `{ reason: 'automation-permission-denied', error: }`; else return `{ error: <`Failed to drive ghostty-mac: ...`> }`. +- [ ] 4.3 Wire the backend into the strategy table. + +## 5. Implement iterm2-mac backend + +- [ ] 5.1 Add `buildIterm2MacArgs({ composed, callerTty })`. `composed` is the already-shell-quoted output of `composeShellInvocation` (guarded by the dispatcher in §2.3). Body mirrors §4.1 but with iTerm2 verbs: `const literal = escapeAppleScriptLiteral(composed)`, then `tell application "iTerm"`, iterate `windows`/`sessions of ` comparing `tty of `, on match `tell ` → `split vertically with default profile`, on no match (or no `callerTty`) `create window with default profile`. Final command via `write text "${literal}" to ` (no trailing `\n` — iTerm2 `write text` adds Enter). +- [ ] 5.2 Add `classifyIterm2Failure(...)` identical in shape to §4.2. +- [ ] 5.3 Wire the backend into the strategy table. + +## 6. Caller updates (observe.mjs handleObserveSpawn) + +- [ ] 6.1 Replace the hardcoded `"new tmux pane"` success string with a per-kind label table: `tmux pane` / `Ghostty split or new window` / `iTerm2 split or new window`. +- [ ] 6.2 When the spawner result has `reason === 'automation-permission-denied'`, print a single line of the form `! macOS Automation permission needed for . Open System Settings → Privacy & Security → Automation, enable , then rerun /codex:observe.` and do NOT print the generic copy-paste fallback hint. 
+- [ ] 6.3 When the spawner result has `reason === 'unsafe-command'`, print `✗ Refusing to spawn: composed command contains a control character (<byte>). Run the command manually:` followed by the copy-paste hint (this path is paranoid — should never trigger in practice — but the message must be unambiguous if it does). +- [ ] 6.4 Verify the existing `tests/observe.test.mjs` non-tmux fallback wiring test still passes unchanged. + +## 7. Verification (GREEN) + +- [ ] 7.1 `npm run build` is clean (tsc checkJs against the new strategy-table types and the helper signatures). +- [ ] 7.2 `node --test tests/spawner.test.mjs tests/observe.test.mjs` is green — the new cases from §1 now pass. +- [ ] 7.3 `npm test` full suite is green (target ≥168 tests, no regressions; new tests bring total higher). +- [ ] 7.4 Regression smoke: from inside tmux, `node plugins/codex/scripts/codex-companion.mjs observe --spawn --cwd /tmp task-fake` still opens a tmux pane and prints `✓ Observer launched in tmux pane`. +- [ ] 7.5 Mac smoke (Ghostty, requires real machine): + - 7.5.1 First-run permission dialog: from a freshly Automation-denied state, run the command and confirm the printed line matches the dedicated "grant access and retry" message; grant access in System Settings; rerun and confirm a split opens. + - 7.5.2 Cwd with spaces: invoke with `--cwd "/tmp/dir with spaces"` (mkdir first), confirm the new pane is in that directory. + - 7.5.3 Non-frontmost invocation: open a second Ghostty window, make the second one frontmost, invoke from the first; confirm the split happens in the *first* (caller's) window via tty-match — NOT in the frontmost. + - 7.5.4 No-match fallback: close all Ghostty windows except the calling one, then exec the binary from a non-Ghostty context that still detects ghostty-mac (e.g., setting `TERM_PROGRAM=ghostty` manually in a Terminal.app shell); confirm a brand-new Ghostty window opens instead of a misplaced split. 
+- [ ] 7.6 Mac smoke (iTerm2, requires real machine): repeat 7.5.1–7.5.4 with iTerm2 verbs. + +## 8. Docs & version + +- [ ] 8.1 Update `plugins/codex/commands/observe.md` Behavior section to list the three supported backends (tmux, Ghostty on macOS, iTerm2 on macOS), the new-window fallback when targeting fails, and the Automation-permission note. +- [ ] 8.2 Run `node scripts/bump-version.mjs 1.4.0` and `npm run check-version` to confirm all four manifests sync. +- [ ] 8.3 Stage only the implementation + test + docs + version files (do NOT include `.omc/` or unrelated edits). Commit with message `feat: add ghostty + iterm2 osascript spawn backends (1.4.0)`. + +## 9. Final review (Claude main thread) + +- [ ] 9.1 `git diff main...HEAD --stat` — confirm the touched files match the §0–§8 scope; challenge any out-of-scope edits. +- [ ] 9.2 Cross-check tasks.md against the implementation diff and the spec scenarios; every scenario in `specs/observer-spawner/spec.md` must map to at least one test case. +- [ ] 9.3 Run `/codex:review` and `/ai-code-review` (or `code-reviewer` agent) for dual-model coverage. +- [ ] 9.4 Run `/codex:adversarial-review` one more time on the implementation diff (separate from the spec-pass we already did) — focus on whether the implementation honors every spec requirement, including the failure-classification edge cases. +- [ ] 9.5 Update HANDOFF (path TBD by Codex during §8.3) summarising what was implemented, what was manually smoke-tested (§7.5/§7.6), the Ghostty/iTerm2 versions used, and any open follow-ups (e.g., Linux Ghostty, WezTerm). +- [ ] 9.6 `/opsx:archive add-osascript-spawn-backends` once the change is merged. 
diff --git a/openspec/changes/archive/2026-05-20-rescue-worktree-mode/.openspec.yaml b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/.openspec.yaml new file mode 100644 index 00000000..8b769149 --- /dev/null +++ b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-20 diff --git a/openspec/changes/archive/2026-05-20-rescue-worktree-mode/design.md b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/design.md new file mode 100644 index 00000000..65947de6 --- /dev/null +++ b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/design.md @@ -0,0 +1,101 @@ +## Context + +当前 `/codex:rescue` 的 task 模式通过 `runAppServerTurn(workspaceRoot, ...)` 将 Codex app-server 的工作目录设为用户的源 repo。Codex 的改动直接作用于工作区,用户无法在隔离环境中 review 或大胆尝试。 + +git worktree 是 git 原生支持的隔离工作目录机制,创建一个 worktree 只需 `git worktree add -b `,工作目录包含完整的工作树快照,共享 `.git` 对象库。 + +Codex app-server 的 `thread/start` 接受 `cwd` 参数,传入 worktree 路径即可让 Codex 在隔离目录中工作,Codex 侧无需任何改动。 + +## Goals / Non-Goals + +**Goals:** + +- 用户通过 `--worktree` 标志让 Codex 在隔离的 git worktree 中执行任务 +- 改动自动落在独立分支上,用户可通过 `git diff` / `git merge` 管理 +- 支持 `--background` 后台模式 +- `/codex:status` 和 `/codex:result` 展示 worktree 路径和操作指引 + +**Non-Goals:** + +- 不做自动清理(用户决定何时 merge 或 remove) +- 不做 worktree 与 `--resume-last` 的兼容(worktree 是全新隔离环境) +- 不做 review 命令的 worktree 支持(review 是只读操作,不需要隔离) +- 不做跨多个 worktree 的并行任务管理 + +## Decisions + +### 1. worktree 路径选择 + +**决策**: 使用 `/.claude/worktrees//` 作为 worktree 路径。 + +**理由**: +- 项目 `tests/git.test.mjs` 中已有 `.claude/worktrees/agent-test` 的约定,保持一致 +- 路径与 repo 关联,容易找到 +- 可通过 `.gitignore` 排除(如果用户需要) + +**备选方案**: +- `/tmp/codex-wt-/` — 临时目录可能被系统清理,不够可靠 +- `../codex-wt-/` — 不一定有写权限 + +### 2. 
分支命名策略 + +**决策**: 分支名格式为 `codex-rescue/<jobId>-<slug>`,其中 `<slug>` 取 prompt 前 32 字符(去特殊字符)。 + +**理由**: +- 带 jobId 保证唯一性 +- 带 prompt 摘要方便识别任务目的 +- `codex-rescue/` 前缀表明来源,便于批量清理 + +**备选方案**: +- 纯 jobId(如 `codex-rescue-task-abc123`)— 缺少上下文 +- 纯时间戳(如 `codex-rescue-20260520-1430`)— 不够直观 + +### 3. workspaceRoot 与 codexCwd 分离 + +**决策**: task request 中新增 `worktreePath` 字段。`executeTaskRun` 中: +- `workspaceRoot = resolveWorkspaceRoot(request.cwd)` — 源 repo,用于 state/config +- `codexCwd = request.worktreePath ?? workspaceRoot` — 传给 Codex 的工作目录 + +**理由**: +- state 目录(job record、log file)必须存放在源 repo 中,否则 `/codex:status` 找不到 job +- sandbox_mode 配置从源 repo 读取(worktree 里可能没有 `.codex/config.toml`) +- Codex 在 worktree 里工作,改动隔离 + +### 4. 后台模式(--background)兼容性 + +**决策**: worktree 在 `handleTask` 中创建(前台),然后将 `worktreePath` 写入 job record 的 request。`task-worker` 子进程从 job record 读取 `worktreePath`,直接使用。 + +**理由**: +- worktree 创建必须在主进程完成(需要 git 操作) +- 后台 worker 直接使用已创建的路径,避免重复创建 + +### 5. 输出格式 + +**决策**: 完成后的输出增加 worktree 信息块: + +``` +Codex task completed in worktree. 
+ Path: /path/to/.claude/worktrees/task-abc123/ + Branch: codex-rescue/task-abc123-fix-auth-bug + +Next steps: + Diff: git diff main...codex-rescue/task-abc123-fix-auth-bug + Merge: git merge codex-rescue/task-abc123-fix-auth-bug + Remove: git worktree remove /path/to/.claude/worktrees/task-abc123/ +``` + +**理由**: +- 用户需要明确知道 worktree 在哪、怎么操作 +- 提供常用命令示例,降低使用门槛 + +## Risks / Trade-offs + +**[Risk] worktree 创建失败** → 回退到非 worktree 模式,输出错误信息并询问用户是否继续 + +**[Risk] worktree 路径已存在** → 检查是否属于当前 job,如果是则复用,否则报错 + +**[Risk] 用户忘记清理 worktree** → 不自动清理,但在 `/codex:status` 中展示未清理的 worktree 列表(后续可扩展 `/codex:cleanup` 命令) + +**[Trade-off] worktree 占用磁盘空间** → 接受此代价,因为用户需要时间 review 改动。可通过 `.claude/worktrees/` 目录大小提醒用户 + +**[Trade-off] `--worktree` 与 `--resume-last` 互斥** → 简化实现,避免复杂的线程迁移逻辑。用户如果需要继续上次的工作,应该在源 repo 中 resume,而不是在新 worktree 中 diff --git a/openspec/changes/archive/2026-05-20-rescue-worktree-mode/proposal.md b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/proposal.md new file mode 100644 index 00000000..40a8818b --- /dev/null +++ b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/proposal.md @@ -0,0 +1,35 @@ +## Why + +当前 `/codex:rescue` (task 模式) 在用户的当前工作目录里直接运行 Codex,所有改动直接作用于工作区。当 Codex 需要修复 bug 或重构代码时,用户无法隔离地 review 改动,也无法在不影响主分支的情况下让 Codex 大胆尝试。增加 `--worktree` 模式,让 Codex 在隔离的 git worktree 中工作,改动落在独立分支上,用户 review 后再决定是否 merge。 + +## What Changes + +- task 子命令新增 `--worktree` 标志,启用后在临时目录创建 git worktree 并建立独立分支 +- 插件自动创建 worktree(`git worktree add -b `),将 worktree 路径作为 Codex 的工作目录 +- job record 新增 `worktreePath` 和 `worktreeBranch` 字段,追踪 worktree 的生命周期 +- `/codex:status` 和 `/codex:result` 输出中展示 worktree 路径、分支名和后续操作指引 +- 完成后不自动清理 worktree,由用户决定 merge 或 remove +- `--worktree` 与 `--resume-last` 互斥(worktree 是全新隔离环境,不支持 resume 旧线程) + +## Capabilities + +### New Capabilities + +- `worktree-lifecycle`: 覆盖 worktree 的创建、路径解析、分支管理、状态追踪。包括在 temp 目录创建 worktree、生成唯一分支名、记录到 job record。 +- `worktree-task-dispatch`: 覆盖 task 模式下 worktree 的执行流程。将 workspaceRoot(源 repo)与 
codexCwd(worktree 路径)分离,确保 state/config 从源 repo 读取,Codex 在 worktree 中工作。 +- `worktree-output`: 覆盖 worktree 相关信息的展示。包括 `/codex:status`、`/codex:result` 中的 worktree 路径和分支信息,以及完成后的操作指引(diff、merge、remove 命令示例)。 + +### Modified Capabilities + +(无现有 spec 需要修改) + +## Impact + +- `plugins/codex/scripts/codex-companion.mjs`: task 子命令解析 `--worktree` 参数,调用 worktree 创建逻辑,分离 workspaceRoot 和 codexCwd +- `plugins/codex/scripts/lib/workspace.mjs`: 新增 worktree 创建和管理函数 +- `plugins/codex/scripts/lib/codex.mjs`: `runAppServerTurn` 接收 codexCwd 参数(与 workspaceRoot 分离) +- `plugins/codex/scripts/lib/state.mjs`: job record 结构新增 worktreePath 和 worktreeBranch 字段 +- `plugins/codex/scripts/lib/render.mjs`: status/result 输出增加 worktree 信息 +- `plugins/codex/commands/rescue.md`: 命令文档增加 `--worktree` 参数说明 +- `plugins/codex/agents/codex-rescue.md`: subagent 指令增加 `--worktree` 标志的识别和透传 +- 依赖: 需要 git 支持 worktree 命令(git >= 2.5) diff --git a/openspec/changes/archive/2026-05-20-rescue-worktree-mode/specs/worktree-lifecycle/spec.md b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/specs/worktree-lifecycle/spec.md new file mode 100644 index 00000000..6779c6ac --- /dev/null +++ b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/specs/worktree-lifecycle/spec.md @@ -0,0 +1,43 @@ +## ADDED Requirements + +### Requirement: Worktree 路径生成 +系统必须(SHALL)根据 jobId 生成唯一的 worktree 路径,格式为 `/.claude/worktrees//`。 + +#### Scenario: 生成 worktree 路径 +- **WHEN** 系统需要为 jobId 为 `task-abc123` 的任务创建 worktree +- **THEN** 生成的路径为 `/.claude/worktrees/task-abc123/` + +### Requirement: Worktree 分支命名 +系统必须(SHALL)生成唯一的分支名,格式为 `codex-rescue/-`,其中 `` 取 prompt 前 32 字符(去特殊字符,转 kebab-case)。 + +#### Scenario: 生成带 prompt 的分支名 +- **WHEN** jobId 为 `task-abc123`,prompt 为 `Fix the authentication bug in login handler` +- **THEN** 分支名为 `codex-rescue/task-abc123-fix-the-authentication-bu` + +#### Scenario: 生成无 prompt 的分支名 +- **WHEN** jobId 为 `task-abc123`,prompt 为空 +- **THEN** 分支名为 `codex-rescue/task-abc123` + +### Requirement: Worktree 创建 
+系统必须(SHALL)使用 `git worktree add -b ` 创建 worktree,基于源 repo 的当前 HEAD。 + +#### Scenario: 成功创建 worktree +- **WHEN** 系统在 `/repo` 目录下为 jobId `task-abc123` 创建 worktree +- **THEN** 执行 `git worktree add /repo/.claude/worktrees/task-abc123/ -b codex-rescue/task-abc123-...` +- **THEN** worktree 目录存在且包含完整工作树 +- **THEN** 分支 `codex-rescue/task-abc123-...` 存在于本地分支列表 + +#### Scenario: Worktree 路径已存在且属于当前 job +- **WHEN** worktree 路径已存在,且 job record 中的 worktreePath 匹配该路径 +- **THEN** 复用现有 worktree,不重新创建 + +#### Scenario: Worktree 路径已存在但不属于当前 job +- **WHEN** worktree 路径已存在,但 job record 中的 worktreePath 不匹配 +- **THEN** 抛出错误,提示用户手动清理或选择其他 jobId + +### Requirement: Worktree 基础分支记录 +系统必须(SHALL)在 job record 中记录创建 worktree 时的基础分支(baseBranch),用于后续 diff 展示。 + +#### Scenario: 记录基础分支 +- **WHEN** 系统从 `main` 分支创建 worktree +- **THEN** job record 的 `worktreeBaseBranch` 字段为 `main` diff --git a/openspec/changes/archive/2026-05-20-rescue-worktree-mode/specs/worktree-output/spec.md b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/specs/worktree-output/spec.md new file mode 100644 index 00000000..0c79c4d7 --- /dev/null +++ b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/specs/worktree-output/spec.md @@ -0,0 +1,66 @@ +## ADDED Requirements + +### Requirement: Status 输出展示 worktree 信息 +`/codex:status` 的输出必须(SHALL)在 job 包含 worktree 信息时展示 worktree 路径和分支名。 + +#### Scenario: Status 展示进行中的 worktree 任务 +- **WHEN** 用户执行 `/codex:status`,当前有一个进行中的 worktree 任务 +- **THEN** 输出包含: + ``` + Worktree: + Path: /repo/.claude/worktrees/task-abc123/ + Branch: codex-rescue/task-abc123-fix-bug + ``` + +#### Scenario: Status 展示非 worktree 任务 +- **WHEN** 用户执行 `/codex:status`,当前任务未使用 worktree +- **THEN** 输出不包含 worktree 相关字段(现有行为不变) + +### Requirement: Result 输出展示 worktree 操作指引 +`/codex:result` 的输出必须(SHALL)在 job 包含 worktree 信息时展示后续操作指引,包括 diff、merge、remove 命令示例。 + +#### Scenario: Result 展示完成后的 worktree 任务 +- **WHEN** 用户执行 `/codex:result`,最近完成的 job 使用了 worktree +- **THEN** 输出末尾包含: + ``` + Worktree: + Path: 
/repo/.claude/worktrees/task-abc123/ + Branch: codex-rescue/task-abc123-fix-bug + + Next steps: + Diff: git diff main...codex-rescue/task-abc123-fix-bug + Merge: git merge codex-rescue/task-abc123-fix-bug + Remove: git worktree remove /repo/.claude/worktrees/task-abc123/ + ``` + +#### Scenario: Result 展示非 worktree 任务 +- **WHEN** 用户执行 `/codex:result`,最近完成的 job 未使用 worktree +- **THEN** 输出不包含 worktree 相关字段(现有行为不变) + +### Requirement: JSON 输出包含 worktree 字段 +当使用 `--json` 标志时,status 和 result 的 JSON 输出必须(SHALL)包含 worktree 相关字段。 + +#### Scenario: JSON status 包含 worktree +- **WHEN** 用户执行 `/codex:status --json`,当前有 worktree 任务 +- **THEN** JSON 输出包含 `worktreePath`、`worktreeBranch`、`worktreeBaseBranch` 字段 + +#### Scenario: JSON result 包含 worktree +- **WHEN** 用户执行 `/codex:result --json`,最近完成的 job 使用了 worktree +- **THEN** JSON 输出包含 `worktreePath`、`worktreeBranch`、`worktreeBaseBranch` 字段 + +### Requirement: 任务完成时输出 worktree 摘要 +task 任务完成后,系统必须(SHALL)在输出末尾追加 worktree 摘要块。 + +#### Scenario: 前台任务完成后输出摘要 +- **WHEN** 前台 worktree 任务执行完成 +- **THEN** 输出末尾包含 worktree 路径、分支名和后续操作指引(同 Result 输出格式) + +#### Scenario: 后台任务启动时输出 worktree 路径 +- **WHEN** 后台 worktree 任务启动 +- **THEN** 输出包含: + ``` + Codex task started in worktree (background). + Path: /repo/.claude/worktrees/task-abc123/ + Branch: codex-rescue/task-abc123-fix-bug + Check /codex:status for progress. 
+ ``` diff --git a/openspec/changes/archive/2026-05-20-rescue-worktree-mode/specs/worktree-task-dispatch/spec.md b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/specs/worktree-task-dispatch/spec.md new file mode 100644 index 00000000..a0ae06bf --- /dev/null +++ b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/specs/worktree-task-dispatch/spec.md @@ -0,0 +1,74 @@ +## ADDED Requirements + +### Requirement: Task 命令接受 --worktree 标志 +`/codex:rescue` 命令必须(SHALL)接受 `--worktree` 布尔标志,启用后在 git worktree 中执行任务。 + +#### Scenario: 使用 --worktree 标志 +- **WHEN** 用户执行 `/codex:rescue --worktree fix the bug` +- **THEN** 系统创建 git worktree 并在其中执行 Codex 任务 + +#### Scenario: 不使用 --worktree 标志 +- **WHEN** 用户执行 `/codex:rescue fix the bug` +- **THEN** 系统在源 repo 的工作目录中直接执行(现有行为不变) + +### Requirement: --worktree 与 --resume-last 互斥 +当同时指定 `--worktree` 和 `--resume-last`(或 `--resume`)时,系统必须(SHALL)拒绝执行并报错。 + +#### Scenario: 同时指定 --worktree 和 --resume-last +- **WHEN** 用户执行 `/codex:rescue --worktree --resume-last` +- **THEN** 系统输出错误信息 `--worktree and --resume-last are mutually exclusive` +- **THEN** 任务不执行 + +#### Scenario: 同时指定 --worktree 和 --resume +- **WHEN** 用户执行 `/codex:rescue --worktree --resume` +- **THEN** 系统输出错误信息 `--worktree and --resume are mutually exclusive` +- **THEN** 任务不执行 + +### Requirement: workspaceRoot 与 codexCwd 分离 +当启用 `--worktree` 时,系统必须(SHALL)将 workspaceRoot(源 repo)与 codexCwd(worktree 路径)分离。 + +#### Scenario: Worktree 模式下分离路径 +- **WHEN** 启用 `--worktree`,worktree 路径为 `/repo/.claude/worktrees/task-abc123/` +- **THEN** workspaceRoot 为源 repo 路径(用于 state 存储和 config 读取) +- **THEN** codexCwd 为 worktree 路径(传给 Codex 作为工作目录) +- **THEN** job record 存储在源 repo 的 state 目录中 +- **THEN** sandbox_mode 从源 repo 的 `.codex/config.toml` 或 `~/.codex/config.toml` 读取 + +#### Scenario: 非 Worktree 模式下路径一致 +- **WHEN** 未启用 `--worktree` +- **THEN** workspaceRoot 和 codexCwd 均为源 repo 路径(现有行为不变) + +### Requirement: Worktree 信息存入 job record +系统必须(SHALL)在 job record 中记录 
`worktreePath`、`worktreeBranch` 和 `worktreeBaseBranch` 字段。 + +#### Scenario: 记录 worktree 信息 +- **WHEN** 系统在 `/repo/.claude/worktrees/task-abc123/` 创建 worktree,分支为 `codex-rescue/task-abc123-fix-bug`,基础分支为 `main` +- **THEN** job record 包含: + - `worktreePath: "/repo/.claude/worktrees/task-abc123/"` + - `worktreeBranch: "codex-rescue/task-abc123-fix-bug"` + - `worktreeBaseBranch: "main"` + +### Requirement: 后台模式支持 --worktree +系统必须(SHALL)支持 `--worktree --background` 组合,worktree 在前台创建,后台 worker 使用已创建的路径。 + +#### Scenario: 后台模式 + worktree +- **WHEN** 用户执行 `/codex:rescue --worktree --background fix the bug` +- **THEN** 系统在前台创建 worktree +- **THEN** 将 worktreePath 写入 job record 的 request 字段 +- **THEN** 启动后台 worker 进程 +- **THEN** worker 从 job record 读取 worktreePath,直接在该路径下执行 Codex 任务 + +### Requirement: Task request 传递 worktreePath +`buildTaskRequest` 函数必须(SHALL)接受 `worktreePath` 参数并写入 request 对象。 + +#### Scenario: Request 包含 worktreePath +- **WHEN** 调用 `buildTaskRequest({ cwd, model, effort, prompt, write, resumeLast, jobId, worktreePath: "/path/to/wt" })` +- **THEN** 返回的 request 对象包含 `worktreePath: "/path/to/wt"` + +### Requirement: executeTaskRun 使用 worktreePath +`executeTaskRun` 函数必须(SHALL)在 `request.worktreePath` 存在时,将其作为 Codex 的工作目录。 + +#### Scenario: 使用 worktreePath 作为 Codex 工作目录 +- **WHEN** `request.worktreePath` 为 `/repo/.claude/worktrees/task-abc123/` +- **THEN** `runAppServerTurn` 的第一个参数为 `/repo/.claude/worktrees/task-abc123/` +- **THEN** Codex 在该 worktree 中执行任务 diff --git a/openspec/changes/archive/2026-05-20-rescue-worktree-mode/tasks.md b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/tasks.md new file mode 100644 index 00000000..05117247 --- /dev/null +++ b/openspec/changes/archive/2026-05-20-rescue-worktree-mode/tasks.md @@ -0,0 +1,41 @@ +## 1. 
Worktree 生命周期管理 + +- [x] 1.1 在 `plugins/codex/scripts/lib/workspace.mjs` 中新增 `createWorktree(sourceRoot, jobId, prompt)` 函数,执行 `git worktree add -b `,返回 `{ worktreePath, worktreeBranch, worktreeBaseBranch }` +- [x] 1.2 新增 `generateWorktreeBranch(jobId, prompt)` 辅助函数,生成 `codex-rescue/-` 格式的分支名 +- [x] 1.3 新增 `resolveWorktreePath(sourceRoot, jobId)` 辅助函数,返回 `/.claude/worktrees//` +- [x] 1.4 编写 worktree 生命周期单元测试,覆盖路径生成、分支命名、创建成功/失败场景 + +## 2. Task 命令集成 + +- [x] 2.1 在 `plugins/codex/scripts/codex-companion.mjs` 的 `handleTask` 函数中解析 `--worktree` 布尔标志 +- [x] 2.2 在 `handleTask` 中增加 `--worktree` 与 `--resume-last`/`--resume` 的互斥校验 +- [x] 2.3 在 `buildTaskRequest` 中新增 `worktreePath` 参数 +- [x] 2.4 在 `buildTaskJob` 中新增 worktree 相关字段(worktreePath、worktreeBranch、worktreeBaseBranch) +- [x] 2.5 在 `handleTask` 的 background 分支中,先创建 worktree 再写入 job record + +## 3. 执行流程分离 + +- [x] 3.1 修改 `executeTaskRun`,在 `request.worktreePath` 存在时将其作为 `runAppServerTurn` 的 cwd 参数 +- [x] 3.2 确保 `resolveCodexSandboxMode` 从源 repo(workspaceRoot)读取配置,而非 worktree 路径 +- [x] 3.3 修改 `handleTaskWorker`,从 job record 的 request 中读取 `worktreePath` 并传递给 `executeTaskRun` +- [ ] 3.4 编写 task 执行流程集成测试,覆盖前台/后台模式下的 worktree 任务 + +## 4. Job Record 扩展 + +- [x] 4.1 在 `plugins/codex/scripts/lib/state.mjs` 的 `createJobRecord` 或 `createCompanionJob` 中支持 worktreePath、worktreeBranch、worktreeBaseBranch 字段 +- [x] 4.2 确保 job record 的 JSON 序列化包含 worktree 字段 + +## 5. 输出展示 + +- [x] 5.1 在 `plugins/codex/scripts/lib/render.mjs` 中新增 `renderWorktreesBlock(meta)` 函数 +- [x] 5.2 修改 `renderTaskResult`,在 job 包含 worktree 信息时追加 worktree 摘要块(含 diff/merge/remove 命令示例) +- [x] 5.3 修改 `renderQueuedTaskLaunch`,在后台 worktree 任务启动时输出 worktree 路径和分支名 +- [x] 5.4 修改 `renderJobStatusReport`(/codex:status),在 job 包含 worktree 信息时展示 worktree 路径和分支名 +- [x] 5.5 修改 `renderStoredJobResult`(/codex:result),在 job 包含 worktree 信息时展示后续操作指引 +- [x] 5.6 确保 `--json` 输出包含 worktree 字段 +- [x] 5.7 编写输出格式测试,覆盖 worktree 信息块的渲染 + +## 6. 
命令文档更新 + +- [x] 6.1 更新 `plugins/codex/commands/rescue.md`,在 argument-hint 和描述中增加 `--worktree` 参数说明 +- [x] 6.2 更新 `plugins/codex/agents/codex-rescue.md`,增加 `--worktree` 标志的识别和透传规则 diff --git a/openspec/changes/codex-live-observer/.openspec.yaml b/openspec/changes/codex-live-observer/.openspec.yaml new file mode 100644 index 00000000..8b769149 --- /dev/null +++ b/openspec/changes/codex-live-observer/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-20 diff --git a/openspec/changes/codex-live-observer/design.md b/openspec/changes/codex-live-observer/design.md new file mode 100644 index 00000000..df46d0d6 --- /dev/null +++ b/openspec/changes/codex-live-observer/design.md @@ -0,0 +1,86 @@ +## Context + +The Codex plugin runs Codex tasks via `codex-companion.mjs task`, which spawns a `codex app-server` process and communicates over a JSON-RPC stdio protocol. Progress events (tool calls, file changes, commands, messages, reasoning, phase transitions) are processed by `codex.mjs` and emitted through `createProgressReporter`, which writes to stderr and a plain-text log file. + +Currently, the only ways to observe a running task are: +- **Foreground mode**: stderr output visible in the same Claude Code session +- **`/codex:status`**: snapshot of job state (running/completed/failed) +- **`/codex:result`**: final output after completion +- **Log file**: plain-text append-only log, readable but not structured + +None of these provide a real-time, terminal-based live view from a separate terminal window. The existing `onProgress` callback in `tracked-jobs.mjs` already captures all the necessary events — the gap is in persisting them in a machine-readable format and providing a consumer that renders them live. 
+ +## Goals / Non-Goals + +**Goals:** +- Provide a read-only, live terminal view of any running or completed Codex job +- Events appear in real-time as they happen, with phase indicators and colored output +- Observer can be started in any terminal, independent of the Claude Code session running Codex +- Observer exit (Ctrl+C) never affects the running Codex task +- Event stream is structured (JSONL) for future programmatic consumption + +**Non-Goals:** +- Two-way interaction: observer cannot send input, cancel, or steer the Codex task +- Web UI or remote streaming (terminal-only for now) +- Event replay across sessions (observer targets a single job's event file) +- Real-time WebSocket push (file-based polling is sufficient and simpler) +- Modifying Codex execution behavior in any way + +## Decisions + +### 1. Event stream format: JSONL (JSON Lines) + +**Decision**: Each event is a single-line JSON object appended to `/.events.jsonl`. + +**Rationale**: JSONL is append-only, stream-parseable, and human-readable. Each line is independently parseable, so partial reads are safe. Alternatives considered: +- **Plain text log**: Already exists (`logFile`), not machine-parseable. Event stream complements it. +- **SQLite**: Overkill for append-only event log, adds dependency complexity. +- **Binary format**: Harder to debug, no benefit for this use case. + +### 2. Event stream integration point: `createProgressReporter` + +**Decision**: Add an `eventStream` parameter to `createProgressReporter` alongside existing `stderr` and `logFile` options. + +**Rationale**: The progress reporter is already the single funnel for all Codex events. Adding event stream output here means zero changes to the event processing pipeline in `codex.mjs`. The `onEvent` callback in `createProgressReporter` already supports custom event handlers — the event stream writer plugs in as another handler. + +### 3. 
File watching: `fs.watch` with polling fallback + +**Decision**: Use `fs.watch` for file change notifications, with a 500ms polling fallback if `fs.watch` fails or is unavailable. + +**Rationale**: `fs.watch` is efficient (kernel-level notifications on macOS/Linux) but can be unreliable on some filesystems (network mounts, Docker volumes). Polling fallback ensures cross-platform reliability. The 500ms interval is acceptable latency for a human observer. + +### 4. Byte-offset tracking for incremental reads + +**Decision**: Observer tracks the last-read byte offset in the event file and only reads new content. + +**Rationale**: Avoids re-parsing the entire file on each change. For a 100KB event file with 500 events, reading only the new 200 bytes is far more efficient than re-reading and re-rendering everything. + +### 5. Observer rendering: inline ANSI output + +**Decision**: Render events as colored ANSI terminal output with phase spinners, similar to CLI tools like `npm` or `cargo`. + +**Rationale**: No external TUI framework needed. The event types are simple enough for inline rendering. ANSI escape codes are universally supported in modern terminals. This keeps the implementation lightweight (~150 lines of render code) and avoids dependencies. + +### 6. Observer lifecycle: detached, read-only + +**Decision**: Observer process has no reference to the Codex process. It only reads the event file. SIGINT exits the observer cleanly. + +**Rationale**: Complete isolation ensures observer crashes or exits cannot affect Codex. The event file is the only shared state, and it's append-only, so concurrent reads are safe. + +### 7. Subcommand registration: `observe` in `codex-companion.mjs` + +**Decision**: Add `observe` as a new subcommand alongside `task`, `review`, `status`, etc. + +**Rationale**: Consistent with existing CLI pattern. Users invoke it as `node codex-companion.mjs observe [jobId] [--cwd ]`. 
Can later be exposed as a slash command (`/codex:observe`) if desired. + +## Risks / Trade-offs + +**[Event file grows unbounded during long tasks]** → Mitigation: Event files are small (~10-100 KB per job). They share the same lifecycle as job records and are pruned by the existing 50-job cap in `state.mjs`. A multi-hour task might produce ~500 KB, which is negligible. + +**[fs.watch unreliable on some platforms]** → Mitigation: Polling fallback at 500ms interval. The observer detects `fs.watch` failure and transparently switches to polling. Both modes produce identical output. + +**[Observer started before any events exist]** → Mitigation: Observer handles missing/empty event file gracefully, shows "Waiting for events..." message, and begins rendering when the first event arrives. + +**[Job not found or already completed]** → Mitigation: Observer checks `state.json` for job existence and status. For completed jobs, it renders the full event history and exits (no tail mode needed). + +**[Concurrent observers reading same file]** → Mitigation: Append-only writes + read-only observers = no conflict. Multiple observers can tail the same event file simultaneously. diff --git a/openspec/changes/codex-live-observer/proposal.md b/openspec/changes/codex-live-observer/proposal.md new file mode 100644 index 00000000..35f4ea38 --- /dev/null +++ b/openspec/changes/codex-live-observer/proposal.md @@ -0,0 +1,27 @@ +## Why + +When Codex runs as a background subagent (via `/codex:rescue --background`), the only way to observe its progress is through `/codex:status` snapshots or reading a plain-text log file after the fact. There is no way to get a live, terminal-based view of what Codex is doing in real-time without blocking the main Claude thread. Users need a read-only observer window that shows the full output stream — tool calls, file changes, commands, messages — like watching a CLI session live. 
+ +## What Changes + +- Add a JSONL event stream file (`.events.jsonl`) written alongside each job's existing log file, capturing structured events (tool calls, file changes, commands, messages, reasoning, phase transitions) in real-time +- Add a new `observe` subcommand to `codex-companion.mjs` that tails the event stream and renders it as a live terminal UI +- Wire the event stream into the existing `createProgressReporter` pipeline so all progress events are also written as structured JSONL +- Observer is read-only: Ctrl+C exits the observer without affecting the running Codex task +- Observer supports targeting the latest running job or a specific job ID + +## Capabilities + +### New Capabilities +- `event-stream`: Structured JSONL event stream writer that captures all Codex progress events in append-only format, integrated with the existing progress reporter pipeline +- `observe-command`: The `observe` subcommand for `codex-companion.mjs` that tails the event stream file, renders events as a live terminal UI with phase indicators and colored output, and exits cleanly without affecting the Codex task + +### Modified Capabilities + +## Impact + +- **New files**: `plugins/codex/scripts/lib/event-stream.mjs`, `plugins/codex/scripts/lib/observe.mjs` +- **Modified files**: `plugins/codex/scripts/codex-companion.mjs` (register subcommand), `plugins/codex/scripts/lib/tracked-jobs.mjs` (wire event stream into progress reporter) +- **Storage**: Each job gains a `.events.jsonl` file (~10-100 KB), cleaned up with existing job pruning +- **Dependencies**: None (uses Node.js built-ins only: `fs`, `readline`, `path`) +- **Breaking changes**: None diff --git a/openspec/changes/codex-live-observer/specs/event-stream/spec.md b/openspec/changes/codex-live-observer/specs/event-stream/spec.md new file mode 100644 index 00000000..150bd7e5 --- /dev/null +++ b/openspec/changes/codex-live-observer/specs/event-stream/spec.md @@ -0,0 +1,79 @@ +## ADDED Requirements + +### Requirement: 
Event stream file creation +The system SHALL create a `.events.jsonl` file in the job's state directory when a tracked job starts, alongside the existing log file. + +#### Scenario: New job creates event stream file +- **WHEN** `runTrackedJob` is called with a job that has `eventStream` enabled in the progress reporter +- **THEN** a file named `.events.jsonl` SHALL be created in `/jobs/` +- **THEN** the file SHALL be initially empty + +#### Scenario: Event stream file path follows job log convention +- **WHEN** the job directory is resolved via `resolveJobsDir` +- **THEN** the event stream file SHALL be at `/.events.jsonl` + +### Requirement: Structured event format +Each event written to the event stream SHALL be a single-line JSON object containing at minimum: `t` (ISO 8601 timestamp), `type` (event type string), and `phase` (current phase). + +#### Scenario: Phase transition event +- **WHEN** a progress event with `phase` is emitted +- **THEN** the JSONL line SHALL contain `{"t":"","type":"phase","phase":"","message":""}` plus any additional fields from the event + +#### Scenario: Tool call event +- **WHEN** a tool call (Read, Write, Bash, etc.) 
starts or completes +- **THEN** the JSONL line SHALL contain `{"t":"<ISO>","type":"tool_call","tool":"<name>","phase":"<phase>"}` for started events +- **THEN** the JSONL line SHALL contain `{"t":"<ISO>","type":"tool_done","tool":"<name>","phase":"<phase>"}` for completed events + +#### Scenario: Command execution event +- **WHEN** a shell command is executed by Codex +- **THEN** the JSONL line SHALL contain `{"t":"<ISO>","type":"command","cmd":"<command>","phase":"<phase>"}` when started +- **THEN** the JSONL line SHALL contain `{"t":"<ISO>","type":"command_done","cmd":"<command>","exit":<code>,"phase":"<phase>"}` when completed + +#### Scenario: File change event +- **WHEN** Codex modifies a file +- **THEN** the JSONL line SHALL contain `{"t":"<ISO>","type":"file_change","path":"<path>","action":"<action>","phase":"<phase>"}` + +#### Scenario: Agent message event +- **WHEN** Codex produces a text message (including final answer) +- **THEN** the JSONL line SHALL contain `{"t":"<ISO>","type":"message","phase":"<phase>","text":"<text>"}` + +#### Scenario: Reasoning summary event +- **WHEN** Codex produces reasoning sections +- **THEN** the JSONL line SHALL contain `{"t":"<ISO>","type":"reasoning","phase":"<phase>","sections":["<section>","<section>",...]}` + +#### Scenario: Completion event +- **WHEN** the Codex task completes (success or failure) +- **THEN** the JSONL line SHALL contain `{"t":"<ISO>","type":"completed","status":"<status>","phase":"<phase>"}` plus `threadId` and `summary` if available + +### Requirement: Append-only writes +The event stream SHALL only use append operations. Existing lines SHALL NOT be modified or deleted during a job's lifetime. 
+ +#### Scenario: Multiple events written sequentially +- **WHEN** three events are emitted in sequence +- **THEN** the event file SHALL contain exactly three lines, one per event, in emission order +- **THEN** no existing line SHALL be modified + +#### Scenario: Write failure does not affect job execution +- **WHEN** the event stream file cannot be written (disk full, permission error) +- **THEN** the error SHALL be silently ignored +- **THEN** the Codex task SHALL continue unaffected + +### Requirement: Integration with progress reporter +The event stream SHALL be wired into `createProgressReporter` as an additional output channel alongside `stderr` and `logFile`. + +#### Scenario: Progress reporter emits to event stream +- **WHEN** `createProgressReporter` is called with an `eventStream` parameter +- **THEN** every progress event processed by the reporter SHALL be written to the event stream in JSONL format +- **THEN** the event SHALL be written in addition to existing stderr and logFile outputs + +#### Scenario: Event stream is optional +- **WHEN** `createProgressReporter` is called without an `eventStream` parameter +- **THEN** no event stream file SHALL be created +- **THEN** existing stderr and logFile behavior SHALL be unchanged + +### Requirement: Event stream cleanup with job records +Event stream files SHALL be deleted when their corresponding job record is pruned by the existing 50-job cap mechanism. 
+ +#### Scenario: Job pruning removes event stream +- **WHEN** a job record is pruned from `state.json` due to exceeding the 50-job limit +- **THEN** the corresponding `.events.jsonl` file SHALL also be deleted diff --git a/openspec/changes/codex-live-observer/specs/observe-command/spec.md b/openspec/changes/codex-live-observer/specs/observe-command/spec.md new file mode 100644 index 00000000..b97e839f --- /dev/null +++ b/openspec/changes/codex-live-observer/specs/observe-command/spec.md @@ -0,0 +1,118 @@ +## ADDED Requirements + +### Requirement: Observe subcommand registration +The `observe` subcommand SHALL be registered in `codex-companion.mjs` alongside existing subcommands (`task`, `review`, `status`, etc.). + +#### Scenario: Subcommand invocation +- **WHEN** user runs `node codex-companion.mjs observe` +- **THEN** the observe handler SHALL be invoked +- **THEN** it SHALL target the latest running job in the current workspace + +#### Scenario: Observe specific job +- **WHEN** user runs `node codex-companion.mjs observe <jobId>` +- **THEN** the observe handler SHALL target the specified job + +#### Scenario: Observe with workspace flag +- **WHEN** user runs `node codex-companion.mjs observe --cwd <path>` +- **THEN** the state directory SHALL be resolved from the specified workspace path + +### Requirement: Job resolution and validation +The observer SHALL validate that the target job exists before entering tail mode. 
+ +#### Scenario: Job not found +- **WHEN** the target job ID does not exist in `state.json` +- **THEN** the observer SHALL print an error message to stderr +- **THEN** the observer SHALL exit with a non-zero exit code + +#### Scenario: No running jobs +- **WHEN** no job ID is specified and no running jobs exist +- **THEN** the observer SHALL print "No running Codex jobs found" to stderr +- **THEN** the observer SHALL exit with a non-zero exit code + +#### Scenario: Completed job +- **WHEN** the target job has status `completed` or `failed` +- **THEN** the observer SHALL render the full event history from the event stream file +- **THEN** the observer SHALL exit after rendering (no tail mode) + +### Requirement: Live event tailing +The observer SHALL tail the event stream file in real-time, rendering new events as they appear. + +#### Scenario: File watching with fs.watch +- **WHEN** the event stream file exists and `fs.watch` is available +- **THEN** the observer SHALL use `fs.watch` to detect file changes +- **THEN** new events SHALL be rendered within 100ms of being written + +#### Scenario: Polling fallback +- **WHEN** `fs.watch` fails or is unavailable +- **THEN** the observer SHALL fall back to polling the file every 500ms +- **THEN** new events SHALL be rendered within 500ms of being written + +#### Scenario: Byte-offset tracking +- **WHEN** the observer reads the event stream file +- **THEN** it SHALL track the last-read byte offset +- **THEN** on subsequent reads, it SHALL only parse and render content after the last offset + +#### Scenario: Empty or missing event file +- **WHEN** the event stream file does not exist or is empty +- **THEN** the observer SHALL display "Waiting for events..." and continue watching +- **THEN** the observer SHALL begin rendering when the first event arrives + +### Requirement: Terminal rendering +The observer SHALL render events as colored ANSI terminal output with phase indicators. 
+ +#### Scenario: Phase indicator rendering +- **WHEN** a phase transition event is received +- **THEN** the observer SHALL display a spinner character and phase name in the appropriate color (cyan for starting, yellow for investigating, green for finalizing) + +#### Scenario: Tool call rendering +- **WHEN** a tool_call event is received +- **THEN** the observer SHALL display "→ <tool>" in cyan +- **WHEN** the corresponding tool_done event is received +- **THEN** the observer SHALL display " ✓ completed" in dim text + +#### Scenario: Command rendering +- **WHEN** a command event is received +- **THEN** the observer SHALL display "$ <cmd>" in blue +- **WHEN** the corresponding command_done event is received with exit code 0 +- **THEN** the observer SHALL display " exit 0" in green +- **WHEN** the corresponding command_done event is received with non-zero exit code +- **THEN** the observer SHALL display " exit <code>" in red + +#### Scenario: File change rendering +- **WHEN** a file_change event is received +- **THEN** the observer SHALL display "✎ <path> (<action>)" in yellow + +#### Scenario: Message rendering +- **WHEN** a message event is received +- **THEN** the observer SHALL display the full message text as a white block with a left border + +#### Scenario: Completion rendering +- **WHEN** a completed event is received +- **THEN** the observer SHALL display "● completed at <timestamp>" in green (for success) or red (for failure) +- **THEN** the observer SHALL exit after rendering + +### Requirement: Read-only isolation +The observer process SHALL have no reference to or control over the Codex process. Observer exit SHALL NOT affect the running Codex task. + +#### Scenario: Ctrl+C exits observer only +- **WHEN** the user presses Ctrl+C (SIGINT) while the observer is running +- **THEN** the observer SHALL print "Observer detached. Codex task continues." 
in dim text +- **THEN** the observer SHALL exit with code 0 +- **THEN** the Codex task SHALL continue running unaffected + +#### Scenario: Observer crash does not affect Codex +- **WHEN** the observer encounters an unhandled error +- **THEN** the observer SHALL print the error to stderr and exit +- **THEN** the Codex task SHALL continue running unaffected + +#### Scenario: Multiple concurrent observers +- **WHEN** two observer processes target the same job +- **THEN** both observers SHALL render events independently +- **THEN** neither observer SHALL interfere with the other or with the Codex task + +### Requirement: Usage output +The observer SHALL be listed in the usage help output of `codex-companion.mjs`. + +#### Scenario: Help includes observe +- **WHEN** user runs `node codex-companion.mjs --help` or `node codex-companion.mjs` with no arguments +- **THEN** the usage output SHALL include a line for `observe [jobId] [--cwd ]` diff --git a/openspec/changes/codex-live-observer/tasks.md b/openspec/changes/codex-live-observer/tasks.md new file mode 100644 index 00000000..4835bc97 --- /dev/null +++ b/openspec/changes/codex-live-observer/tasks.md @@ -0,0 +1,82 @@ +## 1. Event Stream Writer + +- [x] 1.1 Create `plugins/codex/scripts/lib/event-stream.mjs` with `createEventStream(jobId, jobsDir)` function that returns a stream object with the event file path and a write function +- [x] 1.2 Implement `emitEvent(stream, type, data)` function that formats event as JSONL (`{"t":"","type":"",...data}`) and appends to the event file using `fs.appendFileSync` +- [x] 1.3 Implement `closeEventStream(stream)` function (no-op for now, placeholder for future cleanup) +- [x] 1.4 Ensure write failures are silently caught and do not propagate errors to the caller +- [x] 1.5 Add event type constants: `phase`, `tool_call`, `tool_done`, `command`, `command_done`, `file_change`, `message`, `reasoning`, `completed` + +## 2. 
Progress Reporter Integration + +- [x] 2.1 Add `eventStream` parameter to `createProgressReporter` options in `tracked-jobs.mjs` +- [x] 2.2 In the progress reporter callback, when `eventStream` is present, call `emitEvent` with the normalized event data mapped to JSONL fields +- [x] 2.3 Map existing event fields to event stream format: `message` → `message`, `phase` → `phase`, `logTitle`/`logBody` → appropriate event types +- [x] 2.4 Ensure event stream writes happen alongside existing stderr and logFile writes (no replacement) + +## 3. Event Stream Creation in Job Lifecycle + +- [x] 3.1 In `codex-companion.mjs`, when calling `createTrackedProgress`, create an event stream via `createEventStream` and pass it to `createProgressReporter` +- [x] 3.2 Store the event stream path in the job record as `eventFile` field (alongside `logFile`) +- [x] 3.3 Ensure event stream is created for both foreground and background task modes +- [x] 3.4 Add event stream creation for review commands (`handleReviewCommand`) as well + +## 4. Event Stream Cleanup + +- [x] 4.1 In `state.mjs`, when pruning jobs (in `pruneJobs`), also delete the corresponding `.events.jsonl` file alongside the `.log` and `.json` files +- [x] 4.2 Add a `resolveJobEventFile(workspaceRoot, jobId)` helper in `state.mjs` (parallel to `resolveJobLogFile`) + +## 5. Observe Subcommand: Job Resolution + +- [x] 5.1 Create `plugins/codex/scripts/lib/observe.mjs` with `handleObserveCommand(argv)` async function +- [x] 5.2 Parse arguments: optional `jobId` positional arg, optional `--cwd` flag +- [x] 5.3 Resolve workspace root and state directory from `--cwd` or `process.cwd()` +- [x] 5.4 If no `jobId` provided, find the latest running job from `state.json` (filter by `status === "running"`, sort by `startedAt` descending) +- [x] 5.5 If no running job found and no `jobId` specified, print error to stderr and exit with code 1 +- [x] 5.6 If target job not found in state, print error to stderr and exit with code 1 + +## 6. 
Observe Subcommand: Event File Reading + +- [x] 6.1 Implement `readEventsFromOffset(eventFile, offset)` function that reads the file, parses JSONL lines after the offset, and returns `{ events, newOffset }` +- [x] 6.2 Handle missing event file: return empty events array and offset 0 +- [x] 6.3 Handle empty event file: return empty events array and offset 0 +- [x] 6.4 Parse each line with `JSON.parse`, skip lines that fail parsing (defensive) + +## 7. Observe Subcommand: Terminal Rendering + +- [x] 7.1 Implement `renderEvent(event)` function that returns ANSI-colored string based on event type +- [x] 7.2 Phase events: spinner char + phase name in color (cyan/yellow/green depending on phase) +- [x] 7.3 Tool call events: `→ ` in cyan, tool_done: ` ✓ completed` in dim +- [x] 7.4 Command events: `$ ` in blue, command_done exit 0 in green, non-zero in red +- [x] 7.5 File change events: `✎ ()` in yellow +- [x] 7.6 Message events: full text as white block with left border (pipe character) +- [x] 7.7 Reasoning events: dim italic with bullet list for each section +- [x] 7.8 Completed events: `● completed at ` in green (success) or red (failure) +- [x] 7.9 Print a header line on startup: `Codex Observer — ` + +## 8. Observe Subcommand: Live Tailing + +- [x] 8.1 Implement `tailEventStream(eventFile, onEvent)` function that watches the file and calls `onEvent` for new events +- [x] 8.2 Try `fs.watch(eventFile)` first; on error, fall back to `setInterval` polling at 500ms +- [x] 8.3 On each file change notification, call `readEventsFromOffset` with the last known offset, render new events, update offset +- [x] 8.4 Debounce `fs.watch` callbacks by 100ms to coalesce rapid writes +- [x] 8.5 Detect `completed` event type and exit the observer after rendering it + +## 9. Observe Subcommand: Signal Handling and Lifecycle + +- [x] 9.1 Register SIGINT handler: print "Observer detached. Codex task continues." 
in dim text, exit with code 0 +- [x] 9.2 For completed jobs (status `completed` or `failed`), render full event history and exit immediately (no tail mode) +- [x] 9.3 Handle unhandled errors: print to stderr, exit with code 1, do not affect Codex process + +## 10. Observe Subcommand: Registration + +- [x] 10.1 Add `observe` case to the subcommand dispatch in `codex-companion.mjs` +- [x] 10.2 Add `observe [jobId] [--cwd <path>]` line to the `printUsage()` output +- [x] 10.3 Wire argument parsing for observe (positional jobId, --cwd flag) + +## 11. Testing + +- [x] 11.1 Write unit tests for `event-stream.mjs`: createEventStream, emitEvent, write failure handling +- [x] 11.2 Write unit tests for `readEventsFromOffset`: normal parsing, empty file, missing file, malformed lines +- [x] 11.3 Write unit tests for `renderEvent`: each event type produces expected ANSI output +- [x] 11.4 Write integration test: start a fake job with event stream, run observe, verify rendered output matches expected events +- [x] 11.5 Verify existing tests still pass (`npm test`) diff --git a/openspec/changes/fix-observe-slash-command-hang/.openspec.yaml b/openspec/changes/fix-observe-slash-command-hang/.openspec.yaml new file mode 100644 index 00000000..4a1c6774 --- /dev/null +++ b/openspec/changes/fix-observe-slash-command-hang/.openspec.yaml @@ -0,0 +1,2 @@ +schema: spec-driven +created: 2026-05-22 diff --git a/openspec/changes/fix-observe-slash-command-hang/design.md b/openspec/changes/fix-observe-slash-command-hang/design.md new file mode 100644 index 00000000..159537a0 --- /dev/null +++ b/openspec/changes/fix-observe-slash-command-hang/design.md @@ -0,0 +1,62 @@ +## Context + +`/codex:observe` is a Claude Code slash command that wraps the CLI `observe` subcommand. It lives at `plugins/codex/commands/observe.md`. 
The CLI subcommand itself (defined in `plugins/codex/scripts/lib/observe.mjs`, dispatched from `codex-companion.mjs`) is a long-running live tail: it watches the per-job JSONL event stream, renders ANSI-colored events as they arrive, and only exits on `COMPLETED` or `SIGINT`. + +The slash-command file currently has two intentions stacked together: + +1. **Primary path** (lines 8–36): static prose telling the user to open a **new terminal** and paste a copy-paste command. This is the correct UX — live tail needs a real TTY and indefinite uptime. +2. **Inline fallback** (lines 38–48): an "if you want inline" affordance using Claude Code's `` !`...` `` exec block on line 40, plus model-facing prose ("Present the command output to the user…") on lines 42–48. + +The inline fallback is structurally broken. Claude Code's `!exec` blocks capture stdout **after the child process exits**; they do not stream. A non-terminating `tail` process therefore stalls the entire slash-command body assembly — including the static guidance prose that precedes it. The user observes "no output" because the slash command never finishes rendering. + +Compare to the working siblings (`cancel.md`, `result.md`, `status.md`): their `!exec` blocks invoke one-shot CLI calls that read state, write to stdout, and exit. Those are compatible with Claude Code's exec model. `observe.md` is the only file in the directory whose exec is long-running. + +Stakeholders: any user who types `/codex:observe` in Claude Code (the primary surface). + +## Goals / Non-Goals + +**Goals:** +- Restore `/codex:observe`'s visible output: when invoked, the user sees clear instructions for running the live observer in a separate terminal. +- Eliminate the structural conflict between the slash-command exec model and a long-running tail process. +- Keep the change minimal — single file, deletions only, no behavior changes to CLI / observer logic / hooks / tests. 
+ +**Non-Goals:** +- Adding a non-blocking "snapshot" mode to the `observe` subcommand (alternative B in the exploration). Out of scope for this fix; tracked separately if user demand emerges. +- Changing how the CLI `observe` subcommand itself behaves. `observe.mjs` is untouched. +- Modifying the `observe-command` spec from the still-active `codex-live-observer` change. That spec covers CLI behavior, which is unchanged. +- Reworking how Claude Code renders `disable-model-invocation: true` commands. We work within the existing semantics. + +## Decisions + +### Decision 1: Remove the inline `!exec` fallback entirely (option A from exploration) + +Rationale: The fallback never worked for the most common case (live, in-progress job). Even in the edge case where a job is already completed at exec time (one of the three branches of `handleObserveCommand`), the inline output would contain ANSI cursor-control codes (`\x1b[1A\x1b[2K`) that render as garbage in a non-TTY context, and the value of viewing a *historical* event dump inline is marginal — the user can already get this via `/codex:result` or by tailing the JSONL file directly. Stripping the broken affordance is cleaner than gating it on job status, and it removes the entire surface area where a future similarly-shaped bug could regress. + +Alternatives considered: +- **Option B — add `--snapshot` flag to `observe` CLI** and have the slash command use it. Preserves inline value, but conflates two semantically distinct operations (live tail vs. snapshot dump) under one subcommand name. If snapshot mode is genuinely valuable, it should be a separate subcommand (`events`, `recent`) — not bolted onto `observe`. Out of scope here. +- **Option C — keep the prose but remove `disable-model-invocation` and let the model render guidance.** Adds latency and token cost for what should be an instantaneous static response. 
The current `disable-model-invocation: true` is correct for this command type; the bug is the exec block, not the directive. + +### Decision 2: Introduce a new capability `observe-slash-command`, not modify `observe-command` + +Rationale: The CLI subcommand (`observe-command` capability, currently defined in the unarchived `codex-live-observer` change) and the slash-command wrapper are two distinct surfaces with separate contracts. The CLI behavior is unchanged; only the slash-command body is. Adding requirements to `observe-command` would conflate them. A separate capability also captures a generalizable rule — **slash command bodies MUST NOT contain inline executions of long-running processes** — that other future slash commands will benefit from. + +Alternatives considered: +- **Treat as pure bugfix with no spec change.** Loses the chance to encode the structural constraint that caused the bug. Likely to be re-violated by some future contributor adding another inline exec. +- **Add a delta to `observe-command`.** Conflates CLI and wrapper concerns; also awkward since `observe-command` is not yet in `openspec/specs/` (its parent change isn't archived). + +### Decision 3: No test added + +Rationale: The slash command body is a markdown file consumed by Claude Code's runtime, not by code in this repo. There is no test harness that exercises slash-command body rendering. A README-style "what the user sees" assertion would require mocking the entire Claude Code slash runtime, which is out of scope for a deletion-only fix. The existing `tests/observe.test.mjs` covers `observe.mjs` event reading and rendering — those code paths are untouched, and the existing tests should continue to pass unchanged. + +Verification will be manual: after the change, run `/codex:observe` in Claude Code with (a) no running job, (b) a running job, (c) a completed job, and confirm the same guidance text renders immediately in all three cases (because the body is now static). 
+ +## Risks / Trade-offs + +- **[Risk] A user who liked the inline fallback (for completed jobs) loses it.** → Mitigation: the inline fallback was already broken for the dominant case (live tail). For completed jobs, `/codex:result` or directly tailing the JSONL file are existing alternatives. Acceptable loss. +- **[Risk] Future slash commands could re-introduce inline `!exec` of long-running processes.** → Mitigation: the new `observe-slash-command` capability spec records the structural rule explicitly. Anyone proposing a similar pattern will see the contract. +- **[Trade-off] No design-level fix for "inline live observation inside Claude Code."** → Accepted: this is an inherent limitation of Claude Code's slash-exec model, not something a markdown-file change can solve. If genuinely needed, it would require a streaming hook or a separate UI surface — both far outside this change's scope. + +## Migration Plan + +- No data migration. No config migration. No user action required. +- Rollback: revert the single file change. The pre-change state is the (broken) status quo. diff --git a/openspec/changes/fix-observe-slash-command-hang/proposal.md b/openspec/changes/fix-observe-slash-command-hang/proposal.md new file mode 100644 index 00000000..23b5e3c3 --- /dev/null +++ b/openspec/changes/fix-observe-slash-command-hang/proposal.md @@ -0,0 +1,33 @@ +## Why + +Running `/codex:observe` inside Claude Code currently produces **no visible output at all** — neither the "open a new terminal" guidance text nor any event stream. The root cause: `observe.md` contains an inline `!exec` fallback (line 40) that invokes the long-running live tail (`handleObserveCommand`'s `await new Promise(...)` only resolves on `COMPLETED` event or SIGINT). Claude Code's `!exec` model captures full stdout after the process exits — a never-returning process therefore gates the entire slash-command body, including the 36 lines of guidance prose that precede it. 
The author added the inline `!exec` as a "if you want inline" fallback, but inline live-tail is structurally incompatible with the slash-command exec model, and the broken fallback reaches back and suffocates the primary path. + +## What Changes + +- Remove the inline `!exec` fallback block from `plugins/codex/commands/observe.md`: + - Delete lines 38–40 ("If you want to see the output inline instead, you can run:" + the `!`-prefixed exec line) + - Delete lines 42–48 (the "Present the command output to the user…" model-facing instructions, which become orphaned once there is no exec output to present) +- The slash command body becomes a **pure static guidance document**: it tells the user to open a new terminal and shows the copy-paste command, with no inline execution path +- The `observe` subcommand in `codex-companion.mjs` is **unchanged** — direct CLI invocation (`node codex-companion.mjs observe …`) keeps working exactly as today +- No behavior change to `observe.mjs`, event stream, hooks, or any other plugin component + +## Capabilities + +### New Capabilities + +- `observe-slash-command`: Contract for the `/codex:observe` slash-command wrapper as exposed inside Claude Code — distinct from the underlying CLI `observe` subcommand. Specifies that the slash command body MUST be non-blocking static guidance and MUST NOT contain inline executions of long-running processes, since Claude Code's slash-exec model buffers stdout until process exit. + +### Modified Capabilities + +(none — the `observe-command` capability for the CLI subcommand lives in the still-active `codex-live-observer` change and is not touched here) + +## Impact + +- **Affected code**: `plugins/codex/commands/observe.md` only (1 file, deletions only — no logic changes) +- **Affected docs**: None. `README.md` and `README.zh-CN.md` already describe the "open a new terminal" workflow as the primary usage; nothing to update. +- **Affected tests**: None. There are no tests on `observe.md` rendering. 
`tests/observe.test.mjs` covers the CLI behavior (event reading + rendering), which is unchanged. +- **User-visible behavior**: + - Before: `/codex:observe` produces no output (slash command hangs on inline exec) + - After: `/codex:observe` immediately renders guidance text with a copy-paste command for a new terminal — matching the original design intent +- **Backwards compatibility**: Users who somehow relied on the broken inline path (which never worked for live jobs) lose nothing — that path was non-functional. The CLI path (`node codex-companion.mjs observe`) is unaffected. +- **No dependencies, APIs, or external contracts touched.** diff --git a/openspec/changes/fix-observe-slash-command-hang/specs/observe-slash-command/spec.md b/openspec/changes/fix-observe-slash-command-hang/specs/observe-slash-command/spec.md new file mode 100644 index 00000000..7cca5770 --- /dev/null +++ b/openspec/changes/fix-observe-slash-command-hang/specs/observe-slash-command/spec.md @@ -0,0 +1,63 @@ +## ADDED Requirements + +### Requirement: Static guidance body + +The `/codex:observe` slash command body in `plugins/codex/commands/observe.md` SHALL consist exclusively of non-executable static guidance content. It SHALL NOT contain any inline shell-execution blocks (Claude Code `` !`...` `` syntax) that invoke long-running processes. 
+ +#### Scenario: User invokes /codex:observe with no arguments +- **WHEN** the user types `/codex:observe` in Claude Code +- **THEN** the slash command SHALL render its guidance text immediately without blocking on any subprocess +- **THEN** the rendered text SHALL include a copy-paste command instructing the user to run `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" observe` in a separate terminal + +#### Scenario: User invokes /codex:observe with arguments +- **WHEN** the user types `/codex:observe <jobId>` or `/codex:observe --cwd <path>` +- **THEN** the slash command SHALL render the same guidance text without blocking +- **THEN** the rendered command snippet SHALL include `$ARGUMENTS` so the user's arguments appear in the copy-paste line + +#### Scenario: Body contains no inline exec blocks +- **WHEN** the contents of `plugins/codex/commands/observe.md` are inspected +- **THEN** no line in the body SHALL begin with `` !` `` (Claude Code inline shell-exec marker) +- **THEN** all `node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" observe …` references SHALL appear inside fenced code blocks intended for the user to copy, not as Claude Code exec directives + +### Requirement: New-terminal usage instructions + +The slash command body SHALL clearly direct the user to open a new terminal window before running the observer, because the live tail uses ANSI cursor-control rendering and runs indefinitely until the underlying Codex job completes or the user sends `SIGINT`. 
+ +#### Scenario: Guidance mentions a new terminal +- **WHEN** the slash command body is rendered +- **THEN** it SHALL contain explicit wording instructing the user to open a new terminal window +- **THEN** it SHALL describe `Ctrl+C` as the way to detach the observer +- **THEN** it SHALL state that detaching does not affect the running Codex task + +#### Scenario: Examples cover the common invocations +- **WHEN** the slash command body is rendered +- **THEN** it SHALL include at least one example each for: + - observing the latest running job (no positional argument) + - observing a specific job by ID + - observing with a custom `--cwd` + +### Requirement: CLI subcommand untouched + +This change SHALL NOT modify the behavior of the `observe` subcommand in `plugins/codex/scripts/codex-companion.mjs` nor any code in `plugins/codex/scripts/lib/observe.mjs`. All requirements declared by the `observe-command` capability (from the `codex-live-observer` change) SHALL continue to hold unchanged. + +#### Scenario: Direct CLI invocation still works +- **WHEN** a user runs `node plugins/codex/scripts/codex-companion.mjs observe` in a terminal +- **THEN** the observer SHALL behave exactly as specified by the `observe-command` capability — live tail with ANSI rendering, SIGINT detach, completion on COMPLETED event + +#### Scenario: Existing observer tests pass +- **WHEN** `npm test` is run after this change +- **THEN** `tests/observe.test.mjs` SHALL pass without modification +- **THEN** no other test SHALL regress + +### Requirement: Slash-command structural rule for long-running processes + +No slash command body in `plugins/codex/commands/*.md` SHALL contain a Claude Code inline shell-exec block (`` !`...` ``) that invokes a process which does not terminate in bounded time. Claude Code's slash-exec model buffers stdout until the child process exits, so blocking on a long-running process gates the entire body from rendering. 
+ +#### Scenario: Long-running subprocess belongs in a code block, not an exec block +- **WHEN** a slash command needs to expose a long-running process (live tail, watcher, daemon) to the user +- **THEN** the command's invocation SHALL be presented inside a fenced code block as copy-paste guidance for the user to run in their own terminal +- **THEN** the slash command SHALL NOT attempt to invoke it via `` !`...` `` inline exec + +#### Scenario: One-shot subprocesses may use inline exec +- **WHEN** a slash command needs to invoke a subprocess that reads state, performs a single action, and exits promptly (e.g., `cancel`, `result`, `status`) +- **THEN** the command MAY use Claude Code inline exec (`` !`...` ``) to surface that output directly in the conversation diff --git a/openspec/changes/fix-observe-slash-command-hang/tasks.md b/openspec/changes/fix-observe-slash-command-hang/tasks.md new file mode 100644 index 00000000..c57b6025 --- /dev/null +++ b/openspec/changes/fix-observe-slash-command-hang/tasks.md @@ -0,0 +1,31 @@ +## 1. Edit slash-command body + +- [x] 1.1 Open `plugins/codex/commands/observe.md` +- [x] 1.2 Delete lines 38–40 (the paragraph "If you want to see the output inline instead, you can run:" together with the immediately following `` !`node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" observe $ARGUMENTS` `` exec line and the blank line between them) +- [x] 1.3 Delete lines 42–48 (the model-facing prose starting "Present the command output to the user. The observer shows:" through the bulleted list ending at "Completion status with timestamp") +- [x] 1.4 Verify the resulting file ends after the existing "Note: This command is designed to be run in a separate terminal…" paragraph (the file should be ~36 lines after edits, all static guidance) +- [x] 1.5 Confirm no remaining line in the body begins with `` !` `` (Claude Code inline shell-exec marker) + +## 2. 
Verify CLI subcommand untouched + +- [x] 2.1 Confirm `plugins/codex/scripts/codex-companion.mjs` and `plugins/codex/scripts/lib/observe.mjs` are unmodified by this change +- [x] 2.2 Run `npm run build` (typecheck) and confirm it passes +- [x] 2.3 Run `npm test` and confirm `tests/observe.test.mjs` and all other tests pass without modification + - Note: 4 pre-existing flaky failures in `runtime.test.mjs` / `state.test.mjs` (status #72, #74, result #76, resolveStateDir #93). Confirmed pre-existing by re-running on the pre-change tree (5 fails on HEAD vs 4 with this change). None of those tests reference `observe.md`; `tests/observe.test.mjs` passes cleanly. + +## 3. Manual smoke test in Claude Code + +- [ ] 3.1 Reload the plugin in Claude Code so the updated `observe.md` is picked up *(requires post-push reinstall by user)* +- [ ] 3.2 With **no** running Codex job in the workspace: invoke `/codex:observe` — confirm the guidance text renders immediately (no hang, no empty output) *(deferred to post-publish smoke test by user)* +- [ ] 3.3 With a running Codex job in the workspace: invoke `/codex:observe` — confirm the same guidance text renders immediately (no hang) *(deferred to post-publish smoke test by user)* +- [ ] 3.4 Verify the rendered output contains: the new-terminal instruction, a copy-paste command line including `$ARGUMENTS` placeholder semantics, the three usage examples, and the `Ctrl+C` detach note *(file content guarantees this; deferred to post-publish smoke test)* +- [x] 3.5 Open a separate terminal, paste the suggested command, and confirm the live observer still works end-to-end (renders events, handles SIGINT cleanly) + - Verified by user: ran `node …/scripts/codex-companion.mjs observe` in a fresh terminal; observer rendered `starting → Thread ready → Turn started`. + +## 4. 
Wrap up + +- [x] 4.1 Stage the single-file change: `git add plugins/codex/commands/observe.md` + - Extended scope: also staging version bump (1.2.3 → 1.2.4 across `package.json`, `plugins/codex/.claude-plugin/plugin.json`, `package-lock.json`, `.claude-plugin/marketplace.json`), CHANGELOG entry, and OpenSpec change artifacts. +- [x] 4.2 Commit with message following repo style (e.g., `fix: remove inline !exec from /codex:observe slash command to unblock guidance rendering`) +- [x] 4.3 Run `openspec validate fix-observe-slash-command-hang` and confirm clean +- [ ] 4.4 After the change is merged, run `/opsx:archive fix-observe-slash-command-hang` to promote `observe-slash-command` into `openspec/specs/` diff --git a/openspec/specs/worktree-lifecycle/spec.md b/openspec/specs/worktree-lifecycle/spec.md new file mode 100644 index 00000000..6779c6ac --- /dev/null +++ b/openspec/specs/worktree-lifecycle/spec.md @@ -0,0 +1,43 @@ +## ADDED Requirements + +### Requirement: Worktree 路径生成 +系统必须(SHALL)根据 jobId 生成唯一的 worktree 路径,格式为 `<repo>/.claude/worktrees/<jobId>/`。 + +#### Scenario: 生成 worktree 路径 +- **WHEN** 系统需要为 jobId 为 `task-abc123` 的任务创建 worktree +- **THEN** 生成的路径为 `<repo>/.claude/worktrees/task-abc123/` + +### Requirement: Worktree 分支命名 +系统必须(SHALL)生成唯一的分支名,格式为 `codex-rescue/<jobId>-<slug>`,其中 `<slug>` 取 prompt 前 32 字符(去特殊字符,转 kebab-case)。 + +#### Scenario: 生成带 prompt 的分支名 +- **WHEN** jobId 为 `task-abc123`,prompt 为 `Fix the authentication bug in login handler` +- **THEN** 分支名为 `codex-rescue/task-abc123-fix-the-authentication-bu` + +#### Scenario: 生成无 prompt 的分支名 +- **WHEN** jobId 为 `task-abc123`,prompt 为空 +- **THEN** 分支名为 `codex-rescue/task-abc123` + +### Requirement: Worktree 创建 +系统必须(SHALL)使用 `git worktree add <path> -b <branch>` 创建 worktree,基于源 repo 的当前 HEAD。 + +#### Scenario: 成功创建 worktree +- **WHEN** 系统在 `/repo` 目录下为 jobId `task-abc123` 创建 worktree +- **THEN** 执行 `git worktree add /repo/.claude/worktrees/task-abc123/ -b codex-rescue/task-abc123-...` +- **THEN** worktree 目录存在且包含完整工作树 +- **THEN** 分支 
`codex-rescue/task-abc123-...` 存在于本地分支列表 + +#### Scenario: Worktree 路径已存在且属于当前 job +- **WHEN** worktree 路径已存在,且 job record 中的 worktreePath 匹配该路径 +- **THEN** 复用现有 worktree,不重新创建 + +#### Scenario: Worktree 路径已存在但不属于当前 job +- **WHEN** worktree 路径已存在,但 job record 中的 worktreePath 不匹配 +- **THEN** 抛出错误,提示用户手动清理或选择其他 jobId + +### Requirement: Worktree 基础分支记录 +系统必须(SHALL)在 job record 中记录创建 worktree 时的基础分支(baseBranch),用于后续 diff 展示。 + +#### Scenario: 记录基础分支 +- **WHEN** 系统从 `main` 分支创建 worktree +- **THEN** job record 的 `worktreeBaseBranch` 字段为 `main` diff --git a/openspec/specs/worktree-output/spec.md b/openspec/specs/worktree-output/spec.md new file mode 100644 index 00000000..0c79c4d7 --- /dev/null +++ b/openspec/specs/worktree-output/spec.md @@ -0,0 +1,66 @@ +## ADDED Requirements + +### Requirement: Status 输出展示 worktree 信息 +`/codex:status` 的输出必须(SHALL)在 job 包含 worktree 信息时展示 worktree 路径和分支名。 + +#### Scenario: Status 展示进行中的 worktree 任务 +- **WHEN** 用户执行 `/codex:status`,当前有一个进行中的 worktree 任务 +- **THEN** 输出包含: + ``` + Worktree: + Path: /repo/.claude/worktrees/task-abc123/ + Branch: codex-rescue/task-abc123-fix-bug + ``` + +#### Scenario: Status 展示非 worktree 任务 +- **WHEN** 用户执行 `/codex:status`,当前任务未使用 worktree +- **THEN** 输出不包含 worktree 相关字段(现有行为不变) + +### Requirement: Result 输出展示 worktree 操作指引 +`/codex:result` 的输出必须(SHALL)在 job 包含 worktree 信息时展示后续操作指引,包括 diff、merge、remove 命令示例。 + +#### Scenario: Result 展示完成后的 worktree 任务 +- **WHEN** 用户执行 `/codex:result`,最近完成的 job 使用了 worktree +- **THEN** 输出末尾包含: + ``` + Worktree: + Path: /repo/.claude/worktrees/task-abc123/ + Branch: codex-rescue/task-abc123-fix-bug + + Next steps: + Diff: git diff main...codex-rescue/task-abc123-fix-bug + Merge: git merge codex-rescue/task-abc123-fix-bug + Remove: git worktree remove /repo/.claude/worktrees/task-abc123/ + ``` + +#### Scenario: Result 展示非 worktree 任务 +- **WHEN** 用户执行 `/codex:result`,最近完成的 job 未使用 worktree +- **THEN** 输出不包含 worktree 相关字段(现有行为不变) + +### Requirement: JSON 输出包含 worktree 字段 +当使用 
`--json` 标志时,status 和 result 的 JSON 输出必须(SHALL)包含 worktree 相关字段。 + +#### Scenario: JSON status 包含 worktree +- **WHEN** 用户执行 `/codex:status --json`,当前有 worktree 任务 +- **THEN** JSON 输出包含 `worktreePath`、`worktreeBranch`、`worktreeBaseBranch` 字段 + +#### Scenario: JSON result 包含 worktree +- **WHEN** 用户执行 `/codex:result --json`,最近完成的 job 使用了 worktree +- **THEN** JSON 输出包含 `worktreePath`、`worktreeBranch`、`worktreeBaseBranch` 字段 + +### Requirement: 任务完成时输出 worktree 摘要 +task 任务完成后,系统必须(SHALL)在输出末尾追加 worktree 摘要块。 + +#### Scenario: 前台任务完成后输出摘要 +- **WHEN** 前台 worktree 任务执行完成 +- **THEN** 输出末尾包含 worktree 路径、分支名和后续操作指引(同 Result 输出格式) + +#### Scenario: 后台任务启动时输出 worktree 路径 +- **WHEN** 后台 worktree 任务启动 +- **THEN** 输出包含: + ``` + Codex task started in worktree (background). + Path: /repo/.claude/worktrees/task-abc123/ + Branch: codex-rescue/task-abc123-fix-bug + Check /codex:status for progress. + ``` diff --git a/openspec/specs/worktree-task-dispatch/spec.md b/openspec/specs/worktree-task-dispatch/spec.md new file mode 100644 index 00000000..a0ae06bf --- /dev/null +++ b/openspec/specs/worktree-task-dispatch/spec.md @@ -0,0 +1,74 @@ +## ADDED Requirements + +### Requirement: Task 命令接受 --worktree 标志 +`/codex:rescue` 命令必须(SHALL)接受 `--worktree` 布尔标志,启用后在 git worktree 中执行任务。 + +#### Scenario: 使用 --worktree 标志 +- **WHEN** 用户执行 `/codex:rescue --worktree fix the bug` +- **THEN** 系统创建 git worktree 并在其中执行 Codex 任务 + +#### Scenario: 不使用 --worktree 标志 +- **WHEN** 用户执行 `/codex:rescue fix the bug` +- **THEN** 系统在源 repo 的工作目录中直接执行(现有行为不变) + +### Requirement: --worktree 与 --resume-last 互斥 +当同时指定 `--worktree` 和 `--resume-last`(或 `--resume`)时,系统必须(SHALL)拒绝执行并报错。 + +#### Scenario: 同时指定 --worktree 和 --resume-last +- **WHEN** 用户执行 `/codex:rescue --worktree --resume-last` +- **THEN** 系统输出错误信息 `--worktree and --resume-last are mutually exclusive` +- **THEN** 任务不执行 + +#### Scenario: 同时指定 --worktree 和 --resume +- **WHEN** 用户执行 `/codex:rescue --worktree --resume` +- **THEN** 系统输出错误信息 `--worktree and 
--resume are mutually exclusive` +- **THEN** 任务不执行 + +### Requirement: workspaceRoot 与 codexCwd 分离 +当启用 `--worktree` 时,系统必须(SHALL)将 workspaceRoot(源 repo)与 codexCwd(worktree 路径)分离。 + +#### Scenario: Worktree 模式下分离路径 +- **WHEN** 启用 `--worktree`,worktree 路径为 `/repo/.claude/worktrees/task-abc123/` +- **THEN** workspaceRoot 为源 repo 路径(用于 state 存储和 config 读取) +- **THEN** codexCwd 为 worktree 路径(传给 Codex 作为工作目录) +- **THEN** job record 存储在源 repo 的 state 目录中 +- **THEN** sandbox_mode 从源 repo 的 `.codex/config.toml` 或 `~/.codex/config.toml` 读取 + +#### Scenario: 非 Worktree 模式下路径一致 +- **WHEN** 未启用 `--worktree` +- **THEN** workspaceRoot 和 codexCwd 均为源 repo 路径(现有行为不变) + +### Requirement: Worktree 信息存入 job record +系统必须(SHALL)在 job record 中记录 `worktreePath`、`worktreeBranch` 和 `worktreeBaseBranch` 字段。 + +#### Scenario: 记录 worktree 信息 +- **WHEN** 系统在 `/repo/.claude/worktrees/task-abc123/` 创建 worktree,分支为 `codex-rescue/task-abc123-fix-bug`,基础分支为 `main` +- **THEN** job record 包含: + - `worktreePath: "/repo/.claude/worktrees/task-abc123/"` + - `worktreeBranch: "codex-rescue/task-abc123-fix-bug"` + - `worktreeBaseBranch: "main"` + +### Requirement: 后台模式支持 --worktree +系统必须(SHALL)支持 `--worktree --background` 组合,worktree 在前台创建,后台 worker 使用已创建的路径。 + +#### Scenario: 后台模式 + worktree +- **WHEN** 用户执行 `/codex:rescue --worktree --background fix the bug` +- **THEN** 系统在前台创建 worktree +- **THEN** 将 worktreePath 写入 job record 的 request 字段 +- **THEN** 启动后台 worker 进程 +- **THEN** worker 从 job record 读取 worktreePath,直接在该路径下执行 Codex 任务 + +### Requirement: Task request 传递 worktreePath +`buildTaskRequest` 函数必须(SHALL)接受 `worktreePath` 参数并写入 request 对象。 + +#### Scenario: Request 包含 worktreePath +- **WHEN** 调用 `buildTaskRequest({ cwd, model, effort, prompt, write, resumeLast, jobId, worktreePath: "/path/to/wt" })` +- **THEN** 返回的 request 对象包含 `worktreePath: "/path/to/wt"` + +### Requirement: executeTaskRun 使用 worktreePath +`executeTaskRun` 函数必须(SHALL)在 `request.worktreePath` 存在时,将其作为 Codex 的工作目录。 + +#### 
Scenario: 使用 worktreePath 作为 Codex 工作目录 +- **WHEN** `request.worktreePath` 为 `/repo/.claude/worktrees/task-abc123/` +- **THEN** `runAppServerTurn` 的第一个参数为 `/repo/.claude/worktrees/task-abc123/` +- **THEN** Codex 在该 worktree 中执行任务 diff --git a/package-lock.json b/package-lock.json index 82d04a25..5661b928 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@openai/codex-plugin-cc", - "version": "1.0.4", + "version": "1.4.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@openai/codex-plugin-cc", - "version": "1.0.4", + "version": "1.4.0", "license": "Apache-2.0", "devDependencies": { "@types/node": "^25.5.0", diff --git a/package.json b/package.json index 833fd51c..18665ede 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@openai/codex-plugin-cc", - "version": "1.0.4", + "version": "1.4.0", "private": true, "type": "module", "description": "Use Codex from Claude Code to review code or delegate tasks.", @@ -11,6 +11,7 @@ "scripts": { "bump-version": "node scripts/bump-version.mjs", "check-version": "node scripts/bump-version.mjs --check", + "setup-hooks": "git config core.hooksPath .githooks && echo 'Git hooks installed to .githooks/'", "prebuild": "mkdir -p plugins/codex/.generated/app-server-types && codex app-server generate-ts --out plugins/codex/.generated/app-server-types", "build": "tsc -p tsconfig.app-server.json", "test": "node --test tests/*.test.mjs" diff --git a/plugins/codex/.claude-plugin/plugin.json b/plugins/codex/.claude-plugin/plugin.json index da262028..d336b914 100644 --- a/plugins/codex/.claude-plugin/plugin.json +++ b/plugins/codex/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "codex", - "version": "1.0.4", + "version": "1.4.0", "description": "Use Codex from Claude Code to review code or delegate tasks.", "author": { "name": "OpenAI" diff --git a/plugins/codex/agents/codex-rescue.md b/plugins/codex/agents/codex-rescue.md index 7009ec86..f907ed8e 100644 --- 
a/plugins/codex/agents/codex-rescue.md +++ b/plugins/codex/agents/codex-rescue.md @@ -37,6 +37,7 @@ Forwarding rules: - `--fresh` means do not add `--resume-last`. - If the user is clearly asking to continue prior Codex work in this repository, such as "continue", "keep going", "resume", "apply the top fix", or "dig deeper", add `--resume-last` unless `--fresh` is present. - Otherwise forward the task as a fresh `task` run. +- `--worktree` runs the task in an isolated git worktree. Preserve it for the forwarded `task` call. `--worktree` and `--resume-last` are mutually exclusive — if both are present, report the conflict and do not forward. - Preserve the user's task text as-is apart from stripping routing flags. - Return the stdout of the `codex-companion` command exactly as-is. - If the Bash call fails or Codex cannot be invoked, return nothing. diff --git a/plugins/codex/commands/observe.md b/plugins/codex/commands/observe.md new file mode 100644 index 00000000..e75a6bee --- /dev/null +++ b/plugins/codex/commands/observe.md @@ -0,0 +1,19 @@ +--- +description: Launch a live observer for a Codex job — opens a supported terminal split when available +argument-hint: '[job-id] [--cwd ]' +allowed-tools: Bash(node:*) +--- + +!`node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" observe --spawn $ARGUMENTS` + +Present the command output to the user verbatim. Do not add summary or commentary. + +**Behavior** + +- Inside tmux: opens a new vertical split (`split-window -h`) running the live observer for the requested (or latest running) Codex job. +- Inside Ghostty on macOS: opens a right split in the calling terminal when its tty can be matched; otherwise opens a new Ghostty window. +- Inside iTerm2 on macOS: opens a vertical split in the calling session when its tty can be matched; otherwise opens a new iTerm2 window. +- If macOS Automation permission is required, prints the System Settings path to enable the terminal app and rerun the command. 
+- Outside supported terminals: prints the exact command for the user to paste into a separate terminal window. + +The observer shows real-time phase indicators, tool calls, command output, and file changes with ANSI colors. It exits automatically when the task completes, or with `Ctrl+C` (Codex task continues running). diff --git a/plugins/codex/commands/rescue.md b/plugins/codex/commands/rescue.md index 56de9555..b912389e 100644 --- a/plugins/codex/commands/rescue.md +++ b/plugins/codex/commands/rescue.md @@ -1,6 +1,6 @@ --- description: Delegate investigation, an explicit fix request, or follow-up rescue work to the Codex rescue subagent -argument-hint: "[--background|--wait] [--resume|--fresh] [--model ] [--effort ] [what Codex should investigate, solve, or continue]" +argument-hint: "[--background|--wait] [--worktree] [--resume|--fresh] [--model ] [--effort ] [what Codex should investigate, solve, or continue]" allowed-tools: Bash(node:*), AskUserQuestion, Agent --- @@ -13,10 +13,27 @@ $ARGUMENTS Execution mode: -- If the request includes `--background`, run the `codex:codex-rescue` subagent in the background. +- If the request includes `--background`, run the `codex:codex-rescue` subagent in the background using `run_in_background: true`, then set up completion monitoring: + 1. Parse the subagent's JSON output to extract `jobId`, `jobsDir`, and `signalFile`. + 2. Set up a completion watcher using the `Monitor` tool so the main thread is notified when the background worker finishes: + ``` + Monitor: + command: until [ -f "" ]; do sleep 2; done; echo "done $(cat )" + description: "codex background task " + timeout_ms: 3600000 + persistent: false + ``` + Replace `` and `` with the actual values from step 1. + 3. When the Monitor fires (the worker wrote the `.done` signal file), call `PushNotification` to wake the main thread: + ``` + PushNotification: "Codex task finished: . Run /codex:result to see output." 
+ ``` + Extract `` from the Monitor output line (`completed` or `failed`). + 4. Do not skip the Monitor setup. Do not poll `/codex:status` manually — rely on the Monitor to detect completion. - If the request includes `--wait`, run the `codex:codex-rescue` subagent in the foreground. - If neither flag is present, default to foreground. - `--background` and `--wait` are execution flags for Claude Code. Do not forward them to `task`, and do not treat them as part of the natural-language task text. +- `--worktree` runs the task in an isolated git worktree. Codex works in `.claude/worktrees//` on a separate branch, leaving the main working directory untouched. `--worktree` and `--resume`/`--resume-last` are mutually exclusive. Preserve `--worktree` for the forwarded `task` call. - `--model` and `--effort` are runtime-selection flags. Preserve them for the forwarded `task` call, but do not treat them as part of the natural-language task text. - If the request includes `--resume`, do not ask whether to continue. The user already chose. - If the request includes `--fresh`, do not ask whether to continue. The user already chose. @@ -47,3 +64,4 @@ Operating rules: - Leave `--resume` and `--fresh` in the forwarded request. The subagent handles that routing when it builds the `task` command. - If the helper reports that Codex is missing or unauthenticated, stop and tell the user to run `/codex:setup`. - If the user did not supply a request, ask what Codex should investigate or fix. +- **Thread exclusivity**: While a Codex task is running, do not manually run `codex resume` on the same thread from a terminal. The Codex backend enforces single-turn exclusivity per thread, and attempting to resume an active thread will block or pause your CLI session. Wait for the task to complete (check `/codex:status`), or use `/codex:cancel` to stop the task first. If you need to run Codex in parallel, start a fresh thread with `codex` (without `--resume`). 
diff --git a/plugins/codex/scripts/app-server-broker.mjs b/plugins/codex/scripts/app-server-broker.mjs index 1954274f..95c892a4 100644 --- a/plugins/codex/scripts/app-server-broker.mjs +++ b/plugins/codex/scripts/app-server-broker.mjs @@ -9,6 +9,7 @@ import { parseArgs } from "./lib/args.mjs"; import { BROKER_BUSY_RPC_CODE, CodexAppServerClient } from "./lib/app-server.mjs"; import { parseBrokerEndpoint } from "./lib/broker-endpoint.mjs"; +const IDLE_TIMEOUT_MS = 5000; const STREAMING_METHODS = new Set(["turn/start", "review/start", "thread/compact/start"]); function buildStreamThreadIds(method, params, result) { @@ -70,6 +71,28 @@ async function main() { let activeStreamSocket = null; let activeStreamThreadIds = null; const sockets = new Set(); + let idleTimer = null; + let serverRef = null; + + function startIdleTimer() { + cancelIdleTimer(); + idleTimer = setTimeout(async () => { + if (serverRef) { + await shutdown(serverRef); + process.exit(0); + } + }, IDLE_TIMEOUT_MS); + if (idleTimer.unref) { + idleTimer.unref(); + } + } + + function cancelIdleTimer() { + if (idleTimer) { + clearTimeout(idleTimer); + idleTimer = null; + } + } function clearSocketOwnership(socket) { if (activeRequestSocket === socket) { @@ -100,6 +123,7 @@ async function main() { } async function shutdown(server) { + cancelIdleTimer(); for (const socket of sockets) { socket.end(); } @@ -117,6 +141,7 @@ async function main() { const server = net.createServer((socket) => { sockets.add(socket); + cancelIdleTimer(); socket.setEncoding("utf8"); let buffer = ""; @@ -225,11 +250,17 @@ async function main() { socket.on("close", () => { sockets.delete(socket); clearSocketOwnership(socket); + if (sockets.size === 0) { + startIdleTimer(); + } }); socket.on("error", () => { sockets.delete(socket); clearSocketOwnership(socket); + if (sockets.size === 0) { + startIdleTimer(); + } }); }); @@ -243,7 +274,9 @@ async function main() { process.exit(0); }); + serverRef = server; server.listen(listenTarget.path); 
+ startIdleTimer(); } main().catch((error) => { diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 35222fd5..eac07304 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -7,6 +7,9 @@ import process from "node:process"; import { fileURLToPath } from "node:url"; import { parseArgs, splitRawArgumentString } from "./lib/args.mjs"; +import { resolveCodexSandboxMode } from "./lib/codex-config.mjs"; +import { createEventStream, EVENT_TYPES, emitEvent } from "./lib/event-stream.mjs"; +import { handleObserveCommand } from "./lib/observe.mjs"; import { buildPersistentTaskThreadName, DEFAULT_CONTINUE_PROMPT, @@ -28,6 +31,7 @@ import { generateJobId, getConfig, listJobs, + resolveJobsDir, setConfig, upsertJob, writeJobFile @@ -47,10 +51,11 @@ import { createJobRecord, createProgressReporter, nowIso, + resolveSignalFile, runTrackedJob, SESSION_ID_ENV } from "./lib/tracked-jobs.mjs"; -import { resolveWorkspaceRoot } from "./lib/workspace.mjs"; +import { resolveWorkspaceRoot, createWorktree } from "./lib/workspace.mjs"; import { renderNativeReviewResult, renderReviewResult, @@ -80,7 +85,8 @@ function printUsage() { " node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model ] [--effort ] [prompt]", " node scripts/codex-companion.mjs status [job-id] [--all] [--json]", " node scripts/codex-companion.mjs result [job-id] [--json]", - " node scripts/codex-companion.mjs cancel [job-id] [--json]" + " node scripts/codex-companion.mjs cancel [job-id] [--json]", + " node scripts/codex-companion.mjs observe [job-id] [--cwd ]" ].join("\n") ); } @@ -457,6 +463,7 @@ async function executeReviewRun(request) { async function executeTaskRun(request) { const workspaceRoot = resolveWorkspaceRoot(request.cwd); + const codexCwd = request.worktreePath ?? 
workspaceRoot; ensureCodexAvailable(request.cwd); const taskMetadata = buildTaskRunMetadata({ @@ -479,13 +486,13 @@ async function executeTaskRun(request) { throw new Error("Provide a prompt, a prompt file, piped stdin, or use --resume-last."); } - const result = await runAppServerTurn(workspaceRoot, { + const result = await runAppServerTurn(codexCwd, { resumeThreadId, prompt: request.prompt, defaultPrompt: resumeThreadId ? DEFAULT_CONTINUE_PROMPT : "", model: request.model, effort: request.effort, - sandbox: request.write ? "workspace-write" : "read-only", + sandbox: resolveCodexSandboxMode(workspaceRoot) ?? (request.write ? "workspace-write" : "read-only"), onProgress: request.onProgress, persistThread: true, threadName: resumeThreadId ? null : buildPersistentTaskThreadName(request.prompt || DEFAULT_CONTINUE_PROMPT) @@ -502,7 +509,10 @@ async function executeTaskRun(request) { { title: taskMetadata.title, jobId: request.jobId ?? null, - write: Boolean(request.write) + write: Boolean(request.write), + worktreePath: request.worktreePath ?? null, + worktreeBranch: request.worktreeBranch ?? null, + worktreeBaseBranch: request.worktreeBaseBranch ?? null } ); const payload = { @@ -510,7 +520,10 @@ async function executeTaskRun(request) { threadId: result.threadId, rawOutput, touchedFiles: result.touchedFiles, - reasoningSummary: result.reasoningSummary + reasoningSummary: result.reasoningSummary, + worktreePath: request.worktreePath ?? null, + worktreeBranch: request.worktreeBranch ?? null, + worktreeBaseBranch: request.worktreeBaseBranch ?? null }; return { @@ -551,7 +564,17 @@ function buildTaskRunMetadata({ prompt, resumeLast = false }) { } function renderQueuedTaskLaunch(payload) { - return `${payload.title} started in the background as ${payload.jobId}. Check /codex:status ${payload.jobId} for progress.\n`; + const lines = [`${payload.title} started in the background as ${payload.jobId}. 
Check /codex:status ${payload.jobId} for progress.`]; + if (payload.worktreePath) { + lines.push(` Worktree: ${payload.worktreePath}`); + if (payload.worktreeBranch) { + lines.push(` Branch: ${payload.worktreeBranch}`); + } + } + if (payload.signalFile) { + lines.push(` Signal: ${payload.signalFile}`); + } + return `${lines.join("\n")}\n`; } function getJobKindLabel(kind, jobClass) { @@ -561,9 +584,9 @@ function getJobKindLabel(kind, jobClass) { return jobClass === "review" ? "review" : "rescue"; } -function createCompanionJob({ prefix, kind, title, workspaceRoot, jobClass, summary, write = false }) { +function createCompanionJob({ prefix, kind, title, workspaceRoot, jobClass, summary, write = false, id }) { return createJobRecord({ - id: generateJobId(prefix), + id: id ?? generateJobId(prefix), kind, kindLabel: getJobKindLabel(kind, jobClass), title, @@ -576,29 +599,46 @@ function createCompanionJob({ prefix, kind, title, workspaceRoot, jobClass, summ function createTrackedProgress(job, options = {}) { const logFile = options.logFile ?? 
createJobLogFile(job.workspaceRoot, job.id, job.title); + const jobsDir = resolveJobsDir(job.workspaceRoot); + const eventStream = createEventStream(job.id, jobsDir); return { logFile, + eventFile: eventStream.eventFile, + eventStream, progress: createProgressReporter({ stderr: Boolean(options.stderr), logFile, + eventStream, onEvent: createJobProgressUpdater(job.workspaceRoot, job.id) }) }; } -function buildTaskJob(workspaceRoot, taskMetadata, write) { - return createCompanionJob({ +function buildTaskJob(workspaceRoot, taskMetadata, write, worktreeInfo = null, id = null) { + const base = createCompanionJob({ prefix: "task", kind: "task", title: taskMetadata.title, workspaceRoot, jobClass: "task", summary: taskMetadata.summary, - write + write, + id }); + + if (!worktreeInfo) { + return base; + } + + return { + ...base, + worktreePath: worktreeInfo.worktreePath, + worktreeBranch: worktreeInfo.worktreeBranch, + worktreeBaseBranch: worktreeInfo.worktreeBaseBranch + }; } -function buildTaskRequest({ cwd, model, effort, prompt, write, resumeLast, jobId }) { +function buildTaskRequest({ cwd, model, effort, prompt, write, resumeLast, jobId, worktreePath = null, worktreeBranch = null, worktreeBaseBranch = null }) { return { cwd, model, @@ -606,7 +646,10 @@ function buildTaskRequest({ cwd, model, effort, prompt, write, resumeLast, jobId prompt, write, resumeLast, - jobId + jobId, + worktreePath, + worktreeBranch, + worktreeBaseBranch }; } @@ -626,11 +669,18 @@ function requireTaskRequest(prompt, resumeLast) { } async function runForegroundCommand(job, runner, options = {}) { - const { logFile, progress } = createTrackedProgress(job, { + const { logFile, eventFile, eventStream, progress } = createTrackedProgress(job, { logFile: options.logFile, stderr: !options.json }); - const execution = await runTrackedJob(job, () => runner(progress), { logFile }); + const execution = await runTrackedJob(job, () => runner(progress), { logFile, eventFile }); + if (eventStream) { + 
emitEvent(eventStream, EVENT_TYPES.COMPLETED, { + status: execution.exitStatus === 0 ? "success" : "failure", + phase: execution.exitStatus === 0 ? "done" : "failed", + summary: execution.summary ?? null + }); + } outputResult(options.json ? execution.payload : execution.rendered, options.json); if (execution.exitStatus !== 0) { process.exitCode = execution.exitStatus; @@ -652,16 +702,20 @@ function spawnDetachedTaskWorker(cwd, jobId) { } function enqueueBackgroundTask(cwd, job, request) { - const { logFile } = createTrackedProgress(job); + const { logFile, eventFile } = createTrackedProgress(job); appendLogLine(logFile, "Queued for background execution."); const child = spawnDetachedTaskWorker(cwd, job.id); + const jobsDir = resolveJobsDir(job.workspaceRoot); + const signalFile = resolveSignalFile(jobsDir, job.id); const queuedRecord = { ...job, status: "queued", phase: "queued", pid: child.pid ?? null, logFile, + eventFile, + signalFile, request }; writeJobFile(job.workspaceRoot, job.id, queuedRecord); @@ -673,7 +727,12 @@ function enqueueBackgroundTask(cwd, job, request) { status: "queued", title: job.title, summary: job.summary, - logFile + logFile, + eventFile, + jobsDir, + signalFile, + worktreePath: job.worktreePath ?? null, + worktreeBranch: job.worktreeBranch ?? 
null }, logFile }; @@ -732,7 +791,7 @@ async function handleReview(argv) { async function handleTask(argv) { const { options, positionals } = parseCommandInput(argv, { valueOptions: ["model", "effort", "cwd", "prompt-file"], - booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background"], + booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background", "worktree"], aliasMap: { m: "model" } @@ -746,20 +805,32 @@ async function handleTask(argv) { const resumeLast = Boolean(options["resume-last"] || options.resume); const fresh = Boolean(options.fresh); + const worktree = Boolean(options.worktree); if (resumeLast && fresh) { throw new Error("Choose either --resume/--resume-last or --fresh."); } + if (worktree && resumeLast) { + throw new Error("Choose either --worktree or --resume/--resume-last."); + } const write = Boolean(options.write); const taskMetadata = buildTaskRunMetadata({ prompt, resumeLast }); + // Create worktree if requested (before job creation so we have the path) + let worktreeInfo = null; + let preassignedJobId = null; + if (worktree) { + preassignedJobId = generateJobId("task"); + worktreeInfo = createWorktree(workspaceRoot, preassignedJobId, prompt); + } + if (options.background) { ensureCodexAvailable(cwd); requireTaskRequest(prompt, resumeLast); - const job = buildTaskJob(workspaceRoot, taskMetadata, write); + const job = buildTaskJob(workspaceRoot, taskMetadata, write, worktreeInfo, preassignedJobId); const request = buildTaskRequest({ cwd, model, @@ -767,14 +838,17 @@ async function handleTask(argv) { prompt, write, resumeLast, - jobId: job.id + jobId: job.id, + worktreePath: worktreeInfo?.worktreePath ?? null, + worktreeBranch: worktreeInfo?.worktreeBranch ?? null, + worktreeBaseBranch: worktreeInfo?.worktreeBaseBranch ?? 
null }); const { payload } = enqueueBackgroundTask(cwd, job, request); outputCommandResult(payload, renderQueuedTaskLaunch(payload), options.json); return; } - const job = buildTaskJob(workspaceRoot, taskMetadata, write); + const job = buildTaskJob(workspaceRoot, taskMetadata, write, worktreeInfo, preassignedJobId); await runForegroundCommand( job, (progress) => @@ -786,6 +860,9 @@ async function handleTask(argv) { write, resumeLast, jobId: job.id, + worktreePath: worktreeInfo?.worktreePath ?? null, + worktreeBranch: worktreeInfo?.worktreeBranch ?? null, + worktreeBaseBranch: worktreeInfo?.worktreeBaseBranch ?? null, onProgress: progress }), { json: options.json } @@ -813,7 +890,7 @@ async function handleTaskWorker(argv) { throw new Error(`Stored job ${options["job-id"]} is missing its task request payload.`); } - const { logFile, progress } = createTrackedProgress( + const { logFile, eventFile, eventStream, progress } = createTrackedProgress( { ...storedJob, workspaceRoot @@ -822,19 +899,27 @@ async function handleTaskWorker(argv) { logFile: storedJob.logFile ?? null } ); - await runTrackedJob( + const execution = await runTrackedJob( { ...storedJob, workspaceRoot, - logFile + logFile, + eventFile }, () => executeTaskRun({ ...request, onProgress: progress }), - { logFile } + { logFile, eventFile } ); + if (eventStream) { + emitEvent(eventStream, EVENT_TYPES.COMPLETED, { + status: execution.exitStatus === 0 ? "success" : "failure", + phase: execution.exitStatus === 0 ? "done" : "failed", + summary: execution.summary ?? null + }); + } } async function handleStatus(argv) { @@ -930,7 +1015,7 @@ async function handleCancel(argv) { const threadId = existing.threadId ?? job.threadId ?? null; const turnId = existing.turnId ?? job.turnId ?? 
null; - const interrupt = await interruptAppServerTurn(cwd, { threadId, turnId }); + const interrupt = await interruptAppServerTurn(workspaceRoot, { threadId, turnId }); if (interrupt.attempted) { appendLogLine( job.logFile, @@ -1015,6 +1100,9 @@ async function main() { case "cancel": await handleCancel(argv); break; + case "observe": + await handleObserveCommand(argv); + break; default: throw new Error(`Unknown subcommand: ${subcommand}`); } diff --git a/plugins/codex/scripts/lib/app-server.mjs b/plugins/codex/scripts/lib/app-server.mjs index 127c8376..9c94ce18 100644 --- a/plugins/codex/scripts/lib/app-server.mjs +++ b/plugins/codex/scripts/lib/app-server.mjs @@ -32,6 +32,7 @@ const DEFAULT_CLIENT_INFO = { /** @type {InitializeCapabilities} */ const DEFAULT_CAPABILITIES = { experimentalApi: false, + requestAttestation: false, optOutNotificationMethods: [ "item/agentMessage/delta", "item/reasoning/summaryTextDelta", diff --git a/plugins/codex/scripts/lib/broker-lifecycle.mjs b/plugins/codex/scripts/lib/broker-lifecycle.mjs index ef763819..870ce7ce 100644 --- a/plugins/codex/scripts/lib/broker-lifecycle.mjs +++ b/plugins/codex/scripts/lib/broker-lifecycle.mjs @@ -6,6 +6,7 @@ import process from "node:process"; import { spawn } from "node:child_process"; import { fileURLToPath } from "node:url"; import { createBrokerEndpoint, parseBrokerEndpoint } from "./broker-endpoint.mjs"; +import { terminateProcessTree } from "./process.mjs"; import { resolveStateDir } from "./state.mjs"; export const PID_FILE_ENV = "CODEX_COMPANION_APP_SERVER_PID_FILE"; @@ -123,7 +124,7 @@ export async function ensureBrokerSession(cwd, options = {}) { logFile: existing.logFile ?? null, sessionDir: existing.sessionDir ?? null, pid: existing.pid ?? null, - killProcess: options.killProcess ?? null + killProcess: options.killProcess ?? 
terminateProcessTree }); clearBrokerSession(cwd); } @@ -154,7 +155,7 @@ export async function ensureBrokerSession(cwd, options = {}) { logFile, sessionDir, pid: child.pid ?? null, - killProcess: options.killProcess ?? null + killProcess: options.killProcess ?? terminateProcessTree }); return null; } diff --git a/plugins/codex/scripts/lib/codex-config.mjs b/plugins/codex/scripts/lib/codex-config.mjs new file mode 100644 index 00000000..2a177e32 --- /dev/null +++ b/plugins/codex/scripts/lib/codex-config.mjs @@ -0,0 +1,59 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +const VALID_SANDBOX_MODES = new Set(["read-only", "workspace-write", "danger-full-access"]); + +/** + * Extract `sandbox_mode` from a Codex config.toml file. + * Returns null if the file does not exist, cannot be read, or the key is absent/invalid. + * + * Only handles the simple `key = "value"` syntax used by Codex config. + * Does not attempt full TOML parsing — no arrays, tables, or inline tables. + */ +export function readSandboxModeFromFile(filePath) { + let content; + try { + content = fs.readFileSync(filePath, "utf8"); + } catch { + return null; + } + + for (const rawLine of content.split(/\r?\n/)) { + const line = rawLine.replace(/#.*$/, "").trim(); + if (!line) continue; + + const match = line.match(/^sandbox_mode\s*=\s*"([^"]*)"/); + if (!match) continue; + + const value = match[1].trim(); + if (VALID_SANDBOX_MODES.has(value)) { + return value; + } + } + + return null; +} + +/** + * Resolve the effective Codex `sandbox_mode` for a workspace. + * + * Precedence (matches Codex CLI behavior): + * 1. Project-level `.codex/config.toml` in the workspace root + * 2. User-level `~/.codex/config.toml` + * + * Returns the resolved value, or null if nothing is configured. + */ +export function resolveCodexSandboxMode(workspaceRoot) { + const projectConfig = workspaceRoot + ? 
readSandboxModeFromFile(path.join(workspaceRoot, ".codex", "config.toml")) + : null; + if (projectConfig) return projectConfig; + + const userConfig = readSandboxModeFromFile(path.join(os.homedir(), ".codex", "config.toml")); + if (userConfig) return userConfig; + + return null; +} + +export { VALID_SANDBOX_MODES }; diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs index f2fe88bd..2db9d1fb 100644 --- a/plugins/codex/scripts/lib/codex.mjs +++ b/plugins/codex/scripts/lib/codex.mjs @@ -60,8 +60,7 @@ function buildThreadParams(cwd, options = {}) { approvalPolicy: options.approvalPolicy ?? "never", sandbox: options.sandbox ?? "read-only", serviceName: SERVICE_NAME, - ephemeral: options.ephemeral ?? true, - experimentalRawEvents: false + ephemeral: options.ephemeral ?? true }; } diff --git a/plugins/codex/scripts/lib/event-stream.mjs b/plugins/codex/scripts/lib/event-stream.mjs new file mode 100644 index 00000000..442cfc42 --- /dev/null +++ b/plugins/codex/scripts/lib/event-stream.mjs @@ -0,0 +1,56 @@ +import fs from "node:fs"; +import path from "node:path"; + +const EVENT_TYPES = Object.freeze({ + PHASE: "phase", + TOOL_CALL: "tool_call", + TOOL_DONE: "tool_done", + COMMAND: "command", + COMMAND_DONE: "command_done", + FILE_CHANGE: "file_change", + MESSAGE: "message", + REASONING: "reasoning", + COMPLETED: "completed" +}); + +const EVENT_FILE_EXTENSION = ".events.jsonl"; + +function nowIso() { + return new Date().toISOString(); +} + +function resolveEventFilePath(jobsDir, jobId) { + return path.join(jobsDir, `${jobId}${EVENT_FILE_EXTENSION}`); +} + +export function createEventStream(jobId, jobsDir) { + const eventFile = resolveEventFilePath(jobsDir, jobId); + try { + fs.writeFileSync(eventFile, "", "utf8"); + } catch { + // Best-effort; do not fail if the file cannot be created. 
+ } + return { eventFile, jobId }; +} + +export function emitEvent(stream, type, data = {}) { + if (!stream || !stream.eventFile) { + return; + } + try { + const line = JSON.stringify({ t: nowIso(), type, ...data }); + fs.appendFileSync(stream.eventFile, `${line}\n`, "utf8"); + } catch { + // Write failures are silently ignored; event stream is best-effort. + } +} + +export function closeEventStream(_stream) { + // No-op placeholder for future cleanup. +} + +export function resolveJobEventFile(jobsDir, jobId) { + return resolveEventFilePath(jobsDir, jobId); +} + +export { EVENT_TYPES, EVENT_FILE_EXTENSION }; diff --git a/plugins/codex/scripts/lib/job-control.mjs b/plugins/codex/scripts/lib/job-control.mjs index ad152c15..924c00b1 100644 --- a/plugins/codex/scripts/lib/job-control.mjs +++ b/plugins/codex/scripts/lib/job-control.mjs @@ -1,7 +1,14 @@ import fs from "node:fs"; import { getSessionRuntimeStatus } from "./codex.mjs"; -import { getConfig, listJobs, readJobFile, resolveJobFile } from "./state.mjs"; +import { + collectWorkspaceJobsAcrossRoots, + findJobByIdAcrossWorkspaces, + getConfig, + listJobs, + readJobFile, + resolveJobFile +} from "./state.mjs"; import { SESSION_ID_ENV } from "./tracked-jobs.mjs"; import { resolveWorkspaceRoot } from "./workspace.mjs"; @@ -207,13 +214,30 @@ function matchJobReference(jobs, reference, predicate = () => true) { throw new Error(`Job reference "${reference}" is ambiguous. Use a longer job id.`); } - throw new Error(`No job found for "${reference}". 
Run /codex:status to list known jobs.`); + return null; +} + +function findCrossWorkspaceMatch(reference, predicate) { + if (!reference) { + return null; + } + const cross = findJobByIdAcrossWorkspaces(reference); + if (!cross) { + return null; + } + if (predicate && !predicate(cross.job)) { + return { ...cross, predicateRejected: true }; + } + return cross; } export function buildStatusSnapshot(cwd, options = {}) { const workspaceRoot = resolveWorkspaceRoot(cwd); const config = getConfig(workspaceRoot); - const jobs = sortJobsNewestFirst(filterJobsForCurrentSession(listJobs(workspaceRoot), options)); + const rawJobs = options.all + ? collectWorkspaceJobsAcrossRoots(workspaceRoot) + : filterJobsForCurrentSession(listJobs(workspaceRoot), options); + const jobs = sortJobsNewestFirst(rawJobs); const maxJobs = options.maxJobs ?? DEFAULT_MAX_STATUS_JOBS; const maxProgressLines = options.maxProgressLines ?? DEFAULT_MAX_PROGRESS_LINES; @@ -243,34 +267,59 @@ export function buildSingleJobSnapshot(cwd, reference, options = {}) { const workspaceRoot = resolveWorkspaceRoot(cwd); const jobs = sortJobsNewestFirst(listJobs(workspaceRoot)); const selected = matchJobReference(jobs, reference); - if (!selected) { - throw new Error(`No job found for "${reference}". Run /codex:status to inspect known jobs.`); + + if (selected) { + return { + workspaceRoot, + job: enrichJob(selected, { maxProgressLines: options.maxProgressLines }) + }; } - return { - workspaceRoot, - job: enrichJob(selected, { maxProgressLines: options.maxProgressLines }) - }; + const cross = findCrossWorkspaceMatch(reference); + if (cross) { + return { + workspaceRoot: cross.job.workspaceRoot ?? workspaceRoot, + job: enrichJob(cross.job, { maxProgressLines: options.maxProgressLines }), + crossWorkspace: true, + crossWorkspaceStateDir: cross.stateDir + }; + } + + throw new Error(`No job found for "${reference}". 
Run /codex:status to inspect known jobs.`); } export function resolveResultJob(cwd, reference) { const workspaceRoot = resolveWorkspaceRoot(cwd); const jobs = sortJobsNewestFirst(reference ? listJobs(workspaceRoot) : filterJobsForCurrentSession(listJobs(workspaceRoot))); - const selected = matchJobReference( - jobs, - reference, - (job) => job.status === "completed" || job.status === "failed" || job.status === "cancelled" - ); + const isFinished = (job) => + job.status === "completed" || job.status === "failed" || job.status === "cancelled"; + const isActive = (job) => job.status === "queued" || job.status === "running"; + const selected = matchJobReference(jobs, reference, isFinished); if (selected) { return { workspaceRoot, job: selected }; } - const active = matchJobReference(jobs, reference, (job) => job.status === "queued" || job.status === "running"); + const active = matchJobReference(jobs, reference, isActive); if (active) { throw new Error(`Job ${active.id} is still ${active.status}. Check /codex:status and try again once it finishes.`); } + const cross = findCrossWorkspaceMatch(reference, isFinished); + if (cross && !cross.predicateRejected) { + return { + workspaceRoot: cross.job.workspaceRoot ?? workspaceRoot, + job: cross.job, + crossWorkspace: true, + crossWorkspaceStateDir: cross.stateDir + }; + } + if (cross && cross.predicateRejected) { + throw new Error( + `Job ${cross.job.id} is still ${cross.job.status} in another workspace. Check /codex:status and try again once it finishes.` + ); + } + if (reference) { throw new Error(`No finished job found for "${reference}". 
Run /codex:status to inspect active jobs.`); } @@ -281,14 +330,29 @@ export function resolveResultJob(cwd, reference) { export function resolveCancelableJob(cwd, reference, options = {}) { const workspaceRoot = resolveWorkspaceRoot(cwd); const jobs = sortJobsNewestFirst(listJobs(workspaceRoot)); - const activeJobs = jobs.filter((job) => job.status === "queued" || job.status === "running"); + const isActive = (job) => job.status === "queued" || job.status === "running"; + const activeJobs = jobs.filter(isActive); if (reference) { const selected = matchJobReference(activeJobs, reference); - if (!selected) { - throw new Error(`No active job found for "${reference}".`); + if (selected) { + return { workspaceRoot, job: selected }; } - return { workspaceRoot, job: selected }; + const cross = findCrossWorkspaceMatch(reference, isActive); + if (cross && !cross.predicateRejected) { + return { + workspaceRoot: cross.job.workspaceRoot ?? workspaceRoot, + job: cross.job, + crossWorkspace: true, + crossWorkspaceStateDir: cross.stateDir + }; + } + if (cross && cross.predicateRejected) { + throw new Error( + `Job ${cross.job.id} is not active (status: ${cross.job.status}). 
Nothing to cancel.` + ); + } + throw new Error(`No active job found for "${reference}".`); } const sessionScopedActiveJobs = filterJobsForCurrentSession(activeJobs, options); diff --git a/plugins/codex/scripts/lib/observe.mjs b/plugins/codex/scripts/lib/observe.mjs new file mode 100644 index 00000000..8327ee2f --- /dev/null +++ b/plugins/codex/scripts/lib/observe.mjs @@ -0,0 +1,449 @@ +import fs from "node:fs"; +import { fileURLToPath } from "node:url"; + +import { parseArgs } from "./args.mjs"; +import { EVENT_TYPES } from "./event-stream.mjs"; +import { shellQuote, spawnObserverInTerminal } from "./spawner.mjs"; +import { findJobByIdAcrossWorkspaces, loadState, resolveJobsDir } from "./state.mjs"; +import { resolveWorkspaceRoot } from "./workspace.mjs"; + +const COMPANION_SCRIPT = fileURLToPath(new URL("../codex-companion.mjs", import.meta.url)); + +const POLL_INTERVAL_MS = 500; +const WATCH_DEBOUNCE_MS = 100; + +const ANSI = { + reset: "\x1b[0m", + dim: "\x1b[2m", + italic: "\x1b[3m", + red: "\x1b[31m", + green: "\x1b[32m", + yellow: "\x1b[33m", + blue: "\x1b[34m", + magenta: "\x1b[35m", + cyan: "\x1b[36m", + white: "\x1b[37m" +}; + +const PHASE_SPINNERS = { + starting: "⠋", + investigating: "⠙", + finalizing: "⠴", + done: "✓", + failed: "✗" +}; + +const PHASE_COLORS = { + starting: ANSI.cyan, + investigating: ANSI.yellow, + finalizing: ANSI.green, + done: ANSI.green, + failed: ANSI.red +}; + +function renderPhase(event) { + const phase = event.phase ?? "unknown"; + const spinner = PHASE_SPINNERS[phase] ?? "•"; + const color = PHASE_COLORS[phase] ?? ANSI.white; + const message = event.message ?? ""; + return `${color}${spinner} ${phase}${ANSI.reset}${message ? ` ${ANSI.dim}${message}${ANSI.reset}` : ""}`; +} + +function renderToolCall(event) { + const tool = event.tool ?? "unknown"; + const extra = event.path ?? event.detail ?? ""; + return `${ANSI.cyan}→ ${tool}${extra ? 
` ${extra}` : ""}${ANSI.reset}`; +} + +function renderToolDone(_event) { + return `${ANSI.dim} ✓ completed${ANSI.reset}`; +} + +function renderCommand(event) { + const cmd = event.cmd ?? ""; + return `${ANSI.blue}$ ${cmd}${ANSI.reset}`; +} + +function renderCommandDone(event) { + const exit = event.exit ?? 0; + const color = exit === 0 ? ANSI.green : ANSI.red; + return `${color} exit ${exit}${ANSI.reset}`; +} + +function renderFileChange(event) { + const filePath = event.path ?? ""; + const action = event.action ?? ""; + return `${ANSI.yellow}✎ ${filePath}${action ? ` (${action})` : ""}${ANSI.reset}`; +} + +function renderMessage(event) { + const text = event.text ?? event.logBody ?? event.message ?? ""; + if (!text) { + return ""; + } + return text + .split("\n") + .map((line) => `${ANSI.dim}│${ANSI.reset} ${line}`) + .join("\n"); +} + +function renderReasoning(event) { + const sections = event.sections ?? []; + if (sections.length === 0) { + return ""; + } + return sections + .map((section) => `${ANSI.dim}${ANSI.italic}• ${section}${ANSI.reset}`) + .join("\n"); +} + +function renderCompleted(event) { + const status = event.status ?? "unknown"; + const timestamp = event.t ?? ""; + const color = status === "success" ? ANSI.green : ANSI.red; + const summary = event.summary ? ` ${ANSI.dim}${event.summary}${ANSI.reset}` : ""; + return `${color}● completed at ${timestamp}${ANSI.reset}${summary}`; +} + +export function renderEvent(event) { + const type = event.type ?? 
EVENT_TYPES.PHASE; + switch (type) { + case EVENT_TYPES.PHASE: + return renderPhase(event); + case EVENT_TYPES.TOOL_CALL: + return renderToolCall(event); + case EVENT_TYPES.TOOL_DONE: + return renderToolDone(event); + case EVENT_TYPES.COMMAND: + return renderCommand(event); + case EVENT_TYPES.COMMAND_DONE: + return renderCommandDone(event); + case EVENT_TYPES.FILE_CHANGE: + return renderFileChange(event); + case EVENT_TYPES.MESSAGE: + return renderMessage(event); + case EVENT_TYPES.REASONING: + return renderReasoning(event); + case EVENT_TYPES.COMPLETED: + return renderCompleted(event); + default: + return event.message ? `${ANSI.dim}${event.message}${ANSI.reset}` : ""; + } +} + +export function readEventsFromOffset(eventFile, offset = 0) { + if (!eventFile || !fs.existsSync(eventFile)) { + return { events: [], newOffset: 0 }; + } + + const content = fs.readFileSync(eventFile, "utf8"); + if (!content || offset >= content.length) { + return { events: [], newOffset: content.length }; + } + + const newContent = content.slice(offset); + const events = []; + for (const line of newContent.split("\n")) { + if (!line.trim()) { + continue; + } + try { + events.push(JSON.parse(line)); + } catch { + // Skip malformed lines. + } + } + + return { events, newOffset: content.length }; +} + +function findLatestRunningJob(state) { + const runningJobs = (state.jobs ?? []).filter((job) => job.status === "running"); + if (runningJobs.length === 0) { + return null; + } + runningJobs.sort((a, b) => String(b.startedAt ?? "").localeCompare(String(a.startedAt ?? ""))); + return runningJobs[0]; +} + +function findJobById(state, jobId) { + return (state.jobs ?? []).find((job) => job.id === jobId) ?? 
null; +} + +function resolveEventFileForJob(cwd, job) { + if (job.eventFile) { + return job.eventFile; + } + const jobsDir = resolveJobsDir(cwd); + return `${jobsDir}/${job.id}.events.jsonl`; +} + +export function tailEventStream(eventFile, onEvent) { + let offset = 0; + let watcher = null; + let pollTimer = null; + let debounceTimer = null; + let stopped = false; + let onStopCallback = null; + + function processNewEvents() { + if (stopped) { + return; + } + const { events, newOffset } = readEventsFromOffset(eventFile, offset); + offset = newOffset; + for (const event of events) { + onEvent(event); + if (event.type === EVENT_TYPES.COMPLETED) { + stop(); + return; + } + } + } + + function startPolling() { + pollTimer = setInterval(processNewEvents, POLL_INTERVAL_MS); + } + + function startWatching() { + try { + watcher = fs.watch(eventFile, () => { + if (debounceTimer) { + clearTimeout(debounceTimer); + } + debounceTimer = setTimeout(processNewEvents, WATCH_DEBOUNCE_MS); + }); + watcher.on("error", () => { + if (watcher) { + watcher.close(); + watcher = null; + } + startPolling(); + }); + } catch { + startPolling(); + } + } + + // Read existing events first + processNewEvents(); + + // Start watching for new events + if (!stopped && fs.existsSync(eventFile)) { + startWatching(); + } else if (!stopped) { + // File doesn't exist yet, poll until it appears + pollTimer = setInterval(() => { + if (fs.existsSync(eventFile)) { + clearInterval(pollTimer); + pollTimer = null; + processNewEvents(); + if (!stopped) { + startWatching(); + } + } + }, POLL_INTERVAL_MS); + } + + function stop() { + if (stopped) { + return; + } + stopped = true; + if (debounceTimer) { + clearTimeout(debounceTimer); + debounceTimer = null; + } + if (pollTimer) { + clearInterval(pollTimer); + pollTimer = null; + } + if (watcher) { + watcher.close(); + watcher = null; + } + onStopCallback?.(); + } + + function onStop(callback) { + onStopCallback = callback; + if (stopped) { + callback(); + } + } + + 
return { stop, onStop, isStopped: () => stopped }; +} + +function buildObserverCommand({ positionals, options }) { + const observerArgs = ["observe", ...positionals]; + if (options.cwd) { + observerArgs.push("--cwd", options.cwd); + } + return [process.execPath, COMPANION_SCRIPT, ...observerArgs].map(shellQuote).join(" "); +} + +function renderFallbackHint({ workspaceRoot, command }) { + return [ + `${ANSI.dim}Not running inside a supported terminal (tmux, Ghostty on macOS, or iTerm2 on macOS).${ANSI.reset}`, + "", + "Open a new terminal window and run:", + "", + ` cd ${workspaceRoot}`, + ` ${command}`, + "" + ].join("\n"); +} + +const SPAWN_SUCCESS_LABELS = { + tmux: "tmux pane", + "ghostty-mac": "Ghostty split or new window", + "iterm2-mac": "iTerm2 split or new window" +}; + +const AUTOMATION_APP_LABELS = { + "ghostty-mac": "Ghostty", + "iterm2-mac": "iTerm2" +}; + +export async function handleObserveSpawn({ + positionals, + options, + workspaceRoot, + spawner = spawnObserverInTerminal +}) { + const command = buildObserverCommand({ positionals, options }); + const result = spawner({ cwd: workspaceRoot, command }); + + if (result.spawned) { + const target = positionals[0] ? `job ${positionals[0]}` : "latest running job"; + const label = SPAWN_SUCCESS_LABELS[result.kind] ?? result.kind; + process.stdout.write(`${ANSI.green}✓ Observer launched in ${label}${ANSI.reset} (${target})\n`); + return; + } + + if (result.reason === "automation-permission-denied") { + const app = AUTOMATION_APP_LABELS[result.kind] ?? result.kind; + process.stdout.write(`! macOS Automation permission needed for ${app}. Open System Settings → Privacy & Security → Automation, enable ${app}, then rerun /codex:observe.\n`); + return; + } + + if (result.reason === "unsafe-command") { + process.stdout.write(`${ANSI.red}✗ Refusing to spawn: composed command contains a control character (${result.error}). 
Run the command manually:${ANSI.reset}\n\n`); + } else if (result.error) { + const label = SPAWN_SUCCESS_LABELS[result.kind] ?? result.kind; + process.stdout.write(`${ANSI.red}✗ Failed to open ${label}: ${result.error}${ANSI.reset}\n\n`); + } + + process.stdout.write(renderFallbackHint({ workspaceRoot, command })); +} + +export async function handleObserveCommand(argv) { + const { options, positionals } = parseArgs(argv, { + valueOptions: ["cwd"], + booleanOptions: ["json", "spawn"] + }); + + const cwd = options.cwd ?? process.cwd(); + let workspaceRoot; + try { + workspaceRoot = resolveWorkspaceRoot(cwd); + } catch { + workspaceRoot = cwd; + } + + if (options.spawn) { + await handleObserveSpawn({ positionals, options, workspaceRoot }); + return; + } + + const jobId = positionals[0] ?? null; + const state = loadState(workspaceRoot); + + let job; + let crossWorkspaceMatch = null; + if (jobId) { + job = findJobById(state, jobId); + if (!job) { + crossWorkspaceMatch = findJobByIdAcrossWorkspaces(jobId); + if (crossWorkspaceMatch) { + job = crossWorkspaceMatch.job; + } else { + process.stderr.write(`Error: Job not found: ${jobId}\n`); + process.exitCode = 1; + return; + } + } + } else { + job = findLatestRunningJob(state); + if (!job) { + process.stderr.write("No running Codex jobs found.\n"); + process.exitCode = 1; + return; + } + } + + const eventFile = resolveEventFileForJob(workspaceRoot, job); + const isCompleted = job.status === "completed" || job.status === "failed" || job.status === "cancelled"; + + // Print header + const statusColor = isCompleted ? (job.status === "completed" ? 
ANSI.green : ANSI.red) : ANSI.yellow; + process.stdout.write(`${ANSI.dim}Codex Observer — ${job.id} — ${statusColor}${job.status}${ANSI.reset}\n`); + if (crossWorkspaceMatch) { + process.stdout.write(`${ANSI.dim}(job belongs to another workspace; reading from ${crossWorkspaceMatch.stateDir})${ANSI.reset}\n`); + } + process.stdout.write("\n"); + + if (isCompleted) { + // Render full history and exit + const { events } = readEventsFromOffset(eventFile, 0); + if (events.length === 0) { + process.stdout.write(`${ANSI.dim}No events recorded for this job.${ANSI.reset}\n`); + } else { + for (const event of events) { + const rendered = renderEvent(event); + if (rendered) { + process.stdout.write(`${rendered}\n`); + } + } + } + return; + } + + // Live tail mode + let waitingShown = false; + if (!fs.existsSync(eventFile)) { + process.stdout.write(`${ANSI.dim}Waiting for events...${ANSI.reset}\n`); + waitingShown = true; + } + + let firstEvent = true; + const tail = tailEventStream(eventFile, (event) => { + if (waitingShown && firstEvent) { + // Clear the "waiting" line + process.stdout.write("\x1b[1A\x1b[2K"); + waitingShown = false; + } + firstEvent = false; + const rendered = renderEvent(event); + if (rendered) { + process.stdout.write(`${rendered}\n`); + } + }); + + // SIGINT handler + const sigintHandler = () => { + tail.stop(); + process.stdout.write(`\n${ANSI.dim}Observer detached. 
Codex task continues.${ANSI.reset}\n`); + process.exit(0); + }; + process.on("SIGINT", sigintHandler); + + // Wait for tail to complete (completed event seen or error) + await new Promise((resolve) => { + tail.onStop(() => { + process.removeListener("SIGINT", sigintHandler); + resolve(); + }); + }); +} diff --git a/plugins/codex/scripts/lib/render.mjs b/plugins/codex/scripts/lib/render.mjs index 2ec18523..5e9d86bc 100644 --- a/plugins/codex/scripts/lib/render.mjs +++ b/plugins/codex/scripts/lib/render.mjs @@ -161,6 +161,17 @@ function pushJobDetails(lines, job, options = {}) { lines.push(` ${line}`); } } + if (job.worktreePath) { + lines.push(` Worktree path: ${job.worktreePath}`); + if (job.worktreeBranch) { + lines.push(` Worktree branch: ${job.worktreeBranch}`); + } + if (options.showWorktreeActions && job.worktreeBaseBranch) { + lines.push(` Diff: git diff ${job.worktreeBaseBranch}...${job.worktreeBranch}`); + lines.push(` Merge: git merge ${job.worktreeBranch}`); + lines.push(` Remove: git worktree remove ${job.worktreePath}`); + } + } } function appendReasoningSection(lines, reasoningSummary) { @@ -314,12 +325,38 @@ export function renderNativeReviewResult(result, meta) { export function renderTaskResult(parsedResult, meta) { const rawOutput = typeof parsedResult?.rawOutput === "string" ? parsedResult.rawOutput : ""; + const worktreeBlock = renderWorktreesBlock(meta); + if (rawOutput) { - return rawOutput.endsWith("\n") ? rawOutput : `${rawOutput}\n`; + const base = rawOutput.endsWith("\n") ? rawOutput : `${rawOutput}\n`; + return worktreeBlock ? `${base}\n${worktreeBlock}` : base; } const message = String(parsedResult?.failureMessage ?? "").trim() || "Codex did not return a final message."; - return `${message}\n`; + const base = `${message}\n`; + return worktreeBlock ? 
`${base}\n${worktreeBlock}` : base; +} + +export function renderWorktreesBlock(meta) { + if (!meta?.worktreePath) { + return null; + } + + const lines = [ + "Worktree:", + ` Path: ${meta.worktreePath}`, + ` Branch: ${meta.worktreeBranch ?? "unknown"}` + ]; + + if (meta.worktreeBaseBranch) { + lines.push(""); + lines.push("Next steps:"); + lines.push(` Diff: git diff ${meta.worktreeBaseBranch}...${meta.worktreeBranch}`); + lines.push(` Merge: git merge ${meta.worktreeBranch}`); + lines.push(` Remove: git worktree remove ${meta.worktreePath}`); + } + + return `${lines.join("\n")}\n`; } export function renderStatusReport(report) { @@ -382,7 +419,8 @@ export function renderJobStatusReport(job) { showLog: true, showCancelHint: true, showResultHint: true, - showReviewHint: true + showReviewHint: true, + showWorktreeActions: true }); return `${lines.join("\n").trimEnd()}\n`; } @@ -390,12 +428,15 @@ export function renderJobStatusReport(job) { export function renderStoredJobResult(job, storedJob) { const threadId = storedJob?.threadId ?? job.threadId ?? null; const resumeCommand = threadId ? `codex resume ${threadId}` : null; + const worktreeBlock = renderWorktreesBlock(job); + if (isStructuredReviewStoredResult(storedJob) && storedJob?.rendered) { const output = storedJob.rendered.endsWith("\n") ? storedJob.rendered : `${storedJob.rendered}\n`; if (!threadId) { - return output; + return worktreeBlock ? `${output}\n${worktreeBlock}` : output; } - return `${output}\nCodex session ID: ${threadId}\nResume in Codex: ${resumeCommand}\n`; + const base = `${output}\nCodex session ID: ${threadId}\nResume in Codex: ${resumeCommand}\n`; + return worktreeBlock ? `${base}\n${worktreeBlock}` : base; } const rawOutput = @@ -405,17 +446,19 @@ export function renderStoredJobResult(job, storedJob) { if (rawOutput) { const output = rawOutput.endsWith("\n") ? rawOutput : `${rawOutput}\n`; if (!threadId) { - return output; + return worktreeBlock ? 
`${output}\n${worktreeBlock}` : output; } - return `${output}\nCodex session ID: ${threadId}\nResume in Codex: ${resumeCommand}\n`; + const base = `${output}\nCodex session ID: ${threadId}\nResume in Codex: ${resumeCommand}\n`; + return worktreeBlock ? `${base}\n${worktreeBlock}` : base; } if (storedJob?.rendered) { const output = storedJob.rendered.endsWith("\n") ? storedJob.rendered : `${storedJob.rendered}\n`; if (!threadId) { - return output; + return worktreeBlock ? `${output}\n${worktreeBlock}` : output; } - return `${output}\nCodex session ID: ${threadId}\nResume in Codex: ${resumeCommand}\n`; + const base = `${output}\nCodex session ID: ${threadId}\nResume in Codex: ${resumeCommand}\n`; + return worktreeBlock ? `${base}\n${worktreeBlock}` : base; } const lines = [ @@ -442,7 +485,8 @@ export function renderStoredJobResult(job, storedJob) { lines.push("", "No captured result payload was stored for this job."); } - return `${lines.join("\n").trimEnd()}\n`; + const base = `${lines.join("\n").trimEnd()}\n`; + return worktreeBlock ? 
`${base}\n${worktreeBlock}` : base; } export function renderCancelReport(job) { diff --git a/plugins/codex/scripts/lib/spawner.mjs b/plugins/codex/scripts/lib/spawner.mjs new file mode 100644 index 00000000..173c3a94 --- /dev/null +++ b/plugins/codex/scripts/lib/spawner.mjs @@ -0,0 +1,288 @@ +import { execFileSync, spawnSync } from "node:child_process"; + +const BACKENDS = [ + { + kind: "tmux", + cmd: "tmux", + detect: (env) => Boolean(env.TMUX && env.TMUX.length > 0), + build: buildTmuxSplitArgs, + classifyFailure: classifyTmuxFailure + }, + { + kind: "ghostty-mac", + cmd: "osascript", + detect: (env, platform) => platform === "darwin" && !env.TMUX && env.TERM_PROGRAM === "ghostty", + build: buildGhosttyMacArgs, + classifyFailure: classifyGhosttyFailure + }, + { + kind: "iterm2-mac", + cmd: "osascript", + detect: (env, platform) => platform === "darwin" && !env.TMUX && env.TERM_PROGRAM === "iTerm.app", + build: buildIterm2MacArgs, + classifyFailure: classifyIterm2Failure + } +]; + +export function detectTerminal(env = process.env, platform = process.platform) { + const backend = BACKENDS.find((candidate) => candidate.detect(env, platform)); + return { kind: backend?.kind ?? 
"none" }; +} + +export function buildTmuxSplitArgs({ cwd, command }) { + return ["split-window", "-h", "-c", cwd, command]; +} + +export function composeShellInvocation({ cwd, command }) { + return `cd ${shellQuote(cwd)} && ${command}`; +} + +export function rejectControlChars(value) { + const str = String(value); + for (let index = 0; index < str.length; index += 1) { + const code = str.charCodeAt(index); + if (code >= 0x00 && code <= 0x1f && code !== 0x09) { + return { ok: false, byte: code, index }; + } + } + return { ok: true }; +} + +export function discoverCallerTty({ + startPid = process.pid, + runProbe = defaultRunProbe +} = {}) { + let pid = startPid; + for (let depth = 0; depth < 10; depth += 1) { + if (!pid || pid <= 1) { + return null; + } + + let output; + try { + output = runProbe("ps", ["-o", "tty=,ppid=", "-p", String(pid)]); + } catch { + return null; + } + + const parsed = parsePsTtyOutput(output); + if (!parsed) { + return null; + } + + if (parsed.tty && parsed.tty !== "?" && parsed.tty !== "??") { + return parsed.tty.startsWith("/dev/") ? parsed.tty : `/dev/${parsed.tty}`; + } + + pid = parsed.ppid; + } + return null; +} + +function defaultRunProbe(cmd, args) { + return execFileSync(cmd, args, { encoding: "utf8", timeout: 250 }); +} + +function parsePsTtyOutput(output) { + const line = String(output).trim().split("\n").find(Boolean); + if (!line) { + return null; + } + const parts = line.trim().split(/\s+/); + if (parts.length < 2) { + return null; + } + const ppid = Number(parts[parts.length - 1]); + return { tty: parts.slice(0, -1).join(" "), ppid: Number.isFinite(ppid) ? 
ppid : null }; +} + +export function escapeAppleScriptLiteral(value) { + return String(value).replace(/\\/g, "\\\\").replace(/"/g, '\\"'); +} + +export function osascriptArgsFromLines(lines) { + return lines.flatMap((line) => ["-e", line]); +} + +export function buildGhosttyMacArgs({ composed, callerTty: _callerTty }) { + // Ghostty 1.3's terminal object exposes id/name/working directory only — + // there is no `tty` property. Until upstream adds one, always open a new + // window. `new window` returns a window, so we drill down to a terminal + // before calling `input text`. + const literal = escapeAppleScriptLiteral(composed); + const lines = [ + 'tell application "Ghostty"', + "activate", + "set newWin to new window", + "set newTerm to terminal 1 of selected tab of newWin", + `input text "${literal}\\n" to newTerm`, + "end tell" + ]; + + return osascriptArgsFromLines(lines); +} + +export function buildIterm2MacArgs({ composed, callerTty }) { + // iTerm2 object model: window -> tabs -> sessions. `sessions` is NOT an + // element of `window`; it lives on `tab`. Traversal must nest through + // tabs to find a session whose `tty` matches the caller. 
+ const literal = escapeAppleScriptLiteral(composed); + const lines = [ + 'tell application "iTerm"', + "activate" + ]; + + if (callerTty) { + lines.push( + `set targetTty to "${escapeAppleScriptLiteral(callerTty)}"`, + "set matched to missing value", + "repeat with w in windows", + "repeat with tb in tabs of w", + "repeat with s in sessions of tb", + "if tty of s is targetTty then", + "set matched to s", + "exit repeat", + "end if", + "end repeat", + "if matched is not missing value then exit repeat", + "end repeat", + "if matched is not missing value then exit repeat", + "end repeat", + "if matched is not missing value then", + "tell matched", + "set newSession to split vertically with default profile", + "end tell", + "else", + "set newWindow to create window with default profile", + "set newSession to current session of newWindow", + "end if" + ); + } else { + lines.push( + "set newWindow to create window with default profile", + "set newSession to current session of newWindow" + ); + } + + lines.push( + `write text "${literal}" to newSession`, + "end tell" + ); + + return osascriptArgsFromLines(lines); +} + +export function spawnObserverInTerminal({ + cwd, + command, + env = process.env, + platform = process.platform, + runner = spawnSync, + discoverTty = () => discoverCallerTty() +}) { + const terminal = detectTerminal(env, platform); + const backend = BACKENDS.find((candidate) => candidate.kind === terminal.kind); + + if (!backend) { + return { spawned: false, kind: "none" }; + } + + if (backend.kind === "tmux") { + const result = runner(backend.cmd, backend.build({ cwd, command }), { stdio: "ignore" }); + return classifySpawnResult({ backend, result }); + } + + const composed = composeShellInvocation({ cwd, command }); + const guard = rejectControlChars(composed); + if (!guard.ok) { + return { + spawned: false, + kind: backend.kind, + reason: "unsafe-command", + error: unsafeCommandMessage({ guard, composed }) + }; + } + + const callerTty = discoverTty(); + 
const result = runner( + backend.cmd, + backend.build({ composed, callerTty }), + { stdio: ["ignore", "ignore", "pipe"] } + ); + + return classifySpawnResult({ backend, result }); +} + +function classifySpawnResult({ backend, result }) { + if (result.error) { + return { + spawned: false, + kind: backend.kind, + ...backend.classifyFailure(result) + }; + } + + if (result.status === 0) { + return { spawned: true, kind: backend.kind }; + } + + return { + spawned: false, + kind: backend.kind, + ...backend.classifyFailure(result) + }; +} + +function unsafeCommandMessage({ guard, composed }) { + const byteName = controlByteName(guard.byte); + const location = composed.lastIndexOf(" && ", guard.index) === -1 ? "cwd" : "command"; + return `composed command contains ${byteName} at ${location} offset ${guard.index}`; +} + +function controlByteName(byte) { + if (byte === 0x00) { + return "NUL"; + } + if (byte === 0x0a) { + return "embedded newline"; + } + if (byte === 0x0d) { + return "carriage return"; + } + return `control character 0x${byte.toString(16).padStart(2, "0")}`; +} + +function classifyTmuxFailure({ status, error }) { + if (error) { + return { error: error.message ?? String(error) }; + } + return { error: `tmux exited with status ${status}` }; +} + +export function classifyGhosttyFailure({ status, stderr, error }) { + return classifyOsascriptFailure({ kind: "ghostty-mac", status, stderr, error }); +} + +export function classifyIterm2Failure({ status, stderr, error }) { + return classifyOsascriptFailure({ kind: "iterm2-mac", status, stderr, error }); +} + +function classifyOsascriptFailure({ kind, status, stderr = "", error }) { + if (error) { + return { error: `Failed to drive ${kind}: ${error.message ?? String(error)}` }; + } + const message = Buffer.isBuffer(stderr) ? stderr.toString("utf8") : String(stderr ?? 
""); + if (message.includes("(-1743)") || /not authorized to send apple events/i.test(message)) { + return { + reason: "automation-permission-denied", + error: `Automation permission needed for ${kind}` + }; + } + const detail = message.trim() ? `: ${message.trim()}` : ""; + return { error: `Failed to drive ${kind}: osascript exited with status ${status}${detail}` }; +} + +export function shellQuote(value) { + const str = String(value); + return `'${str.replace(/'/g, `'\\''`)}'`; +} diff --git a/plugins/codex/scripts/lib/state.mjs b/plugins/codex/scripts/lib/state.mjs index 2da23498..7278d5dc 100644 --- a/plugins/codex/scripts/lib/state.mjs +++ b/plugins/codex/scripts/lib/state.mjs @@ -7,7 +7,9 @@ import { resolveWorkspaceRoot } from "./workspace.mjs"; const STATE_VERSION = 1; const PLUGIN_DATA_ENV = "CLAUDE_PLUGIN_DATA"; -const FALLBACK_STATE_ROOT_DIR = path.join(os.tmpdir(), "codex-companion"); +const DEFAULT_STATE_ROOT_DIR = path.join(os.homedir(), ".codex-companion", "state"); +const LEGACY_TMPDIR_ROOT = path.join(os.tmpdir(), "codex-companion"); +const CLAUDE_PLUGINS_DATA_DIR = path.join(os.homedir(), ".claude", "plugins", "data"); const STATE_FILE_NAME = "state.json"; const JOBS_DIR_NAME = "jobs"; const MAX_JOBS = 50; @@ -26,8 +28,61 @@ function defaultState() { }; } -export function resolveStateDir(cwd) { - const workspaceRoot = resolveWorkspaceRoot(cwd); +const LEGACY_ROOTS_ENV = "CODEX_COMPANION_LEGACY_ROOTS"; + +function resolveStateRoot() { + const pluginDataDir = process.env[PLUGIN_DATA_ENV]; + return pluginDataDir ? 
path.join(pluginDataDir, "state") : DEFAULT_STATE_ROOT_DIR; +} + +function discoverPluginDataCodexRoots() { + const out = []; + try { + if (!fs.existsSync(CLAUDE_PLUGINS_DATA_DIR)) { + return out; + } + for (const entry of fs.readdirSync(CLAUDE_PLUGINS_DATA_DIR, { withFileTypes: true })) { + if (entry.isDirectory() && /codex/i.test(entry.name)) { + out.push(path.join(CLAUDE_PLUGINS_DATA_DIR, entry.name, "state")); + } + } + } catch { + // Best-effort; ignore unreadable plugin data dir. + } + return out; +} + +function resolveLegacyRoots() { + const override = process.env[LEGACY_ROOTS_ENV]; + if (override === "") { + return []; + } + if (override) { + return override.split(path.delimiter).filter(Boolean); + } + return [DEFAULT_STATE_ROOT_DIR, LEGACY_TMPDIR_ROOT, ...discoverPluginDataCodexRoots()]; +} + +function collectCandidateStateRoots() { + const seen = new Set(); + const roots = []; + + const push = (root) => { + if (root && !seen.has(root)) { + seen.add(root); + roots.push(root); + } + }; + + push(resolveStateRoot()); + for (const root of resolveLegacyRoots()) { + push(root); + } + + return roots; +} + +function computeStateSlugHash(workspaceRoot) { let canonicalWorkspaceRoot = workspaceRoot; try { canonicalWorkspaceRoot = fs.realpathSync.native(workspaceRoot); @@ -38,9 +93,90 @@ export function resolveStateDir(cwd) { const slugSource = path.basename(workspaceRoot) || "workspace"; const slug = slugSource.replace(/[^a-zA-Z0-9._-]+/g, "-").replace(/^-+|-+$/g, "") || "workspace"; const hash = createHash("sha256").update(canonicalWorkspaceRoot).digest("hex").slice(0, 16); - const pluginDataDir = process.env[PLUGIN_DATA_ENV]; - const stateRoot = pluginDataDir ? 
path.join(pluginDataDir, "state") : FALLBACK_STATE_ROOT_DIR; - return path.join(stateRoot, `${slug}-${hash}`); + return `${slug}-${hash}`; +} + +export function resolveStateDir(cwd) { + const workspaceRoot = resolveWorkspaceRoot(cwd); + return path.join(resolveStateRoot(), computeStateSlugHash(workspaceRoot)); +} + +export function collectWorkspaceJobsAcrossRoots(cwd) { + const workspaceRoot = resolveWorkspaceRoot(cwd); + const slugHash = computeStateSlugHash(workspaceRoot); + const merged = new Map(); + + for (const stateRoot of collectCandidateStateRoots()) { + const stateFile = path.join(stateRoot, slugHash, STATE_FILE_NAME); + if (!fs.existsSync(stateFile)) { + continue; + } + try { + const parsed = JSON.parse(fs.readFileSync(stateFile, "utf8")); + const jobs = Array.isArray(parsed.jobs) ? parsed.jobs : []; + for (const job of jobs) { + if (!job?.id) { + continue; + } + const existing = merged.get(job.id); + if (!existing) { + merged.set(job.id, job); + continue; + } + const existingUpdated = String(existing.updatedAt ?? ""); + const candidateUpdated = String(job.updatedAt ?? ""); + if (candidateUpdated.localeCompare(existingUpdated) > 0) { + merged.set(job.id, job); + } + } + } catch { + // Skip corrupted state files. + } + } + + return [...merged.values()]; +} + +export function findJobByIdAcrossWorkspaces(jobId) { + if (!jobId) { + return null; + } + + for (const stateRoot of collectCandidateStateRoots()) { + if (!fs.existsSync(stateRoot)) { + continue; + } + + let entries; + try { + entries = fs.readdirSync(stateRoot, { withFileTypes: true }); + } catch { + continue; + } + + for (const entry of entries) { + if (!entry.isDirectory()) { + continue; + } + const stateDir = path.join(stateRoot, entry.name); + const stateFile = path.join(stateDir, STATE_FILE_NAME); + if (!fs.existsSync(stateFile)) { + continue; + } + try { + const parsed = JSON.parse(fs.readFileSync(stateFile, "utf8")); + const jobs = Array.isArray(parsed.jobs) ? 
parsed.jobs : []; + const job = jobs.find((entry) => entry.id === jobId); + if (job) { + return { stateDir, job }; + } + } catch { + // Skip corrupted state files + } + } + } + + return null; } export function resolveStateFile(cwd) { @@ -109,6 +245,7 @@ export function saveState(cwd, state) { } removeJobFile(resolveJobFile(cwd, job.id)); removeFileIfExists(job.logFile); + removeFileIfExists(job.eventFile); } fs.writeFileSync(resolveStateFile(cwd), `${JSON.stringify(nextState, null, 2)}\n`, "utf8"); @@ -185,6 +322,11 @@ export function resolveJobLogFile(cwd, jobId) { return path.join(resolveJobsDir(cwd), `${jobId}.log`); } +export function resolveJobEventFile(cwd, jobId) { + ensureStateDir(cwd); + return path.join(resolveJobsDir(cwd), `${jobId}.events.jsonl`); +} + export function resolveJobFile(cwd, jobId) { ensureStateDir(cwd); return path.join(resolveJobsDir(cwd), `${jobId}.json`); diff --git a/plugins/codex/scripts/lib/tracked-jobs.mjs b/plugins/codex/scripts/lib/tracked-jobs.mjs index 90286901..e63d9f9b 100644 --- a/plugins/codex/scripts/lib/tracked-jobs.mjs +++ b/plugins/codex/scripts/lib/tracked-jobs.mjs @@ -1,7 +1,9 @@ import fs from "node:fs"; +import path from "node:path"; import process from "node:process"; -import { readJobFile, resolveJobFile, resolveJobLogFile, upsertJob, writeJobFile } from "./state.mjs"; +import { emitEvent, EVENT_TYPES } from "./event-stream.mjs"; +import { readJobFile, resolveJobFile, resolveJobLogFile, resolveJobsDir, upsertJob, writeJobFile } from "./state.mjs"; export const SESSION_ID_ENV = "CODEX_COMPANION_SESSION_ID"; @@ -48,6 +50,22 @@ export function appendLogBlock(logFile, title, body) { fs.appendFileSync(logFile, `\n[${nowIso()}] ${title}\n${String(body).trimEnd()}\n`, "utf8"); } +export function resolveSignalFile(jobsDir, jobId) { + return path.join(jobsDir, `${jobId}.done`); +} + +export function writeCompletionSignalFile(jobsDir, jobId, status, summary) { + const signalFile = resolveSignalFile(jobsDir, jobId); + const 
safeStatus = status === "completed" ? "completed" : "failed"; + const line = `[${nowIso()}] ${safeStatus} ${jobId}${summary ? ` ${summary}` : ""}`; + try { + fs.writeFileSync(signalFile, `${line}\n`, "utf8"); + } catch { + // Signal file is best-effort; do not fail the job if it cannot be written. + } + return signalFile; +} + export function createJobLogFile(workspaceRoot, jobId, title) { const logFile = resolveJobLogFile(workspaceRoot, jobId); fs.writeFileSync(logFile, "", "utf8"); @@ -114,8 +132,19 @@ export function createJobProgressUpdater(workspaceRoot, jobId) { }; } -export function createProgressReporter({ stderr = false, logFile = null, onEvent = null } = {}) { - if (!stderr && !logFile && !onEvent) { +function inferEventStreamType(event) { + const title = event.logTitle ?? ""; + if (/reasoning summary/i.test(title)) { + return EVENT_TYPES.REASONING; + } + if (/message$/i.test(title) || /review output/i.test(title)) { + return EVENT_TYPES.MESSAGE; + } + return EVENT_TYPES.PHASE; +} + +export function createProgressReporter({ stderr = false, logFile = null, onEvent = null, eventStream = null } = {}) { + if (!stderr && !logFile && !onEvent && !eventStream) { return null; } @@ -127,6 +156,26 @@ export function createProgressReporter({ stderr = false, logFile = null, onEvent } appendLogLine(logFile, event.message); appendLogBlock(logFile, event.logTitle, event.logBody); + if (eventStream) { + const type = inferEventStreamType(event); + const data = { phase: event.phase }; + if (event.message) { + data.message = event.message; + } + if (event.threadId) { + data.threadId = event.threadId; + } + if (event.turnId) { + data.turnId = event.turnId; + } + if (event.logTitle) { + data.logTitle = event.logTitle; + } + if (event.logBody) { + data.logBody = event.logBody; + } + emitEvent(eventStream, type, data); + } onEvent?.(event); }; } @@ -146,11 +195,14 @@ export async function runTrackedJob(job, runner, options = {}) { startedAt: nowIso(), phase: "starting", pid: 
process.pid, - logFile: options.logFile ?? job.logFile ?? null + logFile: options.logFile ?? job.logFile ?? null, + eventFile: options.eventFile ?? job.eventFile ?? null }; writeJobFile(job.workspaceRoot, job.id, runningRecord); upsertJob(job.workspaceRoot, runningRecord); + const jobsDir = resolveJobsDir(job.workspaceRoot); + try { const execution = await runner(); const completionStatus = execution.exitStatus === 0 ? "completed" : "failed"; @@ -177,6 +229,7 @@ export async function runTrackedJob(job, runner, options = {}) { completedAt }); appendLogBlock(options.logFile ?? job.logFile ?? null, "Final output", execution.rendered); + writeCompletionSignalFile(jobsDir, job.id, completionStatus, execution.summary); return execution; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); @@ -199,6 +252,7 @@ export async function runTrackedJob(job, runner, options = {}) { errorMessage, completedAt }); + writeCompletionSignalFile(jobsDir, job.id, "failed", errorMessage); throw error; } } diff --git a/plugins/codex/scripts/lib/workspace.mjs b/plugins/codex/scripts/lib/workspace.mjs index 89a0060b..5e203e83 100644 --- a/plugins/codex/scripts/lib/workspace.mjs +++ b/plugins/codex/scripts/lib/workspace.mjs @@ -1,4 +1,8 @@ +import fs from "node:fs"; +import path from "node:path"; + import { ensureGitRepository } from "./git.mjs"; +import { runCommand } from "./process.mjs"; export function resolveWorkspaceRoot(cwd) { try { @@ -7,3 +11,76 @@ export function resolveWorkspaceRoot(cwd) { return cwd; } } + +const WORKTREE_DIR = ".claude/worktrees"; +const WORKTREE_BRANCH_PREFIX = "codex-rescue"; +const WORKTREE_PROMPT_MAX_LENGTH = 32; + +export function resolveWorktreePath(sourceRoot, jobId) { + return path.join(sourceRoot, WORKTREE_DIR, jobId); +} + +export function generateWorktreeBranch(jobId, prompt) { + if (!prompt || !prompt.trim()) { + return `${WORKTREE_BRANCH_PREFIX}/${jobId}`; + } + + const normalized = prompt + .trim() + 
.toLowerCase() + .replace(/[^a-z0-9\s-]/g, "") + .replace(/\s+/g, "-") + .replace(/-+/g, "-") + .replace(/^-|-$/g, ""); + + if (!normalized) { + return `${WORKTREE_BRANCH_PREFIX}/${jobId}`; + } + + const truncated = normalized.slice(0, WORKTREE_PROMPT_MAX_LENGTH).replace(/-$/, ""); + return `${WORKTREE_BRANCH_PREFIX}/${jobId}-${truncated}`; +} + +export function createWorktree(sourceRoot, jobId, prompt) { + const worktreePath = resolveWorktreePath(sourceRoot, jobId); + const worktreeBranch = generateWorktreeBranch(jobId, prompt); + + // Get current branch as base + const baseResult = runCommand("git", ["branch", "--show-current"], { cwd: sourceRoot }); + const baseBranch = baseResult.status === 0 && baseResult.stdout.trim() + ? baseResult.stdout.trim() + : "HEAD"; + + // Check if worktree path already exists + if (fs.existsSync(worktreePath)) { + // Check if it's already a worktree + const listResult = runCommand("git", ["worktree", "list", "--porcelain"], { cwd: sourceRoot }); + if (listResult.status === 0 && listResult.stdout.includes(worktreePath)) { + // Reuse existing worktree + return { worktreePath, worktreeBranch, worktreeBaseBranch: baseBranch }; + } + // Path exists but not as worktree - error + throw new Error( + `Worktree path already exists: ${worktreePath}\n` + + `Please remove it manually or use a different job ID.` + ); + } + + // Create parent directory + fs.mkdirSync(path.dirname(worktreePath), { recursive: true }); + + // Create worktree + const createResult = runCommand( + "git", + ["worktree", "add", "-b", worktreeBranch, worktreePath], + { cwd: sourceRoot } + ); + + if (createResult.status !== 0) { + throw new Error( + `Failed to create worktree: ${createResult.stderr || createResult.stdout}` + ); + } + + return { worktreePath, worktreeBranch, worktreeBaseBranch: baseBranch }; +} diff --git a/scripts/pre-push-check.mjs b/scripts/pre-push-check.mjs new file mode 100644 index 00000000..7be7a31d --- /dev/null +++ b/scripts/pre-push-check.mjs @@ 
-0,0 +1,245 @@ +#!/usr/bin/env node + +/** + * Pre-push hook: validates CHANGELOG, version bump, and README consistency. + * + * Runs via git pre-push hook. Analyzes commits being pushed and checks: + * 1. If package.json version changed → CHANGELOG.md must contain the new version + * 2. If plugin source files changed but version didn't bump → suggest bump type + * 3. Auto-detects suggested bump type (major / minor / patch) from file changes + */ + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; +import process from "node:process"; + +const ROOT = path.resolve(path.dirname(new URL(import.meta.url).pathname), ".."); + +// Files that constitute user-facing functionality +const SOURCE_GLOBS = [ + "plugins/codex/scripts/", + "plugins/codex/commands/", + "plugins/codex/agents/", + "plugins/codex/skills/", + "plugins/codex/hooks/", + "plugins/codex/prompts/" +]; + +// Files that trigger minor bump (new modules, not new commands) +const MINOR_INDICATORS = [ + "plugins/codex/scripts/lib/", + "plugins/codex/skills/", + "plugins/codex/agents/", + "plugins/codex/hooks/" +]; + +// Files that trigger major bump (new user-facing commands) +const MAJOR_INDICATORS = [ + "plugins/codex/commands/" +]; + +function git(args, options = {}) { + return spawnSync("git", args, { + cwd: options.cwd ?? 
ROOT, + encoding: "utf8" + }); +} + +function getUpstream() { + const branch = git(["rev-parse", "--abbrev-ref", "HEAD"]).stdout?.trim(); + if (!branch || branch === "HEAD") { + return null; + } + const upstream = git(["rev-parse", "--abbrev-ref", `${branch}@{upstream}`]).stdout?.trim(); + return upstream || null; +} + +function getPushRange(upstream) { + // If no upstream, compare against origin/main + const base = upstream || "origin/main"; + return `${base}..HEAD`; +} + +function getCommitMessages(range) { + const result = git(["log", range, "--format=%s"]); + if (result.status !== 0) { + return []; + } + return result.stdout.trim().split("\n").filter(Boolean); +} + +function getChangedFiles(range) { + const result = git(["diff", "--name-only", range]); + if (result.status !== 0) { + return []; + } + return result.stdout.trim().split("\n").filter(Boolean); +} + +function getVersionAtRef(ref) { + const result = git(["show", `${ref}:package.json`]); + if (result.status !== 0) { + return null; + } + try { + return JSON.parse(result.stdout).version; + } catch { + return null; + } +} + +function getCurrentVersion() { + const pkg = JSON.parse(fs.readFileSync(path.join(ROOT, "package.json"), "utf8")); + return pkg.version; +} + +function isSourceFile(file) { + return SOURCE_GLOBS.some((glob) => file.startsWith(glob)); +} + +/** + * Detect suggested bump type from file changes and commit messages. + * + * - major: new commands (breaking API surface) or BREAKING CHANGE in commits + * - minor: new lib modules, skills, agents, hooks, or feat: commits + * - patch: bug fixes, docs, tests, refactors + */ +function detectBumpType(files, commits) { + const hasBreakingCommit = commits.some( + (msg) => /BREAKING CHANGE/i.test(msg) || /^[a-z]+(\(.+\))?!:/.test(msg) + ); + const hasNewCommand = files.some((file) => + file.startsWith("plugins/codex/commands/") && !file.endsWith(".md") + ? 
false // only new .md command files count + : file.startsWith("plugins/codex/commands/") + ); + + if (hasBreakingCommit || hasNewCommand) { + return { type: "major", reason: hasBreakingCommit ? "BREAKING CHANGE in commit" : "new command file added" }; + } + + const hasMinorChange = files.some((file) => MINOR_INDICATORS.some((prefix) => file.startsWith(prefix))); + const hasFeatCommit = commits.some((msg) => /^feat(\(.+\))?:/i.test(msg)); + + if (hasMinorChange || hasFeatCommit) { + return { type: "minor", reason: hasFeatCommit ? "feat: commit found" : "new module/skill/agent/hook added" }; + } + + return { type: "patch", reason: "bug fix, docs, or refactor" }; +} + +function parseVersion(v) { + const match = v.match(/^(\d+)\.(\d+)\.(\d+)/); + if (!match) { + return null; + } + return { major: +match[1], minor: +match[2], patch: +match[3] }; +} + +function versionDiff(oldV, newV) { + const old = parseVersion(oldV); + const cur = parseVersion(newV); + if (!old || !cur) { + return null; + } + if (cur.major > old.major) return "major"; + if (cur.minor > old.minor) return "minor"; + if (cur.patch > old.patch) return "patch"; + return null; // same or downgrade +} + +function checkChangelogHasVersion(version) { + const changelogPath = path.join(ROOT, "CHANGELOG.md"); + if (!fs.existsSync(changelogPath)) { + return false; + } + const content = fs.readFileSync(changelogPath, "utf8"); + // Match ## [1.2.3] or ## 1.2.3 + const pattern = new RegExp(`^##\\s+\\[?${version.replace(/\./g, "\\.")}\\]?`, "m"); + return pattern.test(content); +} + +function main() { + const upstream = getUpstream(); + const range = getPushRange(upstream); + + const commits = getCommitMessages(range); + if (commits.length === 0) { + // Nothing to push + process.exit(0); + } + + const files = getChangedFiles(range); + const baseRef = upstream || "origin/main"; + const baseVersion = getVersionAtRef(baseRef); + const currentVersion = getCurrentVersion(); + const versionChanged = baseVersion !== 
currentVersion; + const changelogUpdated = checkChangelogHasVersion(currentVersion); + const readmeChanged = files.includes("README.md") || files.includes("README.zh-CN.md"); + const sourceChanged = files.some(isSourceFile); + + const suggested = detectBumpType(files, commits); + const actualBump = versionChanged ? versionDiff(baseVersion, currentVersion) : null; + + const errors = []; + const warnings = []; + + if (versionChanged) { + // Version was bumped — must have matching CHANGELOG entry + if (!changelogUpdated) { + errors.push( + `Version bumped to ${currentVersion} but CHANGELOG.md has no entry for this version.\n` + + ` Add a ## [${currentVersion}] section to CHANGELOG.md.` + ); + } + // Check bump type matches suggested + if (actualBump && actualBump !== suggested.type) { + warnings.push( + `Version bump is ${actualBump} (${baseVersion} → ${currentVersion}), ` + + `but changes suggest ${suggested.type} (${suggested.reason}).` + ); + } + } else if (sourceChanged) { + // Source files changed but version not bumped + errors.push( + `Plugin source files changed but version was not bumped.\n` + + ` Current version: ${currentVersion}\n` + + ` Suggested bump: ${suggested.type} (${suggested.reason})\n` + + ` Run: node scripts/bump-version.mjs ` + ); + } + + if (sourceChanged && !readmeChanged && versionChanged) { + warnings.push( + `Version was bumped but README.md was not updated.\n` + + ` Consider updating documentation for user-facing changes.` + ); + } + + // Output + if (errors.length > 0 || warnings.length > 0) { + process.stderr.write("\n Pre-push checks:\n\n"); + } + + for (const err of errors) { + process.stderr.write(` ✗ ${err}\n\n`); + } + for (const warn of warnings) { + process.stderr.write(` ⚠ ${warn}\n\n`); + } + + if (errors.length > 0) { + process.stderr.write(` Push blocked. 
Fix the issues above, then push again.\n`); + process.stderr.write(` To bypass: git push --no-verify\n\n`); + process.exit(1); + } + + if (warnings.length > 0) { + process.stderr.write(` Push proceeding with warnings.\n\n`); + } + + process.exit(0); +} + +main(); diff --git a/tests/codex-config.test.mjs b/tests/codex-config.test.mjs new file mode 100644 index 00000000..0fc1f2c5 --- /dev/null +++ b/tests/codex-config.test.mjs @@ -0,0 +1,125 @@ +import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; + +import { readSandboxModeFromFile, resolveCodexSandboxMode, VALID_SANDBOX_MODES } from "../plugins/codex/scripts/lib/codex-config.mjs"; + +describe("readSandboxModeFromFile", () => { + let tempDir; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), "codex-config-test-")); + }); + + afterEach(() => { + rmSync(tempDir, { recursive: true, force: true }); + }); + + it("returns null for non-existent file", () => { + const result = readSandboxModeFromFile(join(tempDir, "does-not-exist.toml")); + assert.equal(result, null); + }); + + it("reads valid sandbox_mode values", () => { + for (const mode of VALID_SANDBOX_MODES) { + const file = join(tempDir, `config-${mode}.toml`); + writeFileSync(file, `sandbox_mode = "${mode}"\n`); + assert.equal(readSandboxModeFromFile(file), mode); + } + }); + + it("returns null for invalid sandbox_mode value", () => { + const file = join(tempDir, "config.toml"); + writeFileSync(file, 'sandbox_mode = "invalid-mode"\n'); + assert.equal(readSandboxModeFromFile(file), null); + }); + + it("handles whitespace and comments", () => { + const file = join(tempDir, "config.toml"); + writeFileSync(file, ' sandbox_mode = "danger-full-access" # full access\n'); + assert.equal(readSandboxModeFromFile(file), "danger-full-access"); + }); + + it("ignores 
commented-out sandbox_mode", () => { + const file = join(tempDir, "config.toml"); + writeFileSync(file, '# sandbox_mode = "danger-full-access"\n'); + assert.equal(readSandboxModeFromFile(file), null); + }); + + it("handles file with other config values", () => { + const file = join(tempDir, "config.toml"); + writeFileSync(file, [ + 'model = "gpt-5.4-mini"', + 'model_reasoning_effort = "high"', + 'sandbox_mode = "workspace-write"', + 'network_access = true' + ].join("\n")); + assert.equal(readSandboxModeFromFile(file), "workspace-write"); + }); +}); + +describe("resolveCodexSandboxMode", () => { + let tempDir; + let originalHome; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), "codex-config-resolve-test-")); + originalHome = process.env.HOME; + }); + + afterEach(() => { + process.env.HOME = originalHome; + rmSync(tempDir, { recursive: true, force: true }); + }); + + it("returns null when no config files exist", () => { + process.env.HOME = join(tempDir, "empty-home"); + mkdirSync(process.env.HOME, { recursive: true }); + const result = resolveCodexSandboxMode(tempDir); + assert.equal(result, null); + }); + + it("reads from user-level config when project config is absent", () => { + process.env.HOME = tempDir; + const userCodexDir = join(tempDir, ".codex"); + mkdirSync(userCodexDir, { recursive: true }); + writeFileSync(join(userCodexDir, "config.toml"), 'sandbox_mode = "read-only"\n'); + + const result = resolveCodexSandboxMode(join(tempDir, "workspace")); + assert.equal(result, "read-only"); + }); + + it("prefers project-level config over user-level", () => { + process.env.HOME = tempDir; + + const userCodexDir = join(tempDir, ".codex"); + mkdirSync(userCodexDir, { recursive: true }); + writeFileSync(join(userCodexDir, "config.toml"), 'sandbox_mode = "read-only"\n'); + + const workspaceRoot = join(tempDir, "workspace"); + const projectCodexDir = join(workspaceRoot, ".codex"); + mkdirSync(projectCodexDir, { recursive: true }); + 
writeFileSync(join(projectCodexDir, "config.toml"), 'sandbox_mode = "danger-full-access"\n'); + + const result = resolveCodexSandboxMode(workspaceRoot); + assert.equal(result, "danger-full-access"); + }); + + it("falls back to user-level when project config has invalid value", () => { + process.env.HOME = tempDir; + + const userCodexDir = join(tempDir, ".codex"); + mkdirSync(userCodexDir, { recursive: true }); + writeFileSync(join(userCodexDir, "config.toml"), 'sandbox_mode = "workspace-write"\n'); + + const workspaceRoot = join(tempDir, "workspace"); + const projectCodexDir = join(workspaceRoot, ".codex"); + mkdirSync(projectCodexDir, { recursive: true }); + writeFileSync(join(projectCodexDir, "config.toml"), 'sandbox_mode = "invalid"\n'); + + const result = resolveCodexSandboxMode(workspaceRoot); + assert.equal(result, "workspace-write"); + }); +}); diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs index 3724ffa4..fcf1567c 100644 --- a/tests/commands.test.mjs +++ b/tests/commands.test.mjs @@ -75,6 +75,7 @@ test("continue is not exposed as a user-facing command", () => { assert.deepEqual(commandFiles, [ "adversarial-review.md", "cancel.md", + "observe.md", "rescue.md", "result.md", "review.md", diff --git a/tests/event-stream.test.mjs b/tests/event-stream.test.mjs new file mode 100644 index 00000000..48cc9dbe --- /dev/null +++ b/tests/event-stream.test.mjs @@ -0,0 +1,109 @@ +import { mkdtempSync, readFileSync, existsSync, rmSync, chmodSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; + +import { createEventStream, emitEvent, closeEventStream, EVENT_TYPES } from "../plugins/codex/scripts/lib/event-stream.mjs"; + +describe("createEventStream", () => { + let tempDir; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), "event-stream-test-")); + }); + + afterEach(() => { + rmSync(tempDir, { 
recursive: true, force: true }); + }); + + it("creates an empty .events.jsonl file", () => { + const stream = createEventStream("job-123", tempDir); + assert.ok(stream.eventFile.endsWith("job-123.events.jsonl")); + assert.ok(existsSync(stream.eventFile)); + assert.equal(readFileSync(stream.eventFile, "utf8"), ""); + }); + + it("returns a stream object with eventFile and jobId", () => { + const stream = createEventStream("job-abc", tempDir); + assert.equal(stream.jobId, "job-abc"); + assert.ok(typeof stream.eventFile === "string"); + }); +}); + +describe("emitEvent", () => { + let tempDir; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), "event-stream-test-")); + }); + + afterEach(() => { + rmSync(tempDir, { recursive: true, force: true }); + }); + + it("appends a JSON line to the event file", () => { + const stream = createEventStream("job-1", tempDir); + emitEvent(stream, EVENT_TYPES.PHASE, { phase: "starting", message: "Thread ready" }); + + const content = readFileSync(stream.eventFile, "utf8").trim(); + const parsed = JSON.parse(content); + assert.equal(parsed.type, "phase"); + assert.equal(parsed.phase, "starting"); + assert.equal(parsed.message, "Thread ready"); + assert.ok(parsed.t); // ISO timestamp + }); + + it("appends multiple events in order", () => { + const stream = createEventStream("job-2", tempDir); + emitEvent(stream, EVENT_TYPES.PHASE, { phase: "starting" }); + emitEvent(stream, EVENT_TYPES.TOOL_CALL, { tool: "Read", path: "src/foo.ts" }); + emitEvent(stream, EVENT_TYPES.TOOL_DONE, { tool: "Read" }); + + const lines = readFileSync(stream.eventFile, "utf8").trim().split("\n"); + assert.equal(lines.length, 3); + assert.equal(JSON.parse(lines[0]).type, "phase"); + assert.equal(JSON.parse(lines[1]).type, "tool_call"); + assert.equal(JSON.parse(lines[2]).type, "tool_done"); + }); + + it("silently ignores write failures when stream has no eventFile", () => { + emitEvent({ eventFile: null }, EVENT_TYPES.PHASE, { phase: "test" }); + // No 
error thrown + }); + + it("silently ignores write failures on read-only directory", () => { + const stream = createEventStream("job-3", tempDir); + // Remove write permission on the file + chmodSync(stream.eventFile, 0o444); + // Should not throw + emitEvent(stream, EVENT_TYPES.PHASE, { phase: "test" }); + // Restore permissions for cleanup + chmodSync(stream.eventFile, 0o644); + }); +}); + +describe("closeEventStream", () => { + it("is a no-op that does not throw", () => { + assert.doesNotThrow(() => closeEventStream(null)); + assert.doesNotThrow(() => closeEventStream({ eventFile: "/tmp/fake" })); + }); +}); + +describe("EVENT_TYPES", () => { + it("has all expected event type constants", () => { + assert.equal(EVENT_TYPES.PHASE, "phase"); + assert.equal(EVENT_TYPES.TOOL_CALL, "tool_call"); + assert.equal(EVENT_TYPES.TOOL_DONE, "tool_done"); + assert.equal(EVENT_TYPES.COMMAND, "command"); + assert.equal(EVENT_TYPES.COMMAND_DONE, "command_done"); + assert.equal(EVENT_TYPES.FILE_CHANGE, "file_change"); + assert.equal(EVENT_TYPES.MESSAGE, "message"); + assert.equal(EVENT_TYPES.REASONING, "reasoning"); + assert.equal(EVENT_TYPES.COMPLETED, "completed"); + }); + + it("is frozen (immutable)", () => { + assert.ok(Object.isFrozen(EVENT_TYPES)); + }); +}); diff --git a/tests/helpers.mjs b/tests/helpers.mjs index 945ae0e7..81f4b3c7 100644 --- a/tests/helpers.mjs +++ b/tests/helpers.mjs @@ -4,6 +4,51 @@ import path from "node:path"; import process from "node:process"; import { spawnSync } from "node:child_process"; +// Guard against test runs polluting the real user plugin data dir. If +// CLAUDE_PLUGIN_DATA is unset or points outside the OS tmpdir (which is what +// happens when `npm test` is invoked from a Claude Code session that inherits +// the host plugin path), redirect it to a per-suite tmp dir so any state the +// companion script writes lands somewhere we can ignore. 
+const TMPDIR_REAL = (() => { + try { + return fs.realpathSync.native(os.tmpdir()); + } catch { + return os.tmpdir(); + } +})(); + +function isInsideTmpdir(target) { + if (!target) { + return false; + } + let resolved = target; + try { + resolved = fs.realpathSync.native(target); + } catch { + resolved = path.resolve(target); + } + const tmpdirWithSep = TMPDIR_REAL.endsWith(path.sep) ? TMPDIR_REAL : `${TMPDIR_REAL}${path.sep}`; + return resolved === TMPDIR_REAL || resolved.startsWith(tmpdirWithSep); +} + +if (!isInsideTmpdir(process.env.CLAUDE_PLUGIN_DATA)) { + process.env.CLAUDE_PLUGIN_DATA = fs.mkdtempSync(path.join(os.tmpdir(), "codex-plugin-test-suite-")); +} + +// Default to fully-isolated state scanning. Tests that want to verify the +// multi-root fallback behavior can override CODEX_COMPANION_LEGACY_ROOTS in +// their own setup. +if (process.env.CODEX_COMPANION_LEGACY_ROOTS == null) { + process.env.CODEX_COMPANION_LEGACY_ROOTS = ""; +} + +// Drop the session id inherited from the host Claude Code session. Otherwise +// status/result tests that seed fixture jobs without a sessionId hit the +// filterJobsForCurrentSession path and see an empty list. Tests that +// intentionally exercise session-scoped filtering set the env explicitly when +// spawning subprocesses. 
+delete process.env.CODEX_COMPANION_SESSION_ID; + export function makeTempDir(prefix = "codex-plugin-test-") { return fs.mkdtempSync(path.join(os.tmpdir(), prefix)); } diff --git a/tests/job-control.test.mjs b/tests/job-control.test.mjs new file mode 100644 index 00000000..3b377724 --- /dev/null +++ b/tests/job-control.test.mjs @@ -0,0 +1,244 @@ +import fs from "node:fs"; +import path from "node:path"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; + +import { + buildSingleJobSnapshot, + buildStatusSnapshot, + resolveCancelableJob, + resolveResultJob +} from "../plugins/codex/scripts/lib/job-control.mjs"; +import { collectWorkspaceJobsAcrossRoots, resolveStateDir } from "../plugins/codex/scripts/lib/state.mjs"; + +describe("job-control cross-workspace fallback", () => { + let pluginDataDir; + let currentWorkspace; + let previousPluginData; + + beforeEach(() => { + pluginDataDir = mkdtempSync(path.join(tmpdir(), "job-control-cross-")); + currentWorkspace = mkdtempSync(path.join(tmpdir(), "job-control-cwd-")); + previousPluginData = process.env.CLAUDE_PLUGIN_DATA; + process.env.CLAUDE_PLUGIN_DATA = pluginDataDir; + }); + + afterEach(() => { + if (previousPluginData === undefined) { + delete process.env.CLAUDE_PLUGIN_DATA; + } else { + process.env.CLAUDE_PLUGIN_DATA = previousPluginData; + } + rmSync(pluginDataDir, { recursive: true, force: true }); + rmSync(currentWorkspace, { recursive: true, force: true }); + }); + + function writeRemoteWorkspaceJob(slug, job) { + const stateDir = path.join(pluginDataDir, "state", slug); + fs.mkdirSync(stateDir, { recursive: true }); + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify({ version: 1, jobs: [job] }, null, 2)}\n`, + "utf8" + ); + return stateDir; + } + + it("buildSingleJobSnapshot falls back to cross-workspace by job id", () => { + const job = { + id: 
"task-abc-running", + status: "running", + workspaceRoot: "/some/other/repo", + logFile: path.join(pluginDataDir, "log.txt"), + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString() + }; + const stateDir = writeRemoteWorkspaceJob("remote-aaaaaaaaaaaaaaaa", job); + + const snapshot = buildSingleJobSnapshot(currentWorkspace, "task-abc-running"); + assert.equal(snapshot.crossWorkspace, true); + assert.equal(snapshot.crossWorkspaceStateDir, stateDir); + assert.equal(snapshot.workspaceRoot, "/some/other/repo"); + assert.equal(snapshot.job.id, "task-abc-running"); + assert.equal(snapshot.job.status, "running"); + }); + + it("buildSingleJobSnapshot still throws when job id is unknown anywhere", () => { + assert.throws( + () => buildSingleJobSnapshot(currentWorkspace, "task-nope"), + /No job found for "task-nope"/ + ); + }); + + it("resolveResultJob falls back to cross-workspace finished job", () => { + const job = { + id: "task-done-1", + status: "completed", + workspaceRoot: "/some/other/repo", + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString() + }; + writeRemoteWorkspaceJob("remote-bbbbbbbbbbbbbbbb", job); + + const result = resolveResultJob(currentWorkspace, "task-done-1"); + assert.equal(result.crossWorkspace, true); + assert.equal(result.workspaceRoot, "/some/other/repo"); + assert.equal(result.job.id, "task-done-1"); + }); + + it("resolveResultJob rejects cross-workspace job that is still running", () => { + const job = { + id: "task-still-running", + status: "running", + workspaceRoot: "/some/other/repo" + }; + writeRemoteWorkspaceJob("remote-cccccccccccccccc", job); + + assert.throws( + () => resolveResultJob(currentWorkspace, "task-still-running"), + /is still running in another workspace/ + ); + }); + + it("resolveCancelableJob falls back to cross-workspace running job by id", () => { + const job = { + id: "task-cancelable", + status: "running", + workspaceRoot: "/some/other/repo", + pid: 0 + }; + 
writeRemoteWorkspaceJob("remote-dddddddddddddddd", job); + + const result = resolveCancelableJob(currentWorkspace, "task-cancelable", { env: {} }); + assert.equal(result.crossWorkspace, true); + assert.equal(result.workspaceRoot, "/some/other/repo"); + assert.equal(result.job.id, "task-cancelable"); + }); + + it("resolveCancelableJob rejects a non-active cross-workspace match with a clear message", () => { + const job = { + id: "task-not-active", + status: "completed", + workspaceRoot: "/some/other/repo" + }; + writeRemoteWorkspaceJob("remote-eeeeeeeeeeeeeeee", job); + + assert.throws( + () => resolveCancelableJob(currentWorkspace, "task-not-active", { env: {} }), + /Nothing to cancel/ + ); + }); + + it("resolveCancelableJob without reference still requires a local active job", () => { + assert.throws( + () => resolveCancelableJob(currentWorkspace, "", { env: {} }), + /No active Codex jobs to cancel/ + ); + }); +}); + +describe("multi-root state scan", () => { + let primaryDataDir; + let legacyRoot; + let currentWorkspace; + let previousPluginData; + let previousLegacyRoots; + + beforeEach(() => { + primaryDataDir = mkdtempSync(path.join(tmpdir(), "multi-root-primary-")); + legacyRoot = mkdtempSync(path.join(tmpdir(), "multi-root-legacy-")); + currentWorkspace = mkdtempSync(path.join(tmpdir(), "multi-root-cwd-")); + previousPluginData = process.env.CLAUDE_PLUGIN_DATA; + previousLegacyRoots = process.env.CODEX_COMPANION_LEGACY_ROOTS; + process.env.CLAUDE_PLUGIN_DATA = primaryDataDir; + process.env.CODEX_COMPANION_LEGACY_ROOTS = legacyRoot; + }); + + afterEach(() => { + if (previousPluginData === undefined) { + delete process.env.CLAUDE_PLUGIN_DATA; + } else { + process.env.CLAUDE_PLUGIN_DATA = previousPluginData; + } + if (previousLegacyRoots === undefined) { + delete process.env.CODEX_COMPANION_LEGACY_ROOTS; + } else { + process.env.CODEX_COMPANION_LEGACY_ROOTS = previousLegacyRoots; + } + rmSync(primaryDataDir, { recursive: true, force: true }); + 
rmSync(legacyRoot, { recursive: true, force: true }); + rmSync(currentWorkspace, { recursive: true, force: true }); + }); + + function writeStateAt(rootDir, slug, state) { + const stateDir = path.join(rootDir, slug); + fs.mkdirSync(stateDir, { recursive: true }); + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify(state, null, 2)}\n`, + "utf8" + ); + return stateDir; + } + + it("findJobByIdAcrossWorkspaces falls through to a legacy root", () => { + const job = { + id: "task-only-in-legacy", + status: "completed", + workspaceRoot: "/legacy/repo", + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString() + }; + const stateDir = writeStateAt(legacyRoot, "legacy-foo-0011223344556677", { + version: 1, + jobs: [job] + }); + + const snapshot = buildSingleJobSnapshot(currentWorkspace, "task-only-in-legacy"); + assert.equal(snapshot.crossWorkspace, true); + assert.equal(snapshot.crossWorkspaceStateDir, stateDir); + assert.equal(snapshot.job.id, "task-only-in-legacy"); + }); + + it("buildStatusSnapshot --all merges jobs for the same workspace across roots", () => { + const primaryStateDir = resolveStateDir(currentWorkspace); + const slug = path.basename(primaryStateDir); + + writeStateAt(path.join(primaryDataDir, "state"), slug, { + version: 1, + jobs: [ + { + id: "task-primary", + status: "completed", + createdAt: "2026-05-22T10:00:00.000Z", + updatedAt: "2026-05-22T10:00:00.000Z" + } + ] + }); + + writeStateAt(legacyRoot, slug, { + version: 1, + jobs: [ + { + id: "task-legacy", + status: "completed", + createdAt: "2026-05-22T09:00:00.000Z", + updatedAt: "2026-05-22T09:00:00.000Z" + } + ] + }); + + const merged = collectWorkspaceJobsAcrossRoots(currentWorkspace) + .map((job) => job.id) + .sort(); + assert.deepEqual(merged, ["task-legacy", "task-primary"]); + + const snapshot = buildStatusSnapshot(currentWorkspace, { all: true, env: {} }); + const ids = [...snapshot.running, ...(snapshot.latestFinished ? 
[snapshot.latestFinished] : []), ...snapshot.recent] + .map((job) => job.id) + .sort(); + assert.deepEqual(ids, ["task-legacy", "task-primary"]); + }); +}); diff --git a/tests/observe.test.mjs b/tests/observe.test.mjs new file mode 100644 index 00000000..b6102229 --- /dev/null +++ b/tests/observe.test.mjs @@ -0,0 +1,288 @@ +import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; + +import { handleObserveCommand, handleObserveSpawn, readEventsFromOffset, renderEvent } from "../plugins/codex/scripts/lib/observe.mjs"; +import { EVENT_TYPES } from "../plugins/codex/scripts/lib/event-stream.mjs"; +import { findJobByIdAcrossWorkspaces } from "../plugins/codex/scripts/lib/state.mjs"; + +describe("readEventsFromOffset", () => { + let tempDir; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), "observe-test-")); + }); + + afterEach(() => { + rmSync(tempDir, { recursive: true, force: true }); + }); + + it("returns empty events for missing file", () => { + const result = readEventsFromOffset(join(tempDir, "missing.jsonl"), 0); + assert.deepEqual(result.events, []); + assert.equal(result.newOffset, 0); + }); + + it("returns empty events for empty file", () => { + const file = join(tempDir, "empty.jsonl"); + writeFileSync(file, ""); + const result = readEventsFromOffset(file, 0); + assert.deepEqual(result.events, []); + assert.equal(result.newOffset, 0); + }); + + it("parses all lines from offset 0", () => { + const file = join(tempDir, "events.jsonl"); + const line1 = JSON.stringify({ t: "2026-01-01", type: "phase", phase: "starting" }); + const line2 = JSON.stringify({ t: "2026-01-01", type: "message", text: "hello" }); + writeFileSync(file, `${line1}\n${line2}\n`); + + const result = readEventsFromOffset(file, 0); + assert.equal(result.events.length, 2); + 
assert.equal(result.events[0].type, "phase"); + assert.equal(result.events[1].type, "message"); + assert.ok(result.newOffset > 0); + }); + + it("reads only new content from given offset", () => { + const file = join(tempDir, "events.jsonl"); + const line1 = JSON.stringify({ t: "2026-01-01", type: "phase", phase: "starting" }); + writeFileSync(file, `${line1}\n`); + + const first = readEventsFromOffset(file, 0); + assert.equal(first.events.length, 1); + + const line2 = JSON.stringify({ t: "2026-01-01", type: "tool_call", tool: "Read" }); + writeFileSync(file, `${line1}\n${line2}\n`); + + const second = readEventsFromOffset(file, first.newOffset); + assert.equal(second.events.length, 1); + assert.equal(second.events[0].type, "tool_call"); + }); + + it("skips malformed lines without throwing", () => { + const file = join(tempDir, "events.jsonl"); + const valid = JSON.stringify({ t: "2026-01-01", type: "phase", phase: "done" }); + writeFileSync(file, `not-json\n${valid}\n{broken\n`); + + const result = readEventsFromOffset(file, 0); + assert.equal(result.events.length, 1); + assert.equal(result.events[0].type, "phase"); + }); +}); + +describe("renderEvent", () => { + it("renders phase events with spinner and color", () => { + const output = renderEvent({ type: EVENT_TYPES.PHASE, phase: "starting", message: "Thread ready" }); + assert.ok(output.includes("starting")); + assert.ok(output.includes("Thread ready")); + }); + + it("renders tool_call events", () => { + const output = renderEvent({ type: EVENT_TYPES.TOOL_CALL, tool: "Read", path: "src/foo.ts" }); + assert.ok(output.includes("→")); + assert.ok(output.includes("Read")); + assert.ok(output.includes("src/foo.ts")); + }); + + it("renders tool_done events", () => { + const output = renderEvent({ type: EVENT_TYPES.TOOL_DONE, tool: "Read" }); + assert.ok(output.includes("✓")); + assert.ok(output.includes("completed")); + }); + + it("renders command events", () => { + const output = renderEvent({ type: 
EVENT_TYPES.COMMAND, cmd: "npm test" }); + assert.ok(output.includes("$")); + assert.ok(output.includes("npm test")); + }); + + it("renders command_done with exit 0 in green", () => { + const output = renderEvent({ type: EVENT_TYPES.COMMAND_DONE, cmd: "npm test", exit: 0 }); + assert.ok(output.includes("exit 0")); + assert.ok(output.includes("\x1b[32m")); // green + }); + + it("renders command_done with non-zero exit in red", () => { + const output = renderEvent({ type: EVENT_TYPES.COMMAND_DONE, cmd: "npm test", exit: 1 }); + assert.ok(output.includes("exit 1")); + assert.ok(output.includes("\x1b[31m")); // red + }); + + it("renders file_change events", () => { + const output = renderEvent({ type: EVENT_TYPES.FILE_CHANGE, path: "src/auth.ts", action: "modify" }); + assert.ok(output.includes("✎")); + assert.ok(output.includes("src/auth.ts")); + assert.ok(output.includes("modify")); + }); + + it("renders message events with border", () => { + const output = renderEvent({ type: EVENT_TYPES.MESSAGE, text: "Fixed the bug" }); + assert.ok(output.includes("│")); + assert.ok(output.includes("Fixed the bug")); + }); + + it("renders reasoning events with bullets", () => { + const output = renderEvent({ type: EVENT_TYPES.REASONING, sections: ["Step 1", "Step 2"] }); + assert.ok(output.includes("•")); + assert.ok(output.includes("Step 1")); + assert.ok(output.includes("Step 2")); + }); + + it("renders completed events with timestamp", () => { + const output = renderEvent({ type: EVENT_TYPES.COMPLETED, status: "success", t: "2026-05-20T15:42:33Z" }); + assert.ok(output.includes("●")); + assert.ok(output.includes("completed at")); + assert.ok(output.includes("2026-05-20T15:42:33Z")); + assert.ok(output.includes("\x1b[32m")); // green for success + }); + + it("renders completed failure events in red", () => { + const output = renderEvent({ type: EVENT_TYPES.COMPLETED, status: "failure", t: "2026-05-20T15:42:33Z" }); + assert.ok(output.includes("\x1b[31m")); // red for failure + 
}); + + it("returns empty string for empty message events", () => { + const output = renderEvent({ type: EVENT_TYPES.MESSAGE, text: "" }); + assert.equal(output, ""); + }); +}); + +describe("findJobByIdAcrossWorkspaces", () => { + let pluginDataDir; + let previousPluginData; + + beforeEach(() => { + pluginDataDir = mkdtempSync(join(tmpdir(), "observe-cross-ws-")); + previousPluginData = process.env.CLAUDE_PLUGIN_DATA; + process.env.CLAUDE_PLUGIN_DATA = pluginDataDir; + }); + + afterEach(() => { + if (previousPluginData === undefined) { + delete process.env.CLAUDE_PLUGIN_DATA; + } else { + process.env.CLAUDE_PLUGIN_DATA = previousPluginData; + } + rmSync(pluginDataDir, { recursive: true, force: true }); + }); + + function writeWorkspaceState(slug, state) { + const dir = join(pluginDataDir, "state", slug); + mkdirSync(dir, { recursive: true }); + writeFileSync(join(dir, "state.json"), `${JSON.stringify(state, null, 2)}\n`, "utf8"); + return dir; + } + + it("returns null when stateRoot does not exist", () => { + assert.equal(findJobByIdAcrossWorkspaces("task-abc"), null); + }); + + it("returns null for missing jobId", () => { + assert.equal(findJobByIdAcrossWorkspaces(""), null); + assert.equal(findJobByIdAcrossWorkspaces(null), null); + }); + + it("finds a job stored in a different workspace state file", () => { + const jobRecord = { + id: "task-mpgzdj45-hcr1o6", + status: "running", + eventFile: "/abs/path/events.jsonl" + }; + const expectedDir = writeWorkspaceState("security-planck-7a3129dd96b457cb", { + version: 1, + jobs: [jobRecord] + }); + + const result = findJobByIdAcrossWorkspaces("task-mpgzdj45-hcr1o6"); + assert.ok(result, "expected cross-workspace match"); + assert.equal(result.stateDir, expectedDir); + assert.equal(result.job.id, "task-mpgzdj45-hcr1o6"); + assert.equal(result.job.eventFile, "/abs/path/events.jsonl"); + }); + + it("returns null when no workspace contains the jobId", () => { + writeWorkspaceState("other-1234567890abcdef", { + version: 1, + 
jobs: [{ id: "task-other", status: "completed" }] + }); + assert.equal(findJobByIdAcrossWorkspaces("task-missing"), null); + }); + + it("skips corrupted state.json files instead of throwing", () => { + const corruptedDir = join(pluginDataDir, "state", "corrupt-aaaaaaaaaaaaaaaa"); + mkdirSync(corruptedDir, { recursive: true }); + writeFileSync(join(corruptedDir, "state.json"), "{not valid json", "utf8"); + + writeWorkspaceState("good-bbbbbbbbbbbbbbbb", { + version: 1, + jobs: [{ id: "task-good", status: "completed" }] + }); + + const result = findJobByIdAcrossWorkspaces("task-good"); + assert.ok(result); + assert.equal(result.job.id, "task-good"); + }); +}); + +describe("handleObserveCommand --spawn", () => { + let tempDir; + let originalTmux; + let originalWrite; + let captured; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), "observe-spawn-")); + originalTmux = process.env.TMUX; + delete process.env.TMUX; + originalWrite = process.stdout.write.bind(process.stdout); + captured = ""; + process.stdout.write = (chunk) => { + captured += String(chunk); + return true; + }; + }); + + afterEach(() => { + process.stdout.write = originalWrite; + if (originalTmux === undefined) { + delete process.env.TMUX; + } else { + process.env.TMUX = originalTmux; + } + rmSync(tempDir, { recursive: true, force: true }); + }); + + it("prints fallback hint when not inside tmux", async () => { + await handleObserveCommand(["--spawn", "--cwd", tempDir, "task-abc"]); + assert.match(captured, /Not running inside.*tmux/); + assert.match(captured, /Open a new terminal/); + assert.match(captured, /codex-companion\.mjs/); + assert.match(captured, /observe.*task-abc/); + }); + + it("includes the workspace cwd in the fallback hint", async () => { + await handleObserveCommand(["--spawn", "--cwd", tempDir]); + assert.ok(captured.includes(`cd ${tempDir}`)); + }); + + it("prints Automation permission message without copy-paste fallback", async () => { + await handleObserveSpawn({ + 
positionals: ["task-abc"], + options: { cwd: tempDir }, + workspaceRoot: tempDir, + spawner: () => ({ + spawned: false, + kind: "ghostty-mac", + reason: "automation-permission-denied", + error: "Automation permission needed for Ghostty" + }) + }); + + assert.match(captured, /Automation permission needed/); + assert.match(captured, /Ghostty/); + assert.doesNotMatch(captured, /Open a new terminal/); + }); +}); diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 90408372..9e3083a5 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -833,6 +833,50 @@ test("task --background enqueues a detached worker and exposes per-job status", assert.match(resultPayload.storedJob.rendered, /Handled the requested task/); }); +test("task --background writes a .done signal file on completion for Monitor-based notification", async () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir, "slow-task"); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const launched = run("node", [SCRIPT, "task", "--background", "--json", "investigate the failing test"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(launched.status, 0, launched.stderr); + const launchPayload = JSON.parse(launched.stdout); + assert.equal(launchPayload.status, "queued"); + assert.ok(launchPayload.signalFile, "launch payload must include signalFile"); + assert.ok(launchPayload.jobsDir, "launch payload must include jobsDir"); + assert.equal(launchPayload.signalFile, path.join(launchPayload.jobsDir, `${launchPayload.jobId}.done`)); + + // The signal file should not exist yet (task is still running). + assert.equal(fs.existsSync(launchPayload.signalFile), false, "signal file must not exist before completion"); + + // Wait for the background worker to finish. 
+ const waitedStatus = run( + "node", + [SCRIPT, "status", launchPayload.jobId, "--wait", "--timeout-ms", "15000", "--json"], + { + cwd: repo, + env: buildEnv(binDir) + } + ); + assert.equal(waitedStatus.status, 0, waitedStatus.stderr); + const waitedPayload = JSON.parse(waitedStatus.stdout); + assert.equal(waitedPayload.job.status, "completed"); + + // The signal file should now exist and contain the completion marker. + await waitFor(() => fs.existsSync(launchPayload.signalFile)); + const signalContent = fs.readFileSync(launchPayload.signalFile, "utf8"); + assert.match(signalContent, /completed/); + assert.match(signalContent, new RegExp(launchPayload.jobId)); +}); + test("review rejects focus text because it is native-review only", () => { const repo = makeTempDir(); const binDir = makeTempDir(); diff --git a/tests/spawner.test.mjs b/tests/spawner.test.mjs new file mode 100644 index 00000000..7c3aa919 --- /dev/null +++ b/tests/spawner.test.mjs @@ -0,0 +1,555 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +import { + detectTerminal, + buildTmuxSplitArgs, + buildGhosttyMacArgs, + buildIterm2MacArgs, + composeShellInvocation, + discoverCallerTty, + spawnObserverInTerminal +} from "../plugins/codex/scripts/lib/spawner.mjs"; + +function scriptFromArgs(args) { + const lines = []; + for (let i = 0; i < args.length; i += 2) { + assert.equal(args[i], "-e"); + lines.push(args[i + 1]); + } + return lines.join("\n"); +} + +describe("detectTerminal", () => { + it("detects tmux when $TMUX is set", () => { + const result = detectTerminal({ TMUX: "/tmp/tmux-1000/default,1234,0" }); + assert.equal(result.kind, "tmux"); + }); + + it("returns none when $TMUX is unset", () => { + const result = detectTerminal({}); + assert.equal(result.kind, "none"); + }); + + it("returns none when $TMUX is empty string", () => { + const result = detectTerminal({ TMUX: "" }); + assert.equal(result.kind, "none"); + }); + + it("detects ghostty-mac on macOS 
Ghostty without tmux", () => { + const result = detectTerminal({ TERM_PROGRAM: "ghostty" }, "darwin"); + assert.equal(result.kind, "ghostty-mac"); + }); + + it("detects iterm2-mac on macOS iTerm2 without tmux", () => { + const result = detectTerminal({ TERM_PROGRAM: "iTerm.app" }, "darwin"); + assert.equal(result.kind, "iterm2-mac"); + }); + + it("returns none for mac terminal names on non-darwin platforms", () => { + assert.equal(detectTerminal({ TERM_PROGRAM: "ghostty" }, "linux").kind, "none"); + assert.equal(detectTerminal({ TERM_PROGRAM: "iTerm.app" }, "linux").kind, "none"); + }); +}); + +describe("Detection precedence", () => { + it("selects tmux before Ghostty when both signals are present", () => { + const result = detectTerminal({ TMUX: "x", TERM_PROGRAM: "ghostty" }, "darwin"); + assert.equal(result.kind, "tmux"); + }); + + it("selects tmux before iTerm2 when both signals are present", () => { + const result = detectTerminal({ TMUX: "x", TERM_PROGRAM: "iTerm.app" }, "darwin"); + assert.equal(result.kind, "tmux"); + }); +}); + +describe("buildTmuxSplitArgs", () => { + it("produces split-window -h with cwd and command", () => { + const args = buildTmuxSplitArgs({ + cwd: "/path/to/project", + command: "node /abs/companion.mjs observe abc123" + }); + assert.deepEqual(args, [ + "split-window", + "-h", + "-c", + "/path/to/project", + "node /abs/companion.mjs observe abc123" + ]); + }); +}); + +describe("spawnObserverInTerminal", () => { + it("invokes tmux when inside tmux and reports success", () => { + const calls = []; + const runner = (cmd, args) => { + calls.push({ cmd, args }); + return { status: 0 }; + }; + + const result = spawnObserverInTerminal({ + cwd: "/p", + command: "node x observe", + env: { TMUX: "x" }, + runner + }); + + assert.equal(result.spawned, true); + assert.equal(result.kind, "tmux"); + assert.equal(calls.length, 1); + assert.equal(calls[0].cmd, "tmux"); + assert.deepEqual(calls[0].args, [ + "split-window", + "-h", + "-c", + "/p", + 
"node x observe" + ]); + }); + + it("reports failure when tmux exits non-zero", () => { + const runner = () => ({ status: 1 }); + const result = spawnObserverInTerminal({ + cwd: "/p", + command: "node x observe", + env: { TMUX: "x" }, + runner + }); + + assert.equal(result.spawned, false); + assert.equal(result.kind, "tmux"); + assert.ok(result.error); + }); + + it("reports failure with error message when runner throws an error object", () => { + const runner = () => ({ status: null, error: new Error("tmux not installed") }); + const result = spawnObserverInTerminal({ + cwd: "/p", + command: "node x observe", + env: { TMUX: "x" }, + runner + }); + + assert.equal(result.spawned, false); + assert.match(result.error, /tmux not installed/); + }); + + it("does not invoke runner when not inside tmux", () => { + let called = false; + const runner = () => { + called = true; + return { status: 0 }; + }; + + const result = spawnObserverInTerminal({ + cwd: "/p", + command: "node x observe", + env: {}, + runner + }); + + assert.equal(called, false); + assert.equal(result.spawned, false); + assert.equal(result.kind, "none"); + }); + + it("invokes Ghostty through osascript with new-window-only flow", () => { + const calls = []; + const result = spawnObserverInTerminal({ + cwd: "/p", + command: "'node' 'x' 'observe'", + env: { TERM_PROGRAM: "ghostty" }, + platform: "darwin", + discoverTty: () => "/dev/ttys123", + runner: (cmd, args, opts) => { + calls.push({ cmd, args, opts }); + return { status: 0 }; + } + }); + + assert.deepEqual(result, { spawned: true, kind: "ghostty-mac" }); + assert.equal(calls[0].cmd, "osascript"); + assert.deepEqual(calls[0].opts, { stdio: ["ignore", "ignore", "pipe"] }); + + const script = scriptFromArgs(calls[0].args); + assert.match(script, /tell application "Ghostty"/); + // Ghostty's terminal object has no `tty` property as of 1.3, so the + // implementation does not perform tty-based matching. 
+ assert.doesNotMatch(script, /tty of t/); + assert.doesNotMatch(script, /repeat with t in terminals/); + // new window returns a window — input text must target a terminal. + assert.match(script, /set newWin to new window/); + assert.match(script, /set newTerm to terminal 1 of selected tab of newWin/); + assert.match(script, /input text "cd '\/p' && 'node' 'x' 'observe'\\n" to newTerm/); + }); + + it("invokes iTerm2 through osascript with nested-tabs traversal and new-window fallback", () => { + const calls = []; + const result = spawnObserverInTerminal({ + cwd: "/p", + command: "'node' 'x' 'observe'", + env: { TERM_PROGRAM: "iTerm.app" }, + platform: "darwin", + discoverTty: () => "/dev/ttys456", + runner: (cmd, args, opts) => { + calls.push({ cmd, args, opts }); + return { status: 0 }; + } + }); + + assert.deepEqual(result, { spawned: true, kind: "iterm2-mac" }); + assert.equal(calls[0].cmd, "osascript"); + assert.deepEqual(calls[0].opts, { stdio: ["ignore", "ignore", "pipe"] }); + + const script = scriptFromArgs(calls[0].args); + assert.match(script, /tell application "iTerm"/); + // iTerm2 object model is window -> tabs -> sessions; sessions is NOT + // directly an element of window. The traversal must nest through tabs. + assert.match(script, /repeat with w in windows/); + assert.match(script, /repeat with tb in tabs of w/); + assert.match(script, /repeat with s in sessions of tb/); + // tabs-of must appear before sessions-of in the script source so the + // outer loop is over tabs. 
+ assert.ok( + script.indexOf("tabs of w") < script.indexOf("sessions of tb"), + "tabs of w should be iterated before sessions of tb" + ); + assert.match(script, /tty of s/); + assert.match(script, /\/dev\/ttys456/); + assert.match(script, /split vertically with default profile/); + assert.match(script, /create window with default profile/); + assert.match(script, /write text "cd '\/p' && 'node' 'x' 'observe'" to newSession/); + }); + + it("returns unsafe-command and does not invoke runner for newline in cwd", () => { + let called = false; + const result = spawnObserverInTerminal({ + cwd: "/tmp/foo\nbar", + command: "'node' 'x'", + env: { TERM_PROGRAM: "ghostty" }, + platform: "darwin", + discoverTty: () => "/dev/ttys1", + runner: () => { + called = true; + return { status: 0 }; + } + }); + + assert.equal(called, false); + assert.equal(result.spawned, false); + assert.equal(result.kind, "ghostty-mac"); + assert.equal(result.reason, "unsafe-command"); + assert.match(result.error, /newline/i); + assert.match(result.error, /cwd/i); + }); + + it("returns unsafe-command and does not invoke runner for NUL in cwd", () => { + let called = false; + const result = spawnObserverInTerminal({ + cwd: "/tmp/foo\0bar", + command: "'node' 'x'", + env: { TERM_PROGRAM: "ghostty" }, + platform: "darwin", + discoverTty: () => null, + runner: () => { + called = true; + return { status: 0 }; + } + }); + + assert.equal(called, false); + assert.equal(result.reason, "unsafe-command"); + assert.match(result.error, /NUL/i); + }); + + it("returns unsafe-command and does not invoke runner for carriage return in command", () => { + let called = false; + const result = spawnObserverInTerminal({ + cwd: "/tmp", + command: "'node'\r'x'", + env: { TERM_PROGRAM: "iTerm.app" }, + platform: "darwin", + discoverTty: () => null, + runner: () => { + called = true; + return { status: 0 }; + } + }); + + assert.equal(called, false); + assert.equal(result.kind, "iterm2-mac"); + assert.equal(result.reason, 
"unsafe-command"); + assert.match(result.error, /carriage return|control character/i); + }); + + it("allows tab and space in composed command and invokes runner", () => { + let called = false; + const result = spawnObserverInTerminal({ + cwd: "/tmp/dir with space", + command: "'node'\t'x'", + env: { TERM_PROGRAM: "ghostty" }, + platform: "darwin", + discoverTty: () => null, + runner: () => { + called = true; + return { status: 0 }; + } + }); + + assert.equal(called, true); + assert.equal(result.spawned, true); + }); + + it("embeds the discovered caller tty in the iTerm2 AppleScript comparison", () => { + const calls = []; + spawnObserverInTerminal({ + cwd: "/p", + command: "'node' 'x'", + env: { TERM_PROGRAM: "iTerm.app" }, + platform: "darwin", + discoverTty: () => "/dev/ttys999", + runner: (cmd, args) => { + calls.push({ cmd, args }); + return { status: 0 }; + } + }); + + assert.match(scriptFromArgs(calls[0].args), /set targetTty to "\/dev\/ttys999"/); + }); + + it("Ghostty script does not embed caller tty because Ghostty has no tty property", () => { + const calls = []; + spawnObserverInTerminal({ + cwd: "/p", + command: "'node' 'x'", + env: { TERM_PROGRAM: "ghostty" }, + platform: "darwin", + discoverTty: () => "/dev/ttys999", + runner: (cmd, args) => { + calls.push({ cmd, args }); + return { status: 0 }; + } + }); + + const script = scriptFromArgs(calls[0].args); + assert.doesNotMatch(script, /\/dev\/ttys999/); + assert.doesNotMatch(script, /targetTty/); + }); + + it("builds the iTerm2 new-window branch when caller tty cannot be discovered", () => { + const calls = []; + spawnObserverInTerminal({ + cwd: "/p", + command: "'node' 'x'", + env: { TERM_PROGRAM: "iTerm.app" }, + platform: "darwin", + discoverTty: () => null, + runner: (cmd, args) => { + calls.push({ cmd, args }); + return { status: 0 }; + } + }); + + const script = scriptFromArgs(calls[0].args); + assert.doesNotMatch(script, /repeat with/); + assert.doesNotMatch(script, /split vertically/); + 
assert.match(script, /create window with default profile/); + assert.match(script, /set newSession to current session of newWindow/); + }); + + it("classifies osascript error number -1743 as automation-permission-denied", () => { + const result = spawnObserverInTerminal({ + cwd: "/p", + command: "'node' 'x'", + env: { TERM_PROGRAM: "ghostty" }, + platform: "darwin", + discoverTty: () => null, + runner: () => ({ status: 1, stderr: "(-1743) Not authorized to send Apple events to Ghostty" }) + }); + + assert.equal(result.spawned, false); + assert.equal(result.kind, "ghostty-mac"); + assert.equal(result.reason, "automation-permission-denied"); + assert.match(result.error, /Automation permission/i); + }); + + it("classifies lowercase not authorized phrase as automation-permission-denied", () => { + const result = spawnObserverInTerminal({ + cwd: "/p", + command: "'node' 'x'", + env: { TERM_PROGRAM: "iTerm.app" }, + platform: "darwin", + discoverTty: () => null, + runner: () => ({ status: 1, stderr: "not authorized to send apple events" }) + }); + + assert.equal(result.spawned, false); + assert.equal(result.kind, "iterm2-mac"); + assert.equal(result.reason, "automation-permission-denied"); + }); +}); + +describe("composeShellInvocation", () => { + it("quotes cwd with spaces", () => { + const result = composeShellInvocation({ + cwd: "/Users/dragon.cl/work projects/codex-plugin-cc", + command: "'/abs/node' '/abs/companion.mjs' 'observe' 'task-abc'" + }); + + assert.equal( + result, + "cd '/Users/dragon.cl/work projects/codex-plugin-cc' && '/abs/node' '/abs/companion.mjs' 'observe' 'task-abc'" + ); + }); + + it("escapes a single quote in cwd", () => { + const result = composeShellInvocation({ + cwd: "/tmp/it's-a-trap", + command: "'/abs/node' '/abs/companion.mjs' 'observe' 'task-abc'" + }); + + assert.ok(result.startsWith("cd '/tmp/it'\\''s-a-trap' && ")); + }); + + it("keeps cwd shell metacharacters inside the quoted literal", () => { + const result = 
composeShellInvocation({ + cwd: "/tmp/foo;rm -rf /;", + command: "'node'" + }); + + assert.equal(result, "cd '/tmp/foo;rm -rf /;' && 'node'"); + }); + + it("preserves unicode cwd bytes verbatim", () => { + const result = composeShellInvocation({ + cwd: "/Users/田中/プロジェクト", + command: "'node'" + }); + + assert.equal(result, "cd '/Users/田中/プロジェクト' && 'node'"); + }); + + it("preserves pre-quoted command tokens byte-for-byte", () => { + const command = "'/abs/node' '/abs/companion.mjs' 'observe' 'task-abc'"; + const result = composeShellInvocation({ cwd: "/tmp", command }); + + assert.ok(result.endsWith(` && ${command}`)); + }); + + it("does not add another shell-quote layer around command tokens with metacharacters", () => { + const command = "'/abs/node' '/abs/companion.mjs' 'observe' 'task with$weird;chars'"; + const result = composeShellInvocation({ cwd: "/tmp", command }); + + assert.ok(result.endsWith(` && ${command}`)); + }); + + it("feeds the composed shell invocation into AppleScript escaping in order", () => { + const composed = composeShellInvocation({ + cwd: "/tmp/project", + command: "'node' 'say \"hi\" and C:\\tmp'" + }); + const script = scriptFromArgs(buildGhosttyMacArgs({ composed, callerTty: null })); + + assert.match(script, /input text "cd '\/tmp\/project' && 'node' 'say \\"hi\\" and C:\\\\tmp'\\n"/); + }); +}); + +describe("discoverCallerTty", () => { + it("returns the tty of the immediate parent when ps yields a real device", () => { + const calls = []; + const runProbe = (cmd, args) => { + calls.push({ cmd, args }); + return "ttys004 4242\n"; + }; + const tty = discoverCallerTty({ startPid: 9999, runProbe }); + assert.equal(tty, "/dev/ttys004"); + assert.equal(calls.length, 1); + assert.deepEqual(calls[0].args, ["-o", "tty=,ppid=", "-p", "9999"]); + }); + + it("walks past a `??` ancestor to find a real tty further up", () => { + const responses = new Map([ + ["100", "?? 
50\n"], + ["50", "ttys010 1\n"] + ]); + const seen = []; + const runProbe = (_cmd, args) => { + const pid = args[args.length - 1]; + seen.push(pid); + const out = responses.get(pid); + if (!out) { + throw new Error(`unexpected probe pid=${pid}`); + } + return out; + }; + const tty = discoverCallerTty({ startPid: 100, runProbe }); + assert.equal(tty, "/dev/ttys010"); + assert.deepEqual(seen, ["100", "50"]); + }); + + it("returns the tty unchanged when ps already includes the /dev/ prefix", () => { + const runProbe = () => "/dev/ttys020 4242\n"; + assert.equal( + discoverCallerTty({ startPid: 9999, runProbe }), + "/dev/ttys020" + ); + }); + + it("returns null when ancestry hits ppid <= 1 without a real tty", () => { + const responses = new Map([ + ["123", "?? 1\n"] + ]); + const runProbe = (_cmd, args) => responses.get(args[args.length - 1]); + assert.equal(discoverCallerTty({ startPid: 123, runProbe }), null); + }); + + it("returns null when runProbe throws", () => { + const runProbe = () => { + throw new Error("ps not available"); + }; + assert.equal(discoverCallerTty({ startPid: 9999, runProbe }), null); + }); + + it("returns null when ps output is empty or malformed", () => { + assert.equal(discoverCallerTty({ startPid: 9999, runProbe: () => "" }), null); + assert.equal(discoverCallerTty({ startPid: 9999, runProbe: () => "garbage\n" }), null); + }); + + it("caps walk depth at 10 ancestors and returns null on overrun", () => { + let probes = 0; + const runProbe = () => { + probes += 1; + // Each level reports `??` and bumps to a fresh nonzero ppid, forcing + // the loop to exhaust its depth budget. + return `?? 
${100 + probes}\n`; + }; + assert.equal(discoverCallerTty({ startPid: 1000, runProbe }), null); + assert.equal(probes, 10); + }); + + it("returns null when startPid is invalid", () => { + const runProbe = () => { + throw new Error("should not be called"); + }; + assert.equal(discoverCallerTty({ startPid: 0, runProbe }), null); + assert.equal(discoverCallerTty({ startPid: 1, runProbe }), null); + assert.equal(discoverCallerTty({ startPid: null, runProbe }), null); + }); +}); + +describe("build osascript args", () => { + it("escapes double quotes and backslashes for Ghostty AppleScript literals", () => { + const composed = "cd '/tmp' && 'node' 'say \"hi\" and C:\\tmp'"; + const script = scriptFromArgs(buildGhosttyMacArgs({ composed, callerTty: null })); + + assert.match(script, /say \\"hi\\" and C:\\\\tmp/); + }); + + it("escapes double quotes and backslashes for iTerm2 AppleScript literals", () => { + const composed = "cd '/tmp' && 'node' 'say \"hi\" and C:\\tmp'"; + const script = scriptFromArgs(buildIterm2MacArgs({ composed, callerTty: null })); + + assert.match(script, /say \\"hi\\" and C:\\\\tmp/); + }); +}); diff --git a/tests/state.test.mjs b/tests/state.test.mjs index 0f8f57ce..a11ccd94 100644 --- a/tests/state.test.mjs +++ b/tests/state.test.mjs @@ -7,13 +7,24 @@ import assert from "node:assert/strict"; import { makeTempDir } from "./helpers.mjs"; import { resolveJobFile, resolveJobLogFile, resolveStateDir, resolveStateFile, saveState } from "../plugins/codex/scripts/lib/state.mjs"; -test("resolveStateDir uses a temp-backed per-workspace directory", () => { +test("resolveStateDir falls back to a HOME-anchored directory when CLAUDE_PLUGIN_DATA is unset", () => { const workspace = makeTempDir(); - const stateDir = resolveStateDir(workspace); + const previousPluginDataDir = process.env.CLAUDE_PLUGIN_DATA; + delete process.env.CLAUDE_PLUGIN_DATA; + + try { + const stateDir = resolveStateDir(workspace); + const expectedRoot = path.join(os.homedir(), 
".codex-companion", "state"); - assert.equal(stateDir.startsWith(os.tmpdir()), true); - assert.match(path.basename(stateDir), /.+-[a-f0-9]{16}$/); - assert.match(stateDir, new RegExp(`^${os.tmpdir().replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`)); + assert.equal(stateDir.startsWith(expectedRoot), true, `expected ${stateDir} to start with ${expectedRoot}`); + assert.match(path.basename(stateDir), /.+-[a-f0-9]{16}$/); + } finally { + if (previousPluginDataDir == null) { + delete process.env.CLAUDE_PLUGIN_DATA; + } else { + process.env.CLAUDE_PLUGIN_DATA = previousPluginDataDir; + } + } }); test("resolveStateDir uses CLAUDE_PLUGIN_DATA when it is provided", () => { diff --git a/tests/worktree-render.test.mjs b/tests/worktree-render.test.mjs new file mode 100644 index 00000000..11966c57 --- /dev/null +++ b/tests/worktree-render.test.mjs @@ -0,0 +1,78 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; + +import { renderWorktreesBlock, renderTaskResult } from "../plugins/codex/scripts/lib/render.mjs"; + +describe("renderWorktreesBlock", () => { + it("returns null when no worktreePath", () => { + assert.equal(renderWorktreesBlock({}), null); + assert.equal(renderWorktreesBlock({ worktreePath: null }), null); + }); + + it("renders worktree info with path and branch", () => { + const result = renderWorktreesBlock({ + worktreePath: "/repo/.claude/worktrees/task-abc123", + worktreeBranch: "codex-rescue/task-abc123-fix-bug", + worktreeBaseBranch: "main" + }); + + assert.ok(result.includes("Worktree:")); + assert.ok(result.includes("/repo/.claude/worktrees/task-abc123")); + assert.ok(result.includes("codex-rescue/task-abc123-fix-bug")); + assert.ok(result.includes("git diff main...codex-rescue/task-abc123-fix-bug")); + assert.ok(result.includes("git merge codex-rescue/task-abc123-fix-bug")); + assert.ok(result.includes("git worktree remove /repo/.claude/worktrees/task-abc123")); + }); + + it("renders without next steps when no baseBranch", () => 
{ + const result = renderWorktreesBlock({ + worktreePath: "/repo/.claude/worktrees/task-abc123", + worktreeBranch: "codex-rescue/task-abc123" + }); + + assert.ok(result.includes("Worktree:")); + assert.ok(result.includes("/repo/.claude/worktrees/task-abc123")); + assert.ok(!result.includes("Next steps:")); + }); +}); + +describe("renderTaskResult with worktree", () => { + it("appends worktree block to raw output", () => { + const result = renderTaskResult( + { rawOutput: "Task completed successfully." }, + { + worktreePath: "/repo/.claude/worktrees/task-abc123", + worktreeBranch: "codex-rescue/task-abc123-fix-bug", + worktreeBaseBranch: "main" + } + ); + + assert.ok(result.includes("Task completed successfully.")); + assert.ok(result.includes("Worktree:")); + assert.ok(result.includes("/repo/.claude/worktrees/task-abc123")); + }); + + it("appends worktree block to failure message", () => { + const result = renderTaskResult( + { failureMessage: "Task failed." }, + { + worktreePath: "/repo/.claude/worktrees/task-abc123", + worktreeBranch: "codex-rescue/task-abc123-fix-bug", + worktreeBaseBranch: "main" + } + ); + + assert.ok(result.includes("Task failed.")); + assert.ok(result.includes("Worktree:")); + }); + + it("returns plain output when no worktree", () => { + const result = renderTaskResult( + { rawOutput: "Task completed." 
}, + {} + ); + + assert.equal(result, "Task completed.\n"); + assert.ok(!result.includes("Worktree:")); + }); +}); diff --git a/tests/worktree.test.mjs b/tests/worktree.test.mjs new file mode 100644 index 00000000..6fd41c1f --- /dev/null +++ b/tests/worktree.test.mjs @@ -0,0 +1,156 @@ +import { mkdtempSync, mkdirSync, writeFileSync, rmSync, existsSync, readdirSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { describe, it, beforeEach, afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { spawnSync } from "node:child_process"; + +import { + resolveWorktreePath, + generateWorktreeBranch, + createWorktree +} from "../plugins/codex/scripts/lib/workspace.mjs"; + +function run(command, args, options = {}) { + return spawnSync(command, args, { + cwd: options.cwd, + encoding: "utf8", + windowsHide: true + }); +} + +function initGitRepo(cwd) { + run("git", ["init", "-b", "main"], { cwd }); + run("git", ["config", "user.name", "Codex Plugin Tests"], { cwd }); + run("git", ["config", "user.email", "tests@example.com"], { cwd }); + run("git", ["config", "commit.gpgsign", "false"], { cwd }); + writeFileSync(join(cwd, "README.md"), "# test\n"); + run("git", ["add", "."], { cwd }); + run("git", ["commit", "-m", "initial"], { cwd }); +} + +describe("resolveWorktreePath", () => { + it("returns path under .claude/worktrees with jobId", () => { + const result = resolveWorktreePath("/repo", "task-abc123"); + assert.equal(result, "/repo/.claude/worktrees/task-abc123"); + }); + + it("handles nested source root", () => { + const result = resolveWorktreePath("/home/user/projects/myrepo", "task-xyz"); + assert.equal(result, "/home/user/projects/myrepo/.claude/worktrees/task-xyz"); + }); +}); + +describe("generateWorktreeBranch", () => { + it("generates branch with jobId only when prompt is empty", () => { + const result = generateWorktreeBranch("task-abc123", ""); + assert.equal(result, 
"codex-rescue/task-abc123"); + }); + + it("generates branch with jobId only when prompt is null", () => { + const result = generateWorktreeBranch("task-abc123", null); + assert.equal(result, "codex-rescue/task-abc123"); + }); + + it("includes truncated prompt in branch name", () => { + const result = generateWorktreeBranch("task-abc123", "Fix the authentication bug"); + assert.equal(result, "codex-rescue/task-abc123-fix-the-authentication-bug"); + }); + + it("truncates long prompts to 32 characters", () => { + const longPrompt = "This is a very long prompt that should be truncated to thirty two characters"; + const result = generateWorktreeBranch("task-abc123", longPrompt); + assert.ok(result.startsWith("codex-rescue/task-abc123-")); + const suffix = result.replace("codex-rescue/task-abc123-", ""); + assert.ok(suffix.length <= 32, `suffix "${suffix}" is ${suffix.length} chars`); + }); + + it("removes special characters from prompt", () => { + const result = generateWorktreeBranch("task-abc123", "Fix bug #123 (urgent!)"); + assert.equal(result, "codex-rescue/task-abc123-fix-bug-123-urgent"); + }); + + it("converts spaces to hyphens", () => { + const result = generateWorktreeBranch("task-abc123", "add new feature"); + assert.equal(result, "codex-rescue/task-abc123-add-new-feature"); + }); + + it("collapses multiple hyphens", () => { + const result = generateWorktreeBranch("task-abc123", "fix---bug"); + assert.equal(result, "codex-rescue/task-abc123-fix-bug"); + }); + + it("strips leading and trailing hyphens from prompt part", () => { + const result = generateWorktreeBranch("task-abc123", " -fix bug- "); + assert.equal(result, "codex-rescue/task-abc123-fix-bug"); + }); + + it("handles prompt with only special characters", () => { + const result = generateWorktreeBranch("task-abc123", "!!!@@@###"); + assert.equal(result, "codex-rescue/task-abc123"); + }); +}); + +describe("createWorktree", () => { + let tempDir; + + beforeEach(() => { + tempDir = 
mkdtempSync(join(tmpdir(), "codex-worktree-test-")); + initGitRepo(tempDir); + }); + + afterEach(() => { + // Remove worktrees first to avoid permission issues + const worktreesDir = join(tempDir, ".claude", "worktrees"); + if (existsSync(worktreesDir)) { + const entries = readdirSync(worktreesDir); + for (const entry of entries) { + const wtPath = join(worktreesDir, entry); + run("git", ["worktree", "remove", "--force", wtPath], { cwd: tempDir }); + } + } + rmSync(tempDir, { recursive: true, force: true }); + }); + + it("creates a worktree with the correct path and branch", () => { + const result = createWorktree(tempDir, "task-abc123", "fix bug"); + + assert.ok(result.worktreePath.endsWith("/.claude/worktrees/task-abc123")); + assert.equal(result.worktreeBranch, "codex-rescue/task-abc123-fix-bug"); + assert.equal(result.worktreeBaseBranch, "main"); + assert.ok(existsSync(result.worktreePath)); + + // Verify branch exists + const branchList = run("git", ["branch", "--list"], { cwd: tempDir }); + assert.ok(branchList.stdout.includes("codex-rescue/task-abc123-fix-bug")); + }); + + it("creates worktree without prompt", () => { + const result = createWorktree(tempDir, "task-xyz", ""); + + assert.equal(result.worktreeBranch, "codex-rescue/task-xyz"); + assert.ok(existsSync(result.worktreePath)); + }); + + it("reuses existing worktree at the same path", () => { + // Create first worktree + const first = createWorktree(tempDir, "task-reuse", "first"); + + // Create again at same path (same jobId) + const second = createWorktree(tempDir, "task-reuse", "first"); + + assert.equal(first.worktreePath, second.worktreePath); + assert.ok(existsSync(second.worktreePath)); + }); + + it("throws when path exists but is not a worktree", () => { + const worktreePath = join(tempDir, ".claude", "worktrees", "task-conflict"); + mkdirSync(worktreePath, { recursive: true }); + writeFileSync(join(worktreePath, "file.txt"), "not a worktree"); + + assert.throws( + () => 
createWorktree(tempDir, "task-conflict", "test"), + /Worktree path already exists/ + ); + }); +});