openai · Pgarciapg · Apr 3, 2026 · Apr 11, 2026 · Apr 11, 2026 · May 4, 2026
diff --git a/README.md b/README.md
@@ -63,6 +63,7 @@ After install, you should see:
 
 - the slash commands listed below
 - the `codex:codex-rescue` subagent in `/agents`
+- three internal skills used by the rescue subagent: `codex-cli-runtime`, `codex-result-handling`, and `gpt-5-4-prompting` (these are not user-invocable)
 
 One simple first run is:
 

diff --git a/plugins/codex/agents/codex-rescue.md b/plugins/codex/agents/codex-rescue.md
@@ -38,7 +38,8 @@ Forwarding rules:
 - Otherwise forward the task as a fresh `task` run.
 - Preserve the user's task text as-is apart from stripping routing flags.
 - Return the stdout of the `codex-companion` command exactly as-is.
-- If the Bash call fails or Codex cannot be invoked, return nothing.
+- If the `codex-companion` command prints a JSON object with `"status": "error"` on stdout, report its `error` field to the user.
+- If the Bash call fails or Codex cannot be invoked, return the stderr output if available.
 
 Response style:
 

diff --git a/plugins/codex/commands/rescue.md b/plugins/codex/commands/rescue.md
@@ -1,6 +1,6 @@
 ---
 description: Delegate investigation, an explicit fix request, or follow-up rescue work to the Codex rescue subagent
-argument-hint: "[--background|--wait] [--resume|--fresh] [--model <model|spark>] [--effort <none|minimal|low|medium|high|xhigh>] [what Codex should investigate, solve, or continue]"
+argument-hint: "[--background|--wait] [--resume|--fresh] [--model <model|spark>] [--effort <none|minimal|low|medium|high|xhigh>] [--context <text>] [what Codex should investigate, solve, or continue]"
 context: fork
 allowed-tools: Bash(node:*), AskUserQuestion
 ---
@@ -17,7 +17,7 @@ Execution mode:
 - If the request includes `--wait`, run the `codex:codex-rescue` subagent in the foreground.
 - If neither flag is present, default to foreground.
 - `--background` and `--wait` are execution flags for Claude Code. Do not forward them to `task`, and do not treat them as part of the natural-language task text.
-- `--model` and `--effort` are runtime-selection flags. Preserve them for the forwarded `task` call, but do not treat them as part of the natural-language task text.
+- `--model`, `--effort`, and `--context` are runtime-selection flags. Preserve them for the forwarded `task` call, but do not treat them as part of the natural-language task text.
 - If the request includes `--resume`, do not ask whether to continue. The user already chose.
 - If the request includes `--fresh`, do not ask whether to continue. The user already chose.
 - Otherwise, before starting Codex, check for a resumable rescue thread from this Claude session by running:

diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs
@@ -77,7 +77,7 @@ function printUsage() {
       "  node scripts/codex-companion.mjs setup [--enable-review-gate|--disable-review-gate] [--json]",
       "  node scripts/codex-companion.mjs review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>]",
       "  node scripts/codex-companion.mjs adversarial-review [--wait|--background] [--base <ref>] [--scope <auto|working-tree|branch>] [focus text]",
-      "  node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model <model|spark>] [--effort <none|minimal|low|medium|high|xhigh>] [prompt]",
+      "  node scripts/codex-companion.mjs task [--background] [--write] [--resume-last|--resume|--fresh] [--model <model|spark>] [--effort <none|minimal|low|medium|high|xhigh>] [--context <text>] [prompt]",
       "  node scripts/codex-companion.mjs status [job-id] [--all] [--json]",
       "  node scripts/codex-companion.mjs result [job-id] [--json]",
       "  node scripts/codex-companion.mjs cancel [job-id] [--json]"
@@ -451,9 +451,12 @@ async function executeTaskRun(request) {
     throw new Error("Provide a prompt, a prompt file, piped stdin, or use --resume-last.");
   }
 
+  const contextSuffix = request.context ? `\n\n---\n\nAdditional context:\n${request.context}` : "";
+  const fullPrompt = request.prompt ? `${request.prompt}${contextSuffix}` : "";
+
   const result = await runAppServerTurn(workspaceRoot, {
     resumeThreadId,
-    prompt: request.prompt,
+    prompt: fullPrompt,
     defaultPrompt: resumeThreadId ? DEFAULT_CONTINUE_PROMPT : "",
     model: request.model,
     effort: request.effort,
@@ -570,15 +573,16 @@ function buildTaskJob(workspaceRoot, taskMetadata, write) {
   });
 }
 
-function buildTaskRequest({ cwd, model, effort, prompt, write, resumeLast, jobId }) {
+function buildTaskRequest({ cwd, model, effort, prompt, write, resumeLast, jobId, context }) {
   return {
     cwd,
     model,
     effort,
     prompt,
     write,
     resumeLast,
-    jobId
+    jobId,
+    context // optional extra text; executeTaskRun appends it only when the prompt is non-empty
   };
 }
 
@@ -703,10 +707,11 @@ async function handleReview(argv) {
 
 async function handleTask(argv) {
   const { options, positionals } = parseCommandInput(argv, {
-    valueOptions: ["model", "effort", "cwd", "prompt-file"],
+    valueOptions: ["model", "effort", "cwd", "prompt-file", "context"],
     booleanOptions: ["json", "write", "resume-last", "resume", "fresh", "background"],
     aliasMap: {
-      m: "model"
+      m: "model",
+      c: "context"
     }
   });
 
@@ -727,6 +732,8 @@ async function handleTask(argv) {
     resumeLast
   });
 
+  const context = options.context ?? null;
+
   if (options.background) {
     ensureCodexReady(cwd);
     requireTaskRequest(prompt, resumeLast);
@@ -739,7 +746,8 @@ async function handleTask(argv) {
       prompt,
       write,
       resumeLast,
-      jobId: job.id
+      jobId: job.id,
+      context
     });
     const { payload } = enqueueBackgroundTask(cwd, job, request);
     outputCommandResult(payload, renderQueuedTaskLaunch(payload), options.json);
@@ -758,6 +766,7 @@ async function handleTask(argv) {
         write,
         resumeLast,
         jobId: job.id,
+        context,
         onProgress: progress
       }),
     { json: options.json }
@@ -1002,6 +1011,15 @@ async function main() {
 
 main().catch((error) => {
   const message = error instanceof Error ? error.message : String(error);
+  // Mirror the failure onto stdout as a one-line JSON envelope: { status, error, exitCode }.
+  // The codex-rescue agent is told to relay this command's stdout, so a failure reported only
+  // on stderr would reach it as empty output. The plain message still goes to stderr below.
+  const envelope = JSON.stringify({
+    status: "error",
+    error: message,
+    exitCode: 1
+  });
+  process.stdout.write(`${envelope}\n`);
   process.stderr.write(`${message}\n`);
   process.exitCode = 1;
 });
diff --git a/plugins/codex/scripts/lib/broker-lifecycle.mjs b/plugins/codex/scripts/lib/broker-lifecycle.mjs
@@ -40,19 +40,31 @@ export async function waitForBrokerEndpoint(endpoint, timeoutMs = 2000) {
   return false;
 }
 
-export async function sendBrokerShutdown(endpoint) {
+export async function sendBrokerShutdown(endpoint, timeoutMs = 5000) {
   await new Promise((resolve) => {
     const socket = connectToEndpoint(endpoint);
     socket.setEncoding("utf8");
+    // Bound the wait: after timeoutMs, destroy the socket and resolve so a silent broker cannot hang the caller.
+    const timer = setTimeout(() => {
+      socket.destroy();
+      resolve();
+    }, timeoutMs);
+    timer.unref?.(); // the pending timeout alone should not keep the process alive
+    // Shared exit for data/error/close: cancel the timeout, then resolve (repeated resolve() calls are no-ops).
+    const cleanup = () => {
+      clearTimeout(timer);
+      resolve();
+    };
+
     socket.on("connect", () => {
       socket.write(`${JSON.stringify({ id: 1, method: "broker/shutdown", params: {} })}\n`);
     });
     socket.on("data", () => {
       socket.end();
-      resolve();
+      cleanup();
     });
-    socket.on("error", resolve);
-    socket.on("close", resolve);
+    socket.on("error", cleanup);
+    socket.on("close", cleanup);
   });
 }
 

diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs
@@ -52,13 +52,28 @@ function cleanCodexStderr(stderr) {
     .join("\n");
 }
 
+/**
+ * Pick the sandbox mode for thread start/resume params. Off Windows, return the requested mode,
+ * or "read-only" when it is null/undefined. On Windows (process.platform === "win32"), ignore the
+ * request and return "danger-full-access", because the other sandbox modes are said to fail there.
+ * @param {string | null | undefined} sandbox - The requested sandbox mode, if any.
+ * @returns {string} The mode to use; always "danger-full-access" on Windows.
+ */
+function coerceWindowsSandbox(sandbox) {
+  if (process.platform !== "win32") {
+    return sandbox ?? "read-only";
+  }
+  // NOTE(review): this silently widens even an explicit "read-only" request to full access — confirm this trade-off is intended.
+  return "danger-full-access";
+}
+
 /** @returns {ThreadStartParams} */
 function buildThreadParams(cwd, options = {}) {
   return {
     cwd,
     model: options.model ?? null,
     approvalPolicy: options.approvalPolicy ?? "never",
-    sandbox: options.sandbox ?? "read-only",
+    sandbox: coerceWindowsSandbox(options.sandbox),
     serviceName: SERVICE_NAME,
     ephemeral: options.ephemeral ?? true,
     experimentalRawEvents: false
@@ -72,7 +87,7 @@ function buildResumeParams(threadId, cwd, options = {}) {
     cwd,
     model: options.model ?? null,
     approvalPolicy: options.approvalPolicy ?? "never",
-    sandbox: options.sandbox ?? "read-only"
+    sandbox: coerceWindowsSandbox(options.sandbox)
   };
 }
 

diff --git a/plugins/codex/scripts/lib/state.mjs b/plugins/codex/scripts/lib/state.mjs
@@ -6,7 +6,8 @@ import path from "node:path";
 import { resolveWorkspaceRoot } from "./workspace.mjs";
 
 const STATE_VERSION = 1;
-const PLUGIN_DATA_ENV = "CLAUDE_PLUGIN_DATA";
+const CODEX_PLUGIN_DATA_ENV = "CODEX_PLUGIN_DATA";
+const CLAUDE_PLUGIN_DATA_ENV = "CLAUDE_PLUGIN_DATA";
 const FALLBACK_STATE_ROOT_DIR = path.join(os.tmpdir(), "codex-companion");
 const STATE_FILE_NAME = "state.json";
 const JOBS_DIR_NAME = "jobs";
@@ -38,7 +39,7 @@ export function resolveStateDir(cwd) {
   const slugSource = path.basename(workspaceRoot) || "workspace";
   const slug = slugSource.replace(/[^a-zA-Z0-9._-]+/g, "-").replace(/^-+|-+$/g, "") || "workspace";
   const hash = createHash("sha256").update(canonicalWorkspaceRoot).digest("hex").slice(0, 16);
-  const pluginDataDir = process.env[PLUGIN_DATA_ENV];
+  const pluginDataDir = process.env[CODEX_PLUGIN_DATA_ENV] || process.env[CLAUDE_PLUGIN_DATA_ENV];
   const stateRoot = pluginDataDir ? path.join(pluginDataDir, "state") : FALLBACK_STATE_ROOT_DIR;
   return path.join(stateRoot, `${slug}-${hash}`);
 }

diff --git a/plugins/codex/scripts/session-lifecycle-hook.mjs b/plugins/codex/scripts/session-lifecycle-hook.mjs
@@ -17,7 +17,8 @@ import { loadState, resolveStateFile, saveState } from "./lib/state.mjs";
 import { resolveWorkspaceRoot } from "./lib/workspace.mjs";
 
 export const SESSION_ID_ENV = "CODEX_COMPANION_SESSION_ID";
-const PLUGIN_DATA_ENV = "CLAUDE_PLUGIN_DATA";
+const CODEX_PLUGIN_DATA_ENV = "CODEX_PLUGIN_DATA";
+const CLAUDE_PLUGIN_DATA_ENV = "CLAUDE_PLUGIN_DATA";
 
 function readHookInput() {
   const raw = fs.readFileSync(0, "utf8").trim();
@@ -75,7 +76,9 @@ function cleanupSessionJobs(cwd, sessionId) {
 
 function handleSessionStart(input) {
   appendEnvVar(SESSION_ID_ENV, input.session_id);
-  appendEnvVar(PLUGIN_DATA_ENV, process.env[PLUGIN_DATA_ENV]);
+  // Re-export Claude's plugin data dir as CODEX_PLUGIN_DATA (the name resolveStateDir checks first) so that
+  // CLAUDE_PLUGIN_DATA itself is not pushed into the session env. NOTE(review): confirm appendEnvVar tolerates an unset value.
+  appendEnvVar(CODEX_PLUGIN_DATA_ENV, process.env[CLAUDE_PLUGIN_DATA_ENV]);
 }
 
 async function handleSessionEnd(input) {

diff --git a/plugins/codex/skills/codex-cli-runtime/SKILL.md b/plugins/codex/skills/codex-cli-runtime/SKILL.md
@@ -33,11 +33,13 @@ Command selection:
 - `--resume`: always use `task --resume-last`, even if the request text is ambiguous.
 - `--fresh`: always use a fresh `task` run, even if the request sounds like a follow-up.
 - `--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`.
+- `--context "<text>"`: pass additional context to Codex that will be appended to the prompt. Use this to provide extra background information, constraints, or specifications.
 - `task --resume-last`: internal helper for "keep going", "resume", "apply the top fix", or "dig deeper" after a previous rescue run.
 
 Safety rules:
 - Default to write-capable Codex work in `codex:codex-rescue` unless the user explicitly asks for read-only behavior.
 - Preserve the user's task text as-is apart from stripping routing flags.
 - Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own.
 - Return the stdout of the `task` command exactly as-is.
-- If the Bash call fails or Codex cannot be invoked, return nothing.
+- If the `task` command prints a JSON object with `"status": "error"` on stdout, report its `error` field to the user.
+- If the Bash call fails or Codex cannot be invoked, return the stderr output if available.
diff --git a/tests/args.test.mjs b/tests/args.test.mjs
@@ -0,0 +1,70 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+
+import { parseArgs, splitRawArgumentString } from "../plugins/codex/scripts/lib/args.mjs";
+
+// --- parseArgs: boolean/value options, inline "=" values, short aliases, the "--" terminator, missing-value error ---
+
+test("parseArgs: boolean flag --flag=true sets true, --flag=false sets false", () => {
+  const configTrue = parseArgs(["--verbose=true"], { booleanOptions: ["verbose"] });
+  assert.equal(configTrue.options.verbose, true);
+
+  const configFalse = parseArgs(["--verbose=false"], { booleanOptions: ["verbose"] });
+  assert.equal(configFalse.options.verbose, false);
+});
+
+test("parseArgs: value option --output consumes next token", () => {
+  const { options } = parseArgs(["--output", "/tmp/out.txt"], { valueOptions: ["output"] });
+  assert.equal(options.output, "/tmp/out.txt");
+});
+
+test("parseArgs: inline value --output=path uses inline value", () => {
+  const { options } = parseArgs(["--output=/tmp/out.txt"], { valueOptions: ["output"] });
+  assert.equal(options.output, "/tmp/out.txt");
+});
+
+test("parseArgs: short alias -o resolved via aliasMap", () => {
+  const { options } = parseArgs(["-o", "/tmp/out.txt"], {
+    valueOptions: ["output"],
+    aliasMap: { o: "output" },
+  });
+  assert.equal(options.output, "/tmp/out.txt");
+});
+
+test("parseArgs: positionals after -- land in positionals array", () => {
+  const { options, positionals } = parseArgs(
+    ["--verbose", "--", "--not-a-flag", "file.txt"],
+    { booleanOptions: ["verbose"] }
+  );
+  assert.equal(options.verbose, true);
+  assert.deepEqual(positionals, ["--not-a-flag", "file.txt"]);
+});
+
+test("parseArgs: missing value for value option throws Error", () => {
+  assert.throws(
+    () => parseArgs(["--output"], { valueOptions: ["output"] }),
+    { message: "Missing value for --output" }
+  );
+});
+
+// --- splitRawArgumentString: whitespace splitting, single/double quoting, backslash escapes (including a trailing one) ---
+
+test("splitRawArgumentString: space-separated tokens", () => {
+  assert.deepEqual(splitRawArgumentString("foo bar baz"), ["foo", "bar", "baz"]);
+});
+
+test("splitRawArgumentString: single-quoted string with spaces becomes one token", () => {
+  assert.deepEqual(splitRawArgumentString("hello 'foo bar' world"), ["hello", "foo bar", "world"]);
+});
+
+test("splitRawArgumentString: double-quoted string with spaces becomes one token", () => {
+  assert.deepEqual(splitRawArgumentString('hello "foo bar" world'), ["hello", "foo bar", "world"]);
+});
+
+test("splitRawArgumentString: backslash escape preserves next char", () => {
+  assert.deepEqual(splitRawArgumentString("foo\\ bar baz"), ["foo bar", "baz"]);
+});
+
+test("splitRawArgumentString: trailing backslash appended literally", () => {
+  assert.deepEqual(splitRawArgumentString("foo\\"), ["foo\\"]);
+});
diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs
@@ -102,7 +102,7 @@ test("rescue command absorbs continue semantics", () => {
   assert.match(rescue, /run the `codex:codex-rescue` subagent in the background/i);
   assert.match(rescue, /default to foreground/i);
   assert.match(rescue, /Do not forward them to `task`/i);
-  assert.match(rescue, /`--model` and `--effort` are runtime-selection flags/i);
+  assert.match(rescue, /`--model`, `--effort`, and `--context` are runtime-selection flags/i);
   assert.match(rescue, /Leave `--effort` unset unless the user explicitly asks for a specific reasoning effort/i);
   assert.match(rescue, /If they ask for `spark`, map it to `gpt-5\.3-codex-spark`/i);
   assert.match(rescue, /If the request includes `--resume`, do not ask whether to continue/i);
@@ -127,7 +127,7 @@ test("rescue command absorbs continue semantics", () => {
   assert.match(agent, /If the user asks for `spark`, map that to `--model gpt-5\.3-codex-spark`/i);
   assert.match(agent, /If the user asks for a concrete model name such as `gpt-5\.4-mini`, pass it through with `--model`/i);
   assert.match(agent, /Return the stdout of the `codex-companion` command exactly as-is/i);
-  assert.match(agent, /If the Bash call fails or Codex cannot be invoked, return nothing/i);
+  assert.match(agent, /If the Bash call fails or Codex cannot be invoked, return the stderr output if available/i);
   assert.match(agent, /gpt-5-4-prompting/);
   assert.match(agent, /only to tighten the user's request into a better Codex prompt/i);
   assert.match(agent, /Do not use that skill to inspect the repository, reason through the problem yourself, draft a solution, or do any independent work/i);
@@ -142,7 +142,7 @@ test("rescue command absorbs continue semantics", () => {
   assert.match(runtimeSkill, /Strip it before calling `task`/i);
   assert.match(runtimeSkill, /`--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`/i);
   assert.match(runtimeSkill, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i);
-  assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return nothing/i);
+  assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return the stderr output if available/i);
   assert.match(readme, /`codex:codex-rescue` subagent/i);
   assert.match(readme, /if you do not pass `--model` or `--effort`, Codex chooses its own defaults/i);
   assert.match(readme, /--model gpt-5\.4-mini --effort medium/i);

diff --git a/tests/fake-codex-fixture.mjs b/tests/fake-codex-fixture.mjs
@@ -169,6 +169,10 @@ function structuredReviewPayload(prompt) {
 }
 
 function taskPayload(prompt, resume) {
+  if (BEHAVIOR === "empty-stdout") {
+    return "";
+  }
+
   if (prompt.includes("<task>") && prompt.includes("Only review the work from the previous Claude turn.")) {
     if (BEHAVIOR === "adversarial-clean") {
       return "ALLOW: No blocking issues found in the previous turn.";

diff --git a/tests/prompts.test.mjs b/tests/prompts.test.mjs
@@ -0,0 +1,25 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+
+import { interpolateTemplate } from "../plugins/codex/scripts/lib/prompts.mjs";
+// Cases covered: single and multiple {{KEY}} substitution, unknown key -> "", placeholder-free input, repeated key.
+test("interpolateTemplate: replaces {{KEY}} with provided variable", () => {
+  assert.equal(interpolateTemplate("Hello {{NAME}}", { NAME: "World" }), "Hello World");
+});
+
+test("interpolateTemplate: replaces multiple different keys in one pass", () => {
+  const result = interpolateTemplate("{{GREETING}}, {{NAME}}!", { GREETING: "Hi", NAME: "Alice" });
+  assert.equal(result, "Hi, Alice!");
+});
+
+test("interpolateTemplate: unknown key is replaced with empty string", () => {
+  assert.equal(interpolateTemplate("Hello {{MISSING}}", {}), "Hello ");
+});
+
+test("interpolateTemplate: template with no placeholders is returned unchanged", () => {
+  assert.equal(interpolateTemplate("no placeholders here", { KEY: "val" }), "no placeholders here");
+});
+
+test("interpolateTemplate: key appearing twice is replaced both times", () => {
+  assert.equal(interpolateTemplate("{{X}} and {{X}}", { X: "ok" }), "ok and ok");
+});