From e494a8e9aa2aa1fb32486781cf608e697c1df9b4 Mon Sep 17 00:00:00 2001 From: Julian Alvarado Date: Thu, 28 May 2026 12:50:19 -0600 Subject: [PATCH] fix: keep background jobs alive across SessionEnd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `--background` is supposed to mean "outlive the session that started me", but `session-lifecycle-hook.mjs:cleanupSessionJobs` was unconditionally terminating every running job belonging to the ending session — including the detached worker that `enqueueBackgroundTask` had just spawned. The result: any background task dispatched from a subagent (whose SessionEnd fires as soon as its turn finishes) was SIGTERM'd a few seconds in. The JSONL transcript froze at the kill timestamp, the parent session's later status probe found no live job, and the caller never got a result. Same hook then also tore down the broker the worker depended on. Fix: * Tag jobs created via `enqueueBackgroundTask` with `background: true`. * `cleanupSessionJobs` now skips termination for background jobs and preserves their entry in `state.json` so any session in the workspace can still poll for status / fetch results. * `handleSessionEnd` defers broker shutdown when active background jobs are still present in the workspace — the broker outlives this session until the last background worker is done with it. Adds a runtime test that mixes a background and a foreground job under the same sessionId, drives SessionEnd, and asserts the foreground worker is killed + its state pruned while the background worker stays alive and remains in state. --- plugins/codex/scripts/codex-companion.mjs | 1 + .../codex/scripts/session-lifecycle-hook.mjs | 40 +++++- tests/runtime.test.mjs | 132 ++++++++++++++++++ 3 files changed, 168 insertions(+), 5 deletions(-) diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 35222fd5..76261830 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -660,6 +660,7 @@ function enqueueBackgroundTask(cwd, job, request) { ...job, status: "queued", phase: "queued", + background: true, pid: child.pid ?? null, logFile, request diff --git a/plugins/codex/scripts/session-lifecycle-hook.mjs b/plugins/codex/scripts/session-lifecycle-hook.mjs index 9655eaef..4638f169 100644 --- a/plugins/codex/scripts/session-lifecycle-hook.mjs +++ b/plugins/codex/scripts/session-lifecycle-hook.mjs @@ -50,12 +50,18 @@ function cleanupSessionJobs(cwd, sessionId) { } const state = loadState(workspaceRoot); - const removedJobs = state.jobs.filter((job) => job.sessionId === sessionId); - if (removedJobs.length === 0) { + const sessionJobs = state.jobs.filter((job) => job.sessionId === sessionId); + if (sessionJobs.length === 0) { return; } - for (const job of removedJobs) { + for (const job of sessionJobs) { + // Background jobs are explicitly dispatched to outlive the session that + // started them. Leave them running and leave their state entry intact so + // any session in the workspace can still poll for status/results. + if (job.background) { + continue; + } const stillRunning = job.status === "queued" || job.status === "running"; if (!stillRunning) { continue; @@ -69,10 +75,25 @@ function cleanupSessionJobs(cwd, sessionId) { saveState(workspaceRoot, { ...state, - jobs: state.jobs.filter((job) => job.sessionId !== sessionId) + jobs: state.jobs.filter((job) => job.sessionId !== sessionId || job.background) }); } +function hasActiveBackgroundJobs(cwd) { + if (!cwd) { + return false; + } + const workspaceRoot = resolveWorkspaceRoot(cwd); + const stateFile = resolveStateFile(workspaceRoot); + if (!fs.existsSync(stateFile)) { + return false; + } + const state = loadState(workspaceRoot); + return state.jobs.some( + (job) => job.background && (job.status === "queued" || job.status === "running") + ); +} + function handleSessionStart(input) { appendEnvVar(SESSION_ID_ENV, input.session_id); appendEnvVar(PLUGIN_DATA_ENV, process.env[PLUGIN_DATA_ENV]); @@ -95,11 +116,20 @@ async function handleSessionEnd(input) { const sessionDir = brokerSession?.sessionDir ?? null; const pid = brokerSession?.pid ?? null; + cleanupSessionJobs(cwd, input.session_id || process.env[SESSION_ID_ENV]); + + // Detached background workers depend on the broker for codex app-server + // calls. If any background jobs are still active in this workspace, leave + // the broker running — a later SessionEnd (or the workers themselves) will + // tear it down when nothing depends on it anymore. + if (hasActiveBackgroundJobs(cwd)) { + return; + } + if (brokerEndpoint) { await sendBrokerShutdown(brokerEndpoint); } - cleanupSessionJobs(cwd, input.session_id || process.env[SESSION_ID_ENV]); teardownBrokerSession({ endpoint: brokerEndpoint, pidFile, diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 90408372..96b27363 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -1787,6 +1787,138 @@ test("session end fully cleans up jobs for the ending session", async (t) => { assert.equal(otherJob.logFile, otherSessionLog); }); +test("session end preserves background jobs and their broker so workers survive their dispatching session", async (t) => { + const repo = makeTempDir(); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const stateDir = resolveStateDir(repo); + const jobsDir = path.join(stateDir, "jobs"); + fs.mkdirSync(jobsDir, { recursive: true }); + + const backgroundLog = path.join(jobsDir, "background.log"); + const foregroundLog = path.join(jobsDir, "foreground.log"); + const backgroundJobFile = path.join(jobsDir, "task-background.json"); + const foregroundJobFile = path.join(jobsDir, "review-foreground.json"); + fs.writeFileSync(backgroundLog, "background\n", "utf8"); + fs.writeFileSync(foregroundLog, "foreground\n", "utf8"); + fs.writeFileSync(backgroundJobFile, JSON.stringify({ id: "task-background" }, null, 2), "utf8"); + fs.writeFileSync(foregroundJobFile, JSON.stringify({ id: "review-foreground" }, null, 2), "utf8"); + + const backgroundSleeper = spawn(process.execPath, ["-e", "setInterval(() => {}, 1000)"], { + cwd: repo, + detached: true, + stdio: "ignore" + }); + backgroundSleeper.unref(); + const foregroundSleeper = spawn(process.execPath, ["-e", "setInterval(() => {}, 1000)"], { + cwd: repo, + detached: true, + stdio: "ignore" + }); + foregroundSleeper.unref(); + + t.after(() => { + for (const proc of [backgroundSleeper, foregroundSleeper]) { + try { + process.kill(-proc.pid, "SIGTERM"); + } catch { + try { + process.kill(proc.pid, "SIGTERM"); + } catch { + // Ignore missing process. + } + } + } + }); + + fs.writeFileSync( + path.join(stateDir, "state.json"), + `${JSON.stringify( + { + version: 1, + config: { stopReviewGate: false }, + jobs: [ + { + id: "task-background", + status: "running", + title: "Codex Task", + sessionId: "sess-current", + background: true, + pid: backgroundSleeper.pid, + logFile: backgroundLog, + createdAt: "2026-03-18T15:30:00.000Z", + updatedAt: "2026-03-18T15:31:00.000Z" + }, + { + id: "review-foreground", + status: "running", + title: "Codex Review", + sessionId: "sess-current", + pid: foregroundSleeper.pid, + logFile: foregroundLog, + createdAt: "2026-03-18T15:32:00.000Z", + updatedAt: "2026-03-18T15:33:00.000Z" + } + ] + }, + null, + 2 + )}\n`, + "utf8" + ); + + const result = run("node", [SESSION_HOOK, "SessionEnd"], { + cwd: repo, + env: { + ...process.env, + CODEX_COMPANION_SESSION_ID: "sess-current" + }, + input: JSON.stringify({ + hook_event_name: "SessionEnd", + session_id: "sess-current", + cwd: repo + }) + }); + + assert.equal(result.status, 0, result.stderr); + + // Foreground job killed + pruned from state. + await waitFor(() => { + try { + process.kill(foregroundSleeper.pid, 0); + return false; + } catch (error) { + return error?.code === "ESRCH"; + } + }); + + // Background job still alive — its worker outlives the session that started it. + assert.equal( + (() => { + try { + process.kill(backgroundSleeper.pid, 0); + return true; + } catch { + return false; + } + })(), + true, + "background job worker should not be terminated by SessionEnd" + ); + + const state = JSON.parse(fs.readFileSync(path.join(stateDir, "state.json"), "utf8")); + assert.deepEqual( + state.jobs.map((job) => job.id), + ["task-background"], + "background job stays in state so later sessions can poll it" + ); + assert.equal(fs.existsSync(backgroundJobFile), true, "background job file preserved"); + assert.equal(fs.existsSync(backgroundLog), true, "background log preserved"); +}); + test("stop hook runs a stop-time review task and blocks on findings when the review gate is enabled", () => { const repo = makeTempDir(); const binDir = makeTempDir();