From 531de047a94aa6c3c26042b719756c8633776d73 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Mon, 19 Jan 2026 22:13:26 +0100 Subject: [PATCH 01/20] =?UTF-8?q?=F0=9F=A4=96=20feat:=20add=20harness=20+?= =?UTF-8?q?=20Ralph=20loop?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds workspace-local harness config (checklist + gates) and an opt-in Ralph loop runner. - Backend services: WorkspaceHarnessService, GateRunnerService, GitCheckpointService, LoopRunnerService - ORPC: workspace.harness + workspace.loop endpoints - UI: RightSidebar Harness tab + command palette actions for gates/checkpoint/loop Signed-off-by: Thomas Kosiewski --- _Generated with • Model: openai:gpt-5.2 • Thinking: high • Cost: 0.17_ Change-Id: I99428a620b0bd65e9b9a2bb9023b9dd9e0843bc1 --- src/browser/components/RightSidebar.tsx | 12 + .../components/RightSidebar/HarnessTab.tsx | 362 +++++++++++ .../RightSidebar/tabs/TabLabels.tsx | 10 +- .../components/RightSidebar/tabs/index.ts | 1 + .../components/RightSidebar/tabs/registry.ts | 51 +- src/browser/types/rightSidebar.ts | 9 +- src/browser/utils/commandIds.ts | 8 + src/browser/utils/commands/sources.ts | 71 +++ src/browser/utils/rightSidebarLayout.ts | 8 +- src/cli/cli.test.ts | 4 + src/cli/server.test.ts | 4 + src/cli/server.ts | 4 + src/common/orpc/schemas.ts | 16 + src/common/orpc/schemas/api.ts | 92 +++ src/common/orpc/schemas/harness.ts | 110 ++++ src/common/types/harness.ts | 26 + src/common/types/message.ts | 10 + src/desktop/main.ts | 4 + src/node/orpc/context.ts | 8 + src/node/orpc/router.ts | 255 ++++++++ src/node/services/gateRunnerService.ts | 168 ++++++ src/node/services/gitCheckpointService.ts | 169 ++++++ src/node/services/loopRunnerService.ts | 570 ++++++++++++++++++ src/node/services/serviceContainer.ts | 19 + src/node/services/workspaceHarnessService.ts | 529 ++++++++++++++++ tests/ipc/setup.ts | 5 + 26 files changed, 2502 insertions(+), 23 deletions(-) create mode 100644 
src/browser/components/RightSidebar/HarnessTab.tsx create mode 100644 src/common/orpc/schemas/harness.ts create mode 100644 src/common/types/harness.ts create mode 100644 src/node/services/gateRunnerService.ts create mode 100644 src/node/services/gitCheckpointService.ts create mode 100644 src/node/services/loopRunnerService.ts create mode 100644 src/node/services/workspaceHarnessService.ts diff --git a/src/browser/components/RightSidebar.tsx b/src/browser/components/RightSidebar.tsx index 276ca1556e..cfff951312 100644 --- a/src/browser/components/RightSidebar.tsx +++ b/src/browser/components/RightSidebar.tsx @@ -71,6 +71,7 @@ import { import { CostsTabLabel, ExplorerTabLabel, + HarnessTabLabel, FileTabLabel, ReviewTabLabel, StatsTabLabel, @@ -79,6 +80,7 @@ import { type ReviewStats, } from "./RightSidebar/tabs"; import { FileViewerTab } from "./RightSidebar/FileViewer"; +import { HarnessTab } from "./RightSidebar/HarnessTab"; import { ExplorerTab } from "./RightSidebar/ExplorerTab"; import { DndContext, @@ -345,6 +347,8 @@ const RightSidebarTabsetNode: React.FC = (props) => label = ; } else if (tab === "explorer") { label = ; + } else if (tab === "harness") { + label = ; } else if (tab === "stats") { label = ; } else if (isTerminal) { @@ -385,11 +389,13 @@ const RightSidebarTabsetNode: React.FC = (props) => const costsPanelId = `${tabsetBaseId}-panel-costs`; const reviewPanelId = `${tabsetBaseId}-panel-review`; + const harnessPanelId = `${tabsetBaseId}-panel-harness`; const explorerPanelId = `${tabsetBaseId}-panel-explorer`; const statsPanelId = `${tabsetBaseId}-panel-stats`; const costsTabId = `${tabsetBaseId}-tab-costs`; const reviewTabId = `${tabsetBaseId}-tab-review`; + const harnessTabId = `${tabsetBaseId}-tab-harness`; const explorerTabId = `${tabsetBaseId}-tab-explorer`; const statsTabId = `${tabsetBaseId}-tab-stats`; @@ -512,6 +518,12 @@ const RightSidebarTabsetNode: React.FC = (props) => )} + {props.node.activeTab === "harness" && ( +
+ +
+ )} + {props.node.activeTab === "explorer" && (
(null); + const [error, setError] = React.useState(null); + const [busy, setBusy] = React.useState(false); + + const refresh = React.useCallback(async () => { + if (!apiState.api) return; + + setError(null); + try { + const result = await apiState.api.workspace.harness.get({ workspaceId: props.workspaceId }); + if (!result.success) { + setError(result.error); + return; + } + setData(result.data); + } catch (err) { + setError(err instanceof Error ? err.message : String(err)); + } + }, [apiState.api, props.workspaceId]); + + React.useEffect(() => { + void refresh(); + }, [refresh]); + + // Keep loop state live while the tab is mounted. + React.useEffect(() => { + const api = apiState.api; + if (!api) return; + + const abortController = new AbortController(); + const { signal } = abortController; + + (async () => { + try { + const iterator = await api.workspace.loop.subscribe( + { workspaceId: props.workspaceId }, + { signal } + ); + + for await (const loopState of iterator) { + if (signal.aborted) break; + setData((prev) => (prev ? { ...prev, loopState } : prev)); + } + } catch (err) { + if (!signal.aborted) { + console.error("Failed to subscribe to loop state:", err); + } + } + })(); + + return () => abortController.abort(); + }, [apiState.api, props.workspaceId]); + + const runAction = React.useCallback( + async (fn: (api: APIClient) => Promise) => { + const api = apiState.api; + if (!api) return; + + setBusy(true); + setError(null); + try { + await fn(api); + await refresh(); + } catch (err) { + setError(err instanceof Error ? err.message : String(err)); + } finally { + setBusy(false); + } + }, + [apiState.api, refresh] + ); + + if (apiState.status !== "connected" && apiState.status !== "degraded") { + return ( +
+
+

API not connected.

+
+
+ ); + } + + if (!data) { + return ( +
+
+

Harness

+ +
+ {error &&
{error}
} +
Loading…
+
+ ); + } + + const loopState = data.loopState; + + return ( +
+
+

Harness

+
+ +
+
+ + {error &&
{error}
} + +
+ + + +
+ +
+
Files
+
+
{data.paths.progressPath}
+
{data.paths.configPath}
+
+ {!data.exists && ( +
+ No harness file yet. Create it by editing the config path above. +
+ )} +
+ +
+
+
+
Loop
+
+ {loopState.status} • iteration {loopState.iteration} +
+
+
+ {loopState.status !== "running" ? ( + + ) : ( + + )} + +
+
+ +
+
+
Started
+
{formatTimestamp(loopState.startedAt)}
+
+
+
Failures
+
{loopState.consecutiveFailures}
+
+
+
Current item
+
{loopState.currentItemTitle ?? "—"}
+
+
+
Stopped reason
+
{loopState.stoppedReason ?? "—"}
+
+
+
+ +
+
Checklist
+
+ {data.config.checklist.length === 0 ? ( +
(no checklist items)
+ ) : ( +
    + {data.config.checklist.map((item) => ( +
  • + {formatChecklistStatus(item.status)}{" "} + {item.title} +
  • + ))} +
+ )} +
+
+ +
+
Last gates
+
+ {data.lastGateRun ? ( + <> +
+ {data.lastGateRun.ok ? "PASS" : "FAIL"} •{" "} + {Math.round(data.lastGateRun.totalDurationMs / 1000)}s • finished{" "} + {formatTimestamp(data.lastGateRun.finishedAt)} +
+ {data.lastGateRun.results.length > 0 && ( +
+ Details +
+ {data.lastGateRun.results.map((r, idx) => ( +
+
{r.command}
+
exit {r.exitCode}
+ {(r.stderr || r.stdout) && ( +
+                            {(r.stderr ? `stderr:\n${r.stderr}\n` : "") +
+                              (r.stdout ? `stdout:\n${r.stdout}` : "")}
+                          
+ )} +
+ ))} +
+
+ )} + + ) : ( +
(not run yet)
+ )} +
+
+ +
+
Last checkpoint
+
+ {data.lastCheckpoint ? ( + <> +
{data.lastCheckpoint.committed ? "Committed" : "No changes"}
+
{data.lastCheckpoint.commitSha ?? "—"}
+
{data.lastCheckpoint.commitMessage ?? "—"}
+ + ) : ( +
(none)
+ )} +
+
+
+ ); +} diff --git a/src/browser/components/RightSidebar/tabs/TabLabels.tsx b/src/browser/components/RightSidebar/tabs/TabLabels.tsx index 26c33d9734..cc2aa63b68 100644 --- a/src/browser/components/RightSidebar/tabs/TabLabels.tsx +++ b/src/browser/components/RightSidebar/tabs/TabLabels.tsx @@ -5,7 +5,7 @@ */ import React from "react"; -import { ExternalLink, FolderTree, Terminal as TerminalIcon, X } from "lucide-react"; +import { ExternalLink, FolderTree, ListChecks, Terminal as TerminalIcon, X } from "lucide-react"; import { Tooltip, TooltipContent, TooltipTrigger } from "../../ui/tooltip"; import { FileIcon } from "../../FileIcon"; import { formatTabDuration, type ReviewStats } from "./registry"; @@ -71,6 +71,14 @@ export const ExplorerTabLabel: React.FC = () => ( ); +/** Harness tab label with checklist icon */ +export const HarnessTabLabel: React.FC = () => ( + + + Harness + +); + interface FileTabLabelProps { /** File path (relative to workspace) */ filePath: string; diff --git a/src/browser/components/RightSidebar/tabs/index.ts b/src/browser/components/RightSidebar/tabs/index.ts index 4744d46062..3a61fbc7fa 100644 --- a/src/browser/components/RightSidebar/tabs/index.ts +++ b/src/browser/components/RightSidebar/tabs/index.ts @@ -24,6 +24,7 @@ export { export { CostsTabLabel, ExplorerTabLabel, + HarnessTabLabel, FileTabLabel, ReviewTabLabel, StatsTabLabel, diff --git a/src/browser/components/RightSidebar/tabs/registry.ts b/src/browser/components/RightSidebar/tabs/registry.ts index 9662e2c0cf..65de65c1c1 100644 --- a/src/browser/components/RightSidebar/tabs/registry.ts +++ b/src/browser/components/RightSidebar/tabs/registry.ts @@ -78,25 +78,30 @@ export interface TabConfig { } /** Static tab configurations (non-terminal tabs) */ -export const TAB_CONFIGS: Record<"costs" | "review" | "explorer" | "stats", TabConfig> = { - costs: { - name: "Costs", - contentClassName: "overflow-y-auto p-[15px]", - }, - review: { - name: "Review", - contentClassName: 
"overflow-y-auto p-0", - }, - explorer: { - name: "Explorer", - contentClassName: "overflow-y-auto p-0", - }, - stats: { - name: "Stats", - contentClassName: "overflow-y-auto p-[15px]", - featureFlag: "statsTab", - }, -}; +export const TAB_CONFIGS: Record<"costs" | "review" | "explorer" | "harness" | "stats", TabConfig> = + { + costs: { + name: "Costs", + contentClassName: "overflow-y-auto p-[15px]", + }, + review: { + name: "Review", + contentClassName: "overflow-y-auto p-0", + }, + harness: { + name: "Harness", + contentClassName: "overflow-y-auto p-[15px]", + }, + explorer: { + name: "Explorer", + contentClassName: "overflow-y-auto p-0", + }, + stats: { + name: "Stats", + contentClassName: "overflow-y-auto p-[15px]", + featureFlag: "statsTab", + }, + }; /** Terminal tab configuration */ export const TERMINAL_TAB_CONFIG: TabConfig = { @@ -114,7 +119,13 @@ export const FILE_TAB_CONFIG: TabConfig = { /** Get config for a tab type */ export function getTabConfig(tab: TabType): TabConfig { - if (tab === "costs" || tab === "review" || tab === "explorer" || tab === "stats") { + if ( + tab === "costs" || + tab === "review" || + tab === "explorer" || + tab === "harness" || + tab === "stats" + ) { return TAB_CONFIGS[tab]; } // File tabs diff --git a/src/browser/types/rightSidebar.ts b/src/browser/types/rightSidebar.ts index b12d6796de..e00036d83d 100644 --- a/src/browser/types/rightSidebar.ts +++ b/src/browser/types/rightSidebar.ts @@ -1,4 +1,11 @@ -export const RIGHT_SIDEBAR_TABS = ["costs", "review", "terminal", "explorer", "stats"] as const; +export const RIGHT_SIDEBAR_TABS = [ + "costs", + "review", + "terminal", + "explorer", + "harness", + "stats", +] as const; /** Base tab types that are always valid */ export type BaseTabType = (typeof RIGHT_SIDEBAR_TABS)[number]; diff --git a/src/browser/utils/commandIds.ts b/src/browser/utils/commandIds.ts index 9b86a083a8..e5dc25562a 100644 --- a/src/browser/utils/commandIds.ts +++ b/src/browser/utils/commandIds.ts @@ -44,6 
+44,14 @@ export const CommandIds = { chatInterrupt: () => "chat:interrupt" as const, chatJumpBottom: () => "chat:jumpBottom" as const, chatVoiceInput: () => "chat:voiceInput" as const, + + // Harness commands + harnessRunGates: () => "harness:runGates" as const, + harnessCheckpoint: () => "harness:checkpoint" as const, + harnessResetContext: () => "harness:resetContext" as const, + harnessLoopStart: () => "harness:loop:start" as const, + harnessLoopPause: () => "harness:loop:pause" as const, + harnessLoopStop: () => "harness:loop:stop" as const, chatClearTimingStats: () => "chat:clearTimingStats" as const, // Mode commands diff --git a/src/browser/utils/commands/sources.ts b/src/browser/utils/commands/sources.ts index e4380ec324..1c71b84240 100644 --- a/src/browser/utils/commands/sources.ts +++ b/src/browser/utils/commands/sources.ts @@ -90,6 +90,7 @@ export const COMMAND_SECTIONS = { WORKSPACES: "Workspaces", LAYOUTS: "Layouts", NAVIGATION: "Navigation", + HARNESS: "Harness", CHAT: "Chat", MODE: "Modes & Model", HELP: "Help", @@ -102,6 +103,7 @@ const section = { layouts: COMMAND_SECTIONS.LAYOUTS, workspaces: COMMAND_SECTIONS.WORKSPACES, navigation: COMMAND_SECTIONS.NAVIGATION, + harness: COMMAND_SECTIONS.HARNESS, chat: COMMAND_SECTIONS.CHAT, appearance: COMMAND_SECTIONS.APPEARANCE, mode: COMMAND_SECTIONS.MODE, @@ -641,6 +643,75 @@ export function buildCoreSources(p: BuildSourcesParams): Array<() => CommandActi window.dispatchEvent(createCustomEvent(CUSTOM_EVENTS.TOGGLE_VOICE_INPUT)); }, }); + list.push({ + id: CommandIds.harnessRunGates(), + title: "Run harness gates", + subtitle: "Run the workspace harness gate commands", + section: section.harness, + run: async () => { + const result = await p.api?.workspace.harness?.runGates({ workspaceId: id }); + if (result && !result.success) { + console.error(result.error); + } + }, + }); + list.push({ + id: CommandIds.harnessCheckpoint(), + title: "Harness checkpoint", + subtitle: "Commit changes if gates are passing", + 
section: section.harness, + run: async () => { + const result = await p.api?.workspace.harness?.checkpoint({ workspaceId: id }); + if (result && !result.success) { + console.error(result.error); + } + }, + }); + list.push({ + id: CommandIds.harnessLoopStart(), + title: "Harness loop: Start", + section: section.harness, + run: async () => { + const result = await p.api?.workspace.loop?.start({ workspaceId: id }); + if (result && !result.success) { + console.error(result.error); + } + }, + }); + list.push({ + id: CommandIds.harnessLoopPause(), + title: "Harness loop: Pause", + section: section.harness, + run: async () => { + const result = await p.api?.workspace.loop?.pause({ workspaceId: id }); + if (result && !result.success) { + console.error(result.error); + } + }, + }); + list.push({ + id: CommandIds.harnessLoopStop(), + title: "Harness loop: Stop", + section: section.harness, + run: async () => { + const result = await p.api?.workspace.loop?.stop({ workspaceId: id }); + if (result && !result.success) { + console.error(result.error); + } + }, + }); + list.push({ + id: CommandIds.harnessResetContext(), + title: "Harness reset context", + subtitle: "Replace chat history with a harness bearings summary", + section: section.harness, + run: async () => { + const result = await p.api?.workspace.harness?.resetContext({ workspaceId: id }); + if (result && !result.success) { + console.error(result.error); + } + }, + }); list.push({ id: CommandIds.chatClearTimingStats(), title: "Clear Timing Stats", diff --git a/src/browser/utils/rightSidebarLayout.ts b/src/browser/utils/rightSidebarLayout.ts index 00e5b303a1..f1fb6b56f8 100644 --- a/src/browser/utils/rightSidebarLayout.ts +++ b/src/browser/utils/rightSidebarLayout.ts @@ -58,7 +58,7 @@ export interface RightSidebarLayoutState { export function getDefaultRightSidebarLayoutState(activeTab: TabType): RightSidebarLayoutState { // Default tabs exclude terminal - users add terminals via the "+" button - const baseTabs: 
TabType[] = ["costs", "review", "explorer"]; + const baseTabs: TabType[] = ["costs", "review", "explorer", "harness"]; const tabs = baseTabs.includes(activeTab) ? baseTabs : [...baseTabs, activeTab]; return { @@ -109,6 +109,12 @@ export function parseRightSidebarLayoutState( if (!layoutContainsTab(raw.root, "explorer")) { injectTabIntoLayout(raw.root, "explorer"); } + + // Migrate: inject "harness" tab if missing from persisted layout + if (!layoutContainsTab(raw.root, "harness")) { + injectTabIntoLayout(raw.root, "harness"); + } + return raw; } diff --git a/src/cli/cli.test.ts b/src/cli/cli.test.ts index 102814c0ff..15d6cc2c96 100644 --- a/src/cli/cli.test.ts +++ b/src/cli/cli.test.ts @@ -82,6 +82,10 @@ async function createTestServer(authToken?: string): Promise { sessionUsageService: services.sessionUsageService, signingService: services.signingService, coderService: services.coderService, + workspaceHarnessService: services.workspaceHarnessService, + gateRunnerService: services.gateRunnerService, + gitCheckpointService: services.gitCheckpointService, + loopRunnerService: services.loopRunnerService, }; // Use the actual createOrpcServer function diff --git a/src/cli/server.test.ts b/src/cli/server.test.ts index 70c670b2cd..9d62d28ef4 100644 --- a/src/cli/server.test.ts +++ b/src/cli/server.test.ts @@ -85,6 +85,10 @@ async function createTestServer(): Promise { sessionUsageService: services.sessionUsageService, signingService: services.signingService, coderService: services.coderService, + workspaceHarnessService: services.workspaceHarnessService, + gateRunnerService: services.gateRunnerService, + gitCheckpointService: services.gitCheckpointService, + loopRunnerService: services.loopRunnerService, }; // Use the actual createOrpcServer function diff --git a/src/cli/server.ts b/src/cli/server.ts index 0d47c99fe4..28e24387cf 100644 --- a/src/cli/server.ts +++ b/src/cli/server.ts @@ -118,6 +118,10 @@ const mockWindow: BrowserWindow = { sessionUsageService: 
serviceContainer.sessionUsageService, signingService: serviceContainer.signingService, coderService: serviceContainer.coderService, + workspaceHarnessService: serviceContainer.workspaceHarnessService, + gateRunnerService: serviceContainer.gateRunnerService, + gitCheckpointService: serviceContainer.gitCheckpointService, + loopRunnerService: serviceContainer.loopRunnerService, }; const mdnsAdvertiser = new MdnsAdvertiserService(); diff --git a/src/common/orpc/schemas.ts b/src/common/orpc/schemas.ts index ca2e050761..089fb438bf 100644 --- a/src/common/orpc/schemas.ts +++ b/src/common/orpc/schemas.ts @@ -26,6 +26,22 @@ export { } from "./schemas/workspace"; // Workspace stats schemas +// Harness schemas +export { + HarnessChecklistItemSchema, + HarnessChecklistStatusSchema, + HarnessContextResetStrategySchema, + HarnessGateCommandResultSchema, + HarnessGateRunResultSchema, + HarnessGateSchema, + HarnessLoopSettingsSchema, + HarnessLoopStateSchema, + HarnessLoopStatusSchema, + GitCheckpointResultSchema, + WorkspaceHarnessConfigSchema, + WorkspaceHarnessFilePathsSchema, +} from "./schemas/harness"; + export { ActiveStreamStatsSchema, CompletedStreamStatsSchema, diff --git a/src/common/orpc/schemas/api.ts b/src/common/orpc/schemas/api.ts index a549ef668e..994a6f7210 100644 --- a/src/common/orpc/schemas/api.ts +++ b/src/common/orpc/schemas/api.ts @@ -31,6 +31,13 @@ import { AgentDefinitionPackageSchema, AgentIdSchema, } from "./agentDefinition"; +import { + HarnessGateRunResultSchema, + HarnessLoopStateSchema, + GitCheckpointResultSchema, + WorkspaceHarnessConfigSchema, + WorkspaceHarnessFilePathsSchema, +} from "./harness"; import { MCPAddParamsSchema, MCPRemoveParamsSchema, @@ -693,6 +700,91 @@ export const workspace = { output: ResultSchema(z.void(), z.string()), }, }, + + /** Workspace-local harness config + gates */ + harness: { + get: { + input: z.object({ workspaceId: z.string() }), + output: ResultSchema( + z + .object({ + config: WorkspaceHarnessConfigSchema, + 
paths: WorkspaceHarnessFilePathsSchema, + exists: z.boolean(), + lastGateRun: HarnessGateRunResultSchema.nullable(), + lastCheckpoint: GitCheckpointResultSchema.nullable(), + loopState: HarnessLoopStateSchema, + }) + .strict(), + z.string() + ), + }, + set: { + input: z + .object({ + workspaceId: z.string(), + config: WorkspaceHarnessConfigSchema, + }) + .strict(), + output: ResultSchema(WorkspaceHarnessConfigSchema, z.string()), + }, + runGates: { + input: z.object({ workspaceId: z.string() }), + output: ResultSchema(HarnessGateRunResultSchema, z.string()), + }, + checkpoint: { + input: z + .object({ + workspaceId: z.string(), + messageTemplate: z.string().optional(), + }) + .strict(), + output: ResultSchema(GitCheckpointResultSchema, z.string()), + }, + /** Replace chat history with a short loop-style bearings message. */ + resetContext: { + input: z + .object({ + workspaceId: z.string(), + note: z.string().optional(), + }) + .strict(), + output: ResultSchema(z.void(), z.string()), + }, + }, + /** Ralph loop runner */ + loop: { + getState: { + input: z.object({ workspaceId: z.string() }), + output: HarnessLoopStateSchema, + }, + start: { + input: z.object({ workspaceId: z.string() }), + output: ResultSchema(z.void(), z.string()), + }, + pause: { + input: z + .object({ + workspaceId: z.string(), + reason: z.string().optional(), + }) + .strict(), + output: ResultSchema(z.void(), z.string()), + }, + stop: { + input: z + .object({ + workspaceId: z.string(), + reason: z.string().optional(), + }) + .strict(), + output: ResultSchema(z.void(), z.string()), + }, + subscribe: { + input: z.object({ workspaceId: z.string() }), + output: eventIterator(HarnessLoopStateSchema), + }, + }, }; export type WorkspaceSendMessageOutput = z.infer; diff --git a/src/common/orpc/schemas/harness.ts b/src/common/orpc/schemas/harness.ts new file mode 100644 index 0000000000..ce0fe55543 --- /dev/null +++ b/src/common/orpc/schemas/harness.ts @@ -0,0 +1,110 @@ +import { z } from "zod"; +import 
{ ToolPolicySchema } from "./stream"; + +export const HarnessChecklistStatusSchema = z.enum(["todo", "doing", "done", "blocked"]); + +export const HarnessChecklistItemSchema = z + .object({ + id: z.string().min(1), + title: z.string().min(1), + status: HarnessChecklistStatusSchema, + notes: z.string().optional(), + }) + .strict(); + +export const HarnessGateSchema = z + .object({ + id: z.string().min(1).optional(), + title: z.string().min(1).optional(), + command: z.string().min(1), + timeoutSecs: z.number().int().positive().optional(), + }) + .strict(); + +export const HarnessContextResetStrategySchema = z.enum(["replace_history", "none"]); + +export const HarnessLoopSettingsSchema = z + .object({ + /** Hard cap on iterations for a single run. */ + maxIterations: z.number().int().positive().optional(), + /** Hard cap on wall-clock time for a single run. */ + maxWallTimeMins: z.number().int().positive().optional(), + /** Pause when gates fail this many times in a row. */ + maxConsecutiveFailures: z.number().int().positive().optional(), + /** How to reset context between iterations. */ + contextReset: HarnessContextResetStrategySchema.optional(), + /** When true, auto-commit after gates pass. */ + autoCommit: z.boolean().optional(), + /** Commit message template (supports simple placeholders like {{item}}). */ + commitMessageTemplate: z.string().optional(), + /** Optional tool policy overrides for loop iterations. 
*/ + toolPolicy: ToolPolicySchema.optional(), + }) + .strict(); + +export const WorkspaceHarnessConfigSchema = z + .object({ + version: z.literal(1), + checklist: z.array(HarnessChecklistItemSchema), + gates: z.array(HarnessGateSchema), + loop: HarnessLoopSettingsSchema.optional(), + }) + .strict(); + +export const WorkspaceHarnessFilePathsSchema = z + .object({ + configPath: z.string(), + progressPath: z.string(), + }) + .strict(); + +export const HarnessGateCommandResultSchema = z + .object({ + command: z.string(), + exitCode: z.number(), + durationMs: z.number(), + stdout: z.string(), + stderr: z.string(), + truncatedStdout: z.boolean().optional(), + truncatedStderr: z.boolean().optional(), + }) + .strict(); + +export const HarnessGateRunResultSchema = z + .object({ + ok: z.boolean(), + startedAt: z.number(), + finishedAt: z.number(), + totalDurationMs: z.number(), + results: z.array(HarnessGateCommandResultSchema), + }) + .strict(); + +export const GitCheckpointResultSchema = z + .object({ + committed: z.boolean(), + dirtyBefore: z.boolean(), + dirtyAfter: z.boolean(), + commitSha: z.string().nullable(), + commitMessage: z.string().nullable(), + stdout: z.string().optional(), + stderr: z.string().optional(), + }) + .strict(); + +export const HarnessLoopStatusSchema = z.enum(["stopped", "running", "paused"]); + +export const HarnessLoopStateSchema = z + .object({ + status: HarnessLoopStatusSchema, + startedAt: z.number().nullable(), + iteration: z.number(), + consecutiveFailures: z.number(), + currentItemId: z.string().nullable(), + currentItemTitle: z.string().nullable(), + lastGateRun: HarnessGateRunResultSchema.nullable(), + lastCheckpoint: GitCheckpointResultSchema.nullable(), + lastError: z.string().nullable(), + stoppedReason: z.string().nullable(), + }) + .strict(); diff --git a/src/common/types/harness.ts b/src/common/types/harness.ts new file mode 100644 index 0000000000..ff0d770c7a --- /dev/null +++ b/src/common/types/harness.ts @@ -0,0 +1,26 @@ 
+import type { z } from "zod"; +import type { + HarnessChecklistItemSchema, + HarnessChecklistStatusSchema, + HarnessContextResetStrategySchema, + HarnessGateRunResultSchema, + HarnessGateSchema, + HarnessLoopSettingsSchema, + HarnessLoopStateSchema, + HarnessLoopStatusSchema, + GitCheckpointResultSchema, + WorkspaceHarnessConfigSchema, + WorkspaceHarnessFilePathsSchema, +} from "@/common/orpc/schemas"; + +export type HarnessChecklistStatus = z.infer; +export type HarnessChecklistItem = z.infer; +export type HarnessGate = z.infer; +export type HarnessContextResetStrategy = z.infer; +export type HarnessLoopSettings = z.infer; +export type WorkspaceHarnessConfig = z.infer; +export type WorkspaceHarnessFilePaths = z.infer; +export type HarnessGateRunResult = z.infer; +export type GitCheckpointResult = z.infer; +export type HarnessLoopStatus = z.infer; +export type HarnessLoopState = z.infer; diff --git a/src/common/types/message.ts b/src/common/types/message.ts index 0bb339c759..8a1d1a9487 100644 --- a/src/common/types/message.ts +++ b/src/common/types/message.ts @@ -266,6 +266,16 @@ export type MuxFrontendMetadata = MuxFrontendMetadataBase & type: "plan-display"; // Ephemeral plan display from /plan command path: string; } + | { + type: "harness-bearings"; + } + | { + type: "harness-loop"; + iteration?: number; + } + | { + type: "harness-loop-bearings"; + } | { type: "normal"; // Regular messages } diff --git a/src/desktop/main.ts b/src/desktop/main.ts index c06b3c9abf..438db41b6f 100644 --- a/src/desktop/main.ts +++ b/src/desktop/main.ts @@ -378,6 +378,10 @@ async function loadServices(): Promise { sessionUsageService: services.sessionUsageService, signingService: services.signingService, coderService: services.coderService, + workspaceHarnessService: services.workspaceHarnessService, + gateRunnerService: services.gateRunnerService, + gitCheckpointService: services.gitCheckpointService, + loopRunnerService: services.loopRunnerService, }; 
electronIpcMain.handle("mux:get-is-rosetta", async () => { diff --git a/src/node/orpc/context.ts b/src/node/orpc/context.ts index 3cd5493476..1a2798240b 100644 --- a/src/node/orpc/context.ts +++ b/src/node/orpc/context.ts @@ -2,6 +2,10 @@ import type { IncomingHttpHeaders } from "http"; import type { Config } from "@/node/config"; import type { AIService } from "@/node/services/aiService"; import type { ProjectService } from "@/node/services/projectService"; +import type { WorkspaceHarnessService } from "@/node/services/workspaceHarnessService"; +import type { GateRunnerService } from "@/node/services/gateRunnerService"; +import type { GitCheckpointService } from "@/node/services/gitCheckpointService"; +import type { LoopRunnerService } from "@/node/services/loopRunnerService"; import type { WorkspaceService } from "@/node/services/workspaceService"; import type { MuxGatewayOauthService } from "@/node/services/muxGatewayOauthService"; import type { ProviderService } from "@/node/services/providerService"; @@ -29,6 +33,10 @@ export interface ORPCContext { config: Config; aiService: AIService; projectService: ProjectService; + workspaceHarnessService: WorkspaceHarnessService; + gateRunnerService: GateRunnerService; + gitCheckpointService: GitCheckpointService; + loopRunnerService: LoopRunnerService; workspaceService: WorkspaceService; taskService: TaskService; providerService: ProviderService; diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index db6f11c839..270920dc44 100644 --- a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -18,6 +18,7 @@ import { createAsyncMessageQueue } from "@/common/utils/asyncMessageQueue"; import { createRuntime, checkRuntimeAvailability } from "@/node/runtime/runtimeFactory"; import { createRuntimeForWorkspace } from "@/node/runtime/runtimeHelpers"; import { readPlanFile } from "@/node/utils/runtime/helpers"; +import { createMuxMessage } from "@/common/types/message"; import { secretsToRecord } from 
"@/common/types/secrets"; import { roundToBase2 } from "@/common/telemetry/utils"; import { createAsyncEventQueue } from "@/common/utils/asyncEventIterator"; @@ -1693,6 +1694,260 @@ export const router = (authToken?: string) => { } }), }, + harness: { + get: t + .input(schemas.workspace.harness.get.input) + .output(schemas.workspace.harness.get.output) + .handler(async ({ context, input }) => { + try { + const harness = await context.workspaceHarnessService.getHarnessForWorkspace( + input.workspaceId + ); + const [lastGateRun, lastCheckpoint, loopState] = await Promise.all([ + context.gateRunnerService.getLastGateRun(input.workspaceId), + context.gitCheckpointService.getLastCheckpoint(input.workspaceId), + context.loopRunnerService.getState(input.workspaceId), + ]); + + return { + success: true, + data: { + config: harness.config, + paths: harness.paths, + exists: harness.exists, + lastGateRun, + lastCheckpoint, + loopState, + }, + }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: message }; + } + }), + set: t + .input(schemas.workspace.harness.set.input) + .output(schemas.workspace.harness.set.output) + .handler(async ({ context, input }) => { + try { + const loopState = await context.loopRunnerService.getState(input.workspaceId); + const normalized = await context.workspaceHarnessService.setHarnessForWorkspace( + input.workspaceId, + input.config, + { loopState } + ); + return { success: true, data: normalized }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { success: false, error: message }; + } + }), + runGates: t + .input(schemas.workspace.harness.runGates.input) + .output(schemas.workspace.harness.runGates.output) + .handler(async ({ context, input }) => { + const result = await context.gateRunnerService.runGates(input.workspaceId); + if (!result.success) { + return { success: false, error: result.error }; + } + return { success: true, data: result.data }; + }), + checkpoint: t + .input(schemas.workspace.harness.checkpoint.input) + .output(schemas.workspace.harness.checkpoint.output) + .handler(async ({ context, input }) => { + try { + const harness = await context.workspaceHarnessService.getHarnessForWorkspace( + input.workspaceId + ); + const loopState = await context.loopRunnerService.getState(input.workspaceId); + + const template = + input.messageTemplate ?? + harness.config.loop?.commitMessageTemplate ?? + "mux(harness): {{item}}"; + + const result = await context.gitCheckpointService.checkpoint(input.workspaceId, { + messageTemplate: template, + itemTitle: loopState.currentItemTitle ?? "checkpoint", + iteration: loopState.iteration, + }); + + if (!result.success) { + return { success: false, error: result.error }; + } + + return { success: true, data: result.data }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { success: false, error: message }; + } + }), + resetContext: t + .input(schemas.workspace.harness.resetContext.input) + .output(schemas.workspace.harness.resetContext.output) + .handler(async ({ context, input }) => { + try { + const [harness, loopState, lastGateRun, lastCheckpoint, workspaceInfo] = + await Promise.all([ + context.workspaceHarnessService.getHarnessForWorkspace(input.workspaceId), + context.loopRunnerService.getState(input.workspaceId), + context.gateRunnerService.getLastGateRun(input.workspaceId), + context.gitCheckpointService.getLastCheckpoint(input.workspaceId), + context.workspaceService.getInfo(input.workspaceId), + ]); + + const workspaceName = workspaceInfo?.name ?? input.workspaceId; + const configPathHint = `.mux/${workspaceName}.harness.jsonc`; + const progressPathHint = `.mux/${workspaceName}.harness.progress.md`; + + const lines: string[] = []; + lines.push("# Harness bearings"); + lines.push(""); + lines.push(`- Loop status: ${loopState.status}`); + lines.push(`- Iteration: ${loopState.iteration}`); + if (loopState.currentItemTitle) { + lines.push(`- Current item: ${loopState.currentItemTitle}`); + } + if (lastGateRun) { + lines.push(`- Last gates: ${lastGateRun.ok ? "PASS" : "FAIL"}`); + } + if (lastCheckpoint?.commitSha) { + lines.push(`- Last commit: ${lastCheckpoint.commitSha}`); + } + if (input.note) { + lines.push(`- Note: ${input.note}`); + } + lines.push(""); + lines.push("Harness files:"); + lines.push(`- ${progressPathHint}`); + lines.push(`- ${configPathHint}`); + lines.push(""); + lines.push("Checklist:"); + if (harness.config.checklist.length === 0) { + lines.push("(no checklist items)"); + } else { + for (const item of harness.config.checklist) { + const marker = + item.status === "done" + ? "[x]" + : item.status === "doing" + ? "[~]" + : item.status === "blocked" + ? 
"[!]" + : "[ ]"; + lines.push(`- ${marker} ${item.title}`); + } + } + + const summary = lines.join("\n"); + + const summaryMessage = createMuxMessage( + `harness-reset-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`, + "assistant", + summary, + { + timestamp: Date.now(), + compacted: "user", + mode: "exec", + muxMetadata: { type: "harness-bearings" }, + } + ); + + const replaceResult = await context.workspaceService.replaceHistory( + input.workspaceId, + summaryMessage + ); + if (!replaceResult.success) { + return { success: false, error: replaceResult.error }; + } + + return { success: true, data: undefined }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: message }; + } + }), + }, + loop: { + getState: t + .input(schemas.workspace.loop.getState.input) + .output(schemas.workspace.loop.getState.output) + .handler(async ({ context, input }) => { + return context.loopRunnerService.getState(input.workspaceId); + }), + start: t + .input(schemas.workspace.loop.start.input) + .output(schemas.workspace.loop.start.output) + .handler(async ({ context, input }) => { + try { + const result = await context.loopRunnerService.start(input.workspaceId); + if (!result.success) { + return { success: false, error: result.error }; + } + return { success: true, data: undefined }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: message }; + } + }), + pause: t + .input(schemas.workspace.loop.pause.input) + .output(schemas.workspace.loop.pause.output) + .handler(async ({ context, input }) => { + try { + const result = await context.loopRunnerService.pause(input.workspaceId, input.reason); + if (!result.success) { + return { success: false, error: result.error }; + } + return { success: true, data: undefined }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { success: false, error: message }; + } + }), + stop: t + .input(schemas.workspace.loop.stop.input) + .output(schemas.workspace.loop.stop.output) + .handler(async ({ context, input }) => { + try { + const result = await context.loopRunnerService.stop(input.workspaceId, input.reason); + if (!result.success) { + return { success: false, error: result.error }; + } + return { success: true, data: undefined }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: message }; + } + }), + subscribe: t + .input(schemas.workspace.loop.subscribe.input) + .output(schemas.workspace.loop.subscribe.output) + .handler(async function* ({ context, input }) { + const { workspaceId } = input; + const service = context.loopRunnerService; + + const queue = createAsyncEventQueue>>(); + + const onChange = (changedWorkspaceId: string) => { + if (changedWorkspaceId !== workspaceId) { + return; + } + void service.getState(workspaceId).then(queue.push); + }; + + service.on("change", onChange); + + try { + queue.push(await service.getState(workspaceId)); + yield* queue.iterate(); + } finally { + queue.end(); + service.off("change", onChange); + } + }), + }, }, tasks: { create: t diff --git a/src/node/services/gateRunnerService.ts b/src/node/services/gateRunnerService.ts new file mode 100644 index 0000000000..1673cc92e5 --- /dev/null +++ b/src/node/services/gateRunnerService.ts @@ -0,0 +1,168 @@ +import * as fsPromises from "fs/promises"; +import * as path from "path"; + +import assert from "@/common/utils/assert"; +import { Ok, Err, type Result } from "@/common/types/result"; +import type { HarnessGate, HarnessGateRunResult } from "@/common/types/harness"; +import { HarnessGateRunResultSchema } from "@/common/orpc/schemas"; +import type { Config } from "@/node/config"; +import type { WorkspaceHarnessService } from "@/node/services/workspaceHarnessService"; +import { execBuffered } 
const LAST_GATES_FILENAME = "harness-last-gates.json";

// Keep logs reasonably small for IPC and persisted state. This is only for UI display.
const MAX_OUTPUT_CHARS = 100_000;

/**
 * Caps a captured output stream at MAX_OUTPUT_CHARS characters.
 *
 * When the value is too long its tail (the last MAX_OUTPUT_CHARS characters)
 * is kept and the beginning is dropped.
 *
 * @param value Raw stdout/stderr text.
 * @returns The (possibly shortened) text and whether anything was dropped.
 */
function truncateOutput(value: string): { output: string; truncated: boolean } {
  if (value.length <= MAX_OUTPUT_CHARS) {
    return { output: value, truncated: false };
  }
  return { output: value.slice(-MAX_OUTPUT_CHARS), truncated: true };
}

/**
 * Runs the harness gates (shell commands) of a workspace and remembers the
 * most recent run in `<sessionsDir>/<workspaceId>/harness-last-gates.json`.
 */
export class GateRunnerService {
  constructor(
    private readonly config: Config,
    private readonly workspaceHarnessService: WorkspaceHarnessService
  ) {
    assert(config, "GateRunnerService requires a Config instance");
    assert(
      workspaceHarnessService,
      "GateRunnerService requires a WorkspaceHarnessService instance"
    );
  }

  /** Absolute path of the persisted "last gate run" file for a workspace. */
  private getLastGatesPath(workspaceId: string): string {
    assert(typeof workspaceId === "string", "workspaceId must be a string");
    const trimmed = workspaceId.trim();
    assert(trimmed.length > 0, "workspaceId must not be empty");
    return path.join(this.config.sessionsDir, trimmed, LAST_GATES_FILENAME);
  }

  /**
   * Reads the last persisted gate run.
   *
   * @returns The parsed run, or `null` when the file is missing, unreadable,
   *   not valid JSON, or does not match `HarnessGateRunResultSchema`.
   */
  async getLastGateRun(workspaceId: string): Promise<HarnessGateRunResult | null> {
    const filePath = this.getLastGatesPath(workspaceId);

    try {
      const raw = await fsPromises.readFile(filePath, "utf-8");
      const parsed: unknown = JSON.parse(raw) as unknown;
      const result = HarnessGateRunResultSchema.safeParse(parsed);
      return result.success ? result.data : null;
    } catch {
      return null;
    }
  }

  /** Best-effort write of the last gate run; failures are logged, never thrown. */
  private async persistLastGateRun(
    workspaceId: string,
    result: HarnessGateRunResult
  ): Promise<void> {
    const filePath = this.getLastGatesPath(workspaceId);
    const dir = path.dirname(filePath);

    try {
      await fsPromises.mkdir(dir, { recursive: true });
      await fsPromises.writeFile(filePath, JSON.stringify(result, null, 2) + "\n", "utf-8");
    } catch (error) {
      log.debug("[HARNESS] Failed to persist last gate run", { workspaceId, error });
    }
  }

  /**
   * Runs the gates one after another in the workspace directory and stops at
   * the first gate that fails (non-zero exit code or a thrown execution error).
   *
   * @param workspaceId Workspace whose gates should run.
   * @param gatesOverride Gates to run instead of the ones in the harness config.
   * @returns `Ok(run)` whenever the gates were executed (inspect `run.ok` for
   *   the verdict), or `Err(message)` when the runtime could not be made ready.
   */
  async runGates(
    workspaceId: string,
    gatesOverride?: HarnessGate[]
  ): Promise<Result<HarnessGateRunResult>> {
    assert(typeof workspaceId === "string", "workspaceId must be a string");

    const gates =
      gatesOverride ??
      (await this.workspaceHarnessService.getHarnessForWorkspace(workspaceId)).config.gates;

    const startedAt = Date.now();

    // Nothing to run: record a trivially passing run without touching the runtime.
    if (gates.length === 0) {
      const finishedAt = Date.now();
      const emptyRun: HarnessGateRunResult = {
        ok: true,
        startedAt,
        finishedAt,
        totalDurationMs: finishedAt - startedAt,
        results: [],
      };
      await this.persistLastGateRun(workspaceId, emptyRun);
      return Ok(emptyRun);
    }

    const { runtime, workspacePath } =
      await this.workspaceHarnessService.getRuntimeAndWorkspacePath(workspaceId);

    const readyResult = await runtime.ensureReady();
    if (!readyResult.ready) {
      return Err(readyResult.error ?? "Runtime not ready");
    }

    const results: HarnessGateRunResult["results"] = [];
    let ok = true;

    for (const gate of gates) {
      const timeout = gate.timeoutSecs ?? 10 * 60;
      const gateStartedAt = Date.now();

      try {
        const execResult = await execBuffered(runtime, gate.command, {
          cwd: workspacePath,
          timeout,
        });

        const stdout = truncateOutput(execResult.stdout);
        const stderr = truncateOutput(execResult.stderr);

        results.push({
          command: gate.command,
          exitCode: execResult.exitCode,
          durationMs: execResult.duration,
          stdout: stdout.output,
          stderr: stderr.output,
          truncatedStdout: stdout.truncated || undefined,
          truncatedStderr: stderr.truncated || undefined,
        });

        if (execResult.exitCode !== 0) {
          ok = false;
        }
      } catch (error) {
        ok = false;
        const message = error instanceof Error ? error.message : String(error);

        results.push({
          command: gate.command,
          exitCode: 1,
          // Report how long the gate ran before it threw instead of a misleading 0.
          durationMs: Date.now() - gateStartedAt,
          stdout: "",
          stderr: message,
        });
      }

      if (!ok) {
        // Stop at the first failure to keep iterations tight (Ralph-style backpressure).
        break;
      }
    }

    const finishedAt = Date.now();

    const run: HarnessGateRunResult = {
      ok,
      startedAt,
      finishedAt,
      totalDurationMs: finishedAt - startedAt,
      results,
    };

    await this.persistLastGateRun(workspaceId, run);

    // The progress file is a convenience artifact: a failure to refresh it must
    // not turn an already executed (and persisted) gate run into an exception.
    try {
      await this.workspaceHarnessService.updateProgressFile(workspaceId);
    } catch (error) {
      log.debug("[HARNESS] Failed to update progress file", { workspaceId, error });
    }

    return Ok(run);
  }
}
const LAST_CHECKPOINT_FILENAME = "harness-last-checkpoint.json";

// Keep stdout/stderr small enough to store in session state.
const MAX_LOG_CHARS = 50_000;

/** Keeps at most the last MAX_LOG_CHARS characters of a log string. */
function truncateLog(value: string): string {
  return value.length <= MAX_LOG_CHARS ? value : value.slice(-MAX_LOG_CHARS);
}

/**
 * Expands `{{ name }}` placeholders in a template.
 *
 * Only the object's own keys are substituted, so placeholders such as
 * `{{constructor}}` or `{{__proto__}}` cannot pull values off
 * `Object.prototype`. Unknown placeholders expand to the empty string.
 *
 * @param template Text containing `{{ name }}` placeholders.
 * @param vars Placeholder values keyed by name.
 * @returns The template with every placeholder replaced.
 */
function renderTemplate(template: string, vars: Record<string, string>): string {
  return template.replace(/\{\{\s*([^}]+?)\s*\}\}/g, (_m, keyRaw: string) => {
    const key = keyRaw.trim();
    if (!Object.prototype.hasOwnProperty.call(vars, key)) {
      return "";
    }
    return vars[key] ?? "";
  });
}

/**
 * Creates git "checkpoint" commits inside a workspace and remembers the most
 * recent outcome in `<sessionsDir>/<workspaceId>/harness-last-checkpoint.json`.
 */
export class GitCheckpointService {
  constructor(
    private readonly config: Config,
    private readonly workspaceHarnessService: WorkspaceHarnessService
  ) {
    assert(config, "GitCheckpointService requires a Config instance");
    assert(
      workspaceHarnessService,
      "GitCheckpointService requires a WorkspaceHarnessService instance"
    );
  }

  /** Absolute path of the persisted "last checkpoint" file for a workspace. */
  private getLastCheckpointPath(workspaceId: string): string {
    assert(typeof workspaceId === "string", "workspaceId must be a string");
    const trimmed = workspaceId.trim();
    assert(trimmed.length > 0, "workspaceId must not be empty");
    return path.join(this.config.sessionsDir, trimmed, LAST_CHECKPOINT_FILENAME);
  }

  /**
   * Reads the last persisted checkpoint result.
   *
   * @returns The parsed result, or `null` when the file is missing, unreadable,
   *   not valid JSON, or does not match `GitCheckpointResultSchema`.
   */
  async getLastCheckpoint(workspaceId: string): Promise<GitCheckpointResult | null> {
    const filePath = this.getLastCheckpointPath(workspaceId);

    try {
      const raw = await fsPromises.readFile(filePath, "utf-8");
      const parsed: unknown = JSON.parse(raw) as unknown;
      const result = GitCheckpointResultSchema.safeParse(parsed);
      return result.success ? result.data : null;
    } catch {
      return null;
    }
  }

  /** Best-effort write of the last checkpoint; failures are logged, never thrown. */
  private async persistLastCheckpoint(
    workspaceId: string,
    result: GitCheckpointResult
  ): Promise<void> {
    const filePath = this.getLastCheckpointPath(workspaceId);
    const dir = path.dirname(filePath);

    try {
      await fsPromises.mkdir(dir, { recursive: true });
      await fsPromises.writeFile(filePath, JSON.stringify(result, null, 2) + "\n", "utf-8");
    } catch (error) {
      log.debug("[HARNESS] Failed to persist last checkpoint", { workspaceId, error });
    }
  }

  /**
   * Stages everything (`git add -A`) and commits it when the work tree is dirty.
   *
   * The commit message is `options.messageTemplate` with the placeholders
   * `{{item}}`, `{{iteration}}` and `{{workspaceId}}` expanded; an empty result
   * falls back to "mux(harness): checkpoint".
   *
   * @returns `Ok` with `committed: false` when there was nothing to commit,
   *   `Ok` with `committed: true` after a successful commit, or `Err` with the
   *   (truncated) git output when the runtime is not ready or `git status`,
   *   `git add` or `git commit` fails.
   */
  async checkpoint(
    workspaceId: string,
    options: { messageTemplate: string; itemTitle?: string; iteration?: number }
  ): Promise<Result<GitCheckpointResult>> {
    assert(typeof workspaceId === "string", "workspaceId must be a string");
    assert(options && typeof options === "object", "options is required");
    assert(typeof options.messageTemplate === "string", "messageTemplate must be a string");

    const { runtime, workspacePath } =
      await this.workspaceHarnessService.getRuntimeAndWorkspacePath(workspaceId);

    const readyResult = await runtime.ensureReady();
    if (!readyResult.ready) {
      return Err(readyResult.error ?? "Runtime not ready");
    }

    const statusBefore = await execBuffered(runtime, "git status --porcelain", {
      cwd: workspacePath,
      timeout: 30,
    });

    // A failing `git status` tells us nothing about the tree; reporting it as
    // "clean, nothing to commit" would silently skip the checkpoint.
    if (statusBefore.exitCode !== 0) {
      return Err(
        truncateLog(
          statusBefore.stderr || statusBefore.stdout || "git status --porcelain failed"
        )
      );
    }

    const dirtyBefore = statusBefore.stdout.trim().length > 0;

    if (!dirtyBefore) {
      const cleanResult: GitCheckpointResult = {
        committed: false,
        dirtyBefore: false,
        dirtyAfter: false,
        commitSha: null,
        commitMessage: null,
      };
      await this.persistLastCheckpoint(workspaceId, cleanResult);
      return Ok(cleanResult);
    }

    const messageRaw = renderTemplate(options.messageTemplate, {
      item: options.itemTitle ?? "(no item)",
      iteration: options.iteration !== undefined ? String(options.iteration) : "",
      workspaceId,
    }).trim();

    const message = messageRaw.length > 0 ? messageRaw : "mux(harness): checkpoint";

    const addResult = await execBuffered(runtime, "git add -A", {
      cwd: workspacePath,
      timeout: 60,
    });

    if (addResult.exitCode !== 0) {
      return Err(truncateLog(addResult.stderr || addResult.stdout || "git add -A failed"));
    }

    // Use shellQuote to keep the commit message stable across runtimes.
    const commitResult = await execBuffered(runtime, `git commit -m ${shellQuote(message)}`, {
      cwd: workspacePath,
      timeout: 120,
    });

    if (commitResult.exitCode !== 0) {
      return Err(truncateLog(commitResult.stderr || commitResult.stdout || "git commit failed"));
    }

    const shaResult = await execBuffered(runtime, "git rev-parse HEAD", {
      cwd: workspacePath,
      timeout: 30,
    });

    // The commit already succeeded; a failing rev-parse only costs us the SHA.
    const commitSha = shaResult.exitCode === 0 ? shaResult.stdout.trim() : "";

    const statusAfter = await execBuffered(runtime, "git status --porcelain", {
      cwd: workspacePath,
      timeout: 30,
    });

    const dirtyAfter = statusAfter.exitCode === 0 && statusAfter.stdout.trim().length > 0;

    const committedResult: GitCheckpointResult = {
      committed: true,
      dirtyBefore,
      dirtyAfter,
      commitSha: commitSha.length > 0 ? commitSha : null,
      commitMessage: message,
      stdout: truncateLog(commitResult.stdout).trim() || undefined,
      stderr: truncateLog(commitResult.stderr).trim() || undefined,
    };

    await this.persistLastCheckpoint(workspaceId, committedResult);

    return Ok(committedResult);
  }
}
/** Returns the trimmed string, or `null` for non-strings and blank strings. */
function coerceNonEmptyString(value: unknown): string | null {
  if (typeof value !== "string") return null;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}

/**
 * Picks the checklist item to work on next: the first item already "doing",
 * otherwise the first "todo" item, otherwise `null`.
 */
function findNextChecklistItem(config: {
  checklist: HarnessChecklistItem[];
}): HarnessChecklistItem | null {
  return (
    config.checklist.find((item) => item.status === "doing") ??
    config.checklist.find((item) => item.status === "todo") ??
    null
  );
}

/** True when at least one checklist item is not "done". */
function hasUnfinishedChecklistItems(config: { checklist: HarnessChecklistItem[] }): boolean {
  return config.checklist.some((item) => item.status !== "done");
}

/** Markdown checkbox marker used for a checklist status in the bearings summary. */
function checklistMarker(status: HarnessChecklistItem["status"]): string {
  switch (status) {
    case "done":
      return "[x]";
    case "doing":
      return "[~]";
    case "blocked":
      return "[!]";
    default:
      return "[ ]";
  }
}

/** Returns a copy of the checklist in which the item with `itemId` has `status`. */
function withChecklistStatus(
  checklist: HarnessChecklistItem[],
  itemId: HarnessChecklistItem["id"],
  status: "doing" | "done"
): HarnessChecklistItem[] {
  return checklist.map((item) => (item.id === itemId ? { ...item, status } : item));
}

/** Builds the user prompt that is sent to the model for one loop iteration. */
function buildIterationPrompt(params: {
  iteration: number;
  itemTitle: string;
  configPathHint: string;
  progressPathHint: string;
}): string {
  return [
    `Ralph loop iteration ${params.iteration}`,
    "",
    `Work on: ${params.itemTitle}`,
    "",
    "Rules:",
    "- Make a small, mergeable change.",
    "- Run the configured gates (see harness config) before stopping.",
    "- Do NOT start the next checklist item.",
    "",
    "Harness files:",
    `- ${params.progressPathHint}`,
    `- ${params.configPathHint}`,
  ].join("\n");
}

/**
 * Renders the markdown "bearings" message that replaces the chat history
 * between iterations: loop facts, harness file hints and the checklist.
 */
function renderLoopSummaryMarkdown(params: {
  workspaceId: string;
  iteration: number;
  currentItemTitle: string | null;
  configPathHint: string;
  progressPathHint: string;
  checklist: HarnessChecklistItem[];
  lastGateRun: HarnessGateRunResult | null;
  lastCommitSha: string | null;
  note?: string;
}): string {
  const lines: string[] = [];

  lines.push("# Ralph loop bearings");
  lines.push("");
  lines.push(`- Workspace: ${params.workspaceId}`);
  lines.push(`- Iteration: ${params.iteration}`);
  if (params.currentItemTitle) {
    lines.push(`- Current item: ${params.currentItemTitle}`);
  }
  if (params.lastGateRun) {
    const verdict = params.lastGateRun.ok ? "PASS" : "FAIL";
    const seconds = Math.round(params.lastGateRun.totalDurationMs / 1000);
    lines.push(`- Gates: ${verdict} (${seconds}s)`);
  }
  if (params.lastCommitSha) {
    lines.push(`- Last commit: ${params.lastCommitSha}`);
  }
  if (params.note) {
    lines.push(`- Note: ${params.note}`);
  }
  lines.push("");

  lines.push("Harness files:");
  lines.push(`- ${params.progressPathHint}`);
  lines.push(`- ${params.configPathHint}`);
  lines.push("");

  lines.push("Checklist:");
  if (params.checklist.length === 0) {
    lines.push("(no checklist items)");
  } else {
    for (const item of params.checklist) {
      lines.push(`- ${checklistMarker(item.status)} ${item.title}`);
    }
  }

  lines.push("");
  lines.push("Continue with one small step, then run gates and stop.");

  return lines.join("\n");
}

/**
 * Drives the opt-in "Ralph loop" of a workspace: pick the next checklist item,
 * prompt the model, run the gates, optionally commit, repeat.
 *
 * Loop state is cached in memory, persisted to
 * `<sessionsDir>/<workspaceId>/harness-loop.json`, and every persisted change
 * emits a `"change"` event carrying the workspace id.
 */
export class LoopRunnerService extends EventEmitter {
  private readonly locks = new MutexMap<string>();
  private readonly states = new Map<string, HarnessLoopState>();
  private readonly controllers = new Map<string, AbortController>();

  constructor(
    private readonly config: Config,
    private readonly workspaceService: WorkspaceService,
    private readonly aiService: AIService,
    private readonly workspaceHarnessService: WorkspaceHarnessService,
    private readonly gateRunnerService: GateRunnerService,
    private readonly gitCheckpointService: GitCheckpointService
  ) {
    super();
    assert(config, "LoopRunnerService requires a Config instance");
    assert(workspaceService, "LoopRunnerService requires a WorkspaceService instance");
    assert(aiService, "LoopRunnerService requires an AIService instance");
    assert(
      workspaceHarnessService,
      "LoopRunnerService requires a WorkspaceHarnessService instance"
    );
    assert(gateRunnerService, "LoopRunnerService requires a GateRunnerService instance");
    assert(gitCheckpointService, "LoopRunnerService requires a GitCheckpointService instance");
  }

  /** Absolute path of the persisted loop-state file for a workspace. */
  private getStatePath(workspaceId: string): string {
    assert(typeof workspaceId === "string", "workspaceId must be a string");
    const trimmed = workspaceId.trim();
    assert(trimmed.length > 0, "workspaceId must not be empty");
    return path.join(this.config.sessionsDir, trimmed, LOOP_STATE_FILENAME);
  }

  /**
   * Writes the state to disk (best-effort), kicks off a progress-file refresh
   * without waiting for it, and emits `"change"`.
   */
  private async persistState(workspaceId: string, state: HarnessLoopState): Promise<void> {
    const filePath = this.getStatePath(workspaceId);
    const dir = path.dirname(filePath);

    try {
      await fsPromises.mkdir(dir, { recursive: true });
      await fsPromises.writeFile(filePath, JSON.stringify(state, null, 2) + "\n", "utf-8");
    } catch (error) {
      log.debug("[HARNESS] Failed to persist loop state", { workspaceId, error });
    }

    // Best-effort: keep progress file in sync, but never block loop control on remote IO.
    void this.workspaceHarnessService
      .updateProgressFile(workspaceId, state)
      .catch((error: unknown) => {
        log.debug("[HARNESS] Failed to update progress file", { workspaceId, error });
      });

    this.emit("change", workspaceId);
  }

  /**
   * Loads the persisted state, falling back to a copy of DEFAULT_STATE when the
   * file is missing or invalid. A persisted "running" state is downgraded to
   * "paused" because no runner survives a restart.
   */
  private async loadStateFromDisk(workspaceId: string): Promise<HarnessLoopState> {
    const filePath = this.getStatePath(workspaceId);

    try {
      const raw = await fsPromises.readFile(filePath, "utf-8");
      const parsed: unknown = JSON.parse(raw) as unknown;
      const result = HarnessLoopStateSchema.safeParse(parsed);
      if (!result.success) {
        return { ...DEFAULT_STATE };
      }

      // If mux restarts mid-loop, force manual resume.
      if (result.data.status === "running") {
        return {
          ...result.data,
          status: "paused",
          stoppedReason: result.data.stoppedReason ?? "Mux restarted; resume manually",
        };
      }

      return result.data;
    } catch {
      return { ...DEFAULT_STATE };
    }
  }

  /** Cached state lookup; callers must already hold the workspace lock. */
  private async getStateUnlocked(workspaceId: string): Promise<HarnessLoopState> {
    const cached = this.states.get(workspaceId);
    if (cached) {
      return cached;
    }

    const loaded = await this.loadStateFromDisk(workspaceId);
    this.states.set(workspaceId, loaded);
    return loaded;
  }

  /** Returns the current loop state of a workspace. */
  async getState(workspaceId: string): Promise<HarnessLoopState> {
    return this.locks.withLock(workspaceId, () => this.getStateUnlocked(workspaceId));
  }

  /**
   * Stores `next` as the loop state, but only while this runner still owns the
   * loop: its signal is not aborted and the stored status is still "running".
   *
   * Without this guard a snapshot taken before a long await (model turn, gates,
   * commit) would overwrite a pause/stop issued in the meantime with
   * `status: "running"`, leaving a "running" state that no runner serves.
   *
   * @returns `true` when the state was written, `false` when the runner lost ownership.
   */
  private async commitRunningState(
    workspaceId: string,
    signal: AbortSignal,
    next: HarnessLoopState
  ): Promise<boolean> {
    return this.locks.withLock(workspaceId, async () => {
      if (signal.aborted) {
        return false;
      }
      const current = await this.getStateUnlocked(workspaceId);
      if (current.status !== "running") {
        return false;
      }
      this.states.set(workspaceId, next);
      await this.persistState(workspaceId, next);
      return true;
    });
  }

  /**
   * Starts the loop, or resumes it when it was paused (iteration, start time
   * and failure counters are kept on resume and reset otherwise). A loop that
   * is already running is left untouched.
   */
  async start(workspaceId: string): Promise<Result<void>> {
    return this.locks.withLock(workspaceId, async () => {
      const prev = await this.getStateUnlocked(workspaceId);
      if (prev.status === "running") {
        return Ok(undefined);
      }

      const resuming = prev.status === "paused";
      const next: HarnessLoopState = {
        ...prev,
        status: "running",
        startedAt: resuming ? (prev.startedAt ?? Date.now()) : Date.now(),
        iteration: resuming ? prev.iteration : 0,
        consecutiveFailures: resuming ? prev.consecutiveFailures : 0,
        stoppedReason: null,
        lastError: null,
      };

      this.states.set(workspaceId, next);
      await this.persistState(workspaceId, next);

      this.startRunner(workspaceId);

      return Ok(undefined);
    });
  }

  /** Pauses a running loop (no-op otherwise) and aborts its runner. */
  async pause(workspaceId: string, reason?: string): Promise<Result<void>> {
    return this.locks.withLock(workspaceId, async () => {
      const prev = await this.getStateUnlocked(workspaceId);
      if (prev.status !== "running") {
        return Ok(undefined);
      }

      const next: HarnessLoopState = {
        ...prev,
        status: "paused",
        stoppedReason: coerceNonEmptyString(reason) ?? prev.stoppedReason,
      };

      this.states.set(workspaceId, next);
      await this.persistState(workspaceId, next);

      // Best-effort: stop any in-flight stream.
      void this.aiService.stopStream(workspaceId, { soft: true });

      const controller = this.controllers.get(workspaceId);
      controller?.abort();

      return Ok(undefined);
    });
  }

  /** Stops the loop from any status, clears the current item and aborts the runner. */
  async stop(workspaceId: string, reason?: string): Promise<Result<void>> {
    return this.locks.withLock(workspaceId, async () => {
      const prev = await this.getStateUnlocked(workspaceId);

      const next: HarnessLoopState = {
        ...prev,
        status: "stopped",
        startedAt: null,
        currentItemId: null,
        currentItemTitle: null,
        consecutiveFailures: 0,
        stoppedReason: coerceNonEmptyString(reason) ?? prev.stoppedReason,
      };

      this.states.set(workspaceId, next);
      await this.persistState(workspaceId, next);

      void this.aiService.stopStream(workspaceId, { soft: true });

      const controller = this.controllers.get(workspaceId);
      controller?.abort();
      this.controllers.delete(workspaceId);

      return Ok(undefined);
    });
  }

  /**
   * Replaces any existing runner of the workspace with a fresh one.
   *
   * If the runner throws, the loop is paused with the error as reason so that
   * `start()` can resume it; otherwise the state would stay "running" and
   * `start()` would return early forever.
   */
  private startRunner(workspaceId: string): void {
    const existing = this.controllers.get(workspaceId);
    existing?.abort();

    const abortController = new AbortController();
    this.controllers.set(workspaceId, abortController);

    void this.runLoop(workspaceId, abortController.signal)
      .catch(async (error: unknown) => {
        log.error("[HARNESS] Loop runner crashed", { workspaceId, error });

        // Only touch the state if this runner is still the active one.
        if (this.controllers.get(workspaceId) !== abortController) {
          return;
        }

        const message = error instanceof Error ? error.message : String(error);
        try {
          await this.pause(workspaceId, `Loop runner crashed: ${message}`);
        } catch (pauseError) {
          log.debug("[HARNESS] Failed to pause crashed loop", { workspaceId, error: pauseError });
        }
      })
      .finally(() => {
        const current = this.controllers.get(workspaceId);
        if (current === abortController) {
          this.controllers.delete(workspaceId);
        }
      });
  }

  /**
   * Whether `git status --porcelain` reports changes in the workspace.
   *
   * NOTE(review): every failure (runtime not ready, git error, exception) is
   * reported as "not dirty", which lets the loop finish with "git clean" even
   * though the tree state is unknown — confirm that this is intended.
   */
  private async isGitDirty(workspaceId: string): Promise<boolean> {
    try {
      const { runtime, workspacePath } =
        await this.workspaceHarnessService.getRuntimeAndWorkspacePath(workspaceId);

      const ready = await runtime.ensureReady();
      if (!ready.ready) {
        return false;
      }

      const status = await execBuffered(runtime, "git status --porcelain", {
        cwd: workspacePath,
        timeout: 30,
      });

      return status.exitCode === 0 && status.stdout.trim().length > 0;
    } catch {
      return false;
    }
  }

  /**
   * The loop body. Each pass: enforce the limits, pick the next checklist item,
   * prompt the model, run the gates, optionally commit, update the checklist,
   * then either finish, pause, or move on to the next iteration.
   *
   * State is only ever written through `commitRunningState`, so a pause/stop
   * that lands during an iteration is never overwritten.
   */
  private async runLoop(workspaceId: string, signal: AbortSignal): Promise<void> {
    while (!signal.aborted) {
      const state = await this.getState(workspaceId);
      if (state.status !== "running") {
        return;
      }

      const harness = await this.workspaceHarnessService.getHarnessForWorkspace(workspaceId);
      const config = harness.config;
      const loop = config.loop;

      const maxIterations = loop?.maxIterations ?? 50;
      const maxWallTimeMins = loop?.maxWallTimeMins ?? 8 * 60;
      const maxConsecutiveFailures = loop?.maxConsecutiveFailures ?? 3;
      const contextReset = loop?.contextReset ?? "replace_history";
      const autoCommit = loop?.autoCommit ?? true;
      const commitMessageTemplate = loop?.commitMessageTemplate ?? "mux(harness): {{item}}";
      const toolPolicy = loop?.toolPolicy;

      if (state.iteration >= maxIterations) {
        await this.pause(workspaceId, `Max iterations reached (${maxIterations})`);
        return;
      }

      if (state.startedAt) {
        const elapsedMins = (Date.now() - state.startedAt) / 1000 / 60;
        if (elapsedMins >= maxWallTimeMins) {
          await this.pause(workspaceId, `Max wall time reached (${maxWallTimeMins} mins)`);
          return;
        }
      }

      const info = await this.workspaceService.getInfo(workspaceId);
      if (!info) {
        await this.pause(workspaceId, "Workspace not found");
        return;
      }

      const configPathHint = `.mux/${info.name}.harness.jsonc`;
      const progressPathHint = `.mux/${info.name}.harness.progress.md`;

      const modelString =
        info.aiSettingsByMode?.exec?.model ?? info.aiSettings?.model ?? defaultModel;
      const thinkingLevel =
        info.aiSettingsByMode?.exec?.thinkingLevel ?? info.aiSettings?.thinkingLevel;

      const blocked = config.checklist.find((item) => item.status === "blocked") ?? null;
      const nextItem = findNextChecklistItem(config);

      // Nothing left to pick up but something is blocked: a human has to step in.
      if (nextItem === null && blocked) {
        await this.pause(workspaceId, `Checklist blocked: ${blocked.title}`);
        return;
      }

      const itemTitle = nextItem?.title ?? "Final cleanup (gates + git clean)";
      const prompt = buildIterationPrompt({
        iteration: state.iteration,
        itemTitle,
        configPathHint,
        progressPathHint,
      });

      const stateBeforeSend: HarnessLoopState = {
        ...state,
        currentItemId: nextItem?.id ?? null,
        currentItemTitle: itemTitle,
      };
      if (!(await this.commitRunningState(workspaceId, signal, stateBeforeSend))) {
        return;
      }

      // Tracks the checklist as this iteration mutates it, so that the stop check
      // and the bearings summary below see the statuses that were actually written.
      // NOTE(review): harness writes start from the config read at the top of the
      // iteration; edits made to the harness file during the model turn would be
      // overwritten — confirm whether the config should be re-read first.
      let checklist = config.checklist;

      // If this is a checklist item, mark it doing before we start.
      if (nextItem?.status === "todo") {
        checklist = withChecklistStatus(checklist, nextItem.id, "doing");
        await this.workspaceHarnessService.setHarnessForWorkspace(
          workspaceId,
          { ...config, checklist },
          { loopState: stateBeforeSend }
        );
      }

      // NOTE(review): the gates run as soon as sendMessage resolves; this presumes
      // it resolves only after the assistant turn has finished — confirm.
      const sendResult = await this.workspaceService.sendMessage(workspaceId, prompt, {
        model: modelString,
        thinkingLevel,
        mode: "exec",
        toolPolicy,
        muxMetadata: { type: "harness-loop", iteration: stateBeforeSend.iteration },
      });

      if (!sendResult.success) {
        await this.pause(workspaceId, `sendMessage failed: ${sendResult.error.type}`);
        return;
      }

      if (signal.aborted) {
        return;
      }

      // Run gates (stop on first failure).
      const gatesResult = await this.gateRunnerService.runGates(workspaceId, config.gates);
      if (!gatesResult.success) {
        await this.pause(workspaceId, `Failed to run gates: ${gatesResult.error}`);
        return;
      }

      // Paused/stopped while the gates were running: do not commit on the user's behalf.
      if (signal.aborted) {
        return;
      }

      const gatesOk = gatesResult.data.ok;
      let nextState: HarnessLoopState = {
        ...stateBeforeSend,
        lastGateRun: gatesResult.data,
        lastError: gatesOk ? null : "Gates failed",
      };

      if (gatesOk) {
        nextState = { ...nextState, consecutiveFailures: 0 };

        if (autoCommit) {
          const checkpointResult = await this.gitCheckpointService.checkpoint(workspaceId, {
            messageTemplate: commitMessageTemplate,
            itemTitle,
            iteration: nextState.iteration,
          });

          if (!checkpointResult.success) {
            // Record the gate run before pausing so the paused state shows it.
            await this.commitRunningState(workspaceId, signal, nextState);
            await this.pause(workspaceId, `Checkpoint failed: ${checkpointResult.error}`);
            return;
          }

          nextState = { ...nextState, lastCheckpoint: checkpointResult.data };
        }

        // If this was a checklist item, mark it done.
        if (nextItem) {
          checklist = withChecklistStatus(checklist, nextItem.id, "done");
          await this.workspaceHarnessService.setHarnessForWorkspace(
            workspaceId,
            { ...config, checklist },
            { loopState: nextState }
          );
        }
      } else {
        const failures = nextState.consecutiveFailures + 1;
        nextState = { ...nextState, consecutiveFailures: failures };

        if (failures >= maxConsecutiveFailures) {
          // Record the failing run and the counter before pausing.
          await this.commitRunningState(workspaceId, signal, nextState);
          await this.pause(workspaceId, `Gates failed ${maxConsecutiveFailures} times in a row`);
          return;
        }
      }

      // Stop condition: when checklist is finished and the repo is clean.
      if (gatesOk && !blocked && !hasUnfinishedChecklistItems({ checklist })) {
        const dirty = await this.isGitDirty(workspaceId);
        if (!dirty) {
          // Keep this iteration's gate run / checkpoint in the final state.
          if (await this.commitRunningState(workspaceId, signal, nextState)) {
            await this.stop(workspaceId, "All checklist items done; gates passing; git clean");
          }
          return;
        }
      }

      nextState = { ...nextState, iteration: nextState.iteration + 1 };
      if (!(await this.commitRunningState(workspaceId, signal, nextState))) {
        return;
      }

      if (contextReset === "replace_history") {
        const summary = renderLoopSummaryMarkdown({
          workspaceId,
          iteration: nextState.iteration,
          currentItemTitle: nextState.currentItemTitle,
          configPathHint,
          progressPathHint,
          checklist,
          lastGateRun: nextState.lastGateRun,
          lastCommitSha: nextState.lastCheckpoint?.commitSha ?? null,
        });

        const summaryMessage = createMuxMessage(
          `harness-loop-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`,
          "assistant",
          summary,
          {
            timestamp: Date.now(),
            compacted: "user",
            mode: "exec",
            muxMetadata: { type: "harness-loop-bearings" },
          }
        );

        const replaceResult = await this.workspaceService.replaceHistory(
          workspaceId,
          summaryMessage
        );
        if (!replaceResult.success) {
          log.debug("[HARNESS] Failed to reset context", {
            workspaceId,
            error: replaceResult.error,
          });
        }
      }

      // Give the event loop a breath so stop/pause can land quickly.
      await new Promise<void>((resolve) => setTimeout(resolve, 25));
    }
  }
}
WorkspaceHarnessService; + public readonly gateRunnerService: GateRunnerService; + public readonly gitCheckpointService: GitCheckpointService; + public readonly loopRunnerService: LoopRunnerService; public readonly workspaceService: WorkspaceService; public readonly taskService: TaskService; public readonly providerService: ProviderService; @@ -116,6 +124,9 @@ export class ServiceContainer { this.historyService = new HistoryService(config); this.partialService = new PartialService(config, this.historyService); this.projectService = new ProjectService(config); + this.workspaceHarnessService = new WorkspaceHarnessService(config); + this.gateRunnerService = new GateRunnerService(config, this.workspaceHarnessService); + this.gitCheckpointService = new GitCheckpointService(config, this.workspaceHarnessService); this.initStateManager = new InitStateManager(config); this.workspaceMcpOverridesService = new WorkspaceMcpOverridesService(config); this.mcpConfigService = new MCPConfigService(); @@ -156,6 +167,14 @@ export class ServiceContainer { this.workspaceService, this.initStateManager ); + this.loopRunnerService = new LoopRunnerService( + config, + this.workspaceService, + this.aiService, + this.workspaceHarnessService, + this.gateRunnerService, + this.gitCheckpointService + ); this.aiService.setTaskService(this.taskService); // Idle compaction service - auto-compacts workspaces after configured idle period this.idleCompactionService = new IdleCompactionService( diff --git a/src/node/services/workspaceHarnessService.ts b/src/node/services/workspaceHarnessService.ts new file mode 100644 index 0000000000..c867953908 --- /dev/null +++ b/src/node/services/workspaceHarnessService.ts @@ -0,0 +1,529 @@ +import * as path from "path"; +import * as jsonc from "jsonc-parser"; + +import assert from "@/common/utils/assert"; +import type { + HarnessChecklistItem, + HarnessChecklistStatus, + HarnessLoopSettings, + HarnessLoopState, + WorkspaceHarnessConfig, + WorkspaceHarnessFilePaths, 
+} from "@/common/types/harness"; +import type { ToolPolicy } from "@/common/utils/tools/toolPolicy"; +import type { RuntimeConfig } from "@/common/types/runtime"; +import type { FrontendWorkspaceMetadata } from "@/common/types/workspace"; +import type { Config } from "@/node/config"; +import { createRuntime } from "@/node/runtime/runtimeFactory"; +import { execBuffered, readFileString, writeFileString } from "@/node/utils/runtime/helpers"; +import { log } from "@/node/services/log"; + +const HARNESS_DIR = ".mux"; + +const HARNESS_GITIGNORE_PATTERNS = [ + `${HARNESS_DIR}/*.harness.jsonc`, + `${HARNESS_DIR}/*.harness.progress.md`, +]; + +const DEFAULT_LOOP_SETTINGS: Required< + Pick< + HarnessLoopSettings, + | "maxIterations" + | "maxWallTimeMins" + | "maxConsecutiveFailures" + | "contextReset" + | "autoCommit" + | "commitMessageTemplate" + > +> & { toolPolicy?: ToolPolicy } = { + maxIterations: 50, + maxWallTimeMins: 8 * 60, + maxConsecutiveFailures: 3, + contextReset: "replace_history", + autoCommit: true, + commitMessageTemplate: "mux(harness): {{item}}", +}; + +const DEFAULT_HARNESS_CONFIG: WorkspaceHarnessConfig = { + version: 1, + checklist: [], + gates: [], + loop: { ...DEFAULT_LOOP_SETTINGS }, +}; + +function joinForRuntime(runtimeConfig: RuntimeConfig | undefined, ...parts: string[]): string { + assert(parts.length > 0, "joinForRuntime requires at least one path segment"); + + // Remote runtimes run inside a POSIX shell (SSH host, Docker container), even if the user is + // running mux on Windows. Use POSIX joins so we don't accidentally introduce backslashes. + const usePosix = runtimeConfig?.type === "ssh" || runtimeConfig?.type === "docker"; + return usePosix ? path.posix.join(...parts) : path.join(...parts); +} + +function isAbsoluteForRuntime(runtimeConfig: RuntimeConfig | undefined, filePath: string): boolean { + const usePosix = runtimeConfig?.type === "ssh" || runtimeConfig?.type === "docker"; + return usePosix ? 
path.posix.isAbsolute(filePath) : path.isAbsolute(filePath); +} + +function isChecklistStatus(value: unknown): value is HarnessChecklistStatus { + return value === "todo" || value === "doing" || value === "done" || value === "blocked"; +} + +function clampPositiveInt( + value: unknown, + fallback: number, + { min, max }: { min: number; max: number } +): number { + if (typeof value !== "number" || !Number.isFinite(value)) { + return fallback; + } + const rounded = Math.floor(value); + if (rounded < min) return min; + if (rounded > max) return max; + return rounded; +} + +function normalizeChecklistItem(raw: unknown, index: number): HarnessChecklistItem | null { + if (!raw || typeof raw !== "object") { + return null; + } + + const obj = raw as Record; + + const title = typeof obj.title === "string" ? obj.title.trim() : ""; + if (title.length === 0) { + return null; + } + + const status = isChecklistStatus(obj.status) ? obj.status : ("todo" as const); + + const idRaw = typeof obj.id === "string" ? obj.id.trim() : ""; + const id = idRaw.length > 0 ? idRaw : `item-${index + 1}`; + + const notes = + typeof obj.notes === "string" && obj.notes.trim().length > 0 ? obj.notes.trim() : undefined; + + return { id, title, status, notes }; +} + +function normalizeWorkspaceHarnessConfig(raw: unknown): WorkspaceHarnessConfig { + if (!raw || typeof raw !== "object") { + return { ...DEFAULT_HARNESS_CONFIG }; + } + + const obj = raw as Record; + + const checklist: HarnessChecklistItem[] = []; + if (Array.isArray(obj.checklist)) { + for (const [index, entry] of obj.checklist.entries()) { + const normalized = normalizeChecklistItem(entry, index); + if (normalized) { + checklist.push(normalized); + } + } + } + + const gates = Array.isArray(obj.gates) + ? obj.gates + .map((g) => { + if (!g || typeof g !== "object") return null; + const gate = g as Record; + const command = typeof gate.command === "string" ? 
gate.command.trim() : ""; + if (command.length === 0) return null; + + const id = + typeof gate.id === "string" && gate.id.trim().length > 0 ? gate.id.trim() : undefined; + const title = + typeof gate.title === "string" && gate.title.trim().length > 0 + ? gate.title.trim() + : undefined; + const timeoutSecs = + typeof gate.timeoutSecs === "number" && + Number.isFinite(gate.timeoutSecs) && + gate.timeoutSecs > 0 + ? Math.floor(gate.timeoutSecs) + : undefined; + + return { id, title, command, timeoutSecs }; + }) + .filter((g): g is NonNullable => g !== null) + : []; + + const loopRaw = + obj.loop && typeof obj.loop === "object" ? (obj.loop as Record) : {}; + + const loop: HarnessLoopSettings = { + maxIterations: clampPositiveInt(loopRaw.maxIterations, DEFAULT_LOOP_SETTINGS.maxIterations, { + min: 1, + max: 1000, + }), + maxWallTimeMins: clampPositiveInt( + loopRaw.maxWallTimeMins, + DEFAULT_LOOP_SETTINGS.maxWallTimeMins, + { + min: 1, + max: 7 * 24 * 60, + } + ), + maxConsecutiveFailures: clampPositiveInt( + loopRaw.maxConsecutiveFailures, + DEFAULT_LOOP_SETTINGS.maxConsecutiveFailures, + { min: 1, max: 50 } + ), + contextReset: + loopRaw.contextReset === "none" || loopRaw.contextReset === "replace_history" + ? loopRaw.contextReset + : DEFAULT_LOOP_SETTINGS.contextReset, + autoCommit: + typeof loopRaw.autoCommit === "boolean" + ? loopRaw.autoCommit + : DEFAULT_LOOP_SETTINGS.autoCommit, + commitMessageTemplate: + typeof loopRaw.commitMessageTemplate === "string" && + loopRaw.commitMessageTemplate.trim().length > 0 + ? loopRaw.commitMessageTemplate.trim() + : DEFAULT_LOOP_SETTINGS.commitMessageTemplate, + toolPolicy: Array.isArray(loopRaw.toolPolicy) ? 
(loopRaw.toolPolicy as ToolPolicy) : undefined, + }; + + const normalized: WorkspaceHarnessConfig = { + version: 1, + checklist, + gates, + loop, + }; + + return normalized; +} + +async function statIsFile( + runtime: ReturnType, + filePath: string +): Promise { + try { + const stat = await runtime.stat(filePath); + return !stat.isDirectory; + } catch { + return false; + } +} + +function formatChecklistItemForProgress(item: HarnessChecklistItem): string { + const checkbox = + item.status === "done" + ? "[x]" + : item.status === "doing" + ? "[~]" + : item.status === "blocked" + ? "[!]" + : "[ ]"; + return `- ${checkbox} ${item.title}`; +} + +function renderProgressMarkdown(params: { + metadata: FrontendWorkspaceMetadata; + config: WorkspaceHarnessConfig; + paths: WorkspaceHarnessFilePaths; + loopState?: HarnessLoopState; +}): string { + const nowIso = new Date().toISOString(); + + const lines: string[] = []; + lines.push(`# Harness Progress`); + lines.push(""); + lines.push(`- Workspace: ${params.metadata.name} (${params.metadata.id})`); + lines.push(`- Updated: ${nowIso}`); + lines.push(`- Harness file: ${params.paths.configPath}`); + lines.push(""); + + lines.push("## Checklist"); + if (params.config.checklist.length === 0) { + lines.push("(no checklist items)"); + } else { + for (const item of params.config.checklist) { + lines.push(formatChecklistItemForProgress(item)); + } + } + lines.push(""); + + lines.push("## Gates"); + if (params.config.gates.length === 0) { + lines.push("(no gates configured)"); + } else { + for (const gate of params.config.gates) { + lines.push(`- ${gate.command}`); + } + } + lines.push(""); + + if (params.loopState) { + lines.push("## Loop"); + lines.push(`- Status: ${params.loopState.status}`); + lines.push(`- Iteration: ${params.loopState.iteration}`); + if (params.loopState.currentItemTitle) { + lines.push(`- Current item: ${params.loopState.currentItemTitle}`); + } + if (params.loopState.lastGateRun) { + lines.push( + `- Last gates: 
${params.loopState.lastGateRun.ok ? "PASS" : "FAIL"} (${Math.round( + params.loopState.lastGateRun.totalDurationMs / 1000 + )}s)` + ); + } + if (params.loopState.lastCheckpoint?.commitSha) { + lines.push(`- Last commit: ${params.loopState.lastCheckpoint.commitSha}`); + } + if (params.loopState.lastError) { + lines.push(`- Last error: ${params.loopState.lastError}`); + } + lines.push(""); + } + + return lines.join("\n") + "\n"; +} + +export class WorkspaceHarnessService { + constructor(private readonly config: Config) { + assert(config, "WorkspaceHarnessService requires a Config instance"); + } + + private async getWorkspaceMetadata(workspaceId: string): Promise { + assert(typeof workspaceId === "string", "workspaceId must be a string"); + const trimmed = workspaceId.trim(); + assert(trimmed.length > 0, "workspaceId must not be empty"); + + const all = await this.config.getAllWorkspaceMetadata(); + const metadata = all.find((m) => m.id === trimmed); + if (!metadata) { + throw new Error(`Workspace metadata not found for ${trimmed}`); + } + + return metadata; + } + + async getRuntimeAndWorkspacePath(workspaceId: string): Promise<{ + metadata: FrontendWorkspaceMetadata; + runtime: ReturnType; + workspacePath: string; + }> { + const metadata = await this.getWorkspaceMetadata(workspaceId); + + const runtime = createRuntime( + metadata.runtimeConfig ?? { type: "local", srcBaseDir: this.config.srcDir }, + { projectPath: metadata.projectPath } + ); + + // In-place workspaces (CLI/benchmarks) store the workspace path directly by setting + // metadata.projectPath === metadata.name. + const isInPlace = metadata.projectPath === metadata.name; + const workspacePath = isInPlace + ? 
metadata.projectPath + : runtime.getWorkspacePath(metadata.projectPath, metadata.name); + + assert( + typeof workspacePath === "string" && workspacePath.length > 0, + "workspacePath is required" + ); + + return { metadata, runtime, workspacePath }; + } + + private getHarnessFilePaths( + workspacePath: string, + runtimeConfig: RuntimeConfig | undefined, + workspaceName: string + ): WorkspaceHarnessFilePaths { + assert(typeof workspacePath === "string", "workspacePath must be a string"); + assert(typeof workspaceName === "string", "workspaceName must be a string"); + + const prefix = workspaceName.trim().length > 0 ? workspaceName.trim() : "workspace"; + + return { + configPath: joinForRuntime( + runtimeConfig, + workspacePath, + HARNESS_DIR, + `${prefix}.harness.jsonc` + ), + progressPath: joinForRuntime( + runtimeConfig, + workspacePath, + HARNESS_DIR, + `${prefix}.harness.progress.md` + ), + }; + } + + private async readHarnessFile( + runtime: ReturnType, + filePath: string + ): Promise { + try { + const raw = await readFileString(runtime, filePath); + const errors: jsonc.ParseError[] = []; + const parsed: unknown = jsonc.parse(raw, errors) as unknown; + if (errors.length > 0) { + log.warn("[HARNESS] Failed to parse harness config (JSONC parse errors)", { + filePath, + errorCount: errors.length, + }); + return {}; + } + return parsed; + } catch (error) { + log.debug("[HARNESS] Failed to read harness config file", { filePath, error }); + return {}; + } + } + + private async ensureHarnessDir( + runtime: ReturnType, + workspacePath: string, + runtimeConfig: RuntimeConfig | undefined + ): Promise { + const harnessDirPath = joinForRuntime(runtimeConfig, workspacePath, HARNESS_DIR); + + try { + await runtime.ensureDir(harnessDirPath); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + throw new Error(`Failed to create ${HARNESS_DIR} directory: ${msg}`); + } + } + + private async ensureHarnessGitignored( + runtime: ReturnType, + workspacePath: string, + runtimeConfig: RuntimeConfig | undefined + ): Promise { + try { + const isInsideGitResult = await execBuffered(runtime, "git rev-parse --is-inside-work-tree", { + cwd: workspacePath, + timeout: 10, + }); + if (isInsideGitResult.exitCode !== 0 || isInsideGitResult.stdout.trim() !== "true") { + return; + } + + const excludePathResult = await execBuffered( + runtime, + "git rev-parse --git-path info/exclude", + { + cwd: workspacePath, + timeout: 10, + } + ); + if (excludePathResult.exitCode !== 0) { + return; + } + + const excludeFilePathRaw = excludePathResult.stdout.trim(); + if (excludeFilePathRaw.length === 0) { + return; + } + + const excludeFilePath = isAbsoluteForRuntime(runtimeConfig, excludeFilePathRaw) + ? excludeFilePathRaw + : joinForRuntime(runtimeConfig, workspacePath, excludeFilePathRaw); + + let existing = ""; + try { + existing = await readFileString(runtime, excludeFilePath); + } catch { + // Missing exclude file is OK. + } + + const existingPatterns = new Set( + existing + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line.length > 0) + ); + const missingPatterns = HARNESS_GITIGNORE_PATTERNS.filter( + (pattern) => !existingPatterns.has(pattern) + ); + if (missingPatterns.length === 0) { + return; + } + + const needsNewline = existing.length > 0 && !existing.endsWith("\n"); + const updated = existing + (needsNewline ? "\n" : "") + missingPatterns.join("\n") + "\n"; + + await writeFileString(runtime, excludeFilePath, updated); + } catch (error) { + // Best-effort only; never fail a workspace operation because git exclude couldn't be updated. 
+ log.debug("[HARNESS] Failed to add harness files to git exclude", { + workspacePath, + error, + }); + } + } + + async getHarnessForWorkspace(workspaceId: string): Promise<{ + config: WorkspaceHarnessConfig; + paths: WorkspaceHarnessFilePaths; + exists: boolean; + }> { + const { metadata, runtime, workspacePath } = await this.getRuntimeAndWorkspacePath(workspaceId); + const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name); + + const exists = await statIsFile(runtime, paths.configPath); + if (!exists) { + return { config: { ...DEFAULT_HARNESS_CONFIG }, paths, exists: false }; + } + + const parsed = await this.readHarnessFile(runtime, paths.configPath); + return { + config: normalizeWorkspaceHarnessConfig(parsed), + paths, + exists: true, + }; + } + + async setHarnessForWorkspace( + workspaceId: string, + config: WorkspaceHarnessConfig, + options?: { loopState?: HarnessLoopState } + ): Promise { + assert(config && typeof config === "object", "config must be an object"); + + const { metadata, runtime, workspacePath } = await this.getRuntimeAndWorkspacePath(workspaceId); + const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name); + + const normalized = normalizeWorkspaceHarnessConfig(config); + + await this.ensureHarnessDir(runtime, workspacePath, metadata.runtimeConfig); + + await writeFileString(runtime, paths.configPath, JSON.stringify(normalized, null, 2) + "\n"); + await this.ensureHarnessGitignored(runtime, workspacePath, metadata.runtimeConfig); + + // Best-effort: keep the progress file up-to-date for both users and agent context. 
+ try { + const progressMarkdown = renderProgressMarkdown({ + metadata, + config: normalized, + paths, + loopState: options?.loopState, + }); + await writeFileString(runtime, paths.progressPath, progressMarkdown); + } catch (error) { + log.debug("[HARNESS] Failed to update harness progress file", { workspaceId, error }); + } + + return normalized; + } + + async updateProgressFile(workspaceId: string, loopState?: HarnessLoopState): Promise { + try { + const { metadata, runtime, workspacePath } = + await this.getRuntimeAndWorkspacePath(workspaceId); + const { config, paths } = await this.getHarnessForWorkspace(workspaceId); + + await this.ensureHarnessDir(runtime, workspacePath, metadata.runtimeConfig); + const progressMarkdown = renderProgressMarkdown({ metadata, config, paths, loopState }); + await writeFileString(runtime, paths.progressPath, progressMarkdown); + } catch (error) { + log.debug("[HARNESS] Failed to update progress file", { workspaceId, error }); + } + } +} diff --git a/tests/ipc/setup.ts b/tests/ipc/setup.ts index e6dd3d9b5a..6477ba0f03 100644 --- a/tests/ipc/setup.ts +++ b/tests/ipc/setup.ts @@ -105,7 +105,12 @@ export async function createTestEnvironment(): Promise { sessionUsageService: services.sessionUsageService, signingService: services.signingService, coderService: services.coderService, + workspaceHarnessService: services.workspaceHarnessService, + gateRunnerService: services.gateRunnerService, + gitCheckpointService: services.gitCheckpointService, + loopRunnerService: services.loopRunnerService, }; + const orpc = createOrpcTestClient(orpcContext); return { From 9732ba316be3db9a093ff2898b11db5482e68649 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Tue, 20 Jan 2026 12:03:32 +0100 Subject: [PATCH 02/20] =?UTF-8?q?=F0=9F=A4=96=20feat:=20start=20Ralph=20lo?= =?UTF-8?q?op=20from=20plan?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I15d81ab1136b5437df531ba6cb3e23cf84c321a0 
Signed-off-by: Thomas Kosiewski --- .../tools/ProposePlanToolCall.test.tsx | 100 ++++++++++- .../components/tools/ProposePlanToolCall.tsx | 60 +++++++ src/common/orpc/schemas/api.ts | 4 + src/node/orpc/router.ts | 105 ++++++++++++ .../services/workspaceHarnessFromPlan.test.ts | 39 +++++ src/node/services/workspaceHarnessFromPlan.ts | 156 ++++++++++++++++++ 6 files changed, 463 insertions(+), 1 deletion(-) create mode 100644 src/node/services/workspaceHarnessFromPlan.test.ts create mode 100644 src/node/services/workspaceHarnessFromPlan.ts diff --git a/src/browser/components/tools/ProposePlanToolCall.test.tsx b/src/browser/components/tools/ProposePlanToolCall.test.tsx index 61e0c81831..bb2664f57b 100644 --- a/src/browser/components/tools/ProposePlanToolCall.test.tsx +++ b/src/browser/components/tools/ProposePlanToolCall.test.tsx @@ -17,6 +17,8 @@ interface SendMessageArgs { options: SendMessageOptions; } +type StartFromPlanResult = { success: true; data: undefined } | { success: false; error: string }; + type GetPlanContentResult = | { success: true; data: { content: string; path: string } } | { success: false; error: string }; @@ -45,6 +47,9 @@ interface MockApi { deletePlanFile?: boolean; }) => Promise; sendMessage: (args: SendMessageArgs) => Promise<{ success: true; data: undefined }>; + loop: { + startFromPlan: (args: { workspaceId: string }) => Promise; + }; }; } @@ -89,8 +94,11 @@ void mock.module("@/browser/hooks/useOpenInEditor", () => ({ })); void mock.module("@/browser/contexts/WorkspaceContext", () => ({ + useOptionalWorkspaceContext: () => ({ + workspaceMetadata: new Map(), + }), useWorkspaceContext: () => ({ - workspaceMetadata: new Map(), + workspaceMetadata: new Map(), }), })); @@ -199,6 +207,9 @@ describe("ProposePlanToolCall", () => { data: { content: "# My Plan\n\nDo the thing.", path: planPath }, }), replaceChatHistory: (_args) => Promise.resolve({ success: true, data: undefined }), + loop: { + startFromPlan: () => Promise.resolve({ success: true, 
data: undefined }), + }, sendMessage: (args: SendMessageArgs) => { sendMessageCalls.push(args); return Promise.resolve({ success: true, data: undefined }); @@ -285,6 +296,9 @@ describe("ProposePlanToolCall", () => { sendMessageCalls.push(args); return Promise.resolve({ success: true, data: undefined }); }, + loop: { + startFromPlan: () => Promise.resolve({ success: true, data: undefined }), + }, }, }; @@ -327,4 +341,88 @@ describe("ProposePlanToolCall", () => { expect(summaryMessage.parts?.[0]?.text).toContain("*Plan file preserved at:*"); expect(summaryMessage.parts?.[0]?.text).toContain(planPath); }); + + test("switches to exec and starts Ralph loop when clicking Start Ralph loop", async () => { + const workspaceId = "ws-123"; + const planPath = "~/.mux/plans/demo/ws-123.md"; + + // Start in plan mode. + window.localStorage.setItem(getAgentIdKey(workspaceId), JSON.stringify("plan")); + + const startFromPlanCalls: Array<{ workspaceId: string }> = []; + + let resolveStartFromPlan!: (value: StartFromPlanResult) => void; + const startFromPlanPromise = new Promise((resolve) => { + resolveStartFromPlan = resolve; + }); + + mockApi = { + config: { + getConfig: () => + Promise.resolve({ + taskSettings: { maxParallelAgentTasks: 3, maxTaskNestingDepth: 3 }, + agentAiDefaults: {}, + subagentAiDefaults: {}, + }), + }, + workspace: { + getPlanContent: () => + Promise.resolve({ + success: true, + data: { content: "# My Plan\n\nDo the thing.", path: planPath }, + }), + replaceChatHistory: () => Promise.resolve({ success: true, data: undefined }), + sendMessage: () => Promise.resolve({ success: true, data: undefined }), + loop: { + startFromPlan: (args: { workspaceId: string }) => { + startFromPlanCalls.push(args); + return startFromPlanPromise; + }, + }, + }, + }; + + const view = render( + + + + ); + + fireEvent.click(view.getByRole("button", { name: "Start Ralph loop" })); + + await waitFor(() => expect(startFromPlanCalls.length).toBe(1)); + 
expect(startFromPlanCalls[0]?.workspaceId).toBe(workspaceId); + + await waitFor(() => { + const button = view.getByRole("button", { name: "Start Ralph loop" }) as HTMLButtonElement; + expect(button.disabled).toBe(true); + }); + + resolveStartFromPlan({ success: true, data: undefined }); + + await waitFor(() => { + const button = view.getByRole("button", { name: "Start Ralph loop" }) as HTMLButtonElement; + expect(button.disabled).toBe(false); + }); + + const agentKey = getAgentIdKey(workspaceId); + const updatePersistedStateMaybeMock = updatePersistedState as unknown as { + mock?: { calls: unknown[][] }; + }; + if (updatePersistedStateMaybeMock.mock) { + expect(updatePersistedState).toHaveBeenCalledWith(agentKey, "exec"); + } else { + expect(JSON.parse(window.localStorage.getItem(agentKey)!)).toBe("exec"); + } + }); }); diff --git a/src/browser/components/tools/ProposePlanToolCall.tsx b/src/browser/components/tools/ProposePlanToolCall.tsx index f6c5c91ba8..cc739a9a7c 100644 --- a/src/browser/components/tools/ProposePlanToolCall.tsx +++ b/src/browser/components/tools/ProposePlanToolCall.tsx @@ -38,6 +38,7 @@ import { ListStart, Pencil, Play, + RefreshCw, X, } from "lucide-react"; import { ShareMessagePopover } from "../ShareMessagePopover"; @@ -131,13 +132,17 @@ export const ProposePlanToolCall: React.FC = (props) = } = props; const { expanded, toggleExpanded } = useToolExpansion(true); // Expand by default const [showRaw, setShowRaw] = useState(false); + const [isStartingLoop, setIsStartingLoop] = useState(false); + const isStartingLoopRef = useRef(false); const [isImplementing, setIsImplementing] = useState(false); const [implementReplacesChatHistory, setImplementReplacesChatHistory] = useState(false); const isImplementingRef = useRef(false); const isMountedRef = useRef(true); const { api } = useAPI(); const openInEditor = useOpenInEditor(); + const loopError = usePopoverError(); const workspaceContext = useOptionalWorkspaceContext(); + const startLoopButtonRef = 
useRef(null); const editorError = usePopoverError(); const editButtonRef = useRef(null); @@ -369,6 +374,42 @@ export const ProposePlanToolCall: React.FC = (props) = } } }; + + const handleStartRalphLoop = () => { + if (!workspaceId || !api) return; + if (isStartingLoopRef.current) return; + + // Capture positioning from the ref for error popover placement + const anchorPosition = startLoopButtonRef.current + ? (() => { + const { bottom, left } = startLoopButtonRef.current.getBoundingClientRect(); + return { top: bottom + 8, left }; + })() + : { top: 100, left: 100 }; + + isStartingLoopRef.current = true; + setIsStartingLoop(true); + + // Switch to exec so the UI matches the loop runner. + updatePersistedState(getAgentIdKey(workspaceId), "exec"); + + api.workspace.loop + .startFromPlan({ workspaceId }) + .then((result) => { + if (!result.success) { + loopError.showError("start-ralph-loop", result.error, anchorPosition); + } + }) + .catch((error: unknown) => { + const message = error instanceof Error ? error.message : String(error); + loopError.showError("start-ralph-loop", message, anchorPosition); + }) + .finally(() => { + isStartingLoopRef.current = false; + setIsStartingLoop(false); + }); + }; + // Copy to clipboard with feedback const { copied, copyToClipboard } = useCopyToClipboard(); @@ -448,6 +489,23 @@ export const ProposePlanToolCall: React.FC = (props) = ? "Replace chat history with this plan, switch to Exec, and start implementing" : "Switch to Exec and start implementing", }); + + actionButtons.push({ + label: "Start Ralph loop", + component: ( +
+ , + tooltip: "Generate a harness from the plan (if needed) and start the loop", + }} + /> +
+ ), + }); } } @@ -538,6 +596,7 @@ export const ProposePlanToolCall: React.FC = (props) = <>
{planUI}
+ ); } @@ -557,6 +616,7 @@ export const ProposePlanToolCall: React.FC = (props) = {modal} + ); }; diff --git a/src/common/orpc/schemas/api.ts b/src/common/orpc/schemas/api.ts index 994a6f7210..a481cf30d3 100644 --- a/src/common/orpc/schemas/api.ts +++ b/src/common/orpc/schemas/api.ts @@ -758,6 +758,10 @@ export const workspace = { input: z.object({ workspaceId: z.string() }), output: HarnessLoopStateSchema, }, + startFromPlan: { + input: z.object({ workspaceId: z.string() }), + output: ResultSchema(z.void(), z.string()), + }, start: { input: z.object({ workspaceId: z.string() }), output: ResultSchema(z.void(), z.string()), diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index 270920dc44..a08f016c66 100644 --- a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -1,3 +1,4 @@ +import { generateObject } from "ai"; import { os } from "@orpc/server"; import * as schemas from "@/common/orpc/schemas"; import type { ORPCContext } from "./context"; @@ -5,6 +6,11 @@ import { selectModelForNameGeneration, generateWorkspaceIdentity, } from "@/node/services/workspaceTitleGenerator"; +import { formatSendMessageError } from "@/node/services/utils/sendMessageError"; +import { + HarnessFromPlanDraftSchema, + createWorkspaceHarnessConfigFromPlanDraft, +} from "@/node/services/workspaceHarnessFromPlan"; import type { UpdateStatus, WorkspaceActivitySnapshot, @@ -1870,6 +1876,105 @@ export const router = (authToken?: string) => { }), }, loop: { + startFromPlan: t + .input(schemas.workspace.loop.startFromPlan.input) + .output(schemas.workspace.loop.startFromPlan.output) + .handler(async ({ context, input }) => { + try { + const harness = await context.workspaceHarnessService.getHarnessForWorkspace( + input.workspaceId + ); + + // Don't stomp on user-edited harnesses. 
+ if (harness.exists) { + const result = await context.loopRunnerService.start(input.workspaceId); + if (!result.success) { + return { success: false, error: result.error }; + } + return { success: true, data: undefined }; + } + + const metadata = await context.workspaceService.getInfo(input.workspaceId); + if (!metadata) { + return { success: false, error: `Workspace not found: ${input.workspaceId}` }; + } + + const runtime = createRuntime(metadata.runtimeConfig, { + projectPath: metadata.projectPath, + }); + + const planResult = await readPlanFile( + runtime, + metadata.name, + metadata.projectName, + input.workspaceId + ); + + if (!planResult.exists) { + return { + success: false, + error: `Plan file not found at ${planResult.path}`, + }; + } + + const userModel = + metadata.aiSettingsByMode?.exec?.model ?? metadata.aiSettings?.model; + const modelString = await selectModelForNameGeneration( + context.aiService, + undefined, + userModel + ); + if (!modelString) { + return { + success: false, + error: "No AI model available to generate a harness from this plan", + }; + } + + const modelResult = await context.aiService.createModel(modelString); + if (!modelResult.success) { + return { + success: false, + error: formatSendMessageError(modelResult.error).message, + }; + } + + const generation = await generateObject({ + model: modelResult.data, + schema: HarnessFromPlanDraftSchema, + mode: "json", + prompt: `Generate a Ralph harness (checklist + optional gates) from this plan. + +Rules: +- Checklist items should be small, mergeable steps (max 20). +- Gates should be safe, single commands that run checks (prefer make targets like "make static-check"). +- Do not use shell chaining, pipes, redirects, quotes, or destructive commands. 
+ +Plan: + +${planResult.content}`, + }); + + const derived = createWorkspaceHarnessConfigFromPlanDraft(generation.object); + + const loopState = await context.loopRunnerService.getState(input.workspaceId); + await context.workspaceHarnessService.setHarnessForWorkspace( + input.workspaceId, + derived.config, + { loopState } + ); + + const startResult = await context.loopRunnerService.start(input.workspaceId); + if (!startResult.success) { + return { success: false, error: startResult.error }; + } + + return { success: true, data: undefined }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false, error: message }; + } + }), getState: t .input(schemas.workspace.loop.getState.input) .output(schemas.workspace.loop.getState.output) diff --git a/src/node/services/workspaceHarnessFromPlan.test.ts b/src/node/services/workspaceHarnessFromPlan.test.ts new file mode 100644 index 0000000000..284d8a9ba4 --- /dev/null +++ b/src/node/services/workspaceHarnessFromPlan.test.ts @@ -0,0 +1,39 @@ +import { describe, expect, it } from "bun:test"; + +import { createWorkspaceHarnessConfigFromPlanDraft } from "./workspaceHarnessFromPlan"; + +describe("workspaceHarnessFromPlan", () => { + it("derives a non-empty checklist with stable IDs", () => { + const result = createWorkspaceHarnessConfigFromPlanDraft({ + checklist: [{ title: "Add schema" }, { title: "Update router" }], + gates: [{ command: "make static-check" }], + }); + + expect(result.usedFallback).toBe(false); + expect(result.config.checklist.map((i) => i.id)).toEqual(["item-1", "item-2"]); + expect(result.config.checklist.map((i) => i.status)).toEqual(["todo", "todo"]); + expect(result.config.loop?.autoCommit).toBe(true); + }); + + it("falls back to a single checklist item when the draft is empty", () => { + const result = createWorkspaceHarnessConfigFromPlanDraft({}); + + expect(result.usedFallback).toBe(true); + expect(result.config.checklist).toEqual([ + { id: 
"item-1", title: "Implement the plan", status: "todo" }, + ]); + expect(result.config.loop?.autoCommit).toBe(false); + }); + + it("drops unsafe gates and disables auto-commit", () => { + const result = createWorkspaceHarnessConfigFromPlanDraft({ + checklist: [{ title: "Ship it" }], + gates: [{ command: "rm -rf /" }, { command: "make typecheck" }], + }); + + expect(result.usedFallback).toBe(false); + expect(result.droppedUnsafeGates).toBe(true); + expect(result.config.gates.map((g) => g.command)).toEqual(["make typecheck"]); + expect(result.config.loop?.autoCommit).toBe(false); + }); +}); diff --git a/src/node/services/workspaceHarnessFromPlan.ts b/src/node/services/workspaceHarnessFromPlan.ts new file mode 100644 index 0000000000..aac144fb76 --- /dev/null +++ b/src/node/services/workspaceHarnessFromPlan.ts @@ -0,0 +1,156 @@ +import assert from "@/common/utils/assert"; +import type { + HarnessChecklistItem, + HarnessGate, + WorkspaceHarnessConfig, +} from "@/common/types/harness"; +import { z } from "zod"; + +export const HarnessFromPlanDraftSchema = z + .object({ + checklist: z + .array( + z + .object({ + title: z.string().min(1), + notes: z.string().optional(), + }) + .strict() + ) + .optional(), + gates: z + .array( + z + .object({ + command: z.string().min(1), + title: z.string().optional(), + timeoutSecs: z.number().int().positive().optional(), + }) + .strict() + ) + .optional(), + loop: z + .object({ + autoCommit: z.boolean().optional(), + }) + .strict() + .optional(), + }) + .strict(); + +export type HarnessFromPlanDraft = z.infer; + +function fallbackHarnessConfig(): WorkspaceHarnessConfig { + return { + version: 1, + checklist: [{ id: "item-1", title: "Implement the plan", status: "todo" }], + gates: [], + loop: { autoCommit: false }, + }; +} + +export function isSafeHarnessGateCommand(command: string): boolean { + assert(typeof command === "string", "command must be a string"); + + const trimmed = command.trim(); + if (trimmed.length === 0) { + return 
false; + } + + // Keep gate commands single-line and boring. These are executed with a shell, and this is + // AI-generated by default, so we heavily restrict what can be persisted. + if (/\r|\n/.test(trimmed)) { + return false; + } + + if (trimmed.length > 200) { + return false; + } + + // Disallow common shell metacharacters that enable chaining / redirection. + if (/[;&|><`"'$]/.test(trimmed)) { + return false; + } + + // Allowlist simple check runners. + if (trimmed === "make") { + return false; + } + + return trimmed.startsWith("make ") || trimmed.startsWith("bun "); +} + +export function createWorkspaceHarnessConfigFromPlanDraft(draft: unknown): { + config: WorkspaceHarnessConfig; + usedFallback: boolean; + droppedUnsafeGates: boolean; +} { + const parsed = HarnessFromPlanDraftSchema.safeParse(draft); + if (!parsed.success) { + return { config: fallbackHarnessConfig(), usedFallback: true, droppedUnsafeGates: false }; + } + + const rawChecklist = parsed.data.checklist ?? []; + + const checklist: HarnessChecklistItem[] = rawChecklist + .map((item) => ({ + title: item.title.trim(), + notes: typeof item.notes === "string" ? item.notes.trim() : undefined, + })) + .filter((item) => item.title.length > 0) + .slice(0, 20) + .map((item, index) => ({ + id: `item-${index + 1}`, + title: item.title, + status: "todo" as const, + notes: item.notes && item.notes.length > 0 ? item.notes : undefined, + })); + + if (checklist.length === 0) { + return { config: fallbackHarnessConfig(), usedFallback: true, droppedUnsafeGates: false }; + } + + const rawGates = parsed.data.gates ?? []; + + let droppedUnsafeGates = false; + const gates: HarnessGate[] = []; + + for (const [index, gate] of rawGates.entries()) { + const command = gate.command.trim(); + if (!isSafeHarnessGateCommand(command)) { + droppedUnsafeGates = true; + continue; + } + + const title = + typeof gate.title === "string" && gate.title.trim().length > 0 + ? 
gate.title.trim() + : undefined; + + gates.push({ + id: `gate-${index + 1}`, + title, + command, + timeoutSecs: gate.timeoutSecs, + }); + } + + const suggestedAutoCommit = parsed.data.loop?.autoCommit; + + // Default: only auto-commit when we have at least one safe gate. + // If the model tried to provide unsafe gates, disable auto-commit entirely. + const autoCommit = droppedUnsafeGates + ? false + : typeof suggestedAutoCommit === "boolean" + ? suggestedAutoCommit + : gates.length > 0; + + const config: WorkspaceHarnessConfig = { + version: 1, + checklist, + gates, + loop: { autoCommit }, + }; + + return { config, usedFallback: false, droppedUnsafeGates }; +} From 684b2a8a22d5f7790404b6e7fb120269518c0431 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Tue, 20 Jan 2026 13:28:12 +0100 Subject: [PATCH 03/20] fix: move harness artifacts into .mux/harness Change-Id: Ide9e2ac1fa93252310350441843ae4d7eaa0ad25 Signed-off-by: Thomas Kosiewski --- src/node/orpc/router.ts | 4 ++-- src/node/services/loopRunnerService.ts | 4 ++-- src/node/services/workspaceHarnessService.ts | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index a08f016c66..c428848f66 100644 --- a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -1804,8 +1804,8 @@ export const router = (authToken?: string) => { ]); const workspaceName = workspaceInfo?.name ?? 
input.workspaceId; - const configPathHint = `.mux/${workspaceName}.harness.jsonc`; - const progressPathHint = `.mux/${workspaceName}.harness.progress.md`; + const configPathHint = `.mux/harness/${workspaceName}.harness.jsonc`; + const progressPathHint = `.mux/harness/${workspaceName}.harness.progress.md`; const lines: string[] = []; lines.push("# Harness bearings"); diff --git a/src/node/services/loopRunnerService.ts b/src/node/services/loopRunnerService.ts index 7c8d5bb7e3..fb4dd9e2c2 100644 --- a/src/node/services/loopRunnerService.ts +++ b/src/node/services/loopRunnerService.ts @@ -395,8 +395,8 @@ export class LoopRunnerService extends EventEmitter { return; } - const configPathHint = `.mux/${info.name}.harness.jsonc`; - const progressPathHint = `.mux/${info.name}.harness.progress.md`; + const configPathHint = `.mux/harness/${info.name}.harness.jsonc`; + const progressPathHint = `.mux/harness/${info.name}.harness.progress.md`; const modelString = info.aiSettingsByMode?.exec?.model ?? info.aiSettings?.model ?? 
defaultModel; diff --git a/src/node/services/workspaceHarnessService.ts b/src/node/services/workspaceHarnessService.ts index c867953908..08049c1809 100644 --- a/src/node/services/workspaceHarnessService.ts +++ b/src/node/services/workspaceHarnessService.ts @@ -18,7 +18,7 @@ import { createRuntime } from "@/node/runtime/runtimeFactory"; import { execBuffered, readFileString, writeFileString } from "@/node/utils/runtime/helpers"; import { log } from "@/node/services/log"; -const HARNESS_DIR = ".mux"; +const HARNESS_DIR = ".mux/harness"; const HARNESS_GITIGNORE_PATTERNS = [ `${HARNESS_DIR}/*.harness.jsonc`, From f0dc47ab18427cdd6896bf608ba934357611def1 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Tue, 20 Jan 2026 17:10:43 +0100 Subject: [PATCH 04/20] =?UTF-8?q?=F0=9F=A4=96=20feat:=20improve=20Start=20?= =?UTF-8?q?Ralph=20loop=20UX=20and=20harness=20gen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I0f684cca69decbe2756577ec54c321ea0e13b182 Signed-off-by: Thomas Kosiewski --- docs/agents/index.mdx | 67 ++++++++++++- .../components/tools/ProposePlanToolCall.tsx | 78 ++++++++++++++- src/node/builtinAgents/harness-from-plan.md | 49 ++++++++++ src/node/orpc/router.ts | 91 ++++++++++++++++-- .../builtInAgentContent.generated.ts | 1 + .../builtInAgentDefinitions.ts | 1 + .../services/workspaceHarnessFromPlan.test.ts | 46 ++++++++- src/node/services/workspaceHarnessFromPlan.ts | 96 ++++++++++++++++--- 8 files changed, 406 insertions(+), 23 deletions(-) create mode 100644 src/node/builtinAgents/harness-from-plan.md diff --git a/docs/agents/index.mdx b/docs/agents/index.mdx index 518df2d338..1b6ff08deb 100644 --- a/docs/agents/index.mdx +++ b/docs/agents/index.mdx @@ -521,9 +521,74 @@ Example: -{/* END BUILTIN_AGENTS */} +### Harness from Plan (internal) + +**Generate a Ralph harness draft from a plan (internal)** + + + +```md +--- +name: Harness from Plan +description: Generate a Ralph harness draft from a plan 
(internal) +base: exec +ui: + hidden: true +subagent: + runnable: true + append_prompt: | + You are a sub-agent generating a Ralph harness draft from a plan. + + - Use read-only investigation only (no file edits, no state changes). + - Output ONLY a single JSON object in a fenced code block (language: json). + - When complete, call agent_report exactly once with that JSON block. +tools: + # Remove editing and task tools from exec base (read-only agent) + remove: + - file_edit_.* + - task + - task_.* + - agent_skill_read + - agent_skill_read_file +--- + +You generate a Ralph harness draft (checklist + optional gates) from the plan provided in the prompt. + +=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS === + +- You MUST NOT create, edit, delete, move, or copy files. +- You MUST NOT create temporary files anywhere (including /tmp). +- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files. +- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.). +- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.). + +Rules: + +- Checklist items should be small, mergeable steps (max 20). +- Gates should be safe single commands that run checks (prefer make targets from this repo, e.g. "make static-check"). +- Do not use shell chaining, pipes, redirects, quotes, or destructive commands. + +Output format: a single fenced code block (language: json) containing one JSON object. + +Example JSON object: + +{ +"checklist": [{ "title": "...", "notes": "..." 
}], +"gates": [{ "command": "make static-check", "title": "...", "timeoutSecs": 600 }], +"loop": { "autoCommit": false } +} +``` + + + + +{/* END BUILTIN_AGENTS */} ## Related Docs - Scoped instructions in `AGENTS.md`: see [Instruction Files](/agents/instruction-files) - Built-in skills (`agent_skill_read`): see [Agent Skills](/agents/agent-skills) + +``` + +``` diff --git a/src/browser/components/tools/ProposePlanToolCall.tsx b/src/browser/components/tools/ProposePlanToolCall.tsx index cc739a9a7c..43a40b992b 100644 --- a/src/browser/components/tools/ProposePlanToolCall.tsx +++ b/src/browser/components/tools/ProposePlanToolCall.tsx @@ -5,6 +5,7 @@ import type { LegacyProposePlanToolArgs, LegacyProposePlanToolResult, } from "@/common/types/tools"; +import type { HarnessLoopState } from "@/common/types/harness"; import { ToolContainer, ToolHeader, @@ -136,6 +137,7 @@ export const ProposePlanToolCall: React.FC = (props) = const isStartingLoopRef = useRef(false); const [isImplementing, setIsImplementing] = useState(false); const [implementReplacesChatHistory, setImplementReplacesChatHistory] = useState(false); + const [loopState, setLoopState] = useState(null); const isImplementingRef = useRef(false); const isMountedRef = useRef(true); const { api } = useAPI(); @@ -225,6 +227,31 @@ export const ProposePlanToolCall: React.FC = (props) = // status in deps ensures refetch when tool completes (captures final file state) }, [api, workspaceId, isLatest, isEphemeralPreview, cacheKey, status]); + // Keep loop state live for the latest plan. 
+ useEffect(() => { + if (isEphemeralPreview || !isLatest || !workspaceId || !api) return; + + const abortController = new AbortController(); + const { signal } = abortController; + + (async () => { + try { + const iterator = await api.workspace.loop.subscribe({ workspaceId }, { signal }); + + for await (const nextLoopState of iterator) { + if (signal.aborted) break; + setLoopState(nextLoopState); + } + } catch (err) { + if (!signal.aborted) { + console.error("Failed to subscribe to loop state:", err); + } + } + })(); + + return () => abortController.abort(); + }, [api, workspaceId, isLatest, isEphemeralPreview]); + // Determine plan content and title based on result type // For ephemeral previews, use direct content/path props // For the latest plan, prefer fresh content from disk (external edit support) @@ -435,6 +462,18 @@ export const ProposePlanToolCall: React.FC = (props) = } }; + const showPlanPlaceholder = + !errorMessage && !showRaw && planContent.trim().length === 0 && status !== "completed"; + const planPlaceholderText = + status === "executing" ? "Generating plan preview…" : "Preparing plan…"; + + const showInlineLoopState = + !isEphemeralPreview && + !!api && + !!workspaceId && + isLatest && + status === "completed" && + !errorMessage; const statusDisplay = getStatusDisplay(status); // Build action buttons array (similar to AssistantMessage) @@ -499,7 +538,7 @@ export const ProposePlanToolCall: React.FC = (props) = label: "Start Ralph loop", onClick: handleStartRalphLoop, disabled: !api || isStartingLoop, - icon: , + icon: , tooltip: "Generate a harness from the plan (if needed) and start the loop", }} /> @@ -560,13 +599,42 @@ export const ProposePlanToolCall: React.FC = (props) =
+ ) : showPlanPlaceholder ? ( +
+ {planPlaceholderText} +
) : (
)} - {/* Completion guidance: only for completed tool calls without errors, not ephemeral previews */} + {/* Loop status + completion guidance */} + + {showInlineLoopState && ( +
+
Loop status
+
+ {loopState ? `${loopState.status} • iteration ${loopState.iteration}` : "Loading…"} +
+ {loopState?.currentItemTitle && ( +
+ Current: {loopState.currentItemTitle} +
+ )} + {loopState && loopState.consecutiveFailures > 0 && ( +
+ Consecutive failures: {loopState.consecutiveFailures} +
+ )} + {loopState?.stoppedReason && ( +
Stopped: {loopState.stoppedReason}
+ )} + {loopState?.lastError && ( +
{loopState.lastError}
+ )} +
+ )} {!isEphemeralPreview && status === "completed" && !errorMessage && (
Respond with revisions or switch to the Exec agent ( @@ -575,6 +643,12 @@ export const ProposePlanToolCall: React.FC = (props) =
)} + {isStartingLoop && ( +
+ Starting Ralph loop… (generating harness if needed) +
+ )} + {/* Actions row at the bottom (matching MessageWindow style) */}
{actionButtons.map((button, index) => ( diff --git a/src/node/builtinAgents/harness-from-plan.md b/src/node/builtinAgents/harness-from-plan.md new file mode 100644 index 0000000000..46b1ac48ad --- /dev/null +++ b/src/node/builtinAgents/harness-from-plan.md @@ -0,0 +1,49 @@ +--- +name: Harness from Plan +description: Generate a Ralph harness draft from a plan (internal) +base: exec +ui: + hidden: true +subagent: + runnable: true + append_prompt: | + You are a sub-agent generating a Ralph harness draft from a plan. + + - Use read-only investigation only (no file edits, no state changes). + - Output ONLY a single JSON object in a fenced code block (language: json). + - When complete, call agent_report exactly once with that JSON block. +tools: + # Remove editing and task tools from exec base (read-only agent) + remove: + - file_edit_.* + - task + - task_.* + - agent_skill_read + - agent_skill_read_file +--- + +You generate a Ralph harness draft (checklist + optional gates) from the plan provided in the prompt. + +=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS === + +- You MUST NOT create, edit, delete, move, or copy files. +- You MUST NOT create temporary files anywhere (including /tmp). +- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files. +- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.). +- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.). + +Rules: + +- Checklist items should be small, mergeable steps (max 20). +- Gates should be safe single commands that run checks (prefer make targets from this repo, e.g. "make static-check"). +- Do not use shell chaining, pipes, redirects, quotes, or destructive commands. + +Output format: a single fenced code block (language: json) containing one JSON object. + +Example JSON object: + +{ +"checklist": [{ "title": "...", "notes": "..." 
}], +"gates": [{ "command": "make static-check", "title": "...", "timeoutSecs": 600 }], +"loop": { "autoCommit": false } +} diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index c428848f66..de718fc7fc 100644 --- a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -10,6 +10,7 @@ import { formatSendMessageError } from "@/node/services/utils/sendMessageError"; import { HarnessFromPlanDraftSchema, createWorkspaceHarnessConfigFromPlanDraft, + extractJsonObjectFromMarkdown, } from "@/node/services/workspaceHarnessFromPlan"; import type { UpdateStatus, @@ -1939,11 +1940,86 @@ export const router = (authToken?: string) => { }; } - const generation = await generateObject({ - model: modelResult.data, - schema: HarnessFromPlanDraftSchema, - mode: "json", - prompt: `Generate a Ralph harness (checklist + optional gates) from this plan. + const buildHarnessFromPlanTaskPrompt = (options?: { errorHint?: string }): string => { + const errorHint = + typeof options?.errorHint === "string" && options.errorHint.trim().length > 0 + ? `\n\nPrevious attempt error:\n${options.errorHint.trim().slice(0, 2000)}\n\nFix the output and try again.` + : ""; + + return `Generate a Ralph harness draft (checklist + optional gates) from this plan. + +Rules: +- Checklist items should be small, mergeable steps (max 20). +- Gates should be safe, single commands that run checks (prefer make targets like "make static-check"). +- Do not use shell chaining, pipes, redirects, quotes, or destructive commands. + +Output: +- Return ONLY a single JSON object in a fenced code block (language: json). 
+${errorHint} + +Plan: + +${planResult.content}`; + }; + + const runHarnessFromPlanTask = async (options?: { errorHint?: string }) => { + try { + const taskResult = await context.taskService.create({ + parentWorkspaceId: input.workspaceId, + kind: "agent", + agentId: "harness-from-plan", + prompt: buildHarnessFromPlanTaskPrompt(options), + title: "Generate harness from plan", + modelString, + }); + + if (!taskResult.success) { + return { success: false as const, error: taskResult.error }; + } + + const report = await context.taskService.waitForAgentReport( + taskResult.data.taskId, + { + requestingWorkspaceId: input.workspaceId, + } + ); + + const extracted = extractJsonObjectFromMarkdown(report.reportMarkdown); + if (!extracted.success) { + return { success: false as const, error: extracted.error }; + } + + const parsedDraft = HarnessFromPlanDraftSchema.safeParse(extracted.data); + if (!parsedDraft.success) { + return { success: false as const, error: parsedDraft.error.message }; + } + + return { success: true as const, data: parsedDraft.data }; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { success: false as const, error: message }; + } + }; + + const firstAttempt = await runHarnessFromPlanTask(); + const secondAttempt = firstAttempt.success + ? null + : await runHarnessFromPlanTask({ errorHint: firstAttempt.error }); + + const draftFromTask = firstAttempt.success + ? firstAttempt.data + : secondAttempt?.success + ? secondAttempt.data + : null; + + const draft = + draftFromTask ?? + ( + await generateObject({ + model: modelResult.data, + schema: HarnessFromPlanDraftSchema, + mode: "json", + prompt: `Generate a Ralph harness (checklist + optional gates) from this plan. Rules: - Checklist items should be small, mergeable steps (max 20). 
@@ -1953,9 +2029,10 @@ Rules: Plan: ${planResult.content}`, - }); + }) + ).object; - const derived = createWorkspaceHarnessConfigFromPlanDraft(generation.object); + const derived = createWorkspaceHarnessConfigFromPlanDraft(draft); const loopState = await context.loopRunnerService.getState(input.workspaceId); await context.workspaceHarnessService.setHarnessForWorkspace( diff --git a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts index 656d43cd8d..e10e3b86f4 100644 --- a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts +++ b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts @@ -6,6 +6,7 @@ export const BUILTIN_AGENT_CONTENT = { "compact": "---\nname: Compact\ndescription: History compaction (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\n---\n\nYou are running a compaction/summarization pass. Your task is to write a concise summary of the conversation so far.\n\nIMPORTANT:\n\n- You have NO tools available. 
Do not attempt to call any tools or output JSON.\n- Simply write the summary as plain text prose.\n- Follow the user's instructions for what to include in the summary.\n", "exec": "---\nname: Exec\ndescription: Implement changes in the repository\nui:\n color: var(--color-exec-mode)\nsubagent:\n runnable: true\n append_prompt: |\n If you are running as a sub-agent in a child workspace:\n\n - When you have a final answer, call agent_report exactly once.\n - Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).\n - Do not call propose_plan.\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n remove:\n # Exec mode doesn't use planning tools\n - propose_plan\n - ask_user_question\n # Internal-only tools\n - system1_keep_ranges\n---\n\nYou are in Exec mode.\n\n- Make minimal, correct, reviewable changes that match existing codebase patterns.\n- Prefer targeted commands and checks (typecheck/tests) when feasible.\n- Treat as a standing order: keep running checks and addressing failures until they pass or a blocker outside your control arises.\n", "explore": "---\nname: Explore\ndescription: Read-only exploration of repository, environment, web, etc. 
Useful for investigation before making changes.\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n skip_init_hook: true\n append_prompt: |\n You are an Explore sub-agent running inside a child workspace.\n\n - Explore the repository to answer the prompt using read-only investigation.\n - Return concise, actionable findings (paths, symbols, callsites, and facts).\n - When you have a final answer, call agent_report exactly once.\n - Do not call agent_report until you have completed the assigned task.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nYou are in Explore mode (read-only).\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Prefer `file_read` for reading file contents (supports offset/limit paging).\n- Use bash only for read-only operations (rg, ls, git diff/show/log, etc.), or when you need piping/processing.\n", + "harness-from-plan": "---\nname: Harness from Plan\ndescription: Generate a Ralph harness draft from a plan (internal)\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n append_prompt: |\n You are a sub-agent generating a Ralph harness draft from a plan.\n\n - Use read-only investigation only (no file edits, no state changes).\n - Output ONLY a single JSON object in a fenced code block (language: json).\n - When complete, call agent_report exactly once with that JSON block.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nYou generate a Ralph 
harness draft (checklist + optional gates) from the plan provided in the prompt.\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.).\n\nRules:\n\n- Checklist items should be small, mergeable steps (max 20).\n- Gates should be safe single commands that run checks (prefer make targets from this repo, e.g. \"make static-check\").\n- Do not use shell chaining, pipes, redirects, quotes, or destructive commands.\n\nOutput format: a single fenced code block (language: json) containing one JSON object.\n\nExample JSON object:\n\n{\n\"checklist\": [{ \"title\": \"...\", \"notes\": \"...\" }],\n\"gates\": [{ \"command\": \"make static-check\", \"title\": \"...\", \"timeoutSecs\": 600 }],\n\"loop\": { \"autoCommit\": false }\n}\n", "mux": "---\nname: Mux\ndescription: Configure mux global behavior (system workspace)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - mux_global_agents_read\n - mux_global_agents_write\n - ask_user_question\n---\n\nYou are the **Mux system assistant**.\n\nYour job is to help the user configure mux globally by editing the mux-wide instructions file:\n\n- `~/.mux/AGENTS.md`\n\n## Safety rules\n\n- You do **not** have access to arbitrary filesystem tools.\n- You do **not** have access to project secrets.\n- Before writing `~/.mux/AGENTS.md`, you must:\n 1) Read the current file (`mux_global_agents_read`).\n 2) Propose the exact change (show the new content or a concise diff).\n 3) Ask for explicit confirmation via `ask_user_question`.\n 4) Only then call `mux_global_agents_write` with `confirm: true`.\n\nIf the user declines, 
do not write anything.\n", "plan": "---\nname: Plan\ndescription: Create a plan before coding\nui:\n color: var(--color-plan-mode)\nsubagent:\n runnable: false\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n # Note: file_edit_* tools ARE available but restricted to plan file only at runtime\n # Note: task tools ARE enabled - Plan delegates to Explore sub-agents\n---\n\nYou are in Plan Mode.\n\n- Every response MUST produce or update a plan—no exceptions.\n- Simple requests deserve simple plans; a straightforward task might only need a few bullet points. Match plan complexity to the problem.\n- Keep the plan scannable; put long rationale in `
/` blocks.\n- Plans must be **self-contained**: include enough context, goals, constraints, and the core \"why\" so a new assistant can implement without needing the prior chat.\n- When Plan Mode is requested, assume the user wants the actual completed plan; do not merely describe how you would devise one.\n\n## Investigation step (required)\n\nBefore proposing a plan, identify what you must verify and use the best available tools\n(`file_read` for local file contents, search, or user questions). Do not guess. Investigation can be\ndone directly; sub-agents are optional.\n\nPrefer `file_read` over `bash cat` when reading files (including the plan file): long bash output may\nbe compacted, which can hide the middle of a document. Use `file_read` with offset/limit to page\nthrough larger files.\n\n## Plan format\n\n- Context/Why: Briefly restate the request, goals, and the rationale or user impact so the\n plan stands alone for a fresh implementer.\n- Evidence: List sources consulted (file paths, tool outputs, or user-provided info) and\n why they are sufficient. If evidence is missing, still produce a minimal plan and add a\n Questions section listing what you need to proceed.\n\nDetailed plan mode instructions (plan file path, sub-agent delegation, propose_plan workflow) are provided separately.\n", "system1_bash": "---\nname: System1 Bash\ndescription: Fast bash-output filtering (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - system1_keep_ranges\n---\n\nYou are a fast bash-output filtering assistant.\n\nYou will be given:\n\n- `maxKeptLines` (budget)\n- `Display name` (optional): a short intent label for the command\n- `Bash script`\n- `Numbered output`\n\nGiven the numbered output, decide which lines to keep so the user sees the most relevant information.\n\nIMPORTANT:\n\n- You MUST call `system1_keep_ranges` exactly once.\n- Do NOT output markdown or prose. 
Only the tool call (with valid JSON arguments).\n\nRules:\n\n- Line numbers are 1-based indices into the numbered output.\n- Use the `Display name` and `Bash script` as intent hints.\n- If intent is exploration/listing/search (e.g. `ls`, `find`, `rg`, `grep`, `git status`), prioritize keeping\n representative file paths/matches and any summary/counts (not just errors).\n- If intent is build/test/logs, prefer errors, stack traces, failing test summaries, and actionable warnings.\n- If the script already narrows output to a slice (e.g. `head`, `tail`, `sed -n` line ranges), avoid extra\n denoising: prefer keeping most/all lines within the budget.\n- Never filter out git merge conflict markers (`<<<<<<<`, `|||||||`, `=======`, `>>>>>>>`). If the command is searching for these markers (e.g. `rg`/`grep`), do not keep only representative matches; keep all matches within the budget.\n- Prefer omitting tool-generated advisory blocks (especially git lines starting with `hint:`) that only suggest\n next-step commands or point to docs/help. Keep the underlying `error:`/`fatal:`/`CONFLICT` lines, file paths,\n and conflict markers instead.\n- Exception: keep `hint:` blocks when the script is explicitly searching for them (e.g. `rg '^hint:'`) or when\n the hint is the only clue explaining a blocking state.\n- Prefer high signal density: keep ranges tight around important lines plus minimal surrounding context.\n- Merge adjacent/overlapping ranges only when the lines between are also informative. Do NOT add noise just\n to reduce range count; it's OK to return many ranges when denoising (e.g., > 8).\n- Denoise aggressively: omit duplicate/redundant lines and repeated messages with the same meaning\n (e.g., repeated progress, retries, or identical stack traces). 
If the same error repeats, keep only\n the most informative instance plus minimal surrounding context.\n- If there are many similar warnings/errors, keep only a few representative examples (prefer those\n with file paths/line numbers) plus any summary/count.\n- Always keep at least 1 line if any output exists.\n- Choose ranges that keep at most `maxKeptLines` lines total (the caller may truncate).\n\nExample:\n\n- Numbered output:\n - 0001| building...\n - 0002| ERROR: expected X, got Y\n - 0003| at path/to/file.ts:12:3\n - 0004| done\n- Tool call:\n - system1_keep_ranges({\"keep_ranges\":[{\"start\":2,\"end\":3,\"reason\":\"error\"}]})\n", diff --git a/src/node/services/agentDefinitions/builtInAgentDefinitions.ts b/src/node/services/agentDefinitions/builtInAgentDefinitions.ts index 80e69470ff..03553d1ed1 100644 --- a/src/node/services/agentDefinitions/builtInAgentDefinitions.ts +++ b/src/node/services/agentDefinitions/builtInAgentDefinitions.ts @@ -18,6 +18,7 @@ const BUILT_IN_SOURCES: BuiltInSource[] = [ { id: "exec", content: BUILTIN_AGENT_CONTENT.exec }, { id: "plan", content: BUILTIN_AGENT_CONTENT.plan }, { id: "compact", content: BUILTIN_AGENT_CONTENT.compact }, + { id: "harness-from-plan", content: BUILTIN_AGENT_CONTENT["harness-from-plan"] }, { id: "explore", content: BUILTIN_AGENT_CONTENT.explore }, { id: "system1_bash", content: BUILTIN_AGENT_CONTENT.system1_bash }, { id: "mux", content: BUILTIN_AGENT_CONTENT.mux }, diff --git a/src/node/services/workspaceHarnessFromPlan.test.ts b/src/node/services/workspaceHarnessFromPlan.test.ts index 284d8a9ba4..9e1a7fca8a 100644 --- a/src/node/services/workspaceHarnessFromPlan.test.ts +++ b/src/node/services/workspaceHarnessFromPlan.test.ts @@ -1,6 +1,9 @@ import { describe, expect, it } from "bun:test"; -import { createWorkspaceHarnessConfigFromPlanDraft } from "./workspaceHarnessFromPlan"; +import { + createWorkspaceHarnessConfigFromPlanDraft, + extractJsonObjectFromMarkdown, +} from "./workspaceHarnessFromPlan"; 
describe("workspaceHarnessFromPlan", () => { it("derives a non-empty checklist with stable IDs", () => { @@ -36,4 +39,45 @@ describe("workspaceHarnessFromPlan", () => { expect(result.config.gates.map((g) => g.command)).toEqual(["make typecheck"]); expect(result.config.loop?.autoCommit).toBe(false); }); + + it("dedupes checklist titles and drops trivial placeholders", () => { + const result = createWorkspaceHarnessConfigFromPlanDraft({ + checklist: [ + { title: "TODO" }, + { title: "Add schema" }, + { title: "Add schema " }, + { title: "Update router" }, + { title: "TBD" }, + ], + }); + + expect(result.usedFallback).toBe(false); + expect(result.config.checklist.map((i) => i.title)).toEqual(["Add schema", "Update router"]); + }); + + describe("extractJsonObjectFromMarkdown", () => { + it("parses a ```json fenced block", () => { + const res = extractJsonObjectFromMarkdown('```json\n{"checklist": []}\n```'); + + expect(res.success).toBe(true); + if (res.success) { + expect(res.data).toEqual({ checklist: [] }); + } + }); + + it("parses raw JSON", () => { + const res = extractJsonObjectFromMarkdown('{"checklist": []}'); + + expect(res.success).toBe(true); + if (res.success) { + expect(res.data).toEqual({ checklist: [] }); + } + }); + + it("fails on non-JSON", () => { + const res = extractJsonObjectFromMarkdown("not json"); + + expect(res.success).toBe(false); + }); + }); }); diff --git a/src/node/services/workspaceHarnessFromPlan.ts b/src/node/services/workspaceHarnessFromPlan.ts index aac144fb76..3e4883b7b9 100644 --- a/src/node/services/workspaceHarnessFromPlan.ts +++ b/src/node/services/workspaceHarnessFromPlan.ts @@ -49,6 +49,27 @@ function fallbackHarnessConfig(): WorkspaceHarnessConfig { }; } +const MAX_CHECKLIST_TITLE_LENGTH = 200; + +function isTriviallyBadChecklistTitle(title: string): boolean { + assert(typeof title === "string", "title must be a string"); + + const trimmed = title.trim(); + if (trimmed.length === 0) return true; + + const normalized = 
trimmed.toLowerCase(); + if ( + normalized === "todo" || + normalized === "tbd" || + normalized === "todo." || + normalized === "tbd." + ) { + return true; + } + + return trimmed.length > MAX_CHECKLIST_TITLE_LENGTH; +} + export function isSafeHarnessGateCommand(command: string): boolean { assert(typeof command === "string", "command must be a string"); @@ -92,19 +113,29 @@ export function createWorkspaceHarnessConfigFromPlanDraft(draft: unknown): { const rawChecklist = parsed.data.checklist ?? []; - const checklist: HarnessChecklistItem[] = rawChecklist - .map((item) => ({ - title: item.title.trim(), - notes: typeof item.notes === "string" ? item.notes.trim() : undefined, - })) - .filter((item) => item.title.length > 0) - .slice(0, 20) - .map((item, index) => ({ - id: `item-${index + 1}`, - title: item.title, + const checklist: HarnessChecklistItem[] = []; + const seenTitles = new Set(); + + for (const item of rawChecklist) { + const title = item.title.trim(); + if (title.length === 0) continue; + if (isTriviallyBadChecklistTitle(title)) continue; + + const normalizedTitle = title.toLowerCase(); + if (seenTitles.has(normalizedTitle)) continue; + seenTitles.add(normalizedTitle); + + const notes = typeof item.notes === "string" ? item.notes.trim() : undefined; + + checklist.push({ + id: `item-${checklist.length + 1}`, + title, status: "todo" as const, - notes: item.notes && item.notes.length > 0 ? item.notes : undefined, - })); + notes: notes && notes.length > 0 ? 
notes : undefined, + }); + + if (checklist.length >= 20) break; + } if (checklist.length === 0) { return { config: fallbackHarnessConfig(), usedFallback: true, droppedUnsafeGates: false }; @@ -154,3 +185,44 @@ export function createWorkspaceHarnessConfigFromPlanDraft(draft: unknown): { return { config, usedFallback: false, droppedUnsafeGates }; } + +export function extractJsonObjectFromMarkdown( + markdown: string +): { success: true; data: unknown } | { success: false; error: string } { + assert(typeof markdown === "string", "markdown must be a string"); + + const trimmed = markdown.trim(); + if (trimmed.length === 0) { + return { success: false, error: "Empty agent_report" }; + } + + const fencedMatch = /```json\s*([\s\S]*?)```/i.exec(trimmed); + const candidate = (fencedMatch ? fencedMatch[1] : trimmed).trim(); + + const tryParse = (text: string): { ok: true; value: unknown } | { ok: false; error: string } => { + try { + return { ok: true, value: JSON.parse(text) }; + } catch (error) { + return { ok: false, error: error instanceof Error ? 
error.message : String(error) }; + } + }; + + let parsed = tryParse(candidate); + if (!parsed.ok && !fencedMatch) { + const start = trimmed.indexOf("{"); + const end = trimmed.lastIndexOf("}"); + if (start !== -1 && end !== -1 && end > start) { + parsed = tryParse(trimmed.slice(start, end + 1)); + } + } + + if (!parsed.ok) { + return { success: false, error: `Failed to parse JSON: ${parsed.error}` }; + } + + if (typeof parsed.value !== "object" || parsed.value === null || Array.isArray(parsed.value)) { + return { success: false, error: "Expected a JSON object" }; + } + + return { success: true, data: parsed.value }; +} From 50418ea7650942926f2c66f386cc8cc6a44bc022 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Tue, 20 Jan 2026 17:59:08 +0100 Subject: [PATCH 05/20] =?UTF-8?q?=F0=9F=A4=96=20fix:=20make=20harness=20pr?= =?UTF-8?q?ogress=20file=20an=20append-only=20journal?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Iebbcc21aaa8a919be5e1217c0d44b6cee070d782 Signed-off-by: Thomas Kosiewski --- src/node/orpc/router.ts | 12 +- src/node/services/loopRunnerService.test.ts | 21 ++ src/node/services/loopRunnerService.ts | 55 ++-- .../services/workspaceHarnessService.test.ts | 120 ++++++++ src/node/services/workspaceHarnessService.ts | 260 ++++++++++++------ 5 files changed, 347 insertions(+), 121 deletions(-) create mode 100644 src/node/services/loopRunnerService.test.ts create mode 100644 src/node/services/workspaceHarnessService.test.ts diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index de718fc7fc..0594d4fc0e 100644 --- a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -1737,11 +1737,9 @@ export const router = (authToken?: string) => { .output(schemas.workspace.harness.set.output) .handler(async ({ context, input }) => { try { - const loopState = await context.loopRunnerService.getState(input.workspaceId); const normalized = await 
context.workspaceHarnessService.setHarnessForWorkspace( input.workspaceId, - input.config, - { loopState } + input.config ); return { success: true, data: normalized }; } catch (error) { @@ -1805,8 +1803,8 @@ export const router = (authToken?: string) => { ]); const workspaceName = workspaceInfo?.name ?? input.workspaceId; - const configPathHint = `.mux/harness/${workspaceName}.harness.jsonc`; - const progressPathHint = `.mux/harness/${workspaceName}.harness.progress.md`; + const configPathHint = `.mux/harness/${workspaceName}.jsonc`; + const progressPathHint = `.mux/harness/${workspaceName}.progress.md`; const lines: string[] = []; lines.push("# Harness bearings"); @@ -2034,11 +2032,9 @@ ${planResult.content}`, const derived = createWorkspaceHarnessConfigFromPlanDraft(draft); - const loopState = await context.loopRunnerService.getState(input.workspaceId); await context.workspaceHarnessService.setHarnessForWorkspace( input.workspaceId, - derived.config, - { loopState } + derived.config ); const startResult = await context.loopRunnerService.start(input.workspaceId); diff --git a/src/node/services/loopRunnerService.test.ts b/src/node/services/loopRunnerService.test.ts new file mode 100644 index 0000000000..a7dec25afc --- /dev/null +++ b/src/node/services/loopRunnerService.test.ts @@ -0,0 +1,21 @@ +import { describe, expect, it } from "bun:test"; + +import { buildIterationPrompt } from "./loopRunnerService"; + +describe("buildIterationPrompt", () => { + it("includes item id + journal guidance", () => { + const prompt = buildIterationPrompt({ + iteration: 3, + itemId: "item-1", + itemTitle: "Do something", + configPathHint: ".mux/harness/branch.jsonc", + progressPathHint: ".mux/harness/branch.progress.md", + }); + + expect(prompt).toContain("Checklist item: item-1 — Do something"); + expect(prompt).toContain("skim the journal"); + expect(prompt).toContain("append a short entry"); + expect(prompt).toContain("Journal: .mux/harness/branch.progress.md"); + 
expect(prompt).toContain("Config: .mux/harness/branch.jsonc"); + }); +}); diff --git a/src/node/services/loopRunnerService.ts b/src/node/services/loopRunnerService.ts index fb4dd9e2c2..5b330e2381 100644 --- a/src/node/services/loopRunnerService.ts +++ b/src/node/services/loopRunnerService.ts @@ -59,8 +59,9 @@ function hasUnfinishedChecklistItems(config: { checklist: HarnessChecklistItem[] return config.checklist.some((item) => item.status !== "done"); } -function buildIterationPrompt(params: { +export function buildIterationPrompt(params: { iteration: number; + itemId: string; itemTitle: string; configPathHint: string; progressPathHint: string; @@ -68,16 +69,20 @@ function buildIterationPrompt(params: { const lines: string[] = []; lines.push(`Ralph loop iteration ${params.iteration}`); lines.push(""); - lines.push(`Work on: ${params.itemTitle}`); + lines.push(`Checklist item: ${params.itemId} — ${params.itemTitle}`); lines.push(""); lines.push("Rules:"); lines.push("- Make a small, mergeable change."); lines.push("- Run the configured gates (see harness config) before stopping."); lines.push("- Do NOT start the next checklist item."); + lines.push(`- Before coding: skim the journal for prior attempts on item ${params.itemId}.`); + lines.push( + "- After you finish (and gates), append a short entry to the journal (do not edit old entries)." + ); lines.push(""); lines.push("Harness files:"); - lines.push(`- ${params.progressPathHint}`); - lines.push(`- ${params.configPathHint}`); + lines.push(`- Journal: ${params.progressPathHint}`); + lines.push(`- Config: ${params.configPathHint}`); return lines.join("\n"); } @@ -187,11 +192,11 @@ export class LoopRunnerService extends EventEmitter { log.debug("[HARNESS] Failed to persist loop state", { workspaceId, error }); } - // Best-effort: keep progress file in sync, but never block loop control on remote IO. + // Best-effort: ensure harness journal exists, but never block loop control on remote IO. 
void this.workspaceHarnessService .updateProgressFile(workspaceId, state) .catch((error: unknown) => { - log.debug("[HARNESS] Failed to update progress file", { workspaceId, error }); + log.debug("[HARNESS] Failed to ensure harness journal exists", { workspaceId, error }); }); this.emit("change", workspaceId); @@ -395,8 +400,8 @@ export class LoopRunnerService extends EventEmitter { return; } - const configPathHint = `.mux/harness/${info.name}.harness.jsonc`; - const progressPathHint = `.mux/harness/${info.name}.harness.progress.md`; + const configPathHint = `.mux/harness/${info.name}.jsonc`; + const progressPathHint = `.mux/harness/${info.name}.progress.md`; const modelString = info.aiSettingsByMode?.exec?.model ?? info.aiSettings?.model ?? defaultModel; @@ -413,8 +418,10 @@ export class LoopRunnerService extends EventEmitter { } const itemTitle = nextItem?.title ?? "Final cleanup (gates + git clean)"; + const itemId = nextItem?.id ?? "final-cleanup"; const prompt = buildIterationPrompt({ iteration: state.iteration, + itemId, itemTitle, configPathHint, progressPathHint, @@ -431,16 +438,12 @@ export class LoopRunnerService extends EventEmitter { // If this is a checklist item, mark it doing before we start. if (nextItem?.status === "todo") { - await this.workspaceHarnessService.setHarnessForWorkspace( - workspaceId, - { - ...config, - checklist: config.checklist.map((item) => - item.id === nextItem.id ? { ...item, status: "doing" as const } : item - ), - }, - { loopState: updatedStateBeforeSend } - ); + await this.workspaceHarnessService.setHarnessForWorkspace(workspaceId, { + ...config, + checklist: config.checklist.map((item) => + item.id === nextItem.id ? { ...item, status: "doing" as const } : item + ), + }); } const sendResult = await this.workspaceService.sendMessage(workspaceId, prompt, { @@ -493,16 +496,12 @@ export class LoopRunnerService extends EventEmitter { // If this was a checklist item, mark it done. 
if (nextItem) { - await this.workspaceHarnessService.setHarnessForWorkspace( - workspaceId, - { - ...config, - checklist: config.checklist.map((item) => - item.id === nextItem.id ? { ...item, status: "done" as const } : item - ), - }, - { loopState: nextState } - ); + await this.workspaceHarnessService.setHarnessForWorkspace(workspaceId, { + ...config, + checklist: config.checklist.map((item) => + item.id === nextItem.id ? { ...item, status: "done" as const } : item + ), + }); } } else { const failures = nextState.consecutiveFailures + 1; diff --git a/src/node/services/workspaceHarnessService.test.ts b/src/node/services/workspaceHarnessService.test.ts new file mode 100644 index 0000000000..219cf008e7 --- /dev/null +++ b/src/node/services/workspaceHarnessService.test.ts @@ -0,0 +1,120 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test"; +import * as fs from "fs/promises"; +import * as os from "os"; +import * as path from "path"; + +import { Config } from "@/node/config"; + +import { WorkspaceHarnessService } from "./workspaceHarnessService"; + +function getWorkspacePath(args: { + srcDir: string; + projectName: string; + workspaceName: string; +}): string { + return path.join(args.srcDir, args.projectName, args.workspaceName); +} + +async function pathExists(filePath: string): Promise { + try { + await fs.stat(filePath); + return true; + } catch { + return false; + } +} + +describe("WorkspaceHarnessService (journal)", () => { + let tempDir: string; + let config: Config; + + beforeEach(async () => { + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "mux-harness-journal-test-")); + config = new Config(tempDir); + }); + + afterEach(async () => { + await fs.rm(tempDir, { recursive: true, force: true }); + }); + + async function setupWorkspace(): Promise<{ + workspaceId: string; + workspaceName: string; + workspacePath: string; + }> { + const projectPath = "/fake/project"; + const workspaceId = "ws-id"; + const workspaceName = "branch"; + + const 
workspacePath = getWorkspacePath({ + srcDir: config.srcDir, + projectName: "project", + workspaceName, + }); + await fs.mkdir(workspacePath, { recursive: true }); + + await config.editConfig((cfg) => { + cfg.projects.set(projectPath, { + workspaces: [ + { + path: workspacePath, + id: workspaceId, + name: workspaceName, + runtimeConfig: { type: "worktree", srcBaseDir: config.srcDir }, + }, + ], + }); + return cfg; + }); + + return { workspaceId, workspaceName, workspacePath }; + } + + it("creates a journal file when writing harness config", async () => { + const { workspaceId, workspaceName, workspacePath } = await setupWorkspace(); + + const service = new WorkspaceHarnessService(config); + await service.setHarnessForWorkspace(workspaceId, { + version: 1, + checklist: [], + gates: [], + loop: {}, + }); + + const journalPath = path.join(workspacePath, ".mux", "harness", `${workspaceName}.progress.md`); + + expect(await pathExists(journalPath)).toBe(true); + + const contents = await fs.readFile(journalPath, "utf-8"); + expect(contents).toContain("# Harness journal (append-only)"); + expect(contents).toContain("## Entry template"); + expect(contents).toContain(`.mux/harness/${workspaceName}.jsonc`); + }); + + it("does not overwrite an existing journal file", async () => { + const { workspaceId, workspaceName, workspacePath } = await setupWorkspace(); + + const service = new WorkspaceHarnessService(config); + await service.setHarnessForWorkspace(workspaceId, { + version: 1, + checklist: [], + gates: [], + loop: {}, + }); + + const journalPath = path.join(workspacePath, ".mux", "harness", `${workspaceName}.progress.md`); + + await fs.writeFile(journalPath, "CUSTOM\n", "utf-8"); + + await service.updateProgressFile(workspaceId); + await service.setHarnessForWorkspace(workspaceId, { + version: 1, + checklist: [{ id: "item-1", title: "Do something", status: "todo" }], + gates: [], + loop: {}, + }); + + const after = await fs.readFile(journalPath, "utf-8"); + 
expect(after).toBe("CUSTOM\n"); + }); +}); diff --git a/src/node/services/workspaceHarnessService.ts b/src/node/services/workspaceHarnessService.ts index 08049c1809..9587e983f5 100644 --- a/src/node/services/workspaceHarnessService.ts +++ b/src/node/services/workspaceHarnessService.ts @@ -20,10 +20,7 @@ import { log } from "@/node/services/log"; const HARNESS_DIR = ".mux/harness"; -const HARNESS_GITIGNORE_PATTERNS = [ - `${HARNESS_DIR}/*.harness.jsonc`, - `${HARNESS_DIR}/*.harness.progress.md`, -]; +const HARNESS_GITIGNORE_PATTERNS = [`${HARNESS_DIR}/*.jsonc`, `${HARNESS_DIR}/*.progress.md`]; const DEFAULT_LOOP_SETTINGS: Required< Pick< @@ -208,78 +205,34 @@ async function statIsFile( } } -function formatChecklistItemForProgress(item: HarnessChecklistItem): string { - const checkbox = - item.status === "done" - ? "[x]" - : item.status === "doing" - ? "[~]" - : item.status === "blocked" - ? "[!]" - : "[ ]"; - return `- ${checkbox} ${item.title}`; -} - -function renderProgressMarkdown(params: { +function renderHarnessJournalBootstrapMarkdown(params: { metadata: FrontendWorkspaceMetadata; - config: WorkspaceHarnessConfig; paths: WorkspaceHarnessFilePaths; - loopState?: HarnessLoopState; }): string { const nowIso = new Date().toISOString(); + const configBasename = path.basename(params.paths.configPath); + const lines: string[] = []; - lines.push(`# Harness Progress`); + lines.push("# Harness journal (append-only)"); + lines.push(""); + lines.push("This file is an append-only journal for Ralph loop work in this workspace."); + lines.push("Append new entries at the bottom. 
Do not edit or rewrite older entries."); lines.push(""); lines.push(`- Workspace: ${params.metadata.name} (${params.metadata.id})`); - lines.push(`- Updated: ${nowIso}`); - lines.push(`- Harness file: ${params.paths.configPath}`); + lines.push(`- Created: ${nowIso}`); + lines.push(`- Harness config: ${path.posix.join(HARNESS_DIR, configBasename)}`); lines.push(""); - - lines.push("## Checklist"); - if (params.config.checklist.length === 0) { - lines.push("(no checklist items)"); - } else { - for (const item of params.config.checklist) { - lines.push(formatChecklistItemForProgress(item)); - } - } + lines.push("## Entry template"); lines.push(""); - - lines.push("## Gates"); - if (params.config.gates.length === 0) { - lines.push("(no gates configured)"); - } else { - for (const gate of params.config.gates) { - lines.push(`- ${gate.command}`); - } - } + lines.push("### — Iteration N — Item: "); + lines.push("- Did:"); + lines.push("- Tried:"); + lines.push("- Learned:"); + lines.push("- Dead ends:"); + lines.push("- Next:"); lines.push(""); - - if (params.loopState) { - lines.push("## Loop"); - lines.push(`- Status: ${params.loopState.status}`); - lines.push(`- Iteration: ${params.loopState.iteration}`); - if (params.loopState.currentItemTitle) { - lines.push(`- Current item: ${params.loopState.currentItemTitle}`); - } - if (params.loopState.lastGateRun) { - lines.push( - `- Last gates: ${params.loopState.lastGateRun.ok ? 
"PASS" : "FAIL"} (${Math.round( - params.loopState.lastGateRun.totalDurationMs / 1000 - )}s)` - ); - } - if (params.loopState.lastCheckpoint?.commitSha) { - lines.push(`- Last commit: ${params.loopState.lastCheckpoint.commitSha}`); - } - if (params.loopState.lastError) { - lines.push(`- Last error: ${params.loopState.lastError}`); - } - lines.push(""); - } - - return lines.join("\n") + "\n"; + return lines.join("\n"); } export class WorkspaceHarnessService { @@ -328,7 +281,7 @@ export class WorkspaceHarnessService { return { metadata, runtime, workspacePath }; } - private getHarnessFilePaths( + private getLegacyHarnessFilePaths( workspacePath: string, runtimeConfig: RuntimeConfig | undefined, workspaceName: string @@ -353,6 +306,26 @@ export class WorkspaceHarnessService { ), }; } + private getHarnessFilePaths( + workspacePath: string, + runtimeConfig: RuntimeConfig | undefined, + workspaceName: string + ): WorkspaceHarnessFilePaths { + assert(typeof workspacePath === "string", "workspacePath must be a string"); + assert(typeof workspaceName === "string", "workspaceName must be a string"); + + const prefix = workspaceName.trim().length > 0 ? 
workspaceName.trim() : "workspace"; + + return { + configPath: joinForRuntime(runtimeConfig, workspacePath, HARNESS_DIR, `${prefix}.jsonc`), + progressPath: joinForRuntime( + runtimeConfig, + workspacePath, + HARNESS_DIR, + `${prefix}.progress.md` + ), + }; + } private async readHarnessFile( runtime: ReturnType<typeof createRuntime>, @@ -459,6 +432,68 @@ export class WorkspaceHarnessService { } } + private async ensureHarnessJournalExists(params: { + metadata: FrontendWorkspaceMetadata; + runtime: ReturnType<typeof createRuntime>; + workspacePath: string; + runtimeConfig: RuntimeConfig | undefined; + paths: WorkspaceHarnessFilePaths; + legacyPaths: WorkspaceHarnessFilePaths; + }): Promise<void> { + try { + await this.ensureHarnessDir(params.runtime, params.workspacePath, params.runtimeConfig); + + const exists = await statIsFile(params.runtime, params.paths.progressPath); + if (exists) { + return; + } + + let legacyProgressContents = ""; + const legacyExists = await statIsFile(params.runtime, params.legacyPaths.progressPath); + if (legacyExists) { + try { + legacyProgressContents = await readFileString( + params.runtime, + params.legacyPaths.progressPath + ); + } catch (error) { + log.debug("[HARNESS] Failed to read legacy harness progress file", { + filePath: params.legacyPaths.progressPath, + error, + }); + } + } + + let markdown = renderHarnessJournalBootstrapMarkdown({ + metadata: params.metadata, + paths: params.paths, + }); + + if (legacyProgressContents.trim().length > 0) { + markdown += + "\n## Migrated content (legacy progress file)\n\n" + + legacyProgressContents.trimEnd() + + "\n"; + } + + await writeFileString( + params.runtime, + params.paths.progressPath, + markdown.endsWith("\n") ? 
markdown : `${markdown}\n` + ); + await this.ensureHarnessGitignored( + params.runtime, + params.workspacePath, + params.runtimeConfig + ); + } catch (error) { + log.debug("[HARNESS] Failed to ensure harness journal file exists", { + workspacePath: params.workspacePath, + error, + }); + } + } + async getHarnessForWorkspace(workspaceId: string): Promise<{ config: WorkspaceHarnessConfig; paths: WorkspaceHarnessFilePaths; @@ -466,8 +501,41 @@ export class WorkspaceHarnessService { }> { const { metadata, runtime, workspacePath } = await this.getRuntimeAndWorkspacePath(workspaceId); const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name); + const legacyPaths = this.getLegacyHarnessFilePaths( + workspacePath, + metadata.runtimeConfig, + metadata.name + ); + + let exists = await statIsFile(runtime, paths.configPath); + if (!exists) { + const legacyExists = await statIsFile(runtime, legacyPaths.configPath); + if (legacyExists) { + try { + const rawLegacy = await readFileString(runtime, legacyPaths.configPath); + await this.ensureHarnessDir(runtime, workspacePath, metadata.runtimeConfig); + await writeFileString( + runtime, + paths.configPath, + rawLegacy.endsWith("\n") ? 
rawLegacy : `${rawLegacy}\n` + ); + await this.ensureHarnessGitignored(runtime, workspacePath, metadata.runtimeConfig); + exists = true; + } catch (error) { + log.debug("[HARNESS] Failed to migrate legacy harness config file", { + workspaceId, + error, + }); + const parsedLegacy = await this.readHarnessFile(runtime, legacyPaths.configPath); + return { + config: normalizeWorkspaceHarnessConfig(parsedLegacy), + paths: legacyPaths, + exists: true, + }; + } + } + } - const exists = await statIsFile(runtime, paths.configPath); if (!exists) { return { config: { ...DEFAULT_HARNESS_CONFIG }, paths, exists: false }; } @@ -482,48 +550,70 @@ export class WorkspaceHarnessService { async setHarnessForWorkspace( workspaceId: string, - config: WorkspaceHarnessConfig, - options?: { loopState?: HarnessLoopState } + config: WorkspaceHarnessConfig ): Promise<WorkspaceHarnessConfig> { assert(config && typeof config === "object", "config must be an object"); const { metadata, runtime, workspacePath } = await this.getRuntimeAndWorkspacePath(workspaceId); const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name); + const legacyPaths = this.getLegacyHarnessFilePaths( + workspacePath, + metadata.runtimeConfig, + metadata.name + ); const normalized = normalizeWorkspaceHarnessConfig(config); + const serialized = JSON.stringify(normalized, null, 2) + "\n"; await this.ensureHarnessDir(runtime, workspacePath, metadata.runtimeConfig); - await writeFileString(runtime, paths.configPath, JSON.stringify(normalized, null, 2) + "\n"); + await writeFileString(runtime, paths.configPath, serialized); await this.ensureHarnessGitignored(runtime, workspacePath, metadata.runtimeConfig); - // Best-effort: keep the progress file up-to-date for both users and agent context. + // Best-effort: keep the legacy file updated for downgrade compatibility. 
try { - const progressMarkdown = renderProgressMarkdown({ - metadata, - config: normalized, - paths, - loopState: options?.loopState, - }); - await writeFileString(runtime, paths.progressPath, progressMarkdown); + const legacyExists = await statIsFile(runtime, legacyPaths.configPath); + if (legacyExists) { + await writeFileString(runtime, legacyPaths.configPath, serialized); + } } catch (error) { - log.debug("[HARNESS] Failed to update harness progress file", { workspaceId, error }); + log.debug("[HARNESS] Failed to update legacy harness config file", { workspaceId, error }); } + await this.ensureHarnessJournalExists({ + metadata, + runtime, + workspacePath, + runtimeConfig: metadata.runtimeConfig, + paths, + legacyPaths, + }); + return normalized; } - async updateProgressFile(workspaceId: string, loopState?: HarnessLoopState): Promise<void> { + async updateProgressFile(workspaceId: string, _loopState?: HarnessLoopState): Promise<void> { try { const { metadata, runtime, workspacePath } = await this.getRuntimeAndWorkspacePath(workspaceId); - const { config, paths } = await this.getHarnessForWorkspace(workspaceId); - await this.ensureHarnessDir(runtime, workspacePath, metadata.runtimeConfig); - const progressMarkdown = renderProgressMarkdown({ metadata, config, paths, loopState }); - await writeFileString(runtime, paths.progressPath, progressMarkdown); + const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name); + const legacyPaths = this.getLegacyHarnessFilePaths( + workspacePath, + metadata.runtimeConfig, + metadata.name + ); + + await this.ensureHarnessJournalExists({ + metadata, + runtime, + workspacePath, + runtimeConfig: metadata.runtimeConfig, + paths, + legacyPaths, + }); } catch (error) { - log.debug("[HARNESS] Failed to update progress file", { workspaceId, error }); + log.debug("[HARNESS] Failed to ensure harness journal exists", { workspaceId, error }); } } } From 137486b6120bbc30a2463b1466447a438cfe0f06 Mon Sep 17 
00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Tue, 20 Jan 2026 21:04:15 +0100 Subject: [PATCH 06/20] =?UTF-8?q?=F0=9F=A4=96=20fix:=20include=20plan=20pa?= =?UTF-8?q?th=20in=20harness=20bearings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include the workspace plan file path in harness reset/loop bearings summaries. Signed-off-by: Thomas Kosiewski <tk@coder.com> --- _Generated with `mux` • Model: `openai:gpt-5.2` • Thinking: `xhigh` • Cost: $47.81_ Change-Id: I89cf61ac2e147042882b58297d0bf9dde49835fd --- src/node/orpc/router.ts | 16 ++++++++++++++++ src/node/services/loopRunnerService.ts | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index 0594d4fc0e..d7a230b82f 100644 --- a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -24,6 +24,7 @@ import { createAsyncMessageQueue } from "@/common/utils/asyncMessageQueue"; import { createRuntime, checkRuntimeAvailability } from "@/node/runtime/runtimeFactory"; import { createRuntimeForWorkspace } from "@/node/runtime/runtimeHelpers"; +import { getPlanFilePath } from "@/common/utils/planStorage"; import { readPlanFile } from "@/node/utils/runtime/helpers"; import { createMuxMessage } from "@/common/types/message"; import { secretsToRecord } from "@/common/types/secrets"; @@ -1805,6 +1806,18 @@ export const router = (authToken?: string) => { const workspaceName = workspaceInfo?.name ?? 
input.workspaceId; const configPathHint = `.mux/harness/${workspaceName}.jsonc`; const progressPathHint = `.mux/harness/${workspaceName}.progress.md`; + const planPathHint = (() => { + if (!workspaceInfo) { + return null; + } + + const runtime = createRuntime(workspaceInfo.runtimeConfig, { + projectPath: workspaceInfo.projectPath, + }); + const muxHome = runtime.getMuxHome(); + + return getPlanFilePath(workspaceName, workspaceInfo.projectName, muxHome); + })(); const lines: string[] = []; lines.push("# Harness bearings"); @@ -1827,6 +1840,9 @@ export const router = (authToken?: string) => { lines.push("Harness files:"); lines.push(`- ${progressPathHint}`); lines.push(`- ${configPathHint}`); + if (planPathHint) { + lines.push(`- Plan: ${planPathHint}`); + } lines.push(""); lines.push("Checklist:"); if (harness.config.checklist.length === 0) { diff --git a/src/node/services/loopRunnerService.ts b/src/node/services/loopRunnerService.ts index 5b330e2381..c711e2b99b 100644 --- a/src/node/services/loopRunnerService.ts +++ b/src/node/services/loopRunnerService.ts @@ -12,6 +12,7 @@ import type { import { HarnessLoopStateSchema } from "@/common/orpc/schemas"; import { createMuxMessage } from "@/common/types/message"; import { defaultModel } from "@/common/utils/ai/models"; +import { getPlanFilePath } from "@/common/utils/planStorage"; import type { WorkspaceService } from "@/node/services/workspaceService"; import type { AIService } from "@/node/services/aiService"; import type { Config } from "@/node/config"; @@ -20,6 +21,7 @@ import { MutexMap } from "@/node/utils/concurrency/mutexMap"; import type { WorkspaceHarnessService } from "@/node/services/workspaceHarnessService"; import type { GateRunnerService } from "@/node/services/gateRunnerService"; import type { GitCheckpointService } from "@/node/services/gitCheckpointService"; +import { createRuntime } from "@/node/runtime/runtimeFactory"; import { execBuffered } from "@/node/utils/runtime/helpers"; const 
LOOP_STATE_FILENAME = "harness-loop.json"; @@ -92,6 +94,7 @@ function renderLoopSummaryMarkdown(params: { currentItemTitle: string | null; configPathHint: string; progressPathHint: string; + planPathHint: string; checklist: HarnessChecklistItem[]; lastGateRun: HarnessGateRunResult | null; lastCommitSha: string | null; @@ -124,6 +127,7 @@ function renderLoopSummaryMarkdown(params: { lines.push("Harness files:"); lines.push(`- ${params.progressPathHint}`); lines.push(`- ${params.configPathHint}`); + lines.push(`- Plan: ${params.planPathHint}`); lines.push(""); lines.push("Checklist:"); @@ -527,12 +531,16 @@ export class LoopRunnerService extends EventEmitter { await this.persistState(workspaceId, nextState); if (contextReset === "replace_history") { + const runtime = createRuntime(info.runtimeConfig, { projectPath: info.projectPath }); + const planPathHint = getPlanFilePath(info.name, info.projectName, runtime.getMuxHome()); + const summary = renderLoopSummaryMarkdown({ workspaceId, iteration: nextState.iteration, currentItemTitle: nextState.currentItemTitle, configPathHint, progressPathHint, + planPathHint, checklist: config.checklist, lastGateRun: nextState.lastGateRun, lastCommitSha: nextState.lastCheckpoint?.commitSha ?? 
null, From 705ebb8ef32f22c1c58fa438bac62a25753e00d6 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Tue, 20 Jan 2026 23:23:15 +0100 Subject: [PATCH 07/20] =?UTF-8?q?=F0=9F=A4=96=20feat:=20interactive=20harn?= =?UTF-8?q?ess=20init=20approval=20flow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Icf5963d92a65300117de0c264272f8ca3952c4e0 Signed-off-by: Thomas Kosiewski <tk@coder.com> --- docs/agents/index.mdx | 55 +++- src/browser/components/ChatPane.tsx | 16 ++ .../components/Messages/MessageRenderer.tsx | 4 + .../components/Messages/ToolMessage.tsx | 13 +- .../tools/ProposeHarnessToolCall.tsx | 269 ++++++++++++++++++ .../tools/ProposePlanToolCall.test.tsx | 48 ++-- .../components/tools/ProposePlanToolCall.tsx | 18 +- .../tools/shared/getToolComponent.ts | 7 +- src/browser/styles/globals.css | 20 ++ src/common/types/tools.ts | 15 + src/common/utils/tools/toolDefinitions.ts | 9 + src/common/utils/tools/tools.ts | 18 +- src/node/builtinAgents/harness-init.md | 41 +++ .../builtInAgentContent.generated.ts | 1 + .../builtInAgentDefinitions.ts | 1 + .../resolveToolPolicy.test.ts | 1 + .../agentDefinitions/resolveToolPolicy.ts | 1 + src/node/services/agentPresets.ts | 4 +- src/node/services/agentSession.ts | 4 +- src/node/services/aiService.ts | 11 +- src/node/services/ptc/toolBridge.test.ts | 2 + src/node/services/ptc/toolBridge.ts | 1 + src/node/services/tools/fileCommon.test.ts | 63 ++++ src/node/services/tools/fileCommon.ts | 77 ++++- src/node/services/tools/propose_harness.ts | 156 ++++++++++ src/node/services/tools/task.ts | 15 +- 26 files changed, 812 insertions(+), 58 deletions(-) create mode 100644 src/browser/components/tools/ProposeHarnessToolCall.tsx create mode 100644 src/node/builtinAgents/harness-init.md create mode 100644 src/node/services/tools/propose_harness.ts diff --git a/docs/agents/index.mdx b/docs/agents/index.mdx index 1b6ff08deb..ae9b8d5487 100644 --- 
a/docs/agents/index.mdx +++ b/docs/agents/index.mdx @@ -581,8 +581,59 @@ Example JSON object: </Accordion> -> > > > > > > 5873e369c (🤖 feat: improve Start Ralph loop UX and harness gen) -> > > > > > > {/* END BUILTIN_AGENTS */} +### Harness Init (internal) + +**Interactive harness generation + approval (internal)** + +<Accordion title="View harness-init.md"> + +```md +--- +name: Harness Init +description: Interactive harness generation + approval (internal) +base: exec +ui: + hidden: true + color: var(--color-harness-init-mode) +subagent: + runnable: false +--- + +You are in Harness Init mode. + +Your job is to create or refine a Ralph harness for this workspace based on the current plan and the repository. + +=== CRITICAL: LIMITED EDIT MODE === + +- You may ONLY create/edit files under: `.mux/harness/*.jsonc` +- Do NOT modify source code or other repo files. +- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.). + - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch. + +Repo-aware investigation: + +- Identify which commands should be used as gates by checking repo-native entrypoints: + - `Makefile`, `package.json` scripts, `.github/workflows/*`, etc. +- Map the plan’s changes to impacted subsystems by tracing callsites/imports. + +Gates: + +- Prefer a small set of safe, single commands. +- Do NOT use shell chaining, pipes, redirects, or quotes. + +Delegation: + +- You may spawn only read-only exploration subagents via `task` with `agentId: "explore"`. + +When the harness file is ready for user review: + +- Call `propose_harness` exactly once. +- Do NOT start the Ralph loop yourself; the UI will start it after user approval. 
+``` + +</Accordion> + +{/* END BUILTIN_AGENTS */} ## Related Docs diff --git a/src/browser/components/ChatPane.tsx b/src/browser/components/ChatPane.tsx index 3518533423..3d82b921c5 100644 --- a/src/browser/components/ChatPane.tsx +++ b/src/browser/components/ChatPane.tsx @@ -549,6 +549,17 @@ export const ChatPane: React.FC<ChatPaneProps> = (props) => { } } + // Find the ID of the latest propose_harness tool call for external edit detection + // Only the latest harness should fetch fresh content from disk + let latestProposeHarnessId: string | null = null; + for (let i = transformedMessages.length - 1; i >= 0; i--) { + const msg = transformedMessages[i]; + if (msg.type === "tool" && msg.toolName === "propose_harness") { + latestProposeHarnessId = msg.id; + break; + } + } + return ( <div ref={chatAreaRef} @@ -648,6 +659,11 @@ export const ChatPane: React.FC<ChatPaneProps> = (props) => { msg.toolName === "propose_plan" && msg.id === latestProposePlanId } + isLatestProposeHarness={ + msg.type === "tool" && + msg.toolName === "propose_harness" && + msg.id === latestProposeHarnessId + } bashOutputGroup={bashOutputGroup} userMessageNavigation={ msg.type === "user" && userMessageNavMap diff --git a/src/browser/components/Messages/MessageRenderer.tsx b/src/browser/components/Messages/MessageRenderer.tsx index a7a7b6280e..aad48f627b 100644 --- a/src/browser/components/Messages/MessageRenderer.tsx +++ b/src/browser/components/Messages/MessageRenderer.tsx @@ -24,6 +24,8 @@ interface MessageRendererProps { onReviewNote?: (data: ReviewNoteData) => void; /** Whether this message is the latest propose_plan tool call (for external edit detection) */ isLatestProposePlan?: boolean; + /** Whether this message is the latest propose_harness tool call (for external edit detection) */ + isLatestProposeHarness?: boolean; /** Optional bash_output grouping info (computed at render-time) */ bashOutputGroup?: BashOutputGroupInfo; /** Navigation info for user messages (backward/forward 
between user messages) */ @@ -40,6 +42,7 @@ export const MessageRenderer = React.memo<MessageRendererProps>( isCompacting, onReviewNote, isLatestProposePlan, + isLatestProposeHarness, bashOutputGroup, userMessageNavigation, }) => { @@ -72,6 +75,7 @@ export const MessageRenderer = React.memo<MessageRendererProps>( workspaceId={workspaceId} onReviewNote={onReviewNote} isLatestProposePlan={isLatestProposePlan} + isLatestProposeHarness={isLatestProposeHarness} bashOutputGroup={bashOutputGroup} /> ); diff --git a/src/browser/components/Messages/ToolMessage.tsx b/src/browser/components/Messages/ToolMessage.tsx index 037c95b0f6..f72ca3815f 100644 --- a/src/browser/components/Messages/ToolMessage.tsx +++ b/src/browser/components/Messages/ToolMessage.tsx @@ -17,6 +17,8 @@ interface ToolMessageProps { onReviewNote?: (data: ReviewNoteData) => void; /** Whether this is the latest propose_plan in the conversation */ isLatestProposePlan?: boolean; + /** Whether this is the latest propose_harness in the conversation */ + isLatestProposeHarness?: boolean; /** Optional bash_output grouping info */ bashOutputGroup?: BashOutputGroupInfo; } @@ -27,6 +29,7 @@ export const ToolMessage: React.FC<ToolMessageProps> = ({ workspaceId, onReviewNote, isLatestProposePlan, + isLatestProposeHarness, bashOutputGroup, }) => { const { toolName, args, result, status, toolCallId } = message; @@ -40,6 +43,12 @@ export const ToolMessage: React.FC<ToolMessageProps> = ({ ? bashOutputGroup.position : undefined; + const isLatest = + toolName === "propose_plan" + ? isLatestProposePlan + : toolName === "propose_harness" + ? 
isLatestProposeHarness + : undefined; // Extract hook output if present (only shown when hook produced output) const hookOutput = extractHookOutput(result); const hookDuration = extractHookDuration(result); @@ -59,8 +68,8 @@ export const ToolMessage: React.FC<ToolMessageProps> = ({ startedAt={message.timestamp} // FileEdit-specific onReviewNote={onReviewNote} - // ProposePlan-specific - isLatest={isLatestProposePlan} + // ProposePlan/ProposeHarness-specific + isLatest={isLatest} // BashOutput-specific groupPosition={groupPosition} // CodeExecution-specific diff --git a/src/browser/components/tools/ProposeHarnessToolCall.tsx b/src/browser/components/tools/ProposeHarnessToolCall.tsx new file mode 100644 index 0000000000..013d6e14d9 --- /dev/null +++ b/src/browser/components/tools/ProposeHarnessToolCall.tsx @@ -0,0 +1,269 @@ +import React, { useEffect, useRef, useState } from "react"; +import { ClipboardCheck, ClipboardList, Play } from "lucide-react"; + +import type { ProposeHarnessToolError, ProposeHarnessToolResult } from "@/common/types/tools"; +import type { WorkspaceHarnessConfig } from "@/common/types/harness"; +import { useAPI } from "@/browser/contexts/API"; +import { usePopoverError } from "@/browser/hooks/usePopoverError"; +import { getAgentIdKey } from "@/common/constants/storage"; +import { updatePersistedState } from "@/browser/hooks/usePersistedState"; +import { cn } from "@/common/lib/utils"; + +import { + ExpandIcon, + StatusIndicator, + ToolContainer, + ToolDetails, + ToolHeader, + ToolName, +} from "./shared/ToolPrimitives"; +import { getStatusDisplay, type ToolStatus, useToolExpansion } from "./shared/toolUtils"; +import { PopoverError } from "../PopoverError"; +import { IconActionButton, type ButtonConfig } from "../Messages/MessageWindow"; + +interface HarnessGetData { + config: WorkspaceHarnessConfig; + paths: { configPath: string; progressPath: string }; + exists: boolean; +} + +function isProposeHarnessResult(result: unknown): result is 
ProposeHarnessToolResult { + return ( + result !== null && + typeof result === "object" && + "success" in result && + result.success === true && + "harnessPath" in result + ); +} + +function isProposeHarnessError(result: unknown): result is ProposeHarnessToolError { + return ( + result !== null && + typeof result === "object" && + "success" in result && + result.success === false && + "error" in result + ); +} + +function formatChecklistStatus(status: string): string { + if (status === "done") return "[x]"; + if (status === "doing") return "[~]"; + if (status === "blocked") return "[!]"; + return "[ ]"; +} + +interface ProposeHarnessToolCallProps { + args: unknown; + result: unknown; + status: ToolStatus; + workspaceId?: string; + className?: string; + /** Whether this is the latest propose_harness tool call (for external edit detection) */ + isLatest?: boolean; +} + +export const ProposeHarnessToolCall: React.FC<ProposeHarnessToolCallProps> = (props) => { + const { result, status, workspaceId, className, isLatest } = props; + const { expanded, toggleExpanded } = useToolExpansion(true); + const { api } = useAPI(); + const loopError = usePopoverError(); + + const [data, setData] = useState<HarnessGetData | null>(null); + + const [isStartingLoop, setIsStartingLoop] = useState(false); + const isStartingLoopRef = useRef(false); + + const startButtonRef = useRef<HTMLDivElement>(null); + + // Fetch fresh harness config for the latest propose_harness. + useEffect(() => { + if (!isLatest || !workspaceId || !api || status !== "completed") { + return; + } + + const fetchHarness = async () => { + try { + const res = await api.workspace.harness.get({ workspaceId }); + if (!res.success) { + return; + } + setData(res.data); + } catch { + // Best-effort only. 
+ } + }; + + void fetchHarness(); + + const handleFocus = () => void fetchHarness(); + window.addEventListener("focus", handleFocus); + return () => window.removeEventListener("focus", handleFocus); + }, [api, isLatest, status, workspaceId]); + + let harnessPath: string | undefined; + let errorMessage: string | undefined; + + if (isProposeHarnessResult(result)) { + harnessPath = result.harnessPath; + } + + if (isProposeHarnessError(result)) { + errorMessage = result.error; + } + + const statusDisplay = getStatusDisplay(status); + + const handleApproveAndStart = () => { + if (!workspaceId || !api) return; + if (isStartingLoopRef.current) return; + + // Capture positioning from the ref for error popover placement + const anchorPosition = startButtonRef.current + ? (() => { + const { bottom, left } = startButtonRef.current.getBoundingClientRect(); + return { top: bottom + 8, left }; + })() + : { top: 100, left: 100 }; + + isStartingLoopRef.current = true; + setIsStartingLoop(true); + + // Switch to exec so the loop runner uses Exec mode settings. + updatePersistedState(getAgentIdKey(workspaceId), "exec"); + + api.workspace.loop + .start({ workspaceId }) + .then((res) => { + if (!res.success) { + loopError.showError("approve-harness", res.error, anchorPosition); + } + }) + .catch((error: unknown) => { + const message = error instanceof Error ? 
error.message : String(error); + loopError.showError("approve-harness", message, anchorPosition); + }) + .finally(() => { + isStartingLoopRef.current = false; + setIsStartingLoop(false); + }); + }; + + const actionButtons: ButtonConfig[] = []; + + if (workspaceId && status === "completed" && !errorMessage) { + actionButtons.push({ + label: "Approve & Start Ralph loop", + component: ( + <div ref={startButtonRef}> + <IconActionButton + button={{ + label: "Approve & Start Ralph loop", + onClick: handleApproveAndStart, + disabled: !api || isStartingLoop, + icon: <Play className={cn(isStartingLoop && "animate-pulse")} />, + tooltip: "Switch to Exec and start the Ralph loop with this harness", + }} + /> + </div> + ), + }); + } + + const showChecklist = data?.config.checklist && data.config.checklist.length > 0; + const showGates = data?.config.gates && data.config.gates.length > 0; + + const body = ( + <div className={cn("plan-surface rounded-md p-3 shadow-md", className)}> + <div className="plan-divider mb-3 flex items-center gap-2 border-b pb-2"> + <ClipboardList aria-hidden="true" className="h-4 w-4" /> + <div className="text-harness-init-mode font-mono text-[13px] font-semibold"> + Harness proposal + </div> + </div> + + {errorMessage ? ( + <div className="text-error rounded-sm p-2 font-mono text-xs">{errorMessage}</div> + ) : status !== "completed" ? ( + <div className="border-border-light text-muted rounded-sm border border-dashed p-3 font-mono text-xs"> + Validating harness… + </div> + ) : data ? 
( + <div className="space-y-3"> + <div className="border-border-light rounded border p-3"> + <div className="text-secondary text-xs">Files</div> + <div className="mt-1 font-mono text-xs"> + <div>{data.paths.configPath}</div> + <div>{data.paths.progressPath}</div> + </div> + </div> + + {showChecklist && ( + <div className="border-border-light rounded border p-3"> + <div className="text-secondary text-xs">Checklist</div> + <div className="mt-2 space-y-1 font-mono text-xs"> + {data.config.checklist.map((item) => ( + <div key={item.id}> + {formatChecklistStatus(item.status)} {item.title} + </div> + ))} + </div> + </div> + )} + + {showGates && ( + <div className="border-border-light rounded border p-3"> + <div className="text-secondary text-xs">Gates</div> + <div className="mt-2 space-y-1 font-mono text-xs"> + {data.config.gates.map((gate, index) => ( + <div key={gate.id ?? `${gate.command}-${index}`}>- {gate.command}</div> + ))} + </div> + </div> + )} + + {!showChecklist && !showGates && ( + <div className="border-border-light text-secondary rounded border border-dashed p-3 text-xs"> + Harness is empty. Edit the harness config and call propose_harness again. + </div> + )} + </div> + ) : ( + <div className="border-border-light rounded border p-3"> + <div className="text-secondary text-xs">Files</div> + <div className="mt-1 font-mono text-xs"> + <div>{harnessPath ?? "(unknown harness path)"}</div> + </div> + </div> + )} + + {actionButtons.length > 0 && ( + <div className="mt-3 flex items-center gap-0.5"> + {actionButtons.map((button, index) => ( + <IconActionButton key={index} button={button} /> + ))} + <div className="text-muted ml-1 inline-flex items-center gap-1 text-[11px]"> + <ClipboardCheck className="h-3.5 w-3.5" aria-hidden="true" /> + Review, then approve to start the loop. 
+ </div> + </div> + )} + </div> + ); + + return ( + <> + <ToolContainer expanded={expanded}> + <ToolHeader onClick={toggleExpanded}> + <ExpandIcon expanded={expanded}>▶</ExpandIcon> + <ToolName>propose_harness</ToolName> + <StatusIndicator status={status}>{statusDisplay}</StatusIndicator> + </ToolHeader> + + {expanded && <ToolDetails>{body}</ToolDetails>} + </ToolContainer> + <PopoverError error={loopError.error} prefix="Failed to start Ralph loop" /> + </> + ); +}; diff --git a/src/browser/components/tools/ProposePlanToolCall.test.tsx b/src/browser/components/tools/ProposePlanToolCall.test.tsx index bb2664f57b..fbe5b3d1d2 100644 --- a/src/browser/components/tools/ProposePlanToolCall.test.tsx +++ b/src/browser/components/tools/ProposePlanToolCall.test.tsx @@ -17,7 +17,7 @@ interface SendMessageArgs { options: SendMessageOptions; } -type StartFromPlanResult = { success: true; data: undefined } | { success: false; error: string }; +type SendMessageResult = { success: true; data: undefined } | { success: false; error: string }; type GetPlanContentResult = | { success: true; data: { content: string; path: string } } @@ -46,10 +46,8 @@ interface MockApi { summaryMessage: unknown; deletePlanFile?: boolean; }) => Promise<ResultVoid>; - sendMessage: (args: SendMessageArgs) => Promise<{ success: true; data: undefined }>; - loop: { - startFromPlan: (args: { workspaceId: string }) => Promise<StartFromPlanResult>; - }; + sendMessage: (args: SendMessageArgs) => Promise<SendMessageResult>; + loop: Record<string, unknown>; }; } @@ -207,9 +205,7 @@ describe("ProposePlanToolCall", () => { data: { content: "# My Plan\n\nDo the thing.", path: planPath }, }), replaceChatHistory: (_args) => Promise.resolve({ success: true, data: undefined }), - loop: { - startFromPlan: () => Promise.resolve({ success: true, data: undefined }), - }, + loop: {}, sendMessage: (args: SendMessageArgs) => { sendMessageCalls.push(args); return Promise.resolve({ success: true, data: undefined }); @@ -296,9 
+292,7 @@ describe("ProposePlanToolCall", () => { sendMessageCalls.push(args); return Promise.resolve({ success: true, data: undefined }); }, - loop: { - startFromPlan: () => Promise.resolve({ success: true, data: undefined }), - }, + loop: {}, }, }; @@ -342,18 +336,18 @@ describe("ProposePlanToolCall", () => { expect(summaryMessage.parts?.[0]?.text).toContain(planPath); }); - test("switches to exec and starts Ralph loop when clicking Start Ralph loop", async () => { + test("switches to harness-init and sends a harness proposal request when clicking Start Ralph loop", async () => { const workspaceId = "ws-123"; const planPath = "~/.mux/plans/demo/ws-123.md"; // Start in plan mode. window.localStorage.setItem(getAgentIdKey(workspaceId), JSON.stringify("plan")); - const startFromPlanCalls: Array<{ workspaceId: string }> = []; + const sendMessageCalls: SendMessageArgs[] = []; - let resolveStartFromPlan!: (value: StartFromPlanResult) => void; - const startFromPlanPromise = new Promise<StartFromPlanResult>((resolve) => { - resolveStartFromPlan = resolve; + let resolveSendMessage!: (value: SendMessageResult) => void; + const sendMessagePromise = new Promise<SendMessageResult>((resolve) => { + resolveSendMessage = resolve; }); mockApi = { @@ -372,13 +366,11 @@ describe("ProposePlanToolCall", () => { data: { content: "# My Plan\n\nDo the thing.", path: planPath }, }), replaceChatHistory: () => Promise.resolve({ success: true, data: undefined }), - sendMessage: () => Promise.resolve({ success: true, data: undefined }), - loop: { - startFromPlan: (args: { workspaceId: string }) => { - startFromPlanCalls.push(args); - return startFromPlanPromise; - }, + sendMessage: (args: SendMessageArgs) => { + sendMessageCalls.push(args); + return sendMessagePromise; }, + loop: {}, }, }; @@ -400,15 +392,17 @@ describe("ProposePlanToolCall", () => { fireEvent.click(view.getByRole("button", { name: "Start Ralph loop" })); - await waitFor(() => expect(startFromPlanCalls.length).toBe(1)); - 
expect(startFromPlanCalls[0]?.workspaceId).toBe(workspaceId); + await waitFor(() => expect(sendMessageCalls.length).toBe(1)); + expect(sendMessageCalls[0]?.message).toBe( + "Generate a Ralph harness from the current plan and propose it" + ); await waitFor(() => { const button = view.getByRole("button", { name: "Start Ralph loop" }) as HTMLButtonElement; expect(button.disabled).toBe(true); }); - resolveStartFromPlan({ success: true, data: undefined }); + resolveSendMessage({ success: true, data: undefined }); await waitFor(() => { const button = view.getByRole("button", { name: "Start Ralph loop" }) as HTMLButtonElement; @@ -420,9 +414,9 @@ describe("ProposePlanToolCall", () => { mock?: { calls: unknown[][] }; }; if (updatePersistedStateMaybeMock.mock) { - expect(updatePersistedState).toHaveBeenCalledWith(agentKey, "exec"); + expect(updatePersistedState).toHaveBeenCalledWith(agentKey, "harness-init"); } else { - expect(JSON.parse(window.localStorage.getItem(agentKey)!)).toBe("exec"); + expect(JSON.parse(window.localStorage.getItem(agentKey)!)).toBe("harness-init"); } }); }); diff --git a/src/browser/components/tools/ProposePlanToolCall.tsx b/src/browser/components/tools/ProposePlanToolCall.tsx index 43a40b992b..124ee8f120 100644 --- a/src/browser/components/tools/ProposePlanToolCall.tsx +++ b/src/browser/components/tools/ProposePlanToolCall.tsx @@ -30,6 +30,7 @@ import { usePopoverError } from "@/browser/hooks/usePopoverError"; import { PopoverError } from "../PopoverError"; import { getAgentIdKey, getPlanContentKey } from "@/common/constants/storage"; import { readPersistedState, updatePersistedState } from "@/browser/hooks/usePersistedState"; +import { formatSendMessageError } from "@/common/utils/errors/formatSendError"; import { buildSendMessageOptions } from "@/browser/hooks/useSendMessageOptions"; import { Clipboard, @@ -417,14 +418,19 @@ export const ProposePlanToolCall: React.FC<ProposePlanToolCallProps> = (props) = isStartingLoopRef.current = true; 
setIsStartingLoop(true); - // Switch to exec so the UI matches the loop runner. - updatePersistedState(getAgentIdKey(workspaceId), "exec"); + // Switch to harness-init before sending so send options (agentId/mode) match. + updatePersistedState(getAgentIdKey(workspaceId), "harness-init"); - api.workspace.loop - .startFromPlan({ workspaceId }) + api.workspace + .sendMessage({ + workspaceId, + message: "Generate a Ralph harness from the current plan and propose it", + options: buildSendMessageOptions(workspaceId), + }) .then((result) => { if (!result.success) { - loopError.showError("start-ralph-loop", result.error, anchorPosition); + const formatted = formatSendMessageError(result.error); + loopError.showError("start-ralph-loop", formatted.message, anchorPosition); } }) .catch((error: unknown) => { @@ -539,7 +545,7 @@ export const ProposePlanToolCall: React.FC<ProposePlanToolCallProps> = (props) = onClick: handleStartRalphLoop, disabled: !api || isStartingLoop, icon: <RefreshCw className={cn(isStartingLoop && "animate-spin")} />, - tooltip: "Generate a harness from the plan (if needed) and start the loop", + tooltip: "Switch to Harness Init and propose a harness for approval", }} /> </div> diff --git a/src/browser/components/tools/shared/getToolComponent.ts b/src/browser/components/tools/shared/getToolComponent.ts index feb20a5fc8..9a96b46e40 100644 --- a/src/browser/components/tools/shared/getToolComponent.ts +++ b/src/browser/components/tools/shared/getToolComponent.ts @@ -17,6 +17,7 @@ import { FileReadToolCall } from "../FileReadToolCall"; import { WebFetchToolCall } from "../WebFetchToolCall"; import { WebSearchToolCall } from "../WebSearchToolCall"; import { AskUserQuestionToolCall } from "../AskUserQuestionToolCall"; +import { ProposeHarnessToolCall } from "../ProposeHarnessToolCall"; import { ProposePlanToolCall } from "../ProposePlanToolCall"; import { TodoToolCall } from "../TodoToolCall"; import { StatusSetToolCall } from "../StatusSetToolCall"; @@ -51,7 
+52,7 @@ interface ToolRegistryEntry { * Registry mapping tool names to their components and validation schemas. * Adding a new tool: add one line here. * - * Note: Some tools (ask_user_question, propose_plan, todo_write, status_set) require + * Note: Some tools (ask_user_question, propose_plan, propose_harness, todo_write, status_set) require * props like workspaceId/toolCallId that aren't available in nested context. This is * fine because the backend excludes these from code_execution sandbox (see EXCLUDED_TOOLS * in src/node/services/ptc/toolBridge.ts). They can never appear in nested tool calls. @@ -87,6 +88,10 @@ const TOOL_REGISTRY: Record<string, ToolRegistryEntry> = { component: ProposePlanToolCall, schema: TOOL_DEFINITIONS.propose_plan.schema, }, + propose_harness: { + component: ProposeHarnessToolCall, + schema: TOOL_DEFINITIONS.propose_harness.schema, + }, todo_write: { component: TodoToolCall, schema: TOOL_DEFINITIONS.todo_write.schema }, status_set: { component: StatusSetToolCall, schema: TOOL_DEFINITIONS.status_set.schema }, notify: { component: NotifyToolCall, schema: TOOL_DEFINITIONS.notify.schema }, diff --git a/src/browser/styles/globals.css b/src/browser/styles/globals.css index 2234871cb6..ac069017e6 100644 --- a/src/browser/styles/globals.css +++ b/src/browser/styles/globals.css @@ -64,6 +64,11 @@ --color-exec-mode-hover: hsl(268.56 94.04% 67%); --color-exec-mode-light: hsl(268.56 94.04% 78%); + --color-harness-init-mode: hsl(175 60% 42%); + --color-harness-init-mode-hover: hsl(175 60% 52%); + --color-harness-init-mode-light: hsl(175 60% 62%); + --color-harness-init-mode-alpha: hsla(175 60% 42% / 0.1); + /* Edit mode: amber/gold for editing warnings and barriers */ --color-edit-mode: hsl(38 80% 45%); --color-edit-mode-hover: hsl(38 80% 55%); @@ -356,6 +361,11 @@ --color-exec-mode-hover: hsl(268.56 94.04% 67%); --color-exec-mode-light: hsl(268.56 94.04% 78%); + --color-harness-init-mode: hsl(175 60% 38%); + --color-harness-init-mode-hover: 
hsl(175 60% 46%); + --color-harness-init-mode-light: hsl(175 60% 58%); + --color-harness-init-mode-alpha: hsla(175 60% 38% / 0.08); + --color-pending: hsl(30 100% 64%); --color-debug-mode: hsl(214 100% 56%); @@ -589,6 +599,11 @@ --color-exec-mode-hover: color-mix(in srgb, var(--color-exec-mode), white 18%); --color-exec-mode-light: color-mix(in srgb, var(--color-exec-mode), white 42%); + --color-harness-init-mode: #24837b; /* Flexoki cyan-600 */ + --color-harness-init-mode-hover: color-mix(in srgb, var(--color-harness-init-mode), white 18%); + --color-harness-init-mode-light: color-mix(in srgb, var(--color-harness-init-mode), white 42%); + --color-harness-init-mode-alpha: hsl(from var(--color-harness-init-mode) h s l / 0.08); + --color-edit-mode: #ad8301; /* Flexoki yellow-600 */ --color-edit-mode-hover: color-mix(in srgb, var(--color-edit-mode), white 18%); --color-edit-mode-light: color-mix(in srgb, var(--color-edit-mode), white 42%); @@ -795,6 +810,11 @@ --color-exec-mode-hover: color-mix(in srgb, var(--color-exec-mode), white 10%); --color-exec-mode-light: color-mix(in srgb, var(--color-exec-mode), white 22%); + --color-harness-init-mode: #3aa99f; /* Flexoki cyan-400 */ + --color-harness-init-mode-hover: color-mix(in srgb, var(--color-harness-init-mode), white 10%); + --color-harness-init-mode-light: color-mix(in srgb, var(--color-harness-init-mode), white 22%); + --color-harness-init-mode-alpha: hsl(from var(--color-harness-init-mode) h s l / 0.12); + --color-edit-mode: #d0a215; /* Flexoki yellow-400 */ --color-edit-mode-hover: color-mix(in srgb, var(--color-edit-mode), white 10%); --color-edit-mode-light: color-mix(in srgb, var(--color-edit-mode), white 22%); diff --git a/src/common/types/tools.ts b/src/common/types/tools.ts index 2021cda813..c112c39b7a 100644 --- a/src/common/types/tools.ts +++ b/src/common/types/tools.ts @@ -278,6 +278,21 @@ export interface LegacyProposePlanToolResult { message: string; } +// Propose Harness Tool Types +// Args derived 
from schema +export type ProposeHarnessToolArgs = z.infer<typeof TOOL_DEFINITIONS.propose_harness.schema>; + +export interface ProposeHarnessToolResult { + success: true; + harnessPath: string; + message: string; +} + +export interface ProposeHarnessToolError { + success: false; + error: string; +} + // Todo Tool Types export interface TodoItem { content: string; diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts index 8ac1144635..e69790e282 100644 --- a/src/common/utils/tools/toolDefinitions.ts +++ b/src/common/utils/tools/toolDefinitions.ts @@ -659,6 +659,14 @@ export const TOOL_DEFINITIONS = { "After calling this tool, do not paste the plan contents or mention the plan file path; the UI already shows the full plan.", schema: z.object({}), }, + propose_harness: { + description: + "Signal that your harness is complete and ready for user approval. " + + "This tool validates the harness config file you wrote under .mux/harness. " + + "You must write your harness file before calling this tool. " + + "After calling this tool, do not paste the full harness contents; the UI already shows it.", + schema: z.object({}), + }, task: { description: "Spawn a sub-agent task (child workspace). 
" + @@ -1255,6 +1263,7 @@ export function getAvailableTools( "file_edit_insert", "ask_user_question", "propose_plan", + "propose_harness", "bash", "task", "task_await", diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts index 64c329e4ee..22da648e1d 100644 --- a/src/common/utils/tools/tools.ts +++ b/src/common/utils/tools/tools.ts @@ -10,6 +10,7 @@ import { createFileEditReplaceStringTool } from "@/node/services/tools/file_edit // DISABLED: import { createFileEditReplaceLinesTool } from "@/node/services/tools/file_edit_replace_lines"; import { createFileEditInsertTool } from "@/node/services/tools/file_edit_insert"; import { createAskUserQuestionTool } from "@/node/services/tools/ask_user_question"; +import { createProposeHarnessTool } from "@/node/services/tools/propose_harness"; import { createProposePlanTool } from "@/node/services/tools/propose_plan"; import { createTodoWriteTool, createTodoReadTool } from "@/node/services/tools/todo"; import { createStatusSetTool } from "@/node/services/tools/status_set"; @@ -60,9 +61,18 @@ export interface ToolConfiguration { overflow_policy?: "truncate" | "tmpfile"; /** Background process manager for bash tool (optional, AI-only) */ backgroundProcessManager?: BackgroundProcessManager; - /** When true, restrict edits to the plan file (plan agent behavior). */ - planFileOnly?: boolean; - /** Plan file path - only this file can be edited when planFileOnly is true. */ + /** Current UI mode (plan or exec) - used for plan file path enforcement */ + mode?: UIMode; + /** Active agent id (resolved). Used for tool-level restrictions. */ + agentId?: string; + /** + * Optional allowlist of file path globs that may be edited via file_edit_* tools. + * + * When set, file edit tools will reject edits to paths that don't match. + * Relative patterns are resolved against cwd. 
+ */ + allowedEditPaths?: string[]; + /** Plan file path - only this file can be edited in plan mode */ planFilePath?: string; /** * Optional callback for emitting UI-only workspace chat events. @@ -286,6 +296,8 @@ export async function getToolsForModel( // and line number miscalculations. Use file_edit_replace_string instead. // file_edit_replace_lines: wrap(createFileEditReplaceLinesTool(config)), + propose_harness: wrap(createProposeHarnessTool(config)), + // Sub-agent task orchestration (child workspaces) task: wrap(createTaskTool(config)), task_await: wrap(createTaskAwaitTool(config)), diff --git a/src/node/builtinAgents/harness-init.md b/src/node/builtinAgents/harness-init.md new file mode 100644 index 0000000000..1be28e4b06 --- /dev/null +++ b/src/node/builtinAgents/harness-init.md @@ -0,0 +1,41 @@ +--- +name: Harness Init +description: Interactive harness generation + approval (internal) +base: exec +ui: + hidden: true + color: var(--color-harness-init-mode) +subagent: + runnable: false +--- + +You are in Harness Init mode. + +Your job is to create or refine a Ralph harness for this workspace based on the current plan and the repository. + +=== CRITICAL: LIMITED EDIT MODE === + +- You may ONLY create/edit files under: `.mux/harness/*.jsonc` +- Do NOT modify source code or other repo files. +- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.). + - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch. + +Repo-aware investigation: + +- Identify which commands should be used as gates by checking repo-native entrypoints: + - `Makefile`, `package.json` scripts, `.github/workflows/*`, etc. +- Map the plan’s changes to impacted subsystems by tracing callsites/imports. + +Gates: + +- Prefer a small set of safe, single commands. +- Do NOT use shell chaining, pipes, redirects, or quotes. + +Delegation: + +- You may spawn only read-only exploration subagents via `task` with `agentId: "explore"`. 
+ +When the harness file is ready for user review: + +- Call `propose_harness` exactly once. +- Do NOT start the Ralph loop yourself; the UI will start it after user approval. diff --git a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts index e10e3b86f4..de08862724 100644 --- a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts +++ b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts @@ -7,6 +7,7 @@ export const BUILTIN_AGENT_CONTENT = { "exec": "---\nname: Exec\ndescription: Implement changes in the repository\nui:\n color: var(--color-exec-mode)\nsubagent:\n runnable: true\n append_prompt: |\n If you are running as a sub-agent in a child workspace:\n\n - When you have a final answer, call agent_report exactly once.\n - Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).\n - Do not call propose_plan.\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n remove:\n # Exec mode doesn't use planning tools\n - propose_plan\n - ask_user_question\n # Internal-only tools\n - system1_keep_ranges\n---\n\nYou are in Exec mode.\n\n- Make minimal, correct, reviewable changes that match existing codebase patterns.\n- Prefer targeted commands and checks (typecheck/tests) when feasible.\n- Treat as a standing order: keep running checks and addressing failures until they pass or a blocker outside your control arises.\n", "explore": "---\nname: Explore\ndescription: Read-only exploration of repository, environment, web, etc. 
Useful for investigation before making changes.\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n skip_init_hook: true\n append_prompt: |\n You are an Explore sub-agent running inside a child workspace.\n\n - Explore the repository to answer the prompt using read-only investigation.\n - Return concise, actionable findings (paths, symbols, callsites, and facts).\n - When you have a final answer, call agent_report exactly once.\n - Do not call agent_report until you have completed the assigned task.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nYou are in Explore mode (read-only).\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Prefer `file_read` for reading file contents (supports offset/limit paging).\n- Use bash only for read-only operations (rg, ls, git diff/show/log, etc.), or when you need piping/processing.\n", "harness-from-plan": "---\nname: Harness from Plan\ndescription: Generate a Ralph harness draft from a plan (internal)\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n append_prompt: |\n You are a sub-agent generating a Ralph harness draft from a plan.\n\n - Use read-only investigation only (no file edits, no state changes).\n - Output ONLY a single JSON object in a fenced code block (language: json).\n - When complete, call agent_report exactly once with that JSON block.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nYou generate a Ralph 
harness draft (checklist + optional gates) from the plan provided in the prompt.\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.).\n\nRules:\n\n- Checklist items should be small, mergeable steps (max 20).\n- Gates should be safe single commands that run checks (prefer make targets from this repo, e.g. \"make static-check\").\n- Do not use shell chaining, pipes, redirects, quotes, or destructive commands.\n\nOutput format: a single fenced code block (language: json) containing one JSON object.\n\nExample JSON object:\n\n{\n\"checklist\": [{ \"title\": \"...\", \"notes\": \"...\" }],\n\"gates\": [{ \"command\": \"make static-check\", \"title\": \"...\", \"timeoutSecs\": 600 }],\n\"loop\": { \"autoCommit\": false }\n}\n", + "harness-init": "---\nname: Harness Init\ndescription: Interactive harness generation + approval (internal)\nbase: exec\nui:\n hidden: true\n color: var(--color-harness-init-mode)\nsubagent:\n runnable: false\n---\n\nYou are in Harness Init mode.\n\nYour job is to create or refine a Ralph harness for this workspace based on the current plan and the repository.\n\n=== CRITICAL: LIMITED EDIT MODE ===\n\n- You may ONLY create/edit files under: `.mux/harness/*.jsonc`\n- Do NOT modify source code or other repo files.\n- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.).\n - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch.\n\nRepo-aware investigation:\n\n- Identify which commands should be used as gates by checking repo-native entrypoints:\n - `Makefile`, `package.json` scripts, 
`.github/workflows/*`, etc.\n- Map the plan’s changes to impacted subsystems by tracing callsites/imports.\n\nGates:\n\n- Prefer a small set of safe, single commands.\n- Do NOT use shell chaining, pipes, redirects, or quotes.\n\nDelegation:\n\n- You may spawn only read-only exploration subagents via `task` with `agentId: \"explore\"`.\n\nWhen the harness file is ready for user review:\n\n- Call `propose_harness` exactly once.\n- Do NOT start the Ralph loop yourself; the UI will start it after user approval.\n", "mux": "---\nname: Mux\ndescription: Configure mux global behavior (system workspace)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - mux_global_agents_read\n - mux_global_agents_write\n - ask_user_question\n---\n\nYou are the **Mux system assistant**.\n\nYour job is to help the user configure mux globally by editing the mux-wide instructions file:\n\n- `~/.mux/AGENTS.md`\n\n## Safety rules\n\n- You do **not** have access to arbitrary filesystem tools.\n- You do **not** have access to project secrets.\n- Before writing `~/.mux/AGENTS.md`, you must:\n 1) Read the current file (`mux_global_agents_read`).\n 2) Propose the exact change (show the new content or a concise diff).\n 3) Ask for explicit confirmation via `ask_user_question`.\n 4) Only then call `mux_global_agents_write` with `confirm: true`.\n\nIf the user declines, do not write anything.\n", "plan": "---\nname: Plan\ndescription: Create a plan before coding\nui:\n color: var(--color-plan-mode)\nsubagent:\n runnable: false\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n # Note: file_edit_* tools ARE available but restricted to plan file only at runtime\n # Note: task tools ARE enabled - Plan delegates to Explore sub-agents\n---\n\nYou are in Plan Mode.\n\n- Every response MUST produce or update a plan—no exceptions.\n- Simple requests deserve simple plans; a 
straightforward task might only need a few bullet points. Match plan complexity to the problem.\n- Keep the plan scannable; put long rationale in `<details>/<summary>` blocks.\n- Plans must be **self-contained**: include enough context, goals, constraints, and the core \"why\" so a new assistant can implement without needing the prior chat.\n- When Plan Mode is requested, assume the user wants the actual completed plan; do not merely describe how you would devise one.\n\n## Investigation step (required)\n\nBefore proposing a plan, identify what you must verify and use the best available tools\n(`file_read` for local file contents, search, or user questions). Do not guess. Investigation can be\ndone directly; sub-agents are optional.\n\nPrefer `file_read` over `bash cat` when reading files (including the plan file): long bash output may\nbe compacted, which can hide the middle of a document. Use `file_read` with offset/limit to page\nthrough larger files.\n\n## Plan format\n\n- Context/Why: Briefly restate the request, goals, and the rationale or user impact so the\n plan stands alone for a fresh implementer.\n- Evidence: List sources consulted (file paths, tool outputs, or user-provided info) and\n why they are sufficient. 
If evidence is missing, still produce a minimal plan and add a\n Questions section listing what you need to proceed.\n\nDetailed plan mode instructions (plan file path, sub-agent delegation, propose_plan workflow) are provided separately.\n", "system1_bash": "---\nname: System1 Bash\ndescription: Fast bash-output filtering (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - system1_keep_ranges\n---\n\nYou are a fast bash-output filtering assistant.\n\nYou will be given:\n\n- `maxKeptLines` (budget)\n- `Display name` (optional): a short intent label for the command\n- `Bash script`\n- `Numbered output`\n\nGiven the numbered output, decide which lines to keep so the user sees the most relevant information.\n\nIMPORTANT:\n\n- You MUST call `system1_keep_ranges` exactly once.\n- Do NOT output markdown or prose. Only the tool call (with valid JSON arguments).\n\nRules:\n\n- Line numbers are 1-based indices into the numbered output.\n- Use the `Display name` and `Bash script` as intent hints.\n- If intent is exploration/listing/search (e.g. `ls`, `find`, `rg`, `grep`, `git status`), prioritize keeping\n representative file paths/matches and any summary/counts (not just errors).\n- If intent is build/test/logs, prefer errors, stack traces, failing test summaries, and actionable warnings.\n- If the script already narrows output to a slice (e.g. `head`, `tail`, `sed -n` line ranges), avoid extra\n denoising: prefer keeping most/all lines within the budget.\n- Never filter out git merge conflict markers (`<<<<<<<`, `|||||||`, `=======`, `>>>>>>>`). If the command is searching for these markers (e.g. `rg`/`grep`), do not keep only representative matches; keep all matches within the budget.\n- Prefer omitting tool-generated advisory blocks (especially git lines starting with `hint:`) that only suggest\n next-step commands or point to docs/help. 
Keep the underlying `error:`/`fatal:`/`CONFLICT` lines, file paths,\n and conflict markers instead.\n- Exception: keep `hint:` blocks when the script is explicitly searching for them (e.g. `rg '^hint:'`) or when\n the hint is the only clue explaining a blocking state.\n- Prefer high signal density: keep ranges tight around important lines plus minimal surrounding context.\n- Merge adjacent/overlapping ranges only when the lines between are also informative. Do NOT add noise just\n to reduce range count; it's OK to return many ranges when denoising (e.g., > 8).\n- Denoise aggressively: omit duplicate/redundant lines and repeated messages with the same meaning\n (e.g., repeated progress, retries, or identical stack traces). If the same error repeats, keep only\n the most informative instance plus minimal surrounding context.\n- If there are many similar warnings/errors, keep only a few representative examples (prefer those\n with file paths/line numbers) plus any summary/count.\n- Always keep at least 1 line if any output exists.\n- Choose ranges that keep at most `maxKeptLines` lines total (the caller may truncate).\n\nExample:\n\n- Numbered output:\n - 0001| building...\n - 0002| ERROR: expected X, got Y\n - 0003| at path/to/file.ts:12:3\n - 0004| done\n- Tool call:\n - system1_keep_ranges({\"keep_ranges\":[{\"start\":2,\"end\":3,\"reason\":\"error\"}]})\n", diff --git a/src/node/services/agentDefinitions/builtInAgentDefinitions.ts b/src/node/services/agentDefinitions/builtInAgentDefinitions.ts index 03553d1ed1..4110250ec3 100644 --- a/src/node/services/agentDefinitions/builtInAgentDefinitions.ts +++ b/src/node/services/agentDefinitions/builtInAgentDefinitions.ts @@ -19,6 +19,7 @@ const BUILT_IN_SOURCES: BuiltInSource[] = [ { id: "plan", content: BUILTIN_AGENT_CONTENT.plan }, { id: "compact", content: BUILTIN_AGENT_CONTENT.compact }, { id: "harness-from-plan", content: BUILTIN_AGENT_CONTENT["harness-from-plan"] }, + { id: "harness-init", content: 
BUILTIN_AGENT_CONTENT["harness-init"] }, { id: "explore", content: BUILTIN_AGENT_CONTENT.explore }, { id: "system1_bash", content: BUILTIN_AGENT_CONTENT.system1_bash }, { id: "mux", content: BUILTIN_AGENT_CONTENT.mux }, diff --git a/src/node/services/agentDefinitions/resolveToolPolicy.test.ts b/src/node/services/agentDefinitions/resolveToolPolicy.test.ts index 47749c60dd..619a1416ea 100644 --- a/src/node/services/agentDefinitions/resolveToolPolicy.test.ts +++ b/src/node/services/agentDefinitions/resolveToolPolicy.test.ts @@ -61,6 +61,7 @@ describe("resolveToolPolicyForAgent", () => { { regex_match: "task", action: "disable" }, { regex_match: "task_.*", action: "disable" }, { regex_match: "propose_plan", action: "disable" }, + { regex_match: "propose_harness", action: "disable" }, { regex_match: "ask_user_question", action: "disable" }, { regex_match: "agent_report", action: "enable" }, ]); diff --git a/src/node/services/agentDefinitions/resolveToolPolicy.ts b/src/node/services/agentDefinitions/resolveToolPolicy.ts index bb3749b820..bbce511f2d 100644 --- a/src/node/services/agentDefinitions/resolveToolPolicy.ts +++ b/src/node/services/agentDefinitions/resolveToolPolicy.ts @@ -24,6 +24,7 @@ const SUBAGENT_HARD_DENY: ToolPolicy = [ { regex_match: "task", action: "disable" }, { regex_match: "task_.*", action: "disable" }, { regex_match: "propose_plan", action: "disable" }, + { regex_match: "propose_harness", action: "disable" }, { regex_match: "ask_user_question", action: "disable" }, ]; diff --git a/src/node/services/agentPresets.ts b/src/node/services/agentPresets.ts index c741f66bea..9e166952bd 100644 --- a/src/node/services/agentPresets.ts +++ b/src/node/services/agentPresets.ts @@ -47,8 +47,9 @@ function buildSystemPrompt(args: { const EXEC_PRESET: AgentPreset = { agentType: "exec", toolPolicy: [ - // Only the main plan-mode session should call propose_plan. + // Only the main workspace session should call propose_* approval tools. 
{ regex_match: "propose_plan", action: "disable" }, + { regex_match: "propose_harness", action: "disable" }, ], systemPrompt: buildSystemPrompt({ agentLabel: "Exec", @@ -59,6 +60,7 @@ const EXEC_PRESET: AgentPreset = { rules: [ "- You MUST NOT spawn additional sub-agent tasks.", "- Do not call propose_plan.", + "- Do not call propose_harness.", "- Prefer small, reviewable diffs and run targeted checks when feasible.", ], }), diff --git a/src/node/services/agentSession.ts b/src/node/services/agentSession.ts index f90bdb9eb1..451f0a6a13 100644 --- a/src/node/services/agentSession.ts +++ b/src/node/services/agentSession.ts @@ -1319,7 +1319,9 @@ export class AgentSession { // Trigger a metadata refresh so the right sidebar updates immediately. if ( payload.type === "tool-call-end" && - (payload.toolName === "propose_plan" || payload.toolName.startsWith("file_edit_")) + (payload.toolName === "propose_plan" || + payload.toolName === "propose_harness" || + payload.toolName.startsWith("file_edit_")) ) { this.onPostCompactionStateChange?.(); } diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index c7e2f6c8c9..40e2c019de 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -1760,10 +1760,13 @@ export class AIService extends EventEmitter { ), runtimeTempDir, backgroundProcessManager: this.backgroundProcessManager, - // Plan agent configuration for plan file access. - // - read: plan file is readable in all agents (useful context) - // - write: enforced by file_edit_* tools (plan file is read-only outside plan agent) - planFileOnly: agentIsPlanLike, + // Plan/exec mode configuration for plan file access. + // - read: plan file is readable in all modes (useful context) + // - write: enforced by file_edit_* tools (plan file is read-only outside plan mode) + mode: effectiveMode, + agentId: effectiveAgentId, + allowedEditPaths: + effectiveAgentId === "harness-init" ? 
[".mux/harness/*.jsonc"] : undefined, emitChatEvent: (event) => { // Defensive: tools should only emit events for the workspace they belong to. if ("workspaceId" in event && event.workspaceId !== workspaceId) { diff --git a/src/node/services/ptc/toolBridge.test.ts b/src/node/services/ptc/toolBridge.test.ts index a2a2baab12..9bb02a0595 100644 --- a/src/node/services/ptc/toolBridge.test.ts +++ b/src/node/services/ptc/toolBridge.test.ts @@ -57,6 +57,7 @@ describe("ToolBridge", () => { code_execution: createMockTool("code_execution", z.object({}), () => ({})), ask_user_question: createMockTool("ask_user_question", z.object({}), () => ({})), propose_plan: createMockTool("propose_plan", z.object({}), () => ({})), + propose_harness: createMockTool("propose_harness", z.object({}), () => ({})), todo_write: createMockTool("todo_write", z.object({}), () => ({})), todo_read: createMockTool("todo_read", z.object({}), () => ({})), status_set: createMockTool("status_set", z.object({}), () => ({})), @@ -68,6 +69,7 @@ describe("ToolBridge", () => { expect(names).toEqual(["file_read"]); expect(names).not.toContain("code_execution"); expect(names).not.toContain("ask_user_question"); + expect(names).not.toContain("propose_harness"); expect(names).not.toContain("propose_plan"); expect(names).not.toContain("todo_write"); expect(names).not.toContain("todo_read"); diff --git a/src/node/services/ptc/toolBridge.ts b/src/node/services/ptc/toolBridge.ts index 7acd376aea..f4e0c2dee2 100644 --- a/src/node/services/ptc/toolBridge.ts +++ b/src/node/services/ptc/toolBridge.ts @@ -14,6 +14,7 @@ const EXCLUDED_TOOLS = new Set([ "code_execution", // Prevent recursive sandbox creation "ask_user_question", // Requires UI interaction "propose_plan", // Mode-specific, call directly + "propose_harness", // UI-specific, call directly "todo_write", // UI-specific "todo_read", // UI-specific "status_set", // UI-specific diff --git a/src/node/services/tools/fileCommon.test.ts 
b/src/node/services/tools/fileCommon.test.ts index 2acb50334e..2dbee293cb 100644 --- a/src/node/services/tools/fileCommon.test.ts +++ b/src/node/services/tools/fileCommon.test.ts @@ -149,6 +149,69 @@ describe("fileCommon", () => { }); }); + describe("validatePlanModeAccess", () => { + const cwd = "/workspace/project"; + const runtime = createRuntime({ type: "local", srcBaseDir: cwd }); + + function buildConfig(overrides: Partial<ToolConfiguration>): ToolConfiguration { + return { + cwd, + runtime, + runtimeTempDir: "/tmp", + ...overrides, + }; + } + + it("allows edits to allowlisted files", async () => { + const config = buildConfig({ + mode: "exec", + allowedEditPaths: [".mux/harness/*.jsonc"], + }); + + expect(await validatePlanModeAccess(".mux/harness/main.jsonc", config)).toBeNull(); + expect( + await validatePlanModeAccess("/workspace/project/.mux/harness/main.jsonc", config) + ).toBeNull(); + }); + + it("rejects edits to non-allowlisted files", async () => { + const config = buildConfig({ + mode: "exec", + allowedEditPaths: [".mux/harness/*.jsonc"], + }); + + const result = await validatePlanModeAccess("src/main.ts", config); + expect(result).not.toBeNull(); + expect(result?.success).toBe(false); + expect(result?.error).toContain("File edits are restricted to"); + expect(result?.error).toContain(".mux/harness/*.jsonc"); + }); + + it("rejects edits to allowlisted directory with wrong extension", async () => { + const config = buildConfig({ + mode: "exec", + allowedEditPaths: [".mux/harness/*.jsonc"], + }); + + const result = await validatePlanModeAccess(".mux/harness/main.progress.md", config); + expect(result).not.toBeNull(); + expect(result?.error).toContain("File edits are restricted to"); + }); + + it("keeps plan file read-only outside plan mode even if allowlisted", async () => { + const planFilePath = "/workspace/project/plan.md"; + const config = buildConfig({ + mode: "exec", + planFilePath, + allowedEditPaths: ["/workspace/project/plan.md"], + }); + + 
const result = await validatePlanModeAccess(planFilePath, config); + expect(result).not.toBeNull(); + expect(result?.error).toContain("Plan file is read-only outside plan mode"); + }); + }); + describe("validateNoRedundantPrefix", () => { const cwd = "/workspace/project"; const runtime = createRuntime({ type: "local", srcBaseDir: cwd }); diff --git a/src/node/services/tools/fileCommon.ts b/src/node/services/tools/fileCommon.ts index 08e3aa7902..85818495bc 100644 --- a/src/node/services/tools/fileCommon.ts +++ b/src/node/services/tools/fileCommon.ts @@ -11,6 +11,40 @@ import type { ToolConfiguration } from "@/common/utils/tools/tools"; */ export const MAX_FILE_SIZE = 1024 * 1024; // 1MB +function normalizeForGlobMatch(value: string): string { + return value.replace(/\\/g, "/"); +} + +function globToRegExp(pattern: string): RegExp { + let regex = "^"; + const normalized = normalizeForGlobMatch(pattern); + + for (let i = 0; i < normalized.length; i += 1) { + const char = normalized[i]; + if (!char) continue; + + if (char === "*") { + const next = normalized[i + 1]; + if (next === "*") { + regex += ".*"; + i += 1; + } else { + regex += "[^/]*"; + } + continue; + } + + // Escape regex metacharacters. + if (/[\\^$.*+?()|[\]{}]/.test(char)) { + regex += `\\${char}`; + } else { + regex += char; + } + } + + regex += "$"; + return new RegExp(regex); +} export interface PlanModeValidationError { success: false; error: string; @@ -22,6 +56,7 @@ export interface PlanModeValidationError { * - Editing plan file outside plan mode (read-only) * - Editing non-plan file in plan mode * - Path is outside cwd (for non-plan files) + * - Path is not allowlisted (when allowedEditPaths is configured) * * Returns null if validation passes. */ @@ -29,28 +64,30 @@ export async function validatePlanModeAccess( filePath: string, config: ToolConfiguration ): Promise<PlanModeValidationError | null> { - // Plan file is always read-only outside the plan agent. 
+ const isPlanFile = await isPlanFilePath(filePath, config); + + // Plan file is always read-only outside plan mode. // This is especially important for SSH runtimes, where cwd validation is intentionally skipped. - if ((await isPlanFilePath(filePath, config)) && !config.planFileOnly) { + if (isPlanFile && config.mode !== "plan") { return { success: false, - error: `Plan file is read-only outside the plan agent: ${filePath}`, + error: `Plan file is read-only outside plan mode: ${filePath}`, }; } - // Plan-agent restriction: only allow editing the plan file (and require exact string match). - if (config.planFileOnly && config.planFilePath) { + // Plan-mode restriction: only allow editing the plan file (and require exact string match). + if (config.mode === "plan" && config.planFilePath) { if (filePath !== config.planFilePath) { - if (await isPlanFilePath(filePath, config)) { + if (isPlanFile) { return { success: false, - error: `In the plan agent, you must use the exact plan file path from the instructions: ${config.planFilePath} (attempted: ${filePath}; this resolves to the plan file but absolute/alternate paths are not allowed)`, + error: `In plan mode, you must use the exact plan file path from the instructions: ${config.planFilePath} (attempted: ${filePath}; this resolves to the plan file but absolute/alternate paths are not allowed)`, }; } return { success: false, - error: `In the plan agent, only the plan file can be edited. You must use the exact plan file path: ${config.planFilePath} (attempted: ${filePath})`, + error: `In plan mode, only the plan file can be edited. You must use the exact plan file path: ${config.planFilePath} (attempted: ${filePath})`, }; } // Skip cwd validation for plan file - it may be outside workspace @@ -65,6 +102,30 @@ export async function validatePlanModeAccess( } } + // Optional allowlist restriction (e.g., harness-init can only edit its harness config). 
+ if (!isPlanFile && config.allowedEditPaths && config.allowedEditPaths.length > 0) { + const allowed = config.allowedEditPaths + .map((pattern) => pattern.trim()) + .filter((p) => p.length > 0); + if (allowed.length > 0) { + const resolvedPath = normalizeForGlobMatch( + config.runtime.normalizePath(filePath, config.cwd) + ); + const isAllowed = allowed.some((pattern) => { + const resolvedPattern = normalizeForGlobMatch( + config.runtime.normalizePath(pattern, config.cwd) + ); + return globToRegExp(resolvedPattern).test(resolvedPath); + }); + if (!isAllowed) { + return { + success: false, + error: `File edits are restricted to: ${allowed.join(", ")} (attempted: ${filePath})`, + }; + } + } + } + return null; } diff --git a/src/node/services/tools/propose_harness.ts b/src/node/services/tools/propose_harness.ts new file mode 100644 index 0000000000..c379b02fd9 --- /dev/null +++ b/src/node/services/tools/propose_harness.ts @@ -0,0 +1,156 @@ +import { tool } from "ai"; +import { z } from "zod"; +import * as jsonc from "jsonc-parser"; + +import { WorkspaceHarnessConfigSchema } from "@/common/orpc/schemas"; +import type { ToolFactory } from "@/common/utils/tools/tools"; +import { TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions"; +import { RuntimeError } from "@/node/runtime/Runtime"; +import { execBuffered, readFileString } from "@/node/utils/runtime/helpers"; + +const proposeHarnessSchema = z.object({}); + +const HARNESS_DIR = ".mux/harness"; + +function normalizeWorkspaceName(value: unknown): string { + return typeof value === "string" && value.trim().length > 0 ? value.trim() : ""; +} + +function isAllowedHarnessGitPath(pathFromGit: string): boolean { + return pathFromGit.startsWith(`${HARNESS_DIR}/`) && pathFromGit.endsWith(".jsonc"); +} + +function extractGitStatusPath(line: string): string | null { + // Example porcelain lines: + // " M src/foo.ts" + // "?? 
.mux/harness/main.jsonc" + // "R old -> new" + if (typeof line !== "string" || line.length < 4) { + return null; + } + + const pathPart = line.slice(3).trim(); + if (!pathPart) { + return null; + } + + const arrowIndex = pathPart.indexOf(" -> "); + if (arrowIndex >= 0) { + return pathPart.slice(arrowIndex + 4).trim(); + } + + return pathPart; +} + +export const createProposeHarnessTool: ToolFactory = (config) => { + return tool({ + description: TOOL_DEFINITIONS.propose_harness.description, + inputSchema: proposeHarnessSchema, + execute: async () => { + const workspaceName = normalizeWorkspaceName(config.muxEnv?.MUX_WORKSPACE_NAME); + if (!workspaceName) { + return { + success: false as const, + error: "No workspace name available (missing MUX_WORKSPACE_NAME).", + }; + } + + const prefix = workspaceName; + const harnessPath = config.runtime.normalizePath( + `${HARNESS_DIR}/${prefix}.jsonc`, + config.cwd + ); + + let harnessContent: string; + try { + harnessContent = await readFileString(config.runtime, harnessPath); + } catch (err) { + if (err instanceof RuntimeError) { + return { + success: false as const, + error: `No harness file found at ${harnessPath}. Please write your harness to this file before calling propose_harness.`, + }; + } + throw err; + } + + if (harnessContent === "") { + return { + success: false as const, + error: `Harness file at ${harnessPath} is empty. 
Please write your harness content before calling propose_harness.`, + }; + } + + const parseErrors: jsonc.ParseError[] = []; + const parsed = jsonc.parse(harnessContent, parseErrors) as unknown; + if (parseErrors.length > 0) { + return { + success: false as const, + error: `Harness file at ${harnessPath} is not valid JSONC.`, + }; + } + + const validated = WorkspaceHarnessConfigSchema.safeParse(parsed); + if (!validated.success) { + return { + success: false as const, + error: `Harness file at ${harnessPath} does not match the expected schema: ${validated.error.message}`, + }; + } + + // Defensive: ensure harness-init didn't accidentally mutate other repo files (e.g. via bash). + try { + const isGitRepo = await execBuffered( + config.runtime, + "git rev-parse --is-inside-work-tree", + { + cwd: config.cwd, + timeout: 10, + } + ); + if (isGitRepo.exitCode === 0 && isGitRepo.stdout.trim() === "true") { + const status = await execBuffered(config.runtime, "git status --porcelain", { + cwd: config.cwd, + timeout: 10, + }); + if (status.exitCode === 0) { + const dirtyPaths = status.stdout + .split(/\r?\n/) + .map((line) => extractGitStatusPath(line)) + .filter((p): p is string => Boolean(p)); + const nonHarness = dirtyPaths.filter((p) => !isAllowedHarnessGitPath(p)); + if (nonHarness.length > 0) { + return { + success: false as const, + error: + `Working tree has changes outside ${HARNESS_DIR}/*.jsonc: ` + + nonHarness.slice(0, 10).join(", "), + }; + } + } + } + } catch { + // Best-effort only. + } + + // Record file state for external edit detection + if (config.recordFileState) { + try { + const fileStat = await config.runtime.stat(harnessPath); + config.recordFileState(harnessPath, { + content: harnessContent, + timestamp: fileStat.modifiedTime.getTime(), + }); + } catch { + // File stat failed, skip recording + } + } + + return { + success: true as const, + harnessPath, + message: "Harness proposed. 
Waiting for user approval.", + }; + }, + }); +}; diff --git a/src/node/services/tools/task.ts b/src/node/services/tools/task.ts index a161f68865..b894908a1b 100644 --- a/src/node/services/tools/task.ts +++ b/src/node/services/tools/task.ts @@ -68,9 +68,18 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => { throw new Error("Sub-agent workspaces may not spawn additional sub-agent tasks."); } - // Plan agent is explicitly non-executing. Allow only read-only exploration tasks. - if (config.planFileOnly && requestedAgentId !== "explore") { - throw new Error('In the plan agent you may only spawn agentId: "explore" tasks.'); + // Defense-in-depth: some agents are never valid as sub-agents. + if (requestedAgentId === "harness-init") { + throw new Error('agentId "harness-init" may not be spawned as a sub-agent task.'); + } + + // Harness init is explicitly non-executing. Allow only read-only exploration tasks. + if (config.agentId === "harness-init" && requestedAgentId !== "explore") { + throw new Error('In Harness Init you may only spawn agentId: "explore" tasks.'); + } + // Plan mode is explicitly non-executing. Allow only read-only exploration tasks. 
+ if (config.mode === "plan" && requestedAgentId !== "explore") { + throw new Error('In Plan Mode you may only spawn agentId: "explore" tasks.'); } const modelString = From 58e90a182fa0cc864306702c2d71cb9583afc189 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Tue, 20 Jan 2026 23:42:41 +0100 Subject: [PATCH 08/20] =?UTF-8?q?=F0=9F=A4=96=20tests:=20stabilize=20sideb?= =?UTF-8?q?ar=20+=20bash=20integration=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Ie569d9a08cf122c8d7dce626003d1620a6e37bf9 Signed-off-by: Thomas Kosiewski <tk@coder.com> --- src/browser/utils/rightSidebarLayout.test.ts | 6 +-- tests/e2e/scenarios/sidebarDragDrop.spec.ts | 6 ++- tests/ipc/backgroundBashDirect.test.ts | 42 ++++++++++++++++++-- tests/ipc/executeBash.test.ts | 16 +++++++- 4 files changed, 60 insertions(+), 10 deletions(-) diff --git a/src/browser/utils/rightSidebarLayout.test.ts b/src/browser/utils/rightSidebarLayout.test.ts index eecaba2c55..e46ac4a779 100644 --- a/src/browser/utils/rightSidebarLayout.test.ts +++ b/src/browser/utils/rightSidebarLayout.test.ts @@ -122,19 +122,19 @@ test("moveTabToTabset removes empty source tabset", () => { }); test("reorderTabInTabset reorders tabs within a tabset", () => { - // Default layout has ["costs", "review", "explorer"]; reorder costs from 0 to 1 + // Default layout has ["costs", "review", "explorer", "harness"]; reorder costs from 0 to 1 const s0 = getDefaultRightSidebarLayoutState("costs"); const s1 = reorderTabInTabset(s0, "tabset-1", 0, 1); expect(s1.root.type).toBe("tabset"); if (s1.root.type !== "tabset") throw new Error("expected tabset"); - expect(s1.root.tabs).toEqual(["review", "costs", "explorer"]); + expect(s1.root.tabs).toEqual(["review", "costs", "explorer", "harness"]); expect(s1.root.activeTab).toBe("costs"); }); test("dockTabToEdge splits a tabset and moves the dragged tab into the new pane", () => { - // Default layout has ["costs", 
"review", "explorer"]; drag review into a bottom split + // Default layout has ["costs", "review", "explorer", "harness"]; drag review into a bottom split const s0 = getDefaultRightSidebarLayoutState("costs"); const s1 = dockTabToEdge(s0, "review", "tabset-1", "tabset-1", "bottom"); diff --git a/tests/e2e/scenarios/sidebarDragDrop.spec.ts b/tests/e2e/scenarios/sidebarDragDrop.spec.ts index a1dd839ab2..6b982fe986 100644 --- a/tests/e2e/scenarios/sidebarDragDrop.spec.ts +++ b/tests/e2e/scenarios/sidebarDragDrop.spec.ts @@ -203,7 +203,11 @@ test.describe("sidebar drag and drop", () => { const topTabs = await tablists[0].getByRole("tab").all(); const bottomTabs = await tablists[1].getByRole("tab").all(); - expect(topTabs.length).toBe(3); // Costs, Review, Explorer + expect(topTabs.length).toBeGreaterThanOrEqual(3); + await expect(tablists[0]).toContainText("Costs"); + await expect(tablists[0]).toContainText("Review"); + await expect(tablists[0]).toContainText("Explorer"); + expect(bottomTabs.length).toBe(1); // Costs (duplicate tab in split) }); diff --git a/tests/ipc/backgroundBashDirect.test.ts b/tests/ipc/backgroundBashDirect.test.ts index 6ab3b52361..827d518bfe 100644 --- a/tests/ipc/backgroundBashDirect.test.ts +++ b/tests/ipc/backgroundBashDirect.test.ts @@ -19,7 +19,12 @@ import * as fs from "fs/promises"; import * as os from "os"; import * as path from "path"; import { createTestEnvironment, cleanupTestEnvironment, type TestEnvironment } from "./setup"; -import { createTempGitRepo, cleanupTempGitRepo, generateBranchName } from "./helpers"; +import { + createTempGitRepo, + cleanupTempGitRepo, + generateBranchName, + waitForInitComplete, +} from "./helpers"; import { detectDefaultTrunkBranch } from "../../src/node/git"; import { LocalRuntime } from "../../src/node/runtime/LocalRuntime"; import { BackgroundProcessManager } from "../../src/node/services/backgroundProcessManager"; @@ -70,6 +75,9 @@ describe("Background Bash Direct Integration", () => { } workspaceId 
= result.metadata.id; workspacePath = result.metadata.namedWorkspacePath ?? tempGitRepo; + + // Avoid race conditions on slower platforms (Windows) where tools may run before init finishes. + await waitForInitComplete(env, workspaceId, 30_000); }); afterAll(async () => { @@ -263,6 +271,9 @@ describe("Background Bash Output Capture", () => { } workspaceId = result.metadata.id; workspacePath = result.metadata.namedWorkspacePath ?? tempGitRepo; + + // Avoid race conditions on slower platforms (Windows) where tools may run before init finishes. + await waitForInitComplete(env, workspaceId, 30_000); }); afterAll(async () => { @@ -379,6 +390,9 @@ describe("Foreground to Background Migration", () => { } workspaceId = result.metadata.id; workspacePath = result.metadata.namedWorkspacePath ?? tempGitRepo; + + // Avoid race conditions on slower platforms (Windows) where tools may run before init finishes. + await waitForInitComplete(env, workspaceId, 30_000); }); afterAll(async () => { @@ -519,8 +533,8 @@ describe("Foreground to Background Migration", () => { { toolCallId, messages: [] } ) as Promise<ToolExecuteResult>; - // Wait for marker1 to output - await new Promise((resolve) => setTimeout(resolve, FOREGROUND_MIGRATION_READY_MS)); + // Wait for marker1 to output (extra slack for slower CI runners) + await new Promise((resolve) => setTimeout(resolve, 800)); // Send to background mid-execution manager.sendToBackground(toolCallId); @@ -589,7 +603,27 @@ describe("Foreground to Background Migration", () => { // Either it completed normally or was backgrounded expect(result.success).toBe(true); - expect(result.output).toContain(marker); + if (!result.success) return; + + if (result.output?.includes(marker)) { + expect(result.output).toContain(marker); + return; + } + + // On some platforms the process can exit during send-to-background, before output is collected. + // Verify the marker still exists in the persisted output log. 
+ if (result.backgroundProcessId) { + const proc = await manager.getProcess(result.backgroundProcessId); + expect(proc).toBeDefined(); + + const outputPath = path.join(proc!.outputDir, "output.log"); + const fullOutput = await fs.readFile(outputPath, "utf-8"); + expect(fullOutput).toContain(marker); + return; + } + + // If we weren't backgrounded, the marker should have been included in the immediate output. + expect(result.output ?? "").toContain(marker); }); it("should not kill backgrounded process when abort signal fires", async () => { diff --git a/tests/ipc/executeBash.test.ts b/tests/ipc/executeBash.test.ts index 58e4d76f5a..c529d9d987 100644 --- a/tests/ipc/executeBash.test.ts +++ b/tests/ipc/executeBash.test.ts @@ -66,7 +66,7 @@ function expectWorkspaceCreationSuccess(result: WorkspaceCreationResult): Worksp } const GIT_FETCH_TIMEOUT_SECS = process.platform === "win32" ? 15 : 5; -const TEST_TIMEOUT_MS = process.platform === "win32" ? 60_000 : 15_000; +const TEST_TIMEOUT_MS = process.platform === "win32" ? 60_000 : 30_000; // Skip all tests if TEST_INTEGRATION is not set const describeIntegration = shouldRunIntegrationTests() ? 
describe : describe.skip; @@ -87,6 +87,9 @@ describeIntegration("executeBash", () => { const workspaceId = metadata.id; const client = resolveOrpcClient(env); + // Wait for init to complete (prevents Windows filesystem timing issues) + await waitForInitComplete(env, workspaceId, 30_000); + // Execute a simple bash command (pwd should return workspace path) const pwdResult = await client.workspace.executeBash({ workspaceId, script: "pwd" }); @@ -153,6 +156,9 @@ describeIntegration("executeBash", () => { const workspaceId = expectWorkspaceCreationSuccess(createResult).id; const client = resolveOrpcClient(env); + // Wait for init to complete (prevents Windows filesystem timing issues) + await waitForInitComplete(env, workspaceId, 30_000); + // Execute a command that will fail const failResult = await client.workspace.executeBash({ workspaceId, @@ -189,6 +195,9 @@ describeIntegration("executeBash", () => { const workspaceId = expectWorkspaceCreationSuccess(createResult).id; const client = resolveOrpcClient(env); + // Wait for init to complete (prevents Windows filesystem timing issues) + await waitForInitComplete(env, workspaceId, 30_000); + // Execute a command that takes longer than the timeout const timeoutResult = await client.workspace.executeBash({ workspaceId, @@ -225,6 +234,9 @@ describeIntegration("executeBash", () => { const workspaceId = expectWorkspaceCreationSuccess(createResult).id; const client = resolveOrpcClient(env); + // Wait for init to complete (prevents Windows filesystem timing issues) + await waitForInitComplete(env, workspaceId, 30_000); + // Execute a command that generates 400 lines (well under 10K limit for IPC truncate policy) const result = await client.workspace.executeBash({ workspaceId, @@ -331,7 +343,7 @@ describeIntegration("executeBash", () => { const client = resolveOrpcClient(env); // Wait for init to complete (prevents Windows filesystem timing issues) - await waitForInitComplete(env, workspaceId); + await waitForInitComplete(env, 
workspaceId, 30_000); // Verify GIT_TERMINAL_PROMPT is set to 0 const gitEnvResult = await executeBashUntilReady( From d39a024bc7d439d7bd968bd3b5bdf454cdbbe1ad Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Tue, 20 Jan 2026 23:47:14 +0100 Subject: [PATCH 09/20] =?UTF-8?q?=F0=9F=A4=96=20fix:=20preserve=20harness?= =?UTF-8?q?=20edits=20when=20updating=20checklist=20status?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I88bf5879b908141790c6119d99f93983071a6b5e Signed-off-by: Thomas Kosiewski <tk@coder.com> --- src/node/services/loopRunnerService.ts | 56 ++++++++++++++++++++------ 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/src/node/services/loopRunnerService.ts b/src/node/services/loopRunnerService.ts index c711e2b99b..cdc3bd7270 100644 --- a/src/node/services/loopRunnerService.ts +++ b/src/node/services/loopRunnerService.ts @@ -206,6 +206,48 @@ export class LoopRunnerService extends EventEmitter { this.emit("change", workspaceId); } + /** + * Update checklist item status without clobbering concurrent harness edits. + * + * The loop runner may hold an in-memory snapshot of the harness config for the + * duration of an iteration. Users (or harness-init) can edit the harness file + * concurrently; when we update a status (todo→doing, doing→done), we must merge + * onto the latest on-disk config to avoid overwriting those edits. + */ + private async updateChecklistItemStatus( + workspaceId: string, + itemId: string, + status: HarnessChecklistItem["status"] + ): Promise<void> { + assert(typeof itemId === "string" && itemId.trim().length > 0, "itemId must be non-empty"); + + try { + const latest = await this.workspaceHarnessService.getHarnessForWorkspace(workspaceId); + const existing = latest.config.checklist.find((item) => item.id === itemId) ?? 
null; + if (!existing) { + return; + } + + if (existing.status === status) { + return; + } + + await this.workspaceHarnessService.setHarnessForWorkspace(workspaceId, { + ...latest.config, + checklist: latest.config.checklist.map((item) => + item.id === itemId ? { ...item, status } : item + ), + }); + } catch (error) { + log.debug("[HARNESS] Failed to update checklist item status", { + workspaceId, + itemId, + status, + error, + }); + } + } + private async loadStateFromDisk(workspaceId: string): Promise<HarnessLoopState> { const filePath = this.getStatePath(workspaceId); @@ -442,12 +484,7 @@ export class LoopRunnerService extends EventEmitter { // If this is a checklist item, mark it doing before we start. if (nextItem?.status === "todo") { - await this.workspaceHarnessService.setHarnessForWorkspace(workspaceId, { - ...config, - checklist: config.checklist.map((item) => - item.id === nextItem.id ? { ...item, status: "doing" as const } : item - ), - }); + await this.updateChecklistItemStatus(workspaceId, nextItem.id, "doing"); } const sendResult = await this.workspaceService.sendMessage(workspaceId, prompt, { @@ -500,12 +537,7 @@ export class LoopRunnerService extends EventEmitter { // If this was a checklist item, mark it done. if (nextItem) { - await this.workspaceHarnessService.setHarnessForWorkspace(workspaceId, { - ...config, - checklist: config.checklist.map((item) => - item.id === nextItem.id ? 
{ ...item, status: "done" as const } : item - ), - }); + await this.updateChecklistItemStatus(workspaceId, nextItem.id, "done"); } } else { const failures = nextState.consecutiveFailures + 1; From 0a0abf9e6cf4f7f57c6c6aaaf6003feb2b892adc Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Wed, 21 Jan 2026 08:13:08 +0100 Subject: [PATCH 10/20] =?UTF-8?q?=F0=9F=A4=96=20fix:=20hide=20plan=20loop?= =?UTF-8?q?=20status=20when=20inactive?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Ic9c7e77915dcf5662b2cf767f93202c928d13c91 Signed-off-by: Thomas Kosiewski <tk@coder.com> --- .../components/tools/ProposePlanToolCall.tsx | 21 ++++++++------ src/browser/stories/mocks/orpc.ts | 28 +++++++++++++++++++ 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/src/browser/components/tools/ProposePlanToolCall.tsx b/src/browser/components/tools/ProposePlanToolCall.tsx index 124ee8f120..19217c5ef7 100644 --- a/src/browser/components/tools/ProposePlanToolCall.tsx +++ b/src/browser/components/tools/ProposePlanToolCall.tsx @@ -480,6 +480,13 @@ export const ProposePlanToolCall: React.FC<ProposePlanToolCallProps> = (props) = isLatest && status === "completed" && !errorMessage; + + const isLoopStateRelevant = (state: HarnessLoopState) => + state.status !== "stopped" || + state.iteration > 0 || + state.consecutiveFailures > 0 || + state.lastError !== null || + state.stoppedReason !== null; const statusDisplay = getStatusDisplay(status); // Build action buttons array (similar to AssistantMessage) @@ -617,26 +624,24 @@ export const ProposePlanToolCall: React.FC<ProposePlanToolCallProps> = (props) = {/* Loop status + completion guidance */} - {showInlineLoopState && ( + {showInlineLoopState && loopState && isLoopStateRelevant(loopState) && ( <div className="border-border-light mt-3 rounded border p-3"> <div className="text-secondary text-xs">Loop status</div> - <div className="mt-1 text-sm"> - {loopState ? 
`${loopState.status} • iteration ${loopState.iteration}` : "Loading…"} - </div> - {loopState?.currentItemTitle && ( + <div className="mt-1 text-sm">{`${loopState.status} • iteration ${loopState.iteration}`}</div> + {loopState.currentItemTitle && ( <div className="text-secondary mt-1 text-xs"> Current: <span className="text-light">{loopState.currentItemTitle}</span> </div> )} - {loopState && loopState.consecutiveFailures > 0 && ( + {loopState.consecutiveFailures > 0 && ( <div className="text-error mt-1 text-xs"> Consecutive failures: {loopState.consecutiveFailures} </div> )} - {loopState?.stoppedReason && ( + {loopState.stoppedReason && ( <div className="text-secondary mt-1 text-xs">Stopped: {loopState.stoppedReason}</div> )} - {loopState?.lastError && ( + {loopState.lastError && ( <div className="text-error mt-2 text-xs">{loopState.lastError}</div> )} </div> diff --git a/src/browser/stories/mocks/orpc.ts b/src/browser/stories/mocks/orpc.ts index ec372eba6f..ca6ec3d3df 100644 --- a/src/browser/stories/mocks/orpc.ts +++ b/src/browser/stories/mocks/orpc.ts @@ -690,6 +690,34 @@ export function createMockORPCClient(options: MockORPCClientOptions = {}): APICl yield* []; await new Promise<void>(() => undefined); }, + loop: { + subscribe: async function* ( + _input: { workspaceId: string }, + options?: { signal?: AbortSignal } + ) { + // Yield initial state, then keep the subscription open (like a real eventIterator). 
+ yield { + status: "stopped" as const, + startedAt: null, + iteration: 0, + consecutiveFailures: 0, + currentItemId: null, + currentItemTitle: null, + lastGateRun: null, + lastCheckpoint: null, + lastError: null, + stoppedReason: null, + }; + + await new Promise<void>((resolve) => { + if (options?.signal?.aborted) { + resolve(); + return; + } + options?.signal?.addEventListener("abort", () => resolve(), { once: true }); + }); + }, + }, activity: { list: () => Promise.resolve({}), subscribe: async function* () { From f1baf1eef1e62318835e889f66a549d939c68d6c Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Wed, 21 Jan 2026 12:35:05 +0100 Subject: [PATCH 11/20] =?UTF-8?q?=F0=9F=A4=96=20fix:=20hide=20Harness=20ta?= =?UTF-8?q?b=20when=20no=20harness=20files=20exist?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add lightweight workspace.harness.exists endpoint - Remove legacy harness filename support - Conditionally show/remove Harness tab in right sidebar and persisted layout Signed-off-by: Thomas Kosiewski <tk@coder.com> --- _Generated with `mux` • Model: `openai:gpt-5.2` • Thinking: `xhigh` • Cost: $74.55_ Change-Id: Icfd5f621eefc533c855a202e8f65739b3194791a --- src/browser/components/RightSidebar.tsx | 159 ++++++++++++++++--- src/browser/utils/rightSidebarLayout.test.ts | 6 +- src/browser/utils/rightSidebarLayout.ts | 7 +- src/common/orpc/schemas/api.ts | 12 ++ src/node/orpc/router.ts | 14 ++ src/node/services/workspaceHarnessService.ts | 121 ++------------ 6 files changed, 179 insertions(+), 140 deletions(-) diff --git a/src/browser/components/RightSidebar.tsx b/src/browser/components/RightSidebar.tsx index cfff951312..2bd7d60a99 100644 --- a/src/browser/components/RightSidebar.tsx +++ b/src/browser/components/RightSidebar.tsx @@ -11,7 +11,11 @@ import { updatePersistedState, usePersistedState, } from "@/browser/hooks/usePersistedState"; -import { useWorkspaceUsage, useWorkspaceStatsSnapshot } from 
"@/browser/stores/WorkspaceStore"; +import { + useWorkspaceUsage, + useWorkspaceStatsSnapshot, + workspaceStore, +} from "@/browser/stores/WorkspaceStore"; import { useFeatureFlags } from "@/browser/contexts/FeatureFlagsContext"; import { useAPI } from "@/browser/contexts/API"; import { CostsTab } from "./RightSidebar/CostsTab"; @@ -21,6 +25,7 @@ import { ErrorBoundary } from "./ErrorBoundary"; import { StatsTab } from "./RightSidebar/StatsTab"; import { sumUsageHistory, type ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator"; +import { RefreshController } from "@/browser/utils/RefreshController"; import { matchesKeybind, KEYBINDS, formatKeybind } from "@/browser/utils/ui/keybinds"; import { SidebarCollapseButton } from "./ui/SidebarCollapseButton"; import { cn } from "@/common/lib/utils"; @@ -51,7 +56,6 @@ import { parseRightSidebarLayoutState, removeTabEverywhere, reorderTabInTabset, - selectTabByIndex, selectTabInTabset, setFocusedTabset, updateSplitSizes, @@ -191,6 +195,7 @@ const DragAwarePanelResizeHandle: React.FC<{ return <PanelResizeHandle className={className} />; }; +type HarnessPresence = "unknown" | "exists" | "missing"; type TabsetNode = Extract<RightSidebarLayoutNode, { type: "tabset" }>; interface RightSidebarTabsetNodeProps { @@ -206,6 +211,7 @@ interface RightSidebarTabsetNodeProps { onReviewStatsChange: (stats: ReviewStats | null) => void; sessionCost: number | null; statsTabEnabled: boolean; + harnessTabEnabled: boolean; sessionDuration: number | null; /** Whether any sidebar tab is currently being dragged */ isDraggingTab: boolean; @@ -237,13 +243,19 @@ interface RightSidebarTabsetNodeProps { } const RightSidebarTabsetNode: React.FC<RightSidebarTabsetNodeProps> = (props) => { + const isTabEnabled = (tab: TabType): boolean => { + if (tab === "stats") return props.statsTabEnabled; + if (tab === "harness") return props.harnessTabEnabled; + return true; + }; + + const activeTab = isTabEnabled(props.node.activeTab) + ? 
props.node.activeTab + : (props.node.tabs.find(isTabEnabled) ?? props.node.activeTab); const tabsetBaseId = `${props.baseId}-${props.node.id}`; // Content container class comes from tab registry - each tab defines its own padding/overflow - const tabsetContentClassName = cn( - "relative flex-1 min-h-0", - getTabContentClassName(props.node.activeTab) - ); + const tabsetContentClassName = cn("relative flex-1 min-h-0", getTabContentClassName(activeTab)); // Drop zones using @dnd-kit's useDroppable const { setNodeRef: contentRef, isOver: isOverContent } = useDroppable({ @@ -297,7 +309,7 @@ const RightSidebarTabsetNode: React.FC<RightSidebarTabsetNodeProps> = (props) => const terminalTabs = props.node.tabs.filter(isTerminalTab); const items = props.node.tabs.flatMap((tab) => { - if (tab === "stats" && !props.statsTabEnabled) { + if (!isTabEnabled(tab)) { return []; } @@ -372,7 +384,7 @@ const RightSidebarTabsetNode: React.FC<RightSidebarTabsetNodeProps> = (props) => { id: tabId, panelId, - selected: props.node.activeTab === tab, + selected: activeTab === tab, onSelect: () => selectTab(tab), label, tooltip, @@ -469,7 +481,7 @@ const RightSidebarTabsetNode: React.FC<RightSidebarTabsetNodeProps> = (props) => )} /> - {props.node.activeTab === "costs" && ( + {activeTab === "costs" && ( <div role="tabpanel" id={costsPanelId} aria-labelledby={costsTabId}> <CostsTab workspaceId={props.workspaceId} /> </div> @@ -479,7 +491,7 @@ const RightSidebarTabsetNode: React.FC<RightSidebarTabsetNodeProps> = (props) => {terminalTabs.map((terminalTab) => { const terminalTabId = `${tabsetBaseId}-tab-${terminalTab}`; const terminalPanelId = `${tabsetBaseId}-panel-${terminalTab}`; - const isActive = props.node.activeTab === terminalTab; + const isActive = activeTab === terminalTab; // Check if this terminal should be auto-focused (was just opened via keybind) const terminalSessionId = getTerminalSessionId(terminalTab); const shouldAutoFocus = isActive && terminalSessionId === 
props.autoFocusTerminalSession; @@ -510,7 +522,7 @@ const RightSidebarTabsetNode: React.FC<RightSidebarTabsetNodeProps> = (props) => role="tabpanel" id={statsPanelId} aria-labelledby={statsTabId} - hidden={props.node.activeTab !== "stats"} + hidden={activeTab !== "stats"} > <ErrorBoundary workspaceInfo="Stats tab"> <StatsTab workspaceId={props.workspaceId} /> @@ -518,13 +530,13 @@ const RightSidebarTabsetNode: React.FC<RightSidebarTabsetNodeProps> = (props) => </div> )} - {props.node.activeTab === "harness" && ( + {props.harnessTabEnabled && activeTab === "harness" && ( <div role="tabpanel" id={harnessPanelId} aria-labelledby={harnessTabId}> <HarnessTab workspaceId={props.workspaceId} /> </div> )} - {props.node.activeTab === "explorer" && ( + {activeTab === "explorer" && ( <div role="tabpanel" id={explorerPanelId} @@ -544,7 +556,7 @@ const RightSidebarTabsetNode: React.FC<RightSidebarTabsetNodeProps> = (props) => const filePath = getFilePath(fileTab); const fileTabId = `${tabsetBaseId}-tab-${fileTab}`; const filePanelId = `${tabsetBaseId}-panel-${fileTab}`; - const isActive = props.node.activeTab === fileTab; + const isActive = activeTab === fileTab; return ( <div @@ -566,7 +578,7 @@ const RightSidebarTabsetNode: React.FC<RightSidebarTabsetNodeProps> = (props) => ); })} - {props.node.activeTab === "review" && ( + {activeTab === "review" && ( <div role="tabpanel" id={reviewPanelId} aria-labelledby={reviewTabId} className="h-full"> <ReviewPanel key={`${props.workspaceId}:${props.node.id}`} @@ -615,6 +627,11 @@ const RightSidebarComponent: React.FC<RightSidebarProps> = ({ // Stats tab feature flag const { statsTabState } = useFeatureFlags(); + + const [harnessPresence, setHarnessPresence] = React.useState<HarnessPresence>("unknown"); + + const { api } = useAPI(); + const harnessTabEnabled = harnessPresence === "exists"; const statsTabEnabled = Boolean(statsTabState?.enabled); // Read last-used focused tab for better defaults when initializing a new layout. 
@@ -667,6 +684,46 @@ const RightSidebarComponent: React.FC<RightSidebarProps> = ({ setLayoutDraft(null); }, [setLayoutRaw]); + const refreshHarnessPresence = React.useCallback(async () => { + if (!api) return; + + try { + const result = await api.workspace.harness.exists({ workspaceId }); + if (!result.success) { + return; + } + + setHarnessPresence(result.data.exists ? "exists" : "missing"); + } catch { + // Defensive: keep the previous state (don't crash / don't force-hide). + } + }, [api, workspaceId]); + + const harnessPresenceRefreshController = React.useMemo( + () => + new RefreshController({ + onRefresh: refreshHarnessPresence, + debounceMs: 1000, + refreshOnFocus: true, + }), + [refreshHarnessPresence] + ); + + React.useEffect(() => { + harnessPresenceRefreshController.bindListeners(); + return () => harnessPresenceRefreshController.dispose(); + }, [harnessPresenceRefreshController]); + + React.useEffect(() => { + setHarnessPresence("unknown"); + harnessPresenceRefreshController.requestImmediate(); + }, [harnessPresenceRefreshController, workspaceId]); + + React.useEffect(() => { + return workspaceStore.subscribeFileModifyingTool(() => { + harnessPresenceRefreshController.schedule(); + }, workspaceId); + }, [harnessPresenceRefreshController, workspaceId]); const layout = React.useMemo( () => parseRightSidebarLayoutState(layoutDraft ?? layoutRaw, initialActiveTab), [layoutDraft, layoutRaw, initialActiveTab] @@ -691,9 +748,33 @@ const RightSidebarComponent: React.FC<RightSidebarProps> = ({ return prev; }); }, [initialActiveTab, setLayoutRaw, statsTabEnabled]); - // If we ever deserialize an invalid layout (e.g. schema changes), reset to defaults. + + // If harness files exist, ensure the Harness tab exists in the layout. + // If missing, ensure it doesn't linger in persisted layouts. 
+ React.useEffect(() => { + if (harnessPresence === "unknown") { + return; + } + + setLayoutRaw((prevRaw) => { + const prev = parseRightSidebarLayoutState(prevRaw, initialActiveTab); + const hasHarness = collectAllTabs(prev.root).includes("harness"); + + if (harnessPresence === "exists" && !hasHarness) { + // Add harness tab to the focused tabset without stealing focus. + return addTabToFocusedTabset(prev, "harness", false); + } + + if (harnessPresence === "missing" && hasHarness) { + return removeTabEverywhere(prev, "harness"); + } + + return prev; + }); + }, [harnessPresence, initialActiveTab, setLayoutRaw]); React.useEffect(() => { if (!isRightSidebarLayoutState(layoutRaw)) { + // If we ever deserialize an invalid layout (e.g. schema changes), reset to defaults. setLayoutRaw(layout); } }, [layout, layoutRaw, setLayoutRaw]); @@ -760,9 +841,22 @@ const RightSidebarComponent: React.FC<RightSidebarProps> = ({ layoutRawRef.current, initialActiveTab ); - const allTabs = collectAllTabsWithTabset(currentLayout.root); + const allTabs = collectAllTabsWithTabset(currentLayout.root).filter(({ tab }) => { + if (tab === "stats" && !statsTabEnabled) { + return false; + } + if (tab === "harness" && !harnessTabEnabled) { + return false; + } + return true; + }); + const target = allTabs[i]; - if (target && isTerminalTab(target.tab)) { + if (!target) { + return; + } + + if (isTerminalTab(target.tab)) { const sessionId = getTerminalSessionId(target.tab); if (sessionId) { setAutoFocusTerminalSession(sessionId); @@ -773,7 +867,9 @@ const RightSidebarComponent: React.FC<RightSidebarProps> = ({ _setFocusTrigger((prev) => prev + 1); } - setLayout((prev) => selectTabByIndex(prev, i)); + setLayout((prev) => + selectTabInTabset(setFocusedTabset(prev, target.tabsetId), target.tabsetId, target.tab) + ); setCollapsed(false); return; } @@ -782,7 +878,15 @@ const RightSidebarComponent: React.FC<RightSidebarProps> = ({ window.addEventListener("keydown", handleKeyDown); return () => 
window.removeEventListener("keydown", handleKeyDown); - }, [initialActiveTab, setAutoFocusTerminalSession, setCollapsed, setLayout, _setFocusTrigger]); + }, [ + harnessTabEnabled, + initialActiveTab, + setAutoFocusTerminalSession, + setCollapsed, + setLayout, + statsTabEnabled, + _setFocusTrigger, + ]); const usage = useWorkspaceUsage(workspaceId); @@ -790,13 +894,22 @@ const RightSidebarComponent: React.FC<RightSidebarProps> = ({ // Build map of tab → position for keybind tooltips const tabPositions = React.useMemo(() => { - const allTabs = collectAllTabsWithTabset(layout.root); + const allTabs = collectAllTabsWithTabset(layout.root).filter(({ tab }) => { + if (tab === "stats" && !statsTabEnabled) { + return false; + } + if (tab === "harness" && !harnessTabEnabled) { + return false; + } + return true; + }); + const positions = new Map<TabType, number>(); allTabs.forEach(({ tab }, index) => { positions.set(tab, index); }); return positions; - }, [layout.root]); + }, [harnessTabEnabled, layout.root, statsTabEnabled]); // Calculate session cost for tab display const sessionCost = React.useMemo(() => { @@ -840,7 +953,6 @@ const RightSidebarComponent: React.FC<RightSidebarProps> = ({ }); // API for opening terminal windows and managing sessions - const { api } = useAPI(); // Keyboard shortcut for closing active tab (Ctrl/Cmd+W) // Works for terminal tabs and file tabs @@ -1220,6 +1332,7 @@ const RightSidebarComponent: React.FC<RightSidebarProps> = ({ focusTrigger={focusTrigger} onReviewNote={onReviewNote} reviewStats={reviewStats} + harnessTabEnabled={harnessTabEnabled} statsTabEnabled={statsTabEnabled} sessionDuration={sessionDuration} onReviewStatsChange={setReviewStats} diff --git a/src/browser/utils/rightSidebarLayout.test.ts b/src/browser/utils/rightSidebarLayout.test.ts index e46ac4a779..eecaba2c55 100644 --- a/src/browser/utils/rightSidebarLayout.test.ts +++ b/src/browser/utils/rightSidebarLayout.test.ts @@ -122,19 +122,19 @@ test("moveTabToTabset removes empty 
source tabset", () => { }); test("reorderTabInTabset reorders tabs within a tabset", () => { - // Default layout has ["costs", "review", "explorer", "harness"]; reorder costs from 0 to 1 + // Default layout has ["costs", "review", "explorer"]; reorder costs from 0 to 1 const s0 = getDefaultRightSidebarLayoutState("costs"); const s1 = reorderTabInTabset(s0, "tabset-1", 0, 1); expect(s1.root.type).toBe("tabset"); if (s1.root.type !== "tabset") throw new Error("expected tabset"); - expect(s1.root.tabs).toEqual(["review", "costs", "explorer", "harness"]); + expect(s1.root.tabs).toEqual(["review", "costs", "explorer"]); expect(s1.root.activeTab).toBe("costs"); }); test("dockTabToEdge splits a tabset and moves the dragged tab into the new pane", () => { - // Default layout has ["costs", "review", "explorer", "harness"]; drag review into a bottom split + // Default layout has ["costs", "review", "explorer"]; drag review into a bottom split const s0 = getDefaultRightSidebarLayoutState("costs"); const s1 = dockTabToEdge(s0, "review", "tabset-1", "tabset-1", "bottom"); diff --git a/src/browser/utils/rightSidebarLayout.ts b/src/browser/utils/rightSidebarLayout.ts index f1fb6b56f8..bdccb953fb 100644 --- a/src/browser/utils/rightSidebarLayout.ts +++ b/src/browser/utils/rightSidebarLayout.ts @@ -58,7 +58,7 @@ export interface RightSidebarLayoutState { export function getDefaultRightSidebarLayoutState(activeTab: TabType): RightSidebarLayoutState { // Default tabs exclude terminal - users add terminals via the "+" button - const baseTabs: TabType[] = ["costs", "review", "explorer", "harness"]; + const baseTabs: TabType[] = ["costs", "review", "explorer"]; const tabs = baseTabs.includes(activeTab) ? 
baseTabs : [...baseTabs, activeTab]; return { @@ -110,11 +110,6 @@ export function parseRightSidebarLayoutState( injectTabIntoLayout(raw.root, "explorer"); } - // Migrate: inject "harness" tab if missing from persisted layout - if (!layoutContainsTab(raw.root, "harness")) { - injectTabIntoLayout(raw.root, "harness"); - } - return raw; } diff --git a/src/common/orpc/schemas/api.ts b/src/common/orpc/schemas/api.ts index a481cf30d3..17135cc5ea 100644 --- a/src/common/orpc/schemas/api.ts +++ b/src/common/orpc/schemas/api.ts @@ -703,6 +703,18 @@ export const workspace = { /** Workspace-local harness config + gates */ harness: { + exists: { + input: z.object({ workspaceId: z.string() }), + output: ResultSchema( + z + .object({ + exists: z.boolean(), + paths: WorkspaceHarnessFilePathsSchema, + }) + .strict(), + z.string() + ), + }, get: { input: z.object({ workspaceId: z.string() }), output: ResultSchema( diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index d7a230b82f..21b48b5f07 100644 --- a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -1703,6 +1703,20 @@ export const router = (authToken?: string) => { }), }, harness: { + exists: t + .input(schemas.workspace.harness.exists.input) + .output(schemas.workspace.harness.exists.output) + .handler(async ({ context, input }) => { + try { + const presence = await context.workspaceHarnessService.getHarnessPresenceForWorkspace( + input.workspaceId + ); + return { success: true, data: presence }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + return { success: false, error: message }; + } + }), get: t .input(schemas.workspace.harness.get.input) .output(schemas.workspace.harness.get.output) diff --git a/src/node/services/workspaceHarnessService.ts b/src/node/services/workspaceHarnessService.ts index 9587e983f5..1a9ba6bf17 100644 --- a/src/node/services/workspaceHarnessService.ts +++ b/src/node/services/workspaceHarnessService.ts @@ -281,31 +281,6 @@ export class WorkspaceHarnessService { return { metadata, runtime, workspacePath }; } - private getLegacyHarnessFilePaths( - workspacePath: string, - runtimeConfig: RuntimeConfig | undefined, - workspaceName: string - ): WorkspaceHarnessFilePaths { - assert(typeof workspacePath === "string", "workspacePath must be a string"); - assert(typeof workspaceName === "string", "workspaceName must be a string"); - - const prefix = workspaceName.trim().length > 0 ? workspaceName.trim() : "workspace"; - - return { - configPath: joinForRuntime( - runtimeConfig, - workspacePath, - HARNESS_DIR, - `${prefix}.harness.jsonc` - ), - progressPath: joinForRuntime( - runtimeConfig, - workspacePath, - HARNESS_DIR, - `${prefix}.harness.progress.md` - ), - }; - } private getHarnessFilePaths( workspacePath: string, runtimeConfig: RuntimeConfig | undefined, @@ -438,7 +413,6 @@ export class WorkspaceHarnessService { workspacePath: string; runtimeConfig: RuntimeConfig | undefined; paths: WorkspaceHarnessFilePaths; - legacyPaths: WorkspaceHarnessFilePaths; }): Promise<void> { try { await this.ensureHarnessDir(params.runtime, params.workspacePath, params.runtimeConfig); @@ -448,34 +422,11 @@ export class WorkspaceHarnessService { return; } - let legacyProgressContents = ""; - const legacyExists = await statIsFile(params.runtime, params.legacyPaths.progressPath); - if (legacyExists) { - try { - legacyProgressContents = await readFileString( - params.runtime, - params.legacyPaths.progressPath - ); - } catch (error) { - log.debug("[HARNESS] Failed to read 
legacy harness progress file", { - filePath: params.legacyPaths.progressPath, - error, - }); - } - } - - let markdown = renderHarnessJournalBootstrapMarkdown({ + const markdown = renderHarnessJournalBootstrapMarkdown({ metadata: params.metadata, paths: params.paths, }); - if (legacyProgressContents.trim().length > 0) { - markdown += - "\n## Migrated content (legacy progress file)\n\n" + - legacyProgressContents.trimEnd() + - "\n"; - } - await writeFileString( params.runtime, params.paths.progressPath, @@ -494,6 +445,17 @@ export class WorkspaceHarnessService { } } + async getHarnessPresenceForWorkspace(workspaceId: string): Promise<{ + exists: boolean; + paths: WorkspaceHarnessFilePaths; + }> { + const { metadata, runtime, workspacePath } = await this.getRuntimeAndWorkspacePath(workspaceId); + const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name); + + const exists = await statIsFile(runtime, paths.configPath); + return { exists, paths }; + } + async getHarnessForWorkspace(workspaceId: string): Promise<{ config: WorkspaceHarnessConfig; paths: WorkspaceHarnessFilePaths; @@ -501,40 +463,7 @@ export class WorkspaceHarnessService { }> { const { metadata, runtime, workspacePath } = await this.getRuntimeAndWorkspacePath(workspaceId); const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name); - const legacyPaths = this.getLegacyHarnessFilePaths( - workspacePath, - metadata.runtimeConfig, - metadata.name - ); - - let exists = await statIsFile(runtime, paths.configPath); - if (!exists) { - const legacyExists = await statIsFile(runtime, legacyPaths.configPath); - if (legacyExists) { - try { - const rawLegacy = await readFileString(runtime, legacyPaths.configPath); - await this.ensureHarnessDir(runtime, workspacePath, metadata.runtimeConfig); - await writeFileString( - runtime, - paths.configPath, - rawLegacy.endsWith("\n") ? 
rawLegacy : `${rawLegacy}\n` - ); - await this.ensureHarnessGitignored(runtime, workspacePath, metadata.runtimeConfig); - exists = true; - } catch (error) { - log.debug("[HARNESS] Failed to migrate legacy harness config file", { - workspaceId, - error, - }); - const parsedLegacy = await this.readHarnessFile(runtime, legacyPaths.configPath); - return { - config: normalizeWorkspaceHarnessConfig(parsedLegacy), - paths: legacyPaths, - exists: true, - }; - } - } - } + const exists = await statIsFile(runtime, paths.configPath); if (!exists) { return { config: { ...DEFAULT_HARNESS_CONFIG }, paths, exists: false }; @@ -556,12 +485,6 @@ export class WorkspaceHarnessService { const { metadata, runtime, workspacePath } = await this.getRuntimeAndWorkspacePath(workspaceId); const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name); - const legacyPaths = this.getLegacyHarnessFilePaths( - workspacePath, - metadata.runtimeConfig, - metadata.name - ); - const normalized = normalizeWorkspaceHarnessConfig(config); const serialized = JSON.stringify(normalized, null, 2) + "\n"; @@ -570,23 +493,12 @@ export class WorkspaceHarnessService { await writeFileString(runtime, paths.configPath, serialized); await this.ensureHarnessGitignored(runtime, workspacePath, metadata.runtimeConfig); - // Best-effort: keep the legacy file updated for downgrade compatibility. 
- try { - const legacyExists = await statIsFile(runtime, legacyPaths.configPath); - if (legacyExists) { - await writeFileString(runtime, legacyPaths.configPath, serialized); - } - } catch (error) { - log.debug("[HARNESS] Failed to update legacy harness config file", { workspaceId, error }); - } - await this.ensureHarnessJournalExists({ metadata, runtime, workspacePath, runtimeConfig: metadata.runtimeConfig, paths, - legacyPaths, }); return normalized; @@ -598,19 +510,12 @@ export class WorkspaceHarnessService { await this.getRuntimeAndWorkspacePath(workspaceId); const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name); - const legacyPaths = this.getLegacyHarnessFilePaths( - workspacePath, - metadata.runtimeConfig, - metadata.name - ); - await this.ensureHarnessJournalExists({ metadata, runtime, workspacePath, runtimeConfig: metadata.runtimeConfig, paths, - legacyPaths, }); } catch (error) { log.debug("[HARNESS] Failed to ensure harness journal exists", { workspaceId, error }); From 584f22b71161df2885c5191488a4919b4548ac19 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Wed, 21 Jan 2026 14:54:21 +0100 Subject: [PATCH 12/20] =?UTF-8?q?=F0=9F=A4=96=20fix:=20preserve=20Harness?= =?UTF-8?q?=20tab=20when=20stat=20fails?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I19674059830f6d2a447a96dfef6cebb64c65143e Signed-off-by: Thomas Kosiewski <tk@coder.com> --- src/node/services/workspaceHarnessService.ts | 34 ++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/src/node/services/workspaceHarnessService.ts b/src/node/services/workspaceHarnessService.ts index 1a9ba6bf17..34aed18cdd 100644 --- a/src/node/services/workspaceHarnessService.ts +++ b/src/node/services/workspaceHarnessService.ts @@ -193,6 +193,32 @@ function normalizeWorkspaceHarnessConfig(raw: unknown): WorkspaceHarnessConfig { return normalized; } +function 
isNotFoundStatError(error: unknown): boolean { + if (!error) { + return false; + } + + if (typeof error === "object") { + if ("code" in error && (error as { code?: unknown }).code === "ENOENT") { + return true; + } + + if ("cause" in error) { + const cause = (error as { cause?: unknown }).cause; + if (cause && cause !== error) { + return isNotFoundStatError(cause); + } + } + } + + if (error instanceof Error) { + const message = error.message; + return message.includes("ENOENT") || message.includes("No such file or directory"); + } + + return false; +} + async function statIsFile( runtime: ReturnType<typeof createRuntime>, filePath: string @@ -200,8 +226,12 @@ async function statIsFile( try { const stat = await runtime.stat(filePath); return !stat.isDirectory; - } catch { - return false; + } catch (error) { + if (isNotFoundStatError(error)) { + return false; + } + + throw error; } } From 2e5afddaa08025a1233414297878cf2398e43322 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Wed, 21 Jan 2026 15:19:02 +0100 Subject: [PATCH 13/20] =?UTF-8?q?=F0=9F=A4=96=20fix:=20prevent=20git=20pro?= =?UTF-8?q?mpts=20+=20correct=20SSH2=20exit=20codes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I5d1a99f30bf1c8997053eab48894cac50f4d7317 Signed-off-by: Thomas Kosiewski <tk@coder.com> --- src/common/constants/env.ts | 5 +++++ src/node/services/bashExecutionService.ts | 14 ++++---------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/common/constants/env.ts b/src/common/constants/env.ts index d443c60fcb..ea26645401 100644 --- a/src/common/constants/env.ts +++ b/src/common/constants/env.ts @@ -11,4 +11,9 @@ export const NON_INTERACTIVE_ENV_VARS = { VISUAL: "true", // Another common editor environment variable // Prevent git from prompting for credentials GIT_TERMINAL_PROMPT: "0", // Disables git credential prompts + + // Some Git installs (notably on Windows) can still try to prompt via 
askpass helpers. + // Force a non-interactive askpass implementation so commands fail quickly. + GIT_ASKPASS: "echo", + SSH_ASKPASS: "echo", } as const; diff --git a/src/node/services/bashExecutionService.ts b/src/node/services/bashExecutionService.ts index 3755721ede..c23f47805c 100644 --- a/src/node/services/bashExecutionService.ts +++ b/src/node/services/bashExecutionService.ts @@ -1,6 +1,7 @@ import { spawn } from "child_process"; import type { ChildProcess } from "child_process"; import { log } from "./log"; +import { NON_INTERACTIVE_ENV_VARS } from "@/common/constants/env"; import { getBashPath } from "@/node/utils/main/bashPath"; /** @@ -85,16 +86,9 @@ export class BashExecutionService { ...process.env, // Inject secrets as environment variables ...(secrets ?? {}), - // Prevent interactive editors from blocking bash execution - // Critical for git operations like rebase/commit that try to open editors - GIT_EDITOR: "true", // Git-specific editor (highest priority) - GIT_SEQUENCE_EDITOR: "true", // For interactive rebase sequences - EDITOR: "true", // General fallback for non-git commands - VISUAL: "true", // Another common editor environment variable - // Prevent git from prompting for credentials - // Critical for operations like fetch/pull that might try to authenticate - // Without this, git can hang waiting for user input if credentials aren't configured - GIT_TERMINAL_PROMPT: "0", // Disables git credential prompts + + // Prevent interactive editors / credential prompts from blocking execution. 
+ ...NON_INTERACTIVE_ENV_VARS, }; } From 81955998c01621d2e0e1ee8ab497b74000b6a896 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Wed, 21 Jan 2026 15:23:07 +0100 Subject: [PATCH 14/20] =?UTF-8?q?=F0=9F=A4=96=20fix:=20pause=20loop=20when?= =?UTF-8?q?=20harness=20config=20load=20fails?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I44047b8a001042fec6b21d46baf7502a231540ce Signed-off-by: Thomas Kosiewski <tk@coder.com> --- src/node/services/loopRunnerService.ts | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/node/services/loopRunnerService.ts b/src/node/services/loopRunnerService.ts index cdc3bd7270..c1017e951e 100644 --- a/src/node/services/loopRunnerService.ts +++ b/src/node/services/loopRunnerService.ts @@ -376,8 +376,12 @@ export class LoopRunnerService extends EventEmitter { this.controllers.set(workspaceId, abortController); void this.runLoop(workspaceId, abortController.signal) - .catch((error: unknown) => { + .catch(async (error: unknown) => { log.error("[HARNESS] Loop runner crashed", { workspaceId, error }); + + // Defensive: if the runner crashes, make sure we don't strand the state as "running". + const message = error instanceof Error ? error.message : String(error); + await this.pause(workspaceId, `Loop runner crashed: ${message}`); }) .finally(() => { const current = this.controllers.get(workspaceId); @@ -415,7 +419,15 @@ export class LoopRunnerService extends EventEmitter { return; } - const harness = await this.workspaceHarnessService.getHarnessForWorkspace(workspaceId); + let harness: Awaited<ReturnType<WorkspaceHarnessService["getHarnessForWorkspace"]>>; + try { + harness = await this.workspaceHarnessService.getHarnessForWorkspace(workspaceId); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + await this.pause(workspaceId, `Failed to load harness config: ${message}`); + return; + } + const config = harness.config; const loop = config.loop; From 2b449656548ec70a677d19a80af1be12b3e5e834 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Mon, 26 Jan 2026 20:39:09 +0100 Subject: [PATCH 15/20] fix: migrate tool config to plan/exec mode Replace planFileOnly with mode/agentId/allowedEditPaths and update tests + docs. Change-Id: If00ef62e3877084ef650822af645e254371c9d3b Signed-off-by: Thomas Kosiewski <tk@coder.com> --- src/common/utils/tools/tools.ts | 1 + src/node/orpc/router.ts | 2 +- src/node/services/aiService.ts | 7 +++++-- src/node/services/loopRunnerService.ts | 5 +++-- src/node/services/tools/fileCommon.test.ts | 2 +- src/node/services/tools/file_edit_insert.test.ts | 10 +++++----- src/node/services/tools/file_edit_operation.test.ts | 10 +++++----- src/node/services/tools/file_read.test.ts | 2 +- src/node/services/tools/file_read.ts | 4 ++-- src/node/services/tools/task.test.ts | 6 +++--- 10 files changed, 27 insertions(+), 22 deletions(-) diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts index 22da648e1d..e04c3209d7 100644 --- a/src/common/utils/tools/tools.ts +++ b/src/common/utils/tools/tools.ts @@ -39,6 +39,7 @@ import type { InitStateManager } from "@/node/services/initStateManager"; import type { BackgroundProcessManager } from "@/node/services/backgroundProcessManager"; import type { TaskService } from "@/node/services/taskService"; import type { WorkspaceChatMessage } from "@/common/orpc/types"; +import type { UIMode } from "@/common/types/mode"; import type { FileState } from "@/node/services/agentSession"; import type { AgentDefinitionDescriptor } from "@/common/types/agentDefinition"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index 21b48b5f07..335975cd50 100644 --- 
a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -1947,7 +1947,7 @@ export const router = (authToken?: string) => { } const userModel = - metadata.aiSettingsByMode?.exec?.model ?? metadata.aiSettings?.model; + metadata.aiSettingsByAgent?.exec?.model ?? metadata.aiSettings?.model; const modelString = await selectModelForNameGeneration( context.aiService, undefined, diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index 40e2c019de..e52a174e9d 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -1416,7 +1416,10 @@ export class AIService extends EventEmitter { runtime: earlyRuntime, runtimeTempDir: os.tmpdir(), secrets: {}, - planFileOnly: agentIsPlanLike, + mode: effectiveMode === "plan" ? "plan" : "exec", + agentId: effectiveAgentId, + allowedEditPaths: + effectiveAgentId === "harness-init" ? [".mux/harness/*.jsonc"] : undefined, }, "", // Empty workspace ID for early stub config this.initStateManager, @@ -1763,7 +1766,7 @@ export class AIService extends EventEmitter { // Plan/exec mode configuration for plan file access. // - read: plan file is readable in all modes (useful context) // - write: enforced by file_edit_* tools (plan file is read-only outside plan mode) - mode: effectiveMode, + mode: effectiveMode === "plan" ? "plan" : "exec", agentId: effectiveAgentId, allowedEditPaths: effectiveAgentId === "harness-init" ? [".mux/harness/*.jsonc"] : undefined, diff --git a/src/node/services/loopRunnerService.ts b/src/node/services/loopRunnerService.ts index c1017e951e..1d8fcaed40 100644 --- a/src/node/services/loopRunnerService.ts +++ b/src/node/services/loopRunnerService.ts @@ -462,9 +462,9 @@ export class LoopRunnerService extends EventEmitter { const progressPathHint = `.mux/harness/${info.name}.progress.md`; const modelString = - info.aiSettingsByMode?.exec?.model ?? info.aiSettings?.model ?? defaultModel; + info.aiSettingsByAgent?.exec?.model ?? info.aiSettings?.model ?? 
defaultModel; const thinkingLevel = - info.aiSettingsByMode?.exec?.thinkingLevel ?? info.aiSettings?.thinkingLevel; + info.aiSettingsByAgent?.exec?.thinkingLevel ?? info.aiSettings?.thinkingLevel; const blocked = config.checklist.find((item) => item.status === "blocked") ?? null; const nextItem = findNextChecklistItem(config); @@ -502,6 +502,7 @@ export class LoopRunnerService extends EventEmitter { const sendResult = await this.workspaceService.sendMessage(workspaceId, prompt, { model: modelString, thinkingLevel, + agentId: "exec", mode: "exec", toolPolicy, muxMetadata: { type: "harness-loop", iteration: updatedStateBeforeSend.iteration }, diff --git a/src/node/services/tools/fileCommon.test.ts b/src/node/services/tools/fileCommon.test.ts index 2dbee293cb..eb4b434094 100644 --- a/src/node/services/tools/fileCommon.test.ts +++ b/src/node/services/tools/fileCommon.test.ts @@ -333,7 +333,7 @@ describe("fileCommon", () => { cwd: "/home/user/project", runtime: mockRuntime, runtimeTempDir: "/tmp", - planFileOnly: true, + mode: "plan", planFilePath, }; diff --git a/src/node/services/tools/file_edit_insert.test.ts b/src/node/services/tools/file_edit_insert.test.ts index 563acf03bd..dabb4a69e0 100644 --- a/src/node/services/tools/file_edit_insert.test.ts +++ b/src/node/services/tools/file_edit_insert.test.ts @@ -181,7 +181,7 @@ describe("file_edit_insert plan mode enforcement", () => { cwd: workspaceCwd, runtime: createRuntime({ type: "local", srcBaseDir: workspaceCwd }), runtimeTempDir: testDir, - planFileOnly: true, + mode: "plan", planFilePath: planFilePath, }); @@ -194,7 +194,7 @@ describe("file_edit_insert plan mode enforcement", () => { expect(result.success).toBe(false); if (!result.success) { - expect(result.error).toContain("In the plan agent, only the plan file can be edited"); + expect(result.error).toContain("In plan mode, only the plan file can be edited"); } }); @@ -210,7 +210,7 @@ describe("file_edit_insert plan mode enforcement", () => { cwd: workspaceCwd, 
runtime: createRuntime({ type: "local", srcBaseDir: workspaceCwd }), runtimeTempDir: testDir, - planFileOnly: true, + mode: "plan", planFilePath: planFilePath, }); @@ -265,7 +265,7 @@ describe("file_edit_insert plan mode enforcement", () => { cwd: workspaceCwd, runtime: createRuntime({ type: "local", srcBaseDir: workspaceCwd }), runtimeTempDir: testDir, - planFileOnly: true, + mode: "plan", planFilePath: realPlanPath, // The REAL plan file path }); @@ -279,7 +279,7 @@ describe("file_edit_insert plan mode enforcement", () => { expect(result.success).toBe(false); if (!result.success) { - expect(result.error).toContain("In the plan agent, only the plan file can be edited"); + expect(result.error).toContain("In plan mode, only the plan file can be edited"); expect(result.error).toContain("exact plan file path"); expect(result.error).toContain(realPlanPath); expect(result.error).toContain(".mux/plan.md"); diff --git a/src/node/services/tools/file_edit_operation.test.ts b/src/node/services/tools/file_edit_operation.test.ts index 71e29d553b..c9b51912b1 100644 --- a/src/node/services/tools/file_edit_operation.test.ts +++ b/src/node/services/tools/file_edit_operation.test.ts @@ -129,7 +129,7 @@ describe("executeFileEditOperation plan mode enforcement", () => { cwd: TEST_CWD, runtime: mockRuntime, runtimeTempDir: "/tmp", - planFileOnly: true, + mode: "plan", planFilePath: PLAN_FILE_PATH, }, filePath: OTHER_FILE_PATH, @@ -138,7 +138,7 @@ describe("executeFileEditOperation plan mode enforcement", () => { expect(result.success).toBe(false); if (!result.success) { - expect(result.error).toContain("In the plan agent, only the plan file can be edited"); + expect(result.error).toContain("In plan mode, only the plan file can be edited"); expect(result.error).toContain(OTHER_FILE_PATH); } @@ -162,7 +162,7 @@ describe("executeFileEditOperation plan mode enforcement", () => { cwd: workspaceCwd, runtime: new LocalRuntime(workspaceCwd), runtimeTempDir: tempDir.path, - planFileOnly: true, 
+ mode: "plan", planFilePath: planPath, }, filePath: planPath, @@ -237,7 +237,7 @@ describe("executeFileEditOperation plan mode enforcement", () => { expect(result.success).toBe(false); if (!result.success) { - expect(result.error).toContain("read-only outside the plan agent"); + expect(result.error).toContain("read-only outside plan mode"); } // Verify file was not modified @@ -285,7 +285,7 @@ describe("executeFileEditOperation plan mode enforcement", () => { cwd: "/home/user/project", runtime: mockRuntime, runtimeTempDir: "/tmp", - planFileOnly: true, + mode: "plan", planFilePath: "/home/user/.mux/sessions/ws/plan.md", }, filePath: "../.mux/sessions/ws/plan.md", // Alternate path to plan file diff --git a/src/node/services/tools/file_read.test.ts b/src/node/services/tools/file_read.test.ts index 5478dd401d..e9b787577e 100644 --- a/src/node/services/tools/file_read.test.ts +++ b/src/node/services/tools/file_read.test.ts @@ -420,7 +420,7 @@ describe("file_read tool", () => { cwd: testDir, runtime: new LocalRuntime(testDir), runtimeTempDir: testDir, - planFileOnly: true, + mode: "plan", planFilePath: planPath, }); diff --git a/src/node/services/tools/file_read.ts b/src/node/services/tools/file_read.ts index e32261babc..4758d4d82a 100644 --- a/src/node/services/tools/file_read.ts +++ b/src/node/services/tools/file_read.ts @@ -49,8 +49,8 @@ export const createFileReadTool: ToolFactory = (config: ToolConfiguration) => { if (pathValidation) { // In plan mode, hint about the plan file path to help model recover const hint = - config.planFileOnly && config.planFilePath - ? ` In the plan agent, use the exact plan file path string as provided: ${config.planFilePath}` + config.mode === "plan" && config.planFilePath + ? 
` In plan mode, use the exact plan file path string as provided: ${config.planFilePath}` : ""; return { success: false, diff --git a/src/node/services/tools/task.test.ts b/src/node/services/tools/task.test.ts index 2e51f30e51..15193380c7 100644 --- a/src/node/services/tools/task.test.ts +++ b/src/node/services/tools/task.test.ts @@ -116,7 +116,7 @@ describe("task tool", () => { } }); - it('should reject spawning "exec" tasks while in plan agent', async () => { + it('should reject spawning "exec" tasks while in plan mode', async () => { using tempDir = new TestTempDir("test-task-tool"); const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); @@ -133,7 +133,7 @@ describe("task tool", () => { const tool = createTaskTool({ ...baseConfig, - planFileOnly: true, + mode: "plan", taskService, }); @@ -151,7 +151,7 @@ describe("task tool", () => { expect(caught).toBeInstanceOf(Error); if (caught instanceof Error) { - expect(caught.message).toMatch(/plan agent/i); + expect(caught.message).toMatch(/plan mode/i); } expect(create).not.toHaveBeenCalled(); expect(waitForAgentReport).not.toHaveBeenCalled(); From 210ef7cb356267b0df4440f7edd66823c8fa2553 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Mon, 26 Jan 2026 21:40:51 +0100 Subject: [PATCH 16/20] fix: handle slashy workspace names in harness paths Ensure harness parent dirs exist before writing config/journal and update journal hint + tests. 
Change-Id: Id05475a1414d0e5b2a26efa3b63c49f619390b62 Signed-off-by: Thomas Kosiewski <tk@coder.com> --- .../services/workspaceHarnessService.test.ts | 24 +++++++++- src/node/services/workspaceHarnessService.ts | 45 ++++++++++++++----- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/src/node/services/workspaceHarnessService.test.ts b/src/node/services/workspaceHarnessService.test.ts index 219cf008e7..da7834703e 100644 --- a/src/node/services/workspaceHarnessService.test.ts +++ b/src/node/services/workspaceHarnessService.test.ts @@ -37,14 +37,13 @@ describe("WorkspaceHarnessService (journal)", () => { await fs.rm(tempDir, { recursive: true, force: true }); }); - async function setupWorkspace(): Promise<{ + async function setupWorkspace(workspaceName = "branch"): Promise<{ workspaceId: string; workspaceName: string; workspacePath: string; }> { const projectPath = "/fake/project"; const workspaceId = "ws-id"; - const workspaceName = "branch"; const workspacePath = getWorkspacePath({ srcDir: config.srcDir, @@ -91,6 +90,27 @@ describe("WorkspaceHarnessService (journal)", () => { expect(contents).toContain(`.mux/harness/${workspaceName}.jsonc`); }); + it("creates harness files for slashy workspace names", async () => { + const { workspaceId, workspaceName, workspacePath } = await setupWorkspace("feature/foo"); + + const service = new WorkspaceHarnessService(config); + await service.setHarnessForWorkspace(workspaceId, { + version: 1, + checklist: [], + gates: [], + loop: {}, + }); + + const configPath = path.join(workspacePath, ".mux", "harness", `${workspaceName}.jsonc`); + const journalPath = path.join(workspacePath, ".mux", "harness", `${workspaceName}.progress.md`); + + expect(await pathExists(configPath)).toBe(true); + expect(await pathExists(journalPath)).toBe(true); + + const contents = await fs.readFile(journalPath, "utf-8"); + expect(contents).toContain(`.mux/harness/${workspaceName}.jsonc`); + }); + it("does not overwrite an existing journal 
file", async () => { const { workspaceId, workspaceName, workspacePath } = await setupWorkspace(); diff --git a/src/node/services/workspaceHarnessService.ts b/src/node/services/workspaceHarnessService.ts index 34aed18cdd..a969fe3ea6 100644 --- a/src/node/services/workspaceHarnessService.ts +++ b/src/node/services/workspaceHarnessService.ts @@ -20,7 +20,7 @@ import { log } from "@/node/services/log"; const HARNESS_DIR = ".mux/harness"; -const HARNESS_GITIGNORE_PATTERNS = [`${HARNESS_DIR}/*.jsonc`, `${HARNESS_DIR}/*.progress.md`]; +const HARNESS_GITIGNORE_PATTERNS = [`${HARNESS_DIR}/**/*.jsonc`, `${HARNESS_DIR}/**/*.progress.md`]; const DEFAULT_LOOP_SETTINGS: Required< Pick< @@ -57,6 +57,11 @@ function joinForRuntime(runtimeConfig: RuntimeConfig | undefined, ...parts: stri return usePosix ? path.posix.join(...parts) : path.join(...parts); } +function dirnameForRuntime(runtimeConfig: RuntimeConfig | undefined, filePath: string): string { + const usePosix = runtimeConfig?.type === "ssh" || runtimeConfig?.type === "docker"; + return usePosix ? path.posix.dirname(filePath) : path.dirname(filePath); +} + function isAbsoluteForRuntime(runtimeConfig: RuntimeConfig | undefined, filePath: string): boolean { const usePosix = runtimeConfig?.type === "ssh" || runtimeConfig?.type === "docker"; return usePosix ? path.posix.isAbsolute(filePath) : path.isAbsolute(filePath); @@ -241,7 +246,9 @@ function renderHarnessJournalBootstrapMarkdown(params: { }): string { const nowIso = new Date().toISOString(); - const configBasename = path.basename(params.paths.configPath); + const configPrefix = + params.metadata.name.trim().length > 0 ? 
params.metadata.name.trim() : "workspace"; + const configRelPath = path.posix.join(HARNESS_DIR, `${configPrefix}.jsonc`); const lines: string[] = []; lines.push("# Harness journal (append-only)"); @@ -251,7 +258,7 @@ function renderHarnessJournalBootstrapMarkdown(params: { lines.push(""); lines.push(`- Workspace: ${params.metadata.name} (${params.metadata.id})`); lines.push(`- Created: ${nowIso}`); - lines.push(`- Harness config: ${path.posix.join(HARNESS_DIR, configBasename)}`); + lines.push(`- Harness config: ${configRelPath}`); lines.push(""); lines.push("## Entry template"); lines.push(""); @@ -357,15 +364,26 @@ export class WorkspaceHarnessService { private async ensureHarnessDir( runtime: ReturnType<typeof createRuntime>, workspacePath: string, - runtimeConfig: RuntimeConfig | undefined + runtimeConfig: RuntimeConfig | undefined, + paths?: WorkspaceHarnessFilePaths ): Promise<void> { const harnessDirPath = joinForRuntime(runtimeConfig, workspacePath, HARNESS_DIR); - try { - await runtime.ensureDir(harnessDirPath); - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - throw new Error(`Failed to create ${HARNESS_DIR} directory: ${msg}`); + const dirPaths = new Set([harnessDirPath]); + if (paths) { + // Workspace names can contain slashes (e.g. "feature/foo"), which means harness files may end up + // nested under `.mux/harness/feature/foo.jsonc`. Ensure parent dirs exist before writing. + dirPaths.add(dirnameForRuntime(runtimeConfig, paths.configPath)); + dirPaths.add(dirnameForRuntime(runtimeConfig, paths.progressPath)); + } + + for (const dirPath of dirPaths) { + try { + await runtime.ensureDir(dirPath); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + throw new Error(`Failed to create harness directory (${dirPath}): ${msg}`); + } } } @@ -445,7 +463,12 @@ export class WorkspaceHarnessService { paths: WorkspaceHarnessFilePaths; }): Promise<void> { try { - await this.ensureHarnessDir(params.runtime, params.workspacePath, params.runtimeConfig); + await this.ensureHarnessDir( + params.runtime, + params.workspacePath, + params.runtimeConfig, + params.paths + ); const exists = await statIsFile(params.runtime, params.paths.progressPath); if (exists) { @@ -518,7 +541,7 @@ export class WorkspaceHarnessService { const normalized = normalizeWorkspaceHarnessConfig(config); const serialized = JSON.stringify(normalized, null, 2) + "\n"; - await this.ensureHarnessDir(runtime, workspacePath, metadata.runtimeConfig); + await this.ensureHarnessDir(runtime, workspacePath, metadata.runtimeConfig, paths); await writeFileString(runtime, paths.configPath, serialized); await this.ensureHarnessGitignored(runtime, workspacePath, metadata.runtimeConfig); From 3a7e19ec250993afc45175a8addafd830fb3f01e Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Mon, 26 Jan 2026 22:09:50 +0100 Subject: [PATCH 17/20] fix: allow harness-init to edit nested harness paths Include nested harness glob in harness-init edit allowlist and update docs/tests. 
Change-Id: I5385802f1711dec7af09cc4526d8abc38c25df09 Signed-off-by: Thomas Kosiewski <tk@coder.com> --- docs/agents/index.mdx | 2 +- src/node/builtinAgents/harness-init.md | 2 +- .../agentDefinitions/builtInAgentContent.generated.ts | 2 +- src/node/services/aiService.ts | 8 ++++++-- src/node/services/tools/fileCommon.test.ts | 3 ++- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/docs/agents/index.mdx b/docs/agents/index.mdx index ae9b8d5487..93be2b936c 100644 --- a/docs/agents/index.mdx +++ b/docs/agents/index.mdx @@ -605,7 +605,7 @@ Your job is to create or refine a Ralph harness for this workspace based on the === CRITICAL: LIMITED EDIT MODE === -- You may ONLY create/edit files under: `.mux/harness/*.jsonc` +- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc` - Do NOT modify source code or other repo files. - Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.). - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch. diff --git a/src/node/builtinAgents/harness-init.md b/src/node/builtinAgents/harness-init.md index 1be28e4b06..5e567adb6c 100644 --- a/src/node/builtinAgents/harness-init.md +++ b/src/node/builtinAgents/harness-init.md @@ -15,7 +15,7 @@ Your job is to create or refine a Ralph harness for this workspace based on the === CRITICAL: LIMITED EDIT MODE === -- You may ONLY create/edit files under: `.mux/harness/*.jsonc` +- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc` - Do NOT modify source code or other repo files. - Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.). - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch. 
diff --git a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts index de08862724..c893cf6dc0 100644 --- a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts +++ b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts @@ -7,7 +7,7 @@ export const BUILTIN_AGENT_CONTENT = { "exec": "---\nname: Exec\ndescription: Implement changes in the repository\nui:\n color: var(--color-exec-mode)\nsubagent:\n runnable: true\n append_prompt: |\n If you are running as a sub-agent in a child workspace:\n\n - When you have a final answer, call agent_report exactly once.\n - Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).\n - Do not call propose_plan.\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n remove:\n # Exec mode doesn't use planning tools\n - propose_plan\n - ask_user_question\n # Internal-only tools\n - system1_keep_ranges\n---\n\nYou are in Exec mode.\n\n- Make minimal, correct, reviewable changes that match existing codebase patterns.\n- Prefer targeted commands and checks (typecheck/tests) when feasible.\n- Treat as a standing order: keep running checks and addressing failures until they pass or a blocker outside your control arises.\n", "explore": "---\nname: Explore\ndescription: Read-only exploration of repository, environment, web, etc. 
Useful for investigation before making changes.\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n skip_init_hook: true\n append_prompt: |\n You are an Explore sub-agent running inside a child workspace.\n\n - Explore the repository to answer the prompt using read-only investigation.\n - Return concise, actionable findings (paths, symbols, callsites, and facts).\n - When you have a final answer, call agent_report exactly once.\n - Do not call agent_report until you have completed the assigned task.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nYou are in Explore mode (read-only).\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Prefer `file_read` for reading file contents (supports offset/limit paging).\n- Use bash only for read-only operations (rg, ls, git diff/show/log, etc.), or when you need piping/processing.\n", "harness-from-plan": "---\nname: Harness from Plan\ndescription: Generate a Ralph harness draft from a plan (internal)\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n append_prompt: |\n You are a sub-agent generating a Ralph harness draft from a plan.\n\n - Use read-only investigation only (no file edits, no state changes).\n - Output ONLY a single JSON object in a fenced code block (language: json).\n - When complete, call agent_report exactly once with that JSON block.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nYou generate a Ralph 
harness draft (checklist + optional gates) from the plan provided in the prompt.\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.).\n\nRules:\n\n- Checklist items should be small, mergeable steps (max 20).\n- Gates should be safe single commands that run checks (prefer make targets from this repo, e.g. \"make static-check\").\n- Do not use shell chaining, pipes, redirects, quotes, or destructive commands.\n\nOutput format: a single fenced code block (language: json) containing one JSON object.\n\nExample JSON object:\n\n{\n\"checklist\": [{ \"title\": \"...\", \"notes\": \"...\" }],\n\"gates\": [{ \"command\": \"make static-check\", \"title\": \"...\", \"timeoutSecs\": 600 }],\n\"loop\": { \"autoCommit\": false }\n}\n", - "harness-init": "---\nname: Harness Init\ndescription: Interactive harness generation + approval (internal)\nbase: exec\nui:\n hidden: true\n color: var(--color-harness-init-mode)\nsubagent:\n runnable: false\n---\n\nYou are in Harness Init mode.\n\nYour job is to create or refine a Ralph harness for this workspace based on the current plan and the repository.\n\n=== CRITICAL: LIMITED EDIT MODE ===\n\n- You may ONLY create/edit files under: `.mux/harness/*.jsonc`\n- Do NOT modify source code or other repo files.\n- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.).\n - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch.\n\nRepo-aware investigation:\n\n- Identify which commands should be used as gates by checking repo-native entrypoints:\n - `Makefile`, `package.json` scripts, 
`.github/workflows/*`, etc.\n- Map the plan’s changes to impacted subsystems by tracing callsites/imports.\n\nGates:\n\n- Prefer a small set of safe, single commands.\n- Do NOT use shell chaining, pipes, redirects, or quotes.\n\nDelegation:\n\n- You may spawn only read-only exploration subagents via `task` with `agentId: \"explore\"`.\n\nWhen the harness file is ready for user review:\n\n- Call `propose_harness` exactly once.\n- Do NOT start the Ralph loop yourself; the UI will start it after user approval.\n", + "harness-init": "---\nname: Harness Init\ndescription: Interactive harness generation + approval (internal)\nbase: exec\nui:\n hidden: true\n color: var(--color-harness-init-mode)\nsubagent:\n runnable: false\n---\n\nYou are in Harness Init mode.\n\nYour job is to create or refine a Ralph harness for this workspace based on the current plan and the repository.\n\n=== CRITICAL: LIMITED EDIT MODE ===\n\n- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc`\n- Do NOT modify source code or other repo files.\n- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.).\n - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch.\n\nRepo-aware investigation:\n\n- Identify which commands should be used as gates by checking repo-native entrypoints:\n - `Makefile`, `package.json` scripts, `.github/workflows/*`, etc.\n- Map the plan’s changes to impacted subsystems by tracing callsites/imports.\n\nGates:\n\n- Prefer a small set of safe, single commands.\n- Do NOT use shell chaining, pipes, redirects, or quotes.\n\nDelegation:\n\n- You may spawn only read-only exploration subagents via `task` with `agentId: \"explore\"`.\n\nWhen the harness file is ready for user review:\n\n- Call `propose_harness` exactly once.\n- Do NOT start the Ralph loop yourself; the UI will start it after user approval.\n", "mux": "---\nname: Mux\ndescription: Configure mux global behavior (system workspace)\nui:\n hidden: 
true\nsubagent:\n runnable: false\ntools:\n add:\n - mux_global_agents_read\n - mux_global_agents_write\n - ask_user_question\n---\n\nYou are the **Mux system assistant**.\n\nYour job is to help the user configure mux globally by editing the mux-wide instructions file:\n\n- `~/.mux/AGENTS.md`\n\n## Safety rules\n\n- You do **not** have access to arbitrary filesystem tools.\n- You do **not** have access to project secrets.\n- Before writing `~/.mux/AGENTS.md`, you must:\n 1) Read the current file (`mux_global_agents_read`).\n 2) Propose the exact change (show the new content or a concise diff).\n 3) Ask for explicit confirmation via `ask_user_question`.\n 4) Only then call `mux_global_agents_write` with `confirm: true`.\n\nIf the user declines, do not write anything.\n", "plan": "---\nname: Plan\ndescription: Create a plan before coding\nui:\n color: var(--color-plan-mode)\nsubagent:\n runnable: false\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n # Note: file_edit_* tools ARE available but restricted to plan file only at runtime\n # Note: task tools ARE enabled - Plan delegates to Explore sub-agents\n---\n\nYou are in Plan Mode.\n\n- Every response MUST produce or update a plan—no exceptions.\n- Simple requests deserve simple plans; a straightforward task might only need a few bullet points. 
Match plan complexity to the problem.\n- Keep the plan scannable; put long rationale in `<details>/<summary>` blocks.\n- Plans must be **self-contained**: include enough context, goals, constraints, and the core \"why\" so a new assistant can implement without needing the prior chat.\n- When Plan Mode is requested, assume the user wants the actual completed plan; do not merely describe how you would devise one.\n\n## Investigation step (required)\n\nBefore proposing a plan, identify what you must verify and use the best available tools\n(`file_read` for local file contents, search, or user questions). Do not guess. Investigation can be\ndone directly; sub-agents are optional.\n\nPrefer `file_read` over `bash cat` when reading files (including the plan file): long bash output may\nbe compacted, which can hide the middle of a document. Use `file_read` with offset/limit to page\nthrough larger files.\n\n## Plan format\n\n- Context/Why: Briefly restate the request, goals, and the rationale or user impact so the\n plan stands alone for a fresh implementer.\n- Evidence: List sources consulted (file paths, tool outputs, or user-provided info) and\n why they are sufficient. 
If evidence is missing, still produce a minimal plan and add a\n Questions section listing what you need to proceed.\n\nDetailed plan mode instructions (plan file path, sub-agent delegation, propose_plan workflow) are provided separately.\n", "system1_bash": "---\nname: System1 Bash\ndescription: Fast bash-output filtering (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - system1_keep_ranges\n---\n\nYou are a fast bash-output filtering assistant.\n\nYou will be given:\n\n- `maxKeptLines` (budget)\n- `Display name` (optional): a short intent label for the command\n- `Bash script`\n- `Numbered output`\n\nGiven the numbered output, decide which lines to keep so the user sees the most relevant information.\n\nIMPORTANT:\n\n- You MUST call `system1_keep_ranges` exactly once.\n- Do NOT output markdown or prose. Only the tool call (with valid JSON arguments).\n\nRules:\n\n- Line numbers are 1-based indices into the numbered output.\n- Use the `Display name` and `Bash script` as intent hints.\n- If intent is exploration/listing/search (e.g. `ls`, `find`, `rg`, `grep`, `git status`), prioritize keeping\n representative file paths/matches and any summary/counts (not just errors).\n- If intent is build/test/logs, prefer errors, stack traces, failing test summaries, and actionable warnings.\n- If the script already narrows output to a slice (e.g. `head`, `tail`, `sed -n` line ranges), avoid extra\n denoising: prefer keeping most/all lines within the budget.\n- Never filter out git merge conflict markers (`<<<<<<<`, `|||||||`, `=======`, `>>>>>>>`). If the command is searching for these markers (e.g. `rg`/`grep`), do not keep only representative matches; keep all matches within the budget.\n- Prefer omitting tool-generated advisory blocks (especially git lines starting with `hint:`) that only suggest\n next-step commands or point to docs/help. 
Keep the underlying `error:`/`fatal:`/`CONFLICT` lines, file paths,\n and conflict markers instead.\n- Exception: keep `hint:` blocks when the script is explicitly searching for them (e.g. `rg '^hint:'`) or when\n the hint is the only clue explaining a blocking state.\n- Prefer high signal density: keep ranges tight around important lines plus minimal surrounding context.\n- Merge adjacent/overlapping ranges only when the lines between are also informative. Do NOT add noise just\n to reduce range count; it's OK to return many ranges when denoising (e.g., > 8).\n- Denoise aggressively: omit duplicate/redundant lines and repeated messages with the same meaning\n (e.g., repeated progress, retries, or identical stack traces). If the same error repeats, keep only\n the most informative instance plus minimal surrounding context.\n- If there are many similar warnings/errors, keep only a few representative examples (prefer those\n with file paths/line numbers) plus any summary/count.\n- Always keep at least 1 line if any output exists.\n- Choose ranges that keep at most `maxKeptLines` lines total (the caller may truncate).\n\nExample:\n\n- Numbered output:\n - 0001| building...\n - 0002| ERROR: expected X, got Y\n - 0003| at path/to/file.ts:12:3\n - 0004| done\n- Tool call:\n - system1_keep_ranges({\"keep_ranges\":[{\"start\":2,\"end\":3,\"reason\":\"error\"}]})\n", diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index e52a174e9d..5ea1cca1ad 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -1419,7 +1419,9 @@ export class AIService extends EventEmitter { mode: effectiveMode === "plan" ? "plan" : "exec", agentId: effectiveAgentId, allowedEditPaths: - effectiveAgentId === "harness-init" ? [".mux/harness/*.jsonc"] : undefined, + effectiveAgentId === "harness-init" + ? 
[".mux/harness/*.jsonc", ".mux/harness/**/*.jsonc"] + : undefined, }, "", // Empty workspace ID for early stub config this.initStateManager, @@ -1769,7 +1771,9 @@ export class AIService extends EventEmitter { mode: effectiveMode === "plan" ? "plan" : "exec", agentId: effectiveAgentId, allowedEditPaths: - effectiveAgentId === "harness-init" ? [".mux/harness/*.jsonc"] : undefined, + effectiveAgentId === "harness-init" + ? [".mux/harness/*.jsonc", ".mux/harness/**/*.jsonc"] + : undefined, emitChatEvent: (event) => { // Defensive: tools should only emit events for the workspace they belong to. if ("workspaceId" in event && event.workspaceId !== workspaceId) { diff --git a/src/node/services/tools/fileCommon.test.ts b/src/node/services/tools/fileCommon.test.ts index eb4b434094..678bf0e9a0 100644 --- a/src/node/services/tools/fileCommon.test.ts +++ b/src/node/services/tools/fileCommon.test.ts @@ -165,13 +165,14 @@ describe("fileCommon", () => { it("allows edits to allowlisted files", async () => { const config = buildConfig({ mode: "exec", - allowedEditPaths: [".mux/harness/*.jsonc"], + allowedEditPaths: [".mux/harness/*.jsonc", ".mux/harness/**/*.jsonc"], }); expect(await validatePlanModeAccess(".mux/harness/main.jsonc", config)).toBeNull(); expect( await validatePlanModeAccess("/workspace/project/.mux/harness/main.jsonc", config) ).toBeNull(); + expect(await validatePlanModeAccess(".mux/harness/feature/foo.jsonc", config)).toBeNull(); }); it("rejects edits to non-allowlisted files", async () => { From 6a11aca5ab4780ce76e297b04611189f3b54072a Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Tue, 27 Jan 2026 12:18:11 +0100 Subject: [PATCH 18/20] chore: regenerate agent docs and built-ins Change-Id: I1f40d2ed2df5614ef7513e8a2efc61ac13fc7e73 Signed-off-by: Thomas Kosiewski <tk@coder.com> --- docs/agents/index.mdx | 224 +++++++++++++++++++++--------------------- 1 file changed, 112 insertions(+), 112 deletions(-) diff --git a/docs/agents/index.mdx 
b/docs/agents/index.mdx index 93be2b936c..011988cdc7 100644 --- a/docs/agents/index.mdx +++ b/docs/agents/index.mdx @@ -405,6 +405,118 @@ You are in Explore mode (read-only). </Accordion> +### Harness from Plan (internal) + +**Generate a Ralph harness draft from a plan (internal)** + +<Accordion title="View harness-from-plan.md"> + +```md +--- +name: Harness from Plan +description: Generate a Ralph harness draft from a plan (internal) +base: exec +ui: + hidden: true +subagent: + runnable: true + append_prompt: | + You are a sub-agent generating a Ralph harness draft from a plan. + + - Use read-only investigation only (no file edits, no state changes). + - Output ONLY a single JSON object in a fenced code block (language: json). + - When complete, call agent_report exactly once with that JSON block. +tools: + # Remove editing and task tools from exec base (read-only agent) + remove: + - file_edit_.* + - task + - task_.* + - agent_skill_read + - agent_skill_read_file +--- + +You generate a Ralph harness draft (checklist + optional gates) from the plan provided in the prompt. + +=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS === + +- You MUST NOT create, edit, delete, move, or copy files. +- You MUST NOT create temporary files anywhere (including /tmp). +- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files. +- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.). +- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.). + +Rules: + +- Checklist items should be small, mergeable steps (max 20). +- Gates should be safe single commands that run checks (prefer make targets from this repo, e.g. "make static-check"). +- Do not use shell chaining, pipes, redirects, quotes, or destructive commands. + +Output format: a single fenced code block (language: json) containing one JSON object. + +Example JSON object: + +{ +"checklist": [{ "title": "...", "notes": "..." 
}], +"gates": [{ "command": "make static-check", "title": "...", "timeoutSecs": 600 }], +"loop": { "autoCommit": false } +} +``` + +</Accordion> + +### Harness Init (internal) + +**Interactive harness generation + approval (internal)** + +<Accordion title="View harness-init.md"> + +```md +--- +name: Harness Init +description: Interactive harness generation + approval (internal) +base: exec +ui: + hidden: true + color: var(--color-harness-init-mode) +subagent: + runnable: false +--- + +You are in Harness Init mode. + +Your job is to create or refine a Ralph harness for this workspace based on the current plan and the repository. + +=== CRITICAL: LIMITED EDIT MODE === + +- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc` +- Do NOT modify source code or other repo files. +- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.). + - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch. + +Repo-aware investigation: + +- Identify which commands should be used as gates by checking repo-native entrypoints: + - `Makefile`, `package.json` scripts, `.github/workflows/*`, etc. +- Map the plan’s changes to impacted subsystems by tracing callsites/imports. + +Gates: + +- Prefer a small set of safe, single commands. +- Do NOT use shell chaining, pipes, redirects, or quotes. + +Delegation: + +- You may spawn only read-only exploration subagents via `task` with `agentId: "explore"`. + +When the harness file is ready for user review: + +- Call `propose_harness` exactly once. +- Do NOT start the Ralph loop yourself; the UI will start it after user approval. 
+``` + +</Accordion> + ### Mux (internal) **Configure mux global behavior (system workspace)** @@ -521,118 +633,6 @@ Example: </Accordion> -### Harness from Plan (internal) - -**Generate a Ralph harness draft from a plan (internal)** - -<Accordion title="View harness-from-plan.md"> - -```md ---- -name: Harness from Plan -description: Generate a Ralph harness draft from a plan (internal) -base: exec -ui: - hidden: true -subagent: - runnable: true - append_prompt: | - You are a sub-agent generating a Ralph harness draft from a plan. - - - Use read-only investigation only (no file edits, no state changes). - - Output ONLY a single JSON object in a fenced code block (language: json). - - When complete, call agent_report exactly once with that JSON block. -tools: - # Remove editing and task tools from exec base (read-only agent) - remove: - - file_edit_.* - - task - - task_.* - - agent_skill_read - - agent_skill_read_file ---- - -You generate a Ralph harness draft (checklist + optional gates) from the plan provided in the prompt. - -=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS === - -- You MUST NOT create, edit, delete, move, or copy files. -- You MUST NOT create temporary files anywhere (including /tmp). -- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files. -- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.). -- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.). - -Rules: - -- Checklist items should be small, mergeable steps (max 20). -- Gates should be safe single commands that run checks (prefer make targets from this repo, e.g. "make static-check"). -- Do not use shell chaining, pipes, redirects, quotes, or destructive commands. - -Output format: a single fenced code block (language: json) containing one JSON object. - -Example JSON object: - -{ -"checklist": [{ "title": "...", "notes": "..." 
}], -"gates": [{ "command": "make static-check", "title": "...", "timeoutSecs": 600 }], -"loop": { "autoCommit": false } -} -``` - -</Accordion> - -### Harness Init (internal) - -**Interactive harness generation + approval (internal)** - -<Accordion title="View harness-init.md"> - -```md ---- -name: Harness Init -description: Interactive harness generation + approval (internal) -base: exec -ui: - hidden: true - color: var(--color-harness-init-mode) -subagent: - runnable: false ---- - -You are in Harness Init mode. - -Your job is to create or refine a Ralph harness for this workspace based on the current plan and the repository. - -=== CRITICAL: LIMITED EDIT MODE === - -- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc` -- Do NOT modify source code or other repo files. -- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.). - - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch. - -Repo-aware investigation: - -- Identify which commands should be used as gates by checking repo-native entrypoints: - - `Makefile`, `package.json` scripts, `.github/workflows/*`, etc. -- Map the plan’s changes to impacted subsystems by tracing callsites/imports. - -Gates: - -- Prefer a small set of safe, single commands. -- Do NOT use shell chaining, pipes, redirects, or quotes. - -Delegation: - -- You may spawn only read-only exploration subagents via `task` with `agentId: "explore"`. - -When the harness file is ready for user review: - -- Call `propose_harness` exactly once. -- Do NOT start the Ralph loop yourself; the UI will start it after user approval. 
-``` - -</Accordion> - {/* END BUILTIN_AGENTS */} ## Related Docs From a29f45c10ecfcdcf4a820d72ec9cc95e716e7da3 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Tue, 27 Jan 2026 14:22:25 +0100 Subject: [PATCH 19/20] harness: inject config schema into prompts Change-Id: I8c417c433ed340807524e11609d2c4e39cdf07bb Signed-off-by: Thomas Kosiewski <tk@coder.com> --- docs/agents/index.mdx | 17 ++- src/node/builtinAgents/harness-from-plan.md | 5 +- src/node/builtinAgents/harness-init.md | 12 ++ .../builtInAgentContent.generated.ts | 4 +- src/node/services/aiService.ts | 8 ++ .../harnessConfigSchemaPrompt.test.ts | 117 ++++++++++++++++++ .../services/harnessConfigSchemaPrompt.ts | 45 +++++++ 7 files changed, 204 insertions(+), 4 deletions(-) create mode 100644 src/node/services/harnessConfigSchemaPrompt.test.ts create mode 100644 src/node/services/harnessConfigSchemaPrompt.ts diff --git a/docs/agents/index.mdx b/docs/agents/index.mdx index 011988cdc7..16c04886c4 100644 --- a/docs/agents/index.mdx +++ b/docs/agents/index.mdx @@ -436,6 +436,8 @@ tools: - agent_skill_read_file --- +The `.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`. +Follow it exactly (extra/unknown keys will fail validation). You generate a Ralph harness draft (checklist + optional gates) from the plan provided in the prompt. === CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS === @@ -457,7 +459,8 @@ Output format: a single fenced code block (language: json) containing one JSON o Example JSON object: { -"checklist": [{ "title": "...", "notes": "..." }], +"version": 1, +"checklist": [{ "id": "item-1", "title": "...", "status": "todo", "notes": "..." 
}], "gates": [{ "command": "make static-check", "title": "...", "timeoutSecs": 600 }], "loop": { "autoCommit": false } } @@ -481,6 +484,11 @@ ui: color: var(--color-harness-init-mode) subagent: runnable: false +tools: + remove: + - web_search + - web_fetch + - google_search --- You are in Harness Init mode. @@ -488,8 +496,15 @@ You are in Harness Init mode. Your job is to create or refine a Ralph harness for this workspace based on the current plan and the repository. === CRITICAL: LIMITED EDIT MODE === +Harness schema: + +- The `.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`. +- Follow the schema exactly (extra/unknown keys will fail validation). +- Web tools are disabled in this mode; do not attempt to look up harness docs online. - You may ONLY create/edit files under: `.mux/harness/**/*.jsonc` +- If you delegate to read-only `explore` subagents, instruct them to avoid web_search/web_fetch/google_search too. + - Do NOT modify source code or other repo files. - Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.). - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch. diff --git a/src/node/builtinAgents/harness-from-plan.md b/src/node/builtinAgents/harness-from-plan.md index 46b1ac48ad..fc73900a71 100644 --- a/src/node/builtinAgents/harness-from-plan.md +++ b/src/node/builtinAgents/harness-from-plan.md @@ -22,6 +22,8 @@ tools: - agent_skill_read_file --- +The `.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`. +Follow it exactly (extra/unknown keys will fail validation). You generate a Ralph harness draft (checklist + optional gates) from the plan provided in the prompt. === CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS === @@ -43,7 +45,8 @@ Output format: a single fenced code block (language: json) containing one JSON o Example JSON object: { -"checklist": [{ "title": "...", "notes": "..." 
}], +"version": 1, +"checklist": [{ "id": "item-1", "title": "...", "status": "todo", "notes": "..." }], "gates": [{ "command": "make static-check", "title": "...", "timeoutSecs": 600 }], "loop": { "autoCommit": false } } diff --git a/src/node/builtinAgents/harness-init.md b/src/node/builtinAgents/harness-init.md index 5e567adb6c..ac46806577 100644 --- a/src/node/builtinAgents/harness-init.md +++ b/src/node/builtinAgents/harness-init.md @@ -7,6 +7,11 @@ ui: color: var(--color-harness-init-mode) subagent: runnable: false +tools: + remove: + - web_search + - web_fetch + - google_search --- You are in Harness Init mode. @@ -14,8 +19,15 @@ You are in Harness Init mode. Your job is to create or refine a Ralph harness for this workspace based on the current plan and the repository. === CRITICAL: LIMITED EDIT MODE === +Harness schema: + +- The `.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`. +- Follow the schema exactly (extra/unknown keys will fail validation). +- Web tools are disabled in this mode; do not attempt to look up harness docs online. - You may ONLY create/edit files under: `.mux/harness/**/*.jsonc` +- If you delegate to read-only `explore` subagents, instruct them to avoid web_search/web_fetch/google_search too. + - Do NOT modify source code or other repo files. - Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.). - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch. 
diff --git a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts index c893cf6dc0..5b40d60507 100644 --- a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts +++ b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts @@ -6,8 +6,8 @@ export const BUILTIN_AGENT_CONTENT = { "compact": "---\nname: Compact\ndescription: History compaction (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\n---\n\nYou are running a compaction/summarization pass. Your task is to write a concise summary of the conversation so far.\n\nIMPORTANT:\n\n- You have NO tools available. Do not attempt to call any tools or output JSON.\n- Simply write the summary as plain text prose.\n- Follow the user's instructions for what to include in the summary.\n", "exec": "---\nname: Exec\ndescription: Implement changes in the repository\nui:\n color: var(--color-exec-mode)\nsubagent:\n runnable: true\n append_prompt: |\n If you are running as a sub-agent in a child workspace:\n\n - When you have a final answer, call agent_report exactly once.\n - Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).\n - Do not call propose_plan.\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n remove:\n # Exec mode doesn't use planning tools\n - propose_plan\n - ask_user_question\n # Internal-only tools\n - system1_keep_ranges\n---\n\nYou are in Exec mode.\n\n- Make minimal, correct, reviewable changes that match existing codebase patterns.\n- Prefer targeted commands and checks (typecheck/tests) when feasible.\n- Treat as a standing order: keep running checks and addressing failures until they pass or a blocker outside your control arises.\n", "explore": "---\nname: Explore\ndescription: Read-only exploration of repository, environment, web, etc. 
Useful for investigation before making changes.\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n skip_init_hook: true\n append_prompt: |\n You are an Explore sub-agent running inside a child workspace.\n\n - Explore the repository to answer the prompt using read-only investigation.\n - Return concise, actionable findings (paths, symbols, callsites, and facts).\n - When you have a final answer, call agent_report exactly once.\n - Do not call agent_report until you have completed the assigned task.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nYou are in Explore mode (read-only).\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Prefer `file_read` for reading file contents (supports offset/limit paging).\n- Use bash only for read-only operations (rg, ls, git diff/show/log, etc.), or when you need piping/processing.\n", - "harness-from-plan": "---\nname: Harness from Plan\ndescription: Generate a Ralph harness draft from a plan (internal)\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n append_prompt: |\n You are a sub-agent generating a Ralph harness draft from a plan.\n\n - Use read-only investigation only (no file edits, no state changes).\n - Output ONLY a single JSON object in a fenced code block (language: json).\n - When complete, call agent_report exactly once with that JSON block.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nYou generate a Ralph 
harness draft (checklist + optional gates) from the plan provided in the prompt.\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.).\n\nRules:\n\n- Checklist items should be small, mergeable steps (max 20).\n- Gates should be safe single commands that run checks (prefer make targets from this repo, e.g. \"make static-check\").\n- Do not use shell chaining, pipes, redirects, quotes, or destructive commands.\n\nOutput format: a single fenced code block (language: json) containing one JSON object.\n\nExample JSON object:\n\n{\n\"checklist\": [{ \"title\": \"...\", \"notes\": \"...\" }],\n\"gates\": [{ \"command\": \"make static-check\", \"title\": \"...\", \"timeoutSecs\": 600 }],\n\"loop\": { \"autoCommit\": false }\n}\n", - "harness-init": "---\nname: Harness Init\ndescription: Interactive harness generation + approval (internal)\nbase: exec\nui:\n hidden: true\n color: var(--color-harness-init-mode)\nsubagent:\n runnable: false\n---\n\nYou are in Harness Init mode.\n\nYour job is to create or refine a Ralph harness for this workspace based on the current plan and the repository.\n\n=== CRITICAL: LIMITED EDIT MODE ===\n\n- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc`\n- Do NOT modify source code or other repo files.\n- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.).\n - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch.\n\nRepo-aware investigation:\n\n- Identify which commands should be used as gates by checking repo-native entrypoints:\n - `Makefile`, `package.json` scripts, 
`.github/workflows/*`, etc.\n- Map the plan’s changes to impacted subsystems by tracing callsites/imports.\n\nGates:\n\n- Prefer a small set of safe, single commands.\n- Do NOT use shell chaining, pipes, redirects, or quotes.\n\nDelegation:\n\n- You may spawn only read-only exploration subagents via `task` with `agentId: \"explore\"`.\n\nWhen the harness file is ready for user review:\n\n- Call `propose_harness` exactly once.\n- Do NOT start the Ralph loop yourself; the UI will start it after user approval.\n", + "harness-from-plan": "---\nname: Harness from Plan\ndescription: Generate a Ralph harness draft from a plan (internal)\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n append_prompt: |\n You are a sub-agent generating a Ralph harness draft from a plan.\n\n - Use read-only investigation only (no file edits, no state changes).\n - Output ONLY a single JSON object in a fenced code block (language: json).\n - When complete, call agent_report exactly once with that JSON block.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nThe `.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`.\nFollow it exactly (extra/unknown keys will fail validation).\nYou generate a Ralph harness draft (checklist + optional gates) from the plan provided in the prompt.\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.).\n\nRules:\n\n- Checklist items should be small, mergeable steps (max 20).\n- Gates 
should be safe single commands that run checks (prefer make targets from this repo, e.g. \"make static-check\").\n- Do not use shell chaining, pipes, redirects, quotes, or destructive commands.\n\nOutput format: a single fenced code block (language: json) containing one JSON object.\n\nExample JSON object:\n\n{\n\"version\": 1,\n\"checklist\": [{ \"id\": \"item-1\", \"title\": \"...\", \"status\": \"todo\", \"notes\": \"...\" }],\n\"gates\": [{ \"command\": \"make static-check\", \"title\": \"...\", \"timeoutSecs\": 600 }],\n\"loop\": { \"autoCommit\": false }\n}\n", + "harness-init": "---\nname: Harness Init\ndescription: Interactive harness generation + approval (internal)\nbase: exec\nui:\n hidden: true\n color: var(--color-harness-init-mode)\nsubagent:\n runnable: false\ntools:\n remove:\n - web_search\n - web_fetch\n - google_search\n---\n\nYou are in Harness Init mode.\n\nYour job is to create or refine a Ralph harness for this workspace based on the current plan and the repository.\n\n=== CRITICAL: LIMITED EDIT MODE ===\nHarness schema:\n\n- The `.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`.\n- Follow the schema exactly (extra/unknown keys will fail validation).\n- Web tools are disabled in this mode; do not attempt to look up harness docs online.\n\n- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc`\n- If you delegate to read-only `explore` subagents, instruct them to avoid web_search/web_fetch/google_search too.\n\n- Do NOT modify source code or other repo files.\n- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.).\n - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch.\n\nRepo-aware investigation:\n\n- Identify which commands should be used as gates by checking repo-native entrypoints:\n - `Makefile`, `package.json` scripts, `.github/workflows/*`, etc.\n- Map the plan’s changes to impacted subsystems by tracing 
callsites/imports.\n\nGates:\n\n- Prefer a small set of safe, single commands.\n- Do NOT use shell chaining, pipes, redirects, or quotes.\n\nDelegation:\n\n- You may spawn only read-only exploration subagents via `task` with `agentId: \"explore\"`.\n\nWhen the harness file is ready for user review:\n\n- Call `propose_harness` exactly once.\n- Do NOT start the Ralph loop yourself; the UI will start it after user approval.\n", "mux": "---\nname: Mux\ndescription: Configure mux global behavior (system workspace)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - mux_global_agents_read\n - mux_global_agents_write\n - ask_user_question\n---\n\nYou are the **Mux system assistant**.\n\nYour job is to help the user configure mux globally by editing the mux-wide instructions file:\n\n- `~/.mux/AGENTS.md`\n\n## Safety rules\n\n- You do **not** have access to arbitrary filesystem tools.\n- You do **not** have access to project secrets.\n- Before writing `~/.mux/AGENTS.md`, you must:\n 1) Read the current file (`mux_global_agents_read`).\n 2) Propose the exact change (show the new content or a concise diff).\n 3) Ask for explicit confirmation via `ask_user_question`.\n 4) Only then call `mux_global_agents_write` with `confirm: true`.\n\nIf the user declines, do not write anything.\n", "plan": "---\nname: Plan\ndescription: Create a plan before coding\nui:\n color: var(--color-plan-mode)\nsubagent:\n runnable: false\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n # Note: file_edit_* tools ARE available but restricted to plan file only at runtime\n # Note: task tools ARE enabled - Plan delegates to Explore sub-agents\n---\n\nYou are in Plan Mode.\n\n- Every response MUST produce or update a plan—no exceptions.\n- Simple requests deserve simple plans; a straightforward task might only need a few bullet points. 
Match plan complexity to the problem.\n- Keep the plan scannable; put long rationale in `<details>/<summary>` blocks.\n- Plans must be **self-contained**: include enough context, goals, constraints, and the core \"why\" so a new assistant can implement without needing the prior chat.\n- When Plan Mode is requested, assume the user wants the actual completed plan; do not merely describe how you would devise one.\n\n## Investigation step (required)\n\nBefore proposing a plan, identify what you must verify and use the best available tools\n(`file_read` for local file contents, search, or user questions). Do not guess. Investigation can be\ndone directly; sub-agents are optional.\n\nPrefer `file_read` over `bash cat` when reading files (including the plan file): long bash output may\nbe compacted, which can hide the middle of a document. Use `file_read` with offset/limit to page\nthrough larger files.\n\n## Plan format\n\n- Context/Why: Briefly restate the request, goals, and the rationale or user impact so the\n plan stands alone for a fresh implementer.\n- Evidence: List sources consulted (file paths, tool outputs, or user-provided info) and\n why they are sufficient. 
If evidence is missing, still produce a minimal plan and add a\n Questions section listing what you need to proceed.\n\nDetailed plan mode instructions (plan file path, sub-agent delegation, propose_plan workflow) are provided separately.\n", "system1_bash": "---\nname: System1 Bash\ndescription: Fast bash-output filtering (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - system1_keep_ranges\n---\n\nYou are a fast bash-output filtering assistant.\n\nYou will be given:\n\n- `maxKeptLines` (budget)\n- `Display name` (optional): a short intent label for the command\n- `Bash script`\n- `Numbered output`\n\nGiven the numbered output, decide which lines to keep so the user sees the most relevant information.\n\nIMPORTANT:\n\n- You MUST call `system1_keep_ranges` exactly once.\n- Do NOT output markdown or prose. Only the tool call (with valid JSON arguments).\n\nRules:\n\n- Line numbers are 1-based indices into the numbered output.\n- Use the `Display name` and `Bash script` as intent hints.\n- If intent is exploration/listing/search (e.g. `ls`, `find`, `rg`, `grep`, `git status`), prioritize keeping\n representative file paths/matches and any summary/counts (not just errors).\n- If intent is build/test/logs, prefer errors, stack traces, failing test summaries, and actionable warnings.\n- If the script already narrows output to a slice (e.g. `head`, `tail`, `sed -n` line ranges), avoid extra\n denoising: prefer keeping most/all lines within the budget.\n- Never filter out git merge conflict markers (`<<<<<<<`, `|||||||`, `=======`, `>>>>>>>`). If the command is searching for these markers (e.g. `rg`/`grep`), do not keep only representative matches; keep all matches within the budget.\n- Prefer omitting tool-generated advisory blocks (especially git lines starting with `hint:`) that only suggest\n next-step commands or point to docs/help. 
Keep the underlying `error:`/`fatal:`/`CONFLICT` lines, file paths,\n and conflict markers instead.\n- Exception: keep `hint:` blocks when the script is explicitly searching for them (e.g. `rg '^hint:'`) or when\n the hint is the only clue explaining a blocking state.\n- Prefer high signal density: keep ranges tight around important lines plus minimal surrounding context.\n- Merge adjacent/overlapping ranges only when the lines between are also informative. Do NOT add noise just\n to reduce range count; it's OK to return many ranges when denoising (e.g., > 8).\n- Denoise aggressively: omit duplicate/redundant lines and repeated messages with the same meaning\n (e.g., repeated progress, retries, or identical stack traces). If the same error repeats, keep only\n the most informative instance plus minimal surrounding context.\n- If there are many similar warnings/errors, keep only a few representative examples (prefer those\n with file paths/line numbers) plus any summary/count.\n- Always keep at least 1 line if any output exists.\n- Choose ranges that keep at most `maxKeptLines` lines total (the caller may truncate).\n\nExample:\n\n- Numbered output:\n - 0001| building...\n - 0002| ERROR: expected X, got Y\n - 0003| at path/to/file.ts:12:3\n - 0004| done\n- Tool call:\n - system1_keep_ranges({\"keep_ranges\":[{\"start\":2,\"end\":3,\"reason\":\"error\"}]})\n", diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index 5ea1cca1ad..a196ab36be 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -41,6 +41,7 @@ import type { BashOutputEvent } from "@/common/types/stream"; import type { MuxMessage, MuxTextPart } from "@/common/types/message"; import { createMuxMessage } from "@/common/types/message"; import type { Config, ProviderConfig } from "@/node/config"; +import { maybeAppendHarnessConfigSchemaToAdditionalInstructions } from "./harnessConfigSchemaPrompt"; import { StreamManager } from "./streamManager"; import 
type { InitStateManager } from "./initStateManager"; import type { SendMessageError } from "@/common/types/errors"; @@ -1504,6 +1505,13 @@ export class AIService extends EventEmitter { : nestingInstruction; } + // Harness agents need a schema-aware prompt so they don't web-search for an internal/WIP spec. + // This block is generated from the Zod schema at runtime to avoid schema drift. + effectiveAdditionalInstructions = maybeAppendHarnessConfigSchemaToAdditionalInstructions({ + agentId: effectiveAgentId, + additionalInstructions: effectiveAdditionalInstructions, + }); + // Read plan content for agent transition (plan-like → exec-like) // Only read if switching to exec-like agent and last assistant was plan-like. let planContentForTransition: string | undefined; diff --git a/src/node/services/harnessConfigSchemaPrompt.test.ts b/src/node/services/harnessConfigSchemaPrompt.test.ts new file mode 100644 index 0000000000..39c9b53477 --- /dev/null +++ b/src/node/services/harnessConfigSchemaPrompt.test.ts @@ -0,0 +1,117 @@ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; + +import { describe, expect, test } from "bun:test"; +import type { Tool } from "ai"; + +import { DEFAULT_RUNTIME_CONFIG } from "@/common/constants/workspace"; +import type { WorkspaceMetadata } from "@/common/types/workspace"; +import { applyToolPolicy } from "@/common/utils/tools/toolPolicy"; +import { LocalRuntime } from "@/node/runtime/LocalRuntime"; +import { + clearBuiltInAgentCache, + getBuiltInAgentDefinitions, +} from "./agentDefinitions/builtInAgentDefinitions"; +import { resolveToolPolicyForAgent } from "./agentDefinitions/resolveToolPolicy"; +import { buildSystemMessage } from "./systemMessage"; +import { DisposableTempDir } from "./tempDir"; +import { maybeAppendHarnessConfigSchemaToAdditionalInstructions } from "./harnessConfigSchemaPrompt"; + +describe("harness config schema prompt injection", () => { + test("includes <harness_config_schema> in additional 
instructions for harness agents", async () => { + using tempDir = new DisposableTempDir("harness-schema-prompt"); + + const projectDir = path.join(tempDir.path, "project"); + const workspaceDir = path.join(tempDir.path, "workspace"); + const globalMuxDir = path.join(tempDir.path, "global-mux"); + + await fs.mkdir(projectDir, { recursive: true }); + await fs.mkdir(workspaceDir, { recursive: true }); + await fs.mkdir(globalMuxDir, { recursive: true }); + + const originalMuxRoot = process.env.MUX_ROOT; + process.env.MUX_ROOT = globalMuxDir; + + try { + const runtime = new LocalRuntime(tempDir.path); + const metadata: WorkspaceMetadata = { + id: "test-workspace", + name: "test-workspace", + projectName: "test-project", + projectPath: projectDir, + runtimeConfig: DEFAULT_RUNTIME_CONFIG, + }; + + for (const agentId of ["harness-init", "harness-from-plan"] as const) { + const additional = maybeAppendHarnessConfigSchemaToAdditionalInstructions({ + agentId, + additionalInstructions: "extra", + }); + expect(additional).toContain("<harness_config_schema"); + + const systemMessage = await buildSystemMessage(metadata, runtime, workspaceDir, additional); + expect(systemMessage).toContain("<harness_config_schema"); + + const match = /<harness_config_schema[^>]*>\s*([\s\S]*?)\s*<\/harness_config_schema>/m.exec( + systemMessage + ); + expect(match).not.toBeNull(); + + const schema = JSON.parse(match![1]) as { required?: string[] }; + const required = schema.required ?? 
[]; + expect(required).toContain("version"); + expect(required).toContain("checklist"); + expect(required).toContain("gates"); + } + + const nonHarness = maybeAppendHarnessConfigSchemaToAdditionalInstructions({ + agentId: "exec", + additionalInstructions: "extra", + }); + expect(nonHarness).toBe("extra"); + } finally { + if (originalMuxRoot === undefined) { + delete process.env.MUX_ROOT; + } else { + process.env.MUX_ROOT = originalMuxRoot; + } + } + }); +}); + +describe("harness-init tool policy", () => { + test("disables web_* tools", () => { + clearBuiltInAgentCache(); + const builtIns = getBuiltInAgentDefinitions(); + + const harnessInit = builtIns.find((a) => a.id === "harness-init"); + const exec = builtIns.find((a) => a.id === "exec"); + + expect(harnessInit).toBeDefined(); + expect(exec).toBeDefined(); + + const agents = [{ tools: harnessInit!.frontmatter.tools }, { tools: exec!.frontmatter.tools }]; + + const policy = resolveToolPolicyForAgent({ + agents, + isSubagent: false, + disableTaskToolsForDepth: false, + }); + + const tool = {} as unknown as Tool; + const filtered = applyToolPolicy( + { + file_read: tool, + web_search: tool, + web_fetch: tool, + google_search: tool, + }, + policy + ); + + expect(Object.keys(filtered)).toContain("file_read"); + expect(Object.keys(filtered)).not.toContain("web_search"); + expect(Object.keys(filtered)).not.toContain("web_fetch"); + expect(Object.keys(filtered)).not.toContain("google_search"); + }); +}); diff --git a/src/node/services/harnessConfigSchemaPrompt.ts b/src/node/services/harnessConfigSchemaPrompt.ts new file mode 100644 index 0000000000..0047c845c0 --- /dev/null +++ b/src/node/services/harnessConfigSchemaPrompt.ts @@ -0,0 +1,45 @@ +import assert from "@/common/utils/assert"; +import { z } from "zod"; +import { WorkspaceHarnessConfigSchema } from "@/common/orpc/schemas/harness"; + +/** + * Prompt-time JSON Schema for `.mux/harness/*.jsonc`. 
+ * + * We generate this from the Zod schema (source of truth) at runtime so the + * model always sees a schema that exactly matches validation. + */ +let cachedHarnessConfigSchemaBlock: string | null = null; + +function getHarnessConfigSchemaPromptBlock(): string { + if (cachedHarnessConfigSchemaBlock) return cachedHarnessConfigSchemaBlock; + + const jsonSchema = z.toJSONSchema(WorkspaceHarnessConfigSchema); + assert( + jsonSchema && typeof jsonSchema === "object", + "Expected z.toJSONSchema(WorkspaceHarnessConfigSchema) to return an object" + ); + + cachedHarnessConfigSchemaBlock = [ + `<harness_config_schema format="jsonschema">`, + JSON.stringify(jsonSchema, null, 2), + `</harness_config_schema>`, + ].join("\n"); + + return cachedHarnessConfigSchemaBlock; +} + +export function maybeAppendHarnessConfigSchemaToAdditionalInstructions(args: { + agentId: string; + additionalInstructions: string | undefined; +}): string | undefined { + const shouldInject = args.agentId === "harness-init" || args.agentId === "harness-from-plan"; + if (!shouldInject) return args.additionalInstructions; + + const block = getHarnessConfigSchemaPromptBlock(); + const additional = args.additionalInstructions; + if (additional && additional.trim().length > 0) { + return `${additional}\n\n${block}`; + } + + return block; +} From 9f4d0a208a818b97f9c329c6db3c157742c7933f Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski <tk@coder.com> Date: Tue, 27 Jan 2026 18:30:59 +0100 Subject: [PATCH 20/20] harness-init: inject output path and drop harness-from-plan - Require Harness Init to delegate exploration to explore subagents\n- Inject <harness_output_path> derived from workspace name\n- Remove harness-from-plan agent + startFromPlan RPC Change-Id: Ie2b72ffc3bd7705c08d1ce861e73093518a14078 Signed-off-by: Thomas Kosiewski <tk@coder.com> --- docs/agents/index.mdx | 97 +++------- src/common/orpc/schemas/api.ts | 4 - src/node/builtinAgents/harness-from-plan.md | 52 ----- 
src/node/builtinAgents/harness-init.md | 30 ++- src/node/orpc/router.ts | 180 ------------------ .../builtInAgentContent.generated.ts | 3 +- .../builtInAgentDefinitions.ts | 1 - src/node/services/aiService.ts | 1 + .../harnessConfigSchemaPrompt.test.ts | 8 +- .../services/harnessConfigSchemaPrompt.ts | 19 +- 10 files changed, 70 insertions(+), 325 deletions(-) delete mode 100644 src/node/builtinAgents/harness-from-plan.md diff --git a/docs/agents/index.mdx b/docs/agents/index.mdx index 16c04886c4..4ebb34b97c 100644 --- a/docs/agents/index.mdx +++ b/docs/agents/index.mdx @@ -405,76 +405,13 @@ You are in Explore mode (read-only). </Accordion> -### Harness from Plan (internal) - -**Generate a Ralph harness draft from a plan (internal)** - -<Accordion title="View harness-from-plan.md"> - -```md ---- -name: Harness from Plan -description: Generate a Ralph harness draft from a plan (internal) -base: exec -ui: - hidden: true -subagent: - runnable: true - append_prompt: | - You are a sub-agent generating a Ralph harness draft from a plan. - - - Use read-only investigation only (no file edits, no state changes). - - Output ONLY a single JSON object in a fenced code block (language: json). - - When complete, call agent_report exactly once with that JSON block. -tools: - # Remove editing and task tools from exec base (read-only agent) - remove: - - file_edit_.* - - task - - task_.* - - agent_skill_read - - agent_skill_read_file ---- - -The `.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`. -Follow it exactly (extra/unknown keys will fail validation). -You generate a Ralph harness draft (checklist + optional gates) from the plan provided in the prompt. - -=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS === - -- You MUST NOT create, edit, delete, move, or copy files. -- You MUST NOT create temporary files anywhere (including /tmp). -- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files. 
-- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.). -- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.). - -Rules: - -- Checklist items should be small, mergeable steps (max 20). -- Gates should be safe single commands that run checks (prefer make targets from this repo, e.g. "make static-check"). -- Do not use shell chaining, pipes, redirects, quotes, or destructive commands. - -Output format: a single fenced code block (language: json) containing one JSON object. - -Example JSON object: - -{ -"version": 1, -"checklist": [{ "id": "item-1", "title": "...", "status": "todo", "notes": "..." }], -"gates": [{ "command": "make static-check", "title": "...", "timeoutSecs": 600 }], -"loop": { "autoCommit": false } -} -``` - -</Accordion> - ### Harness Init (internal) **Interactive harness generation + approval (internal)** <Accordion title="View harness-init.md"> -```md +````md --- name: Harness Init description: Interactive harness generation + approval (internal) @@ -496,10 +433,16 @@ You are in Harness Init mode. Your job is to create or refine a Ralph harness for this workspace based on the current plan and the repository. === CRITICAL: LIMITED EDIT MODE === -Harness schema: +Harness schema + output path: - The `.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`. +- The required harness output file path is provided as `<harness_output_path>` (derived from `MUX_WORKSPACE_NAME`). - Follow the schema exactly (extra/unknown keys will fail validation). + +- Write the final harness config to the exact `<harness_output_path>` file. + - Do NOT invent filenames. + - Create/edit ONLY that one harness file (no extra drafts). + - Web tools are disabled in this mode; do not attempt to look up harness docs online. 
- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc` @@ -509,26 +452,32 @@ Harness schema: - Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.). - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch. -Repo-aware investigation: +=== REQUIRED WORKFLOW === -- Identify which commands should be used as gates by checking repo-native entrypoints: - - `Makefile`, `package.json` scripts, `.github/workflows/*`, etc. -- Map the plan’s changes to impacted subsystems by tracing callsites/imports. +1. Start by spawning 1-4 read-only `explore` subagents via `task` with `agentId: "explore"`. + - Keep each prompt focused (e.g. CI/workflows, Make targets, tests, etc.). + - Tell them to avoid web_search/web_fetch/google_search. + - Wait for all reports before writing the harness file. + + Suggested prompt template: + - Summarize repo-native gate entrypoints (Makefile, package.json scripts, .github/workflows/\*). + - Recommend: + - Checklist items (short titles + optional notes) + - Gate commands (exact command strings + optional title/timeout) + - (Optional) include a fenced ```json draft with { "checklist": [...], "gates": [...] } + +2. Synthesize the explore reports into a single harness config (matching `<harness_config_schema>`) and write it to `<harness_output_path>`. Gates: - Prefer a small set of safe, single commands. - Do NOT use shell chaining, pipes, redirects, or quotes. -Delegation: - -- You may spawn only read-only exploration subagents via `task` with `agentId: "explore"`. - When the harness file is ready for user review: - Call `propose_harness` exactly once. - Do NOT start the Ralph loop yourself; the UI will start it after user approval. 
-``` +```` </Accordion> diff --git a/src/common/orpc/schemas/api.ts b/src/common/orpc/schemas/api.ts index 17135cc5ea..0fc4df07a9 100644 --- a/src/common/orpc/schemas/api.ts +++ b/src/common/orpc/schemas/api.ts @@ -770,10 +770,6 @@ export const workspace = { input: z.object({ workspaceId: z.string() }), output: HarnessLoopStateSchema, }, - startFromPlan: { - input: z.object({ workspaceId: z.string() }), - output: ResultSchema(z.void(), z.string()), - }, start: { input: z.object({ workspaceId: z.string() }), output: ResultSchema(z.void(), z.string()), diff --git a/src/node/builtinAgents/harness-from-plan.md b/src/node/builtinAgents/harness-from-plan.md deleted file mode 100644 index fc73900a71..0000000000 --- a/src/node/builtinAgents/harness-from-plan.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -name: Harness from Plan -description: Generate a Ralph harness draft from a plan (internal) -base: exec -ui: - hidden: true -subagent: - runnable: true - append_prompt: | - You are a sub-agent generating a Ralph harness draft from a plan. - - - Use read-only investigation only (no file edits, no state changes). - - Output ONLY a single JSON object in a fenced code block (language: json). - - When complete, call agent_report exactly once with that JSON block. -tools: - # Remove editing and task tools from exec base (read-only agent) - remove: - - file_edit_.* - - task - - task_.* - - agent_skill_read - - agent_skill_read_file ---- - -The `.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`. -Follow it exactly (extra/unknown keys will fail validation). -You generate a Ralph harness draft (checklist + optional gates) from the plan provided in the prompt. - -=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS === - -- You MUST NOT create, edit, delete, move, or copy files. -- You MUST NOT create temporary files anywhere (including /tmp). -- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files. 
-- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.). -- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.). - -Rules: - -- Checklist items should be small, mergeable steps (max 20). -- Gates should be safe single commands that run checks (prefer make targets from this repo, e.g. "make static-check"). -- Do not use shell chaining, pipes, redirects, quotes, or destructive commands. - -Output format: a single fenced code block (language: json) containing one JSON object. - -Example JSON object: - -{ -"version": 1, -"checklist": [{ "id": "item-1", "title": "...", "status": "todo", "notes": "..." }], -"gates": [{ "command": "make static-check", "title": "...", "timeoutSecs": 600 }], -"loop": { "autoCommit": false } -} diff --git a/src/node/builtinAgents/harness-init.md b/src/node/builtinAgents/harness-init.md index ac46806577..738bd13ace 100644 --- a/src/node/builtinAgents/harness-init.md +++ b/src/node/builtinAgents/harness-init.md @@ -19,10 +19,16 @@ You are in Harness Init mode. Your job is to create or refine a Ralph harness for this workspace based on the current plan and the repository. === CRITICAL: LIMITED EDIT MODE === -Harness schema: +Harness schema + output path: - The `.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`. +- The required harness output file path is provided as `<harness_output_path>` (derived from `MUX_WORKSPACE_NAME`). - Follow the schema exactly (extra/unknown keys will fail validation). + +- Write the final harness config to the exact `<harness_output_path>` file. + - Do NOT invent filenames. + - Create/edit ONLY that one harness file (no extra drafts). + - Web tools are disabled in this mode; do not attempt to look up harness docs online. 
- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc` @@ -32,21 +38,27 @@ Harness schema: - Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.). - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch. -Repo-aware investigation: +=== REQUIRED WORKFLOW === + +1. Start by spawning 1-4 read-only `explore` subagents via `task` with `agentId: "explore"`. + - Keep each prompt focused (e.g. CI/workflows, Make targets, tests, etc.). + - Tell them to avoid web_search/web_fetch/google_search. + - Wait for all reports before writing the harness file. -- Identify which commands should be used as gates by checking repo-native entrypoints: - - `Makefile`, `package.json` scripts, `.github/workflows/*`, etc. -- Map the plan’s changes to impacted subsystems by tracing callsites/imports. + Suggested prompt template: + - Summarize repo-native gate entrypoints (Makefile, package.json scripts, .github/workflows/\*). + - Recommend: + - Checklist items (short titles + optional notes) + - Gate commands (exact command strings + optional title/timeout) + - (Optional) include a fenced ```json draft with { "checklist": [...], "gates": [...] } + +2. Synthesize the explore reports into a single harness config (matching `<harness_config_schema>`) and write it to `<harness_output_path>`. Gates: - Prefer a small set of safe, single commands. - Do NOT use shell chaining, pipes, redirects, or quotes. -Delegation: - -- You may spawn only read-only exploration subagents via `task` with `agentId: "explore"`. - When the harness file is ready for user review: - Call `propose_harness` exactly once. 
diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index 335975cd50..ec41fadbbf 100644 --- a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -1,4 +1,3 @@ -import { generateObject } from "ai"; import { os } from "@orpc/server"; import * as schemas from "@/common/orpc/schemas"; import type { ORPCContext } from "./context"; @@ -6,12 +5,6 @@ import { selectModelForNameGeneration, generateWorkspaceIdentity, } from "@/node/services/workspaceTitleGenerator"; -import { formatSendMessageError } from "@/node/services/utils/sendMessageError"; -import { - HarnessFromPlanDraftSchema, - createWorkspaceHarnessConfigFromPlanDraft, - extractJsonObjectFromMarkdown, -} from "@/node/services/workspaceHarnessFromPlan"; import type { UpdateStatus, WorkspaceActivitySnapshot, @@ -1905,179 +1898,6 @@ export const router = (authToken?: string) => { }), }, loop: { - startFromPlan: t - .input(schemas.workspace.loop.startFromPlan.input) - .output(schemas.workspace.loop.startFromPlan.output) - .handler(async ({ context, input }) => { - try { - const harness = await context.workspaceHarnessService.getHarnessForWorkspace( - input.workspaceId - ); - - // Don't stomp on user-edited harnesses. 
- if (harness.exists) { - const result = await context.loopRunnerService.start(input.workspaceId); - if (!result.success) { - return { success: false, error: result.error }; - } - return { success: true, data: undefined }; - } - - const metadata = await context.workspaceService.getInfo(input.workspaceId); - if (!metadata) { - return { success: false, error: `Workspace not found: ${input.workspaceId}` }; - } - - const runtime = createRuntime(metadata.runtimeConfig, { - projectPath: metadata.projectPath, - }); - - const planResult = await readPlanFile( - runtime, - metadata.name, - metadata.projectName, - input.workspaceId - ); - - if (!planResult.exists) { - return { - success: false, - error: `Plan file not found at ${planResult.path}`, - }; - } - - const userModel = - metadata.aiSettingsByAgent?.exec?.model ?? metadata.aiSettings?.model; - const modelString = await selectModelForNameGeneration( - context.aiService, - undefined, - userModel - ); - if (!modelString) { - return { - success: false, - error: "No AI model available to generate a harness from this plan", - }; - } - - const modelResult = await context.aiService.createModel(modelString); - if (!modelResult.success) { - return { - success: false, - error: formatSendMessageError(modelResult.error).message, - }; - } - - const buildHarnessFromPlanTaskPrompt = (options?: { errorHint?: string }): string => { - const errorHint = - typeof options?.errorHint === "string" && options.errorHint.trim().length > 0 - ? `\n\nPrevious attempt error:\n${options.errorHint.trim().slice(0, 2000)}\n\nFix the output and try again.` - : ""; - - return `Generate a Ralph harness draft (checklist + optional gates) from this plan. - -Rules: -- Checklist items should be small, mergeable steps (max 20). -- Gates should be safe, single commands that run checks (prefer make targets like "make static-check"). -- Do not use shell chaining, pipes, redirects, quotes, or destructive commands. 
- -Output: -- Return ONLY a single JSON object in a fenced code block (language: json). -${errorHint} - -Plan: - -${planResult.content}`; - }; - - const runHarnessFromPlanTask = async (options?: { errorHint?: string }) => { - try { - const taskResult = await context.taskService.create({ - parentWorkspaceId: input.workspaceId, - kind: "agent", - agentId: "harness-from-plan", - prompt: buildHarnessFromPlanTaskPrompt(options), - title: "Generate harness from plan", - modelString, - }); - - if (!taskResult.success) { - return { success: false as const, error: taskResult.error }; - } - - const report = await context.taskService.waitForAgentReport( - taskResult.data.taskId, - { - requestingWorkspaceId: input.workspaceId, - } - ); - - const extracted = extractJsonObjectFromMarkdown(report.reportMarkdown); - if (!extracted.success) { - return { success: false as const, error: extracted.error }; - } - - const parsedDraft = HarnessFromPlanDraftSchema.safeParse(extracted.data); - if (!parsedDraft.success) { - return { success: false as const, error: parsedDraft.error.message }; - } - - return { success: true as const, data: parsedDraft.data }; - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - return { success: false as const, error: message }; - } - }; - - const firstAttempt = await runHarnessFromPlanTask(); - const secondAttempt = firstAttempt.success - ? null - : await runHarnessFromPlanTask({ errorHint: firstAttempt.error }); - - const draftFromTask = firstAttempt.success - ? firstAttempt.data - : secondAttempt?.success - ? secondAttempt.data - : null; - - const draft = - draftFromTask ?? - ( - await generateObject({ - model: modelResult.data, - schema: HarnessFromPlanDraftSchema, - mode: "json", - prompt: `Generate a Ralph harness (checklist + optional gates) from this plan. - -Rules: -- Checklist items should be small, mergeable steps (max 20). 
-- Gates should be safe, single commands that run checks (prefer make targets like "make static-check"). -- Do not use shell chaining, pipes, redirects, quotes, or destructive commands. - -Plan: - -${planResult.content}`, - }) - ).object; - - const derived = createWorkspaceHarnessConfigFromPlanDraft(draft); - - await context.workspaceHarnessService.setHarnessForWorkspace( - input.workspaceId, - derived.config - ); - - const startResult = await context.loopRunnerService.start(input.workspaceId); - if (!startResult.success) { - return { success: false, error: startResult.error }; - } - - return { success: true, data: undefined }; - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - return { success: false, error: message }; - } - }), getState: t .input(schemas.workspace.loop.getState.input) .output(schemas.workspace.loop.getState.output) diff --git a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts index 5b40d60507..f1f92d9152 100644 --- a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts +++ b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts @@ -6,8 +6,7 @@ export const BUILTIN_AGENT_CONTENT = { "compact": "---\nname: Compact\ndescription: History compaction (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\n---\n\nYou are running a compaction/summarization pass. Your task is to write a concise summary of the conversation so far.\n\nIMPORTANT:\n\n- You have NO tools available. 
Do not attempt to call any tools or output JSON.\n- Simply write the summary as plain text prose.\n- Follow the user's instructions for what to include in the summary.\n", "exec": "---\nname: Exec\ndescription: Implement changes in the repository\nui:\n color: var(--color-exec-mode)\nsubagent:\n runnable: true\n append_prompt: |\n If you are running as a sub-agent in a child workspace:\n\n - When you have a final answer, call agent_report exactly once.\n - Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).\n - Do not call propose_plan.\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n remove:\n # Exec mode doesn't use planning tools\n - propose_plan\n - ask_user_question\n # Internal-only tools\n - system1_keep_ranges\n---\n\nYou are in Exec mode.\n\n- Make minimal, correct, reviewable changes that match existing codebase patterns.\n- Prefer targeted commands and checks (typecheck/tests) when feasible.\n- Treat as a standing order: keep running checks and addressing failures until they pass or a blocker outside your control arises.\n", "explore": "---\nname: Explore\ndescription: Read-only exploration of repository, environment, web, etc. 
Useful for investigation before making changes.\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n skip_init_hook: true\n append_prompt: |\n You are an Explore sub-agent running inside a child workspace.\n\n - Explore the repository to answer the prompt using read-only investigation.\n - Return concise, actionable findings (paths, symbols, callsites, and facts).\n - When you have a final answer, call agent_report exactly once.\n - Do not call agent_report until you have completed the assigned task.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nYou are in Explore mode (read-only).\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Prefer `file_read` for reading file contents (supports offset/limit paging).\n- Use bash only for read-only operations (rg, ls, git diff/show/log, etc.), or when you need piping/processing.\n", - "harness-from-plan": "---\nname: Harness from Plan\ndescription: Generate a Ralph harness draft from a plan (internal)\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n append_prompt: |\n You are a sub-agent generating a Ralph harness draft from a plan.\n\n - Use read-only investigation only (no file edits, no state changes).\n - Output ONLY a single JSON object in a fenced code block (language: json).\n - When complete, call agent_report exactly once with that JSON block.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nThe 
`.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`.\nFollow it exactly (extra/unknown keys will fail validation).\nYou generate a Ralph harness draft (checklist + optional gates) from the plan provided in the prompt.\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Use bash only for read-only operations (rg, ls, cat, git diff/show/log, etc.).\n\nRules:\n\n- Checklist items should be small, mergeable steps (max 20).\n- Gates should be safe single commands that run checks (prefer make targets from this repo, e.g. \"make static-check\").\n- Do not use shell chaining, pipes, redirects, quotes, or destructive commands.\n\nOutput format: a single fenced code block (language: json) containing one JSON object.\n\nExample JSON object:\n\n{\n\"version\": 1,\n\"checklist\": [{ \"id\": \"item-1\", \"title\": \"...\", \"status\": \"todo\", \"notes\": \"...\" }],\n\"gates\": [{ \"command\": \"make static-check\", \"title\": \"...\", \"timeoutSecs\": 600 }],\n\"loop\": { \"autoCommit\": false }\n}\n", - "harness-init": "---\nname: Harness Init\ndescription: Interactive harness generation + approval (internal)\nbase: exec\nui:\n hidden: true\n color: var(--color-harness-init-mode)\nsubagent:\n runnable: false\ntools:\n remove:\n - web_search\n - web_fetch\n - google_search\n---\n\nYou are in Harness Init mode.\n\nYour job is to create or refine a Ralph harness for this workspace based on the current plan and the repository.\n\n=== CRITICAL: LIMITED EDIT MODE ===\nHarness schema:\n\n- The `.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`.\n- Follow the schema exactly 
(extra/unknown keys will fail validation).\n- Web tools are disabled in this mode; do not attempt to look up harness docs online.\n\n- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc`\n- If you delegate to read-only `explore` subagents, instruct them to avoid web_search/web_fetch/google_search too.\n\n- Do NOT modify source code or other repo files.\n- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.).\n - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch.\n\nRepo-aware investigation:\n\n- Identify which commands should be used as gates by checking repo-native entrypoints:\n - `Makefile`, `package.json` scripts, `.github/workflows/*`, etc.\n- Map the plan’s changes to impacted subsystems by tracing callsites/imports.\n\nGates:\n\n- Prefer a small set of safe, single commands.\n- Do NOT use shell chaining, pipes, redirects, or quotes.\n\nDelegation:\n\n- You may spawn only read-only exploration subagents via `task` with `agentId: \"explore\"`.\n\nWhen the harness file is ready for user review:\n\n- Call `propose_harness` exactly once.\n- Do NOT start the Ralph loop yourself; the UI will start it after user approval.\n", + "harness-init": "---\nname: Harness Init\ndescription: Interactive harness generation + approval (internal)\nbase: exec\nui:\n hidden: true\n color: var(--color-harness-init-mode)\nsubagent:\n runnable: false\ntools:\n remove:\n - web_search\n - web_fetch\n - google_search\n---\n\nYou are in Harness Init mode.\n\nYour job is to create or refine a Ralph harness for this workspace based on the current plan and the repository.\n\n=== CRITICAL: LIMITED EDIT MODE ===\nHarness schema + output path:\n\n- The `.mux/harness/*.jsonc` schema is provided in the system prompt as `<harness_config_schema>`.\n- The required harness output file path is provided as `<harness_output_path>` (derived from `MUX_WORKSPACE_NAME`).\n- Follow the schema exactly (extra/unknown keys will fail 
validation).\n\n- Write the final harness config to the exact `<harness_output_path>` file.\n - Do NOT invent filenames.\n - Create/edit ONLY that one harness file (no extra drafts).\n\n- Web tools are disabled in this mode; do not attempt to look up harness docs online.\n\n- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc`\n- If you delegate to read-only `explore` subagents, instruct them to avoid web_search/web_fetch/google_search too.\n\n- Do NOT modify source code or other repo files.\n- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.).\n - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch.\n\n=== REQUIRED WORKFLOW ===\n\n1. Start by spawning 1-4 read-only `explore` subagents via `task` with `agentId: \"explore\"`.\n - Keep each prompt focused (e.g. CI/workflows, Make targets, tests, etc.).\n - Tell them to avoid web_search/web_fetch/google_search.\n - Wait for all reports before writing the harness file.\n\n Suggested prompt template:\n - Summarize repo-native gate entrypoints (Makefile, package.json scripts, .github/workflows/\\*).\n - Recommend:\n - Checklist items (short titles + optional notes)\n - Gate commands (exact command strings + optional title/timeout)\n - (Optional) include a fenced ```json draft with { \"checklist\": [...], \"gates\": [...] }\n\n2. 
Synthesize the explore reports into a single harness config (matching `<harness_config_schema>`) and write it to `<harness_output_path>`.\n\nGates:\n\n- Prefer a small set of safe, single commands.\n- Do NOT use shell chaining, pipes, redirects, or quotes.\n\nWhen the harness file is ready for user review:\n\n- Call `propose_harness` exactly once.\n- Do NOT start the Ralph loop yourself; the UI will start it after user approval.\n", "mux": "---\nname: Mux\ndescription: Configure mux global behavior (system workspace)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - mux_global_agents_read\n - mux_global_agents_write\n - ask_user_question\n---\n\nYou are the **Mux system assistant**.\n\nYour job is to help the user configure mux globally by editing the mux-wide instructions file:\n\n- `~/.mux/AGENTS.md`\n\n## Safety rules\n\n- You do **not** have access to arbitrary filesystem tools.\n- You do **not** have access to project secrets.\n- Before writing `~/.mux/AGENTS.md`, you must:\n 1) Read the current file (`mux_global_agents_read`).\n 2) Propose the exact change (show the new content or a concise diff).\n 3) Ask for explicit confirmation via `ask_user_question`.\n 4) Only then call `mux_global_agents_write` with `confirm: true`.\n\nIf the user declines, do not write anything.\n", "plan": "---\nname: Plan\ndescription: Create a plan before coding\nui:\n color: var(--color-plan-mode)\nsubagent:\n runnable: false\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n # Note: file_edit_* tools ARE available but restricted to plan file only at runtime\n # Note: task tools ARE enabled - Plan delegates to Explore sub-agents\n---\n\nYou are in Plan Mode.\n\n- Every response MUST produce or update a plan—no exceptions.\n- Simple requests deserve simple plans; a straightforward task might only need a few bullet points. 
Match plan complexity to the problem.\n- Keep the plan scannable; put long rationale in `<details>/<summary>` blocks.\n- Plans must be **self-contained**: include enough context, goals, constraints, and the core \"why\" so a new assistant can implement without needing the prior chat.\n- When Plan Mode is requested, assume the user wants the actual completed plan; do not merely describe how you would devise one.\n\n## Investigation step (required)\n\nBefore proposing a plan, identify what you must verify and use the best available tools\n(`file_read` for local file contents, search, or user questions). Do not guess. Investigation can be\ndone directly; sub-agents are optional.\n\nPrefer `file_read` over `bash cat` when reading files (including the plan file): long bash output may\nbe compacted, which can hide the middle of a document. Use `file_read` with offset/limit to page\nthrough larger files.\n\n## Plan format\n\n- Context/Why: Briefly restate the request, goals, and the rationale or user impact so the\n plan stands alone for a fresh implementer.\n- Evidence: List sources consulted (file paths, tool outputs, or user-provided info) and\n why they are sufficient. 
If evidence is missing, still produce a minimal plan and add a\n Questions section listing what you need to proceed.\n\nDetailed plan mode instructions (plan file path, sub-agent delegation, propose_plan workflow) are provided separately.\n", "system1_bash": "---\nname: System1 Bash\ndescription: Fast bash-output filtering (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - system1_keep_ranges\n---\n\nYou are a fast bash-output filtering assistant.\n\nYou will be given:\n\n- `maxKeptLines` (budget)\n- `Display name` (optional): a short intent label for the command\n- `Bash script`\n- `Numbered output`\n\nGiven the numbered output, decide which lines to keep so the user sees the most relevant information.\n\nIMPORTANT:\n\n- You MUST call `system1_keep_ranges` exactly once.\n- Do NOT output markdown or prose. Only the tool call (with valid JSON arguments).\n\nRules:\n\n- Line numbers are 1-based indices into the numbered output.\n- Use the `Display name` and `Bash script` as intent hints.\n- If intent is exploration/listing/search (e.g. `ls`, `find`, `rg`, `grep`, `git status`), prioritize keeping\n representative file paths/matches and any summary/counts (not just errors).\n- If intent is build/test/logs, prefer errors, stack traces, failing test summaries, and actionable warnings.\n- If the script already narrows output to a slice (e.g. `head`, `tail`, `sed -n` line ranges), avoid extra\n denoising: prefer keeping most/all lines within the budget.\n- Never filter out git merge conflict markers (`<<<<<<<`, `|||||||`, `=======`, `>>>>>>>`). If the command is searching for these markers (e.g. `rg`/`grep`), do not keep only representative matches; keep all matches within the budget.\n- Prefer omitting tool-generated advisory blocks (especially git lines starting with `hint:`) that only suggest\n next-step commands or point to docs/help. 
Keep the underlying `error:`/`fatal:`/`CONFLICT` lines, file paths,\n and conflict markers instead.\n- Exception: keep `hint:` blocks when the script is explicitly searching for them (e.g. `rg '^hint:'`) or when\n the hint is the only clue explaining a blocking state.\n- Prefer high signal density: keep ranges tight around important lines plus minimal surrounding context.\n- Merge adjacent/overlapping ranges only when the lines between are also informative. Do NOT add noise just\n to reduce range count; it's OK to return many ranges when denoising (e.g., > 8).\n- Denoise aggressively: omit duplicate/redundant lines and repeated messages with the same meaning\n (e.g., repeated progress, retries, or identical stack traces). If the same error repeats, keep only\n the most informative instance plus minimal surrounding context.\n- If there are many similar warnings/errors, keep only a few representative examples (prefer those\n with file paths/line numbers) plus any summary/count.\n- Always keep at least 1 line if any output exists.\n- Choose ranges that keep at most `maxKeptLines` lines total (the caller may truncate).\n\nExample:\n\n- Numbered output:\n - 0001| building...\n - 0002| ERROR: expected X, got Y\n - 0003| at path/to/file.ts:12:3\n - 0004| done\n- Tool call:\n - system1_keep_ranges({\"keep_ranges\":[{\"start\":2,\"end\":3,\"reason\":\"error\"}]})\n", diff --git a/src/node/services/agentDefinitions/builtInAgentDefinitions.ts b/src/node/services/agentDefinitions/builtInAgentDefinitions.ts index 4110250ec3..a9a9ddd136 100644 --- a/src/node/services/agentDefinitions/builtInAgentDefinitions.ts +++ b/src/node/services/agentDefinitions/builtInAgentDefinitions.ts @@ -18,7 +18,6 @@ const BUILT_IN_SOURCES: BuiltInSource[] = [ { id: "exec", content: BUILTIN_AGENT_CONTENT.exec }, { id: "plan", content: BUILTIN_AGENT_CONTENT.plan }, { id: "compact", content: BUILTIN_AGENT_CONTENT.compact }, - { id: "harness-from-plan", content: BUILTIN_AGENT_CONTENT["harness-from-plan"] 
}, { id: "harness-init", content: BUILTIN_AGENT_CONTENT["harness-init"] }, { id: "explore", content: BUILTIN_AGENT_CONTENT.explore }, { id: "system1_bash", content: BUILTIN_AGENT_CONTENT.system1_bash }, diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index a196ab36be..7aa80e611c 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -1509,6 +1509,7 @@ export class AIService extends EventEmitter { // This block is generated from the Zod schema at runtime to avoid schema drift. effectiveAdditionalInstructions = maybeAppendHarnessConfigSchemaToAdditionalInstructions({ agentId: effectiveAgentId, + workspaceName: metadata.name, additionalInstructions: effectiveAdditionalInstructions, }); diff --git a/src/node/services/harnessConfigSchemaPrompt.test.ts b/src/node/services/harnessConfigSchemaPrompt.test.ts index 39c9b53477..b2249819dc 100644 --- a/src/node/services/harnessConfigSchemaPrompt.test.ts +++ b/src/node/services/harnessConfigSchemaPrompt.test.ts @@ -42,15 +42,20 @@ describe("harness config schema prompt injection", () => { runtimeConfig: DEFAULT_RUNTIME_CONFIG, }; - for (const agentId of ["harness-init", "harness-from-plan"] as const) { + for (const agentId of ["harness-init"] as const) { const additional = maybeAppendHarnessConfigSchemaToAdditionalInstructions({ agentId, + workspaceName: metadata.name, additionalInstructions: "extra", }); expect(additional).toContain("<harness_config_schema"); + expect(additional).toContain("<harness_output_path>"); + expect(additional).toContain(`.mux/harness/${metadata.name}.jsonc`); const systemMessage = await buildSystemMessage(metadata, runtime, workspaceDir, additional); expect(systemMessage).toContain("<harness_config_schema"); + expect(systemMessage).toContain("<harness_output_path>"); + expect(systemMessage).toContain(`.mux/harness/${metadata.name}.jsonc`); const match = /<harness_config_schema[^>]*>\s*([\s\S]*?)\s*<\/harness_config_schema>/m.exec( systemMessage 
@@ -66,6 +71,7 @@ describe("harness config schema prompt injection", () => { const nonHarness = maybeAppendHarnessConfigSchemaToAdditionalInstructions({ agentId: "exec", + workspaceName: metadata.name, additionalInstructions: "extra", }); expect(nonHarness).toBe("extra"); diff --git a/src/node/services/harnessConfigSchemaPrompt.ts b/src/node/services/harnessConfigSchemaPrompt.ts index 0047c845c0..7450ad076b 100644 --- a/src/node/services/harnessConfigSchemaPrompt.ts +++ b/src/node/services/harnessConfigSchemaPrompt.ts @@ -28,14 +28,29 @@ function getHarnessConfigSchemaPromptBlock(): string { return cachedHarnessConfigSchemaBlock; } +function normalizeWorkspaceName(value: unknown): string { + return typeof value === "string" && value.trim().length > 0 ? value.trim() : ""; +} + +function getHarnessOutputPathPromptBlock(workspaceName: unknown): string | null { + const normalized = normalizeWorkspaceName(workspaceName); + if (!normalized) return null; + + return `<harness_output_path>.mux/harness/${normalized}.jsonc</harness_output_path>`; +} + export function maybeAppendHarnessConfigSchemaToAdditionalInstructions(args: { agentId: string; + workspaceName: string | undefined; additionalInstructions: string | undefined; }): string | undefined { - const shouldInject = args.agentId === "harness-init" || args.agentId === "harness-from-plan"; + const shouldInject = args.agentId === "harness-init"; if (!shouldInject) return args.additionalInstructions; - const block = getHarnessConfigSchemaPromptBlock(); + const schemaBlock = getHarnessConfigSchemaPromptBlock(); + const outputPathBlock = getHarnessOutputPathPromptBlock(args.workspaceName); + const block = outputPathBlock ? `${schemaBlock}\n\n${outputPathBlock}` : schemaBlock; + const additional = args.additionalInstructions; if (additional && additional.trim().length > 0) { return `${additional}\n\n${block}`;