= (props) => {
msg.toolName === "propose_plan" &&
msg.id === latestProposePlanId
}
+ isLatestProposeHarness={
+ msg.type === "tool" &&
+ msg.toolName === "propose_harness" &&
+ msg.id === latestProposeHarnessId
+ }
bashOutputGroup={bashOutputGroup}
userMessageNavigation={
msg.type === "user" && userMessageNavMap
diff --git a/src/browser/components/Messages/MessageRenderer.tsx b/src/browser/components/Messages/MessageRenderer.tsx
index a7a7b6280e..aad48f627b 100644
--- a/src/browser/components/Messages/MessageRenderer.tsx
+++ b/src/browser/components/Messages/MessageRenderer.tsx
@@ -24,6 +24,8 @@ interface MessageRendererProps {
onReviewNote?: (data: ReviewNoteData) => void;
/** Whether this message is the latest propose_plan tool call (for external edit detection) */
isLatestProposePlan?: boolean;
+ /** Whether this message is the latest propose_harness tool call (for external edit detection) */
+ isLatestProposeHarness?: boolean;
/** Optional bash_output grouping info (computed at render-time) */
bashOutputGroup?: BashOutputGroupInfo;
/** Navigation info for user messages (backward/forward between user messages) */
@@ -40,6 +42,7 @@ export const MessageRenderer = React.memo
(
isCompacting,
onReviewNote,
isLatestProposePlan,
+ isLatestProposeHarness,
bashOutputGroup,
userMessageNavigation,
}) => {
@@ -72,6 +75,7 @@ export const MessageRenderer = React.memo(
workspaceId={workspaceId}
onReviewNote={onReviewNote}
isLatestProposePlan={isLatestProposePlan}
+ isLatestProposeHarness={isLatestProposeHarness}
bashOutputGroup={bashOutputGroup}
/>
);
diff --git a/src/browser/components/Messages/ToolMessage.tsx b/src/browser/components/Messages/ToolMessage.tsx
index 037c95b0f6..f72ca3815f 100644
--- a/src/browser/components/Messages/ToolMessage.tsx
+++ b/src/browser/components/Messages/ToolMessage.tsx
@@ -17,6 +17,8 @@ interface ToolMessageProps {
onReviewNote?: (data: ReviewNoteData) => void;
/** Whether this is the latest propose_plan in the conversation */
isLatestProposePlan?: boolean;
+ /** Whether this is the latest propose_harness in the conversation */
+ isLatestProposeHarness?: boolean;
/** Optional bash_output grouping info */
bashOutputGroup?: BashOutputGroupInfo;
}
@@ -27,6 +29,7 @@ export const ToolMessage: React.FC = ({
workspaceId,
onReviewNote,
isLatestProposePlan,
+ isLatestProposeHarness,
bashOutputGroup,
}) => {
const { toolName, args, result, status, toolCallId } = message;
@@ -40,6 +43,12 @@ export const ToolMessage: React.FC = ({
? bashOutputGroup.position
: undefined;
+ const isLatest =
+ toolName === "propose_plan"
+ ? isLatestProposePlan
+ : toolName === "propose_harness"
+ ? isLatestProposeHarness
+ : undefined;
// Extract hook output if present (only shown when hook produced output)
const hookOutput = extractHookOutput(result);
const hookDuration = extractHookDuration(result);
@@ -59,8 +68,8 @@ export const ToolMessage: React.FC = ({
startedAt={message.timestamp}
// FileEdit-specific
onReviewNote={onReviewNote}
- // ProposePlan-specific
- isLatest={isLatestProposePlan}
+ // ProposePlan/ProposeHarness-specific
+ isLatest={isLatest}
// BashOutput-specific
groupPosition={groupPosition}
// CodeExecution-specific
diff --git a/src/browser/components/RightSidebar.tsx b/src/browser/components/RightSidebar.tsx
index 276ca1556e..2bd7d60a99 100644
--- a/src/browser/components/RightSidebar.tsx
+++ b/src/browser/components/RightSidebar.tsx
@@ -11,7 +11,11 @@ import {
updatePersistedState,
usePersistedState,
} from "@/browser/hooks/usePersistedState";
-import { useWorkspaceUsage, useWorkspaceStatsSnapshot } from "@/browser/stores/WorkspaceStore";
+import {
+ useWorkspaceUsage,
+ useWorkspaceStatsSnapshot,
+ workspaceStore,
+} from "@/browser/stores/WorkspaceStore";
import { useFeatureFlags } from "@/browser/contexts/FeatureFlagsContext";
import { useAPI } from "@/browser/contexts/API";
import { CostsTab } from "./RightSidebar/CostsTab";
@@ -21,6 +25,7 @@ import { ErrorBoundary } from "./ErrorBoundary";
import { StatsTab } from "./RightSidebar/StatsTab";
import { sumUsageHistory, type ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
+import { RefreshController } from "@/browser/utils/RefreshController";
import { matchesKeybind, KEYBINDS, formatKeybind } from "@/browser/utils/ui/keybinds";
import { SidebarCollapseButton } from "./ui/SidebarCollapseButton";
import { cn } from "@/common/lib/utils";
@@ -51,7 +56,6 @@ import {
parseRightSidebarLayoutState,
removeTabEverywhere,
reorderTabInTabset,
- selectTabByIndex,
selectTabInTabset,
setFocusedTabset,
updateSplitSizes,
@@ -71,6 +75,7 @@ import {
import {
CostsTabLabel,
ExplorerTabLabel,
+ HarnessTabLabel,
FileTabLabel,
ReviewTabLabel,
StatsTabLabel,
@@ -79,6 +84,7 @@ import {
type ReviewStats,
} from "./RightSidebar/tabs";
import { FileViewerTab } from "./RightSidebar/FileViewer";
+import { HarnessTab } from "./RightSidebar/HarnessTab";
import { ExplorerTab } from "./RightSidebar/ExplorerTab";
import {
DndContext,
@@ -189,6 +195,7 @@ const DragAwarePanelResizeHandle: React.FC<{
return ;
};
+type HarnessPresence = "unknown" | "exists" | "missing";
type TabsetNode = Extract;
interface RightSidebarTabsetNodeProps {
@@ -204,6 +211,7 @@ interface RightSidebarTabsetNodeProps {
onReviewStatsChange: (stats: ReviewStats | null) => void;
sessionCost: number | null;
statsTabEnabled: boolean;
+ harnessTabEnabled: boolean;
sessionDuration: number | null;
/** Whether any sidebar tab is currently being dragged */
isDraggingTab: boolean;
@@ -235,13 +243,19 @@ interface RightSidebarTabsetNodeProps {
}
const RightSidebarTabsetNode: React.FC = (props) => {
+ const isTabEnabled = (tab: TabType): boolean => {
+ if (tab === "stats") return props.statsTabEnabled;
+ if (tab === "harness") return props.harnessTabEnabled;
+ return true;
+ };
+
+ const activeTab = isTabEnabled(props.node.activeTab)
+ ? props.node.activeTab
+ : (props.node.tabs.find(isTabEnabled) ?? props.node.activeTab);
const tabsetBaseId = `${props.baseId}-${props.node.id}`;
// Content container class comes from tab registry - each tab defines its own padding/overflow
- const tabsetContentClassName = cn(
- "relative flex-1 min-h-0",
- getTabContentClassName(props.node.activeTab)
- );
+ const tabsetContentClassName = cn("relative flex-1 min-h-0", getTabContentClassName(activeTab));
// Drop zones using @dnd-kit's useDroppable
const { setNodeRef: contentRef, isOver: isOverContent } = useDroppable({
@@ -295,7 +309,7 @@ const RightSidebarTabsetNode: React.FC = (props) =>
const terminalTabs = props.node.tabs.filter(isTerminalTab);
const items = props.node.tabs.flatMap((tab) => {
- if (tab === "stats" && !props.statsTabEnabled) {
+ if (!isTabEnabled(tab)) {
return [];
}
@@ -345,6 +359,8 @@ const RightSidebarTabsetNode: React.FC = (props) =>
label = ;
} else if (tab === "explorer") {
label = ;
+ } else if (tab === "harness") {
+ label = ;
} else if (tab === "stats") {
label = ;
} else if (isTerminal) {
@@ -368,7 +384,7 @@ const RightSidebarTabsetNode: React.FC = (props) =>
{
id: tabId,
panelId,
- selected: props.node.activeTab === tab,
+ selected: activeTab === tab,
onSelect: () => selectTab(tab),
label,
tooltip,
@@ -385,11 +401,13 @@ const RightSidebarTabsetNode: React.FC = (props) =>
const costsPanelId = `${tabsetBaseId}-panel-costs`;
const reviewPanelId = `${tabsetBaseId}-panel-review`;
+ const harnessPanelId = `${tabsetBaseId}-panel-harness`;
const explorerPanelId = `${tabsetBaseId}-panel-explorer`;
const statsPanelId = `${tabsetBaseId}-panel-stats`;
const costsTabId = `${tabsetBaseId}-tab-costs`;
const reviewTabId = `${tabsetBaseId}-tab-review`;
+ const harnessTabId = `${tabsetBaseId}-tab-harness`;
const explorerTabId = `${tabsetBaseId}-tab-explorer`;
const statsTabId = `${tabsetBaseId}-tab-stats`;
@@ -463,7 +481,7 @@ const RightSidebarTabsetNode: React.FC = (props) =>
)}
/>
- {props.node.activeTab === "costs" && (
+ {activeTab === "costs" && (
@@ -473,7 +491,7 @@ const RightSidebarTabsetNode: React.FC = (props) =>
{terminalTabs.map((terminalTab) => {
const terminalTabId = `${tabsetBaseId}-tab-${terminalTab}`;
const terminalPanelId = `${tabsetBaseId}-panel-${terminalTab}`;
- const isActive = props.node.activeTab === terminalTab;
+ const isActive = activeTab === terminalTab;
// Check if this terminal should be auto-focused (was just opened via keybind)
const terminalSessionId = getTerminalSessionId(terminalTab);
const shouldAutoFocus = isActive && terminalSessionId === props.autoFocusTerminalSession;
@@ -504,7 +522,7 @@ const RightSidebarTabsetNode: React.FC = (props) =>
role="tabpanel"
id={statsPanelId}
aria-labelledby={statsTabId}
- hidden={props.node.activeTab !== "stats"}
+ hidden={activeTab !== "stats"}
>
@@ -512,7 +530,13 @@ const RightSidebarTabsetNode: React.FC = (props) =>
)}
- {props.node.activeTab === "explorer" && (
+ {props.harnessTabEnabled && activeTab === "harness" && (
+ = (props) =>
const filePath = getFilePath(fileTab);
const fileTabId = `${tabsetBaseId}-tab-${fileTab}`;
const filePanelId = `${tabsetBaseId}-panel-${fileTab}`;
- const isActive = props.node.activeTab === fileTab;
+ const isActive = activeTab === fileTab;
return (
= (props) =>
);
})}
- {props.node.activeTab === "review" && (
+ {activeTab === "review" && (
= ({
// Stats tab feature flag
const { statsTabState } = useFeatureFlags();
+
+ const [harnessPresence, setHarnessPresence] = React.useState("unknown");
+
+ const { api } = useAPI();
+ const harnessTabEnabled = harnessPresence === "exists";
const statsTabEnabled = Boolean(statsTabState?.enabled);
// Read last-used focused tab for better defaults when initializing a new layout.
@@ -655,6 +684,46 @@ const RightSidebarComponent: React.FC = ({
setLayoutDraft(null);
}, [setLayoutRaw]);
+ const refreshHarnessPresence = React.useCallback(async () => {
+ if (!api) return;
+
+ try {
+ const result = await api.workspace.harness.exists({ workspaceId });
+ if (!result.success) {
+ return;
+ }
+
+ setHarnessPresence(result.data.exists ? "exists" : "missing");
+ } catch {
+ // Defensive: keep the previous state (don't crash / don't force-hide).
+ }
+ }, [api, workspaceId]);
+
+ const harnessPresenceRefreshController = React.useMemo(
+ () =>
+ new RefreshController({
+ onRefresh: refreshHarnessPresence,
+ debounceMs: 1000,
+ refreshOnFocus: true,
+ }),
+ [refreshHarnessPresence]
+ );
+
+ React.useEffect(() => {
+ harnessPresenceRefreshController.bindListeners();
+ return () => harnessPresenceRefreshController.dispose();
+ }, [harnessPresenceRefreshController]);
+
+ React.useEffect(() => {
+ setHarnessPresence("unknown");
+ harnessPresenceRefreshController.requestImmediate();
+ }, [harnessPresenceRefreshController, workspaceId]);
+
+ React.useEffect(() => {
+ return workspaceStore.subscribeFileModifyingTool(() => {
+ harnessPresenceRefreshController.schedule();
+ }, workspaceId);
+ }, [harnessPresenceRefreshController, workspaceId]);
const layout = React.useMemo(
() => parseRightSidebarLayoutState(layoutDraft ?? layoutRaw, initialActiveTab),
[layoutDraft, layoutRaw, initialActiveTab]
@@ -679,9 +748,33 @@ const RightSidebarComponent: React.FC = ({
return prev;
});
}, [initialActiveTab, setLayoutRaw, statsTabEnabled]);
- // If we ever deserialize an invalid layout (e.g. schema changes), reset to defaults.
+
+ // If harness files exist, ensure the Harness tab exists in the layout.
+ // If missing, ensure it doesn't linger in persisted layouts.
+ React.useEffect(() => {
+ if (harnessPresence === "unknown") {
+ return;
+ }
+
+ setLayoutRaw((prevRaw) => {
+ const prev = parseRightSidebarLayoutState(prevRaw, initialActiveTab);
+ const hasHarness = collectAllTabs(prev.root).includes("harness");
+
+ if (harnessPresence === "exists" && !hasHarness) {
+ // Add harness tab to the focused tabset without stealing focus.
+ return addTabToFocusedTabset(prev, "harness", false);
+ }
+
+ if (harnessPresence === "missing" && hasHarness) {
+ return removeTabEverywhere(prev, "harness");
+ }
+
+ return prev;
+ });
+ }, [harnessPresence, initialActiveTab, setLayoutRaw]);
React.useEffect(() => {
if (!isRightSidebarLayoutState(layoutRaw)) {
+ // If we ever deserialize an invalid layout (e.g. schema changes), reset to defaults.
setLayoutRaw(layout);
}
}, [layout, layoutRaw, setLayoutRaw]);
@@ -748,9 +841,22 @@ const RightSidebarComponent: React.FC = ({
layoutRawRef.current,
initialActiveTab
);
- const allTabs = collectAllTabsWithTabset(currentLayout.root);
+ const allTabs = collectAllTabsWithTabset(currentLayout.root).filter(({ tab }) => {
+ if (tab === "stats" && !statsTabEnabled) {
+ return false;
+ }
+ if (tab === "harness" && !harnessTabEnabled) {
+ return false;
+ }
+ return true;
+ });
+
const target = allTabs[i];
- if (target && isTerminalTab(target.tab)) {
+ if (!target) {
+ return;
+ }
+
+ if (isTerminalTab(target.tab)) {
const sessionId = getTerminalSessionId(target.tab);
if (sessionId) {
setAutoFocusTerminalSession(sessionId);
@@ -761,7 +867,9 @@ const RightSidebarComponent: React.FC = ({
_setFocusTrigger((prev) => prev + 1);
}
- setLayout((prev) => selectTabByIndex(prev, i));
+ setLayout((prev) =>
+ selectTabInTabset(setFocusedTabset(prev, target.tabsetId), target.tabsetId, target.tab)
+ );
setCollapsed(false);
return;
}
@@ -770,7 +878,15 @@ const RightSidebarComponent: React.FC = ({
window.addEventListener("keydown", handleKeyDown);
return () => window.removeEventListener("keydown", handleKeyDown);
- }, [initialActiveTab, setAutoFocusTerminalSession, setCollapsed, setLayout, _setFocusTrigger]);
+ }, [
+ harnessTabEnabled,
+ initialActiveTab,
+ setAutoFocusTerminalSession,
+ setCollapsed,
+ setLayout,
+ statsTabEnabled,
+ _setFocusTrigger,
+ ]);
const usage = useWorkspaceUsage(workspaceId);
@@ -778,13 +894,22 @@ const RightSidebarComponent: React.FC = ({
// Build map of tab → position for keybind tooltips
const tabPositions = React.useMemo(() => {
- const allTabs = collectAllTabsWithTabset(layout.root);
+ const allTabs = collectAllTabsWithTabset(layout.root).filter(({ tab }) => {
+ if (tab === "stats" && !statsTabEnabled) {
+ return false;
+ }
+ if (tab === "harness" && !harnessTabEnabled) {
+ return false;
+ }
+ return true;
+ });
+
const positions = new Map();
allTabs.forEach(({ tab }, index) => {
positions.set(tab, index);
});
return positions;
- }, [layout.root]);
+ }, [harnessTabEnabled, layout.root, statsTabEnabled]);
// Calculate session cost for tab display
const sessionCost = React.useMemo(() => {
@@ -828,7 +953,6 @@ const RightSidebarComponent: React.FC = ({
});
// API for opening terminal windows and managing sessions
- const { api } = useAPI();
// Keyboard shortcut for closing active tab (Ctrl/Cmd+W)
// Works for terminal tabs and file tabs
@@ -1208,6 +1332,7 @@ const RightSidebarComponent: React.FC = ({
focusTrigger={focusTrigger}
onReviewNote={onReviewNote}
reviewStats={reviewStats}
+ harnessTabEnabled={harnessTabEnabled}
statsTabEnabled={statsTabEnabled}
sessionDuration={sessionDuration}
onReviewStatsChange={setReviewStats}
diff --git a/src/browser/components/RightSidebar/HarnessTab.tsx b/src/browser/components/RightSidebar/HarnessTab.tsx
new file mode 100644
index 0000000000..78645a6719
--- /dev/null
+++ b/src/browser/components/RightSidebar/HarnessTab.tsx
@@ -0,0 +1,362 @@
+import React from "react";
+
+import { Button } from "@/browser/components/ui/button";
+import { useAPI, type APIClient } from "@/browser/contexts/API";
+import type {
+ HarnessGateRunResult,
+ HarnessLoopState,
+ GitCheckpointResult,
+ WorkspaceHarnessConfig,
+} from "@/common/types/harness";
+
+interface HarnessGetData {
+ config: WorkspaceHarnessConfig;
+ paths: { configPath: string; progressPath: string };
+ exists: boolean;
+ lastGateRun: HarnessGateRunResult | null;
+ lastCheckpoint: GitCheckpointResult | null;
+ loopState: HarnessLoopState;
+}
+
+function formatChecklistStatus(status: string): string {
+ if (status === "done") return "[x]";
+ if (status === "doing") return "[~]";
+ if (status === "blocked") return "[!]";
+ return "[ ]";
+}
+
+function formatTimestamp(ts: number | null | undefined): string {
+ if (!ts) return "—";
+ try {
+ return new Date(ts).toLocaleString();
+ } catch {
+ return String(ts);
+ }
+}
+
+export function HarnessTab(props: { workspaceId: string }): React.ReactNode {
+ const apiState = useAPI();
+
+ const [data, setData] = React.useState(null);
+ const [error, setError] = React.useState(null);
+ const [busy, setBusy] = React.useState(false);
+
+ const refresh = React.useCallback(async () => {
+ if (!apiState.api) return;
+
+ setError(null);
+ try {
+ const result = await apiState.api.workspace.harness.get({ workspaceId: props.workspaceId });
+ if (!result.success) {
+ setError(result.error);
+ return;
+ }
+ setData(result.data);
+ } catch (err) {
+ setError(err instanceof Error ? err.message : String(err));
+ }
+ }, [apiState.api, props.workspaceId]);
+
+ React.useEffect(() => {
+ void refresh();
+ }, [refresh]);
+
+ // Keep loop state live while the tab is mounted.
+ React.useEffect(() => {
+ const api = apiState.api;
+ if (!api) return;
+
+ const abortController = new AbortController();
+ const { signal } = abortController;
+
+ (async () => {
+ try {
+ const iterator = await api.workspace.loop.subscribe(
+ { workspaceId: props.workspaceId },
+ { signal }
+ );
+
+ for await (const loopState of iterator) {
+ if (signal.aborted) break;
+ setData((prev) => (prev ? { ...prev, loopState } : prev));
+ }
+ } catch (err) {
+ if (!signal.aborted) {
+ console.error("Failed to subscribe to loop state:", err);
+ }
+ }
+ })();
+
+ return () => abortController.abort();
+ }, [apiState.api, props.workspaceId]);
+
+ const runAction = React.useCallback(
+ async (fn: (api: APIClient) => Promise) => {
+ const api = apiState.api;
+ if (!api) return;
+
+ setBusy(true);
+ setError(null);
+ try {
+ await fn(api);
+ await refresh();
+ } catch (err) {
+ setError(err instanceof Error ? err.message : String(err));
+ } finally {
+ setBusy(false);
+ }
+ },
+ [apiState.api, refresh]
+ );
+
+ if (apiState.status !== "connected" && apiState.status !== "degraded") {
+ return (
+
+ );
+ }
+
+ if (!data) {
+ return (
+
+
+
Harness
+ void refresh()}>
+ Refresh
+
+
+ {error &&
{error}
}
+
Loading…
+
+ );
+ }
+
+ const loopState = data.loopState;
+
+ return (
+
+
+
Harness
+
+ void refresh()}>
+ Refresh
+
+
+
+
+ {error &&
{error}
}
+
+
+
+ void runAction(async (api) => {
+ const result = await api.workspace.harness.runGates({
+ workspaceId: props.workspaceId,
+ });
+ if (!result.success) throw new Error(result.error);
+ })
+ }
+ >
+ Run gates
+
+
+ void runAction(async (api) => {
+ const result = await api.workspace.harness.checkpoint({
+ workspaceId: props.workspaceId,
+ });
+ if (!result.success) throw new Error(result.error);
+ })
+ }
+ >
+ Checkpoint
+
+
+ void runAction(async (api) => {
+ const result = await api.workspace.harness.resetContext({
+ workspaceId: props.workspaceId,
+ });
+ if (!result.success) throw new Error(result.error);
+ })
+ }
+ >
+ Reset context
+
+
+
+
+
Files
+
+
{data.paths.progressPath}
+
{data.paths.configPath}
+
+ {!data.exists && (
+
+ No harness file yet. Create it by editing the config path above.
+
+ )}
+
+
+
+
+
+
Loop
+
+ {loopState.status} • iteration {loopState.iteration}
+
+
+
+ {loopState.status !== "running" ? (
+
+ void runAction(async (api) => {
+ const result = await api.workspace.loop.start({
+ workspaceId: props.workspaceId,
+ });
+ if (!result.success) throw new Error(result.error);
+ })
+ }
+ >
+ Start
+
+ ) : (
+
+ void runAction(async (api) => {
+ const result = await api.workspace.loop.pause({
+ workspaceId: props.workspaceId,
+ });
+ if (!result.success) throw new Error(result.error);
+ })
+ }
+ >
+ Pause
+
+ )}
+
+ void runAction(async (api) => {
+ const result = await api.workspace.loop.stop({
+ workspaceId: props.workspaceId,
+ });
+ if (!result.success) throw new Error(result.error);
+ })
+ }
+ >
+ Stop
+
+
+
+
+
+
+
Started
+
{formatTimestamp(loopState.startedAt)}
+
+
+
Failures
+
{loopState.consecutiveFailures}
+
+
+
Current item
+
{loopState.currentItemTitle ?? "—"}
+
+
+
Stopped reason
+
{loopState.stoppedReason ?? "—"}
+
+
+
+
+
+
Checklist
+
+ {data.config.checklist.length === 0 ? (
+
(no checklist items)
+ ) : (
+
+ {data.config.checklist.map((item) => (
+
+ {formatChecklistStatus(item.status)} {" "}
+ {item.title}
+
+ ))}
+
+ )}
+
+
+
+
+
Last gates
+
+ {data.lastGateRun ? (
+ <>
+
+ {data.lastGateRun.ok ? "PASS" : "FAIL"} •{" "}
+ {Math.round(data.lastGateRun.totalDurationMs / 1000)}s • finished{" "}
+ {formatTimestamp(data.lastGateRun.finishedAt)}
+
+ {data.lastGateRun.results.length > 0 && (
+
+ Details
+
+ {data.lastGateRun.results.map((r, idx) => (
+
+
{r.command}
+
exit {r.exitCode}
+ {(r.stderr || r.stdout) && (
+
+ {(r.stderr ? `stderr:\n${r.stderr}\n` : "") +
+ (r.stdout ? `stdout:\n${r.stdout}` : "")}
+
+ )}
+
+ ))}
+
+
+ )}
+ >
+ ) : (
+
(not run yet)
+ )}
+
+
+
+
+
Last checkpoint
+
+ {data.lastCheckpoint ? (
+ <>
+
{data.lastCheckpoint.committed ? "Committed" : "No changes"}
+
{data.lastCheckpoint.commitSha ?? "—"}
+
{data.lastCheckpoint.commitMessage ?? "—"}
+ >
+ ) : (
+
(none)
+ )}
+
+
+
+ );
+}
diff --git a/src/browser/components/RightSidebar/tabs/TabLabels.tsx b/src/browser/components/RightSidebar/tabs/TabLabels.tsx
index 26c33d9734..cc2aa63b68 100644
--- a/src/browser/components/RightSidebar/tabs/TabLabels.tsx
+++ b/src/browser/components/RightSidebar/tabs/TabLabels.tsx
@@ -5,7 +5,7 @@
*/
import React from "react";
-import { ExternalLink, FolderTree, Terminal as TerminalIcon, X } from "lucide-react";
+import { ExternalLink, FolderTree, ListChecks, Terminal as TerminalIcon, X } from "lucide-react";
import { Tooltip, TooltipContent, TooltipTrigger } from "../../ui/tooltip";
import { FileIcon } from "../../FileIcon";
import { formatTabDuration, type ReviewStats } from "./registry";
@@ -71,6 +71,14 @@ export const ExplorerTabLabel: React.FC = () => (
);
+/** Harness tab label with checklist icon */
+export const HarnessTabLabel: React.FC = () => (
+
+
+ Harness
+
+);
+
interface FileTabLabelProps {
/** File path (relative to workspace) */
filePath: string;
diff --git a/src/browser/components/RightSidebar/tabs/index.ts b/src/browser/components/RightSidebar/tabs/index.ts
index 4744d46062..3a61fbc7fa 100644
--- a/src/browser/components/RightSidebar/tabs/index.ts
+++ b/src/browser/components/RightSidebar/tabs/index.ts
@@ -24,6 +24,7 @@ export {
export {
CostsTabLabel,
ExplorerTabLabel,
+ HarnessTabLabel,
FileTabLabel,
ReviewTabLabel,
StatsTabLabel,
diff --git a/src/browser/components/RightSidebar/tabs/registry.ts b/src/browser/components/RightSidebar/tabs/registry.ts
index 9662e2c0cf..65de65c1c1 100644
--- a/src/browser/components/RightSidebar/tabs/registry.ts
+++ b/src/browser/components/RightSidebar/tabs/registry.ts
@@ -78,25 +78,30 @@ export interface TabConfig {
}
/** Static tab configurations (non-terminal tabs) */
-export const TAB_CONFIGS: Record<"costs" | "review" | "explorer" | "stats", TabConfig> = {
- costs: {
- name: "Costs",
- contentClassName: "overflow-y-auto p-[15px]",
- },
- review: {
- name: "Review",
- contentClassName: "overflow-y-auto p-0",
- },
- explorer: {
- name: "Explorer",
- contentClassName: "overflow-y-auto p-0",
- },
- stats: {
- name: "Stats",
- contentClassName: "overflow-y-auto p-[15px]",
- featureFlag: "statsTab",
- },
-};
+export const TAB_CONFIGS: Record<"costs" | "review" | "explorer" | "harness" | "stats", TabConfig> =
+ {
+ costs: {
+ name: "Costs",
+ contentClassName: "overflow-y-auto p-[15px]",
+ },
+ review: {
+ name: "Review",
+ contentClassName: "overflow-y-auto p-0",
+ },
+ harness: {
+ name: "Harness",
+ contentClassName: "overflow-y-auto p-[15px]",
+ },
+ explorer: {
+ name: "Explorer",
+ contentClassName: "overflow-y-auto p-0",
+ },
+ stats: {
+ name: "Stats",
+ contentClassName: "overflow-y-auto p-[15px]",
+ featureFlag: "statsTab",
+ },
+ };
/** Terminal tab configuration */
export const TERMINAL_TAB_CONFIG: TabConfig = {
@@ -114,7 +119,13 @@ export const FILE_TAB_CONFIG: TabConfig = {
/** Get config for a tab type */
export function getTabConfig(tab: TabType): TabConfig {
- if (tab === "costs" || tab === "review" || tab === "explorer" || tab === "stats") {
+ if (
+ tab === "costs" ||
+ tab === "review" ||
+ tab === "explorer" ||
+ tab === "harness" ||
+ tab === "stats"
+ ) {
return TAB_CONFIGS[tab];
}
// File tabs
diff --git a/src/browser/components/tools/ProposeHarnessToolCall.tsx b/src/browser/components/tools/ProposeHarnessToolCall.tsx
new file mode 100644
index 0000000000..013d6e14d9
--- /dev/null
+++ b/src/browser/components/tools/ProposeHarnessToolCall.tsx
@@ -0,0 +1,269 @@
+import React, { useEffect, useRef, useState } from "react";
+import { ClipboardCheck, ClipboardList, Play } from "lucide-react";
+
+import type { ProposeHarnessToolError, ProposeHarnessToolResult } from "@/common/types/tools";
+import type { WorkspaceHarnessConfig } from "@/common/types/harness";
+import { useAPI } from "@/browser/contexts/API";
+import { usePopoverError } from "@/browser/hooks/usePopoverError";
+import { getAgentIdKey } from "@/common/constants/storage";
+import { updatePersistedState } from "@/browser/hooks/usePersistedState";
+import { cn } from "@/common/lib/utils";
+
+import {
+ ExpandIcon,
+ StatusIndicator,
+ ToolContainer,
+ ToolDetails,
+ ToolHeader,
+ ToolName,
+} from "./shared/ToolPrimitives";
+import { getStatusDisplay, type ToolStatus, useToolExpansion } from "./shared/toolUtils";
+import { PopoverError } from "../PopoverError";
+import { IconActionButton, type ButtonConfig } from "../Messages/MessageWindow";
+
+interface HarnessGetData {
+ config: WorkspaceHarnessConfig;
+ paths: { configPath: string; progressPath: string };
+ exists: boolean;
+}
+
+function isProposeHarnessResult(result: unknown): result is ProposeHarnessToolResult {
+ return (
+ result !== null &&
+ typeof result === "object" &&
+ "success" in result &&
+ result.success === true &&
+ "harnessPath" in result
+ );
+}
+
+function isProposeHarnessError(result: unknown): result is ProposeHarnessToolError {
+ return (
+ result !== null &&
+ typeof result === "object" &&
+ "success" in result &&
+ result.success === false &&
+ "error" in result
+ );
+}
+
+function formatChecklistStatus(status: string): string {
+ if (status === "done") return "[x]";
+ if (status === "doing") return "[~]";
+ if (status === "blocked") return "[!]";
+ return "[ ]";
+}
+
+interface ProposeHarnessToolCallProps {
+ args: unknown;
+ result: unknown;
+ status: ToolStatus;
+ workspaceId?: string;
+ className?: string;
+ /** Whether this is the latest propose_harness tool call (for external edit detection) */
+ isLatest?: boolean;
+}
+
+export const ProposeHarnessToolCall: React.FC = (props) => {
+ const { result, status, workspaceId, className, isLatest } = props;
+ const { expanded, toggleExpanded } = useToolExpansion(true);
+ const { api } = useAPI();
+ const loopError = usePopoverError();
+
+ const [data, setData] = useState(null);
+
+ const [isStartingLoop, setIsStartingLoop] = useState(false);
+ const isStartingLoopRef = useRef(false);
+
+ const startButtonRef = useRef(null);
+
+ // Fetch fresh harness config for the latest propose_harness.
+ useEffect(() => {
+ if (!isLatest || !workspaceId || !api || status !== "completed") {
+ return;
+ }
+
+ const fetchHarness = async () => {
+ try {
+ const res = await api.workspace.harness.get({ workspaceId });
+ if (!res.success) {
+ return;
+ }
+ setData(res.data);
+ } catch {
+ // Best-effort only.
+ }
+ };
+
+ void fetchHarness();
+
+ const handleFocus = () => void fetchHarness();
+ window.addEventListener("focus", handleFocus);
+ return () => window.removeEventListener("focus", handleFocus);
+ }, [api, isLatest, status, workspaceId]);
+
+ let harnessPath: string | undefined;
+ let errorMessage: string | undefined;
+
+ if (isProposeHarnessResult(result)) {
+ harnessPath = result.harnessPath;
+ }
+
+ if (isProposeHarnessError(result)) {
+ errorMessage = result.error;
+ }
+
+ const statusDisplay = getStatusDisplay(status);
+
+ const handleApproveAndStart = () => {
+ if (!workspaceId || !api) return;
+ if (isStartingLoopRef.current) return;
+
+ // Capture positioning from the ref for error popover placement
+ const anchorPosition = startButtonRef.current
+ ? (() => {
+ const { bottom, left } = startButtonRef.current.getBoundingClientRect();
+ return { top: bottom + 8, left };
+ })()
+ : { top: 100, left: 100 };
+
+ isStartingLoopRef.current = true;
+ setIsStartingLoop(true);
+
+ // Switch to exec so the loop runner uses Exec mode settings.
+ updatePersistedState(getAgentIdKey(workspaceId), "exec");
+
+ api.workspace.loop
+ .start({ workspaceId })
+ .then((res) => {
+ if (!res.success) {
+ loopError.showError("approve-harness", res.error, anchorPosition);
+ }
+ })
+ .catch((error: unknown) => {
+ const message = error instanceof Error ? error.message : String(error);
+ loopError.showError("approve-harness", message, anchorPosition);
+ })
+ .finally(() => {
+ isStartingLoopRef.current = false;
+ setIsStartingLoop(false);
+ });
+ };
+
+ const actionButtons: ButtonConfig[] = [];
+
+ if (workspaceId && status === "completed" && !errorMessage) {
+ actionButtons.push({
+ label: "Approve & Start Ralph loop",
+ component: (
+
+ ,
+ tooltip: "Switch to Exec and start the Ralph loop with this harness",
+ }}
+ />
+
+ ),
+ });
+ }
+
+ const showChecklist = data?.config.checklist && data.config.checklist.length > 0;
+ const showGates = data?.config.gates && data.config.gates.length > 0;
+
+ const body = (
+
+
+
+
+ Harness proposal
+
+
+
+ {errorMessage ? (
+
{errorMessage}
+ ) : status !== "completed" ? (
+
+ Validating harness…
+
+ ) : data ? (
+
+
+
Files
+
+
{data.paths.configPath}
+
{data.paths.progressPath}
+
+
+
+ {showChecklist && (
+
+
Checklist
+
+ {data.config.checklist.map((item) => (
+
+ {formatChecklistStatus(item.status)} {item.title}
+
+ ))}
+
+
+ )}
+
+ {showGates && (
+
+
Gates
+
+ {data.config.gates.map((gate, index) => (
+
- {gate.command}
+ ))}
+
+
+ )}
+
+ {!showChecklist && !showGates && (
+
+ Harness is empty. Edit the harness config and call propose_harness again.
+
+ )}
+
+ ) : (
+
+
Files
+
+
{harnessPath ?? "(unknown harness path)"}
+
+
+ )}
+
+ {actionButtons.length > 0 && (
+
+ {actionButtons.map((button, index) => (
+
+ ))}
+
+
+ Review, then approve to start the loop.
+
+
+ )}
+
+ );
+
+ return (
+ <>
+
+
+ ▶
+ propose_harness
+ {statusDisplay}
+
+
+ {expanded && {body} }
+
+
+ >
+ );
+};
diff --git a/src/browser/components/tools/ProposePlanToolCall.test.tsx b/src/browser/components/tools/ProposePlanToolCall.test.tsx
index 61e0c81831..fbe5b3d1d2 100644
--- a/src/browser/components/tools/ProposePlanToolCall.test.tsx
+++ b/src/browser/components/tools/ProposePlanToolCall.test.tsx
@@ -17,6 +17,8 @@ interface SendMessageArgs {
options: SendMessageOptions;
}
+type SendMessageResult = { success: true; data: undefined } | { success: false; error: string };
+
type GetPlanContentResult =
| { success: true; data: { content: string; path: string } }
| { success: false; error: string };
@@ -44,7 +46,8 @@ interface MockApi {
summaryMessage: unknown;
deletePlanFile?: boolean;
}) => Promise;
- sendMessage: (args: SendMessageArgs) => Promise<{ success: true; data: undefined }>;
+ sendMessage: (args: SendMessageArgs) => Promise;
+ loop: Record;
};
}
@@ -89,8 +92,11 @@ void mock.module("@/browser/hooks/useOpenInEditor", () => ({
}));
void mock.module("@/browser/contexts/WorkspaceContext", () => ({
+ useOptionalWorkspaceContext: () => ({
+ workspaceMetadata: new Map(),
+ }),
useWorkspaceContext: () => ({
- workspaceMetadata: new Map(),
+ workspaceMetadata: new Map(),
}),
}));
@@ -199,6 +205,7 @@ describe("ProposePlanToolCall", () => {
data: { content: "# My Plan\n\nDo the thing.", path: planPath },
}),
replaceChatHistory: (_args) => Promise.resolve({ success: true, data: undefined }),
+ loop: {},
sendMessage: (args: SendMessageArgs) => {
sendMessageCalls.push(args);
return Promise.resolve({ success: true, data: undefined });
@@ -285,6 +292,7 @@ describe("ProposePlanToolCall", () => {
sendMessageCalls.push(args);
return Promise.resolve({ success: true, data: undefined });
},
+ loop: {},
},
};
@@ -327,4 +335,88 @@ describe("ProposePlanToolCall", () => {
expect(summaryMessage.parts?.[0]?.text).toContain("*Plan file preserved at:*");
expect(summaryMessage.parts?.[0]?.text).toContain(planPath);
});
+
+ test("switches to harness-init and sends a harness proposal request when clicking Start Ralph loop", async () => {
+ const workspaceId = "ws-123";
+ const planPath = "~/.mux/plans/demo/ws-123.md";
+
+ // Start in plan mode.
+ window.localStorage.setItem(getAgentIdKey(workspaceId), JSON.stringify("plan"));
+
+ const sendMessageCalls: SendMessageArgs[] = [];
+
+ let resolveSendMessage!: (value: SendMessageResult) => void;
+ const sendMessagePromise = new Promise((resolve) => {
+ resolveSendMessage = resolve;
+ });
+
+ mockApi = {
+ config: {
+ getConfig: () =>
+ Promise.resolve({
+ taskSettings: { maxParallelAgentTasks: 3, maxTaskNestingDepth: 3 },
+ agentAiDefaults: {},
+ subagentAiDefaults: {},
+ }),
+ },
+ workspace: {
+ getPlanContent: () =>
+ Promise.resolve({
+ success: true,
+ data: { content: "# My Plan\n\nDo the thing.", path: planPath },
+ }),
+ replaceChatHistory: () => Promise.resolve({ success: true, data: undefined }),
+ sendMessage: (args: SendMessageArgs) => {
+ sendMessageCalls.push(args);
+ return sendMessagePromise;
+ },
+ loop: {},
+ },
+ };
+
+ const view = render(
+
+
+
+ );
+
+ fireEvent.click(view.getByRole("button", { name: "Start Ralph loop" }));
+
+ await waitFor(() => expect(sendMessageCalls.length).toBe(1));
+ expect(sendMessageCalls[0]?.message).toBe(
+ "Generate a Ralph harness from the current plan and propose it"
+ );
+
+ await waitFor(() => {
+ const button = view.getByRole("button", { name: "Start Ralph loop" }) as HTMLButtonElement;
+ expect(button.disabled).toBe(true);
+ });
+
+ resolveSendMessage({ success: true, data: undefined });
+
+ await waitFor(() => {
+ const button = view.getByRole("button", { name: "Start Ralph loop" }) as HTMLButtonElement;
+ expect(button.disabled).toBe(false);
+ });
+
+ const agentKey = getAgentIdKey(workspaceId);
+ const updatePersistedStateMaybeMock = updatePersistedState as unknown as {
+ mock?: { calls: unknown[][] };
+ };
+ if (updatePersistedStateMaybeMock.mock) {
+ expect(updatePersistedState).toHaveBeenCalledWith(agentKey, "harness-init");
+ } else {
+ expect(JSON.parse(window.localStorage.getItem(agentKey)!)).toBe("harness-init");
+ }
+ });
});
diff --git a/src/browser/components/tools/ProposePlanToolCall.tsx b/src/browser/components/tools/ProposePlanToolCall.tsx
index f6c5c91ba8..19217c5ef7 100644
--- a/src/browser/components/tools/ProposePlanToolCall.tsx
+++ b/src/browser/components/tools/ProposePlanToolCall.tsx
@@ -5,6 +5,7 @@ import type {
LegacyProposePlanToolArgs,
LegacyProposePlanToolResult,
} from "@/common/types/tools";
+import type { HarnessLoopState } from "@/common/types/harness";
import {
ToolContainer,
ToolHeader,
@@ -29,6 +30,7 @@ import { usePopoverError } from "@/browser/hooks/usePopoverError";
import { PopoverError } from "../PopoverError";
import { getAgentIdKey, getPlanContentKey } from "@/common/constants/storage";
import { readPersistedState, updatePersistedState } from "@/browser/hooks/usePersistedState";
+import { formatSendMessageError } from "@/common/utils/errors/formatSendError";
import { buildSendMessageOptions } from "@/browser/hooks/useSendMessageOptions";
import {
Clipboard,
@@ -38,6 +40,7 @@ import {
ListStart,
Pencil,
Play,
+ RefreshCw,
X,
} from "lucide-react";
import { ShareMessagePopover } from "../ShareMessagePopover";
@@ -131,13 +134,18 @@ export const ProposePlanToolCall: React.FC = (props) =
} = props;
const { expanded, toggleExpanded } = useToolExpansion(true); // Expand by default
const [showRaw, setShowRaw] = useState(false);
+ const [isStartingLoop, setIsStartingLoop] = useState(false);
+ const isStartingLoopRef = useRef(false);
const [isImplementing, setIsImplementing] = useState(false);
const [implementReplacesChatHistory, setImplementReplacesChatHistory] = useState(false);
+ const [loopState, setLoopState] = useState(null);
const isImplementingRef = useRef(false);
const isMountedRef = useRef(true);
const { api } = useAPI();
const openInEditor = useOpenInEditor();
+ const loopError = usePopoverError();
const workspaceContext = useOptionalWorkspaceContext();
+ const startLoopButtonRef = useRef(null);
const editorError = usePopoverError();
const editButtonRef = useRef(null);
@@ -220,6 +228,31 @@ export const ProposePlanToolCall: React.FC = (props) =
// status in deps ensures refetch when tool completes (captures final file state)
}, [api, workspaceId, isLatest, isEphemeralPreview, cacheKey, status]);
+ // Keep loop state live for the latest plan.
+ useEffect(() => {
+ if (isEphemeralPreview || !isLatest || !workspaceId || !api) return;
+
+ const abortController = new AbortController();
+ const { signal } = abortController;
+
+ (async () => {
+ try {
+ const iterator = await api.workspace.loop.subscribe({ workspaceId }, { signal });
+
+ for await (const nextLoopState of iterator) {
+ if (signal.aborted) break;
+ setLoopState(nextLoopState);
+ }
+ } catch (err) {
+ if (!signal.aborted) {
+ console.error("Failed to subscribe to loop state:", err);
+ }
+ }
+ })();
+
+ return () => abortController.abort();
+ }, [api, workspaceId, isLatest, isEphemeralPreview]);
+
// Determine plan content and title based on result type
// For ephemeral previews, use direct content/path props
// For the latest plan, prefer fresh content from disk (external edit support)
@@ -369,6 +402,47 @@ export const ProposePlanToolCall: React.FC = (props) =
}
}
};
+
+ const handleStartRalphLoop = () => {
+ if (!workspaceId || !api) return;
+ if (isStartingLoopRef.current) return;
+
+ // Capture positioning from the ref for error popover placement
+ const anchorPosition = startLoopButtonRef.current
+ ? (() => {
+ const { bottom, left } = startLoopButtonRef.current.getBoundingClientRect();
+ return { top: bottom + 8, left };
+ })()
+ : { top: 100, left: 100 };
+
+ isStartingLoopRef.current = true;
+ setIsStartingLoop(true);
+
+ // Switch to harness-init before sending so send options (agentId/mode) match.
+ updatePersistedState(getAgentIdKey(workspaceId), "harness-init");
+
+ api.workspace
+ .sendMessage({
+ workspaceId,
+ message: "Generate a Ralph harness from the current plan and propose it",
+ options: buildSendMessageOptions(workspaceId),
+ })
+ .then((result) => {
+ if (!result.success) {
+ const formatted = formatSendMessageError(result.error);
+ loopError.showError("start-ralph-loop", formatted.message, anchorPosition);
+ }
+ })
+ .catch((error: unknown) => {
+ const message = error instanceof Error ? error.message : String(error);
+ loopError.showError("start-ralph-loop", message, anchorPosition);
+ })
+ .finally(() => {
+ isStartingLoopRef.current = false;
+ setIsStartingLoop(false);
+ });
+ };
+
// Copy to clipboard with feedback
const { copied, copyToClipboard } = useCopyToClipboard();
@@ -394,6 +468,25 @@ export const ProposePlanToolCall: React.FC = (props) =
}
};
+ const showPlanPlaceholder =
+ !errorMessage && !showRaw && planContent.trim().length === 0 && status !== "completed";
+ const planPlaceholderText =
+ status === "executing" ? "Generating plan preview…" : "Preparing plan…";
+
+ const showInlineLoopState =
+ !isEphemeralPreview &&
+ !!api &&
+ !!workspaceId &&
+ isLatest &&
+ status === "completed" &&
+ !errorMessage;
+
+ const isLoopStateRelevant = (state: HarnessLoopState) =>
+ state.status !== "stopped" ||
+ state.iteration > 0 ||
+ state.consecutiveFailures > 0 ||
+ state.lastError !== null ||
+ state.stoppedReason !== null;
const statusDisplay = getStatusDisplay(status);
// Build action buttons array (similar to AssistantMessage)
@@ -448,6 +541,23 @@ export const ProposePlanToolCall: React.FC = (props) =
? "Replace chat history with this plan, switch to Exec, and start implementing"
: "Switch to Exec and start implementing",
});
+
+ actionButtons.push({
+ label: "Start Ralph loop",
+ component: (
+
+ ,
+ tooltip: "Switch to Harness Init and propose a harness for approval",
+ }}
+ />
+
+ ),
+ });
}
}
@@ -502,13 +612,40 @@ export const ProposePlanToolCall: React.FC = (props) =
+ ) : showPlanPlaceholder ? (
+
+ {planPlaceholderText}
+
) : (
)}
- {/* Completion guidance: only for completed tool calls without errors, not ephemeral previews */}
+ {/* Loop status + completion guidance */}
+
+ {showInlineLoopState && loopState && isLoopStateRelevant(loopState) && (
+
+
Loop status
+
{`${loopState.status} • iteration ${loopState.iteration}`}
+ {loopState.currentItemTitle && (
+
+ Current: {loopState.currentItemTitle}
+
+ )}
+ {loopState.consecutiveFailures > 0 && (
+
+ Consecutive failures: {loopState.consecutiveFailures}
+
+ )}
+ {loopState.stoppedReason && (
+
Stopped: {loopState.stoppedReason}
+ )}
+ {loopState.lastError && (
+
{loopState.lastError}
+ )}
+
+ )}
{!isEphemeralPreview && status === "completed" && !errorMessage && (
Respond with revisions or switch to the Exec agent (
@@ -517,6 +654,12 @@ export const ProposePlanToolCall: React.FC
= (props) =
)}
+ {isStartingLoop && (
+
+ Starting Ralph loop… (generating harness if needed)
+
+ )}
+
{/* Actions row at the bottom (matching MessageWindow style) */}
{actionButtons.map((button, index) => (
@@ -538,6 +681,7 @@ export const ProposePlanToolCall: React.FC
= (props) =
<>
{planUI}
+
>
);
}
@@ -557,6 +701,7 @@ export const ProposePlanToolCall: React.FC = (props) =
{modal}
+
>
);
};
diff --git a/src/browser/components/tools/shared/getToolComponent.ts b/src/browser/components/tools/shared/getToolComponent.ts
index feb20a5fc8..9a96b46e40 100644
--- a/src/browser/components/tools/shared/getToolComponent.ts
+++ b/src/browser/components/tools/shared/getToolComponent.ts
@@ -17,6 +17,7 @@ import { FileReadToolCall } from "../FileReadToolCall";
import { WebFetchToolCall } from "../WebFetchToolCall";
import { WebSearchToolCall } from "../WebSearchToolCall";
import { AskUserQuestionToolCall } from "../AskUserQuestionToolCall";
+import { ProposeHarnessToolCall } from "../ProposeHarnessToolCall";
import { ProposePlanToolCall } from "../ProposePlanToolCall";
import { TodoToolCall } from "../TodoToolCall";
import { StatusSetToolCall } from "../StatusSetToolCall";
@@ -51,7 +52,7 @@ interface ToolRegistryEntry {
* Registry mapping tool names to their components and validation schemas.
* Adding a new tool: add one line here.
*
- * Note: Some tools (ask_user_question, propose_plan, todo_write, status_set) require
+ * Note: Some tools (ask_user_question, propose_plan, propose_harness, todo_write, status_set) require
* props like workspaceId/toolCallId that aren't available in nested context. This is
* fine because the backend excludes these from code_execution sandbox (see EXCLUDED_TOOLS
* in src/node/services/ptc/toolBridge.ts). They can never appear in nested tool calls.
@@ -87,6 +88,10 @@ const TOOL_REGISTRY: Record = {
component: ProposePlanToolCall,
schema: TOOL_DEFINITIONS.propose_plan.schema,
},
+ propose_harness: {
+ component: ProposeHarnessToolCall,
+ schema: TOOL_DEFINITIONS.propose_harness.schema,
+ },
todo_write: { component: TodoToolCall, schema: TOOL_DEFINITIONS.todo_write.schema },
status_set: { component: StatusSetToolCall, schema: TOOL_DEFINITIONS.status_set.schema },
notify: { component: NotifyToolCall, schema: TOOL_DEFINITIONS.notify.schema },
diff --git a/src/browser/stories/mocks/orpc.ts b/src/browser/stories/mocks/orpc.ts
index ec372eba6f..ca6ec3d3df 100644
--- a/src/browser/stories/mocks/orpc.ts
+++ b/src/browser/stories/mocks/orpc.ts
@@ -690,6 +690,34 @@ export function createMockORPCClient(options: MockORPCClientOptions = {}): APICl
yield* [];
await new Promise(() => undefined);
},
+ loop: {
+ subscribe: async function* (
+ _input: { workspaceId: string },
+ options?: { signal?: AbortSignal }
+ ) {
+ // Yield initial state, then keep the subscription open (like a real eventIterator).
+ yield {
+ status: "stopped" as const,
+ startedAt: null,
+ iteration: 0,
+ consecutiveFailures: 0,
+ currentItemId: null,
+ currentItemTitle: null,
+ lastGateRun: null,
+ lastCheckpoint: null,
+ lastError: null,
+ stoppedReason: null,
+ };
+
+ await new Promise((resolve) => {
+ if (options?.signal?.aborted) {
+ resolve();
+ return;
+ }
+ options?.signal?.addEventListener("abort", () => resolve(), { once: true });
+ });
+ },
+ },
activity: {
list: () => Promise.resolve({}),
subscribe: async function* () {
diff --git a/src/browser/styles/globals.css b/src/browser/styles/globals.css
index 2234871cb6..ac069017e6 100644
--- a/src/browser/styles/globals.css
+++ b/src/browser/styles/globals.css
@@ -64,6 +64,11 @@
--color-exec-mode-hover: hsl(268.56 94.04% 67%);
--color-exec-mode-light: hsl(268.56 94.04% 78%);
+ --color-harness-init-mode: hsl(175 60% 42%);
+ --color-harness-init-mode-hover: hsl(175 60% 52%);
+ --color-harness-init-mode-light: hsl(175 60% 62%);
+ --color-harness-init-mode-alpha: hsla(175 60% 42% / 0.1);
+
/* Edit mode: amber/gold for editing warnings and barriers */
--color-edit-mode: hsl(38 80% 45%);
--color-edit-mode-hover: hsl(38 80% 55%);
@@ -356,6 +361,11 @@
--color-exec-mode-hover: hsl(268.56 94.04% 67%);
--color-exec-mode-light: hsl(268.56 94.04% 78%);
+ --color-harness-init-mode: hsl(175 60% 38%);
+ --color-harness-init-mode-hover: hsl(175 60% 46%);
+ --color-harness-init-mode-light: hsl(175 60% 58%);
+ --color-harness-init-mode-alpha: hsla(175 60% 38% / 0.08);
+
--color-pending: hsl(30 100% 64%);
--color-debug-mode: hsl(214 100% 56%);
@@ -589,6 +599,11 @@
--color-exec-mode-hover: color-mix(in srgb, var(--color-exec-mode), white 18%);
--color-exec-mode-light: color-mix(in srgb, var(--color-exec-mode), white 42%);
+ --color-harness-init-mode: #24837b; /* Flexoki cyan-600 */
+ --color-harness-init-mode-hover: color-mix(in srgb, var(--color-harness-init-mode), white 18%);
+ --color-harness-init-mode-light: color-mix(in srgb, var(--color-harness-init-mode), white 42%);
+ --color-harness-init-mode-alpha: hsl(from var(--color-harness-init-mode) h s l / 0.08);
+
--color-edit-mode: #ad8301; /* Flexoki yellow-600 */
--color-edit-mode-hover: color-mix(in srgb, var(--color-edit-mode), white 18%);
--color-edit-mode-light: color-mix(in srgb, var(--color-edit-mode), white 42%);
@@ -795,6 +810,11 @@
--color-exec-mode-hover: color-mix(in srgb, var(--color-exec-mode), white 10%);
--color-exec-mode-light: color-mix(in srgb, var(--color-exec-mode), white 22%);
+ --color-harness-init-mode: #3aa99f; /* Flexoki cyan-400 */
+ --color-harness-init-mode-hover: color-mix(in srgb, var(--color-harness-init-mode), white 10%);
+ --color-harness-init-mode-light: color-mix(in srgb, var(--color-harness-init-mode), white 22%);
+ --color-harness-init-mode-alpha: hsl(from var(--color-harness-init-mode) h s l / 0.12);
+
--color-edit-mode: #d0a215; /* Flexoki yellow-400 */
--color-edit-mode-hover: color-mix(in srgb, var(--color-edit-mode), white 10%);
--color-edit-mode-light: color-mix(in srgb, var(--color-edit-mode), white 22%);
diff --git a/src/browser/types/rightSidebar.ts b/src/browser/types/rightSidebar.ts
index b12d6796de..e00036d83d 100644
--- a/src/browser/types/rightSidebar.ts
+++ b/src/browser/types/rightSidebar.ts
@@ -1,4 +1,11 @@
-export const RIGHT_SIDEBAR_TABS = ["costs", "review", "terminal", "explorer", "stats"] as const;
+export const RIGHT_SIDEBAR_TABS = [
+ "costs",
+ "review",
+ "terminal",
+ "explorer",
+ "harness",
+ "stats",
+] as const;
/** Base tab types that are always valid */
export type BaseTabType = (typeof RIGHT_SIDEBAR_TABS)[number];
diff --git a/src/browser/utils/commandIds.ts b/src/browser/utils/commandIds.ts
index 9b86a083a8..e5dc25562a 100644
--- a/src/browser/utils/commandIds.ts
+++ b/src/browser/utils/commandIds.ts
@@ -44,6 +44,14 @@ export const CommandIds = {
chatInterrupt: () => "chat:interrupt" as const,
chatJumpBottom: () => "chat:jumpBottom" as const,
chatVoiceInput: () => "chat:voiceInput" as const,
+
+ // Harness commands
+ harnessRunGates: () => "harness:runGates" as const,
+ harnessCheckpoint: () => "harness:checkpoint" as const,
+ harnessResetContext: () => "harness:resetContext" as const,
+ harnessLoopStart: () => "harness:loop:start" as const,
+ harnessLoopPause: () => "harness:loop:pause" as const,
+ harnessLoopStop: () => "harness:loop:stop" as const,
chatClearTimingStats: () => "chat:clearTimingStats" as const,
// Mode commands
diff --git a/src/browser/utils/commands/sources.ts b/src/browser/utils/commands/sources.ts
index e4380ec324..1c71b84240 100644
--- a/src/browser/utils/commands/sources.ts
+++ b/src/browser/utils/commands/sources.ts
@@ -90,6 +90,7 @@ export const COMMAND_SECTIONS = {
WORKSPACES: "Workspaces",
LAYOUTS: "Layouts",
NAVIGATION: "Navigation",
+ HARNESS: "Harness",
CHAT: "Chat",
MODE: "Modes & Model",
HELP: "Help",
@@ -102,6 +103,7 @@ const section = {
layouts: COMMAND_SECTIONS.LAYOUTS,
workspaces: COMMAND_SECTIONS.WORKSPACES,
navigation: COMMAND_SECTIONS.NAVIGATION,
+ harness: COMMAND_SECTIONS.HARNESS,
chat: COMMAND_SECTIONS.CHAT,
appearance: COMMAND_SECTIONS.APPEARANCE,
mode: COMMAND_SECTIONS.MODE,
@@ -641,6 +643,75 @@ export function buildCoreSources(p: BuildSourcesParams): Array<() => CommandActi
window.dispatchEvent(createCustomEvent(CUSTOM_EVENTS.TOGGLE_VOICE_INPUT));
},
});
+ list.push({
+ id: CommandIds.harnessRunGates(),
+ title: "Run harness gates",
+ subtitle: "Run the workspace harness gate commands",
+ section: section.harness,
+ run: async () => {
+ const result = await p.api?.workspace.harness?.runGates({ workspaceId: id });
+ if (result && !result.success) {
+ console.error(result.error);
+ }
+ },
+ });
+ list.push({
+ id: CommandIds.harnessCheckpoint(),
+ title: "Harness checkpoint",
+ subtitle: "Commit changes if gates are passing",
+ section: section.harness,
+ run: async () => {
+ const result = await p.api?.workspace.harness?.checkpoint({ workspaceId: id });
+ if (result && !result.success) {
+ console.error(result.error);
+ }
+ },
+ });
+ list.push({
+ id: CommandIds.harnessLoopStart(),
+ title: "Harness loop: Start",
+ section: section.harness,
+ run: async () => {
+ const result = await p.api?.workspace.loop?.start({ workspaceId: id });
+ if (result && !result.success) {
+ console.error(result.error);
+ }
+ },
+ });
+ list.push({
+ id: CommandIds.harnessLoopPause(),
+ title: "Harness loop: Pause",
+ section: section.harness,
+ run: async () => {
+ const result = await p.api?.workspace.loop?.pause({ workspaceId: id });
+ if (result && !result.success) {
+ console.error(result.error);
+ }
+ },
+ });
+ list.push({
+ id: CommandIds.harnessLoopStop(),
+ title: "Harness loop: Stop",
+ section: section.harness,
+ run: async () => {
+ const result = await p.api?.workspace.loop?.stop({ workspaceId: id });
+ if (result && !result.success) {
+ console.error(result.error);
+ }
+ },
+ });
+ list.push({
+ id: CommandIds.harnessResetContext(),
+ title: "Harness reset context",
+ subtitle: "Replace chat history with a harness bearings summary",
+ section: section.harness,
+ run: async () => {
+ const result = await p.api?.workspace.harness?.resetContext({ workspaceId: id });
+ if (result && !result.success) {
+ console.error(result.error);
+ }
+ },
+ });
list.push({
id: CommandIds.chatClearTimingStats(),
title: "Clear Timing Stats",
diff --git a/src/browser/utils/rightSidebarLayout.ts b/src/browser/utils/rightSidebarLayout.ts
index 00e5b303a1..bdccb953fb 100644
--- a/src/browser/utils/rightSidebarLayout.ts
+++ b/src/browser/utils/rightSidebarLayout.ts
@@ -109,6 +109,7 @@ export function parseRightSidebarLayoutState(
if (!layoutContainsTab(raw.root, "explorer")) {
injectTabIntoLayout(raw.root, "explorer");
}
+
return raw;
}
diff --git a/src/cli/cli.test.ts b/src/cli/cli.test.ts
index 102814c0ff..15d6cc2c96 100644
--- a/src/cli/cli.test.ts
+++ b/src/cli/cli.test.ts
@@ -82,6 +82,10 @@ async function createTestServer(authToken?: string): Promise {
sessionUsageService: services.sessionUsageService,
signingService: services.signingService,
coderService: services.coderService,
+ workspaceHarnessService: services.workspaceHarnessService,
+ gateRunnerService: services.gateRunnerService,
+ gitCheckpointService: services.gitCheckpointService,
+ loopRunnerService: services.loopRunnerService,
};
// Use the actual createOrpcServer function
diff --git a/src/cli/server.test.ts b/src/cli/server.test.ts
index 70c670b2cd..9d62d28ef4 100644
--- a/src/cli/server.test.ts
+++ b/src/cli/server.test.ts
@@ -85,6 +85,10 @@ async function createTestServer(): Promise {
sessionUsageService: services.sessionUsageService,
signingService: services.signingService,
coderService: services.coderService,
+ workspaceHarnessService: services.workspaceHarnessService,
+ gateRunnerService: services.gateRunnerService,
+ gitCheckpointService: services.gitCheckpointService,
+ loopRunnerService: services.loopRunnerService,
};
// Use the actual createOrpcServer function
diff --git a/src/cli/server.ts b/src/cli/server.ts
index 0d47c99fe4..28e24387cf 100644
--- a/src/cli/server.ts
+++ b/src/cli/server.ts
@@ -118,6 +118,10 @@ const mockWindow: BrowserWindow = {
sessionUsageService: serviceContainer.sessionUsageService,
signingService: serviceContainer.signingService,
coderService: serviceContainer.coderService,
+ workspaceHarnessService: serviceContainer.workspaceHarnessService,
+ gateRunnerService: serviceContainer.gateRunnerService,
+ gitCheckpointService: serviceContainer.gitCheckpointService,
+ loopRunnerService: serviceContainer.loopRunnerService,
};
const mdnsAdvertiser = new MdnsAdvertiserService();
diff --git a/src/common/constants/env.ts b/src/common/constants/env.ts
index d443c60fcb..ea26645401 100644
--- a/src/common/constants/env.ts
+++ b/src/common/constants/env.ts
@@ -11,4 +11,9 @@ export const NON_INTERACTIVE_ENV_VARS = {
VISUAL: "true", // Another common editor environment variable
// Prevent git from prompting for credentials
GIT_TERMINAL_PROMPT: "0", // Disables git credential prompts
+
+ // Some Git installs (notably on Windows) can still try to prompt via askpass helpers.
+ // Force a non-interactive askpass implementation so commands fail quickly.
+ GIT_ASKPASS: "echo",
+ SSH_ASKPASS: "echo",
} as const;
diff --git a/src/common/orpc/schemas.ts b/src/common/orpc/schemas.ts
index ca2e050761..089fb438bf 100644
--- a/src/common/orpc/schemas.ts
+++ b/src/common/orpc/schemas.ts
@@ -26,6 +26,22 @@ export {
} from "./schemas/workspace";
// Workspace stats schemas
+// Harness schemas
+export {
+ HarnessChecklistItemSchema,
+ HarnessChecklistStatusSchema,
+ HarnessContextResetStrategySchema,
+ HarnessGateCommandResultSchema,
+ HarnessGateRunResultSchema,
+ HarnessGateSchema,
+ HarnessLoopSettingsSchema,
+ HarnessLoopStateSchema,
+ HarnessLoopStatusSchema,
+ GitCheckpointResultSchema,
+ WorkspaceHarnessConfigSchema,
+ WorkspaceHarnessFilePathsSchema,
+} from "./schemas/harness";
+
export {
ActiveStreamStatsSchema,
CompletedStreamStatsSchema,
diff --git a/src/common/orpc/schemas/api.ts b/src/common/orpc/schemas/api.ts
index a549ef668e..0fc4df07a9 100644
--- a/src/common/orpc/schemas/api.ts
+++ b/src/common/orpc/schemas/api.ts
@@ -31,6 +31,13 @@ import {
AgentDefinitionPackageSchema,
AgentIdSchema,
} from "./agentDefinition";
+import {
+ HarnessGateRunResultSchema,
+ HarnessLoopStateSchema,
+ GitCheckpointResultSchema,
+ WorkspaceHarnessConfigSchema,
+ WorkspaceHarnessFilePathsSchema,
+} from "./harness";
import {
MCPAddParamsSchema,
MCPRemoveParamsSchema,
@@ -693,6 +700,103 @@ export const workspace = {
output: ResultSchema(z.void(), z.string()),
},
},
+
+ /** Workspace-local harness config + gates */
+ harness: {
+ exists: {
+ input: z.object({ workspaceId: z.string() }),
+ output: ResultSchema(
+ z
+ .object({
+ exists: z.boolean(),
+ paths: WorkspaceHarnessFilePathsSchema,
+ })
+ .strict(),
+ z.string()
+ ),
+ },
+ get: {
+ input: z.object({ workspaceId: z.string() }),
+ output: ResultSchema(
+ z
+ .object({
+ config: WorkspaceHarnessConfigSchema,
+ paths: WorkspaceHarnessFilePathsSchema,
+ exists: z.boolean(),
+ lastGateRun: HarnessGateRunResultSchema.nullable(),
+ lastCheckpoint: GitCheckpointResultSchema.nullable(),
+ loopState: HarnessLoopStateSchema,
+ })
+ .strict(),
+ z.string()
+ ),
+ },
+ set: {
+ input: z
+ .object({
+ workspaceId: z.string(),
+ config: WorkspaceHarnessConfigSchema,
+ })
+ .strict(),
+ output: ResultSchema(WorkspaceHarnessConfigSchema, z.string()),
+ },
+ runGates: {
+ input: z.object({ workspaceId: z.string() }),
+ output: ResultSchema(HarnessGateRunResultSchema, z.string()),
+ },
+ checkpoint: {
+ input: z
+ .object({
+ workspaceId: z.string(),
+ messageTemplate: z.string().optional(),
+ })
+ .strict(),
+ output: ResultSchema(GitCheckpointResultSchema, z.string()),
+ },
+ /** Replace chat history with a short loop-style bearings message. */
+ resetContext: {
+ input: z
+ .object({
+ workspaceId: z.string(),
+ note: z.string().optional(),
+ })
+ .strict(),
+ output: ResultSchema(z.void(), z.string()),
+ },
+ },
+ /** Ralph loop runner */
+ loop: {
+ getState: {
+ input: z.object({ workspaceId: z.string() }),
+ output: HarnessLoopStateSchema,
+ },
+ start: {
+ input: z.object({ workspaceId: z.string() }),
+ output: ResultSchema(z.void(), z.string()),
+ },
+ pause: {
+ input: z
+ .object({
+ workspaceId: z.string(),
+ reason: z.string().optional(),
+ })
+ .strict(),
+ output: ResultSchema(z.void(), z.string()),
+ },
+ stop: {
+ input: z
+ .object({
+ workspaceId: z.string(),
+ reason: z.string().optional(),
+ })
+ .strict(),
+ output: ResultSchema(z.void(), z.string()),
+ },
+ subscribe: {
+ input: z.object({ workspaceId: z.string() }),
+ output: eventIterator(HarnessLoopStateSchema),
+ },
+ },
};
export type WorkspaceSendMessageOutput = z.infer;
diff --git a/src/common/orpc/schemas/harness.ts b/src/common/orpc/schemas/harness.ts
new file mode 100644
index 0000000000..ce0fe55543
--- /dev/null
+++ b/src/common/orpc/schemas/harness.ts
@@ -0,0 +1,110 @@
+import { z } from "zod";
+import { ToolPolicySchema } from "./stream";
+
+export const HarnessChecklistStatusSchema = z.enum(["todo", "doing", "done", "blocked"]);
+
+export const HarnessChecklistItemSchema = z
+ .object({
+ id: z.string().min(1),
+ title: z.string().min(1),
+ status: HarnessChecklistStatusSchema,
+ notes: z.string().optional(),
+ })
+ .strict();
+
+export const HarnessGateSchema = z
+ .object({
+ id: z.string().min(1).optional(),
+ title: z.string().min(1).optional(),
+ command: z.string().min(1),
+ timeoutSecs: z.number().int().positive().optional(),
+ })
+ .strict();
+
+export const HarnessContextResetStrategySchema = z.enum(["replace_history", "none"]);
+
+export const HarnessLoopSettingsSchema = z
+ .object({
+ /** Hard cap on iterations for a single run. */
+ maxIterations: z.number().int().positive().optional(),
+ /** Hard cap on wall-clock time for a single run. */
+ maxWallTimeMins: z.number().int().positive().optional(),
+ /** Pause when gates fail this many times in a row. */
+ maxConsecutiveFailures: z.number().int().positive().optional(),
+ /** How to reset context between iterations. */
+ contextReset: HarnessContextResetStrategySchema.optional(),
+ /** When true, auto-commit after gates pass. */
+ autoCommit: z.boolean().optional(),
+ /** Commit message template (supports simple placeholders like {{item}}). */
+ commitMessageTemplate: z.string().optional(),
+ /** Optional tool policy overrides for loop iterations. */
+ toolPolicy: ToolPolicySchema.optional(),
+ })
+ .strict();
+
+export const WorkspaceHarnessConfigSchema = z
+ .object({
+ version: z.literal(1),
+ checklist: z.array(HarnessChecklistItemSchema),
+ gates: z.array(HarnessGateSchema),
+ loop: HarnessLoopSettingsSchema.optional(),
+ })
+ .strict();
+
+export const WorkspaceHarnessFilePathsSchema = z
+ .object({
+ configPath: z.string(),
+ progressPath: z.string(),
+ })
+ .strict();
+
+export const HarnessGateCommandResultSchema = z
+ .object({
+ command: z.string(),
+ exitCode: z.number(),
+ durationMs: z.number(),
+ stdout: z.string(),
+ stderr: z.string(),
+ truncatedStdout: z.boolean().optional(),
+ truncatedStderr: z.boolean().optional(),
+ })
+ .strict();
+
+export const HarnessGateRunResultSchema = z
+ .object({
+ ok: z.boolean(),
+ startedAt: z.number(),
+ finishedAt: z.number(),
+ totalDurationMs: z.number(),
+ results: z.array(HarnessGateCommandResultSchema),
+ })
+ .strict();
+
+export const GitCheckpointResultSchema = z
+ .object({
+ committed: z.boolean(),
+ dirtyBefore: z.boolean(),
+ dirtyAfter: z.boolean(),
+ commitSha: z.string().nullable(),
+ commitMessage: z.string().nullable(),
+ stdout: z.string().optional(),
+ stderr: z.string().optional(),
+ })
+ .strict();
+
+export const HarnessLoopStatusSchema = z.enum(["stopped", "running", "paused"]);
+
+export const HarnessLoopStateSchema = z
+ .object({
+ status: HarnessLoopStatusSchema,
+ startedAt: z.number().nullable(),
+ iteration: z.number(),
+ consecutiveFailures: z.number(),
+ currentItemId: z.string().nullable(),
+ currentItemTitle: z.string().nullable(),
+ lastGateRun: HarnessGateRunResultSchema.nullable(),
+ lastCheckpoint: GitCheckpointResultSchema.nullable(),
+ lastError: z.string().nullable(),
+ stoppedReason: z.string().nullable(),
+ })
+ .strict();
diff --git a/src/common/types/harness.ts b/src/common/types/harness.ts
new file mode 100644
index 0000000000..ff0d770c7a
--- /dev/null
+++ b/src/common/types/harness.ts
@@ -0,0 +1,26 @@
+import type { z } from "zod";
+import type {
+ HarnessChecklistItemSchema,
+ HarnessChecklistStatusSchema,
+ HarnessContextResetStrategySchema,
+ HarnessGateRunResultSchema,
+ HarnessGateSchema,
+ HarnessLoopSettingsSchema,
+ HarnessLoopStateSchema,
+ HarnessLoopStatusSchema,
+ GitCheckpointResultSchema,
+ WorkspaceHarnessConfigSchema,
+ WorkspaceHarnessFilePathsSchema,
+} from "@/common/orpc/schemas";
+
+export type HarnessChecklistStatus = z.infer;
+export type HarnessChecklistItem = z.infer;
+export type HarnessGate = z.infer;
+export type HarnessContextResetStrategy = z.infer;
+export type HarnessLoopSettings = z.infer;
+export type WorkspaceHarnessConfig = z.infer;
+export type WorkspaceHarnessFilePaths = z.infer;
+export type HarnessGateRunResult = z.infer;
+export type GitCheckpointResult = z.infer;
+export type HarnessLoopStatus = z.infer;
+export type HarnessLoopState = z.infer;
diff --git a/src/common/types/message.ts b/src/common/types/message.ts
index 0bb339c759..8a1d1a9487 100644
--- a/src/common/types/message.ts
+++ b/src/common/types/message.ts
@@ -266,6 +266,16 @@ export type MuxFrontendMetadata = MuxFrontendMetadataBase &
type: "plan-display"; // Ephemeral plan display from /plan command
path: string;
}
+ | {
+ type: "harness-bearings";
+ }
+ | {
+ type: "harness-loop";
+ iteration?: number;
+ }
+ | {
+ type: "harness-loop-bearings";
+ }
| {
type: "normal"; // Regular messages
}
diff --git a/src/common/types/tools.ts b/src/common/types/tools.ts
index 2021cda813..c112c39b7a 100644
--- a/src/common/types/tools.ts
+++ b/src/common/types/tools.ts
@@ -278,6 +278,21 @@ export interface LegacyProposePlanToolResult {
message: string;
}
+// Propose Harness Tool Types
+// Args derived from schema
+export type ProposeHarnessToolArgs = z.infer;
+
+export interface ProposeHarnessToolResult {
+ success: true;
+ harnessPath: string;
+ message: string;
+}
+
+export interface ProposeHarnessToolError {
+ success: false;
+ error: string;
+}
+
// Todo Tool Types
export interface TodoItem {
content: string;
diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts
index 8ac1144635..e69790e282 100644
--- a/src/common/utils/tools/toolDefinitions.ts
+++ b/src/common/utils/tools/toolDefinitions.ts
@@ -659,6 +659,14 @@ export const TOOL_DEFINITIONS = {
"After calling this tool, do not paste the plan contents or mention the plan file path; the UI already shows the full plan.",
schema: z.object({}),
},
+ propose_harness: {
+ description:
+ "Signal that your harness is complete and ready for user approval. " +
+ "This tool validates the harness config file you wrote under .mux/harness. " +
+ "You must write your harness file before calling this tool. " +
+ "After calling this tool, do not paste the full harness contents; the UI already shows it.",
+ schema: z.object({}),
+ },
task: {
description:
"Spawn a sub-agent task (child workspace). " +
@@ -1255,6 +1263,7 @@ export function getAvailableTools(
"file_edit_insert",
"ask_user_question",
"propose_plan",
+ "propose_harness",
"bash",
"task",
"task_await",
diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts
index 64c329e4ee..e04c3209d7 100644
--- a/src/common/utils/tools/tools.ts
+++ b/src/common/utils/tools/tools.ts
@@ -10,6 +10,7 @@ import { createFileEditReplaceStringTool } from "@/node/services/tools/file_edit
// DISABLED: import { createFileEditReplaceLinesTool } from "@/node/services/tools/file_edit_replace_lines";
import { createFileEditInsertTool } from "@/node/services/tools/file_edit_insert";
import { createAskUserQuestionTool } from "@/node/services/tools/ask_user_question";
+import { createProposeHarnessTool } from "@/node/services/tools/propose_harness";
import { createProposePlanTool } from "@/node/services/tools/propose_plan";
import { createTodoWriteTool, createTodoReadTool } from "@/node/services/tools/todo";
import { createStatusSetTool } from "@/node/services/tools/status_set";
@@ -38,6 +39,7 @@ import type { InitStateManager } from "@/node/services/initStateManager";
import type { BackgroundProcessManager } from "@/node/services/backgroundProcessManager";
import type { TaskService } from "@/node/services/taskService";
import type { WorkspaceChatMessage } from "@/common/orpc/types";
+import type { UIMode } from "@/common/types/mode";
import type { FileState } from "@/node/services/agentSession";
import type { AgentDefinitionDescriptor } from "@/common/types/agentDefinition";
import type { AgentSkillDescriptor } from "@/common/types/agentSkill";
@@ -60,9 +62,18 @@ export interface ToolConfiguration {
overflow_policy?: "truncate" | "tmpfile";
/** Background process manager for bash tool (optional, AI-only) */
backgroundProcessManager?: BackgroundProcessManager;
- /** When true, restrict edits to the plan file (plan agent behavior). */
- planFileOnly?: boolean;
- /** Plan file path - only this file can be edited when planFileOnly is true. */
+ /** Current UI mode (plan or exec) - used for plan file path enforcement */
+ mode?: UIMode;
+ /** Active agent id (resolved). Used for tool-level restrictions. */
+ agentId?: string;
+ /**
+ * Optional allowlist of file path globs that may be edited via file_edit_* tools.
+ *
+ * When set, file edit tools will reject edits to paths that don't match.
+ * Relative patterns are resolved against cwd.
+ */
+ allowedEditPaths?: string[];
+ /** Plan file path - only this file can be edited in plan mode */
planFilePath?: string;
/**
* Optional callback for emitting UI-only workspace chat events.
@@ -286,6 +297,8 @@ export async function getToolsForModel(
// and line number miscalculations. Use file_edit_replace_string instead.
// file_edit_replace_lines: wrap(createFileEditReplaceLinesTool(config)),
+ propose_harness: wrap(createProposeHarnessTool(config)),
+
// Sub-agent task orchestration (child workspaces)
task: wrap(createTaskTool(config)),
task_await: wrap(createTaskAwaitTool(config)),
diff --git a/src/desktop/main.ts b/src/desktop/main.ts
index c06b3c9abf..438db41b6f 100644
--- a/src/desktop/main.ts
+++ b/src/desktop/main.ts
@@ -378,6 +378,10 @@ async function loadServices(): Promise {
sessionUsageService: services.sessionUsageService,
signingService: services.signingService,
coderService: services.coderService,
+ workspaceHarnessService: services.workspaceHarnessService,
+ gateRunnerService: services.gateRunnerService,
+ gitCheckpointService: services.gitCheckpointService,
+ loopRunnerService: services.loopRunnerService,
};
electronIpcMain.handle("mux:get-is-rosetta", async () => {
diff --git a/src/node/builtinAgents/harness-init.md b/src/node/builtinAgents/harness-init.md
new file mode 100644
index 0000000000..738bd13ace
--- /dev/null
+++ b/src/node/builtinAgents/harness-init.md
@@ -0,0 +1,65 @@
+---
+name: Harness Init
+description: Interactive harness generation + approval (internal)
+base: exec
+ui:
+ hidden: true
+ color: var(--color-harness-init-mode)
+subagent:
+ runnable: false
+tools:
+ remove:
+ - web_search
+ - web_fetch
+ - google_search
+---
+
+You are in Harness Init mode.
+
+Your job is to create or refine a Ralph harness for this workspace based on the current plan and the repository.
+
+=== CRITICAL: LIMITED EDIT MODE ===
+Harness schema + output path:
+
+- The `.mux/harness/*.jsonc` schema is provided in the system prompt as ``.
+- The required harness output file path is provided as `` (derived from `MUX_WORKSPACE_NAME`).
+- Follow the schema exactly (extra/unknown keys will fail validation).
+
+- Write the final harness config to the exact `` file.
+ - Do NOT invent filenames.
+ - Create/edit ONLY that one harness file (no extra drafts).
+
+- Web tools are disabled in this mode; do not attempt to look up harness docs online.
+
+- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc`
+- If you delegate to read-only `explore` subagents, instruct them to avoid web_search/web_fetch/google_search too.
+
+- Do NOT modify source code or other repo files.
+- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.).
+ - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch.
+
+=== REQUIRED WORKFLOW ===
+
+1. Start by spawning 1-4 read-only `explore` subagents via `task` with `agentId: "explore"`.
+ - Keep each prompt focused (e.g. CI/workflows, Make targets, tests, etc.).
+ - Tell them to avoid web_search/web_fetch/google_search.
+ - Wait for all reports before writing the harness file.
+
+ Suggested prompt template:
+ - Summarize repo-native gate entrypoints (Makefile, package.json scripts, .github/workflows/\*).
+ - Recommend:
+ - Checklist items (short titles + optional notes)
+ - Gate commands (exact command strings + optional title/timeout)
+ - (Optional) include a fenced ```json draft with { "checklist": [...], "gates": [...] }
+
+2. Synthesize the explore reports into a single harness config (matching ``) and write it to ``.
+
+Gates:
+
+- Prefer a small set of safe, single commands.
+- Do NOT use shell chaining, pipes, redirects, or quotes.
+
+When the harness file is ready for user review:
+
+- Call `propose_harness` exactly once.
+- Do NOT start the Ralph loop yourself; the UI will start it after user approval.
diff --git a/src/node/orpc/context.ts b/src/node/orpc/context.ts
index 3cd5493476..1a2798240b 100644
--- a/src/node/orpc/context.ts
+++ b/src/node/orpc/context.ts
@@ -2,6 +2,10 @@ import type { IncomingHttpHeaders } from "http";
import type { Config } from "@/node/config";
import type { AIService } from "@/node/services/aiService";
import type { ProjectService } from "@/node/services/projectService";
+import type { WorkspaceHarnessService } from "@/node/services/workspaceHarnessService";
+import type { GateRunnerService } from "@/node/services/gateRunnerService";
+import type { GitCheckpointService } from "@/node/services/gitCheckpointService";
+import type { LoopRunnerService } from "@/node/services/loopRunnerService";
import type { WorkspaceService } from "@/node/services/workspaceService";
import type { MuxGatewayOauthService } from "@/node/services/muxGatewayOauthService";
import type { ProviderService } from "@/node/services/providerService";
@@ -29,6 +33,10 @@ export interface ORPCContext {
config: Config;
aiService: AIService;
projectService: ProjectService;
+ workspaceHarnessService: WorkspaceHarnessService;
+ gateRunnerService: GateRunnerService;
+ gitCheckpointService: GitCheckpointService;
+ loopRunnerService: LoopRunnerService;
workspaceService: WorkspaceService;
taskService: TaskService;
providerService: ProviderService;
diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts
index db6f11c839..ec41fadbbf 100644
--- a/src/node/orpc/router.ts
+++ b/src/node/orpc/router.ts
@@ -17,7 +17,9 @@ import { createAsyncMessageQueue } from "@/common/utils/asyncMessageQueue";
import { createRuntime, checkRuntimeAvailability } from "@/node/runtime/runtimeFactory";
import { createRuntimeForWorkspace } from "@/node/runtime/runtimeHelpers";
+import { getPlanFilePath } from "@/common/utils/planStorage";
import { readPlanFile } from "@/node/utils/runtime/helpers";
+import { createMuxMessage } from "@/common/types/message";
import { secretsToRecord } from "@/common/types/secrets";
import { roundToBase2 } from "@/common/telemetry/utils";
import { createAsyncEventQueue } from "@/common/utils/asyncEventIterator";
@@ -1693,6 +1695,287 @@ export const router = (authToken?: string) => {
}
}),
},
+ harness: {
+ exists: t
+ .input(schemas.workspace.harness.exists.input)
+ .output(schemas.workspace.harness.exists.output)
+ .handler(async ({ context, input }) => {
+ try {
+ const presence = await context.workspaceHarnessService.getHarnessPresenceForWorkspace(
+ input.workspaceId
+ );
+ return { success: true, data: presence };
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ return { success: false, error: message };
+ }
+ }),
+ get: t
+ .input(schemas.workspace.harness.get.input)
+ .output(schemas.workspace.harness.get.output)
+ .handler(async ({ context, input }) => {
+ try {
+ const harness = await context.workspaceHarnessService.getHarnessForWorkspace(
+ input.workspaceId
+ );
+ const [lastGateRun, lastCheckpoint, loopState] = await Promise.all([
+ context.gateRunnerService.getLastGateRun(input.workspaceId),
+ context.gitCheckpointService.getLastCheckpoint(input.workspaceId),
+ context.loopRunnerService.getState(input.workspaceId),
+ ]);
+
+ return {
+ success: true,
+ data: {
+ config: harness.config,
+ paths: harness.paths,
+ exists: harness.exists,
+ lastGateRun,
+ lastCheckpoint,
+ loopState,
+ },
+ };
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ return { success: false, error: message };
+ }
+ }),
+ set: t
+ .input(schemas.workspace.harness.set.input)
+ .output(schemas.workspace.harness.set.output)
+ .handler(async ({ context, input }) => {
+ try {
+ const normalized = await context.workspaceHarnessService.setHarnessForWorkspace(
+ input.workspaceId,
+ input.config
+ );
+ return { success: true, data: normalized };
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ return { success: false, error: message };
+ }
+ }),
+ runGates: t
+ .input(schemas.workspace.harness.runGates.input)
+ .output(schemas.workspace.harness.runGates.output)
+ .handler(async ({ context, input }) => {
+ const result = await context.gateRunnerService.runGates(input.workspaceId);
+ if (!result.success) {
+ return { success: false, error: result.error };
+ }
+ return { success: true, data: result.data };
+ }),
+ checkpoint: t
+ .input(schemas.workspace.harness.checkpoint.input)
+ .output(schemas.workspace.harness.checkpoint.output)
+ .handler(async ({ context, input }) => {
+ try {
+ const harness = await context.workspaceHarnessService.getHarnessForWorkspace(
+ input.workspaceId
+ );
+ const loopState = await context.loopRunnerService.getState(input.workspaceId);
+
+ const template =
+ input.messageTemplate ??
+ harness.config.loop?.commitMessageTemplate ??
+ "mux(harness): {{item}}";
+
+ const result = await context.gitCheckpointService.checkpoint(input.workspaceId, {
+ messageTemplate: template,
+ itemTitle: loopState.currentItemTitle ?? "checkpoint",
+ iteration: loopState.iteration,
+ });
+
+ if (!result.success) {
+ return { success: false, error: result.error };
+ }
+
+ return { success: true, data: result.data };
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ return { success: false, error: message };
+ }
+ }),
+ resetContext: t
+ .input(schemas.workspace.harness.resetContext.input)
+ .output(schemas.workspace.harness.resetContext.output)
+ .handler(async ({ context, input }) => {
+ try {
+ const [harness, loopState, lastGateRun, lastCheckpoint, workspaceInfo] =
+ await Promise.all([
+ context.workspaceHarnessService.getHarnessForWorkspace(input.workspaceId),
+ context.loopRunnerService.getState(input.workspaceId),
+ context.gateRunnerService.getLastGateRun(input.workspaceId),
+ context.gitCheckpointService.getLastCheckpoint(input.workspaceId),
+ context.workspaceService.getInfo(input.workspaceId),
+ ]);
+
+ const workspaceName = workspaceInfo?.name ?? input.workspaceId;
+ const configPathHint = `.mux/harness/${workspaceName}.jsonc`;
+ const progressPathHint = `.mux/harness/${workspaceName}.progress.md`;
+ const planPathHint = (() => {
+ if (!workspaceInfo) {
+ return null;
+ }
+
+ const runtime = createRuntime(workspaceInfo.runtimeConfig, {
+ projectPath: workspaceInfo.projectPath,
+ });
+ const muxHome = runtime.getMuxHome();
+
+ return getPlanFilePath(workspaceName, workspaceInfo.projectName, muxHome);
+ })();
+
+ const lines: string[] = [];
+ lines.push("# Harness bearings");
+ lines.push("");
+ lines.push(`- Loop status: ${loopState.status}`);
+ lines.push(`- Iteration: ${loopState.iteration}`);
+ if (loopState.currentItemTitle) {
+ lines.push(`- Current item: ${loopState.currentItemTitle}`);
+ }
+ if (lastGateRun) {
+ lines.push(`- Last gates: ${lastGateRun.ok ? "PASS" : "FAIL"}`);
+ }
+ if (lastCheckpoint?.commitSha) {
+ lines.push(`- Last commit: ${lastCheckpoint.commitSha}`);
+ }
+ if (input.note) {
+ lines.push(`- Note: ${input.note}`);
+ }
+ lines.push("");
+ lines.push("Harness files:");
+ lines.push(`- ${progressPathHint}`);
+ lines.push(`- ${configPathHint}`);
+ if (planPathHint) {
+ lines.push(`- Plan: ${planPathHint}`);
+ }
+ lines.push("");
+ lines.push("Checklist:");
+ if (harness.config.checklist.length === 0) {
+ lines.push("(no checklist items)");
+ } else {
+ for (const item of harness.config.checklist) {
+ const marker =
+ item.status === "done"
+ ? "[x]"
+ : item.status === "doing"
+ ? "[~]"
+ : item.status === "blocked"
+ ? "[!]"
+ : "[ ]";
+ lines.push(`- ${marker} ${item.title}`);
+ }
+ }
+
+ const summary = lines.join("\n");
+
+ const summaryMessage = createMuxMessage(
+ `harness-reset-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`,
+ "assistant",
+ summary,
+ {
+ timestamp: Date.now(),
+ compacted: "user",
+ mode: "exec",
+ muxMetadata: { type: "harness-bearings" },
+ }
+ );
+
+ const replaceResult = await context.workspaceService.replaceHistory(
+ input.workspaceId,
+ summaryMessage
+ );
+ if (!replaceResult.success) {
+ return { success: false, error: replaceResult.error };
+ }
+
+ return { success: true, data: undefined };
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ return { success: false, error: message };
+ }
+ }),
+ },
+ loop: {
+ getState: t
+ .input(schemas.workspace.loop.getState.input)
+ .output(schemas.workspace.loop.getState.output)
+ .handler(async ({ context, input }) => {
+ return context.loopRunnerService.getState(input.workspaceId);
+ }),
+ start: t
+ .input(schemas.workspace.loop.start.input)
+ .output(schemas.workspace.loop.start.output)
+ .handler(async ({ context, input }) => {
+ try {
+ const result = await context.loopRunnerService.start(input.workspaceId);
+ if (!result.success) {
+ return { success: false, error: result.error };
+ }
+ return { success: true, data: undefined };
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ return { success: false, error: message };
+ }
+ }),
+ pause: t
+ .input(schemas.workspace.loop.pause.input)
+ .output(schemas.workspace.loop.pause.output)
+ .handler(async ({ context, input }) => {
+ try {
+ const result = await context.loopRunnerService.pause(input.workspaceId, input.reason);
+ if (!result.success) {
+ return { success: false, error: result.error };
+ }
+ return { success: true, data: undefined };
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ return { success: false, error: message };
+ }
+ }),
+ stop: t
+ .input(schemas.workspace.loop.stop.input)
+ .output(schemas.workspace.loop.stop.output)
+ .handler(async ({ context, input }) => {
+ try {
+ const result = await context.loopRunnerService.stop(input.workspaceId, input.reason);
+ if (!result.success) {
+ return { success: false, error: result.error };
+ }
+ return { success: true, data: undefined };
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ return { success: false, error: message };
+ }
+ }),
+ subscribe: t
+ .input(schemas.workspace.loop.subscribe.input)
+ .output(schemas.workspace.loop.subscribe.output)
+ .handler(async function* ({ context, input }) {
+ const { workspaceId } = input;
+ const service = context.loopRunnerService;
+
+ const queue = createAsyncEventQueue>>();
+
+ const onChange = (changedWorkspaceId: string) => {
+ if (changedWorkspaceId !== workspaceId) {
+ return;
+ }
+ void service.getState(workspaceId).then(queue.push);
+ };
+
+ service.on("change", onChange);
+
+ try {
+ queue.push(await service.getState(workspaceId));
+ yield* queue.iterate();
+ } finally {
+ queue.end();
+ service.off("change", onChange);
+ }
+ }),
+ },
},
tasks: {
create: t
diff --git a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts
index 656d43cd8d..f1f92d9152 100644
--- a/src/node/services/agentDefinitions/builtInAgentContent.generated.ts
+++ b/src/node/services/agentDefinitions/builtInAgentContent.generated.ts
@@ -6,6 +6,7 @@ export const BUILTIN_AGENT_CONTENT = {
"compact": "---\nname: Compact\ndescription: History compaction (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\n---\n\nYou are running a compaction/summarization pass. Your task is to write a concise summary of the conversation so far.\n\nIMPORTANT:\n\n- You have NO tools available. Do not attempt to call any tools or output JSON.\n- Simply write the summary as plain text prose.\n- Follow the user's instructions for what to include in the summary.\n",
"exec": "---\nname: Exec\ndescription: Implement changes in the repository\nui:\n color: var(--color-exec-mode)\nsubagent:\n runnable: true\n append_prompt: |\n If you are running as a sub-agent in a child workspace:\n\n - When you have a final answer, call agent_report exactly once.\n - Do not call task/task_await/task_list/task_terminate (subagent recursion is disabled).\n - Do not call propose_plan.\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n remove:\n # Exec mode doesn't use planning tools\n - propose_plan\n - ask_user_question\n # Internal-only tools\n - system1_keep_ranges\n---\n\nYou are in Exec mode.\n\n- Make minimal, correct, reviewable changes that match existing codebase patterns.\n- Prefer targeted commands and checks (typecheck/tests) when feasible.\n- Treat as a standing order: keep running checks and addressing failures until they pass or a blocker outside your control arises.\n",
"explore": "---\nname: Explore\ndescription: Read-only exploration of repository, environment, web, etc. Useful for investigation before making changes.\nbase: exec\nui:\n hidden: true\nsubagent:\n runnable: true\n skip_init_hook: true\n append_prompt: |\n You are an Explore sub-agent running inside a child workspace.\n\n - Explore the repository to answer the prompt using read-only investigation.\n - Return concise, actionable findings (paths, symbols, callsites, and facts).\n - When you have a final answer, call agent_report exactly once.\n - Do not call agent_report until you have completed the assigned task.\ntools:\n # Remove editing and task tools from exec base (read-only agent)\n remove:\n - file_edit_.*\n - task\n - task_.*\n - agent_skill_read\n - agent_skill_read_file\n---\n\nYou are in Explore mode (read-only).\n\n=== CRITICAL: READ-ONLY MODE - NO FILE MODIFICATIONS ===\n\n- You MUST NOT create, edit, delete, move, or copy files.\n- You MUST NOT create temporary files anywhere (including /tmp).\n- You MUST NOT use redirect operators (>, >>, |) or heredocs to write to files.\n- You MUST NOT run commands that change system state (rm, mv, cp, mkdir, touch, git add/commit, installs, etc.).\n- Prefer `file_read` for reading file contents (supports offset/limit paging).\n- Use bash only for read-only operations (rg, ls, git diff/show/log, etc.), or when you need piping/processing.\n",
+ "harness-init": "---\nname: Harness Init\ndescription: Interactive harness generation + approval (internal)\nbase: exec\nui:\n hidden: true\n color: var(--color-harness-init-mode)\nsubagent:\n runnable: false\ntools:\n remove:\n - web_search\n - web_fetch\n - google_search\n---\n\nYou are in Harness Init mode.\n\nYour job is to create or refine a Ralph harness for this workspace based on the current plan and the repository.\n\n=== CRITICAL: LIMITED EDIT MODE ===\nHarness schema + output path:\n\n- The `.mux/harness/*.jsonc` schema is provided in the system prompt as ``.\n- The required harness output file path is provided as `` (derived from `MUX_WORKSPACE_NAME`).\n- Follow the schema exactly (extra/unknown keys will fail validation).\n\n- Write the final harness config to the exact `` file.\n - Do NOT invent filenames.\n - Create/edit ONLY that one harness file (no extra drafts).\n\n- Web tools are disabled in this mode; do not attempt to look up harness docs online.\n\n- You may ONLY create/edit files under: `.mux/harness/**/*.jsonc`\n- If you delegate to read-only `explore` subagents, instruct them to avoid web_search/web_fetch/google_search too.\n\n- Do NOT modify source code or other repo files.\n- Use bash only for read-only investigation (rg, ls, cat, git diff/show/log, etc.).\n - No redirects/heredocs, no installs, no git add/commit, no rm/mv/cp/mkdir/touch.\n\n=== REQUIRED WORKFLOW ===\n\n1. Start by spawning 1-4 read-only `explore` subagents via `task` with `agentId: \"explore\"`.\n - Keep each prompt focused (e.g. CI/workflows, Make targets, tests, etc.).\n - Tell them to avoid web_search/web_fetch/google_search.\n - Wait for all reports before writing the harness file.\n\n Suggested prompt template:\n - Summarize repo-native gate entrypoints (Makefile, package.json scripts, .github/workflows/\\*).\n - Recommend:\n - Checklist items (short titles + optional notes)\n - Gate commands (exact command strings + optional title/timeout)\n - (Optional) include a fenced ```json draft with { \"checklist\": [...], \"gates\": [...] }\n\n2. Synthesize the explore reports into a single harness config (matching ``) and write it to ``.\n\nGates:\n\n- Prefer a small set of safe, single commands.\n- Do NOT use shell chaining, pipes, redirects, or quotes.\n\nWhen the harness file is ready for user review:\n\n- Call `propose_harness` exactly once.\n- Do NOT start the Ralph loop yourself; the UI will start it after user approval.\n",
"mux": "---\nname: Mux\ndescription: Configure mux global behavior (system workspace)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - mux_global_agents_read\n - mux_global_agents_write\n - ask_user_question\n---\n\nYou are the **Mux system assistant**.\n\nYour job is to help the user configure mux globally by editing the mux-wide instructions file:\n\n- `~/.mux/AGENTS.md`\n\n## Safety rules\n\n- You do **not** have access to arbitrary filesystem tools.\n- You do **not** have access to project secrets.\n- Before writing `~/.mux/AGENTS.md`, you must:\n 1) Read the current file (`mux_global_agents_read`).\n 2) Propose the exact change (show the new content or a concise diff).\n 3) Ask for explicit confirmation via `ask_user_question`.\n 4) Only then call `mux_global_agents_write` with `confirm: true`.\n\nIf the user declines, do not write anything.\n",
"plan": "---\nname: Plan\ndescription: Create a plan before coding\nui:\n color: var(--color-plan-mode)\nsubagent:\n runnable: false\ntools:\n add:\n # Allow all tools by default (includes MCP tools which have dynamic names)\n # Use tools.remove in child agents to restrict specific tools\n - .*\n # Note: file_edit_* tools ARE available but restricted to plan file only at runtime\n # Note: task tools ARE enabled - Plan delegates to Explore sub-agents\n---\n\nYou are in Plan Mode.\n\n- Every response MUST produce or update a plan—no exceptions.\n- Simple requests deserve simple plans; a straightforward task might only need a few bullet points. Match plan complexity to the problem.\n- Keep the plan scannable; put long rationale in `/` blocks.\n- Plans must be **self-contained**: include enough context, goals, constraints, and the core \"why\" so a new assistant can implement without needing the prior chat.\n- When Plan Mode is requested, assume the user wants the actual completed plan; do not merely describe how you would devise one.\n\n## Investigation step (required)\n\nBefore proposing a plan, identify what you must verify and use the best available tools\n(`file_read` for local file contents, search, or user questions). Do not guess. Investigation can be\ndone directly; sub-agents are optional.\n\nPrefer `file_read` over `bash cat` when reading files (including the plan file): long bash output may\nbe compacted, which can hide the middle of a document. Use `file_read` with offset/limit to page\nthrough larger files.\n\n## Plan format\n\n- Context/Why: Briefly restate the request, goals, and the rationale or user impact so the\n plan stands alone for a fresh implementer.\n- Evidence: List sources consulted (file paths, tool outputs, or user-provided info) and\n why they are sufficient. If evidence is missing, still produce a minimal plan and add a\n Questions section listing what you need to proceed.\n\nDetailed plan mode instructions (plan file path, sub-agent delegation, propose_plan workflow) are provided separately.\n",
"system1_bash": "---\nname: System1 Bash\ndescription: Fast bash-output filtering (internal)\nui:\n hidden: true\nsubagent:\n runnable: false\ntools:\n add:\n - system1_keep_ranges\n---\n\nYou are a fast bash-output filtering assistant.\n\nYou will be given:\n\n- `maxKeptLines` (budget)\n- `Display name` (optional): a short intent label for the command\n- `Bash script`\n- `Numbered output`\n\nGiven the numbered output, decide which lines to keep so the user sees the most relevant information.\n\nIMPORTANT:\n\n- You MUST call `system1_keep_ranges` exactly once.\n- Do NOT output markdown or prose. Only the tool call (with valid JSON arguments).\n\nRules:\n\n- Line numbers are 1-based indices into the numbered output.\n- Use the `Display name` and `Bash script` as intent hints.\n- If intent is exploration/listing/search (e.g. `ls`, `find`, `rg`, `grep`, `git status`), prioritize keeping\n representative file paths/matches and any summary/counts (not just errors).\n- If intent is build/test/logs, prefer errors, stack traces, failing test summaries, and actionable warnings.\n- If the script already narrows output to a slice (e.g. `head`, `tail`, `sed -n` line ranges), avoid extra\n denoising: prefer keeping most/all lines within the budget.\n- Never filter out git merge conflict markers (`<<<<<<<`, `|||||||`, `=======`, `>>>>>>>`). If the command is searching for these markers (e.g. `rg`/`grep`), do not keep only representative matches; keep all matches within the budget.\n- Prefer omitting tool-generated advisory blocks (especially git lines starting with `hint:`) that only suggest\n next-step commands or point to docs/help. Keep the underlying `error:`/`fatal:`/`CONFLICT` lines, file paths,\n and conflict markers instead.\n- Exception: keep `hint:` blocks when the script is explicitly searching for them (e.g. `rg '^hint:'`) or when\n the hint is the only clue explaining a blocking state.\n- Prefer high signal density: keep ranges tight around important lines plus minimal surrounding context.\n- Merge adjacent/overlapping ranges only when the lines between are also informative. Do NOT add noise just\n to reduce range count; it's OK to return many ranges when denoising (e.g., > 8).\n- Denoise aggressively: omit duplicate/redundant lines and repeated messages with the same meaning\n (e.g., repeated progress, retries, or identical stack traces). If the same error repeats, keep only\n the most informative instance plus minimal surrounding context.\n- If there are many similar warnings/errors, keep only a few representative examples (prefer those\n with file paths/line numbers) plus any summary/count.\n- Always keep at least 1 line if any output exists.\n- Choose ranges that keep at most `maxKeptLines` lines total (the caller may truncate).\n\nExample:\n\n- Numbered output:\n - 0001| building...\n - 0002| ERROR: expected X, got Y\n - 0003| at path/to/file.ts:12:3\n - 0004| done\n- Tool call:\n - system1_keep_ranges({\"keep_ranges\":[{\"start\":2,\"end\":3,\"reason\":\"error\"}]})\n",
diff --git a/src/node/services/agentDefinitions/builtInAgentDefinitions.ts b/src/node/services/agentDefinitions/builtInAgentDefinitions.ts
index 80e69470ff..a9a9ddd136 100644
--- a/src/node/services/agentDefinitions/builtInAgentDefinitions.ts
+++ b/src/node/services/agentDefinitions/builtInAgentDefinitions.ts
@@ -18,6 +18,7 @@ const BUILT_IN_SOURCES: BuiltInSource[] = [
{ id: "exec", content: BUILTIN_AGENT_CONTENT.exec },
{ id: "plan", content: BUILTIN_AGENT_CONTENT.plan },
{ id: "compact", content: BUILTIN_AGENT_CONTENT.compact },
+ { id: "harness-init", content: BUILTIN_AGENT_CONTENT["harness-init"] },
{ id: "explore", content: BUILTIN_AGENT_CONTENT.explore },
{ id: "system1_bash", content: BUILTIN_AGENT_CONTENT.system1_bash },
{ id: "mux", content: BUILTIN_AGENT_CONTENT.mux },
diff --git a/src/node/services/agentDefinitions/resolveToolPolicy.test.ts b/src/node/services/agentDefinitions/resolveToolPolicy.test.ts
index 47749c60dd..619a1416ea 100644
--- a/src/node/services/agentDefinitions/resolveToolPolicy.test.ts
+++ b/src/node/services/agentDefinitions/resolveToolPolicy.test.ts
@@ -61,6 +61,7 @@ describe("resolveToolPolicyForAgent", () => {
{ regex_match: "task", action: "disable" },
{ regex_match: "task_.*", action: "disable" },
{ regex_match: "propose_plan", action: "disable" },
+ { regex_match: "propose_harness", action: "disable" },
{ regex_match: "ask_user_question", action: "disable" },
{ regex_match: "agent_report", action: "enable" },
]);
diff --git a/src/node/services/agentDefinitions/resolveToolPolicy.ts b/src/node/services/agentDefinitions/resolveToolPolicy.ts
index bb3749b820..bbce511f2d 100644
--- a/src/node/services/agentDefinitions/resolveToolPolicy.ts
+++ b/src/node/services/agentDefinitions/resolveToolPolicy.ts
@@ -24,6 +24,7 @@ const SUBAGENT_HARD_DENY: ToolPolicy = [
{ regex_match: "task", action: "disable" },
{ regex_match: "task_.*", action: "disable" },
{ regex_match: "propose_plan", action: "disable" },
+ { regex_match: "propose_harness", action: "disable" },
{ regex_match: "ask_user_question", action: "disable" },
];
diff --git a/src/node/services/agentPresets.ts b/src/node/services/agentPresets.ts
index c741f66bea..9e166952bd 100644
--- a/src/node/services/agentPresets.ts
+++ b/src/node/services/agentPresets.ts
@@ -47,8 +47,9 @@ function buildSystemPrompt(args: {
const EXEC_PRESET: AgentPreset = {
agentType: "exec",
toolPolicy: [
- // Only the main plan-mode session should call propose_plan.
+ // Only the main workspace session should call propose_* approval tools.
{ regex_match: "propose_plan", action: "disable" },
+ { regex_match: "propose_harness", action: "disable" },
],
systemPrompt: buildSystemPrompt({
agentLabel: "Exec",
@@ -59,6 +60,7 @@ const EXEC_PRESET: AgentPreset = {
rules: [
"- You MUST NOT spawn additional sub-agent tasks.",
"- Do not call propose_plan.",
+ "- Do not call propose_harness.",
"- Prefer small, reviewable diffs and run targeted checks when feasible.",
],
}),
diff --git a/src/node/services/agentSession.ts b/src/node/services/agentSession.ts
index f90bdb9eb1..451f0a6a13 100644
--- a/src/node/services/agentSession.ts
+++ b/src/node/services/agentSession.ts
@@ -1319,7 +1319,9 @@ export class AgentSession {
// Trigger a metadata refresh so the right sidebar updates immediately.
if (
payload.type === "tool-call-end" &&
- (payload.toolName === "propose_plan" || payload.toolName.startsWith("file_edit_"))
+ (payload.toolName === "propose_plan" ||
+ payload.toolName === "propose_harness" ||
+ payload.toolName.startsWith("file_edit_"))
) {
this.onPostCompactionStateChange?.();
}
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index c7e2f6c8c9..7aa80e611c 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -41,6 +41,7 @@ import type { BashOutputEvent } from "@/common/types/stream";
import type { MuxMessage, MuxTextPart } from "@/common/types/message";
import { createMuxMessage } from "@/common/types/message";
import type { Config, ProviderConfig } from "@/node/config";
+import { maybeAppendHarnessConfigSchemaToAdditionalInstructions } from "./harnessConfigSchemaPrompt";
import { StreamManager } from "./streamManager";
import type { InitStateManager } from "./initStateManager";
import type { SendMessageError } from "@/common/types/errors";
@@ -1416,7 +1417,12 @@ export class AIService extends EventEmitter {
runtime: earlyRuntime,
runtimeTempDir: os.tmpdir(),
secrets: {},
- planFileOnly: agentIsPlanLike,
+ mode: effectiveMode === "plan" ? "plan" : "exec",
+ agentId: effectiveAgentId,
+ allowedEditPaths:
+ effectiveAgentId === "harness-init"
+ ? [".mux/harness/*.jsonc", ".mux/harness/**/*.jsonc"]
+ : undefined,
},
"", // Empty workspace ID for early stub config
this.initStateManager,
@@ -1499,6 +1505,14 @@ export class AIService extends EventEmitter {
: nestingInstruction;
}
+ // Harness agents need a schema-aware prompt so they don't web-search for an internal/WIP spec.
+ // This block is generated from the Zod schema at runtime to avoid schema drift.
+ effectiveAdditionalInstructions = maybeAppendHarnessConfigSchemaToAdditionalInstructions({
+ agentId: effectiveAgentId,
+ workspaceName: metadata.name,
+ additionalInstructions: effectiveAdditionalInstructions,
+ });
+
// Read plan content for agent transition (plan-like → exec-like)
// Only read if switching to exec-like agent and last assistant was plan-like.
let planContentForTransition: string | undefined;
@@ -1760,10 +1774,15 @@ export class AIService extends EventEmitter {
),
runtimeTempDir,
backgroundProcessManager: this.backgroundProcessManager,
- // Plan agent configuration for plan file access.
- // - read: plan file is readable in all agents (useful context)
- // - write: enforced by file_edit_* tools (plan file is read-only outside plan agent)
- planFileOnly: agentIsPlanLike,
+ // Plan/exec mode configuration for plan file access.
+ // - read: plan file is readable in all modes (useful context)
+ // - write: enforced by file_edit_* tools (plan file is read-only outside plan mode)
+ mode: effectiveMode === "plan" ? "plan" : "exec",
+ agentId: effectiveAgentId,
+ allowedEditPaths:
+ effectiveAgentId === "harness-init"
+ ? [".mux/harness/*.jsonc", ".mux/harness/**/*.jsonc"]
+ : undefined,
emitChatEvent: (event) => {
// Defensive: tools should only emit events for the workspace they belong to.
if ("workspaceId" in event && event.workspaceId !== workspaceId) {
diff --git a/src/node/services/bashExecutionService.ts b/src/node/services/bashExecutionService.ts
index 3755721ede..c23f47805c 100644
--- a/src/node/services/bashExecutionService.ts
+++ b/src/node/services/bashExecutionService.ts
@@ -1,6 +1,7 @@
import { spawn } from "child_process";
import type { ChildProcess } from "child_process";
import { log } from "./log";
+import { NON_INTERACTIVE_ENV_VARS } from "@/common/constants/env";
import { getBashPath } from "@/node/utils/main/bashPath";
/**
@@ -85,16 +86,9 @@ export class BashExecutionService {
...process.env,
// Inject secrets as environment variables
...(secrets ?? {}),
- // Prevent interactive editors from blocking bash execution
- // Critical for git operations like rebase/commit that try to open editors
- GIT_EDITOR: "true", // Git-specific editor (highest priority)
- GIT_SEQUENCE_EDITOR: "true", // For interactive rebase sequences
- EDITOR: "true", // General fallback for non-git commands
- VISUAL: "true", // Another common editor environment variable
- // Prevent git from prompting for credentials
- // Critical for operations like fetch/pull that might try to authenticate
- // Without this, git can hang waiting for user input if credentials aren't configured
- GIT_TERMINAL_PROMPT: "0", // Disables git credential prompts
+
+ // Prevent interactive editors / credential prompts from blocking execution.
+ ...NON_INTERACTIVE_ENV_VARS,
};
}
diff --git a/src/node/services/gateRunnerService.ts b/src/node/services/gateRunnerService.ts
new file mode 100644
index 0000000000..1673cc92e5
--- /dev/null
+++ b/src/node/services/gateRunnerService.ts
@@ -0,0 +1,168 @@
+import * as fsPromises from "fs/promises";
+import * as path from "path";
+
+import assert from "@/common/utils/assert";
+import { Ok, Err, type Result } from "@/common/types/result";
+import type { HarnessGate, HarnessGateRunResult } from "@/common/types/harness";
+import { HarnessGateRunResultSchema } from "@/common/orpc/schemas";
+import type { Config } from "@/node/config";
+import type { WorkspaceHarnessService } from "@/node/services/workspaceHarnessService";
+import { execBuffered } from "@/node/utils/runtime/helpers";
+import { log } from "@/node/services/log";
+
+const LAST_GATES_FILENAME = "harness-last-gates.json";
+
+// Keep logs reasonably small for IPC and persisted state. This is only for UI display.
+const MAX_OUTPUT_CHARS = 100_000;
+
+function truncateOutput(value: string): { output: string; truncated: boolean } {
+ if (value.length <= MAX_OUTPUT_CHARS) {
+ return { output: value, truncated: false };
+ }
+ return { output: value.slice(-MAX_OUTPUT_CHARS), truncated: true };
+}
+
+export class GateRunnerService {
+ constructor(
+ private readonly config: Config,
+ private readonly workspaceHarnessService: WorkspaceHarnessService
+ ) {
+ assert(config, "GateRunnerService requires a Config instance");
+ assert(
+ workspaceHarnessService,
+ "GateRunnerService requires a WorkspaceHarnessService instance"
+ );
+ }
+
+ private getLastGatesPath(workspaceId: string): string {
+ assert(typeof workspaceId === "string", "workspaceId must be a string");
+ const trimmed = workspaceId.trim();
+ assert(trimmed.length > 0, "workspaceId must not be empty");
+ return path.join(this.config.sessionsDir, trimmed, LAST_GATES_FILENAME);
+ }
+
+ async getLastGateRun(workspaceId: string): Promise {
+ const filePath = this.getLastGatesPath(workspaceId);
+
+ try {
+ const raw = await fsPromises.readFile(filePath, "utf-8");
+ const parsed: unknown = JSON.parse(raw) as unknown;
+ const result = HarnessGateRunResultSchema.safeParse(parsed);
+ return result.success ? result.data : null;
+ } catch {
+ return null;
+ }
+ }
+
+ private async persistLastGateRun(
+ workspaceId: string,
+ result: HarnessGateRunResult
+ ): Promise {
+ const filePath = this.getLastGatesPath(workspaceId);
+ const dir = path.dirname(filePath);
+
+ try {
+ await fsPromises.mkdir(dir, { recursive: true });
+ await fsPromises.writeFile(filePath, JSON.stringify(result, null, 2) + "\n", "utf-8");
+ } catch (error) {
+ log.debug("[HARNESS] Failed to persist last gate run", { workspaceId, error });
+ }
+ }
+
+ async runGates(
+ workspaceId: string,
+ gatesOverride?: HarnessGate[]
+ ): Promise> {
+ assert(typeof workspaceId === "string", "workspaceId must be a string");
+
+ const gates =
+ gatesOverride ??
+ (await this.workspaceHarnessService.getHarnessForWorkspace(workspaceId)).config.gates;
+
+ const startedAt = Date.now();
+ const results: HarnessGateRunResult["results"] = [];
+
+ if (gates.length === 0) {
+ const finishedAt = Date.now();
+ const run: HarnessGateRunResult = {
+ ok: true,
+ startedAt,
+ finishedAt,
+ totalDurationMs: finishedAt - startedAt,
+ results: [],
+ };
+ await this.persistLastGateRun(workspaceId, run);
+ return Ok(run);
+ }
+
+ const { runtime, workspacePath } =
+ await this.workspaceHarnessService.getRuntimeAndWorkspacePath(workspaceId);
+
+ const readyResult = await runtime.ensureReady();
+ if (!readyResult.ready) {
+ const msg = readyResult.error ?? "Runtime not ready";
+ return Err(msg);
+ }
+
+ let ok = true;
+
+ for (const gate of gates) {
+ const timeout = gate.timeoutSecs ?? 10 * 60;
+
+ try {
+ const execResult = await execBuffered(runtime, gate.command, {
+ cwd: workspacePath,
+ timeout,
+ });
+
+ const stdout = truncateOutput(execResult.stdout);
+ const stderr = truncateOutput(execResult.stderr);
+
+ results.push({
+ command: gate.command,
+ exitCode: execResult.exitCode,
+ durationMs: execResult.duration,
+ stdout: stdout.output,
+ stderr: stderr.output,
+ truncatedStdout: stdout.truncated || undefined,
+ truncatedStderr: stderr.truncated || undefined,
+ });
+
+ if (execResult.exitCode !== 0) {
+ ok = false;
+ }
+ } catch (error) {
+ ok = false;
+ const message = error instanceof Error ? error.message : String(error);
+
+ results.push({
+ command: gate.command,
+ exitCode: 1,
+ durationMs: 0,
+ stdout: "",
+ stderr: message,
+ });
+ }
+
+ if (!ok) {
+ // Stop at the first failure to keep iterations tight (Ralph-style backpressure).
+ break;
+ }
+ }
+
+ const finishedAt = Date.now();
+
+ const run: HarnessGateRunResult = {
+ ok,
+ startedAt,
+ finishedAt,
+ totalDurationMs: finishedAt - startedAt,
+ results,
+ };
+
+ await this.persistLastGateRun(workspaceId, run);
+ await this.workspaceHarnessService.updateProgressFile(workspaceId);
+
+ return Ok(run);
+ }
+}
diff --git a/src/node/services/gitCheckpointService.ts b/src/node/services/gitCheckpointService.ts
new file mode 100644
index 0000000000..813acc4f85
--- /dev/null
+++ b/src/node/services/gitCheckpointService.ts
@@ -0,0 +1,169 @@
+import * as fsPromises from "fs/promises";
+import * as path from "path";
+
+import assert from "@/common/utils/assert";
+import { shellQuote } from "@/common/utils/shell";
+import { Ok, Err, type Result } from "@/common/types/result";
+import type { GitCheckpointResult } from "@/common/types/harness";
+import { GitCheckpointResultSchema } from "@/common/orpc/schemas";
+import type { Config } from "@/node/config";
+import type { WorkspaceHarnessService } from "@/node/services/workspaceHarnessService";
+import { execBuffered } from "@/node/utils/runtime/helpers";
+import { log } from "@/node/services/log";
+
+const LAST_CHECKPOINT_FILENAME = "harness-last-checkpoint.json";
+
+// Keep stdout/stderr small enough to store in session state.
+const MAX_LOG_CHARS = 50_000;
+
+function truncateLog(value: string): string {
+ if (value.length <= MAX_LOG_CHARS) return value;
+ return value.slice(-MAX_LOG_CHARS);
+}
+
+function renderTemplate(template: string, vars: Record): string {
+ return template.replace(/\{\{\s*([^}]+?)\s*\}\}/g, (_m, keyRaw: string) => {
+ const key = keyRaw.trim();
+ return vars[key] ?? "";
+ });
+}
+
+export class GitCheckpointService {
+ constructor(
+ private readonly config: Config,
+ private readonly workspaceHarnessService: WorkspaceHarnessService
+ ) {
+ assert(config, "GitCheckpointService requires a Config instance");
+ assert(
+ workspaceHarnessService,
+ "GitCheckpointService requires a WorkspaceHarnessService instance"
+ );
+ }
+
+ private getLastCheckpointPath(workspaceId: string): string {
+ assert(typeof workspaceId === "string", "workspaceId must be a string");
+ const trimmed = workspaceId.trim();
+ assert(trimmed.length > 0, "workspaceId must not be empty");
+ return path.join(this.config.sessionsDir, trimmed, LAST_CHECKPOINT_FILENAME);
+ }
+
+ async getLastCheckpoint(workspaceId: string): Promise {
+ const filePath = this.getLastCheckpointPath(workspaceId);
+
+ try {
+ const raw = await fsPromises.readFile(filePath, "utf-8");
+ const parsed: unknown = JSON.parse(raw) as unknown;
+ const result = GitCheckpointResultSchema.safeParse(parsed);
+ return result.success ? result.data : null;
+ } catch {
+ return null;
+ }
+ }
+
+ private async persistLastCheckpoint(
+ workspaceId: string,
+ result: GitCheckpointResult
+ ): Promise {
+ const filePath = this.getLastCheckpointPath(workspaceId);
+ const dir = path.dirname(filePath);
+
+ try {
+ await fsPromises.mkdir(dir, { recursive: true });
+ await fsPromises.writeFile(filePath, JSON.stringify(result, null, 2) + "\n", "utf-8");
+ } catch (error) {
+ log.debug("[HARNESS] Failed to persist last checkpoint", { workspaceId, error });
+ }
+ }
+
+ async checkpoint(
+ workspaceId: string,
+ options: { messageTemplate: string; itemTitle?: string; iteration?: number }
+ ): Promise> {
+ assert(typeof workspaceId === "string", "workspaceId must be a string");
+ assert(options && typeof options === "object", "options is required");
+ assert(typeof options.messageTemplate === "string", "messageTemplate must be a string");
+
+ const { runtime, workspacePath } =
+ await this.workspaceHarnessService.getRuntimeAndWorkspacePath(workspaceId);
+
+ const readyResult = await runtime.ensureReady();
+ if (!readyResult.ready) {
+ const msg = readyResult.error ?? "Runtime not ready";
+ return Err(msg);
+ }
+
+ const statusBefore = await execBuffered(runtime, "git status --porcelain", {
+ cwd: workspacePath,
+ timeout: 30,
+ });
+
+ const dirtyBefore = statusBefore.exitCode === 0 && statusBefore.stdout.trim().length > 0;
+
+ if (!dirtyBefore) {
+ const res: GitCheckpointResult = {
+ committed: false,
+ dirtyBefore: false,
+ dirtyAfter: false,
+ commitSha: null,
+ commitMessage: null,
+ };
+ await this.persistLastCheckpoint(workspaceId, res);
+ return Ok(res);
+ }
+
+ const messageRaw = renderTemplate(options.messageTemplate, {
+ item: options.itemTitle ?? "(no item)",
+ iteration: options.iteration !== undefined ? String(options.iteration) : "",
+ workspaceId,
+ }).trim();
+
+ const message = messageRaw.length > 0 ? messageRaw : "mux(harness): checkpoint";
+
+ const addResult = await execBuffered(runtime, "git add -A", {
+ cwd: workspacePath,
+ timeout: 60,
+ });
+
+ if (addResult.exitCode !== 0) {
+ return Err(truncateLog(addResult.stderr || addResult.stdout || "git add -A failed"));
+ }
+
+ // Use shellQuote to keep the commit message stable across runtimes.
+ const commitResult = await execBuffered(runtime, `git commit -m ${shellQuote(message)}`, {
+ cwd: workspacePath,
+ timeout: 120,
+ });
+
+ if (commitResult.exitCode !== 0) {
+ return Err(truncateLog(commitResult.stderr || commitResult.stdout || "git commit failed"));
+ }
+
+ const shaResult = await execBuffered(runtime, "git rev-parse HEAD", {
+ cwd: workspacePath,
+ timeout: 30,
+ });
+
+ const commitSha = shaResult.exitCode === 0 ? shaResult.stdout.trim() : "";
+
+ const statusAfter = await execBuffered(runtime, "git status --porcelain", {
+ cwd: workspacePath,
+ timeout: 30,
+ });
+
+ const dirtyAfter = statusAfter.exitCode === 0 && statusAfter.stdout.trim().length > 0;
+
+ const res: GitCheckpointResult = {
+ committed: true,
+ dirtyBefore,
+ dirtyAfter,
+ commitSha: commitSha.length > 0 ? commitSha : null,
+ commitMessage: message,
+ stdout: truncateLog(commitResult.stdout).trim() || undefined,
+ stderr: truncateLog(commitResult.stderr).trim() || undefined,
+ };
+
+ await this.persistLastCheckpoint(workspaceId, res);
+
+ return Ok(res);
+ }
+}
diff --git a/src/node/services/harnessConfigSchemaPrompt.test.ts b/src/node/services/harnessConfigSchemaPrompt.test.ts
new file mode 100644
index 0000000000..b2249819dc
--- /dev/null
+++ b/src/node/services/harnessConfigSchemaPrompt.test.ts
@@ -0,0 +1,123 @@
+import * as fs from "node:fs/promises";
+import * as path from "node:path";
+
+import { describe, expect, test } from "bun:test";
+import type { Tool } from "ai";
+
+import { DEFAULT_RUNTIME_CONFIG } from "@/common/constants/workspace";
+import type { WorkspaceMetadata } from "@/common/types/workspace";
+import { applyToolPolicy } from "@/common/utils/tools/toolPolicy";
+import { LocalRuntime } from "@/node/runtime/LocalRuntime";
+import {
+ clearBuiltInAgentCache,
+ getBuiltInAgentDefinitions,
+} from "./agentDefinitions/builtInAgentDefinitions";
+import { resolveToolPolicyForAgent } from "./agentDefinitions/resolveToolPolicy";
+import { buildSystemMessage } from "./systemMessage";
+import { DisposableTempDir } from "./tempDir";
+import { maybeAppendHarnessConfigSchemaToAdditionalInstructions } from "./harnessConfigSchemaPrompt";
+
+describe("harness config schema prompt injection", () => {
+ test("includes in additional instructions for harness agents", async () => {
+ using tempDir = new DisposableTempDir("harness-schema-prompt");
+
+ const projectDir = path.join(tempDir.path, "project");
+ const workspaceDir = path.join(tempDir.path, "workspace");
+ const globalMuxDir = path.join(tempDir.path, "global-mux");
+
+ await fs.mkdir(projectDir, { recursive: true });
+ await fs.mkdir(workspaceDir, { recursive: true });
+ await fs.mkdir(globalMuxDir, { recursive: true });
+
+ const originalMuxRoot = process.env.MUX_ROOT;
+ process.env.MUX_ROOT = globalMuxDir;
+
+ try {
+ const runtime = new LocalRuntime(tempDir.path);
+ const metadata: WorkspaceMetadata = {
+ id: "test-workspace",
+ name: "test-workspace",
+ projectName: "test-project",
+ projectPath: projectDir,
+ runtimeConfig: DEFAULT_RUNTIME_CONFIG,
+ };
+
+ for (const agentId of ["harness-init"] as const) {
+ const additional = maybeAppendHarnessConfigSchemaToAdditionalInstructions({
+ agentId,
+ workspaceName: metadata.name,
+ additionalInstructions: "extra",
+ });
+ expect(additional).toContain("");
+ expect(additional).toContain(`.mux/harness/${metadata.name}.jsonc`);
+
+ const systemMessage = await buildSystemMessage(metadata, runtime, workspaceDir, additional);
+ expect(systemMessage).toContain("");
+ expect(systemMessage).toContain(`.mux/harness/${metadata.name}.jsonc`);
+
+ const match = /]*>\s*([\s\S]*?)\s*<\/harness_config_schema>/m.exec(
+ systemMessage
+ );
+ expect(match).not.toBeNull();
+
+ const schema = JSON.parse(match![1]) as { required?: string[] };
+ const required = schema.required ?? [];
+ expect(required).toContain("version");
+ expect(required).toContain("checklist");
+ expect(required).toContain("gates");
+ }
+
+ const nonHarness = maybeAppendHarnessConfigSchemaToAdditionalInstructions({
+ agentId: "exec",
+ workspaceName: metadata.name,
+ additionalInstructions: "extra",
+ });
+ expect(nonHarness).toBe("extra");
+ } finally {
+ if (originalMuxRoot === undefined) {
+ delete process.env.MUX_ROOT;
+ } else {
+ process.env.MUX_ROOT = originalMuxRoot;
+ }
+ }
+ });
+});
+
+describe("harness-init tool policy", () => {
+ test("disables web_* tools", () => {
+ clearBuiltInAgentCache();
+ const builtIns = getBuiltInAgentDefinitions();
+
+ const harnessInit = builtIns.find((a) => a.id === "harness-init");
+ const exec = builtIns.find((a) => a.id === "exec");
+
+ expect(harnessInit).toBeDefined();
+ expect(exec).toBeDefined();
+
+ const agents = [{ tools: harnessInit!.frontmatter.tools }, { tools: exec!.frontmatter.tools }];
+
+ const policy = resolveToolPolicyForAgent({
+ agents,
+ isSubagent: false,
+ disableTaskToolsForDepth: false,
+ });
+
+ const tool = {} as unknown as Tool;
+ const filtered = applyToolPolicy(
+ {
+ file_read: tool,
+ web_search: tool,
+ web_fetch: tool,
+ google_search: tool,
+ },
+ policy
+ );
+
+ expect(Object.keys(filtered)).toContain("file_read");
+ expect(Object.keys(filtered)).not.toContain("web_search");
+ expect(Object.keys(filtered)).not.toContain("web_fetch");
+ expect(Object.keys(filtered)).not.toContain("google_search");
+ });
+});
diff --git a/src/node/services/harnessConfigSchemaPrompt.ts b/src/node/services/harnessConfigSchemaPrompt.ts
new file mode 100644
index 0000000000..7450ad076b
--- /dev/null
+++ b/src/node/services/harnessConfigSchemaPrompt.ts
@@ -0,0 +1,60 @@
+import assert from "@/common/utils/assert";
+import { z } from "zod";
+import { WorkspaceHarnessConfigSchema } from "@/common/orpc/schemas/harness";
+
+/**
+ * Prompt-time JSON Schema for `.mux/harness/*.jsonc`.
+ *
+ * We generate this from the Zod schema (source of truth) at runtime so the
+ * model always sees a schema that exactly matches validation.
+ */
+let cachedHarnessConfigSchemaBlock: string | null = null;
+
+function getHarnessConfigSchemaPromptBlock(): string {
+ if (cachedHarnessConfigSchemaBlock) return cachedHarnessConfigSchemaBlock;
+
+ const jsonSchema = z.toJSONSchema(WorkspaceHarnessConfigSchema);
+ assert(
+ jsonSchema && typeof jsonSchema === "object",
+ "Expected z.toJSONSchema(WorkspaceHarnessConfigSchema) to return an object"
+ );
+
+ cachedHarnessConfigSchemaBlock = [
+ ``,
+ JSON.stringify(jsonSchema, null, 2),
+ ` `,
+ ].join("\n");
+
+ return cachedHarnessConfigSchemaBlock;
+}
+
+function normalizeWorkspaceName(value: unknown): string {
+ return typeof value === "string" && value.trim().length > 0 ? value.trim() : "";
+}
+
+function getHarnessOutputPathPromptBlock(workspaceName: unknown): string | null {
+ const normalized = normalizeWorkspaceName(workspaceName);
+ if (!normalized) return null;
+
+ return `.mux/harness/${normalized}.jsonc `;
+}
+
+export function maybeAppendHarnessConfigSchemaToAdditionalInstructions(args: {
+ agentId: string;
+ workspaceName: string | undefined;
+ additionalInstructions: string | undefined;
+}): string | undefined {
+ const shouldInject = args.agentId === "harness-init";
+ if (!shouldInject) return args.additionalInstructions;
+
+ const schemaBlock = getHarnessConfigSchemaPromptBlock();
+ const outputPathBlock = getHarnessOutputPathPromptBlock(args.workspaceName);
+ const block = outputPathBlock ? `${schemaBlock}\n\n${outputPathBlock}` : schemaBlock;
+
+ const additional = args.additionalInstructions;
+ if (additional && additional.trim().length > 0) {
+ return `${additional}\n\n${block}`;
+ }
+
+ return block;
+}
diff --git a/src/node/services/loopRunnerService.test.ts b/src/node/services/loopRunnerService.test.ts
new file mode 100644
index 0000000000..a7dec25afc
--- /dev/null
+++ b/src/node/services/loopRunnerService.test.ts
@@ -0,0 +1,21 @@
+import { describe, expect, it } from "bun:test";
+
+import { buildIterationPrompt } from "./loopRunnerService";
+
+describe("buildIterationPrompt", () => {
+ it("includes item id + journal guidance", () => {
+ const prompt = buildIterationPrompt({
+ iteration: 3,
+ itemId: "item-1",
+ itemTitle: "Do something",
+ configPathHint: ".mux/harness/branch.jsonc",
+ progressPathHint: ".mux/harness/branch.progress.md",
+ });
+
+ expect(prompt).toContain("Checklist item: item-1 — Do something");
+ expect(prompt).toContain("skim the journal");
+ expect(prompt).toContain("append a short entry");
+ expect(prompt).toContain("Journal: .mux/harness/branch.progress.md");
+ expect(prompt).toContain("Config: .mux/harness/branch.jsonc");
+ });
+});
diff --git a/src/node/services/loopRunnerService.ts b/src/node/services/loopRunnerService.ts
new file mode 100644
index 0000000000..1d8fcaed40
--- /dev/null
+++ b/src/node/services/loopRunnerService.ts
@@ -0,0 +1,622 @@
+import { EventEmitter } from "events";
+import * as fsPromises from "fs/promises";
+import * as path from "path";
+
+import assert from "@/common/utils/assert";
+import { Ok, type Result } from "@/common/types/result";
+import type {
+ HarnessChecklistItem,
+ HarnessGateRunResult,
+ HarnessLoopState,
+} from "@/common/types/harness";
+import { HarnessLoopStateSchema } from "@/common/orpc/schemas";
+import { createMuxMessage } from "@/common/types/message";
+import { defaultModel } from "@/common/utils/ai/models";
+import { getPlanFilePath } from "@/common/utils/planStorage";
+import type { WorkspaceService } from "@/node/services/workspaceService";
+import type { AIService } from "@/node/services/aiService";
+import type { Config } from "@/node/config";
+import { log } from "@/node/services/log";
+import { MutexMap } from "@/node/utils/concurrency/mutexMap";
+import type { WorkspaceHarnessService } from "@/node/services/workspaceHarnessService";
+import type { GateRunnerService } from "@/node/services/gateRunnerService";
+import type { GitCheckpointService } from "@/node/services/gitCheckpointService";
+import { createRuntime } from "@/node/runtime/runtimeFactory";
+import { execBuffered } from "@/node/utils/runtime/helpers";
+
+const LOOP_STATE_FILENAME = "harness-loop.json";
+
+const DEFAULT_STATE: HarnessLoopState = {
+ status: "stopped",
+ startedAt: null,
+ iteration: 0,
+ consecutiveFailures: 0,
+ currentItemId: null,
+ currentItemTitle: null,
+ lastGateRun: null,
+ lastCheckpoint: null,
+ lastError: null,
+ stoppedReason: null,
+};
+
+function coerceNonEmptyString(value: unknown): string | null {
+ if (typeof value !== "string") return null;
+ const trimmed = value.trim();
+ return trimmed.length > 0 ? trimmed : null;
+}
+
+function findNextChecklistItem(config: {
+ checklist: HarnessChecklistItem[];
+}): HarnessChecklistItem | null {
+ const doing = config.checklist.find((item) => item.status === "doing");
+ if (doing) return doing;
+
+ const todo = config.checklist.find((item) => item.status === "todo");
+ if (todo) return todo;
+
+ return null;
+}
+
+function hasUnfinishedChecklistItems(config: { checklist: HarnessChecklistItem[] }): boolean {
+ return config.checklist.some((item) => item.status !== "done");
+}
+
+export function buildIterationPrompt(params: {
+ iteration: number;
+ itemId: string;
+ itemTitle: string;
+ configPathHint: string;
+ progressPathHint: string;
+}): string {
+ const lines: string[] = [];
+ lines.push(`Ralph loop iteration ${params.iteration}`);
+ lines.push("");
+ lines.push(`Checklist item: ${params.itemId} — ${params.itemTitle}`);
+ lines.push("");
+ lines.push("Rules:");
+ lines.push("- Make a small, mergeable change.");
+ lines.push("- Run the configured gates (see harness config) before stopping.");
+ lines.push("- Do NOT start the next checklist item.");
+ lines.push(`- Before coding: skim the journal for prior attempts on item ${params.itemId}.`);
+ lines.push(
+ "- After you finish (and gates), append a short entry to the journal (do not edit old entries)."
+ );
+ lines.push("");
+ lines.push("Harness files:");
+ lines.push(`- Journal: ${params.progressPathHint}`);
+ lines.push(`- Config: ${params.configPathHint}`);
+ return lines.join("\n");
+}
+
+function renderLoopSummaryMarkdown(params: {
+ workspaceId: string;
+ iteration: number;
+ currentItemTitle: string | null;
+ configPathHint: string;
+ progressPathHint: string;
+ planPathHint: string;
+ checklist: HarnessChecklistItem[];
+ lastGateRun: HarnessGateRunResult | null;
+ lastCommitSha: string | null;
+ note?: string;
+}): string {
+ const lines: string[] = [];
+
+ lines.push("# Ralph loop bearings");
+ lines.push("");
+ lines.push(`- Workspace: ${params.workspaceId}`);
+ lines.push(`- Iteration: ${params.iteration}`);
+ if (params.currentItemTitle) {
+ lines.push(`- Current item: ${params.currentItemTitle}`);
+ }
+ if (params.lastGateRun) {
+ lines.push(
+ `- Gates: ${params.lastGateRun.ok ? "PASS" : "FAIL"} (${Math.round(
+ params.lastGateRun.totalDurationMs / 1000
+ )}s)`
+ );
+ }
+ if (params.lastCommitSha) {
+ lines.push(`- Last commit: ${params.lastCommitSha}`);
+ }
+ if (params.note) {
+ lines.push(`- Note: ${params.note}`);
+ }
+ lines.push("");
+
+ lines.push("Harness files:");
+ lines.push(`- ${params.progressPathHint}`);
+ lines.push(`- ${params.configPathHint}`);
+ lines.push(`- Plan: ${params.planPathHint}`);
+ lines.push("");
+
+ lines.push("Checklist:");
+ if (params.checklist.length === 0) {
+ lines.push("(no checklist items)");
+ } else {
+ for (const item of params.checklist) {
+ const marker =
+ item.status === "done"
+ ? "[x]"
+ : item.status === "doing"
+ ? "[~]"
+ : item.status === "blocked"
+ ? "[!]"
+ : "[ ]";
+ lines.push(`- ${marker} ${item.title}`);
+ }
+ }
+
+ lines.push("");
+ lines.push("Continue with one small step, then run gates and stop.");
+
+ return lines.join("\n");
+}
+
+export class LoopRunnerService extends EventEmitter {
+ private readonly locks = new MutexMap();
+ private readonly states = new Map();
+ private readonly controllers = new Map();
+
+ constructor(
+ private readonly config: Config,
+ private readonly workspaceService: WorkspaceService,
+ private readonly aiService: AIService,
+ private readonly workspaceHarnessService: WorkspaceHarnessService,
+ private readonly gateRunnerService: GateRunnerService,
+ private readonly gitCheckpointService: GitCheckpointService
+ ) {
+ super();
+ assert(config, "LoopRunnerService requires a Config instance");
+ assert(workspaceService, "LoopRunnerService requires a WorkspaceService instance");
+ assert(aiService, "LoopRunnerService requires an AIService instance");
+ assert(
+ workspaceHarnessService,
+ "LoopRunnerService requires a WorkspaceHarnessService instance"
+ );
+ assert(gateRunnerService, "LoopRunnerService requires a GateRunnerService instance");
+ assert(gitCheckpointService, "LoopRunnerService requires a GitCheckpointService instance");
+ }
+
+ private getStatePath(workspaceId: string): string {
+ assert(typeof workspaceId === "string", "workspaceId must be a string");
+ const trimmed = workspaceId.trim();
+ assert(trimmed.length > 0, "workspaceId must not be empty");
+ return path.join(this.config.sessionsDir, trimmed, LOOP_STATE_FILENAME);
+ }
+
+ private async persistState(workspaceId: string, state: HarnessLoopState): Promise {
+ const filePath = this.getStatePath(workspaceId);
+ const dir = path.dirname(filePath);
+
+ try {
+ await fsPromises.mkdir(dir, { recursive: true });
+ await fsPromises.writeFile(filePath, JSON.stringify(state, null, 2) + "\n", "utf-8");
+ } catch (error) {
+ log.debug("[HARNESS] Failed to persist loop state", { workspaceId, error });
+ }
+
+ // Best-effort: ensure harness journal exists, but never block loop control on remote IO.
+ void this.workspaceHarnessService
+ .updateProgressFile(workspaceId, state)
+ .catch((error: unknown) => {
+ log.debug("[HARNESS] Failed to ensure harness journal exists", { workspaceId, error });
+ });
+
+ this.emit("change", workspaceId);
+ }
+
+ /**
+ * Update checklist item status without clobbering concurrent harness edits.
+ *
+ * The loop runner may hold an in-memory snapshot of the harness config for the
+ * duration of an iteration. Users (or harness-init) can edit the harness file
+ * concurrently; when we update a status (todo→doing, doing→done), we must merge
+ * onto the latest on-disk config to avoid overwriting those edits.
+ */
+ private async updateChecklistItemStatus(
+ workspaceId: string,
+ itemId: string,
+ status: HarnessChecklistItem["status"]
+ ): Promise {
+ assert(typeof itemId === "string" && itemId.trim().length > 0, "itemId must be non-empty");
+
+ try {
+ const latest = await this.workspaceHarnessService.getHarnessForWorkspace(workspaceId);
+ const existing = latest.config.checklist.find((item) => item.id === itemId) ?? null;
+ if (!existing) {
+ return;
+ }
+
+ if (existing.status === status) {
+ return;
+ }
+
+ await this.workspaceHarnessService.setHarnessForWorkspace(workspaceId, {
+ ...latest.config,
+ checklist: latest.config.checklist.map((item) =>
+ item.id === itemId ? { ...item, status } : item
+ ),
+ });
+ } catch (error) {
+ log.debug("[HARNESS] Failed to update checklist item status", {
+ workspaceId,
+ itemId,
+ status,
+ error,
+ });
+ }
+ }
+
+ private async loadStateFromDisk(workspaceId: string): Promise {
+ const filePath = this.getStatePath(workspaceId);
+
+ try {
+ const raw = await fsPromises.readFile(filePath, "utf-8");
+ const parsed: unknown = JSON.parse(raw) as unknown;
+ const result = HarnessLoopStateSchema.safeParse(parsed);
+ if (!result.success) {
+ return { ...DEFAULT_STATE };
+ }
+
+ // If mux restarts mid-loop, force manual resume.
+ if (result.data.status === "running") {
+ return {
+ ...result.data,
+ status: "paused",
+ stoppedReason: result.data.stoppedReason ?? "Mux restarted; resume manually",
+ };
+ }
+
+ return result.data;
+ } catch {
+ return { ...DEFAULT_STATE };
+ }
+ }
+
+ private async getStateUnlocked(workspaceId: string): Promise {
+ const cached = this.states.get(workspaceId);
+ if (cached) {
+ return cached;
+ }
+
+ const loaded = await this.loadStateFromDisk(workspaceId);
+ this.states.set(workspaceId, loaded);
+ return loaded;
+ }
+
+ async getState(workspaceId: string): Promise {
+ return this.locks.withLock(workspaceId, () => this.getStateUnlocked(workspaceId));
+ }
+
+ async start(workspaceId: string): Promise> {
+ return this.locks.withLock(workspaceId, async () => {
+ const prev = await this.getStateUnlocked(workspaceId);
+ if (prev.status === "running") {
+ return Ok(undefined);
+ }
+
+ const next: HarnessLoopState = {
+ ...prev,
+ status: "running",
+ startedAt: prev.status === "paused" ? (prev.startedAt ?? Date.now()) : Date.now(),
+ iteration: prev.status === "paused" ? prev.iteration : 0,
+ consecutiveFailures: prev.status === "paused" ? prev.consecutiveFailures : 0,
+ stoppedReason: null,
+ lastError: null,
+ };
+
+ this.states.set(workspaceId, next);
+ await this.persistState(workspaceId, next);
+
+ this.startRunner(workspaceId);
+
+ return Ok(undefined);
+ });
+ }
+
+ async pause(workspaceId: string, reason?: string): Promise> {
+ return this.locks.withLock(workspaceId, async () => {
+ const prev = await this.getStateUnlocked(workspaceId);
+ if (prev.status !== "running") {
+ return Ok(undefined);
+ }
+
+ const next: HarnessLoopState = {
+ ...prev,
+ status: "paused",
+ stoppedReason: coerceNonEmptyString(reason) ?? prev.stoppedReason,
+ };
+
+ this.states.set(workspaceId, next);
+ await this.persistState(workspaceId, next);
+
+ // Best-effort: stop any in-flight stream.
+ void this.aiService.stopStream(workspaceId, { soft: true });
+
+ const controller = this.controllers.get(workspaceId);
+ controller?.abort();
+
+ return Ok(undefined);
+ });
+ }
+
+ async stop(workspaceId: string, reason?: string): Promise> {
+ return this.locks.withLock(workspaceId, async () => {
+ const prev = await this.getStateUnlocked(workspaceId);
+
+ const next: HarnessLoopState = {
+ ...prev,
+ status: "stopped",
+ startedAt: null,
+ currentItemId: null,
+ currentItemTitle: null,
+ consecutiveFailures: 0,
+ stoppedReason: coerceNonEmptyString(reason) ?? prev.stoppedReason,
+ };
+
+ this.states.set(workspaceId, next);
+ await this.persistState(workspaceId, next);
+
+ void this.aiService.stopStream(workspaceId, { soft: true });
+
+ const controller = this.controllers.get(workspaceId);
+ controller?.abort();
+ this.controllers.delete(workspaceId);
+
+ return Ok(undefined);
+ });
+ }
+
+ private startRunner(workspaceId: string): void {
+ const existing = this.controllers.get(workspaceId);
+ existing?.abort();
+
+ const abortController = new AbortController();
+ this.controllers.set(workspaceId, abortController);
+
+ void this.runLoop(workspaceId, abortController.signal)
+ .catch(async (error: unknown) => {
+ log.error("[HARNESS] Loop runner crashed", { workspaceId, error });
+
+ // Defensive: if the runner crashes, make sure we don't strand the state as "running".
+ const message = error instanceof Error ? error.message : String(error);
+ await this.pause(workspaceId, `Loop runner crashed: ${message}`);
+ })
+ .finally(() => {
+ const current = this.controllers.get(workspaceId);
+ if (current === abortController) {
+ this.controllers.delete(workspaceId);
+ }
+ });
+ }
+
+ private async isGitDirty(workspaceId: string): Promise {
+ try {
+ const { runtime, workspacePath } =
+ await this.workspaceHarnessService.getRuntimeAndWorkspacePath(workspaceId);
+
+ const ready = await runtime.ensureReady();
+ if (!ready.ready) {
+ return false;
+ }
+
+ const status = await execBuffered(runtime, "git status --porcelain", {
+ cwd: workspacePath,
+ timeout: 30,
+ });
+
+ return status.exitCode === 0 && status.stdout.trim().length > 0;
+ } catch {
+ return false;
+ }
+ }
+
+ private async runLoop(workspaceId: string, signal: AbortSignal): Promise {
+ while (!signal.aborted) {
+ const state = await this.getState(workspaceId);
+ if (state.status !== "running") {
+ return;
+ }
+
+ let harness: Awaited>;
+ try {
+ harness = await this.workspaceHarnessService.getHarnessForWorkspace(workspaceId);
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ await this.pause(workspaceId, `Failed to load harness config: ${message}`);
+ return;
+ }
+
+ const config = harness.config;
+ const loop = config.loop;
+
+ const maxIterations = loop?.maxIterations ?? 50;
+ const maxWallTimeMins = loop?.maxWallTimeMins ?? 8 * 60;
+ const maxConsecutiveFailures = loop?.maxConsecutiveFailures ?? 3;
+ const contextReset = loop?.contextReset ?? "replace_history";
+ const autoCommit = loop?.autoCommit ?? true;
+ const commitMessageTemplate = loop?.commitMessageTemplate ?? "mux(harness): {{item}}";
+ const toolPolicy = loop?.toolPolicy;
+
+ if (state.iteration >= maxIterations) {
+ await this.pause(workspaceId, `Max iterations reached (${maxIterations})`);
+ return;
+ }
+
+ if (state.startedAt) {
+ const elapsedMins = (Date.now() - state.startedAt) / 1000 / 60;
+ if (elapsedMins >= maxWallTimeMins) {
+ await this.pause(workspaceId, `Max wall time reached (${maxWallTimeMins} mins)`);
+ return;
+ }
+ }
+
+ const info = await this.workspaceService.getInfo(workspaceId);
+ if (!info) {
+ await this.pause(workspaceId, "Workspace not found");
+ return;
+ }
+
+ const configPathHint = `.mux/harness/${info.name}.jsonc`;
+ const progressPathHint = `.mux/harness/${info.name}.progress.md`;
+
+ const modelString =
+ info.aiSettingsByAgent?.exec?.model ?? info.aiSettings?.model ?? defaultModel;
+ const thinkingLevel =
+ info.aiSettingsByAgent?.exec?.thinkingLevel ?? info.aiSettings?.thinkingLevel;
+
+ const blocked = config.checklist.find((item) => item.status === "blocked") ?? null;
+ const nextItem = findNextChecklistItem(config);
+
+ const isFinalCleanup = nextItem === null;
+ if (isFinalCleanup && blocked) {
+ await this.pause(workspaceId, `Checklist blocked: ${blocked.title}`);
+ return;
+ }
+
+ const itemTitle = nextItem?.title ?? "Final cleanup (gates + git clean)";
+ const itemId = nextItem?.id ?? "final-cleanup";
+ const prompt = buildIterationPrompt({
+ iteration: state.iteration,
+ itemId,
+ itemTitle,
+ configPathHint,
+ progressPathHint,
+ });
+
+ const updatedStateBeforeSend: HarnessLoopState = {
+ ...state,
+ currentItemId: nextItem?.id ?? null,
+ currentItemTitle: itemTitle,
+ };
+
+ this.states.set(workspaceId, updatedStateBeforeSend);
+ await this.persistState(workspaceId, updatedStateBeforeSend);
+
+ // If this is a checklist item, mark it doing before we start.
+ if (nextItem?.status === "todo") {
+ await this.updateChecklistItemStatus(workspaceId, nextItem.id, "doing");
+ }
+
+ const sendResult = await this.workspaceService.sendMessage(workspaceId, prompt, {
+ model: modelString,
+ thinkingLevel,
+ agentId: "exec",
+ mode: "exec",
+ toolPolicy,
+ muxMetadata: { type: "harness-loop", iteration: updatedStateBeforeSend.iteration },
+ });
+
+ if (!sendResult.success) {
+ await this.pause(workspaceId, `sendMessage failed: ${sendResult.error.type}`);
+ return;
+ }
+
+ if (signal.aborted) {
+ return;
+ }
+
+ // Run gates (stop on first failure).
+ const gatesResult = await this.gateRunnerService.runGates(workspaceId, config.gates);
+ if (!gatesResult.success) {
+ await this.pause(workspaceId, `Failed to run gates: ${gatesResult.error}`);
+ return;
+ }
+
+ let nextState: HarnessLoopState = {
+ ...updatedStateBeforeSend,
+ lastGateRun: gatesResult.data,
+ lastError: gatesResult.data.ok ? null : "Gates failed",
+ };
+
+ if (gatesResult.data.ok) {
+ nextState = { ...nextState, consecutiveFailures: 0 };
+
+ if (autoCommit) {
+ const checkpointResult = await this.gitCheckpointService.checkpoint(workspaceId, {
+ messageTemplate: commitMessageTemplate,
+ itemTitle,
+ iteration: nextState.iteration,
+ });
+
+ if (!checkpointResult.success) {
+ await this.pause(workspaceId, `Checkpoint failed: ${checkpointResult.error}`);
+ return;
+ }
+
+ nextState = { ...nextState, lastCheckpoint: checkpointResult.data };
+ }
+
+ // If this was a checklist item, mark it done.
+ if (nextItem) {
+ await this.updateChecklistItemStatus(workspaceId, nextItem.id, "done");
+ }
+ } else {
+ const failures = nextState.consecutiveFailures + 1;
+ nextState = { ...nextState, consecutiveFailures: failures };
+
+ if (failures >= maxConsecutiveFailures) {
+ await this.pause(workspaceId, `Gates failed ${maxConsecutiveFailures} times in a row`);
+ return;
+ }
+ }
+
+ // Stop condition: when checklist is finished and the repo is clean.
+ if (!hasUnfinishedChecklistItems(config) && gatesResult.data.ok && !blocked) {
+ const dirty = await this.isGitDirty(workspaceId);
+ if (!dirty) {
+ await this.stop(workspaceId, "All checklist items done; gates passing; git clean");
+ return;
+ }
+ }
+
+ nextState = { ...nextState, iteration: nextState.iteration + 1 };
+ this.states.set(workspaceId, nextState);
+ await this.persistState(workspaceId, nextState);
+
+ if (contextReset === "replace_history") {
+ const runtime = createRuntime(info.runtimeConfig, { projectPath: info.projectPath });
+ const planPathHint = getPlanFilePath(info.name, info.projectName, runtime.getMuxHome());
+
+ const summary = renderLoopSummaryMarkdown({
+ workspaceId,
+ iteration: nextState.iteration,
+ currentItemTitle: nextState.currentItemTitle,
+ configPathHint,
+ progressPathHint,
+ planPathHint,
+ checklist: config.checklist,
+ lastGateRun: nextState.lastGateRun,
+ lastCommitSha: nextState.lastCheckpoint?.commitSha ?? null,
+ });
+
+ const summaryMessage = createMuxMessage(
+ `harness-loop-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`,
+ "assistant",
+ summary,
+ {
+ timestamp: Date.now(),
+ compacted: "user",
+ mode: "exec",
+ muxMetadata: { type: "harness-loop-bearings" },
+ }
+ );
+
+ const replaceResult = await this.workspaceService.replaceHistory(
+ workspaceId,
+ summaryMessage
+ );
+ if (!replaceResult.success) {
+ log.debug("[HARNESS] Failed to reset context", {
+ workspaceId,
+ error: replaceResult.error,
+ });
+ }
+ }
+
+ // Give the event loop a breath so stop/pause can land quickly.
+ await new Promise((resolve) => setTimeout(resolve, 25));
+ }
+ }
+}
diff --git a/src/node/services/ptc/toolBridge.test.ts b/src/node/services/ptc/toolBridge.test.ts
index a2a2baab12..9bb02a0595 100644
--- a/src/node/services/ptc/toolBridge.test.ts
+++ b/src/node/services/ptc/toolBridge.test.ts
@@ -57,6 +57,7 @@ describe("ToolBridge", () => {
code_execution: createMockTool("code_execution", z.object({}), () => ({})),
ask_user_question: createMockTool("ask_user_question", z.object({}), () => ({})),
propose_plan: createMockTool("propose_plan", z.object({}), () => ({})),
+ propose_harness: createMockTool("propose_harness", z.object({}), () => ({})),
todo_write: createMockTool("todo_write", z.object({}), () => ({})),
todo_read: createMockTool("todo_read", z.object({}), () => ({})),
status_set: createMockTool("status_set", z.object({}), () => ({})),
@@ -68,6 +69,7 @@ describe("ToolBridge", () => {
expect(names).toEqual(["file_read"]);
expect(names).not.toContain("code_execution");
expect(names).not.toContain("ask_user_question");
+ expect(names).not.toContain("propose_harness");
expect(names).not.toContain("propose_plan");
expect(names).not.toContain("todo_write");
expect(names).not.toContain("todo_read");
diff --git a/src/node/services/ptc/toolBridge.ts b/src/node/services/ptc/toolBridge.ts
index 7acd376aea..f4e0c2dee2 100644
--- a/src/node/services/ptc/toolBridge.ts
+++ b/src/node/services/ptc/toolBridge.ts
@@ -14,6 +14,7 @@ const EXCLUDED_TOOLS = new Set([
"code_execution", // Prevent recursive sandbox creation
"ask_user_question", // Requires UI interaction
"propose_plan", // Mode-specific, call directly
+ "propose_harness", // UI-specific, call directly
"todo_write", // UI-specific
"todo_read", // UI-specific
"status_set", // UI-specific
diff --git a/src/node/services/serviceContainer.ts b/src/node/services/serviceContainer.ts
index 0f3ca6c079..807d012ea9 100644
--- a/src/node/services/serviceContainer.ts
+++ b/src/node/services/serviceContainer.ts
@@ -46,6 +46,10 @@ import { SessionTimingService } from "@/node/services/sessionTimingService";
import { ExperimentsService } from "@/node/services/experimentsService";
import { BackgroundProcessManager } from "@/node/services/backgroundProcessManager";
import { MCPConfigService } from "@/node/services/mcpConfigService";
+import { WorkspaceHarnessService } from "@/node/services/workspaceHarnessService";
+import { GateRunnerService } from "@/node/services/gateRunnerService";
+import { GitCheckpointService } from "@/node/services/gitCheckpointService";
+import { LoopRunnerService } from "@/node/services/loopRunnerService";
import { WorkspaceMcpOverridesService } from "@/node/services/workspaceMcpOverridesService";
import { MCPServerManager } from "@/node/services/mcpServerManager";
import { SessionUsageService } from "@/node/services/sessionUsageService";
@@ -83,6 +87,10 @@ export class ServiceContainer {
private readonly partialService: PartialService;
public readonly aiService: AIService;
public readonly projectService: ProjectService;
+ public readonly workspaceHarnessService: WorkspaceHarnessService;
+ public readonly gateRunnerService: GateRunnerService;
+ public readonly gitCheckpointService: GitCheckpointService;
+ public readonly loopRunnerService: LoopRunnerService;
public readonly workspaceService: WorkspaceService;
public readonly taskService: TaskService;
public readonly providerService: ProviderService;
@@ -116,6 +124,9 @@ export class ServiceContainer {
this.historyService = new HistoryService(config);
this.partialService = new PartialService(config, this.historyService);
this.projectService = new ProjectService(config);
+ this.workspaceHarnessService = new WorkspaceHarnessService(config);
+ this.gateRunnerService = new GateRunnerService(config, this.workspaceHarnessService);
+ this.gitCheckpointService = new GitCheckpointService(config, this.workspaceHarnessService);
this.initStateManager = new InitStateManager(config);
this.workspaceMcpOverridesService = new WorkspaceMcpOverridesService(config);
this.mcpConfigService = new MCPConfigService();
@@ -156,6 +167,14 @@ export class ServiceContainer {
this.workspaceService,
this.initStateManager
);
+ this.loopRunnerService = new LoopRunnerService(
+ config,
+ this.workspaceService,
+ this.aiService,
+ this.workspaceHarnessService,
+ this.gateRunnerService,
+ this.gitCheckpointService
+ );
this.aiService.setTaskService(this.taskService);
// Idle compaction service - auto-compacts workspaces after configured idle period
this.idleCompactionService = new IdleCompactionService(
diff --git a/src/node/services/tools/fileCommon.test.ts b/src/node/services/tools/fileCommon.test.ts
index 2acb50334e..678bf0e9a0 100644
--- a/src/node/services/tools/fileCommon.test.ts
+++ b/src/node/services/tools/fileCommon.test.ts
@@ -149,6 +149,70 @@ describe("fileCommon", () => {
});
});
+ describe("validatePlanModeAccess", () => {
+ const cwd = "/workspace/project";
+ const runtime = createRuntime({ type: "local", srcBaseDir: cwd });
+
+ function buildConfig(overrides: Partial): ToolConfiguration {
+ return {
+ cwd,
+ runtime,
+ runtimeTempDir: "/tmp",
+ ...overrides,
+ };
+ }
+
+ it("allows edits to allowlisted files", async () => {
+ const config = buildConfig({
+ mode: "exec",
+ allowedEditPaths: [".mux/harness/*.jsonc", ".mux/harness/**/*.jsonc"],
+ });
+
+ expect(await validatePlanModeAccess(".mux/harness/main.jsonc", config)).toBeNull();
+ expect(
+ await validatePlanModeAccess("/workspace/project/.mux/harness/main.jsonc", config)
+ ).toBeNull();
+ expect(await validatePlanModeAccess(".mux/harness/feature/foo.jsonc", config)).toBeNull();
+ });
+
+ it("rejects edits to non-allowlisted files", async () => {
+ const config = buildConfig({
+ mode: "exec",
+ allowedEditPaths: [".mux/harness/*.jsonc"],
+ });
+
+ const result = await validatePlanModeAccess("src/main.ts", config);
+ expect(result).not.toBeNull();
+ expect(result?.success).toBe(false);
+ expect(result?.error).toContain("File edits are restricted to");
+ expect(result?.error).toContain(".mux/harness/*.jsonc");
+ });
+
+ it("rejects edits to allowlisted directory with wrong extension", async () => {
+ const config = buildConfig({
+ mode: "exec",
+ allowedEditPaths: [".mux/harness/*.jsonc"],
+ });
+
+ const result = await validatePlanModeAccess(".mux/harness/main.progress.md", config);
+ expect(result).not.toBeNull();
+ expect(result?.error).toContain("File edits are restricted to");
+ });
+
+ it("keeps plan file read-only outside plan mode even if allowlisted", async () => {
+ const planFilePath = "/workspace/project/plan.md";
+ const config = buildConfig({
+ mode: "exec",
+ planFilePath,
+ allowedEditPaths: ["/workspace/project/plan.md"],
+ });
+
+ const result = await validatePlanModeAccess(planFilePath, config);
+ expect(result).not.toBeNull();
+ expect(result?.error).toContain("Plan file is read-only outside plan mode");
+ });
+ });
+
describe("validateNoRedundantPrefix", () => {
const cwd = "/workspace/project";
const runtime = createRuntime({ type: "local", srcBaseDir: cwd });
@@ -270,7 +334,7 @@ describe("fileCommon", () => {
cwd: "/home/user/project",
runtime: mockRuntime,
runtimeTempDir: "/tmp",
- planFileOnly: true,
+ mode: "plan",
planFilePath,
};
diff --git a/src/node/services/tools/fileCommon.ts b/src/node/services/tools/fileCommon.ts
index 08e3aa7902..85818495bc 100644
--- a/src/node/services/tools/fileCommon.ts
+++ b/src/node/services/tools/fileCommon.ts
@@ -11,6 +11,40 @@ import type { ToolConfiguration } from "@/common/utils/tools/tools";
*/
export const MAX_FILE_SIZE = 1024 * 1024; // 1MB
+function normalizeForGlobMatch(value: string): string {
+ return value.replace(/\\/g, "/");
+}
+
+function globToRegExp(pattern: string): RegExp {
+ let regex = "^";
+ const normalized = normalizeForGlobMatch(pattern);
+
+ for (let i = 0; i < normalized.length; i += 1) {
+ const char = normalized[i];
+ if (!char) continue;
+
+ if (char === "*") {
+ const next = normalized[i + 1];
+ if (next === "*") {
+ regex += ".*";
+ i += 1;
+ } else {
+ regex += "[^/]*";
+ }
+ continue;
+ }
+
+ // Escape regex metacharacters.
+ if (/[\\^$.*+?()|[\]{}]/.test(char)) {
+ regex += `\\${char}`;
+ } else {
+ regex += char;
+ }
+ }
+
+ regex += "$";
+ return new RegExp(regex);
+}
export interface PlanModeValidationError {
success: false;
error: string;
@@ -22,6 +56,7 @@ export interface PlanModeValidationError {
* - Editing plan file outside plan mode (read-only)
* - Editing non-plan file in plan mode
* - Path is outside cwd (for non-plan files)
+ * - Path is not allowlisted (when allowedEditPaths is configured)
*
* Returns null if validation passes.
*/
@@ -29,28 +64,30 @@ export async function validatePlanModeAccess(
filePath: string,
config: ToolConfiguration
): Promise {
- // Plan file is always read-only outside the plan agent.
+ const isPlanFile = await isPlanFilePath(filePath, config);
+
+ // Plan file is always read-only outside plan mode.
// This is especially important for SSH runtimes, where cwd validation is intentionally skipped.
- if ((await isPlanFilePath(filePath, config)) && !config.planFileOnly) {
+ if (isPlanFile && config.mode !== "plan") {
return {
success: false,
- error: `Plan file is read-only outside the plan agent: ${filePath}`,
+ error: `Plan file is read-only outside plan mode: ${filePath}`,
};
}
- // Plan-agent restriction: only allow editing the plan file (and require exact string match).
- if (config.planFileOnly && config.planFilePath) {
+ // Plan-mode restriction: only allow editing the plan file (and require exact string match).
+ if (config.mode === "plan" && config.planFilePath) {
if (filePath !== config.planFilePath) {
- if (await isPlanFilePath(filePath, config)) {
+ if (isPlanFile) {
return {
success: false,
- error: `In the plan agent, you must use the exact plan file path from the instructions: ${config.planFilePath} (attempted: ${filePath}; this resolves to the plan file but absolute/alternate paths are not allowed)`,
+ error: `In plan mode, you must use the exact plan file path from the instructions: ${config.planFilePath} (attempted: ${filePath}; this resolves to the plan file but absolute/alternate paths are not allowed)`,
};
}
return {
success: false,
- error: `In the plan agent, only the plan file can be edited. You must use the exact plan file path: ${config.planFilePath} (attempted: ${filePath})`,
+ error: `In plan mode, only the plan file can be edited. You must use the exact plan file path: ${config.planFilePath} (attempted: ${filePath})`,
};
}
// Skip cwd validation for plan file - it may be outside workspace
@@ -65,6 +102,30 @@ export async function validatePlanModeAccess(
}
}
+ // Optional allowlist restriction (e.g., harness-init can only edit its harness config).
+ if (!isPlanFile && config.allowedEditPaths && config.allowedEditPaths.length > 0) {
+ const allowed = config.allowedEditPaths
+ .map((pattern) => pattern.trim())
+ .filter((p) => p.length > 0);
+ if (allowed.length > 0) {
+ const resolvedPath = normalizeForGlobMatch(
+ config.runtime.normalizePath(filePath, config.cwd)
+ );
+ const isAllowed = allowed.some((pattern) => {
+ const resolvedPattern = normalizeForGlobMatch(
+ config.runtime.normalizePath(pattern, config.cwd)
+ );
+ return globToRegExp(resolvedPattern).test(resolvedPath);
+ });
+ if (!isAllowed) {
+ return {
+ success: false,
+ error: `File edits are restricted to: ${allowed.join(", ")} (attempted: ${filePath})`,
+ };
+ }
+ }
+ }
+
return null;
}
diff --git a/src/node/services/tools/file_edit_insert.test.ts b/src/node/services/tools/file_edit_insert.test.ts
index 563acf03bd..dabb4a69e0 100644
--- a/src/node/services/tools/file_edit_insert.test.ts
+++ b/src/node/services/tools/file_edit_insert.test.ts
@@ -181,7 +181,7 @@ describe("file_edit_insert plan mode enforcement", () => {
cwd: workspaceCwd,
runtime: createRuntime({ type: "local", srcBaseDir: workspaceCwd }),
runtimeTempDir: testDir,
- planFileOnly: true,
+ mode: "plan",
planFilePath: planFilePath,
});
@@ -194,7 +194,7 @@ describe("file_edit_insert plan mode enforcement", () => {
expect(result.success).toBe(false);
if (!result.success) {
- expect(result.error).toContain("In the plan agent, only the plan file can be edited");
+ expect(result.error).toContain("In plan mode, only the plan file can be edited");
}
});
@@ -210,7 +210,7 @@ describe("file_edit_insert plan mode enforcement", () => {
cwd: workspaceCwd,
runtime: createRuntime({ type: "local", srcBaseDir: workspaceCwd }),
runtimeTempDir: testDir,
- planFileOnly: true,
+ mode: "plan",
planFilePath: planFilePath,
});
@@ -265,7 +265,7 @@ describe("file_edit_insert plan mode enforcement", () => {
cwd: workspaceCwd,
runtime: createRuntime({ type: "local", srcBaseDir: workspaceCwd }),
runtimeTempDir: testDir,
- planFileOnly: true,
+ mode: "plan",
planFilePath: realPlanPath, // The REAL plan file path
});
@@ -279,7 +279,7 @@ describe("file_edit_insert plan mode enforcement", () => {
expect(result.success).toBe(false);
if (!result.success) {
- expect(result.error).toContain("In the plan agent, only the plan file can be edited");
+ expect(result.error).toContain("In plan mode, only the plan file can be edited");
expect(result.error).toContain("exact plan file path");
expect(result.error).toContain(realPlanPath);
expect(result.error).toContain(".mux/plan.md");
diff --git a/src/node/services/tools/file_edit_operation.test.ts b/src/node/services/tools/file_edit_operation.test.ts
index 71e29d553b..c9b51912b1 100644
--- a/src/node/services/tools/file_edit_operation.test.ts
+++ b/src/node/services/tools/file_edit_operation.test.ts
@@ -129,7 +129,7 @@ describe("executeFileEditOperation plan mode enforcement", () => {
cwd: TEST_CWD,
runtime: mockRuntime,
runtimeTempDir: "/tmp",
- planFileOnly: true,
+ mode: "plan",
planFilePath: PLAN_FILE_PATH,
},
filePath: OTHER_FILE_PATH,
@@ -138,7 +138,7 @@ describe("executeFileEditOperation plan mode enforcement", () => {
expect(result.success).toBe(false);
if (!result.success) {
- expect(result.error).toContain("In the plan agent, only the plan file can be edited");
+ expect(result.error).toContain("In plan mode, only the plan file can be edited");
expect(result.error).toContain(OTHER_FILE_PATH);
}
@@ -162,7 +162,7 @@ describe("executeFileEditOperation plan mode enforcement", () => {
cwd: workspaceCwd,
runtime: new LocalRuntime(workspaceCwd),
runtimeTempDir: tempDir.path,
- planFileOnly: true,
+ mode: "plan",
planFilePath: planPath,
},
filePath: planPath,
@@ -237,7 +237,7 @@ describe("executeFileEditOperation plan mode enforcement", () => {
expect(result.success).toBe(false);
if (!result.success) {
- expect(result.error).toContain("read-only outside the plan agent");
+ expect(result.error).toContain("read-only outside plan mode");
}
// Verify file was not modified
@@ -285,7 +285,7 @@ describe("executeFileEditOperation plan mode enforcement", () => {
cwd: "/home/user/project",
runtime: mockRuntime,
runtimeTempDir: "/tmp",
- planFileOnly: true,
+ mode: "plan",
planFilePath: "/home/user/.mux/sessions/ws/plan.md",
},
filePath: "../.mux/sessions/ws/plan.md", // Alternate path to plan file
diff --git a/src/node/services/tools/file_read.test.ts b/src/node/services/tools/file_read.test.ts
index 5478dd401d..e9b787577e 100644
--- a/src/node/services/tools/file_read.test.ts
+++ b/src/node/services/tools/file_read.test.ts
@@ -420,7 +420,7 @@ describe("file_read tool", () => {
cwd: testDir,
runtime: new LocalRuntime(testDir),
runtimeTempDir: testDir,
- planFileOnly: true,
+ mode: "plan",
planFilePath: planPath,
});
diff --git a/src/node/services/tools/file_read.ts b/src/node/services/tools/file_read.ts
index e32261babc..4758d4d82a 100644
--- a/src/node/services/tools/file_read.ts
+++ b/src/node/services/tools/file_read.ts
@@ -49,8 +49,8 @@ export const createFileReadTool: ToolFactory = (config: ToolConfiguration) => {
if (pathValidation) {
// In plan mode, hint about the plan file path to help model recover
const hint =
- config.planFileOnly && config.planFilePath
- ? ` In the plan agent, use the exact plan file path string as provided: ${config.planFilePath}`
+ config.mode === "plan" && config.planFilePath
+ ? ` In plan mode, use the exact plan file path string as provided: ${config.planFilePath}`
: "";
return {
success: false,
diff --git a/src/node/services/tools/propose_harness.ts b/src/node/services/tools/propose_harness.ts
new file mode 100644
index 0000000000..c379b02fd9
--- /dev/null
+++ b/src/node/services/tools/propose_harness.ts
@@ -0,0 +1,156 @@
+import { tool } from "ai";
+import { z } from "zod";
+import * as jsonc from "jsonc-parser";
+
+import { WorkspaceHarnessConfigSchema } from "@/common/orpc/schemas";
+import type { ToolFactory } from "@/common/utils/tools/tools";
+import { TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions";
+import { RuntimeError } from "@/node/runtime/Runtime";
+import { execBuffered, readFileString } from "@/node/utils/runtime/helpers";
+
+const proposeHarnessSchema = z.object({});
+
+const HARNESS_DIR = ".mux/harness";
+
+function normalizeWorkspaceName(value: unknown): string {
+ return typeof value === "string" && value.trim().length > 0 ? value.trim() : "";
+}
+
+function isAllowedHarnessGitPath(pathFromGit: string): boolean {
+ return pathFromGit.startsWith(`${HARNESS_DIR}/`) && pathFromGit.endsWith(".jsonc");
+}
+
+function extractGitStatusPath(line: string): string | null {
+ // Example porcelain lines:
+ // " M src/foo.ts"
+ // "?? .mux/harness/main.jsonc"
+ // "R old -> new"
+ if (typeof line !== "string" || line.length < 4) {
+ return null;
+ }
+
+ const pathPart = line.slice(3).trim();
+ if (!pathPart) {
+ return null;
+ }
+
+ const arrowIndex = pathPart.indexOf(" -> ");
+ if (arrowIndex >= 0) {
+ return pathPart.slice(arrowIndex + 4).trim();
+ }
+
+ return pathPart;
+}
+
+export const createProposeHarnessTool: ToolFactory = (config) => {
+ return tool({
+ description: TOOL_DEFINITIONS.propose_harness.description,
+ inputSchema: proposeHarnessSchema,
+ execute: async () => {
+ const workspaceName = normalizeWorkspaceName(config.muxEnv?.MUX_WORKSPACE_NAME);
+ if (!workspaceName) {
+ return {
+ success: false as const,
+ error: "No workspace name available (missing MUX_WORKSPACE_NAME).",
+ };
+ }
+
+ const prefix = workspaceName;
+ const harnessPath = config.runtime.normalizePath(
+ `${HARNESS_DIR}/${prefix}.jsonc`,
+ config.cwd
+ );
+
+ let harnessContent: string;
+ try {
+ harnessContent = await readFileString(config.runtime, harnessPath);
+ } catch (err) {
+ if (err instanceof RuntimeError) {
+ return {
+ success: false as const,
+ error: `No harness file found at ${harnessPath}. Please write your harness to this file before calling propose_harness.`,
+ };
+ }
+ throw err;
+ }
+
+ if (harnessContent === "") {
+ return {
+ success: false as const,
+ error: `Harness file at ${harnessPath} is empty. Please write your harness content before calling propose_harness.`,
+ };
+ }
+
+ const parseErrors: jsonc.ParseError[] = [];
+ const parsed = jsonc.parse(harnessContent, parseErrors) as unknown;
+ if (parseErrors.length > 0) {
+ return {
+ success: false as const,
+ error: `Harness file at ${harnessPath} is not valid JSONC.`,
+ };
+ }
+
+ const validated = WorkspaceHarnessConfigSchema.safeParse(parsed);
+ if (!validated.success) {
+ return {
+ success: false as const,
+ error: `Harness file at ${harnessPath} does not match the expected schema: ${validated.error.message}`,
+ };
+ }
+
+ // Defensive: ensure harness-init didn't accidentally mutate other repo files (e.g. via bash).
+ try {
+ const isGitRepo = await execBuffered(
+ config.runtime,
+ "git rev-parse --is-inside-work-tree",
+ {
+ cwd: config.cwd,
+ timeout: 10,
+ }
+ );
+ if (isGitRepo.exitCode === 0 && isGitRepo.stdout.trim() === "true") {
+ const status = await execBuffered(config.runtime, "git status --porcelain", {
+ cwd: config.cwd,
+ timeout: 10,
+ });
+ if (status.exitCode === 0) {
+ const dirtyPaths = status.stdout
+ .split(/\r?\n/)
+ .map((line) => extractGitStatusPath(line))
+ .filter((p): p is string => Boolean(p));
+ const nonHarness = dirtyPaths.filter((p) => !isAllowedHarnessGitPath(p));
+ if (nonHarness.length > 0) {
+ return {
+ success: false as const,
+ error:
+ `Working tree has changes outside ${HARNESS_DIR}/*.jsonc: ` +
+ nonHarness.slice(0, 10).join(", "),
+ };
+ }
+ }
+ }
+ } catch {
+ // Best-effort only.
+ }
+
+ // Record file state for external edit detection
+ if (config.recordFileState) {
+ try {
+ const fileStat = await config.runtime.stat(harnessPath);
+ config.recordFileState(harnessPath, {
+ content: harnessContent,
+ timestamp: fileStat.modifiedTime.getTime(),
+ });
+ } catch {
+ // File stat failed, skip recording
+ }
+ }
+
+ return {
+ success: true as const,
+ harnessPath,
+ message: "Harness proposed. Waiting for user approval.",
+ };
+ },
+ });
+};
diff --git a/src/node/services/tools/task.test.ts b/src/node/services/tools/task.test.ts
index 2e51f30e51..15193380c7 100644
--- a/src/node/services/tools/task.test.ts
+++ b/src/node/services/tools/task.test.ts
@@ -116,7 +116,7 @@ describe("task tool", () => {
}
});
- it('should reject spawning "exec" tasks while in plan agent', async () => {
+ it('should reject spawning "exec" tasks while in plan mode', async () => {
using tempDir = new TestTempDir("test-task-tool");
const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" });
@@ -133,7 +133,7 @@ describe("task tool", () => {
const tool = createTaskTool({
...baseConfig,
- planFileOnly: true,
+ mode: "plan",
taskService,
});
@@ -151,7 +151,7 @@ describe("task tool", () => {
expect(caught).toBeInstanceOf(Error);
if (caught instanceof Error) {
- expect(caught.message).toMatch(/plan agent/i);
+ expect(caught.message).toMatch(/plan mode/i);
}
expect(create).not.toHaveBeenCalled();
expect(waitForAgentReport).not.toHaveBeenCalled();
diff --git a/src/node/services/tools/task.ts b/src/node/services/tools/task.ts
index a161f68865..b894908a1b 100644
--- a/src/node/services/tools/task.ts
+++ b/src/node/services/tools/task.ts
@@ -68,9 +68,18 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => {
throw new Error("Sub-agent workspaces may not spawn additional sub-agent tasks.");
}
- // Plan agent is explicitly non-executing. Allow only read-only exploration tasks.
- if (config.planFileOnly && requestedAgentId !== "explore") {
- throw new Error('In the plan agent you may only spawn agentId: "explore" tasks.');
+ // Defense-in-depth: some agents are never valid as sub-agents.
+ if (requestedAgentId === "harness-init") {
+ throw new Error('agentId "harness-init" may not be spawned as a sub-agent task.');
+ }
+
+ // Harness init is explicitly non-executing. Allow only read-only exploration tasks.
+ if (config.agentId === "harness-init" && requestedAgentId !== "explore") {
+ throw new Error('In Harness Init you may only spawn agentId: "explore" tasks.');
+ }
+ // Plan mode is explicitly non-executing. Allow only read-only exploration tasks.
+ if (config.mode === "plan" && requestedAgentId !== "explore") {
+ throw new Error('In Plan Mode you may only spawn agentId: "explore" tasks.');
}
const modelString =
diff --git a/src/node/services/workspaceHarnessFromPlan.test.ts b/src/node/services/workspaceHarnessFromPlan.test.ts
new file mode 100644
index 0000000000..9e1a7fca8a
--- /dev/null
+++ b/src/node/services/workspaceHarnessFromPlan.test.ts
@@ -0,0 +1,83 @@
+import { describe, expect, it } from "bun:test";
+
+import {
+ createWorkspaceHarnessConfigFromPlanDraft,
+ extractJsonObjectFromMarkdown,
+} from "./workspaceHarnessFromPlan";
+
+describe("workspaceHarnessFromPlan", () => {
+ it("derives a non-empty checklist with stable IDs", () => {
+ const result = createWorkspaceHarnessConfigFromPlanDraft({
+ checklist: [{ title: "Add schema" }, { title: "Update router" }],
+ gates: [{ command: "make static-check" }],
+ });
+
+ expect(result.usedFallback).toBe(false);
+ expect(result.config.checklist.map((i) => i.id)).toEqual(["item-1", "item-2"]);
+ expect(result.config.checklist.map((i) => i.status)).toEqual(["todo", "todo"]);
+ expect(result.config.loop?.autoCommit).toBe(true);
+ });
+
+ it("falls back to a single checklist item when the draft is empty", () => {
+ const result = createWorkspaceHarnessConfigFromPlanDraft({});
+
+ expect(result.usedFallback).toBe(true);
+ expect(result.config.checklist).toEqual([
+ { id: "item-1", title: "Implement the plan", status: "todo" },
+ ]);
+ expect(result.config.loop?.autoCommit).toBe(false);
+ });
+
+ it("drops unsafe gates and disables auto-commit", () => {
+ const result = createWorkspaceHarnessConfigFromPlanDraft({
+ checklist: [{ title: "Ship it" }],
+ gates: [{ command: "rm -rf /" }, { command: "make typecheck" }],
+ });
+
+ expect(result.usedFallback).toBe(false);
+ expect(result.droppedUnsafeGates).toBe(true);
+ expect(result.config.gates.map((g) => g.command)).toEqual(["make typecheck"]);
+ expect(result.config.loop?.autoCommit).toBe(false);
+ });
+
+ it("dedupes checklist titles and drops trivial placeholders", () => {
+ const result = createWorkspaceHarnessConfigFromPlanDraft({
+ checklist: [
+ { title: "TODO" },
+ { title: "Add schema" },
+ { title: "Add schema " },
+ { title: "Update router" },
+ { title: "TBD" },
+ ],
+ });
+
+ expect(result.usedFallback).toBe(false);
+ expect(result.config.checklist.map((i) => i.title)).toEqual(["Add schema", "Update router"]);
+ });
+
+ describe("extractJsonObjectFromMarkdown", () => {
+ it("parses a ```json fenced block", () => {
+ const res = extractJsonObjectFromMarkdown('```json\n{"checklist": []}\n```');
+
+ expect(res.success).toBe(true);
+ if (res.success) {
+ expect(res.data).toEqual({ checklist: [] });
+ }
+ });
+
+ it("parses raw JSON", () => {
+ const res = extractJsonObjectFromMarkdown('{"checklist": []}');
+
+ expect(res.success).toBe(true);
+ if (res.success) {
+ expect(res.data).toEqual({ checklist: [] });
+ }
+ });
+
+ it("fails on non-JSON", () => {
+ const res = extractJsonObjectFromMarkdown("not json");
+
+ expect(res.success).toBe(false);
+ });
+ });
+});
diff --git a/src/node/services/workspaceHarnessFromPlan.ts b/src/node/services/workspaceHarnessFromPlan.ts
new file mode 100644
index 0000000000..3e4883b7b9
--- /dev/null
+++ b/src/node/services/workspaceHarnessFromPlan.ts
@@ -0,0 +1,228 @@
+import assert from "@/common/utils/assert";
+import type {
+ HarnessChecklistItem,
+ HarnessGate,
+ WorkspaceHarnessConfig,
+} from "@/common/types/harness";
+import { z } from "zod";
+
+export const HarnessFromPlanDraftSchema = z
+ .object({
+ checklist: z
+ .array(
+ z
+ .object({
+ title: z.string().min(1),
+ notes: z.string().optional(),
+ })
+ .strict()
+ )
+ .optional(),
+ gates: z
+ .array(
+ z
+ .object({
+ command: z.string().min(1),
+ title: z.string().optional(),
+ timeoutSecs: z.number().int().positive().optional(),
+ })
+ .strict()
+ )
+ .optional(),
+ loop: z
+ .object({
+ autoCommit: z.boolean().optional(),
+ })
+ .strict()
+ .optional(),
+ })
+ .strict();
+
+export type HarnessFromPlanDraft = z.infer;
+
+function fallbackHarnessConfig(): WorkspaceHarnessConfig {
+ return {
+ version: 1,
+ checklist: [{ id: "item-1", title: "Implement the plan", status: "todo" }],
+ gates: [],
+ loop: { autoCommit: false },
+ };
+}
+
+const MAX_CHECKLIST_TITLE_LENGTH = 200;
+
+function isTriviallyBadChecklistTitle(title: string): boolean {
+ assert(typeof title === "string", "title must be a string");
+
+ const trimmed = title.trim();
+ if (trimmed.length === 0) return true;
+
+ const normalized = trimmed.toLowerCase();
+ if (
+ normalized === "todo" ||
+ normalized === "tbd" ||
+ normalized === "todo." ||
+ normalized === "tbd."
+ ) {
+ return true;
+ }
+
+ return trimmed.length > MAX_CHECKLIST_TITLE_LENGTH;
+}
+
+export function isSafeHarnessGateCommand(command: string): boolean {
+ assert(typeof command === "string", "command must be a string");
+
+ const trimmed = command.trim();
+ if (trimmed.length === 0) {
+ return false;
+ }
+
+ // Keep gate commands single-line and boring. These are executed with a shell, and this is
+ // AI-generated by default, so we heavily restrict what can be persisted.
+ if (/\r|\n/.test(trimmed)) {
+ return false;
+ }
+
+ if (trimmed.length > 200) {
+ return false;
+ }
+
+ // Disallow common shell metacharacters that enable chaining / redirection.
+ if (/[;&|><`"'$]/.test(trimmed)) {
+ return false;
+ }
+
+ // Allowlist simple check runners.
+ if (trimmed === "make") {
+ return false;
+ }
+
+ return trimmed.startsWith("make ") || trimmed.startsWith("bun ");
+}
+
+export function createWorkspaceHarnessConfigFromPlanDraft(draft: unknown): {
+ config: WorkspaceHarnessConfig;
+ usedFallback: boolean;
+ droppedUnsafeGates: boolean;
+} {
+ const parsed = HarnessFromPlanDraftSchema.safeParse(draft);
+ if (!parsed.success) {
+ return { config: fallbackHarnessConfig(), usedFallback: true, droppedUnsafeGates: false };
+ }
+
+ const rawChecklist = parsed.data.checklist ?? [];
+
+ const checklist: HarnessChecklistItem[] = [];
+ const seenTitles = new Set();
+
+ for (const item of rawChecklist) {
+ const title = item.title.trim();
+ if (title.length === 0) continue;
+ if (isTriviallyBadChecklistTitle(title)) continue;
+
+ const normalizedTitle = title.toLowerCase();
+ if (seenTitles.has(normalizedTitle)) continue;
+ seenTitles.add(normalizedTitle);
+
+ const notes = typeof item.notes === "string" ? item.notes.trim() : undefined;
+
+ checklist.push({
+ id: `item-${checklist.length + 1}`,
+ title,
+ status: "todo" as const,
+ notes: notes && notes.length > 0 ? notes : undefined,
+ });
+
+ if (checklist.length >= 20) break;
+ }
+
+ if (checklist.length === 0) {
+ return { config: fallbackHarnessConfig(), usedFallback: true, droppedUnsafeGates: false };
+ }
+
+ const rawGates = parsed.data.gates ?? [];
+
+ let droppedUnsafeGates = false;
+ const gates: HarnessGate[] = [];
+
+ for (const [index, gate] of rawGates.entries()) {
+ const command = gate.command.trim();
+ if (!isSafeHarnessGateCommand(command)) {
+ droppedUnsafeGates = true;
+ continue;
+ }
+
+ const title =
+ typeof gate.title === "string" && gate.title.trim().length > 0
+ ? gate.title.trim()
+ : undefined;
+
+ gates.push({
+ id: `gate-${index + 1}`,
+ title,
+ command,
+ timeoutSecs: gate.timeoutSecs,
+ });
+ }
+
+ const suggestedAutoCommit = parsed.data.loop?.autoCommit;
+
+ // Default: only auto-commit when we have at least one safe gate.
+ // If the model tried to provide unsafe gates, disable auto-commit entirely.
+ const autoCommit = droppedUnsafeGates
+ ? false
+ : typeof suggestedAutoCommit === "boolean"
+ ? suggestedAutoCommit
+ : gates.length > 0;
+
+ const config: WorkspaceHarnessConfig = {
+ version: 1,
+ checklist,
+ gates,
+ loop: { autoCommit },
+ };
+
+ return { config, usedFallback: false, droppedUnsafeGates };
+}
+
+export function extractJsonObjectFromMarkdown(
+ markdown: string
+): { success: true; data: unknown } | { success: false; error: string } {
+ assert(typeof markdown === "string", "markdown must be a string");
+
+ const trimmed = markdown.trim();
+ if (trimmed.length === 0) {
+ return { success: false, error: "Empty agent_report" };
+ }
+
+ const fencedMatch = /```json\s*([\s\S]*?)```/i.exec(trimmed);
+ const candidate = (fencedMatch ? fencedMatch[1] : trimmed).trim();
+
+ const tryParse = (text: string): { ok: true; value: unknown } | { ok: false; error: string } => {
+ try {
+ return { ok: true, value: JSON.parse(text) };
+ } catch (error) {
+ return { ok: false, error: error instanceof Error ? error.message : String(error) };
+ }
+ };
+
+ let parsed = tryParse(candidate);
+ if (!parsed.ok && !fencedMatch) {
+ const start = trimmed.indexOf("{");
+ const end = trimmed.lastIndexOf("}");
+ if (start !== -1 && end !== -1 && end > start) {
+ parsed = tryParse(trimmed.slice(start, end + 1));
+ }
+ }
+
+ if (!parsed.ok) {
+ return { success: false, error: `Failed to parse JSON: ${parsed.error}` };
+ }
+
+ if (typeof parsed.value !== "object" || parsed.value === null || Array.isArray(parsed.value)) {
+ return { success: false, error: "Expected a JSON object" };
+ }
+
+ return { success: true, data: parsed.value };
+}
diff --git a/src/node/services/workspaceHarnessService.test.ts b/src/node/services/workspaceHarnessService.test.ts
new file mode 100644
index 0000000000..da7834703e
--- /dev/null
+++ b/src/node/services/workspaceHarnessService.test.ts
@@ -0,0 +1,140 @@
+import { afterEach, beforeEach, describe, expect, it } from "bun:test";
+import * as fs from "fs/promises";
+import * as os from "os";
+import * as path from "path";
+
+import { Config } from "@/node/config";
+
+import { WorkspaceHarnessService } from "./workspaceHarnessService";
+
+function getWorkspacePath(args: {
+ srcDir: string;
+ projectName: string;
+ workspaceName: string;
+}): string {
+ return path.join(args.srcDir, args.projectName, args.workspaceName);
+}
+
+async function pathExists(filePath: string): Promise {
+ try {
+ await fs.stat(filePath);
+ return true;
+ } catch {
+ return false;
+ }
+}
+
+describe("WorkspaceHarnessService (journal)", () => {
+ let tempDir: string;
+ let config: Config;
+
+ beforeEach(async () => {
+ tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "mux-harness-journal-test-"));
+ config = new Config(tempDir);
+ });
+
+ afterEach(async () => {
+ await fs.rm(tempDir, { recursive: true, force: true });
+ });
+
+ async function setupWorkspace(workspaceName = "branch"): Promise<{
+ workspaceId: string;
+ workspaceName: string;
+ workspacePath: string;
+ }> {
+ const projectPath = "/fake/project";
+ const workspaceId = "ws-id";
+
+ const workspacePath = getWorkspacePath({
+ srcDir: config.srcDir,
+ projectName: "project",
+ workspaceName,
+ });
+ await fs.mkdir(workspacePath, { recursive: true });
+
+ await config.editConfig((cfg) => {
+ cfg.projects.set(projectPath, {
+ workspaces: [
+ {
+ path: workspacePath,
+ id: workspaceId,
+ name: workspaceName,
+ runtimeConfig: { type: "worktree", srcBaseDir: config.srcDir },
+ },
+ ],
+ });
+ return cfg;
+ });
+
+ return { workspaceId, workspaceName, workspacePath };
+ }
+
+ it("creates a journal file when writing harness config", async () => {
+ const { workspaceId, workspaceName, workspacePath } = await setupWorkspace();
+
+ const service = new WorkspaceHarnessService(config);
+ await service.setHarnessForWorkspace(workspaceId, {
+ version: 1,
+ checklist: [],
+ gates: [],
+ loop: {},
+ });
+
+ const journalPath = path.join(workspacePath, ".mux", "harness", `${workspaceName}.progress.md`);
+
+ expect(await pathExists(journalPath)).toBe(true);
+
+ const contents = await fs.readFile(journalPath, "utf-8");
+ expect(contents).toContain("# Harness journal (append-only)");
+ expect(contents).toContain("## Entry template");
+ expect(contents).toContain(`.mux/harness/${workspaceName}.jsonc`);
+ });
+
+ it("creates harness files for slashy workspace names", async () => {
+ const { workspaceId, workspaceName, workspacePath } = await setupWorkspace("feature/foo");
+
+ const service = new WorkspaceHarnessService(config);
+ await service.setHarnessForWorkspace(workspaceId, {
+ version: 1,
+ checklist: [],
+ gates: [],
+ loop: {},
+ });
+
+ const configPath = path.join(workspacePath, ".mux", "harness", `${workspaceName}.jsonc`);
+ const journalPath = path.join(workspacePath, ".mux", "harness", `${workspaceName}.progress.md`);
+
+ expect(await pathExists(configPath)).toBe(true);
+ expect(await pathExists(journalPath)).toBe(true);
+
+ const contents = await fs.readFile(journalPath, "utf-8");
+ expect(contents).toContain(`.mux/harness/${workspaceName}.jsonc`);
+ });
+
+ it("does not overwrite an existing journal file", async () => {
+ const { workspaceId, workspaceName, workspacePath } = await setupWorkspace();
+
+ const service = new WorkspaceHarnessService(config);
+ await service.setHarnessForWorkspace(workspaceId, {
+ version: 1,
+ checklist: [],
+ gates: [],
+ loop: {},
+ });
+
+ const journalPath = path.join(workspacePath, ".mux", "harness", `${workspaceName}.progress.md`);
+
+ await fs.writeFile(journalPath, "CUSTOM\n", "utf-8");
+
+ await service.updateProgressFile(workspaceId);
+ await service.setHarnessForWorkspace(workspaceId, {
+ version: 1,
+ checklist: [{ id: "item-1", title: "Do something", status: "todo" }],
+ gates: [],
+ loop: {},
+ });
+
+ const after = await fs.readFile(journalPath, "utf-8");
+ expect(after).toBe("CUSTOM\n");
+ });
+});
diff --git a/src/node/services/workspaceHarnessService.ts b/src/node/services/workspaceHarnessService.ts
new file mode 100644
index 0000000000..a969fe3ea6
--- /dev/null
+++ b/src/node/services/workspaceHarnessService.ts
@@ -0,0 +1,577 @@
+import * as path from "path";
+import * as jsonc from "jsonc-parser";
+
+import assert from "@/common/utils/assert";
+import type {
+ HarnessChecklistItem,
+ HarnessChecklistStatus,
+ HarnessLoopSettings,
+ HarnessLoopState,
+ WorkspaceHarnessConfig,
+ WorkspaceHarnessFilePaths,
+} from "@/common/types/harness";
+import type { ToolPolicy } from "@/common/utils/tools/toolPolicy";
+import type { RuntimeConfig } from "@/common/types/runtime";
+import type { FrontendWorkspaceMetadata } from "@/common/types/workspace";
+import type { Config } from "@/node/config";
+import { createRuntime } from "@/node/runtime/runtimeFactory";
+import { execBuffered, readFileString, writeFileString } from "@/node/utils/runtime/helpers";
+import { log } from "@/node/services/log";
+
+const HARNESS_DIR = ".mux/harness";
+
+const HARNESS_GITIGNORE_PATTERNS = [`${HARNESS_DIR}/**/*.jsonc`, `${HARNESS_DIR}/**/*.progress.md`];
+
+const DEFAULT_LOOP_SETTINGS: Required<
+ Pick<
+ HarnessLoopSettings,
+ | "maxIterations"
+ | "maxWallTimeMins"
+ | "maxConsecutiveFailures"
+ | "contextReset"
+ | "autoCommit"
+ | "commitMessageTemplate"
+ >
+> & { toolPolicy?: ToolPolicy } = {
+ maxIterations: 50,
+ maxWallTimeMins: 8 * 60,
+ maxConsecutiveFailures: 3,
+ contextReset: "replace_history",
+ autoCommit: true,
+ commitMessageTemplate: "mux(harness): {{item}}",
+};
+
+const DEFAULT_HARNESS_CONFIG: WorkspaceHarnessConfig = {
+ version: 1,
+ checklist: [],
+ gates: [],
+ loop: { ...DEFAULT_LOOP_SETTINGS },
+};
+
+function joinForRuntime(runtimeConfig: RuntimeConfig | undefined, ...parts: string[]): string {
+ assert(parts.length > 0, "joinForRuntime requires at least one path segment");
+
+ // Remote runtimes run inside a POSIX shell (SSH host, Docker container), even if the user is
+ // running mux on Windows. Use POSIX joins so we don't accidentally introduce backslashes.
+ const usePosix = runtimeConfig?.type === "ssh" || runtimeConfig?.type === "docker";
+ return usePosix ? path.posix.join(...parts) : path.join(...parts);
+}
+
+function dirnameForRuntime(runtimeConfig: RuntimeConfig | undefined, filePath: string): string {
+ const usePosix = runtimeConfig?.type === "ssh" || runtimeConfig?.type === "docker";
+ return usePosix ? path.posix.dirname(filePath) : path.dirname(filePath);
+}
+
+function isAbsoluteForRuntime(runtimeConfig: RuntimeConfig | undefined, filePath: string): boolean {
+ const usePosix = runtimeConfig?.type === "ssh" || runtimeConfig?.type === "docker";
+ return usePosix ? path.posix.isAbsolute(filePath) : path.isAbsolute(filePath);
+}
+
+function isChecklistStatus(value: unknown): value is HarnessChecklistStatus {
+ return value === "todo" || value === "doing" || value === "done" || value === "blocked";
+}
+
+function clampPositiveInt(
+ value: unknown,
+ fallback: number,
+ { min, max }: { min: number; max: number }
+): number {
+ if (typeof value !== "number" || !Number.isFinite(value)) {
+ return fallback;
+ }
+ const rounded = Math.floor(value);
+ if (rounded < min) return min;
+ if (rounded > max) return max;
+ return rounded;
+}
+
+function normalizeChecklistItem(raw: unknown, index: number): HarnessChecklistItem | null {
+ if (!raw || typeof raw !== "object") {
+ return null;
+ }
+
+ const obj = raw as Record;
+
+ const title = typeof obj.title === "string" ? obj.title.trim() : "";
+ if (title.length === 0) {
+ return null;
+ }
+
+ const status = isChecklistStatus(obj.status) ? obj.status : ("todo" as const);
+
+ const idRaw = typeof obj.id === "string" ? obj.id.trim() : "";
+ const id = idRaw.length > 0 ? idRaw : `item-${index + 1}`;
+
+ const notes =
+ typeof obj.notes === "string" && obj.notes.trim().length > 0 ? obj.notes.trim() : undefined;
+
+ return { id, title, status, notes };
+}
+
+function normalizeWorkspaceHarnessConfig(raw: unknown): WorkspaceHarnessConfig {
+ if (!raw || typeof raw !== "object") {
+ return { ...DEFAULT_HARNESS_CONFIG };
+ }
+
+ const obj = raw as Record;
+
+ const checklist: HarnessChecklistItem[] = [];
+ if (Array.isArray(obj.checklist)) {
+ for (const [index, entry] of obj.checklist.entries()) {
+ const normalized = normalizeChecklistItem(entry, index);
+ if (normalized) {
+ checklist.push(normalized);
+ }
+ }
+ }
+
+ const gates = Array.isArray(obj.gates)
+ ? obj.gates
+ .map((g) => {
+ if (!g || typeof g !== "object") return null;
+ const gate = g as Record;
+ const command = typeof gate.command === "string" ? gate.command.trim() : "";
+ if (command.length === 0) return null;
+
+ const id =
+ typeof gate.id === "string" && gate.id.trim().length > 0 ? gate.id.trim() : undefined;
+ const title =
+ typeof gate.title === "string" && gate.title.trim().length > 0
+ ? gate.title.trim()
+ : undefined;
+ const timeoutSecs =
+ typeof gate.timeoutSecs === "number" &&
+ Number.isFinite(gate.timeoutSecs) &&
+ gate.timeoutSecs > 0
+ ? Math.floor(gate.timeoutSecs)
+ : undefined;
+
+ return { id, title, command, timeoutSecs };
+ })
+ .filter((g): g is NonNullable => g !== null)
+ : [];
+
+ const loopRaw =
+ obj.loop && typeof obj.loop === "object" ? (obj.loop as Record) : {};
+
+ const loop: HarnessLoopSettings = {
+ maxIterations: clampPositiveInt(loopRaw.maxIterations, DEFAULT_LOOP_SETTINGS.maxIterations, {
+ min: 1,
+ max: 1000,
+ }),
+ maxWallTimeMins: clampPositiveInt(
+ loopRaw.maxWallTimeMins,
+ DEFAULT_LOOP_SETTINGS.maxWallTimeMins,
+ {
+ min: 1,
+ max: 7 * 24 * 60,
+ }
+ ),
+ maxConsecutiveFailures: clampPositiveInt(
+ loopRaw.maxConsecutiveFailures,
+ DEFAULT_LOOP_SETTINGS.maxConsecutiveFailures,
+ { min: 1, max: 50 }
+ ),
+ contextReset:
+ loopRaw.contextReset === "none" || loopRaw.contextReset === "replace_history"
+ ? loopRaw.contextReset
+ : DEFAULT_LOOP_SETTINGS.contextReset,
+ autoCommit:
+ typeof loopRaw.autoCommit === "boolean"
+ ? loopRaw.autoCommit
+ : DEFAULT_LOOP_SETTINGS.autoCommit,
+ commitMessageTemplate:
+ typeof loopRaw.commitMessageTemplate === "string" &&
+ loopRaw.commitMessageTemplate.trim().length > 0
+ ? loopRaw.commitMessageTemplate.trim()
+ : DEFAULT_LOOP_SETTINGS.commitMessageTemplate,
+ toolPolicy: Array.isArray(loopRaw.toolPolicy) ? (loopRaw.toolPolicy as ToolPolicy) : undefined,
+ };
+
+ const normalized: WorkspaceHarnessConfig = {
+ version: 1,
+ checklist,
+ gates,
+ loop,
+ };
+
+ return normalized;
+}
+
+function isNotFoundStatError(error: unknown): boolean {
+ if (!error) {
+ return false;
+ }
+
+ if (typeof error === "object") {
+ if ("code" in error && (error as { code?: unknown }).code === "ENOENT") {
+ return true;
+ }
+
+ if ("cause" in error) {
+ const cause = (error as { cause?: unknown }).cause;
+ if (cause && cause !== error) {
+ return isNotFoundStatError(cause);
+ }
+ }
+ }
+
+ if (error instanceof Error) {
+ const message = error.message;
+ return message.includes("ENOENT") || message.includes("No such file or directory");
+ }
+
+ return false;
+}
+
+async function statIsFile(
+ runtime: ReturnType,
+ filePath: string
+): Promise {
+ try {
+ const stat = await runtime.stat(filePath);
+ return !stat.isDirectory;
+ } catch (error) {
+ if (isNotFoundStatError(error)) {
+ return false;
+ }
+
+ throw error;
+ }
+}
+
+function renderHarnessJournalBootstrapMarkdown(params: {
+ metadata: FrontendWorkspaceMetadata;
+ paths: WorkspaceHarnessFilePaths;
+}): string {
+ const nowIso = new Date().toISOString();
+
+ const configPrefix =
+ params.metadata.name.trim().length > 0 ? params.metadata.name.trim() : "workspace";
+ const configRelPath = path.posix.join(HARNESS_DIR, `${configPrefix}.jsonc`);
+
+ const lines: string[] = [];
+ lines.push("# Harness journal (append-only)");
+ lines.push("");
+ lines.push("This file is an append-only journal for Ralph loop work in this workspace.");
+ lines.push("Append new entries at the bottom. Do not edit or rewrite older entries.");
+ lines.push("");
+ lines.push(`- Workspace: ${params.metadata.name} (${params.metadata.id})`);
+ lines.push(`- Created: ${nowIso}`);
+ lines.push(`- Harness config: ${configRelPath}`);
+ lines.push("");
+ lines.push("## Entry template");
+ lines.push("");
+ lines.push("### — Iteration N — Item: — ");
+ lines.push("- Did:");
+ lines.push("- Tried:");
+ lines.push("- Learned:");
+ lines.push("- Dead ends:");
+ lines.push("- Next:");
+ lines.push("");
+ return lines.join("\n");
+}
+
+export class WorkspaceHarnessService {
+ constructor(private readonly config: Config) {
+ assert(config, "WorkspaceHarnessService requires a Config instance");
+ }
+
+ private async getWorkspaceMetadata(workspaceId: string): Promise {
+ assert(typeof workspaceId === "string", "workspaceId must be a string");
+ const trimmed = workspaceId.trim();
+ assert(trimmed.length > 0, "workspaceId must not be empty");
+
+ const all = await this.config.getAllWorkspaceMetadata();
+ const metadata = all.find((m) => m.id === trimmed);
+ if (!metadata) {
+ throw new Error(`Workspace metadata not found for ${trimmed}`);
+ }
+
+ return metadata;
+ }
+
+ async getRuntimeAndWorkspacePath(workspaceId: string): Promise<{
+ metadata: FrontendWorkspaceMetadata;
+ runtime: ReturnType;
+ workspacePath: string;
+ }> {
+ const metadata = await this.getWorkspaceMetadata(workspaceId);
+
+ const runtime = createRuntime(
+ metadata.runtimeConfig ?? { type: "local", srcBaseDir: this.config.srcDir },
+ { projectPath: metadata.projectPath }
+ );
+
+ // In-place workspaces (CLI/benchmarks) store the workspace path directly by setting
+ // metadata.projectPath === metadata.name.
+ const isInPlace = metadata.projectPath === metadata.name;
+ const workspacePath = isInPlace
+ ? metadata.projectPath
+ : runtime.getWorkspacePath(metadata.projectPath, metadata.name);
+
+ assert(
+ typeof workspacePath === "string" && workspacePath.length > 0,
+ "workspacePath is required"
+ );
+
+ return { metadata, runtime, workspacePath };
+ }
+
+ private getHarnessFilePaths(
+ workspacePath: string,
+ runtimeConfig: RuntimeConfig | undefined,
+ workspaceName: string
+ ): WorkspaceHarnessFilePaths {
+ assert(typeof workspacePath === "string", "workspacePath must be a string");
+ assert(typeof workspaceName === "string", "workspaceName must be a string");
+
+ const prefix = workspaceName.trim().length > 0 ? workspaceName.trim() : "workspace";
+
+ return {
+ configPath: joinForRuntime(runtimeConfig, workspacePath, HARNESS_DIR, `${prefix}.jsonc`),
+ progressPath: joinForRuntime(
+ runtimeConfig,
+ workspacePath,
+ HARNESS_DIR,
+ `${prefix}.progress.md`
+ ),
+ };
+ }
+
+ private async readHarnessFile(
+ runtime: ReturnType,
+ filePath: string
+ ): Promise {
+ try {
+ const raw = await readFileString(runtime, filePath);
+ const errors: jsonc.ParseError[] = [];
+ const parsed: unknown = jsonc.parse(raw, errors) as unknown;
+ if (errors.length > 0) {
+ log.warn("[HARNESS] Failed to parse harness config (JSONC parse errors)", {
+ filePath,
+ errorCount: errors.length,
+ });
+ return {};
+ }
+ return parsed;
+ } catch (error) {
+ log.debug("[HARNESS] Failed to read harness config file", { filePath, error });
+ return {};
+ }
+ }
+
+ private async ensureHarnessDir(
+ runtime: ReturnType,
+ workspacePath: string,
+ runtimeConfig: RuntimeConfig | undefined,
+ paths?: WorkspaceHarnessFilePaths
+ ): Promise {
+ const harnessDirPath = joinForRuntime(runtimeConfig, workspacePath, HARNESS_DIR);
+
+ const dirPaths = new Set([harnessDirPath]);
+ if (paths) {
+ // Workspace names can contain slashes (e.g. "feature/foo"), which means harness files may end up
+ // nested under `.mux/harness/feature/foo.jsonc`. Ensure parent dirs exist before writing.
+ dirPaths.add(dirnameForRuntime(runtimeConfig, paths.configPath));
+ dirPaths.add(dirnameForRuntime(runtimeConfig, paths.progressPath));
+ }
+
+ for (const dirPath of dirPaths) {
+ try {
+ await runtime.ensureDir(dirPath);
+ } catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
+ throw new Error(`Failed to create harness directory (${dirPath}): ${msg}`);
+ }
+ }
+ }
+
+ private async ensureHarnessGitignored(
+ runtime: ReturnType,
+ workspacePath: string,
+ runtimeConfig: RuntimeConfig | undefined
+ ): Promise {
+ try {
+ const isInsideGitResult = await execBuffered(runtime, "git rev-parse --is-inside-work-tree", {
+ cwd: workspacePath,
+ timeout: 10,
+ });
+ if (isInsideGitResult.exitCode !== 0 || isInsideGitResult.stdout.trim() !== "true") {
+ return;
+ }
+
+ const excludePathResult = await execBuffered(
+ runtime,
+ "git rev-parse --git-path info/exclude",
+ {
+ cwd: workspacePath,
+ timeout: 10,
+ }
+ );
+ if (excludePathResult.exitCode !== 0) {
+ return;
+ }
+
+ const excludeFilePathRaw = excludePathResult.stdout.trim();
+ if (excludeFilePathRaw.length === 0) {
+ return;
+ }
+
+ const excludeFilePath = isAbsoluteForRuntime(runtimeConfig, excludeFilePathRaw)
+ ? excludeFilePathRaw
+ : joinForRuntime(runtimeConfig, workspacePath, excludeFilePathRaw);
+
+ let existing = "";
+ try {
+ existing = await readFileString(runtime, excludeFilePath);
+ } catch {
+ // Missing exclude file is OK.
+ }
+
+ const existingPatterns = new Set(
+ existing
+ .split(/\r?\n/)
+ .map((line) => line.trim())
+ .filter((line) => line.length > 0)
+ );
+ const missingPatterns = HARNESS_GITIGNORE_PATTERNS.filter(
+ (pattern) => !existingPatterns.has(pattern)
+ );
+ if (missingPatterns.length === 0) {
+ return;
+ }
+
+ const needsNewline = existing.length > 0 && !existing.endsWith("\n");
+ const updated = existing + (needsNewline ? "\n" : "") + missingPatterns.join("\n") + "\n";
+
+ await writeFileString(runtime, excludeFilePath, updated);
+ } catch (error) {
+ // Best-effort only; never fail a workspace operation because git exclude couldn't be updated.
+ log.debug("[HARNESS] Failed to add harness files to git exclude", {
+ workspacePath,
+ error,
+ });
+ }
+ }
+
+ private async ensureHarnessJournalExists(params: {
+ metadata: FrontendWorkspaceMetadata;
+ runtime: ReturnType;
+ workspacePath: string;
+ runtimeConfig: RuntimeConfig | undefined;
+ paths: WorkspaceHarnessFilePaths;
+ }): Promise {
+ try {
+ await this.ensureHarnessDir(
+ params.runtime,
+ params.workspacePath,
+ params.runtimeConfig,
+ params.paths
+ );
+
+ const exists = await statIsFile(params.runtime, params.paths.progressPath);
+ if (exists) {
+ return;
+ }
+
+ const markdown = renderHarnessJournalBootstrapMarkdown({
+ metadata: params.metadata,
+ paths: params.paths,
+ });
+
+ await writeFileString(
+ params.runtime,
+ params.paths.progressPath,
+ markdown.endsWith("\n") ? markdown : `${markdown}\n`
+ );
+ await this.ensureHarnessGitignored(
+ params.runtime,
+ params.workspacePath,
+ params.runtimeConfig
+ );
+ } catch (error) {
+ log.debug("[HARNESS] Failed to ensure harness journal file exists", {
+ workspacePath: params.workspacePath,
+ error,
+ });
+ }
+ }
+
+ async getHarnessPresenceForWorkspace(workspaceId: string): Promise<{
+ exists: boolean;
+ paths: WorkspaceHarnessFilePaths;
+ }> {
+ const { metadata, runtime, workspacePath } = await this.getRuntimeAndWorkspacePath(workspaceId);
+ const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name);
+
+ const exists = await statIsFile(runtime, paths.configPath);
+ return { exists, paths };
+ }
+
+ async getHarnessForWorkspace(workspaceId: string): Promise<{
+ config: WorkspaceHarnessConfig;
+ paths: WorkspaceHarnessFilePaths;
+ exists: boolean;
+ }> {
+ const { metadata, runtime, workspacePath } = await this.getRuntimeAndWorkspacePath(workspaceId);
+ const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name);
+ const exists = await statIsFile(runtime, paths.configPath);
+
+ if (!exists) {
+ return { config: { ...DEFAULT_HARNESS_CONFIG }, paths, exists: false };
+ }
+
+ const parsed = await this.readHarnessFile(runtime, paths.configPath);
+ return {
+ config: normalizeWorkspaceHarnessConfig(parsed),
+ paths,
+ exists: true,
+ };
+ }
+
+ async setHarnessForWorkspace(
+ workspaceId: string,
+ config: WorkspaceHarnessConfig
+ ): Promise {
+ assert(config && typeof config === "object", "config must be an object");
+
+ const { metadata, runtime, workspacePath } = await this.getRuntimeAndWorkspacePath(workspaceId);
+ const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name);
+ const normalized = normalizeWorkspaceHarnessConfig(config);
+ const serialized = JSON.stringify(normalized, null, 2) + "\n";
+
+ await this.ensureHarnessDir(runtime, workspacePath, metadata.runtimeConfig, paths);
+
+ await writeFileString(runtime, paths.configPath, serialized);
+ await this.ensureHarnessGitignored(runtime, workspacePath, metadata.runtimeConfig);
+
+ await this.ensureHarnessJournalExists({
+ metadata,
+ runtime,
+ workspacePath,
+ runtimeConfig: metadata.runtimeConfig,
+ paths,
+ });
+
+ return normalized;
+ }
+
+ async updateProgressFile(workspaceId: string, _loopState?: HarnessLoopState): Promise {
+ try {
+ const { metadata, runtime, workspacePath } =
+ await this.getRuntimeAndWorkspacePath(workspaceId);
+
+ const paths = this.getHarnessFilePaths(workspacePath, metadata.runtimeConfig, metadata.name);
+ await this.ensureHarnessJournalExists({
+ metadata,
+ runtime,
+ workspacePath,
+ runtimeConfig: metadata.runtimeConfig,
+ paths,
+ });
+ } catch (error) {
+ log.debug("[HARNESS] Failed to ensure harness journal exists", { workspaceId, error });
+ }
+ }
+}
diff --git a/tests/e2e/scenarios/sidebarDragDrop.spec.ts b/tests/e2e/scenarios/sidebarDragDrop.spec.ts
index a1dd839ab2..6b982fe986 100644
--- a/tests/e2e/scenarios/sidebarDragDrop.spec.ts
+++ b/tests/e2e/scenarios/sidebarDragDrop.spec.ts
@@ -203,7 +203,11 @@ test.describe("sidebar drag and drop", () => {
const topTabs = await tablists[0].getByRole("tab").all();
const bottomTabs = await tablists[1].getByRole("tab").all();
- expect(topTabs.length).toBe(3); // Costs, Review, Explorer
+ expect(topTabs.length).toBeGreaterThanOrEqual(3);
+ await expect(tablists[0]).toContainText("Costs");
+ await expect(tablists[0]).toContainText("Review");
+ await expect(tablists[0]).toContainText("Explorer");
+
expect(bottomTabs.length).toBe(1); // Costs (duplicate tab in split)
});
diff --git a/tests/ipc/backgroundBashDirect.test.ts b/tests/ipc/backgroundBashDirect.test.ts
index 6ab3b52361..827d518bfe 100644
--- a/tests/ipc/backgroundBashDirect.test.ts
+++ b/tests/ipc/backgroundBashDirect.test.ts
@@ -19,7 +19,12 @@ import * as fs from "fs/promises";
import * as os from "os";
import * as path from "path";
import { createTestEnvironment, cleanupTestEnvironment, type TestEnvironment } from "./setup";
-import { createTempGitRepo, cleanupTempGitRepo, generateBranchName } from "./helpers";
+import {
+ createTempGitRepo,
+ cleanupTempGitRepo,
+ generateBranchName,
+ waitForInitComplete,
+} from "./helpers";
import { detectDefaultTrunkBranch } from "../../src/node/git";
import { LocalRuntime } from "../../src/node/runtime/LocalRuntime";
import { BackgroundProcessManager } from "../../src/node/services/backgroundProcessManager";
@@ -70,6 +75,9 @@ describe("Background Bash Direct Integration", () => {
}
workspaceId = result.metadata.id;
workspacePath = result.metadata.namedWorkspacePath ?? tempGitRepo;
+
+ // Avoid race conditions on slower platforms (Windows) where tools may run before init finishes.
+ await waitForInitComplete(env, workspaceId, 30_000);
});
afterAll(async () => {
@@ -263,6 +271,9 @@ describe("Background Bash Output Capture", () => {
}
workspaceId = result.metadata.id;
workspacePath = result.metadata.namedWorkspacePath ?? tempGitRepo;
+
+ // Avoid race conditions on slower platforms (Windows) where tools may run before init finishes.
+ await waitForInitComplete(env, workspaceId, 30_000);
});
afterAll(async () => {
@@ -379,6 +390,9 @@ describe("Foreground to Background Migration", () => {
}
workspaceId = result.metadata.id;
workspacePath = result.metadata.namedWorkspacePath ?? tempGitRepo;
+
+ // Avoid race conditions on slower platforms (Windows) where tools may run before init finishes.
+ await waitForInitComplete(env, workspaceId, 30_000);
});
afterAll(async () => {
@@ -519,8 +533,8 @@ describe("Foreground to Background Migration", () => {
{ toolCallId, messages: [] }
) as Promise;
- // Wait for marker1 to output
- await new Promise((resolve) => setTimeout(resolve, FOREGROUND_MIGRATION_READY_MS));
+ // Wait for marker1 to output (extra slack for slower CI runners)
+ await new Promise((resolve) => setTimeout(resolve, 800));
// Send to background mid-execution
manager.sendToBackground(toolCallId);
@@ -589,7 +603,27 @@ describe("Foreground to Background Migration", () => {
// Either it completed normally or was backgrounded
expect(result.success).toBe(true);
- expect(result.output).toContain(marker);
+ if (!result.success) return;
+
+ if (result.output?.includes(marker)) {
+ expect(result.output).toContain(marker);
+ return;
+ }
+
+ // On some platforms the process can exit during send-to-background, before output is collected.
+ // Verify the marker still exists in the persisted output log.
+ if (result.backgroundProcessId) {
+ const proc = await manager.getProcess(result.backgroundProcessId);
+ expect(proc).toBeDefined();
+
+ const outputPath = path.join(proc!.outputDir, "output.log");
+ const fullOutput = await fs.readFile(outputPath, "utf-8");
+ expect(fullOutput).toContain(marker);
+ return;
+ }
+
+ // If we weren't backgrounded, the marker should have been included in the immediate output.
+ expect(result.output ?? "").toContain(marker);
});
it("should not kill backgrounded process when abort signal fires", async () => {
diff --git a/tests/ipc/executeBash.test.ts b/tests/ipc/executeBash.test.ts
index 58e4d76f5a..c529d9d987 100644
--- a/tests/ipc/executeBash.test.ts
+++ b/tests/ipc/executeBash.test.ts
@@ -66,7 +66,7 @@ function expectWorkspaceCreationSuccess(result: WorkspaceCreationResult): Worksp
}
const GIT_FETCH_TIMEOUT_SECS = process.platform === "win32" ? 15 : 5;
-const TEST_TIMEOUT_MS = process.platform === "win32" ? 60_000 : 15_000;
+const TEST_TIMEOUT_MS = process.platform === "win32" ? 60_000 : 30_000;
// Skip all tests if TEST_INTEGRATION is not set
const describeIntegration = shouldRunIntegrationTests() ? describe : describe.skip;
@@ -87,6 +87,9 @@ describeIntegration("executeBash", () => {
const workspaceId = metadata.id;
const client = resolveOrpcClient(env);
+ // Wait for init to complete (prevents Windows filesystem timing issues)
+ await waitForInitComplete(env, workspaceId, 30_000);
+
// Execute a simple bash command (pwd should return workspace path)
const pwdResult = await client.workspace.executeBash({ workspaceId, script: "pwd" });
@@ -153,6 +156,9 @@ describeIntegration("executeBash", () => {
const workspaceId = expectWorkspaceCreationSuccess(createResult).id;
const client = resolveOrpcClient(env);
+ // Wait for init to complete (prevents Windows filesystem timing issues)
+ await waitForInitComplete(env, workspaceId, 30_000);
+
// Execute a command that will fail
const failResult = await client.workspace.executeBash({
workspaceId,
@@ -189,6 +195,9 @@ describeIntegration("executeBash", () => {
const workspaceId = expectWorkspaceCreationSuccess(createResult).id;
const client = resolveOrpcClient(env);
+ // Wait for init to complete (prevents Windows filesystem timing issues)
+ await waitForInitComplete(env, workspaceId, 30_000);
+
// Execute a command that takes longer than the timeout
const timeoutResult = await client.workspace.executeBash({
workspaceId,
@@ -225,6 +234,9 @@ describeIntegration("executeBash", () => {
const workspaceId = expectWorkspaceCreationSuccess(createResult).id;
const client = resolveOrpcClient(env);
+ // Wait for init to complete (prevents Windows filesystem timing issues)
+ await waitForInitComplete(env, workspaceId, 30_000);
+
// Execute a command that generates 400 lines (well under 10K limit for IPC truncate policy)
const result = await client.workspace.executeBash({
workspaceId,
@@ -331,7 +343,7 @@ describeIntegration("executeBash", () => {
const client = resolveOrpcClient(env);
// Wait for init to complete (prevents Windows filesystem timing issues)
- await waitForInitComplete(env, workspaceId);
+ await waitForInitComplete(env, workspaceId, 30_000);
// Verify GIT_TERMINAL_PROMPT is set to 0
const gitEnvResult = await executeBashUntilReady(
diff --git a/tests/ipc/setup.ts b/tests/ipc/setup.ts
index e6dd3d9b5a..6477ba0f03 100644
--- a/tests/ipc/setup.ts
+++ b/tests/ipc/setup.ts
@@ -105,7 +105,12 @@ export async function createTestEnvironment(): Promise {
sessionUsageService: services.sessionUsageService,
signingService: services.signingService,
coderService: services.coderService,
+ workspaceHarnessService: services.workspaceHarnessService,
+ gateRunnerService: services.gateRunnerService,
+ gitCheckpointService: services.gitCheckpointService,
+ loopRunnerService: services.loopRunnerService,
};
+
const orpc = createOrpcTestClient(orpcContext);
return {