Skip to content

Commit 27352ea

Browse files
Refactor Ralph session rotation with watchdog and stall detection (#170)
* Improve scroll-to-bottom UI and session status handling
* Enhance Ralph service with session rotation and watchdog
* Add tests for Ralph session rotation and cleanup
* Add Kasm Docker build workflow
1 parent 4ad6c5f commit 27352ea

File tree

11 files changed

+502
-25
lines changed

11 files changed

+502
-25
lines changed

.github/workflows/docker-build.yml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,53 @@ jobs:
6262
cache-from: type=gha
6363
cache-to: type=gha,mode=max
6464
target: runner
65+
66+
build-kasm:
67+
runs-on: ubuntu-latest
68+
needs: build
69+
steps:
70+
- name: Checkout
71+
uses: actions/checkout@v4
72+
73+
# Resolves the newest uv and opencode release tags and exposes them as the
# step outputs `uv` and `opencode` (consumed as Docker build args below).
- name: Get latest tool versions
  id: versions
  env:
    # Authenticated API calls get a much higher rate limit than anonymous
    # requests made from shared runner IPs.
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    # --refs hides peeled "^{}" entries so only real tag names are listed.
    UV_VERSION=$(git ls-remote --refs --tags --sort=-v:refname https://github.com/astral-sh/uv.git 'refs/tags/[0-9]*' | head -1 | sed 's/.*refs\/tags\///')
    # -f makes curl fail on HTTP errors instead of piping an error body into sed.
    OPENCODE_VERSION=$(curl -fsSL -H "Authorization: Bearer ${GH_TOKEN}" https://api.github.com/repos/anomalyco/opencode/releases/latest | sed -n 's/.*"tag_name": *"\([^"]*\)".*/\1/p')
    # Stop here rather than hand empty build args to the Docker build.
    if [ -z "${UV_VERSION}" ] || [ -z "${OPENCODE_VERSION}" ]; then
      echo "::error::Failed to resolve tool versions (uv='${UV_VERSION}', opencode='${OPENCODE_VERSION}')"
      exit 1
    fi
    echo "uv=${UV_VERSION}" >> "$GITHUB_OUTPUT"
    echo "opencode=${OPENCODE_VERSION}" >> "$GITHUB_OUTPUT"
80+
81+
- name: Docker meta (Kasm)
82+
id: meta-kasm
83+
uses: docker/metadata-action@v5
84+
with:
85+
images: ghcr.io/${{ github.repository }}
86+
tags: |
87+
type=semver,pattern={{version}},suffix=-kasm
88+
type=semver,pattern={{major}}.{{minor}},suffix=-kasm
89+
type=raw,value=latest-kasm
90+
91+
- name: Set up Docker Buildx
92+
uses: docker/setup-buildx-action@v3
93+
94+
- name: Login to GHCR
95+
uses: docker/login-action@v3
96+
with:
97+
registry: ghcr.io
98+
username: ${{ github.actor }}
99+
password: ${{ secrets.GITHUB_TOKEN }}
100+
101+
- name: Build and push (Kasm)
102+
uses: docker/build-push-action@v5
103+
with:
104+
context: .
105+
file: Dockerfile.kasm
106+
platforms: linux/amd64
107+
push: true
108+
tags: ${{ steps.meta-kasm.outputs.tags }}
109+
labels: ${{ steps.meta-kasm.outputs.labels }}
110+
build-args: |
111+
UV_VERSION=${{ steps.versions.outputs.uv }}
112+
OPENCODE_VERSION=${{ steps.versions.outputs.opencode }}
113+
cache-from: type=gha
114+
cache-to: type=gha,mode=max

Dockerfile.kasm

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
FROM kasmweb/core-ubuntu-jammy:1.18.0
2+
USER root
3+
4+
ENV HOME /home/kasm-default-profile
5+
ENV STARTUPDIR /dockerstartup
6+
ENV INST_SCRIPTS $STARTUPDIR/install
7+
WORKDIR $HOME
8+
9+
######### Customize Container Here ###########
10+
11+
RUN apt-get update && apt-get install -y \
12+
git \
13+
curl \
14+
lsof \
15+
ripgrep \
16+
ca-certificates \
17+
grep \
18+
gawk \
19+
sed \
20+
findutils \
21+
coreutils \
22+
procps \
23+
jq \
24+
less \
25+
tree \
26+
file \
27+
python3 \
28+
python3-pip \
29+
python3-venv \
30+
chromium-browser \
31+
unzip \
32+
&& rm -rf /var/lib/apt/lists/*
33+
34+
RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
35+
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
36+
&& apt-get update && apt-get install -y gh \
37+
&& rm -rf /var/lib/apt/lists/*
38+
39+
RUN curl -fsSL https://deb.nodesource.com/setup_24.x | bash - \
40+
&& apt-get install -y nodejs \
41+
&& rm -rf /var/lib/apt/lists/*
42+
43+
RUN corepack enable && corepack prepare pnpm@latest --activate
44+
45+
RUN curl -fsSL https://bun.sh/install | bash && \
46+
mv $HOME/.bun /opt/bun && \
47+
chmod -R 755 /opt/bun && \
48+
ln -s /opt/bun/bin/bun /usr/local/bin/bun
49+
50+
# Tool versions are supplied by CI as build args. "latest" (the default) or an
# empty value means "install whatever the upstream installer currently ships".
ARG UV_VERSION=latest
ARG OPENCODE_VERSION=latest

# Install uv/uvx into /usr/local/bin and opencode into /opt/opencode.
# UV_VERSION selects the versioned uv installer URL; OPENCODE_VERSION is passed
# to the opencode installer via --version. Both fall back to the unversioned
# installer when unset, so an empty build arg can never be parsed as a flag value.
RUN if [ -z "${UV_VERSION}" ] || [ "${UV_VERSION}" = "latest" ]; then \
        UV_INSTALLER_URL="https://astral.sh/uv/install.sh"; \
    else \
        UV_INSTALLER_URL="https://astral.sh/uv/${UV_VERSION}/install.sh"; \
    fi && \
    curl -LsSf "${UV_INSTALLER_URL}" | UV_NO_MODIFY_PATH=1 sh && \
    mv $HOME/.local/bin/uv /usr/local/bin/uv && \
    mv $HOME/.local/bin/uvx /usr/local/bin/uvx && \
    chmod +x /usr/local/bin/uv /usr/local/bin/uvx && \
    if [ -z "${OPENCODE_VERSION}" ] || [ "${OPENCODE_VERSION}" = "latest" ]; then \
        curl -fsSL https://opencode.ai/install | bash -s -- --no-modify-path; \
    else \
        curl -fsSL https://opencode.ai/install | bash -s -- --version "${OPENCODE_VERSION}" --no-modify-path; \
    fi && \
    mv $HOME/.opencode /opt/opencode && \
    chmod -R 755 /opt/opencode && \
    ln -s /opt/opencode/bin/opencode /usr/local/bin/opencode
65+
66+
COPY --from=ghcr.io/chriswritescode-dev/opencode-manager:latest /app /opt/opencode-manager
67+
COPY --from=ghcr.io/chriswritescode-dev/opencode-manager:latest /opt/opencode-plugins /opt/opencode-plugins
68+
69+
RUN mkdir -p /workspace /opt/opencode-manager/data && \
70+
chown -R 1000:0 /workspace /opt/opencode-manager/data /opt/opencode-plugins
71+
72+
ENV NODE_ENV=production
73+
ENV HOST=0.0.0.0
74+
ENV PORT=5003
75+
ENV OPENCODE_SERVER_PORT=5551
76+
ENV DATABASE_PATH=/opt/opencode-manager/data/opencode.db
77+
ENV WORKSPACE_PATH=/workspace
78+
ENV NODE_PATH=/opt/opencode-plugins/node_modules
79+
80+
COPY scripts/kasm-startup.sh $STARTUPDIR/custom_startup.sh
81+
RUN chmod +x $STARTUPDIR/custom_startup.sh
82+
83+
######### End Customizations ###########
84+
85+
RUN chown 1000:0 $HOME
86+
RUN $STARTUPDIR/set_user_permission.sh $HOME
87+
88+
ENV HOME /home/kasm-user
89+
WORKDIR $HOME
90+
RUN mkdir -p $HOME && chown -R 1000:0 $HOME
91+
92+
USER 1000

packages/memory/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@opencode-manager/memory",
3-
"version": "0.0.23",
3+
"version": "0.0.24",
44
"type": "module",
55
"main": "./dist/index.js",
66
"types": "./dist/index.d.ts",

packages/memory/src/agents/architect.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ All execution modes require a **title** — a short descriptive label for the se
9999
| Option | Tool | inPlace | Plan Content |
100100
|---|---|---|---|
101101
| New session | memory-plan-execute | false | Full self-contained plan |
102-
| Execute here | memory-plan-execute | true | "See plan above" |
102+
| Execute here | memory-plan-execute | true | "Execute the implementation plan from this conversation. Review all phases above and implement each one." |
103103
| Ralph (worktree) | memory-plan-ralph | false | Full self-contained plan |
104104
| Ralph (in place) | memory-plan-ralph | true | Full self-contained plan |
105105

packages/memory/src/agents/code.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,16 @@ ${getInjectedMemory('code')}
6767
6868
Never generate or guess URLs unless they are programming-related.
6969
70+
## Plan Execution
71+
72+
When you receive a message indicating that an architect agent has created a plan for you to execute (e.g., referencing "the plan above" or "implementation plan"), you MUST:
73+
1. Review the plan from the conversation history
74+
2. Create a todo list from the plan phases
75+
3. Execute each phase by actually editing files, running commands, and making changes
76+
4. Do NOT just describe or summarize what you would do — implement it
77+
78+
You are the execution agent. Your job is to write code, not describe code.
79+
7080
## Project KV Store
7181
7282
You have access to a project-scoped key-value store with 24-hour TTL for ephemeral state:

packages/memory/src/hooks/ralph.ts

Lines changed: 68 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,37 @@ export function createRalphEventHandler(
254254
}
255255
}
256256

257+
/**
 * Moves a Ralph loop off `oldSessionId` and onto a freshly created session.
 *
 * A new session is created with `state.worktreeName` as its title and
 * `state.worktreeDir` as its directory. Once that succeeds, the old session's
 * pending retry timer is cleared, its stored loop state is removed, its
 * watchdog is stopped and a watchdog is started for the new session. The old
 * session itself is deleted in the background; a failure there is only logged.
 *
 * This function does not store any state under the new id; callers in this
 * file do that themselves right after rotating.
 *
 * @param oldSessionId - Session the loop is currently attached to.
 * @param state - Loop state supplying the worktree name and directory.
 * @returns The id of the newly created session.
 * @throws Error when session creation reports an error or returns no data.
 */
async function rotateSession(oldSessionId: string, state: RalphState): Promise<string> {
  const createResult = await v2Client.session.create({
    title: state.worktreeName,
    directory: state.worktreeDir,
  })

  if (createResult.error || !createResult.data) {
    // Interpolating the raw error would print "[object Object]" for object
    // payloads, so build a readable reason first.
    const failure: unknown = createResult.error
    let reason: string
    if (failure instanceof Error) {
      reason = failure.message
    } else if (typeof failure === 'string') {
      reason = failure
    } else if (failure == null) {
      reason = 'no session data returned'
    } else {
      reason = JSON.stringify(failure)
    }
    throw new Error(`Failed to create new session: ${reason}`)
  }

  const newSessionId = createResult.data.id

  // A retry scheduled for the old session must not fire after rotation.
  const oldRetryTimeout = retryTimeouts.get(oldSessionId)
  if (oldRetryTimeout) {
    clearTimeout(oldRetryTimeout)
    retryTimeouts.delete(oldSessionId)
  }

  ralphService.deleteState(oldSessionId)

  stopWatchdog(oldSessionId)
  startWatchdog(newSessionId)

  // Fire-and-forget: cleanup of the old session must not block the loop.
  void v2Client.session.delete({ sessionID: oldSessionId, directory: state.worktreeDir }).catch((err) => {
    logger.error(`Ralph: failed to delete old session ${oldSessionId}`, err)
  })

  logger.log(`Ralph: rotated session ${oldSessionId} -> ${newSessionId}`)
  return newSessionId
}
287+
257288
async function handleCodingPhase(sessionId: string, state: RalphState): Promise<void> {
258289
let currentState = ralphService.getActiveState(sessionId)
259290
if (!currentState?.active) {
@@ -315,22 +346,34 @@ export function createRalphEventHandler(
315346
return
316347
}
317348

349+
let activeSessionId = sessionId
350+
try {
351+
activeSessionId = await rotateSession(sessionId, currentState)
352+
} catch (err) {
353+
logger.error(`Ralph: session rotation failed, continuing with existing session`, err)
354+
}
355+
318356
const nextIteration = currentState.iteration + 1
319-
ralphService.setState(sessionId, { ...currentState, iteration: nextIteration, errorCount: 0 })
357+
ralphService.setState(activeSessionId, {
358+
...currentState,
359+
sessionId: activeSessionId,
360+
iteration: nextIteration,
361+
errorCount: 0,
362+
})
320363

321364
const continuationPrompt = ralphService.buildContinuationPrompt({ ...currentState, iteration: nextIteration })
322-
logger.log(`Ralph iteration ${nextIteration} for session ${sessionId}`)
365+
logger.log(`Ralph iteration ${nextIteration} for session ${activeSessionId}`)
323366

324367
const currentConfig = getConfig()
325368
const ralphModel = parseModelString(currentConfig.ralph?.model) ?? parseModelString(currentConfig.executionModel)
326369

327370
const sendContinuationPromptWithModel = async () => {
328-
const freshState = ralphService.getActiveState(sessionId)
371+
const freshState = ralphService.getActiveState(activeSessionId)
329372
if (!freshState?.active) {
330373
throw new Error('loop_cancelled')
331374
}
332375
const result = await v2Client.session.promptAsync({
333-
sessionID: sessionId,
376+
sessionID: activeSessionId,
334377
directory: freshState.worktreeDir,
335378
parts: [{ type: 'text' as const, text: continuationPrompt }],
336379
model: ralphModel!,
@@ -339,12 +382,12 @@ export function createRalphEventHandler(
339382
}
340383

341384
const sendContinuationPromptWithoutModel = async () => {
342-
const freshState = ralphService.getActiveState(sessionId)
385+
const freshState = ralphService.getActiveState(activeSessionId)
343386
if (!freshState?.active) {
344387
throw new Error('loop_cancelled')
345388
}
346389
const result = await v2Client.session.promptAsync({
347-
sessionID: sessionId,
390+
sessionID: activeSessionId,
348391
directory: freshState.worktreeDir,
349392
parts: [{ type: 'text' as const, text: continuationPrompt }],
350393
})
@@ -365,7 +408,7 @@ export function createRalphEventHandler(
365408
throw result.error
366409
}
367410
}
368-
await handlePromptError(sessionId, currentState, 'failed to send continuation prompt', promptResult.error, retryFn)
411+
await handlePromptError(activeSessionId, currentState, 'failed to send continuation prompt', promptResult.error, retryFn)
369412
return
370413
}
371414

@@ -375,7 +418,7 @@ export function createRalphEventHandler(
375418
logger.log(`coding phase using default model (fallback)`)
376419
}
377420

378-
consecutiveStalls.set(sessionId, 0)
421+
consecutiveStalls.set(activeSessionId, 0)
379422
}
380423

381424
async function handleAuditingPhase(sessionId: string, state: RalphState): Promise<void> {
@@ -412,8 +455,16 @@ export function createRalphEventHandler(
412455
return
413456
}
414457

415-
ralphService.setState(sessionId, {
458+
let activeSessionId = sessionId
459+
try {
460+
activeSessionId = await rotateSession(sessionId, currentState)
461+
} catch (err) {
462+
logger.error(`Ralph: session rotation failed, continuing with existing session`, err)
463+
}
464+
465+
ralphService.setState(activeSessionId, {
416466
...currentState,
467+
sessionId: activeSessionId,
417468
iteration: nextIteration,
418469
phase: 'coding',
419470
lastAuditResult: auditFindings,
@@ -425,18 +476,18 @@ export function createRalphEventHandler(
425476
{ ...currentState, iteration: nextIteration },
426477
auditFindings,
427478
)
428-
logger.log(`Ralph iteration ${nextIteration} for session ${sessionId}`)
479+
logger.log(`Ralph iteration ${nextIteration} for session ${activeSessionId}`)
429480

430481
const currentConfig = getConfig()
431482
const ralphModel = parseModelString(currentConfig.ralph?.model) ?? parseModelString(currentConfig.executionModel)
432483

433484
const sendContinuationPromptWithModel = async () => {
434-
const freshState = ralphService.getActiveState(sessionId)
485+
const freshState = ralphService.getActiveState(activeSessionId)
435486
if (!freshState?.active) {
436487
throw new Error('loop_cancelled')
437488
}
438489
const result = await v2Client.session.promptAsync({
439-
sessionID: sessionId,
490+
sessionID: activeSessionId,
440491
directory: freshState.worktreeDir,
441492
parts: [{ type: 'text' as const, text: continuationPrompt }],
442493
model: ralphModel!,
@@ -445,12 +496,12 @@ export function createRalphEventHandler(
445496
}
446497

447498
const sendContinuationPromptWithoutModel = async () => {
448-
const freshState = ralphService.getActiveState(sessionId)
499+
const freshState = ralphService.getActiveState(activeSessionId)
449500
if (!freshState?.active) {
450501
throw new Error('loop_cancelled')
451502
}
452503
const result = await v2Client.session.promptAsync({
453-
sessionID: sessionId,
504+
sessionID: activeSessionId,
454505
directory: freshState.worktreeDir,
455506
parts: [{ type: 'text' as const, text: continuationPrompt }],
456507
})
@@ -466,7 +517,7 @@ export function createRalphEventHandler(
466517

467518
if (promptResult.error) {
468519
const retryFn = async () => {
469-
const freshState = ralphService.getActiveState(sessionId)
520+
const freshState = ralphService.getActiveState(activeSessionId)
470521
if (!freshState?.active) {
471522
throw new Error('loop_cancelled')
472523
}
@@ -475,7 +526,7 @@ export function createRalphEventHandler(
475526
throw result.error
476527
}
477528
}
478-
await handlePromptError(sessionId, currentState, 'failed to send continuation prompt after audit', promptResult.error, retryFn)
529+
await handlePromptError(activeSessionId, currentState, 'failed to send continuation prompt after audit', promptResult.error, retryFn)
479530
return
480531
}
481532

@@ -485,7 +536,7 @@ export function createRalphEventHandler(
485536
logger.log(`coding continuation using default model (fallback)`)
486537
}
487538

488-
consecutiveStalls.set(sessionId, 0)
539+
consecutiveStalls.set(activeSessionId, 0)
489540
}
490541

491542
async function onEvent(input: { event: { type: string; properties?: Record<string, unknown> } }): Promise<void> {

0 commit comments

Comments
 (0)