From 4a055dba8ba1c2ce85185e0ad03e97c43bcaa8a6 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 8 Jun 2026 03:05:26 +0000 Subject: [PATCH 1/3] feat(portal): browser terminal into 24/7 tmux Claude Code session Self-hosted portal that streams a persistent tmux Claude Code session to the browser over Socket.io + node-pty, designed to run on a small VPS behind Tailscale. - PortalServer (Express + Socket.io + node-pty) attaching to tmux new-session -A so the agent survives browser/portal restarts - Token auth (auto-generated, 0600) gating both page + WS handshake - Embedded xterm.js UI (no build step, ships in dist) - stackmemory portal start|status|stop|token CLI - Hetzner cloud-init, setup.sh, and systemd unit for 24/7 operation - docs/guides/PORTAL.md walkthrough https://claude.ai/code/session_01Gk8DiqCeG9uMaWT9RprwP1 --- docs/guides/PORTAL.md | 155 ++++++++++ package.json | 1 + scripts/portal/cloud-init.yaml | 69 +++++ scripts/portal/setup.sh | 69 +++++ scripts/portal/stackmemory-portal.service | 34 +++ src/cli/commands/portal.ts | 193 ++++++++++++ src/cli/index.ts | 2 + src/features/portal/__tests__/portal.test.ts | 157 ++++++++++ src/features/portal/index.ts | 22 ++ src/features/portal/server.ts | 300 +++++++++++++++++++ src/features/portal/types.ts | 50 ++++ src/features/portal/ui.ts | 200 +++++++++++++ 12 files changed, 1252 insertions(+) create mode 100644 docs/guides/PORTAL.md create mode 100644 scripts/portal/cloud-init.yaml create mode 100755 scripts/portal/setup.sh create mode 100644 scripts/portal/stackmemory-portal.service create mode 100644 src/cli/commands/portal.ts create mode 100644 src/features/portal/__tests__/portal.test.ts create mode 100644 src/features/portal/index.ts create mode 100644 src/features/portal/server.ts create mode 100644 src/features/portal/types.ts create mode 100644 src/features/portal/ui.ts diff --git a/docs/guides/PORTAL.md b/docs/guides/PORTAL.md new file mode 100644 index 00000000..2a17fcc4 --- /dev/null +++ 
b/docs/guides/PORTAL.md @@ -0,0 +1,155 @@ +# StackMemory Portal — Run Claude Code 24/7 + +> A VPS, Claude Code in tmux, a Tailscale VPN, and a vibecoded web terminal. +> Your agents run 24/7. You experience life. + +The **portal** is a self-hosted, browser-based terminal into a persistent +`tmux` session running Claude Code. Put it on a small VPS behind Tailscale and +you get a private, always-on coding agent you can check on from your phone, +laptop, or tablet — no exposed ports, no SaaS in the middle. + +``` +┌── Hetzner CX22 (~€4.5/mo) ───────────────────────────┐ +│ │ +│ tmux session "claude" ──► claude (max plan) │ +│ ▲ │ +│ │ node-pty │ +│ stackmemory portal ──► :7799 (xterm.js + WS) │ +│ ▲ │ +└────────┼──────────────────────────────────────────────┘ + │ Tailscale (WireGuard, 100.x address) + ▼ + Your browser → http://100.x.y.z:7799/?token=… +``` + +**Why this shape?** + +- **tmux** keeps the agent alive when you close the browser or the portal + restarts. Reattach over SSH any time. +- **Tailscale** gives you an encrypted private address with zero open ports — + no nginx, no TLS certs, no firewall holes. +- **node-pty + xterm.js** stream the real terminal, so Claude Code's TUI, + permission prompts, and colors all work exactly as they do locally. + +--- + +## Quick start (Hetzner cloud-init) + +The fastest path — the server provisions itself on first boot. + +1. Create a Tailscale **auth key** at + <https://login.tailscale.com/admin/settings/keys> (reusable, ephemeral off). +2. In Hetzner Cloud, **Add Server** → Ubuntu 24.04 → type **CX22**. +3. Expand **Cloud config** and paste + [`scripts/portal/cloud-init.yaml`](../../scripts/portal/cloud-init.yaml). + Set `TS_AUTHKEY=` to your key inside the pasted config. +4. Create the server. After ~2 minutes it's on your tailnet. 
+ +Then finish the two interactive steps over SSH: + +```bash +ssh root@<server-ip> + +# Authenticate Claude Code (max plan) once — it caches credentials in ~/.claude +tmux attach -t claude # log in, approve, then detach with: Ctrl-b d + +# Grab your access URL + token +journalctl -u stackmemory-portal --no-pager | grep -i token +``` + +Open `http://100.x.y.z:7799/?token=…` (the `100.x` Tailscale address) from any +device signed into your tailnet. You're now looking at your agent. + +--- + +## Manual setup + +Prefer to do it by hand, or to install on an existing box? + +```bash +# On the VPS (Debian/Ubuntu): +curl -fsSL https://raw.githubusercontent.com/stackmemoryai/stackmemory/main/scripts/portal/setup.sh | bash + +sudo tailscale up # join your tailnet (prints an auth URL) +tmux new -s claude 'claude' # authenticate Claude, then Ctrl-b d to detach +stackmemory portal start --cwd ~/work # start the portal (prints the URL + token) +``` + +For 24/7 operation, install the service: + +```bash +sudo cp scripts/portal/stackmemory-portal.service /etc/systemd/system/ +sudo systemctl daemon-reload +sudo systemctl enable --now stackmemory-portal +journalctl -u stackmemory-portal -f # tail logs (the access URL is printed here) +``` + +--- + +## The CLI + +```bash +stackmemory portal start # start the server (foreground; systemd runs this) +stackmemory portal status # show status + the access URL for this machine +stackmemory portal stop # stop a running portal +stackmemory portal token # print the access token +``` + +`start` options: + +| Flag | Default | Description | +|------|---------|-------------| +| `--port <port>` | `7799` | Port to listen on | +| `--host <host>` | `0.0.0.0` | Interface to bind (reachable over the tailnet) | +| `--session <name>` | `claude` | tmux session name | +| `--command <cmd>` | `claude` | Command tmux runs (`"claude --resume"`, a wrapper, etc.) 
| +| `--cwd <dir>` | cwd | Working directory for the session | +| `--no-auth` | off | Disable the token (rely on Tailscale alone) | + +The portal runs `tmux new-session -A -s <session> <command>`: it **attaches** to +the session if it already exists, otherwise creates it. Multiple browser tabs +share the same live session. Closing a tab detaches but never kills the agent. + +--- + +## Security model + +- **Network:** binding to `0.0.0.0` listens on every interface, including the + box's public IP, so it is only safe while the cloud firewall blocks `:7799` — + keep that port closed and reach the portal exclusively over the tailnet. + (Hetzner's firewall: allow `22` from your IP, deny the rest.) +- **Token:** a 48-char token is generated on first start and stored at + `~/.stackmemory/portal/token` (`chmod 600`). It's required on both the page + load (`?token=`) and the WebSocket handshake. Rotate it by deleting the file + and restarting. `--no-auth` turns this off if you trust your tailnet ACLs. +- **No portal port on the internet.** With the firewall closed, the portal is + reachable only through Tailscale's WireGuard tunnel, so `:7799` never shows up + in a public port scan. + +> Treat the token like an SSH key — anyone with the URL gets a live shell as the +> user running the portal. 
+ +--- + +## Troubleshooting + +| Symptom | Fix | +|---------|-----| +| `tmux is not installed` | `sudo apt install tmux` | +| Page loads but terminal is blank / "Cannot start session" | `node-pty` missing on the server: `npm install -g node-pty` (needs `build-essential` + `python3`) | +| `401 Unauthorized` | Append `?token=` to the URL (`stackmemory portal token`) | +| Can't reach `100.x` address | `tailscale status` on both ends; make sure your client is logged into the same tailnet | +| Claude asks to log in every time | Authenticate once inside the tmux session so credentials land in `~/.claude`; ensure systemd `HOME=` points at that user's home | +| Agent died but portal is up | `tmux attach -t claude` to inspect; the portal recreates the session on next connect | + +--- + +## Files + +| Path | Purpose | +|------|---------| +| `src/features/portal/server.ts` | Express + Socket.io + node-pty bridge | +| `src/features/portal/ui.ts` | Embedded xterm.js terminal UI | +| `src/cli/commands/portal.ts` | `stackmemory portal` command | +| `scripts/portal/setup.sh` | One-shot VPS installer | +| `scripts/portal/cloud-init.yaml` | Hetzner first-boot provisioning | +| `scripts/portal/stackmemory-portal.service` | systemd unit for 24/7 operation | diff --git a/package.json b/package.json index 2b436893..3aa09100 100644 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ "dist/src", "scripts/git-hooks", "scripts/hooks", + "scripts/portal", "scripts/setup", "scripts/setup.sh", "scripts/install.sh", diff --git a/scripts/portal/cloud-init.yaml b/scripts/portal/cloud-init.yaml new file mode 100644 index 00000000..9a678297 --- /dev/null +++ b/scripts/portal/cloud-init.yaml @@ -0,0 +1,69 @@ +#cloud-config +# StackMemory Portal — Hetzner Cloud-Init +# +# Paste this into the Hetzner Cloud "Cloud config" box when creating a server +# (Ubuntu 24.04, a CX22 is plenty: 2 vCPU / 4 GB, ~€4.5/mo). 
On first boot it +# installs tmux, Node, Claude Code, StackMemory, and Tailscale, then starts the +# portal as a systemd service. +# +# Set TS_AUTHKEY to a Tailscale auth key (https://login.tailscale.com/admin/settings/keys) +# to auto-join your tailnet. Otherwise SSH in afterwards and run `tailscale up`. +# +# After boot, find the access URL + token with: +# journalctl -u stackmemory-portal --no-pager | grep -i token + +package_update: true +packages: + - tmux + - git + - curl + - ca-certificates + - build-essential + - python3 + +write_files: + - path: /etc/stackmemory-portal.env + permissions: "0600" + content: | + # Set this to a Tailscale auth key to auto-join the tailnet on boot. + TS_AUTHKEY= + - path: /etc/systemd/system/stackmemory-portal.service + permissions: "0644" + content: | + [Unit] + Description=StackMemory Portal (browser terminal for Claude Code) + After=network-online.target tailscaled.service + Wants=network-online.target + + [Service] + Type=simple + User=root + WorkingDirectory=/root/work + Environment=HOME=/root + Environment=NODE_ENV=production + ExecStart=/usr/bin/env stackmemory portal start --port 7799 --session claude --cwd /root/work + Restart=always + RestartSec=3 + + [Install] + WantedBy=multi-user.target + +runcmd: + # Node.js 20.x + - curl -fsSL https://deb.nodesource.com/setup_20.x | bash - + - apt-get install -y nodejs + # Claude Code + StackMemory + node-pty (browser terminal backend) + - npm install -g @anthropic-ai/claude-code @stackmemoryai/stackmemory node-pty + # Tailscale + - curl -fsSL https://tailscale.com/install.sh | sh + - bash -c 'set -a; . /etc/stackmemory-portal.env; set +a; [ -n "$TS_AUTHKEY" ] && tailscale up --authkey "$TS_AUTHKEY" --ssh || true' + # Working dir + start the portal + - mkdir -p /root/work + - systemctl daemon-reload + - systemctl enable --now stackmemory-portal + +final_message: | + StackMemory Portal is up after $UPTIME seconds. + 1. If you didn't set TS_AUTHKEY: run `tailscale up` over SSH. 
+ 2. Authenticate Claude (max plan): `tmux attach -t claude` then log in. Detach with Ctrl-b d. + 3. Get the URL + token: `journalctl -u stackmemory-portal --no-pager | grep -i token` diff --git a/scripts/portal/setup.sh b/scripts/portal/setup.sh new file mode 100755 index 00000000..b029b3ec --- /dev/null +++ b/scripts/portal/setup.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# +# StackMemory Portal — VPS setup script. +# +# Provisions a fresh Debian/Ubuntu box (e.g. a Hetzner CX22, ~€4.5/mo) to run +# Claude Code 24/7 inside tmux, reachable from a browser over Tailscale. +# +# curl -fsSL https://raw.githubusercontent.com/stackmemoryai/stackmemory/main/scripts/portal/setup.sh | bash +# +# Idempotent: safe to re-run. Re-run after editing PORTAL_* env vars below. +set -euo pipefail + +PORTAL_USER="${PORTAL_USER:-$(whoami)}" +PORTAL_PORT="${PORTAL_PORT:-7799}" +PORTAL_SESSION="${PORTAL_SESSION:-claude}" +PORTAL_WORKDIR="${PORTAL_WORKDIR:-$HOME/work}" +NODE_MAJOR="${NODE_MAJOR:-20}" + +log() { printf '\033[36m[portal-setup]\033[0m %s\n' "$*"; } +have() { command -v "$1" >/dev/null 2>&1; } + +SUDO="" +if [ "$(id -u)" -ne 0 ]; then + if have sudo; then SUDO="sudo"; fi +fi + +log "1/6 Installing base packages (tmux, git, curl, build tools)…" +if have apt-get; then + $SUDO apt-get update -y + $SUDO apt-get install -y tmux git curl ca-certificates build-essential python3 +fi + +log "2/6 Installing Node.js ${NODE_MAJOR}.x…" +if ! have node || [ "$(node -p 'process.versions.node.split(".")[0]' 2>/dev/null || echo 0)" -lt "$NODE_MAJOR" ]; then + curl -fsSL "https://deb.nodesource.com/setup_${NODE_MAJOR}.x" | $SUDO -E bash - + $SUDO apt-get install -y nodejs +fi +log " node $(node -v) / npm $(npm -v)" + +log "3/6 Installing Claude Code + StackMemory…" +have claude || $SUDO npm install -g @anthropic-ai/claude-code +$SUDO npm install -g @stackmemoryai/stackmemory +# node-pty powers the browser terminal; build tools above let it compile. 
+$SUDO npm install -g node-pty || log " (node-pty global install failed — install it in the portal's working dir)" + +log "4/6 Installing Tailscale…" +if ! have tailscale; then + curl -fsSL https://tailscale.com/install.sh | $SUDO sh +fi +log " Run 'sudo tailscale up' to join your tailnet (prints an auth URL)." + +log "5/6 Preparing working directory at ${PORTAL_WORKDIR}…" +mkdir -p "$PORTAL_WORKDIR" + +log "6/6 Next steps:" +cat <', + 'Port to listen on', + String(DEFAULT_PORTAL_CONFIG.port) + ) + .option('--host ', 'Interface to bind', DEFAULT_PORTAL_CONFIG.host) + .option( + '--session ', + 'tmux session name', + DEFAULT_PORTAL_CONFIG.session + ) + .option( + '--command ', + 'Command tmux runs in the session', + DEFAULT_PORTAL_CONFIG.command + ) + .option('--cwd ', 'Working directory for the session') + .option('--no-auth', 'Disable token auth (rely on Tailscale)') + .action(async (options) => { + const server = new PortalServer({ + port: parseInt(options.port, 10), + host: options.host, + session: options.session, + command: options.command, + cwd: options.cwd, + noAuth: options.auth === false, + }); + + try { + const status = await server.start(); + const cfg = server.getConfig(); + console.log(chalk.green('✓ StackMemory Portal started')); + console.log( + chalk.gray( + ` Session: ${cfg.session} (tmux new-session -A -s ${cfg.session})` + ) + ); + console.log(chalk.gray(` Command: ${cfg.command}`)); + console.log(chalk.gray(` Listening: ${cfg.host}:${status.port}`)); + if (cfg.noAuth) { + console.log( + chalk.yellow(' Auth: disabled (anyone on the network can connect)') + ); + } + printAccessUrls(cfg.host, cfg.port, cfg.token); + console.log( + chalk.gray( + '\nPress Ctrl+C to stop the portal (your tmux session keeps running).' 
+ ) + ); + } catch (error) { + console.error( + chalk.red('Failed to start portal:'), + (error as Error).message + ); + process.exit(1); + } + + const shutdown = async () => { + console.log(chalk.gray('\nShutting down portal…')); + await server.stop(); + process.exit(0); + }; + process.on('SIGINT', shutdown); + process.on('SIGTERM', shutdown); + }); + + cmd + .command('status') + .description('Show portal status and access URL') + .action(() => { + const status = readStatus(); + if (!status.running) { + console.log(chalk.yellow('Portal is not running')); + console.log(chalk.gray(' Start with: stackmemory portal start')); + return; + } + console.log(chalk.green('Portal is running')); + console.log(chalk.gray(` PID: ${status.pid}`)); + console.log(chalk.gray(` Session: ${status.session}`)); + console.log(chalk.gray(` Listening: ${status.host}:${status.port}`)); + if (status.startedAt) { + const up = Math.floor((Date.now() - status.startedAt) / 1000); + console.log(chalk.gray(` Uptime: ${formatUptime(up)}`)); + } + const token = ensureToken(); + if (status.port) + printAccessUrls(status.host ?? 
'0.0.0.0', status.port, token); + }); + + cmd + .command('stop') + .description('Stop a running portal server') + .action(() => { + if (stopRunning()) { + console.log(chalk.green('✓ Portal stopped')); + } else { + console.log(chalk.yellow('Portal is not running')); + } + }); + + cmd + .command('token') + .description('Print the portal access token (auto-generated on first use)') + .action(() => { + console.log(ensureToken()); + }); + + return cmd; +} + +function formatUptime(seconds: number): string { + if (seconds < 60) return `${seconds}s`; + if (seconds < 3600) return `${Math.floor(seconds / 60)}m ${seconds % 60}s`; + if (seconds < 86400) { + const h = Math.floor(seconds / 3600); + return `${h}h ${Math.floor((seconds % 3600) / 60)}m`; + } + const d = Math.floor(seconds / 86400); + return `${d}d ${Math.floor((seconds % 86400) / 3600)}h`; +} diff --git a/src/cli/index.ts b/src/cli/index.ts index 05270bcb..12d8ba22 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -54,6 +54,7 @@ import { registerLogoutCommand, registerDbCommands } from './commands/db.js'; import { createHooksCommand } from './commands/hooks.js'; import { createDaemonCommand } from './commands/daemon.js'; import { createSweepCommand } from './commands/sweep.js'; +import { createPortalCommand } from './commands/portal.js'; import { createShellCommand } from './commands/shell.js'; import { createAPICommand } from './commands/api.js'; import { createCleanupProcessesCommand } from './commands/cleanup-processes.js'; @@ -806,6 +807,7 @@ if (isFeatureEnabled('ralph')) { } program.addCommand(createDaemonCommand()); program.addCommand(createSweepCommand()); +program.addCommand(createPortalCommand()); program.addCommand(createShellCommand()); program.addCommand(createAPICommand()); program.addCommand(createCleanupProcessesCommand()); diff --git a/src/features/portal/__tests__/portal.test.ts b/src/features/portal/__tests__/portal.test.ts new file mode 100644 index 00000000..dcb101d6 --- /dev/null +++ 
b/src/features/portal/__tests__/portal.test.ts @@ -0,0 +1,157 @@ +/** + * StackMemory Portal tests. + * + * Covers token generation, config resolution, the embedded UI, and the HTTP + * auth layer. The pty/tmux bridge only loads on a socket connection, so the + * HTTP server can be exercised without node-pty present. + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { mkdtempSync, rmSync, existsSync, readFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { + PortalServer, + resolveConfig, + ensureToken, + readStatus, + renderPortalPage, +} from '../index.js'; + +let stateDir: string; + +beforeEach(() => { + stateDir = mkdtempSync(join(tmpdir(), 'portal-test-')); + process.env['PORTAL_STATE_DIR'] = stateDir; +}); + +afterEach(() => { + delete process.env['PORTAL_STATE_DIR']; + rmSync(stateDir, { recursive: true, force: true }); +}); + +describe('ensureToken', () => { + it('generates and persists a token to ~/.stackmemory/portal/token', () => { + const token = ensureToken(); + expect(token).toMatch(/^[0-9a-f]{48}$/); + expect(existsSync(join(stateDir, 'token'))).toBe(true); + }); + + it('returns the same token on subsequent calls', () => { + expect(ensureToken()).toBe(ensureToken()); + }); + + it('persists the token with 0600 permissions', () => { + ensureToken(); + const mode = readFileSync(join(stateDir, 'token')); + expect(mode).toBeDefined(); + }); +}); + +describe('resolveConfig', () => { + it('applies defaults and a generated token', () => { + const cfg = resolveConfig(); + expect(cfg.port).toBe(7799); + expect(cfg.host).toBe('0.0.0.0'); + expect(cfg.session).toBe('claude'); + expect(cfg.command).toBe('claude'); + expect(cfg.token).toMatch(/^[0-9a-f]{48}$/); + }); + + it('honors overrides', () => { + const cfg = resolveConfig({ port: 8080, session: 'agent', command: 'claude --resume' }); + expect(cfg.port).toBe(8080); + expect(cfg.session).toBe('agent'); + 
expect(cfg.command).toBe('claude --resume'); + }); + + it('leaves the token empty when auth is disabled', () => { + const cfg = resolveConfig({ noAuth: true }); + expect(cfg.token).toBe(''); + expect(cfg.noAuth).toBe(true); + }); +}); + +describe('renderPortalPage', () => { + it('embeds the session name and xterm/socket.io assets', () => { + const html = renderPortalPage({ session: 'claude' }); + expect(html).toContain('StackMemory Portal'); + expect(html).toContain('session claude'); + expect(html).toContain('@xterm/xterm'); + expect(html).toContain('socket.io-client'); + }); + + it('escapes the session name to prevent HTML injection', () => { + const html = renderPortalPage({ session: '' }); + expect(html).not.toContain(''); + expect(html).toContain('<script>'); + }); +}); + +describe('readStatus', () => { + it('reports not-running when no pid file exists', () => { + expect(readStatus()).toEqual({ running: false }); + }); +}); + +describe('PortalServer HTTP layer', () => { + let server: PortalServer; + let port: number; + + beforeEach(async () => { + port = 38000 + Math.floor(Math.random() * 1500); + server = new PortalServer({ port, host: '127.0.0.1', token: 'secret-token' }); + await server.start(); + }); + + afterEach(async () => { + await server.stop(); + }); + + it('serves /healthz without auth', async () => { + const res = await fetch(`http://127.0.0.1:${port}/healthz`); + expect(res.status).toBe(200); + const body = (await res.json()) as { ok: boolean; session: string }; + expect(body.ok).toBe(true); + expect(body.session).toBe('claude'); + }); + + it('rejects the page without a valid token', async () => { + const res = await fetch(`http://127.0.0.1:${port}/`); + expect(res.status).toBe(401); + }); + + it('serves the page with the correct token', async () => { + const res = await fetch(`http://127.0.0.1:${port}/?token=secret-token`); + expect(res.status).toBe(200); + expect(await res.text()).toContain('StackMemory Portal'); + }); + + it('writes a status 
file while running', () => { + const status = readStatus(); + expect(status.running).toBe(true); + expect(status.port).toBe(port); + }); + + it('clears the status file after stop', async () => { + await server.stop(); + expect(readStatus()).toEqual({ running: false }); + // re-create for afterEach idempotency + server = new PortalServer({ port, host: '127.0.0.1', token: 'secret-token' }); + await server.start(); + }); +}); + +describe('PortalServer no-auth mode', () => { + it('serves the page without a token when auth is disabled', async () => { + const port = 39500 + Math.floor(Math.random() * 400); + const server = new PortalServer({ port, host: '127.0.0.1', noAuth: true }); + await server.start(); + try { + const res = await fetch(`http://127.0.0.1:${port}/`); + expect(res.status).toBe(200); + } finally { + await server.stop(); + } + }); +}); diff --git a/src/features/portal/index.ts b/src/features/portal/index.ts new file mode 100644 index 00000000..9cfec49e --- /dev/null +++ b/src/features/portal/index.ts @@ -0,0 +1,22 @@ +/** + * StackMemory Portal — public API. + * + * A self-hosted, browser-based terminal into a persistent tmux session + * running Claude Code. See docs/guides/PORTAL.md for the Hetzner + Tailscale + * deployment guide. + */ + +export { + PortalServer, + resolveConfig, + ensureToken, + readStatus, + stopRunning, + getPortalDir, +} from './server.js'; +export { renderPortalPage } from './ui.js'; +export { + type PortalConfig, + type PortalStatus, + DEFAULT_PORTAL_CONFIG, +} from './types.js'; diff --git a/src/features/portal/server.ts b/src/features/portal/server.ts new file mode 100644 index 00000000..ecf73667 --- /dev/null +++ b/src/features/portal/server.ts @@ -0,0 +1,300 @@ +/** + * StackMemory Portal — server. + * + * Express + Socket.io front end that bridges a browser terminal to a + * persistent tmux session running Claude Code. 
Each connected client gets + * its own pty running `tmux new-session -A -s `: + * - `-A` attaches to the session if it exists, otherwise creates it, so + * the agent keeps running 24/7 even when no browser is open. + * - Disconnecting a browser detaches its pty but leaves tmux alive. + */ + +import express from 'express'; +import { createServer, type Server as HttpServer } from 'http'; +import { Server as SocketServer } from 'socket.io'; +import { execFileSync } from 'child_process'; +import { + existsSync, + mkdirSync, + readFileSync, + writeFileSync, + unlinkSync, +} from 'fs'; +import { join } from 'path'; +import { randomBytes } from 'crypto'; +import { renderPortalPage } from './ui.js'; +import { + type PortalConfig, + type PortalStatus, + DEFAULT_PORTAL_CONFIG, +} from './types.js'; + +const HOME = process.env['HOME'] || '/tmp'; + +// Minimal node-pty surface, mirroring src/features/sweep/pty-wrapper.ts so we +// avoid a hard compile-time dependency on the native module. +interface PtyProcess { + write(data: string): void; + resize(cols: number, rows: number): void; + onData(cb: (data: string) => void): void; + onExit(cb: (e: { exitCode: number }) => void): void; + kill(): void; +} + +export function getPortalDir(): string { + const dir = + process.env['PORTAL_STATE_DIR'] || join(HOME, '.stackmemory', 'portal'); + if (!existsSync(dir)) mkdirSync(dir, { recursive: true }); + return dir; +} + +function tokenPath(): string { + return join(getPortalDir(), 'token'); +} + +function pidPath(): string { + return join(getPortalDir(), 'portal.json'); +} + +/** Load the persisted token, generating + saving one on first use. 
*/ +export function ensureToken(): string { + const p = tokenPath(); + if (existsSync(p)) { + const t = readFileSync(p, 'utf-8').trim(); + if (t) return t; + } + const token = randomBytes(24).toString('hex'); + writeFileSync(p, token + '\n', { mode: 0o600 }); + return token; +} + +function tmuxAvailable(): boolean { + try { + execFileSync('tmux', ['-V'], { stdio: 'ignore' }); + return true; + } catch { + return false; + } +} + +export function resolveConfig( + overrides: Partial = {} +): PortalConfig { + const token = overrides.token ?? (overrides.noAuth ? '' : ensureToken()); + return { + ...DEFAULT_PORTAL_CONFIG, + cwd: process.cwd(), + token, + ...overrides, + }; +} + +export class PortalServer { + private config: PortalConfig; + private httpServer: HttpServer | null = null; + private io: SocketServer | null = null; + private ptys = new Set(); + + constructor(config: Partial = {}) { + this.config = resolveConfig(config); + } + + getConfig(): PortalConfig { + return this.config; + } + + private authOk(token: unknown): boolean { + if (this.config.noAuth) return true; + return typeof token === 'string' && token === this.config.token; + } + + async start(): Promise { + if (!tmuxAvailable()) { + throw new Error( + 'tmux is not installed. Install it first (e.g. `apt install tmux` / `brew install tmux`).' + ); + } + + const app = express(); + const page = renderPortalPage({ session: this.config.session }); + + // Token-gate the page itself when auth is enabled. 
+ app.get('/', (req, res) => { + if (!this.config.noAuth && !this.authOk(req.query['token'])) { + res + .status(401) + .type('text/plain') + .send('Unauthorized: missing or invalid ?token'); + return; + } + res.type('html').send(page); + }); + app.get('/healthz', (_req, res) => { + res.json({ ok: true, session: this.config.session }); + }); + + const httpServer = createServer(app); + const io = new SocketServer(httpServer, { cors: { origin: true } }); + + io.use((socket, next) => { + const token = + (socket.handshake.auth as { token?: string })?.token ?? + (socket.handshake.query['token'] as string | undefined); + if (this.authOk(token)) return next(); + next(new Error('Invalid access token')); + }); + + io.on('connection', (socket) => { + void this.attachSession(socket); + }); + + this.httpServer = httpServer; + this.io = io; + + await new Promise((resolve, reject) => { + httpServer.once('error', reject); + httpServer.listen(this.config.port, this.config.host, () => resolve()); + }); + + const status: PortalStatus = { + running: true, + pid: process.pid, + port: this.config.port, + host: this.config.host, + session: this.config.session, + startedAt: Date.now(), + }; + writeFileSync(pidPath(), JSON.stringify(status, null, 2)); + return status; + } + + private async attachSession(socket: { + emit: (event: string, ...args: unknown[]) => void; + on: (event: string, cb: (...args: unknown[]) => void) => void; + }): Promise { + let pty: typeof import('node-pty'); + try { + pty = await import('node-pty'); + } catch { + socket.emit( + 'portal:error', + 'node-pty is not installed on the server. Run: npm install node-pty' + ); + return; + } + + // `tmux new-session -A` attaches if the session exists, else creates it. 
+ const proc = pty.spawn( + 'tmux', + ['new-session', '-A', '-s', this.config.session, this.config.command], + { + name: 'xterm-256color', + cols: 80, + rows: 24, + cwd: this.config.cwd, + env: { ...process.env, TERM: 'xterm-256color' }, + } + ) as unknown as PtyProcess; + + this.ptys.add(proc); + + proc.onData((data) => socket.emit('output', data)); + proc.onExit(() => { + this.ptys.delete(proc); + socket.emit( + 'output', + '\r\n\x1b[33m[portal] session detached]\x1b[0m\r\n' + ); + }); + + socket.on('input', (data: unknown) => { + if (typeof data === 'string') proc.write(data); + }); + socket.on('resize', (size: unknown) => { + const s = size as { cols?: number; rows?: number }; + if (s && Number.isFinite(s.cols) && Number.isFinite(s.rows)) { + try { + proc.resize( + Math.max(2, s.cols as number), + Math.max(2, s.rows as number) + ); + } catch { + /* resize can race with exit; ignore */ + } + } + }); + socket.on('disconnect', () => { + this.ptys.delete(proc); + try { + proc.kill(); + } catch { + /* already gone */ + } + }); + } + + async stop(): Promise { + for (const p of this.ptys) { + try { + p.kill(); + } catch { + /* ignore */ + } + } + this.ptys.clear(); + if (this.io) { + this.io.close(); + this.io = null; + } + await new Promise((resolve) => { + if (!this.httpServer) return resolve(); + this.httpServer.close(() => resolve()); + }); + this.httpServer = null; + clearPidFile(); + } +} + +export function readStatus(): PortalStatus { + const p = pidPath(); + if (!existsSync(p)) return { running: false }; + try { + const status = JSON.parse(readFileSync(p, 'utf-8')) as PortalStatus; + if (status.pid && !isProcessAlive(status.pid)) { + clearPidFile(); + return { running: false }; + } + return status; + } catch { + return { running: false }; + } +} + +export function stopRunning(): boolean { + const status = readStatus(); + if (!status.running || !status.pid) return false; + try { + process.kill(status.pid, 'SIGTERM'); + } catch { + /* already dead */ + } + 
clearPidFile(); + return true; +} + +function clearPidFile(): void { + try { + if (existsSync(pidPath())) unlinkSync(pidPath()); + } catch { + /* ignore */ + } +} + +function isProcessAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} diff --git a/src/features/portal/types.ts b/src/features/portal/types.ts new file mode 100644 index 00000000..d970f1c8 --- /dev/null +++ b/src/features/portal/types.ts @@ -0,0 +1,50 @@ +/** + * StackMemory Portal — types + * + * The portal is a self-hosted, browser-based terminal into a persistent + * tmux session running Claude Code. It is designed to run on a small VPS + * (e.g. Hetzner CX) behind a Tailscale VPN so your agents can run 24/7 + * while you check in from any device. + */ + +export interface PortalConfig { + /** Port the HTTP/WebSocket server listens on. Default: 7799 */ + port: number; + /** Interface to bind to. Default: 0.0.0.0 (reachable over the tailnet). */ + host: string; + /** tmux session name that hosts Claude Code. Default: "claude" */ + session: string; + /** + * Command tmux runs inside the session. Default: "claude". + * Passed verbatim to `tmux new-session` (tmux runs it via the user shell), + * so "claude --resume" or a custom wrapper both work. + */ + command: string; + /** Working directory for the tmux session. Default: process.cwd() */ + cwd: string; + /** + * Shared access token. When set, clients must present it via the + * `?token=` query string (page load) or socket handshake. Auto-generated + * and persisted on first start if not provided. + */ + token: string; + /** Disable token auth entirely (rely on Tailscale only). 
Default: false */ + noAuth: boolean; +} + +export const DEFAULT_PORTAL_CONFIG: Omit = { + port: 7799, + host: '0.0.0.0', + session: 'claude', + command: 'claude', + noAuth: false, +}; + +export interface PortalStatus { + running: boolean; + pid?: number; + port?: number; + host?: string; + session?: string; + startedAt?: number; +} diff --git a/src/features/portal/ui.ts b/src/features/portal/ui.ts new file mode 100644 index 00000000..6ba7c22d --- /dev/null +++ b/src/features/portal/ui.ts @@ -0,0 +1,200 @@ +/** + * StackMemory Portal — embedded UI. + * + * Returned as a string so it survives the esbuild transpile (which only + * processes .ts files) and ships inside the npm `dist/`. The terminal is + * rendered with xterm.js loaded from a pinned CDN; data is streamed over + * the same-origin Socket.io connection exposed by the portal server. + */ + +const XTERM_VERSION = '5.3.0'; +const FIT_VERSION = '0.10.0'; +const SOCKET_IO_VERSION = '4.7.5'; + +export function renderPortalPage(opts: { session: string }): string { + return ` + + + + + +StackMemory Portal — ${escapeHtml(opts.session)} + + + + +
+
+ StackMemory Portal + session ${escapeHtml(opts.session)} + + + + + connecting… +
+
+
+ +
+
+

Disconnected

+

The connection to your agent was lost.

+
+
+
+ + + + + + +`; +}
+
+/**
+ * Escape the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) as
+ * entity references so `s` can be interpolated into the page markup without
+ * being interpreted as tags or attribute delimiters.
+ *
+ * @param s - Untrusted text (e.g. the tmux session name).
+ * @returns `s` with each special character replaced by its entity.
+ */
+function escapeHtml(s: string): string {
+  const entities: Record<string, string> = {
+    '&': '&amp;',
+    '<': '&lt;',
+    '>': '&gt;',
+    '"': '&quot;',
+    "'": '&#39;',
+  };
+  // The character class and the map cover the same five characters; the
+  // fallback only exists to keep the lookup total for the type checker.
+  return s.replace(/[&<>"']/g, (c) => entities[c] ?? c);
+}
From e31551eded915e8f39f047f33ee88a955e1dff0e Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 8 Jun 2026 03:05:35 +0000 Subject: [PATCH 2/3] feat(brain): shared, compounding context state per repo + org MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A knowledge layer every agent (Claude, Codex, OpenCode, Hermes) reads from and writes to. Each experiment/decision/insight is recorded with a summary and conclusion, scoped to a repo (projectId) and org (workspaceId), so mutual thinking compounds across tools and machines. - BrainStore: local SQLite store (record/recall/supersede), repo+org scoped - BrainSync: isolated online push/pull (newest-wins, offline-safe) reusing the Provenant auth/endpoint — never touches the frame CloudSyncEngine - stackmemory brain record|recall|list|show|sync|status CLI - openBrain() resolves scope/db/auth like stackmemory sync - 14 tests (store scoping, supersede, upsert, sync cursor, conflict, offline) - docs/guides/BRAIN.md Agents connect by shelling out to the CLI, matching the existing Codex/OpenCode/Hermes wrapper integration model. 
https://claude.ai/code/session_01Gk8DiqCeG9uMaWT9RprwP1 --- docs/guides/BRAIN.md | 122 +++++++++++ src/cli/commands/brain.ts | 275 +++++++++++++++++++++++++ src/cli/index.ts | 2 + src/core/brain/__tests__/brain.test.ts | 223 ++++++++++++++++++++ src/core/brain/brain-store.ts | 241 ++++++++++++++++++++++ src/core/brain/brain-sync.ts | 256 +++++++++++++++++++++++ src/core/brain/index.ts | 113 ++++++++++ src/core/brain/types.ts | 103 +++++++++ 8 files changed, 1335 insertions(+) create mode 100644 docs/guides/BRAIN.md create mode 100644 src/cli/commands/brain.ts create mode 100644 src/core/brain/__tests__/brain.test.ts create mode 100644 src/core/brain/brain-store.ts create mode 100644 src/core/brain/brain-sync.ts create mode 100644 src/core/brain/index.ts create mode 100644 src/core/brain/types.ts diff --git a/docs/guides/BRAIN.md b/docs/guides/BRAIN.md new file mode 100644 index 00000000..732955fc --- /dev/null +++ b/docs/guides/BRAIN.md @@ -0,0 +1,122 @@ +# StackMemory Brain — shared, compounding context + +> Move your brain onto a server. Codex, Claude, OpenCode, and Hermes all connect +> to it. Every experiment uploads a summary and conclusion, so your agents' +> mutual thinking keeps compounding. + +The **brain** is a shared knowledge layer scoped two ways: + +- **per repo** (`projectId`) — what this codebase has tried and learned. +- **per org** (`workspaceId`, from `stackmemory login`) — knowledge shared across + every repo in your workspace. + +Each entry is an **experiment / decision / insight / note** with a `title`, a +`summary` (what was done) and the payload that compounds — the `conclusion`. +Entries sync online so the same brain is available on every machine and to +every agent. 
+ +``` + Codex ─┐ + Claude ─┼─► stackmemory brain record ──► brain_entries (local SQLite) +OpenCode ─┤ │ brain sync + Hermes ─┘ ▼ + Provenant API (per repo + per org) + ▲ + any machine/agent ◄── stackmemory brain recall ◄── brain sync (pull) +``` + +## How agents connect + +Every tool connects the same way — by shelling out to the CLI (this is how the +Codex / OpenCode / Hermes wrappers already integrate with StackMemory): + +```bash +# After an experiment, record the conclusion so others build on it: +stackmemory brain record \ + --agent codex --kind experiment \ + --title "Retry with jitter cut 5xx" \ + --summary "Added exponential backoff + jitter to the sync client" \ + --conclusion "p99 errors dropped 60%; adopt as the default" \ + --tags sync,reliability --refs STA-412,abc1234 + +# Before planning, recall what's already been tried: +stackmemory brain recall "retry" # this repo +stackmemory brain recall "auth" --org # the whole org +``` + +Drop the recall into an agent's planning preamble (a hook, a wrapper, or a +prompt step) and every plan starts enriched by prior conclusions. + +## CLI + +```bash +stackmemory brain record --title ... [--summary] [--conclusion] [--kind] \ + [--agent] [--tags a,b] [--refs x,y] [--confidence 0.8] +stackmemory brain recall [query] [--org] [--agent] [--kind] [--limit] [--all] +stackmemory brain list [--limit] +stackmemory brain show <id> +stackmemory brain sync [--push | --pull] # online push + pull +stackmemory brain status +``` + +`--json` is available on every subcommand for programmatic use. + +### Kinds + +| kind | use it for | +|------|-----------| +| `experiment` | something you tried + what happened (the compounding unit) | +| `decision` | a choice made and the reasoning | +| `insight` | a durable learning worth resurfacing | +| `note` | free-form context | + +## Scoping: repo vs org + +- `recall` defaults to the **current repo**. 
+- `recall --org` widens to the **whole workspace** — cross-pollinate learnings + between repos (e.g. "we standardized on Zod for request validation"). +- An entry always carries both `projectId` and `workspaceId`, so the same row + is reachable from either scope. + +`projectId` and `workspaceId` come from `~/.stackmemory/config.json` (written by +`stackmemory login`) or from `PROVENANT_PROJECT_ID` / `PROVENANT_WORKSPACE_ID` / +`PROVENANT_API_KEY` env vars. + +## Online sync + +```bash +stackmemory login you@example.com # provisions apiKey + workspaceId + projectId +stackmemory brain sync # push local entries, pull the rest +``` + +- **Transport:** `POST {endpoint}/v1/brain/push` and `/v1/brain/pull`, authed + with the same Bearer API key as cloud sync. The endpoint defaults to the + hosted Provenant API and is overridable with `PROVENANT_API_URL`. +- **Conflict resolution:** newest-wins by `updatedAt`. Pulling never clobbers a + locally-newer entry. +- **Offline-safe:** if the server is unreachable, the brain stays fully usable + locally and `sync` reports the error without throwing. +- **Isolation:** brain sync is deliberately separate from the frame + `CloudSyncEngine`, so it can never regress that path. + +> The hosted `/v1/brain/*` endpoints live in the Provenant API +> (`packages/provenant`). The client here speaks the documented contract above; +> until the endpoints are deployed, the brain runs local-first and `brain sync` +> reports the endpoint as unreachable. 
+ +## Storage + +| | | +|--|--| +| Table | `brain_entries` (created lazily in the project's `.stackmemory/context.db`) | +| Sync cursors | `brain_sync_meta(direction, cursor)` | +| Columns | `entry_id, workspace_id, project_id, agent, kind, title, summary, conclusion, tags, refs, confidence, status, superseded_by, created_at, updated_at` | + +## Files + +| Path | Purpose | +|------|---------| +| `src/core/brain/brain-store.ts` | Local SQLite store (record / recall / supersede) | +| `src/core/brain/brain-sync.ts` | Online push/pull client (newest-wins, offline-safe) | +| `src/core/brain/index.ts` | Scope + config resolution, `openBrain()` | +| `src/cli/commands/brain.ts` | `stackmemory brain` command | diff --git a/src/cli/commands/brain.ts b/src/cli/commands/brain.ts new file mode 100644 index 00000000..67d13801 --- /dev/null +++ b/src/cli/commands/brain.ts @@ -0,0 +1,275 @@ +/** + * StackMemory Brain CLI command. + * + * Shared, compounding context state that every agent reads from and writes to. + * All agents (Claude, Codex, OpenCode, Hermes) connect by shelling out: + * stackmemory brain record --kind experiment --title "..." --conclusion "..." + * stackmemory brain recall "auth retry" --org + */ + +import { Command } from 'commander'; +import chalk from 'chalk'; +import { openBrain } from '../../core/brain/index.js'; +import type { + BrainEntry, + BrainKind, + BrainQuery, +} from '../../core/brain/types.js'; + +function fmtEntry(e: BrainEntry, verbose = false): string { + const id = chalk.dim(e.entryId.slice(0, 8)); + const kind = chalk.cyan(e.kind.padEnd(10)); + const agent = chalk.magenta(`@${e.agent}`); + const when = new Date(e.createdAt).toISOString().slice(0, 10); + const head = `${id} ${kind} ${agent} ${chalk.gray(when)} ${chalk.bold(e.title)}`; + if (!verbose) { + const concl = e.conclusion + ? 
`\n ${chalk.green('→')} ${e.conclusion}` + : ''; + return head + concl; + } + const lines = [head]; + if (e.summary) lines.push(` ${chalk.gray('summary:')} ${e.summary}`); + if (e.conclusion) + lines.push(` ${chalk.green('conclusion:')} ${e.conclusion}`); + if (e.tags.length) + lines.push(` ${chalk.gray('tags:')} ${e.tags.join(', ')}`); + if (e.refs.length) + lines.push(` ${chalk.gray('refs:')} ${e.refs.join(', ')}`); + lines.push(` ${chalk.gray('confidence:')} ${e.confidence}`); + return lines.join('\n'); +} + +export function createBrainCommand(): Command { + const cmd = new Command('brain') + .description('Shared, compounding context state (per repo + org)') + .addHelpText( + 'after', + ` +Examples: + stackmemory brain record --kind experiment \\ + --title "Retry with jitter cut 5xx" \\ + --summary "Tried exp backoff + jitter on the sync client" \\ + --conclusion "p99 errors dropped 60%; ship it" --tags sync,reliability + stackmemory brain recall "retry" Search this repo's brain + stackmemory brain recall "auth" --org Search the whole org + stackmemory brain list --limit 10 + stackmemory brain show + stackmemory brain sync Push + pull online + stackmemory brain status + +Every agent (Claude, Codex, OpenCode, Hermes) shares this brain — log +experiment conclusions so mutual thinking compounds. See docs/guides/BRAIN.md. 
+` + );
+
+  // `brain record`: persist one experiment / decision / insight / note in the
+  // local store and echo it back (JSON with --json, formatted otherwise).
+  cmd
+    .command('record')
+    .description('Record an experiment / decision / insight / note')
+    // The `<title>` placeholder makes the option take a value; without it the
+    // flag is parsed as a boolean and the title text is lost.
+    .option('--title <title>', 'Short title (required)')
+    .option('--summary <text>', 'What was done / context')
+    .option('--conclusion <text>', 'What was concluded (the payload)')
+    .option('--kind <kind>', 'experiment | decision | insight | note', 'note')
+    .option('--agent <name>', 'Agent that produced this', 'claude')
+    .option('--tags <tags>', 'Comma-separated tags')
+    .option('--refs <refs>', 'Comma-separated refs (issues, commits, files)')
+    .option('--confidence <n>', 'Confidence 0..1', '0.7')
+    .option('--json', 'Output as JSON')
+    .action((options) => {
+      // Validate before opening the store so a usage error leaves nothing open.
+      if (!options.title) {
+        console.error(chalk.red('--title is required'));
+        process.exit(1);
+      }
+      const ctx = openBrain();
+      try {
+        const entry = ctx.store.record({
+          title: options.title,
+          summary: options.summary,
+          conclusion: options.conclusion,
+          kind: options.kind as BrainKind,
+          agent: options.agent,
+          tags: splitList(options.tags),
+          refs: splitList(options.refs),
+          confidence: parseFloat(options.confidence),
+        });
+        if (options.json) {
+          console.log(JSON.stringify(entry, null, 2));
+        } else {
+          console.log(
+            chalk.green('✓ recorded'),
+            chalk.dim(entry.entryId.slice(0, 8))
+          );
+          console.log(fmtEntry(entry));
+        }
+      } finally {
+        ctx.close();
+      }
+    });
+
+ cmd + .command('recall') + .description('Search the brain (this repo by default, --org for the org)') + .argument('[query]', 'Free-text query') + .option('--org', 'Search across the whole org (all repos)') + .option('--agent <name>', 'Filter by agent') + .option('--kind <kind>', 'Filter by kind') + .option('--limit <n>', 'Max results', '20') + .option('--all', 'Include superseded entries') + .option('--json', 'Output as JSON') + .action((query, options) => { + const ctx = openBrain(); + try { + const q: BrainQuery = { + text: query, + org: !!options.org, + agent: options.agent, + kind: options.kind as BrainKind | undefined, + limit: 
parseInt(options.limit, 10), + includeSuperseded: !!options.all, + }; + const results = ctx.store.recall(q); + if (options.json) { + console.log(JSON.stringify(results, null, 2)); + return; + } + if (results.length === 0) { + console.log(chalk.yellow('No matching brain entries.')); + return; + } + const scope = options.org ? 'org' : 'repo'; + console.log(chalk.bold(`${results.length} result(s) [${scope}]`)); + for (const e of results) console.log('\n' + fmtEntry(e)); + } finally { + ctx.close(); + } + }); + + cmd + .command('list') + .description('List recent brain entries for this repo') + .option('--limit <n>', 'Max results', '20') + .option('--json', 'Output as JSON') + .action((options) => { + const ctx = openBrain(); + try { + const results = ctx.store.recall({ + limit: parseInt(options.limit, 10), + }); + if (options.json) { + console.log(JSON.stringify(results, null, 2)); + return; + } + if (results.length === 0) { + console.log(chalk.yellow('Brain is empty for this repo.')); + return; + } + for (const e of results) console.log(fmtEntry(e) + '\n'); + } finally { + ctx.close(); + } + }); + + cmd + .command('show') + .description('Show a single entry in full') + .argument('<id>', 'Entry id (or prefix)') + .option('--json', 'Output as JSON') + .action((id, options) => { + const ctx = openBrain(); + try { + const entry = ctx.store.get(id); + if (!entry) { + console.error(chalk.red(`No entry matching '${id}'`)); + process.exit(1); + } + console.log( + options.json ? JSON.stringify(entry, null, 2) : fmtEntry(entry, true) + ); + } finally { + ctx.close(); + } + }); + + cmd + .command('sync') + .description('Push + pull brain entries online') + .option('--push', 'Push only') + .option('--pull', 'Pull only') + .option('--json', 'Output as JSON') + .action(async (options) => { + const ctx = openBrain(); + try { + if (!ctx.sync) { + console.error( + chalk.yellow( + 'Online brain not configured. Run `stackmemory login`.' 
+ ) + ); + process.exit(1); + } + const result = options.push + ? await ctx.sync.push() + : options.pull + ? await ctx.sync.pull() + : await ctx.sync.sync(); + if (options.json) { + console.log(JSON.stringify(result, null, 2)); + } else if (result.success) { + console.log( + chalk.green( + `✓ pushed ${result.pushed}, pulled ${result.pulled} (applied ${result.applied})` + ) + ); + } else { + console.error(chalk.red(`Sync failed: ${result.error}`)); + process.exit(1); + } + } finally { + ctx.close(); + } + }); + + cmd + .command('status') + .description('Show brain scope + entry counts') + .option('--json', 'Output as JSON') + .action((options) => { + const ctx = openBrain(); + try { + const status = { + projectId: ctx.projectId, + workspaceId: ctx.workspaceId || null, + repoEntries: ctx.store.count(false), + orgEntries: ctx.workspaceId ? ctx.store.count(true) : 0, + online: !!ctx.sync, + }; + if (options.json) { + console.log(JSON.stringify(status, null, 2)); + return; + } + console.log(chalk.bold('Brain Status')); + console.log(` Repo (project): ${status.projectId}`); + console.log( + ` Org (workspace): ${status.workspaceId ?? chalk.dim('not logged in')}` + ); + console.log(` Repo entries: ${status.repoEntries}`); + if (ctx.workspaceId) + console.log(` Org entries: ${status.orgEntries}`); + console.log( + ` Online sync: ${status.online ? 
chalk.green('configured') : chalk.dim('local-only')}` + ); + } finally { + ctx.close(); + } + }); + + return cmd; +} + +function splitList(v?: string): string[] | undefined { + if (!v) return undefined; + return v + .split(',') + .map((s) => s.trim()) + .filter(Boolean); +} diff --git a/src/cli/index.ts b/src/cli/index.ts index 12d8ba22..b8f0f6d4 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -55,6 +55,7 @@ import { createHooksCommand } from './commands/hooks.js'; import { createDaemonCommand } from './commands/daemon.js'; import { createSweepCommand } from './commands/sweep.js'; import { createPortalCommand } from './commands/portal.js'; +import { createBrainCommand } from './commands/brain.js'; import { createShellCommand } from './commands/shell.js'; import { createAPICommand } from './commands/api.js'; import { createCleanupProcessesCommand } from './commands/cleanup-processes.js'; @@ -808,6 +809,7 @@ if (isFeatureEnabled('ralph')) { program.addCommand(createDaemonCommand()); program.addCommand(createSweepCommand()); program.addCommand(createPortalCommand()); +program.addCommand(createBrainCommand()); program.addCommand(createShellCommand()); program.addCommand(createAPICommand()); program.addCommand(createCleanupProcessesCommand()); diff --git a/src/core/brain/__tests__/brain.test.ts b/src/core/brain/__tests__/brain.test.ts new file mode 100644 index 00000000..aac6166a --- /dev/null +++ b/src/core/brain/__tests__/brain.test.ts @@ -0,0 +1,223 @@ +/** + * StackMemory Brain tests — local store + online sync (mocked fetch). 
+ */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import Database from 'better-sqlite3'; +import { BrainStore } from '../brain-store.js'; +import { BrainSync } from '../brain-sync.js'; +import type { BrainEntry } from '../types.js'; + +function makeDb() { + return new Database(':memory:'); +} + +describe('BrainStore', () => { + let db: Database.Database; + let store: BrainStore; + + beforeEach(() => { + db = makeDb(); + store = new BrainStore(db, { projectId: 'repoA', workspaceId: 'orgX' }); + }); + afterEach(() => db.close()); + + it('records and recalls an entry scoped to the repo', () => { + const e = store.record({ + title: 'Retry with jitter', + summary: 'tried backoff', + conclusion: 'errors dropped 60%', + kind: 'experiment', + agent: 'codex', + tags: ['sync', 'reliability'], + }); + expect(e.entryId).toBeTruthy(); + expect(e.projectId).toBe('repoA'); + expect(e.workspaceId).toBe('orgX'); + + const results = store.recall({ text: 'jitter' }); + expect(results).toHaveLength(1); + expect(results[0].conclusion).toBe('errors dropped 60%'); + expect(results[0].agent).toBe('codex'); + }); + + it('does not leak entries across repos by default', () => { + store.record({ title: 'repoA secret' }); + const storeB = new BrainStore(db, { projectId: 'repoB', workspaceId: 'orgX' }); + expect(storeB.recall({}).length).toBe(0); + }); + + it('finds cross-repo entries via org scope', () => { + store.record({ title: 'from repoA', tags: ['shared'] }); + const storeB = new BrainStore(db, { projectId: 'repoB', workspaceId: 'orgX' }); + storeB.record({ title: 'from repoB', tags: ['shared'] }); + + const repoOnly = store.recall({ text: 'shared' }); + expect(repoOnly).toHaveLength(1); + + const orgWide = store.recall({ text: 'shared', org: true }); + expect(orgWide).toHaveLength(2); + }); + + it('filters by agent and kind', () => { + store.record({ title: 'a', agent: 'claude', kind: 'decision' }); + store.record({ title: 'b', agent: 'hermes', kind: 
'experiment' }); + expect(store.recall({ agent: 'hermes' })).toHaveLength(1); + expect(store.recall({ kind: 'decision' })).toHaveLength(1); + }); + + it('clamps confidence to 0..1', () => { + const e = store.record({ title: 'x', confidence: 5 }); + expect(e.confidence).toBe(1); + const e2 = store.record({ title: 'y', confidence: -3 }); + expect(e2.confidence).toBe(0); + }); + + it('supersedes entries and hides them by default', () => { + const oldE = store.record({ title: 'old approach' }); + const newE = store.record({ title: 'new approach' }); + store.supersede(oldE.entryId, newE.entryId); + + const active = store.recall({}); + expect(active.map((e) => e.entryId)).not.toContain(oldE.entryId); + + const all = store.recall({ includeSuperseded: true }); + expect(all.map((e) => e.entryId)).toContain(oldE.entryId); + }); + + it('upserts by entryId', () => { + const e = store.record({ title: 'v1' }); + store.record({ entryId: e.entryId, title: 'v2' }); + const got = store.get(e.entryId); + expect(got?.title).toBe('v2'); + expect(store.recall({}).length).toBe(1); + }); + + it('gets by id prefix', () => { + const e = store.record({ title: 'prefixed' }); + expect(store.get(e.entryId.slice(0, 8))?.title).toBe('prefixed'); + }); + + it('counts repo vs org', () => { + store.record({ title: 'a' }); + const storeB = new BrainStore(db, { projectId: 'repoB', workspaceId: 'orgX' }); + storeB.record({ title: 'b' }); + expect(store.count(false)).toBe(1); + expect(store.count(true)).toBe(2); + }); +}); + +describe('BrainSync', () => { + let db: Database.Database; + let store: BrainStore; + let sync: BrainSync; + + beforeEach(() => { + db = makeDb(); + store = new BrainStore(db, { projectId: 'repoA', workspaceId: 'orgX' }); + sync = new BrainSync(db, store, { + endpoint: 'https://example.test', + apiKey: 'key', + workspaceId: 'orgX', + projectId: 'repoA', + clientId: 'client1', + }); + }); + afterEach(() => { + db.close(); + vi.restoreAllMocks(); + }); + + it('pushes locally-updated 
entries and advances the cursor', async () => { + store.record({ title: 'to push' }); + const fetchMock = vi + .spyOn(globalThis, 'fetch') + .mockResolvedValue( + new Response(JSON.stringify({ accepted: 1, serverCursor: 123 }), { + status: 200, + }) + ); + + const res = await sync.push(); + expect(res.success).toBe(true); + expect(res.pushed).toBe(1); + expect(fetchMock).toHaveBeenCalledOnce(); + + // Cursor advanced → nothing new to push. + fetchMock.mockClear(); + const res2 = await sync.push(); + expect(res2.pushed).toBe(0); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it('applies pulled entries with newest-wins', async () => { + const remote: BrainEntry = { + entryId: 'remote-1', + workspaceId: 'orgX', + projectId: 'repoA', + agent: 'codex', + kind: 'insight', + title: 'remote insight', + summary: '', + conclusion: 'from another machine', + tags: [], + refs: [], + confidence: 0.9, + status: 'active', + createdAt: 1000, + updatedAt: 1000, + }; + vi.spyOn(globalThis, 'fetch').mockResolvedValue( + new Response( + JSON.stringify({ entries: [remote], serverCursor: 1000, hasMore: false }), + { status: 200 } + ) + ); + + const res = await sync.pull(); + expect(res.success).toBe(true); + expect(res.applied).toBe(1); + expect(store.get('remote-1')?.conclusion).toBe('from another machine'); + }); + + it('does not overwrite a newer local entry on pull', async () => { + const local = store.record({ + entryId: 'shared-1', + title: 'local newer', + updatedAt: 5000, + }); + const stale: BrainEntry = { + ...local, + title: 'stale remote', + updatedAt: 1000, + }; + vi.spyOn(globalThis, 'fetch').mockResolvedValue( + new Response( + JSON.stringify({ entries: [stale], serverCursor: 1000, hasMore: false }), + { status: 200 } + ) + ); + + const res = await sync.pull(); + expect(res.applied).toBe(0); + expect(store.get('shared-1')?.title).toBe('local newer'); + }); + + it('degrades gracefully when offline', async () => { + store.record({ title: 'offline push' }); + 
vi.spyOn(globalThis, 'fetch').mockRejectedValue(new Error('ECONNREFUSED')); + const res = await sync.push(); + expect(res.success).toBe(false); + expect(res.error).toContain('ECONNREFUSED'); + }); + + it('reports HTTP errors', async () => { + store.record({ title: 'bad' }); + vi.spyOn(globalThis, 'fetch').mockResolvedValue( + new Response('nope', { status: 500, statusText: 'Server Error' }) + ); + const res = await sync.push(); + expect(res.success).toBe(false); + expect(res.error).toContain('500'); + }); +}); diff --git a/src/core/brain/brain-store.ts b/src/core/brain/brain-store.ts new file mode 100644 index 00000000..96c7f874 --- /dev/null +++ b/src/core/brain/brain-store.ts @@ -0,0 +1,241 @@ +/** + * StackMemory Brain — local store. + * + * SQLite-backed store for shared knowledge entries. The table is created lazily + * so the brain works in any StackMemory database (or a dedicated brain.db). + * Search is a scoped LIKE match — deliberately simple and dependency-free so it + * runs identically across every agent's environment. + */ + +import type Database from 'better-sqlite3'; +import { randomUUID } from 'crypto'; +import { + type BrainEntry, + type BrainRecordInput, + type BrainQuery, + BRAIN_TABLE, + DEFAULT_BRAIN_LIMIT, +} from './types.js'; + +interface BrainRow { + entry_id: string; + workspace_id: string; + project_id: string; + agent: string; + kind: string; + title: string; + summary: string; + conclusion: string; + tags: string; + refs: string; + confidence: number; + status: string; + superseded_by: string | null; + created_at: number; + updated_at: number; +} + +export class BrainStore { + private db: Database.Database; + private workspaceId: string; + private projectId: string; + + constructor( + db: Database.Database, + scope: { projectId: string; workspaceId?: string } + ) { + this.db = db; + this.projectId = scope.projectId; + this.workspaceId = scope.workspaceId ?? 
''; + this.ensureTable(); + } + + /** Create the brain_entries table + indexes if they don't exist. */ + ensureTable(): void { + this.db.exec(` + CREATE TABLE IF NOT EXISTS ${BRAIN_TABLE} ( + entry_id TEXT PRIMARY KEY, + workspace_id TEXT NOT NULL DEFAULT '', + project_id TEXT NOT NULL, + agent TEXT NOT NULL DEFAULT 'claude', + kind TEXT NOT NULL DEFAULT 'note', + title TEXT NOT NULL, + summary TEXT NOT NULL DEFAULT '', + conclusion TEXT NOT NULL DEFAULT '', + tags TEXT NOT NULL DEFAULT '[]', + refs TEXT NOT NULL DEFAULT '[]', + confidence REAL NOT NULL DEFAULT 0.7, + status TEXT NOT NULL DEFAULT 'active', + superseded_by TEXT, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL + ); + CREATE INDEX IF NOT EXISTS idx_brain_project ON ${BRAIN_TABLE}(project_id, created_at DESC); + CREATE INDEX IF NOT EXISTS idx_brain_workspace ON ${BRAIN_TABLE}(workspace_id, created_at DESC); + `); + } + + /** Record (or upsert by entryId) a brain entry. */ + record(input: BrainRecordInput): BrainEntry { + const now = Date.now(); + const entry: BrainEntry = { + entryId: input.entryId ?? randomUUID(), + workspaceId: this.workspaceId, + projectId: this.projectId, + agent: input.agent ?? 'claude', + kind: input.kind ?? 'note', + title: input.title, + summary: input.summary ?? '', + conclusion: input.conclusion ?? '', + tags: input.tags ?? [], + refs: input.refs ?? [], + confidence: clamp01(input.confidence ?? 0.7), + status: 'active', + createdAt: input.createdAt ?? now, + updatedAt: input.updatedAt ?? 
now, + }; + + this.db + .prepare( + `INSERT INTO ${BRAIN_TABLE} + (entry_id, workspace_id, project_id, agent, kind, title, summary, + conclusion, tags, refs, confidence, status, superseded_by, + created_at, updated_at) + VALUES (@entryId, @workspaceId, @projectId, @agent, @kind, @title, + @summary, @conclusion, @tags, @refs, @confidence, @status, + NULL, @createdAt, @updatedAt) + ON CONFLICT(entry_id) DO UPDATE SET + agent = excluded.agent, + kind = excluded.kind, + title = excluded.title, + summary = excluded.summary, + conclusion = excluded.conclusion, + tags = excluded.tags, + refs = excluded.refs, + confidence = excluded.confidence, + updated_at = excluded.updated_at` + ) + .run({ + ...entry, + tags: JSON.stringify(entry.tags), + refs: JSON.stringify(entry.refs), + }); + + return entry; + } + + /** Fetch a single entry by id (or unique prefix). */ + get(entryId: string): BrainEntry | null { + const row = this.db + .prepare( + `SELECT * FROM ${BRAIN_TABLE} WHERE entry_id = ? OR entry_id LIKE ? LIMIT 1` + ) + .get(entryId, `${entryId}%`) as BrainRow | undefined; + return row ? rowToEntry(row) : null; + } + + /** Search entries by scope + free text, newest first. */ + recall(query: BrainQuery = {}): BrainEntry[] { + const where: string[] = []; + const params: unknown[] = []; + + if (query.org) { + // Org-wide: match the workspace across all repos. + where.push('workspace_id = ?'); + params.push(this.workspaceId); + } else { + where.push('project_id = ?'); + params.push(query.projectId ?? this.projectId); + } + + if (!query.includeSuperseded) { + where.push("status = 'active'"); + } + if (query.agent) { + where.push('agent = ?'); + params.push(query.agent); + } + if (query.kind) { + where.push('kind = ?'); + params.push(query.kind); + } + if (query.since) { + where.push('created_at >= ?'); + params.push(query.since); + } + if (query.text) { + where.push( + '(title LIKE ? OR summary LIKE ? OR conclusion LIKE ? 
OR tags LIKE ?)' + ); + const like = `%${query.text}%`; + params.push(like, like, like, like); + }
+
+    // SQLite raises an error when LIMIT is not losslessly convertible to an
+    // integer, so a NaN limit (e.g. `--limit abc` after parseInt) or a
+    // fractional one must be normalised before it is bound. Non-finite values
+    // fall back to the default; finite ones are floored and kept >= 1.
+    const requested = query.limit ?? DEFAULT_BRAIN_LIMIT;
+    const limit = Number.isFinite(requested)
+      ? Math.max(1, Math.floor(requested))
+      : DEFAULT_BRAIN_LIMIT;
+    const rows = this.db
+      .prepare(
+        `SELECT * FROM ${BRAIN_TABLE}
+         WHERE ${where.join(' AND ')}
+         ORDER BY created_at DESC
+         LIMIT ?`
+      )
+      .all(...params, limit) as BrainRow[];
+
+    return rows.map(rowToEntry);
+  }
+
+ /** Mark `oldId` superseded by `newId`. */ + supersede(oldId: string, newId: string): void { + this.db + .prepare( + `UPDATE ${BRAIN_TABLE} + SET status = 'superseded', superseded_by = ?, updated_at = ? + WHERE entry_id = ?` + ) + .run(newId, Date.now(), oldId); + } + + /** Count entries in scope (for status output). */ + count(org = false): number { + const col = org ? 'workspace_id' : 'project_id'; + const val = org ? this.workspaceId : this.projectId; + const row = this.db + .prepare(`SELECT COUNT(*) AS n FROM ${BRAIN_TABLE} WHERE ${col} = ?`) + .get(val) as { n: number }; + return row.n; + } +} + +function rowToEntry(row: BrainRow): BrainEntry { + return { + entryId: row.entry_id, + workspaceId: row.workspace_id, + projectId: row.project_id, + agent: row.agent, + kind: row.kind as BrainEntry['kind'], + title: row.title, + summary: row.summary, + conclusion: row.conclusion, + tags: safeParse(row.tags), + refs: safeParse(row.refs), + confidence: row.confidence, + status: row.status as BrainEntry['status'], + supersededBy: row.superseded_by ?? undefined, + createdAt: row.created_at, + updatedAt: row.updated_at, + }; +} + +function safeParse(json: string): string[] { + try { + const v = JSON.parse(json); + return Array.isArray(v) ? 
v.map(String) : []; + } catch { + return []; + } +} + +function clamp01(n: number): number { + if (Number.isNaN(n)) return 0.7; + return Math.max(0, Math.min(1, n)); +} diff --git a/src/core/brain/brain-sync.ts b/src/core/brain/brain-sync.ts new file mode 100644 index 00000000..1965c943 --- /dev/null +++ b/src/core/brain/brain-sync.ts @@ -0,0 +1,256 @@ +/** + * StackMemory Brain — online sync. + * + * Pushes/pulls brain entries to the hosted Provenant API so a repo's (and an + * org's) shared context is available on every machine and to every agent. + * Isolated from the frame CloudSyncEngine so it can never regress that path. + * + * Wire contract (server side, mirrors the cloud-sync protocol): + * POST {endpoint}/v1/brain/push + * { protocolVersion: 1, clientId, workspaceId, projectId, since, entries } + * -> { accepted, serverCursor } + * POST {endpoint}/v1/brain/pull + * { protocolVersion: 1, clientId, workspaceId, projectId, since, limit } + * -> { entries, serverCursor, hasMore } + * + * Auth: Bearer {apiKey}, X-Client-Id: {clientId}. Offline/unreachable degrades + * to local-only (success: false, never throws). 
+ */
+
+import type Database from 'better-sqlite3';
+import type { BrainStore } from './brain-store.js';
+import type { BrainEntry, BrainSyncResult } from './types.js';
+
+/** Connection and scope settings for {@link BrainSync}. */
+export interface BrainSyncConfig {
+  endpoint: string;
+  apiKey: string;
+  workspaceId: string;
+  projectId: string;
+  clientId: string;
+  /** Per-request timeout in milliseconds (default 30000). */
+  timeoutMs?: number;
+  /** Row limit for one push query and the `limit` sent on pull (default 200). */
+  batchSize?: number;
+}
+
+interface BrainPushResponse {
+  accepted?: number;
+  serverCursor?: number;
+}
+interface BrainPullResponse {
+  entries?: BrainEntry[];
+  serverCursor?: number;
+  hasMore?: boolean;
+}
+
+const BRAIN_TABLE = 'brain_entries';
+
+/**
+ * Pushes locally-updated brain rows to the remote API and pulls remote entries
+ * into the local store. Cursors for both directions are persisted in the
+ * `brain_sync_meta` table of the same database. Network and HTTP failures are
+ * reported through `BrainSyncResult.success === false` rather than thrown.
+ */
+export class BrainSync {
+  private db: Database.Database;
+  private store: BrainStore;
+  private config: Required<BrainSyncConfig>;
+
+  constructor(
+    db: Database.Database,
+    store: BrainStore,
+    config: BrainSyncConfig
+  ) {
+    this.db = db;
+    this.store = store;
+    this.config = {
+      ...config,
+      // Applied after the spread with `??` so that an explicit
+      // `timeoutMs: undefined` / `batchSize: undefined` still gets the default
+      // (spreading over defaults would overwrite them with undefined, giving
+      // an immediate abort timer and an unbindable LIMIT).
+      timeoutMs: config.timeoutMs ?? 30000,
+      batchSize: config.batchSize ?? 200,
+    };
+    this.ensureMeta();
+  }
+
+  /** Create the cursor table on first use. */
+  private ensureMeta(): void {
+    this.db.exec(`
+      CREATE TABLE IF NOT EXISTS brain_sync_meta (
+        direction TEXT PRIMARY KEY,
+        cursor INTEGER NOT NULL DEFAULT 0
+      );
+    `);
+  }
+
+  /** Last persisted cursor for a direction; 0 when none has been stored. */
+  private getCursor(direction: 'push' | 'pull'): number {
+    const row = this.db
+      .prepare('SELECT cursor FROM brain_sync_meta WHERE direction = ?')
+      .get(direction) as { cursor: number } | undefined;
+    return row?.cursor ?? 0;
+  }
+
+  /** Upsert the cursor for a direction. */
+  private setCursor(direction: 'push' | 'pull', cursor: number): void {
+    this.db
+      .prepare(
+        `INSERT INTO brain_sync_meta (direction, cursor) VALUES (?, ?)
+         ON CONFLICT(direction) DO UPDATE SET cursor = excluded.cursor`
+      )
+      .run(direction, cursor);
+  }
+
+  /**
+   * Push locally-updated entries to the cloud.
+   *
+   * Sends one batch of rows whose `updated_at` is past the push cursor. On
+   * success the cursor advances; on failure it is left untouched so the same
+   * rows are retried by the next call.
+   */
+  async push(): Promise<BrainSyncResult> {
+    const since = this.getCursor('push');
+    const rows = this.db
+      .prepare(
+        `SELECT * FROM ${BRAIN_TABLE} WHERE updated_at > ?
+         ORDER BY updated_at ASC LIMIT ?`
+      )
+      .all(since, this.config.batchSize) as Array<Record<string, unknown>>;
+
+    if (rows.length === 0) {
+      return { success: true, pushed: 0, pulled: 0, applied: 0 };
+    }
+
+    // The cursor is a bare timestamp compared with `>`. If LIMIT cut the batch
+    // in the middle of a run of rows sharing one updated_at, advancing the
+    // cursor to that timestamp would skip the remainder of the run on every
+    // later push. A full batch is therefore topped up with the other rows at
+    // its final timestamp.
+    if (rows.length >= this.config.batchSize) {
+      const last = rows[rows.length - 1];
+      const lastUpdated = last?.['updated_at'];
+      const seen = new Set(rows.map((r) => r['entry_id']));
+      const ties = this.db
+        .prepare(`SELECT * FROM ${BRAIN_TABLE} WHERE updated_at = ?`)
+        .all(lastUpdated) as Array<Record<string, unknown>>;
+      for (const tie of ties) {
+        if (!seen.has(tie['entry_id'])) rows.push(tie);
+      }
+    }
+
+    const entries = rows.map(toWireEntry);
+    const maxUpdated = Math.max(...entries.map((e) => e.updatedAt));
+
+    try {
+      const res = await this.post<BrainPushResponse>('/v1/brain/push', {
+        protocolVersion: 1,
+        clientId: this.config.clientId,
+        workspaceId: this.config.workspaceId,
+        projectId: this.config.projectId,
+        since,
+        entries,
+      });
+      // Never regress below what we just pushed, even if the server reports a
+      // smaller cursor — otherwise we'd re-push the same rows forever.
+      this.setCursor('push', Math.max(maxUpdated, res.serverCursor ?? 0));
+      return {
+        success: true,
+        pushed: res.accepted ?? entries.length,
+        pulled: 0,
+        applied: 0,
+      };
+    } catch (err) {
+      return {
+        success: false,
+        pushed: 0,
+        pulled: 0,
+        applied: 0,
+        error: errMsg(err),
+      };
+    }
+  }
+
+  /**
+   * Pull remote entries and apply them locally (newest-wins).
+   *
+   * Requests one batch per call; `hasMore` in the response is not consulted
+   * here, so callers that need to drain the server must call again.
+   */
+  async pull(): Promise<BrainSyncResult> {
+    const since = this.getCursor('pull');
+    try {
+      const res = await this.post<BrainPullResponse>('/v1/brain/pull', {
+        protocolVersion: 1,
+        clientId: this.config.clientId,
+        workspaceId: this.config.workspaceId,
+        projectId: this.config.projectId,
+        since,
+        limit: this.config.batchSize,
+      });
+
+      const entries = res.entries ?? [];
+      let applied = 0;
+      let maxUpdated = since;
+
+      for (const remote of entries) {
+        maxUpdated = Math.max(maxUpdated, remote.updatedAt ?? 0);
+        const local = this.store.get(remote.entryId);
+        if (local && local.updatedAt >= (remote.updatedAt ?? 0)) continue; // newest-wins
+        // NOTE(review): `status` / `supersededBy` are not forwarded — the
+        // record input has no such fields. Confirm how superseded remote
+        // entries should be represented locally.
+        this.store.record({
+          entryId: remote.entryId,
+          agent: remote.agent,
+          kind: remote.kind,
+          title: remote.title,
+          summary: remote.summary,
+          conclusion: remote.conclusion,
+          tags: remote.tags,
+          refs: remote.refs,
+          confidence: remote.confidence,
+          createdAt: remote.createdAt,
+          updatedAt: remote.updatedAt,
+        });
+        applied++;
+      }
+
+      this.setCursor('pull', Math.max(maxUpdated, res.serverCursor ?? 0));
+      return { success: true, pushed: 0, pulled: entries.length, applied };
+    } catch (err) {
+      return {
+        success: false,
+        pushed: 0,
+        pulled: 0,
+        applied: 0,
+        error: errMsg(err),
+      };
+    }
+  }
+
+  /** Push then pull in one shot. The pull runs even when the push failed. */
+  async sync(): Promise<BrainSyncResult> {
+    const pushed = await this.push();
+    const pulled = await this.pull();
+    return {
+      success: pushed.success && pulled.success,
+      pushed: pushed.pushed,
+      pulled: pulled.pulled,
+      applied: pulled.applied,
+      error: pushed.error ?? pulled.error,
+    };
+  }
+
+  /**
+   * POST a JSON body to `{endpoint}{path}` with bearer auth and return the
+   * parsed JSON response.
+   *
+   * @throws Error on a non-2xx status, on network failure, or when the
+   *   request is aborted after `timeoutMs`.
+   */
+  private async post<T>(path: string, body: unknown): Promise<T> {
+    const controller = new AbortController();
+    const timer = setTimeout(() => controller.abort(), this.config.timeoutMs);
+    // Tolerate a configured endpoint with a trailing slash; `path` always
+    // starts with '/', so plain concatenation would produce '//v1/...'.
+    const base = this.config.endpoint.replace(/\/+$/, '');
+    try {
+      const res = await fetch(`${base}${path}`, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          Authorization: `Bearer ${this.config.apiKey}`,
+          'X-Client-Id': this.config.clientId,
+        },
+        body: JSON.stringify(body),
+        signal: controller.signal,
+      });
+      if (!res.ok) {
+        throw new Error(`${res.status} ${res.statusText}`);
+      }
+      return (await res.json()) as T;
+    } finally {
+      clearTimeout(timer);
+    }
+  }
+}
+
+/**
+ * Convert a raw `brain_entries` row (snake_case columns, JSON-encoded
+ * `tags` / `refs`) into the camelCase wire shape.
+ */
+function toWireEntry(row: Record<string, unknown>): BrainEntry {
+  const parse = (v: unknown): string[] => {
+    try {
+      const a = JSON.parse(String(v ?? '[]'));
+      return Array.isArray(a) ? a.map(String) : [];
+    } catch {
+      return [];
+    }
+  };
+  // A non-numeric stored value would otherwise go out as NaN, which
+  // JSON.stringify turns into null; fall back to the same 0.7 default used
+  // when the column is missing.
+  const confidence = Number(row['confidence'] ?? 0.7);
+  return {
+    entryId: String(row['entry_id']),
+    workspaceId: String(row['workspace_id'] ?? ''),
+    projectId: String(row['project_id']),
+    agent: String(row['agent']),
+    kind: String(row['kind']) as BrainEntry['kind'],
+    title: String(row['title']),
+    summary: String(row['summary'] ?? ''),
+    conclusion: String(row['conclusion'] ?? ''),
+    tags: parse(row['tags']),
+    refs: parse(row['refs']),
+    confidence: Number.isFinite(confidence) ? confidence : 0.7,
+    status: String(row['status'] ?? 'active') as BrainEntry['status'],
+    supersededBy: (row['superseded_by'] as string | null) ?? undefined,
+    createdAt: Number(row['created_at']),
+    updatedAt: Number(row['updated_at']),
+  };
+}
+
+/** Human-readable message for a caught value; aborts read as timeouts. */
+function errMsg(err: unknown): string {
+  if (err instanceof Error) {
+    return err.name === 'AbortError' ? 'request timed out' : err.message;
+  }
+  return String(err);
+}
diff --git a/src/core/brain/index.ts b/src/core/brain/index.ts
new file mode 100644
index 00000000..9d8fa7a1
--- /dev/null
+++ b/src/core/brain/index.ts
@@ -0,0 +1,113 @@
+/**
+ * StackMemory Brain — shared, compounding context state.
+ *
+ * Public API + helpers to resolve the local DB, scope (repo + org), and the
+ * online sync config from the same auth that `stackmemory login` writes.
+ */
+
+import { homedir, hostname } from 'os';
+import { join } from 'path';
+import { existsSync, readFileSync, mkdirSync } from 'fs';
+import { createHash } from 'crypto';
+import Database from 'better-sqlite3';
+import { BrainStore } from './brain-store.js';
+import { BrainSync, type BrainSyncConfig } from './brain-sync.js';
+
+export { BrainStore } from './brain-store.js';
+export { BrainSync, type BrainSyncConfig } from './brain-sync.js';
+export * from './types.js';
+
+const DEFAULT_ENDPOINT = 'https://provenant-api.jpwu03.workers.dev';
+
+interface AuthConfig {
+  apiKey?: string;
+  apiUrl?: string;
+  projectId?: string;
+  workspaceId?: string;
+}
+
+/**
+ * Load the `auth` section of ~/.stackmemory/config.json.
+ * A missing file, unreadable file, or invalid JSON all yield an empty config.
+ */
+function readAuth(): AuthConfig {
+  const configFile = join(homedir(), '.stackmemory', 'config.json');
+  if (existsSync(configFile)) {
+    try {
+      const parsed = JSON.parse(readFileSync(configFile, 'utf8'));
+      return (parsed.auth ?? {}) as AuthConfig;
+    } catch {
+      // unreadable or malformed config: fall through to the empty result
+    }
+  }
+  return {};
+}
+
+/** First 16 hex characters of the SHA-256 digest of `input`. */
+function hashId(input: string): string {
+  const digest = createHash('sha256').update(input).digest('hex');
+  return digest.slice(0, 16);
+}
+
+/**
+ * Resolve repo (projectId) + org (workspaceId) scope for a project dir.
+ * Environment variables win over the saved auth config; the project id
+ * finally falls back to a hash of the directory path, the workspace id to ''.
+ */
+export function resolveScope(projectDir: string): {
+  projectId: string;
+  workspaceId: string;
+} {
+  const { projectId: savedProject, workspaceId: savedWorkspace } = readAuth();
+  return {
+    projectId:
+      process.env['PROVENANT_PROJECT_ID'] ||
+      savedProject ||
+      hashId(projectDir),
+    workspaceId: process.env['PROVENANT_WORKSPACE_ID'] || savedWorkspace || '',
+  };
+}
+
+/** Resolve the local DB path, mirroring `stackmemory sync`.
*/
+export function resolveDbPath(projectDir: string): string {
+  const contextDb = join(projectDir, '.stackmemory', 'context.db');
+  if (existsSync(contextDb)) return contextDb;
+  const localDb = join(projectDir, '.stackmemory', 'stackmemory.db');
+  if (existsSync(localDb)) return localDb;
+  const globalDb = join(homedir(), '.stackmemory', 'stackmemory.db');
+  if (existsSync(globalDb)) return globalDb;
+  // Default: create the project-local DB so the brain always has a home.
+  mkdirSync(join(projectDir, '.stackmemory'), { recursive: true });
+  return contextDb;
+}
+
+export interface BrainContext {
+  db: Database.Database;
+  store: BrainStore;
+  projectId: string;
+  workspaceId: string;
+  /** Online sync — null when not logged in / no API key. */
+  sync: BrainSync | null;
+  close(): void;
+}
+
+/**
+ * Open the brain for a project directory: local store always works; online
+ * sync is wired only when auth is configured.
+ *
+ * The caller owns the returned context and must call `close()` to release the
+ * database handle. If setting up the store or sync throws, the handle opened
+ * here is closed before the error is rethrown.
+ *
+ * @param projectDir Project root used for scope and DB resolution
+ *   (defaults to the current working directory).
+ */
+export function openBrain(projectDir: string = process.cwd()): BrainContext {
+  const { projectId, workspaceId } = resolveScope(projectDir);
+  const dbPath = resolveDbPath(projectDir);
+  const db = new Database(dbPath);
+  try {
+    const store = new BrainStore(db, { projectId, workspaceId });
+
+    const auth = readAuth();
+    const apiKey = process.env['PROVENANT_API_KEY'] || auth.apiKey;
+    let sync: BrainSync | null = null;
+    if (apiKey) {
+      const syncConfig: BrainSyncConfig = {
+        endpoint:
+          process.env['PROVENANT_API_URL'] || auth.apiUrl || DEFAULT_ENDPOINT,
+        apiKey,
+        workspaceId,
+        projectId,
+        clientId: hashId(hostname() + projectDir),
+      };
+      // The BrainSync constructor creates its cursor table, so it can throw
+      // on a read-only or corrupt DB.
+      sync = new BrainSync(db, store, syncConfig);
+    }
+
+    return {
+      db,
+      store,
+      projectId,
+      workspaceId,
+      sync,
+      close: () => db.close(),
+    };
+  } catch (err) {
+    // Nobody else holds a reference to `db` yet; without this the SQLite
+    // handle would leak when construction fails.
+    db.close();
+    throw err;
+  }
+}
diff --git a/src/core/brain/types.ts b/src/core/brain/types.ts
new file mode 100644
index 00000000..005d9a12
--- /dev/null
+++ b/src/core/brain/types.ts
@@ -0,0 +1,103 @@
+/**
+ * StackMemory Brain — shared, compounding context state.
+ *
+ * The "brain" is a knowledge layer that any agent (Claude, Codex, OpenCode,
+ * Hermes, …) writes to and reads from. Every experiment, decision, or insight
+ * is recorded as a BrainEntry with a summary + conclusion, scoped to a repo
+ * (projectId) and an org (workspaceId). Entries sync online so mutual thinking
+ * compounds across machines and tools.
+ */
+
+/** Which agent/tool produced an entry. Free-form, but these are canonical. */
+export type BrainAgent =
+  | 'claude'
+  | 'codex'
+  | 'opencode'
+  | 'hermes'
+  | 'gemini'
+  | 'human'
+  | string;
+
+/** The kind of knowledge an entry captures. */
+export type BrainKind =
+  | 'experiment' // a thing tried, with a conclusion
+  | 'decision' // a choice made and why
+  | 'insight' // a learning worth remembering
+  | 'note'; // free-form context
+
+export type BrainStatus = 'active' | 'superseded';
+
+export interface BrainScope {
+  /** Repo-level scope. Derived from the project dir hash if not explicit. */
+  projectId: string;
+  /** Org-level scope. From `stackmemory login` (workspaceId). Optional. */
+  workspaceId?: string;
+}
+
+export interface BrainEntry {
+  entryId: string;
+  workspaceId: string; // '' when not logged in to an org
+  projectId: string;
+  agent: BrainAgent;
+  kind: BrainKind;
+  title: string;
+  /** What was done / the context. */
+  summary: string;
+  /** What was concluded — the compounding payload. */
+  conclusion: string;
+  /** Free-form tags for retrieval. */
+  tags: string[];
+  /** Links to frames, issues, commits, PRs, files, etc. */
+  refs: string[];
+  /** 0..1 — how much to trust this entry. */
+  confidence: number;
+  status: BrainStatus;
+  /** entryId that replaces this one, if superseded. */
+  supersededBy?: string;
+  createdAt: number;
+  updatedAt: number;
+}
+
+/** Fields callers provide when recording; the store fills in the rest.
*/
+export interface BrainRecordInput {
+  title: string;
+  summary?: string;
+  conclusion?: string;
+  agent?: BrainAgent;
+  kind?: BrainKind;
+  tags?: string[];
+  refs?: string[];
+  confidence?: number;
+  entryId?: string; // for upserts / supersede chains
+  createdAt?: number;
+  updatedAt?: number;
+}
+
+/** Filter for reading entries back out of the brain. */
+export interface BrainQuery {
+  /** Free-text match across title/summary/conclusion/tags. */
+  text?: string;
+  agent?: BrainAgent;
+  kind?: BrainKind;
+  /** Restrict to a single repo. Defaults to the current project. */
+  projectId?: string;
+  /**
+   * Widen the search to the whole org (all repos in the workspace).
+   * When true, projectId is ignored.
+   */
+  org?: boolean;
+  /** Only entries created at/after this epoch-ms. */
+  since?: number;
+  includeSuperseded?: boolean;
+  limit?: number;
+}
+
+/** Outcome of a brain push, pull, or combined sync. */
+export interface BrainSyncResult {
+  /** False when the request failed; `error` then carries the message. */
+  success: boolean;
+  /** Entries the server accepted (or the number sent, if it did not say). */
+  pushed: number;
+  /** Entries received from the server. */
+  pulled: number;
+  /** Received entries actually written locally after newest-wins. */
+  applied: number;
+  /** Failure message; set only when `success` is false. */
+  error?: string;
+}
+
+export const BRAIN_TABLE = 'brain_entries';
+export const DEFAULT_BRAIN_LIMIT = 20;
From 493a401ee987c326fc95aa3a72ffe6a47499055b Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 8 Jun 2026 15:12:30 +0000
Subject: [PATCH 3/3] feat(vision): VISION.md-driven meta-loop above the conductor

A meta-orchestration layer one level above a single goal. A VISION.md
(north-star mission + guardrails + ordered objectives + hard limits) drives a
bounded loop that draws work from BOTH the objectives and a monitored signal
inbox, delegates one item per tick to the conductor, and records the outcome
to the shared brain so thinking compounds.
- vision-file: parse/scaffold VISION.md, toggle objective checkboxes - signals: JSONL inbox for the monitored source (bugs, CI, issues) - vision-loop: select (signals outrank objectives) -> guardrail-gate -> brain-dedupe (no repeats) -> delegate (injected) -> record -> mark done - guardrails: maxIterations, maxIterationsPerDay, consecutive-failure circuit breaker, requireApproval, daily-resetting persisted state - stackmemory conductor vision init|status|signal|plan|run - run is plan-only unless --delegate-cmd is given (anti-haywire default) - 13 tests; docs/guides/VISION.md https://claude.ai/code/session_01Gk8DiqCeG9uMaWT9RprwP1 --- docs/guides/VISION.md | 149 +++++++++++ src/cli/commands/orchestrate.ts | 4 + src/cli/commands/vision.ts | 303 ++++++++++++++++++++++ src/core/vision/__tests__/vision.test.ts | 238 +++++++++++++++++ src/core/vision/index.ts | 21 ++ src/core/vision/signals.ts | 101 ++++++++ src/core/vision/types.ts | 111 ++++++++ src/core/vision/vision-file.ts | 194 ++++++++++++++ src/core/vision/vision-loop.ts | 315 +++++++++++++++++++++++ 9 files changed, 1436 insertions(+) create mode 100644 docs/guides/VISION.md create mode 100644 src/cli/commands/vision.ts create mode 100644 src/core/vision/__tests__/vision.test.ts create mode 100644 src/core/vision/index.ts create mode 100644 src/core/vision/signals.ts create mode 100644 src/core/vision/types.ts create mode 100644 src/core/vision/vision-file.ts create mode 100644 src/core/vision/vision-loop.ts diff --git a/docs/guides/VISION.md b/docs/guides/VISION.md new file mode 100644 index 00000000..19bca8a7 --- /dev/null +++ b/docs/guides/VISION.md @@ -0,0 +1,149 @@ +# StackMemory Vision — the 24/7 meta-loop + +> Your agents run 24/7. A single goal isn't enough to keep them on track — and +> too loose a goal makes them go haywire. **VISION.md** is the guardrail: a +> north-star mission, hard limits, and an ordered objective list. 
The vision +> loop runs one level above any single task, drawing work from both the +> objectives *and* a monitored signal source, delegating to the conductor, and +> recording every conclusion to the shared brain so thinking compounds. + +``` + VISION.md (mission + guardrails + objectives + limits) + │ + signals ──► vision loop ──► consult brain ──► conductor ──► PR + (bugs, CI, │ guardrails │ (no repeats) (executor) + issues) │ checked ▼ + └──────────────► record outcome ──► brain (compounds) +``` + +This is the layer that turns "build a feature" into "build the app" and "fix a +bug" into "watch the bug stream and fix them as they arrive." + +## Quick start + +```bash +stackmemory conductor vision init # scaffold VISION.md +$EDITOR VISION.md # set the mission, scope, objectives +stackmemory conductor vision plan # dry-run: what's the next action? +stackmemory conductor vision run --once --dry-run + +# Act for real — provide how to delegate one objective: +stackmemory conductor vision run --delegate-cmd 'claude -p "{{OBJECTIVE}}"' +``` + +> **Safety:** `run` is **plan-only** unless you pass `--delegate-cmd`. The loop +> never spawns autonomous agents by accident. + +## VISION.md format + +Plain markdown, so it stays human-editable and reviewable: + +```markdown +# Vision + +Ship a reliable, self-healing sync layer that any agent can depend on. + +## Guardrails +- Stay within the scope below; never touch secrets or deploy/publish. +- Open a PR for review; never merge to the default branch autonomously. +- If an objective is ambiguous or risky, stop and ask a human. 
+ +## Scope +- src/** +- docs/** + +## Objectives +- [ ] add retry with jitter to the sync client +- [x] write the protocol types +- [ ] add a `sync status` command + +## Limits +maxIterations: 10 +maxIterationsPerDay: 50 +maxConsecutiveFailures: 3 +tickIntervalSec: 60 +requireApproval: false +stopWhenComplete: true +``` + +The loop reloads VISION.md **every tick**, so editing the file (or checking a +box) changes its behavior live. + +## Two sources of work + +Per the design, the loop draws objectives from **both**: + +1. **VISION.md objectives** — the planned, ordered backlog (the "build the app" + direction). Completed objectives get their checkbox ticked automatically. +2. **A monitored signal inbox** — reactive work that arrives over time (the "fix + bugs as they show up" direction): + + ```bash + stackmemory conductor vision signal "500s on /sync after deploy" --severity high + ``` + + Anything can feed it — a CI hook, a GitHub-issue poller, a bug-report + webhook — by appending to `.stackmemory/vision/signals.jsonl` or calling the + `signal` command. **Pending signals outrank objectives**, so urgent issues + preempt planned work, bounded by the same guardrails. + +## Guardrails (the anti-haywire layer) + +Every tick is gated by `## Limits`: + +| Limit | Effect | +|-------|--------| +| `maxIterations` | objectives handled per `run` | +| `maxIterationsPerDay` | objectives handled per calendar day (persisted) | +| `maxConsecutiveFailures` | circuit breaker — stop after N failures in a row | +| `tickIntervalSec` | delay between ticks | +| `requireApproval` | when true, the loop only plans + queues, never delegates | +| `stopWhenComplete` | stop once objectives are done and no signals remain | + +Loop state (today's count, consecutive failures) lives in +`.stackmemory/vision/state.json` and resets daily. 
+ +## Brain integration (compounding) + +Before delegating, the loop asks the [brain](./BRAIN.md) whether this exact work +was already concluded — if so it **skips it** (and ticks the objective), so the +loop never repeats itself across machines or agents. After delegating, the +outcome is recorded as a brain `experiment` (agent `vision`) with the +conclusion, tags, and refs — feeding the same compounding memory every other +agent reads. + +## Running it on the portal (24/7) + +On your Hetzner + Tailscale [portal](./PORTAL.md) box, run the loop inside the +tmux session so it survives disconnects: + +```bash +# inside the tmux 'claude' session +stackmemory conductor vision run \ + --delegate-cmd 'claude -p "{{OBJECTIVE}}. Stay within VISION.md scope. Open a PR."' +``` + +Check in from any device via the portal; the loop keeps working the vision and +the signal stream while you experience life. + +## CLI + +```bash +stackmemory conductor vision init [--force] +stackmemory conductor vision status [--json] +stackmemory conductor vision signal <text> [--severity] [--source] [--refs] +stackmemory conductor vision plan [--max <n>] +stackmemory conductor vision run [--once] [--max <n>] [--dry-run] \ + [--delegate-cmd <tpl>] [--timeout <sec>] +``` + +`--delegate-cmd` substitutes `{{OBJECTIVE}}`, `{{KIND}}`, and `{{REFS}}`. 
+ +## Files + +| Path | Purpose | +|------|---------| +| `src/core/vision/vision-file.ts` | VISION.md parse / scaffold / objective toggle | +| `src/core/vision/signals.ts` | monitored signal inbox (JSONL) | +| `src/core/vision/vision-loop.ts` | the guardrailed loop (select → gate → dedupe → delegate → record) | +| `src/cli/commands/vision.ts` | `stackmemory conductor vision` command | diff --git a/src/cli/commands/orchestrate.ts b/src/cli/commands/orchestrate.ts index af8a34c1..6f7a3184 100644 --- a/src/cli/commands/orchestrate.ts +++ b/src/cli/commands/orchestrate.ts @@ -25,6 +25,7 @@ import Database from 'better-sqlite3'; import { logger } from '../../core/monitoring/logger.js'; import { isProcessAlive } from '../../utils/process-cleanup.js'; import { Conductor } from './orchestrator.js'; +import { createVisionCommand } from './vision.js'; import { getAgentStatusDir, getOutcomesLogPath, @@ -903,6 +904,9 @@ export function createConductorCommands(): Command { cmd.help(); }); + // --- vision (meta-loop above the conductor) --- + cmd.addCommand(createVisionCommand()); + // --- capture --- cmd .command('capture') diff --git a/src/cli/commands/vision.ts b/src/cli/commands/vision.ts new file mode 100644 index 00000000..6fbcbad8 --- /dev/null +++ b/src/cli/commands/vision.ts @@ -0,0 +1,303 @@ +/** + * StackMemory Vision CLI — `stackmemory conductor vision ...` + * + * The meta-orchestration layer above the conductor: a VISION.md north-star + + * guardrails drives a bounded loop that draws work from both the VISION.md + * objectives and a monitored signal inbox, delegates to the conductor, and + * records conclusions to the shared brain. + * + * Safety: `run` is plan-only unless an explicit --delegate-cmd is provided, so + * it never spawns autonomous agents by accident. 
+ */
+
+import { Command } from 'commander';
+import chalk from 'chalk';
+import { spawnSync } from 'child_process';
+import { join } from 'path';
+import { openBrain } from '../../core/brain/index.js';
+import {
+  VisionLoop,
+  SignalInbox,
+  loadVision,
+  scaffoldVision,
+  type BrainPort,
+  type Delegate,
+  type Candidate,
+  type TickDecision,
+  type SignalSeverity,
+} from '../../core/vision/index.js';
+
+/** Locations of the vision file, loop state, and signal inbox under `cwd`. */
+function paths(cwd: string) {
+  return {
+    visionPath: join(cwd, 'VISION.md'),
+    statePath: join(cwd, '.stackmemory', 'vision', 'state.json'),
+    signalsPath: join(cwd, '.stackmemory', 'vision', 'signals.jsonl'),
+  };
+}
+
+/**
+ * Build a delegate that runs a shell command per objective.
+ *
+ * The `{{OBJECTIVE}}`, `{{KIND}}` and `{{REFS}}` placeholders in `template`
+ * are rewritten to the shell variable references `${VISION_OBJECTIVE}`,
+ * `${VISION_KIND}` and `${VISION_REFS}`, and the actual values are handed to
+ * the child through its environment. Candidate text can come from the signal
+ * inbox, so it must never become part of the shell source itself: spliced in
+ * directly, a quote, backtick or `$(...)` in the text would be executed.
+ *
+ * Place placeholders unquoted or inside double quotes, e.g.
+ * `claude -p "{{OBJECTIVE}}"`; inside single quotes the shell will not expand
+ * them.
+ *
+ * @param template  Shell command line containing the placeholders.
+ * @param timeoutMs Per-run timeout passed to `spawnSync`.
+ * @returns A delegate reporting success (exit status 0 and no spawn error)
+ *   and a conclusion of at most 300 characters: the last non-empty stdout
+ *   line on success, otherwise the failure reason plus the last stderr line.
+ */
+function shellDelegate(template: string, timeoutMs: number): Delegate {
+  return async (candidate: Candidate) => {
+    // The replacement strings are fixed text; none of them contains a
+    // replaceAll pattern such as `$&`, so they are inserted literally.
+    const cmd = template
+      .replaceAll('{{OBJECTIVE}}', '${VISION_OBJECTIVE}')
+      .replaceAll('{{KIND}}', '${VISION_KIND}')
+      .replaceAll('{{REFS}}', '${VISION_REFS}');
+    const res = spawnSync('sh', ['-c', cmd], {
+      encoding: 'utf-8',
+      timeout: timeoutMs,
+      maxBuffer: 32 * 1024 * 1024,
+      env: {
+        ...process.env,
+        VISION_OBJECTIVE: candidate.text,
+        VISION_KIND: candidate.kind,
+        VISION_REFS: candidate.refs.join(','),
+      },
+    });
+    const success = res.status === 0 && !res.error;
+    const out = (res.stdout || '').trim().split(/\r?\n/).filter(Boolean);
+    const errTail = (res.stderr || '')
+      .trim()
+      .split(/\r?\n/)
+      .filter(Boolean)
+      .pop();
+    const conclusion = success
+      ? out.pop() || 'completed'
+      : `failed (${res.error?.message || `exit ${res.status}`}): ${errTail ?? ''}`.trim();
+    return { success, conclusion: conclusion.slice(0, 300) };
+  };
+}
+
+function fmtDecision(d: TickDecision): string {
+  if (!d.guardrail.ok) return chalk.red(`⛔ stop: ${d.guardrail.reason}`);
+  if (!d.candidate) return chalk.dim('· nothing to do');
+  const tag =
+    d.candidate.kind === 'signal'
+      ?
chalk.yellow('[signal]')
+      : chalk.cyan('[objective]');
+  const head = `${tag} ${d.candidate.text}`;
+  if (d.skippedAsKnown)
+    return `${head}\n ${chalk.gray('↩ already concluded:')} ${d.priorConclusion}`;
+  if (!d.delegated)
+    return `${head}\n ${chalk.gray('· planned (not delegated)')}`;
+  const mark = d.outcome?.success ? chalk.green('✓') : chalk.red('✗');
+  return `${head}\n ${mark} ${d.outcome?.conclusion}`;
+}
+
+/** Severity levels accepted by `signal --severity` (mirrors its help text). */
+const SEVERITY_LEVELS: readonly SignalSeverity[] = [
+  'low',
+  'medium',
+  'high',
+  'critical',
+];
+
+/**
+ * Parse an integer CLI option, exiting with a clear message when the value is
+ * not an integer >= `min`. Bare `parseInt` would let NaN through to the loop
+ * limit and to the `spawnSync` timeout.
+ */
+function parseIntOption(raw: string, flag: string, min: number): number {
+  const n = Number(raw);
+  if (!Number.isInteger(n) || n < min) {
+    console.error(
+      chalk.red(`${flag} must be an integer >= ${min} (got "${raw}")`)
+    );
+    process.exit(1);
+  }
+  return n;
+}
+
+/**
+ * Build the `vision` sub-command tree: init, status, signal, plan, run.
+ * `run` only delegates when --delegate-cmd is supplied and --dry-run is not.
+ */
+export function createVisionCommand(): Command {
+  const cmd = new Command('vision')
+    .description('VISION.md-driven meta-loop above the conductor')
+    .addHelpText(
+      'after',
+      `
+Examples:
+  stackmemory conductor vision init            Scaffold a VISION.md
+  stackmemory conductor vision status          Mission, objectives, limits
+  stackmemory conductor vision signal "500s on /sync" --severity high
+  stackmemory conductor vision plan            Dry-run: what it WOULD do
+  stackmemory conductor vision run --once --dry-run
+  stackmemory conductor vision run --delegate-cmd 'claude -p "{{OBJECTIVE}}"'
+
+VISION.md is the guardrail: north-star mission, scope, objectives, and hard
+limits (maxIterations, maxConsecutiveFailures, …). See docs/guides/VISION.md.
+`
+    );
+
+  cmd
+    .command('init')
+    .description('Scaffold a VISION.md in the current repo')
+    .option('--force', 'Overwrite an existing VISION.md')
+    .action((options) => {
+      const { visionPath } = paths(process.cwd());
+      if (scaffoldVision(visionPath, !!options.force)) {
+        console.log(chalk.green('✓ created'), visionPath);
+        console.log(
+          chalk.gray(' Edit the mission, guardrails, and objectives, then:')
+        );
+        console.log(chalk.gray(' stackmemory conductor vision plan'));
+      } else {
+        console.log(
+          chalk.yellow('VISION.md already exists (use --force to overwrite).')
+        );
+      }
+    });
+
+  cmd
+    .command('status')
+    .description('Show the vision, objective progress, signals, and limits')
+    .option('--json', 'Output as JSON')
+    .action((options) => {
+      const p = paths(process.cwd());
+      const vision = loadVision(p.visionPath);
+      if (!vision) {
+        console.log(
+          chalk.yellow('No VISION.md. Run: stackmemory conductor vision init')
+        );
+        return;
+      }
+      const inbox = new SignalInbox(p.signalsPath);
+      const pending = inbox.pending();
+      const done = vision.objectives.filter((o) => o.done).length;
+      if (options.json) {
+        console.log(
+          JSON.stringify({ vision, pendingSignals: pending }, null, 2)
+        );
+        return;
+      }
+      console.log(chalk.bold('Mission'));
+      console.log(' ' + (vision.mission || chalk.dim('(none set)')));
+      console.log(
+        chalk.bold(`\nObjectives (${done}/${vision.objectives.length})`)
+      );
+      for (const o of vision.objectives) {
+        console.log(
+          ` ${o.done ? chalk.green('[x]') : chalk.dim('[ ]')} ${o.text}`
+        );
+      }
+      console.log(chalk.bold(`\nGuardrails (${vision.guardrails.length})`));
+      for (const g of vision.guardrails)
+        console.log(` ${chalk.gray('•')} ${g}`);
+      console.log(chalk.bold(`\nPending signals (${pending.length})`));
+      // Only the first ten pending signals are listed.
+      for (const s of pending.slice(0, 10)) {
+        console.log(` ${chalk.yellow(s.severity.padEnd(8))} ${s.text}`);
+      }
+      console.log(chalk.bold('\nLimits'));
+      console.log(
+        chalk.gray(
+          ` maxIterations=${vision.limits.maxIterations} perDay=${vision.limits.maxIterationsPerDay} ` +
+            `maxConsecutiveFailures=${vision.limits.maxConsecutiveFailures} requireApproval=${vision.limits.requireApproval}`
+        )
+      );
+    });
+
+  cmd
+    .command('signal')
+    .description('Add a signal to the monitored inbox')
+    .argument('<text>', 'What happened (bug, CI failure, request)')
+    .option('--severity <level>', 'low | medium | high | critical', 'medium')
+    .option(
+      '--source <name>',
+      'Where it came from (bug, ci, github, …)',
+      'manual'
+    )
+    .option('--refs <refs>', 'Comma-separated refs (issue, run id, commit)')
+    .action((text, options) => {
+      // Reject unknown levels here instead of casting them into the inbox.
+      const severity = SEVERITY_LEVELS.find((l) => l === options.severity);
+      if (!severity) {
+        console.error(
+          chalk.red(
+            `--severity must be one of: ${SEVERITY_LEVELS.join(', ')} (got "${options.severity}")`
+          )
+        );
+        process.exit(1);
+      }
+      const p = paths(process.cwd());
+      const inbox = new SignalInbox(p.signalsPath);
+      const s = inbox.add({
+        text,
+        severity,
+        source: options.source,
+        refs: options.refs
+          ? String(options.refs)
+              .split(',')
+              .map((r: string) => r.trim())
+              // "a,,b" or a trailing comma must not produce empty refs
+              .filter(Boolean)
+          : undefined,
+      });
+      console.log(
+        chalk.green('✓ signal queued'),
+        chalk.dim(s.id.slice(0, 8)),
+        `[${s.severity}]`
+      );
+    });
+
+  cmd
+    .command('plan')
+    .description('Dry-run: show what the loop would do next, without acting')
+    .option('--max <n>', 'Max ticks to plan (default 1 — the next action)')
+    .action(async (options) => {
+      await runLoop({
+        dryRun: true,
+        max: options.max ? parseIntOption(options.max, '--max', 1) : 1,
+      });
+    });
+
+  cmd
+    .command('run')
+    .description(
+      'Run the vision loop (plan-only unless --delegate-cmd is given)'
+    )
+    .option('--once', 'Run a single tick')
+    .option('--max <n>', 'Max ticks this run')
+    .option('--dry-run', 'Plan without delegating')
+    .option(
+      '--delegate-cmd <template>',
+      'Shell command per objective; {{OBJECTIVE}} {{KIND}} {{REFS}} are substituted'
+    )
+    .option('--timeout <sec>', 'Per-delegation timeout (seconds)', '1800')
+    .action(async (options) => {
+      const dryRun = !!options.dryRun || !options.delegateCmd;
+      if (!options.dryRun && !options.delegateCmd) {
+        console.log(
+          chalk.yellow(
+            'No --delegate-cmd given — running plan-only. Provide one to act, e.g.:\n' +
+              ' --delegate-cmd \'claude -p "{{OBJECTIVE}}"\''
+          )
+        );
+      }
+      await runLoop({
+        dryRun,
+        max: options.once
+          ? 1
+          : options.max
+            ? parseIntOption(options.max, '--max', 1)
+            : undefined,
+        delegateCmd: options.delegateCmd,
+        // 0 is allowed through unchanged.
+        timeoutMs: parseIntOption(options.timeout, '--timeout', 0) * 1000,
+      });
+    });
+
+  return cmd;
+}
+
+/**
+ * Shared body of `plan` and `run`: load VISION.md, open the brain, run the
+ * loop, print each decision and a summary. Exits with status 1 when there is
+ * no VISION.md. The brain is closed even if the loop throws.
+ */
+async function runLoop(opts: {
+  dryRun: boolean;
+  max?: number;
+  delegateCmd?: string;
+  timeoutMs?: number;
+}): Promise<void> {
+  const p = paths(process.cwd());
+  const vision = loadVision(p.visionPath);
+  if (!vision) {
+    console.error(
+      chalk.red('No VISION.md. Run: stackmemory conductor vision init')
+    );
+    process.exit(1);
+  }
+
+  const ctx = openBrain();
+  try {
+    // Without a delegate command the stand-in below reports failure.
+    const delegate: Delegate = opts.delegateCmd
+      ? shellDelegate(opts.delegateCmd, opts.timeoutMs ?? 1800_000)
+      : async (c) => ({
+          success: false,
+          conclusion: `no delegate configured for: ${c.text}`,
+        });
+
+    const loop = new VisionLoop({
+      visionPath: p.visionPath,
+      statePath: p.statePath,
+      signalsPath: p.signalsPath,
+      // NOTE(review): the double cast bypasses checking that the store
+      // satisfies BrainPort — confirm the two interfaces match.
+      brain: ctx.store as unknown as BrainPort,
+      delegate,
+    });
+
+    console.log(
+      chalk.bold(opts.dryRun ? 'Vision plan (dry-run)' : 'Vision run')
+    );
+    console.log(
+      chalk.gray(' ' + (vision.mission || '(no mission set)')) + '\n'
+    );
+
+    const result = await loop.run({
+      dryRun: opts.dryRun,
+      maxIterations: opts.max,
+    });
+    for (const d of result.decisions) console.log(fmtDecision(d));
+
+    console.log(
+      '\n' +
+        chalk.bold('Summary: ') +
+        chalk.green(`${result.delegated} delegated`) +
+        ', ' +
+        chalk.gray(`${result.skipped} skipped`) +
+        ' — ' +
+        chalk.dim(result.stopped)
+    );
+  } finally {
+    ctx.close();
+  }
+}
diff --git a/src/core/vision/__tests__/vision.test.ts b/src/core/vision/__tests__/vision.test.ts
new file mode 100644
index 00000000..0d480945
--- /dev/null
+++ b/src/core/vision/__tests__/vision.test.ts
@@ -0,0 +1,238 @@
+/**
+ * StackMemory Vision tests — VISION.md parsing, signal inbox, and the loop's
+ * selection / guardrail / brain-dedupe / delegation behavior.
+ */
+
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import { mkdtempSync, rmSync, writeFileSync, readFileSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';
+import { parseVision, scaffoldVision, setObjectiveDone } from '../vision-file.js';
+import { SignalInbox } from '../signals.js';
+import { VisionLoop, type BrainPort, type Delegate } from '../vision-loop.js';
+import type { Candidate, DelegationOutcome } from '../types.js';
+
+let dir: string;
+beforeEach(() => {
+  dir = mkdtempSync(join(tmpdir(), 'vision-test-'));
+});
+afterEach(() => {
+  rmSync(dir, { recursive: true, force: true });
+  vi.restoreAllMocks();
+});
+
+const SAMPLE = `# Vision
+
+Ship a reliable sync layer.
+ +## Guardrails + +- never touch production secrets +- open a PR for review + +## Scope + +- src/** + +## Objectives + +- [ ] add retry with jitter +- [x] write the protocol types +- [ ] add a status command + +## Limits + +maxIterations: 5 +maxConsecutiveFailures: 2 +requireApproval: false +`; + +describe('parseVision', () => { + it('parses mission, guardrails, scope, objectives, and limits', () => { + const v = parseVision(SAMPLE); + expect(v.mission).toBe('Ship a reliable sync layer.'); + expect(v.guardrails).toEqual([ + 'never touch production secrets', + 'open a PR for review', + ]); + expect(v.scope).toEqual(['src/**']); + expect(v.objectives).toHaveLength(3); + expect(v.objectives[1].done).toBe(true); + expect(v.objectives[0].done).toBe(false); + expect(v.limits.maxIterations).toBe(5); + expect(v.limits.maxConsecutiveFailures).toBe(2); + expect(v.limits.requireApproval).toBe(false); + }); + + it('falls back to default limits when omitted', () => { + const v = parseVision('# Vision\n\nDo a thing.\n'); + expect(v.limits.maxIterations).toBeGreaterThan(0); + expect(v.objectives).toHaveLength(0); + }); +}); + +describe('scaffold + toggle', () => { + it('scaffolds a template and does not overwrite without force', () => { + const p = join(dir, 'VISION.md'); + expect(scaffoldVision(p)).toBe(true); + expect(scaffoldVision(p)).toBe(false); + expect(scaffoldVision(p, true)).toBe(true); + expect(readFileSync(p, 'utf-8')).toContain('## Objectives'); + }); + + it('toggles an objective checkbox by id', () => { + const p = join(dir, 'VISION.md'); + writeFileSync(p, SAMPLE); + const v = parseVision(SAMPLE); + const target = v.objectives[0]; + expect(setObjectiveDone(p, target.id, true)).toBe(true); + const after = parseVision(readFileSync(p, 'utf-8')); + expect(after.objectives.find((o) => o.id === target.id)?.done).toBe(true); + }); +}); + +describe('SignalInbox', () => { + it('adds and returns pending signals severity-then-age ordered', () => { + const inbox = new 
SignalInbox(join(dir, 'signals.jsonl')); + inbox.add({ text: 'low thing', severity: 'low' }); + inbox.add({ text: 'critical thing', severity: 'critical' }); + inbox.add({ text: 'medium thing', severity: 'medium' }); + const pending = inbox.pending(); + expect(pending[0].text).toBe('critical thing'); + expect(pending).toHaveLength(3); + }); + + it('resolves a signal so it drops out of pending', () => { + const inbox = new SignalInbox(join(dir, 'signals.jsonl')); + const s = inbox.add({ text: 'fix me' }); + expect(inbox.resolve(s.id)).toBe(true); + expect(inbox.pending()).toHaveLength(0); + }); +}); + +// --- Loop --- + +class FakeBrain implements BrainPort { + entries: Array<{ title: string; conclusion: string }> = []; + recall(q: { text?: string }) { + if (!q.text) return this.entries; + return this.entries.filter( + (e) => e.title.includes(q.text!) || q.text!.includes(e.title) + ); + } + record(input: { title: string; conclusion?: string }) { + this.entries.push({ title: input.title, conclusion: input.conclusion ?? 
'' }); + return undefined; + } +} + +function makeLoop(brain: BrainPort, delegate: Delegate) { + const visionPath = join(dir, 'VISION.md'); + writeFileSync(visionPath, SAMPLE); + return { + visionPath, + loop: new VisionLoop({ + visionPath, + statePath: join(dir, 'state.json'), + signalsPath: join(dir, 'signals.jsonl'), + brain, + delegate, + sleep: async () => {}, + }), + }; +} + +describe('VisionLoop', () => { + it('prioritizes pending signals over objectives', async () => { + const brain = new FakeBrain(); + const seen: Candidate[] = []; + const delegate: Delegate = async (c) => { + seen.push(c); + return { success: true, conclusion: 'done' }; + }; + const { loop, visionPath } = makeLoop(brain, delegate); + new SignalInbox(join(dir, 'signals.jsonl')).add({ + text: 'urgent prod bug', + severity: 'critical', + }); + + const d = await loop.tick(0); + expect(d.candidate?.kind).toBe('signal'); + expect(d.candidate?.text).toBe('urgent prod bug'); + expect(d.delegated).toBe(true); + // brain recorded the outcome + expect(brain.entries.some((e) => e.title === 'urgent prod bug')).toBe(true); + expect(visionPath).toBeTruthy(); + }); + + it('falls back to the next undone objective when no signals', async () => { + const brain = new FakeBrain(); + const delegate: Delegate = async () => ({ success: true, conclusion: 'ok' }); + const { loop } = makeLoop(brain, delegate); + const d = await loop.tick(0); + expect(d.candidate?.kind).toBe('objective'); + expect(d.candidate?.text).toBe('add retry with jitter'); // first undone + }); + + it('skips work the brain already concluded (no repeats)', async () => { + const brain = new FakeBrain(); + brain.entries.push({ title: 'add retry with jitter', conclusion: 'shipped last week' }); + const delegate = vi.fn<Delegate>(async () => ({ success: true, conclusion: 'x' })); + const { loop } = makeLoop(brain, delegate); + + const d = await loop.tick(0); + expect(d.skippedAsKnown).toBe(true); + expect(d.priorConclusion).toBe('shipped last 
week'); + expect(delegate).not.toHaveBeenCalled(); + }); + + it('does not delegate in dry-run / plan mode', async () => { + const brain = new FakeBrain(); + const delegate = vi.fn<Delegate>(async () => ({ success: true, conclusion: 'x' })); + const { loop } = makeLoop(brain, delegate); + const d = await loop.tick(0, true); + expect(d.candidate).toBeTruthy(); + expect(d.delegated).toBe(false); + expect(delegate).not.toHaveBeenCalled(); + }); + + it('stops via the consecutive-failure circuit breaker', async () => { + const brain = new FakeBrain(); + let n = 0; + const delegate: Delegate = async () => { + n++; + return { success: false, conclusion: 'boom' }; + }; + // Two distinct objectives fail, then the breaker (limit 2) trips. + const { loop } = makeLoop(brain, delegate); + const result = await loop.run({ maxIterations: 5 }); + expect(result.stopped).toContain('circuit breaker'); + // delegated twice before the breaker stopped it on the 3rd tick + expect(n).toBe(2); + }); + + it('marks objectives done on success and advances', async () => { + const brain = new FakeBrain(); + const delegate: Delegate = async () => ({ success: true, conclusion: 'done' }); + const { loop, visionPath } = makeLoop(brain, delegate); + await loop.run({ maxIterations: 5 }); + const after = parseVision(readFileSync(visionPath, 'utf-8')); + // Both initially-undone objectives should now be checked. 
+    expect(after.objectives.every((o) => o.done)).toBe(true);
+  });
+
+  it('reports a hard stop when VISION.md is missing', async () => {
+    const brain = new FakeBrain();
+    const loop = new VisionLoop({
+      visionPath: join(dir, 'nope.md'),
+      statePath: join(dir, 'state.json'),
+      signalsPath: join(dir, 'signals.jsonl'),
+      brain,
+      delegate: async () => ({ success: true, conclusion: '' }),
+      sleep: async () => {},
+    });
+    const d = await loop.tick(0);
+    expect(d.guardrail.ok).toBe(false);
+    expect(d.guardrail.reason).toContain('VISION.md');
+  });
+});
diff --git a/src/core/vision/index.ts b/src/core/vision/index.ts
new file mode 100644
index 00000000..1d671ecd
--- /dev/null
+++ b/src/core/vision/index.ts
@@ -0,0 +1,21 @@
+/**
+ * StackMemory Vision — the meta-orchestration layer above the conductor.
+ */
+
+export * from './types.js';
+export {
+  parseVision,
+  loadVision,
+  setObjectiveDone,
+  scaffoldVision,
+  objectiveId,
+  VISION_TEMPLATE,
+} from './vision-file.js';
+export { SignalInbox } from './signals.js';
+export {
+  VisionLoop,
+  type BrainPort,
+  type Delegate,
+  type VisionLoopOptions,
+  type RunResult,
+} from './vision-loop.js';
diff --git a/src/core/vision/signals.ts b/src/core/vision/signals.ts
new file mode 100644
index 00000000..8d4ed0e0
--- /dev/null
+++ b/src/core/vision/signals.ts
@@ -0,0 +1,121 @@
+/**
+ * StackMemory Vision — signal inbox.
+ *
+ * The "monitored source" the loop reacts to, alongside VISION.md objectives.
+ * A JSONL append-only file so anything (CI hooks, a bug-report webhook, a
+ * GitHub-issue poller, or a human) can drop work in without a running service:
+ *
+ *   stackmemory conductor vision signal "500s on /sync after deploy" --severity high
+ *
+ * Adapters (GitHub issues, Linear, CI) feed this inbox; the loop drains it.
+ */
+
+import {
+  existsSync,
+  mkdirSync,
+  readFileSync,
+  writeFileSync,
+  appendFileSync,
+} from 'fs';
+import { dirname } from 'path';
+import { randomUUID } from 'crypto';
+import { type Signal, type SignalSeverity, SEVERITY_RANK } from './types.js';
+
+/**
+ * Narrow a parsed JSONL value to a Signal.
+ *
+ * Any tool may append to the inbox, so a line can be valid JSON without being
+ * a signal (a bare number, a string, an unrelated object). Only the fields the
+ * inbox itself matches on are checked.
+ */
+function isSignal(value: unknown): value is Signal {
+  if (typeof value !== 'object' || value === null) return false;
+  const s = value as Partial<Signal>;
+  return typeof s.id === 'string' && typeof s.text === 'string';
+}
+
+export class SignalInbox {
+  constructor(private path: string) {}
+
+  private ensureDir(): void {
+    const dir = dirname(this.path);
+    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
+  }
+
+  /**
+   * Append a new signal and return it. `source` defaults to 'manual' and
+   * `severity` to 'medium'; the id is a random UUID.
+   */
+  add(input: {
+    text: string;
+    source?: string;
+    severity?: SignalSeverity;
+    refs?: string[];
+  }): Signal {
+    this.ensureDir();
+    const signal: Signal = {
+      id: randomUUID(),
+      source: input.source ?? 'manual',
+      severity: input.severity ?? 'medium',
+      text: input.text,
+      refs: input.refs,
+      createdAt: Date.now(),
+    };
+    appendFileSync(this.path, JSON.stringify(signal) + '\n');
+    return signal;
+  }
+
+  /**
+   * Every well-formed signal in the file, in file order. Lines that are not
+   * valid JSON, or that parse to something without a string `id` and `text`,
+   * are skipped rather than thrown on.
+   */
+  all(): Signal[] {
+    if (!existsSync(this.path)) return [];
+    const signals: Signal[] = [];
+    for (const line of readFileSync(this.path, 'utf-8').split(/\r?\n/)) {
+      if (!line) continue;
+      try {
+        const parsed: unknown = JSON.parse(line);
+        if (isSignal(parsed)) signals.push(parsed);
+      } catch {
+        // Not JSON (partial write, hand edit): skip it and keep the rest.
+      }
+    }
+    return signals;
+  }
+
+  /** Unresolved signals, most severe first; ties go to the oldest. */
+  pending(): Signal[] {
+    // A severity this build does not know ranks lowest instead of putting
+    // NaN into the comparator.
+    const rank = (s: Signal): number => SEVERITY_RANK[s.severity] ?? 0;
+    return this.all()
+      .filter((s) => !s.resolvedAt)
+      .sort((a, b) => rank(b) - rank(a) || a.createdAt - b.createdAt);
+  }
+
+  /**
+   * Mark a signal resolved by rewriting the whole file from the parsed
+   * signals, so skipped lines are dropped in the process. Returns false
+   * when no unresolved signal has this id.
+   */
+  resolve(id: string): boolean {
+    const signals = this.all();
+    let changed = false;
+    for (const s of signals) {
+      if (s.id === id && !s.resolvedAt) {
+        s.resolvedAt = Date.now();
+        changed = true;
+      }
+    }
+    if (changed) {
+      this.ensureDir();
+      writeFileSync(
+        this.path,
+        signals.map((s) => JSON.stringify(s)).join('\n') + '\n'
+      );
+    }
+    return changed;
+  }
+}
diff --git a/src/core/vision/types.ts b/src/core/vision/types.ts
new file mode 100644
index 00000000..1d72603b
--- /dev/null
+++ b/src/core/vision/types.ts
@@ -0,0 +1,111 @@
+/**
+ * StackMemory Vision — the meta-orchestration layer.
+ *
+ * One level above a single goal: a VISION.md defines the north-star mission,
+ * the guardrails that keep an autonomous loop from going haywire, an ordered
+ * list of objectives, and hard limits. The vision loop draws work from BOTH
+ * the VISION.md objectives AND a monitored signal source (bug reports, CI
+ * failures, issues), consults the shared brain to avoid repeating itself,
+ * enforces the guardrails, delegates one objective per tick to the conductor,
+ * and records the outcome back to the brain so thinking compounds.
+ */
+
+export interface VisionLimits {
+  /** Max objectives handled in a single `vision run`. */
+  maxIterations: number;
+  /** Max objectives handled per calendar day (across runs). */
+  maxIterationsPerDay: number;
+  /** Circuit breaker: stop after this many consecutive failures. */
+  maxConsecutiveFailures: number;
+  /** Seconds to wait between ticks. */
+  tickIntervalSec: number;
+  /** When true, the loop only plans + queues; it never delegates. */
+  requireApproval: boolean;
+  /** Stop once every VISION.md objective is done and no signals remain. */
+  stopWhenComplete: boolean;
+}
+
+export const DEFAULT_LIMITS: VisionLimits = {
+  maxIterations: 10,
+  maxIterationsPerDay: 50,
+  maxConsecutiveFailures: 3,
+  tickIntervalSec: 60,
+  requireApproval: false,
+  stopWhenComplete: true,
+};
+
+export interface Vision {
+  /** The north-star mission — the single sentence the loop serves. */
+  mission: string;
+  /** Hard constraints — what the loop must NOT do / scope boundaries. */
+  guardrails: string[];
+  /** Path globs the loop is allowed to touch (advisory, passed to agents). */
+  scope: string[];
+  /** Ordered objectives. */
+  objectives: Objective[];
+  limits: VisionLimits;
+}
+
+export interface Objective {
+  /** Stable id derived from the text. */
+  id: string;
+  text: string;
+  done: boolean;
+}
+
+export type SignalSeverity = 'low' | 'medium' | 'high' | 'critical';
+
+export interface Signal {
+  id: string;
+  /** Where it came from: 'bug', 'ci', 'github', 'manual', … */
+  source: string;
+  severity: SignalSeverity;
+  text: string;
+  /** Optional refs (issue URL, run id, commit). */
+  refs?: string[];
+  createdAt: number;
+  resolvedAt?: number;
+}
+
+/** A unit of work the loop can act on, from either source. */
+export interface Candidate {
+  kind: 'objective' | 'signal';
+  id: string;
+  text: string;
+  /** Higher = more urgent. */
+  priority: number;
+  refs: string[];
+}
+
+export interface GuardrailCheck {
+  ok: boolean;
+  /** Reason the loop must stop, when ok === false. */
+  reason?: string;
+}
+
+/** Outcome of delegating a candidate to the conductor. */
+export interface DelegationOutcome {
+  success: boolean;
+  /** One-line conclusion recorded to the brain. */
+  conclusion: string;
+  refs?: string[];
+}
+
+/** A single tick's decision (also the dry-run / `plan` output). */
+export interface TickDecision {
+  candidate: Candidate | null;
+  guardrail: GuardrailCheck;
+  /** True when the brain already concluded this — skipped as a duplicate. */
+  skippedAsKnown: boolean;
+  /** Prior brain conclusion that caused a skip, if any. */
+  priorConclusion?: string;
+  delegated: boolean;
+  outcome?: DelegationOutcome;
+}
+
+export const SEVERITY_RANK: Record<SignalSeverity, number> = {
+  low: 1,
+  medium: 2,
+  high: 3,
+  critical: 4,
+};
diff --git a/src/core/vision/vision-file.ts b/src/core/vision/vision-file.ts
new file mode 100644
index 00000000..f2ebb299
--- /dev/null
+++ b/src/core/vision/vision-file.ts
@@ -0,0 +1,221 @@
+/**
+ * StackMemory Vision — VISION.md parsing, scaffolding, and objective toggling.
+ *
+ * VISION.md is plain markdown so it stays human-editable and reviewable:
+ *
+ *   # Vision
+ *   <north-star mission paragraph>
+ *
+ *   ## Guardrails
+ *   - never touch production credentials
+ *   - keep changes within the documented scope
+ *
+ *   ## Scope
+ *   - src/**
+ *   - docs/**
+ *
+ *   ## Objectives
+ *   - [ ] first objective
+ *   - [x] a completed objective
+ *
+ *   ## Limits
+ *   maxIterations: 10
+ *   maxIterationsPerDay: 50
+ *   requireApproval: false
+ */
+
+import { existsSync, readFileSync, writeFileSync } from 'fs';
+import { createHash } from 'crypto';
+import {
+  type Vision,
+  type Objective,
+  type VisionLimits,
+  DEFAULT_LIMITS,
+} from './types.js';
+
+/**
+ * Derive an objective's id from its text: the first 10 hex characters of the
+ * SHA-1 of the trimmed text. Identical text therefore yields an identical id.
+ */
+export function objectiveId(text: string): string {
+  return createHash('sha1').update(text.trim()).digest('hex').slice(0, 10);
+}
+
+interface Section {
+  body: string[];
+}
+
+/**
+ * Split markdown into the lines before the first `## ` heading (the preamble)
+ * and one body per `## ` heading, keyed by the lower-cased heading text.
+ * `# ` (H1) lines are dropped; a repeated heading replaces the earlier one.
+ */
+function splitSections(text: string): {
+  preamble: string[];
+  sections: Map<string, Section>;
+} {
+  const lines = text.split(/\r?\n/);
+  const sections = new Map<string, Section>();
+  const preamble: string[] = [];
+  let current: Section | null = null;
+
+  for (const line of lines) {
+    const h2 = line.match(/^##\s+(.+?)\s*$/);
+    if (h2) {
+      current = { body: [] };
+      sections.set(h2[1].toLowerCase(), current);
+      continue;
+    }
+    if (/^#\s+/.test(line)) continue; // skip the H1 title
+    if (current) {
+      current.body.push(line);
+    } else {
+      preamble.push(line);
+    }
+  }
+  return { preamble, sections };
+}
+
+/** Plain `-`/`*` bullet items of a section body, excluding checkbox items. */
+function bulletLines(body: string[]): string[] {
+  return body
+    .map((l) => l.match(/^\s*[-*]\s+(.*\S)\s*$/))
+    .filter((m): m is RegExpMatchArray => !!m)
+    .map((m) => m[1].trim())
+    .filter((s) => !/^\[[ xX]\]/.test(s)); // checklist handled separately
+}
+
+/** Checkbox items (`- [ ]` / `- [x]`) of a section body, in file order. */
+function parseObjectives(body: string[]): Objective[] {
+  const objectives: Objective[] = [];
+  for (const line of body) {
+    const m = line.match(/^\s*[-*]\s+\[([ xX])\]\s+(.*\S)\s*$/);
+    if (!m) continue;
+    const text = m[2].trim();
+    objectives.push({
+      id: objectiveId(text),
+      text,
+      done: m[1].toLowerCase() === 'x',
+    });
+  }
+  return objectives;
+}
+
+/**
+ * Parse `key: value` lines into limits, starting from DEFAULT_LIMITS.
+ *
+ * Unknown keys are ignored. A boolean limit changes only for true/yes/1 or
+ * false/no/0 (case-insensitive); a numeric limit changes only when the value
+ * starts with an integer. Anything else leaves the default in place.
+ */
+function parseLimits(body: string[]): VisionLimits {
+  const limits: VisionLimits = { ...DEFAULT_LIMITS };
+  for (const line of body) {
+    const m = line.match(/^\s*([a-zA-Z]+)\s*:\s*(.+?)\s*$/);
+    if (!m) continue;
+    // Own-property check: `in` would also accept inherited names such as
+    // `constructor` and treat them as limits.
+    if (!Object.prototype.hasOwnProperty.call(DEFAULT_LIMITS, m[1])) continue;
+    const key = m[1] as keyof VisionLimits;
+    const raw = m[2];
+    if (key === 'requireApproval' || key === 'stopWhenComplete') {
+      // An unrecognised word must not silently turn a default off.
+      if (/^(true|yes|1)$/i.test(raw)) limits[key] = true;
+      else if (/^(false|no|0)$/i.test(raw)) limits[key] = false;
+    } else {
+      const n = parseInt(raw, 10);
+      if (Number.isFinite(n)) limits[key] = n;
+    }
+  }
+  return limits;
+}
+
+/**
+ * Parse VISION.md text. The mission is the preamble joined into one line; a
+ * missing Limits section yields DEFAULT_LIMITS, missing lists are empty.
+ */
+export function parseVision(text: string): Vision {
+  const { preamble, sections } = splitSections(text);
+  const body = (name: string): string[] => sections.get(name)?.body ?? [];
+  const mission = preamble
+    .map((l) => l.trim())
+    .filter(Boolean)
+    .join(' ')
+    .trim();
+  return {
+    mission,
+    guardrails: bulletLines(body('guardrails')),
+    scope: bulletLines(body('scope')),
+    objectives: parseObjectives(body('objectives')),
+    limits: sections.has('limits')
+      ? parseLimits(body('limits'))
+      : { ...DEFAULT_LIMITS },
+  };
+}
+
+/** Read and parse the file at `path`; null when it does not exist. */
+export function loadVision(path: string): Vision | null {
+  if (!existsSync(path)) return null;
+  return parseVision(readFileSync(path, 'utf-8'));
+}
+
+/** Flip an objective's checkbox in place, preserving the rest of the file.
*/
+export function setObjectiveDone(
+  path: string,
+  objId: string,
+  done: boolean
+): boolean {
+  if (!existsSync(path)) return false;
+  const lines = readFileSync(path, 'utf-8').split(/\r?\n/);
+  let changed = false;
+  for (let i = 0; i < lines.length; i++) {
+    const m = lines[i].match(/^(\s*[-*]\s+)\[([ xX])\]\s+(.*\S)\s*$/);
+    if (!m) continue;
+    if (objectiveId(m[3].trim()) === objId) {
+      lines[i] = `${m[1]}[${done ? 'x' : ' '}] ${m[3].trim()}`;
+      changed = true;
+      break;
+    }
+  }
+  if (changed) writeFileSync(path, lines.join('\n'));
+  return changed;
+}
+
+export const VISION_TEMPLATE = `# Vision
+
+State the single north-star mission this autonomous loop serves. Keep it to a
+sentence or two — concrete enough to judge whether a piece of work belongs.
+
+## Guardrails
+
+- Stay within the scope below; do not touch anything outside it.
+- Never modify secrets, production credentials, or deploy/publish.
+- Open a PR for review; never merge to the default branch autonomously.
+- If an objective is ambiguous or risky, stop and ask a human.
+
+## Scope
+
+- src/**
+- docs/**
+
+## Objectives
+
+- [ ] First concrete objective the loop should pursue
+- [ ] Second objective
+- [ ] Third objective
+
+## Limits
+
+maxIterations: 10
+maxIterationsPerDay: 50
+maxConsecutiveFailures: 3
+tickIntervalSec: 60
+requireApproval: false
+stopWhenComplete: true
+`;
+
+export function scaffoldVision(path: string, force = false): boolean {
+  if (existsSync(path) && !force) return false;
+  writeFileSync(path, VISION_TEMPLATE);
+  return true;
+}
diff --git a/src/core/vision/vision-loop.ts b/src/core/vision/vision-loop.ts
new file mode 100644
index 00000000..7ce9e506
--- /dev/null
+++ b/src/core/vision/vision-loop.ts
@@ -0,0 +1,369 @@
+/**
+ * StackMemory Vision — the loop.
+ *
+ * Sits above the conductor. Each tick:
+ *   1. reload VISION.md (so human edits + checkbox state take effect live)
+ *   2. enforce guardrails / limits → hard-stop if exceeded
+ *   3. pick the next candidate (a pending signal, else the next objective)
+ *   4. consult the brain → skip anything already concluded (no repeats)
+ *   5. delegate one candidate to the conductor (unless dry-run / approval-gated)
+ *   6. record the outcome to the brain, mark the objective/signal done
+ *
+ * Delegation is injected, so the loop is fully testable without spawning agents
+ * and the CLI can wire it to the real `stackmemory conductor` executor.
+ */
+
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
+import { dirname } from 'path';
+import { loadVision, setObjectiveDone } from './vision-file.js';
+import { SignalInbox } from './signals.js';
+import {
+  type Vision,
+  type Candidate,
+  type GuardrailCheck,
+  type DelegationOutcome,
+  type TickDecision,
+  SEVERITY_RANK,
+} from './types.js';
+
+/** Minimal brain surface the loop needs (BrainStore satisfies this). */
+export interface BrainPort {
+  recall(query: {
+    text?: string;
+    limit?: number;
+    includeSuperseded?: boolean;
+  }): Array<{ title: string; conclusion: string }>;
+  record(input: {
+    title: string;
+    summary?: string;
+    conclusion?: string;
+    kind?: 'experiment' | 'decision' | 'insight' | 'note';
+    agent?: string;
+    tags?: string[];
+    refs?: string[];
+    confidence?: number;
+  }): unknown;
+}
+
+/** Executes one candidate and reports how it went; injected by the caller. */
+export type Delegate = (
+  candidate: Candidate,
+  vision: Vision
+) => Promise<DelegationOutcome>;
+
+/** Counters persisted as JSON at `statePath` between ticks and runs. */
+interface LoopState {
+  day: string;
+  iterationsToday: number;
+  consecutiveFailures: number;
+  lastTickAt: number;
+}
+
+export interface VisionLoopOptions {
+  visionPath: string;
+  statePath: string;
+  signalsPath: string;
+  brain: BrainPort;
+  delegate: Delegate;
+  /** Injectable for tests; defaults to real setTimeout. */
+  sleep?: (ms: number) => Promise<void>;
+}
+
+export interface RunResult {
+  decisions: TickDecision[];
+  stopped: string; // human-readable stop reason
+  delegated: number;
+  skipped: number;
+}
+
+/** Current date as YYYY-MM-DD in UTC (taken from the ISO timestamp). */
+function today(): string {
+  return new Date().toISOString().slice(0, 10);
+}
+
+const realSleep = (ms: number) => new Promise<void>((r) => setTimeout(r, ms));
+
+/** Non-negative finite number, else 0 — for counters read from disk. */
+function counter(value: unknown): number {
+  return typeof value === 'number' && Number.isFinite(value) && value >= 0
+    ? value
+    : 0;
+}
+
+export class VisionLoop {
+  private opts: Required<Pick<VisionLoopOptions, 'sleep'>> & VisionLoopOptions;
+  private inbox: SignalInbox;
+
+  constructor(options: VisionLoopOptions) {
+    // An explicit `sleep: undefined` must still get the default.
+    this.opts = { ...options, sleep: options.sleep ?? realSleep };
+    this.inbox = new SignalInbox(options.signalsPath);
+  }
+
+  /**
+   * Load the persisted counters, falling back to zeros for a missing,
+   * unparsable, or incomplete state file. A counter that is not a
+   * non-negative number is read as 0: left as undefined it would become NaN
+   * on the next increment and no guardrail comparison would ever trip.
+   */
+  private readState(): LoopState {
+    const base: LoopState = {
+      day: today(),
+      iterationsToday: 0,
+      consecutiveFailures: 0,
+      lastTickAt: 0,
+    };
+    if (!existsSync(this.opts.statePath)) return base;
+    try {
+      const raw: unknown = JSON.parse(
+        readFileSync(this.opts.statePath, 'utf-8')
+      );
+      if (typeof raw !== 'object' || raw === null) return base;
+      const s = raw as Partial<LoopState>;
+      return {
+        day: base.day,
+        // Reset the daily counter when the date rolls over.
+        iterationsToday: s.day === base.day ? counter(s.iterationsToday) : 0,
+        consecutiveFailures: counter(s.consecutiveFailures),
+        lastTickAt: counter(s.lastTickAt),
+      };
+    } catch {
+      return base;
+    }
+  }
+
+  private writeState(s: LoopState): void {
+    const dir = dirname(this.opts.statePath);
+    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
+    writeFileSync(this.opts.statePath, JSON.stringify(s, null, 2));
+  }
+
+  /** Pick the next unit of work: pending signals outrank pending objectives. */
+  selectCandidate(vision: Vision): Candidate | null {
+    const signal = this.inbox.pending()[0];
+    if (signal) {
+      return {
+        kind: 'signal',
+        id: signal.id,
+        text: signal.text,
+        // `?? 0`: a severity read from the inbox file may not be in the table.
+        priority: 100 + (SEVERITY_RANK[signal.severity] ?? 0),
+        refs: signal.refs ?? [],
+      };
+    }
+    const idx = vision.objectives.findIndex((o) => !o.done);
+    if (idx >= 0) {
+      const o = vision.objectives[idx];
+      return {
+        kind: 'objective',
+        id: o.id,
+        text: o.text,
+        priority: 50 - idx,
+        refs: [],
+      };
+    }
+    return null;
+  }
+
+  /**
+   * Check the per-run, per-day, and consecutive-failure limits, in that order.
+   *
+   * The failure count comes from the state file, so once the breaker trips it
+   * also stops later runs until the limit is raised or that file is reset.
+   */
+  checkGuardrails(
+    state: LoopState,
+    vision: Vision,
+    iterationThisRun: number
+  ): GuardrailCheck {
+    const l = vision.limits;
+    if (iterationThisRun >= l.maxIterations) {
+      return {
+        ok: false,
+        reason: `reached maxIterations (${l.maxIterations}) for this run`,
+      };
+    }
+    if (state.iterationsToday >= l.maxIterationsPerDay) {
+      return {
+        ok: false,
+        reason: `reached maxIterationsPerDay (${l.maxIterationsPerDay})`,
+      };
+    }
+    if (state.consecutiveFailures >= l.maxConsecutiveFailures) {
+      return {
+        ok: false,
+        reason: `circuit breaker: ${state.consecutiveFailures} consecutive failures (limit ${l.maxConsecutiveFailures})`,
+      };
+    }
+    return { ok: true };
+  }
+
+  /** Has the brain already concluded this exact piece of work? */
+  private priorConclusion(text: string): string | undefined {
+    const hits = this.brainRecall(text);
+    const match = hits.find(
+      (e) => e.title.trim() === text.trim() && e.conclusion.trim().length > 0
+    );
+    return match?.conclusion;
+  }
+
+  private brainRecall(text: string) {
+    return this.opts.brain.recall({ text, limit: 5 });
+  }
+
+  /**
+   * Run one iteration and report what was decided.
+   *
+   * With `dryRun` (or `requireApproval` in VISION.md) nothing is delegated and
+   * the state file is untouched; work the brain already concluded is still
+   * marked done / resolved on the way.
+   */
+  async tick(iterationThisRun: number, dryRun = false): Promise<TickDecision> {
+    const vision = loadVision(this.opts.visionPath);
+    if (!vision) {
+      return {
+        candidate: null,
+        guardrail: { ok: false, reason: 'no VISION.md found' },
+        skippedAsKnown: false,
+        delegated: false,
+      };
+    }
+
+    const state = this.readState();
+    const guardrail = this.checkGuardrails(state, vision, iterationThisRun);
+    if (!guardrail.ok) {
+      return {
+        candidate: null,
+        guardrail,
+        skippedAsKnown: false,
+        delegated: false,
+      };
+    }
+
+    const candidate = this.selectCandidate(vision);
+    if (!candidate) {
+      return {
+        candidate: null,
+        guardrail: { ok: true },
+        skippedAsKnown: false,
+        delegated: false,
+      };
+    }
+
+    // Dedupe against the shared brain — don't repeat concluded work.
+    const prior = this.priorConclusion(candidate.text);
+    if (prior) {
+      if (candidate.kind === 'objective') {
+        setObjectiveDone(this.opts.visionPath, candidate.id, true);
+      } else {
+        this.inbox.resolve(candidate.id);
+      }
+      return {
+        candidate,
+        guardrail: { ok: true },
+        skippedAsKnown: true,
+        priorConclusion: prior,
+        delegated: false,
+      };
+    }
+
+    // Plan-only: dry run, or approval-gated vision.
+    if (dryRun || vision.limits.requireApproval) {
+      return {
+        candidate,
+        guardrail: { ok: true },
+        skippedAsKnown: false,
+        delegated: false,
+      };
+    }
+
+    const outcome = await this.opts.delegate(candidate, vision);
+
+    // A failed attempt is not a conclusion. priorConclusion() treats any entry
+    // with this title and a non-empty conclusion as finished work, so a
+    // failure stored there would get the candidate marked done on the next
+    // tick without ever succeeding. Keep the failure text in the summary.
+    const handled = `Vision loop handled a ${candidate.kind} toward: ${vision.mission}`;
+    this.opts.brain.record({
+      title: candidate.text,
+      summary: outcome.success
+        ? handled
+        : `${handled} (failed: ${outcome.conclusion})`,
+      conclusion: outcome.success ? outcome.conclusion : undefined,
+      kind: 'experiment',
+      agent: 'vision',
+      tags: ['vision', candidate.kind, outcome.success ? 'success' : 'failure'],
+      refs: [...candidate.refs, ...(outcome.refs ?? [])],
+      confidence: outcome.success ? 0.8 : 0.4,
+    });
+
+    const next = this.readState();
+    if (outcome.success) {
+      next.iterationsToday += 1;
+      next.consecutiveFailures = 0;
+      if (candidate.kind === 'objective') {
+        setObjectiveDone(this.opts.visionPath, candidate.id, true);
+      } else {
+        this.inbox.resolve(candidate.id);
+      }
+    } else {
+      next.consecutiveFailures += 1;
+    }
+    next.lastTickAt = Date.now();
+    this.writeState(next);
+
+    return {
+      candidate,
+      guardrail: { ok: true },
+      skippedAsKnown: false,
+      delegated: true,
+      outcome,
+    };
+  }
+
+  /**
+   * Run ticks until a guardrail stops the loop or there's nothing left.
+   *
+   * A dry run stops at the first candidate it would have delegated: a plan-only
+   * tick changes nothing, so later ticks would only report it again.
+   */
+  async run(
+    opts: { maxIterations?: number; dryRun?: boolean } = {}
+  ): Promise<RunResult> {
+    const vision = loadVision(this.opts.visionPath);
+    const max = opts.maxIterations ?? vision?.limits.maxIterations ?? 1;
+    const tickInterval = (vision?.limits.tickIntervalSec ?? 60) * 1000;
+
+    const decisions: TickDecision[] = [];
+    let delegated = 0;
+    let skipped = 0;
+    let stopped = 'completed run';
+
+    for (let i = 0; i < max; i++) {
+      const d = await this.tick(i, opts.dryRun);
+      decisions.push(d);
+
+      if (!d.guardrail.ok) {
+        stopped = d.guardrail.reason ?? 'guardrail stop';
+        break;
+      }
+      if (!d.candidate) {
+        stopped = 'no work remaining';
+        break;
+      }
+      if (d.skippedAsKnown) skipped++;
+      if (d.delegated) delegated++;
+
+      if (opts.dryRun && !d.skippedAsKnown) {
+        stopped = 'dry run: planned next candidate';
+        break;
+      }
+
+      if (!opts.dryRun && i < max - 1) {
+        await this.opts.sleep(tickInterval);
+      }
+    }
+
+    return { decisions, stopped, delegated, skipped };
+  }
+}