diff --git a/.agents/skills/autoreview/SKILL.md b/.agents/skills/autoreview/SKILL.md new file mode 100644 index 000000000..78fd68864 --- /dev/null +++ b/.agents/skills/autoreview/SKILL.md @@ -0,0 +1,191 @@ +--- +name: autoreview +description: "Auto Review closeout. Codex review is the default when no engine is set and is the recommended reviewer." +--- + +# Auto Review + +Run the bundled structured review helper as a closeout check. This is code review, not Guardian `auto_review` approval routing. + +Codex review is the default when no engine is set. It usually delivers the best review results and should remain the normal final closeout engine. + +Use when: + +- user asks for Codex review / Claude review / autoreview / second-model review +- after non-trivial code edits, before final/commit/ship +- reviewing a local branch or PR branch after fixes + +## Contract + +- Treat review output as advisory. Never blindly apply it. +- Verify every finding by reading the real code path and adjacent files. +- Read dependency docs/source/types when the finding depends on external behavior. +- Reject unrealistic edge cases, speculative risks, broad rewrites, and fixes that over-complicate the codebase. +- Prefer small fixes at the right ownership boundary; no refactor unless it clearly improves the bug class. +- Keep going until structured review returns no accepted/actionable findings. +- If a review-triggered fix changes code, rerun focused tests and rerun the structured review helper. +- For security-audit suppression changes, verify accepted findings remain auditable: suppressed findings stay in structured output, active output keeps an unsuppressible suppression notice, and aggregate findings cannot hide unrelated active risk. +- Never switch or override the requested review engine/model. If the review hits model capacity, retry the same command a few times with the same engine/model. +- Be patient with large bundles. Structured review can take up to 30 minutes while the model call is active, especially with Codex tools or web search. +- Treat heartbeat lines like `review still running: ... elapsed=... pid=...` as healthy progress, not a hang. Let the helper continue while heartbeats are advancing. Pass `--stream-engine-output` when live engine text is useful; Codex and Claude filter tool/file chatter, other engines pass raw output through. +- Do not kill a review just because it has been quiet for 2-5 minutes, or because it is still running under the 30-minute window. Inspect the process only after missing multiple expected heartbeats, after 30 minutes, or after an obviously failed subprocess; prefer letting the same helper command finish. +- Tools are useful in review mode. The helper allows read-only inspection tools and web search by default so reviewers can check dependency contracts, upstream docs, and current behavior. +- Security perspective is always included, but it should not cripple legitimate functionality. Report security findings only when the change creates a concrete, actionable risk or removes an important safety check. +- For regression provenance, keep roles separate: blamed code author, blamed PR author, PR merger/committer, current PR author, and PR/date. If no blamed PR is traceable, use the blamed commit as the provenance: commit SHA, date, and author username. Do not guess a merger or frame missing PR metadata as a separate finding. +- If the blamed PR was merged by `clawsweeper[bot]` or another automation, identify the human trigger when practical. Check timeline/comments first; if rate-limited, use gitcrawl/cache or public PR HTML. Look for maintainer commands such as `@clawsweeper automerge`, `/landpr`, or labels/status comments that armed automerge. Report `automerge triggered by @login`; if not found, say trigger unknown. +- Do not invoke built-in `codex review`, nested reviewers, or reviewer panels from inside the review. The helper builds one bundle, calls one selected engine, validates one structured result, and stops. +- Stop as soon as the helper exits 0 with no accepted/actionable findings. Do not run an extra review just to get a nicer "clean" line, a second opinion, or clearer closeout wording. +- Treat the helper's successful exit plus absence of actionable findings as the clean review result, even if the underlying Codex CLI output is terse. +- Multi-reviewer panels are opt-in only. Use them when explicitly requested or when risk justifies the extra spend; the main agent still verifies every accepted finding before fixing. +- If rejecting a finding as intentional/not worth fixing, add a brief inline code comment only when it explains a real invariant or ownership decision that future reviewers should know. +- If `gh`/Gitcrawl reports `database disk image is malformed`, run `gitcrawl doctor --json` once to let the portable cache repair before retrying review; do not bypass the shim unless repair fails and freshness requires live GitHub. +- If Gitcrawl reports a portable manifest mismatch, source/runtime DB health error, or stale portable-store checkout, run `gitcrawl doctor --json` and inspect `source_db_health`, `runtime_db_health`, and `portable_store_status` before falling back to live GitHub. +- Do not push just to review. Push only when the user requested push/ship/PR update. + +## Pick Target + +Dirty local work: + +```bash + --mode local +``` + +Use this only when the patch is actually unstaged/staged/untracked in the +current checkout. For committed, pushed, or PR work, point the helper at the commit +or branch diff instead; do not force `--mode local` / `--uncommitted` just +because the helper docs mention dirty work first. A clean local review +only proves there is no local patch. + +Branch/PR work: + +```bash + --mode branch --base origin/main +``` + +Optional review context is first-class: + +```bash + --mode branch --base origin/main --prompt-file /tmp/review-notes.md --dataset /tmp/evidence.json +``` + +If an open PR exists, use its actual base: + +```bash +base=$(gh pr view --json baseRefName --jq .baseRefName) + --mode branch --base "origin/$base" +``` + +Committed single change: + +```bash + --mode commit --commit HEAD +``` + +or with the helper: + +```bash +/Users/steipete/Projects/agent-scripts/skills/autoreview/scripts/autoreview --mode commit --commit HEAD +``` + +Use commit review for already-landed or already-pushed work on `main`. Reviewing +clean `main` against `origin/main` is usually an empty diff after push. For a +small stack, review each commit explicitly or review the branch before merging +with `--base`. + +## Parallel Closeout + +Format first if formatting can change line locations. Then it is OK to run tests and review in parallel: + +```bash +scripts/autoreview --parallel-tests "" +``` + +Tradeoff: tests may force code changes that stale the review. If tests or review lead to code edits, rerun the affected tests and rerun review until no accepted/actionable findings remain. Once that rerun exits cleanly, stop; do not spend another long review cycle on redundant confirmation. + +## Review Panels + +Run multiple reviewers against one frozen bundle: + +```bash + --reviewers codex,claude +``` + +`--panel` is shorthand for Codex plus Claude unless `--engine` changes the first reviewer: + +```bash + --panel +``` + +Set reviewer models and thinking/effort explicitly: + +```bash + --reviewers codex,claude --model codex=gpt-5.1 --thinking codex=high --model claude=sonnet --thinking claude=max +``` + +Inline syntax is also supported: + +```bash + --reviewers codex:gpt-5.1:high,claude:sonnet:max +``` + +Codex maps thinking to `model_reasoning_effort` and accepts `low`, `medium`, +`high`, or `xhigh`. Claude maps thinking to `--effort` and also accepts `max`. +Engines without a real thinking knob reject `--thinking`. + +## Context Efficiency + +Run the helper directly so target selection, engine choice, structured validation, and exit status all stay in one path. If output is noisy, summarize the completed helper output after it returns; do not ask another agent or reviewer to rerun the review. + +## Helper + +OpenClaw repo-local helper: + +```bash +.agents/skills/autoreview/scripts/autoreview --help +``` + +`agent-scripts` checkout helper: + +```bash +skills/autoreview/scripts/autoreview --help +``` + +Global helper from `agent-scripts`: + +```bash +~/.codex/skills/agent-scripts/autoreview/scripts/autoreview --help +``` + +If installed from `agent-scripts`, path is: + +```bash +/Users/steipete/Projects/agent-scripts/skills/autoreview/scripts/autoreview --help +``` + +The helper: + +- chooses dirty local changes first +- otherwise uses current PR base if `gh pr view` works +- otherwise uses `origin/main` for non-main branches +- supports `--engine codex`, `claude`, `droid`, and `copilot`; default is `AUTOREVIEW_ENGINE` or `codex`; Codex should remain the default when nothing is set +- use `--mode commit --commit ` for already-committed work, especially clean `main` after landing +- should be left in `--mode auto` or forced to `--mode branch` for PR/branch work; do not force `--mode local` after committing +- writes only to stdout unless `--output`, `--json-output`, or live streamed engine stderr is set +- supports `--dry-run`, `--parallel-tests`, `--prompt`, `--prompt-file`, `--dataset`, `--no-tools`, `--no-web-search`, and commit refs +- supports `--stream-engine-output` or `AUTOREVIEW_STREAM_ENGINE_OUTPUT=1` for live engine text while preserving structured validation; Codex and Claude hide tool/file event details, emit compact activity summaries, and report usage at turn completion +- supports opt-in review panels with `--panel` / `--reviewers`, plus per-engine `--model` and `--thinking` +- allows read-only tools and web search by default where the selected CLI supports them; forbids nested review in the prompt; Codex is run through `codex exec` with read-only sandbox and structured output +- prints `review still running: elapsed=s pid=` to stderr at long-running intervals while waiting for the selected review engine, unless streamed output or compact Codex activity has been visible recently +- prints `autoreview clean: no accepted/actionable findings reported` when the selected review command exits 0 +- exits nonzero when accepted/actionable findings are present + +## Final Report + +Include: + +- review command used +- tests/proof run +- findings accepted/rejected, briefly why +- the clean review result from the final helper/review run, or why a remaining finding was consciously rejected + +Do not run another review solely to improve the final report wording. If the final helper run exited 0 and produced no accepted/actionable findings, report that exact run as clean. diff --git a/.agents/skills/autoreview/scripts/autoreview b/.agents/skills/autoreview/scripts/autoreview new file mode 100755 index 000000000..1c2f409cc --- /dev/null +++ b/.agents/skills/autoreview/scripts/autoreview @@ -0,0 +1,1188 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import concurrent.futures +import copy +import json +import os +import queue +import subprocess +import sys +import tempfile +import textwrap +import threading +import time +from pathlib import Path +from typing import Any, Callable + + +ENGINES = ("codex", "claude", "droid", "copilot") +THINKING_LEVELS_BY_ENGINE = { + "codex": {"low", "medium", "high", "xhigh"}, + "claude": {"low", "medium", "high", "xhigh", "max"}, + "droid": set(), + "copilot": set(), +} + + +SCHEMA: dict[str, Any] = { + "type": "object", + "additionalProperties": False, + "required": [ + "findings", + "overall_correctness", + "overall_explanation", + "overall_confidence", + ], + "properties": { + "findings": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": False, + "required": [ + "title", + "body", + "priority", + "confidence", + "category", + "code_location", + ], + "properties": { + "title": {"type": "string", "minLength": 1, "maxLength": 140}, + "body": {"type": "string", "minLength": 1, "maxLength": 2000}, + "priority": {"type": "string", "enum": ["P0", "P1", "P2", "P3"]}, + "confidence": {"type": "number", "minimum": 0, "maximum": 1}, + "category": { + "type": "string", + "enum": ["bug", "security", "regression", "test_gap", "maintainability"], + }, + "code_location": { + "type": "object", + "additionalProperties": False, + "required": ["file_path", "line"], + "properties": { + "file_path": {"type": "string", "minLength": 1}, + "line": {"type": "integer", "minimum": 1}, + }, + }, + }, + }, + }, + "overall_correctness": { + "type": "string", + "enum": ["patch is correct", "patch is incorrect"], + }, + "overall_explanation": {"type": "string", "minLength": 1, "maxLength": 3000}, + "overall_confidence": {"type": "number", "minimum": 0, "maximum": 1}, + }, +} + + +def run(args: list[str], cwd: Path, *, input_text: str | None = None, check: bool = True) -> subprocess.CompletedProcess[str]: + result = subprocess.run( + args, + cwd=cwd, + input=input_text, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if check and result.returncode != 0: + cmd = " ".join(args) + raise SystemExit(f"command failed ({result.returncode}): {cmd}\n{result.stderr or result.stdout}") + return result + + +def run_with_heartbeat( + args: list[str], + cwd: Path, + *, + input_text: str | None = None, + label: str, + heartbeat_seconds: int = 60, + stream_output: bool = False, + stream_display: Callable[[str, str], str | None] | None = None, +) -> subprocess.CompletedProcess[str]: + if stream_output: + return run_with_stream( + args, + cwd, + input_text=input_text, + label=label, + heartbeat_seconds=heartbeat_seconds, + stream_display=stream_display, + ) + started = time.monotonic() + proc = subprocess.Popen( + args, + cwd=cwd, + stdin=subprocess.PIPE if input_text is not None else None, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + first_communicate = True + while True: + try: + stdout, stderr = proc.communicate( + input=input_text if first_communicate else None, + timeout=heartbeat_seconds, + ) + return subprocess.CompletedProcess(args, int(proc.returncode or 0), stdout, stderr) + except subprocess.TimeoutExpired: + first_communicate = False + elapsed = int(time.monotonic() - started) + print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True) + + +def run_with_stream( + args: list[str], + cwd: Path, + *, + input_text: str | None, + label: str, + heartbeat_seconds: int, + stream_display: Callable[[str, str], str | None] | None, +) -> subprocess.CompletedProcess[str]: + started = time.monotonic() + proc = subprocess.Popen( + args, + cwd=cwd, + stdin=subprocess.PIPE if input_text is not None else None, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=1, + ) + events: queue.Queue[tuple[str, str | None]] = queue.Queue() + stdout_parts: list[str] = [] + stderr_parts: list[str] = [] + + def read_stream(name: str, stream: Any) -> None: + try: + for line in iter(stream.readline, ""): + events.put((name, line)) + finally: + events.put((name, None)) + + def write_stdin() -> None: + if proc.stdin is None or input_text is None: + return + try: + proc.stdin.write(input_text) + proc.stdin.close() + except BrokenPipeError: + return + + threads = [ + threading.Thread(target=read_stream, args=("stdout", proc.stdout), daemon=True), + threading.Thread(target=read_stream, args=("stderr", proc.stderr), daemon=True), + ] + for thread in threads: + thread.start() + stdin_thread = threading.Thread(target=write_stdin, daemon=True) + stdin_thread.start() + + open_streams = 2 + while open_streams: + try: + name, line = events.get(timeout=heartbeat_seconds) + except queue.Empty: + elapsed = int(time.monotonic() - started) + print(f"review still running: {label} elapsed={elapsed}s pid={proc.pid}", file=sys.stderr, flush=True) + continue + if line is None: + open_streams -= 1 + continue + if name == "stdout": + stdout_parts.append(line) + else: + stderr_parts.append(line) + display = stream_display(name, line) if stream_display else line + if display: + target = sys.stdout if name == "stdout" else sys.stderr + target.write(display) + target.flush() + + for thread in threads: + thread.join() + stdin_thread.join(timeout=1) + returncode = proc.wait() + return subprocess.CompletedProcess(args, returncode, "".join(stdout_parts), "".join(stderr_parts)) + + +def git(repo: Path, *args: str, check: bool = True) -> str: + return run(["git", *args], repo, check=check).stdout + + +def require_local_commitish(repo: Path, ref: str) -> str: + target = ref.strip() + if not target: + raise SystemExit("branch review requires a base ref") + result = run(["git", "rev-parse", "--verify", "--quiet", f"{target}^{{commit}}"], repo, check=False) + if result.returncode == 0: + return target + raise SystemExit( + f"base ref {target!r} is not available locally; fetch it manually or rerun with --base pointing to an existing local ref" + ) + + +def repo_root() -> Path: + result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if result.returncode != 0: + raise SystemExit("autoreview must run inside a git repository") + return Path(result.stdout.strip()).resolve() + + +def current_branch(repo: Path) -> str: + return git(repo, "branch", "--show-current", check=False).strip() or "detached" + + +def is_dirty(repo: Path) -> bool: + return bool(git(repo, "status", "--porcelain").strip()) + + +def choose_target(repo: Path, mode: str, base_ref: str | None) -> tuple[str, str | None]: + branch = current_branch(repo) + if mode == "local" or (mode == "auto" and is_dirty(repo)): + return "local", None + if mode == "commit": + return "commit", None + if mode == "branch" or (mode == "auto" and branch != "main"): + return "branch", base_ref or detect_pr_base(repo) or "origin/main" + raise SystemExit("no review target: clean main checkout and no forced mode") + + +def detect_pr_base(repo: Path) -> str | None: + if not shutil_which("gh"): + return None + result = run(["gh", "pr", "view", "--json", "baseRefName", "--jq", ".baseRefName"], repo, check=False) + base = result.stdout.strip() + return f"origin/{base}" if result.returncode == 0 and base else None + + +def shutil_which(name: str) -> str | None: + for part in os.environ.get("PATH", "").split(os.pathsep): + candidate = Path(part) / name + if candidate.exists() and os.access(candidate, os.X_OK): + return str(candidate) + return None + + +def bounded(text: str, limit: int = 180_000) -> str: + if len(text) <= limit: + return text + return text[:limit] + f"\n\n[truncated at {limit} characters]\n" + + +def bounded_field(text: str, limit: int) -> str: + if len(text) <= limit: + return text + suffix = "\n\n[truncated]" + return text[: max(0, limit - len(suffix))] + suffix + + +def read_text(path: Path, limit: int = 40_000) -> str: + try: + data = path.read_bytes() + except OSError as exc: + return f"[unreadable: {exc}]" + if b"\0" in data: + return "[binary file omitted]" + text = data.decode("utf-8", errors="replace") + return bounded(text, limit) + + +def local_bundle(repo: Path) -> str: + parts = [ + "# Git Status", + git(repo, "status", "--short"), + "# Staged Diff", + git(repo, "diff", "--cached", "--stat"), + bounded(git(repo, "diff", "--cached", "--patch", "--find-renames")), + "# Unstaged Diff", + git(repo, "diff", "--stat"), + bounded(git(repo, "diff", "--patch", "--find-renames")), + ] + untracked = [line for line in git(repo, "ls-files", "--others", "--exclude-standard").splitlines() if line] + if untracked: + parts.append("# Untracked Files") + for rel in untracked: + path = repo / rel + parts.append(f"## {rel}\n{read_text(path)}") + return "\n\n".join(parts) + + +def branch_bundle(repo: Path, base_ref: str) -> str: + return "\n\n".join( + [ + "# Branch Diff", + f"base: {base_ref}", + git(repo, "diff", "--stat", f"{base_ref}...HEAD"), + bounded(git(repo, "diff", "--patch", "--find-renames", f"{base_ref}...HEAD")), + ] + ) + + +def commit_bundle(repo: Path, commit_ref: str) -> str: + return "\n\n".join( + [ + "# Commit Diff", + f"commit: {commit_ref}", + git(repo, "show", "--stat", "--format=fuller", commit_ref), + bounded(git(repo, "show", "--patch", "--find-renames", "--format=fuller", commit_ref)), + ] + ) + + +def review_paths(repo: Path, target: str, target_ref: str | None, commit_ref: str) -> set[str]: + names: set[str] = set() + if target == "local": + sources = [ + git(repo, "diff", "--name-only", "--cached"), + git(repo, "diff", "--name-only"), + git(repo, "ls-files", "--others", "--exclude-standard"), + ] + elif target == "branch": + assert target_ref + sources = [git(repo, "diff", "--name-only", f"{target_ref}...HEAD")] + else: + sources = [git(repo, "show", "--name-only", "--format=", commit_ref)] + for source in sources: + for line in source.splitlines(): + path = line.strip() + if path: + names.add(path) + return names + + +def load_extra_prompt(args: argparse.Namespace) -> str: + chunks: list[str] = [] + for value in args.prompt or []: + chunks.append(value) + for path in args.prompt_file or []: + chunks.append(Path(path).read_text()) + return "\n\n".join(chunks) + + +def load_datasets(args: argparse.Namespace) -> str: + chunks: list[str] = [] + for spec in args.dataset or []: + path = Path(spec) + if path.is_dir(): + raise SystemExit(f"--dataset must be a file, got directory: {path}") + chunks.append(f"# Dataset: {path}\n{read_text(path)}") + return "\n\n".join(chunks) + + +def build_prompt(repo: Path, target: str, target_ref: str | None, bundle: str, extra_prompt: str, datasets: str) -> str: + target_line = f"{target} {target_ref}" if target_ref else target + return textwrap.dedent( + f""" + You are a senior code reviewer. Review the provided git change bundle only. + + Hard rules: + - Return exactly one JSON object and nothing else. Do not wrap it in Markdown. + - The JSON object must match this schema exactly: + {json.dumps(SCHEMA, indent=2)} + - Do not modify files. + - Do not invoke nested reviewers or review tools. + - Forbidden nested review commands include: codex review, autoreview, claude review, oracle review. + - You may use read-only tools and web search to inspect files, dependency contracts, upstream docs, current behavior, and security implications. + - Shell commands, if available, must be read-only inspection commands. Do not run tests, formatters, package installs, generators, network mutation commands, git mutation commands, or commands that write files. + - Report only actionable defects introduced or exposed by this change. + - Prefer high-signal findings over style feedback. + - Include security findings: injection, secret leaks, authz/authn bypass, path traversal, unsafe deserialization, unsafe filesystem or shell use, privacy leaks, and credential handling. + - Do not reject legitimate functionality merely because it touches shell, filesystem, network, auth, or sensitive data. Report a security finding only when the patch creates a concrete exploitable risk, removes an important safety check, or lacks validation at a trust boundary. + - For each finding, use the smallest file/line location that demonstrates the issue. + - If there are no actionable findings, return an empty findings array and mark the patch correct. + + Review target: {target_line} + Repository: {repo} + + {extra_prompt} + + {datasets} + + # Change Bundle + {bundle} + """ + ).strip() + + +def write_json_temp(data: dict[str, Any]) -> Path: + handle = tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) + with handle: + json.dump(data, handle) + return Path(handle.name) + + +def run_codex(args: argparse.Namespace, repo: Path, prompt: str) -> str: + if not args.tools: + raise SystemExit("--no-tools is not supported by the Codex engine; use --engine claude --no-tools for a no-tools run") + schema_path = write_json_temp(SCHEMA) + output_path = Path(tempfile.NamedTemporaryFile("w", suffix=".json", delete=False).name) + cmd = [args.codex_bin, "--ask-for-approval", "never"] + if args.web_search: + cmd.append("--search") + if args.model: + cmd.extend(["--model", args.model]) + if args.thinking: + cmd.extend(["-c", f'model_reasoning_effort="{args.thinking}"']) + cmd.append("exec") + if args.stream_engine_output: + cmd.append("--json") + cmd.extend( + [ + "--ephemeral", + "-C", + str(repo), + "-s", + "read-only", + "--output-schema", + str(schema_path), + "--output-last-message", + str(output_path), + "-", + ] + ) + result = run_with_heartbeat( + cmd, + repo, + input_text=prompt, + label="codex", + stream_output=args.stream_engine_output, + stream_display=CodexStreamDisplay() if args.stream_engine_output else None, + ) + try: + output = output_path.read_text() + finally: + schema_path.unlink(missing_ok=True) + output_path.unlink(missing_ok=True) + if result.returncode != 0: + raise SystemExit(f"codex engine failed ({result.returncode})\n{result.stderr or result.stdout}") + return output or result.stdout + + +def run_claude(args: argparse.Namespace, repo: Path, prompt: str) -> str: + cmd = [ + args.claude_bin, + "--print", + "--no-session-persistence", + "--output-format", + "stream-json" if args.stream_engine_output else "json", + "--json-schema", + json.dumps(SCHEMA), + ] + if args.tools: + cmd.extend(["--allowedTools", claude_allowed_tools(args)]) + else: + cmd.extend(["--tools", ""]) + if args.stream_engine_output: + cmd.append("--verbose") + if args.model: + cmd.extend(["--model", args.model]) + if args.thinking: + cmd.extend(["--effort", args.thinking]) + result = run_with_heartbeat( + cmd, + repo, + input_text=prompt, + label="claude", + stream_output=args.stream_engine_output, + stream_display=ClaudeStreamDisplay() if args.stream_engine_output else None, + ) + if result.returncode != 0: + raise SystemExit(f"claude engine failed ({result.returncode})\n{result.stderr or result.stdout}") + return result.stdout + + +def run_droid(args: argparse.Namespace, repo: Path, prompt: str) -> str: + if args.thinking: + raise SystemExit("--thinking is not supported by the droid engine") + prompt_path = Path(tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False).name) + prompt_path.write_text(prompt) + cmd = [ + args.droid_bin, + "exec", + "--cwd", + str(repo), + "--output-format", + "json", + "-f", + str(prompt_path), + ] + if args.model: + cmd.extend(["--model", args.model]) + if not args.tools: + cmd.extend(["--disabled-tools", "*"]) + result = run_with_heartbeat(cmd, repo, label="droid", stream_output=args.stream_engine_output) + prompt_path.unlink(missing_ok=True) + if result.returncode != 0: + raise SystemExit(f"droid engine failed ({result.returncode})\n{result.stderr or result.stdout}") + return result.stdout + + +def run_copilot(args: argparse.Namespace, repo: Path, prompt: str) -> str: + if args.thinking: + raise SystemExit("--thinking is not supported by the copilot engine") + if not args.tools: + raise SystemExit("--no-tools is not supported by the copilot engine; copilot requires a read-only file view tool to load the review bundle without exposing it in argv") + with tempfile.TemporaryDirectory(prefix="autoreview-copilot.") as tempdir: + prompt_path = Path(tempdir) / "prompt.txt" + prompt_path.write_text(prompt) + os.chmod(prompt_path, 0o600) + cmd = [ + args.copilot_bin, + "-C", + tempdir, + "-p", + "Read ./prompt.txt and follow it exactly. Return only the requested JSON object.", + "--output-format", + "json", + "--stream", + "on" if args.stream_engine_output else "off", + "--no-ask-user", + "--disable-builtin-mcps", + ] + if args.model: + cmd.extend(["--model", args.model]) + cmd.extend( + [ + "--available-tools=read_agent,rg,view,web_fetch", + "--allow-tool=read_agent", + "--allow-tool=rg", + "--allow-tool=view", + "--allow-tool=web_fetch", + ] + ) + if args.web_search: + cmd.append("--allow-all-urls") + result = run_with_heartbeat(cmd, Path(tempdir), label="copilot", stream_output=args.stream_engine_output) + if result.returncode != 0: + raise SystemExit(f"copilot engine failed ({result.returncode})\n{result.stderr or result.stdout}") + return result.stdout + + +class CodexStreamDisplay: + def __init__(self, *, activity_seconds: int = 20) -> None: + self.activity_seconds = activity_seconds + self.hidden_events = 0 + self.last_visible = time.monotonic() + + def __call__(self, name: str, line: str) -> str | None: + if name != "stdout": + return line + try: + event = json.loads(line) + except json.JSONDecodeError: + return self.visible(line) + event_type = event.get("type") + if event_type == "thread.started": + return self.visible(f"codex thread: {event.get('thread_id', '')}\n") + if event_type == "turn.started": + return self.visible("codex turn started\n") + if event_type == "turn.completed": + usage = event.get("usage") + message = format_codex_usage(usage) + "\n" if isinstance(usage, dict) else "codex turn completed\n" + return self.visible(self.flush_hidden() + message) + item = event.get("item") + if isinstance(item, dict) and item.get("type") == "agent_message" and isinstance(item.get("text"), str): + return self.visible(self.flush_hidden() + item["text"].rstrip() + "\n") + return self.hidden_activity() + + def hidden_activity(self) -> str | None: + self.hidden_events += 1 + if time.monotonic() - self.last_visible < self.activity_seconds: + return None + return self.visible(self.flush_hidden()) + + def flush_hidden(self) -> str: + if not self.hidden_events: + return "" + count = self.hidden_events + self.hidden_events = 0 + return f"codex activity: {count} hidden tool/status events\n" + + def visible(self, text: str) -> str: + self.last_visible = time.monotonic() + return text + + +class ClaudeStreamDisplay: + def __init__(self, *, activity_seconds: int = 20) -> None: + self.activity_seconds = activity_seconds + self.hidden_events = 0 + self.last_visible = time.monotonic() + self.started = False + + def __call__(self, name: str, line: str) -> str | None: + if name != "stdout": + return line + try: + event = json.loads(line) + except json.JSONDecodeError: + return self.visible(line) + event_type = event.get("type") + if event_type == "system" and not self.started: + self.started = True + return self.visible("claude turn started\n") + if event_type == "assistant": + return self.assistant_message(event) + if event_type == "result": + return self.visible(self.flush_hidden() + self.result_summary(event)) + return self.hidden_activity() + + def assistant_message(self, event: dict[str, Any]) -> str | None: + message = event.get("message") + if not isinstance(message, dict): + return self.hidden_activity() + chunks: list[str] = [] + for item in message.get("content", []): + if not isinstance(item, dict): + continue + if item.get("type") == "text" and isinstance(item.get("text"), str): + chunks.append(item["text"].rstrip()) + if chunks: + return self.visible(self.flush_hidden() + "\n".join(chunks) + "\n") + return self.hidden_activity() + + def result_summary(self, event: dict[str, Any]) -> str: + usage = event.get("usage") + fields: list[str] = [] + if isinstance(usage, dict): + for key in ( + "input_tokens", + "cache_read_input_tokens", + "cache_creation_input_tokens", + "output_tokens", + ): + value = usage.get(key) + if isinstance(value, int): + fields.append(f"{key}={value}") + cost = event.get("total_cost_usd") + if isinstance(cost, (int, float)) and not isinstance(cost, bool): + fields.append(f"cost_usd={cost:.6f}") + return "claude usage: " + " ".join(fields) + "\n" if fields else "claude turn completed\n" + + def hidden_activity(self) -> str | None: + self.hidden_events += 1 + if time.monotonic() - self.last_visible < self.activity_seconds: + return None + return self.visible(self.flush_hidden()) + + def flush_hidden(self) -> str: + if not self.hidden_events: + return "" + count = self.hidden_events + self.hidden_events = 0 + return f"claude activity: {count} hidden tool/status events\n" + + def visible(self, text: str) -> str: + self.last_visible = time.monotonic() + return text + + +def format_codex_usage(usage: dict[str, Any]) -> str: + fields = [ + "input_tokens", + "cached_input_tokens", + "output_tokens", + "reasoning_output_tokens", + ] + parts = [f"{field}={usage[field]}" for field in fields if isinstance(usage.get(field), int)] + return "codex usage: " + " ".join(parts) if parts else "codex usage: unavailable" + + +def claude_allowed_tools(args: argparse.Namespace) -> str: + tools = [tool.strip() for tool in args.claude_allowed_tools.split(",") if tool.strip()] + if not args.web_search: + tools = [tool for tool in tools if tool not in {"WebSearch", "WebFetch"}] + return ",".join(tools) + + +def extract_json(text: str) -> dict[str, Any]: + stripped = text.strip() + if not stripped: + raise SystemExit("review engine returned empty output") + try: + parsed = json.loads(stripped) + except json.JSONDecodeError as exc: + fenced_report = parse_json_candidate(stripped) + if isinstance(fenced_report, dict) and "findings" in fenced_report: + return fenced_report + jsonl_report = extract_json_from_jsonl(stripped) + if jsonl_report: + return jsonl_report + raise SystemExit(f"review engine returned non-JSON output: {exc}\n{stripped[:2000]}") + if isinstance(parsed, dict) and "findings" in parsed: + return parsed + if isinstance(parsed, dict) and isinstance(parsed.get("structured_output"), dict): + return parsed["structured_output"] + if isinstance(parsed, dict) and isinstance(parsed.get("result"), str): + result_json = parse_json_candidate(parsed["result"]) + if isinstance(result_json, dict) and "findings" in result_json: + return result_json + raise SystemExit(f"review engine result was not structured JSON:\n{parsed['result'][:2000]}") + jsonl_report = extract_json_from_jsonl(stripped) + if jsonl_report: + return jsonl_report + raise SystemExit(f"review engine returned unexpected JSON shape:\n{json.dumps(parsed)[:2000]}") + + +def extract_json_from_jsonl(text: str) -> dict[str, Any] | None: + candidates: list[str | dict[str, Any]] = [] + for line in text.splitlines(): + line = line.strip() + if not line: + continue + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + if not isinstance(event, dict): + continue + part = event.get("part") + if isinstance(part, dict) and isinstance(part.get("text"), str): + candidates.append(part["text"]) + data = event.get("data") + if isinstance(data, dict) and isinstance(data.get("content"), str): + candidates.append(data["content"]) + if isinstance(event.get("result"), str): + candidates.append(event["result"]) + if isinstance(event.get("structured_output"), dict): + candidates.append(event["structured_output"]) + for candidate in reversed(candidates): + if isinstance(candidate, dict): + if "findings" in candidate: + return candidate + continue + parsed = parse_json_candidate(candidate) + if isinstance(parsed, dict) and "findings" in parsed: + return parsed + return None + + +def parse_json_candidate(text: str) -> Any | None: + stripped = text.strip() + if stripped.startswith("```"): + lines = stripped.splitlines() + if lines and lines[0].startswith("```") and lines[-1].strip() == "```": + stripped = "\n".join(lines[1:-1]).strip() + try: + parsed = json.loads(stripped) + except json.JSONDecodeError: + return None + if isinstance(parsed, str) and parsed != text: + nested = parse_json_candidate(parsed) + return nested if nested is not None else parsed + return parsed + + +def validate_report(report: dict[str, Any], repo: Path, changed_paths: set[str], required: list[str]) -> None: + allowed_top = {"findings", "overall_correctness", "overall_explanation", "overall_confidence"} + extra_top = set(report) - allowed_top + if extra_top: + raise SystemExit(f"review JSON has unexpected top-level keys: {sorted(extra_top)}") + for key in SCHEMA["required"]: + if key not in report: + raise SystemExit(f"review JSON missing required key: {key}") + if not isinstance(report["findings"], list): + raise SystemExit("review JSON findings must be an array") + if report.get("overall_correctness") not in {"patch is correct", "patch is incorrect"}: + raise SystemExit(f"review JSON has invalid overall_correctness: {report.get('overall_correctness')}") + if not isinstance(report.get("overall_explanation"), str) or not report["overall_explanation"]: + raise SystemExit("review JSON overall_explanation must be a non-empty string") + if len(report["overall_explanation"]) > 3000: + raise SystemExit("review JSON overall_explanation is too long") + if not number_in_range(report.get("overall_confidence")): + raise SystemExit("review JSON overall_confidence must be numeric") + finding_text = "" + kept_findings: list[dict[str, Any]] = [] + ignored_findings: list[tuple[int, dict[str, Any], str, int]] = [] + for index, finding in enumerate(report["findings"]): + if not isinstance(finding, dict): + raise SystemExit(f"finding {index} must be an object") + allowed_finding = {"title", "body", "priority", "confidence", "category", "code_location"} + extra_finding = set(finding) - allowed_finding + if extra_finding: + raise SystemExit(f"finding {index} has unexpected keys: {sorted(extra_finding)}") + for key in allowed_finding: + if key not in finding: + raise SystemExit(f"finding {index} missing required key: {key}") + title = finding.get("title") + if not isinstance(title, str) or not title or len(title) > 140: + raise SystemExit(f"finding {index} has invalid title") + body = finding.get("body") + if not isinstance(body, str) or not body or len(body) > 2000: + raise SystemExit(f"finding {index} has invalid body") + priority = finding.get("priority") + if priority not in {"P0", "P1", "P2", "P3"}: + raise SystemExit(f"finding {index} has invalid priority: {priority}") + if not number_in_range(finding.get("confidence")): + raise SystemExit(f"finding {index} has invalid confidence") + category = finding.get("category") + if category not in {"bug", "security", "regression", "test_gap", "maintainability"}: + raise SystemExit(f"finding {index} has invalid category: {category}") + location = finding.get("code_location") + if not isinstance(location, dict): + raise SystemExit(f"finding {index} missing code_location") + rel = str(location.get("file_path", "")).strip() + line = location.get("line") + if not rel or not isinstance(line, int) or line < 1: + raise SystemExit(f"finding {index} has invalid location: {location}") + if Path(rel).is_absolute() or ".." in Path(rel).parts: + raise SystemExit(f"finding {index} uses invalid file path: {rel}") + if rel not in changed_paths: + ignored_findings.append((index, finding, rel, line)) + continue + kept_findings.append(finding) + finding_text += "\n" + json.dumps(finding, sort_keys=True) + if ignored_findings: + for index, finding, rel, line in ignored_findings: + title = finding.get("title", "") + print( + f"autoreview ignored out-of-scope finding {index}: {title} ({rel}:{line})", + file=sys.stderr, + ) + print(bounded_field(str(finding.get("body", "")), 500), file=sys.stderr) + report["findings"] = kept_findings + if not kept_findings and report["overall_correctness"] == "patch is incorrect": + note = f"Ignored {len(ignored_findings)} out-of-scope finding(s) outside the reviewed change." + explanation = report["overall_explanation"].rstrip() + report["overall_correctness"] = "patch is correct" + report["overall_explanation"] = bounded_field(f"{explanation}\n\n{note}", 3000) + haystack = finding_text.lower() + for needle in required: + if needle.lower() not in haystack: + raise SystemExit(f"required finding text not found: {needle}") + + +def number_in_range(value: Any) -> bool: + return isinstance(value, (int, float)) and not isinstance(value, bool) and 0 <= value <= 1 + + +def print_report(report: dict[str, Any], *, label: str = "autoreview") -> None: + findings = report["findings"] + if findings: + print(f"{label} findings: {len(findings)}") + elif report["overall_correctness"] == "patch is incorrect": + print(f"{label} verdict: patch is incorrect without discrete findings") + else: + print(f"{label} clean: no accepted/actionable findings reported") + for finding in findings: + loc = finding["code_location"] + print(f"[{finding['priority']}] {finding['title']}") + print(f"{loc['file_path']}:{loc['line']}") + print(f"{finding['body']}") + print() + print(f"overall: {report['overall_correctness']} ({report['overall_confidence']})") + print(report["overall_explanation"]) + + +def start_parallel_tests(command: str, repo: Path) -> tuple[subprocess.Popen, float]: + print(f"tests: {command}") + return subprocess.Popen(command, cwd=repo, shell=True), time.time() + + +def finish_parallel_tests(proc: subprocess.Popen, started: float) -> int: + proc.wait() + print(f"tests exit: {proc.returncode} after {int(time.time() - started)}s") + return int(proc.returncode or 0) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Bundle-driven AI code review.") + parser.add_argument("--mode", choices=["auto", "local", "branch", "commit"], default="auto") + parser.add_argument("--base") + parser.add_argument("--commit", default="HEAD") + parser.add_argument("--engine", choices=ENGINES, default=os.environ.get("AUTOREVIEW_ENGINE", "codex")) + parser.add_argument("--reviewers", help="Comma-separated review panel, e.g. codex,claude or codex:gpt-5:high.") + parser.add_argument("--panel", action="store_true", help="Run a Codex/Claude review panel unless --engine changes the first reviewer.") + parser.add_argument("--model", action="append", help="Model for all reviewers or engine=model. Repeatable.") + parser.add_argument("--thinking", action="append", help="Thinking/effort for all reviewers or engine=level. Repeatable. Codex: low, medium, high, xhigh. Claude: low, medium, high, xhigh, max.") + parser.add_argument("--allow-partial-panel", action="store_true", help="Continue panel output when one reviewer fails.") + parser.add_argument("--codex-bin", default=os.environ.get("CODEX_BIN", "codex")) + parser.add_argument("--claude-bin", default=os.environ.get("CLAUDE_BIN", "claude")) + parser.add_argument("--droid-bin", default=os.environ.get("DROID_BIN", "droid")) + parser.add_argument("--copilot-bin", default=os.environ.get("COPILOT_BIN", "copilot")) + parser.add_argument("--no-tools", dest="tools", action="store_false", default=True, help="Disable tools for engines that support it. Codex and copilot reject no-tools review.") + parser.add_argument("--no-web-search", dest="web_search", action="store_false", default=True) + parser.add_argument( + "--claude-allowed-tools", + default=os.environ.get( + "AUTOREVIEW_CLAUDE_TOOLS", + "Read,Grep,Glob,WebSearch,WebFetch", + ), + ) + parser.add_argument("--prompt", action="append", help="Additional review instruction text.") + parser.add_argument("--prompt-file", action="append", help="Additional review instruction file.") + parser.add_argument("--dataset", action="append", help="Extra evidence file to include in the review bundle.") + parser.add_argument("--output", help="Write human output to a file as well as stdout.") + parser.add_argument("--json-output", help="Write validated structured review JSON.") + parser.add_argument( + "--stream-engine-output", + action="store_true", + default=os.environ.get("AUTOREVIEW_STREAM_ENGINE_OUTPUT") == "1", + help="Stream review engine output while preserving buffered output for validation. Codex output is filtered to hide tool/file chatter.", + ) + parser.add_argument("--parallel-tests", help="Run a test command concurrently with review; failure fails the helper.") + parser.add_argument("--require-finding", action="append", default=[], help="Require finding text to contain this substring.") + parser.add_argument("--expect-findings", action="store_true", help="Treat findings as success; for harness acceptance tests.") + parser.add_argument("--dry-run", action="store_true") + args = parser.parse_args() + if args.engine not in ENGINES: + raise SystemExit(f"invalid --engine/AUTOREVIEW_ENGINE: {args.engine}") + return args + + +def run_engine(args: argparse.Namespace, repo: Path, prompt: str) -> str: + if args.engine == "codex": + return run_codex(args, repo, prompt) + if args.engine == "claude": + return run_claude(args, repo, prompt) + if args.engine == "droid": + return run_droid(args, repo, prompt) + if args.engine == "copilot": + return run_copilot(args, repo, prompt) + raise SystemExit(f"unsupported engine: {args.engine}") + + +def parse_keyed_options(values: list[str] | None, option: str) -> tuple[str | None, dict[str, str]]: + global_value: str | None = None + per_engine: dict[str, str] = {} + for raw in values or []: + value = raw.strip() + if not value: + raise SystemExit(f"--{option} cannot be empty") + if "=" in value: + engine, engine_value = value.split("=", 1) + engine = engine.strip() + engine_value = engine_value.strip() + if engine not in ENGINES: + raise SystemExit(f"--{option} uses unknown engine: {engine}") + if not engine_value: + raise SystemExit(f"--{option} for {engine} cannot be empty") + if engine in per_engine: + raise SystemExit(f"--{option} specified more than once for {engine}") + per_engine[engine] = engine_value + else: + if global_value is not None: + raise SystemExit(f"--{option} global value specified more than once") + global_value = value + return global_value, per_engine + + +def parse_reviewer_token(token: str) -> tuple[str, str | None, str | None]: + parts = [part.strip() for part in token.split(":")] + if len(parts) > 3 or not parts[0]: + raise SystemExit(f"invalid reviewer spec: {token}") + engine = parts[0] + if engine not in ENGINES: + raise SystemExit(f"unknown reviewer engine: {engine}") + model = parts[1] if len(parts) >= 2 and parts[1] else None + thinking = parts[2] if len(parts) == 3 and parts[2] else None + return engine, model, thinking + + +def reviewer_args(args: argparse.Namespace) -> list[argparse.Namespace]: + global_model, model_by_engine = parse_keyed_options(args.model, "model") + global_thinking, thinking_by_engine = parse_keyed_options(args.thinking, "thinking") + reviewers: list[tuple[str, str | None, str | None]] = [] + if args.reviewers: + tokens = [token.strip() for token in args.reviewers.split(",") if token.strip()] + if len(tokens) == 1 and tokens[0] == "all": + tokens = list(ENGINES) + reviewers = [parse_reviewer_token(token) for token in tokens] + elif args.panel: + engines = [args.engine] + for engine in ("codex", "claude"): + if engine not in engines: + engines.append(engine) + reviewers = [(engine, None, None) for engine in engines] + else: + reviewers = [(args.engine, None, None)] + + seen: set[str] = set() + result: list[argparse.Namespace] = [] + for engine, inline_model, inline_thinking in reviewers: + if engine in seen: + raise SystemExit(f"reviewer specified more than once: {engine}") + seen.add(engine) + model = inline_model or model_by_engine.get(engine) or global_model + thinking = inline_thinking or thinking_by_engine.get(engine) or global_thinking + if thinking and thinking not in THINKING_LEVELS_BY_ENGINE[engine]: + valid = ", ".join(sorted(THINKING_LEVELS_BY_ENGINE[engine])) or "none" + raise SystemExit(f"invalid thinking level for {engine}: {thinking} (valid: {valid})") + clone = copy.copy(args) + clone.engine = engine + clone.model = model + clone.thinking = thinking + result.append(clone) + return result + + +def reviewer_label(args: argparse.Namespace) -> str: + parts = [args.engine] + if args.model: + parts.append(f"model={args.model}") + if args.thinking: + parts.append(f"thinking={args.thinking}") + return " ".join(parts) + + +def run_reviewer(args: argparse.Namespace, repo: Path, prompt: str, changed_paths: set[str], required: list[str]) -> dict[str, Any]: + raw = run_engine(args, repo, prompt) + report = extract_json(raw) + validate_report(report, repo, changed_paths, required) + return report + + +def merge_panel_reports(reports: list[tuple[str, dict[str, Any]]]) -> dict[str, Any]: + findings: list[dict[str, Any]] = [] + seen: set[tuple[str, int, str, str]] = set() + for label, report in reports: + for finding in report["findings"]: + location = finding["code_location"] + key = ( + location["file_path"], + location["line"], + finding["category"], + " ".join(finding["title"].lower().split()), + ) + if key in seen: + continue + seen.add(key) + merged = copy.deepcopy(finding) + merged["body"] = bounded_field(f"Reviewer: {label}\n\n{merged['body']}", 2000) + findings.append(merged) + incorrect = bool(findings) or any(report["overall_correctness"] == "patch is incorrect" for _, report in reports) + summary = ", ".join(f"{label}: {len(report['findings'])} finding(s)" for label, report in reports) + return { + "findings": findings, + "overall_correctness": "patch is incorrect" if incorrect else "patch is correct", + "overall_explanation": f"Panel review complete. {summary}.", + "overall_confidence": max((report["overall_confidence"] for _, report in reports), default=0.5), + } + + +def run_panel(args: argparse.Namespace, reviewers: list[argparse.Namespace], repo: Path, prompt: str, changed_paths: set[str]) -> dict[str, Any]: + reports: list[tuple[str, dict[str, Any]]] = [] + failures: list[str] = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=len(reviewers)) as executor: + future_by_label = { + executor.submit(run_reviewer, reviewer, repo, prompt, changed_paths, []): reviewer_label(reviewer) + for reviewer in reviewers + } + for future in concurrent.futures.as_completed(future_by_label): + label = future_by_label[future] + try: + reports.append((label, future.result())) + except SystemExit as exc: + failures.append(f"{label}: {exc}") + except Exception as exc: + failures.append(f"{label}: {exc}") + if failures and not args.allow_partial_panel: + raise SystemExit("autoreview panel failed\n" + "\n".join(failures)) + if failures: + for failure in failures: + print(f"panel reviewer failed: {failure}") + if not reports: + raise SystemExit("autoreview panel produced no reports") + reports.sort(key=lambda item: item[0]) + report = merge_panel_reports(reports) + validate_report(report, repo, changed_paths, args.require_finding) + return report + + +def main() -> int: + args = parse_args() + reviewers = reviewer_args(args) + repo = repo_root() + target, target_ref = choose_target(repo, args.mode, args.base) + print(f"autoreview target: {target}") + print(f"branch: {current_branch(repo)}") + if len(reviewers) == 1 and not args.reviewers and not args.panel: + print(f"engine: {reviewers[0].engine}") + if reviewers[0].model: + print(f"model: {reviewers[0].model}") + if reviewers[0].thinking: + print(f"thinking: {reviewers[0].thinking}") + else: + print(f"reviewers: {', '.join(reviewer_label(reviewer) for reviewer in reviewers)}") + print(f"tools: {'on' if args.tools else 'off'}") + print(f"web_search: {'on' if args.web_search else 'off'}") + display_ref = args.commit if target == "commit" else target_ref + if display_ref: + print(f"ref: {display_ref}") + if args.dry_run: + return 0 + + if target == "local": + bundle = local_bundle(repo) + elif target == "branch": + assert target_ref + target_ref = require_local_commitish(repo, target_ref) + bundle = branch_bundle(repo, target_ref) + else: + bundle = commit_bundle(repo, args.commit) + target_ref = args.commit + prompt = build_prompt(repo, target, target_ref, bundle, load_extra_prompt(args), load_datasets(args)) + changed_paths = review_paths(repo, target, target_ref, args.commit) + print(f"bundle: {len(prompt)} chars") + + tests_proc: tuple[subprocess.Popen, float] | None = None + if args.parallel_tests: + tests_proc = start_parallel_tests(args.parallel_tests, repo) + try: + if len(reviewers) == 1: + report = run_reviewer(reviewers[0], repo, prompt, changed_paths, args.require_finding) + label = "autoreview" + else: + report = run_panel(args, reviewers, repo, prompt, changed_paths) + label = "autoreview panel" + if args.json_output: + Path(args.json_output).write_text(json.dumps(report, indent=2) + "\n") + + if args.output: + original_stdout = sys.stdout + with Path(args.output).open("w") as handle: + sys.stdout = Tee(original_stdout, handle) + print_report(report, label=label) + sys.stdout = original_stdout + else: + print_report(report, label=label) + finally: + tests_status = finish_parallel_tests(*tests_proc) if tests_proc else 0 + + has_findings = bool(report["findings"]) + overall_incorrect = report["overall_correctness"] == "patch is incorrect" + if tests_status != 0: + return 1 + if args.expect_findings: + return 0 if has_findings else 1 + return 1 if has_findings or overall_incorrect else 0 + + +class Tee: + def __init__(self, *streams: Any) -> None: + self.streams = streams + + def write(self, data: str) -> None: + for stream in self.streams: + stream.write(data) + + def flush(self) -> None: + for stream in self.streams: + stream.flush() + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.agents/skills/autoreview/scripts/test-review-harness b/.agents/skills/autoreview/scripts/test-review-harness new file mode 100755 index 000000000..58105bc55 --- /dev/null +++ b/.agents/skills/autoreview/scripts/test-review-harness @@ -0,0 +1,176 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: test-review-harness [--fixture malicious|benign] [--engine codex|claude|droid|copilot]... + +Creates a temporary git repo with either a deliberately unsafe patch or a +security-sensitive-but-safe patch, then verifies each selected engine through +autoreview. +Default engines: codex, claude. +EOF +} + +engines=() +fixture=malicious +while [[ $# -gt 0 ]]; do + case "$1" in + --fixture) + fixture=${2:-} + shift 2 + ;; + --engine) + engines+=("${2:-}") + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + usage >&2 + exit 2 + ;; + esac +done + +case "$fixture" in + malicious|benign) ;; + *) + usage >&2 + exit 2 + ;; +esac + +if [[ ${#engines[@]} -eq 0 ]]; then + engines=(codex claude) +fi + +script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +repo=$(mktemp -d "${TMPDIR:-/tmp}/autoreview-fixture.XXXXXX") +trap 'rm -rf "$repo"' EXIT + +cd "$repo" +git init --quiet +git config user.name "Review Fixture" +git config user.email "review-fixture@example.com" + +if [[ "$fixture" == malicious ]]; then + cat > app.js <<'EOF' +export function uploadPath(name) { + return `uploads/${name.replaceAll("/", "")}`; +} + +export function publicUser(user) { + return { id: user.id, name: user.name }; +} +EOF +else + cat > app.js <<'EOF' +import path from "node:path"; + +const uploadsRoot = path.resolve("uploads"); + +function safeChildPath(root, name) { + if (name.includes("/") || name.includes("\\") || name.includes("\0")) { + throw new Error("invalid child name"); + } + const resolved = path.resolve(root, name); + if (!resolved.startsWith(root + path.sep)) throw new Error("path escaped root"); + return resolved; +} + +export function uploadPath(name) { + return safeChildPath(uploadsRoot, name); +} + +export function publicUser(user) { + return { id: user.id, name: user.name }; +} +EOF +fi + +git add app.js +git commit --quiet -m "initial safe version" + +if [[ "$fixture" == malicious ]]; then + cat > app.js <<'EOF' +import { execSync } from "node:child_process"; + +export function uploadPath(name) { + return `uploads/${name}`; +} + +export function deleteUpload(name) { + return execSync(`rm -rf uploads/${name}`); +} + +export function publicUser(user) { + return { id: user.id, name: user.name, password: user.password }; +} +EOF +else + cat > app.js <<'EOF' +import { execFile } from "node:child_process"; +import path from "node:path"; +import { promisify } from "node:util"; + +const uploadsRoot = path.resolve("uploads"); +const reposRoot = path.resolve("repos"); +const execFileAsync = promisify(execFile); + +function safeChildPath(root, name) { + if (name.includes("/") || name.includes("\\") || name.includes("\0")) { + throw new Error("invalid child name"); + } + const resolved = path.resolve(root, name); + if (!resolved.startsWith(root + path.sep)) throw new Error("path escaped root"); + return resolved; +} + +function repoChildPath(root, name) { + if (!/^[A-Za-z0-9._-]+$/.test(name)) throw new Error("invalid repo name"); + return safeChildPath(root, name); +} + +export function uploadPath(name) { + return safeChildPath(uploadsRoot, name); +} + +export async function repoStatus(repoName) { + const { stdout } = await execFileAsync("git", ["status", "--short"], { + cwd: repoChildPath(reposRoot, repoName), + encoding: "utf8", + maxBuffer: 16 * 1024 * 1024, + }); + return stdout; +} + +export function publicUser(user) { + return { id: user.id, name: user.name }; +} + +export function accountSettingsForOwner(user, requesterId) { + if (String(requesterId) !== String(user.id)) throw new Error("forbidden"); + return { id: user.id, hasPassword: Boolean(user.passwordHash) }; +} +EOF +fi + +for engine in "${engines[@]}"; do + echo "== $engine ==" + if [[ "$fixture" == malicious ]]; then + "$script_dir/autoreview" \ + --mode local \ + --engine "$engine" \ + --prompt "This is an acceptance test fixture. The changed app.js patch contains real security bugs. Review normally and report only actionable defects from the patch." \ + --require-finding "command" \ + --expect-findings + else + "$script_dir/autoreview" \ + --mode local \ + --engine "$engine" \ + --prompt "Security calibration fixture: this patch intentionally uses filesystem paths, async execFile, and owner-gated password-adjacent state safely. Do not flag legitimate shell/filesystem/auth-adjacent functionality unless there is a concrete exploitable risk in the diff." + fi +done diff --git a/.compozy/tasks/tokens-perf/analysis/collect_token_baseline.py b/.compozy/tasks/tokens-perf/analysis/collect_token_baseline.py new file mode 100644 index 000000000..9c21c59e1 --- /dev/null +++ b/.compozy/tasks/tokens-perf/analysis/collect_token_baseline.py @@ -0,0 +1,421 @@ +#!/usr/bin/env python3 +"""Read-only token baseline collector for the tokens-perf incident. + +The script intentionally reads only: +- Claude Code JSONL transcripts from one project directory. +- AGH SQLite/crash-bundle observability files. + +It does not inspect provider auth/config/secret paths. +""" + +from __future__ import annotations + +import argparse +import json +import sqlite3 +from collections import Counter, defaultdict +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + + +TOKEN_FIELDS = ( + "input_tokens", + "cache_creation_input_tokens", + "cache_read_input_tokens", + "output_tokens", +) + + +def parse_time(value: str | None) -> datetime | None: + if not value: + return None + text = value.strip() + if text.endswith("Z"): + text = text[:-1] + "+00:00" + try: + parsed = datetime.fromisoformat(text) + except ValueError: + return None + if parsed.tzinfo is None: + return parsed.replace(tzinfo=timezone.utc) + return parsed.astimezone(timezone.utc) + + +def iso_utc(value: datetime | None) -> str | None: + if value is None: + return None + return value.astimezone(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z") + + +def minute_bucket(value: datetime | None) -> str: + if value is None: + return "unknown" + rounded = value.astimezone(timezone.utc).replace(second=0, microsecond=0) + return iso_utc(rounded) or "unknown" + + +def text_from_content(value: Any, max_chars: int = 80_000) -> str: + parts: list[str] = [] + + def walk(node: Any) -> None: + if sum(len(part) for part in parts) >= max_chars: + return + if isinstance(node, str): + parts.append(node[:max_chars]) + return + if isinstance(node, list): + for item in node: + walk(item) + return + if isinstance(node, dict): + for key in ("text", "content", "input"): + if key in node: + walk(node[key]) + return + + walk(value) + text = "\n".join(parts) + if len(text) > max_chars: + return text[:max_chars] + return text + + +@dataclass +class TranscriptStats: + path: Path + first_timestamp: datetime | None = None + first_user_text: str = "" + raw_assistant_usage_rows: int = 0 + deduped_assistant_calls: int = 0 + usage: Counter[str] = field(default_factory=Counter) + + def classify(self) -> str: + lowered = self.first_user_text.lower() + if "extractor candidate prompt v1" in lowered and "what_not_to_save v1" in lowered: + return "memory_extractor" + if " dict[str, Any]: + seen: set[tuple[str, str, str]] = set() + totals: Counter[str] = Counter() + by_class: dict[str, Counter[str]] = defaultdict(Counter) + session_starts_by_class: Counter[str] = Counter() + calls_by_minute: Counter[str] = Counter() + cache_read_by_minute: Counter[str] = Counter() + top_files: list[dict[str, Any]] = [] + transcript_count = 0 + raw_rows = 0 + deduped_calls = 0 + + for path in sorted(project_dir.glob("*.jsonl")): + transcript_count += 1 + stats = TranscriptStats(path=path) + + with path.open("r", encoding="utf-8", errors="replace") as handle: + for line_no, line in enumerate(handle, 1): + try: + row = json.loads(line) + except json.JSONDecodeError: + continue + + timestamp = parse_time(row.get("timestamp")) + if timestamp and (stats.first_timestamp is None or timestamp < stats.first_timestamp): + stats.first_timestamp = timestamp + + row_type = row.get("type") + message = row.get("message") if isinstance(row.get("message"), dict) else {} + + if row_type == "user" and not stats.first_user_text: + content = message.get("content", row.get("content")) + stats.first_user_text = text_from_content(content) + + if row_type != "assistant": + continue + + usage = message.get("usage") if isinstance(message.get("usage"), dict) else None + if not usage: + continue + if not in_window(timestamp, start, end): + continue + + stats.raw_assistant_usage_rows += 1 + raw_rows += 1 + + request_id = str(row.get("requestId") or row.get("request_id") or "") + message_id = str(message.get("id") or row.get("uuid") or f"line-{line_no}") + dedupe_key = (path.name, request_id, message_id) + if dedupe_key in seen: + continue + seen.add(dedupe_key) + + stats.deduped_assistant_calls += 1 + deduped_calls += 1 + + for field_name in TOKEN_FIELDS: + value = usage.get(field_name) or 0 + if isinstance(value, (int, float)): + stats.usage[field_name] += int(value) + totals[field_name] += int(value) + + bucket = minute_bucket(timestamp) + calls_by_minute[bucket] += 1 + cache_read_by_minute[bucket] += int(usage.get("cache_read_input_tokens") or 0) + + cls = stats.classify() + if stats.first_timestamp and in_window(stats.first_timestamp, start, end): + session_starts_by_class[cls] += 1 + if stats.raw_assistant_usage_rows > 0: + by_class[cls]["transcripts"] += 1 + by_class[cls]["raw_assistant_usage_rows"] += stats.raw_assistant_usage_rows + by_class[cls]["deduped_assistant_calls"] += stats.deduped_assistant_calls + for field_name in TOKEN_FIELDS: + by_class[cls][field_name] += stats.usage[field_name] + + top_files.append( + { + "file": path.name, + "class": cls, + "first_timestamp": iso_utc(stats.first_timestamp), + "deduped_assistant_calls": stats.deduped_assistant_calls, + "cache_creation_input_tokens": stats.usage["cache_creation_input_tokens"], + "cache_read_input_tokens": stats.usage["cache_read_input_tokens"], + "output_tokens": stats.usage["output_tokens"], + } + ) + + context_total = ( + totals["input_tokens"] + + totals["cache_creation_input_tokens"] + + totals["cache_read_input_tokens"] + ) + top_files.sort(key=lambda row: int(row["cache_read_input_tokens"]), reverse=True) + + return { + "project_dir": str(project_dir), + "transcripts": transcript_count, + "raw_assistant_usage_rows": raw_rows, + "deduped_assistant_calls": deduped_calls, + "usage": dict(totals), + "context_token_movement": context_total, + "by_class": {key: dict(value) for key, value in sorted(by_class.items())}, + "session_starts_in_window_by_class": dict(session_starts_by_class), + "top_files_by_cache_read": top_files[:15], + "top_minutes_by_calls": calls_by_minute.most_common(10), + "top_minutes_by_cache_read": cache_read_by_minute.most_common(10), + } + + +def in_window(value: datetime | None, start: datetime | None, end: datetime | None) -> bool: + if value is None: + return False + if start and value < start: + return False + if end and value > end: + return False + return True + + +def sqlite_connect_readonly(path: Path) -> sqlite3.Connection: + return sqlite3.connect(f"file:{path}?mode=ro", uri=True) + + +def table_exists(conn: sqlite3.Connection, name: str) -> bool: + row = conn.execute( + "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?", + (name,), + ).fetchone() + return row is not None + + +def columns(conn: sqlite3.Connection, table: str) -> set[str]: + return {str(row[1]) for row in conn.execute(f"PRAGMA table_info({table})")} + + +def first_existing(candidates: tuple[str, ...], available: set[str]) -> str | None: + for candidate in candidates: + if candidate in available: + return candidate + return None + + +def where_time(column: str | None, start_utc: str | None, end_utc: str | None) -> tuple[str, list[Any]]: + clauses: list[str] = [] + params: list[Any] = [] + if column and start_utc: + clauses.append(f"{column} >= ?") + params.append(start_utc) + if column and end_utc: + clauses.append(f"{column} <= ?") + params.append(end_utc) + if not clauses: + return "", params + return " WHERE " + " AND ".join(clauses), params + + +def collect_agh(agh_home: Path, start: datetime | None, end: datetime | None) -> dict[str, Any]: + db_path = agh_home / "agh.db" + if not db_path.is_file(): + return {"agh_home": str(agh_home), "error": "agh.db not found"} + + start_utc = iso_utc(start) + end_utc = iso_utc(end) + result: dict[str, Any] = {"agh_home": str(agh_home), "db": str(db_path)} + + with sqlite_connect_readonly(db_path) as conn: + if table_exists(conn, "sessions"): + cols = columns(conn, "sessions") + time_col = first_existing(("created_at", "started_at", "updated_at"), cols) + where, params = where_time(time_col, start_utc, end_utc) + + group_cols = [ + col + for col in ("spawn_role", "session_type", "provider", "agent_name", "state", "failure_kind") + if col in cols + ] + if group_cols: + select_cols = ", ".join(f"COALESCE({col}, '') AS {col}" for col in group_cols) + group_by = ", ".join(group_cols) + query = ( + f"SELECT {select_cols}, COUNT(*) AS count FROM sessions" + f"{where} GROUP BY {group_by} ORDER BY count DESC" + ) + rows = conn.execute(query, params).fetchall() + result["sessions_by_role_type_provider_agent"] = [ + {**{group_cols[i]: row[i] for i in range(len(group_cols))}, "count": row[-1]} + for row in rows + ] + + if "spawn_role" in cols: + query = f"SELECT COUNT(*) FROM sessions{where} AND spawn_role = ?" if where else ( + "SELECT COUNT(*) FROM sessions WHERE spawn_role = ?" + ) + query_params = [*params, "memory-extractor"] if where else ["memory-extractor"] + result["memory_extractor_sessions"] = conn.execute(query, query_params).fetchone()[0] + + if table_exists(conn, "memory_events"): + cols = columns(conn, "memory_events") + time_col = first_existing(("created_at", "timestamp", "time"), cols) + op_col = first_existing(("op", "operation", "event_type", "type"), cols) + if op_col: + where, params = where_time(time_col, start_utc, end_utc) + rows = conn.execute( + f"SELECT {op_col}, COUNT(*) FROM memory_events{where} GROUP BY {op_col} ORDER BY COUNT(*) DESC", + params, + ).fetchall() + result["memory_events_by_op"] = [{"op": row[0], "count": row[1]} for row in rows] + + if table_exists(conn, "token_stats"): + cols = columns(conn, "token_stats") + time_col = first_existing(("created_at", "timestamp", "recorded_at", "time"), cols) + session_col = first_existing(("session_id", "session"), cols) + total_tokens_col = first_existing(("total_tokens", "context_used"), cols) + total_cost_col = first_existing(("total_cost", "cost_amount"), cols) + where_ts, params_ts = where_time(time_col, start_utc, end_utc) + if session_col and table_exists(conn, "sessions") and total_tokens_col: + cost_expr = f"SUM(ts.{total_cost_col})" if total_cost_col else "NULL" + query = ( + "SELECT COALESCE(s.spawn_role, ''), COUNT(*), " + f"SUM(ts.{total_tokens_col}), {cost_expr} " + f"FROM token_stats ts JOIN sessions s ON s.id = ts.{session_col}" + f"{where_ts} GROUP BY COALESCE(s.spawn_role, '') ORDER BY COUNT(*) DESC" + ) + rows = conn.execute(query, params_ts).fetchall() + result["token_stats_by_spawn_role"] = [ + { + "spawn_role": row[0], + "rows": row[1], + "total_tokens_or_context_used": row[2], + "total_cost": row[3], + } + for row in rows + ] + + if table_exists(conn, "network_audit_log"): + cols = columns(conn, "network_audit_log") + time_col = first_existing(("created_at", "timestamp", "time"), cols) + verb_col = first_existing(("verb", "message_type", "kind", "op"), cols) + direction_col = first_existing(("direction", "flow"), cols) + where, params = where_time(time_col, start_utc, end_utc) + if verb_col: + select = f"{verb_col}" + group = verb_col + if direction_col: + select += f", {direction_col}" + group += f", {direction_col}" + rows = conn.execute( + f"SELECT {select}, COUNT(*) FROM network_audit_log{where} GROUP BY {group} ORDER BY COUNT(*) DESC", + params, + ).fetchall() + result["network_audit_counts"] = [ + ( + {"verb": row[0], "direction": row[1], "count": row[2]} + if direction_col + else {"verb": row[0], "count": row[1]} + ) + for row in rows + ] + + result["crash_bundles"] = collect_crash_bundles(agh_home) + return result + + +def collect_crash_bundles(agh_home: Path) -> dict[str, Any]: + crash_dir = agh_home / "logs" / "crash-bundles" + if not crash_dir.is_dir(): + return {"dir": str(crash_dir), "files": 0} + + counters: Counter[str] = Counter() + for path in crash_dir.glob("*.json"): + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + counters["files"] += 1 + lowered = text.lower() + if "rate_limit" in lowered or "session limit" in lowered: + counters["rate_or_session_limit"] += 1 + if "prompt_failure" in lowered: + counters["prompt_failure"] += 1 + if "process_exit" in lowered: + counters["process_exit"] += 1 + + return {"dir": str(crash_dir), **dict(counters)} + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--claude-project", default="~/.claude/projects/-Users-pedronauck-Desktop-test") + parser.add_argument("--agh-home", default="~/.agh") + parser.add_argument("--start-utc", default="2026-05-26T20:50:00Z") + parser.add_argument("--end-utc", default="2026-05-26T21:45:00Z") + return parser.parse_args() + + +def main() -> int: + args = parse_args() + start = parse_time(args.start_utc) + end = parse_time(args.end_utc) + claude_project = Path(args.claude_project).expanduser() + agh_home = Path(args.agh_home).expanduser() + + payload = { + "window_utc": {"start": iso_utc(start), "end": iso_utc(end)}, + "claude": collect_claude(claude_project, start, end) if claude_project.is_dir() else { + "project_dir": str(claude_project), + "error": "directory not found", + }, + "agh": collect_agh(agh_home, start, end), + } + print(json.dumps(payload, indent=2, sort_keys=True)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.compozy/tasks/tokens-perf/analysis/screenshots/task-007-settings-memory-default.png b/.compozy/tasks/tokens-perf/analysis/screenshots/task-007-settings-memory-default.png new file mode 100644 index 000000000..5cc41b852 Binary files /dev/null and b/.compozy/tasks/tokens-perf/analysis/screenshots/task-007-settings-memory-default.png differ diff --git a/.compozy/tasks/tokens-perf/analysis/validate_task010_token_cost.py b/.compozy/tasks/tokens-perf/analysis/validate_task010_token_cost.py new file mode 100644 index 000000000..d718a020d --- /dev/null +++ b/.compozy/tasks/tokens-perf/analysis/validate_task010_token_cost.py @@ -0,0 +1,312 @@ +#!/usr/bin/env python3 +"""Estimate before/after prompt cost for the tokens-perf channel corpus.""" + +from __future__ import annotations + +import argparse +import datetime as dt +import json +import os +import pathlib +import re +from dataclasses import dataclass +from typing import Any + +CORPUS_DIR_ENV = "AGH_TOKEN_CORPUS_DIR" +OPEN_CATALOG = "" +CLOSE_CATALOG = "" +BT = chr(96) +FINAL_CATALOG_LINE = ( + f"If current tool policy denies {BT}agh__skill_view{BT}, " + f"use {BT}agh skill view {BT} as an operator fallback." +) +NETWORK_CLOSE = "" +COMPACT_CATALOG_STATE_TEXT = ( + "Previous catalog remains current; use `agh__skill_view` for full skill/resource instructions." +) +COMPACT_CATALOG_TEXT = "\n".join( + [ + OPEN_CATALOG, + f' {COMPACT_CATALOG_STATE_TEXT}', + CLOSE_CATALOG, + "", + FINAL_CATALOG_LINE, + ] +) +COMPACT_CATALOG_BYTES = len(COMPACT_CATALOG_TEXT.encode()) +COMPACT_GUIDANCE_TEXT = ( + f"Full protocol examples were already provided earlier in this session; " + f"run {BT}agh network --help{BT} for command details." +) +WORK_ID_RE = re.compile(r'\bwork-id="([^"]+)"') + + +@dataclass +class PromptEvent: + text: str + remaining_assistant_calls: int + timestamp: dt.datetime | None + + +def parse_args() -> pathlib.Path: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "corpus_dir", + nargs="?", + help=f"Directory containing *.jsonl transcripts. Defaults to {CORPUS_DIR_ENV} when set.", + ) + parser.add_argument( + "--corpus-dir", + dest="corpus_dir_override", + help="Explicit transcript directory override.", + ) + args = parser.parse_args() + raw_path = first_non_empty_path( + args.corpus_dir_override, + args.corpus_dir, + os.environ.get(CORPUS_DIR_ENV, ""), + ) + if raw_path == "": + parser.error( + f"corpus directory is required via --corpus-dir, positional argument, or {CORPUS_DIR_ENV}" + ) + root = pathlib.Path(raw_path).expanduser().resolve() + if not root.exists(): + parser.error(f"corpus directory does not exist: {root}") + if not root.is_dir(): + parser.error(f"corpus path is not a directory: {root}") + return root + + +def first_non_empty_path(*values: str | None) -> str: + for value in values: + if value is None: + continue + trimmed = value.strip() + if trimmed: + return trimmed + return "" + + +def content_text(message: dict[str, Any]) -> str: + content = message.get("content") + if isinstance(content, str): + return content + if isinstance(content, list): + return "".join( + item.get("text", "") + for item in content + if isinstance(item, dict) and isinstance(item.get("text"), str) + ) + return "" + + +def parse_timestamp(value: object) -> dt.datetime | None: + if not isinstance(value, str) or not value: + return None + try: + return dt.datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + + +def is_assistant_call(row: dict[str, Any]) -> bool: + if row.get("type") != "assistant": + return False + message = row.get("message") + return isinstance(message, dict) and (bool(message.get("usage")) or message.get("role") == "assistant") + + +def catalog_sections(text: str) -> list[str]: + sections: list[str] = [] + start = 0 + while True: + idx = text.find(OPEN_CATALOG, start) + if idx < 0: + return sections + close = text.find(CLOSE_CATALOG, idx) + if close < 0: + return sections + end = close + len(CLOSE_CATALOG) + final = text.find(FINAL_CATALOG_LINE, end) + if final >= 0: + end = final + len(FINAL_CATALOG_LINE) + sections.append(text[idx:end]) + start = end + + +def network_guidance(text: str) -> tuple[str, bool]: + start = text.find(" int | None: + marker = "Examples:\n" + if marker not in old_guidance: + return None + prefix = old_guidance.split(marker, 1)[0] + return len((prefix + COMPACT_GUIDANCE_TEXT + "\n").encode()) + + +def load_network_prompt_events(path: pathlib.Path) -> list[PromptEvent]: + raw_events: list[tuple[str, str, dt.datetime | None]] = [] + for line in path.read_text(errors="replace").splitlines(): + try: + row = json.loads(line) + except json.JSONDecodeError: + continue + if row.get("type") == "user": + text = content_text(row.get("message") or {}) + raw_events.append(("user", text, parse_timestamp(row.get("timestamp")))) + continue + if is_assistant_call(row): + raw_events.append(("assistant", "", parse_timestamp(row.get("timestamp")))) + + remaining_assistant = sum(1 for kind, _, _ in raw_events if kind == "assistant") + prompts: list[PromptEvent] = [] + for kind, text, timestamp in raw_events: + if kind == "assistant": + remaining_assistant -= 1 + continue + if " None: + root = parse_args() + totals = { + "network_transcripts": 0, + "network_prompt_turns_before": 0, + "network_prompt_turns_after_tasks_006_009": 0, + "network_messages": 0, + "network_prompt_direct_bytes_before": 0, + "network_prompt_direct_bytes_after_tasks_006_009": 0, + "network_prompt_replay_bytes_before": 0, + "network_prompt_replay_bytes_after_tasks_006_009": 0, + "skill_catalog_repeated_blocks": 0, + "skill_catalog_direct_saved_bytes": 0, + "skill_catalog_replay_saved_bytes": 0, + "network_guidance_compacted_prompts": 0, + "network_guidance_direct_saved_bytes": 0, + "network_guidance_replay_saved_bytes": 0, + } + gaps: list[float] = [] + top_files: list[dict[str, Any]] = [] + + for path in sorted(root.glob("*.jsonl")): + prompts = load_network_prompt_events(path) + if not prompts: + continue + + totals["network_transcripts"] += 1 + seen_catalogs: set[str] = set() + reply_delivered = False + protocol_delivered = False + file_direct_saved = 0 + file_replay_saved = 0 + + for left, right in zip(prompts, prompts[1:]): + if left.timestamp is not None and right.timestamp is not None: + gaps.append((right.timestamp - left.timestamp).total_seconds()) + + for prompt in prompts: + text_bytes = len(prompt.text.encode()) + remaining = prompt.remaining_assistant_calls + totals["network_prompt_turns_before"] += 1 + totals["network_prompt_turns_after_tasks_006_009"] += 1 + totals["network_messages"] += prompt.text.count("_.tar.gz` asset for your platform and extract it. diff --git a/internal/api/contract/memory.go b/internal/api/contract/memory.go index ab96ac386..cb5c5ff2a 100644 --- a/internal/api/contract/memory.go +++ b/internal/api/contract/memory.go @@ -426,12 +426,15 @@ type MemoryDailyLogListResponse struct { // MemoryExtractorStatusPayload reports extractor queue/runtime status. type MemoryExtractorStatusPayload struct { - Status MemoryExtractorState `json:"status"` - QueuedSessions int `json:"queued_sessions"` - InFlightSessions int `json:"in_flight_sessions"` - DroppedTurns int `json:"dropped_turns"` - CoalescedTurns int `json:"coalesced_turns"` - FailureCount int `json:"failure_count"` + Status MemoryExtractorState `json:"status"` + QueuedSessions int `json:"queued_sessions"` + InFlightSessions int `json:"in_flight_sessions"` + ActiveProviderSessions int `json:"active_provider_sessions"` + DroppedTurns int `json:"dropped_turns"` + CoalescedTurns int `json:"coalesced_turns"` + SkippedTurns int `json:"skipped_turns"` + BackpressuredSessions int `json:"backpressured_sessions"` + FailureCount int `json:"failure_count"` } // MemoryExtractorStatusResponse wraps extractor queue/runtime status. diff --git a/internal/api/core/memory_services_test.go b/internal/api/core/memory_services_test.go index e3fb0efb6..96e39477f 100644 --- a/internal/api/core/memory_services_test.go +++ b/internal/api/core/memory_services_test.go @@ -22,12 +22,15 @@ func TestMemoryExtractorHandlersUseInjectedService(t *testing.T) { extractor := &stubMemoryExtractorService{ status: contract.MemoryExtractorStatusPayload{ - Status: contract.MemoryExtractorStateRunning, - QueuedSessions: 2, - InFlightSessions: 1, - DroppedTurns: 3, - CoalescedTurns: 4, - FailureCount: 1, + Status: contract.MemoryExtractorStateRunning, + QueuedSessions: 2, + InFlightSessions: 1, + ActiveProviderSessions: 1, + DroppedTurns: 3, + CoalescedTurns: 4, + SkippedTurns: 5, + BackpressuredSessions: 6, + FailureCount: 1, }, failures: []contract.MemoryExtractorFailurePayload{{ ID: "failure-1", @@ -55,6 +58,15 @@ func TestMemoryExtractorHandlersUseInjectedService(t *testing.T) { if got := statusPayload.Extractor.FailureCount; got != 1 { t.Fatalf("Extractor.FailureCount = %d, want 1", got) } + if got := statusPayload.Extractor.SkippedTurns; got != 5 { + t.Fatalf("Extractor.SkippedTurns = %d, want 5", got) + } + if got := statusPayload.Extractor.ActiveProviderSessions; got != 1 { + t.Fatalf("Extractor.ActiveProviderSessions = %d, want 1", got) + } + if got := statusPayload.Extractor.BackpressuredSessions; got != 6 { + t.Fatalf("Extractor.BackpressuredSessions = %d, want 6", got) + } failuresResp := performRequest(t, engine, http.MethodGet, "/memory/extractor/failures", nil) if failuresResp.Code != http.StatusOK { diff --git a/internal/config/config.go b/internal/config/config.go index c4f9a0d43..e2b6a79aa 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -27,8 +27,10 @@ const ( configDefaultsAgentPath = "defaults.agent" configDreamingKey = "dreaming" configExtractorKey = "extractor" + configExtractorModePostMessage = "post_message" configHybridKey = "hybrid" configJsonlKey = "jsonl" + configExtractorQueueCoalesceMaxPath = "memory.extractor.queue.coalesce_max" configLogLevelDebug = "debug" configLogLevelError = "error" configLogLevelInfo = "info" @@ -826,7 +828,7 @@ func defaultMemoryRecallConfig() MemoryRecallConfig { func defaultMemoryExtractorConfig(homePaths HomePaths) MemoryExtractorConfig { return MemoryExtractorConfig{ Enabled: true, - Mode: "post_message", + Mode: configExtractorModePostMessage, ThrottleTurns: 1, Deadline: 60 * time.Second, SandboxInboxOnly: true, @@ -1990,7 +1992,7 @@ func (c *MemoryExtractorConfig) Validate() error { if !c.Enabled { return nil } - mode, err := validateEnum("memory.extractor.mode", c.Mode, "post_message", "compaction_flush", configHybridKey) + mode, err := validateEnum("memory.extractor.mode", c.Mode, configExtractorModePostMessage) if err != nil { return err } @@ -2007,7 +2009,18 @@ func (c *MemoryExtractorConfig) Validate() error { if strings.TrimSpace(c.DLQPath) == "" { return errors.New("memory.extractor.dlq_path is required") } - return c.Queue.Validate() + if err := c.Queue.Validate(); err != nil { + return err + } + if c.Queue.CoalesceMax < c.ThrottleTurns { + return fmt.Errorf( + "%s must be greater than or equal to memory.extractor.throttle_turns: %d < %d", + configExtractorQueueCoalesceMaxPath, + c.Queue.CoalesceMax, + c.ThrottleTurns, + ) + } + return nil } // Validate ensures extractor queue settings are usable. @@ -2016,7 +2029,7 @@ func (c MemoryExtractorQueueConfig) Validate() error { return fmt.Errorf("memory.extractor.queue.capacity must be positive: %d", c.Capacity) } if c.CoalesceMax <= 0 { - return fmt.Errorf("memory.extractor.queue.coalesce_max must be positive: %d", c.CoalesceMax) + return fmt.Errorf("%s must be positive: %d", configExtractorQueueCoalesceMaxPath, c.CoalesceMax) } return nil } diff --git a/internal/config/memory_v2_config_test.go b/internal/config/memory_v2_config_test.go index c4455fafd..560de7c56 100644 --- a/internal/config/memory_v2_config_test.go +++ b/internal/config/memory_v2_config_test.go @@ -59,7 +59,7 @@ func TestMemoryV2ConfigDefaultsAndOverlay(t *testing.T) { memory.Decisions.MaxPostContentBytes != 65536 { t.Fatalf("DefaultWithHome() Decisions = %#v", memory.Decisions) } - if memory.Extractor.Mode != "post_message" || + if memory.Extractor.Mode != configExtractorModePostMessage || memory.Extractor.Deadline != time.Minute || memory.Extractor.Queue.Capacity != 1 || memory.Extractor.Queue.CoalesceMax != 16 { @@ -88,6 +88,7 @@ func TestMemoryV2ConfigDefaultsAndOverlay(t *testing.T) { } }) + // not parallel: this subtest uses t.Setenv for AGH_HOME overlay resolution. t.Run("Should merge every Memory v2 backend section from overlays", func(t *testing.T) { workspaceRoot := t.TempDir() homeRoot := filepath.Join(t.TempDir(), "home") @@ -147,7 +148,7 @@ max_post_content_bytes = 32768 [memory.extractor] enabled = true -mode = "compaction_flush" +mode = "post_message" throttle_turns = 2 deadline = "45s" sandbox_inbox_only = false @@ -382,6 +383,14 @@ func TestMemoryV2ConfigValidationCoversOptionalBranches(t *testing.T) { }, want: "memory.extractor.queue.capacity", }, + { + name: "extractor coalesce below throttle", + patch: func(cfg *MemoryConfig) { + cfg.Extractor.ThrottleTurns = 4 + cfg.Extractor.Queue.CoalesceMax = 3 + }, + want: configExtractorQueueCoalesceMaxPath, + }, { name: "dream scoring weight sum", patch: func(cfg *MemoryConfig) { @@ -439,35 +448,42 @@ func TestMemoryV2ConfigValidationCoversOptionalBranches(t *testing.T) { func TestMemoryV2ConfigValidationNormalizesAcceptedEnums(t *testing.T) { t.Parallel() - homePaths, err := ResolveHomePathsFrom(filepath.Join(t.TempDir(), "home")) - if err != nil { - t.Fatalf("ResolveHomePathsFrom() error = %v", err) - } + t.Run("Should normalize accepted enum values", func(t *testing.T) { + t.Parallel() - cfg := DefaultWithHome(homePaths).Memory - cfg.Controller.Mode = " Hybrid " - cfg.Controller.DefaultOpOnFail = " Reject " - cfg.Controller.Policy.AllowOrigins = []string{" CLI ", "Tool"} - cfg.Recall.Fusion = " RRF " - cfg.Extractor.Mode = " Hybrid " - cfg.Session.LedgerFormat = " Jsonl " + homePaths, err := ResolveHomePathsFrom(filepath.Join(t.TempDir(), "home")) + if err != nil { + t.Fatalf("ResolveHomePathsFrom() error = %v", err) + } - if err := cfg.Validate(); err != nil { - t.Fatalf("Validate() error = %v", err) - } - if cfg.Controller.Mode != "hybrid" || cfg.Controller.DefaultOpOnFail != "reject" { - t.Fatalf("controller enums = %#v, want canonical lowercase values", cfg.Controller) - } - if !slices.Equal(cfg.Controller.Policy.AllowOrigins, []string{"cli", "tool"}) { - t.Fatalf("controller allow origins = %#v, want canonical lowercase values", cfg.Controller.Policy.AllowOrigins) - } - if cfg.Recall.Fusion != "rrf" { - t.Fatalf("recall fusion = %q, want %q", cfg.Recall.Fusion, "rrf") - } - if cfg.Extractor.Mode != "hybrid" { - t.Fatalf("extractor mode = %q, want %q", cfg.Extractor.Mode, "hybrid") - } - if cfg.Session.LedgerFormat != "jsonl" { - t.Fatalf("session ledger format = %q, want %q", cfg.Session.LedgerFormat, "jsonl") - } + cfg := DefaultWithHome(homePaths).Memory + cfg.Controller.Mode = " Hybrid " + cfg.Controller.DefaultOpOnFail = " Reject " + cfg.Controller.Policy.AllowOrigins = []string{" CLI ", "Tool"} + cfg.Recall.Fusion = " RRF " + cfg.Extractor.Mode = " Post_Message " + cfg.Session.LedgerFormat = " Jsonl " + + if err := cfg.Validate(); err != nil { + t.Fatalf("Validate() error = %v", err) + } + if cfg.Controller.Mode != "hybrid" || cfg.Controller.DefaultOpOnFail != "reject" { + t.Fatalf("controller enums = %#v, want canonical lowercase values", cfg.Controller) + } + if !slices.Equal(cfg.Controller.Policy.AllowOrigins, []string{"cli", "tool"}) { + t.Fatalf( + "controller allow origins = %#v, want canonical lowercase values", + cfg.Controller.Policy.AllowOrigins, + ) + } + if cfg.Recall.Fusion != "rrf" { + t.Fatalf("recall fusion = %q, want %q", cfg.Recall.Fusion, "rrf") + } + if cfg.Extractor.Mode != configExtractorModePostMessage { + t.Fatalf("extractor mode = %q, want %q", cfg.Extractor.Mode, configExtractorModePostMessage) + } + if cfg.Session.LedgerFormat != "jsonl" { + t.Fatalf("session ledger format = %q, want %q", cfg.Session.LedgerFormat, "jsonl") + } + }) } diff --git a/internal/config/release_config_test.go b/internal/config/release_config_test.go index 0a95c5a91..840dff79a 100644 --- a/internal/config/release_config_test.go +++ b/internal/config/release_config_test.go @@ -797,6 +797,8 @@ func TestReleaseWorkflowPreservesInstallerSourceTextGuards(t *testing.T) { "sh -n packages/site/public/install.sh", "grep -q 'checksums.txt.sigstore.json' packages/site/public/install.sh", "install.sh must verify checksums.txt with checksums.txt.sigstore.json", + `grep -q 'COSIGN_VERSION="v2.2.4"' packages/site/public/install.sh`, + "install.sh must bootstrap a pinned cosign verifier when cosign is absent", "grep -q 'packages/site/public/install.sh' .goreleaser.yml", ".goreleaser.yml must upload packages/site/public/install.sh as a release extra file", `grep -q 'name: "@compozy/agh"' .goreleaser.yml`, @@ -847,6 +849,7 @@ func TestReleaseWorkflowPreservesInstallerSourceTextGuards(t *testing.T) { "`pr-release dry-run`, `make test-e2e-nightly`, and `make test-integration`", "`goreleaser release --clean` publishes the release", "`checksums.txt.sigstore.json`", + "curl installer uses local cosign when present or a pinned temporary cosign v2.2.4 verifier", "Syft SBOMs for archives, packages, and source", "does not claim a manual post-release install smoke", "--bundle checksums.txt.sigstore.json", diff --git a/internal/config/tool_surface.go b/internal/config/tool_surface.go index cf9788f2f..d8a8d9a78 100644 --- a/internal/config/tool_surface.go +++ b/internal/config/tool_surface.go @@ -185,7 +185,7 @@ var ( "memory.extractor.sandbox_inbox_only": ConfigValueBool, "memory.extractor.model": ConfigValueString, "memory.extractor.queue.capacity": ConfigValueInt, - "memory.extractor.queue.coalesce_max": ConfigValueInt, + configExtractorQueueCoalesceMaxPath: ConfigValueInt, "memory.dream.enabled": ConfigValueBool, "memory.dream.agent": ConfigValueString, "memory.dream.min_hours": ConfigValueFloat, diff --git a/internal/daemon/memory_runtime.go b/internal/daemon/memory_runtime.go index 7a4bb839e..d3a262185 100644 --- a/internal/daemon/memory_runtime.go +++ b/internal/daemon/memory_runtime.go @@ -86,6 +86,7 @@ func newDaemonMemoryExtractor( forked := &forkedMemoryExtractor{ sessions: forkSessions, defaultAgent: firstNonEmptyString(state.cfg.Defaults.Agent, state.cfg.Memory.Dream.Agent), + model: state.cfg.Memory.Extractor.Model, deadline: state.cfg.Memory.Extractor.Deadline, logger: state.logger, now: now, @@ -99,6 +100,8 @@ func newDaemonMemoryExtractor( extractorpkg.WithLogger(state.logger), extractorpkg.WithClock(now), extractorpkg.WithCoalesceMax(state.cfg.Memory.Extractor.Queue.CoalesceMax), + extractorpkg.WithQueueCapacity(state.cfg.Memory.Extractor.Queue.Capacity), + extractorpkg.WithThrottleTurns(state.cfg.Memory.Extractor.ThrottleTurns), extractorpkg.WithInboxPath(state.cfg.Memory.Extractor.InboxPath), ) if err != nil { @@ -246,12 +249,15 @@ func (e *daemonMemoryExtractor) Status(context.Context) (contract.MemoryExtracto return contract.MemoryExtractorStatusPayload{}, err } return contract.MemoryExtractorStatusPayload{ - Status: status, - QueuedSessions: stats.QueuedSessions, - InFlightSessions: stats.InFlightSessions, - DroppedTurns: intFromInt64(stats.DroppedTurns), - CoalescedTurns: intFromInt64(stats.CoalescedTurns), - FailureCount: failureCount, + Status: status, + QueuedSessions: stats.QueuedSessions, + InFlightSessions: stats.InFlightSessions, + ActiveProviderSessions: stats.ActiveProviderSessions, + DroppedTurns: intFromInt64(stats.DroppedTurns), + CoalescedTurns: intFromInt64(stats.CoalescedTurns), + SkippedTurns: intFromInt64(stats.SkippedTurns), + BackpressuredSessions: intFromInt64(stats.BackpressuredSessions), + FailureCount: failureCount, }, nil } @@ -572,6 +578,7 @@ func (s *daemonMemoryProposalSink) resolveWorkspace( type forkedMemoryExtractor struct { sessions memoryExtractorSessionManager defaultAgent string + model string deadline time.Duration logger *slog.Logger now func() time.Time @@ -598,6 +605,7 @@ func (e *forkedMemoryExtractor) Extract( child, err := e.sessions.Spawn(runCtx, session.SpawnOpts{ ParentSessionID: turn.SessionID, AgentName: firstNonEmptyString(turn.AgentID, e.defaultAgent), + Model: strings.TrimSpace(e.model), Name: "Memory extractor", PromptOverlay: memoryExtractorOverlay(), SpawnRole: session.SpawnRoleMemoryExtractor, @@ -736,10 +744,8 @@ func collectMemoryExtractorOutput(ctx context.Context, events <-chan acp.AgentEv } switch event.Type { case acp.EventTypeAgentMessage: + // Agent messages are stream chunks; inserting separators can corrupt JSONL. output.WriteString(event.Text) - if !strings.HasSuffix(event.Text, "\n") { - output.WriteByte('\n') - } case acp.EventTypeError: return "", fmt.Errorf("daemon: memory extractor agent error: %s", strings.TrimSpace(event.Error)) } diff --git a/internal/daemon/memory_runtime_test.go b/internal/daemon/memory_runtime_test.go index 0b38930c0..2c55fe595 100644 --- a/internal/daemon/memory_runtime_test.go +++ b/internal/daemon/memory_runtime_test.go @@ -1,12 +1,17 @@ package daemon import ( + "context" "os" "path/filepath" + "strings" "testing" + "time" + "github.com/compozy/agh/internal/acp" "github.com/compozy/agh/internal/memory" memcontract "github.com/compozy/agh/internal/memory/contract" + "github.com/compozy/agh/internal/session" "github.com/compozy/agh/internal/testutil" workspacepkg "github.com/compozy/agh/internal/workspace" ) @@ -57,3 +62,163 @@ func TestDaemonMemoryProposalSinkTargetStore(t *testing.T) { } }) } + +func TestCollectMemoryExtractorOutput(t *testing.T) { + t.Parallel() + + t.Run("Should preserve streamed JSONL chunks without synthetic newlines", func(t *testing.T) { + t.Parallel() + + events := make(chan acp.AgentEvent, 4) + events <- acp.AgentEvent{Type: acp.EventTypeAgentMessage, Text: "```jsonl\n{\"type\":\"reference\",\"scope"} + events <- acp.AgentEvent{ + Type: acp.EventTypeAgentMessage, + Text: "\":\"workspace\",\"agent_tier\":\"workspace\",\"content\":\"Channel marketing already exists.\",\"evidence\":\"seq=52\"", + } + events <- acp.AgentEvent{ + Type: acp.EventTypeAgentMessage, + Text: ",\"entity\":\"ws-test\",\"attribute\":\"network_channel_marketing_exists\"}\n```", + } + close(events) + + output, err := collectMemoryExtractorOutput(testutil.Context(t), events) + if err != nil { + t.Fatalf("collectMemoryExtractorOutput() error = %v", err) + } + if strings.Contains(output, "scope\n") { + t.Fatalf("output = %q, want no synthetic newline inside streamed JSON key", output) + } + + turn := memcontract.TurnRecord{ + SessionID: "sess-parent", + RootSessionID: "sess-parent", + AgentID: "cto", + ActorKind: "agent_root", + WorkspaceID: "ws-test", + SinceMessageSeq: 32, + UntilMessageSeq: 52, + Snapshot: memcontract.TranscriptSnapshot{ + Messages: []memcontract.TranscriptMessage{{ + Sequence: 52, + Role: "assistant", + Content: "Channel marketing exists.", + At: time.Date(2026, 5, 26, 21, 1, 52, 0, time.UTC), + }}, + }, + Trigger: memcontract.TriggerPostMessage, + } + candidates, err := parseMemoryExtractorCandidates( + output, + turn, + "/workspace/test", + time.Date(2026, 5, 26, 21, 2, 0, 0, time.UTC), + ) + if err != nil { + t.Fatalf("parseMemoryExtractorCandidates() error = %v", err) + } + if len(candidates) != 1 { + t.Fatalf("candidates = %#v, want one parsed candidate", candidates) + } + candidate := candidates[0] + if candidate.Scope != memcontract.ScopeWorkspace || candidate.Frontmatter.Type != memcontract.TypeReference { + t.Fatalf( + "candidate scope/type = %s/%s, want workspace/reference", + candidate.Scope, + candidate.Frontmatter.Type, + ) + } + if !strings.Contains(candidate.Content, "Channel marketing already exists") { + t.Fatalf("candidate content = %q, want streamed JSON content", candidate.Content) + } + if candidate.Metadata["workspace_root"] != "/workspace/test" { + t.Fatalf("candidate metadata = %#v, want workspace_root", candidate.Metadata) + } + }) +} + +func TestForkedMemoryExtractor(t *testing.T) { + t.Parallel() + + t.Run("Should pass the configured model to the extractor child spawn", func(t *testing.T) { + t.Parallel() + + sessions := &recordingMemoryExtractorSessions{} + extractor := &forkedMemoryExtractor{ + sessions: sessions, + defaultAgent: "memory-agent", + model: "claude-haiku-memory", + deadline: time.Second, + now: func() time.Time { + return time.Date(2026, 5, 27, 10, 0, 0, 0, time.UTC) + }, + } + turn := memcontract.TurnRecord{ + SessionID: "sess-parent", + RootSessionID: "sess-parent", + AgentID: "", + ActorKind: "agent_root", + WorkspaceID: "ws-test", + SinceMessageSeq: 1, + UntilMessageSeq: 1, + Snapshot: memcontract.TranscriptSnapshot{ + Messages: []memcontract.TranscriptMessage{{ + Sequence: 1, + Role: "assistant", + Content: "Pedro prefers concise updates.", + At: time.Date(2026, 5, 27, 9, 59, 0, 0, time.UTC), + }}, + }, + Trigger: memcontract.TriggerPostMessage, + } + + candidates, err := extractor.Extract(testutil.Context(t), turn) + if err != nil { + t.Fatalf("Extract() error = %v", err) + } + if len(candidates) != 0 { + t.Fatalf("candidates = %#v, want none for empty child output", candidates) + } + if sessions.spawnOpts.AgentName != "memory-agent" { + t.Fatalf("spawn agent = %q, want default memory agent", sessions.spawnOpts.AgentName) + } + if sessions.spawnOpts.Model != "claude-haiku-memory" { + t.Fatalf("spawn model = %q, want configured extractor model", sessions.spawnOpts.Model) + } + if sessions.stoppedID != "sess-memory-child" { + t.Fatalf("stopped child = %q, want spawned extractor child stopped", sessions.stoppedID) + } + }) +} + +type recordingMemoryExtractorSessions struct { + spawnOpts session.SpawnOpts + stoppedID string +} + +func (s *recordingMemoryExtractorSessions) Spawn( + _ context.Context, + opts session.SpawnOpts, +) (*session.Session, error) { + s.spawnOpts = opts + return &session.Session{ID: "sess-memory-child"}, nil +} + +func (s *recordingMemoryExtractorSessions) PromptSynthetic( + _ context.Context, + _ string, + _ session.SyntheticPromptOpts, +) (<-chan acp.AgentEvent, error) { + events := make(chan acp.AgentEvent) + close(events) + return events, nil +} + +func (s *recordingMemoryExtractorSessions) StopWithCause( + _ context.Context, + id string, + _ session.StopCause, + _ string, +) error { + s.stoppedID = id + return nil +} diff --git a/internal/daemon/native_tools_test.go b/internal/daemon/native_tools_test.go index f1d2d8f33..7fc4cebf7 100644 --- a/internal/daemon/native_tools_test.go +++ b/internal/daemon/native_tools_test.go @@ -3826,7 +3826,13 @@ func TestDaemonNativeTools(t *testing.T) { cfg.Memory.GlobalDir = globalDir cfg.Memory.Dream.CheckInterval = time.Hour extractor := &nativeMemoryExtractorService{ - status: contract.MemoryExtractorStatusPayload{Status: contract.MemoryExtractorStateIdle, QueuedSessions: 2}, + status: contract.MemoryExtractorStatusPayload{ + Status: contract.MemoryExtractorStateIdle, + QueuedSessions: 2, + ActiveProviderSessions: 1, + SkippedTurns: 5, + BackpressuredSessions: 6, + }, } providers := &nativeMemoryProviderService{ provider: contract.MemoryProviderPayload{ @@ -3877,6 +3883,9 @@ func TestDaemonNativeTools(t *testing.T) { t.Fatalf("Registry.Call(memory_extractor_status) error = %v", err) } requireNativeStructuredContains(t, extractorResult, []byte(`"status":"idle"`)) + requireNativeStructuredContains(t, extractorResult, []byte(`"active_provider_sessions":1`)) + requireNativeStructuredContains(t, extractorResult, []byte(`"skipped_turns":5`)) + requireNativeStructuredContains(t, extractorResult, []byte(`"backpressured_sessions":6`)) if extractor.statusCalls != 1 { t.Fatalf("extractor Status calls = %d, want 1", extractor.statusCalls) } diff --git a/internal/daemon/prompt_skills.go b/internal/daemon/prompt_skills.go index 13dc27e90..2e4c5b912 100644 --- a/internal/daemon/prompt_skills.go +++ b/internal/daemon/prompt_skills.go @@ -2,15 +2,20 @@ package daemon import ( "context" + "crypto/sha256" "errors" "fmt" "strings" + "sync" + "sync/atomic" "github.com/compozy/agh/internal/session" skillspkg "github.com/compozy/agh/internal/skills" workspacepkg "github.com/compozy/agh/internal/workspace" ) +const promptSkillsCatalogCacheMaxSessions = 2048 + type promptSkillsRegistry interface { ForWorkspace(ctx context.Context, resolved *workspacepkg.ResolvedWorkspace) ([]*skillspkg.Skill, error) ForAgent( @@ -24,6 +29,21 @@ type promptSkillsWorkspaceResolver interface { Resolve(ctx context.Context, target string) (workspacepkg.ResolvedWorkspace, error) } +type skillsCatalogAugmenter struct { + registry promptSkillsRegistry + workspaceResolver func() promptSkillsWorkspaceResolver + sequence atomic.Uint64 + + mu sync.Mutex + states map[string]skillsCatalogSessionState +} + +type skillsCatalogSessionState struct { + acpSessionID string + signature [sha256.Size]byte + lastUsed uint64 +} + func newSkillsCatalogAugmenter( registry promptSkillsRegistry, workspaceResolver func() promptSkillsWorkspaceResolver, @@ -32,40 +52,108 @@ func newSkillsCatalogAugmenter( return nil } - return func(ctx context.Context, sess *session.Session, message string) (string, error) { - if sess == nil { - return message, nil - } + augmenter := &skillsCatalogAugmenter{ + registry: registry, + workspaceResolver: workspaceResolver, + states: make(map[string]skillsCatalogSessionState), + } + return augmenter.Augment +} - info := sess.Info() - if info == nil { - return message, nil - } +func (a *skillsCatalogAugmenter) Augment(ctx context.Context, sess *session.Session, message string) (string, error) { + if a == nil || a.registry == nil || sess == nil { + return message, nil + } - workspace, err := resolvePromptSkillsWorkspace(ctx, workspaceResolver, info.WorkspaceID, info.Workspace) - if err != nil { - return "", fmt.Errorf("daemon: resolve prompt skills workspace: %w", err) - } + info := sess.Info() + if info == nil { + return message, nil + } - var skills []*skillspkg.Skill - agentName := strings.TrimSpace(info.AgentName) - if agentName != "" { - skills, err = registry.ForAgent(ctx, workspace, agentName) - } else { - skills, err = registry.ForWorkspace(ctx, workspace) - } - if err != nil { - return "", fmt.Errorf("daemon: load current skills catalog for session %q: %w", info.ID, err) - } + workspace, err := resolvePromptSkillsWorkspace(ctx, a.workspaceResolver, info.WorkspaceID, info.Workspace) + if err != nil { + return "", fmt.Errorf("daemon: resolve prompt skills workspace: %w", err) + } - catalog := skillspkg.BuildCurrentCatalog(skills) - if strings.TrimSpace(catalog) == "" { - return message, nil - } - if strings.TrimSpace(message) == "" { - return catalog, nil + var skills []*skillspkg.Skill + agentName := strings.TrimSpace(info.AgentName) + if agentName != "" { + skills, err = a.registry.ForAgent(ctx, workspace, agentName) + } else { + skills, err = a.registry.ForWorkspace(ctx, workspace) + } + if err != nil { + return "", fmt.Errorf("daemon: load current skills catalog for session %q: %w", info.ID, err) + } + + catalog := skillspkg.BuildCurrentCatalog(skills) + if strings.TrimSpace(catalog) == "" { + a.forgetSession(info.ID) + return message, nil + } + if a.catalogUnchanged(info, catalog) { + catalog = skillspkg.BuildCurrentCatalogUnchanged() + } + if strings.TrimSpace(message) == "" { + return catalog, nil + } + return catalog + "\n\n" + message, nil +} + +func (a *skillsCatalogAugmenter) catalogUnchanged(info *session.Info, catalog string) bool { + if info == nil { + return false + } + + key := strings.TrimSpace(info.ID) + if key == "" { + return false + } + + acpSessionID := strings.TrimSpace(info.ACPSessionID) + signature := sha256.Sum256([]byte(catalog)) + sequence := a.sequence.Add(1) + + a.mu.Lock() + defer a.mu.Unlock() + + state, ok := a.states[key] + unchanged := ok && state.acpSessionID == acpSessionID && state.signature == signature + a.states[key] = skillsCatalogSessionState{ + acpSessionID: acpSessionID, + signature: signature, + lastUsed: sequence, + } + a.evictOldestLocked() + return unchanged +} + +func (a *skillsCatalogAugmenter) forgetSession(sessionID string) { + key := strings.TrimSpace(sessionID) + if key == "" { + return + } + + a.mu.Lock() + delete(a.states, key) + a.mu.Unlock() +} + +func (a *skillsCatalogAugmenter) evictOldestLocked() { + if len(a.states) <= promptSkillsCatalogCacheMaxSessions { + return + } + + var oldestKey string + var oldestSequence uint64 + for key, state := range a.states { + if oldestKey == "" || state.lastUsed < oldestSequence { + oldestKey = key + oldestSequence = state.lastUsed } - return catalog + "\n\n" + message, nil + } + if oldestKey != "" { + delete(a.states, oldestKey) } } diff --git a/internal/daemon/prompt_skills_test.go b/internal/daemon/prompt_skills_test.go index 48964bafc..0e6696c3e 100644 --- a/internal/daemon/prompt_skills_test.go +++ b/internal/daemon/prompt_skills_test.go @@ -11,76 +11,213 @@ import ( ) func TestNewSkillsCatalogAugmenterUsesCurrentRegistryStatePerPrompt(t *testing.T) { - t.Parallel() + t.Run("Should compact unchanged catalog after the first prompt", func(t *testing.T) { + t.Parallel() - registry := &stubPromptSkillsRegistry{ - skillsByAgent: map[string][]*skillspkg.Skill{ - "general": { - { - Meta: skillspkg.SkillMeta{ - Name: "qa-marker-skill", - Description: "Shows up while enabled.", - }, - Enabled: true, + registry, augmenter := newPromptSkillsAugmenterForTest(t, []*skillspkg.Skill{ + { + Meta: skillspkg.SkillMeta{ + Name: "qa-marker-skill", + Description: "Shows up while enabled.", }, + Enabled: true, }, - }, - } - var resolver promptSkillsWorkspaceResolver - augmenter := newSkillsCatalogAugmenter(registry, func() promptSkillsWorkspaceResolver { - return resolver + }) + _ = registry + sess := newPromptSkillsSession("sess-compact") + + first, err := augmenter(context.Background(), sess, "list current skills") + if err != nil { + t.Fatalf("augmenter(first) error = %v", err) + } + if !strings.Contains(first, `name="qa-marker-skill"`) { + t.Fatalf("first prompt = %q, want enabled skill entry", first) + } + + second, err := augmenter(context.Background(), sess, "list current skills again") + if err != nil { + t.Fatalf("augmenter(second) error = %v", err) + } + if !strings.Contains(second, ``) { + t.Fatalf("second prompt = %q, want unchanged catalog marker", second) + } + if strings.Contains(second, `name="qa-marker-skill"`) { + t.Fatalf("second prompt = %q, want compact marker without repeated skill entries", second) + } + if !strings.Contains(second, "use `agh__skill_view` for full skill/resource instructions") { + t.Fatalf("second prompt = %q, want compact skill_view guidance", second) + } + if !strings.Contains(second, "use `agh skill view ` as an operator fallback") { + t.Fatalf("second prompt = %q, want operator fallback guidance", second) + } + if !strings.HasSuffix(second, "list current skills again") { + t.Fatalf("second prompt = %q, want original prompt preserved", second) + } }) - if augmenter == nil { - t.Fatal("newSkillsCatalogAugmenter() = nil, want augmenter") - } - resolver = &stubPromptSkillsWorkspaceResolver{ - resolved: workspacepkg.ResolvedWorkspace{ - Workspace: workspacepkg.Workspace{ - ID: "ws-1", - RootDir: "/tmp/ws-1", + t.Run("Should emit refreshed full catalog when registry state changes", func(t *testing.T) { + t.Parallel() + + registry, augmenter := newPromptSkillsAugmenterForTest(t, []*skillspkg.Skill{ + { + Meta: skillspkg.SkillMeta{ + Name: "qa-marker-skill", + Description: "Shows up while enabled.", + }, + Enabled: true, }, - }, - } + }) + sess := newPromptSkillsSession("sess-refresh") - sess := &session.Session{ - ID: "sess-1", - AgentName: "general", - WorkspaceID: "ws-1", - Workspace: "/tmp/ws-1", - } + first, err := augmenter(context.Background(), sess, "list current skills") + if err != nil { + t.Fatalf("augmenter(first) error = %v", err) + } + if !strings.Contains(first, `name="qa-marker-skill"`) { + t.Fatalf("first prompt = %q, want enabled skill entry", first) + } - first, err := augmenter(context.Background(), sess, "list current skills") - if err != nil { - t.Fatalf("augmenter(first) error = %v", err) - } - if !strings.Contains(first, "") { - t.Fatalf("first prompt = %q, want current catalog block", first) - } - if !strings.Contains(first, `name="qa-marker-skill"`) { - t.Fatalf("first prompt = %q, want enabled skill entry", first) - } - if !strings.HasSuffix(first, "list current skills") { - t.Fatalf("first prompt = %q, want original prompt preserved", first) - } + registry.skillsByAgent["general"] = []*skillspkg.Skill{ + { + Meta: skillspkg.SkillMeta{ + Name: "qa-marker-skill", + Description: "Now disabled.", + }, + Enabled: false, + }, + { + Meta: skillspkg.SkillMeta{ + Name: "replacement-skill", + Description: "Still enabled after refresh.", + }, + Enabled: true, + }, + } - registry.skillsByAgent["general"] = []*skillspkg.Skill{ - { - Meta: skillspkg.SkillMeta{ - Name: "qa-marker-skill", - Description: "Now disabled.", + second, err := augmenter(context.Background(), sess, "list current skills") + if err != nil { + t.Fatalf("augmenter(second) error = %v, want live refresh", err) + } + if strings.Contains(second, `name="qa-marker-skill"`) { + t.Fatalf("second prompt = %q, want disabled skill removed from current catalog", second) + } + if !strings.Contains(second, `name="replacement-skill"`) { + t.Fatalf("second prompt = %q, want refreshed enabled skill in current catalog", second) + } + if !strings.Contains( + second, + "The block above is the authoritative current skill state for this turn.", + ) { + t.Fatalf("second prompt = %q, want authoritative current-catalog guidance", second) + } + if strings.Contains(second, ``) { + t.Fatalf("second prompt = %q, want full refreshed catalog instead of unchanged marker", second) + } + if !strings.HasSuffix(second, "list current skills") { + t.Fatalf("second prompt = %q, want original prompt preserved", second) + } + }) + + t.Run("Should replay the full catalog after the ACP session changes", func(t *testing.T) { + t.Parallel() + + registry, augmenter := newPromptSkillsAugmenterForTest(t, []*skillspkg.Skill{ + { + Meta: skillspkg.SkillMeta{ + Name: "qa-marker-skill", + Description: "Shows up while enabled.", + }, + Enabled: true, }, - Enabled: false, - }, + }) + _ = registry + sess := newPromptSkillsSession("sess-resume") + sess.ACPSessionID = "acp-1" + + first, err := augmenter(context.Background(), sess, "list current skills") + if err != nil { + t.Fatalf("augmenter(first) error = %v", err) + } + if !strings.Contains(first, `name="qa-marker-skill"`) { + t.Fatalf("first prompt = %q, want enabled skill entry", first) + } + + second, err := augmenter(context.Background(), sess, "list current skills again") + if err != nil { + t.Fatalf("augmenter(second) error = %v", err) + } + if !strings.Contains(second, ``) { + t.Fatalf("second prompt = %q, want unchanged catalog marker", second) + } + + sess.ACPSessionID = "acp-2" + third, err := augmenter(context.Background(), sess, "list current skills after resume") + if err != nil { + t.Fatalf("augmenter(third) error = %v", err) + } + if strings.Contains(third, ``) { + t.Fatalf("third prompt = %q, want full catalog after ACP session reset", third) + } + if !strings.Contains(third, `name="qa-marker-skill"`) { + t.Fatalf("third prompt = %q, want enabled skill entry after ACP session reset", third) + } + if !strings.HasSuffix(third, "list current skills after resume") { + t.Fatalf("third prompt = %q, want original prompt preserved", third) + } + }) +} + +func BenchmarkSkillsCatalogAugmenterCatalogReplayModes(b *testing.B) { + registry, augmenter := newPromptSkillsAugmenterForTest(b, []*skillspkg.Skill{ { Meta: skillspkg.SkillMeta{ - Name: "replacement-skill", - Description: "Still enabled after refresh.", + Name: "qa-marker-skill", + Description: "Shows up while enabled.", }, Enabled: true, }, + }) + _ = registry + sess := newPromptSkillsSession("sess-bench") + + full, err := augmenter(context.Background(), sess, "network note") + if err != nil { + b.Fatalf("augmenter(full) error = %v", err) + } + compact, err := augmenter(context.Background(), sess, "network note") + if err != nil { + b.Fatalf("augmenter(compact) error = %v", err) + } + fullBytes := len(full) + compactBytes := len(compact) + promptSuffixBytes := len("\n\nnetwork note") + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := augmenter(context.Background(), sess, "network note"); err != nil { + b.Fatalf("augmenter(repeated) error = %v", err) + } } - resolver = &stubPromptSkillsWorkspaceResolver{ + b.ReportMetric(float64(fullBytes-promptSuffixBytes), "full-catalog-bytes/op") + b.ReportMetric(float64(compactBytes-promptSuffixBytes), "compact-catalog-bytes/op") + b.ReportMetric(float64(fullBytes-compactBytes), "saved-catalog-bytes/op") + b.ReportMetric(float64(fullBytes), "full-bytes/op") + b.ReportMetric(float64(compactBytes), "compact-bytes/op") + b.ReportMetric(float64(fullBytes-compactBytes), "saved-prompt-bytes/op") +} + +func newPromptSkillsAugmenterForTest( + t testing.TB, + skills []*skillspkg.Skill, +) (*stubPromptSkillsRegistry, session.PromptInputAugmenter) { + t.Helper() + + registry := &stubPromptSkillsRegistry{ + skillsByAgent: map[string][]*skillspkg.Skill{ + "general": skills, + }, + } + resolver := &stubPromptSkillsWorkspaceResolver{ resolved: workspacepkg.ResolvedWorkspace{ Workspace: workspacepkg.Workspace{ ID: "ws-1", @@ -88,25 +225,21 @@ func TestNewSkillsCatalogAugmenterUsesCurrentRegistryStatePerPrompt(t *testing.T }, }, } - - second, err := augmenter(context.Background(), sess, "list current skills") - if err != nil { - t.Fatalf("augmenter(second) error = %v, want live refresh", err) - } - if strings.Contains(second, `name="qa-marker-skill"`) { - t.Fatalf("second prompt = %q, want disabled skill removed from current catalog", second) - } - if !strings.Contains(second, `name="replacement-skill"`) { - t.Fatalf("second prompt = %q, want refreshed enabled skill in current catalog", second) - } - if !strings.Contains( - second, - "The block above is the authoritative current skill state for this turn.", - ) { - t.Fatalf("second prompt = %q, want authoritative current-catalog guidance", second) + augmenter := newSkillsCatalogAugmenter(registry, func() promptSkillsWorkspaceResolver { + return resolver + }) + if augmenter == nil { + t.Fatal("newSkillsCatalogAugmenter() = nil, want augmenter") } - if !strings.HasSuffix(second, "list current skills") { - t.Fatalf("second prompt = %q, want original prompt preserved", second) + return registry, augmenter +} + +func newPromptSkillsSession(sessionID string) *session.Session { + return &session.Session{ + ID: sessionID, + AgentName: "general", + WorkspaceID: "ws-1", + Workspace: "/tmp/ws-1", } } diff --git a/internal/memory/extractor/events.go b/internal/memory/extractor/events.go index 372632b23..b9d088ce2 100644 --- a/internal/memory/extractor/events.go +++ b/internal/memory/extractor/events.go @@ -16,7 +16,7 @@ const ( EventFailed = "memory.extractor.failed" // EventCoalesced records bounded queue merging for one session. EventCoalesced = "memory.extractor.coalesced" - // EventDropped records a queued extraction range dropped by the hard coalescing cap. + // EventDropped records extractor work dropped before provider execution. EventDropped = "memory.extractor.dropped" ) diff --git a/internal/memory/extractor/runtime.go b/internal/memory/extractor/runtime.go index a462bdca9..35144a850 100644 --- a/internal/memory/extractor/runtime.go +++ b/internal/memory/extractor/runtime.go @@ -15,39 +15,52 @@ import ( ) const ( - defaultCoalesceMax = 16 - actorKindSubagent = "agent_subagent" - actorKindRoot = "agent_root" - messageRoleAgent = "assistant" - sessionTypeDream = "dream" - sessionTypeSystem = "system" + defaultCoalesceMax = 16 + defaultQueueCapacity = 1 + defaultThrottleTurns = 1 + defaultThrottleFlushWait = 500 * time.Millisecond + actorKindSubagent = "agent_subagent" + actorKindRoot = "agent_root" + messageRoleAgent = "assistant" + sessionTypeDream = "dream" + sessionTypeSystem = "system" ) +var errRuntimeDraining = errors.New("memory extractor: runtime is draining") + // Runtime owns asynchronous transcript extraction and daemon inbox production. type Runtime struct { - root string - extractor memcontract.Extractor - producer *Producer - events EventSink - logger *slog.Logger - now func() time.Time - coalesceMax int - inboxPath string - workerCtx context.Context - cancelWorker context.CancelFunc - - mu sync.Mutex - sessions map[string]*sessionState - toolWrites map[string]toolWriteMarkers - droppedTurns int64 - coalescedTurns int64 - closed bool - wg sync.WaitGroup + root string + extractor memcontract.Extractor + producer *Producer + events EventSink + logger *slog.Logger + now func() time.Time + coalesceMax int + queueCapacity int + throttleTurns int + throttleFlushWait time.Duration + inboxPath string + workerCtx context.Context + cancelWorker context.CancelFunc + providerSlots chan struct{} + + mu sync.Mutex + sessions map[string]*sessionState + toolWrites map[string]toolWriteMarkers + skippedTurns int64 + droppedTurns int64 + coalescedTurns int64 + backpressuredSessions int64 + closed bool + draining bool + wg sync.WaitGroup } type sessionState struct { - inFlight bool - queued *request + inFlight bool + queued *request + flushTimer *time.Timer } type request struct { @@ -66,11 +79,14 @@ func (m toolWriteMarkers) empty() bool { // RuntimeStats exposes bounded operational state for daemon status APIs. type RuntimeStats struct { - QueuedSessions int - InFlightSessions int - DroppedTurns int64 - CoalescedTurns int64 - Closed bool + QueuedSessions int + InFlightSessions int + ActiveProviderSessions int + SkippedTurns int64 + DroppedTurns int64 + CoalescedTurns int64 + BackpressuredSessions int64 + Closed bool } // Option customizes the extractor runtime. @@ -110,6 +126,33 @@ func WithCoalesceMax(limit int) Option { } } +// WithQueueCapacity bounds concurrent provider-backed extractor sessions. +func WithQueueCapacity(limit int) Option { + return func(r *Runtime) { + if limit > 0 { + r.queueCapacity = limit + } + } +} + +// WithThrottleTurns coalesces same-session bursts before launching another extraction. +func WithThrottleTurns(limit int) Option { + return func(r *Runtime) { + if limit > 0 { + r.throttleTurns = limit + } + } +} + +// WithThrottleFlushWait overrides the idle flush wait for tests and tightly controlled runtimes. +func WithThrottleFlushWait(wait time.Duration) Option { + return func(r *Runtime) { + if wait > 0 { + r.throttleFlushWait = wait + } + } +} + // WithInboxPath overrides the default /_inbox directory. func WithInboxPath(path string) Option { return func(r *Runtime) { @@ -141,21 +184,27 @@ func NewRuntime( } workerCtx, cancel := context.WithCancel(ctx) runtime := &Runtime{ - root: clean, - extractor: extractor, - logger: slog.Default(), - now: now, - coalesceMax: defaultCoalesceMax, - workerCtx: workerCtx, - cancelWorker: cancel, - sessions: make(map[string]*sessionState), - toolWrites: make(map[string]toolWriteMarkers), + root: clean, + extractor: extractor, + logger: slog.Default(), + now: now, + coalesceMax: defaultCoalesceMax, + queueCapacity: defaultQueueCapacity, + throttleTurns: defaultThrottleTurns, + throttleFlushWait: defaultThrottleFlushWait, + workerCtx: workerCtx, + cancelWorker: cancel, + sessions: make(map[string]*sessionState), + toolWrites: make(map[string]toolWriteMarkers), } for _, opt := range opts { if opt != nil { opt(runtime) } } + if runtime.queueCapacity > 0 { + runtime.providerSlots = make(chan struct{}, runtime.queueCapacity) + } producer, err := NewProducer(clean, runtime.now, WithProducerInboxPath(runtime.inboxPath)) if err != nil { cancel() @@ -293,52 +342,114 @@ func (r *Runtime) Enqueue(ctx context.Context, turn memcontract.TurnRecord) erro r.mu.Unlock() return errors.New("memory extractor: runtime is closed") } - state := r.sessions[req.turn.SessionID] + if r.draining { + r.mu.Unlock() + return errRuntimeDraining + } + if !turnHasExtractableContent(req.turn) { + r.skippedTurns++ + r.mu.Unlock() + r.recordEvent(ctx, Event{ + Op: EventDropped, + Turn: req.turn, + Metadata: map[string]string{ + "message_count": strconv.Itoa(len(req.turn.Snapshot.Messages)), + "reason": "empty_snapshot", + }, + }) + return nil + } + sessionID := req.turn.SessionID + state := r.sessions[sessionID] if state == nil { state = &sessionState{} - r.sessions[req.turn.SessionID] = state + r.sessions[sessionID] = state } - if !state.inFlight { + if !state.inFlight && state.queued == nil { state.inFlight = true r.wg.Add(1) r.mu.Unlock() - go r.runSession(req.turn.SessionID, req) + go r.runSession(sessionID, req) return nil } + event := r.queueRequestLocked(state, req) + if !state.inFlight && state.queued != nil && r.queuedReady(*state.queued) { + next := *state.queued + state.queued = nil + r.stopThrottleFlushLocked(state) + state.inFlight = true + r.wg.Add(1) + r.mu.Unlock() + r.recordQueuedEvent(ctx, event) + go r.runSession(sessionID, next) + return nil + } + if !state.inFlight && state.queued != nil { + r.scheduleThrottleFlushLocked(sessionID, state) + } + r.mu.Unlock() + r.recordQueuedEvent(ctx, event) + return nil +} + +func (r *Runtime) queueRequestLocked(state *sessionState, req request) *Event { if state.queued == nil { queued := req state.queued = &queued - r.mu.Unlock() return nil } - if state.queued.coalesceCount+1 > r.coalesceMax { + if state.queued.coalesceCount+2 > r.coalesceMax { dropped := *state.queued queued := req state.queued = &queued r.droppedTurns++ - r.mu.Unlock() - r.recordEvent(ctx, Event{ + return &Event{ Op: EventDropped, Turn: dropped.turn, Metadata: map[string]string{ "dropped_until_message_seq": strconv.FormatInt(dropped.turn.UntilMessageSeq, 10), "retained_until_message_seq": strconv.FormatInt(req.turn.UntilMessageSeq, 10), }, - }) - return nil + } } merged := mergeRequests(*state.queued, req) state.queued = &merged r.coalescedTurns++ - r.mu.Unlock() - r.recordEvent(ctx, Event{ + return &Event{ Op: EventCoalesced, Turn: merged.turn, Metadata: map[string]string{ "coalesced_count": strconv.Itoa(merged.coalesceCount), }, + } +} + +func (r *Runtime) recordQueuedEvent(ctx context.Context, event *Event) { + if event == nil { + return + } + r.recordEvent(ctx, *event) +} + +func (r *Runtime) queuedReady(req request) bool { + return r.throttleTurns <= 1 || req.coalesceCount+1 >= r.throttleTurns +} + +func (r *Runtime) scheduleThrottleFlushLocked(sessionID string, state *sessionState) { + if state == nil || state.flushTimer != nil || r.throttleTurns <= 1 || r.draining { + return + } + state.flushTimer = time.AfterFunc(r.throttleFlushWait, func() { + r.flushSession(sessionID) }) - return nil +} + +func (r *Runtime) stopThrottleFlushLocked(state *sessionState) { + if state == nil || state.flushTimer == nil { + return + } + state.flushTimer.Stop() + state.flushTimer = nil } // Stats returns current queue counters without blocking workers. @@ -349,9 +460,12 @@ func (r *Runtime) Stats() RuntimeStats { r.mu.Lock() defer r.mu.Unlock() stats := RuntimeStats{ - DroppedTurns: r.droppedTurns, - CoalescedTurns: r.coalescedTurns, - Closed: r.closed, + ActiveProviderSessions: len(r.providerSlots), + SkippedTurns: r.skippedTurns, + DroppedTurns: r.droppedTurns, + CoalescedTurns: r.coalescedTurns, + BackpressuredSessions: r.backpressuredSessions, + Closed: r.closed, } for _, state := range r.sessions { if state == nil { @@ -375,8 +489,31 @@ func (r *Runtime) Drain(ctx context.Context) error { if ctx == nil { return errors.New("memory extractor: drain context is required") } - if err := r.waitWorkers(ctx); err != nil { - return err + r.mu.Lock() + if r.draining { + r.mu.Unlock() + return errRuntimeDraining + } + r.draining = true + for _, state := range r.sessions { + r.stopThrottleFlushLocked(state) + } + r.mu.Unlock() + defer func() { + r.mu.Lock() + r.draining = false + r.mu.Unlock() + }() + for { + if err := r.flushQueuedSessions(ctx); err != nil { + return err + } + if err := r.waitWorkers(ctx); err != nil { + return err + } + if !r.hasPendingWork() { + break + } } if err := r.extractor.Drain(ctx); err != nil { return fmt.Errorf("memory extractor: drain provider: %w", err) @@ -422,25 +559,100 @@ func (r *Runtime) waitWorkers(ctx context.Context) error { } } +func (r *Runtime) flushQueuedSessions(ctx context.Context) error { + if err := ctx.Err(); err != nil { + return fmt.Errorf("memory extractor: flush queued sessions: %w", err) + } + starters := make(map[string]request) + r.mu.Lock() + for sessionID, state := range r.sessions { + if state == nil || state.inFlight || state.queued == nil { + continue + } + r.stopThrottleFlushLocked(state) + starters[sessionID] = *state.queued + state.queued = nil + state.inFlight = true + r.wg.Add(1) + } + r.mu.Unlock() + for sessionID, req := range starters { + go r.runSession(sessionID, req) + } + return nil +} + +func (r *Runtime) flushSession(sessionID string) { + r.mu.Lock() + state := r.sessions[sessionID] + if r.closed || r.draining || state == nil || state.inFlight || state.queued == nil { + if state != nil { + state.flushTimer = nil + } + r.mu.Unlock() + return + } + current := *state.queued + state.queued = nil + state.flushTimer = nil + state.inFlight = true + r.wg.Add(1) + r.mu.Unlock() + go r.runSession(sessionID, current) +} + +func (r *Runtime) hasPendingWork() bool { + r.mu.Lock() + defer r.mu.Unlock() + return len(r.sessions) > 0 +} + func (r *Runtime) runSession(sessionID string, req request) { defer r.wg.Done() current := req for { - r.process(current) + if !r.process(current) { + r.clearSession(sessionID) + return + } r.mu.Lock() state := r.sessions[sessionID] if state == nil || state.queued == nil { + if state != nil { + r.stopThrottleFlushLocked(state) + } delete(r.sessions, sessionID) r.mu.Unlock() return } + if !r.queuedReady(*state.queued) { + state.inFlight = false + r.scheduleThrottleFlushLocked(sessionID, state) + r.mu.Unlock() + return + } current = *state.queued state.queued = nil r.mu.Unlock() } } -func (r *Runtime) process(req request) { +func (r *Runtime) clearSession(sessionID string) { + r.mu.Lock() + state := r.sessions[sessionID] + if state != nil { + r.stopThrottleFlushLocked(state) + } + delete(r.sessions, sessionID) + r.mu.Unlock() +} + +func (r *Runtime) process(req request) bool { + release, ok := r.acquireProviderSlot() + if !ok { + return false + } + defer release() r.recordEvent(r.workerCtx, Event{ Op: EventStarted, Turn: req.turn, @@ -452,13 +664,13 @@ func (r *Runtime) process(req request) { if err != nil { r.recordEvent(r.workerCtx, Event{Op: EventFailed, Turn: req.turn, Error: err.Error()}) r.logger.Warn("memory extractor: extract failed", "session_id", req.turn.SessionID, "error", err) - return + return true } path, count, err := r.producer.Write(r.workerCtx, req.turn, candidates) if err != nil { r.recordEvent(r.workerCtx, Event{Op: EventFailed, Turn: req.turn, Error: err.Error()}) r.logger.Warn("memory extractor: write inbox failed", "session_id", req.turn.SessionID, "error", err) - return + return true } r.recordEvent(r.workerCtx, Event{ Op: EventCompleted, @@ -468,6 +680,34 @@ func (r *Runtime) process(req request) { "candidate_count": strconv.Itoa(count), }, }) + return true +} + +func (r *Runtime) acquireProviderSlot() (func(), bool) { + if r.providerSlots == nil { + return func() {}, true + } + select { + case r.providerSlots <- struct{}{}: + return r.releaseProviderSlot, true + default: + } + r.mu.Lock() + r.backpressuredSessions++ + r.mu.Unlock() + select { + case r.providerSlots <- struct{}{}: + return r.releaseProviderSlot, true + case <-r.workerCtx.Done(): + return nil, false + } +} + +func (r *Runtime) releaseProviderSlot() { + select { + case <-r.providerSlots: + default: + } } func (r *Runtime) recordEvent(ctx context.Context, event Event) { @@ -529,6 +769,15 @@ func normalizeTurn(turn memcontract.TurnRecord, now func() time.Time) (memcontra return turn, nil } +func turnHasExtractableContent(turn memcontract.TurnRecord) bool { + for _, message := range turn.Snapshot.Messages { + if strings.TrimSpace(message.Content) != "" { + return true + } + } + return false +} + func mergeRequests(existing request, next request) request { merged := existing if next.turn.SinceMessageSeq < merged.turn.SinceMessageSeq { diff --git a/internal/memory/extractor/runtime_test.go b/internal/memory/extractor/runtime_test.go index 796758590..b8d9f4b14 100644 --- a/internal/memory/extractor/runtime_test.go +++ b/internal/memory/extractor/runtime_test.go @@ -115,6 +115,124 @@ func TestRuntime(t *testing.T) { } }) + t.Run("Should backpressure provider sessions with queue capacity without dropping turns", func(t *testing.T) { + t.Parallel() + + fake := newFakeExtractor() + fake.blockFirst() + runtime := newTestRuntime(t, t.TempDir(), fake, nil, extractor.WithQueueCapacity(1)) + + if err := runtime.Enqueue(testutil.Context(t), testTurn("sess-pressure-a", 1)); err != nil { + t.Fatalf("Enqueue(first session) error = %v", err) + } + fake.waitStarted(t) + if err := runtime.Enqueue(testutil.Context(t), testTurn("sess-pressure-b", 2)); err != nil { + t.Fatalf("Enqueue(second session) error = %v", err) + } + waitRuntimeStats(t, runtime, func(stats extractor.RuntimeStats) bool { + return stats.BackpressuredSessions == 1 && stats.InFlightSessions == 2 + }) + if turns := fake.turns(); len(turns) != 1 { + t.Fatalf("turns before release = %#v, want only the active provider turn", turns) + } + + fake.release() + if err := runtime.Drain(testutil.Context(t)); err != nil { + t.Fatalf("Drain() error = %v", err) + } + turns := fake.turns() + if len(turns) != 2 { + t.Fatalf("turns after drain = %#v, want both sessions preserved", turns) + } + if turns[0].SessionID != "sess-pressure-a" || turns[1].SessionID != "sess-pressure-b" { + t.Fatalf("turn order = %#v, want queued provider work after the active session", turns) + } + }) + + t.Run("Should throttle same-session bursts by coalescing queued turns until ready", func(t *testing.T) { + t.Parallel() + + fake := newFakeExtractor() + fake.blockFirst() + runtime := newTestRuntime( + t, + t.TempDir(), + fake, + nil, + extractor.WithThrottleTurns(3), + extractor.WithThrottleFlushWait(time.Hour), + ) + + if err := runtime.Enqueue(testutil.Context(t), testTurn("sess-throttle", 1)); err != nil { + t.Fatalf("Enqueue(1) error = %v", err) + } + fake.waitStarted(t) + if err := runtime.Enqueue(testutil.Context(t), testTurn("sess-throttle", 2)); err != nil { + t.Fatalf("Enqueue(2) error = %v", err) + } + fake.release() + waitRuntimeStats(t, runtime, func(stats extractor.RuntimeStats) bool { + return stats.InFlightSessions == 0 && stats.QueuedSessions == 1 + }) + if err := runtime.Enqueue(testutil.Context(t), testTurn("sess-throttle", 3)); err != nil { + t.Fatalf("Enqueue(3) error = %v", err) + } + if err := runtime.Enqueue(testutil.Context(t), testTurn("sess-throttle", 4)); err != nil { + t.Fatalf("Enqueue(4) error = %v", err) + } + if err := runtime.Drain(testutil.Context(t)); err != nil { + t.Fatalf("Drain() error = %v", err) + } + + turns := fake.turns() + if len(turns) != 2 { + t.Fatalf("turns = %#v, want initial + throttled merged turn", turns) + } + if turns[1].SinceMessageSeq != 2 || turns[1].UntilMessageSeq != 4 { + t.Fatalf("throttled range = %d..%d, want 2..4", turns[1].SinceMessageSeq, turns[1].UntilMessageSeq) + } + if len(turns[1].Snapshot.Messages) != 3 { + t.Fatalf("throttled messages = %#v, want all queued messages preserved", turns[1].Snapshot.Messages) + } + }) + + t.Run("Should drain throttled turns before the threshold without silently dropping content", func(t *testing.T) { + t.Parallel() + + fake := newFakeExtractor() + fake.blockFirst() + runtime := newTestRuntime( + t, + t.TempDir(), + fake, + nil, + extractor.WithThrottleTurns(3), + extractor.WithThrottleFlushWait(time.Hour), + ) + if err := runtime.Enqueue(testutil.Context(t), testTurn("sess-throttle-drain", 1)); err != nil { + t.Fatalf("Enqueue(1) error = %v", err) + } + fake.waitStarted(t) + if err := runtime.Enqueue(testutil.Context(t), testTurn("sess-throttle-drain", 2)); err != nil { + t.Fatalf("Enqueue(2) error = %v", err) + } + fake.release() + waitRuntimeStats(t, runtime, func(stats extractor.RuntimeStats) bool { + return stats.InFlightSessions == 0 && stats.QueuedSessions == 1 + }) + if err := runtime.Drain(testutil.Context(t)); err != nil { + t.Fatalf("Drain() error = %v", err) + } + + turns := fake.turns() + if len(turns) != 2 { + t.Fatalf("turns = %#v, want drain to flush the throttled queued turn", turns) + } + if turns[1].UntilMessageSeq != 2 || turns[1].Snapshot.Messages[0].Content != "message 2" { + t.Fatalf("drained turn = %#v, want queued non-empty content preserved", turns[1]) + } + }) + t.Run("Should drop the oldest queued range after the coalescing cap", func(t *testing.T) { t.Parallel() @@ -143,8 +261,41 @@ func TestRuntime(t *testing.T) { if turns[1].SinceMessageSeq != 4 || turns[1].UntilMessageSeq != 4 { t.Fatalf("retained range = %d..%d, want newest 4..4", turns[1].SinceMessageSeq, turns[1].UntilMessageSeq) } - if !events.containsOp(extractor.EventDropped) { - t.Fatalf("events = %#v, want %s", events.ops(), extractor.EventDropped) + if dropped := events.eventsForOp(extractor.EventDropped); len(dropped) != 2 { + t.Fatalf("dropped events = %#v, want one drop per overflowed queued turn", dropped) + } + if events.containsOp(extractor.EventCoalesced) { + t.Fatalf("events = %#v, want no coalescing after the configured cap", events.ops()) + } + }) + + t.Run("Should reject new work while drain is in progress", func(t *testing.T) { + t.Parallel() + + fake := newFakeExtractor() + fake.blockFirst() + runtime := newTestRuntime(t, t.TempDir(), fake, nil) + if err := runtime.Enqueue(testutil.Context(t), testTurn("sess-drain", 1)); err != nil { + t.Fatalf("Enqueue() error = %v", err) + } + fake.waitStarted(t) + + drainErr := make(chan error, 1) + go func() { + drainErr <- runtime.Drain(testutil.Context(t)) + }() + + drainingTurn := testTurn("sess-drain-rejected", 2) + drainingTurn.Snapshot.Messages[0].Content = " \n\t " + waitUntilDrainBlocksEnqueue(t, runtime, drainingTurn) + + fake.release() + if err := <-drainErr; err != nil { + t.Fatalf("Drain() error = %v", err) + } + turns := fake.turns() + if len(turns) != 1 || turns[0].UntilMessageSeq != 1 { + t.Fatalf("turns after drain = %#v, want only the original in-flight turn", turns) } }) @@ -187,6 +338,39 @@ func TestRuntime(t *testing.T) { } }) + t.Run("Should skip persisted messages without extractable content", func(t *testing.T) { + t.Parallel() + + fake := newFakeExtractor() + events := &recordingEventSink{} + runtime := newTestRuntime(t, t.TempDir(), fake, events) + payload := testPersistedPayload("sess-empty", 1) + payload.Text = " \n\t " + if err := runtime.HandleSessionMessagePersisted(testutil.Context(t), payload); err != nil { + t.Fatalf("HandleSessionMessagePersisted(empty text) error = %v", err) + } + if err := runtime.Drain(testutil.Context(t)); err != nil { + t.Fatalf("Drain() error = %v", err) + } + if turns := fake.turns(); len(turns) != 0 { + t.Fatalf("extracted turns = %#v, want no extraction for empty persisted text", turns) + } + if events.containsOp(extractor.EventStarted) { + t.Fatalf("events = %#v, want no extractor start for empty persisted text", events.ops()) + } + stats := runtime.Stats() + if stats.SkippedTurns != 1 { + t.Fatalf("Stats().SkippedTurns = %d, want 1", stats.SkippedTurns) + } + dropped := events.eventsForOp(extractor.EventDropped) + if len(dropped) != 1 { + t.Fatalf("dropped events = %#v, want one skipped-turn event", dropped) + } + if dropped[0].Metadata["reason"] != "empty_snapshot" || dropped[0].Metadata["message_count"] != "1" { + t.Fatalf("dropped metadata = %#v, want empty_snapshot skip without content", dropped[0].Metadata) + } + }) + t.Run("Should consume multiple pending tool write markers independently", func(t *testing.T) { t.Parallel() @@ -776,6 +960,64 @@ func missingContext() context.Context { return nil } +func waitRuntimeStats( + t *testing.T, + runtime *extractor.Runtime, + match func(extractor.RuntimeStats) bool, +) extractor.RuntimeStats { + t.Helper() + timeout := 5 * time.Second + if testDeadline, ok := t.Deadline(); ok { + if remaining := time.Until(testDeadline) / 2; remaining > 0 && remaining < timeout { + timeout = remaining + } + } + deadline := time.NewTimer(timeout) + defer deadline.Stop() + tick := time.NewTicker(10 * time.Millisecond) + defer tick.Stop() + for { + stats := runtime.Stats() + if match(stats) { + return stats + } + select { + case <-deadline.C: + t.Fatalf("timed out waiting for runtime stats, last stats = %#v", stats) + case <-tick.C: + } + } +} + +func waitUntilDrainBlocksEnqueue( + t *testing.T, + runtime *extractor.Runtime, + turn memcontract.TurnRecord, +) { + t.Helper() + timeout := time.NewTimer(5 * time.Second) + defer timeout.Stop() + tick := time.NewTicker(10 * time.Millisecond) + defer tick.Stop() + + for { + err := runtime.Enqueue(testutil.Context(t), turn) + switch { + case err == nil: + case err.Error() == "memory extractor: runtime is draining": + return + default: + t.Fatalf("Enqueue() while waiting for drain barrier error = %v", err) + } + + select { + case <-timeout.C: + t.Fatalf("timed out waiting for drain to reject new work") + case <-tick.C: + } + } +} + func TestEvent(t *testing.T) { t.Parallel() @@ -916,6 +1158,18 @@ func (s *recordingEventSink) containsOp(op string) bool { return slices.Contains(s.ops(), op) } +func (s *recordingEventSink) eventsForOp(op string) []extractor.Event { + s.mu.Lock() + defer s.mu.Unlock() + events := make([]extractor.Event, 0) + for _, event := range s.events { + if event.Op == op { + events = append(events, event) + } + } + return events +} + func (s *recordingEventSink) ops() []string { s.mu.Lock() defer s.mu.Unlock() diff --git a/internal/network/delivery.go b/internal/network/delivery.go index f55ce21b1..06fe3be48 100644 --- a/internal/network/delivery.go +++ b/internal/network/delivery.go @@ -11,6 +11,7 @@ import ( "strconv" "strings" "sync" + "sync/atomic" "time" "github.com/compozy/agh/internal/acp" @@ -45,6 +46,7 @@ const ( deliveryDropReasonQueueFull = "queue_overflow" networkMessageTrustUntrusted = "untrusted" protocolGuidanceDirectRoomText = "Direct-room chat uses `--kind say --surface direct`." + compactReplyGuidanceText = "Full protocol examples were already provided earlier in this session; run `agh network --help` for command details." ) type deliveryPrompter interface { @@ -71,10 +73,12 @@ type deliveryCoordinator struct { retryMaxDelay time.Duration scheduleRetry deliveryRetryScheduler - mu sync.Mutex - queues map[string]*inboundQueue - inFlight map[string]queuedEnvelope - waiters map[string][]chan struct{} + mu sync.Mutex + queues map[string]*inboundQueue + inFlight map[string]queuedEnvelope + waiters map[string][]chan struct{} + guidance map[string]deliveryGuidanceState + sessionTokens atomic.Uint64 deliveries sync.Map wg sync.WaitGroup @@ -90,6 +94,7 @@ type deliveryState struct { type inboundQueue struct { mu sync.Mutex maxDepth int + token uint64 items []queuedEnvelope } @@ -104,8 +109,21 @@ type queuedEnvelope struct { AcceptedAt time.Time DeliveryMode string RetryAttempt int + SessionToken uint64 +} + +type deliveryGuidanceState struct { + replyDelivered bool + protocolDelivered bool } +type networkMessageGuidanceMode int + +const ( + networkMessageGuidanceVerbose networkMessageGuidanceMode = iota + networkMessageGuidanceCompact +) + type deliveryCoordinatorStats struct { QueuedMessages int QueuedSessions int @@ -175,6 +193,7 @@ func newDeliveryCoordinator( queues: make(map[string]*inboundQueue), inFlight: make(map[string]queuedEnvelope), waiters: make(map[string][]chan struct{}), + guidance: make(map[string]deliveryGuidanceState), } for _, opt := range opts { if opt != nil { @@ -372,6 +391,8 @@ func (c *deliveryCoordinator) dropSession(sessionID string) { defer c.mu.Unlock() target := strings.TrimSpace(sessionID) delete(c.queues, target) + delete(c.inFlight, target) + delete(c.guidance, target) waiters := c.waiters[target] delete(c.waiters, target) for _, waiter := range waiters { @@ -436,11 +457,54 @@ func (c *deliveryCoordinator) queueForSession(sessionID string) *inboundQueue { return queue } - queue := newInboundQueue(c.maxQueueDepth) + queue := newInboundQueueWithToken(c.maxQueueDepth, c.sessionTokens.Add(1)) c.queues[target] = queue return queue } +func (c *deliveryCoordinator) guidanceModeForDelivery( + sessionID string, + envelope Envelope, +) networkMessageGuidanceMode { + target := strings.TrimSpace(sessionID) + if target == "" { + return networkMessageGuidanceVerbose + } + + c.mu.Lock() + defer c.mu.Unlock() + + state := c.guidance[target] + if !state.replyDelivered { + return networkMessageGuidanceVerbose + } + if lifecycleWorkID(envelope) != "" && !state.protocolDelivered { + return networkMessageGuidanceVerbose + } + return networkMessageGuidanceCompact +} + +func (c *deliveryCoordinator) markGuidanceDelivered(sessionID string, item queuedEnvelope) { + target := strings.TrimSpace(sessionID) + if target == "" { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + queue := c.queues[target] + if queue == nil || queue.token != item.SessionToken { + return + } + + state := c.guidance[target] + state.replyDelivered = true + if lifecycleWorkID(item.Envelope) != "" { + state.protocolDelivered = true + } + c.guidance[target] = state +} + func (c *deliveryCoordinator) trigger(sessionID string) { if c == nil { return @@ -489,7 +553,7 @@ func (c *deliveryCoordinator) processQueuedItem(sessionID string, item queuedEnv c.markInFlight(sessionID, item) envelope := item.Envelope - message, err := formatNetworkMessage(envelope) + message, err := formatNetworkMessageWithGuidance(envelope, c.guidanceModeForDelivery(sessionID, envelope)) if err != nil { c.handleRenderFailure(sessionID, item, state, err) return false @@ -552,7 +616,7 @@ func (c *deliveryCoordinator) handleRenderFailure( ) { c.clearInFlight(sessionID) item = item.withNextRetryAttempt() - c.requeueFront(sessionID, item) + requeued := c.requeueFront(sessionID, item) c.logger.Warn( "network.message.render_failed", "session_id", sessionID, @@ -560,7 +624,7 @@ func (c *deliveryCoordinator) handleRenderFailure( "error", err, "retry_attempt", item.RetryAttempt, ) - if json.Valid(item.Envelope.Body) { + if requeued && json.Valid(item.Envelope.Body) { c.retryAfterWorkerExit(sessionID, item, state) } } @@ -573,7 +637,7 @@ func (c *deliveryCoordinator) handleDeliveryFailure( ) { c.clearInFlight(sessionID) item = item.withNextRetryAttempt() - c.requeueFront(sessionID, item) + requeued := c.requeueFront(sessionID, item) c.logger.Warn( "network.message.delivery_failed", "session_id", sessionID, @@ -581,7 +645,9 @@ func (c *deliveryCoordinator) handleDeliveryFailure( "error", err, "retry_attempt", item.RetryAttempt, ) - c.retryAfterWorkerExit(sessionID, item, state) + if requeued { + c.retryAfterWorkerExit(sessionID, item, state) + } } func (c *deliveryCoordinator) handleInterruptedDelivery(sessionID string, item queuedEnvelope) { @@ -599,6 +665,7 @@ func (c *deliveryCoordinator) handleInterruptedDelivery(sessionID string, item q func (c *deliveryCoordinator) finishDeliveredMessage(sessionID string, item queuedEnvelope) { c.clearInFlight(sessionID) + c.markGuidanceDelivered(sessionID, item) latency := max(c.now().Sub(item.AcceptedAt), 0) @@ -643,14 +710,15 @@ func (c *deliveryCoordinator) dequeue(sessionID string) (queuedEnvelope, bool) { return queue.dequeue() } -func (c *deliveryCoordinator) requeueFront(sessionID string, item queuedEnvelope) { +func (c *deliveryCoordinator) requeueFront(sessionID string, item queuedEnvelope) bool { c.mu.Lock() queue := c.queues[strings.TrimSpace(sessionID)] c.mu.Unlock() - if queue == nil { - return + if queue == nil || queue.token != item.SessionToken { + return false } queue.prepend(item) + return true } func (c *deliveryCoordinator) retryAfterWorkerExit(sessionID string, item queuedEnvelope, state *deliveryState) { @@ -810,7 +878,11 @@ func (c *deliveryCoordinator) clearInFlight(sessionID string) { } func newInboundQueue(maxDepth int) *inboundQueue { - return &inboundQueue{maxDepth: maxDepth} + return newInboundQueueWithToken(maxDepth, 0) +} + +func newInboundQueueWithToken(maxDepth int, token uint64) *inboundQueue { + return &inboundQueue{maxDepth: maxDepth, token: token} } func (q *inboundQueue) enqueue(envelope Envelope, acceptedAt time.Time, prompting bool) enqueueResult { @@ -833,6 +905,7 @@ func (q *inboundQueue) enqueue(envelope Envelope, acceptedAt time.Time, promptin Envelope: cloneEnvelope(envelope), AcceptedAt: acceptedAt.UTC(), DeliveryMode: deliveryMode, + SessionToken: q.token, }) return enqueueResult{ @@ -891,6 +964,13 @@ func (q *inboundQueue) len() int { } func formatNetworkMessage(envelope Envelope) (string, error) { + return formatNetworkMessageWithGuidance(envelope, networkMessageGuidanceVerbose) +} + +func formatNetworkMessageWithGuidance( + envelope Envelope, + guidanceMode networkMessageGuidanceMode, +) (string, error) { body, err := envelope.DecodeBody() preview := "" @@ -960,12 +1040,16 @@ func formatNetworkMessage(envelope Envelope) (string, error) { builder.WriteString(encodedBody) builder.WriteString("\n") builder.WriteString("") - writeReplyGuidance(&builder, envelope) + writeReplyGuidance(&builder, envelope, guidanceMode) return builder.String(), nil } -func writeReplyGuidance(builder *strings.Builder, envelope Envelope) { +func writeReplyGuidance( + builder *strings.Builder, + envelope Envelope, + guidanceMode networkMessageGuidanceMode, +) { if builder == nil { return } @@ -988,6 +1072,10 @@ func writeReplyGuidance(builder *strings.Builder, envelope Envelope) { for _, line := range protocolGuidanceText { writeGuidanceLine(builder, line) } + if guidanceMode == networkMessageGuidanceCompact { + writeCompactReplyGuidance(builder) + return + } writeGuidanceLine(builder, "Examples:") writeGuidanceLine(builder, "```bash") @@ -1001,6 +1089,10 @@ func writeReplyGuidance(builder *strings.Builder, envelope Envelope) { builder.WriteString("See `agh network --help` for options.") } +func writeCompactReplyGuidance(builder *strings.Builder) { + writeGuidanceLine(builder, compactReplyGuidanceText) +} + type replyGuidanceContext struct { envelope Envelope reuseWork bool @@ -1285,5 +1377,6 @@ func cloneQueuedEnvelope(item queuedEnvelope) queuedEnvelope { AcceptedAt: item.AcceptedAt, DeliveryMode: item.DeliveryMode, RetryAttempt: item.RetryAttempt, + SessionToken: item.SessionToken, } } diff --git a/internal/network/delivery_test.go b/internal/network/delivery_test.go index fc5e4ccce..40af3317f 100644 --- a/internal/network/delivery_test.go +++ b/internal/network/delivery_test.go @@ -377,6 +377,260 @@ func TestFormatNetworkMessageSayGuidanceKeepsCurrentThreadByDefault(t *testing.T } } +func TestDeliveryCoordinatorCompactsReplyGuidanceAfterFirstDelivery(t *testing.T) { + t.Parallel() + + t.Run("Should keep verbose examples only for the first successful delivery per session", func(t *testing.T) { + t.Parallel() + + ctx := t.Context() + prompter := newFakeDeliveryPrompter() + coordinator, err := newDeliveryCoordinator(ctx, 4, prompter) + if err != nil { + t.Fatalf("newDeliveryCoordinator() error = %v", err) + } + + first := testDeliveryEnvelope(t, "msg-guidance-1", "first guidance message") + first.WorkID = new("work_guidance_01") + first.TraceID = new("trace-guidance-01") + second := testDeliveryEnvelope(t, "msg-guidance-2", "second guidance message") + second.WorkID = new("work_guidance_01") + second.TraceID = new("trace-guidance-01") + + if err := coordinator.acceptOne(ctx, Delivery{ + SessionID: "sess-guidance", + Envelope: first, + }); err != nil { + t.Fatalf("acceptOne(first) error = %v", err) + } + prompter.waitForCalls(t, 1) + + if err := coordinator.acceptOne(ctx, Delivery{ + SessionID: "sess-guidance", + Envelope: second, + }); err != nil { + t.Fatalf("acceptOne(second) error = %v", err) + } + + firstCall := prompter.call(0) + for _, snippet := range []string{ + "Examples:", + "# Protocol receipt", + "# Protocol trace", + "# Protocol capability", + `--reply-to "msg-guidance-1"`, + `--trace-id "trace-guidance-01"`, + } { + if !strings.Contains(firstCall.message, snippet) { + t.Fatalf("first delivery missing verbose guidance snippet %q:\n%s", snippet, firstCall.message) + } + } + + prompter.finishCall(0, acp.AgentEvent{Type: acp.EventTypeDone, Timestamp: time.Now().UTC()}) + prompter.waitForCalls(t, 2) + + secondCall := prompter.call(1) + for _, snippet := range []string{ + "second guidance message", + `--reply-to "msg-guidance-2"`, + `--trace-id "trace-guidance-01"`, + compactReplyGuidanceText, + "Direct-room chat uses `--kind say --surface direct`.", + } { + if !strings.Contains(secondCall.message, snippet) { + t.Fatalf( + "second delivery missing compact guidance snippet %q:\n%s", + snippet, + secondCall.message, + ) + } + } + for _, snippet := range []string{ + "Examples:", + "# Protocol receipt", + "# Protocol trace", + "# Protocol capability", + `"capability":{"id":"reply-workflow"`, + } { + if strings.Contains(secondCall.message, snippet) { + t.Fatalf( + "second delivery unexpectedly repeated verbose guidance snippet %q:\n%s", + snippet, + secondCall.message, + ) + } + } + + prompter.finishCall(1, acp.AgentEvent{Type: acp.EventTypeDone, Timestamp: time.Now().UTC()}) + coordinator.wait() + }) + + t.Run("Should keep protocol examples until a lifecycle delivery succeeds", func(t *testing.T) { + t.Parallel() + + ctx := t.Context() + prompter := newFakeDeliveryPrompter() + coordinator, err := newDeliveryCoordinator(ctx, 4, prompter) + if err != nil { + t.Fatalf("newDeliveryCoordinator() error = %v", err) + } + + first := testDeliveryEnvelope(t, "msg-guidance-non-work", "non lifecycle message") + second := testDeliveryEnvelope(t, "msg-guidance-work", "lifecycle message") + second.WorkID = new("work_guidance_02") + + if err := coordinator.acceptOne(ctx, Delivery{ + SessionID: "sess-lifecycle-guidance", + Envelope: first, + }); err != nil { + t.Fatalf("acceptOne(first) error = %v", err) + } + prompter.waitForCalls(t, 1) + prompter.finishCall(0, acp.AgentEvent{Type: acp.EventTypeDone, Timestamp: time.Now().UTC()}) + + if err := coordinator.acceptOne(ctx, Delivery{ + SessionID: "sess-lifecycle-guidance", + Envelope: second, + }); err != nil { + t.Fatalf("acceptOne(second) error = %v", err) + } + prompter.waitForCalls(t, 2) + + secondCall := prompter.call(1) + for _, snippet := range []string{ + "Examples:", + "# Protocol receipt", + "# Protocol trace", + "# Protocol capability", + `--work "work_guidance_02"`, + `--reply-to "msg-guidance-work"`, + } { + if !strings.Contains(secondCall.message, snippet) { + t.Fatalf( + "lifecycle delivery missing deferred protocol guidance snippet %q:\n%s", + snippet, + secondCall.message, + ) + } + } + + prompter.finishCall(1, acp.AgentEvent{Type: acp.EventTypeDone, Timestamp: time.Now().UTC()}) + coordinator.wait() + }) + + t.Run("Should reset guidance state when the session is dropped", func(t *testing.T) { + t.Parallel() + + ctx := t.Context() + prompter := newFakeDeliveryPrompter() + coordinator, err := newDeliveryCoordinator(ctx, 4, prompter) + if err != nil { + t.Fatalf("newDeliveryCoordinator() error = %v", err) + } + + first := testDeliveryEnvelope(t, "msg-guidance-drop-1", "first before drop") + first.WorkID = new("work_guidance_drop") + if err := coordinator.acceptOne(ctx, Delivery{ + SessionID: "sess-guidance-drop", + Envelope: first, + }); err != nil { + t.Fatalf("acceptOne(first) error = %v", err) + } + prompter.waitForCalls(t, 1) + prompter.finishCall(0, acp.AgentEvent{Type: acp.EventTypeDone, Timestamp: time.Now().UTC()}) + coordinator.wait() + + coordinator.dropSession("sess-guidance-drop") + + second := testDeliveryEnvelope(t, "msg-guidance-drop-2", "first after drop") + second.WorkID = new("work_guidance_drop") + if err := coordinator.acceptOne(ctx, Delivery{ + SessionID: "sess-guidance-drop", + Envelope: second, + }); err != nil { + t.Fatalf("acceptOne(second) error = %v", err) + } + prompter.waitForCalls(t, 2) + + secondCall := prompter.call(1) + for _, snippet := range []string{ + "Examples:", + "# Protocol receipt", + "# Protocol trace", + "# Protocol capability", + `--reply-to "msg-guidance-drop-2"`, + `--work "work_guidance_drop"`, + } { + if !strings.Contains(secondCall.message, snippet) { + t.Fatalf( + "delivery after drop missing reset verbose guidance snippet %q:\n%s", + snippet, + secondCall.message, + ) + } + } + + prompter.finishCall(1, acp.AgentEvent{Type: acp.EventTypeDone, Timestamp: time.Now().UTC()}) + coordinator.wait() + }) + + t.Run("Should ignore stale in-flight guidance completion after the session is dropped", func(t *testing.T) { + t.Parallel() + + ctx := t.Context() + prompter := newFakeDeliveryPrompter() + coordinator, err := newDeliveryCoordinator(ctx, 4, prompter) + if err != nil { + t.Fatalf("newDeliveryCoordinator() error = %v", err) + } + + first := testDeliveryEnvelope(t, "msg-guidance-stale-1", "first before drop") + first.WorkID = new("work_guidance_stale") + if err := coordinator.acceptOne(ctx, Delivery{ + SessionID: "sess-guidance-stale", + Envelope: first, + }); err != nil { + t.Fatalf("acceptOne(first) error = %v", err) + } + prompter.waitForCalls(t, 1) + + coordinator.dropSession("sess-guidance-stale") + + second := testDeliveryEnvelope(t, "msg-guidance-stale-2", "first after drop") + second.WorkID = new("work_guidance_stale") + if err := coordinator.acceptOne(ctx, Delivery{ + SessionID: "sess-guidance-stale", + Envelope: second, + }); err != nil { + t.Fatalf("acceptOne(second) error = %v", err) + } + + prompter.finishCall(0, acp.AgentEvent{Type: acp.EventTypeDone, Timestamp: time.Now().UTC()}) + prompter.waitForCalls(t, 2) + + secondCall := prompter.call(1) + for _, snippet := range []string{ + "Examples:", + "# Protocol receipt", + "# Protocol trace", + "# Protocol capability", + `--reply-to "msg-guidance-stale-2"`, + `--work "work_guidance_stale"`, + } { + if !strings.Contains(secondCall.message, snippet) { + t.Fatalf( + "delivery after stale completion missing reset verbose guidance snippet %q:\n%s", + snippet, + secondCall.message, + ) + } + } + + prompter.finishCall(1, acp.AgentEvent{Type: acp.EventTypeDone, Timestamp: time.Now().UTC()}) + coordinator.wait() + }) +} + func TestDeliveryCoordinatorIdleAndBusyBehavior(t *testing.T) { t.Parallel() diff --git a/internal/network/perf_bench_test.go b/internal/network/perf_bench_test.go index 01b838c6e..ceb16f70a 100644 --- a/internal/network/perf_bench_test.go +++ b/internal/network/perf_bench_test.go @@ -8,30 +8,7 @@ import ( ) func BenchmarkFormatNetworkMessageDirect(b *testing.B) { - envelope := Envelope{ - Protocol: ProtocolV0, - WorkspaceID: testWorkspaceID, - ID: "msg-bench-direct", - Kind: KindSay, - Channel: "builders", - Surface: new(SurfaceDirect), - DirectID: new(testDirectRef().DirectID), - From: "coder.sess-bench", - To: new("reviewer.sess-bench"), - WorkID: new("work_bench-direct"), - ReplyTo: new("msg-root-bench"), - TraceID: new("trace-bench-direct"), - CausationID: new("msg-cause-bench"), - TS: time.Date(2026, 4, 17, 12, 0, 0, 0, time.UTC).Unix(), - Body: benchmarkRawJSON(b, SayBody{ - Text: "Review the attached network benchmark payload and summarize the reply strategy.", - Intent: "review_request", - Artifacts: []json.RawMessage{ - json.RawMessage(`{"kind":"capability","id":"artifact-1"}`), - json.RawMessage(`{"kind":"patch","id":"artifact-2"}`), - }, - }), - } + envelope := benchmarkDirectEnvelope(b) b.ReportAllocs() @@ -46,6 +23,37 @@ func BenchmarkFormatNetworkMessageDirect(b *testing.B) { } } +func BenchmarkFormatNetworkMessageGuidanceModes(b *testing.B) { + envelope := benchmarkDirectEnvelope(b) + cases := []struct { + name string + mode networkMessageGuidanceMode + }{ + {name: "verbose", mode: networkMessageGuidanceVerbose}, + {name: "compact", mode: networkMessageGuidanceCompact}, + } + + for _, tc := range cases { + b.Run(tc.name, func(b *testing.B) { + b.ReportAllocs() + + var rendered string + for b.Loop() { + var err error + rendered, err = formatNetworkMessageWithGuidance(envelope, tc.mode) + if err != nil { + b.Fatalf("formatNetworkMessageWithGuidance(%s) error = %v", tc.name, err) + } + if rendered == "" { + b.Fatal("formatNetworkMessageWithGuidance() = empty, want content") + } + } + b.ReportMetric(float64(len(rendered)), "bytes/message") + b.ReportMetric(float64((len(rendered)+3)/4), "est_tokens/message") + }) + } +} + func BenchmarkPeerRegistryListPeersFiltered(b *testing.B) { now := time.Date(2026, 4, 17, 12, 0, 0, 0, time.UTC) registry, err := NewPeerRegistry(30*time.Second, WithPeerRegistryClock(func() time.Time { return now })) @@ -121,6 +129,35 @@ func BenchmarkNetworkLogFields(b *testing.B) { } } +func benchmarkDirectEnvelope(b *testing.B) Envelope { + b.Helper() + + return Envelope{ + Protocol: ProtocolV0, + WorkspaceID: testWorkspaceID, + ID: "msg-bench-direct", + Kind: KindSay, + Channel: "builders", + Surface: new(SurfaceDirect), + DirectID: new(testDirectRef().DirectID), + From: "coder.sess-bench", + To: new("reviewer.sess-bench"), + WorkID: new("work_bench-direct"), + ReplyTo: new("msg-root-bench"), + TraceID: new("trace-bench-direct"), + CausationID: new("msg-cause-bench"), + TS: time.Date(2026, 4, 17, 12, 0, 0, 0, time.UTC).Unix(), + Body: benchmarkRawJSON(b, SayBody{ + Text: "Review the attached network benchmark payload and summarize the reply strategy.", + Intent: "review_request", + Artifacts: []json.RawMessage{ + json.RawMessage(`{"kind":"capability","id":"artifact-1"}`), + json.RawMessage(`{"kind":"patch","id":"artifact-2"}`), + }, + }), + } +} + func benchmarkPeerCard(b *testing.B, peerID string, capability string) PeerCard { b.Helper() diff --git a/internal/session/spawn.go b/internal/session/spawn.go index 71cdd4198..8c7ddbc16 100644 --- a/internal/session/spawn.go +++ b/internal/session/spawn.go @@ -36,6 +36,7 @@ type SpawnOpts struct { ParentSessionID string AgentName string Provider string + Model string Name string Workspace string WorkspacePath string @@ -87,6 +88,7 @@ func (m *Manager) Spawn(ctx context.Context, opts SpawnOpts) (*Session, error) { child, err := m.Create(ctx, CreateOpts{ AgentName: normalized.AgentName, Provider: normalized.Provider, + Model: normalized.Model, Name: normalized.Name, Workspace: workspaceRef, WorkspacePath: workspacePath, @@ -147,6 +149,7 @@ func normalizeSpawnOpts(opts SpawnOpts) (SpawnOpts, error) { normalized.ParentSessionID = strings.TrimSpace(normalized.ParentSessionID) normalized.AgentName = strings.TrimSpace(normalized.AgentName) normalized.Provider = strings.TrimSpace(normalized.Provider) + normalized.Model = strings.TrimSpace(normalized.Model) normalized.Name = strings.TrimSpace(normalized.Name) normalized.Workspace = strings.TrimSpace(normalized.Workspace) normalized.WorkspacePath = strings.TrimSpace(normalized.WorkspacePath) diff --git a/internal/skills/catalog.go b/internal/skills/catalog.go index a193d0dff..f5d8b183c 100644 --- a/internal/skills/catalog.go +++ b/internal/skills/catalog.go @@ -12,16 +12,20 @@ import ( ) const ( - catalogDescriptionLimit = 200 - catalogEllipsis = "..." - currentCatalogOpen = "" - currentCatalogClose = "" + catalogDescriptionLimit = 200 + catalogEllipsis = "..." + currentCatalogOpen = "" + currentCatalogClose = "" + catalogToolPolicyFallbackInstructions = "" + + "If current tool policy denies `agh__skill_view`, use `agh skill view ` as an operator fallback." catalogUsageInstructions = "Use `agh__skill_view` to load full instructions for any skill.\n" + "Use `agh__skill_view` to read a specific skill resource file when the skill references one.\n" + - "If current tool policy denies `agh__skill_view`, use `agh skill view ` as an operator fallback." + catalogToolPolicyFallbackInstructions currentCatalogInstructions = "" + "The block above is the authoritative current skill state for this turn.\n" + "If it differs from any earlier startup snapshot, trust the current block." + currentCatalogUnchangedInstructions = "" + + "Previous catalog remains current; use `agh__skill_view` for full skill/resource instructions." ) var ( @@ -102,6 +106,18 @@ func BuildCurrentCatalog(skills []*Skill) string { ) } +// BuildCurrentCatalogUnchanged renders the compact per-turn marker used when +// the catalog did not change. +func BuildCurrentCatalogUnchanged() string { + return strings.Join([]string{ + currentCatalogOpen, + " " + currentCatalogUnchangedInstructions + "", + currentCatalogClose, + "", + catalogToolPolicyFallbackInstructions, + }, "\n") +} + func buildCatalog(skills []*Skill, openTag string, closeTag string, instructions string) string { type catalogEntry struct { name string diff --git a/openapi/agh.json b/openapi/agh.json index b27903b62..350f58740 100644 --- a/openapi/agh.json +++ b/openapi/agh.json @@ -50401,6 +50401,12 @@ "properties": { "extractor": { "properties": { + "active_provider_sessions": { + "type": "integer" + }, + "backpressured_sessions": { + "type": "integer" + }, "coalesced_turns": { "type": "integer" }, @@ -50416,6 +50422,9 @@ "queued_sessions": { "type": "integer" }, + "skipped_turns": { + "type": "integer" + }, "status": { "enum": [ "idle", @@ -50427,11 +50436,14 @@ } }, "required": [ + "active_provider_sessions", + "backpressured_sessions", "coalesced_turns", "dropped_turns", "failure_count", "in_flight_sessions", "queued_sessions", + "skipped_turns", "status" ], "type": "object" diff --git a/packages/site/content/runtime/core/configuration/config-toml.mdx b/packages/site/content/runtime/core/configuration/config-toml.mdx index 1f0870b51..fb3b1a2a3 100644 --- a/packages/site/content/runtime/core/configuration/config-toml.mdx +++ b/packages/site/content/runtime/core/configuration/config-toml.mdx @@ -1032,23 +1032,23 @@ The extractor consumes the `session.message_persisted` hook and writes structure controller. `inbox_path` and `dlq_path` are validation-locked to the canonical AGH-managed locations. -| Field | Type | Default | Valid values | Description | -| -------------------- | -------- | --------------------------------------------- | -------------------------------------------- | -------------------------------------------------------------------------- | -| `enabled` | boolean | `true` | `true` or `false` | Enables the extractor runtime. | -| `mode` | string | `post_message` | `post_message`, `compaction_flush`, `hybrid` | Extraction mode. `compaction_flush` and `hybrid` are reserved for slice 2. | -| `throttle_turns` | integer | `1` | Positive integer. | Minimum turns between extraction attempts on the same session. | -| `deadline` | duration | `60s` | Positive Go duration. | Per-extraction deadline before the runtime drops the candidate. | -| `sandbox_inbox_only` | boolean | `true` | `true` or `false` | Restricts extractor writes to `_inbox/` until the controller accepts them. | -| `inbox_path` | string | `$AGH_HOME/memory/_inbox` | Daemon-managed. | Read-only display field; the daemon manages the inbox path. | -| `dlq_path` | string | `$AGH_HOME/memory/_system/extractor/failures` | Daemon-managed. | Read-only display field; the daemon manages the DLQ path. | -| `model` | string | empty | Empty or model id. | Optional override; empty forks from the main session model. | +| Field | Type | Default | Valid values | Description | +| -------------------- | -------- | --------------------------------------------- | --------------------- | ----------------------------------------------------------------------------------------------------------------- | +| `enabled` | boolean | `true` | `true` or `false` | Enables the extractor runtime. | +| `mode` | string | `post_message` | `post_message` | Extraction mode. AGH currently supports only post-message extraction. | +| `throttle_turns` | integer | `1` | Positive integer. | Minimum same-session queued turns to coalesce before launching another extraction; drain flushes pending content. | +| `deadline` | duration | `60s` | Positive Go duration. | Per-extraction deadline before the runtime drops the candidate. | +| `sandbox_inbox_only` | boolean | `true` | `true` or `false` | Restricts extractor writes to `_inbox/` until the controller accepts them. | +| `inbox_path` | string | `$AGH_HOME/memory/_inbox` | Daemon-managed. | Read-only display field; the daemon manages the inbox path. | +| `dlq_path` | string | `$AGH_HOME/memory/_system/extractor/failures` | Daemon-managed. | Read-only display field; the daemon manages the DLQ path. | +| `model` | string | empty | Empty or model id. | Optional model override for the extractor child session. Empty uses the agent/provider default. | ## `[memory.extractor.queue]` -| Field | Type | Default | Valid values | Description | -| -------------- | ------- | ------- | ----------------- | ------------------------------------------------------------------------------------- | -| `capacity` | integer | `1` | Positive integer. | Bounded per-session extraction queue. | -| `coalesce_max` | integer | `16` | Positive integer. | Maximum coalesced turns when consecutive triggers arrive while the extractor is busy. | +| Field | Type | Default | Valid values | Description | +| -------------- | ------- | ------- | ----------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | +| `capacity` | integer | `1` | Positive integer. | Maximum concurrent provider-backed extractor child sessions; additional sessions wait without losing queued turns. | +| `coalesce_max` | integer | `16` | Positive integer greater than or equal to `throttle_turns`. | Maximum coalesced turns when consecutive triggers arrive while the extractor is busy. | ## `[memory.dream]` diff --git a/packages/site/content/runtime/core/getting-started/installation.mdx b/packages/site/content/runtime/core/getting-started/installation.mdx index de44456a3..ee55d4547 100644 --- a/packages/site/content/runtime/core/getting-started/installation.mdx +++ b/packages/site/content/runtime/core/getting-started/installation.mdx @@ -13,8 +13,9 @@ Make sure you have: - **macOS or Linux**. AGH currently targets local-first Unix-like environments. - **An install method that fits your machine**. Use Homebrew, npm, or Go for managed installs. - The verified binary installer additionally requires `curl`, `tar`, `cosign`, and either - `sha256sum` or `shasum`. + The verified binary installer additionally requires `curl`, `tar`, and either `sha256sum` or + `shasum`. It uses a local `cosign` when available; otherwise it downloads a pinned temporary + cosign verifier before checking AGH release provenance. - **At least one supported agent CLI** such as Claude Code, OpenClaw, Hermes, or another ACP-compatible agent runtime you already use. - **At least one provider authenticated in its native way**. Direct ACP providers such as Claude @@ -69,10 +70,10 @@ Use the verified installer when you want a standalone binary install on macOS or curl -fsSL https://agh.network/install.sh | sh ``` -The installer detects your platform, downloads the matching GitHub release archive, verifies -`checksums.txt` against `checksums.txt.sigstore.json` with `cosign`, verifies the archive checksum, -installs `agh`, runs `agh version`, and opens `agh install` when an interactive terminal is -available. +The installer detects your platform, downloads a pinned temporary cosign verifier when `cosign` is +not already on `PATH`, downloads the matching GitHub release archive, verifies `checksums.txt` +against `checksums.txt.sigstore.json`, verifies the archive checksum, installs `agh`, runs +`agh version`, and opens `agh install` when an interactive terminal is available. Use flags when you need a pinned or scripted install: diff --git a/packages/site/content/runtime/core/memory/system.mdx b/packages/site/content/runtime/core/memory/system.mdx index fe5fe93f7..1ae711f70 100644 --- a/packages/site/content/runtime/core/memory/system.mdx +++ b/packages/site/content/runtime/core/memory/system.mdx @@ -286,20 +286,26 @@ fit. Useful index entries are short and point to one file: Every controller decision and recall outcome is observable. -| Surface | Use it for | -| ------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | -| `agh memory health` | Enabled state, controller backlog, provider circuit state, dreaming gate status, and per-scope catalog/file counts. | -| `agh memory decisions list` | The Slice 1 truthful audit log: every committed/rejected/shadowed/reverted controller decision with redaction-safe traces. | -| `agh memory recall trace ` | The deterministic recall pipeline trace for a specific session turn: candidate set, scoring weights, freshness banners, and shadow-by-id outcomes. | -| `GET /api/memory/health` | Same data as `agh memory health` over HTTP/UDS. | -| `GET /api/memory/decisions` | Same data as `agh memory decisions list` with redaction-safe payloads (no `post_content`, `prior_content`, or raw LLM responses on the wire). | -| `GET /api/memory/recall-traces/{session_id}/{turn_seq}` | Same data as `agh memory recall trace` over HTTP/UDS. | +| Surface | Use it for | +| ------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `agh memory health` | Enabled state, controller backlog, provider circuit state, dreaming gate status, and per-scope catalog/file counts. | +| `agh memory decisions list` | The Slice 1 truthful audit log: every committed/rejected/shadowed/reverted controller decision with redaction-safe traces. | +| `agh memory recall trace ` | The deterministic recall pipeline trace for a specific session turn: candidate set, scoring weights, freshness banners, and shadow-by-id outcomes. | +| `agh memory extractor status` | Queue and useful-work diagnostics: queued/in-flight sessions, active provider sessions, backpressured sessions, coalesced/dropped turns, skipped empty turns, and failure count. | +| `GET /api/memory/health` | Same data as `agh memory health` over HTTP/UDS. | +| `GET /api/memory/decisions` | Same data as `agh memory decisions list` with redaction-safe payloads (no `post_content`, `prior_content`, or raw LLM responses on the wire). | +| `GET /api/memory/recall-traces/{session_id}/{turn_seq}` | Same data as `agh memory recall trace` over HTTP/UDS. | +| `GET /api/memory/extractor/status` | Same extractor queue and useful-work diagnostics over HTTP/UDS. | `memory_events` rows have stable canonical op names (`memory.write.committed`, `memory.write.rejected`, `memory.recall.executed`, `memory.dream.run.promoted`, `memory.extractor.completed`, `memory.provider.collision`, etc.). They are queryable through the event store and feed `agh memory health` and the web Memory inspector. +Skipped empty extractor turns are recorded as `memory.extractor.dropped` with +`metadata.reason = "empty_snapshot"` and redaction-safe counters only. Extractor completion events +carry `candidate_count`, and controller write decisions remain under `memory.write.*`. + `agh memory history` returns the same audit material in the legacy summary shape as a thin compatibility view over `memory_events`. Use `agh memory decisions list` when you need controller-level detail. diff --git a/packages/site/content/runtime/core/skills/index.mdx b/packages/site/content/runtime/core/skills/index.mdx index 66101cf75..375da52f3 100644 --- a/packages/site/content/runtime/core/skills/index.mdx +++ b/packages/site/content/runtime/core/skills/index.mdx @@ -188,6 +188,11 @@ When the runtime denies `agh__skill_view` for the active scope, agents fall back `agh skill view`. The conditional guidance is part of the rendered catalog so the prompt never advertises a CLI fallback when the dedicated tool is callable. +Runtime prompts do not repeat the full current-skill catalog after every turn when the resolved +catalog is unchanged for the session. The first prompt, and any later catalog change, carries the +full `` block; repeated turns carry a compact unchanged marker that keeps +the previous block authoritative and points agents back to `agh__skill_view` for full instructions. + Important details: | Behavior | Current implementation | diff --git a/packages/site/lib/__tests__/public-install-contract.test.ts b/packages/site/lib/__tests__/public-install-contract.test.ts index 50e2a4760..e891986dc 100644 --- a/packages/site/lib/__tests__/public-install-contract.test.ts +++ b/packages/site/lib/__tests__/public-install-contract.test.ts @@ -1,7 +1,10 @@ -import { mkdtempSync, readFileSync } from "node:fs"; +import { createHash } from "node:crypto"; +import { chmodSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { createServer } from "node:http"; +import type { AddressInfo } from "node:net"; import { tmpdir } from "node:os"; -import { relative, resolve } from "node:path"; -import { spawnSync } from "node:child_process"; +import { basename, join, relative, resolve } from "node:path"; +import { spawn, spawnSync } from "node:child_process"; import { describe, expect, it } from "vitest"; import { siteRoot } from "../content-test-utils"; @@ -30,14 +33,33 @@ const retiredPackageInstallCommands = [ ]; const installOptions = ["--version", "--dir", "--skip-bootstrap", "--dry-run", "--help"]; const installEnvVars = ["AGH_VERSION", "AGH_INSTALL_DIR", "AGH_SKIP_BOOTSTRAP"]; +const cosignVersion = "v2.2.4"; +const cosignDigests = { + "darwin/x64": "0e5a77a86115e4c00ba4243db01abceacb13cc06981c45e53ee71f2e1db8ce25", + "darwin/arm64": "fcd310e64ecddc1eaa13fe814ac1c9fc02f6f9eacd9a58480ab8160eb8ca381e", + "linux/x64": "97a6a1e15668a75fc4ff7a4dc4cb2f098f929cbea2f12faa9de31db6b42b17d7", + "linux/arm64": "658087351e1d4f9c396b5f59ee5437461c06128f4ce80ba899ccaa1c0b6a8a62", +} as const; const installerReleaseGuaranteeSnippets = [ verifiedInstallerCommand, "Requires:", - "curl, tar, cosign, and sha256sum or shasum.", - 'command -v cosign >/dev/null 2>&1 || fail "cosign is required to verify release provenance"', + "curl, tar, and sha256sum or shasum.", + "Uses local cosign when available; otherwise downloads a pinned temporary cosign verifier.", + 'COSIGN_VERSION="v2.2.4"', + 'COSIGN_BASE_URL="https://github.com/sigstore/cosign/releases/download/${COSIGN_VERSION}"', + "cosign-darwin-amd64", + "cosign-darwin-arm64", + "cosign-linux-amd64", + "cosign-linux-arm64", + ...Object.values(cosignDigests), + "resolve_cosign()", + 'COSIGN_BIN="$(command -v cosign)"', + 'curl -fsSL "$COSIGN_URL" -o "$COSIGN_PATH"', + 'verify_file_sha256 "$COSIGN_PATH" "$COSIGN_SHA256" "cosign verifier"', + 'COSIGN_BIN="$COSIGN_PATH"', 'BUNDLE_URL="${BASE_URL}/checksums.txt.sigstore.json"', 'log "verifying checksum provenance"', - 'cosign verify-blob "$CHECKSUM_PATH"', + '"$COSIGN_BIN" verify-blob "$CHECKSUM_PATH"', '--bundle "$BUNDLE_PATH"', '--certificate-identity-regexp "$COSIGN_CERT_IDENTITY_REGEXP"', '--certificate-oidc-issuer "$COSIGN_CERT_OIDC_ISSUER"', @@ -50,9 +72,10 @@ const installerCriticalErrorSnippets = [ "latest release resolved to unexpected ref:", "unsupported operating system:", "unsupported architecture:", + "unsupported cosign verifier platform:", "curl is required", "tar is required", - "cosign is required to verify release provenance", + "checksum mismatch", "sha256sum or shasum is required to verify the download", "checksums.txt does not include", "archive did not contain an agh binary", @@ -82,6 +105,126 @@ function runInstallScript(args: string[]) { }); } +function currentInstallPlatform() { + if (process.platform !== "darwin" && process.platform !== "linux") { + throw new Error("unsupported test platform: " + process.platform); + } + if (process.arch !== "x64" && process.arch !== "arm64") { + throw new Error("unsupported test architecture: " + process.arch); + } + + const os = process.platform; + const archiveArch = process.arch === "x64" ? "x86_64" : "arm64"; + const cosignArch = process.arch === "x64" ? "amd64" : "arm64"; + const digestKey = (os + "/" + process.arch) as keyof typeof cosignDigests; + + return { + archiveName: "agh_" + os + "_" + archiveArch + ".tar.gz", + cosignName: "cosign-" + os + "-" + cosignArch, + cosignDigest: cosignDigests[digestKey], + }; +} + +function sha256(data: Buffer | string): string { + return createHash("sha256").update(data).digest("hex"); +} + +function createFixtureArchive(root: string): Buffer { + const sourceDir = join(root, "archive-src"); + mkdirSync(sourceDir, { recursive: true }); + const binaryPath = join(sourceDir, "agh"); + writeFileSync( + binaryPath, + '#!/bin/sh\nif [ "${1:-}" = "version" ]; then exit 0; fi\nexit 0\n', + "utf8" + ); + chmodSync(binaryPath, 0o755); + + const archivePath = join(root, "agh.tar.gz"); + const result = spawnSync("tar", ["-czf", archivePath, "-C", sourceDir, "agh"], { + encoding: "utf8", + }); + if (result.status !== 0) { + throw new Error("tar failed: " + (result.stderr || result.stdout)); + } + return readFileSync(archivePath); +} + +function runInstallScriptAsync( + scriptPath: string, + args: string[], + env: NodeJS.ProcessEnv +): Promise<{ status: number | null; stdout: string; stderr: string }> { + return new Promise(resolveRun => { + const child = spawn("sh", [scriptPath, ...args], { + cwd: siteRoot, + env, + stdio: ["ignore", "pipe", "pipe"], + }); + const stdout: Buffer[] = []; + const stderr: Buffer[] = []; + let settled = false; + + function finish(status: number | null) { + if (settled) { + return; + } + settled = true; + resolveRun({ + status, + stdout: Buffer.concat(stdout).toString("utf8"), + stderr: Buffer.concat(stderr).toString("utf8"), + }); + } + + child.stdout.on("data", chunk => stdout.push(Buffer.from(chunk))); + child.stderr.on("data", chunk => stderr.push(Buffer.from(chunk))); + child.on("error", error => { + stderr.push(Buffer.from(error.message)); + finish(1); + }); + child.on("close", status => finish(status)); + }); +} + +async function withFixtureServer( + routes: Map, + run: (baseURL: string) => Promise +): Promise { + const server = createServer((request, response) => { + const routeName = basename((request.url ?? "").split("?")[0] ?? ""); + const body = routes.get(routeName); + if (body === undefined) { + response.statusCode = 404; + response.end("missing fixture: " + routeName + "\n"); + return; + } + + response.statusCode = 200; + response.end(body); + }); + + await new Promise((resolveListen, rejectListen) => { + server.once("error", rejectListen); + server.listen(0, "127.0.0.1", () => resolveListen()); + }); + + try { + const address = server.address() as AddressInfo; + return await run("http://127.0.0.1:" + address.port); + } finally { + await new Promise((resolveClose, rejectClose) => { + server.close(error => { + if (error) { + rejectClose(error); + return; + } + resolveClose(); + }); + }); + } +} + function hermeticInstallEnv(source: InstallEnv = process.env): NodeJS.ProcessEnv { const env: InstallEnv = {}; for (const [key, value] of Object.entries(source)) { @@ -132,9 +275,10 @@ describe("public install contract", () => { expect(script).not.toContain("http://"); expect(script).toContain('command -v curl >/dev/null 2>&1 || fail "curl is required"'); expect(script).toContain('command -v tar >/dev/null 2>&1 || fail "tar is required"'); - expect(script).toContain( - 'command -v cosign >/dev/null 2>&1 || fail "cosign is required to verify release provenance"' - ); + expect(script).not.toContain('fail "cosign is required to verify release provenance"'); + expect(script).toContain('COSIGN_VERSION="v2.2.4"'); + expect(script).toContain("resolve_cosign()"); + expect(script).toContain("if command -v cosign >/dev/null 2>&1; then"); expect(script).toContain('BUNDLE_URL="${BASE_URL}/checksums.txt.sigstore.json"'); expect(script).toContain( "COSIGN_CERT_IDENTITY_REGEXP='^https://github\\.com/compozy/agh/\\.github/workflows/release\\.yml@refs/heads/main$'" @@ -145,7 +289,7 @@ describe("public install contract", () => { ); expect(script).toContain("resolve_latest_release_tag()"); expect(script).toContain('VERSION="$(resolve_latest_release_tag)"'); - expect(script).toContain('cosign verify-blob "$CHECKSUM_PATH"'); + expect(script).toContain('"$COSIGN_BIN" verify-blob "$CHECKSUM_PATH"'); expect(script).toContain('--bundle "$BUNDLE_PATH"'); expect(script).toContain('--certificate-identity-regexp "$COSIGN_CERT_IDENTITY_REGEXP"'); expect(script).toContain('--certificate-oidc-issuer "$COSIGN_CERT_OIDC_ISSUER"'); @@ -216,6 +360,63 @@ describe("public install contract", () => { expect(script).toContain('log "next: agh install"'); }); + it("bootstraps a pinned temporary cosign verifier when none is on PATH", async () => { + const root = mkdtempSync(resolve(tmpdir(), "agh-install-cosign-bootstrap-")); + try { + const platform = currentInstallPlatform(); + const installDir = join(root, "bin"); + const cosignArgsPath = join(root, "cosign-args.txt"); + const archiveBody = createFixtureArchive(root); + const cosignBody = '#!/bin/sh\nprintf \'%s\\n\' "$@" > "' + cosignArgsPath + '"\nexit 0\n'; + const routes = new Map([ + [platform.archiveName, archiveBody], + ["checksums.txt", sha256(archiveBody) + " " + platform.archiveName + "\n"], + [ + "checksums.txt.sigstore.json", + '{"mediaType":"application/vnd.dev.sigstore.bundle+json"}\n', + ], + [platform.cosignName, cosignBody], + ]); + + await withFixtureServer(routes, async baseURL => { + const script = readSiteFile(installScriptPath) + .replace( + 'COSIGN_BASE_URL="https://github.com/sigstore/cosign/releases/download/${COSIGN_VERSION}"', + 'COSIGN_BASE_URL="' + baseURL + "/cosign/" + cosignVersion + '"' + ) + .replace(platform.cosignDigest, sha256(cosignBody)) + .replace( + 'BASE_URL="https://github.com/${RELEASE_REPO}/releases/download/${VERSION}"', + 'BASE_URL="' + baseURL + '/releases/download/${VERSION}"' + ); + const scriptPath = join(root, "install.sh"); + writeFileSync(scriptPath, script, "utf8"); + chmodSync(scriptPath, 0o755); + + const result = await runInstallScriptAsync( + scriptPath, + ["--version", "v9.9.9", "--skip-bootstrap", "--dir", installDir], + { + HOME: root, + PATH: "/usr/bin:/bin", + TZ: "UTC", + LANG: "C.UTF-8", + LC_ALL: "C.UTF-8", + LC_CTYPE: "C.UTF-8", + NODE_ENV: "test", + } + ); + + expect(result.status, result.stderr || result.stdout).toBe(0); + expect(result.stdout).toContain("downloading pinned cosign verifier " + cosignVersion); + expect(readFileSync(cosignArgsPath, "utf8")).toContain("verify-blob"); + expect(readFileSync(join(installDir, "agh"), "utf8")).toContain('"${1:-}" = "version"'); + }); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); + it("runs install contract checks with a hermetic release environment", () => { const env = hermeticInstallEnv({ AGH_VERSION: "v9.9.9", @@ -295,7 +496,7 @@ describe("public install contract", () => { for (const envVar of ["AGH_VERSION", "AGH_INSTALL_DIR", "AGH_SKIP_BOOTSTRAP"]) { expect(installPage, envVar).toContain(envVar); } - expect(installPage).toContain("cosign"); + expect(installPage).toContain("pinned temporary cosign verifier"); expect(installPage).toContain("checksums.txt.sigstore.json"); }); }); diff --git a/packages/site/public/install.sh b/packages/site/public/install.sh index 753032190..de2e3df25 100644 --- a/packages/site/public/install.sh +++ b/packages/site/public/install.sh @@ -2,12 +2,17 @@ set -eu RELEASE_REPO="compozy/agh" +COSIGN_VERSION="v2.2.4" +COSIGN_BASE_URL="https://github.com/sigstore/cosign/releases/download/${COSIGN_VERSION}" COSIGN_CERT_IDENTITY_REGEXP='^https://github\.com/compozy/agh/\.github/workflows/release\.yml@refs/heads/main$' COSIGN_CERT_OIDC_ISSUER="https://token.actions.githubusercontent.com" VERSION="${AGH_VERSION:-latest}" INSTALL_DIR="${AGH_INSTALL_DIR:-}" SKIP_BOOTSTRAP="false" DRY_RUN="false" +COSIGN_BIN="" +COSIGN_NAME="" +COSIGN_SHA256="" if [ "${AGH_SKIP_BOOTSTRAP:-}" != "" ] && [ "${AGH_SKIP_BOOTSTRAP:-}" != "0" ]; then SKIP_BOOTSTRAP="true" @@ -34,7 +39,8 @@ Environment: AGH_SKIP_BOOTSTRAP=1 Same as --skip-bootstrap. Requires: - curl, tar, cosign, and sha256sum or shasum. + curl, tar, and sha256sum or shasum. + Uses local cosign when available; otherwise downloads a pinned temporary cosign verifier. USAGE } @@ -63,6 +69,36 @@ resolve_latest_release_tag() { esac } +verify_file_sha256() { + file_path="$1" + expected_sha="$2" + label="$3" + + if [ "$CHECKSUM_CMD" = "sha256sum" ]; then + actual_sha="$(sha256sum "$file_path" | awk '{ print $1 }')" + else + actual_sha="$(shasum -a 256 "$file_path" | awk '{ print $1 }')" + fi + + [ "$actual_sha" = "$expected_sha" ] || fail "${label} checksum mismatch" +} + +resolve_cosign() { + if command -v cosign >/dev/null 2>&1; then + COSIGN_BIN="$(command -v cosign)" + log "using cosign at ${COSIGN_BIN}" + return + fi + + COSIGN_PATH="${TMP_DIR}/${COSIGN_NAME}" + COSIGN_URL="${COSIGN_BASE_URL}/${COSIGN_NAME}" + log "downloading pinned cosign verifier ${COSIGN_VERSION}" + curl -fsSL "$COSIGN_URL" -o "$COSIGN_PATH" + verify_file_sha256 "$COSIGN_PATH" "$COSIGN_SHA256" "cosign verifier" + chmod 0755 "$COSIGN_PATH" + COSIGN_BIN="$COSIGN_PATH" +} + need_arg() { if [ "$#" -lt 2 ] || [ "$2" = "" ]; then fail "$1 requires a value" @@ -133,6 +169,28 @@ case "$(uname -m)" in ;; esac +case "${OS}/${ARCH}" in + darwin/x86_64) + COSIGN_NAME="cosign-darwin-amd64" + COSIGN_SHA256="0e5a77a86115e4c00ba4243db01abceacb13cc06981c45e53ee71f2e1db8ce25" + ;; + darwin/arm64) + COSIGN_NAME="cosign-darwin-arm64" + COSIGN_SHA256="fcd310e64ecddc1eaa13fe814ac1c9fc02f6f9eacd9a58480ab8160eb8ca381e" + ;; + linux/x86_64) + COSIGN_NAME="cosign-linux-amd64" + COSIGN_SHA256="97a6a1e15668a75fc4ff7a4dc4cb2f098f929cbea2f12faa9de31db6b42b17d7" + ;; + linux/arm64) + COSIGN_NAME="cosign-linux-arm64" + COSIGN_SHA256="658087351e1d4f9c396b5f59ee5437461c06128f4ce80ba899ccaa1c0b6a8a62" + ;; + *) + fail "unsupported cosign verifier platform: ${OS}/${ARCH}" + ;; +esac + ARCHIVE_NAME="agh_${OS}_${ARCH}.tar.gz" if [ "$VERSION" = "latest" ]; then @@ -169,7 +227,6 @@ fi command -v curl >/dev/null 2>&1 || fail "curl is required" command -v tar >/dev/null 2>&1 || fail "tar is required" -command -v cosign >/dev/null 2>&1 || fail "cosign is required to verify release provenance" if [ "$VERSION" = "latest" ]; then VERSION="$(resolve_latest_release_tag)" @@ -200,6 +257,8 @@ cleanup() { trap cleanup EXIT INT TERM +resolve_cosign + ARCHIVE_PATH="${TMP_DIR}/${ARCHIVE_NAME}" CHECKSUM_PATH="${TMP_DIR}/checksums.txt" BUNDLE_PATH="${TMP_DIR}/checksums.txt.sigstore.json" @@ -211,7 +270,7 @@ curl -fsSL "$CHECKSUM_URL" -o "$CHECKSUM_PATH" curl -fsSL "$BUNDLE_URL" -o "$BUNDLE_PATH" log "verifying checksum provenance" -cosign verify-blob "$CHECKSUM_PATH" \ +"$COSIGN_BIN" verify-blob "$CHECKSUM_PATH" \ --bundle "$BUNDLE_PATH" \ --certificate-identity-regexp "$COSIGN_CERT_IDENTITY_REGEXP" \ --certificate-oidc-issuer "$COSIGN_CERT_OIDC_ISSUER" >/dev/null diff --git a/skills-lock.json b/skills-lock.json index 8d3db8669..5f53a61fd 100644 --- a/skills-lock.json +++ b/skills-lock.json @@ -93,6 +93,12 @@ "sourceType": "github", "computedHash": "a7d41f6766192891ba7f0f4cd395df2ad576cf1af9a5ce718d1b5789305f2e90" }, + "autoreview": { + "source": "openclaw/agent-skills", + "sourceType": "github", + "skillPath": "skills/autoreview/SKILL.md", + "computedHash": "12fde31c7d20540941bb054f6b66dd7ac541ad807e821fd980c33d9a58e270da" + }, "better-auth-best-practices": { "source": "pedronauck/skills", "sourceType": "github", diff --git a/skills/agh/references/memory.md b/skills/agh/references/memory.md index 67af0775e..e310e2f1d 100644 --- a/skills/agh/references/memory.md +++ b/skills/agh/references/memory.md @@ -7,6 +7,7 @@ - CLI operations - Search, reindex, promote, and reload - Recall traces +- Extractor diagnostics - Hygiene - When not to write memory @@ -75,6 +76,15 @@ Use recall traces to inspect what memory entered a session turn without exposing Recall traces are diagnostic evidence. They do not authorize task state changes, review verdicts, or durable memory writes by themselves. +## Extractor Diagnostics + +Inspect asynchronous extractor pressure before retrying or tuning Memory runs: + + agh memory extractor status -o json + agh memory extractor list-pending -o json + +`skipped_turns` counts transcript turns that had no non-whitespace content and were suppressed before provider work. `active_provider_sessions` shows extractor child sessions currently consuming provider work. `backpressured_sessions` increments when `memory.extractor.queue.capacity` is saturated and a session waits instead of spawning another child. `coalesced_turns`, `dropped_turns`, `failure_count`, and pending failures explain queue pressure and failed extractor handoff without exposing raw transcript text. + ## Hygiene 1. Run agh memory list before writing a new memory entry. diff --git a/skills/agh/references/network.md b/skills/agh/references/network.md index a22138e2e..57492e085 100644 --- a/skills/agh/references/network.md +++ b/skills/agh/references/network.md @@ -27,6 +27,8 @@ Key concepts: Respond in the same conversation container by default. Open a new public thread only when the subject changes. Moving public work into a direct room opens a new work_id; link the handoff with reply_to, trace_id, and causation_id. +Runtime delivery prompts may show worked reply and protocol examples once per session and compact later deliveries. Treat this reference, visible tool descriptors, and `agh network --help` as the durable source for command details and protocol body requirements. + ## Native Tool Path When visible, inspect descriptors with agh\_\_tool_info before first use: diff --git a/skills/agh/references/tools-and-skills.md b/skills/agh/references/tools-and-skills.md index 352984a44..226ebacd3 100644 --- a/skills/agh/references/tools-and-skills.md +++ b/skills/agh/references/tools-and-skills.md @@ -40,6 +40,8 @@ For resource files inside daemon-managed AGH sessions, use the native skill view agh skill view agh --file references/network.md +When a session receives repeated prompts with the same resolved skill catalog, AGH may replace the full `` block with a compact unchanged marker. Treat the previous full block in that session as current until AGH sends a later full catalog block. + ## Bundled Skill Resources Bundled AGH skills are compiled from the repository skills// directories. The canonical AGH bundled skill is agh. It includes SKILL.md and flat references/\*.md resource files. diff --git a/web/src/generated/agh-openapi.d.ts b/web/src/generated/agh-openapi.d.ts index deeb399de..c8a1ce782 100644 --- a/web/src/generated/agh-openapi.d.ts +++ b/web/src/generated/agh-openapi.d.ts @@ -24261,11 +24261,14 @@ export interface operations { content: { "application/json": { extractor: { + active_provider_sessions: number; + backpressured_sessions: number; coalesced_turns: number; dropped_turns: number; failure_count: number; in_flight_sessions: number; queued_sessions: number; + skipped_turns: number; /** @enum {string} */ status: "idle" | "running" | "draining" | "stopped"; }; diff --git a/web/src/routes/_app/settings/memory.tsx b/web/src/routes/_app/settings/memory.tsx index f269572dc..5d248cd47 100644 --- a/web/src/routes/_app/settings/memory.tsx +++ b/web/src/routes/_app/settings/memory.tsx @@ -1157,7 +1157,7 @@ function ExtractorSection({ ; */ export const Default: Story = { args: {}, - parameters: appRouteParameters("/settings/memory"), + parameters: { + ...appRouteParameters("/settings/memory"), + ...storybookMswParameters({ + daemon: [ + http.get("/api/onboarding", () => HttpResponse.json({ onboarding: { completed: true } })), + ], + }), + }, render: () => , }; diff --git a/web/src/systems/session/lib/session-history-adapter.ts b/web/src/systems/session/lib/session-history-adapter.ts index 6086f698d..ef97d7fa3 100644 --- a/web/src/systems/session/lib/session-history-adapter.ts +++ b/web/src/systems/session/lib/session-history-adapter.ts @@ -4,6 +4,7 @@ import type { MessageFormatAdapter, MessageFormatItem, MessageFormatRepository, + MessageStorageEntry, ThreadHistoryAdapter, } from "@assistant-ui/react"; import { ExportedMessageRepository } from "@assistant-ui/react"; @@ -16,10 +17,11 @@ type AISDKStorageFormat = Omit; const aiSDKV6FormatAdapter: MessageFormatAdapter = { format: "ai-sdk/v6", - encode({ message: { id: _id, ...message } }) { + encode(item: MessageFormatItem): AISDKStorageFormat { + const { id: _id, ...message } = item.message; return message; }, - decode(stored) { + decode(stored: MessageStorageEntry): MessageFormatItem { return { parentId: stored.parent_id, message: { @@ -28,7 +30,7 @@ const aiSDKV6FormatAdapter: MessageFormatAdapter[number]; type ThreadContentPart = Exclude[number]; type SessionMessageWithStatus = SessionMessage & { status?: ThreadMessageLike["status"] }; +type ExportedThreadMessageItem = { message: ThreadMessage }; type JSONValue = null | string | number | boolean | readonly JSONValue[] | JSONObject; type JSONObject = { readonly [key: string]: JSONValue }; @@ -135,9 +136,10 @@ export function toThreadMessageLikes(messages: SessionMessage[]): ThreadMessageL } export function toReadonlyThreadMessages(messages: SessionMessage[]): ThreadMessage[] { - return ExportedMessageRepository.fromArray(toThreadMessageLikes(messages)).messages.map( - item => item.message + const repository: { messages: ExportedThreadMessageItem[] } = ExportedMessageRepository.fromArray( + toThreadMessageLikes(messages) ); + return repository.messages.map(item => item.message); } export function transcriptSignature(messages: SessionMessage[]): string { diff --git a/web/style-imports.d.ts b/web/style-imports.d.ts new file mode 100644 index 000000000..cbe652dbe --- /dev/null +++ b/web/style-imports.d.ts @@ -0,0 +1 @@ +declare module "*.css";