diff --git a/.changeset/ai-preview-history.md b/.changeset/ai-preview-history.md new file mode 100644 index 0000000000..b2503d6bad --- /dev/null +++ b/.changeset/ai-preview-history.md @@ -0,0 +1,5 @@ +--- +"@platejs/ai": patch +--- + +Fix insert-mode AI preview history so streamed chunks stay out of undo history and selection survives accept, undo, and redo diff --git a/.changeset/slate-redo-selection.md b/.changeset/slate-redo-selection.md new file mode 100644 index 0000000000..c919d06613 --- /dev/null +++ b/.changeset/slate-redo-selection.md @@ -0,0 +1,5 @@ +--- +"@platejs/slate": patch +--- + +Fix redo to restore selection after undoing history batches that clear the active selection diff --git a/.claude/.skiller.json b/.claude/.skiller.json index 847056e601..2246262425 100644 --- a/.claude/.skiller.json +++ b/.claude/.skiller.json @@ -26,6 +26,22 @@ "sourceRelPath": ".claude/commands/translate.md", "destRelPath": "translate" }, + { + "sourceType": "plugin", + "pluginId": "compound-engineering@every-marketplace", + "pluginVersion": "2.40.0", + "sourceKind": "agent", + "sourceRelPath": "agents/document-review/adversarial-document-reviewer.md", + "destRelPath": "adversarial-document-reviewer" + }, + { + "sourceType": "plugin", + "pluginId": "compound-engineering@every-marketplace", + "pluginVersion": "2.40.0", + "sourceKind": "agent", + "sourceRelPath": "agents/review/adversarial-reviewer.md", + "destRelPath": "adversarial-reviewer" + }, { "sourceType": "plugin", "pluginId": "agent-browser@agent-browser", @@ -199,24 +215,24 @@ "pluginId": "compound-engineering@every-marketplace", "pluginVersion": "2.40.0", "sourceKind": "agent", - "sourceRelPath": "agents/review/code-simplicity-reviewer.md", - "destRelPath": "code-simplicity-reviewer" + "sourceRelPath": "agents/review/cli-agent-readiness-reviewer.md", + "destRelPath": "cli-agent-readiness-reviewer" }, { "sourceType": "plugin", "pluginId": "compound-engineering@every-marketplace", "pluginVersion": "2.40.0", 
"sourceKind": "agent", - "sourceRelPath": "agents/document-review/coherence-reviewer.md", - "destRelPath": "coherence-reviewer" + "sourceRelPath": "agents/review/code-simplicity-reviewer.md", + "destRelPath": "code-simplicity-reviewer" }, { "sourceType": "plugin", "pluginId": "compound-engineering@every-marketplace", "pluginVersion": "2.40.0", - "sourceKind": "skill", - "sourceRelPath": "skills/compound-docs", - "destRelPath": "compound-docs" + "sourceKind": "agent", + "sourceRelPath": "agents/document-review/coherence-reviewer.md", + "destRelPath": "coherence-reviewer" }, { "sourceType": "plugin", @@ -282,14 +298,6 @@ "sourceRelPath": "skills/debug", "destRelPath": "debug" }, - { - "sourceType": "plugin", - "pluginId": "compound-engineering@every-marketplace", - "pluginVersion": "2.40.0", - "sourceKind": "skill", - "sourceRelPath": "skills/deepen-plan", - "destRelPath": "deepen-plan" - }, { "sourceType": "plugin", "pluginId": "compound-engineering@every-marketplace", @@ -626,6 +634,14 @@ "sourceRelPath": "agents/document-review/product-lens-reviewer.md", "destRelPath": "product-lens-reviewer" }, + { + "sourceType": "plugin", + "pluginId": "compound-engineering@every-marketplace", + "pluginVersion": "2.40.0", + "sourceKind": "agent", + "sourceRelPath": "agents/review/project-standards-reviewer.md", + "destRelPath": "project-standards-reviewer" + }, { "sourceType": "plugin", "pluginId": "compound-engineering@every-marketplace", diff --git a/.claude/docs/plans/2026-03-26-ai-preview-localized-rollback.md b/.claude/docs/plans/2026-03-26-ai-preview-localized-rollback.md new file mode 100644 index 0000000000..40ea3858f0 --- /dev/null +++ b/.claude/docs/plans/2026-03-26-ai-preview-localized-rollback.md @@ -0,0 +1,53 @@ +# AI Preview Localized Rollback + +## Goal + +Replace full-document AI preview snapshotting with localized insert-mode rollback so preview accept/cancel only touch the preview block range, while preserving current undo/redo and selection behavior. 
+ +## Checklist + +- [completed] Read current localized stream path, accept path, and preview tests +- [completed] Add failing tests for localized preview ownership and rollback +- [completed] Implement localized preview state and marker-based range handling +- [completed] Run targeted verification for touched packages and tests +- [blocked] Run `check`, then create or update the PR + +## Verification + +- `bun test packages/ai/src/lib/transforms/aiStreamSnapshot.spec.ts packages/ai/src/lib/transforms/undoAI.spec.ts apps/www/src/__tests__/package-integration/ai-chat-streaming/streamHistory.slow.tsx` +- `pnpm install` +- `pnpm turbo build --filter=./packages/ai --filter=./apps/www` +- `pnpm turbo typecheck --filter=./packages/ai` +- `pnpm build` +- `pnpm turbo typecheck --filter=./packages/ai --filter=./apps/www` after root build +- `pnpm lint:fix` +- `agent-browser open http://localhost:3100/blocks/editor-ai` + +## Blocker + +- `pnpm check` still fails outside this diff in existing fast AI chat tests: + - `packages/ai/src/react/ai-chat/hooks/useAIChatEditor.spec.tsx` + - `packages/ai/src/react/ai-chat/hooks/useEditorChat.spec.tsx` + - `packages/ai/src/react/ai-chat/utils/getLastAssistantMessage.spec.ts` +- The failure is a `SyntaxError` from missing exports in `packages/plate/src/react/index.tsx`, so PR creation is blocked by repo rules. 
+ +## Agreed Design + +- Keep the public surface on `tf.ai.*` +- Optimize insert-mode preview only +- Keep preview state private and editor-scoped +- `beginPreview` accepts `{ originalBlocks }` +- `selectionBefore` is still captured inside `beginPreview` +- Preview-owned content is tracked by transient top-level block markers +- `cancelPreview` restores the exact original block slice plus selection +- `acceptPreview` localizes cleanup/commit and never rebuilds the whole document +- Untouched blocks should keep identity because preview accept/cancel stop using full-doc `setValue` + +## Open Implementation Notes + +- Mark preview-owned top-level blocks with `aiPreview: true` +- Do not mark the `aiChat` anchor block as preview-owned +- If preview starts by removing an empty paragraph, capture that exact block in `originalBlocks` +- If preview inserts after existing content, capture `[]` +- Derive the current preview range from contiguous marked top-level blocks +- If preview range is invalid/non-contiguous, fall back safely instead of silently guessing diff --git a/.claude/docs/plans/2026-03-26-ai-preview-tf-api-refactor.md b/.claude/docs/plans/2026-03-26-ai-preview-tf-api-refactor.md new file mode 100644 index 0000000000..d7698f1156 --- /dev/null +++ b/.claude/docs/plans/2026-03-26-ai-preview-tf-api-refactor.md @@ -0,0 +1,22 @@ +# AI Preview `tf.ai` Refactor + +## Goal + +Replace the low-level AI preview snapshot helper surface with `tf.ai.*` lifecycle transforms, keep the full-document snapshot strategy private, migrate insert-mode preview callers, and preserve the current no-history preview behavior. 
+ +## Checklist + +- [completed] Gather current AI preview/history patterns and relevant learnings +- [completed] Add or update tests for the new preview lifecycle contract +- [completed] Implement `tf.ai.*` preview transforms in `BaseAIPlugin` +- [completed] Migrate AI chat preview callsites off direct snapshot helpers +- [completed] Run required verification for `packages/ai` and `apps/www` + +## Findings + +- The current preview storage lives in `packages/ai/src/lib/transforms/aiStreamSnapshot.ts` as a `WeakMap` keyed by editor. +- The current low-level helper surface is used directly by `acceptAIChat`, `undoAI`, `resetAIChat`, the editor AI kit, and the streaming integration test. +- The existing solution doc already established the correct behavior boundary: preview chunks stay out of history; accept commits one fresh batch; AI undo restores the pre-stream value while preview is active. +- `AIPlugin` already owns editor mutation semantics like `insertNodes`, `removeMarks`, `removeNodes`, and `undo`, so preview lifecycle belongs there too. +- The cleanest public surface here is `tf.ai.*`, while library internals and stricter callsites can still reach the same transforms through `editor.getTransforms(BaseAIPlugin).ai` where generic editor typing is narrower. +- Package-scoped verification passed for `@platejs/ai`; filtered `apps/www` typecheck still has unrelated workspace export/type failures after the required root `pnpm build`, so that debt remains outside this refactor. 
diff --git a/.claude/docs/plans/2026-03-26-ai-stream-snapshot-design-grill.md b/.claude/docs/plans/2026-03-26-ai-stream-snapshot-design-grill.md new file mode 100644 index 0000000000..6cedca6148 --- /dev/null +++ b/.claude/docs/plans/2026-03-26-ai-stream-snapshot-design-grill.md @@ -0,0 +1,17 @@ +# AI Stream Snapshot Design Grill + +## Goal + +Stress-test the next-step design for AI insert-mode preview state, especially whether snapshot state should live in plugin state, what the public surface should be, and how to avoid a performance regression on long documents. + +## Checklist + +- [in_progress] Inspect current AI/plugin state patterns in the repo +- [pending] Decide whether preview state should live in plugin state, transform state, or private module state +- [pending] Decide whether the public surface should be `api`, `tf`, or package-private helpers +- [pending] Walk the remaining design forks with explicit recommendations + +## Findings + +- The current implementation stores preview state in a module-level `WeakMap` keyed by editor. +- The current helper surface is low-level snapshot terminology, not workflow terminology. diff --git a/.claude/docs/plans/2026-03-26-ignore-tmp-artifacts.md b/.claude/docs/plans/2026-03-26-ignore-tmp-artifacts.md new file mode 100644 index 0000000000..9aaec87102 --- /dev/null +++ b/.claude/docs/plans/2026-03-26-ignore-tmp-artifacts.md @@ -0,0 +1,18 @@ +# Ignore Tmp Artifacts + +## Goal + +Stop root-level `tmp*` artifacts from being committed and remove the accidentally committed browser proof PNGs from the current PR branch. + +## Checklist + +- [in_progress] Inspect current ignore rules and tracked tmp artifacts +- [pending] Add ignore rule for root `tmp*` artifacts +- [pending] Remove tracked tmp PNGs from the branch diff +- [pending] Run the PR gate +- [pending] Commit and push the cleanup + +## Findings + +- `.gitignore` already ignores `/tmp`, but not root files like `tmp-issue-4900-editor-ai.png`. 
+- The current branch still tracks `tmp-issue-4900-editor-ai.png` and `tmp-pr-4902-editor-ai.png`. diff --git a/.claude/docs/plans/2026-03-26-issue-4900-ai-streaming-history.md b/.claude/docs/plans/2026-03-26-issue-4900-ai-streaming-history.md new file mode 100644 index 0000000000..3b72c8f9ee --- /dev/null +++ b/.claude/docs/plans/2026-03-26-issue-4900-ai-streaming-history.md @@ -0,0 +1,42 @@ +# Issue 4900: AI streaming history bloat + +## Source of truth + +- Source type: GitHub issue +- Source id: #4900 +- Title: Streaming with withAIBatch accumulates operations and may slow undo +- URL: https://github.com/udecode/plate/issues/4900 +- Task type: bug / performance + +## Expected outcome + +- Long AI streaming sessions should not build a giant undo batch that makes `tf.ai.undo()` or accept flows slower as chunk count grows. +- Keep current AI insert/chat behavior intact from a user point of view. + +## Constraints and repo rules + +- Non-trivial task: use repo-local planning file, not root planning files. +- Check institutional learnings before implementation. +- Use a sane test seam before the fix if possible. +- If `.ts` changes, verify with install -> build -> typecheck sequence for touched package(s), then `pnpm lint:fix`. + +## Findings + +- No relevant prior solution found in `.claude/docs/solutions/`. +- `withAIBatch` only merges batches and tags the last undo batch. It does not compact operations. +- Streaming insert mode is wrapped with `withAIBatch(..., { split: isFirst })` in the AI kit integration. +- `undoAI` currently relies on the last undo batch being tagged as AI and calls native `editor.undo()`. +- Accept/hide logic already distinguishes AI preview state from accepted content using AI marks and anchor cleanup. + +## Working hypothesis + +- The real bug is treating streamed preview updates as normal history. +- Better fix: keep preview streaming out of history, then finalize or discard preview explicitly. 
+- If preview is unsaved, accept likely needs to remove preview without saving, then insert the final accepted content as one normal batch. + +## Next steps + +1. Confirm accept/reject/reset paths needed for unsaved preview. +2. Add a failing test that proves streaming no longer bloats undo history. +3. Implement the smallest package-level fix. +4. Verify targeted tests, then package build -> typecheck -> lint. diff --git a/.claude/docs/plans/2026-03-26-pr-and-comment-4900.md b/.claude/docs/plans/2026-03-26-pr-and-comment-4900.md new file mode 100644 index 0000000000..7030012c73 --- /dev/null +++ b/.claude/docs/plans/2026-03-26-pr-and-comment-4900.md @@ -0,0 +1,24 @@ +# PR And Comment 4900 + +## Goal + +Open or update the PR for the current checkout, then update the existing GitHub issue comment for issue `#4900` with the PR URL. + +## Checklist + +- [completed] Load the PR workflow and record the task plan +- [completed] Run `check` +- [completed] Inspect branch and PR state +- [completed] Commit and push the full current checkout +- [completed] Open or update the PR +- [completed] Update the existing `#4900` issue comment with the PR link + +## Findings + +- Repo rules require `check` before any PR create or update. +- Repo rules also require using the entire current checkout as-is, including unrelated changes. +- The first `check` run failed in `packages/ai/src/react/ai-chat/utils/aiChatActions.spec.ts` because `resetAIChat.ts` imported `clearAIStreamSnapshot` through the AI lib barrel, which pulled in markdown serializer exports the test mock did not provide. +- Narrowing that import to `../../../lib/transforms/aiStreamSnapshot` fixed the failing spec without changing behavior. +- `pnpm check` passed after the import fix. 
+- Opened PR: `https://github.com/udecode/plate/pull/4902` +- Updated issue comment: `https://github.com/udecode/plate/issues/4900#issuecomment-4136090707` diff --git a/.claude/docs/plans/2026-03-26-task-skill-auto-pr-sync.md b/.claude/docs/plans/2026-03-26-task-skill-auto-pr-sync.md new file mode 100644 index 0000000000..44d2ad2595 --- /dev/null +++ b/.claude/docs/plans/2026-03-26-task-skill-auto-pr-sync.md @@ -0,0 +1,19 @@ +# Task Skill Auto PR Sync + +## Goal + +Update the task skill so verified code-changing tracked work opens or updates the PR before any tracker comment, and any GitHub issue comment includes the PR URL. + +## Checklist + +- [completed] Read the current `task` skill source and adjacent skill/repo rules +- [completed] Update `.claude/skills/task/task.mdc` with PR-before-comment behavior +- [completed] Run the skill sync step +- [completed] Verify the generated task skill reflects the new rules + +## Findings + +- The current task skill explicitly says not to default to PR creation. +- The current GitHub sync-back section says not to mention PR mechanics in the issue comment. +- Repo rules already require `check` before PR creation, so the task skill should defer to that workflow instead of inventing a new one. +- `pnpm run prepare` runs `pnpm dlx skiller@latest apply`, which regenerated `.codex/skills/task/SKILL.md` with the new wording. diff --git a/.claude/docs/plans/2026-03-27-fix-ci-pr-4902.md b/.claude/docs/plans/2026-03-27-fix-ci-pr-4902.md new file mode 100644 index 0000000000..12e89f2cbb --- /dev/null +++ b/.claude/docs/plans/2026-03-27-fix-ci-pr-4902.md @@ -0,0 +1,29 @@ +# Fix CI PR 4902 + +## Goal + +Get the current PR back to green by fixing the active CI failure instead of +guessing at old logs. 
+ +## Checklist + +- [in_progress] Fetch the current failing CI checks and local learnings +- [pending] Reproduce the failure locally +- [pending] Implement the minimal fix +- [pending] Run targeted verification and the full PR gate +- [pending] Update the PR branch + +## Notes + +- User asked to "fix ci" without a specific failure, so the live PR checks are + the source of truth. +- Current failing run is `CI` for commit `1a7eeb74a`. +- The failure is `pnpm check` -> `pnpm test:slowest`, not lint, build, or + typecheck. +- GitHub runner flagged four AI chat specs over the fast-suite per-test + threshold: + - `packages/ai/src/react/ai-chat/hooks/useAIChatEditor.spec.tsx` + - `packages/ai/src/react/ai-chat/hooks/useEditorChat.spec.tsx` + - `packages/ai/src/react/ai-chat/utils/getLastAssistantMessage.spec.ts` + - `packages/ai/src/react/ai-chat/utils/submitAIChat.spec.ts` +- The CI log explicitly says to move those specs to `*.slow.ts[x]`. diff --git a/.claude/docs/solutions/performance-issues/2026-03-26-ai-streaming-preview-should-use-localized-rollback.md b/.claude/docs/solutions/performance-issues/2026-03-26-ai-streaming-preview-should-use-localized-rollback.md new file mode 100644 index 0000000000..b264c96363 --- /dev/null +++ b/.claude/docs/solutions/performance-issues/2026-03-26-ai-streaming-preview-should-use-localized-rollback.md @@ -0,0 +1,107 @@ +--- +module: AI +date: 2026-03-26 +problem_type: performance_issue +component: assistant +symptoms: + - "Insert-mode AI streaming hitches on large documents before the first preview chunk appears" + - "Accepting or canceling a streamed preview can touch the whole document even when only one block changed" + - "Untouched blocks lose identity because preview accept/cancel rebuilds the entire editor value" +root_cause: logic_error +resolution_type: code_fix +severity: high +tags: + - ai + - streaming + - undo + - redo + - performance + - history + - selection + - streamInsertChunk +--- + +# AI streaming preview should 
use localized rollback + +## Problem + +Insert-mode AI preview was restoring and recommitting the whole editor value. + +That kept chunk history out of undo, but it still deep-cloned the full document and used full-document `setValue` on accept and cancel. Large documents therefore paid a document-sized cost for a paragraph-sized preview. + +## Symptoms + +- The first streamed insert chunk felt slower on long documents. +- Accept and cancel touched more editor state than the preview actually changed. +- Untouched blocks were more likely to lose identity because the whole tree was replaced. + +## Solution + +Treat insert-mode preview as a localized top-level block range instead of a full-document snapshot. + +### 1. Capture only the replaced slice + +`tf.ai.beginPreview` stores: + +- `originalBlocks`: the exact top-level blocks the preview overwrites +- `selectionBefore`: the original selection + +When streaming starts from an empty paragraph, `originalBlocks` is that paragraph. When streaming inserts after existing content, `originalBlocks` is `[]`. + +### 2. Mark preview-owned blocks during streaming + +`streamInsertChunk` tags preview-owned top-level blocks with `aiPreview: true` while keeping AI text leaves marked with the normal AI text prop. + +That makes the live preview range discoverable without relying on drifting paths. + +### 3. Cancel by restoring only the marked range + +`tf.ai.cancelPreview()`: + +- finds the contiguous top-level `aiPreview` range +- replaces only that range with `originalBlocks` +- removes the AI chat anchor +- restores `selectionBefore` + +All of that runs in `withoutSaving`, so preview cancel still stays out of history. + +### 4. 
Accept by localized restore-then-commit + +`tf.ai.acceptPreview()`: + +- clones only the marked preview blocks +- strips `aiPreview` and AI text marks from that local clone +- restores the original block slice with `withoutSaving` +- inserts the accepted local blocks in one fresh history batch +- stamps that batch with the original `selectionBefore` + +That produces a single undoable commit whose cost scales with the preview range, not the whole document. + +## Why This Works + +The preview and the committed edit need different behavior: + +- preview needs cheap incremental rendering with no history writes +- accept needs one real undoable edit + +The trick is not “snapshot everything once.” The trick is “remember only what this preview replaced, then commit only that region.” + +That keeps: + +- chunk history out of undo +- accept and cancel local +- undo restoring the original content and selection +- redo restoring the accepted content and selection + +## Prevention + +- Do not use full-document `setValue` for insert-mode preview accept or cancel. +- Store the overwritten top-level slice, not the entire editor value. +- Tag preview-owned streamed blocks so the current preview range can be found from the document itself. +- Keep anchor removal out of the committed batch. The original state did not contain the anchor. 
+- Test the full flow: + - preview stays out of history + - cancel restores exact content and selection + - accept creates one undoable batch + - undo restores original content and selection + - redo restores accepted content and selection diff --git a/.claude/docs/solutions/test-failures/2026-03-26-ai-chat-specs-must-not-hardcode-local-plate-dist-paths.md b/.claude/docs/solutions/test-failures/2026-03-26-ai-chat-specs-must-not-hardcode-local-plate-dist-paths.md new file mode 100644 index 0000000000..5e9298c254 --- /dev/null +++ b/.claude/docs/solutions/test-failures/2026-03-26-ai-chat-specs-must-not-hardcode-local-plate-dist-paths.md @@ -0,0 +1,64 @@ +--- +title: ai chat specs must not hardcode local plate dist paths +problem_type: test_failure +component: testing_framework +root_cause: config_error +module: ai +severity: medium +symptoms: + - "`@platejs/ai` typecheck fails in CI with `TS2307` against `/Users/.../packages/plate/dist/react/index.js`" + - "ai chat specs pass locally on one machine but fail on runners with a different workspace path" +tags: + - ai + - bun + - typescript + - ci + - platejs/react + - dist +--- + +# Summary + +Five AI chat specs were loading the real `platejs/react` bundle through a +hardcoded absolute path to one developer machine. + +That made local runs look fine while CI exploded during `@platejs/ai` +typecheck, because `/Users/zbeyens/...` obviously does not exist on GitHub +runners. + +# What Happened + +These specs partially mocked `platejs/react` so they could spy on the real hook +exports while still falling back to the actual implementation. + +The fallback import used this shape: + +```ts +await import('/Users/zbeyens/git/plate/packages/plate/dist/react/index.js'); +``` + +TypeScript tried to resolve that literal during package typecheck. On CI, it +failed immediately with `TS2307`. 
+ +# Fix + +Load the built `platejs/react` bundle through a repo-relative URL computed from +the spec file instead of a hardcoded machine path: + +```ts +await import( + new URL('../../../../../plate/dist/react/index.js', import.meta.url).href +); +``` + +That keeps the import stable across local machines and CI workspaces while +still bypassing the mocked `platejs/react` module. + +# Rule + +If a spec needs the real built output of another workspace package, never bake +one machine's absolute path into the test. + +Use a repo-relative `new URL(..., import.meta.url)` path or another workspace- +portable resolver. Local green on your laptop means nothing if the import string +contains your home directory. diff --git a/.claude/docs/solutions/workflow-issues/2026-03-27-fast-suite-threshold-failures-belong-in-slow-spec-files.md b/.claude/docs/solutions/workflow-issues/2026-03-27-fast-suite-threshold-failures-belong-in-slow-spec-files.md new file mode 100644 index 0000000000..c388d78a8e --- /dev/null +++ b/.claude/docs/solutions/workflow-issues/2026-03-27-fast-suite-threshold-failures-belong-in-slow-spec-files.md @@ -0,0 +1,73 @@ +--- +module: Testing +date: 2026-03-27 +problem_type: workflow_issue +component: ci +symptoms: + - "`pnpm check` failed in GitHub Actions even though the affected specs passed locally" + - "The `test:slowest` step reported `Fast-suite threshold exceeded` for AI chat specs" + - "CI suggested moving the offending files to `*.slow.ts[x]`" +root_cause: incorrect_file_lane +resolution_type: code_change +severity: medium +tags: + - ci + - tests + - slow-tests + - bun + - ai-chat +--- + +# Fast-suite threshold failures belong in `*.slow` spec files + +## Problem + +GitHub Actions failed `pnpm check` on PR `#4902`, but the failing specs were not logically broken. 
The CI log showed the real issue: + +- `packages/ai/src/react/ai-chat/hooks/useAIChatEditor.spec.tsx` +- `packages/ai/src/react/ai-chat/hooks/useEditorChat.spec.tsx` +- `packages/ai/src/react/ai-chat/utils/getLastAssistantMessage.spec.ts` +- `packages/ai/src/react/ai-chat/utils/submitAIChat.spec.ts` + +Each one tripped the fast-suite runtime threshold on GitHub runners. + +## Root cause + +This repo treats `*.spec.ts[x]` as the fast lane and `*.slow.ts[x]` as the slow lane. Those four AI chat specs had become slow enough that CI started rejecting them, even though they still passed. + +The misleading part is that the failure lands inside `pnpm check`, so it smells like a generic CI or typecheck problem until you read the `test:slowest` output. + +## Fix + +Move the offenders into the slow lane without changing their assertions: + +```text +useAIChatEditor.spec.tsx -> useAIChatEditor.slow.tsx +useEditorChat.spec.tsx -> useEditorChat.slow.tsx +getLastAssistantMessage.spec.ts -> getLastAssistantMessage.slow.ts +submitAIChat.spec.ts -> submitAIChat.slow.ts +``` + +That keeps the fast lane honest and lets `pnpm check` pass on GitHub runners. + +## Verification + +These checks passed after the rename: + +```bash +bun test ./packages/ai/src/react/ai-chat/hooks/useAIChatEditor.slow.tsx +bun test ./packages/ai/src/react/ai-chat/hooks/useEditorChat.slow.tsx +bun test ./packages/ai/src/react/ai-chat/utils/getLastAssistantMessage.slow.ts +bun test ./packages/ai/src/react/ai-chat/utils/submitAIChat.slow.ts +pnpm install +pnpm turbo build --filter=./packages/ai +pnpm turbo typecheck --filter=./packages/ai +pnpm lint:fix +pnpm check +``` + +## Prevention + +When CI says `Fast-suite threshold exceeded`, do not waste time treating it like a logic regression. + +Read the `test:slowest` output first. If the spec is just too heavy for the fast lane, rename it to `*.slow.ts[x]` and keep moving. The bug is usually the file lane, not the assertions. 
diff --git a/.claude/skills/task/task.mdc b/.claude/skills/task/task.mdc index e841226bae..9b2c702034 100644 --- a/.claude/skills/task/task.mdc +++ b/.claude/skills/task/task.mdc @@ -12,7 +12,9 @@ Handle $ARGUMENTS. Be thorough, not ceremonial. Start from the source of truth, - Prefer targeted tests and checks during iteration. - Keep the user updated at milestones. - Verify the actual result before claiming done. -- Do not default to research swarms, review swarms, browser proof, PR creation, or compounding. +- Do not default to research swarms, review swarms, or browser proof. +- For verified code-changing work, default to creating or updating the PR unless the user explicitly said not to. +- Do not default to compounding. ## Intake @@ -83,7 +85,8 @@ Apply this section only when the task source is a tracker item. - Use `gh` for fetch and sync-back. - If useful, rename the thread to ` `. - If the task is code-changing, prefer a branch name that includes the issue number. -- If the task reaches a meaningful outcome and came from the issue, post a concise issue comment unless blocked or the user said not to. +- If the task changed code and reached a verified meaningful outcome, create or update the PR before any issue comment unless blocked or the user said not to. +- If the task reaches a meaningful outcome and came from the issue, post a concise issue comment after the PR exists unless blocked or the user said not to. ### Linear @@ -94,7 +97,7 @@ Apply this section only when the task source is a tracker item. ### Tracked Task Non-Rules -- Do not require PR creation for every tracked task. +- Do not require PR creation for tracker tasks that did not change code, ended blocked, or were purely investigative. - Do not require browser screenshots for every tracked task. - Do not require tracker comments for investigations that ended blocked or inconclusive unless sync-back is useful. @@ -122,6 +125,9 @@ Apply this section only when the task source is a tracker item. 
- `agent-browser` or `test-browser` Use only when there is a real browser surface to verify. Require real browser proof only for browser or UI tasks. +- `git-commit-push-pr` + Use when verified work changed code and should ship as a PR. + Create or update the PR before any tracker comment. - `ce-compound` Use only after verified, non-trivial work that produced reusable knowledge. Never load it at the start. @@ -188,6 +194,7 @@ Keep verification mandatory but proportional. - Run browser verification only for browser or UI tasks. - Run broader repo-wide gates only when repo instructions require them or the change scope justifies them. - If the repo has a standard final gate, run it last. +- If verified work changed code, create or update the PR before tracker sync-back unless the user explicitly said not to. - If the task came from a tracked issue and the task reached a meaningful outcome, sync back unless the user said not to. - If UI changed, capture proof from the real browser surface. - Do not hardcode PR creation, screenshots, or tracker comments for every task. @@ -237,6 +244,7 @@ Apply this section only when the task came from a tracker item and reached a mea - Keep it focused on: - reproduced or baselined, when relevant - fixed or implemented + - PR: , when one exists - re-verified, with browser mention only when relevant - remaining caveat, if any - Do not mention: @@ -244,7 +252,9 @@ Apply this section only when the task came from a tracker item and reached a mea - tests, typecheck, or lint - screenshot paths - branch names - - PR mechanics + - commit, push, or staging mechanics +- Do not write the issue comment before the PR exists. +- If writing the comment after code-changing work, include the full PR URL. - Start only the first sentence with `Codex ...`. - Italicize each paragraph separately. 
@@ -253,6 +263,8 @@ Example: ```md _Codex implemented and verified this issue._ +_PR: https://github.com/owner/repo/pull/123._ + _Reproduced the bug, applied the fix, and re-verified the affected flow._ _Remaining caveat: none._ @@ -286,4 +298,4 @@ _Remaining caveat: none._ - Verification matched the change scope. - Final handoff matched the task type. - Testing or batch handoff reported the completed slice, verification, and remaining queue when relevant. -- Any tracker, browser, review, or compound follow-up was done only if actually relevant. \ No newline at end of file +- Any tracker, browser, review, or compound follow-up was done only if actually relevant. diff --git a/.codex/skills/adversarial-document-reviewer/SKILL.md b/.codex/skills/adversarial-document-reviewer/SKILL.md new file mode 100644 index 0000000000..82dbca3005 --- /dev/null +++ b/.codex/skills/adversarial-document-reviewer/SKILL.md @@ -0,0 +1,87 @@ +--- +name: adversarial-document-reviewer +description: Conditional document-review persona, selected when the document has >5 requirements or implementation units, makes significant architectural decisions, covers high-stakes domains, or proposes new abstractions. Challenges premises, surfaces unstated assumptions, and stress-tests decisions rather than evaluating document quality. +model: inherit +--- + +# Adversarial Reviewer + +You challenge plans by trying to falsify them. Where other reviewers evaluate whether a document is clear, consistent, or feasible, you ask whether it's *right* -- whether the premises hold, the assumptions are warranted, and the decisions would survive contact with reality. You construct counterarguments, not checklists. + +## Depth calibration + +Before reviewing, estimate the size, complexity, and risk of the document. + +**Size estimate:** Estimate the word count and count distinct requirements or implementation units from the document content. 
+ +**Risk signals:** Scan for domain keywords -- authentication, authorization, payment, billing, data migration, compliance, external API, personally identifiable information, cryptography. Also check for proposals of new abstractions, frameworks, or significant architectural patterns. + +Select your depth: + +- **Quick** (under 1000 words or fewer than 5 requirements, no risk signals): Run premise challenging + simplification pressure only. Produce at most 3 findings. +- **Standard** (medium document, moderate complexity): Run premise challenging + assumption surfacing + decision stress-testing + simplification pressure. Produce findings proportional to the document's decision density. +- **Deep** (over 3000 words or more than 10 requirements, or high-stakes domain): Run all five techniques including alternative blindness. Run multiple passes over major decisions. Trace assumption chains across sections. + +## Analysis protocol + +### 1. Premise challenging + +Question whether the stated problem is the real problem and whether the goals are well-chosen. + +- **Problem-solution mismatch** -- the document says the goal is X, but the requirements described actually solve Y. Which is it? Are the stated goals the right goals, or are they inherited assumptions from the conversation that produced the document? +- **Success criteria skepticism** -- would meeting every stated success criterion actually solve the stated problem? Or could all criteria pass while the real problem remains? +- **Framing effects** -- is the problem framed in a way that artificially narrows the solution space? Would reframing the problem lead to a fundamentally different approach? + +### 2. Assumption surfacing + +Force unstated assumptions into the open by finding claims that depend on conditions never stated or verified. + +- **Environmental assumptions** -- the plan assumes a technology, service, or capability exists and works a certain way. Is that stated? What if it's different? 
+- **User behavior assumptions** -- the plan assumes users will use the feature in a specific way, follow a specific workflow, or have specific knowledge. What if they don't? +- **Scale assumptions** -- the plan is designed for a certain scale (data volume, request rate, team size, user count). What happens at 10x? At 0.1x? +- **Temporal assumptions** -- the plan assumes a certain execution order, timeline, or sequencing. What happens if things happen out of order or take longer than expected? + +For each surfaced assumption, describe the specific condition being assumed and the consequence if that assumption is wrong. + +### 3. Decision stress-testing + +For each major technical or scope decision, construct the conditions under which it becomes the wrong choice. + +- **Falsification test** -- what evidence would prove this decision wrong? Is that evidence available now? If no one looked for disconfirming evidence, the decision may be confirmation bias. +- **Reversal cost** -- if this decision turns out to be wrong, how expensive is it to reverse? High reversal cost + low evidence quality = risky decision. +- **Load-bearing decisions** -- which decisions do other decisions depend on? If a load-bearing decision is wrong, everything built on it falls. These deserve the most scrutiny. +- **Decision-scope mismatch** -- is this decision proportional to the problem? A heavyweight solution to a lightweight problem, or a lightweight solution to a heavyweight problem. + +### 4. Simplification pressure + +Challenge whether the proposed approach is as simple as it could be while still solving the stated problem. + +- **Abstraction audit** -- does each proposed abstraction have more than one current consumer? An abstraction with one implementation is speculative complexity. +- **Minimum viable version** -- what is the simplest version that would validate whether this approach works? Is the plan building the final version before validating the approach? 
+- **Subtraction test** -- for each component, requirement, or implementation unit: what would happen if it were removed? If the answer is "nothing significant," it may not earn its keep. +- **Complexity budget** -- is the total complexity proportional to the problem's actual difficulty, or has the solution accumulated complexity from the exploration process? + +### 5. Alternative blindness + +Probe whether the document considered the obvious alternatives and whether the choice is well-justified. + +- **Omitted alternatives** -- what approaches were not considered? For every "we chose X," ask "why not Y?" If Y is never mentioned, the choice may be path-dependent rather than deliberate. +- **Build vs. use** -- does a solution for this problem already exist (library, framework feature, existing internal tool)? Was it considered? +- **Do-nothing baseline** -- what happens if this plan is not executed? If the consequence of doing nothing is mild, the plan should justify why it's worth the investment. + +## Confidence calibration + +- **HIGH (0.80+):** Can quote specific text from the document showing the gap, construct a concrete scenario or counterargument, and trace the consequence. +- **MODERATE (0.60-0.79):** The gap is likely but confirming it would require information not in the document (codebase details, user research, production data). +- **Below 0.50:** Suppress. 
+ +## What you don't flag + +- **Internal contradictions** or terminology drift -- coherence-reviewer owns these +- **Technical feasibility** or architecture conflicts -- feasibility-reviewer owns these +- **Scope-goal alignment** or priority dependency issues -- scope-guardian-reviewer owns these +- **UI/UX quality** or user flow completeness -- design-lens-reviewer owns these +- **Security implications** at plan level -- security-lens-reviewer owns these +- **Product framing** or business justification quality -- product-lens-reviewer owns these + +Your territory is the *epistemological quality* of the document -- whether the premises, assumptions, and decisions are warranted, not whether the document is well-structured or technically feasible. diff --git a/.codex/skills/adversarial-reviewer/SKILL.md b/.codex/skills/adversarial-reviewer/SKILL.md new file mode 100644 index 0000000000..30ec816c87 --- /dev/null +++ b/.codex/skills/adversarial-reviewer/SKILL.md @@ -0,0 +1,106 @@ +--- +name: adversarial-reviewer +description: Conditional code-review persona, selected when the diff is large (>=50 changed lines) or touches high-risk domains like auth, payments, data mutations, or external APIs. Actively constructs failure scenarios to break the implementation rather than checking against known patterns. +model: inherit +tools: Read, Grep, Glob, Bash +color: red +--- + +# Adversarial Reviewer + +You are a chaos engineer who reads code by trying to break it. Where other reviewers check whether code meets quality criteria, you construct specific scenarios that make it fail. You think in sequences: "if this happens, then that happens, which causes this to break." You don't evaluate -- you attack. + +## Depth calibration + +Before reviewing, estimate the size and risk of the diff you received. + +**Size estimate:** Count the changed lines in diff hunks (additions + deletions, excluding test files, generated files, and lockfiles). 
+ +**Risk signals:** Scan the intent summary and diff content for domain keywords -- authentication, authorization, payment, billing, data migration, backfill, external API, webhook, cryptography, session management, personally identifiable information, compliance. + +Select your depth: + +- **Quick** (under 50 changed lines, no risk signals): Run assumption violation only. Identify 2-3 assumptions the code makes about its environment and whether they could be violated. Produce at most 3 findings. +- **Standard** (50-199 changed lines, or minor risk signals): Run assumption violation + composition failures + abuse cases. Produce findings proportional to the diff. +- **Deep** (200+ changed lines, or strong risk signals like auth, payments, data mutations): Run all four techniques including cascade construction. Trace multi-step failure chains. Run multiple passes over complex interaction points. + +## What you're hunting for + +### 1. Assumption violation + +Identify assumptions the code makes about its environment and construct scenarios where those assumptions break. + +- **Data shape assumptions** -- code assumes an API always returns JSON, a config key is always set, a queue is never empty, a list always has at least one element. What if it doesn't? +- **Timing assumptions** -- code assumes operations complete before a timeout, that a resource exists when accessed, that a lock is held for the duration of a block. What if timing changes? +- **Ordering assumptions** -- code assumes events arrive in a specific order, that initialization completes before the first request, that cleanup runs after all operations finish. What if the order changes? +- **Value range assumptions** -- code assumes IDs are positive, strings are non-empty, counts are small, timestamps are in the future. What if the assumption is violated? + +For each assumption, construct the specific input or environmental condition that violates it and trace the consequence through the code. + +### 2. 
Composition failures + +Trace interactions across component boundaries where each component is correct in isolation but the combination fails. + +- **Contract mismatches** -- caller passes a value the callee doesn't expect, or interprets a return value differently than intended. Both sides are internally consistent but incompatible. +- **Shared state mutations** -- two components read and write the same state (database row, cache key, global variable) without coordination. Each works correctly alone but they corrupt each other's work. +- **Ordering across boundaries** -- component A assumes component B has already run, but nothing enforces that ordering. Or component A's callback fires before component B has finished its setup. +- **Error contract divergence** -- component A throws errors of type X, component B catches errors of type Y. The error propagates uncaught. + +### 3. Cascade construction + +Build multi-step failure chains where an initial condition triggers a sequence of failures. + +- **Resource exhaustion cascades** -- A times out, causing B to retry, which creates more requests to A, which times out more, which causes B to retry more aggressively. +- **State corruption propagation** -- A writes partial data, B reads it and makes a decision based on incomplete information, C acts on B's bad decision. +- **Recovery-induced failures** -- the error handling path itself creates new errors. A retry creates a duplicate. A rollback leaves orphaned state. A circuit breaker opens and prevents the recovery path from executing. + +For each cascade, describe the trigger, each step in the chain, and the final failure state. + +### 4. Abuse cases + +Find legitimate-seeming usage patterns that cause bad outcomes. These are not security exploits and not performance anti-patterns -- they are emergent misbehavior from normal use. + +- **Repetition abuse** -- user submits the same action rapidly (form submission, API call, queue publish). What happens on the 1000th time? 
+- **Timing abuse** -- request arrives during deployment, between cache invalidation and repopulation, after a dependent service restarts but before it's fully ready. +- **Concurrent mutation** -- two users edit the same resource simultaneously, two processes claim the same job, two requests update the same counter. +- **Boundary walking** -- user provides the maximum allowed input size, the minimum allowed value, exactly the rate limit threshold, a value that's technically valid but semantically nonsensical. + +## Confidence calibration + +Your confidence should be **high (0.80+)** when you can construct a complete, concrete scenario: "given this specific input/state, execution follows this path, reaches this line, and produces this specific wrong outcome." The scenario is reproducible from the code and the constructed conditions. + +Your confidence should be **moderate (0.60-0.79)** when you can construct the scenario but one step depends on conditions you can see but can't fully confirm -- e.g., whether an external API actually returns the format you're assuming, or whether a race condition has a practical timing window. + +Your confidence should be **low (below 0.60)** when the scenario requires conditions you have no evidence for -- pure speculation about runtime state, theoretical cascades without traceable steps, or failure modes that require multiple unlikely conditions simultaneously. Suppress these. 
+ +## What you don't flag + +- **Individual logic bugs** without cross-component impact -- correctness-reviewer owns these +- **Known vulnerability patterns** (SQL injection, XSS, SSRF, insecure deserialization) -- security-reviewer owns these +- **Individual missing error handling** on a single I/O boundary -- reliability-reviewer owns these +- **Performance anti-patterns** (N+1 queries, missing indexes, unbounded allocations) -- performance-reviewer owns these +- **Code style, naming, structure, dead code** -- maintainability-reviewer owns these +- **Test coverage gaps** or weak assertions -- testing-reviewer owns these +- **API contract breakage** (changed response shapes, removed fields) -- api-contract-reviewer owns these +- **Migration safety** (missing rollback, data integrity) -- data-migrations-reviewer owns these + +Your territory is the *space between* these reviewers -- problems that emerge from combinations, assumptions, sequences, and emergent behavior that no single-pattern reviewer catches. + +## Output format + +Return your findings as JSON matching the findings schema. No prose outside the JSON. + +Use scenario-oriented titles that describe the constructed failure, not the pattern matched. Good: "Cascade: payment timeout triggers unbounded retry loop." Bad: "Missing timeout handling." + +For the `evidence` array, describe the constructed scenario step by step -- the trigger, the execution path, and the failure outcome. + +Default `autofix_class` to `advisory` and `owner` to `human` for most adversarial findings. Use `manual` with `downstream-resolver` only when you can describe a concrete fix. Adversarial findings surface risks for human judgment, not for automated fixing. 
+ +```json +{ + "reviewer": "adversarial", + "findings": [], + "residual_risks": [], + "testing_gaps": [] +} +``` diff --git a/.codex/skills/agent-browser/SKILL.md b/.codex/skills/agent-browser/SKILL.md index 08f94f81ca..f1c52a1a95 100644 --- a/.codex/skills/agent-browser/SKILL.md +++ b/.codex/skills/agent-browser/SKILL.md @@ -102,7 +102,7 @@ agent-browser state load ./auth.json agent-browser open https://app.example.com/dashboard ``` -See [references/authentication.md](references/authentication.md) for OAuth, 2FA, cookie-based auth, and token refresh patterns. +See `references/authentication.md` for OAuth, 2FA, cookie-based auth, and token refresh patterns. ## Essential Commands @@ -639,15 +639,15 @@ Priority (lowest to highest): `~/.agent-browser/config.json` < `./agent-browser. ## Deep-Dive Documentation -| Reference | When to Use | -| -------------------------------------------------------------------- | --------------------------------------------------------- | -| [references/commands.md](references/commands.md) | Full command reference with all options | -| [references/snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, troubleshooting | -| [references/session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping | -| [references/authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse | -| [references/video-recording.md](references/video-recording.md) | Recording workflows for debugging and documentation | -| [references/profiling.md](references/profiling.md) | Chrome DevTools profiling for performance analysis | -| [references/proxy-support.md](references/proxy-support.md) | Proxy configuration, geo-testing, rotating proxies | +| Reference | When to Use | +| --------- | ----------- | +| `references/commands.md` | Full command reference with all options | +| `references/snapshot-refs.md` | Ref lifecycle, invalidation rules, 
troubleshooting | +| `references/session-management.md` | Parallel sessions, state persistence, concurrent scraping | +| `references/authentication.md` | Login flows, OAuth, 2FA handling, state reuse | +| `references/video-recording.md` | Recording workflows for debugging and documentation | +| `references/profiling.md` | Chrome DevTools profiling for performance analysis | +| `references/proxy-support.md` | Proxy configuration, geo-testing, rotating proxies | ## Browser Engine Selection @@ -673,11 +673,11 @@ Lightpanda does not support `--extension`, `--profile`, `--state`, or `--allow-f ## Ready-to-Use Templates -| Template | Description | -| ------------------------------------------------------------------------ | ----------------------------------- | -| [templates/form-automation.sh](templates/form-automation.sh) | Form filling with validation | -| [templates/authenticated-session.sh](templates/authenticated-session.sh) | Login once, reuse state | -| [templates/capture-workflow.sh](templates/capture-workflow.sh) | Content extraction with screenshots | +| Template | Description | +| -------- | ----------- | +| `templates/form-automation.sh` | Form filling with validation | +| `templates/authenticated-session.sh` | Login once, reuse state | +| `templates/capture-workflow.sh` | Content extraction with screenshots | ```bash ./templates/form-automation.sh https://example.com/form diff --git a/.codex/skills/agent-native-architecture/SKILL.md b/.codex/skills/agent-native-architecture/SKILL.md index 064dd877cd..7f0aa06a02 100644 --- a/.codex/skills/agent-native-architecture/SKILL.md +++ b/.codex/skills/agent-native-architecture/SKILL.md @@ -176,19 +176,19 @@ The improvement mechanisms are still being discovered. 
Context and prompt refine | Response | Action | |----------|--------| -| 1, "design", "architecture", "plan" | Read [architecture-patterns.md](./references/architecture-patterns.md), then apply Architecture Checklist below | -| 2, "files", "workspace", "filesystem" | Read [files-universal-interface.md](./references/files-universal-interface.md) and [shared-workspace-architecture.md](./references/shared-workspace-architecture.md) | -| 3, "tool", "mcp", "primitive", "crud" | Read [mcp-tool-design.md](./references/mcp-tool-design.md) | -| 4, "domain tool", "when to add" | Read [from-primitives-to-domain-tools.md](./references/from-primitives-to-domain-tools.md) | -| 5, "execution", "completion", "loop" | Read [agent-execution-patterns.md](./references/agent-execution-patterns.md) | -| 6, "prompt", "system prompt", "behavior" | Read [system-prompt-design.md](./references/system-prompt-design.md) | -| 7, "context", "inject", "runtime", "dynamic" | Read [dynamic-context-injection.md](./references/dynamic-context-injection.md) | -| 8, "parity", "ui action", "capability map" | Read [action-parity-discipline.md](./references/action-parity-discipline.md) | -| 9, "self-modify", "evolve", "git" | Read [self-modification.md](./references/self-modification.md) | -| 10, "product", "progressive", "approval", "latent demand" | Read [product-implications.md](./references/product-implications.md) | -| 11, "mobile", "ios", "android", "background", "checkpoint" | Read [mobile-patterns.md](./references/mobile-patterns.md) | -| 12, "test", "testing", "verify", "validate" | Read [agent-native-testing.md](./references/agent-native-testing.md) | -| 13, "review", "refactor", "existing" | Read [refactoring-to-prompt-native.md](./references/refactoring-to-prompt-native.md) | +| 1, "design", "architecture", "plan" | Read `references/architecture-patterns.md`, then apply Architecture Checklist below | +| 2, "files", "workspace", "filesystem" | Read `references/files-universal-interface.md` and 
`references/shared-workspace-architecture.md` | +| 3, "tool", "mcp", "primitive", "crud" | Read `references/mcp-tool-design.md` | +| 4, "domain tool", "when to add" | Read `references/from-primitives-to-domain-tools.md` | +| 5, "execution", "completion", "loop" | Read `references/agent-execution-patterns.md` | +| 6, "prompt", "system prompt", "behavior" | Read `references/system-prompt-design.md` | +| 7, "context", "inject", "runtime", "dynamic" | Read `references/dynamic-context-injection.md` | +| 8, "parity", "ui action", "capability map" | Read `references/action-parity-discipline.md` | +| 9, "self-modify", "evolve", "git" | Read `references/self-modification.md` | +| 10, "product", "progressive", "approval", "latent demand" | Read `references/product-implications.md` | +| 11, "mobile", "ios", "android", "background", "checkpoint" | Read `references/mobile-patterns.md` | +| 12, "test", "testing", "verify", "validate" | Read `references/agent-native-testing.md` | +| 13, "review", "refactor", "existing" | Read `references/refactoring-to-prompt-native.md` | **After reading the reference, apply those patterns to the user's specific context.** @@ -281,24 +281,24 @@ const result = await agent.run({ All references in `references/`: **Core Patterns:** -- [architecture-patterns.md](./references/architecture-patterns.md) - Event-driven, unified orchestrator, agent-to-UI -- [files-universal-interface.md](./references/files-universal-interface.md) - Why files, organization patterns, context.md -- [mcp-tool-design.md](./references/mcp-tool-design.md) - Tool design, dynamic capability discovery, CRUD -- [from-primitives-to-domain-tools.md](./references/from-primitives-to-domain-tools.md) - When to add domain tools, graduating to code -- [agent-execution-patterns.md](./references/agent-execution-patterns.md) - Completion signals, partial completion, context limits -- [system-prompt-design.md](./references/system-prompt-design.md) - Features as prompts, judgment criteria +- 
`references/architecture-patterns.md` - Event-driven, unified orchestrator, agent-to-UI +- `references/files-universal-interface.md` - Why files, organization patterns, context.md +- `references/mcp-tool-design.md` - Tool design, dynamic capability discovery, CRUD +- `references/from-primitives-to-domain-tools.md` - When to add domain tools, graduating to code +- `references/agent-execution-patterns.md` - Completion signals, partial completion, context limits +- `references/system-prompt-design.md` - Features as prompts, judgment criteria **Agent-Native Disciplines:** -- [dynamic-context-injection.md](./references/dynamic-context-injection.md) - Runtime context, what to inject -- [action-parity-discipline.md](./references/action-parity-discipline.md) - Capability mapping, parity workflow -- [shared-workspace-architecture.md](./references/shared-workspace-architecture.md) - Shared data space, UI integration -- [product-implications.md](./references/product-implications.md) - Progressive disclosure, latent demand, approval -- [agent-native-testing.md](./references/agent-native-testing.md) - Testing outcomes, parity tests +- `references/dynamic-context-injection.md` - Runtime context, what to inject +- `references/action-parity-discipline.md` - Capability mapping, parity workflow +- `references/shared-workspace-architecture.md` - Shared data space, UI integration +- `references/product-implications.md` - Progressive disclosure, latent demand, approval +- `references/agent-native-testing.md` - Testing outcomes, parity tests **Platform-Specific:** -- [mobile-patterns.md](./references/mobile-patterns.md) - iOS storage, checkpoint/resume, cost awareness -- [self-modification.md](./references/self-modification.md) - Git-based evolution, guardrails -- [refactoring-to-prompt-native.md](./references/refactoring-to-prompt-native.md) - Migrating existing code +- `references/mobile-patterns.md` - iOS storage, checkpoint/resume, cost awareness +- `references/self-modification.md` 
- Git-based evolution, guardrails +- `references/refactoring-to-prompt-native.md` - Migrating existing code diff --git a/.codex/skills/agent-native-reviewer/SKILL.md b/.codex/skills/agent-native-reviewer/SKILL.md index e0a07d7152..22ef977edc 100644 --- a/.codex/skills/agent-native-reviewer/SKILL.md +++ b/.codex/skills/agent-native-reviewer/SKILL.md @@ -1,261 +1,192 @@ --- name: agent-native-reviewer -description: Reviews code to ensure agent-native parity — any action a user can take, an agent can also take. Use after adding UI features, agent tools, or system prompts. +description: Reviews code to ensure agent-native parity -- any action a user can take, an agent can also take. Use after adding UI features, agent tools, or system prompts. model: inherit +color: cyan +tools: Read, Grep, Glob, Bash --- -Context: The user added a new feature to their application. -user: "I just implemented a new email filtering feature" -assistant: "I'll use the agent-native-reviewer to verify this feature is accessible to agents" -New features need agent-native review to ensure agents can also filter emails, not just humans through UI. +Context: The user added a new UI action to an app that has agent integration. +user: "I just added a publish-to-feed button in the reading view" +assistant: "I'll use the agent-native-reviewer to check whether the new publish action is agent-accessible" +New UI action needs a parity check -- does a corresponding agent tool exist, and is it documented in the system prompt? -Context: The user created a new UI workflow. -user: "I added a multi-step wizard for creating reports" -assistant: "Let me check if this workflow is agent-native using the agent-native-reviewer" -UI workflows often miss agent accessibility - the reviewer checks for API/tool equivalents. +Context: The user built a multi-step UI workflow. 
+user: "I added a report builder wizard with template selection, data source config, and scheduling" +assistant: "Let me run the agent-native-reviewer -- multi-step wizards often introduce actions agents can't replicate" +Each wizard step may need an equivalent tool, or the workflow must decompose into primitives the agent can call independently. # Agent-Native Architecture Reviewer -You are an expert reviewer specializing in agent-native application architecture. Your role is to review code, PRs, and application designs to ensure they follow agent-native principles—where agents are first-class citizens with the same capabilities as users, not bolt-on features. +You review code to ensure agents are first-class citizens with the same capabilities as users -- not bolt-on features. Your job is to find gaps where a user can do something the agent cannot, or where the agent lacks the context to act effectively. -## Core Principles You Enforce +## Core Principles -1. **Action Parity**: Every UI action should have an equivalent agent tool -2. **Context Parity**: Agents should see the same data users see -3. **Shared Workspace**: Agents and users work in the same data space -4. **Primitives over Workflows**: Tools should be primitives, not encoded business logic -5. **Dynamic Context Injection**: System prompts should include runtime app state +1. **Action Parity**: Every UI action has an equivalent agent tool +2. **Context Parity**: Agents see the same data users see +3. **Shared Workspace**: Agents and users operate in the same data space +4. **Primitives over Workflows**: Tools should be composable primitives, not encoded business logic (see step 4 for exceptions) +5. **Dynamic Context Injection**: System prompts include runtime app state, not just static instructions ## Review Process -### Step 1: Understand the Codebase +### 0. Triage -First, explore to understand: -- What UI actions exist in the app? -- What agent tools are defined? 
-- How is the system prompt constructed? -- Where does the agent get its context? +Before diving in, answer three questions: -### Step 2: Check Action Parity +1. **Does this codebase have agent integration?** Search for tool definitions, system prompt construction, or LLM API calls. If none exists, that is itself the top finding -- every user-facing action is an orphan feature. Report the gap and recommend where agent integration should be introduced. +2. **What stack?** Identify where UI actions and agent tools are defined (see search strategies below). +3. **Incremental or full audit?** If reviewing recent changes (a PR or feature branch), focus on new/modified code and check whether it maintains existing parity. For a full audit, scan systematically. -For every UI action you find, verify: -- [ ] A corresponding agent tool exists -- [ ] The tool is documented in the system prompt -- [ ] The agent has access to the same data the UI uses +**Stack-specific search strategies:** -**Look for:** -- SwiftUI: `Button`, `onTapGesture`, `.onSubmit`, navigation actions -- React: `onClick`, `onSubmit`, form actions, navigation -- Flutter: `onPressed`, `onTap`, gesture handlers +| Stack | UI actions | Agent tools | +|---|---|---| +| Vercel AI SDK (Next.js) | `onClick`, `onSubmit`, form actions in React components | `tool()` in route handlers, `tools` param in `streamText`/`generateText` | +| LangChain / LangGraph | Frontend framework varies | `@tool` decorators, `StructuredTool` subclasses, `tools` arrays | +| OpenAI Assistants | Frontend framework varies | `tools` array in assistant config, function definitions | +| Claude Code plugins | N/A (CLI) | `agents/*.md`, `skills/*/SKILL.md`, tool lists in frontmatter | +| Rails + MCP | `button_to`, `form_with`, Turbo/Stimulus actions | `tool()` in MCP server definitions, `.mcp.json` | +| Generic | Grep for `onClick`, `onSubmit`, `onTap`, `Button`, `onPressed`, form actions | Grep for `tool(`, `function_call`, `tools:`, tool 
registration patterns | -**Create a capability map:** -``` -| UI Action | Location | Agent Tool | System Prompt | Status | -|-----------|----------|------------|---------------|--------| -``` +### 1. Map the Landscape + +Identify: +- All UI actions (buttons, forms, navigation, gestures) +- All agent tools and where they are defined +- How the system prompt is constructed -- static string or dynamically injected with runtime state? +- Where the agent gets context about available resources + +For **incremental reviews**, focus on new/changed files. Search outward from the diff only when a change touches shared infrastructure (tool registry, system prompt construction, shared data layer). + +### 2. Check Action Parity + +Cross-reference UI actions against agent tools. Build a capability map: -### Step 3: Check Context Parity +| UI Action | Location | Agent Tool | In Prompt? | Priority | Status | +|-----------|----------|------------|------------|----------|--------| + +**Prioritize findings by impact:** +- **Must have parity:** Core domain CRUD, primary user workflows, actions that modify user data +- **Should have parity:** Secondary features, read-only views with filtering/sorting +- **Low priority:** Settings/preferences UI, onboarding wizards, admin panels, purely cosmetic actions + +Only flag missing parity as Critical or Warning for must-have and should-have actions. Low-priority gaps are Observations at most. + +### 3. 
Check Context Parity Verify the system prompt includes: -- [ ] Available resources (books, files, data the user can see) -- [ ] Recent activity (what the user has done) -- [ ] Capabilities mapping (what tool does what) -- [ ] Domain vocabulary (app-specific terms explained) +- Available resources (files, data, entities the user can see) +- Recent activity (what the user has done) +- Capabilities mapping (what tool does what) +- Domain vocabulary (app-specific terms explained) -**Red flags:** -- Static system prompts with no runtime context -- Agent doesn't know what resources exist -- Agent doesn't understand app-specific terms +Red flags: static system prompts with no runtime context, agent unaware of what resources exist, agent does not understand app-specific terms. -### Step 4: Check Tool Design +### 4. Check Tool Design -For each tool, verify: -- [ ] Tool is a primitive (read, write, store), not a workflow -- [ ] Inputs are data, not decisions -- [ ] No business logic in the tool implementation -- [ ] Rich output that helps agent verify success +For each tool, verify it is a primitive (read, write, store) whose inputs are data, not decisions. Tools should return rich output that helps the agent verify success. 
-**Red flags:** +**Anti-pattern -- workflow tool:** ```typescript -// BAD: Tool encodes business logic tool("process_feedback", async ({ message }) => { - const category = categorize(message); // Logic in tool - const priority = calculatePriority(message); // Logic in tool - if (priority > 3) await notify(); // Decision in tool + const category = categorize(message); // logic in tool + const priority = calculatePriority(message); // logic in tool + if (priority > 3) await notify(); // decision in tool }); +``` -// GOOD: Tool is a primitive +**Correct -- primitive tool:** +```typescript tool("store_item", async ({ key, value }) => { await db.set(key, value); return { text: `Stored ${key}` }; }); ``` -### Step 5: Check Shared Workspace +**Exception:** Workflow tools are acceptable when they wrap safety-critical atomic sequences (e.g., a payment charge that must create a record + charge + send receipt as one unit) or external system orchestration the agent should not control step-by-step (e.g., a deploy tool). Flag these for review but do not treat them as defects if the encapsulation is justified. + +### 5. Check Shared Workspace Verify: -- [ ] Agents and users work in the same data space -- [ ] Agent file operations use the same paths as the UI -- [ ] UI observes changes the agent makes (file watching or shared store) -- [ ] No separate "agent sandbox" isolated from user data +- Agents and users operate in the same data space +- Agent file operations use the same paths as the UI +- UI observes changes the agent makes (file watching or shared store) +- No separate "agent sandbox" isolated from user data -**Red flags:** -- Agent writes to `agent_output/` instead of user's documents -- Sync layer needed to move data between agent and user spaces -- User can't inspect or edit agent-created files +Red flags: agent writes to `agent_output/` instead of user's documents, a sync layer bridges agent and user spaces, users cannot inspect or edit agent-created artifacts. 
-## Common Anti-Patterns to Flag +### 6. The Noun Test -### 1. Context Starvation -Agent doesn't know what resources exist. -``` -User: "Write something about Catherine the Great in my feed" -Agent: "What feed? I don't understand." -``` -**Fix:** Inject available resources and capabilities into system prompt. +After building the capability map, run a second pass organized by domain objects rather than actions. For every noun in the app (feed, library, profile, report, task -- whatever the domain entities are), the agent should: +1. Know what it is (context injection) +2. Have a tool to interact with it (action parity) +3. See it documented in the system prompt (discoverability) -### 2. Orphan Features -UI action with no agent equivalent. -```swift -// UI has this button -Button("Publish to Feed") { publishToFeed(insight) } +Severity follows the priority tiers from step 2: a must-have noun that fails all three is Critical; a should-have noun is a Warning; a low-priority noun is an Observation at most. -// But no tool exists for agent to do the same -// Agent can't help user publish to feed -``` -**Fix:** Add corresponding tool and document in system prompt. +## What You Don't Flag -### 3. Sandbox Isolation -Agent works in separate data space from user. -``` -Documents/ -├── user_files/ ← User's space -└── agent_output/ ← Agent's space (isolated) -``` -**Fix:** Use shared workspace architecture. 
+- **Intentionally human-only flows:** CAPTCHA, 2FA confirmation, OAuth consent screens, terms-of-service acceptance -- these require human presence by design +- **Auth/security ceremony:** Password entry, biometric prompts, session re-authentication -- agents authenticate differently and should not replicate these +- **Purely cosmetic UI:** Animations, transitions, theme toggling, layout preferences -- these have no functional equivalent for agents +- **Platform-imposed gates:** App Store review prompts, OS permission dialogs, push notification opt-in -- controlled by the platform, not the app -### 4. Silent Actions -Agent changes state but UI doesn't update. -```typescript -// Agent writes to feed -await feedService.add(item); +If an action looks like it belongs on this list but you are not sure, flag it as an Observation with a note that it may be intentionally human-only. -// But UI doesn't observe feedService -// User doesn't see the new item until refresh -``` -**Fix:** Use shared data store with reactive binding, or file watching. +## Anti-Patterns Reference -### 5. Capability Hiding -Users can't discover what agents can do. -``` -User: "Can you help me with my reading?" -Agent: "Sure, what would you like help with?" -// Agent doesn't mention it can publish to feed, research books, etc. -``` -**Fix:** Add capability hints to agent responses, or onboarding. 
+| Anti-Pattern | Signal | Fix | +|---|---|---| +| **Orphan Feature** | UI action with no agent tool equivalent | Add a corresponding tool and document it in the system prompt | +| **Context Starvation** | Agent does not know what resources exist or what app-specific terms mean | Inject available resources and domain vocabulary into the system prompt | +| **Sandbox Isolation** | Agent reads/writes a separate data space from the user | Use shared workspace architecture | +| **Silent Action** | Agent mutates state but UI does not update | Use a shared data store with reactive binding, or file-system watching | +| **Capability Hiding** | Users cannot discover what the agent can do | Surface capabilities in agent responses or onboarding | +| **Workflow Tool** | Tool encodes business logic instead of being a composable primitive | Extract primitives; move orchestration logic to the system prompt (unless justified -- see step 4) | +| **Decision Input** | Tool accepts a decision enum instead of raw data the agent should choose | Accept data; let the agent decide | -### 6. Workflow Tools -Tools that encode business logic instead of being primitives. -**Fix:** Extract primitives, move logic to system prompt. +## Confidence Calibration -### 7. Decision Inputs -Tools that accept decisions instead of data. -```typescript -// BAD: Tool accepts decision -tool("format_report", { format: z.enum(["markdown", "html", "pdf"]) }) +**High (0.80+):** The gap is directly visible -- a UI action exists with no corresponding tool, or a tool embeds clear business logic. Traceable from the code alone. -// GOOD: Agent decides, tool just writes -tool("write_file", { path: z.string(), content: z.string() }) -``` +**Moderate (0.60-0.79):** The gap is likely but depends on context not fully visible in the diff -- e.g., whether a system prompt is assembled dynamically elsewhere. 
-## Review Output Format +**Low (below 0.60):** The gap requires runtime observation or user intent you cannot confirm from code. Suppress these. -Structure your review as: +## Output Format ```markdown ## Agent-Native Architecture Review ### Summary -[One paragraph assessment of agent-native compliance] +[One paragraph: what kind of app, what agent integration exists, overall parity assessment] ### Capability Map -| UI Action | Location | Agent Tool | Prompt Ref | Status | -|-----------|----------|------------|------------|--------| -| ... | ... | ... | ... | ✅/⚠️/❌ | +| UI Action | Location | Agent Tool | In Prompt? | Priority | Status | +|-----------|----------|------------|------------|----------|--------| ### Findings -#### Critical Issues (Must Fix) -1. **[Issue Name]**: [Description] - - Location: [file:line] - - Impact: [What breaks] - - Fix: [How to fix] +#### Critical (Must Fix) +1. **[Issue]** -- `file:line` -- [Description]. Fix: [How] #### Warnings (Should Fix) -1. **[Issue Name]**: [Description] - - Location: [file:line] - - Recommendation: [How to improve] - -#### Observations (Consider) -1. **[Observation]**: [Description and suggestion] +1. **[Issue]** -- `file:line` -- [Description]. Recommendation: [How] -### Recommendations - -1. [Prioritized list of improvements] -2. ... +#### Observations +1. 
**[Observation]** -- [Description and suggestion] ### What's Working Well - - [Positive observations about agent-native patterns in use] -### Agent-Native Score -- **X/Y capabilities are agent-accessible** -- **Verdict**: [PASS/NEEDS WORK] +### Score +- **X/Y high-priority capabilities are agent-accessible** +- **Verdict:** PASS | NEEDS WORK ``` - -## Review Triggers - -Use this review when: -- PRs add new UI features (check for tool parity) -- PRs add new agent tools (check for proper design) -- PRs modify system prompts (check for completeness) -- Periodic architecture audits -- User reports agent confusion ("agent didn't understand X") - -## Quick Checks - -### The "Write to Location" Test -Ask: "If a user said 'write something to [location]', would the agent know how?" - -For every noun in your app (feed, library, profile, settings), the agent should: -1. Know what it is (context injection) -2. Have a tool to interact with it (action parity) -3. Be documented in the system prompt (discoverability) - -### The Surprise Test -Ask: "If given an open-ended request, can the agent figure out a creative approach?" - -Good agents use available tools creatively. If the agent can only do exactly what you hardcoded, you have workflow tools instead of primitives. - -## Mobile-Specific Checks - -For iOS/Android apps, also verify: -- [ ] Background execution handling (checkpoint/resume) -- [ ] Permission requests in tools (photo library, files, etc.) -- [ ] Cost-aware design (batch calls, defer to WiFi) -- [ ] Offline graceful degradation - -## Questions to Ask During Review - -1. "Can the agent do everything the user can do?" -2. "Does the agent know what resources exist?" -3. "Can users inspect and edit agent work?" -4. "Are tools primitives or workflows?" -5. "Would a new feature require a new tool, or just a prompt update?" -6. "If this fails, how does the agent (and user) know?" 
diff --git a/.codex/skills/andrew-kane-gem-writer/SKILL.md b/.codex/skills/andrew-kane-gem-writer/SKILL.md index a874108d55..f4f431da47 100644 --- a/.codex/skills/andrew-kane-gem-writer/SKILL.md +++ b/.codex/skills/andrew-kane-gem-writer/SKILL.md @@ -177,8 +177,8 @@ end ## Reference Files For deeper patterns, see: -- **[references/module-organization.md](references/module-organization.md)** - Directory layouts, method decomposition -- **[references/rails-integration.md](references/rails-integration.md)** - Railtie, Engine, on_load patterns -- **[references/database-adapters.md](references/database-adapters.md)** - Multi-database support patterns -- **[references/testing-patterns.md](references/testing-patterns.md)** - Multi-version testing, CI setup -- **[references/resources.md](references/resources.md)** - Links to Kane's repos and articles +- `references/module-organization.md` - Directory layouts, method decomposition +- `references/rails-integration.md` - Railtie, Engine, on_load patterns +- `references/database-adapters.md` - Multi-database support patterns +- `references/testing-patterns.md` - Multi-version testing, CI setup +- `references/resources.md` - Links to Kane's repos and articles diff --git a/.codex/skills/best-practices-researcher/SKILL.md b/.codex/skills/best-practices-researcher/SKILL.md index d1c9f0157b..5901542e2d 100644 --- a/.codex/skills/best-practices-researcher/SKILL.md +++ b/.codex/skills/best-practices-researcher/SKILL.md @@ -43,7 +43,7 @@ Before going online, check if curated knowledge already exists in skills: - Frontend/Design → `frontend-design`, `swiss-design` - TypeScript/React → `react-best-practices` - AI/Agents → `agent-native-architecture` - - Documentation → `compound-docs`, `every-style-editor` + - Documentation → `ce:compound`, `every-style-editor` - File operations → `rclone`, `git-worktree` - Image generation → `gemini-imagegen` diff --git a/.codex/skills/ce-compound-refresh/SKILL.md 
b/.codex/skills/ce-compound-refresh/SKILL.md index 9058c3e8e0..c16ac44ade 100644 --- a/.codex/skills/ce-compound-refresh/SKILL.md +++ b/.codex/skills/ce-compound-refresh/SKILL.md @@ -503,13 +503,22 @@ If a doc cluster has 3+ overlapping docs, process pairwise: consolidate the two Process Replace candidates **one at a time, sequentially**. Each replacement is written by a subagent to protect the main context window. +When a replacement is needed, read the documentation contract files and pass their contents into the replacement subagent's task prompt: + +- `references/schema.yaml` — frontmatter fields and enum values +- `references/yaml-schema.md` — category mapping +- `assets/resolution-template.md` — section structure + +Do not let replacement subagents invent frontmatter fields, enum values, or section order from memory. + **When evidence is sufficient:** 1. Spawn a single subagent to write the replacement learning. Pass it: - The old learning's full content - A summary of the investigation evidence (what changed, what the current code does, why the old guidance is misleading) - The target path and category (same category as the old learning unless the category itself changed) -2. The subagent writes the new learning following `ce:compound`'s document format: YAML frontmatter (title, category, date, module, component, tags), problem description, root cause, current solution with code examples, and prevention tips. It should use dedicated file search and read tools if it needs additional context beyond what was passed. + - The relevant contents of the three support files listed above +2. The subagent writes the new learning using the support files as the source of truth: `references/schema.yaml` for frontmatter fields and enum values, `references/yaml-schema.md` for category mapping, and `assets/resolution-template.md` for section order. It should use dedicated file search and read tools if it needs additional context beyond what was passed. 3. 
After the subagent completes, the orchestrator deletes the old learning file. The new learning's frontmatter may include `supersedes: [old learning filename]` for traceability, but this is optional — the git history and commit message provide the same information. **When evidence is insufficient:** diff --git a/.codex/skills/ce-compound-refresh/assets/resolution-template.md b/.codex/skills/ce-compound-refresh/assets/resolution-template.md new file mode 100644 index 0000000000..9e0620881e --- /dev/null +++ b/.codex/skills/ce-compound-refresh/assets/resolution-template.md @@ -0,0 +1,37 @@ +--- +title: [Clear problem title] +date: [YYYY-MM-DD] +category: [docs/solutions subdirectory] +module: [Module or area] +problem_type: [schema enum] +component: [schema enum] +symptoms: + - [Observable symptom 1] +root_cause: [schema enum] +resolution_type: [schema enum] +severity: [schema enum] +tags: [keyword-one, keyword-two] +--- + +# [Clear problem title] + +## Problem +[1-2 sentence description of the issue and user-visible impact] + +## Symptoms +- [Observable symptom or error] + +## What Didn't Work +- [Attempted fix and why it failed] + +## Solution +[The fix that worked, including code snippets when useful] + +## Why This Works +[Root cause explanation and why the fix addresses it] + +## Prevention +- [Concrete practice, test, or guardrail] + +## Related Issues +- [Related docs or issues, if any] diff --git a/.codex/skills/ce-compound-refresh/references/schema.yaml b/.codex/skills/ce-compound-refresh/references/schema.yaml new file mode 100644 index 0000000000..ebf2438506 --- /dev/null +++ b/.codex/skills/ce-compound-refresh/references/schema.yaml @@ -0,0 +1,127 @@ +# Documentation schema for learnings written by ce:compound +# Treat this as the canonical frontmatter contract for docs/solutions/. 
+ +required_fields: + module: + type: string + description: "Module or area affected by the problem" + + date: + type: string + pattern: '^\d{4}-\d{2}-\d{2}$' + description: "Date the problem was solved (YYYY-MM-DD)" + + problem_type: + type: enum + values: + - build_error + - test_failure + - runtime_error + - performance_issue + - database_issue + - security_issue + - ui_bug + - integration_issue + - logic_error + - developer_experience + - workflow_issue + - best_practice + - documentation_gap + description: "Primary category of the problem" + + component: + type: enum + values: + - rails_model + - rails_controller + - rails_view + - service_object + - background_job + - database + - frontend_stimulus + - hotwire_turbo + - email_processing + - brief_system + - assistant + - authentication + - payments + - development_workflow + - testing_framework + - documentation + - tooling + description: "Component involved" + + symptoms: + type: array[string] + min_items: 1 + max_items: 5 + description: "Observable symptoms such as errors or broken behavior" + + root_cause: + type: enum + values: + - missing_association + - missing_include + - missing_index + - wrong_api + - scope_issue + - thread_violation + - async_timing + - memory_leak + - config_error + - logic_error + - test_isolation + - missing_validation + - missing_permission + - missing_workflow_step + - inadequate_documentation + - missing_tooling + - incomplete_setup + description: "Fundamental technical cause of the problem" + + resolution_type: + type: enum + values: + - code_fix + - migration + - config_change + - test_fix + - dependency_update + - environment_setup + - workflow_improvement + - documentation_update + - tooling_addition + - seed_data_update + description: "Type of fix applied" + + severity: + type: enum + values: + - critical + - high + - medium + - low + description: "Impact severity" + +optional_fields: + rails_version: + type: string + pattern: '^\d+\.\d+\.\d+$' + description: "Rails 
version in X.Y.Z format" + + related_components: + type: array[string] + description: "Other components involved" + + tags: + type: array[string] + max_items: 8 + description: "Search keywords, lowercase and hyphen-separated" + +validation_rules: + - "All required fields must be present" + - "Enum fields must match allowed values exactly" + - "symptoms must be a YAML array with 1-5 items" + - "date must match YYYY-MM-DD format" + - "rails_version, if provided, must match X.Y.Z format" + - "tags should be lowercase and hyphen-separated" diff --git a/.codex/skills/ce-compound-refresh/references/yaml-schema.md b/.codex/skills/ce-compound-refresh/references/yaml-schema.md new file mode 100644 index 0000000000..abc7b696ba --- /dev/null +++ b/.codex/skills/ce-compound-refresh/references/yaml-schema.md @@ -0,0 +1,50 @@ +# YAML Frontmatter Schema + +`schema.yaml` in this directory is the canonical contract for `docs/solutions/` frontmatter written by `ce:compound`. + +Use this file as the quick reference for: +- required fields +- enum values +- validation expectations +- category mapping + +## Required Fields + +- **module**: Module or area affected by the problem +- **date**: ISO date in `YYYY-MM-DD` +- **problem_type**: One of `build_error`, `test_failure`, `runtime_error`, `performance_issue`, `database_issue`, `security_issue`, `ui_bug`, `integration_issue`, `logic_error`, `developer_experience`, `workflow_issue`, `best_practice`, `documentation_gap` +- **component**: One of `rails_model`, `rails_controller`, `rails_view`, `service_object`, `background_job`, `database`, `frontend_stimulus`, `hotwire_turbo`, `email_processing`, `brief_system`, `assistant`, `authentication`, `payments`, `development_workflow`, `testing_framework`, `documentation`, `tooling` +- **symptoms**: YAML array with 1-5 concrete symptoms +- **root_cause**: One of `missing_association`, `missing_include`, `missing_index`, `wrong_api`, `scope_issue`, `thread_violation`, `async_timing`, 
`memory_leak`, `config_error`, `logic_error`, `test_isolation`, `missing_validation`, `missing_permission`, `missing_workflow_step`, `inadequate_documentation`, `missing_tooling`, `incomplete_setup` +- **resolution_type**: One of `code_fix`, `migration`, `config_change`, `test_fix`, `dependency_update`, `environment_setup`, `workflow_improvement`, `documentation_update`, `tooling_addition`, `seed_data_update` +- **severity**: One of `critical`, `high`, `medium`, `low` + +## Optional Fields + +- **rails_version**: Rails version in `X.Y.Z` format +- **related_components**: Other components involved +- **tags**: Search keywords, lowercase and hyphen-separated + +## Category Mapping + +- `build_error` -> `docs/solutions/build-errors/` +- `test_failure` -> `docs/solutions/test-failures/` +- `runtime_error` -> `docs/solutions/runtime-errors/` +- `performance_issue` -> `docs/solutions/performance-issues/` +- `database_issue` -> `docs/solutions/database-issues/` +- `security_issue` -> `docs/solutions/security-issues/` +- `ui_bug` -> `docs/solutions/ui-bugs/` +- `integration_issue` -> `docs/solutions/integration-issues/` +- `logic_error` -> `docs/solutions/logic-errors/` +- `developer_experience` -> `docs/solutions/developer-experience/` +- `workflow_issue` -> `docs/solutions/workflow-issues/` +- `best_practice` -> `docs/solutions/best-practices/` +- `documentation_gap` -> `docs/solutions/documentation-gaps/` + +## Validation Rules + +1. All required fields must be present. +2. Enum fields must match the allowed values exactly. +3. `symptoms` must be a YAML array with 1-5 items. +4. `date` must match `YYYY-MM-DD`. +5. `rails_version`, if present, must match `X.Y.Z`. 
diff --git a/.codex/skills/ce-compound/SKILL.md b/.codex/skills/ce-compound/SKILL.md index f3c47fe3d9..9662c780a8 100644 --- a/.codex/skills/ce-compound/SKILL.md +++ b/.codex/skills/ce-compound/SKILL.md @@ -21,6 +21,16 @@ Captures problem solutions while context is fresh, creating structured documenta /ce:compound [brief context] # Provide additional context hint ``` +## Support Files + +These files are the durable contract for the workflow. Read them on-demand at the step that needs them — do not bulk-load at skill start. + +- `references/schema.yaml` — canonical frontmatter fields and enum values (read when validating YAML) +- `references/yaml-schema.md` — category mapping from problem_type to directory (read when classifying) +- `assets/resolution-template.md` — section structure for new docs (read when assembling) + +When spawning subagents, pass the relevant file contents into the task prompt so they have the contract without needing cross-skill paths. + ## Execution Strategy **Always run full mode by default.** Proceed directly to Phase 1 unless the user explicitly requests compact-safe mode (e.g., `/ce:compound --compact` or "use compact mode"). @@ -68,36 +78,11 @@ Launch these subagents IN PARALLEL. Each returns text data to the orchestrator. 
- Extracts conversation history - Identifies problem type, component, symptoms - Incorporates auto memory excerpts (if provided by the orchestrator) as supplementary evidence when identifying problem type, component, and symptoms - - Validates all enum fields against the schema values below - - Maps problem_type to the `docs/solutions/` category directory + - Reads `references/schema.yaml` for enum validation + - Reads `references/yaml-schema.md` for category mapping into `docs/solutions/` - Suggests a filename using the pattern `[sanitized-problem-slug]-[date].md` - Returns: YAML frontmatter skeleton (must include `category:` field mapped from problem_type), category directory path, and suggested filename - - **Schema enum values (validate against these exactly):** - - - **problem_type**: build_error, test_failure, runtime_error, performance_issue, database_issue, security_issue, ui_bug, integration_issue, logic_error, developer_experience, workflow_issue, best_practice, documentation_gap - - **component**: rails_model, rails_controller, rails_view, service_object, background_job, database, frontend_stimulus, hotwire_turbo, email_processing, brief_system, assistant, authentication, payments, development_workflow, testing_framework, documentation, tooling - - **root_cause**: missing_association, missing_include, missing_index, wrong_api, scope_issue, thread_violation, async_timing, memory_leak, config_error, logic_error, test_isolation, missing_validation, missing_permission, missing_workflow_step, inadequate_documentation, missing_tooling, incomplete_setup - - **resolution_type**: code_fix, migration, config_change, test_fix, dependency_update, environment_setup, workflow_improvement, documentation_update, tooling_addition, seed_data_update - - **severity**: critical, high, medium, low - - **Category mapping (problem_type -> directory):** - - | problem_type | Directory | - |---|---| - | build_error | build-errors/ | - | test_failure | test-failures/ | - | 
runtime_error | runtime-errors/ | - | performance_issue | performance-issues/ | - | database_issue | database-issues/ | - | security_issue | security-issues/ | - | ui_bug | ui-bugs/ | - | integration_issue | integration-issues/ | - | logic_error | logic-errors/ | - | developer_experience | developer-experience/ | - | workflow_issue | workflow-issues/ | - | best_practice | best-practices/ | - | documentation_gap | documentation-gaps/ | + - Does not invent enum values, categories, or frontmatter fields from memory; reads the schema and mapping files above #### 2. **Solution Extractor** - Analyzes all investigation steps @@ -169,11 +154,13 @@ The orchestrating agent (main conversation) performs these steps: When updating an existing doc, preserve its file path and frontmatter structure. Update the solution, code examples, prevention tips, and any stale references. Add a `last_updated: YYYY-MM-DD` field to the frontmatter. Do not change the title unless the problem framing has materially shifted. -3. Assemble complete markdown file from the collected pieces -4. Validate YAML frontmatter against schema +3. Assemble complete markdown file from the collected pieces, reading `assets/resolution-template.md` for the section structure of new docs +4. Validate YAML frontmatter against `references/schema.yaml` 5. Create directory if needed: `mkdir -p docs/solutions/[category]/` 6. Write the file: either the updated existing doc or the new `docs/solutions/[category]/[filename].md` +When creating a new doc, preserve the section order from `assets/resolution-template.md` unless the user explicitly asks for a different structure. + ### Phase 2.5: Selective Refresh Check @@ -253,8 +240,8 @@ When context budget is tight, this mode skips parallel subagents entirely. The o The orchestrator (main conversation) performs ALL of the following in one sequential pass: 1. **Extract from conversation**: Identify the problem, root cause, and solution from conversation history. 
Also read MEMORY.md from the auto memory directory if it exists -- use any relevant notes as supplementary context alongside conversation history. Tag any memory-sourced content incorporated into the final doc with "(auto memory [claude])" -2. **Classify**: Determine category and filename (same categories as full mode) -3. **Write minimal doc**: Create `docs/solutions/[category]/[filename].md` with: +2. **Classify**: Read `references/schema.yaml` and `references/yaml-schema.md`, then determine category and filename from them +3. **Write minimal doc**: Create `docs/solutions/[category]/[filename].md` using `assets/resolution-template.md` as the base structure, with: - YAML frontmatter (title, category, date, tags) - Problem description (1-2 sentences) - Root cause (1-2 sentences) @@ -400,9 +387,9 @@ Build → Test → Find Issue → Research → Improve → Document → Validate Use /ce:compound [context] to document immediately without waiting for auto-detection. -## Routes To +## Output -`compound-docs` skill +Writes the final learning directly into `docs/solutions/`. 
## Applicable Specialized Agents diff --git a/.codex/skills/ce-compound/assets/resolution-template.md b/.codex/skills/ce-compound/assets/resolution-template.md new file mode 100644 index 0000000000..9e0620881e --- /dev/null +++ b/.codex/skills/ce-compound/assets/resolution-template.md @@ -0,0 +1,37 @@ +--- +title: [Clear problem title] +date: [YYYY-MM-DD] +category: [docs/solutions subdirectory] +module: [Module or area] +problem_type: [schema enum] +component: [schema enum] +symptoms: + - [Observable symptom 1] +root_cause: [schema enum] +resolution_type: [schema enum] +severity: [schema enum] +tags: [keyword-one, keyword-two] +--- + +# [Clear problem title] + +## Problem +[1-2 sentence description of the issue and user-visible impact] + +## Symptoms +- [Observable symptom or error] + +## What Didn't Work +- [Attempted fix and why it failed] + +## Solution +[The fix that worked, including code snippets when useful] + +## Why This Works +[Root cause explanation and why the fix addresses it] + +## Prevention +- [Concrete practice, test, or guardrail] + +## Related Issues +- [Related docs or issues, if any] diff --git a/.codex/skills/ce-compound/references/schema.yaml b/.codex/skills/ce-compound/references/schema.yaml new file mode 100644 index 0000000000..ebf2438506 --- /dev/null +++ b/.codex/skills/ce-compound/references/schema.yaml @@ -0,0 +1,127 @@ +# Documentation schema for learnings written by ce:compound +# Treat this as the canonical frontmatter contract for docs/solutions/. 
+ +required_fields: + module: + type: string + description: "Module or area affected by the problem" + + date: + type: string + pattern: '^\d{4}-\d{2}-\d{2}$' + description: "Date the problem was solved (YYYY-MM-DD)" + + problem_type: + type: enum + values: + - build_error + - test_failure + - runtime_error + - performance_issue + - database_issue + - security_issue + - ui_bug + - integration_issue + - logic_error + - developer_experience + - workflow_issue + - best_practice + - documentation_gap + description: "Primary category of the problem" + + component: + type: enum + values: + - rails_model + - rails_controller + - rails_view + - service_object + - background_job + - database + - frontend_stimulus + - hotwire_turbo + - email_processing + - brief_system + - assistant + - authentication + - payments + - development_workflow + - testing_framework + - documentation + - tooling + description: "Component involved" + + symptoms: + type: array[string] + min_items: 1 + max_items: 5 + description: "Observable symptoms such as errors or broken behavior" + + root_cause: + type: enum + values: + - missing_association + - missing_include + - missing_index + - wrong_api + - scope_issue + - thread_violation + - async_timing + - memory_leak + - config_error + - logic_error + - test_isolation + - missing_validation + - missing_permission + - missing_workflow_step + - inadequate_documentation + - missing_tooling + - incomplete_setup + description: "Fundamental technical cause of the problem" + + resolution_type: + type: enum + values: + - code_fix + - migration + - config_change + - test_fix + - dependency_update + - environment_setup + - workflow_improvement + - documentation_update + - tooling_addition + - seed_data_update + description: "Type of fix applied" + + severity: + type: enum + values: + - critical + - high + - medium + - low + description: "Impact severity" + +optional_fields: + rails_version: + type: string + pattern: '^\d+\.\d+\.\d+$' + description: "Rails 
version in X.Y.Z format" + + related_components: + type: array[string] + description: "Other components involved" + + tags: + type: array[string] + max_items: 8 + description: "Search keywords, lowercase and hyphen-separated" + +validation_rules: + - "All required fields must be present" + - "Enum fields must match allowed values exactly" + - "symptoms must be a YAML array with 1-5 items" + - "date must match YYYY-MM-DD format" + - "rails_version, if provided, must match X.Y.Z format" + - "tags should be lowercase and hyphen-separated" diff --git a/.codex/skills/ce-compound/references/yaml-schema.md b/.codex/skills/ce-compound/references/yaml-schema.md new file mode 100644 index 0000000000..abc7b696ba --- /dev/null +++ b/.codex/skills/ce-compound/references/yaml-schema.md @@ -0,0 +1,50 @@ +# YAML Frontmatter Schema + +`schema.yaml` in this directory is the canonical contract for `docs/solutions/` frontmatter written by `ce:compound`. + +Use this file as the quick reference for: +- required fields +- enum values +- validation expectations +- category mapping + +## Required Fields + +- **module**: Module or area affected by the problem +- **date**: ISO date in `YYYY-MM-DD` +- **problem_type**: One of `build_error`, `test_failure`, `runtime_error`, `performance_issue`, `database_issue`, `security_issue`, `ui_bug`, `integration_issue`, `logic_error`, `developer_experience`, `workflow_issue`, `best_practice`, `documentation_gap` +- **component**: One of `rails_model`, `rails_controller`, `rails_view`, `service_object`, `background_job`, `database`, `frontend_stimulus`, `hotwire_turbo`, `email_processing`, `brief_system`, `assistant`, `authentication`, `payments`, `development_workflow`, `testing_framework`, `documentation`, `tooling` +- **symptoms**: YAML array with 1-5 concrete symptoms +- **root_cause**: One of `missing_association`, `missing_include`, `missing_index`, `wrong_api`, `scope_issue`, `thread_violation`, `async_timing`, `memory_leak`, `config_error`, 
`logic_error`, `test_isolation`, `missing_validation`, `missing_permission`, `missing_workflow_step`, `inadequate_documentation`, `missing_tooling`, `incomplete_setup` +- **resolution_type**: One of `code_fix`, `migration`, `config_change`, `test_fix`, `dependency_update`, `environment_setup`, `workflow_improvement`, `documentation_update`, `tooling_addition`, `seed_data_update` +- **severity**: One of `critical`, `high`, `medium`, `low` + +## Optional Fields + +- **rails_version**: Rails version in `X.Y.Z` format +- **related_components**: Other components involved +- **tags**: Search keywords, lowercase and hyphen-separated + +## Category Mapping + +- `build_error` -> `docs/solutions/build-errors/` +- `test_failure` -> `docs/solutions/test-failures/` +- `runtime_error` -> `docs/solutions/runtime-errors/` +- `performance_issue` -> `docs/solutions/performance-issues/` +- `database_issue` -> `docs/solutions/database-issues/` +- `security_issue` -> `docs/solutions/security-issues/` +- `ui_bug` -> `docs/solutions/ui-bugs/` +- `integration_issue` -> `docs/solutions/integration-issues/` +- `logic_error` -> `docs/solutions/logic-errors/` +- `developer_experience` -> `docs/solutions/developer-experience/` +- `workflow_issue` -> `docs/solutions/workflow-issues/` +- `best_practice` -> `docs/solutions/best-practices/` +- `documentation_gap` -> `docs/solutions/documentation-gaps/` + +## Validation Rules + +1. All required fields must be present. +2. Enum fields must match the allowed values exactly. +3. `symptoms` must be a YAML array with 1-5 items. +4. `date` must match `YYYY-MM-DD`. +5. `rails_version`, if present, must match `X.Y.Z`. 
diff --git a/.codex/skills/ce-plan/SKILL.md b/.codex/skills/ce-plan/SKILL.md index 9a6c3ec63b..d8c1eec3a1 100644 --- a/.codex/skills/ce-plan/SKILL.md +++ b/.codex/skills/ce-plan/SKILL.md @@ -61,6 +61,12 @@ If the user references an existing plan file or there is an obvious recent match - Confirm whether to update it in place or create a new plan - If updating, preserve completed checkboxes and revise only the still-relevant sections +**Re-deepen fast path:** If the plan appears complete (all major sections present, implementation units defined, `status: active`) and the user's request is specifically about deepening or strengthening the plan — detected by signal words like "deepen", "strengthen", "confidence", "gaps", or an explicit request to re-deepen — short-circuit directly to Phase 5.3 (Confidence Check and Deepening). This avoids re-running the full planning workflow just to evaluate deepening. + +Normal editing requests (e.g., "update the test scenarios", "add a new implementation unit") should NOT trigger the fast path — they follow the standard resume flow. + +If the plan already has a `deepened: YYYY-MM-DD` frontmatter field and there is no explicit user request to re-deepen, the fast path still applies the same confidence-gap evaluation — it does not force deepening. + #### 0.2 Find Upstream Requirements Document Before asking planning questions, search `docs/brainstorms/` for files matching `*-requirements.md`. 
@@ -190,12 +196,13 @@ The repo-research-analyst output includes a structured Technology & Infrastructu **Always lean toward external research when:** - The topic is high-risk: security, payments, privacy, external APIs, migrations, compliance -- The codebase lacks relevant local patterns +- The codebase lacks relevant local patterns -- fewer than 3 direct examples of the pattern this plan needs +- Local patterns exist for an adjacent domain but not the exact one -- e.g., the codebase has HTTP clients but not webhook receivers, or has background jobs but not event-driven pub/sub. Adjacent patterns suggest the team is comfortable with the technology layer but may not know domain-specific pitfalls. When this signal is present, frame the external research query around the domain gap specifically, not the general technology - The user is exploring unfamiliar territory - The technology scan found the relevant layer absent or thin in the codebase **Skip external research when:** -- The codebase already shows a strong local pattern +- The codebase already shows a strong local pattern -- multiple direct examples (not adjacent-domain), recently touched, following current conventions - The user already knows the intended shape - Additional external context would add little practical value - The technology scan found the relevant layer well-established with existing examples to follow @@ -220,6 +227,18 @@ Summarize: - Related issues, PRs, or prior art - Any constraints that should materially shape the plan +#### 1.4b Reclassify Depth When Research Reveals External Contract Surfaces + +If the current classification is **Lightweight** and Phase 1 research found that the work touches any of these external contract surfaces, reclassify to **Standard**: + +- Environment variables consumed by external systems, CI, or other repositories +- Exported public APIs, CLI flags, or command-line interface contracts +- CI/CD configuration files (`.github/workflows/`, `Dockerfile`, deployment 
scripts) +- Shared types or interfaces imported by downstream consumers +- Documentation referenced by external URLs or linked from other systems + +This ensures flow analysis (Phase 1.5) runs and the confidence check (Phase 5.3) applies critical-section bonuses. Announce the reclassification briefly: "Reclassifying to Standard — this change touches [environment variables / exported APIs / CI config] with external consumers." + #### 1.5 Flow and Edge-Case Analysis (Conditional) For **Standard** or **Deep** plans, or when user flow completeness is still unclear, run: @@ -386,7 +405,7 @@ type: [feat|fix|refactor] status: active date: YYYY-MM-DD origin: docs/brainstorms/YYYY-MM-DD--requirements.md # include when planning from a requirements doc -deepened: YYYY-MM-DD # optional, set later by deepen-plan when the plan is substantively strengthened +deepened: YYYY-MM-DD # optional, set when the confidence check substantively strengthens the plan --- # [Plan Title] @@ -588,24 +607,297 @@ Plan written to docs/plans/[filename] **Pipeline mode:** If invoked from an automated workflow such as LFG, SLFG, or any `disable-model-invocation` context, skip interactive questions. Make the needed choices automatically and proceed to writing the plan. -#### 5.3 Post-Generation Options +#### 5.3 Confidence Check and Deepening + +After writing the plan file, automatically evaluate whether the plan needs strengthening. This phase runs without asking the user for approval. The user sees what is being strengthened but does not need to make a decision. 
+ +`document-review` and this confidence check are different: +- Use the `document-review` skill when the document needs clarity, simplification, completeness, or scope control +- This confidence check strengthens rationale, sequencing, risk treatment, and system-wide thinking when the plan is structurally sound but still needs stronger grounding + +**Pipeline mode:** This phase runs in pipeline/disable-model-invocation mode using the same gate logic described below. No user interaction needed. + +##### 5.3.1 Classify Plan Depth and Topic Risk + +Determine the plan depth from the document: +- **Lightweight** - small, bounded, low ambiguity, usually 2-4 implementation units +- **Standard** - moderate complexity, some technical decisions, usually 3-6 units +- **Deep** - cross-cutting, high-risk, or strategically important work, usually 4-8 units or phased delivery + +Build a risk profile. Treat these as high-risk signals: +- Authentication, authorization, or security-sensitive behavior +- Payments, billing, or financial flows +- Data migrations, backfills, or persistent data changes +- External APIs or third-party integrations +- Privacy, compliance, or user data handling +- Cross-interface parity or multi-surface behavior +- Significant rollout, monitoring, or operational concerns + +##### 5.3.2 Gate: Decide Whether to Deepen + +- **Lightweight** plans usually do not need deepening unless they are high-risk +- **Standard** plans often benefit when one or more important sections still look thin +- **Deep** or high-risk plans often benefit from a targeted second pass +- **Thin local grounding override:** If Phase 1.2 triggered external research because local patterns were thin (fewer than 3 direct examples or adjacent-domain match), always proceed to scoring regardless of how grounded the plan appears. When the plan was built on unfamiliar territory, claims about system behavior are more likely to be assumptions than verified facts. 
The scoring pass is cheap — if the plan is genuinely solid, scoring finds nothing and exits quickly + +If the plan already appears sufficiently grounded and the thin-grounding override does not apply, report "Confidence check passed — no sections need strengthening" and proceed to Phase 5.4. + +##### 5.3.3 Score Confidence Gaps + +Use a checklist-first, risk-weighted scoring pass. + +For each section, compute: +- **Trigger count** - number of checklist problems that apply +- **Risk bonus** - add 1 if the topic is high-risk and this section is materially relevant to that risk +- **Critical-section bonus** - add 1 for `Key Technical Decisions`, `Implementation Units`, `System-Wide Impact`, `Risks & Dependencies`, or `Open Questions` in `Standard` or `Deep` plans + +Treat a section as a candidate if: +- it hits **2+ total points**, or +- it hits **1+ point** in a high-risk domain and the section is materially important + +Choose only the top **2-5** sections by score. If deepening a lightweight plan (high-risk exception), cap at **1-2** sections. 
+ +If the plan already has a `deepened:` date: +- Prefer sections that have not yet been substantially strengthened, if their scores are comparable +- Revisit an already-deepened section only when it still scores clearly higher than alternatives + +**Section Checklists:** + +**Requirements Trace** +- Requirements are vague or disconnected from implementation units +- Success criteria are missing or not reflected downstream +- Units do not clearly advance the traced requirements +- Origin requirements are not clearly carried forward + +**Context & Research / Sources & References** +- Relevant repo patterns are named but never used in decisions or implementation units +- Cited learnings or references do not materially shape the plan +- High-risk work lacks appropriate external or internal grounding +- Research is generic instead of tied to this repo or this plan + +**Key Technical Decisions** +- A decision is stated without rationale +- Rationale does not explain tradeoffs or rejected alternatives +- The decision does not connect back to scope, requirements, or origin context +- An obvious design fork exists but the plan never addresses why one path won + +**Open Questions** +- Product blockers are hidden as assumptions +- Planning-owned questions are incorrectly deferred to implementation +- Resolved questions have no clear basis in repo context, research, or origin decisions +- Deferred items are too vague to be useful later + +**High-Level Technical Design (when present)** +- The sketch uses the wrong medium for the work +- The sketch contains implementation code rather than pseudo-code +- The non-prescriptive framing is missing or weak +- The sketch does not connect to the key technical decisions or implementation units + +**High-Level Technical Design (when absent)** *(Standard or Deep plans only)* +- The work involves DSL design, API surface design, multi-component integration, complex data flow, or state-heavy lifecycle +- Key technical decisions would be 
easier to validate with a visual or pseudo-code representation +- The approach section of implementation units is thin and a higher-level technical design would provide context + +**Implementation Units** +- Dependency order is unclear or likely wrong +- File paths or test file paths are missing where they should be explicit +- Units are too large, too vague, or broken into micro-steps +- Approach notes are thin or do not name the pattern to follow +- Test scenarios or verification outcomes are vague + +**System-Wide Impact** +- Affected interfaces, callbacks, middleware, entry points, or parity surfaces are missing +- Failure propagation is underexplored +- State lifecycle, caching, or data integrity risks are absent where relevant +- Integration coverage is weak for cross-layer work + +**Risks & Dependencies / Documentation / Operational Notes** +- Risks are listed without mitigation +- Rollout, monitoring, migration, or support implications are missing when warranted +- External dependency assumptions are weak or unstated +- Security, privacy, performance, or data risks are absent where they obviously apply + +Use the plan's own `Context & Research` and `Sources & References` as evidence. If those sections cite a pattern, learning, or risk that never affects decisions, implementation units, or verification, treat that as a confidence gap. + +##### 5.3.4 Report and Dispatch Targeted Research + +Before dispatching agents, report what sections are being strengthened and why: + +```text +Strengthening [section names] — [brief reason for each, e.g., "decision rationale is thin", "cross-boundary effects aren't mapped"] +``` + +For each selected section, choose the smallest useful agent set. Do **not** run every agent. Use at most **1-3 agents per section** and usually no more than **8 agents total**. + +Use fully-qualified agent names inside Task calls. 
+ +**Deterministic Section-to-Agent Mapping:** + +**Requirements Trace / Open Questions classification** +- `compound-engineering:workflow:spec-flow-analyzer` for missing user flows, edge cases, and handoff gaps +- `compound-engineering:research:repo-research-analyst` (Scope: `architecture, patterns`) for repo-grounded patterns, conventions, and implementation reality checks -After writing the plan file, present the options using the platform's blocking question tool when available (see Interaction Method). Otherwise present numbered options in chat and wait for the user's reply before proceeding. +**Context & Research / Sources & References gaps** +- `compound-engineering:research:learnings-researcher` for institutional knowledge and past solved problems +- `compound-engineering:research:framework-docs-researcher` for official framework or library behavior +- `compound-engineering:research:best-practices-researcher` for current external patterns and industry guidance +- Add `compound-engineering:research:git-history-analyzer` only when historical rationale or prior art is materially missing + +**Key Technical Decisions** +- `compound-engineering:review:architecture-strategist` for design integrity, boundaries, and architectural tradeoffs +- Add `compound-engineering:research:framework-docs-researcher` or `compound-engineering:research:best-practices-researcher` when the decision needs external grounding beyond repo evidence + +**High-Level Technical Design** +- `compound-engineering:review:architecture-strategist` for validating that the technical design accurately represents the intended approach and identifying gaps +- `compound-engineering:research:repo-research-analyst` (Scope: `architecture, patterns`) for grounding the technical design in existing repo patterns and conventions +- Add `compound-engineering:research:best-practices-researcher` when the technical design involves a DSL, API surface, or pattern that benefits from external validation + 
+**Implementation Units / Verification** +- `compound-engineering:research:repo-research-analyst` (Scope: `patterns`) for concrete file targets, patterns to follow, and repo-specific sequencing clues +- `compound-engineering:review:pattern-recognition-specialist` for consistency, duplication risks, and alignment with existing patterns +- Add `compound-engineering:workflow:spec-flow-analyzer` when sequencing depends on user flow or handoff completeness + +**System-Wide Impact** +- `compound-engineering:review:architecture-strategist` for cross-boundary effects, interface surfaces, and architectural knock-on impact +- Add the specific specialist that matches the risk: + - `compound-engineering:review:performance-oracle` for scalability, latency, throughput, and resource-risk analysis + - `compound-engineering:review:security-sentinel` for auth, validation, exploit surfaces, and security boundary review + - `compound-engineering:review:data-integrity-guardian` for migrations, persistent state safety, consistency, and data lifecycle risks + +**Risks & Dependencies / Operational Notes** +- Use the specialist that matches the actual risk: + - `compound-engineering:review:security-sentinel` for security, auth, privacy, and exploit risk + - `compound-engineering:review:data-integrity-guardian` for persistent data safety, constraints, and transaction boundaries + - `compound-engineering:review:data-migration-expert` for migration realism, backfills, and production data transformation risk + - `compound-engineering:review:deployment-verification-agent` for rollout checklists, rollback planning, and launch verification + - `compound-engineering:review:performance-oracle` for capacity, latency, and scaling concerns + +**Agent Prompt Shape:** + +For each selected section, pass: +- The scope prefix from the mapping above when the agent supports scoped invocation +- A short plan summary +- The exact section text +- Why the section was selected, including which checklist triggers 
fired +- The plan depth and risk profile +- A specific question to answer + +Instruct the agent to return: +- findings that change planning quality +- stronger rationale, sequencing, verification, risk treatment, or references +- no implementation code +- no shell commands + +##### 5.3.5 Choose Research Execution Mode + +Use the lightest mode that will work: + +- **Direct mode** - Default. Use when the selected section set is small and the parent can safely read the agent outputs inline. +- **Artifact-backed mode** - Use only when the selected research scope is large enough that inline returns would create unnecessary context pressure. + +Signals that justify artifact-backed mode: +- More than 5 agents are likely to return meaningful findings +- The selected section excerpts are long enough that repeating them in multiple agent outputs would be wasteful +- The topic is high-risk and likely to attract bulky source-backed analysis + +If artifact-backed mode is not clearly warranted, stay in direct mode. + +Artifact-backed mode uses a per-run scratch directory under `.context/compound-engineering/ce-plan/deepen/`. + +##### 5.3.6 Run Targeted Research + +Launch the selected agents in parallel using the execution mode chosen above. If the current platform does not support parallel dispatch, run them sequentially instead. + +Prefer local repo and institutional evidence first. Use external research only when the gap cannot be closed responsibly from repo context or already-cited sources. + +If a selected section can be improved by reading the origin document more carefully, do that before dispatching external agents. + +**Direct mode:** Have each selected agent return its findings directly to the parent. Keep the return payload focused: strongest findings only, the evidence or sources that matter, the concrete planning improvement implied by the finding. 
+ +**Artifact-backed mode:** For each selected agent, instruct it to write one compact artifact file in the scratch directory and return only a short completion summary. Each artifact should contain: target section, why selected, 3-7 findings, source-backed rationale, the specific plan change implied by each finding. No implementation code, no shell commands. + +If an artifact is missing or clearly malformed, re-run that agent or fall back to direct-mode reasoning for that section. + +If agent outputs conflict: +- Prefer repo-grounded and origin-grounded evidence over generic advice +- Prefer official framework documentation over secondary best-practice summaries when the conflict is about library behavior +- If a real tradeoff remains, record it explicitly in the plan + +##### 5.3.7 Synthesize and Update the Plan + +Strengthen only the selected sections. Keep the plan coherent and preserve its overall structure. + +Allowed changes: +- Clarify or strengthen decision rationale +- Tighten requirements trace or origin fidelity +- Reorder or split implementation units when sequencing is weak +- Add missing pattern references, file/test paths, or verification outcomes +- Expand system-wide impact, risks, or rollout treatment where justified +- Reclassify open questions between `Resolved During Planning` and `Deferred to Implementation` when evidence supports the change +- Strengthen, replace, or add a High-Level Technical Design section when the work warrants it and the current representation is weak +- Strengthen or add per-unit technical design fields where the unit's approach is non-obvious +- Add or update `deepened: YYYY-MM-DD` in frontmatter when the plan was substantively improved + +Do **not**: +- Add implementation code — no imports, exact method signatures, or framework-specific syntax. 
Pseudo-code sketches and DSL grammars are allowed +- Add git commands, commit choreography, or exact test command recipes +- Add generic `Research Insights` subsections everywhere +- Rewrite the entire plan from scratch +- Invent new product requirements, scope changes, or success criteria without surfacing them explicitly + +If research reveals a product-level ambiguity that should change behavior or scope: +- Do not silently decide it here +- Record it under `Open Questions` +- Recommend `ce:brainstorm` if the gap is truly product-defining + +##### 5.3.8 Final Checks and Cleanup + +Before proceeding to post-generation options: +- Confirm the plan is stronger in specific ways, not merely longer +- Confirm the planning boundary is intact +- Confirm origin decisions were preserved when an origin document exists + +If artifact-backed mode was used: +- Clean up the temporary scratch directory after the plan is safely updated +- If cleanup is not practical on the current platform, note where the artifacts were left + +#### 5.4 Post-Generation Options + +**Pipeline mode:** If invoked from an automated workflow such as LFG, SLFG, or any `disable-model-invocation` context, skip the interactive menu below and return control to the caller immediately. The plan file has already been written and the confidence check has already run — the caller (e.g., lfg, slfg) determines the next step. + +After the confidence check completes (or is skipped), present the options using the platform's blocking question tool when available (see Interaction Method). Otherwise present numbered options in chat and wait for the user's reply before proceeding. **Question:** "Plan ready at `docs/plans/YYYY-MM-DD-NNN---plan.md`. What would you like to do next?" 
-**Options:** +**Option ordering depends on plan characteristics.** Lead with document-review when any of these conditions are met: + +- **Deep** plan +- High-risk signals present +- The confidence check deepened 3+ sections +- **Standard** plan where Phase 1.2 triggered external research due to thin local grounding (fewer than 3 direct examples or adjacent-domain match) — when the plan was built on unfamiliar territory, the adversarial reviewer's assumption surfacing catches factual claims about system behavior that structural scoring cannot verify + +Include a recommendation explaining why: + +"This plan has [significant architectural decisions / high-risk security concerns / cross-cutting impact / thin local grounding for a key domain]. Its adversarial reviewer will stress-test the premises and decisions before implementation." + +**Options when document-review is recommended:** +1. **Run `document-review` skill** - Stress-test premises and decisions through structured document review (recommended) +2. **Open plan in editor** - Open the plan file for review +3. **Share to Proof** - Upload the plan for collaborative review and sharing +4. **Start `/ce:work`** - Begin implementing this plan in the current environment +5. **Start `/ce:work` in another session** - Begin implementing in a separate agent session when the current platform supports it +6. **Create Issue** - Create an issue in the configured tracker + +**Options for Standard or Lightweight plans:** 1. **Open plan in editor** - Open the plan file for review -2. **Run `/deepen-plan`** - Stress-test weak sections with targeted research when the plan needs more confidence -3. **Run `document-review` skill** - Improve the plan through structured document review -4. **Share to Proof** - Upload the plan for collaborative review and sharing -5. **Start `/ce:work`** - Begin implementing this plan in the current environment -6. 
**Start `/ce:work` in another session** - Begin implementing in a separate agent session when the current platform supports it -7. **Create Issue** - Create an issue in the configured tracker +2. **Run `document-review` skill** - Improve the plan through structured document review +3. **Share to Proof** - Upload the plan for collaborative review and sharing +4. **Start `/ce:work`** - Begin implementing this plan in the current environment +5. **Start `/ce:work` in another session** - Begin implementing in a separate agent session when the current platform supports it +6. **Create Issue** - Create an issue in the configured tracker Based on selection: - **Open plan in editor** → Open `docs/plans/.md` using the current platform's file-open or editor mechanism (e.g., `open` on macOS, `xdg-open` on Linux, or the IDE's file-open API) -- **`/deepen-plan`** → Call `/deepen-plan` with the plan path - **`document-review` skill** → Load the `document-review` skill with the plan path - **Share to Proof** → Upload the plan: ```bash @@ -622,8 +914,6 @@ Based on selection: - **Create Issue** → Follow the Issue Creation section below - **Other** → Accept free text for revisions and loop back to options -If running with ultrathink enabled, or the platform's reasoning/effort level is set to max or extra-high, automatically run `/deepen-plan` only when the plan is `Standard` or `Deep`, high-risk, or still shows meaningful confidence gaps in decisions, sequencing, system-wide impact, risks, or verification. - ## Issue Creation When the user selects "Create Issue", detect their project tracker from `AGENTS.md` or, if needed for compatibility, `CLAUDE.md`: diff --git a/.codex/skills/ce-review/SKILL.md b/.codex/skills/ce-review/SKILL.md index 6db20a0761..e6bd5d447d 100644 --- a/.codex/skills/ce-review/SKILL.md +++ b/.codex/skills/ce-review/SKILL.md @@ -73,7 +73,7 @@ Routing rules: ## Reviewers -13 reviewer personas in layered conditionals, plus CE-specific agents. 
See [persona-catalog.md](./references/persona-catalog.md) for the full catalog. +15 reviewer personas in layered conditionals, plus CE-specific agents. See the persona catalog included below for the full catalog. **Always-on (every review):** @@ -82,6 +82,7 @@ Routing rules: | `compound-engineering:review:correctness-reviewer` | Logic errors, edge cases, state bugs, error propagation | | `compound-engineering:review:testing-reviewer` | Coverage gaps, weak assertions, brittle tests | | `compound-engineering:review:maintainability-reviewer` | Coupling, complexity, naming, dead code, abstraction debt | +| `compound-engineering:review:project-standards-reviewer` | CLAUDE.md and AGENTS.md compliance -- frontmatter, references, naming, portability | | `compound-engineering:review:agent-native-reviewer` | Verify new features are agent-accessible | | `compound-engineering:research:learnings-researcher` | Search docs/solutions/ for past issues related to this PR | @@ -94,6 +95,7 @@ Routing rules: | `compound-engineering:review:api-contract-reviewer` | Routes, serializers, type signatures, versioning | | `compound-engineering:review:data-migrations-reviewer` | Migrations, schema changes, backfills | | `compound-engineering:review:reliability-reviewer` | Error handling, retries, timeouts, background jobs | +| `compound-engineering:review:adversarial-reviewer` | Diff >=50 changed non-test/non-generated/non-lockfile lines, or auth, payments, data mutations, external APIs | **Stack-specific conditional (selected per diff):** @@ -114,7 +116,7 @@ Routing rules: ## Review Scope -Every review spawns all 3 always-on personas plus the 2 CE always-on agents, then adds whichever cross-cutting and stack-specific conditionals fit the diff. The model naturally right-sizes: a small config change triggers 0 conditionals = 5 reviewers. A Rails auth feature might trigger security + reliability + kieran-rails + dhh-rails = 9 reviewers. 
+Every review spawns all 4 always-on personas plus the 2 CE always-on agents, then adds whichever cross-cutting and stack-specific conditionals fit the diff. The model naturally right-sizes: a small config change triggers 0 conditionals = 6 reviewers. A Rails auth feature might trigger security + reliability + kieran-rails + dhh-rails = 10 reviewers. ## Protected Artifacts @@ -324,7 +326,7 @@ Pass this to every reviewer in their spawn prompt. Intent shapes *how hard each ### Stage 3: Select reviewers -Read the diff and file list from Stage 1. The 3 always-on personas and 2 CE always-on agents are automatic. For each cross-cutting and stack-specific conditional persona in [persona-catalog.md](./references/persona-catalog.md), decide whether the diff warrants it. This is agent judgment, not keyword matching. +Read the diff and file list from Stage 1. The 4 always-on personas and 2 CE always-on agents are automatic. For each cross-cutting and stack-specific conditional persona in the persona catalog included below, decide whether the diff warrants it. This is agent judgment, not keyword matching. Stack-specific personas are additive. A Rails UI change may warrant `kieran-rails` plus `julik-frontend-races`; a TypeScript API diff may warrant `kieran-typescript` plus `api-contract` and `reliability`. @@ -337,6 +339,7 @@ Review team: - correctness (always) - testing (always) - maintainability (always) +- project-standards (always) - agent-native-reviewer (always) - learnings-researcher (always) - security -- new endpoint in routes.rb accepts user-provided redirect URL @@ -348,20 +351,30 @@ Review team: This is progress reporting, not a blocking confirmation. +### Stage 3b: Discover project standards paths + +Before spawning sub-agents, find the file paths (not contents) of all relevant standards files for the `project-standards` persona. Use the native file-search/glob tool to locate: + +1. 
Use the native file-search tool (e.g., Glob in Claude Code) to find all `**/CLAUDE.md` and `**/AGENTS.md` in the repo. +2. Filter to those whose directory is an ancestor of at least one changed file. A standards file governs all files below it (e.g., `plugins/compound-engineering/AGENTS.md` applies to everything under `plugins/compound-engineering/`). + +Pass the resulting path list to the `project-standards` persona inside a `<standards_paths>` block in its review context (see Stage 4). The persona reads the files itself, targeting only the sections relevant to the changed file types. This keeps the orchestrator's work cheap (path discovery only) and avoids bloating the subagent prompt with content the reviewer may not fully need. + ### Stage 4: Spawn sub-agents -Spawn each selected persona reviewer as a parallel sub-agent using the template in [subagent-template.md](./references/subagent-template.md). +Spawn each selected persona reviewer as a parallel sub-agent using the subagent template included below. Each persona sub-agent receives: 1. Their persona file content (identity, failure modes, calibration, suppress conditions) -2. Shared diff-scope rules from [diff-scope.md](./references/diff-scope.md) -3. The JSON output contract from [findings-schema.json](./references/findings-schema.json) +2. Shared diff-scope rules from the diff-scope reference included below +3. The JSON output contract from the findings schema included below 4. Review context: intent summary, file list, diff +5. **For `project-standards` only:** the standards file path list from Stage 3b, wrapped in a `<standards_paths>` block appended to the review context Persona sub-agents are **read-only**: they review and return structured JSON. They do not edit files or propose refactors. Read-only here means **non-mutating**, not "no shell access."
Reviewer sub-agents may use non-mutating inspection commands when needed to gather evidence or verify scope, including read-oriented `git` / `gh` usage such as `git diff`, `git show`, `git blame`, `git log`, and `gh pr view`. They must not edit files, change branches, commit, push, create PRs, or otherwise mutate the checkout or repository state. -Each persona sub-agent returns JSON matching [findings-schema.json](./references/findings-schema.json): +Each persona sub-agent returns JSON matching the findings schema included below: ```json { @@ -395,7 +408,7 @@ Convert multiple reviewer JSON payloads into one deduplicated, confidence-gated ### Stage 6: Synthesize and present -Assemble the final report using the template in [review-output-template.md](./references/review-output-template.md): +Assemble the final report using the review output template included below: 1. **Header.** Scope, intent, mode, reviewer team with per-conditional justifications. 2. **Findings.** Grouped by severity (P0, P1, P2, P3). Each finding shows file, issue, reviewer(s), confidence, and synthesized route. @@ -517,3 +530,27 @@ If "Push fixes": push the branch with `git push` to update the existing PR. ## Fallback If the platform doesn't support parallel sub-agents, run reviewers sequentially. Everything else (stages, output format, merge pipeline) stays the same. 
+ +--- + +## Included References + +### Persona Catalog + +@./references/persona-catalog.md + +### Subagent Template + +@./references/subagent-template.md + +### Diff Scope Rules + +@./references/diff-scope.md + +### Findings Schema + +@./references/findings-schema.json + +### Review Output Template + +@./references/review-output-template.md diff --git a/.codex/skills/ce-review/references/persona-catalog.md b/.codex/skills/ce-review/references/persona-catalog.md index be6dfdc27b..ddaf837000 100644 --- a/.codex/skills/ce-review/references/persona-catalog.md +++ b/.codex/skills/ce-review/references/persona-catalog.md @@ -1,8 +1,8 @@ # Persona Catalog -13 reviewer personas organized into always-on, cross-cutting conditional, and stack-specific conditional layers, plus CE-specific agents. The orchestrator uses this catalog to select which reviewers to spawn for each review. +15 reviewer personas organized into always-on, cross-cutting conditional, and stack-specific conditional layers, plus CE-specific agents. The orchestrator uses this catalog to select which reviewers to spawn for each review. -## Always-on (3 personas + 2 CE agents) +## Always-on (4 personas + 2 CE agents) Spawned on every review regardless of diff content. @@ -13,6 +13,7 @@ Spawned on every review regardless of diff content. 
| `correctness` | `compound-engineering:review:correctness-reviewer` | Logic errors, edge cases, state bugs, error propagation, intent compliance | | `testing` | `compound-engineering:review:testing-reviewer` | Coverage gaps, weak assertions, brittle tests, missing edge case tests | | `maintainability` | `compound-engineering:review:maintainability-reviewer` | Coupling, complexity, naming, dead code, premature abstraction | +| `project-standards` | `compound-engineering:review:project-standards-reviewer` | CLAUDE.md and AGENTS.md compliance -- frontmatter, references, naming, cross-platform portability, tool selection | **CE agents (unstructured output, synthesized separately):** @@ -21,7 +22,7 @@ Spawned on every review regardless of diff content. | `compound-engineering:review:agent-native-reviewer` | Verify new features are agent-accessible | | `compound-engineering:research:learnings-researcher` | Search docs/solutions/ for past issues related to this PR's modules and patterns | -## Conditional (5 personas) +## Conditional (6 personas) Spawned when the orchestrator identifies relevant patterns in the diff. The orchestrator reads the full diff and reasons about selection -- this is agent judgment, not keyword matching. @@ -32,6 +33,7 @@ Spawned when the orchestrator identifies relevant patterns in the diff. 
The orch | `api-contract` | `compound-engineering:review:api-contract-reviewer` | Route definitions, serializer/interface changes, event schemas, exported type signatures, API versioning | | `data-migrations` | `compound-engineering:review:data-migrations-reviewer` | Migration files, schema changes, backfill scripts, data transformations | | `reliability` | `compound-engineering:review:reliability-reviewer` | Error handling, retry logic, circuit breakers, timeouts, background jobs, async handlers, health checks | +| `adversarial` | `compound-engineering:review:adversarial-reviewer` | Diff has >=50 changed non-test, non-generated, non-lockfile lines, OR touches auth, payments, data mutations, external API integrations, or other high-risk domains | ## Stack-Specific Conditional (5 personas) @@ -56,7 +58,7 @@ These CE-native agents provide specialized analysis beyond what the persona agen ## Selection rules -1. **Always spawn all 3 always-on personas** plus the 2 CE always-on agents. +1. **Always spawn all 4 always-on personas** plus the 2 CE always-on agents. 2. **For each cross-cutting conditional persona**, the orchestrator reads the diff and decides whether the persona's domain is relevant. This is a judgment call, not a keyword match. 3. **For each stack-specific conditional persona**, use file types and changed patterns as a starting point, then decide whether the diff actually introduces meaningful work for that reviewer. Do not spawn language-specific reviewers just because one config or generated file happens to match the extension. 4. **For CE conditional agents**, spawn when the diff includes migration files (`db/migrate/*.rb`, `db/schema.rb`) or data backfill scripts. 
diff --git a/.codex/skills/cli-agent-readiness-reviewer/SKILL.md b/.codex/skills/cli-agent-readiness-reviewer/SKILL.md new file mode 100644 index 0000000000..606a8cb25b --- /dev/null +++ b/.codex/skills/cli-agent-readiness-reviewer/SKILL.md @@ -0,0 +1,435 @@ +--- +name: cli-agent-readiness-reviewer +description: Reviews CLI source code, plans, or specs for AI agent readiness using a severity-based rubric focused on whether a CLI is merely usable by agents or genuinely optimized for them. +model: inherit +color: yellow +--- + + + +Context: The user is building a CLI and wants to check if the code is agent-friendly. +user: "Review our CLI code in src/cli/ for agent readiness" +assistant: "I'll use the cli-agent-readiness-reviewer to evaluate your CLI source code against agent-readiness principles." +The user is building a CLI. The agent reads the source code — argument parsing, output formatting, error handling — and evaluates against the 7 principles. + + +Context: The user has a plan for a CLI they want to build. +user: "We're designing a CLI for our deployment platform. Here's the spec — how agent-ready is this design?" +assistant: "I'll use the cli-agent-readiness-reviewer to evaluate your CLI spec against agent-readiness principles." +The CLI doesn't exist yet. The agent reads the plan and evaluates the design against each principle, flagging gaps before code is written. + + +Context: The user wants to review a PR that adds CLI commands. +user: "This PR adds new subcommands to our CLI. Can you check them for agent friendliness?" +assistant: "I'll use the cli-agent-readiness-reviewer to review the new subcommands for agent readiness." +The agent reads the changed files, finds the new subcommand definitions, and evaluates them against the 7 principles. + + +Context: The user wants to evaluate specific commands or flags, not the whole CLI. 
+user: "Check the `mycli export` and `mycli import` commands for agent readiness — especially the output formatting" +assistant: "I'll use the cli-agent-readiness-reviewer to evaluate those two commands, focusing on structured output." +The user scoped the review to specific commands and a specific concern. The agent evaluates only those commands, going deeper on the requested area while still covering all 7 principles. + + + +# CLI Agent-Readiness Reviewer + +You review CLI **source code**, **plans**, and **specs** for AI agent readiness — how well the CLI will work when the "user" is an autonomous agent, not a human at a keyboard. + +You are a code reviewer, not a black-box tester. Read the implementation (or design) to understand what the CLI does, then evaluate it against the 7 principles below. + +This is not a generic CLI review. It is an **agent-optimization review**: +- The question is not only "can an agent use this CLI?" +- The question is also "where will an agent waste time, tokens, retries, or operator intervention?" + +Do **not** reduce the review to pass/fail. 
Classify findings using: +- **Blocker** — prevents reliable autonomous use +- **Friction** — usable, but costly, brittle, or inefficient for agents +- **Optimization** — not broken, but materially improvable for better agent throughput and reliability + +Evaluate commands by **command type** — different types have different priority principles: + +| Command type | Most important principles | +|---|---| +| Read/query | Structured output, bounded output, composability | +| Mutating | Non-interactive, actionable errors, safety, idempotence | +| Streaming/logging | Filtering, truncation controls, clean stderr/stdout | +| Interactive/bootstrap | Automation escape hatch, `--no-input`, scriptable alternatives | +| Bulk/export | Pagination, range selection, machine-readable output | + +## Step 1: Locate the CLI and Identify the Framework + +Determine what you're reviewing: + +- **Source code** — read argument parsing setup, command definitions, output formatting, error handling, help text +- **Plan or spec** — evaluate the design; flag principles the document doesn't address as **gaps** (opportunities to strengthen before implementation) + +If the user doesn't point to specific files, search the codebase: +- Argument parsing libraries: Click, argparse, Commander, clap, Cobra, yargs, oclif, Thor +- Entry points: `cli.py`, `cli.ts`, `main.rs`, `bin/`, `cmd/`, `src/cli/` +- Package.json `bin` field, setup.py `console_scripts`, Cargo.toml `[[bin]]` + +**Identify the framework early.** Your recommendations, what you credit as "already handled," and what you flag as missing all depend on knowing what the framework gives you for free vs. what the developer must implement. See the Framework Idioms Reference at the end of this document. + +**Scoping:** If the user names specific commands, flags, or areas of concern, evaluate those — don't override their focus with your own selection. 
When no scope is given, identify 3-5 primary subcommands using these signals: +- **README/docs references** — commands featured in documentation are primary workflows +- **Test coverage** — commands with the most test cases are the most exercised paths +- **Code volume** — a 200-line command handler matters more than a 20-line one +- Don't use help text ordering as a priority signal — most frameworks list subcommands alphabetically + +Before scoring anything, identify the command type for each command you review. Do not over-apply a principle where it does not fit. Example: strict idempotence matters far more for `deploy` than for `logs tail`. + +## Step 2: Evaluate Against the 7 Principles + +Evaluate in priority order: check for **Blockers** first across all principles, then **Friction**, then **Optimization** opportunities. This ensures the most critical issues are surfaced before refinements. For source code, cite specific files, functions, and line numbers. For plans, quote the relevant sections. For principles a plan doesn't mention, flag the gap and recommend what to add. + +For each principle, answer: +1. Is there a **Blocker**, **Friction**, or **Optimization** issue here? +2. What is the evidence? +3. How does the command type affect the assessment? +4. What is the most framework-idiomatic fix? + +--- + +### Principle 1: Non-Interactive by Default for Automation Paths + +Any command an agent might reasonably automate should be invocable without prompts. Interactive mode can exist, but it should be a convenience layer, not the only path. 
+ +**In code, look for:** +- Interactive prompt library imports (inquirer, prompt_toolkit, dialoguer, readline) +- `input()` / `readline()` calls without TTY guards +- Confirmation prompts without `--yes`/`--force` bypass +- Wizard or multi-step flows without flag-based alternatives +- TTY detection gating interactivity (`process.stdout.isTTY`, `sys.stdin.isatty()`, `atty::is()`) +- `--no-input` or `--non-interactive` flag definitions + +**In plans, look for:** interactive flows without flag bypass, setup wizards without `--no-input`, no mention of CI/automation usage. + +**Severity guidance:** +- **Blocker**: a primary automation path depends on a prompt or TUI flow +- **Friction**: most prompts are bypassable, but behavior is inconsistent or poorly documented +- **Optimization**: explicit non-interactive affordances exist, but could be made more uniform or discoverable + +When relevant, suggest a practical test purpose such as: "detach stdin and confirm the command exits or errors within a timeout rather than hanging." + +--- + +### Principle 2: Structured, Parseable Output + +Commands that return data should expose a stable machine-readable representation and predictable process semantics. + +**In code, look for:** +- `--json`, `--format`, or `--output` flag definitions on data-returning commands +- Serialization calls (JSON.stringify, json.dumps, serde_json, to_json) +- Explicit exit code setting with distinct codes for distinct failure types +- stdout vs stderr separation — data to stdout, messages/logs to stderr +- What success output contains — structured data with IDs and URLs, or just "Done!" +- TTY checks before emitting color codes, spinners, progress bars, or emoji + +**In plans, look for:** output format definitions, exit code semantics, whether structured output is mentioned at all. 
+ +**Severity guidance:** +- **Blocker**: data-bearing commands are prose-only, ANSI-heavy, or mix data with diagnostics in ways that break parsing +- **Friction**: some commands expose machine-readable output but coverage is inconsistent or polluted by stderr/stdout mixing +- **Optimization**: structured output exists, but fields, identifiers, or format consistency could be improved + +Do not require `--json` literally if the CLI has another well-documented stable machine format. The issue is machine readability, not one flag spelling. + +--- + +### Principle 3: Progressive Help Discovery + +Agents discover capabilities incrementally: top-level help, then subcommand help, then examples. Review help for discoverability, not just the presence of the word "example." + +**In code, look for:** +- Per-subcommand description strings and example strings +- Whether the argument parser generates layered help (most frameworks do by default — note when this is free) +- Help text verbosity — under ~80 lines per subcommand is good; 200+ lines floods agent context +- Whether common flags are listed before obscure ones + +**In plans, look for:** help text strategy, whether examples are planned per subcommand. + +Assess whether each important subcommand help includes: +- A one-line purpose +- A concrete invocation pattern +- Required arguments or required flags +- Important modifiers or safety flags + +**Severity guidance:** +- **Blocker**: subcommand help is missing or too incomplete to discover invocation shape +- **Friction**: help exists but omits examples, required inputs, or important modifiers +- **Optimization**: help works but could be tightened, reordered, or made more example-driven + +--- + +### Principle 4: Fail Fast with Actionable Errors + +When input is missing or invalid, error immediately with a message that helps the next attempt succeed. + +**In code, look for:** +- What happens when required args are missing — usage hint, or prompt, or hang? 
+- Custom error messages that include correct syntax or valid values +- Input validation before side effects (not after partial execution) +- Error output that includes example invocations +- Try/catch that swallows errors silently or returns generic messages + +**In plans, look for:** error handling strategy, error message format, validation approach. + +**Severity guidance:** +- **Blocker**: failures are silent, vague, hanging, or buried in stack traces +- **Friction**: the error identifies the failure but not the correction path +- **Optimization**: the error is actionable but could better suggest valid values, examples, or next commands + +--- + +### Principle 5: Safe Retries and Explicit Mutation Boundaries + +Agents retry, resume, and sometimes replay commands. Mutating commands should make that safe when possible, and dangerous mutations should be explicit. + +**In code, look for:** +- `--dry-run` flag on state-changing commands and whether it's actually wired up +- `--force`/`--yes` flags (presence indicates the default path has safety prompts — good) +- "Already exists" handling, upsert logic, create-or-update patterns +- Whether destructive operations (delete, overwrite) have confirmation gates + +**In plans, look for:** idempotency requirements, dry-run support, destructive action handling. 
+ +Scope this principle by command type: +- For `create`, `update`, `apply`, `deploy`, and similar commands, idempotence or duplicate detection is high-value +- For `send`, `trigger`, `append`, or `run-now` commands, exact idempotence may be impossible; in those cases, explicit mutation boundaries and audit-friendly output matter more + +**Severity guidance:** +- **Blocker**: retries can easily duplicate or corrupt state with no warning or visibility +- **Friction**: some safety affordances exist, but they are inconsistent or too opaque for automation +- **Optimization**: command safety is acceptable, but previews, identifiers, or duplicate detection could be stronger + +--- + +### Principle 6: Composable and Predictable Command Structure + +Agents chain commands and pipe output between tools. The CLI should be easy to compose without brittle adapters or memorized exceptions. + +**In code, look for:** +- Flag-based vs positional argument patterns +- Stdin reading support (`--stdin`, reading from pipe, `-` as filename alias) +- Consistent command structure across related subcommands +- Output clean when piped — no color, no spinners, no interactive noise when not a TTY + +**In plans, look for:** command naming conventions, stdin/pipe support, composability examples. + +Do not treat all positional arguments as a flaw. Conventional positional forms may be fine. Focus on ambiguity, inconsistency, and pipeline-hostile behavior. + +**Severity guidance:** +- **Blocker**: commands cannot be chained cleanly or behave unpredictably in pipelines +- **Friction**: some commands are pipeable, but naming, ordering, or stdin behavior is inconsistent +- **Optimization**: command structure is serviceable, but could be more regular or easier for agents to infer + +--- + +### Principle 7: Bounded, High-Signal Responses + +Every token of CLI output consumes limited agent context. 
Large outputs are sometimes justified, but defaults should be proportionate to the common task and provide ways to narrow. + +**In code, look for:** +- Default limits on list/query commands (e.g., `default=50`, `max_results=100`) +- `--limit`, `--filter`, `--since`, `--max` flag definitions +- `--quiet`/`--verbose` output modes +- Pagination implementation (cursor, offset, page) +- Whether unbounded queries are possible by default — an unfiltered `list` returning thousands of rows is a context killer +- Truncation messages that guide the agent toward narrowing results + +**In plans, look for:** default result limits, filtering/pagination design, verbosity controls. + +Treat fixed thresholds as heuristics, not laws. A default above roughly 500 lines is often a `Friction` signal for routine queries, but may be justified for explicit bulk/export commands. + +**Severity guidance:** +- **Blocker**: a routine query command dumps huge output by default with no narrowing controls +- **Friction**: narrowing exists, but defaults are too broad or truncation provides no guidance +- **Optimization**: defaults are acceptable, but could be better bounded or more teachable to agents + +--- + +## Step 3: Produce the Report + +```markdown +## CLI Agent-Readiness Review: + +**Input type**: Source code / Plan / Spec +**Framework**: +**Command types reviewed**: +**Files reviewed**: +**Overall judgment**: + +### Scorecard + +| # | Principle | Severity | Key Finding | +|---|-----------|----------|-------------| +| 1 | Non-interactive automation paths | Blocker/Friction/Optimization/None | | +| 2 | Structured output | Blocker/Friction/Optimization/None | | +| 3 | Progressive help discovery | Blocker/Friction/Optimization/None | | +| 4 | Actionable errors | Blocker/Friction/Optimization/None | | +| 5 | Safe retries and mutation boundaries | Blocker/Friction/Optimization/None | | +| 6 | Composable command structure | Blocker/Friction/Optimization/None | | +| 7 | Bounded responses | 
Blocker/Friction/Optimization/None | | + +### Detailed Findings + +#### Principle 1: Non-Interactive Automation Paths — + +**Evidence:** + + +**Command-type context:** + + +**Framework context:** + + +**Assessment:** + + +**Recommendation:** + + +**Practical check or test to add:** + + +[repeat for each principle] + +### Highest-Impact Improvements + +1. +2. ... + +### What's Working Well + +- +``` + +## Review Guidelines + +- **Cite evidence.** File paths, line numbers, function names for code. Quoted sections for plans. Never score on impressions. +- **Credit the framework.** When the argument parser handles something automatically, note it. The principle is satisfied even if the developer didn't explicitly implement it. Don't flag what's already free. +- **Recommendations must be framework-idiomatic.** "Add `@click.option('--json', 'output_json', is_flag=True)` to the deploy command" is useful. "Add a --json flag" is generic. Use the patterns from the Framework Idioms Reference. +- **Include a practical check or test assertion per finding.** Prefer test purpose plus an environment-adaptable assertion over brittle shell snippets that assume a specific OS utility layout. +- **Gaps are opportunities.** For plans and specs, a principle not addressed is a gap to fill before implementation, not a failure. +- **Give credit for what works.** When a CLI is partially compliant, acknowledge the good patterns. +- **Do not flatten everything into a score.** The review should tell the user where agent use will break, where it will be costly, and where it is already strong. +- **Use the principle names consistently.** Keep wording aligned with the 7 principle names defined in this document. + +--- + +## Framework Idioms Reference + +Once you identify the CLI framework, use this knowledge to calibrate your review. Credit what the framework handles automatically. Flag what it doesn't. Write recommendations using idiomatic patterns for that framework. 
+ +### Python — Click + +**Gives you for free:** +- Layered help with `--help` on every command/group +- Error + usage hint on missing required options +- Type validation on parameters + +**Doesn't give you — must implement:** +- `--json` output — add `@click.option('--json', 'output_json', is_flag=True)` and branch on it in the handler +- TTY detection — use `sys.stdout.isatty()` or `click.get_text_stream('stdout').isatty()` +- `--no-input` — Click prompts for missing values when `prompt=True` is set on an option; make sure required inputs are options with `required=True` (errors on missing) not `prompt=True` (blocks agents) +- Stdin reading — use `click.get_text_stream('stdin')` or `type=click.File('-')` +- Exit codes — Click uses `sys.exit(1)` on errors by default but doesn't differentiate error types; use `ctx.exit(code)` for distinct codes + +**Anti-patterns to flag:** +- `prompt=True` on options without a `--no-input` guard +- `click.confirm()` without checking `--yes`/`--force` first +- Using `click.echo()` for both data and messages (no stdout/stderr separation) — use `click.echo(..., err=True)` for messages + +### Python — argparse + +**Gives you for free:** +- Usage/error message on missing required args +- Layered help via subparsers + +**Doesn't give you — must implement:** +- Examples in help text — use `epilog` with `RawDescriptionHelpFormatter` +- `--json` output — entirely manual +- Stdin support — use `type=argparse.FileType('r')` with `default='-'` or `nargs='?'` +- TTY detection, exit codes, output separation — all manual + +**Anti-patterns to flag:** +- Using `input()` for missing values instead of making arguments required +- Default `HelpFormatter` truncating epilog examples — need `RawDescriptionHelpFormatter` + +### Go — Cobra + +**Gives you for free:** +- Layered help with usage and examples fields — but only if `Example:` field is populated +- Error on unknown flags +- Consistent subcommand structure via `AddCommand` +- `--help` on every 
command + +**Doesn't give you — must implement:** +- `--json`/`--output` — common pattern is a persistent `--output` flag on root with `json`/`table`/`yaml` values +- `--dry-run` — entirely manual +- Stdin — use `os.Stdin` or `cobra.ExactArgs` for validation, `cmd.InOrStdin()` for reading +- TTY detection — use `golang.org/x/term` or `mattn/go-isatty` + +**Anti-patterns to flag:** +- Empty `Example:` fields on commands +- Using `fmt.Println` for both data and errors — use `cmd.OutOrStdout()` and `cmd.ErrOrStderr()` +- `RunE` functions that return `nil` on failure instead of an error + +### Rust — clap + +**Gives you for free:** +- Layered help from derive macros +- Compile-time validation of required args +- Typed parsing with strong error messages +- Consistent subcommand structure via enums + +**Doesn't give you — must implement:** +- `--json` output — use `serde_json::to_string_pretty` with a `--format` flag +- `--dry-run` — manual flag and logic +- Stdin — use `std::io::stdin()` with `is_terminal::IsTerminal` to detect piped input +- TTY detection — `is-terminal` crate (`is_terminal::IsTerminal` trait) +- Exit codes — use `std::process::exit()` with distinct codes or `ExitCode` + +**Anti-patterns to flag:** +- Using `println!` for both data and diagnostics — use `eprintln!` for messages +- No examples in help text — add via `#[command(after_help = "Examples:\n mycli deploy --env staging")]` + +### Node.js — Commander / yargs / oclif + +**Gives you for free:** +- Commander: layered help, error on missing required, `--help` on all commands +- yargs: `.demandOption()` for required flags, `.example()` for help examples, `.fail()` for custom errors +- oclif: layered help, examples; `--json` available but requires per-command opt-in via `static enableJsonFlag = true` + +**Doesn't give you — must implement:** +- Commander: no built-in `--json`; stdin reading; TTY detection (`process.stdout.isTTY`) +- yargs: `--json` is manual; stdin via `process.stdin` +- oclif: 
`--json` requires per-command opt-in via `static enableJsonFlag = true` + +**Anti-patterns to flag:** +- Using `inquirer` or `prompts` without checking `process.stdin.isTTY` first +- `console.log` for both data and messages — use `process.stdout.write` and `process.stderr.write` +- Commander `.action()` that calls `process.exit(0)` on errors + +### Ruby — Thor + +**Gives you for free:** +- Layered help, subcommand structure +- `method_option` for named flags +- Error on unknown flags + +**Doesn't give you — must implement:** +- `--json` output — manual +- Stdin — use `$stdin.read` or `ARGF` +- TTY detection — `$stdout.tty?` +- Exit codes — `exit 1` or `abort` + +**Anti-patterns to flag:** +- Using `ask()` or `yes?()` without a `--yes` flag bypass +- `say` for both data and messages — use `$stderr.puts` for messages + +### Framework not listed + +If the framework isn't above, apply the same pattern: identify what the framework gives for free by reading its documentation or source, what must be implemented manually, and what idiomatic patterns exist for each principle. Note your findings in the report so the user understands the basis for your recommendations. diff --git a/.codex/skills/compound-docs/SKILL.md b/.codex/skills/compound-docs/SKILL.md deleted file mode 100644 index 6e1b53538f..0000000000 --- a/.codex/skills/compound-docs/SKILL.md +++ /dev/null @@ -1,511 +0,0 @@ ---- -name: compound-docs -description: Capture solved problems as categorized documentation with YAML frontmatter for fast lookup -disable-model-invocation: true -allowed-tools: - - Read - - Write - - Bash - - Grep -preconditions: - - Problem has been solved (not in-progress) - - Solution has been verified working ---- - -# compound-docs Skill - -**Purpose:** Automatically document solved problems to build searchable institutional knowledge with category-based organization (enum-validated problem types). 
- -## Overview - -This skill captures problem solutions immediately after confirmation, creating structured documentation that serves as a searchable knowledge base for future sessions. - -**Organization:** Single-file architecture - each problem documented as one markdown file in its symptom category directory (e.g., `docs/solutions/performance-issues/n-plus-one-briefs.md`). Files use YAML frontmatter for metadata and searchability. - ---- - - - -## 7-Step Process - - -### Step 1: Detect Confirmation - -**Auto-invoke after phrases:** - -- "that worked" -- "it's fixed" -- "working now" -- "problem solved" -- "that did it" - -**OR manual:** `/doc-fix` command - -**Non-trivial problems only:** - -- Multiple investigation attempts needed -- Tricky debugging that took time -- Non-obvious solution -- Future sessions would benefit - -**Skip documentation for:** - -- Simple typos -- Obvious syntax errors -- Trivial fixes immediately corrected - - - -### Step 2: Gather Context - -Extract from conversation history: - -**Required information:** - -- **Module name**: Which module or component had the problem -- **Symptom**: Observable error/behavior (exact error messages) -- **Investigation attempts**: What didn't work and why -- **Root cause**: Technical explanation of actual problem -- **Solution**: What fixed it (code/config changes) -- **Prevention**: How to avoid in future - -**Environment details:** - -- Rails version -- Stage (0-6 or post-implementation) -- OS version -- File/line references - -**BLOCKING REQUIREMENT:** If critical context is missing (module name, exact error, stage, or resolution steps), ask user and WAIT for response before proceeding to Step 3: - -``` -I need a few details to document this properly: - -1. Which module had this issue? [ModuleName] -2. What was the exact error message or symptom? -3. What stage were you in? 
(0-6 or post-implementation) - -[Continue after user provides details] -``` - - - -### Step 3: Check Existing Docs - -Search docs/solutions/ for similar issues: - -```bash -# Search by error message keywords -grep -r "exact error phrase" docs/solutions/ - -# Search by symptom category -ls docs/solutions/[category]/ -``` - -**IF similar issue found:** - -THEN present decision options: - -``` -Found similar issue: docs/solutions/[path] - -What's next? -1. Create new doc with cross-reference (recommended) -2. Update existing doc (only if same root cause) -3. Other - -Choose (1-3): _ -``` - -WAIT for user response, then execute chosen action. - -**ELSE** (no similar issue found): - -Proceed directly to Step 4 (no user interaction needed). - - - -### Step 4: Generate Filename - -Format: `[sanitized-symptom]-[module]-[YYYYMMDD].md` - -**Sanitization rules:** - -- Lowercase -- Replace spaces with hyphens -- Remove special characters except hyphens -- Truncate to reasonable length (< 80 chars) - -**Examples:** - -- `missing-include-BriefSystem-20251110.md` -- `parameter-not-saving-state-EmailProcessing-20251110.md` -- `webview-crash-on-resize-Assistant-20251110.md` - - - -### Step 5: Validate YAML Schema - -**CRITICAL:** All docs require validated YAML frontmatter with enum validation. - - - -**Validate against schema:** -Load `schema.yaml` and classify the problem against the enum values defined in [yaml-schema.md](./references/yaml-schema.md). Ensure all required fields are present and match allowed values exactly. - -**BLOCK if validation fails:** - -``` -❌ YAML validation failed - -Errors: -- problem_type: must be one of schema enums, got "compilation_error" -- severity: must be one of [critical, high, medium, low], got "invalid" -- symptoms: must be array with 1-5 items, got string - -Please provide corrected values. 
-```
-
-**GATE ENFORCEMENT:** Do NOT proceed to Step 6 (Create Documentation) until YAML frontmatter passes all validation rules defined in `schema.yaml`.
-
-
-
-
-### Step 6: Create Documentation
-
-**Determine category from problem_type:** Use the category mapping defined in [yaml-schema.md](./references/yaml-schema.md) (lines 49-61).
-
-**Create documentation file:**
-
-```bash
-PROBLEM_TYPE="[from validated YAML]"
-CATEGORY="[mapped from problem_type]"
-FILENAME="[generated-filename].md"
-DOC_PATH="docs/solutions/${CATEGORY}/${FILENAME}"
-
-# Create directory if needed
-mkdir -p "docs/solutions/${CATEGORY}"
-
-# Write documentation using template from assets/resolution-template.md
-# (Content populated with Step 2 context and validated YAML frontmatter)
-```
-
-**Result:**
-- Single file in category directory
-- Enum validation ensures consistent categorization
-
-**Create documentation:** Populate the structure from `assets/resolution-template.md` with context gathered in Step 2 and validated YAML frontmatter from Step 5.
-
-
-
-### Step 7: Cross-Reference & Critical Pattern Detection
-
-If similar issues found in Step 3:
-
-**Update existing doc:**
-
-```bash
-# Add Related Issues link to similar doc
-echo "- See also: [$FILENAME]($DOC_PATH)" >> [similar-doc.md]
-```
-
-**Update new doc:**
-Already includes cross-reference from Step 6.
-
-**Update patterns if applicable:**
-
-If this represents a common pattern (3+ similar issues):
-
-```bash
-# Add to docs/solutions/patterns/common-solutions.md
-cat >> docs/solutions/patterns/common-solutions.md << 'EOF'
-
-## [Pattern Name]
-
-**Common symptom:** [Description]
-**Root cause:** [Technical explanation]
-**Solution pattern:** [General approach]
-
-**Examples:**
-- [Link to doc 1]
-- [Link to doc 2]
-- [Link to doc 3]
-EOF
-```
-
-**Critical Pattern Detection (Optional Proactive Suggestion):**
-
-If this issue has automatic indicators suggesting it might be critical:
-- Severity: `critical` in YAML
-- Affects multiple modules OR foundational stage (Stage 2 or 3)
-- Non-obvious solution
-
-Then in the decision menu (presented after Step 7), add a note:
-```
-💡 This might be worth adding to Required Reading (Option 2)
-```
-
-But **NEVER auto-promote**. User decides via decision menu (Option 2).
-
-**Template for critical pattern addition:**
-
-When user selects Option 2 (Add to Required Reading), use the template from `assets/critical-pattern-template.md` to structure the pattern entry. Number it sequentially based on existing patterns in `docs/solutions/patterns/critical-patterns.md`.
-
-
-
-
----
-
-
-
-## Decision Menu After Capture
-
-After successful documentation, present options and WAIT for user response:
-
-```
-✓ Solution documented
-
-File created:
-- docs/solutions/[category]/[filename].md
-
-What's next?
-1. Continue workflow (recommended)
-2. Add to Required Reading - Promote to critical patterns (critical-patterns.md)
-3. Link related issues - Connect to similar problems
-4. Add to existing skill - Add to a learning skill (e.g., hotwire-native)
-5. Create new skill - Extract into new learning skill
-6. View documentation - See what was captured
-7. 
Other -``` - -**Handle responses:** - -**Option 1: Continue workflow** - -- Return to calling skill/workflow -- Documentation is complete - -**Option 2: Add to Required Reading** ⭐ PRIMARY PATH FOR CRITICAL PATTERNS - -User selects this when: -- System made this mistake multiple times across different modules -- Solution is non-obvious but must be followed every time -- Foundational requirement (Rails, Rails API, threading, etc.) - -Action: -1. Extract pattern from the documentation -2. Format as ❌ WRONG vs ✅ CORRECT with code examples -3. Add to `docs/solutions/patterns/critical-patterns.md` -4. Add cross-reference back to this doc -5. Confirm: "✓ Added to Required Reading. All subagents will see this pattern before code generation." - -**Option 3: Link related issues** - -- Prompt: "Which doc to link? (provide filename or describe)" -- Search docs/solutions/ for the doc -- Add cross-reference to both docs -- Confirm: "✓ Cross-reference added" - -**Option 4: Add to existing skill** - -User selects this when the documented solution relates to an existing learning skill: - -Action: -1. Prompt: "Which skill? (hotwire-native, etc.)" -2. Determine which reference file to update (resources.md, patterns.md, or examples.md) -3. Add link and brief description to appropriate section -4. Confirm: "✓ Added to [skill-name] skill in [file]" - -Example: For Hotwire Native Tailwind variants solution: -- Add to `hotwire-native/references/resources.md` under "Project-Specific Resources" -- Add to `hotwire-native/references/examples.md` with link to solution doc - -**Option 5: Create new skill** - -User selects this when the solution represents the start of a new learning domain: - -Action: -1. Prompt: "What should the new skill be called? (e.g., stripe-billing, email-processing)" -2. Run `python3 .claude/skills/skill-creator/scripts/init_skill.py [skill-name]` -3. Create initial reference files with this solution as first example -4. 
Confirm: "✓ Created new [skill-name] skill with this solution as first example" - -**Option 6: View documentation** - -- Display the created documentation -- Present decision menu again - -**Option 7: Other** - -- Ask what they'd like to do - - - ---- - - - -## Integration Points - -**Invoked by:** -- /compound command (primary interface) -- Manual invocation in conversation after solution confirmed -- Can be triggered by detecting confirmation phrases like "that worked", "it's fixed", etc. - -**Invokes:** -- None (terminal skill - does not delegate to other skills) - -**Handoff expectations:** -All context needed for documentation should be present in conversation history before invocation. - - - ---- - - - -## Success Criteria - -Documentation is successful when ALL of the following are true: - -- ✅ YAML frontmatter validated (all required fields, correct formats) -- ✅ File created in docs/solutions/[category]/[filename].md -- ✅ Enum values match schema.yaml exactly -- ✅ Code examples included in solution section -- ✅ Cross-references added if related issues found -- ✅ User presented with decision menu and action confirmed - - - ---- - -## Error Handling - -**Missing context:** - -- Ask user for missing details -- Don't proceed until critical info provided - -**YAML validation failure:** - -- Show specific errors -- Present retry with corrected values -- BLOCK until valid - -**Similar issue ambiguity:** - -- Present multiple matches -- Let user choose: new doc, update existing, or link as duplicate - -**Module not in modules documentation:** - -- Warn but don't block -- Proceed with documentation -- Suggest: "Add [Module] to modules documentation if not there" - ---- - -## Execution Guidelines - -**MUST do:** -- Validate YAML frontmatter (BLOCK if invalid per Step 5 validation gate) -- Extract exact error messages from conversation -- Include code examples in solution section -- Create directories before writing files (`mkdir -p`) -- Ask user and WAIT if critical 
context missing - -**MUST NOT do:** -- Skip YAML validation (validation gate is blocking) -- Use vague descriptions (not searchable) -- Omit code examples or cross-references - ---- - -## Quality Guidelines - -**Good documentation has:** - -- ✅ Exact error messages (copy-paste from output) -- ✅ Specific file:line references -- ✅ Observable symptoms (what you saw, not interpretations) -- ✅ Failed attempts documented (helps avoid wrong paths) -- ✅ Technical explanation (not just "what" but "why") -- ✅ Code examples (before/after if applicable) -- ✅ Prevention guidance (how to catch early) -- ✅ Cross-references (related issues) - -**Avoid:** - -- ❌ Vague descriptions ("something was wrong") -- ❌ Missing technical details ("fixed the code") -- ❌ No context (which version? which file?) -- ❌ Just code dumps (explain why it works) -- ❌ No prevention guidance -- ❌ No cross-references - ---- - -## Example Scenario - -**User:** "That worked! The N+1 query is fixed." - -**Skill activates:** - -1. **Detect confirmation:** "That worked!" triggers auto-invoke -2. **Gather context:** - - Module: Brief System - - Symptom: Brief generation taking >5 seconds, N+1 query when loading email threads - - Failed attempts: Added pagination (didn't help), checked background job performance - - Solution: Added eager loading with `includes(:emails)` on Brief model - - Root cause: Missing eager loading causing separate database query per email thread -3. **Check existing:** No similar issue found -4. **Generate filename:** `n-plus-one-brief-generation-BriefSystem-20251110.md` -5. **Validate YAML:** - ```yaml - module: Brief System - date: 2025-11-10 - problem_type: performance_issue - component: rails_model - symptoms: - - "N+1 query when loading email threads" - - "Brief generation taking >5 seconds" - root_cause: missing_include - severity: high - tags: [n-plus-one, eager-loading, performance] - ``` - ✅ Valid -6. 
**Create documentation:** - - `docs/solutions/performance-issues/n-plus-one-brief-generation-BriefSystem-20251110.md` -7. **Cross-reference:** None needed (no similar issues) - -**Output:** - -``` -✓ Solution documented - -File created: -- docs/solutions/performance-issues/n-plus-one-brief-generation-BriefSystem-20251110.md - -What's next? -1. Continue workflow (recommended) -2. Add to Required Reading - Promote to critical patterns (critical-patterns.md) -3. Link related issues - Connect to similar problems -4. Add to existing skill - Add to a learning skill (e.g., hotwire-native) -5. Create new skill - Extract into new learning skill -6. View documentation - See what was captured -7. Other -``` - ---- - -## Future Enhancements - -**Not in Phase 7 scope, but potential:** - -- Search by date range -- Filter by severity -- Tag-based search interface -- Metrics (most common issues, resolution time) -- Export to shareable format (community knowledge sharing) -- Import community solutions diff --git a/.codex/skills/compound-docs/assets/critical-pattern-template.md b/.codex/skills/compound-docs/assets/critical-pattern-template.md deleted file mode 100644 index 48d1b1777f..0000000000 --- a/.codex/skills/compound-docs/assets/critical-pattern-template.md +++ /dev/null @@ -1,34 +0,0 @@ -# Critical Pattern Template - -Use this template when adding a pattern to `docs/solutions/patterns/critical-patterns.md`: - ---- - -## N. [Pattern Name] (ALWAYS REQUIRED) - -### ❌ WRONG ([Will cause X error]) -```[language] -[code showing wrong approach] -``` - -### ✅ CORRECT -```[language] -[code showing correct approach] -``` - -**Why:** [Technical explanation of why this is required] - -**Placement/Context:** [When this applies] - -**Documented in:** `docs/solutions/[category]/[filename].md` - ---- - -**Instructions:** -1. Replace N with the next pattern number -2. Replace [Pattern Name] with descriptive title -3. Fill in WRONG example with code that causes the problem -4. 
Fill in CORRECT example with the solution -5. Explain the technical reason in "Why" -6. Clarify when this pattern applies in "Placement/Context" -7. Link to the full troubleshooting doc where this was originally solved diff --git a/.codex/skills/compound-docs/assets/resolution-template.md b/.codex/skills/compound-docs/assets/resolution-template.md deleted file mode 100644 index ac4d0c1d9a..0000000000 --- a/.codex/skills/compound-docs/assets/resolution-template.md +++ /dev/null @@ -1,93 +0,0 @@ ---- -module: [Module name or "System" for system-wide] -date: [YYYY-MM-DD] -problem_type: [build_error|test_failure|runtime_error|performance_issue|database_issue|security_issue|ui_bug|integration_issue|logic_error] -component: [rails_model|rails_controller|rails_view|service_object|background_job|database|frontend_stimulus|hotwire_turbo|email_processing|brief_system|assistant|authentication|payments] -symptoms: - - [Observable symptom 1 - specific error message or behavior] - - [Observable symptom 2 - what user actually saw/experienced] -root_cause: [missing_association|missing_include|missing_index|wrong_api|scope_issue|thread_violation|async_timing|memory_leak|config_error|logic_error|test_isolation|missing_validation|missing_permission] -rails_version: [7.1.2 - optional] -resolution_type: [code_fix|migration|config_change|test_fix|dependency_update|environment_setup] -severity: [critical|high|medium|low] -tags: [keyword1, keyword2, keyword3] ---- - -# Troubleshooting: [Clear Problem Title] - -## Problem -[1-2 sentence clear description of the issue and what the user experienced] - -## Environment -- Module: [Name or "System-wide"] -- Rails Version: [e.g., 7.1.2] -- Affected Component: [e.g., "Email Processing model", "Brief System service", "Authentication controller"] -- Date: [YYYY-MM-DD when this was solved] - -## Symptoms -- [Observable symptom 1 - what the user saw/experienced] -- [Observable symptom 2 - error messages, visual issues, unexpected behavior] -- 
[Continue as needed - be specific] - -## What Didn't Work - -**Attempted Solution 1:** [Description of what was tried] -- **Why it failed:** [Technical reason this didn't solve the problem] - -**Attempted Solution 2:** [Description of second attempt] -- **Why it failed:** [Technical reason] - -[Continue for all significant attempts that DIDN'T work] - -[If nothing else was attempted first, write:] -**Direct solution:** The problem was identified and fixed on the first attempt. - -## Solution - -[The actual fix that worked - provide specific details] - -**Code changes** (if applicable): -```ruby -# Before (broken): -[Show the problematic code] - -# After (fixed): -[Show the corrected code with explanation] -``` - -**Database migration** (if applicable): -```ruby -# Migration change: -[Show what was changed in the migration] -``` - -**Commands run** (if applicable): -```bash -# Steps taken to fix: -[Commands or actions] -``` - -## Why This Works - -[Technical explanation of:] -1. What was the ROOT CAUSE of the problem? -2. Why does the solution address this root cause? -3. What was the underlying issue (API misuse, configuration error, Rails version issue, etc.)? - -[Be detailed enough that future developers understand the "why", not just the "what"] - -## Prevention - -[How to avoid this problem in future development:] -- [Specific coding practice, check, or pattern to follow] -- [What to watch out for] -- [How to catch this early] - -## Related Issues - -[If any similar problems exist in docs/solutions/, link to them:] -- See also: [another-related-issue.md](../category/another-related-issue.md) -- Similar to: [related-problem.md](../category/related-problem.md) - -[If no related issues, write:] -No related issues documented yet. 
diff --git a/.codex/skills/compound-docs/references/yaml-schema.md b/.codex/skills/compound-docs/references/yaml-schema.md
deleted file mode 100644
index f73024427e..0000000000
--- a/.codex/skills/compound-docs/references/yaml-schema.md
+++ /dev/null
@@ -1,65 +0,0 @@
-# YAML Frontmatter Schema
-
-**See `.claude/skills/compound-docs/schema.yaml` for the complete schema specification.**
-
-## Required Fields
-
-- **module** (string): Module name (e.g., "EmailProcessing") or "System" for system-wide issues
-- **date** (string): ISO 8601 date (YYYY-MM-DD)
-- **problem_type** (enum): One of [build_error, test_failure, runtime_error, performance_issue, database_issue, security_issue, ui_bug, integration_issue, logic_error, developer_experience, workflow_issue, best_practice, documentation_gap]
-- **component** (enum): One of [rails_model, rails_controller, rails_view, service_object, background_job, database, frontend_stimulus, hotwire_turbo, email_processing, brief_system, assistant, authentication, payments, development_workflow, testing_framework, documentation, tooling]
-- **symptoms** (array): 1-5 specific observable symptoms
-- **root_cause** (enum): One of [missing_association, missing_include, missing_index, wrong_api, scope_issue, thread_violation, async_timing, memory_leak, config_error, logic_error, test_isolation, missing_validation, missing_permission, missing_workflow_step, inadequate_documentation, missing_tooling, incomplete_setup]
-- **resolution_type** (enum): One of [code_fix, migration, config_change, test_fix, dependency_update, environment_setup, workflow_improvement, documentation_update, tooling_addition, seed_data_update]
-- **severity** (enum): One of [critical, high, medium, low]
-
-## Optional Fields
-
-- **rails_version** (string): Rails version in X.Y.Z format
-- **tags** (array): Searchable keywords (lowercase, hyphen-separated)
-
-## Validation Rules
-
-1. All required fields must be present
-2. 
Enum fields must match allowed values exactly (case-sensitive) -3. symptoms must be YAML array with 1-5 items -4. date must match YYYY-MM-DD format -5. rails_version (if provided) must match X.Y.Z format -6. tags should be lowercase, hyphen-separated - -## Example - -```yaml ---- -module: Email Processing -date: 2025-11-12 -problem_type: performance_issue -component: rails_model -symptoms: - - "N+1 query when loading email threads" - - "Brief generation taking >5 seconds" -root_cause: missing_include -rails_version: 7.1.2 -resolution_type: code_fix -severity: high -tags: [n-plus-one, eager-loading, performance] ---- -``` - -## Category Mapping - -Based on `problem_type`, documentation is filed in: - -- **build_error** → `docs/solutions/build-errors/` -- **test_failure** → `docs/solutions/test-failures/` -- **runtime_error** → `docs/solutions/runtime-errors/` -- **performance_issue** → `docs/solutions/performance-issues/` -- **database_issue** → `docs/solutions/database-issues/` -- **security_issue** → `docs/solutions/security-issues/` -- **ui_bug** → `docs/solutions/ui-bugs/` -- **integration_issue** → `docs/solutions/integration-issues/` -- **logic_error** → `docs/solutions/logic-errors/` -- **developer_experience** → `docs/solutions/developer-experience/` -- **workflow_issue** → `docs/solutions/workflow-issues/` -- **best_practice** → `docs/solutions/best-practices/` -- **documentation_gap** → `docs/solutions/documentation-gaps/` diff --git a/.codex/skills/compound-docs/schema.yaml b/.codex/skills/compound-docs/schema.yaml deleted file mode 100644 index 0396b14d22..0000000000 --- a/.codex/skills/compound-docs/schema.yaml +++ /dev/null @@ -1,176 +0,0 @@ -# CORA Documentation Schema -# This schema MUST be validated before writing any documentation file - -required_fields: - module: - type: string - description: "Module/area of CORA (e.g., 'Email Processing', 'Brief System', 'Authentication')" - examples: - - "Email Processing" - - "Brief System" - - "Assistant" - - 
"Authentication" - - date: - type: string - pattern: '^\d{4}-\d{2}-\d{2}$' - description: "Date when this problem was solved (YYYY-MM-DD)" - - problem_type: - type: enum - values: - - build_error # Rails, bundle, compilation errors - - test_failure # Test failures, flaky tests - - runtime_error # Exceptions, crashes during execution - - performance_issue # Slow queries, memory issues, N+1 queries - - database_issue # Migration, query, schema problems - - security_issue # Authentication, authorization, XSS, SQL injection - - ui_bug # Frontend, Stimulus, Turbo issues - - integration_issue # External service, API integration problems - - logic_error # Business logic bugs - - developer_experience # DX issues: workflow, tooling, seed data, dev setup - - workflow_issue # Development process, missing steps, unclear practices - - best_practice # Documenting patterns and practices to follow - - documentation_gap # Missing or inadequate documentation - description: "Primary category of the problem" - - component: - type: enum - values: - - rails_model # ActiveRecord models - - rails_controller # ActionController - - rails_view # ERB templates, ViewComponent - - service_object # Custom service classes - - background_job # Sidekiq, Active Job - - database # PostgreSQL, migrations, schema - - frontend_stimulus # Stimulus JS controllers - - hotwire_turbo # Turbo Streams, Turbo Drive - - email_processing # Email handling, mailers - - brief_system # Brief generation, summarization - - assistant # AI assistant, prompts - - authentication # Devise, user auth - - payments # Stripe, billing - - development_workflow # Dev process, seed data, tooling - - testing_framework # Test setup, fixtures, VCR - - documentation # README, guides, inline docs - - tooling # Scripts, generators, CLI tools - description: "CORA component involved" - - symptoms: - type: array[string] - min_items: 1 - max_items: 5 - description: "Observable symptoms (error messages, visual issues, crashes)" - examples: - 
- "N+1 query detected in brief generation" - - "Brief emails not appearing in summary" - - "Turbo Stream response returns 404" - - root_cause: - type: enum - values: - - missing_association # Incorrect Rails associations - - missing_include # Missing eager loading (N+1) - - missing_index # Database performance issue - - wrong_api # Using deprecated/incorrect Rails API - - scope_issue # Incorrect query scope or filtering - - thread_violation # Real-time unsafe operation - - async_timing # Async/background job timing - - memory_leak # Memory leak or excessive allocation - - config_error # Configuration or environment issue - - logic_error # Algorithm/business logic bug - - test_isolation # Test isolation or fixture issue - - missing_validation # Missing model validation - - missing_permission # Authorization check missing - - missing_workflow_step # Skipped or undocumented workflow step - - inadequate_documentation # Missing or unclear documentation - - missing_tooling # Lacking helper scripts or automation - - incomplete_setup # Missing seed data, fixtures, or config - description: "Fundamental cause of the problem" - - resolution_type: - type: enum - values: - - code_fix # Fixed by changing source code - - migration # Fixed by database migration - - config_change # Fixed by changing configuration - - test_fix # Fixed by correcting tests - - dependency_update # Fixed by updating gem/dependency - - environment_setup # Fixed by environment configuration - - workflow_improvement # Improved development workflow or process - - documentation_update # Added or updated documentation - - tooling_addition # Added helper script or automation - - seed_data_update # Updated db/seeds.rb or fixtures - description: "Type of fix applied" - - severity: - type: enum - values: - - critical # Blocks production or development (build fails, data loss) - - high # Impairs core functionality (feature broken, security issue) - - medium # Affects specific feature (UI broken, performance 
impact) - - low # Minor issue or edge case - description: "Impact severity" - -optional_fields: - rails_version: - type: string - pattern: '^\d+\.\d+\.\d+$' - description: "Rails version where this was encountered (e.g., '7.1.0')" - - related_components: - type: array[string] - description: "Other components that interact with this issue" - - tags: - type: array[string] - max_items: 8 - description: "Searchable keywords (lowercase, hyphen-separated)" - examples: - - "n-plus-one" - - "eager-loading" - - "test-isolation" - - "turbo-stream" - -validation_rules: - - "module must be a valid CORA module name" - - "date must be in YYYY-MM-DD format" - - "problem_type must match one of the enum values" - - "component must match one of the enum values" - - "symptoms must be specific and observable (not vague)" - - "root_cause must be the ACTUAL cause, not a symptom" - - "resolution_type must match one of the enum values" - - "severity must match one of the enum values" - - "tags should be lowercase, hyphen-separated" - -# Example valid front matter: -# --- -# module: Email Processing -# date: 2025-11-12 -# problem_type: performance_issue -# component: rails_model -# symptoms: -# - N+1 query when loading email threads -# - Brief generation taking >5 seconds -# root_cause: missing_include -# rails_version: 7.1.2 -# resolution_type: code_fix -# severity: high -# tags: [n-plus-one, eager-loading, performance] -# --- -# -# Example DX issue front matter: -# --- -# module: Development Workflow -# date: 2025-11-13 -# problem_type: developer_experience -# component: development_workflow -# symptoms: -# - No example data for new feature in development -# - Rails db:seed doesn't demonstrate new capabilities -# root_cause: incomplete_setup -# rails_version: 7.1.2 -# resolution_type: seed_data_update -# severity: low -# tags: [seed-data, dx, workflow] -# --- diff --git a/.codex/skills/compound-engineering-every-style-editor/SKILL.md 
b/.codex/skills/compound-engineering-every-style-editor/SKILL.md index a4729d0cd1..b5a6d167d4 100644 --- a/.codex/skills/compound-engineering-every-style-editor/SKILL.md +++ b/.codex/skills/compound-engineering-every-style-editor/SKILL.md @@ -44,7 +44,7 @@ Review each paragraph systematically, checking for: - Word choice and usage (overused words, passive voice) - Adherence to Every style guide rules -Reference the complete [EVERY_WRITE_STYLE.md](./references/EVERY_WRITE_STYLE.md) for specific rules when in doubt. +Reference the complete style guide at `references/EVERY_WRITE_STYLE.md` for specific rules when in doubt. ### Step 3: Mechanical Review @@ -99,7 +99,7 @@ FINAL RECOMMENDATIONS ## Style Guide Reference -The complete Every style guide is included in [EVERY_WRITE_STYLE.md](./references/EVERY_WRITE_STYLE.md). Key areas to focus on: +The complete Every style guide is at `references/EVERY_WRITE_STYLE.md`. Key areas to focus on: - **Quick Rules**: Title case for headlines, sentence case elsewhere - **Tone**: Active voice, avoid overused words (actually, very, just), be specific diff --git a/.codex/skills/deepen-plan/SKILL.md b/.codex/skills/deepen-plan/SKILL.md deleted file mode 100644 index 6c3204f04a..0000000000 --- a/.codex/skills/deepen-plan/SKILL.md +++ /dev/null @@ -1,409 +0,0 @@ ---- -name: deepen-plan -description: Stress-test an existing implementation plan and selectively strengthen weak sections with targeted research. Use when a plan needs more confidence around decisions, sequencing, system-wide impact, risks, or verification. Best for Standard or Deep plans, or high-risk topics such as auth, payments, migrations, external APIs, and security. For structural or clarity improvements, prefer document-review instead. -argument-hint: '[path to plan file]' ---- - -# Deepen Plan - -## Introduction - -**Note: The current year is 2026.** Use this when searching for recent documentation and best practices. - -`ce:plan` does the first planning pass. 
`deepen-plan` is a second-pass confidence check. - -Use this skill when the plan already exists and the question is not "Is this document clear?" but rather "Is this plan grounded enough for the complexity and risk involved?" - -This skill does **not** turn plans into implementation scripts. It identifies weak sections, runs targeted research only for those sections, and strengthens the plan in place. - -`document-review` and `deepen-plan` are different: -- Use the `document-review` skill when the document needs clarity, simplification, completeness, or scope control -- Use `deepen-plan` when the document is structurally sound but still needs stronger rationale, sequencing, risk treatment, or system-wide thinking - -## Interaction Method - -Use the platform's question tool when available. When asking the user a question, prefer the platform's blocking question tool if one exists (`AskUserQuestion` in Claude Code, `request_user_input` in Codex, `ask_user` in Gemini). Otherwise, present numbered options in chat and wait for the user's reply before proceeding. - -Ask one question at a time. Prefer a concise single-select choice when natural options exist. - -## Plan File - - #$ARGUMENTS - -If the plan path above is empty: -1. Check `docs/plans/` for recent files -2. Ask the user which plan to deepen using the platform's blocking question tool when available (see Interaction Method). Otherwise, present numbered options in chat and wait for the user's reply before proceeding - -Do not proceed until you have a valid plan file path. - -## Core Principles - -1. **Stress-test, do not inflate** - Deepening should increase justified confidence, not make the plan longer for its own sake. -2. **Selective depth only** - Focus on the weakest 2-5 sections rather than enriching everything. -3. **Prefer the simplest execution mode** - Use direct agent synthesis by default. 
Switch to artifact-backed research only when the selected research scope is large enough that returning all findings inline would create avoidable context pressure. -4. **Preserve the planning boundary** - No implementation code, no git command choreography, no exact test command recipes. -5. **Use artifact-contained evidence** - Work from the written plan, its `Context & Research`, `Sources & References`, and its origin document when present. -6. **Respect product boundaries** - Do not invent new product requirements. If deepening reveals a product-level gap, surface it as an open question or route back to `ce:brainstorm`. -7. **Prioritize risk and cross-cutting impact** - The more dangerous or interconnected the work, the more valuable another planning pass becomes. - -## Workflow - -### Phase 0: Load the Plan and Decide Whether Deepening Is Warranted - -#### 0.1 Read the Plan and Supporting Inputs - -Read the plan file completely. - -If the plan frontmatter includes an `origin:` path: -- Read the origin document too -- Use it to check whether the plan still reflects the product intent, scope boundaries, and success criteria - -#### 0.2 Classify Plan Depth and Topic Risk - -Determine the plan depth from the document: -- **Lightweight** - small, bounded, low ambiguity, usually 2-4 implementation units -- **Standard** - moderate complexity, some technical decisions, usually 3-6 units -- **Deep** - cross-cutting, high-risk, or strategically important work, usually 4-8 units or phased delivery - -Also build a risk profile. 
Treat these as high-risk signals: -- Authentication, authorization, or security-sensitive behavior -- Payments, billing, or financial flows -- Data migrations, backfills, or persistent data changes -- External APIs or third-party integrations -- Privacy, compliance, or user data handling -- Cross-interface parity or multi-surface behavior -- Significant rollout, monitoring, or operational concerns - -#### 0.3 Decide Whether to Deepen - -Use this default: -- **Lightweight** plans usually do not need deepening unless they are high-risk or the user explicitly requests it -- **Standard** plans often benefit when one or more important sections still look thin -- **Deep** or high-risk plans often benefit from a targeted second pass - -If the plan already appears sufficiently grounded: -- Say so briefly -- Recommend moving to `/ce:work` or the `document-review` skill -- If the user explicitly asked to deepen anyway, continue with a light pass and deepen at most 1-2 sections - -### Phase 1: Parse the Current `ce:plan` Structure - -Map the plan into the current template. 
Look for these sections, or their nearest equivalents: -- `Overview` -- `Problem Frame` -- `Requirements Trace` -- `Scope Boundaries` -- `Context & Research` -- `Key Technical Decisions` -- `Open Questions` -- `High-Level Technical Design` (optional overview — pseudo-code, DSL grammar, mermaid diagram, or data flow) -- `Implementation Units` (may include per-unit `Technical design` subsections) -- `System-Wide Impact` -- `Risks & Dependencies` -- `Documentation / Operational Notes` -- `Sources & References` -- Optional deep-plan sections such as `Alternative Approaches Considered`, `Success Metrics`, `Phased Delivery`, `Risk Analysis & Mitigation`, and `Operational / Rollout Notes` - -If the plan was written manually or uses different headings: -- Map sections by intent rather than exact heading names -- If a section is structurally present but titled differently, treat it as the equivalent section -- If the plan truly lacks a section, decide whether that absence is intentional for the plan depth or a confidence gap worth scoring - -Also collect: -- Frontmatter, including existing `deepened:` date if present -- Number of implementation units -- Which files and test files are named -- Which learnings, patterns, or external references are cited -- Which sections appear omitted because they were unnecessary versus omitted because they are missing - -### Phase 2: Score Confidence Gaps - -Use a checklist-first, risk-weighted scoring pass. 
- -For each section, compute: -- **Trigger count** - number of checklist problems that apply -- **Risk bonus** - add 1 if the topic is high-risk and this section is materially relevant to that risk -- **Critical-section bonus** - add 1 for `Key Technical Decisions`, `Implementation Units`, `System-Wide Impact`, `Risks & Dependencies`, or `Open Questions` in `Standard` or `Deep` plans - -Treat a section as a candidate if: -- it hits **2+ total points**, or -- it hits **1+ point** in a high-risk domain and the section is materially important - -Choose only the top **2-5** sections by score. If the user explicitly asked to deepen a lightweight plan, cap at **1-2** sections unless the topic is high-risk. - -Example: -- A `Key Technical Decisions` section with 1 checklist trigger and the critical-section bonus scores **2 points** and is a candidate -- A `Risks & Dependencies` section with 1 checklist trigger in a high-risk migration plan also becomes a candidate because the risk bonus applies - -If the plan already has a `deepened:` date: -- Prefer sections that have not yet been substantially strengthened, if their scores are comparable -- Revisit an already-deepened section only when it still scores clearly higher than alternatives or the user explicitly asks for another pass on it - -#### 2.1 Section Checklists - -Use these triggers. 
- -**Requirements Trace** -- Requirements are vague or disconnected from implementation units -- Success criteria are missing or not reflected downstream -- Units do not clearly advance the traced requirements -- Origin requirements are not clearly carried forward - -**Context & Research / Sources & References** -- Relevant repo patterns are named but never used in decisions or implementation units -- Cited learnings or references do not materially shape the plan -- High-risk work lacks appropriate external or internal grounding -- Research is generic instead of tied to this repo or this plan - -**Key Technical Decisions** -- A decision is stated without rationale -- Rationale does not explain tradeoffs or rejected alternatives -- The decision does not connect back to scope, requirements, or origin context -- An obvious design fork exists but the plan never addresses why one path won - -**Open Questions** -- Product blockers are hidden as assumptions -- Planning-owned questions are incorrectly deferred to implementation -- Resolved questions have no clear basis in repo context, research, or origin decisions -- Deferred items are too vague to be useful later - -**High-Level Technical Design (when present)** -- The sketch uses the wrong medium for the work (e.g., pseudo-code where a sequence diagram would communicate better) -- The sketch contains implementation code (imports, exact signatures, framework-specific syntax) rather than pseudo-code -- The non-prescriptive framing is missing or weak -- The sketch does not connect to the key technical decisions or implementation units - -**High-Level Technical Design (when absent)** *(Standard or Deep plans only)* -- The work involves DSL design, API surface design, multi-component integration, complex data flow, or state-heavy lifecycle -- Key technical decisions would be easier to validate with a visual or pseudo-code representation -- The approach section of implementation units is thin and a higher-level technical 
design would provide context - -**Implementation Units** -- Dependency order is unclear or likely wrong -- File paths or test file paths are missing where they should be explicit -- Units are too large, too vague, or broken into micro-steps -- Approach notes are thin or do not name the pattern to follow -- Test scenarios or verification outcomes are vague - -**System-Wide Impact** -- Affected interfaces, callbacks, middleware, entry points, or parity surfaces are missing -- Failure propagation is underexplored -- State lifecycle, caching, or data integrity risks are absent where relevant -- Integration coverage is weak for cross-layer work - -**Risks & Dependencies / Documentation / Operational Notes** -- Risks are listed without mitigation -- Rollout, monitoring, migration, or support implications are missing when warranted -- External dependency assumptions are weak or unstated -- Security, privacy, performance, or data risks are absent where they obviously apply - -Use the plan's own `Context & Research` and `Sources & References` as evidence. If those sections cite a pattern, learning, or risk that never affects decisions, implementation units, or verification, treat that as a confidence gap. - -### Phase 3: Select Targeted Research Agents - -For each selected section, choose the smallest useful agent set. Do **not** run every agent. Use at most **1-3 agents per section** and usually no more than **8 agents total**. - -Use fully-qualified agent names inside Task calls. 
- -#### 3.1 Deterministic Section-to-Agent Mapping - -**Requirements Trace / Open Questions classification** -- `compound-engineering:workflow:spec-flow-analyzer` for missing user flows, edge cases, and handoff gaps -- `compound-engineering:research:repo-research-analyst` (Scope: `architecture, patterns`) for repo-grounded patterns, conventions, and implementation reality checks - -**Context & Research / Sources & References gaps** -- `compound-engineering:research:learnings-researcher` for institutional knowledge and past solved problems -- `compound-engineering:research:framework-docs-researcher` for official framework or library behavior -- `compound-engineering:research:best-practices-researcher` for current external patterns and industry guidance -- Add `compound-engineering:research:git-history-analyzer` only when historical rationale or prior art is materially missing - -**Key Technical Decisions** -- `compound-engineering:review:architecture-strategist` for design integrity, boundaries, and architectural tradeoffs -- Add `compound-engineering:research:framework-docs-researcher` or `compound-engineering:research:best-practices-researcher` when the decision needs external grounding beyond repo evidence - -**High-Level Technical Design** -- `compound-engineering:review:architecture-strategist` for validating that the technical design accurately represents the intended approach and identifying gaps -- `compound-engineering:research:repo-research-analyst` (Scope: `architecture, patterns`) for grounding the technical design in existing repo patterns and conventions -- Add `compound-engineering:research:best-practices-researcher` when the technical design involves a DSL, API surface, or pattern that benefits from external validation - -**Implementation Units / Verification** -- `compound-engineering:research:repo-research-analyst` (Scope: `patterns`) for concrete file targets, patterns to follow, and repo-specific sequencing clues -- 
`compound-engineering:review:pattern-recognition-specialist` for consistency, duplication risks, and alignment with existing patterns -- Add `compound-engineering:workflow:spec-flow-analyzer` when sequencing depends on user flow or handoff completeness - -**System-Wide Impact** -- `compound-engineering:review:architecture-strategist` for cross-boundary effects, interface surfaces, and architectural knock-on impact -- Add the specific specialist that matches the risk: - - `compound-engineering:review:performance-oracle` for scalability, latency, throughput, and resource-risk analysis - - `compound-engineering:review:security-sentinel` for auth, validation, exploit surfaces, and security boundary review - - `compound-engineering:review:data-integrity-guardian` for migrations, persistent state safety, consistency, and data lifecycle risks - -**Risks & Dependencies / Operational Notes** -- Use the specialist that matches the actual risk: - - `compound-engineering:review:security-sentinel` for security, auth, privacy, and exploit risk - - `compound-engineering:review:data-integrity-guardian` for persistent data safety, constraints, and transaction boundaries - - `compound-engineering:review:data-migration-expert` for migration realism, backfills, and production data transformation risk - - `compound-engineering:review:deployment-verification-agent` for rollout checklists, rollback planning, and launch verification - - `compound-engineering:review:performance-oracle` for capacity, latency, and scaling concerns - -#### 3.2 Agent Prompt Shape - -For each selected section, pass: -- The scope prefix from section 3.1 (e.g., `Scope: architecture, patterns.`) when the agent supports scoped invocation -- A short plan summary -- The exact section text -- Why the section was selected, including which checklist triggers fired -- The plan depth and risk profile -- A specific question to answer - -Instruct the agent to return: -- findings that change planning quality -- stronger 
rationale, sequencing, verification, risk treatment, or references -- no implementation code -- no shell commands - -#### 3.3 Choose Research Execution Mode - -Use the lightest mode that will work: - -- **Direct mode** - Default. Use when the selected section set is small and the parent can safely read the agent outputs inline. -- **Artifact-backed mode** - Use only when the selected research scope is large enough that inline returns would create unnecessary context pressure. - -Signals that justify artifact-backed mode: -- More than 5 agents are likely to return meaningful findings -- The selected section excerpts are long enough that repeating them in multiple agent outputs would be wasteful -- The topic is high-risk and likely to attract bulky source-backed analysis -- The platform has a history of parent-context instability on large parallel returns - -If artifact-backed mode is not clearly warranted, stay in direct mode. - -### Phase 4: Run Targeted Research and Review - -Launch the selected agents in parallel using the execution mode chosen in Step 3.3. If the current platform does not support parallel dispatch, run them sequentially instead. - -Prefer local repo and institutional evidence first. Use external research only when the gap cannot be closed responsibly from repo context or already-cited sources. - -If a selected section can be improved by reading the origin document more carefully, do that before dispatching external agents. - -#### 4.1 Direct Mode - -Have each selected agent return its findings directly to the parent. - -Keep the return payload focused: -- strongest findings only -- the evidence or sources that matter -- the concrete planning improvement implied by the finding - -If a direct-mode agent starts producing bulky or repetitive output, stop and switch the remaining research to artifact-backed mode instead of letting the parent context bloat. 
- -#### 4.2 Artifact-Backed Mode - -Use a per-run scratch directory under `.context/compound-engineering/deepen-plan/`, for example `.context/compound-engineering/deepen-plan//` or `.context/compound-engineering/deepen-plan//`. - -Use the scratch directory only for the current deepening pass. - -For each selected agent: -- give it the same plan summary, section text, trigger rationale, depth, and risk profile described in Step 3.2 -- instruct it to write one compact artifact file for its assigned section or sections -- have it return only a short completion summary to the parent - -Prefer a compact markdown artifact unless machine-readable structure is clearly useful. Each artifact should contain: -- target section id and title -- why the section was selected -- 3-7 findings that materially improve planning quality -- source-backed rationale, including whether the evidence came from repo context, origin context, institutional learnings, official docs, or external best practices -- the specific plan change implied by each finding -- any unresolved tradeoff that should remain explicit in the plan - -Artifact rules: -- no implementation code -- no shell commands -- no checkpoint logs or self-diagnostics -- no duplicated boilerplate across files -- no judge or merge sub-pipeline - -Before synthesis: -- quickly verify that each selected section has at least one usable artifact -- if an artifact is missing or clearly malformed, re-run that agent or fall back to direct-mode reasoning for that section instead of building a validation pipeline - -If agent outputs conflict: -- Prefer repo-grounded and origin-grounded evidence over generic advice -- Prefer official framework documentation over secondary best-practice summaries when the conflict is about library behavior -- If a real tradeoff remains, record it explicitly in the plan rather than pretending the conflict does not exist - -### Phase 5: Synthesize and Rewrite the Plan - -Strengthen only the selected sections. 
Keep the plan coherent and preserve its overall structure. - -If artifact-backed mode was used: -- read the plan, origin document if present, and the selected section artifacts -- also incorporate any findings already returned inline from direct-mode agents before a mid-run switch, so early results are not silently dropped -- synthesize in one pass -- do not create a separate judge, merge, or quality-review phase unless the user explicitly asks for another pass - -Allowed changes: -- Clarify or strengthen decision rationale -- Tighten requirements trace or origin fidelity -- Reorder or split implementation units when sequencing is weak -- Add missing pattern references, file/test paths, or verification outcomes -- Expand system-wide impact, risks, or rollout treatment where justified -- Reclassify open questions between `Resolved During Planning` and `Deferred to Implementation` when evidence supports the change -- Strengthen, replace, or add a High-Level Technical Design section when the work warrants it and the current representation is weak, uses the wrong medium, or is absent where it would help. Preserve the non-prescriptive framing -- Strengthen or add per-unit technical design fields where the unit's approach is non-obvious and the current approach notes are thin -- Add an optional deep-plan section only when it materially improves execution quality -- Add or update `deepened: YYYY-MM-DD` in frontmatter when the plan was substantively improved - -Do **not**: -- Add implementation code — no imports, exact method signatures, or framework-specific syntax. 
Pseudo-code sketches and DSL grammars are allowed in both the top-level High-Level Technical Design section and per-unit technical design fields -- Add git commands, commit choreography, or exact test command recipes -- Add generic `Research Insights` subsections everywhere -- Rewrite the entire plan from scratch -- Invent new product requirements, scope changes, or success criteria without surfacing them explicitly - -If research reveals a product-level ambiguity that should change behavior or scope: -- Do not silently decide it here -- Record it under `Open Questions` -- Recommend `ce:brainstorm` if the gap is truly product-defining - -### Phase 6: Final Checks and Write the File - -Before writing: -- Confirm the plan is stronger in specific ways, not merely longer -- Confirm the planning boundary is intact -- Confirm the selected sections were actually the weakest ones -- Confirm origin decisions were preserved when an origin document exists -- Confirm the final plan still feels right-sized for its depth -- If artifact-backed mode was used, confirm the scratch artifacts did not become a second hidden plan format - -Update the plan file in place by default. - -If the user explicitly requests a separate file, append `-deepened` before `.md`, for example: -- `docs/plans/2026-03-15-001-feat-example-plan-deepened.md` - -If artifact-backed mode was used and the user did not ask to inspect the scratch files: -- clean up the temporary scratch directory after the plan is safely written -- if cleanup is not practical on the current platform, say where the artifacts were left and that they are temporary workflow output - -## Post-Enhancement Options - -If substantive changes were made, present next steps using the platform's blocking question tool when available (see Interaction Method). Otherwise, present numbered options in chat and wait for the user's reply before proceeding. - -**Question:** "Plan deepened at `[plan_path]`. What would you like to do next?" 
- -**Options:** -1. **View diff** - Show what changed -2. **Run `document-review` skill** - Improve the updated plan through structured document review -3. **Start `ce:work` skill** - Begin implementing the plan -4. **Deepen specific sections further** - Run another targeted deepening pass on named sections - -Based on selection: -- **View diff** -> Show the important additions and changed sections -- **`document-review` skill** -> Load the `document-review` skill with the plan path -- **Start `ce:work` skill** -> Call the `ce:work` skill with the plan path -- **Deepen specific sections further** -> Ask which sections still feel weak and run another targeted pass only for those sections - -If no substantive changes were warranted: -- Say that the plan already appears sufficiently grounded -- Offer the `document-review` skill or `/ce:work` as the next step instead - -NEVER CODE! Research, challenge, and strengthen the plan. diff --git a/.codex/skills/dhh-rails-style/SKILL.md b/.codex/skills/dhh-rails-style/SKILL.md index 326440f5ef..107111f0b5 100644 --- a/.codex/skills/dhh-rails-style/SKILL.md +++ b/.codex/skills/dhh-rails-style/SKILL.md @@ -57,12 +57,12 @@ What are you working on? 
| Response | Reference to Read | |----------|-------------------| -| 1, controller | [controllers.md](./references/controllers.md) | -| 2, model | [models.md](./references/models.md) | -| 3, view, frontend, turbo, stimulus, css | [frontend.md](./references/frontend.md) | -| 4, architecture, routing, auth, job, cache | [architecture.md](./references/architecture.md) | -| 5, test, testing, minitest, fixture | [testing.md](./references/testing.md) | -| 6, gem, dependency, library | [gems.md](./references/gems.md) | +| 1, controller | `references/controllers.md` | +| 2, model | `references/models.md` | +| 3, view, frontend, turbo, stimulus, css | `references/frontend.md` | +| 4, architecture, routing, auth, job, cache | `references/architecture.md` | +| 5, test, testing, minitest, fixture | `references/testing.md` | +| 6, gem, dependency, library | `references/gems.md` | | 7, review | Read all references, then review code | | 8, general task | Read relevant references based on context | @@ -153,12 +153,12 @@ All detailed patterns in `references/`: | File | Topics | |------|--------| -| [controllers.md](./references/controllers.md) | REST mapping, concerns, Turbo responses, API patterns, HTTP caching | -| [models.md](./references/models.md) | Concerns, state records, callbacks, scopes, POROs, authorization, broadcasting | -| [frontend.md](./references/frontend.md) | Turbo Streams, Stimulus controllers, CSS layers, OKLCH colors, partials | -| [architecture.md](./references/architecture.md) | Routing, authentication, jobs, Current attributes, caching, database patterns | -| [testing.md](./references/testing.md) | Minitest, fixtures, unit/integration/system tests, testing patterns | -| [gems.md](./references/gems.md) | What they use vs avoid, decision framework, Gemfile examples | +| `references/controllers.md` | REST mapping, concerns, Turbo responses, API patterns, HTTP caching | +| `references/models.md` | Concerns, state records, callbacks, scopes, POROs, 
authorization, broadcasting | +| `references/frontend.md` | Turbo Streams, Stimulus controllers, CSS layers, OKLCH colors, partials | +| `references/architecture.md` | Routing, authentication, jobs, Current attributes, caching, database patterns | +| `references/testing.md` | Minitest, fixtures, unit/integration/system tests, testing patterns | +| `references/gems.md` | What they use vs avoid, decision framework, Gemfile examples | diff --git a/.codex/skills/document-review/SKILL.md b/.codex/skills/document-review/SKILL.md index ca83d4759c..468ced8791 100644 --- a/.codex/skills/document-review/SKILL.md +++ b/.codex/skills/document-review/SKILL.md @@ -48,6 +48,12 @@ Analyze the document content to determine which conditional personas to activate - Scope boundary language that seems misaligned with stated goals - Goals that don't clearly connect to requirements +**adversarial** -- activate when the document contains: +- More than 5 distinct requirements or implementation units +- Explicit architectural or scope decisions with stated rationale +- High-stakes domains (auth, payments, data migrations, external integrations) +- Proposals of new abstractions, frameworks, or significant architectural patterns + ## Phase 2: Announce and Dispatch Personas ### Announce the Review Team @@ -73,15 +79,16 @@ Add activated conditional personas: - `compound-engineering:document-review:design-lens-reviewer` - `compound-engineering:document-review:security-lens-reviewer` - `compound-engineering:document-review:scope-guardian-reviewer` +- `compound-engineering:document-review:adversarial-document-reviewer` ### Dispatch -Dispatch all agents in **parallel** using the platform's task/agent tool (e.g., Agent tool in Claude Code, spawn in Codex). 
Each agent receives the prompt built from the [subagent template](./references/subagent-template.md) with these variables filled: +Dispatch all agents in **parallel** using the platform's task/agent tool (e.g., Agent tool in Claude Code, spawn in Codex). Each agent receives the prompt built from the subagent template included below with these variables filled: | Variable | Value | |----------|-------| | `{persona_file}` | Full content of the agent's markdown file | -| `{schema}` | Content of [findings-schema.json](./references/findings-schema.json) | +| `{schema}` | Content of the findings schema included below | | `{document_type}` | "requirements" or "plan" from Phase 1 classification | | `{document_path}` | Path to the document | | `{document_content}` | Full text of the document | @@ -90,7 +97,7 @@ Pass each agent the **full document** -- do not split into sections. **Error handling:** If an agent fails or times out, proceed with findings from agents that completed. Note the failed agent in the Coverage section. Do not block the entire review on a single agent failure. -**Dispatch limit:** Even at maximum (6 agents), use parallel dispatch. These are document reviewers with bounded scope reading a single document -- parallel is safe and fast. +**Dispatch limit:** Even at maximum (7 agents), use parallel dispatch. These are document reviewers with bounded scope reading a single document -- parallel is safe and fast. ## Phase 3: Synthesize Findings @@ -98,7 +105,7 @@ Process findings from all agents through this pipeline. 
**Order matters** -- eac ### 3.1 Validate -Check each agent's returned JSON against [findings-schema.json](./references/findings-schema.json): +Check each agent's returned JSON against the findings schema included below: - Drop findings missing any required field defined in the schema - Drop findings with invalid enum values - Note the agent name for any malformed output in the Coverage section @@ -118,14 +125,15 @@ When fingerprints match across personas: ### 3.4 Promote Residual Concerns Scan the residual concerns (findings suppressed in 3.2) for: -- **Cross-persona corroboration**: A residual concern from Persona A overlaps with an above-threshold finding from Persona B. Promote at P2 with confidence 0.55-0.65. -- **Concrete blocking risks**: A residual concern describes a specific, concrete risk that would block implementation. Promote at P2 with confidence 0.55. +- **Cross-persona corroboration**: A residual concern from Persona A overlaps with an above-threshold finding from Persona B. Promote at P2 with confidence 0.55-0.65. Inherit `finding_type` from the corroborating above-threshold finding. +- **Concrete blocking risks**: A residual concern describes a specific, concrete risk that would block implementation. Promote at P2 with confidence 0.55. Set `finding_type: omission` (blocking risks surfaced as residual concerns are inherently about something the document failed to address). 
### 3.5 Resolve Contradictions When personas disagree on the same section: - Create a **combined finding** presenting both perspectives - Set `autofix_class: present` +- Set `finding_type: error` (contradictions are by definition about conflicting things the document says, not things it omits) - Frame as a tradeoff, not a verdict Specific conflict patterns: @@ -137,14 +145,17 @@ Specific conflict patterns: | Autofix Class | Route | |---------------|-------| -| `auto` | Apply automatically -- local deterministic fix (terminology, formatting, cross-references) | -| `present` | Present to user for judgment | +| `auto` | Apply automatically -- local deterministic fix (terminology, formatting, cross-references, completeness corrections where the correct value is verifiable from the document itself) | +| `batch_confirm` | Group for single batch approval -- obvious fixes that touch meaning but have one clear correct answer | +| `present` | Present individually for user judgment | + +Demote any `auto` finding that lacks a `suggested_fix` to `batch_confirm`. Demote any `batch_confirm` finding that lacks a `suggested_fix` to `present`. -Demote any `auto` finding that lacks a `suggested_fix` to `present` -- the orchestrator cannot apply a fix without concrete replacement text. +**Completeness corrections eligible for `auto`:** A finding qualifies when the correct fix is deterministically derivable from other content in the document. Examples: a count says "6 units" but the document lists 7, a summary omits an item that appears in the detailed list, a cross-reference points to a renamed section. If the fix requires judgment about *what* to add (not just *that* something is missing), it belongs in `batch_confirm` or `present`. ### 3.7 Sort -Sort findings for presentation: P0 -> P1 -> P2 -> P3, then by confidence (descending), then by document order (section position). 
+Sort findings for presentation: P0 -> P1 -> P2 -> P3, then by finding type (errors before omissions), then by confidence (descending), then by document order (section position). ## Phase 4: Apply and Present @@ -153,17 +164,28 @@ Sort findings for presentation: P0 -> P1 -> P2 -> P3, then by confidence (descen Apply all `auto` findings to the document in a **single pass**: - Edit the document inline using the platform's edit tool - Track what was changed for the "Auto-fixes Applied" section -- Do not ask for approval -- these are unambiguously correct (terminology fixes, formatting, cross-references) +- Do not ask for approval -- these are unambiguously correct + +### Batch Confirm + +If any `batch_confirm` findings exist, present them as a group for a single approval: +- List the proposed fixes in a numbered table +- Use the platform's blocking question tool (AskUserQuestion in Claude Code, request_user_input in Codex, ask_user in Gemini) to ask: "Apply these N fixes? (yes/no/select)". If no blocking question tool is available, present the table with numbered options and wait for the user's reply before proceeding. +- If approved, apply all in a single pass +- If "select", let the user pick which to apply +- If rejected, demote remaining to the `present` findings list + +This turns N obvious-but-meaning-touching fixes into 1 interaction instead of N. ### Present Remaining Findings -Present all other findings to the user using the format from [review-output-template.md](./references/review-output-template.md): -- Group by severity (P0 -> P3) -- Include the Coverage table showing which personas ran -- Show auto-fixes that were applied -- Include residual concerns and deferred questions if any +Present `present` findings using the review output template included below. 
Within each severity level, separate findings by type: +- **Errors** (design tensions, contradictions, incorrect statements) first -- these need resolution +- **Omissions** (missing steps, absent details, forgotten entries) second -- these need additions + +Brief summary at the top: "Applied N auto-fixes. Batched M fixes for approval. K findings to consider (X errors, Y omissions)." -Brief summary at the top: "Applied N auto-fixes. M findings to consider (X at P0/P1)." +Include the Coverage table, auto-fixes applied, residual concerns, and deferred questions. ### Protected Artifacts @@ -193,8 +215,24 @@ Return "Review complete" as the terminal signal for callers. - Do not add new sections or requirements the user didn't discuss - Do not over-engineer or add complexity - Do not create separate review files or add metadata sections -- Do not modify any of the 4 caller skills (ce-brainstorm, ce-plan, ce-plan-beta, deepen-plan-beta) +- Do not modify any of the 2 caller skills (ce-brainstorm, ce-plan) ## Iteration Guidance On subsequent passes, re-dispatch personas and re-synthesize. The auto-fix mechanism and confidence gating prevent the same findings from recurring once fixed. If findings are repetitive across passes, recommend completion. 
+ +--- + +## Included References + +### Subagent Template + +@./references/subagent-template.md + +### Findings Schema + +@./references/findings-schema.json + +### Review Output Template + +@./references/review-output-template.md diff --git a/.codex/skills/document-review/references/findings-schema.json b/.codex/skills/document-review/references/findings-schema.json index cb9a6295c2..5ec70c32df 100644 --- a/.codex/skills/document-review/references/findings-schema.json +++ b/.codex/skills/document-review/references/findings-schema.json @@ -19,6 +19,7 @@ "severity", "section", "why_it_matters", + "finding_type", "autofix_class", "confidence", "evidence" @@ -44,8 +45,13 @@ }, "autofix_class": { "type": "string", - "enum": ["auto", "present"], - "description": "How this issue should be handled. auto = local deterministic fix the orchestrator can apply without asking (terminology, formatting, cross-references). present = requires user judgment." + "enum": ["auto", "batch_confirm", "present"], + "description": "How this issue should be handled. auto = local deterministic fix applied silently (terminology, formatting, cross-references, completeness corrections). batch_confirm = obvious fix with a clear correct answer, but touches meaning enough to warrant grouped approval. present = requires individual user judgment." + }, + "finding_type": { + "type": "string", + "enum": ["error", "omission"], + "description": "Whether the finding is a mistake in what the document says (error) or something the document forgot to say (omission). Errors are design tensions, contradictions, or incorrect statements. Omissions are missing mechanical steps, forgotten list entries, or absent details." }, "suggested_fix": { "type": ["string", "null"], @@ -91,8 +97,13 @@ "P3": "Minor improvement. User's discretion." }, "autofix_classes": { - "auto": "Local, deterministic document fix: terminology consistency, formatting, cross-reference correction. 
Must be unambiguous and not change the document's meaning.", - "present": "Requires user judgment -- strategic questions, tradeoffs, meaning-changing fixes, or informational findings." + "auto": "Local, deterministic document fix: terminology consistency, formatting, cross-reference correction, completeness corrections (wrong counts, missing list entries, undefined values where the correct value is verifiable from elsewhere in the document). Must be unambiguous.", + "batch_confirm": "Obvious fix with a clear correct answer, but touches document meaning enough to warrant user awareness. Grouped with other batch_confirm findings for a single approval rather than individual review. Examples: adding a missing implementation step that is mechanically implied, updating a section summary to reflect its own contents.", + "present": "Requires individual user judgment -- strategic questions, design tradeoffs, meaning-changing fixes, or findings where reasonable people could disagree on the right action." + }, + "finding_types": { + "error": "Something the document says that is wrong -- contradictions, incorrect statements, design tensions, incoherent tradeoffs. These are mistakes in what exists.", + "omission": "Something the document forgot to say -- missing mechanical steps, absent list entries, undefined thresholds, forgotten cross-references. These are gaps in completeness." } } } diff --git a/.codex/skills/document-review/references/review-output-template.md b/.codex/skills/document-review/references/review-output-template.md index 21b03f80ae..4e585d9a32 100644 --- a/.codex/skills/document-review/references/review-output-template.md +++ b/.codex/skills/document-review/references/review-output-template.md @@ -15,35 +15,52 @@ Use this **exact format** when presenting synthesized review findings. Findings - security-lens -- plan adds public API endpoint with auth flow - scope-guardian -- plan has 15 requirements across 3 priority levels +Applied 3 auto-fixes. 
Batched 2 fixes for approval. 4 findings to consider (2 errors, 2 omissions). + ### Auto-fixes Applied -- Standardized "pipeline"/"workflow" terminology to "pipeline" throughout (coherence, auto) -- Fixed cross-reference: Section 4 referenced "Section 3.2" which is actually "Section 3.1" (coherence, auto) +- Standardized "pipeline"/"workflow" terminology to "pipeline" throughout (coherence) +- Fixed cross-reference: Section 4 referenced "Section 3.2" which is actually "Section 3.1" (coherence) +- Updated unit count from "6 units" to "7 units" to match listed units (coherence) + +### Batch Confirm + +These fixes have one clear correct answer but touch document meaning. Apply all? + +| # | Section | Fix | Reviewer | +|---|---------|-----|----------| +| 1 | Unit 4 | Add "update API rate-limit config" step -- implied by Unit 3's rate-limit introduction | feasibility | +| 2 | Verification | Add auth token refresh to test scenarios -- required by Unit 2's token expiry handling | security-lens | ### P0 -- Must Fix -| # | Section | Issue | Reviewer | Confidence | Route | -|---|---------|-------|----------|------------|-------| -| 1 | Requirements Trace | Goal states "offline support" but technical approach assumes persistent connectivity | coherence | 0.92 | `present` | +#### Errors + +| # | Section | Issue | Reviewer | Confidence | +|---|---------|-------|----------|------------| +| 1 | Requirements Trace | Goal states "offline support" but technical approach assumes persistent connectivity | coherence | 0.92 | ### P1 -- Should Fix -| # | Section | Issue | Reviewer | Confidence | Route | -|---|---------|-------|----------|------------|-------| -| 2 | Implementation Unit 3 | Plan proposes custom auth when codebase already uses Devise | feasibility | 0.85 | `present` | -| 3 | Scope Boundaries | 8 of 12 units build admin infrastructure; only 2 touch stated goal | scope-guardian | 0.80 | `present` | +#### Errors -### P2 -- Consider Fixing +| # | Section | Issue | Reviewer | 
Confidence | +|---|---------|-------|----------|------------| +| 2 | Scope Boundaries | 8 of 12 units build admin infrastructure; only 2 touch stated goal | scope-guardian | 0.80 | + +#### Omissions -| # | Section | Issue | Reviewer | Confidence | Route | -|---|---------|-------|----------|------------|-------| -| 4 | API Design | Public webhook endpoint has no rate limiting mentioned | security-lens | 0.75 | `present` | +| # | Section | Issue | Reviewer | Confidence | +|---|---------|-------|----------|------------| +| 3 | Implementation Unit 3 | Plan proposes custom auth but does not mention existing Devise setup or migration path | feasibility | 0.85 | + +### P2 -- Consider Fixing -### P3 -- Minor +#### Omissions -| # | Section | Issue | Reviewer | Confidence | Route | -|---|---------|-------|----------|------------|-------| -| 5 | Overview | "Service" used to mean both microservice and business class | coherence | 0.65 | `auto` | +| # | Section | Issue | Reviewer | Confidence | +|---|---------|-------|----------|------------| +| 4 | API Design | Public webhook endpoint has no rate limiting mentioned | security-lens | 0.75 | ### Residual Concerns @@ -59,20 +76,22 @@ Use this **exact format** when presenting synthesized review findings. 
Findings ### Coverage -| Persona | Status | Findings | Residual | -|---------|--------|----------|----------| -| coherence | completed | 2 | 0 | -| feasibility | completed | 1 | 1 | -| security-lens | completed | 1 | 0 | -| scope-guardian | completed | 1 | 0 | -| product-lens | not activated | -- | -- | -| design-lens | not activated | -- | -- | +| Persona | Status | Findings | Auto | Batch | Present | Residual | +|---------|--------|----------|------|-------|---------|----------| +| coherence | completed | 3 | 2 | 0 | 1 | 0 | +| feasibility | completed | 2 | 0 | 1 | 1 | 1 | +| security-lens | completed | 2 | 0 | 1 | 1 | 0 | +| scope-guardian | completed | 1 | 0 | 0 | 1 | 0 | +| product-lens | not activated | -- | -- | -- | -- | -- | +| design-lens | not activated | -- | -- | -- | -- | -- | ``` ## Section Rules +- **Summary line**: Always present after the reviewer list. Format: "Applied N auto-fixes. Batched M fixes for approval. K findings to consider (X errors, Y omissions)." Omit any zero clause. - **Auto-fixes Applied**: List fixes that were applied automatically (auto class). Omit section if none. -- **P0-P3 sections**: Only include sections that have findings. Omit empty severity levels. +- **Batch Confirm**: Group `batch_confirm` findings for a single yes/no/select approval. Omit section if none. +- **P0-P3 sections**: Only include sections that have findings. Omit empty severity levels. Within each severity, separate into **Errors** and **Omissions** sub-headers. Omit a sub-header if that severity has none of that type. - **Residual Concerns**: Findings below confidence threshold that were promoted by cross-persona corroboration, plus unpromoted residual risks. Omit if none. - **Deferred Questions**: Questions for later workflow stages. Omit if none. -- **Coverage**: Always include. Shows which personas ran and their output counts. +- **Coverage**: Always include. Shows which personas ran and their output counts broken down by route (Auto, Batch, Present). 
diff --git a/.codex/skills/document-review/references/subagent-template.md b/.codex/skills/document-review/references/subagent-template.md index f21e0f1d13..870a8c21e1 100644 --- a/.codex/skills/document-review/references/subagent-template.md +++ b/.codex/skills/document-review/references/subagent-template.md @@ -22,9 +22,13 @@ Rules: - Suppress any finding below your stated confidence floor (see your Confidence calibration section). - Every finding MUST include at least one evidence item -- a direct quote from the document. - You are operationally read-only. Analyze the document and produce findings. Do not edit the document, create files, or make changes. You may use non-mutating tools (file reads, glob, grep, git log) to gather context about the codebase when evaluating feasibility or existing patterns. +- Set `finding_type` for every finding: + - `error`: Something the document says that is wrong -- contradictions, incorrect statements, design tensions, incoherent tradeoffs. + - `omission`: Something the document forgot to say -- missing mechanical steps, absent list entries, undefined thresholds, forgotten cross-references. - Set `autofix_class` conservatively: - - `auto`: Only for local, deterministic fixes -- terminology corrections, formatting fixes, cross-reference repairs. The fix must be unambiguous and not change the document's meaning. - - `present`: Everything else -- strategic questions, tradeoffs, meaning-changing fixes, informational findings. + - `auto`: Deterministic fixes where the correct value is verifiable from the document itself -- terminology corrections, formatting fixes, cross-reference repairs, wrong counts, missing list entries where the correct entry exists elsewhere in the document. The fix must be unambiguous. + - `batch_confirm`: Obvious fix with one clear correct answer, but it touches document meaning. 
Examples: adding a missing implementation step that is mechanically implied by other content, updating a summary to match its own details. Use when reasonable people would agree on the fix but it goes beyond cosmetic correction. + - `present`: Everything else -- strategic questions, tradeoffs, design tensions where reasonable people could disagree, informational findings. - `suggested_fix` is optional. Only include it when the fix is obvious and correct. For `present` findings, frame as a question instead. - If you find no issues, return an empty findings array. Still populate residual_risks and deferred_questions if applicable. - Use your suppress conditions. Do not flag issues that belong to other personas. diff --git a/.codex/skills/dspy-ruby/SKILL.md b/.codex/skills/dspy-ruby/SKILL.md index 577c72cfe0..734f3f5eea 100644 --- a/.codex/skills/dspy-ruby/SKILL.md +++ b/.codex/skills/dspy-ruby/SKILL.md @@ -647,14 +647,14 @@ end ## Resources -- [core-concepts.md](./references/core-concepts.md) — Signatures, modules, predictors, type system deep-dive -- [toolsets.md](./references/toolsets.md) — Tools::Base, Tools::Toolset DSL, type safety, testing -- [providers.md](./references/providers.md) — Provider adapters, RubyLLM, fiber-local LM context, compatibility matrix -- [optimization.md](./references/optimization.md) — MIPROv2, GEPA, evaluation framework, storage system -- [observability.md](./references/observability.md) — Event system, dspy-o11y gems, Langfuse, score reporting -- [signature-template.rb](./assets/signature-template.rb) — Signature scaffold with T::Enum, Date/Time, defaults, union types -- [module-template.rb](./assets/module-template.rb) — Module scaffold with .call(), lifecycle callbacks, fiber-local LM -- [config-template.rb](./assets/config-template.rb) — Rails initializer with RubyLLM, observability, feature flags +- `references/core-concepts.md` — Signatures, modules, predictors, type system deep-dive +- `references/toolsets.md` — Tools::Base, 
Tools::Toolset DSL, type safety, testing +- `references/providers.md` — Provider adapters, RubyLLM, fiber-local LM context, compatibility matrix +- `references/optimization.md` — MIPROv2, GEPA, evaluation framework, storage system +- `references/observability.md` — Event system, dspy-o11y gems, Langfuse, score reporting +- `assets/signature-template.rb` — Signature scaffold with T::Enum, Date/Time, defaults, union types +- `assets/module-template.rb` — Module scaffold with .call(), lifecycle callbacks, fiber-local LM +- `assets/config-template.rb` — Rails initializer with RubyLLM, observability, feature flags ## Key URLs diff --git a/.codex/skills/git-clean-gone-branches/SKILL.md b/.codex/skills/git-clean-gone-branches/SKILL.md index 4dbeb5858d..612af17547 100644 --- a/.codex/skills/git-clean-gone-branches/SKILL.md +++ b/.codex/skills/git-clean-gone-branches/SKILL.md @@ -19,7 +19,7 @@ bash scripts/clean-gone [scripts/clean-gone](./scripts/clean-gone) -The script runs `git fetch --prune` first, then parses `git branch -vv` for branches marked `: gone]`. It uses `command git` to bypass shell aliases and RTK proxies. +The script runs `git fetch --prune` first, then parses `git branch -vv` for branches marked `: gone]`. If the script outputs `__NONE__`, report that no stale branches were found and stop. @@ -45,9 +45,9 @@ This is a yes-or-no decision on the entire list -- do not offer multi-selection If the user confirms, delete each branch. For each branch: -1. Check if it has an associated worktree (`command git worktree list | grep "\\[$branch\\]"`) -2. If a worktree exists and is not the main repo root, remove it first: `command git worktree remove --force "$worktree_path"` -3. Delete the branch: `command git branch -D "$branch"` +1. Check if it has an associated worktree (`git worktree list | grep "\\[$branch\\]"`) +2. If a worktree exists and is not the main repo root, remove it first: `git worktree remove --force "$worktree_path"` +3. 
Delete the branch: `git branch -D "$branch"` Report results as you go: @@ -61,7 +61,3 @@ Cleaned up 3 branches. ``` If the user declines, acknowledge and stop without deleting anything. - -## Important: Use `command git` - -Always invoke git as `command git` in shell commands. This bypasses shell aliases and tools like RTK (Rust Token Killer) that proxy git commands, ensuring consistent behavior and output parsing. diff --git a/.codex/skills/git-clean-gone-branches/scripts/clean-gone b/.codex/skills/git-clean-gone-branches/scripts/clean-gone index 0dd81ba9f6..da5b92b60f 100755 --- a/.codex/skills/git-clean-gone-branches/scripts/clean-gone +++ b/.codex/skills/git-clean-gone-branches/scripts/clean-gone @@ -1,12 +1,11 @@ #!/usr/bin/env bash # clean-gone: List local branches whose remote tracking branch is gone. # Outputs one branch name per line, or nothing if none found. -# Uses `command git` to bypass aliases and RTK proxies. set -euo pipefail # Ensure we have current remote state -command git fetch --prune 2>/dev/null +git fetch --prune 2>/dev/null # Find branches marked [gone] in tracking info. # `git branch -vv` output format: @@ -37,7 +36,7 @@ while IFS= read -r line; do fi gone_branches+=("$branch_name") -done < <(command git branch -vv 2>/dev/null | grep ': gone]') +done < <(git branch -vv 2>/dev/null | grep ': gone]') if [[ ${#gone_branches[@]} -eq 0 ]]; then echo "__NONE__" diff --git a/.codex/skills/git-commit-push-pr/SKILL.md b/.codex/skills/git-commit-push-pr/SKILL.md index c86b36028d..be2c3176f7 100644 --- a/.codex/skills/git-commit-push-pr/SKILL.md +++ b/.codex/skills/git-commit-push-pr/SKILL.md @@ -1,35 +1,118 @@ --- name: git-commit-push-pr -description: Commit, push, and open a PR with an adaptive, value-first description. Use when the user says "commit and PR", "push and open a PR", "ship this", "create a PR", "open a pull request", "commit push PR", or wants to go from working changes to an open pull request in one step. 
Produces PR descriptions that scale in depth with the complexity of the change, avoiding cookie-cutter templates. +description: Commit, push, and open a PR with an adaptive, value-first description. Use when the user says "commit and PR", "push and open a PR", "ship this", "create a PR", "open a pull request", "commit push PR", or wants to go from working changes to an open pull request in one step. Also use when the user says "update the PR description", "refresh the PR description", "freshen the PR", or wants to rewrite an existing PR description. Produces PR descriptions that scale in depth with the complexity of the change, avoiding cookie-cutter templates. --- # Git Commit, Push, and PR -Go from working tree changes to an open pull request in a single workflow. The key differentiator of this skill is PR descriptions that communicate *value and intent* proportional to the complexity of the change. +Go from working tree changes to an open pull request in a single workflow, or update an existing PR description. The key differentiator of this skill is PR descriptions that communicate *value and intent* proportional to the complexity of the change. -## Workflow +## Mode detection + +If the user is asking to update, refresh, or rewrite an existing PR description (with no mention of committing or pushing), this is a **description-only update**. The user may also provide a focus for the update (e.g., "update the PR description and add the benchmarking results"). Note any focus instructions for use in DU-3. + +For description-only updates, follow the Description Update workflow below. Otherwise, follow the full workflow. + +--- + +## Description Update workflow + +### DU-1: Confirm intent + +Ask the user to confirm: "Update the PR description for this branch?" Use the platform's blocking question tool (`AskUserQuestion` in Claude Code, `request_user_input` in Codex, `ask_user` in Gemini). 
If no question tool is available, present the question and wait for the user's reply. + +If the user declines, stop. + +### DU-2: Find the PR + +Run these commands to identify the branch and locate the PR: + +```bash +git branch --show-current +``` + +If empty (detached HEAD), report that there is no branch to update and stop. + +Otherwise, check for an existing open PR: + +```bash +gh pr view --json url,title,state +``` + +Interpret the result. Do not treat every non-zero exit as a fatal error here: + +- If it returns PR data with `state: OPEN`, an open PR exists for the current branch. +- If it returns PR data with a non-OPEN state (CLOSED, MERGED), treat this as "no open PR." Report that no open PR exists for this branch and stop. +- If it exits non-zero and the output indicates that no pull request exists for the current branch, treat that as the normal "no PR for this branch" state. Report that no open PR exists for this branch and stop. +- If it errors for another reason (auth, network, repo config), report the error and stop. + +### DU-3: Write and apply the updated description + +Read the current PR description: + +```bash +gh pr view --json body --jq '.body' +``` + +Follow the "Detect the base branch and remote" and "Gather the branch scope" sections of Step 6 to get the full branch diff. Use the PR found in DU-2 as the existing PR for base branch detection. Then write a new description following the writing principles in Step 6. If the user provided a focus, incorporate it into the description alongside the branch diff context. + +Compare the new description against the current one and summarize the substantial changes for the user (e.g., "Added coverage of the new caching layer, updated test plan, removed outdated migration notes"). If the user provided a focus, confirm it was addressed. Ask the user to confirm before applying. 
Use the platform's blocking question tool (`AskUserQuestion` in Claude Code, `request_user_input` in Codex, `ask_user` in Gemini). If no question tool is available, present the summary and wait for the user's reply. + +If confirmed, apply: + +```bash +gh pr edit --body "$(cat <<'EOF' +Updated description here +EOF +)" +``` + +Report the PR URL. + +--- + +## Full workflow ### Step 1: Gather context -Run these commands. Use `command git` to bypass aliases and RTK proxies. +Run these commands. ```bash -command git status -command git diff HEAD -command git branch --show-current -command git log --oneline -10 -command git rev-parse --abbrev-ref origin/HEAD +git status +git diff HEAD +git branch --show-current +git log --oneline -10 +git rev-parse --abbrev-ref origin/HEAD ``` The last command returns the remote default branch (e.g., `origin/main`). Strip the `origin/` prefix to get the branch name. If the command fails or returns a bare `HEAD`, try: ```bash -command gh repo view --json defaultBranchRef --jq '.defaultBranchRef.name' +gh repo view --json defaultBranchRef --jq '.defaultBranchRef.name' ``` If both fail, fall back to `main`. -If there are no changes, report that and stop. +Run `git branch --show-current`. If it returns an empty result, the repository is in detached HEAD state. Explain that a branch is required before committing and pushing. Ask whether to create a feature branch now. Use the platform's blocking question tool (`AskUserQuestion` in Claude Code, `request_user_input` in Codex, `ask_user` in Gemini). If no question tool is available, present the options and wait for the user's reply. + +- If the user agrees, derive a descriptive branch name from the change content, create it with `git checkout -b `, then run `git branch --show-current` again and use that result as the current branch name for the rest of the workflow. +- If the user declines, stop. 
+ +If the `git status` result from this step shows a clean working tree (no staged, modified, or untracked files), check whether there are unpushed commits or a missing PR before stopping: + +1. Run `git branch --show-current` to get the current branch name. +2. Run `git rev-parse --abbrev-ref --symbolic-full-name @{u}` to check whether an upstream is configured. +3. If the command succeeds, run `git log ..HEAD --oneline` using the upstream name from the previous command. +4. If an upstream is configured, check for an existing PR using the method in Step 3. + +- If the current branch is `main`, `master`, or the resolved default branch from Step 1 and there is **no upstream** or there are **unpushed commits**, explain that pushing now would use the default branch directly. Ask whether to create a feature branch first. Use the platform's blocking question tool (`AskUserQuestion` in Claude Code, `request_user_input` in Codex, `ask_user` in Gemini). If no question tool is available, present the options and wait for the user's reply. +- If the user agrees, derive a descriptive branch name from the change content, create it with `git checkout -b `, then continue from Step 5 (push). +- If the user declines, report that this workflow cannot open a PR from the default branch directly and stop. +- If there is **no upstream**, treat the branch as needing its first push. Skip Step 4 (commit) and continue from Step 5 (push). +- If there are **unpushed commits**, skip Step 4 (commit) and continue from Step 5 (push). +- If all commits are pushed but **no open PR exists** and the current branch is `main`, `master`, or the resolved default branch from Step 1, report that there is no feature branch work to open as a PR and stop. +- If all commits are pushed but **no open PR exists**, skip Steps 4-5 and continue from Step 6 (write the PR description) and Step 7 (create the PR). +- If all commits are pushed **and an open PR exists**, report that and stop -- there is nothing to do. 
### Step 2: Determine conventions @@ -41,22 +124,24 @@ Follow this priority order for commit messages *and* PR titles: ### Step 3: Check for existing PR -Before committing, check whether a PR already exists for the current branch: +Run `git branch --show-current` to get the current branch name. If it returns an empty result here, report that the workflow is still in detached HEAD state and stop. + +Then check for an existing open PR: ```bash -command gh pr view --json url,title,state +gh pr view --json url,title,state ``` -Interpret the result: +Interpret the result. Do not treat every non-zero exit as a fatal error here: -- If it **returns PR data with `state: OPEN`**, note the URL and continue to Step 4 (commit) and Step 5 (push). Then skip to Step 7 (existing PR flow) instead of creating a new PR. -- If it **returns PR data with a non-OPEN state** (CLOSED, MERGED), treat this the same as "no PR exists" -- the previous PR is done and a new one is needed. -- If it **errors with "no pull requests found"**, no PR exists. Continue to Step 4 through Step 8 as normal. -- If it **errors for another reason** (auth, network, repo config), report the error to the user and stop. +- If it **returns PR data with `state: OPEN`**, an open PR exists for the current branch. Note the URL and continue to Step 4 (commit) and Step 5 (push). Then skip to Step 7 (existing PR flow) instead of creating a new PR. +- If it **returns PR data with a non-OPEN state** (CLOSED, MERGED), treat this the same as "no PR exists" -- the previous PR is done and a new one is needed. Continue to Step 4 through Step 8 as normal. +- If it **exits non-zero and the output indicates that no pull request exists for the current branch**, no PR exists. Continue to Step 4 through Step 8 as normal. +- If it **errors** (auth, network, repo config), report the error to the user and stop. ### Step 4: Branch, stage, and commit -1. 
If on `main`, `master`, or the resolved default branch from Step 1, create a descriptive feature branch first (`command git checkout -b `). Derive the branch name from the change content. +1. Run `git branch --show-current`. If it returns `main`, `master`, or the resolved default branch from Step 1, create a descriptive feature branch first with `git checkout -b `. Derive the branch name from the change content. 2. Before staging everything together, scan the changed files for naturally distinct concerns. If modified files clearly group into separate logical changes (e.g., a refactor in one set of files and a new feature in another), create separate commits for each group. Keep this lightweight -- group at the **file level only** (no `git add -p`), split only when obvious, and aim for two or three logical commits at most. If it's ambiguous, one commit is fine. 3. Stage relevant files by name. Avoid `git add -A` or `git add .` to prevent accidentally including sensitive files. 4. Commit following the conventions from Step 2. Use a heredoc for the message. @@ -64,7 +149,7 @@ Interpret the result: ### Step 5: Push ```bash -command git push -u origin HEAD +git push -u origin HEAD ``` ### Step 6: Write the PR description @@ -79,26 +164,26 @@ Use this fallback chain. Stop at the first that succeeds: 1. **PR metadata** (if an existing PR was found in Step 3): ```bash - command gh pr view --json baseRefName,url + gh pr view --json baseRefName,url ``` Extract `baseRefName` as the base branch name. The PR URL contains the base repository (`https://github.com///pull/...`). Determine which local remote corresponds to that repository: ```bash - command git remote -v + git remote -v ``` Match the `owner/repo` from the PR URL against the fetch URLs. Use the matching remote as the base remote. If no remote matches, fall back to `origin`. 2. 
**`origin/HEAD` symbolic ref:** ```bash - command git symbolic-ref --quiet --short refs/remotes/origin/HEAD + git symbolic-ref --quiet --short refs/remotes/origin/HEAD ``` Strip the `origin/` prefix from the result. Use `origin` as the base remote. 3. **GitHub default branch metadata:** ```bash - command gh repo view --json defaultBranchRef --jq '.defaultBranchRef.name' + gh repo view --json defaultBranchRef --jq '.defaultBranchRef.name' ``` Use `origin` as the base remote. 4. **Common branch names** -- check `main`, `master`, `develop`, `trunk` in order. Use the first that exists on the remote: ```bash - command git rev-parse --verify origin/ + git rev-parse --verify origin/ ``` Use `origin` as the base remote. @@ -110,23 +195,23 @@ Once the base branch and remote are known: 1. Verify the remote-tracking ref exists locally and fetch if needed: ```bash - command git rev-parse --verify / + git rev-parse --verify / ``` If this fails (ref missing or stale), fetch it: ```bash - command git fetch --no-tags + git fetch --no-tags ``` 2. Find the merge base: ```bash - command git merge-base / HEAD + git merge-base / HEAD ``` 2. List all commits unique to this branch: ```bash - command git log --oneline ..HEAD + git log --oneline ..HEAD ``` 3. Get the full diff a reviewer will see: ```bash - command git diff ...HEAD + git diff ...HEAD ``` Use the full branch diff and commit list as the basis for the PR description -- not the working-tree diff from Step 1. @@ -155,7 +240,9 @@ Use this to select the right description depth: #### Writing principles - **Lead with value**: The first sentence should tell the reviewer *why this PR exists*, not *what files changed*. "Fixes timeout errors during batch exports" beats "Updated export_handler.py and config.yaml". +- **No orphaned opening paragraphs**: If the description uses `##` section headings anywhere, the opening summary must also be under a heading (e.g., `## Summary`). 
An untitled paragraph followed by titled sections looks like a missing heading. For short descriptions with no sections, a bare paragraph is fine. - **Describe the net result, not the journey**: The PR description is about the end state -- what changed and why. Do not include work-product details like bugs found and fixed during development, intermediate failures, debugging steps, iteration history, or refactoring done along the way. Those are part of getting the work done, not part of the result. If a bug fix happened during development, the fix is already in the diff -- mentioning it in the description implies it's a separate concern the reviewer should evaluate, when really it's just part of the final implementation. Exception: include process details only when they are critical for a reviewer to understand a design choice (e.g., "tried approach X first but it caused Y, so went with Z instead"). +- **When commits conflict, trust the final diff**: The commit list is supporting context, not the source of truth for the final PR description. If commit messages describe intermediate steps that were later revised or reverted (for example, "switch to gh pr list" followed by a later change back to `gh pr view`), describe the end state shown by the full branch diff. Do not narrate contradictory commit history as if all of it shipped. - **Explain the non-obvious**: If the diff is self-explanatory, don't narrate it. Spend description space on things the diff *doesn't* show: why this approach, what was considered and rejected, what the reviewer should pay attention to. - **Use structure when it earns its keep**: Headers, bullet lists, and tables are tools -- use them when they aid comprehension, not as mandatory template sections. An empty "## Breaking Changes" section adds noise. - **Markdown tables for data**: When there are before/after comparisons, performance numbers, or option trade-offs, a table communicates density well. 
Example: @@ -217,7 +304,7 @@ Fill in at PR creation time: #### New PR (no existing PR from Step 3) ```bash -command gh pr create --title "the pr title" --body "$(cat <<'EOF' +gh pr create --title "the pr title" --body "$(cat <<'EOF' PR description here --- @@ -237,7 +324,7 @@ The new commits are already on the PR from the push in Step 5. Report the PR URL - If **yes** -- write a new description following the same principles in Step 6 (size the full PR, not just the new commits), including the Compound Engineering badge unless one is already present in the existing description. Apply it: ```bash - command gh pr edit --body "$(cat <<'EOF' + gh pr edit --body "$(cat <<'EOF' Updated description here EOF )" @@ -248,7 +335,3 @@ The new commits are already on the PR from the push in Step 5. Report the PR URL ### Step 8: Report Output the PR URL so the user can navigate to it directly. - -## Important: Use `command git` and `command gh` - -Always invoke git as `command git` and gh as `command gh` in shell commands. This bypasses shell aliases and tools like RTK (Rust Token Killer) that proxy commands. diff --git a/.codex/skills/git-commit/SKILL.md b/.codex/skills/git-commit/SKILL.md index ca9b64a159..49752ae076 100644 --- a/.codex/skills/git-commit/SKILL.md +++ b/.codex/skills/git-commit/SKILL.md @@ -11,27 +11,30 @@ Create a single, well-crafted git commit from the current working tree changes. ### Step 1: Gather context -Run these commands to understand the current state. Use `command git` to bypass aliases and RTK proxies. +Run these commands to understand the current state. ```bash -command git status -command git diff HEAD -command git branch --show-current -command git log --oneline -10 -command git rev-parse --abbrev-ref origin/HEAD +git status +git diff HEAD +git branch --show-current +git log --oneline -10 +git rev-parse --abbrev-ref origin/HEAD ``` The last command returns the remote default branch (e.g., `origin/main`). 
Strip the `origin/` prefix to get the branch name. If the command fails or returns a bare `HEAD`, try: ```bash -command gh repo view --json defaultBranchRef --jq '.defaultBranchRef.name' +gh repo view --json defaultBranchRef --jq '.defaultBranchRef.name' ``` If both fail, fall back to `main`. -If there are no changes (nothing staged, nothing modified), report that and stop. +If the `git status` result from this step shows a clean working tree (no staged, modified, or untracked files), report that there is nothing to commit and stop. -If the current branch matches `main`, `master`, or the resolved default branch name, warn the user and ask whether to continue committing here or create a feature branch first. Use the platform's blocking question tool (`AskUserQuestion` in Claude Code, `request_user_input` in Codex, `ask_user` in Gemini). If no question tool is available, present the options and wait for the user's reply before proceeding. If the user chooses to create a branch, derive the name from the change content and switch to it before continuing. +Run `git branch --show-current`. If it returns an empty result, the repository is in detached HEAD state. Explain that a branch is required before committing if the user wants this work attached to a branch. Ask whether to create a feature branch now. Use the platform's blocking question tool (`AskUserQuestion` in Claude Code, `request_user_input` in Codex, `ask_user` in Gemini). If no question tool is available, present the options and wait for the user's reply before proceeding. + +- If the user chooses to create a branch, derive the name from the change content, create it with `git checkout -b <branch-name>`, then run `git branch --show-current` again and use that result as the current branch name for the rest of the workflow. +- If the user declines, continue with the detached HEAD commit.
### Step 2: Determine commit message convention @@ -52,6 +55,8 @@ Keep this lightweight: ### Step 4: Stage and commit +Run `git branch --show-current`. If it returns `main`, `master`, or the resolved default branch from Step 1, warn the user and ask whether to continue committing here or create a feature branch first. Use the platform's blocking question tool (`AskUserQuestion` in Claude Code, `request_user_input` in Codex, `ask_user` in Gemini). If no question tool is available, present the options and wait for the user's reply before proceeding. If the user chooses to create a branch, derive the name from the change content, create it with `git checkout -b <branch-name>`, then run `git branch --show-current` again and use that result as the current branch name for the rest of the workflow. + Stage the relevant files. Prefer staging specific files by name over `git add -A` or `git add .` to avoid accidentally including sensitive files (.env, credentials) or unrelated changes. Write the commit message: @@ -61,7 +66,7 @@ Write the commit message: Use a heredoc to preserve formatting: ```bash -command git commit -m "$(cat <<'EOF' +git commit -m "$(cat <<'EOF' type(scope): subject line here Optional body explaining why this change was made, @@ -72,8 +77,4 @@ EOF ``` ### Step 5: Confirm -Run `command git status` after the commit to verify success. Report the commit hash(es) and subject line(s). - -## Important: Use `command git` - -Always invoke git as `command git` in shell commands. This bypasses shell aliases and tools like RTK (Rust Token Killer) that proxy git commands. +Run `git status` after the commit to verify success. Report the commit hash(es) and subject line(s).
diff --git a/.codex/skills/learnings-researcher/SKILL.md b/.codex/skills/learnings-researcher/SKILL.md index 847e41115b..de392f5577 100644 --- a/.codex/skills/learnings-researcher/SKILL.md +++ b/.codex/skills/learnings-researcher/SKILL.md @@ -153,7 +153,10 @@ For each relevant document, return a summary in this format: ## Frontmatter Schema Reference -Reference the [yaml-schema.md](../../skills/compound-docs/references/yaml-schema.md) for the complete schema. Key enum values: +Use this on-demand schema reference when you need the full contract: +`../../skills/ce-compound/references/yaml-schema.md` + +Key enum values: **problem_type values:** - build_error, test_failure, runtime_error, performance_issue @@ -257,8 +260,7 @@ Structure your findings as: ## Integration Points This agent is designed to be invoked by: -- `/ce:plan` - To inform planning with institutional knowledge -- `/deepen-plan` - To add depth with relevant learnings +- `/ce:plan` - To inform planning with institutional knowledge and add depth during confidence checking - Manual invocation before starting work on a feature The goal is to surface relevant learnings in under 30 seconds for a typical solutions directory, enabling fast knowledge retrieval during planning phases. diff --git a/.codex/skills/lfg/SKILL.md b/.codex/skills/lfg/SKILL.md index a4543b774a..99edb3f8f5 100644 --- a/.codex/skills/lfg/SKILL.md +++ b/.codex/skills/lfg/SKILL.md @@ -5,7 +5,7 @@ argument-hint: '[feature description]' disable-model-invocation: true --- -CRITICAL: You MUST execute every step below IN ORDER. Do NOT skip any required step. Do NOT jump ahead to coding or implementation. The plan phase (step 2, and step 3 when warranted) MUST be completed and verified BEFORE any work begins. Violating this order produces bad output. +CRITICAL: You MUST execute every step below IN ORDER. Do NOT skip any required step. Do NOT jump ahead to coding or implementation. 
The plan phase (step 2) MUST be completed and verified BEFORE any work begins. Violating this order produces bad output. 1. **Optional:** If the `ralph-loop` skill is available, run `/ralph-loop:ralph-loop "finish all slash commands" --completion-promise "DONE"`. If not available or it fails, skip and continue to step 2 immediately. @@ -13,24 +13,18 @@ CRITICAL: You MUST execute every step below IN ORDER. Do NOT skip any required s GATE: STOP. Verify that the `ce:plan` workflow produced a plan file in `docs/plans/`. If no plan file was created, run `/ce:plan $ARGUMENTS` again. Do NOT proceed to step 3 until a written plan exists. -3. **Conditionally** run `/compound-engineering:deepen-plan` +3. `/ce:work` - Run the `deepen-plan` workflow only if the plan is `Standard` or `Deep`, touches a high-risk area (auth, security, payments, migrations, external APIs, significant rollout concerns), or still has obvious confidence gaps in decisions, sequencing, system-wide impact, risks, or verification. + GATE: STOP. Verify that implementation work was performed - files were created or modified beyond the plan. Do NOT proceed to step 4 if no code changes were made. - GATE: STOP. If you ran the `deepen-plan` workflow, confirm the plan was deepened or explicitly judged sufficiently grounded. If you skipped it, briefly note why and proceed to step 4. +4. `/ce:review mode:autofix` -4. `/ce:work` +5. `/compound-engineering:todo-resolve` - GATE: STOP. Verify that implementation work was performed - files were created or modified beyond the plan. Do NOT proceed to step 5 if no code changes were made. +6. `/compound-engineering:test-browser` -5. `/ce:review mode:autofix` +7. `/compound-engineering:feature-video` -6. `/compound-engineering:todo-resolve` - -7. `/compound-engineering:test-browser` - -8. `/compound-engineering:feature-video` - -9. Output `DONE` when video is in PR +8. Output `DONE` when video is in PR Start with step 2 now (or step 1 if ralph-loop is available). 
Remember: plan FIRST, then work. Never skip the plan. diff --git a/.codex/skills/project-standards-reviewer/SKILL.md b/.codex/skills/project-standards-reviewer/SKILL.md new file mode 100644 index 0000000000..4417d07c47 --- /dev/null +++ b/.codex/skills/project-standards-reviewer/SKILL.md @@ -0,0 +1,79 @@ +--- +name: project-standards-reviewer +description: Always-on code-review persona. Audits changes against the project's own CLAUDE.md and AGENTS.md standards -- frontmatter rules, reference inclusion, naming conventions, cross-platform portability, and tool selection policies. +model: inherit +tools: Read, Grep, Glob, Bash +color: blue +--- + +# Project Standards Reviewer + +You audit code changes against the project's own standards files -- CLAUDE.md, AGENTS.md, and any directory-scoped equivalents. Your job is to catch violations of rules the project has explicitly written down, not to invent new rules or apply generic best practices. Every finding you report must cite a specific rule from a specific standards file. + +## Standards discovery + +The orchestrator passes a `` block listing the file paths of all relevant CLAUDE.md and AGENTS.md files. These include root-level files plus any found in ancestor directories of changed files (a standards file in a parent directory governs everything below it). Read those files to obtain the review criteria. + +If no `` block is present (standalone usage), discover the paths yourself: + +1. Use the native file-search/glob tool to find all `CLAUDE.md` and `AGENTS.md` files in the repository. +2. For each changed file, check its ancestor directories up to the repo root for standards files. A file like `plugins/compound-engineering/AGENTS.md` applies to all changes under `plugins/compound-engineering/`. +3. Read each relevant standards file found. + +In either case, identify which sections apply to the file types in the diff. A skill compliance checklist does not apply to a TypeScript converter change. 
A commit convention section does not apply to a markdown content change. Match rules to the files they govern. + +## What you're hunting for + +- **YAML frontmatter violations** -- missing required fields (`name`, `description`), description values that don't follow the stated format ("what it does and when to use it"), names that don't match directory names. The standards files define what frontmatter must contain; check each changed skill or agent file against those requirements. + +- **Reference file inclusion mistakes** -- markdown links (`[file](./references/file.md)`) used for reference files where the standards require backtick paths or `@` inline inclusion. Backtick paths used for files the standards say should be `@`-inlined (small structural files under ~150 lines). `@` includes used for files the standards say should be backtick paths (large files, executable scripts). The standards file specifies which mode to use and why; cite the relevant rule. + +- **Broken cross-references** -- agent names that are not fully qualified (e.g., `learnings-researcher` instead of `compound-engineering:research:learnings-researcher`). Skill-to-skill references using slash syntax inside a SKILL.md where the standards say to use semantic wording. References to tools by platform-specific names without naming the capability class. + +- **Cross-platform portability violations** -- platform-specific tool names used without equivalents (e.g., `TodoWrite` instead of `TaskCreate`/`TaskUpdate`/`TaskList`). Slash references in pass-through SKILL.md files that won't be remapped. Assumptions about tool availability that break on other platforms. + +- **Tool selection violations in agent and skill content** -- shell commands (`find`, `ls`, `cat`, `head`, `tail`, `grep`, `rg`, `wc`, `tree`) instructed for routine file discovery, content search, or file reading where the standards require native tool usage. 
Chained shell commands (`&&`, `||`, `;`) or error suppression (`2>/dev/null`, `|| true`) where the standards say to use one simple command at a time. + +- **Naming and structure violations** -- files placed in the wrong directory category, component naming that doesn't match the stated convention, missing additions to README tables or counts when components are added or removed. + +- **Writing style violations** -- second person ("you should") where the standards require imperative/objective form. Hedge words in instructions (`might`, `could`, `consider`) that leave agent behavior undefined when the standards call for clear directives. + +- **Protected artifact violations** -- findings, suggestions, or instructions that recommend deleting or gitignoring files in paths the standards designate as protected (e.g., `docs/brainstorms/`, `docs/plans/`, `docs/solutions/`). + +## Confidence calibration + +Your confidence should be **high (0.80+)** when you can quote the specific rule from the standards file and point to the specific line in the diff that violates it. Both the rule and the violation are unambiguous. + +Your confidence should be **moderate (0.60-0.79)** when the rule exists in the standards file but applying it to this specific case requires judgment -- e.g., whether a skill description adequately "describes what it does and when to use it," or whether a file is small enough to qualify for `@` inclusion. + +Your confidence should be **low (below 0.60)** when the standards file is ambiguous about whether this constitutes a violation, or the rule might not apply to this file type. Suppress these. + +## What you don't flag + +- **Rules that don't apply to the changed file type.** Skill compliance checklist items are irrelevant when the diff is only TypeScript or test files. Commit conventions don't apply to markdown content changes. Match rules to what they govern. 
+- **Violations that automated checks already catch.** If `bun test` validates YAML strict parsing, or a linter enforces formatting, skip it. Focus on semantic compliance that tools miss. +- **Pre-existing violations in unchanged code.** If an existing SKILL.md already uses markdown links for references but the diff didn't touch those lines, mark it `pre_existing`. Only flag it as primary if the diff introduces or modifies the violation. +- **Generic best practices not in any standards file.** You review against the project's written rules, not industry conventions. If the standards files don't mention it, you don't flag it. +- **Opinions on the quality of the standards themselves.** The standards files are your criteria, not your review target. Do not suggest improvements to CLAUDE.md or AGENTS.md content. + +## Evidence requirements + +Every finding must include: + +1. The **exact quote or section reference** from the standards file that defines the rule being violated (e.g., "AGENTS.md, Skill Compliance Checklist: 'Do NOT use markdown links like `[filename.md](./references/filename.md)`'"). +2. The **specific line(s) in the diff** that violate the rule. + +A finding without both a cited rule and a cited violation is not a finding. Drop it. + +## Output format + +Return your findings as JSON matching the findings schema. No prose outside the JSON. + +```json +{ + "reviewer": "project-standards", + "findings": [], + "residual_risks": [], + "testing_gaps": [] +} +``` diff --git a/.codex/skills/slfg/SKILL.md b/.codex/skills/slfg/SKILL.md index de2c937a43..5543124112 100644 --- a/.codex/skills/slfg/SKILL.md +++ b/.codex/skills/slfg/SKILL.md @@ -11,29 +11,25 @@ Swarm-enabled LFG. Run these steps in order, parallelizing where indicated. Do n 1. **Optional:** If the `ralph-loop` skill is available, run `/ralph-loop:ralph-loop "finish all slash commands" --completion-promise "DONE"`. If not available or it fails, skip and continue to step 2 immediately. 2. 
`/ce:plan $ARGUMENTS` -3. **Conditionally** run `/compound-engineering:deepen-plan` - - Run the `deepen-plan` workflow only if the plan is `Standard` or `Deep`, touches a high-risk area (auth, security, payments, migrations, external APIs, significant rollout concerns), or still has obvious confidence gaps in decisions, sequencing, system-wide impact, risks, or verification - - If you run the `deepen-plan` workflow, confirm the plan was deepened or explicitly judged sufficiently grounded before moving on - - If you skip it, note why and continue to step 4 -4. `/ce:work` — **Use swarm mode**: Make a Task list and launch an army of agent swarm subagents to build the plan +3. `/ce:work` — **Use swarm mode**: Make a Task list and launch an army of agent swarm subagents to build the plan ## Parallel Phase -After work completes, launch steps 5 and 6 as **parallel swarm agents** (both only need code to be written): +After work completes, launch steps 4 and 5 as **parallel swarm agents** (both only need code to be written): -5. `/ce:review mode:report-only` — spawn as background Task agent -6. `/compound-engineering:test-browser` — spawn as background Task agent +4. `/ce:review mode:report-only` — spawn as background Task agent +5. `/compound-engineering:test-browser` — spawn as background Task agent Wait for both to complete before continuing. ## Autofix Phase -7. `/ce:review mode:autofix` — run sequentially after the parallel phase so it can safely mutate the checkout, apply `safe_auto` fixes, and emit residual todos for step 8 +6. `/ce:review mode:autofix` — run sequentially after the parallel phase so it can safely mutate the checkout, apply `safe_auto` fixes, and emit residual todos for step 7 ## Finalize Phase -8. `/compound-engineering:todo-resolve` — resolve findings, compound on learnings, clean up completed todos -9. `/compound-engineering:feature-video` — record the final walkthrough and add to PR -10. Output `DONE` when video is in PR +7. 
`/compound-engineering:todo-resolve` — resolve findings, compound on learnings, clean up completed todos +8. `/compound-engineering:feature-video` — record the final walkthrough and add to PR +9. Output `DONE` when video is in PR Start with step 1 now. diff --git a/.codex/skills/task/SKILL.md b/.codex/skills/task/SKILL.md index 3759a803e7..70d06b9beb 100644 --- a/.codex/skills/task/SKILL.md +++ b/.codex/skills/task/SKILL.md @@ -19,7 +19,9 @@ Handle $ARGUMENTS. Be thorough, not ceremonial. Start from the source of truth, - Prefer targeted tests and checks during iteration. - Keep the user updated at milestones. - Verify the actual result before claiming done. -- Do not default to research swarms, review swarms, browser proof, PR creation, or compounding. +- Do not default to research swarms, review swarms, or browser proof. +- For verified code-changing work, default to creating or updating the PR unless the user explicitly said not to. +- Do not default to compounding. ## Intake @@ -90,7 +92,8 @@ Apply this section only when the task source is a tracker item. - Use `gh` for fetch and sync-back. - If useful, rename the thread to ` `. - If the task is code-changing, prefer a branch name that includes the issue number. -- If the task reaches a meaningful outcome and came from the issue, post a concise issue comment unless blocked or the user said not to. +- If the task changed code and reached a verified meaningful outcome, create or update the PR before any issue comment unless blocked or the user said not to. +- If the task reaches a meaningful outcome and came from the issue, post a concise issue comment after the PR exists unless blocked or the user said not to. ### Linear @@ -101,7 +104,7 @@ Apply this section only when the task source is a tracker item. ### Tracked Task Non-Rules -- Do not require PR creation for every tracked task. +- Do not require PR creation for tracker tasks that did not change code, ended blocked, or were purely investigative. 
- Do not require browser screenshots for every tracked task. - Do not require tracker comments for investigations that ended blocked or inconclusive unless sync-back is useful. @@ -129,6 +132,9 @@ Apply this section only when the task source is a tracker item. - `agent-browser` or `test-browser` Use only when there is a real browser surface to verify. Require real browser proof only for browser or UI tasks. +- `git-commit-push-pr` + Use when verified work changed code and should ship as a PR. + Create or update the PR before any tracker comment. - `ce-compound` Use only after verified, non-trivial work that produced reusable knowledge. Never load it at the start. @@ -195,6 +201,7 @@ Keep verification mandatory but proportional. - Run browser verification only for browser or UI tasks. - Run broader repo-wide gates only when repo instructions require them or the change scope justifies them. - If the repo has a standard final gate, run it last. +- If verified work changed code, create or update the PR before tracker sync-back unless the user explicitly said not to. - If the task came from a tracked issue and the task reached a meaningful outcome, sync back unless the user said not to. - If UI changed, capture proof from the real browser surface. - Do not hardcode PR creation, screenshots, or tracker comments for every task. @@ -244,6 +251,7 @@ Apply this section only when the task came from a tracker item and reached a mea - Keep it focused on: - reproduced or baselined, when relevant - fixed or implemented + - PR: , when one exists - re-verified, with browser mention only when relevant - remaining caveat, if any - Do not mention: @@ -251,7 +259,9 @@ Apply this section only when the task came from a tracker item and reached a mea - tests, typecheck, or lint - screenshot paths - branch names - - PR mechanics + - commit, push, or staging mechanics +- Do not write the issue comment before the PR exists. 
+- If writing the comment after code-changing work, include the full PR URL. - Start only the first sentence with `Codex ...`. - Italicize each paragraph separately. @@ -260,6 +270,8 @@ Example: ```md _Codex implemented and verified this issue._ +_PR: https://github.com/owner/repo/pull/123._ + _Reproduced the bug, applied the fix, and re-verified the affected flow._ _Remaining caveat: none._ @@ -294,3 +306,4 @@ _Remaining caveat: none._ - Final handoff matched the task type. - Testing or batch handoff reported the completed slice, verification, and remaining queue when relevant. - Any tracker, browser, review, or compound follow-up was done only if actually relevant. + diff --git a/.codex/skills/test-xcode/SKILL.md b/.codex/skills/test-xcode/SKILL.md index dfb4d4e6d4..fc232e34de 100644 --- a/.codex/skills/test-xcode/SKILL.md +++ b/.codex/skills/test-xcode/SKILL.md @@ -94,6 +94,9 @@ Call `get_sim_logs` with the simulator UUID. Look for: - Error-level log messages - Failed network requests +**Known automation limitation — SwiftUI Text links:** +Simulated taps (via XcodeBuildMCP or any simulator automation tool) do not trigger gesture recognizers on SwiftUI `Text` views with inline `AttributedString` links. Taps report success but have no effect. This is a platform limitation — inline links are not exposed as separate elements in the accessibility tree. When a tap on a Text link has no visible effect, prompt the user to tap manually in the simulator. If the target URL is known, `xcrun simctl openurl ` can open it directly as a fallback. + ### 6. Human Verification (When Required) Pause for human input when testing touches flows that require device interaction. 
@@ -105,6 +108,7 @@ Pause for human input when testing touches flows that require device interaction | In-app purchases | "Complete a sandbox purchase" | | Camera/Photos | "Grant permissions and verify camera works" | | Location | "Allow location access and verify map updates" | +| SwiftUI Text links | "Please tap on [element description] manually — automated taps cannot trigger inline text links" | Ask the user (using the platform's question tool — e.g., `AskUserQuestion` in Claude Code, `request_user_input` in Codex, `ask_user` in Gemini — or present numbered options and wait): diff --git a/.codex/skills/todo-create/SKILL.md b/.codex/skills/todo-create/SKILL.md index ec7fc71107..7a6b15f9c3 100644 --- a/.codex/skills/todo-create/SKILL.md +++ b/.codex/skills/todo-create/SKILL.md @@ -34,7 +34,7 @@ The `.context/compound-engineering/todos/` directory is a file-based tracking sy ## File Structure -Each todo has YAML frontmatter and structured sections. Use the template at [todo-template.md](./assets/todo-template.md) when creating new todos. +Each todo has YAML frontmatter and structured sections. Use the todo template included below when creating new todos. ```yaml --- @@ -58,7 +58,7 @@ dependencies: ["001"] # Issue IDs this is blocked by 1. `mkdir -p .context/compound-engineering/todos/` 2. Search both paths for `[0-9]*-*.md`, find the highest numeric prefix, increment, zero-pad to 3 digits. -3. Read [todo-template.md](./assets/todo-template.md), write to canonical path as `{NEXT_ID}-pending-{priority}-{description}.md`. +3. Use the todo template included below, write to canonical path as `{NEXT_ID}-pending-{priority}-{description}.md`. 4. Fill Problem Statement, Findings, Proposed Solutions, Acceptance Criteria, and initial Work Log entry. 5. Set status: `pending` (needs triage) or `ready` (pre-approved). @@ -101,3 +101,9 @@ To check blockers: search for `{dep_id}-complete-*.md` in both paths. 
Missing ma ## Key Distinction This skill manages **durable, cross-session work items** persisted as markdown files. For temporary in-session step tracking, use platform task tools (`TaskCreate`/`TaskUpdate` in Claude Code, `update_plan` in Codex) instead. + +--- + +## Todo Template + +@./assets/todo-template.md diff --git a/.gitignore b/.gitignore index e59454bb5d..51d9960141 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ jspm_packages/ # compiled output /dist /tmp +/tmp* /out-tsc **/build diff --git a/apps/www/public/r/ai-docs.json b/apps/www/public/r/ai-docs.json index 2c654c5adf..9102c0d912 100644 --- a/apps/www/public/r/ai-docs.json +++ b/apps/www/public/r/ai-docs.json @@ -7,7 +7,7 @@ "files": [ { "path": "../../docs/(plugins)/(ai)/ai.mdx", - "content": "---\ntitle: AI\ndescription: AI-powered writing assistance.\ndocs:\n - route: https://pro.platejs.org/docs/examples/ai\n title: Plus\n---\n\n\n\n\n\n## Features\n\n- **Context-aware command menu** that adapts to cursor, text selection, and block selection workflows.\n- **Streaming Markdown/MDX insertion** with table, column, and code block support powered by `streamInsertChunk`.\n- **Insert and chat review modes** with undo-safe batching via `withAIBatch` and `tf.ai.undo()`.\n- **Block selection aware transforms** to replace or append entire sections using `tf.aiChat.replaceSelection` and `tf.aiChat.insertBelow`.\n- **Direct integration with `@ai-sdk/react`** so `api.aiChat.submit` can stream responses from Vercel AI SDK helpers.\n- **Suggestion and comment utilities** that diff AI edits, accept/reject changes, and map AI feedback back to document ranges.\n\n\n\n## Kit Usage\n\n\n\n### Installation\n\nThe fastest way to add AI functionality is with the `AIKit`. 
It ships the configured `AIPlugin`, `AIChatPlugin`, Markdown streaming helpers, cursor overlay, and their [Plate UI](/docs/installation/plate-ui) components.\n\n\n\n- [`AIMenu`](/docs/components/ai-menu): Floating command surface for prompts, tool shortcuts, and chat review.\n- [`AILoadingBar`](/docs/components/ai-loading-bar): Displays streaming status at the editor container.\n- [`AIAnchorElement`](/docs/components/ai-anchor-element): Invisible anchor node used to position the floating menu during streaming.\n- [`AILeaf`](/docs/components/ai-leaf): Renders AI-marked text with subtle styling.\n\n### Add Kit\n\n```tsx\nimport { createPlateEditor } from 'platejs/react';\nimport { AIKit } from '@/components/editor/plugins/ai-kit';\n\nconst editor = createPlateEditor({\n plugins: [\n // ...otherPlugins,\n ...AIKit,\n ],\n});\n```\n\n### Add API Route\n\nExpose a streaming command endpoint that proxies your model provider:\n\n\n\n### Configure Environment\n\nSet your AI Gateway key locally (replace with your provider secret if you are not using a gateway):\n\n```bash title=\".env.local\"\nAI_GATEWAY_API_KEY=\"your-api-key\"\n```\n\n\n\n## Manual Usage\n\n\n\n### Installation\n\n```bash\nnpm install @platejs/ai @platejs/markdown @platejs/selection @ai-sdk/react ai\n```\n\n`@platejs/suggestion` is optional but required for diff-based edit suggestions.\n\n### Add Plugins\n\n```tsx\nimport { createPlateEditor } from 'platejs/react';\nimport { AIChatPlugin, AIPlugin } from '@platejs/ai/react';\nimport { BlockSelectionPlugin } from '@platejs/selection/react';\nimport { MarkdownPlugin } from '@platejs/markdown';\n\nexport const editor = createPlateEditor({\n plugins: [\n BlockSelectionPlugin,\n MarkdownPlugin,\n AIPlugin,\n AIChatPlugin, // extended in the next step\n ],\n});\n```\n\n- `BlockSelectionPlugin`: Enables multi-block selections that `AIChatPlugin` relies on for insert/replace transforms.\n- `MarkdownPlugin`: Provides Markdown serialization used by streaming 
utilities.\n- `AIPlugin`: Adds the AI mark and transforms for undoing AI batches.\n- `AIChatPlugin`: Supplies the AI combobox, API helpers, and transforms.\n\nUse `AIPlugin.withComponent` with your own element (or [`AILeaf`](/docs/components/ai-leaf)) to highlight AI-generated text.\n\n### Configure AIChatPlugin\n\nExtend `AIChatPlugin` to hook streaming and edits. The example mirrors the core logic from `AIKit` while keeping the UI headless.\n\n```tsx\nimport { AIChatPlugin, applyAISuggestions, streamInsertChunk, useChatChunk } from '@platejs/ai/react';\nimport { withAIBatch } from '@platejs/ai';\nimport { getPluginType, KEYS, PathApi } from 'platejs';\nimport { usePluginOption } from 'platejs/react';\n\nexport const aiChatPlugin = AIChatPlugin.extend({\n options: {\n chatOptions: {\n api: '/api/ai/command',\n body: {\n model: 'openai/gpt-4o-mini',\n },\n },\n trigger: ' ',\n triggerPreviousCharPattern: /^\\s?$/,\n },\n useHooks: ({ editor, getOption }) => {\n const mode = usePluginOption(AIChatPlugin, 'mode');\n const toolName = usePluginOption(AIChatPlugin, 'toolName');\n\n useChatChunk({\n onChunk: ({ chunk, isFirst, text }) => {\n if (mode === 'insert') {\n if (isFirst) {\n editor.setOption(AIChatPlugin, 'streaming', true);\n\n editor.tf.insertNodes(\n {\n children: [{ text: '' }],\n type: getPluginType(editor, KEYS.aiChat),\n },\n {\n at: PathApi.next(editor.selection!.focus.path.slice(0, 1)),\n }\n );\n }\n\n if (!getOption('streaming')) return;\n\n withAIBatch(\n editor,\n () => {\n streamInsertChunk(editor, chunk, {\n textProps: {\n [getPluginType(editor, KEYS.ai)]: true,\n },\n });\n },\n { split: isFirst }\n );\n }\n\n if (toolName === 'edit' && mode === 'chat') {\n withAIBatch(\n editor,\n () => {\n applyAISuggestions(editor, text);\n },\n { split: isFirst }\n );\n }\n },\n onFinish: () => {\n editor.setOption(AIChatPlugin, 'streaming', false);\n editor.setOption(AIChatPlugin, '_blockChunks', '');\n editor.setOption(AIChatPlugin, '_blockPath', null);\n 
editor.setOption(AIChatPlugin, '_mdxName', null);\n },\n });\n },\n});\n```\n\n- `useChatChunk`: Watches `UseChatHelpers` status and yields incremental chunks.\n- `streamInsertChunk`: Streams Markdown/MDX into the document, reusing the existing block when possible.\n- `applyAISuggestions`: Converts responses into transient suggestion nodes when `toolName === 'edit'`.\n- `withAIBatch`: Marks history batches so `tf.ai.undo()` only reverts the last AI-generated change.\n\nProvide your own `render` components (toolbar button, floating menu, etc.) when you extend the plugin.\n\n### Build API Route\n\nHandle `api.aiChat.submit` requests on the server. Each request includes the chat `messages` from `@ai-sdk/react` and a `ctx` payload that contains the editor `children`, current `selection`, and last `toolName`.\n[Complete API example](https://github.com/udecode/plate-playground-template/blob/main/src/app/api/ai/command/route.ts)\n\n```ts title=\"app/api/ai/command/route.ts\"\nimport { createGateway } from '@ai-sdk/gateway';\nimport { convertToCoreMessages, streamText } from 'ai';\nimport { createSlateEditor } from 'platejs';\n\nimport { BaseEditorKit } from '@/registry/components/editor/editor-base-kit';\nimport { markdownJoinerTransform } from '@/registry/lib/markdown-joiner-transform';\n\nexport async function POST(req: Request) {\n const { apiKey, ctx, messages, model } = await req.json();\n\n const editor = createSlateEditor({\n plugins: BaseEditorKit,\n selection: ctx.selection,\n value: ctx.children,\n });\n\n const gateway = createGateway({\n apiKey: apiKey ?? process.env.AI_GATEWAY_API_KEY!,\n });\n\n const result = streamText({\n experimental_transform: markdownJoinerTransform(),\n messages: convertToCoreMessages(messages),\n model: gateway(model ?? 'openai/gpt-4o-mini'),\n system: ctx.toolName === 'edit' ? 'You are an editor that rewrites user text.' 
: undefined,\n });\n\n return result.toDataStreamResponse();\n}\n```\n\n- `ctx.children` and `ctx.selection` are rehydrated into a Slate editor so you can build rich prompts (see [Prompt Templates](#prompt-templates)).\n- Forward provider settings (model, apiKey, temperature, gateway flags, etc.) through `chatOptions.body`; everything you add is passed verbatim in the JSON payload and can be read before calling `createGateway`.\n- Always read secrets from the server. The client should only send opaque identifiers or short-lived tokens.\n- Return a streaming response so `useChat` and `useChatChunk` can process tokens incrementally.\n\n### Connect `useChat`\n\nBridge the editor and your model endpoint with `@ai-sdk/react`. Store helpers on the plugin so transforms can reload, stop, or show chat state.\n\n```tsx\nimport { useEffect } from 'react';\n\nimport { type UIMessage, DefaultChatTransport } from 'ai';\nimport { type UseChatHelpers, useChat } from '@ai-sdk/react';\nimport { AIChatPlugin } from '@platejs/ai/react';\nimport { useEditorPlugin } from 'platejs/react';\n\ntype ChatMessage = UIMessage<{}, { toolName: 'comment' | 'edit' | 'generate'; comment?: unknown }>;\n\nexport const useEditorAIChat = () => {\n const { editor, setOption } = useEditorPlugin(AIChatPlugin);\n\n const chat = useChat({\n id: 'editor',\n api: '/api/ai/command',\n transport: new DefaultChatTransport(),\n onData(data) {\n if (data.type === 'data-toolName') {\n editor.setOption(AIChatPlugin, 'toolName', data.data);\n }\n },\n });\n\n useEffect(() => {\n setOption('chat', chat as UseChatHelpers);\n }, [chat, setOption]);\n\n return chat;\n};\n```\n\nCombine the helper with `useEditorChat` to keep the floating menu anchored correctly:\n\n```tsx\nimport { useEditorChat } from '@platejs/ai/react';\n\nuseEditorChat({\n onOpenChange: (open) => {\n if (!open) chat.stop?.();\n },\n});\n```\n\nNow you can submit prompts programmatically:\n\n```tsx\nimport { AIChatPlugin } from 
'@platejs/ai/react';\n\neditor.getApi(AIChatPlugin).aiChat.submit('', {\n prompt: {\n default: 'Continue the document after {block}',\n selecting: 'Rewrite {selection} with a clearer tone',\n },\n toolName: 'generate',\n});\n```\n\n\n\n## Prompt Templates\n\n### Client Prompting\n\n- `api.aiChat.submit` accepts an `EditorPrompt`. Provide a string, an object with `default`/`selecting`/`blockSelecting`, or a function that receives `{ editor, isSelecting, isBlockSelecting }`. The helper `getEditorPrompt` in the client turns that value into the final string.\n- Combine it with `replacePlaceholders(editor, template, { prompt })` to expand `{editor}`, `{block}`, `{blockSelection}`, and `{prompt}` using Markdown generated by `@platejs/ai`.\n\n```tsx\nimport { replacePlaceholders } from '@platejs/ai';\n\neditor.getApi(AIChatPlugin).aiChat.submit('Improve tone', {\n prompt: ({ isSelecting }) =>\n isSelecting\n ? replacePlaceholders(editor, 'Rewrite {blockSelection} using a friendly tone.')\n : replacePlaceholders(editor, 'Continue {block} with two more sentences.'),\n toolName: 'generate',\n});\n```\n\n### Server Prompting\n\nThe demo backend in `apps/www/src/app/api/ai/command` reconstructs the editor from `ctx` and builds structured prompts:\n\n- `getChooseToolPrompt` decides whether the request is `generate`, `edit`, or `comment`.\n- `getGeneratePrompt`, `getEditPrompt`, and `getCommentPrompt` transform the current editor state into instructions tailored to each mode.\n- Utility helpers like `getMarkdown`, `getMarkdownWithSelection`, and `buildStructuredPrompt` (see `apps/www/src/app/api/ai/command/prompts.ts`) make it easy to embed block ids, selections, and MDX tags into the LLM request.\n\nAugment the payload you send from the client to fine-tune server prompts:\n\n```ts\neditor.setOption(aiChatPlugin, 'chatOptions', {\n api: '/api/ai/command',\n body: {\n model: 'openai/gpt-4o-mini',\n tone: 'playful',\n temperature: 0.4,\n },\n});\n```\n\nEverything under 
`chatOptions.body` arrives in the route handler, letting you swap providers, pass user-specific metadata, or branch into different prompt templates.\n\n## Keyboard Shortcuts\n\n\n Open the AI menu in an empty block (cursor mode)\n Show the AI menu (set via `shortcuts.show`)\n Hide the AI menu and stop streaming\n\n\n## Streaming\n\nThe streaming utilities keep complex layouts intact while responses arrive:\n\n- `streamInsertChunk(editor, chunk, options)` deserializes Markdown chunks, updates the current block in place, and appends new blocks as needed. Use `textProps`/`elementProps` to tag streamed nodes (e.g., mark AI text).\n- `streamDeserializeMd` and `streamDeserializeInlineMd` provide lower-level access if you need to control streaming for custom node types.\n- `streamSerializeMd` mirrors the editor state so you can detect drift between streamed content and the response buffer.\n\nReset the internal `_blockChunks`, `_blockPath`, and `_mdxName` options when streaming finishes to start the next response from a clean slate.\n\n## Streaming Example\n\n\n\n## Plate Plus\n\n\n\n## Hooks\n\n### `useAIChatEditor`\n\nRegisters an auxiliary editor for chat previews and deserializes Markdown with block-level memoization.\n\n\n\n Editor instance dedicated to the chat preview.\n Markdown content returned by the model.\n Pass `parser` to filter tokens before deserialization.\n\n\n\n```tsx\nimport { usePlateEditor } from 'platejs/react';\nimport { MarkdownPlugin } from '@platejs/markdown';\nimport { AIChatPlugin, useAIChatEditor } from '@platejs/ai/react';\n\nconst aiPreviewEditor = usePlateEditor({\n plugins: [MarkdownPlugin, AIChatPlugin],\n});\n\nuseAIChatEditor(aiPreviewEditor, responseMarkdown, {\n parser: { exclude: ['space'] },\n});\n```\n\n### `useEditorChat`\n\nConnects `UseChatHelpers` to editor state so the AI menu knows whether to anchor to cursor, selection, or block selection.\n\n\n\n void\" optional>Called when the menu opens on block selection.\n void\" 
optional>Called whenever the menu opens or closes.\n void\" optional>Called when the menu opens at the cursor.\n void\" optional>Called when the menu opens on a text selection.\n\n\n\n### `useChatChunk`\n\nStreams chat responses chunk-by-chunk and gives you full control over insertion.\n\n\n\n void\">Handle each streamed chunk.\n void\" optional>Called when streaming finishes.\n\n\n\n## Utilities\n\n### `withAIBatch`\n\nGroups editor operations into a single history batch and flags it as AI-generated so `tf.ai.undo()` removes it safely.\n\n\n\n Target editor.\n void\">Operations to run.\n Set `split: true` to start a new history batch.\n\n\n\n### `applyAISuggestions`\n\nDiffs AI output against stored `chatNodes` and writes transient suggestion nodes. Requires `@platejs/suggestion`.\n\n\n\n Editor to apply suggestions to.\n Markdown response from the model.\n\n\n\nComplementary helpers allow you to finalize or discard the diff:\n\n- `acceptAISuggestions(editor)`: Converts transient suggestion nodes into permanent suggestions.\n- `rejectAISuggestions(editor)`: Removes transient suggestion nodes and clears suggestion marks.\n\n### `aiCommentToRange`\n\nMaps streamed comment metadata back to document ranges so comments can be inserted automatically.\n\n\n\n Editor instance.\n Block id and text used to locate the range.\n\nRange matching the comment or `null` if it cannot be found.\n\n\n### `findTextRangeInBlock`\n\nFuzzy-search helper that uses LCS to find the closest match inside a block.\n\n\n\n Block node to search.\n Text snippet to locate.\n\nMatched range or `null`.\n\n\n### `getEditorPrompt`\n\nGenerates prompts that respect cursor, selection, or block selection states.\n\n\n\n Editor providing context.\n String, config, or function describing the prompt.\n\nContextualized prompt string.\n\n\n### `replacePlaceholders`\n\nReplaces placeholders like `{editor}`, `{blockSelection}`, and `{prompt}` with serialized Markdown.\n\n\n\n Editor providing content.\n 
Template text.\n Prompt value injected into `{prompt}`.\n\nTemplate with placeholders replaced by Markdown.\n\n\n## Plugins\n\n### `AIPlugin`\n\nAdds an `ai` mark to streamed text and exposes transforms to remove AI nodes or undo the last AI batch. Use `.withComponent` to render AI-marked text with a custom component.\n\n\n \n AI content is stored on text nodes.\n AI marks are regular text properties, not decorations.\n \n\n\n### `AIChatPlugin`\n\nMain plugin that powers the AI menu, chat state, and transforms.\n\n\n \n Character(s) that open the command menu. Defaults to `' '`.\n Pattern that must match the character before the trigger. Defaults to `/^\\s?$/`.\n boolean\" optional>Return `false` to cancel opening in specific contexts.\n Store helpers from `useChat` so API calls can access them.\n Snapshot of nodes used to diff edit suggestions (managed internally).\n Selection captured before submitting a prompt (managed internally).\n Controls whether responses stream directly into the document or open a review panel. Defaults to `'insert'`.\n Whether the AI menu is visible. Defaults to `false`.\n True while a response is streaming. Defaults to `false`.\n Active tool used to interpret the response.\n \n\n\n## API\n\n### `api.aiChat.submit(input, options?)`\n\nSubmits a prompt to your model provider. 
When `mode` is omitted it defaults to `'insert'` for a collapsed cursor and `'chat'` otherwise.\n\n\n\n Raw input from the user.\n Fine-tune submission behaviour.\n\n\n Override the response mode.\n Forwarded to `chat.sendMessage` (model, headers, etc.).\n String, config, or function processed by `getEditorPrompt`.\n Tags the submission so hooks can react differently.\n\n\n\n### `api.aiChat.reset(options?)`\n\nClears chat state, removes AI nodes, and optionally undoes the last AI batch.\n\n\n\n Pass `undo: false` to keep streamed content.\n\n\n\n### `api.aiChat.node(options?)`\n\nRetrieves the first AI node that matches the specified criteria.\n\n\n\n Set `anchor: true` to get the anchor node or `streaming: true` to retrieve the node currently being streamed into.\n\nMatching node entry, if found.\n\n\n### `api.aiChat.reload()`\n\nReplays the last prompt using the stored `UseChatHelpers`, restoring the original selection or block selection before resubmitting.\n\n### `api.aiChat.stop()`\n\nStops streaming and calls `chat.stop`.\n\n### `api.aiChat.show()`\n\nOpens the AI menu, clears previous chat messages, and resets tool state.\n\n### `api.aiChat.hide(options?)`\n\nCloses the AI menu, optionally undoing the last AI batch and refocusing the editor.\n\n\n\n Set `focus: false` to keep focus outside the editor or `undo: false` to preserve inserted content.\n\n\n\n## Transforms\n\n### `tf.aiChat.accept()`\n\nAccepts the latest response. In insert mode it removes AI marks and places the caret at the end of the streamed content. In chat mode it applies the pending suggestions.\n\n### `tf.aiChat.insertBelow(sourceEditor, options?)`\n\nInserts the chat preview (`sourceEditor`) below the current selection or block selection.\n\n\n\n Editor containing the generated content.\n Copy formatting from the source selection. 
Defaults to `'single'`.\n\n\n\n### `tf.aiChat.replaceSelection(sourceEditor, options?)`\n\nReplaces the current selection or block selection with the chat preview.\n\n\n\n Editor containing the generated content.\n Controls how much formatting from the original selection should be applied.\n\n\n\n### `tf.aiChat.removeAnchor(options?)`\n\nRemoves the temporary anchor node used to position the AI menu.\n\n\n\n Filters the nodes to remove.\n\n\n\n### `tf.ai.insertNodes(nodes, options?)`\n\nInserts nodes tagged with the AI mark at the current selection (or `options.target`).\n\n### `tf.ai.removeMarks(options?)`\n\nClears the AI mark from matching nodes.\n\n### `tf.ai.removeNodes(options?)`\n\nRemoves text nodes that are marked as AI-generated.\n\n### `tf.ai.undo()`\n\nUndoes the latest history entry if it was created by `withAIBatch` and contained AI content. Clears the paired redo entry to avoid re-applying AI output.\n\n## Customization\n\n### Adding Custom AI Commands\n\n\n\nExtend the `aiChatItems` map to add new commands. Each command receives `{ aiEditor, editor, input }` and can dispatch `api.aiChat.submit` with custom prompts or transforms.\n\n#### Simple Custom Command\n\n```tsx\nsummarizeInBullets: {\n icon: ,\n label: 'Summarize in bullets',\n value: 'summarizeInBullets',\n onSelect: ({ editor }) => {\n void editor.getApi(AIChatPlugin).aiChat.submit('', {\n prompt: 'Summarize the current selection using bullet points',\n toolName: 'generate',\n });\n },\n},\n```\n\n#### Command with Complex Logic\n\n```tsx\ngenerateTOC: {\n icon: ,\n label: 'Generate table of contents',\n value: 'generateTOC',\n onSelect: ({ editor }) => {\n const headings = editor.api.nodes({\n match: (n) => ['h1', 'h2', 'h3'].includes(n.type as string),\n });\n\n const prompt =\n headings.length === 0\n ? 
'Create a realistic table of contents for this document'\n : 'Generate a table of contents that reflects the existing headings';\n\n void editor.getApi(AIChatPlugin).aiChat.submit('', {\n mode: 'insert',\n prompt,\n toolName: 'generate',\n });\n },\n},\n```\n\nThe menu automatically switches between command and suggestion states:\n\n- `cursorCommand`: Cursor is collapsed and no response yet.\n- `selectionCommand`: Text is selected and no response yet.\n- `cursorSuggestion` / `selectionSuggestion`: A response exists, so actions like Accept, Try Again, or Insert Below are shown.\n\nUse `toolName` (`'generate' | 'edit' | 'comment'`) to control how streaming hooks process the response. For example, `'edit'` enables diff-based suggestions, and `'comment'` allows you to convert streamed comments into discussion threads with `aiCommentToRange`.\n", + "content": "---\ntitle: AI\ndescription: AI-powered writing assistance.\ndocs:\n - route: https://pro.platejs.org/docs/examples/ai\n title: Plus\n---\n\n\n\n\n\n## Features\n\n- **Context-aware command menu** that adapts to cursor, text selection, and block selection workflows.\n- **Streaming Markdown/MDX insertion** with table, column, and code block support powered by `streamInsertChunk`.\n- **Insert and chat review modes** with localized insert previews plus undo-safe batching via `withAIBatch` and `tf.ai.undo()`.\n- **Block selection aware transforms** to replace or append entire sections using `tf.aiChat.replaceSelection` and `tf.aiChat.insertBelow`.\n- **Direct integration with `@ai-sdk/react`** so `api.aiChat.submit` can stream responses from Vercel AI SDK helpers.\n- **Suggestion and comment utilities** that diff AI edits, accept/reject changes, and map AI feedback back to document ranges.\n\n\n\n## Kit Usage\n\n\n\n### Installation\n\nThe fastest way to add AI functionality is with the `AIKit`. 
It ships the configured `AIPlugin`, `AIChatPlugin`, Markdown streaming helpers, cursor overlay, and their [Plate UI](/docs/installation/plate-ui) components.\n\n\n\n- [`AIMenu`](/docs/components/ai-menu): Floating command surface for prompts, tool shortcuts, and chat review.\n- [`AILoadingBar`](/docs/components/ai-loading-bar): Displays streaming status at the editor container.\n- [`AIAnchorElement`](/docs/components/ai-anchor-element): Invisible anchor node used to position the floating menu during streaming.\n- [`AILeaf`](/docs/components/ai-leaf): Renders AI-marked text with subtle styling.\n\n### Add Kit\n\n```tsx\nimport { createPlateEditor } from 'platejs/react';\nimport { AIKit } from '@/components/editor/plugins/ai-kit';\n\nconst editor = createPlateEditor({\n plugins: [\n // ...otherPlugins,\n ...AIKit,\n ],\n});\n```\n\n### Add API Route\n\nExpose a streaming command endpoint that proxies your model provider:\n\n\n\n### Configure Environment\n\nSet your AI Gateway key locally (replace with your provider secret if you are not using a gateway):\n\n```bash title=\".env.local\"\nAI_GATEWAY_API_KEY=\"your-api-key\"\n```\n\n\n\n## Manual Usage\n\n\n\n### Installation\n\n```bash\nnpm install @platejs/ai @platejs/markdown @platejs/selection @ai-sdk/react ai\n```\n\n`@platejs/suggestion` is optional but required for diff-based edit suggestions.\n\n### Add Plugins\n\n```tsx\nimport { createPlateEditor } from 'platejs/react';\nimport { AIChatPlugin, AIPlugin } from '@platejs/ai/react';\nimport { BlockSelectionPlugin } from '@platejs/selection/react';\nimport { MarkdownPlugin } from '@platejs/markdown';\n\nexport const editor = createPlateEditor({\n plugins: [\n BlockSelectionPlugin,\n MarkdownPlugin,\n AIPlugin,\n AIChatPlugin, // extended in the next step\n ],\n});\n```\n\n- `BlockSelectionPlugin`: Enables multi-block selections that `AIChatPlugin` relies on for insert/replace transforms.\n- `MarkdownPlugin`: Provides Markdown serialization used by streaming 
utilities.\n- `AIPlugin`: Adds the AI mark and transforms for undoing AI batches.\n- `AIChatPlugin`: Supplies the AI combobox, API helpers, and transforms.\n\nUse `AIPlugin.withComponent` with your own element (or [`AILeaf`](/docs/components/ai-leaf)) to highlight AI-generated text.\n\n### Configure AIChatPlugin\n\nExtend `AIChatPlugin` to hook streaming and edits. The example mirrors the core logic from `AIKit` while keeping the UI headless.\n\n```tsx\nimport cloneDeep from 'lodash/cloneDeep';\nimport { BaseAIPlugin, withAIBatch } from '@platejs/ai';\nimport {\n AIChatPlugin,\n applyAISuggestions,\n getInsertPreviewStart,\n streamInsertChunk,\n useChatChunk,\n} from '@platejs/ai/react';\nimport { ElementApi, getPluginType, KEYS, PathApi } from 'platejs';\nimport { usePluginOption } from 'platejs/react';\n\nexport const aiChatPlugin = AIChatPlugin.extend({\n options: {\n chatOptions: {\n api: '/api/ai/command',\n body: {\n model: 'openai/gpt-4o-mini',\n },\n },\n trigger: ' ',\n triggerPreviousCharPattern: /^\\s?$/,\n },\n useHooks: ({ editor, getOption }) => {\n const mode = usePluginOption(AIChatPlugin, 'mode');\n const toolName = usePluginOption(AIChatPlugin, 'toolName');\n\n useChatChunk({\n onChunk: ({ chunk, isFirst, text }) => {\n if (isFirst && mode === 'insert') {\n const { startBlock, startInEmptyParagraph } =\n getInsertPreviewStart(editor);\n\n editor.getTransforms(BaseAIPlugin).ai.beginPreview({\n originalBlocks:\n startInEmptyParagraph &&\n startBlock &&\n ElementApi.isElement(startBlock)\n ? 
[cloneDeep(startBlock)]\n : [],\n });\n\n editor.setOption(AIChatPlugin, 'streaming', true);\n\n editor.tf.withoutSaving(() => {\n editor.tf.insertNodes(\n {\n children: [{ text: '' }],\n type: getPluginType(editor, KEYS.aiChat),\n },\n {\n at: PathApi.next(editor.selection!.focus.path.slice(0, 1)),\n }\n );\n });\n }\n\n if (mode === 'insert') {\n editor.tf.withoutSaving(() => {\n if (!getOption('streaming')) return;\n\n editor.tf.withScrolling(() => {\n streamInsertChunk(editor, chunk, {\n textProps: {\n [getPluginType(editor, KEYS.ai)]: true,\n },\n });\n });\n });\n }\n\n if (toolName === 'edit' && mode === 'chat') {\n withAIBatch(\n editor,\n () => {\n applyAISuggestions(editor, text);\n },\n { split: isFirst }\n );\n }\n },\n onFinish: () => {\n editor.setOption(AIChatPlugin, 'streaming', false);\n editor.setOption(AIChatPlugin, '_blockChunks', '');\n editor.setOption(AIChatPlugin, '_blockPath', null);\n editor.setOption(AIChatPlugin, '_mdxName', null);\n },\n });\n },\n});\n```\n\n- `useChatChunk`: Watches `UseChatHelpers` status and yields incremental chunks.\n- `tf.ai.beginPreview`: Captures the rollback slice and selection for insert-mode preview before the first streamed chunk is written.\n- `streamInsertChunk`: Streams Markdown/MDX into the document, reusing the existing block when possible.\n- `applyAISuggestions`: Converts responses into transient suggestion nodes when `toolName === 'edit'`.\n- `withAIBatch`: Marks saved AI batches so suggestion review and accepted AI changes stay undo-safe.\n\nProvide your own `render` components (toolbar button, floating menu, etc.) when you extend the plugin.\n\n### Build API Route\n\nHandle `api.aiChat.submit` requests on the server. 
Each request includes the chat `messages` from `@ai-sdk/react` and a `ctx` payload that contains the editor `children`, current `selection`, and last `toolName`.\n[Complete API example](https://github.com/udecode/plate-playground-template/blob/main/src/app/api/ai/command/route.ts)\n\n```ts title=\"app/api/ai/command/route.ts\"\nimport { createGateway } from '@ai-sdk/gateway';\nimport { convertToCoreMessages, streamText } from 'ai';\nimport { createSlateEditor } from 'platejs';\n\nimport { BaseEditorKit } from '@/registry/components/editor/editor-base-kit';\nimport { markdownJoinerTransform } from '@/registry/lib/markdown-joiner-transform';\n\nexport async function POST(req: Request) {\n const { apiKey, ctx, messages, model } = await req.json();\n\n const editor = createSlateEditor({\n plugins: BaseEditorKit,\n selection: ctx.selection,\n value: ctx.children,\n });\n\n const gateway = createGateway({\n apiKey: apiKey ?? process.env.AI_GATEWAY_API_KEY!,\n });\n\n const result = streamText({\n experimental_transform: markdownJoinerTransform(),\n messages: convertToCoreMessages(messages),\n model: gateway(model ?? 'openai/gpt-4o-mini'),\n system: ctx.toolName === 'edit' ? 'You are an editor that rewrites user text.' : undefined,\n });\n\n return result.toDataStreamResponse();\n}\n```\n\n- `ctx.children` and `ctx.selection` are rehydrated into a Slate editor so you can build rich prompts (see [Prompt Templates](#prompt-templates)).\n- Forward provider settings (model, apiKey, temperature, gateway flags, etc.) through `chatOptions.body`; everything you add is passed verbatim in the JSON payload and can be read before calling `createGateway`.\n- Always read secrets from the server. The client should only send opaque identifiers or short-lived tokens.\n- Return a streaming response so `useChat` and `useChatChunk` can process tokens incrementally.\n\n### Connect `useChat`\n\nBridge the editor and your model endpoint with `@ai-sdk/react`. 
Store helpers on the plugin so transforms can reload, stop, or show chat state.\n\n```tsx\nimport { useEffect } from 'react';\n\nimport { type UIMessage, DefaultChatTransport } from 'ai';\nimport { type UseChatHelpers, useChat } from '@ai-sdk/react';\nimport { AIChatPlugin } from '@platejs/ai/react';\nimport { useEditorPlugin } from 'platejs/react';\n\ntype ChatMessage = UIMessage<{}, { toolName: 'comment' | 'edit' | 'generate'; comment?: unknown }>;\n\nexport const useEditorAIChat = () => {\n const { editor, setOption } = useEditorPlugin(AIChatPlugin);\n\n const chat = useChat({\n id: 'editor',\n api: '/api/ai/command',\n transport: new DefaultChatTransport(),\n onData(data) {\n if (data.type === 'data-toolName') {\n editor.setOption(AIChatPlugin, 'toolName', data.data);\n }\n },\n });\n\n useEffect(() => {\n setOption('chat', chat as UseChatHelpers);\n }, [chat, setOption]);\n\n return chat;\n};\n```\n\nCombine the helper with `useEditorChat` to keep the floating menu anchored correctly:\n\n```tsx\nimport { useEditorChat } from '@platejs/ai/react';\n\nuseEditorChat({\n onOpenChange: (open) => {\n if (!open) chat.stop?.();\n },\n});\n```\n\nNow you can submit prompts programmatically:\n\n```tsx\nimport { AIChatPlugin } from '@platejs/ai/react';\n\neditor.getApi(AIChatPlugin).aiChat.submit('', {\n prompt: {\n default: 'Continue the document after {block}',\n selecting: 'Rewrite {selection} with a clearer tone',\n },\n toolName: 'generate',\n});\n```\n\n\n\n## Prompt Templates\n\n### Client Prompting\n\n- `api.aiChat.submit` accepts an `EditorPrompt`. Provide a string, an object with `default`/`selecting`/`blockSelecting`, or a function that receives `{ editor, isSelecting, isBlockSelecting }`. 
The helper `getEditorPrompt` in the client turns that value into the final string.\n- Combine it with `replacePlaceholders(editor, template, { prompt })` to expand `{editor}`, `{block}`, `{blockSelection}`, and `{prompt}` using Markdown generated by `@platejs/ai`.\n\n```tsx\nimport { replacePlaceholders } from '@platejs/ai';\n\neditor.getApi(AIChatPlugin).aiChat.submit('Improve tone', {\n prompt: ({ isSelecting }) =>\n isSelecting\n ? replacePlaceholders(editor, 'Rewrite {blockSelection} using a friendly tone.')\n : replacePlaceholders(editor, 'Continue {block} with two more sentences.'),\n toolName: 'generate',\n});\n```\n\n### Server Prompting\n\nThe demo backend in `apps/www/src/app/api/ai/command` reconstructs the editor from `ctx` and builds structured prompts:\n\n- `getChooseToolPrompt` decides whether the request is `generate`, `edit`, or `comment`.\n- `getGeneratePrompt`, `getEditPrompt`, and `getCommentPrompt` transform the current editor state into instructions tailored to each mode.\n- Utility helpers like `getMarkdown`, `getMarkdownWithSelection`, and `buildStructuredPrompt` (see `apps/www/src/app/api/ai/command/prompts.ts`) make it easy to embed block ids, selections, and MDX tags into the LLM request.\n\nAugment the payload you send from the client to fine-tune server prompts:\n\n```ts\neditor.setOption(aiChatPlugin, 'chatOptions', {\n api: '/api/ai/command',\n body: {\n model: 'openai/gpt-4o-mini',\n tone: 'playful',\n temperature: 0.4,\n },\n});\n```\n\nEverything under `chatOptions.body` arrives in the route handler, letting you swap providers, pass user-specific metadata, or branch into different prompt templates.\n\n## Keyboard Shortcuts\n\n\n Open the AI menu in an empty block (cursor mode)\n Show the AI menu (set via `shortcuts.show`)\n Hide the AI menu and stop streaming\n\n\n## Streaming\n\nThe streaming utilities keep complex layouts intact while responses arrive:\n\n- `streamInsertChunk(editor, chunk, options)` deserializes Markdown chunks, 
updates the current block in place, and appends new blocks as needed. Use `textProps`/`elementProps` to tag streamed nodes (e.g., mark AI text).\n- `streamDeserializeMd` and `streamDeserializeInlineMd` provide lower-level access if you need to control streaming for custom node types.\n- `streamSerializeMd` mirrors the editor state so you can detect drift between streamed content and the response buffer.\n\nReset the internal `_blockChunks`, `_blockPath`, and `_mdxName` options when streaming finishes to start the next response from a clean slate.\n\n## Streaming Example\n\n\n\n## Plate Plus\n\n\n\n## Hooks\n\n### `useAIChatEditor`\n\nRegisters an auxiliary editor for chat previews and deserializes Markdown with block-level memoization.\n\n\n\n Editor instance dedicated to the chat preview.\n Markdown content returned by the model.\n Pass `parser` to filter tokens before deserialization.\n\n\n\n```tsx\nimport { usePlateEditor } from 'platejs/react';\nimport { MarkdownPlugin } from '@platejs/markdown';\nimport { AIChatPlugin, useAIChatEditor } from '@platejs/ai/react';\n\nconst aiPreviewEditor = usePlateEditor({\n plugins: [MarkdownPlugin, AIChatPlugin],\n});\n\nuseAIChatEditor(aiPreviewEditor, responseMarkdown, {\n parser: { exclude: ['space'] },\n});\n```\n\n### `useEditorChat`\n\nConnects `UseChatHelpers` to editor state so the AI menu knows whether to anchor to cursor, selection, or block selection.\n\n\n\n void\" optional>Called when the menu opens on block selection.\n void\" optional>Called whenever the menu opens or closes.\n void\" optional>Called when the menu opens at the cursor.\n void\" optional>Called when the menu opens on a text selection.\n\n\n\n### `useChatChunk`\n\nStreams chat responses chunk-by-chunk and gives you full control over insertion.\n\n\n\n void\">Handle each streamed chunk.\n void\" optional>Called when streaming finishes.\n\n\n\n## Utilities\n\n### `withAIBatch`\n\nGroups editor operations into a single history batch and flags it as 
AI-generated so `tf.ai.undo()` removes it safely.\n\n\n\n Target editor.\n void\">Operations to run.\n Set `split: true` to start a new history batch.\n\n\n\n### `applyAISuggestions`\n\nDiffs AI output against stored `chatNodes` and writes transient suggestion nodes. Requires `@platejs/suggestion`.\n\n\n\n Editor to apply suggestions to.\n Markdown response from the model.\n\n\n\nComplementary helpers allow you to finalize or discard the diff:\n\n- `acceptAISuggestions(editor)`: Converts transient suggestion nodes into permanent suggestions.\n- `rejectAISuggestions(editor)`: Removes transient suggestion nodes and clears suggestion marks.\n\n### `aiCommentToRange`\n\nMaps streamed comment metadata back to document ranges so comments can be inserted automatically.\n\n\n\n Editor instance.\n Block id and text used to locate the range.\n\nRange matching the comment or `null` if it cannot be found.\n\n\n### `findTextRangeInBlock`\n\nFuzzy-search helper that uses LCS to find the closest match inside a block.\n\n\n\n Block node to search.\n Text snippet to locate.\n\nMatched range or `null`.\n\n\n### `getEditorPrompt`\n\nGenerates prompts that respect cursor, selection, or block selection states.\n\n\n\n Editor providing context.\n String, config, or function describing the prompt.\n\nContextualized prompt string.\n\n\n### `replacePlaceholders`\n\nReplaces placeholders like `{editor}`, `{blockSelection}`, and `{prompt}` with serialized Markdown.\n\n\n\n Editor providing content.\n Template text.\n Prompt value injected into `{prompt}`.\n\nTemplate with placeholders replaced by Markdown.\n\n\n## Plugins\n\n### `AIPlugin`\n\nAdds an `ai` mark to streamed text and exposes transforms to remove AI nodes or undo the last AI batch. 
Use `.withComponent` to render AI-marked text with a custom component.\n\n\n \n AI content is stored on text nodes.\n AI marks are regular text properties, not decorations.\n \n\n\n### `AIChatPlugin`\n\nMain plugin that powers the AI menu, chat state, and transforms.\n\n\n \n Character(s) that open the command menu. Defaults to `' '`.\n Pattern that must match the character before the trigger. Defaults to `/^\\s?$/`.\n boolean\" optional>Return `false` to cancel opening in specific contexts.\n Store helpers from `useChat` so API calls can access them.\n Snapshot of nodes used to diff edit suggestions (managed internally).\n Selection captured before submitting a prompt (managed internally).\n Controls whether responses stream directly into the document or open a review panel. Defaults to `'insert'`.\n Whether the AI menu is visible. Defaults to `false`.\n True while a response is streaming. Defaults to `false`.\n Active tool used to interpret the response.\n \n\n\n## API\n\n### `api.aiChat.submit(input, options?)`\n\nSubmits a prompt to your model provider. 
When `mode` is omitted it defaults to `'insert'` for a collapsed cursor and `'chat'` otherwise.\n\n\n\n Raw input from the user.\n Fine-tune submission behaviour.\n\n\n Override the response mode.\n Forwarded to `chat.sendMessage` (model, headers, etc.).\n String, config, or function processed by `getEditorPrompt`.\n Tags the submission so hooks can react differently.\n\n\n\n### `api.aiChat.reset(options?)`\n\nClears chat state, removes AI nodes, and optionally undoes the last AI batch.\n\n\n\n Pass `undo: false` to keep streamed content.\n\n\n\n### `api.aiChat.node(options?)`\n\nRetrieves the first AI node that matches the specified criteria.\n\n\n\n Set `anchor: true` to get the anchor node or `streaming: true` to retrieve the node currently being streamed into.\n\nMatching node entry, if found.\n\n\n### `api.aiChat.reload()`\n\nReplays the last prompt using the stored `UseChatHelpers`, restoring the original selection or block selection before resubmitting.\n\n### `api.aiChat.stop()`\n\nStops streaming and calls `chat.stop`.\n\n### `api.aiChat.show()`\n\nOpens the AI menu, clears previous chat messages, and resets tool state.\n\n### `api.aiChat.hide(options?)`\n\nCloses the AI menu, optionally undoing the last AI batch and refocusing the editor.\n\n\n\n Set `focus: false` to keep focus outside the editor or `undo: false` to preserve inserted content.\n\n\n\n## Transforms\n\n### `tf.aiChat.accept()`\n\nAccepts the latest response. In insert mode it removes AI marks and places the caret at the end of the streamed content. In chat mode it applies the pending suggestions.\n\n### `tf.aiChat.insertBelow(sourceEditor, options?)`\n\nInserts the chat preview (`sourceEditor`) below the current selection or block selection.\n\n\n\n Editor containing the generated content.\n Copy formatting from the source selection. 
Defaults to `'single'`.\n\n\n\n### `tf.aiChat.replaceSelection(sourceEditor, options?)`\n\nReplaces the current selection or block selection with the chat preview.\n\n\n\n Editor containing the generated content.\n Controls how much formatting from the original selection should be applied.\n\n\n\n### `tf.aiChat.removeAnchor(options?)`\n\nRemoves the temporary anchor node used to position the AI menu.\n\n\n\n Filters the nodes to remove.\n\n\n\n### `tf.ai.insertNodes(nodes, options?)`\n\nInserts nodes tagged with the AI mark at the current selection (or `options.target`).\n\n### `tf.ai.removeMarks(options?)`\n\nClears the AI mark from matching nodes.\n\n### `tf.ai.removeNodes(options?)`\n\nRemoves text nodes that are marked as AI-generated.\n\n### `tf.ai.beginPreview(options?)`\n\nCaptures the rollback slice and selection for insert-mode AI preview. Call it once before writing the first unsaved preview chunk.\n\n\n\n Top-level blocks that the preview will overwrite. Use `[]` when preview inserts after existing content.\n\nReturns `true` when a new preview rollback point was stored, or `false` when preview state already exists.\n\n\n### `tf.ai.acceptPreview()`\n\nCommits the active preview as one fresh undoable batch, strips preview-only markers, and clears preview bookkeeping.\n\n\nReturns `true` when an active preview was committed.\n\n\n### `tf.ai.cancelPreview()`\n\nRestores the rollback point for the active preview and clears preview bookkeeping.\n\n\nReturns `true` when an active preview was restored.\n\n\n### `tf.ai.discardPreview()`\n\nClears preview bookkeeping without restoring content. 
Use it when the previewed content should stay in place.\n\n\nReturns `true` when active preview bookkeeping was cleared.\n\n\n### `tf.ai.hasPreview()`\n\nReports whether an insert-mode preview rollback point is currently active.\n\n\nReturns `true` when preview rollback state exists.\n\n\n### `tf.ai.undo()`\n\nUndoes the latest AI history entry when it was created by `withAIBatch`. If an insert-mode preview is active, it cancels that preview first instead of replaying every streamed chunk. In both cases it avoids re-applying AI output from redo.\n\n## Customization\n\n### Adding Custom AI Commands\n\n\n\nExtend the `aiChatItems` map to add new commands. Each command receives `{ aiEditor, editor, input }` and can dispatch `api.aiChat.submit` with custom prompts or transforms.\n\n#### Simple Custom Command\n\n```tsx\nsummarizeInBullets: {\n icon: ,\n label: 'Summarize in bullets',\n value: 'summarizeInBullets',\n onSelect: ({ editor }) => {\n void editor.getApi(AIChatPlugin).aiChat.submit('', {\n prompt: 'Summarize the current selection using bullet points',\n toolName: 'generate',\n });\n },\n},\n```\n\n#### Command with Complex Logic\n\n```tsx\ngenerateTOC: {\n icon: ,\n label: 'Generate table of contents',\n value: 'generateTOC',\n onSelect: ({ editor }) => {\n const headings = editor.api.nodes({\n match: (n) => ['h1', 'h2', 'h3'].includes(n.type as string),\n });\n\n const prompt =\n headings.length === 0\n ? 
'Create a realistic table of contents for this document'\n : 'Generate a table of contents that reflects the existing headings';\n\n void editor.getApi(AIChatPlugin).aiChat.submit('', {\n mode: 'insert',\n prompt,\n toolName: 'generate',\n });\n },\n},\n```\n\nThe menu automatically switches between command and suggestion states:\n\n- `cursorCommand`: Cursor is collapsed and no response yet.\n- `selectionCommand`: Text is selected and no response yet.\n- `cursorSuggestion` / `selectionSuggestion`: A response exists, so actions like Accept, Try Again, or Insert Below are shown.\n\nUse `toolName` (`'generate' | 'edit' | 'comment'`) to control how streaming hooks process the response. For example, `'edit'` enables diff-based suggestions, and `'comment'` allows you to convert streamed comments into discussion threads with `aiCommentToRange`.\n", "type": "registry:file", "target": "content/docs/plate/(plugins)/(ai)/ai.mdx" } diff --git a/apps/www/public/r/ai-kit.json b/apps/www/public/r/ai-kit.json index 09e2c45778..9a5b8894dc 100644 --- a/apps/www/public/r/ai-kit.json +++ b/apps/www/public/r/ai-kit.json @@ -16,7 +16,7 @@ "files": [ { "path": "src/registry/components/editor/plugins/ai-kit.tsx", - "content": "'use client';\n\nimport { withAIBatch } from '@platejs/ai';\nimport {\n AIChatPlugin,\n AIPlugin,\n applyAISuggestions,\n streamInsertChunk,\n useChatChunk,\n} from '@platejs/ai/react';\nimport { getPluginType, KEYS, PathApi } from 'platejs';\nimport { usePluginOption } from 'platejs/react';\n\nimport { AILoadingBar, AIMenu } from '@/registry/ui/ai-menu';\nimport { AIAnchorElement, AILeaf } from '@/registry/ui/ai-node';\n\nimport { useChat } from '../use-chat';\nimport { CursorOverlayKit } from './cursor-overlay-kit';\nimport { MarkdownKit } from './markdown-kit';\n\nexport const aiChatPlugin = AIChatPlugin.extend({\n options: {\n chatOptions: {\n api: '/api/ai/command',\n body: {},\n },\n },\n render: {\n afterContainer: AILoadingBar,\n afterEditable: AIMenu,\n 
node: AIAnchorElement,\n },\n shortcuts: { show: { keys: 'mod+j' } },\n useHooks: ({ editor, getOption }) => {\n useChat();\n\n const mode = usePluginOption(AIChatPlugin, 'mode');\n const toolName = usePluginOption(AIChatPlugin, 'toolName');\n useChatChunk({\n onChunk: ({ chunk, isFirst, nodes, text: content }) => {\n if (isFirst && mode === 'insert') {\n editor.tf.withoutSaving(() => {\n editor.tf.insertNodes(\n {\n children: [{ text: '' }],\n type: getPluginType(editor, KEYS.aiChat),\n },\n {\n at: PathApi.next(editor.selection!.focus.path.slice(0, 1)),\n }\n );\n });\n editor.setOption(AIChatPlugin, 'streaming', true);\n }\n\n if (mode === 'insert' && nodes.length > 0) {\n withAIBatch(\n editor,\n () => {\n if (!getOption('streaming')) return;\n editor.tf.withScrolling(() => {\n streamInsertChunk(editor, chunk, {\n textProps: {\n [getPluginType(editor, KEYS.ai)]: true,\n },\n });\n });\n },\n { split: isFirst }\n );\n }\n\n if (toolName === 'edit' && mode === 'chat') {\n withAIBatch(\n editor,\n () => {\n applyAISuggestions(editor, content);\n },\n {\n split: isFirst,\n }\n );\n }\n },\n onFinish: () => {\n editor.setOption(AIChatPlugin, 'streaming', false);\n editor.setOption(AIChatPlugin, '_blockChunks', '');\n editor.setOption(AIChatPlugin, '_blockPath', null);\n editor.setOption(AIChatPlugin, '_mdxName', null);\n },\n });\n },\n});\n\nexport const AIKit = [\n ...CursorOverlayKit,\n ...MarkdownKit,\n AIPlugin.withComponent(AILeaf),\n aiChatPlugin,\n];\n", + "content": "'use client';\n\nimport cloneDeep from 'lodash/cloneDeep.js';\nimport { BaseAIPlugin, withAIBatch } from '@platejs/ai';\nimport {\n AIChatPlugin,\n AIPlugin,\n applyAISuggestions,\n getInsertPreviewStart,\n streamInsertChunk,\n useChatChunk,\n} from '@platejs/ai/react';\nimport { ElementApi, getPluginType, KEYS, PathApi } from 'platejs';\nimport { usePluginOption } from 'platejs/react';\n\nimport { AILoadingBar, AIMenu } from '@/registry/ui/ai-menu';\nimport { AIAnchorElement, AILeaf } from 
'@/registry/ui/ai-node';\n\nimport { useChat } from '../use-chat';\nimport { CursorOverlayKit } from './cursor-overlay-kit';\nimport { MarkdownKit } from './markdown-kit';\n\nexport const aiChatPlugin = AIChatPlugin.extend({\n options: {\n chatOptions: {\n api: '/api/ai/command',\n body: {},\n },\n },\n render: {\n afterContainer: AILoadingBar,\n afterEditable: AIMenu,\n node: AIAnchorElement,\n },\n shortcuts: { show: { keys: 'mod+j' } },\n useHooks: ({ editor, getOption }) => {\n useChat();\n\n const mode = usePluginOption(AIChatPlugin, 'mode');\n const toolName = usePluginOption(AIChatPlugin, 'toolName');\n useChatChunk({\n onChunk: ({ chunk, isFirst, nodes, text: content }) => {\n if (isFirst && mode === 'insert') {\n const { startBlock, startInEmptyParagraph } =\n getInsertPreviewStart(editor);\n\n editor.getTransforms(BaseAIPlugin).ai.beginPreview({\n originalBlocks:\n startInEmptyParagraph &&\n startBlock &&\n ElementApi.isElement(startBlock)\n ? [cloneDeep(startBlock)]\n : [],\n });\n\n editor.tf.withoutSaving(() => {\n editor.tf.insertNodes(\n {\n children: [{ text: '' }],\n type: getPluginType(editor, KEYS.aiChat),\n },\n {\n at: PathApi.next(editor.selection!.focus.path.slice(0, 1)),\n }\n );\n });\n editor.setOption(AIChatPlugin, 'streaming', true);\n }\n\n if (mode === 'insert' && nodes.length > 0) {\n editor.tf.withoutSaving(() => {\n if (!getOption('streaming')) return;\n\n editor.tf.withScrolling(() => {\n streamInsertChunk(editor, chunk, {\n textProps: {\n [getPluginType(editor, KEYS.ai)]: true,\n },\n });\n });\n });\n }\n\n if (toolName === 'edit' && mode === 'chat') {\n withAIBatch(\n editor,\n () => {\n applyAISuggestions(editor, content);\n },\n {\n split: isFirst,\n }\n );\n }\n },\n onFinish: () => {\n editor.setOption(AIChatPlugin, 'streaming', false);\n editor.setOption(AIChatPlugin, '_blockChunks', '');\n editor.setOption(AIChatPlugin, '_blockPath', null);\n editor.setOption(AIChatPlugin, '_mdxName', null);\n },\n });\n },\n});\n\nexport 
const AIKit = [\n ...CursorOverlayKit,\n ...MarkdownKit,\n AIPlugin.withComponent(AILeaf),\n aiChatPlugin,\n];\n", "type": "registry:component" } ] diff --git a/apps/www/public/r/components-changelog-docs.json b/apps/www/public/r/components-changelog-docs.json index 1d81262ad0..1737e05ebb 100644 --- a/apps/www/public/r/components-changelog-docs.json +++ b/apps/www/public/r/components-changelog-docs.json @@ -7,7 +7,7 @@ "files": [ { "path": "../../docs/components/changelog.mdx", - "content": "---\ntitle: Changelog\ndescription: Latest component updates and announcements.\ntoc: true\n---\n\nSince Plate UI is not a component library, a changelog is maintained here.\n\nUse the [CLI](https://platejs.org/docs/components/cli) to install the latest version of the components.\n\n## March 2026 #29\n\n### March 11 #29.1\n- `table-node`: Improved large-table editing performance by applying column widths at the table level, row heights at the row level, and keeping editable resize handles aligned with the table control column.\n\n## January 2026 #28\n\n### January 20 #28.3\n- **AIChat**: Fixed assistant input width not expanding when panel is expanded\n\n### January 19 #28.2\n- **`inline-combobox`**: Added keyboard navigation cycling at list boundaries\n - ArrowUp at first item cycles to last item\n - ArrowDown at last item cycles to first item\n - Prevents null activeId using capture-phase event handling\n\n### January 17 #28.1\n- **DOCX Import/Export**: New `@platejs/docx-io` package for Word document handling\n - `import-toolbar-button`: Import DOCX files with `importDocx`\n - `export-toolbar-button`: Export to DOCX with `exportToDocx`\n - `docx-export-kit`: DOCX-optimized components using `DocxExportPlugin.configure({ override: { components } })`\n- **DOCX Static Components**: Added to existing static files with `*Docx` naming pattern:\n - `callout-node-static`: `CalloutElementDocx`\n - `code-block-node-static`: `CodeBlockElementDocx`, `CodeLineElementDocx`, 
`CodeSyntaxLeafDocx`\n - `column-node-static`: `ColumnElementDocx`, `ColumnGroupElementDocx`\n - `equation-node-static`: `EquationElementDocx`, `InlineEquationElementDocx`\n - `toc-node-static`: `TocElementDocx`\n\n\n## December 2025 #27\n\n### December 23 #27.3\n- **`code-drawing-node`**: Added new code drawing component with inline editing support\n - Support for multiple diagram types: PlantUML, Graphviz, Flowchart, and Mermaid\n - Inline code editing with real-time preview (no popup dialog)\n - Multiple view modes: Both (code + preview), Code only, Image only\n - Responsive layout: horizontal on desktop, vertical on mobile\n - Floating toolbar with language selector, view mode selector, and download button\n - Debounced rendering (500ms) for optimal performance\n\n### December 23 #27.2\n- **AI Prompt System Refactoring**: Complete restructuring of AI command prompts for better maintainability\n - Split monolithic `prompts.ts` into focused modules: `getEditPrompt`, `getGeneratePrompt`, `getCommentPrompt`, `getEditTablePrompt`, `getChooseToolPrompt`\n - Added new `common.ts` for shared prompt utilities\n - Enhanced table cell editing capabilities with specialized prompts\n- **AI Table Cell Handling**: Improved support for editing single-cell tables\n - `use-chat`: Enhanced with parser options for better AI content processing\n - `cursor-overlay`: Fixed overlay positioning and behavior\n - Added tests for complex table markdown generation\n- **API Routes**: Updated AI command and copilot routes with improved prompt selection logic\n\n### December 16 #27.1\n- `table-node`: Fixed table row drag and drop - rows could be dragged but not dropped, and dropline wasn't showing. 
Added missing `nodeRef` to element ref composition.\n\n### November 30 #26.10\n- `inline-combobox`: Added Yjs collaboration support - combobox popover now only shows for the user who triggered it, preventing the popover from opening for all users in collaborative editing\n\n### October 21 #26.9\n- `suggestion-kit`: Remove `BlockSuggestion`use `SuggestionLineBreak` instead to fixes styles.\n- `use-chat`: Fix AI comment hiding when finished.\n\n### October 17 #26.8\n- **Static Components**: Updated all static component imports to use new `platejs/static` path\n - `*-node-static` components: Updated imports from `@platejs/core/react` to `platejs/static`\n - `editor-static`: Updated `PlateStatic` import path\n - `export-toolbar-button`: Updated static utilities import path\n - `import-toolbar-button`: Updated static utilities import path\n - `slate-to-html/page`: Updated static rendering imports\n - `comment-kit`, `suggestion-kit`: Updated static types imports\n\n### October 6 #26.7\n- `api/ai/command/route.ts`: Fix poor AI generation quality when blockSelecting.\n\n### October 5 #26.6\n- `ai-kit`: Removed unused `api` parameter from `useHooks` destructuring\n- `block-selection-kit`: Added keyboard shortcut handler to open AI menu with `mod+j` when blocks are selected\n\n### October 4 #26.5\n- `api/ai/command/route.ts`: fix #4669\n\n## September 2025 #26\n\n### September 28 #26.5\n- `transforms`: Fixed slash command duplicate block insertion - prevents creating duplicate blocks when selecting the same block type in empty blocks\n\n### September 20 #26.4\n- **AI Suggestions**: Major improvements to AI suggestion system with better content handling and UI enhancements\n - `ai-menu`: \n - Added new `AILoadingBar` component with animated spinner and stop functionality\n - Enhanced comment acceptance UI with Accept/Reject buttons\n - Improved context-aware menu states based on selection and operation mode\n - Better keyboard navigation with ESC to stop operations\n - 
`ai-toolbar-button`: Streamlined implementation\n - `api/ai/command/route.ts`: \n - Added multi-tool support (generate, edit, comment) with automatic intent classification\n - Switched to Google Gemini model (`gemini-2.5-flash`)\n - Enhanced prompt templates with placeholder support\n - Added MDX tag preservation\n - `markdown-joiner-transform`: Added smart buffering for smoother streaming of markdown content\n - `use-chat`: Simplified implementation with better error handling\n - `ai-kit`: Added markdown plugin to AI configuration\n - `markdown-kit`: Added AI plugin integration\n - `link-node`: Added AI-specific click handler functionality\n - `block-context-menu`: Removed redundant AI options (now handled by ai-menu)\n - `block-discussion`: Minor styling improvements\n - `fixed-toolbar-buttons`: Simplified AI button implementation\n\n### September 7 #26.3\n- `block-context-menu`: Fixed menu position sticking when triggered multiple times in different locations\n\n### September 5 #26.2\n- `block-draggable`: Fixed block selection to work with right-click events\n\n### September 4 #26.1\n- **AI Comments**: Added AI-powered comment functionality for document review and feedback\n - `use-chat`: Enhanced chat hook with AI comment support and improved streaming capabilities\n - `ai-menu`: Updated AI menu with comment generation options and improved UI\n - `ai-toolbar-button`: Added support for AI comment actions\n - `block-context-menu`: Integrated AI comment options into block context menu\n - `fixed-toolbar-buttons`: Added AI comment button to toolbar\n - `ai-kit`: remove all prompt templates, use directly in `api/ai/command/route.ts`.\n - `api/ai/command/route.ts`: Added comment functionality.\n\n## August 2025 #25\n\n\n### August 17 #25.2\n- `block-discussion`: Removed `useFocusedLast` check for showing discussion popover.\n\n### August 1 #25.1\n- **Floating toolbar improvements**: Multiple components now use the new `useFocusedLast` hook to only show their floating 
toolbars when their editor is the last focused editor, preventing toolbar conflicts in multi-editor scenarios:\n - `ai-menu`\n - `block-discussion`\n - `column-node`\n - `media-toolbar`\n - `table-node`\n- `block-draggable`: \n - Select list children on handle click\n - Focus selected blocks on handle click\n\n## July 2025 #24\n\n### July 29 #24.11\n- `block-draggable`: Fixed table drag and drop preview display with horizontal scroll compensation. Drag preview elements now correctly display content even when the original element has horizontal scroll\n- `block-draggable`: Added `isAboutToDrag` state to improve preview handling - tracks when drag is about to start (mousedown but not yet dragging) for better preview cleanup\n\n### July 27 #24.10\n- `ai-kit`: support custom node type\n- `indent-kit`: add `KEYS.img` to `IndentPlugin`\n- `list-kit`: add `KEYS.img` to `ListPlugin`\n- `markdown-joiner-transform.ts`: add `markdownJoinerTransform` to transform chunks like [**,bold,**] to [**bold**] make the md deserializer happy.\n- `api/ai/command/route.ts`: use `markdownJoinerTransform` to transform chunks.\n\n### July 26 #24.9\n- `list-classic-kit`: Added `ListPlugin` to restore List functionalities (indent with Tab / Shift+Tab, new item when pressing enter, ...).\n\n### July 25 #24.8\n- `block-draggable`: Added support for automatically selecting list item children when dragging. 
When dragging a list item, all nested items with bigger indent are now included in the drag operation\n\n### July 23 #24.7\n- `block-draggable`: Updated to use new `addOnContextMenu` API from BlockSelectionPlugin for cleaner context menu handling\n\n### July 18 #24.6\n- `block-context-menu`: Fixed context menu not opening when right-clicking on block margin areas\n- `block-draggable`: Added wrapper div with context menu handler to ensure block selection on margin clicks\n\n### July 14 #24.5\n- `block-draggable`: Added support for dragging multiple blocks using editor's native selection (previously only block-selection was supported)\n\n### July 3 #24.4\n- `slate-to-html`: Added `EditorViewDemo` component for static editor rendering using `createStaticEditor`\n### July 4 #24.3\n\n- `list-classic-node`: Fix styling that affects `TaskListElement` by force overriding list-style-type (set to none).\n\n### July 3 #24.2\n\n- **Task list support in list-classic**: Added task list functionality with checkboxes to the list-classic plugin\n - `list-classic-kit`: Added `TaskListPlugin` with `TaskListElement` component\n - `list-classic-node`: Added `TaskListElement` and `TaskListItemElement` components with checkbox support\n - `transforms-classic`: New file for classic list transforms\n - `insert-toolbar-classic-button`: New component for inserting classic list types (bulleted, numbered, task)\n - `turn-into-toolbar-classic-button`: New component for converting blocks to classic list types\n - `floating-toolbar-classic-buttons`: New component for floating toolbar with classic list support\n - `floating-toolbar-classic-kit`: New kit that includes classic list toolbar buttons\n\n### July 2 #24.1\n- `editor`: Added `EditorView` component using the new `PlateView` from `@platejs/core/react` for static editor rendering with copy functionality\n\n## June 2025 #23\n\n### June 29 #23.9\n- `link-node`: Remove `useLink`\n- `link-node-static`: missing `getLinkAttributes`\n- 
`media-image-node`: `attributes.alt` type casting\n\n### June 26 #23.7\n- `inline-combobox`: Fixed combobox not closing when clicking outside the editor\n\n### June 24 #23.6\n- `transform.ts`: add `toggleCodeBlock` to `setBlockMap`. Fix the structural error of the code_block created by `turn-into-toolbar-button.tsx`.\n\n### June 20 #23.5\n- [Drag and drop improvements](https://github.com/udecode/plate/pull/4385)\n- `block-draggable`: Fixed drag and drop functionality with multiple selected blocks and resolved drop positioning issues on margins.\n- `block-selection-kit`: It is now possible to select the entire table (table), but the rows (tr) will only be selected if your selection box is within the table.\n- `table-node`: Add block selection styles to the table.\n\n### June 18 #23.4\n\n- `table-node`: Fix bug affecting cursor position and improve performance\n\n### June 16 #23.3\n\n- `block-draggable`: use `getPluginByType` instead of `getContainerTypes`\n\n### June 13 #23.2\n\n- `editor`: Fix placeholder positioning `**:data-slate-placeholder:!top-1/2 **:data-slate-placeholder:-translate-y-1/2`.\n- `block-placeholder-kit`: Change placeholder color to `text-muted-foreground/80` to match `editor` one.\n\n### June 9 #23.1\n\n**Plate 49**\n\nMerging files, using a more consistent naming convention, and removing unused `export` statements.\n\nComponents:\n\n- Now that basic nodes have a default HTML element, you can remove `withProps(..., { as: '...' })` plugin components.\n- To improve decoupling, plugins are not imported anymore only for their keys. 
Import `KEYS` from `@udecode/plate` instead, as a unified source of keys.\n - `ParagraphPlugin.key` -> `KEYS.p`\n - `INDENT_LIST_KEYS.listStyleType` -> `KEYS.listType`\n - `ListStyleType.Decimal` -> `KEYS.ol`\n - `ListStyleType.Disc` -> `KEYS.ul`\n - `list` (classic) -> `KEYS.listClassic`\n - `ol` (classic) -> `KEYS.olClassic`\n - `ul` (classic) -> `KEYS.ulClassic`\n - `li` (classic) -> `KEYS.liClassic`\n - `action_item` (classic) -> `KEYS.listTodoClassic`\n- Rename all `*-element`, `*-leaf` files to `*-node` (and static versions)\n- Removed `ai-anchor-element`, merged into `ai-node`\n- Removed `ai-loading-bar`, merged into `ai-menu`\n- Removed `ai-menu-items`, merged into `ai-menu`\n- Renamed `align-dropdown-menu` to `align-toolbar-button`, `AlignDropdownMenu` to `AlignToolbarButton`\n- Renamed `api-ai` to `ai-api`\n- Renamed `api-uploadthing` to `media-uploadthing-api`\n- `BlockSelection`: fix block selection for tables\n- Removed `code-block-combobox`, merged into `code-block-node`\n- Removed `code-line-element`, merged into `code-block-node` (and static version)\n- Removed `code-syntax-leaf`, merged into `code-block-node` (and static version)\n- Rename `color-toolbar-button` to `font-color-toolbar-button`, `ColorDropdownMenu` to `FontColorToolbarButton`\n- Removed all `color-*` files, merged into `font-color-toolbar-button`\n - Rename `color-dropdown-menu` to `font-color-toolbar-button`\n- Removed `column-group-element`, merged into `column-node` (and static version)\n- Removed `comment-create-form`, merged into `comment`\n- Renamed `draggable` to `block-draggable`, `DraggableAboveNodes` to `BlockDraggable`\n- Renamed `emoji-input-element` to `emoji-node`\n- Removed all `emoji-*` files (except `emoji-input-node`), merged into `emoji-toolbar-button`\n - Rename `EmojiToolbarDropdown` to `EmojiPopover`, `EmojiDropdownMenu` to `EmojiToolbarButton`\n - `EmojiPicker` `icons` prop is now optional and defaulted to `emojiCategoryIcons` and `emojiSearchIcons`\n- Renamed 
`image-preview` to `media-preview-dialog`, `ImagePreview` to `MediaPreviewDialog`\n- Renamed `image-element` to `media-image-node`\n - Renamed `MediaFileElement` to `FileElement` (and static version)\n - Renamed `MediaAudioElement` to `AudioElement` (and static version)\n - Renamed `MediaVideoElement` to `VideoElement` (and static version)\n- Renamed `indent-list-toolbar-button` to `list-toolbar-button`\n - Renamed `BulletedIndentListToolbarButton` to `BulletedListToolbarButton`\n - Renamed `NumberedIndentListToolbarButton` to `NumberedListToolbarButton`\n- Renamed `indent-todo-marker` to `block-list`\n- Removed `indent-fire-marker`\n- Removed `indent-todo-toolbar-button`, merged into `list-toolbar-button`\n- Renamed `IndentTodoToolbarButton` into `TodoListToolbarButton`\n- Removed `inline-equation-element` and `equation-popover`, merged into `equation-node` (and static version)\n- Removed `inline-equation-toolbar-button`, merged into `equation-toolbar-button`\n- Renamed `insert-dropdown-menu` to `insert-toolbar-button`, `InsertDropdownMenu` to `InsertToolbarButton`\n- Renamed `line-height-dropdown-menu` to `line-height-toolbar-button`, `LineHeightDropdownMenu` to `LineHeightToolbarButton`\n- Rename `link-floating-toolbar` to `link-toolbar`\n- Removed `list-indent-toolbar-button`, merged into `list-classic-toolbar-button`\n- Renamed `ListIndentToolbarButton` to `IndentToolbarButton`\n- Renamed `list-node` to `list-classic-node`\n- Renamed `media-popover` to `media-toolbar`, `MediaPopover` to `MediaToolbar`\n- Renamed `mode-dropdown-menu` to `mode-toolbar-button`, `ModeDropdownMenu` to `ModeToolbarButton`\n- Renamed `more-dropdown-menu` to `more-toolbar-button`, `MoreDropdownMenu` to `MoreToolbarButton`\n- Removed `outdent-toolbar-button`, merged into `indent-toolbar-button`\n- `table-icons`: rename `Border*` to `Border*Icon`\n- Renamed `slash-input-element` to `slash-input-node`\n- Renamed `SuggestionBelowNodes` to `SuggestionLineBreak`\n- Removed 
`table-cell-element`, merged into `table-node` (and static version)\n- Removed `table-row-element`, merged into `table-node` (and static version)\n- Renamed `table-dropdown-menu` to `table-toolbar-button`, `TableDropdownMenu` to `TableToolbarButton`\n- Removed `todo-list-node`, merged into `list-classic-node`\n- Renamed `turn-into-dropdown-menu` to `turn-into-toolbar-button`, `TurnIntoDropdownMenu` to `TurnIntoToolbarButton`\n- `export-toolbar-button`, `indent-list-plugins`: remove fire from `listStyleTypes`\n- `useCommentEditor`: `usePlateEditor` instead of `useCreateEditor`\n- Removed `placeholder`, `withPlaceholder`. Migration: use `block-placeholder-kit`, `BlockPlaceholderPlugin` instead.\n- `line-height-toolbar-button`: remove `useLineHeightDropdownMenu` hook.\n- `font-color-toolbar-button`: remove `useColorInput` hook.\n\nPlugins:\n\n- Renamed all `*-plugin`, `*-plugins` files to `-kit`, and `*Plugin`, `*Plugins` to `*Kit`. A **plugin kit** is a collection of configured plugins.\n - Renamed `editor-plugins` to `editor-kit`\n - Renamed `equation-plugins` to `math-kit`, `equationPlugins` to `MathKit`\n - Renamed `indent-list-plugins` to `list-kit`, `indentListPlugins` to `ListKit`\n - Added `BlockList` component to `block-list`, used in `list-kit`\n - Removed `use-create-editor`, use `usePlateEditor` instead\n- `ai-kit`: add `show` shortcut. Remove `useHotkeys('mod+j')` from `ai-menu`\n- `comment-kit`: add `setDraft` transform, shortcut\n- `basic-marks-kit`, `basic-blocks-kit`: add shortcuts\n\n- `transforms`, `block-draggable`: remove `STRUCTURE_TYPES`, those are now inferred from `plugin.node.isContainer`. 
Use instead `editor.meta.containerTypes`.\n- Remove `structuralTypes` from `useSelectionFragmentProp` usages.\n\n## May 2025 #22\n\n### May 26 #22.7\n\n- [Fix combobox closing issue](https://github.com/udecode/plate/pull/4322)\n- `inline-combobox`: fix `insertPoint` not being updated when the combobox is closed.\n\n### May 15 #22.6\n\n- [Fix inline math keyboard behavior and style](https://github.com/udecode/plate/pull/4305)\n- `equation-popover`: Focus back to the editor when the popover is closed.\n- `inline-equation-element`: When selecting it should be highlighted.\n\n### May 11-12 #22.5\n\n- [Templates migration to Plate 48](https://github.com/udecode/plate/pull/4298/files)\n- Migration to shadcn v4: \n - Plate had a forked version of shadcn/ui primitives that could conflict with your existing components. Our components now **fully depend** on the original shadcn/ui primitives, easing the integration of Plate into your existing shadcn/ui set of components.\n - All components updated to [Tailwind v4](https://ui.shadcn.com/docs/tailwind-v4). \n - See the updated [installation guide](/docs/components/installation).\n- Migration to React 19. 
If you're using React 18, you may need to use `React.forwardRef` in a few places.\n- Migration to [shadcn CLI](https://ui.shadcn.com/docs/cli):\n - Remove `registries` from `components.json`\n - Use `npx shadcn` instead of `npx shadcx`\n- [MCP support](/docs/mcp)\n- Remove `withRef` from all components\n- Import `cn` from `@/lib/utils` instead of `@udecode/cn` to stay consistent with shadcn/ui\n- Remove unused `className`, `style` props from all element and leaf components\n- `draggable`:\n - Fix dnd in Firefox\n- `media-placeholder-element`: refactor to use `use-upload-file` hook instead of `uploadthing`\n - Migration: `npx shadcn@latest add @plate/api-uploadthing`\n\n### May 6 #22.3\n\n- `ai-chat-editor`: support none-standard markdown nodes.\n- `slash-input-element`: add callout support.\n- `block-selection-plugins.tsx`: fix block selection not working.\n\n### May 4 #22.2\n\n- `ai/command`: forked smoothStream from `ai` package now uses 30ms delay by default (only code blocks and tables use 100ms delay).\n\nv48 migration:\n- `PlateElement`, `PlateLeaf` and `PlateText` HTML attributes are moved from top-level props to `attributes` prop.\n- Remove `nodeProps` prop from `PlateElement`, `PlateLeaf`, `PlateText`. 
Use `attributes` prop instead.\n- Migrated components: \n - `block-discussion`\n - `comment-leaf`\n - `date-element`\n - `draggable`\n - `excalidraw-element`\n - `hr-element` + `-static`\n - `image-element` + `-static`\n - `link-element`\n - `media-audio-element`\n - `media-file-element`\n - `media-placeholder-element`\n - `media-video-element`\n - `mention-element`\n - `placeholder`\n - `suggestion-leaf`\n - `table-cell-element` + `-static`\n - `table-element`\n - `tag-element`\n\n### May 2 #22.1\n- `use-chat`: add `_abortFakeStream`.\n- `ai-menu`: Fix menu items are displayed incorrectly.\n- `ai-loading-bar`: Move esc keydown handler to `ai-menu`.\n- `ai/command`: add chunking delay to 100ms (Temporary workaround with performance issue).\n\n\n## April 2025 #21\n\n### April 30 #21.3\n\n- `autoformat-plugin`: allow starting a new indent list with numbers other than 1\n\n### April 29 #21.2\n\n- `ai-leaf`: add `aiIndicatorVariants` to display loading state.\n- `cursor-overlay`: hide when ai is streaming.\n- `api/ai/command`: fix chunking issue.\n\nAdd `discussion-plugin`:\n- add `discussionPlugin` to `editor-plugins`, remove `configure` from `suggestionPlugin`\n- refactor `block-suggestion`, `comment` to use `discussionPlugin`\n- fix `comment-create-form` to create discussion when none exists\n- style changes in `suggestion-leaf`\n- fix `link-floating-toolbar` to support internal links, and placement top when suggestion or comment is active\n\n### April 19 #21.1\n\n- `ai-anchor-element`: add `ai-anchor-element` component that is inserted before streaming, removed after streaming, and used for positioning the ai-menu\n- `ai-loading-bar`: add `ai-loading-bar` component that is used to display the loading progress of the insert mode streaming\n- `ai-menu`: migrate to latest `ai` package\n- `ai-menu-items`: add `generateMarkdownSample`\n- `ai-plugins`: Remove the single-paragraph limit from prompts\n- `editor`: introduce `PlateContainer` component\n\n### April 2 
#21.1\n\n- `export-toolbar-button`: fix pdf export issue with `html2canvas-pro`\n- `import-toolbar-button`: fix sometimes unable to select the file\n\n## March 2025 #20\n\n### March 12 2025 #20.4\n\n- `ai-toolbar-button`: add missing `@udecode/plate-ai` dependency.\n- `comment-toolbar-button`: add missing `comments-plugin` registry dependency.\n- `font-size-toolbar-button`: add missing `popover` registry dependency.\n- `tooltip`: add missing `button` registry dependency.\n\n### March 10 #20.3\n- `block-context-menu`: Prevent opening context menu in read-only mode\n\n### March 2 #20.2\n\n- `block-suggestion`: fix styles\n- `suggestion-leaf-static`: add static versions\n\n### March 1 #20.1\n\nPlate 46 - new code block\n\n- Migrated from Prism.js to lowlight for syntax highlighting\n - `code-block-element-static`, `code-block-element`, `code-block-combobox`: Updated to use lowlight classes. Default to github theme.\n - `code-syntax-leaf-static`, `code-syntax-leaf`: Updated to use lowlight token classes\n - Removed `prismjs` dependency and related styles\n - Use `lowlight` plugin option instead of `prism` option\n - `code-block-combobox`: add `Auto` language option, change language values to match lowlight\n- `autoformat-plugin`: prevent autoformat on code blocks\n\n```tsx\nimport { all, createLowlight } from 'lowlight';\n\nconst lowlight = createLowlight(all);\n\nCodeBlockPlugin.configure({\n options: {\n lowlight,\n },\n});\n```\n\n### February 21 #19.3\n\n- `image-preview`: prevent block menu on image preview mask\n- `media-popover`: hide media popover when image preview is open\n\n### February 18 #19.2\n\nPlate 45 - new comments & suggestions UI\n\n- NEW `block-discussion` as the main container, used in `plate-element`\n- NEW `comment` for individual comment display\n- NEW `comment-create-form` with minimal Plate editor for input\n- Removed legacy components:\n - `comments-popover`\n - `comment-avatar`\n - `comment-reply-items`\n - `comment-value`\n - 
`comment-resolve-button`\n- NEW `block-suggestion`\n- NEW `suggestion-leaf`\n- NEW `suggestion-line-break`\n- Remove `plate-element`, import `PlateElement` from `@udecode/plate/react` instead. Add in `block-selection-plugins`:\n```tsx\nrender: {\n belowRootNodes: (props) => {\n if (!props.className?.includes('slate-selectable')) return null;\n\n return ;\n },\n},\n```\n\n### February 3 #19.1\n\n- React 19\n- TailwindCSS 4\n- Plate 45\n- Jotai 2\n- Zustand 6\n- `comment-more-dropdown`: remove `useCommentEditButtonState`, `useCommentDeleteButtonState`\n- `image-element`, `media-embed-element`, `media-video-element`, `mode-dropdown-menu`\n - use `const width = useResizableValue('width')`\n- `image-preview`: remove `useScaleInputState`, `useImagePreviewState`\n- `floating-toolbar`: \n - replace `useEventEditorSelectors` with `useEventEditorValue`\n- `media-popover`: \n - replace `floatingMediaActions` with `FloatingMediaStore.set`, \n - replace `useFloatingMediaSelectors` with `useFloatingMediaValue`\n\n## January 2025 #18\n\n### January 23 #18.8\n\n- `table-element`: fix styles, show table border controls when collapsed\n- `table-row-element`: refactor\n- `table-cell-element`: selection bg-brand\n\n### January 21 #18.7\n\n- `table-element`, `table-row-element`: support row dnd and selection\n- `plate-element`: add `blockSelectionClassName` prop\n- `editor`: z-50 for selection area\n- `draggable`: \n - Replace `editor.api.blockSelection.replaceSelectedIds` with `editor.api.blockSelection.clear`\n - Use `TooltipButton` for drag handle\n - Block select on drag handle click\n - Hide drag handle in table cells\n- `column-element`, `table-cell-element`: add `isSelectionAreaVisible` check\n- `block-selection`: hide if dragging\n- Replace `editor.api.blockSelection.addSelectedRow` with `editor.api.blockSelection.set`:\n - `ai-menu`\n - `equation-popover`\n- `align-dropdown-menu`: deprecate \n\n\n### January 18 #18.6\n\n- `inline-equation-element` and `equation-popover`: Fix: 
When selecting an inline equation, the popover should not open, as it causes the selection to be lost.\n\n### January 17 #18.5\n\n- `emoji-picker-search-bar`: add `autoFocus`\n\n### January 16 #18.4\n\n- `import-toolbar-button` and `export-toolbar-button`: add `markdown` support\n\n### January 14 #18.3\n- `fixed-toolbar-buttons`: add `import-toolbar-button`\n- `indent-fire-marker.tsx` Add `data-plate-prevent-deserialization` to prevent deserialization of the fire marker. Change the `span` tag to `li`.\n- `indent-todo-marker.tsx` change the `span` tag to `li`.\n- `image-element-static.tsx` and `hr-element-static.tsx`: Fix `nodeProps` not being passed to `SlateElement`.\n- `ai-chat-editor`:\n```tsx\nconst aiEditor = usePlateEditor({ plugins });\nuseAIChatEditor(aiEditor, content);\n```\n\n### January 12 #18.2\n\n- `ai-plugins`: remove `createAIEditor`, it's now created in `ai-chat-editor`\n- `ai-chat-editor`: just use `useAIChatEditor` (v42.1)\n- `ai-menu`: avoid collisions, remove `aiEditorRef`\n- `command`: add `focus-visible:outline-none`\n- `editor-static`: update `aiChat` padding\n- `transforms`: fix `insertBlock` used by slash commands: it should insert a new block if the newly inserted block is of the same type as the command.\n- `block-selection-plugins`: update `BlockSelectionPlugin`\n\n```tsx\nBlockSelectionPlugin.configure(({ editor }) => ({\n options: {\n enableContextMenu: true,\n isSelectable: (element, path) => {\n return (\n !['code_line', 'column', 'td'].includes(element.type) &&\n !editor.api.block({ above: true, at: path, match: { type: 'tr' } })\n );\n },\n },\n}))\n```\n\n \n### January 8 #18.1\n\n- v42 migration\n- `table-element`, `table-element-static`\n - Move icons to `table-icons`\n - Remove `colgroup`, col width is now set in `table-cell-element`\n- `table-row-element`: remove `hideBorder` prop\n- `table-cell-element`, `table-cell-element-static`: \n - column hover/resizing state is now using Tailwind instead of JS\n - **Major performance 
improvement**: all table cells were re-rendering on a single cell change. This is now fixed.\n - React.memo\n- `table-dropdown-menu`:\n - dynamic table insert\n - merge/split cells\n - insert row/col before\n- `tooltip`: add `TooltipButton`\n- `indent-list-toolbar-button`: Remove `IndentListToolbarButton` use `NumberedIndentListToolbarButton` and `BulletedIndentListToolbarButton` instead.\n- `table-dropdown-menu`: new insert table interface.\n- `column-group-element`: fix `ColumnFloatingToolbar` onColumnChange\n\n## December 2024 #17\n\n### December 28 #17.8\n\n- `export-toolbar-button`: add `katex` support\n- `plate-element`: remove `relative` className\n- All components using the `PlateElement` have had redundant `relative` class names removed.\n### December 27 #17.7\n\n- `fixed-toolbar-buttons`: add `font-size-toolbar-button`\n- `floating-toolbar`: add `inline-equation-toolbar-button`\n- `turn-into-dropdown-menu`: Fix: after turn into other block, the editor should regain focus.\n- `insert-dropdown-menu`: add `inline equation` and `equation` & fix the focus issue\n- `slash-input-element`: add `equation` and `inline equation`\n\n### December 23 #17.5\n\n- `table-element`: fix selection\n- before: `isSelectingCell && '[&_*::selection]:bg-none'`\n- after: `isSelectingCell && '[&_*::selection]:!bg-transparent'`\n\n\n### December 21 #17.4\n\nUpdate `tailwind.config.cjs` for better font support in the HTML export:\n\n```ts\nfontFamily: {\n heading: [\n 'var(--font-heading)',\n 'ui-sans-serif',\n '-apple-system',\n 'BlinkMacSystemFont',\n 'Segoe UI Variable Display',\n 'Segoe UI',\n 'Helvetica',\n 'Apple Color Emoji',\n 'Arial',\n 'sans-serif',\n 'Segoe UI Emoji',\n 'Segoe UI Symbol',\n 'Noto Color Emoji',\n ],\n mono: ['var(--font-mono)', ...fontFamily.mono],\n sans: [\n 'var(--font-sans)',\n 'ui-sans-serif',\n '-apple-system',\n 'BlinkMacSystemFont',\n 'Segoe UI Variable Display',\n 'Segoe UI',\n 'Helvetica',\n 'Apple Color Emoji',\n 'Arial',\n 'sans-serif',\n 'Segoe 
UI Emoji',\n 'Segoe UI Symbol',\n 'Noto Color Emoji',\n ],\n```\n\n\n### December 20 #17.3\n\n- `insertColumnGroup`, `toggleColumnGroup`: use `columns` option instead of `layout` \n- Remove `with-draggables`. Add [`DraggableAboveNodes`](https://github.com/udecode/plate/pull/3878/files#diff-493c12ebed9c3ef9fd8c3a723909b18ad439a448c0132d2d93e5341ee0888ad2) to `draggable`. Add to `DndPlugin` config:\n```tsx\nDndPlugin.configure({ render: { aboveNodes: DraggableAboveNodes } }),\n```\n- `column-element`, `image-element`, `media-video-element`: Remove `useDraggableState`. Use `const { isDragging, previewRef, handleRef } = useDraggable`\n- `column-group-element`: Remove `useColumnState`. Use instead:\n```tsx\nconst columnGroupElement = useElement(ColumnPlugin.key);\n\nconst onColumnChange = (widths: string[]) => {\n setColumns(editor, {\n at: findNodePath(editor, columnGroupElement),\n widths,\n });\n};\n```\n- `export-toolbar-button`: add `exportToHtml`\n\n### December 19 #17.2\n\nPlate 41\n\n- New RSC components for element and leaf components, filename ending with `-static.tsx`. 
Those are now added along with the default client components.\n- `editor`: add `select-text` to `editorVariants`\n- `date-element`: remove popover when read-only\n- `indent-todo-marker`: use `SlateRenderElementProps` type instead of `PlateRenderElementProps`\n- `hr-element`, `media-audio-element`, `media-embed-element`, `mention-element`: improve cursor styling\n- `media-file-element`: use `` instead of `div` + `onClick`\n- all element and leaf components: `className` prop is now placed before inline prop.\n\n### December 16 #17.1\n\n- `column-element`:\n - Add drag and drop support for columns\n - Add drag handle with tooltip\n - Fix column spacing and padding\n\n- `column-group-element`:\n - Remove gap between columns\n - Remove margin top\n\n- `draggable`:\n - Remove `DraggableProvider` HOC\n - Remove `DropLine` children prop\n\n## November 2024 #16\n\n### November 26 #16.9\n\nhttps://github.com/udecode/plate/pull/3809/files\n- Add `select-editor`, `tag-element`, `label`, `form`\n- Replace `cmdk` dependency with `@udecode/cmdk`. It's a controllable version of `cmdk`.\n- `command`: add variants\n- `editor`: add `select` variant\n- `popover`: add `animate` variant\n\nhttps://github.com/udecode/plate/pull/3807/files\n- `toc-element`: remove `