From 7d2bf480e830ce57daf5f999d71cf3bad7701433 Mon Sep 17 00:00:00 2001 From: Test User Date: Sat, 21 Mar 2026 22:51:58 -0700 Subject: [PATCH 1/2] chore: organize PRDs and create architecture docs Move completed PRDs to done/ folder: - job-registry.md (Job Registry implementation) - analytics-job.md (Analytics job implementation) - fix-executor-streaming-output.md (Terminal streaming fix) - fix-prd-execution-failures.md (Double rate-limit detection) - open-source-readiness.md (OSS community files) - remove-legacy-personas-and-filesystem-prd.md (Dead code cleanup) - prd-provider-schedule-overrides.md (Provider schedule overrides) - provider-agnostic-instructions.md (Instructions directory refactoring) - night-watch/provider-aware-queue.md (Per-bucket concurrency) Delete obsolete PRD: - refactor-interaction-listener.md (packages/slack/ removed in commit 46637a0) Add architecture docs: - docs/architecture/job-registry.md (Job registry architecture) - docs/architecture/analytics-job.md (Analytics job architecture) Co-Authored-By: Claude Opus 4.6 --- docs/PRDs/{ => done}/analytics-job.md | 0 .../fix-executor-streaming-output.md | 25 +- .../{ => done}/fix-prd-execution-failures.md | 14 +- docs/PRDs/{ => done}/job-registry.md | 37 +- docs/PRDs/{ => done}/open-source-readiness.md | 20 + .../prd-provider-schedule-overrides.md | 0 .../provider-agnostic-instructions.md | 0 ...move-legacy-personas-and-filesystem-prd.md | 0 .../{ => done}/provider-aware-queue.md | 0 docs/PRDs/pr-merge-keeper.md | 425 ++++++++++++++++++ docs/PRDs/refactor-interaction-listener.md | 285 ------------ docs/architecture/analytics-job.md | 169 +++++++ docs/architecture/job-registry.md | 212 +++++++++ 13 files changed, 883 insertions(+), 304 deletions(-) rename docs/PRDs/{ => done}/analytics-job.md (100%) rename docs/PRDs/{ => done}/fix-executor-streaming-output.md (90%) rename docs/PRDs/{ => done}/fix-prd-execution-failures.md (97%) rename docs/PRDs/{ => done}/job-registry.md (95%) rename docs/PRDs/{ 
=> done}/open-source-readiness.md (99%) rename docs/PRDs/{ => done}/prd-provider-schedule-overrides.md (100%) rename docs/PRDs/{ => done}/provider-agnostic-instructions.md (100%) rename docs/PRDs/{ => done}/remove-legacy-personas-and-filesystem-prd.md (100%) rename docs/PRDs/night-watch/{ => done}/provider-aware-queue.md (100%) create mode 100644 docs/PRDs/pr-merge-keeper.md delete mode 100644 docs/PRDs/refactor-interaction-listener.md create mode 100644 docs/architecture/analytics-job.md create mode 100644 docs/architecture/job-registry.md diff --git a/docs/PRDs/analytics-job.md b/docs/PRDs/done/analytics-job.md similarity index 100% rename from docs/PRDs/analytics-job.md rename to docs/PRDs/done/analytics-job.md diff --git a/docs/PRDs/fix-executor-streaming-output.md b/docs/PRDs/done/fix-executor-streaming-output.md similarity index 90% rename from docs/PRDs/fix-executor-streaming-output.md rename to docs/PRDs/done/fix-executor-streaming-output.md index 221ca2cb..e1754938 100644 --- a/docs/PRDs/fix-executor-streaming-output.md +++ b/docs/PRDs/done/fix-executor-streaming-output.md @@ -9,6 +9,7 @@ **Problem:** When the executor launches `claude -p`, it logs "output will stream below" but no output actually streams to the terminal — all output is silently redirected to the log file via `>> "${LOG_FILE}" 2>&1`. **Files Analyzed:** + - `scripts/night-watch-helpers.sh` — `log()` function (writes ONLY to file) - `scripts/night-watch-cron.sh` — provider dispatch (lines 545-577, 624-637) - `scripts/night-watch-audit-cron.sh` — provider dispatch (lines 163-189) @@ -17,6 +18,7 @@ - `packages/core/src/utils/shell.ts` — `executeScriptWithOutput()` (already streams child stdout/stderr to terminal) **Current Behavior:** + - `log()` writes ONLY to `LOG_FILE` (`echo ... >> "${log_file}"`) — not to stdout or stderr - Provider commands redirect ALL output to file: `claude -p ... 
>> "${LOG_FILE}" 2>&1` - Node's `executeScriptWithOutput()` listens on the bash child's stdout/stderr pipes but receives nothing because the bash script sends everything to the file @@ -25,11 +27,13 @@ ## 2. Solution **Approach:** + - Replace `>> "${LOG_FILE}" 2>&1` with `2>&1 | tee -a "${LOG_FILE}"` for provider dispatch — output goes to both the log file AND stdout (which propagates through Node's pipe to the terminal) - Modify `log()` to also write to stderr so diagnostic messages are visible in the terminal during interactive `night-watch run` - All scripts already use `set -euo pipefail`, so pipe exit codes propagate correctly (if `claude` fails with code 1 and `tee` succeeds with 0, pipefail returns 1) **Key Decisions:** + - `tee -a` (append mode) preserves the existing log file behavior - Provider output goes to stdout via tee; diagnostic messages go to stderr via log — keeps them on separate channels - No changes to `executeScriptWithOutput()` needed — it already streams both pipes to the terminal @@ -41,6 +45,7 @@ ### Phase 1: Fix `log()` to also write to stderr + use `tee` for provider output **Files (5):** + - `scripts/night-watch-helpers.sh` — make `log()` also write to stderr - `scripts/night-watch-cron.sh` — replace `>> "${LOG_FILE}" 2>&1` with `2>&1 | tee -a "${LOG_FILE}"` (3 occurrences: main dispatch, codex dispatch, fallback) - `scripts/night-watch-audit-cron.sh` — same replacement (2 occurrences) @@ -50,6 +55,7 @@ **Implementation:** - [ ] In `night-watch-helpers.sh`, modify `log()` to also echo to stderr: + ```bash log() { local log_file="${LOG_FILE:?LOG_FILE not set}" @@ -82,6 +88,7 @@ **Pattern for each replacement:** Before: + ```bash if ( cd "${WORKTREE_DIR}" && timeout "${SESSION_MAX_RUNTIME}" \ @@ -92,6 +99,7 @@ if ( ``` After: + ```bash if ( cd "${WORKTREE_DIR}" && timeout "${SESSION_MAX_RUNTIME}" \ @@ -102,6 +110,7 @@ if ( ``` **Exit code behavior with `pipefail`:** + - All scripts use `set -euo pipefail` (line 2) - If `timeout ... 
claude` exits 124 (timeout) and `tee` exits 0 → pipe returns 124 ✓ - If `timeout ... claude` exits 1 (failure) and `tee` exits 0 → pipe returns 1 ✓ @@ -109,6 +118,7 @@ if ( - The `if (...); then` construct disables `set -e` for the condition, so non-zero exits are captured correctly **Rate-limit detection still works:** + - `check_rate_limited` greps the LOG_FILE — `tee -a` still writes everything to the file, so this is unchanged **Tests Required:** @@ -120,6 +130,7 @@ if ( | Manual | Smoke test: `bash -n scripts/night-watch-pr-reviewer-cron.sh` | No syntax errors | **User Verification:** + - Action: Run `night-watch run` (or trigger executor) - Expected: Diagnostic log messages AND claude's streaming output visible in the terminal in real time @@ -139,10 +150,10 @@ if ( ## Files to Modify -| File | Change | -|------|--------| -| `scripts/night-watch-helpers.sh` | `log()` also writes to stderr | -| `scripts/night-watch-cron.sh` | 3× replace `>> LOG 2>&1` with `2>&1 \| tee -a LOG` | -| `scripts/night-watch-audit-cron.sh` | 2× same replacement | -| `scripts/night-watch-qa-cron.sh` | 2× same replacement | -| `scripts/night-watch-pr-reviewer-cron.sh` | 2× same replacement | +| File | Change | +| ----------------------------------------- | -------------------------------------------------- | +| `scripts/night-watch-helpers.sh` | `log()` also writes to stderr | +| `scripts/night-watch-cron.sh` | 3× replace `>> LOG 2>&1` with `2>&1 \| tee -a LOG` | +| `scripts/night-watch-audit-cron.sh` | 2× same replacement | +| `scripts/night-watch-qa-cron.sh` | 2× same replacement | +| `scripts/night-watch-pr-reviewer-cron.sh` | 2× same replacement | diff --git a/docs/PRDs/fix-prd-execution-failures.md b/docs/PRDs/done/fix-prd-execution-failures.md similarity index 97% rename from docs/PRDs/fix-prd-execution-failures.md rename to docs/PRDs/done/fix-prd-execution-failures.md index 4a2d154e..289ce044 100644 --- a/docs/PRDs/fix-prd-execution-failures.md +++ 
b/docs/PRDs/done/fix-prd-execution-failures.md @@ -14,6 +14,7 @@ PRD execution is failing consistently across projects (night-watch-cli, autopilo When the proxy returns 429, the system correctly triggers a native Claude fallback. **However, if native Claude is also rate-limited**, the fallback exits with code 1 and the system records `provider_exit` instead of `rate_limited`. **Evidence from `logs/executor.log`:** + ``` API Error: 429 {"error":{"code":"1308","message":"Usage limit reached for 5 hour..."}} RATE-LIMITED: Proxy quota exhausted — triggering native Claude fallback @@ -24,6 +25,7 @@ FAIL: Night watch exited with code 1 while processing 69-ux-revamp... ``` **Impact:** The system records a `failure` with `reason=provider_exit` instead of `rate_limited`, which: + - Triggers a long cooldown (max_runtime-based) instead of a rate-limit-appropriate retry - Sends misleading failure notifications - Prevents the PRD from being retried once the rate limit resets @@ -33,6 +35,7 @@ FAIL: Night watch exited with code 1 while processing 69-ux-revamp... The function scans `tail -50` of the shared `executor.log` file, but log entries from **previous runs** can bleed into the current run's error detail. **Evidence:** Issue #70's failure detail contains issue #69's error message: + ``` detail=[2026-03-07 00:40:59] [PID:75449] FAIL: Night watch exited with code 1 while processing 69-ux-revamp... ``` @@ -44,6 +47,7 @@ This happens because the log is append-only and `latest_failure_detail()` doesn' In filesystem mode, `code-cleanup-q1-2026.md` was selected and executed despite the work already being merged to master. Claude correctly identified the work was done but didn't create a PR. The cron script then recorded `failure_no_pr_after_success`. 
**Evidence:** + ``` OUTCOME: exit_code=0 total_elapsed=363s prd=code-cleanup-q1-2026.md WARN: claude exited 0 but no open/merged PR found on night-watch/code-cleanup-q1-2026 @@ -62,6 +66,7 @@ This is a pre-existing filesystem mode issue (stale PRDs not moved to `done/`). After the native Claude fallback runs (line ~626), check if the fallback also hit a rate limit before falling through to the generic failure handler. **Implementation:** + 1. After `RATE_LIMIT_FALLBACK_TRIGGERED` block (lines 603-632), if `EXIT_CODE != 0`, scan fallback output for rate-limit indicators (`"hit your limit"`, `429`, `"Usage limit"`) 2. If detected, set a new flag `DOUBLE_RATE_LIMITED=1` 3. In the outcome handler (lines 711-726), when `DOUBLE_RATE_LIMITED=1`: @@ -72,6 +77,7 @@ After the native Claude fallback runs (line ~626), check if the fallback also hi **Specific changes in `night-watch-cron.sh`:** After line 632 (`fi` closing the fallback block), add: + ```bash # Detect double rate-limit: both proxy AND native Claude exhausted DOUBLE_RATE_LIMITED=0 @@ -84,6 +90,7 @@ fi ``` In the outcome handler, add a new branch before the generic `else` on line 711: + ```bash elif [ "${DOUBLE_RATE_LIMITED}" = "1" ]; then if [ -n "${ISSUE_NUMBER}" ]; then @@ -102,6 +109,7 @@ elif [ "${DOUBLE_RATE_LIMITED}" = "1" ]; then Modify `latest_failure_detail()` to accept an optional `since_line` parameter that filters to only lines written during the current run. **Implementation:** + 1. Change `latest_failure_detail()` (lines 79-92) to accept a second parameter `since_line` 2. Use `tail -n +${since_line}` instead of `tail -50` when `since_line` is provided 3. 
At the call site (line 712), pass the `LOG_LINE_BEFORE` captured at the start of the current attempt @@ -109,6 +117,7 @@ Modify `latest_failure_detail()` to accept an optional `since_line` parameter th **Specific changes:** Replace `latest_failure_detail()`: + ```bash latest_failure_detail() { local log_file="${1:?log_file required}" @@ -138,6 +147,7 @@ latest_failure_detail() { ``` Update call site at line 712: + ```bash PROVIDER_ERROR_DETAIL=$(latest_failure_detail "${LOG_FILE}" "${LOG_LINE_BEFORE}") ``` @@ -182,6 +192,6 @@ This is already handled — the `code-cleanup-q1-2026.md` issue was a one-time s ## Files to Modify -| File | Change | -|------|--------| +| File | Change | +| ----------------------------- | -------------------------------------------------------------------- | | `scripts/night-watch-cron.sh` | Add double-rate-limit detection, scope failure detail to current run | diff --git a/docs/PRDs/job-registry.md b/docs/PRDs/done/job-registry.md similarity index 95% rename from docs/PRDs/job-registry.md rename to docs/PRDs/done/job-registry.md index dad44ede..700c3ad5 100644 --- a/docs/PRDs/job-registry.md +++ b/docs/PRDs/done/job-registry.md @@ -7,6 +7,7 @@ **Problem:** Adding a new job type (e.g., analytics) requires touching 15+ files across 4 packages — types, constants, config normalization, env parsing, CLI command, server routes, API client, Scheduling UI, Settings UI, schedule templates, and more. Each job's state shape is inconsistent (executor/reviewer use top-level flat fields, qa/audit/analytics use nested config objects, slicer lives inside `roadmapScanner`). 
**Files Analyzed:** + - `packages/core/src/types.ts` — `JobType`, `IJobProviders`, `INightWatchConfig`, `IQaConfig`, `IAuditConfig`, `IAnalyticsConfig` - `packages/core/src/shared/types.ts` — duplicated type definitions for web contract - `packages/core/src/constants.ts` — `DEFAULT_*` per job, `VALID_JOB_TYPES`, `DEFAULT_QUEUE_PRIORITY`, `LOG_FILE_NAMES` @@ -22,6 +23,7 @@ - `web/store/useStore.ts` — minimal Zustand, no job-specific state **Current Behavior:** + - 6 job types exist: `executor`, `reviewer`, `qa`, `audit`, `slicer`, `analytics` - Executor/reviewer use flat top-level config fields (`cronSchedule`, `reviewerSchedule`, `executorEnabled`, `reviewerEnabled`) - QA/audit/analytics use nested config objects (`config.qa`, `config.audit`, `config.analytics`) with common shape: `{ enabled, schedule, maxRuntime, ...extras }` @@ -32,6 +34,7 @@ ## 2. Solution **Approach:** + 1. Create a **Job Registry** in `packages/core/src/jobs/` that defines each job's metadata, defaults, config access patterns, and env parsing rules in a single object 2. Extract a **`IBaseJobConfig`** interface (`{ enabled, schedule, maxRuntime }`) that all job configs extend 3. Replace per-job boilerplate in `config-normalize.ts` and `config-env.ts` with generic registry-driven loops @@ -39,6 +42,7 @@ 5. Add a **Zustand `jobs` slice** that provides computed job state derived from `status.config` so components don't need to know each job's config shape **Architecture Diagram:** + ```mermaid flowchart TB subgraph Core["@night-watch/core"] @@ -63,12 +67,14 @@ flowchart TB ``` **Key Decisions:** + - **Migrate executor/reviewer to nested config**: All jobs will use `config.jobs.{id}: { enabled, schedule, maxRuntime, ...extras }`. Auto-detect legacy flat format and migrate on load. This is a breaking config change but gives uniform access patterns. 
- **Registry is a const array, not DI**: Simple, testable, no runtime overhead - **Web job registry stores React components directly** for icons (type-safe, tree-shakeable) - **Generic `triggerJob(jobId)`** replaces per-job `triggerRun()`, `triggerReview()` etc. (keep old functions as thin wrappers for backward compat) **Data Changes:** + - `INightWatchConfig` gains `jobs: Record` — replaces flat executor/reviewer fields and nested qa/audit/analytics objects - Legacy flat fields (`cronSchedule`, `reviewerSchedule`, `executorEnabled`, `reviewerEnabled`) and nested objects (`qa`, `audit`, `analytics`, `roadmapScanner.slicerSchedule`) auto-detected and migrated on config load - Config file rewritten in new format on first save after migration @@ -100,6 +106,7 @@ sequenceDiagram **User-visible outcome:** Job registry exists and is the single source of truth for job metadata. All constants derived from it. Tests prove registry drives normalization. **Files (5):** + - `packages/core/src/jobs/job-registry.ts` — **NEW** — `IJobDefinition` interface + `JOB_REGISTRY` array + accessor utilities - `packages/core/src/jobs/index.ts` — **NEW** — barrel exports - `packages/core/src/types.ts` — add `IBaseJobConfig` interface @@ -110,21 +117,22 @@ sequenceDiagram - [ ] Define `IBaseJobConfig` interface: `{ enabled: boolean; schedule: string; maxRuntime: number }` - [ ] Define `IJobDefinition` interface with: + ```typescript interface IJobDefinition { id: JobType; - name: string; // "Executor", "QA", "Auditor" - description: string; // "Creates implementation PRs from PRDs" - cliCommand: string; // "run", "review", "qa", "audit", "planner", "analytics" - logName: string; // "executor", "reviewer", "night-watch-qa", etc. - lockSuffix: string; // ".lock", "-r.lock", "-qa.lock", etc. 
- queuePriority: number; // 50, 40, 30, 20, 10 + name: string; // "Executor", "QA", "Auditor" + description: string; // "Creates implementation PRs from PRDs" + cliCommand: string; // "run", "review", "qa", "audit", "planner", "analytics" + logName: string; // "executor", "reviewer", "night-watch-qa", etc. + lockSuffix: string; // ".lock", "-r.lock", "-qa.lock", etc. + queuePriority: number; // 50, 40, 30, 20, 10 // Env var prefix for NW_* overrides - envPrefix: string; // "NW_EXECUTOR", "NW_QA", "NW_AUDIT", etc. + envPrefix: string; // "NW_EXECUTOR", "NW_QA", "NW_AUDIT", etc. // Extra config field normalizers (beyond enabled/schedule/maxRuntime) - extraFields?: IExtraFieldDef[]; // e.g., QA's branchPatterns, artifacts, etc. + extraFields?: IExtraFieldDef[]; // e.g., QA's branchPatterns, artifacts, etc. // Defaults defaultConfig: TConfig; @@ -133,6 +141,7 @@ sequenceDiagram migrateLegacy?: (raw: Record) => Partial | undefined; } ``` + - [ ] Create `JOB_REGISTRY` const array with entries for all 6 job types - [ ] Create utility functions: `getJobDef(id)`, `getAllJobDefs()`, `getJobDefByCommand(cmd)` - [ ] Derive `VALID_JOB_TYPES`, `DEFAULT_QUEUE_PRIORITY`, `LOG_FILE_NAMES` from registry (keep exports stable) @@ -154,6 +163,7 @@ sequenceDiagram **User-visible outcome:** `config-normalize.ts` and `config-env.ts` use generic loops. Legacy flat config auto-migrated. Adding a job config section no longer requires per-job blocks. 
**Files (5):** + - `packages/core/src/jobs/job-registry.ts` — add `normalizeJobConfig()` and `buildJobEnvOverrides()` generic helpers - `packages/core/src/config-normalize.ts` — replace per-job normalization blocks with registry loop + legacy migration - `packages/core/src/config-env.ts` — replace per-job env blocks with registry loop @@ -161,6 +171,7 @@ sequenceDiagram - `packages/core/src/__tests__/config-normalize.test.ts` — verify normalization + migration **Implementation:** + - [ ] Add `normalizeJobConfig(rawConfig, jobDef)` that reads raw object, applies defaults, validates fields - [ ] Each `IJobDefinition` declares `extraFields` for job-specific fields beyond `{ enabled, schedule, maxRuntime }` - [ ] Add `migrateLegacyConfig(raw)` that detects old format (e.g., `cronSchedule` exists at top level) and transforms to new `jobs: { executor: { ... }, ... }` shape @@ -184,17 +195,19 @@ sequenceDiagram **User-visible outcome:** Scheduling and Settings pages read job definitions from a registry instead of hardcoded arrays. Zustand provides computed job state. **Files (4):** + - `web/utils/jobs.ts` — **NEW** — Web-side job registry with UI metadata - `web/store/useStore.ts` — add `jobs` computed slice derived from `status.config` - `web/api.ts` — add generic `triggerJob(jobId)` function - `web/utils/cron.ts` — derive schedule template keys from registry **Implementation:** + - [ ] Create `IWebJobDefinition` extending core `IJobDefinition` with UI fields: ```typescript interface IWebJobDefinition extends IJobDefinition { - icon: string; // lucide icon component name - triggerEndpoint: string; // '/api/actions/qa' + icon: string; // lucide icon component name + triggerEndpoint: string; // '/api/actions/qa' scheduleTemplateKey: string; // key in IScheduleTemplate.schedules settingsSection?: 'general' | 'advanced'; // where in Settings to show } @@ -223,11 +236,13 @@ sequenceDiagram **User-visible outcome:** Scheduling page renders job cards from the registry. 
Adding a new job automatically shows it in Scheduling. **Files (3):** + - `web/pages/Scheduling.tsx` — replace hardcoded `agents` array with registry-driven rendering - `web/components/scheduling/ScheduleConfig.tsx` — use registry for form fields - `web/utils/cron.ts` — update `IScheduleTemplate` to be extensible **Implementation:** + - [ ] Replace the hardcoded `agents: IAgentInfo[]` array with `WEB_JOB_REGISTRY.map(job => ...)` - [ ] Replace `handleJobToggle` if/else chain with generic `job.buildEnabledPatch(enabled, config)` - [ ] Replace `handleTriggerJob` map with generic `triggerJob(job.id)` @@ -247,11 +262,13 @@ sequenceDiagram **User-visible outcome:** Settings page job config sections rendered from registry. Adding a new job auto-shows its settings. **Files (3):** + - `web/pages/Settings.tsx` — replace per-job settings JSX blocks with registry loop - `web/components/dashboard/AgentStatusBar.tsx` — use registry for process status - `packages/server/src/routes/action.routes.ts` — generate routes from registry **Implementation:** + - [ ] Settings: iterate `WEB_JOB_REGISTRY` to render job config sections - [ ] Each `IWebJobDefinition` can declare its settings fields: `settingsFields: ISettingsField[]` - [ ] `AgentStatusBar`: derive process list from registry instead of hardcoded diff --git a/docs/PRDs/open-source-readiness.md b/docs/PRDs/done/open-source-readiness.md similarity index 99% rename from docs/PRDs/open-source-readiness.md rename to docs/PRDs/done/open-source-readiness.md index f451d5cc..1bfa98b8 100644 --- a/docs/PRDs/open-source-readiness.md +++ b/docs/PRDs/done/open-source-readiness.md @@ -7,6 +7,7 @@ **Problem:** The project is missing standard OSS community files and has several bugs in README/CI that would hurt a public launch. 
**Files Analyzed:** + - `README.md` — install command wrong, broken doc link, From Source uses npm not yarn - `.github/workflows/ci.yml` — targets `main` but default branch is `master` - `docs/contributing.md` — exists but not auto-discovered by GitHub (needs root copy) @@ -15,6 +16,7 @@ - No `CONTRIBUTING.md`, `CODE_OF_CONDUCT.md`, `SECURITY.md`, `CHANGELOG.md` at root **Current Behavior:** + - `npm install -g night-watch-cli` installs wrong/nonexistent package (real npm package is `@jonit-dev/night-watch-cli`) - `npm` badge links to wrong package name - CI push trigger never fires (wrong branch `main` vs `master`) @@ -25,6 +27,7 @@ ## 2. Solution **Approach:** + - Fix README bugs (install command, badge, broken link, From Source yarn command) - Fix CI workflow branch targets - Create community files: CONTRIBUTING.md, CODE_OF_CONDUCT.md, SECURITY.md, CHANGELOG.md @@ -32,6 +35,7 @@ - Clean up web/README.md **Key Decisions:** + - CONTRIBUTING.md goes in `.github/` so GitHub auto-links it; also keep `docs/contributing.md` unchanged - Use Contributor Covenant v2.1 for CODE_OF_CONDUCT.md - CHANGELOG.md starts from current version with a brief history @@ -44,6 +48,7 @@ ### Phase 1: Fix README and CI bugs **Files (4):** + - `README.md` — fix install command, badge, broken link, From Source yarn - `.github/workflows/ci.yml` — fix branch target main → master @@ -79,6 +84,7 @@ | Manual: `docs/architecture-overview.md` exists | File accessible | **Verification:** + - `yarn verify` passes (no TS/lint changes, but ensures nothing broken) --- @@ -86,6 +92,7 @@ ### Phase 2: Community files — CONTRIBUTING, CODE_OF_CONDUCT, SECURITY **Files (3):** + - `.github/CONTRIBUTING.md` — auto-discovered by GitHub - `CODE_OF_CONDUCT.md` — project root - `SECURITY.md` — project root @@ -99,6 +106,7 @@ - [ ] Create `SECURITY.md` at project root — vulnerability reporting via GitHub private security advisory **Content for `.github/CONTRIBUTING.md`:** + ```markdown # Contributing to Night Watch 
CLI @@ -113,6 +121,7 @@ Thank you for your interest in contributing! ## Reporting Bugs Open an issue using the **Bug Report** template. Include: + - Night Watch version (`night-watch --version`) - OS and Node.js version - Your `night-watch.config.json` (redact any API keys or tokens) @@ -121,6 +130,7 @@ Open an issue using the **Bug Report** template. Include: ## Requesting Features Open an issue using the **Feature Request** template. Describe: + - The problem you're solving - Your proposed solution - Alternatives you considered @@ -145,6 +155,7 @@ This project follows the [Contributor Covenant Code of Conduct](../CODE_OF_CONDU **Content for `CODE_OF_CONDUCT.md`:** Use Contributor Covenant v2.1 standard text with contact email placeholder. **Content for `SECURITY.md`:** + ```markdown # Security Policy @@ -162,6 +173,7 @@ You should expect an acknowledgement within 48 hours. If a vulnerability is conf ``` **Verification:** + - `yarn verify` passes - GitHub shows CONTRIBUTING link on Issues/PRs page after pushing @@ -170,6 +182,7 @@ You should expect an acknowledgement within 48 hours. If a vulnerability is conf ### Phase 3: GitHub templates and CHANGELOG **Files (4):** + - `.github/ISSUE_TEMPLATE/bug_report.md` - `.github/ISSUE_TEMPLATE/feature_request.md` - `.github/pull_request_template.md` @@ -183,6 +196,7 @@ You should expect an acknowledgement within 48 hours. If a vulnerability is conf - [ ] Create `CHANGELOG.md` starting from current version `1.7.94` with brief history **Bug report template fields:** + - Night Watch version - OS + Node.js version - Config (redacted) @@ -191,12 +205,14 @@ You should expect an acknowledgement within 48 hours. 
If a vulnerability is conf - Relevant logs **Feature request template fields:** + - Problem statement - Proposed solution - Alternatives considered - Additional context **PR template fields:** + - Summary of changes - Related issue (closes #) - Type of change (bug fix / feature / docs / refactor) @@ -205,6 +221,7 @@ You should expect an acknowledgement within 48 hours. If a vulnerability is conf **CHANGELOG.md:** Start with current version block, note it covers changes from initial public release. Keep it brief — one block per recent milestone, not per patch. **Verification:** + - `yarn verify` passes - New issue on GitHub shows template selector - New PR on GitHub shows template pre-filled @@ -214,12 +231,15 @@ You should expect an acknowledgement within 48 hours. If a vulnerability is conf ### Phase 4: Cleanup web/README.md **Files (1):** + - `web/README.md` **Implementation:** + - [ ] Replace Google AI Studio boilerplate content with a brief description of the Night Watch web dashboard: what it is, how to start it (`yarn dev` from `web/`), and a link to `docs/WEB-UI.md` for full docs. 
**Verification:** + - `yarn verify` passes - `web/README.md` no longer references Google AI Studio or unrelated tooling diff --git a/docs/PRDs/prd-provider-schedule-overrides.md b/docs/PRDs/done/prd-provider-schedule-overrides.md similarity index 100% rename from docs/PRDs/prd-provider-schedule-overrides.md rename to docs/PRDs/done/prd-provider-schedule-overrides.md diff --git a/docs/PRDs/provider-agnostic-instructions.md b/docs/PRDs/done/provider-agnostic-instructions.md similarity index 100% rename from docs/PRDs/provider-agnostic-instructions.md rename to docs/PRDs/done/provider-agnostic-instructions.md diff --git a/docs/PRDs/remove-legacy-personas-and-filesystem-prd.md b/docs/PRDs/done/remove-legacy-personas-and-filesystem-prd.md similarity index 100% rename from docs/PRDs/remove-legacy-personas-and-filesystem-prd.md rename to docs/PRDs/done/remove-legacy-personas-and-filesystem-prd.md diff --git a/docs/PRDs/night-watch/provider-aware-queue.md b/docs/PRDs/night-watch/done/provider-aware-queue.md similarity index 100% rename from docs/PRDs/night-watch/provider-aware-queue.md rename to docs/PRDs/night-watch/done/provider-aware-queue.md diff --git a/docs/PRDs/pr-merge-keeper.md b/docs/PRDs/pr-merge-keeper.md new file mode 100644 index 00000000..3504306c --- /dev/null +++ b/docs/PRDs/pr-merge-keeper.md @@ -0,0 +1,425 @@ +# PRD: PR Merge Keeper — Automated Conflict Resolution & Ready-to-Merge Management + +**Complexity: 8 → HIGH mode** (+3 touches 10+ files, +2 new module from scratch, +2 multi-package, +1 external API integration) + +## 1. Context + +**Problem:** Open PRs drift out of date as the default branch evolves. Merge conflicts accumulate, review suggestions go unaddressed, and PRs stall. Currently, there is no automated job to keep PRs rebased, resolve conflicts, address review feedback, and label PRs as ready-to-merge. 
+ +**Files Analyzed:** + +- `packages/core/src/jobs/job-registry.ts` — existing job definitions (6 types) +- `packages/core/src/types.ts` — `JobType`, `INightWatchConfig`, `IJobProviders`, `NotificationEvent` +- `packages/core/src/constants.ts` — defaults, queue priority +- `packages/core/src/utils/github.ts` — `gh` CLI wrappers for PR operations +- `packages/core/src/utils/git-utils.ts` — branch detection, timestamps +- `packages/core/src/utils/worktree-manager.ts` — worktree creation/cleanup +- `packages/cli/src/commands/review.ts` — reviewer pattern (closest analogue) +- `packages/cli/src/commands/shared/env-builder.ts` — shared env building +- `packages/cli/scripts/night-watch-helpers.sh` — `build_provider_cmd()`, `ensure_provider_on_path()` +- `packages/cli/scripts/night-watch-pr-reviewer-cron.sh` — reviewer bash script pattern + +**Current Behavior:** + +- PRs accumulate merge conflicts silently; developers must manually rebase +- Review suggestions (GitHub review comments) are not automatically addressed +- No label/signal to indicate a PR is conflict-free and review-complete +- The reviewer job reviews PRs and posts comments but does not resolve conflicts or address review feedback +- Auto-merge exists but only triggers after reviewer score threshold; no conflict resolution step + +## 2. Solution + +**Approach:** + +1. Register a new **`merger`** job type in the job registry with its own cron schedule (3x daily), CLI command, lock file, and queue priority +2. The job iterates **all open PRs** in the repo (not just night-watch branches), checking each for: (a) merge conflicts with default branch, (b) unresolved review comments/suggestions +3. For **merge conflicts**: check out the PR branch in a worktree, attempt `git rebase` on default branch. If git auto-merge fails, invoke the configured AI provider to resolve conflicts intelligently, then force-push the rebased branch +4. 
For **unresolved review comments**: use the AI provider to read GitHub review comments, implement the suggested changes, commit, and push — then request re-review from the original reviewer +5. When a PR is conflict-free and has no unresolved review threads, add a **`ready-to-merge`** GitHub label + +**Architecture Diagram:** + +```mermaid +flowchart TB + subgraph CLI["night-watch merge"] + CMD[merge command] --> ENV[buildEnvVars] + ENV --> SCRIPT[night-watch-merger-cron.sh] + end + + subgraph Script["Bash Script"] + SCRIPT --> FETCH[gh pr list --state open] + FETCH --> LOOP[For each PR] + LOOP --> CONFLICT{Has conflicts?} + CONFLICT -->|Yes| REBASE[git rebase default branch] + REBASE --> AUTO{Auto-resolved?} + AUTO -->|No| AI_RESOLVE[AI provider resolves conflicts] + AUTO -->|Yes| PUSH[force-push rebased branch] + AI_RESOLVE --> PUSH + CONFLICT -->|No| REVIEW{Unresolved reviews?} + PUSH --> REVIEW + REVIEW -->|Yes| AI_FIX[AI provider implements suggestions] + AI_FIX --> RE_REVIEW[Request re-review] + REVIEW -->|No| LABEL[Add ready-to-merge label] + end + + subgraph Core["@night-watch/core"] + REG[Job Registry] --> CMD + CONFIG[INightWatchConfig] --> CMD + end +``` + +**Key Decisions:** + +- **Uses configured AI provider** via `build_provider_cmd()` (same as all other jobs) — respects presets, schedule overrides, fallback chains +- **Scope: all open PRs** — not limited to night-watch branches; configurable via `branchPatterns` extra field if user wants to narrow scope +- **Force-push after rebase** — necessary for rebased branches; the job only force-pushes branches it has actively rebased (never the default branch) +- **`ready-to-merge` label** — added when PR is conflict-free + all review threads resolved; removed if new conflicts/reviews appear +- **Bash script pattern** — follows the same `night-watch-*-cron.sh` pattern as all other jobs for consistency +- **Review comment resolution** — AI reads review comments via `gh pr view --comments` and implements 
changes; requests re-review after push + +**Data Changes:** + +- `JobType` union gains `'merger'` value +- `IJobProviders` gains optional `merger?: Provider` +- New `IMergerConfig` interface extending `IBaseJobConfig` with extra fields +- New notification events: `merger_completed`, `merger_conflict_resolved`, `merger_failed` +- Job registry gains `merger` entry + +## 3. Sequence Flow + +```mermaid +sequenceDiagram + participant Cron as Cron / CLI + participant Script as merger-cron.sh + participant GH as GitHub (gh CLI) + participant AI as AI Provider + participant Git as Git + + Cron->>Script: night-watch merge + Script->>GH: gh pr list --state open --json ... + GH-->>Script: [PR1, PR2, ...] + + loop Each PR + Script->>GH: gh pr view PR --json mergeable,reviewThreads + alt Has merge conflicts + Script->>Git: git worktree add + git rebase + alt Rebase succeeds (auto-merge) + Script->>Git: git push --force-with-lease + else Rebase fails (conflicts) + Script->>AI: "Resolve merge conflicts in these files: ..." + AI-->>Script: Conflict resolution applied + Script->>Git: git add + git rebase --continue + Script->>Git: git push --force-with-lease + end + end + + alt Has unresolved review comments + Script->>GH: gh api /repos/.../pulls/N/reviews + Script->>AI: "Implement these review suggestions: ..." + AI-->>Script: Changes applied + Script->>Git: git commit + git push + Script->>GH: gh pr edit --add-reviewer (request re-review) + end + + alt Conflict-free + no unresolved reviews + Script->>GH: gh pr edit --add-label ready-to-merge + Script->>Script: log "PR #N is ready to merge" + else Still has issues + Script->>GH: gh pr edit --remove-label ready-to-merge + end + end + + Script-->>Cron: emit_result with summary +``` + +## 4. Execution Phases + +### Phase 1: Core Registration — Job Type & Config + +**User-visible outcome:** `merger` job type exists in the registry, config normalizes correctly, and `night-watch merge --dry-run` shows configuration. 
+ +**Files (5):** + +- `packages/core/src/types.ts` — add `'merger'` to `JobType`, `IMergerConfig` interface, new notification events, `merger` to `IJobProviders` +- `packages/core/src/jobs/job-registry.ts` — add `merger` entry to `JOB_REGISTRY` +- `packages/core/src/constants.ts` — add `DEFAULT_MERGER_*` constants +- `packages/core/src/config.ts` — wire `merger` config normalization (follows existing pattern) +- `packages/core/src/index.ts` — export new types if needed + +**Implementation:** + +- [ ] Add `'merger'` to the `JobType` union type +- [ ] Add `merger?: Provider` to `IJobProviders` +- [ ] Define `IMergerConfig` extending `IBaseJobConfig`: + ```typescript + interface IMergerConfig extends IBaseJobConfig { + /** Branch patterns to match (empty = all open PRs) */ + branchPatterns: string[]; + /** Max PRs to process per run (0 = unlimited) */ + maxPrsPerRun: number; + /** Max runtime per individual PR in seconds */ + perPrTimeout: number; + /** Whether to attempt AI conflict resolution (vs skip conflicted PRs) */ + aiConflictResolution: boolean; + /** Whether to attempt AI review comment resolution */ + aiReviewResolution: boolean; + /** Label to add when PR is ready to merge */ + readyLabel: string; + } + ``` +- [ ] Add default constants: + ```typescript + DEFAULT_MERGER_ENABLED = true; + DEFAULT_MERGER_SCHEDULE = '15 6,14,22 * * *'; // 3x daily: 6:15, 14:15, 22:15 + DEFAULT_MERGER_MAX_RUNTIME = 3600; // 1 hour + DEFAULT_MERGER_MAX_PRS_PER_RUN = 0; // unlimited + DEFAULT_MERGER_PER_PR_TIMEOUT = 600; // 10 min per PR + DEFAULT_MERGER_AI_CONFLICT_RESOLUTION = true; + DEFAULT_MERGER_AI_REVIEW_RESOLUTION = true; + DEFAULT_MERGER_READY_LABEL = 'ready-to-merge'; + ``` +- [ ] Add `merger` entry to `JOB_REGISTRY`: + ```typescript + { + id: 'merger', + name: 'Merge Keeper', + description: 'Resolves merge conflicts, addresses review comments, and labels PRs ready-to-merge', + cliCommand: 'merge', + logName: 'merger', + lockSuffix: '-merger.lock', + queuePriority: 35, 
// between reviewer (40) and slicer (30) + envPrefix: 'NW_MERGER', + extraFields: [ + { name: 'branchPatterns', type: 'string[]', defaultValue: [] }, + { name: 'maxPrsPerRun', type: 'number', defaultValue: 0 }, + { name: 'perPrTimeout', type: 'number', defaultValue: 600 }, + { name: 'aiConflictResolution', type: 'boolean', defaultValue: true }, + { name: 'aiReviewResolution', type: 'boolean', defaultValue: true }, + { name: 'readyLabel', type: 'string', defaultValue: 'ready-to-merge' }, + ], + defaultConfig: { enabled: true, schedule: '15 6,14,22 * * *', maxRuntime: 3600, ... } + } + ``` +- [ ] Add `'merger_completed' | 'merger_conflict_resolved' | 'merger_failed'` to `NotificationEvent` union + +**Tests Required:** +| Test File | Test Name | Assertion | +|-----------|-----------|-----------| +| `packages/core/src/__tests__/jobs/job-registry.test.ts` | `should include merger in job registry` | `expect(getJobDef('merger')).toBeDefined()` | +| `packages/core/src/__tests__/jobs/job-registry.test.ts` | `merger has correct defaults` | schedule, maxRuntime, queuePriority checks | +| `packages/core/src/__tests__/jobs/job-registry.test.ts` | `normalizeJobConfig handles merger extra fields` | all extra fields normalized with defaults | + +**Verification Plan:** + +1. **Unit Tests:** Registry lookup, config normalization for merger +2. **Evidence:** `yarn verify` passes, `yarn test` passes + +--- + +### Phase 2: CLI Command — `night-watch merge` + +**User-visible outcome:** Running `night-watch merge --dry-run` displays merger configuration, open PRs status, and provider info. Running `night-watch merge` executes the bash script. 
+ +**Files (4):** + +- `packages/cli/src/commands/merge.ts` — **NEW** — CLI command implementation (follows review.ts pattern) +- `packages/cli/src/cli.ts` — register `mergeCommand` +- `packages/cli/src/commands/shared/env-builder.ts` — no changes needed (generic `buildBaseEnvVars` handles new job type) +- `packages/cli/src/commands/install.ts` — add `--no-merger` / `--merger` flags + +**Implementation:** + +- [ ] Create `merge.ts` following the reviewer command pattern: + - `IMergeOptions`: `{ dryRun, timeout, provider }` + - `buildEnvVars(config, options)`: calls `buildBaseEnvVars(config, 'merger', options.dryRun)` + merger-specific env vars: + - `NW_MERGER_MAX_RUNTIME` + - `NW_MERGER_MAX_PRS_PER_RUN` + - `NW_MERGER_PER_PR_TIMEOUT` + - `NW_MERGER_AI_CONFLICT_RESOLUTION` + - `NW_MERGER_AI_REVIEW_RESOLUTION` + - `NW_MERGER_READY_LABEL` + - `NW_MERGER_BRANCH_PATTERNS` + - `applyCliOverrides(config, options)`: timeout + provider overrides + - Dry-run mode: show config table, list open PRs with conflict/review status, env vars, command + - Execute mode: spinner + `executeScriptWithOutput` calling `night-watch-merger-cron.sh` + - Notification sending after completion +- [ ] Register in `cli.ts`: `mergeCommand(program)` +- [ ] Add `--no-merger` / `--merger` flags to install command for cron schedule control + +**Tests Required:** +| Test File | Test Name | Assertion | +|-----------|-----------|-----------| +| `packages/cli/src/__tests__/commands/merge.test.ts` | `buildEnvVars includes merger-specific vars` | env var keys present | +| `packages/cli/src/__tests__/commands/merge.test.ts` | `applyCliOverrides applies timeout override` | config mutated | + +**Verification Plan:** + +1. **Unit Tests:** env var building, config overrides +2. **Manual:** `night-watch merge --dry-run` outputs valid config +3. 
**Evidence:** `yarn verify` passes + +--- + +### Phase 3: Bash Script — Core Merger Logic + +**User-visible outcome:** `night-watch merge` iterates open PRs, detects conflicts, attempts git rebase, and invokes AI provider for unresolvable conflicts. Adds/removes `ready-to-merge` label. + +**Files (2):** + +- `packages/cli/scripts/night-watch-merger-cron.sh` — **NEW** — main merger bash script +- `packages/cli/scripts/night-watch-helpers.sh` — add any shared helper functions if needed (likely none) + +**Implementation:** + +- [ ] Script structure (following reviewer pattern): + ```bash + #!/usr/bin/env bash + set -euo pipefail + # Usage: night-watch-merger-cron.sh /path/to/project + ``` +- [ ] Parse env vars: + - `NW_MERGER_MAX_RUNTIME`, `NW_MERGER_MAX_PRS_PER_RUN`, `NW_MERGER_PER_PR_TIMEOUT` + - `NW_MERGER_AI_CONFLICT_RESOLUTION`, `NW_MERGER_AI_REVIEW_RESOLUTION` + - `NW_MERGER_READY_LABEL`, `NW_MERGER_BRANCH_PATTERNS` + - Standard provider vars via `NW_PROVIDER_CMD`, etc. +- [ ] Source `night-watch-helpers.sh` for `build_provider_cmd`, `log`, `emit_result`, `acquire_lock`, `release_lock`, `rotate_log`, `ensure_provider_on_path` +- [ ] Lock file acquisition: `/tmp/night-watch-merger-${PROJECT_RUNTIME_KEY}.lock` +- [ ] **PR Discovery:** + ```bash + gh pr list --state open --json number,title,headRefName,mergeable,reviewDecision,statusCheckRollup + ``` + + - Filter by `NW_MERGER_BRANCH_PATTERNS` if set (comma-separated) + - Respect `NW_MERGER_MAX_PRS_PER_RUN` +- [ ] **Per-PR processing loop:** + 1. **Conflict detection:** Check `mergeable` status from `gh pr view` + 2. 
**Rebase attempt:** + - Create worktree on the PR branch via `prepare_branch_worktree` or manual `git worktree add` + - `git fetch origin ${DEFAULT_BRANCH}` then `git rebase origin/${DEFAULT_BRANCH}` + - If rebase succeeds cleanly: `git push --force-with-lease origin ${BRANCH}` + - If rebase fails with conflicts and `NW_MERGER_AI_CONFLICT_RESOLUTION=1`: + - Abort rebase: `git rebase --abort` + - Build AI prompt: "You are in a git repository. The branch `{branch}` has merge conflicts with `{default_branch}`. Please rebase this branch onto `origin/{default_branch}` and resolve all merge conflicts. After resolving, ensure the code compiles and tests pass. Use `git rebase origin/{default_branch}` and resolve conflicts, then `git push --force-with-lease origin {branch}`." + - Invoke AI via `build_provider_cmd` + `timeout` + - If rebase fails and AI resolution is disabled: skip PR, log warning + 3. **Review comment resolution (if `NW_MERGER_AI_REVIEW_RESOLUTION=1`):** + - Check for unresolved review threads: `gh api repos/{owner}/{repo}/pulls/{number}/reviews` + - If unresolved threads exist: + - Build AI prompt: "You are in a git repository on branch `{branch}`. This PR has unresolved review comments from GitHub reviewers. Please read the review comments using `gh pr view {number} --comments`, understand the requested changes, implement them, commit with a descriptive message, and push. After pushing, the reviewers will be automatically notified." + - Invoke AI via `build_provider_cmd` + `timeout` + 4. **Ready-to-merge labeling:** + - After processing, re-check: `gh pr view {number} --json mergeable,reviewThreads` + - If conflict-free AND no unresolved review threads: + - `gh pr edit {number} --add-label ${READY_LABEL}` + - Log: "PR #{number} marked as ready-to-merge" + - Else: + - `gh pr edit {number} --remove-label ${READY_LABEL}` (ignore error if label not present) + 5. 
**Worktree cleanup** after each PR +- [ ] **Result emission:** + - Track: `prs_processed`, `conflicts_resolved`, `reviews_addressed`, `prs_ready`, `prs_failed` + - `emit_result "success" "prs_processed=${PROCESSED} conflicts_resolved=${CONFLICTS} reviews_addressed=${REVIEWS} prs_ready=${READY}"` +- [ ] **Timeout handling:** per-PR timeout via `NW_MERGER_PER_PR_TIMEOUT`, global timeout via `NW_MERGER_MAX_RUNTIME` + +**Tests Required:** +| Test File | Test Name | Assertion | +|-----------|-----------|-----------| +| `packages/cli/scripts/test-helpers.bats` | `merger lock acquisition` | lock file created/released | + +**Verification Plan:** + +1. **Manual test:** Run `night-watch merge --dry-run` in a project with open PRs +2. **Manual test:** Run `night-watch merge` on a repo with a known conflicted PR +3. **Evidence:** Log output shows PR iteration, conflict detection, resolution attempt + +--- + +### Phase 4: Notifications & Install Integration + +**User-visible outcome:** Merger job sends notifications on completion/failure. `night-watch install` includes merger cron schedule. Summary command includes merger data. 
+ +**Files (5):** + +- `packages/core/src/utils/notify.ts` — add merger notification event formatting +- `packages/cli/src/commands/install.ts` — add merger cron entry generation +- `packages/cli/src/commands/uninstall.ts` — handle merger cron removal +- `packages/cli/src/commands/merge.ts` — wire notification sending in execute flow +- `packages/core/src/utils/summary.ts` — include merger stats in morning briefing + +**Implementation:** + +- [ ] Add notification message formatting for `merger_completed`, `merger_conflict_resolved`, `merger_failed` events +- [ ] `install.ts`: + - Add `--no-merger` flag to `IInstallOptions` + - Generate cron entry for merger using config schedule (same pattern as reviewer/qa) + - Format: `{schedule} cd {projectDir} && {nightWatchBin} merge >> {logDir}/merger.log 2>&1` +- [ ] `uninstall.ts`: remove merger cron entries in cleanup +- [ ] `merge.ts`: after script execution, build notification context and call `sendNotifications()` + - Include `prsProcessed`, `conflictsResolved`, `reviewsAddressed`, `prsReady` in context +- [ ] `summary.ts`: merge keeper stats in the action items / summary data: + - "N PRs are ready to merge" or "N PRs have unresolved conflicts" + +**Tests Required:** +| Test File | Test Name | Assertion | +|-----------|-----------|-----------| +| `packages/cli/src/__tests__/commands/merge.test.ts` | `sends merger_completed notification on success` | notification mock called | +| `packages/cli/src/__tests__/commands/install.test.ts` | `includes merger cron entry` | crontab contains merger schedule | + +**Verification Plan:** + +1. **Unit Tests:** notification formatting, install output +2. **Manual:** `night-watch install` shows merger in crontab, `night-watch summary` includes merger data +3. 
**Evidence:** `yarn verify` + `yarn test` pass + +--- + +### Phase 5: Edge Cases, Idempotency & Hardening + +**User-visible outcome:** Merger handles edge cases gracefully — protected branches, draft PRs, concurrent runs, AI resolution failures — without breaking existing PRs. + +**Files (3):** + +- `packages/cli/scripts/night-watch-merger-cron.sh` — edge case handling +- `packages/cli/src/commands/merge.ts` — preflight checks +- `packages/cli/src/__tests__/commands/merge.test.ts` — edge case tests + +**Implementation:** + +- [ ] **Skip draft PRs:** filter out PRs with `isDraft: true` +- [ ] **Skip PRs with `skip-merger` label:** configurable skip label +- [ ] **Protected branch safety:** never force-push to the default branch; verify branch name before push +- [ ] **Idempotent label management:** don't fail if `ready-to-merge` label doesn't exist yet (auto-create via `gh label create` if missing, ignore errors) +- [ ] **AI resolution failure handling:** if AI provider fails or times out, skip the PR, log error, continue to next PR +- [ ] **Force-with-lease safety:** always use `--force-with-lease` instead of `--force` to avoid overwriting concurrent pushes +- [ ] **Rate limiting:** respect `NW_MERGER_MAX_PRS_PER_RUN` and global timeout +- [ ] **Re-check after rebase:** after force-pushing, wait briefly for GitHub to update mergeable status before labeling +- [ ] **Concurrent run protection:** lock file prevents multiple merger instances + +**Tests Required:** +| Test File | Test Name | Assertion | +|-----------|-----------|-----------| +| `packages/cli/src/__tests__/commands/merge.test.ts` | `skips draft PRs` | draft PRs excluded from processing | +| `packages/cli/src/__tests__/commands/merge.test.ts` | `skips PRs with skip-merger label` | labeled PRs excluded | +| `packages/cli/src/__tests__/commands/merge.test.ts` | `handles AI resolution failure gracefully` | continues to next PR | + +**Verification Plan:** + +1. **Unit Tests:** edge case filtering +2. 
**Integration test:** Run against a repo with draft PRs, labeled PRs, and conflicted PRs +3. **Evidence:** `yarn verify` + `yarn test` pass, no data loss in any scenario + +## 5. Acceptance Criteria + +- [ ] All 5 phases complete +- [ ] All specified tests pass +- [ ] `yarn verify` passes +- [ ] All automated checkpoint reviews passed +- [ ] `night-watch merge --dry-run` shows configuration and PR status +- [ ] `night-watch merge` processes open PRs, resolves conflicts via AI, addresses review comments +- [ ] `ready-to-merge` label added to PRs that are conflict-free with no unresolved reviews +- [ ] `night-watch install` includes merger cron schedule +- [ ] Notifications sent on completion/failure +- [ ] `night-watch summary` includes merger stats +- [ ] No force-push to protected/default branches +- [ ] Draft PRs and skip-labeled PRs are excluded +- [ ] AI resolution failures are handled gracefully (skip + continue) +- [ ] Job integrates with global queue system diff --git a/docs/PRDs/refactor-interaction-listener.md b/docs/PRDs/refactor-interaction-listener.md deleted file mode 100644 index 5240cf0a..00000000 --- a/docs/PRDs/refactor-interaction-listener.md +++ /dev/null @@ -1,285 +0,0 @@ -# PRD: Refactor SlackInteractionListener God Class - -**Complexity: 5 → MEDIUM mode** - ---- - -## 1. Context - -**Problem:** `packages/slack/src/interaction-listener.ts` is a 1954-line god class with ~8 distinct responsibilities violating SRP. 
- -**Files Analyzed:** - -- `packages/slack/src/interaction-listener.ts` (1954 lines) -- `packages/slack/src/__tests__/slack/interaction-listener.test.ts` (433 lines) -- `packages/slack/src/index.ts` (barrel exports) -- `packages/slack/src/factory.ts` (composition root) -- `packages/slack/src/personas.ts` (prior extraction example) -- `packages/slack/src/utils.ts` (prior extraction example) -- `packages/slack/src/deliberation.ts` (similar class structure) - -**Current Behavior:** - -- The class handles Socket Mode lifecycle, message parsing, URL context fetching, job/process spawning, proactive messaging loops, human-like timing simulation, persona intros, and a ~300-line message router — all in one file -- Standalone parsing functions are already pure but live alongside the class -- Re-exports from `personas.ts` exist for backward compatibility -- Tests import parsing helpers directly from `interaction-listener.js` -- `index.ts` exports `SlackInteractionListener` and `shouldIgnoreInboundSlackEvent` - ---- - -## 2. Solution - -**Approach:** - -- Extract standalone parsing functions + types into `message-parser.ts` -- Extract URL/GitHub context-fetching into `context-fetcher.ts` -- Extract process spawning (NW jobs, providers, audits) into `job-spawner.ts` as a class -- Extract proactive messaging loop into `proactive-loop.ts` as a class -- Keep `interaction-listener.ts` as a thin orchestrator (~700 lines) composing the extracted modules -- Maintain all existing exports via re-exports — zero breaking changes for consumers - -**Architecture Diagram:** - -```mermaid -flowchart TD - IL[SlackInteractionListener
~700 lines
Orchestrator] - MP[message-parser.ts
Pure parsing functions] - CF[context-fetcher.ts
URL/GitHub fetching] - JS[JobSpawner
Process spawning] - PL[ProactiveLoop
Timed proactive messages] - - IL -->|imports| MP - IL -->|imports| CF - IL -->|delegates| JS - IL -->|delegates| PL - JS -->|uses| SlackClient - JS -->|uses| DeliberationEngine - PL -->|uses| SlackClient - PL -->|uses| DeliberationEngine -``` - -**Key Decisions:** - -- Pure functions (parsing, URL fetching) extracted as standalone modules — no class needed -- `JobSpawner` and `ProactiveLoop` are classes receiving `SlackClient`, `DeliberationEngine`, and `INightWatchConfig` via constructor — same pattern as existing `DeliberationEngine` -- State maps (`_lastChannelActivityAt`, `_lastProactiveAt`, etc.) shared via injection where needed -- All existing `interaction-listener.js` exports preserved via re-exports — tests and barrel untouched unless imports need updating -- Remove the re-export block for personas (lines 36-43) and update test imports to import from `personas.js` and `message-parser.js` directly - -**Data Changes:** None - ---- - -## 3. Sequence Flow - -```mermaid -sequenceDiagram - participant SM as SocketMode - participant IL as InteractionListener - participant MP as MessageParser - participant CF as ContextFetcher - participant JS as JobSpawner - participant PL as ProactiveLoop - - SM->>IL: inbound event - IL->>MP: parse job/provider/issue request - MP-->>IL: parsed request or null - IL->>CF: fetch GitHub/URL context - CF-->>IL: enriched context string - alt Job/Provider/Issue request - IL->>JS: spawn job/provider - JS-->>IL: (async child process) - else Persona reply - IL->>IL: resolve persona, reply via engine - end - - Note over PL: Runs on interval timer - PL->>PL: check idle channels - PL->>JS: spawn code-watch audit -``` - ---- - -## 4. Execution Phases - -### Integration Points Checklist - -``` -How will this feature be reached? 
-- [x] Entry point: Socket Mode events (unchanged) -- [x] Caller file: factory.ts creates SlackInteractionListener (unchanged) -- [x] Registration/wiring: new classes instantiated inside InteractionListener constructor - -Is this user-facing? -- [x] NO → Internal refactoring, no behavior changes - -Full user flow: -1. Slack message arrives via Socket Mode (unchanged) -2. InteractionListener routes to parsers/spawners (same logic, different files) -3. Result displayed in Slack (unchanged) -``` - ---- - -#### Phase 1: Extract `message-parser.ts` — Pure parsing functions moved to dedicated module - -**Files (4):** - -- `packages/slack/src/message-parser.ts` — NEW: all parsing functions, types, constants -- `packages/slack/src/interaction-listener.ts` — remove standalone functions, import from `message-parser.ts` -- `packages/slack/src/__tests__/slack/interaction-listener.test.ts` — update imports -- `packages/slack/src/index.ts` — add `shouldIgnoreInboundSlackEvent` export from `message-parser.ts` - -**Implementation:** - -- [ ] Create `message-parser.ts` with: - - Constants: `JOB_STOPWORDS` - - Types: `TSlackJobName`, `TSlackProviderName`, `ISlackJobRequest`, `ISlackProviderRequest`, `ISlackIssuePickupRequest`, `IInboundSlackEvent`, `IEventsApiPayload`, `IAdHocThreadState` - - Functions: `normalizeForParsing`, `extractInboundEvent`, `buildInboundMessageKey`, `isAmbientTeamMessage`, `parseSlackJobRequest`, `parseSlackIssuePickupRequest`, `parseSlackProviderRequest`, `shouldIgnoreInboundSlackEvent`, `extractGitHubIssueUrls`, `extractGenericUrls` -- [ ] In `interaction-listener.ts`: replace standalone functions with imports from `message-parser.js` -- [ ] Remove the persona re-export block (lines 36-43) — these are already exported from `personas.ts` and `index.ts` -- [ ] Update test imports to use `message-parser.js` for parsers and `personas.js` for persona helpers -- [ ] Update `index.ts` to export `shouldIgnoreInboundSlackEvent` from `message-parser.js` - -**Tests 
Required:** - -| Test File | Test Name | Assertion | -| ------------------------------ | ------------------------- | ----------------------------- | -| `interaction-listener.test.ts` | All existing parser tests | All pass with updated imports | - -**Verification:** - -- `yarn verify` passes -- `yarn workspace @night-watch/slack test` passes -- All 25+ existing parser tests continue passing - ---- - -#### Phase 2: Extract `context-fetcher.ts` — URL/GitHub context enrichment - -**Files (2):** - -- `packages/slack/src/context-fetcher.ts` — NEW: `fetchUrlSummaries`, `fetchGitHubIssueContext` -- `packages/slack/src/interaction-listener.ts` — import from `context-fetcher.js` - -**Implementation:** - -- [ ] Create `context-fetcher.ts` with `fetchUrlSummaries` and `fetchGitHubIssueContext` -- [ ] In `interaction-listener.ts`: import these two functions from `context-fetcher.js` - -**Tests Required:** - -| Test File | Test Name | Assertion | -| -------------- | ------------------ | --------------------------------------- | -| existing tests | All existing tests | Continue passing (no public API change) | - -**Verification:** - -- `yarn verify` passes -- `yarn workspace @night-watch/slack test` passes - ---- - -#### Phase 3: Extract `job-spawner.ts` — Process spawning for NW jobs, providers, audits - -**Files (2):** - -- `packages/slack/src/job-spawner.ts` — NEW: `JobSpawner` class -- `packages/slack/src/interaction-listener.ts` — delegate to `JobSpawner` - -**Implementation:** - -- [ ] Create `JobSpawner` class with constructor `(slackClient, engine, config)` -- [ ] Move `extractLastMeaningfulLines` helper (used only by spawner) -- [ ] Move constants: `MAX_JOB_OUTPUT_CHARS` -- [ ] Move methods: - - `_spawnNightWatchJob` → `spawnNightWatchJob(job, project, channel, threadTs, persona, opts, callbacks)` - - `_spawnDirectProviderRequest` → `spawnDirectProviderRequest(request, project, channel, threadTs, persona, callbacks)` - - `_spawnCodeWatchAudit` → 
`spawnCodeWatchAudit(project, channel, callbacks)` -- [ ] Callbacks interface `IJobSpawnerCallbacks` for `markChannelActivity` and `markPersonaReply` to avoid circular coupling -- [ ] In `interaction-listener.ts`: instantiate `JobSpawner` in constructor, delegate calls - -**Tests Required:** - -| Test File | Test Name | Assertion | -| -------------- | ---------------------------- | ---------------- | -| existing tests | Lifecycle + all parser tests | Continue passing | - -**Verification:** - -- `yarn verify` passes -- `yarn workspace @night-watch/slack test` passes - ---- - -#### Phase 4: Extract `proactive-loop.ts` — Proactive messaging timer and code-watch - -**Files (2):** - -- `packages/slack/src/proactive-loop.ts` — NEW: `ProactiveLoop` class -- `packages/slack/src/interaction-listener.ts` — delegate to `ProactiveLoop` - -**Implementation:** - -- [ ] Create `ProactiveLoop` class with constructor `(engine, config, jobSpawner, stateAccessors)` -- [ ] Move constants: `PROACTIVE_IDLE_MS`, `PROACTIVE_MIN_INTERVAL_MS`, `PROACTIVE_SWEEP_INTERVAL_MS`, `PROACTIVE_CODEWATCH_MIN_INTERVAL_MS` -- [ ] Move methods: - - `_startProactiveLoop` → `start()` - - `_stopProactiveLoop` → `stop()` - - `_sendProactiveMessages` → `sendProactiveMessages()` - - `_runProactiveCodeWatch` → `runProactiveCodeWatch()` - - `_resolveProactiveChannelForProject` → `resolveProactiveChannelForProject()` -- [ ] State accessors interface for reading/writing `_lastChannelActivityAt`, `_lastProactiveAt`, `_lastCodeWatchAt` -- [ ] `ProactiveLoop` receives project/persona resolution helpers or accesses repos directly -- [ ] In `interaction-listener.ts`: instantiate `ProactiveLoop` in constructor, delegate `start()/stop()` - -**Tests Required:** - -| Test File | Test Name | Assertion | -| -------------- | ------------------- | ---------------- | -| existing tests | Lifecycle stop test | Continue passing | - -**Verification:** - -- `yarn verify` passes -- `yarn workspace @night-watch/slack test` passes - 
---- - -#### Phase 5: Final cleanup and comprehensive verification - -**Files (2):** - -- `packages/slack/src/interaction-listener.ts` — final review, remove dead imports -- `packages/slack/src/index.ts` — verify all exports still work - -**Implementation:** - -- [ ] Audit `interaction-listener.ts` for unused imports after all extractions -- [ ] Verify barrel exports in `index.ts` cover all previously exported symbols -- [ ] Run full workspace verification - -**Tests Required:** - -| Test File | Test Name | Assertion | -| -------------- | --------------- | --------- | -| All test files | Full test suite | All pass | - -**Verification:** - -- `yarn verify` passes (full workspace) -- `yarn workspace @night-watch/slack test` passes -- Manual check: `interaction-listener.ts` is ~700 lines or less (down from 1954) - ---- - -## 5. Acceptance Criteria - -- [ ] All 5 phases complete -- [ ] All existing tests pass without modification (or with only import path updates) -- [ ] `yarn verify` passes -- [ ] `interaction-listener.ts` reduced to ~700 lines (orchestrator only) -- [ ] No new public API — all existing exports preserved via re-exports -- [ ] No behavior changes — pure structural refactoring -- [ ] Each extracted module has a single clear responsibility diff --git a/docs/architecture/analytics-job.md b/docs/architecture/analytics-job.md new file mode 100644 index 00000000..1a08778a --- /dev/null +++ b/docs/architecture/analytics-job.md @@ -0,0 +1,169 @@ +# Analytics Job Architecture + +The Analytics job analyzes product analytics data (Amplitude) and creates GitHub issues for trending patterns or anomalies. It runs on a weekly schedule by default. 
+ +--- + +## Component Overview + +```mermaid +graph TD + subgraph Core["@night-watch/core"] + AR["analytics/analytics-runner.ts\nAnalyticsRunner.run()\nfetchEvents()\nanalyzeTrends()\ncreateIssues()"] + AAC["analytics/amplitude-client.ts\nAmplitudeClient\nfetchEvents()\nfetchUsers()"] + T["types.ts\nIAnalyticsConfig\nIAnalyticsEvent"] + end + + subgraph CLI["@night-watch/cli"] + CMD["commands/analytics.ts\nnw analytics command"] + end + + subgraph Server["@night-watch/server"] + ROUTES["analytics.routes.ts\nGET /api/analytics/config\nPOST /api/analytics/run"] + end + + subgraph External["External Services"] + AMP["Amplitude API\nEvents, Users"] + GH["GitHub API\nCreate issues"] + end + + CMD --> AR + AR --> AAC + AAC --> AMP + AR --> GH + ROUTES --> AR +``` + +--- + +## Job Configuration + +```typescript +interface IAnalyticsConfig extends IBaseJobConfig { + enabled: boolean; + schedule: string; // Default: "0 6 * * 1" (weekly Monday 6am) + maxRuntime: number; // Default: 900 (15 minutes) + + // Analytics-specific fields + lookbackDays: number; // Default: 7 + targetColumn: string; // Default: "Draft" (GitHub Projects column) + analysisPrompt: string; // Optional custom analysis prompt +} +``` + +Env override examples: + +- `NW_ANALYTICS_ENABLED=true` +- `NW_ANALYTICS_LOOKBACK_DAYS=14` +- `NW_ANALYTICS_TARGET_COLUMN=Ready` + +--- + +## Analytics Flow + +```mermaid +flowchart TD + Start([Job triggered]) --> LoadConfig["Load analytics config"] + LoadConfig --> Fetch["Fetch Amplitude events\nlast N days"] + Fetch --> Analyze["Analyze trends\nusing Claude API"] + Analyze --> Trends["Identify patterns:\n- User drops\n- Funnel breaks\n- Feature adoption"] + Trends --> Issues["Create GitHub issues\nfor findings"] + Issues --> Notify["Send notifications"] + Notify --> Done([Job complete]) +``` + +--- + +## Amplitude Client + +```typescript +class AmplitudeClient { + constructor(apiKey: string, apiSecret: string); + + fetchEvents(options: { + startDate: string; + endDate: 
string; + events?: string[]; + }): Promise<IAnalyticsEvent[]>; + + fetchUsers(userIds: string[]): Promise<Record<string, unknown>[]>; +} +``` + +Event structure: + +```typescript +interface IAnalyticsEvent { + event_type: string; + user_id: string; + time: number; + event_properties?: Record<string, unknown>; + user_properties?: Record<string, unknown>; +} +``` + +--- + +## Analysis Patterns + +The job uses Claude to analyze events and detect: + +1. **User Drop-offs**: Sudden decreases in active users +2. **Funnel Breaks**: Conversion rate drops in key flows +3. **Feature Adoption**: New feature usage trends +4. **Anomalies**: Statistical outliers in metrics + +Example issue created: + +``` +Title: [Analytics] User drop-off detected - Week of 2026-03-15 + +Body: +- Active users dropped 23% vs previous week +- Key event `checkout_completed` down 31% +- Possible cause: Deploy on 2026-03-14 introduced regression +- Recommendation: Investigate recent checkout flow changes +``` + +--- + +## Key File Locations + +| File | Purpose | +| ------------------------------------------------- | ---------------------------------------------- | +| `packages/core/src/analytics/analytics-runner.ts` | Main analytics job runner | +| `packages/core/src/analytics/amplitude-client.ts` | Amplitude API client | +| `packages/core/src/analytics/amplitude-types.ts` | Analytics type definitions | +| `packages/core/src/analytics/index.ts` | Barrel exports | +| `packages/cli/src/commands/analytics.ts` | CLI command: `nw analytics` | +| `packages/server/src/routes/analytics.routes.ts` | Analytics API routes | +| `packages/core/src/jobs/job-registry.ts` | Analytics job registry entry | +| `web/api.ts` | `fetchAnalyticsConfig()`, `triggerAnalytics()` | +| `web/pages/Settings.tsx` | Analytics job settings UI | + +--- + +## CLI Commands + +```bash +# Run analytics job manually +nw analytics run + +# Show analytics configuration +nw analytics config + +# Test Amplitude connection +nw analytics test +``` + +--- + +## Web Integration + +The Analytics job appears in: + +- 
**Scheduling page**: Enable/disable, set schedule +- **Settings page**: Configure lookback days, target column, analysis prompt +- **Dashboard**: Shows last run status and any created issues + +The job is **disabled by default** (`enabled: false`) since it requires Amplitude credentials. diff --git a/docs/architecture/job-registry.md b/docs/architecture/job-registry.md new file mode 100644 index 00000000..29e70c13 --- /dev/null +++ b/docs/architecture/job-registry.md @@ -0,0 +1,212 @@ +# Job Registry Architecture + +The Job Registry is the single source of truth for all job type metadata, defaults, and configuration patterns. It reduces the cost of adding a new job type from touching 15+ files to just 2-3 files. + +--- + +## Component Overview + +```mermaid +graph TD + subgraph Core["@night-watch/core"] + JR["job-registry.ts\nJOB_REGISTRY[]\ngetJobDef(id)\nnormalizeJobConfig()\nbuildJobEnvOverrides()"] + CONSTS["constants.ts\nVALID_JOB_TYPES\nDEFAULT_QUEUE_PRIORITY\nLOG_FILE_NAMES"] + CN["config-normalize.ts\nRegistry-driven loop"] + CE["config-env.ts\nRegistry-driven loop"] + end + + subgraph Web["web/"] + WJR["web/utils/jobs.ts\nWEB_JOB_REGISTRY[]\ntriggerJob(id)"] + ZUSTAND["useStore.ts\njobs computed slice"] + SCHED["Scheduling.tsx\nRegistry.map() rendering"] + SETT["Settings.tsx\nRegistry.map() rendering"] + end + + subgraph Server["@night-watch/server"] + ROUTES["action.routes.ts\nRegistry loop route generation"] + end + + JR --> CONSTS + JR --> CN + JR --> CE + JR -.->|shared types| WJR + WJR --> SCHED + WJR --> SETT + WJR --> ROUTES + ZUSTAND --> SCHED + ZUSTAND --> SETT +``` + +--- + +## Job Definition Interface + +```typescript +interface IJobDefinition { + id: JobType; + name: string; + description: string; + cliCommand: string; + logName: string; + lockSuffix: string; + queuePriority: number; + envPrefix: string; + extraFields?: IExtraFieldDef[]; + defaultConfig: TConfig; +} +``` + +All jobs extend `IBaseJobConfig`: + +```typescript +interface 
IBaseJobConfig { + enabled: boolean; + schedule: string; + maxRuntime: number; +} +``` + +--- + +## Registered Jobs + +| Job Type | Name | CLI Command | Priority | Env Prefix | +| --------- | --------- | ----------- | -------- | -------------- | +| executor | Executor | `run` | 50 | `NW_EXECUTOR` | +| reviewer | Reviewer | `review` | 40 | `NW_REVIEWER` | +| slicer | Slicer | `planner` | 30 | `NW_SLICER` | +| qa | QA | `qa` | 20 | `NW_QA` | +| audit | Auditor | `audit` | 10 | `NW_AUDIT` | +| analytics | Analytics | `analytics` | 10 | `NW_ANALYTICS` | + +--- + +## Config Normalization Flow + +```mermaid +flowchart LR + Raw["Raw config object"] --> NR["normalizeJobConfig()\nregistry.get(id)"] + NR --> Base["Apply IBaseJobConfig defaults\nenabled, schedule, maxRuntime"] + Base --> Extra["Apply extraFields defaults\nif defined"] + Extra --> Result["Normalized config"] +``` + +The registry-driven normalization replaces per-job blocks in `config-normalize.ts`: + +**Before (per-job blocks):** + +```typescript +// QA job normalization +if (raw.qa) { + normalized.qa = { + enabled: raw.qa.enabled ?? true, + schedule: raw.qa.schedule ?? '45 2,10,18 * * *', + maxRuntime: raw.qa.maxRuntime ?? 3600, + branchPatterns: raw.qa.branchPatterns ?? [], + artifacts: raw.qa.artifacts ?? 'both', + // ... 
more fields + }; +} +``` + +**After (registry-driven):** + +```typescript +for (const jobDef of JOB_REGISTRY) { + const rawJob = raw[jobDef.id]; + if (rawJob) { + normalized[jobDef.id] = normalizeJobConfig(rawJob, jobDef); + } +} +``` + +--- + +## Env Override Flow + +```mermaid +flowchart LR + EnvVars["Process env\nNW_* vars"] --> BE["buildJobEnvOverrides()\nregistry entry"] + BE --> Parse["Parse by field type\nboolean, string, number,\nstring[], enum"] + Parse --> Merge["Merge with base config"] + Merge --> Result["Job config with env overrides"] +``` + +Env var naming: `{envPrefix}_{FIELD_UPPER_SNAKE}` + +Examples: + +- `NW_QA_ENABLED` → `qa.enabled` +- `NW_QA_BRANCH_PATTERNS` → `qa.branchPatterns` +- `NW_QA_ARTIFACTS` → `qa.artifacts` (enum validation) + +--- + +## Web Job Registry + +The web-side registry extends the core registry with UI-specific fields: + +```typescript +interface IWebJobDefinition extends IJobDefinition { + icon: string; // lucide icon name + triggerEndpoint: string; // '/api/actions/qa' + scheduleTemplateKey: string; + settingsSection?: 'general' | 'advanced'; +} +``` + +This enables: + +- Generic `triggerJob(jobId)` instead of per-job `triggerRun()`, `triggerReview()`, etc. 
+- Dynamic rendering of Scheduling and Settings pages via `WEB_JOB_REGISTRY.map()` + +--- + +## Key File Locations + +| File | Purpose | +| --------------------------------------------- | --------------------------------------------------------------------- | +| `packages/core/src/jobs/job-registry.ts` | Core registry, `JOB_REGISTRY`, utility functions | +| `packages/core/src/jobs/index.ts` | Barrel exports | +| `packages/core/src/config-normalize.ts` | Registry-driven normalization loop | +| `packages/core/src/config-env.ts` | Registry-driven env override loop | +| `packages/core/src/constants.ts` | Derives `VALID_JOB_TYPES`, `DEFAULT_QUEUE_PRIORITY`, `LOG_FILE_NAMES` | +| `packages/cli/src/commands/run.ts` | Uses `getJobDef()` for executor dispatch | +| `packages/server/src/routes/action.routes.ts` | Generates routes from registry | +| `web/utils/jobs.ts` | Web-side job registry | +| `web/api.ts` | Generic `triggerJob(jobId)` function | +| `web/pages/Scheduling.tsx` | Renders job cards from `WEB_JOB_REGISTRY` | +| `web/pages/Settings.tsx` | Renders job settings from `WEB_JOB_REGISTRY` | +| `web/store/useStore.ts` | Zustand `jobs` computed slice | + +--- + +## Adding a New Job Type + +With the registry, adding a new job requires only: + +1. **Core registry entry** (`packages/core/src/jobs/job-registry.ts`): + +```typescript +{ + id: 'metrics', + name: 'Metrics', + description: 'Generates metrics reports', + cliCommand: 'metrics', + logName: 'metrics', + lockSuffix: '-metrics.lock', + queuePriority: 10, + envPrefix: 'NW_METRICS', + defaultConfig: { + enabled: false, + schedule: '0 6 * * 1', + maxRuntime: 900, + }, +} +``` + +2. **CLI command** (`packages/cli/src/commands/metrics.ts`) + +3. **Web registry entry** (`web/utils/jobs.ts`) + +All other layers (normalization, env parsing, UI rendering, API routes) are automatically handled by the registry. 
From a49b036610b208db749fb5e36de16bb23288e2b4 Mon Sep 17 00:00:00 2001 From: Test User Date: Sat, 21 Mar 2026 23:11:56 -0700 Subject: [PATCH 2/2] feat: add e2e-validated label for automated acceptance proof on PRs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the E2E Validated Label PRD across 4 phases: Phase 1 — Label Definition & Config: - Add 'e2e-validated' (green #0e8a16) to NIGHT_WATCH_LABELS in labels.ts - Add validatedLabel: string to IQaConfig in types.ts - Add DEFAULT_QA_VALIDATED_LABEL constant and update DEFAULT_QA in constants.ts - Add validatedLabel extra field to QA job definition in job-registry.ts - Config normalization picks it up automatically via normalizeJobConfig Phase 2 — QA Script Integration: - Pass NW_QA_VALIDATED_LABEL env var from qa.ts buildEnvVars() - Show Validated Label in qa --dry-run config table - Read VALIDATED_LABEL in QA bash script with e2e-validated default - Add ensure_validated_label() helper (idempotent gh label create --force) - Apply label on 'passing' outcome, remove on 'issues_found'/'no_tests_needed' - Track VALIDATED_PRS_CSV and include in emit_result and Telegram messages Phase 3 — Init Label Sync: - Add step 11 to night-watch init: sync all NIGHT_WATCH_LABELS to GitHub - Skips gracefully when no GitHub remote or gh not authenticated - Updates totalSteps from 13 to 14 and adds Label Sync to summary table Phase 4 — Dry-Run & Summary Integration: - Validated Label shown in qa --dry-run config table (merged into Phase 2) - emit_result includes validated= field for all success/warning outcomes Tests: - packages/core/src/__tests__/board/labels.test.ts (new): e2e-validated presence - packages/core/src/__tests__/jobs/job-registry.test.ts: validatedLabel extra field - packages/cli/src/__tests__/commands/qa.test.ts: NW_QA_VALIDATED_LABEL env var - packages/cli/src/__tests__/commands/init.test.ts: label sync preconditions Co-Authored-By: Claude Sonnet 4.6 --- 
docs/PRDs/e2e-validated-label.md | 305 ++++++++++++++++++ .../cli/src/__tests__/commands/init.test.ts | 38 +++ .../cli/src/__tests__/commands/qa.test.ts | 30 ++ packages/cli/src/commands/init.ts | 46 ++- packages/cli/src/commands/qa.ts | 2 + .../core/src/__tests__/board/labels.test.ts | 101 ++++++ .../src/__tests__/jobs/job-registry.test.ts | 17 + packages/core/src/board/labels.ts | 5 + packages/core/src/constants.ts | 2 + packages/core/src/jobs/job-registry.ts | 3 + packages/core/src/types.ts | 2 + scripts/night-watch-qa-cron.sh | 34 +- 12 files changed, 575 insertions(+), 10 deletions(-) create mode 100644 docs/PRDs/e2e-validated-label.md create mode 100644 packages/core/src/__tests__/board/labels.test.ts diff --git a/docs/PRDs/e2e-validated-label.md b/docs/PRDs/e2e-validated-label.md new file mode 100644 index 00000000..6f4c0091 --- /dev/null +++ b/docs/PRDs/e2e-validated-label.md @@ -0,0 +1,305 @@ +# PRD: E2E Validated Label — Automated Acceptance Proof for PRs + +**Complexity: 4 → MEDIUM mode** (+1 touches 5 files, +2 multi-package, +1 external API integration) + +## 1. Context + +**Problem:** The QA job generates and runs e2e/integration tests on PRs, but there is no way to prove at a glance that a PR's acceptance requirements have been validated. Developers and reviewers must manually inspect QA comments to know if tests passed. There is no GitHub label signaling "this PR's e2e tests prove the work is done." 
+ +**Files Analyzed:** +- `packages/cli/scripts/night-watch-qa-cron.sh` — QA bash script, classifies QA outcomes (passing, issues_found, no_tests_needed, unclassified) +- `packages/core/src/board/labels.ts` — label taxonomy (priority, category, horizon, operational) +- `packages/core/src/constants.ts` — `DEFAULT_QA_SKIP_LABEL`, other QA defaults +- `packages/core/src/types.ts` — `IQaConfig`, `INightWatchConfig` +- `packages/cli/src/commands/init.ts` — initialization flow, label creation not currently done +- `packages/cli/src/commands/qa.ts` — QA command, parses script results + +**Current Behavior:** +- QA job runs Playwright tests, posts a `` comment on each PR with results +- `classify_qa_comment_outcome()` already classifies outcomes as `passing`, `issues_found`, `no_tests_needed`, `unclassified` +- `validate_qa_evidence()` already validates QA evidence quality (marker exists, artifacts present) +- No label is applied to PRs that pass QA — the only QA-related label is `skip-qa` (to skip QA) +- The reviewer job adds `needs-human-review` label but nothing signals e2e validation +- Labels are statically defined in `labels.ts` but never auto-created on GitHub during `init` + +## 2. Solution + +**Approach:** +1. Add an `e2e-validated` label definition to the label taxonomy in `labels.ts` +2. After QA processes each PR, if the outcome is `passing` → apply the `e2e-validated` label via `gh pr edit --add-label`; if outcome is `issues_found` or `no_tests_needed` → remove the label (idempotent) +3. Ensure the label exists on GitHub before applying: add `gh label create` in the QA script (idempotent, `--force` updates if exists) +4. Wire label creation into `night-watch init` so all Night Watch labels (including `e2e-validated`) are synced to GitHub during project initialization +5. 
Make the label name configurable via `IQaConfig.validatedLabel` with default `e2e-validated` + +**Architecture Diagram:** +```mermaid +flowchart LR + QA[QA Script] --> CLASSIFY[classify_qa_comment_outcome] + CLASSIFY -->|passing| ADD[gh pr edit --add-label e2e-validated] + CLASSIFY -->|issues_found| REMOVE[gh pr edit --remove-label e2e-validated] + CLASSIFY -->|no_tests_needed| REMOVE + INIT[night-watch init] --> SYNC[gh label create for all NW labels] +``` + +**Key Decisions:** +- **Reuse existing `classify_qa_comment_outcome()`** — no new classification logic needed; the infrastructure already tells us if tests pass +- **Idempotent label operations** — `--add-label` and `--remove-label` are no-ops if already present/absent; `gh label create --force` updates existing +- **Configurable label name** — `config.qa.validatedLabel` (default: `e2e-validated`) allows customization +- **Label auto-creation** — `gh label create` is called in the QA script before first use (one-time, cached per run) so it works even without `init` +- **Init syncs all labels** — `night-watch init` step now creates all `NIGHT_WATCH_LABELS` on GitHub (including e2e-validated) + +**Data Changes:** +- `IQaConfig` gains `validatedLabel: string` field (default: `e2e-validated`) +- `NIGHT_WATCH_LABELS` array gains `e2e-validated` entry +- No database changes + +## 3. Sequence Flow + +```mermaid +sequenceDiagram + participant QA as QA Script + participant GH as GitHub (gh CLI) + + Note over QA: After provider completes for PR #N + + QA->>QA: classify_qa_comment_outcome(#N) + alt outcome = passing + QA->>GH: gh label create e2e-validated --force (idempotent) + QA->>GH: gh pr edit #N --add-label e2e-validated + QA->>QA: log "PR #N marked as e2e-validated" + else outcome = issues_found | no_tests_needed + QA->>GH: gh pr edit #N --remove-label e2e-validated + QA->>QA: log "PR #N e2e-validated label removed" + end +``` + +## 4. 
Execution Phases + +### Phase 1: Label Definition & Config — Add `e2e-validated` to the system + +**User-visible outcome:** `e2e-validated` label appears in `NIGHT_WATCH_LABELS`, `IQaConfig` has a `validatedLabel` field, and config normalizes correctly. + +**Files (5):** +- `packages/core/src/board/labels.ts` — add `e2e-validated` to `NIGHT_WATCH_LABELS` +- `packages/core/src/types.ts` — add `validatedLabel: string` to `IQaConfig` +- `packages/core/src/constants.ts` — add `DEFAULT_QA_VALIDATED_LABEL` constant, update `DEFAULT_QA` +- `packages/core/src/jobs/job-registry.ts` — add `validatedLabel` to QA extra fields +- `packages/core/src/config-normalize.ts` — normalize `validatedLabel` (fallback to default) + +**Implementation:** + +- [ ] In `labels.ts`, add to `NIGHT_WATCH_LABELS` array: + ```typescript + { + name: 'e2e-validated', + description: 'PR acceptance requirements validated by e2e/integration tests', + color: '0e8a16', // green + }, + ``` +- [ ] In `types.ts`, add to `IQaConfig`: + ```typescript + /** GitHub label to apply when e2e tests pass (proves acceptance requirements met) */ + validatedLabel: string; + ``` +- [ ] In `constants.ts`: + ```typescript + export const DEFAULT_QA_VALIDATED_LABEL = 'e2e-validated'; + ``` + Update `DEFAULT_QA` to include `validatedLabel: DEFAULT_QA_VALIDATED_LABEL` +- [ ] In `job-registry.ts`, add to the QA job's `extraFields`: + ```typescript + { name: 'validatedLabel', type: 'string', defaultValue: 'e2e-validated' }, + ``` +- [ ] In `config-normalize.ts`, ensure `validatedLabel` is normalized with string fallback (follow `skipLabel` pattern) + +**Tests Required:** +| Test File | Test Name | Assertion | +|-----------|-----------|-----------| +| `packages/core/src/__tests__/jobs/job-registry.test.ts` | `qa job has validatedLabel extra field` | `expect(getJobDef('qa').extraFields).toContainEqual(expect.objectContaining({ name: 'validatedLabel' }))` | +| `packages/core/src/__tests__/board/labels.test.ts` | `NIGHT_WATCH_LABELS 
includes e2e-validated` | `expect(NIGHT_WATCH_LABELS.map(l => l.name)).toContain('e2e-validated')` |
+
+**Verification Plan:**
+1. **Unit Tests:** Registry field check, label presence check, config normalization
+2. **Evidence:** `yarn verify` passes, `yarn test` passes
+
+---
+
+### Phase 2: QA Script — Apply/Remove Label After Test Classification
+
+**User-visible outcome:** After QA runs on a PR, the `e2e-validated` label is automatically added if tests pass, or removed if tests fail/are not needed.
+
+**Files (2):**
+- `packages/cli/scripts/night-watch-qa-cron.sh` — add label application logic after classification
+- `packages/cli/src/commands/qa.ts` — pass `validatedLabel` to env vars
+
+**Implementation:**
+
+- [ ] In `qa.ts` `buildEnvVars()`, add:
+  ```typescript
+  env.NW_QA_VALIDATED_LABEL = config.qa.validatedLabel;
+  ```
+- [ ] In `night-watch-qa-cron.sh`, read the env var:
+  ```bash
+  VALIDATED_LABEL="${NW_QA_VALIDATED_LABEL:-e2e-validated}"
+  ```
+- [ ] Add a helper function `ensure_validated_label()` near the top of the script:
+  ```bash
+  LABEL_ENSURED=0
+  ensure_validated_label() {
+    if [ "${LABEL_ENSURED}" -eq 1 ]; then return 0; fi
+    gh label create "${VALIDATED_LABEL}" \
+      --description "PR acceptance requirements validated by e2e/integration tests" \
+      --color "0e8a16" \
+      --force 2>/dev/null || true
+    LABEL_ENSURED=1
+  }
+  ```
+- [ ] In the per-PR processing loop, after `classify_qa_comment_outcome`, add label logic:
+  ```bash
+  case "${QA_OUTCOME}" in
+    passing)
+      PASSING_PRS_CSV=$(append_csv "${PASSING_PRS_CSV}" "#${pr_num}")
+      # Apply e2e-validated label
+      ensure_validated_label
+      gh pr edit "${pr_num}" --add-label "${VALIDATED_LABEL}" 2>/dev/null || true
+      log "QA: PR #${pr_num} — added '${VALIDATED_LABEL}' label (tests passing)"
+      ;;
+    issues_found)
+      ISSUES_FOUND_PRS_CSV=$(append_csv "${ISSUES_FOUND_PRS_CSV}" "#${pr_num}")
+      # Remove e2e-validated label if present
+      gh pr edit "${pr_num}" --remove-label "${VALIDATED_LABEL}" 2>/dev/null || 
true + ;; + no_tests_needed) + NO_TESTS_PRS_CSV=$(append_csv "${NO_TESTS_PRS_CSV}" "#${pr_num}") + # Remove e2e-validated label — no tests doesn't prove acceptance + gh pr edit "${pr_num}" --remove-label "${VALIDATED_LABEL}" 2>/dev/null || true + ;; + *) + UNCLASSIFIED_PRS_CSV=$(append_csv "${UNCLASSIFIED_PRS_CSV}" "#${pr_num}") + ;; + esac + ``` +- [ ] In dry-run output, include the validated label setting + +**Tests Required:** +| Test File | Test Name | Assertion | +|-----------|-----------|-----------| +| `packages/cli/src/__tests__/commands/qa.test.ts` | `buildEnvVars includes NW_QA_VALIDATED_LABEL` | `expect(env.NW_QA_VALIDATED_LABEL).toBe('e2e-validated')` | +| `packages/cli/src/__tests__/commands/qa.test.ts` | `buildEnvVars uses custom validatedLabel from config` | custom label value passed through | + +**Verification Plan:** +1. **Unit Tests:** Env var presence, custom label passthrough +2. **Manual test:** Run `night-watch qa --dry-run` and verify `NW_QA_VALIDATED_LABEL` appears in env vars +3. **Manual test:** Run `night-watch qa` on a repo with a PR that has passing tests → label applied +4. **Evidence:** `yarn verify` passes, label visible on GitHub PR + +--- + +### Phase 3: Init Label Sync — Create All Night Watch Labels on GitHub + +**User-visible outcome:** `night-watch init` creates all Night Watch labels (including `e2e-validated`) on GitHub when the repo has a GitHub remote and `gh` is authenticated. 
+ +**Files (2):** +- `packages/cli/src/commands/init.ts` — add label sync step +- `packages/core/src/board/labels.ts` — export exists, no changes needed (consumed by init) + +**Implementation:** + +- [ ] In `init.ts`, add a new step between the board setup step and the global registry step (renumber subsequent steps, update `totalSteps`): + ```typescript + // Step 11: Sync Night Watch labels to GitHub + step(11, totalSteps, 'Syncing Night Watch labels to GitHub...'); + if (!remoteStatus.hasGitHubRemote || !ghAuthenticated) { + info('Skipping label sync (no GitHub remote or gh not authenticated).'); + } else { + try { + const { NIGHT_WATCH_LABELS } = await import('@night-watch/core'); + let created = 0; + for (const label of NIGHT_WATCH_LABELS) { + try { + execSync( + `gh label create "${label.name}" --description "${label.description}" --color "${label.color}" --force`, + { cwd, encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }, + ); + created++; + } catch { + // Label creation is best-effort + } + } + success(`Synced ${created}/${NIGHT_WATCH_LABELS.length} labels to GitHub`); + } catch (labelErr) { + warn(`Could not sync labels: ${labelErr instanceof Error ? labelErr.message : String(labelErr)}`); + } + } + ``` +- [ ] Update `totalSteps` from 13 to 14, renumber steps 11+ (global registry becomes 12, skills becomes 13, summary becomes 14) +- [ ] Add label sync status to the summary table + +**Tests Required:** +| Test File | Test Name | Assertion | +|-----------|-----------|-----------| +| `packages/cli/src/__tests__/commands/init.test.ts` | `init syncs Night Watch labels when gh is authenticated` | execSync called with `gh label create` for each label | +| `packages/cli/src/__tests__/commands/init.test.ts` | `init skips label sync when no GitHub remote` | no label creation calls | + +**Verification Plan:** +1. **Unit Tests:** Label sync invocation, skip conditions +2. 
**Manual test:** Run `night-watch init --force` in a project with GitHub remote → labels visible on GitHub +3. **Evidence:** `yarn verify` passes, labels visible at `https://github.com/{owner}/{repo}/labels` + +--- + +### Phase 4: Dry-Run & Summary Integration + +**User-visible outcome:** `night-watch qa --dry-run` shows the validated label config. The emit_result output includes the label information for downstream notification consumers. The QA script idempotency check skips re-processing PRs that already have the label only when the QA comment is also present. + +**Files (2):** +- `packages/cli/src/commands/qa.ts` — show validated label in dry-run config table +- `packages/cli/scripts/night-watch-qa-cron.sh` — include validated label stats in emit_result + +**Implementation:** + +- [ ] In `qa.ts` dry-run section, add a row to the config table: + ```typescript + configTable.push(['Validated Label', config.qa.validatedLabel]); + ``` +- [ ] In `night-watch-qa-cron.sh`, add `VALIDATED_PRS_CSV` tracker alongside other CSV trackers: + ```bash + VALIDATED_PRS_CSV="" + ``` +- [ ] After applying the `e2e-validated` label for passing PRs, append to `VALIDATED_PRS_CSV`: + ```bash + VALIDATED_PRS_CSV=$(append_csv "${VALIDATED_PRS_CSV}" "#${pr_num}") + ``` +- [ ] In `emit_result` calls, include `validated=` field: + ```bash + emit_result "success_qa" "prs=...|passing=...|validated=${VALIDATED_PRS_SUMMARY}|..." + ``` +- [ ] Add `VALIDATED_PRS_SUMMARY=$(csv_or_none "${VALIDATED_PRS_CSV}")` alongside other summaries +- [ ] In Telegram status messages, include "E2E validated: ${VALIDATED_PRS_SUMMARY}" line + +**Tests Required:** +| Test File | Test Name | Assertion | +|-----------|-----------|-----------| +| `packages/cli/src/__tests__/commands/qa.test.ts` | `dry-run config table includes validatedLabel` | config output contains label name | + +**Verification Plan:** +1. **Unit Tests:** Dry-run output includes validated label +2. 
**Manual test:** `night-watch qa --dry-run` shows "Validated Label: e2e-validated" +3. **Manual test:** After QA run, Telegram message includes "E2E validated" line +4. **Evidence:** `yarn verify` passes + +## 5. Acceptance Criteria + +- [ ] All 4 phases complete +- [ ] All specified tests pass +- [ ] `yarn verify` passes +- [ ] All automated checkpoint reviews passed +- [ ] `e2e-validated` label is defined in `NIGHT_WATCH_LABELS` with green color (#0e8a16) +- [ ] `IQaConfig.validatedLabel` is configurable with default `e2e-validated` +- [ ] QA script applies label when `classify_qa_comment_outcome()` returns `passing` +- [ ] QA script removes label when outcome is `issues_found` or `no_tests_needed` +- [ ] Label operations are idempotent (no error if label exists/doesn't exist) +- [ ] `night-watch init` creates all Night Watch labels on GitHub (including `e2e-validated`) +- [ ] `night-watch qa --dry-run` shows the validated label in config +- [ ] Label name is configurable via `NW_QA_VALIDATED_LABEL` env var +- [ ] `gh label create --force` ensures label exists before first use in QA script diff --git a/packages/cli/src/__tests__/commands/init.test.ts b/packages/cli/src/__tests__/commands/init.test.ts index 48a420b3..745590bd 100644 --- a/packages/cli/src/__tests__/commands/init.test.ts +++ b/packages/cli/src/__tests__/commands/init.test.ts @@ -575,4 +575,42 @@ describe('init command', () => { expect(content).toContain('Custom Night Watch Template'); }); }); + + describe('label sync preconditions', () => { + it('NIGHT_WATCH_LABELS includes e2e-validated for init sync', async () => { + const { NIGHT_WATCH_LABELS } = await import('@night-watch/core'); + const names = NIGHT_WATCH_LABELS.map((l: { name: string }) => l.name); + expect(names).toContain('e2e-validated'); + }); + + it('init skips label sync when no GitHub remote', () => { + // getGitHubRemoteStatus returns hasGitHubRemote: false for non-git dirs + const status = getGitHubRemoteStatus(tempDir); + 
expect(status.hasGitHubRemote).toBe(false); + }); + + it('label sync condition: skips when hasGitHubRemote is false', () => { + // Simulate the condition check: no sync when no GitHub remote + const remoteStatus = { hasGitHubRemote: false, remoteUrl: null }; + const ghAuthenticated = true; + const shouldSync = remoteStatus.hasGitHubRemote && ghAuthenticated; + expect(shouldSync).toBe(false); + }); + + it('label sync condition: skips when gh not authenticated', () => { + // Simulate the condition check: no sync when gh not authenticated + const remoteStatus = { hasGitHubRemote: true, remoteUrl: 'https://github.com/test/repo' }; + const ghAuthenticated = false; + const shouldSync = remoteStatus.hasGitHubRemote && ghAuthenticated; + expect(shouldSync).toBe(false); + }); + + it('label sync condition: proceeds when GitHub remote and gh authenticated', () => { + // Simulate the condition check: sync proceeds when both conditions met + const remoteStatus = { hasGitHubRemote: true, remoteUrl: 'https://github.com/test/repo' }; + const ghAuthenticated = true; + const shouldSync = remoteStatus.hasGitHubRemote && ghAuthenticated; + expect(shouldSync).toBe(true); + }); + }); }); diff --git a/packages/cli/src/__tests__/commands/qa.test.ts b/packages/cli/src/__tests__/commands/qa.test.ts index 2960f0f5..90ab5198 100644 --- a/packages/cli/src/__tests__/commands/qa.test.ts +++ b/packages/cli/src/__tests__/commands/qa.test.ts @@ -58,6 +58,7 @@ function createTestConfig(overrides: Partial = {}): INightWat artifacts: 'both', skipLabel: 'skip-qa', autoInstallPlaywright: true, + validatedLabel: 'e2e-validated', }, ...overrides, } as INightWatchConfig; @@ -170,6 +171,35 @@ describe('qa command', () => { expect(env.NW_QA_SKIP_LABEL).toBe('skip-qa'); }); + it('should set NW_QA_VALIDATED_LABEL from config', () => { + const config = createTestConfig(); + const options: IQaOptions = { dryRun: false }; + + const env = buildEnvVars(config, options); + + 
expect(env.NW_QA_VALIDATED_LABEL).toBe('e2e-validated'); + }); + + it('should use custom validatedLabel from config', () => { + const config = createTestConfig({ + qa: { + enabled: true, + schedule: '30 1,7,13,19 * * *', + maxRuntime: 3600, + branchPatterns: [], + artifacts: 'both', + skipLabel: 'skip-qa', + autoInstallPlaywright: true, + validatedLabel: 'custom-e2e-label', + }, + }); + const options: IQaOptions = { dryRun: false }; + + const env = buildEnvVars(config, options); + + expect(env.NW_QA_VALIDATED_LABEL).toBe('custom-e2e-label'); + }); + it('should set NW_QA_AUTO_INSTALL_PLAYWRIGHT to 1 when true', () => { const config = createTestConfig(); const options: IQaOptions = { dryRun: false }; diff --git a/packages/cli/src/commands/init.ts b/packages/cli/src/commands/init.ts index a7467974..f464c09f 100644 --- a/packages/cli/src/commands/init.ts +++ b/packages/cli/src/commands/init.ts @@ -573,7 +573,7 @@ export function initCommand(program: Command): void { const cwd = process.cwd(); const force = options.force || false; const prdDir = options.prdDir || DEFAULT_PRD_DIR; - const totalSteps = 13; + const totalSteps = 14; const interactive = isInteractiveInitSession(); console.log(); @@ -891,8 +891,41 @@ export function initCommand(program: Command): void { } } - // Step 11: Register in global registry - step(11, totalSteps, 'Registering project in global registry...'); + // Step 11: Sync Night Watch labels to GitHub + step(11, totalSteps, 'Syncing Night Watch labels to GitHub...'); + let labelSyncStatus = 'Skipped'; + if (!remoteStatus.hasGitHubRemote || !ghAuthenticated) { + labelSyncStatus = !remoteStatus.hasGitHubRemote + ? 
'Skipped (no GitHub remote)' + : 'Skipped (gh auth required)'; + info('Skipping label sync (no GitHub remote or gh not authenticated).'); + } else { + try { + const { NIGHT_WATCH_LABELS } = await import('@night-watch/core'); + let created = 0; + for (const label of NIGHT_WATCH_LABELS) { + try { + execSync( + `gh label create "${label.name}" --description "${label.description}" --color "${label.color}" --force`, + { cwd, encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }, + ); + created++; + } catch { + // Label creation is best-effort + } + } + labelSyncStatus = `Synced ${created}/${NIGHT_WATCH_LABELS.length} labels`; + success(`Synced ${created}/${NIGHT_WATCH_LABELS.length} labels to GitHub`); + } catch (labelErr) { + labelSyncStatus = 'Failed'; + warn( + `Could not sync labels: ${labelErr instanceof Error ? labelErr.message : String(labelErr)}`, + ); + } + } + + // Step 12: Register in global registry + step(12, totalSteps, 'Registering project in global registry...'); try { const { registerProject } = await import('@night-watch/core'); const entry = registerProject(cwd); @@ -903,8 +936,8 @@ export function initCommand(program: Command): void { ); } - // Step 12: Install AI skills - step(12, totalSteps, 'Installing Night Watch skills...'); + // Step 13: Install AI skills + step(13, totalSteps, 'Installing Night Watch skills...'); const skillsResult = installSkills(cwd, selectedProvider, force, TEMPLATES_DIR); if (skillsResult.installed > 0) { success(`Installed ${skillsResult.installed} skills to ${skillsResult.location}`); @@ -918,7 +951,7 @@ export function initCommand(program: Command): void { } // Print summary - step(13, totalSteps, 'Initialization complete!'); + step(14, totalSteps, 'Initialization complete!'); // Summary with table header('Initialization Complete'); @@ -933,6 +966,7 @@ export function initCommand(program: Command): void { filesTable.push(['', `instructions/prd-creator.md (${templateSources[5].source})`]); filesTable.push(['Config File', 
CONFIG_FILE_NAME]); filesTable.push(['Board Setup', boardSetupStatus]); + filesTable.push(['Label Sync', labelSyncStatus]); filesTable.push(['Global Registry', '~/.night-watch/projects.json']); let skillsSummary: string; if (skillsResult.installed > 0) { diff --git a/packages/cli/src/commands/qa.ts b/packages/cli/src/commands/qa.ts index aefdd241..baf14ee1 100644 --- a/packages/cli/src/commands/qa.ts +++ b/packages/cli/src/commands/qa.ts @@ -100,6 +100,7 @@ export function buildEnvVars( // QA-specific settings env.NW_QA_SKIP_LABEL = config.qa.skipLabel; + env.NW_QA_VALIDATED_LABEL = config.qa.validatedLabel; env.NW_QA_ARTIFACTS = config.qa.artifacts; env.NW_QA_AUTO_INSTALL_PLAYWRIGHT = config.qa.autoInstallPlaywright ? '1' : '0'; env.NW_CLAUDE_MODEL_ID = @@ -190,6 +191,7 @@ export function qaCommand(program: Command): void { config.qa.branchPatterns.length > 0 ? config.qa.branchPatterns : config.branchPatterns; configTable.push(['Branch Patterns', branchPatterns.join(', ')]); configTable.push(['Skip Label', config.qa.skipLabel]); + configTable.push(['Validated Label', config.qa.validatedLabel]); configTable.push(['Artifacts', config.qa.artifacts]); configTable.push([ 'Auto-install Playwright', diff --git a/packages/core/src/__tests__/board/labels.test.ts b/packages/core/src/__tests__/board/labels.test.ts new file mode 100644 index 00000000..e4429b19 --- /dev/null +++ b/packages/core/src/__tests__/board/labels.test.ts @@ -0,0 +1,101 @@ +/** + * Tests for board label taxonomy + */ + +import { describe, it, expect } from 'vitest'; +import { + NIGHT_WATCH_LABELS, + PRIORITY_LABELS, + CATEGORY_LABELS, + HORIZON_LABELS, + isValidPriority, + isValidCategory, + isValidHorizon, +} from '../../board/labels.js'; + +describe('NIGHT_WATCH_LABELS', () => { + it('includes e2e-validated label', () => { + const names = NIGHT_WATCH_LABELS.map((l) => l.name); + expect(names).toContain('e2e-validated'); + }); + + it('e2e-validated has correct description and green color', () => { + 
const label = NIGHT_WATCH_LABELS.find((l) => l.name === 'e2e-validated'); + expect(label).toBeDefined(); + expect(label!.color).toBe('0e8a16'); + expect(label!.description).toBe( + 'PR acceptance requirements validated by e2e/integration tests', + ); + }); + + it('includes all priority labels', () => { + const names = NIGHT_WATCH_LABELS.map((l) => l.name); + for (const p of PRIORITY_LABELS) { + expect(names).toContain(p); + } + }); + + it('includes all category labels', () => { + const names = NIGHT_WATCH_LABELS.map((l) => l.name); + for (const c of CATEGORY_LABELS) { + expect(names).toContain(c); + } + }); + + it('includes all horizon labels', () => { + const names = NIGHT_WATCH_LABELS.map((l) => l.name); + for (const h of HORIZON_LABELS) { + expect(names).toContain(h); + } + }); + + it('each label has required fields', () => { + for (const label of NIGHT_WATCH_LABELS) { + expect(typeof label.name).toBe('string'); + expect(label.name.length).toBeGreaterThan(0); + expect(typeof label.description).toBe('string'); + expect(typeof label.color).toBe('string'); + expect(label.color).toMatch(/^[0-9a-f]{6}$/i); + } + }); +}); + +describe('isValidPriority', () => { + it('returns true for valid priority labels', () => { + expect(isValidPriority('P0')).toBe(true); + expect(isValidPriority('P1')).toBe(true); + expect(isValidPriority('P2')).toBe(true); + }); + + it('returns false for invalid labels', () => { + expect(isValidPriority('P3')).toBe(false); + expect(isValidPriority('e2e-validated')).toBe(false); + expect(isValidPriority('')).toBe(false); + }); +}); + +describe('isValidCategory', () => { + it('returns true for valid category labels', () => { + expect(isValidCategory('reliability')).toBe(true); + expect(isValidCategory('quality')).toBe(true); + expect(isValidCategory('product')).toBe(true); + }); + + it('returns false for invalid labels', () => { + expect(isValidCategory('e2e-validated')).toBe(false); + expect(isValidCategory('P0')).toBe(false); + }); +}); + 
+describe('isValidHorizon', () => { + it('returns true for valid horizon labels', () => { + expect(isValidHorizon('short-term')).toBe(true); + expect(isValidHorizon('medium-term')).toBe(true); + expect(isValidHorizon('long-term')).toBe(true); + }); + + it('returns false for invalid labels', () => { + expect(isValidHorizon('e2e-validated')).toBe(false); + expect(isValidHorizon('immediate')).toBe(false); + }); +}); diff --git a/packages/core/src/__tests__/jobs/job-registry.test.ts b/packages/core/src/__tests__/jobs/job-registry.test.ts index 857a4ae0..d8cb0771 100644 --- a/packages/core/src/__tests__/jobs/job-registry.test.ts +++ b/packages/core/src/__tests__/jobs/job-registry.test.ts @@ -66,6 +66,23 @@ describe('getJobDef', () => { expect(def!.name).toBe('QA'); }); + it('qa job has validatedLabel extra field', () => { + const def = getJobDef('qa'); + expect(def).toBeDefined(); + expect(def!.extraFields).toContainEqual( + expect.objectContaining({ name: 'validatedLabel' }), + ); + }); + + it('qa job validatedLabel extra field has correct defaults', () => { + const def = getJobDef('qa'); + expect(def).toBeDefined(); + const field = def!.extraFields?.find((f) => f.name === 'validatedLabel'); + expect(field).toBeDefined(); + expect(field!.type).toBe('string'); + expect(field!.defaultValue).toBe('e2e-validated'); + }); + it('returns correct definition for slicer', () => { const def = getJobDef('slicer'); expect(def).toBeDefined(); diff --git a/packages/core/src/board/labels.ts b/packages/core/src/board/labels.ts index d96742a7..b768cd28 100644 --- a/packages/core/src/board/labels.ts +++ b/packages/core/src/board/labels.ts @@ -137,6 +137,11 @@ export const NIGHT_WATCH_LABELS: ILabelDefinition[] = [ description: 'Created by the analytics job for Amplitude findings', color: '1d76db', }, + { + name: 'e2e-validated', + description: 'PR acceptance requirements validated by e2e/integration tests', + color: '0e8a16', + }, ]; // 
--------------------------------------------------------------------------- diff --git a/packages/core/src/constants.ts b/packages/core/src/constants.ts index c1e9aba6..ba793ea2 100644 --- a/packages/core/src/constants.ts +++ b/packages/core/src/constants.ts @@ -123,6 +123,7 @@ export const DEFAULT_QA_MAX_RUNTIME = 3600; // 1 hour export const DEFAULT_QA_ARTIFACTS: QaArtifacts = 'both'; export const DEFAULT_QA_SKIP_LABEL = 'skip-qa'; export const DEFAULT_QA_AUTO_INSTALL_PLAYWRIGHT = true; +export const DEFAULT_QA_VALIDATED_LABEL = 'e2e-validated'; export const DEFAULT_QA: IQaConfig = { enabled: DEFAULT_QA_ENABLED, @@ -132,6 +133,7 @@ export const DEFAULT_QA: IQaConfig = { artifacts: DEFAULT_QA_ARTIFACTS, skipLabel: DEFAULT_QA_SKIP_LABEL, autoInstallPlaywright: DEFAULT_QA_AUTO_INSTALL_PLAYWRIGHT, + validatedLabel: DEFAULT_QA_VALIDATED_LABEL, }; export const QA_LOG_NAME = 'night-watch-qa'; diff --git a/packages/core/src/jobs/job-registry.ts b/packages/core/src/jobs/job-registry.ts index 73b97236..2df38b9d 100644 --- a/packages/core/src/jobs/job-registry.ts +++ b/packages/core/src/jobs/job-registry.ts @@ -127,6 +127,7 @@ export const JOB_REGISTRY: IJobDefinition[] = [ }, { name: 'skipLabel', type: 'string', defaultValue: 'skip-qa' }, { name: 'autoInstallPlaywright', type: 'boolean', defaultValue: true }, + { name: 'validatedLabel', type: 'string', defaultValue: 'e2e-validated' }, ], defaultConfig: { enabled: true, @@ -136,11 +137,13 @@ export const JOB_REGISTRY: IJobDefinition[] = [ artifacts: 'both', skipLabel: 'skip-qa', autoInstallPlaywright: true, + validatedLabel: 'e2e-validated', } as IBaseJobConfig & { branchPatterns: string[]; artifacts: string; skipLabel: string; autoInstallPlaywright: boolean; + validatedLabel: string; }, }, { diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts index b3ba9f1d..9e1101aa 100644 --- a/packages/core/src/types.ts +++ b/packages/core/src/types.ts @@ -318,6 +318,8 @@ export interface IQaConfig { skipLabel: string; 
/** Auto-install Playwright if missing during QA run */ autoInstallPlaywright: boolean; + /** GitHub label to apply when e2e tests pass (proves acceptance requirements met) */ + validatedLabel: string; } export interface IAuditConfig { diff --git a/scripts/night-watch-qa-cron.sh b/scripts/night-watch-qa-cron.sh index 163dd463..deb6a2b3 100755 --- a/scripts/night-watch-qa-cron.sh +++ b/scripts/night-watch-qa-cron.sh @@ -25,6 +25,7 @@ PROVIDER_CMD="${NW_PROVIDER_CMD:-claude}" PROVIDER_LABEL="${NW_PROVIDER_LABEL:-}" BRANCH_PATTERNS_RAW="${NW_BRANCH_PATTERNS:-feat/,night-watch/}" SKIP_LABEL="${NW_QA_SKIP_LABEL:-skip-qa}" +VALIDATED_LABEL="${NW_QA_VALIDATED_LABEL:-e2e-validated}" QA_ARTIFACTS="${NW_QA_ARTIFACTS:-both}" QA_AUTO_INSTALL_PLAYWRIGHT="${NW_QA_AUTO_INSTALL_PLAYWRIGHT:-1}" SCRIPT_START_TIME=$(date +%s) @@ -55,6 +56,16 @@ emit_result() { fi } +LABEL_ENSURED=0 +ensure_validated_label() { + if [ "${LABEL_ENSURED}" -eq 1 ]; then return 0; fi + gh label create "${VALIDATED_LABEL}" \ + --description "PR acceptance requirements validated by e2e/integration tests" \ + --color "0e8a16" \ + --force 2>/dev/null || true + LABEL_ENSURED=1 +} + # ── Global Job Queue Gate ──────────────────────────────────────────────────── # Atomically claim a DB slot or enqueue for later dispatch — no flock needed. 
if [ "${NW_QUEUE_ENABLED:-0}" = "1" ]; then @@ -487,6 +498,7 @@ fi EXIT_CODE=0 PROCESSED_PRS_CSV="" PASSING_PRS_CSV="" +VALIDATED_PRS_CSV="" ISSUES_FOUND_PRS_CSV="" NO_TESTS_PRS_CSV="" UNCLASSIFIED_PRS_CSV="" @@ -617,12 +629,23 @@ for pr_ref in ${PRS_NEEDING_QA}; do case "${QA_OUTCOME}" in passing) PASSING_PRS_CSV=$(append_csv "${PASSING_PRS_CSV}" "#${pr_num}") + # Apply e2e-validated label + ensure_validated_label + gh pr edit "${pr_num}" --add-label "${VALIDATED_LABEL}" 2>/dev/null || true + VALIDATED_PRS_CSV=$(append_csv "${VALIDATED_PRS_CSV}" "#${pr_num}") + log "QA: PR #${pr_num} — added '${VALIDATED_LABEL}' label (tests passing)" ;; issues_found) ISSUES_FOUND_PRS_CSV=$(append_csv "${ISSUES_FOUND_PRS_CSV}" "#${pr_num}") + # Remove e2e-validated label if present + gh pr edit "${pr_num}" --remove-label "${VALIDATED_LABEL}" 2>/dev/null || true + log "QA: PR #${pr_num} — removed '${VALIDATED_LABEL}' label (issues found)" ;; no_tests_needed) NO_TESTS_PRS_CSV=$(append_csv "${NO_TESTS_PRS_CSV}" "#${pr_num}") + # Remove e2e-validated label — no tests doesn't prove acceptance + gh pr edit "${pr_num}" --remove-label "${VALIDATED_LABEL}" 2>/dev/null || true + log "QA: PR #${pr_num} — removed '${VALIDATED_LABEL}' label (no tests needed)" ;; *) UNCLASSIFIED_PRS_CSV=$(append_csv "${UNCLASSIFIED_PRS_CSV}" "#${pr_num}") @@ -646,6 +669,7 @@ cleanup_worktrees "${PROJECT_DIR}" FINAL_PROCESSED_PRS_CSV="${PROCESSED_PRS_CSV:-${PRS_NEEDING_QA_CSV}}" PASSING_PRS_SUMMARY=$(csv_or_none "${PASSING_PRS_CSV}") +VALIDATED_PRS_SUMMARY=$(csv_or_none "${VALIDATED_PRS_CSV}") ISSUES_FOUND_PRS_SUMMARY=$(csv_or_none "${ISSUES_FOUND_PRS_CSV}") NO_TESTS_PRS_SUMMARY=$(csv_or_none "${NO_TESTS_PRS_CSV}") UNCLASSIFIED_PRS_SUMMARY=$(csv_or_none "${UNCLASSIFIED_PRS_CSV}") @@ -664,6 +688,7 @@ Provider (model): ${PROVIDER_MODEL_DISPLAY} Artifacts: ${QA_ARTIFACTS_DESC} (mode=${QA_ARTIFACTS}) Processed PRs: ${FINAL_PROCESSED_PRS_CSV} Passing tests: ${PASSING_PRS_SUMMARY} +E2E validated: 
${VALIDATED_PRS_SUMMARY} Issues found by tests: ${ISSUES_FOUND_PRS_SUMMARY} No tests needed: ${NO_TESTS_PRS_SUMMARY} Reported (unclassified): ${UNCLASSIFIED_PRS_SUMMARY} @@ -680,9 +705,9 @@ ${QA_SCREENSHOT_SUMMARY}" fi send_telegram_status_message "🧪 Night Watch QA: warning" "${TELEGRAM_WARNING_BODY}" if [ -n "${REPO}" ]; then - emit_result "warning_qa" "prs=${FINAL_PROCESSED_PRS_CSV}|passing=${PASSING_PRS_SUMMARY}|issues=${ISSUES_FOUND_PRS_SUMMARY}|no_tests=${NO_TESTS_PRS_SUMMARY}|unclassified=${UNCLASSIFIED_PRS_SUMMARY}|warnings=${WARNING_PRS_SUMMARY}|repo=${REPO}" + emit_result "warning_qa" "prs=${FINAL_PROCESSED_PRS_CSV}|passing=${PASSING_PRS_SUMMARY}|validated=${VALIDATED_PRS_SUMMARY}|issues=${ISSUES_FOUND_PRS_SUMMARY}|no_tests=${NO_TESTS_PRS_SUMMARY}|unclassified=${UNCLASSIFIED_PRS_SUMMARY}|warnings=${WARNING_PRS_SUMMARY}|repo=${REPO}" else - emit_result "warning_qa" "prs=${FINAL_PROCESSED_PRS_CSV}|passing=${PASSING_PRS_SUMMARY}|issues=${ISSUES_FOUND_PRS_SUMMARY}|no_tests=${NO_TESTS_PRS_SUMMARY}|unclassified=${UNCLASSIFIED_PRS_SUMMARY}|warnings=${WARNING_PRS_SUMMARY}" + emit_result "warning_qa" "prs=${FINAL_PROCESSED_PRS_CSV}|passing=${PASSING_PRS_SUMMARY}|validated=${VALIDATED_PRS_SUMMARY}|issues=${ISSUES_FOUND_PRS_SUMMARY}|no_tests=${NO_TESTS_PRS_SUMMARY}|unclassified=${UNCLASSIFIED_PRS_SUMMARY}|warnings=${WARNING_PRS_SUMMARY}" fi else log "DONE: QA runner completed successfully" @@ -691,6 +716,7 @@ Provider (model): ${PROVIDER_MODEL_DISPLAY} Artifacts: ${QA_ARTIFACTS_DESC} (mode=${QA_ARTIFACTS}) Processed PRs: ${FINAL_PROCESSED_PRS_CSV} Passing tests: ${PASSING_PRS_SUMMARY} +E2E validated: ${VALIDATED_PRS_SUMMARY} Issues found by tests: ${ISSUES_FOUND_PRS_SUMMARY} No tests needed: ${NO_TESTS_PRS_SUMMARY} Reported (unclassified): ${UNCLASSIFIED_PRS_SUMMARY}" @@ -701,9 +727,9 @@ ${QA_SCREENSHOT_SUMMARY}" fi send_telegram_status_message "🧪 Night Watch QA: completed" "${TELEGRAM_SUCCESS_BODY}" if [ -n "${REPO}" ]; then - emit_result "success_qa" 
"prs=${FINAL_PROCESSED_PRS_CSV}|passing=${PASSING_PRS_SUMMARY}|issues=${ISSUES_FOUND_PRS_SUMMARY}|no_tests=${NO_TESTS_PRS_SUMMARY}|unclassified=${UNCLASSIFIED_PRS_SUMMARY}|repo=${REPO}" + emit_result "success_qa" "prs=${FINAL_PROCESSED_PRS_CSV}|passing=${PASSING_PRS_SUMMARY}|validated=${VALIDATED_PRS_SUMMARY}|issues=${ISSUES_FOUND_PRS_SUMMARY}|no_tests=${NO_TESTS_PRS_SUMMARY}|unclassified=${UNCLASSIFIED_PRS_SUMMARY}|repo=${REPO}" else - emit_result "success_qa" "prs=${FINAL_PROCESSED_PRS_CSV}|passing=${PASSING_PRS_SUMMARY}|issues=${ISSUES_FOUND_PRS_SUMMARY}|no_tests=${NO_TESTS_PRS_SUMMARY}|unclassified=${UNCLASSIFIED_PRS_SUMMARY}" + emit_result "success_qa" "prs=${FINAL_PROCESSED_PRS_CSV}|passing=${PASSING_PRS_SUMMARY}|validated=${VALIDATED_PRS_SUMMARY}|issues=${ISSUES_FOUND_PRS_SUMMARY}|no_tests=${NO_TESTS_PRS_SUMMARY}|unclassified=${UNCLASSIFIED_PRS_SUMMARY}" fi fi elif [ ${EXIT_CODE} -eq 124 ]; then