From d5f0d016aff065ead0401b10d35b29cd29378168 Mon Sep 17 00:00:00 2001 From: Jack Felke Date: Wed, 18 Mar 2026 13:41:13 -0700 Subject: [PATCH 1/5] docs: add example CLAUDE.md for automatic preflight integration Adds a ready-to-use CLAUDE.md template that makes Claude Code automatically run preflight_check on prompts. Users can copy it into their project to get preflight working without manual tool calls. Referenced from Quick Start in README and examples/README. --- README.md | 10 ++++++++++ examples/CLAUDE.md | 30 ++++++++++++++++++++++++++++++ examples/README.md | 16 ++++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 examples/CLAUDE.md diff --git a/README.md b/README.md index 6d03f5d..7969d58 100644 --- a/README.md +++ b/README.md @@ -124,6 +124,16 @@ claude mcp add preflight -- preflight-dev-serve > **Note:** `preflight-dev` runs the interactive setup wizard. `preflight-dev-serve` starts the MCP server — that's what you want in your Claude Code config. +### Make Claude use preflight automatically + +Add preflight rules to your project's `CLAUDE.md` so Claude runs `preflight_check` on every prompt without you asking: + +```bash +cp /path/to/preflight/examples/CLAUDE.md your-project/CLAUDE.md +``` + +See [`examples/CLAUDE.md`](examples/CLAUDE.md) for a ready-to-use template with recommended rules for when to preflight, session hygiene, and skip-lists. + --- ## How It Works diff --git a/examples/CLAUDE.md b/examples/CLAUDE.md new file mode 100644 index 0000000..da55f40 --- /dev/null +++ b/examples/CLAUDE.md @@ -0,0 +1,30 @@ +# CLAUDE.md — Preflight Integration Example +# +# Drop this into your project's CLAUDE.md (or .claude/CLAUDE.md) to make +# Claude Code automatically use preflight tools during your sessions. +# Customize the rules below to match your workflow. + +## Preflight Rules + +Before starting any non-trivial task, run `preflight_check` with my prompt. This catches vague instructions before they waste tokens on wrong→fix cycles. + +### When to use preflight tools: + +- **Every prompt**: `preflight_check` triages automatically — let it decide what's needed +- **Before multi-file changes**: Run `scope_work` to get a phased plan +- **Before sub-agent tasks**: Use `enrich_agent_task` to add context +- **After making a mistake**: Use `log_correction` so preflight learns the pattern +- **Before ending a session**: Run `checkpoint` to save state for next time +- **When I say "fix it" or "do the others"**: Use `sharpen_followup` to resolve what I actually mean + +### Session hygiene: + +- Run `check_session_health` if we've been going for a while without committing +- If I ask about something we did before, use `search_history` to find it +- Before declaring a task done, run `verify_completion` (type check + tests) + +### Don't preflight these: + +- Simple git commands (commit, push, status) +- Formatting / linting +- Reading files I explicitly named diff --git a/examples/README.md b/examples/README.md index 778f15d..f2fafc1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -12,6 +12,22 @@ The `.preflight/` directory contains example configuration files you can copy in └── api.yml # Manual contract definitions for cross-service types ``` +## `CLAUDE.md` Integration + +The `CLAUDE.md` file tells Claude Code how to behave in your project. Adding preflight rules here makes Claude automatically use preflight tools without you having to ask. + +```bash +# Copy the example into your project: +cp /path/to/preflight/examples/CLAUDE.md my-project/CLAUDE.md + +# Or append to your existing CLAUDE.md: +cat /path/to/preflight/examples/CLAUDE.md >> my-project/CLAUDE.md +``` + +This is the **recommended way** to integrate preflight — once it's in your `CLAUDE.md`, every session automatically runs `preflight_check` on your prompts. + +--- + ### Quick setup ```bash From c17f46344bf005464e2bd65b1f1631852a51e069 Mon Sep 17 00:00:00 2001 From: Jack Felke Date: Thu, 19 Mar 2026 08:20:24 -0700 Subject: [PATCH 2/5] feat(cli): add --help and --version flags, fix Node badge to 20+ - CLI now responds to --help/-h with usage info, profiles, and links - CLI now responds to --version/-v with package version - Previously, any flag just launched the interactive wizard - Fixed README badge from Node 18+ to Node 20+ (matches engines field) --- README.md | 2 +- src/cli/init.ts | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7969d58..e7a385f 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ A 24-tool MCP server for Claude Code that catches ambiguous instructions before [![MCP](https://img.shields.io/badge/MCP-Compatible-blueviolet)](https://modelcontextprotocol.io/) [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE) [![npm](https://img.shields.io/npm/v/preflight-dev)](https://www.npmjs.com/package/preflight-dev) -[![Node 18+](https://img.shields.io/badge/node-18%2B-brightgreen?logo=node.js&logoColor=white)](https://nodejs.org/) +[![Node 20+](https://img.shields.io/badge/node-20%2B-brightgreen?logo=node.js&logoColor=white)](https://nodejs.org/) [Quick Start](#quick-start) · [How It Works](#how-it-works) · [Tool Reference](#tool-reference) · [Configuration](#configuration) · [Scoring](#the-12-category-scorecard) diff --git a/src/cli/init.ts b/src/cli/init.ts index dfaaa25..d1b0021 100644 --- a/src/cli/init.ts +++ b/src/cli/init.ts @@ -9,6 +9,46 @@ import { join, dirname } from "node:path"; import { existsSync } from "node:fs"; import { fileURLToPath } from "node:url"; +// Handle --help and --version before launching interactive wizard +const args = process.argv.slice(2); + +if (args.includes("--help") || args.includes("-h")) { + console.log(` +✈️ preflight-dev — MCP server for Claude Code prompt discipline + +Usage: + preflight-dev Interactive setup wizard (creates .mcp.json) + preflight-dev --help Show this help message + preflight-dev --version Show version + +The wizard will: + 1. Ask you to choose a profile (minimal / standard / full) + 2. Optionally create a .preflight/ config directory + 3. Write an .mcp.json so Claude Code auto-connects to preflight + +After setup, restart Claude Code and preflight tools will appear. + +Profiles: + minimal 4 tools — clarify_intent, check_session_health, session_stats, prompt_score + standard 16 tools — all prompt discipline + session_stats + prompt_score + full 20 tools — everything + timeline/vector search (needs LanceDB) + +More info: https://github.com/TerminalGravity/preflight +`); + process.exit(0); +} + +if (args.includes("--version") || args.includes("-v")) { + const pkgPath = join(dirname(fileURLToPath(import.meta.url)), "../../package.json"); + try { + const pkg = JSON.parse(await readFile(pkgPath, "utf-8")); + console.log(`preflight-dev v${pkg.version}`); + } catch { + console.log("preflight-dev (version unknown)"); + } + process.exit(0); +} + const rl = createInterface({ input: process.stdin, output: process.stdout }); function ask(question: string): Promise { From 59cb76ee19b366ae8e76e65fce638ee091e48f9f Mon Sep 17 00:00:00 2001 From: Jack Felke Date: Thu, 19 Mar 2026 08:46:52 -0700 Subject: [PATCH 3/5] feat: add export_timeline tool for markdown session reports Adds a new 'export_timeline' MCP tool that generates markdown reports from timeline data with: - Summary stats table (events by type, correction rate, commits/prompt) - ASCII activity chart grouped by day - Recent commits log - Correction insights - Error summary - Configurable period (day/week/month) with offset - Optional save to ~/.preflight/reports/ Includes tests (2 passing). Closes #5 --- src/index.ts | 2 + src/tools/export-timeline.ts | 350 ++++++++++++++++++++++++++++++++++ tests/export-timeline.test.ts | 153 +++++++++++++++ 3 files changed, 505 insertions(+) create mode 100644 src/tools/export-timeline.ts create mode 100644 tests/export-timeline.test.ts diff --git a/src/index.ts b/src/index.ts index e7e9d00..c2a525a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -49,6 +49,7 @@ import { registerScanSessions } from "./tools/scan-sessions.js"; import { registerGenerateScorecard } from "./tools/generate-scorecard.js"; import { registerSearchContracts } from "./tools/search-contracts.js"; import { registerEstimateCost } from "./tools/estimate-cost.js"; +import { registerExportTimeline } from "./tools/export-timeline.js"; // Validate related projects from config function validateRelatedProjects(): void { @@ -110,6 +111,7 @@ const toolRegistry: Array<[string, RegisterFn]> = [ ["generate_scorecard", registerGenerateScorecard], ["estimate_cost", registerEstimateCost], ["search_contracts", registerSearchContracts], + ["export_timeline", registerExportTimeline], ]; let registered = 0; diff --git a/src/tools/export-timeline.ts b/src/tools/export-timeline.ts new file mode 100644 index 0000000..407f074 --- /dev/null +++ b/src/tools/export-timeline.ts @@ -0,0 +1,350 @@ +// ============================================================================= +// export_timeline — Generate markdown reports from timeline data +// Weekly summaries, prompt quality trends, activity breakdowns +// Closes #5 +// ============================================================================= + +import { z } from "zod"; +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { getTimeline, listIndexedProjects } from "../lib/timeline-db.js"; +import { getRelatedProjects } from "../lib/config.js"; +import type { SearchScope } from "../types.js"; +import type { TimelineEvent } from "../lib/timeline-db.js"; +import { writeFileSync, mkdirSync } from "fs"; +import { join } from "path"; +import { homedir } from "os"; + +// ── Helpers ───────────────────────────────────────────────────────────────── + +const REPORTS_DIR = join(homedir(), ".preflight", "reports"); + +const TYPE_LABELS: Record = { + prompt: "Prompts", + assistant: "Responses", + tool_call: "Tool Calls", + correction: "Corrections", + commit: "Commits", + compaction: "Compactions", + sub_agent_spawn: "Sub-agent Spawns", + error: "Errors", +}; + +const TYPE_ICONS: Record = { + prompt: "💬", + assistant: "🤖", + tool_call: "🔧", + correction: "❌", + commit: "📦", + compaction: "🗜️", + sub_agent_spawn: "🚀", + error: "⚠️", +}; + +async function getSearchProjects(scope: SearchScope): Promise { + const currentProject = process.env.CLAUDE_PROJECT_DIR; + switch (scope) { + case "current": + return currentProject ? [currentProject] : []; + case "related": { + const related = getRelatedProjects(); + return currentProject ? [currentProject, ...related] : related; + } + case "all": { + const projects = await listIndexedProjects(); + return projects.map((p) => p.project); + } + default: + return currentProject ? [currentProject] : []; + } +} + +function getDateRange( + period: "day" | "week" | "month", + offset: number = 0 +): { since: string; until: string; label: string } { + const now = new Date(); + let start: Date; + let end: Date; + + if (period === "day") { + start = new Date(now); + start.setDate(start.getDate() - offset); + start.setHours(0, 0, 0, 0); + end = new Date(start); + end.setDate(end.getDate() + 1); + const label = start.toISOString().slice(0, 10); + return { since: start.toISOString(), until: end.toISOString(), label }; + } + + if (period === "week") { + // Start of current week (Monday) minus offset weeks + start = new Date(now); + const dayOfWeek = start.getDay(); + const diff = dayOfWeek === 0 ? 6 : dayOfWeek - 1; // Monday = 0 + start.setDate(start.getDate() - diff - offset * 7); + start.setHours(0, 0, 0, 0); + end = new Date(start); + end.setDate(end.getDate() + 7); + const label = `Week of ${start.toISOString().slice(0, 10)}`; + return { since: start.toISOString(), until: end.toISOString(), label }; + } + + // month + start = new Date(now.getFullYear(), now.getMonth() - offset, 1); + end = new Date(now.getFullYear(), now.getMonth() - offset + 1, 1); + const monthNames = [ + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December", + ]; + const label = `${monthNames[start.getMonth()]} ${start.getFullYear()}`; + return { since: start.toISOString(), until: end.toISOString(), label }; +} + +interface DayStats { + total: number; + byType: Record; + events: TimelineEvent[]; +} + +function groupByDay(events: TimelineEvent[]): Map { + const days = new Map(); + for (const e of events) { + const day = e.timestamp + ? new Date(e.timestamp).toISOString().slice(0, 10) + : "unknown"; + if (!days.has(day)) { + days.set(day, { total: 0, byType: {}, events: [] }); + } + const stats = days.get(day)!; + stats.total++; + stats.byType[e.type] = (stats.byType[e.type] || 0) + 1; + stats.events.push(e); + } + return days; +} + +function buildActivityChart(days: Map, sortedDays: string[]): string { + if (sortedDays.length === 0) return ""; + const maxEvents = Math.max(...sortedDays.map((d) => days.get(d)!.total)); + const barWidth = 20; + const lines: string[] = ["### Activity Chart", "```"]; + for (const day of sortedDays) { + const count = days.get(day)!.total; + const bar = "█".repeat(Math.round((count / maxEvents) * barWidth)); + lines.push(`${day} ${bar} ${count}`); + } + lines.push("```", ""); + return lines.join("\n"); +} + +function buildSummaryStats(events: TimelineEvent[]): string { + const byType: Record = {}; + for (const e of events) { + byType[e.type] = (byType[e.type] || 0) + 1; + } + + const lines: string[] = ["### Summary", ""]; + lines.push(`| Metric | Count |`); + lines.push(`|--------|-------|`); + lines.push(`| **Total Events** | ${events.length} |`); + + for (const [type, count] of Object.entries(byType).sort((a, b) => b[1] - a[1])) { + const icon = TYPE_ICONS[type] || "❓"; + const label = TYPE_LABELS[type] || type; + lines.push(`| ${icon} ${label} | ${count} |`); + } + + // Correction rate + const prompts = byType["prompt"] || 0; + const corrections = byType["correction"] || 0; + if (prompts > 0) { + const rate = ((corrections / prompts) * 100).toFixed(1); + lines.push(`| 📊 Correction Rate | ${rate}% |`); + } + + // Commits per prompt + const commits = byType["commit"] || 0; + if (prompts > 0) { + const ratio = (commits / prompts).toFixed(2); + lines.push(`| 📊 Commits/Prompt | ${ratio} |`); + } + + lines.push(""); + return lines.join("\n"); +} + +function buildTopCommits(events: TimelineEvent[], limit: number = 10): string { + const commits = events + .filter((e) => e.type === "commit") + .slice(0, limit); + + if (commits.length === 0) return ""; + + const lines: string[] = ["### Recent Commits", ""]; + for (const c of commits) { + const time = c.timestamp + ? new Date(c.timestamp).toISOString().slice(0, 16).replace("T", " ") + : "??"; + const msg = (c.content || "").slice(0, 100).replace(/\n/g, " "); + lines.push(`- \`${time}\` ${msg}`); + } + lines.push(""); + return lines.join("\n"); +} + +function buildCorrectionInsights(events: TimelineEvent[]): string { + const corrections = events.filter((e) => e.type === "correction"); + if (corrections.length === 0) return ""; + + const lines: string[] = ["### Corrections & Lessons", ""]; + for (const c of corrections.slice(0, 10)) { + const content = (c.content || "").slice(0, 150).replace(/\n/g, " "); + lines.push(`- ❌ ${content}`); + } + lines.push(""); + return lines.join("\n"); +} + +function buildErrorSummary(events: TimelineEvent[]): string { + const errors = events.filter((e) => e.type === "error"); + if (errors.length === 0) return ""; + + const lines: string[] = ["### Errors", ""]; + for (const e of errors.slice(0, 10)) { + const content = (e.content || "").slice(0, 150).replace(/\n/g, " "); + lines.push(`- ⚠️ ${content}`); + } + lines.push(""); + return lines.join("\n"); +} + +// ── Main Export ───────────────────────────────────────────────────────────── + +export function registerExportTimeline(server: McpServer) { + server.tool( + "export_timeline", + "Generate a markdown report from timeline data. Produces session summaries with activity charts, commit logs, correction insights, and key metrics. Optionally saves to ~/.preflight/reports/.", + { + scope: z + .enum(["current", "related", "all"]) + .default("current") + .describe("Search scope: current project, related, or all indexed"), + project: z + .string() + .optional() + .describe("Filter to specific project (overrides scope)"), + period: z + .enum(["day", "week", "month"]) + .default("week") + .describe("Report period"), + offset: z + .number() + .default(0) + .describe("Period offset (0 = current, 1 = previous, etc.)"), + save: z + .boolean() + .default(false) + .describe("Save report to ~/.preflight/reports/"), + include_commits: z.boolean().default(true).describe("Include commit log"), + include_corrections: z + .boolean() + .default(true) + .describe("Include correction insights"), + include_errors: z.boolean().default(true).describe("Include error summary"), + include_chart: z.boolean().default(true).describe("Include activity chart"), + }, + async (params) => { + const { since, until, label } = getDateRange(params.period, params.offset); + + // Resolve projects + let projectDirs: string[]; + if (params.project) { + projectDirs = [params.project]; + } else { + projectDirs = await getSearchProjects(params.scope); + } + + if (projectDirs.length === 0) { + return { + content: [ + { + type: "text", + text: `No projects found for scope "${params.scope}". Set CLAUDE_PROJECT_DIR or onboard projects first.`, + }, + ], + }; + } + + // Fetch all events for the period (large limit for reports) + const events = await getTimeline({ + project_dirs: projectDirs, + since, + until, + limit: 5000, + offset: 0, + }); + + if (events.length === 0) { + return { + content: [ + { + type: "text", + text: `## Report: ${label}\n\n_No events found for this period._`, + }, + ], + }; + } + + // Build report + const projName = params.project || params.scope; + const days = groupByDay(events); + const sortedDays = [...days.keys()].sort(); + + const sections: string[] = [ + `# 📊 Session Report: ${label}`, + `**Scope:** ${projName} | **Period:** ${since.slice(0, 10)} → ${until.slice(0, 10)} | **Events:** ${events.length}`, + "", + buildSummaryStats(events), + ]; + + if (params.include_chart) { + sections.push(buildActivityChart(days, sortedDays)); + } + + if (params.include_commits) { + sections.push(buildTopCommits(events)); + } + + if (params.include_corrections) { + sections.push(buildCorrectionInsights(events)); + } + + if (params.include_errors) { + sections.push(buildErrorSummary(events)); + } + + // Footer + sections.push( + "---", + `_Generated by preflight export_timeline on ${new Date().toISOString().slice(0, 16)}_` + ); + + const report = sections.filter(Boolean).join("\n"); + + // Optionally save + let savedPath: string | undefined; + if (params.save) { + mkdirSync(REPORTS_DIR, { recursive: true }); + const filename = `report-${params.period}-${since.slice(0, 10)}.md`; + savedPath = join(REPORTS_DIR, filename); + writeFileSync(savedPath, report, "utf-8"); + } + + const footer = savedPath ? `\n\n_Saved to \`${savedPath}\`_` : ""; + + return { + content: [{ type: "text", text: report + footer }], + }; + } + ); +} diff --git a/tests/export-timeline.test.ts b/tests/export-timeline.test.ts new file mode 100644 index 0000000..9ca7b9e --- /dev/null +++ b/tests/export-timeline.test.ts @@ -0,0 +1,153 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; + +// We test the helper functions by extracting the logic. +// Since the tool registers on an McpServer, we test the report generation indirectly +// by mocking the timeline-db and invoking the registered tool handler. + +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { registerExportTimeline } from "../src/tools/export-timeline.js"; + +// Mock timeline-db +vi.mock("../src/lib/timeline-db.js", () => ({ + getTimeline: vi.fn().mockResolvedValue([ + { + id: "1", + timestamp: "2025-03-17T10:00:00Z", + type: "prompt", + project: "/test", + branch: "main", + session_id: "s1", + source_file: "test.jsonl", + source_line: 1, + content: "Implement auth flow", + }, + { + id: "2", + timestamp: "2025-03-17T11:00:00Z", + type: "commit", + project: "/test", + branch: "main", + session_id: "s1", + source_file: "test.jsonl", + source_line: 2, + content: "feat: add auth middleware", + }, + { + id: "3", + timestamp: "2025-03-17T12:00:00Z", + type: "correction", + project: "/test", + branch: "main", + session_id: "s1", + source_file: "test.jsonl", + source_line: 3, + content: "Wrong import path used", + }, + { + id: "4", + timestamp: "2025-03-18T09:00:00Z", + type: "prompt", + project: "/test", + branch: "main", + session_id: "s2", + source_file: "test.jsonl", + source_line: 4, + content: "Fix auth bug", + }, + { + id: "5", + timestamp: "2025-03-18T10:00:00Z", + type: "error", + project: "/test", + branch: "main", + session_id: "s2", + source_file: "test.jsonl", + source_line: 5, + content: "TypeError: Cannot read property 'token'", + }, + ]), + listIndexedProjects: vi.fn().mockResolvedValue([{ project: "/test" }]), +})); + +vi.mock("../src/lib/config.js", () => ({ + getRelatedProjects: vi.fn().mockReturnValue([]), +})); + +describe("export_timeline", () => { + let toolHandler: any; + + beforeEach(() => { + // Capture the tool handler by mocking McpServer.tool + const server = new McpServer({ name: "test", version: "1.0.0" }); + const originalTool = server.tool.bind(server); + + server.tool = ((...args: any[]) => { + // The handler is the last argument + toolHandler = args[args.length - 1]; + return originalTool(...args); + }) as any; + + registerExportTimeline(server); + }); + + it("generates a markdown report with all sections", async () => { + const result = await toolHandler({ + scope: "all", + period: "week", + offset: 0, + save: false, + include_commits: true, + include_corrections: true, + include_errors: true, + include_chart: true, + }); + + const text = result.content[0].text; + + // Header + expect(text).toContain("📊 Session Report:"); + expect(text).toContain("**Events:** 5"); + + // Summary table + expect(text).toContain("Total Events"); + expect(text).toContain("Prompts"); + expect(text).toContain("Correction Rate"); + + // Activity chart + expect(text).toContain("Activity Chart"); + expect(text).toContain("█"); + + // Commits section + expect(text).toContain("Recent Commits"); + expect(text).toContain("feat: add auth middleware"); + + // Corrections section + expect(text).toContain("Corrections & Lessons"); + expect(text).toContain("Wrong import path"); + + // Errors section + expect(text).toContain("Errors"); + expect(text).toContain("TypeError"); + + // Footer + expect(text).toContain("Generated by preflight"); + }); + + it("returns empty message when no events", async () => { + const { getTimeline } = await import("../src/lib/timeline-db.js"); + (getTimeline as any).mockResolvedValueOnce([]); + + const result = await toolHandler({ + scope: "all", + period: "week", + offset: 0, + save: false, + include_commits: true, + include_corrections: true, + include_errors: true, + include_chart: true, + }); + + expect(result.content[0].text).toContain("No events found"); + }); +}); From 39b385681da0f7fb85f1925cea95e127cc1ee7ba Mon Sep 17 00:00:00 2001 From: Jack Felke Date: Thu, 19 Mar 2026 09:15:33 -0700 Subject: [PATCH 4/5] test: add comprehensive tests for preflight_check tool Adds 10 tests covering: - Trivial prompt pass-through - force_level=skip bypass - Ambiguous prompt detection (vague pronouns, short prompts, vague verbs) - Multi-step execution plan generation - Git state inclusion in clarification - Triage confidence/reasons display - Pattern match triage boosting - Risk level assignment in execution plans Brings test count from 45 to 55. --- tests/preflight-check.test.ts | 167 ++++++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 tests/preflight-check.test.ts diff --git a/tests/preflight-check.test.ts b/tests/preflight-check.test.ts new file mode 100644 index 0000000..dc1d94c --- /dev/null +++ b/tests/preflight-check.test.ts @@ -0,0 +1,167 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { registerPreflightCheck } from "../src/tools/preflight-check.js"; + +// Mock all external dependencies +vi.mock("../src/lib/git.js", () => ({ + run: vi.fn().mockReturnValue(""), + getBranch: vi.fn().mockReturnValue("main"), + getStatus: vi.fn().mockReturnValue("M src/index.ts"), + getRecentCommits: vi.fn().mockReturnValue("abc1234 feat: initial commit"), + getDiffFiles: vi.fn().mockReturnValue("src/index.ts"), + getStagedFiles: vi.fn().mockReturnValue(""), +})); + +vi.mock("../src/lib/state.js", () => ({ + now: vi.fn().mockReturnValue("2026-03-19T09:00:00.000Z"), +})); + +vi.mock("../src/lib/files.js", () => ({ + PROJECT_DIR: "/tmp/test-project", + findWorkspaceDocs: vi.fn().mockReturnValue({}), +})); + +vi.mock("../src/lib/config.js", () => ({ + getConfig: vi.fn().mockReturnValue({ + triage: { + strictness: "normal", + rules: { + always_check: [], + skip: [], + cross_service_keywords: ["api", "schema", "contract"], + }, + }, + related_projects: [], + }), +})); + +vi.mock("../src/lib/timeline-db.js", () => ({ + searchSemantic: vi.fn().mockResolvedValue([]), +})); + +vi.mock("../src/lib/patterns.js", () => ({ + loadPatterns: vi.fn().mockReturnValue([]), + matchPatterns: vi.fn().mockReturnValue([]), + formatPatternMatches: vi.fn().mockReturnValue(""), +})); + +describe("preflight_check", () => { + let toolHandler: any; + + beforeEach(() => { + vi.clearAllMocks(); + const server = new McpServer({ name: "test", version: "1.0.0" }); + const originalTool = server.tool.bind(server); + server.tool = ((...args: any[]) => { + toolHandler = args[args.length - 1]; + return originalTool(...args); + }) as any; + registerPreflightCheck(server); + }); + + it("returns clear for trivial prompts", async () => { + const result = await toolHandler({ + prompt: "fix the typo in README.md", + }); + const text = result.content[0].text; + expect(text).toContain("✅ Preflight: clear to proceed."); + }); + + it("force_level=skip always passes through", async () => { + const result = await toolHandler({ + prompt: "rewrite the entire codebase and deploy to production then migrate the database", + force_level: "skip", + }); + const text = result.content[0].text; + expect(text).toBe("✅ Preflight: clear to proceed."); + }); + + it("detects ambiguous prompts with vague pronouns", async () => { + const result = await toolHandler({ + prompt: "fix it", + force_level: "light", + }); + const text = result.content[0].text; + expect(text).toContain("Preflight Check"); + expect(text).toContain("Clarification"); + expect(text).toContain("vague pronouns"); + }); + + it("detects very short prompts as needing clarification", async () => { + const result = await toolHandler({ + prompt: "update", + force_level: "light", + }); + const text = result.content[0].text; + expect(text).toContain("Clarification"); + expect(text).toContain("Very short prompt"); + }); + + it("builds execution plan for multi-step prompts", async () => { + const result = await toolHandler({ + prompt: "update the schema then fix the API routes then deploy to production", + force_level: "full", + }); + const text = result.content[0].text; + expect(text).toContain("Execution Plan"); + expect(text).toContain("Checkpoints"); + expect(text).toContain("Scope"); + }); + + it("includes git state in clarification section", async () => { + const result = await toolHandler({ + prompt: "refactor those components", + force_level: "light", + }); + const text = result.content[0].text; + expect(text).toContain("Git State"); + expect(text).toContain("Branch: `main`"); + expect(text).toContain("Dirty files: 1"); + }); + + it("shows triage confidence and reasons", async () => { + const result = await toolHandler({ + prompt: "fix it and update them", + force_level: "light", + }); + const text = result.content[0].text; + expect(text).toContain("Triage:"); + expect(text).toContain("confidence:"); + }); + + it("flags vague verbs without file targets", async () => { + const result = await toolHandler({ + prompt: "refactor the code to be better", + force_level: "light", + }); + const text = result.content[0].text; + expect(text).toContain("Vague verb without specific file targets"); + }); + + it("boosts triage level when patterns match", async () => { + const { matchPatterns } = await import("../src/lib/patterns.js"); + (matchPatterns as any).mockReturnValueOnce([ + { pattern: "forgot to import", frequency: 3 }, + ]); + + const result = await toolHandler({ + prompt: "add the auth check to login.ts", + }); + const text = result.content[0].text; + // Should no longer be trivial — pattern match boosts it + expect(text).toContain("Known pitfall"); + expect(text).toContain("corrected this 3x before"); + }); + + it("assigns risk levels in execution plan", async () => { + const result = await toolHandler({ + prompt: "update the database schema then change the API endpoint then update the frontend", + force_level: "full", + }); + const text = result.content[0].text; + // Schema changes should be HIGH risk + expect(text).toContain("🔴 HIGH"); + // API changes should be MEDIUM risk + expect(text).toContain("🟡 MEDIUM"); + }); +}); From 0301bf4a78e07253bc4fc4468b4320f50e922c7e Mon Sep 17 00:00:00 2001 From: Jack Felke Date: Thu, 19 Mar 2026 09:48:24 -0700 Subject: [PATCH 5/5] fix: prevent shell injection and broken commands in run() callers Several tools were passing shell syntax (pipes, redirects, || chains) and non-git commands (cat, find, pnpm, gh) to run(), which uses execFileSync without a shell. This caused silent failures: - Shell redirections (2>/dev/null) passed as literal git args - Pipe chains (| grep, | tail) passed as literal git args - 'git' prefix doubled (run already prepends 'git') - Non-git commands (find, gh, pnpm) routed through git execFileSync Fix: - Add shell() helper using execSync for commands needing shell features - Harden run() string parsing to strip leading 'git' and shell syntax - Migrate 10 call sites across 8 tools to use shell() or proper arrays - Add 13 tests covering run() cleanup and shell() behavior Affected tools: audit-workspace, clarify-intent, enrich-agent-task, sequence-tasks, session-handoff, session-health, verify-completion --- src/lib/git.ts | 42 ++++++++++- src/tools/audit-workspace.ts | 6 +- src/tools/clarify-intent.ts | 6 +- src/tools/enrich-agent-task.ts | 6 +- src/tools/sequence-tasks.ts | 4 +- src/tools/session-handoff.ts | 4 +- src/tools/session-health.ts | 4 +- src/tools/verify-completion.ts | 4 +- tests/lib/git.test.ts | 129 +++++++++++++++++++++++++++++++++ 9 files changed, 186 insertions(+), 19 deletions(-) create mode 100644 tests/lib/git.test.ts diff --git a/src/lib/git.ts b/src/lib/git.ts index a32ee3c..2f0abc0 100644 --- a/src/lib/git.ts +++ b/src/lib/git.ts @@ -1,14 +1,52 @@ -import { execFileSync } from "child_process"; +import { execFileSync, execSync } from "child_process"; import { PROJECT_DIR } from "./files.js"; import type { RunError } from "../types.js"; +/** + * Run a shell command string (supports pipes, redirects, etc.). + * Returns stdout on success, descriptive error string on failure. + */ +export function shell(cmd: string, opts: { timeout?: number } = {}): string { + try { + return execSync(cmd, { + cwd: PROJECT_DIR, + encoding: "utf-8", + timeout: opts.timeout || 10000, + maxBuffer: 1024 * 1024, + stdio: ["pipe", "pipe", "pipe"], + }).trim(); + } catch (e: any) { + if (e.killed === true || e.signal === "SIGTERM") { + return `[timed out after ${opts.timeout || 10000}ms]`; + } + const output = e.stdout?.trim() || e.stderr?.trim(); + if (output) return output; + return `[command failed: ${cmd} (exit ${e.status ?? "?"})]`; + } +} + /** * Run a git command safely using execFileSync (no shell injection). * Accepts an array of args (preferred) or a string (split on whitespace for backward compat). + * When passed a string, strips a leading "git" if present and removes shell syntax + * (redirections like 2>/dev/null, || clauses) that don't work with execFileSync. * Returns stdout on success. On failure, returns a descriptive error string. */ export function run(argsOrCmd: string | string[], opts: { timeout?: number } = {}): string { - const args = typeof argsOrCmd === "string" ? argsOrCmd.split(/\s+/) : argsOrCmd; + let args: string[]; + if (typeof argsOrCmd === "string") { + // Strip shell syntax that doesn't work with execFileSync + let cleaned = argsOrCmd + .replace(/\s*2>&?\d*\s*>?\s*\/dev\/null/g, "") // 2>/dev/null, 2>&1 + .replace(/\s*\|.*$/g, "") // pipe chains + .replace(/\s*\|\|.*$/g, "") // || fallbacks + .trim(); + args = cleaned.split(/\s+/).filter(Boolean); + // Strip leading "git" if caller passed it (run already prepends git) + if (args[0] === "git") args = args.slice(1); + } else { + args = argsOrCmd; + } try { return execFileSync("git", args, { cwd: PROJECT_DIR, diff --git a/src/tools/audit-workspace.ts b/src/tools/audit-workspace.ts index d4306bd..6f49a46 100644 --- a/src/tools/audit-workspace.ts +++ b/src/tools/audit-workspace.ts @@ -1,5 +1,5 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; -import { run } from "../lib/git.js"; +import { run, shell } from "../lib/git.js"; import { readIfExists, findWorkspaceDocs } from "../lib/files.js"; /** Extract top-level work areas from file paths generically */ @@ -36,7 +36,7 @@ export function registerAuditWorkspace(server: McpServer): void { {}, async () => { const docs = findWorkspaceDocs(); - const recentFiles = run("git diff --name-only HEAD~10 2>/dev/null || echo ''").split("\n").filter(Boolean); + const recentFiles = run(["diff", "--name-only", "HEAD~10"]).split("\n").filter(Boolean); const sections: string[] = []; // Doc freshness @@ -75,7 +75,7 @@ export function registerAuditWorkspace(server: McpServer): void { // Check for gap trackers or similar tracking docs const trackingDocs = Object.entries(docs).filter(([n]) => /gap|track|progress/i.test(n)); if (trackingDocs.length > 0) { - const testFilesCount = parseInt(run("find tests -name '*.spec.ts' -o -name '*.test.ts' 2>/dev/null | wc -l").trim()) || 0; + const testFilesCount = parseInt(shell("find tests -name '*.spec.ts' -o -name '*.test.ts' 2>/dev/null | wc -l").trim()) || 0; sections.push(`## Tracking Docs\n${trackingDocs.map(([n]) => { const age = docStatus.find(d => d.name === n)?.ageHours ?? "?"; return `- .claude/${n} — last updated ${age}h ago`; diff --git a/src/tools/clarify-intent.ts b/src/tools/clarify-intent.ts index 32efa3a..f141269 100644 --- a/src/tools/clarify-intent.ts +++ b/src/tools/clarify-intent.ts @@ -1,6 +1,6 @@ import { z } from "zod"; import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; -import { run, getBranch, getStatus, getRecentCommits, getDiffFiles, getStagedFiles } from "../lib/git.js"; +import { run, shell, getBranch, getStatus, getRecentCommits, getDiffFiles, getStagedFiles } from "../lib/git.js"; import { findWorkspaceDocs, PROJECT_DIR } from "../lib/files.js"; import { searchSemantic } from "../lib/timeline-db.js"; import { getRelatedProjects } from "../lib/config.js"; @@ -152,10 +152,10 @@ export function registerClarifyIntent(server: McpServer): void { let hasTestFailures = false; if (!area || area.includes("test") || area.includes("fix") || area.includes("ui") || area.includes("api")) { - const typeErrors = run("pnpm tsc --noEmit 2>&1 | grep -c 'error TS' || echo '0'"); + const typeErrors = shell("pnpm tsc --noEmit 2>&1 | grep -c 'error TS' || echo '0'"); hasTypeErrors = parseInt(typeErrors, 10) > 0; - const testFiles = run("find tests -name '*.spec.ts' -maxdepth 4 2>/dev/null | head -20"); + const testFiles = shell("find tests -name '*.spec.ts' -maxdepth 4 2>/dev/null | head -20"); const failingTests = getTestFailures(); hasTestFailures = failingTests !== "all passing" && failingTests !== "no test report found"; diff --git a/src/tools/enrich-agent-task.ts b/src/tools/enrich-agent-task.ts index 236edfa..910b18f 100644 --- a/src/tools/enrich-agent-task.ts +++ b/src/tools/enrich-agent-task.ts @@ -1,6 +1,6 @@ import { z } from "zod"; import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; -import { run, getDiffFiles } from "../lib/git.js"; +import { run, shell, getDiffFiles } from "../lib/git.js"; import { PROJECT_DIR } from "../lib/files.js"; import { getConfig, type RelatedProject } from "../lib/config.js"; import { existsSync, readFileSync } from "fs"; @@ -42,11 +42,11 @@ function findAreaFiles(area: string): string { /** Find related test files for an area */ function findRelatedTests(area: string): string { - if (!area) return run("git ls-files 2>/dev/null | grep -E '\\.(spec|test)\\.(ts|tsx|js|jsx)$' | head -10"); + if (!area) return shell("git ls-files 2>/dev/null | grep -E '\\.(spec|test)\\.(ts|tsx|js|jsx)$' | head -10"); const safeArea = shellEscape(area.split(/\s+/)[0]); const tests = run(`git ls-files 2>/dev/null | grep -E '\\.(spec|test)\\.(ts|tsx|js|jsx)$' | grep -i '${safeArea}' | head -10`); - return tests || run("git ls-files 2>/dev/null | grep -E '\\.(spec|test)\\.(ts|tsx|js|jsx)$' | head -10"); + return tests || shell("git ls-files 2>/dev/null | grep -E '\\.(spec|test)\\.(ts|tsx|js|jsx)$' | head -10"); } /** Get an example pattern from the first matching file */ diff --git a/src/tools/sequence-tasks.ts b/src/tools/sequence-tasks.ts index 22dea23..b23011b 100644 --- a/src/tools/sequence-tasks.ts +++ b/src/tools/sequence-tasks.ts @@ -1,7 +1,7 @@ // CATEGORY 6: sequence_tasks — Sequencing import { z } from "zod"; import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; -import { run } from "../lib/git.js"; +import { shell } from "../lib/git.js"; import { now } from "../lib/state.js"; import { PROJECT_DIR } from "../lib/files.js"; import { existsSync } from "fs"; @@ -90,7 +90,7 @@ export function registerSequenceTasks(server: McpServer): void { // For locality: infer directories from path-like tokens in task text if (strategy === "locality") { // Use git ls-files with a depth limit instead of find for performance - const gitFiles = run("git ls-files 2>/dev/null | head -1000"); + const gitFiles = shell("git ls-files 2>/dev/null | head -1000"); const knownDirs = new Set(); for (const f of gitFiles.split("\n").filter(Boolean)) { const parts = f.split("/"); diff --git a/src/tools/session-handoff.ts b/src/tools/session-handoff.ts index d199462..ab0a455 100644 --- a/src/tools/session-handoff.ts +++ b/src/tools/session-handoff.ts @@ -2,7 +2,7 @@ import { z } from "zod"; import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { existsSync, readFileSync } from "fs"; import { join } from "path"; -import { run, getBranch, getRecentCommits, getStatus } from "../lib/git.js"; +import { run, shell, getBranch, getRecentCommits, getStatus } from "../lib/git.js"; import { readIfExists, findWorkspaceDocs } from "../lib/files.js"; import { STATE_DIR, now } from "../lib/state.js"; @@ -44,7 +44,7 @@ export function registerSessionHandoff(server: McpServer): void { // Only try gh if it exists if (hasCommand("gh")) { - const openPRs = run("gh pr list --state open --json number,title,headRefName 2>/dev/null || echo '[]'"); + const openPRs = shell("gh pr list --state open --json number,title,headRefName 2>/dev/null || echo '[]'"); if (openPRs && openPRs !== "[]") { sections.push(`## Open PRs\n\`\`\`json\n${openPRs}\n\`\`\``); } diff --git a/src/tools/session-health.ts b/src/tools/session-health.ts index bd6a819..7bc6424 100644 --- a/src/tools/session-health.ts +++ b/src/tools/session-health.ts @@ -1,6 +1,6 @@ import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; -import { getBranch, getStatus, getLastCommit, getLastCommitTime, run } from "../lib/git.js"; +import { getBranch, getStatus, getLastCommit, getLastCommitTime, shell } from "../lib/git.js"; import { readIfExists, findWorkspaceDocs } from "../lib/files.js"; import { loadState, saveState } from "../lib/state.js"; import { getConfig } from "../lib/config.js"; @@ -27,7 +27,7 @@ export function registerSessionHealth(server: McpServer): void { const dirtyCount = dirty ? dirty.split("\n").filter(Boolean).length : 0; const lastCommit = getLastCommit(); const lastCommitTimeStr = getLastCommitTime(); - const uncommittedDiff = run("git diff --stat | tail -1"); + const uncommittedDiff = shell("git diff --stat | tail -1"); // Parse commit time safely const commitDate = parseGitDate(lastCommitTimeStr); diff --git a/src/tools/verify-completion.ts b/src/tools/verify-completion.ts index 732532f..51590af 100644 --- a/src/tools/verify-completion.ts +++ b/src/tools/verify-completion.ts @@ -1,6 +1,6 @@ import { z } from "zod"; import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; -import { run, getStatus } from "../lib/git.js"; +import { run, shell, getStatus } from "../lib/git.js"; import { PROJECT_DIR } from "../lib/files.js"; import { existsSync } from "fs"; import { join } from "path"; @@ -34,7 +34,7 @@ function detectTestRunner(): string | null { /** Check if a build script exists in package.json */ function hasBuildScript(): boolean { try { - const pkg = JSON.parse(run("cat package.json 2>/dev/null")); + const pkg = JSON.parse(shell("cat package.json 2>/dev/null")); return !!pkg?.scripts?.build; } catch { return false; } } diff --git a/tests/lib/git.test.ts b/tests/lib/git.test.ts new file mode 100644 index 0000000..de99e64 --- /dev/null +++ b/tests/lib/git.test.ts @@ -0,0 +1,129 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; + +// Mock child_process before importing the module +vi.mock("child_process", () => ({ + execFileSync: vi.fn().mockReturnValue("mock output"), + execSync: vi.fn().mockReturnValue("shell output"), +})); + +vi.mock("../../src/lib/files.js", () => ({ + PROJECT_DIR: "/mock/project", +})); + +import { run, shell } from "../../src/lib/git.js"; +import { execFileSync, execSync } from "child_process"; + +describe("run()", () => { + beforeEach(() => { + vi.clearAllMocks(); + (execFileSync as any).mockReturnValue("mock output"); + }); + + it("accepts an array of args", () => { + run(["status", "--short"]); + expect(execFileSync).toHaveBeenCalledWith( + "git", + ["status", "--short"], + expect.any(Object) + ); + }); + + it("splits string args on whitespace", () => { + run("status --short"); + expect(execFileSync).toHaveBeenCalledWith( + "git", + ["status", "--short"], + expect.any(Object) + ); + }); + + it("strips leading 'git' from string to avoid duplication", () => { + run("git status --short"); + expect(execFileSync).toHaveBeenCalledWith( + "git", + ["status", "--short"], + expect.any(Object) + ); + }); + + it("strips 2>/dev/null from string args", () => { + run("diff --name-only HEAD~1 2>/dev/null"); + expect(execFileSync).toHaveBeenCalledWith( + "git", + ["diff", "--name-only", "HEAD~1"], + expect.any(Object) + ); + }); + + it("strips || fallback clauses from string args", () => { + run("diff --name-only HEAD~10 2>/dev/null || echo ''"); + expect(execFileSync).toHaveBeenCalledWith( + "git", + ["diff", "--name-only", "HEAD~10"], + expect.any(Object) + ); + }); + + it("strips pipe chains from string args", () => { + run("diff --stat | tail -1"); + expect(execFileSync).toHaveBeenCalledWith( + "git", + ["diff", "--stat"], + expect.any(Object) + ); + }); + + it("returns trimmed output on success", () => { + (execFileSync as any).mockReturnValue(" result\n"); + expect(run(["log"])).toBe("result"); + }); + + it("returns error message on timeout", () => { + const err = new Error("killed") as any; + err.killed = true; + (execFileSync as any).mockImplementation(() => { throw err; }); + expect(run(["log"])).toContain("timed out"); + }); + + it("returns stderr on command failure", () => { + const err = new Error("fail") as any; + err.stderr = "fatal: not a git repo"; + err.stdout = ""; + (execFileSync as any).mockImplementation(() => { throw err; }); + expect(run(["status"])).toBe("fatal: not a git repo"); + }); + + it("returns ENOENT message when git not found", () => { + const err = new Error("fail") as any; + err.code = "ENOENT"; + (execFileSync as any).mockImplementation(() => { throw err; }); + expect(run(["status"])).toBe("[git not found]"); + }); +}); + +describe("shell()", () => { + beforeEach(() => { + vi.clearAllMocks(); + (execSync as any).mockReturnValue("shell output"); + }); + + it("passes command string directly to execSync", () => { + shell("git ls-files | grep test | head -5"); + expect(execSync).toHaveBeenCalledWith( + "git ls-files | grep test | head -5", + expect.any(Object) + ); + }); + + it("returns trimmed output", () => { + (execSync as any).mockReturnValue(" result\n"); + expect(shell("echo hi")).toBe("result"); + }); + + it("returns error message on timeout", () => { + const err = new Error("killed") as any; + err.killed = true; + (execSync as any).mockImplementation(() => { throw err; }); + expect(shell("sleep 100")).toContain("timed out"); + }); +});