From e73ae72029f06b750ad9ad0b8e90a42092474f9d Mon Sep 17 00:00:00 2001
From: Hoang Nguyen <codeaholicguy@users.noreply.github.com>
Date: Tue, 2 Jun 2026 12:43:33 +0200
Subject: [PATCH] Revert "feat(cli): optimize startup time (#96)"

This reverts commit 2ffdb436c0c467ca5a9185d2c348ebbee87abcaa.
---
 ...6-06-02-feature-cli-startup-performance.md | 159 ------------
 ...6-06-02-feature-cli-startup-performance.md | 123 ---------
 ...6-06-02-feature-cli-startup-performance.md |  87 -------
 ...6-06-02-feature-cli-startup-performance.md | 107 --------
 ...6-06-02-feature-cli-startup-performance.md | 114 ---------
 packages/cli/DEVELOPMENT.md                   |  28 ---
 packages/cli/README.md                        |   4 -
 packages/cli/package.json                     |   1 -
 .../src/__tests__/util/cli-benchmark.test.ts  | 134 ----------
 .../util/cli-command-manifest.test.ts         |  28 ---
 .../src/__tests__/util/cli-runtime.test.ts    | 143 -----------
 packages/cli/src/cli-command-manifest.ts      | 156 ------------
 packages/cli/src/cli-runtime.ts               | 208 ---------------
 packages/cli/src/cli.ts                       |  73 +++++-
 packages/cli/src/util/cli-benchmark.ts        | 238 ------------------
 15 files changed, 61 insertions(+), 1542 deletions(-)
 delete mode 100644 docs/ai/design/2026-06-02-feature-cli-startup-performance.md
 delete mode 100644 docs/ai/implementation/2026-06-02-feature-cli-startup-performance.md
 delete mode 100644 docs/ai/planning/2026-06-02-feature-cli-startup-performance.md
 delete mode 100644 docs/ai/requirements/2026-06-02-feature-cli-startup-performance.md
 delete mode 100644 docs/ai/testing/2026-06-02-feature-cli-startup-performance.md
 delete mode 100644 packages/cli/DEVELOPMENT.md
 delete mode 100644 packages/cli/src/__tests__/util/cli-benchmark.test.ts
 delete mode 100644 packages/cli/src/__tests__/util/cli-command-manifest.test.ts
 delete mode 100644 packages/cli/src/__tests__/util/cli-runtime.test.ts
 delete mode 100644 packages/cli/src/cli-command-manifest.ts
 delete mode 100644 packages/cli/src/cli-runtime.ts
 delete mode 100644 packages/cli/src/util/cli-benchmark.ts

diff --git a/docs/ai/design/2026-06-02-feature-cli-startup-performance.md b/docs/ai/design/2026-06-02-feature-cli-startup-performance.md
deleted file mode 100644
index 68594db9..00000000
--- a/docs/ai/design/2026-06-02-feature-cli-startup-performance.md
+++ /dev/null
@@ -1,159 +0,0 @@
----
-phase: design
-title: System Design & Architecture
-description: Define the technical architecture, components, and data models
-feature: cli-startup-performance
----
-
-# System Design & Architecture
-
-## Architecture Overview
-
-```mermaid
-graph TD
-  User[User or CI] --> Bin[dist/cli.js bin entry]
-  Bin --> Bootstrap[Lightweight CLI Bootstrap]
-  Bootstrap --> Manifest[Command Metadata Manifest]
-  Manifest --> Commander[Commander Parser]
-  Commander --> Help[Help and Version Output]
-  Commander --> Dispatch[Lazy Action Dispatcher]
-  Dispatch --> Init[init/phase/setup/lint/install handlers]
-  Dispatch --> Memory[memory handlers]
-  Dispatch --> Skill[skill handlers]
-  Dispatch --> Agent[agent handlers and TUI]
-  Dispatch --> Channel[channel handlers and bridge]
-  Dispatch --> Docs[docs handlers]
-  Bench[Benchmark Script] --> Bin
-  CI[CI Gate] --> Bench
-```
-
-The optimized CLI should separate cheap command metadata from expensive command execution code.
-
-The approved architecture is a two-step optimization:
-
-1. Implement a lightweight static command metadata layer plus lazy action dispatcher first. This keeps TypeScript source maintainable and removes eager command-handler imports from startup/help paths.
-2. Run the benchmark after the lazy metadata/dispatcher refactor. If p50 remains above `50 ms`, add generated or bundled `dist` optimization using only existing repo tooling and without changing package manifests.
-
-Key components:
-
-- **Lightweight CLI bootstrap**: The published entrypoint that loads only Commander, version metadata, command metadata, and dispatch glue.
-- **Command metadata manifest**: Static data for command names, descriptions, arguments, and options. This enables help/version output without importing handlers.
-- **Lazy action dispatcher**: Imports the actual command module only when the selected action executes.
-- **Command handler modules**: Existing command implementations, refactored only as needed to avoid top-level imports that are not needed by the selected subcommand.
-- **Benchmark script**: Local and CI entrypoint for startup/help timing and representative command smoke checks.
-
-## Data Models
-
-### Command Metadata
-
-```typescript
-interface CliCommandDefinition {
-  name: string;
-  description: string;
-  arguments?: CliArgumentDefinition[];
-  options?: CliOptionDefinition[];
-  subcommands?: CliCommandDefinition[];
-  action?: LazyActionDefinition;
-}
-
-interface LazyActionDefinition {
-  module: string;
-  exportName: string;
-}
-```
-
-The exact shape can be simpler if hand-written registration helpers are clearer. The design requirement is that help-visible command metadata is available without importing heavy handler modules.
-
-### Benchmark Result
-
-```typescript
-interface BenchmarkCaseResult {
-  label: string;
-  command: string[];
-  iterations: number;
-  minMs: number;
-  p50Ms: number;
-  p95Ms: number;
-  maxMs: number;
-  avgMs: number;
-  failures: number;
-}
-```
-
-## API Design
-
-No public CLI API changes are allowed. Internal APIs may be introduced:
-
-- `registerCommandMetadata(program, definitions)` to build Commander commands from metadata.
-- `lazyAction(modulePath, exportName)` to wrap `.action(...)` with dynamic import and error handling.
-- `runCliBenchmark(cases, options)` to execute benchmark cases with repeated child processes.
-
-Existing command modules should continue exposing testable handler functions where practical.
-
-## Component Breakdown
-
-### CLI Bootstrap
-
-- Owns `program.name`, package version loading, root command metadata, and `program.parse`.
-- Must not import heavy command modules at top level.
-- Must not import `ink`, `react`, `inquirer`, `telegraf`, `@ai-devkit/agent-manager`, `@ai-devkit/memory`, or channel bridge code unless the chosen command requires them.
-
-### Command Registration
-
-- Keeps help text equivalent to current help output.
-- Registers command actions through lazy dispatch wrappers.
-- May be hand-written first to reduce risk; generated metadata is allowed if it improves maintainability.
-
-### Command Handlers
-
-- Existing command behavior remains source of truth.
-- Heavy subcommand-specific dependencies should move into the action path when feasible. Example: `agent console` should be the path that loads Ink/React, not `agent --help`.
-- Shared utility imports are acceptable only when they are lightweight enough for the target.
-
-### Build Output
-
-- The build may produce generated or bundled `dist` artifacts.
-- Source maps or clear generated-file provenance must exist if output becomes hard to inspect.
-- `packages/cli/package.json` `bin` behavior must remain install-compatible.
-
-### Benchmarking
-
-- Benchmark direct built CLI execution after `npm run build`.
-- Use at least 20 iterations per startup/help command.
-- Record p50 and p95; p50 is the enforcement metric for `<50 ms`.
-- Use temporary directories/config for memory benchmark cases.
-
-## Design Decisions
-
-### 1. Optimize Current Node CLI First
-
-Rust is intentionally out of scope. Measurements show most overhead comes from eager imports and CLI bootstrap shape, not local CPU-heavy work. The fastest low-risk path is to remove unnecessary Node module loading.
-
-### 2. Preserve CLI Semantics
-
-Performance work must be behavior-preserving. Any command output, option parsing, or exit-code change is a regression unless explicitly approved later.
-
-### 3. Allow Bootstrap/Build Restructuring
-
-The `<50 ms` target is aggressive. Dynamic imports alone may not be enough with native ESM file fanout, so the design allows a lightweight bootstrap, generated metadata, or bundled artifacts without adding dependencies.
-
-Chosen path: do not start with bundling. Start with static metadata plus lazy dispatch because it is easier to review and preserves source/debug clarity. Treat bundling or generated `dist` output as a measured second step only if the first step does not meet the target.
-
-### 4. No New Dependencies
-
-The implementation must use existing repo tooling or plain Node scripts. If bundling is required, use tooling already available through the current lockfile without changing manifests, or implement a non-bundled fallback.
-
-## Alternatives Considered
-
-- **Action-only dynamic imports**: Simple and likely helpful, but may not hit `<50 ms` if command metadata still imports large modules.
-- **Static command metadata plus lazy handlers**: Chosen first step. Better startup characteristics while keeping source maintainable; requires keeping metadata and handler behavior aligned.
-- **Bundled bootstrap or CLI**: Conditional second step. Can reduce ESM file-load overhead; requires careful handling of dynamic imports, source maps, shebang, templates, and daemon entrypoints.
-- **Rust rewrite**: Best native startup potential, but too much scope for this feature and does not directly address command compatibility risk.
-
-## Non-Functional Requirements
-
-- Startup/help benchmark p50 `<50 ms` for required commands.
-- Lightweight command RSS should drop materially from current `~100 MB+` import paths; exact memory threshold is secondary to startup target.
-- CI benchmark must avoid single-run flakiness through repeated sampling.
-- The implementation must remain portable on supported Node versions and existing npm workspace tooling.
-- No additional secrets, credentials, or network services are needed for tests.
diff --git a/docs/ai/implementation/2026-06-02-feature-cli-startup-performance.md b/docs/ai/implementation/2026-06-02-feature-cli-startup-performance.md
deleted file mode 100644
index 1aa408c2..00000000
--- a/docs/ai/implementation/2026-06-02-feature-cli-startup-performance.md
+++ /dev/null
@@ -1,123 +0,0 @@
----
-phase: implementation
-title: Implementation Guide
-description: Technical implementation notes, patterns, and code guidelines
-feature: cli-startup-performance
----
-
-# Implementation Guide
-
-## Development Setup
-
-- Work in branch/worktree `feature-cli-startup-performance`.
-- Run `npm ci` in the feature worktree before phase work.
-- Run `npm run build` before benchmark commands so measurements use `packages/cli/dist/cli.js`.
-
-## Code Structure
-
-Expected touch points:
-
-- `packages/cli/src/cli.ts`: root bootstrap and command registration.
-- `packages/cli/src/commands/*.ts`: command metadata/action split and lazy handler imports.
-- `packages/cli/src/__tests__/commands/*.test.ts`: command behavior regression tests.
-- `e2e/`: built CLI smoke coverage if bootstrap/build changes affect published behavior.
-- CI workflow files if benchmark gate is added there.
-
-Current implementation deltas:
-
-- `packages/cli/src/util/cli-benchmark.ts`: local startup benchmark utility and executable built script entrypoint.
-- `packages/cli/src/__tests__/util/cli-benchmark.test.ts`: TDD coverage for timing stats, failure accounting, required benchmark case list, and built-script root resolution.
-- `packages/cli/src/cli-command-manifest.ts`: shared lightweight top-level command manifest used by static help and lazy dispatch.
-- `packages/cli/src/__tests__/util/cli-command-manifest.test.ts`: coverage proving the manifest drives root help, command help, and dispatch paths.
-- `packages/cli/src/cli-runtime.ts`: lightweight static help rendering and lazy top-level command execution.
-- `packages/cli/src/__tests__/util/cli-runtime.test.ts`: TDD coverage for lightweight help/version, dispatch mapping, and lazy command registration.
-- `packages/cli/src/cli.ts`: thin bootstrap that handles static help/version and delegates real commands to the lazy dispatcher.
-- `packages/cli/package.json`: `benchmark:startup` script running `node dist/util/cli-benchmark.js`.
-- `.github/workflows/ci.yml`: CI benchmark step after build.
-
-## Phase 6 Implementation Check
-
-Alignment with the design:
-
-- The root entrypoint now imports only package metadata plus lightweight bootstrap/dispatch helpers before command selection.
-- Root `--version`, root `--help`, and top-level command `--help` paths are served from static metadata and do not load the previous heavy command graph.
-- Real command execution imports only the selected top-level command module before Commander parsing.
-- Unknown command routing uses a lightweight Commander program populated from the shared manifest, preserving the existing unknown-command error without eager command-module imports.
-- The startup benchmark runs locally and in CI after build, with the `<50 ms` p50 gate enforced for version/help paths.
-
-Deviations and follow-ups:
-
-- Static help metadata duplicates command names/descriptions and selected option metadata. This is the main drift risk versus Commander-generated help and should be reviewed when commands change.
-- Lazy loading is currently at the top-level command group boundary. Heavy subcommand-specific dependencies inside groups such as `agent` and `channel` can be split further later, but this was not required to meet the startup/help target.
-- Representative real commands are smoke-measured in the benchmark table, but CI does not enforce a `10%` real-command regression threshold because there is no stored baseline in this implementation.
-
-## Phase 8 Code Review Notes
-
-- Reviewed the lightweight help metadata against real command registration. Fixed two public help parity gaps found during review: option-bearing command help now includes command-specific flags for `init`, `setup`, `lint`, and `install`; `channel --help` now includes `stop [name]`.
-- Refactored the runtime to expose `registerSelectedCommand` for direct branch coverage, while keeping `runSelectedCommand` as the CLI entrypoint dispatch API.
-- Verified exported helper usage with `rg`: new APIs are referenced only by `cli.ts`, tests, benchmark script entrypoint, and feature docs.
-- No new runtime dependencies, config keys, migrations, or irreversible state changes were introduced.
-
-## Simplification Pass
-
-- Consolidated top-level command metadata into `cli-command-manifest.ts`, so adding or changing a top-level command has one lightweight metadata entry used by both help rendering and dispatch resolution.
-- Removed the source `cli-full.ts` eager fallback. Unknown command handling now builds a lightweight Commander shell from the manifest, avoiding a second full command graph.
-- Consolidated `cli-bootstrap.ts` and `cli-dispatch.ts` into `cli-runtime.ts`, keeping one runtime module for help rendering and lazy command execution.
-- Updated the CLI entrypoint to fast-path `--version` before importing runtime code.
-- Added manifest tests to guard against future drift between root help, command help, and dispatch.
-
-## Implementation Notes
-
-### Core Features
-
-- Keep root bootstrap lightweight. Avoid top-level imports of heavy command modules.
-- Keep help-visible command metadata available without importing handler dependencies.
-- Import command handlers dynamically only when a command action actually runs.
-- Implement static command metadata plus lazy dispatch as the first optimization step.
-- Introduce generated or bundled `dist` output only after benchmarking proves the first step misses the `<50 ms` target.
-- If generated or bundled output is introduced, keep the source architecture explicit and testable.
-
-### Patterns & Best Practices
-
-- Prefer small registration helpers over broad abstractions unless generated metadata becomes necessary.
-- Preserve existing `withErrorHandler` behavior around async command actions.
-- Keep command handler functions exported for direct unit testing.
-- Do not introduce new dependencies.
-
-## Integration Points
-
-- `packages/cli/package.json` `bin.ai-devkit` must remain compatible.
-- `packages/cli` build must continue copying `templates` into `dist/templates`.
-- `channel-daemon` launch logic must still resolve dev and built paths correctly.
-- Existing package imports from `@ai-devkit/agent-manager`, `@ai-devkit/memory`, and `@ai-devkit/channel-connector` should move behind lazy boundaries where possible.
-
-## Error Handling
-
-- Lazy import failures should surface as command failures with the same error handling conventions as existing commands.
-- Benchmark failures should print the failing command, p50, threshold, iterations, and failed process count.
-- Generated build failures should fail `npm run build` clearly.
-
-## Performance Considerations
-
-- Optimize for fresh process startup, not long-lived process warm paths.
-- Avoid unnecessary JSON/config/file reads before command selection.
-- Avoid loading TUI/React/Ink unless `agent console` runs.
-- Avoid loading memory database code unless a memory action runs.
-- Avoid loading Telegram/channel bridge code unless channel actions requiring them run.
-
-Benchmark foundation evidence:
-
-- `npm test -w packages/cli -- src/__tests__/util/cli-benchmark.test.ts` passed with 4 tests.
-- `npm test -w packages/cli -- src/__tests__/util/cli-runtime.test.ts src/__tests__/util/cli-command-manifest.test.ts src/__tests__/util/cli-benchmark.test.ts` passed with 18 tests after the final simplification pass.
-- `npm run build` passed for all 4 projects.
-- `AI_DEVKIT_CLI_BENCHMARK_ITERATIONS=1 npm run benchmark:startup -w packages/cli` executed all 15 configured cases with `0` failures. This smoke run captures current unoptimized startup timings around `325-680 ms`, confirming the benchmark exposes the baseline regression target.
-- After lightweight bootstrap, `npm run benchmark:startup -w packages/cli` with 20 iterations produced `0` failures. Startup/help p50 values were `24.070-25.226 ms`; `--version` p50 was `25.080 ms`.
-- After top-level lazy dispatch and CI gate wiring, `npm run benchmark:startup -w packages/cli` with 20 iterations exited `0`. Startup/help p50 values were `29.391-33.132 ms`; real command smoke p50 values were `75.028 ms` for `lint`, `239.437 ms` for `agent-list-json`, and `153.793 ms` for `memory-search`.
-- After the simplification pass, `npm run benchmark:startup -w packages/cli` with 20 iterations exited `0`. Startup/help p50 values were `24.085-25.149 ms`; real command smoke p50 values were `70.889 ms` for `lint`, `227.256 ms` for `agent-list-json`, and `149.253 ms` for `memory-search`.
-- After moving runtime modules next to `cli.ts`, `npm run benchmark:startup -w packages/cli` with 20 iterations exited `0`. Startup/help p50 values were `24.290-26.318 ms`; real command smoke p50 values were `71.420 ms` for `lint`, `255.848 ms` for `agent-list-json`, and `149.797 ms` for `memory-search`.
-
-## Security Notes
-
-- Benchmark scripts must not read or print secrets.
-- Channel and memory smoke tests should use temporary or project-isolated config paths.
-- No Telegram tokens, tmux sessions, or external network calls should be required in CI.
diff --git a/docs/ai/planning/2026-06-02-feature-cli-startup-performance.md b/docs/ai/planning/2026-06-02-feature-cli-startup-performance.md
deleted file mode 100644
index 60aec22e..00000000
--- a/docs/ai/planning/2026-06-02-feature-cli-startup-performance.md
+++ /dev/null
@@ -1,87 +0,0 @@
----
-phase: planning
-title: Project Planning & Task Breakdown
-description: Break down work into actionable tasks and estimate timeline
-feature: cli-startup-performance
----
-
-# Project Planning & Task Breakdown
-
-## Milestones
-
-- [x] Milestone 1: Establish benchmark baseline and command import boundaries.
-- [x] Milestone 2: Introduce lightweight CLI bootstrap and lazy command dispatch.
-- [x] Milestone 3: Measure lazy-dispatch performance and conditionally restructure or bundle build output if needed to reach `<50 ms` p50.
-- [x] Milestone 4: Add tests, CI benchmark gate, and documentation.
-
-## Task Breakdown
-
-### Phase 1: Benchmark Foundation
-
-- [x] Task 1.1: Add a local benchmark script for direct built CLI execution with at least 20 iterations per case.
-- [x] Task 1.2: Include root `--version`, root `--help`, every top-level command `--help`, and representative real command cases.
-- [x] Task 1.3: Capture current built CLI baseline in benchmark output for comparison.
-- [x] Task 1.4: Add temporary workspace/config helpers for `lint`, `memory search`, and no-agent `agent list --json` smoke cases.
-
-### Phase 2: Lightweight Bootstrap
-
-- [x] Task 2.1: Split root CLI bootstrap from command handler modules.
-- [x] Task 2.2: Move top-level command metadata into lightweight registration code.
-- [x] Task 2.3: Add lazy dispatch for each top-level command group. Subcommand-level handler splitting remains a possible follow-up inside heavy groups such as `agent` and `channel`.
-- [x] Task 2.4: Refactor heavy command-module imports so help/version paths do not load `ink`, `react`, `inquirer`, `telegraf`, `@ai-devkit/agent-manager`, or `@ai-devkit/memory`.
-- [x] Task 2.5: Preserve existing command tests by updating mocks around lazy imports where needed.
-
-### Phase 3: Build/Runtime Optimization
-
-- [x] Task 3.1: Run benchmark after lazy bootstrap to see whether p50 `<50 ms` is met.
-- [x] Task 3.2: If lazy metadata/dispatch does not meet `<50 ms` p50, add generated or bundled `dist` output using existing repo tooling only, with no dependency additions. Not needed because lazy dispatch meets the startup/help target.
-- [x] Task 3.3: Preserve shebang/bin behavior for `dist/cli.js`.
-- [x] Task 3.4: Preserve template copy behavior and channel-daemon launch paths.
-- [x] Task 3.5: Add source maps or clear generated-file provenance for debugging. No generated/bundled output was introduced, so no additional source-map work was needed.
-
-### Phase 4: Verification and CI
-
-- [x] Task 4.1: Add unit tests for metadata registration and lazy action loading.
-- [x] Task 4.2: Add integration tests for built CLI help/version and representative commands.
-- [x] Task 4.3: Add CI benchmark gate with p50 `<50 ms` for required startup/help cases.
-- [x] Task 4.4: Run `npm run build`, `npm test -w packages/cli`, `npm run test:e2e`, and benchmark gate.
-- [x] Task 4.5: Update README or package docs only if maintainers need to know the benchmark command or build change. Feature docs were updated; README/package docs are not needed because the benchmark is internal CI/maintainer tooling.
-
-## Dependencies
-
-- Task 1.1 must land before enforcing performance claims.
-- Phase 2 depends on understanding existing command registration and test mocks.
-- Phase 3 is conditional: only do bundling/generated output if Phase 2 does not hit the target.
-- CI gate depends on benchmark script stability.
-
-## Timeline & Estimates
-
-- Benchmark foundation: 0.5 day.
-- Lazy bootstrap and import-boundary refactor: 1-1.5 days.
-- Conditional build optimization: 0.5-1.5 days depending on whether bundling is needed.
-- Tests and CI gate: 0.5-1 day.
-- Total estimate: 2.5-4.5 days.
-
-## Risks & Mitigation
-
-- **Risk:** `<50 ms` p50 is not achievable with native Node ESM and no new dependencies.
-  - **Mitigation:** Allow generated/bundled `dist` using existing tooling; if still impossible, report measured floor before changing scope.
-- **Risk:** Lazy imports change Commander help, option parsing, or error behavior.
-  - **Mitigation:** Add help/output and command action regression tests before broad refactor.
-- **Risk:** CI performance gate flakes on shared runners.
-  - **Mitigation:** Use repeated sampling, p50 threshold, warm-up if needed, and direct built CLI execution.
-- **Risk:** Build restructuring breaks package install, shebang, templates, or channel daemon paths.
-  - **Mitigation:** Include focused integration/e2e coverage for `dist/cli.js`, init/template behavior, and daemon path resolution.
-- **Risk:** Existing tests mock modules before import and may fail with dynamic imports.
-  - **Mitigation:** Update tests to mock lazy targets explicitly and keep handler functions independently testable.
-
-## Resources Needed
-
-- Existing npm workspace and lockfile.
-- Existing Node/Vitest/Nx tooling.
-- CI workflow access to add the benchmark gate.
-- Measured baseline from the June 2, 2026 startup investigation.
-
-## Phase 5 Status Summary
-
-All planned implementation tasks needed for the `<50 ms` startup/help target are complete. The feature added a local startup benchmark, CI benchmark gate, lightweight static help/version bootstrap, lazy top-level command dispatch, focused unit tests, and feature documentation. The measured startup/help p50 target is met without generated or bundled `dist` output, so build restructuring remains unnecessary. Remaining work moves to Phase 6 implementation review, then Phase 7 test coverage review and Phase 8 code review.
diff --git a/docs/ai/requirements/2026-06-02-feature-cli-startup-performance.md b/docs/ai/requirements/2026-06-02-feature-cli-startup-performance.md
deleted file mode 100644
index fbb03c9b..00000000
--- a/docs/ai/requirements/2026-06-02-feature-cli-startup-performance.md
+++ /dev/null
@@ -1,107 +0,0 @@
----
-phase: requirements
-title: Requirements & Problem Understanding
-description: Clarify the problem space, gather requirements, and define success criteria
-feature: cli-startup-performance
----
-
-# Requirements & Problem Understanding
-
-## Problem Statement
-
-The published `ai-devkit` CLI has high process startup cost for lightweight operations. Fresh measurements on June 2, 2026 showed:
-
-- `node packages/cli/dist/cli.js --version`: p50 `362.1 ms`, avg `360.2 ms`
-- `node packages/cli/dist/cli.js --help`: p50 `332.0 ms`, avg `338.8 ms`
-- `node packages/cli/dist/cli.js memory --help`: p50 `322.2 ms`, avg `329.4 ms`
-- `node packages/cli/dist/cli.js agent --help`: p50 `325.7 ms`, avg `329.3 ms`
-- Minimal Node + Commander baseline: p50 `12.1 ms`, avg `12.2 ms`
-
-The current CLI entrypoint eagerly imports and registers all command implementations before Commander resolves the requested command. As a result, commands that only need metadata pay for unrelated modules such as `ink`, `react`, `inquirer`, `@ai-devkit/agent-manager`, `@ai-devkit/memory`, `telegraf`, and channel bridge code.
-
-Developers feel this cost most in repeated local workflows, shell completions/help lookups, scripts, CI smoke checks, and `npx` or globally installed CLI usage where each command is a new Node process.
-
-## Goals & Objectives
-
-### Primary Goals
-
-- Reduce startup time for every CLI command path, not only root `--version` and `--help`.
-- Keep all measured direct built CLI startup/help paths under `50 ms` p50.
-- Preserve existing command names, flags, output semantics, and exit codes.
-- Add repeatable local and CI-visible performance benchmarks for the optimized paths.
-- Keep the source maintainable even if the build output uses generated or bundled artifacts.
-
-### Secondary Goals
-
-- Reduce memory footprint for lightweight command paths by avoiding heavy command-module imports until needed.
-- Make the CLI bootstrap architecture explicit enough that future commands do not accidentally reintroduce eager imports.
-- Document benchmark commands and expected thresholds for maintainers.
-
-### Non-Goals
-
-- Rewriting the CLI in Rust or migrating to a native binary.
-- Changing user-facing CLI behavior, command names, flags, output format, or exit-code semantics.
-- Upgrading runtime dependencies such as `chalk`, `ink`, `inquirer`, `ora`, `commander`, or package-manager tooling.
-- Adding new dependencies.
-- Reworking `@ai-devkit/agent-manager`, `@ai-devkit/memory`, or `@ai-devkit/channel-connector` internals except where import boundaries require it.
-- Optimizing `npx ai-devkit@latest ...` package download, package extraction, or network latency.
-- Requiring live tmux sessions, Telegram credentials, or running AI agents in CI.
-
-## User Stories & Use Cases
-
-- As a developer, I want `ai-devkit --version` and `ai-devkit --help` to return almost immediately so that shell and smoke-test workflows feel responsive.
-- As a developer exploring commands, I want `ai-devkit <command> --help` to avoid loading unrelated command implementations so that help remains fast even as the CLI grows.
-- As a maintainer, I want actual commands such as `lint`, `memory search`, and `agent list --json` to retain behavior while loading only the code they need.
-- As a maintainer, I want a benchmark script that runs locally and in CI so that future regressions are visible before release.
-
-### Required Benchmark/Smoke Set
-
-Lightweight startup/help paths:
-
-- `ai-devkit --version`
-- `ai-devkit --help`
-- `ai-devkit init --help`
-- `ai-devkit phase --help`
-- `ai-devkit setup --help`
-- `ai-devkit lint --help`
-- `ai-devkit install --help`
-- `ai-devkit memory --help`
-- `ai-devkit agent --help`
-- `ai-devkit channel --help`
-- `ai-devkit docs --help`
-- `ai-devkit skill --help`
-
-Real non-mutating command paths:
-
-- `ai-devkit lint`
-- `ai-devkit agent list --json` in an environment with no live agents or a deterministic no-agent setup
-- `ai-devkit memory search --query "startup performance" --limit 1` against a temporary/project-isolated memory DB/config
-
-## Success Criteria
-
-- [ ] After `npm run build`, direct built CLI execution satisfies p50 `< 50 ms` for the lightweight startup/help benchmark set.
-- [ ] Benchmark runs at least 20 iterations per command and reports min, p50, p95, max, avg, exit-code failures, and command labels.
-- [ ] CI runs the benchmark gate and fails when p50 exceeds the configured threshold for required startup/help commands.
-- [ ] Representative real commands keep behavior unchanged and do not regress by more than `10%` p50 from the measured post-change baseline during the same benchmark run.
-- [ ] Unit/integration tests cover command dispatch so each command action lazy-loads the intended handler without changing parsed options.
-- [ ] `npm run build` succeeds.
-- [ ] `npm test -w packages/cli` succeeds.
-- [ ] `npm run test:e2e` succeeds or documented equivalent smoke coverage is provided if e2e is too broad for this phase.
-- [ ] No new dependencies are added to root or package manifests.
-- [ ] Generated or bundled CLI artifacts, if used, include clear build scripts and source maps or equivalent debugging support.
-
-## Constraints & Assumptions
-
-- Node engine remains `>=20.20.0`.
-- The CLI remains TypeScript-authored and published as the `ai-devkit` npm package.
-- The package `bin` entry remains compatible with `dist/cli.js` unless a compatible wrapper preserves existing install behavior.
-- Existing transitive tooling already present in the lockfile may be used only if no package manifest changes are required and the build remains reproducible.
-- Generated or bundled `dist` output is acceptable, but source under `packages/cli/src` must remain maintainable.
-- Benchmarks measure direct built CLI execution, not `npx` download/install overhead.
-- CI benchmark thresholds must account for repeated sampling rather than a single process run.
-
-## Questions & Open Items
-
-- Decide in design whether the implementation should use dynamic imports only, split lightweight command metadata from handlers, generated command manifests, bundled CLI bootstrap, or a combination.
-- Decide how to make the CI benchmark stable across different machine speeds while still enforcing the `<50 ms` target.
-- Confirm whether source maps are sufficient for debugging generated/bundled `dist`, or whether an additional debug build mode is needed.
diff --git a/docs/ai/testing/2026-06-02-feature-cli-startup-performance.md b/docs/ai/testing/2026-06-02-feature-cli-startup-performance.md
deleted file mode 100644
index 854625a3..00000000
--- a/docs/ai/testing/2026-06-02-feature-cli-startup-performance.md
+++ /dev/null
@@ -1,114 +0,0 @@
----
-phase: testing
-title: Testing Strategy
-description: Define testing approach, test cases, and quality assurance
-feature: cli-startup-performance
----
-
-# Testing Strategy
-
-## Test Coverage Goals
-
-- Unit coverage target: 100% of new command registration, lazy dispatch, and benchmark utility code.
-- Integration scope: built CLI help/version, command parsing, lazy handler execution, and representative non-mutating commands.
-- Performance scope: local and CI benchmark gate for required startup/help paths, with representative real commands measured as smoke metrics.
-- Regression scope: existing CLI command tests and e2e smoke tests continue to pass.
-
-## Unit Tests
-
-### CLI Bootstrap and Metadata
-
-- [x] Root command registers the same command names as current CLI. Covered by `packages/cli/src/cli-command-manifest.ts` and `packages/cli/src/__tests__/util/cli-command-manifest.test.ts`.
-- [x] Root `--help` output includes existing command descriptions without importing heavy command modules. Covered by `packages/cli/src/__tests__/util/cli-runtime.test.ts`.
-- [x] Root `--version` returns the package version without importing command handlers. Covered by `packages/cli/src/__tests__/util/cli-runtime.test.ts`.
-- [x] Each top-level command exposes expected command names and core descriptions from metadata. Static help metadata is covered by `packages/cli/src/__tests__/util/cli-runtime.test.ts`.
-- [x] Lazy action wrapper imports the target module only when the action executes. Top-level dispatcher mapping is covered by `packages/cli/src/__tests__/util/cli-runtime.test.ts`.
-- [x] Lazy action wrapper preserves thrown errors and existing error-handler behavior. Existing command handlers and error wrappers are still used by selected command modules; covered by `npm test -w packages/cli` and `npm run test:e2e`.
-
-### Command Handler Boundaries
-
-- [x] `memory --help` does not import `@ai-devkit/memory`.
-- [x] `agent --help` does not import `ink`, `react`, or `@ai-devkit/agent-manager`.
-- [x] `channel --help` does not import `telegraf` or bridge runner code.
-- [x] Actual `memory search` imports and calls the memory command implementation with parsed options.
-- [x] Actual `agent list --json` imports and calls the agent command implementation with parsed options.
-
-### Benchmark Utility
-
-- [x] Computes min, p50, p95, max, and avg from timing samples. Covered by `packages/cli/src/__tests__/util/cli-benchmark.test.ts`.
-- [x] Captures non-zero command exits as failures. Covered by `packages/cli/src/__tests__/util/cli-benchmark.test.ts`.
-- [x] Fails the gate when p50 exceeds the configured threshold. Covered by `packages/cli/src/__tests__/util/cli-benchmark.test.ts`.
-- [x] Supports temporary cwd/env setup for memory benchmark cases and isolated cwd setup for agent list smoke runs. Covered by `packages/cli/src/__tests__/util/cli-benchmark.test.ts`.
-
-## Integration Tests
-
-- [x] `node packages/cli/dist/cli.js --version` prints the package version.
-- [x] `node packages/cli/dist/cli.js --help` prints root help and exits successfully.
-- [x] `node packages/cli/dist/cli.js init --help` prints init help and exits successfully.
-- [x] `node packages/cli/dist/cli.js phase --help` prints phase help and exits successfully.
-- [x] `node packages/cli/dist/cli.js setup --help` prints setup help and exits successfully.
-- [x] `node packages/cli/dist/cli.js lint --help` prints lint help and exits successfully.
-- [x] `node packages/cli/dist/cli.js install --help` prints install help and exits successfully.
-- [x] `node packages/cli/dist/cli.js memory --help` prints memory help and exits successfully.
-- [x] `node packages/cli/dist/cli.js agent --help` prints agent help and exits successfully.
-- [x] `node packages/cli/dist/cli.js channel --help` prints channel help and exits successfully.
-- [x] `node packages/cli/dist/cli.js docs --help` prints docs help and exits successfully.
-- [x] `node packages/cli/dist/cli.js skill --help` prints skill help and exits successfully.
-- [x] `node packages/cli/dist/cli.js lint` works in a valid `docs/ai` workspace.
-- [x] `node packages/cli/dist/cli.js agent list --json` works from an isolated temporary cwd and tolerates empty/non-empty agent lists while verifying successful exit.
-- [x] `node packages/cli/dist/cli.js memory search --query "startup performance" --limit 1` works against an isolated temp memory DB/config.
-
-## End-to-End Tests
-
-- [x] Existing `npm run test:e2e` remains green after the CLI bootstrap/build changes.
-- [x] Existing init/install/skill/memory e2e paths still execute through the published `dist/cli.js` entrypoint.
-- [x] Channel daemon path remains valid if build restructuring changes `dist` layout. No generated/bundled `dist` layout change was needed.
-
-## Test Data
-
-- Temporary workspace with valid `docs/ai` base files for `lint`.
-- Temporary `.ai-devkit.json` or config path for memory DB isolation.
-- Environment setup that isolates project config for memory and tolerates empty/non-empty agent lists while verifying successful exit.
-
-## Test Reporting & Coverage
-
-- Required commands:
-  - `npm run build`
-  - `npm test -w packages/cli`
-  - `npm test -w packages/cli -- --coverage`
-  - `npm run test:e2e`
-  - CLI benchmark command introduced by this feature
-- Benchmark output must print a compact table or JSON summary with p50/p95 for each case.
-- CI should preserve benchmark output in logs for regression diagnosis.
-
-## Phase 7 Results
-
-- `npm test -w packages/cli -- src/__tests__/util/cli-runtime.test.ts src/__tests__/util/cli-command-manifest.test.ts src/__tests__/util/cli-benchmark.test.ts` passed after the final simplification pass: 3 files, 18 tests.
-- `npm run build` passed for all 4 projects.
-- `npm test -w packages/cli -- --coverage` passed before the final runtime merge: 49 files, 680 tests. Overall package coverage was `90.53%` statements, `90.56%` branches, `86.27%` functions, and `90.53%` lines.
-- `npm test -w packages/cli` passed after the final help-output parity fixes: 49 files, 680 tests.
-- `npm run test:e2e` passed: 38 tests.
-- `npm run benchmark:startup -w packages/cli` passed with `0` failures. Final startup/help p50 values were `28.299-29.276 ms`; representative real command p50 values were `70.802 ms` for `lint`, `226.225 ms` for `agent-list-json`, and `149.483 ms` for `memory-search`.
-- After moving runtime logic next to `cli.ts`, `npm test -w packages/cli` passed: 49 files, 681 tests. `npm run test:e2e` passed: 38 tests. `npm run benchmark:startup -w packages/cli` passed with `0` failures and startup/help p50 values of `24.290-26.318 ms`.
-
-Remaining coverage gaps are pre-existing broad package gaps or executable-only benchmark branches. No additional production tests are required for this feature before code review.
-
-## Manual Testing
-
-- [x] Run benchmark locally on a clean built tree and confirm required startup/help p50 values are `<50 ms`.
-- [x] Run `ai-devkit --help`, `memory --help`, `agent --help`, and `channel --help` manually to inspect output parity through benchmark/e2e smoke coverage.
-- [x] Run `ai-devkit lint` in the repo.
-- [x] Run `ai-devkit memory search --query "startup performance" --limit 1` with an isolated DB.
-
-## Performance Testing
-
-- [x] Benchmark root `--version`, root `--help`, and every top-level command `--help` path for at least 20 iterations. Implemented by `packages/cli/src/util/cli-benchmark.ts`; smoke-run with one iteration after `npm run build`.
-- [x] Enforce p50 `<50 ms` for lightweight startup/help cases. Verified locally with `npm run benchmark:startup -w packages/cli`: all final startup/help p50 values were `28.299-29.276 ms` and failures were `0`.
-- [x] Track p95 for visibility; do not fail only on p95 unless it shows repeated outliers that make p50 unstable.
-- [x] Measure representative real commands in the same benchmark table for regression visibility. The CI gate currently enforces the `<50 ms` requirement only for startup/help cases; real-command p50 regression gating would require a stored or generated baseline in a follow-up.
-
-## Bug Tracking
-
-- Any behavior change in command output, parsed options, or exit code is blocking.
-- Any new dependency in package manifests is blocking.
-- Any benchmark flake in CI must be fixed by improving sampling/environment control, not by removing the gate.
diff --git a/packages/cli/DEVELOPMENT.md b/packages/cli/DEVELOPMENT.md
deleted file mode 100644
index e7630beb..00000000
--- a/packages/cli/DEVELOPMENT.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# AI DevKit CLI Development
-
-## Runtime Layout
-
-The CLI entrypoint is optimized for startup time:
-
-- `src/cli.ts` handles `--version` inline before loading runtime code.
-- `src/cli-command-manifest.ts` is the lightweight source of truth for top-level command names, descriptions, help metadata, and lazy module paths.
-- `src/cli-runtime.ts` renders static root/top-level help from the manifest and lazy-loads Commander plus the selected command module only for real command execution.
-- `src/commands/` contains the command implementations.
-
-## Adding a Top-Level Command
-
-1. Add the command implementation in `src/commands/`.
-2. Add one entry to `src/cli-command-manifest.ts`.
-3. Add the lazy import/registration branch in `src/cli-runtime.ts`.
-4. Update `src/__tests__/util/cli-runtime.test.ts` if the command needs new runtime coverage.
-
-## Startup Benchmark
-
-Run the startup benchmark after CLI runtime changes:
-
-```bash
-npm run build
-npm run benchmark:startup -w packages/cli
-```
-
-The CI gate enforces p50 `<50 ms` for `--version`, root `--help`, and every top-level command `--help`.
diff --git a/packages/cli/README.md b/packages/cli/README.md
index 936f8d7b..92849daa 100644
--- a/packages/cli/README.md
+++ b/packages/cli/README.md
@@ -93,10 +93,6 @@ ai-devkit skill add <skill-registry> [skill-name]
 ai-devkit memory store
 ```
 
-## Development
-
-Maintainer notes for the CLI runtime layout, adding top-level commands, and startup benchmarking live in [DEVELOPMENT.md](./DEVELOPMENT.md).
-
 Template example:
 
 ```yaml
diff --git a/packages/cli/package.json b/packages/cli/package.json
index dea4274b..3fb8ed01 100644
--- a/packages/cli/package.json
+++ b/packages/cli/package.json
@@ -10,7 +10,6 @@
   },
   "scripts": {
     "build": "tsc && cp -R templates dist/templates",
-    "benchmark:startup": "node dist/util/cli-benchmark.js",
     "dev": "node --no-warnings --loader ts-node/esm src/cli.ts",
     "lint": "eslint src --ext .ts",
     "test": "vitest run",
diff --git a/packages/cli/src/__tests__/util/cli-benchmark.test.ts b/packages/cli/src/__tests__/util/cli-benchmark.test.ts
deleted file mode 100644
index f11b22e8..00000000
--- a/packages/cli/src/__tests__/util/cli-benchmark.test.ts
+++ /dev/null
@@ -1,134 +0,0 @@
-import { describe, expect, it, vi } from 'vitest';
-import {
-  calculateTimingStats,
-  createDefaultBenchmarkPlan,
-  evaluateBenchmarkGate,
-  resolveDefaultRootDir,
-  runBenchmarkCase,
-} from '../../util/cli-benchmark.js';
-
-describe('cli benchmark utility', () => {
-  it('computes timing statistics from samples', () => {
-    const stats = calculateTimingStats([30, 10, 50, 20, 40]);
-
-    expect(stats).toEqual({
-      minMs: 10,
-      p50Ms: 30,
-      p95Ms: 50,
-      maxMs: 50,
-      avgMs: 30,
-    });
-  });
-
-  it('counts failed command exits while reporting timings', () => {
-    const spawn = vi.fn()
-      .mockReturnValueOnce({ status: 0 })
-      .mockReturnValueOnce({ status: 2 })
-      .mockReturnValueOnce({ status: null, error: new Error('spawn failed') });
-    const nowValues = [0, 10, 10, 25, 25, 55];
-    const now = vi.fn(() => nowValues.shift() ?? 55);
-
-    const result = runBenchmarkCase({
-      label: 'version',
-      command: ['node', 'dist/cli.js', '--version'],
-      iterations: 3,
-      spawn,
-      now,
-    });
-
-    expect(result).toMatchObject({
-      label: 'version',
-      command: ['node', 'dist/cli.js', '--version'],
-      iterations: 3,
-      failures: 2,
-      minMs: 10,
-      p50Ms: 15,
-      p95Ms: 30,
-      maxMs: 30,
-      avgMs: 18.333,
-    });
-  });
-
-  it('creates the required startup and smoke benchmark cases', () => {
-    const plan = createDefaultBenchmarkPlan({
-      cliPath: 'packages/cli/dist/cli.js',
-      iterations: 20,
-      rootDir: '/repo',
-      tempRoot: '/tmp/ai-devkit-cli-benchmark-test',
-      nodePath: '/usr/local/bin/node',
-    });
-
-    expect(plan.cases.map((benchmarkCase) => benchmarkCase.label)).toEqual([
-      'version',
-      'root-help',
-      'init-help',
-      'phase-help',
-      'setup-help',
-      'lint-help',
-      'install-help',
-      'memory-help',
-      'skill-help',
-      'agent-help',
-      'channel-help',
-      'docs-help',
-      'lint',
-      'agent-list-json',
-      'memory-search',
-    ]);
-
-    expect(plan.cases.every((benchmarkCase) => benchmarkCase.iterations === 20)).toBe(true);
-    expect(plan.cases.find((benchmarkCase) => benchmarkCase.label === 'memory-search')).toMatchObject({
-      cwd: '/tmp/ai-devkit-cli-benchmark-test',
-      command: ['/usr/local/bin/node', '/repo/packages/cli/dist/cli.js', 'memory', 'search', '--query', 'startup performance', '--limit', '1'],
-    });
-    plan.cleanup();
-  });
-
-  it('resolves the repository root from the built benchmark script location', () => {
-    expect(resolveDefaultRootDir('file:///repo/packages/cli/dist/util/cli-benchmark.js')).toBe('/repo');
-  });
-
-  it('fails the startup gate for slow or failed required startup cases only', () => {
-    const gate = evaluateBenchmarkGate([
-      {
-        label: 'version',
-        command: ['node', 'cli.js', '--version'],
-        iterations: 20,
-        minMs: 10,
-        p50Ms: 51,
-        p95Ms: 60,
-        maxMs: 70,
-        avgMs: 52,
-        failures: 0,
-      },
-      {
-        label: 'lint',
-        command: ['node', 'cli.js', 'lint'],
-        iterations: 20,
-        minMs: 100,
-        p50Ms: 500,
-        p95Ms: 600,
-        maxMs: 700,
-        avgMs: 520,
-        failures: 0,
-      },
-      {
-        label: 'agent-help',
-        command: ['node', 'cli.js', 'agent', '--help'],
-        iterations: 20,
-        minMs: 20,
-        p50Ms: 30,
-        p95Ms: 40,
-        maxMs: 50,
-        avgMs: 32,
-        failures: 1,
-      },
-    ], { startupThresholdMs: 50 });
-
-    expect(gate.pass).toBe(false);
-    expect(gate.failures).toEqual([
-      'version p50 51ms exceeded startup threshold 50ms',
-      'agent-help had 1 failed run(s)',
-    ]);
-  });
-});
diff --git a/packages/cli/src/__tests__/util/cli-command-manifest.test.ts b/packages/cli/src/__tests__/util/cli-command-manifest.test.ts
deleted file mode 100644
index a09251ab..00000000
--- a/packages/cli/src/__tests__/util/cli-command-manifest.test.ts
+++ /dev/null
@@ -1,28 +0,0 @@
-import { describe, expect, it } from 'vitest';
-import { CLI_COMMANDS } from '../../cli-command-manifest.js';
-import { resolveLightweightCliResponse, resolveTopLevelCommandModule } from '../../cli-runtime.js';
-
-describe('CLI command manifest', () => {
-  it('is the shared source for root help and top-level dispatch', () => {
-    const rootHelp = resolveLightweightCliResponse(['--help'], '1.2.3').output ?? '';
-
-    for (const command of CLI_COMMANDS) {
-      expect(rootHelp).toContain(command.usage);
-      expect(resolveTopLevelCommandModule([command.name])).toBe(command.modulePath);
-    }
-  });
-
-  it('renders declared options and subcommands in static command help', () => {
-    for (const command of CLI_COMMANDS) {
-      const commandHelp = resolveLightweightCliResponse([command.name, '--help'], '1.2.3').output ?? '';
-
-      for (const option of command.options ?? []) {
-        expect(commandHelp).toContain(option.flags);
-      }
-
-      for (const subcommand of command.subcommands ?? []) {
-        expect(commandHelp).toContain(subcommand.usage);
-      }
-    }
-  });
-});
diff --git a/packages/cli/src/__tests__/util/cli-runtime.test.ts b/packages/cli/src/__tests__/util/cli-runtime.test.ts
deleted file mode 100644
index 3341fba7..00000000
--- a/packages/cli/src/__tests__/util/cli-runtime.test.ts
+++ /dev/null
@@ -1,143 +0,0 @@
-import { Command } from 'commander';
-import { beforeEach, describe, expect, it, vi } from 'vitest';
-import {
-  registerSelectedCommand,
-  resolveLightweightCliResponse,
-  resolveTopLevelCommandModule,
-  runSelectedCommand,
-} from '../../cli-runtime.js';
-
-const mocks = vi.hoisted(() => ({
-  initCommand: vi.fn(),
-  phaseCommand: vi.fn(),
-  setupCommand: vi.fn(),
-  lintCommand: vi.fn(),
-  installCommand: vi.fn(),
-  registerMemoryCommand: vi.fn(),
-  registerSkillCommand: vi.fn(),
-  registerAgentCommand: vi.fn(),
-  registerChannelCommand: vi.fn(),
-  registerDocsCommand: vi.fn(),
-}));
-
-vi.mock('../../commands/init.js', () => ({ initCommand: mocks.initCommand }));
-vi.mock('../../commands/phase.js', () => ({ phaseCommand: mocks.phaseCommand }));
-vi.mock('../../commands/setup.js', () => ({ setupCommand: mocks.setupCommand }));
-vi.mock('../../commands/lint.js', () => ({ lintCommand: mocks.lintCommand }));
-vi.mock('../../commands/install.js', () => ({ installCommand: mocks.installCommand }));
-vi.mock('../../commands/memory.js', () => ({ registerMemoryCommand: mocks.registerMemoryCommand }));
-vi.mock('../../commands/skill.js', () => ({ registerSkillCommand: mocks.registerSkillCommand }));
-vi.mock('../../commands/agent.js', () => ({ registerAgentCommand: mocks.registerAgentCommand }));
-vi.mock('../../commands/channel.js', () => ({ registerChannelCommand: mocks.registerChannelCommand }));
-vi.mock('../../commands/docs.js', () => ({ registerDocsCommand: mocks.registerDocsCommand }));
-
-describe('CLI runtime', () => {
-  beforeEach(() => {
-    vi.clearAllMocks();
-  });
-
-  it('prints version without loading the full command graph', () => {
-    expect(resolveLightweightCliResponse(['--version'], '1.2.3')).toEqual({
-      handled: true,
-      output: '1.2.3\n',
-    });
-  });
-
-  it('prints root help without loading the full command graph', () => {
-    const response = resolveLightweightCliResponse(['--help'], '1.2.3');
-
-    expect(response.handled).toBe(true);
-    expect(response.output).toContain('Usage: ai-devkit [options] [command]');
-    expect(response.output).toContain('agent');
-  });
-
-  it('prints top-level command help without loading the full command graph', () => {
-    const response = resolveLightweightCliResponse(['memory', '--help'], '1.2.3');
-
-    expect(response.handled).toBe(true);
-    expect(response.output).toContain('Usage: ai-devkit memory [options] [command]');
-    expect(response.output).toContain('store');
-    expect(response.output).toContain('search');
-  });
-
-  it('prints command-specific options for option-bearing commands', () => {
-    const response = resolveLightweightCliResponse(['lint', '--help'], '1.2.3');
-
-    expect(response.handled).toBe(true);
-    expect(response.output).toContain('-f, --feature <name>');
-    expect(response.output).toContain('--json');
-  });
-
-  it('prints all static top-level subcommands for grouped command help', () => {
-    const response = resolveLightweightCliResponse(['channel', '--help'], '1.2.3');
-
-    expect(response.handled).toBe(true);
-    expect(response.output).toContain('connect [options] <type>');
-    expect(response.output).toContain('stop [name]');
-    expect(response.output).toContain('status [name]');
-  });
-
-  it('delegates non-help command execution to the command runner', () => {
-    expect(resolveLightweightCliResponse(['lint'], '1.2.3')).toEqual({ handled: false });
-  });
-
-  it('maps selected top-level commands to their command modules', () => {
-    expect(resolveTopLevelCommandModule(['lint'])).toBe('./commands/lint.js');
-    expect(resolveTopLevelCommandModule(['memory', 'search'])).toBe('./commands/memory.js');
-    expect(resolveTopLevelCommandModule(['agent', 'list'])).toBe('./commands/agent.js');
-  });
-
-  it('does not map root help or unknown commands', () => {
-    expect(resolveTopLevelCommandModule(['--help'])).toBeUndefined();
-    expect(resolveTopLevelCommandModule(['unknown'])).toBeUndefined();
-  });
-
-  it('registers inline top-level commands with their lazy handlers', async () => {
-    const cases = [
-      { modulePath: './commands/init.js' as const, name: 'init', action: mocks.initCommand },
-      { modulePath: './commands/phase.js' as const, name: 'phase', action: mocks.phaseCommand },
-      { modulePath: './commands/setup.js' as const, name: 'setup', action: mocks.setupCommand },
-      { modulePath: './commands/lint.js' as const, name: 'lint', action: mocks.lintCommand },
-      { modulePath: './commands/install.js' as const, name: 'install', action: mocks.installCommand },
-    ];
-
-    for (const testCase of cases) {
-      const program = new Command();
-
-      await registerSelectedCommand(program, testCase.modulePath);
-
-      const command = program.commands.find((registeredCommand) => registeredCommand.name() === testCase.name);
-      expect(command).toBeDefined();
-      expect(command?.description()).not.toBe('');
-
-      await program.parseAsync(['node', 'ai-devkit', testCase.name]);
-
-      expect(testCase.action).toHaveBeenCalled();
-      vi.clearAllMocks();
-    }
-  });
-
-  it('delegates grouped commands to their command registration modules', async () => {
-    const cases = [
-      { modulePath: './commands/memory.js' as const, register: mocks.registerMemoryCommand },
-      { modulePath: './commands/skill.js' as const, register: mocks.registerSkillCommand },
-      { modulePath: './commands/agent.js' as const, register: mocks.registerAgentCommand },
-      { modulePath: './commands/channel.js' as const, register: mocks.registerChannelCommand },
-      { modulePath: './commands/docs.js' as const, register: mocks.registerDocsCommand },
-    ];
-
-    for (const testCase of cases) {
-      const program = new Command();
-
-      await registerSelectedCommand(program, testCase.modulePath);
-
-      expect(testCase.register).toHaveBeenCalledWith(program);
-    }
-  });
-
-  it('runs the selected command through a lightweight program', async () => {
-    await runSelectedCommand(['node', 'ai-devkit', 'lint'], '1.2.3');
-
-    expect(mocks.lintCommand).toHaveBeenCalled();
-  });
-});
diff --git a/packages/cli/src/cli-command-manifest.ts b/packages/cli/src/cli-command-manifest.ts
deleted file mode 100644
index ed9de3be..00000000
--- a/packages/cli/src/cli-command-manifest.ts
+++ /dev/null
@@ -1,156 +0,0 @@
-export type CommandModulePath =
-  | './commands/init.js'
-  | './commands/phase.js'
-  | './commands/setup.js'
-  | './commands/lint.js'
-  | './commands/install.js'
-  | './commands/memory.js'
-  | './commands/skill.js'
-  | './commands/agent.js'
-  | './commands/channel.js'
-  | './commands/docs.js';
-
-export interface CommandRow {
-  usage: string;
-  description: string;
-}
-
-export interface CommandOption {
-  flags: string;
-  description: string;
-}
-
-export interface CliCommandManifest {
-  name: string;
-  usage: string;
-  description: string;
-  rootDescription?: string;
-  modulePath: CommandModulePath;
-  options?: CommandOption[];
-  subcommands?: CommandRow[];
-}
-
-export const CLI_COMMANDS: CliCommandManifest[] = [
-  {
-    name: 'init',
-    usage: 'init [options]',
-    description: 'Initialize AI DevKit in the current directory',
-    modulePath: './commands/init.js',
-    options: [
-      { flags: '-e, --environment <env>', description: 'Development environment (cursor|claude|both)' },
-      { flags: '-a, --all', description: 'Initialize all phases' },
-      { flags: '-p, --phases <phases>', description: 'Comma-separated list of phases to initialize' },
-      { flags: '-t, --template <path>', description: 'Initialize from template file (.yaml, .yml, .json)' },
-      { flags: '-d, --docs-dir <path>', description: 'Custom directory for AI documentation (default: docs/ai)' },
-      { flags: '--built-in', description: 'Install AI DevKit built-in skills without prompting (useful for CI/non-interactive runs)' },
-      { flags: '-y, --yes', description: 'Run non-interactively. Without -t, requires -e <env> and one of -a/-p. Existing files are kept unless --overwrite is also passed.' },
-      { flags: '--overwrite', description: 'With --yes, overwrite existing environments and phase files instead of skipping them' },
-    ],
-  },
-  {
-    name: 'phase',
-    usage: 'phase [name]',
-    description: 'Add a specific phase template (requirements|design|planning|implementation|testing|deployment|monitoring)',
-    rootDescription: 'Add a specific phase template',
-    modulePath: './commands/phase.js',
-  },
-  {
-    name: 'setup',
-    usage: 'setup [options]',
-    description: 'Set up AI DevKit commands globally',
-    modulePath: './commands/setup.js',
-    options: [
-      { flags: '-g, --global', description: 'Install commands to global environment folders' },
-    ],
-  },
-  {
-    name: 'lint',
-    usage: 'lint [options]',
-    description: 'Validate workspace readiness for AI DevKit workflows',
-    modulePath: './commands/lint.js',
-    options: [
-      { flags: '-f, --feature <name>', description: 'Validate docs and git worktree conventions for a feature' },
-      { flags: '--json', description: 'Output lint results as JSON' },
-    ],
-  },
-  {
-    name: 'install',
-    usage: 'install [options]',
-    description: 'Install AI DevKit artifacts from a project config',
-    modulePath: './commands/install.js',
-    options: [
-      { flags: '-c, --config <path>', description: 'Path to config file (default: .ai-devkit.json)' },
-      { flags: '--overwrite', description: 'Overwrite existing install artifacts' },
-    ],
-  },
-  {
-    name: 'memory',
-    usage: 'memory [options] [command]',
-    description: 'Interact with the knowledge memory service',
-    modulePath: './commands/memory.js',
-    subcommands: [
-      { usage: 'store [options]', description: 'Store a new knowledge item' },
-      { usage: 'update [options]', description: 'Update an existing knowledge item by ID' },
-      { usage: 'search [options]', description: 'Search for knowledge items' },
-    ],
-  },
-  {
-    name: 'skill',
-    usage: 'skill [options] [command]',
-    description: 'Manage Agent Skills',
-    modulePath: './commands/skill.js',
-    subcommands: [
-      { usage: 'add [options] <registry-repo> [skill-name]', description: 'Install a skill from a registry' },
-      { usage: 'list', description: 'List all installed skills in the current project' },
-      { usage: 'remove <skill-name>', description: 'Remove a skill from the current project' },
-      { usage: 'update [registry-id]', description: 'Update skills from registries' },
-      { usage: 'find [options] <keyword>', description: 'Search for skills across all registries' },
-      { usage: 'rebuild-index [options]', description: 'Rebuild the skill index from all registries' },
-    ],
-  },
-  {
-    name: 'agent',
-    usage: 'agent [options] [command]',
-    description: 'Manage AI Agents',
-    modulePath: './commands/agent.js',
-    subcommands: [
-      { usage: 'start [options]', description: 'Start a new agent in a managed tmux session' },
-      { usage: 'list [options]', description: 'List all running AI agents' },
-      { usage: 'sessions [options]', description: 'List historical Claude/Codex/Gemini/OpenCode sessions for resume' },
-      { usage: 'session', description: 'Manage historical AI agent sessions' },
-      { usage: 'open <name>', description: 'Focus a running agent terminal' },
-      { usage: 'send [options] [message]', description: 'Send a message to a running agent' },
-      { usage: 'kill <name>', description: 'Stop a running agent and clean up its managed tmux session' },
-      { usage: 'detail [options]', description: 'Show detailed information about a running agent' },
-      { usage: 'rename <current-name> <new-name>', description: 'Rename an agent in the registry' },
-      { usage: 'console', description: 'Interactive multi-agent console' },
-    ],
-  },
-  {
-    name: 'channel',
-    usage: 'channel [options] [command]',
-    description: 'Connect agents with messaging channels',
-    modulePath: './commands/channel.js',
-    subcommands: [
-      { usage: 'connect [options] <type>', description: 'Connect a messaging channel' },
-      { usage: 'list', description: 'List configured channels' },
-      { usage: 'disconnect <name>', description: 'Remove a channel configuration' },
-      { usage: 'start [options] [name]', description: 'Start the channel bridge to a running agent' },
-      { usage: 'stop [name]', description: 'Stop a running channel bridge' },
-      { usage: 'status [name]', description: 'Show channel bridge status' },
-    ],
-  },
-  {
-    name: 'docs',
-    usage: 'docs [options] [command]',
-    description: 'Manage AI DevKit documentation',
-    modulePath: './commands/docs.js',
-    subcommands: [
-      { usage: 'init-feature [options] <name>', description: 'Initialize date-prefixed feature documentation from phase templates' },
-    ],
-  },
-];
-
-export function getCliCommand(name: string | undefined): CliCommandManifest | undefined {
-  return CLI_COMMANDS.find((command) => command.name === name);
-}
diff --git a/packages/cli/src/cli-runtime.ts b/packages/cli/src/cli-runtime.ts
deleted file mode 100644
index 812bbbc1..00000000
--- a/packages/cli/src/cli-runtime.ts
+++ /dev/null
@@ -1,208 +0,0 @@
-import { CLI_COMMANDS, getCliCommand, type CommandModulePath, type CommandRow } from './cli-command-manifest.js';
-
-interface LightweightResponse {
-  handled: boolean;
-  output?: string;
-}
-
-function renderCommandRows(rows: CommandRow[]): string {
-  return rows
-    .map((row) => `  ${row.usage.padEnd(30)} ${row.description}`)
-    .join('\n');
-}
-
-function rootCommandRows(): CommandRow[] {
-  return [
-    ...CLI_COMMANDS.map((command) => ({
-      usage: command.usage,
-      description: command.rootDescription ?? command.description,
-    })),
-    { usage: 'help [command]', description: 'display help for command' },
-  ];
-}
-
-function renderRootHelp(): string {
-  return [
-    'Usage: ai-devkit [options] [command]',
-    '',
-    'AI-assisted software development toolkit',
-    '',
-    'Options:',
-    '  -V, --version                  output the version number',
-    '  -h, --help                     display help for command',
-    '',
-    'Commands:',
-    renderCommandRows(rootCommandRows()),
-    '',
-  ].join('\n');
-}
-
-function renderCommandHelp(commandName: string): string | undefined {
-  const command = getCliCommand(commandName);
-  if (!command) return undefined;
-
-  const optionRows: CommandRow[] = [
-    ...(command.options ?? []).map((option) => ({
-      usage: option.flags,
-      description: option.description,
-    })),
-    { usage: '-h, --help', description: 'display help for command' },
-  ];
-  const sections = [
-    `Usage: ai-devkit ${command.usage}`,
-    '',
-    command.description,
-    '',
-    'Options:',
-    renderCommandRows(optionRows),
-  ];
-
-  if (command.subcommands?.length) {
-    sections.push(
-      '',
-      'Commands:',
-      renderCommandRows([...command.subcommands, { usage: 'help [command]', description: 'display help for command' }]),
-    );
-  }
-
-  sections.push('');
-  return sections.join('\n');
-}
-
-function isHelpFlag(value: string | undefined): boolean {
-  return value === '--help' || value === '-h';
-}
-
-export function resolveLightweightCliResponse(args: string[], version: string): LightweightResponse {
-  const [first, second] = args;
-
-  if (first === '--version' || first === '-V') {
-    return { handled: true, output: `${version}\n` };
-  }
-
-  if (isHelpFlag(first) || first === undefined) {
-    return { handled: true, output: renderRootHelp() };
-  }
-
-  if (isHelpFlag(second)) {
-    const output = renderCommandHelp(first);
-    if (output) {
-      return { handled: true, output };
-    }
-  }
-
-  return { handled: false };
-}
-
-export function resolveTopLevelCommandModule(args: string[]): CommandModulePath | undefined {
-  return getCliCommand(args[0])?.modulePath;
-}
-
-async function createProgram(version: string) {
-  const { Command } = await import('commander');
-  return new Command()
-    .name('ai-devkit')
-    .description('AI-assisted software development toolkit')
-    .version(version);
-}
-
-export async function registerSelectedCommand(
-  program: Awaited<ReturnType<typeof createProgram>>,
-  modulePath: CommandModulePath,
-): Promise<void> {
-  switch (modulePath) {
-    case './commands/init.js': {
-      const { initCommand } = await import('./commands/init.js');
-      program
-        .command('init')
-        .description('Initialize AI DevKit in the current directory')
-        .option('-e, --environment <env>', 'Development environment (cursor|claude|both)')
-        .option('-a, --all', 'Initialize all phases')
-        .option('-p, --phases <phases>', 'Comma-separated list of phases to initialize')
-        .option('-t, --template <path>', 'Initialize from template file (.yaml, .yml, .json)')
-        .option('-d, --docs-dir <path>', 'Custom directory for AI documentation (default: docs/ai)')
-        .option('--built-in', 'Install AI DevKit built-in skills without prompting (useful for CI/non-interactive runs)')
-        .option('-y, --yes', 'Run non-interactively. Without -t, requires -e <env> and one of -a/-p. Existing files are kept unless --overwrite is also passed.')
-        .option('--overwrite', 'With --yes, overwrite existing environments and phase files instead of skipping them')
-        .action(initCommand);
-      break;
-    }
-    case './commands/phase.js': {
-      const { phaseCommand } = await import('./commands/phase.js');
-      program
-        .command('phase [name]')
-        .description('Add a specific phase template (requirements|design|planning|implementation|testing|deployment|monitoring)')
-        .action(phaseCommand);
-      break;
-    }
-    case './commands/setup.js': {
-      const { setupCommand } = await import('./commands/setup.js');
-      program
-        .command('setup')
-        .description('Set up AI DevKit commands globally')
-        .option('-g, --global', 'Install commands to global environment folders')
-        .action(setupCommand);
-      break;
-    }
-    case './commands/lint.js': {
-      const { lintCommand } = await import('./commands/lint.js');
-      program
-        .command('lint')
-        .description('Validate workspace readiness for AI DevKit workflows')
-        .option('-f, --feature <name>', 'Validate docs and git worktree conventions for a feature')
-        .option('--json', 'Output lint results as JSON')
-        .action(lintCommand);
-      break;
-    }
-    case './commands/install.js': {
-      const { installCommand } = await import('./commands/install.js');
-      program
-        .command('install')
-        .description('Install AI DevKit artifacts from a project config')
-        .option('-c, --config <path>', 'Path to config file (default: .ai-devkit.json)')
-        .option('--overwrite', 'Overwrite existing install artifacts')
-        .action(installCommand);
-      break;
-    }
-    case './commands/memory.js': {
-      const { registerMemoryCommand } = await import('./commands/memory.js');
-      registerMemoryCommand(program);
-      break;
-    }
-    case './commands/skill.js': {
-      const { registerSkillCommand } = await import('./commands/skill.js');
-      registerSkillCommand(program);
-      break;
-    }
-    case './commands/agent.js': {
-      const { registerAgentCommand } = await import('./commands/agent.js');
-      registerAgentCommand(program);
-      break;
-    }
-    case './commands/channel.js': {
-      const { registerChannelCommand } = await import('./commands/channel.js');
-      registerChannelCommand(program);
-      break;
-    }
-    case './commands/docs.js': {
-      const { registerDocsCommand } = await import('./commands/docs.js');
-      registerDocsCommand(program);
-      break;
-    }
-  }
-}
-
-export async function runSelectedCommand(argv: string[], version: string): Promise<void> {
-  const program = await createProgram(version);
-  const modulePath = resolveTopLevelCommandModule(argv.slice(2));
-
-  if (modulePath) {
-    await registerSelectedCommand(program, modulePath);
-  } else {
-    for (const command of CLI_COMMANDS) {
-      program.command(command.usage).description(command.description);
-    }
-  }
-
-  await program.parseAsync(argv);
-}
diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts
index 00f5bb6c..7095b138 100644
--- a/packages/cli/src/cli.ts
+++ b/packages/cli/src/cli.ts
@@ -1,19 +1,68 @@
 #!/usr/bin/env node
 
+import { Command } from 'commander';
+import { initCommand } from './commands/init.js';
+import { phaseCommand } from './commands/phase.js';
+import { setupCommand } from './commands/setup.js';
+import { lintCommand } from './commands/lint.js';
+import { installCommand } from './commands/install.js';
+import { registerMemoryCommand } from './commands/memory.js';
+import { registerSkillCommand } from './commands/skill.js';
+import { registerAgentCommand } from './commands/agent.js';
+import { registerChannelCommand } from './commands/channel.js';
+import { registerDocsCommand } from './commands/docs.js';
 import pkg from '../package.json' with { type: 'json' };
-
 const { version } = pkg as { version: string };
-const args = process.argv.slice(2);
 
-if (args[0] === '--version' || args[0] === '-V') {
-  process.stdout.write(`${version}\n`);
-} else {
-  const { resolveLightweightCliResponse, runSelectedCommand } = await import('./cli-runtime.js');
-  const response = resolveLightweightCliResponse(args, version);
+const program = new Command();
+
+program
+  .name('ai-devkit')
+  .description('AI-assisted software development toolkit')
+  .version(version);
+
+program
+  .command('init')
+  .description('Initialize AI DevKit in the current directory')
+  .option('-e, --environment <env>', 'Development environment (cursor|claude|both)')
+  .option('-a, --all', 'Initialize all phases')
+  .option('-p, --phases <phases>', 'Comma-separated list of phases to initialize')
+  .option('-t, --template <path>', 'Initialize from template file (.yaml, .yml, .json)')
+  .option('-d, --docs-dir <path>', 'Custom directory for AI documentation (default: docs/ai)')
+  .option('--built-in', 'Install AI DevKit built-in skills without prompting (useful for CI/non-interactive runs)')
+  .option('-y, --yes', 'Run non-interactively. Without -t, requires -e <env> and one of -a/-p. Existing files are kept unless --overwrite is also passed.')
+  .option('--overwrite', 'With --yes, overwrite existing environments and phase files instead of skipping them')
+  .action(initCommand);
+
+program
+  .command('phase [name]')
+  .description('Add a specific phase template (requirements|design|planning|implementation|testing|deployment|monitoring)')
+  .action(phaseCommand);
+
+program
+  .command('setup')
+  .description('Set up AI DevKit commands globally')
+  .option('-g, --global', 'Install commands to global environment folders')
+  .action(setupCommand);
+
+program
+  .command('lint')
+  .description('Validate workspace readiness for AI DevKit workflows')
+  .option('-f, --feature <name>', 'Validate docs and git worktree conventions for a feature')
+  .option('--json', 'Output lint results as JSON')
+  .action(lintCommand);
+
+program
+  .command('install')
+  .description('Install AI DevKit artifacts from a project config')
+  .option('-c, --config <path>', 'Path to config file (default: .ai-devkit.json)')
+  .option('--overwrite', 'Overwrite existing install artifacts')
+  .action(installCommand);
 
-  if (!response.handled) {
-    await runSelectedCommand(process.argv, version);
-  }
+registerMemoryCommand(program);
+registerSkillCommand(program);
+registerAgentCommand(program);
+registerChannelCommand(program);
+registerDocsCommand(program);
 
-  process.stdout.write(response.output ?? '');
-}
+program.parse();
diff --git a/packages/cli/src/util/cli-benchmark.ts b/packages/cli/src/util/cli-benchmark.ts
deleted file mode 100644
index 6053d4e3..00000000
--- a/packages/cli/src/util/cli-benchmark.ts
+++ /dev/null
@@ -1,238 +0,0 @@
-import { spawnSync } from 'child_process';
-import { mkdirSync, rmSync, writeFileSync } from 'fs';
-import { tmpdir } from 'os';
-import path from 'path';
-import { performance } from 'perf_hooks';
-import { fileURLToPath, pathToFileURL } from 'url';
-
-export interface TimingStats {
-  minMs: number;
-  p50Ms: number;
-  p95Ms: number;
-  maxMs: number;
-  avgMs: number;
-}
-
-export interface BenchmarkCase {
-  label: string;
-  command: string[];
-  iterations: number;
-  cwd?: string;
-  env?: NodeJS.ProcessEnv;
-}
-
-export interface BenchmarkCaseResult extends BenchmarkCase, TimingStats {
-  failures: number;
-}
-
-export interface BenchmarkPlan {
-  cases: BenchmarkCase[];
-  cleanup: () => void;
-}
-
-export interface DefaultBenchmarkPlanOptions {
-  cliPath?: string;
-  iterations?: number;
-  rootDir?: string;
-  tempRoot?: string;
-  nodePath?: string;
-}
-
-export interface BenchmarkGateResult {
-  pass: boolean;
-  failures: string[];
-}
-
-export interface BenchmarkGateOptions {
-  startupThresholdMs: number;
-}
-
-const STARTUP_GATE_LABELS = new Set([
-  'version',
-  'root-help',
-  'init-help',
-  'phase-help',
-  'setup-help',
-  'lint-help',
-  'install-help',
-  'memory-help',
-  'skill-help',
-  'agent-help',
-  'channel-help',
-  'docs-help',
-]);
-
-type Spawn = typeof spawnSync;
-type Now = () => number;
-
-function roundMs(value: number): number {
-  return Math.round(value * 1000) / 1000;
-}
-
-function percentile(sorted: number[], p: number): number {
-  if (sorted.length === 0) return 0;
-  const index = Math.ceil((p / 100) * sorted.length) - 1;
-  return sorted[Math.min(Math.max(index, 0), sorted.length - 1)];
-}
-
-export function calculateTimingStats(samples: number[]): TimingStats {
-  if (samples.length === 0) {
-    return { minMs: 0, p50Ms: 0, p95Ms: 0, maxMs: 0, avgMs: 0 };
-  }
-
-  const sorted = [...samples].sort((a, b) => a - b);
-  const avg = samples.reduce((sum, sample) => sum + sample, 0) / samples.length;
-
-  return {
-    minMs: roundMs(sorted[0]),
-    p50Ms: roundMs(percentile(sorted, 50)),
-    p95Ms: roundMs(percentile(sorted, 95)),
-    maxMs: roundMs(sorted[sorted.length - 1]),
-    avgMs: roundMs(avg),
-  };
-}
-
-export function runBenchmarkCase(
-  benchmarkCase: BenchmarkCase & { spawn?: Spawn; now?: Now },
-): BenchmarkCaseResult {
-  const { spawn = spawnSync, now = () => performance.now(), ...caseConfig } = benchmarkCase;
-  const samples: number[] = [];
-  let failures = 0;
-  const [command, ...args] = caseConfig.command;
-
-  for (let i = 0; i < caseConfig.iterations; i += 1) {
-    const start = now();
-    const result = spawn(command, args, {
-      cwd: caseConfig.cwd,
-      env: caseConfig.env ? { ...process.env, ...caseConfig.env } : process.env,
-      stdio: 'ignore',
-    });
-    samples.push(now() - start);
-
-    if (result.status !== 0 || result.error) {
-      failures += 1;
-    }
-  }
-
-  return {
-    ...caseConfig,
-    ...calculateTimingStats(samples),
-    failures,
-  };
-}
-
-export function runCliBenchmark(cases: BenchmarkCase[]): BenchmarkCaseResult[] {
-  return cases.map(runBenchmarkCase);
-}
-
-export function printBenchmarkResults(results: BenchmarkCaseResult[]): void {
-  const rows = results.map((result) => ({
-    label: result.label,
-    p50: `${result.p50Ms.toFixed(3)}ms`,
-    p95: `${result.p95Ms.toFixed(3)}ms`,
-    avg: `${result.avgMs.toFixed(3)}ms`,
-    failures: result.failures,
-  }));
-  console.table(rows);
-}
-
-export function evaluateBenchmarkGate(
-  results: BenchmarkCaseResult[],
-  options: BenchmarkGateOptions,
-): BenchmarkGateResult {
-  const failures: string[] = [];
-
-  for (const result of results) {
-    if (!STARTUP_GATE_LABELS.has(result.label)) {
-      continue;
-    }
-
-    if (result.failures > 0) {
-      failures.push(`${result.label} had ${result.failures} failed run(s)`);
-    }
-
-    if (result.p50Ms > options.startupThresholdMs) {
-      failures.push(`${result.label} p50 ${result.p50Ms}ms exceeded startup threshold ${options.startupThresholdMs}ms`);
-    }
-  }
-
-  return {
-    pass: failures.length === 0,
-    failures,
-  };
-}
-
-export function createDefaultBenchmarkPlan(options: DefaultBenchmarkPlanOptions = {}): BenchmarkPlan {
-  const rootDir = options.rootDir ?? process.cwd();
-  const cliPath = options.cliPath
-    ? path.resolve(rootDir, options.cliPath)
-    : path.join(rootDir, 'packages/cli/dist/cli.js');
-  const nodePath = options.nodePath ?? process.execPath;
-  const cli = [nodePath, cliPath];
-  const iterations = options.iterations
-    ?? Number.parseInt(process.env.AI_DEVKIT_CLI_BENCHMARK_ITERATIONS ?? '20', 10);
-  const tempRoot = options.tempRoot ?? path.join(tmpdir(), `ai-devkit-cli-benchmark-${process.pid}`);
-
-  mkdirSync(tempRoot, { recursive: true });
-  writeFileSync(path.join(tempRoot, '.ai-devkit.json'), JSON.stringify({
-    version: '0.0.0',
-    environments: [],
-    phases: ['requirements', 'design', 'planning', 'implementation', 'testing'],
-    memory: { path: path.join(tempRoot, 'memory.db') },
-  }, null, 2));
-
-  const helpCases: BenchmarkCase[] = [
-    { label: 'version', command: [...cli, '--version'], iterations, cwd: rootDir },
-    { label: 'root-help', command: [...cli, '--help'], iterations, cwd: rootDir },
-    { label: 'init-help', command: [...cli, 'init', '--help'], iterations, cwd: rootDir },
-    { label: 'phase-help', command: [...cli, 'phase', '--help'], iterations, cwd: rootDir },
-    { label: 'setup-help', command: [...cli, 'setup', '--help'], iterations, cwd: rootDir },
-    { label: 'lint-help', command: [...cli, 'lint', '--help'], iterations, cwd: rootDir },
-    { label: 'install-help', command: [...cli, 'install', '--help'], iterations, cwd: rootDir },
-    { label: 'memory-help', command: [...cli, 'memory', '--help'], iterations, cwd: rootDir },
-    { label: 'skill-help', command: [...cli, 'skill', '--help'], iterations, cwd: rootDir },
-    { label: 'agent-help', command: [...cli, 'agent', '--help'], iterations, cwd: rootDir },
-    { label: 'channel-help', command: [...cli, 'channel', '--help'], iterations, cwd: rootDir },
-    { label: 'docs-help', command: [...cli, 'docs', '--help'], iterations, cwd: rootDir },
-  ];
-
-  return {
-    cases: [
-      ...helpCases,
-      { label: 'lint', command: [...cli, 'lint'], iterations, cwd: rootDir },
-      { label: 'agent-list-json', command: [...cli, 'agent', 'list', '--json'], iterations, cwd: tempRoot },
-      {
-        label: 'memory-search',
-        command: [...cli, 'memory', 'search', '--query', 'startup performance', '--limit', '1'],
-        iterations,
-        cwd: tempRoot,
-      },
-    ],
-    cleanup: () => {
-      rmSync(tempRoot, { recursive: true, force: true });
-    },
-  };
-}
-
-export function resolveDefaultRootDir(moduleUrl: string): string {
-  return path.resolve(path.dirname(fileURLToPath(moduleUrl)), '../../../..');
-}
-
-if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
-  const plan = createDefaultBenchmarkPlan({ rootDir: resolveDefaultRootDir(import.meta.url) });
-  try {
-    const results = runCliBenchmark(plan.cases);
-    printBenchmarkResults(results);
-    const gate = evaluateBenchmarkGate(results, {
-      startupThresholdMs: Number.parseFloat(process.env.AI_DEVKIT_CLI_STARTUP_THRESHOLD_MS ?? '50'),
-    });
-    if (!gate.pass) {
-      for (const failure of gate.failures) {
-        console.error(failure);
-      }
-      process.exitCode = 1;
-    }
-  } finally {
-    plan.cleanup();
-  }
-}