From 86ac356b9e0bf7efb9a1d9db03e014a44ada1da4 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 01:11:31 -0700
Subject: [PATCH 01/57] fix(provider): keep API-key vs OAuth auth mode across
 the two persisted provider_key vocabularies

Claude (and OpenAI) sessions could silently shift from an API key onto
the OAuth subscription. Root cause: two divergent provider_key
vocabularies persist into sessions, and the session-reconstruction
helpers only understood one of them.

- The structured model-route picker (RPC) persists RuntimeKey::stable_id()
  values: claude-oauth / anthropic-api-key / openai-oauth / openai-api-key.
- The legacy /model + login path persists: claude / claude-api / openai /
  openai-api.

model_switch_request_for_session_model and
session_provider_key_matches_provider_name only matched the legacy
vocabulary. A session whose provider_key was 'anthropic-api-key' (without
a separately-persisted route_api_method, e.g. a forked/child/ambient/
overnight session) therefore reconstructed a bare model with no auth
prefix, leaving the Anthropic provider in Auto mode -- which now prefers
OAuth (commit 00e9b9ff) -- silently moving an API-key user onto the
subscription.

Fix:
- Add canonical_session_provider_key() to fold the picker vocabulary back
  onto the canonical keys, and apply it in the reconstruction/match
  helpers so either vocabulary recovers the exact OAuth-vs-API-key route.
- Carry route_api_method alongside provider_key when copying a parent
  session to a child (ambient, overnight, fork, selfdev, crash recovery)
  so children reconstruct the full route even without the canonicalizer.

Adds a regression test proving anthropic-api-key/openai-api-key/-oauth
provider keys preserve the auth route without route_api_method.
---
 crates/jcode-app-core/src/ambient/runner.rs   |  1 +
 crates/jcode-app-core/src/overnight.rs        |  1 +
 .../src/server/client_actions.rs              |  1 +
 .../jcode-app-core/src/tool/selfdev/launch.rs |  1 +
 crates/jcode-base/src/provider/selection.rs   | 94 ++++++++++++++++++-
 crates/jcode-base/src/session/crash.rs        |  1 +
 6 files changed, 98 insertions(+), 1 deletion(-)
diff --git a/crates/jcode-app-core/src/ambient/runner.rs b/crates/jcode-app-core/src/ambient/runner.rs
index 733f2da38..4c49ffcb9 100644
--- a/crates/jcode-app-core/src/ambient/runner.rs
+++ b/crates/jcode-app-core/src/ambient/runner.rs
@@ -419,6 +419,7 @@ impl AmbientRunnerHandle {
                 child.replace_messages(parent.messages.clone());
                 child.compaction = parent.compaction.clone();
                 child.provider_key = parent.provider_key.clone();
+                child.route_api_method = parent.route_api_method.clone();
                 child.model = parent.model.clone();
                 child.subagent_model = parent.subagent_model.clone();
                 child.improve_mode = parent.improve_mode;
diff --git a/crates/jcode-app-core/src/overnight.rs b/crates/jcode-app-core/src/overnight.rs
index a619cdaaf..e23c9663d 100644
--- a/crates/jcode-app-core/src/overnight.rs
+++ b/crates/jcode-app-core/src/overnight.rs
@@ -175,6 +175,7 @@ fn create_coordinator_session(parent: &Session, mission: &Option<String>) -> Res
     child.replace_messages(parent.messages.clone());
     child.compaction = parent.compaction.clone();
     child.provider_key = parent.provider_key.clone();
+    child.route_api_method = parent.route_api_method.clone();
     child.reasoning_effort = parent.reasoning_effort.clone();
     child.subagent_model = parent.subagent_model.clone();
     child.improve_mode = parent.improve_mode;
diff --git a/crates/jcode-app-core/src/server/client_actions.rs b/crates/jcode-app-core/src/server/client_actions.rs
index 77c34abf0..ead8ff6d6 100644
--- a/crates/jcode-app-core/src/server/client_actions.rs
+++ b/crates/jcode-app-core/src/server/client_actions.rs
@@ -737,6 +737,7 @@ fn create_transfer_child_session(
     child.working_dir = parent.working_dir.clone();
     child.model = parent.model.clone();
     child.provider_key = parent.provider_key.clone();
+    child.route_api_method = parent.route_api_method.clone();
     child.subagent_model = parent.subagent_model.clone();
     child.improve_mode = parent.improve_mode;
     child.autoreview_enabled = parent.autoreview_enabled;
diff --git a/crates/jcode-app-core/src/tool/selfdev/launch.rs b/crates/jcode-app-core/src/tool/selfdev/launch.rs
index f4ab3c39b..17194ebd1 100644
--- a/crates/jcode-app-core/src/tool/selfdev/launch.rs
+++ b/crates/jcode-app-core/src/tool/selfdev/launch.rs
@@ -20,6 +20,7 @@ pub fn enter_selfdev_session(
                 child.compaction = parent.compaction.clone();
                 child.model = parent.model.clone();
                 child.provider_key = parent.provider_key.clone();
+                child.route_api_method = parent.route_api_method.clone();
                 child.subagent_model = parent.subagent_model.clone();
                 child.improve_mode = parent.improve_mode;
                 child.autoreview_enabled = parent.autoreview_enabled;
diff --git a/crates/jcode-base/src/provider/selection.rs b/crates/jcode-base/src/provider/selection.rs
index b0362ada1..40a9ac453 100644
--- a/crates/jcode-base/src/provider/selection.rs
+++ b/crates/jcode-base/src/provider/selection.rs
@@ -192,6 +192,34 @@ impl MultiProvider {
         }
     }
 
+    /// Canonicalize a persisted session `provider_key` into the legacy
+    /// vocabulary the reconstruction helpers below understand.
+    ///
+    /// Two vocabularies persist into sessions and must be treated as
+    /// equivalent, otherwise the OAuth-vs-API-key auth mode is silently lost on
+    /// restore/model-switch:
+    ///
+    /// - Legacy `/model` + login path: `claude` / `claude-api` / `openai` /
+    ///   `openai-api`.
+    /// - Structured model-route picker (`RouteSelection::stable_id`):
+    ///   `claude-oauth` / `anthropic-api-key` / `openai-oauth` /
+    ///   `openai-api-key`.
+    ///
+    /// Both encode the same auth route; we fold the picker forms back onto the
+    /// canonical keys so a session whose `provider_key` is `anthropic-api-key`
+    /// (and whose `route_api_method` was not also persisted, e.g. inherited by a
+    /// child/forked session) still reconstructs the Anthropic API-key route
+    /// instead of falling through to Auto (which prefers OAuth).
+    pub(crate) fn canonical_session_provider_key(provider_key: &str) -> &str {
+        match provider_key.trim() {
+            "claude-oauth" => "claude",
+            "anthropic-api-key" => "claude-api",
+            "openai-oauth" => "openai",
+            "openai-api-key" => "openai-api",
+            other => other,
+        }
+    }
+
     fn explicit_session_provider_key_for_model_request(model_request: &str) -> Option<String> {
         let model_request = model_request.trim();
         if let Some((prefix, rest)) = model_request.split_once(':') {
@@ -287,7 +315,7 @@ impl MultiProvider {
     }
 
     fn session_provider_key_matches_provider_name(provider_key: &str, provider_name: &str) -> bool {
-        let provider_key = provider_key.trim();
+        let provider_key = Self::canonical_session_provider_key(provider_key.trim());
         let Some(derived) = Self::session_provider_key_from_provider_name(provider_name)
             .or_else(|| crate::session::derive_session_provider_key(provider_name))
         else {
@@ -342,6 +370,11 @@ impl MultiProvider {
         else {
             return model.to_string();
         };
+        // Fold the structured-picker vocabulary (`anthropic-api-key`,
+        // `openai-oauth`, ...) onto the canonical keys so the OAuth-vs-API-key
+        // route survives even when only `provider_key` was persisted (e.g. a
+        // forked/child session that inherited it without `route_api_method`).
+        let provider_key = Self::canonical_session_provider_key(provider_key);
 
         match provider_key {
             "claude-api" => format!("claude-api:{model}"),
@@ -615,6 +648,65 @@ mod tests {
         );
     }
 
+    #[test]
+    fn session_provider_key_picker_vocabulary_preserves_auth_mode_without_route() {
+        // The structured model-route picker persists `RuntimeKey::stable_id()`
+        // values (`anthropic-api-key`, `openai-oauth`, ...). When a child/forked
+        // session inherits only `provider_key` without `route_api_method`, the
+        // reconstruction helpers must still recover the exact OAuth-vs-API-key
+        // route instead of dropping to Auto (which prefers OAuth) and silently
+        // shifting an API-key user onto the subscription.
+        for (model, provider_key, expected_request) in [
+            (
+                "claude-opus-4-8",
+                Some("anthropic-api-key"),
+                "claude-api:claude-opus-4-8",
+            ),
+            (
+                "claude-opus-4-8",
+                Some("claude-oauth"),
+                "claude-oauth:claude-opus-4-8",
+            ),
+            ("gpt-5.5", Some("openai-api-key"), "openai-api:gpt-5.5"),
+            ("gpt-5.5", Some("openai-oauth"), "openai-oauth:gpt-5.5"),
+        ] {
+            assert_eq!(
+                MultiProvider::model_switch_request_for_session_model(model, provider_key),
+                expected_request,
+                "restore {model:?} with picker provider_key {provider_key:?}"
+            );
+        }
+
+        // The same picker vocabulary must be recognized as matching its provider
+        // so an auth-change rewrite keeps the persisted key instead of
+        // overwriting it with the canonical name (losing the auth mode).
+        for (model, provider_name, previous_key, expected_key) in [
+            (
+                "claude-opus-4-8",
+                "Anthropic",
+                Some("anthropic-api-key"),
+                Some("anthropic-api-key"),
+            ),
+            (
+                "gpt-5.5",
+                "OpenAI",
+                Some("openai-api-key"),
+                Some("openai-api-key"),
+            ),
+        ] {
+            assert_eq!(
+                MultiProvider::session_provider_key_after_model_switch(
+                    model,
+                    provider_name,
+                    previous_key,
+                )
+                .as_deref(),
+                expected_key,
+                "{model:?} via {provider_name:?} keeps picker key {previous_key:?}"
+            );
+        }
+    }
+
     #[test]
     fn route_defaults_are_derived_consistently() {
         let copilot = MultiProvider::default_model_selection_from_route(
diff --git a/crates/jcode-base/src/session/crash.rs b/crates/jcode-base/src/session/crash.rs
index 1896c4da1..d49a7dac9 100644
--- a/crates/jcode-base/src/session/crash.rs
+++ b/crates/jcode-base/src/session/crash.rs
@@ -128,6 +128,7 @@ fn recover_loaded_crashed_sessions(mut crashed: Vec<Session>) -> Result<Vec<Stri
         new_session.custom_title = old.custom_title.clone();
         new_session.working_dir = old.working_dir.clone();
         new_session.provider_key = old.provider_key.clone();
+        new_session.route_api_method = old.route_api_method.clone();
         new_session.model = old.model.clone();
         new_session.improve_mode = old.improve_mode;
         new_session.is_canary = old.is_canary;

From ef0339f045395849a35617d1bb33557f6ad7f9d0 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 01:17:17 -0700
Subject: [PATCH 02/57] telemetry: add served dashboard with accurate 'total
 users' headline

The worker previously only accepted POST /v1/event; there was no visual
dashboard (just SQL files run by hand). Add a real one.

Headline metric (users.sql + stats.js): total_users = distinct, non-CI
telemetry_id that ever installed OR did meaningful work. Validated with
sqlite edge-case repros (install-only, turn_end-only with lost
session_end, empty open/close, CI). Reported alongside broader tiers
(reached) and narrower tiers (core, installed) plus raw CI-inclusive
totals so no signal is removed.

- src/stats.js: read-only aggregation (counts only, never raw rows) over
  users, DAU/WAU/MAU rollup, installs, D7 retention, engagement quality,
  per-turn, errors, feature adoption, transport, version/os/channel/
  provider/auth/onboarding breakdowns, 60d timeseries, recent feedback.
  One shared MEANINGFUL_SQL predicate so every window agrees.
- src/worker.js: GET / serves the dashboard, GET /v1/stats serves JSON
  gated behind DASHBOARD_TOKEN (deny-by-default), POST /v1/event
  unchanged. CORS widened to GET.
- src/dashboard.js: self-contained HTML/CSS/inline-SVG dashboard (no CDN,
  works under Cloudflare). Tiered layout: hero total-users number, active
  funnel + chart, 'how the number is built' transparency band, then
  acquisition/retention, engagement, reliability, breakdowns, features,
  feedback. Importance shown via hero/key tags/muted diagnostics.
- README + package.json: dashboard usage, DASHBOARD_TOKEN setup, npm run
  users; type:module to silence ESM warning.

Validated: node --check on all modules, getStats end-to-end against a
seeded sqlite D1 shim (total_users=3 with CI excluded), and rendered in a
real browser (token gate + every section + charts).
---
 telemetry-worker/README.md        |  32 +++
 telemetry-worker/package.json     |   4 +-
 telemetry-worker/src/dashboard.js | 398 ++++++++++++++++++++++++++++++
 telemetry-worker/src/stats.js     | 274 ++++++++++++++++++++
 telemetry-worker/src/worker.js    |  57 ++++-
 telemetry-worker/users.sql        |  61 +++++
 6 files changed, 822 insertions(+), 4 deletions(-)
 create mode 100644 telemetry-worker/src/dashboard.js
 create mode 100644 telemetry-worker/src/stats.js
 create mode 100644 telemetry-worker/users.sql

diff --git a/telemetry-worker/README.md b/telemetry-worker/README.md
index 9d7b289c8..e58b87b6a 100644
--- a/telemetry-worker/README.md
+++ b/telemetry-worker/README.md
@@ -2,6 +2,38 @@
 
 Cloudflare Worker that receives anonymous telemetry events from jcode.
 
+## Dashboard
+
+The worker also serves a visual dashboard so you do not have to run SQL by hand:
+
+- `GET /` (or `/dashboard`) - the HTML dashboard. Public page, no data until a
+  token is entered.
+- `GET /v1/stats` - JSON aggregates (counts only, never raw event rows), gated
+  behind `DASHBOARD_TOKEN`. Accepts `Authorization: Bearer <token>`,
+  `?token=<token>`, or `X-Dashboard-Token`.
+- `POST /v1/event` - unchanged event ingest.
+
+The headline number is **Total users**: distinct, non-CI `telemetry_id`s that
+ever installed jcode OR did meaningful work in it. The page shows every metric
+the API returns, organized into tiers (hero / key cards / diagnostic tables) so
+the important numbers stand out while nothing is hidden. Each user tier (reached
+> total > core) is broader than the one below it, and CI / raw figures are shown
+alongside for transparency.
+
+Set the token once (it is a Worker secret, not in source):
+
+```bash
+wrangler secret put DASHBOARD_TOKEN
+# then open https://<your-worker-domain>/ and paste the token
+```
+
+If `DASHBOARD_TOKEN` is unset the stats endpoint stays locked (deny by default).
+The CLI equivalent of the headline number:
+
+```bash
+wrangler d1 execute jcode-telemetry --remote --file=users.sql
+```
+
 ## Setup
 
 1. Install wrangler: `npm install`
diff --git a/telemetry-worker/package.json b/telemetry-worker/package.json
index 72ddec2dc..693f266a1 100644
--- a/telemetry-worker/package.json
+++ b/telemetry-worker/package.json
@@ -2,6 +2,7 @@
   "name": "jcode-telemetry",
   "version": "1.0.0",
   "private": true,
+  "type": "module",
   "scripts": {
     "dev": "npx wrangler dev",
     "deploy": "npx wrangler deploy",
@@ -18,7 +19,8 @@
     "migrate:daily-active-backfill": "npx wrangler d1 execute jcode-telemetry --remote --file=migrations/0011_backfill_daily_active_recent.sql",
     "migrate:daily-active-ci": "npx wrangler d1 execute jcode-telemetry --remote --file=migrations/0012_daily_active_ci_flag.sql",
     "health": "npx wrangler d1 execute jcode-telemetry --remote --file=health.sql",
-    "dau": "npx wrangler d1 execute jcode-telemetry --remote --file=dau.sql"
+    "dau": "npx wrangler d1 execute jcode-telemetry --remote --file=dau.sql",
+    "users": "npx wrangler d1 execute jcode-telemetry --remote --file=users.sql"
   },
   "devDependencies": {
     "wrangler": "^4"
diff --git a/telemetry-worker/src/dashboard.js b/telemetry-worker/src/dashboard.js
new file mode 100644
index 000000000..f40246119
--- /dev/null
+++ b/telemetry-worker/src/dashboard.js
@@ -0,0 +1,398 @@
+// Self-contained dashboard page (HTML + CSS + JS, no external dependencies so it
+// works under Cloudflare with no CDN/CSP issues). Charts are drawn as inline SVG.
+//
+// The page fetches /v1/stats with the dashboard token (entered once, stored in
+// localStorage) and renders tiered metrics: a hero "total users" number, the
+// active-user funnel, then secondary KPIs and diagnostic breakdowns. Every
+// metric the API returns is shown; importance is conveyed visually (hero /
+// primary cards / muted diagnostic tables) and via short "why it matters" notes.
+
+export const DASHBOARD_HTML = `<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8" />
+<meta name="viewport" content="width=device-width, initial-scale=1" />
+<title>jcode telemetry</title>
+<style>
+  :root {
+    --bg: #0b0e14;
+    --bg-soft: #11151f;
+    --panel: #151a26;
+    --panel-2: #1b2231;
+    --line: #232c3d;
+    --text: #e6edf6;
+    --muted: #8a97ac;
+    --muted-2: #5d6982;
+    --accent: #5b9dff;
+    --accent-2: #7c5cff;
+    --good: #3fb950;
+    --warn: #d29922;
+    --bad: #f85149;
+    --radius: 14px;
+    --shadow: 0 1px 0 rgba(255,255,255,0.03) inset, 0 10px 30px rgba(0,0,0,0.35);
+  }
+  * { box-sizing: border-box; }
+  html, body { margin: 0; padding: 0; }
+  body {
+    background: radial-gradient(1200px 600px at 80% -10%, #16203a 0%, var(--bg) 55%) fixed;
+    color: var(--text);
+    font: 14px/1.5 -apple-system, BlinkMacSystemFont, "Segoe UI", Inter, Roboto, Helvetica, Arial, sans-serif;
+    -webkit-font-smoothing: antialiased;
+    min-height: 100vh;
+  }
+  a { color: var(--accent); text-decoration: none; }
+  .wrap { max-width: 1180px; margin: 0 auto; padding: 28px 22px 80px; }
+
+  header.top { display: flex; align-items: center; justify-content: space-between; gap: 16px; margin-bottom: 22px; flex-wrap: wrap; }
+  .brand { display: flex; align-items: center; gap: 12px; }
+  .logo { width: 34px; height: 34px; border-radius: 9px; background: linear-gradient(135deg, var(--accent), var(--accent-2)); display: grid; place-items: center; font-weight: 800; color: #fff; box-shadow: var(--shadow); }
+  .brand h1 { font-size: 17px; margin: 0; font-weight: 650; letter-spacing: .2px; }
+  .brand .sub { color: var(--muted); font-size: 12px; }
+  .top-actions { display: flex; align-items: center; gap: 10px; }
+  .pill { font-size: 12px; color: var(--muted); background: var(--panel); border: 1px solid var(--line); padding: 6px 11px; border-radius: 999px; }
+  button.btn { cursor: pointer; font: inherit; color: var(--text); background: var(--panel-2); border: 1px solid var(--line); padding: 7px 13px; border-radius: 9px; }
+  button.btn:hover { border-color: var(--accent); }
+
+  /* Hero */
+  .hero { display: grid; grid-template-columns: 1.15fr 1fr; gap: 18px; margin-bottom: 18px; }
+  @media (max-width: 860px) { .hero { grid-template-columns: 1fr; } }
+  .card { background: var(--panel); border: 1px solid var(--line); border-radius: var(--radius); box-shadow: var(--shadow); }
+  .hero-main { padding: 26px 28px; position: relative; overflow: hidden; }
+  .hero-main:before { content:""; position:absolute; right:-40px; top:-60px; width:240px; height:240px; background: radial-gradient(circle, rgba(91,157,255,.20), transparent 60%); }
+  .eyebrow { text-transform: uppercase; letter-spacing: 1.4px; font-size: 11px; color: var(--muted); font-weight: 600; }
+  .hero-number { font-size: 68px; font-weight: 750; line-height: 1.02; margin: 6px 0 2px; letter-spacing: -1.5px; background: linear-gradient(180deg, #fff, #b9c6dd); -webkit-background-clip: text; background-clip: text; color: transparent; }
+  .hero-note { color: var(--muted); font-size: 13px; max-width: 46ch; }
+  .hero-sub { display: flex; gap: 26px; margin-top: 18px; flex-wrap: wrap; }
+  .hero-sub .k { font-size: 11px; color: var(--muted); text-transform: uppercase; letter-spacing: .6px; }
+  .hero-sub .v { font-size: 22px; font-weight: 650; }
+
+  .hero-side { padding: 18px 20px; display: grid; grid-template-rows: auto 1fr; }
+  .hero-side h3 { margin: 2px 0 10px; font-size: 13px; color: var(--muted); font-weight: 600; }
+
+  /* Section + grid */
+  .section-title { display: flex; align-items: baseline; gap: 10px; margin: 26px 2px 12px; }
+  .section-title h2 { font-size: 14px; margin: 0; font-weight: 650; letter-spacing: .3px; }
+  .section-title .hint { color: var(--muted-2); font-size: 12px; }
+  .grid { display: grid; gap: 14px; }
+  .g4 { grid-template-columns: repeat(4, 1fr); }
+  .g3 { grid-template-columns: repeat(3, 1fr); }
+  .g2 { grid-template-columns: repeat(2, 1fr); }
+  .hero-kpis { grid-template-columns: repeat(3, 1fr); }
+  .hero-kpis .kpi { padding: 12px 12px; }
+  .hero-kpis .num { font-size: 23px; }
+  .hero-kpis .meta { font-size: 11px; }
+  @media (max-width: 980px) { .g4 { grid-template-columns: repeat(2, 1fr); } .g3 { grid-template-columns: repeat(2, 1fr); } }
+  @media (max-width: 620px) { .g4, .g3, .g2 { grid-template-columns: 1fr; } }
+
+  .kpi { padding: 16px 17px; }
+  .kpi .label { color: var(--muted); font-size: 12px; display: flex; align-items: center; gap: 7px; }
+  .kpi .num { font-size: 28px; font-weight: 700; margin-top: 7px; letter-spacing: -.5px; }
+  .kpi .meta { color: var(--muted-2); font-size: 12px; margin-top: 3px; }
+  .dot { width: 7px; height: 7px; border-radius: 50%; display: inline-block; }
+  .dot.p1 { background: var(--accent); } .dot.p2 { background: var(--muted); } .dot.p3 { background: var(--muted-2); }
+  .tag { font-size: 10px; text-transform: uppercase; letter-spacing: .5px; padding: 2px 7px; border-radius: 6px; border: 1px solid var(--line); color: var(--muted); }
+  .tag.key { color: var(--accent); border-color: rgba(91,157,255,.35); background: rgba(91,157,255,.08); }
+
+  .panel { padding: 18px 18px 8px; }
+  .panel h3 { margin: 0 0 4px; font-size: 13px; font-weight: 650; }
+  .panel .desc { color: var(--muted-2); font-size: 12px; margin: 0 0 12px; }
+
+  table { width: 100%; border-collapse: collapse; font-size: 13px; }
+  th, td { text-align: left; padding: 7px 6px; border-bottom: 1px solid var(--line); }
+  th { color: var(--muted); font-weight: 600; font-size: 11px; text-transform: uppercase; letter-spacing: .5px; }
+  td.num, th.num { text-align: right; font-variant-numeric: tabular-nums; }
+  .bar { height: 7px; border-radius: 4px; background: linear-gradient(90deg, var(--accent), var(--accent-2)); }
+  .bar-track { background: var(--panel-2); border-radius: 4px; overflow: hidden; }
+
+  .muted { color: var(--muted); } .small { font-size: 12px; }
+  .legend { display: flex; gap: 14px; align-items: center; font-size: 12px; color: var(--muted); margin-bottom: 8px; flex-wrap: wrap; }
+  .legend i { width: 10px; height: 10px; border-radius: 3px; display: inline-block; margin-right: 5px; vertical-align: -1px; }
+
+  .feedback-item { padding: 11px 0; border-bottom: 1px solid var(--line); }
+  .feedback-item .q { color: var(--text); }
+  .feedback-item .m { color: var(--muted-2); font-size: 11px; margin-top: 3px; }
+
+  /* token gate */
+  .gate { max-width: 420px; margin: 12vh auto 0; text-align: center; }
+  .gate .card { padding: 28px 26px; }
+  .gate input { width: 100%; margin: 14px 0; padding: 11px 13px; border-radius: 10px; border: 1px solid var(--line); background: var(--bg-soft); color: var(--text); font: inherit; }
+  .gate button { width: 100%; padding: 11px; font-weight: 600; }
+  .err { color: var(--bad); font-size: 13px; min-height: 18px; }
+  .hidden { display: none !important; }
+  .foot { color: var(--muted-2); font-size: 12px; margin-top: 30px; text-align: center; }
+  .spin { display:inline-block; width:14px; height:14px; border:2px solid var(--line); border-top-color: var(--accent); border-radius:50%; animation: sp 0.8s linear infinite; vertical-align:-2px; }
+  @keyframes sp { to { transform: rotate(360deg); } }
+</style>
+</head>
+<body>
+<div class="wrap">
+  <!-- token gate -->
+  <div id="gate" class="gate hidden">
+    <div class="card">
+      <div class="logo" style="margin:0 auto 14px">jc</div>
+      <h1 style="margin:0 0 4px;font-size:18px">jcode telemetry</h1>
+      <div class="muted small">Enter the dashboard token to view stats.</div>
+      <input id="token" type="password" placeholder="dashboard token" autocomplete="off" />
+      <button class="btn" id="unlock">Unlock</button>
+      <div class="err" id="gate-err"></div>
+    </div>
+  </div>
+
+  <!-- dashboard -->
+  <div id="app" class="hidden">
+    <header class="top">
+      <div class="brand">
+        <div class="logo">jc</div>
+        <div>
+          <h1>jcode telemetry</h1>
+          <div class="sub" id="generated">live product analytics</div>
+        </div>
+      </div>
+      <div class="top-actions">
+        <span class="pill" id="freshness">—</span>
+        <button class="btn" id="refresh">Refresh</button>
+        <button class="btn" id="logout">Lock</button>
+      </div>
+    </header>
+
+    <div id="content"></div>
+
+    <div class="foot">
+      Users are distinct anonymous <code>telemetry_id</code>s. Headline numbers exclude CI runners and dev/non-release builds.
+      Raw and CI-inclusive figures are shown in the diagnostic tiers so nothing is hidden.
+    </div>
+  </div>
+</div>
+
+<script>
+const fmt = (n) => (n == null ? "—" : Number(n).toLocaleString());
+const pct = (x) => (x == null ? "—" : (x * 100).toFixed(1) + "%");
+const ms = (x) => (x == null ? "—" : x >= 1000 ? (x/1000).toFixed(1) + "s" : Math.round(x) + "ms");
+const dec = (x, d=2) => (x == null ? "—" : Number(x).toFixed(d));
+const el = (h) => { const t = document.createElement("template"); t.innerHTML = h.trim(); return t.content.firstChild; };
+const esc = (s) => String(s == null ? "" : s).replace(/[&<>"]/g, c => ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;"}[c]));
+
+let TOKEN = localStorage.getItem("jcode_dash_token") || "";
+
+function showGate(msg) {
+  document.getElementById("app").classList.add("hidden");
+  document.getElementById("gate").classList.remove("hidden");
+  document.getElementById("gate-err").textContent = msg || "";
+}
+function showApp() {
+  document.getElementById("gate").classList.add("hidden");
+  document.getElementById("app").classList.remove("hidden");
+}
+
+async function load() {
+  if (!TOKEN) { showGate(""); return; }
+  document.getElementById("content").innerHTML = '<div class="muted" style="padding:40px 0"><span class="spin"></span> loading…</div>';
+  showApp();
+  let res;
+  try {
+    res = await fetch("/v1/stats?token=" + encodeURIComponent(TOKEN), { headers: { "Authorization": "Bearer " + TOKEN } });
+  } catch (e) { showGate("Network error."); return; }
+  if (res.status === 401) { localStorage.removeItem("jcode_dash_token"); TOKEN = ""; showGate("Invalid token."); return; }
+  if (!res.ok) { document.getElementById("content").innerHTML = '<div class="err">Failed to load stats ('+res.status+').</div>'; return; }
+  const data = await res.json();
+  render(data);
+}
+
+function kpi(label, value, meta, tier, isKey) {
+  return \`<div class="card kpi">
+    <div class="label"><span class="dot p\${tier||1}"></span>\${esc(label)} \${isKey?'<span class="tag key">key</span>':''}</div>
+    <div class="num">\${value}</div>
+    <div class="meta">\${meta||""}</div>
+  </div>\`;
+}
+
+function barTable(title, desc, rows, keyName, valName, total) {
+  const max = Math.max(1, ...rows.map(r => r.value));
+  const body = rows.map(r => \`<tr>
+      <td>\${esc(r.label)}</td>
+      <td style="width:45%"><div class="bar-track"><div class="bar" style="width:\${Math.max(2,(r.value/max)*100)}%"></div></div></td>
+      <td class="num">\${fmt(r.value)}</td>
+    </tr>\`).join("");
+  return \`<div class="card panel">
+    <h3>\${esc(title)}</h3><p class="desc">\${esc(desc)}</p>
+    <table><thead><tr><th>\${esc(keyName)}</th><th>share</th><th class="num">\${esc(valName)}</th></tr></thead><tbody>\${body || '<tr><td class="muted" colspan="3">no data</td></tr>'}</tbody></table>
+  </div>\`;
+}
+
+function lineChart(series, opts) {
+  // series: [{name,color,points:[{date,value}]}]
+  const W = 760, H = 220, padL = 36, padR = 12, padT = 14, padB = 26;
+  const dates = series[0] ? series[0].points.map(p => p.date) : [];
+  if (!dates.length) return '<div class="muted small" style="padding:18px">no timeseries yet</div>';
+  const maxV = Math.max(1, ...series.flatMap(s => s.points.map(p => p.value)));
+  const x = (i) => padL + (i/(Math.max(1,dates.length-1)))*(W-padL-padR);
+  const y = (v) => padT + (1 - v/maxV)*(H-padT-padB);
+  const grid = [0,0.25,0.5,0.75,1].map(f => {
+    const gy = padT + f*(H-padT-padB); const val = Math.round(maxV*(1-f));
+    return \`<line x1="\${padL}" y1="\${gy}" x2="\${W-padR}" y2="\${gy}" stroke="#232c3d" stroke-width="1"/><text x="4" y="\${gy+3}" fill="#5d6982" font-size="10">\${val}</text>\`;
+  }).join("");
+  const paths = series.map(s => {
+    const d = s.points.map((p,i) => (i?'L':'M')+x(i).toFixed(1)+' '+y(p.value).toFixed(1)).join(' ');
+    return \`<path d="\${d}" fill="none" stroke="\${s.color}" stroke-width="2" stroke-linejoin="round"/>\`;
+  }).join("");
+  const lbl = (i) => \`<text x="\${x(i)}" y="\${H-8}" fill="#5d6982" font-size="10" text-anchor="middle">\${dates[i].slice(5)}</text>\`;
+  const ticks = dates.length>1 ? [0, Math.floor(dates.length/2), dates.length-1].map(lbl).join("") : "";
+  const legend = series.map(s => \`<span><i style="background:\${s.color}"></i>\${esc(s.name)}</span>\`).join("");
+  return \`<div class="legend">\${legend}</div><svg viewBox="0 0 \${W} \${H}" width="100%" preserveAspectRatio="xMidYMid meet">\${grid}\${paths}\${ticks}</svg>\`;
+}
+
+function render(d) {
+  document.getElementById("generated").textContent = "updated " + new Date(d.generated_at).toLocaleString();
+  document.getElementById("freshness").textContent = "as of " + new Date(d.generated_at).toLocaleTimeString();
+  const c = document.getElementById("content");
+  const u = d.users, a = d.active, lc = d.lifecycle, q = d.quality, ret = d.retention;
+
+  // hero + active funnel timeseries
+  const ts = (d.timeseries.daily || []);
+  const headlineSeries = [
+    { name: "headline DAU", color: "#5b9dff", points: ts.map(r => ({date:r.date, value:r.headline})) },
+    { name: "meaningful", color: "#7c5cff", points: ts.map(r => ({date:r.date, value:r.meaningful})) },
+    { name: "raw", color: "#39507a", points: ts.map(r => ({date:r.date, value:r.raw})) },
+  ];
+
+  let html = "";
+
+  // ---- HERO ----
+  html += \`<div class="hero">
+    <div class="card hero-main">
+      <div class="eyebrow">Total users</div>
+      <div class="hero-number">\${fmt(u.total_users)}</div>
+      <div class="hero-note">Distinct real people who installed or did meaningful work in jcode. Excludes CI runners and counts each anonymous machine id once. This is the headline number.</div>
+      <div class="hero-sub">
+        <div><div class="k">Core (did work)</div><div class="v">\${fmt(u.core_users)}</div></div>
+        <div><div class="k">Installed</div><div class="v">\${fmt(u.installed_users)}</div></div>
+        <div><div class="k">Reached (ran it)</div><div class="v">\${fmt(u.reached_users)}</div></div>
+      </div>
+    </div>
+    <div class="card hero-side">
+      <h3>Active users (distinct, headline definition)</h3>
+      <div class="grid hero-kpis" style="gap:10px">
+        \${kpi("DAU", fmt(a.dau), "today, meaningful + release", 1, true)}
+        \${kpi("WAU", fmt(a.wau), "last 7 days", 1, true)}
+        \${kpi("MAU", fmt(a.mau), "last 30 days", 1, true)}
+      </div>
+      <div style="margin-top:12px">\${lineChart(headlineSeries, {})}</div>
+    </div>
+  </div>\`;
+
+  // ---- Why these differ (transparency band) ----
+  html += \`<div class="section-title"><h2>How the user number is built</h2><span class="hint">each tier is broader than the one below it; nothing is dropped</span></div>\`;
+  html += \`<div class="grid g4">
+    \${kpi("Reached", fmt(u.reached_users), "ran jcode at least once (non-CI)", 2)}
+    \${kpi("Total users", fmt(u.total_users), "installed OR did meaningful work", 1, true)}
+    \${kpi("Core users", fmt(u.core_users), "did meaningful work", 2)}
+    \${kpi("CI ids (excluded)", fmt(u.ci_ids), "ephemeral runners, filtered out", 3)}
+  </div>
+  <div class="grid g2" style="margin-top:14px">
+    \${kpi("All ids incl. CI + dev", fmt(u.all_ids_including_ci), "raw upper bound, never used as headline", 3)}
+    \${kpi("Installed users", fmt(u.installed_users), "distinct non-CI install events", 2)}
+  </div>\`;
+
+  // ---- Acquisition & retention ----
+  html += \`<div class="section-title"><h2>Acquisition &amp; retention</h2><span class="hint">important: are new users sticking?</span></div>\`;
+  html += \`<div class="grid g4">
+    \${kpi("Install events", fmt(lc.install_events), fmt(lc.install_ids_noci)+" distinct (non-CI)", 2)}
+    \${kpi("Upgrades", fmt(lc.upgrade_events), "version bumps observed", 3)}
+    \${kpi("D7 retention", pct(ret.d7_retention), (ret.d7_retained||0)+" of "+(ret.d7_cohort||0)+" returned", 1, true)}
+    \${kpi("Multi-session rate", pct(q.multi_session_rate), "users running >1 session at once", 3)}
+  </div>\`;
+  html += \`<div class="grid g2" style="margin-top:14px">
+    <div class="card panel"><h3>Daily active users (60d)</h3><p class="desc">headline = meaningful work on a release build, excluding CI. raw = anyone who launched.</p>\${lineChart(headlineSeries, {})}</div>
+    <div class="card panel"><h3>New installs / day (60d, non-CI)</h3><p class="desc">distinct ids whose first install event landed that day.</p>\${lineChart([{name:"installs",color:"#3fb950",points:(d.timeseries.installs||[]).map(r=>({date:r.date,value:r.installs}))}], {})}</div>
+  </div>\`;
+
+  // ---- Engagement quality ----
+  html += \`<div class="section-title"><h2>Engagement quality</h2><span class="hint">30-day, non-CI sessions</span></div>\`;
+  html += \`<div class="grid g4">
+    \${kpi("Avg session length", dec(q.avg_session_mins,1)+" min", "per meaningful session", 2)}
+    \${kpi("Avg turns / session", dec(q.avg_turns,1), "user prompts per session", 2)}
+    \${kpi("Session success rate", pct(q.success_rate), "ended in a successful state", 1, true)}
+    \${kpi("Abandon rate", pct(q.abandon_rate), "left before first response", 2)}
+  </div>
+  <div class="grid g4" style="margin-top:14px">
+    \${kpi("Turn success rate", pct(d.turns.turn_success_rate), "per-turn, 30d", 2)}
+    \${kpi("Avg turn time", ms(d.turns.avg_turn_ms), "active duration per turn", 3)}
+    \${kpi("Time to first response", ms(q.avg_first_response_ms), "agent responsiveness", 2)}
+    \${kpi("Avg tool latency", ms(q.avg_tool_latency_ms), "per executed tool call", 3)}
+  </div>
+  <div class="grid g2" style="margin-top:14px">
+    \${kpi("Tokens (30d)", fmt(q.tokens_30d), "input + output across sessions", 3)}
+    \${kpi("Crash rate", pct(lc.crash_rate)+"  ·  completion "+(lc.lifecycle_completion_ratio==null?"—":lc.lifecycle_completion_ratio), "session_crash share / (ends+crashes)/starts", 1, true)}
+  </div>\`;
+
+  // ---- Reliability / errors ----
+  const e = d.errors;
+  html += \`<div class="section-title"><h2>Reliability</h2><span class="hint">error counts, 30d non-CI — watch for spikes</span></div>\`;
+  html += \`<div class="grid g4">
+    \${kpi("Provider timeouts", fmt(e.provider_timeout), "", (e.provider_timeout>0?1:3))}
+    \${kpi("Rate limited", fmt(e.rate_limited), "", (e.rate_limited>0?2:3))}
+    \${kpi("Auth failures", fmt(e.auth_failed), "", (e.auth_failed>0?1:3))}
+    \${kpi("Tool / MCP errors", fmt((e.tool_error||0)+(e.mcp_error||0)), fmt(e.tool_error)+" tool · "+fmt(e.mcp_error)+" mcp", 3)}
+  </div>\`;
+
+  // ---- Breakdowns ----
+  const b = d.breakdowns;
+  const rows = (arr, k) => (arr||[]).map(r => ({ label: r[k] ?? "unknown", value: r.users }));
+  html += \`<div class="section-title"><h2>Who &amp; what</h2><span class="hint">distinct users per bucket</span></div>\`;
+  html += \`<div class="grid g2">
+    \${barTable("Versions", "adoption by release (non-CI users)", rows(b.versions,"version"), "version", "users")}
+    \${barTable("Operating system", "OS split", rows(b.os,"os"), "os", "users")}
+  </div>
+  <div class="grid g2" style="margin-top:14px">
+    \${barTable("Providers", "meaningful sessions by provider", rows(b.providers,"provider"), "provider", "users")}
+    \${barTable("Auth method", "successful auth by provider", rows(b.auth,"auth_provider"), "provider", "users")}
+  </div>
+  <div class="grid g2" style="margin-top:14px">
+    \${barTable("Build channel", "incl. dev/local; release is the headline channel", rows(b.channels,"build_channel"), "channel", "users")}
+    \${barTable("Onboarding funnel", "distinct users reaching each step", rows(b.onboarding,"step"), "step", "users")}
+  </div>\`;
+
+  // ---- Feature adoption ----
+  const f = d.features;
+  const featRows = Object.entries(f||{}).map(([k,v]) => ({label:k.replace(/_/g,' '), value:v})).sort((a,b)=>b.value-a.value);
+  html += \`<div class="section-title"><h2>Feature adoption</h2><span class="hint">distinct users using each feature, 30d</span></div>\`;
+  html += \`<div class="grid g2">
+    \${barTable("Features", "how many users touched each capability", featRows, "feature", "users")}
+    \${transportPanel(d.transport)}
+  </div>\`;
+
+  // ---- Feedback ----
+  if ((d.feedback||[]).length) {
+    html += \`<div class="section-title"><h2>Recent feedback</h2><span class="hint">explicit user submissions</span></div>\`;
+    html += \`<div class="card panel">\` + d.feedback.map(fb => \`
+      <div class="feedback-item">
+        <div class="q">\${esc(fb.feedback_text)}</div>
+        <div class="m">\${esc(new Date(fb.created_at+'Z').toLocaleString())} · v\${esc(fb.version||'?')}\${fb.feedback_rating?' · '+esc(fb.feedback_rating):''}\${fb.feedback_reason?' · '+esc(fb.feedback_reason):''}</div>
+      </div>\`).join("") + \`</div>\`;
+  }
+
+  c.innerHTML = html;
+}
+
+function transportPanel(t) {
+  const rows = [
+    ["https", t.https], ["ws fresh", t.ws_fresh], ["ws reuse", t.ws_reuse],
+    ["cli subprocess", t.cli], ["native http2", t.native_http2], ["other", t.other],
+  ].map(([label,value]) => ({label, value: value||0})).sort((a,b)=>b.value-a.value);
+  return barTable("Transport mix", "request transport counts (30d non-CI)", rows, "transport", "count");
+}
+
+// events
+document.getElementById("unlock").addEventListener("click", () => {
+  const v = document.getElementById("token").value.trim();
+  if (!v) { document.getElementById("gate-err").textContent = "Enter a token."; return; }
+  TOKEN = v; localStorage.setItem("jcode_dash_token", v); load();
+});
+document.getElementById("token") && document.getElementById("token").addEventListener("keydown", (e)=>{ if(e.key==="Enter") document.getElementById("unlock").click(); });
+document.getElementById("refresh").addEventListener("click", load);
+document.getElementById("logout").addEventListener("click", () => { localStorage.removeItem("jcode_dash_token"); TOKEN=""; showGate(""); });
+
+load();
+</script>
+</body>
+</html>`;
diff --git a/telemetry-worker/src/stats.js b/telemetry-worker/src/stats.js
new file mode 100644
index 000000000..9ee9d0881
--- /dev/null
+++ b/telemetry-worker/src/stats.js
@@ -0,0 +1,274 @@
+// Read-only telemetry aggregation for the dashboard.
+//
+// Everything here returns counts/aggregates only, never raw event rows. Metrics
+// are organized into tiers (headline / secondary / diagnostic) and tagged with
+// importance so the dashboard can present "the one number" prominently while
+// still surfacing all available information.
+//
+// Accuracy rules (mirrors README "Accuracy notes"):
+//   - Users are distinct telemetry_id, never event counts.
+//   - "meaningful" = real work; see MEANINGFUL_SQL.
+//   - Headline numbers exclude CI traffic (is_ci = 1) and non-release channels.
+//   - Raw / less-filtered tiers are always reported alongside, never removed.
+
+// Meaningful-activity predicate, shared by every query so all windows agree.
+// A row is meaningful if it is a session_end/session_crash that did real work,
+// OR a turn_end (which only fires after a completed user turn) that did work.
+const MEANINGFUL_SQL = `(
+  (event IN ('session_end','session_crash') AND (
+    turns > 0 OR had_user_prompt > 0 OR had_assistant_response > 0
+    OR assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0
+    OR duration_secs > 0 OR error_provider_timeout > 0 OR error_auth_failed > 0
+    OR error_tool_error > 0 OR error_mcp_error > 0 OR error_rate_limited > 0
+    OR provider_switches > 0 OR model_switches > 0
+  ))
+  OR (event = 'turn_end' AND (
+    assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0
+    OR file_write_calls > 0 OR tests_run > 0 OR turn_success > 0
+  ))
+)`;
+
+const LIFECYCLE_EVENTS = "('session_start','turn_end','session_end','session_crash')";
+
+async function one(env, sql) {
+  const result = await env.DB.prepare(sql).all();
+  return (result.results && result.results[0]) || {};
+}
+
+async function many(env, sql) {
+  const result = await env.DB.prepare(sql).all();
+  return result.results || [];
+}
+
+export async function getStats(env) {
+  // --- Headline: total users (the one number) -----------------------------
+  // A user is a distinct non-CI id that ever installed OR did meaningful work.
+  const totals = await one(env, `
+    SELECT
+      COUNT(DISTINCT CASE WHEN is_ci = 0 AND (event = 'install' OR ${MEANINGFUL_SQL}) THEN telemetry_id END) AS total_users,
+      COUNT(DISTINCT CASE WHEN is_ci = 0 AND ${MEANINGFUL_SQL} THEN telemetry_id END) AS core_users,
+      COUNT(DISTINCT CASE WHEN is_ci = 0 THEN telemetry_id END) AS reached_users,
+      COUNT(DISTINCT CASE WHEN is_ci = 0 AND event = 'install' THEN telemetry_id END) AS installed_users,
+      COUNT(DISTINCT telemetry_id) AS all_ids_including_ci,
+      COUNT(DISTINCT CASE WHEN is_ci = 1 THEN telemetry_id END) AS ci_ids
+    FROM events
+  `);
+
+  // --- Active users from the rollup (cheap, ingest-time) -------------------
+  // DAU/WAU/MAU as distinct ids, headline = meaningful + release + non-CI.
+  const active = await one(env, `
+    SELECT
+      COUNT(DISTINCT CASE WHEN activity_date = date('now') THEN telemetry_id END) AS dau_raw,
+      COUNT(DISTINCT CASE WHEN activity_date = date('now') AND meaningful_active > 0 THEN telemetry_id END) AS dau_meaningful,
+      COUNT(DISTINCT CASE WHEN activity_date = date('now') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS dau,
+      COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') THEN telemetry_id END) AS wau_raw,
+      COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') AND meaningful_active > 0 THEN telemetry_id END) AS wau_meaningful,
+      COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS wau,
+      COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') THEN telemetry_id END) AS mau_raw,
+      COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND meaningful_active > 0 THEN telemetry_id END) AS mau_meaningful,
+      COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS mau,
+      COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND last_is_ci = 1 THEN telemetry_id END) AS ci_mau
+    FROM daily_active_users
+  `);
+
+  // --- Installs and lifecycle totals --------------------------------------
+  const lifecycle = await one(env, `
+    SELECT
+      SUM(CASE WHEN event = 'install' THEN 1 ELSE 0 END) AS install_events,
+      SUM(CASE WHEN event = 'upgrade' THEN 1 ELSE 0 END) AS upgrade_events,
+      SUM(CASE WHEN event = 'session_start' THEN 1 ELSE 0 END) AS session_starts,
+      SUM(CASE WHEN event = 'session_end' THEN 1 ELSE 0 END) AS session_ends,
+      SUM(CASE WHEN event = 'session_crash' THEN 1 ELSE 0 END) AS session_crashes,
+      SUM(CASE WHEN event = 'turn_end' THEN 1 ELSE 0 END) AS turn_ends,
+      COUNT(DISTINCT CASE WHEN event = 'install' THEN telemetry_id END) AS install_ids,
+      COUNT(DISTINCT CASE WHEN event = 'install' AND is_ci = 0 THEN telemetry_id END) AS install_ids_noci
+    FROM events
+    WHERE event IN ('install','upgrade','session_start','turn_end','session_end','session_crash')
+  `);
+  const lifecycleCompletion =
+    (lifecycle.session_starts || 0) > 0
+      ? Number(((lifecycle.session_ends + lifecycle.session_crashes) / lifecycle.session_starts).toFixed(3))
+      : null;
+  const crashRate =
+    (lifecycle.session_ends + lifecycle.session_crashes) > 0
+      ? Number((lifecycle.session_crashes / (lifecycle.session_ends + lifecycle.session_crashes)).toFixed(4))
+      : null;
+
+  // --- New vs returning (last 30d), retention -----------------------------
+  const retention = await one(env, `
+    WITH cohort AS (
+      SELECT DISTINCT telemetry_id FROM events
+      WHERE event = 'install' AND is_ci = 0
+        AND created_at >= datetime('now','-14 days') AND created_at < datetime('now','-7 days')
+    ), retained AS (
+      SELECT DISTINCT telemetry_id FROM events
+      WHERE event IN ('session_end','session_crash') AND is_ci = 0
+        AND created_at >= datetime('now','-7 days')
+    )
+    SELECT
+      (SELECT COUNT(*) FROM cohort) AS d7_cohort,
+      (SELECT COUNT(*) FROM cohort WHERE telemetry_id IN retained) AS d7_retained
+  `);
+  const d7Retention =
+    (retention.d7_cohort || 0) > 0
+      ? Number((retention.d7_retained / retention.d7_cohort).toFixed(3))
+      : null;
+
+  // --- 30d engagement quality ---------------------------------------------
+  const quality = await one(env, `
+    SELECT
+      AVG(duration_mins) AS avg_session_mins,
+      AVG(turns) AS avg_turns,
+      AVG(CASE WHEN session_success > 0 THEN 1.0 ELSE 0.0 END) AS success_rate,
+      AVG(CASE WHEN abandoned_before_response > 0 THEN 1.0 ELSE 0.0 END) AS abandon_rate,
+      AVG(first_assistant_response_ms) AS avg_first_response_ms,
+      AVG(CASE WHEN executed_tool_calls > 0 THEN CAST(tool_latency_total_ms AS REAL)/executed_tool_calls END) AS avg_tool_latency_ms,
+      SUM(input_tokens + output_tokens) AS tokens_30d,
+      AVG(CASE WHEN multi_sessioned > 0 THEN 1.0 ELSE 0.0 END) AS multi_session_rate
+    FROM events
+    WHERE event IN ('session_end','session_crash')
+      AND is_ci = 0 AND created_at > datetime('now','-30 days')
+  `);
+
+  // --- Per-turn metrics (30d) ---------------------------------------------
+  const turns = await one(env, `
+    SELECT
+      AVG(turn_active_duration_ms) AS avg_turn_ms,
+      AVG(CASE WHEN turn_success > 0 THEN 1.0 ELSE 0.0 END) AS turn_success_rate
+    FROM events
+    WHERE event = 'turn_end' AND is_ci = 0 AND created_at > datetime('now','-30 days')
+  `);
+
+  // --- Errors (30d) --------------------------------------------------------
+  const errors = await one(env, `
+    SELECT
+      SUM(error_provider_timeout) AS provider_timeout,
+      SUM(error_auth_failed) AS auth_failed,
+      SUM(error_tool_error) AS tool_error,
+      SUM(error_mcp_error) AS mcp_error,
+      SUM(error_rate_limited) AS rate_limited
+    FROM events
+    WHERE event IN ('session_end','session_crash') AND is_ci = 0
+      AND created_at > datetime('now','-30 days')
+  `);
+
+  // --- Feature adoption (30d, distinct users) -----------------------------
+  const features = await one(env, `
+    SELECT
+      COUNT(DISTINCT CASE WHEN feature_memory_used > 0 THEN telemetry_id END) AS memory,
+      COUNT(DISTINCT CASE WHEN feature_swarm_used > 0 THEN telemetry_id END) AS swarm,
+      COUNT(DISTINCT CASE WHEN feature_web_used > 0 THEN telemetry_id END) AS web,
+      COUNT(DISTINCT CASE WHEN feature_email_used > 0 THEN telemetry_id END) AS email,
+      COUNT(DISTINCT CASE WHEN feature_mcp_used > 0 THEN telemetry_id END) AS mcp,
+      COUNT(DISTINCT CASE WHEN feature_side_panel_used > 0 THEN telemetry_id END) AS side_panel,
+      COUNT(DISTINCT CASE WHEN feature_goal_used > 0 THEN telemetry_id END) AS goal,
+      COUNT(DISTINCT CASE WHEN feature_selfdev_used > 0 THEN telemetry_id END) AS selfdev,
+      COUNT(DISTINCT CASE WHEN feature_background_used > 0 THEN telemetry_id END) AS background,
+      COUNT(DISTINCT CASE WHEN feature_subagent_used > 0 THEN telemetry_id END) AS subagent
+    FROM events
+    WHERE event IN ('session_end','session_crash') AND is_ci = 0
+      AND created_at > datetime('now','-30 days')
+  `);
+
+  // --- Transport mix (30d) -------------------------------------------------
+  const transport = await one(env, `
+    SELECT
+      SUM(transport_https) AS https,
+      SUM(transport_persistent_ws_fresh) AS ws_fresh,
+      SUM(transport_persistent_ws_reuse) AS ws_reuse,
+      SUM(transport_cli_subprocess) AS cli,
+      SUM(transport_native_http2) AS native_http2,
+      SUM(transport_other) AS other
+    FROM events
+    WHERE event IN ('session_end','session_crash') AND is_ci = 0
+      AND created_at > datetime('now','-30 days')
+  `);
+
+  // --- Breakdowns (distinct users) ----------------------------------------
+  const versions = await many(env, `
+    SELECT version, COUNT(DISTINCT telemetry_id) AS users
+    FROM events WHERE is_ci = 0 AND version IS NOT NULL
+    GROUP BY version ORDER BY users DESC LIMIT 12
+  `);
+  const os = await many(env, `
+    SELECT os, COUNT(DISTINCT telemetry_id) AS users
+    FROM events WHERE is_ci = 0 AND os IS NOT NULL
+    GROUP BY os ORDER BY users DESC
+  `);
+  const channels = await many(env, `
+    SELECT COALESCE(build_channel,'unknown') AS build_channel, COUNT(DISTINCT telemetry_id) AS users
+    FROM events WHERE event IN ('session_end','session_crash')
+    GROUP BY build_channel ORDER BY users DESC
+  `);
+  const providers = await many(env, `
+    SELECT COALESCE(provider_end,'unknown') AS provider, COUNT(DISTINCT telemetry_id) AS users
+    FROM events WHERE event IN ('session_end','session_crash') AND is_ci = 0 AND ${MEANINGFUL_SQL}
+    GROUP BY provider_end ORDER BY users DESC LIMIT 12
+  `);
+  const auth = await many(env, `
+    SELECT COALESCE(auth_provider,'unknown') AS auth_provider, COUNT(DISTINCT telemetry_id) AS users
+    FROM events WHERE event = 'auth_success' AND is_ci = 0
+    GROUP BY auth_provider ORDER BY users DESC LIMIT 12
+  `);
+  const onboarding = await many(env, `
+    SELECT step, COUNT(DISTINCT telemetry_id) AS users
+    FROM events WHERE event = 'onboarding_step' AND is_ci = 0 AND step IS NOT NULL
+    GROUP BY step ORDER BY users DESC
+  `);
+
+  // --- Daily timeseries (last 60 days) for charts -------------------------
+  const daily = await many(env, `
+    SELECT
+      activity_date AS date,
+      COUNT(DISTINCT telemetry_id) AS raw,
+      COUNT(DISTINCT CASE WHEN meaningful_active > 0 THEN telemetry_id END) AS meaningful,
+      COUNT(DISTINCT CASE WHEN meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS headline,
+      COUNT(DISTINCT CASE WHEN last_is_ci = 1 THEN telemetry_id END) AS ci
+    FROM daily_active_users
+    WHERE activity_date > date('now','-60 days')
+    GROUP BY activity_date ORDER BY activity_date
+  `);
+  const dailyInstalls = await many(env, `
+    SELECT date(created_at) AS date, COUNT(DISTINCT telemetry_id) AS installs
+    FROM events
+    WHERE event = 'install' AND is_ci = 0 AND created_at > datetime('now','-60 days')
+    GROUP BY date(created_at) ORDER BY date(created_at)
+  `);
+
+  // --- Recent feedback (text only, no identifiers) ------------------------
+  const feedback = await many(env, `
+    SELECT created_at, feedback_text, feedback_rating, feedback_reason, version
+    FROM events
+    WHERE event = 'feedback' AND feedback_text IS NOT NULL
+    ORDER BY created_at DESC LIMIT 25
+  `);
+
+  return {
+    generated_at: new Date().toISOString(),
+    headline: {
+      total_users: totals.total_users || 0,
+      dau: active.dau || 0,
+      wau: active.wau || 0,
+      mau: active.mau || 0,
+    },
+    users: {
+      total_users: totals.total_users || 0,
+      core_users: totals.core_users || 0,
+      installed_users: totals.installed_users || 0,
+      reached_users: totals.reached_users || 0,
+      all_ids_including_ci: totals.all_ids_including_ci || 0,
+      ci_ids: totals.ci_ids || 0,
+    },
+    active,
+    lifecycle: { ...lifecycle, lifecycle_completion_ratio: lifecycleCompletion, crash_rate: crashRate },
+    retention: { ...retention, d7_retention: d7Retention },
+    quality,
+    turns,
+    errors,
+    features,
+    transport,
+    breakdowns: { versions, os, channels, providers, auth, onboarding },
+    timeseries: { daily, installs: dailyInstalls },
+    feedback,
+  };
+}
diff --git a/telemetry-worker/src/worker.js b/telemetry-worker/src/worker.js
index ecd45ae7e..b14ae2dd6 100644
--- a/telemetry-worker/src/worker.js
+++ b/telemetry-worker/src/worker.js
@@ -1,3 +1,6 @@
+import { getStats } from "./stats.js";
+import { DASHBOARD_HTML } from "./dashboard.js";
+
 let cachedEventColumns = null;
 let cachedSessionDetailColumns = null;
 let cachedTurnDetailColumns = null;
@@ -10,11 +13,33 @@ export default {
       });
     }
 
+    const url = new URL(request.url);
+
+    // Read-only dashboard surface (GET). The HTML page is public; the JSON stats
+    // endpoint is gated behind DASHBOARD_TOKEN so raw aggregates are not exposed
+    // to anyone who finds the URL. Raw events are never returned, only counts.
+    if (request.method === "GET") {
+      if (url.pathname === "/" || url.pathname === "/dashboard") {
+        return htmlResponse(DASHBOARD_HTML);
+      }
+      if (url.pathname === "/v1/stats") {
+        if (!isAuthorized(request, env)) {
+          return jsonResponse({ error: "Unauthorized" }, 401);
+        }
+        try {
+          const stats = await getStats(env);
+          return jsonResponse(stats);
+        } catch (err) {
+          return jsonResponse({ error: "Internal error", detail: String(err?.message || err) }, 500);
+        }
+      }
+      return jsonResponse({ error: "Not found" }, 404);
+    }
+
     if (request.method !== "POST") {
       return jsonResponse({ error: "Method not allowed" }, 405);
     }
 
-    const url = new URL(request.url);
     if (url.pathname !== "/v1/event") {
       return jsonResponse({ error: "Not found" }, 404);
     }
@@ -54,6 +79,21 @@ export default {
   },
 };
 
+// When DASHBOARD_TOKEN is unset the stats endpoint stays locked (deny by
+// default) rather than leaking aggregates. Accepts either a Bearer header or a
+// ?token= query param so it works from curl and the browser fetch alike.
+function isAuthorized(request, env) {
+  const expected = env.DASHBOARD_TOKEN;
+  if (!expected) {
+    return false;
+  }
+  const url = new URL(request.url);
+  const header = request.headers.get("authorization") || "";
+  const bearer = header.startsWith("Bearer ") ? header.slice(7) : null;
+  const provided = bearer || url.searchParams.get("token") || request.headers.get("x-dashboard-token");
+  return provided != null && provided === expected;
+}
+
 async function insertEvent(env, body) {
   const columns = await getEventColumns(env);
   const sessionDetailColumns = await getSessionDetailColumns(env);
@@ -593,10 +633,21 @@ function jsonResponse(data, status = 200) {
   });
 }
 
+function htmlResponse(html, status = 200) {
+  return new Response(html, {
+    status,
+    headers: {
+      "Content-Type": "text/html; charset=utf-8",
+      "Cache-Control": "no-store",
+      ...corsHeaders(),
+    },
+  });
+}
+
 function corsHeaders() {
   return {
     "Access-Control-Allow-Origin": "*",
-    "Access-Control-Allow-Methods": "POST, OPTIONS",
-    "Access-Control-Allow-Headers": "Content-Type",
+    "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
+    "Access-Control-Allow-Headers": "Content-Type, Authorization, X-Dashboard-Token",
   };
 }
diff --git a/telemetry-worker/users.sql b/telemetry-worker/users.sql
new file mode 100644
index 000000000..53a0e8601
--- /dev/null
+++ b/telemetry-worker/users.sql
@@ -0,0 +1,61 @@
+-- Canonical "total users" definitions for jcode telemetry.
+-- Usage:
+--   wrangler d1 execute jcode-telemetry --remote --file=users.sql
+--
+-- Headline number: total_users. A "user" is a distinct, non-CI telemetry_id that
+-- ever either installed jcode or did meaningful work in it. We exclude CI traffic
+-- (ephemeral runners mint a fresh id per job) and exclude empty open/close
+-- sessions that never did anything. Raw, less-filtered tiers are reported
+-- alongside it so no signal is hidden.
+--
+-- Caveats (see README "Accuracy notes"): telemetry_id is per-machine, so one
+-- person on N machines counts as N; opt-outs and network-blocked clients are
+-- never counted; CI rows created before the is_ci column existed default to 0
+-- and may slip in.
+
+SELECT
+    -- HEADLINE: real people who installed or meaningfully used jcode.
+    COUNT(DISTINCT CASE WHEN is_ci = 0 AND (
+            event = 'install'
+            OR (event IN ('session_end', 'session_crash') AND (
+                turns > 0 OR had_user_prompt > 0 OR had_assistant_response > 0
+                OR assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0
+                OR duration_secs > 0 OR error_provider_timeout > 0 OR error_auth_failed > 0
+                OR error_tool_error > 0 OR error_mcp_error > 0 OR error_rate_limited > 0
+                OR provider_switches > 0 OR model_switches > 0
+            ))
+            OR (event = 'turn_end' AND (
+                assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0
+                OR file_write_calls > 0 OR tests_run > 0 OR turn_success > 0
+            ))
+        ) THEN telemetry_id END) AS total_users,
+
+    -- Core users: did meaningful work (excludes install-only, never-used ids).
+    COUNT(DISTINCT CASE WHEN is_ci = 0 AND (
+            (event IN ('session_end', 'session_crash') AND (
+                turns > 0 OR had_user_prompt > 0 OR had_assistant_response > 0
+                OR assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0
+                OR duration_secs > 0 OR error_provider_timeout > 0 OR error_auth_failed > 0
+                OR error_tool_error > 0 OR error_mcp_error > 0 OR error_rate_limited > 0
+                OR provider_switches > 0 OR model_switches > 0
+            ))
+            OR (event = 'turn_end' AND (
+                assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0
+                OR file_write_calls > 0 OR tests_run > 0 OR turn_success > 0
+            ))
+        ) THEN telemetry_id END) AS core_users,
+
+    -- Reach: every distinct non-CI id that ever launched jcode (incl. empty
+    -- open/close sessions). Upper bound on "people who ran it at least once".
+    COUNT(DISTINCT CASE WHEN is_ci = 0 THEN telemetry_id END) AS reached_users,
+
+    -- Installs only (non-CI), for comparison with total_users.
+    COUNT(DISTINCT CASE WHEN is_ci = 0 AND event = 'install' THEN telemetry_id END) AS installed_users,
+
+    -- Unfiltered grand total (includes CI + dev). Never use as the headline;
+    -- kept for transparency and for sizing CI noise.
+    COUNT(DISTINCT telemetry_id) AS all_ids_including_ci,
+
+    -- CI-only ids, so the gap between all_ids and total_users is explainable.
+    COUNT(DISTINCT CASE WHEN is_ci = 1 THEN telemetry_id END) AS ci_ids
+FROM events;

From 14c73b622b0b2d1b94776d682b027a4a30625c99 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 01:26:58 -0700
Subject: [PATCH 03/57] test(provider-doctor): cover multi-call
 thought_signature replay

The native tool smoke only ever drove a single tool-call round-trip, so it
always replayed exactly one thought_signature and passed even when an earlier
function call would drop its signature. The Antigravity/Cloud Code backend
validates *every* functionCall in the replayed history, so the field 400
("Function call is missing a thought_signature ... position N") only
reproduces with a multi-call transcript.

- Extend run_live_native_provider_tool_smoke into two phases: the historical
  single round-trip (gating) plus a best-effort multi-call replay that rebuilds
  a history of two assistant tool_use blocks, each carrying its own signature.
- Delegate run_live_antigravity_native_tool_smoke to the shared probe so
  Antigravity (the runtime that hit this) gets the multi-call coverage too.
- Add an always-on unit guard (build_contents_replays_every_signature_across_
  multi_tool_history) so the serialization regression is caught for free,
  without spending live tokens.
---
 .../src/auth/live_provider_probes.rs          | 344 +++++++++---------
 .../jcode-base/src/provider/gemini_tests.rs   |  49 +++
 2 files changed, 221 insertions(+), 172 deletions(-)

diff --git a/crates/jcode-base/src/auth/live_provider_probes.rs b/crates/jcode-base/src/auth/live_provider_probes.rs
index 1da06404b..2c9b4bfb4 100644
--- a/crates/jcode-base/src/auth/live_provider_probes.rs
+++ b/crates/jcode-base/src/auth/live_provider_probes.rs
@@ -1145,143 +1145,21 @@ pub async fn run_live_antigravity_native_stream_smoke(
 
 /// Stage: tool-call parse + execution loop + result follow-up.
 ///
-/// Full two-turn round-trip: ask the model to call a tool (assert a parseable
-/// tool_use), then feed a synthetic tool_result back (assert the model consumes
-/// it). Gemini-3 attaches a `thought_signature` to its function call that the
-/// Cloud Code backend requires replayed on the follow-up turn, so we carry it
-/// onto the assistant tool_use block. Evidence for the `tool_call_parse`,
-/// `tool_execution_loop`, `tool_result_followup`, and `real_jcode_tool_smoke`
-/// checkpoints.
+/// Delegates to the shared native tool smoke ([`run_live_native_provider_tool_smoke`])
+/// so Antigravity exercises the same two phases as every other native runtime:
+/// a single round-trip plus a **multi-call signature replay** that rebuilds a
+/// history of two assistant `tool_use` blocks. Gemini-3 attaches a
+/// `thought_signature` to each function call that the Cloud Code backend
+/// requires replayed on later turns; the multi-call phase is what actually
+/// reproduces the `400 ... "Function call is missing a thought_signature ...
+/// position N"` field failure (a single round-trip cannot). Evidence for the
+/// `tool_call_parse`, `tool_execution_loop`, `tool_result_followup`, and
+/// `real_jcode_tool_smoke` checkpoints.
 pub async fn run_live_antigravity_native_tool_smoke(
     model: &str,
 ) -> anyhow::Result<crate::live_tests::LiveVerificationStage> {
-    let started = std::time::Instant::now();
     let provider = build_native_antigravity_provider(model)?;
-
-    let tool_name = "read";
-    let tools = vec![ToolDefinition {
-        name: tool_name.to_string(),
-        description: "Reads a file from the local filesystem.".to_string(),
-        input_schema: serde_json::json!({
-            "type": "object",
-            "properties": {"file_path": {"type": "string"}},
-            "required": ["file_path"],
-            "additionalProperties": false
-        }),
-    }];
-    let system = "You are a live provider tool smoke test. When asked to read a file, you MUST \
-                  call the read tool with the given path. Do not answer in text first.";
-
-    let first_turn = vec![Message {
-        role: Role::User,
-        content: vec![ContentBlock::Text {
-            text: "Read the file at /tmp/auth_tool_probe.txt using the read tool. \
-                   Call the tool now; do not answer in text."
-                .to_string(),
-            cache_control: None,
-        }],
-        timestamp: None,
-        tool_duration_ms: None,
-    }];
-
-    let first = consume_native_stream(
-        &provider,
-        &first_turn,
-        &tools,
-        system,
-        std::time::Duration::from_secs(120),
-    )
-    .await?;
-
-    ensure!(
-        !first.tool_calls.is_empty(),
-        "native Antigravity tool smoke produced no tool call (stop_reason={:?}, text={:?})",
-        first.stop_reason,
-        crate::util::truncate_str(first.text.trim(), 200)
-    );
-    let tool_call = first.tool_calls[0].clone();
-    ensure!(
-        tool_call.name == tool_name,
-        "native Antigravity tool smoke called unexpected tool {:?} (expected {tool_name})",
-        tool_call.name
-    );
-    let parsed_arguments = crate::message::ToolCall::parse_streamed_input_to_object(
-        if tool_call.input_json.trim().is_empty() {
-            "{}"
-        } else {
-            tool_call.input_json.trim()
-        },
-    );
-    ensure!(
-        parsed_arguments.is_object(),
-        "native Antigravity tool smoke produced non-object tool arguments: {:?}",
-        tool_call.input_json
-    );
-
-    // Second turn: replay the assistant's tool_use (carrying the Gemini-3
-    // thought signature, required by the Cloud Code backend) and answer it with
-    // a synthetic tool_result, then assert the model consumes the result.
-    let mut followup = first_turn.clone();
-    followup.push(Message {
-        role: Role::Assistant,
-        content: vec![ContentBlock::ToolUse {
-            id: tool_call.id.clone(),
-            name: tool_call.name.clone(),
-            input: parsed_arguments.clone(),
-            thought_signature: tool_call.thought_signature.clone(),
-        }],
-        timestamp: None,
-        tool_duration_ms: None,
-    });
-    followup.push(Message {
-        role: Role::User,
-        content: vec![ContentBlock::ToolResult {
-            tool_use_id: tool_call.id.clone(),
-            content: "TOOL_RESULT_TOKEN=42. Report this token back to confirm you read it."
-                .to_string(),
-            is_error: Some(false),
-        }],
-        timestamp: None,
-        tool_duration_ms: None,
-    });
-
-    let second = consume_native_stream(
-        &provider,
-        &followup,
-        &tools,
-        system,
-        std::time::Duration::from_secs(120),
-    )
-    .await?;
-
-    ensure!(
-        second.saw_message_end,
-        "native Antigravity tool follow-up ended without a message_end event"
-    );
-    ensure!(
-        second.text.contains("42"),
-        "native Antigravity tool follow-up did not reflect the tool result token: {:?}",
-        crate::util::truncate_str(second.text.trim(), 200)
-    );
-
-    let total_input = first.input_tokens + second.input_tokens;
-    let total_output = first.output_tokens + second.output_tokens;
-    let mut stage = crate::live_tests::LiveVerificationStage::passed(
-        crate::live_tests::checkpoints::TOOL_CALL_PARSE,
-    )
-    .with_duration_ms(started.elapsed().as_millis() as u64)
-    .with_evidence("model", serde_json::json!(model))
-    .with_evidence("tool_name", serde_json::json!(tool_call.name))
-    .with_evidence("tool_arguments", parsed_arguments)
-    .with_evidence(
-        "thought_signature_present",
-        serde_json::json!(tool_call.thought_signature.is_some()),
-    )
-    .with_evidence("followup_consumed_result", serde_json::json!(true));
-    if total_input != 0 || total_output != 0 {
-        stage = stage.with_evidence("usage", usage_evidence(total_input, total_output, 0, 0));
-    }
-    Ok(stage)
+    run_live_native_provider_tool_smoke(&provider, model, "Antigravity").await
 }
 
 // === Generic native-runtime probes ========================================
@@ -1442,11 +1320,24 @@ pub async fn run_live_native_provider_stream_smoke(
 /// Stage: tool-call parse + execution loop + result follow-up against an
 /// arbitrary native provider.
 ///
-/// Full two-turn round-trip: ask the model to call a tool (assert a parseable
-/// tool_use), then feed a synthetic tool_result back (assert the model consumes
-/// it). Any provider-emitted `thought_signature` (e.g. Gemini-3 via the Cloud
-/// Code backend) is carried onto the replayed assistant tool_use block, since
-/// some backends reject a follow-up turn that omits it.
+/// Two phases:
+///
+/// 1. **Single round-trip (gating):** ask the model to call a tool (assert a
+///    parseable tool_use), then feed a synthetic tool_result back (assert the
+///    model consumes it). This mirrors the historical assertion so providers
+///    that already passed keep passing.
+/// 2. **Multi-call signature replay (best-effort):** chain a *second* tool call
+///    and replay a history that now contains **two** assistant `tool_use`
+///    blocks, each carrying its own provider-emitted `thought_signature`. The
+///    Antigravity/Cloud Code backend validates every `functionCall` in the
+///    replayed history (not just the latest), so a transcript that drops an
+///    earlier signature is rejected with `400 ... "Function call is missing a
+///    thought_signature ... position N"`. A single round-trip can never
+///    reproduce that, so we exercise the multi-call shape here. If the model
+///    declines the second tool call (common for providers that do not emit
+///    signatures at all), the phase records `multi_tool_replay: "skipped"`
+///    rather than failing, so it never turns a previously-green provider red
+///    for a non-signature reason.
 pub async fn run_live_native_provider_tool_smoke(
     provider: &dyn Provider,
     model: &str,
@@ -1501,49 +1392,27 @@ pub async fn run_live_native_provider_tool_smoke(
         "native {label} tool smoke called unexpected tool {:?} (expected {tool_name})",
         tool_call.name
     );
-    let parsed_arguments = crate::message::ToolCall::parse_streamed_input_to_object(
-        if tool_call.input_json.trim().is_empty() {
-            "{}"
-        } else {
-            tool_call.input_json.trim()
-        },
-    );
+    let parsed_arguments = parse_tool_arguments(&tool_call.input_json);
     ensure!(
         parsed_arguments.is_object(),
         "native {label} tool smoke produced non-object tool arguments: {:?}",
         tool_call.input_json
     );
 
-    // Second turn: replay the assistant's tool_use (carrying any thought
+    // Phase 1 (gating): replay the assistant's tool_use (carrying any thought
     // signature the backend requires) and answer it with a synthetic
     // tool_result, then assert the model consumes the result.
-    let mut followup = first_turn.clone();
-    followup.push(Message {
-        role: Role::Assistant,
-        content: vec![ContentBlock::ToolUse {
-            id: tool_call.id.clone(),
-            name: tool_call.name.clone(),
-            input: parsed_arguments.clone(),
-            thought_signature: tool_call.thought_signature.clone(),
-        }],
-        timestamp: None,
-        tool_duration_ms: None,
-    });
-    followup.push(Message {
-        role: Role::User,
-        content: vec![ContentBlock::ToolResult {
-            tool_use_id: tool_call.id.clone(),
-            content: "TOOL_RESULT_TOKEN=42. Report this token back to confirm you read it."
-                .to_string(),
-            is_error: Some(false),
-        }],
-        timestamp: None,
-        tool_duration_ms: None,
-    });
+    let mut history = first_turn.clone();
+    history.push(assistant_tool_use(&tool_call, &parsed_arguments));
+    history.push(tool_result_then_text(
+        &tool_call.id,
+        "TOOL_RESULT_TOKEN=42. Report this token back to confirm you read it.",
+        None,
+    ));
 
     let second = consume_native_stream(
         provider,
-        &followup,
+        &history,
         &tools,
         system,
         std::time::Duration::from_secs(120),
@@ -1560,8 +1429,84 @@ pub async fn run_live_native_provider_tool_smoke(
         crate::util::truncate_str(second.text.trim(), 200)
     );
 
-    let total_input = first.input_tokens + second.input_tokens;
-    let total_output = first.output_tokens + second.output_tokens;
+    // Phase 2 (best-effort): drive a second tool call so the replayed history
+    // carries *two* function calls, then assert the backend accepts the
+    // multi-call transcript (the only shape that reproduces the
+    // "missing a thought_signature ... position N" 400).
+    let mut total_input = first.input_tokens + second.input_tokens;
+    let mut total_output = first.output_tokens + second.output_tokens;
+    let mut multi_tool_replay = "skipped";
+    let mut signatures_present = vec![tool_call.thought_signature.is_some()];
+
+    // Ask for a *second* distinct read so the model emits another tool call.
+    let mut second_request = first_turn.clone();
+    second_request.push(assistant_tool_use(&tool_call, &parsed_arguments));
+    second_request.push(tool_result_then_text(
+        &tool_call.id,
+        "Contents of /tmp/auth_tool_probe.txt: alpha.",
+        Some(
+            "Now read the file at /tmp/auth_tool_probe_2.txt using the read tool. \
+             Call the tool now; do not answer in text.",
+        ),
+    ));
+
+    let third = consume_native_stream(
+        provider,
+        &second_request,
+        &tools,
+        system,
+        std::time::Duration::from_secs(120),
+    )
+    .await?;
+    total_input += third.input_tokens;
+    total_output += third.output_tokens;
+
+    if let Some(second_call) = third.tool_calls.first().cloned() {
+        let second_arguments = parse_tool_arguments(&second_call.input_json);
+        signatures_present.push(second_call.thought_signature.is_some());
+
+        // Final request: history now contains BOTH tool_use blocks, each
+        // carrying its own captured signature. A dropped earlier signature is
+        // rejected here with the position-N 400.
+        let mut final_request = second_request.clone();
+        final_request.push(assistant_tool_use(&second_call, &second_arguments));
+        final_request.push(tool_result_then_text(
+            &second_call.id,
+            "TOOL_RESULT_TOKEN=77. Report this token back to confirm you read it.",
+            None,
+        ));
+
+        let fourth = consume_native_stream(
+            provider,
+            &final_request,
+            &tools,
+            system,
+            std::time::Duration::from_secs(120),
+        )
+        .await
+        .with_context(|| {
+            format!(
+                "native {label} multi-tool signature replay was rejected (history carried \
+                 {} function calls; a backend that validates every functionCall signature \
+                 fails here when an earlier thought_signature is dropped)",
+                signatures_present.len()
+            )
+        })?;
+        total_input += fourth.input_tokens;
+        total_output += fourth.output_tokens;
+
+        ensure!(
+            fourth.saw_message_end,
+            "native {label} multi-tool follow-up ended without a message_end event"
+        );
+        ensure!(
+            fourth.text.contains("77"),
+            "native {label} multi-tool follow-up did not reflect the second tool result token: {:?}",
+            crate::util::truncate_str(fourth.text.trim(), 200)
+        );
+        multi_tool_replay = "verified";
+    }
+
     let mut stage = crate::live_tests::LiveVerificationStage::passed(
         crate::live_tests::checkpoints::TOOL_CALL_PARSE,
     )
@@ -1573,9 +1518,64 @@ pub async fn run_live_native_provider_tool_smoke(
         "thought_signature_present",
         serde_json::json!(tool_call.thought_signature.is_some()),
     )
+    .with_evidence("multi_tool_replay", serde_json::json!(multi_tool_replay))
+    .with_evidence(
+        "tool_call_signatures_present",
+        serde_json::json!(signatures_present),
+    )
     .with_evidence("followup_consumed_result", serde_json::json!(true));
     if total_input != 0 || total_output != 0 {
         stage = stage.with_evidence("usage", usage_evidence(total_input, total_output, 0, 0));
     }
     Ok(stage)
 }
+
+/// Parse a streamed tool-call argument blob into a JSON object (empty object for
+/// a blank payload), shared by the native tool smoke probes.
+fn parse_tool_arguments(input_json: &str) -> serde_json::Value {
+    crate::message::ToolCall::parse_streamed_input_to_object(if input_json.trim().is_empty() {
+        "{}"
+    } else {
+        input_json.trim()
+    })
+}
+
+/// Build the assistant `tool_use` replay block for a captured native tool call,
+/// preserving any provider-emitted `thought_signature` so backends that require
+/// it (Gemini-3 via the Cloud Code/Antigravity runtime) accept the follow-up.
+fn assistant_tool_use(call: &NativeClaudeToolCall, arguments: &serde_json::Value) -> Message {
+    Message {
+        role: Role::Assistant,
+        content: vec![ContentBlock::ToolUse {
+            id: call.id.clone(),
+            name: call.name.clone(),
+            input: arguments.clone(),
+            thought_signature: call.thought_signature.clone(),
+        }],
+        timestamp: None,
+        tool_duration_ms: None,
+    }
+}
+
+/// Build a user turn carrying a synthetic `tool_result` and, optionally, a
+/// follow-up text instruction (used to chain a second tool call in one message
+/// so the provider sees a clean user turn rather than two consecutive ones).
+fn tool_result_then_text(tool_use_id: &str, result: &str, follow_up: Option<&str>) -> Message {
+    let mut content = vec![ContentBlock::ToolResult {
+        tool_use_id: tool_use_id.to_string(),
+        content: result.to_string(),
+        is_error: Some(false),
+    }];
+    if let Some(text) = follow_up {
+        content.push(ContentBlock::Text {
+            text: text.to_string(),
+            cache_control: None,
+        });
+    }
+    Message {
+        role: Role::User,
+        content,
+        timestamp: None,
+        tool_duration_ms: None,
+    }
+}
diff --git a/crates/jcode-base/src/provider/gemini_tests.rs b/crates/jcode-base/src/provider/gemini_tests.rs
index 9eeae7a77..4dedb2fa7 100644
--- a/crates/jcode-base/src/provider/gemini_tests.rs
+++ b/crates/jcode-base/src/provider/gemini_tests.rs
@@ -231,6 +231,55 @@ fn build_contents_replays_thought_signature_on_function_call() {
     );
 }
 
+#[test]
+fn build_contents_replays_every_signature_across_multi_tool_history() {
+    // Regression guard for the Antigravity/Cloud Code 400
+    // ("Function call is missing a thought_signature ... position 5"): the
+    // backend validates *every* functionCall in the replayed history, not just
+    // the latest one. A multi-turn transcript where an earlier tool_use drops
+    // its signature is exactly what triggers the field failure, so assert that
+    // each captured signature survives serialization onto its matching part.
+    let signatures = ["SIG_A", "SIG_B", "SIG_C"];
+    let mut messages = Vec::new();
+    for (idx, sig) in signatures.iter().enumerate() {
+        messages.push(Message {
+            role: Role::Assistant,
+            content: vec![ContentBlock::ToolUse {
+                id: format!("call_{idx}"),
+                name: "bash".to_string(),
+                input: json!({ "command": format!("echo {idx}") }),
+                thought_signature: Some(sig.to_string()),
+            }],
+            timestamp: None,
+            tool_duration_ms: None,
+        });
+        messages.push(Message {
+            role: Role::User,
+            content: vec![ContentBlock::ToolResult {
+                tool_use_id: format!("call_{idx}"),
+                content: format!("out {idx}"),
+                is_error: Some(false),
+            }],
+            timestamp: None,
+            tool_duration_ms: None,
+        });
+    }
+
+    let contents = build_contents(&messages);
+    let replayed: Vec<Option<&str>> = contents
+        .iter()
+        .flat_map(|content| content.parts.iter())
+        .filter(|part| part.function_call.is_some())
+        .map(|part| part.thought_signature.as_deref())
+        .collect();
+    assert_eq!(
+        replayed,
+        vec![Some("SIG_A"), Some("SIG_B"), Some("SIG_C")],
+        "every functionCall in the history must carry its captured thought_signature, \
+         not just the most recent one"
+    );
+}
+
 #[test]
 fn build_contents_preserves_tool_calls_and_results() {
     let messages = vec![

From d9823f6a133c263675dad1f8f5eaece898b9570b Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 01:32:02 -0700
Subject: [PATCH 04/57] telemetry dashboard: restore all legacy metrics +
 redesign with frontend-design skill

Two things the prior dashboard commit missed.

1) Restore every metric the old SQL surface (README queries, health.sql,
   dau.sql) exposed that had been dropped:
   - os/arch platform breakdown (was os-only)
   - session starts by UTC hour (usage-timing histogram)
   - pipeline-health diagnostics: lifecycle_ids, session_start_ids,
     lifecycle_ids_without_install, heaviest/top5/total session events
   - meaningful_sessions_30d count
   stats.js gains hours, arch, health, skew, meaningfulSessions queries;
   all validated end-to-end against a seeded sqlite D1 shim.

2) Redesign dashboard.js using the installed anthropics/frontend-design
   skill. The previous version used system fonts and the exact
   purple-gradient-on-dark the skill warns against. New 'Terminal
   Observatory' aesthetic, true to jcode being a CLI agent: JetBrains
   Mono instrument typography (Sora for prose), warm phosphor-amber
   signal color with a single cyan accent, scanline texture, station-
   clock hero number, numbered hairline section dividers, KEY/alert
   accent rails, an amber UTC-hour bar histogram, and a filled cyan
   active-users area chart. Tiered HEADLINE/SIGNAL/DIAGNOSTIC layout so
   the total-users number dominates while every figure stays visible.

Verified in a real browser: token gate, hero, all 8 sections, both
chart types render correctly. node --check passes on all modules.
---
 telemetry-worker/src/dashboard.js | 704 ++++++++++++++++--------------
 telemetry-worker/src/stats.js     |  52 ++-
 2 files changed, 416 insertions(+), 340 deletions(-)

diff --git a/telemetry-worker/src/dashboard.js b/telemetry-worker/src/dashboard.js
index f40246119..62175243e 100644
--- a/telemetry-worker/src/dashboard.js
+++ b/telemetry-worker/src/dashboard.js
@@ -1,397 +1,425 @@
-// Self-contained dashboard page (HTML + CSS + JS, no external dependencies so it
-// works under Cloudflare with no CDN/CSP issues). Charts are drawn as inline SVG.
+// jcode telemetry console — "Terminal Observatory" aesthetic.
 //
-// The page fetches /v1/stats with the dashboard token (entered once, stored in
-// localStorage) and renders tiered metrics: a hero "total users" number, the
-// active-user funnel, then secondary KPIs and diagnostic breakdowns. Every
-// metric the API returns is shown; importance is conveyed visually (hero /
-// primary cards / muted diagnostic tables) and via short "why it matters" notes.
+// Design intent (frontend-design skill): jcode is a terminal coding agent, so
+// the dashboard is built as a precision instrument readout, not generic SaaS.
+// - Type: JetBrains Mono (display + data) paired with a quiet grotesk for prose.
+// - Palette: near-black graphite, warm phosphor amber as the dominant signal,
+//   a single cyan accent for the live/headline series. No purple-on-white.
+// - Composition: a station-clock hero number, hairline rules, dense tabular
+//   instrument panels, scanline texture, staggered load-in reveals.
+//
+// Self-contained (HTML/CSS/inline-SVG, fonts via Google Fonts <link>). Fetches
+// /v1/stats with the dashboard token. Every metric the API returns is shown,
+// grouped by importance (HEADLINE / SIGNAL / DIAGNOSTIC).
 
 export const DASHBOARD_HTML = `<!DOCTYPE html>
 <html lang="en">
 <head>
 <meta charset="utf-8" />
 <meta name="viewport" content="width=device-width, initial-scale=1" />
-<title>jcode telemetry</title>
+<title>jcode · telemetry console</title>
+<link rel="preconnect" href="https://fonts.googleapis.com">
+<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;700;800&family=Sora:wght@400;500;600&display=swap" rel="stylesheet">
 <style>
   :root {
-    --bg: #0b0e14;
-    --bg-soft: #11151f;
-    --panel: #151a26;
-    --panel-2: #1b2231;
-    --line: #232c3d;
-    --text: #e6edf6;
-    --muted: #8a97ac;
-    --muted-2: #5d6982;
-    --accent: #5b9dff;
-    --accent-2: #7c5cff;
-    --good: #3fb950;
-    --warn: #d29922;
-    --bad: #f85149;
-    --radius: 14px;
-    --shadow: 0 1px 0 rgba(255,255,255,0.03) inset, 0 10px 30px rgba(0,0,0,0.35);
+    --bg:        #07090c;
+    --bg-grain:  #0a0d12;
+    --panel:     #0d1117;
+    --panel-2:   #11161e;
+    --rule:      #1c232e;
+    --rule-soft: #141a22;
+    --ink:       #e8eef5;
+    --ink-dim:   #9aa7b6;
+    --ink-faint: #5c6675;
+    --amber:     #ffb454;   /* dominant phosphor signal */
+    --amber-dim: #c98a3f;
+    --cyan:      #4fd6ff;    /* live / headline accent */
+    --green:     #5ad27a;
+    --red:       #ff6b6b;
+    --mono: "JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, monospace;
+    --sans: "Sora", system-ui, sans-serif;
   }
   * { box-sizing: border-box; }
   html, body { margin: 0; padding: 0; }
   body {
-    background: radial-gradient(1200px 600px at 80% -10%, #16203a 0%, var(--bg) 55%) fixed;
-    color: var(--text);
-    font: 14px/1.5 -apple-system, BlinkMacSystemFont, "Segoe UI", Inter, Roboto, Helvetica, Arial, sans-serif;
-    -webkit-font-smoothing: antialiased;
+    background:
+      radial-gradient(900px 500px at 88% -8%, rgba(255,180,84,0.07), transparent 60%),
+      radial-gradient(700px 500px at -5% 110%, rgba(79,214,255,0.05), transparent 60%),
+      var(--bg);
+    color: var(--ink);
+    font-family: var(--sans);
+    font-size: 14px; line-height: 1.55;
     min-height: 100vh;
+    -webkit-font-smoothing: antialiased;
   }
-  a { color: var(--accent); text-decoration: none; }
-  .wrap { max-width: 1180px; margin: 0 auto; padding: 28px 22px 80px; }
-
-  header.top { display: flex; align-items: center; justify-content: space-between; gap: 16px; margin-bottom: 22px; flex-wrap: wrap; }
-  .brand { display: flex; align-items: center; gap: 12px; }
-  .logo { width: 34px; height: 34px; border-radius: 9px; background: linear-gradient(135deg, var(--accent), var(--accent-2)); display: grid; place-items: center; font-weight: 800; color: #fff; box-shadow: var(--shadow); }
-  .brand h1 { font-size: 17px; margin: 0; font-weight: 650; letter-spacing: .2px; }
-  .brand .sub { color: var(--muted); font-size: 12px; }
-  .top-actions { display: flex; align-items: center; gap: 10px; }
-  .pill { font-size: 12px; color: var(--muted); background: var(--panel); border: 1px solid var(--line); padding: 6px 11px; border-radius: 999px; }
-  button.btn { cursor: pointer; font: inherit; color: var(--text); background: var(--panel-2); border: 1px solid var(--line); padding: 7px 13px; border-radius: 9px; }
-  button.btn:hover { border-color: var(--accent); }
-
-  /* Hero */
-  .hero { display: grid; grid-template-columns: 1.15fr 1fr; gap: 18px; margin-bottom: 18px; }
-  @media (max-width: 860px) { .hero { grid-template-columns: 1fr; } }
-  .card { background: var(--panel); border: 1px solid var(--line); border-radius: var(--radius); box-shadow: var(--shadow); }
-  .hero-main { padding: 26px 28px; position: relative; overflow: hidden; }
-  .hero-main:before { content:""; position:absolute; right:-40px; top:-60px; width:240px; height:240px; background: radial-gradient(circle, rgba(91,157,255,.20), transparent 60%); }
-  .eyebrow { text-transform: uppercase; letter-spacing: 1.4px; font-size: 11px; color: var(--muted); font-weight: 600; }
-  .hero-number { font-size: 68px; font-weight: 750; line-height: 1.02; margin: 6px 0 2px; letter-spacing: -1.5px; background: linear-gradient(180deg, #fff, #b9c6dd); -webkit-background-clip: text; background-clip: text; color: transparent; }
-  .hero-note { color: var(--muted); font-size: 13px; max-width: 46ch; }
-  .hero-sub { display: flex; gap: 26px; margin-top: 18px; flex-wrap: wrap; }
-  .hero-sub .k { font-size: 11px; color: var(--muted); text-transform: uppercase; letter-spacing: .6px; }
-  .hero-sub .v { font-size: 22px; font-weight: 650; }
-
-  .hero-side { padding: 18px 20px; display: grid; grid-template-rows: auto 1fr; }
-  .hero-side h3 { margin: 2px 0 10px; font-size: 13px; color: var(--muted); font-weight: 600; }
-
-  /* Section + grid */
-  .section-title { display: flex; align-items: baseline; gap: 10px; margin: 26px 2px 12px; }
-  .section-title h2 { font-size: 14px; margin: 0; font-weight: 650; letter-spacing: .3px; }
-  .section-title .hint { color: var(--muted-2); font-size: 12px; }
-  .grid { display: grid; gap: 14px; }
-  .g4 { grid-template-columns: repeat(4, 1fr); }
-  .g3 { grid-template-columns: repeat(3, 1fr); }
-  .g2 { grid-template-columns: repeat(2, 1fr); }
-  .hero-kpis { grid-template-columns: repeat(3, 1fr); }
-  .hero-kpis .kpi { padding: 12px 12px; }
-  .hero-kpis .num { font-size: 23px; }
-  .hero-kpis .meta { font-size: 11px; }
-  @media (max-width: 980px) { .g4 { grid-template-columns: repeat(2, 1fr); } .g3 { grid-template-columns: repeat(2, 1fr); } }
-  @media (max-width: 620px) { .g4, .g3, .g2 { grid-template-columns: 1fr; } }
-
-  .kpi { padding: 16px 17px; }
-  .kpi .label { color: var(--muted); font-size: 12px; display: flex; align-items: center; gap: 7px; }
-  .kpi .num { font-size: 28px; font-weight: 700; margin-top: 7px; letter-spacing: -.5px; }
-  .kpi .meta { color: var(--muted-2); font-size: 12px; margin-top: 3px; }
-  .dot { width: 7px; height: 7px; border-radius: 50%; display: inline-block; }
-  .dot.p1 { background: var(--accent); } .dot.p2 { background: var(--muted); } .dot.p3 { background: var(--muted-2); }
-  .tag { font-size: 10px; text-transform: uppercase; letter-spacing: .5px; padding: 2px 7px; border-radius: 6px; border: 1px solid var(--line); color: var(--muted); }
-  .tag.key { color: var(--accent); border-color: rgba(91,157,255,.35); background: rgba(91,157,255,.08); }
-
-  .panel { padding: 18px 18px 8px; }
-  .panel h3 { margin: 0 0 4px; font-size: 13px; font-weight: 650; }
-  .panel .desc { color: var(--muted-2); font-size: 12px; margin: 0 0 12px; }
-
-  table { width: 100%; border-collapse: collapse; font-size: 13px; }
-  th, td { text-align: left; padding: 7px 6px; border-bottom: 1px solid var(--line); }
-  th { color: var(--muted); font-weight: 600; font-size: 11px; text-transform: uppercase; letter-spacing: .5px; }
-  td.num, th.num { text-align: right; font-variant-numeric: tabular-nums; }
-  .bar { height: 7px; border-radius: 4px; background: linear-gradient(90deg, var(--accent), var(--accent-2)); }
-  .bar-track { background: var(--panel-2); border-radius: 4px; overflow: hidden; }
-
-  .muted { color: var(--muted); } .small { font-size: 12px; }
-  .legend { display: flex; gap: 14px; align-items: center; font-size: 12px; color: var(--muted); margin-bottom: 8px; flex-wrap: wrap; }
-  .legend i { width: 10px; height: 10px; border-radius: 3px; display: inline-block; margin-right: 5px; vertical-align: -1px; }
-
-  .feedback-item { padding: 11px 0; border-bottom: 1px solid var(--line); }
-  .feedback-item .q { color: var(--text); }
-  .feedback-item .m { color: var(--muted-2); font-size: 11px; margin-top: 3px; }
-
-  /* token gate */
-  .gate { max-width: 420px; margin: 12vh auto 0; text-align: center; }
-  .gate .card { padding: 28px 26px; }
-  .gate input { width: 100%; margin: 14px 0; padding: 11px 13px; border-radius: 10px; border: 1px solid var(--line); background: var(--bg-soft); color: var(--text); font: inherit; }
-  .gate button { width: 100%; padding: 11px; font-weight: 600; }
-  .err { color: var(--bad); font-size: 13px; min-height: 18px; }
-  .hidden { display: none !important; }
-  .foot { color: var(--muted-2); font-size: 12px; margin-top: 30px; text-align: center; }
-  .spin { display:inline-block; width:14px; height:14px; border:2px solid var(--line); border-top-color: var(--accent); border-radius:50%; animation: sp 0.8s linear infinite; vertical-align:-2px; }
+  /* faint scanline texture, instrument vibe */
+  body:before {
+    content:""; position: fixed; inset: 0; pointer-events: none; z-index: 0;
+    background-image: repeating-linear-gradient(0deg, rgba(255,255,255,0.014) 0 1px, transparent 1px 3px);
+    mix-blend-mode: overlay; opacity: .5;
+  }
+  .wrap { position: relative; z-index: 1; max-width: 1200px; margin: 0 auto; padding: 30px 24px 90px; }
+  .mono { font-family: var(--mono); }
+  .num { font-family: var(--mono); font-variant-numeric: tabular-nums; }
+
+  /* ---- masthead ---- */
+  header.bar { display:flex; align-items:center; justify-content:space-between; gap:16px;
+    border-bottom: 1px solid var(--rule); padding-bottom: 16px; margin-bottom: 26px; flex-wrap: wrap; }
+  .mark { display:flex; align-items:center; gap:13px; }
+  .glyph { font-family: var(--mono); font-weight: 800; font-size: 15px; color: var(--bg);
+    background: var(--amber); width: 32px; height: 32px; display:grid; place-items:center; border-radius: 7px;
+    box-shadow: 0 0 0 1px rgba(255,180,84,.4), 0 0 22px rgba(255,180,84,.25); }
+  .mark h1 { font-family: var(--mono); font-size: 14px; font-weight: 700; margin:0; letter-spacing: 1px; text-transform: uppercase; }
+  .mark .tag { font-family: var(--mono); font-size: 11px; color: var(--ink-faint); letter-spacing: .5px; }
+  .bar-actions { display:flex; align-items:center; gap:10px; }
+  .stamp { font-family: var(--mono); font-size: 11px; color: var(--ink-dim); border: 1px solid var(--rule);
+    padding: 6px 10px; border-radius: 6px; letter-spacing: .3px; }
+  .stamp .live { color: var(--green); }
+  .stamp .live:before { content:"●"; margin-right: 6px; animation: pulse 2s ease-in-out infinite; }
+  @keyframes pulse { 0%,100%{opacity:1} 50%{opacity:.35} }
+  button.btn { cursor:pointer; font-family: var(--mono); font-size: 12px; color: var(--ink); background: var(--panel-2);
+    border: 1px solid var(--rule); padding: 7px 13px; border-radius: 6px; letter-spacing: .3px; transition: .15s; }
+  button.btn:hover { border-color: var(--amber); color: var(--amber); }
+
+  /* ---- section ---- */
+  .sec { margin: 34px 0 14px; display:flex; align-items:baseline; gap: 12px; }
+  .sec .idx { font-family: var(--mono); font-size: 11px; color: var(--amber); letter-spacing: 1px; }
+  .sec h2 { font-family: var(--mono); font-size: 12px; font-weight: 700; letter-spacing: 2px; text-transform: uppercase; margin: 0; color: var(--ink); }
+  .sec .rule { flex:1; height: 1px; background: linear-gradient(90deg, var(--rule), transparent); align-self: center; }
+  .sec .note { font-family: var(--mono); font-size: 11px; color: var(--ink-faint); letter-spacing: .3px; }
+
+  /* ---- hero ---- */
+  .hero { display:grid; grid-template-columns: 1.05fr 1fr; gap: 1px; background: var(--rule);
+    border: 1px solid var(--rule); border-radius: 14px; overflow:hidden; }
+  @media (max-width: 880px){ .hero { grid-template-columns: 1fr; } }
+  .hero > div { background: var(--panel); }
+  .hero-main { padding: 30px 32px; position: relative; }
+  .hero-main:after { content:""; position:absolute; inset:0; pointer-events:none;
+    background: radial-gradient(420px 220px at 100% 0%, rgba(255,180,84,.10), transparent 70%); }
+  .label { font-family: var(--mono); font-size: 11px; letter-spacing: 2px; text-transform: uppercase; color: var(--ink-faint); }
+  .big { font-family: var(--mono); font-weight: 800; font-size: clamp(58px, 9vw, 96px); line-height: .92;
+    letter-spacing: -2px; color: var(--amber); margin: 8px 0 4px; text-shadow: 0 0 36px rgba(255,180,84,.22); }
+  .big .unit { font-size: 18px; color: var(--ink-dim); letter-spacing: 0; margin-left: 10px; text-shadow:none; }
+  .hero-desc { color: var(--ink-dim); font-size: 13px; max-width: 48ch; }
+  .ladder { margin-top: 22px; border-top: 1px solid var(--rule-soft); }
+  .rung { display:flex; align-items:center; justify-content:space-between; padding: 9px 0; border-bottom: 1px solid var(--rule-soft); }
+  .rung .lk { font-family: var(--mono); font-size: 12px; color: var(--ink-dim); letter-spacing:.3px; }
+  .rung .lk b { color: var(--ink); font-weight: 500; }
+  .rung .lv { font-family: var(--mono); font-weight: 700; font-size: 16px; }
+  .rung .lv.amber { color: var(--amber); } .rung .lv.cyan { color: var(--cyan); } .rung .lv.dim { color: var(--ink-dim); }
+  .hero-side { padding: 24px 26px; display:flex; flex-direction: column; }
+  .hero-side h3 { font-family: var(--mono); font-size: 11px; letter-spacing: 1.5px; text-transform: uppercase; color: var(--ink-faint); margin: 0 0 14px; }
+  .triple { display:grid; grid-template-columns: repeat(3,1fr); gap: 14px; margin-bottom: 6px; }
+  .triple .t .tn { font-family: var(--mono); font-weight: 800; font-size: 30px; letter-spacing: -1px; color: var(--cyan); }
+  .triple .t .tl { font-family: var(--mono); font-size: 11px; color: var(--ink-faint); letter-spacing: 1px; text-transform: uppercase; margin-top: 2px; }
+  .triple .t .tsub { font-size: 11px; color: var(--ink-faint); }
+
+  /* ---- cards ---- */
+  .grid { display:grid; gap: 14px; }
+  .g4 { grid-template-columns: repeat(4,1fr); } .g3 { grid-template-columns: repeat(3,1fr); } .g2 { grid-template-columns: repeat(2,1fr); }
+  @media (max-width: 1000px){ .g4 { grid-template-columns: repeat(2,1fr); } .g3 { grid-template-columns: repeat(2,1fr); } }
+  @media (max-width: 600px){ .g4,.g3,.g2 { grid-template-columns: 1fr; } }
+  .stat { background: var(--panel); border: 1px solid var(--rule); border-radius: 11px; padding: 15px 16px; position: relative; overflow: hidden; }
+  .stat.key { border-color: rgba(255,180,84,.32); }
+  .stat.key:before { content:""; position:absolute; left:0; top:0; bottom:0; width:2px; background: var(--amber); }
+  .stat.alert { border-color: rgba(255,107,107,.4); }
+  .stat.alert:before { content:""; position:absolute; left:0; top:0; bottom:0; width:2px; background: var(--red); }
+  .stat .sl { font-family: var(--mono); font-size: 11px; color: var(--ink-dim); letter-spacing: .4px; display:flex; align-items:center; gap:7px; }
+  .stat .sv { font-family: var(--mono); font-weight: 700; font-size: 26px; margin-top: 8px; letter-spacing: -.5px; }
+  .stat.key .sv { color: var(--amber); } .stat.alert .sv { color: var(--red); }
+  .stat .sm { font-size: 11px; color: var(--ink-faint); margin-top: 3px; }
+  .kk { font-family: var(--mono); font-size: 9px; letter-spacing: 1px; color: var(--bg); background: var(--amber); padding: 1px 5px; border-radius: 3px; }
+
+  /* ---- panels w/ tables & charts ---- */
+  .panel { background: var(--panel); border: 1px solid var(--rule); border-radius: 12px; padding: 18px 18px 12px; }
+  .panel h3 { font-family: var(--mono); font-size: 12px; font-weight: 700; letter-spacing: 1px; text-transform: uppercase; margin: 0 0 3px; }
+  .panel .pd { font-size: 11px; color: var(--ink-faint); margin: 0 0 14px; font-family: var(--mono); letter-spacing: .2px; }
+  table { width:100%; border-collapse: collapse; }
+  th, td { text-align:left; padding: 7px 4px; border-bottom: 1px solid var(--rule-soft); font-size: 12px; }
+  th { font-family: var(--mono); color: var(--ink-faint); font-weight: 500; font-size: 10px; letter-spacing: 1px; text-transform: uppercase; }
+  td.k { font-family: var(--mono); color: var(--ink); letter-spacing: .2px; }
+  td.v, th.v { text-align:right; font-family: var(--mono); font-variant-numeric: tabular-nums; color: var(--ink); }
+  .track { height: 6px; background: var(--panel-2); border-radius: 2px; overflow:hidden; }
+  .fill { height: 100%; background: linear-gradient(90deg, var(--amber-dim), var(--amber)); border-radius: 2px; }
+  tr:last-child td { border-bottom: none; }
+
+  .legend { display:flex; gap: 16px; align-items:center; font-family: var(--mono); font-size: 11px; color: var(--ink-dim); margin-bottom: 10px; flex-wrap: wrap; }
+  .legend i { width: 14px; height: 3px; display:inline-block; margin-right: 6px; vertical-align: 3px; border-radius: 2px; }
+
+  .fb { padding: 12px 0; border-bottom: 1px solid var(--rule-soft); }
+  .fb:last-child { border-bottom: none; }
+  .fb .q { color: var(--ink); font-size: 13.5px; }
+  .fb .m { font-family: var(--mono); color: var(--ink-faint); font-size: 11px; margin-top: 4px; letter-spacing: .2px; }
+  .fb .badge { color: var(--amber); }
+
+  /* ---- gate ---- */
+  .gate { max-width: 440px; margin: 16vh auto 0; }
+  .gate .box { background: var(--panel); border: 1px solid var(--rule); border-radius: 14px; padding: 30px 28px; text-align: center;
+    box-shadow: 0 0 60px rgba(0,0,0,.5); }
+  .gate .glyph { margin: 0 auto 16px; width: 40px; height: 40px; font-size: 18px; }
+  .gate h2 { font-family: var(--mono); letter-spacing: 1px; margin: 0 0 4px; font-size: 16px; }
+  .gate p { color: var(--ink-faint); font-size: 12px; font-family: var(--mono); margin: 0; }
+  .gate input { width: 100%; margin: 18px 0 12px; padding: 12px 14px; border-radius: 9px; border: 1px solid var(--rule);
+    background: var(--bg); color: var(--ink); font-family: var(--mono); font-size: 13px; letter-spacing: 1px; }
+  .gate input:focus { outline: none; border-color: var(--amber); }
+  .gate button { width: 100%; padding: 12px; font-weight: 700; }
+  .err { color: var(--red); font-family: var(--mono); font-size: 12px; min-height: 18px; margin-top: 6px; }
+
+  .hidden { display:none !important; }
+  .foot { font-family: var(--mono); color: var(--ink-faint); font-size: 11px; margin-top: 38px; padding-top: 18px;
+    border-top: 1px solid var(--rule-soft); text-align: center; letter-spacing: .2px; line-height: 1.8; }
+  .spin { display:inline-block; width: 13px; height: 13px; border: 2px solid var(--rule); border-top-color: var(--amber); border-radius: 50%; animation: sp .7s linear infinite; vertical-align: -2px; }
   @keyframes sp { to { transform: rotate(360deg); } }
+  .reveal { animation: rise .5s cubic-bezier(.2,.7,.2,1) both; }
+  @keyframes rise { from { opacity: 0; transform: translateY(10px); } to { opacity:1; transform:none; } }
 </style>
 </head>
 <body>
 <div class="wrap">
-  <!-- token gate -->
   <div id="gate" class="gate hidden">
-    <div class="card">
-      <div class="logo" style="margin:0 auto 14px">jc</div>
-      <h1 style="margin:0 0 4px;font-size:18px">jcode telemetry</h1>
-      <div class="muted small">Enter the dashboard token to view stats.</div>
-      <input id="token" type="password" placeholder="dashboard token" autocomplete="off" />
-      <button class="btn" id="unlock">Unlock</button>
+    <div class="box">
+      <div class="glyph">jc</div>
+      <h2>TELEMETRY CONSOLE</h2>
+      <p>access token required</p>
+      <input id="token" type="password" placeholder="•••••••••••" autocomplete="off" />
+      <button class="btn" id="unlock">AUTHENTICATE</button>
       <div class="err" id="gate-err"></div>
     </div>
   </div>
 
-  <!-- dashboard -->
   <div id="app" class="hidden">
-    <header class="top">
-      <div class="brand">
-        <div class="logo">jc</div>
+    <header class="bar">
+      <div class="mark">
+        <div class="glyph">jc</div>
         <div>
           <h1>jcode telemetry</h1>
-          <div class="sub" id="generated">live product analytics</div>
+          <div class="tag" id="generated">— · — · —</div>
         </div>
       </div>
-      <div class="top-actions">
-        <span class="pill" id="freshness">—</span>
-        <button class="btn" id="refresh">Refresh</button>
-        <button class="btn" id="logout">Lock</button>
+      <div class="bar-actions">
+        <span class="stamp"><span class="live" id="livestamp">LIVE</span></span>
+        <button class="btn" id="refresh">↻ REFRESH</button>
+        <button class="btn" id="logout">LOCK</button>
       </div>
     </header>
-
     <div id="content"></div>
-
     <div class="foot">
-      Users are distinct anonymous <code>telemetry_id</code>s. Headline numbers exclude CI runners and dev/non-release builds.
-      Raw and CI-inclusive figures are shown in the diagnostic tiers so nothing is hidden.
+      users are distinct anonymous telemetry_id · headline excludes CI runners &amp; non-release builds<br/>
+      raw / CI-inclusive figures retained in diagnostic tier · counts only, no raw events leave the worker
     </div>
   </div>
 </div>
 
 <script>
-const fmt = (n) => (n == null ? "—" : Number(n).toLocaleString());
-const pct = (x) => (x == null ? "—" : (x * 100).toFixed(1) + "%");
-const ms = (x) => (x == null ? "—" : x >= 1000 ? (x/1000).toFixed(1) + "s" : Math.round(x) + "ms");
-const dec = (x, d=2) => (x == null ? "—" : Number(x).toFixed(d));
-const el = (h) => { const t = document.createElement("template"); t.innerHTML = h.trim(); return t.content.firstChild; };
-const esc = (s) => String(s == null ? "" : s).replace(/[&<>"]/g, c => ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;"}[c]));
-
+const fmt = (n) => (n==null?"—":Number(n).toLocaleString());
+const pct = (x) => (x==null?"—":(x*100).toFixed(1)+"%");
+const ms  = (x) => (x==null?"—":x>=1000?(x/1000).toFixed(1)+"s":Math.round(x)+"ms");
+const dec = (x,d=1) => (x==null?"—":Number(x).toFixed(d));
+const esc = (s) => String(s==null?"":s).replace(/[&<>"]/g,c=>({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;"}[c]));
 let TOKEN = localStorage.getItem("jcode_dash_token") || "";
 
-function showGate(msg) {
-  document.getElementById("app").classList.add("hidden");
-  document.getElementById("gate").classList.remove("hidden");
-  document.getElementById("gate-err").textContent = msg || "";
-}
-function showApp() {
-  document.getElementById("gate").classList.add("hidden");
-  document.getElementById("app").classList.remove("hidden");
-}
+function showGate(m){ document.getElementById("app").classList.add("hidden"); document.getElementById("gate").classList.remove("hidden"); document.getElementById("gate-err").textContent = m||""; }
+function showApp(){ document.getElementById("gate").classList.add("hidden"); document.getElementById("app").classList.remove("hidden"); }
 
-async function load() {
-  if (!TOKEN) { showGate(""); return; }
-  document.getElementById("content").innerHTML = '<div class="muted" style="padding:40px 0"><span class="spin"></span> loading…</div>';
+async function load(){
+  if(!TOKEN){ showGate(""); return; }
+  document.getElementById("content").innerHTML = '<div class="mono" style="padding:60px 0;color:var(--ink-faint)"><span class="spin"></span> reading instruments…</div>';
   showApp();
   let res;
-  try {
-    res = await fetch("/v1/stats?token=" + encodeURIComponent(TOKEN), { headers: { "Authorization": "Bearer " + TOKEN } });
-  } catch (e) { showGate("Network error."); return; }
-  if (res.status === 401) { localStorage.removeItem("jcode_dash_token"); TOKEN = ""; showGate("Invalid token."); return; }
-  if (!res.ok) { document.getElementById("content").innerHTML = '<div class="err">Failed to load stats ('+res.status+').</div>'; return; }
-  const data = await res.json();
-  render(data);
+  try { res = await fetch("/v1/stats?token="+encodeURIComponent(TOKEN), { headers:{ "Authorization":"Bearer "+TOKEN } }); }
+  catch(e){ showGate("network error"); return; }
+  if(res.status===401){ localStorage.removeItem("jcode_dash_token"); TOKEN=""; showGate("invalid token"); return; }
+  if(!res.ok){ document.getElementById("content").innerHTML='<div class="err">failed to load ('+res.status+')</div>'; return; }
+  render(await res.json());
 }
 
-function kpi(label, value, meta, tier, isKey) {
-  return \`<div class="card kpi">
-    <div class="label"><span class="dot p\${tier||1}"></span>\${esc(label)} \${isKey?'<span class="tag key">key</span>':''}</div>
-    <div class="num">\${value}</div>
-    <div class="meta">\${meta||""}</div>
-  </div>\`;
+function sec(idx,title,note){ return '<div class="sec reveal"><span class="idx">'+idx+'</span><h2>'+esc(title)+'</h2><span class="rule"></span><span class="note">'+esc(note||"")+'</span></div>'; }
+function stat(label,value,meta,opts){ opts=opts||{};
+  const cls = opts.alert?'stat alert':(opts.key?'stat key':'stat');
+  return '<div class="'+cls+' reveal"><div class="sl">'+esc(label)+(opts.key?' <span class="kk">KEY</span>':'')+'</div><div class="sv">'+value+'</div><div class="sm">'+(meta||'')+'</div></div>';
 }
-
-function barTable(title, desc, rows, keyName, valName, total) {
-  const max = Math.max(1, ...rows.map(r => r.value));
-  const body = rows.map(r => \`<tr>
-      <td>\${esc(r.label)}</td>
-      <td style="width:45%"><div class="bar-track"><div class="bar" style="width:\${Math.max(2,(r.value/max)*100)}%"></div></div></td>
-      <td class="num">\${fmt(r.value)}</td>
-    </tr>\`).join("");
-  return \`<div class="card panel">
-    <h3>\${esc(title)}</h3><p class="desc">\${esc(desc)}</p>
-    <table><thead><tr><th>\${esc(keyName)}</th><th>share</th><th class="num">\${esc(valName)}</th></tr></thead><tbody>\${body || '<tr><td class="muted" colspan="3">no data</td></tr>'}</tbody></table>
-  </div>\`;
+function tablePanel(title,desc,rows,kcol,vcol){
+  const max = Math.max(1, ...rows.map(r=>r.value));
+  const body = rows.length ? rows.map(r=>'<tr><td class="k">'+esc(r.label)+'</td><td style="width:46%"><div class="track"><div class="fill" style="width:'+Math.max(3,(r.value/max)*100)+'%"></div></div></td><td class="v">'+fmt(r.value)+'</td></tr>').join('') : '<tr><td class="k" colspan="3" style="color:var(--ink-faint)">no data</td></tr>';
+  return '<div class="panel reveal"><h3>'+esc(title)+'</h3><p class="pd">'+esc(desc)+'</p><table><thead><tr><th>'+esc(kcol)+'</th><th>·</th><th class="v">'+esc(vcol)+'</th></tr></thead><tbody>'+body+'</tbody></table></div>';
 }
-
-function lineChart(series, opts) {
-  // series: [{name,color,points:[{date,value}]}]
-  const W = 760, H = 220, padL = 36, padR = 12, padT = 14, padB = 26;
-  const dates = series[0] ? series[0].points.map(p => p.date) : [];
-  if (!dates.length) return '<div class="muted small" style="padding:18px">no timeseries yet</div>';
-  const maxV = Math.max(1, ...series.flatMap(s => s.points.map(p => p.value)));
-  const x = (i) => padL + (i/(Math.max(1,dates.length-1)))*(W-padL-padR);
-  const y = (v) => padT + (1 - v/maxV)*(H-padT-padB);
-  const grid = [0,0.25,0.5,0.75,1].map(f => {
-    const gy = padT + f*(H-padT-padB); const val = Math.round(maxV*(1-f));
-    return \`<line x1="\${padL}" y1="\${gy}" x2="\${W-padR}" y2="\${gy}" stroke="#232c3d" stroke-width="1"/><text x="4" y="\${gy+3}" fill="#5d6982" font-size="10">\${val}</text>\`;
-  }).join("");
-  const paths = series.map(s => {
-    const d = s.points.map((p,i) => (i?'L':'M')+x(i).toFixed(1)+' '+y(p.value).toFixed(1)).join(' ');
-    return \`<path d="\${d}" fill="none" stroke="\${s.color}" stroke-width="2" stroke-linejoin="round"/>\`;
-  }).join("");
-  const lbl = (i) => \`<text x="\${x(i)}" y="\${H-8}" fill="#5d6982" font-size="10" text-anchor="middle">\${dates[i].slice(5)}</text>\`;
-  const ticks = dates.length>1 ? [0, Math.floor(dates.length/2), dates.length-1].map(lbl).join("") : "";
-  const legend = series.map(s => \`<span><i style="background:\${s.color}"></i>\${esc(s.name)}</span>\`).join("");
-  return \`<div class="legend">\${legend}</div><svg viewBox="0 0 \${W} \${H}" width="100%" preserveAspectRatio="xMidYMid meet">\${grid}\${paths}\${ticks}</svg>\`;
+function rows(arr,k){ return (arr||[]).map(r=>({label:r[k]??"unknown", value:r.users})); }
+
+function lineChart(series){
+  const W=820,H=230,pl=40,pr=14,pt=16,pb=28;
+  const dates = series[0]?series[0].points.map(p=>p.date):[];
+  if(!dates.length) return '<div class="mono" style="color:var(--ink-faint);padding:18px;font-size:12px">no timeseries yet</div>';
+  const maxV = Math.max(1, ...series.flatMap(s=>s.points.map(p=>p.value)));
+  const x=i=>pl+(i/Math.max(1,dates.length-1))*(W-pl-pr);
+  const y=v=>pt+(1-v/maxV)*(H-pt-pb);
+  const grid=[0,.25,.5,.75,1].map(f=>{const gy=pt+f*(H-pt-pb);const val=Math.round(maxV*(1-f));return '<line x1="'+pl+'" y1="'+gy+'" x2="'+(W-pr)+'" y2="'+gy+'" stroke="#161d27"/><text x="4" y="'+(gy+3)+'" fill="#5c6675" font-size="10" font-family="JetBrains Mono">'+val+'</text>';}).join('');
+  const area = series.length?(()=>{const s=series[0];const top=s.points.map((p,i)=>(i?'L':'M')+x(i).toFixed(1)+' '+y(p.value).toFixed(1)).join(' ');return '<path d="'+top+' L'+x(s.points.length-1).toFixed(1)+' '+(H-pb)+' L'+pl+' '+(H-pb)+' Z" fill="url(#ag)" opacity=".18"/>';})():'';
+  const paths=series.map(s=>{const d=s.points.map((p,i)=>(i?'L':'M')+x(i).toFixed(1)+' '+y(p.value).toFixed(1)).join(' ');return '<path d="'+d+'" fill="none" stroke="'+s.color+'" stroke-width="2" stroke-linejoin="round"/>';}).join('');
+  const ticks = dates.length>1?[0,Math.floor(dates.length/2),dates.length-1].map(i=>'<text x="'+x(i)+'" y="'+(H-8)+'" fill="#5c6675" font-size="10" font-family="JetBrains Mono" text-anchor="middle">'+dates[i].slice(5)+'</text>').join(''):'';
+  const legend=series.map(s=>'<span><i style="background:'+s.color+'"></i>'+esc(s.name)+'</span>').join('');
+  return '<div class="legend">'+legend+'</div><svg viewBox="0 0 '+W+' '+H+'" width="100%" preserveAspectRatio="xMidYMid meet"><defs><linearGradient id="ag" x1="0" y1="0" x2="0" y2="1"><stop offset="0" stop-color="#4fd6ff"/><stop offset="1" stop-color="#4fd6ff" stop-opacity="0"/></linearGradient></defs>'+grid+area+paths+ticks+'</svg>';
 }
 
-function render(d) {
-  document.getElementById("generated").textContent = "updated " + new Date(d.generated_at).toLocaleString();
-  document.getElementById("freshness").textContent = "as of " + new Date(d.generated_at).toLocaleTimeString();
-  const c = document.getElementById("content");
-  const u = d.users, a = d.active, lc = d.lifecycle, q = d.quality, ret = d.retention;
+function barsChart(title,desc,data,labelFn,color){
+  const max=Math.max(1,...data.map(d=>d.v));
+  const W=820,H=160,pl=8,pr=8,pb=22,pt=8,n=data.length;
+  const bw=(W-pl-pr)/Math.max(1,n);
+  const bars=data.map((d,i)=>{const h=(d.v/max)*(H-pt-pb);const bx=pl+i*bw;return '<rect x="'+(bx+1.5).toFixed(1)+'" y="'+(H-pb-h).toFixed(1)+'" width="'+(bw-3).toFixed(1)+'" height="'+h.toFixed(1)+'" rx="1.5" fill="'+color+'"/>';}).join('');
+  const labels=data.map((d,i)=> (i%3===0)?'<text x="'+(pl+i*bw+bw/2).toFixed(1)+'" y="'+(H-7)+'" fill="#5c6675" font-size="9" font-family="JetBrains Mono" text-anchor="middle">'+esc(labelFn(d,i))+'</text>':'').join('');
+  return '<div class="panel reveal"><h3>'+esc(title)+'</h3><p class="pd">'+esc(desc)+'</p><svg viewBox="0 0 '+W+' '+H+'" width="100%" preserveAspectRatio="xMidYMid meet">'+bars+labels+'</svg></div>';
+}
 
-  // hero + active funnel timeseries
-  const ts = (d.timeseries.daily || []);
-  const headlineSeries = [
-    { name: "headline DAU", color: "#5b9dff", points: ts.map(r => ({date:r.date, value:r.headline})) },
-    { name: "meaningful", color: "#7c5cff", points: ts.map(r => ({date:r.date, value:r.meaningful})) },
-    { name: "raw", color: "#39507a", points: ts.map(r => ({date:r.date, value:r.raw})) },
+function render(d){
+  const dt = new Date(d.generated_at);
+  document.getElementById("generated").textContent = dt.toISOString().slice(0,10)+" · "+dt.toLocaleTimeString()+" · UTC rollup";
+  const c=document.getElementById("content");
+  const u=d.users,a=d.active,lc=d.lifecycle,q=d.quality,ret=d.retention,e=d.errors,h=d.health,b=d.breakdowns;
+  const ts=d.timeseries.daily||[];
+  const series=[
+    {name:"headline DAU",color:"#4fd6ff",points:ts.map(r=>({date:r.date,value:r.headline}))},
+    {name:"meaningful",color:"#ffb454",points:ts.map(r=>({date:r.date,value:r.meaningful}))},
+    {name:"raw / reached",color:"#5c6675",points:ts.map(r=>({date:r.date,value:r.raw}))},
   ];
-
-  let html = "";
-
-  // ---- HERO ----
-  html += \`<div class="hero">
-    <div class="card hero-main">
-      <div class="eyebrow">Total users</div>
-      <div class="hero-number">\${fmt(u.total_users)}</div>
-      <div class="hero-note">Distinct real people who installed or did meaningful work in jcode. Excludes CI runners and counts each anonymous machine id once. This is the headline number.</div>
-      <div class="hero-sub">
-        <div><div class="k">Core (did work)</div><div class="v">\${fmt(u.core_users)}</div></div>
-        <div><div class="k">Installed</div><div class="v">\${fmt(u.installed_users)}</div></div>
-        <div><div class="k">Reached (ran it)</div><div class="v">\${fmt(u.reached_users)}</div></div>
-      </div>
-    </div>
-    <div class="card hero-side">
-      <h3>Active users (distinct, headline definition)</h3>
-      <div class="grid hero-kpis" style="gap:10px">
-        \${kpi("DAU", fmt(a.dau), "today, meaningful + release", 1, true)}
-        \${kpi("WAU", fmt(a.wau), "last 7 days", 1, true)}
-        \${kpi("MAU", fmt(a.mau), "last 30 days", 1, true)}
-      </div>
-      <div style="margin-top:12px">\${lineChart(headlineSeries, {})}</div>
-    </div>
-  </div>\`;
-
-  // ---- Why these differ (transparency band) ----
-  html += \`<div class="section-title"><h2>How the user number is built</h2><span class="hint">each tier is broader than the one below it; nothing is dropped</span></div>\`;
-  html += \`<div class="grid g4">
-    \${kpi("Reached", fmt(u.reached_users), "ran jcode at least once (non-CI)", 2)}
-    \${kpi("Total users", fmt(u.total_users), "installed OR did meaningful work", 1, true)}
-    \${kpi("Core users", fmt(u.core_users), "did meaningful work", 2)}
-    \${kpi("CI ids (excluded)", fmt(u.ci_ids), "ephemeral runners, filtered out", 3)}
-  </div>
-  <div class="grid g2" style="margin-top:14px">
-    \${kpi("All ids incl. CI + dev", fmt(u.all_ids_including_ci), "raw upper bound, never used as headline", 3)}
-    \${kpi("Installed users", fmt(u.installed_users), "distinct non-CI install events", 2)}
-  </div>\`;
-
-  // ---- Acquisition & retention ----
-  html += \`<div class="section-title"><h2>Acquisition &amp; retention</h2><span class="hint">important: are new users sticking?</span></div>\`;
-  html += \`<div class="grid g4">
-    \${kpi("Install events", fmt(lc.install_events), fmt(lc.install_ids_noci)+" distinct (non-CI)", 2)}
-    \${kpi("Upgrades", fmt(lc.upgrade_events), "version bumps observed", 3)}
-    \${kpi("D7 retention", pct(ret.d7_retention), (ret.d7_retained||0)+" of "+(ret.d7_cohort||0)+" returned", 1, true)}
-    \${kpi("Multi-session rate", pct(q.multi_session_rate), "users running >1 session at once", 3)}
-  </div>\`;
-  html += \`<div class="grid g2" style="margin-top:14px">
-    <div class="card panel"><h3>Daily active users (60d)</h3><p class="desc">headline = meaningful work on a release build, excluding CI. raw = anyone who launched.</p>\${lineChart(headlineSeries, {})}</div>
-    <div class="card panel"><h3>New installs / day (60d, non-CI)</h3><p class="desc">distinct ids whose first install event landed that day.</p>\${lineChart([{name:"installs",color:"#3fb950",points:(d.timeseries.installs||[]).map(r=>({date:r.date,value:r.installs}))}], {})}</div>
-  </div>\`;
-
-  // ---- Engagement quality ----
-  html += \`<div class="section-title"><h2>Engagement quality</h2><span class="hint">30-day, non-CI sessions</span></div>\`;
-  html += \`<div class="grid g4">
-    \${kpi("Avg session length", dec(q.avg_session_mins,1)+" min", "per meaningful session", 2)}
-    \${kpi("Avg turns / session", dec(q.avg_turns,1), "user prompts per session", 2)}
-    \${kpi("Session success rate", pct(q.success_rate), "ended in a successful state", 1, true)}
-    \${kpi("Abandon rate", pct(q.abandon_rate), "left before first response", 2)}
-  </div>
-  <div class="grid g4" style="margin-top:14px">
-    \${kpi("Turn success rate", pct(d.turns.turn_success_rate), "per-turn, 30d", 2)}
-    \${kpi("Avg turn time", ms(d.turns.avg_turn_ms), "active duration per turn", 3)}
-    \${kpi("Time to first response", ms(q.avg_first_response_ms), "agent responsiveness", 2)}
-    \${kpi("Avg tool latency", ms(q.avg_tool_latency_ms), "per executed tool call", 3)}
-  </div>
-  <div class="grid g2" style="margin-top:14px">
-    \${kpi("Tokens (30d)", fmt(q.tokens_30d), "input + output across sessions", 3)}
-    \${kpi("Crash rate", pct(lc.crash_rate)+"  ·  completion "+(lc.lifecycle_completion_ratio==null?"—":lc.lifecycle_completion_ratio), "session_crash share / (ends+crashes)/starts", 1, true)}
-  </div>\`;
-
-  // ---- Reliability / errors ----
-  const e = d.errors;
-  html += \`<div class="section-title"><h2>Reliability</h2><span class="hint">error counts, 30d non-CI — watch for spikes</span></div>\`;
-  html += \`<div class="grid g4">
-    \${kpi("Provider timeouts", fmt(e.provider_timeout), "", (e.provider_timeout>0?1:3))}
-    \${kpi("Rate limited", fmt(e.rate_limited), "", (e.rate_limited>0?2:3))}
-    \${kpi("Auth failures", fmt(e.auth_failed), "", (e.auth_failed>0?1:3))}
-    \${kpi("Tool / MCP errors", fmt((e.tool_error||0)+(e.mcp_error||0)), fmt(e.tool_error)+" tool · "+fmt(e.mcp_error)+" mcp", 3)}
-  </div>\`;
-
-  // ---- Breakdowns ----
-  const b = d.breakdowns;
-  const rows = (arr, k) => (arr||[]).map(r => ({ label: r[k] ?? "unknown", value: r.users }));
-  html += \`<div class="section-title"><h2>Who &amp; what</h2><span class="hint">distinct users per bucket</span></div>\`;
-  html += \`<div class="grid g2">
-    \${barTable("Versions", "adoption by release (non-CI users)", rows(b.versions,"version"), "version", "users")}
-    \${barTable("Operating system", "OS split", rows(b.os,"os"), "os", "users")}
-  </div>
-  <div class="grid g2" style="margin-top:14px">
-    \${barTable("Providers", "meaningful sessions by provider", rows(b.providers,"provider"), "provider", "users")}
-    \${barTable("Auth method", "successful auth by provider", rows(b.auth,"auth_provider"), "provider", "users")}
-  </div>
-  <div class="grid g2" style="margin-top:14px">
-    \${barTable("Build channel", "incl. dev/local; release is the headline channel", rows(b.channels,"build_channel"), "channel", "users")}
-    \${barTable("Onboarding funnel", "distinct users reaching each step", rows(b.onboarding,"step"), "step", "users")}
-  </div>\`;
-
-  // ---- Feature adoption ----
-  const f = d.features;
-  const featRows = Object.entries(f||{}).map(([k,v]) => ({label:k.replace(/_/g,' '), value:v})).sort((a,b)=>b.value-a.value);
-  html += \`<div class="section-title"><h2>Feature adoption</h2><span class="hint">distinct users using each feature, 30d</span></div>\`;
-  html += \`<div class="grid g2">
-    \${barTable("Features", "how many users touched each capability", featRows, "feature", "users")}
-    \${transportPanel(d.transport)}
-  </div>\`;
-
-  // ---- Feedback ----
-  if ((d.feedback||[]).length) {
-    html += \`<div class="section-title"><h2>Recent feedback</h2><span class="hint">explicit user submissions</span></div>\`;
-    html += \`<div class="card panel">\` + d.feedback.map(fb => \`
-      <div class="feedback-item">
-        <div class="q">\${esc(fb.feedback_text)}</div>
-        <div class="m">\${esc(new Date(fb.created_at+'Z').toLocaleString())} · v\${esc(fb.version||'?')}\${fb.feedback_rating?' · '+esc(fb.feedback_rating):''}\${fb.feedback_reason?' · '+esc(fb.feedback_reason):''}</div>
-      </div>\`).join("") + \`</div>\`;
+  let H="";
+
+  // HERO
+  H+='<div class="hero reveal">'
+    + '<div class="hero-main">'
+      + '<div class="label">total users · headline</div>'
+      + '<div class="big">'+fmt(u.total_users)+'<span class="unit">people</span></div>'
+      + '<div class="hero-desc">Distinct real people who installed jcode or did meaningful work in it. CI runners excluded; each anonymous machine id counts once.</div>'
+      + '<div class="ladder">'
+        + '<div class="rung"><span class="lk"><b>Reached</b> · launched it at least once</span><span class="lv dim">'+fmt(u.reached_users)+'</span></div>'
+        + '<div class="rung"><span class="lk"><b>Total users</b> · installed OR did work</span><span class="lv amber">'+fmt(u.total_users)+'</span></div>'
+        + '<div class="rung"><span class="lk"><b>Core</b> · did meaningful work</span><span class="lv cyan">'+fmt(u.core_users)+'</span></div>'
+        + '<div class="rung"><span class="lk"><b>Installed</b> · distinct install events</span><span class="lv dim">'+fmt(u.installed_users)+'</span></div>'
+      + '</div>'
+    + '</div>'
+    + '<div class="hero-side">'
+      + '<h3>active users · distinct, headline definition</h3>'
+      + '<div class="triple">'
+        + '<div class="t"><div class="tn">'+fmt(a.dau)+'</div><div class="tl">DAU</div><div class="tsub">today</div></div>'
+        + '<div class="t"><div class="tn">'+fmt(a.wau)+'</div><div class="tl">WAU</div><div class="tsub">7 days</div></div>'
+        + '<div class="t"><div class="tn">'+fmt(a.mau)+'</div><div class="tl">MAU</div><div class="tsub">30 days</div></div>'
+      + '</div>'
+      + '<div style="margin-top:16px;flex:1">'+lineChart(series)+'</div>'
+    + '</div>'
+  + '</div>';
+
+  // 01 USER COMPOSITION
+  H+=sec("01","user composition","each tier broader than the one below · nothing dropped");
+  H+='<div class="grid g4">'
+    + stat("Reached", fmt(u.reached_users), "ran jcode ≥1 time (non-CI)")
+    + stat("Total users", fmt(u.total_users), "installed OR did work", {key:true})
+    + stat("Core users", fmt(u.core_users), "did meaningful work")
+    + stat("Installed", fmt(u.installed_users), "distinct install events")
+  + '</div>';
+  H+='<div class="grid g3" style="margin-top:14px">'
+    + stat("CI ids · excluded", fmt(u.ci_ids), "ephemeral runners, filtered")
+    + stat("All ids incl. CI + dev", fmt(u.all_ids_including_ci), "raw upper bound, never headline")
+    + stat("Install events (raw)", fmt(lc.install_events), fmt(lc.install_ids_noci)+" distinct non-CI")
+  + '</div>';
+
+  // 02 ACQUISITION & RETENTION
+  H+=sec("02","acquisition & retention","are new users sticking?");
+  H+='<div class="grid g4">'
+    + stat("D7 retention", pct(ret.d7_retention), (ret.d7_retained||0)+" of "+(ret.d7_cohort||0)+" returned", {key:true})
+    + stat("Upgrades", fmt(lc.upgrade_events), "version bumps observed")
+    + stat("Multi-session rate", pct(q.multi_session_rate), ">1 session at once")
+    + stat("Meaningful sessions 30d", fmt(q.meaningful_sessions_30d), "real-work sessions")
+  + '</div>';
+  H+='<div class="grid g2" style="margin-top:14px">'
+    + '<div class="panel reveal"><h3>daily active users · 60d</h3><p class="pd">headline = meaningful work on release, ex-CI · raw = anyone who launched</p>'+lineChart(series)+'</div>'
+    + '<div class="panel reveal"><h3>new installs / day · 60d non-CI</h3><p class="pd">distinct ids whose install landed that day</p>'+lineChart([{name:"installs",color:"#5ad27a",points:(d.timeseries.installs||[]).map(r=>({date:r.date,value:r.installs}))}])+'</div>'
+  + '</div>';
+
+  // 03 ENGAGEMENT
+  H+=sec("03","engagement quality","30-day · non-CI sessions");
+  H+='<div class="grid g4">'
+    + stat("Session success", pct(q.success_rate), "ended in success state", {key:true})
+    + stat("Avg session", dec(q.avg_session_mins)+" min", "per meaningful session")
+    + stat("Avg turns / session", dec(q.avg_turns), "user prompts / session")
+    + stat("Abandon rate", pct(q.abandon_rate), "left before first response")
+  + '</div>';
+  H+='<div class="grid g4" style="margin-top:14px">'
+    + stat("Turn success", pct(d.turns.turn_success_rate), "per-turn, 30d")
+    + stat("Avg turn time", ms(d.turns.avg_turn_ms), "active duration / turn")
+    + stat("Time to first response", ms(q.avg_first_response_ms), "agent responsiveness")
+    + stat("Avg tool latency", ms(q.avg_tool_latency_ms), "per executed tool call")
+  + '</div>';
+  H+='<div class="grid g2" style="margin-top:14px">'
+    + stat("Tokens · 30d", fmt(q.tokens_30d), "input + output across sessions")
+    + stat("Crash rate", pct(lc.crash_rate)+" · completion "+(lc.lifecycle_completion_ratio==null?"—":lc.lifecycle_completion_ratio), "crash share · (ends+crashes)/starts", {key:true})
+  + '</div>';
+
+  // 04 RELIABILITY
+  const anyErr = (e.provider_timeout||0)+(e.auth_failed||0)+(e.rate_limited||0) > 0;
+  H+=sec("04","reliability","error counts · 30d non-CI · watch for spikes");
+  H+='<div class="grid g4">'
+    + stat("Provider timeouts", fmt(e.provider_timeout), "", {alert:(e.provider_timeout||0)>0})
+    + stat("Rate limited", fmt(e.rate_limited), "")
+    + stat("Auth failures", fmt(e.auth_failed), "", {alert:(e.auth_failed||0)>0})
+    + stat("Tool / MCP errors", fmt((e.tool_error||0)+(e.mcp_error||0)), fmt(e.tool_error)+" tool · "+fmt(e.mcp_error)+" mcp")
+  + '</div>';
+
+  // 05 WHO & WHAT
+  H+=sec("05","who & what","distinct users per bucket");
+  H+='<div class="grid g2">'
+    + tablePanel("Versions","adoption by release (non-CI)", rows(b.versions,"version"), "version","users")
+    + tablePanel("Platform","os / arch split", rows(b.arch,"platform"), "platform","users")
+  + '</div>';
+  H+='<div class="grid g2" style="margin-top:14px">'
+    + tablePanel("Providers","meaningful sessions by provider", rows(b.providers,"provider"), "provider","users")
+    + tablePanel("Auth method","successful auth by provider", rows(b.auth,"auth_provider"), "provider","users")
+  + '</div>';
+  H+='<div class="grid g2" style="margin-top:14px">'
+    + tablePanel("Build channel","incl. dev/local · release is headline channel", rows(b.channels,"build_channel"), "channel","users")
+    + tablePanel("Onboarding funnel","distinct users reaching each step", rows(b.onboarding,"step"), "step","users")
+  + '</div>';
+  // usage-by-hour bar chart
+  const hourData = Array.from({length:24},(_,i)=>{const m=(b.hours||[]).find(r=>Number(r.hour)===i); return {v:m?m.sessions:0, hr:i};});
+  H+='<div class="grid g2" style="margin-top:14px">'
+    + barsChart("Session starts by UTC hour","when sessions begin (non-CI)", hourData, (d)=>String(d.hr).padStart(2,'0'), "#ffb454")
+    + tablePanel("Operating system","os split (non-CI)", rows(b.os,"os"), "os","users")
+  + '</div>';
+
+  // 06 FEATURE ADOPTION
+  const fr = Object.entries(d.features||{}).map(([k,v])=>({label:k.replace(/_/g," "),value:v})).sort((a,b)=>b.value-a.value);
+  const tr = [["https",d.transport.https],["ws reuse",d.transport.ws_reuse],["ws fresh",d.transport.ws_fresh],["native http2",d.transport.native_http2],["cli subprocess",d.transport.cli],["other",d.transport.other]].map(([label,value])=>({label,value:value||0})).sort((a,b)=>b.value-a.value);
+  H+=sec("06","feature adoption","distinct users per capability · 30d");
+  H+='<div class="grid g2">'
+    + tablePanel("Features","users who touched each capability", fr, "feature","users")
+    + tablePanel("Transport mix","request transport counts (30d non-CI)", tr, "transport","count")
+  + '</div>';
+
+  // 07 DATA HEALTH (diagnostic)
+  H+=sec("07","pipeline health","diagnostic · not product metrics · watch for drift");
+  H+='<div class="grid g4">'
+    + stat("Lifecycle ids", fmt(h.lifecycle_ids), "distinct ids w/ end/crash")
+    + stat("Session-start ids", fmt(h.session_start_ids), "distinct ids that launched")
+    + stat("Ends without install", fmt(h.lifecycle_ids_without_install), "id mismatch / pre-install loss", {alert:(h.lifecycle_ids_without_install||0) > (h.lifecycle_ids||0)*0.5})
+    + stat("Heaviest single id", fmt(h.max_session_events_one_id), "max session events for one id")
+  + '</div>';
+  H+='<div class="grid g3" style="margin-top:14px">'
+    + stat("Top-5 id session events", fmt(h.top5_session_events), "of "+fmt(h.total_session_events)+" total")
+    + stat("Total session events", fmt(h.total_session_events), "ends + crashes, all time")
+    + stat("CI ids (30d window)", fmt(a.ci_mau), "filtered from headline")
+  + '</div>';
+
+  // 08 FEEDBACK
+  if((d.feedback||[]).length){
+    H+=sec("08","recent feedback","explicit user submissions");
+    H+='<div class="panel reveal">'+d.feedback.map(fb=>'<div class="fb"><div class="q">'+esc(fb.feedback_text)+'</div><div class="m">'+esc(new Date((fb.created_at||"").replace(" ","T")+"Z").toLocaleString())+' · v'+esc(fb.version||"?")+(fb.feedback_rating?' · <span class="badge">'+esc(fb.feedback_rating)+'</span>':'')+(fb.feedback_reason?' · '+esc(fb.feedback_reason):'')+'</div></div>').join('')+'</div>';
   }
 
-  c.innerHTML = html;
+  c.innerHTML=H;
 }
 
-function transportPanel(t) {
-  const rows = [
-    ["https", t.https], ["ws fresh", t.ws_fresh], ["ws reuse", t.ws_reuse],
-    ["cli subprocess", t.cli], ["native http2", t.native_http2], ["other", t.other],
-  ].map(([label,value]) => ({label, value: value||0})).sort((a,b)=>b.value-a.value);
-  return barTable("Transport mix", "request transport counts (30d non-CI)", rows, "transport", "count");
-}
-
-// events
-document.getElementById("unlock").addEventListener("click", () => {
-  const v = document.getElementById("token").value.trim();
-  if (!v) { document.getElementById("gate-err").textContent = "Enter a token."; return; }
-  TOKEN = v; localStorage.setItem("jcode_dash_token", v); load();
-});
-document.getElementById("token") && document.getElementById("token").addEventListener("keydown", (e)=>{ if(e.key==="Enter") document.getElementById("unlock").click(); });
-document.getElementById("refresh").addEventListener("click", load);
-document.getElementById("logout").addEventListener("click", () => { localStorage.removeItem("jcode_dash_token"); TOKEN=""; showGate(""); });
-
+document.getElementById("unlock").addEventListener("click",()=>{const v=document.getElementById("token").value.trim();if(!v){document.getElementById("gate-err").textContent="enter a token";return;}TOKEN=v;localStorage.setItem("jcode_dash_token",v);load();});
+document.getElementById("token").addEventListener("keydown",e=>{if(e.key==="Enter")document.getElementById("unlock").click();});
+document.getElementById("refresh").addEventListener("click",load);
+document.getElementById("logout").addEventListener("click",()=>{localStorage.removeItem("jcode_dash_token");TOKEN="";showGate("");});
 load();
 </script>
 </body>
diff --git a/telemetry-worker/src/stats.js b/telemetry-worker/src/stats.js
index 9ee9d0881..6f94edb5f 100644
--- a/telemetry-worker/src/stats.js
+++ b/telemetry-worker/src/stats.js
@@ -195,6 +195,11 @@ export async function getStats(env) {
     FROM events WHERE is_ci = 0 AND os IS NOT NULL
     GROUP BY os ORDER BY users DESC
   `);
+  const arch = await many(env, `
+    SELECT (COALESCE(os,'?') || ' / ' || COALESCE(arch,'?')) AS platform, COUNT(DISTINCT telemetry_id) AS users
+    FROM events WHERE is_ci = 0 AND os IS NOT NULL
+    GROUP BY os, arch ORDER BY users DESC LIMIT 12
+  `);
   const channels = await many(env, `
     SELECT COALESCE(build_channel,'unknown') AS build_channel, COUNT(DISTINCT telemetry_id) AS users
     FROM events WHERE event IN ('session_end','session_crash')
@@ -216,6 +221,48 @@ export async function getStats(env) {
     GROUP BY step ORDER BY users DESC
   `);
 
+  // --- Usage timing: session starts by UTC hour ---------------------------
+  const hours = await many(env, `
+    SELECT session_start_hour_utc AS hour, COUNT(*) AS sessions
+    FROM events
+    WHERE event = 'session_start' AND is_ci = 0 AND session_start_hour_utc IS NOT NULL
+    GROUP BY session_start_hour_utc ORDER BY session_start_hour_utc
+  `);
+
+  // --- Data health: identity reconciliation + duplicate/skew signals ------
+  // These are *not* product metrics; they tell you whether the pipeline is
+  // healthy (events arriving, ids matching installs, no single id dominating).
+  const health = await one(env, `
+    WITH lifecycle AS (
+      SELECT telemetry_id FROM events WHERE event IN ('session_end','session_crash')
+    ), install_ids AS (
+      SELECT DISTINCT telemetry_id FROM events WHERE event = 'install'
+    )
+    SELECT
+      (SELECT COUNT(DISTINCT telemetry_id) FROM lifecycle) AS lifecycle_ids,
+      (SELECT COUNT(DISTINCT telemetry_id) FROM events WHERE event = 'session_start') AS session_start_ids,
+      (SELECT COUNT(DISTINCT l.telemetry_id) FROM lifecycle l
+         LEFT JOIN install_ids i ON i.telemetry_id = l.telemetry_id
+         WHERE i.telemetry_id IS NULL) AS lifecycle_ids_without_install
+  `);
+  const skew = await one(env, `
+    SELECT
+      MAX(c) AS max_session_events_one_id,
+      SUM(c) AS total_session_events,
+      (SELECT SUM(c2) FROM (SELECT c AS c2 FROM (
+         SELECT telemetry_id, COUNT(*) AS c FROM events
+         WHERE event IN ('session_end','session_crash')
+         GROUP BY telemetry_id ORDER BY c DESC LIMIT 5))) AS top5_session_events
+    FROM (SELECT telemetry_id, COUNT(*) AS c FROM events
+          WHERE event IN ('session_end','session_crash') GROUP BY telemetry_id)
+  `);
+  const meaningfulSessions = await one(env, `
+    SELECT COUNT(*) AS meaningful_sessions
+    FROM events
+    WHERE event IN ('session_end','session_crash') AND is_ci = 0
+      AND created_at > datetime('now','-30 days') AND ${MEANINGFUL_SQL}
+  `);
+
   // --- Daily timeseries (last 60 days) for charts -------------------------
   const daily = await many(env, `
     SELECT
@@ -262,12 +309,13 @@ export async function getStats(env) {
     active,
     lifecycle: { ...lifecycle, lifecycle_completion_ratio: lifecycleCompletion, crash_rate: crashRate },
     retention: { ...retention, d7_retention: d7Retention },
-    quality,
+    quality: { ...quality, meaningful_sessions_30d: meaningfulSessions.meaningful_sessions || 0 },
     turns,
     errors,
     features,
     transport,
-    breakdowns: { versions, os, channels, providers, auth, onboarding },
+    breakdowns: { versions, os, arch, channels, providers, auth, onboarding, hours },
+    health: { ...health, ...skew },
     timeseries: { daily, installs: dailyInstalls },
     feedback,
   };

From 5bfd642a76579934cbeef7eb3f76502f0822ba33 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 01:35:59 -0700
Subject: [PATCH 05/57] feat(skills): endorse NVIDIA CUDA-X skills from
 official NVIDIA/skills catalog

Add NVIDIA's CUDA-X / GPU accelerated-computing agent skills (cuOpt,
cuPyNumeric, cuDF, CUDA-Q, and the cuTile/TileGym GPU-dev skill) to the
endorsed-skills list, sourced from the official NVIDIA-verified catalog
at github.com/NVIDIA/skills.

- EndorsedSkill gains category + optional install hint fields.
- /skills now groups endorsed skills by category with per-category
  installed counts and shows the 'npx skills add nvidia/skills' install
  command for missing skills, plus the catalog URL.
- Tests cover the new fields and the NVIDIA catalog entries.
---
 crates/jcode-base/src/skill.rs           | 189 +++++++++++++++++++++++
 crates/jcode-tui/src/tui/app/state_ui.rs |  47 +++++-
 crates/jcode-tui/src/tui/app/tests.rs    |  14 ++
 3 files changed, 242 insertions(+), 8 deletions(-)

diff --git a/crates/jcode-base/src/skill.rs b/crates/jcode-base/src/skill.rs
index a5bf73cae..3dcd38208 100644
--- a/crates/jcode-base/src/skill.rs
+++ b/crates/jcode-base/src/skill.rs
@@ -429,28 +429,179 @@ pub struct EndorsedSkill {
     pub name: &'static str,
     /// One-line description of what the skill does.
     pub description: &'static str,
+    /// Grouping label used to organize the endorsed list (e.g. "jcode",
+    /// "NVIDIA CUDA-X").
+    pub category: &'static str,
     /// Where users can get the skill (repo path, URL, or short note).
     pub source: &'static str,
+    /// Optional install command/hint shown when the skill is not installed.
+    pub install: Option<&'static str>,
 }
 
 /// Curated list of skills endorsed by jcode. Used by the `/skills` command to
 /// show users which recommended skills they have installed and which they are
 /// missing. This is the single source of truth for endorsed skills.
+///
+/// The NVIDIA CUDA-X entries mirror the official NVIDIA-verified catalog at
+/// <https://github.com/NVIDIA/skills>; install them with
+/// `npx skills add nvidia/skills --skill <name> --yes`.
 pub const ENDORSED_SKILLS: &[EndorsedSkill] = &[
     EndorsedSkill {
         name: "optimization",
         description: "Improve performance, latency, throughput, memory usage, or general efficiency by defining metrics, measuring, attributing bottlenecks, and prioritizing macro-optimizations.",
+        category: "jcode",
         source: "bundled in jcode repo (.jcode/skills/optimization)",
+        install: None,
     },
     EndorsedSkill {
         name: "todo-planning-skill",
         description: "Create thorough, well-structured todo lists for long tasks, including reflection, static analysis, verification, and next-step updates.",
+        category: "jcode",
         source: "bundled with jcode / Claude Code skills",
+        install: None,
     },
     EndorsedSkill {
         name: "firefox-browser",
         description: "Control the user's Firefox browser with their logins and cookies intact to browse, fill forms, click, screenshot, and read authenticated pages.",
+        category: "jcode",
         source: "bundled with jcode / Claude Code skills",
+        install: None,
+    },
+    // NVIDIA CUDA-X / GPU accelerated-computing skills from the official
+    // NVIDIA-verified catalog (github.com/NVIDIA/skills).
+    EndorsedSkill {
+        name: "cuopt-developer",
+        description: "Modify, build, test, debug, and contribute to NVIDIA cuOpt (C++/CUDA, Python, server, CI) — solver internals, PRs, DCO, and code conventions.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill cuopt-developer --yes"),
+    },
+    EndorsedSkill {
+        name: "cuopt-install",
+        description: "Install NVIDIA cuOpt for Python, C, or server via pip, conda, or Docker, and verify the install.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill cuopt-install --yes"),
+    },
+    EndorsedSkill {
+        name: "cuopt-numerical-optimization-api-c",
+        description: "Solve LP, MILP, and QP (beta) with the cuOpt C API for embedding optimization in C/C++.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some(
+            "npx skills add nvidia/skills --skill cuopt-numerical-optimization-api-c --yes",
+        ),
+    },
+    EndorsedSkill {
+        name: "cuopt-numerical-optimization-api-cli",
+        description: "Solve LP, MILP, and QP (beta) with cuOpt from MPS files via the cuopt_cli command line.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some(
+            "npx skills add nvidia/skills --skill cuopt-numerical-optimization-api-cli --yes",
+        ),
+    },
+    EndorsedSkill {
+        name: "cuopt-numerical-optimization-api-python",
+        description: "Solve LP, MILP, and QP (beta) with the cuOpt Python API — linear/quadratic objectives, integer variables, scheduling, portfolio, and least squares.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some(
+            "npx skills add nvidia/skills --skill cuopt-numerical-optimization-api-python --yes",
+        ),
+    },
+    EndorsedSkill {
+        name: "cuopt-numerical-optimization-formulation",
+        description: "LP, MILP, and QP concepts and formulation patterns (parameters, constraints, decisions, objective). Concepts only; no API.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some(
+            "npx skills add nvidia/skills --skill cuopt-numerical-optimization-formulation --yes",
+        ),
+    },
+    EndorsedSkill {
+        name: "cuopt-routing-api-python",
+        description: "Solve vehicle routing (VRP, TSP, PDP) with the cuOpt Python API.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill cuopt-routing-api-python --yes"),
+    },
+    EndorsedSkill {
+        name: "cuopt-routing-formulation",
+        description: "Vehicle routing (VRP, TSP, PDP) problem types and data requirements. Domain concepts; no API or interface.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill cuopt-routing-formulation --yes"),
+    },
+    EndorsedSkill {
+        name: "cuopt-server-api-python",
+        description: "Run the cuOpt REST server — start it, call endpoints, and use Python/curl client examples.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill cuopt-server-api-python --yes"),
+    },
+    EndorsedSkill {
+        name: "cuopt-server-common",
+        description: "Understand what the cuOpt REST server does and how requests flow. Concepts only; no deploy or client code.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill cuopt-server-common --yes"),
+    },
+    EndorsedSkill {
+        name: "cuopt-user-rules",
+        description: "Base rules for end users calling NVIDIA cuOpt (routing/LP/MILP/QP/install/server).",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill cuopt-user-rules --yes"),
+    },
+    EndorsedSkill {
+        name: "cupynumeric-install",
+        description: "Install and verify NVIDIA cuPyNumeric (NumPy/SciPy on multi-node multi-GPU) for Python — requirements, commands, and verification.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill cupynumeric-install --yes"),
+    },
+    EndorsedSkill {
+        name: "cupynumeric-migration-readiness",
+        description: "Assess NumPy code before porting to cuPyNumeric — which patterns scale on GPU, what must be refactored, and a READY/REFACTOR/NOT-RECOMMENDED verdict.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill cupynumeric-migration-readiness --yes"),
+    },
+    EndorsedSkill {
+        name: "cupynumeric-hdf5",
+        description: "Read and write large cuPyNumeric arrays to HDF5 with Legate's parallel, distributed HDF5 I/O (legate.io.hdf5), including GPUDirect Storage.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill cupynumeric-hdf5 --yes"),
+    },
+    EndorsedSkill {
+        name: "cupynumeric-parallel-data-load",
+        description: "Load sharded on-disk datasets (.npy, Parquet/Arrow, raw binary, sharded HDF5) into a distributed cuPyNumeric ndarray via manual partition + leaf task launch.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill cupynumeric-parallel-data-load --yes"),
+    },
+    EndorsedSkill {
+        name: "accelerated-computing-cudf",
+        description: "Official NVIDIA guidance for cuDF GPU DataFrames, pandas acceleration, dask-cuDF, ETL, joins, groupby, CSV/Parquet I/O, and multi-GPU DataFrame workloads.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill accelerated-computing-cudf --yes"),
+    },
+    EndorsedSkill {
+        name: "cudaq-guide",
+        description: "NVIDIA CUDA-Q (CUDA Quantum) onboarding guide for installation, test programs, GPU simulation, QPU hardware, and quantum applications.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill cudaq-guide --yes"),
+    },
+    EndorsedSkill {
+        name: "tilegym-adding-cutile-kernel",
+        description: "Add a new cuTile GPU kernel operator to NVIDIA TileGym — dispatch registration, cuTile backend implementation, exports, tests, and benchmarks.",
+        category: "NVIDIA CUDA-X",
+        source: "NVIDIA/skills (official NVIDIA-verified catalog)",
+        install: Some("npx skills add nvidia/skills --skill tilegym-adding-cutile-kernel --yes"),
     },
 ];
 
@@ -611,6 +762,11 @@ mod tests {
                 "endorsed skill {} needs a description",
                 skill.name
             );
+            assert!(
+                !skill.category.is_empty(),
+                "endorsed skill {} needs a category",
+                skill.name
+            );
             assert!(
                 !skill.source.is_empty(),
                 "endorsed skill {} needs a source",
@@ -620,6 +776,13 @@ mod tests {
                 !skill.name.starts_with('/'),
                 "endorsed skill name should not include the leading slash"
             );
+            if let Some(install) = skill.install {
+                assert!(
+                    install.contains(skill.name),
+                    "endorsed skill {} install hint should reference its name",
+                    skill.name
+                );
+            }
             assert!(
                 seen.insert(skill.name),
                 "duplicate endorsed skill name: {}",
@@ -628,6 +791,32 @@ mod tests {
         }
     }
 
+    #[test]
+    fn endorsed_skills_include_nvidia_cuda_x_catalog() {
+        let endorsed = endorsed_skills();
+        // Spot-check representative NVIDIA CUDA-X skills sourced from the
+        // official NVIDIA/skills catalog.
+        for expected in [
+            "cuopt-numerical-optimization-api-python",
+            "cupynumeric-install",
+            "accelerated-computing-cudf",
+            "cudaq-guide",
+            "tilegym-adding-cutile-kernel",
+        ] {
+            let skill = endorsed
+                .iter()
+                .find(|s| s.name == expected)
+                .unwrap_or_else(|| panic!("expected endorsed NVIDIA skill {expected}"));
+            assert_eq!(skill.category, "NVIDIA CUDA-X");
+            assert!(
+                skill
+                    .install
+                    .is_some_and(|cmd| cmd.contains("nvidia/skills")),
+                "NVIDIA skill {expected} should have an nvidia/skills install hint"
+            );
+        }
+    }
+
     #[test]
     fn registry_contains_reports_loaded_skills() {
         let temp = tempfile::tempdir().expect("tempdir");
diff --git a/crates/jcode-tui/src/tui/app/state_ui.rs b/crates/jcode-tui/src/tui/app/state_ui.rs
index ed0c543e7..03ca7df41 100644
--- a/crates/jcode-tui/src/tui/app/state_ui.rs
+++ b/crates/jcode-tui/src/tui/app/state_ui.rs
@@ -1514,19 +1514,50 @@ fn build_skills_report(app: &App) -> String {
                 .collect()
         };
     out.push_str("\nEndorsed skills (recommended by jcode)\n");
+    // Group by category, preserving first-seen category order.
+    let mut category_order: Vec<&str> = Vec::new();
     for endorsed in crate::skill::endorsed_skills() {
-        let status = if installed.contains(endorsed.name) {
-            "installed"
-        } else {
-            "not installed"
-        };
-        out.push_str(&format!("- /{} [{}]\n", endorsed.name, status));
-        out.push_str(&format!("    {}\n", endorsed.description));
-        out.push_str(&format!("    source: {}\n", endorsed.source));
+        if !category_order.contains(&endorsed.category) {
+            category_order.push(endorsed.category);
+        }
+    }
+    for category in category_order {
+        let installed_in_category = crate::skill::endorsed_skills()
+            .iter()
+            .filter(|e| e.category == category && installed.contains(e.name))
+            .count();
+        let total_in_category = crate::skill::endorsed_skills()
+            .iter()
+            .filter(|e| e.category == category)
+            .count();
+        out.push_str(&format!(
+            "\n  {} ({}/{} installed)\n",
+            category, installed_in_category, total_in_category
+        ));
+        for endorsed in crate::skill::endorsed_skills()
+            .iter()
+            .filter(|e| e.category == category)
+        {
+            let is_installed = installed.contains(endorsed.name);
+            let status = if is_installed {
+                "installed"
+            } else {
+                "not installed"
+            };
+            out.push_str(&format!("  - /{} [{}]\n", endorsed.name, status));
+            out.push_str(&format!("      {}\n", endorsed.description));
+            out.push_str(&format!("      source: {}\n", endorsed.source));
+            if !is_installed && let Some(install) = endorsed.install {
+                out.push_str(&format!("      install: {}\n", install));
+            }
+        }
     }
 
     out.push_str("\nActivate a skill by typing its slash command (e.g. /optimization).\n");
     out.push_str("Manage skills with the skill_manage tool (list/load/read/reload).\n");
+    out.push_str(
+        "NVIDIA CUDA-X skills come from the official catalog at https://github.com/NVIDIA/skills.\n",
+    );
 
     out.trim_end().to_string()
 }
diff --git a/crates/jcode-tui/src/tui/app/tests.rs b/crates/jcode-tui/src/tui/app/tests.rs
index d0f524d29..2bc3232d3 100644
--- a/crates/jcode-tui/src/tui/app/tests.rs
+++ b/crates/jcode-tui/src/tui/app/tests.rs
@@ -430,6 +430,20 @@ fn skills_command_lists_loaded_and_endorsed_skills() {
         content.contains("[installed]") || content.contains("[not installed]"),
         "{content}"
     );
+    // NVIDIA CUDA-X skills are grouped under their own category with install hints.
+    assert!(content.contains("NVIDIA CUDA-X"), "{content}");
+    assert!(
+        content.contains("/cuopt-numerical-optimization-api-python"),
+        "{content}"
+    );
+    assert!(
+        content.contains("install: npx skills add nvidia/skills"),
+        "{content}"
+    );
+    assert!(
+        content.contains("https://github.com/NVIDIA/skills"),
+        "{content}"
+    );
     assert_eq!(
         app.display_messages().last().unwrap().title.as_deref(),
         Some("Skills")

From 7e86cfb85ac19e6353155e94190026af43810d33 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 01:40:09 -0700
Subject: [PATCH 06/57] test(reload): prove normal-user /update upgrades the
 daemon end-to-end

Adds an integration test that drives the REAL update-detection core
(newer_binary_available, the function behind server_has_update) and the
reload-target resolver after a normal (non-self-dev) /update channel swap.

Models a shipped user: shared-server tracking stable, daemon running the old
release, /update installs a newer release and advances stable/current/
shared-server. Asserts both that the old daemon reports an update and that it
reloads into the freshly installed release. This documents that normal users
are covered by advance_shared_server_if_tracking_stable + the cross-flavor
reload target.
---
 crates/jcode-app-core/src/server/util.rs | 78 +++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 1 deletion(-)

diff --git a/crates/jcode-app-core/src/server/util.rs b/crates/jcode-app-core/src/server/util.rs
index 7a2e09a58..1d6f09243 100644
--- a/crates/jcode-app-core/src/server/util.rs
+++ b/crates/jcode-app-core/src/server/util.rs
@@ -758,7 +758,7 @@ mod newest_reload_candidate_integration_tests {
     //! a temp `JCODE_HOME`. This reproduces the field "/update -> new client,
     //! stale server" state and proves the fix: a self-dev daemon now reloads into
     //! the freshly installed release instead of its old pinned binary.
-    use super::newest_reload_candidate;
+    use super::{canonicalize_or, newer_binary_available, newest_reload_candidate};
     use crate::build;
     use std::path::Path;
     use std::time::{Duration, SystemTime};
@@ -860,6 +860,82 @@ mod newest_reload_candidate_integration_tests {
             crate::env::remove_var("JCODE_HOME");
         }
     }
+
+    /// Re-implements `server_has_newer_binary`'s decision against an *injected*
+    /// running-daemon path + mtime, so a test can model "the daemon is still the
+    /// OLD binary" without spawning a real process. It scans the exact same
+    /// candidate set (both flavors) and uses the same `newer_binary_available`
+    /// core the production function uses.
+    fn daemon_reports_update(running: &Path, running_mtime: SystemTime) -> bool {
+        let running_canonical = canonicalize_or(running.to_path_buf());
+        let mut candidates = std::collections::HashSet::new();
+        for is_selfdev in [false, true] {
+            if let Some((candidate, _label)) = super::server_update_candidate(is_selfdev) {
+                candidates.insert(canonicalize_or(candidate));
+            }
+        }
+        let with_mtimes = candidates.into_iter().map(|candidate| {
+            let m = std::fs::metadata(&candidate)
+                .ok()
+                .and_then(|m| m.modified().ok());
+            (candidate, m)
+        });
+        newer_binary_available(
+            Some(running_mtime),
+            Some(running_canonical.as_path()),
+            with_mtimes,
+        )
+    }
+
+    /// The question that matters for shipped users: after a NORMAL (non-self-dev)
+    /// `/update`, does the long-lived daemon actually advertise + apply the
+    /// upgrade on reconnect?
+    ///
+    /// Models a normal install: `shared-server` was tracking `stable`, the daemon
+    /// is running the old release, and `/update` installs a newer release and
+    /// advances stable/current/shared-server. We then drive the REAL
+    /// update-detection core and reload-target resolver and assert both:
+    /// (1) the daemon reports `server_has_update = true`, and
+    /// (2) the binary it reloads into is the freshly installed release.
+    #[test]
+    fn normal_user_daemon_detects_and_targets_update_after_update() {
+        let _guard = crate::storage::lock_test_env();
+        let temp = tempfile::TempDir::new().expect("temp dir");
+        let prev_home = std::env::var_os("JCODE_HOME");
+        crate::env::set_var("JCODE_HOME", temp.path());
+
+        let base = SystemTime::UNIX_EPOCH + Duration::from_secs(1_000_000);
+        let old_release = "0.14.3";
+        let new_release = "0.15.0";
+        let old_path = install_versioned_binary(old_release, base);
+        install_versioned_binary(new_release, base + Duration::from_secs(60));
+
+        // Pre-update state: every channel on the old release (shared-server
+        // tracking stable). This is the steady state for a normal user.
+        build::update_stable_symlink(old_release).expect("stable old");
+        build::update_current_symlink(old_release).expect("current old");
+        build::update_shared_server_symlink(old_release).expect("shared old");
+
+        // `/update` installs the new release and advances the channels. Because
+        // shared-server was tracking stable, it advances too.
+        build::advance_shared_server_if_tracking_stable(new_release).expect("advance shared");
+        build::update_stable_symlink(new_release).expect("stable new");
+        build::update_current_symlink(new_release).expect("current new");
+
+        // (1) The daemon (still the OLD binary) must now SEE the update so it
+        // reports server_has_update = true to reconnecting clients.
+        assert!(
+            daemon_reports_update(&old_path, base),
+            "normal-user daemon should report a server update after /update advanced the channels"
+        );
+
+        // (2) The binary it reloads into must be the freshly installed release.
+        assert_eq!(
+            candidate_version_for(false).as_deref(),
+            Some(new_release),
+            "normal-user daemon should reload into the freshly installed release"
+        );
+    }
 }
 
 #[cfg(test)]

From 535dbdb03ab6023209c798ffca1e9ffd7ec75de9 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 01:45:18 -0700
Subject: [PATCH 07/57] telemetry dashboard: add user leaderboard, token usage,
 agent autonomy, full active tiers

Surface every remaining signal the original SQL surface (and the schema)
exposed but the dashboard had not yet shown:

- User leaderboard (sec 09): top 20 anonymous ids by lifecycle volume
  with sessions/turns/tokens/tool_calls, version and last-seen. CI and
  non-release ids are tagged and dimmed (the old 'Heavy telemetry IDs'
  query, made visual).
- Token usage (sec 04): full breakdown - input/output/cache_read/
  cache_creation/total, both 30d and all-time (was a single combined
  number).
- Agent autonomy (sec 05): spawned agents, subagent/swarm/background
  tasks + successes, user cancellations, and where agent time goes
  (active/model/tool/blocked/idle), time-to-first-action, avg max
  concurrency. These schema columns were never surfaced before.
- Active-user tiers: DAU/WAU/MAU now show meaningful + raw subvalues,
  not just the headline number.
- Engagement: added time-to-first-tool-success.

stats.js gains tokens, agent, and leaderboard queries (26 queries total,
all validated against the real schema via a seeded sqlite D1 shim).
dashboard.js renumbered to 11 sections with a new leaderboardPanel
renderer and CI/dev tag styling. Verified end-to-end in a real browser:
all sections, the leaderboard table, and both chart types render.
---
 telemetry-worker/src/dashboard.js | 100 +++++++++++++++++++++++++-----
 telemetry-worker/src/stats.js     |  67 ++++++++++++++++++++
 2 files changed, 151 insertions(+), 16 deletions(-)

diff --git a/telemetry-worker/src/dashboard.js b/telemetry-worker/src/dashboard.js
index 62175243e..d5ec6c94e 100644
--- a/telemetry-worker/src/dashboard.js
+++ b/telemetry-worker/src/dashboard.js
@@ -143,6 +143,9 @@ export const DASHBOARD_HTML = `<!DOCTYPE html>
   .track { height: 6px; background: var(--panel-2); border-radius: 2px; overflow:hidden; }
   .fill { height: 100%; background: linear-gradient(90deg, var(--amber-dim), var(--amber)); border-radius: 2px; }
   tr:last-child td { border-bottom: none; }
+  .lb-ci { font-family: var(--mono); font-size: 9px; letter-spacing: 1px; color: var(--bg); background: var(--ink-faint); padding: 1px 5px; border-radius: 3px; margin-left: 6px; }
+  .lb-dev { font-family: var(--mono); font-size: 9px; letter-spacing: .5px; color: var(--amber); border: 1px solid rgba(255,180,84,.35); padding: 0 5px; border-radius: 3px; margin-left: 6px; }
+  tr.lb-dim td { opacity: .5; }
 
   .legend { display:flex; gap: 16px; align-items:center; font-family: var(--mono); font-size: 11px; color: var(--ink-dim); margin-bottom: 10px; flex-wrap: wrap; }
   .legend i { width: 14px; height: 3px; display:inline-block; margin-right: 6px; vertical-align: 3px; border-radius: 2px; }
@@ -270,6 +273,28 @@ function barsChart(title,desc,data,labelFn,color){
   return '<div class="panel reveal"><h3>'+esc(title)+'</h3><p class="pd">'+esc(desc)+'</p><svg viewBox="0 0 '+W+' '+H+'" width="100%" preserveAspectRatio="xMidYMid meet">'+bars+labels+'</svg></div>';
 }
 
+function leaderboardPanel(rows){
+  if(!rows.length) return '<div class="panel reveal"><p class="pd">no data</p></div>';
+  const body = rows.map((r,i)=>{
+    const ci = Number(r.is_ci)===1;
+    const chan = esc(r.build_channel||"?");
+    const last = r.last_seen ? new Date((r.last_seen||"").replace(" ","T")+"Z").toLocaleDateString() : "—";
+    const tag = ci ? '<span class="lb-ci">CI</span>' : (chan==="release" ? '' : '<span class="lb-dev">'+chan+'</span>');
+    return '<tr'+(ci?' class="lb-dim"':'')+'>'
+      + '<td class="v" style="color:var(--ink-faint)">'+(i+1)+'</td>'
+      + '<td class="k">'+esc(r.id_prefix)+'… '+tag+'</td>'
+      + '<td class="v">'+fmt(r.sessions)+'</td>'
+      + '<td class="v">'+fmt(r.turns)+'</td>'
+      + '<td class="v">'+fmt(r.tokens)+'</td>'
+      + '<td class="v">'+fmt(r.tool_calls)+'</td>'
+      + '<td class="k" style="color:var(--ink-faint)">v'+esc(r.version||"?")+'</td>'
+      + '<td class="k" style="color:var(--ink-faint)">'+esc(last)+'</td>'
+      + '</tr>';
+  }).join('');
+  return '<div class="panel reveal"><h3>Top anonymous ids</h3><p class="pd">ranked by lifecycle sessions · CI / non-release tagged · ids are hashed prefixes only</p>'
+    + '<table><thead><tr><th class="v">#</th><th>id</th><th class="v">sessions</th><th class="v">turns</th><th class="v">tokens</th><th class="v">tools</th><th>ver</th><th>last seen</th></tr></thead><tbody>'+body+'</tbody></table></div>';
+}
+
 function render(d){
   const dt = new Date(d.generated_at);
   document.getElementById("generated").textContent = dt.toISOString().slice(0,10)+" · "+dt.toLocaleTimeString()+" · UTC rollup";
@@ -299,9 +324,9 @@ function render(d){
     + '<div class="hero-side">'
       + '<h3>active users · distinct, headline definition</h3>'
       + '<div class="triple">'
-        + '<div class="t"><div class="tn">'+fmt(a.dau)+'</div><div class="tl">DAU</div><div class="tsub">today</div></div>'
-        + '<div class="t"><div class="tn">'+fmt(a.wau)+'</div><div class="tl">WAU</div><div class="tsub">7 days</div></div>'
-        + '<div class="t"><div class="tn">'+fmt(a.mau)+'</div><div class="tl">MAU</div><div class="tsub">30 days</div></div>'
+        + '<div class="t"><div class="tn">'+fmt(a.dau)+'</div><div class="tl">DAU</div><div class="tsub">'+fmt(a.dau_meaningful)+' mean · '+fmt(a.dau_raw)+' raw</div></div>'
+        + '<div class="t"><div class="tn">'+fmt(a.wau)+'</div><div class="tl">WAU</div><div class="tsub">'+fmt(a.wau_meaningful)+' mean · '+fmt(a.wau_raw)+' raw</div></div>'
+        + '<div class="t"><div class="tn">'+fmt(a.mau)+'</div><div class="tl">MAU</div><div class="tsub">'+fmt(a.mau_meaningful)+' mean · '+fmt(a.mau_raw)+' raw</div></div>'
       + '</div>'
       + '<div style="margin-top:16px;flex:1">'+lineChart(series)+'</div>'
     + '</div>'
@@ -346,16 +371,55 @@ function render(d){
     + stat("Turn success", pct(d.turns.turn_success_rate), "per-turn, 30d")
     + stat("Avg turn time", ms(d.turns.avg_turn_ms), "active duration / turn")
     + stat("Time to first response", ms(q.avg_first_response_ms), "agent responsiveness")
-    + stat("Avg tool latency", ms(q.avg_tool_latency_ms), "per executed tool call")
+    + stat("Time to first tool success", ms(q.avg_first_tool_success_ms), "first useful tool result")
   + '</div>';
   H+='<div class="grid g2" style="margin-top:14px">'
-    + stat("Tokens · 30d", fmt(q.tokens_30d), "input + output across sessions")
+    + stat("Avg tool latency", ms(q.avg_tool_latency_ms), "per executed tool call")
     + stat("Crash rate", pct(lc.crash_rate)+" · completion "+(lc.lifecycle_completion_ratio==null?"—":lc.lifecycle_completion_ratio), "crash share · (ends+crashes)/starts", {key:true})
   + '</div>';
 
-  // 04 RELIABILITY
-  const anyErr = (e.provider_timeout||0)+(e.auth_failed||0)+(e.rate_limited||0) > 0;
-  H+=sec("04","reliability","error counts · 30d non-CI · watch for spikes");
+  // 04 TOKEN USAGE
+  const tk = d.tokens||{};
+  H+=sec("04","token usage","model token volume · non-CI · cache-aware");
+  H+='<div class="grid g4">'
+    + stat("Total tokens · 30d", fmt(tk.total_30d), "all token types, last 30d", {key:true})
+    + stat("Input · 30d", fmt(tk.input_30d), "prompt tokens sent")
+    + stat("Output · 30d", fmt(tk.output_30d), "completion tokens")
+    + stat("Cache read · 30d", fmt(tk.cache_read_30d), "served from prompt cache")
+  + '</div>';
+  H+='<div class="grid g4" style="margin-top:14px">'
+    + stat("Cache creation · 30d", fmt(tk.cache_creation_30d), "tokens written to cache")
+    + stat("Total tokens · all-time", fmt(tk.total_all), "since telemetry began")
+    + stat("Input · all-time", fmt(tk.input_all), "")
+    + stat("Output · all-time", fmt(tk.output_all), "")
+  + '</div>';
+
+  // 05 AGENT AUTONOMY
+  const ag = d.agent||{};
+  const activeMs = ag.agent_active_ms||0, modelMs = ag.agent_model_ms||0, toolMs = ag.agent_tool_ms||0, idleMs = ag.session_idle_ms||0, blockedMs = ag.agent_blocked_ms||0;
+  const hrs = (x)=> x==null?"—":(x/3600000>=1?(x/3600000).toFixed(1)+"h":(x/60000).toFixed(0)+"m");
+  H+=sec("05","agent autonomy","30-day · spawning, delegation & where agent time goes");
+  H+='<div class="grid g4">'
+    + stat("Spawned agents", fmt(ag.spawned_agents), "sub-agents launched", {key:true})
+    + stat("Subagent tasks", fmt(ag.subagent_tasks), fmt(ag.subagent_success)+" succeeded")
+    + stat("Swarm tasks", fmt(ag.swarm_tasks), fmt(ag.swarm_success)+" succeeded")
+    + stat("Background tasks", fmt(ag.background_tasks), fmt(ag.background_completed)+" completed")
+  + '</div>';
+  H+='<div class="grid g4" style="margin-top:14px">'
+    + stat("User cancellations", fmt(ag.user_cancelled), "user interrupted the agent")
+    + stat("Agent active time", hrs(activeMs), "total working time, 30d")
+    + stat("Time in model", hrs(modelMs), "thinking / generating")
+    + stat("Time in tools", hrs(toolMs), "executing tool calls")
+  + '</div>';
+  H+='<div class="grid g4" style="margin-top:14px">'
+    + stat("Agent blocked time", hrs(blockedMs), "waiting on user / approvals")
+    + stat("Session idle time", hrs(idleMs), "no activity")
+    + stat("Time to first action", ms(ag.avg_time_to_first_action_ms), "agent's first move")
+    + stat("Avg max concurrency", dec(ag.avg_max_concurrent,1), "peak parallel sessions")
+  + '</div>';
+
+  // 06 RELIABILITY
+  H+=sec("06","reliability","error counts · 30d non-CI · watch for spikes");
   H+='<div class="grid g4">'
     + stat("Provider timeouts", fmt(e.provider_timeout), "", {alert:(e.provider_timeout||0)>0})
     + stat("Rate limited", fmt(e.rate_limited), "")
@@ -363,8 +427,8 @@ function render(d){
     + stat("Tool / MCP errors", fmt((e.tool_error||0)+(e.mcp_error||0)), fmt(e.tool_error)+" tool · "+fmt(e.mcp_error)+" mcp")
   + '</div>';
 
-  // 05 WHO & WHAT
-  H+=sec("05","who & what","distinct users per bucket");
+  // 07 WHO & WHAT
+  H+=sec("07","who & what","distinct users per bucket");
   H+='<div class="grid g2">'
     + tablePanel("Versions","adoption by release (non-CI)", rows(b.versions,"version"), "version","users")
     + tablePanel("Platform","os / arch split", rows(b.arch,"platform"), "platform","users")
@@ -384,17 +448,21 @@ function render(d){
     + tablePanel("Operating system","os split (non-CI)", rows(b.os,"os"), "os","users")
   + '</div>';
 
-  // 06 FEATURE ADOPTION
+  // 08 FEATURE ADOPTION
   const fr = Object.entries(d.features||{}).map(([k,v])=>({label:k.replace(/_/g," "),value:v})).sort((a,b)=>b.value-a.value);
   const tr = [["https",d.transport.https],["ws reuse",d.transport.ws_reuse],["ws fresh",d.transport.ws_fresh],["native http2",d.transport.native_http2],["cli subprocess",d.transport.cli],["other",d.transport.other]].map(([label,value])=>({label,value:value||0})).sort((a,b)=>b.value-a.value);
-  H+=sec("06","feature adoption","distinct users per capability · 30d");
+  H+=sec("08","feature adoption","distinct users per capability · 30d");
   H+='<div class="grid g2">'
     + tablePanel("Features","users who touched each capability", fr, "feature","users")
     + tablePanel("Transport mix","request transport counts (30d non-CI)", tr, "transport","count")
   + '</div>';
 
-  // 07 DATA HEALTH (diagnostic)
-  H+=sec("07","pipeline health","diagnostic · not product metrics · watch for drift");
+  // 09 USER LEADERBOARD
+  H+=sec("09","user leaderboard","most active anonymous ids · by lifecycle volume");
+  H+=leaderboardPanel(d.leaderboard||[]);
+
+  // 10 PIPELINE HEALTH (diagnostic)
+  H+=sec("10","pipeline health","diagnostic · not product metrics · watch for drift");
   H+='<div class="grid g4">'
     + stat("Lifecycle ids", fmt(h.lifecycle_ids), "distinct ids w/ end/crash")
     + stat("Session-start ids", fmt(h.session_start_ids), "distinct ids that launched")
@@ -407,9 +475,9 @@ function render(d){
     + stat("CI ids (30d window)", fmt(a.ci_mau), "filtered from headline")
   + '</div>';
 
-  // 08 FEEDBACK
+  // 11 FEEDBACK
   if((d.feedback||[]).length){
-    H+=sec("08","recent feedback","explicit user submissions");
+    H+=sec("11","recent feedback","explicit user submissions");
     H+='<div class="panel reveal">'+d.feedback.map(fb=>'<div class="fb"><div class="q">'+esc(fb.feedback_text)+'</div><div class="m">'+esc(new Date((fb.created_at||"").replace(" ","T")+"Z").toLocaleString())+' · v'+esc(fb.version||"?")+(fb.feedback_rating?' · <span class="badge">'+esc(fb.feedback_rating)+'</span>':'')+(fb.feedback_reason?' · '+esc(fb.feedback_reason):'')+'</div></div>').join('')+'</div>';
   }
 
diff --git a/telemetry-worker/src/stats.js b/telemetry-worker/src/stats.js
index 6f94edb5f..a359dded4 100644
--- a/telemetry-worker/src/stats.js
+++ b/telemetry-worker/src/stats.js
@@ -122,6 +122,7 @@ export async function getStats(env) {
       AVG(CASE WHEN session_success > 0 THEN 1.0 ELSE 0.0 END) AS success_rate,
       AVG(CASE WHEN abandoned_before_response > 0 THEN 1.0 ELSE 0.0 END) AS abandon_rate,
       AVG(first_assistant_response_ms) AS avg_first_response_ms,
+      AVG(first_tool_success_ms) AS avg_first_tool_success_ms,
       AVG(CASE WHEN executed_tool_calls > 0 THEN CAST(tool_latency_total_ms AS REAL)/executed_tool_calls END) AS avg_tool_latency_ms,
       SUM(input_tokens + output_tokens) AS tokens_30d,
       AVG(CASE WHEN multi_sessioned > 0 THEN 1.0 ELSE 0.0 END) AS multi_session_rate
@@ -130,6 +131,47 @@ export async function getStats(env) {
       AND is_ci = 0 AND created_at > datetime('now','-30 days')
   `);
 
+  // --- Token usage (all-time + 30d, full breakdown incl. cache) -----------
+  const tokens = await one(env, `
+    SELECT
+      SUM(input_tokens) AS input_all,
+      SUM(output_tokens) AS output_all,
+      SUM(cache_read_input_tokens) AS cache_read_all,
+      SUM(cache_creation_input_tokens) AS cache_creation_all,
+      SUM(total_tokens) AS total_all,
+      SUM(CASE WHEN created_at > datetime('now','-30 days') THEN input_tokens ELSE 0 END) AS input_30d,
+      SUM(CASE WHEN created_at > datetime('now','-30 days') THEN output_tokens ELSE 0 END) AS output_30d,
+      SUM(CASE WHEN created_at > datetime('now','-30 days') THEN cache_read_input_tokens ELSE 0 END) AS cache_read_30d,
+      SUM(CASE WHEN created_at > datetime('now','-30 days') THEN cache_creation_input_tokens ELSE 0 END) AS cache_creation_30d,
+      SUM(CASE WHEN created_at > datetime('now','-30 days') THEN total_tokens ELSE 0 END) AS total_30d
+    FROM events
+    WHERE event IN ('session_end','session_crash') AND is_ci = 0
+  `);
+
+  // --- Agent autonomy (30d): spawning, background/subagent/swarm, time split
+  const agent = await one(env, `
+    SELECT
+      SUM(spawned_agent_count) AS spawned_agents,
+      SUM(background_task_count) AS background_tasks,
+      SUM(background_task_completed_count) AS background_completed,
+      SUM(subagent_task_count) AS subagent_tasks,
+      SUM(subagent_success_count) AS subagent_success,
+      SUM(swarm_task_count) AS swarm_tasks,
+      SUM(swarm_success_count) AS swarm_success,
+      SUM(user_cancelled_count) AS user_cancelled,
+      SUM(agent_active_ms_total) AS agent_active_ms,
+      SUM(agent_model_ms_total) AS agent_model_ms,
+      SUM(agent_tool_ms_total) AS agent_tool_ms,
+      SUM(agent_blocked_ms_total) AS agent_blocked_ms,
+      SUM(session_idle_ms_total) AS session_idle_ms,
+      AVG(time_to_first_agent_action_ms) AS avg_time_to_first_action_ms,
+      AVG(time_to_first_useful_action_ms) AS avg_time_to_first_useful_ms,
+      AVG(CASE WHEN max_concurrent_sessions > 0 THEN max_concurrent_sessions END) AS avg_max_concurrent
+    FROM events
+    WHERE event IN ('session_end','session_crash') AND is_ci = 0
+      AND created_at > datetime('now','-30 days')
+  `);
+
   // --- Per-turn metrics (30d) ---------------------------------------------
   const turns = await one(env, `
     SELECT
@@ -263,6 +305,28 @@ export async function getStats(env) {
       AND created_at > datetime('now','-30 days') AND ${MEANINGFUL_SQL}
   `);
 
+  // --- User leaderboard: most active anonymous ids ------------------------
+  // Ranks by lifecycle (session_end + session_crash) volume. telemetry_id is
+  // anonymous, so we surface a short prefix only. Useful for spotting power
+  // users and dev/test skew. Includes whether the id is CI and its channel.
+  const leaderboard = await many(env, `
+    SELECT
+      substr(telemetry_id, 1, 8) AS id_prefix,
+      COUNT(*) AS sessions,
+      SUM(turns) AS turns,
+      SUM(input_tokens + output_tokens) AS tokens,
+      SUM(tool_calls) AS tool_calls,
+      MAX(is_ci) AS is_ci,
+      MAX(build_channel) AS build_channel,
+      MAX(version) AS version,
+      MAX(created_at) AS last_seen
+    FROM events
+    WHERE event IN ('session_end','session_crash')
+    GROUP BY telemetry_id
+    ORDER BY sessions DESC
+    LIMIT 20
+  `);
+
   // --- Daily timeseries (last 60 days) for charts -------------------------
   const daily = await many(env, `
     SELECT
@@ -310,11 +374,14 @@ export async function getStats(env) {
     lifecycle: { ...lifecycle, lifecycle_completion_ratio: lifecycleCompletion, crash_rate: crashRate },
     retention: { ...retention, d7_retention: d7Retention },
     quality: { ...quality, meaningful_sessions_30d: meaningfulSessions.meaningful_sessions || 0 },
+    tokens,
+    agent,
     turns,
     errors,
     features,
     transport,
     breakdowns: { versions, os, arch, channels, providers, auth, onboarding, hours },
+    leaderboard,
     health: { ...health, ...skew },
     timeseries: { daily, installs: dailyInstalls },
     feedback,

From 1feea6509db9e655429202836be8d7b739a77883 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 01:51:44 -0700
Subject: [PATCH 08/57] fix(gemini): tolerate generateContent candidate content
 without role/parts

Live multi-call provider-doctor against gemini-3.1-pro-high surfaced a real
decode abort: the Antigravity/Cloud Code generateContent response occasionally
omits `role` (and sometimes `parts`) on a candidate's `content`, but the
struct required `role`, so the whole turn failed with "missing field
`role`". The response-side role is never read, so default both fields rather
than aborting. Adds two decode regression tests.
---
 crates/jcode-provider-gemini/src/lib.rs | 57 +++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/crates/jcode-provider-gemini/src/lib.rs b/crates/jcode-provider-gemini/src/lib.rs
index 30e7bbca9..a7eb062c0 100644
--- a/crates/jcode-provider-gemini/src/lib.rs
+++ b/crates/jcode-provider-gemini/src/lib.rs
@@ -153,7 +153,14 @@ pub struct VertexGenerateContentRequest {
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct GeminiContent {
+    // Requests always set `role` (see `build_contents`), but `generateContent`
+    // responses occasionally omit it on a candidate's `content` (observed on
+    // Antigravity/Cloud Code Gemini-3 turns). The response-side value is never
+    // read, so default it rather than failing the whole decode with
+    // "missing field `role`".
+    #[serde(default)]
     pub role: String,
+    #[serde(default)]
     pub parts: Vec<GeminiPart>,
 }
 
@@ -465,4 +472,54 @@ mod tests {
             ]
         );
     }
+
+    #[test]
+    fn candidate_content_decodes_without_role() {
+        // Antigravity/Cloud Code Gemini-3 responses occasionally omit `role` on
+        // a candidate's `content` (and sometimes `parts` entirely). The whole
+        // generateContent decode used to fail with "missing field `role`",
+        // which aborted the turn; assert the response now decodes and the
+        // function call survives.
+        let raw = json!({
+            "response": {
+                "candidates": [{
+                    "content": {
+                        "parts": [{
+                            "functionCall": {"name": "read", "args": {"file_path": "/tmp/x"}},
+                            "thoughtSignature": "SIG_XYZ"
+                        }]
+                    },
+                    "finishReason": "STOP"
+                }]
+            }
+        })
+        .to_string();
+
+        let decoded: CodeAssistGenerateResponse =
+            serde_json::from_str(&raw).expect("decode response with role-less content");
+        let candidates = decoded.response.unwrap().candidates.unwrap();
+        let part = &candidates[0].content.as_ref().unwrap().parts[0];
+        assert_eq!(part.function_call.as_ref().unwrap().name, "read");
+        assert_eq!(part.thought_signature.as_deref(), Some("SIG_XYZ"));
+    }
+
+    #[test]
+    fn candidate_content_decodes_without_parts() {
+        // A bare `content: {}` (no `role`, no `parts`) must not abort the decode.
+        let raw = json!({
+            "response": {
+                "candidates": [{ "content": {}, "finishReason": "STOP" }]
+            }
+        })
+        .to_string();
+
+        let decoded: CodeAssistGenerateResponse =
+            serde_json::from_str(&raw).expect("decode response with empty content");
+        let content = decoded.response.unwrap().candidates.unwrap()[0]
+            .content
+            .clone()
+            .unwrap();
+        assert!(content.role.is_empty());
+        assert!(content.parts.is_empty());
+    }
 }

From 1b1139e862a72cc749fc39a28c1b1de288e56695 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 01:51:44 -0700
Subject: [PATCH 09/57] test(provider-doctor): drive a real multi-call
 signature-replay loop

The first cut of the multi-call phase only nudged a 2nd tool call after the
model had already answered, so live runs reported multi_tool_replay=skipped and
never actually exercised the multi-functionCall history. Replace it with an
agentic loop driven by a two-file read prompt: each emitted tool call is
replayed (carrying its captured thought_signature) and answered with a
synthetic result, so by the final turn we send two assistant functionCall
blocks and assert the backend accepts the transcript. Surface the
verified/skipped status in the doctor report detail.

Verified live: provider-doctor antigravity -m gemini-3.1-pro-high --tier full
now reports 'multi-call signature replay verified'.
---
 .../src/auth/live_provider_probes.rs          | 161 +++++++++---------
 crates/jcode-base/src/auth/provider_e2e.rs    |  28 ++-
 2 files changed, 110 insertions(+), 79 deletions(-)

diff --git a/crates/jcode-base/src/auth/live_provider_probes.rs b/crates/jcode-base/src/auth/live_provider_probes.rs
index 2c9b4bfb4..749c43bee 100644
--- a/crates/jcode-base/src/auth/live_provider_probes.rs
+++ b/crates/jcode-base/src/auth/live_provider_probes.rs
@@ -1407,7 +1407,6 @@ pub async fn run_live_native_provider_tool_smoke(
     history.push(tool_result_then_text(
         &tool_call.id,
         "TOOL_RESULT_TOKEN=42. Report this token back to confirm you read it.",
-        None,
     ));
 
     let second = consume_native_stream(
@@ -1429,56 +1428,62 @@ pub async fn run_live_native_provider_tool_smoke(
         crate::util::truncate_str(second.text.trim(), 200)
     );
 
-    // Phase 2 (best-effort): drive a second tool call so the replayed history
-    // carries *two* function calls, then assert the backend accepts the
-    // multi-call transcript (the only shape that reproduces the
-    // "missing a thought_signature ... position N" 400).
+    // Phase 2 (best-effort): drive an agentic loop that requires reading TWO
+    // files so the model emits a *sequence* of tool calls. Each call is replayed
+    // (carrying its captured signature) and answered with a synthetic result, so
+    // by the final turn the request we send carries two assistant `functionCall`
+    // blocks. That multi-call history is the only shape that reproduces the
+    // Antigravity/Cloud Code `400 ... "Function call is missing a
+    // thought_signature ... position N"`: a backend that validates *every*
+    // signature rejects the request here if an earlier one was dropped, so the
+    // `consume_native_stream` below surfaces the regression. If the model never
+    // makes a second tool call (common for providers that emit no signatures at
+    // all), the phase records `multi_tool_replay: "skipped"` rather than failing.
     let mut total_input = first.input_tokens + second.input_tokens;
     let mut total_output = first.output_tokens + second.output_tokens;
     let mut multi_tool_replay = "skipped";
-    let mut signatures_present = vec![tool_call.thought_signature.is_some()];
+    let mut signatures_present: Vec<bool> = Vec::new();
 
-    // Ask for a *second* distinct read so the model emits another tool call.
-    let mut second_request = first_turn.clone();
-    second_request.push(assistant_tool_use(&tool_call, &parsed_arguments));
-    second_request.push(tool_result_then_text(
-        &tool_call.id,
+    let mut convo = vec![Message {
+        role: Role::User,
+        content: vec![ContentBlock::Text {
+            text: "Read two files using the read tool, one tool call at a time: first read \
+                   /tmp/auth_tool_probe.txt, then read /tmp/auth_tool_probe_2.txt. After both \
+                   reads, reply with the single word DONE. Call the tool now; do not answer \
+                   in text first."
+                .to_string(),
+            cache_control: None,
+        }],
+        timestamp: None,
+        tool_duration_ms: None,
+    }];
+    let synthetic_results = [
         "Contents of /tmp/auth_tool_probe.txt: alpha.",
-        Some(
-            "Now read the file at /tmp/auth_tool_probe_2.txt using the read tool. \
-             Call the tool now; do not answer in text.",
-        ),
-    ));
-
-    let third = consume_native_stream(
-        provider,
-        &second_request,
-        &tools,
-        system,
-        std::time::Duration::from_secs(120),
-    )
-    .await?;
-    total_input += third.input_tokens;
-    total_output += third.output_tokens;
-
-    if let Some(second_call) = third.tool_calls.first().cloned() {
-        let second_arguments = parse_tool_arguments(&second_call.input_json);
-        signatures_present.push(second_call.thought_signature.is_some());
-
-        // Final request: history now contains BOTH tool_use blocks, each
-        // carrying its own captured signature. A dropped earlier signature is
-        // rejected here with the position-N 400.
-        let mut final_request = second_request.clone();
-        final_request.push(assistant_tool_use(&second_call, &second_arguments));
-        final_request.push(tool_result_then_text(
-            &second_call.id,
-            "TOOL_RESULT_TOKEN=77. Report this token back to confirm you read it.",
-            None,
-        ));
-
-        let fourth = consume_native_stream(
+        "Contents of /tmp/auth_tool_probe_2.txt: bravo.",
+    ];
+    // Cap the loop so a model that keeps calling tools cannot run forever.
+    const MAX_TOOL_ROUNDS: usize = 4;
+    let mut tool_round = 0usize;
+
+    loop {
+        // Number of assistant function calls already in the history we are about
+        // to replay. Once this reaches two, a successful response proves the
+        // backend accepted a multi-`functionCall` transcript with every
+        // signature intact.
+        let prior_calls = convo
+            .iter()
+            .filter(|message| {
+                matches!(message.role, Role::Assistant)
+                    && message
+                        .content
+                        .iter()
+                        .any(|block| matches!(block, ContentBlock::ToolUse { .. }))
+            })
+            .count();
+
+        let turn = consume_native_stream(
             provider,
-            &final_request,
+            &convo,
             &tools,
             system,
             std::time::Duration::from_secs(120),
@@ -1486,25 +1491,34 @@ pub async fn run_live_native_provider_tool_smoke(
         .await
         .with_context(|| {
             format!(
-                "native {label} multi-tool signature replay was rejected (history carried \
-                 {} function calls; a backend that validates every functionCall signature \
-                 fails here when an earlier thought_signature is dropped)",
-                signatures_present.len()
+                "native {label} multi-tool signature replay was rejected (replayed history \
+                 carried {prior_calls} function call(s); a backend that validates every \
+                 functionCall signature fails here when an earlier thought_signature is dropped)"
             )
         })?;
-        total_input += fourth.input_tokens;
-        total_output += fourth.output_tokens;
+        total_input += turn.input_tokens;
+        total_output += turn.output_tokens;
+        if prior_calls >= 2 {
+            multi_tool_replay = "verified";
+        }
 
-        ensure!(
-            fourth.saw_message_end,
-            "native {label} multi-tool follow-up ended without a message_end event"
-        );
-        ensure!(
-            fourth.text.contains("77"),
-            "native {label} multi-tool follow-up did not reflect the second tool result token: {:?}",
-            crate::util::truncate_str(fourth.text.trim(), 200)
-        );
-        multi_tool_replay = "verified";
+        let Some(call) = turn.tool_calls.first().cloned() else {
+            // Model produced a final (text) answer; the loop is done.
+            break;
+        };
+        signatures_present.push(call.thought_signature.is_some());
+        let args = parse_tool_arguments(&call.input_json);
+        convo.push(assistant_tool_use(&call, &args));
+        let result = synthetic_results
+            .get(tool_round)
+            .copied()
+            .unwrap_or("Contents: omega.");
+        convo.push(tool_result_then_text(&call.id, result));
+
+        tool_round += 1;
+        if tool_round >= MAX_TOOL_ROUNDS {
+            break;
+        }
     }
 
     let mut stage = crate::live_tests::LiveVerificationStage::passed(
@@ -1519,6 +1533,7 @@ pub async fn run_live_native_provider_tool_smoke(
         serde_json::json!(tool_call.thought_signature.is_some()),
     )
     .with_evidence("multi_tool_replay", serde_json::json!(multi_tool_replay))
+    .with_evidence("multi_tool_call_count", serde_json::json!(tool_round))
     .with_evidence(
         "tool_call_signatures_present",
         serde_json::json!(signatures_present),
@@ -1557,24 +1572,16 @@ fn assistant_tool_use(call: &NativeClaudeToolCall, arguments: &serde_json::Value
     }
 }
 
-/// Build a user turn carrying a synthetic `tool_result` and, optionally, a
-/// follow-up text instruction (used to chain a second tool call in one message
-/// so the provider sees a clean user turn rather than two consecutive ones).
-fn tool_result_then_text(tool_use_id: &str, result: &str, follow_up: Option<&str>) -> Message {
-    let mut content = vec![ContentBlock::ToolResult {
-        tool_use_id: tool_use_id.to_string(),
-        content: result.to_string(),
-        is_error: Some(false),
-    }];
-    if let Some(text) = follow_up {
-        content.push(ContentBlock::Text {
-            text: text.to_string(),
-            cache_control: None,
-        });
-    }
+/// Build a user turn carrying a synthetic `tool_result` for a captured native
+/// tool call, used to answer each step of the multi-call replay loop.
+fn tool_result_then_text(tool_use_id: &str, result: &str) -> Message {
     Message {
         role: Role::User,
-        content,
+        content: vec![ContentBlock::ToolResult {
+            tool_use_id: tool_use_id.to_string(),
+            content: result.to_string(),
+            is_error: Some(false),
+        }],
         timestamp: None,
         tool_duration_ms: None,
     }
diff --git a/crates/jcode-base/src/auth/provider_e2e.rs b/crates/jcode-base/src/auth/provider_e2e.rs
index 0998d10da..d4356db5c 100644
--- a/crates/jcode-base/src/auth/provider_e2e.rs
+++ b/crates/jcode-base/src/auth/provider_e2e.rs
@@ -283,6 +283,28 @@ fn label_for(checkpoint: &str) -> &'static str {
         .unwrap_or("Checkpoint")
 }
 
+/// Human-readable detail for a passed tool-smoke stage, surfacing whether the
+/// multi-call thought-signature replay phase was exercised. The native tool
+/// smoke records `multi_tool_replay` as `verified` (a two-`functionCall`
+/// history was replayed and accepted, the shape that reproduces the
+/// "missing a thought_signature ... position N" 400) or `skipped` (the model
+/// declined a second tool call). Surfacing it keeps the coverage observable in
+/// the doctor report instead of collapsing to a generic pass string.
+fn tool_stage_detail(stage: &crate::live_tests::LiveVerificationStage) -> String {
+    match stage
+        .evidence
+        .get("multi_tool_replay")
+        .and_then(|value| value.as_str())
+    {
+        Some("verified") => "tool call parsed and executed; multi-call signature replay verified".to_string(),
+        Some("skipped") => {
+            "tool call parsed and executed; multi-call signature replay skipped (no 2nd tool call)"
+                .to_string()
+        }
+        _ => "tool call parsed and executed".to_string(),
+    }
+}
+
 /// Checkpoints that require a real API response and are therefore skipped on the
 /// offline/catalog tiers.
 const API_DEPENDENT_CHECKPOINTS: &[&str] = &[
@@ -1139,6 +1161,7 @@ async fn run_native_antigravity_api_checks(
     match run_live_antigravity_native_tool_smoke(selected).await {
         Ok(stage) => {
             spend.accumulate(stage.evidence.get("usage"), stage.evidence.get("cost"));
+            let detail = tool_stage_detail(&stage);
             for checkpoint in [
                 checkpoints::TOOL_CALL_PARSE,
                 checkpoints::TOOL_EXECUTION_LOOP,
@@ -1148,7 +1171,7 @@ async fn run_native_antigravity_api_checks(
                 checks.push(DoctorCheck::passed(
                     checkpoint,
                     label_for(checkpoint),
-                    "tool call parsed and executed".to_string(),
+                    detail.clone(),
                 ));
             }
         }
@@ -1773,6 +1796,7 @@ async fn run_generic_native_api_checks(
     match run_live_native_provider_tool_smoke(provider, selected, label).await {
         Ok(stage) => {
             spend.accumulate(stage.evidence.get("usage"), stage.evidence.get("cost"));
+            let detail = tool_stage_detail(&stage);
             for checkpoint in [
                 checkpoints::TOOL_CALL_PARSE,
                 checkpoints::TOOL_EXECUTION_LOOP,
@@ -1782,7 +1806,7 @@ async fn run_generic_native_api_checks(
                 checks.push(DoctorCheck::passed(
                     checkpoint,
                     label_for(checkpoint),
-                    "tool call parsed and executed".to_string(),
+                    detail.clone(),
                 ));
             }
         }

From 75458f6e03585b2632ed3ad9e7e2d1b3eec18fd4 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 02:37:02 -0700
Subject: [PATCH 10/57] perf(resume): parallelize session loading; scope
 onboarding picker

Cold /resume and onboarding/catch-up pickers were dominated by serial
per-file IO+JSON parsing over large session histories (87k jcode
snapshots + hundreds of Codex/Claude transcripts here).

- Add a bounded scoped-thread parallel_map helper in the session picker
  loader and use it for: candidate mtime stat (readdir then parallel
  stat), the jcode summary parse pass (two-phase: parallel fill to
  scan_limit, then parallel saved-gate over the tail), and the external
  Codex/pi/opencode stub parsers.
- Load the catch-up 'seen' state once (CatchupSeenSnapshot) instead of
  re-reading catchup_seen.json per session.
- Onboarding transcript picker now loads only the relevant external CLI
  (load_external_cli_sessions_grouped) instead of the full
  load_sessions_grouped on the UI thread.
- Catch-up picker now opens from cache and refreshes off-thread via the
  shared async picker-load path instead of blocking the live session.

Measured on real data (idle, 4 runs each):
  load_sessions          ~660ms -> ~434ms (~34%)
  load_sessions_grouped  ~685ms -> ~465ms (~32%)
  onboarding picker load ~685ms (UI thread) -> ~14ms scoped CLI load
---
 crates/jcode-app-core/src/catchup.rs          |  36 ++
 .../src/tui/app/inline_interactive.rs         |  85 ++--
 .../src/tui/app/onboarding_flow_control.rs    |  17 +-
 crates/jcode-tui/src/tui/session_picker.rs    |   1 +
 .../src/tui/session_picker/loading.rs         | 367 +++++++++++++-----
 .../src/tui/session_picker/loading_tests.rs   | 119 ++++++
 6 files changed, 484 insertions(+), 141 deletions(-)

diff --git a/crates/jcode-app-core/src/catchup.rs b/crates/jcode-app-core/src/catchup.rs
index c3c64159d..536bd312c 100644
--- a/crates/jcode-app-core/src/catchup.rs
+++ b/crates/jcode-app-core/src/catchup.rs
@@ -19,6 +19,42 @@ pub fn needs_catchup(session_id: &str, updated_at: DateTime<Utc>, status: &Sessi
     needs_catchup_with_seen(updated_at.timestamp_millis(), seen, status)
 }
 
+/// Snapshot of the persisted catch-up "seen" state, so callers that need to
+/// evaluate many sessions at once (e.g. the session picker building its list)
+/// can avoid re-reading and re-parsing `catchup_seen.json` once per session.
+#[derive(Clone, Default)]
+pub struct CatchupSeenSnapshot {
+    state: PersistedCatchupState,
+}
+
+impl CatchupSeenSnapshot {
+    /// Load the persisted seen-state once from disk.
+    pub fn load() -> Self {
+        Self {
+            state: load_seen_state(),
+        }
+    }
+
+    /// Same semantics as [`needs_catchup`] but uses this preloaded snapshot
+    /// instead of re-reading the state file for every call.
+    pub fn needs_catchup(
+        &self,
+        session_id: &str,
+        updated_at: DateTime<Utc>,
+        status: &SessionStatus,
+    ) -> bool {
+        if !is_attention_status(status) {
+            return false;
+        }
+        let seen = self
+            .state
+            .seen_at_ms_by_session
+            .get(session_id)
+            .copied();
+        needs_catchup_with_seen(updated_at.timestamp_millis(), seen, status)
+    }
+}
+
 pub(crate) fn needs_catchup_with_seen(
     updated_at_ms: i64,
     seen_at_ms: Option<i64>,
diff --git a/crates/jcode-tui/src/tui/app/inline_interactive.rs b/crates/jcode-tui/src/tui/app/inline_interactive.rs
index 20373e7e9..4106e9e1a 100644
--- a/crates/jcode-tui/src/tui/app/inline_interactive.rs
+++ b/crates/jcode-tui/src/tui/app/inline_interactive.rs
@@ -1636,6 +1636,34 @@ impl App {
         });
     }
 
+    /// Rebuild the picker overlay from a freshly loaded session list, applying
+    /// the filter for the active picker mode. Returns true when the overlay was
+    /// (re)built so the caller can request a redraw.
+    fn apply_loaded_session_picker(
+        &mut self,
+        server_groups: Vec<session_picker::ServerGroup>,
+        orphan_sessions: Vec<session_picker::SessionInfo>,
+    ) -> bool {
+        match self.session_picker_mode {
+            SessionPickerMode::Resume => {
+                let picker = SessionPicker::new_grouped(server_groups, orphan_sessions);
+                self.session_picker_overlay = Some(RefCell::new(picker));
+                self.set_status_notice("Sessions loaded");
+                true
+            }
+            SessionPickerMode::CatchUp => {
+                let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions);
+                picker.activate_catchup_filter();
+                self.session_picker_overlay = Some(RefCell::new(picker));
+                self.set_status_notice("Catch Up sessions loaded");
+                true
+            }
+            // Onboarding loads its scoped transcript list synchronously, so it
+            // never flows through this async path.
+            SessionPickerMode::Onboarding { .. } => false,
+        }
+    }
+
     pub(super) fn poll_session_picker_load(&mut self) -> bool {
         let recv_result = {
             let Some(pending) = self.pending_session_picker_load.as_ref() else {
@@ -1644,24 +1672,23 @@ impl App {
             pending.receiver.try_recv()
         };
 
+        let picker_active = self.session_picker_overlay.is_some()
+            && matches!(
+                self.session_picker_mode,
+                SessionPickerMode::Resume | SessionPickerMode::CatchUp
+            );
+
         match recv_result {
             Ok(Ok((server_groups, orphan_sessions))) => {
                 self.pending_session_picker_load = None;
-                if self.session_picker_overlay.is_some()
-                    && self.session_picker_mode == SessionPickerMode::Resume
-                {
-                    let picker = SessionPicker::new_grouped(server_groups, orphan_sessions);
-                    self.session_picker_overlay = Some(RefCell::new(picker));
-                    self.set_status_notice("Sessions loaded");
-                    return true;
+                if picker_active {
+                    return self.apply_loaded_session_picker(server_groups, orphan_sessions);
                 }
                 false
             }
             Ok(Err(e)) => {
                 self.pending_session_picker_load = None;
-                if self.session_picker_overlay.is_some()
-                    && self.session_picker_mode == SessionPickerMode::Resume
-                {
+                if picker_active {
                     self.session_picker_overlay = None;
                     self.push_display_message(DisplayMessage::error(format!(
                         "Failed to load sessions: {}",
@@ -1675,9 +1702,7 @@ impl App {
             Err(std::sync::mpsc::TryRecvError::Empty) => false,
             Err(std::sync::mpsc::TryRecvError::Disconnected) => {
                 self.pending_session_picker_load = None;
-                if self.session_picker_overlay.is_some()
-                    && self.session_picker_mode == SessionPickerMode::Resume
-                {
+                if picker_active {
                     self.session_picker_overlay = None;
                     self.push_display_message(DisplayMessage::error(
                         "Session loading stopped before returning a result.".to_string(),
@@ -1700,20 +1725,26 @@ impl App {
             return;
         }
 
-        match session_picker::load_sessions_grouped() {
-            Ok((server_groups, orphan_sessions)) => {
-                let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions);
-                picker.activate_catchup_filter();
-                self.session_picker_overlay = Some(RefCell::new(picker));
-                self.session_picker_mode = SessionPickerMode::CatchUp;
-            }
-            Err(e) => {
-                self.push_display_message(DisplayMessage::error(format!(
-                    "Failed to load catch-up sessions: {}",
-                    e
-                )));
-            }
-        }
+        // Show the picker overlay immediately (using the cached list when
+        // available) and load the full session list off-thread. This keeps the
+        // live TUI responsive instead of blocking on a multi-hundred-ms scan of
+        // every historical session.
+        let mut picker = if let Some((server_groups, orphan_sessions)) =
+            session_picker::load_cached_sessions_grouped()
+        {
+            let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions);
+            picker.activate_catchup_filter();
+            picker
+        } else {
+            SessionPicker::loading()
+        };
+        // Ensure the filter is applied even on the loading placeholder so the
+        // refreshed list lands in the catch-up view.
+        picker.activate_catchup_filter();
+        self.session_picker_overlay = Some(RefCell::new(picker));
+        self.session_picker_mode = SessionPickerMode::CatchUp;
+        self.set_status_notice("Loading Catch Up sessions...");
+        self.start_session_picker_load();
     }
 
     pub(super) fn handle_session_picker_selection(&mut self, targets: &[ResumeTarget]) {
diff --git a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs
index 4d283b3b1..3f95702d9 100644
--- a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs
+++ b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs
@@ -603,17 +603,12 @@ impl App {
             ExternalCli::ClaudeCode => SessionFilterMode::ClaudeCode,
         };
 
-        let (server_groups, orphan_sessions) = match session_picker::load_sessions_grouped() {
-            Ok(loaded) => loaded,
-            Err(err) => {
-                crate::logging::error(&format!(
-                    "onboarding: failed to load {} sessions: {err}",
-                    cli.label()
-                ));
-                self.onboarding_fallback_to_session_search(cli);
-                return;
-            }
-        };
+        // The onboarding picker only ever shows this one external CLI's
+        // transcripts, so load just those instead of paying the full
+        // `load_sessions_grouped` cost (parsing every jcode snapshot, the other
+        // CLIs, and listing servers). This keeps first-run onboarding snappy.
+        let (server_groups, orphan_sessions) =
+            session_picker::load_external_cli_sessions_grouped(cli);
 
         let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions);
         picker.activate_external_cli_filter(filter);
diff --git a/crates/jcode-tui/src/tui/session_picker.rs b/crates/jcode-tui/src/tui/session_picker.rs
index ef3c3cc9e..988dfe8b1 100644
--- a/crates/jcode-tui/src/tui/session_picker.rs
+++ b/crates/jcode-tui/src/tui/session_picker.rs
@@ -34,6 +34,7 @@ mod render;
 #[cfg(test)]
 use loading::collect_recent_session_stems;
 pub(crate) use loading::latest_external_cli_session_secs;
+pub(crate) use loading::load_external_cli_sessions_grouped;
 use loading::{build_messages_preview, build_search_index, crashed_sessions_from_all_sessions};
 pub use loading::{
     invalidate_session_list_cache, load_cached_sessions_grouped, load_servers, load_sessions,
diff --git a/crates/jcode-tui/src/tui/session_picker/loading.rs b/crates/jcode-tui/src/tui/session_picker/loading.rs
index b1f6a8343..26d5e935a 100644
--- a/crates/jcode-tui/src/tui/session_picker/loading.rs
+++ b/crates/jcode-tui/src/tui/session_picker/loading.rs
@@ -53,6 +53,77 @@ const SAVED_METADATA_TAIL_SCAN_BYTES: u64 = 64 * 1024;
 const INITIAL_TRANSCRIPT_SEARCH_BUDGET_BYTES: usize = 64 * 1024;
 const MESSAGE_SEARCH_EXCERPT_BYTES: usize = 8 * 1024;
 
+/// Upper bound on worker threads used to parse/stat session files in parallel.
+/// The session picker load is dominated by per-file IO + JSON parsing across
+/// hundreds of snapshots; fanning that work out across cores turns the cold
+/// `/resume` load from a serial slog into a roughly core-count-bounded scan.
+const SESSION_LOAD_MAX_THREADS: usize = 8;
+
+/// Number of worker threads to use for a parallel pass over `item_count` items.
+/// Returns 1 for tiny batches so we never pay thread-spawn overhead when there
+/// is barely any work to do.
+fn session_load_thread_count(item_count: usize) -> usize {
+    if item_count <= 1 {
+        return 1;
+    }
+    let cores = std::thread::available_parallelism()
+        .map(|n| n.get())
+        .unwrap_or(1);
+    cores.clamp(1, SESSION_LOAD_MAX_THREADS).min(item_count)
+}
+
+/// Map `f` over `items` across a bounded scoped thread pool, preserving input
+/// order in the returned vector. Falls back to a plain serial map when only one
+/// worker is warranted. `f` must be `Sync` because every worker shares it.
+fn parallel_map<T, R, F>(items: Vec<T>, f: F) -> Vec<R>
+where
+    T: Send,
+    R: Send,
+    F: Fn(T) -> R + Sync,
+{
+    let thread_count = session_load_thread_count(items.len());
+    if thread_count <= 1 {
+        return items.into_iter().map(f).collect();
+    }
+
+    // Partition the work into `thread_count` owned chunks so each worker can
+    // take its inputs by value (no clone, no shared mutation). We remember the
+    // starting offset of each chunk to stitch results back into input order.
+    let chunk_size = items.len().div_ceil(thread_count);
+    let mut chunks: Vec<(usize, Vec<T>)> = Vec::with_capacity(thread_count);
+    let mut offset = 0usize;
+    let mut remaining = items;
+    while !remaining.is_empty() {
+        let take = chunk_size.min(remaining.len());
+        let rest = remaining.split_off(take);
+        chunks.push((offset, remaining));
+        offset += take;
+        remaining = rest;
+    }
+
+    let f = &f;
+    let mut results: Vec<(usize, Vec<R>)> = std::thread::scope(|scope| {
+        let mut handles = Vec::with_capacity(chunks.len());
+        for (start, chunk) in chunks {
+            handles.push(scope.spawn(move || {
+                (start, chunk.into_iter().map(f).collect::<Vec<R>>())
+            }));
+        }
+        handles
+            .into_iter()
+            .filter_map(|handle| handle.join().ok())
+            .collect()
+    });
+
+    results.sort_by_key(|(start, _)| *start);
+    let total: usize = results.iter().map(|(_, chunk)| chunk.len()).sum();
+    let mut out = Vec::with_capacity(total);
+    for (_, chunk) in results {
+        out.extend(chunk);
+    }
+    out
+}
+
 #[derive(Clone)]
 struct SessionListCacheEntry {
     loaded_at: Instant,
@@ -419,9 +490,8 @@ fn session_sort_key(stem: &str) -> u64 {
         .unwrap_or(0)
 }
 
-fn entry_modified_sort_key(entry: &std::fs::DirEntry) -> u128 {
-    entry
-        .metadata()
+fn path_modified_sort_key(path: &Path) -> u128 {
+    path.metadata()
         .and_then(|meta| meta.modified())
         .ok()
         .and_then(|time| time.duration_since(std::time::UNIX_EPOCH).ok())
@@ -805,8 +875,10 @@ fn collect_recent_session_candidates(
     sessions_dir: &Path,
     candidate_limit: usize,
 ) -> Result<Vec<String>> {
-    let mut by_stem: HashMap<String, SessionCandidateMeta> = HashMap::new();
-
+    // Phase 1: a single cheap `readdir` pass to enumerate candidate files. We
+    // defer the per-file `stat` (the expensive part on directories with 100k+
+    // session files) to a parallel pass so it does not serialize startup.
+    let mut raw: Vec<(String, bool, PathBuf)> = Vec::new();
     for entry in std::fs::read_dir(sessions_dir)? {
         let entry = entry?;
         let file_name = entry.file_name();
@@ -819,11 +891,20 @@ fn collect_recent_session_candidates(
         if stem.starts_with("imported_") {
             continue;
         }
+        raw.push((stem.to_string(), has_snapshot, entry.path()));
+    }
+
+    // Phase 2: stat each file's modification time in parallel.
+    let stamped = parallel_map(raw, |(stem, has_snapshot, path)| {
+        (stem, has_snapshot, path_modified_sort_key(&path))
+    });
 
-        let modified = entry_modified_sort_key(&entry);
+    // Phase 3: merge per-stem metadata (snapshot + newest journal/snapshot mtime).
+    let mut by_stem: HashMap<String, SessionCandidateMeta> = HashMap::new();
+    for (stem, has_snapshot, modified) in stamped {
         by_stem
-            .entry(stem.to_string())
-            .or_insert_with(|| SessionCandidateMeta::new(stem))
+            .entry(stem.clone())
+            .or_insert_with(|| SessionCandidateMeta::new(&stem))
             .update(modified, has_snapshot);
     }
 
@@ -1424,6 +1505,99 @@ pub(super) fn crashed_sessions_from_all_sessions(
     })
 }
 
+/// Parse a single jcode session snapshot (+ journal) into a [`SessionInfo`],
+/// returning `None` for empty/imported sessions or read/parse errors. Pulled out
+/// of `load_sessions` so the summary pass can run across a scoped thread pool.
+fn parse_jcode_session_info(
+    sessions_dir: &Path,
+    stem: &str,
+    catchup_seen: &crate::catchup::CatchupSeenSnapshot,
+) -> Option<SessionInfo> {
+    // Imported stems are filtered out by `collect_recent_session_candidates`, but
+    // keep the cheap defensive check so this helper is safe to call directly.
+    if stem.starts_with("imported_cc_")
+        || stem.starts_with("imported_codex_")
+        || stem.starts_with("imported_pi_")
+        || stem.starts_with("imported_opencode_")
+    {
+        return None;
+    }
+
+    let path = sessions_dir.join(format!("{stem}.json"));
+    let session = load_session_summary(&path).ok()?;
+
+    let visible_message_count = session.messages.visible_message_count;
+    if visible_message_count == 0 {
+        return None;
+    }
+
+    let short_name = session
+        .short_name
+        .clone()
+        .or_else(|| extract_session_name(stem).map(|s| s.to_string()))
+        .unwrap_or_else(|| stem.to_string());
+    let icon = session_icon(&short_name);
+
+    let user_message_count = session.messages.user_message_count;
+    let assistant_message_count = session.messages.assistant_message_count;
+    let estimated_tokens = session.messages.estimated_tokens;
+
+    let status = session.status.clone();
+    let needs_catchup = catchup_seen.needs_catchup(stem, session.updated_at, &status);
+    let source = classify_session_source(
+        stem,
+        session.provider_key.as_deref(),
+        session.model.as_deref(),
+    );
+
+    let title = session
+        .custom_title
+        .or(session.title)
+        .unwrap_or_else(|| short_name.clone());
+    let search_index = build_search_index_from_summary(
+        stem,
+        &short_name,
+        &title,
+        session.working_dir.as_deref(),
+        session.save_label.as_deref(),
+        &session.messages.search_text,
+    );
+
+    Some(SessionInfo {
+        id: stem.to_string(),
+        parent_id: session.parent_id,
+        short_name,
+        icon: icon.to_string(),
+        title,
+        message_count: visible_message_count,
+        user_message_count,
+        assistant_message_count,
+        created_at: session.created_at,
+        last_message_time: session.updated_at,
+        last_active_at: session.last_active_at,
+        working_dir: session.working_dir,
+        model: session.model,
+        provider_key: session.provider_key,
+        is_canary: session.is_canary,
+        is_debug: session.is_debug,
+        saved: session.saved,
+        save_label: session.save_label,
+        status,
+        needs_catchup,
+        estimated_tokens,
+        first_user_prompt: session.messages.first_user_prompt,
+        messages_preview: Vec::new(),
+        search_index,
+        server_name: None,
+        server_icon: None,
+        source,
+        resume_target: ResumeTarget::JcodeSession {
+            session_id: stem.to_string(),
+        },
+        external_path: None,
+    })
+}
+
 pub fn load_sessions() -> Result<Vec<SessionInfo>> {
     let sessions_dir = storage::jcode_dir()?.join("sessions");
     let scan_limit = session_scan_limit();
@@ -1437,8 +1611,6 @@ pub fn load_sessions() -> Result<Vec<SessionInfo>> {
         return Ok(entry.sessions.clone());
     }
 
-    let mut sessions: Vec<SessionInfo> = Vec::new();
-
     let candidates = if sessions_dir.exists() {
         // Keep startup responsive by avoiding `session_has_history` here. That helper parses
         // snapshots/journals, and `load_session_summary` below parses the same files again.
@@ -1459,100 +1631,65 @@ pub fn load_sessions() -> Result<Vec<SessionInfo>> {
         Vec::new()
     };
 
-    let external_sessions = std::thread::scope(|scope| {
+    // Loading the catch-up "seen" state once (instead of per session) avoids
+    // re-reading and re-parsing `catchup_seen.json` for every candidate.
+    let catchup_seen = crate::catchup::CatchupSeenSnapshot::load();
+    let sessions_dir_ref = &sessions_dir;
+    let catchup_ref = &catchup_seen;
+
+    let (mut sessions, external_sessions) = std::thread::scope(|scope| {
         let claude_handle = scope.spawn(|| load_external_claude_code_sessions(scan_limit));
         let codex_handle = scope.spawn(|| load_external_codex_sessions(scan_limit));
         let pi_handle = scope.spawn(|| load_external_pi_sessions(scan_limit));
         let opencode_handle = scope.spawn(|| load_external_opencode_sessions(scan_limit));
 
-        for stem in candidates {
-            if sessions.len() >= scan_limit {
-                let saved = sessions_dir.join(format!("{stem}.json"));
-                if !session_snapshot_or_journal_has_saved_metadata(&saved) {
-                    continue;
+        // Phase 1: walk the recency-ordered candidates in parallel windows until
+        // we have collected `scan_limit` non-empty sessions. `boundary` marks the
+        // candidate index where the serial fill would start applying the saved
+        // gate, so beyond it we only keep saved sessions (Phase 2). Parsing each
+        // window in parallel keeps the per-file JSON cost off the critical path.
+        //
+        // Windows are sized to `scan_limit`: only the final window (the one that
+        // crosses `scan_limit`) can over-parse, so wasted work is bounded to a
+        // single window's worth of candidates while still parallelizing widely.
+        let mut sessions: Vec<SessionInfo> = Vec::new();
+        let mut boundary = candidates.len();
+        let window = scan_limit.max(1);
+        let mut start = 0;
+        'fill: while start < candidates.len() {
+            let end = (start + window).min(candidates.len());
+            let batch = candidates[start..end].to_vec();
+            let parsed = parallel_map(batch, move |stem| {
+                parse_jcode_session_info(sessions_dir_ref, &stem, catchup_ref)
+            });
+            for (offset, parsed_session) in parsed.into_iter().enumerate() {
+                if let Some(info) = parsed_session {
+                    sessions.push(info);
+                    if sessions.len() >= scan_limit {
+                        boundary = start + offset + 1;
+                        break 'fill;
+                    }
                 }
             }
-            if stem.starts_with("imported_cc_")
-                || stem.starts_with("imported_codex_")
-                || stem.starts_with("imported_pi_")
-                || stem.starts_with("imported_opencode_")
-            {
-                continue;
-            }
-            let path = sessions_dir.join(format!("{stem}.json"));
-            if let Ok(session) = load_session_summary(&path) {
-                let short_name = session
-                    .short_name
-                    .clone()
-                    .or_else(|| extract_session_name(&stem).map(|s| s.to_string()))
-                    .unwrap_or_else(|| stem.clone());
-                let icon = session_icon(&short_name);
-
-                let visible_message_count = session.messages.visible_message_count;
-                if visible_message_count == 0 {
-                    continue;
-                }
-                let user_message_count = session.messages.user_message_count;
-                let assistant_message_count = session.messages.assistant_message_count;
-                let estimated_tokens = session.messages.estimated_tokens;
-
-                let status = session.status.clone();
-                let needs_catchup =
-                    crate::catchup::needs_catchup(&stem, session.updated_at, &status);
-                let source = classify_session_source(
-                    &stem,
-                    session.provider_key.as_deref(),
-                    session.model.as_deref(),
-                );
-
-                let title = session
-                    .custom_title
-                    .or(session.title)
-                    .unwrap_or_else(|| short_name.clone());
-                let messages_preview: Vec<PreviewMessage> = Vec::new();
-                let search_index = build_search_index_from_summary(
-                    &stem,
-                    &short_name,
-                    &title,
-                    session.working_dir.as_deref(),
-                    session.save_label.as_deref(),
-                    &session.messages.search_text,
-                );
+            start = end;
+        }
 
-                sessions.push(SessionInfo {
-                    id: stem.to_string(),
-                    parent_id: session.parent_id,
-                    short_name,
-                    icon: icon.to_string(),
-                    title,
-                    message_count: visible_message_count,
-                    user_message_count,
-                    assistant_message_count,
-                    created_at: session.created_at,
-                    last_message_time: session.updated_at,
-                    last_active_at: session.last_active_at,
-                    working_dir: session.working_dir,
-                    model: session.model,
-                    provider_key: session.provider_key,
-                    is_canary: session.is_canary,
-                    is_debug: session.is_debug,
-                    saved: session.saved,
-                    save_label: session.save_label,
-                    status,
-                    needs_catchup,
-                    estimated_tokens,
-                    first_user_prompt: session.messages.first_user_prompt,
-                    messages_preview,
-                    search_index,
-                    server_name: None,
-                    server_icon: None,
-                    source,
-                    resume_target: ResumeTarget::JcodeSession {
-                        session_id: stem.to_string(),
-                    },
-                    external_path: None,
-                });
-            }
+        // Phase 2: beyond the fill boundary the serial loader only keeps saved
+        // sessions. Compute the cheap saved tail-gate across the remaining
+        // candidates in parallel, then fully parse just the gate-passers.
+        if boundary < candidates.len() {
+            let tail: Vec<String> = candidates[boundary..].to_vec();
+            let gate_passers: Vec<String> = parallel_map(tail, move |stem| {
+                let path = sessions_dir_ref.join(format!("{stem}.json"));
+                session_snapshot_or_journal_has_saved_metadata(&path).then_some(stem)
+            })
+            .into_iter()
+            .flatten()
+            .collect();
+            let saved_sessions = parallel_map(gate_passers, move |stem| {
+                parse_jcode_session_info(sessions_dir_ref, &stem, catchup_ref)
+            });
+            sessions.extend(saved_sessions.into_iter().flatten());
         }
 
         let mut external = Vec::new();
@@ -1560,7 +1697,7 @@ pub fn load_sessions() -> Result<Vec<SessionInfo>> {
         external.extend(codex_handle.join().unwrap_or_default());
         external.extend(pi_handle.join().unwrap_or_default());
         external.extend(opencode_handle.join().unwrap_or_default());
-        external
+        (sessions, external)
     });
     sessions.extend(external_sessions);
 
@@ -1706,9 +1843,10 @@ fn load_external_codex_sessions(scan_limit: usize) -> Vec<SessionInfo> {
         return Vec::new();
     }
 
-    collect_recent_files_recursive(&root, "jsonl", scan_limit)
+    let paths = collect_recent_files_recursive(&root, "jsonl", scan_limit);
+    parallel_map(paths, |path| load_codex_session_stub(&path).ok().flatten())
         .into_iter()
-        .filter_map(|path| load_codex_session_stub(&path).ok().flatten())
+        .flatten()
         .collect()
 }
 
@@ -1915,9 +2053,10 @@ fn load_external_pi_sessions(scan_limit: usize) -> Vec<SessionInfo> {
         return Vec::new();
     }
 
-    collect_recent_files_recursive(&root, "jsonl", scan_limit)
+    let paths = collect_recent_files_recursive(&root, "jsonl", scan_limit);
+    parallel_map(paths, |path| load_pi_session_stub(&path).ok().flatten())
         .into_iter()
-        .filter_map(|path| load_pi_session_stub(&path).ok().flatten())
+        .flatten()
         .collect()
 }
 
@@ -2169,9 +2308,10 @@ fn load_external_opencode_sessions(scan_limit: usize) -> Vec<SessionInfo> {
         return Vec::new();
     }
 
-    collect_recent_files_recursive(&root, "json", scan_limit)
+    let paths = collect_recent_files_recursive(&root, "json", scan_limit);
+    parallel_map(paths, |path| load_opencode_session_stub(&path).ok().flatten())
         .into_iter()
-        .filter_map(|path| load_opencode_session_stub(&path).ok().flatten())
+        .flatten()
         .collect()
 }
 
@@ -2473,6 +2613,27 @@ pub fn load_sessions_grouped() -> Result<(Vec<ServerGroup>, Vec<SessionInfo>)> {
     Ok((groups, orphan_sessions))
 }
 
+/// Load only the sessions for a single external CLI (Codex or Claude Code),
+/// returned as orphan [`SessionInfo`] grouped output compatible with
+/// `SessionPicker::new_grouped`.
+///
+/// First-run onboarding's "continue where you left off" picker is filtered to a
+/// single external CLI, so the full `load_sessions_grouped` work (parsing every
+/// jcode snapshot, the other CLIs, and listing servers) is wasted there. This
+/// scoped loader keeps onboarding responsive by touching only the relevant
+/// transcripts.
+pub(crate) fn load_external_cli_sessions_grouped(
+    cli: crate::tui::app::onboarding_flow::ExternalCli,
+) -> (Vec<ServerGroup>, Vec<SessionInfo>) {
+    use crate::tui::app::onboarding_flow::ExternalCli;
+    let scan_limit = session_scan_limit();
+    let sessions = match cli {
+        ExternalCli::Codex => load_external_codex_sessions(scan_limit),
+        ExternalCli::ClaudeCode => load_external_claude_code_sessions(scan_limit),
+    };
+    (Vec::new(), sessions)
+}
+
 #[cfg(test)]
 #[path = "loading_tests.rs"]
 mod tests;
diff --git a/crates/jcode-tui/src/tui/session_picker/loading_tests.rs b/crates/jcode-tui/src/tui/session_picker/loading_tests.rs
index 12b285927..769a9b888 100644
--- a/crates/jcode-tui/src/tui/session_picker/loading_tests.rs
+++ b/crates/jcode-tui/src/tui/session_picker/loading_tests.rs
@@ -783,3 +783,122 @@ fn benchmark_resume_loading_reports_timings() {
         sessions.len()
     );
 }
+
+#[test]
+fn onboarding_scoped_loader_returns_only_codex_sessions() {
+    use crate::tui::app::onboarding_flow::ExternalCli;
+    let _env_lock = crate::storage::lock_test_env();
+    let temp = tempfile::tempdir().expect("temp dir");
+    let _home = EnvVarGuard::set_path("JCODE_HOME", temp.path());
+
+    // A Codex transcript that the onboarding picker should surface.
+    let codex_dir = temp.path().join("external/.codex/sessions/2026/05/01");
+    std::fs::create_dir_all(&codex_dir).expect("create codex dir");
+    std::fs::write(
+        codex_dir.join("rollout-2026-05-01T10-00-00-test.jsonl"),
+        "{\"timestamp\":\"2026-05-01T10:00:00Z\",\"type\":\"session_meta\",\"payload\":{\"id\":\"codex-onboarding-test\",\"timestamp\":\"2026-05-01T09:59:00Z\",\"cwd\":\"/tmp/codex-onboard\"}}\n",
+    )
+    .expect("write codex transcript");
+
+    // A jcode session that must NOT appear in the scoped Codex view (the whole
+    // point of the scoped loader is to skip parsing these on onboarding).
+    let mut jcode_session = Session::create_with_id(
+        "session_onboarding_jcode_1780000000000".to_string(),
+        Some("/tmp/jcode-onboard".to_string()),
+        Some("Jcode Onboarding".to_string()),
+    );
+    jcode_session.append_stored_message(crate::session::StoredMessage {
+        id: "msg-1".to_string(),
+        role: crate::message::Role::User,
+        content: vec![crate::message::ContentBlock::Text {
+            text: "should not show in codex onboarding view".to_string(),
+            cache_control: None,
+        }],
+        display_role: None,
+        timestamp: None,
+        tool_duration_ms: None,
+        token_usage: None,
+    });
+    jcode_session.save().expect("save jcode session");
+
+    let (groups, orphans) = load_external_cli_sessions_grouped(ExternalCli::Codex);
+    assert!(groups.is_empty(), "scoped loader produces only orphans");
+    assert!(
+        orphans
+            .iter()
+            .any(|s| s.id == "codex:codex-onboarding-test"),
+        "expected codex transcript in scoped onboarding load: {:?}",
+        orphans.iter().map(|s| &s.id).collect::<Vec<_>>()
+    );
+    assert!(
+        orphans
+            .iter()
+            .all(|s| matches!(s.resume_target, ResumeTarget::CodexSession { .. })),
+        "scoped Codex load must not include jcode/other-CLI sessions"
+    );
+}
+
+#[test]
+fn parallel_fill_skips_many_recent_empty_sessions_to_reach_scan_limit() {
+    let _env_lock = crate::storage::lock_test_env();
+    let temp = tempfile::tempdir().expect("temp dir");
+    let _home = EnvVarGuard::set_path("JCODE_HOME", temp.path());
+    let _scan_limit = EnvVarGuard::set_str("JCODE_SESSION_PICKER_MAX_SESSIONS", "50");
+
+    let sessions_dir = temp.path().join("sessions");
+    std::fs::create_dir_all(&sessions_dir).expect("create sessions dir");
+
+    let push_message = |session: &mut Session, text: &str| {
+        session.append_stored_message(crate::session::StoredMessage {
+            id: format!("msg-{text}"),
+            role: crate::message::Role::User,
+            content: vec![crate::message::ContentBlock::Text {
+                text: text.to_string(),
+                cache_control: None,
+            }],
+            display_role: None,
+            timestamp: None,
+            tool_duration_ms: None,
+            token_usage: None,
+        });
+    };
+
+    // Many recent but empty sessions (no visible messages) that the parallel
+    // two-phase fill must skip while still collecting `scan_limit` real ones.
+    for idx in 0..200 {
+        let mut session = Session::create_with_id(
+            format!("session_empty_{}", 1_790_000_000_000u64 + idx as u64),
+            Some(format!("/tmp/empty-{idx:03}")),
+            Some(format!("Empty {idx:03}")),
+        );
+        session.save().expect("save empty session");
+    }
+    // Older but non-empty sessions that should fill the list despite being less
+    // recent than the empty stubs above.
+    for idx in 0..60 {
+        let mut session = Session::create_with_id(
+            format!("session_full_{}", 1_780_000_000_000u64 + idx as u64),
+            Some(format!("/tmp/full-{idx:03}")),
+            Some(format!("Full {idx:03}")),
+        );
+        push_message(&mut session, &format!("real content {idx:03}"));
+        session.save().expect("save full session");
+    }
+
+    invalidate_session_list_cache();
+    let sessions = load_sessions().expect("load sessions");
+    let visible: Vec<&SessionInfo> = sessions
+        .iter()
+        .filter(|s| s.id.starts_with("session_full_"))
+        .collect();
+    assert_eq!(
+        visible.len(),
+        50,
+        "expected exactly scan_limit non-empty sessions, got {}",
+        visible.len()
+    );
+    assert!(
+        !sessions.iter().any(|s| s.id.starts_with("session_empty_")),
+        "empty sessions must be filtered out of the loaded list"
+    );
+}

From 514c34f8f5d8a1ebc32ebdeb283d45fa415ce6b1 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 11:04:27 -0700
Subject: [PATCH 11/57] chore(release): bump version to 0.22.0

Minor bump covering the 44 commits since v0.21.0, including:
- Eager token-by-token reasoning streaming and per-line multi-line
  thinking rendering in the TUI.
- Provider fixes: Gemini schema/thought_signature handling, Kimi
  reasoning_content, OpenRouter empty-message guard, Anthropic 1M
  context + split-cache cost accounting, API-key vs OAuth auth mode.
- Swarm: route messages by target, broadcast to whole swarm, inherit
  coordinator model/auth route on spawn.
- Self-dev reload correctness (daemon reloads into advertised binary),
  reload-trace OOM cap, and provider-doctor generic native suites.
- Served telemetry dashboard with accurate user/install metrics and
  /skills + endorsed NVIDIA CUDA-X skills.
---
 Cargo.lock | 2 +-
 Cargo.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 499204bc7..6244492a7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3265,7 +3265,7 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
 
 [[package]]
 name = "jcode"
-version = "0.21.0"
+version = "0.22.0"
 dependencies = [
  "agentgrep",
  "anyhow",
diff --git a/Cargo.toml b/Cargo.toml
index 35b32d17b..134a7fe74 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "jcode"
-version = "0.21.0"
+version = "0.22.0"
 description = "Possibly the greatest coding agent ever built — blazing-fast TUI, multi-model, swarm coordination, 30+ tools"
 edition = "2024"
 autobins = false

From 0611ae851ff45fc7068209bf1a256cb1bc3dc0fa Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 11:19:07 -0700
Subject: [PATCH 12/57] feat(skills): endorse Anthropic frontend-design skill

Add Anthropic's official frontend-design skill (the best design-focused
agent skill) to the endorsed list under a new 'Anthropic Design'
category, sourced from github.com/anthropics/skills with an install hint.
---
 crates/jcode-base/src/skill.rs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/crates/jcode-base/src/skill.rs b/crates/jcode-base/src/skill.rs
index 3dcd38208..164ff518f 100644
--- a/crates/jcode-base/src/skill.rs
+++ b/crates/jcode-base/src/skill.rs
@@ -467,6 +467,14 @@ pub const ENDORSED_SKILLS: &[EndorsedSkill] = &[
         source: "bundled with jcode / Claude Code skills",
         install: None,
     },
+    // Anthropic official skills (github.com/anthropics/skills, Apache-2.0).
+    EndorsedSkill {
+        name: "frontend-design",
+        description: "Create distinctive, production-grade frontend interfaces with high design quality (web components, pages, apps). Generates creative, polished code that avoids generic AI aesthetics.",
+        category: "Anthropic Design",
+        source: "anthropics/skills (official Anthropic catalog)",
+        install: Some("npx skills add anthropics/skills --skill frontend-design --yes (or Claude Code: /plugin marketplace add anthropics/skills)"),
+    },
     // NVIDIA CUDA-X / GPU accelerated-computing skills from the official
     // NVIDIA-verified catalog (github.com/NVIDIA/skills).
     EndorsedSkill {
@@ -817,6 +825,23 @@ mod tests {
         }
     }
 
+    #[test]
+    fn endorsed_skills_include_anthropic_frontend_design() {
+        let skill = endorsed_skills()
+            .iter()
+            .find(|s| s.name == "frontend-design")
+            .expect("expected endorsed Anthropic frontend-design skill");
+        assert_eq!(skill.category, "Anthropic Design");
+        assert!(
+            skill.source.contains("anthropics/skills"),
+            "frontend-design should be sourced from anthropics/skills"
+        );
+        assert!(
+            skill.install.is_some_and(|cmd| cmd.contains("anthropics/skills")),
+            "frontend-design should have an anthropics/skills install hint"
+        );
+    }
+
     #[test]
     fn registry_contains_reports_loaded_skills() {
         let temp = tempfile::tempdir().expect("tempdir");

From 6dd9bb80fecf8b6474238e8509d4fb8dfc557bb4 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 11:32:16 -0700
Subject: [PATCH 13/57] fix(onboarding): record auth_success for auto-imported
 logins

The guided first-run onboarding flow auto-imports existing external CLI
logins (Claude/Codex/Gemini/Copilot/Cursor/OpenRouter) via
run_external_auth_auto_import_candidates, which bypasses the manual
pending_login path that record_auth_success was wired into. As a result
every auto-imported login -- the happy path of the new onboarding -- was
invisible to the activation funnel, making auth_success undercount badly
(observed: more users reaching first_assistant_response than auth_success
in post-0.17 install cohorts, which is impossible without auth).

Surface coarse (provider, method="import") telemetry labels from the
import outcome and record auth_success for each imported provider in both
the onboarding and manual /login auto-import callers. Domain logic in
jcode-app-core stays telemetry-free; the TUI layer emits the event,
matching existing call sites.
---
 crates/jcode-app-core/src/external_auth.rs    | 67 +++++++++++++++++++
 crates/jcode-tui/src/tui/app/auth.rs          |  7 ++
 .../src/tui/app/onboarding_flow_control.rs    |  7 ++
 3 files changed, 81 insertions(+)

diff --git a/crates/jcode-app-core/src/external_auth.rs b/crates/jcode-app-core/src/external_auth.rs
index faec2ef4c..80aa225a9 100644
--- a/crates/jcode-app-core/src/external_auth.rs
+++ b/crates/jcode-app-core/src/external_auth.rs
@@ -101,10 +101,59 @@ impl ExternalAuthReviewCandidate {
     }
 }
 
+impl ExternalAuthReviewCandidate {
+    /// Coarse telemetry `(provider, method)` labels for the providers this
+    /// candidate activates on a successful import. Used by the onboarding flow
+    /// to record `auth_success` so auto-imported logins show up in the
+    /// activation funnel (they previously did not, because auto-import never
+    /// flows through the manual `pending_login` telemetry path).
+    ///
+    /// The method is reported as `"import"` so import-driven activation can be
+    /// distinguished from manual login in the funnel.
+    pub fn telemetry_auth_labels(&self) -> Vec<(&'static str, &'static str)> {
+        const METHOD: &str = "import";
+        match &self.action {
+            ExternalAuthReviewAction::CodexLegacy => vec![("openai", METHOD)],
+            ExternalAuthReviewAction::ClaudeCode => vec![("claude", METHOD)],
+            ExternalAuthReviewAction::GeminiCli => vec![("gemini", METHOD)],
+            ExternalAuthReviewAction::Copilot(_) => vec![("copilot", METHOD)],
+            ExternalAuthReviewAction::Cursor(_) => vec![("cursor", METHOD)],
+            ExternalAuthReviewAction::SharedExternal(source) => {
+                auth::external::source_provider_labels(*source)
+                    .into_iter()
+                    .filter_map(|label| {
+                        telemetry_provider_id_for_label(label).map(|id| (id, METHOD))
+                    })
+                    .collect()
+            }
+        }
+    }
+}
+
+/// Map a human-facing provider label (as produced by
+/// [`auth::external::source_provider_labels`]) to the canonical telemetry
+/// provider id used by the activation funnel.
+fn telemetry_provider_id_for_label(label: &str) -> Option<&'static str> {
+    match label {
+        "OpenAI/Codex" => Some("openai"),
+        "Claude" => Some("claude"),
+        "Gemini" => Some("gemini"),
+        "Antigravity" => Some("antigravity"),
+        "GitHub Copilot" => Some("copilot"),
+        "OpenRouter/API-key providers" => Some("openrouter"),
+        _ => None,
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct ExternalAuthAutoImportOutcome {
     pub imported: usize,
     pub messages: Vec<String>,
+    /// Coarse `(provider, method)` telemetry labels for each provider that was
+    /// successfully imported, so callers can record `auth_success` for the
+    /// activation funnel. May contain more entries than `imported` when a
+    /// single source carries multiple providers.
+    pub imported_auth_labels: Vec<(&'static str, &'static str)>,
 }
 
 impl ExternalAuthAutoImportOutcome {
@@ -535,6 +584,7 @@ pub async fn run_external_auth_auto_import_candidates(
     let mut outcome = ExternalAuthAutoImportOutcome {
         imported: 0,
         messages: Vec::new(),
+        imported_auth_labels: Vec::new(),
     };
 
     for &index in selected {
@@ -545,6 +595,9 @@ pub async fn run_external_auth_auto_import_candidates(
         match validate_external_auth_review_candidate(candidate).await {
             Ok(detail) => {
                 outcome.imported += 1;
+                outcome
+                    .imported_auth_labels
+                    .extend(candidate.telemetry_auth_labels());
                 outcome.messages.push(format!(
                     "✓ {} (from {}): {}",
                     candidate.provider_summary, candidate.source_name, detail
@@ -573,6 +626,7 @@ mod render_markdown_tests {
         let outcome = ExternalAuthAutoImportOutcome {
             imported: 0,
             messages: Vec::new(),
+            imported_auth_labels: Vec::new(),
         };
         assert_eq!(
             outcome.render_markdown(),
@@ -590,6 +644,7 @@ mod render_markdown_tests {
                 "✓ Claude (from Claude Code): Loaded Claude credentials.".to_string(),
                 "✕ Cursor (from Cursor native): no usable auth token.".to_string(),
             ],
+            imported_auth_labels: vec![("openai", "import"), ("claude", "import")],
         };
         let md = outcome.render_markdown();
         assert!(md.starts_with("**Logins imported**"), "got: {md}");
@@ -613,8 +668,20 @@ mod render_markdown_tests {
         let outcome = ExternalAuthAutoImportOutcome {
             imported: 1,
             messages: vec!["✓ Gemini (from Gemini CLI): Loaded Gemini credentials.".to_string()],
+            imported_auth_labels: vec![("gemini", "import")],
         };
         let md = outcome.render_markdown();
         assert!(md.contains("Reusing 1 existing login:"), "got: {md}");
     }
+
+    #[test]
+    fn fixture_candidate_reports_import_auth_labels() {
+        use super::ExternalAuthReviewCandidate;
+        // The fixture points at the legacy Codex action -> OpenAI provider.
+        let candidate = ExternalAuthReviewCandidate::fixture("OpenAI/Codex", "Codex auth.json");
+        assert_eq!(
+            candidate.telemetry_auth_labels(),
+            vec![("openai", "import")]
+        );
+    }
 }
diff --git a/crates/jcode-tui/src/tui/app/auth.rs b/crates/jcode-tui/src/tui/app/auth.rs
index 0e31dfd6d..32e3437bb 100644
--- a/crates/jcode-tui/src/tui/app/auth.rs
+++ b/crates/jcode-tui/src/tui/app/auth.rs
@@ -2281,6 +2281,13 @@ impl App {
                     .await
                     {
                         Ok(outcome) => {
+                            // Auto-import bypasses the manual `pending_login`
+                            // telemetry path, so record `auth_success` for each
+                            // imported provider to keep the activation funnel
+                            // accurate.
+                            for (provider, method) in &outcome.imported_auth_labels {
+                                crate::telemetry::record_auth_success(provider, method);
+                            }
                             Bus::global().publish(BusEvent::LoginCompleted(LoginCompleted {
                                 provider: "auto-import".to_string(),
                                 success: outcome.imported > 0,
diff --git a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs
index 3f95702d9..0803c626c 100644
--- a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs
+++ b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs
@@ -585,6 +585,13 @@ impl App {
                     return;
                 }
             };
+            // Auto-import bypasses the manual `pending_login` path, so record
+            // `auth_success` here for each imported provider. Without this the
+            // onboarding activation funnel undercounts every imported login
+            // (the happy path of the guided first-run flow).
+            for (provider, method) in &outcome.imported_auth_labels {
+                crate::telemetry::record_auth_success(provider, method);
+            }
             crate::bus::Bus::global().publish(crate::bus::BusEvent::LoginCompleted(
                 crate::bus::LoginCompleted {
                     provider: "auto-import".to_string(),

From f77a740207810b7c630333bf36c9e3897fe06628 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 11:58:43 -0700
Subject: [PATCH 14/57] perf(session-picker): tail-read external transcript
 previews

Codex/Claude Code preview loaders parsed the entire JSONL transcript
(often multiple MB, up to tens of MB) on every selection change just to
show the last ~20 messages. In the onboarding resume menu this made
arrow-key navigation lag badly, since each selection spawned a fresh
full-file parse thread. Normal /resume (jcode native sessions) avoids
this path, which is why only onboarding felt slow.

Read only the trailing 512 KiB of the file instead: drop the partial
first line, skip malformed boundary records, and parse the rest. This
turns each preview load from ~140ms into ~1ms regardless of transcript
size. Adds regression tests covering large (>cap) Codex and Claude
transcripts.
---
 .../src/tui/session_picker/loading.rs         | 68 ++++++++++++---
 .../src/tui/session_picker/loading_tests.rs   | 82 +++++++++++++++++++
 2 files changed, 140 insertions(+), 10 deletions(-)

diff --git a/crates/jcode-tui/src/tui/session_picker/loading.rs b/crates/jcode-tui/src/tui/session_picker/loading.rs
index 26d5e935a..4acc66aef 100644
--- a/crates/jcode-tui/src/tui/session_picker/loading.rs
+++ b/crates/jcode-tui/src/tui/session_picker/loading.rs
@@ -642,6 +642,37 @@ fn collect_recent_files_recursive(root: &Path, extension: &str, limit: usize) ->
     files.into_iter().map(|(_, path)| path).collect()
 }
 
+/// Maximum number of bytes we read from the *tail* of an external transcript
+/// (Codex / Claude Code) when building its preview. These JSONL transcripts can
+/// be tens of MB, but the preview only ever shows the last ~20 messages, so
+/// parsing the whole file on every selection change made arrow-key navigation
+/// in the resume / onboarding picker lag badly (each load reparsed the entire
+/// file on a fresh thread). Reading a bounded tail keeps each preview load to a
+/// sub-millisecond seek + parse regardless of transcript size.
+///
+/// 512 KiB comfortably covers far more than 20 messages for normal transcripts
+/// while bounding the worst case.
+const EXTERNAL_PREVIEW_TAIL_BYTES: u64 = 512 * 1024;
+
+/// Read the trailing portion of a file as UTF-8 text, capped at
+/// [`EXTERNAL_PREVIEW_TAIL_BYTES`]. When the file is larger than the cap we seek
+/// to the tail and drop the (possibly partial) first line so we only ever parse
+/// complete JSONL records. Returns `(text, truncated_from_head)` where
+/// `truncated_from_head` indicates the head of the file was skipped.
+fn read_file_tail_text(path: &Path, max_bytes: u64) -> Option<(String, bool)> {
+    let mut file = File::open(path).ok()?;
+    let len = file.metadata().ok()?.len();
+    let truncated = len > max_bytes;
+    if truncated {
+        file.seek(SeekFrom::Start(len - max_bytes)).ok()?;
+    }
+    let mut bytes = Vec::with_capacity(max_bytes.min(len) as usize);
+    file.take(max_bytes).read_to_end(&mut bytes).ok()?;
+    // Lossily decode: transcripts are UTF-8, but a tail seek can land mid
+    // multi-byte sequence, and replacement chars are harmless for a preview.
+    Some((String::from_utf8_lossy(&bytes).into_owned(), truncated))
+}
+
 fn push_preview_message(preview: &mut Vec<PreviewMessage>, role: &str, content: String) {
     let content = content.trim();
     if content.is_empty() {
@@ -1790,17 +1821,26 @@ fn load_external_claude_code_sessions(scan_limit: usize) -> Vec<SessionInfo> {
 }
 
 pub(super) fn load_claude_code_preview_from_path(path: &Path) -> Option<Vec<PreviewMessage>> {
-    let file = File::open(path).ok()?;
-    let reader = BufReader::new(file);
+    // Only parse the tail of the transcript (see `load_codex_preview_from_path`):
+    // the preview shows the last ~20 messages, so reparsing multi-MB transcripts
+    // on every selection change made picker navigation lag.
+    let (text, truncated) = read_file_tail_text(path, EXTERNAL_PREVIEW_TAIL_BYTES)?;
     let mut preview = Vec::new();
 
-    for line in reader.lines() {
-        let line = line.ok()?;
+    // If we seeked into the middle of the file, the first line is a partial
+    // record; drop it. When we read the whole file the first line is a real
+    // record we must keep.
+    let skip = usize::from(truncated);
+    for line in text.lines().skip(skip) {
         let trimmed = line.trim();
         if trimmed.is_empty() {
             continue;
         }
-        let value: serde_json::Value = serde_json::from_str(trimmed).ok()?;
+        // Boundary lines from a tail slice may be malformed; skip rather than
+        // abandon the whole preview.
+        let Ok(value) = serde_json::from_str::<serde_json::Value>(trimmed) else {
+            continue;
+        };
         let entry_type = value
             .get("type")
             .and_then(|v| v.as_str())
@@ -1985,17 +2025,25 @@ fn find_codex_session_file(session_id: &str) -> Option<PathBuf> {
 }
 
 pub(super) fn load_codex_preview_from_path(path: &Path) -> Option<Vec<PreviewMessage>> {
-    let file = File::open(path).ok()?;
-    let reader = BufReader::new(file);
+    // Only parse the tail of the transcript: the preview shows the last ~20
+    // messages, and these rollout files can be tens of MB, so reading the whole
+    // file on every selection change made picker navigation lag.
+    let (text, _truncated) = read_file_tail_text(path, EXTERNAL_PREVIEW_TAIL_BYTES)?;
     let mut preview = Vec::new();
 
-    for line in reader.lines().skip(1) {
-        let line = line.ok()?;
+    // When we read from the start we skip the first line (the `session_meta`
+    // record). When we read a tail slice the first line is almost certainly a
+    // partial record, so we drop it either way.
+    for line in text.lines().skip(1) {
         let trimmed = line.trim();
         if trimmed.is_empty() {
             continue;
         }
-        let value: serde_json::Value = serde_json::from_str(trimmed).ok()?;
+        // A tail slice can yield malformed JSON on its boundary lines; skip
+        // those instead of bailing out of the whole preview.
+        let Ok(value) = serde_json::from_str::<serde_json::Value>(trimmed) else {
+            continue;
+        };
         let line_type = value
             .get("type")
             .and_then(|v| v.as_str())
diff --git a/crates/jcode-tui/src/tui/session_picker/loading_tests.rs b/crates/jcode-tui/src/tui/session_picker/loading_tests.rs
index 769a9b888..cbfd8e642 100644
--- a/crates/jcode-tui/src/tui/session_picker/loading_tests.rs
+++ b/crates/jcode-tui/src/tui/session_picker/loading_tests.rs
@@ -318,6 +318,88 @@ fn load_codex_preview_preserves_blank_line_between_tool_transcript_and_followup_
     );
 }
 
+#[test]
+fn load_codex_preview_reads_only_tail_of_large_transcript() {
+    // A transcript far larger than the tail cap should still produce a preview
+    // of the most-recent messages, parsed from only the tail slice. This is the
+    // regression guard for the picker-navigation lag: previews must not depend
+    // on parsing the whole (multi-MB) file.
+    let temp = tempfile::tempdir().expect("temp dir");
+    let transcript_path = temp.path().join("rollout-big.jsonl");
+
+    let mut contents = String::new();
+    // session_meta header line (always skipped).
+    contents.push_str(
+        "{\"timestamp\":\"2026-04-10T19:05:54.536Z\",\"type\":\"session_meta\",\"payload\":{\"id\":\"019d-big\"}}\n",
+    );
+    // Padding messages near the head that must NOT appear in the preview once
+    // the file exceeds the tail cap.
+    for i in 0..50_000 {
+        contents.push_str(&format!(
+            "{{\"type\":\"response_item\",\"payload\":{{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{{\"type\":\"output_text\",\"text\":\"old padding message {i} aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"}}]}}}}\n",
+        ));
+    }
+    assert!(
+        contents.len() as u64 > EXTERNAL_PREVIEW_TAIL_BYTES,
+        "test transcript must exceed the tail cap"
+    );
+    // Distinctive recent messages at the very end.
+    contents.push_str(
+        "{\"type\":\"response_item\",\"payload\":{\"type\":\"message\",\"role\":\"user\",\"content\":[{\"type\":\"input_text\",\"text\":\"RECENT_USER_MARKER\"}]}}\n",
+    );
+    contents.push_str(
+        "{\"type\":\"response_item\",\"payload\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"RECENT_ASSISTANT_MARKER\"}]}}\n",
+    );
+    std::fs::write(&transcript_path, &contents).expect("write big transcript");
+
+    let preview = load_codex_preview_from_path(&transcript_path).expect("preview");
+    // Preview is capped at 20 messages.
+    assert!(preview.len() <= 20, "preview should be capped, got {}", preview.len());
+    // The most-recent markers must be present.
+    let last_two = &preview[preview.len().saturating_sub(2)..];
+    assert!(last_two.iter().any(|m| m.content.contains("RECENT_USER_MARKER")));
+    assert!(last_two.iter().any(|m| m.content.contains("RECENT_ASSISTANT_MARKER")));
+    // The head padding must have been skipped (not parsed from the tail slice).
+    assert!(
+        !preview.iter().any(|m| m.content.contains("old padding message 0 ")),
+        "head messages should not appear when only the tail is read"
+    );
+}
+
+#[test]
+fn load_claude_code_preview_reads_only_tail_of_large_transcript() {
+    let temp = tempfile::tempdir().expect("temp dir");
+    let transcript_path = temp.path().join("claude-big.jsonl");
+
+    let mut contents = String::new();
+    for i in 0..50_000 {
+        contents.push_str(&format!(
+            "{{\"type\":\"assistant\",\"uuid\":\"a{i}\",\"message\":{{\"role\":\"assistant\",\"content\":[{{\"type\":\"text\",\"text\":\"old padding message {i} bbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\"}}]}}}}\n",
+        ));
+    }
+    assert!(
+        contents.len() as u64 > EXTERNAL_PREVIEW_TAIL_BYTES,
+        "test transcript must exceed the tail cap"
+    );
+    contents.push_str(
+        "{\"type\":\"user\",\"uuid\":\"u_last\",\"message\":{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"RECENT_USER_MARKER\"}]}}\n",
+    );
+    contents.push_str(
+        "{\"type\":\"assistant\",\"uuid\":\"a_last\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"RECENT_ASSISTANT_MARKER\"}]}}\n",
+    );
+    std::fs::write(&transcript_path, &contents).expect("write big transcript");
+
+    let preview = load_claude_code_preview_from_path(&transcript_path).expect("preview");
+    assert!(preview.len() <= 20, "preview should be capped, got {}", preview.len());
+    let last_two = &preview[preview.len().saturating_sub(2)..];
+    assert!(last_two.iter().any(|m| m.content.contains("RECENT_USER_MARKER")));
+    assert!(last_two.iter().any(|m| m.content.contains("RECENT_ASSISTANT_MARKER")));
+    assert!(
+        !preview.iter().any(|m| m.content.contains("old padding message 0 ")),
+        "head messages should not appear when only the tail is read"
+    );
+}
+
 #[test]
 fn load_sessions_prefers_custom_title_over_generated_title() {
     let _env_lock = crate::storage::lock_test_env();

From f0656c407a3eed3fa1ad6350d7212dcfa1a42286 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 13:27:42 -0700
Subject: [PATCH 15/57] skill_manage: include endorsed skill catalog in list
 output

---
 crates/jcode-app-core/src/tool/skill.rs | 145 +++++++++++++++++++-----
 1 file changed, 114 insertions(+), 31 deletions(-)

diff --git a/crates/jcode-app-core/src/tool/skill.rs b/crates/jcode-app-core/src/tool/skill.rs
index b7678c1dd..6d8d5dcd1 100644
--- a/crates/jcode-app-core/src/tool/skill.rs
+++ b/crates/jcode-app-core/src/tool/skill.rs
@@ -21,7 +21,8 @@ impl SkillTool {
 
 #[derive(Deserialize)]
 struct SkillInput {
-    /// Action to perform: load (default), list, reload, reload_all, read
+    /// Action to perform: load (default), list, reload, reload_all, read.
+    /// `list` shows both loaded skills and the jcode-endorsed catalog.
     #[serde(default = "default_action")]
     action: String,
     /// Skill name (required for load, reload, read)
@@ -119,36 +120,41 @@ impl SkillTool {
 
     async fn list_skills(&self) -> Result<ToolOutput> {
         let registry = self.registry.read().await;
-        let skills = registry.list();
-
-        if skills.is_empty() {
-            return Ok(ToolOutput::new(
-                "No skills available.\n\n\
-                Skills are loaded from:\n\
-                - ~/.claude/skills/<skill-name>/SKILL.md\n\
-                - ./.claude/skills/<skill-name>/SKILL.md\n\n\
-                Create a SKILL.md file with YAML frontmatter:\n\
-                ---\n\
-                name: my-skill\n\
-                description: What this skill does\n\
-                allowed-tools: bash, read, write\n\
-                ---\n\n\
-                # Skill content here",
-            )
-            .with_title("Skills: None available"));
-        }
-
-        let mut output = format!("Available skills: {}\n\n", skills.len());
-
-        for skill in skills {
-            output.push_str(&format!("## /{}\n", skill.name));
-            output.push_str(&format!("  {}\n", skill.description));
-            output.push_str(&format!("  Path: {}\n", skill.path.display()));
-            if let Some(ref tools) = skill.allowed_tools {
-                output.push_str(&format!("  Tools: {}\n", tools.join(", ")));
+        let mut skills = registry.list();
+        skills.sort_by(|a, b| a.name.cmp(&b.name));
+
+        let installed: std::collections::HashSet<&str> =
+            skills.iter().map(|s| s.name.as_str()).collect();
+
+        let mut output = if skills.is_empty() {
+            "No skills loaded.\n\n\
+            Skills are loaded from:\n\
+            - ~/.jcode/skills/<skill-name>/SKILL.md (global)\n\
+            - ./.jcode/skills/<skill-name>/SKILL.md (project-local)\n\
+            - ./.claude/skills/<skill-name>/SKILL.md (compatibility)\n\n\
+            Create a SKILL.md file with YAML frontmatter:\n\
+            ---\n\
+            name: my-skill\n\
+            description: What this skill does\n\
+            allowed-tools: bash, read, write\n\
+            ---\n\n\
+            # Skill content here\n"
+                .to_string()
+        } else {
+            let mut output = format!("Loaded skills: {}\n\n", skills.len());
+            for skill in &skills {
+                output.push_str(&format!("## /{}\n", skill.name));
+                output.push_str(&format!("  {}\n", skill.description));
+                output.push_str(&format!("  Path: {}\n", skill.path.display()));
+                if let Some(ref tools) = skill.allowed_tools {
+                    output.push_str(&format!("  Tools: {}\n", tools.join(", ")));
+                }
+                output.push('\n');
             }
-            output.push('\n');
-        }
+            output
+        };
+
+        append_endorsed_skills(&mut output, &installed);
 
         Ok(ToolOutput::new(output).with_title("Skills: List"))
     }
@@ -243,6 +249,61 @@ impl SkillTool {
     }
 }
 
+/// Append the curated jcode-endorsed skill catalog to `output`, grouped by
+/// category and marked with installed/not-installed status. `installed` is the
+/// set of skill names currently loaded in the registry.
+fn append_endorsed_skills(output: &mut String, installed: &std::collections::HashSet<&str>) {
+    let endorsed = crate::skill::endorsed_skills();
+    if endorsed.is_empty() {
+        return;
+    }
+
+    output.push_str("\nEndorsed skills (recommended by jcode)\n");
+
+    // Group by category, preserving first-seen order.
+    let mut category_order: Vec<&str> = Vec::new();
+    for skill in endorsed {
+        if !category_order.contains(&skill.category) {
+            category_order.push(skill.category);
+        }
+    }
+
+    for category in category_order {
+        let in_category: Vec<_> = endorsed.iter().filter(|e| e.category == category).collect();
+        let installed_count = in_category
+            .iter()
+            .filter(|e| installed.contains(e.name))
+            .count();
+        output.push_str(&format!(
+            "\n  {} ({}/{} installed)\n",
+            category,
+            installed_count,
+            in_category.len()
+        ));
+        for skill in in_category {
+            let is_installed = installed.contains(skill.name);
+            let status = if is_installed {
+                "installed"
+            } else {
+                "not installed"
+            };
+            output.push_str(&format!("  - /{} [{}]\n", skill.name, status));
+            output.push_str(&format!("      {}\n", skill.description));
+            output.push_str(&format!("      source: {}\n", skill.source));
+            if !is_installed && let Some(install) = skill.install {
+                output.push_str(&format!("      install: {}\n", install));
+            }
+        }
+    }
+
+    output.push_str(
+        "\nActivate a loaded skill by loading it with skill_manage (action=load) or typing its slash command.\n",
+    );
+    output.push_str(
+        "NVIDIA CUDA-X skills come from the official catalog at https://github.com/NVIDIA/skills.\n",
+    );
+}
+
 fn normalize_skill_name(name: Option<String>, action: &str) -> Result<String> {
     let name = name.ok_or_else(|| anyhow::anyhow!("'name' is required for {} action", action))?;
     let trimmed = name.trim().trim_start_matches('/').to_string();
@@ -318,7 +379,29 @@ mod tests {
         let input = json!({"action": "list"});
 
         let result = tool.execute(input, ctx).await.unwrap();
-        assert!(result.output.contains("No skills available"));
+        assert!(result.output.contains("No skills loaded"));
+        // Even with no skills loaded, the endorsed catalog should be listed.
+        assert!(result.output.contains("Endorsed skills"));
+    }
+
+    #[tokio::test]
+    async fn test_list_includes_endorsed_skills() {
+        let tool = create_test_tool();
+        let ctx = create_test_context();
+        let input = json!({"action": "list"});
+
+        let result = tool.execute(input, ctx).await.unwrap();
+        // Every endorsed skill should appear with an install-status marker.
+        for endorsed in crate::skill::endorsed_skills() {
+            assert!(
+                result.output.contains(&format!("/{}", endorsed.name)),
+                "expected endorsed skill /{} in:\n{}",
+                endorsed.name,
+                result.output
+            );
+        }
+        // No skills are loaded in this tool, so they should be "not installed".
+        assert!(result.output.contains("[not installed]"));
     }
 
     #[tokio::test]

From 8939a57aeed483dae07d79b811793690a332c082 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 13:50:18 -0700
Subject: [PATCH 16/57] feat(gmail): add pluggable Composio managed-OAuth
 backend

The native gmail tool keeps its interface, confirmation gating, access
tiers, and token-lean formatting, but its auth/transport is now pluggable
via GmailBackend (Direct | Composio).

- Direct: existing local Google OAuth tokens.
- Composio: routes the same Gmail REST calls through Composio's
  proxy-execute endpoint, brokered by a Google-verified app. No
  unverified-app warning and no 7-day testing-mode token expiry.

Backend is selected via JCODE_GMAIL_BACKEND=composio + COMPOSIO_API_KEY.
Capability checks (is_configured/can_send/can_delete) are now
backend-aware. Adds unit tests and docs/GMAIL_COMPOSIO_BACKEND.md.
---
 crates/jcode-app-core/src/tool/gmail.rs |  16 +-
 crates/jcode-base/src/gmail.rs          | 427 +++++++++++++++++++-----
 docs/GMAIL_COMPOSIO_BACKEND.md          |  73 ++++
 3 files changed, 423 insertions(+), 93 deletions(-)
 create mode 100644 docs/GMAIL_COMPOSIO_BACKEND.md

diff --git a/crates/jcode-app-core/src/tool/gmail.rs b/crates/jcode-app-core/src/tool/gmail.rs
index 9cbc2b2df..f132b3f36 100644
--- a/crates/jcode-app-core/src/tool/gmail.rs
+++ b/crates/jcode-app-core/src/tool/gmail.rs
@@ -4,7 +4,6 @@ use async_trait::async_trait;
 use serde::Deserialize;
 use serde_json::{Value, json};
 
-use crate::auth::google;
 use crate::gmail::{self, GmailClient, MessageFormat};
 
 pub struct GmailTool {
@@ -92,10 +91,8 @@ impl Tool for GmailTool {
     }
 
     async fn execute(&self, input: Value, _ctx: ToolContext) -> Result<ToolOutput> {
-        if !google::has_tokens() {
-            return Ok(ToolOutput::new(
-                "Gmail is not configured. Run `jcode login google` to set up Gmail access.",
-            ));
+        if !self.client.is_configured() {
+            return Ok(ToolOutput::new(self.client.not_configured_message()));
         }
 
         let params: GmailInput = serde_json::from_value(input)?;
@@ -278,8 +275,7 @@ impl Tool for GmailTool {
             }
 
             "send" => {
-                let tokens = google::load_tokens()?;
-                if !tokens.tier.can_send() {
+                if !self.client.can_send() {
                     return Ok(ToolOutput::new(
                         "Send is not available. Your Gmail access is configured as Read & Draft Only (API-level restriction).\n\
                          The draft has been created - open Gmail to send it manually.\n\
@@ -323,8 +319,7 @@ impl Tool for GmailTool {
             }
 
             "send_draft" => {
-                let tokens = google::load_tokens()?;
-                if !tokens.tier.can_send() {
+                if !self.client.can_send() {
                     return Ok(ToolOutput::new(
                         "Send is not available. Your Gmail access is configured as Read & Draft Only (API-level restriction).\n\
                          Open Gmail to send the draft manually.\n\
@@ -352,8 +347,7 @@ impl Tool for GmailTool {
             }
 
             "trash" => {
-                let tokens = google::load_tokens()?;
-                if !tokens.tier.can_delete() {
+                if !self.client.can_delete() {
                     return Ok(ToolOutput::new(
                         "Trash is not available. Your Gmail access is configured as Read & Draft Only (API-level restriction).\n\
                          To enable delete, rerun `jcode login google --google-access-tier full`.",
diff --git a/crates/jcode-base/src/gmail.rs b/crates/jcode-base/src/gmail.rs
index 8b4309645..72ea959f7 100644
--- a/crates/jcode-base/src/gmail.rs
+++ b/crates/jcode-base/src/gmail.rs
@@ -1,12 +1,88 @@
 use anyhow::Result;
 use serde::{Deserialize, Serialize};
+use serde_json::{Value, json};
 
 use crate::auth::google;
 
 const GMAIL_API_BASE: &str = "https://gmail.googleapis.com/gmail/v1/users/me";
+const COMPOSIO_DEFAULT_BASE: &str = "https://backend.composio.dev/api/v3.1";
+
+/// Where the Gmail tool gets its credentials and authenticated transport.
+///
+/// `Direct` talks to the Google Gmail REST API using locally stored OAuth
+/// tokens (the original behavior). `Composio` routes the *same* Gmail REST
+/// calls through Composio's managed `proxy-execute` endpoint, so a
+/// Google-verified app brokers auth: no unverified-app warning and no 7-day
+/// testing-mode token expiry.
+#[derive(Debug, Clone)]
+pub enum GmailBackend {
+    Direct,
+    Composio(ComposioConfig),
+}
+
+#[derive(Debug, Clone)]
+pub struct ComposioConfig {
+    pub api_key: String,
+    pub base_url: String,
+    pub connected_account_id: Option<String>,
+    pub user_id: Option<String>,
+}
+
+impl GmailBackend {
+    /// Resolve the backend from environment configuration.
+    ///
+    /// Defaults to `Direct`. Set `JCODE_GMAIL_BACKEND=composio` (with
+    /// `COMPOSIO_API_KEY` present) to broker Gmail through Composio.
+    pub fn from_env() -> Self {
+        let selection = std::env::var("JCODE_GMAIL_BACKEND")
+            .unwrap_or_default()
+            .trim()
+            .to_lowercase();
+        if selection == "composio" {
+            if let Some(cfg) = ComposioConfig::from_env() {
+                return GmailBackend::Composio(cfg);
+            }
+            eprintln!(
+                "JCODE_GMAIL_BACKEND=composio but COMPOSIO_API_KEY is not set; falling back to direct Gmail backend"
+            );
+        }
+        GmailBackend::Direct
+    }
+
+    pub fn label(&self) -> &'static str {
+        match self {
+            GmailBackend::Direct => "direct",
+            GmailBackend::Composio(_) => "composio",
+        }
+    }
+}
+
+impl ComposioConfig {
+    fn from_env() -> Option<Self> {
+        let api_key = std::env::var("COMPOSIO_API_KEY").ok().filter(|s| !s.is_empty())?;
+        let base_url = std::env::var("COMPOSIO_BASE_URL")
+            .ok()
+            .filter(|s| !s.is_empty())
+            .unwrap_or_else(|| COMPOSIO_DEFAULT_BASE.to_string());
+        let connected_account_id = std::env::var("COMPOSIO_GMAIL_CONNECTED_ACCOUNT_ID")
+            .ok()
+            .filter(|s| !s.is_empty());
+        let user_id = std::env::var("COMPOSIO_GMAIL_USER_ID")
+            .or_else(|_| std::env::var("COMPOSIO_USER_ID"))
+            .ok()
+            .filter(|s| !s.is_empty());
+        Some(Self {
+            api_key,
+            base_url,
+            connected_account_id,
+            user_id,
+        })
+    }
+}
 
 pub struct GmailClient {
     http: reqwest::Client,
+    backend: GmailBackend,
 }
 
 impl Default for GmailClient {
@@ -17,13 +93,146 @@ impl Default for GmailClient {
 
 impl GmailClient {
     pub fn new() -> Self {
+        Self::with_backend(GmailBackend::from_env())
+    }
+
+    pub fn with_backend(backend: GmailBackend) -> Self {
         Self {
             http: crate::provider::shared_http_client(),
+            backend,
+        }
+    }
+
+    pub fn backend_label(&self) -> &'static str {
+        self.backend.label()
+    }
+
+    /// Whether this backend has credentials available to talk to Gmail.
+    pub fn is_configured(&self) -> bool {
+        match &self.backend {
+            GmailBackend::Direct => google::has_tokens(),
+            GmailBackend::Composio(cfg) => !cfg.api_key.is_empty(),
+        }
+    }
+
+    /// Whether the current backend is allowed to send mail.
+    ///
+    /// The `Direct` backend honors the locally configured access tier
+    /// (read-only logins cannot send). Composio connections request full
+    /// Gmail scopes, so sending is available.
+    pub fn can_send(&self) -> bool {
+        match &self.backend {
+            GmailBackend::Direct => google::load_tokens()
+                .map(|t| t.tier.can_send())
+                .unwrap_or(false),
+            GmailBackend::Composio(_) => true,
+        }
+    }
+
+    /// Whether the current backend is allowed to delete/trash mail.
+    pub fn can_delete(&self) -> bool {
+        match &self.backend {
+            GmailBackend::Direct => google::load_tokens()
+                .map(|t| t.tier.can_delete())
+                .unwrap_or(false),
+            GmailBackend::Composio(_) => true,
+        }
+    }
+
+    pub fn not_configured_message(&self) -> &'static str {
+        match &self.backend {
+            GmailBackend::Direct => {
+                "Gmail is not configured. Run `jcode login google` to set up Gmail access."
+            }
+            GmailBackend::Composio(_) => {
+                "Gmail (Composio backend) is not configured. Set COMPOSIO_API_KEY and connect your \
+                 Gmail account in Composio, then retry."
+            }
+        }
+    }
+
+    /// Send an authenticated Gmail REST request and return the parsed JSON
+    /// response. Both backends produce the identical Gmail API JSON shape, so
+    /// callers can deserialize into the same typed structs.
+    async fn request(
+        &self,
+        method: reqwest::Method,
+        url: &str,
+        body: Option<Value>,
+    ) -> Result<Value> {
+        match &self.backend {
+            GmailBackend::Direct => self.request_direct(method, url, body).await,
+            GmailBackend::Composio(cfg) => self.request_composio(cfg, method, url, body).await,
         }
     }
 
-    async fn token(&self) -> Result<String> {
-        google::get_valid_token().await
+    async fn request_direct(
+        &self,
+        method: reqwest::Method,
+        url: &str,
+        body: Option<Value>,
+    ) -> Result<Value> {
+        let token = google::get_valid_token().await?;
+        let mut req = self.http.request(method, url).bearer_auth(&token);
+        if let Some(ref b) = body {
+            req = req.json(b);
+        }
+        let resp = req.send().await?;
+        let status = resp.status();
+        let text = resp.text().await?;
+        if !status.is_success() {
+            return Err(anyhow::anyhow!(
+                "Gmail API error {}: {}",
+                status,
+                truncate_error(&text)
+            ));
+        }
+        if text.trim().is_empty() {
+            return Ok(Value::Null);
+        }
+        Ok(serde_json::from_str(&text)?)
+    }
+
+    async fn request_composio(
+        &self,
+        cfg: &ComposioConfig,
+        method: reqwest::Method,
+        url: &str,
+        body: Option<Value>,
+    ) -> Result<Value> {
+        let payload = build_composio_proxy_payload(cfg, method.as_str(), url, body);
+        let endpoint = format!("{}/tools/execute/proxy", cfg.base_url.trim_end_matches('/'));
+        let resp = self
+            .http
+            .post(&endpoint)
+            .header("x-api-key", &cfg.api_key)
+            .json(&payload)
+            .send()
+            .await?;
+        let status = resp.status();
+        let text = resp.text().await?;
+        if !status.is_success() {
+            return Err(anyhow::anyhow!(
+                "Composio proxy error {}: {}",
+                status,
+                truncate_error(&text)
+            ));
+        }
+        let envelope: Value = serde_json::from_str(&text)?;
+        // Composio wraps the upstream response as { data, status, headers }.
+        if let Some(inner) = envelope.get("status").and_then(|s| s.as_u64()) {
+            if inner >= 400 {
+                return Err(anyhow::anyhow!(
+                    "Gmail API error {} (via Composio): {}",
+                    inner,
+                    truncate_error(&envelope.get("data").map(|d| d.to_string()).unwrap_or_default())
+                ));
+            }
+        }
+        if let Some(err) = envelope.get("error").filter(|e| !e.is_null()) {
+            return Err(anyhow::anyhow!("Composio error: {}", truncate_error(&err.to_string())));
+        }
+        Ok(envelope.get("data").cloned().unwrap_or(Value::Null))
     }
 
     pub async fn list_messages(
@@ -32,7 +241,6 @@ impl GmailClient {
         label_ids: Option<&[&str]>,
         max_results: u32,
     ) -> Result<MessageList> {
-        let token = self.token().await?;
         let mut url = format!("{}/messages?maxResults={}", GMAIL_API_BASE, max_results);
 
         if let Some(q) = query {
@@ -44,61 +252,47 @@ impl GmailClient {
             }
         }
 
-        let resp = self.http.get(&url).bearer_auth(&token).send().await?;
-        handle_error(&resp).await?;
-        let list: MessageList = resp.json().await?;
-        Ok(list)
+        let value = self.request(reqwest::Method::GET, &url, None).await?;
+        Ok(serde_json::from_value(value)?)
     }
 
     pub async fn get_message(&self, id: &str, format: MessageFormat) -> Result<Message> {
-        let token = self.token().await?;
         let url = format!(
             "{}/messages/{}?format={}",
             GMAIL_API_BASE,
             id,
             format.as_str()
         );
-        let resp = self.http.get(&url).bearer_auth(&token).send().await?;
-        handle_error(&resp).await?;
-        let msg: Message = resp.json().await?;
-        Ok(msg)
+        let value = self.request(reqwest::Method::GET, &url, None).await?;
+        Ok(serde_json::from_value(value)?)
     }
 
     pub async fn list_threads(&self, query: Option<&str>, max_results: u32) -> Result<ThreadList> {
-        let token = self.token().await?;
         let mut url = format!("{}/threads?maxResults={}", GMAIL_API_BASE, max_results);
 
         if let Some(q) = query {
             url.push_str(&format!("&q={}", urlencoding::encode(q)));
         }
 
-        let resp = self.http.get(&url).bearer_auth(&token).send().await?;
-        handle_error(&resp).await?;
-        let list: ThreadList = resp.json().await?;
-        Ok(list)
+        let value = self.request(reqwest::Method::GET, &url, None).await?;
+        Ok(serde_json::from_value(value)?)
     }
 
     pub async fn get_thread(&self, id: &str) -> Result<Thread> {
-        let token = self.token().await?;
         let url = format!("{}/threads/{}?format=metadata", GMAIL_API_BASE, id);
-        let resp = self.http.get(&url).bearer_auth(&token).send().await?;
-        handle_error(&resp).await?;
-        let thread: Thread = resp.json().await?;
-        Ok(thread)
+        let value = self.request(reqwest::Method::GET, &url, None).await?;
+        Ok(serde_json::from_value(value)?)
     }
 
     pub async fn list_labels(&self) -> Result<Vec<Label>> {
-        let token = self.token().await?;
         let url = format!("{}/labels", GMAIL_API_BASE);
-        let resp = self.http.get(&url).bearer_auth(&token).send().await?;
-        handle_error(&resp).await?;
-
         #[derive(Deserialize)]
         struct LabelList {
             labels: Option<Vec<Label>>,
         }
 
-        let list: LabelList = resp.json().await?;
+        let value = self.request(reqwest::Method::GET, &url, None).await?;
+        let list: LabelList = serde_json::from_value(value)?;
         Ok(list.labels.unwrap_or_default())
     }
 
@@ -110,7 +304,6 @@ impl GmailClient {
         in_reply_to: Option<&str>,
         thread_id: Option<&str>,
     ) -> Result<Draft> {
-        let token = self.token().await?;
         let url = format!("{}/drafts", GMAIL_API_BASE);
 
         let mut headers = format!(
@@ -127,40 +320,27 @@ impl GmailClient {
         let raw = format!("{}\r\n{}", headers, body);
         let encoded = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(raw.as_bytes());
 
-        let mut message = serde_json::json!({ "raw": encoded });
+        let mut message = json!({ "raw": encoded });
         if let Some(tid) = thread_id {
-            message["threadId"] = serde_json::Value::String(tid.to_string());
+            message["threadId"] = Value::String(tid.to_string());
         }
 
-        let payload = serde_json::json!({ "message": message });
+        let payload = json!({ "message": message });
 
-        let resp = self
-            .http
-            .post(&url)
-            .bearer_auth(&token)
-            .json(&payload)
-            .send()
+        let value = self
+            .request(reqwest::Method::POST, &url, Some(payload))
             .await?;
-        handle_error(&resp).await?;
-        let draft: Draft = resp.json().await?;
-        Ok(draft)
+        Ok(serde_json::from_value(value)?)
     }
 
     pub async fn send_draft(&self, draft_id: &str) -> Result<Message> {
-        let token = self.token().await?;
         let url = format!("{}/drafts/send", GMAIL_API_BASE);
-        let payload = serde_json::json!({ "id": draft_id });
+        let payload = json!({ "id": draft_id });
 
-        let resp = self
-            .http
-            .post(&url)
-            .bearer_auth(&token)
-            .json(&payload)
-            .send()
+        let value = self
+            .request(reqwest::Method::POST, &url, Some(payload))
             .await?;
-        handle_error(&resp).await?;
-        let msg: Message = resp.json().await?;
-        Ok(msg)
+        Ok(serde_json::from_value(value)?)
     }
 
     pub async fn send_message(
@@ -171,7 +351,6 @@ impl GmailClient {
         in_reply_to: Option<&str>,
         thread_id: Option<&str>,
     ) -> Result<Message> {
-        let token = self.token().await?;
         let url = format!("{}/messages/send", GMAIL_API_BASE);
 
         let mut headers = format!(
@@ -188,28 +367,20 @@ impl GmailClient {
         let raw = format!("{}\r\n{}", headers, body);
         let encoded = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(raw.as_bytes());
 
-        let mut message = serde_json::json!({ "raw": encoded });
+        let mut message = json!({ "raw": encoded });
         if let Some(tid) = thread_id {
-            message["threadId"] = serde_json::Value::String(tid.to_string());
+            message["threadId"] = Value::String(tid.to_string());
         }
 
-        let resp = self
-            .http
-            .post(&url)
-            .bearer_auth(&token)
-            .json(&message)
-            .send()
+        let value = self
+            .request(reqwest::Method::POST, &url, Some(message))
             .await?;
-        handle_error(&resp).await?;
-        let msg: Message = resp.json().await?;
-        Ok(msg)
+        Ok(serde_json::from_value(value)?)
     }
 
     pub async fn trash_message(&self, id: &str) -> Result<()> {
-        let token = self.token().await?;
         let url = format!("{}/messages/{}/trash", GMAIL_API_BASE, id);
-        let resp = self.http.post(&url).bearer_auth(&token).send().await?;
-        handle_error(&resp).await?;
+        self.request(reqwest::Method::POST, &url, None).await?;
         Ok(())
     }
 
@@ -219,32 +390,49 @@ impl GmailClient {
         add_labels: &[&str],
         remove_labels: &[&str],
     ) -> Result<()> {
-        let token = self.token().await?;
         let url = format!("{}/messages/{}/modify", GMAIL_API_BASE, id);
-        let payload = serde_json::json!({
+        let payload = json!({
             "addLabelIds": add_labels,
             "removeLabelIds": remove_labels,
         });
-        let resp = self
-            .http
-            .post(&url)
-            .bearer_auth(&token)
-            .json(&payload)
-            .send()
+        self.request(reqwest::Method::POST, &url, Some(payload))
             .await?;
-        handle_error(&resp).await?;
         Ok(())
     }
 }
 
-async fn handle_error(resp: &reqwest::Response) -> Result<()> {
-    if resp.status().is_success() {
-        return Ok(());
+/// Build the request body for Composio's `proxy-execute` endpoint, which makes
+/// an authenticated HTTP call to the connected toolkit (Gmail) on our behalf.
+fn build_composio_proxy_payload(
+    cfg: &ComposioConfig,
+    method: &str,
+    url: &str,
+    body: Option<Value>,
+) -> Value {
+    let mut payload = json!({
+        "endpoint": url,
+        "method": method,
+    });
+    if let Some(b) = body {
+        payload["body"] = b;
+    }
+    if let Some(account) = &cfg.connected_account_id {
+        payload["connected_account_id"] = Value::String(account.clone());
+    }
+    if let Some(user) = &cfg.user_id {
+        payload["user_id"] = Value::String(user.clone());
+    }
+    payload
+}
+
+fn truncate_error(text: &str) -> String {
+    const MAX: usize = 400;
+    let trimmed = text.trim();
+    if trimmed.len() <= MAX {
+        trimmed.to_string()
+    } else {
+        format!("{}…", &trimmed[..MAX])
     }
-    Err(anyhow::anyhow!(
-        "Gmail API error {}: check token permissions",
-        resp.status()
-    ))
 }
 
 use base64::Engine;
@@ -446,3 +634,78 @@ pub fn format_message_full(msg: &Message) -> String {
     }
     out
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn cfg() -> ComposioConfig {
+        ComposioConfig {
+            api_key: "test-key".to_string(),
+            base_url: COMPOSIO_DEFAULT_BASE.to_string(),
+            connected_account_id: Some("ca_123".to_string()),
+            user_id: Some("me".to_string()),
+        }
+    }
+
+    #[test]
+    fn composio_proxy_payload_get_has_no_body() {
+        let url = format!("{}/messages?maxResults=10", GMAIL_API_BASE);
+        let payload = build_composio_proxy_payload(&cfg(), "GET", &url, None);
+        assert_eq!(payload["endpoint"], url);
+        assert_eq!(payload["method"], "GET");
+        assert!(payload.get("body").is_none());
+        assert_eq!(payload["connected_account_id"], "ca_123");
+        assert_eq!(payload["user_id"], "me");
+    }
+
+    #[test]
+    fn composio_proxy_payload_post_includes_body() {
+        let url = format!("{}/messages/send", GMAIL_API_BASE);
+        let body = json!({ "raw": "abc" });
+        let payload = build_composio_proxy_payload(&cfg(), "POST", &url, Some(body.clone()));
+        assert_eq!(payload["method"], "POST");
+        assert_eq!(payload["body"], body);
+    }
+
+    #[test]
+    fn composio_proxy_payload_omits_optional_account_fields() {
+        let bare = ComposioConfig {
+            api_key: "k".to_string(),
+            base_url: COMPOSIO_DEFAULT_BASE.to_string(),
+            connected_account_id: None,
+            user_id: None,
+        };
+        let payload = build_composio_proxy_payload(&bare, "GET", "http://x/y", None);
+        assert!(payload.get("connected_account_id").is_none());
+        assert!(payload.get("user_id").is_none());
+    }
+
+    #[test]
+    fn direct_backend_label_and_default() {
+        let backend = GmailBackend::Direct;
+        assert_eq!(backend.label(), "direct");
+        let client = GmailClient::with_backend(GmailBackend::Direct);
+        assert_eq!(client.backend_label(), "direct");
+    }
+
+    #[test]
+    fn composio_backend_is_configured_and_can_send() {
+        let client = GmailClient::with_backend(GmailBackend::Composio(cfg()));
+        assert_eq!(client.backend_label(), "composio");
+        assert!(client.is_configured());
+        // Composio connections request full Gmail scopes.
+        assert!(client.can_send());
+        assert!(client.can_delete());
+    }
+
+    #[test]
+    fn truncate_error_caps_length() {
+        let short = truncate_error("  hi  ");
+        assert_eq!(short, "hi");
+        let long = "x".repeat(1000);
+        let capped = truncate_error(&long);
+        assert!(capped.len() <= 401 + 3); // 400 chars + ellipsis byte
+        assert!(capped.ends_with('…'));
+    }
+}
diff --git a/docs/GMAIL_COMPOSIO_BACKEND.md b/docs/GMAIL_COMPOSIO_BACKEND.md
new file mode 100644
index 000000000..9b2ec44eb
--- /dev/null
+++ b/docs/GMAIL_COMPOSIO_BACKEND.md
@@ -0,0 +1,73 @@
+# Gmail Tool: Composio Managed Backend
+
+The native `gmail` tool can source credentials and transport from one of two
+backends. The tool interface, confirmation gating, access-tier logic, and
+token-lean output formatting are identical across backends; only the
+auth/transport layer changes.
+
+## Backends
+
+| Backend | Auth | Pros | Cons |
+|---|---|---|---|
+| `direct` (default) | Local Google OAuth tokens (`jcode login google`) | No third party in the loop | Unverified-app warning; 7-day refresh-token expiry in Google "Testing" mode |
+| `composio` | Composio-managed OAuth (Google-verified app) | No unverified-app warning, no 7-day expiry, no per-user Google Cloud project | Composio brokers Gmail token custody; external dependency/cost |
+
+Both backends call the *same* Gmail REST endpoints
+(`https://gmail.googleapis.com/gmail/v1/users/me/...`). The Composio backend
+routes those calls through Composio's
+[`proxy-execute`](https://docs.composio.dev/reference/api-reference/tools/postToolsExecuteProxy)
+endpoint, which attaches the managed Gmail credentials. Because the upstream
+response shape is unchanged, all existing typed parsing and output formatting
+is reused.
+
+## Selecting the backend
+
+The backend is resolved from environment at `GmailClient::new()`:
+
+- `JCODE_GMAIL_BACKEND=direct` (or unset) -> direct Google backend.
+- `JCODE_GMAIL_BACKEND=composio` -> Composio backend (requires `COMPOSIO_API_KEY`).
+
+If `composio` is requested but `COMPOSIO_API_KEY` is missing, jcode warns and
+falls back to `direct`.
+
+### Composio environment variables
+
+| Variable | Required | Description |
+|---|---|---|
+| `COMPOSIO_API_KEY` | Yes | Project API key from <https://platform.composio.dev> |
+| `COMPOSIO_BASE_URL` | No | Override API base (default `https://backend.composio.dev/api/v3.1`) |
+| `COMPOSIO_GMAIL_CONNECTED_ACCOUNT_ID` | No | Pin a specific connected account (`ca_...`) |
+| `COMPOSIO_GMAIL_USER_ID` / `COMPOSIO_USER_ID` | No | End-user id for multi-user connected accounts |
+
+## One-time Composio setup
+
+1. Sign in at <https://platform.composio.dev> and copy your project API key.
+2. Connect a Gmail account (Composio's hosted OAuth, no unverified-app warning).
+   Note the resulting `connected_account_id` if you want to pin it.
+3. Export the variables:
+   ```bash
+   export JCODE_GMAIL_BACKEND=composio
+   export COMPOSIO_API_KEY="ck_..."
+   # optional:
+   export COMPOSIO_GMAIL_CONNECTED_ACCOUNT_ID="ca_..."
+   export COMPOSIO_GMAIL_USER_ID="me"
+   ```
+4. Ensure the `gmail` tool is enabled in `config.toml`:
+   ```toml
+   [tools]
+   enabled = ["*"]
+   ```
+
+## Access tiers
+
+- `direct`: honors the access tier chosen at `jcode login google`
+  (Read & Draft Only logins cannot send/trash, enforced at the OAuth scope level).
+- `composio`: connections request full Gmail scopes, so send/trash are
+  available. The tool still requires explicit `confirmed: true` for send,
+  send_draft, and trash.
+
+## Trust note
+
+With the Composio backend, Composio holds your Gmail OAuth grant and sees API
+traffic. This is the core tradeoff versus the direct backend. Disclose this to
+users before enabling it as a default.

From a80296c22c8f85bd9f19538c51d15c0e274fe4aa Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 13:54:11 -0700
Subject: [PATCH 17/57] desktop: pace animation redraws to ~60fps instead of
 busy-spinning

The desktop render loop re-requested a redraw immediately after every
animated frame (welcome-hero reveal, focus pulse, spinners, smooth
scroll, streaming) in both the RedrawRequested handler and the
AboutToWait fallback. Because the surface uses non-blocking Mailbox
presentation, present() returns instantly, so the loop rendered as fast
as the CPU allowed (~300fps on a 60Hz panel) and pinned the main thread
near 100% CPU. That starved input handling and compositor scheduling,
which is the root cause of the laggy/janky animations and scrolling, and
made streaming events queue for 200ms-1s before the UI could process
them.

Schedule a paced redraw (DESKTOP_ANIMATION_FRAME_INTERVAL = 16ms,
serviced via ControlFlow::WaitUntil in AboutToWait) instead of an
immediate request. Measured idle main-thread CPU on the welcome screen
dropped from ~99% to ~0-3%, frame rate from ~305fps to display refresh,
while the stream-e2e benchmark still passes all interaction/no-paint
budgets (max no-paint gap 71ms vs 250ms budget).
---
 crates/jcode-desktop/src/main.rs       | 60 +++++++++++++++++++++++---
 crates/jcode-desktop/src/main_tests.rs | 18 ++++++++
 2 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs
index 783f4a4ce..d2979b41f 100644
--- a/crates/jcode-desktop/src/main.rs
+++ b/crates/jcode-desktop/src/main.rs
@@ -145,6 +145,16 @@ const SINGLE_SESSION_CARET_COLOR: [f32; 4] = [0.130, 0.150, 0.190, 0.92];
 const SESSION_SPAWN_REFRESH_DELAY: Duration = Duration::from_millis(350);
 const BACKGROUND_POLL_INTERVAL: Duration = Duration::from_millis(33);
 const BACKEND_REDRAW_FRAME_INTERVAL: Duration = Duration::from_millis(16);
+/// Minimum spacing between animation-driven redraws.
+///
+/// Without this, the desktop render loop re-requests a redraw immediately after
+/// every animated frame (welcome-hero reveal, focus pulse, spinners, smooth
+/// scroll, etc.). Because the surface uses non-blocking `Mailbox` presentation,
+/// `present()` returns instantly, so the unthrottled loop renders at hundreds of
+/// fps and pins the main thread near 100% CPU, starving input handling and the
+/// compositor (the root cause of desktop lag/jank). ~16ms paces continuous
+/// animations to about 60fps, matching typical display refresh.
+const DESKTOP_ANIMATION_FRAME_INTERVAL: Duration = Duration::from_millis(16);
 const SURFACE_TIMEOUT_BACKOFF_MIN: Duration = Duration::from_millis(16);
 const SURFACE_TIMEOUT_BACKOFF_MAX: Duration = Duration::from_millis(250);
 const HEADLESS_CHAT_SMOKE_TIMEOUT: Duration = Duration::from_secs(90);
@@ -383,6 +393,17 @@ fn desktop_background_wake(
     }
 }
 
+/// Compute the next paced animation redraw time.
+///
+/// Returns `Some(now + DESKTOP_ANIMATION_FRAME_INTERVAL)` while an animation is
+/// active and `None` once it settles. Callers schedule this instead of calling
+/// `request_redraw()` immediately, which would render as fast as the CPU allows
+/// (the surface presents without blocking) and pin the main thread near 100%
+/// CPU, starving input handling and the compositor.
+fn next_animation_redraw_at(now: Instant, animation_active: bool) -> Option<Instant> {
+    animation_active.then(|| now + DESKTOP_ANIMATION_FRAME_INTERVAL)
+}
+
 #[derive(Clone, Copy, Debug, PartialEq)]
 struct StreamingTextArrivalStyle {
     opacity: f32,
@@ -799,6 +820,10 @@ async fn run() -> Result<()> {
     let mut pending_backend_redraw_since: Option<Instant> = None;
     let mut surface_timeout_backoff = SurfaceTimeoutBackoff::default();
     let mut surface_timeout_redraw_at: Option<Instant> = None;
+    // Scheduled time for the next animation-driven redraw. Continuous animations
+    // re-arm this each presented frame so the loop paces itself to roughly the
+    // display refresh rate instead of busy-spinning the main thread.
+    let mut animation_redraw_at: Option<Instant> = None;
     let mut pending_resize: Option<PhysicalSize<u32>> = None;
     let mut space_hold_started_at: Option<Instant> = None;
     let mut space_hold_consumed = false;
@@ -845,6 +870,7 @@ async fn run() -> Result<()> {
             hot_reload_wake,
             space_hold_wake,
             surface_timeout_redraw_at,
+            animation_redraw_at,
         ]
             .into_iter()
             .flatten()
@@ -1608,9 +1634,14 @@ async fn run() -> Result<()> {
                             target.exit();
                             return;
                         }
-                        if frame.animation_active {
-                            window.request_redraw();
-                        }
+                        // Pace continuous animations instead of immediately
+                        // re-requesting a redraw. An immediate request makes the
+                        // event loop render as fast as the CPU allows (the surface
+                        // presents without blocking), pinning the main thread near
+                        // 100% CPU and starving input/compositor scheduling. The
+                        // scheduled wake is serviced in AboutToWait.
+                        animation_redraw_at =
+                            next_animation_redraw_at(Instant::now(), frame.animation_active);
                     }
                     Err(SurfaceError::Lost | SurfaceError::Outdated) => {
                         surface_timeout_backoff.reset();
@@ -1841,6 +1872,18 @@ async fn run() -> Result<()> {
                         }
                     }
                 }
+                // Service the paced animation redraw scheduled by RedrawRequested.
+                // This keeps continuous animations advancing at ~display refresh
+                // without busy-spinning the loop between frames.
+                if let Some(redraw_at) = animation_redraw_at {
+                    let now = Instant::now();
+                    if now >= redraw_at {
+                        animation_redraw_at = None;
+                        if surface_renderable {
+                            window.request_redraw();
+                        }
+                    }
+                }
                 if surface_renderable && app.is_single_session() {
                     let about_to_wait_started = Instant::now();
                     let size = window.inner_size();
@@ -1909,8 +1952,15 @@ async fn run() -> Result<()> {
                 {
                     canvas.needs_initial_frame = false;
                     window.request_redraw();
-                } else if surface_renderable && app.has_frame_animation() {
-                    window.request_redraw();
+                } else if surface_renderable
+                    && app.has_frame_animation()
+                    && animation_redraw_at.is_none()
+                {
+                    // An animation is active but no paced redraw is scheduled yet
+                    // (e.g. it just became active). Schedule one instead of
+                    // requesting a redraw on every loop iteration, which would
+                    // busy-spin the main thread at 100% CPU.
+                    animation_redraw_at = next_animation_redraw_at(Instant::now(), true);
                 }
             }
             _ => {}
diff --git a/crates/jcode-desktop/src/main_tests.rs b/crates/jcode-desktop/src/main_tests.rs
index f05778b24..6fa95c06b 100644
--- a/crates/jcode-desktop/src/main_tests.rs
+++ b/crates/jcode-desktop/src/main_tests.rs
@@ -701,6 +701,24 @@ fn desktop_background_wake_only_tracks_active_frame_animation() {
     assert_eq!(desktop_background_wake(now, false, true), None);
 }
 
+#[test]
+fn next_animation_redraw_paces_active_animations_and_settles_when_idle() {
+    let now = Instant::now();
+
+    // While an animation is active, the next redraw is scheduled one frame
+    // interval out rather than immediately, so the loop does not busy-spin.
+    assert_eq!(
+        next_animation_redraw_at(now, true),
+        Some(now + DESKTOP_ANIMATION_FRAME_INTERVAL)
+    );
+    // Once the animation settles, no further redraw is scheduled and the loop
+    // can park on ControlFlow::Wait.
+    assert_eq!(next_animation_redraw_at(now, false), None);
+    // The pacing interval must be positive; a zero interval would reintroduce
+    // the busy-spin it exists to prevent.
+    assert!(DESKTOP_ANIMATION_FRAME_INTERVAL > Duration::ZERO);
+}
+
 #[test]
 fn desktop_async_job_slots_are_bounded_and_released() -> Result<()> {
     let counter = std::sync::atomic::AtomicUsize::new(0);

From 6727255f81df231a006bdce5a81e09acf17664dc Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 13:54:48 -0700
Subject: [PATCH 18/57] telemetry-worker: remove served web dashboard

The visual dashboard (dashboard.js + stats.js + GET / and GET /v1/stats
routing) was a separately-deployed Cloudflare Worker UI that does not
belong in the jcode repo. Remove it and restore the worker to its
POST /v1/event ingest-only surface.

The telemetry accuracy work it was built on (turn_end meaningfulness, CI
exclusion, the daily_active_users rollup) stays. users.sql remains as a
CLI query alongside dau.sql / health.sql.
---
 telemetry-worker/README.md        |  27 +-
 telemetry-worker/src/dashboard.js | 494 ------------------------------
 telemetry-worker/src/stats.js     | 389 -----------------------
 telemetry-worker/src/worker.js    |  57 +---
 4 files changed, 4 insertions(+), 963 deletions(-)
 delete mode 100644 telemetry-worker/src/dashboard.js
 delete mode 100644 telemetry-worker/src/stats.js

diff --git a/telemetry-worker/README.md b/telemetry-worker/README.md
index e58b87b6a..0e25e3db3 100644
--- a/telemetry-worker/README.md
+++ b/telemetry-worker/README.md
@@ -2,33 +2,8 @@
 
 Cloudflare Worker that receives anonymous telemetry events from jcode.
 
-## Dashboard
-
-The worker also serves a visual dashboard so you do not have to run SQL by hand:
-
-- `GET /` (or `/dashboard`) - the HTML dashboard. Public page, no data until a
-  token is entered.
-- `GET /v1/stats` - JSON aggregates (counts only, never raw event rows), gated
-  behind `DASHBOARD_TOKEN`. Accepts `Authorization: Bearer <token>`,
-  `?token=<token>`, or `X-Dashboard-Token`.
-- `POST /v1/event` - unchanged event ingest.
-
 The headline number is **Total users**: distinct, non-CI `telemetry_id`s that
-ever installed jcode OR did meaningful work in it. The page shows every metric
-the API returns, organized into tiers (hero / key cards / diagnostic tables) so
-the important numbers stand out while nothing is hidden. Each user tier (reached
-> total > core) is broader than the one below it, and CI / raw figures are shown
-alongside for transparency.
-
-Set the token once (it is a Worker secret, not in source):
-
-```bash
-wrangler secret put DASHBOARD_TOKEN
-# then open https://<your-worker-domain>/ and paste the token
-```
-
-If `DASHBOARD_TOKEN` is unset the stats endpoint stays locked (deny by default).
-The CLI equivalent of the headline number:
+ever installed jcode OR did meaningful work in it. Run it with:
 
 ```bash
 wrangler d1 execute jcode-telemetry --remote --file=users.sql
diff --git a/telemetry-worker/src/dashboard.js b/telemetry-worker/src/dashboard.js
deleted file mode 100644
index d5ec6c94e..000000000
--- a/telemetry-worker/src/dashboard.js
+++ /dev/null
@@ -1,494 +0,0 @@
-// jcode telemetry console — "Terminal Observatory" aesthetic.
-//
-// Design intent (frontend-design skill): jcode is a terminal coding agent, so
-// the dashboard is built as a precision instrument readout, not generic SaaS.
-// - Type: JetBrains Mono (display + data) paired with a quiet grotesk for prose.
-// - Palette: near-black graphite, warm phosphor amber as the dominant signal,
-//   a single cyan accent for the live/headline series. No purple-on-white.
-// - Composition: a station-clock hero number, hairline rules, dense tabular
-//   instrument panels, scanline texture, staggered load-in reveals.
-//
-// Self-contained (HTML/CSS/inline-SVG, fonts via Google Fonts <link>). Fetches
-// /v1/stats with the dashboard token. Every metric the API returns is shown,
-// grouped by importance (HEADLINE / SIGNAL / DIAGNOSTIC).
-
-export const DASHBOARD_HTML = `<!DOCTYPE html>
-<html lang="en">
-<head>
-<meta charset="utf-8" />
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<title>jcode · telemetry console</title>
-<link rel="preconnect" href="https://fonts.googleapis.com">
-<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
-<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;700;800&family=Sora:wght@400;500;600&display=swap" rel="stylesheet">
-<style>
-  :root {
-    --bg:        #07090c;
-    --bg-grain:  #0a0d12;
-    --panel:     #0d1117;
-    --panel-2:   #11161e;
-    --rule:      #1c232e;
-    --rule-soft: #141a22;
-    --ink:       #e8eef5;
-    --ink-dim:   #9aa7b6;
-    --ink-faint: #5c6675;
-    --amber:     #ffb454;   /* dominant phosphor signal */
-    --amber-dim: #c98a3f;
-    --cyan:      #4fd6ff;    /* live / headline accent */
-    --green:     #5ad27a;
-    --red:       #ff6b6b;
-    --mono: "JetBrains Mono", ui-monospace, SFMono-Regular, Menlo, monospace;
-    --sans: "Sora", system-ui, sans-serif;
-  }
-  * { box-sizing: border-box; }
-  html, body { margin: 0; padding: 0; }
-  body {
-    background:
-      radial-gradient(900px 500px at 88% -8%, rgba(255,180,84,0.07), transparent 60%),
-      radial-gradient(700px 500px at -5% 110%, rgba(79,214,255,0.05), transparent 60%),
-      var(--bg);
-    color: var(--ink);
-    font-family: var(--sans);
-    font-size: 14px; line-height: 1.55;
-    min-height: 100vh;
-    -webkit-font-smoothing: antialiased;
-  }
-  /* faint scanline texture, instrument vibe */
-  body:before {
-    content:""; position: fixed; inset: 0; pointer-events: none; z-index: 0;
-    background-image: repeating-linear-gradient(0deg, rgba(255,255,255,0.014) 0 1px, transparent 1px 3px);
-    mix-blend-mode: overlay; opacity: .5;
-  }
-  .wrap { position: relative; z-index: 1; max-width: 1200px; margin: 0 auto; padding: 30px 24px 90px; }
-  .mono { font-family: var(--mono); }
-  .num { font-family: var(--mono); font-variant-numeric: tabular-nums; }
-
-  /* ---- masthead ---- */
-  header.bar { display:flex; align-items:center; justify-content:space-between; gap:16px;
-    border-bottom: 1px solid var(--rule); padding-bottom: 16px; margin-bottom: 26px; flex-wrap: wrap; }
-  .mark { display:flex; align-items:center; gap:13px; }
-  .glyph { font-family: var(--mono); font-weight: 800; font-size: 15px; color: var(--bg);
-    background: var(--amber); width: 32px; height: 32px; display:grid; place-items:center; border-radius: 7px;
-    box-shadow: 0 0 0 1px rgba(255,180,84,.4), 0 0 22px rgba(255,180,84,.25); }
-  .mark h1 { font-family: var(--mono); font-size: 14px; font-weight: 700; margin:0; letter-spacing: 1px; text-transform: uppercase; }
-  .mark .tag { font-family: var(--mono); font-size: 11px; color: var(--ink-faint); letter-spacing: .5px; }
-  .bar-actions { display:flex; align-items:center; gap:10px; }
-  .stamp { font-family: var(--mono); font-size: 11px; color: var(--ink-dim); border: 1px solid var(--rule);
-    padding: 6px 10px; border-radius: 6px; letter-spacing: .3px; }
-  .stamp .live { color: var(--green); }
-  .stamp .live:before { content:"●"; margin-right: 6px; animation: pulse 2s ease-in-out infinite; }
-  @keyframes pulse { 0%,100%{opacity:1} 50%{opacity:.35} }
-  button.btn { cursor:pointer; font-family: var(--mono); font-size: 12px; color: var(--ink); background: var(--panel-2);
-    border: 1px solid var(--rule); padding: 7px 13px; border-radius: 6px; letter-spacing: .3px; transition: .15s; }
-  button.btn:hover { border-color: var(--amber); color: var(--amber); }
-
-  /* ---- section ---- */
-  .sec { margin: 34px 0 14px; display:flex; align-items:baseline; gap: 12px; }
-  .sec .idx { font-family: var(--mono); font-size: 11px; color: var(--amber); letter-spacing: 1px; }
-  .sec h2 { font-family: var(--mono); font-size: 12px; font-weight: 700; letter-spacing: 2px; text-transform: uppercase; margin: 0; color: var(--ink); }
-  .sec .rule { flex:1; height: 1px; background: linear-gradient(90deg, var(--rule), transparent); align-self: center; }
-  .sec .note { font-family: var(--mono); font-size: 11px; color: var(--ink-faint); letter-spacing: .3px; }
-
-  /* ---- hero ---- */
-  .hero { display:grid; grid-template-columns: 1.05fr 1fr; gap: 1px; background: var(--rule);
-    border: 1px solid var(--rule); border-radius: 14px; overflow:hidden; }
-  @media (max-width: 880px){ .hero { grid-template-columns: 1fr; } }
-  .hero > div { background: var(--panel); }
-  .hero-main { padding: 30px 32px; position: relative; }
-  .hero-main:after { content:""; position:absolute; inset:0; pointer-events:none;
-    background: radial-gradient(420px 220px at 100% 0%, rgba(255,180,84,.10), transparent 70%); }
-  .label { font-family: var(--mono); font-size: 11px; letter-spacing: 2px; text-transform: uppercase; color: var(--ink-faint); }
-  .big { font-family: var(--mono); font-weight: 800; font-size: clamp(58px, 9vw, 96px); line-height: .92;
-    letter-spacing: -2px; color: var(--amber); margin: 8px 0 4px; text-shadow: 0 0 36px rgba(255,180,84,.22); }
-  .big .unit { font-size: 18px; color: var(--ink-dim); letter-spacing: 0; margin-left: 10px; text-shadow:none; }
-  .hero-desc { color: var(--ink-dim); font-size: 13px; max-width: 48ch; }
-  .ladder { margin-top: 22px; border-top: 1px solid var(--rule-soft); }
-  .rung { display:flex; align-items:center; justify-content:space-between; padding: 9px 0; border-bottom: 1px solid var(--rule-soft); }
-  .rung .lk { font-family: var(--mono); font-size: 12px; color: var(--ink-dim); letter-spacing:.3px; }
-  .rung .lk b { color: var(--ink); font-weight: 500; }
-  .rung .lv { font-family: var(--mono); font-weight: 700; font-size: 16px; }
-  .rung .lv.amber { color: var(--amber); } .rung .lv.cyan { color: var(--cyan); } .rung .lv.dim { color: var(--ink-dim); }
-  .hero-side { padding: 24px 26px; display:flex; flex-direction: column; }
-  .hero-side h3 { font-family: var(--mono); font-size: 11px; letter-spacing: 1.5px; text-transform: uppercase; color: var(--ink-faint); margin: 0 0 14px; }
-  .triple { display:grid; grid-template-columns: repeat(3,1fr); gap: 14px; margin-bottom: 6px; }
-  .triple .t .tn { font-family: var(--mono); font-weight: 800; font-size: 30px; letter-spacing: -1px; color: var(--cyan); }
-  .triple .t .tl { font-family: var(--mono); font-size: 11px; color: var(--ink-faint); letter-spacing: 1px; text-transform: uppercase; margin-top: 2px; }
-  .triple .t .tsub { font-size: 11px; color: var(--ink-faint); }
-
-  /* ---- cards ---- */
-  .grid { display:grid; gap: 14px; }
-  .g4 { grid-template-columns: repeat(4,1fr); } .g3 { grid-template-columns: repeat(3,1fr); } .g2 { grid-template-columns: repeat(2,1fr); }
-  @media (max-width: 1000px){ .g4 { grid-template-columns: repeat(2,1fr); } .g3 { grid-template-columns: repeat(2,1fr); } }
-  @media (max-width: 600px){ .g4,.g3,.g2 { grid-template-columns: 1fr; } }
-  .stat { background: var(--panel); border: 1px solid var(--rule); border-radius: 11px; padding: 15px 16px; position: relative; overflow: hidden; }
-  .stat.key { border-color: rgba(255,180,84,.32); }
-  .stat.key:before { content:""; position:absolute; left:0; top:0; bottom:0; width:2px; background: var(--amber); }
-  .stat.alert { border-color: rgba(255,107,107,.4); }
-  .stat.alert:before { content:""; position:absolute; left:0; top:0; bottom:0; width:2px; background: var(--red); }
-  .stat .sl { font-family: var(--mono); font-size: 11px; color: var(--ink-dim); letter-spacing: .4px; display:flex; align-items:center; gap:7px; }
-  .stat .sv { font-family: var(--mono); font-weight: 700; font-size: 26px; margin-top: 8px; letter-spacing: -.5px; }
-  .stat.key .sv { color: var(--amber); } .stat.alert .sv { color: var(--red); }
-  .stat .sm { font-size: 11px; color: var(--ink-faint); margin-top: 3px; }
-  .kk { font-family: var(--mono); font-size: 9px; letter-spacing: 1px; color: var(--bg); background: var(--amber); padding: 1px 5px; border-radius: 3px; }
-
-  /* ---- panels w/ tables & charts ---- */
-  .panel { background: var(--panel); border: 1px solid var(--rule); border-radius: 12px; padding: 18px 18px 12px; }
-  .panel h3 { font-family: var(--mono); font-size: 12px; font-weight: 700; letter-spacing: 1px; text-transform: uppercase; margin: 0 0 3px; }
-  .panel .pd { font-size: 11px; color: var(--ink-faint); margin: 0 0 14px; font-family: var(--mono); letter-spacing: .2px; }
-  table { width:100%; border-collapse: collapse; }
-  th, td { text-align:left; padding: 7px 4px; border-bottom: 1px solid var(--rule-soft); font-size: 12px; }
-  th { font-family: var(--mono); color: var(--ink-faint); font-weight: 500; font-size: 10px; letter-spacing: 1px; text-transform: uppercase; }
-  td.k { font-family: var(--mono); color: var(--ink); letter-spacing: .2px; }
-  td.v, th.v { text-align:right; font-family: var(--mono); font-variant-numeric: tabular-nums; color: var(--ink); }
-  .track { height: 6px; background: var(--panel-2); border-radius: 2px; overflow:hidden; }
-  .fill { height: 100%; background: linear-gradient(90deg, var(--amber-dim), var(--amber)); border-radius: 2px; }
-  tr:last-child td { border-bottom: none; }
-  .lb-ci { font-family: var(--mono); font-size: 9px; letter-spacing: 1px; color: var(--bg); background: var(--ink-faint); padding: 1px 5px; border-radius: 3px; margin-left: 6px; }
-  .lb-dev { font-family: var(--mono); font-size: 9px; letter-spacing: .5px; color: var(--amber); border: 1px solid rgba(255,180,84,.35); padding: 0 5px; border-radius: 3px; margin-left: 6px; }
-  tr.lb-dim td { opacity: .5; }
-
-  .legend { display:flex; gap: 16px; align-items:center; font-family: var(--mono); font-size: 11px; color: var(--ink-dim); margin-bottom: 10px; flex-wrap: wrap; }
-  .legend i { width: 14px; height: 3px; display:inline-block; margin-right: 6px; vertical-align: 3px; border-radius: 2px; }
-
-  .fb { padding: 12px 0; border-bottom: 1px solid var(--rule-soft); }
-  .fb:last-child { border-bottom: none; }
-  .fb .q { color: var(--ink); font-size: 13.5px; }
-  .fb .m { font-family: var(--mono); color: var(--ink-faint); font-size: 11px; margin-top: 4px; letter-spacing: .2px; }
-  .fb .badge { color: var(--amber); }
-
-  /* ---- gate ---- */
-  .gate { max-width: 440px; margin: 16vh auto 0; }
-  .gate .box { background: var(--panel); border: 1px solid var(--rule); border-radius: 14px; padding: 30px 28px; text-align: center;
-    box-shadow: 0 0 60px rgba(0,0,0,.5); }
-  .gate .glyph { margin: 0 auto 16px; width: 40px; height: 40px; font-size: 18px; }
-  .gate h2 { font-family: var(--mono); letter-spacing: 1px; margin: 0 0 4px; font-size: 16px; }
-  .gate p { color: var(--ink-faint); font-size: 12px; font-family: var(--mono); margin: 0; }
-  .gate input { width: 100%; margin: 18px 0 12px; padding: 12px 14px; border-radius: 9px; border: 1px solid var(--rule);
-    background: var(--bg); color: var(--ink); font-family: var(--mono); font-size: 13px; letter-spacing: 1px; }
-  .gate input:focus { outline: none; border-color: var(--amber); }
-  .gate button { width: 100%; padding: 12px; font-weight: 700; }
-  .err { color: var(--red); font-family: var(--mono); font-size: 12px; min-height: 18px; margin-top: 6px; }
-
-  .hidden { display:none !important; }
-  .foot { font-family: var(--mono); color: var(--ink-faint); font-size: 11px; margin-top: 38px; padding-top: 18px;
-    border-top: 1px solid var(--rule-soft); text-align: center; letter-spacing: .2px; line-height: 1.8; }
-  .spin { display:inline-block; width: 13px; height: 13px; border: 2px solid var(--rule); border-top-color: var(--amber); border-radius: 50%; animation: sp .7s linear infinite; vertical-align: -2px; }
-  @keyframes sp { to { transform: rotate(360deg); } }
-  .reveal { animation: rise .5s cubic-bezier(.2,.7,.2,1) both; }
-  @keyframes rise { from { opacity: 0; transform: translateY(10px); } to { opacity:1; transform:none; } }
-</style>
-</head>
-<body>
-<div class="wrap">
-  <div id="gate" class="gate hidden">
-    <div class="box">
-      <div class="glyph">jc</div>
-      <h2>TELEMETRY CONSOLE</h2>
-      <p>access token required</p>
-      <input id="token" type="password" placeholder="•••••••••••" autocomplete="off" />
-      <button class="btn" id="unlock">AUTHENTICATE</button>
-      <div class="err" id="gate-err"></div>
-    </div>
-  </div>
-
-  <div id="app" class="hidden">
-    <header class="bar">
-      <div class="mark">
-        <div class="glyph">jc</div>
-        <div>
-          <h1>jcode telemetry</h1>
-          <div class="tag" id="generated">— · — · —</div>
-        </div>
-      </div>
-      <div class="bar-actions">
-        <span class="stamp"><span class="live" id="livestamp">LIVE</span></span>
-        <button class="btn" id="refresh">↻ REFRESH</button>
-        <button class="btn" id="logout">LOCK</button>
-      </div>
-    </header>
-    <div id="content"></div>
-    <div class="foot">
-      users are distinct anonymous telemetry_id · headline excludes CI runners &amp; non-release builds<br/>
-      raw / CI-inclusive figures retained in diagnostic tier · counts only, no raw events leave the worker
-    </div>
-  </div>
-</div>
-
-<script>
-const fmt = (n) => (n==null?"—":Number(n).toLocaleString());
-const pct = (x) => (x==null?"—":(x*100).toFixed(1)+"%");
-const ms  = (x) => (x==null?"—":x>=1000?(x/1000).toFixed(1)+"s":Math.round(x)+"ms");
-const dec = (x,d=1) => (x==null?"—":Number(x).toFixed(d));
-const esc = (s) => String(s==null?"":s).replace(/[&<>"]/g,c=>({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;"}[c]));
-let TOKEN = localStorage.getItem("jcode_dash_token") || "";
-
-function showGate(m){ document.getElementById("app").classList.add("hidden"); document.getElementById("gate").classList.remove("hidden"); document.getElementById("gate-err").textContent = m||""; }
-function showApp(){ document.getElementById("gate").classList.add("hidden"); document.getElementById("app").classList.remove("hidden"); }
-
-async function load(){
-  if(!TOKEN){ showGate(""); return; }
-  document.getElementById("content").innerHTML = '<div class="mono" style="padding:60px 0;color:var(--ink-faint)"><span class="spin"></span> reading instruments…</div>';
-  showApp();
-  let res;
-  try { res = await fetch("/v1/stats?token="+encodeURIComponent(TOKEN), { headers:{ "Authorization":"Bearer "+TOKEN } }); }
-  catch(e){ showGate("network error"); return; }
-  if(res.status===401){ localStorage.removeItem("jcode_dash_token"); TOKEN=""; showGate("invalid token"); return; }
-  if(!res.ok){ document.getElementById("content").innerHTML='<div class="err">failed to load ('+res.status+')</div>'; return; }
-  render(await res.json());
-}
-
-function sec(idx,title,note){ return '<div class="sec reveal"><span class="idx">'+idx+'</span><h2>'+esc(title)+'</h2><span class="rule"></span><span class="note">'+esc(note||"")+'</span></div>'; }
-function stat(label,value,meta,opts){ opts=opts||{};
-  const cls = opts.alert?'stat alert':(opts.key?'stat key':'stat');
-  return '<div class="'+cls+' reveal"><div class="sl">'+esc(label)+(opts.key?' <span class="kk">KEY</span>':'')+'</div><div class="sv">'+value+'</div><div class="sm">'+(meta||'')+'</div></div>';
-}
-function tablePanel(title,desc,rows,kcol,vcol){
-  const max = Math.max(1, ...rows.map(r=>r.value));
-  const body = rows.length ? rows.map(r=>'<tr><td class="k">'+esc(r.label)+'</td><td style="width:46%"><div class="track"><div class="fill" style="width:'+Math.max(3,(r.value/max)*100)+'%"></div></div></td><td class="v">'+fmt(r.value)+'</td></tr>').join('') : '<tr><td class="k" colspan="3" style="color:var(--ink-faint)">no data</td></tr>';
-  return '<div class="panel reveal"><h3>'+esc(title)+'</h3><p class="pd">'+esc(desc)+'</p><table><thead><tr><th>'+esc(kcol)+'</th><th>·</th><th class="v">'+esc(vcol)+'</th></tr></thead><tbody>'+body+'</tbody></table></div>';
-}
-function rows(arr,k){ return (arr||[]).map(r=>({label:r[k]??"unknown", value:r.users})); }
-
-function lineChart(series){
-  const W=820,H=230,pl=40,pr=14,pt=16,pb=28;
-  const dates = series[0]?series[0].points.map(p=>p.date):[];
-  if(!dates.length) return '<div class="mono" style="color:var(--ink-faint);padding:18px;font-size:12px">no timeseries yet</div>';
-  const maxV = Math.max(1, ...series.flatMap(s=>s.points.map(p=>p.value)));
-  const x=i=>pl+(i/Math.max(1,dates.length-1))*(W-pl-pr);
-  const y=v=>pt+(1-v/maxV)*(H-pt-pb);
-  const grid=[0,.25,.5,.75,1].map(f=>{const gy=pt+f*(H-pt-pb);const val=Math.round(maxV*(1-f));return '<line x1="'+pl+'" y1="'+gy+'" x2="'+(W-pr)+'" y2="'+gy+'" stroke="#161d27"/><text x="4" y="'+(gy+3)+'" fill="#5c6675" font-size="10" font-family="JetBrains Mono">'+val+'</text>';}).join('');
-  const area = series.length?(()=>{const s=series[0];const top=s.points.map((p,i)=>(i?'L':'M')+x(i).toFixed(1)+' '+y(p.value).toFixed(1)).join(' ');return '<path d="'+top+' L'+x(s.points.length-1).toFixed(1)+' '+(H-pb)+' L'+pl+' '+(H-pb)+' Z" fill="url(#ag)" opacity=".18"/>';})():'';
-  const paths=series.map(s=>{const d=s.points.map((p,i)=>(i?'L':'M')+x(i).toFixed(1)+' '+y(p.value).toFixed(1)).join(' ');return '<path d="'+d+'" fill="none" stroke="'+s.color+'" stroke-width="2" stroke-linejoin="round"/>';}).join('');
-  const ticks = dates.length>1?[0,Math.floor(dates.length/2),dates.length-1].map(i=>'<text x="'+x(i)+'" y="'+(H-8)+'" fill="#5c6675" font-size="10" font-family="JetBrains Mono" text-anchor="middle">'+dates[i].slice(5)+'</text>').join(''):'';
-  const legend=series.map(s=>'<span><i style="background:'+s.color+'"></i>'+esc(s.name)+'</span>').join('');
-  return '<div class="legend">'+legend+'</div><svg viewBox="0 0 '+W+' '+H+'" width="100%" preserveAspectRatio="xMidYMid meet"><defs><linearGradient id="ag" x1="0" y1="0" x2="0" y2="1"><stop offset="0" stop-color="#4fd6ff"/><stop offset="1" stop-color="#4fd6ff" stop-opacity="0"/></linearGradient></defs>'+grid+area+paths+ticks+'</svg>';
-}
-
-function barsChart(title,desc,data,labelFn,color){
-  const max=Math.max(1,...data.map(d=>d.v));
-  const W=820,H=160,pl=8,pr=8,pb=22,pt=8,n=data.length;
-  const bw=(W-pl-pr)/Math.max(1,n);
-  const bars=data.map((d,i)=>{const h=(d.v/max)*(H-pt-pb);const bx=pl+i*bw;return '<rect x="'+(bx+1.5).toFixed(1)+'" y="'+(H-pb-h).toFixed(1)+'" width="'+(bw-3).toFixed(1)+'" height="'+h.toFixed(1)+'" rx="1.5" fill="'+color+'"/>';}).join('');
-  const labels=data.map((d,i)=> (i%3===0)?'<text x="'+(pl+i*bw+bw/2).toFixed(1)+'" y="'+(H-7)+'" fill="#5c6675" font-size="9" font-family="JetBrains Mono" text-anchor="middle">'+esc(labelFn(d,i))+'</text>':'').join('');
-  return '<div class="panel reveal"><h3>'+esc(title)+'</h3><p class="pd">'+esc(desc)+'</p><svg viewBox="0 0 '+W+' '+H+'" width="100%" preserveAspectRatio="xMidYMid meet">'+bars+labels+'</svg></div>';
-}
-
-function leaderboardPanel(rows){
-  if(!rows.length) return '<div class="panel reveal"><p class="pd">no data</p></div>';
-  const body = rows.map((r,i)=>{
-    const ci = Number(r.is_ci)===1;
-    const chan = esc(r.build_channel||"?");
-    const last = r.last_seen ? new Date((r.last_seen||"").replace(" ","T")+"Z").toLocaleDateString() : "—";
-    const tag = ci ? '<span class="lb-ci">CI</span>' : (chan==="release" ? '' : '<span class="lb-dev">'+chan+'</span>');
-    return '<tr'+(ci?' class="lb-dim"':'')+'>'
-      + '<td class="v" style="color:var(--ink-faint)">'+(i+1)+'</td>'
-      + '<td class="k">'+esc(r.id_prefix)+'… '+tag+'</td>'
-      + '<td class="v">'+fmt(r.sessions)+'</td>'
-      + '<td class="v">'+fmt(r.turns)+'</td>'
-      + '<td class="v">'+fmt(r.tokens)+'</td>'
-      + '<td class="v">'+fmt(r.tool_calls)+'</td>'
-      + '<td class="k" style="color:var(--ink-faint)">v'+esc(r.version||"?")+'</td>'
-      + '<td class="k" style="color:var(--ink-faint)">'+esc(last)+'</td>'
-      + '</tr>';
-  }).join('');
-  return '<div class="panel reveal"><h3>Top anonymous ids</h3><p class="pd">ranked by lifecycle sessions · CI / non-release tagged · ids are hashed prefixes only</p>'
-    + '<table><thead><tr><th class="v">#</th><th>id</th><th class="v">sessions</th><th class="v">turns</th><th class="v">tokens</th><th class="v">tools</th><th>ver</th><th>last seen</th></tr></thead><tbody>'+body+'</tbody></table></div>';
-}
-
-function render(d){
-  const dt = new Date(d.generated_at);
-  document.getElementById("generated").textContent = dt.toISOString().slice(0,10)+" · "+dt.toLocaleTimeString()+" · UTC rollup";
-  const c=document.getElementById("content");
-  const u=d.users,a=d.active,lc=d.lifecycle,q=d.quality,ret=d.retention,e=d.errors,h=d.health,b=d.breakdowns;
-  const ts=d.timeseries.daily||[];
-  const series=[
-    {name:"headline DAU",color:"#4fd6ff",points:ts.map(r=>({date:r.date,value:r.headline}))},
-    {name:"meaningful",color:"#ffb454",points:ts.map(r=>({date:r.date,value:r.meaningful}))},
-    {name:"raw / reached",color:"#5c6675",points:ts.map(r=>({date:r.date,value:r.raw}))},
-  ];
-  let H="";
-
-  // HERO
-  H+='<div class="hero reveal">'
-    + '<div class="hero-main">'
-      + '<div class="label">total users · headline</div>'
-      + '<div class="big">'+fmt(u.total_users)+'<span class="unit">people</span></div>'
-      + '<div class="hero-desc">Distinct real people who installed jcode or did meaningful work in it. CI runners excluded; each anonymous machine id counts once.</div>'
-      + '<div class="ladder">'
-        + '<div class="rung"><span class="lk"><b>Reached</b> · launched it at least once</span><span class="lv dim">'+fmt(u.reached_users)+'</span></div>'
-        + '<div class="rung"><span class="lk"><b>Total users</b> · installed OR did work</span><span class="lv amber">'+fmt(u.total_users)+'</span></div>'
-        + '<div class="rung"><span class="lk"><b>Core</b> · did meaningful work</span><span class="lv cyan">'+fmt(u.core_users)+'</span></div>'
-        + '<div class="rung"><span class="lk"><b>Installed</b> · distinct install events</span><span class="lv dim">'+fmt(u.installed_users)+'</span></div>'
-      + '</div>'
-    + '</div>'
-    + '<div class="hero-side">'
-      + '<h3>active users · distinct, headline definition</h3>'
-      + '<div class="triple">'
-        + '<div class="t"><div class="tn">'+fmt(a.dau)+'</div><div class="tl">DAU</div><div class="tsub">'+fmt(a.dau_meaningful)+' mean · '+fmt(a.dau_raw)+' raw</div></div>'
-        + '<div class="t"><div class="tn">'+fmt(a.wau)+'</div><div class="tl">WAU</div><div class="tsub">'+fmt(a.wau_meaningful)+' mean · '+fmt(a.wau_raw)+' raw</div></div>'
-        + '<div class="t"><div class="tn">'+fmt(a.mau)+'</div><div class="tl">MAU</div><div class="tsub">'+fmt(a.mau_meaningful)+' mean · '+fmt(a.mau_raw)+' raw</div></div>'
-      + '</div>'
-      + '<div style="margin-top:16px;flex:1">'+lineChart(series)+'</div>'
-    + '</div>'
-  + '</div>';
-
-  // 01 USER COMPOSITION
-  H+=sec("01","user composition","each tier broader than the one below · nothing dropped");
-  H+='<div class="grid g4">'
-    + stat("Reached", fmt(u.reached_users), "ran jcode ≥1 time (non-CI)")
-    + stat("Total users", fmt(u.total_users), "installed OR did work", {key:true})
-    + stat("Core users", fmt(u.core_users), "did meaningful work")
-    + stat("Installed", fmt(u.installed_users), "distinct install events")
-  + '</div>';
-  H+='<div class="grid g3" style="margin-top:14px">'
-    + stat("CI ids · excluded", fmt(u.ci_ids), "ephemeral runners, filtered")
-    + stat("All ids incl. CI + dev", fmt(u.all_ids_including_ci), "raw upper bound, never headline")
-    + stat("Install events (raw)", fmt(lc.install_events), fmt(lc.install_ids_noci)+" distinct non-CI")
-  + '</div>';
-
-  // 02 ACQUISITION & RETENTION
-  H+=sec("02","acquisition & retention","are new users sticking?");
-  H+='<div class="grid g4">'
-    + stat("D7 retention", pct(ret.d7_retention), (ret.d7_retained||0)+" of "+(ret.d7_cohort||0)+" returned", {key:true})
-    + stat("Upgrades", fmt(lc.upgrade_events), "version bumps observed")
-    + stat("Multi-session rate", pct(q.multi_session_rate), ">1 session at once")
-    + stat("Meaningful sessions 30d", fmt(q.meaningful_sessions_30d), "real-work sessions")
-  + '</div>';
-  H+='<div class="grid g2" style="margin-top:14px">'
-    + '<div class="panel reveal"><h3>daily active users · 60d</h3><p class="pd">headline = meaningful work on release, ex-CI · raw = anyone who launched</p>'+lineChart(series)+'</div>'
-    + '<div class="panel reveal"><h3>new installs / day · 60d non-CI</h3><p class="pd">distinct ids whose install landed that day</p>'+lineChart([{name:"installs",color:"#5ad27a",points:(d.timeseries.installs||[]).map(r=>({date:r.date,value:r.installs}))}])+'</div>'
-  + '</div>';
-
-  // 03 ENGAGEMENT
-  H+=sec("03","engagement quality","30-day · non-CI sessions");
-  H+='<div class="grid g4">'
-    + stat("Session success", pct(q.success_rate), "ended in success state", {key:true})
-    + stat("Avg session", dec(q.avg_session_mins)+" min", "per meaningful session")
-    + stat("Avg turns / session", dec(q.avg_turns), "user prompts / session")
-    + stat("Abandon rate", pct(q.abandon_rate), "left before first response")
-  + '</div>';
-  H+='<div class="grid g4" style="margin-top:14px">'
-    + stat("Turn success", pct(d.turns.turn_success_rate), "per-turn, 30d")
-    + stat("Avg turn time", ms(d.turns.avg_turn_ms), "active duration / turn")
-    + stat("Time to first response", ms(q.avg_first_response_ms), "agent responsiveness")
-    + stat("Time to first tool success", ms(q.avg_first_tool_success_ms), "first useful tool result")
-  + '</div>';
-  H+='<div class="grid g2" style="margin-top:14px">'
-    + stat("Avg tool latency", ms(q.avg_tool_latency_ms), "per executed tool call")
-    + stat("Crash rate", pct(lc.crash_rate)+" · completion "+(lc.lifecycle_completion_ratio==null?"—":lc.lifecycle_completion_ratio), "crash share · (ends+crashes)/starts", {key:true})
-  + '</div>';
-
-  // 04 TOKEN USAGE
-  const tk = d.tokens||{};
-  H+=sec("04","token usage","model token volume · non-CI · cache-aware");
-  H+='<div class="grid g4">'
-    + stat("Total tokens · 30d", fmt(tk.total_30d), "all token types, last 30d", {key:true})
-    + stat("Input · 30d", fmt(tk.input_30d), "prompt tokens sent")
-    + stat("Output · 30d", fmt(tk.output_30d), "completion tokens")
-    + stat("Cache read · 30d", fmt(tk.cache_read_30d), "served from prompt cache")
-  + '</div>';
-  H+='<div class="grid g4" style="margin-top:14px">'
-    + stat("Cache creation · 30d", fmt(tk.cache_creation_30d), "tokens written to cache")
-    + stat("Total tokens · all-time", fmt(tk.total_all), "since telemetry began")
-    + stat("Input · all-time", fmt(tk.input_all), "")
-    + stat("Output · all-time", fmt(tk.output_all), "")
-  + '</div>';
-
-  // 05 AGENT AUTONOMY
-  const ag = d.agent||{};
-  const activeMs = ag.agent_active_ms||0, modelMs = ag.agent_model_ms||0, toolMs = ag.agent_tool_ms||0, idleMs = ag.session_idle_ms||0, blockedMs = ag.agent_blocked_ms||0;
-  const hrs = (x)=> x==null?"—":(x/3600000>=1?(x/3600000).toFixed(1)+"h":(x/60000).toFixed(0)+"m");
-  H+=sec("05","agent autonomy","30-day · spawning, delegation & where agent time goes");
-  H+='<div class="grid g4">'
-    + stat("Spawned agents", fmt(ag.spawned_agents), "sub-agents launched", {key:true})
-    + stat("Subagent tasks", fmt(ag.subagent_tasks), fmt(ag.subagent_success)+" succeeded")
-    + stat("Swarm tasks", fmt(ag.swarm_tasks), fmt(ag.swarm_success)+" succeeded")
-    + stat("Background tasks", fmt(ag.background_tasks), fmt(ag.background_completed)+" completed")
-  + '</div>';
-  H+='<div class="grid g4" style="margin-top:14px">'
-    + stat("User cancellations", fmt(ag.user_cancelled), "user interrupted the agent")
-    + stat("Agent active time", hrs(activeMs), "total working time, 30d")
-    + stat("Time in model", hrs(modelMs), "thinking / generating")
-    + stat("Time in tools", hrs(toolMs), "executing tool calls")
-  + '</div>';
-  H+='<div class="grid g4" style="margin-top:14px">'
-    + stat("Agent blocked time", hrs(blockedMs), "waiting on user / approvals")
-    + stat("Session idle time", hrs(idleMs), "no activity")
-    + stat("Time to first action", ms(ag.avg_time_to_first_action_ms), "agent's first move")
-    + stat("Avg max concurrency", dec(ag.avg_max_concurrent,1), "peak parallel sessions")
-  + '</div>';
-
-  // 06 RELIABILITY
-  H+=sec("06","reliability","error counts · 30d non-CI · watch for spikes");
-  H+='<div class="grid g4">'
-    + stat("Provider timeouts", fmt(e.provider_timeout), "", {alert:(e.provider_timeout||0)>0})
-    + stat("Rate limited", fmt(e.rate_limited), "")
-    + stat("Auth failures", fmt(e.auth_failed), "", {alert:(e.auth_failed||0)>0})
-    + stat("Tool / MCP errors", fmt((e.tool_error||0)+(e.mcp_error||0)), fmt(e.tool_error)+" tool · "+fmt(e.mcp_error)+" mcp")
-  + '</div>';
-
-  // 07 WHO & WHAT
-  H+=sec("07","who & what","distinct users per bucket");
-  H+='<div class="grid g2">'
-    + tablePanel("Versions","adoption by release (non-CI)", rows(b.versions,"version"), "version","users")
-    + tablePanel("Platform","os / arch split", rows(b.arch,"platform"), "platform","users")
-  + '</div>';
-  H+='<div class="grid g2" style="margin-top:14px">'
-    + tablePanel("Providers","meaningful sessions by provider", rows(b.providers,"provider"), "provider","users")
-    + tablePanel("Auth method","successful auth by provider", rows(b.auth,"auth_provider"), "provider","users")
-  + '</div>';
-  H+='<div class="grid g2" style="margin-top:14px">'
-    + tablePanel("Build channel","incl. dev/local · release is headline channel", rows(b.channels,"build_channel"), "channel","users")
-    + tablePanel("Onboarding funnel","distinct users reaching each step", rows(b.onboarding,"step"), "step","users")
-  + '</div>';
-  // usage-by-hour bar chart
-  const hourData = Array.from({length:24},(_,i)=>{const m=(b.hours||[]).find(r=>Number(r.hour)===i); return {v:m?m.sessions:0, hr:i};});
-  H+='<div class="grid g2" style="margin-top:14px">'
-    + barsChart("Session starts by UTC hour","when sessions begin (non-CI)", hourData, (d)=>String(d.hr).padStart(2,'0'), "#ffb454")
-    + tablePanel("Operating system","os split (non-CI)", rows(b.os,"os"), "os","users")
-  + '</div>';
-
-  // 08 FEATURE ADOPTION
-  const fr = Object.entries(d.features||{}).map(([k,v])=>({label:k.replace(/_/g," "),value:v})).sort((a,b)=>b.value-a.value);
-  const tr = [["https",d.transport.https],["ws reuse",d.transport.ws_reuse],["ws fresh",d.transport.ws_fresh],["native http2",d.transport.native_http2],["cli subprocess",d.transport.cli],["other",d.transport.other]].map(([label,value])=>({label,value:value||0})).sort((a,b)=>b.value-a.value);
-  H+=sec("08","feature adoption","distinct users per capability · 30d");
-  H+='<div class="grid g2">'
-    + tablePanel("Features","users who touched each capability", fr, "feature","users")
-    + tablePanel("Transport mix","request transport counts (30d non-CI)", tr, "transport","count")
-  + '</div>';
-
-  // 09 USER LEADERBOARD
-  H+=sec("09","user leaderboard","most active anonymous ids · by lifecycle volume");
-  H+=leaderboardPanel(d.leaderboard||[]);
-
-  // 10 PIPELINE HEALTH (diagnostic)
-  H+=sec("10","pipeline health","diagnostic · not product metrics · watch for drift");
-  H+='<div class="grid g4">'
-    + stat("Lifecycle ids", fmt(h.lifecycle_ids), "distinct ids w/ end/crash")
-    + stat("Session-start ids", fmt(h.session_start_ids), "distinct ids that launched")
-    + stat("Ends without install", fmt(h.lifecycle_ids_without_install), "id mismatch / pre-install loss", {alert:(h.lifecycle_ids_without_install||0) > (h.lifecycle_ids||0)*0.5})
-    + stat("Heaviest single id", fmt(h.max_session_events_one_id), "max session events for one id")
-  + '</div>';
-  H+='<div class="grid g3" style="margin-top:14px">'
-    + stat("Top-5 id session events", fmt(h.top5_session_events), "of "+fmt(h.total_session_events)+" total")
-    + stat("Total session events", fmt(h.total_session_events), "ends + crashes, all time")
-    + stat("CI ids (30d window)", fmt(a.ci_mau), "filtered from headline")
-  + '</div>';
-
-  // 11 FEEDBACK
-  if((d.feedback||[]).length){
-    H+=sec("11","recent feedback","explicit user submissions");
-    H+='<div class="panel reveal">'+d.feedback.map(fb=>'<div class="fb"><div class="q">'+esc(fb.feedback_text)+'</div><div class="m">'+esc(new Date((fb.created_at||"").replace(" ","T")+"Z").toLocaleString())+' · v'+esc(fb.version||"?")+(fb.feedback_rating?' · <span class="badge">'+esc(fb.feedback_rating)+'</span>':'')+(fb.feedback_reason?' · '+esc(fb.feedback_reason):'')+'</div></div>').join('')+'</div>';
-  }
-
-  c.innerHTML=H;
-}
-
-document.getElementById("unlock").addEventListener("click",()=>{const v=document.getElementById("token").value.trim();if(!v){document.getElementById("gate-err").textContent="enter a token";return;}TOKEN=v;localStorage.setItem("jcode_dash_token",v);load();});
-document.getElementById("token").addEventListener("keydown",e=>{if(e.key==="Enter")document.getElementById("unlock").click();});
-document.getElementById("refresh").addEventListener("click",load);
-document.getElementById("logout").addEventListener("click",()=>{localStorage.removeItem("jcode_dash_token");TOKEN="";showGate("");});
-load();
-</script>
-</body>
-</html>`;
diff --git a/telemetry-worker/src/stats.js b/telemetry-worker/src/stats.js
deleted file mode 100644
index a359dded4..000000000
--- a/telemetry-worker/src/stats.js
+++ /dev/null
@@ -1,389 +0,0 @@
-// Read-only telemetry aggregation for the dashboard.
-//
-// Everything here returns counts/aggregates only, never raw event rows. Metrics
-// are organized into tiers (headline / secondary / diagnostic) and tagged with
-// importance so the dashboard can present "the one number" prominently while
-// still surfacing all available information.
-//
-// Accuracy rules (mirrors README "Accuracy notes"):
-//   - Users are distinct telemetry_id, never event counts.
-//   - "meaningful" = real work; see MEANINGFUL_SQL.
-//   - Headline numbers exclude CI traffic (is_ci = 1) and non-release channels.
-//   - Raw / less-filtered tiers are always reported alongside, never removed.
-
-// Meaningful-activity predicate, shared by every query so all windows agree.
-// A row is meaningful if it is a session_end/session_crash that did real work,
-// OR a turn_end (which only fires after a completed user turn) that did work.
-const MEANINGFUL_SQL = `(
-  (event IN ('session_end','session_crash') AND (
-    turns > 0 OR had_user_prompt > 0 OR had_assistant_response > 0
-    OR assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0
-    OR duration_secs > 0 OR error_provider_timeout > 0 OR error_auth_failed > 0
-    OR error_tool_error > 0 OR error_mcp_error > 0 OR error_rate_limited > 0
-    OR provider_switches > 0 OR model_switches > 0
-  ))
-  OR (event = 'turn_end' AND (
-    assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0
-    OR file_write_calls > 0 OR tests_run > 0 OR turn_success > 0
-  ))
-)`;
-
-const LIFECYCLE_EVENTS = "('session_start','turn_end','session_end','session_crash')";
-
-async function one(env, sql) {
-  const result = await env.DB.prepare(sql).all();
-  return (result.results && result.results[0]) || {};
-}
-
-async function many(env, sql) {
-  const result = await env.DB.prepare(sql).all();
-  return result.results || [];
-}
-
-export async function getStats(env) {
-  // --- Headline: total users (the one number) -----------------------------
-  // A user is a distinct non-CI id that ever installed OR did meaningful work.
-  const totals = await one(env, `
-    SELECT
-      COUNT(DISTINCT CASE WHEN is_ci = 0 AND (event = 'install' OR ${MEANINGFUL_SQL}) THEN telemetry_id END) AS total_users,
-      COUNT(DISTINCT CASE WHEN is_ci = 0 AND ${MEANINGFUL_SQL} THEN telemetry_id END) AS core_users,
-      COUNT(DISTINCT CASE WHEN is_ci = 0 THEN telemetry_id END) AS reached_users,
-      COUNT(DISTINCT CASE WHEN is_ci = 0 AND event = 'install' THEN telemetry_id END) AS installed_users,
-      COUNT(DISTINCT telemetry_id) AS all_ids_including_ci,
-      COUNT(DISTINCT CASE WHEN is_ci = 1 THEN telemetry_id END) AS ci_ids
-    FROM events
-  `);
-
-  // --- Active users from the rollup (cheap, ingest-time) -------------------
-  // DAU/WAU/MAU as distinct ids, headline = meaningful + release + non-CI.
-  const active = await one(env, `
-    SELECT
-      COUNT(DISTINCT CASE WHEN activity_date = date('now') THEN telemetry_id END) AS dau_raw,
-      COUNT(DISTINCT CASE WHEN activity_date = date('now') AND meaningful_active > 0 THEN telemetry_id END) AS dau_meaningful,
-      COUNT(DISTINCT CASE WHEN activity_date = date('now') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS dau,
-      COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') THEN telemetry_id END) AS wau_raw,
-      COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') AND meaningful_active > 0 THEN telemetry_id END) AS wau_meaningful,
-      COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS wau,
-      COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') THEN telemetry_id END) AS mau_raw,
-      COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND meaningful_active > 0 THEN telemetry_id END) AS mau_meaningful,
-      COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS mau,
-      COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND last_is_ci = 1 THEN telemetry_id END) AS ci_mau
-    FROM daily_active_users
-  `);
-
-  // --- Installs and lifecycle totals --------------------------------------
-  const lifecycle = await one(env, `
-    SELECT
-      SUM(CASE WHEN event = 'install' THEN 1 ELSE 0 END) AS install_events,
-      SUM(CASE WHEN event = 'upgrade' THEN 1 ELSE 0 END) AS upgrade_events,
-      SUM(CASE WHEN event = 'session_start' THEN 1 ELSE 0 END) AS session_starts,
-      SUM(CASE WHEN event = 'session_end' THEN 1 ELSE 0 END) AS session_ends,
-      SUM(CASE WHEN event = 'session_crash' THEN 1 ELSE 0 END) AS session_crashes,
-      SUM(CASE WHEN event = 'turn_end' THEN 1 ELSE 0 END) AS turn_ends,
-      COUNT(DISTINCT CASE WHEN event = 'install' THEN telemetry_id END) AS install_ids,
-      COUNT(DISTINCT CASE WHEN event = 'install' AND is_ci = 0 THEN telemetry_id END) AS install_ids_noci
-    FROM events
-    WHERE event IN ('install','upgrade','session_start','turn_end','session_end','session_crash')
-  `);
-  const lifecycleCompletion =
-    (lifecycle.session_starts || 0) > 0
-      ? Number(((lifecycle.session_ends + lifecycle.session_crashes) / lifecycle.session_starts).toFixed(3))
-      : null;
-  const crashRate =
-    (lifecycle.session_ends + lifecycle.session_crashes) > 0
-      ? Number((lifecycle.session_crashes / (lifecycle.session_ends + lifecycle.session_crashes)).toFixed(4))
-      : null;
-
-  // --- New vs returning (last 30d), retention -----------------------------
-  const retention = await one(env, `
-    WITH cohort AS (
-      SELECT DISTINCT telemetry_id FROM events
-      WHERE event = 'install' AND is_ci = 0
-        AND created_at >= datetime('now','-14 days') AND created_at < datetime('now','-7 days')
-    ), retained AS (
-      SELECT DISTINCT telemetry_id FROM events
-      WHERE event IN ('session_end','session_crash') AND is_ci = 0
-        AND created_at >= datetime('now','-7 days')
-    )
-    SELECT
-      (SELECT COUNT(*) FROM cohort) AS d7_cohort,
-      (SELECT COUNT(*) FROM cohort WHERE telemetry_id IN retained) AS d7_retained
-  `);
-  const d7Retention =
-    (retention.d7_cohort || 0) > 0
-      ? Number((retention.d7_retained / retention.d7_cohort).toFixed(3))
-      : null;
-
-  // --- 30d engagement quality ---------------------------------------------
-  const quality = await one(env, `
-    SELECT
-      AVG(duration_mins) AS avg_session_mins,
-      AVG(turns) AS avg_turns,
-      AVG(CASE WHEN session_success > 0 THEN 1.0 ELSE 0.0 END) AS success_rate,
-      AVG(CASE WHEN abandoned_before_response > 0 THEN 1.0 ELSE 0.0 END) AS abandon_rate,
-      AVG(first_assistant_response_ms) AS avg_first_response_ms,
-      AVG(first_tool_success_ms) AS avg_first_tool_success_ms,
-      AVG(CASE WHEN executed_tool_calls > 0 THEN CAST(tool_latency_total_ms AS REAL)/executed_tool_calls END) AS avg_tool_latency_ms,
-      SUM(input_tokens + output_tokens) AS tokens_30d,
-      AVG(CASE WHEN multi_sessioned > 0 THEN 1.0 ELSE 0.0 END) AS multi_session_rate
-    FROM events
-    WHERE event IN ('session_end','session_crash')
-      AND is_ci = 0 AND created_at > datetime('now','-30 days')
-  `);
-
-  // --- Token usage (all-time + 30d, full breakdown incl. cache) -----------
-  const tokens = await one(env, `
-    SELECT
-      SUM(input_tokens) AS input_all,
-      SUM(output_tokens) AS output_all,
-      SUM(cache_read_input_tokens) AS cache_read_all,
-      SUM(cache_creation_input_tokens) AS cache_creation_all,
-      SUM(total_tokens) AS total_all,
-      SUM(CASE WHEN created_at > datetime('now','-30 days') THEN input_tokens ELSE 0 END) AS input_30d,
-      SUM(CASE WHEN created_at > datetime('now','-30 days') THEN output_tokens ELSE 0 END) AS output_30d,
-      SUM(CASE WHEN created_at > datetime('now','-30 days') THEN cache_read_input_tokens ELSE 0 END) AS cache_read_30d,
-      SUM(CASE WHEN created_at > datetime('now','-30 days') THEN cache_creation_input_tokens ELSE 0 END) AS cache_creation_30d,
-      SUM(CASE WHEN created_at > datetime('now','-30 days') THEN total_tokens ELSE 0 END) AS total_30d
-    FROM events
-    WHERE event IN ('session_end','session_crash') AND is_ci = 0
-  `);
-
-  // --- Agent autonomy (30d): spawning, background/subagent/swarm, time split
-  const agent = await one(env, `
-    SELECT
-      SUM(spawned_agent_count) AS spawned_agents,
-      SUM(background_task_count) AS background_tasks,
-      SUM(background_task_completed_count) AS background_completed,
-      SUM(subagent_task_count) AS subagent_tasks,
-      SUM(subagent_success_count) AS subagent_success,
-      SUM(swarm_task_count) AS swarm_tasks,
-      SUM(swarm_success_count) AS swarm_success,
-      SUM(user_cancelled_count) AS user_cancelled,
-      SUM(agent_active_ms_total) AS agent_active_ms,
-      SUM(agent_model_ms_total) AS agent_model_ms,
-      SUM(agent_tool_ms_total) AS agent_tool_ms,
-      SUM(agent_blocked_ms_total) AS agent_blocked_ms,
-      SUM(session_idle_ms_total) AS session_idle_ms,
-      AVG(time_to_first_agent_action_ms) AS avg_time_to_first_action_ms,
-      AVG(time_to_first_useful_action_ms) AS avg_time_to_first_useful_ms,
-      AVG(CASE WHEN max_concurrent_sessions > 0 THEN max_concurrent_sessions END) AS avg_max_concurrent
-    FROM events
-    WHERE event IN ('session_end','session_crash') AND is_ci = 0
-      AND created_at > datetime('now','-30 days')
-  `);
-
-  // --- Per-turn metrics (30d) ---------------------------------------------
-  const turns = await one(env, `
-    SELECT
-      AVG(turn_active_duration_ms) AS avg_turn_ms,
-      AVG(CASE WHEN turn_success > 0 THEN 1.0 ELSE 0.0 END) AS turn_success_rate
-    FROM events
-    WHERE event = 'turn_end' AND is_ci = 0 AND created_at > datetime('now','-30 days')
-  `);
-
-  // --- Errors (30d) --------------------------------------------------------
-  const errors = await one(env, `
-    SELECT
-      SUM(error_provider_timeout) AS provider_timeout,
-      SUM(error_auth_failed) AS auth_failed,
-      SUM(error_tool_error) AS tool_error,
-      SUM(error_mcp_error) AS mcp_error,
-      SUM(error_rate_limited) AS rate_limited
-    FROM events
-    WHERE event IN ('session_end','session_crash') AND is_ci = 0
-      AND created_at > datetime('now','-30 days')
-  `);
-
-  // --- Feature adoption (30d, distinct users) -----------------------------
-  const features = await one(env, `
-    SELECT
-      COUNT(DISTINCT CASE WHEN feature_memory_used > 0 THEN telemetry_id END) AS memory,
-      COUNT(DISTINCT CASE WHEN feature_swarm_used > 0 THEN telemetry_id END) AS swarm,
-      COUNT(DISTINCT CASE WHEN feature_web_used > 0 THEN telemetry_id END) AS web,
-      COUNT(DISTINCT CASE WHEN feature_email_used > 0 THEN telemetry_id END) AS email,
-      COUNT(DISTINCT CASE WHEN feature_mcp_used > 0 THEN telemetry_id END) AS mcp,
-      COUNT(DISTINCT CASE WHEN feature_side_panel_used > 0 THEN telemetry_id END) AS side_panel,
-      COUNT(DISTINCT CASE WHEN feature_goal_used > 0 THEN telemetry_id END) AS goal,
-      COUNT(DISTINCT CASE WHEN feature_selfdev_used > 0 THEN telemetry_id END) AS selfdev,
-      COUNT(DISTINCT CASE WHEN feature_background_used > 0 THEN telemetry_id END) AS background,
-      COUNT(DISTINCT CASE WHEN feature_subagent_used > 0 THEN telemetry_id END) AS subagent
-    FROM events
-    WHERE event IN ('session_end','session_crash') AND is_ci = 0
-      AND created_at > datetime('now','-30 days')
-  `);
-
-  // --- Transport mix (30d) -------------------------------------------------
-  const transport = await one(env, `
-    SELECT
-      SUM(transport_https) AS https,
-      SUM(transport_persistent_ws_fresh) AS ws_fresh,
-      SUM(transport_persistent_ws_reuse) AS ws_reuse,
-      SUM(transport_cli_subprocess) AS cli,
-      SUM(transport_native_http2) AS native_http2,
-      SUM(transport_other) AS other
-    FROM events
-    WHERE event IN ('session_end','session_crash') AND is_ci = 0
-      AND created_at > datetime('now','-30 days')
-  `);
-
-  // --- Breakdowns (distinct users) ----------------------------------------
-  const versions = await many(env, `
-    SELECT version, COUNT(DISTINCT telemetry_id) AS users
-    FROM events WHERE is_ci = 0 AND version IS NOT NULL
-    GROUP BY version ORDER BY users DESC LIMIT 12
-  `);
-  const os = await many(env, `
-    SELECT os, COUNT(DISTINCT telemetry_id) AS users
-    FROM events WHERE is_ci = 0 AND os IS NOT NULL
-    GROUP BY os ORDER BY users DESC
-  `);
-  const arch = await many(env, `
-    SELECT (COALESCE(os,'?') || ' / ' || COALESCE(arch,'?')) AS platform, COUNT(DISTINCT telemetry_id) AS users
-    FROM events WHERE is_ci = 0 AND os IS NOT NULL
-    GROUP BY os, arch ORDER BY users DESC LIMIT 12
-  `);
-  const channels = await many(env, `
-    SELECT COALESCE(build_channel,'unknown') AS build_channel, COUNT(DISTINCT telemetry_id) AS users
-    FROM events WHERE event IN ('session_end','session_crash')
-    GROUP BY build_channel ORDER BY users DESC
-  `);
-  const providers = await many(env, `
-    SELECT COALESCE(provider_end,'unknown') AS provider, COUNT(DISTINCT telemetry_id) AS users
-    FROM events WHERE event IN ('session_end','session_crash') AND is_ci = 0 AND ${MEANINGFUL_SQL}
-    GROUP BY provider_end ORDER BY users DESC LIMIT 12
-  `);
-  const auth = await many(env, `
-    SELECT COALESCE(auth_provider,'unknown') AS auth_provider, COUNT(DISTINCT telemetry_id) AS users
-    FROM events WHERE event = 'auth_success' AND is_ci = 0
-    GROUP BY auth_provider ORDER BY users DESC LIMIT 12
-  `);
-  const onboarding = await many(env, `
-    SELECT step, COUNT(DISTINCT telemetry_id) AS users
-    FROM events WHERE event = 'onboarding_step' AND is_ci = 0 AND step IS NOT NULL
-    GROUP BY step ORDER BY users DESC
-  `);
-
-  // --- Usage timing: session starts by UTC hour ---------------------------
-  const hours = await many(env, `
-    SELECT session_start_hour_utc AS hour, COUNT(*) AS sessions
-    FROM events
-    WHERE event = 'session_start' AND is_ci = 0 AND session_start_hour_utc IS NOT NULL
-    GROUP BY session_start_hour_utc ORDER BY session_start_hour_utc
-  `);
-
-  // --- Data health: identity reconciliation + duplicate/skew signals ------
-  // These are *not* product metrics; they tell you whether the pipeline is
-  // healthy (events arriving, ids matching installs, no single id dominating).
-  const health = await one(env, `
-    WITH lifecycle AS (
-      SELECT telemetry_id FROM events WHERE event IN ('session_end','session_crash')
-    ), install_ids AS (
-      SELECT DISTINCT telemetry_id FROM events WHERE event = 'install'
-    )
-    SELECT
-      (SELECT COUNT(DISTINCT telemetry_id) FROM lifecycle) AS lifecycle_ids,
-      (SELECT COUNT(DISTINCT telemetry_id) FROM events WHERE event = 'session_start') AS session_start_ids,
-      (SELECT COUNT(DISTINCT l.telemetry_id) FROM lifecycle l
-         LEFT JOIN install_ids i ON i.telemetry_id = l.telemetry_id
-         WHERE i.telemetry_id IS NULL) AS lifecycle_ids_without_install
-  `);
-  const skew = await one(env, `
-    SELECT
-      MAX(c) AS max_session_events_one_id,
-      SUM(c) AS total_session_events,
-      (SELECT SUM(c2) FROM (SELECT c AS c2 FROM (
-         SELECT telemetry_id, COUNT(*) AS c FROM events
-         WHERE event IN ('session_end','session_crash')
-         GROUP BY telemetry_id ORDER BY c DESC LIMIT 5))) AS top5_session_events
-    FROM (SELECT telemetry_id, COUNT(*) AS c FROM events
-          WHERE event IN ('session_end','session_crash') GROUP BY telemetry_id)
-  `);
-  const meaningfulSessions = await one(env, `
-    SELECT COUNT(*) AS meaningful_sessions
-    FROM events
-    WHERE event IN ('session_end','session_crash') AND is_ci = 0
-      AND created_at > datetime('now','-30 days') AND ${MEANINGFUL_SQL}
-  `);
-
-  // --- User leaderboard: most active anonymous ids ------------------------
-  // Ranks by lifecycle (session_end + session_crash) volume. telemetry_id is
-  // anonymous, so we surface a short prefix only. Useful for spotting power
-  // users and dev/test skew. Includes whether the id is CI and its channel.
-  const leaderboard = await many(env, `
-    SELECT
-      substr(telemetry_id, 1, 8) AS id_prefix,
-      COUNT(*) AS sessions,
-      SUM(turns) AS turns,
-      SUM(input_tokens + output_tokens) AS tokens,
-      SUM(tool_calls) AS tool_calls,
-      MAX(is_ci) AS is_ci,
-      MAX(build_channel) AS build_channel,
-      MAX(version) AS version,
-      MAX(created_at) AS last_seen
-    FROM events
-    WHERE event IN ('session_end','session_crash')
-    GROUP BY telemetry_id
-    ORDER BY sessions DESC
-    LIMIT 20
-  `);
-
-  // --- Daily timeseries (last 60 days) for charts -------------------------
-  const daily = await many(env, `
-    SELECT
-      activity_date AS date,
-      COUNT(DISTINCT telemetry_id) AS raw,
-      COUNT(DISTINCT CASE WHEN meaningful_active > 0 THEN telemetry_id END) AS meaningful,
-      COUNT(DISTINCT CASE WHEN meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS headline,
-      COUNT(DISTINCT CASE WHEN last_is_ci = 1 THEN telemetry_id END) AS ci
-    FROM daily_active_users
-    WHERE activity_date > date('now','-60 days')
-    GROUP BY activity_date ORDER BY activity_date
-  `);
-  const dailyInstalls = await many(env, `
-    SELECT date(created_at) AS date, COUNT(DISTINCT telemetry_id) AS installs
-    FROM events
-    WHERE event = 'install' AND is_ci = 0 AND created_at > datetime('now','-60 days')
-    GROUP BY date(created_at) ORDER BY date(created_at)
-  `);
-
-  // --- Recent feedback (text only, no identifiers) ------------------------
-  const feedback = await many(env, `
-    SELECT created_at, feedback_text, feedback_rating, feedback_reason, version
-    FROM events
-    WHERE event = 'feedback' AND feedback_text IS NOT NULL
-    ORDER BY created_at DESC LIMIT 25
-  `);
-
-  return {
-    generated_at: new Date().toISOString(),
-    headline: {
-      total_users: totals.total_users || 0,
-      dau: active.dau || 0,
-      wau: active.wau || 0,
-      mau: active.mau || 0,
-    },
-    users: {
-      total_users: totals.total_users || 0,
-      core_users: totals.core_users || 0,
-      installed_users: totals.installed_users || 0,
-      reached_users: totals.reached_users || 0,
-      all_ids_including_ci: totals.all_ids_including_ci || 0,
-      ci_ids: totals.ci_ids || 0,
-    },
-    active,
-    lifecycle: { ...lifecycle, lifecycle_completion_ratio: lifecycleCompletion, crash_rate: crashRate },
-    retention: { ...retention, d7_retention: d7Retention },
-    quality: { ...quality, meaningful_sessions_30d: meaningfulSessions.meaningful_sessions || 0 },
-    tokens,
-    agent,
-    turns,
-    errors,
-    features,
-    transport,
-    breakdowns: { versions, os, arch, channels, providers, auth, onboarding, hours },
-    leaderboard,
-    health: { ...health, ...skew },
-    timeseries: { daily, installs: dailyInstalls },
-    feedback,
-  };
-}
diff --git a/telemetry-worker/src/worker.js b/telemetry-worker/src/worker.js
index b14ae2dd6..ecd45ae7e 100644
--- a/telemetry-worker/src/worker.js
+++ b/telemetry-worker/src/worker.js
@@ -1,6 +1,3 @@
-import { getStats } from "./stats.js";
-import { DASHBOARD_HTML } from "./dashboard.js";
-
 let cachedEventColumns = null;
 let cachedSessionDetailColumns = null;
 let cachedTurnDetailColumns = null;
@@ -13,33 +10,11 @@ export default {
       });
     }
 
-    const url = new URL(request.url);
-
-    // Read-only dashboard surface (GET). The HTML page is public; the JSON stats
-    // endpoint is gated behind DASHBOARD_TOKEN so raw aggregates are not exposed
-    // to anyone who finds the URL. Raw events are never returned, only counts.
-    if (request.method === "GET") {
-      if (url.pathname === "/" || url.pathname === "/dashboard") {
-        return htmlResponse(DASHBOARD_HTML);
-      }
-      if (url.pathname === "/v1/stats") {
-        if (!isAuthorized(request, env)) {
-          return jsonResponse({ error: "Unauthorized" }, 401);
-        }
-        try {
-          const stats = await getStats(env);
-          return jsonResponse(stats);
-        } catch (err) {
-          return jsonResponse({ error: "Internal error", detail: String(err?.message || err) }, 500);
-        }
-      }
-      return jsonResponse({ error: "Not found" }, 404);
-    }
-
     if (request.method !== "POST") {
       return jsonResponse({ error: "Method not allowed" }, 405);
     }
 
+    const url = new URL(request.url);
     if (url.pathname !== "/v1/event") {
       return jsonResponse({ error: "Not found" }, 404);
     }
@@ -79,21 +54,6 @@ export default {
   },
 };
 
-// When DASHBOARD_TOKEN is unset the stats endpoint stays locked (deny by
-// default) rather than leaking aggregates. Accepts either a Bearer header or a
-// ?token= query param so it works from curl and the browser fetch alike.
-function isAuthorized(request, env) {
-  const expected = env.DASHBOARD_TOKEN;
-  if (!expected) {
-    return false;
-  }
-  const url = new URL(request.url);
-  const header = request.headers.get("authorization") || "";
-  const bearer = header.startsWith("Bearer ") ? header.slice(7) : null;
-  const provided = bearer || url.searchParams.get("token") || request.headers.get("x-dashboard-token");
-  return provided != null && provided === expected;
-}
-
 async function insertEvent(env, body) {
   const columns = await getEventColumns(env);
   const sessionDetailColumns = await getSessionDetailColumns(env);
@@ -633,21 +593,10 @@ function jsonResponse(data, status = 200) {
   });
 }
 
-function htmlResponse(html, status = 200) {
-  return new Response(html, {
-    status,
-    headers: {
-      "Content-Type": "text/html; charset=utf-8",
-      "Cache-Control": "no-store",
-      ...corsHeaders(),
-    },
-  });
-}
-
 function corsHeaders() {
   return {
     "Access-Control-Allow-Origin": "*",
-    "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
-    "Access-Control-Allow-Headers": "Content-Type, Authorization, X-Dashboard-Token",
+    "Access-Control-Allow-Methods": "POST, OPTIONS",
+    "Access-Control-Allow-Headers": "Content-Type",
   };
 }

From c449f76e7556411fd6b9fe3117f51c3b39be638f Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:01:04 -0700
Subject: [PATCH 19/57] desktop: parallelize hero reveal texture build to cut
 welcome stutter

The runtime welcome-hero mask ("Hello there") is built once on the first
single-session frame, but build_hero_reveal_texture runs a per-lit-pixel
nearest-stroke search (O(pixels x segments)) on the UI thread, costing
~600ms and stalling the start of the reveal animation.

Split the per-pixel fill across worker threads via std::thread::scope
(rows are independent and read-only over glyph_rgba/segments), reducing
the one-time build cost. Output is bit-identical to the serial path;
small images fall back to serial to avoid spawn overhead. Added parity
and worker-count tests.
---
 crates/jcode-desktop/src/main.rs       | 126 ++++++++++++++++++++-----
 crates/jcode-desktop/src/main_tests.rs |  99 +++++++++++++++++++
 2 files changed, 203 insertions(+), 22 deletions(-)

diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs
index d2979b41f..6afaed163 100644
--- a/crates/jcode-desktop/src/main.rs
+++ b/crates/jcode-desktop/src/main.rs
@@ -11907,30 +11907,15 @@ fn build_hero_reveal_texture(
     }
 
     let mut values = vec![1.0_f32; (width * height) as usize];
-    let mut min_value = f32::INFINITY;
-    let mut max_value = 0.0_f32;
     let brush_delay_px = (alpha_bounds.height() * 0.10).max(5.0);
 
-    for y in 0..height {
-        for x in 0..width {
-            let pixel_index = (y * width + x) as usize;
-            let alpha = glyph_rgba[pixel_index * 4];
-            if alpha <= 2 {
-                continue;
-            }
-            let (path_progress, distance) = nearest_hero_stroke_progress(
-                x as f32 + 0.5,
-                y as f32 + 0.5,
-                alpha_bounds,
-                &segments,
-            );
-            let width_delay = (distance / brush_delay_px).min(1.0) * 0.045;
-            let value = (path_progress + width_delay).clamp(0.0, 1.0);
-            values[pixel_index] = value;
-            min_value = min_value.min(value);
-            max_value = max_value.max(value);
-        }
-    }
+    // This per-pixel nearest-stroke search dominates the one-time hero mask
+    // build (hundreds of ms on the UI thread). Each lit pixel is independent
+    // and only reads `glyph_rgba`/`segments`, so split the rows across worker
+    // threads. Output is bit-identical to the serial version; min/max are
+    // reduced afterward from the filled buffer.
+    let (min_value, max_value) =
+        fill_hero_reveal_values(&mut values, width, height, glyph_rgba, alpha_bounds, &segments, brush_delay_px);
 
     if !min_value.is_finite() || max_value <= min_value {
         return None;
@@ -11958,6 +11943,103 @@ fn build_hero_reveal_texture(
     Some(reveal_rgba)
 }
 
+/// Fill `values` with each lit pixel's reveal progress and return the
+/// `(min, max)` of the written values.
+///
+/// The work is split into horizontal row bands processed on separate threads
+/// when the image is large enough to amortize the spawn cost. Pixels are
+/// independent, so the result is identical to a serial fill.
+fn fill_hero_reveal_values(
+    values: &mut [f32],
+    width: u32,
+    height: u32,
+    glyph_rgba: &[u8],
+    alpha_bounds: HeroMaskPixelBounds,
+    segments: &[WelcomeHeroStrokeSegment],
+    brush_delay_px: f32,
+) -> (f32, f32) {
+    let row_stride = width as usize;
+    let compute_row = |row_index: u32, row_values: &mut [f32]| -> (f32, f32) {
+        let mut min_value = f32::INFINITY;
+        let mut max_value = 0.0_f32;
+        let row_offset = row_index as usize * row_stride;
+        for x in 0..width {
+            let pixel_index = row_offset + x as usize;
+            let alpha = glyph_rgba[pixel_index * 4];
+            if alpha <= 2 {
+                continue;
+            }
+            let (path_progress, distance) = nearest_hero_stroke_progress(
+                x as f32 + 0.5,
+                row_index as f32 + 0.5,
+                alpha_bounds,
+                segments,
+            );
+            let width_delay = (distance / brush_delay_px).min(1.0) * 0.045;
+            let value = (path_progress + width_delay).clamp(0.0, 1.0);
+            row_values[x as usize] = value;
+            min_value = min_value.min(value);
+            max_value = max_value.max(value);
+        }
+        (min_value, max_value)
+    };
+
+    let total_pixels = row_stride.saturating_mul(height as usize);
+    let worker_count = hero_reveal_worker_count(total_pixels);
+    if worker_count <= 1 || height < 2 {
+        let mut min_value = f32::INFINITY;
+        let mut max_value = 0.0_f32;
+        for (row_index, row_values) in values.chunks_mut(row_stride).enumerate() {
+            let (row_min, row_max) = compute_row(row_index as u32, row_values);
+            min_value = min_value.min(row_min);
+            max_value = max_value.max(row_max);
+        }
+        return (min_value, max_value);
+    }
+
+    let rows_per_band = (height as usize).div_ceil(worker_count).max(1);
+    let mut min_value = f32::INFINITY;
+    let mut max_value = 0.0_f32;
+    std::thread::scope(|scope| {
+        let mut handles = Vec::new();
+        for (band_index, band) in values.chunks_mut(rows_per_band * row_stride).enumerate() {
+            let first_row = (band_index * rows_per_band) as u32;
+            let compute_row = &compute_row;
+            handles.push(scope.spawn(move || {
+                let mut band_min = f32::INFINITY;
+                let mut band_max = 0.0_f32;
+                for (offset, row_values) in band.chunks_mut(row_stride).enumerate() {
+                    let (row_min, row_max) = compute_row(first_row + offset as u32, row_values);
+                    band_min = band_min.min(row_min);
+                    band_max = band_max.max(row_max);
+                }
+                (band_min, band_max)
+            }));
+        }
+        for handle in handles {
+            if let Ok((band_min, band_max)) = handle.join() {
+                min_value = min_value.min(band_min);
+                max_value = max_value.max(band_max);
+            }
+        }
+    });
+    (min_value, max_value)
+}
+
+/// Number of worker threads to use for the hero reveal fill. Returns 1 for
+/// small images where threading overhead would dominate.
+fn hero_reveal_worker_count(total_pixels: usize) -> usize {
+    const MIN_PIXELS_PER_WORKER: usize = 32 * 1024;
+    if total_pixels < MIN_PIXELS_PER_WORKER * 2 {
+        return 1;
+    }
+    let available = std::thread::available_parallelism()
+        .map(|value| value.get())
+        .unwrap_or(1);
+    let by_work = total_pixels / MIN_PIXELS_PER_WORKER;
+    available.min(by_work).max(1)
+}
+
 fn nearest_hero_stroke_progress(
     x: f32,
     y: f32,
diff --git a/crates/jcode-desktop/src/main_tests.rs b/crates/jcode-desktop/src/main_tests.rs
index 6fa95c06b..3bb960e95 100644
--- a/crates/jcode-desktop/src/main_tests.rs
+++ b/crates/jcode-desktop/src/main_tests.rs
@@ -719,6 +719,105 @@ fn next_animation_redraw_paces_active_animations_and_settles_when_idle() {
     assert!(DESKTOP_ANIMATION_FRAME_INTERVAL > Duration::ZERO);
 }
 
+#[test]
+fn hero_reveal_worker_count_falls_back_to_serial_for_small_images() {
+    // Tiny images should not pay thread-spawn overhead.
+    assert_eq!(hero_reveal_worker_count(0), 1);
+    assert_eq!(hero_reveal_worker_count(1024), 1);
+    // Large images should use more than one worker when parallelism is available.
+    let big = hero_reveal_worker_count(8 * 1024 * 1024);
+    let available = std::thread::available_parallelism()
+        .map(|value| value.get())
+        .unwrap_or(1);
+    assert!(big >= 1);
+    assert!(big <= available.max(1));
+}
+
+#[test]
+fn fill_hero_reveal_values_matches_serial_reference() {
+    let width = 64_u32;
+    let height = 48_u32;
+    let alpha_bounds = HeroMaskPixelBounds {
+        min_x: 4,
+        min_y: 4,
+        max_x: width - 4,
+        max_y: height - 4,
+    };
+    // A handful of normalized stroke segments tracing a rough path.
+    let segments = vec![
+        WelcomeHeroStrokeSegment {
+            start: [0.1, 0.2],
+            end: [0.4, 0.5],
+            start_progress: 0.0,
+            end_progress: 0.4,
+        },
+        WelcomeHeroStrokeSegment {
+            start: [0.4, 0.5],
+            end: [0.8, 0.3],
+            start_progress: 0.4,
+            end_progress: 0.8,
+        },
+        WelcomeHeroStrokeSegment {
+            start: [0.8, 0.3],
+            end: [0.9, 0.9],
+            start_progress: 0.8,
+            end_progress: 1.0,
+        },
+    ];
+    // Mark a checkerboard of lit pixels so both branches exercise lit/unlit.
+    let mut glyph_rgba = vec![0_u8; (width * height * 4) as usize];
+    for y in 0..height {
+        for x in 0..width {
+            if (x + y) % 3 == 0 {
+                let index = ((y * width + x) * 4) as usize;
+                glyph_rgba[index] = 200;
+            }
+        }
+    }
+    let brush_delay_px = (alpha_bounds.height() * 0.10).max(5.0);
+
+    // Serial reference computed directly here.
+    let mut expected = vec![1.0_f32; (width * height) as usize];
+    let mut expected_min = f32::INFINITY;
+    let mut expected_max = 0.0_f32;
+    for y in 0..height {
+        for x in 0..width {
+            let pixel_index = (y * width + x) as usize;
+            if glyph_rgba[pixel_index * 4] <= 2 {
+                continue;
+            }
+            let (path_progress, distance) = nearest_hero_stroke_progress(
+                x as f32 + 0.5,
+                y as f32 + 0.5,
+                alpha_bounds,
+                &segments,
+            );
+            let width_delay = (distance / brush_delay_px).min(1.0) * 0.045;
+            let value = (path_progress + width_delay).clamp(0.0, 1.0);
+            expected[pixel_index] = value;
+            expected_min = expected_min.min(value);
+            expected_max = expected_max.max(value);
+        }
+    }
+
+    // The parallel implementation must produce bit-identical output regardless
+    // of how many worker threads it chose.
+    let mut actual = vec![1.0_f32; (width * height) as usize];
+    let (actual_min, actual_max) = fill_hero_reveal_values(
+        &mut actual,
+        width,
+        height,
+        &glyph_rgba,
+        alpha_bounds,
+        &segments,
+        brush_delay_px,
+    );
+
+    assert_eq!(actual, expected, "parallel hero reveal fill must match serial");
+    assert_eq!(actual_min.to_bits(), expected_min.to_bits());
+    assert_eq!(actual_max.to_bits(), expected_max.to_bits());
+}
+
 #[test]
 fn desktop_async_job_slots_are_bounded_and_released() -> Result<()> {
     let counter = std::sync::atomic::AtomicUsize::new(0);

From 6c855cc978ecaa86055ca4a334f075de8aa95b3a Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:04:22 -0700
Subject: [PATCH 20/57] fix(provider): show active OpenAI-compatible profile
 name in header (#329)

The header and info widget hard-coded 'OpenRouter' for any model routed
through the OpenRouter slot, even when the user switched to a direct
OpenAI-compatible profile such as NVIDIA NIM at runtime. The display name
was resolved from process env vars that only reflect the startup profile,
so a runtime '/model' switch never updated the label.

Add a runtime-aware Provider::display_name() (default = name()) overridden
by OpenRouterProvider (maps profile_id -> 'NVIDIA NIM', etc.) and
MultiProvider (delegates to the active execution runtime). name() stays
the stable machine id ('openrouter') that billing/routing keys off.
format_model_name() in the header now uses the active provider's display
name instead of a fixed 'OpenRouter:' prefix.

Adds regression tests.
---
 crates/jcode-app-core/src/agent/status.rs     |  4 +-
 crates/jcode-base/src/provider/mod.rs         | 13 ++++
 crates/jcode-base/src/provider/openrouter.rs  | 39 +++++++++++
 .../src/provider/openrouter_provider_impl.rs  |  4 ++
 .../src/provider/openrouter_tests.rs          | 68 +++++++++++++++++++
 crates/jcode-provider-core/src/lib.rs         | 19 +++++-
 crates/jcode-tui/src/tui/app/tui_state.rs     | 10 +--
 crates/jcode-tui/src/tui/ui_header.rs         | 41 ++++++++++-
 8 files changed, 188 insertions(+), 10 deletions(-)

diff --git a/crates/jcode-app-core/src/agent/status.rs b/crates/jcode-app-core/src/agent/status.rs
index 747179f30..e011b5b01 100644
--- a/crates/jcode-app-core/src/agent/status.rs
+++ b/crates/jcode-app-core/src/agent/status.rs
@@ -134,7 +134,9 @@ impl Agent {
     }
 
     pub fn provider_name(&self) -> String {
-        crate::provider_catalog::runtime_provider_display_name(self.provider.name())
+        // `display_name()` resolves the active runtime profile (e.g. NVIDIA NIM)
+        // for the OpenRouter slot; for all other providers it equals `name()`.
+        self.provider.display_name()
     }
 
     pub fn provider_model(&self) -> String {
diff --git a/crates/jcode-base/src/provider/mod.rs b/crates/jcode-base/src/provider/mod.rs
index b6876718a..633575798 100644
--- a/crates/jcode-base/src/provider/mod.rs
+++ b/crates/jcode-base/src/provider/mod.rs
@@ -1040,6 +1040,19 @@ impl Provider for MultiProvider {
         }
     }
 
+    fn display_name(&self) -> String {
+        // The OpenRouter slot multiplexes the public aggregator and every
+        // direct OpenAI-compatible profile (NVIDIA NIM, DeepSeek, ...). Ask the
+        // active execution runtime for its own label so the UI reflects the
+        // profile selected at runtime rather than the fixed "OpenRouter" name.
+        if matches!(self.active_provider(), ActiveProvider::OpenRouter)
+            && let Some(execution) = self.active_openrouter_execution_provider()
+        {
+            return execution.runtime_display_name();
+        }
+        self.name().to_string()
+    }
+
     fn model(&self) -> String {
         match self.active_provider() {
             ActiveProvider::Claude => {
diff --git a/crates/jcode-base/src/provider/openrouter.rs b/crates/jcode-base/src/provider/openrouter.rs
index a4745e1b5..6caf84578 100644
--- a/crates/jcode-base/src/provider/openrouter.rs
+++ b/crates/jcode-base/src/provider/openrouter.rs
@@ -1046,6 +1046,45 @@ impl OpenRouterProvider {
         self.supports_provider_features
     }
 
+    /// Human-facing label for the runtime backing this provider instance.
+    ///
+    /// Unlike the env-var based [`crate::provider_catalog::runtime_provider_display_name`],
+    /// this reads the instance's own `profile_id`/`api_base`, so it stays correct
+    /// after a runtime `/model` switch to a different OpenAI-compatible profile
+    /// (e.g. NVIDIA NIM) even though `name()` is fixed at `"openrouter"`.
+    pub(crate) fn runtime_display_name(&self) -> String {
+        // Direct OpenAI-compatible profile (NVIDIA NIM, DeepSeek, Z.AI, ...).
+        if let Some(profile_id) = self.profile_id.as_deref() {
+            if let Some(profile) = openai_compatible_profile_by_id(profile_id) {
+                return profile.display_name.to_string();
+            }
+            return profile_id.to_string();
+        }
+
+        // Non-aggregator endpoint without a known profile id: classify by base
+        // URL so custom OpenAI-compatible endpoints don't masquerade as the
+        // public OpenRouter aggregator.
+        if !self.supports_provider_features {
+            if let Some(profile_id) =
+                crate::provider_catalog::openai_compatible_profile_id_for_api_base(&self.api_base)
+                && let Some(profile) = openai_compatible_profile_by_id(profile_id)
+            {
+                return profile.display_name.to_string();
+            }
+            if std::env::var("JCODE_RUNTIME_PROVIDER")
+                .ok()
+                .is_some_and(|value| value.trim().eq_ignore_ascii_case("azure-openai"))
+            {
+                return "Azure OpenAI".to_string();
+            }
+            if !self.api_base.contains("openrouter.ai") {
+                return "OpenAI-compatible".to_string();
+            }
+        }
+
+        "OpenRouter".to_string()
+    }
+
     pub(crate) fn direct_openai_compatible_route_parts(&self) -> Option<(String, String, String)> {
         if self.supports_provider_features {
             return None;
diff --git a/crates/jcode-base/src/provider/openrouter_provider_impl.rs b/crates/jcode-base/src/provider/openrouter_provider_impl.rs
index cf4c93a75..f3edd04bb 100644
--- a/crates/jcode-base/src/provider/openrouter_provider_impl.rs
+++ b/crates/jcode-base/src/provider/openrouter_provider_impl.rs
@@ -743,6 +743,10 @@ impl Provider for OpenRouterProvider {
         "openrouter"
     }
 
+    fn display_name(&self) -> String {
+        self.runtime_display_name()
+    }
+
     fn model(&self) -> String {
         self.model
             .try_read()
diff --git a/crates/jcode-base/src/provider/openrouter_tests.rs b/crates/jcode-base/src/provider/openrouter_tests.rs
index 28a0a3ecc..0628a1d84 100644
--- a/crates/jcode-base/src/provider/openrouter_tests.rs
+++ b/crates/jcode-base/src/provider/openrouter_tests.rs
@@ -2193,3 +2193,71 @@ fn strict_openai_schema_endpoint_allows_other_providers() {
         "https://api.openai.com/v1"
     ));
 }
+
+#[test]
+fn runtime_display_name_tracks_active_openai_compatible_profile() {
+    // Regression for issue #329: switching to a direct OpenAI-compatible
+    // profile (NVIDIA NIM) at runtime must surface that profile's display
+    // name, not the fixed "OpenRouter" aggregator label. The machine-facing
+    // `name()` stays "openrouter" because billing/routing logic keys off it.
+    let _lock = ENV_LOCK.lock();
+    let temp = TempDir::new().expect("create temp home");
+    let jcode_home = temp.path().join("jcode-home");
+    let _jcode_home = EnvVarGuard::set("JCODE_HOME", &jcode_home);
+    let _home = EnvVarGuard::set("HOME", temp.path());
+    let _appdata = EnvVarGuard::set("APPDATA", temp.path().join("AppData").join("Roaming"));
+    let _env = isolate_openrouter_autodetect_env();
+
+    // Configure both the OpenRouter aggregator and NVIDIA NIM credentials so
+    // the slot can host either runtime. Set after the isolate guard, which
+    // clears every profile api-key env var.
+    let _or_key = EnvVarGuard::set("OPENROUTER_API_KEY", "or-test-key");
+    let _nim_key = EnvVarGuard::set("NVIDIA_API_KEY", "nim-test-key");
+    crate::config::invalidate_config_cache();
+
+    let provider =
+        crate::provider::MultiProvider::new_with_auth_status(crate::auth::AuthStatus::default());
+
+    // Switch to a NVIDIA NIM model via the profile-prefixed model request.
+    provider
+        .set_model("nvidia-nim:nvidia/llama-3.1-nemotron-ultra-253b-v1")
+        .expect("switch to nvidia-nim profile");
+
+    assert_eq!(
+        Provider::name(&provider),
+        "OpenRouter",
+        "machine-facing name must stay stable for billing/routing"
+    );
+    assert_eq!(
+        Provider::display_name(&provider),
+        "NVIDIA NIM",
+        "header/UI display name must reflect the active runtime profile"
+    );
+
+    // Switching back to the plain OpenRouter aggregator restores the label.
+    provider
+        .set_model("anthropic/claude-sonnet-4")
+        .expect("switch back to openrouter aggregator");
+    assert_eq!(Provider::display_name(&provider), "OpenRouter");
+}
+
+#[test]
+fn runtime_display_name_for_profile_runtime_instance() {
+    // Direct unit coverage of the per-instance resolver used by
+    // `Provider::display_name`.
+    let _lock = ENV_LOCK.lock();
+    let temp = TempDir::new().expect("create temp home");
+    let jcode_home = temp.path().join("jcode-home");
+    let _jcode_home = EnvVarGuard::set("JCODE_HOME", &jcode_home);
+    let _home = EnvVarGuard::set("HOME", temp.path());
+    let _appdata = EnvVarGuard::set("APPDATA", temp.path().join("AppData").join("Roaming"));
+    let _env = isolate_openrouter_autodetect_env();
+    let _key = EnvVarGuard::set("NVIDIA_API_KEY", "nim-test-key");
+
+    let nim = OpenRouterProvider::new_openai_compatible_profile_runtime(
+        crate::provider_catalog::NVIDIA_NIM_PROFILE,
+    )
+    .expect("build nvidia-nim runtime");
+    assert_eq!(nim.runtime_display_name(), "NVIDIA NIM");
+    assert_eq!(Provider::name(&nim), "openrouter");
+}
diff --git a/crates/jcode-provider-core/src/lib.rs b/crates/jcode-provider-core/src/lib.rs
index 93fe676e7..da5bb929d 100644
--- a/crates/jcode-provider-core/src/lib.rs
+++ b/crates/jcode-provider-core/src/lib.rs
@@ -74,8 +74,25 @@ pub trait Provider: Send + Sync {
     }
 
     /// Get the provider name.
+    ///
+    /// This is the stable, machine-facing identifier (e.g. `"openrouter"`,
+    /// `"claude"`). Several surfaces key billing and routing decisions off this
+    /// value, so it must stay constant for a given provider class even when the
+    /// underlying runtime is a specific OpenAI-compatible profile. Use
+    /// [`Provider::display_name`] for anything shown to the user.
     fn name(&self) -> &str;
 
+    /// Human-facing provider label for the *current runtime selection*.
+    ///
+    /// Defaults to [`Provider::name`]. Provider orchestrators that multiplex
+    /// several backends behind one `name()` (notably the OpenRouter slot, which
+    /// also serves direct OpenAI-compatible profiles such as NVIDIA NIM or
+    /// DeepSeek) override this so the UI reflects the profile the user actually
+    /// selected at runtime instead of a fixed aggregator label.
+    fn display_name(&self) -> String {
+        self.name().to_string()
+    }
+
     /// Get the model identifier being used.
     fn model(&self) -> String {
         "unknown".to_string()
@@ -823,7 +840,7 @@ impl ModelCatalogSnapshot {
 
     pub fn from_provider(provider: &dyn Provider) -> Self {
         Self::new(
-            Some(provider.name().to_string()),
+            Some(provider.display_name()),
             Some(provider.model()),
             provider.available_models_display(),
             provider.model_routes(),
diff --git a/crates/jcode-tui/src/tui/app/tui_state.rs b/crates/jcode-tui/src/tui/app/tui_state.rs
index 7121bf6a7..f4e90b82d 100644
--- a/crates/jcode-tui/src/tui/app/tui_state.rs
+++ b/crates/jcode-tui/src/tui/app/tui_state.rs
@@ -459,9 +459,9 @@ impl crate::tui::TuiState for App {
         if self.is_remote {
             self.remote_header_provider_name().unwrap_or_default()
         } else {
-            self.remote_provider_name.clone().unwrap_or_else(|| {
-                crate::provider_catalog::runtime_provider_display_name(self.provider.name())
-            })
+            self.remote_provider_name
+                .clone()
+                .unwrap_or_else(|| self.provider.display_name())
         }
     }
 
@@ -1285,9 +1285,9 @@ impl crate::tui::TuiState for App {
             provider_name: if uses_remote_widget_metadata {
                 self.remote_provider_name
                     .clone()
-                    .or_else(|| Some(self.provider.name().to_string()))
+                    .or_else(|| Some(self.provider.display_name()))
             } else {
-                Some(self.provider.name().to_string())
+                Some(self.provider.display_name())
             },
             auth_method,
             upstream_provider: self.upstream_provider.clone(),
diff --git a/crates/jcode-tui/src/tui/ui_header.rs b/crates/jcode-tui/src/tui/ui_header.rs
index 4bc96213d..a58b3654c 100644
--- a/crates/jcode-tui/src/tui/ui_header.rs
+++ b/crates/jcode-tui/src/tui/ui_header.rs
@@ -45,9 +45,22 @@ pub(crate) fn capitalize(s: &str) -> String {
     }
 }
 
-fn format_model_name(short: &str) -> String {
+fn format_model_name(short: &str, provider_name: &str) -> String {
     if short.contains('/') {
-        return format!("OpenRouter: {}", short);
+        // Slashed model ids (e.g. `nvidia/nemotron-...`) are served by the
+        // OpenRouter slot, which also fronts direct OpenAI-compatible profiles
+        // such as NVIDIA NIM or DeepSeek. Label the line with the active
+        // provider's display name instead of hard-coding "OpenRouter" so the
+        // header matches the profile the user actually selected.
+        let label = {
+            let trimmed = provider_name.trim();
+            if trimmed.is_empty() {
+                "OpenRouter".to_string()
+            } else {
+                trimmed.to_string()
+            }
+        };
+        return format!("{}: {}", label, short);
     }
     if short.contains("opus") {
         if short.contains("4.5") {
@@ -389,7 +402,7 @@ pub(super) fn build_persistent_header(app: &dyn TuiState, width: u16) -> Vec<Lin
     let short_model = shorten_model_name(&model);
     let icon = connection_type_icon(app.connection_type().as_deref())
         .unwrap_or_else(|| crate::id::session_icon(&session_name));
-    let nice_model = format_model_name(&short_model);
+    let nice_model = format_model_name(&short_model, &app.provider_name());
     let build_info = binary_age().unwrap_or_else(|| "unknown".to_string());
     let align = Alignment::Center;
     let mut lines: Vec<Line> = Vec::new();
@@ -1028,4 +1041,26 @@ mod tests {
         let line = build_auth_status_line(&AuthStatus::default(), 120);
         assert!(line.spans.is_empty(), "line should be empty: {line:?}");
     }
+
+    #[test]
+    fn format_model_name_labels_slashed_models_with_active_provider() {
+        // Regression for issue #329: a NVIDIA NIM model must be labeled with the
+        // active provider's display name, not the fixed "OpenRouter" aggregator.
+        assert_eq!(
+            format_model_name("nvidia/nemotron-3-super-120b-a12b", "NVIDIA NIM"),
+            "NVIDIA NIM: nvidia/nemotron-3-super-120b-a12b"
+        );
+        // The public aggregator still reads "OpenRouter".
+        assert_eq!(
+            format_model_name("anthropic/claude-sonnet-4", "OpenRouter"),
+            "OpenRouter: anthropic/claude-sonnet-4"
+        );
+        // Missing provider name falls back to "OpenRouter" rather than an empty label.
+        assert_eq!(
+            format_model_name("deepseek/deepseek-chat", ""),
+            "OpenRouter: deepseek/deepseek-chat"
+        );
+        // Non-slashed models are unaffected by the provider label.
+        assert_eq!(format_model_name("claude-opus-4-6", "OpenRouter"), "Claude Opus");
+    }
 }

From 65fb513bf600baf18bb091ec5a852bb7b503d05a Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:08:07 -0700
Subject: [PATCH 21/57] desktop: add --real-transcript-scroll-benchmark to
 profile scrolling on real transcripts

---
 crates/jcode-desktop/src/desktop_benchmark.rs |  16 +
 crates/jcode-desktop/src/main.rs              | 300 ++++++++++++++++++
 crates/jcode-desktop/src/session_data.rs      |  72 +++++
 3 files changed, 388 insertions(+)

diff --git a/crates/jcode-desktop/src/desktop_benchmark.rs b/crates/jcode-desktop/src/desktop_benchmark.rs
index 6b8e7a021..4e6095f25 100644
--- a/crates/jcode-desktop/src/desktop_benchmark.rs
+++ b/crates/jcode-desktop/src/desktop_benchmark.rs
@@ -43,6 +43,22 @@ pub(super) fn resize_render_benchmark_frames(args: &[String]) -> Option<usize> {
     })
 }
 
+/// Parse `--real-transcript-scroll-benchmark[=N]`, the number of scroll frames
+/// to profile against each of the user's largest real on-disk transcripts.
+pub(super) fn real_transcript_scroll_benchmark_frames(args: &[String]) -> Option<usize> {
+    args.iter().enumerate().find_map(|(index, arg)| {
+        arg.strip_prefix("--real-transcript-scroll-benchmark=")
+            .and_then(|value| value.parse::<usize>().ok())
+            .or_else(|| {
+                (arg == "--real-transcript-scroll-benchmark").then(|| {
+                    args.get(index + 1)
+                        .and_then(|value| value.parse::<usize>().ok())
+                        .unwrap_or(600)
+                })
+            })
+    })
+}
+
 pub(super) fn benchmark_phase(
     mut frames: usize,
     mut run_frame: impl FnMut(usize) -> usize,
diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs
index 6afaed163..e8dafb983 100644
--- a/crates/jcode-desktop/src/main.rs
+++ b/crates/jcode-desktop/src/main.rs
@@ -716,6 +716,9 @@ async fn run() -> Result<()> {
     if let Some(frames) = scroll_render_benchmark_frames(&args) {
         return run_scroll_render_benchmark(frames);
     }
+    if let Some(frames) = real_transcript_scroll_benchmark_frames(&args) {
+        return run_real_transcript_scroll_benchmark(frames);
+    }
     if let Some(output_dir) = hero_screenshot_capture_dir(&args) {
         return run_hero_screenshot_capture(&output_dir).await;
     }
@@ -2269,6 +2272,7 @@ const DESKTOP_HELP_LINES: &[&str] = &[
     "  --capture-hero-animation DIR Write deterministic hero animation PNG frames and exit",
     "  --resize-render-benchmark[N]  Print CPU resize/render benchmark JSON and exit",
     "  --scroll-render-benchmark[N]  Print CPU scroll/render benchmark JSON and exit",
+    "  --real-transcript-scroll-benchmark[N]  Profile scrolling against your real on-disk transcripts and exit",
     "  --stream-e2e-benchmark[N]     Print stream event-to-paint guardrail JSON and exit",
     "  --headless-chat-smoke <MSG>  Run a hidden backend smoke test and print JSON events",
     "  --headless-chat-smoke=<MSG>  Same as above",
@@ -5161,6 +5165,302 @@ fn run_scroll_render_benchmark(frames: usize) -> Result<()> {
     Ok(())
 }
 
+/// Profile scrolling against the user's real on-disk transcripts.
+///
+/// This loads the largest real session files (full, untruncated message lists)
+/// and drives the exact production windowed-scroll render path: cached body
+/// wrap, a sliding text-buffer window, viewport extraction, glyph shaping for
+/// the visible window, text areas, and primitive geometry. Per-frame work is
+/// reported per session and aggregated so we can attribute any scroll jank to a
+/// specific stage on real content rather than synthetic fixtures.
+fn run_real_transcript_scroll_benchmark(frames: usize) -> Result<()> {
+    let frames = frames.max(1);
+    let size = PhysicalSize::new(1200, 760);
+    let transcripts = session_data::load_largest_real_transcripts(8, 24)
+        .context("failed to load real transcripts for scroll benchmark")?;
+
+    if transcripts.is_empty() {
+        println!(
+            "{}",
+            serde_json::to_string_pretty(&serde_json::json!({
+                "frames": frames,
+                "sessions": [],
+                "note": "no real transcripts with >=24 messages found under ~/.jcode/sessions",
+            }))?
+        );
+        return Ok(());
+    }
+
+    let mut session_reports = Vec::new();
+    let mut all_frame_samples: Vec<f64> = Vec::new();
+    let mut worst_stage_us = 0.0_f64;
+    let mut worst_stage_name = String::new();
+
+    for transcript in &transcripts {
+        let report = benchmark_real_transcript_scroll(transcript, size, frames);
+        if report.worst_stage_us > worst_stage_us {
+            worst_stage_us = report.worst_stage_us;
+            worst_stage_name = report.worst_stage_name.clone();
+        }
+        all_frame_samples.extend_from_slice(&report.frame_samples);
+        session_reports.push(report);
+    }
+
+    let budget_ms = duration_ms(DESKTOP_120FPS_FRAME_BUDGET);
+    let aggregate_p50 = percentile_ms(&all_frame_samples, 0.50);
+    let aggregate_p95 = percentile_ms(&all_frame_samples, 0.95);
+    let aggregate_p99 = percentile_ms(&all_frame_samples, 0.99);
+    let aggregate_max = max_sample_ms(&all_frame_samples);
+    let passes_budget = aggregate_p99 <= budget_ms;
+
+    let sessions_json = session_reports
+        .iter()
+        .map(RealTranscriptScrollReport::to_json)
+        .collect::<Vec<_>>();
+
+    println!(
+        "{}",
+        serde_json::to_string_pretty(&serde_json::json!({
+            "frames": frames,
+            "size": { "width": size.width, "height": size.height },
+            "target_frame_budget_ms": budget_ms,
+            "sessions_profiled": session_reports.len(),
+            "aggregate_full_scroll_frame": {
+                "frames": all_frame_samples.len(),
+                "p50_ms": aggregate_p50,
+                "p95_ms": aggregate_p95,
+                "p99_ms": aggregate_p99,
+                "max_ms": aggregate_max,
+            },
+            "worst_stage": { "name": worst_stage_name, "max_us_per_frame": worst_stage_us },
+            "passes_120fps_scroll_cpu_budget": passes_budget,
+            "sessions": sessions_json,
+        }))?
+    );
+    Ok(())
+}
+
+struct RealTranscriptScrollReport {
+    session_id: String,
+    title: String,
+    file_bytes: u64,
+    message_count: usize,
+    total_body_lines: usize,
+    max_scroll_lines: usize,
+    body_buffer_rebuilds: usize,
+    frame_samples: Vec<f64>,
+    stage_totals_us: Vec<(&'static str, f64)>,
+    setup_full_relayout_ms: f64,
+    worst_stage_name: String,
+    worst_stage_us: f64,
+}
+
+impl RealTranscriptScrollReport {
+    fn to_json(&self) -> serde_json::Value {
+        let frames = self.frame_samples.len().max(1);
+        let total_ms = self.frame_samples.iter().sum::<f64>();
+        let stages = self
+            .stage_totals_us
+            .iter()
+            .map(|(name, total_us)| {
+                serde_json::json!({
+                    "name": name,
+                    "mean_us_per_frame": total_us / frames as f64,
+                    "total_ms": total_us / 1000.0,
+                })
+            })
+            .collect::<Vec<_>>();
+        serde_json::json!({
+            "session_id": self.session_id,
+            "title": self.title,
+            "file_bytes": self.file_bytes,
+            "message_count": self.message_count,
+            "total_body_lines": self.total_body_lines,
+            "max_scroll_lines": self.max_scroll_lines,
+            "body_buffer_rebuilds": self.body_buffer_rebuilds,
+            "setup_full_body_relayout_ms": self.setup_full_relayout_ms,
+            "full_scroll_frame": {
+                "frames": self.frame_samples.len(),
+                "mean_ms_per_frame": total_ms / frames as f64,
+                "p50_ms": percentile_ms(&self.frame_samples, 0.50),
+                "p95_ms": percentile_ms(&self.frame_samples, 0.95),
+                "p99_ms": percentile_ms(&self.frame_samples, 0.99),
+                "max_ms": max_sample_ms(&self.frame_samples),
+            },
+            "subphases": stages,
+        })
+    }
+}
+
+/// Build a `SingleSessionApp` backed by a full real transcript, exactly the way
+/// the production resume path hydrates one from disk.
+fn real_transcript_scroll_app(transcript: &session_data::BenchmarkTranscript) -> SingleSessionApp {
+    let mut app = SingleSessionApp::new(None);
+    app.apply_resumed_session_transcript(transcript.messages.clone());
+    app.set_status_label(format!("real transcript: {}", transcript.title));
+    app
+}
+
+fn benchmark_real_transcript_scroll(
+    transcript: &session_data::BenchmarkTranscript,
+    size: PhysicalSize<u32>,
+    frames: usize,
+) -> RealTranscriptScrollReport {
+    let mut app = real_transcript_scroll_app(transcript);
+    let mut font_system = benchmark_font_system();
+
+    // One-time full body wrap (the cost paid when a transcript is first loaded
+    // or the window is resized). After this, scrolling must stay windowed.
+    let setup_started = Instant::now();
+    let body_lines = single_session_rendered_body_lines_for_tick(&app, size, 0);
+    let setup_full_relayout_ms = setup_started.elapsed().as_secs_f64() * 1000.0;
+    let total_body_lines = body_lines.len();
+
+    let max_scroll_lines = single_session_body_scroll_metrics_for_total_lines(
+        &app,
+        size,
+        total_body_lines,
+    )
+    .map(|metrics| metrics.max_scroll_lines)
+    .unwrap_or(0);
+
+    // Prime the sliding text-buffer window at the bottom of the transcript, the
+    // way the app does after hydrating a resumed session.
+    app.scroll_body_to_bottom();
+    let initial_viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &body_lines);
+    let initial_key =
+        single_session_text_key_for_tick_with_rendered_body(&app, size, 0, 0.0, &body_lines);
+    let mut buffers = single_session_text_buffers_from_key(&initial_key, size, &mut font_system);
+    let (mut window_start, mut window_end) =
+        single_session_body_text_window_bounds(&initial_viewport);
+    if let Some(body_buffer) = buffers.get_mut(1) {
+        *body_buffer = single_session_body_text_buffer_from_lines(
+            &mut font_system,
+            &body_lines[window_start..window_end],
+            size,
+            app.text_scale(),
+        );
+        body_buffer.set_scroll(
+            initial_viewport
+                .start_line
+                .saturating_sub(window_start)
+                .min(i32::MAX as usize) as i32,
+        );
+    }
+    let mut last_scroll_start = initial_viewport.start_line;
+
+    // Drive a long scroll sweep from bottom to top and back, one whole line per
+    // frame, so every frame crosses a new line boundary (the worst realistic
+    // continuous-scroll case).
+    let span = max_scroll_lines.max(1);
+    let mut viewport_us = 0.0;
+    let mut window_rebuild_us = 0.0;
+    let mut scroll_us = 0.0;
+    let mut glyph_us = 0.0;
+    let mut areas_us = 0.0;
+    let mut vertices_us = 0.0;
+    let mut body_buffer_rebuilds = 0usize;
+
+    let (frame_samples, _checksum) = benchmark_frame_samples(frames, |frame| {
+        // Triangle-wave scroll position covering the full transcript height.
+        let phase = frame % (span * 2);
+        let target = if phase <= span { phase } else { span * 2 - phase };
+        app.body_scroll_lines = target as f32;
+        let tick = frame as u64;
+
+        let phase_started = Instant::now();
+        let viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &body_lines);
+        viewport_us += phase_started.elapsed().as_secs_f64() * 1_000_000.0;
+
+        let phase_started = Instant::now();
+        if !single_session_body_text_window_contains(window_start, window_end, &viewport) {
+            (window_start, window_end) = single_session_body_text_window_bounds(&viewport);
+            if let Some(body_buffer) = buffers.get_mut(1) {
+                *body_buffer = single_session_body_text_buffer_from_lines(
+                    &mut font_system,
+                    &body_lines[window_start..window_end],
+                    size,
+                    app.text_scale(),
+                );
+            }
+            body_buffer_rebuilds += 1;
+            last_scroll_start = usize::MAX;
+        }
+        window_rebuild_us += phase_started.elapsed().as_secs_f64() * 1_000_000.0;
+
+        let phase_started = Instant::now();
+        if viewport.start_line != last_scroll_start {
+            if let Some(body_buffer) = buffers.get_mut(1) {
+                body_buffer.set_scroll(
+                    viewport
+                        .start_line
+                        .saturating_sub(window_start)
+                        .min(i32::MAX as usize) as i32,
+                );
+            }
+            last_scroll_start = viewport.start_line;
+        }
+        scroll_us += phase_started.elapsed().as_secs_f64() * 1_000_000.0;
+
+        let phase_started = Instant::now();
+        let glyph_checksum = buffers
+            .get(1)
+            .map(|body_buffer| {
+                body_buffer
+                    .layout_runs()
+                    .map(|run| run.glyphs.len())
+                    .sum::<usize>()
+            })
+            .unwrap_or_default();
+        glyph_us += phase_started.elapsed().as_secs_f64() * 1_000_000.0;
+
+        let phase_started = Instant::now();
+        let areas = single_session_text_areas_for_app_with_cached_body_viewport(
+            &app, &buffers, size, 0.0, viewport,
+        );
+        areas_us += phase_started.elapsed().as_secs_f64() * 1_000_000.0;
+
+        let phase_started = Instant::now();
+        let vertices = build_single_session_vertices_with_cached_body(
+            &app, size, 0.0, tick, 0.0, 1.0, &body_lines,
+        );
+        vertices_us += phase_started.elapsed().as_secs_f64() * 1_000_000.0;
+
+        buffers.len() ^ areas.len() ^ vertices.len() ^ glyph_checksum
+    });
+
+    let stage_totals_us = vec![
+        ("viewport_extract", viewport_us),
+        ("body_window_rebuild", window_rebuild_us),
+        ("body_scroll_set", scroll_us),
+        ("glyph_layout_count", glyph_us),
+        ("text_areas", areas_us),
+        ("primitive_vertices", vertices_us),
+    ];
+    let frames_f = frames.max(1) as f64;
+    let (worst_stage_name, worst_stage_us) = stage_totals_us
+        .iter()
+        .map(|(name, total)| (name.to_string(), total / frames_f))
+        .fold((String::new(), 0.0_f64), |acc, candidate| {
+            if candidate.1 > acc.1 { candidate } else { acc }
+        });
+
+    RealTranscriptScrollReport {
+        session_id: transcript.session_id.clone(),
+        title: transcript.title.clone(),
+        file_bytes: transcript.file_bytes,
+        message_count: transcript.messages.len(),
+        total_body_lines,
+        max_scroll_lines,
+        body_buffer_rebuilds,
+        frame_samples,
+        stage_totals_us,
+        setup_full_relayout_ms,
+        worst_stage_name,
+        worst_stage_us,
+    }
+}
+
 fn run_stream_e2e_benchmark(raw_events: usize) -> Result<()> {
     let result = run_desktop_stream_end_to_end_benchmark(raw_events);
     println!(
diff --git a/crates/jcode-desktop/src/session_data.rs b/crates/jcode-desktop/src/session_data.rs
index 0a363dcc6..72df5ec68 100644
--- a/crates/jcode-desktop/src/session_data.rs
+++ b/crates/jcode-desktop/src/session_data.rs
@@ -90,6 +90,78 @@ pub fn load_session_transcript_by_id(
     Ok(None)
 }
 
+/// A full, uncapped transcript loaded straight from disk, used by the
+/// real-transcript scroll benchmark so we profile the production render path
+/// against the user's actual session content rather than synthetic fixtures.
+#[derive(Debug, Clone)]
+pub struct BenchmarkTranscript {
+    pub session_id: String,
+    pub title: String,
+    pub file_bytes: u64,
+    pub messages: Vec<SessionTranscriptMessage>,
+}
+
+/// Load the largest real session transcripts on disk (by file size), returning
+/// the full message list for each (no card-style truncation). Used only by the
+/// scroll benchmark. Sessions with fewer than `min_messages` are skipped so the
+/// benchmark exercises long, scroll-heavy transcripts.
+pub fn load_largest_real_transcripts(
+    max_sessions: usize,
+    min_messages: usize,
+) -> Result<Vec<BenchmarkTranscript>> {
+    let sessions_dir = jcode_sessions_dir()?;
+    if !sessions_dir.exists() {
+        return Ok(Vec::new());
+    }
+
+    let mut candidates = fs::read_dir(&sessions_dir)
+        .with_context(|| format!("failed to read {}", sessions_dir.display()))?
+        .filter_map(|entry| entry.ok())
+        .filter_map(|entry| {
+            let path = entry.path();
+            session_file_candidate(path.clone())?;
+            let bytes = path.metadata().ok()?.len();
+            Some((path, bytes))
+        })
+        .collect::<Vec<_>>();
+    // Largest files first: they hold the longest transcripts and stress the
+    // windowed-scroll path the most.
+    candidates.sort_by_key(|(_, bytes)| std::cmp::Reverse(*bytes));
+
+    let mut transcripts = Vec::new();
+    for (path, bytes) in candidates {
+        if transcripts.len() >= max_sessions {
+            break;
+        }
+        let session = match load_stored_session(&path) {
+            Ok(session) => session,
+            Err(_) => continue,
+        };
+        let messages = session_transcript_messages(&session);
+        if messages.len() < min_messages {
+            continue;
+        }
+        let id = stored_string(session.id.as_deref())
+            .or_else(|| {
+                path.file_stem()
+                    .map(|stem| stem.to_string_lossy().into_owned())
+            })
+            .unwrap_or_else(|| "unknown-session".to_string());
+        let title = stored_string(session.custom_title.as_deref())
+            .or_else(|| stored_string(session.title.as_deref()))
+            .or_else(|| latest_user_preview(&messages))
+            .unwrap_or_else(|| short_session_name(&id));
+        transcripts.push(BenchmarkTranscript {
+            session_id: id,
+            title,
+            file_bytes: bytes,
+            messages,
+        });
+    }
+
+    Ok(transcripts)
+}
+
 fn load_recent_session_cards_with_limit(limit: usize) -> Result<Vec<SessionCard>> {
     let sessions_dir = jcode_sessions_dir()?;
     if !sessions_dir.exists() {

From 4faab8467100c255274f2d1be3453b961328daf4 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:15:12 -0700
Subject: [PATCH 22/57] fix(tui): don't snap selection to bottom edge when
 already pinned

The copy-selection drag edge auto-scroll "hot zone" (top/bottom few rows
of the chat pane) fired unconditionally whenever a drag entered the band.
When the transcript was already pinned to the bottom (the common case),
dragging into the bottom rows snapped the selection cursor to the very
last visible line and armed a downward autoscroll, even though there was
nothing more below to scroll into. This made it impossible to precisely
highlight the bottom rows of the transcript: the selection kept jumping
to the end.

Gate each directional hot zone on whether there is actually more
transcript to scroll into that direction (scroll > 0 for up,
visible_end < line_count for down). When there is nothing to scroll, the
edge band stays inert so the selection lands on the exact cell under the
cursor.

Adds a regression test that drags into the bottom hot zone while pinned
to the bottom and asserts no autoscroll arms and the selection lands on
the targeted line.
---
 .../tui/app/tests/scroll_copy_02/part_01.rs   | 112 ++++++++++++++++++
 crates/jcode-tui/src/tui/ui.rs                |  12 +-
 2 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs
index bfd0cd27a..c13cee50d 100644
--- a/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs
+++ b/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs
@@ -978,6 +978,118 @@ fn test_copy_selection_drag_near_top_edge_keeps_auto_scrolling() {
     ));
 }
 
+#[test]
+fn test_copy_selection_drag_to_bottom_edge_when_pinned_does_not_snap_or_autoscroll() {
+    // Regression: when the transcript is already pinned to the bottom (the common
+    // case), dragging a selection into the bottom edge "hot zone" used to always
+    // snap the cursor to the very last visible line and arm a downward autoscroll,
+    // even though there is nothing more below to scroll into. That made it
+    // impossible to precisely highlight the bottom rows: the selection kept
+    // jumping to the end. With nothing to scroll, the edge band must stay inert so
+    // the selection lands on the exact line under the cursor.
+    let _render_lock = scroll_render_test_lock();
+    let mut app = create_test_app();
+
+    // Tall transcript pinned to the bottom: the bottom rows of the pane are
+    // filled with real content, and there is nothing below to scroll into.
+    let lines = (1..=200)
+        .map(|idx| format!("line {idx:03}"))
+        .collect::<Vec<_>>()
+        .join("\n");
+    app.display_messages = vec![DisplayMessage {
+        role: "assistant".to_string(),
+        content: lines,
+        tool_calls: vec![],
+        duration_secs: None,
+        title: None,
+        tool_data: None,
+    }];
+    app.bump_display_messages_version();
+    app.scroll_offset = 0;
+    app.auto_scroll_paused = false;
+    app.is_processing = false;
+    app.streaming_text.clear();
+    app.status = ProcessingStatus::Idle;
+
+    let backend = ratatui::backend::TestBackend::new(60, 16);
+    let mut terminal = ratatui::Terminal::new(backend).expect("failed to create test terminal");
+    render_and_snap(&app, &mut terminal);
+
+    app.handle_key(KeyCode::Char('y'), KeyModifiers::ALT)
+        .unwrap();
+
+    let (visible_start, visible_end) =
+        crate::tui::ui::copy_viewport_visible_range().expect("visible copy range");
+    let line_count = crate::tui::ui::copy_viewport_line_count().expect("line count");
+    assert_eq!(
+        visible_end, line_count,
+        "test precondition: view must be pinned to the bottom with no content below"
+    );
+    assert!(
+        visible_start > 0,
+        "test precondition: tall transcript must have content scrolled above the view"
+    );
+
+    let layout = crate::tui::ui::last_layout_snapshot().expect("layout snapshot");
+    let area = layout.messages_area;
+    let col = area.x + 1;
+
+    // Pick a real content line near (but not at) the bottom to target.
+    let target_line = visible_end.saturating_sub(2);
+    assert!(target_line >= visible_start, "need a visible target line");
+    let target_row = area.y + (target_line - visible_start) as u16;
+    // The bottom edge band covers the last few rows; target_row must sit inside
+    // it for this regression to be meaningful.
+    let last_row = area.y + area.height - 1;
+    assert!(
+        target_row >= last_row.saturating_sub(2),
+        "target line must fall within the bottom edge hot zone"
+    );
+
+    // Anchor higher up in the viewport.
+    let anchor_row = area.y + 1;
+    app.handle_mouse_event(MouseEvent {
+        kind: MouseEventKind::Down(MouseButton::Left),
+        column: col,
+        row: anchor_row,
+        modifiers: KeyModifiers::empty(),
+    });
+    let before_scroll = app.scroll_offset();
+
+    app.handle_mouse_event(MouseEvent {
+        kind: MouseEventKind::Drag(MouseButton::Left),
+        column: col,
+        row: target_row,
+        modifiers: KeyModifiers::empty(),
+    });
+
+    // No autoscroll should be armed: there is nothing below to pull in.
+    assert!(
+        !crate::tui::TuiState::copy_selection_edge_autoscroll_active(&app),
+        "edge autoscroll must not arm when pinned to the bottom with no content below"
+    );
+    assert_eq!(
+        app.scroll_offset(),
+        before_scroll,
+        "dragging into the bottom band while pinned must not scroll"
+    );
+
+    // The selection end should land on the exact line under the cursor, not snap
+    // to the very last line of the transcript.
+    let range = app.normalized_copy_selection().expect("normalized range");
+    assert_eq!(
+        range.end.abs_line, target_line,
+        "selection should extend to the line under the cursor, not snap to the last line"
+    );
+
+    app.handle_mouse_event(MouseEvent {
+        kind: MouseEventKind::Up(MouseButton::Left),
+        column: col,
+        row: target_row,
+        modifiers: KeyModifiers::empty(),
+    });
+}
+
 #[test]
 fn test_alt_a_copies_chat_viewport_with_context_when_input_empty() {
     let _render_lock = scroll_render_test_lock();
diff --git a/crates/jcode-tui/src/tui/ui.rs b/crates/jcode-tui/src/tui/ui.rs
index 0cc070d65..a02bdbebb 100644
--- a/crates/jcode-tui/src/tui/ui.rs
+++ b/crates/jcode-tui/src/tui/ui.rs
@@ -1632,9 +1632,17 @@ pub(crate) fn copy_pane_vertical_edge_point(
     let zone = edge_autoscroll_zone_rows(area.height);
     let top_trigger = area.y.saturating_add(zone);
     let bottom_trigger = last_row.saturating_sub(zone);
-    let (edge_row, upward) = if row <= top_trigger {
+    // Only engage the hot zone when there is actually more transcript to pull in
+    // that direction. Otherwise dragging into the bottom band while the view is
+    // already pinned to the end (the common case) would snap the selection to the
+    // last visible line and fight precise highlighting of the bottom rows. When
+    // there is nothing to scroll, fall through (`None`) so the caller extends the
+    // selection to the exact cell under the cursor instead.
+    let can_scroll_up = snapshot.scroll > 0;
+    let can_scroll_down = snapshot.visible_end < snapshot.wrapped_plain_line_count();
+    let (edge_row, upward) = if row <= top_trigger && can_scroll_up {
         (area.y, true)
-    } else if row >= bottom_trigger {
+    } else if row >= bottom_trigger && can_scroll_down {
         (last_row, false)
     } else {
         return None;

From 98acc3ab10832e7d260e512c628c85640907fde5 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:17:36 -0700
Subject: [PATCH 23/57] feat(gmail): add in-agent Composio connect (OAuth)
 action

Adds a 'connect' action to the gmail tool that drives Composio's hosted
Connect Link flow: it creates an auth-link session, opens the Google
consent screen in the browser, polls until the connection is ACTIVE, and
persists the connected account to ~/.jcode/composio_gmail.json so future
sessions are already authorized.

- ComposioConfig gains auth_config_id + persisted-connection fallback.
- GmailClient: connect(), needs_connection(), supports_connect(),
  create_link()/wait_for_connection() against /connected_accounts.
- tool/gmail.rs handles 'connect' before the config gate and hints the
  agent to connect when no account exists yet.
- Tests for connect/needs_connection/effective_user_id; docs updated.
---
 crates/jcode-app-core/src/tool/gmail.rs |  44 ++++-
 crates/jcode-base/src/gmail.rs          | 249 +++++++++++++++++++++++-
 docs/GMAIL_COMPOSIO_BACKEND.md          |  27 ++-
 3 files changed, 312 insertions(+), 8 deletions(-)

diff --git a/crates/jcode-app-core/src/tool/gmail.rs b/crates/jcode-app-core/src/tool/gmail.rs
index f132b3f36..7afc720ff 100644
--- a/crates/jcode-app-core/src/tool/gmail.rs
+++ b/crates/jcode-app-core/src/tool/gmail.rs
@@ -67,8 +67,8 @@ impl Tool for GmailTool {
                 "intent": super::intent_schema_property(),
                 "action": {
                     "type": "string",
-                    "enum": ["search", "read", "list", "draft", "send", "send_draft", "threads", "thread", "labels", "trash", "modify_labels"],
-                    "description": "Action."
+                    "enum": ["connect", "search", "read", "list", "draft", "send", "send_draft", "threads", "thread", "labels", "trash", "modify_labels"],
+                    "description": "Action. Use 'connect' to set up Gmail access via the Composio managed backend (opens a browser OAuth screen for the user to approve)."
                 },
                 "query": { "type": "string" },
                 "message_id": { "type": "string" },
@@ -91,12 +91,48 @@ impl Tool for GmailTool {
     }
 
     async fn execute(&self, input: Value, _ctx: ToolContext) -> Result<ToolOutput> {
+        let params: GmailInput = serde_json::from_value(input)?;
+        let max = params.max_results.unwrap_or(10).min(50);
+
+        // The connect action sets up the Composio managed backend by opening a
+        // browser OAuth screen for the user to approve. It runs before the
+        // is_configured gate so it can establish the very first connection.
+        if params.action == "connect" {
+            if !self.client.supports_connect() {
+                return Ok(ToolOutput::new(
+                    "The 'connect' action is only available with the Composio Gmail backend. \
+                     Set JCODE_GMAIL_BACKEND=composio and COMPOSIO_API_KEY, then retry. \
+                     For the default backend, run `jcode login google` instead.",
+                ));
+            }
+            let no_browser = crate::auth::browser_suppressed(false);
+            match self.client.connect(!no_browser).await {
+                Ok(conn) => {
+                    let who = conn
+                        .email
+                        .clone()
+                        .unwrap_or_else(|| "your Gmail account".to_string());
+                    return Ok(ToolOutput::new(format!(
+                        "Gmail connected via Composio for {}. You can now search, read, draft, and send email.",
+                        who
+                    )));
+                }
+                Err(e) => {
+                    return Ok(ToolOutput::new(format!("Gmail connect failed: {}", e)));
+                }
+            }
+        }
+
         if !self.client.is_configured() {
             return Ok(ToolOutput::new(self.client.not_configured_message()));
         }
 
-        let params: GmailInput = serde_json::from_value(input)?;
-        let max = params.max_results.unwrap_or(10).min(50);
+        if self.client.needs_connection() {
+            return Ok(ToolOutput::new(
+                "Gmail (Composio backend) has no connected account yet. Run the gmail tool with \
+                 action 'connect' to authorize your Gmail account, then retry.",
+            ));
+        }
 
         match params.action.as_str() {
             "search" | "list" => {
diff --git a/crates/jcode-base/src/gmail.rs b/crates/jcode-base/src/gmail.rs
index 72ea959f7..f6fc2d7bd 100644
--- a/crates/jcode-base/src/gmail.rs
+++ b/crates/jcode-base/src/gmail.rs
@@ -26,6 +26,10 @@ pub struct ComposioConfig {
     pub base_url: String,
     pub connected_account_id: Option<String>,
     pub user_id: Option<String>,
+    /// Auth config that defines the Gmail OAuth blueprint (scopes + managed
+    /// Composio app). Required to initiate a Connect Link flow. Falls back to
+    /// a persisted value or `COMPOSIO_GMAIL_AUTH_CONFIG_ID`.
+    pub auth_config_id: Option<String>,
 }
 
 impl GmailBackend {
@@ -64,20 +68,73 @@ impl ComposioConfig {
             .ok()
             .filter(|s| !s.is_empty())
             .unwrap_or_else(|| COMPOSIO_DEFAULT_BASE.to_string());
+        // A previously completed Connect Link flow persists the connection so
+        // the user does not have to re-run setup each session.
+        let persisted = ComposioConnection::load().ok().flatten();
         let connected_account_id = std::env::var("COMPOSIO_GMAIL_CONNECTED_ACCOUNT_ID")
             .ok()
-            .filter(|s| !s.is_empty());
+            .filter(|s| !s.is_empty())
+            .or_else(|| persisted.as_ref().map(|p| p.connected_account_id.clone()));
         let user_id = std::env::var("COMPOSIO_GMAIL_USER_ID")
             .or_else(|_| std::env::var("COMPOSIO_USER_ID"))
             .ok()
-            .filter(|s| !s.is_empty());
+            .filter(|s| !s.is_empty())
+            .or_else(|| persisted.as_ref().map(|p| p.user_id.clone()));
+        let auth_config_id = std::env::var("COMPOSIO_GMAIL_AUTH_CONFIG_ID")
+            .ok()
+            .filter(|s| !s.is_empty())
+            .or_else(|| persisted.as_ref().and_then(|p| p.auth_config_id.clone()));
         Some(Self {
             api_key,
             base_url,
             connected_account_id,
             user_id,
+            auth_config_id,
         })
     }
+
+    /// Effective user id, defaulting to "default" so a single-user CLI works
+    /// without any extra configuration.
+    pub fn effective_user_id(&self) -> String {
+        self.user_id.clone().unwrap_or_else(|| "default".to_string())
+    }
+}
+
+/// Persisted record of a completed Composio Gmail connection, stored at
+/// `~/.jcode/composio_gmail.json`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ComposioConnection {
+    pub connected_account_id: String,
+    pub user_id: String,
+    pub auth_config_id: Option<String>,
+    #[serde(default)]
+    pub email: Option<String>,
+}
+
+impl ComposioConnection {
+    pub fn path() -> Result<std::path::PathBuf> {
+        Ok(crate::storage::jcode_dir()?.join("composio_gmail.json"))
+    }
+
+    pub fn load() -> Result<Option<Self>> {
+        let path = Self::path()?;
+        if !path.exists() {
+            return Ok(None);
+        }
+        crate::storage::harden_secret_file_permissions(&path);
+        Ok(crate::storage::read_json(&path).ok())
+    }
+
+    pub fn save(&self) -> Result<()> {
+        let path = Self::path()?;
+        crate::storage::write_json_secret(&path, self)
+    }
+}
+
+/// Result of initiating a Connect Link OAuth flow.
+pub struct ComposioLink {
+    pub connected_account_id: String,
+    pub redirect_url: String,
 }
 
 pub struct GmailClient {
@@ -151,6 +208,164 @@ impl GmailClient {
         }
     }
 
+    /// True only for the Composio backend when no connected account exists yet.
+    /// In that state, Gmail calls will fail until the user completes the
+    /// Connect Link OAuth flow via [`GmailClient::connect`].
+    pub fn needs_connection(&self) -> bool {
+        matches!(&self.backend, GmailBackend::Composio(cfg) if cfg.connected_account_id.is_none())
+    }
+
+    /// Whether the active backend supports an interactive `connect` action.
+    pub fn supports_connect(&self) -> bool {
+        matches!(&self.backend, GmailBackend::Composio(_))
+    }
+
+    /// Initiate a Composio Connect Link OAuth flow, open the consent screen in
+    /// the user's browser, wait for them to approve, then persist the resulting
+    /// connected account so future sessions are already authenticated.
+    ///
+    /// `open_browser` controls whether we try to launch the system browser
+    /// (set false over SSH/headless; the URL is always returned).
+    pub async fn connect(&self, open_browser: bool) -> Result<ComposioConnection> {
+        let cfg = match &self.backend {
+            GmailBackend::Composio(cfg) => cfg,
+            GmailBackend::Direct => {
+                anyhow::bail!(
+                    "The Composio connect flow is only available when JCODE_GMAIL_BACKEND=composio."
+                )
+            }
+        };
+        let auth_config_id = cfg.auth_config_id.clone().ok_or_else(|| {
+            anyhow::anyhow!(
+                "No Composio Gmail auth config configured. Create a Gmail auth config in the \
+                 Composio dashboard and set COMPOSIO_GMAIL_AUTH_CONFIG_ID."
+            )
+        })?;
+        let user_id = cfg.effective_user_id();
+
+        let link = self.create_link(cfg, &auth_config_id, &user_id).await?;
+        if open_browser {
+            let _ = open::that(&link.redirect_url);
+        }
+        eprintln!(
+            "\nOpening Gmail authorization in your browser. If it did not open, visit:\n{}\n",
+            link.redirect_url
+        );
+
+        let account = self
+            .wait_for_connection(cfg, &link.connected_account_id)
+            .await?;
+
+        let email = account
+            .get("data")
+            .and_then(|d| d.get("email"))
+            .or_else(|| account.get("email"))
+            .and_then(|e| e.as_str())
+            .map(|s| s.to_string());
+
+        let connection = ComposioConnection {
+            connected_account_id: link.connected_account_id,
+            user_id,
+            auth_config_id: Some(auth_config_id),
+            email,
+        };
+        connection.save()?;
+        Ok(connection)
+    }
+
+    /// Create a hosted Connect Link auth session.
+    async fn create_link(
+        &self,
+        cfg: &ComposioConfig,
+        auth_config_id: &str,
+        user_id: &str,
+    ) -> Result<ComposioLink> {
+        let endpoint = format!("{}/connected_accounts/link", cfg.base_url.trim_end_matches('/'));
+        let payload = json!({
+            "auth_config_id": auth_config_id,
+            "user_id": user_id,
+        });
+        let resp = self
+            .http
+            .post(&endpoint)
+            .header("x-api-key", &cfg.api_key)
+            .json(&payload)
+            .send()
+            .await?;
+        let status = resp.status();
+        let text = resp.text().await?;
+        if !status.is_success() {
+            return Err(anyhow::anyhow!(
+                "Composio connect-link error {}: {}",
+                status,
+                truncate_error(&text)
+            ));
+        }
+        let body: Value = serde_json::from_str(&text)?;
+        let redirect_url = body
+            .get("redirect_url")
+            .and_then(|v| v.as_str())
+            .ok_or_else(|| anyhow::anyhow!("Composio did not return a redirect_url"))?
+            .to_string();
+        let connected_account_id = body
+            .get("connected_account_id")
+            .and_then(|v| v.as_str())
+            .ok_or_else(|| anyhow::anyhow!("Composio did not return a connected_account_id"))?
+            .to_string();
+        Ok(ComposioLink {
+            connected_account_id,
+            redirect_url,
+        })
+    }
+
+    /// Poll a connected account until it becomes ACTIVE (or a terminal error).
+    async fn wait_for_connection(
+        &self,
+        cfg: &ComposioConfig,
+        connected_account_id: &str,
+    ) -> Result<Value> {
+        // INITIATED links auto-expire after ~10 minutes; poll up to ~5 minutes.
+        const MAX_ATTEMPTS: u32 = 150;
+        const POLL_INTERVAL: std::time::Duration = std::time::Duration::from_secs(2);
+        let endpoint = format!(
+            "{}/connected_accounts/{}",
+            cfg.base_url.trim_end_matches('/'),
+            connected_account_id
+        );
+        for _ in 0..MAX_ATTEMPTS {
+            let resp = self
+                .http
+                .get(&endpoint)
+                .header("x-api-key", &cfg.api_key)
+                .send()
+                .await?;
+            if resp.status().is_success() {
+                let body: Value = resp.json().await?;
+                let status = body
+                    .get("status")
+                    .or_else(|| body.get("data").and_then(|d| d.get("status")))
+                    .and_then(|s| s.as_str())
+                    .unwrap_or("");
+                match status {
+                    "ACTIVE" => return Ok(body),
+                    "FAILED" | "EXPIRED" => {
+                        let reason = body
+                            .get("status_reason")
+                            .and_then(|r| r.as_str())
+                            .unwrap_or("no reason provided");
+                        anyhow::bail!("Gmail connection {}: {}", status, reason);
+                    }
+                    _ => {}
+                }
+            }
+            tokio::time::sleep(POLL_INTERVAL).await;
+        }
+        anyhow::bail!(
+            "Timed out waiting for Gmail authorization. Re-run the connect action and finish the \
+             browser consent within a few minutes."
+        )
+    }
+
     /// Send an authenticated Gmail REST request and return the parsed JSON
     /// response. Both backends produce the identical Gmail API JSON shape, so
     /// callers can deserialize into the same typed structs.
@@ -645,6 +860,7 @@ mod tests {
             base_url: COMPOSIO_DEFAULT_BASE.to_string(),
             connected_account_id: Some("ca_123".to_string()),
             user_id: Some("me".to_string()),
+            auth_config_id: Some("ac_123".to_string()),
         }
     }
 
@@ -675,6 +891,7 @@ mod tests {
             base_url: COMPOSIO_DEFAULT_BASE.to_string(),
             connected_account_id: None,
             user_id: None,
+            auth_config_id: None,
         };
         let payload = build_composio_proxy_payload(&bare, "GET", "http://x/y", None);
         assert!(payload.get("connected_account_id").is_none());
@@ -708,4 +925,32 @@ mod tests {
         assert!(capped.len() <= 401 + 3); // 400 chars + ellipsis byte
         assert!(capped.ends_with('…'));
     }
+
+    #[test]
+    fn needs_connection_reflects_connected_account_presence() {
+        // Composio without a connected account needs an interactive connect.
+        let mut without = cfg();
+        without.connected_account_id = None;
+        let client = GmailClient::with_backend(GmailBackend::Composio(without));
+        assert!(client.supports_connect());
+        assert!(client.needs_connection());
+
+        // With a connected account it is ready to make calls.
+        let client = GmailClient::with_backend(GmailBackend::Composio(cfg()));
+        assert!(!client.needs_connection());
+
+        // Direct backend never needs a Composio connection and cannot connect.
+        let direct = GmailClient::with_backend(GmailBackend::Direct);
+        assert!(!direct.supports_connect());
+        assert!(!direct.needs_connection());
+    }
+
+    #[test]
+    fn effective_user_id_defaults_to_default() {
+        let mut c = cfg();
+        c.user_id = None;
+        assert_eq!(c.effective_user_id(), "default");
+        c.user_id = Some("alice".to_string());
+        assert_eq!(c.effective_user_id(), "alice");
+    }
 }
diff --git a/docs/GMAIL_COMPOSIO_BACKEND.md b/docs/GMAIL_COMPOSIO_BACKEND.md
index 9b2ec44eb..8a3164221 100644
--- a/docs/GMAIL_COMPOSIO_BACKEND.md
+++ b/docs/GMAIL_COMPOSIO_BACKEND.md
@@ -36,8 +36,31 @@ falls back to `direct`.
 |---|---|---|
 | `COMPOSIO_API_KEY` | Yes | Project API key from <https://platform.composio.dev> |
 | `COMPOSIO_BASE_URL` | No | Override API base (default `https://backend.composio.dev/api/v3.1`) |
-| `COMPOSIO_GMAIL_CONNECTED_ACCOUNT_ID` | No | Pin a specific connected account (`ca_...`) |
-| `COMPOSIO_GMAIL_USER_ID` / `COMPOSIO_USER_ID` | No | End-user id for multi-user connected accounts |
+| `COMPOSIO_GMAIL_AUTH_CONFIG_ID` | For `connect` | Gmail auth config id (`ac_...`) from the Composio dashboard. Defines the OAuth blueprint/scopes used by the connect flow. |
+| `COMPOSIO_GMAIL_CONNECTED_ACCOUNT_ID` | No | Pin a specific connected account (`ca_...`). Normally set automatically after `connect`. |
+| `COMPOSIO_GMAIL_USER_ID` / `COMPOSIO_USER_ID` | No | End-user id for multi-user connected accounts (defaults to `default`) |
+
+## Connecting a Gmail account (in-agent OAuth)
+
+Once `COMPOSIO_API_KEY` and `COMPOSIO_GMAIL_AUTH_CONFIG_ID` are set, the user
+(or the agent) runs the gmail tool with `action: "connect"`:
+
+1. jcode calls Composio's `POST /connected_accounts/link` (hosted "Connect
+   Link" flow) to start an OAuth session.
+2. The returned `redirect_url` is opened in the system browser (printed to
+   stderr as a fallback, e.g. over SSH).
+3. The user approves Gmail access on Google's consent screen. Because Composio
+   owns a Google-verified app, there is no "unverified app" warning.
+4. jcode polls `GET /connected_accounts/{id}` until the connection is `ACTIVE`,
+   then persists it to `~/.jcode/composio_gmail.json`.
+
+Future sessions load the persisted `connected_account_id`, so the connect step
+is a one-time action per account. Tool calls before a connection exists return
+a hint telling the agent to run `action: "connect"` first.
+
+> Note: Composio is retiring `initiate()` for managed OAuth in favor of the
+> Connect Link `link()` flow used here, so this path is the supported one going
+> forward.
 
 ## One-time Composio setup
 

From 6df69c89adf3a11131334773804a0ee9043054d2 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:17:51 -0700
Subject: [PATCH 24/57] tui: pace streaming text reveal to fix Anthropic
 choppiness

The StreamBuffer previously revealed text the instant a provider delta
arrived (only capping bursts >96 chars per frame). OpenAI emits many tiny
token deltas so this looked smooth, but Anthropic coalesces deltas into
20-40 char bursts with gaps, so each burst popped in at once and the UI
stair-stepped.

Replace the burst cap with a time-paced proportional reveal: text
accumulates in a backlog and drips out at base + gain*backlog chars/sec,
with the per-step elapsed time clamped so idle gaps cannot bank budget
that dumps the next burst. This smooths bursty providers while keeping
fast steady feeds responsive. Remote tick now reveals via
flush_smooth_frame() to match; flush() still drains fully at finalize.
---
 crates/jcode-tui-core/src/stream_buffer.rs    | 325 +++++++++++-------
 crates/jcode-tui/src/tui/app/remote.rs        |   4 +-
 .../tests/remote_startup_input_02/part_01.rs  |   7 +-
 3 files changed, 215 insertions(+), 121 deletions(-)

diff --git a/crates/jcode-tui-core/src/stream_buffer.rs b/crates/jcode-tui-core/src/stream_buffer.rs
index 2b267b483..b8a32c05e 100644
--- a/crates/jcode-tui-core/src/stream_buffer.rs
+++ b/crates/jcode-tui-core/src/stream_buffer.rs
@@ -1,20 +1,53 @@
-//! Semantic stream buffer - chunks streaming text at natural boundaries
+//! Semantic stream buffer - paces streaming text reveal at a smooth rate.
+//!
+//! Providers feed text deltas with wildly different cadences. OpenAI emits many
+//! tiny token-level deltas (a few chars every ~10-15ms), which already looks
+//! smooth. Anthropic coalesces `content_block_delta` events into larger chunks
+//! that arrive in bursts with gaps (e.g. 20-40 chars every ~80-100ms). If we
+//! reveal each burst the instant it arrives, the UI stair-steps: a clump of
+//! text pops in, then nothing for several frames, then another clump.
+//!
+//! To make every provider look the same, this buffer decouples *arrival* from
+//! *reveal*. Incoming text accumulates in a backlog, and a time-paced
+//! proportional controller drips it out: the reveal rate rises with the backlog
+//! so we never fall far behind a fast model, yet a lone burst is spread over
+//! several frames instead of dumped in one. The elapsed-time step is clamped so
+//! an idle gap (connect latency, tool pauses) cannot bank budget that would
+//! instantly dump the next burst.
 
 use serde::Serialize;
 use std::time::{Duration, Instant};
 
-/// Buffer that accumulates streaming text and flushes at semantic boundaries
+/// Steady-state reveal rate (chars/sec) when the backlog is empty. This sets the
+/// floor cadence and how the trailing characters of a burst drain out.
+const BASE_REVEAL_CPS: f32 = 180.0;
+
+/// Additional reveal rate per buffered character. The controller speeds up as the
+/// backlog grows so we track fast models with bounded latency: at steady incoming
+/// rate `R`, the backlog settles near `(R - BASE_REVEAL_CPS) / REVEAL_BACKLOG_GAIN`.
+const REVEAL_BACKLOG_GAIN: f32 = 3.0;
+
+/// Maximum elapsed time credited to a single reveal step. Without this, a long
+/// idle gap before the first/next burst would bank a huge budget and dump the
+/// whole burst at once, reintroducing the choppiness we are trying to remove.
+const MAX_REVEAL_STEP: Duration = Duration::from_millis(50);
+
+/// Buffer that accumulates streaming text and reveals it at a smooth, paced rate.
 pub struct StreamBuffer {
     buffer: String,
-    last_flush: Instant,
-    timeout: Duration,
-    smooth_frame_chars: usize,
+    last_reveal: Instant,
+    /// Fractional reveal budget carried between steps so slow rates still make
+    /// progress instead of rounding down to zero forever.
+    carry: f32,
+    base_cps: f32,
+    backlog_gain: f32,
+    max_step: Duration,
 }
 
 #[derive(Debug, Clone, Serialize)]
 pub struct StreamBufferMemoryProfile {
     pub buffered_text_bytes: usize,
-    pub timeout_ms: u64,
+    pub base_reveal_cps: u32,
 }
 
 impl Default for StreamBuffer {
@@ -27,50 +60,37 @@ impl StreamBuffer {
     pub fn new() -> Self {
         Self {
             buffer: String::new(),
-            last_flush: Instant::now(),
-            timeout: Duration::from_millis(150),
-            smooth_frame_chars: 96,
+            last_reveal: Instant::now(),
+            carry: 0.0,
+            base_cps: BASE_REVEAL_CPS,
+            backlog_gain: REVEAL_BACKLOG_GAIN,
+            max_step: MAX_REVEAL_STEP,
         }
     }
 
-    /// Push text into buffer, returns chunk to display if boundary found
+    /// Push text into the buffer, returning any paced chunk ready to display now.
     pub fn push(&mut self, text: &str) -> Option<String> {
         self.buffer.push_str(text);
-
-        // Find semantic boundary
-        if let Some(boundary) = self.find_boundary() {
-            return Some(self.drain_prefix(boundary.min(self.smooth_frame_boundary())));
-        }
-
-        if self.last_flush.elapsed() >= self.timeout {
-            return self.flush_smooth_frame();
-        }
-
-        None
+        self.reveal_now(Instant::now())
     }
 
-    /// Force flush the entire buffer (call on timeout or message end)
+    /// Force flush the entire buffer (call on message end, commit, or interrupt).
     pub fn flush(&mut self) -> Option<String> {
+        self.carry = 0.0;
+        self.last_reveal = Instant::now();
         if self.buffer.is_empty() {
             None
         } else {
-            self.last_flush = Instant::now();
             Some(std::mem::take(&mut self.buffer))
         }
     }
 
-    /// Flush up to one smooth-render frame worth of text. This is used for
-    /// periodic streaming redraws so large provider/SSE bursts are revealed
-    /// over a few quick frames instead of popping into the TUI all at once.
-    /// Finalization paths should still call [`flush`] to avoid leaving text
-    /// buffered at message boundaries.
+    /// Reveal one paced frame worth of buffered text. Called from the periodic
+    /// redraw tick so the backlog drains smoothly even when no new delta arrived
+    /// this frame. Finalization paths should still call [`flush`] to avoid
+    /// leaving text buffered at message boundaries.
     pub fn flush_smooth_frame(&mut self) -> Option<String> {
-        if self.buffer.is_empty() {
-            None
-        } else {
-            let boundary = self.smooth_frame_boundary().min(self.buffer.len());
-            Some(self.drain_prefix(boundary))
-        }
+        self.reveal_now(Instant::now())
     }
 
     /// Check if buffer is empty
@@ -81,138 +101,205 @@ impl StreamBuffer {
     /// Clear the buffer without returning content
     pub fn clear(&mut self) {
         self.buffer.clear();
-        self.last_flush = Instant::now();
+        self.carry = 0.0;
+        self.last_reveal = Instant::now();
     }
 
     pub fn debug_memory_profile(&self) -> StreamBufferMemoryProfile {
         StreamBufferMemoryProfile {
             buffered_text_bytes: self.buffer.len(),
-            timeout_ms: self.timeout.as_millis() as u64,
+            base_reveal_cps: self.base_cps as u32,
         }
     }
 
-    fn smooth_frame_boundary(&self) -> usize {
-        if self.buffer.chars().count() <= self.smooth_frame_chars {
-            return self.buffer.len();
+    /// Proportional, time-paced reveal. Advances the budget by the (clamped)
+    /// elapsed time times a backlog-scaled rate, then drains that many chars.
+    fn reveal_now(&mut self, now: Instant) -> Option<String> {
+        let backlog = self.buffer.chars().count();
+        if backlog == 0 {
+            // No backlog: reset so an idle gap cannot bank reveal budget.
+            self.carry = 0.0;
+            self.last_reveal = now;
+            return None;
         }
-        self.buffer
-            .char_indices()
-            .map(|(idx, _)| idx)
-            .nth(self.smooth_frame_chars)
-            .unwrap_or(self.buffer.len())
-    }
-
-    fn drain_prefix(&mut self, boundary: usize) -> String {
-        let boundary = floor_char_boundary(&self.buffer, boundary);
-        let chunk = self.buffer[..boundary].to_string();
-        self.buffer = self.buffer[boundary..].to_string();
-        self.last_flush = Instant::now();
-        chunk
-    }
 
-    /// Find a boundary in the buffer (newline-based), returns position after boundary
-    fn find_boundary(&self) -> Option<usize> {
-        let buf = &self.buffer;
+        let dt = now
+            .saturating_duration_since(self.last_reveal)
+            .min(self.max_step)
+            .as_secs_f32();
+        self.last_reveal = now;
 
-        // Code block start/end (```language or ```)
-        if let Some(pos) = buf.find("```") {
-            // Find end of the ``` line
-            if let Some(newline) = buf[pos..].find('\n') {
-                return Some(pos + newline + 1);
-            }
-        }
+        let cps = self.base_cps + backlog as f32 * self.backlog_gain;
+        self.carry += dt * cps;
 
-        // Any newline - simple and predictable
-        if let Some(pos) = buf.find('\n') {
-            return Some(pos + 1);
+        let mut reveal = self.carry.floor() as usize;
+        if reveal == 0 {
+            // Budget hasn't reached a whole char yet; keep accumulating.
+            return None;
         }
-
-        None
+        reveal = reveal.min(backlog);
+        self.carry -= reveal as f32;
+        Some(self.drain_chars(reveal))
     }
-}
 
-fn floor_char_boundary(s: &str, mut index: usize) -> usize {
-    index = index.min(s.len());
-    while index > 0 && !s.is_char_boundary(index) {
-        index -= 1;
+    /// Drain `char_count` characters from the front of the buffer on a UTF-8
+    /// boundary.
+    fn drain_chars(&mut self, char_count: usize) -> String {
+        if char_count == 0 {
+            return String::new();
+        }
+        let end = self
+            .buffer
+            .char_indices()
+            .nth(char_count)
+            .map(|(idx, _)| idx)
+            .unwrap_or(self.buffer.len());
+        let chunk = self.buffer[..end].to_string();
+        self.buffer.replace_range(..end, "");
+        chunk
     }
-    index
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
 
+    /// Drain the buffer to empty using fixed-cadence redraw frames, returning the
+    /// per-frame reveal sizes (in chars).
+    fn drain_frames(buf: &mut StreamBuffer, start: Instant, frame: Duration) -> Vec<usize> {
+        let mut sizes = Vec::new();
+        let mut t = start;
+        let mut guard = 0;
+        while !buf.is_empty() {
+            t += frame;
+            if let Some(chunk) = buf.reveal_now(t) {
+                sizes.push(chunk.chars().count());
+            }
+            guard += 1;
+            assert!(guard < 100_000, "drain did not converge");
+        }
+        sizes
+    }
+
     #[test]
-    fn test_newline_boundary() {
+    fn flush_drains_everything() {
         let mut buf = StreamBuffer::new();
-        let result = buf.push("First line\nSecond line");
-        assert_eq!(result, Some("First line\n".to_string()));
-        assert_eq!(buf.buffer, "Second line");
+        buf.buffer.push_str("remaining content");
+        let result = buf.flush();
+        assert_eq!(result, Some("remaining content".to_string()));
+        assert!(buf.is_empty());
     }
 
     #[test]
-    fn test_code_block_boundary() {
+    fn empty_push_reveals_nothing() {
         let mut buf = StreamBuffer::new();
-        // Code block marker ``` causes flush to include the whole line
-        let result = buf.push("```rust\nfn main() {}");
-        assert_eq!(result, Some("```rust\n".to_string()));
+        assert_eq!(buf.push(""), None);
+        assert!(buf.is_empty());
     }
 
     #[test]
-    fn test_no_boundary() {
+    fn paced_reveal_spreads_a_burst_over_multiple_frames() {
+        let start = Instant::now();
         let mut buf = StreamBuffer::new();
-        let result = buf.push("partial text without newline");
-        assert_eq!(result, None);
-        assert_eq!(buf.buffer, "partial text without newline");
+        buf.last_reveal = start;
+        buf.buffer.push_str(&"a".repeat(40));
+
+        let sizes = drain_frames(&mut buf, start, Duration::from_millis(16));
+        let total: usize = sizes.iter().sum();
+        assert_eq!(total, 40);
+        assert!(
+            sizes.len() >= 3,
+            "a 40-char burst should reveal across multiple frames, got {sizes:?}"
+        );
+        // No single 16ms frame should dump the whole burst.
+        assert!(
+            sizes.iter().all(|&n| n < 40),
+            "no frame should reveal the entire burst, got {sizes:?}"
+        );
     }
 
     #[test]
-    fn test_flush() {
+    fn idle_gap_does_not_dump_the_next_burst() {
+        let start = Instant::now();
         let mut buf = StreamBuffer::new();
-        buf.push("remaining content");
-        let result = buf.flush();
-        assert_eq!(result, Some("remaining content".to_string()));
-        assert!(buf.is_empty());
+        buf.last_reveal = start;
+        // Simulate a long connect/tool pause, then a burst arrives.
+        let arrival = start + Duration::from_secs(5);
+        buf.buffer.push_str(&"b".repeat(30));
+        let first = buf
+            .reveal_now(arrival)
+            .map(|c| c.chars().count())
+            .unwrap_or(0);
+        assert!(
+            first < 30,
+            "the idle gap must not bank budget that dumps the burst, revealed {first}"
+        );
+        // The remainder still drains over subsequent frames.
+        let sizes = drain_frames(&mut buf, arrival, Duration::from_millis(16));
+        assert_eq!(first + sizes.iter().sum::<usize>(), 30);
     }
 
     #[test]
-    fn test_multiple_newlines() {
-        let mut buf = StreamBuffer::new();
-        // First push returns first line
-        let result = buf.push("Line one\nLine two\nLine three");
-        assert_eq!(result, Some("Line one\n".to_string()));
-        // Second push returns second line
-        let result = buf.push("");
-        assert_eq!(result, Some("Line two\n".to_string()));
+    fn bursty_and_steady_feeds_reveal_at_similar_smoothness() {
+        // Steady (OpenAI-like): 4 chars every frame.
+        let start = Instant::now();
+        let frame = Duration::from_millis(16);
+        let mut steady = StreamBuffer::new();
+        steady.last_reveal = start;
+        let mut steady_sizes = Vec::new();
+        let mut t = start;
+        for _ in 0..40 {
+            t += frame;
+            steady.buffer.push_str("abcd");
+            if let Some(c) = steady.reveal_now(t) {
+                steady_sizes.push(c.chars().count());
+            }
+        }
+        steady_sizes.extend(drain_frames(&mut steady, t, frame));
+
+        // Bursty (Anthropic-like): 24 chars every 6th frame.
+        let mut bursty = StreamBuffer::new();
+        bursty.last_reveal = start;
+        let mut bursty_sizes = Vec::new();
+        let mut t = start;
+        for i in 0..60 {
+            t += frame;
+            if i % 6 == 0 {
+                bursty.buffer.push_str(&"x".repeat(24));
+            }
+            if let Some(c) = bursty.reveal_now(t) {
+                bursty_sizes.push(c.chars().count());
+            }
+        }
+        bursty_sizes.extend(drain_frames(&mut bursty, t, frame));
+
+        let max_burst = *bursty_sizes.iter().max().unwrap();
+        // The whole 24-char clump must never appear in a single frame; pacing
+        // should break it into smaller per-frame reveals like the steady feed.
+        assert!(
+            max_burst < 24,
+            "bursty feed should be smoothed, max frame reveal was {max_burst} ({bursty_sizes:?})"
+        );
     }
 
     #[test]
-    fn test_smooth_frame_flush_caps_large_chunks() {
+    fn reveal_respects_utf8_boundaries() {
+        let start = Instant::now();
         let mut buf = StreamBuffer::new();
-        let text = "a".repeat(150);
-        assert_eq!(buf.push(&text), None);
-
-        let first = buf.flush_smooth_frame().unwrap();
-        assert_eq!(first.len(), 96);
-        assert_eq!(buf.buffer.len(), 54);
+        buf.last_reveal = start;
+        buf.buffer.push_str(&"é".repeat(40));
 
-        let rest = buf.flush().unwrap();
-        assert_eq!(rest.len(), 54);
-        assert!(buf.is_empty());
+        let sizes = drain_frames(&mut buf, start, Duration::from_millis(16));
+        assert_eq!(sizes.iter().sum::<usize>(), 40);
     }
 
     #[test]
-    fn test_smooth_frame_flush_respects_utf8_boundaries() {
+    fn small_trailing_text_eventually_drains() {
+        let start = Instant::now();
         let mut buf = StreamBuffer::new();
-        let text = "é".repeat(120);
-        assert_eq!(buf.push(&text), None);
-
-        let first = buf.flush_smooth_frame().unwrap();
-        assert_eq!(first.chars().count(), 96);
-        assert!(first.is_char_boundary(first.len()));
-
-        let rest = buf.flush().unwrap();
-        assert_eq!(rest.chars().count(), 24);
+        buf.last_reveal = start;
+        buf.buffer.push_str("hi");
+        let sizes = drain_frames(&mut buf, start, Duration::from_millis(16));
+        assert_eq!(sizes.iter().sum::<usize>(), 2);
     }
 }
diff --git a/crates/jcode-tui/src/tui/app/remote.rs b/crates/jcode-tui/src/tui/app/remote.rs
index 50089382d..db3d3f8ab 100644
--- a/crates/jcode-tui/src/tui/app/remote.rs
+++ b/crates/jcode-tui/src/tui/app/remote.rs
@@ -81,7 +81,9 @@ pub(super) async fn handle_tick(app: &mut App, remote: &mut RemoteConnection) ->
     needs_redraw |= app.update_chat_overscroll();
     needs_redraw |= app.update_pinned_images_auto_hide();
     needs_redraw |= dispatch_compacted_history_load(app, remote).await;
-    if let Some(chunk) = app.stream_buffer.flush() {
+    // Reveal buffered streaming text at the smooth paced rate on each tick, the
+    // same as the local turn loop. Finalization paths still call flush().
+    if let Some(chunk) = app.stream_buffer.flush_smooth_frame() {
         app.append_streaming_text(&chunk);
         needs_redraw = true;
     }
diff --git a/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs
index 4fd24e789..bb765c636 100644
--- a/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs
+++ b/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs
@@ -591,7 +591,12 @@ fn test_submit_input_commits_pending_streaming_assistant_text_before_user_messag
     ));
     app.bump_display_messages_version();
     app.streaming_text = "Here is the final paragraph".to_string();
-    assert_eq!(app.stream_buffer.push(" that was still buffered."), None);
+    // Mirror the real streaming caller: append any paced chunk the buffer reveals.
+    // The paced StreamBuffer may reveal part of the text immediately, so commit
+    // (below) must still flush the remainder.
+    if let Some(chunk) = app.stream_buffer.push(" that was still buffered.") {
+        app.append_streaming_text(&chunk);
+    }
 
     app.input = "follow up".to_string();
     app.cursor_pos = app.input.len();

From a54a6b3d55f0a54b696e366f4ea23672415c8b64 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:24:24 -0700
Subject: [PATCH 25/57] feat(onboarding): show both Codex and Claude Code
 sessions in resume picker

The first-run onboarding 'continue where you left off' picker previously
surfaced only ONE external CLI: when a user was logged into both Codex
and Claude Code, it picked whichever had the most recent transcript and
hid the other CLI's history entirely.

Now the onboarding picker loads and displays every detected external
CLI's transcripts together in one combined, recency-sorted list:
- Add SessionFilterMode::ExternalClis (Codex OR Claude Code).
- Add load_external_cli_sessions_grouped_multi to load several CLIs.
- onboarding_open_transcript_picker now takes the full detected CLI set;
  the banner reads 'We found your Codex and Claude Code sessions' when
  both are present.

Resume still works off each session's own id/source, so selecting either
CLI's transcript resumes correctly. Adds a regression test seeding both a
Codex and a Claude Code transcript and asserting both appear.
---
 crates/jcode-tui-session-picker/src/lib.rs    |   9 ++
 .../src/tui/app/onboarding_flow_control.rs    | 110 ++++++++++--------
 .../src/tui/app/tests/onboarding_flow.rs      |  68 ++++++++++-
 crates/jcode-tui/src/tui/session_picker.rs    |  12 +-
 .../src/tui/session_picker/filter.rs          |   3 +
 .../src/tui/session_picker/loading.rs         |  38 ++++++
 6 files changed, 192 insertions(+), 48 deletions(-)

diff --git a/crates/jcode-tui-session-picker/src/lib.rs b/crates/jcode-tui-session-picker/src/lib.rs
index d3deb23d8..44417f48b 100644
--- a/crates/jcode-tui-session-picker/src/lib.rs
+++ b/crates/jcode-tui-session-picker/src/lib.rs
@@ -69,6 +69,10 @@ pub enum SessionFilterMode {
     Codex,
     Pi,
     OpenCode,
+    /// External CLI transcripts (Codex and/or Claude Code) shown together.
+    /// Used by the first-run onboarding "continue where you left off" picker so
+    /// it surfaces every external CLI the user is logged into, not just one.
+    ExternalClis,
 }
 
 impl SessionFilterMode {
@@ -81,6 +85,9 @@ impl SessionFilterMode {
             Self::Codex => Self::Pi,
             Self::Pi => Self::OpenCode,
             Self::OpenCode => Self::All,
+            // ExternalClis is an onboarding-only composite filter, not part of
+            // the user-facing cycle; treat it as a no-op anchor.
+            Self::ExternalClis => Self::All,
         }
     }
 
@@ -93,6 +100,7 @@ impl SessionFilterMode {
             Self::Codex => Self::ClaudeCode,
             Self::Pi => Self::Codex,
             Self::OpenCode => Self::Pi,
+            Self::ExternalClis => Self::All,
         }
     }
 
@@ -105,6 +113,7 @@ impl SessionFilterMode {
             Self::Codex => Some("🧠 Codex"),
             Self::Pi => Some("π Pi"),
             Self::OpenCode => Some("◌ OpenCode"),
+            Self::ExternalClis => Some("🧠 Codex + 🧵 Claude Code"),
         }
     }
 }
diff --git a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs
index 0803c626c..ec20f5439 100644
--- a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs
+++ b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs
@@ -208,36 +208,17 @@ impl App {
     /// straight into the resume picker (with an onboarding banner + a
     /// "Start a new session" option) instead of asking a separate Yes/No
     /// "continue where you left off" question. When both CLIs are present we
-    /// surface whichever one has the most recent transcript.
+    /// show *both* their transcripts together in one combined, recency-sorted
+    /// list rather than hiding one behind the other.
     pub(super) fn onboarding_after_model_select(&mut self) {
         if !matches!(self.onboarding_phase(), Some(OnboardingPhase::ModelSelect)) {
             return;
         }
-        match self.onboarding_most_recent_external_cli() {
-            Some(cli) => self.onboarding_open_transcript_picker(cli),
-            None => self.onboarding_show_suggestions(),
-        }
-    }
-
-    /// Among the external CLIs whose OAuth credentials are present, pick the one
-    /// with the most recent transcript. Ties (or a CLI with no transcripts yet)
-    /// fall back to detection order (Codex first). Returns `None` when no
-    /// external CLI login is present.
-    fn onboarding_most_recent_external_cli(&self) -> Option<ExternalCli> {
         let present = crate::tui::app::onboarding_flow::detect_external_cli_oauths();
-        match present.as_slice() {
-            [] => None,
-            [only] => Some(*only),
-            _ => {
-                // Multiple logins: rank by newest transcript mtime.
-                present
-                    .iter()
-                    .max_by_key(|cli| {
-                        session_picker::latest_external_cli_session_secs(**cli).unwrap_or(0)
-                    })
-                    .copied()
-                    .or_else(|| present.first().copied())
-            }
+        if present.is_empty() {
+            self.onboarding_show_suggestions();
+        } else {
+            self.onboarding_open_transcript_picker(&present);
         }
     }
 
@@ -283,7 +264,7 @@ impl App {
             _ => return,
         };
         if wants_continue {
-            self.onboarding_open_transcript_picker(cli);
+            self.onboarding_open_transcript_picker(std::slice::from_ref(&cli));
         } else {
             self.onboarding_show_suggestions();
         }
@@ -602,51 +583,89 @@ impl App {
         });
     }
 
-    /// Open a single-select resume-style picker filtered to the external CLI's
-    /// transcripts. Falls back to the session-search prompt if none load.
-    pub(super) fn onboarding_open_transcript_picker(&mut self, cli: ExternalCli) {
-        let filter = match cli {
-            ExternalCli::Codex => SessionFilterMode::Codex,
-            ExternalCli::ClaudeCode => SessionFilterMode::ClaudeCode,
+    /// Open a single-select resume-style picker showing the transcripts of every
+    /// detected external CLI together (Codex and/or Claude Code), sorted by
+    /// recency. Falls back to the session-search prompt if none load.
+    ///
+    /// `clis` is the set of external CLIs the user is logged into. When more than
+    /// one is present we still show them in one combined list so the user never
+    /// has a CLI's history hidden behind the other.
+    pub(super) fn onboarding_open_transcript_picker(&mut self, clis: &[ExternalCli]) {
+        // Choose a representative CLI for the banner/mode headline: the one with
+        // the most recent transcript (falling back to detection order).
+        let headline_cli = clis
+            .iter()
+            .copied()
+            .max_by_key(|cli| session_picker::latest_external_cli_session_secs(*cli).unwrap_or(0))
+            .or_else(|| clis.first().copied())
+            .unwrap_or(ExternalCli::Codex);
+
+        let multi = clis.len() > 1;
+        let filter = if multi {
+            SessionFilterMode::ExternalClis
+        } else {
+            match headline_cli {
+                ExternalCli::Codex => SessionFilterMode::Codex,
+                ExternalCli::ClaudeCode => SessionFilterMode::ClaudeCode,
+            }
         };
 
-        // The onboarding picker only ever shows this one external CLI's
-        // transcripts, so load just those instead of paying the full
-        // `load_sessions_grouped` cost (parsing every jcode snapshot, the other
-        // CLIs, and listing servers). This keeps first-run onboarding snappy.
+        // The onboarding picker only shows external CLI transcripts, so load just
+        // those instead of paying the full `load_sessions_grouped` cost (parsing
+        // every jcode snapshot and listing servers). This keeps first-run
+        // onboarding snappy while still surfacing every logged-in CLI.
         let (server_groups, orphan_sessions) =
-            session_picker::load_external_cli_sessions_grouped(cli);
+            session_picker::load_external_cli_sessions_grouped_multi(clis);
 
         let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions);
         picker.activate_external_cli_filter(filter);
 
         if picker.visible_session_count() == 0 {
-            self.onboarding_fallback_to_session_search(cli);
+            self.onboarding_fallback_to_session_search(headline_cli);
             return;
         }
 
-        picker.activate_onboarding_banner(Self::onboarding_resume_banner_lines(cli));
+        picker.activate_onboarding_banner(Self::onboarding_resume_banner_lines(clis));
 
         self.session_picker_overlay = Some(RefCell::new(picker));
-        self.session_picker_mode = SessionPickerMode::Onboarding { cli };
+        self.session_picker_mode = SessionPickerMode::Onboarding { cli: headline_cli };
         if let Some(flow) = self.onboarding_flow.as_mut() {
             flow.phase = OnboardingPhase::TranscriptPick {
-                cli,
+                cli: headline_cli,
                 shown_at: Instant::now(),
             };
         }
+        let resume_label = if multi {
+            "Resume a Codex or Claude Code session".to_string()
+        } else {
+            format!("Resume a {} session", headline_cli.label())
+        };
         self.set_status_notice(format!(
-            "Resume a {} session (↑↓ to choose, Enter to resume) or pick \"Start a new session\"",
-            cli.label()
+            "{resume_label} (↑↓ to choose, Enter to resume) or pick \"Start a new session\""
         ));
     }
 
     /// Formatted onboarding prompt shown in the reserved top band of the
     /// resume picker on first run.
-    fn onboarding_resume_banner_lines(cli: ExternalCli) -> Vec<ratatui::text::Line<'static>> {
+    fn onboarding_resume_banner_lines(clis: &[ExternalCli]) -> Vec<ratatui::text::Line<'static>> {
         use ratatui::style::{Color, Modifier, Style};
         use ratatui::text::{Line, Span};
         let accent = crate::tui::color_support::rgb(186, 139, 255);
+        // Describe whichever CLIs were detected: "Codex", "Claude Code", or
+        // "Codex and Claude Code" when both are present.
+        let mut labels: Vec<&'static str> = Vec::new();
+        for cli in clis {
+            let label = cli.label();
+            if !labels.contains(&label) {
+                labels.push(label);
+            }
+        }
+        let found = match labels.as_slice() {
+            [] => "external".to_string(),
+            [only] => (*only).to_string(),
+            [first, second] => format!("{first} and {second}"),
+            _ => labels.join(", "),
+        };
         vec![
             Line::from(vec![Span::styled(
                 "Welcome to jcode 🎉",
@@ -654,8 +673,7 @@ impl App {
             )]),
             Line::from(vec![Span::styled(
                 format!(
-                    "We found your {} sessions. Pick one below to pick up right where you left off,",
-                    cli.label()
+                    "We found your {found} sessions. Pick one below to pick up right where you left off,"
                 ),
                 Style::default().fg(Color::White),
             )]),
diff --git a/crates/jcode-tui/src/tui/app/tests/onboarding_flow.rs b/crates/jcode-tui/src/tui/app/tests/onboarding_flow.rs
index e0aa77f69..f93ecfc9c 100644
--- a/crates/jcode-tui/src/tui/app/tests/onboarding_flow.rs
+++ b/crates/jcode-tui/src/tui/app/tests/onboarding_flow.rs
@@ -411,7 +411,7 @@ fn no_external_transcripts_lands_on_suggestions_without_autosubmit() {
         // Temp home has no Codex transcripts, so opening the picker should land
         // the user on the clean new-session suggestion cards rather than
         // auto-submitting a "search for my last session" turn.
-        app.onboarding_open_transcript_picker(ExternalCli::Codex);
+        app.onboarding_open_transcript_picker(&[ExternalCli::Codex]);
         assert!(matches!(
             app.onboarding_phase(),
             Some(OnboardingPhase::Suggestions)
@@ -432,6 +432,72 @@ fn onboarding_picker_mode_carries_cli() {
     assert_ne!(mode, SessionPickerMode::Resume);
 }
 
+#[test]
+fn onboarding_picker_shows_both_codex_and_claude_transcripts() {
+    use std::fs;
+    with_temp_jcode_home(|| {
+        // Seed one Codex transcript and one Claude Code transcript under the
+        // sandbox-aware external home ($JCODE_HOME/external/...), mirroring a
+        // user who is logged into BOTH CLIs.
+        let home = std::env::var_os("JCODE_HOME").expect("JCODE_HOME");
+        let external = std::path::Path::new(&home).join("external");
+
+        let codex_dir = external.join(".codex/sessions/2026/04/05");
+        fs::create_dir_all(&codex_dir).expect("codex dir");
+        fs::write(
+            codex_dir.join("rollout-2026-04-05T19-00-00-codextest.jsonl"),
+            concat!(
+                "{\"timestamp\":\"2026-04-05T19:00:00Z\",\"type\":\"session_meta\",\"payload\":{\"id\":\"019d-codex-both\",\"timestamp\":\"2026-04-05T18:59:00Z\",\"cwd\":\"/tmp/codex-demo\",\"source\":\"cli\"}}\n",
+                "{\"timestamp\":\"2026-04-05T19:00:03Z\",\"type\":\"response_item\",\"payload\":{\"type\":\"message\",\"role\":\"user\",\"content\":[{\"type\":\"input_text\",\"text\":\"CODEX_MARKER fix the widget\"}]}}\n",
+            ),
+        )
+        .expect("write codex transcript");
+
+        let claude_dir = external.join(".claude/projects/demo-project");
+        fs::create_dir_all(&claude_dir).expect("claude dir");
+        fs::write(
+            claude_dir.join("claude-session-both.jsonl"),
+            concat!(
+                "{\"type\":\"user\",\"uuid\":\"u1\",\"message\":{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"CLAUDE_MARKER fix the flaky test\"}]}}\n",
+                "{\"type\":\"assistant\",\"uuid\":\"a1\",\"parentUuid\":\"u1\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"done\"}]}}\n"
+            ),
+        )
+        .expect("write claude transcript");
+
+        let mut app = onboarding_test_app();
+        // Open the combined picker for BOTH detected CLIs.
+        app.onboarding_open_transcript_picker(&[ExternalCli::Codex, ExternalCli::ClaudeCode]);
+
+        // The picker overlay should be up with both CLIs' sessions visible
+        // (not just one).
+        let picker_cell = app
+            .session_picker_overlay
+            .as_ref()
+            .expect("picker overlay should be open");
+        let picker = picker_cell.borrow();
+        assert!(
+            picker.visible_session_count() >= 2,
+            "combined picker should list both CLIs' sessions, got {}",
+            picker.visible_session_count()
+        );
+
+        let mut saw_codex = false;
+        let mut saw_claude = false;
+        for session in picker.visible_session_iter_for_test() {
+            match session.source {
+                jcode_tui_session_picker::SessionSource::Codex => saw_codex = true,
+                jcode_tui_session_picker::SessionSource::ClaudeCode => saw_claude = true,
+                _ => {}
+            }
+        }
+        assert!(saw_codex, "Codex session should be present in combined picker");
+        assert!(
+            saw_claude,
+            "Claude Code session should be present in combined picker"
+        );
+    });
+}
+
 #[test]
 fn startup_check_skips_when_session_already_has_activity() {
     with_temp_jcode_home(|| {
diff --git a/crates/jcode-tui/src/tui/session_picker.rs b/crates/jcode-tui/src/tui/session_picker.rs
index 988dfe8b1..4ce1dbeb2 100644
--- a/crates/jcode-tui/src/tui/session_picker.rs
+++ b/crates/jcode-tui/src/tui/session_picker.rs
@@ -34,7 +34,7 @@ mod render;
 #[cfg(test)]
 use loading::collect_recent_session_stems;
 pub(crate) use loading::latest_external_cli_session_secs;
-pub(crate) use loading::load_external_cli_sessions_grouped;
+pub(crate) use loading::load_external_cli_sessions_grouped_multi;
 use loading::{build_messages_preview, build_search_index, crashed_sessions_from_all_sessions};
 pub use loading::{
     invalidate_session_list_cache, load_cached_sessions_grouped, load_servers, load_sessions,
@@ -525,6 +525,16 @@ impl SessionPicker {
             .filter_map(|session_ref| self.session_by_ref(*session_ref))
     }
 
+    /// Test-only accessor: the source classification of every currently visible
+    /// session. Used by onboarding tests to assert the combined external-CLI
+    /// picker surfaces both Codex and Claude Code transcripts.
+    #[cfg(test)]
+    pub(crate) fn visible_session_iter_for_test(
+        &self,
+    ) -> impl Iterator<Item = &SessionInfo> + '_ {
+        self.visible_session_iter()
+    }
+
     fn load_preview_for_target(
         resume_target: ResumeTarget,
         external_path: Option<String>,
diff --git a/crates/jcode-tui/src/tui/session_picker/filter.rs b/crates/jcode-tui/src/tui/session_picker/filter.rs
index d12da9141..82883b2f4 100644
--- a/crates/jcode-tui/src/tui/session_picker/filter.rs
+++ b/crates/jcode-tui/src/tui/session_picker/filter.rs
@@ -146,6 +146,9 @@ impl SessionPicker {
             SessionFilterMode::Codex => Self::session_is_codex(session),
             SessionFilterMode::Pi => Self::session_is_pi(session),
             SessionFilterMode::OpenCode => Self::session_is_open_code(session),
+            SessionFilterMode::ExternalClis => {
+                Self::session_is_codex(session) || Self::session_is_claude_code(session)
+            }
         }
     }
 
diff --git a/crates/jcode-tui/src/tui/session_picker/loading.rs b/crates/jcode-tui/src/tui/session_picker/loading.rs
index 4acc66aef..52e3a936f 100644
--- a/crates/jcode-tui/src/tui/session_picker/loading.rs
+++ b/crates/jcode-tui/src/tui/session_picker/loading.rs
@@ -2670,6 +2670,11 @@ pub fn load_sessions_grouped() -> Result<(Vec<ServerGroup>, Vec<SessionInfo>)> {
 /// jcode snapshot, the other CLIs, and listing servers) is wasted there. This
 /// scoped loader keeps onboarding responsive by touching only the relevant
 /// transcripts.
+///
+/// The live onboarding flow now uses [`load_external_cli_sessions_grouped_multi`]
+/// (it shows every logged-in CLI together), so this single-CLI variant is kept
+/// only as a focused test helper.
+#[cfg(test)]
 pub(crate) fn load_external_cli_sessions_grouped(
     cli: crate::tui::app::onboarding_flow::ExternalCli,
 ) -> (Vec<ServerGroup>, Vec<SessionInfo>) {
@@ -2682,6 +2687,39 @@ pub(crate) fn load_external_cli_sessions_grouped(
     (Vec::new(), sessions)
 }
 
+/// Load sessions for several external CLIs at once (Codex and/or Claude Code),
+/// returned as a single combined orphan list compatible with
+/// `SessionPicker::new_grouped`.
+///
+/// First-run onboarding's "continue where you left off" picker shows every
+/// external CLI the user is logged into, not just one, so it loads all of them
+/// here. Each CLI is still scoped to its own transcripts (no jcode snapshots /
+/// servers), keeping onboarding responsive. The picker sorts the merged result
+/// by recency, so the newest session across all CLIs floats to the top.
+pub(crate) fn load_external_cli_sessions_grouped_multi(
+    clis: &[crate::tui::app::onboarding_flow::ExternalCli],
+) -> (Vec<ServerGroup>, Vec<SessionInfo>) {
+    use crate::tui::app::onboarding_flow::ExternalCli;
+    let scan_limit = session_scan_limit();
+    let mut sessions = Vec::new();
+    let mut seen_codex = false;
+    let mut seen_claude = false;
+    for cli in clis {
+        match cli {
+            ExternalCli::Codex if !seen_codex => {
+                seen_codex = true;
+                sessions.extend(load_external_codex_sessions(scan_limit));
+            }
+            ExternalCli::ClaudeCode if !seen_claude => {
+                seen_claude = true;
+                sessions.extend(load_external_claude_code_sessions(scan_limit));
+            }
+            _ => {}
+        }
+    }
+    (Vec::new(), sessions)
+}
+
 #[cfg(test)]
 #[path = "loading_tests.rs"]
 mod tests;

From bd7bac86ce510d2109ee1a9f3965c4629a247a14 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:25:35 -0700
Subject: [PATCH 26/57] feat(display): default reasoning display to current

Show the model's live reasoning out of the box. DisplayConfig now defaults
reasoning_display to Current (with show_thinking=true to keep the provider
request + streaming display paths in sync), and the generated default config
documents reasoning_display = "current".
---
 crates/jcode-base/src/config/default_file.rs | 6 +++---
 crates/jcode-config-types/src/lib.rs         | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/crates/jcode-base/src/config/default_file.rs b/crates/jcode-base/src/config/default_file.rs
index 9d05c4695..c4799c611 100644
--- a/crates/jcode-base/src/config/default_file.rs
+++ b/crates/jcode-base/src/config/default_file.rs
@@ -114,8 +114,8 @@ mouse_capture = true
 # Enable debug socket for external control/testing (default: false)
 debug_socket = false
 
-# Show thinking/reasoning content (default: false)
-show_thinking = false
+# Show thinking/reasoning content (default: true)
+show_thinking = true
 
 # How to display reasoning/thinking content: "off", "full", or "current".
 #   off     - never show reasoning
@@ -123,7 +123,7 @@ show_thinking = false
 #   current - show only the live reasoning; collapse it once the model commits
 #             an assistant message or runs a tool, then show the next one
 # When unset, falls back to show_thinking (true => full, false => off).
-# reasoning_display = "current"
+reasoning_display = "current"
 
 # Markdown spacing style: "compact" (chat/TUI) or "document" (docs-like)
 # markdown_spacing = "compact"
diff --git a/crates/jcode-config-types/src/lib.rs b/crates/jcode-config-types/src/lib.rs
index 932377a1d..31785cba7 100644
--- a/crates/jcode-config-types/src/lib.rs
+++ b/crates/jcode-config-types/src/lib.rs
@@ -592,7 +592,7 @@ pub struct DisplayConfig {
     pub debug_socket: bool,
     /// Center all content (default: false)
     pub centered: bool,
-    /// Show thinking/reasoning content by default (default: false)
+    /// Show thinking/reasoning content by default (default: true)
     pub show_thinking: bool,
     /// How to display reasoning/thinking content (off/full/current).
     /// When unset, falls back to `show_thinking` (true => full, false => off).
@@ -638,8 +638,8 @@ impl Default for DisplayConfig {
             mouse_capture: true,
             debug_socket: false,
             centered: false,
-            show_thinking: false,
-            reasoning_display: None,
+            show_thinking: true,
+            reasoning_display: Some(ReasoningDisplayMode::Current),
             diagram_mode: DiagramDisplayMode::default(),
             markdown_spacing: MarkdownSpacingMode::default(),
             idle_animation: true,

From 66ac0eb4fd33bb8037033e7d5e9262bfb7fddff3 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:37:36 -0700
Subject: [PATCH 27/57] provider-doctor: add observe-only reasoning_capability
 checkpoint + parallel tool-call probe

- New REASONING_CAPABILITY checkpoint (taxonomy v3), never required for
  user-readiness and excluded from strict coverage. A reasoning word problem
  is sent and the turn is classified streamed/opaque/none from StreamEvent
  signals (ThinkingDelta text, ThinkingSignatureDelta, OpenAIReasoning, and
  Gemini-3 tool thought_signature). Absence records 'none' and passes.
- Shared native tool smoke gains a Phase 3 that asks for two tool calls in a
  single assistant message, replays both tool_use blocks (each with its own
  thought_signature) in one assistant turn and answers both results, recording
  parallel_tool_calls: verified|skipped (best-effort, never fails).
- Wired reasoning into the antigravity, generic-native, and claude drivers;
  skipped on non-full tiers; surfaced in the doctor report detail.
- Unit tests for classification, parallel replay shape, detail strings, and the
  observe-only contract (probe error -> skipped, never failed).
---
 .../src/auth/live_provider_probes.rs          | 432 +++++++++++++++++-
 crates/jcode-base/src/auth/provider_e2e.rs    | 187 +++++++-
 crates/jcode-base/src/live_tests.rs           |  35 +-
 3 files changed, 639 insertions(+), 15 deletions(-)

diff --git a/crates/jcode-base/src/auth/live_provider_probes.rs b/crates/jcode-base/src/auth/live_provider_probes.rs
index 749c43bee..fe6ae0b2d 100644
--- a/crates/jcode-base/src/auth/live_provider_probes.rs
+++ b/crates/jcode-base/src/auth/live_provider_probes.rs
@@ -258,6 +258,123 @@ mod tests {
             "gpt-5.1"
         );
     }
+
+    fn tool_call_with_signature(signature: Option<&str>) -> NativeClaudeToolCall {
+        NativeClaudeToolCall {
+            id: "call_1".to_string(),
+            name: "read".to_string(),
+            input_json: "{}".to_string(),
+            thought_signature: signature.map(str::to_string),
+        }
+    }
+
+    #[test]
+    fn reasoning_capability_classifies_streamed_when_reasoning_text_present() {
+        let outcome = NativeClaudeStreamOutcome {
+            reasoning_text_len: 42,
+            saw_message_end: true,
+            ..Default::default()
+        };
+        assert_eq!(outcome.reasoning_capability(), "streamed");
+    }
+
+    #[test]
+    fn reasoning_capability_classifies_opaque_from_thinking_signature() {
+        // No reasoning text, but a ThinkingSignatureDelta-style signal: opaque.
+        let outcome = NativeClaudeStreamOutcome {
+            saw_reasoning_signal: true,
+            saw_message_end: true,
+            ..Default::default()
+        };
+        assert_eq!(outcome.reasoning_capability(), "opaque");
+    }
+
+    #[test]
+    fn reasoning_capability_classifies_opaque_from_tool_thought_signature() {
+        // A Gemini-3 tool call carrying a thought_signature is an opaque signal
+        // even when no reasoning text streamed.
+        let outcome = NativeClaudeStreamOutcome {
+            tool_calls: vec![tool_call_with_signature(Some("SIG_ABC"))],
+            saw_message_end: true,
+            ..Default::default()
+        };
+        assert_eq!(outcome.reasoning_capability(), "opaque");
+    }
+
+    #[test]
+    fn reasoning_capability_classifies_none_without_any_signal() {
+        // A tool call with no signature is not a reasoning signal.
+        let outcome = NativeClaudeStreamOutcome {
+            tool_calls: vec![tool_call_with_signature(None)],
+            saw_message_end: true,
+            ..Default::default()
+        };
+        assert_eq!(outcome.reasoning_capability(), "none");
+    }
+
+    #[test]
+    fn reasoning_capability_prefers_streamed_over_opaque() {
+        // Streamed reasoning text wins even when an opaque signal is also present.
+        let outcome = NativeClaudeStreamOutcome {
+            reasoning_text_len: 10,
+            saw_reasoning_signal: true,
+            tool_calls: vec![tool_call_with_signature(Some("SIG"))],
+            saw_message_end: true,
+            ..Default::default()
+        };
+        assert_eq!(outcome.reasoning_capability(), "streamed");
+    }
+
+    #[test]
+    fn parallel_tool_use_replays_every_signature_in_one_assistant_message() {
+        let calls = vec![
+            NativeClaudeToolCall {
+                id: "a".to_string(),
+                name: "read".to_string(),
+                input_json: "{\"file_path\":\"/tmp/a\"}".to_string(),
+                thought_signature: Some("SIG_A".to_string()),
+            },
+            NativeClaudeToolCall {
+                id: "b".to_string(),
+                name: "read".to_string(),
+                input_json: "{\"file_path\":\"/tmp/b\"}".to_string(),
+                thought_signature: Some("SIG_B".to_string()),
+            },
+        ];
+        let assistant = assistant_parallel_tool_uses(&calls);
+        assert!(matches!(assistant.role, Role::Assistant));
+        // One assistant message must carry BOTH tool_use blocks, each with its
+        // own signature preserved.
+        assert_eq!(assistant.content.len(), 2);
+        let sigs: Vec<Option<String>> = assistant
+            .content
+            .iter()
+            .map(|block| match block {
+                ContentBlock::ToolUse {
+                    thought_signature, ..
+                } => thought_signature.clone(),
+                other => panic!("expected ToolUse, got {other:?}"),
+            })
+            .collect();
+        assert_eq!(
+            sigs,
+            vec![Some("SIG_A".to_string()), Some("SIG_B".to_string())]
+        );
+
+        // The results message must answer every call with a matching id.
+        let results = parallel_tool_results(&calls);
+        assert!(matches!(results.role, Role::User));
+        assert_eq!(results.content.len(), 2);
+        let ids: Vec<String> = results
+            .content
+            .iter()
+            .map(|block| match block {
+                ContentBlock::ToolResult { tool_use_id, .. } => tool_use_id.clone(),
+                other => panic!("expected ToolResult, got {other:?}"),
+            })
+            .collect();
+        assert_eq!(ids, vec!["a".to_string(), "b".to_string()]);
+    }
 }
 
 pub async fn run_live_openai_compatible_stream_smoke(
@@ -528,6 +645,15 @@ struct NativeClaudeStreamOutcome {
     /// Number of thinking deltas seen (extended/adaptive thinking). Useful when
     /// a turn is consumed entirely by reasoning and emits no visible text.
     thinking_chunk_count: usize,
+    /// Length of streamed reasoning text (sum of `ThinkingDelta` payloads).
+    /// Distinct from `thinking_chunk_count`: a provider can emit a single empty
+    /// `ThinkingStart`/`ThinkingEnd` pair without ever streaming visible
+    /// reasoning text, which we must classify as `opaque`/`none`, not `streamed`.
+    reasoning_text_len: usize,
+    /// Saw an *opaque* reasoning signal: a `thought_signature` (Gemini-3), a
+    /// `ThinkingSignatureDelta`, or an `OpenAIReasoning` item. This is the
+    /// evidence that the model reasoned even though it never streamed the text.
+    saw_reasoning_signal: bool,
     /// Total stream events observed, for diagnosing empty/odd streams.
     total_events: usize,
     saw_message_end: bool,
@@ -576,6 +702,33 @@ impl NativeClaudeStreamOutcome {
             self.tool_calls.len()
         )
     }
+
+    /// Did any captured tool call carry a Gemini-3 `thought_signature`? This is
+    /// an opaque reasoning signal even when the model streamed no reasoning text.
+    fn any_tool_signature(&self) -> bool {
+        self.tool_calls
+            .iter()
+            .any(|call| call.thought_signature.is_some())
+    }
+
+    /// Classify how this turn exposed the model's reasoning:
+    /// - `streamed`: streamed visible reasoning text (`ThinkingDelta`).
+    /// - `opaque`: no reasoning text, but an opaque reasoning signal was present
+    ///   (a `thought_signature`, a `ThinkingSignatureDelta`, or an
+    ///   `OpenAIReasoning` item). Legitimate and common (Gemini-3, OpenAI).
+    /// - `none`: neither was observed.
+    ///
+    /// All three are valid; the reasoning checkpoint records the classification
+    /// and never fails on `none`.
+    fn reasoning_capability(&self) -> &'static str {
+        if self.reasoning_text_len > 0 {
+            "streamed"
+        } else if self.saw_reasoning_signal || self.any_tool_signature() {
+            "opaque"
+        } else {
+            "none"
+        }
+    }
 }
 
 /// Drive any native [`Provider`] runtime's `complete` and fold the resulting
@@ -610,8 +763,19 @@ async fn consume_native_stream(
                     outcome.chunk_count += 1;
                     outcome.text.push_str(&text);
                 }
-                StreamEvent::ThinkingDelta(_) => {
+                StreamEvent::ThinkingDelta(text) => {
                     outcome.thinking_chunk_count += 1;
+                    outcome.reasoning_text_len += text.len();
+                }
+                // Opaque reasoning signals: the model reasoned but the runtime
+                // surfaces only a signature/encrypted item, not readable text.
+                StreamEvent::ThinkingSignatureDelta(signature) => {
+                    if !signature.is_empty() {
+                        outcome.saw_reasoning_signal = true;
+                    }
+                }
+                StreamEvent::OpenAIReasoning { .. } => {
+                    outcome.saw_reasoning_signal = true;
                 }
                 StreamEvent::ToolUseStart { id, name } => {
                     pending_tool = Some(NativeClaudeToolCall {
@@ -981,6 +1145,18 @@ pub async fn run_live_claude_native_tool_smoke(
     Ok(stage)
 }
 
+/// Stage: reasoning capability (observe-only).
+///
+/// Delegates to the shared [`run_live_native_provider_reasoning_smoke`] so the
+/// native Claude runtime records whether the model streamed reasoning text
+/// (extended thinking) or hid it behind an opaque signal.
+pub async fn run_live_claude_native_reasoning_smoke(
+    model: &str,
+) -> anyhow::Result<crate::live_tests::LiveVerificationStage> {
+    let provider = build_native_claude_provider(model)?;
+    run_live_native_provider_reasoning_smoke(&provider, model, "Claude").await
+}
+
 // === Native Antigravity probes ============================================
 //
 // Antigravity is a Google OAuth login provider whose `generateContent` runtime
@@ -1162,6 +1338,18 @@ pub async fn run_live_antigravity_native_tool_smoke(
     run_live_native_provider_tool_smoke(&provider, model, "Antigravity").await
 }
 
+/// Stage: reasoning capability (observe-only).
+///
+/// Delegates to the shared [`run_live_native_provider_reasoning_smoke`] so
+/// Antigravity records whether the resolved model streams reasoning text or
+/// hides it behind an opaque signal (Gemini-3 thought signatures are opaque).
+pub async fn run_live_antigravity_native_reasoning_smoke(
+    model: &str,
+) -> anyhow::Result<crate::live_tests::LiveVerificationStage> {
+    let provider = build_native_antigravity_provider(model)?;
+    run_live_native_provider_reasoning_smoke(&provider, model, "Antigravity").await
+}
+
 // === Generic native-runtime probes ========================================
 //
 // The native Claude and native Antigravity probes above each build a concrete
@@ -1317,10 +1505,110 @@ pub async fn run_live_native_provider_stream_smoke(
     Ok(stage)
 }
 
+/// Stage: reasoning capability (observe-only).
+///
+/// Sends a small multi-step logic/word problem that forces the model to reason
+/// before answering, consumes the stream, and classifies how the model exposed
+/// its reasoning:
+///
+/// - `streamed`: the runtime streamed visible reasoning text (`ThinkingDelta`).
+/// - `opaque`: no reasoning text, but an opaque reasoning signal was present (a
+///   Gemini-3 `thought_signature`, a `ThinkingSignatureDelta`, or an
+///   `OpenAIReasoning` item). This is legitimate and common (Gemini-3 and
+///   OpenAI hide their reasoning), so it MUST be a pass.
+/// - `none`: neither was observed.
+///
+/// The checkpoint passes as long as the turn completes cleanly (a `MessageEnd`
+/// plus a coherent answer); it never hard-fails just because reasoning was
+/// hidden or absent. The classification is recorded as the `reasoning_capability`
+/// evidence. Expected-to-reason gating (a capability list) can layer on later.
+pub async fn run_live_native_provider_reasoning_smoke(
+    provider: &dyn Provider,
+    model: &str,
+    label: &str,
+) -> anyhow::Result<crate::live_tests::LiveVerificationStage> {
+    let started = std::time::Instant::now();
+    // A small logic word problem with a single unambiguous numeric answer (2).
+    // The answer token `REASON_TEST_ANSWER=2` lets us assert a coherent result
+    // without depending on the model's prose. The problem requires at least one
+    // step of arithmetic/elimination so a reasoning model actually reasons.
+    let messages = vec![Message {
+        role: Role::User,
+        content: vec![ContentBlock::Text {
+            text: "Solve this step by step, then give the final answer. A farmer has chickens \
+                   and cows. Together they have 7 heads and 22 legs. How many cows are there? \
+                   After reasoning, end your reply with exactly REASON_TEST_ANSWER=<number> on \
+                   its own final line."
+                .to_string(),
+            cache_control: None,
+        }],
+        timestamp: None,
+        tool_duration_ms: None,
+    }];
+    let system = "You are a live provider reasoning smoke test. Think through the problem, then \
+                  finish with the required REASON_TEST_ANSWER=<number> line.";
+
+    let outcome = consume_native_stream(
+        provider,
+        &messages,
+        &[],
+        system,
+        std::time::Duration::from_secs(120),
+    )
+    .await?;
+
+    ensure!(
+        outcome.saw_message_end,
+        "native {label} reasoning smoke ended without a message_end event ({})",
+        outcome.diagnostics()
+    );
+    // Coherence: the turn must produce a real final answer. We accept either the
+    // exact sentinel or the correct numeric answer (4 cows) appearing in the
+    // text, so a model that ignores the formatting instruction but still answers
+    // correctly is not penalized. The reasoning checkpoint is about completion,
+    // not about reasoning visibility.
+    let answered = outcome.text.contains("REASON_TEST_ANSWER=4")
+        || outcome.text.contains("REASON_TEST_ANSWER= 4")
+        || outcome.text.to_ascii_lowercase().contains("4 cows")
+        || outcome.text.contains("REASON_TEST_ANSWER");
+    ensure!(
+        !outcome.text.trim().is_empty() && answered,
+        "native {label} reasoning smoke produced no coherent answer: {:?} ({})",
+        crate::util::truncate_str(outcome.text.trim(), 200),
+        outcome.diagnostics()
+    );
+
+    let classification = outcome.reasoning_capability();
+    let mut stage = crate::live_tests::LiveVerificationStage::passed(
+        crate::live_tests::checkpoints::REASONING_CAPABILITY,
+    )
+    .with_duration_ms(started.elapsed().as_millis() as u64)
+    .with_evidence("model", serde_json::json!(model))
+    .with_evidence("reasoning_capability", serde_json::json!(classification))
+    .with_evidence(
+        "reasoning_text_chars",
+        serde_json::json!(outcome.reasoning_text_len),
+    )
+    .with_evidence(
+        "thinking_delta_count",
+        serde_json::json!(outcome.thinking_chunk_count),
+    )
+    .with_evidence(
+        "saw_opaque_reasoning_signal",
+        serde_json::json!(outcome.saw_reasoning_signal),
+    )
+    .with_evidence("total_events", serde_json::json!(outcome.total_events))
+    .with_evidence("stop_reason", serde_json::json!(outcome.stop_reason.clone()));
+    if let Some(usage) = outcome.usage_evidence() {
+        stage = stage.with_evidence("usage", usage);
+    }
+    Ok(stage)
+}
+
 /// Stage: tool-call parse + execution loop + result follow-up against an
 /// arbitrary native provider.
 ///
-/// Two phases:
+/// Three phases:
 ///
 /// 1. **Single round-trip (gating):** ask the model to call a tool (assert a
 ///    parseable tool_use), then feed a synthetic tool_result back (assert the
@@ -1338,6 +1626,14 @@ pub async fn run_live_native_provider_stream_smoke(
 ///    signatures at all), the phase records `multi_tool_replay: "skipped"`
 ///    rather than failing, so it never turns a previously-green provider red
 ///    for a non-signature reason.
+/// 3. **Parallel tool calls in one turn (best-effort):** ask the model to call
+///    the tool TWICE in a single assistant message, then replay BOTH `tool_use`
+///    blocks (each with its own `thought_signature`) inside one assistant turn
+///    and answer both `tool_result`s, asserting the backend accepts a single
+///    assistant message carrying two `functionCall` parts. Distinct from the
+///    sequential loop in phase 2. Records `parallel_tool_calls: "verified"` when
+///    the model emitted >=2 calls in one turn and the follow-up was accepted, or
+///    `"skipped"` when the model only emitted one (best-effort, never a fail).
 pub async fn run_live_native_provider_tool_smoke(
     provider: &dyn Provider,
     model: &str,
@@ -1521,6 +1817,86 @@ pub async fn run_live_native_provider_tool_smoke(
         }
     }
 
+    // Phase 3 (best-effort): ask the model to call the tool TWICE in a single
+    // assistant turn (parallel/batch tool calls), then replay BOTH tool_use
+    // blocks inside ONE assistant message (each carrying its own captured
+    // thought_signature) and answer BOTH tool_results. A backend that accepts a
+    // single assistant message containing two `functionCall` parts completes the
+    // follow-up cleanly; one that rejects parallel calls surfaces here. If the
+    // model only emits a single call (common: many models serialize tool use),
+    // we record `parallel_tool_calls: "skipped"` rather than failing.
+    let mut parallel_tool_calls = "skipped";
+    let mut parallel_call_count = 0usize;
+    let parallel_turn = consume_native_stream(
+        provider,
+        &[Message {
+            role: Role::User,
+            content: vec![ContentBlock::Text {
+                text: "In this single turn, make TWO read tool calls at once (in parallel, in \
+                       one message): read /tmp/auth_tool_probe.txt AND read \
+                       /tmp/auth_tool_probe_2.txt. Emit both tool calls now; do not answer in \
+                       text and do not wait for the first result before making the second call."
+                    .to_string(),
+                cache_control: None,
+            }],
+            timestamp: None,
+            tool_duration_ms: None,
+        }],
+        &tools,
+        system,
+        std::time::Duration::from_secs(120),
+    )
+    .await?;
+    total_input += parallel_turn.input_tokens;
+    total_output += parallel_turn.output_tokens;
+
+    if parallel_turn.tool_calls.len() >= 2 {
+        parallel_call_count = parallel_turn.tool_calls.len();
+        // Build ONE assistant message holding every tool_use block (each with
+        // its own signature), then ONE user message holding every tool_result.
+        let assistant = assistant_parallel_tool_uses(&parallel_turn.tool_calls);
+        let results = parallel_tool_results(&parallel_turn.tool_calls);
+        let convo = vec![
+            Message {
+                role: Role::User,
+                content: vec![ContentBlock::Text {
+                    text: "In this single turn, make TWO read tool calls at once (in parallel, \
+                           in one message): read /tmp/auth_tool_probe.txt AND read \
+                           /tmp/auth_tool_probe_2.txt."
+                        .to_string(),
+                    cache_control: None,
+                }],
+                timestamp: None,
+                tool_duration_ms: None,
+            },
+            assistant,
+            results,
+        ];
+        let parallel_followup = consume_native_stream(
+            provider,
+            &convo,
+            &tools,
+            system,
+            std::time::Duration::from_secs(120),
+        )
+        .await
+        .with_context(|| {
+            format!(
+                "native {label} parallel tool-call replay was rejected (one assistant message \
+                 carried {parallel_call_count} functionCall parts; a backend that does not \
+                 accept parallel tool calls in a single message fails here)"
+            )
+        })?;
+        total_input += parallel_followup.input_tokens;
+        total_output += parallel_followup.output_tokens;
+        ensure!(
+            parallel_followup.saw_message_end,
+            "native {label} parallel tool-call follow-up ended without a message_end event ({})",
+            parallel_followup.diagnostics()
+        );
+        parallel_tool_calls = "verified";
+    }
+
     let mut stage = crate::live_tests::LiveVerificationStage::passed(
         crate::live_tests::checkpoints::TOOL_CALL_PARSE,
     )
@@ -1538,6 +1914,14 @@ pub async fn run_live_native_provider_tool_smoke(
         "tool_call_signatures_present",
         serde_json::json!(signatures_present),
     )
+    .with_evidence(
+        "parallel_tool_calls",
+        serde_json::json!(parallel_tool_calls),
+    )
+    .with_evidence(
+        "parallel_tool_call_count",
+        serde_json::json!(parallel_call_count),
+    )
     .with_evidence("followup_consumed_result", serde_json::json!(true));
     if total_input != 0 || total_output != 0 {
         stage = stage.with_evidence("usage", usage_evidence(total_input, total_output, 0, 0));
@@ -1586,3 +1970,47 @@ fn tool_result_then_text(tool_use_id: &str, result: &str) -> Message {
         tool_duration_ms: None,
     }
 }
+
+/// Build a single assistant message that replays *every* captured tool call as a
+/// parallel batch (multiple `ToolUse` blocks in one message), each preserving
+/// its own `thought_signature`. This is the shape the parallel-tool-call phase
+/// asserts the backend accepts as one assistant turn carrying N `functionCall`
+/// parts.
+fn assistant_parallel_tool_uses(calls: &[NativeClaudeToolCall]) -> Message {
+    let content = calls
+        .iter()
+        .map(|call| ContentBlock::ToolUse {
+            id: call.id.clone(),
+            name: call.name.clone(),
+            input: parse_tool_arguments(&call.input_json),
+            thought_signature: call.thought_signature.clone(),
+        })
+        .collect();
+    Message {
+        role: Role::Assistant,
+        content,
+        timestamp: None,
+        tool_duration_ms: None,
+    }
+}
+
+/// Build a single user message answering *every* parallel tool call with a
+/// synthetic `tool_result`, so a parallel assistant turn is fully resolved in
+/// one follow-up message.
+fn parallel_tool_results(calls: &[NativeClaudeToolCall]) -> Message {
+    let content = calls
+        .iter()
+        .enumerate()
+        .map(|(index, call)| ContentBlock::ToolResult {
+            tool_use_id: call.id.clone(),
+            content: format!("Contents of file {}: token_{index}.", index + 1),
+            is_error: Some(false),
+        })
+        .collect();
+    Message {
+        role: Role::User,
+        content,
+        timestamp: None,
+        tool_duration_ms: None,
+    }
+}
diff --git a/crates/jcode-base/src/auth/provider_e2e.rs b/crates/jcode-base/src/auth/provider_e2e.rs
index d4356db5c..05ef8c40a 100644
--- a/crates/jcode-base/src/auth/provider_e2e.rs
+++ b/crates/jcode-base/src/auth/provider_e2e.rs
@@ -22,13 +22,14 @@ use crate::auth::lifecycle::{
     AuthActivationRequest, activate_auth_change, validate_catalog_invariants,
 };
 use crate::auth::live_provider_probes::{
-    fetch_live_openai_compatible_models, run_live_antigravity_native_smoke,
-    run_live_antigravity_native_stream_smoke, run_live_antigravity_native_tool_smoke,
+    fetch_live_openai_compatible_models, run_live_antigravity_native_reasoning_smoke,
+    run_live_antigravity_native_smoke, run_live_antigravity_native_stream_smoke,
+    run_live_antigravity_native_tool_smoke, run_live_claude_native_reasoning_smoke,
     run_live_claude_native_smoke, run_live_claude_native_stream_smoke,
-    run_live_claude_native_tool_smoke, run_live_native_provider_smoke,
-    run_live_native_provider_stream_smoke, run_live_native_provider_tool_smoke,
-    run_live_openai_compatible_smoke, run_live_openai_compatible_stream_smoke,
-    run_live_openai_compatible_tool_smoke,
+    run_live_claude_native_tool_smoke, run_live_native_provider_reasoning_smoke,
+    run_live_native_provider_smoke, run_live_native_provider_stream_smoke,
+    run_live_native_provider_tool_smoke, run_live_openai_compatible_smoke,
+    run_live_openai_compatible_stream_smoke, run_live_openai_compatible_tool_smoke,
 };
 use crate::live_tests::{
     self, LiveVerificationAuth, LiveVerificationEvent, LiveVerificationResult,
@@ -273,6 +274,7 @@ const FULL_PIPELINE_LABELS: &[(&str, &str)] = &[
     (checkpoints::TOOL_EXECUTION_LOOP, "Tool execution loop"),
     (checkpoints::TOOL_RESULT_FOLLOWUP, "Tool-result followup"),
     (checkpoints::REAL_JCODE_TOOL_SMOKE, "Real Jcode tool smoke"),
+    (checkpoints::REASONING_CAPABILITY, "Reasoning capability"),
 ];
 
 fn label_for(checkpoint: &str) -> &'static str {
@@ -291,17 +293,89 @@ fn label_for(checkpoint: &str) -> &'static str {
 /// declined a second tool call). Surfacing it keeps the coverage observable in
 /// the doctor report instead of collapsing to a generic pass string.
 fn tool_stage_detail(stage: &crate::live_tests::LiveVerificationStage) -> String {
-    match stage
+    let multi = match stage
         .evidence
         .get("multi_tool_replay")
         .and_then(|value| value.as_str())
     {
-        Some("verified") => "tool call parsed and executed; multi-call signature replay verified".to_string(),
-        Some("skipped") => {
-            "tool call parsed and executed; multi-call signature replay skipped (no 2nd tool call)"
-                .to_string()
+        Some("verified") => "multi-call signature replay verified",
+        Some("skipped") => "multi-call signature replay skipped (no 2nd tool call)",
+        _ => "",
+    };
+    let parallel = match stage
+        .evidence
+        .get("parallel_tool_calls")
+        .and_then(|value| value.as_str())
+    {
+        Some("verified") => "parallel tool calls verified",
+        Some("skipped") => "parallel tool calls skipped (single call)",
+        _ => "",
+    };
+    let mut detail = "tool call parsed and executed".to_string();
+    for part in [multi, parallel] {
+        if !part.is_empty() {
+            detail.push_str("; ");
+            detail.push_str(part);
+        }
+    }
+    detail
+}
+
+/// Human-readable detail for a passed reasoning-capability stage. The stage
+/// records `reasoning_capability` as `streamed` (visible reasoning text),
+/// `opaque` (no text but a reasoning signal: thought signature, reasoning item,
+/// or reasoning tokens), or `none` (neither). All three are passes; `opaque` and
+/// `none` are legitimate because providers like Gemini-3 and OpenAI hide their
+/// reasoning. Surfacing the classification keeps the observation visible in the
+/// doctor report.
+fn reasoning_stage_detail(stage: &crate::live_tests::LiveVerificationStage) -> String {
+    match stage
+        .evidence
+        .get("reasoning_capability")
+        .and_then(|value| value.as_str())
+    {
+        Some("streamed") => "reasoning streamed (visible thinking text)".to_string(),
+        Some("opaque") => {
+            "reasoning hidden but signaled (opaque: thought signature / reasoning item)".to_string()
+        }
+        Some("none") => "no reasoning signal observed (model hides or skips reasoning)".to_string(),
+        _ => "reasoning turn completed".to_string(),
+    }
+}
+
+/// Fold a reasoning-capability probe result into a [`DoctorCheck`], honoring the
+/// observe-only contract.
+///
+/// A clean turn records a passed checkpoint carrying the `streamed`/`opaque`/
+/// `none` classification (all three are passes; hiding reasoning is legitimate).
+/// A probe *error* (network, or a turn that did not complete with a coherent
+/// answer) is recorded as **skipped**, never failed: this checkpoint must never
+/// flip a provider to "not user-ready", and it is not part of the strict
+/// coverage ladder, so an observational miss should not fail the tier. The
+/// broader chat/streaming checkpoints already guard turn completion.
+fn push_reasoning_check(
+    result: anyhow::Result<LiveVerificationStage>,
+    checks: &mut Vec<DoctorCheck>,
+    spend: &mut DoctorSpend,
+) {
+    match result {
+        Ok(stage) => {
+            spend.accumulate(stage.evidence.get("usage"), stage.evidence.get("cost"));
+            let detail = reasoning_stage_detail(&stage);
+            checks.push(DoctorCheck::passed(
+                checkpoints::REASONING_CAPABILITY,
+                label_for(checkpoints::REASONING_CAPABILITY),
+                detail,
+            ));
         }
-        _ => "tool call parsed and executed".to_string(),
+        Err(error) => checks.push(DoctorCheck::skipped(
+            checkpoints::REASONING_CAPABILITY,
+            label_for(checkpoints::REASONING_CAPABILITY),
+            format!(
+                "observe-only reasoning probe did not complete: {}",
+                format_error_chain(&error)
+            ),
+        )),
     }
 }
 
@@ -314,6 +388,7 @@ const API_DEPENDENT_CHECKPOINTS: &[&str] = &[
     checkpoints::TOOL_EXECUTION_LOOP,
     checkpoints::TOOL_RESULT_FOLLOWUP,
     checkpoints::REAL_JCODE_TOOL_SMOKE,
+    checkpoints::REASONING_CAPABILITY,
 ];
 
 /// Run the strict provider/model diagnostic.
@@ -846,6 +921,13 @@ async fn run_native_claude_api_checks(
             }
         }
     }
+
+    // Reasoning capability (observe-only; never gates readiness).
+    push_reasoning_check(
+        run_live_claude_native_reasoning_smoke(selected).await,
+        checks,
+        spend,
+    );
 }
 
 /// The wiring contract for the native Antigravity (Google OAuth Cloud Code)
@@ -1190,6 +1272,13 @@ async fn run_native_antigravity_api_checks(
             }
         }
     }
+
+    // Reasoning capability (observe-only; never gates readiness).
+    push_reasoning_check(
+        run_live_antigravity_native_reasoning_smoke(selected).await,
+        checks,
+        spend,
+    );
 }
 
 // === Generic native-runtime doctor =========================================
@@ -1825,6 +1914,13 @@ async fn run_generic_native_api_checks(
             }
         }
     }
+
+    // Reasoning capability (observe-only; never gates readiness).
+    push_reasoning_check(
+        run_live_native_provider_reasoning_smoke(provider, selected, label).await,
+        checks,
+        spend,
+    );
 }
 
 /// The jcode-side wiring a given compat profile is expected to activate.
@@ -2477,4 +2573,71 @@ mod tests {
         let anonymous = native_antigravity_auth("");
         assert!(anonymous.source.contains("Antigravity Google OAuth"));
     }
+
+    #[test]
+    fn tool_stage_detail_surfaces_multi_and_parallel_phases() {
+        let verified = LiveVerificationStage::passed(checkpoints::TOOL_CALL_PARSE)
+            .with_evidence("multi_tool_replay", serde_json::json!("verified"))
+            .with_evidence("parallel_tool_calls", serde_json::json!("verified"));
+        let detail = tool_stage_detail(&verified);
+        assert!(detail.contains("tool call parsed and executed"));
+        assert!(detail.contains("multi-call signature replay verified"));
+        assert!(detail.contains("parallel tool calls verified"));
+
+        let skipped = LiveVerificationStage::passed(checkpoints::TOOL_CALL_PARSE)
+            .with_evidence("multi_tool_replay", serde_json::json!("skipped"))
+            .with_evidence("parallel_tool_calls", serde_json::json!("skipped"));
+        let detail = tool_stage_detail(&skipped);
+        assert!(detail.contains("multi-call signature replay skipped"));
+        assert!(detail.contains("parallel tool calls skipped"));
+
+        // With no evidence the base string is unchanged (back-compat).
+        let bare = LiveVerificationStage::passed(checkpoints::TOOL_CALL_PARSE);
+        assert_eq!(tool_stage_detail(&bare), "tool call parsed and executed");
+    }
+
+    #[test]
+    fn reasoning_stage_detail_describes_each_classification() {
+        for (value, needle) in [
+            ("streamed", "reasoning streamed"),
+            ("opaque", "reasoning hidden but signaled"),
+            ("none", "no reasoning signal observed"),
+        ] {
+            let stage = LiveVerificationStage::passed(checkpoints::REASONING_CAPABILITY)
+                .with_evidence("reasoning_capability", serde_json::json!(value));
+            assert!(
+                reasoning_stage_detail(&stage).contains(needle),
+                "classification {value} should mention {needle}"
+            );
+        }
+    }
+
+    #[test]
+    fn push_reasoning_check_records_pass_for_clean_turn() {
+        let mut checks = Vec::new();
+        let mut spend = DoctorSpend::default();
+        let stage = LiveVerificationStage::passed(checkpoints::REASONING_CAPABILITY)
+            .with_evidence("reasoning_capability", serde_json::json!("opaque"));
+        push_reasoning_check(Ok(stage), &mut checks, &mut spend);
+        assert_eq!(checks.len(), 1);
+        assert_eq!(checks[0].checkpoint, checkpoints::REASONING_CAPABILITY);
+        assert_eq!(checks[0].status, LiveVerificationStageStatus::Passed);
+        assert!(!checks[0].is_failure());
+    }
+
+    #[test]
+    fn push_reasoning_check_skips_never_fails_on_probe_error() {
+        // The observe-only reasoning checkpoint must never produce a failure that
+        // could flip the tier to not-ready; a probe error is recorded as skipped.
+        let mut checks = Vec::new();
+        let mut spend = DoctorSpend::default();
+        push_reasoning_check(
+            Err(anyhow::anyhow!("network blip")),
+            &mut checks,
+            &mut spend,
+        );
+        assert_eq!(checks.len(), 1);
+        assert_eq!(checks[0].status, LiveVerificationStageStatus::Skipped);
+        assert!(!checks[0].is_failure());
+    }
 }
diff --git a/crates/jcode-base/src/live_tests.rs b/crates/jcode-base/src/live_tests.rs
index bc4935aa8..e320c73be 100644
--- a/crates/jcode-base/src/live_tests.rs
+++ b/crates/jcode-base/src/live_tests.rs
@@ -13,7 +13,7 @@ const DEFAULT_RETEST_DAYS: i64 = 14;
 const LEDGER_ENV: &str = "JCODE_LIVE_TEST_LEDGER";
 const COVERAGE_ENV: &str = "JCODE_LIVE_TEST_COVERAGE";
 
-pub const CHECKPOINT_TAXONOMY_VERSION: u32 = 2;
+pub const CHECKPOINT_TAXONOMY_VERSION: u32 = 3;
 
 pub mod checkpoints {
     pub const AUTH_UX_KEY_ENTRY: &str = "auth_ux_key_entry";
@@ -30,6 +30,10 @@ pub mod checkpoints {
     pub const TOOL_EXECUTION_LOOP: &str = "tool_execution_loop";
     pub const TOOL_RESULT_FOLLOWUP: &str = "tool_result_followup";
     pub const REAL_JCODE_TOOL_SMOKE: &str = "real_jcode_tool_smoke";
+    /// Observe-only: did the model expose its reasoning (`streamed`), hide it
+    /// behind an opaque signal (`opaque`, e.g. Gemini-3 / OpenAI), or emit none
+    /// (`none`)? Never required for user-readiness; hiding reasoning is a pass.
+    pub const REASONING_CAPABILITY: &str = "reasoning_capability";
     pub const RESTART_PERSISTENCE: &str = "restart_persistence";
     pub const NEGATIVE_ERROR_UX: &str = "negative_error_ux";
     pub const MODEL_CAPABILITY_MATRIX: &str = "model_capability_matrix";
@@ -159,6 +163,16 @@ const END_TO_END_CHECKPOINTS: &[LiveVerificationCheckpointDefinition] = &[
         spends_balance: true,
         description: "A normal Jcode agent turn uses the real streamed parser, advertised tool schema, registry execution, tool-result followup, and transcript validation without malformed tool calls.",
     },
+    LiveVerificationCheckpointDefinition {
+        id: checkpoints::REASONING_CAPABILITY,
+        label: "Reasoning capability",
+        category: "reasoning",
+        // Observe-only: a provider that hides its reasoning (opaque) or emits
+        // none is still fully user-ready, so this must never gate readiness.
+        required_for_user_ready: false,
+        spends_balance: true,
+        description: "Records whether the model streams reasoning text, hides it behind an opaque signal (thought_signature/reasoning item/reasoning tokens), or emits none. Passes as long as the reasoning turn completes cleanly; absence of reasoning is recorded, not failed.",
+    },
     LiveVerificationCheckpointDefinition {
         id: checkpoints::RESTART_PERSISTENCE,
         label: "Restart persistence",
@@ -2514,6 +2528,7 @@ mod tests {
             checkpoints::TOOL_EXECUTION_LOOP,
             checkpoints::TOOL_RESULT_FOLLOWUP,
             checkpoints::REAL_JCODE_TOOL_SMOKE,
+            checkpoints::REASONING_CAPABILITY,
             checkpoints::RESTART_PERSISTENCE,
             checkpoints::NEGATIVE_ERROR_UX,
             checkpoints::MODEL_CAPABILITY_MATRIX,
@@ -2527,6 +2542,24 @@ mod tests {
                 .any(|checkpoint| checkpoint.spends_balance),
             "taxonomy should identify balance-spending checkpoints"
         );
+
+        // The reasoning_capability checkpoint is observe-only: it records what
+        // the model exposed (streamed/opaque/none) but a provider that hides its
+        // reasoning is still fully user-ready, so it must never gate readiness or
+        // strict coverage.
+        let reasoning = end_to_end_checkpoint_definitions()
+            .iter()
+            .find(|checkpoint| checkpoint.id == checkpoints::REASONING_CAPABILITY)
+            .expect("reasoning_capability checkpoint must exist in the taxonomy");
+        assert!(
+            !reasoning.required_for_user_ready,
+            "reasoning_capability must not be required for user-readiness"
+        );
+        assert!(
+            !STRICT_PROVIDER_MODEL_COVERAGE_CHECKPOINTS
+                .contains(&checkpoints::REASONING_CAPABILITY),
+            "reasoning_capability must not be a strict-required checkpoint"
+        );
     }
 
     #[test]

From 9849d5b4d1a47340046bc2b6157152256e5b7558 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:37:40 -0700
Subject: [PATCH 28/57] fix(gemini/antigravity): surface leftover Gemini-3
 thought signature as reasoning signal

A Gemini-3 thoughtSignature that was not consumed by a following functionCall
(e.g. a pure-text reasoning turn) was silently dropped. Emit it as a
ThinkingSignatureDelta instead so reasoning-aware consumers (and the new
provider-doctor reasoning probe) can observe that the model reasoned even when
no reasoning text and no tool call were produced.
---
 crates/jcode-base/src/provider/antigravity.rs | 10 ++++++++++
 crates/jcode-base/src/provider/gemini.rs      |  9 +++++++++
 2 files changed, 19 insertions(+)

diff --git a/crates/jcode-base/src/provider/antigravity.rs b/crates/jcode-base/src/provider/antigravity.rs
index d6e3fa672..e64f09870 100644
--- a/crates/jcode-base/src/provider/antigravity.rs
+++ b/crates/jcode-base/src/provider/antigravity.rs
@@ -1084,6 +1084,16 @@ impl Provider for AntigravityProvider {
                         pending_signature = Some(signature);
                     }
                 }
+                // A thought signature that was never consumed by a following
+                // function call (e.g. a pure-text reasoning turn) is still an
+                // opaque reasoning signal. Surface it as a ThinkingSignatureDelta
+                // rather than dropping it, so reasoning-aware consumers (and the
+                // provider-doctor reasoning probe) can see the model reasoned.
+                if let Some(signature) = pending_signature.take() {
+                    let _ = tx
+                        .send(Ok(StreamEvent::ThinkingSignatureDelta(signature)))
+                        .await;
+                }
             }
 
             let _ = tx
diff --git a/crates/jcode-base/src/provider/gemini.rs b/crates/jcode-base/src/provider/gemini.rs
index 0f6e6ae0b..b2aa15d8d 100644
--- a/crates/jcode-base/src/provider/gemini.rs
+++ b/crates/jcode-base/src/provider/gemini.rs
@@ -859,6 +859,15 @@ impl Provider for GeminiProvider {
                             pending_signature = Some(signature);
                         }
                     }
+                    // A thought signature not consumed by a following function
+                    // call (e.g. a pure-text reasoning turn) is still an opaque
+                    // reasoning signal. Surface it as a ThinkingSignatureDelta
+                    // instead of dropping it.
+                    if let Some(signature) = pending_signature.take() {
+                        let _ = tx
+                            .send(Ok(StreamEvent::ThinkingSignatureDelta(signature)))
+                            .await;
+                    }
                 }
             }
 

From 91620e1e5e20545faba72fffe5ea780f31696db1 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:44:46 -0700
Subject: [PATCH 29/57] docs(provider-doctor): correct reasoning probe answer
 comment (4 cows)

---
 crates/jcode-base/src/auth/live_provider_probes.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/crates/jcode-base/src/auth/live_provider_probes.rs b/crates/jcode-base/src/auth/live_provider_probes.rs
index fe6ae0b2d..a2bb0da03 100644
--- a/crates/jcode-base/src/auth/live_provider_probes.rs
+++ b/crates/jcode-base/src/auth/live_provider_probes.rs
@@ -1528,10 +1528,11 @@ pub async fn run_live_native_provider_reasoning_smoke(
     label: &str,
 ) -> anyhow::Result<crate::live_tests::LiveVerificationStage> {
     let started = std::time::Instant::now();
-    // A small logic word problem with a single unambiguous numeric answer (2).
-    // The answer token `REASON_TEST_ANSWER=2` lets us assert a coherent result
-    // without depending on the model's prose. The problem requires at least one
-    // step of arithmetic/elimination so a reasoning model actually reasons.
+    // A small logic word problem with a single unambiguous numeric answer (4
+    // cows: chickens c + cows w give c + w = 7 heads and 2c + 4w = 22 legs, so
+    // w = 4). The `REASON_TEST_ANSWER=<n>` sentinel lets us assert a coherent
+    // result without depending on the model's prose, and the problem requires at
+    // least one elimination/arithmetic step so a reasoning model actually reasons.
     let messages = vec![Message {
         role: Role::User,
         content: vec![ContentBlock::Text {

From 26984695f6aac90b9028892f36a4f5bb266eae9c Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:48:01 -0700
Subject: [PATCH 30/57] desktop: cache measurement FontSystem for inline-code
 pill geometry; add scroll diag instrumentation

Building a fresh FontSystem every frame (rescanning all system fonts) inside the
inline-code/math pill geometry builder caused multi-ms per-frame scroll spikes
over code blocks. Reuse a thread-local measurement FontSystem instead.
---
 crates/jcode-desktop/src/main.rs              | 55 +++++++++++++++++++
 .../src/single_session_render.rs              | 28 +++++++---
 2 files changed, 76 insertions(+), 7 deletions(-)

diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs
index e8dafb983..8d2d5c487 100644
--- a/crates/jcode-desktop/src/main.rs
+++ b/crates/jcode-desktop/src/main.rs
@@ -5253,6 +5253,11 @@ struct RealTranscriptScrollReport {
     setup_full_relayout_ms: f64,
     worst_stage_name: String,
     worst_stage_us: f64,
+    worst_rebuild_us: f64,
+    worst_rebuild_window_lines: usize,
+    worst_rebuild_max_line_chars: usize,
+    worst_rebuild_advanced_lines: usize,
+    worst_rebuild_segments: usize,
 }
 
 impl RealTranscriptScrollReport {
@@ -5279,6 +5284,13 @@ impl RealTranscriptScrollReport {
             "max_scroll_lines": self.max_scroll_lines,
             "body_buffer_rebuilds": self.body_buffer_rebuilds,
             "setup_full_body_relayout_ms": self.setup_full_relayout_ms,
+            "worst_window_rebuild": {
+                "us": self.worst_rebuild_us,
+                "window_lines": self.worst_rebuild_window_lines,
+                "max_line_chars": self.worst_rebuild_max_line_chars,
+                "advanced_shaping_lines": self.worst_rebuild_advanced_lines,
+                "segments": self.worst_rebuild_segments,
+            },
             "full_scroll_frame": {
                 "frames": self.frame_samples.len(),
                 "mean_ms_per_frame": total_ms / frames as f64,
@@ -5361,6 +5373,16 @@ fn benchmark_real_transcript_scroll(
     let mut vertices_us = 0.0;
     let mut body_buffer_rebuilds = 0usize;
 
+    // Optional diagnostic: capture the single slowest window rebuild and describe
+    // the window content so we can attribute the cost (line count, advanced
+    // shaping triggers, longest line) rather than guessing.
+    let diagnose = std::env::var_os("JCODE_DESKTOP_SCROLL_DIAG").is_some();
+    let mut worst_rebuild_us = 0.0_f64;
+    let mut worst_rebuild_window_lines = 0usize;
+    let mut worst_rebuild_max_line_chars = 0usize;
+    let mut worst_rebuild_advanced_lines = 0usize;
+    let mut worst_rebuild_segments = 0usize;
+
     let (frame_samples, _checksum) = benchmark_frame_samples(frames, |frame| {
         // Triangle-wave scroll position covering the full transcript height.
         let phase = frame % (span * 2);
@@ -5375,6 +5397,7 @@ fn benchmark_real_transcript_scroll(
         let phase_started = Instant::now();
         if !single_session_body_text_window_contains(window_start, window_end, &viewport) {
             (window_start, window_end) = single_session_body_text_window_bounds(&viewport);
+            let rebuild_started = Instant::now();
             if let Some(body_buffer) = buffers.get_mut(1) {
                 *body_buffer = single_session_body_text_buffer_from_lines(
                     &mut font_system,
@@ -5383,6 +5406,33 @@ fn benchmark_real_transcript_scroll(
                     app.text_scale(),
                 );
             }
+            if diagnose {
+                let rebuild_us = rebuild_started.elapsed().as_secs_f64() * 1_000_000.0;
+                if rebuild_us > worst_rebuild_us {
+                    worst_rebuild_us = rebuild_us;
+                    let window = &body_lines[window_start..window_end];
+                    worst_rebuild_window_lines = window.len();
+                    worst_rebuild_max_line_chars =
+                        window.iter().map(|l| l.text.chars().count()).max().unwrap_or(0);
+                    worst_rebuild_advanced_lines = window
+                        .iter()
+                        .filter(|l| !l.text.is_ascii())
+                        .count();
+                    worst_rebuild_segments =
+                        window.iter().map(|l| l.inline_spans.len() + 1).sum();
+                    if let Ok(path) = std::env::var("JCODE_DESKTOP_SCROLL_DIAG_DUMP") {
+                        let text = window
+                            .iter()
+                            .map(|l| l.text.as_str())
+                            .collect::<Vec<_>>()
+                            .join("\n");
+                        let _ = std::fs::write(
+                            format!("{path}.{}", transcript.session_id),
+                            text,
+                        );
+                    }
+                }
+            }
             body_buffer_rebuilds += 1;
             last_scroll_start = usize::MAX;
         }
@@ -5458,6 +5508,11 @@ fn benchmark_real_transcript_scroll(
         setup_full_relayout_ms,
         worst_stage_name,
         worst_stage_us,
+        worst_rebuild_us,
+        worst_rebuild_window_lines,
+        worst_rebuild_max_line_chars,
+        worst_rebuild_advanced_lines,
+        worst_rebuild_segments,
     }
 }
 
diff --git a/crates/jcode-desktop/src/single_session_render.rs b/crates/jcode-desktop/src/single_session_render.rs
index 7abf41cf6..9fd0d6dd8 100644
--- a/crates/jcode-desktop/src/single_session_render.rs
+++ b/crates/jcode-desktop/src/single_session_render.rs
@@ -7209,6 +7209,24 @@ fn push_single_session_inline_code_cards(
     );
 }
 
+/// A thread-local, lazily-initialized `FontSystem` used purely for measuring
+/// glyph layout (inline-code/math pill bounds) during geometry building.
+///
+/// Building a `FontSystem` rescans every system font from disk, costing several
+/// milliseconds per call. The inline-code/math card builder runs on every frame
+/// whose visible window contains inline code or math, so constructing a fresh
+/// `FontSystem` there made scrolling over code blocks janky (multi-ms spikes per
+/// frame). Caching one per render thread keeps repeated measurement cheap. The
+/// system is only used for transient measurement buffers, never for the glyphs
+/// actually uploaded to the GPU, so reuse is safe.
+fn with_measurement_font_system<R>(f: impl FnOnce(&mut FontSystem) -> R) -> R {
+    thread_local! {
+        static MEASUREMENT_FONT_SYSTEM: std::cell::RefCell<FontSystem> =
+            std::cell::RefCell::new(FontSystem::new());
+    }
+    MEASUREMENT_FONT_SYSTEM.with(|cell| f(&mut cell.borrow_mut()))
+}
+
 fn push_single_session_inline_code_cards_from_viewport(
     vertices: &mut Vec<Vertex>,
     app: &SingleSessionApp,
@@ -7245,13 +7263,9 @@ fn push_single_session_inline_code_cards_from_viewport(
         horizontal_pad,
         top_offset_pixels: viewport.top_offset_pixels,
     };
-    let mut font_system = FontSystem::new();
-    let body_buffer = single_session_body_text_buffer_from_lines(
-        &mut font_system,
-        &viewport.lines,
-        size,
-        text_scale,
-    );
+    let body_buffer = with_measurement_font_system(|font_system| {
+        single_session_body_text_buffer_from_lines(font_system, &viewport.lines, size, text_scale)
+    });
     let layout_runs = body_buffer.layout_runs().collect::<Vec<_>>();
 
     let mut occurrences = HashMap::new();

From 41beb009ec36b90c385fdac2b9c5edfdb77a1859 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:50:21 -0700
Subject: [PATCH 31/57] fix(reload): newer client drags a stale older server
 forward on attach

Fixes the 'current client (v0.22), stale older server, /update only updates
the client' report. Root cause: the decision to upgrade the server runs in the
OLD server process, which a newer client cannot retroactively fix. Two gaps:

1. Detection: the client deferred + reloaded an older server only when the
   server self-reported server_has_update. An old daemon whose shared-server
   channel still points at its own binary legitimately reports Some(false)
   ('nothing newer to reload into'), which the client trusted -> stuck forever.
   Now a client-proven-older release (server_version < client_version, clean
   semver) always wins and defers, regardless of the server's self-report.

2. Reload target: even after deferring, a forced reload re-execs whatever the
   shared-server channel points at -- still the old binary. The new client now
   repairs the shared-server channel client-side before reloading
   (repair_stale_shared_server_channel): repoint shared-server -> stable when
   stable is strictly newer by mtime. Never downgrades, and preserves a
   deliberately-pinned self-dev build that is fresher than stable.

This is version-agnostic (no per-version allowlist): any server that is a
strictly-older clean release than the connected client gets dragged forward.
Existing recover_reloading_server + 3-attempt loop cap handle the case where a
reload still does not take (fresh spawn self-heals via the candidate logic).

Tests:
- build-support: repair repoints stale->stable, no-op when current, preserves a
  fresher self-dev pin, never downgrades when stable is older.
- tui: client-proven-older server with server_has_update=Some(false) now
  defers; same/newer server still trusted; full-path sandbox drives the real
  handle_server_event History handler against a temp JCODE_HOME in the field
  state and asserts the shared-server channel is repaired to the new release.
---
 crates/jcode-build-support/src/lib.rs         |  95 +++++++++
 crates/jcode-build-support/src/tests.rs       | 113 ++++++++++
 .../src/tui/app/remote/server_events.rs       | 130 ++++++++----
 crates/jcode-tui/src/tui/app/tests.rs         | 200 +++++++++++++++++-
 4 files changed, 495 insertions(+), 43 deletions(-)

diff --git a/crates/jcode-build-support/src/lib.rs b/crates/jcode-build-support/src/lib.rs
index f5fc9795f..ad8cc2013 100644
--- a/crates/jcode-build-support/src/lib.rs
+++ b/crates/jcode-build-support/src/lib.rs
@@ -766,6 +766,101 @@ pub fn advance_shared_server_if_tracking_stable(version: &str) -> Result<bool> {
     }
 }
 
+/// Outcome of [`repair_stale_shared_server_channel`].
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum SharedServerRepair {
+    /// The `shared-server` channel was repointed at the installed `stable`
+    /// release because stable was strictly newer on disk.
+    Repaired {
+        previous: Option<String>,
+        repaired_to: String,
+    },
+    /// Nothing to do: shared-server is already at/newer than stable, or there is
+    /// no usable stable target.
+    AlreadyCurrent,
+}
+
+/// Drag a *stale* `shared-server` channel forward to the installed `stable`
+/// release so a long-lived daemon can actually reload into a newer binary.
+///
+/// This is the client-side counterpart to [`advance_shared_server_if_tracking_stable`].
+/// Updates advance `stable` but only advance `shared-server` *during the install
+/// path*; a client that is already on the newest release (so `/update` is a
+/// no-op) never re-runs that install path, leaving a long-lived older daemon
+/// pinned to its old `shared-server` binary forever. A newer client that detects
+/// an older server calls this to repoint `shared-server` -> `stable` before
+/// asking the server to reload, so the forced reload has a strictly-newer target
+/// to exec into instead of re-execing the same old binary (the "current client,
+/// stale server" report).
+///
+/// Safety: we only repair when the `stable` binary is *strictly newer by mtime*
+/// than the current `shared-server` binary. That preserves a deliberately-pinned
+/// self-dev `shared-server` build whenever it is at least as fresh as stable (the
+/// case the pin exists to protect), and never downgrades the channel.
+pub fn repair_stale_shared_server_channel() -> Result<SharedServerRepair> {
+    let stable_version = read_stable_version()?;
+    let Some(stable_version) = stable_version
+        .as_deref()
+        .map(str::trim)
+        .filter(|s| !s.is_empty())
+    else {
+        return Ok(SharedServerRepair::AlreadyCurrent);
+    };
+
+    let stable_binary = stable_binary_path()?;
+    if !stable_binary.exists() {
+        return Ok(SharedServerRepair::AlreadyCurrent);
+    }
+
+    // If shared-server already resolves to the same version marker, there is
+    // nothing to repair.
+    let previous = read_shared_server_version()?;
+    if previous.as_deref().map(str::trim).filter(|s| !s.is_empty()) == Some(stable_version) {
+        return Ok(SharedServerRepair::AlreadyCurrent);
+    }
+
+    // Only repair when stable is strictly newer than the current shared-server
+    // binary on disk. This never downgrades, and it preserves a self-dev pin
+    // that is fresher than stable.
+    let shared_binary = shared_server_binary_path()?;
+    if !shared_server_binary_is_strictly_older_than(&shared_binary, &stable_binary) {
+        return Ok(SharedServerRepair::AlreadyCurrent);
+    }
+
+    update_shared_server_symlink(stable_version)?;
+    Ok(SharedServerRepair::Repaired {
+        previous: previous
+            .as_deref()
+            .map(str::trim)
+            .filter(|s| !s.is_empty())
+            .map(str::to_string),
+        repaired_to: stable_version.to_string(),
+    })
+}
+
+/// True when `shared` exists and is strictly older (by mtime) than `stable`, or
+/// when `shared` is missing entirely (nothing to protect). Any mtime
+/// uncertainty on an existing shared binary is treated as "not older" so we
+/// never repair away an unverifiable (possibly newer) pinned build.
+fn shared_server_binary_is_strictly_older_than(
+    shared: &std::path::Path,
+    stable: &std::path::Path,
+) -> bool {
+    let mtime = |p: &std::path::Path| std::fs::metadata(p).ok().and_then(|m| m.modified().ok());
+    let stable_mtime = match mtime(stable) {
+        Some(m) => m,
+        None => return false,
+    };
+    if !shared.exists() {
+        // No deliberate pin on disk; safe to point the channel at stable.
+        return true;
+    }
+    match mtime(shared) {
+        Some(shared_mtime) => shared_mtime < stable_mtime,
+        None => false,
+    }
+}
+
 /// Install release binary into immutable versions, promote it to stable, and also make it the
 /// active current/launcher build.
 pub fn install_local_release(repo_dir: &std::path::Path) -> Result<PathBuf> {
diff --git a/crates/jcode-build-support/src/tests.rs b/crates/jcode-build-support/src/tests.rs
index 00cda73ed..88af9652f 100644
--- a/crates/jcode-build-support/src/tests.rs
+++ b/crates/jcode-build-support/src/tests.rs
@@ -716,3 +716,116 @@ fn selfdev_reload_target_diverges_from_update_probe_when_shared_server_pinned()
         );
     });
 }
+
+/// Write a distinct, real binary into `versions/<version>/jcode` with an
+/// explicit mtime so channel-repair mtime comparisons are deterministic
+/// (install_binary_at_version hard-links and would share an mtime).
+fn write_versioned_binary(version: &str, mtime: std::time::SystemTime) -> PathBuf {
+    let dir = builds_dir().unwrap().join("versions").join(version);
+    std::fs::create_dir_all(&dir).expect("create version dir");
+    let path = dir.join(binary_name());
+    std::fs::write(&path, format!("bin {version}")).expect("write binary");
+    std::fs::File::open(&path)
+        .expect("open binary")
+        .set_modified(mtime)
+        .expect("set mtime");
+    path
+}
+
+#[test]
+fn repair_repoints_stale_shared_server_to_newer_stable() {
+    use std::time::{Duration, SystemTime};
+    with_temp_jcode_home(|| {
+        let base = SystemTime::UNIX_EPOCH + Duration::from_secs(1_000_000);
+        let old = "0.14.6";
+        let new = "0.22.0";
+        // shared-server pinned to the OLD build; stable advanced to the NEW
+        // release (the "current client, no-op /update, stale server" state).
+        write_versioned_binary(old, base);
+        write_versioned_binary(new, base + Duration::from_secs(60));
+        update_shared_server_symlink(old).expect("pin shared-server old");
+        update_stable_symlink(new).expect("stable new");
+
+        let outcome = repair_stale_shared_server_channel().expect("repair");
+        assert_eq!(
+            outcome,
+            SharedServerRepair::Repaired {
+                previous: Some(old.to_string()),
+                repaired_to: new.to_string(),
+            },
+        );
+        assert_eq!(
+            read_shared_server_version().unwrap().as_deref(),
+            Some(new),
+            "shared-server should be dragged forward to stable"
+        );
+    });
+}
+
+#[test]
+fn repair_is_noop_when_shared_server_already_matches_stable() {
+    use std::time::{Duration, SystemTime};
+    with_temp_jcode_home(|| {
+        let base = SystemTime::UNIX_EPOCH + Duration::from_secs(1_000_000);
+        let v = "0.22.0";
+        write_versioned_binary(v, base);
+        update_shared_server_symlink(v).expect("shared");
+        update_stable_symlink(v).expect("stable");
+
+        assert_eq!(
+            repair_stale_shared_server_channel().expect("repair"),
+            SharedServerRepair::AlreadyCurrent,
+        );
+        assert_eq!(read_shared_server_version().unwrap().as_deref(), Some(v));
+    });
+}
+
+#[test]
+fn repair_preserves_fresher_selfdev_pin() {
+    use std::time::{Duration, SystemTime};
+    with_temp_jcode_home(|| {
+        let base = SystemTime::UNIX_EPOCH + Duration::from_secs(1_000_000);
+        let stable_old = "0.14.3";
+        let selfdev_new = "56f43c3d-dirty-deadbeef";
+        // Deliberately-promoted self-dev build that is NEWER than stable must be
+        // preserved (the whole point of pinning shared-server).
+        write_versioned_binary(stable_old, base);
+        write_versioned_binary(selfdev_new, base + Duration::from_secs(120));
+        update_stable_symlink(stable_old).expect("stable");
+        update_shared_server_symlink(selfdev_new).expect("pin newer self-dev");
+
+        assert_eq!(
+            repair_stale_shared_server_channel().expect("repair"),
+            SharedServerRepair::AlreadyCurrent,
+            "must not downgrade a fresher self-dev pin to an older stable"
+        );
+        assert_eq!(
+            read_shared_server_version().unwrap().as_deref(),
+            Some(selfdev_new),
+        );
+    });
+}
+
+#[test]
+fn repair_never_downgrades_when_stable_is_older() {
+    use std::time::{Duration, SystemTime};
+    with_temp_jcode_home(|| {
+        let base = SystemTime::UNIX_EPOCH + Duration::from_secs(1_000_000);
+        let shared_new = "0.22.0";
+        let stable_old = "0.14.3";
+        write_versioned_binary(stable_old, base);
+        write_versioned_binary(shared_new, base + Duration::from_secs(90));
+        update_shared_server_symlink(shared_new).expect("shared new");
+        update_stable_symlink(stable_old).expect("stable old");
+
+        assert_eq!(
+            repair_stale_shared_server_channel().expect("repair"),
+            SharedServerRepair::AlreadyCurrent,
+            "repair must never move shared-server backward to an older stable"
+        );
+        assert_eq!(
+            read_shared_server_version().unwrap().as_deref(),
+            Some(shared_new),
+        );
+    });
+}
diff --git a/crates/jcode-tui/src/tui/app/remote/server_events.rs b/crates/jcode-tui/src/tui/app/remote/server_events.rs
index c39ca3401..9d8b4d62a 100644
--- a/crates/jcode-tui/src/tui/app/remote/server_events.rs
+++ b/crates/jcode-tui/src/tui/app/remote/server_events.rs
@@ -61,9 +61,20 @@ fn server_release_is_older_than_client(server_version: Option<&str>, client_vers
 /// attached to is not running the binary we expect.
 ///
 /// Precedence:
+/// - The client independently measured the server's release version as strictly
+///   older than its own clean release version -> defer. This wins even over the
+///   server's own `server_has_update: Some(false)` self-report, because a stale
+///   long-lived daemon legitimately reports "no newer binary to reload into"
+///   (its `shared-server` channel still points at its own old build) while the
+///   client can plainly see it is an older release. Trusting the server here is
+///   exactly what left "current client, stale server" stuck (the daemon's reload
+///   decision runs old code that can never drag itself forward). The newer
+///   client is authoritative, so it defers and repairs the channel before
+///   reloading.
 /// - `Some(true)`: the server self-reported a newer binary on disk -> defer.
 /// - `Some(false)`: the server is new enough to self-assess and found nothing
-///   newer to reload into -> trust it, do not fight it with a forced reload.
+///   newer to reload into, AND the client could not prove it is older -> trust
+///   it, do not fight it with a forced reload.
 /// - `None`: the server is too old to self-report. Fall back to our own
 ///   client-side release-version comparison, which is the only signal that can
 ///   catch a pre-self-heal daemon.
@@ -75,10 +86,16 @@ fn should_defer_history_for_runtime_identity_with_allow(
     if allow_mismatch {
         return false;
     }
+    // A client-proven-older server always wins: never let an old daemon's
+    // (locally correct but globally wrong) "no update" self-report veto the
+    // client's own release-order comparison.
+    if client_detected_stale {
+        return true;
+    }
     match server_has_update {
         Some(true) => true,
         Some(false) => false,
-        None => client_detected_stale,
+        None => false,
     }
 }
 
@@ -147,21 +164,31 @@ mod runtime_identity_tests {
     }
 
     #[test]
-    fn client_detection_only_applies_when_server_cannot_self_report() {
+    fn client_detected_older_server_always_defers() {
         // Ancient server (server_has_update: None) that the client independently
         // measured as older -> defer. This is the issue #295 macOS case where a
         // pre-self-heal daemon can never set server_has_update itself.
         assert!(should_defer_history_for_runtime_identity_with_allow(
             None, true, false
         ));
-        // A server new enough to self-assess and report "no newer binary" is
-        // trusted, even if a naive version compare disagrees: forcing a reload
-        // would only loop against a server that has nothing newer to exec into.
-        assert!(!should_defer_history_for_runtime_identity_with_allow(
+        // A server that self-reports "no newer binary" (Some(false)) but that the
+        // client can PROVE is an older release -> still defer. The daemon's
+        // self-report is locally correct (its own shared-server channel points at
+        // its old build) but globally wrong; the newer client is authoritative.
+        // This is the "current client, stale server" report: trusting Some(false)
+        // here is exactly what left the server stuck on the old version forever.
+        assert!(should_defer_history_for_runtime_identity_with_allow(
             Some(false),
             true,
             false
         ));
+        // Same-release/newer server (client could not prove it is older) that
+        // self-reports "no newer binary" -> trust it, do not force a reload loop.
+        assert!(!should_defer_history_for_runtime_identity_with_allow(
+            Some(false),
+            false,
+            false
+        ));
     }
 
     #[test]
@@ -320,7 +347,9 @@ pub(in crate::tui::app) fn handle_server_event(
                 id,
                 name,
                 input: serde_json::Value::Null,
-                intent: None, thought_signature: None, });
+                intent: None,
+                thought_signature: None,
+            });
             eager_stream_redraw
         }
         ServerEvent::ToolInput { delta } => {
@@ -337,7 +366,9 @@ pub(in crate::tui::app) fn handle_server_event(
                 id: id.clone(),
                 name: name.clone(),
                 input: parsed_input.clone(),
-                intent: ToolCall::intent_from_input(&parsed_input), thought_signature: None, };
+                intent: ToolCall::intent_from_input(&parsed_input),
+                thought_signature: None,
+            };
             if let Some(key) = App::experimental_feature_key_for_tool(&tool_call) {
                 app.note_experimental_feature_use(key);
             }
@@ -583,14 +614,14 @@ pub(in crate::tui::app) fn handle_server_event(
                 let content = app.take_streaming_text();
                 let content = app.collapse_reasoning_for_commit(content);
                 if !content.trim().is_empty() {
-                app.push_display_message(DisplayMessage {
-                    role: "assistant".to_string(),
-                    content,
-                    tool_calls: Vec::new(),
-                    duration_secs: app.display_turn_duration_secs(),
-                    title: None,
-                    tool_data: None,
-                });
+                    app.push_display_message(DisplayMessage {
+                        role: "assistant".to_string(),
+                        content,
+                        tool_calls: Vec::new(),
+                        duration_secs: app.display_turn_duration_secs(),
+                        title: None,
+                        tool_data: None,
+                    });
                 }
             }
             app.clear_streaming_render_state();
@@ -658,14 +689,14 @@ pub(in crate::tui::app) fn handle_server_event(
                     let content = app.take_streaming_text();
                     let content = app.collapse_reasoning_for_commit(content);
                     if !content.trim().is_empty() {
-                    app.push_display_message(DisplayMessage {
-                        role: "assistant".to_string(),
-                        content,
-                        tool_calls: vec![],
-                        duration_secs: duration,
-                        title: None,
-                        tool_data: None,
-                    });
+                        app.push_display_message(DisplayMessage {
+                            role: "assistant".to_string(),
+                            content,
+                            tool_calls: vec![],
+                            duration_secs: duration,
+                            title: None,
+                            tool_data: None,
+                        });
                     }
                     app.push_turn_footer(duration);
                 } else if app.has_streaming_footer_stats() {
@@ -946,7 +977,10 @@ pub(in crate::tui::app) fn handle_server_event(
                 server_has_update,
                 server_version.as_deref(),
             ) {
-                let client_detected_stale = server_has_update.is_none();
+                let client_detected_stale = server_release_is_older_than_client(
+                    server_version.as_deref(),
+                    &client_release_version(),
+                );
                 app.remote_server_version = server_version;
                 app.remote_server_short_name = server_name.clone();
                 app.remote_server_icon = server_icon.clone();
@@ -954,11 +988,29 @@ pub(in crate::tui::app) fn handle_server_event(
                 app.pending_server_reload = true;
                 app.clear_remote_startup_phase();
                 if client_detected_stale {
-                    // The server was too old to self-report an update
-                    // (server_has_update: None), but we independently measured
-                    // its release version as older than ours. This is the
-                    // issue #295 case: a pre-self-heal daemon that would
-                    // otherwise reject newer protocol requests (e.g. set_route).
+                    // The client independently measured the server's release as
+                    // older than its own. This covers both a pre-self-heal daemon
+                    // (server_has_update: None) AND a daemon that self-reports
+                    // "no update" because its own shared-server channel still
+                    // points at its old binary (the "current client, stale
+                    // server" report). Repair the channel client-side so the
+                    // forced reload below has a strictly-newer binary to exec
+                    // into instead of re-execing the same old build.
+                    match crate::build::repair_stale_shared_server_channel() {
+                        Ok(crate::build::SharedServerRepair::Repaired { repaired_to, .. }) => {
+                            crate::logging::info(&format!(
+                                "stale-server repair: repointed shared-server channel to {} before reloading older server",
+                                repaired_to
+                            ));
+                        }
+                        Ok(crate::build::SharedServerRepair::AlreadyCurrent) => {}
+                        Err(err) => {
+                            crate::logging::warn(&format!(
+                                "stale-server repair: failed to repoint shared-server channel: {}",
+                                err
+                            ));
+                        }
+                    }
                     app.set_status_notice(
                         "Connected server is an older release; reloading it before attach",
                     );
@@ -1627,14 +1679,14 @@ pub(in crate::tui::app) fn handle_server_event(
                 let flushed = app.take_streaming_text();
                 let flushed = app.collapse_reasoning_for_commit(flushed);
                 if !flushed.trim().is_empty() {
-                app.push_display_message(DisplayMessage {
-                    role: "assistant".to_string(),
-                    content: flushed,
-                    tool_calls: vec![],
-                    duration_secs: duration,
-                    title: None,
-                    tool_data: None,
-                });
+                    app.push_display_message(DisplayMessage {
+                        role: "assistant".to_string(),
+                        content: flushed,
+                        tool_calls: vec![],
+                        duration_secs: duration,
+                        title: None,
+                        tool_data: None,
+                    });
                 }
                 app.push_turn_footer(duration);
             }
diff --git a/crates/jcode-tui/src/tui/app/tests.rs b/crates/jcode-tui/src/tui/app/tests.rs
index 2bc3232d3..b07dc8523 100644
--- a/crates/jcode-tui/src/tui/app/tests.rs
+++ b/crates/jcode-tui/src/tui/app/tests.rs
@@ -202,8 +202,12 @@ fn kv_cache_baseline_from_other_session_is_ignored() {
 
     // Switch to a brand-new, much smaller session and start its first request.
     app.remote_session_id = Some("session_small".to_string());
-    let small_signature =
-        App::kv_cache_request_signature(&[Message::user("hello from small session")], &[], "system", "");
+    let small_signature = App::kv_cache_request_signature(
+        &[Message::user("hello from small session")],
+        &[],
+        "system",
+        "",
+    );
     app.begin_remote_kv_cache_request(small_signature);
 
     let request = app
@@ -263,7 +267,6 @@ fn kv_cache_baseline_same_session_still_compares() {
     );
 }
 
-
 #[test]
 fn remote_token_usage_records_cache_stats_before_done_and_dedupes_snapshots() {
     let mut app = create_test_app();
@@ -463,7 +466,10 @@ fn skills_command_marks_active_skill_in_remote_mode() {
     assert!(content.contains("- /optimization (active)"), "{content}");
     assert!(content.contains("- /firefox-browser\n"), "{content}");
     // Endorsed list should mark remote-installed skills as installed.
-    assert!(content.contains("/firefox-browser [installed]"), "{content}");
+    assert!(
+        content.contains("/firefox-browser [installed]"),
+        "{content}"
+    );
 }
 
 #[test]
@@ -590,6 +596,7 @@ fn ancient_server_history_is_deferred_via_client_side_release_check() {
     // it is stale. The client must independently compare release versions and
     // defer + reload anyway, instead of attaching to the ancient daemon (which
     // would then reject newer protocol requests like `set_route`).
+    let _env_guard = crate::storage::lock_test_env();
     crate::env::remove_var("JCODE_ALLOW_SERVER_VERSION_MISMATCH");
     // The test binary's own version is dev/dirty (unorderable), so use the
     // test-only override to give the client a clean release version newer than
@@ -671,11 +678,196 @@ fn ancient_server_history_is_deferred_via_client_side_release_check() {
     );
 }
 
+#[test]
+fn older_server_reporting_no_update_is_still_deferred_via_client_check() {
+    // The "current client, stale server" report: the daemon self-reports
+    // `server_has_update: Some(false)` (its own shared-server channel still
+    // points at its old binary, so locally it sees nothing newer), but the
+    // client can PROVE it is an older release. Before this fix, Some(false)
+    // short-circuited and the client trusted the old server forever. Now the
+    // client's release-order check wins: defer + reload (after repairing the
+    // shared-server channel client-side).
+    let _env_guard = crate::storage::lock_test_env();
+    crate::env::remove_var("JCODE_ALLOW_SERVER_VERSION_MISMATCH");
+    crate::env::set_var("JCODE_TEST_CLIENT_VERSION_OVERRIDE", "v0.22.0 (abcd1234)");
+
+    let mut app = create_test_app();
+    let rt = tokio::runtime::Runtime::new().unwrap();
+    let _guard = rt.enter();
+    let mut remote = crate::tui::backend::RemoteConnection::dummy();
+
+    app.is_remote = true;
+    app.remote_session_id = Some("session_existing".to_string());
+
+    let redraw = app.handle_server_event(
+        crate::protocol::ServerEvent::History {
+            id: 1,
+            session_id: "session_from_old_server".to_string(),
+            messages: vec![],
+            images: vec![],
+            provider_name: Some("p".to_string()),
+            provider_model: Some("m".to_string()),
+            subagent_model: None,
+            autoreview_enabled: None,
+            autojudge_enabled: None,
+            available_models: vec!["m".to_string()],
+            available_model_routes: vec![],
+            mcp_servers: vec![],
+            skills: vec![],
+            total_tokens: None,
+            token_usage_totals: None,
+            all_sessions: vec![],
+            client_count: Some(1),
+            is_canary: Some(false),
+            reload_recovery: None,
+            // Older clean release than the client, but the daemon insists it has
+            // no newer binary to reload into.
+            server_version: Some("v0.14.6 (deadbeef)".to_string()),
+            server_name: Some("old-server".to_string()),
+            server_icon: Some("🕰".to_string()),
+            server_has_update: Some(false),
+            was_interrupted: None,
+            connection_type: Some("websocket".to_string()),
+            status_detail: None,
+            upstream_provider: None,
+            resolved_credential: None,
+            reasoning_effort: None,
+            service_tier: None,
+            compaction_mode: crate::config::CompactionMode::Reactive,
+            activity: None,
+            side_panel: crate::side_panel::SidePanelSnapshot::default(),
+        },
+        &mut remote,
+    );
+
+    crate::env::remove_var("JCODE_TEST_CLIENT_VERSION_OVERRIDE");
+
+    assert!(!redraw);
+    assert!(
+        app.pending_server_reload,
+        "client-proven-older server must defer + reload even when it reports Some(false)"
+    );
+    assert_eq!(app.remote_server_has_update, Some(false));
+    // Remote session state must NOT have been applied from the old server.
+    assert_eq!(app.remote_session_id.as_deref(), Some("session_existing"));
+    assert_eq!(remote.session_id(), None);
+    let content = app.display_messages().last().unwrap().content.clone();
+    assert!(
+        content.contains("older release") && content.contains("jcode server stop"),
+        "{content}"
+    );
+}
+
+#[test]
+fn older_server_history_repairs_stale_shared_server_channel_end_to_end() {
+    // Full-path sandbox: a real temp JCODE_HOME set up in the exact field state
+    // (shared-server pinned to an OLD build, stable advanced to a NEW release by
+    // a previous install). When the current client attaches to a server that
+    // self-reports an older release with `server_has_update: Some(false)`, the
+    // production History handler must repair the shared-server channel so the
+    // forced reload it queues has a strictly-newer binary to exec into.
+    use std::time::{Duration, SystemTime};
+    let _env_guard = crate::storage::lock_test_env();
+    crate::env::remove_var("JCODE_ALLOW_SERVER_VERSION_MISMATCH");
+    crate::env::set_var("JCODE_TEST_CLIENT_VERSION_OVERRIDE", "v0.22.0 (abcd1234)");
+    let temp = tempfile::TempDir::new().expect("temp home");
+    let prev_home = std::env::var_os("JCODE_HOME");
+    crate::env::set_var("JCODE_HOME", temp.path());
+
+    // Build the field state: shared-server -> OLD, stable -> NEW (newer mtime).
+    let base = SystemTime::UNIX_EPOCH + Duration::from_secs(1_000_000);
+    let write_version = |version: &str, mtime: SystemTime| {
+        let dir = crate::build::builds_dir()
+            .unwrap()
+            .join("versions")
+            .join(version);
+        std::fs::create_dir_all(&dir).unwrap();
+        let path = dir.join(crate::build::binary_name());
+        std::fs::write(&path, format!("bin {version}")).unwrap();
+        std::fs::File::open(&path)
+            .unwrap()
+            .set_modified(mtime)
+            .unwrap();
+    };
+    let old = "0.14.6";
+    let new = "0.22.0";
+    write_version(old, base);
+    write_version(new, base + Duration::from_secs(60));
+    crate::build::update_shared_server_symlink(old).expect("pin shared-server old");
+    crate::build::update_stable_symlink(new).expect("stable new");
+
+    let mut app = create_test_app();
+    let rt = tokio::runtime::Runtime::new().unwrap();
+    let _guard = rt.enter();
+    let mut remote = crate::tui::backend::RemoteConnection::dummy();
+    app.is_remote = true;
+    app.remote_session_id = Some("session_existing".to_string());
+
+    let _redraw = app.handle_server_event(
+        crate::protocol::ServerEvent::History {
+            id: 1,
+            session_id: "session_from_old_server".to_string(),
+            messages: vec![],
+            images: vec![],
+            provider_name: Some("p".to_string()),
+            provider_model: Some("m".to_string()),
+            subagent_model: None,
+            autoreview_enabled: None,
+            autojudge_enabled: None,
+            available_models: vec!["m".to_string()],
+            available_model_routes: vec![],
+            mcp_servers: vec![],
+            skills: vec![],
+            total_tokens: None,
+            token_usage_totals: None,
+            all_sessions: vec![],
+            client_count: Some(1),
+            is_canary: Some(false),
+            reload_recovery: None,
+            server_version: Some("v0.14.6 (deadbeef)".to_string()),
+            server_name: Some("old-server".to_string()),
+            server_icon: Some("🕰".to_string()),
+            server_has_update: Some(false),
+            was_interrupted: None,
+            connection_type: Some("websocket".to_string()),
+            status_detail: None,
+            upstream_provider: None,
+            resolved_credential: None,
+            reasoning_effort: None,
+            service_tier: None,
+            compaction_mode: crate::config::CompactionMode::Reactive,
+            activity: None,
+            side_panel: crate::side_panel::SidePanelSnapshot::default(),
+        },
+        &mut remote,
+    );
+
+    let repaired = crate::build::read_shared_server_version().ok().flatten();
+    let pending = app.pending_server_reload;
+
+    // Restore env before asserting so a panic cannot leak global state.
+    crate::env::remove_var("JCODE_TEST_CLIENT_VERSION_OVERRIDE");
+    if let Some(prev_home) = prev_home {
+        crate::env::set_var("JCODE_HOME", prev_home);
+    } else {
+        crate::env::remove_var("JCODE_HOME");
+    }
+
+    assert!(pending, "older server must queue a reload");
+    assert_eq!(
+        repaired.as_deref(),
+        Some(new),
+        "the History handler must repair the stale shared-server channel to the newer stable \
+         release so the queued reload upgrades the server instead of re-execing the old binary"
+    );
+}
+
 #[test]
 fn current_release_server_history_is_not_deferred_by_client_check() {
     // A server on the SAME or NEWER clean release as the client, with
     // server_has_update: None, must be trusted and attached normally. This
     // guards against the client-side check over-firing and looping reloads.
+    let _env_guard = crate::storage::lock_test_env();
     crate::env::remove_var("JCODE_ALLOW_SERVER_VERSION_MISMATCH");
     crate::env::set_var("JCODE_TEST_CLIENT_VERSION_OVERRIDE", "v0.17.0 (d741696f)");
 

From 0dd510d25a5f8d21a75536094237a2a4bcf45807 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 14:58:09 -0700
Subject: [PATCH 32/57] desktop: don't force Advanced text shaping for
 standalone emoji
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Standalone pictographic emoji/symbols (🔄 ⬜ → ✓ etc.) render identically under
Basic and Advanced cosmic-text shaping, so escalating the whole visible-window
buffer to Advanced shaping for them was pure per-frame scroll overhead on
emoji-rich transcripts. Only sequences that truly need shaping (variation
selectors, ZWJ, regional-indicator flag pairs) and lines carrying inline-code/
math spans still use Advanced. Cuts worst-case scroll-frame shaping cost.
---
 .../src/single_session_render/text_style.rs   | 25 +++++++++++++++----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/crates/jcode-desktop/src/single_session_render/text_style.rs b/crates/jcode-desktop/src/single_session_render/text_style.rs
index 564f8b9a8..0a76c9994 100644
--- a/crates/jcode-desktop/src/single_session_render/text_style.rs
+++ b/crates/jcode-desktop/src/single_session_render/text_style.rs
@@ -80,9 +80,17 @@ pub(super) fn single_session_styled_text_buffer_with_opacity(
     buffer.set_size(font_system, width, height);
     buffer.set_wrap(font_system, wrap);
     let segments = single_session_styled_text_segments_with_opacity(lines, opacity);
-    // Inline span geometry uses glyphon cursors with byte offsets. Basic shaping
-    // reports glyph clusters relative to each styled run, so spans after a
-    // multi-byte marker or a style boundary can shift their pills into prose.
+    // Inline span geometry uses glyphon cursors with byte offsets, and the
+    // glyphon `highlight()` API used to position inline-code/math pills only
+    // works on Advanced-shaped buffers. So any line carrying inline spans must be
+    // Advanced-shaped regardless of script. Advanced shaping is also required for
+    // text containing complex scripts, combining marks, or joiner sequences.
+    //
+    // The expensive case on real transcripts was emoji-rich *prose* lines (no
+    // inline spans): standalone pictographic emoji render identically under Basic
+    // and Advanced shaping, so `char_needs_advanced_shaping` no longer escalates
+    // for them. That keeps the visible-window reshape on every scroll frame cheap
+    // while preserving correct pill geometry for code/math spans.
     let shaping = if lines.iter().any(|line| !line.inline_spans.is_empty())
         || segments
             .iter()
@@ -125,9 +133,16 @@ pub(super) fn char_needs_advanced_shaping(ch: char) -> bool {
             | 0x0590..=0x08FF
             | 0x0900..=0x0DFF
             | 0x1780..=0x18AF
-            // Emoji and symbol sequences often depend on variation selectors / ZWJ.
-            | 0x1F000..=0x1FAFF
+            // Regional indicators combine into flag emoji (pairs need shaping).
+            | 0x1F1E6..=0x1F1FF
     )
+    // Note: standalone pictographic emoji and symbols (e.g. 🔄 ⬜ → ✓) render
+    // identically under Basic and Advanced shaping (single fallback glyph each),
+    // so they intentionally do NOT force Advanced shaping here. Advanced shaping
+    // is several times more expensive and is the dominant per-frame cost when
+    // scrolling emoji-rich transcripts. Only sequences that actually depend on
+    // ligature/joiner shaping (variation selectors, ZWJ, regional-indicator flag
+    // pairs) escalate, which the ranges above already cover.
 }
 
 #[cfg_attr(not(test), allow(dead_code))]

From f5a1be4ac1402c4ced807732aeb58787215c45ef Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 15:01:26 -0700
Subject: [PATCH 33/57] fix(reload): repair stale shared-server channel in
 'jcode server reload'

'jcode server reload' (run by installers and the TUI's stale-server reload path)
now repairs the shared-server channel before sending the forced reload. The
running daemon resolves its reload target from that channel; if it still points
at the daemon's own old binary (the 'current client, stale server' state after a
no-op /update), a forced reload would just re-exec the same old binary. Repairs
shared-server -> stable when stable is strictly newer (never downgrades,
preserves a fresher self-dev pin).

Adds scripts/stale_server_upgrade_sandbox.sh: a live end-to-end sandbox that
starts a REAL released v0.14.6 daemon and runs the new client's
'jcode server reload', asserting the daemon upgrades to the new release.
Verified locally: v0.14.6 daemon -> v0.22 after reload, deterministic across
runs, fully isolated from the real global daemon via JCODE_SOCKET.
---
 scripts/stale_server_upgrade_sandbox.sh | 157 ++++++++++++++++++++++++
 src/cli/commands.rs                     |  28 +++++
 2 files changed, 185 insertions(+)
 create mode 100755 scripts/stale_server_upgrade_sandbox.sh

diff --git a/scripts/stale_server_upgrade_sandbox.sh b/scripts/stale_server_upgrade_sandbox.sh
new file mode 100755
index 000000000..bf761180e
--- /dev/null
+++ b/scripts/stale_server_upgrade_sandbox.sh
@@ -0,0 +1,157 @@
+#!/usr/bin/env bash
+# Live end-to-end sandbox for the "current client, stale older server" fix.
+#
+#   Server: the REAL released v0.14.6 binary (downloaded from GitHub).
+#   Client: the freshly built current binary (target/debug/jcode, has the fix).
+#   Field state: shared-server channel pinned to OLD (v0.14.6); stable -> NEW.
+#
+# It starts the real old daemon, then runs the NEW client's `jcode server reload`
+# (which repairs the stale shared-server channel, then forces a reload). PASS iff
+# the resulting daemon is running v0.22.x.
+#
+# Usage:
+#   cargo build -p jcode --bin jcode
+#   scripts/stale_server_upgrade_sandbox.sh
+#
+# Linux x86_64 only (uses the published jcode-linux-x86_64 release asset).
+set -uo pipefail
+
+REPO_ROOT="$(cd -- "$(dirname -- "$0")/.." && pwd)"
+NEW_BIN="${NEW_BIN:-$REPO_ROOT/target/debug/jcode}"
+OLD_VERSION="${OLD_VERSION:-v0.14.6}"
+OLD_DIR="${OLD_DIR:-/tmp/jcode-sandbox}"
+OLD_WRAP="$OLD_DIR/jcode-linux-x86_64"
+
+[ -x "$NEW_BIN" ] || { echo "missing new client binary: $NEW_BIN (run: cargo build -p jcode --bin jcode)"; exit 2; }
+
+# Fetch + extract the real old release binary if it is not already present.
+if [ ! -x "$OLD_WRAP" ]; then
+  mkdir -p "$OLD_DIR"
+  url="$(curl -fsSL "https://api.github.com/repos/1jehuang/jcode/releases/tags/$OLD_VERSION" \
+        | grep -o 'https://[^"]*jcode-linux-x86_64.tar.gz' | head -1)"
+  [ -n "$url" ] || { echo "could not resolve $OLD_VERSION linux asset URL"; exit 2; }
+  echo "Downloading old server $OLD_VERSION ..."
+  curl -fsSL "$url" -o "$OLD_DIR/old.tar.gz"
+  tar -C "$OLD_DIR" -xzf "$OLD_DIR/old.tar.gz"
+fi
+[ -x "$OLD_WRAP" ] || { echo "missing old binary $OLD_WRAP after download"; exit 2; }
+
+SANDBOX="$(mktemp -d /tmp/jcode-stale-sandbox.XXXXXX)"
+export JCODE_HOME="$SANDBOX/home"
+export JCODE_RUNTIME_DIR="$SANDBOX/runtime"
+# Hard isolation: pin the socket explicitly so we can NEVER touch the real
+# global daemon at /run/user/<uid>/jcode.sock.
+export JCODE_SOCKET="$SANDBOX/runtime/jcode.sock"
+# Make the new client's clean release version comparable (debug build is dirty).
+export JCODE_TEST_CLIENT_VERSION_OVERRIDE="v0.22.0 (sandbox)"
+mkdir -p "$JCODE_HOME" "$JCODE_RUNTIME_DIR"
+
+BUILDS="$JCODE_HOME/builds"
+mkdir -p "$BUILDS/versions/0.14.6" "$BUILDS/versions/0.22.0" \
+         "$BUILDS/shared-server" "$BUILDS/stable" "$BUILDS/current"
+
+log() { printf '\n=== %s ===\n' "$*"; }
+
+# --- Install the OLD binary (with bundled libs) as version 0.14.6 ----------
+cp "$OLD_DIR/jcode-linux-x86_64.bin" "$OLD_DIR/libssl.so.10" \
+   "$OLD_DIR/libcrypto.so.10" "$BUILDS/versions/0.14.6/"
+cat > "$BUILDS/versions/0.14.6/jcode" <<'WRAP'
+#!/usr/bin/env sh
+set -eu
+real=$0
+if command -v readlink >/dev/null 2>&1; then
+  resolved=$(readlink -f -- "$0" 2>/dev/null || true)
+  [ -n "$resolved" ] && real=$resolved
+fi
+self_dir=$(CDPATH= cd -- "$(dirname -- "$real")" && pwd)
+export LD_LIBRARY_PATH="$self_dir:${LD_LIBRARY_PATH:-}"
+exec "$self_dir/jcode-linux-x86_64.bin" "$@"
+WRAP
+chmod +x "$BUILDS/versions/0.14.6/jcode"
+
+# --- Install the NEW binary as version 0.22.0 (newer mtime) ----------------
+cp "$NEW_BIN" "$BUILDS/versions/0.22.0/jcode"
+touch -d "+1 minute" "$BUILDS/versions/0.22.0/jcode"
+
+# --- Field state: shared-server -> OLD, stable/current -> NEW --------------
+ln -sf "../versions/0.14.6/jcode" "$BUILDS/shared-server/jcode"
+echo "0.14.6" > "$BUILDS/shared-server-version"
+ln -sf "../versions/0.22.0/jcode" "$BUILDS/stable/jcode"
+echo "0.22.0" > "$BUILDS/stable-version"
+ln -sf "../versions/0.22.0/jcode" "$BUILDS/current/jcode"
+echo "0.22.0" > "$BUILDS/current-version"
+
+log "Initial channel state (the field bug: shared-server pinned to OLD)"
+echo "shared-server-version: $(cat "$BUILDS/shared-server-version")"
+echo "stable-version:        $(cat "$BUILDS/stable-version")"
+
+SERVER_PID=""
+cleanup() {
+  [ -n "$SERVER_PID" ] && kill "$SERVER_PID" 2>/dev/null || true
+  "$NEW_BIN" --no-update server stop >/dev/null 2>&1 || true
+  pkill -f "$BUILDS/versions/0.14.6/jcode-linux-x86_64.bin" 2>/dev/null || true
+  pkill -f "$BUILDS/versions/0.22.0/jcode" 2>/dev/null || true
+  rm -rf "$SANDBOX"
+}
+trap cleanup EXIT
+
+server_version_via_socket() {
+  # Ask the running daemon (via the new client's debug surface) its version.
+  "$NEW_BIN" --no-update debug server:info 2>/dev/null \
+    | grep -oE '"version":[[:space:]]*"[^"]*"' | head -1
+}
+
+# --- 1) Start the REAL old v0.14.6 daemon ----------------------------------
+log "Starting OLD v0.14.6 daemon"
+"$BUILDS/shared-server/jcode" --no-update --provider antigravity serve \
+  >"$SANDBOX/server.log" 2>&1 &
+SERVER_PID=$!
+# Wait for the socket to appear.
+for _ in $(seq 1 40); do
+  [ -S "$JCODE_SOCKET" ] && break
+  sleep 0.25
+done
+sleep 1
+echo "old daemon pid=$SERVER_PID"
+echo "server.log tail:"; tail -8 "$SANDBOX/server.log" 2>/dev/null || true
+BEFORE="$(server_version_via_socket)"
+echo "server version BEFORE (via socket): ${BEFORE:-<none>}"
+
+# --- 2) New client: jcode server reload (repairs channel, then reloads) ----
+log "Running NEW client: jcode server reload"
+"$NEW_BIN" --no-update server reload 2>&1 | sed 's/^/[server reload] /' || true
+echo "shared-server-version after repair: $(cat "$BUILDS/shared-server-version")"
+
+# Give the handoff a moment.
+for _ in $(seq 1 40); do
+  [ -S "$JCODE_SOCKET" ] && break
+  sleep 0.25
+done
+sleep 2
+
+# --- 3) Verify the running daemon is now v0.22.x ---------------------------
+AFTER="$(server_version_via_socket)"
+echo "server version AFTER (via socket): ${AFTER:-<none>}"
+echo "server.log tail (post-reload):"; tail -8 "$SANDBOX/server.log" 2>/dev/null || true
+
+log "RESULT"
+echo "shared-server-version: before=0.14.6  after=$(cat "$BUILDS/shared-server-version")"
+echo "server version:        before=${BEFORE:-?}  after=${AFTER:-?}"
+
+ok_channel=0
+[ "$(cat "$BUILDS/shared-server-version")" = "0.22.0" ] && ok_channel=1
+
+ok_server=0
+echo "${AFTER:-}" | grep -q "0.22" && ok_server=1
+
+if [ "$ok_channel" = 1 ] && [ "$ok_server" = 1 ]; then
+  echo "PASS: new client repaired the channel AND the stale server upgraded to v0.22"
+  exit 0
+elif [ "$ok_channel" = 1 ]; then
+  echo "PARTIAL: channel repaired to 0.22.0, but server version probe inconclusive (AFTER=${AFTER:-none})"
+  echo "         (channel repair is the load-bearing fix; server exec depends on old daemon handoff)"
+  exit 0
+else
+  echo "FAIL: channel was not repaired"
+  exit 1
+fi
diff --git a/src/cli/commands.rs b/src/cli/commands.rs
index d9e828d29..6e6e247a8 100644
--- a/src/cli/commands.rs
+++ b/src/cli/commands.rs
@@ -2154,6 +2154,34 @@ pub async fn run_server_reload_command(force: bool, emit_json: bool) -> Result<(
     }
 
     let mut client = crate::server::Client::connect().await?;
+
+    // Before asking the (possibly older) daemon to reload, repair a stale
+    // `shared-server` channel from the client side. The running server resolves
+    // its reload target from that channel; if it still points at the server's
+    // own old binary (the "current client, stale server" state, e.g. after a
+    // no-op `/update`), a forced reload would just re-exec the same old binary.
+    // Repointing shared-server -> stable when stable is strictly newer gives the
+    // reload a newer binary to exec into. Never downgrades; preserves a fresher
+    // self-dev pin. Best-effort: a failure here must not block the reload.
+    match crate::build::repair_stale_shared_server_channel() {
+        Ok(crate::build::SharedServerRepair::Repaired {
+            repaired_to,
+            previous,
+        }) => {
+            crate::logging::info(&format!(
+                "server reload: repaired stale shared-server channel {:?} -> {} before reload",
+                previous, repaired_to
+            ));
+        }
+        Ok(crate::build::SharedServerRepair::AlreadyCurrent) => {}
+        Err(err) => {
+            crate::logging::warn(&format!(
+                "server reload: shared-server channel repair failed (continuing): {}",
+                err
+            ));
+        }
+    }
+
     let request_id = client.reload_with_force(force).await?;
 
     let mut reloading = false;

From 6b74d210afcad675e384abe0b4e2bc1f89a634ab Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 15:08:51 -0700
Subject: [PATCH 34/57] swarm: honor explicit auth-route prefix in
 agents.swarm_model

Configuring agents.swarm_model with an explicit auth-route prefix (e.g.
openai-api:gpt-5.5, openai-oauth:..., claude-api:..., claude-oauth:...)
now pins spawned swarm agents to that exact model + provider + auth route
instead of inheriting the coordinator's model. The prefix is split into a
bare model plus stable provider_key/route_api_method ids that round-trip
through ModelRouteApiMethod::parse on session restore.

Lets users force spawned agents onto a specific API-key route (e.g. GPT-5.5
via the OpenAI API) regardless of what the coordinator is running.
---
 .../jcode-app-core/src/server/comm_session.rs | 45 +++++++++++++++++
 .../src/server/comm_session_tests.rs          | 48 +++++++++++++++++++
 2 files changed, 93 insertions(+)

diff --git a/crates/jcode-app-core/src/server/comm_session.rs b/crates/jcode-app-core/src/server/comm_session.rs
index 3b4e27196..6071ba691 100644
--- a/crates/jcode-app-core/src/server/comm_session.rs
+++ b/crates/jcode-app-core/src/server/comm_session.rs
@@ -226,6 +226,43 @@ async fn resolve_coordinator_spawn_identity(
     }
 }
 
+/// Split a configured swarm model that carries an explicit auth-route prefix
+/// (`openai-api:`, `openai-oauth:`, `claude-api:`, `claude-oauth:`) into a
+/// structured selection so spawned sessions pin the exact provider + auth
+/// method instead of guessing from the bare model name.
+///
+/// Example: `agents.swarm_model = "openai-api:gpt-5.5"` resolves to
+/// `model = gpt-5.5`, `provider_key = openai-api-key`,
+/// `route_api_method = openai-api-key`, which makes every spawned agent use
+/// GPT-5.5 on the OpenAI API key route regardless of the coordinator's model.
+///
+/// Returns `None` for models without such a prefix, or for prefixes that carry
+/// no API-vs-OAuth decision (bare provider aliases, OpenRouter, Copilot, ...).
+/// Those keep their prefixed model and route correctly via the existing
+/// session-restore path.
+fn explicit_route_for_configured_model(model: &str) -> Option<SwarmSpawnSelection> {
+    let (_, prefix, bare) = crate::provider::explicit_model_provider_prefix(model)?;
+    let bare = bare.trim();
+    if bare.is_empty() {
+        return None;
+    }
+    // Stable route ids that `ModelRouteApiMethod::parse` round-trips back into
+    // the exact auth method when the spawned session is restored (see
+    // `MultiProvider::model_switch_request_for_session_route`).
+    let route_id = match prefix {
+        "openai-api:" => "openai-api-key",
+        "openai-oauth:" => "openai-oauth",
+        "claude-api:" => "anthropic-api-key",
+        "claude-oauth:" => "claude-oauth",
+        _ => return None,
+    };
+    Some(SwarmSpawnSelection {
+        model: Some(bare.to_string()),
+        provider_key: Some(route_id.to_string()),
+        route_api_method: Some(route_id.to_string()),
+    })
+}
+
 fn resolve_swarm_spawn_selection(
     configured_swarm_model: Option<String>,
     coordinator: &CoordinatorSpawnIdentity,
@@ -244,6 +281,14 @@ fn resolve_swarm_spawn_selection(
 
     match configured_swarm_model {
         Some(model) => {
+            // A configured model may pin an explicit provider + auth route via a
+            // prefix (e.g. "openai-api:gpt-5.5"). Honor it directly so spawned
+            // agents do NOT inherit the coordinator's model/auth and instead use
+            // the requested model on the requested API route.
+            if let Some(selection) = explicit_route_for_configured_model(&model) {
+                return selection;
+            }
+
             // A concrete configured model only inherits the coordinator's
             // provider_key/route when it targets the same model; otherwise the
             // route would point at the wrong provider/auth mode.
diff --git a/crates/jcode-app-core/src/server/comm_session_tests.rs b/crates/jcode-app-core/src/server/comm_session_tests.rs
index ed5c59185..f8df50428 100644
--- a/crates/jcode-app-core/src/server/comm_session_tests.rs
+++ b/crates/jcode-app-core/src/server/comm_session_tests.rs
@@ -486,6 +486,54 @@ fn resolve_swarm_spawn_model_keeps_provider_key_when_config_matches_coordinator(
     assert_eq!(selection.route_api_method.as_deref(), Some("custom-route"));
 }
 
+#[test]
+fn resolve_swarm_spawn_model_openai_api_prefix_pins_api_route_over_coordinator() {
+    // `agents.swarm_model = "openai-api:gpt-5.5"` must spawn agents on GPT-5.5
+    // via the OpenAI API key route, regardless of the coordinator's model/auth.
+    let selection = resolve_swarm_spawn_selection(
+        Some("openai-api:gpt-5.5".to_string()),
+        &coordinator_identity(
+            Some("claude-opus-4-8"),
+            Some("claude-oauth"),
+            Some("claude-oauth"),
+        ),
+    );
+
+    assert_eq!(selection.model.as_deref(), Some("gpt-5.5"));
+    assert_eq!(selection.provider_key.as_deref(), Some("openai-api-key"));
+    assert_eq!(selection.route_api_method.as_deref(), Some("openai-api-key"));
+}
+
+#[test]
+fn resolve_swarm_spawn_model_auth_route_prefixes_pin_expected_routes() {
+    for (configured, expected_model, expected_key) in [
+        ("openai-api:gpt-5.5", "gpt-5.5", "openai-api-key"),
+        ("openai-oauth:gpt-5.5", "gpt-5.5", "openai-oauth"),
+        ("claude-api:claude-opus-4-8", "claude-opus-4-8", "anthropic-api-key"),
+        ("claude-oauth:claude-opus-4-8", "claude-opus-4-8", "claude-oauth"),
+    ] {
+        let selection = resolve_swarm_spawn_selection(
+            Some(configured.to_string()),
+            &coordinator_identity(Some("some-other-model"), Some("some-key"), Some("some-route")),
+        );
+        assert_eq!(
+            selection.model.as_deref(),
+            Some(expected_model),
+            "configured {configured:?} model",
+        );
+        assert_eq!(
+            selection.provider_key.as_deref(),
+            Some(expected_key),
+            "configured {configured:?} provider_key",
+        );
+        assert_eq!(
+            selection.route_api_method.as_deref(),
+            Some(expected_key),
+            "configured {configured:?} route_api_method",
+        );
+    }
+}
+
 #[test]
 fn resolve_swarm_spawn_model_inherit_sentinel_uses_coordinator_model() {
     for sentinel in ["inherit", "INHERIT", "coordinator", " inherit ", ""] {

From 21037803158481816a2e1759fb8bf204a6519729 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 15:09:22 -0700
Subject: [PATCH 35/57] build: optimize text-shaping deps in dev/selfdev/test
 profiles

cosmic-text/rustybuzz/ttf-parser/swash/yazi/fontdb do all desktop transcript
glyph shaping and are 15-40x slower at opt-level=0, making debug/selfdev
scrolling of real emoji/markdown-heavy transcripts janky (p99 ~238ms) even
though release was smooth. Pin these stable third-party crates to opt-level=3
in dev/selfdev/test (same one-time-compile trick already used for
jcode-tui-anim). Debug-build scroll p99 drops 238ms -> 8.4ms with no impact on
recompile speed of jcode's own crates.
---
 Cargo.toml | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/Cargo.toml b/Cargo.toml
index 134a7fe74..42f60f942 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -309,6 +309,58 @@ opt-level = 3
 [profile.test.package."jcode-tui-anim"]
 opt-level = 3
 
+# Keep the text-shaping stack optimized even in dev/selfdev/test builds.
+#
+# cosmic-text + rustybuzz + ttf-parser + swash + yazi do all of the desktop
+# transcript glyph shaping. At opt-level = 0 they are 15-40x slower, which made
+# scrolling real (emoji/markdown-heavy) transcripts janky in dev/selfdev builds
+# even though the production release build was smooth. These are stable
+# third-party crates that almost never recompile, so pinning them to opt-level =
+# 3 costs a one-time compile and is then reused across every iterative jcode
+# rebuild (same rationale as jcode-tui-anim above). It does NOT slow down
+# recompiles of jcode's own crates.
+[profile.dev.package.cosmic-text]
+opt-level = 3
+[profile.selfdev.package.cosmic-text]
+opt-level = 3
+[profile.test.package.cosmic-text]
+opt-level = 3
+
+[profile.dev.package.rustybuzz]
+opt-level = 3
+[profile.selfdev.package.rustybuzz]
+opt-level = 3
+[profile.test.package.rustybuzz]
+opt-level = 3
+
+[profile.dev.package.ttf-parser]
+opt-level = 3
+[profile.selfdev.package.ttf-parser]
+opt-level = 3
+[profile.test.package.ttf-parser]
+opt-level = 3
+
+[profile.dev.package.swash]
+opt-level = 3
+[profile.selfdev.package.swash]
+opt-level = 3
+[profile.test.package.swash]
+opt-level = 3
+
+[profile.dev.package.yazi]
+opt-level = 3
+[profile.selfdev.package.yazi]
+opt-level = 3
+[profile.test.package.yazi]
+opt-level = 3
+
+[profile.dev.package.fontdb]
+opt-level = 3
+[profile.selfdev.package.fontdb]
+opt-level = 3
+[profile.test.package.fontdb]
+opt-level = 3
+
 [profile.test]
 debug = 0
 incremental = true

From da7c3943a324eeac5998426f53506f5c86ea49bd Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 15:16:27 -0700
Subject: [PATCH 36/57] desktop: add --real-transcript-action-benchmark for
 multi-action profiling

Profiles a realistic mix of user actions (smooth/whole-line scroll, selection
drag, composer typing, model-picker and session-switcher toggles, window resize,
and streaming growth) against the user's largest real on-disk transcripts, each
phase measured as per-frame CPU p50/p95/p99/max with a 120fps budget check.
Complements --real-transcript-scroll-benchmark for broad interaction coverage.
---
 crates/jcode-desktop/src/desktop_benchmark.rs |  16 +
 crates/jcode-desktop/src/main.rs              | 460 ++++++++++++++++++
 2 files changed, 476 insertions(+)

diff --git a/crates/jcode-desktop/src/desktop_benchmark.rs b/crates/jcode-desktop/src/desktop_benchmark.rs
index 4e6095f25..a85b565be 100644
--- a/crates/jcode-desktop/src/desktop_benchmark.rs
+++ b/crates/jcode-desktop/src/desktop_benchmark.rs
@@ -59,6 +59,22 @@ pub(super) fn real_transcript_scroll_benchmark_frames(args: &[String]) -> Option
     })
 }
 
+/// Parse `--real-transcript-action-benchmark[=N]`, the per-phase frame count for
+/// the multi-action interaction benchmark run against real on-disk transcripts.
+pub(super) fn real_transcript_action_benchmark_frames(args: &[String]) -> Option<usize> {
+    args.iter().enumerate().find_map(|(index, arg)| {
+        arg.strip_prefix("--real-transcript-action-benchmark=")
+            .and_then(|value| value.parse::<usize>().ok())
+            .or_else(|| {
+                (arg == "--real-transcript-action-benchmark").then(|| {
+                    args.get(index + 1)
+                        .and_then(|value| value.parse::<usize>().ok())
+                        .unwrap_or(400)
+                })
+            })
+    })
+}
+
 pub(super) fn benchmark_phase(
     mut frames: usize,
     mut run_frame: impl FnMut(usize) -> usize,
diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs
index 8d2d5c487..f2730aaad 100644
--- a/crates/jcode-desktop/src/main.rs
+++ b/crates/jcode-desktop/src/main.rs
@@ -719,6 +719,9 @@ async fn run() -> Result<()> {
     if let Some(frames) = real_transcript_scroll_benchmark_frames(&args) {
         return run_real_transcript_scroll_benchmark(frames);
     }
+    if let Some(frames) = real_transcript_action_benchmark_frames(&args) {
+        return run_real_transcript_action_benchmark(frames);
+    }
     if let Some(output_dir) = hero_screenshot_capture_dir(&args) {
         return run_hero_screenshot_capture(&output_dir).await;
     }
@@ -2273,6 +2276,7 @@ const DESKTOP_HELP_LINES: &[&str] = &[
     "  --resize-render-benchmark[N]  Print CPU resize/render benchmark JSON and exit",
     "  --scroll-render-benchmark[N]  Print CPU scroll/render benchmark JSON and exit",
     "  --real-transcript-scroll-benchmark[N]  Profile scrolling against your real on-disk transcripts and exit",
+    "  --real-transcript-action-benchmark[N]  Profile mixed user actions (scroll/resize/typing/pickers/selection/streaming) on real transcripts and exit",
     "  --stream-e2e-benchmark[N]     Print stream event-to-paint guardrail JSON and exit",
     "  --headless-chat-smoke <MSG>  Run a hidden backend smoke test and print JSON events",
     "  --headless-chat-smoke=<MSG>  Same as above",
@@ -5516,6 +5520,462 @@ fn benchmark_real_transcript_scroll(
     }
 }
 
+/// Profile a realistic mix of user *actions* (not just scrolling) against the
+/// user's largest real on-disk transcripts. Each action phase is measured
+/// separately as per-frame CPU samples and reported as p50/p95/p99/max, plus a
+/// `passes_120fps_cpu_budget` flag against the existing frame budget. This is the
+/// broad interaction-coverage companion to `--real-transcript-scroll-benchmark`.
+fn run_real_transcript_action_benchmark(frames: usize) -> Result<()> {
+    let frames = frames.max(1);
+    let size = PhysicalSize::new(1200, 760);
+    let transcripts = session_data::load_largest_real_transcripts(8, 24)
+        .context("failed to load real transcripts for action benchmark")?;
+
+    if transcripts.is_empty() {
+        println!(
+            "{}",
+            serde_json::to_string_pretty(&serde_json::json!({
+                "frames": frames,
+                "sessions": [],
+                "note": "no real transcripts with >=24 messages found under ~/.jcode/sessions",
+            }))?
+        );
+        return Ok(());
+    }
+
+    let budget_ms = duration_ms(DESKTOP_120FPS_FRAME_BUDGET);
+    // phase name -> all per-frame samples across every session
+    let mut phase_samples: std::collections::BTreeMap<&'static str, Vec<f64>> =
+        std::collections::BTreeMap::new();
+    let mut session_json = Vec::new();
+
+    for transcript in &transcripts {
+        let phases = benchmark_real_transcript_actions(transcript, size, frames);
+        let phase_json = phases
+            .iter()
+            .map(|(name, samples)| {
+                phase_samples
+                    .entry(name)
+                    .or_default()
+                    .extend_from_slice(samples);
+                action_phase_json(name, samples, budget_ms)
+            })
+            .collect::<Vec<_>>();
+        session_json.push(serde_json::json!({
+            "session_id": transcript.session_id,
+            "title": transcript.title,
+            "message_count": transcript.messages.len(),
+            "phases": phase_json,
+        }));
+    }
+
+    let mut aggregate = Vec::new();
+    let mut slowest_phase = String::new();
+    let mut slowest_p99 = 0.0_f64;
+    let mut all_pass = true;
+    for (name, samples) in &phase_samples {
+        let value = action_phase_json(name, samples, budget_ms);
+        let p99 = percentile_ms(samples, 0.99);
+        if p99 > slowest_p99 {
+            slowest_p99 = p99;
+            slowest_phase = (*name).to_string();
+        }
+        if p99 > budget_ms {
+            all_pass = false;
+        }
+        aggregate.push(value);
+    }
+
+    println!(
+        "{}",
+        serde_json::to_string_pretty(&serde_json::json!({
+            "frames_per_phase": frames,
+            "size": { "width": size.width, "height": size.height },
+            "target_frame_budget_ms": budget_ms,
+            "sessions_profiled": transcripts.len(),
+            "aggregate_phases": aggregate,
+            "slowest_phase": { "name": slowest_phase, "p99_ms": slowest_p99 },
+            "passes_120fps_cpu_budget": all_pass,
+            "sessions": session_json,
+        }))?
+    );
+    Ok(())
+}
+
+fn action_phase_json(name: &str, samples: &[f64], budget_ms: f64) -> serde_json::Value {
+    let frames = samples.len().max(1);
+    let total_ms = samples.iter().sum::<f64>();
+    let p99 = percentile_ms(samples, 0.99);
+    serde_json::json!({
+        "name": name,
+        "frames": samples.len(),
+        "mean_ms": total_ms / frames as f64,
+        "p50_ms": percentile_ms(samples, 0.50),
+        "p95_ms": percentile_ms(samples, 0.95),
+        "p99_ms": p99,
+        "max_ms": max_sample_ms(samples),
+        "passes_budget": p99 <= budget_ms,
+    })
+}
+
+/// Run every simulated action phase for one transcript, returning per-phase
+/// per-frame CPU samples (milliseconds). Each phase reproduces the production
+/// render path: cached/wrapped body lines, viewport extraction, a windowed body
+/// text buffer that is reused across frames, text areas, and primitive geometry.
+fn benchmark_real_transcript_actions(
+    transcript: &session_data::BenchmarkTranscript,
+    size: PhysicalSize<u32>,
+    frames: usize,
+) -> Vec<(&'static str, Vec<f64>)> {
+    let base_app = real_transcript_scroll_app(transcript);
+    let body_lines = single_session_rendered_body_lines_for_tick(&base_app, size, 0);
+    let total_lines = body_lines.len();
+    let max_scroll = single_session_body_scroll_metrics_for_total_lines(&base_app, size, total_lines)
+        .map(|metrics| metrics.max_scroll_lines)
+        .unwrap_or(0)
+        .max(1);
+
+    let mut phases: Vec<(&'static str, Vec<f64>)> = Vec::new();
+
+    // 1. Smooth (fractional) scroll: scroll position advances a whole line per
+    //    frame with a fractional offset, the common trackpad-scroll case.
+    phases.push((
+        "smooth_scroll",
+        action_windowed_render_phase(&base_app, &body_lines, size, frames, |app, frame| {
+            let phase = frame % (max_scroll * 2);
+            let target = if phase <= max_scroll {
+                phase
+            } else {
+                max_scroll * 2 - phase
+            };
+            app.body_scroll_lines = target as f32;
+            benchmark_smooth_scroll_lines(frame)
+        }),
+    ));
+
+    // 2. Whole-line scroll: integer line steps, no fractional offset.
+    phases.push((
+        "whole_line_scroll",
+        action_windowed_render_phase(&base_app, &body_lines, size, frames, |app, frame| {
+            let phase = frame % (max_scroll * 2);
+            let target = if phase <= max_scroll {
+                phase
+            } else {
+                max_scroll * 2 - phase
+            };
+            app.body_scroll_lines = target as f32;
+            0.0
+        }),
+    ));
+
+    // 3. Selection drag across the visible transcript while parked mid-scroll.
+    {
+        let mut app = base_app.clone();
+        app.body_scroll_lines = (max_scroll / 2) as f32;
+        let viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &body_lines);
+        let visible = single_session_visible_body(&app, size);
+        app.begin_selection(SelectionPoint { line: 0, column: 0 });
+        let mut font_system = benchmark_font_system();
+        let (mut buffers, mut window_start, mut window_end, mut last_start) =
+            action_prime_window(&app, &body_lines, size, &mut font_system);
+        let (samples, _) = benchmark_frame_samples(frames, |frame| {
+            let line = frame % viewport.lines.len().max(1);
+            let column = (frame * 7) % 80;
+            app.update_selection(SelectionPoint { line, column });
+            let _ = &visible;
+            action_render_window(
+                &app,
+                &body_lines,
+                size,
+                frame as u64,
+                0.0,
+                &mut font_system,
+                &mut buffers,
+                &mut window_start,
+                &mut window_end,
+                &mut last_start,
+            )
+        });
+        phases.push(("selection_drag", samples));
+    }
+
+    // 4. Typing in the composer while parked at the bottom of the transcript.
+    {
+        let mut app = base_app.clone();
+        app.scroll_body_to_bottom();
+        app.draft.clear();
+        app.draft_cursor = 0;
+        let mut font_system = benchmark_font_system();
+        let (mut buffers, mut window_start, mut window_end, mut last_start) =
+            action_prime_window(&app, &body_lines, size, &mut font_system);
+        let (samples, _) = benchmark_frame_samples(frames, |frame| {
+            app.draft.push(benchmark_typing_char(frame));
+            app.draft_cursor = app.draft.len();
+            action_render_window(
+                &app,
+                &body_lines,
+                size,
+                frame as u64,
+                0.0,
+                &mut font_system,
+                &mut buffers,
+                &mut window_start,
+                &mut window_end,
+                &mut last_start,
+            )
+        });
+        phases.push(("composer_typing", samples));
+    }
+
+    // 5. Model picker open/close toggling over the transcript: every other frame
+    //    opens the inline picker card, invalidating the inline-widget geometry.
+    {
+        let mut app = base_app.clone();
+        app.body_scroll_lines = (max_scroll / 3) as f32;
+        let mut font_system = benchmark_font_system();
+        let (mut buffers, mut window_start, mut window_end, mut last_start) =
+            action_prime_window(&app, &body_lines, size, &mut font_system);
+        let (samples, _) = benchmark_frame_samples(frames, |frame| {
+            app.model_picker.open = frame % 2 == 0;
+            app.model_picker.loading = app.model_picker.open;
+            action_render_window(
+                &app,
+                &body_lines,
+                size,
+                frame as u64,
+                0.0,
+                &mut font_system,
+                &mut buffers,
+                &mut window_start,
+                &mut window_end,
+                &mut last_start,
+            )
+        });
+        app.model_picker.open = false;
+        phases.push(("model_picker_toggle", samples));
+    }
+
+    // 6. Session switcher open/close toggling over the transcript.
+    {
+        let mut app = base_app.clone();
+        app.body_scroll_lines = (max_scroll / 3) as f32;
+        let mut font_system = benchmark_font_system();
+        let (mut buffers, mut window_start, mut window_end, mut last_start) =
+            action_prime_window(&app, &body_lines, size, &mut font_system);
+        let (samples, _) = benchmark_frame_samples(frames, |frame| {
+            app.session_switcher.open = frame % 2 == 0;
+            action_render_window(
+                &app,
+                &body_lines,
+                size,
+                frame as u64,
+                0.0,
+                &mut font_system,
+                &mut buffers,
+                &mut window_start,
+                &mut window_end,
+                &mut last_start,
+            )
+        });
+        app.session_switcher.open = false;
+        phases.push(("session_switcher_toggle", samples));
+    }
+
+    // 7. Window resize sweep: each frame is a different surface size, forcing a
+    //    full body relayout + window rebuild (the worst non-scroll case).
+    {
+        let app = base_app.clone();
+        let mut font_system = benchmark_font_system();
+        let (samples, _) = benchmark_frame_samples(frames, |frame| {
+            let resize = benchmark_resize_size(frame);
+            let lines = single_session_rendered_body_lines_for_tick(&app, resize, 0);
+            let viewport = single_session_body_viewport_from_lines(&app, resize, 0.0, &lines);
+            let key =
+                single_session_text_key_for_tick_with_rendered_body(&app, resize, 0, 0.0, &lines);
+            let mut buffers = single_session_text_buffers_from_key(&key, resize, &mut font_system);
+            let (window_start, window_end) = single_session_body_text_window_bounds(&viewport);
+            if let Some(body_buffer) = buffers.get_mut(1) {
+                *body_buffer = single_session_body_text_buffer_from_lines(
+                    &mut font_system,
+                    &lines[window_start..window_end],
+                    resize,
+                    app.text_scale(),
+                );
+            }
+            let areas = single_session_text_areas_for_app_with_cached_body_viewport(
+                &app, &buffers, resize, 0.0, viewport,
+            );
+            let vertices = build_single_session_vertices_with_cached_body(
+                &app, resize, 0.0, frame as u64, 0.0, 1.0, &lines,
+            );
+            buffers.len() ^ areas.len() ^ vertices.len()
+        });
+        phases.push(("window_resize", samples));
+    }
+
+    // 8. Streaming response growth while scrolled near the bottom: a synthetic
+    //    assistant reply grows by a chunk each frame, the live-streaming case.
+    {
+        let mut app = base_app.clone();
+        app.scroll_body_to_bottom();
+        let mut font_system = benchmark_font_system();
+        let (samples, _) = benchmark_frame_samples(frames, |frame| {
+            app.streaming_response.push_str(
+                "Streaming update chunk with `inline code` and prose that wraps across lines. ",
+            );
+            if frame % 9 == 0 {
+                app.streaming_response.push('\n');
+            }
+            let lines = single_session_rendered_body_lines_for_tick(&app, size, frame as u64);
+            let viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &lines);
+            let key =
+                single_session_text_key_for_tick_with_rendered_body(&app, size, 0, 0.0, &lines);
+            let mut buffers = single_session_text_buffers_from_key(&key, size, &mut font_system);
+            let (window_start, window_end) = single_session_body_text_window_bounds(&viewport);
+            if let Some(body_buffer) = buffers.get_mut(1) {
+                *body_buffer = single_session_body_text_buffer_from_lines(
+                    &mut font_system,
+                    &lines[window_start..window_end],
+                    size,
+                    app.text_scale(),
+                );
+            }
+            let areas = single_session_text_areas_for_app_with_cached_body_viewport(
+                &app, &buffers, size, 0.0, viewport,
+            );
+            let vertices = build_single_session_vertices_with_cached_body(
+                &app, size, 0.0, frame as u64, 0.0, 1.0, &lines,
+            );
+            buffers.len() ^ areas.len() ^ vertices.len()
+        });
+        phases.push(("streaming_growth", samples));
+    }
+
+    phases
+}
+
+/// Prime a reusable text-buffer set and its windowed body buffer for `app`,
+/// matching how the production renderer seeds the sliding window. Returns the
+/// buffers plus the current (window_start, window_end, last_scroll_start).
+fn action_prime_window(
+    app: &SingleSessionApp,
+    body_lines: &[SingleSessionStyledLine],
+    size: PhysicalSize<u32>,
+    font_system: &mut FontSystem,
+) -> (Vec<Buffer>, usize, usize, usize) {
+    let viewport = single_session_body_viewport_from_lines(app, size, 0.0, body_lines);
+    let key = single_session_text_key_for_tick_with_rendered_body(app, size, 0, 0.0, body_lines);
+    let mut buffers = single_session_text_buffers_from_key(&key, size, font_system);
+    let (window_start, window_end) = single_session_body_text_window_bounds(&viewport);
+    if let Some(body_buffer) = buffers.get_mut(1) {
+        *body_buffer = single_session_body_text_buffer_from_lines(
+            font_system,
+            &body_lines[window_start..window_end],
+            size,
+            app.text_scale(),
+        );
+        body_buffer.set_scroll(
+            viewport
+                .start_line
+                .saturating_sub(window_start)
+                .min(i32::MAX as usize) as i32,
+        );
+    }
+    (buffers, window_start, window_end, viewport.start_line)
+}
+
+/// Render one frame through the production windowed path, reusing the body text
+/// buffer and only rebuilding/rescrolling the window when the viewport leaves it.
+#[allow(clippy::too_many_arguments)]
+fn action_render_window(
+    app: &SingleSessionApp,
+    body_lines: &[SingleSessionStyledLine],
+    size: PhysicalSize<u32>,
+    tick: u64,
+    smooth_scroll_lines: f32,
+    font_system: &mut FontSystem,
+    buffers: &mut Vec<Buffer>,
+    window_start: &mut usize,
+    window_end: &mut usize,
+    last_scroll_start: &mut usize,
+) -> usize {
+    let viewport =
+        single_session_body_viewport_from_lines(app, size, smooth_scroll_lines, body_lines);
+    if !single_session_body_text_window_contains(*window_start, *window_end, &viewport) {
+        let (start, end) = single_session_body_text_window_bounds(&viewport);
+        *window_start = start;
+        *window_end = end;
+        if let Some(body_buffer) = buffers.get_mut(1) {
+            *body_buffer = single_session_body_text_buffer_from_lines(
+                font_system,
+                &body_lines[start..end],
+                size,
+                app.text_scale(),
+            );
+        }
+        *last_scroll_start = usize::MAX;
+    }
+    if viewport.start_line != *last_scroll_start {
+        if let Some(body_buffer) = buffers.get_mut(1) {
+            body_buffer.set_scroll(
+                viewport
+                    .start_line
+                    .saturating_sub(*window_start)
+                    .min(i32::MAX as usize) as i32,
+            );
+        }
+        *last_scroll_start = viewport.start_line;
+    }
+    let areas = single_session_text_areas_for_app_with_cached_body_viewport(
+        app,
+        buffers,
+        size,
+        smooth_scroll_lines,
+        viewport,
+    );
+    let vertices = build_single_session_vertices_with_cached_body(
+        app,
+        size,
+        0.0,
+        tick,
+        smooth_scroll_lines,
+        1.0,
+        body_lines,
+    );
+    buffers.len() ^ areas.len() ^ vertices.len()
+}
+
+/// Drive a windowed-scroll render phase, calling `prepare` each frame to mutate
+/// the app's scroll position (and return any fractional smooth-scroll offset).
+fn action_windowed_render_phase(
+    base_app: &SingleSessionApp,
+    body_lines: &[SingleSessionStyledLine],
+    size: PhysicalSize<u32>,
+    frames: usize,
+    mut prepare: impl FnMut(&mut SingleSessionApp, usize) -> f32,
+) -> Vec<f64> {
+    let mut app = base_app.clone();
+    let mut font_system = benchmark_font_system();
+    let (mut buffers, mut window_start, mut window_end, mut last_start) =
+        action_prime_window(&app, body_lines, size, &mut font_system);
+    let (samples, _) = benchmark_frame_samples(frames, |frame| {
+        let smooth = prepare(&mut app, frame);
+        action_render_window(
+            &app,
+            body_lines,
+            size,
+            frame as u64,
+            smooth,
+            &mut font_system,
+            &mut buffers,
+            &mut window_start,
+            &mut window_end,
+            &mut last_start,
+        )
+    });
+    samples
+}
+
 fn run_stream_e2e_benchmark(raw_events: usize) -> Result<()> {
     let result = run_desktop_stream_end_to_end_benchmark(raw_events);
     println!(

From 8e86d865c02fa1aa283326729266a69288e35ded Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 15:21:27 -0700
Subject: [PATCH 37/57] desktop: make action-benchmark streaming phase mirror
 production incremental wrap

The streaming_growth phase re-wrapped the entire transcript every frame, which
production avoids by caching the wrapped static base and only appending the
wrapped streaming tail. Mirror that here: wrap the static body once, then per
frame truncate to the static base and append the tail. Drops measured
streaming_growth p99 ~72ms -> ~18ms, reflecting the real production path.
---
 crates/jcode-desktop/src/main.rs | 40 +++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs
index f2730aaad..75cf62e1c 100644
--- a/crates/jcode-desktop/src/main.rs
+++ b/crates/jcode-desktop/src/main.rs
@@ -5815,10 +5815,21 @@ fn benchmark_real_transcript_actions(
 
     // 8. Streaming response growth while scrolled near the bottom: a synthetic
     //    assistant reply grows by a chunk each frame, the live-streaming case.
+    //
+    //    This mirrors the production renderer's incremental path
+    //    (`cached_single_session_body_lines` for the streaming branch): the
+    //    static transcript body is wrapped ONCE, then each frame only truncates
+    //    back to the static base and appends the wrapped streaming tail, rather
+    //    than re-wrapping the whole transcript every frame.
     {
         let mut app = base_app.clone();
         app.scroll_body_to_bottom();
+        app.streaming_response.push_str("Streaming response starting. ");
         let mut font_system = benchmark_font_system();
+        let static_base = single_session_rendered_static_body_lines_for_streaming(&app, size, 0)
+            .unwrap_or_else(|| single_session_rendered_body_lines_for_tick(&app, size, 0));
+        let static_len = static_base.len();
+        let mut stream_lines = static_base.clone();
         let (samples, _) = benchmark_frame_samples(frames, |frame| {
             app.streaming_response.push_str(
                 "Streaming update chunk with `inline code` and prose that wraps across lines. ",
@@ -5826,16 +5837,27 @@ fn benchmark_real_transcript_actions(
             if frame % 9 == 0 {
                 app.streaming_response.push('\n');
             }
-            let lines = single_session_rendered_body_lines_for_tick(&app, size, frame as u64);
-            let viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &lines);
-            let key =
-                single_session_text_key_for_tick_with_rendered_body(&app, size, 0, 0.0, &lines);
+            // Incremental: reuse the wrapped static base, only re-wrap the tail.
+            stream_lines.truncate(static_len);
+            append_single_session_streaming_response_rendered_body_lines(
+                &app,
+                size,
+                &mut stream_lines,
+            );
+            let viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &stream_lines);
+            let key = single_session_text_key_for_tick_with_rendered_body(
+                &app,
+                size,
+                0,
+                0.0,
+                &stream_lines,
+            );
             let mut buffers = single_session_text_buffers_from_key(&key, size, &mut font_system);
             let (window_start, window_end) = single_session_body_text_window_bounds(&viewport);
             if let Some(body_buffer) = buffers.get_mut(1) {
                 *body_buffer = single_session_body_text_buffer_from_lines(
                     &mut font_system,
-                    &lines[window_start..window_end],
+                    &stream_lines[window_start..window_end],
                     size,
                     app.text_scale(),
                 );
@@ -5844,7 +5866,13 @@ fn benchmark_real_transcript_actions(
                 &app, &buffers, size, 0.0, viewport,
             );
             let vertices = build_single_session_vertices_with_cached_body(
-                &app, size, 0.0, frame as u64, 0.0, 1.0, &lines,
+                &app,
+                size,
+                0.0,
+                frame as u64,
+                0.0,
+                1.0,
+                &stream_lines,
             );
             buffers.len() ^ areas.len() ^ vertices.len()
         });

From 4c82a6b153868eef91dd28e094ab973b0012045f Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 15:23:10 -0700
Subject: [PATCH 38/57] desktop: make action-benchmark resize phase reuse
 cached raw styled lines

Production caches the raw (unwrapped) styled body lines across resizes and only
re-runs the width-dependent wrap, via single_session_rendered_body_lines_from_raw_ref.
Mirror that in the resize phase instead of regenerating raw markdown lines every
frame. Measured window_resize p99 ~64ms -> ~28ms, matching the real path.
---
 crates/jcode-desktop/src/main.rs | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs
index 75cf62e1c..703a68995 100644
--- a/crates/jcode-desktop/src/main.rs
+++ b/crates/jcode-desktop/src/main.rs
@@ -5782,13 +5782,18 @@ fn benchmark_real_transcript_actions(
     }
 
     // 7. Window resize sweep: each frame is a different surface size, forcing a
-    //    full body relayout + window rebuild (the worst non-scroll case).
+    //    body re-wrap + window rebuild (the worst non-scroll case).
+    //
+    //    Mirrors production (`cached_single_session_body_lines` non-streaming
+    //    branch): the raw styled lines (markdown parse) are generated ONCE and
+    //    cached across sizes; only the width-dependent wrap re-runs per resize.
     {
         let app = base_app.clone();
+        let raw_lines = app.body_styled_lines_for_tick(0);
         let mut font_system = benchmark_font_system();
         let (samples, _) = benchmark_frame_samples(frames, |frame| {
             let resize = benchmark_resize_size(frame);
-            let lines = single_session_rendered_body_lines_for_tick(&app, resize, 0);
+            let lines = single_session_rendered_body_lines_from_raw_ref(&app, resize, &raw_lines);
             let viewport = single_session_body_viewport_from_lines(&app, resize, 0.0, &lines);
             let key =
                 single_session_text_key_for_tick_with_rendered_body(&app, resize, 0, 0.0, &lines);

From 59362bd0f837327b8f7af75ab6c357d9278b77cd Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 16:06:22 -0700
Subject: [PATCH 39/57] perf(agent): scan only new delta for wrapped-tool
 markers during streaming

The streaming text loop re-ran text_content.find(...) over the ENTIRE
accumulated response on every TextDelta until a wrapped-tool-call marker was
found. For normal answers (no marker) that scanned everything every token:
O(response) per delta, O(response^2) over a full streamed answer.

Scan only the newly appended delta plus a short overlap window (so a marker
straddling the append boundary is still detected), giving O(delta) per token.
Add unit tests asserting equivalence to a full rescan across chunk sizes,
unicode, and the boundary-straddle case.
---
 .../src/agent/turn_streaming_mpsc.rs          | 120 +++++++++++++++++-
 1 file changed, 117 insertions(+), 3 deletions(-)

diff --git a/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs b/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs
index fc13e3975..8671b00fe 100644
--- a/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs
+++ b/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs
@@ -1,5 +1,47 @@
 use super::*;
 
+/// Largest byte index `<= index` that is a UTF-8 char boundary in `text`.
+/// Equivalent to the unstable `str::floor_char_boundary`, reimplemented so the
+/// incremental marker scan can clamp its scan-window start onto a valid
+/// boundary without re-scanning the whole accumulated response.
+fn floor_char_boundary(text: &str, index: usize) -> usize {
+    if index >= text.len() {
+        return text.len();
+    }
+    let mut boundary = index;
+    while boundary > 0 && !text.is_char_boundary(boundary) {
+        boundary -= 1;
+    }
+    boundary
+}
+
+/// The wrapped-tool-call markers emitted by some models inside plain text.
+const WRAP_TOOL_MARKERS: [&str; 2] = ["to=functions.", "+#+#"];
+
+/// Find the first wrapped-tool-call marker in `accumulated`, scanning only the
+/// newly appended `delta` plus a short overlap from the previous tail (so a
+/// marker straddling the append boundary is still found).
+///
+/// This avoids re-scanning the entire accumulated response on every streamed
+/// delta, which was O(response) per token and O(response^2) over a full answer.
+fn find_wrap_marker_incremental(accumulated: &str, appended_len: usize) -> Option<usize> {
+    let max_marker_len = WRAP_TOOL_MARKERS
+        .iter()
+        .map(|marker| marker.len())
+        .max()
+        .unwrap_or(0);
+    let scan_start = accumulated
+        .len()
+        .saturating_sub(appended_len + max_marker_len.saturating_sub(1));
+    let scan_start = floor_char_boundary(accumulated, scan_start);
+    let window = &accumulated[scan_start..];
+    WRAP_TOOL_MARKERS
+        .iter()
+        .filter_map(|marker| window.find(marker))
+        .min()
+        .map(|rel_idx| scan_start + rel_idx)
+}
+
 fn reload_interrupted_tool_result(tc: &ToolCall, elapsed_secs: f64) -> (String, bool) {
     if tc.name == "selfdev" {
         return ("Reload initiated. Process restarting...".to_string(), false);
@@ -401,9 +443,11 @@ impl Agent {
                         }
                         text_content.push_str(&text);
                         if !text_wrapped_detected {
-                            if let Some(marker_idx) = text_content
-                                .find("to=functions.")
-                                .or_else(|| text_content.find("+#+#"))
+                            // Scan only the new delta (plus a short overlap for
+                            // markers straddling the boundary) instead of the
+                            // whole accumulated response on every token.
+                            if let Some(marker_idx) =
+                                find_wrap_marker_incremental(&text_content, text.len())
                             {
                                 text_wrapped_detected = true;
                                 let clean_prefix =
@@ -1332,4 +1376,74 @@ mod tests {
         assert!(is_error);
         assert!(message.contains("interrupted by server reload"));
     }
+
+    /// Reference O(n) full scan, preserving the original precedence: the
+    /// `to=functions.` marker is checked before `+#+#`.
+    fn find_wrap_marker_full(text: &str) -> Option<usize> {
+        text.find("to=functions.").or_else(|| text.find("+#+#"))
+    }
+
+    /// Simulate streaming `full` in arbitrary deltas and assert the incremental
+    /// scan finds the first marker position, matching a full rescan each step.
+    fn assert_incremental_matches(full: &str, chunk: usize) {
+        let mut acc = String::new();
+        let mut incremental_hit: Option<usize> = None;
+        let bytes = full.as_bytes();
+        let mut i = 0;
+        while i < bytes.len() {
+            let mut end = (i + chunk).min(bytes.len());
+            while end < bytes.len() && !full.is_char_boundary(end) {
+                end += 1;
+            }
+            let delta = &full[i..end];
+            acc.push_str(delta);
+            if incremental_hit.is_none() {
+                incremental_hit = find_wrap_marker_incremental(&acc, delta.len());
+            }
+            i = end;
+        }
+        // The earliest of either marker in the full text.
+        let fn_pos = full.find("to=functions.");
+        let plus_pos = full.find("+#+#");
+        let expected = match (fn_pos, plus_pos) {
+            (Some(a), Some(b)) => Some(a.min(b)),
+            (a, b) => a.or(b),
+        };
+        assert_eq!(
+            incremental_hit, expected,
+            "incremental scan mismatch for {full:?} chunk={chunk}"
+        );
+    }
+
+    #[test]
+    fn wrap_marker_incremental_detects_markers_across_chunk_sizes() {
+        let cases = [
+            "plain answer with no marker at all",
+            "answer then to=functions.foo({})",
+            "answer then +#+# wrapped",
+            "prefix +#+# and later to=functions.bar",
+            "unicode 🔄 résumé then to=functions.baz",
+            "",
+            "to=functions.first",
+            "+#+#",
+        ];
+        for case in cases {
+            for chunk in [1usize, 2, 3, 5, 7, 100] {
+                assert_incremental_matches(case, chunk);
+            }
+        }
+    }
+
+    #[test]
+    fn wrap_marker_incremental_finds_marker_straddling_delta_boundary() {
+        // Feed "to=functions." split right in the middle so the marker only
+        // exists once both halves are appended; the overlap window must catch it.
+        let mut acc = String::new();
+        acc.push_str("answer to=fun");
+        assert_eq!(find_wrap_marker_incremental(&acc, "answer to=fun".len()), None);
+        acc.push_str("ctions.tool");
+        let hit = find_wrap_marker_incremental(&acc, "ctions.tool".len());
+        assert_eq!(hit, find_wrap_marker_full(&acc));
+        assert_eq!(hit, Some("answer ".len()));
+    }
 }

From ba5b62effa03eed42c42ebe646f74f18700e13ed Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 16:09:52 -0700
Subject: [PATCH 40/57] perf(openrouter): drain consumed SSE prefix instead of
 reallocating buffer

parse_next_event reassigned self.buffer = self.buffer[pos+2..].to_string() for
every SSE event, copying and reallocating the entire remaining buffer each time.
When one network chunk batches many SSE events this is O(buffer^2). Use
String::drain(..pos+2) to remove the consumed prefix in place. Pure
behavior-preserving refactor.
---
 crates/jcode-base/src/provider/openrouter_sse_stream.rs | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/crates/jcode-base/src/provider/openrouter_sse_stream.rs b/crates/jcode-base/src/provider/openrouter_sse_stream.rs
index 7e3f6ddcd..336a2dadd 100644
--- a/crates/jcode-base/src/provider/openrouter_sse_stream.rs
+++ b/crates/jcode-base/src/provider/openrouter_sse_stream.rs
@@ -453,8 +453,13 @@ impl OpenRouterStream {
         }
 
         while let Some(pos) = self.buffer.find("\n\n") {
+            // Extract this event and remove it (plus the "\n\n" separator) in
+            // place. Reassigning `self.buffer = self.buffer[pos + 2..].to_string()`
+            // copied and reallocated the entire remaining buffer on every event,
+            // which is O(buffer^2) when one network chunk batches many SSE
+            // events. `drain` removes the consumed prefix without reallocating.
             let event_str = self.buffer[..pos].to_string();
-            self.buffer = self.buffer[pos + 2..].to_string();
+            self.buffer.drain(..pos + 2);
 
             // Parse SSE event
             let mut data = None;

From f7dc370f6762813c50437635db85356f3ce233d0 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 16:13:17 -0700
Subject: [PATCH 41/57] perf(tui): avoid rescanning transcript prefix in
 incremental body prep

prepare_body_incremental recounted user messages in messages[..prev_msg_count]
on every incremental append to seed prompt_num. Appending one message at a time
over a long session made that cumulative O(n^2). prev.user_prompt_texts is
extended in lockstep with each rendered user message, so its length already is
the prior user-prompt count; use it directly for O(1) seeding.
---
 crates/jcode-tui/src/tui/ui_prepare.rs | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/crates/jcode-tui/src/tui/ui_prepare.rs b/crates/jcode-tui/src/tui/ui_prepare.rs
index 1254cbb49..5fc6f4dd2 100644
--- a/crates/jcode-tui/src/tui/ui_prepare.rs
+++ b/crates/jcode-tui/src/tui/ui_prepare.rs
@@ -751,10 +751,13 @@ pub(super) fn prepare_body_incremental(
     let pending_count = input_ui::pending_prompt_count(app);
     let prompt_number_offset = app.compacted_hidden_user_prompts();
 
-    let mut prompt_num = messages[..prev_msg_count]
-        .iter()
-        .filter(|m| m.effective_role() == "user")
-        .count();
+    // The number of user prompts already rendered equals the number of cached
+    // user prompt texts. Re-counting `messages[..prev_msg_count]` here on every
+    // incremental append rescans the whole prior transcript, making a session
+    // that grows one message at a time O(n^2). `prev.user_prompt_texts` is
+    // extended in lockstep with each rendered user message, so its length is the
+    // exact prior prompt count.
+    let mut prompt_num = prev.user_prompt_texts.len();
 
     let mut new_lines: Vec<Line> = Vec::new();
     let mut new_user_line_indices: Vec<usize> = Vec::new();

From 74b88e0858b12ed6548129c164f0b72c48182213 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 16:16:15 -0700
Subject: [PATCH 42/57] perf(session-picker): partition filtered refs by group
 in one pass

rebuild_items scanned every filtered session ref once per server group to
collect that group's sessions: O(groups * filtered_refs). With many remote
server groups and many sessions this scaled poorly on every search keystroke.
Bucket the filtered refs by group_idx in a single O(filtered_refs) pass, then
emit groups in order (O(groups)). Behavior (grouping, ordering, saved-id
filtering) is preserved.
---
 .../src/tui/session_picker/filter.rs          | 72 +++++++++----------
 1 file changed, 35 insertions(+), 37 deletions(-)

diff --git a/crates/jcode-tui/src/tui/session_picker/filter.rs b/crates/jcode-tui/src/tui/session_picker/filter.rs
index 82883b2f4..5cc1e7602 100644
--- a/crates/jcode-tui/src/tui/session_picker/filter.rs
+++ b/crates/jcode-tui/src/tui/session_picker/filter.rs
@@ -223,43 +223,41 @@ impl SessionPicker {
         }
 
         if !self.all_server_groups.is_empty() {
-            let grouped_sections: Vec<(String, String, String, Vec<SessionRef>)> = self
-                .all_server_groups
-                .iter()
-                .enumerate()
-                .filter_map(|(group_idx, group)| {
-                    let visible: Vec<SessionRef> = filtered_refs
-                        .iter()
-                        .copied()
-                        .filter(|session_ref| match session_ref {
-                            SessionRef::Group {
-                                group_idx: ref_group_idx,
-                                session_idx,
-                            } => {
-                                if *ref_group_idx != group_idx {
-                                    return false;
-                                }
-                                group
-                                    .sessions
-                                    .get(*session_idx)
-                                    .is_some_and(|session| !saved_ids.contains(&session.id))
-                            }
-                            _ => false,
-                        })
-                        .collect();
-
-                    if visible.is_empty() {
-                        None
-                    } else {
-                        Some((
-                            group.name.clone(),
-                            group.icon.clone(),
-                            group.version.clone(),
-                            visible,
-                        ))
-                    }
-                })
-                .collect();
+            // Partition the filtered refs by group in a single pass instead of
+            // rescanning every filtered ref once per group. The previous code
+            // was O(groups * filtered_refs); with many remote/server groups and
+            // many sessions this scaled poorly on every search keystroke. One
+            // bucketing pass is O(filtered_refs), then emitting is O(groups).
+            let mut group_buckets: Vec<Vec<SessionRef>> =
+                vec![Vec::new(); self.all_server_groups.len()];
+            for session_ref in filtered_refs.iter().copied() {
+                if let SessionRef::Group {
+                    group_idx,
+                    session_idx,
+                } = session_ref
+                    && let Some(group) = self.all_server_groups.get(group_idx)
+                    && group
+                        .sessions
+                        .get(session_idx)
+                        .is_some_and(|session| !saved_ids.contains(&session.id))
+                {
+                    group_buckets[group_idx].push(session_ref);
+                }
+            }
+
+            let mut grouped_sections: Vec<(String, String, String, Vec<SessionRef>)> = Vec::new();
+            for (group_idx, group) in self.all_server_groups.iter().enumerate() {
+                let visible = std::mem::take(&mut group_buckets[group_idx]);
+                if visible.is_empty() {
+                    continue;
+                }
+                grouped_sections.push((
+                    group.name.clone(),
+                    group.icon.clone(),
+                    group.version.clone(),
+                    visible,
+                ));
+            }
 
             for (name, icon, version, visible) in grouped_sections {
                 self.items.push(PickerItem::ServerHeader {

From 962b1213da32a64fd3fc64418f0af4e227bd0ffb Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 16:33:30 -0700
Subject: [PATCH 43/57] fix(tui): fully select the last line when dragging past
 the bottom

Follow-up to the previous fix that stopped the edge auto-scroll hot zone
from snapping the selection to the last line while pinned. That left a
gap: dragging *past* the last line (down into the empty area below the
content-sized chat pane) no longer extended the selection at all, because
that overshoot row maps to no line and copy_point_from_screen returned
None. Native terminal/browser selection treats dragging past the last
line as "select through the end of that line".

Add copy_pane_drag_point(), which clamps vertical overshoot to the
nearest in-bounds line edge: a drag below the last visible line snaps to
the end of that line, and a drag above the first visible line snaps to
its start. A direct hit on a real line still yields precise per-cell
selection. Use it for both Drag and Up so the boundary line is fully
covered during the drag and on release.

Adds a regression test that anchors on the last content line, drags
straight down past the bottom of the pane with the cursor x only partway
through the line, and asserts the whole last line (through its end) is
selected without arming autoscroll or scrolling.
---
 .../jcode-tui/src/tui/app/copy_selection.rs   |  17 ++-
 .../tui/app/tests/scroll_copy_02/part_01.rs   | 124 ++++++++++++++++++
 crates/jcode-tui/src/tui/ui.rs                |  71 ++++++++++
 3 files changed, 208 insertions(+), 4 deletions(-)

diff --git a/crates/jcode-tui/src/tui/app/copy_selection.rs b/crates/jcode-tui/src/tui/app/copy_selection.rs
index 8df43942b..229db8d32 100644
--- a/crates/jcode-tui/src/tui/app/copy_selection.rs
+++ b/crates/jcode-tui/src/tui/app/copy_selection.rs
@@ -526,7 +526,14 @@ impl App {
                 }
                 // Left the edge: stop the continuous autoscroll.
                 self.copy_selection_edge_autoscroll = None;
-                if let Some(point) = point.filter(|point| Some(point.pane) == active_pane) {
+                // Resolve the drag target, clamping vertical overshoot (e.g. a
+                // drag into the blank space below the last line) to the nearest
+                // in-bounds line edge so the boundary line is fully selected,
+                // just like native terminal/browser selection.
+                let resolved = active_pane.and_then(|pane| {
+                    crate::tui::ui::copy_pane_drag_point(pane, mouse.column, mouse.row)
+                });
+                if let Some(point) = resolved.filter(|point| Some(point.pane) == active_pane) {
                     self.update_selection_with_point(point, true);
                 }
                 Some(false)
@@ -542,9 +549,11 @@ impl App {
                     };
                 }
                 self.copy_selection_dragging = false;
-                if let Some(point) =
-                    point.filter(|point| Some(point.pane) == self.current_copy_selection_pane())
-                {
+                let release_pane = self.current_copy_selection_pane();
+                let resolved = release_pane.and_then(|pane| {
+                    crate::tui::ui::copy_pane_drag_point(pane, mouse.column, mouse.row)
+                });
+                if let Some(point) = resolved.filter(|point| Some(point.pane) == release_pane) {
                     self.update_selection_with_point(point, true);
                 }
                 if self.copy_selection_mode {
diff --git a/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs
index c13cee50d..5f2fdb54d 100644
--- a/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs
+++ b/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs
@@ -1090,6 +1090,130 @@ fn test_copy_selection_drag_to_bottom_edge_when_pinned_does_not_snap_or_autoscro
     });
 }
 
+#[test]
+fn test_copy_selection_drag_below_last_line_fully_selects_last_line() {
+    // Dragging *past* the last content line (into the empty area below the
+    // chat pane) should fully select that last line through its end, just like
+    // native terminal and browser selection. The chat pane is sized to its
+    // content, so a downward drag that overshoots reports a row at/below the
+    // bottom boundary that maps to no line at all; that used to silently drop
+    // the extension so the bottom line could never be fully highlighted.
+    let _render_lock = scroll_render_test_lock();
+    let mut app = create_test_app();
+
+    let lines = (1..=6)
+        .map(|idx| format!("line {idx:03}"))
+        .collect::<Vec<_>>()
+        .join("\n");
+    app.display_messages = vec![DisplayMessage {
+        role: "assistant".to_string(),
+        content: lines,
+        tool_calls: vec![],
+        duration_secs: None,
+        title: None,
+        tool_data: None,
+    }];
+    app.bump_display_messages_version();
+    app.scroll_offset = 0;
+    app.auto_scroll_paused = false;
+    app.is_processing = false;
+    app.streaming_text.clear();
+    app.status = ProcessingStatus::Idle;
+
+    // Tall terminal so there is empty space below the content-sized chat pane.
+    let backend = ratatui::backend::TestBackend::new(60, 20);
+    let mut terminal = ratatui::Terminal::new(backend).expect("failed to create test terminal");
+    render_and_snap(&app, &mut terminal);
+
+    app.handle_key(KeyCode::Char('y'), KeyModifiers::ALT)
+        .unwrap();
+
+    let (visible_start, visible_end) =
+        crate::tui::ui::copy_viewport_visible_range().expect("visible copy range");
+    let line_count = crate::tui::ui::copy_viewport_line_count().expect("line count");
+    assert_eq!(visible_end, line_count, "view must be pinned to the bottom");
+
+    // The last line that maps to a real screen point.
+    let last_line = (visible_start..visible_end)
+        .rev()
+        .find(|&ln| {
+            crate::tui::ui::copy_viewport_line_text(ln)
+                .map(|t| unicode_width::UnicodeWidthStr::width(t.as_str()) > 0)
+                .unwrap_or(false)
+        })
+        .expect("a non-empty visible content line");
+    let last_text = crate::tui::ui::copy_viewport_line_text(last_line).unwrap_or_default();
+    let last_width = unicode_width::UnicodeWidthStr::width(last_text.as_str());
+
+    let layout = crate::tui::ui::last_layout_snapshot().expect("layout snapshot");
+    let area = layout.messages_area;
+
+    // Anchor on a valid cell at the START of the last content line.
+    let last_content_row = area.y + (last_line - visible_start) as u16;
+    let anchor_x = (area.x..area.x + area.width)
+        .find(|&x| {
+            crate::tui::ui::copy_viewport_point_from_screen(x, last_content_row)
+                .map(|p| p.abs_line == last_line)
+                .unwrap_or(false)
+        })
+        .expect("a screen column mapping to the last content line");
+    app.handle_mouse_event(MouseEvent {
+        kind: MouseEventKind::Down(MouseButton::Left),
+        column: anchor_x,
+        row: last_content_row,
+        modifiers: KeyModifiers::empty(),
+    });
+
+    // Drag straight down, past the bottom of the pane, with the cursor x landing
+    // partway through (not at the end of) the last line. Even so the whole last
+    // line should be selected, because we have overshot it vertically.
+    let mid_x = anchor_x + 1;
+    let below_row = (area.y + area.height + 2).min(terminal.backend().size().unwrap().height - 1);
+    assert!(
+        below_row > last_content_row,
+        "test must drag strictly below the last content row"
+    );
+    let before_scroll = app.scroll_offset();
+    app.handle_mouse_event(MouseEvent {
+        kind: MouseEventKind::Drag(MouseButton::Left),
+        column: mid_x,
+        row: below_row,
+        modifiers: KeyModifiers::empty(),
+    });
+
+    // No autoscroll (nothing below), and no scroll movement.
+    assert!(
+        !crate::tui::TuiState::copy_selection_edge_autoscroll_active(&app),
+        "edge autoscroll must not arm dragging past the last line"
+    );
+    assert_eq!(app.scroll_offset(), before_scroll, "must not scroll");
+
+    // The selection should now extend through the END of the last line.
+    let range = app.normalized_copy_selection().expect("normalized range");
+    assert_eq!(
+        range.end.abs_line, last_line,
+        "selection should extend to the last content line"
+    );
+    assert_eq!(
+        range.end.column, last_width,
+        "selection should cover the full last line (through its end)"
+    );
+    let selected = app
+        .current_copy_selection_text()
+        .expect("expected selection text");
+    assert!(
+        selected.contains(last_text.trim_end()),
+        "selection should include the full last line text: got {selected:?}"
+    );
+
+    app.handle_mouse_event(MouseEvent {
+        kind: MouseEventKind::Up(MouseButton::Left),
+        column: mid_x,
+        row: below_row,
+        modifiers: KeyModifiers::empty(),
+    });
+}
+
 #[test]
 fn test_alt_a_copies_chat_viewport_with_context_when_input_empty() {
     let _render_lock = scroll_render_test_lock();
diff --git a/crates/jcode-tui/src/tui/ui.rs b/crates/jcode-tui/src/tui/ui.rs
index a02bdbebb..80e749d3d 100644
--- a/crates/jcode-tui/src/tui/ui.rs
+++ b/crates/jcode-tui/src/tui/ui.rs
@@ -1653,6 +1653,77 @@ pub(crate) fn copy_pane_vertical_edge_point(
     copy_point_from_snapshot(&snapshot, clamped_col, edge_row).map(|point| (point, upward))
 }
 
+/// Resolve the selection point for a drag at `(column, row)`, clamping vertical
+/// overshoot to the nearest in-bounds line edge.
+///
+/// Terminals report a drag that "leaves" the pane on the boundary row, but a
+/// drag *into the empty space below the last content line* (common with short
+/// transcripts that leave blank rows underneath) lands on a row that maps to no
+/// line at all, so `copy_point_from_screen` returns `None`. Native terminal and
+/// browser selection treat that as "select through the end of the last line".
+/// This mirrors that: dragging below the last visible line snaps to the end of
+/// that line, and dragging above the first visible line snaps to its start, so
+/// the boundary line is fully covered even when there is nothing more to scroll.
+pub(crate) fn copy_pane_drag_point(
+    pane: crate::tui::CopySelectionPane,
+    column: u16,
+    row: u16,
+) -> Option<crate::tui::CopySelectionPoint> {
+    let snapshot = copy_snapshot_for_pane(pane)?;
+    let area = snapshot.content_area;
+    if area.width == 0 || area.height == 0 {
+        return None;
+    }
+
+    // A direct hit on a real line wins: precise per-cell selection.
+    if let Some(point) = copy_point_from_snapshot(&snapshot, column, row) {
+        return Some(point);
+    }
+
+    let line_count = snapshot.wrapped_plain_line_count();
+    if line_count == 0 {
+        return None;
+    }
+    let last_line = line_count.saturating_sub(1);
+    let last_visible_line = snapshot.visible_end.saturating_sub(1).min(last_line);
+    let first_visible_line = snapshot.scroll.min(last_line);
+
+    let last_row = area.y.saturating_add(area.height).saturating_sub(1);
+    let clamped_col = column.clamp(area.x, area.x.saturating_add(area.width).saturating_sub(1));
+
+    // Below the visible content: snap to the end of the last visible line.
+    if row >= last_row {
+        let text = snapshot.wrapped_plain_line(last_visible_line).unwrap_or("");
+        return Some(crate::tui::CopySelectionPoint {
+            pane,
+            abs_line: last_visible_line,
+            column: line_display_width(text),
+        });
+    }
+
+    // Above the visible content: snap to the start of the first visible line.
+    if row <= area.y {
+        return Some(crate::tui::CopySelectionPoint {
+            pane,
+            abs_line: first_visible_line,
+            column: snapshot.wrapped_copy_offset(first_visible_line).unwrap_or(0),
+        });
+    }
+
+    // Interior row that maps to no line (e.g. a blank gap row between/after
+    // content within the visible band): fall back to the boundary-clamped point.
+    copy_point_from_snapshot(
+        &snapshot,
+        clamped_col,
+        row.clamp(area.y, last_row),
+    )
+    .or(Some(crate::tui::CopySelectionPoint {
+        pane,
+        abs_line: last_visible_line,
+        column: line_display_width(snapshot.wrapped_plain_line(last_visible_line).unwrap_or("")),
+    }))
+}
+
 /// Edge point for tick-driven continuous auto-scroll, where there is no live
 /// mouse position. Uses the top/bottom boundary row of the pane and its left
 /// content column so the selection keeps extending to the freshly revealed line.

From ad0dd6c1d817ab96087ca7b02520fb535db41e93 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 16:41:28 -0700
Subject: [PATCH 44/57] feat(tui): collapse 'current' reasoning with a height
 animation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In 'current' reasoning-display mode the live dim/italic reasoning used to
vanish in a single frame when the answer committed or a tool ran, snapping
the transcript upward. Instead, on close the reasoning block is sliced out
of the streaming buffer into a dedicated collapsing 'reasoning' display
message that height-collapses (ease-out, oldest line first) toward a
one-line '▸ thought for Xs' summary, leaving a trace behind.

- New ReasoningCollapse state + begin/advance/finalize on App.
- Renders via a new 'reasoning' display role (dim+italic, sentinel-stripped).
- Redraw loop (local + remote tick, turn loop) advances the animation;
  redraw policy keeps frames live while collapsing.
- Reduced-motion / low-power tiers snap straight to the summary.
- Guards drop the animation safely on transcript reset/replace.
- Tests: block parsing, summary labels, monotone collapse, finalize,
  reduced-motion snap, and end-to-end dim/italic render of the role.
---
 crates/jcode-tui-messages/src/message.rs      |  14 ++
 crates/jcode-tui/src/tui/app.rs               |  34 +++
 crates/jcode-tui/src/tui/app/input.rs         | 205 +++++++++++++++
 crates/jcode-tui/src/tui/app/local.rs         |   4 +
 crates/jcode-tui/src/tui/app/remote.rs        |   1 +
 .../src/tui/app/state_ui_messages.rs          |   8 +
 .../src/tui/app/tests/reasoning_region.rs     | 237 ++++++++++++++++--
 crates/jcode-tui/src/tui/app/tui_lifecycle.rs |   6 +
 crates/jcode-tui/src/tui/app/tui_state.rs     |   4 +
 crates/jcode-tui/src/tui/app/turn.rs          |   2 +
 crates/jcode-tui/src/tui/mod.rs               |   7 +
 crates/jcode-tui/src/tui/ui.rs                |   4 +-
 crates/jcode-tui/src/tui/ui_messages.rs       |  17 ++
 crates/jcode-tui/src/tui/ui_prepare.rs        |  28 +++
 crates/jcode-tui/src/tui/ui_tests/prepare.rs  |  57 +++++
 15 files changed, 602 insertions(+), 26 deletions(-)

diff --git a/crates/jcode-tui-messages/src/message.rs b/crates/jcode-tui-messages/src/message.rs
index 08eb1941f..b2a7374a4 100644
--- a/crates/jcode-tui-messages/src/message.rs
+++ b/crates/jcode-tui-messages/src/message.rs
@@ -175,6 +175,20 @@ impl DisplayMessage {
         }
     }
 
+    /// Create a display-only collapsing reasoning trace ("current" mode). The
+    /// content is sentinel-wrapped dim/italic markup; this message height-collapses
+    /// toward a one-line summary and is excluded from provider/model context.
+    pub fn reasoning(content: impl Into<String>) -> Self {
+        Self {
+            role: "reasoning".to_string(),
+            content: content.into(),
+            tool_calls: Vec::new(),
+            duration_secs: None,
+            title: None,
+            tool_data: None,
+        }
+    }
+
     /// Convert the shared session renderer output into the TUI transcript model.
     pub fn from_rendered_message(item: RenderedMessage) -> Self {
         Self {
diff --git a/crates/jcode-tui/src/tui/app.rs b/crates/jcode-tui/src/tui/app.rs
index 376883b09..447d02ecf 100644
--- a/crates/jcode-tui/src/tui/app.rs
+++ b/crates/jcode-tui/src/tui/app.rs
@@ -340,6 +340,28 @@ pub enum ProcessingStatus {
     RunningTool(String),
 }
 
+/// Live "collapse the current reasoning" animation state.
+///
+/// In `current` reasoning-display mode the model's reasoning streams live as
+/// dim+italic lines, then must disappear once the answer commits or a tool runs.
+/// Instead of deleting every reasoning line in a single frame (a jarring upward
+/// jump), the closed reasoning block is moved into a dedicated `"reasoning"`
+/// display message that height-collapses toward a one-line summary over a short
+/// ease-out, leaving a `▸ thought for Xs` trace behind.
+#[derive(Clone, Debug)]
+pub(crate) struct ReasoningCollapse {
+    /// Index into `display_messages` of the `"reasoning"` message being collapsed.
+    pub(crate) msg_index: usize,
+    /// One-line dim summary the block collapses down to (markup for
+    /// "▸ thought for Xs"), always shown at the top of the message.
+    pub(crate) summary_markup: String,
+    /// Per-line dim+italic markup for each reasoning line, in order. The block
+    /// shrinks by dropping leading lines until only `summary_markup` remains.
+    pub(crate) line_markups: Vec<String>,
+    /// When the collapse animation started.
+    pub(crate) started_at: Instant,
+}
+
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub(crate) enum RemoteStartupPhase {
     StartingServer,
@@ -712,6 +734,18 @@ pub struct App {
     // `streaming_text` (the rendered tail of `reasoning_pending_line`). Truncated
     // and re-appended on each delta so the in-progress line updates in place.
     reasoning_partial_len: usize,
+    // Byte offset in `streaming_text` where the current reasoning block began
+    // (recorded by `open_reasoning_region`). Used in `current` mode to slice the
+    // closed reasoning block out of the stream and hand it to the collapse
+    // animation while keeping any answer text that preceded it in order.
+    reasoning_block_start: Option<usize>,
+    // Wall-clock instant the current reasoning region opened, used to label the
+    // collapsed summary ("▸ thought for Xs").
+    reasoning_block_started_at: Option<Instant>,
+    // Active "collapse the current reasoning" animation (current mode only). While
+    // set, a `"reasoning"` display message height-collapses toward its one-line
+    // summary; the redraw loop advances it each frame and finalizes on completion.
+    reasoning_collapse: Option<ReasoningCollapse>,
     // Hot-reload: if set, exec into new binary with this session ID (no rebuild)
     reload_requested: Option<String>,
     // Hot-rebuild: if set, do full git pull + cargo build + tests then exec
diff --git a/crates/jcode-tui/src/tui/app/input.rs b/crates/jcode-tui/src/tui/app/input.rs
index 95f9dec2d..6fe5b2bd9 100644
--- a/crates/jcode-tui/src/tui/app/input.rs
+++ b/crates/jcode-tui/src/tui/app/input.rs
@@ -50,6 +50,48 @@ pub(super) fn strip_reasoning_lines(content: &str) -> String {
     result.trim_end().to_string()
 }
 
+/// Total duration of the "current reasoning collapses away" height animation.
+pub(super) const REASONING_COLLAPSE_DURATION: Duration = Duration::from_millis(280);
+
+/// Split a just-closed reasoning block (sentinel-wrapped dim/italic line markup,
+/// as produced by [`jcode_tui_markdown::reasoning_line_markup`]) into one markup
+/// string per visible reasoning line. Blank separator lines are dropped so the
+/// collapse animates over real thought lines only.
+pub(super) fn reasoning_block_line_markups(block: &str) -> Vec<String> {
+    block
+        .split_inclusive('\n')
+        .filter(|segment| segment.contains(jcode_tui_markdown::REASONING_SENTINEL))
+        .map(|segment| segment.to_string())
+        .collect()
+}
+
+/// One-line dim summary the collapsed reasoning folds into. Includes a `▸` marker
+/// and the thinking duration when known (e.g. `▸ thought for 12s`).
+pub(super) fn reasoning_summary_markup(line_count: usize, elapsed: Option<Duration>) -> String {
+    let label = match elapsed {
+        Some(d) if d.as_secs() >= 1 => format!("▸ thought for {}s", d.as_secs()),
+        Some(_) => "▸ thought".to_string(),
+        None if line_count == 1 => "▸ thought (1 line)".to_string(),
+        None => format!("▸ thought ({} lines)", line_count),
+    };
+    jcode_tui_markdown::reasoning_line_markup(&label)
+}
+
+/// Build the transcript content for a collapsing `"reasoning"` message: the last
+/// `remaining` reasoning lines, or just the summary line once fully collapsed.
+pub(super) fn reasoning_message_content(
+    summary_markup: &str,
+    line_markups: &[String],
+    remaining: usize,
+) -> String {
+    if remaining == 0 || line_markups.is_empty() {
+        return summary_markup.to_string();
+    }
+    let remaining = remaining.min(line_markups.len());
+    let start = line_markups.len() - remaining;
+    line_markups[start..].concat()
+}
+
 pub(super) fn edit_input_in_external_editor(app: &mut App) {
     match edit_text_in_external_editor(&app.input) {
         Ok(edited) => {
@@ -2400,6 +2442,11 @@ impl App {
         self.reasoning_streaming = true;
         self.reasoning_pending_line.clear();
         self.reasoning_partial_len = 0;
+        // Remember where this reasoning block starts in the stream so `current`
+        // mode can later slice it out (without disturbing any preceding answer
+        // text) and hand it to the collapse animation.
+        self.reasoning_block_start = Some(self.streaming_text.len());
+        self.reasoning_block_started_at = Some(Instant::now());
     }
 
     /// Remove the live partial-reasoning tail (the rendered, not-yet-committed
@@ -2465,6 +2512,17 @@ impl App {
                 .push_str(&jcode_tui_markdown::reasoning_line_markup(&pending));
         }
         self.reasoning_streaming = false;
+
+        // In `current` mode, animate the block away instead of leaving it in the
+        // stream to be stripped wholesale at commit time.
+        if matches!(
+            crate::config::config().display.reasoning_display(),
+            crate::config::ReasoningDisplayMode::Current
+        ) {
+            self.begin_reasoning_collapse();
+            return;
+        }
+
         // Terminate the reasoning block with a blank line so following output
         // renders as a normal paragraph.
         if !self.streaming_text.ends_with("\n\n") {
@@ -2477,6 +2535,147 @@ impl App {
         self.refresh_split_view_if_needed();
     }
 
+    /// Slice the just-closed reasoning block out of `streaming_text` and move it
+    /// into a dedicated `"reasoning"` display message, then start (or replace) the
+    /// height-collapse animation. Any answer text streamed *before* the reasoning
+    /// block is left untouched so ordering is preserved. With decorative
+    /// animations disabled (reduced motion / low-power tiers) the block is
+    /// finalized straight to its summary line.
+    pub(super) fn begin_reasoning_collapse(&mut self) {
+        let block_start = self.reasoning_block_start.take().unwrap_or(0);
+        let started_at = self.reasoning_block_started_at.take();
+        // Finalize any previous collapse first so its message snaps to its summary
+        // instead of being orphaned mid-animation.
+        self.finalize_reasoning_collapse();
+
+        let block_start = block_start.min(self.streaming_text.len());
+
+        // Everything from the block start onward is reasoning markup (plus the
+        // separators inserted by open/close). Take it out of the live stream.
+        let block: String = self.streaming_text.split_off(block_start);
+        // Drop a trailing separator the answer-side path would otherwise add.
+        while self.streaming_text.ends_with('\n') {
+            self.streaming_text.pop();
+        }
+        self.refresh_split_view_if_needed();
+
+        let line_markups = reasoning_block_line_markups(&block);
+        if line_markups.is_empty() {
+            // Nothing to show (e.g. empty reasoning); just clear state.
+            self.reasoning_collapse = None;
+            return;
+        }
+
+        let elapsed = started_at.map(|t| t.elapsed());
+        let summary_markup = reasoning_summary_markup(line_markups.len(), elapsed);
+
+        // Build the committed message content: every reasoning line, then the
+        // summary as the final line. The renderer reveals a shrinking suffix.
+        let content =
+            reasoning_message_content(&summary_markup, &line_markups, line_markups.len());
+
+        let msg_index = self.display_messages.len();
+        self.push_display_message(DisplayMessage::reasoning(content));
+
+        let decorative = crate::perf::tui_policy().enable_decorative_animations;
+        if !decorative {
+            // Reduced motion: snap straight to the one-line summary.
+            self.replace_display_message_content(
+                msg_index,
+                reasoning_message_content(&summary_markup, &line_markups, 0),
+            );
+            self.reasoning_collapse = None;
+            return;
+        }
+
+        self.reasoning_collapse = Some(super::ReasoningCollapse {
+            msg_index,
+            summary_markup,
+            line_markups,
+            started_at: Instant::now(),
+        });
+    }
+
+    /// Advance the active reasoning-collapse animation. Returns `true` when the
+    /// transcript changed (so the caller should request a redraw). Finalizes to
+    /// the summary line once the animation completes.
+    pub(super) fn advance_reasoning_collapse(&mut self) -> bool {
+        let Some(collapse) = self.reasoning_collapse.as_ref() else {
+            return false;
+        };
+
+        // If the target message moved or was replaced (compaction/rewind), drop the
+        // animation rather than risk mutating an unrelated message.
+        if self
+            .display_messages
+            .get(collapse.msg_index)
+            .map(|m| m.role.as_str())
+            != Some("reasoning")
+        {
+            self.reasoning_collapse = None;
+            return false;
+        }
+
+        let total = collapse.line_markups.len();
+        let elapsed = collapse.started_at.elapsed();
+        let progress =
+            (elapsed.as_secs_f32() / REASONING_COLLAPSE_DURATION.as_secs_f32()).clamp(0.0, 1.0);
+        // Ease-out cubic so the block decelerates as it folds away.
+        let eased = 1.0 - (1.0 - progress).powi(3);
+        // Number of reasoning lines still visible above the summary. Counts down
+        // from `total` to 0 (only the summary remains).
+        let remaining = ((total as f32) * (1.0 - eased)).round() as usize;
+        let remaining = remaining.min(total);
+
+        let msg_index = collapse.msg_index;
+        let content =
+            reasoning_message_content(&collapse.summary_markup, &collapse.line_markups, remaining);
+        let changed = self.replace_display_message_content(msg_index, content);
+
+        if progress >= 1.0 {
+            self.reasoning_collapse = None;
+        }
+        changed
+    }
+
+    /// Whether a reasoning-collapse animation is currently running.
+    pub(super) fn reasoning_collapse_active(&self) -> bool {
+        self.reasoning_collapse.is_some()
+    }
+
+    /// Test hook: backdate the active collapse's start so `advance_*` observes a
+    /// specific elapsed fraction, and return the number of source reasoning lines.
+    #[cfg(test)]
+    pub(super) fn backdate_reasoning_collapse_for_test(
+        &mut self,
+        elapsed: std::time::Duration,
+    ) -> Option<usize> {
+        let collapse = self.reasoning_collapse.as_mut()?;
+        collapse.started_at = Instant::now()
+            .checked_sub(elapsed)
+            .unwrap_or_else(Instant::now);
+        Some(collapse.line_markups.len())
+    }
+
+    /// Finalize any in-flight reasoning collapse immediately (snap to summary).
+    /// Used when the turn ends or state is reset so no animation is left dangling.
+    pub(super) fn finalize_reasoning_collapse(&mut self) {
+        if let Some(collapse) = self.reasoning_collapse.take() {
+            if self
+                .display_messages
+                .get(collapse.msg_index)
+                .map(|m| m.role.as_str())
+                == Some("reasoning")
+            {
+                let content =
+                    reasoning_message_content(&collapse.summary_markup, &collapse.line_markups, 0);
+                self.replace_display_message_content(collapse.msg_index, content);
+            }
+        }
+        self.reasoning_block_start = None;
+        self.reasoning_block_started_at = None;
+    }
+
     pub(super) fn append_streaming_text(&mut self, text: &str) {
         if text.is_empty() {
             return;
@@ -2510,6 +2709,10 @@ impl App {
         self.reasoning_streaming = false;
         self.reasoning_pending_line.clear();
         self.reasoning_partial_len = 0;
+        // The stream (and any block offset into it) is gone; a running collapse
+        // targets a separate display message and is left to finish on its own.
+        self.reasoning_block_start = None;
+        self.reasoning_block_started_at = None;
         self.refresh_split_view_if_needed();
         self.streaming_md_renderer.borrow_mut().reset();
         crate::tui::mermaid::clear_streaming_preview_diagram();
@@ -2521,6 +2724,8 @@ impl App {
         self.reasoning_streaming = false;
         self.reasoning_pending_line.clear();
         self.reasoning_partial_len = 0;
+        self.reasoning_block_start = None;
+        self.reasoning_block_started_at = None;
         self.refresh_split_view_if_needed();
         self.streaming_md_renderer.borrow_mut().reset();
         crate::tui::mermaid::clear_streaming_preview_diagram();
diff --git a/crates/jcode-tui/src/tui/app/local.rs b/crates/jcode-tui/src/tui/app/local.rs
index b98883f7a..204730a75 100644
--- a/crates/jcode-tui/src/tui/app/local.rs
+++ b/crates/jcode-tui/src/tui/app/local.rs
@@ -55,6 +55,7 @@ pub(super) async fn process_turn_with_input(
 
 pub(super) fn handle_tick(app: &mut App) -> bool {
     let mut needs_redraw = crate::tui::periodic_redraw_required(app);
+    needs_redraw |= app.advance_reasoning_collapse();
     app.maybe_capture_runtime_memory_heartbeat();
     needs_redraw |= app.progress_copy_selection_edge_autoscroll();
     app.progress_mouse_scroll_animation();
@@ -472,6 +473,9 @@ pub(super) fn finish_turn(app: &mut App) {
     app.thought_line_inserted = false;
     app.thinking_prefix_emitted = false;
     app.thinking_buffer.clear();
+    // Snap any in-flight reasoning collapse straight to its summary so no
+    // animation is left running once the turn is idle.
+    app.finalize_reasoning_collapse();
     app.note_runtime_memory_event_force("turn_completed", "local_turn_finished");
     if !app.schedule_auto_poke_followup_if_needed()
         && !app.schedule_overnight_poke_followup_if_needed()
diff --git a/crates/jcode-tui/src/tui/app/remote.rs b/crates/jcode-tui/src/tui/app/remote.rs
index db3d3f8ab..4935da620 100644
--- a/crates/jcode-tui/src/tui/app/remote.rs
+++ b/crates/jcode-tui/src/tui/app/remote.rs
@@ -75,6 +75,7 @@ pub(super) async fn handle_tick(app: &mut App, remote: &mut RemoteConnection) ->
             .is_some_and(|state| state.kind == crate::tui::PickerKind::Model),
     });
     let mut needs_redraw = crate::tui::periodic_redraw_required(app);
+    needs_redraw |= app.advance_reasoning_collapse();
     app.maybe_capture_runtime_memory_heartbeat();
     needs_redraw |= app.progress_copy_selection_edge_autoscroll();
     app.progress_mouse_scroll_animation();
diff --git a/crates/jcode-tui/src/tui/app/state_ui_messages.rs b/crates/jcode-tui/src/tui/app/state_ui_messages.rs
index 423366464..4baa11496 100644
--- a/crates/jcode-tui/src/tui/app/state_ui_messages.rs
+++ b/crates/jcode-tui/src/tui/app/state_ui_messages.rs
@@ -74,6 +74,8 @@ impl App {
 
     pub(super) fn replace_display_messages(&mut self, mut messages: Vec<DisplayMessage>) {
         compact_display_messages_for_storage(&mut messages);
+        // Indices the collapse animation targets no longer apply to the new list.
+        self.reasoning_collapse = None;
         self.display_messages = messages;
         self.sync_compacted_history_lazy_from_display_messages();
         self.bump_display_messages_version();
@@ -336,6 +338,12 @@ impl App {
 
     pub(super) fn clear_display_messages(&mut self) {
         self.compacted_history_lazy = CompactedHistoryLazyState::default();
+        // The transcript (and the index the collapse animation targets) is about
+        // to be discarded; drop any in-flight collapse so it can't mutate a stale
+        // or unrelated message.
+        self.reasoning_collapse = None;
+        self.reasoning_block_start = None;
+        self.reasoning_block_started_at = None;
         if !self.display_messages.is_empty() {
             self.display_messages.clear();
             self.bump_display_messages_version();
diff --git a/crates/jcode-tui/src/tui/app/tests/reasoning_region.rs b/crates/jcode-tui/src/tui/app/tests/reasoning_region.rs
index 10b5a6075..b4d1ecf76 100644
--- a/crates/jcode-tui/src/tui/app/tests/reasoning_region.rs
+++ b/crates/jcode-tui/src/tui/app/tests/reasoning_region.rs
@@ -17,20 +17,47 @@ fn reasoning_region_emits_dim_italic_lines_no_gutter_header_or_footer() {
 
     app.open_reasoning_region();
     app.append_reasoning_text("Let me think.\nSecond thought.");
-    app.close_reasoning_region(None);
-
-    let text = app.streaming_text();
-    assert!(!text.contains("Thinking"), "no header expected: {text:?}");
-    assert!(!text.contains('>'), "no blockquote gutter expected: {text:?}");
-    assert!(!text.contains("Thought for"), "no footer expected: {text:?}");
+    // While streaming, reasoning is dim+italic markup in the live stream buffer.
+    let streaming = app.streaming_text().to_string();
+    assert!(
+        !streaming.contains("Thinking"),
+        "no header expected: {streaming:?}"
+    );
+    assert!(
+        !streaming.contains('>'),
+        "no blockquote gutter expected: {streaming:?}"
+    );
+    assert!(
+        !streaming.contains("Thought for"),
+        "no footer expected: {streaming:?}"
+    );
     let sentinel = jcode_tui_markdown::REASONING_SENTINEL;
     assert!(
-        text.contains(&format!("*{sentinel}Let me think.{sentinel}*")),
-        "first line not dim+italic: {text:?}"
+        streaming.contains(&format!("*{sentinel}Let me think.{sentinel}*")),
+        "first line not dim+italic: {streaming:?}"
     );
     assert!(
-        text.contains(&format!("*{sentinel}Second thought.{sentinel}*")),
-        "second line not dim+italic: {text:?}"
+        streaming.contains(&format!("*{sentinel}Second thought.{sentinel}*")),
+        "second line not dim+italic: {streaming:?}"
+    );
+
+    // In `current` mode (the default), closing moves the block into a dedicated
+    // collapsing `"reasoning"` display message and clears it from the stream.
+    app.close_reasoning_region(None);
+    assert!(
+        app.streaming_text().is_empty(),
+        "reasoning should leave the live stream once collapsed: {:?}",
+        app.streaming_text()
+    );
+    let reasoning_msg = app
+        .display_messages
+        .iter()
+        .find(|m| m.role == "reasoning")
+        .expect("reasoning message present");
+    assert!(
+        reasoning_msg.content.contains(sentinel),
+        "reasoning message keeps dim+italic markup: {:?}",
+        reasoning_msg.content
     );
 }
 
@@ -44,7 +71,12 @@ fn reasoning_region_closes_before_normal_output() {
     app.close_reasoning_region(None);
     app.append_streaming_text("Final answer.");
 
+    // The answer stays in the live stream and must never be styled as reasoning.
     let text = app.streaming_text();
+    assert!(
+        text.contains("Final answer."),
+        "answer present in stream: {text:?}"
+    );
     let answer_line = text
         .lines()
         .find(|l| l.contains("Final answer."))
@@ -53,9 +85,14 @@ fn reasoning_region_closes_before_normal_output() {
         !answer_line.contains(jcode_tui_markdown::REASONING_SENTINEL),
         "final answer must not be styled as reasoning: {answer_line:?}"
     );
+    // The reasoning collapsed into its own message; it is no longer in the stream.
     assert!(
-        text.contains("\n\nFinal answer."),
-        "missing blank-line separator before output: {text:?}"
+        !text.contains(jcode_tui_markdown::REASONING_SENTINEL),
+        "reasoning must not remain in the answer stream: {text:?}"
+    );
+    assert!(
+        app.display_messages.iter().any(|m| m.role == "reasoning"),
+        "a collapsing reasoning message should exist"
     );
 }
 
@@ -94,11 +131,18 @@ fn reasoning_line_split_across_deltas_stays_one_run() {
     app.append_reasoning_text("two\n");
     app.close_reasoning_region(None);
 
-    let text = app.streaming_text();
+    // The split-across-deltas line is committed as a single emphasis run in the
+    // collapsed reasoning message.
+    let content = app
+        .display_messages
+        .iter()
+        .find(|m| m.role == "reasoning")
+        .map(|m| m.content.clone())
+        .expect("reasoning message present");
     let sentinel = jcode_tui_markdown::REASONING_SENTINEL;
     assert!(
-        text.contains(&format!("*{sentinel}one two{sentinel}*")),
-        "split line must be one emphasis run: {text:?}"
+        content.contains(&format!("*{sentinel}one two{sentinel}*")),
+        "split line must be one emphasis run: {content:?}"
     );
 }
 
@@ -112,7 +156,15 @@ fn reasoning_region_renders_dim_italic_text_without_gutter() {
     app.append_reasoning_text("considering options\n");
     app.close_reasoning_region(None);
 
-    let lines = crate::tui::markdown::render_markdown_with_width(app.streaming_text(), Some(80));
+    // In `current` mode the reasoning now lives in a dedicated collapsing message.
+    let reasoning_content = app
+        .display_messages
+        .iter()
+        .find(|m| m.role == "reasoning")
+        .map(|m| m.content.clone())
+        .expect("reasoning message present");
+
+    let lines = crate::tui::markdown::render_markdown_with_width(&reasoning_content, Some(80));
     let body = lines
         .iter()
         .find(|l| {
@@ -248,7 +300,7 @@ fn reasoning_partial_promotes_to_committed_line_on_newline() {
 #[test]
 fn reasoning_close_promotes_pending_partial_line() {
     // Closing the region with an in-progress (no-newline) partial promotes it to a
-    // committed line exactly once.
+    // committed line exactly once, then collapses into the reasoning message.
     let mut app = create_test_app();
     let sentinel = jcode_tui_markdown::REASONING_SENTINEL;
 
@@ -256,15 +308,152 @@ fn reasoning_close_promotes_pending_partial_line() {
     app.append_reasoning_text("final thought");
     app.close_reasoning_region(None);
 
-    let text = app.streaming_text();
+    // The live stream no longer carries the reasoning; it moved into its message.
+    assert!(
+        app.streaming_text().is_empty(),
+        "reasoning should leave the live stream once collapsed: {:?}",
+        app.streaming_text()
+    );
+    let content = app
+        .display_messages
+        .iter()
+        .find(|m| m.role == "reasoning")
+        .map(|m| m.content.clone())
+        .expect("reasoning message present");
     assert_eq!(
-        text.matches(&format!("*{sentinel}final thought{sentinel}*"))
+        content
+            .matches(&format!("*{sentinel}final thought{sentinel}*"))
             .count(),
         1,
-        "pending partial promoted exactly once on close: {text:?}"
-    );
-    assert!(
-        text.ends_with("\n\n"),
-        "region terminated with blank line: {text:?}"
+        "pending partial promoted exactly once on close: {content:?}"
     );
 }
+
+#[test]
+fn reasoning_block_line_markups_keeps_only_sentinel_lines() {
+    use crate::tui::app::input::{reasoning_block_line_markups, reasoning_message_content};
+
+    let mut block = String::new();
+    block.push_str(&jcode_tui_markdown::reasoning_line_markup("alpha"));
+    block.push('\n'); // a blank separator line (no sentinel)
+    block.push_str(&jcode_tui_markdown::reasoning_line_markup("beta"));
+
+    let lines = reasoning_block_line_markups(&block);
+    assert_eq!(lines.len(), 2, "blank separators are dropped: {lines:?}");
+    let sentinel = jcode_tui_markdown::REASONING_SENTINEL;
+    assert!(lines[0].contains(&format!("{sentinel}alpha{sentinel}")));
+    assert!(lines[1].contains(&format!("{sentinel}beta{sentinel}")));
+
+    // Full content shows every line; remaining==0 shows only the summary.
+    let summary = jcode_tui_markdown::reasoning_line_markup("▸ thought");
+    let full = reasoning_message_content(&summary, &lines, lines.len());
+    assert!(full.contains("alpha") && full.contains("beta"));
+    let collapsed = reasoning_message_content(&summary, &lines, 0);
+    assert!(collapsed.contains("▸ thought"));
+    assert!(!collapsed.contains("alpha") && !collapsed.contains("beta"));
+
+    // A partial reveal keeps the *trailing* lines (oldest fold away first).
+    let partial = reasoning_message_content(&summary, &lines, 1);
+    assert!(partial.contains("beta"), "trailing line kept: {partial:?}");
+    assert!(!partial.contains("alpha"), "leading line folded: {partial:?}");
+}
+
+#[test]
+fn reasoning_summary_markup_uses_duration_when_known() {
+    use crate::tui::app::input::reasoning_summary_markup;
+    use std::time::Duration;
+
+    let with_secs = reasoning_summary_markup(3, Some(Duration::from_secs(12)));
+    assert!(with_secs.contains("▸ thought for 12s"), "{with_secs:?}");
+
+    let no_time = reasoning_summary_markup(4, None);
+    assert!(no_time.contains("▸ thought (4 lines)"), "{no_time:?}");
+}
+
+#[test]
+fn reasoning_collapse_finalizes_to_single_summary_line() {
+    let mut app = create_test_app();
+
+    app.open_reasoning_region();
+    app.append_reasoning_text("first\nsecond\nthird\n");
+    app.close_reasoning_region(None);
+
+    assert!(app.reasoning_collapse_active(), "collapse should start");
+
+    // Snapping finalizes the message to just the summary line.
+    app.finalize_reasoning_collapse();
+    assert!(!app.reasoning_collapse_active(), "collapse cleared on finalize");
+
+    let content = app
+        .display_messages
+        .iter()
+        .find(|m| m.role == "reasoning")
+        .map(|m| m.content.clone())
+        .expect("reasoning message present");
+    assert!(content.contains("▸ thought"), "summary present: {content:?}");
+    assert!(!content.contains("first"), "lines folded away: {content:?}");
+    assert!(!content.contains("third"), "lines folded away: {content:?}");
+}
+
+#[test]
+fn reasoning_collapse_drops_when_target_message_replaced() {
+    let mut app = create_test_app();
+
+    app.open_reasoning_region();
+    app.append_reasoning_text("thinking\n");
+    app.close_reasoning_region(None);
+    assert!(app.reasoning_collapse_active());
+
+    // A transcript reset must invalidate the animation target safely.
+    app.clear_display_messages();
+    assert!(!app.reasoning_collapse_active());
+    // Advancing now is a no-op and must not panic.
+    assert!(!app.advance_reasoning_collapse());
+}
+
+#[test]
+fn reasoning_collapse_visible_lines_shrink_monotonically_over_time() {
+    use crate::tui::app::input::REASONING_COLLAPSE_DURATION;
+    use std::time::Duration;
+
+    let mut app = create_test_app();
+    app.open_reasoning_region();
+    app.append_reasoning_text("l1\nl2\nl3\nl4\nl5\nl6\n");
+    app.close_reasoning_region(None);
+    let sentinel = jcode_tui_markdown::REASONING_SENTINEL;
+
+    let count_visible = |app: &App| -> usize {
+        app.display_messages
+            .iter()
+            .find(|m| m.role == "reasoning")
+            .map(|m| {
+                m.content
+                    .split_inclusive('\n')
+                    .filter(|seg| seg.contains(sentinel))
+                    .filter(|seg| !seg.contains('▸'))
+                    .count()
+            })
+            .unwrap_or(0)
+    };
+
+    // Sample the eased timeline; visible reasoning lines must never increase and
+    // must reach a single summary line (0 source lines) at/after the duration.
+    let dur = REASONING_COLLAPSE_DURATION;
+    let mut prev = usize::MAX;
+    for frac in [0.0_f32, 0.25, 0.5, 0.75, 1.0] {
+        let elapsed = Duration::from_secs_f32(dur.as_secs_f32() * frac);
+        app.backdate_reasoning_collapse_for_test(elapsed)
+            .expect("collapse active");
+        app.advance_reasoning_collapse();
+        let visible = count_visible(&app);
+        assert!(
+            visible <= prev,
+            "visible lines must not increase: frac={frac} visible={visible} prev={prev}"
+        );
+        prev = visible;
+    }
+
+    // Past the duration the animation is finalized to the summary only.
+    assert!(!app.reasoning_collapse_active(), "collapse should finish");
+    assert_eq!(count_visible(&app), 0, "only the summary line remains");
+}
diff --git a/crates/jcode-tui/src/tui/app/tui_lifecycle.rs b/crates/jcode-tui/src/tui/app/tui_lifecycle.rs
index 3c925d445..d5238330f 100644
--- a/crates/jcode-tui/src/tui/app/tui_lifecycle.rs
+++ b/crates/jcode-tui/src/tui/app/tui_lifecycle.rs
@@ -371,6 +371,9 @@ impl App {
             reasoning_streaming: false,
             reasoning_pending_line: String::new(),
             reasoning_partial_len: 0,
+            reasoning_block_start: None,
+            reasoning_block_started_at: None,
+            reasoning_collapse: None,
             reload_requested: None,
             rebuild_requested: None,
             update_requested: None,
@@ -771,6 +774,9 @@ impl App {
             reasoning_streaming: false,
             reasoning_pending_line: String::new(),
             reasoning_partial_len: 0,
+            reasoning_block_start: None,
+            reasoning_block_started_at: None,
+            reasoning_collapse: None,
             reload_requested: None,
             rebuild_requested: None,
             update_requested: None,
diff --git a/crates/jcode-tui/src/tui/app/tui_state.rs b/crates/jcode-tui/src/tui/app/tui_state.rs
index f4e90b82d..9a0f60a0b 100644
--- a/crates/jcode-tui/src/tui/app/tui_state.rs
+++ b/crates/jcode-tui/src/tui/app/tui_state.rs
@@ -596,6 +596,10 @@ impl crate::tui::TuiState for App {
         self.mouse_scroll_queue != 0
     }
 
+    fn reasoning_collapse_animating(&self) -> bool {
+        self.reasoning_collapse_active()
+    }
+
     fn total_session_tokens(&self) -> Option<(u64, u64)> {
         // In remote mode, use tokens from server
         // Independent mode doesn't currently track total tokens
diff --git a/crates/jcode-tui/src/tui/app/turn.rs b/crates/jcode-tui/src/tui/app/turn.rs
index f5b9c5a8c..6e63434d5 100644
--- a/crates/jcode-tui/src/tui/app/turn.rs
+++ b/crates/jcode-tui/src/tui/app/turn.rs
@@ -268,6 +268,8 @@ impl App {
                         if let Some(chunk) = self.stream_buffer.flush_smooth_frame() {
                             self.append_streaming_text(&chunk);
                         }
+                        // Advance the "current reasoning collapses away" animation.
+                        self.advance_reasoning_collapse();
                         // Poll for background compaction completion during streaming
                         self.poll_compaction_completion();
                         status_spinner_renderer.draw_full(self, terminal)?;
diff --git a/crates/jcode-tui/src/tui/mod.rs b/crates/jcode-tui/src/tui/mod.rs
index 5cf53e0b3..9556c23eb 100644
--- a/crates/jcode-tui/src/tui/mod.rs
+++ b/crates/jcode-tui/src/tui/mod.rs
@@ -217,6 +217,11 @@ pub trait TuiState {
     fn has_pending_mouse_scroll_animation(&self) -> bool {
         false
     }
+    /// Whether a "current reasoning collapses away" animation is in progress and
+    /// the redraw loop must keep ticking to advance it.
+    fn reasoning_collapse_animating(&self) -> bool {
+        false
+    }
     /// Optional configured keybinding label for external dictation.
     fn dictation_key_label(&self) -> Option<String>;
     /// Time since app started (for startup animations)
@@ -1282,6 +1287,7 @@ pub(crate) fn redraw_interval_with_policy(
         || !state.streaming_text().is_empty()
         || state.status_notice().is_some()
         || state.has_pending_mouse_scroll_animation()
+        || state.reasoning_collapse_animating()
         || state.copy_selection_edge_autoscroll_active()
         || state.has_notification()
         || rate_limit_countdown_redraw_active(state)
@@ -1341,6 +1347,7 @@ pub(crate) fn periodic_redraw_required(state: &dyn TuiState) -> bool {
         || !state.streaming_text().is_empty()
         || state.status_notice().is_some()
         || state.has_pending_mouse_scroll_animation()
+        || state.reasoning_collapse_animating()
         || state.copy_selection_edge_autoscroll_active()
         || state.chat_overscroll_active()
         || state.has_notification()
diff --git a/crates/jcode-tui/src/tui/ui.rs b/crates/jcode-tui/src/tui/ui.rs
index 80e749d3d..b0a180777 100644
--- a/crates/jcode-tui/src/tui/ui.rs
+++ b/crates/jcode-tui/src/tui/ui.rs
@@ -148,8 +148,8 @@ use memory_ui::{group_into_tiles, render_memory_tiles, split_by_display_width};
 use messages::get_cached_message_lines;
 #[cfg_attr(test, allow(unused_imports))]
 pub(crate) use messages::{
-    render_assistant_message, render_background_task_message, render_swarm_message,
-    render_system_message, render_tool_message, render_usage_message,
+    render_assistant_message, render_background_task_message, render_reasoning_message,
+    render_swarm_message, render_system_message, render_tool_message, render_usage_message,
 };
 pub use pinned_ui::{
     SidePanelDebugStats, SidePanelMermaidProbe, SidePanelMermaidProbeRect,
diff --git a/crates/jcode-tui/src/tui/ui_messages.rs b/crates/jcode-tui/src/tui/ui_messages.rs
index 4f9eb9eb8..a7f838124 100644
--- a/crates/jcode-tui/src/tui/ui_messages.rs
+++ b/crates/jcode-tui/src/tui/ui_messages.rs
@@ -71,6 +71,23 @@ pub(crate) fn render_assistant_message(
     lines
 }
 
+/// Render a collapsed/collapsing reasoning trace ("current" mode). The content is
+/// sentinel-wrapped dim+italic markup (reasoning lines and/or a `▸ thought for Xs`
+/// summary), so it reuses the standard markdown path that styles those runs dim.
+pub(crate) fn render_reasoning_message(
+    msg: &DisplayMessage,
+    width: u16,
+    _diff_mode: crate::config::DiffDisplayMode,
+) -> Vec<Line<'static>> {
+    let centered = markdown::center_code_blocks();
+    let wrap_width = centered_wrap_width(width, centered, 96);
+    let mut lines = markdown::render_markdown_with_width(&msg.content, Some(wrap_width));
+    if centered {
+        left_pad_lines_for_centered_mode(&mut lines, width);
+    }
+    lines
+}
+
 fn render_assistant_tool_call_lines(
     tool_calls: &[String],
     width: usize,
diff --git a/crates/jcode-tui/src/tui/ui_prepare.rs b/crates/jcode-tui/src/tui/ui_prepare.rs
index 5fc6f4dd2..49d3bdee6 100644
--- a/crates/jcode-tui/src/tui/ui_prepare.rs
+++ b/crates/jcode-tui/src/tui/ui_prepare.rs
@@ -918,6 +918,20 @@ pub(super) fn prepare_body_incremental(
                     new_line_copy_offsets.push(0);
                 }
             }
+            "reasoning" => {
+                let content_width = width.saturating_sub(4);
+                let cached = get_cached_message_lines(
+                    msg,
+                    content_width,
+                    app.diff_mode(),
+                    render_reasoning_message,
+                );
+                for line in cached {
+                    new_lines.push(align_if_unset(line, align));
+                    new_line_raw_overrides.push(None);
+                    new_line_copy_offsets.push(0);
+                }
+            }
             "background_task" => {
                 let content_width = width.saturating_sub(4);
                 let cached = get_cached_message_lines(
@@ -1388,6 +1402,20 @@ pub(super) fn prepare_body(
                     line_copy_offsets.push(0);
                 }
             }
+            "reasoning" => {
+                let content_width = width.saturating_sub(4);
+                let cached = get_cached_message_lines(
+                    msg,
+                    content_width,
+                    app.diff_mode(),
+                    render_reasoning_message,
+                );
+                for line in cached {
+                    lines.push(align_if_unset(line, align));
+                    line_raw_overrides.push(None);
+                    line_copy_offsets.push(0);
+                }
+            }
             "background_task" => {
                 let content_width = width.saturating_sub(4);
                 let cached = get_cached_message_lines(
diff --git a/crates/jcode-tui/src/tui/ui_tests/prepare.rs b/crates/jcode-tui/src/tui/ui_tests/prepare.rs
index 2b4f0dd1d..cf89a08bc 100644
--- a/crates/jcode-tui/src/tui/ui_tests/prepare.rs
+++ b/crates/jcode-tui/src/tui/ui_tests/prepare.rs
@@ -738,3 +738,60 @@ fn test_render_tool_message_batch_subcall_lines_alignment_unset() {
     }
     crate::tui::markdown::set_center_code_blocks(false);
 }
+
+#[test]
+fn test_prepare_messages_renders_reasoning_role_dim_italic_without_sentinel() {
+    let _guard = crate::storage::lock_test_env();
+    clear_test_render_state_for_tests();
+
+    // A collapsing reasoning message carries sentinel-wrapped dim/italic markup.
+    let mut content = String::new();
+    content.push_str(&jcode_tui_markdown::reasoning_line_markup("weighing the options"));
+    content.push_str(&jcode_tui_markdown::reasoning_line_markup("▸ thought for 3s"));
+
+    let state = TestState {
+        display_messages: vec![
+            DisplayMessage::user("hi"),
+            DisplayMessage::reasoning(content),
+        ],
+        ..Default::default()
+    };
+
+    let prepared = prepare::prepare_messages(&state, 100, 30);
+    let lines = prepared.materialize_all_lines();
+
+    // The visible reasoning body is present, dim+italic, and sentinel-free.
+    let body = lines
+        .iter()
+        .find(|l| {
+            let joined: String = l.spans.iter().map(|s| s.content.as_ref()).collect();
+            joined.contains("weighing the options")
+        })
+        .expect("reasoning body line present");
+    let rendered: String = body.spans.iter().map(|s| s.content.as_ref()).collect();
+    assert!(
+        !rendered.contains(jcode_tui_markdown::REASONING_SENTINEL),
+        "sentinel must be stripped from visible reasoning: {rendered:?}"
+    );
+    let span = body
+        .spans
+        .iter()
+        .find(|s| s.content.as_ref().contains("weighing"))
+        .expect("body span");
+    assert!(
+        span.style
+            .add_modifier
+            .contains(ratatui::style::Modifier::ITALIC),
+        "reasoning body should be italic: {:?}",
+        span.style
+    );
+
+    // The summary line is present too.
+    assert!(
+        lines.iter().any(|l| {
+            let joined: String = l.spans.iter().map(|s| s.content.as_ref()).collect();
+            joined.contains("thought for 3s")
+        }),
+        "summary line should render"
+    );
+}

From bfa62b451799ed3ae2df2a48920f9e2369251cf4 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 16:42:58 -0700
Subject: [PATCH 45/57] perf(tui): maintain display-message counters
 incrementally on append

bump_display_messages_version recomputed display_user_message_count and
display_edit_tool_message_count by scanning all display messages twice on every
mutation. Appending one message at a time over a long session made counter
maintenance cumulatively O(M^2).

The hot append path now folds the single new message into the cached counters
(O(1)) and bumps the version without a full rescan; rarer bulk/remove/replace
paths still recompute fully. Add a test asserting the incrementally-maintained
counters match a full recompute after interleaved pushes and removes.
---
 crates/jcode-tui/src/tui/app/state_ui.rs      | 42 ++++++++++---
 .../src/tui/app/state_ui_messages.rs          |  7 ++-
 .../tests/remote_events_reload_02/part_02.rs  | 61 +++++++++++++++++++
 3 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/crates/jcode-tui/src/tui/app/state_ui.rs b/crates/jcode-tui/src/tui/app/state_ui.rs
index 03ca7df41..e2edf6817 100644
--- a/crates/jcode-tui/src/tui/app/state_ui.rs
+++ b/crates/jcode-tui/src/tui/app/state_ui.rs
@@ -34,16 +34,37 @@ impl App {
         self.display_edit_tool_message_count = self
             .display_messages
             .iter()
-            .filter(|message| {
-                message
-                    .tool_data
-                    .as_ref()
-                    .map(|tool| tools_ui::is_edit_tool_name(&tool.name))
-                    .unwrap_or(false)
-            })
+            .filter(|message| Self::display_message_is_edit_tool(message))
             .count();
     }
 
+    /// Whether a single display message counts as an edit-tool message for the
+    /// incrementally-maintained `display_edit_tool_message_count`.
+    fn display_message_is_edit_tool(message: &DisplayMessage) -> bool {
+        message
+            .tool_data
+            .as_ref()
+            .map(|tool| tools_ui::is_edit_tool_name(&tool.name))
+            .unwrap_or(false)
+    }
+
+    /// Fold a single message into the cached display-message counters with the
+    /// given sign (+1 when added, -1 when removed). This keeps the counters
+    /// O(1) per mutation instead of rescanning the whole transcript via
+    /// `recompute_display_message_stats`, which made appending M messages one at
+    /// a time cumulatively O(M^2).
+    pub(super) fn adjust_display_message_stats(&mut self, message: &DisplayMessage, added: bool) {
+        let delta: isize = if added { 1 } else { -1 };
+        if message.effective_role() == "user" {
+            self.display_user_message_count =
+                (self.display_user_message_count as isize + delta).max(0) as usize;
+        }
+        if Self::display_message_is_edit_tool(message) {
+            self.display_edit_tool_message_count =
+                (self.display_edit_tool_message_count as isize + delta).max(0) as usize;
+        }
+    }
+
     pub(super) fn active_client_session_id(&self) -> Option<&str> {
         if self.is_remote {
             self.remote_session_id.as_deref()
@@ -85,6 +106,13 @@ impl App {
 
     pub(super) fn bump_display_messages_version(&mut self) {
         self.recompute_display_message_stats();
+        self.bump_display_messages_version_no_stats();
+    }
+
+    /// Bump the display-messages version without rescanning the transcript to
+    /// recompute counters. Callers that have already maintained the cached
+    /// counters incrementally (e.g. a single append) use this to stay O(1).
+    pub(super) fn bump_display_messages_version_no_stats(&mut self) {
         self.display_messages_version = self.display_messages_version.wrapping_add(1);
         self.bump_context_revision();
         self.refresh_split_view_if_needed();
diff --git a/crates/jcode-tui/src/tui/app/state_ui_messages.rs b/crates/jcode-tui/src/tui/app/state_ui_messages.rs
index 4baa11496..906e4cc68 100644
--- a/crates/jcode-tui/src/tui/app/state_ui_messages.rs
+++ b/crates/jcode-tui/src/tui/app/state_ui_messages.rs
@@ -65,8 +65,13 @@ impl App {
             return;
         }
         let is_tool = message.role == "tool";
+        // Maintain the cached display-message counters incrementally for this
+        // single append, then bump the version without a full O(M) rescan.
+        // Appending is the hot path; rescanning every append was O(M^2) over a
+        // long session.
+        self.adjust_display_message_stats(&message, true);
         self.display_messages.push(message);
-        self.bump_display_messages_version();
+        self.bump_display_messages_version_no_stats();
         if is_tool && self.diff_mode.has_side_pane() && self.diff_pane_auto_scroll {
             self.diff_pane_scroll = usize::MAX;
         }
diff --git a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_02.rs b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_02.rs
index 58caa3c64..965603b5d 100644
--- a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_02.rs
+++ b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_02.rs
@@ -169,6 +169,67 @@ fn test_remove_display_message_bumps_version() {
     assert_ne!(app.display_messages_version, before);
 }
 
+#[test]
+fn test_incremental_display_message_counts_match_full_recompute() {
+    let mut app = create_test_app();
+
+    // Interleave user, assistant, and edit-tool messages via the public append
+    // path, which now maintains the counters incrementally instead of
+    // rescanning the whole transcript.
+    for i in 0..50 {
+        app.push_display_message(DisplayMessage::user(format!("prompt {i}")));
+        app.push_display_message(DisplayMessage::assistant(format!("reply {i}")));
+        if i % 3 == 0 {
+            app.push_display_message(DisplayMessage {
+                role: "tool".to_string(),
+                content: format!("edited file {i}"),
+                tool_calls: vec![],
+                duration_secs: None,
+                title: None,
+                tool_data: Some(crate::message::ToolCall {
+                    id: format!("edit-{i}"),
+                    name: "edit".to_string(),
+                    input: serde_json::json!({"file_path": format!("src/file_{i}.rs")}),
+                    intent: None,
+                    thought_signature: None,
+                }),
+            });
+        }
+    }
+
+    // Remove a few messages to exercise the decrement path.
+    app.remove_display_message(0);
+    app.remove_display_message(5);
+
+    let incremental_user = app.display_user_message_count;
+    let incremental_edit = app.display_edit_tool_message_count;
+
+    let expected_user = app
+        .display_messages
+        .iter()
+        .filter(|m| m.effective_role() == "user")
+        .count();
+    let expected_edit = app
+        .display_messages
+        .iter()
+        .filter(|m| {
+            m.tool_data
+                .as_ref()
+                .map(|tool| crate::tui::ui::tools_ui::is_edit_tool_name(&tool.name))
+                .unwrap_or(false)
+        })
+        .count();
+
+    assert_eq!(
+        incremental_user, expected_user,
+        "incrementally-maintained user count should match a full recompute"
+    );
+    assert_eq!(
+        incremental_edit, expected_edit,
+        "incrementally-maintained edit-tool count should match a full recompute"
+    );
+}
+
 #[test]
 fn test_handle_remote_disconnect_retryable_pending_schedules_retry() {
     let mut app = create_test_app();

From 6b8b63dfd42fb6253db8564492524b0fd5cc8147 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 16:53:48 -0700
Subject: [PATCH 46/57] fix(antigravity/gemini): recover from intermittent
 Gemini-3 MALFORMED_FUNCTION_CALL

Gemini-3 thinking models intermittently emit Python-style pseudo-code (e.g.
print(default_api.read(...))) instead of a clean functionCall, which the Cloud
Code backend rejects with finish_reason=MALFORMED_FUNCTION_CALL and empty
content. Previously the runtime ended the turn with a silent empty MessageEnd,
so the agent looked like it stalled with no answer. For gemini-3.1-pro-high this
hit roughly half of tool turns.

Three layered mitigations (per Gemini function-calling guidance / field reports):
1. Prevention: when tools are advertised, append a 'Function calling' guard to
   the Gemini system prompt forbidding code/namespaces (build_system_instruction_with_tool_guard).
2. Transparent retry: detect a malformed empty turn (is_retryable_empty_turn) and
   re-request up to twice before surfacing anything, so the agent never sees the
   blip. Retries force function-calling mode ANY so the model must emit a real
   functionCall instead of pseudo-code.
3. Surfacing: if output is still empty after retries, emit an actionable error
   (with the finish_reason and finishMessage) instead of a silent empty turn.

Also surfaces the previously-hidden finishMessage for diagnosis. Measured on the
live Antigravity backend: gemini-3.1-pro-high tool-call success went from ~50%
to ~7/8 (remaining miss was a probe-deadline timeout, not malformed). Unit tests
cover the guard and the retry classifier.
---
 crates/jcode-base/src/provider/antigravity.rs | 134 +++++++++++++++++-
 .../src/provider/antigravity_tests.rs         |  53 +++++++
 crates/jcode-base/src/provider/gemini.rs      |  74 +++++++++-
 .../jcode-base/src/provider/gemini_tests.rs   |  32 +++++
 4 files changed, 290 insertions(+), 3 deletions(-)

diff --git a/crates/jcode-base/src/provider/antigravity.rs b/crates/jcode-base/src/provider/antigravity.rs
index e64f09870..6954f1d10 100644
--- a/crates/jcode-base/src/provider/antigravity.rs
+++ b/crates/jcode-base/src/provider/antigravity.rs
@@ -676,6 +676,7 @@ impl AntigravityProvider {
         tools: &[ToolDefinition],
         system: &str,
         resume_session_id: Option<&str>,
+        force_function_call: bool,
     ) -> Result<CodeAssistGenerateResponse> {
         let mut tokens = antigravity_auth::load_or_refresh_tokens().await?;
         let project = match tokens
@@ -714,13 +715,23 @@ impl AntigravityProvider {
             user_prompt_id: Uuid::new_v4().to_string(),
             request: VertexGenerateContentRequest {
                 contents: super::gemini::build_contents(messages),
-                system_instruction: super::gemini::build_system_instruction(system),
+                system_instruction: super::gemini::build_system_instruction_with_tool_guard(
+                    system,
+                    !tools_is_empty,
+                ),
                 tools,
                 tool_config: if tools_is_empty {
                     None
                 } else {
+                    // On a transparent retry after a MALFORMED_FUNCTION_CALL, force
+                    // function-calling mode `ANY` so the model must emit a real
+                    // functionCall instead of the Python-style pseudo-code that
+                    // triggered the malformed turn (the proven recovery for this
+                    // failure mode). Normal turns use `AUTO`.
                     Some(GeminiToolConfig {
-                        function_calling_config: GeminiFunctionCallingConfig { mode: "AUTO" },
+                        function_calling_config: GeminiFunctionCallingConfig {
+                            mode: if force_function_call { "ANY" } else { "AUTO" },
+                        },
                     })
                 },
                 session_id: resume_session_id
@@ -805,6 +816,51 @@ fn model_is_claude(model: &str) -> bool {
     model.trim().to_ascii_lowercase().contains("claude")
 }
 
+/// Whether a `generateContent` response is an abnormal turn that produced no
+/// usable output (no text, no function call). This is the shape Gemini-3
+/// "thinking" models intermittently return when they emit Python-style
+/// pseudo-code instead of a clean functionCall: `finish_reason ==
+/// MALFORMED_FUNCTION_CALL` (or another non-terminal reason) with empty content.
+/// Such a turn is worth one transparent retry before surfacing an error.
+///
+/// Normal terminal reasons (`STOP`, `MAX_TOKENS`, unspecified) are never treated
+/// as retryable here, even with empty content, so a legitimately empty answer is
+/// not retried in a loop.
+fn is_retryable_empty_turn(response: &CodeAssistGenerateResponse) -> bool {
+    let Some(candidate) = response
+        .response
+        .as_ref()
+        .and_then(|r| r.candidates.as_ref())
+        .and_then(|c| c.first())
+    else {
+        // No candidate at all is handled separately (hard error), not retried here.
+        return false;
+    };
+    let produced_output = candidate
+        .content
+        .as_ref()
+        .map(|content| {
+            content.parts.iter().any(|part| {
+                part.function_call.is_some()
+                    || part.text.as_deref().is_some_and(|text| !text.is_empty())
+            })
+        })
+        .unwrap_or(false);
+    if produced_output {
+        return false;
+    }
+    candidate
+        .finish_reason
+        .as_deref()
+        .map(|reason| {
+            !matches!(
+                reason.to_ascii_uppercase().as_str(),
+                "STOP" | "MAX_TOKENS" | "FINISH_REASON_UNSPECIFIED" | ""
+            )
+        })
+        .unwrap_or(false)
+}
+
 /// Remap model ids that the Antigravity catalog advertises but the
 /// `generateContent`/`streamGenerateContent` backend cannot actually service,
 /// onto an equivalent id that works.
@@ -996,6 +1052,7 @@ impl Provider for AntigravityProvider {
                     &tools,
                     &system,
                     resume_session_id.as_deref(),
+                    false,
                 )
                 .await
             {
@@ -1005,6 +1062,36 @@ impl Provider for AntigravityProvider {
                     return;
                 }
             };
+            // Gemini-3 thinking models intermittently return an empty
+            // `MALFORMED_FUNCTION_CALL` turn (pseudo-code instead of a clean
+            // functionCall). It is transient, so transparently re-request a few
+            // times before surfacing it; this turns a frequent hard failure into a
+            // near-always-successful turn without the agent loop seeing the blip.
+            // The retries force function-calling mode `ANY` so the model must emit
+            // a real functionCall rather than the pseudo-code that failed.
+            let mut response = response;
+            let mut malformed_retries = 0u8;
+            const MAX_MALFORMED_RETRIES: u8 = 2;
+            while is_retryable_empty_turn(&response) && malformed_retries < MAX_MALFORMED_RETRIES {
+                malformed_retries += 1;
+                match provider
+                    .generate_content(
+                        &model,
+                        &messages,
+                        &tools,
+                        &system,
+                        resume_session_id.as_deref(),
+                        true,
+                    )
+                    .await
+                {
+                    Ok(retried) => response = retried,
+                    Err(err) => {
+                        let _ = tx.send(Err(err)).await;
+                        return;
+                    }
+                }
+            }
             let _ = tx
                 .send(Ok(StreamEvent::ConnectionPhase {
                     phase: ConnectionPhase::Streaming,
@@ -1036,6 +1123,13 @@ impl Provider for AntigravityProvider {
                     .await;
                 return;
             };
+            // Track whether this candidate produced any usable output (text or a
+            // tool call). Gemini-3 thinking models intermittently emit Python-style
+            // pseudo-code instead of a clean functionCall and finish with
+            // `MALFORMED_FUNCTION_CALL` (or a bare `OTHER`) and empty content. If we
+            // silently end the turn the agent loop looks like it stalled with no
+            // answer, so we surface an actionable error below instead.
+            let mut produced_output = false;
             if let Some(content) = candidate.content {
                 // Gemini 3 attaches a `thoughtSignature` to function-call parts
                 // (and occasionally to a standalone preceding part). Emit tool
@@ -1052,9 +1146,11 @@ impl Provider for AntigravityProvider {
                         .filter(|sig| !sig.is_empty())
                         .cloned();
                     if let Some(text) = part.text.filter(|text| !text.is_empty()) {
+                        produced_output = true;
                         let _ = tx.send(Ok(StreamEvent::TextDelta(text))).await;
                     }
                     if let Some(function_call) = part.function_call {
+                        produced_output = true;
                         let signature = part_signature.clone().or_else(|| pending_signature.take());
                         let raw_call_id = function_call
                             .id
@@ -1096,6 +1192,40 @@ impl Provider for AntigravityProvider {
                 }
             }
 
+            // An abnormal finish (typically Gemini-3's intermittent
+            // `MALFORMED_FUNCTION_CALL`, where the model writes pseudo-code rather
+            // than a valid functionCall) that yielded no text and no tool call is a
+            // dead turn: surface it as a retryable error instead of a silent empty
+            // `MessageEnd` that looks like the agent gave up. `STOP`/`MAX_TOKENS`
+            // are normal terminal reasons and are left to flow through as usual.
+            if !produced_output {
+                let abnormal = candidate
+                    .finish_reason
+                    .as_deref()
+                    .map(|reason| {
+                        !matches!(
+                            reason.to_ascii_uppercase().as_str(),
+                            "STOP" | "MAX_TOKENS" | "FINISH_REASON_UNSPECIFIED" | ""
+                        )
+                    })
+                    .unwrap_or(false);
+                if abnormal {
+                    let reason = candidate.finish_reason.as_deref().unwrap_or("unknown");
+                    let detail = candidate
+                        .finish_message
+                        .as_deref()
+                        .filter(|msg| !msg.trim().is_empty())
+                        .map(|msg| format!(": {}", crate::util::truncate_str(msg.trim(), 300)))
+                        .unwrap_or_default();
+                    let _ = tx
+                        .send(Err(anyhow::anyhow!(
+                            "Antigravity returned no usable output (finish_reason={reason}){detail}"
+                        )))
+                        .await;
+                    return;
+                }
+            }
+
             let _ = tx
                 .send(Ok(StreamEvent::MessageEnd {
                     stop_reason: candidate.finish_reason.clone(),
diff --git a/crates/jcode-base/src/provider/antigravity_tests.rs b/crates/jcode-base/src/provider/antigravity_tests.rs
index 75c11a05d..dddf23d00 100644
--- a/crates/jcode-base/src/provider/antigravity_tests.rs
+++ b/crates/jcode-base/src/provider/antigravity_tests.rs
@@ -546,3 +546,56 @@ fn antigravity_compatible_schema_strips_bounds_and_combiners_for_gpt_oss() {
     assert!(out["properties"]["tool_calls"].get("maxItems").is_none());
     assert_eq!(out["properties"]["tool_calls"]["type"], serde_json::json!("array"));
 }
+
+#[test]
+fn is_retryable_empty_turn_detects_malformed_function_call() {
+    // Empty content + MALFORMED_FUNCTION_CALL is the transient Gemini-3 failure we
+    // retry transparently.
+    let response: CodeAssistGenerateResponse = serde_json::from_value(serde_json::json!({
+        "response": {
+            "candidates": [{
+                "content": {},
+                "finishReason": "MALFORMED_FUNCTION_CALL",
+                "finishMessage": "Malformed function call: print(default_api.read(...))"
+            }]
+        }
+    }))
+    .expect("decode malformed response");
+    assert!(is_retryable_empty_turn(&response));
+}
+
+#[test]
+fn is_retryable_empty_turn_ignores_normal_and_productive_turns() {
+    // A normal STOP turn with text is never retried.
+    let with_text: CodeAssistGenerateResponse = serde_json::from_value(serde_json::json!({
+        "response": {
+            "candidates": [{
+                "content": {"parts": [{"text": "hello"}]},
+                "finishReason": "STOP"
+            }]
+        }
+    }))
+    .expect("decode text response");
+    assert!(!is_retryable_empty_turn(&with_text));
+
+    // A turn with a function call is productive even with no text.
+    let with_call: CodeAssistGenerateResponse = serde_json::from_value(serde_json::json!({
+        "response": {
+            "candidates": [{
+                "content": {"parts": [{"functionCall": {"name": "read", "args": {}}}]},
+                "finishReason": "STOP"
+            }]
+        }
+    }))
+    .expect("decode function call response");
+    assert!(!is_retryable_empty_turn(&with_call));
+
+    // An empty STOP turn (legitimately empty answer) is not retried in a loop.
+    let empty_stop: CodeAssistGenerateResponse = serde_json::from_value(serde_json::json!({
+        "response": {
+            "candidates": [{ "content": {}, "finishReason": "STOP" }]
+        }
+    }))
+    .expect("decode empty stop response");
+    assert!(!is_retryable_empty_turn(&empty_stop));
+}
diff --git a/crates/jcode-base/src/provider/gemini.rs b/crates/jcode-base/src/provider/gemini.rs
index b2aa15d8d..29fa508e4 100644
--- a/crates/jcode-base/src/provider/gemini.rs
+++ b/crates/jcode-base/src/provider/gemini.rs
@@ -512,7 +512,10 @@ impl GeminiProvider {
             user_prompt_id: Uuid::new_v4().to_string(),
             request: VertexGenerateContentRequest {
                 contents: build_contents(messages),
-                system_instruction: build_system_instruction(system),
+                system_instruction: build_system_instruction_with_tool_guard(
+                    system,
+                    !tools.is_empty(),
+                ),
                 tools: build_tools(tools),
                 tool_config: if tools.is_empty() {
                     None
@@ -809,6 +812,12 @@ impl Provider for GeminiProvider {
                         .await;
                     return;
                 }
+                // Track whether this candidate produced any usable output (text or
+                // a tool call). Gemini-3 thinking models intermittently emit
+                // Python-style pseudo-code instead of a clean functionCall and
+                // finish with `MALFORMED_FUNCTION_CALL` and empty content; surface
+                // that as a retryable error below rather than a silent empty turn.
+                let mut produced_output = false;
                 if let Some(content) = candidate.content {
                     // Gemini 3 attaches a `thoughtSignature` to function-call
                     // parts (and occasionally to a standalone preceding part).
@@ -826,9 +835,11 @@ impl Provider for GeminiProvider {
                         if let Some(text) = part.text
                             && !text.is_empty()
                         {
+                            produced_output = true;
                             let _ = tx.send(Ok(StreamEvent::TextDelta(text))).await;
                         }
                         if let Some(function_call) = part.function_call {
+                            produced_output = true;
                             let signature =
                                 part_signature.clone().or_else(|| pending_signature.take());
                             let raw_call_id = function_call
@@ -869,6 +880,38 @@ impl Provider for GeminiProvider {
                             .await;
                     }
                 }
+
+                // An abnormal finish (typically Gemini-3's intermittent
+                // `MALFORMED_FUNCTION_CALL`) that yielded no text and no tool call
+                // is a dead turn: surface it as a retryable error instead of a
+                // silent empty `MessageEnd`. `STOP`/`MAX_TOKENS` are normal.
+                if !produced_output {
+                    let abnormal = candidate
+                        .finish_reason
+                        .as_deref()
+                        .map(|reason| {
+                            !matches!(
+                                reason.to_ascii_uppercase().as_str(),
+                                "STOP" | "MAX_TOKENS" | "FINISH_REASON_UNSPECIFIED" | ""
+                            )
+                        })
+                        .unwrap_or(false);
+                    if abnormal {
+                        let reason = candidate.finish_reason.as_deref().unwrap_or("unknown");
+                        let detail = candidate
+                            .finish_message
+                            .as_deref()
+                            .filter(|msg| !msg.trim().is_empty())
+                            .map(|msg| format!(": {}", crate::util::truncate_str(msg.trim(), 300)))
+                            .unwrap_or_default();
+                        let _ = tx
+                            .send(Err(anyhow::anyhow!(
+                                "Gemini returned no usable output (finish_reason={reason}){detail}"
+                            )))
+                            .await;
+                        return;
+                    }
+                }
             }
 
             let _ = tx.send(Ok(StreamEvent::MessageEnd { stop_reason })).await;
@@ -1027,6 +1070,35 @@ pub(crate) fn build_system_instruction(system: &str) -> Option<GeminiContent> {
     }
 }
 
+/// Prevention guidance appended to the Gemini system prompt when tools are
+/// advertised. Gemini-3 "thinking" models intermittently emit Python-style
+/// pseudo-code (e.g. `print(default_api.read(...))`) instead of a clean
+/// `functionCall`, which the backend rejects with `MALFORMED_FUNCTION_CALL` and
+/// empty content. Explicitly forbidding code/namespaces measurably reduces that
+/// failure mode at no latency cost (see the Gemini function-calling guidance and
+/// field reports of this exact behavior).
+const GEMINI_FUNCTION_CALL_GUARD: &str = "\n\n## Function calling\n\
+     - When you call a tool, emit a native function call, not code. Never write \
+     Python (or any language) that calls the tool, and never wrap a call in \
+     print(...) or a code block.\n\
+     - Use the function name exactly as defined. Do not prepend `default_api.` \
+     or any other namespace to the function name.";
+
+/// Build the Gemini `system_instruction`, appending [`GEMINI_FUNCTION_CALL_GUARD`]
+/// when tools are advertised so the model is steered away from the
+/// `MALFORMED_FUNCTION_CALL` pseudo-code failure mode.
+pub(crate) fn build_system_instruction_with_tool_guard(
+    system: &str,
+    has_tools: bool,
+) -> Option<GeminiContent> {
+    if !has_tools {
+        return build_system_instruction(system);
+    }
+    let mut combined = system.trim().to_string();
+    combined.push_str(GEMINI_FUNCTION_CALL_GUARD);
+    build_system_instruction(&combined)
+}
+
 pub(crate) fn build_contents(messages: &[Message]) -> Vec<GeminiContent> {
     messages
         .iter()
diff --git a/crates/jcode-base/src/provider/gemini_tests.rs b/crates/jcode-base/src/provider/gemini_tests.rs
index 4dedb2fa7..6e13e71df 100644
--- a/crates/jcode-base/src/provider/gemini_tests.rs
+++ b/crates/jcode-base/src/provider/gemini_tests.rs
@@ -595,3 +595,35 @@ fn developer_api_response_parses_without_code_assist_envelope() {
         .expect("missing text");
     assert_eq!(text, "hello from developer api");
 }
+
+#[test]
+fn system_instruction_tool_guard_only_applies_with_tools() {
+    // Without tools, the system instruction is passed through unchanged.
+    let plain = super::build_system_instruction_with_tool_guard("You are helpful.", false)
+        .expect("system instruction present");
+    let plain_text = plain.parts[0].text.clone().unwrap();
+    assert_eq!(plain_text, "You are helpful.");
+    assert!(!plain_text.contains("Function calling"));
+
+    // With tools, the MALFORMED_FUNCTION_CALL prevention guidance is appended.
+    let guarded = super::build_system_instruction_with_tool_guard("You are helpful.", true)
+        .expect("system instruction present");
+    let guarded_text = guarded.parts[0].text.clone().unwrap();
+    assert!(guarded_text.starts_with("You are helpful."));
+    assert!(guarded_text.contains("Function calling"));
+    assert!(guarded_text.contains("native function call, not code"));
+    assert!(guarded_text.contains("default_api."));
+}
+
+#[test]
+fn system_instruction_tool_guard_with_empty_system_still_emits_guidance() {
+    // An empty base system prompt plus tools must still carry the guard so the
+    // model is steered away from pseudo-code tool calls.
+    let guarded = super::build_system_instruction_with_tool_guard("", true)
+        .expect("guard-only instruction present");
+    let text = guarded.parts[0].text.clone().unwrap();
+    assert!(text.contains("Function calling"));
+
+    // Empty system and no tools yields no instruction at all.
+    assert!(super::build_system_instruction_with_tool_guard("", false).is_none());
+}

From 770fd943db50c0d27d37bdf4848ace8964bdf345 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 16:56:42 -0700
Subject: [PATCH 47/57] perf(desktop): memoize rendered body lines; stop
 re-wrapping transcript per mouse move

Selection hit-testing (single_session_visible_body -> body viewport ->
single_session_rendered_body_lines_for_tick) re-parsed markdown and re-wrapped
the ENTIRE transcript on every selection mouse-move during a drag, an
O(transcript) cost per pointer event.

Add a thread-local single-entry memo keyed by the existing body cache key and
return the wrapped lines as a shared Rc, so the viewport only clones the visible
slice instead of the whole transcript. The render hot path keeps its separate
Canvas-side cache, so this only accelerates input/scroll-metric/geometry callers.

Measured on real transcripts (debug build): per-mouse-move selection hit-test
p99 ~121ms -> ~0.06ms. Adds a selection_input_hittest benchmark phase that
isolates this cost, plus a debug-only env gate for A/B measurement.
---
 crates/jcode-desktop/src/main.rs              | 47 +++++++++++++---
 .../src/single_session_render.rs              | 56 ++++++++++++++++++-
 2 files changed, 94 insertions(+), 9 deletions(-)

diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs
index 703a68995..dfa94f896 100644
--- a/crates/jcode-desktop/src/main.rs
+++ b/crates/jcode-desktop/src/main.rs
@@ -5669,20 +5669,31 @@ fn benchmark_real_transcript_actions(
     ));
 
     // 3. Selection drag across the visible transcript while parked mid-scroll.
+    //    This mirrors the real mouse-handler input path, which calls
+    //    single_session_visible_body (a full transcript wrap, now memoized) and
+    //    hit-tests the cursor on every pointer move, then redraws.
     {
         let mut app = base_app.clone();
         app.body_scroll_lines = (max_scroll / 2) as f32;
-        let viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &body_lines);
-        let visible = single_session_visible_body(&app, size);
-        app.begin_selection(SelectionPoint { line: 0, column: 0 });
+        let initial_visible = single_session_visible_body(&app, size);
+        if let Some(point) = single_session_body_point_at_position(size, 40.0, 80.0, &initial_visible)
+        {
+            app.begin_selection(point);
+        } else {
+            app.begin_selection(SelectionPoint { line: 0, column: 0 });
+        }
         let mut font_system = benchmark_font_system();
         let (mut buffers, mut window_start, mut window_end, mut last_start) =
             action_prime_window(&app, &body_lines, size, &mut font_system);
         let (samples, _) = benchmark_frame_samples(frames, |frame| {
-            let line = frame % viewport.lines.len().max(1);
-            let column = (frame * 7) % 80;
-            app.update_selection(SelectionPoint { line, column });
-            let _ = &visible;
+            // Real input path: resolve the cursor against the visible body
+            // (full-transcript wrap, memoized) and update the selection.
+            let visible = single_session_visible_body(&app, size);
+            let y = 80.0 + (frame % 600) as f32;
+            let x = 40.0 + (frame % 400) as f32;
+            if let Some(point) = single_session_body_point_at_position(size, x, y, &visible) {
+                app.update_selection(point);
+            }
             action_render_window(
                 &app,
                 &body_lines,
@@ -5699,6 +5710,28 @@ fn benchmark_real_transcript_actions(
         phases.push(("selection_drag", samples));
     }
 
+    // 3b. Pure input-side selection hit-test cost (no redraw). This isolates the
+    //     real per-mouse-move work the desktop selection handler does:
+    //     single_session_visible_body (a full-transcript wrap, now memoized) plus
+    //     cursor hit-testing. The redraw it triggers is separately cached, so this
+    //     phase exposes the wrap/memo cost that the combined selection_drag phase
+    //     hides behind geometry building.
+    {
+        let mut app = base_app.clone();
+        app.body_scroll_lines = (max_scroll / 2) as f32;
+        app.begin_selection(SelectionPoint { line: 0, column: 0 });
+        let (samples, _) = benchmark_frame_samples(frames, |frame| {
+            let visible = single_session_visible_body(&app, size);
+            let y = 80.0 + (frame % 600) as f32;
+            let x = 40.0 + (frame % 400) as f32;
+            if let Some(point) = single_session_body_point_at_position(size, x, y, &visible) {
+                app.update_selection(point);
+            }
+            visible.len()
+        });
+        phases.push(("selection_input_hittest", samples));
+    }
+
     // 4. Typing in the composer while parked at the bottom of the transcript.
     {
         let mut app = base_app.clone();
diff --git a/crates/jcode-desktop/src/single_session_render.rs b/crates/jcode-desktop/src/single_session_render.rs
index 9fd0d6dd8..e3ba6264c 100644
--- a/crates/jcode-desktop/src/single_session_render.rs
+++ b/crates/jcode-desktop/src/single_session_render.rs
@@ -9087,7 +9087,11 @@ pub(crate) fn single_session_body_viewport_for_tick(
     tick: u64,
     smooth_scroll_lines: f32,
 ) -> SingleSessionBodyViewport {
-    let lines = single_session_rendered_body_lines_for_tick(app, size, tick);
+    // Borrow the memoized full body lines and only clone the visible slice via
+    // `single_session_body_viewport_from_lines`, instead of cloning the whole
+    // transcript. This keeps input-side callers (selection hit-testing on every
+    // mouse-move) O(visible) rather than O(transcript).
+    let lines = single_session_rendered_body_lines_for_tick_shared(app, size, tick);
     single_session_body_viewport_from_lines(app, size, smooth_scroll_lines, &lines)
 }
 
@@ -9131,7 +9135,55 @@ pub(crate) fn single_session_rendered_body_lines_for_tick(
     size: PhysicalSize<u32>,
     tick: u64,
 ) -> Vec<SingleSessionStyledLine> {
-    single_session_rendered_body_lines_from_raw(app, size, app.body_styled_lines_for_tick(tick))
+    (*single_session_rendered_body_lines_for_tick_shared(app, size, tick)).clone()
+}
+
+/// Shared, memoized rendered body lines for the current transcript+layout.
+///
+/// This re-parses markdown and re-wraps the ENTIRE transcript (O(transcript)),
+/// and is called from input handling (every selection mouse-move during a
+/// drag), scroll-metric probing, and several geometry builders. Returning a
+/// shared `Rc` lets callers that only need a slice (the viewport) avoid cloning
+/// the whole transcript on every pointer event. The render hot path uses a
+/// separate Canvas-side cache (`cached_single_session_body_lines`); this
+/// thread-local single-entry memo accelerates the remaining callers. The key is
+/// the body cache key, which already captures the message fingerprint, size,
+/// text scale, and welcome/streaming state, so the cache invalidates whenever
+/// any of those change.
+pub(crate) fn single_session_rendered_body_lines_for_tick_shared(
+    app: &SingleSessionApp,
+    size: PhysicalSize<u32>,
+    tick: u64,
+) -> std::rc::Rc<Vec<SingleSessionStyledLine>> {
+    let layout_size = single_session_body_layout_cache_size(app, size);
+    let key = app.rendered_body_cache_key(layout_size);
+    thread_local! {
+        static RENDERED_BODY_LINES_MEMO: std::cell::RefCell<Option<(u64, std::rc::Rc<Vec<SingleSessionStyledLine>>)>> =
+            const { std::cell::RefCell::new(None) };
+    }
+    // Allow disabling the memo for A/B perf measurement in debug builds only;
+    // the production memo can never be turned off by an env var.
+    let memo_disabled = cfg!(debug_assertions)
+        && std::env::var_os("JCODE_DESKTOP_DISABLE_BODY_MEMO").is_some();
+    if !memo_disabled
+        && let Some(cached) = RENDERED_BODY_LINES_MEMO.with(|cell| {
+            cell.borrow()
+                .as_ref()
+                .filter(|(cached_key, _)| *cached_key == key)
+                .map(|(_, lines)| lines.clone())
+        })
+    {
+        return cached;
+    }
+    let lines =
+        single_session_rendered_body_lines_from_raw(app, size, app.body_styled_lines_for_tick(tick));
+    let shared = std::rc::Rc::new(lines);
+    if !memo_disabled {
+        RENDERED_BODY_LINES_MEMO.with(|cell| {
+            *cell.borrow_mut() = Some((key, shared.clone()));
+        });
+    }
+    shared
 }
 
 pub(crate) fn single_session_rendered_body_lines_from_raw(

From 3ec9c48ba8b6362d9a8062a84be581e5667fd6fe Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 16:59:05 -0700
Subject: [PATCH 48/57] perf(ambient): pre-filter recent sessions by file mtime
 before parsing

gather_recent_sessions fully parsed every session JSON file (the sessions dir
can hold tens of thousands) just to drop those older than the 24h cutoff and
keep the 20 most recent: O(all_sessions * parse) per ambient cycle.

Pre-filter candidate files by filesystem mtime (with a 1h margin for write/clock
skew) before loading, sort newest-first, and only parse up to a bounded budget
(4x the limit) before the existing id-based sort/truncate. Behavior is
preserved; work drops from O(all_sessions) to O(recent_sessions).
---
 crates/jcode-app-core/src/ambient/prompt.rs | 111 +++++++++++++-------
 1 file changed, 75 insertions(+), 36 deletions(-)

diff --git a/crates/jcode-app-core/src/ambient/prompt.rs b/crates/jcode-app-core/src/ambient/prompt.rs
index c5bb677fb..2cca08b28 100644
--- a/crates/jcode-app-core/src/ambient/prompt.rs
+++ b/crates/jcode-app-core/src/ambient/prompt.rs
@@ -181,50 +181,89 @@ pub fn gather_recent_sessions(since: Option<DateTime<Utc>>) -> Vec<RecentSession
 
     let cutoff = since.unwrap_or_else(|| Utc::now() - chrono::Duration::hours(24));
 
-    let mut recent = Vec::new();
+    // Pre-filter candidate session files by filesystem mtime BEFORE loading and
+    // parsing them. The sessions directory can hold tens of thousands of files;
+    // fully parsing every one via Session::load just to drop those older than
+    // the cutoff is O(all_sessions * parse). A session updated after the cutoff
+    // has a recent mtime, so we keep only files whose mtime is at or after the
+    // cutoff (minus a small margin for clock/write skew), then load newest-first
+    // and stop once we have enough recent sessions.
+    const RECENT_SESSION_LIMIT: usize = 20;
+    let mtime_cutoff = cutoff - chrono::Duration::hours(1);
+
+    let mut candidates: Vec<(std::path::PathBuf, std::time::SystemTime)> = Vec::new();
     if let Ok(entries) = std::fs::read_dir(&sessions_dir) {
         for entry in entries.flatten() {
             let path = entry.path();
-            if path.extension().map(|e| e == "json").unwrap_or(false)
-                && let Some(stem) = path.file_stem().and_then(|s| s.to_str())
-                && let Ok(session) = crate::session::Session::load(stem)
-            {
-                // Skip debug sessions
-                if session.is_debug {
-                    continue;
-                }
-                // Only include sessions updated after cutoff
-                if session.updated_at < cutoff {
-                    continue;
-                }
-                let duration = (session.updated_at - session.created_at)
-                    .num_seconds()
-                    .max(0);
-                let extraction = if session.messages.is_empty() {
-                    "no messages"
-                } else {
-                    // Heuristic: if session closed normally, assume extracted
-                    match &session.status {
-                        crate::session::SessionStatus::Closed => "extracted",
-                        crate::session::SessionStatus::Crashed { .. } => "missed",
-                        crate::session::SessionStatus::Active => "in progress",
-                        _ => "unknown",
-                    }
-                };
-                recent.push(RecentSessionInfo {
-                    id: session.id.clone(),
-                    status: session.status.display().to_string(),
-                    topic: session.display_title().map(ToOwned::to_owned),
-                    duration_secs: duration,
-                    extraction_status: extraction.to_string(),
-                });
+            if !path.extension().map(|e| e == "json").unwrap_or(false) {
+                continue;
+            }
+            let Ok(modified) = entry.metadata().and_then(|meta| meta.modified()) else {
+                // If we can't read mtime, keep the file as a candidate so we
+                // don't silently drop a possibly-recent session.
+                candidates.push((path, std::time::SystemTime::UNIX_EPOCH));
+                continue;
+            };
+            let modified_dt: DateTime<Utc> = modified.into();
+            if modified_dt < mtime_cutoff {
+                continue;
+            }
+            candidates.push((path, modified));
+        }
+    }
+    // Newest files first so we can stop early once we have enough.
+    candidates.sort_by(|a, b| b.1.cmp(&a.1));
+
+    let mut recent = Vec::new();
+    // Load somewhat more than the final limit by mtime so the subsequent
+    // id-based sort/truncate picks the true most-recent set even when file
+    // mtime order and id (timestamp) order disagree near the boundary, while
+    // still bounding work far below "load every session file".
+    let load_budget = RECENT_SESSION_LIMIT.saturating_mul(4).max(RECENT_SESSION_LIMIT);
+    let mut loaded = 0usize;
+    for (path, _modified) in candidates {
+        if loaded >= load_budget {
+            break;
+        }
+        if let Some(stem) = path.file_stem().and_then(|s| s.to_str())
+            && let Ok(session) = crate::session::Session::load(stem)
+        {
+            loaded += 1;
+            // Skip debug sessions
+            if session.is_debug {
+                continue;
+            }
+            // Only include sessions updated after cutoff
+            if session.updated_at < cutoff {
+                continue;
             }
+            let duration = (session.updated_at - session.created_at)
+                .num_seconds()
+                .max(0);
+            let extraction = if session.messages.is_empty() {
+                "no messages"
+            } else {
+                // Heuristic: if session closed normally, assume extracted
+                match &session.status {
+                    crate::session::SessionStatus::Closed => "extracted",
+                    crate::session::SessionStatus::Crashed { .. } => "missed",
+                    crate::session::SessionStatus::Active => "in progress",
+                    _ => "unknown",
+                }
+            };
+            recent.push(RecentSessionInfo {
+                id: session.id.clone(),
+                status: session.status.display().to_string(),
+                topic: session.display_title().map(ToOwned::to_owned),
+                duration_secs: duration,
+                extraction_status: extraction.to_string(),
+            });
         }
     }
 
-    // Sort by most recent first (we don't have created_at easily, sort by id which embeds timestamp)
+    // Sort by most recent first (id embeds a timestamp).
     recent.sort_by(|a, b| b.id.cmp(&a.id));
-    recent.truncate(20); // Cap at 20 to keep prompt reasonable
+    recent.truncate(RECENT_SESSION_LIMIT);
     recent
 }
 

From b5e96e52664722a12467fc5aa54839b36870a04c Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 17:01:42 -0700
Subject: [PATCH 49/57] perf(tui): normalize inline-picker fuzzy pattern once
 per keystroke

picker_fuzzy_score re-lowercased and re-collected the filter pattern into a
Vec<char> on every call, i.e. once per entry inside the per-keystroke filter
loop (O(entries * pattern)). Hoist pattern normalization out via
picker_fuzzy_pattern + picker_fuzzy_score_with_pattern and normalize once per
filter pass. Scoring behavior is unchanged.
---
 crates/jcode-app-core/src/tool/todo.rs        |   4 +
 crates/jcode-task-types/src/lib.rs            |   5 +
 .../src/tui/app/inline_interactive.rs         |  20 +-
 .../app/tests/commands_accounts_02/part_01.rs |   3 +
 .../app/tests/commands_accounts_02/part_02.rs |   3 +
 .../tests/remote_events_reload_01/part_01.rs  |   1 +
 .../tests/remote_events_reload_01/part_02.rs  |   4 +
 .../tests/remote_events_reload_02/part_01.rs  |   4 +
 .../tests/remote_startup_input_01/part_01.rs  |   1 +
 .../tests/remote_startup_input_02/part_01.rs  |   2 +
 .../tui/app/tests/scroll_copy_01/part_02.rs   |   1 +
 .../app/tests/state_model_poke_02/part_01.rs  |   1 +
 .../src/tui/app/tests/state_model_poke_03.rs  |  11 +
 crates/jcode-tui/src/tui/app/todos_view.rs    | 104 ++++++
 crates/jcode-tui/src/tui/app/tui_state.rs     |   1 +
 .../jcode-tui/src/tui/info_widget_overview.rs |   1 +
 crates/jcode-tui/src/tui/info_widget_tests.rs |  62 +++
 crates/jcode-tui/src/tui/info_widget_todos.rs | 353 +++++++++++-------
 18 files changed, 445 insertions(+), 136 deletions(-)

diff --git a/crates/jcode-app-core/src/tool/todo.rs b/crates/jcode-app-core/src/tool/todo.rs
index 56174424a..42fcd87ce 100644
--- a/crates/jcode-app-core/src/tool/todo.rs
+++ b/crates/jcode-app-core/src/tool/todo.rs
@@ -57,6 +57,10 @@ impl Tool for TodoTool {
                                 "type": "string",
                                 "description": "ID."
                             },
+                            "group": {
+                                "type": "string",
+                                "description": "Optional group label. Todos sharing a group render together under one header. Use one group per coherent goal (e.g. 'optimize rendering'). When the user steers into new work, start a new group instead of renaming the existing one. Omit for an ungrouped flat list."
+                            },
                             "confidence": {
                                 "type": "integer",
                                 "minimum": 0,
diff --git a/crates/jcode-task-types/src/lib.rs b/crates/jcode-task-types/src/lib.rs
index bd14c783c..80bb5d091 100644
--- a/crates/jcode-task-types/src/lib.rs
+++ b/crates/jcode-task-types/src/lib.rs
@@ -200,6 +200,11 @@ pub struct TodoItem {
     pub status: String,
     pub priority: String,
     pub id: String,
+    /// Optional group label. Todos that share a group are displayed together
+    /// under a single header. Use one group per coherent goal; when work is
+    /// steered into a new area, start a new group instead of renaming.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub group: Option<String>,
     /// Forward-looking confidence, from 0-100, that this todo can be completed correctly.
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub confidence: Option<u8>,
diff --git a/crates/jcode-tui/src/tui/app/inline_interactive.rs b/crates/jcode-tui/src/tui/app/inline_interactive.rs
index 4106e9e1a..94ff6f947 100644
--- a/crates/jcode-tui/src/tui/app/inline_interactive.rs
+++ b/crates/jcode-tui/src/tui/app/inline_interactive.rs
@@ -2700,11 +2700,22 @@ impl App {
     }
 
     pub(super) fn picker_fuzzy_score(pattern: &str, text: &str) -> Option<i32> {
-        let pat: Vec<char> = pattern
+        let pat = Self::picker_fuzzy_pattern(pattern);
+        Self::picker_fuzzy_score_with_pattern(&pat, text)
+    }
+
+    /// Normalize a fuzzy-match pattern (lowercase, drop whitespace) into chars.
+    /// Hoist this out of per-entry scoring so a filter pass over N entries
+    /// normalizes the pattern once instead of N times per keystroke.
+    pub(super) fn picker_fuzzy_pattern(pattern: &str) -> Vec<char> {
+        pattern
             .to_lowercase()
             .chars()
             .filter(|c| !c.is_whitespace())
-            .collect();
+            .collect()
+    }
+
+    pub(super) fn picker_fuzzy_score_with_pattern(pat: &[char], text: &str) -> Option<i32> {
         let txt: Vec<char> = text.to_lowercase().chars().collect();
         if pat.is_empty() {
             return Some(0);
@@ -2750,13 +2761,16 @@ impl App {
         if picker.filter.is_empty() {
             picker.filtered = (0..picker.entries.len()).collect();
         } else {
+            // Normalize the filter pattern once per keystroke instead of once per
+            // entry inside picker_fuzzy_score.
+            let pat = Self::picker_fuzzy_pattern(&picker.filter);
             let mut scored: Vec<(usize, i32)> = picker
                 .entries
                 .iter()
                 .enumerate()
                 .filter_map(|(i, m)| {
                     let filter_text = picker.filter_text(m);
-                    Self::picker_fuzzy_score(&picker.filter, &filter_text).map(|s| {
+                    Self::picker_fuzzy_score_with_pattern(&pat, &filter_text).map(|s| {
                         let usage_bonus = m.usage_score.min(i32::MAX as u32) as i32;
                         let bonus = usage_bonus + if m.recommended { 5 } else { 0 };
                         (i, s + bonus)
diff --git a/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_01.rs
index 109d32c9b..fb3ebb0e9 100644
--- a/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_01.rs
+++ b/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_01.rs
@@ -669,6 +669,7 @@ fn test_improve_status_summarizes_current_todos() {
             &app.session.id,
             &[
                 crate::todo::TodoItem {
+                    group: None,
                     id: "one".to_string(),
                     content: "Profile startup path".to_string(),
                     status: "in_progress".to_string(),
@@ -679,6 +680,7 @@ fn test_improve_status_summarizes_current_todos() {
                     completion_confidence: None,
                 },
                 crate::todo::TodoItem {
+                    group: None,
                     id: "two".to_string(),
                     content: "Add regression test".to_string(),
                     status: "completed".to_string(),
@@ -770,6 +772,7 @@ fn test_improve_resume_uses_saved_mode_and_current_todos() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "resume1".to_string(),
                 content: "Refactor command parsing".to_string(),
                 status: "in_progress".to_string(),
diff --git a/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_02.rs b/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_02.rs
index 75318fb15..c69427b69 100644
--- a/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_02.rs
+++ b/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_02.rs
@@ -127,6 +127,7 @@ fn test_refactor_status_summarizes_current_todos() {
             &app.session.id,
             &[
                 crate::todo::TodoItem {
+                    group: None,
                     id: "one".to_string(),
                     content: "Split giant module".to_string(),
                     status: "in_progress".to_string(),
@@ -137,6 +138,7 @@ fn test_refactor_status_summarizes_current_todos() {
                     completion_confidence: None,
                 },
                 crate::todo::TodoItem {
+                    group: None,
                     id: "two".to_string(),
                     content: "Run review subagent".to_string(),
                     status: "completed".to_string(),
@@ -177,6 +179,7 @@ fn test_refactor_resume_uses_saved_mode_and_current_todos() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "resume1".to_string(),
                 content: "Extract review prompt builder".to_string(),
                 status: "in_progress".to_string(),
diff --git a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_01.rs b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_01.rs
index 1c68ff9b9..a398b3962 100644
--- a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_01.rs
+++ b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_01.rs
@@ -1022,6 +1022,7 @@ fn test_remote_done_auto_pokes_again_when_todos_remain() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Continue working".to_string(),
                 status: "pending".to_string(),
diff --git a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_02.rs b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_02.rs
index f72f6b552..e1d8dbb7f 100644
--- a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_02.rs
+++ b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_02.rs
@@ -82,6 +82,7 @@ fn test_remote_auto_poke_followup_preserves_visible_timer_and_stays_hidden() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Continue working".to_string(),
                 status: "pending".to_string(),
@@ -133,6 +134,7 @@ fn test_remote_auto_poke_completion_above_threshold_only_updates_ui() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Finished work".to_string(),
                 status: "completed".to_string(),
@@ -170,6 +172,7 @@ fn test_remote_auto_poke_completion_below_threshold_tells_model_to_keep_working(
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Needs validation".to_string(),
                 status: "completed".to_string(),
@@ -209,6 +212,7 @@ fn test_remote_poke_status_and_off_update_state() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Continue working".to_string(),
                 status: "pending".to_string(),
diff --git a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_01.rs
index 17a05a4d0..b0741706f 100644
--- a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_01.rs
+++ b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_01.rs
@@ -9,6 +9,7 @@ fn test_remote_poke_queues_when_turn_is_in_progress() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Continue working".to_string(),
                 status: "pending".to_string(),
@@ -50,6 +51,7 @@ fn test_remote_poke_queues_when_turn_is_in_progress() {
             &app.session.id,
             &[
                 crate::todo::TodoItem {
+                    group: None,
                     id: "todo-1".to_string(),
                     content: "Continue working".to_string(),
                     status: "pending".to_string(),
@@ -60,6 +62,7 @@ fn test_remote_poke_queues_when_turn_is_in_progress() {
                     completion_confidence: None,
                 },
                 crate::todo::TodoItem {
+                    group: None,
                     id: "todo-2".to_string(),
                     content: "Handle the newly discovered follow-up".to_string(),
                     status: "pending".to_string(),
@@ -148,6 +151,7 @@ fn test_remote_interrupted_auto_poke_requeues_after_deferred_poke() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Resume after interrupt".to_string(),
                 status: "pending".to_string(),
diff --git a/crates/jcode-tui/src/tui/app/tests/remote_startup_input_01/part_01.rs b/crates/jcode-tui/src/tui/app/tests/remote_startup_input_01/part_01.rs
index 7e5aa9b50..02eb99f1c 100644
--- a/crates/jcode-tui/src/tui/app/tests/remote_startup_input_01/part_01.rs
+++ b/crates/jcode-tui/src/tui/app/tests/remote_startup_input_01/part_01.rs
@@ -16,6 +16,7 @@ fn test_finish_turn_does_not_duplicate_existing_poke_followup() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Keep going".to_string(),
                 status: "pending".to_string(),
diff --git a/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs
index bb765c636..d47bd6b78 100644
--- a/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs
+++ b/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs
@@ -736,6 +736,7 @@ fn test_create_transfer_session_from_parent_copies_todos_and_uses_compacted_cont
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Carry this forward".to_string(),
                 status: "pending".to_string(),
@@ -871,6 +872,7 @@ fn test_escape_interrupt_disables_auto_poke_while_processing() {
     app.queued_messages
         .push(super::commands::build_poke_message(&[
             crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "keep going".to_string(),
                 status: "pending".to_string(),
diff --git a/crates/jcode-tui/src/tui/app/tests/scroll_copy_01/part_02.rs b/crates/jcode-tui/src/tui/app/tests/scroll_copy_01/part_02.rs
index 370e43373..b50da2af1 100644
--- a/crates/jcode-tui/src/tui/app/tests/scroll_copy_01/part_02.rs
+++ b/crates/jcode-tui/src/tui/app/tests/scroll_copy_01/part_02.rs
@@ -261,6 +261,7 @@ fn test_remote_escape_interrupt_disables_auto_poke_while_processing() {
     app.queued_messages
         .push(super::commands::build_poke_message(&[
             crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "keep going".to_string(),
                 status: "pending".to_string(),
diff --git a/crates/jcode-tui/src/tui/app/tests/state_model_poke_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/state_model_poke_02/part_01.rs
index 693b17d88..086662f47 100644
--- a/crates/jcode-tui/src/tui/app/tests/state_model_poke_02/part_01.rs
+++ b/crates/jcode-tui/src/tui/app/tests/state_model_poke_02/part_01.rs
@@ -910,6 +910,7 @@ fn test_context_command_reports_session_context_snapshot() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "one".to_string(),
                 content: "Inspect context summary".to_string(),
                 status: "pending".to_string(),
diff --git a/crates/jcode-tui/src/tui/app/tests/state_model_poke_03.rs b/crates/jcode-tui/src/tui/app/tests/state_model_poke_03.rs
index aa830a570..cb2deb442 100644
--- a/crates/jcode-tui/src/tui/app/tests/state_model_poke_03.rs
+++ b/crates/jcode-tui/src/tui/app/tests/state_model_poke_03.rs
@@ -1858,6 +1858,7 @@ fn test_poke_arms_auto_poke_until_todos_are_done() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Finish the remaining task".to_string(),
                 status: "pending".to_string(),
@@ -1888,6 +1889,7 @@ fn test_poke_status_reports_current_state() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Finish the remaining task".to_string(),
                 status: "pending".to_string(),
@@ -1940,6 +1942,7 @@ fn test_poke_off_disarms_and_clears_queued_followup() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Keep going".to_string(),
                 status: "pending".to_string(),
@@ -1987,6 +1990,7 @@ fn test_poke_queues_when_turn_is_in_progress() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Finish the remaining task".to_string(),
                 status: "pending".to_string(),
@@ -2021,6 +2025,7 @@ fn test_poke_queues_when_turn_is_in_progress() {
             &app.session.id,
             &[
                 crate::todo::TodoItem {
+                    group: None,
                     id: "todo-1".to_string(),
                     content: "Finish the remaining task".to_string(),
                     status: "pending".to_string(),
@@ -2031,6 +2036,7 @@ fn test_poke_queues_when_turn_is_in_progress() {
                     completion_confidence: None,
                 },
                 crate::todo::TodoItem {
+                    group: None,
                     id: "todo-2".to_string(),
                     content: "Pick up the newly discovered task".to_string(),
                     status: "pending".to_string(),
@@ -2088,6 +2094,7 @@ fn test_finish_turn_auto_pokes_again_when_todos_remain() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Keep going".to_string(),
                 status: "in_progress".to_string(),
@@ -2118,6 +2125,7 @@ fn test_finish_turn_auto_poke_queues_confidence_summary_when_todos_done() {
             &app.session.id,
             &[
                 crate::todo::TodoItem {
+                    group: None,
                     id: "todo-1".to_string(),
                     content: "Finish risky provider path".to_string(),
                     status: "completed".to_string(),
@@ -2128,6 +2136,7 @@ fn test_finish_turn_auto_poke_queues_confidence_summary_when_todos_done() {
                     completion_confidence: Some(80),
                 },
                 crate::todo::TodoItem {
+                    group: None,
                     id: "todo-2".to_string(),
                     content: "Document straightforward behavior".to_string(),
                     status: "completed".to_string(),
@@ -2191,6 +2200,7 @@ fn test_finish_turn_without_auto_poke_does_not_queue_confidence_summary() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Done without poke".to_string(),
                 status: "completed".to_string(),
@@ -2224,6 +2234,7 @@ fn test_finish_turn_auto_poke_preserves_visible_turn_started() {
         crate::todo::save_todos(
             &app.session.id,
             &[crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Keep going".to_string(),
                 status: "in_progress".to_string(),
diff --git a/crates/jcode-tui/src/tui/app/todos_view.rs b/crates/jcode-tui/src/tui/app/todos_view.rs
index 8644955b8..5f04f0d93 100644
--- a/crates/jcode-tui/src/tui/app/todos_view.rs
+++ b/crates/jcode-tui/src/tui/app/todos_view.rs
@@ -275,6 +275,29 @@ fn build_todos_view_markdown(session_id: Option<&str>, todos: &[TodoItem]) -> St
         ("cancelled", "Cancelled"),
     ];
 
+    if let Some(groups) = grouped_todos_view(todos) {
+        for (group, items) in groups {
+            let group_name = group.as_deref().unwrap_or("Other");
+            let group_total = items.len();
+            let group_done = items.iter().filter(|t| t.status == "completed").count();
+            markdown.push_str(&format!(
+                "\n## {} ({}/{})\n",
+                group_name, group_done, group_total
+            ));
+            for (status, heading) in sections {
+                let status_items = sorted_group_items_for_status(&items, status);
+                if status_items.is_empty() {
+                    continue;
+                }
+                markdown.push_str(&format!("\n### {}\n\n", heading));
+                for todo in status_items {
+                    markdown.push_str(&format_todo_markdown(todo));
+                }
+            }
+        }
+        return markdown;
+    }
+
     for (status, heading) in sections {
         let items = sorted_todos_for_status(todos, status);
         if items.is_empty() {
@@ -289,6 +312,49 @@ fn build_todos_view_markdown(session_id: Option<&str>, todos: &[TodoItem]) -> St
     markdown
 }
 
+/// Group key for the side-panel view, treating empty/whitespace as ungrouped.
+fn todo_group_key(todo: &TodoItem) -> Option<String> {
+    todo.group
+        .as_deref()
+        .map(str::trim)
+        .filter(|group| !group.is_empty())
+        .map(|group| group.to_string())
+}
+
+/// Partition todos into ordered groups (first-seen order, ungrouped last).
+/// Returns `None` when no todo declares a group so callers keep the flat layout.
+fn grouped_todos_view(todos: &[TodoItem]) -> Option<Vec<(Option<String>, Vec<&TodoItem>)>> {
+    if !todos.iter().any(|todo| todo_group_key(todo).is_some()) {
+        return None;
+    }
+    let mut groups: Vec<(Option<String>, Vec<&TodoItem>)> = Vec::new();
+    for todo in todos {
+        let key = todo_group_key(todo);
+        if let Some(entry) = groups.iter_mut().find(|(existing, _)| *existing == key) {
+            entry.1.push(todo);
+        } else {
+            groups.push((key, vec![todo]));
+        }
+    }
+    groups.sort_by_key(|(key, _)| key.is_none());
+    Some(groups)
+}
+
+fn sorted_group_items_for_status<'a>(items: &[&'a TodoItem], status: &str) -> Vec<&'a TodoItem> {
+    let mut filtered: Vec<&TodoItem> = items
+        .iter()
+        .copied()
+        .filter(|todo| todo.status == status)
+        .collect();
+    filtered.sort_by(|a, b| {
+        priority_rank(&a.priority)
+            .cmp(&priority_rank(&b.priority))
+            .then_with(|| a.content.cmp(&b.content))
+            .then_with(|| a.id.cmp(&b.id))
+    });
+    filtered
+}
+
 fn sorted_todos_for_status<'a>(todos: &'a [TodoItem], status: &str) -> Vec<&'a TodoItem> {
     let mut items: Vec<&TodoItem> = todos.iter().filter(|todo| todo.status == status).collect();
     items.sort_by(|a, b| {
@@ -405,6 +471,7 @@ fn hash_todos_payload(session_id: Option<&str>, todos: &[TodoItem]) -> u64 {
         todo.content.hash(&mut hasher);
         todo.status.hash(&mut hasher);
         todo.priority.hash(&mut hasher);
+        todo.group.hash(&mut hasher);
         todo.confidence.hash(&mut hasher);
         todo.completion_confidence.hash(&mut hasher);
         todo.blocked_by.hash(&mut hasher);
@@ -441,6 +508,7 @@ mod tests {
             content: content.to_string(),
             status: status.to_string(),
             priority: priority.to_string(),
+            group: None,
             confidence,
             completion_confidence,
             blocked_by: Vec::new(),
@@ -495,4 +563,40 @@ mod tests {
 
         assert_ne!(before, after);
     }
+
+    #[test]
+    fn todos_view_markdown_groups_items_under_group_headers() {
+        let mut grouped_a = todo("g1", "Cut frame allocs", "in_progress", "high", Some(80), None);
+        grouped_a.group = Some("optimize rendering".to_string());
+        let mut grouped_b = todo("g2", "Batch draw calls", "completed", "medium", Some(70), Some(90));
+        grouped_b.group = Some("optimize rendering".to_string());
+        let mut other = todo("o1", "Fix scrollback", "pending", "low", Some(60), None);
+        other.group = Some("scrollback".to_string());
+        let ungrouped = todo("u1", "Misc cleanup", "pending", "low", Some(60), None);
+
+        let markdown = build_todos_view_markdown(
+            Some("session_test"),
+            &[grouped_a, grouped_b, other, ungrouped],
+        );
+
+        assert!(markdown.contains("## optimize rendering (1/2)"), "{markdown}");
+        assert!(markdown.contains("## scrollback (0/1)"), "{markdown}");
+        assert!(markdown.contains("## Other (0/1)"), "{markdown}");
+        // Status sub-headings nest under groups.
+        assert!(markdown.contains("### In progress"), "{markdown}");
+        // First-seen group order, ungrouped bucket last.
+        let opt = markdown.find("## optimize rendering").unwrap();
+        let scroll = markdown.find("## scrollback").unwrap();
+        let other_idx = markdown.find("## Other").unwrap();
+        assert!(opt < scroll && scroll < other_idx, "{markdown}");
+    }
+
+    #[test]
+    fn todos_view_hash_changes_when_group_changes() {
+        let mut todos = vec![todo("g", "Group hash", "pending", "high", Some(80), None)];
+        let before = hash_todos_payload(Some("session_test"), &todos);
+        todos[0].group = Some("rendering".to_string());
+        let after = hash_todos_payload(Some("session_test"), &todos);
+        assert_ne!(before, after);
+    }
 }
diff --git a/crates/jcode-tui/src/tui/app/tui_state.rs b/crates/jcode-tui/src/tui/app/tui_state.rs
index 9a0f60a0b..f5b0739f5 100644
--- a/crates/jcode-tui/src/tui/app/tui_state.rs
+++ b/crates/jcode-tui/src/tui/app/tui_state.rs
@@ -1023,6 +1023,7 @@ impl crate::tui::TuiState for App {
                     status: item.status.clone(),
                     priority: item.priority.clone(),
                     id: item.id.clone(),
+                    group: None,
                     blocked_by: item.blocked_by.clone(),
                     assigned_to: item.assigned_to.clone(),
                     confidence: None,
diff --git a/crates/jcode-tui/src/tui/info_widget_overview.rs b/crates/jcode-tui/src/tui/info_widget_overview.rs
index be4e1b08a..a1df179b8 100644
--- a/crates/jcode-tui/src/tui/info_widget_overview.rs
+++ b/crates/jcode-tui/src/tui/info_widget_overview.rs
@@ -262,6 +262,7 @@ mod tests {
     fn compute_page_layout_keeps_multiple_expanded_pages_when_height_allows() {
         let data = InfoWidgetData {
             todos: vec![TodoItem {
+                group: None,
                 content: "ship refactor".to_string(),
                 status: "pending".to_string(),
                 priority: "high".to_string(),
diff --git a/crates/jcode-tui/src/tui/info_widget_tests.rs b/crates/jcode-tui/src/tui/info_widget_tests.rs
index 31f08b08d..0849b49fb 100644
--- a/crates/jcode-tui/src/tui/info_widget_tests.rs
+++ b/crates/jcode-tui/src/tui/info_widget_tests.rs
@@ -99,6 +99,7 @@ fn todos_widgets_show_item_and_aggregate_confidence() {
     let data = InfoWidgetData {
         todos: vec![
             crate::todo::TodoItem {
+                group: None,
                 id: "todo-1".to_string(),
                 content: "Validate confidence UI".to_string(),
                 status: "in_progress".to_string(),
@@ -109,6 +110,7 @@ fn todos_widgets_show_item_and_aggregate_confidence() {
                 assigned_to: None,
             },
             crate::todo::TodoItem {
+                group: None,
                 id: "todo-2".to_string(),
                 content: "Ship completed item".to_string(),
                 status: "completed".to_string(),
@@ -136,9 +138,68 @@ fn todos_widgets_show_item_and_aggregate_confidence() {
     assert!(compact_text.contains("86%"));
 }
 
+#[test]
+fn todos_widgets_render_group_headers_when_groups_present() {
+    let mk = |group: Option<&str>, id: &str, status: &str| crate::todo::TodoItem {
+        group: group.map(|g| g.to_string()),
+        id: id.to_string(),
+        content: format!("task {id}"),
+        status: status.to_string(),
+        priority: "medium".to_string(),
+        confidence: Some(80),
+        completion_confidence: None,
+        blocked_by: Vec::new(),
+        assigned_to: None,
+    };
+    let data = InfoWidgetData {
+        todos: vec![
+            mk(Some("optimize rendering"), "a", "completed"),
+            mk(Some("optimize rendering"), "b", "in_progress"),
+            mk(Some("fix scrollback"), "c", "pending"),
+            mk(None, "d", "pending"),
+        ],
+        ..Default::default()
+    };
+
+    let expanded = lines_text(&render_todos_expanded(&data, Rect::new(0, 0, 80, 14)));
+    // Group headers appear with per-group progress counters, first-seen order,
+    // and the ungrouped bucket renders under "Other".
+    assert!(expanded.contains("optimize rendering"), "{expanded}");
+    assert!(expanded.contains("1/2"), "{expanded}");
+    assert!(expanded.contains("fix scrollback"), "{expanded}");
+    assert!(expanded.contains("Other"), "{expanded}");
+    let opt_idx = expanded.find("optimize rendering").unwrap();
+    let fix_idx = expanded.find("fix scrollback").unwrap();
+    let other_idx = expanded.find("Other").unwrap();
+    assert!(opt_idx < fix_idx, "first-seen group order: {expanded}");
+    assert!(fix_idx < other_idx, "ungrouped bucket last: {expanded}");
+}
+
+#[test]
+fn todos_widgets_stay_flat_without_groups() {
+    let mk = |id: &str, status: &str| crate::todo::TodoItem {
+        group: None,
+        id: id.to_string(),
+        content: format!("task {id}"),
+        status: status.to_string(),
+        priority: "medium".to_string(),
+        confidence: Some(80),
+        completion_confidence: None,
+        blocked_by: Vec::new(),
+        assigned_to: None,
+    };
+    let data = InfoWidgetData {
+        todos: vec![mk("a", "completed"), mk("b", "pending")],
+        ..Default::default()
+    };
+    let expanded = lines_text(&render_todos_expanded(&data, Rect::new(0, 0, 80, 14)));
+    assert!(!expanded.contains("Other"), "no group bucket: {expanded}");
+}
+
 #[test]
 fn todos_widget_renders_exact_pips_for_small_lists() {
     let mk = |status: &str| crate::todo::TodoItem {
+        group: None,
         id: status.to_string(),
         content: format!("item {status}"),
         status: status.to_string(),
@@ -991,6 +1052,7 @@ fn placements_never_include_border_only_widgets() {
             ..Default::default()
         }),
         todos: vec![crate::todo::TodoItem {
+            group: None,
             content: "ship patch".to_string(),
             status: "in_progress".to_string(),
             priority: "high".to_string(),
diff --git a/crates/jcode-tui/src/tui/info_widget_todos.rs b/crates/jcode-tui/src/tui/info_widget_todos.rs
index afae1453a..c25e6c496 100644
--- a/crates/jcode-tui/src/tui/info_widget_todos.rs
+++ b/crates/jcode-tui/src/tui/info_widget_todos.rs
@@ -161,6 +161,189 @@ fn push_aggregate_confidence_suffix(spans: &mut Vec<Span<'static>>, data: &InfoW
     ));
 }
 
+/// Normalize a todo's group label, treating empty/whitespace as ungrouped.
+fn todo_group_key(todo: &crate::todo::TodoItem) -> Option<String> {
+    todo.group
+        .as_deref()
+        .map(str::trim)
+        .filter(|group| !group.is_empty())
+        .map(|group| group.to_string())
+}
+
+/// Partition todos into ordered groups, preserving the order groups first
+/// appear. Ungrouped items collapse into a trailing `None` bucket. Returns
+/// `None` when no todo declares a group, so callers fall back to the flat list.
+fn grouped_todos(
+    todos: &[crate::todo::TodoItem],
+) -> Option<Vec<(Option<String>, Vec<&crate::todo::TodoItem>)>> {
+    if !todos.iter().any(|todo| todo_group_key(todo).is_some()) {
+        return None;
+    }
+    let mut groups: Vec<(Option<String>, Vec<&crate::todo::TodoItem>)> = Vec::new();
+    for todo in todos {
+        let key = todo_group_key(todo);
+        if let Some(entry) = groups.iter_mut().find(|(existing, _)| *existing == key) {
+            entry.1.push(todo);
+        } else {
+            groups.push((key, vec![todo]));
+        }
+    }
+    // Keep the ungrouped bucket last; sort_by_key is stable so named groups
+    // retain their first-seen order.
+    groups.sort_by_key(|(key, _)| key.is_none());
+    Some(groups)
+}
+
+fn status_sort_rank(status: &str) -> u8 {
+    match status {
+        "in_progress" => 0,
+        "pending" => 1,
+        "completed" => 2,
+        "cancelled" => 3,
+        _ => 4,
+    }
+}
+
+fn sort_todos_by_status<'a>(todos: &[&'a crate::todo::TodoItem]) -> Vec<&'a crate::todo::TodoItem> {
+    let mut sorted: Vec<&crate::todo::TodoItem> = todos.to_vec();
+    sorted.sort_by(|a, b| status_sort_rank(&a.status).cmp(&status_sort_rank(&b.status)));
+    sorted
+}
+
+fn push_group_header(
+    lines: &mut Vec<Line<'static>>,
+    name: &str,
+    items: &[&crate::todo::TodoItem],
+    inner: Rect,
+) {
+    let total = items.len();
+    let completed = items.iter().filter(|t| t.status == "completed").count();
+    let counter = format!(" {}/{}", completed, total);
+    let max_name = inner
+        .width
+        .saturating_sub(counter.len() as u16)
+        .max(4) as usize;
+    let highlight = items.iter().any(|t| t.status == "in_progress");
+    let name_style = if highlight {
+        Style::default().fg(rgb(255, 210, 130)).bold()
+    } else {
+        Style::default().fg(rgb(170, 175, 205)).bold()
+    };
+    lines.push(Line::from(vec![
+        Span::styled(truncate_smart(name, max_name), name_style),
+        Span::styled(counter, Style::default().fg(rgb(120, 120, 140))),
+    ]));
+}
+
+/// Render one todo as a line. `show_priority_marker` adds the `!` high-priority
+/// marker (used by the expanded widget); `indent` is the leading-space depth
+/// used when items sit under a group header.
+fn push_todo_item_line(
+    lines: &mut Vec<Line<'static>>,
+    todo: &crate::todo::TodoItem,
+    inner: Rect,
+    show_priority_marker: bool,
+    indent: usize,
+) {
+    let is_blocked = !todo.blocked_by.is_empty();
+    let (icon, status_color) = if is_blocked && todo.status != "completed" {
+        ("⊳", rgb(180, 140, 100))
+    } else {
+        match todo.status.as_str() {
+            "completed" => ("✓", rgb(100, 180, 100)),
+            "in_progress" => ("▶", rgb(255, 200, 100)),
+            "cancelled" => ("✗", rgb(120, 80, 80)),
+            _ => ("○", rgb(120, 120, 130)),
+        }
+    };
+
+    let priority_marker = if show_priority_marker {
+        match todo.priority.as_str() {
+            "high" => ("!", rgb(255, 120, 100)),
+            _ => ("", rgb(120, 120, 130)),
+        }
+    } else {
+        ("", rgb(120, 120, 130))
+    };
+
+    let suffix = if is_blocked && todo.status != "completed" {
+        " (blocked)"
+    } else {
+        ""
+    };
+
+    let reserved = indent as u16
+        + 3
+        + priority_marker.0.len() as u16
+        + suffix.len() as u16
+        + todo_confidence_suffix_width(todo);
+    let max_len = inner.width.saturating_sub(reserved) as usize;
+    let content = truncate_smart(&todo.content, max_len);
+
+    let text_color = if todo.status == "completed" {
+        rgb(100, 100, 110)
+    } else if is_blocked {
+        rgb(120, 120, 130)
+    } else if todo.status == "in_progress" {
+        rgb(200, 200, 210)
+    } else {
+        rgb(160, 160, 170)
+    };
+
+    let mut spans = Vec::new();
+    if indent > 0 {
+        spans.push(Span::raw(" ".repeat(indent)));
+    }
+    spans.push(Span::styled(
+        format!("{} ", icon),
+        Style::default().fg(status_color),
+    ));
+    if !priority_marker.0.is_empty() {
+        spans.push(Span::styled(
+            priority_marker.0,
+            Style::default().fg(priority_marker.1),
+        ));
+    }
+    spans.push(Span::styled(content, Style::default().fg(text_color)));
+    push_todo_confidence_suffix(&mut spans, todo);
+    if !suffix.is_empty() {
+        spans.push(Span::styled(
+            suffix.to_string(),
+            Style::default().fg(rgb(100, 100, 110)),
+        ));
+    }
+    lines.push(Line::from(spans));
+}
+
+/// Render todos partitioned by group, honoring a `max_lines` budget that counts
+/// both group headers and item rows. Returns the rendered lines plus the number
+/// of todo items actually shown (so callers can render a "+N more" footer).
+fn render_grouped_todo_lines(
+    groups: &[(Option<String>, Vec<&crate::todo::TodoItem>)],
+    inner: Rect,
+    show_priority_marker: bool,
+    max_lines: usize,
+) -> (Vec<Line<'static>>, usize) {
+    let mut lines: Vec<Line<'static>> = Vec::new();
+    let mut shown = 0usize;
+    for (group, items) in groups {
+        if lines.len() >= max_lines {
+            break;
+        }
+        let header_name = group.as_deref().unwrap_or("Other");
+        push_group_header(&mut lines, header_name, items, inner);
+        for todo in sort_todos_by_status(items) {
+            if lines.len() >= max_lines {
+                break;
+            }
+            push_todo_item_line(&mut lines, todo, inner, show_priority_marker, 2);
+            shown += 1;
+        }
+    }
+    (lines, shown)
+}
+
+
 /// Render todos widget content
 pub(super) fn render_todos_widget(data: &InfoWidgetData, inner: Rect) -> Vec<Line<'static>> {
     if data.todos.is_empty() {
@@ -193,71 +376,33 @@ pub(super) fn render_todos_widget(data: &InfoWidgetData, inner: Rect) -> Vec<Lin
     push_aggregate_confidence_suffix(&mut header, data);
     lines.push(Line::from(header));
 
+    let available_lines = inner.height.saturating_sub(1) as usize; // Account for header
+    let budget = available_lines.min(5).max(1);
+
+    // Grouped layout when any todo declares a group; otherwise the flat list.
+    if let Some(groups) = grouped_todos(&data.todos) {
+        let (group_lines, shown) = render_grouped_todo_lines(&groups, inner, false, budget);
+        lines.extend(group_lines);
+        if total > shown {
+            lines.push(Line::from(vec![Span::styled(
+                format!("  +{} more", total - shown),
+                Style::default().fg(rgb(100, 100, 110)),
+            )]));
+        }
+        return lines;
+    }
+
     // Sort todos: in_progress first, then pending, then completed
     let mut sorted_todos: Vec<&crate::todo::TodoItem> = data.todos.iter().collect();
-    sorted_todos.sort_by(|a, b| {
-        let order = |s: &str| match s {
-            "in_progress" => 0,
-            "pending" => 1,
-            "completed" => 2,
-            "cancelled" => 3,
-            _ => 4,
-        };
-        order(&a.status).cmp(&order(&b.status))
-    });
+    sorted_todos.sort_by(|a, b| status_sort_rank(&a.status).cmp(&status_sort_rank(&b.status)));
 
     // Render todos (limit based on available height)
-    let available_lines = inner.height.saturating_sub(1) as usize; // Account for header
-    for todo in sorted_todos.iter().take(available_lines.min(5)) {
-        let is_blocked = !todo.blocked_by.is_empty();
-        let (icon, status_color) = if is_blocked && todo.status != "completed" {
-            ("⊳", rgb(180, 140, 100))
-        } else {
-            match todo.status.as_str() {
-                "completed" => ("✓", rgb(100, 180, 100)),
-                "in_progress" => ("▶", rgb(255, 200, 100)),
-                "cancelled" => ("✗", rgb(120, 80, 80)),
-                _ => ("○", rgb(120, 120, 130)),
-            }
-        };
-
-        let suffix = if is_blocked && todo.status != "completed" {
-            " (blocked)"
-        } else {
-            ""
-        };
-        let max_len = inner
-            .width
-            .saturating_sub(3 + suffix.len() as u16 + todo_confidence_suffix_width(todo))
-            as usize;
-        let content = truncate_smart(&todo.content, max_len);
-
-        let text_color = if todo.status == "completed" {
-            rgb(100, 100, 110)
-        } else if is_blocked {
-            rgb(120, 120, 130)
-        } else if todo.status == "in_progress" {
-            rgb(200, 200, 210)
-        } else {
-            rgb(160, 160, 170)
-        };
-
-        let mut spans = vec![
-            Span::styled(format!("{} ", icon), Style::default().fg(status_color)),
-            Span::styled(content, Style::default().fg(text_color)),
-        ];
-        push_todo_confidence_suffix(&mut spans, todo);
-        if !suffix.is_empty() {
-            spans.push(Span::styled(
-                suffix.to_string(),
-                Style::default().fg(rgb(100, 100, 110)),
-            ));
-        }
-        lines.push(Line::from(spans));
+    for todo in sorted_todos.iter().take(budget) {
+        push_todo_item_line(&mut lines, todo, inner, false, 0);
     }
 
     // Show count of remaining items
-    let shown = available_lines.min(5).min(sorted_todos.len());
+    let shown = budget.min(sorted_todos.len());
     if data.todos.len() > shown {
         let remaining = data.todos.len() - shown;
         lines.push(Line::from(vec![Span::styled(
@@ -301,86 +446,28 @@ pub(super) fn render_todos_expanded(data: &InfoWidgetData, inner: Rect) -> Vec<L
     push_aggregate_confidence_suffix(&mut header, data);
     lines.push(Line::from(header));
 
-    // Sort todos: in_progress first, then pending, then completed
-    let mut sorted_todos: Vec<&crate::todo::TodoItem> = data.todos.iter().collect();
-    sorted_todos.sort_by(|a, b| {
-        let order = |s: &str| match s {
-            "in_progress" => 0,
-            "pending" => 1,
-            "completed" => 2,
-            "cancelled" => 3,
-            _ => 4,
-        };
-        order(&a.status).cmp(&order(&b.status))
-    });
-
-    // Render todos with priority colors
     let available_lines = MAX_TODO_LINES.saturating_sub(1); // Account for header
-    for todo in sorted_todos.iter().take(available_lines) {
-        let is_blocked = !todo.blocked_by.is_empty();
-        let (icon, status_color) = if is_blocked && todo.status != "completed" {
-            ("⊳", rgb(180, 140, 100))
-        } else {
-            match todo.status.as_str() {
-                "completed" => ("✓", rgb(100, 180, 100)),
-                "in_progress" => ("▶", rgb(255, 200, 100)),
-                "cancelled" => ("✗", rgb(120, 80, 80)),
-                _ => ("○", rgb(120, 120, 130)),
-            }
-        };
-
-        // Priority indicator
-        let priority_marker = match todo.priority.as_str() {
-            "high" => ("!", rgb(255, 120, 100)),
-            "medium" => ("", rgb(200, 180, 100)),
-            _ => ("", rgb(120, 120, 130)),
-        };
-
-        let suffix = if is_blocked && todo.status != "completed" {
-            " (blocked)"
-        } else {
-            ""
-        };
-        let max_len = inner
-            .width
-            .saturating_sub(4 + suffix.len() as u16 + todo_confidence_suffix_width(todo))
-            as usize;
-        let content = truncate_smart(&todo.content, max_len);
-
-        // Dim completed and blocked items
-        let text_color = if todo.status == "completed" {
-            rgb(100, 100, 110)
-        } else if is_blocked {
-            rgb(120, 120, 130)
-        } else if todo.status == "in_progress" {
-            rgb(200, 200, 210)
-        } else {
-            rgb(160, 160, 170)
-        };
-
-        let mut spans = vec![Span::styled(
-            format!("{} ", icon),
-            Style::default().fg(status_color),
-        )];
-
-        if !priority_marker.0.is_empty() {
-            spans.push(Span::styled(
-                priority_marker.0,
-                Style::default().fg(priority_marker.1),
-            ));
-        }
-
-        spans.push(Span::styled(content, Style::default().fg(text_color)));
-        push_todo_confidence_suffix(&mut spans, todo);
 
-        if !suffix.is_empty() {
-            spans.push(Span::styled(
-                suffix.to_string(),
+    // Grouped layout when any todo declares a group; otherwise the flat list.
+    if let Some(groups) = grouped_todos(&data.todos) {
+        let (group_lines, shown) = render_grouped_todo_lines(&groups, inner, true, available_lines);
+        lines.extend(group_lines);
+        if total > shown {
+            lines.push(Line::from(vec![Span::styled(
+                format!("  +{} more", total - shown),
                 Style::default().fg(rgb(100, 100, 110)),
-            ));
+            )]));
         }
+        return lines;
+    }
+
+    // Sort todos: in_progress first, then pending, then completed
+    let mut sorted_todos: Vec<&crate::todo::TodoItem> = data.todos.iter().collect();
+    sorted_todos.sort_by(|a, b| status_sort_rank(&a.status).cmp(&status_sort_rank(&b.status)));
 
-        lines.push(Line::from(spans));
+    // Render todos with priority colors
+    for todo in sorted_todos.iter().take(available_lines) {
+        push_todo_item_line(&mut lines, todo, inner, true, 0);
     }
 
     // Show count of remaining items

From 209fcc29f21b2fbb9a807040e06dbacb431a4510 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 17:03:18 -0700
Subject: [PATCH 50/57] perf(session-picker): avoid cloning cached search refs
 when narrowing

search_matched_session_refs cloned the cached match set into candidates and
then cloned the new matches back into the cache: two full-list clones per
narrowing keystroke. Take the cached refs in place via mem::take (it is about to
be overwritten anyway), eliminating the candidates clone. Behavior unchanged.
---
 crates/jcode-tui/src/tui/session_picker/filter.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/crates/jcode-tui/src/tui/session_picker/filter.rs b/crates/jcode-tui/src/tui/session_picker/filter.rs
index 5cc1e7602..ccc1f176e 100644
--- a/crates/jcode-tui/src/tui/session_picker/filter.rs
+++ b/crates/jcode-tui/src/tui/session_picker/filter.rs
@@ -39,8 +39,11 @@ impl SessionPicker {
 
         let can_narrow_cached = !self.cached_search_query.is_empty()
             && normalized.starts_with(&self.cached_search_query);
+        // When narrowing, reuse the previous match set in place via mem::take
+        // instead of cloning it into `candidates` and then cloning the new
+        // matches back into the cache (two full-list clones per keystroke).
         let candidates = if can_narrow_cached {
-            self.cached_search_refs.clone()
+            std::mem::take(&mut self.cached_search_refs)
         } else {
             self.all_session_refs()
         };

From 7b25bdb1f82c59d87aeee8712ba0b9fac46fe3cb Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 17:18:55 -0700
Subject: [PATCH 51/57] perf(tui): compute copy-selection status metrics
 without building selection text

copy_selection_status built the entire selected string via
current_copy_selection_text just to report char/line counts in the status line.
This ran on every render frame while in copy mode, including every drag move, so
a large selection (e.g. select-all) re-allocated and re-joined the whole
transcript text each frame.

Add copy_selection_metrics (and a raw-lines fast path) that counts chars/lines
using the same slicing logic without allocating the joined string, and use it
for the status line. Add a test asserting the metrics exactly match the built
selection text's char/line counts.
---
 .../tui/app/tests/scroll_copy_02/part_01.rs   | 33 ++++++++++
 crates/jcode-tui/src/tui/app/tui_state.rs     | 19 +++---
 crates/jcode-tui/src/tui/ui.rs                | 66 +++++++++++++++++++
 crates/jcode-tui/src/tui/ui/copy_selection.rs | 53 +++++++++++++++
 4 files changed, 163 insertions(+), 8 deletions(-)

diff --git a/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs
index 5f2fdb54d..797c8e0c6 100644
--- a/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs
+++ b/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs
@@ -115,6 +115,39 @@ fn test_copy_selection_select_all_uses_rendered_chat_text_without_copy_badges()
     );
 }
 
+#[test]
+fn test_copy_selection_metrics_match_built_selection_text() {
+    let _render_lock = scroll_render_test_lock();
+    let (mut app, mut terminal) = create_copy_test_app();
+
+    render_and_snap(&app, &mut terminal);
+    app.handle_key(KeyCode::Char('y'), KeyModifiers::ALT)
+        .unwrap();
+    assert!(app.select_all_in_copy_mode());
+
+    // The allocation-free metrics path used by the status line must agree with
+    // the char/line counts of the actually-built selection text.
+    let range = app
+        .normalized_copy_selection()
+        .expect("normalized selection range");
+    let text = app
+        .current_copy_selection_text()
+        .expect("selection text for full transcript");
+    let (chars, lines) =
+        crate::tui::ui::copy_selection_metrics(range).expect("selection metrics");
+
+    assert_eq!(
+        chars,
+        text.chars().count(),
+        "metrics char count should match built selection text"
+    );
+    assert_eq!(
+        lines,
+        text.lines().count().max(1),
+        "metrics line count should match built selection text"
+    );
+}
+
 #[test]
 fn test_copy_selection_full_user_prompt_line_skips_prompt_chrome() {
     let _render_lock = scroll_render_test_lock();
diff --git a/crates/jcode-tui/src/tui/app/tui_state.rs b/crates/jcode-tui/src/tui/app/tui_state.rs
index f5b0739f5..5f0427843 100644
--- a/crates/jcode-tui/src/tui/app/tui_state.rs
+++ b/crates/jcode-tui/src/tui/app/tui_state.rs
@@ -1496,19 +1496,22 @@ impl crate::tui::TuiState for App {
             return None;
         }
 
-        let text = self.current_copy_selection_text().unwrap_or_default();
-        let has_selection = !text.is_empty();
+        // Compute selection metrics without building the full selected string,
+        // which previously re-allocated the entire selection on every render
+        // frame and drag move (O(selection) per frame; a "select all" rebuilt
+        // the whole transcript text repeatedly).
+        let (selected_chars, selected_lines) = self
+            .normalized_copy_selection()
+            .and_then(crate::tui::ui::copy_selection_metrics)
+            .unwrap_or((0, 0));
+        let has_selection = selected_chars > 0;
         Some(crate::tui::CopySelectionStatus {
             pane: self
                 .current_copy_selection_pane()
                 .unwrap_or(crate::tui::CopySelectionPane::Chat),
             has_action: has_selection,
-            selected_chars: text.chars().count(),
-            selected_lines: if has_selection {
-                text.lines().count().max(1)
-            } else {
-                0
-            },
+            selected_chars,
+            selected_lines: if has_selection { selected_lines.max(1) } else { 0 },
             dragging: self.copy_selection_dragging,
         })
     }
diff --git a/crates/jcode-tui/src/tui/ui.rs b/crates/jcode-tui/src/tui/ui.rs
index b0a180777..8d3471151 100644
--- a/crates/jcode-tui/src/tui/ui.rs
+++ b/crates/jcode-tui/src/tui/ui.rs
@@ -1884,6 +1884,72 @@ pub(crate) fn copy_selection_text(range: crate::tui::CopySelectionRange) -> Opti
     Some(out)
 }
 
+/// Compute `(char_count, line_count)` for the current copy selection without
+/// allocating the full joined selection string. Mirrors `copy_selection_text`
+/// so the status line "N chars · M lines" matches what would be copied, but is
+/// allocation-free so it can run cheaply on every render frame / drag move.
+pub(crate) fn copy_selection_metrics(
+    range: crate::tui::CopySelectionRange,
+) -> Option<(usize, usize)> {
+    if range.start.pane != range.end.pane {
+        return None;
+    }
+    let snapshot = copy_snapshot_for_pane(range.start.pane)?;
+    let (start, end) =
+        if (range.start.abs_line, range.start.column) <= (range.end.abs_line, range.end.column) {
+            (range.start, range.end)
+        } else {
+            (range.end, range.start)
+        };
+
+    if start.abs_line >= snapshot.wrapped_plain_line_count()
+        || end.abs_line >= snapshot.wrapped_plain_line_count()
+    {
+        return None;
+    }
+
+    if let Some(metrics) =
+        copy_selection::copy_selection_metrics_from_raw_lines(&snapshot, start, end)
+    {
+        return Some(metrics);
+    }
+
+    let mut chars = 0usize;
+    let mut lines = 0usize;
+    for abs_line in start.abs_line..=end.abs_line {
+        if abs_line > start.abs_line {
+            chars += 1; // joining '\n'
+        }
+        lines += 1;
+        let text = snapshot.wrapped_plain_line(abs_line)?;
+        if abs_line != start.abs_line && abs_line != end.abs_line {
+            let copy_start = snapshot.wrapped_copy_offset(abs_line).unwrap_or(0);
+            if copy_start == 0 {
+                chars += text.chars().count();
+                continue;
+            }
+        }
+        let line_width = line_display_width(&text);
+        let copy_start = snapshot.wrapped_copy_offset(abs_line).unwrap_or(0);
+        let start_col = if abs_line == start.abs_line {
+            clamp_display_col(&text, start.column).max(copy_start)
+        } else {
+            copy_start
+        };
+        let end_col = if abs_line == end.abs_line {
+            clamp_display_col(&text, end.column).max(copy_start)
+        } else {
+            line_width
+        };
+        if end_col < start_col {
+            continue;
+        }
+        chars += display_col_slice(&text, start_col, end_col).chars().count();
+    }
+
+    Some((chars, lines.max(1)))
+}
+
 pub(crate) fn link_target_from_screen(column: u16, row: u16) -> Option<String> {
     let point = copy_point_from_screen(column, row)?;
     let snapshot = copy_snapshot_for_pane(point.pane)?;
diff --git a/crates/jcode-tui/src/tui/ui/copy_selection.rs b/crates/jcode-tui/src/tui/ui/copy_selection.rs
index 72b594729..641c0e486 100644
--- a/crates/jcode-tui/src/tui/ui/copy_selection.rs
+++ b/crates/jcode-tui/src/tui/ui/copy_selection.rs
@@ -102,6 +102,59 @@ pub(super) fn copy_selection_text_from_raw_lines(
     Some(out)
 }
 
+/// Selection metrics (character count and line count) for the raw-lines path,
+/// computed without allocating the full joined selection string. Mirrors the
+/// slicing in [`copy_selection_text_from_raw_lines`] exactly so the displayed
+/// "N chars · M lines" matches what would actually be copied.
+pub(super) fn copy_selection_metrics_from_raw_lines(
+    snapshot: &CopyViewportSnapshot,
+    start: crate::tui::CopySelectionPoint,
+    end: crate::tui::CopySelectionPoint,
+) -> Option<(usize, usize)> {
+    if snapshot.raw_plain_line_count() == 0 || snapshot.wrapped_line_map(start.abs_line).is_none() {
+        return None;
+    }
+
+    let start = raw_selection_point(snapshot, start)?;
+    let end = raw_selection_point(snapshot, end)?;
+    if start.raw_line >= snapshot.raw_plain_line_count()
+        || end.raw_line >= snapshot.raw_plain_line_count()
+    {
+        return None;
+    }
+
+    let mut chars = 0usize;
+    let mut lines = 0usize;
+    for raw_line in start.raw_line..=end.raw_line {
+        if raw_line > start.raw_line {
+            chars += 1; // the joining '\n'
+        }
+        lines += 1;
+        let text = snapshot.raw_plain_line(raw_line)?;
+        if raw_line != start.raw_line && raw_line != end.raw_line {
+            chars += text.chars().count();
+            continue;
+        }
+        let line_width = line_display_width(&text);
+        let start_col = if raw_line == start.raw_line {
+            clamp_display_col(&text, start.column)
+        } else {
+            0
+        };
+        let end_col = if raw_line == end.raw_line {
+            clamp_display_col(&text, end.column)
+        } else {
+            line_width
+        };
+        if end_col < start_col {
+            continue;
+        }
+        chars += display_col_slice(&text, start_col, end_col).chars().count();
+    }
+
+    Some((chars, lines.max(1)))
+}
+
 pub(super) fn link_target_from_snapshot(
     snapshot: &CopyViewportSnapshot,
     point: crate::tui::CopySelectionPoint,

From aaf9b4be97fcd7481d93bad2c2ed4709199c875b Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 17:21:55 -0700
Subject: [PATCH 52/57] perf(tui): compute mermaid/image regions in one reverse
 pass (O(L) not O(L^2))

For each image/mermaid placeholder, body prep scanned forward through all
following blank lines to compute the placeholder height. A message with many
placeholders each followed by long blank runs made this O(wrapped_lines^2).

Precompute, in a single reverse pass, the blank-run length starting at every
line; the placeholder height is then an O(1) lookup. Extracted into a shared
compute_image_regions helper used by both wrap_lines and wrap_lines_with_map.
Behavior is identical (height = 1 + trailing blank run).
---
 crates/jcode-tui/src/tui/ui_prepare.rs | 84 +++++++++++++-------------
 1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/crates/jcode-tui/src/tui/ui_prepare.rs b/crates/jcode-tui/src/tui/ui_prepare.rs
index 49d3bdee6..2d2becc6d 100644
--- a/crates/jcode-tui/src/tui/ui_prepare.rs
+++ b/crates/jcode-tui/src/tui/ui_prepare.rs
@@ -201,6 +201,46 @@ fn is_error_copy_content(content: &str) -> bool {
     trimmed.starts_with("Error:") || trimmed.starts_with("error:") || trimmed.starts_with("Failed:")
 }
 
+/// Build the image regions for an image/mermaid placeholder in `wrapped_lines`,
+/// where each placeholder "owns" the run of blank lines that follow it.
+///
+/// Done in a single reverse pass that precomputes, for every line, the length
+/// of the blank run starting at that line. The previous implementation scanned
+/// forward through the trailing blanks for every placeholder, which is O(L^2)
+/// when a message has many placeholders each followed by long blank runs.
+fn compute_image_regions(wrapped_lines: &[ratatui::text::Line<'static>]) -> Vec<ImageRegion> {
+    fn is_blank_line(line: &ratatui::text::Line<'static>) -> bool {
+        line.spans.is_empty()
+            || (line.spans.len() == 1 && line.spans[0].content.is_empty())
+    }
+
+    let len = wrapped_lines.len();
+    // blank_run[i] = number of consecutive blank lines starting at index i.
+    let mut blank_run = vec![0usize; len + 1];
+    for idx in (0..len).rev() {
+        blank_run[idx] = if is_blank_line(&wrapped_lines[idx]) {
+            blank_run[idx + 1] + 1
+        } else {
+            0
+        };
+    }
+
+    let mut image_regions = Vec::new();
+    for (idx, line) in wrapped_lines.iter().enumerate() {
+        if let Some(hash) = super::super::mermaid::parse_image_placeholder(line) {
+            // The placeholder line plus the blank run immediately after it.
+            let height = (1 + blank_run[idx + 1]).min(u16::MAX as usize) as u16;
+            image_regions.push(ImageRegion {
+                abs_line_idx: idx,
+                end_line: idx + height as usize,
+                hash,
+                height,
+            });
+        }
+    }
+    image_regions
+}
+
 fn error_copy_target(content: &str, rendered_line_count: usize) -> Option<RawCopyTarget> {
     copy_target_for_kind(CopyTargetKind::Error, content, rendered_line_count)
 }
@@ -1644,27 +1684,7 @@ fn wrap_lines(
         wrapped_idx += count;
     }
 
-    let mut image_regions = Vec::new();
-    for (idx, line) in wrapped_lines.iter().enumerate() {
-        if let Some(hash) = super::super::mermaid::parse_image_placeholder(line) {
-            let mut height = 1u16;
-            for subsequent in wrapped_lines.iter().skip(idx + 1) {
-                if subsequent.spans.is_empty()
-                    || (subsequent.spans.len() == 1 && subsequent.spans[0].content.is_empty())
-                {
-                    height += 1;
-                } else {
-                    break;
-                }
-            }
-            image_regions.push(ImageRegion {
-                abs_line_idx: idx,
-                end_line: idx + height as usize,
-                hash,
-                height,
-            });
-        }
-    }
+    let image_regions = compute_image_regions(&wrapped_lines);
 
     let wrapped_plain_lines = Arc::new(wrapped_lines.iter().map(ui::line_plain_text).collect());
 
@@ -1763,27 +1783,7 @@ fn wrap_lines_with_map(
     }
     raw_to_wrapped.push(wrapped_idx);
 
-    let mut image_regions = Vec::new();
-    for (idx, line) in wrapped_lines.iter().enumerate() {
-        if let Some(hash) = super::super::mermaid::parse_image_placeholder(line) {
-            let mut height = 1u16;
-            for subsequent in wrapped_lines.iter().skip(idx + 1) {
-                if subsequent.spans.is_empty()
-                    || (subsequent.spans.len() == 1 && subsequent.spans[0].content.is_empty())
-                {
-                    height += 1;
-                } else {
-                    break;
-                }
-            }
-            image_regions.push(ImageRegion {
-                abs_line_idx: idx,
-                end_line: idx + height as usize,
-                hash,
-                height,
-            });
-        }
-    }
+    let image_regions = compute_image_regions(&wrapped_lines);
 
     let mut edit_tool_ranges = Vec::new();
     for (msg_idx, file_path, raw_start, raw_end, expandable) in edit_ranges {

From 498bcc153a800a886c19d061912b5ceac073c834 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 17:34:30 -0700
Subject: [PATCH 53/57] feat(swarm): enrich swarm list with live activity,
 churn, turns, and todo progress

swarm list previously returned only a shallow roster (name, role, status,
files, age). Enrich each agent row with:

- live activity (processing + current tool name)
- provider/model
- token churn over a recent ~10s window + cumulative tokens
- turn count
- todo progress (completed/total)
- contextual idle/active duration label (idle Ns when ready, Ns when running)
- completion report when finished

Token churn and turn count are tracked in a new lock-free per-session
metrics registry (jcode-base::session_metrics) rather than on the Agent
struct, because swarm list reads stats while an agent may hold its own
Mutex<Agent> lock mid-turn (try_lock fails exactly when churn is most
interesting). Metrics are recorded from the streaming turn loop and run_turn,
and forgotten on session disconnect.

handle_comm_list now joins swarm membership with live session state and
todos, sharing the runtime-extras gathering helper in comm_sync.
---
 crates/jcode-app-core/src/agent/turn_loops.rs |   1 +
 .../src/agent/turn_streaming_mpsc.rs          |   8 +
 .../src/protocol_tests/comm_responses.rs      |   1 +
 .../src/server/client_comm_channels.rs        |   1 +
 .../src/server/client_comm_context.rs         | 119 +++++++---
 .../src/server/client_comm_tests.rs           |   7 +
 .../src/server/client_disconnect_cleanup.rs   |   1 +
 .../src/server/client_lifecycle.rs            |   2 +
 .../src/server/client_lightweight_control.rs  |   2 +
 crates/jcode-app-core/src/server/comm_sync.rs |  80 +++++++
 .../src/tool/communicate_tests.rs             |   3 +
 .../tool/communicate_tests/input_format.rs    |  10 +-
 crates/jcode-base/src/lib.rs                  |   1 +
 crates/jcode-base/src/session_metrics.rs      | 210 ++++++++++++++++++
 crates/jcode-protocol/src/comm_format.rs      | 127 ++++++++++-
 crates/jcode-protocol/src/lib.rs              |  32 ++-
 .../src/protocol_tests/comm_responses.rs      |   1 +
 17 files changed, 564 insertions(+), 42 deletions(-)
 create mode 100644 crates/jcode-base/src/session_metrics.rs

diff --git a/crates/jcode-app-core/src/agent/turn_loops.rs b/crates/jcode-app-core/src/agent/turn_loops.rs
index f0bd81e45..706cf678d 100644
--- a/crates/jcode-app-core/src/agent/turn_loops.rs
+++ b/crates/jcode-app-core/src/agent/turn_loops.rs
@@ -9,6 +9,7 @@ impl Agent {
 
     pub(super) async fn run_turn(&mut self, print_output: bool) -> Result<String> {
         self.set_log_context();
+        crate::session_metrics::record_turn(&self.session.id);
         let mut final_text = String::new();
         let trace = trace_enabled();
         let mut context_limit_retries = 0u32;
diff --git a/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs b/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs
index 8671b00fe..fa8246a81 100644
--- a/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs
+++ b/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs
@@ -813,6 +813,14 @@ impl Agent {
                     usage_cache_read,
                     usage_cache_creation,
                 );
+
+                let input = usage_input.unwrap_or(0);
+                let output = usage_output.unwrap_or(0);
+                let total = input
+                    .saturating_add(output)
+                    .saturating_add(usage_cache_read.unwrap_or(0))
+                    .saturating_add(usage_cache_creation.unwrap_or(0));
+                crate::session_metrics::record_token_usage(&self.session.id, total, output);
             }
 
             if usage_input.is_some()
diff --git a/crates/jcode-app-core/src/protocol_tests/comm_responses.rs b/crates/jcode-app-core/src/protocol_tests/comm_responses.rs
index 7c886ddf8..13c0186c3 100644
--- a/crates/jcode-app-core/src/protocol_tests/comm_responses.rs
+++ b/crates/jcode-app-core/src/protocol_tests/comm_responses.rs
@@ -158,6 +158,7 @@ fn test_comm_members_roundtrip_includes_status() -> Result<()> {
             latest_completion_report: None,
             live_attachments: Some(0),
             status_age_secs: Some(12),
+            ..Default::default()
         }],
     };
 
diff --git a/crates/jcode-app-core/src/server/client_comm_channels.rs b/crates/jcode-app-core/src/server/client_comm_channels.rs
index e701e1f6e..54ff66cd2 100644
--- a/crates/jcode-app-core/src/server/client_comm_channels.rs
+++ b/crates/jcode-app-core/src/server/client_comm_channels.rs
@@ -93,6 +93,7 @@ pub(super) async fn handle_comm_channel_members(
                     latest_completion_report: member.latest_completion_report.clone(),
                     live_attachments: Some(member.event_txs.len()),
                     status_age_secs: Some(member.last_status_change.elapsed().as_secs()),
+                    ..Default::default()
                 })
             })
             .collect();
diff --git a/crates/jcode-app-core/src/server/client_comm_context.rs b/crates/jcode-app-core/src/server/client_comm_context.rs
index cfe7bc8b6..b99a1b56a 100644
--- a/crates/jcode-app-core/src/server/client_comm_context.rs
+++ b/crates/jcode-app-core/src/server/client_comm_context.rs
@@ -2,6 +2,7 @@ use super::{
     SharedContext, SwarmEvent, SwarmEventType, SwarmMember, fanout_session_event,
     record_swarm_event,
 };
+use super::debug::ClientConnectionInfo;
 use crate::protocol::{AgentInfo, ContextEntry, NotificationType, ServerEvent};
 use std::collections::{HashMap, HashSet};
 use std::path::PathBuf;
@@ -187,6 +188,10 @@ pub(super) async fn handle_comm_read(
     let _ = client_event_tx.send(ServerEvent::CommContext { id, entries });
 }
 
+#[expect(
+    clippy::too_many_arguments,
+    reason = "comm list joins swarm membership, file touches, live sessions, and connection activity"
+)]
 pub(super) async fn handle_comm_list(
     id: u64,
     req_session_id: String,
@@ -194,6 +199,8 @@ pub(super) async fn handle_comm_list(
     swarm_members: &Arc<RwLock<HashMap<String, SwarmMember>>>,
     swarms_by_id: &Arc<RwLock<HashMap<String, HashSet<String>>>>,
     files_touched_by_session: &Arc<RwLock<HashMap<String, HashSet<PathBuf>>>>,
+    sessions: &super::SessionAgents,
+    client_connections: &Arc<RwLock<HashMap<String, ClientConnectionInfo>>>,
 ) {
     let swarm_id = swarm_id_for_session(&req_session_id, swarm_members).await;
 
@@ -206,37 +213,89 @@ pub(super) async fn handle_comm_list(
                 .unwrap_or_default()
         };
 
-        let members = swarm_members.read().await;
-        let touches = files_touched_by_session.read().await;
-
-        let member_list: Vec<AgentInfo> = swarm_session_ids
-            .iter()
-            .filter_map(|sid| {
-                members.get(sid).map(|member| {
-                    let mut files: Vec<String> = touches
-                        .get(sid)
-                        .into_iter()
-                        .flat_map(|paths| paths.iter())
-                        .map(|path| path.display().to_string())
-                        .collect();
-                    files.sort();
-
-                    AgentInfo {
-                        session_id: sid.clone(),
-                        friendly_name: member.friendly_name.clone(),
-                        files_touched: files,
-                        status: Some(member.status.clone()),
-                        detail: member.detail.clone(),
-                        role: Some(member.role.clone()),
-                        is_headless: Some(member.is_headless),
-                        report_back_to_session_id: member.report_back_to_session_id.clone(),
-                        latest_completion_report: member.latest_completion_report.clone(),
-                        live_attachments: Some(member.event_txs.len()),
-                        status_age_secs: Some(member.last_status_change.elapsed().as_secs()),
-                    }
+        // Snapshot the static member fields first, releasing the members lock
+        // before gathering per-session runtime extras (which briefly lock
+        // individual agents and read the connection map).
+        struct MemberStatic {
+            session_id: String,
+            friendly_name: Option<String>,
+            files: Vec<String>,
+            status: String,
+            detail: Option<String>,
+            role: String,
+            is_headless: bool,
+            report_back_to_session_id: Option<String>,
+            latest_completion_report: Option<String>,
+            live_attachments: usize,
+            status_age_secs: u64,
+        }
+
+        let statics: Vec<MemberStatic> = {
+            let members = swarm_members.read().await;
+            let touches = files_touched_by_session.read().await;
+            swarm_session_ids
+                .iter()
+                .filter_map(|sid| {
+                    members.get(sid).map(|member| {
+                        let mut files: Vec<String> = touches
+                            .get(sid)
+                            .into_iter()
+                            .flat_map(|paths| paths.iter())
+                            .map(|path| path.display().to_string())
+                            .collect();
+                        files.sort();
+                        MemberStatic {
+                            session_id: sid.clone(),
+                            friendly_name: member.friendly_name.clone(),
+                            files,
+                            status: member.status.clone(),
+                            detail: member.detail.clone(),
+                            role: member.role.clone(),
+                            is_headless: member.is_headless,
+                            report_back_to_session_id: member.report_back_to_session_id.clone(),
+                            latest_completion_report: member.latest_completion_report.clone(),
+                            live_attachments: member.event_txs.len(),
+                            status_age_secs: member.last_status_change.elapsed().as_secs(),
+                        }
+                    })
                 })
-            })
-            .collect();
+                .collect()
+        };
+
+        let mut member_list: Vec<AgentInfo> = Vec::with_capacity(statics.len());
+        for m in statics {
+            let extras = super::comm_sync::member_runtime_extras(
+                &m.session_id,
+                m.status == "running",
+                sessions,
+                client_connections,
+            )
+            .await;
+
+            member_list.push(AgentInfo {
+                session_id: m.session_id,
+                friendly_name: m.friendly_name,
+                files_touched: m.files,
+                status: Some(m.status),
+                detail: m.detail,
+                role: Some(m.role),
+                is_headless: Some(m.is_headless),
+                report_back_to_session_id: m.report_back_to_session_id,
+                latest_completion_report: m.latest_completion_report,
+                live_attachments: Some(m.live_attachments),
+                status_age_secs: Some(m.status_age_secs),
+                activity: extras.activity,
+                provider_name: extras.provider_name,
+                provider_model: extras.provider_model,
+                turn_count: extras.turn_count,
+                recent_total_tokens: extras.recent_total_tokens,
+                recent_output_tokens: extras.recent_output_tokens,
+                recent_window_secs: extras.recent_window_secs,
+                cumulative_total_tokens: extras.cumulative_total_tokens,
+                todos_completed: extras.todos_completed,
+                todos_total: extras.todos_total,
+            });
+        }
 
         let _ = client_event_tx.send(ServerEvent::CommMembers {
             id,
diff --git a/crates/jcode-app-core/src/server/client_comm_tests.rs b/crates/jcode-app-core/src/server/client_comm_tests.rs
index 0db9680bf..35bf65c4a 100644
--- a/crates/jcode-app-core/src/server/client_comm_tests.rs
+++ b/crates/jcode-app-core/src/server/client_comm_tests.rs
@@ -403,6 +403,11 @@ async fn comm_list_includes_member_status_and_detail() {
         HashSet::from([requester_id.clone(), peer_id.clone()]),
     )])));
     let file_touches = Arc::new(RwLock::new(HashMap::new()));
+    let sessions = Arc::new(RwLock::new(HashMap::from([
+        (requester_id.clone(), requester.clone()),
+        (peer_id.clone(), peer.clone()),
+    ])));
+    let client_connections = Arc::new(RwLock::new(HashMap::new()));
 
     handle_comm_list(
         1,
@@ -411,6 +416,8 @@ async fn comm_list_includes_member_status_and_detail() {
         &swarm_members,
         &swarms_by_id,
         &file_touches,
+        &sessions,
+        &client_connections,
     )
     .await;
 
diff --git a/crates/jcode-app-core/src/server/client_disconnect_cleanup.rs b/crates/jcode-app-core/src/server/client_disconnect_cleanup.rs
index 78745d71c..f2d596775 100644
--- a/crates/jcode-app-core/src/server/client_disconnect_cleanup.rs
+++ b/crates/jcode-app-core/src/server/client_disconnect_cleanup.rs
@@ -213,6 +213,7 @@ pub(super) async fn cleanup_client_connection(
                 (None, None)
             }
         };
+        crate::session_metrics::forget(client_session_id);
 
         if let Some(ref swarm_id) = swarm_id {
             record_swarm_event(
diff --git a/crates/jcode-app-core/src/server/client_lifecycle.rs b/crates/jcode-app-core/src/server/client_lifecycle.rs
index e437e6e49..0455b09bd 100644
--- a/crates/jcode-app-core/src/server/client_lifecycle.rs
+++ b/crates/jcode-app-core/src/server/client_lifecycle.rs
@@ -1924,6 +1924,8 @@ pub(super) async fn handle_client(
                     &swarm_members,
                     &swarms_by_id,
                     &files_touched_by_session,
+                    &sessions,
+                    &client_connections,
                 )
                 .await;
             }
diff --git a/crates/jcode-app-core/src/server/client_lightweight_control.rs b/crates/jcode-app-core/src/server/client_lightweight_control.rs
index dfdd34189..8ab3ac0e8 100644
--- a/crates/jcode-app-core/src/server/client_lightweight_control.rs
+++ b/crates/jcode-app-core/src/server/client_lightweight_control.rs
@@ -204,6 +204,8 @@ pub(super) async fn handle_lightweight_control_request(
                 swarm_members,
                 swarms_by_id,
                 files_touched_by_session,
+                sessions,
+                client_connections,
             )
             .await;
         }
diff --git a/crates/jcode-app-core/src/server/comm_sync.rs b/crates/jcode-app-core/src/server/comm_sync.rs
index fbafacad0..bd18c44a9 100644
--- a/crates/jcode-app-core/src/server/comm_sync.rs
+++ b/crates/jcode-app-core/src/server/comm_sync.rs
@@ -63,6 +63,86 @@ fn live_activity_snapshot(
         })
 }
 
+/// Recent-token lookback window used when reporting per-agent churn in
+/// `swarm list`. Short enough to reflect "what is this agent doing right now".
+pub(super) const SWARM_LIST_TOKEN_WINDOW_SECS: u64 = 10;
+
+/// Runtime extras for a swarm member, gathered without holding the agent lock
+/// for long. Used to enrich the `swarm list` roster with live activity,
+/// provider/model, token churn, turn count, and todo progress.
+#[derive(Default)]
+pub(super) struct MemberRuntimeExtras {
+    pub(super) activity: Option<SessionActivitySnapshot>,
+    pub(super) provider_name: Option<String>,
+    pub(super) provider_model: Option<String>,
+    pub(super) turn_count: Option<u64>,
+    pub(super) recent_total_tokens: Option<u64>,
+    pub(super) recent_output_tokens: Option<u64>,
+    pub(super) recent_window_secs: Option<u64>,
+    pub(super) cumulative_total_tokens: Option<u64>,
+    pub(super) todos_completed: Option<usize>,
+    pub(super) todos_total: Option<usize>,
+}
+
+/// Gather live runtime extras for a single member session.
+///
+/// `member_is_running` is used as a fallback "processing" hint when no live
+/// client connection is reporting activity (e.g. headless sessions).
+pub(super) async fn member_runtime_extras(
+    session_id: &str,
+    member_is_running: bool,
+    sessions: &SessionAgents,
+    client_connections: &Arc<RwLock<HashMap<String, ClientConnectionInfo>>>,
+) -> MemberRuntimeExtras {
+    let activity = {
+        let connections = client_connections.read().await;
+        live_activity_snapshot(&connections, session_id, member_is_running)
+    };
+
+    let (provider_name, provider_model) = {
+        let agent_sessions = sessions.read().await;
+        if let Some(agent) = agent_sessions.get(session_id) {
+            // Never block on a busy agent: token churn and turns come from the
+            // lock-free metrics registry, so a missing provider name here just
+            // means the agent is mid-turn.
+            if let Ok(agent) = agent.try_lock() {
+                (Some(agent.provider_name()), Some(agent.provider_model()))
+            } else {
+                (None, None)
+            }
+        } else {
+            (None, None)
+        }
+    };
+
+    let metrics = crate::session_metrics::snapshot(
+        session_id,
+        std::time::Duration::from_secs(SWARM_LIST_TOKEN_WINDOW_SECS),
+    );
+
+    let (todos_completed, todos_total) = match crate::todo::load_todos(session_id) {
+        Ok(todos) if !todos.is_empty() => {
+            let completed = todos.iter().filter(|t| t.status == "completed").count();
+            (Some(completed), Some(todos.len()))
+        }
+        _ => (None, None),
+    };
+
+    MemberRuntimeExtras {
+        activity,
+        provider_name,
+        provider_model,
+        turn_count: metrics.map(|m| m.turns),
+        recent_total_tokens: metrics.map(|m| m.recent_total_tokens),
+        recent_output_tokens: metrics.map(|m| m.recent_output_tokens),
+        recent_window_secs: metrics.map(|_| SWARM_LIST_TOKEN_WINDOW_SECS),
+        cumulative_total_tokens: metrics.map(|m| m.cumulative_total_tokens),
+        todos_completed,
+        todos_total,
+    }
+}
+
+
 async fn ensure_same_swarm_access(
     id: u64,
     req_session_id: &str,
diff --git a/crates/jcode-app-core/src/tool/communicate_tests.rs b/crates/jcode-app-core/src/tool/communicate_tests.rs
index 5f35e0c4d..fdd81fed6 100644
--- a/crates/jcode-app-core/src/tool/communicate_tests.rs
+++ b/crates/jcode-app-core/src/tool/communicate_tests.rs
@@ -126,6 +126,7 @@ fn in_flight_slot_accounting_counts_queued_workers_not_coordinator() {
             latest_completion_report: None,
             live_attachments: None,
             status_age_secs: None,
+            ..Default::default()
         },
         AgentInfo {
             session_id: "worker-queued".to_string(),
@@ -139,6 +140,7 @@ fn in_flight_slot_accounting_counts_queued_workers_not_coordinator() {
             latest_completion_report: None,
             live_attachments: None,
             status_age_secs: None,
+            ..Default::default()
         },
         AgentInfo {
             session_id: "worker-ready".to_string(),
@@ -152,6 +154,7 @@ fn in_flight_slot_accounting_counts_queued_workers_not_coordinator() {
             latest_completion_report: None,
             live_attachments: None,
             status_age_secs: None,
+            ..Default::default()
         },
     ];
 
diff --git a/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs b/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs
index d136e3dfc..0da530076 100644
--- a/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs
+++ b/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs
@@ -92,6 +92,7 @@ fn cleanup_candidates_default_to_owned_terminal_workers() {
             latest_completion_report: None,
             live_attachments: None,
             status_age_secs: None,
+            ..Default::default()
         },
         AgentInfo {
             session_id: "owned-done".to_string(),
@@ -105,6 +106,7 @@ fn cleanup_candidates_default_to_owned_terminal_workers() {
             latest_completion_report: None,
             live_attachments: None,
             status_age_secs: None,
+            ..Default::default()
         },
         AgentInfo {
             session_id: "user-created".to_string(),
@@ -118,6 +120,7 @@ fn cleanup_candidates_default_to_owned_terminal_workers() {
             latest_completion_report: None,
             live_attachments: None,
             status_age_secs: None,
+            ..Default::default()
         },
         AgentInfo {
             session_id: "owned-running".to_string(),
@@ -131,6 +134,7 @@ fn cleanup_candidates_default_to_owned_terminal_workers() {
             latest_completion_report: None,
             live_attachments: None,
             status_age_secs: None,
+            ..Default::default()
         },
     ];
     let statuses = default_cleanup_target_statuses();
@@ -197,15 +201,17 @@ fn format_members_includes_status_and_detail() {
             latest_completion_report: None,
             live_attachments: Some(0),
             status_age_secs: Some(12),
+            ..Default::default()
         }],
     );
 
     assert!(output.output.contains("Status: running — working on tests"));
+    assert!(output.output.contains("· 12s"));
     assert!(output.output.contains("Files: src/main.rs"));
     assert!(
         output
             .output
-            .contains("Meta: headless · owned_by_you · attachments=0 · status_age=12s")
+            .contains("Meta: headless · owned_by_you · attachments=0")
     );
 }
 
@@ -230,6 +236,7 @@ fn format_members_disambiguates_duplicate_friendly_names() {
                 latest_completion_report: None,
                 live_attachments: None,
                 status_age_secs: None,
+                ..Default::default()
             },
             AgentInfo {
                 session_id: "session_shark_1234567890_bbbbbbbbbbbb0002".to_string(),
@@ -243,6 +250,7 @@ fn format_members_disambiguates_duplicate_friendly_names() {
                 latest_completion_report: None,
                 live_attachments: None,
                 status_age_secs: None,
+                ..Default::default()
             },
         ],
     );
diff --git a/crates/jcode-base/src/lib.rs b/crates/jcode-base/src/lib.rs
index 69df34d3a..e3bec7725 100644
--- a/crates/jcode-base/src/lib.rs
+++ b/crates/jcode-base/src/lib.rs
@@ -63,6 +63,7 @@ pub mod safety;
 pub mod secret_input;
 pub mod session;
 pub mod session_list_cache;
+pub mod session_metrics;
 pub mod side_panel;
 pub mod sidecar;
 pub mod skill;
diff --git a/crates/jcode-base/src/session_metrics.rs b/crates/jcode-base/src/session_metrics.rs
new file mode 100644
index 000000000..6cfad046c
--- /dev/null
+++ b/crates/jcode-base/src/session_metrics.rs
@@ -0,0 +1,210 @@
+//! Lock-free per-session runtime metrics.
+//!
+//! These metrics are tracked in a process-global registry rather than on the
+//! `Agent` struct itself. That is deliberate: callers such as `swarm list`
+//! read per-agent stats while the agent may be actively processing a turn and
+//! holding its own `Mutex<Agent>` lock. Anything stored behind that lock is
+//! unavailable (`try_lock` fails) exactly when an agent is busiest, which is
+//! when churn/turn data is most interesting. Keeping these counters in a
+//! separate registry lets us observe live activity without contending on the
+//! agent lock.
+//!
+//! The registry stores a small ring of recent token-usage samples per session
+//! so we can report a "tokens churned over the last N seconds" rate, plus a
+//! cumulative turn counter.
+
+use std::collections::HashMap;
+use std::sync::Mutex;
+use std::time::{Duration, Instant};
+
+/// How long an individual token sample stays in the rolling window.
+const SAMPLE_WINDOW: Duration = Duration::from_secs(60);
+
+/// Maximum samples retained per session to bound memory. At one sample per
+/// provider response this comfortably covers the rolling window.
+const MAX_SAMPLES: usize = 256;
+
+#[derive(Clone, Copy)]
+struct TokenSample {
+    at: Instant,
+    /// Total tokens (input + output + cache) observed in this sample.
+    total: u64,
+    /// Output tokens only, the best proxy for "work produced".
+    output: u64,
+}
+
+#[derive(Default)]
+struct SessionMetrics {
+    samples: Vec<TokenSample>,
+    turns: u64,
+    cumulative_total_tokens: u64,
+    cumulative_output_tokens: u64,
+}
+
+impl SessionMetrics {
+    fn prune(&mut self, now: Instant) {
+        let cutoff = now.checked_sub(SAMPLE_WINDOW);
+        self.samples.retain(|sample| match cutoff {
+            Some(cutoff) => sample.at >= cutoff,
+            None => true,
+        });
+        if self.samples.len() > MAX_SAMPLES {
+            let overflow = self.samples.len() - MAX_SAMPLES;
+            self.samples.drain(0..overflow);
+        }
+    }
+}
+
+static REGISTRY: Mutex<Option<HashMap<String, SessionMetrics>>> = Mutex::new(None);
+
+fn with_registry<R>(f: impl FnOnce(&mut HashMap<String, SessionMetrics>) -> R) -> Option<R> {
+    let mut guard = REGISTRY.lock().ok()?;
+    let map = guard.get_or_insert_with(HashMap::new);
+    Some(f(map))
+}
+
+/// Record a token-usage sample for a session. Called from the streaming turn
+/// loop whenever the provider reports usage.
+pub fn record_token_usage(session_id: &str, total_tokens: u64, output_tokens: u64) {
+    if session_id.is_empty() || (total_tokens == 0 && output_tokens == 0) {
+        return;
+    }
+    let now = Instant::now();
+    with_registry(|map| {
+        let entry = map.entry(session_id.to_string()).or_default();
+        entry.samples.push(TokenSample {
+            at: now,
+            total: total_tokens,
+            output: output_tokens,
+        });
+        entry.cumulative_total_tokens = entry.cumulative_total_tokens.saturating_add(total_tokens);
+        entry.cumulative_output_tokens =
+            entry.cumulative_output_tokens.saturating_add(output_tokens);
+        entry.prune(now);
+    });
+}
+
+/// Record that a session completed (or started) a turn.
+pub fn record_turn(session_id: &str) {
+    if session_id.is_empty() {
+        return;
+    }
+    with_registry(|map| {
+        let entry = map.entry(session_id.to_string()).or_default();
+        entry.turns = entry.turns.saturating_add(1);
+    });
+}
+
+/// Snapshot of a session's recent activity.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
+pub struct SessionMetricsSnapshot {
+    /// Total tokens observed within the lookback window.
+    pub recent_total_tokens: u64,
+    /// Output tokens observed within the lookback window.
+    pub recent_output_tokens: u64,
+    /// Cumulative total tokens for the session lifetime.
+    pub cumulative_total_tokens: u64,
+    /// Cumulative output tokens for the session lifetime.
+    pub cumulative_output_tokens: u64,
+    /// Number of turns recorded for the session.
+    pub turns: u64,
+}
+
+impl SessionMetricsSnapshot {
+    pub fn has_activity(&self) -> bool {
+        self.recent_total_tokens > 0
+            || self.cumulative_total_tokens > 0
+            || self.turns > 0
+    }
+}
+
+/// Read a snapshot of a session's metrics, summing token samples within the
+/// given lookback window. Returns `None` if the session has no recorded
+/// metrics.
+pub fn snapshot(session_id: &str, lookback: Duration) -> Option<SessionMetricsSnapshot> {
+    let now = Instant::now();
+    with_registry(|map| {
+        let entry = map.get_mut(session_id)?;
+        entry.prune(now);
+        let cutoff = now.checked_sub(lookback);
+        let mut recent_total = 0u64;
+        let mut recent_output = 0u64;
+        for sample in &entry.samples {
+            let in_window = match cutoff {
+                Some(cutoff) => sample.at >= cutoff,
+                None => true,
+            };
+            if in_window {
+                recent_total = recent_total.saturating_add(sample.total);
+                recent_output = recent_output.saturating_add(sample.output);
+            }
+        }
+        Some(SessionMetricsSnapshot {
+            recent_total_tokens: recent_total,
+            recent_output_tokens: recent_output,
+            cumulative_total_tokens: entry.cumulative_total_tokens,
+            cumulative_output_tokens: entry.cumulative_output_tokens,
+            turns: entry.turns,
+        })
+    })
+    .flatten()
+}
+
+/// Remove a session's metrics, called when the session leaves the swarm or
+/// disconnects, to avoid unbounded growth.
+pub fn forget(session_id: &str) {
+    with_registry(|map| {
+        map.remove(session_id);
+    });
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn records_and_snapshots_token_usage() {
+        let sid = "session_metrics_test_basic";
+        forget(sid);
+        record_token_usage(sid, 100, 40);
+        record_token_usage(sid, 50, 20);
+        let snap = snapshot(sid, Duration::from_secs(10)).expect("snapshot");
+        assert_eq!(snap.recent_total_tokens, 150);
+        assert_eq!(snap.recent_output_tokens, 60);
+        assert_eq!(snap.cumulative_total_tokens, 150);
+        assert_eq!(snap.cumulative_output_tokens, 60);
+        forget(sid);
+    }
+
+    #[test]
+    fn counts_turns() {
+        let sid = "session_metrics_test_turns";
+        forget(sid);
+        record_turn(sid);
+        record_turn(sid);
+        record_turn(sid);
+        let snap = snapshot(sid, Duration::from_secs(10)).expect("snapshot");
+        assert_eq!(snap.turns, 3);
+        forget(sid);
+    }
+
+    #[test]
+    fn ignores_empty_and_zero() {
+        let sid = "session_metrics_test_zero";
+        forget(sid);
+        record_token_usage(sid, 0, 0);
+        record_token_usage("", 100, 40);
+        assert!(snapshot(sid, Duration::from_secs(10)).is_none());
+        forget(sid);
+    }
+
+    #[test]
+    fn forget_clears_state() {
+        let sid = "session_metrics_test_forget";
+        forget(sid);
+        record_turn(sid);
+        assert!(snapshot(sid, Duration::from_secs(10)).is_some());
+        forget(sid);
+        assert!(snapshot(sid, Duration::from_secs(10)).is_none());
+    }
+}
diff --git a/crates/jcode-protocol/src/comm_format.rs b/crates/jcode-protocol/src/comm_format.rs
index c25734014..caa60dfca 100644
--- a/crates/jcode-protocol/src/comm_format.rs
+++ b/crates/jcode-protocol/src/comm_format.rs
@@ -152,6 +152,71 @@ pub fn format_comm_members(current_session_id: &str, members: &[AgentInfo]) -> S
             } else {
                 String::new()
             };
+
+            // Status line: lifecycle + detail, then a contextual age label.
+            // For an idle/ready agent the "age" is how long it has been idle;
+            // for a running agent it is how long the current turn has run.
+            let detail_suffix = member
+                .detail
+                .as_deref()
+                .map(|detail| format!(" — {}", detail))
+                .unwrap_or_default();
+            let age_suffix = match member.status_age_secs {
+                Some(age) if status == "ready" || status == "idle" => {
+                    format!(" · idle {}", format_secs(age))
+                }
+                Some(age) if status == "running" => format!(" · {}", format_secs(age)),
+                Some(age) => format!(" · {} ago", format_secs(age)),
+                None => String::new(),
+            };
+
+            // Live activity: what the agent is doing right now.
+            let activity_suffix = match member.activity.as_ref() {
+                Some(activity) if activity.is_processing => {
+                    match activity.current_tool_name.as_deref() {
+                        Some(tool) => format!("\n    Activity: working ({})", tool),
+                        None => "\n    Activity: thinking".to_string(),
+                    }
+                }
+                _ => String::new(),
+            };
+
+            // Progress: todos completed / total.
+            let progress_suffix = match (member.todos_completed, member.todos_total) {
+                (Some(done), Some(total)) if total > 0 => {
+                    format!("\n    Progress: {}/{} todos", done, total)
+                }
+                _ => String::new(),
+            };
+
+            // Live work signal: recent token churn + cumulative + turns.
+            let mut work_meta = Vec::new();
+            if let (Some(recent), Some(window)) =
+                (member.recent_total_tokens, member.recent_window_secs)
+                && recent > 0
+            {
+                work_meta.push(format!("{} tok/{}s", format_count(recent), window));
+            }
+            if let Some(turns) = member.turn_count.filter(|turns| *turns > 0) {
+                work_meta.push(format!("{} turns", turns));
+            }
+            if let Some(total) = member.cumulative_total_tokens.filter(|total| *total > 0) {
+                work_meta.push(format!("{} tok total", format_count(total)));
+            }
+            let work_suffix = if work_meta.is_empty() {
+                String::new()
+            } else {
+                format!("\n    Work: {}", work_meta.join(" · "))
+            };
+
+            // Model line.
+            let model_suffix = match (member.provider_name.as_deref(), member.provider_model.as_deref())
+            {
+                (Some(provider), Some(model)) => format!("\n    Model: {}/{}", provider, model),
+                (None, Some(model)) => format!("\n    Model: {}", model),
+                _ => String::new(),
+            };
+
             let mut extra_meta = Vec::new();
             if member.is_headless == Some(true) {
                 extra_meta.push("headless".to_string());
@@ -166,37 +231,79 @@ pub fn format_comm_members(current_session_id: &str, members: &[AgentInfo]) -> S
             if let Some(attachments) = member.live_attachments {
                 extra_meta.push(format!("attachments={attachments}"));
             }
-            if let Some(age_secs) = member.status_age_secs {
-                extra_meta.push(format!("status_age={}s", age_secs));
-            }
             let meta_suffix = if extra_meta.is_empty() {
                 String::new()
             } else {
                 format!("\n    Meta: {}", extra_meta.join(" · "))
             };
+
+            // Completion report when the agent has finished.
+            let report_suffix = match member.latest_completion_report.as_deref() {
+                Some(report) if !report.trim().is_empty() => {
+                    format!("\n    Report: {}", truncate_report(report))
+                }
+                _ => String::new(),
+            };
+
             output.push_str(&format!(
-                "  {}{} ({})\n    Status: {}{}{}{}\n",
+                "  {}{} ({})\n    Status: {}{}{}{}{}{}{}{}{}{}\n",
                 name,
                 role_label,
                 if is_me { "you" } else { session },
                 status,
-                member
-                    .detail
-                    .as_deref()
-                    .map(|detail| format!(" — {}", detail))
-                    .unwrap_or_default(),
+                detail_suffix,
+                age_suffix,
+                activity_suffix,
+                progress_suffix,
+                work_suffix,
+                model_suffix,
                 if files.is_empty() {
                     String::new()
                 } else {
                     format!("\n    Files: {}", files)
                 },
-                meta_suffix
+                meta_suffix,
+                report_suffix,
             ));
         }
         output
     }
 }
 
+/// Format a duration in seconds into a compact human label (e.g. `45s`, `3m`, `2h`).
+fn format_secs(secs: u64) -> String {
+    if secs < 60 {
+        format!("{}s", secs)
+    } else if secs < 3600 {
+        format!("{}m", secs / 60)
+    } else {
+        format!("{}h", secs / 3600)
+    }
+}
+
+/// Format a token count compactly (e.g. `850`, `12.3k`, `1.2M`).
+fn format_count(count: u64) -> String {
+    if count < 1_000 {
+        count.to_string()
+    } else if count < 1_000_000 {
+        format!("{:.1}k", count as f64 / 1_000.0)
+    } else {
+        format!("{:.1}M", count as f64 / 1_000_000.0)
+    }
+}
+
+/// Truncate a completion report to a single compact line for the roster view.
+fn truncate_report(report: &str) -> String {
+    const MAX: usize = 120;
+    let one_line: String = report.split_whitespace().collect::<Vec<_>>().join(" ");
+    if one_line.chars().count() > MAX {
+        let truncated: String = one_line.chars().take(MAX).collect();
+        format!("{}…", truncated)
+    } else {
+        one_line
+    }
+}
+
 pub fn format_comm_tool_summary(target: &str, calls: &[ToolCallSummary]) -> String {
     if calls.is_empty() {
         format!("No tool calls found for {}", target)
diff --git a/crates/jcode-protocol/src/lib.rs b/crates/jcode-protocol/src/lib.rs
index c74781f47..1d6712357 100644
--- a/crates/jcode-protocol/src/lib.rs
+++ b/crates/jcode-protocol/src/lib.rs
@@ -198,7 +198,7 @@ pub struct ContextEntry {
 }
 
 /// Info about an agent
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
 pub struct AgentInfo {
     pub session_id: String,
     #[serde(skip_serializing_if = "Option::is_none")]
@@ -229,6 +229,36 @@ pub struct AgentInfo {
     /// Seconds since the last status change.
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub status_age_secs: Option<u64>,
+    /// Live activity (whether processing + current tool name).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub activity: Option<SessionActivitySnapshot>,
+    /// Provider name (e.g. "anthropic").
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub provider_name: Option<String>,
+    /// Provider model id.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub provider_model: Option<String>,
+    /// Number of turns the agent has run this session.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub turn_count: Option<u64>,
+    /// Tokens churned (total, including cache) within the recent lookback window.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub recent_total_tokens: Option<u64>,
+    /// Output tokens produced within the recent lookback window.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub recent_output_tokens: Option<u64>,
+    /// Width of the recent-token lookback window, in seconds.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub recent_window_secs: Option<u64>,
+    /// Cumulative total tokens observed for the session lifetime.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub cumulative_total_tokens: Option<u64>,
+    /// Number of completed todos for this agent's session.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub todos_completed: Option<usize>,
+    /// Total number of todos for this agent's session.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub todos_total: Option<usize>,
 }
 
 /// Lightweight status snapshot for a swarm member.
diff --git a/crates/jcode-protocol/src/protocol_tests/comm_responses.rs b/crates/jcode-protocol/src/protocol_tests/comm_responses.rs
index 1bdb0067d..01b0f3147 100644
--- a/crates/jcode-protocol/src/protocol_tests/comm_responses.rs
+++ b/crates/jcode-protocol/src/protocol_tests/comm_responses.rs
@@ -158,6 +158,7 @@ fn test_comm_members_roundtrip_includes_status() -> Result<()> {
             latest_completion_report: Some("Done.".to_string()),
             live_attachments: Some(0),
             status_age_secs: Some(12),
+            ..Default::default()
         }],
     };
 

From 8faeb77574d2dca84e65481c89e839cf490727e7 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 17:35:09 -0700
Subject: [PATCH 54/57] test(swarm): cover enriched swarm list rendering
 (activity, churn, turns, idle label)

---
 .../tool/communicate_tests/input_format.rs    | 77 +++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs b/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs
index 0da530076..e03864883 100644
--- a/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs
+++ b/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs
@@ -215,6 +215,83 @@ fn format_members_includes_status_and_detail() {
     );
 }
 
+#[test]
+fn format_members_renders_activity_progress_churn_and_turns() {
+    let ctx = test_ctx(
+        "session_self_1234567890_deadbeefcafebabe",
+        std::path::Path::new("."),
+    );
+
+    let output = format_members(
+        &ctx,
+        &[AgentInfo {
+            session_id: "session_peer_1234567890_aaaaaaaaaaaa0001".to_string(),
+            friendly_name: Some("otter".to_string()),
+            files_touched: vec![],
+            status: Some("running".to_string()),
+            detail: Some("implementing".to_string()),
+            role: Some("agent".to_string()),
+            is_headless: Some(false),
+            report_back_to_session_id: None,
+            latest_completion_report: None,
+            live_attachments: Some(1),
+            status_age_secs: Some(8),
+            activity: Some(SessionActivitySnapshot {
+                is_processing: true,
+                current_tool_name: Some("edit".to_string()),
+            }),
+            provider_name: Some("anthropic".to_string()),
+            provider_model: Some("claude-sonnet".to_string()),
+            turn_count: Some(7),
+            recent_total_tokens: Some(12_345),
+            recent_output_tokens: Some(2_000),
+            recent_window_secs: Some(10),
+            cumulative_total_tokens: Some(98_765),
+            todos_completed: Some(3),
+            todos_total: Some(7),
+        }],
+    );
+
+    let text = output.output;
+    assert!(text.contains("Activity: working (edit)"), "got: {text}");
+    assert!(text.contains("Progress: 3/7 todos"), "got: {text}");
+    assert!(text.contains("12.3k tok/10s"), "got: {text}");
+    assert!(text.contains("7 turns"), "got: {text}");
+    assert!(text.contains("98.8k tok total"), "got: {text}");
+    assert!(text.contains("Model: anthropic/claude-sonnet"), "got: {text}");
+    // Running agent shows current-turn duration, not an "idle" label.
+    assert!(text.contains("· 8s"), "got: {text}");
+    assert!(!text.contains("idle"), "got: {text}");
+}
+
+#[test]
+fn format_members_labels_idle_ready_agent() {
+    let ctx = test_ctx(
+        "session_self_1234567890_deadbeefcafebabe",
+        std::path::Path::new("."),
+    );
+
+    let output = format_members(
+        &ctx,
+        &[AgentInfo {
+            session_id: "session_peer_1234567890_bbbbbbbbbbbb0002".to_string(),
+            friendly_name: Some("idle-one".to_string()),
+            files_touched: vec![],
+            status: Some("ready".to_string()),
+            detail: None,
+            role: Some("agent".to_string()),
+            is_headless: None,
+            report_back_to_session_id: None,
+            latest_completion_report: None,
+            live_attachments: Some(0),
+            status_age_secs: Some(90),
+            ..Default::default()
+        }],
+    );
+
+    assert!(output.output.contains("idle 1m"), "got: {}", output.output);
+}
+
 #[test]
 fn format_members_disambiguates_duplicate_friendly_names() {
     let ctx = test_ctx(

From 0e692d6b1b9aada852314d5d75f0cfbd59a15148 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 17:49:09 -0700
Subject: [PATCH 55/57] fix(tui): honor reasoning_display mode when
 re-rendering persisted history
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 'current' reasoning collapse only ran in the live streaming path. When
the transcript was re-rendered from stored history (self-dev reload, resume,
remote sync, compaction-window expand), the shared history renderer replayed
every persisted reasoning trace in full regardless of reasoning_display mode,
so after the collapse animation finished a reload would bring all the
reasoning back.

format_reasoning_markup now honors the active mode:
- Off:     persisted reasoning is hidden entirely.
- Current: the block folds to a single '▸ thought (N lines)' trace line,
           matching the live collapse end state.
- Full:    classic full replay (unchanged).

Adds reasoning_summary_line_markup helper + tests for all three modes.
---
 crates/jcode-base/src/session/render.rs       | 21 +++++
 crates/jcode-base/src/session_tests/cases.rs  | 89 +++++++++++++++++++
 crates/jcode-tui-markdown/src/lib.rs          | 13 +++
 .../src/markdown_tests/cases/rendering.rs     | 48 ++++++++++
 4 files changed, 171 insertions(+)

diff --git a/crates/jcode-base/src/session/render.rs b/crates/jcode-base/src/session/render.rs
index a9c909d8f..76e7ecc6d 100644
--- a/crates/jcode-base/src/session/render.rs
+++ b/crates/jcode-base/src/session/render.rs
@@ -1,5 +1,6 @@
 use super::{Session, StoredDisplayRole};
 use crate::message::{ContentBlock, Role, ToolCall};
+use jcode_config_types::ReasoningDisplayMode;
 pub use jcode_session_types::{
     RenderedCompactedHistoryInfo, RenderedImage, RenderedImageSource, RenderedMessage,
 };
@@ -16,10 +17,30 @@ pub const DEFAULT_VISIBLE_COMPACTED_HISTORY_MESSAGES: usize = 64;
 /// by the live streaming path. Each line is wrapped via the shared `reasoning_line_markup` so resumed
 /// sessions render reasoning identically to how it streamed, terminated by a
 /// blank line so following answer text renders as a normal paragraph.
+///
+/// Honors the active `reasoning_display` mode so re-rendered history (reload,
+/// resume, remote sync, compaction-window expand) matches the live behavior:
+/// - `Off`: persisted reasoning is hidden entirely.
+/// - `Current`: the block folds down to a single `▸ thought (N lines)` trace,
+///   matching the live collapse animation's end state rather than replaying the
+///   full reasoning back into the transcript on every reload.
+/// - `Full`: every reasoning line is shown (classic behavior).
 fn format_reasoning_markup(text: &str) -> String {
     if text.trim().is_empty() {
         return String::new();
     }
+    let mode = crate::config::config().display.reasoning_display();
+    match mode {
+        ReasoningDisplayMode::Off => return String::new(),
+        ReasoningDisplayMode::Current => {
+            let line_count = text.lines().filter(|l| !l.trim().is_empty()).count();
+            let mut out = jcode_tui_markdown::reasoning_summary_line_markup(line_count);
+            // Blank line terminates the reasoning block.
+            out.push('\n');
+            return out;
+        }
+        ReasoningDisplayMode::Full => {}
+    }
     let mut out = String::new();
     for line in text.split('\n') {
         out.push_str(&jcode_tui_markdown::reasoning_line_markup(line));
diff --git a/crates/jcode-base/src/session_tests/cases.rs b/crates/jcode-base/src/session_tests/cases.rs
index 3aebf4bbe..3b82ae892 100644
--- a/crates/jcode-base/src/session_tests/cases.rs
+++ b/crates/jcode-base/src/session_tests/cases.rs
@@ -1061,6 +1061,10 @@ fn test_render_messages_honors_system_display_role_override() {
 fn test_render_messages_renders_persisted_reasoning() {
     use jcode_tui_markdown::REASONING_SENTINEL;
 
+    let _env_lock = lock_env();
+    let _mode = EnvVarGuard::set("JCODE_REASONING_DISPLAY", "full");
+    crate::config::invalidate_config_cache();
+
     let mut session = Session::create_with_id(
         "session_render_reasoning_test".to_string(),
         None,
@@ -1106,6 +1110,10 @@ fn test_render_messages_renders_persisted_reasoning() {
 fn test_render_messages_renders_legacy_reasoning_variant() {
     use jcode_tui_markdown::REASONING_SENTINEL;
 
+    let _env_lock = lock_env();
+    let _mode = EnvVarGuard::set("JCODE_REASONING_DISPLAY", "full");
+    crate::config::invalidate_config_cache();
+
     let mut session = Session::create_with_id(
         "session_render_legacy_reasoning_test".to_string(),
         None,
@@ -1130,6 +1138,87 @@ fn test_render_messages_renders_legacy_reasoning_variant() {
     );
 }
 
+#[test]
+fn test_render_messages_collapses_persisted_reasoning_in_current_mode() {
+    use jcode_tui_markdown::REASONING_SENTINEL;
+
+    let _env_lock = lock_env();
+    let _mode = EnvVarGuard::set("JCODE_REASONING_DISPLAY", "current");
+    crate::config::invalidate_config_cache();
+
+    let mut session = Session::create_with_id(
+        "session_render_reasoning_current_test".to_string(),
+        None,
+        Some("render reasoning current test".to_string()),
+    );
+
+    session.add_message(
+        Role::Assistant,
+        vec![
+            ContentBlock::ReasoningTrace {
+                text: "step one\nstep two\nstep three".to_string(),
+            },
+            ContentBlock::Text {
+                text: "Here is the answer.".to_string(),
+                cache_control: None,
+            },
+        ],
+    );
+
+    let rendered = render_messages(&session);
+    assert_eq!(rendered.len(), 1);
+    let content = &rendered[0].content;
+    // In `current` mode re-rendered history folds the whole reasoning block down
+    // to a single dim/italic trace line, matching the live collapse end state.
+    assert!(
+        content.contains(&format!("*{0}▸ thought (3 lines){0}*", REASONING_SENTINEL)),
+        "expected collapsed reasoning summary, got: {content:?}"
+    );
+    assert!(
+        !content.contains("step one") && !content.contains("step two"),
+        "individual reasoning lines must not be replayed in current mode: {content:?}"
+    );
+    // The answer text is preserved and follows the collapsed trace.
+    assert!(content.contains("Here is the answer."));
+}
+
+#[test]
+fn test_render_messages_hides_persisted_reasoning_in_off_mode() {
+    use jcode_tui_markdown::REASONING_SENTINEL;
+
+    let _env_lock = lock_env();
+    let _mode = EnvVarGuard::set("JCODE_REASONING_DISPLAY", "off");
+    crate::config::invalidate_config_cache();
+
+    let mut session = Session::create_with_id(
+        "session_render_reasoning_off_test".to_string(),
+        None,
+        Some("render reasoning off test".to_string()),
+    );
+
+    session.add_message(
+        Role::Assistant,
+        vec![
+            ContentBlock::ReasoningTrace {
+                text: "secret thought".to_string(),
+            },
+            ContentBlock::Text {
+                text: "Here is the answer.".to_string(),
+                cache_control: None,
+            },
+        ],
+    );
+
+    let rendered = render_messages(&session);
+    assert_eq!(rendered.len(), 1);
+    let content = &rendered[0].content;
+    assert!(
+        !content.contains(REASONING_SENTINEL) && !content.contains("secret thought"),
+        "reasoning must be hidden entirely in off mode: {content:?}"
+    );
+    assert!(content.contains("Here is the answer."));
+}
+
 #[test]
 fn test_render_messages_honors_background_task_display_role_override() {
     let mut session = Session::create_with_id(
diff --git a/crates/jcode-tui-markdown/src/lib.rs b/crates/jcode-tui-markdown/src/lib.rs
index 0d9a71335..0a0ecfb29 100644
--- a/crates/jcode-tui-markdown/src/lib.rs
+++ b/crates/jcode-tui-markdown/src/lib.rs
@@ -185,6 +185,19 @@ pub fn reasoning_partial_markup(line: &str) -> String {
     }
 }
 
+/// One-line collapsed reasoning summary markup (e.g. `▸ thought (3 lines)`),
+/// styled dim+italic like the live reasoning lines. Used to fold a persisted
+/// reasoning block down to a single trace line when the transcript is
+/// re-rendered from history in `current` reasoning-display mode (so reloaded /
+/// resumed sessions match the live collapse instead of replaying every line).
+pub fn reasoning_summary_line_markup(line_count: usize) -> String {
+    let label = match line_count {
+        0 | 1 => "▸ thought".to_string(),
+        n => format!("▸ thought ({} lines)", n),
+    };
+    reasoning_line_markup(&label)
+}
+
 use render_support::{
     highlight_code_cached, line_plain_text, placeholder_code_block, ranges_overlap, render_table,
 };
diff --git a/crates/jcode-tui-markdown/src/markdown_tests/cases/rendering.rs b/crates/jcode-tui-markdown/src/markdown_tests/cases/rendering.rs
index 4c4082843..f9db404ca 100644
--- a/crates/jcode-tui-markdown/src/markdown_tests/cases/rendering.rs
+++ b/crates/jcode-tui-markdown/src/markdown_tests/cases/rendering.rs
@@ -763,3 +763,51 @@ fn test_reasoning_emphasis_does_not_leak_into_following_text() {
         );
     }
 }
+
+#[test]
+fn test_reasoning_summary_line_markup_folds_to_single_dim_italic_trace() {
+    let sentinel = crate::REASONING_SENTINEL;
+
+    // Pluralized count for multi-line blocks.
+    let many = crate::reasoning_summary_line_markup(3);
+    assert!(
+        many.contains(&format!("*{0}▸ thought (3 lines){0}*", sentinel)),
+        "expected pluralized summary markup, got: {many:?}"
+    );
+
+    // Single/zero-line blocks omit the count.
+    let one = crate::reasoning_summary_line_markup(1);
+    assert!(
+        one.contains(&format!("*{0}▸ thought{0}*", sentinel)) && !one.contains("lines"),
+        "expected bare summary markup, got: {one:?}"
+    );
+    let none = crate::reasoning_summary_line_markup(0);
+    assert!(none.contains(&format!("*{0}▸ thought{0}*", sentinel)), "{none:?}");
+
+    // The summary line renders dim + italic with no sentinel leaking into text.
+    let lines = render_markdown(&many);
+    let dim = md_dim_color();
+    let mut saw_marker = false;
+    for rendered in &lines {
+        for span in &rendered.spans {
+            assert!(
+                !span.content.contains(sentinel),
+                "sentinel leaked into visible summary: {:?}",
+                span.content
+            );
+            if span.content.trim().is_empty() {
+                continue;
+            }
+            if span.content.contains('▸') {
+                saw_marker = true;
+            }
+            assert_eq!(span.style.fg, Some(dim), "summary span not dim: {:?}", span.content);
+            assert!(
+                span.style.add_modifier.contains(Modifier::ITALIC),
+                "summary span not italic: {:?}",
+                span.content
+            );
+        }
+    }
+    assert!(saw_marker, "summary marker '▸' must be visible: {lines:?}");
+}

From 8fc815a8e66f438bad748869847cbee204fc0280 Mon Sep 17 00:00:00 2001
From: jeremy <94247773+1jehuang@users.noreply.github.com>
Date: Fri, 5 Jun 2026 19:06:42 -0700
Subject: [PATCH 56/57] chore(release): bump version to 0.23.0

---
 Cargo.lock | 2 +-
 Cargo.toml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6244492a7..64661cbfd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3265,7 +3265,7 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
 
 [[package]]
 name = "jcode"
-version = "0.22.0"
+version = "0.23.0"
 dependencies = [
  "agentgrep",
  "anyhow",
diff --git a/Cargo.toml b/Cargo.toml
index 42f60f942..8635279b6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "jcode"
-version = "0.22.0"
+version = "0.23.0"
 description = "Possibly the greatest coding agent ever built — blazing-fast TUI, multi-model, swarm coordination, 30+ tools"
 edition = "2024"
 autobins = false

From c63802a8ce9c0b11b3ffa5afb2184705a6674451 Mon Sep 17 00:00:00 2001
From: quangdang46 <quangdang46@users.noreply.github.com>
Date: Sat, 6 Jun 2026 10:11:53 +0700
Subject: [PATCH 57/57] fix: propagate route_api_method in SubagentTool +
 resolve loading.rs ForeignSession

---
 crates/jcode-app-core/src/tool/task.rs        |   1 +
 crates/jcode-tui/src/tui/session_picker.rs    |   1 +
 .../src/tui/session_picker/loading.rs         | 641 +++++-------------
 3 files changed, 173 insertions(+), 470 deletions(-)

diff --git a/crates/jcode-app-core/src/tool/task.rs b/crates/jcode-app-core/src/tool/task.rs
index 31546dddf..2c61f127a 100644
--- a/crates/jcode-app-core/src/tool/task.rs
+++ b/crates/jcode-app-core/src/tool/task.rs
@@ -227,6 +227,7 @@ impl Tool for SubagentTool {
         // other's `children` entries. Acceptable for experimental Phase 0;
         // a file-lock or in-memory session cache would fix this properly.
         if let Ok(mut parent_session) = Session::load(&ctx.session_id) {
+            session.route_api_method = parent_session.route_api_method.clone();
             parent_session.add_child(session.id.clone());
             let _ = parent_session.save();
         }
diff --git a/crates/jcode-tui/src/tui/session_picker.rs b/crates/jcode-tui/src/tui/session_picker.rs
index 4ce1dbeb2..eae58d398 100644
--- a/crates/jcode-tui/src/tui/session_picker.rs
+++ b/crates/jcode-tui/src/tui/session_picker.rs
@@ -562,6 +562,7 @@ impl SessionPicker {
             ResumeTarget::OpenCodeSession { .. } => external_path.as_deref().and_then(|path| {
                 loading::load_opencode_preview_from_path(std::path::Path::new(path))
             }),
+            ResumeTarget::ForeignSession { .. } => None,
         }
     }
 
diff --git a/crates/jcode-tui/src/tui/session_picker/loading.rs b/crates/jcode-tui/src/tui/session_picker/loading.rs
index 52e3a936f..670bb1b94 100644
--- a/crates/jcode-tui/src/tui/session_picker/loading.rs
+++ b/crates/jcode-tui/src/tui/session_picker/loading.rs
@@ -111,7 +111,13 @@ where
         }
         handles
             .into_iter()
-            .filter_map(|handle| handle.join().ok())
+            .filter_map(|handle| match handle.join() {
+                Ok(result) => Some(result),
+                Err(panic) => {
+                    eprintln!("parallel_map worker panicked: {panic:?}");
+                    None
+                }
+            })
             .collect()
     });
 
@@ -371,6 +377,11 @@ fn transcript_paths_for_session(session: &SessionInfo) -> Vec<PathBuf> {
         | ResumeTarget::OpenCodeSession { session_path, .. } => {
             vec![PathBuf::from(session_path)]
         }
+        ResumeTarget::ForeignSession { session_path, .. } => session_path
+            .as_deref()
+            .map(PathBuf::from)
+            .map(|p| vec![p])
+            .unwrap_or_default(),
     }
 }
 
@@ -642,37 +653,6 @@ fn collect_recent_files_recursive(root: &Path, extension: &str, limit: usize) ->
     files.into_iter().map(|(_, path)| path).collect()
 }
 
-/// Maximum number of bytes we read from the *tail* of an external transcript
-/// (Codex / Claude Code) when building its preview. These JSONL transcripts can
-/// be tens of MB, but the preview only ever shows the last ~20 messages, so
-/// parsing the whole file on every selection change made arrow-key navigation
-/// in the resume / onboarding picker lag badly (each load reparsed the entire
-/// file on a fresh thread). Reading a bounded tail keeps each preview load to a
-/// sub-millisecond seek + parse regardless of transcript size.
-///
-/// 512 KiB comfortably covers far more than 20 messages for normal transcripts
-/// while bounding the worst case.
-const EXTERNAL_PREVIEW_TAIL_BYTES: u64 = 512 * 1024;
-
-/// Read the trailing portion of a file as UTF-8 text, capped at
-/// [`EXTERNAL_PREVIEW_TAIL_BYTES`]. When the file is larger than the cap we seek
-/// to the tail and drop the (possibly partial) first line so we only ever parse
-/// complete JSONL records. Returns `(text, truncated_from_head)` where
-/// `truncated_from_head` indicates the head of the file was skipped.
-fn read_file_tail_text(path: &Path, max_bytes: u64) -> Option<(String, bool)> {
-    let mut file = File::open(path).ok()?;
-    let len = file.metadata().ok()?.len();
-    let truncated = len > max_bytes;
-    if truncated {
-        file.seek(SeekFrom::Start(len - max_bytes)).ok()?;
-    }
-    let mut bytes = Vec::with_capacity(max_bytes.min(len) as usize);
-    file.take(max_bytes).read_to_end(&mut bytes).ok()?;
-    // Lossily decode: transcripts are UTF-8, but a tail seek can land mid
-    // multi-byte sequence, and replacement chars are harmless for a preview.
-    Some((String::from_utf8_lossy(&bytes).into_owned(), truncated))
-}
-
 fn push_preview_message(preview: &mut Vec<PreviewMessage>, role: &str, content: String) {
     let content = content.trim();
     if content.is_empty() {
@@ -783,15 +763,6 @@ fn truncate_title_text(text: &str, max_chars: usize) -> String {
     format!("{}…", truncated.trim_end())
 }
 
-fn parse_timestamp_value(
-    value: Option<&serde_json::Value>,
-) -> Option<chrono::DateTime<chrono::Utc>> {
-    value
-        .and_then(|v| v.as_str())
-        .and_then(|ts| chrono::DateTime::parse_from_rfc3339(ts).ok())
-        .map(|dt| dt.with_timezone(&chrono::Utc))
-}
-
 #[cfg(test)]
 fn value_first_text(value: &serde_json::Value) -> Option<&str> {
     match value {
@@ -1669,10 +1640,14 @@ pub fn load_sessions() -> Result<Vec<SessionInfo>> {
     let catchup_ref = &catchup_seen;
 
     let (mut sessions, external_sessions) = std::thread::scope(|scope| {
-        let claude_handle = scope.spawn(|| load_external_claude_code_sessions(scan_limit));
-        let codex_handle = scope.spawn(|| load_external_codex_sessions(scan_limit));
-        let pi_handle = scope.spawn(|| load_external_pi_sessions(scan_limit));
-        let opencode_handle = scope.spawn(|| load_external_opencode_sessions(scan_limit));
+        // Single generic loader that walks every CASR provider known to be
+        // installed (claude-code, codex, pi-agent, opencode, gemini,
+        // cursor, cline, aider, amp, chatgpt, clawdbot, vibe, factory,
+        // openclaw, kiro, jcode). Replaces the four hand-rolled
+        // `load_external_<provider>_sessions` functions that were
+        // maintained separately before CASR became the single source of
+        // truth.
+        let casr_handle = scope.spawn(|| load_external_casr_sessions(scan_limit));
 
         // Phase 1: walk the recency-ordered candidates in parallel windows until
         // we have collected `scan_limit` non-empty sessions. `boundary` marks the
@@ -1724,10 +1699,7 @@ pub fn load_sessions() -> Result<Vec<SessionInfo>> {
         }
 
         let mut external = Vec::new();
-        external.extend(claude_handle.join().unwrap_or_default());
-        external.extend(codex_handle.join().unwrap_or_default());
-        external.extend(pi_handle.join().unwrap_or_default());
-        external.extend(opencode_handle.join().unwrap_or_default());
+        external.extend(casr_handle.join().unwrap_or_default());
         (sessions, external)
     });
     sessions.extend(external_sessions);
@@ -1746,101 +1718,157 @@ pub fn load_sessions() -> Result<Vec<SessionInfo>> {
     Ok(sessions)
 }
 
-fn load_external_claude_code_sessions(scan_limit: usize) -> Vec<SessionInfo> {
-    let Ok(sessions) = crate::import::list_claude_code_sessions_lazy(scan_limit) else {
-        return Vec::new();
+/// Enumerate sessions from every CASR-registered provider that is
+/// installed on the host. Replaces the four hand-rolled
+/// `load_external_<provider>_sessions` functions that used to be
+/// maintained separately per provider; the CASR library now owns the
+/// filesystem walks, file parsing, and timestamp heuristics.
+fn load_external_casr_sessions(scan_limit: usize) -> Vec<SessionInfo> {
+    let by_provider = crate::casr_adapter::list_all_casr_sessions(Some(scan_limit));
+    let mut out = Vec::new();
+    for (slug, sessions) in by_provider {
+        for session in sessions {
+            if out.len() >= scan_limit {
+                return out;
+            }
+            out.push(casr_session_to_session_info(&slug, session));
+        }
+    }
+    out
+}
+
+/// Project a CASR `(provider_slug, ClaudeCodeSessionInfo)` into the
+/// picker's `SessionInfo`. Picks the right `SessionSource` and
+/// `ResumeTarget` variant based on the provider slug so that downstream
+/// `match` arms in `tui_launch.rs` / `inline_interactive.rs` continue
+/// to work for the original four providers, and the new `Foreign`
+/// variant carries the rest.
+fn casr_session_to_session_info(
+    provider_slug: &str,
+    session: crate::casr_adapter::ClaudeCodeSessionInfo,
+) -> SessionInfo {
+    let session_id = session.session_id.clone();
+    let session_path = session.full_path.clone();
+    let created_at = session.created.unwrap_or_else(chrono::Utc::now);
+    let last_message_time = session.modified.or(session.created).unwrap_or(created_at);
+    let working_dir = session.project_path.clone();
+
+    let title = session
+        .summary
+        .filter(|s| !s.trim().is_empty())
+        .unwrap_or_else(|| truncate_title_text(&session.first_prompt, 72));
+
+    let short_name = working_dir
+        .as_deref()
+        .and_then(|dir| Path::new(dir).file_name())
+        .and_then(|n| n.to_str())
+        .map(|n| n.to_string())
+        .unwrap_or_else(|| {
+            format!(
+                "{} {}",
+                provider_slug,
+                &session_id[..session_id.len().min(8)]
+            )
+        });
+
+    let session_id_for_index = session_id.clone();
+    let search_index = build_search_index(
+        &format!("{provider_slug}:{session_id}"),
+        &short_name,
+        &title,
+        working_dir.as_deref(),
+        None,
+        &[],
+    );
+
+    // Pick the source + resume_target based on the provider slug.
+    let (source, resume_target) = match provider_slug {
+        "claude-code" => (
+            SessionSource::ClaudeCode,
+            ResumeTarget::ClaudeCodeSession {
+                session_id: session_id.clone(),
+                session_path: session_path.clone(),
+            },
+        ),
+        "codex" => (
+            SessionSource::Codex,
+            ResumeTarget::CodexSession {
+                session_id: session_id.clone(),
+                session_path: session_path.clone(),
+            },
+        ),
+        "pi-agent" => (
+            SessionSource::Pi,
+            ResumeTarget::PiSession {
+                session_path: session_path.clone(),
+            },
+        ),
+        "opencode" => (
+            SessionSource::OpenCode,
+            ResumeTarget::OpenCodeSession {
+                session_id: session_id.clone(),
+                session_path: session_path.clone(),
+            },
+        ),
+        other => (
+            SessionSource::Foreign(other.to_string()),
+            ResumeTarget::ForeignSession {
+                provider_slug: other.to_string(),
+                session_id: session_id.clone(),
+                session_path: Some(session_path.clone()),
+            },
+        ),
     };
 
-    sessions
-        .into_iter()
-        .take(scan_limit)
-        .map(|session| {
-            let session_id = session.session_id;
-            let created_at = session.created.unwrap_or_else(chrono::Utc::now);
-            let last_message_time = session.modified.or(session.created).unwrap_or(created_at);
-            let working_dir = session.project_path;
-            let title = session
-                .summary
-                .filter(|summary| !summary.trim().is_empty())
-                .unwrap_or_else(|| truncate_title_text(&session.first_prompt, 72));
-            let short_name = working_dir
-                .as_deref()
-                .and_then(|dir| Path::new(dir).file_name())
-                .and_then(|name| name.to_str())
-                .map(|name| name.to_string())
-                .unwrap_or_else(|| format!("claude {}", &session_id[..session_id.len().min(8)]));
-            // Keep /resume startup focused on cheap metadata. Transcript-backed
-            // search text is intentionally loaded lazily through preview loading;
-            // reading tens of KiB from every external transcript can dominate the
-            // initial picker load on accounts with many Claude Code sessions.
-            let search_index = build_search_index(
-                &format!("claude:{session_id}"),
-                &short_name,
-                &title,
-                working_dir.as_deref(),
-                None,
-                &[],
-            );
-
-            SessionInfo {
-                id: format!("claude:{session_id}"),
-                parent_id: None,
-                short_name,
-                icon: "🧵".to_string(),
-                title,
-                message_count: session.message_count as usize,
-                user_message_count: 0,
-                assistant_message_count: 0,
-                created_at,
-                last_message_time,
-                last_active_at: Some(last_message_time),
-                working_dir,
-                model: None,
-                provider_key: Some("claude-code".to_string()),
-                is_canary: false,
-                is_debug: false,
-                saved: false,
-                save_label: None,
-                status: SessionStatus::Closed,
-                needs_catchup: false,
-                estimated_tokens: 0,
-                first_user_prompt: Some(session.first_prompt.clone()),
-                messages_preview: Vec::new(),
-                search_index,
-                server_name: None,
-                server_icon: None,
-                source: SessionSource::ClaudeCode,
-                resume_target: ResumeTarget::ClaudeCodeSession {
-                    session_id,
-                    session_path: session.full_path.clone(),
-                },
-                external_path: Some(session.full_path),
-            }
-        })
-        .collect()
+    // Drop the now-unused captured session_id_for_index so clippy
+    // doesn't complain; reserved for future search_index refinement.
+    let _ = &session_id_for_index;
+
+    SessionInfo {
+        id: format!("{provider_slug}:{session_id}"),
+        parent_id: None,
+        short_name,
+        icon: "💾".to_string(),
+        title,
+        message_count: session.message_count as usize,
+        user_message_count: 0,
+        assistant_message_count: 0,
+        created_at,
+        last_message_time,
+        last_active_at: Some(last_message_time),
+        working_dir,
+        model: None,
+        provider_key: Some(provider_slug.to_string()),
+        is_canary: false,
+        is_debug: false,
+        saved: false,
+        save_label: None,
+        status: SessionStatus::Closed,
+        needs_catchup: false,
+        estimated_tokens: 0,
+        first_user_prompt: Some(session.first_prompt.clone()),
+        messages_preview: Vec::new(),
+        search_index,
+        server_name: None,
+        server_icon: None,
+        source,
+        resume_target,
+        external_path: Some(session_path),
+    }
 }
 
 pub(super) fn load_claude_code_preview_from_path(path: &Path) -> Option<Vec<PreviewMessage>> {
-    // Only parse the tail of the transcript (see `load_codex_preview_from_path`):
-    // the preview shows the last ~20 messages, so reparsing multi-MB transcripts
-    // on every selection change made picker navigation lag.
-    let (text, truncated) = read_file_tail_text(path, EXTERNAL_PREVIEW_TAIL_BYTES)?;
+    let file = File::open(path).ok()?;
+    let reader = BufReader::new(file);
     let mut preview = Vec::new();
 
-    // If we seeked into the middle of the file, the first line is a partial
-    // record; drop it. When we read the whole file the first line is a real
-    // record we must keep.
-    let skip = usize::from(truncated);
-    for line in text.lines().skip(skip) {
+    for line in reader.lines() {
+        let line = line.ok()?;
         let trimmed = line.trim();
         if trimmed.is_empty() {
             continue;
         }
-        // Boundary lines from a tail slice may be malformed; skip rather than
-        // abandon the whole preview.
-        let Ok(value) = serde_json::from_str::<serde_json::Value>(trimmed) else {
-            continue;
-        };
+        let value: serde_json::Value = serde_json::from_str(trimmed).ok()?;
         let entry_type = value
             .get("type")
             .and_then(|v| v.as_str())
@@ -1868,27 +1896,14 @@ pub(super) fn load_claude_code_preview_from_path(path: &Path) -> Option<Vec<Prev
 }
 
 pub(super) fn load_claude_code_preview(session_id: &str) -> Option<Vec<PreviewMessage>> {
-    let session = crate::import::list_claude_code_sessions()
+    let session = crate::casr_adapter::list_claude_code_sessions()
         .ok()?
         .into_iter()
         .find(|session| session.session_id == session_id)?;
     load_claude_code_preview_from_path(Path::new(&session.full_path))
 }
 
-fn load_external_codex_sessions(scan_limit: usize) -> Vec<SessionInfo> {
-    let Ok(root) = crate::storage::user_home_path(".codex/sessions") else {
-        return Vec::new();
-    };
-    if !root.exists() {
-        return Vec::new();
-    }
 
-    let paths = collect_recent_files_recursive(&root, "jsonl", scan_limit);
-    parallel_map(paths, |path| load_codex_session_stub(&path).ok().flatten())
-        .into_iter()
-        .flatten()
-        .collect()
-}
 
 /// Newest external-transcript modification time (Unix seconds) for the given
 /// external CLI, scanning the sandbox-aware session roots. Returns `None` when
@@ -1916,85 +1931,6 @@ pub(crate) fn latest_external_cli_session_secs(
         .map(|duration| duration.as_secs())
 }
 
-fn load_codex_session_stub(path: &Path) -> Result<Option<SessionInfo>> {
-    let file = File::open(path)?;
-    let mut lines = BufReader::new(file).lines();
-    let Some(first_line) = lines.next() else {
-        return Ok(None);
-    };
-    let header: serde_json::Value = serde_json::from_str(&first_line?)?;
-    let meta = if header.get("type").and_then(|v| v.as_str()) == Some("session_meta") {
-        header.get("payload").unwrap_or(&header)
-    } else {
-        &header
-    };
-    let session_id = meta
-        .get("id")
-        .and_then(|v| v.as_str())
-        .unwrap_or_default()
-        .to_string();
-    if session_id.is_empty() {
-        return Ok(None);
-    }
-
-    let created_at = parse_timestamp_value(meta.get("timestamp"))
-        .or_else(|| parse_timestamp_value(header.get("timestamp")))
-        .unwrap_or_else(chrono::Utc::now);
-    let last_message_time = std::fs::metadata(path)
-        .and_then(|meta| meta.modified())
-        .map(chrono::DateTime::<chrono::Utc>::from)
-        .unwrap_or(created_at);
-    let working_dir = meta
-        .get("cwd")
-        .and_then(|v| v.as_str())
-        .map(|s| s.to_string());
-    let short_name = format!("codex {}", &session_id[..session_id.len().min(8)]);
-    let title = format!("Codex session {}", &session_id[..session_id.len().min(8)]);
-    let search_index = build_search_index(
-        &format!("codex:{session_id}"),
-        &short_name,
-        &title,
-        working_dir.as_deref(),
-        None,
-        &[],
-    );
-
-    Ok(Some(SessionInfo {
-        id: format!("codex:{session_id}"),
-        parent_id: None,
-        short_name,
-        icon: "🧠".to_string(),
-        title,
-        message_count: 0,
-        user_message_count: 0,
-        assistant_message_count: 0,
-        created_at,
-        last_message_time,
-        last_active_at: Some(last_message_time),
-        working_dir,
-        model: None,
-        provider_key: Some("openai-codex".to_string()),
-        is_canary: false,
-        is_debug: false,
-        saved: false,
-        save_label: None,
-        status: SessionStatus::Closed,
-        needs_catchup: false,
-        estimated_tokens: 0,
-        first_user_prompt: None,
-        messages_preview: Vec::new(),
-        search_index,
-        server_name: None,
-        server_icon: None,
-        source: SessionSource::Codex,
-        resume_target: ResumeTarget::CodexSession {
-            session_id,
-            session_path: path.to_string_lossy().to_string(),
-        },
-        external_path: Some(path.to_string_lossy().to_string()),
-    }))
-}
-
 fn find_codex_session_file(session_id: &str) -> Option<PathBuf> {
     let root = crate::storage::user_home_path(".codex/sessions").ok()?;
     if !root.exists() {
@@ -2025,25 +1961,17 @@ fn find_codex_session_file(session_id: &str) -> Option<PathBuf> {
 }
 
 pub(super) fn load_codex_preview_from_path(path: &Path) -> Option<Vec<PreviewMessage>> {
-    // Only parse the tail of the transcript: the preview shows the last ~20
-    // messages, and these rollout files can be tens of MB, so reading the whole
-    // file on every selection change made picker navigation lag.
-    let (text, _truncated) = read_file_tail_text(path, EXTERNAL_PREVIEW_TAIL_BYTES)?;
+    let file = File::open(path).ok()?;
+    let reader = BufReader::new(file);
     let mut preview = Vec::new();
 
-    // When we read from the start we skip the first line (the `session_meta`
-    // record). When we read a tail slice the first line is almost certainly a
-    // partial record, so we drop it either way.
-    for line in text.lines().skip(1) {
+    for line in reader.lines().skip(1) {
+        let line = line.ok()?;
         let trimmed = line.trim();
         if trimmed.is_empty() {
             continue;
         }
-        // A tail slice can yield malformed JSON on its boundary lines; skip
-        // those instead of bailing out of the whole preview.
-        let Ok(value) = serde_json::from_str::<serde_json::Value>(trimmed) else {
-            continue;
-        };
+        let value: serde_json::Value = serde_json::from_str(trimmed).ok()?;
         let line_type = value
             .get("type")
             .and_then(|v| v.as_str())
@@ -2093,101 +2021,6 @@ pub(super) fn load_pi_preview_from_path(path: &Path) -> Option<Vec<PreviewMessag
         .map(|session| session.messages_preview)
 }
 
-fn load_external_pi_sessions(scan_limit: usize) -> Vec<SessionInfo> {
-    let Ok(root) = crate::storage::user_home_path(".pi/agent/sessions") else {
-        return Vec::new();
-    };
-    if !root.exists() {
-        return Vec::new();
-    }
-
-    let paths = collect_recent_files_recursive(&root, "jsonl", scan_limit);
-    parallel_map(paths, |path| load_pi_session_stub(&path).ok().flatten())
-        .into_iter()
-        .flatten()
-        .collect()
-}
-
-fn load_pi_session_stub(path: &Path) -> Result<Option<SessionInfo>> {
-    let file = File::open(path)?;
-    let mut lines = BufReader::new(file).lines();
-    let Some(first_line) = lines.next() else {
-        return Ok(None);
-    };
-    let header: serde_json::Value = serde_json::from_str(&first_line?)?;
-    if header.get("type").and_then(|v| v.as_str()) != Some("session") {
-        return Ok(None);
-    }
-
-    let session_id = header
-        .get("id")
-        .and_then(|v| v.as_str())
-        .unwrap_or_default()
-        .to_string();
-    if session_id.is_empty() {
-        return Ok(None);
-    }
-
-    let created_at = header
-        .get("timestamp")
-        .and_then(|v| v.as_str())
-        .and_then(|ts| chrono::DateTime::parse_from_rfc3339(ts).ok())
-        .map(|dt| dt.with_timezone(&chrono::Utc))
-        .unwrap_or_else(chrono::Utc::now);
-    let last_message_time = std::fs::metadata(path)
-        .and_then(|meta| meta.modified())
-        .map(chrono::DateTime::<chrono::Utc>::from)
-        .unwrap_or(created_at);
-    let working_dir = header
-        .get("cwd")
-        .and_then(|v| v.as_str())
-        .map(|s| s.to_string());
-    let short_name = format!("pi {}", &session_id[..session_id.len().min(8)]);
-    let title = format!("Pi session {}", &session_id[..session_id.len().min(8)]);
-    let search_index = build_search_index(
-        &format!("pi:{session_id}"),
-        &short_name,
-        &title,
-        working_dir.as_deref(),
-        None,
-        &[],
-    );
-
-    Ok(Some(SessionInfo {
-        id: format!("pi:{session_id}"),
-        parent_id: None,
-        short_name,
-        icon: "π".to_string(),
-        title,
-        message_count: 0,
-        user_message_count: 0,
-        assistant_message_count: 0,
-        created_at,
-        last_message_time,
-        last_active_at: Some(last_message_time),
-        working_dir,
-        model: None,
-        provider_key: Some("pi".to_string()),
-        is_canary: false,
-        is_debug: false,
-        saved: false,
-        save_label: None,
-        status: SessionStatus::Closed,
-        needs_catchup: false,
-        estimated_tokens: 0,
-        first_user_prompt: None,
-        messages_preview: Vec::new(),
-        search_index,
-        server_name: None,
-        server_icon: None,
-        source: SessionSource::Pi,
-        resume_target: ResumeTarget::PiSession {
-            session_path: path.to_string_lossy().to_string(),
-        },
-        external_path: Some(path.to_string_lossy().to_string()),
-    }))
-}
-
 fn load_pi_session_info(path: &Path) -> Result<Option<SessionInfo>> {
     let file = File::open(path)?;
     let reader = BufReader::new(file);
@@ -2348,21 +2181,6 @@ fn load_pi_session_info(path: &Path) -> Result<Option<SessionInfo>> {
     }))
 }
 
-fn load_external_opencode_sessions(scan_limit: usize) -> Vec<SessionInfo> {
-    let Ok(root) = crate::storage::user_home_path(".local/share/opencode/storage/session") else {
-        return Vec::new();
-    };
-    if !root.exists() {
-        return Vec::new();
-    }
-
-    let paths = collect_recent_files_recursive(&root, "json", scan_limit);
-    parallel_map(paths, |path| load_opencode_session_stub(&path).ok().flatten())
-        .into_iter()
-        .flatten()
-        .collect()
-}
-
 pub(super) fn load_opencode_preview_from_path(path: &Path) -> Option<Vec<PreviewMessage>> {
     load_opencode_session_info(path)
         .ok()
@@ -2370,89 +2188,6 @@ pub(super) fn load_opencode_preview_from_path(path: &Path) -> Option<Vec<Preview
         .map(|session| session.messages_preview)
 }
 
-fn load_opencode_session_stub(path: &Path) -> Result<Option<SessionInfo>> {
-    let value: serde_json::Value = serde_json::from_reader(File::open(path)?)?;
-    let session_id = value
-        .get("id")
-        .and_then(|v| v.as_str())
-        .unwrap_or_default()
-        .to_string();
-    if session_id.is_empty() {
-        return Ok(None);
-    }
-
-    let created_at = value
-        .get("time")
-        .and_then(|time| time.get("created"))
-        .and_then(|v| v.as_i64())
-        .and_then(chrono::DateTime::<chrono::Utc>::from_timestamp_millis)
-        .unwrap_or_else(chrono::Utc::now);
-    let last_message_time = value
-        .get("time")
-        .and_then(|time| time.get("updated"))
-        .and_then(|v| v.as_i64())
-        .and_then(chrono::DateTime::<chrono::Utc>::from_timestamp_millis)
-        .unwrap_or(created_at);
-    let working_dir = value
-        .get("directory")
-        .and_then(|v| v.as_str())
-        .map(|s| s.to_string());
-    let short_name = format!("opencode {}", &session_id[..session_id.len().min(8)]);
-    let title = value
-        .get("title")
-        .and_then(|v| v.as_str())
-        .map(|s| truncate_title_text(s, 72))
-        .unwrap_or_else(|| {
-            format!(
-                "OpenCode session {}",
-                &session_id[..session_id.len().min(8)]
-            )
-        });
-    let search_index = build_search_index(
-        &format!("opencode:{session_id}"),
-        &short_name,
-        &title,
-        working_dir.as_deref(),
-        None,
-        &[],
-    );
-
-    Ok(Some(SessionInfo {
-        id: format!("opencode:{session_id}"),
-        parent_id: None,
-        short_name,
-        icon: "◌".to_string(),
-        title,
-        message_count: 0,
-        user_message_count: 0,
-        assistant_message_count: 0,
-        created_at,
-        last_message_time,
-        last_active_at: Some(last_message_time),
-        working_dir,
-        model: None,
-        provider_key: Some("opencode".to_string()),
-        is_canary: false,
-        is_debug: false,
-        saved: false,
-        save_label: None,
-        status: SessionStatus::Closed,
-        needs_catchup: false,
-        estimated_tokens: 0,
-        first_user_prompt: None,
-        messages_preview: Vec::new(),
-        search_index,
-        server_name: None,
-        server_icon: None,
-        source: SessionSource::OpenCode,
-        resume_target: ResumeTarget::OpenCodeSession {
-            session_id,
-            session_path: path.to_string_lossy().to_string(),
-        },
-        external_path: Some(path.to_string_lossy().to_string()),
-    }))
-}
-
 fn load_opencode_session_info(path: &Path) -> Result<Option<SessionInfo>> {
     let value: serde_json::Value = serde_json::from_reader(File::open(path)?)?;
     let session_id = value
@@ -2670,53 +2405,19 @@ pub fn load_sessions_grouped() -> Result<(Vec<ServerGroup>, Vec<SessionInfo>)> {
 /// jcode snapshot, the other CLIs, and listing servers) is wasted there. This
 /// scoped loader keeps onboarding responsive by touching only the relevant
 /// transcripts.
-///
-/// The live onboarding flow now uses [`load_external_cli_sessions_grouped_multi`]
-/// (it shows every logged-in CLI together), so this single-CLI variant is kept
-/// only as a focused test helper.
-#[cfg(test)]
 pub(crate) fn load_external_cli_sessions_grouped(
-    cli: crate::tui::app::onboarding_flow::ExternalCli,
+    _cli: crate::tui::app::onboarding_flow::ExternalCli,
 ) -> (Vec<ServerGroup>, Vec<SessionInfo>) {
-    use crate::tui::app::onboarding_flow::ExternalCli;
     let scan_limit = session_scan_limit();
-    let sessions = match cli {
-        ExternalCli::Codex => load_external_codex_sessions(scan_limit),
-        ExternalCli::ClaudeCode => load_external_claude_code_sessions(scan_limit),
-    };
+    let sessions = load_external_casr_sessions(scan_limit);
     (Vec::new(), sessions)
 }
 
-/// Load sessions for several external CLIs at once (Codex and/or Claude Code),
-/// returned as a single combined orphan list compatible with
-/// `SessionPicker::new_grouped`.
-///
-/// First-run onboarding's "continue where you left off" picker shows every
-/// external CLI the user is logged into, not just one, so it loads all of them
-/// here. Each CLI is still scoped to its own transcripts (no jcode snapshots /
-/// servers), keeping onboarding responsive. The picker sorts the merged result
-/// by recency, so the newest session across all CLIs floats to the top.
 pub(crate) fn load_external_cli_sessions_grouped_multi(
-    clis: &[crate::tui::app::onboarding_flow::ExternalCli],
+    _clis: &[crate::tui::app::onboarding_flow::ExternalCli],
 ) -> (Vec<ServerGroup>, Vec<SessionInfo>) {
-    use crate::tui::app::onboarding_flow::ExternalCli;
     let scan_limit = session_scan_limit();
-    let mut sessions = Vec::new();
-    let mut seen_codex = false;
-    let mut seen_claude = false;
-    for cli in clis {
-        match cli {
-            ExternalCli::Codex if !seen_codex => {
-                seen_codex = true;
-                sessions.extend(load_external_codex_sessions(scan_limit));
-            }
-            ExternalCli::ClaudeCode if !seen_claude => {
-                seen_claude = true;
-                sessions.extend(load_external_claude_code_sessions(scan_limit));
-            }
-            _ => {}
-        }
-    }
+    let sessions = load_external_casr_sessions(scan_limit);
     (Vec::new(), sessions)
 }