From 86ac356b9e0bf7efb9a1d9db03e014a44ada1da4 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 01:11:31 -0700 Subject: [PATCH 01/57] fix(provider): keep API-key vs OAuth auth mode across the two persisted provider_key vocabularies Claude (and OpenAI) sessions could silently shift from an API key onto the OAuth subscription. Root cause: two divergent provider_key vocabularies persist into sessions, and the session-reconstruction helpers only understood one of them. - The structured model-route picker (RPC) persists RuntimeKey::stable_id() values: claude-oauth / anthropic-api-key / openai-oauth / openai-api-key. - The legacy /model + login path persists: claude / claude-api / openai / openai-api. model_switch_request_for_session_model and session_provider_key_matches_provider_name only matched the legacy vocabulary. A session whose provider_key was 'anthropic-api-key' (without a separately-persisted route_api_method, e.g. a forked/child/ambient/ overnight session) therefore reconstructed a bare model with no auth prefix, leaving the Anthropic provider in Auto mode -- which now prefers OAuth (commit 00e9b9ff) -- silently moving an API-key user onto the subscription. Fix: - Add canonical_session_provider_key() to fold the picker vocabulary back onto the canonical keys, and apply it in the reconstruction/match helpers so either vocabulary recovers the exact OAuth-vs-API-key route. - Carry route_api_method alongside provider_key when copying a parent session to a child (ambient, overnight, fork, selfdev, crash recovery) so children reconstruct the full route even without the canonicalizer. Adds a regression test proving anthropic-api-key/openai-api-key/-oauth provider keys preserve the auth route without route_api_method. 
--- crates/jcode-app-core/src/ambient/runner.rs | 1 + crates/jcode-app-core/src/overnight.rs | 1 + .../src/server/client_actions.rs | 1 + .../jcode-app-core/src/tool/selfdev/launch.rs | 1 + crates/jcode-base/src/provider/selection.rs | 94 ++++++++++++++++++- crates/jcode-base/src/session/crash.rs | 1 + 6 files changed, 98 insertions(+), 1 deletion(-) diff --git a/crates/jcode-app-core/src/ambient/runner.rs b/crates/jcode-app-core/src/ambient/runner.rs index 733f2da38..4c49ffcb9 100644 --- a/crates/jcode-app-core/src/ambient/runner.rs +++ b/crates/jcode-app-core/src/ambient/runner.rs @@ -419,6 +419,7 @@ impl AmbientRunnerHandle { child.replace_messages(parent.messages.clone()); child.compaction = parent.compaction.clone(); child.provider_key = parent.provider_key.clone(); + child.route_api_method = parent.route_api_method.clone(); child.model = parent.model.clone(); child.subagent_model = parent.subagent_model.clone(); child.improve_mode = parent.improve_mode; diff --git a/crates/jcode-app-core/src/overnight.rs b/crates/jcode-app-core/src/overnight.rs index a619cdaaf..e23c9663d 100644 --- a/crates/jcode-app-core/src/overnight.rs +++ b/crates/jcode-app-core/src/overnight.rs @@ -175,6 +175,7 @@ fn create_coordinator_session(parent: &Session, mission: &Option) -> Res child.replace_messages(parent.messages.clone()); child.compaction = parent.compaction.clone(); child.provider_key = parent.provider_key.clone(); + child.route_api_method = parent.route_api_method.clone(); child.reasoning_effort = parent.reasoning_effort.clone(); child.subagent_model = parent.subagent_model.clone(); child.improve_mode = parent.improve_mode; diff --git a/crates/jcode-app-core/src/server/client_actions.rs b/crates/jcode-app-core/src/server/client_actions.rs index 77c34abf0..ead8ff6d6 100644 --- a/crates/jcode-app-core/src/server/client_actions.rs +++ b/crates/jcode-app-core/src/server/client_actions.rs @@ -737,6 +737,7 @@ fn create_transfer_child_session( child.working_dir = 
parent.working_dir.clone(); child.model = parent.model.clone(); child.provider_key = parent.provider_key.clone(); + child.route_api_method = parent.route_api_method.clone(); child.subagent_model = parent.subagent_model.clone(); child.improve_mode = parent.improve_mode; child.autoreview_enabled = parent.autoreview_enabled; diff --git a/crates/jcode-app-core/src/tool/selfdev/launch.rs b/crates/jcode-app-core/src/tool/selfdev/launch.rs index f4ab3c39b..17194ebd1 100644 --- a/crates/jcode-app-core/src/tool/selfdev/launch.rs +++ b/crates/jcode-app-core/src/tool/selfdev/launch.rs @@ -20,6 +20,7 @@ pub fn enter_selfdev_session( child.compaction = parent.compaction.clone(); child.model = parent.model.clone(); child.provider_key = parent.provider_key.clone(); + child.route_api_method = parent.route_api_method.clone(); child.subagent_model = parent.subagent_model.clone(); child.improve_mode = parent.improve_mode; child.autoreview_enabled = parent.autoreview_enabled; diff --git a/crates/jcode-base/src/provider/selection.rs b/crates/jcode-base/src/provider/selection.rs index b0362ada1..40a9ac453 100644 --- a/crates/jcode-base/src/provider/selection.rs +++ b/crates/jcode-base/src/provider/selection.rs @@ -192,6 +192,34 @@ impl MultiProvider { } } + /// Canonicalize a persisted session `provider_key` into the legacy + /// vocabulary the reconstruction helpers below understand. + /// + /// Two vocabularies persist into sessions and must be treated as + /// equivalent, otherwise the OAuth-vs-API-key auth mode is silently lost on + /// restore/model-switch: + /// + /// - Legacy `/model` + login path: `claude` / `claude-api` / `openai` / + /// `openai-api`. + /// - Structured model-route picker (`RouteSelection::stable_id`): + /// `claude-oauth` / `anthropic-api-key` / `openai-oauth` / + /// `openai-api-key`. 
+ /// + /// Both encode the same auth route; we fold the picker forms back onto the + /// canonical keys so a session whose `provider_key` is `anthropic-api-key` + /// (and whose `route_api_method` was not also persisted, e.g. inherited by a + /// child/forked session) still reconstructs the Anthropic API-key route + /// instead of falling through to Auto (which prefers OAuth). + pub(crate) fn canonical_session_provider_key(provider_key: &str) -> &str { + match provider_key.trim() { + "claude-oauth" => "claude", + "anthropic-api-key" => "claude-api", + "openai-oauth" => "openai", + "openai-api-key" => "openai-api", + other => other, + } + } + fn explicit_session_provider_key_for_model_request(model_request: &str) -> Option { let model_request = model_request.trim(); if let Some((prefix, rest)) = model_request.split_once(':') { @@ -287,7 +315,7 @@ impl MultiProvider { } fn session_provider_key_matches_provider_name(provider_key: &str, provider_name: &str) -> bool { - let provider_key = provider_key.trim(); + let provider_key = Self::canonical_session_provider_key(provider_key.trim()); let Some(derived) = Self::session_provider_key_from_provider_name(provider_name) .or_else(|| crate::session::derive_session_provider_key(provider_name)) else { @@ -342,6 +370,11 @@ impl MultiProvider { else { return model.to_string(); }; + // Fold the structured-picker vocabulary (`anthropic-api-key`, + // `openai-oauth`, ...) onto the canonical keys so the OAuth-vs-API-key + // route survives even when only `provider_key` was persisted (e.g. a + // forked/child session that inherited it without `route_api_method`). 
+ let provider_key = Self::canonical_session_provider_key(provider_key); match provider_key { "claude-api" => format!("claude-api:{model}"), @@ -615,6 +648,65 @@ mod tests { ); } + #[test] + fn session_provider_key_picker_vocabulary_preserves_auth_mode_without_route() { + // The structured model-route picker persists `RuntimeKey::stable_id()` + // values (`anthropic-api-key`, `openai-oauth`, ...). When a child/forked + // session inherits only `provider_key` without `route_api_method`, the + // reconstruction helpers must still recover the exact OAuth-vs-API-key + // route instead of dropping to Auto (which prefers OAuth) and silently + // shifting an API-key user onto the subscription. + for (model, provider_key, expected_request) in [ + ( + "claude-opus-4-8", + Some("anthropic-api-key"), + "claude-api:claude-opus-4-8", + ), + ( + "claude-opus-4-8", + Some("claude-oauth"), + "claude-oauth:claude-opus-4-8", + ), + ("gpt-5.5", Some("openai-api-key"), "openai-api:gpt-5.5"), + ("gpt-5.5", Some("openai-oauth"), "openai-oauth:gpt-5.5"), + ] { + assert_eq!( + MultiProvider::model_switch_request_for_session_model(model, provider_key), + expected_request, + "restore {model:?} with picker provider_key {provider_key:?}" + ); + } + + // The same picker vocabulary must be recognized as matching its provider + // so an auth-change rewrite keeps the persisted key instead of + // overwriting it with the canonical name (losing the auth mode). 
+ for (model, provider_name, previous_key, expected_key) in [ + ( + "claude-opus-4-8", + "Anthropic", + Some("anthropic-api-key"), + Some("anthropic-api-key"), + ), + ( + "gpt-5.5", + "OpenAI", + Some("openai-api-key"), + Some("openai-api-key"), + ), + ] { + assert_eq!( + MultiProvider::session_provider_key_after_model_switch( + model, + provider_name, + previous_key, + ) + .as_deref(), + expected_key, + "{model:?} via {provider_name:?} keeps picker key {previous_key:?}" + ); + } + } + #[test] fn route_defaults_are_derived_consistently() { let copilot = MultiProvider::default_model_selection_from_route( diff --git a/crates/jcode-base/src/session/crash.rs b/crates/jcode-base/src/session/crash.rs index 1896c4da1..d49a7dac9 100644 --- a/crates/jcode-base/src/session/crash.rs +++ b/crates/jcode-base/src/session/crash.rs @@ -128,6 +128,7 @@ fn recover_loaded_crashed_sessions(mut crashed: Vec) -> Result Date: Fri, 5 Jun 2026 01:17:17 -0700 Subject: [PATCH 02/57] telemetry: add served dashboard with accurate 'total users' headline The worker previously only accepted POST /v1/event; there was no visual dashboard (just SQL files run by hand). Add a real one. Headline metric (users.sql + stats.js): total_users = distinct, non-CI telemetry_id that ever installed OR did meaningful work. Validated with sqlite edge-case repros (install-only, turn_end-only with lost session_end, empty open/close, CI). Reported alongside broader tiers (reached) and narrower tiers (core, installed) plus raw CI-inclusive totals so no signal is removed. - src/stats.js: read-only aggregation (counts only, never raw rows) over users, DAU/WAU/MAU rollup, installs, D7 retention, engagement quality, per-turn, errors, feature adoption, transport, version/os/channel/ provider/auth/onboarding breakdowns, 60d timeseries, recent feedback. One shared MEANINGFUL_SQL predicate so every window agrees. 
- src/worker.js: GET / serves the dashboard, GET /v1/stats serves JSON gated behind DASHBOARD_TOKEN (deny-by-default), POST /v1/event unchanged. CORS widened to GET. - src/dashboard.js: self-contained HTML/CSS/inline-SVG dashboard (no CDN, works under Cloudflare). Tiered layout: hero total-users number, active funnel + chart, 'how the number is built' transparency band, then acquisition/retention, engagement, reliability, breakdowns, features, feedback. Importance shown via hero/key tags/muted diagnostics. - README + package.json: dashboard usage, DASHBOARD_TOKEN setup, npm run users; type:module to silence ESM warning. Validated: node --check on all modules, getStats end-to-end against a seeded sqlite D1 shim (total_users=3 with CI excluded), and rendered in a real browser (token gate + every section + charts). --- telemetry-worker/README.md | 32 +++ telemetry-worker/package.json | 4 +- telemetry-worker/src/dashboard.js | 398 ++++++++++++++++++++++++++++++ telemetry-worker/src/stats.js | 274 ++++++++++++++++++++ telemetry-worker/src/worker.js | 57 ++++- telemetry-worker/users.sql | 61 +++++ 6 files changed, 822 insertions(+), 4 deletions(-) create mode 100644 telemetry-worker/src/dashboard.js create mode 100644 telemetry-worker/src/stats.js create mode 100644 telemetry-worker/users.sql diff --git a/telemetry-worker/README.md b/telemetry-worker/README.md index 9d7b289c8..e58b87b6a 100644 --- a/telemetry-worker/README.md +++ b/telemetry-worker/README.md @@ -2,6 +2,38 @@ Cloudflare Worker that receives anonymous telemetry events from jcode. +## Dashboard + +The worker also serves a visual dashboard so you do not have to run SQL by hand: + +- `GET /` (or `/dashboard`) - the HTML dashboard. Public page, no data until a + token is entered. +- `GET /v1/stats` - JSON aggregates (counts only, never raw event rows), gated + behind `DASHBOARD_TOKEN`. Accepts `Authorization: Bearer <token>`, + `?token=<token>`, or `X-Dashboard-Token`. +- `POST /v1/event` - unchanged event ingest. 
+ +The headline number is **Total users**: distinct, non-CI `telemetry_id`s that +ever installed jcode OR did meaningful work in it. The page shows every metric +the API returns, organized into tiers (hero / key cards / diagnostic tables) so +the important numbers stand out while nothing is hidden. Each user tier (reached +> total > core) is broader than the one below it, and CI / raw figures are shown +alongside for transparency. + +Set the token once (it is a Worker secret, not in source): + +```bash +wrangler secret put DASHBOARD_TOKEN +# then open https://<your-worker-host>/ and paste the token +``` + +If `DASHBOARD_TOKEN` is unset the stats endpoint stays locked (deny by default). +The CLI equivalent of the headline number: + +```bash +wrangler d1 execute jcode-telemetry --remote --file=users.sql +``` + ## Setup 1. Install wrangler: `npm install` diff --git a/telemetry-worker/package.json b/telemetry-worker/package.json index 72ddec2dc..693f266a1 100644 --- a/telemetry-worker/package.json +++ b/telemetry-worker/package.json @@ -2,6 +2,7 @@ "name": "jcode-telemetry", "version": "1.0.0", "private": true, + "type": "module", "scripts": { "dev": "npx wrangler dev", "deploy": "npx wrangler deploy", @@ -18,7 +19,8 @@ "migrate:daily-active-backfill": "npx wrangler d1 execute jcode-telemetry --remote --file=migrations/0011_backfill_daily_active_recent.sql", "migrate:daily-active-ci": "npx wrangler d1 execute jcode-telemetry --remote --file=migrations/0012_daily_active_ci_flag.sql", "health": "npx wrangler d1 execute jcode-telemetry --remote --file=health.sql", - "dau": "npx wrangler d1 execute jcode-telemetry --remote --file=dau.sql" + "dau": "npx wrangler d1 execute jcode-telemetry --remote --file=dau.sql", + "users": "npx wrangler d1 execute jcode-telemetry --remote --file=users.sql" }, "devDependencies": { "wrangler": "^4" diff --git a/telemetry-worker/src/dashboard.js b/telemetry-worker/src/dashboard.js new file mode 100644 index 000000000..f40246119 --- /dev/null +++ 
b/telemetry-worker/src/dashboard.js @@ -0,0 +1,398 @@ +// Self-contained dashboard page (HTML + CSS + JS, no external dependencies so it +// works under Cloudflare with no CDN/CSP issues). Charts are drawn as inline SVG. +// +// The page fetches /v1/stats with the dashboard token (entered once, stored in +// localStorage) and renders tiered metrics: a hero "total users" number, the +// active-user funnel, then secondary KPIs and diagnostic breakdowns. Every +// metric the API returns is shown; importance is conveyed visually (hero / +// primary cards / muted diagnostic tables) and via short "why it matters" notes. + +export const DASHBOARD_HTML = ` + + + + +jcode telemetry + + + +
+ + + + + +
+ + + +`; diff --git a/telemetry-worker/src/stats.js b/telemetry-worker/src/stats.js new file mode 100644 index 000000000..9ee9d0881 --- /dev/null +++ b/telemetry-worker/src/stats.js @@ -0,0 +1,274 @@ +// Read-only telemetry aggregation for the dashboard. +// +// Everything here returns counts/aggregates only, never raw event rows. Metrics +// are organized into tiers (headline / secondary / diagnostic) and tagged with +// importance so the dashboard can present "the one number" prominently while +// still surfacing all available information. +// +// Accuracy rules (mirrors README "Accuracy notes"): +// - Users are distinct telemetry_id, never event counts. +// - "meaningful" = real work; see MEANINGFUL_SQL. +// - Headline numbers exclude CI traffic (is_ci = 1) and non-release channels. +// - Raw / less-filtered tiers are always reported alongside, never removed. + +// Meaningful-activity predicate, shared by every query so all windows agree. +// A row is meaningful if it is a session_end/session_crash that did real work, +// OR a turn_end (which only fires after a completed user turn) that did work. 
+const MEANINGFUL_SQL = `( + (event IN ('session_end','session_crash') AND ( + turns > 0 OR had_user_prompt > 0 OR had_assistant_response > 0 + OR assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 + OR duration_secs > 0 OR error_provider_timeout > 0 OR error_auth_failed > 0 + OR error_tool_error > 0 OR error_mcp_error > 0 OR error_rate_limited > 0 + OR provider_switches > 0 OR model_switches > 0 + )) + OR (event = 'turn_end' AND ( + assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 + OR file_write_calls > 0 OR tests_run > 0 OR turn_success > 0 + )) +)`; + +const LIFECYCLE_EVENTS = "('session_start','turn_end','session_end','session_crash')"; + +async function one(env, sql) { + const result = await env.DB.prepare(sql).all(); + return (result.results && result.results[0]) || {}; +} + +async function many(env, sql) { + const result = await env.DB.prepare(sql).all(); + return result.results || []; +} + +export async function getStats(env) { + // --- Headline: total users (the one number) ----------------------------- + // A user is a distinct non-CI id that ever installed OR did meaningful work. + const totals = await one(env, ` + SELECT + COUNT(DISTINCT CASE WHEN is_ci = 0 AND (event = 'install' OR ${MEANINGFUL_SQL}) THEN telemetry_id END) AS total_users, + COUNT(DISTINCT CASE WHEN is_ci = 0 AND ${MEANINGFUL_SQL} THEN telemetry_id END) AS core_users, + COUNT(DISTINCT CASE WHEN is_ci = 0 THEN telemetry_id END) AS reached_users, + COUNT(DISTINCT CASE WHEN is_ci = 0 AND event = 'install' THEN telemetry_id END) AS installed_users, + COUNT(DISTINCT telemetry_id) AS all_ids_including_ci, + COUNT(DISTINCT CASE WHEN is_ci = 1 THEN telemetry_id END) AS ci_ids + FROM events + `); + + // --- Active users from the rollup (cheap, ingest-time) ------------------- + // DAU/WAU/MAU as distinct ids, headline = meaningful + release + non-CI. 
+ const active = await one(env, ` + SELECT + COUNT(DISTINCT CASE WHEN activity_date = date('now') THEN telemetry_id END) AS dau_raw, + COUNT(DISTINCT CASE WHEN activity_date = date('now') AND meaningful_active > 0 THEN telemetry_id END) AS dau_meaningful, + COUNT(DISTINCT CASE WHEN activity_date = date('now') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS dau, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') THEN telemetry_id END) AS wau_raw, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') AND meaningful_active > 0 THEN telemetry_id END) AS wau_meaningful, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS wau, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') THEN telemetry_id END) AS mau_raw, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND meaningful_active > 0 THEN telemetry_id END) AS mau_meaningful, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS mau, + COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND last_is_ci = 1 THEN telemetry_id END) AS ci_mau + FROM daily_active_users + `); + + // --- Installs and lifecycle totals -------------------------------------- + const lifecycle = await one(env, ` + SELECT + SUM(CASE WHEN event = 'install' THEN 1 ELSE 0 END) AS install_events, + SUM(CASE WHEN event = 'upgrade' THEN 1 ELSE 0 END) AS upgrade_events, + SUM(CASE WHEN event = 'session_start' THEN 1 ELSE 0 END) AS session_starts, + SUM(CASE WHEN event = 'session_end' THEN 1 ELSE 0 END) AS session_ends, + SUM(CASE WHEN event = 'session_crash' THEN 1 ELSE 0 END) AS session_crashes, + SUM(CASE WHEN event = 'turn_end' THEN 1 ELSE 0 END) AS turn_ends, + COUNT(DISTINCT CASE WHEN event = 'install' THEN telemetry_id END) AS install_ids, + COUNT(DISTINCT CASE WHEN event = 
'install' AND is_ci = 0 THEN telemetry_id END) AS install_ids_noci + FROM events + WHERE event IN ('install','upgrade','session_start','turn_end','session_end','session_crash') + `); + const lifecycleCompletion = + (lifecycle.session_starts || 0) > 0 + ? Number(((lifecycle.session_ends + lifecycle.session_crashes) / lifecycle.session_starts).toFixed(3)) + : null; + const crashRate = + (lifecycle.session_ends + lifecycle.session_crashes) > 0 + ? Number((lifecycle.session_crashes / (lifecycle.session_ends + lifecycle.session_crashes)).toFixed(4)) + : null; + + // --- New vs returning (last 30d), retention ----------------------------- + const retention = await one(env, ` + WITH cohort AS ( + SELECT DISTINCT telemetry_id FROM events + WHERE event = 'install' AND is_ci = 0 + AND created_at >= datetime('now','-14 days') AND created_at < datetime('now','-7 days') + ), retained AS ( + SELECT DISTINCT telemetry_id FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + AND created_at >= datetime('now','-7 days') + ) + SELECT + (SELECT COUNT(*) FROM cohort) AS d7_cohort, + (SELECT COUNT(*) FROM cohort WHERE telemetry_id IN retained) AS d7_retained + `); + const d7Retention = + (retention.d7_cohort || 0) > 0 + ? 
Number((retention.d7_retained / retention.d7_cohort).toFixed(3)) + : null; + + // --- 30d engagement quality --------------------------------------------- + const quality = await one(env, ` + SELECT + AVG(duration_mins) AS avg_session_mins, + AVG(turns) AS avg_turns, + AVG(CASE WHEN session_success > 0 THEN 1.0 ELSE 0.0 END) AS success_rate, + AVG(CASE WHEN abandoned_before_response > 0 THEN 1.0 ELSE 0.0 END) AS abandon_rate, + AVG(first_assistant_response_ms) AS avg_first_response_ms, + AVG(CASE WHEN executed_tool_calls > 0 THEN CAST(tool_latency_total_ms AS REAL)/executed_tool_calls END) AS avg_tool_latency_ms, + SUM(input_tokens + output_tokens) AS tokens_30d, + AVG(CASE WHEN multi_sessioned > 0 THEN 1.0 ELSE 0.0 END) AS multi_session_rate + FROM events + WHERE event IN ('session_end','session_crash') + AND is_ci = 0 AND created_at > datetime('now','-30 days') + `); + + // --- Per-turn metrics (30d) --------------------------------------------- + const turns = await one(env, ` + SELECT + AVG(turn_active_duration_ms) AS avg_turn_ms, + AVG(CASE WHEN turn_success > 0 THEN 1.0 ELSE 0.0 END) AS turn_success_rate + FROM events + WHERE event = 'turn_end' AND is_ci = 0 AND created_at > datetime('now','-30 days') + `); + + // --- Errors (30d) -------------------------------------------------------- + const errors = await one(env, ` + SELECT + SUM(error_provider_timeout) AS provider_timeout, + SUM(error_auth_failed) AS auth_failed, + SUM(error_tool_error) AS tool_error, + SUM(error_mcp_error) AS mcp_error, + SUM(error_rate_limited) AS rate_limited + FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + AND created_at > datetime('now','-30 days') + `); + + // --- Feature adoption (30d, distinct users) ----------------------------- + const features = await one(env, ` + SELECT + COUNT(DISTINCT CASE WHEN feature_memory_used > 0 THEN telemetry_id END) AS memory, + COUNT(DISTINCT CASE WHEN feature_swarm_used > 0 THEN telemetry_id END) AS swarm, + 
COUNT(DISTINCT CASE WHEN feature_web_used > 0 THEN telemetry_id END) AS web, + COUNT(DISTINCT CASE WHEN feature_email_used > 0 THEN telemetry_id END) AS email, + COUNT(DISTINCT CASE WHEN feature_mcp_used > 0 THEN telemetry_id END) AS mcp, + COUNT(DISTINCT CASE WHEN feature_side_panel_used > 0 THEN telemetry_id END) AS side_panel, + COUNT(DISTINCT CASE WHEN feature_goal_used > 0 THEN telemetry_id END) AS goal, + COUNT(DISTINCT CASE WHEN feature_selfdev_used > 0 THEN telemetry_id END) AS selfdev, + COUNT(DISTINCT CASE WHEN feature_background_used > 0 THEN telemetry_id END) AS background, + COUNT(DISTINCT CASE WHEN feature_subagent_used > 0 THEN telemetry_id END) AS subagent + FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + AND created_at > datetime('now','-30 days') + `); + + // --- Transport mix (30d) ------------------------------------------------- + const transport = await one(env, ` + SELECT + SUM(transport_https) AS https, + SUM(transport_persistent_ws_fresh) AS ws_fresh, + SUM(transport_persistent_ws_reuse) AS ws_reuse, + SUM(transport_cli_subprocess) AS cli, + SUM(transport_native_http2) AS native_http2, + SUM(transport_other) AS other + FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + AND created_at > datetime('now','-30 days') + `); + + // --- Breakdowns (distinct users) ---------------------------------------- + const versions = await many(env, ` + SELECT version, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE is_ci = 0 AND version IS NOT NULL + GROUP BY version ORDER BY users DESC LIMIT 12 + `); + const os = await many(env, ` + SELECT os, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE is_ci = 0 AND os IS NOT NULL + GROUP BY os ORDER BY users DESC + `); + const channels = await many(env, ` + SELECT COALESCE(build_channel,'unknown') AS build_channel, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE event IN ('session_end','session_crash') + GROUP BY build_channel ORDER 
BY users DESC + `); + const providers = await many(env, ` + SELECT COALESCE(provider_end,'unknown') AS provider, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE event IN ('session_end','session_crash') AND is_ci = 0 AND ${MEANINGFUL_SQL} + GROUP BY provider_end ORDER BY users DESC LIMIT 12 + `); + const auth = await many(env, ` + SELECT COALESCE(auth_provider,'unknown') AS auth_provider, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE event = 'auth_success' AND is_ci = 0 + GROUP BY auth_provider ORDER BY users DESC LIMIT 12 + `); + const onboarding = await many(env, ` + SELECT step, COUNT(DISTINCT telemetry_id) AS users + FROM events WHERE event = 'onboarding_step' AND is_ci = 0 AND step IS NOT NULL + GROUP BY step ORDER BY users DESC + `); + + // --- Daily timeseries (last 60 days) for charts ------------------------- + const daily = await many(env, ` + SELECT + activity_date AS date, + COUNT(DISTINCT telemetry_id) AS raw, + COUNT(DISTINCT CASE WHEN meaningful_active > 0 THEN telemetry_id END) AS meaningful, + COUNT(DISTINCT CASE WHEN meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS headline, + COUNT(DISTINCT CASE WHEN last_is_ci = 1 THEN telemetry_id END) AS ci + FROM daily_active_users + WHERE activity_date > date('now','-60 days') + GROUP BY activity_date ORDER BY activity_date + `); + const dailyInstalls = await many(env, ` + SELECT date(created_at) AS date, COUNT(DISTINCT telemetry_id) AS installs + FROM events + WHERE event = 'install' AND is_ci = 0 AND created_at > datetime('now','-60 days') + GROUP BY date(created_at) ORDER BY date(created_at) + `); + + // --- Recent feedback (text only, no identifiers) ------------------------ + const feedback = await many(env, ` + SELECT created_at, feedback_text, feedback_rating, feedback_reason, version + FROM events + WHERE event = 'feedback' AND feedback_text IS NOT NULL + ORDER BY created_at DESC LIMIT 25 + `); + + return { + generated_at: new Date().toISOString(), + 
headline: { + total_users: totals.total_users || 0, + dau: active.dau || 0, + wau: active.wau || 0, + mau: active.mau || 0, + }, + users: { + total_users: totals.total_users || 0, + core_users: totals.core_users || 0, + installed_users: totals.installed_users || 0, + reached_users: totals.reached_users || 0, + all_ids_including_ci: totals.all_ids_including_ci || 0, + ci_ids: totals.ci_ids || 0, + }, + active, + lifecycle: { ...lifecycle, lifecycle_completion_ratio: lifecycleCompletion, crash_rate: crashRate }, + retention: { ...retention, d7_retention: d7Retention }, + quality, + turns, + errors, + features, + transport, + breakdowns: { versions, os, channels, providers, auth, onboarding }, + timeseries: { daily, installs: dailyInstalls }, + feedback, + }; +} diff --git a/telemetry-worker/src/worker.js b/telemetry-worker/src/worker.js index ecd45ae7e..b14ae2dd6 100644 --- a/telemetry-worker/src/worker.js +++ b/telemetry-worker/src/worker.js @@ -1,3 +1,6 @@ +import { getStats } from "./stats.js"; +import { DASHBOARD_HTML } from "./dashboard.js"; + let cachedEventColumns = null; let cachedSessionDetailColumns = null; let cachedTurnDetailColumns = null; @@ -10,11 +13,33 @@ export default { }); } + const url = new URL(request.url); + + // Read-only dashboard surface (GET). The HTML page is public; the JSON stats + // endpoint is gated behind DASHBOARD_TOKEN so raw aggregates are not exposed + // to anyone who finds the URL. Raw events are never returned, only counts. 
+ if (request.method === "GET") { + if (url.pathname === "/" || url.pathname === "/dashboard") { + return htmlResponse(DASHBOARD_HTML); + } + if (url.pathname === "/v1/stats") { + if (!isAuthorized(request, env)) { + return jsonResponse({ error: "Unauthorized" }, 401); + } + try { + const stats = await getStats(env); + return jsonResponse(stats); + } catch (err) { + return jsonResponse({ error: "Internal error", detail: String(err?.message || err) }, 500); + } + } + return jsonResponse({ error: "Not found" }, 404); + } + if (request.method !== "POST") { return jsonResponse({ error: "Method not allowed" }, 405); } - const url = new URL(request.url); if (url.pathname !== "/v1/event") { return jsonResponse({ error: "Not found" }, 404); } @@ -54,6 +79,21 @@ export default { }, }; +// When DASHBOARD_TOKEN is unset the stats endpoint stays locked (deny by +// default) rather than leaking aggregates. Accepts either a Bearer header or a +// ?token= query param so it works from curl and the browser fetch alike. +function isAuthorized(request, env) { + const expected = env.DASHBOARD_TOKEN; + if (!expected) { + return false; + } + const url = new URL(request.url); + const header = request.headers.get("authorization") || ""; + const bearer = header.startsWith("Bearer ") ? 
header.slice(7) : null; + const provided = bearer || url.searchParams.get("token") || request.headers.get("x-dashboard-token"); + return provided != null && provided === expected; +} + async function insertEvent(env, body) { const columns = await getEventColumns(env); const sessionDetailColumns = await getSessionDetailColumns(env); @@ -593,10 +633,21 @@ function jsonResponse(data, status = 200) { }); } +function htmlResponse(html, status = 200) { + return new Response(html, { + status, + headers: { + "Content-Type": "text/html; charset=utf-8", + "Cache-Control": "no-store", + ...corsHeaders(), + }, + }); +} + function corsHeaders() { return { "Access-Control-Allow-Origin": "*", - "Access-Control-Allow-Methods": "POST, OPTIONS", - "Access-Control-Allow-Headers": "Content-Type", + "Access-Control-Allow-Methods": "GET, POST, OPTIONS", + "Access-Control-Allow-Headers": "Content-Type, Authorization, X-Dashboard-Token", }; } diff --git a/telemetry-worker/users.sql b/telemetry-worker/users.sql new file mode 100644 index 000000000..53a0e8601 --- /dev/null +++ b/telemetry-worker/users.sql @@ -0,0 +1,61 @@ +-- Canonical "total users" definitions for jcode telemetry. +-- Usage: +-- wrangler d1 execute jcode-telemetry --remote --file=users.sql +-- +-- Headline number: total_users. A "user" is a distinct, non-CI telemetry_id that +-- ever either installed jcode or did meaningful work in it. We exclude CI traffic +-- (ephemeral runners mint a fresh id per job) and exclude empty open/close +-- sessions that never did anything. Raw, less-filtered tiers are reported +-- alongside it so no signal is hidden. +-- +-- Caveats (see README "Accuracy notes"): telemetry_id is per-machine, so one +-- person on N machines counts as N; opt-outs and network-blocked clients are +-- never counted; CI rows created before the is_ci column existed default to 0 +-- and may slip in. + +SELECT + -- HEADLINE: real people who installed or meaningfully used jcode. 
+ COUNT(DISTINCT CASE WHEN is_ci = 0 AND ( + event = 'install' + OR (event IN ('session_end', 'session_crash') AND ( + turns > 0 OR had_user_prompt > 0 OR had_assistant_response > 0 + OR assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 + OR duration_secs > 0 OR error_provider_timeout > 0 OR error_auth_failed > 0 + OR error_tool_error > 0 OR error_mcp_error > 0 OR error_rate_limited > 0 + OR provider_switches > 0 OR model_switches > 0 + )) + OR (event = 'turn_end' AND ( + assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 + OR file_write_calls > 0 OR tests_run > 0 OR turn_success > 0 + )) + ) THEN telemetry_id END) AS total_users, + + -- Core users: did meaningful work (excludes install-only, never-used ids). + COUNT(DISTINCT CASE WHEN is_ci = 0 AND ( + (event IN ('session_end', 'session_crash') AND ( + turns > 0 OR had_user_prompt > 0 OR had_assistant_response > 0 + OR assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 + OR duration_secs > 0 OR error_provider_timeout > 0 OR error_auth_failed > 0 + OR error_tool_error > 0 OR error_mcp_error > 0 OR error_rate_limited > 0 + OR provider_switches > 0 OR model_switches > 0 + )) + OR (event = 'turn_end' AND ( + assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 + OR file_write_calls > 0 OR tests_run > 0 OR turn_success > 0 + )) + ) THEN telemetry_id END) AS core_users, + + -- Reach: every distinct non-CI id that ever launched jcode (incl. empty + -- open/close sessions). Upper bound on "people who ran it at least once". + COUNT(DISTINCT CASE WHEN is_ci = 0 THEN telemetry_id END) AS reached_users, + + -- Installs only (non-CI), for comparison with total_users. + COUNT(DISTINCT CASE WHEN is_ci = 0 AND event = 'install' THEN telemetry_id END) AS installed_users, + + -- Unfiltered grand total (includes CI + dev). Never use as the headline; + -- kept for transparency and for sizing CI noise. 
+ COUNT(DISTINCT telemetry_id) AS all_ids_including_ci, + + -- CI-only ids, so the gap between all_ids and total_users is explainable. + COUNT(DISTINCT CASE WHEN is_ci = 1 THEN telemetry_id END) AS ci_ids +FROM events; From 14c73b622b0b2d1b94776d682b027a4a30625c99 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 01:26:58 -0700 Subject: [PATCH 03/57] test(provider-doctor): cover multi-call thought_signature replay The native tool smoke only ever drove a single tool-call round-trip, so it always replayed exactly one thought_signature and passed even when an earlier function call would drop its signature. The Antigravity/Cloud Code backend validates *every* functionCall in the replayed history, so the field 400 ("Function call is missing a thought_signature ... position N") only reproduces with a multi-call transcript. - Extend run_live_native_provider_tool_smoke into two phases: the historical single round-trip (gating) plus a best-effort multi-call replay that rebuilds a history of two assistant tool_use blocks, each carrying its own signature. - Delegate run_live_antigravity_native_tool_smoke to the shared probe so Antigravity (the runtime that hit this) gets the multi-call coverage too. - Add an always-on unit guard (build_contents_replays_every_signature_across_ multi_tool_history) so the serialization regression is caught for free, without spending live tokens. 
--- .../src/auth/live_provider_probes.rs | 344 +++++++++--------- .../jcode-base/src/provider/gemini_tests.rs | 49 +++ 2 files changed, 221 insertions(+), 172 deletions(-) diff --git a/crates/jcode-base/src/auth/live_provider_probes.rs b/crates/jcode-base/src/auth/live_provider_probes.rs index 1da06404b..2c9b4bfb4 100644 --- a/crates/jcode-base/src/auth/live_provider_probes.rs +++ b/crates/jcode-base/src/auth/live_provider_probes.rs @@ -1145,143 +1145,21 @@ pub async fn run_live_antigravity_native_stream_smoke( /// Stage: tool-call parse + execution loop + result follow-up. /// -/// Full two-turn round-trip: ask the model to call a tool (assert a parseable -/// tool_use), then feed a synthetic tool_result back (assert the model consumes -/// it). Gemini-3 attaches a `thought_signature` to its function call that the -/// Cloud Code backend requires replayed on the follow-up turn, so we carry it -/// onto the assistant tool_use block. Evidence for the `tool_call_parse`, -/// `tool_execution_loop`, `tool_result_followup`, and `real_jcode_tool_smoke` -/// checkpoints. +/// Delegates to the shared native tool smoke ([`run_live_native_provider_tool_smoke`]) +/// so Antigravity exercises the same two phases as every other native runtime: +/// a single round-trip plus a **multi-call signature replay** that rebuilds a +/// history of two assistant `tool_use` blocks. Gemini-3 attaches a +/// `thought_signature` to each function call that the Cloud Code backend +/// requires replayed on later turns; the multi-call phase is what actually +/// reproduces the `400 ... "Function call is missing a thought_signature ... +/// position N"` field failure (a single round-trip cannot). Evidence for the +/// `tool_call_parse`, `tool_execution_loop`, `tool_result_followup`, and +/// `real_jcode_tool_smoke` checkpoints. 
pub async fn run_live_antigravity_native_tool_smoke( model: &str, ) -> anyhow::Result { - let started = std::time::Instant::now(); let provider = build_native_antigravity_provider(model)?; - - let tool_name = "read"; - let tools = vec![ToolDefinition { - name: tool_name.to_string(), - description: "Reads a file from the local filesystem.".to_string(), - input_schema: serde_json::json!({ - "type": "object", - "properties": {"file_path": {"type": "string"}}, - "required": ["file_path"], - "additionalProperties": false - }), - }]; - let system = "You are a live provider tool smoke test. When asked to read a file, you MUST \ - call the read tool with the given path. Do not answer in text first."; - - let first_turn = vec![Message { - role: Role::User, - content: vec![ContentBlock::Text { - text: "Read the file at /tmp/auth_tool_probe.txt using the read tool. \ - Call the tool now; do not answer in text." - .to_string(), - cache_control: None, - }], - timestamp: None, - tool_duration_ms: None, - }]; - - let first = consume_native_stream( - &provider, - &first_turn, - &tools, - system, - std::time::Duration::from_secs(120), - ) - .await?; - - ensure!( - !first.tool_calls.is_empty(), - "native Antigravity tool smoke produced no tool call (stop_reason={:?}, text={:?})", - first.stop_reason, - crate::util::truncate_str(first.text.trim(), 200) - ); - let tool_call = first.tool_calls[0].clone(); - ensure!( - tool_call.name == tool_name, - "native Antigravity tool smoke called unexpected tool {:?} (expected {tool_name})", - tool_call.name - ); - let parsed_arguments = crate::message::ToolCall::parse_streamed_input_to_object( - if tool_call.input_json.trim().is_empty() { - "{}" - } else { - tool_call.input_json.trim() - }, - ); - ensure!( - parsed_arguments.is_object(), - "native Antigravity tool smoke produced non-object tool arguments: {:?}", - tool_call.input_json - ); - - // Second turn: replay the assistant's tool_use (carrying the Gemini-3 - // thought signature, required 
by the Cloud Code backend) and answer it with - // a synthetic tool_result, then assert the model consumes the result. - let mut followup = first_turn.clone(); - followup.push(Message { - role: Role::Assistant, - content: vec![ContentBlock::ToolUse { - id: tool_call.id.clone(), - name: tool_call.name.clone(), - input: parsed_arguments.clone(), - thought_signature: tool_call.thought_signature.clone(), - }], - timestamp: None, - tool_duration_ms: None, - }); - followup.push(Message { - role: Role::User, - content: vec![ContentBlock::ToolResult { - tool_use_id: tool_call.id.clone(), - content: "TOOL_RESULT_TOKEN=42. Report this token back to confirm you read it." - .to_string(), - is_error: Some(false), - }], - timestamp: None, - tool_duration_ms: None, - }); - - let second = consume_native_stream( - &provider, - &followup, - &tools, - system, - std::time::Duration::from_secs(120), - ) - .await?; - - ensure!( - second.saw_message_end, - "native Antigravity tool follow-up ended without a message_end event" - ); - ensure!( - second.text.contains("42"), - "native Antigravity tool follow-up did not reflect the tool result token: {:?}", - crate::util::truncate_str(second.text.trim(), 200) - ); - - let total_input = first.input_tokens + second.input_tokens; - let total_output = first.output_tokens + second.output_tokens; - let mut stage = crate::live_tests::LiveVerificationStage::passed( - crate::live_tests::checkpoints::TOOL_CALL_PARSE, - ) - .with_duration_ms(started.elapsed().as_millis() as u64) - .with_evidence("model", serde_json::json!(model)) - .with_evidence("tool_name", serde_json::json!(tool_call.name)) - .with_evidence("tool_arguments", parsed_arguments) - .with_evidence( - "thought_signature_present", - serde_json::json!(tool_call.thought_signature.is_some()), - ) - .with_evidence("followup_consumed_result", serde_json::json!(true)); - if total_input != 0 || total_output != 0 { - stage = stage.with_evidence("usage", usage_evidence(total_input, total_output, 0, 
0)); - } - Ok(stage) + run_live_native_provider_tool_smoke(&provider, model, "Antigravity").await } // === Generic native-runtime probes ======================================== @@ -1442,11 +1320,24 @@ pub async fn run_live_native_provider_stream_smoke( /// Stage: tool-call parse + execution loop + result follow-up against an /// arbitrary native provider. /// -/// Full two-turn round-trip: ask the model to call a tool (assert a parseable -/// tool_use), then feed a synthetic tool_result back (assert the model consumes -/// it). Any provider-emitted `thought_signature` (e.g. Gemini-3 via the Cloud -/// Code backend) is carried onto the replayed assistant tool_use block, since -/// some backends reject a follow-up turn that omits it. +/// Two phases: +/// +/// 1. **Single round-trip (gating):** ask the model to call a tool (assert a +/// parseable tool_use), then feed a synthetic tool_result back (assert the +/// model consumes it). This mirrors the historical assertion so providers +/// that already passed keep passing. +/// 2. **Multi-call signature replay (best-effort):** chain a *second* tool call +/// and replay a history that now contains **two** assistant `tool_use` +/// blocks, each carrying its own provider-emitted `thought_signature`. The +/// Antigravity/Cloud Code backend validates every `functionCall` in the +/// replayed history (not just the latest), so a transcript that drops an +/// earlier signature is rejected with `400 ... "Function call is missing a +/// thought_signature ... position N"`. A single round-trip can never +/// reproduce that, so we exercise the multi-call shape here. If the model +/// declines the second tool call (common for providers that do not emit +/// signatures at all), the phase records `multi_tool_replay: "skipped"` +/// rather than failing. A stream error on either extra request, or a final +/// reply without message_end or the second result token, still fails the stage. 
pub async fn run_live_native_provider_tool_smoke( provider: &dyn Provider, model: &str, @@ -1501,49 +1392,27 @@ pub async fn run_live_native_provider_tool_smoke( "native {label} tool smoke called unexpected tool {:?} (expected {tool_name})", tool_call.name ); - let parsed_arguments = crate::message::ToolCall::parse_streamed_input_to_object( - if tool_call.input_json.trim().is_empty() { - "{}" - } else { - tool_call.input_json.trim() - }, - ); + let parsed_arguments = parse_tool_arguments(&tool_call.input_json); ensure!( parsed_arguments.is_object(), "native {label} tool smoke produced non-object tool arguments: {:?}", tool_call.input_json ); - // Second turn: replay the assistant's tool_use (carrying any thought + // Phase 1 (gating): replay the assistant's tool_use (carrying any thought // signature the backend requires) and answer it with a synthetic // tool_result, then assert the model consumes the result. - let mut followup = first_turn.clone(); - followup.push(Message { - role: Role::Assistant, - content: vec![ContentBlock::ToolUse { - id: tool_call.id.clone(), - name: tool_call.name.clone(), - input: parsed_arguments.clone(), - thought_signature: tool_call.thought_signature.clone(), - }], - timestamp: None, - tool_duration_ms: None, - }); - followup.push(Message { - role: Role::User, - content: vec![ContentBlock::ToolResult { - tool_use_id: tool_call.id.clone(), - content: "TOOL_RESULT_TOKEN=42. Report this token back to confirm you read it." - .to_string(), - is_error: Some(false), - }], - timestamp: None, - tool_duration_ms: None, - }); + let mut history = first_turn.clone(); + history.push(assistant_tool_use(&tool_call, &parsed_arguments)); + history.push(tool_result_then_text( + &tool_call.id, + "TOOL_RESULT_TOKEN=42. 
Report this token back to confirm you read it.", + None, + )); let second = consume_native_stream( provider, - &followup, + &history, &tools, system, std::time::Duration::from_secs(120), @@ -1560,8 +1429,84 @@ pub async fn run_live_native_provider_tool_smoke( crate::util::truncate_str(second.text.trim(), 200) ); - let total_input = first.input_tokens + second.input_tokens; - let total_output = first.output_tokens + second.output_tokens; + // Phase 2 (best-effort): drive a second tool call so the replayed history + // carries *two* function calls, then assert the backend accepts the + // multi-call transcript (the only shape that reproduces the + // "missing a thought_signature ... position N" 400). + let mut total_input = first.input_tokens + second.input_tokens; + let mut total_output = first.output_tokens + second.output_tokens; + let mut multi_tool_replay = "skipped"; + let mut signatures_present = vec![tool_call.thought_signature.is_some()]; + + // Ask for a *second* distinct read so the model emits another tool call. + let mut second_request = first_turn.clone(); + second_request.push(assistant_tool_use(&tool_call, &parsed_arguments)); + second_request.push(tool_result_then_text( + &tool_call.id, + "Contents of /tmp/auth_tool_probe.txt: alpha.", + Some( + "Now read the file at /tmp/auth_tool_probe_2.txt using the read tool. \ + Call the tool now; do not answer in text.", + ), + )); + + let third = consume_native_stream( + provider, + &second_request, + &tools, + system, + std::time::Duration::from_secs(120), + ) + .await?; + total_input += third.input_tokens; + total_output += third.output_tokens; + + if let Some(second_call) = third.tool_calls.first().cloned() { + let second_arguments = parse_tool_arguments(&second_call.input_json); + signatures_present.push(second_call.thought_signature.is_some()); + + // Final request: history now contains BOTH tool_use blocks, each + // carrying its own captured signature. 
A dropped earlier signature is + // rejected here with the position-N 400. + let mut final_request = second_request.clone(); + final_request.push(assistant_tool_use(&second_call, &second_arguments)); + final_request.push(tool_result_then_text( + &second_call.id, + "TOOL_RESULT_TOKEN=77. Report this token back to confirm you read it.", + None, + )); + + let fourth = consume_native_stream( + provider, + &final_request, + &tools, + system, + std::time::Duration::from_secs(120), + ) + .await + .with_context(|| { + format!( + "native {label} multi-tool signature replay was rejected (history carried \ + {} function calls; a backend that validates every functionCall signature \ + fails here when an earlier thought_signature is dropped)", + signatures_present.len() + ) + })?; + total_input += fourth.input_tokens; + total_output += fourth.output_tokens; + + ensure!( + fourth.saw_message_end, + "native {label} multi-tool follow-up ended without a message_end event" + ); + ensure!( + fourth.text.contains("77"), + "native {label} multi-tool follow-up did not reflect the second tool result token: {:?}", + crate::util::truncate_str(fourth.text.trim(), 200) + ); + multi_tool_replay = "verified"; + } + let mut stage = crate::live_tests::LiveVerificationStage::passed( crate::live_tests::checkpoints::TOOL_CALL_PARSE, ) @@ -1573,9 +1518,64 @@ pub async fn run_live_native_provider_tool_smoke( "thought_signature_present", serde_json::json!(tool_call.thought_signature.is_some()), ) + .with_evidence("multi_tool_replay", serde_json::json!(multi_tool_replay)) + .with_evidence( + "tool_call_signatures_present", + serde_json::json!(signatures_present), + ) .with_evidence("followup_consumed_result", serde_json::json!(true)); if total_input != 0 || total_output != 0 { stage = stage.with_evidence("usage", usage_evidence(total_input, total_output, 0, 0)); } Ok(stage) } + +/// Parse a streamed tool-call argument blob into a JSON object (empty object for +/// a blank payload), shared by the native 
tool smoke probes. +fn parse_tool_arguments(input_json: &str) -> serde_json::Value { + crate::message::ToolCall::parse_streamed_input_to_object(if input_json.trim().is_empty() { + "{}" + } else { + input_json.trim() + }) +} + +/// Build the assistant `tool_use` replay block for a captured native tool call, +/// preserving any provider-emitted `thought_signature` so backends that require +/// it (Gemini-3 via the Cloud Code/Antigravity runtime) accept the follow-up. +fn assistant_tool_use(call: &NativeClaudeToolCall, arguments: &serde_json::Value) -> Message { + Message { + role: Role::Assistant, + content: vec![ContentBlock::ToolUse { + id: call.id.clone(), + name: call.name.clone(), + input: arguments.clone(), + thought_signature: call.thought_signature.clone(), + }], + timestamp: None, + tool_duration_ms: None, + } +} + +/// Build a user turn carrying a synthetic `tool_result` and, optionally, a +/// follow-up text instruction (used to chain a second tool call in one message +/// so the provider sees a clean user turn rather than two consecutive ones). 
+fn tool_result_then_text(tool_use_id: &str, result: &str, follow_up: Option<&str>) -> Message { + let mut content = vec![ContentBlock::ToolResult { + tool_use_id: tool_use_id.to_string(), + content: result.to_string(), + is_error: Some(false), + }]; + if let Some(text) = follow_up { + content.push(ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }); + } + Message { + role: Role::User, + content, + timestamp: None, + tool_duration_ms: None, + } +} diff --git a/crates/jcode-base/src/provider/gemini_tests.rs b/crates/jcode-base/src/provider/gemini_tests.rs index 9eeae7a77..4dedb2fa7 100644 --- a/crates/jcode-base/src/provider/gemini_tests.rs +++ b/crates/jcode-base/src/provider/gemini_tests.rs @@ -231,6 +231,55 @@ fn build_contents_replays_thought_signature_on_function_call() { ); } +#[test] +fn build_contents_replays_every_signature_across_multi_tool_history() { + // Regression guard for the Antigravity/Cloud Code 400 + // ("Function call is missing a thought_signature ... position 5"): the + // backend validates *every* functionCall in the replayed history, not just + // the latest one. A multi-turn transcript where an earlier tool_use drops + // its signature is exactly what triggers the field failure, so assert that + // each captured signature survives serialization onto its matching part. 
+ let signatures = ["SIG_A", "SIG_B", "SIG_C"]; + let mut messages = Vec::new(); + for (idx, sig) in signatures.iter().enumerate() { + messages.push(Message { + role: Role::Assistant, + content: vec![ContentBlock::ToolUse { + id: format!("call_{idx}"), + name: "bash".to_string(), + input: json!({ "command": format!("echo {idx}") }), + thought_signature: Some(sig.to_string()), + }], + timestamp: None, + tool_duration_ms: None, + }); + messages.push(Message { + role: Role::User, + content: vec![ContentBlock::ToolResult { + tool_use_id: format!("call_{idx}"), + content: format!("out {idx}"), + is_error: Some(false), + }], + timestamp: None, + tool_duration_ms: None, + }); + } + + let contents = build_contents(&messages); + let replayed: Vec> = contents + .iter() + .flat_map(|content| content.parts.iter()) + .filter(|part| part.function_call.is_some()) + .map(|part| part.thought_signature.as_deref()) + .collect(); + assert_eq!( + replayed, + vec![Some("SIG_A"), Some("SIG_B"), Some("SIG_C")], + "every functionCall in the history must carry its captured thought_signature, \ + not just the most recent one" + ); +} + #[test] fn build_contents_preserves_tool_calls_and_results() { let messages = vec![ From d9823f6a133c263675dad1f8f5eaece898b9570b Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 01:32:02 -0700 Subject: [PATCH 04/57] telemetry dashboard: restore all legacy metrics + redesign with frontend-design skill Two things the prior dashboard commit missed. 
1) Restore every metric the old SQL surface (README queries, health.sql, dau.sql) exposed that had been dropped: - os/arch platform breakdown (was os-only) - session starts by UTC hour (usage-timing histogram) - pipeline-health diagnostics: lifecycle_ids, session_start_ids, lifecycle_ids_without_install, heaviest/top5/total session events - meaningful_sessions_30d count stats.js gains hours, arch, health, skew, meaningfulSessions queries; all validated end-to-end against a seeded sqlite D1 shim. 2) Redesign dashboard.js using the installed anthropics/frontend-design skill. The previous version used system fonts and the exact purple-gradient-on-dark the skill warns against. New 'Terminal Observatory' aesthetic, true to jcode being a CLI agent: JetBrains Mono instrument typography (Sora for prose), warm phosphor-amber signal color with a single cyan accent, scanline texture, station- clock hero number, numbered hairline section dividers, KEY/alert accent rails, an amber UTC-hour bar histogram, and a filled cyan active-users area chart. Tiered HEADLINE/SIGNAL/DIAGNOSTIC layout so the total-users number dominates while every figure stays visible. Verified in a real browser: token gate, hero, all 8 sections, both chart types render correctly. node --check passes on all modules. --- telemetry-worker/src/dashboard.js | 704 ++++++++++++++++-------------- telemetry-worker/src/stats.js | 52 ++- 2 files changed, 416 insertions(+), 340 deletions(-) diff --git a/telemetry-worker/src/dashboard.js b/telemetry-worker/src/dashboard.js index f40246119..62175243e 100644 --- a/telemetry-worker/src/dashboard.js +++ b/telemetry-worker/src/dashboard.js @@ -1,397 +1,425 @@ -// Self-contained dashboard page (HTML + CSS + JS, no external dependencies so it -// works under Cloudflare with no CDN/CSP issues). Charts are drawn as inline SVG. +// jcode telemetry console — "Terminal Observatory" aesthetic. 
// -// The page fetches /v1/stats with the dashboard token (entered once, stored in -// localStorage) and renders tiered metrics: a hero "total users" number, the -// active-user funnel, then secondary KPIs and diagnostic breakdowns. Every -// metric the API returns is shown; importance is conveyed visually (hero / -// primary cards / muted diagnostic tables) and via short "why it matters" notes. +// Design intent (frontend-design skill): jcode is a terminal coding agent, so +// the dashboard is built as a precision instrument readout, not generic SaaS. +// - Type: JetBrains Mono (display + data) paired with a quiet grotesk for prose. +// - Palette: near-black graphite, warm phosphor amber as the dominant signal, +// a single cyan accent for the live/headline series. No purple-on-white. +// - Composition: a station-clock hero number, hairline rules, dense tabular +// instrument panels, scanline texture, staggered load-in reveals. +// +// Self-contained (HTML/CSS/inline-SVG, fonts via Google Fonts ). Fetches +// /v1/stats with the dashboard token. Every metric the API returns is shown, +// grouped by importance (HEADLINE / SIGNAL / DIAGNOSTIC). export const DASHBOARD_HTML = ` -jcode telemetry +jcode · telemetry console + + +
- '; H+='
' - + stat("Tokens · 30d", fmt(q.tokens_30d), "input + output across sessions") + + stat("Avg tool latency", ms(q.avg_tool_latency_ms), "per executed tool call") + stat("Crash rate", pct(lc.crash_rate)+" · completion "+(lc.lifecycle_completion_ratio==null?"—":lc.lifecycle_completion_ratio), "crash share · (ends+crashes)/starts", {key:true}) + '
'; - // 04 RELIABILITY - const anyErr = (e.provider_timeout||0)+(e.auth_failed||0)+(e.rate_limited||0) > 0; - H+=sec("04","reliability","error counts · 30d non-CI · watch for spikes"); + // 04 TOKEN USAGE + const tk = d.tokens||{}; + H+=sec("04","token usage","model token volume · non-CI · cache-aware"); + H+='
' + + stat("Total tokens · 30d", fmt(tk.total_30d), "all token types, last 30d", {key:true}) + + stat("Input · 30d", fmt(tk.input_30d), "prompt tokens sent") + + stat("Output · 30d", fmt(tk.output_30d), "completion tokens") + + stat("Cache read · 30d", fmt(tk.cache_read_30d), "served from prompt cache") + + '
'; + H+='
' + + stat("Cache creation · 30d", fmt(tk.cache_creation_30d), "tokens written to cache") + + stat("Total tokens · all-time", fmt(tk.total_all), "since telemetry began") + + stat("Input · all-time", fmt(tk.input_all), "") + + stat("Output · all-time", fmt(tk.output_all), "") + + '
'; + + // 05 AGENT AUTONOMY + const ag = d.agent||{}; + const activeMs = ag.agent_active_ms||0, modelMs = ag.agent_model_ms||0, toolMs = ag.agent_tool_ms||0, idleMs = ag.session_idle_ms||0, blockedMs = ag.agent_blocked_ms||0; + const hrs = (x)=> x==null?"—":(x/3600000>=1?(x/3600000).toFixed(1)+"h":(x/60000).toFixed(0)+"m"); + H+=sec("05","agent autonomy","30-day · spawning, delegation & where agent time goes"); + H+='
' + + stat("Spawned agents", fmt(ag.spawned_agents), "sub-agents launched", {key:true}) + + stat("Subagent tasks", fmt(ag.subagent_tasks), fmt(ag.subagent_success)+" succeeded") + + stat("Swarm tasks", fmt(ag.swarm_tasks), fmt(ag.swarm_success)+" succeeded") + + stat("Background tasks", fmt(ag.background_tasks), fmt(ag.background_completed)+" completed") + + '
'; + H+='
' + + stat("User cancellations", fmt(ag.user_cancelled), "user interrupted the agent") + + stat("Agent active time", hrs(activeMs), "total working time, 30d") + + stat("Time in model", hrs(modelMs), "thinking / generating") + + stat("Time in tools", hrs(toolMs), "executing tool calls") + + '
'; + H+='
' + + stat("Agent blocked time", hrs(blockedMs), "waiting on user / approvals") + + stat("Session idle time", hrs(idleMs), "no activity") + + stat("Time to first action", ms(ag.avg_time_to_first_action_ms), "agent's first move") + + stat("Avg max concurrency", dec(ag.avg_max_concurrent,1), "peak parallel sessions") + + '
'; + + // 06 RELIABILITY + H+=sec("06","reliability","error counts · 30d non-CI · watch for spikes"); H+='
' + stat("Provider timeouts", fmt(e.provider_timeout), "", {alert:(e.provider_timeout||0)>0}) + stat("Rate limited", fmt(e.rate_limited), "") @@ -363,8 +427,8 @@ function render(d){ + stat("Tool / MCP errors", fmt((e.tool_error||0)+(e.mcp_error||0)), fmt(e.tool_error)+" tool · "+fmt(e.mcp_error)+" mcp") + '
'; - // 05 WHO & WHAT - H+=sec("05","who & what","distinct users per bucket"); + // 07 WHO & WHAT + H+=sec("07","who & what","distinct users per bucket"); H+='
' + tablePanel("Versions","adoption by release (non-CI)", rows(b.versions,"version"), "version","users") + tablePanel("Platform","os / arch split", rows(b.arch,"platform"), "platform","users") @@ -384,17 +448,21 @@ function render(d){ + tablePanel("Operating system","os split (non-CI)", rows(b.os,"os"), "os","users") + '
'; - // 06 FEATURE ADOPTION + // 08 FEATURE ADOPTION const fr = Object.entries(d.features||{}).map(([k,v])=>({label:k.replace(/_/g," "),value:v})).sort((a,b)=>b.value-a.value); const tr = [["https",d.transport.https],["ws reuse",d.transport.ws_reuse],["ws fresh",d.transport.ws_fresh],["native http2",d.transport.native_http2],["cli subprocess",d.transport.cli],["other",d.transport.other]].map(([label,value])=>({label,value:value||0})).sort((a,b)=>b.value-a.value); - H+=sec("06","feature adoption","distinct users per capability · 30d"); + H+=sec("08","feature adoption","distinct users per capability · 30d"); H+='
' + tablePanel("Features","users who touched each capability", fr, "feature","users") + tablePanel("Transport mix","request transport counts (30d non-CI)", tr, "transport","count") + '
'; - // 07 DATA HEALTH (diagnostic) - H+=sec("07","pipeline health","diagnostic · not product metrics · watch for drift"); + // 09 USER LEADERBOARD + H+=sec("09","user leaderboard","most active anonymous ids · by lifecycle volume"); + H+=leaderboardPanel(d.leaderboard||[]); + + // 10 PIPELINE HEALTH (diagnostic) + H+=sec("10","pipeline health","diagnostic · not product metrics · watch for drift"); H+='
' + stat("Lifecycle ids", fmt(h.lifecycle_ids), "distinct ids w/ end/crash") + stat("Session-start ids", fmt(h.session_start_ids), "distinct ids that launched") @@ -407,9 +475,9 @@ function render(d){ + stat("CI ids (30d window)", fmt(a.ci_mau), "filtered from headline") + '
'; - // 08 FEEDBACK + // 11 FEEDBACK if((d.feedback||[]).length){ - H+=sec("08","recent feedback","explicit user submissions"); + H+=sec("11","recent feedback","explicit user submissions"); H+='
'+d.feedback.map(fb=>'
'+esc(fb.feedback_text)+'
'+esc(new Date((fb.created_at||"").replace(" ","T")+"Z").toLocaleString())+' · v'+esc(fb.version||"?")+(fb.feedback_rating?' · '+esc(fb.feedback_rating)+'':'')+(fb.feedback_reason?' · '+esc(fb.feedback_reason):'')+'
').join('')+'
'; } diff --git a/telemetry-worker/src/stats.js b/telemetry-worker/src/stats.js index 6f94edb5f..a359dded4 100644 --- a/telemetry-worker/src/stats.js +++ b/telemetry-worker/src/stats.js @@ -122,6 +122,7 @@ export async function getStats(env) { AVG(CASE WHEN session_success > 0 THEN 1.0 ELSE 0.0 END) AS success_rate, AVG(CASE WHEN abandoned_before_response > 0 THEN 1.0 ELSE 0.0 END) AS abandon_rate, AVG(first_assistant_response_ms) AS avg_first_response_ms, + AVG(first_tool_success_ms) AS avg_first_tool_success_ms, AVG(CASE WHEN executed_tool_calls > 0 THEN CAST(tool_latency_total_ms AS REAL)/executed_tool_calls END) AS avg_tool_latency_ms, SUM(input_tokens + output_tokens) AS tokens_30d, AVG(CASE WHEN multi_sessioned > 0 THEN 1.0 ELSE 0.0 END) AS multi_session_rate @@ -130,6 +131,47 @@ export async function getStats(env) { AND is_ci = 0 AND created_at > datetime('now','-30 days') `); + // --- Token usage (all-time + 30d, full breakdown incl. cache) ----------- + const tokens = await one(env, ` + SELECT + SUM(input_tokens) AS input_all, + SUM(output_tokens) AS output_all, + SUM(cache_read_input_tokens) AS cache_read_all, + SUM(cache_creation_input_tokens) AS cache_creation_all, + SUM(total_tokens) AS total_all, + SUM(CASE WHEN created_at > datetime('now','-30 days') THEN input_tokens ELSE 0 END) AS input_30d, + SUM(CASE WHEN created_at > datetime('now','-30 days') THEN output_tokens ELSE 0 END) AS output_30d, + SUM(CASE WHEN created_at > datetime('now','-30 days') THEN cache_read_input_tokens ELSE 0 END) AS cache_read_30d, + SUM(CASE WHEN created_at > datetime('now','-30 days') THEN cache_creation_input_tokens ELSE 0 END) AS cache_creation_30d, + SUM(CASE WHEN created_at > datetime('now','-30 days') THEN total_tokens ELSE 0 END) AS total_30d + FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + `); + + // --- Agent autonomy (30d): spawning, background/subagent/swarm, time split + const agent = await one(env, ` + SELECT + 
SUM(spawned_agent_count) AS spawned_agents, + SUM(background_task_count) AS background_tasks, + SUM(background_task_completed_count) AS background_completed, + SUM(subagent_task_count) AS subagent_tasks, + SUM(subagent_success_count) AS subagent_success, + SUM(swarm_task_count) AS swarm_tasks, + SUM(swarm_success_count) AS swarm_success, + SUM(user_cancelled_count) AS user_cancelled, + SUM(agent_active_ms_total) AS agent_active_ms, + SUM(agent_model_ms_total) AS agent_model_ms, + SUM(agent_tool_ms_total) AS agent_tool_ms, + SUM(agent_blocked_ms_total) AS agent_blocked_ms, + SUM(session_idle_ms_total) AS session_idle_ms, + AVG(time_to_first_agent_action_ms) AS avg_time_to_first_action_ms, + AVG(time_to_first_useful_action_ms) AS avg_time_to_first_useful_ms, + AVG(CASE WHEN max_concurrent_sessions > 0 THEN max_concurrent_sessions END) AS avg_max_concurrent + FROM events + WHERE event IN ('session_end','session_crash') AND is_ci = 0 + AND created_at > datetime('now','-30 days') + `); + // --- Per-turn metrics (30d) --------------------------------------------- const turns = await one(env, ` SELECT @@ -263,6 +305,28 @@ export async function getStats(env) { AND created_at > datetime('now','-30 days') AND ${MEANINGFUL_SQL} `); + // --- User leaderboard: most active anonymous ids ------------------------ + // Ranks by lifecycle (session_end + session_crash) volume. telemetry_id is + // anonymous, so we surface a short prefix only. Useful for spotting power + // users and dev/test skew. Includes whether the id is CI and its channel. 
+ const leaderboard = await many(env, ` + SELECT + substr(telemetry_id, 1, 8) AS id_prefix, + COUNT(*) AS sessions, + SUM(turns) AS turns, + SUM(input_tokens + output_tokens) AS tokens, + SUM(tool_calls) AS tool_calls, + MAX(is_ci) AS is_ci, + MAX(build_channel) AS build_channel, + MAX(version) AS version, + MAX(created_at) AS last_seen + FROM events + WHERE event IN ('session_end','session_crash') + GROUP BY telemetry_id + ORDER BY sessions DESC + LIMIT 20 + `); + // --- Daily timeseries (last 60 days) for charts ------------------------- const daily = await many(env, ` SELECT @@ -310,11 +374,14 @@ export async function getStats(env) { lifecycle: { ...lifecycle, lifecycle_completion_ratio: lifecycleCompletion, crash_rate: crashRate }, retention: { ...retention, d7_retention: d7Retention }, quality: { ...quality, meaningful_sessions_30d: meaningfulSessions.meaningful_sessions || 0 }, + tokens, + agent, turns, errors, features, transport, breakdowns: { versions, os, arch, channels, providers, auth, onboarding, hours }, + leaderboard, health: { ...health, ...skew }, timeseries: { daily, installs: dailyInstalls }, feedback, From 1feea6509db9e655429202836be8d7b739a77883 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 01:51:44 -0700 Subject: [PATCH 08/57] fix(gemini): tolerate generateContent candidate content without role/parts Live multi-call provider-doctor against gemini-3.1-pro-high surfaced a real decode abort: the Antigravity/Cloud Code generateContent response occasionally omits `role` (and sometimes `parts`) on a candidate's `content`, but the struct required `role`, so the whole turn failed with "missing field `role`". The response-side role is never read, so default both fields rather than aborting. Adds two decode regression tests. 
--- crates/jcode-provider-gemini/src/lib.rs | 57 +++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/crates/jcode-provider-gemini/src/lib.rs b/crates/jcode-provider-gemini/src/lib.rs index 30e7bbca9..a7eb062c0 100644 --- a/crates/jcode-provider-gemini/src/lib.rs +++ b/crates/jcode-provider-gemini/src/lib.rs @@ -153,7 +153,14 @@ pub struct VertexGenerateContentRequest { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct GeminiContent { + // Requests always set `role` (see `build_contents`), but `generateContent` + // responses occasionally omit it on a candidate's `content` (observed on + // Antigravity/Cloud Code Gemini-3 turns). The response-side value is never + // read, so default it rather than failing the whole decode with + // "missing field `role`". + #[serde(default)] pub role: String, + #[serde(default)] pub parts: Vec, } @@ -465,4 +472,54 @@ mod tests { ] ); } + + #[test] + fn candidate_content_decodes_without_role() { + // Antigravity/Cloud Code Gemini-3 responses occasionally omit `role` on + // a candidate's `content` (and sometimes `parts` entirely). The whole + // generateContent decode used to fail with "missing field `role`", + // which aborted the turn; assert the response now decodes and the + // function call survives. 
+ let raw = json!({ + "response": { + "candidates": [{ + "content": { + "parts": [{ + "functionCall": {"name": "read", "args": {"file_path": "/tmp/x"}}, + "thoughtSignature": "SIG_XYZ" + }] + }, + "finishReason": "STOP" + }] + } + }) + .to_string(); + + let decoded: CodeAssistGenerateResponse = + serde_json::from_str(&raw).expect("decode response with role-less content"); + let candidates = decoded.response.unwrap().candidates.unwrap(); + let part = &candidates[0].content.as_ref().unwrap().parts[0]; + assert_eq!(part.function_call.as_ref().unwrap().name, "read"); + assert_eq!(part.thought_signature.as_deref(), Some("SIG_XYZ")); + } + + #[test] + fn candidate_content_decodes_without_parts() { + // A bare `content: {}` (no `role`, no `parts`) must not abort the decode. + let raw = json!({ + "response": { + "candidates": [{ "content": {}, "finishReason": "STOP" }] + } + }) + .to_string(); + + let decoded: CodeAssistGenerateResponse = + serde_json::from_str(&raw).expect("decode response with empty content"); + let content = decoded.response.unwrap().candidates.unwrap()[0] + .content + .clone() + .unwrap(); + assert!(content.role.is_empty()); + assert!(content.parts.is_empty()); + } } From 1b1139e862a72cc749fc39a28c1b1de288e56695 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 01:51:44 -0700 Subject: [PATCH 09/57] test(provider-doctor): drive a real multi-call signature-replay loop The first cut of the multi-call phase only nudged a 2nd tool call after the model had already answered, so live runs reported multi_tool_replay=skipped and never actually exercised the multi-functionCall history. Replace it with an agentic loop driven by a two-file read prompt: each emitted tool call is replayed (carrying its captured thought_signature) and answered with a synthetic result, so by the final turn we send two assistant functionCall blocks and assert the backend accepts the transcript. 
Surface the verified/skipped status in the doctor report detail. Verified live: provider-doctor antigravity -m gemini-3.1-pro-high --tier full now reports 'multi-call signature replay verified'. --- .../src/auth/live_provider_probes.rs | 161 +++++++++--------- crates/jcode-base/src/auth/provider_e2e.rs | 28 ++- 2 files changed, 110 insertions(+), 79 deletions(-) diff --git a/crates/jcode-base/src/auth/live_provider_probes.rs b/crates/jcode-base/src/auth/live_provider_probes.rs index 2c9b4bfb4..749c43bee 100644 --- a/crates/jcode-base/src/auth/live_provider_probes.rs +++ b/crates/jcode-base/src/auth/live_provider_probes.rs @@ -1407,7 +1407,6 @@ pub async fn run_live_native_provider_tool_smoke( history.push(tool_result_then_text( &tool_call.id, "TOOL_RESULT_TOKEN=42. Report this token back to confirm you read it.", - None, )); let second = consume_native_stream( @@ -1429,56 +1428,62 @@ pub async fn run_live_native_provider_tool_smoke( crate::util::truncate_str(second.text.trim(), 200) ); - // Phase 2 (best-effort): drive a second tool call so the replayed history - // carries *two* function calls, then assert the backend accepts the - // multi-call transcript (the only shape that reproduces the - // "missing a thought_signature ... position N" 400). + // Phase 2 (best-effort): drive an agentic loop that requires reading TWO + // files so the model emits a *sequence* of tool calls. Each call is replayed + // (carrying its captured signature) and answered with a synthetic result, so + // by the final turn the request we send carries two assistant `functionCall` + // blocks. That multi-call history is the only shape that reproduces the + // Antigravity/Cloud Code `400 ... "Function call is missing a + // thought_signature ... position N"`: a backend that validates *every* + // signature rejects the request here if an earlier one was dropped, so the + // `consume_native_stream` below surfaces the regression. 
If the model never + // makes a second tool call (common for providers that emit no signatures at + // all), the phase records `multi_tool_replay: "skipped"` rather than failing. let mut total_input = first.input_tokens + second.input_tokens; let mut total_output = first.output_tokens + second.output_tokens; let mut multi_tool_replay = "skipped"; - let mut signatures_present = vec![tool_call.thought_signature.is_some()]; + let mut signatures_present: Vec = Vec::new(); - // Ask for a *second* distinct read so the model emits another tool call. - let mut second_request = first_turn.clone(); - second_request.push(assistant_tool_use(&tool_call, &parsed_arguments)); - second_request.push(tool_result_then_text( - &tool_call.id, + let mut convo = vec![Message { + role: Role::User, + content: vec![ContentBlock::Text { + text: "Read two files using the read tool, one tool call at a time: first read \ + /tmp/auth_tool_probe.txt, then read /tmp/auth_tool_probe_2.txt. After both \ + reads, reply with the single word DONE. Call the tool now; do not answer \ + in text first." + .to_string(), + cache_control: None, + }], + timestamp: None, + tool_duration_ms: None, + }]; + let synthetic_results = [ "Contents of /tmp/auth_tool_probe.txt: alpha.", - Some( - "Now read the file at /tmp/auth_tool_probe_2.txt using the read tool. \ - Call the tool now; do not answer in text.", - ), - )); - - let third = consume_native_stream( - provider, - &second_request, - &tools, - system, - std::time::Duration::from_secs(120), - ) - .await?; - total_input += third.input_tokens; - total_output += third.output_tokens; - - if let Some(second_call) = third.tool_calls.first().cloned() { - let second_arguments = parse_tool_arguments(&second_call.input_json); - signatures_present.push(second_call.thought_signature.is_some()); - - // Final request: history now contains BOTH tool_use blocks, each - // carrying its own captured signature. 
A dropped earlier signature is - // rejected here with the position-N 400. - let mut final_request = second_request.clone(); - final_request.push(assistant_tool_use(&second_call, &second_arguments)); - final_request.push(tool_result_then_text( - &second_call.id, - "TOOL_RESULT_TOKEN=77. Report this token back to confirm you read it.", - None, - )); - - let fourth = consume_native_stream( + "Contents of /tmp/auth_tool_probe_2.txt: bravo.", + ]; + // Cap the loop so a model that keeps calling tools cannot run forever. + const MAX_TOOL_ROUNDS: usize = 4; + let mut tool_round = 0usize; + + loop { + // Number of assistant function calls already in the history we are about + // to replay. Once this reaches two, a successful response proves the + // backend accepted a multi-`functionCall` transcript with every + // signature intact. + let prior_calls = convo + .iter() + .filter(|message| { + matches!(message.role, Role::Assistant) + && message + .content + .iter() + .any(|block| matches!(block, ContentBlock::ToolUse { .. 
})) + }) + .count(); + + let turn = consume_native_stream( provider, - &final_request, + &convo, &tools, system, std::time::Duration::from_secs(120), @@ -1486,25 +1491,34 @@ pub async fn run_live_native_provider_tool_smoke( .await .with_context(|| { format!( - "native {label} multi-tool signature replay was rejected (history carried \ - {} function calls; a backend that validates every functionCall signature \ - fails here when an earlier thought_signature is dropped)", - signatures_present.len() + "native {label} multi-tool signature replay was rejected (replayed history \ + carried {prior_calls} function call(s); a backend that validates every \ + functionCall signature fails here when an earlier thought_signature is dropped)" ) })?; - total_input += fourth.input_tokens; - total_output += fourth.output_tokens; + total_input += turn.input_tokens; + total_output += turn.output_tokens; + if prior_calls >= 2 { + multi_tool_replay = "verified"; + } - ensure!( - fourth.saw_message_end, - "native {label} multi-tool follow-up ended without a message_end event" - ); - ensure!( - fourth.text.contains("77"), - "native {label} multi-tool follow-up did not reflect the second tool result token: {:?}", - crate::util::truncate_str(fourth.text.trim(), 200) - ); - multi_tool_replay = "verified"; + let Some(call) = turn.tool_calls.first().cloned() else { + // Model produced a final (text) answer; the loop is done. 
+ break; + }; + signatures_present.push(call.thought_signature.is_some()); + let args = parse_tool_arguments(&call.input_json); + convo.push(assistant_tool_use(&call, &args)); + let result = synthetic_results + .get(tool_round) + .copied() + .unwrap_or("Contents: omega."); + convo.push(tool_result_then_text(&call.id, result)); + + tool_round += 1; + if tool_round >= MAX_TOOL_ROUNDS { + break; + } } let mut stage = crate::live_tests::LiveVerificationStage::passed( @@ -1519,6 +1533,7 @@ pub async fn run_live_native_provider_tool_smoke( serde_json::json!(tool_call.thought_signature.is_some()), ) .with_evidence("multi_tool_replay", serde_json::json!(multi_tool_replay)) + .with_evidence("multi_tool_call_count", serde_json::json!(tool_round)) .with_evidence( "tool_call_signatures_present", serde_json::json!(signatures_present), @@ -1557,24 +1572,16 @@ fn assistant_tool_use(call: &NativeClaudeToolCall, arguments: &serde_json::Value } } -/// Build a user turn carrying a synthetic `tool_result` and, optionally, a -/// follow-up text instruction (used to chain a second tool call in one message -/// so the provider sees a clean user turn rather than two consecutive ones). -fn tool_result_then_text(tool_use_id: &str, result: &str, follow_up: Option<&str>) -> Message { - let mut content = vec![ContentBlock::ToolResult { - tool_use_id: tool_use_id.to_string(), - content: result.to_string(), - is_error: Some(false), - }]; - if let Some(text) = follow_up { - content.push(ContentBlock::Text { - text: text.to_string(), - cache_control: None, - }); - } +/// Build a user turn carrying a synthetic `tool_result` for a captured native +/// tool call, used to answer each step of the multi-call replay loop. 
+fn tool_result_then_text(tool_use_id: &str, result: &str) -> Message { Message { role: Role::User, - content, + content: vec![ContentBlock::ToolResult { + tool_use_id: tool_use_id.to_string(), + content: result.to_string(), + is_error: Some(false), + }], timestamp: None, tool_duration_ms: None, } diff --git a/crates/jcode-base/src/auth/provider_e2e.rs b/crates/jcode-base/src/auth/provider_e2e.rs index 0998d10da..d4356db5c 100644 --- a/crates/jcode-base/src/auth/provider_e2e.rs +++ b/crates/jcode-base/src/auth/provider_e2e.rs @@ -283,6 +283,28 @@ fn label_for(checkpoint: &str) -> &'static str { .unwrap_or("Checkpoint") } +/// Human-readable detail for a passed tool-smoke stage, surfacing whether the +/// multi-call thought-signature replay phase was exercised. The native tool +/// smoke records `multi_tool_replay` as `verified` (a two-`functionCall` +/// history was replayed and accepted, the shape that reproduces the +/// "missing a thought_signature ... position N" 400) or `skipped` (the model +/// declined a second tool call). Surfacing it keeps the coverage observable in +/// the doctor report instead of collapsing to a generic pass string. +fn tool_stage_detail(stage: &crate::live_tests::LiveVerificationStage) -> String { + match stage + .evidence + .get("multi_tool_replay") + .and_then(|value| value.as_str()) + { + Some("verified") => "tool call parsed and executed; multi-call signature replay verified".to_string(), + Some("skipped") => { + "tool call parsed and executed; multi-call signature replay skipped (no 2nd tool call)" + .to_string() + } + _ => "tool call parsed and executed".to_string(), + } +} + /// Checkpoints that require a real API response and are therefore skipped on the /// offline/catalog tiers. 
const API_DEPENDENT_CHECKPOINTS: &[&str] = &[ @@ -1139,6 +1161,7 @@ async fn run_native_antigravity_api_checks( match run_live_antigravity_native_tool_smoke(selected).await { Ok(stage) => { spend.accumulate(stage.evidence.get("usage"), stage.evidence.get("cost")); + let detail = tool_stage_detail(&stage); for checkpoint in [ checkpoints::TOOL_CALL_PARSE, checkpoints::TOOL_EXECUTION_LOOP, @@ -1148,7 +1171,7 @@ async fn run_native_antigravity_api_checks( checks.push(DoctorCheck::passed( checkpoint, label_for(checkpoint), - "tool call parsed and executed".to_string(), + detail.clone(), )); } } @@ -1773,6 +1796,7 @@ async fn run_generic_native_api_checks( match run_live_native_provider_tool_smoke(provider, selected, label).await { Ok(stage) => { spend.accumulate(stage.evidence.get("usage"), stage.evidence.get("cost")); + let detail = tool_stage_detail(&stage); for checkpoint in [ checkpoints::TOOL_CALL_PARSE, checkpoints::TOOL_EXECUTION_LOOP, @@ -1782,7 +1806,7 @@ async fn run_generic_native_api_checks( checks.push(DoctorCheck::passed( checkpoint, label_for(checkpoint), - "tool call parsed and executed".to_string(), + detail.clone(), )); } } From 75458f6e03585b2632ed3ad9e7e2d1b3eec18fd4 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 02:37:02 -0700 Subject: [PATCH 10/57] perf(resume): parallelize session loading; scope onboarding picker Cold /resume and onboarding/catch-up pickers were dominated by serial per-file IO+JSON parsing over large session histories (87k jcode snapshots + hundreds of Codex/Claude transcripts here). - Add a bounded scoped-thread parallel_map helper in the session picker loader and use it for: candidate mtime stat (readdir then parallel stat), the jcode summary parse pass (two-phase: parallel fill to scan_limit, then parallel saved-gate over the tail), and the external Codex/pi/opencode stub parsers. 
- Load the catch-up 'seen' state once (CatchupSeenSnapshot) instead of re-reading catchup_seen.json per session. - Onboarding transcript picker now loads only the relevant external CLI (load_external_cli_sessions_grouped) instead of the full load_sessions_grouped on the UI thread. - Catch-up picker now opens from cache and refreshes off-thread via the shared async picker-load path instead of blocking the live session. Measured on real data (idle, 4 runs each): load_sessions ~660ms -> ~434ms (~34%) load_sessions_grouped ~685ms -> ~465ms (~32%) onboarding picker load ~685ms (UI thread) -> ~14ms scoped CLI load --- crates/jcode-app-core/src/catchup.rs | 36 ++ .../src/tui/app/inline_interactive.rs | 85 ++-- .../src/tui/app/onboarding_flow_control.rs | 17 +- crates/jcode-tui/src/tui/session_picker.rs | 1 + .../src/tui/session_picker/loading.rs | 367 +++++++++++++----- .../src/tui/session_picker/loading_tests.rs | 119 ++++++ 6 files changed, 484 insertions(+), 141 deletions(-) diff --git a/crates/jcode-app-core/src/catchup.rs b/crates/jcode-app-core/src/catchup.rs index c3c64159d..536bd312c 100644 --- a/crates/jcode-app-core/src/catchup.rs +++ b/crates/jcode-app-core/src/catchup.rs @@ -19,6 +19,42 @@ pub fn needs_catchup(session_id: &str, updated_at: DateTime, status: &Sessi needs_catchup_with_seen(updated_at.timestamp_millis(), seen, status) } +/// Snapshot of the persisted catch-up "seen" state, so callers that need to +/// evaluate many sessions at once (e.g. the session picker building its list) +/// can avoid re-reading and re-parsing `catchup_seen.json` once per session. +#[derive(Clone, Default)] +pub struct CatchupSeenSnapshot { + state: PersistedCatchupState, +} + +impl CatchupSeenSnapshot { + /// Load the persisted seen-state once from disk. + pub fn load() -> Self { + Self { + state: load_seen_state(), + } + } + + /// Same semantics as [`needs_catchup`] but uses this preloaded snapshot + /// instead of re-reading the state file for every call. 
+ pub fn needs_catchup( + &self, + session_id: &str, + updated_at: DateTime, + status: &SessionStatus, + ) -> bool { + if !is_attention_status(status) { + return false; + } + let seen = self + .state + .seen_at_ms_by_session + .get(session_id) + .copied(); + needs_catchup_with_seen(updated_at.timestamp_millis(), seen, status) + } +} + pub(crate) fn needs_catchup_with_seen( updated_at_ms: i64, seen_at_ms: Option, diff --git a/crates/jcode-tui/src/tui/app/inline_interactive.rs b/crates/jcode-tui/src/tui/app/inline_interactive.rs index 20373e7e9..4106e9e1a 100644 --- a/crates/jcode-tui/src/tui/app/inline_interactive.rs +++ b/crates/jcode-tui/src/tui/app/inline_interactive.rs @@ -1636,6 +1636,34 @@ impl App { }); } + /// Rebuild the picker overlay from a freshly loaded session list, applying + /// the filter for the active picker mode. Returns true when the overlay was + /// (re)built so the caller can request a redraw. + fn apply_loaded_session_picker( + &mut self, + server_groups: Vec, + orphan_sessions: Vec, + ) -> bool { + match self.session_picker_mode { + SessionPickerMode::Resume => { + let picker = SessionPicker::new_grouped(server_groups, orphan_sessions); + self.session_picker_overlay = Some(RefCell::new(picker)); + self.set_status_notice("Sessions loaded"); + true + } + SessionPickerMode::CatchUp => { + let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions); + picker.activate_catchup_filter(); + self.session_picker_overlay = Some(RefCell::new(picker)); + self.set_status_notice("Catch Up sessions loaded"); + true + } + // Onboarding loads its scoped transcript list synchronously, so it + // never flows through this async path. + SessionPickerMode::Onboarding { .. 
} => false, + } + } + pub(super) fn poll_session_picker_load(&mut self) -> bool { let recv_result = { let Some(pending) = self.pending_session_picker_load.as_ref() else { @@ -1644,24 +1672,23 @@ impl App { pending.receiver.try_recv() }; + let picker_active = self.session_picker_overlay.is_some() + && matches!( + self.session_picker_mode, + SessionPickerMode::Resume | SessionPickerMode::CatchUp + ); + match recv_result { Ok(Ok((server_groups, orphan_sessions))) => { self.pending_session_picker_load = None; - if self.session_picker_overlay.is_some() - && self.session_picker_mode == SessionPickerMode::Resume - { - let picker = SessionPicker::new_grouped(server_groups, orphan_sessions); - self.session_picker_overlay = Some(RefCell::new(picker)); - self.set_status_notice("Sessions loaded"); - return true; + if picker_active { + return self.apply_loaded_session_picker(server_groups, orphan_sessions); } false } Ok(Err(e)) => { self.pending_session_picker_load = None; - if self.session_picker_overlay.is_some() - && self.session_picker_mode == SessionPickerMode::Resume - { + if picker_active { self.session_picker_overlay = None; self.push_display_message(DisplayMessage::error(format!( "Failed to load sessions: {}", @@ -1675,9 +1702,7 @@ impl App { Err(std::sync::mpsc::TryRecvError::Empty) => false, Err(std::sync::mpsc::TryRecvError::Disconnected) => { self.pending_session_picker_load = None; - if self.session_picker_overlay.is_some() - && self.session_picker_mode == SessionPickerMode::Resume - { + if picker_active { self.session_picker_overlay = None; self.push_display_message(DisplayMessage::error( "Session loading stopped before returning a result.".to_string(), @@ -1700,20 +1725,26 @@ impl App { return; } - match session_picker::load_sessions_grouped() { - Ok((server_groups, orphan_sessions)) => { - let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions); - picker.activate_catchup_filter(); - self.session_picker_overlay = Some(RefCell::new(picker)); - 
self.session_picker_mode = SessionPickerMode::CatchUp; - } - Err(e) => { - self.push_display_message(DisplayMessage::error(format!( - "Failed to load catch-up sessions: {}", - e - ))); - } - } + // Show the picker overlay immediately (using the cached list when + // available) and load the full session list off-thread. This keeps the + // live TUI responsive instead of blocking on a multi-hundred-ms scan of + // every historical session. + let mut picker = if let Some((server_groups, orphan_sessions)) = + session_picker::load_cached_sessions_grouped() + { + let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions); + picker.activate_catchup_filter(); + picker + } else { + SessionPicker::loading() + }; + // Ensure the filter is applied even on the loading placeholder so the + // refreshed list lands in the catch-up view. + picker.activate_catchup_filter(); + self.session_picker_overlay = Some(RefCell::new(picker)); + self.session_picker_mode = SessionPickerMode::CatchUp; + self.set_status_notice("Loading Catch Up sessions..."); + self.start_session_picker_load(); } pub(super) fn handle_session_picker_selection(&mut self, targets: &[ResumeTarget]) { diff --git a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs index 4d283b3b1..3f95702d9 100644 --- a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs +++ b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs @@ -603,17 +603,12 @@ impl App { ExternalCli::ClaudeCode => SessionFilterMode::ClaudeCode, }; - let (server_groups, orphan_sessions) = match session_picker::load_sessions_grouped() { - Ok(loaded) => loaded, - Err(err) => { - crate::logging::error(&format!( - "onboarding: failed to load {} sessions: {err}", - cli.label() - )); - self.onboarding_fallback_to_session_search(cli); - return; - } - }; + // The onboarding picker only ever shows this one external CLI's + // transcripts, so load just those instead of paying the full + 
// `load_sessions_grouped` cost (parsing every jcode snapshot, the other + // CLIs, and listing servers). This keeps first-run onboarding snappy. + let (server_groups, orphan_sessions) = + session_picker::load_external_cli_sessions_grouped(cli); let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions); picker.activate_external_cli_filter(filter); diff --git a/crates/jcode-tui/src/tui/session_picker.rs b/crates/jcode-tui/src/tui/session_picker.rs index ef3c3cc9e..988dfe8b1 100644 --- a/crates/jcode-tui/src/tui/session_picker.rs +++ b/crates/jcode-tui/src/tui/session_picker.rs @@ -34,6 +34,7 @@ mod render; #[cfg(test)] use loading::collect_recent_session_stems; pub(crate) use loading::latest_external_cli_session_secs; +pub(crate) use loading::load_external_cli_sessions_grouped; use loading::{build_messages_preview, build_search_index, crashed_sessions_from_all_sessions}; pub use loading::{ invalidate_session_list_cache, load_cached_sessions_grouped, load_servers, load_sessions, diff --git a/crates/jcode-tui/src/tui/session_picker/loading.rs b/crates/jcode-tui/src/tui/session_picker/loading.rs index b1f6a8343..26d5e935a 100644 --- a/crates/jcode-tui/src/tui/session_picker/loading.rs +++ b/crates/jcode-tui/src/tui/session_picker/loading.rs @@ -53,6 +53,77 @@ const SAVED_METADATA_TAIL_SCAN_BYTES: u64 = 64 * 1024; const INITIAL_TRANSCRIPT_SEARCH_BUDGET_BYTES: usize = 64 * 1024; const MESSAGE_SEARCH_EXCERPT_BYTES: usize = 8 * 1024; +/// Upper bound on worker threads used to parse/stat session files in parallel. +/// The session picker load is dominated by per-file IO + JSON parsing across +/// hundreds of snapshots; fanning that work out across cores turns the cold +/// `/resume` load from a serial slog into a roughly core-count-bounded scan. +const SESSION_LOAD_MAX_THREADS: usize = 8; + +/// Number of worker threads to use for a parallel pass over `item_count` items. 
+/// Returns 1 for tiny batches so we never pay thread-spawn overhead when there +/// is barely any work to do. +fn session_load_thread_count(item_count: usize) -> usize { + if item_count <= 1 { + return 1; + } + let cores = std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(1); + cores.clamp(1, SESSION_LOAD_MAX_THREADS).min(item_count) +} + +/// Map `f` over `items` across a bounded scoped thread pool, preserving input +/// order in the returned vector. Falls back to a plain serial map when only one +/// worker is warranted. `f` must be `Sync` because every worker shares it. +fn parallel_map(items: Vec, f: F) -> Vec +where + T: Send, + R: Send, + F: Fn(T) -> R + Sync, +{ + let thread_count = session_load_thread_count(items.len()); + if thread_count <= 1 { + return items.into_iter().map(f).collect(); + } + + // Partition the work into `thread_count` owned chunks so each worker can + // take its inputs by value (no clone, no shared mutation). We remember the + // starting offset of each chunk to stitch results back into input order. 
+ let chunk_size = items.len().div_ceil(thread_count); + let mut chunks: Vec<(usize, Vec)> = Vec::with_capacity(thread_count); + let mut offset = 0usize; + let mut remaining = items; + while !remaining.is_empty() { + let take = chunk_size.min(remaining.len()); + let rest = remaining.split_off(take); + chunks.push((offset, remaining)); + offset += take; + remaining = rest; + } + + let f = &f; + let mut results: Vec<(usize, Vec)> = std::thread::scope(|scope| { + let mut handles = Vec::with_capacity(chunks.len()); + for (start, chunk) in chunks { + handles.push(scope.spawn(move || { + (start, chunk.into_iter().map(f).collect::>()) + })); + } + handles + .into_iter() + .filter_map(|handle| handle.join().ok()) + .collect() + }); + + results.sort_by_key(|(start, _)| *start); + let total: usize = results.iter().map(|(_, chunk)| chunk.len()).sum(); + let mut out = Vec::with_capacity(total); + for (_, chunk) in results { + out.extend(chunk); + } + out +} + #[derive(Clone)] struct SessionListCacheEntry { loaded_at: Instant, @@ -419,9 +490,8 @@ fn session_sort_key(stem: &str) -> u64 { .unwrap_or(0) } -fn entry_modified_sort_key(entry: &std::fs::DirEntry) -> u128 { - entry - .metadata() +fn path_modified_sort_key(path: &Path) -> u128 { + path.metadata() .and_then(|meta| meta.modified()) .ok() .and_then(|time| time.duration_since(std::time::UNIX_EPOCH).ok()) @@ -805,8 +875,10 @@ fn collect_recent_session_candidates( sessions_dir: &Path, candidate_limit: usize, ) -> Result> { - let mut by_stem: HashMap = HashMap::new(); - + // Phase 1: a single cheap `readdir` pass to enumerate candidate files. We + // defer the per-file `stat` (the expensive part on directories with 100k+ + // session files) to a parallel pass so it does not serialize startup. + let mut raw: Vec<(String, bool, PathBuf)> = Vec::new(); for entry in std::fs::read_dir(sessions_dir)? 
{ let entry = entry?; let file_name = entry.file_name(); @@ -819,11 +891,20 @@ fn collect_recent_session_candidates( if stem.starts_with("imported_") { continue; } + raw.push((stem.to_string(), has_snapshot, entry.path())); + } + + // Phase 2: stat each file's modification time in parallel. + let stamped = parallel_map(raw, |(stem, has_snapshot, path)| { + (stem, has_snapshot, path_modified_sort_key(&path)) + }); - let modified = entry_modified_sort_key(&entry); + // Phase 3: merge per-stem metadata (snapshot + newest journal/snapshot mtime). + let mut by_stem: HashMap = HashMap::new(); + for (stem, has_snapshot, modified) in stamped { by_stem - .entry(stem.to_string()) - .or_insert_with(|| SessionCandidateMeta::new(stem)) + .entry(stem.clone()) + .or_insert_with(|| SessionCandidateMeta::new(&stem)) .update(modified, has_snapshot); } @@ -1424,6 +1505,99 @@ pub(super) fn crashed_sessions_from_all_sessions( }) } +/// Parse a single jcode session snapshot (+ journal) into a [`SessionInfo`], +/// returning `None` for empty/imported sessions or read/parse errors. Pulled out +/// of `load_sessions` so the summary pass can run across a scoped thread pool. +fn parse_jcode_session_info( + sessions_dir: &Path, + stem: &str, + catchup_seen: &crate::catchup::CatchupSeenSnapshot, +) -> Option { + // Imported stems are filtered out by `collect_recent_session_candidates`, but + // keep the cheap defensive check so this helper is safe to call directly. 
+ if stem.starts_with("imported_cc_") + || stem.starts_with("imported_codex_") + || stem.starts_with("imported_pi_") + || stem.starts_with("imported_opencode_") + { + return None; + } + + let path = sessions_dir.join(format!("{stem}.json")); + let session = load_session_summary(&path).ok()?; + + let visible_message_count = session.messages.visible_message_count; + if visible_message_count == 0 { + return None; + } + + let short_name = session + .short_name + .clone() + .or_else(|| extract_session_name(stem).map(|s| s.to_string())) + .unwrap_or_else(|| stem.to_string()); + let icon = session_icon(&short_name); + + let user_message_count = session.messages.user_message_count; + let assistant_message_count = session.messages.assistant_message_count; + let estimated_tokens = session.messages.estimated_tokens; + + let status = session.status.clone(); + let needs_catchup = catchup_seen.needs_catchup(stem, session.updated_at, &status); + let source = classify_session_source( + stem, + session.provider_key.as_deref(), + session.model.as_deref(), + ); + + let title = session + .custom_title + .or(session.title) + .unwrap_or_else(|| short_name.clone()); + let search_index = build_search_index_from_summary( + stem, + &short_name, + &title, + session.working_dir.as_deref(), + session.save_label.as_deref(), + &session.messages.search_text, + ); + + Some(SessionInfo { + id: stem.to_string(), + parent_id: session.parent_id, + short_name, + icon: icon.to_string(), + title, + message_count: visible_message_count, + user_message_count, + assistant_message_count, + created_at: session.created_at, + last_message_time: session.updated_at, + last_active_at: session.last_active_at, + working_dir: session.working_dir, + model: session.model, + provider_key: session.provider_key, + is_canary: session.is_canary, + is_debug: session.is_debug, + saved: session.saved, + save_label: session.save_label, + status, + needs_catchup, + estimated_tokens, + first_user_prompt: 
session.messages.first_user_prompt, + messages_preview: Vec::new(), + search_index, + server_name: None, + server_icon: None, + source, + resume_target: ResumeTarget::JcodeSession { + session_id: stem.to_string(), + }, + external_path: None, + }) +} + pub fn load_sessions() -> Result> { let sessions_dir = storage::jcode_dir()?.join("sessions"); let scan_limit = session_scan_limit(); @@ -1437,8 +1611,6 @@ pub fn load_sessions() -> Result> { return Ok(entry.sessions.clone()); } - let mut sessions: Vec = Vec::new(); - let candidates = if sessions_dir.exists() { // Keep startup responsive by avoiding `session_has_history` here. That helper parses // snapshots/journals, and `load_session_summary` below parses the same files again. @@ -1459,100 +1631,65 @@ pub fn load_sessions() -> Result> { Vec::new() }; - let external_sessions = std::thread::scope(|scope| { + // Loading the catch-up "seen" state once (instead of per session) avoids + // re-reading and re-parsing `catchup_seen.json` for every candidate. + let catchup_seen = crate::catchup::CatchupSeenSnapshot::load(); + let sessions_dir_ref = &sessions_dir; + let catchup_ref = &catchup_seen; + + let (mut sessions, external_sessions) = std::thread::scope(|scope| { let claude_handle = scope.spawn(|| load_external_claude_code_sessions(scan_limit)); let codex_handle = scope.spawn(|| load_external_codex_sessions(scan_limit)); let pi_handle = scope.spawn(|| load_external_pi_sessions(scan_limit)); let opencode_handle = scope.spawn(|| load_external_opencode_sessions(scan_limit)); - for stem in candidates { - if sessions.len() >= scan_limit { - let saved = sessions_dir.join(format!("{stem}.json")); - if !session_snapshot_or_journal_has_saved_metadata(&saved) { - continue; + // Phase 1: walk the recency-ordered candidates in parallel windows until + // we have collected `scan_limit` non-empty sessions. 
`boundary` marks the + // candidate index where the serial fill would start applying the saved + // gate, so beyond it we only keep saved sessions (Phase 2). Parsing each + // window in parallel keeps the per-file JSON cost off the critical path. + // + // Windows are sized to `scan_limit`: only the final window (the one that + // crosses `scan_limit`) can over-parse, so wasted work is bounded to a + // single window's worth of candidates while still parallelizing widely. + let mut sessions: Vec = Vec::new(); + let mut boundary = candidates.len(); + let window = scan_limit.max(1); + let mut start = 0; + 'fill: while start < candidates.len() { + let end = (start + window).min(candidates.len()); + let batch = candidates[start..end].to_vec(); + let parsed = parallel_map(batch, move |stem| { + parse_jcode_session_info(sessions_dir_ref, &stem, catchup_ref) + }); + for (offset, parsed_session) in parsed.into_iter().enumerate() { + if let Some(info) = parsed_session { + sessions.push(info); + if sessions.len() >= scan_limit { + boundary = start + offset + 1; + break 'fill; + } } } - if stem.starts_with("imported_cc_") - || stem.starts_with("imported_codex_") - || stem.starts_with("imported_pi_") - || stem.starts_with("imported_opencode_") - { - continue; - } - let path = sessions_dir.join(format!("{stem}.json")); - if let Ok(session) = load_session_summary(&path) { - let short_name = session - .short_name - .clone() - .or_else(|| extract_session_name(&stem).map(|s| s.to_string())) - .unwrap_or_else(|| stem.clone()); - let icon = session_icon(&short_name); - - let visible_message_count = session.messages.visible_message_count; - if visible_message_count == 0 { - continue; - } - let user_message_count = session.messages.user_message_count; - let assistant_message_count = session.messages.assistant_message_count; - let estimated_tokens = session.messages.estimated_tokens; - - let status = session.status.clone(); - let needs_catchup = - crate::catchup::needs_catchup(&stem, 
session.updated_at, &status); - let source = classify_session_source( - &stem, - session.provider_key.as_deref(), - session.model.as_deref(), - ); - - let title = session - .custom_title - .or(session.title) - .unwrap_or_else(|| short_name.clone()); - let messages_preview: Vec = Vec::new(); - let search_index = build_search_index_from_summary( - &stem, - &short_name, - &title, - session.working_dir.as_deref(), - session.save_label.as_deref(), - &session.messages.search_text, - ); + start = end; + } - sessions.push(SessionInfo { - id: stem.to_string(), - parent_id: session.parent_id, - short_name, - icon: icon.to_string(), - title, - message_count: visible_message_count, - user_message_count, - assistant_message_count, - created_at: session.created_at, - last_message_time: session.updated_at, - last_active_at: session.last_active_at, - working_dir: session.working_dir, - model: session.model, - provider_key: session.provider_key, - is_canary: session.is_canary, - is_debug: session.is_debug, - saved: session.saved, - save_label: session.save_label, - status, - needs_catchup, - estimated_tokens, - first_user_prompt: session.messages.first_user_prompt, - messages_preview, - search_index, - server_name: None, - server_icon: None, - source, - resume_target: ResumeTarget::JcodeSession { - session_id: stem.to_string(), - }, - external_path: None, - }); - } + // Phase 2: beyond the fill boundary the serial loader only keeps saved + // sessions. Compute the cheap saved tail-gate across the remaining + // candidates in parallel, then fully parse just the gate-passers. 
+ if boundary < candidates.len() { + let tail: Vec = candidates[boundary..].to_vec(); + let gate_passers: Vec = parallel_map(tail, move |stem| { + let path = sessions_dir_ref.join(format!("{stem}.json")); + session_snapshot_or_journal_has_saved_metadata(&path).then_some(stem) + }) + .into_iter() + .flatten() + .collect(); + let saved_sessions = parallel_map(gate_passers, move |stem| { + parse_jcode_session_info(sessions_dir_ref, &stem, catchup_ref) + }); + sessions.extend(saved_sessions.into_iter().flatten()); } let mut external = Vec::new(); @@ -1560,7 +1697,7 @@ pub fn load_sessions() -> Result> { external.extend(codex_handle.join().unwrap_or_default()); external.extend(pi_handle.join().unwrap_or_default()); external.extend(opencode_handle.join().unwrap_or_default()); - external + (sessions, external) }); sessions.extend(external_sessions); @@ -1706,9 +1843,10 @@ fn load_external_codex_sessions(scan_limit: usize) -> Vec { return Vec::new(); } - collect_recent_files_recursive(&root, "jsonl", scan_limit) + let paths = collect_recent_files_recursive(&root, "jsonl", scan_limit); + parallel_map(paths, |path| load_codex_session_stub(&path).ok().flatten()) .into_iter() - .filter_map(|path| load_codex_session_stub(&path).ok().flatten()) + .flatten() .collect() } @@ -1915,9 +2053,10 @@ fn load_external_pi_sessions(scan_limit: usize) -> Vec { return Vec::new(); } - collect_recent_files_recursive(&root, "jsonl", scan_limit) + let paths = collect_recent_files_recursive(&root, "jsonl", scan_limit); + parallel_map(paths, |path| load_pi_session_stub(&path).ok().flatten()) .into_iter() - .filter_map(|path| load_pi_session_stub(&path).ok().flatten()) + .flatten() .collect() } @@ -2169,9 +2308,10 @@ fn load_external_opencode_sessions(scan_limit: usize) -> Vec { return Vec::new(); } - collect_recent_files_recursive(&root, "json", scan_limit) + let paths = collect_recent_files_recursive(&root, "json", scan_limit); + parallel_map(paths, |path| 
load_opencode_session_stub(&path).ok().flatten()) .into_iter() - .filter_map(|path| load_opencode_session_stub(&path).ok().flatten()) + .flatten() .collect() } @@ -2473,6 +2613,27 @@ pub fn load_sessions_grouped() -> Result<(Vec, Vec)> { Ok((groups, orphan_sessions)) } +/// Load only the sessions for a single external CLI (Codex or Claude Code), +/// returned as orphan [`SessionInfo`] grouped output compatible with +/// `SessionPicker::new_grouped`. +/// +/// First-run onboarding's "continue where you left off" picker is filtered to a +/// single external CLI, so the full `load_sessions_grouped` work (parsing every +/// jcode snapshot, the other CLIs, and listing servers) is wasted there. This +/// scoped loader keeps onboarding responsive by touching only the relevant +/// transcripts. +pub(crate) fn load_external_cli_sessions_grouped( + cli: crate::tui::app::onboarding_flow::ExternalCli, +) -> (Vec, Vec) { + use crate::tui::app::onboarding_flow::ExternalCli; + let scan_limit = session_scan_limit(); + let sessions = match cli { + ExternalCli::Codex => load_external_codex_sessions(scan_limit), + ExternalCli::ClaudeCode => load_external_claude_code_sessions(scan_limit), + }; + (Vec::new(), sessions) +} + #[cfg(test)] #[path = "loading_tests.rs"] mod tests; diff --git a/crates/jcode-tui/src/tui/session_picker/loading_tests.rs b/crates/jcode-tui/src/tui/session_picker/loading_tests.rs index 12b285927..769a9b888 100644 --- a/crates/jcode-tui/src/tui/session_picker/loading_tests.rs +++ b/crates/jcode-tui/src/tui/session_picker/loading_tests.rs @@ -783,3 +783,122 @@ fn benchmark_resume_loading_reports_timings() { sessions.len() ); } + +#[test] +fn onboarding_scoped_loader_returns_only_codex_sessions() { + use crate::tui::app::onboarding_flow::ExternalCli; + let _env_lock = crate::storage::lock_test_env(); + let temp = tempfile::tempdir().expect("temp dir"); + let _home = EnvVarGuard::set_path("JCODE_HOME", temp.path()); + + // A Codex transcript that the onboarding 
picker should surface. + let codex_dir = temp.path().join("external/.codex/sessions/2026/05/01"); + std::fs::create_dir_all(&codex_dir).expect("create codex dir"); + std::fs::write( + codex_dir.join("rollout-2026-05-01T10-00-00-test.jsonl"), + "{\"timestamp\":\"2026-05-01T10:00:00Z\",\"type\":\"session_meta\",\"payload\":{\"id\":\"codex-onboarding-test\",\"timestamp\":\"2026-05-01T09:59:00Z\",\"cwd\":\"/tmp/codex-onboard\"}}\n", + ) + .expect("write codex transcript"); + + // A jcode session that must NOT appear in the scoped Codex view (the whole + // point of the scoped loader is to skip parsing these on onboarding). + let mut jcode_session = Session::create_with_id( + "session_onboarding_jcode_1780000000000".to_string(), + Some("/tmp/jcode-onboard".to_string()), + Some("Jcode Onboarding".to_string()), + ); + jcode_session.append_stored_message(crate::session::StoredMessage { + id: "msg-1".to_string(), + role: crate::message::Role::User, + content: vec![crate::message::ContentBlock::Text { + text: "should not show in codex onboarding view".to_string(), + cache_control: None, + }], + display_role: None, + timestamp: None, + tool_duration_ms: None, + token_usage: None, + }); + jcode_session.save().expect("save jcode session"); + + let (groups, orphans) = load_external_cli_sessions_grouped(ExternalCli::Codex); + assert!(groups.is_empty(), "scoped loader produces only orphans"); + assert!( + orphans + .iter() + .any(|s| s.id == "codex:codex-onboarding-test"), + "expected codex transcript in scoped onboarding load: {:?}", + orphans.iter().map(|s| &s.id).collect::>() + ); + assert!( + orphans + .iter() + .all(|s| matches!(s.resume_target, ResumeTarget::CodexSession { .. 
})), + "scoped Codex load must not include jcode/other-CLI sessions" + ); +} + +#[test] +fn parallel_fill_skips_many_recent_empty_sessions_to_reach_scan_limit() { + let _env_lock = crate::storage::lock_test_env(); + let temp = tempfile::tempdir().expect("temp dir"); + let _home = EnvVarGuard::set_path("JCODE_HOME", temp.path()); + let _scan_limit = EnvVarGuard::set_str("JCODE_SESSION_PICKER_MAX_SESSIONS", "50"); + + let sessions_dir = temp.path().join("sessions"); + std::fs::create_dir_all(&sessions_dir).expect("create sessions dir"); + + let push_message = |session: &mut Session, text: &str| { + session.append_stored_message(crate::session::StoredMessage { + id: format!("msg-{text}"), + role: crate::message::Role::User, + content: vec![crate::message::ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }], + display_role: None, + timestamp: None, + tool_duration_ms: None, + token_usage: None, + }); + }; + + // Many recent but empty sessions (no visible messages) that the parallel + // two-phase fill must skip while still collecting `scan_limit` real ones. + for idx in 0..200 { + let mut session = Session::create_with_id( + format!("session_empty_{}", 1_790_000_000_000u64 + idx as u64), + Some(format!("/tmp/empty-{idx:03}")), + Some(format!("Empty {idx:03}")), + ); + session.save().expect("save empty session"); + } + // Older but non-empty sessions that should fill the list despite being less + // recent than the empty stubs above. 
+ for idx in 0..60 { + let mut session = Session::create_with_id( + format!("session_full_{}", 1_780_000_000_000u64 + idx as u64), + Some(format!("/tmp/full-{idx:03}")), + Some(format!("Full {idx:03}")), + ); + push_message(&mut session, &format!("real content {idx:03}")); + session.save().expect("save full session"); + } + + invalidate_session_list_cache(); + let sessions = load_sessions().expect("load sessions"); + let visible: Vec<&SessionInfo> = sessions + .iter() + .filter(|s| s.id.starts_with("session_full_")) + .collect(); + assert_eq!( + visible.len(), + 50, + "expected exactly scan_limit non-empty sessions, got {}", + visible.len() + ); + assert!( + !sessions.iter().any(|s| s.id.starts_with("session_empty_")), + "empty sessions must be filtered out of the loaded list" + ); +} From 514c34f8f5d8a1ebc32ebdeb283d45fa415ce6b1 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 11:04:27 -0700 Subject: [PATCH 11/57] chore(release): bump version to 0.22.0 Minor bump covering the 44 commits since v0.21.0, including: - Eager token-by-token reasoning streaming and per-line multi-line thinking rendering in the TUI. - Provider fixes: Gemini schema/thought_signature handling, Kimi reasoning_content, OpenRouter empty-message guard, Anthropic 1M context + split-cache cost accounting, API-key vs OAuth auth mode. - Swarm: route messages by target, broadcast to whole swarm, inherit coordinator model/auth route on spawn. - Self-dev reload correctness (daemon reloads into advertised binary), reload-trace OOM cap, and provider-doctor generic native suites. - Served telemetry dashboard with accurate user/install metrics and /skills + endorsed NVIDIA CUDA-X skills. 
--- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 499204bc7..6244492a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3265,7 +3265,7 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jcode" -version = "0.21.0" +version = "0.22.0" dependencies = [ "agentgrep", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 35b32d17b..134a7fe74 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "jcode" -version = "0.21.0" +version = "0.22.0" description = "Possibly the greatest coding agent ever built — blazing-fast TUI, multi-model, swarm coordination, 30+ tools" edition = "2024" autobins = false From 0611ae851ff45fc7068209bf1a256cb1bc3dc0fa Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 11:19:07 -0700 Subject: [PATCH 12/57] feat(skills): endorse Anthropic frontend-design skill Add Anthropic's official frontend-design skill (the best design-focused agent skill) to the endorsed list under a new 'Anthropic Design' category, sourced from github.com/anthropics/skills with an install hint. --- crates/jcode-base/src/skill.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/crates/jcode-base/src/skill.rs b/crates/jcode-base/src/skill.rs index 3dcd38208..164ff518f 100644 --- a/crates/jcode-base/src/skill.rs +++ b/crates/jcode-base/src/skill.rs @@ -467,6 +467,14 @@ pub const ENDORSED_SKILLS: &[EndorsedSkill] = &[ source: "bundled with jcode / Claude Code skills", install: None, }, + // Anthropic official skills (github.com/anthropics/skills, Apache-2.0). + EndorsedSkill { + name: "frontend-design", + description: "Create distinctive, production-grade frontend interfaces with high design quality (web components, pages, apps). 
Generates creative, polished code that avoids generic AI aesthetics.", + category: "Anthropic Design", + source: "anthropics/skills (official Anthropic catalog)", + install: Some("npx skills add anthropics/skills --skill frontend-design --yes (or Claude Code: /plugin marketplace add anthropics/skills)"), + }, // NVIDIA CUDA-X / GPU accelerated-computing skills from the official // NVIDIA-verified catalog (github.com/NVIDIA/skills). EndorsedSkill { @@ -817,6 +825,23 @@ mod tests { } } + #[test] + fn endorsed_skills_include_anthropic_frontend_design() { + let skill = endorsed_skills() + .iter() + .find(|s| s.name == "frontend-design") + .expect("expected endorsed Anthropic frontend-design skill"); + assert_eq!(skill.category, "Anthropic Design"); + assert!( + skill.source.contains("anthropics/skills"), + "frontend-design should be sourced from anthropics/skills" + ); + assert!( + skill.install.is_some_and(|cmd| cmd.contains("anthropics/skills")), + "frontend-design should have an anthropics/skills install hint" + ); + } + #[test] fn registry_contains_reports_loaded_skills() { let temp = tempfile::tempdir().expect("tempdir"); From 6dd9bb80fecf8b6474238e8509d4fb8dfc557bb4 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 11:32:16 -0700 Subject: [PATCH 13/57] fix(onboarding): record auth_success for auto-imported logins The guided first-run onboarding flow auto-imports existing external CLI logins (Claude/Codex/Gemini/Copilot/Cursor/OpenRouter) via run_external_auth_auto_import_candidates, which bypasses the manual pending_login path that record_auth_success was wired into. As a result every auto-imported login -- the happy path of the new onboarding -- was invisible to the activation funnel, making auth_success undercount badly (observed: more users reaching first_assistant_response than auth_success in post-0.17 install cohorts, which is impossible without auth). 
Surface coarse (provider, method="import") telemetry labels from the import outcome and record auth_success for each imported provider in both the onboarding and manual /login auto-import callers. Domain logic in jcode-app-core stays telemetry-free; the TUI layer emits the event, matching existing call sites. --- crates/jcode-app-core/src/external_auth.rs | 67 +++++++++++++++++++ crates/jcode-tui/src/tui/app/auth.rs | 7 ++ .../src/tui/app/onboarding_flow_control.rs | 7 ++ 3 files changed, 81 insertions(+) diff --git a/crates/jcode-app-core/src/external_auth.rs b/crates/jcode-app-core/src/external_auth.rs index faec2ef4c..80aa225a9 100644 --- a/crates/jcode-app-core/src/external_auth.rs +++ b/crates/jcode-app-core/src/external_auth.rs @@ -101,10 +101,59 @@ impl ExternalAuthReviewCandidate { } } +impl ExternalAuthReviewCandidate { + /// Coarse telemetry `(provider, method)` labels for the providers this + /// candidate activates on a successful import. Used by the onboarding flow + /// to record `auth_success` so auto-imported logins show up in the + /// activation funnel (they previously did not, because auto-import never + /// flows through the manual `pending_login` telemetry path). + /// + /// The method is reported as `"import"` so import-driven activation can be + /// distinguished from manual login in the funnel. 
+ pub fn telemetry_auth_labels(&self) -> Vec<(&'static str, &'static str)> { + const METHOD: &str = "import"; + match &self.action { + ExternalAuthReviewAction::CodexLegacy => vec![("openai", METHOD)], + ExternalAuthReviewAction::ClaudeCode => vec![("claude", METHOD)], + ExternalAuthReviewAction::GeminiCli => vec![("gemini", METHOD)], + ExternalAuthReviewAction::Copilot(_) => vec![("copilot", METHOD)], + ExternalAuthReviewAction::Cursor(_) => vec![("cursor", METHOD)], + ExternalAuthReviewAction::SharedExternal(source) => { + auth::external::source_provider_labels(*source) + .into_iter() + .filter_map(|label| { + telemetry_provider_id_for_label(label).map(|id| (id, METHOD)) + }) + .collect() + } + } + } +} + +/// Map a human-facing provider label (as produced by +/// [`auth::external::source_provider_labels`]) to the canonical telemetry +/// provider id used by the activation funnel. +fn telemetry_provider_id_for_label(label: &str) -> Option<&'static str> { + match label { + "OpenAI/Codex" => Some("openai"), + "Claude" => Some("claude"), + "Gemini" => Some("gemini"), + "Antigravity" => Some("antigravity"), + "GitHub Copilot" => Some("copilot"), + "OpenRouter/API-key providers" => Some("openrouter"), + _ => None, + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct ExternalAuthAutoImportOutcome { pub imported: usize, pub messages: Vec, + /// Coarse `(provider, method)` telemetry labels for each provider that was + /// successfully imported, so callers can record `auth_success` for the + /// activation funnel. May contain more entries than `imported` when a + /// single source carries multiple providers. 
+ pub imported_auth_labels: Vec<(&'static str, &'static str)>, } impl ExternalAuthAutoImportOutcome { @@ -535,6 +584,7 @@ pub async fn run_external_auth_auto_import_candidates( let mut outcome = ExternalAuthAutoImportOutcome { imported: 0, messages: Vec::new(), + imported_auth_labels: Vec::new(), }; for &index in selected { @@ -545,6 +595,9 @@ pub async fn run_external_auth_auto_import_candidates( match validate_external_auth_review_candidate(candidate).await { Ok(detail) => { outcome.imported += 1; + outcome + .imported_auth_labels + .extend(candidate.telemetry_auth_labels()); outcome.messages.push(format!( "✓ {} (from {}): {}", candidate.provider_summary, candidate.source_name, detail @@ -573,6 +626,7 @@ mod render_markdown_tests { let outcome = ExternalAuthAutoImportOutcome { imported: 0, messages: Vec::new(), + imported_auth_labels: Vec::new(), }; assert_eq!( outcome.render_markdown(), @@ -590,6 +644,7 @@ mod render_markdown_tests { "✓ Claude (from Claude Code): Loaded Claude credentials.".to_string(), "✕ Cursor (from Cursor native): no usable auth token.".to_string(), ], + imported_auth_labels: vec![("openai", "import"), ("claude", "import")], }; let md = outcome.render_markdown(); assert!(md.starts_with("**Logins imported**"), "got: {md}"); @@ -613,8 +668,20 @@ mod render_markdown_tests { let outcome = ExternalAuthAutoImportOutcome { imported: 1, messages: vec!["✓ Gemini (from Gemini CLI): Loaded Gemini credentials.".to_string()], + imported_auth_labels: vec![("gemini", "import")], }; let md = outcome.render_markdown(); assert!(md.contains("Reusing 1 existing login:"), "got: {md}"); } + + #[test] + fn fixture_candidate_reports_import_auth_labels() { + use super::ExternalAuthReviewCandidate; + // The fixture points at the legacy Codex action -> OpenAI provider. 
+ let candidate = ExternalAuthReviewCandidate::fixture("OpenAI/Codex", "Codex auth.json"); + assert_eq!( + candidate.telemetry_auth_labels(), + vec![("openai", "import")] + ); + } } diff --git a/crates/jcode-tui/src/tui/app/auth.rs b/crates/jcode-tui/src/tui/app/auth.rs index 0e31dfd6d..32e3437bb 100644 --- a/crates/jcode-tui/src/tui/app/auth.rs +++ b/crates/jcode-tui/src/tui/app/auth.rs @@ -2281,6 +2281,13 @@ impl App { .await { Ok(outcome) => { + // Auto-import bypasses the manual `pending_login` + // telemetry path, so record `auth_success` for each + // imported provider to keep the activation funnel + // accurate. + for (provider, method) in &outcome.imported_auth_labels { + crate::telemetry::record_auth_success(provider, method); + } Bus::global().publish(BusEvent::LoginCompleted(LoginCompleted { provider: "auto-import".to_string(), success: outcome.imported > 0, diff --git a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs index 3f95702d9..0803c626c 100644 --- a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs +++ b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs @@ -585,6 +585,13 @@ impl App { return; } }; + // Auto-import bypasses the manual `pending_login` path, so record + // `auth_success` here for each imported provider. Without this the + // onboarding activation funnel undercounts every imported login + // (the happy path of the guided first-run flow). 
+ for (provider, method) in &outcome.imported_auth_labels { + crate::telemetry::record_auth_success(provider, method); + } crate::bus::Bus::global().publish(crate::bus::BusEvent::LoginCompleted( crate::bus::LoginCompleted { provider: "auto-import".to_string(), From f77a740207810b7c630333bf36c9e3897fe06628 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 11:58:43 -0700 Subject: [PATCH 14/57] perf(session-picker): tail-read external transcript previews Codex/Claude Code preview loaders parsed the entire JSONL transcript (often multiple MB, up to tens of MB) on every selection change just to show the last ~20 messages. In the onboarding resume menu this made arrow-key navigation lag badly, since each selection spawned a fresh full-file parse thread. Normal /resume (jcode native sessions) avoids this path, which is why only onboarding felt slow. Read only the trailing 512 KiB of the file instead: drop the partial first line, skip malformed boundary records, and parse the rest. This turns each preview load from ~140ms into ~1ms regardless of transcript size. Adds regression tests covering large (>cap) Codex and Claude transcripts. --- .../src/tui/session_picker/loading.rs | 68 ++++++++++++--- .../src/tui/session_picker/loading_tests.rs | 82 +++++++++++++++++++ 2 files changed, 140 insertions(+), 10 deletions(-) diff --git a/crates/jcode-tui/src/tui/session_picker/loading.rs b/crates/jcode-tui/src/tui/session_picker/loading.rs index 26d5e935a..4acc66aef 100644 --- a/crates/jcode-tui/src/tui/session_picker/loading.rs +++ b/crates/jcode-tui/src/tui/session_picker/loading.rs @@ -642,6 +642,37 @@ fn collect_recent_files_recursive(root: &Path, extension: &str, limit: usize) -> files.into_iter().map(|(_, path)| path).collect() } +/// Maximum number of bytes we read from the *tail* of an external transcript +/// (Codex / Claude Code) when building its preview. 
These JSONL transcripts can +/// be tens of MB, but the preview only ever shows the last ~20 messages, so +/// parsing the whole file on every selection change made arrow-key navigation +/// in the resume / onboarding picker lag badly (each load reparsed the entire +/// file on a fresh thread). Reading a bounded tail keeps each preview load to a +/// sub-millisecond seek + parse regardless of transcript size. +/// +/// 512 KiB comfortably covers far more than 20 messages for normal transcripts +/// while bounding the worst case. +const EXTERNAL_PREVIEW_TAIL_BYTES: u64 = 512 * 1024; + +/// Read the trailing portion of a file as UTF-8 text, capped at +/// [`EXTERNAL_PREVIEW_TAIL_BYTES`]. When the file is larger than the cap we seek +/// to the tail and drop the (possibly partial) first line so we only ever parse +/// complete JSONL records. Returns `(text, truncated_from_head)` where +/// `truncated_from_head` indicates the head of the file was skipped. +fn read_file_tail_text(path: &Path, max_bytes: u64) -> Option<(String, bool)> { + let mut file = File::open(path).ok()?; + let len = file.metadata().ok()?.len(); + let truncated = len > max_bytes; + if truncated { + file.seek(SeekFrom::Start(len - max_bytes)).ok()?; + } + let mut bytes = Vec::with_capacity(max_bytes.min(len) as usize); + file.take(max_bytes).read_to_end(&mut bytes).ok()?; + // Lossily decode: transcripts are UTF-8, but a tail seek can land mid + // multi-byte sequence, and replacement chars are harmless for a preview. 
+ Some((String::from_utf8_lossy(&bytes).into_owned(), truncated)) +} + fn push_preview_message(preview: &mut Vec, role: &str, content: String) { let content = content.trim(); if content.is_empty() { @@ -1790,17 +1821,26 @@ fn load_external_claude_code_sessions(scan_limit: usize) -> Vec { } pub(super) fn load_claude_code_preview_from_path(path: &Path) -> Option> { - let file = File::open(path).ok()?; - let reader = BufReader::new(file); + // Only parse the tail of the transcript (see `load_codex_preview_from_path`): + // the preview shows the last ~20 messages, so reparsing multi-MB transcripts + // on every selection change made picker navigation lag. + let (text, truncated) = read_file_tail_text(path, EXTERNAL_PREVIEW_TAIL_BYTES)?; let mut preview = Vec::new(); - for line in reader.lines() { - let line = line.ok()?; + // If we seeked into the middle of the file, the first line is a partial + // record; drop it. When we read the whole file the first line is a real + // record we must keep. + let skip = usize::from(truncated); + for line in text.lines().skip(skip) { let trimmed = line.trim(); if trimmed.is_empty() { continue; } - let value: serde_json::Value = serde_json::from_str(trimmed).ok()?; + // Boundary lines from a tail slice may be malformed; skip rather than + // abandon the whole preview. + let Ok(value) = serde_json::from_str::(trimmed) else { + continue; + }; let entry_type = value .get("type") .and_then(|v| v.as_str()) @@ -1985,17 +2025,25 @@ fn find_codex_session_file(session_id: &str) -> Option { } pub(super) fn load_codex_preview_from_path(path: &Path) -> Option> { - let file = File::open(path).ok()?; - let reader = BufReader::new(file); + // Only parse the tail of the transcript: the preview shows the last ~20 + // messages, and these rollout files can be tens of MB, so reading the whole + // file on every selection change made picker navigation lag. 
+ let (text, _truncated) = read_file_tail_text(path, EXTERNAL_PREVIEW_TAIL_BYTES)?; let mut preview = Vec::new(); - for line in reader.lines().skip(1) { - let line = line.ok()?; + // When we read from the start we skip the first line (the `session_meta` + // record). When we read a tail slice the first line is almost certainly a + // partial record, so we drop it either way. + for line in text.lines().skip(1) { let trimmed = line.trim(); if trimmed.is_empty() { continue; } - let value: serde_json::Value = serde_json::from_str(trimmed).ok()?; + // A tail slice can yield malformed JSON on its boundary lines; skip + // those instead of bailing out of the whole preview. + let Ok(value) = serde_json::from_str::(trimmed) else { + continue; + }; let line_type = value .get("type") .and_then(|v| v.as_str()) diff --git a/crates/jcode-tui/src/tui/session_picker/loading_tests.rs b/crates/jcode-tui/src/tui/session_picker/loading_tests.rs index 769a9b888..cbfd8e642 100644 --- a/crates/jcode-tui/src/tui/session_picker/loading_tests.rs +++ b/crates/jcode-tui/src/tui/session_picker/loading_tests.rs @@ -318,6 +318,88 @@ fn load_codex_preview_preserves_blank_line_between_tool_transcript_and_followup_ ); } +#[test] +fn load_codex_preview_reads_only_tail_of_large_transcript() { + // A transcript far larger than the tail cap should still produce a preview + // of the most-recent messages, parsed from only the tail slice. This is the + // regression guard for the picker-navigation lag: previews must not depend + // on parsing the whole (multi-MB) file. + let temp = tempfile::tempdir().expect("temp dir"); + let transcript_path = temp.path().join("rollout-big.jsonl"); + + let mut contents = String::new(); + // session_meta header line (always skipped). 
+ contents.push_str( + "{\"timestamp\":\"2026-04-10T19:05:54.536Z\",\"type\":\"session_meta\",\"payload\":{\"id\":\"019d-big\"}}\n", + ); + // Padding messages near the head that must NOT appear in the preview once + // the file exceeds the tail cap. + for i in 0..50_000 { + contents.push_str(&format!( + "{{\"type\":\"response_item\",\"payload\":{{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{{\"type\":\"output_text\",\"text\":\"old padding message {i} aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"}}]}}}}\n", + )); + } + assert!( + contents.len() as u64 > EXTERNAL_PREVIEW_TAIL_BYTES, + "test transcript must exceed the tail cap" + ); + // Distinctive recent messages at the very end. + contents.push_str( + "{\"type\":\"response_item\",\"payload\":{\"type\":\"message\",\"role\":\"user\",\"content\":[{\"type\":\"input_text\",\"text\":\"RECENT_USER_MARKER\"}]}}\n", + ); + contents.push_str( + "{\"type\":\"response_item\",\"payload\":{\"type\":\"message\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"RECENT_ASSISTANT_MARKER\"}]}}\n", + ); + std::fs::write(&transcript_path, &contents).expect("write big transcript"); + + let preview = load_codex_preview_from_path(&transcript_path).expect("preview"); + // Preview is capped at 20 messages. + assert!(preview.len() <= 20, "preview should be capped, got {}", preview.len()); + // The most-recent markers must be present. + let last_two = &preview[preview.len().saturating_sub(2)..]; + assert!(last_two.iter().any(|m| m.content.contains("RECENT_USER_MARKER"))); + assert!(last_two.iter().any(|m| m.content.contains("RECENT_ASSISTANT_MARKER"))); + // The head padding must have been skipped (not parsed from the tail slice). 
+ assert!( + !preview.iter().any(|m| m.content.contains("old padding message 0 ")), + "head messages should not appear when only the tail is read" + ); +} + +#[test] +fn load_claude_code_preview_reads_only_tail_of_large_transcript() { + let temp = tempfile::tempdir().expect("temp dir"); + let transcript_path = temp.path().join("claude-big.jsonl"); + + let mut contents = String::new(); + for i in 0..50_000 { + contents.push_str(&format!( + "{{\"type\":\"assistant\",\"uuid\":\"a{i}\",\"message\":{{\"role\":\"assistant\",\"content\":[{{\"type\":\"text\",\"text\":\"old padding message {i} bbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\"}}]}}}}\n", + )); + } + assert!( + contents.len() as u64 > EXTERNAL_PREVIEW_TAIL_BYTES, + "test transcript must exceed the tail cap" + ); + contents.push_str( + "{\"type\":\"user\",\"uuid\":\"u_last\",\"message\":{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"RECENT_USER_MARKER\"}]}}\n", + ); + contents.push_str( + "{\"type\":\"assistant\",\"uuid\":\"a_last\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"RECENT_ASSISTANT_MARKER\"}]}}\n", + ); + std::fs::write(&transcript_path, &contents).expect("write big transcript"); + + let preview = load_claude_code_preview_from_path(&transcript_path).expect("preview"); + assert!(preview.len() <= 20, "preview should be capped, got {}", preview.len()); + let last_two = &preview[preview.len().saturating_sub(2)..]; + assert!(last_two.iter().any(|m| m.content.contains("RECENT_USER_MARKER"))); + assert!(last_two.iter().any(|m| m.content.contains("RECENT_ASSISTANT_MARKER"))); + assert!( + !preview.iter().any(|m| m.content.contains("old padding message 0 ")), + "head messages should not appear when only the tail is read" + ); +} + #[test] fn load_sessions_prefers_custom_title_over_generated_title() { let _env_lock = crate::storage::lock_test_env(); From f0656c407a3eed3fa1ad6350d7212dcfa1a42286 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> 
Date: Fri, 5 Jun 2026 13:27:42 -0700 Subject: [PATCH 15/57] skill_manage: include endorsed skill catalog in list output --- crates/jcode-app-core/src/tool/skill.rs | 145 +++++++++++++++++++----- 1 file changed, 114 insertions(+), 31 deletions(-) diff --git a/crates/jcode-app-core/src/tool/skill.rs b/crates/jcode-app-core/src/tool/skill.rs index b7678c1dd..6d8d5dcd1 100644 --- a/crates/jcode-app-core/src/tool/skill.rs +++ b/crates/jcode-app-core/src/tool/skill.rs @@ -21,7 +21,8 @@ impl SkillTool { #[derive(Deserialize)] struct SkillInput { - /// Action to perform: load (default), list, reload, reload_all, read + /// Action to perform: load (default), list, reload, reload_all, read. + /// `list` shows both loaded skills and the jcode-endorsed catalog. #[serde(default = "default_action")] action: String, /// Skill name (required for load, reload, read) @@ -119,36 +120,41 @@ impl SkillTool { async fn list_skills(&self) -> Result { let registry = self.registry.read().await; - let skills = registry.list(); - - if skills.is_empty() { - return Ok(ToolOutput::new( - "No skills available.\n\n\ - Skills are loaded from:\n\ - - ~/.claude/skills//SKILL.md\n\ - - ./.claude/skills//SKILL.md\n\n\ - Create a SKILL.md file with YAML frontmatter:\n\ - ---\n\ - name: my-skill\n\ - description: What this skill does\n\ - allowed-tools: bash, read, write\n\ - ---\n\n\ - # Skill content here", - ) - .with_title("Skills: None available")); - } - - let mut output = format!("Available skills: {}\n\n", skills.len()); - - for skill in skills { - output.push_str(&format!("## /{}\n", skill.name)); - output.push_str(&format!(" {}\n", skill.description)); - output.push_str(&format!(" Path: {}\n", skill.path.display())); - if let Some(ref tools) = skill.allowed_tools { - output.push_str(&format!(" Tools: {}\n", tools.join(", "))); + let mut skills = registry.list(); + skills.sort_by(|a, b| a.name.cmp(&b.name)); + + let installed: std::collections::HashSet<&str> = + skills.iter().map(|s| 
s.name.as_str()).collect(); + + let mut output = if skills.is_empty() { + "No skills loaded.\n\n\ + Skills are loaded from:\n\ + - ~/.jcode/skills//SKILL.md (global)\n\ + - ./.jcode/skills//SKILL.md (project-local)\n\ + - ./.claude/skills//SKILL.md (compatibility)\n\n\ + Create a SKILL.md file with YAML frontmatter:\n\ + ---\n\ + name: my-skill\n\ + description: What this skill does\n\ + allowed-tools: bash, read, write\n\ + ---\n\n\ + # Skill content here\n" + .to_string() + } else { + let mut output = format!("Loaded skills: {}\n\n", skills.len()); + for skill in &skills { + output.push_str(&format!("## /{}\n", skill.name)); + output.push_str(&format!(" {}\n", skill.description)); + output.push_str(&format!(" Path: {}\n", skill.path.display())); + if let Some(ref tools) = skill.allowed_tools { + output.push_str(&format!(" Tools: {}\n", tools.join(", "))); + } + output.push('\n'); } - output.push('\n'); - } + output + }; + + append_endorsed_skills(&mut output, &installed); Ok(ToolOutput::new(output).with_title("Skills: List")) } @@ -243,6 +249,61 @@ impl SkillTool { } } +/// Append the curated jcode-endorsed skill catalog to `output`, grouped by +/// category and marked with installed/not-installed status. `installed` is the +/// set of skill names currently loaded in the registry. +fn append_endorsed_skills(output: &mut String, installed: &std::collections::HashSet<&str>) { + let endorsed = crate::skill::endorsed_skills(); + if endorsed.is_empty() { + return; + } + + output.push_str("\nEndorsed skills (recommended by jcode)\n"); + + // Group by category, preserving first-seen order. 
+ let mut category_order: Vec<&str> = Vec::new(); + for skill in endorsed { + if !category_order.contains(&skill.category) { + category_order.push(skill.category); + } + } + + for category in category_order { + let in_category: Vec<_> = endorsed.iter().filter(|e| e.category == category).collect(); + let installed_count = in_category + .iter() + .filter(|e| installed.contains(e.name)) + .count(); + output.push_str(&format!( + "\n {} ({}/{} installed)\n", + category, + installed_count, + in_category.len() + )); + for skill in in_category { + let is_installed = installed.contains(skill.name); + let status = if is_installed { + "installed" + } else { + "not installed" + }; + output.push_str(&format!(" - /{} [{}]\n", skill.name, status)); + output.push_str(&format!(" {}\n", skill.description)); + output.push_str(&format!(" source: {}\n", skill.source)); + if !is_installed && let Some(install) = skill.install { + output.push_str(&format!(" install: {}\n", install)); + } + } + } + + output.push_str( + "\nActivate a loaded skill by loading it with skill_manage (action=load) or typing its slash command.\n", + ); + output.push_str( + "NVIDIA CUDA-X skills come from the official catalog at https://github.com/NVIDIA/skills.\n", + ); +} + fn normalize_skill_name(name: Option, action: &str) -> Result { let name = name.ok_or_else(|| anyhow::anyhow!("'name' is required for {} action", action))?; let trimmed = name.trim().trim_start_matches('/').to_string(); @@ -318,7 +379,29 @@ mod tests { let input = json!({"action": "list"}); let result = tool.execute(input, ctx).await.unwrap(); - assert!(result.output.contains("No skills available")); + assert!(result.output.contains("No skills loaded")); + // Even with no skills loaded, the endorsed catalog should be listed. 
+ assert!(result.output.contains("Endorsed skills")); + } + + #[tokio::test] + async fn test_list_includes_endorsed_skills() { + let tool = create_test_tool(); + let ctx = create_test_context(); + let input = json!({"action": "list"}); + + let result = tool.execute(input, ctx).await.unwrap(); + // Every endorsed skill should appear with an install-status marker. + for endorsed in crate::skill::endorsed_skills() { + assert!( + result.output.contains(&format!("/{}", endorsed.name)), + "expected endorsed skill /{} in:\n{}", + endorsed.name, + result.output + ); + } + // No skills are loaded in this tool, so they should be "not installed". + assert!(result.output.contains("[not installed]")); } #[tokio::test] From 8939a57aeed483dae07d79b811793690a332c082 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 13:50:18 -0700 Subject: [PATCH 16/57] feat(gmail): add pluggable Composio managed-OAuth backend The native gmail tool keeps its interface, confirmation gating, access tiers, and token-lean formatting, but its auth/transport is now pluggable via GmailBackend (Direct | Composio). - Direct: existing local Google OAuth tokens. - Composio: routes the same Gmail REST calls through Composio's proxy-execute endpoint, brokered by a Google-verified app. No unverified-app warning and no 7-day testing-mode token expiry. Backend is selected via JCODE_GMAIL_BACKEND=composio + COMPOSIO_API_KEY. Capability checks (is_configured/can_send/can_delete) are now backend-aware. Adds unit tests and docs/GMAIL_COMPOSIO_BACKEND.md. 
--- crates/jcode-app-core/src/tool/gmail.rs | 16 +- crates/jcode-base/src/gmail.rs | 427 +++++++++++++++++++----- docs/GMAIL_COMPOSIO_BACKEND.md | 73 ++++ 3 files changed, 423 insertions(+), 93 deletions(-) create mode 100644 docs/GMAIL_COMPOSIO_BACKEND.md diff --git a/crates/jcode-app-core/src/tool/gmail.rs b/crates/jcode-app-core/src/tool/gmail.rs index 9cbc2b2df..f132b3f36 100644 --- a/crates/jcode-app-core/src/tool/gmail.rs +++ b/crates/jcode-app-core/src/tool/gmail.rs @@ -4,7 +4,6 @@ use async_trait::async_trait; use serde::Deserialize; use serde_json::{Value, json}; -use crate::auth::google; use crate::gmail::{self, GmailClient, MessageFormat}; pub struct GmailTool { @@ -92,10 +91,8 @@ impl Tool for GmailTool { } async fn execute(&self, input: Value, _ctx: ToolContext) -> Result { - if !google::has_tokens() { - return Ok(ToolOutput::new( - "Gmail is not configured. Run `jcode login google` to set up Gmail access.", - )); + if !self.client.is_configured() { + return Ok(ToolOutput::new(self.client.not_configured_message())); } let params: GmailInput = serde_json::from_value(input)?; @@ -278,8 +275,7 @@ impl Tool for GmailTool { } "send" => { - let tokens = google::load_tokens()?; - if !tokens.tier.can_send() { + if !self.client.can_send() { return Ok(ToolOutput::new( "Send is not available. Your Gmail access is configured as Read & Draft Only (API-level restriction).\n\ The draft has been created - open Gmail to send it manually.\n\ @@ -323,8 +319,7 @@ impl Tool for GmailTool { } "send_draft" => { - let tokens = google::load_tokens()?; - if !tokens.tier.can_send() { + if !self.client.can_send() { return Ok(ToolOutput::new( "Send is not available. 
Your Gmail access is configured as Read & Draft Only (API-level restriction).\n\ Open Gmail to send the draft manually.\n\ @@ -352,8 +347,7 @@ impl Tool for GmailTool { } "trash" => { - let tokens = google::load_tokens()?; - if !tokens.tier.can_delete() { + if !self.client.can_delete() { return Ok(ToolOutput::new( "Trash is not available. Your Gmail access is configured as Read & Draft Only (API-level restriction).\n\ To enable delete, rerun `jcode login google --google-access-tier full`.", diff --git a/crates/jcode-base/src/gmail.rs b/crates/jcode-base/src/gmail.rs index 8b4309645..72ea959f7 100644 --- a/crates/jcode-base/src/gmail.rs +++ b/crates/jcode-base/src/gmail.rs @@ -1,12 +1,88 @@ use anyhow::Result; use serde::{Deserialize, Serialize}; +use serde_json::{Value, json}; use crate::auth::google; const GMAIL_API_BASE: &str = "https://gmail.googleapis.com/gmail/v1/users/me"; +const COMPOSIO_DEFAULT_BASE: &str = "https://backend.composio.dev/api/v3.1"; + +/// Where the Gmail tool gets its credentials and authenticated transport. +/// +/// `Direct` talks to the Google Gmail REST API using locally stored OAuth +/// tokens (the original behavior). `Composio` routes the *same* Gmail REST +/// calls through Composio's managed `proxy-execute` endpoint, so a +/// Google-verified app brokers auth: no unverified-app warning and no 7-day +/// testing-mode token expiry. +#[derive(Debug, Clone)] +pub enum GmailBackend { + Direct, + Composio(ComposioConfig), +} + +#[derive(Debug, Clone)] +pub struct ComposioConfig { + pub api_key: String, + pub base_url: String, + pub connected_account_id: Option, + pub user_id: Option, +} + +impl GmailBackend { + /// Resolve the backend from environment configuration. + /// + /// Defaults to `Direct`. Set `JCODE_GMAIL_BACKEND=composio` (with + /// `COMPOSIO_API_KEY` present) to broker Gmail through Composio. 
+ pub fn from_env() -> Self { + let selection = std::env::var("JCODE_GMAIL_BACKEND") + .unwrap_or_default() + .trim() + .to_lowercase(); + if selection == "composio" { + if let Some(cfg) = ComposioConfig::from_env() { + return GmailBackend::Composio(cfg); + } + eprintln!( + "JCODE_GMAIL_BACKEND=composio but COMPOSIO_API_KEY is not set; falling back to direct Gmail backend" + ); + } + GmailBackend::Direct + } + + pub fn label(&self) -> &'static str { + match self { + GmailBackend::Direct => "direct", + GmailBackend::Composio(_) => "composio", + } + } +} + +impl ComposioConfig { + fn from_env() -> Option { + let api_key = std::env::var("COMPOSIO_API_KEY").ok().filter(|s| !s.is_empty())?; + let base_url = std::env::var("COMPOSIO_BASE_URL") + .ok() + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| COMPOSIO_DEFAULT_BASE.to_string()); + let connected_account_id = std::env::var("COMPOSIO_GMAIL_CONNECTED_ACCOUNT_ID") + .ok() + .filter(|s| !s.is_empty()); + let user_id = std::env::var("COMPOSIO_GMAIL_USER_ID") + .or_else(|_| std::env::var("COMPOSIO_USER_ID")) + .ok() + .filter(|s| !s.is_empty()); + Some(Self { + api_key, + base_url, + connected_account_id, + user_id, + }) + } +} pub struct GmailClient { http: reqwest::Client, + backend: GmailBackend, } impl Default for GmailClient { @@ -17,13 +93,146 @@ impl Default for GmailClient { impl GmailClient { pub fn new() -> Self { + Self::with_backend(GmailBackend::from_env()) + } + + pub fn with_backend(backend: GmailBackend) -> Self { Self { http: crate::provider::shared_http_client(), + backend, + } + } + + pub fn backend_label(&self) -> &'static str { + self.backend.label() + } + + /// Whether this backend has credentials available to talk to Gmail. + pub fn is_configured(&self) -> bool { + match &self.backend { + GmailBackend::Direct => google::has_tokens(), + GmailBackend::Composio(cfg) => !cfg.api_key.is_empty(), + } + } + + /// Whether the current backend is allowed to send mail. 
+ /// + /// The `Direct` backend honors the locally configured access tier + /// (read-only logins cannot send). Composio connections request full + /// Gmail scopes, so sending is available. + pub fn can_send(&self) -> bool { + match &self.backend { + GmailBackend::Direct => google::load_tokens() + .map(|t| t.tier.can_send()) + .unwrap_or(false), + GmailBackend::Composio(_) => true, + } + } + + /// Whether the current backend is allowed to delete/trash mail. + pub fn can_delete(&self) -> bool { + match &self.backend { + GmailBackend::Direct => google::load_tokens() + .map(|t| t.tier.can_delete()) + .unwrap_or(false), + GmailBackend::Composio(_) => true, + } + } + + pub fn not_configured_message(&self) -> &'static str { + match &self.backend { + GmailBackend::Direct => { + "Gmail is not configured. Run `jcode login google` to set up Gmail access." + } + GmailBackend::Composio(_) => { + "Gmail (Composio backend) is not configured. Set COMPOSIO_API_KEY and connect your \ + Gmail account in Composio, then retry." + } + } + } + + /// Send an authenticated Gmail REST request and return the parsed JSON + /// response. Both backends produce the identical Gmail API JSON shape, so + /// callers can deserialize into the same typed structs. 
+ async fn request( + &self, + method: reqwest::Method, + url: &str, + body: Option, + ) -> Result { + match &self.backend { + GmailBackend::Direct => self.request_direct(method, url, body).await, + GmailBackend::Composio(cfg) => self.request_composio(cfg, method, url, body).await, } } - async fn token(&self) -> Result { - google::get_valid_token().await + async fn request_direct( + &self, + method: reqwest::Method, + url: &str, + body: Option, + ) -> Result { + let token = google::get_valid_token().await?; + let mut req = self.http.request(method, url).bearer_auth(&token); + if let Some(ref b) = body { + req = req.json(b); + } + let resp = req.send().await?; + let status = resp.status(); + let text = resp.text().await?; + if !status.is_success() { + return Err(anyhow::anyhow!( + "Gmail API error {}: {}", + status, + truncate_error(&text) + )); + } + if text.trim().is_empty() { + return Ok(Value::Null); + } + Ok(serde_json::from_str(&text)?) + } + + async fn request_composio( + &self, + cfg: &ComposioConfig, + method: reqwest::Method, + url: &str, + body: Option, + ) -> Result { + let payload = build_composio_proxy_payload(cfg, method.as_str(), url, body); + let endpoint = format!("{}/tools/execute/proxy", cfg.base_url.trim_end_matches('/')); + let resp = self + .http + .post(&endpoint) + .header("x-api-key", &cfg.api_key) + .json(&payload) + .send() + .await?; + let status = resp.status(); + let text = resp.text().await?; + if !status.is_success() { + return Err(anyhow::anyhow!( + "Composio proxy error {}: {}", + status, + truncate_error(&text) + )); + } + let envelope: Value = serde_json::from_str(&text)?; + // Composio wraps the upstream response as { data, status, headers }. 
+ if let Some(inner) = envelope.get("status").and_then(|s| s.as_u64()) { + if inner >= 400 { + return Err(anyhow::anyhow!( + "Gmail API error {} (via Composio): {}", + inner, + truncate_error(&envelope.get("data").map(|d| d.to_string()).unwrap_or_default()) + )); + } + } + if let Some(err) = envelope.get("error").filter(|e| !e.is_null()) { + return Err(anyhow::anyhow!("Composio error: {}", truncate_error(&err.to_string()))); + } + Ok(envelope.get("data").cloned().unwrap_or(Value::Null)) } pub async fn list_messages( @@ -32,7 +241,6 @@ impl GmailClient { label_ids: Option<&[&str]>, max_results: u32, ) -> Result { - let token = self.token().await?; let mut url = format!("{}/messages?maxResults={}", GMAIL_API_BASE, max_results); if let Some(q) = query { @@ -44,61 +252,47 @@ impl GmailClient { } } - let resp = self.http.get(&url).bearer_auth(&token).send().await?; - handle_error(&resp).await?; - let list: MessageList = resp.json().await?; - Ok(list) + let value = self.request(reqwest::Method::GET, &url, None).await?; + Ok(serde_json::from_value(value)?) } pub async fn get_message(&self, id: &str, format: MessageFormat) -> Result { - let token = self.token().await?; let url = format!( "{}/messages/{}?format={}", GMAIL_API_BASE, id, format.as_str() ); - let resp = self.http.get(&url).bearer_auth(&token).send().await?; - handle_error(&resp).await?; - let msg: Message = resp.json().await?; - Ok(msg) + let value = self.request(reqwest::Method::GET, &url, None).await?; + Ok(serde_json::from_value(value)?) 
} pub async fn list_threads(&self, query: Option<&str>, max_results: u32) -> Result { - let token = self.token().await?; let mut url = format!("{}/threads?maxResults={}", GMAIL_API_BASE, max_results); if let Some(q) = query { url.push_str(&format!("&q={}", urlencoding::encode(q))); } - let resp = self.http.get(&url).bearer_auth(&token).send().await?; - handle_error(&resp).await?; - let list: ThreadList = resp.json().await?; - Ok(list) + let value = self.request(reqwest::Method::GET, &url, None).await?; + Ok(serde_json::from_value(value)?) } pub async fn get_thread(&self, id: &str) -> Result { - let token = self.token().await?; let url = format!("{}/threads/{}?format=metadata", GMAIL_API_BASE, id); - let resp = self.http.get(&url).bearer_auth(&token).send().await?; - handle_error(&resp).await?; - let thread: Thread = resp.json().await?; - Ok(thread) + let value = self.request(reqwest::Method::GET, &url, None).await?; + Ok(serde_json::from_value(value)?) } pub async fn list_labels(&self) -> Result> { - let token = self.token().await?; let url = format!("{}/labels", GMAIL_API_BASE); - let resp = self.http.get(&url).bearer_auth(&token).send().await?; - handle_error(&resp).await?; - #[derive(Deserialize)] struct LabelList { labels: Option>, } - let list: LabelList = resp.json().await?; + let value = self.request(reqwest::Method::GET, &url, None).await?; + let list: LabelList = serde_json::from_value(value)?; Ok(list.labels.unwrap_or_default()) } @@ -110,7 +304,6 @@ impl GmailClient { in_reply_to: Option<&str>, thread_id: Option<&str>, ) -> Result { - let token = self.token().await?; let url = format!("{}/drafts", GMAIL_API_BASE); let mut headers = format!( @@ -127,40 +320,27 @@ impl GmailClient { let raw = format!("{}\r\n{}", headers, body); let encoded = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(raw.as_bytes()); - let mut message = serde_json::json!({ "raw": encoded }); + let mut message = json!({ "raw": encoded }); if let Some(tid) = thread_id { - 
message["threadId"] = serde_json::Value::String(tid.to_string()); + message["threadId"] = Value::String(tid.to_string()); } - let payload = serde_json::json!({ "message": message }); + let payload = json!({ "message": message }); - let resp = self - .http - .post(&url) - .bearer_auth(&token) - .json(&payload) - .send() + let value = self + .request(reqwest::Method::POST, &url, Some(payload)) .await?; - handle_error(&resp).await?; - let draft: Draft = resp.json().await?; - Ok(draft) + Ok(serde_json::from_value(value)?) } pub async fn send_draft(&self, draft_id: &str) -> Result { - let token = self.token().await?; let url = format!("{}/drafts/send", GMAIL_API_BASE); - let payload = serde_json::json!({ "id": draft_id }); + let payload = json!({ "id": draft_id }); - let resp = self - .http - .post(&url) - .bearer_auth(&token) - .json(&payload) - .send() + let value = self + .request(reqwest::Method::POST, &url, Some(payload)) .await?; - handle_error(&resp).await?; - let msg: Message = resp.json().await?; - Ok(msg) + Ok(serde_json::from_value(value)?) 
} pub async fn send_message( @@ -171,7 +351,6 @@ impl GmailClient { in_reply_to: Option<&str>, thread_id: Option<&str>, ) -> Result { - let token = self.token().await?; let url = format!("{}/messages/send", GMAIL_API_BASE); let mut headers = format!( @@ -188,28 +367,20 @@ impl GmailClient { let raw = format!("{}\r\n{}", headers, body); let encoded = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(raw.as_bytes()); - let mut message = serde_json::json!({ "raw": encoded }); + let mut message = json!({ "raw": encoded }); if let Some(tid) = thread_id { - message["threadId"] = serde_json::Value::String(tid.to_string()); + message["threadId"] = Value::String(tid.to_string()); } - let resp = self - .http - .post(&url) - .bearer_auth(&token) - .json(&message) - .send() + let value = self + .request(reqwest::Method::POST, &url, Some(message)) .await?; - handle_error(&resp).await?; - let msg: Message = resp.json().await?; - Ok(msg) + Ok(serde_json::from_value(value)?) } pub async fn trash_message(&self, id: &str) -> Result<()> { - let token = self.token().await?; let url = format!("{}/messages/{}/trash", GMAIL_API_BASE, id); - let resp = self.http.post(&url).bearer_auth(&token).send().await?; - handle_error(&resp).await?; + self.request(reqwest::Method::POST, &url, None).await?; Ok(()) } @@ -219,32 +390,49 @@ impl GmailClient { add_labels: &[&str], remove_labels: &[&str], ) -> Result<()> { - let token = self.token().await?; let url = format!("{}/messages/{}/modify", GMAIL_API_BASE, id); - let payload = serde_json::json!({ + let payload = json!({ "addLabelIds": add_labels, "removeLabelIds": remove_labels, }); - let resp = self - .http - .post(&url) - .bearer_auth(&token) - .json(&payload) - .send() + self.request(reqwest::Method::POST, &url, Some(payload)) .await?; - handle_error(&resp).await?; Ok(()) } } -async fn handle_error(resp: &reqwest::Response) -> Result<()> { - if resp.status().is_success() { - return Ok(()); +/// Build the request body for Composio's 
`proxy-execute` endpoint, which makes +/// an authenticated HTTP call to the connected toolkit (Gmail) on our behalf. +fn build_composio_proxy_payload( + cfg: &ComposioConfig, + method: &str, + url: &str, + body: Option, +) -> Value { + let mut payload = json!({ + "endpoint": url, + "method": method, + }); + if let Some(b) = body { + payload["body"] = b; + } + if let Some(account) = &cfg.connected_account_id { + payload["connected_account_id"] = Value::String(account.clone()); + } + if let Some(user) = &cfg.user_id { + payload["user_id"] = Value::String(user.clone()); + } + payload +} + +fn truncate_error(text: &str) -> String { + const MAX: usize = 400; + let trimmed = text.trim(); + if trimmed.len() <= MAX { + trimmed.to_string() + } else { + format!("{}…", &trimmed[..MAX]) } - Err(anyhow::anyhow!( - "Gmail API error {}: check token permissions", - resp.status() - )) } use base64::Engine; @@ -446,3 +634,78 @@ pub fn format_message_full(msg: &Message) -> String { } out } + +#[cfg(test)] +mod tests { + use super::*; + + fn cfg() -> ComposioConfig { + ComposioConfig { + api_key: "test-key".to_string(), + base_url: COMPOSIO_DEFAULT_BASE.to_string(), + connected_account_id: Some("ca_123".to_string()), + user_id: Some("me".to_string()), + } + } + + #[test] + fn composio_proxy_payload_get_has_no_body() { + let url = format!("{}/messages?maxResults=10", GMAIL_API_BASE); + let payload = build_composio_proxy_payload(&cfg(), "GET", &url, None); + assert_eq!(payload["endpoint"], url); + assert_eq!(payload["method"], "GET"); + assert!(payload.get("body").is_none()); + assert_eq!(payload["connected_account_id"], "ca_123"); + assert_eq!(payload["user_id"], "me"); + } + + #[test] + fn composio_proxy_payload_post_includes_body() { + let url = format!("{}/messages/send", GMAIL_API_BASE); + let body = json!({ "raw": "abc" }); + let payload = build_composio_proxy_payload(&cfg(), "POST", &url, Some(body.clone())); + assert_eq!(payload["method"], "POST"); + assert_eq!(payload["body"], 
body); + } + + #[test] + fn composio_proxy_payload_omits_optional_account_fields() { + let bare = ComposioConfig { + api_key: "k".to_string(), + base_url: COMPOSIO_DEFAULT_BASE.to_string(), + connected_account_id: None, + user_id: None, + }; + let payload = build_composio_proxy_payload(&bare, "GET", "http://x/y", None); + assert!(payload.get("connected_account_id").is_none()); + assert!(payload.get("user_id").is_none()); + } + + #[test] + fn direct_backend_label_and_default() { + let backend = GmailBackend::Direct; + assert_eq!(backend.label(), "direct"); + let client = GmailClient::with_backend(GmailBackend::Direct); + assert_eq!(client.backend_label(), "direct"); + } + + #[test] + fn composio_backend_is_configured_and_can_send() { + let client = GmailClient::with_backend(GmailBackend::Composio(cfg())); + assert_eq!(client.backend_label(), "composio"); + assert!(client.is_configured()); + // Composio connections request full Gmail scopes. + assert!(client.can_send()); + assert!(client.can_delete()); + } + + #[test] + fn truncate_error_caps_length() { + let short = truncate_error(" hi "); + assert_eq!(short, "hi"); + let long = "x".repeat(1000); + let capped = truncate_error(&long); + assert!(capped.len() <= 401 + 3); // 400 chars + ellipsis byte + assert!(capped.ends_with('…')); + } +} diff --git a/docs/GMAIL_COMPOSIO_BACKEND.md b/docs/GMAIL_COMPOSIO_BACKEND.md new file mode 100644 index 000000000..9b2ec44eb --- /dev/null +++ b/docs/GMAIL_COMPOSIO_BACKEND.md @@ -0,0 +1,73 @@ +# Gmail Tool: Composio Managed Backend + +The native `gmail` tool can source credentials and transport from one of two +backends. The tool interface, confirmation gating, access-tier logic, and +token-lean output formatting are identical across backends; only the +auth/transport layer changes. 
+ +## Backends + +| Backend | Auth | Pros | Cons | +|---|---|---|---| +| `direct` (default) | Local Google OAuth tokens (`jcode login google`) | No third party in the loop | Unverified-app warning; 7-day refresh-token expiry in Google "Testing" mode | +| `composio` | Composio-managed OAuth (Google-verified app) | No unverified-app warning, no 7-day expiry, no per-user Google Cloud project | Composio brokers Gmail token custody; external dependency/cost | + +Both backends call the *same* Gmail REST endpoints +(`https://gmail.googleapis.com/gmail/v1/users/me/...`). The Composio backend +routes those calls through Composio's +[`proxy-execute`](https://docs.composio.dev/reference/api-reference/tools/postToolsExecuteProxy) +endpoint, which attaches the managed Gmail credentials. Because the upstream +response shape is unchanged, all existing typed parsing and output formatting +is reused. + +## Selecting the backend + +The backend is resolved from environment at `GmailClient::new()`: + +- `JCODE_GMAIL_BACKEND=direct` (or unset) -> direct Google backend. +- `JCODE_GMAIL_BACKEND=composio` -> Composio backend (requires `COMPOSIO_API_KEY`). + +If `composio` is requested but `COMPOSIO_API_KEY` is missing, jcode warns and +falls back to `direct`. + +### Composio environment variables + +| Variable | Required | Description | +|---|---|---| +| `COMPOSIO_API_KEY` | Yes | Project API key from the Composio dashboard | +| `COMPOSIO_BASE_URL` | No | Override API base (default `https://backend.composio.dev/api/v3.1`) | +| `COMPOSIO_GMAIL_CONNECTED_ACCOUNT_ID` | No | Pin a specific connected account (`ca_...`) | +| `COMPOSIO_GMAIL_USER_ID` / `COMPOSIO_USER_ID` | No | End-user id for multi-user connected accounts | + +## One-time Composio setup + +1. Sign in at the Composio dashboard and copy your project API key. +2. Connect a Gmail account (Composio's hosted OAuth, no unverified-app warning). + Note the resulting `connected_account_id` if you want to pin it. +3.
Export the variables: + ```bash + export JCODE_GMAIL_BACKEND=composio + export COMPOSIO_API_KEY="ck_..." + # optional: + export COMPOSIO_GMAIL_CONNECTED_ACCOUNT_ID="ca_..." + export COMPOSIO_GMAIL_USER_ID="me" + ``` +4. Ensure the `gmail` tool is enabled in `config.toml`: + ```toml + [tools] + enabled = ["*"] + ``` + +## Access tiers + +- `direct`: honors the access tier chosen at `jcode login google` + (Read & Draft Only logins cannot send/trash, enforced at the OAuth scope level). +- `composio`: connections request full Gmail scopes, so send/trash are + available. The tool still requires explicit `confirmed: true` for send, + send_draft, and trash. + +## Trust note + +With the Composio backend, Composio holds your Gmail OAuth grant and sees API +traffic. This is the core tradeoff versus the direct backend. Disclose this to +users before enabling it as a default. From a80296c22c8f85bd9f19538c51d15c0e274fe4aa Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 13:54:11 -0700 Subject: [PATCH 17/57] desktop: pace animation redraws to ~60fps instead of busy-spinning The desktop render loop re-requested a redraw immediately after every animated frame (welcome-hero reveal, focus pulse, spinners, smooth scroll, streaming) in both the RedrawRequested handler and the AboutToWait fallback. Because the surface uses non-blocking Mailbox presentation, present() returns instantly, so the loop rendered as fast as the CPU allowed (~300fps on a 60Hz panel) and pinned the main thread near 100% CPU. That starved input handling and compositor scheduling, which is the root cause of the laggy/janky animations and scrolling, and made streaming events queue for 200ms-1s before the UI could process them. Schedule a paced redraw (DESKTOP_ANIMATION_FRAME_INTERVAL = 16ms, serviced via ControlFlow::WaitUntil in AboutToWait) instead of an immediate request. 
Measured idle main-thread CPU on the welcome screen dropped from ~99% to ~0-3%, frame rate from ~305fps to display refresh, while the stream-e2e benchmark still passes all interaction/no-paint budgets (max no-paint gap 71ms vs 250ms budget). --- crates/jcode-desktop/src/main.rs | 60 +++++++++++++++++++++++--- crates/jcode-desktop/src/main_tests.rs | 18 ++++++++ 2 files changed, 73 insertions(+), 5 deletions(-) diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs index 783f4a4ce..d2979b41f 100644 --- a/crates/jcode-desktop/src/main.rs +++ b/crates/jcode-desktop/src/main.rs @@ -145,6 +145,16 @@ const SINGLE_SESSION_CARET_COLOR: [f32; 4] = [0.130, 0.150, 0.190, 0.92]; const SESSION_SPAWN_REFRESH_DELAY: Duration = Duration::from_millis(350); const BACKGROUND_POLL_INTERVAL: Duration = Duration::from_millis(33); const BACKEND_REDRAW_FRAME_INTERVAL: Duration = Duration::from_millis(16); +/// Minimum spacing between animation-driven redraws. +/// +/// Without this, the desktop render loop re-requests a redraw immediately after +/// every animated frame (welcome-hero reveal, focus pulse, spinners, smooth +/// scroll, etc.). Because the surface uses non-blocking `Mailbox` presentation, +/// `present()` returns instantly, so the unthrottled loop renders at hundreds of +/// fps and pins the main thread near 100% CPU, starving input handling and the +/// compositor (the root cause of desktop lag/jank). ~16ms paces continuous +/// animations to about 60fps, matching typical display refresh. +const DESKTOP_ANIMATION_FRAME_INTERVAL: Duration = Duration::from_millis(16); const SURFACE_TIMEOUT_BACKOFF_MIN: Duration = Duration::from_millis(16); const SURFACE_TIMEOUT_BACKOFF_MAX: Duration = Duration::from_millis(250); const HEADLESS_CHAT_SMOKE_TIMEOUT: Duration = Duration::from_secs(90); @@ -383,6 +393,17 @@ fn desktop_background_wake( } } +/// Compute the next paced animation redraw time. 
+/// +/// Returns `Some(now + DESKTOP_ANIMATION_FRAME_INTERVAL)` while an animation is +/// active and `None` once it settles. Callers schedule this instead of calling +/// `request_redraw()` immediately, which would render as fast as the CPU allows +/// (the surface presents without blocking) and pin the main thread near 100% +/// CPU, starving input handling and the compositor. +fn next_animation_redraw_at(now: Instant, animation_active: bool) -> Option<Instant> { + animation_active.then(|| now + DESKTOP_ANIMATION_FRAME_INTERVAL) +} + #[derive(Clone, Copy, Debug, PartialEq)] struct StreamingTextArrivalStyle { opacity: f32, @@ -799,6 +820,10 @@ async fn run() -> Result<()> { let mut pending_backend_redraw_since: Option = None; let mut surface_timeout_backoff = SurfaceTimeoutBackoff::default(); let mut surface_timeout_redraw_at: Option<Instant> = None; + // Scheduled time for the next animation-driven redraw. Continuous animations + // re-arm this each presented frame so the loop paces itself to roughly the + // display refresh rate instead of busy-spinning the main thread. + let mut animation_redraw_at: Option<Instant> = None; let mut pending_resize: Option> = None; let mut space_hold_started_at: Option = None; let mut space_hold_consumed = false; @@ -845,6 +870,7 @@ async fn run() -> Result<()> { hot_reload_wake, space_hold_wake, surface_timeout_redraw_at, + animation_redraw_at, ] .into_iter() .flatten() @@ -1608,9 +1634,14 @@ async fn run() -> Result<()> { target.exit(); return; } - if frame.animation_active { - window.request_redraw(); - } + // Pace continuous animations instead of immediately + // re-requesting a redraw. An immediate request makes the + // event loop render as fast as the CPU allows (the surface + // presents without blocking), pinning the main thread near + // 100% CPU and starving input/compositor scheduling. The + // scheduled wake is serviced in AboutToWait.
+ animation_redraw_at = + next_animation_redraw_at(Instant::now(), frame.animation_active); } Err(SurfaceError::Lost | SurfaceError::Outdated) => { surface_timeout_backoff.reset(); @@ -1841,6 +1872,18 @@ async fn run() -> Result<()> { } } } + // Service the paced animation redraw scheduled by RedrawRequested. + // This keeps continuous animations advancing at ~display refresh + // without busy-spinning the loop between frames. + if let Some(redraw_at) = animation_redraw_at { + let now = Instant::now(); + if now >= redraw_at { + animation_redraw_at = None; + if surface_renderable { + window.request_redraw(); + } + } + } if surface_renderable && app.is_single_session() { let about_to_wait_started = Instant::now(); let size = window.inner_size(); @@ -1909,8 +1952,15 @@ async fn run() -> Result<()> { { canvas.needs_initial_frame = false; window.request_redraw(); - } else if surface_renderable && app.has_frame_animation() { - window.request_redraw(); + } else if surface_renderable + && app.has_frame_animation() + && animation_redraw_at.is_none() + { + // An animation is active but no paced redraw is scheduled yet + // (e.g. it just became active). Schedule one instead of + // requesting a redraw on every loop iteration, which would + // busy-spin the main thread at 100% CPU. 
+ animation_redraw_at = next_animation_redraw_at(Instant::now(), true); } } _ => {} diff --git a/crates/jcode-desktop/src/main_tests.rs b/crates/jcode-desktop/src/main_tests.rs index f05778b24..6fa95c06b 100644 --- a/crates/jcode-desktop/src/main_tests.rs +++ b/crates/jcode-desktop/src/main_tests.rs @@ -701,6 +701,24 @@ fn desktop_background_wake_only_tracks_active_frame_animation() { assert_eq!(desktop_background_wake(now, false, true), None); } +#[test] +fn next_animation_redraw_paces_active_animations_and_settles_when_idle() { + let now = Instant::now(); + + // While an animation is active, the next redraw is scheduled one frame + // interval out rather than immediately, so the loop does not busy-spin. + assert_eq!( + next_animation_redraw_at(now, true), + Some(now + DESKTOP_ANIMATION_FRAME_INTERVAL) + ); + // Once the animation settles, no further redraw is scheduled and the loop + // can park on ControlFlow::Wait. + assert_eq!(next_animation_redraw_at(now, false), None); + // The pacing interval must be positive; a zero interval would reintroduce + // the busy-spin it exists to prevent. + assert!(DESKTOP_ANIMATION_FRAME_INTERVAL > Duration::ZERO); +} + #[test] fn desktop_async_job_slots_are_bounded_and_released() -> Result<()> { let counter = std::sync::atomic::AtomicUsize::new(0); From 6727255f81df231a006bdce5a81e09acf17664dc Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 13:54:48 -0700 Subject: [PATCH 18/57] telemetry-worker: remove served web dashboard The visual dashboard (dashboard.js + stats.js + GET / and GET /v1/stats routing) was a separately-deployed Cloudflare Worker UI that does not belong in the jcode repo. Remove it and restore the worker to its POST /v1/event ingest-only surface. The telemetry accuracy work it was built on (turn_end meaningfulness, CI exclusion, the daily_active_users rollup) stays. users.sql remains as a CLI query alongside dau.sql / health.sql. 
--- telemetry-worker/README.md | 27 +- telemetry-worker/src/dashboard.js | 494 ------------------------------ telemetry-worker/src/stats.js | 389 ----------------------- telemetry-worker/src/worker.js | 57 +--- 4 files changed, 4 insertions(+), 963 deletions(-) delete mode 100644 telemetry-worker/src/dashboard.js delete mode 100644 telemetry-worker/src/stats.js diff --git a/telemetry-worker/README.md b/telemetry-worker/README.md index e58b87b6a..0e25e3db3 100644 --- a/telemetry-worker/README.md +++ b/telemetry-worker/README.md @@ -2,33 +2,8 @@ Cloudflare Worker that receives anonymous telemetry events from jcode. -## Dashboard - -The worker also serves a visual dashboard so you do not have to run SQL by hand: - -- `GET /` (or `/dashboard`) - the HTML dashboard. Public page, no data until a - token is entered. -- `GET /v1/stats` - JSON aggregates (counts only, never raw event rows), gated - behind `DASHBOARD_TOKEN`. Accepts `Authorization: Bearer `, - `?token=`, or `X-Dashboard-Token`. -- `POST /v1/event` - unchanged event ingest. - The headline number is **Total users**: distinct, non-CI `telemetry_id`s that -ever installed jcode OR did meaningful work in it. The page shows every metric -the API returns, organized into tiers (hero / key cards / diagnostic tables) so -the important numbers stand out while nothing is hidden. Each user tier (reached -> total > core) is broader than the one below it, and CI / raw figures are shown -alongside for transparency. - -Set the token once (it is a Worker secret, not in source): - -```bash -wrangler secret put DASHBOARD_TOKEN -# then open https:/// and paste the token -``` - -If `DASHBOARD_TOKEN` is unset the stats endpoint stays locked (deny by default). -The CLI equivalent of the headline number: +ever installed jcode OR did meaningful work in it. 
Run it with: ```bash wrangler d1 execute jcode-telemetry --remote --file=users.sql diff --git a/telemetry-worker/src/dashboard.js b/telemetry-worker/src/dashboard.js deleted file mode 100644 index d5ec6c94e..000000000 --- a/telemetry-worker/src/dashboard.js +++ /dev/null @@ -1,494 +0,0 @@ -// jcode telemetry console — "Terminal Observatory" aesthetic. -// -// Design intent (frontend-design skill): jcode is a terminal coding agent, so -// the dashboard is built as a precision instrument readout, not generic SaaS. -// - Type: JetBrains Mono (display + data) paired with a quiet grotesk for prose. -// - Palette: near-black graphite, warm phosphor amber as the dominant signal, -// a single cyan accent for the live/headline series. No purple-on-white. -// - Composition: a station-clock hero number, hairline rules, dense tabular -// instrument panels, scanline texture, staggered load-in reveals. -// -// Self-contained (HTML/CSS/inline-SVG, fonts via Google Fonts ). Fetches -// /v1/stats with the dashboard token. Every metric the API returns is shown, -// grouped by importance (HEADLINE / SIGNAL / DIAGNOSTIC). - -export const DASHBOARD_HTML = ` - - - - -jcode · telemetry console - - - - - - -
- - - -
- - - -`; diff --git a/telemetry-worker/src/stats.js b/telemetry-worker/src/stats.js deleted file mode 100644 index a359dded4..000000000 --- a/telemetry-worker/src/stats.js +++ /dev/null @@ -1,389 +0,0 @@ -// Read-only telemetry aggregation for the dashboard. -// -// Everything here returns counts/aggregates only, never raw event rows. Metrics -// are organized into tiers (headline / secondary / diagnostic) and tagged with -// importance so the dashboard can present "the one number" prominently while -// still surfacing all available information. -// -// Accuracy rules (mirrors README "Accuracy notes"): -// - Users are distinct telemetry_id, never event counts. -// - "meaningful" = real work; see MEANINGFUL_SQL. -// - Headline numbers exclude CI traffic (is_ci = 1) and non-release channels. -// - Raw / less-filtered tiers are always reported alongside, never removed. - -// Meaningful-activity predicate, shared by every query so all windows agree. -// A row is meaningful if it is a session_end/session_crash that did real work, -// OR a turn_end (which only fires after a completed user turn) that did work. 
-const MEANINGFUL_SQL = `( - (event IN ('session_end','session_crash') AND ( - turns > 0 OR had_user_prompt > 0 OR had_assistant_response > 0 - OR assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 - OR duration_secs > 0 OR error_provider_timeout > 0 OR error_auth_failed > 0 - OR error_tool_error > 0 OR error_mcp_error > 0 OR error_rate_limited > 0 - OR provider_switches > 0 OR model_switches > 0 - )) - OR (event = 'turn_end' AND ( - assistant_responses > 0 OR tool_calls > 0 OR executed_tool_calls > 0 - OR file_write_calls > 0 OR tests_run > 0 OR turn_success > 0 - )) -)`; - -const LIFECYCLE_EVENTS = "('session_start','turn_end','session_end','session_crash')"; - -async function one(env, sql) { - const result = await env.DB.prepare(sql).all(); - return (result.results && result.results[0]) || {}; -} - -async function many(env, sql) { - const result = await env.DB.prepare(sql).all(); - return result.results || []; -} - -export async function getStats(env) { - // --- Headline: total users (the one number) ----------------------------- - // A user is a distinct non-CI id that ever installed OR did meaningful work. - const totals = await one(env, ` - SELECT - COUNT(DISTINCT CASE WHEN is_ci = 0 AND (event = 'install' OR ${MEANINGFUL_SQL}) THEN telemetry_id END) AS total_users, - COUNT(DISTINCT CASE WHEN is_ci = 0 AND ${MEANINGFUL_SQL} THEN telemetry_id END) AS core_users, - COUNT(DISTINCT CASE WHEN is_ci = 0 THEN telemetry_id END) AS reached_users, - COUNT(DISTINCT CASE WHEN is_ci = 0 AND event = 'install' THEN telemetry_id END) AS installed_users, - COUNT(DISTINCT telemetry_id) AS all_ids_including_ci, - COUNT(DISTINCT CASE WHEN is_ci = 1 THEN telemetry_id END) AS ci_ids - FROM events - `); - - // --- Active users from the rollup (cheap, ingest-time) ------------------- - // DAU/WAU/MAU as distinct ids, headline = meaningful + release + non-CI. 
- const active = await one(env, ` - SELECT - COUNT(DISTINCT CASE WHEN activity_date = date('now') THEN telemetry_id END) AS dau_raw, - COUNT(DISTINCT CASE WHEN activity_date = date('now') AND meaningful_active > 0 THEN telemetry_id END) AS dau_meaningful, - COUNT(DISTINCT CASE WHEN activity_date = date('now') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS dau, - COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') THEN telemetry_id END) AS wau_raw, - COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') AND meaningful_active > 0 THEN telemetry_id END) AS wau_meaningful, - COUNT(DISTINCT CASE WHEN activity_date > date('now','-7 days') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS wau, - COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') THEN telemetry_id END) AS mau_raw, - COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND meaningful_active > 0 THEN telemetry_id END) AS mau_meaningful, - COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS mau, - COUNT(DISTINCT CASE WHEN activity_date > date('now','-30 days') AND last_is_ci = 1 THEN telemetry_id END) AS ci_mau - FROM daily_active_users - `); - - // --- Installs and lifecycle totals -------------------------------------- - const lifecycle = await one(env, ` - SELECT - SUM(CASE WHEN event = 'install' THEN 1 ELSE 0 END) AS install_events, - SUM(CASE WHEN event = 'upgrade' THEN 1 ELSE 0 END) AS upgrade_events, - SUM(CASE WHEN event = 'session_start' THEN 1 ELSE 0 END) AS session_starts, - SUM(CASE WHEN event = 'session_end' THEN 1 ELSE 0 END) AS session_ends, - SUM(CASE WHEN event = 'session_crash' THEN 1 ELSE 0 END) AS session_crashes, - SUM(CASE WHEN event = 'turn_end' THEN 1 ELSE 0 END) AS turn_ends, - COUNT(DISTINCT CASE WHEN event = 'install' THEN telemetry_id END) AS install_ids, - COUNT(DISTINCT CASE WHEN event = 
'install' AND is_ci = 0 THEN telemetry_id END) AS install_ids_noci - FROM events - WHERE event IN ('install','upgrade','session_start','turn_end','session_end','session_crash') - `); - const lifecycleCompletion = - (lifecycle.session_starts || 0) > 0 - ? Number(((lifecycle.session_ends + lifecycle.session_crashes) / lifecycle.session_starts).toFixed(3)) - : null; - const crashRate = - (lifecycle.session_ends + lifecycle.session_crashes) > 0 - ? Number((lifecycle.session_crashes / (lifecycle.session_ends + lifecycle.session_crashes)).toFixed(4)) - : null; - - // --- New vs returning (last 30d), retention ----------------------------- - const retention = await one(env, ` - WITH cohort AS ( - SELECT DISTINCT telemetry_id FROM events - WHERE event = 'install' AND is_ci = 0 - AND created_at >= datetime('now','-14 days') AND created_at < datetime('now','-7 days') - ), retained AS ( - SELECT DISTINCT telemetry_id FROM events - WHERE event IN ('session_end','session_crash') AND is_ci = 0 - AND created_at >= datetime('now','-7 days') - ) - SELECT - (SELECT COUNT(*) FROM cohort) AS d7_cohort, - (SELECT COUNT(*) FROM cohort WHERE telemetry_id IN retained) AS d7_retained - `); - const d7Retention = - (retention.d7_cohort || 0) > 0 - ? 
Number((retention.d7_retained / retention.d7_cohort).toFixed(3)) - : null; - - // --- 30d engagement quality --------------------------------------------- - const quality = await one(env, ` - SELECT - AVG(duration_mins) AS avg_session_mins, - AVG(turns) AS avg_turns, - AVG(CASE WHEN session_success > 0 THEN 1.0 ELSE 0.0 END) AS success_rate, - AVG(CASE WHEN abandoned_before_response > 0 THEN 1.0 ELSE 0.0 END) AS abandon_rate, - AVG(first_assistant_response_ms) AS avg_first_response_ms, - AVG(first_tool_success_ms) AS avg_first_tool_success_ms, - AVG(CASE WHEN executed_tool_calls > 0 THEN CAST(tool_latency_total_ms AS REAL)/executed_tool_calls END) AS avg_tool_latency_ms, - SUM(input_tokens + output_tokens) AS tokens_30d, - AVG(CASE WHEN multi_sessioned > 0 THEN 1.0 ELSE 0.0 END) AS multi_session_rate - FROM events - WHERE event IN ('session_end','session_crash') - AND is_ci = 0 AND created_at > datetime('now','-30 days') - `); - - // --- Token usage (all-time + 30d, full breakdown incl. 
cache) ----------- - const tokens = await one(env, ` - SELECT - SUM(input_tokens) AS input_all, - SUM(output_tokens) AS output_all, - SUM(cache_read_input_tokens) AS cache_read_all, - SUM(cache_creation_input_tokens) AS cache_creation_all, - SUM(total_tokens) AS total_all, - SUM(CASE WHEN created_at > datetime('now','-30 days') THEN input_tokens ELSE 0 END) AS input_30d, - SUM(CASE WHEN created_at > datetime('now','-30 days') THEN output_tokens ELSE 0 END) AS output_30d, - SUM(CASE WHEN created_at > datetime('now','-30 days') THEN cache_read_input_tokens ELSE 0 END) AS cache_read_30d, - SUM(CASE WHEN created_at > datetime('now','-30 days') THEN cache_creation_input_tokens ELSE 0 END) AS cache_creation_30d, - SUM(CASE WHEN created_at > datetime('now','-30 days') THEN total_tokens ELSE 0 END) AS total_30d - FROM events - WHERE event IN ('session_end','session_crash') AND is_ci = 0 - `); - - // --- Agent autonomy (30d): spawning, background/subagent/swarm, time split - const agent = await one(env, ` - SELECT - SUM(spawned_agent_count) AS spawned_agents, - SUM(background_task_count) AS background_tasks, - SUM(background_task_completed_count) AS background_completed, - SUM(subagent_task_count) AS subagent_tasks, - SUM(subagent_success_count) AS subagent_success, - SUM(swarm_task_count) AS swarm_tasks, - SUM(swarm_success_count) AS swarm_success, - SUM(user_cancelled_count) AS user_cancelled, - SUM(agent_active_ms_total) AS agent_active_ms, - SUM(agent_model_ms_total) AS agent_model_ms, - SUM(agent_tool_ms_total) AS agent_tool_ms, - SUM(agent_blocked_ms_total) AS agent_blocked_ms, - SUM(session_idle_ms_total) AS session_idle_ms, - AVG(time_to_first_agent_action_ms) AS avg_time_to_first_action_ms, - AVG(time_to_first_useful_action_ms) AS avg_time_to_first_useful_ms, - AVG(CASE WHEN max_concurrent_sessions > 0 THEN max_concurrent_sessions END) AS avg_max_concurrent - FROM events - WHERE event IN ('session_end','session_crash') AND is_ci = 0 - AND created_at > 
datetime('now','-30 days') - `); - - // --- Per-turn metrics (30d) --------------------------------------------- - const turns = await one(env, ` - SELECT - AVG(turn_active_duration_ms) AS avg_turn_ms, - AVG(CASE WHEN turn_success > 0 THEN 1.0 ELSE 0.0 END) AS turn_success_rate - FROM events - WHERE event = 'turn_end' AND is_ci = 0 AND created_at > datetime('now','-30 days') - `); - - // --- Errors (30d) -------------------------------------------------------- - const errors = await one(env, ` - SELECT - SUM(error_provider_timeout) AS provider_timeout, - SUM(error_auth_failed) AS auth_failed, - SUM(error_tool_error) AS tool_error, - SUM(error_mcp_error) AS mcp_error, - SUM(error_rate_limited) AS rate_limited - FROM events - WHERE event IN ('session_end','session_crash') AND is_ci = 0 - AND created_at > datetime('now','-30 days') - `); - - // --- Feature adoption (30d, distinct users) ----------------------------- - const features = await one(env, ` - SELECT - COUNT(DISTINCT CASE WHEN feature_memory_used > 0 THEN telemetry_id END) AS memory, - COUNT(DISTINCT CASE WHEN feature_swarm_used > 0 THEN telemetry_id END) AS swarm, - COUNT(DISTINCT CASE WHEN feature_web_used > 0 THEN telemetry_id END) AS web, - COUNT(DISTINCT CASE WHEN feature_email_used > 0 THEN telemetry_id END) AS email, - COUNT(DISTINCT CASE WHEN feature_mcp_used > 0 THEN telemetry_id END) AS mcp, - COUNT(DISTINCT CASE WHEN feature_side_panel_used > 0 THEN telemetry_id END) AS side_panel, - COUNT(DISTINCT CASE WHEN feature_goal_used > 0 THEN telemetry_id END) AS goal, - COUNT(DISTINCT CASE WHEN feature_selfdev_used > 0 THEN telemetry_id END) AS selfdev, - COUNT(DISTINCT CASE WHEN feature_background_used > 0 THEN telemetry_id END) AS background, - COUNT(DISTINCT CASE WHEN feature_subagent_used > 0 THEN telemetry_id END) AS subagent - FROM events - WHERE event IN ('session_end','session_crash') AND is_ci = 0 - AND created_at > datetime('now','-30 days') - `); - - // --- Transport mix (30d) 
------------------------------------------------- - const transport = await one(env, ` - SELECT - SUM(transport_https) AS https, - SUM(transport_persistent_ws_fresh) AS ws_fresh, - SUM(transport_persistent_ws_reuse) AS ws_reuse, - SUM(transport_cli_subprocess) AS cli, - SUM(transport_native_http2) AS native_http2, - SUM(transport_other) AS other - FROM events - WHERE event IN ('session_end','session_crash') AND is_ci = 0 - AND created_at > datetime('now','-30 days') - `); - - // --- Breakdowns (distinct users) ---------------------------------------- - const versions = await many(env, ` - SELECT version, COUNT(DISTINCT telemetry_id) AS users - FROM events WHERE is_ci = 0 AND version IS NOT NULL - GROUP BY version ORDER BY users DESC LIMIT 12 - `); - const os = await many(env, ` - SELECT os, COUNT(DISTINCT telemetry_id) AS users - FROM events WHERE is_ci = 0 AND os IS NOT NULL - GROUP BY os ORDER BY users DESC - `); - const arch = await many(env, ` - SELECT (COALESCE(os,'?') || ' / ' || COALESCE(arch,'?')) AS platform, COUNT(DISTINCT telemetry_id) AS users - FROM events WHERE is_ci = 0 AND os IS NOT NULL - GROUP BY os, arch ORDER BY users DESC LIMIT 12 - `); - const channels = await many(env, ` - SELECT COALESCE(build_channel,'unknown') AS build_channel, COUNT(DISTINCT telemetry_id) AS users - FROM events WHERE event IN ('session_end','session_crash') - GROUP BY build_channel ORDER BY users DESC - `); - const providers = await many(env, ` - SELECT COALESCE(provider_end,'unknown') AS provider, COUNT(DISTINCT telemetry_id) AS users - FROM events WHERE event IN ('session_end','session_crash') AND is_ci = 0 AND ${MEANINGFUL_SQL} - GROUP BY provider_end ORDER BY users DESC LIMIT 12 - `); - const auth = await many(env, ` - SELECT COALESCE(auth_provider,'unknown') AS auth_provider, COUNT(DISTINCT telemetry_id) AS users - FROM events WHERE event = 'auth_success' AND is_ci = 0 - GROUP BY auth_provider ORDER BY users DESC LIMIT 12 - `); - const onboarding = await many(env, ` 
- SELECT step, COUNT(DISTINCT telemetry_id) AS users - FROM events WHERE event = 'onboarding_step' AND is_ci = 0 AND step IS NOT NULL - GROUP BY step ORDER BY users DESC - `); - - // --- Usage timing: session starts by UTC hour --------------------------- - const hours = await many(env, ` - SELECT session_start_hour_utc AS hour, COUNT(*) AS sessions - FROM events - WHERE event = 'session_start' AND is_ci = 0 AND session_start_hour_utc IS NOT NULL - GROUP BY session_start_hour_utc ORDER BY session_start_hour_utc - `); - - // --- Data health: identity reconciliation + duplicate/skew signals ------ - // These are *not* product metrics; they tell you whether the pipeline is - // healthy (events arriving, ids matching installs, no single id dominating). - const health = await one(env, ` - WITH lifecycle AS ( - SELECT telemetry_id FROM events WHERE event IN ('session_end','session_crash') - ), install_ids AS ( - SELECT DISTINCT telemetry_id FROM events WHERE event = 'install' - ) - SELECT - (SELECT COUNT(DISTINCT telemetry_id) FROM lifecycle) AS lifecycle_ids, - (SELECT COUNT(DISTINCT telemetry_id) FROM events WHERE event = 'session_start') AS session_start_ids, - (SELECT COUNT(DISTINCT l.telemetry_id) FROM lifecycle l - LEFT JOIN install_ids i ON i.telemetry_id = l.telemetry_id - WHERE i.telemetry_id IS NULL) AS lifecycle_ids_without_install - `); - const skew = await one(env, ` - SELECT - MAX(c) AS max_session_events_one_id, - SUM(c) AS total_session_events, - (SELECT SUM(c2) FROM (SELECT c AS c2 FROM ( - SELECT telemetry_id, COUNT(*) AS c FROM events - WHERE event IN ('session_end','session_crash') - GROUP BY telemetry_id ORDER BY c DESC LIMIT 5))) AS top5_session_events - FROM (SELECT telemetry_id, COUNT(*) AS c FROM events - WHERE event IN ('session_end','session_crash') GROUP BY telemetry_id) - `); - const meaningfulSessions = await one(env, ` - SELECT COUNT(*) AS meaningful_sessions - FROM events - WHERE event IN ('session_end','session_crash') AND is_ci = 0 - AND 
created_at > datetime('now','-30 days') AND ${MEANINGFUL_SQL} - `); - - // --- User leaderboard: most active anonymous ids ------------------------ - // Ranks by lifecycle (session_end + session_crash) volume. telemetry_id is - // anonymous, so we surface a short prefix only. Useful for spotting power - // users and dev/test skew. Includes whether the id is CI and its channel. - const leaderboard = await many(env, ` - SELECT - substr(telemetry_id, 1, 8) AS id_prefix, - COUNT(*) AS sessions, - SUM(turns) AS turns, - SUM(input_tokens + output_tokens) AS tokens, - SUM(tool_calls) AS tool_calls, - MAX(is_ci) AS is_ci, - MAX(build_channel) AS build_channel, - MAX(version) AS version, - MAX(created_at) AS last_seen - FROM events - WHERE event IN ('session_end','session_crash') - GROUP BY telemetry_id - ORDER BY sessions DESC - LIMIT 20 - `); - - // --- Daily timeseries (last 60 days) for charts ------------------------- - const daily = await many(env, ` - SELECT - activity_date AS date, - COUNT(DISTINCT telemetry_id) AS raw, - COUNT(DISTINCT CASE WHEN meaningful_active > 0 THEN telemetry_id END) AS meaningful, - COUNT(DISTINCT CASE WHEN meaningful_release_active > 0 AND last_is_ci = 0 THEN telemetry_id END) AS headline, - COUNT(DISTINCT CASE WHEN last_is_ci = 1 THEN telemetry_id END) AS ci - FROM daily_active_users - WHERE activity_date > date('now','-60 days') - GROUP BY activity_date ORDER BY activity_date - `); - const dailyInstalls = await many(env, ` - SELECT date(created_at) AS date, COUNT(DISTINCT telemetry_id) AS installs - FROM events - WHERE event = 'install' AND is_ci = 0 AND created_at > datetime('now','-60 days') - GROUP BY date(created_at) ORDER BY date(created_at) - `); - - // --- Recent feedback (text only, no identifiers) ------------------------ - const feedback = await many(env, ` - SELECT created_at, feedback_text, feedback_rating, feedback_reason, version - FROM events - WHERE event = 'feedback' AND feedback_text IS NOT NULL - ORDER BY created_at 
DESC LIMIT 25 - `); - - return { - generated_at: new Date().toISOString(), - headline: { - total_users: totals.total_users || 0, - dau: active.dau || 0, - wau: active.wau || 0, - mau: active.mau || 0, - }, - users: { - total_users: totals.total_users || 0, - core_users: totals.core_users || 0, - installed_users: totals.installed_users || 0, - reached_users: totals.reached_users || 0, - all_ids_including_ci: totals.all_ids_including_ci || 0, - ci_ids: totals.ci_ids || 0, - }, - active, - lifecycle: { ...lifecycle, lifecycle_completion_ratio: lifecycleCompletion, crash_rate: crashRate }, - retention: { ...retention, d7_retention: d7Retention }, - quality: { ...quality, meaningful_sessions_30d: meaningfulSessions.meaningful_sessions || 0 }, - tokens, - agent, - turns, - errors, - features, - transport, - breakdowns: { versions, os, arch, channels, providers, auth, onboarding, hours }, - leaderboard, - health: { ...health, ...skew }, - timeseries: { daily, installs: dailyInstalls }, - feedback, - }; -} diff --git a/telemetry-worker/src/worker.js b/telemetry-worker/src/worker.js index b14ae2dd6..ecd45ae7e 100644 --- a/telemetry-worker/src/worker.js +++ b/telemetry-worker/src/worker.js @@ -1,6 +1,3 @@ -import { getStats } from "./stats.js"; -import { DASHBOARD_HTML } from "./dashboard.js"; - let cachedEventColumns = null; let cachedSessionDetailColumns = null; let cachedTurnDetailColumns = null; @@ -13,33 +10,11 @@ export default { }); } - const url = new URL(request.url); - - // Read-only dashboard surface (GET). The HTML page is public; the JSON stats - // endpoint is gated behind DASHBOARD_TOKEN so raw aggregates are not exposed - // to anyone who finds the URL. Raw events are never returned, only counts. 
- if (request.method === "GET") { - if (url.pathname === "/" || url.pathname === "/dashboard") { - return htmlResponse(DASHBOARD_HTML); - } - if (url.pathname === "/v1/stats") { - if (!isAuthorized(request, env)) { - return jsonResponse({ error: "Unauthorized" }, 401); - } - try { - const stats = await getStats(env); - return jsonResponse(stats); - } catch (err) { - return jsonResponse({ error: "Internal error", detail: String(err?.message || err) }, 500); - } - } - return jsonResponse({ error: "Not found" }, 404); - } - if (request.method !== "POST") { return jsonResponse({ error: "Method not allowed" }, 405); } + const url = new URL(request.url); if (url.pathname !== "/v1/event") { return jsonResponse({ error: "Not found" }, 404); } @@ -79,21 +54,6 @@ export default { }, }; -// When DASHBOARD_TOKEN is unset the stats endpoint stays locked (deny by -// default) rather than leaking aggregates. Accepts either a Bearer header or a -// ?token= query param so it works from curl and the browser fetch alike. -function isAuthorized(request, env) { - const expected = env.DASHBOARD_TOKEN; - if (!expected) { - return false; - } - const url = new URL(request.url); - const header = request.headers.get("authorization") || ""; - const bearer = header.startsWith("Bearer ") ? 
header.slice(7) : null; - const provided = bearer || url.searchParams.get("token") || request.headers.get("x-dashboard-token"); - return provided != null && provided === expected; -} - async function insertEvent(env, body) { const columns = await getEventColumns(env); const sessionDetailColumns = await getSessionDetailColumns(env); @@ -633,21 +593,10 @@ function jsonResponse(data, status = 200) { }); } -function htmlResponse(html, status = 200) { - return new Response(html, { - status, - headers: { - "Content-Type": "text/html; charset=utf-8", - "Cache-Control": "no-store", - ...corsHeaders(), - }, - }); -} - function corsHeaders() { return { "Access-Control-Allow-Origin": "*", - "Access-Control-Allow-Methods": "GET, POST, OPTIONS", - "Access-Control-Allow-Headers": "Content-Type, Authorization, X-Dashboard-Token", + "Access-Control-Allow-Methods": "POST, OPTIONS", + "Access-Control-Allow-Headers": "Content-Type", }; } From c449f76e7556411fd6b9fe3117f51c3b39be638f Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:01:04 -0700 Subject: [PATCH 19/57] desktop: parallelize hero reveal texture build to cut welcome stutter The runtime welcome-hero mask ("Hello there") is built once on the first single-session frame, but build_hero_reveal_texture runs a per-lit-pixel nearest-stroke search (O(pixels x segments)) on the UI thread, costing ~600ms and stalling the start of the reveal animation. Split the per-pixel fill across worker threads via std::thread::scope (rows are independent and read-only over glyph_rgba/segments), reducing the one-time build cost. Output is bit-identical to the serial path; small images fall back to serial to avoid spawn overhead. Added parity and worker-count tests. 
--- crates/jcode-desktop/src/main.rs | 126 ++++++++++++++++++++----- crates/jcode-desktop/src/main_tests.rs | 99 +++++++++++++++++++ 2 files changed, 203 insertions(+), 22 deletions(-) diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs index d2979b41f..6afaed163 100644 --- a/crates/jcode-desktop/src/main.rs +++ b/crates/jcode-desktop/src/main.rs @@ -11907,30 +11907,15 @@ fn build_hero_reveal_texture( } let mut values = vec![1.0_f32; (width * height) as usize]; - let mut min_value = f32::INFINITY; - let mut max_value = 0.0_f32; let brush_delay_px = (alpha_bounds.height() * 0.10).max(5.0); - for y in 0..height { - for x in 0..width { - let pixel_index = (y * width + x) as usize; - let alpha = glyph_rgba[pixel_index * 4]; - if alpha <= 2 { - continue; - } - let (path_progress, distance) = nearest_hero_stroke_progress( - x as f32 + 0.5, - y as f32 + 0.5, - alpha_bounds, - &segments, - ); - let width_delay = (distance / brush_delay_px).min(1.0) * 0.045; - let value = (path_progress + width_delay).clamp(0.0, 1.0); - values[pixel_index] = value; - min_value = min_value.min(value); - max_value = max_value.max(value); - } - } + // This per-pixel nearest-stroke search dominates the one-time hero mask + // build (hundreds of ms on the UI thread). Each lit pixel is independent + // and only reads `glyph_rgba`/`segments`, so split the rows across worker + // threads. Output is bit-identical to the serial version; min/max are + // reduced afterward from the filled buffer. + let (min_value, max_value) = + fill_hero_reveal_values(&mut values, width, height, glyph_rgba, alpha_bounds, &segments, brush_delay_px); if !min_value.is_finite() || max_value <= min_value { return None; @@ -11958,6 +11943,103 @@ fn build_hero_reveal_texture( Some(reveal_rgba) } +/// Fill `values` with each lit pixel's reveal progress and return the +/// `(min, max)` of the written values. 
+/// +/// The work is split into horizontal row bands processed on separate threads +/// when the image is large enough to amortize the spawn cost. Pixels are +/// independent, so the result is identical to a serial fill. +fn fill_hero_reveal_values( + values: &mut [f32], + width: u32, + height: u32, + glyph_rgba: &[u8], + alpha_bounds: HeroMaskPixelBounds, + segments: &[WelcomeHeroStrokeSegment], + brush_delay_px: f32, +) -> (f32, f32) { + let row_stride = width as usize; + let compute_row = |row_index: u32, row_values: &mut [f32]| -> (f32, f32) { + let mut min_value = f32::INFINITY; + let mut max_value = 0.0_f32; + let row_offset = row_index as usize * row_stride; + for x in 0..width { + let pixel_index = row_offset + x as usize; + let alpha = glyph_rgba[pixel_index * 4]; + if alpha <= 2 { + continue; + } + let (path_progress, distance) = nearest_hero_stroke_progress( + x as f32 + 0.5, + row_index as f32 + 0.5, + alpha_bounds, + segments, + ); + let width_delay = (distance / brush_delay_px).min(1.0) * 0.045; + let value = (path_progress + width_delay).clamp(0.0, 1.0); + row_values[x as usize] = value; + min_value = min_value.min(value); + max_value = max_value.max(value); + } + (min_value, max_value) + }; + + let total_pixels = row_stride.saturating_mul(height as usize); + let worker_count = hero_reveal_worker_count(total_pixels); + if worker_count <= 1 || height < 2 { + let mut min_value = f32::INFINITY; + let mut max_value = 0.0_f32; + for (row_index, row_values) in values.chunks_mut(row_stride).enumerate() { + let (row_min, row_max) = compute_row(row_index as u32, row_values); + min_value = min_value.min(row_min); + max_value = max_value.max(row_max); + } + return (min_value, max_value); + } + + let rows_per_band = (height as usize).div_ceil(worker_count).max(1); + let mut min_value = f32::INFINITY; + let mut max_value = 0.0_f32; + std::thread::scope(|scope| { + let mut handles = Vec::new(); + for (band_index, band) in values.chunks_mut(rows_per_band * 
row_stride).enumerate() { + let first_row = (band_index * rows_per_band) as u32; + let compute_row = &compute_row; + handles.push(scope.spawn(move || { + let mut band_min = f32::INFINITY; + let mut band_max = 0.0_f32; + for (offset, row_values) in band.chunks_mut(row_stride).enumerate() { + let (row_min, row_max) = compute_row(first_row + offset as u32, row_values); + band_min = band_min.min(row_min); + band_max = band_max.max(row_max); + } + (band_min, band_max) + })); + } + for handle in handles { + if let Ok((band_min, band_max)) = handle.join() { + min_value = min_value.min(band_min); + max_value = max_value.max(band_max); + } + } + }); + (min_value, max_value) +} + +/// Number of worker threads to use for the hero reveal fill. Returns 1 for +/// small images where threading overhead would dominate. +fn hero_reveal_worker_count(total_pixels: usize) -> usize { + const MIN_PIXELS_PER_WORKER: usize = 32 * 1024; + if total_pixels < MIN_PIXELS_PER_WORKER * 2 { + return 1; + } + let available = std::thread::available_parallelism() + .map(|value| value.get()) + .unwrap_or(1); + let by_work = total_pixels / MIN_PIXELS_PER_WORKER; + available.min(by_work).max(1) +} + fn nearest_hero_stroke_progress( x: f32, y: f32, diff --git a/crates/jcode-desktop/src/main_tests.rs b/crates/jcode-desktop/src/main_tests.rs index 6fa95c06b..3bb960e95 100644 --- a/crates/jcode-desktop/src/main_tests.rs +++ b/crates/jcode-desktop/src/main_tests.rs @@ -719,6 +719,105 @@ fn next_animation_redraw_paces_active_animations_and_settles_when_idle() { assert!(DESKTOP_ANIMATION_FRAME_INTERVAL > Duration::ZERO); } +#[test] +fn hero_reveal_worker_count_falls_back_to_serial_for_small_images() { + // Tiny images should not pay thread-spawn overhead. + assert_eq!(hero_reveal_worker_count(0), 1); + assert_eq!(hero_reveal_worker_count(1024), 1); + // Large images should use more than one worker when parallelism is available. 
+ let big = hero_reveal_worker_count(8 * 1024 * 1024); + let available = std::thread::available_parallelism() + .map(|value| value.get()) + .unwrap_or(1); + assert!(big >= 1); + assert!(big <= available.max(1)); +} + +#[test] +fn fill_hero_reveal_values_matches_serial_reference() { + let width = 64_u32; + let height = 48_u32; + let alpha_bounds = HeroMaskPixelBounds { + min_x: 4, + min_y: 4, + max_x: width - 4, + max_y: height - 4, + }; + // A handful of normalized stroke segments tracing a rough path. + let segments = vec![ + WelcomeHeroStrokeSegment { + start: [0.1, 0.2], + end: [0.4, 0.5], + start_progress: 0.0, + end_progress: 0.4, + }, + WelcomeHeroStrokeSegment { + start: [0.4, 0.5], + end: [0.8, 0.3], + start_progress: 0.4, + end_progress: 0.8, + }, + WelcomeHeroStrokeSegment { + start: [0.8, 0.3], + end: [0.9, 0.9], + start_progress: 0.8, + end_progress: 1.0, + }, + ]; + // Mark a checkerboard of lit pixels so both branches exercise lit/unlit. + let mut glyph_rgba = vec![0_u8; (width * height * 4) as usize]; + for y in 0..height { + for x in 0..width { + if (x + y) % 3 == 0 { + let index = ((y * width + x) * 4) as usize; + glyph_rgba[index] = 200; + } + } + } + let brush_delay_px = (alpha_bounds.height() * 0.10).max(5.0); + + // Serial reference computed directly here. 
+ let mut expected = vec![1.0_f32; (width * height) as usize]; + let mut expected_min = f32::INFINITY; + let mut expected_max = 0.0_f32; + for y in 0..height { + for x in 0..width { + let pixel_index = (y * width + x) as usize; + if glyph_rgba[pixel_index * 4] <= 2 { + continue; + } + let (path_progress, distance) = nearest_hero_stroke_progress( + x as f32 + 0.5, + y as f32 + 0.5, + alpha_bounds, + &segments, + ); + let width_delay = (distance / brush_delay_px).min(1.0) * 0.045; + let value = (path_progress + width_delay).clamp(0.0, 1.0); + expected[pixel_index] = value; + expected_min = expected_min.min(value); + expected_max = expected_max.max(value); + } + } + + // The parallel implementation must produce bit-identical output regardless + // of how many worker threads it chose. + let mut actual = vec![1.0_f32; (width * height) as usize]; + let (actual_min, actual_max) = fill_hero_reveal_values( + &mut actual, + width, + height, + &glyph_rgba, + alpha_bounds, + &segments, + brush_delay_px, + ); + + assert_eq!(actual, expected, "parallel hero reveal fill must match serial"); + assert_eq!(actual_min.to_bits(), expected_min.to_bits()); + assert_eq!(actual_max.to_bits(), expected_max.to_bits()); +} + #[test] fn desktop_async_job_slots_are_bounded_and_released() -> Result<()> { let counter = std::sync::atomic::AtomicUsize::new(0); From 6c855cc978ecaa86055ca4a334f075de8aa95b3a Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:04:22 -0700 Subject: [PATCH 20/57] fix(provider): show active OpenAI-compatible profile name in header (#329) The header and info widget hard-coded 'OpenRouter' for any model routed through the OpenRouter slot, even when the user switched to a direct OpenAI-compatible profile such as NVIDIA NIM at runtime. The display name was resolved from process env vars that only reflect the startup profile, so a runtime '/model' switch never updated the label. 
Add a runtime-aware Provider::display_name() (default = name()) overridden by OpenRouterProvider (maps profile_id -> 'NVIDIA NIM', etc.) and MultiProvider (delegates to the active execution runtime). name() stays the stable machine id ('openrouter') that billing/routing keys off. format_model_name() in the header now uses the active provider's display name instead of a fixed 'OpenRouter:' prefix. Adds regression tests. --- crates/jcode-app-core/src/agent/status.rs | 4 +- crates/jcode-base/src/provider/mod.rs | 13 ++++ crates/jcode-base/src/provider/openrouter.rs | 39 +++++++++++ .../src/provider/openrouter_provider_impl.rs | 4 ++ .../src/provider/openrouter_tests.rs | 68 +++++++++++++++++++ crates/jcode-provider-core/src/lib.rs | 19 +++++- crates/jcode-tui/src/tui/app/tui_state.rs | 10 +-- crates/jcode-tui/src/tui/ui_header.rs | 41 ++++++++++- 8 files changed, 188 insertions(+), 10 deletions(-) diff --git a/crates/jcode-app-core/src/agent/status.rs b/crates/jcode-app-core/src/agent/status.rs index 747179f30..e011b5b01 100644 --- a/crates/jcode-app-core/src/agent/status.rs +++ b/crates/jcode-app-core/src/agent/status.rs @@ -134,7 +134,9 @@ impl Agent { } pub fn provider_name(&self) -> String { - crate::provider_catalog::runtime_provider_display_name(self.provider.name()) + // `display_name()` resolves the active runtime profile (e.g. NVIDIA NIM) + // for the OpenRouter slot; for all other providers it equals `name()`. + self.provider.display_name() } pub fn provider_model(&self) -> String { diff --git a/crates/jcode-base/src/provider/mod.rs b/crates/jcode-base/src/provider/mod.rs index b6876718a..633575798 100644 --- a/crates/jcode-base/src/provider/mod.rs +++ b/crates/jcode-base/src/provider/mod.rs @@ -1040,6 +1040,19 @@ impl Provider for MultiProvider { } } + fn display_name(&self) -> String { + // The OpenRouter slot multiplexes the public aggregator and every + // direct OpenAI-compatible profile (NVIDIA NIM, DeepSeek, ...). 
Ask the + // active execution runtime for its own label so the UI reflects the + // profile selected at runtime rather than the fixed "OpenRouter" name. + if matches!(self.active_provider(), ActiveProvider::OpenRouter) + && let Some(execution) = self.active_openrouter_execution_provider() + { + return execution.runtime_display_name(); + } + self.name().to_string() + } + fn model(&self) -> String { match self.active_provider() { ActiveProvider::Claude => { diff --git a/crates/jcode-base/src/provider/openrouter.rs b/crates/jcode-base/src/provider/openrouter.rs index a4745e1b5..6caf84578 100644 --- a/crates/jcode-base/src/provider/openrouter.rs +++ b/crates/jcode-base/src/provider/openrouter.rs @@ -1046,6 +1046,45 @@ impl OpenRouterProvider { self.supports_provider_features } + /// Human-facing label for the runtime backing this provider instance. + /// + /// Unlike the env-var based [`crate::provider_catalog::runtime_provider_display_name`], + /// this reads the instance's own `profile_id`/`api_base`, so it stays correct + /// after a runtime `/model` switch to a different OpenAI-compatible profile + /// (e.g. NVIDIA NIM) even though `name()` is fixed at `"openrouter"`. + pub(crate) fn runtime_display_name(&self) -> String { + // Direct OpenAI-compatible profile (NVIDIA NIM, DeepSeek, Z.AI, ...). + if let Some(profile_id) = self.profile_id.as_deref() { + if let Some(profile) = openai_compatible_profile_by_id(profile_id) { + return profile.display_name.to_string(); + } + return profile_id.to_string(); + } + + // Non-aggregator endpoint without a known profile id: classify by base + // URL so custom OpenAI-compatible endpoints don't masquerade as the + // public OpenRouter aggregator. 
+ if !self.supports_provider_features { + if let Some(profile_id) = + crate::provider_catalog::openai_compatible_profile_id_for_api_base(&self.api_base) + && let Some(profile) = openai_compatible_profile_by_id(profile_id) + { + return profile.display_name.to_string(); + } + if std::env::var("JCODE_RUNTIME_PROVIDER") + .ok() + .is_some_and(|value| value.trim().eq_ignore_ascii_case("azure-openai")) + { + return "Azure OpenAI".to_string(); + } + if !self.api_base.contains("openrouter.ai") { + return "OpenAI-compatible".to_string(); + } + } + + "OpenRouter".to_string() + } + pub(crate) fn direct_openai_compatible_route_parts(&self) -> Option<(String, String, String)> { if self.supports_provider_features { return None; diff --git a/crates/jcode-base/src/provider/openrouter_provider_impl.rs b/crates/jcode-base/src/provider/openrouter_provider_impl.rs index cf4c93a75..f3edd04bb 100644 --- a/crates/jcode-base/src/provider/openrouter_provider_impl.rs +++ b/crates/jcode-base/src/provider/openrouter_provider_impl.rs @@ -743,6 +743,10 @@ impl Provider for OpenRouterProvider { "openrouter" } + fn display_name(&self) -> String { + self.runtime_display_name() + } + fn model(&self) -> String { self.model .try_read() diff --git a/crates/jcode-base/src/provider/openrouter_tests.rs b/crates/jcode-base/src/provider/openrouter_tests.rs index 28a0a3ecc..0628a1d84 100644 --- a/crates/jcode-base/src/provider/openrouter_tests.rs +++ b/crates/jcode-base/src/provider/openrouter_tests.rs @@ -2193,3 +2193,71 @@ fn strict_openai_schema_endpoint_allows_other_providers() { "https://api.openai.com/v1" )); } + +#[test] +fn runtime_display_name_tracks_active_openai_compatible_profile() { + // Regression for issue #329: switching to a direct OpenAI-compatible + // profile (NVIDIA NIM) at runtime must surface that profile's display + // name, not the fixed "OpenRouter" aggregator label. The machine-facing + // `name()` stays "openrouter" because billing/routing logic keys off it. 
+ let _lock = ENV_LOCK.lock(); + let temp = TempDir::new().expect("create temp home"); + let jcode_home = temp.path().join("jcode-home"); + let _jcode_home = EnvVarGuard::set("JCODE_HOME", &jcode_home); + let _home = EnvVarGuard::set("HOME", temp.path()); + let _appdata = EnvVarGuard::set("APPDATA", temp.path().join("AppData").join("Roaming")); + let _env = isolate_openrouter_autodetect_env(); + + // Configure both the OpenRouter aggregator and NVIDIA NIM credentials so + // the slot can host either runtime. Set after the isolate guard, which + // clears every profile api-key env var. + let _or_key = EnvVarGuard::set("OPENROUTER_API_KEY", "or-test-key"); + let _nim_key = EnvVarGuard::set("NVIDIA_API_KEY", "nim-test-key"); + crate::config::invalidate_config_cache(); + + let provider = + crate::provider::MultiProvider::new_with_auth_status(crate::auth::AuthStatus::default()); + + // Switch to a NVIDIA NIM model via the profile-prefixed model request. + provider + .set_model("nvidia-nim:nvidia/llama-3.1-nemotron-ultra-253b-v1") + .expect("switch to nvidia-nim profile"); + + assert_eq!( + Provider::name(&provider), + "OpenRouter", + "machine-facing name must stay stable for billing/routing" + ); + assert_eq!( + Provider::display_name(&provider), + "NVIDIA NIM", + "header/UI display name must reflect the active runtime profile" + ); + + // Switching back to the plain OpenRouter aggregator restores the label. + provider + .set_model("anthropic/claude-sonnet-4") + .expect("switch back to openrouter aggregator"); + assert_eq!(Provider::display_name(&provider), "OpenRouter"); +} + +#[test] +fn runtime_display_name_for_profile_runtime_instance() { + // Direct unit coverage of the per-instance resolver used by + // `Provider::display_name`. 
+ let _lock = ENV_LOCK.lock(); + let temp = TempDir::new().expect("create temp home"); + let jcode_home = temp.path().join("jcode-home"); + let _jcode_home = EnvVarGuard::set("JCODE_HOME", &jcode_home); + let _home = EnvVarGuard::set("HOME", temp.path()); + let _appdata = EnvVarGuard::set("APPDATA", temp.path().join("AppData").join("Roaming")); + let _env = isolate_openrouter_autodetect_env(); + let _key = EnvVarGuard::set("NVIDIA_API_KEY", "nim-test-key"); + + let nim = OpenRouterProvider::new_openai_compatible_profile_runtime( + crate::provider_catalog::NVIDIA_NIM_PROFILE, + ) + .expect("build nvidia-nim runtime"); + assert_eq!(nim.runtime_display_name(), "NVIDIA NIM"); + assert_eq!(Provider::name(&nim), "openrouter"); +} diff --git a/crates/jcode-provider-core/src/lib.rs b/crates/jcode-provider-core/src/lib.rs index 93fe676e7..da5bb929d 100644 --- a/crates/jcode-provider-core/src/lib.rs +++ b/crates/jcode-provider-core/src/lib.rs @@ -74,8 +74,25 @@ pub trait Provider: Send + Sync { } /// Get the provider name. + /// + /// This is the stable, machine-facing identifier (e.g. `"openrouter"`, + /// `"claude"`). Several surfaces key billing and routing decisions off this + /// value, so it must stay constant for a given provider class even when the + /// underlying runtime is a specific OpenAI-compatible profile. Use + /// [`Provider::display_name`] for anything shown to the user. fn name(&self) -> &str; + /// Human-facing provider label for the *current runtime selection*. + /// + /// Defaults to [`Provider::name`]. Provider orchestrators that multiplex + /// several backends behind one `name()` (notably the OpenRouter slot, which + /// also serves direct OpenAI-compatible profiles such as NVIDIA NIM or + /// DeepSeek) override this so the UI reflects the profile the user actually + /// selected at runtime instead of a fixed aggregator label. + fn display_name(&self) -> String { + self.name().to_string() + } + /// Get the model identifier being used. 
fn model(&self) -> String { "unknown".to_string() @@ -823,7 +840,7 @@ impl ModelCatalogSnapshot { pub fn from_provider(provider: &dyn Provider) -> Self { Self::new( - Some(provider.name().to_string()), + Some(provider.display_name()), Some(provider.model()), provider.available_models_display(), provider.model_routes(), diff --git a/crates/jcode-tui/src/tui/app/tui_state.rs b/crates/jcode-tui/src/tui/app/tui_state.rs index 7121bf6a7..f4e90b82d 100644 --- a/crates/jcode-tui/src/tui/app/tui_state.rs +++ b/crates/jcode-tui/src/tui/app/tui_state.rs @@ -459,9 +459,9 @@ impl crate::tui::TuiState for App { if self.is_remote { self.remote_header_provider_name().unwrap_or_default() } else { - self.remote_provider_name.clone().unwrap_or_else(|| { - crate::provider_catalog::runtime_provider_display_name(self.provider.name()) - }) + self.remote_provider_name + .clone() + .unwrap_or_else(|| self.provider.display_name()) } } @@ -1285,9 +1285,9 @@ impl crate::tui::TuiState for App { provider_name: if uses_remote_widget_metadata { self.remote_provider_name .clone() - .or_else(|| Some(self.provider.name().to_string())) + .or_else(|| Some(self.provider.display_name())) } else { - Some(self.provider.name().to_string()) + Some(self.provider.display_name()) }, auth_method, upstream_provider: self.upstream_provider.clone(), diff --git a/crates/jcode-tui/src/tui/ui_header.rs b/crates/jcode-tui/src/tui/ui_header.rs index 4bc96213d..a58b3654c 100644 --- a/crates/jcode-tui/src/tui/ui_header.rs +++ b/crates/jcode-tui/src/tui/ui_header.rs @@ -45,9 +45,22 @@ pub(crate) fn capitalize(s: &str) -> String { } } -fn format_model_name(short: &str) -> String { +fn format_model_name(short: &str, provider_name: &str) -> String { if short.contains('/') { - return format!("OpenRouter: {}", short); + // Slashed model ids (e.g. `nvidia/nemotron-...`) are served by the + // OpenRouter slot, which also fronts direct OpenAI-compatible profiles + // such as NVIDIA NIM or DeepSeek. 
Label the line with the active + // provider's display name instead of hard-coding "OpenRouter" so the + // header matches the profile the user actually selected. + let label = { + let trimmed = provider_name.trim(); + if trimmed.is_empty() { + "OpenRouter".to_string() + } else { + trimmed.to_string() + } + }; + return format!("{}: {}", label, short); } if short.contains("opus") { if short.contains("4.5") { @@ -389,7 +402,7 @@ pub(super) fn build_persistent_header(app: &dyn TuiState, width: u16) -> Vec = Vec::new(); @@ -1028,4 +1041,26 @@ mod tests { let line = build_auth_status_line(&AuthStatus::default(), 120); assert!(line.spans.is_empty(), "line should be empty: {line:?}"); } + + #[test] + fn format_model_name_labels_slashed_models_with_active_provider() { + // Regression for issue #329: a NVIDIA NIM model must be labeled with the + // active provider's display name, not the fixed "OpenRouter" aggregator. + assert_eq!( + format_model_name("nvidia/nemotron-3-super-120b-a12b", "NVIDIA NIM"), + "NVIDIA NIM: nvidia/nemotron-3-super-120b-a12b" + ); + // The public aggregator still reads "OpenRouter". + assert_eq!( + format_model_name("anthropic/claude-sonnet-4", "OpenRouter"), + "OpenRouter: anthropic/claude-sonnet-4" + ); + // Missing provider name falls back to "OpenRouter" rather than an empty label. + assert_eq!( + format_model_name("deepseek/deepseek-chat", ""), + "OpenRouter: deepseek/deepseek-chat" + ); + // Non-slashed models are unaffected by the provider label. 
+ assert_eq!(format_model_name("claude-opus-4-6", "OpenRouter"), "Claude Opus"); + } } From 65fb513bf600baf18bb091ec5a852bb7b503d05a Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:08:07 -0700 Subject: [PATCH 21/57] desktop: add --real-transcript-scroll-benchmark to profile scrolling on real transcripts --- crates/jcode-desktop/src/desktop_benchmark.rs | 16 + crates/jcode-desktop/src/main.rs | 300 ++++++++++++++++++ crates/jcode-desktop/src/session_data.rs | 72 +++++ 3 files changed, 388 insertions(+) diff --git a/crates/jcode-desktop/src/desktop_benchmark.rs b/crates/jcode-desktop/src/desktop_benchmark.rs index 6b8e7a021..4e6095f25 100644 --- a/crates/jcode-desktop/src/desktop_benchmark.rs +++ b/crates/jcode-desktop/src/desktop_benchmark.rs @@ -43,6 +43,22 @@ pub(super) fn resize_render_benchmark_frames(args: &[String]) -> Option { }) } +/// Parse `--real-transcript-scroll-benchmark[=N]`, the number of scroll frames +/// to profile against each of the user's largest real on-disk transcripts. 
+pub(super) fn real_transcript_scroll_benchmark_frames(args: &[String]) -> Option { + args.iter().enumerate().find_map(|(index, arg)| { + arg.strip_prefix("--real-transcript-scroll-benchmark=") + .and_then(|value| value.parse::().ok()) + .or_else(|| { + (arg == "--real-transcript-scroll-benchmark").then(|| { + args.get(index + 1) + .and_then(|value| value.parse::().ok()) + .unwrap_or(600) + }) + }) + }) +} + pub(super) fn benchmark_phase( mut frames: usize, mut run_frame: impl FnMut(usize) -> usize, diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs index 6afaed163..e8dafb983 100644 --- a/crates/jcode-desktop/src/main.rs +++ b/crates/jcode-desktop/src/main.rs @@ -716,6 +716,9 @@ async fn run() -> Result<()> { if let Some(frames) = scroll_render_benchmark_frames(&args) { return run_scroll_render_benchmark(frames); } + if let Some(frames) = real_transcript_scroll_benchmark_frames(&args) { + return run_real_transcript_scroll_benchmark(frames); + } if let Some(output_dir) = hero_screenshot_capture_dir(&args) { return run_hero_screenshot_capture(&output_dir).await; } @@ -2269,6 +2272,7 @@ const DESKTOP_HELP_LINES: &[&str] = &[ " --capture-hero-animation DIR Write deterministic hero animation PNG frames and exit", " --resize-render-benchmark[N] Print CPU resize/render benchmark JSON and exit", " --scroll-render-benchmark[N] Print CPU scroll/render benchmark JSON and exit", + " --real-transcript-scroll-benchmark[N] Profile scrolling against your real on-disk transcripts and exit", " --stream-e2e-benchmark[N] Print stream event-to-paint guardrail JSON and exit", " --headless-chat-smoke Run a hidden backend smoke test and print JSON events", " --headless-chat-smoke= Same as above", @@ -5161,6 +5165,302 @@ fn run_scroll_render_benchmark(frames: usize) -> Result<()> { Ok(()) } +/// Profile scrolling against the user's real on-disk transcripts. 
+/// +/// This loads the largest real session files (full, untruncated message lists) +/// and drives the exact production windowed-scroll render path: cached body +/// wrap, a sliding text-buffer window, viewport extraction, glyph shaping for +/// the visible window, text areas, and primitive geometry. Per-frame work is +/// reported per session and aggregated so we can attribute any scroll jank to a +/// specific stage on real content rather than synthetic fixtures. +fn run_real_transcript_scroll_benchmark(frames: usize) -> Result<()> { + let frames = frames.max(1); + let size = PhysicalSize::new(1200, 760); + let transcripts = session_data::load_largest_real_transcripts(8, 24) + .context("failed to load real transcripts for scroll benchmark")?; + + if transcripts.is_empty() { + println!( + "{}", + serde_json::to_string_pretty(&serde_json::json!({ + "frames": frames, + "sessions": [], + "note": "no real transcripts with >=24 messages found under ~/.jcode/sessions", + }))? + ); + return Ok(()); + } + + let mut session_reports = Vec::new(); + let mut all_frame_samples: Vec = Vec::new(); + let mut worst_stage_us = 0.0_f64; + let mut worst_stage_name = String::new(); + + for transcript in &transcripts { + let report = benchmark_real_transcript_scroll(transcript, size, frames); + if report.worst_stage_us > worst_stage_us { + worst_stage_us = report.worst_stage_us; + worst_stage_name = report.worst_stage_name.clone(); + } + all_frame_samples.extend_from_slice(&report.frame_samples); + session_reports.push(report); + } + + let budget_ms = duration_ms(DESKTOP_120FPS_FRAME_BUDGET); + let aggregate_p50 = percentile_ms(&all_frame_samples, 0.50); + let aggregate_p95 = percentile_ms(&all_frame_samples, 0.95); + let aggregate_p99 = percentile_ms(&all_frame_samples, 0.99); + let aggregate_max = max_sample_ms(&all_frame_samples); + let passes_budget = aggregate_p99 <= budget_ms; + + let sessions_json = session_reports + .iter() + .map(RealTranscriptScrollReport::to_json) + 
.collect::>(); + + println!( + "{}", + serde_json::to_string_pretty(&serde_json::json!({ + "frames": frames, + "size": { "width": size.width, "height": size.height }, + "target_frame_budget_ms": budget_ms, + "sessions_profiled": session_reports.len(), + "aggregate_full_scroll_frame": { + "frames": all_frame_samples.len(), + "p50_ms": aggregate_p50, + "p95_ms": aggregate_p95, + "p99_ms": aggregate_p99, + "max_ms": aggregate_max, + }, + "worst_stage": { "name": worst_stage_name, "max_us_per_frame": worst_stage_us }, + "passes_120fps_scroll_cpu_budget": passes_budget, + "sessions": sessions_json, + }))? + ); + Ok(()) +} + +struct RealTranscriptScrollReport { + session_id: String, + title: String, + file_bytes: u64, + message_count: usize, + total_body_lines: usize, + max_scroll_lines: usize, + body_buffer_rebuilds: usize, + frame_samples: Vec, + stage_totals_us: Vec<(&'static str, f64)>, + setup_full_relayout_ms: f64, + worst_stage_name: String, + worst_stage_us: f64, +} + +impl RealTranscriptScrollReport { + fn to_json(&self) -> serde_json::Value { + let frames = self.frame_samples.len().max(1); + let total_ms = self.frame_samples.iter().sum::(); + let stages = self + .stage_totals_us + .iter() + .map(|(name, total_us)| { + serde_json::json!({ + "name": name, + "mean_us_per_frame": total_us / frames as f64, + "total_ms": total_us / 1000.0, + }) + }) + .collect::>(); + serde_json::json!({ + "session_id": self.session_id, + "title": self.title, + "file_bytes": self.file_bytes, + "message_count": self.message_count, + "total_body_lines": self.total_body_lines, + "max_scroll_lines": self.max_scroll_lines, + "body_buffer_rebuilds": self.body_buffer_rebuilds, + "setup_full_body_relayout_ms": self.setup_full_relayout_ms, + "full_scroll_frame": { + "frames": self.frame_samples.len(), + "mean_ms_per_frame": total_ms / frames as f64, + "p50_ms": percentile_ms(&self.frame_samples, 0.50), + "p95_ms": percentile_ms(&self.frame_samples, 0.95), + "p99_ms": 
percentile_ms(&self.frame_samples, 0.99), + "max_ms": max_sample_ms(&self.frame_samples), + }, + "subphases": stages, + }) + } +} + +/// Build a `SingleSessionApp` backed by a full real transcript, exactly the way +/// the production resume path hydrates one from disk. +fn real_transcript_scroll_app(transcript: &session_data::BenchmarkTranscript) -> SingleSessionApp { + let mut app = SingleSessionApp::new(None); + app.apply_resumed_session_transcript(transcript.messages.clone()); + app.set_status_label(format!("real transcript: {}", transcript.title)); + app +} + +fn benchmark_real_transcript_scroll( + transcript: &session_data::BenchmarkTranscript, + size: PhysicalSize, + frames: usize, +) -> RealTranscriptScrollReport { + let mut app = real_transcript_scroll_app(transcript); + let mut font_system = benchmark_font_system(); + + // One-time full body wrap (the cost paid when a transcript is first loaded + // or the window is resized). After this, scrolling must stay windowed. + let setup_started = Instant::now(); + let body_lines = single_session_rendered_body_lines_for_tick(&app, size, 0); + let setup_full_relayout_ms = setup_started.elapsed().as_secs_f64() * 1000.0; + let total_body_lines = body_lines.len(); + + let max_scroll_lines = single_session_body_scroll_metrics_for_total_lines( + &app, + size, + total_body_lines, + ) + .map(|metrics| metrics.max_scroll_lines) + .unwrap_or(0); + + // Prime the sliding text-buffer window at the bottom of the transcript, the + // way the app does after hydrating a resumed session. 
+ app.scroll_body_to_bottom(); + let initial_viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &body_lines); + let initial_key = + single_session_text_key_for_tick_with_rendered_body(&app, size, 0, 0.0, &body_lines); + let mut buffers = single_session_text_buffers_from_key(&initial_key, size, &mut font_system); + let (mut window_start, mut window_end) = + single_session_body_text_window_bounds(&initial_viewport); + if let Some(body_buffer) = buffers.get_mut(1) { + *body_buffer = single_session_body_text_buffer_from_lines( + &mut font_system, + &body_lines[window_start..window_end], + size, + app.text_scale(), + ); + body_buffer.set_scroll( + initial_viewport + .start_line + .saturating_sub(window_start) + .min(i32::MAX as usize) as i32, + ); + } + let mut last_scroll_start = initial_viewport.start_line; + + // Drive a long scroll sweep from bottom to top and back, one whole line per + // frame, so every frame crosses a new line boundary (the worst realistic + // continuous-scroll case). + let span = max_scroll_lines.max(1); + let mut viewport_us = 0.0; + let mut window_rebuild_us = 0.0; + let mut scroll_us = 0.0; + let mut glyph_us = 0.0; + let mut areas_us = 0.0; + let mut vertices_us = 0.0; + let mut body_buffer_rebuilds = 0usize; + + let (frame_samples, _checksum) = benchmark_frame_samples(frames, |frame| { + // Triangle-wave scroll position covering the full transcript height. 
+ let phase = frame % (span * 2); + let target = if phase <= span { phase } else { span * 2 - phase }; + app.body_scroll_lines = target as f32; + let tick = frame as u64; + + let phase_started = Instant::now(); + let viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &body_lines); + viewport_us += phase_started.elapsed().as_secs_f64() * 1_000_000.0; + + let phase_started = Instant::now(); + if !single_session_body_text_window_contains(window_start, window_end, &viewport) { + (window_start, window_end) = single_session_body_text_window_bounds(&viewport); + if let Some(body_buffer) = buffers.get_mut(1) { + *body_buffer = single_session_body_text_buffer_from_lines( + &mut font_system, + &body_lines[window_start..window_end], + size, + app.text_scale(), + ); + } + body_buffer_rebuilds += 1; + last_scroll_start = usize::MAX; + } + window_rebuild_us += phase_started.elapsed().as_secs_f64() * 1_000_000.0; + + let phase_started = Instant::now(); + if viewport.start_line != last_scroll_start { + if let Some(body_buffer) = buffers.get_mut(1) { + body_buffer.set_scroll( + viewport + .start_line + .saturating_sub(window_start) + .min(i32::MAX as usize) as i32, + ); + } + last_scroll_start = viewport.start_line; + } + scroll_us += phase_started.elapsed().as_secs_f64() * 1_000_000.0; + + let phase_started = Instant::now(); + let glyph_checksum = buffers + .get(1) + .map(|body_buffer| { + body_buffer + .layout_runs() + .map(|run| run.glyphs.len()) + .sum::() + }) + .unwrap_or_default(); + glyph_us += phase_started.elapsed().as_secs_f64() * 1_000_000.0; + + let phase_started = Instant::now(); + let areas = single_session_text_areas_for_app_with_cached_body_viewport( + &app, &buffers, size, 0.0, viewport, + ); + areas_us += phase_started.elapsed().as_secs_f64() * 1_000_000.0; + + let phase_started = Instant::now(); + let vertices = build_single_session_vertices_with_cached_body( + &app, size, 0.0, tick, 0.0, 1.0, &body_lines, + ); + vertices_us += 
phase_started.elapsed().as_secs_f64() * 1_000_000.0; + + buffers.len() ^ areas.len() ^ vertices.len() ^ glyph_checksum + }); + + let stage_totals_us = vec![ + ("viewport_extract", viewport_us), + ("body_window_rebuild", window_rebuild_us), + ("body_scroll_set", scroll_us), + ("glyph_layout_count", glyph_us), + ("text_areas", areas_us), + ("primitive_vertices", vertices_us), + ]; + let frames_f = frames.max(1) as f64; + let (worst_stage_name, worst_stage_us) = stage_totals_us + .iter() + .map(|(name, total)| (name.to_string(), total / frames_f)) + .fold((String::new(), 0.0_f64), |acc, candidate| { + if candidate.1 > acc.1 { candidate } else { acc } + }); + + RealTranscriptScrollReport { + session_id: transcript.session_id.clone(), + title: transcript.title.clone(), + file_bytes: transcript.file_bytes, + message_count: transcript.messages.len(), + total_body_lines, + max_scroll_lines, + body_buffer_rebuilds, + frame_samples, + stage_totals_us, + setup_full_relayout_ms, + worst_stage_name, + worst_stage_us, + } +} + fn run_stream_e2e_benchmark(raw_events: usize) -> Result<()> { let result = run_desktop_stream_end_to_end_benchmark(raw_events); println!( diff --git a/crates/jcode-desktop/src/session_data.rs b/crates/jcode-desktop/src/session_data.rs index 0a363dcc6..72df5ec68 100644 --- a/crates/jcode-desktop/src/session_data.rs +++ b/crates/jcode-desktop/src/session_data.rs @@ -90,6 +90,78 @@ pub fn load_session_transcript_by_id( Ok(None) } +/// A full, uncapped transcript loaded straight from disk, used by the +/// real-transcript scroll benchmark so we profile the production render path +/// against the user's actual session content rather than synthetic fixtures. +#[derive(Debug, Clone)] +pub struct BenchmarkTranscript { + pub session_id: String, + pub title: String, + pub file_bytes: u64, + pub messages: Vec, +} + +/// Load the largest real session transcripts on disk (by file size), returning +/// the full message list for each (no card-style truncation). 
Used only by the +/// scroll benchmark. Sessions with fewer than `min_messages` are skipped so the +/// benchmark exercises long, scroll-heavy transcripts. +pub fn load_largest_real_transcripts( + max_sessions: usize, + min_messages: usize, +) -> Result> { + let sessions_dir = jcode_sessions_dir()?; + if !sessions_dir.exists() { + return Ok(Vec::new()); + } + + let mut candidates = fs::read_dir(&sessions_dir) + .with_context(|| format!("failed to read {}", sessions_dir.display()))? + .filter_map(|entry| entry.ok()) + .filter_map(|entry| { + let path = entry.path(); + session_file_candidate(path.clone())?; + let bytes = path.metadata().ok()?.len(); + Some((path, bytes)) + }) + .collect::>(); + // Largest files first: they hold the longest transcripts and stress the + // windowed-scroll path the most. + candidates.sort_by_key(|(_, bytes)| std::cmp::Reverse(*bytes)); + + let mut transcripts = Vec::new(); + for (path, bytes) in candidates { + if transcripts.len() >= max_sessions { + break; + } + let session = match load_stored_session(&path) { + Ok(session) => session, + Err(_) => continue, + }; + let messages = session_transcript_messages(&session); + if messages.len() < min_messages { + continue; + } + let id = stored_string(session.id.as_deref()) + .or_else(|| { + path.file_stem() + .map(|stem| stem.to_string_lossy().into_owned()) + }) + .unwrap_or_else(|| "unknown-session".to_string()); + let title = stored_string(session.custom_title.as_deref()) + .or_else(|| stored_string(session.title.as_deref())) + .or_else(|| latest_user_preview(&messages)) + .unwrap_or_else(|| short_session_name(&id)); + transcripts.push(BenchmarkTranscript { + session_id: id, + title, + file_bytes: bytes, + messages, + }); + } + + Ok(transcripts) +} + fn load_recent_session_cards_with_limit(limit: usize) -> Result> { let sessions_dir = jcode_sessions_dir()?; if !sessions_dir.exists() { From 4faab8467100c255274f2d1be3453b961328daf4 Mon Sep 17 00:00:00 2001 From: jeremy 
<94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:15:12 -0700 Subject: [PATCH 22/57] fix(tui): don't snap selection to bottom edge when already pinned The copy-selection drag edge auto-scroll "hot zone" (top/bottom few rows of the chat pane) fired unconditionally whenever a drag entered the band. When the transcript was already pinned to the bottom (the common case), dragging into the bottom rows snapped the selection cursor to the very last visible line and armed a downward autoscroll, even though there was nothing more below to scroll into. This made it impossible to precisely highlight the bottom rows of the transcript: the selection kept jumping to the end. Gate each directional hot zone on whether there is actually more transcript to scroll into that direction (scroll > 0 for up, visible_end < line_count for down). When there is nothing to scroll, the edge band stays inert so the selection lands on the exact cell under the cursor. Adds a regression test that drags into the bottom hot zone while pinned to the bottom and asserts no autoscroll arms and the selection lands on the targeted line. 
--- .../tui/app/tests/scroll_copy_02/part_01.rs | 112 ++++++++++++++++++ crates/jcode-tui/src/tui/ui.rs | 12 +- 2 files changed, 122 insertions(+), 2 deletions(-) diff --git a/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs index bfd0cd27a..c13cee50d 100644 --- a/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs +++ b/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs @@ -978,6 +978,118 @@ fn test_copy_selection_drag_near_top_edge_keeps_auto_scrolling() { )); } +#[test] +fn test_copy_selection_drag_to_bottom_edge_when_pinned_does_not_snap_or_autoscroll() { + // Regression: when the transcript is already pinned to the bottom (the common + // case), dragging a selection into the bottom edge "hot zone" used to always + // snap the cursor to the very last visible line and arm a downward autoscroll, + // even though there is nothing more below to scroll into. That made it + // impossible to precisely highlight the bottom rows: the selection kept + // jumping to the end. With nothing to scroll, the edge band must stay inert so + // the selection lands on the exact line under the cursor. + let _render_lock = scroll_render_test_lock(); + let mut app = create_test_app(); + + // Tall transcript pinned to the bottom: the bottom rows of the pane are + // filled with real content, and there is nothing below to scroll into. 
+ let lines = (1..=200) + .map(|idx| format!("line {idx:03}")) + .collect::>() + .join("\n"); + app.display_messages = vec![DisplayMessage { + role: "assistant".to_string(), + content: lines, + tool_calls: vec![], + duration_secs: None, + title: None, + tool_data: None, + }]; + app.bump_display_messages_version(); + app.scroll_offset = 0; + app.auto_scroll_paused = false; + app.is_processing = false; + app.streaming_text.clear(); + app.status = ProcessingStatus::Idle; + + let backend = ratatui::backend::TestBackend::new(60, 16); + let mut terminal = ratatui::Terminal::new(backend).expect("failed to create test terminal"); + render_and_snap(&app, &mut terminal); + + app.handle_key(KeyCode::Char('y'), KeyModifiers::ALT) + .unwrap(); + + let (visible_start, visible_end) = + crate::tui::ui::copy_viewport_visible_range().expect("visible copy range"); + let line_count = crate::tui::ui::copy_viewport_line_count().expect("line count"); + assert_eq!( + visible_end, line_count, + "test precondition: view must be pinned to the bottom with no content below" + ); + assert!( + visible_start > 0, + "test precondition: tall transcript must have content scrolled above the view" + ); + + let layout = crate::tui::ui::last_layout_snapshot().expect("layout snapshot"); + let area = layout.messages_area; + let col = area.x + 1; + + // Pick a real content line near (but not at) the bottom to target. + let target_line = visible_end.saturating_sub(2); + assert!(target_line >= visible_start, "need a visible target line"); + let target_row = area.y + (target_line - visible_start) as u16; + // The bottom edge band covers the last few rows; target_row must sit inside + // it for this regression to be meaningful. + let last_row = area.y + area.height - 1; + assert!( + target_row >= last_row.saturating_sub(2), + "target line must fall within the bottom edge hot zone" + ); + + // Anchor higher up in the viewport. 
+ let anchor_row = area.y + 1; + app.handle_mouse_event(MouseEvent { + kind: MouseEventKind::Down(MouseButton::Left), + column: col, + row: anchor_row, + modifiers: KeyModifiers::empty(), + }); + let before_scroll = app.scroll_offset(); + + app.handle_mouse_event(MouseEvent { + kind: MouseEventKind::Drag(MouseButton::Left), + column: col, + row: target_row, + modifiers: KeyModifiers::empty(), + }); + + // No autoscroll should be armed: there is nothing below to pull in. + assert!( + !crate::tui::TuiState::copy_selection_edge_autoscroll_active(&app), + "edge autoscroll must not arm when pinned to the bottom with no content below" + ); + assert_eq!( + app.scroll_offset(), + before_scroll, + "dragging into the bottom band while pinned must not scroll" + ); + + // The selection end should land on the exact line under the cursor, not snap + // to the very last line of the transcript. + let range = app.normalized_copy_selection().expect("normalized range"); + assert_eq!( + range.end.abs_line, target_line, + "selection should extend to the line under the cursor, not snap to the last line" + ); + + app.handle_mouse_event(MouseEvent { + kind: MouseEventKind::Up(MouseButton::Left), + column: col, + row: target_row, + modifiers: KeyModifiers::empty(), + }); +} + #[test] fn test_alt_a_copies_chat_viewport_with_context_when_input_empty() { let _render_lock = scroll_render_test_lock(); diff --git a/crates/jcode-tui/src/tui/ui.rs b/crates/jcode-tui/src/tui/ui.rs index 0cc070d65..a02bdbebb 100644 --- a/crates/jcode-tui/src/tui/ui.rs +++ b/crates/jcode-tui/src/tui/ui.rs @@ -1632,9 +1632,17 @@ pub(crate) fn copy_pane_vertical_edge_point( let zone = edge_autoscroll_zone_rows(area.height); let top_trigger = area.y.saturating_add(zone); let bottom_trigger = last_row.saturating_sub(zone); - let (edge_row, upward) = if row <= top_trigger { + // Only engage the hot zone when there is actually more transcript to pull in + // that direction. 
Otherwise dragging into the bottom band while the view is + // already pinned to the end (the common case) would snap the selection to the + // last visible line and fight precise highlighting of the bottom rows. When + // there is nothing to scroll, fall through (`None`) so the caller extends the + // selection to the exact cell under the cursor instead. + let can_scroll_up = snapshot.scroll > 0; + let can_scroll_down = snapshot.visible_end < snapshot.wrapped_plain_line_count(); + let (edge_row, upward) = if row <= top_trigger && can_scroll_up { (area.y, true) - } else if row >= bottom_trigger { + } else if row >= bottom_trigger && can_scroll_down { (last_row, false) } else { return None; From 98acc3ab10832e7d260e512c628c85640907fde5 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:17:36 -0700 Subject: [PATCH 23/57] feat(gmail): add in-agent Composio connect (OAuth) action Adds a 'connect' action to the gmail tool that drives Composio's hosted Connect Link flow: it creates an auth-link session, opens the Google consent screen in the browser, polls until the connection is ACTIVE, and persists the connected account to ~/.jcode/composio_gmail.json so future sessions are already authorized. - ComposioConfig gains auth_config_id + persisted-connection fallback. - GmailClient: connect(), needs_connection(), supports_connect(), create_link()/wait_for_connection() against /connected_accounts. - tool/gmail.rs handles 'connect' before the config gate and hints the agent to connect when no account exists yet. - Tests for connect/needs_connection/effective_user_id; docs updated. 
--- crates/jcode-app-core/src/tool/gmail.rs | 44 ++++- crates/jcode-base/src/gmail.rs | 249 +++++++++++++++++++++++- docs/GMAIL_COMPOSIO_BACKEND.md | 27 ++- 3 files changed, 312 insertions(+), 8 deletions(-) diff --git a/crates/jcode-app-core/src/tool/gmail.rs b/crates/jcode-app-core/src/tool/gmail.rs index f132b3f36..7afc720ff 100644 --- a/crates/jcode-app-core/src/tool/gmail.rs +++ b/crates/jcode-app-core/src/tool/gmail.rs @@ -67,8 +67,8 @@ impl Tool for GmailTool { "intent": super::intent_schema_property(), "action": { "type": "string", - "enum": ["search", "read", "list", "draft", "send", "send_draft", "threads", "thread", "labels", "trash", "modify_labels"], - "description": "Action." + "enum": ["connect", "search", "read", "list", "draft", "send", "send_draft", "threads", "thread", "labels", "trash", "modify_labels"], + "description": "Action. Use 'connect' to set up Gmail access via the Composio managed backend (opens a browser OAuth screen for the user to approve)." }, "query": { "type": "string" }, "message_id": { "type": "string" }, @@ -91,12 +91,48 @@ impl Tool for GmailTool { } async fn execute(&self, input: Value, _ctx: ToolContext) -> Result { + let params: GmailInput = serde_json::from_value(input)?; + let max = params.max_results.unwrap_or(10).min(50); + + // The connect action sets up the Composio managed backend by opening a + // browser OAuth screen for the user to approve. It runs before the + // is_configured gate so it can establish the very first connection. + if params.action == "connect" { + if !self.client.supports_connect() { + return Ok(ToolOutput::new( + "The 'connect' action is only available with the Composio Gmail backend. \ + Set JCODE_GMAIL_BACKEND=composio and COMPOSIO_API_KEY, then retry. 
\ + For the default backend, run `jcode login google` instead.", + )); + } + let no_browser = crate::auth::browser_suppressed(false); + match self.client.connect(!no_browser).await { + Ok(conn) => { + let who = conn + .email + .clone() + .unwrap_or_else(|| "your Gmail account".to_string()); + return Ok(ToolOutput::new(format!( + "Gmail connected via Composio for {}. You can now search, read, draft, and send email.", + who + ))); + } + Err(e) => { + return Ok(ToolOutput::new(format!("Gmail connect failed: {}", e))); + } + } + } + if !self.client.is_configured() { return Ok(ToolOutput::new(self.client.not_configured_message())); } - let params: GmailInput = serde_json::from_value(input)?; - let max = params.max_results.unwrap_or(10).min(50); + if self.client.needs_connection() { + return Ok(ToolOutput::new( + "Gmail (Composio backend) has no connected account yet. Run the gmail tool with \ + action 'connect' to authorize your Gmail account, then retry.", + )); + } match params.action.as_str() { "search" | "list" => { diff --git a/crates/jcode-base/src/gmail.rs b/crates/jcode-base/src/gmail.rs index 72ea959f7..f6fc2d7bd 100644 --- a/crates/jcode-base/src/gmail.rs +++ b/crates/jcode-base/src/gmail.rs @@ -26,6 +26,10 @@ pub struct ComposioConfig { pub base_url: String, pub connected_account_id: Option, pub user_id: Option, + /// Auth config that defines the Gmail OAuth blueprint (scopes + managed + /// Composio app). Required to initiate a Connect Link flow. Falls back to + /// a persisted value or `COMPOSIO_GMAIL_AUTH_CONFIG_ID`. + pub auth_config_id: Option, } impl GmailBackend { @@ -64,20 +68,73 @@ impl ComposioConfig { .ok() .filter(|s| !s.is_empty()) .unwrap_or_else(|| COMPOSIO_DEFAULT_BASE.to_string()); + // A previously completed Connect Link flow persists the connection so + // the user does not have to re-run setup each session. 
+ let persisted = ComposioConnection::load().ok().flatten(); let connected_account_id = std::env::var("COMPOSIO_GMAIL_CONNECTED_ACCOUNT_ID") .ok() - .filter(|s| !s.is_empty()); + .filter(|s| !s.is_empty()) + .or_else(|| persisted.as_ref().map(|p| p.connected_account_id.clone())); let user_id = std::env::var("COMPOSIO_GMAIL_USER_ID") .or_else(|_| std::env::var("COMPOSIO_USER_ID")) .ok() - .filter(|s| !s.is_empty()); + .filter(|s| !s.is_empty()) + .or_else(|| persisted.as_ref().map(|p| p.user_id.clone())); + let auth_config_id = std::env::var("COMPOSIO_GMAIL_AUTH_CONFIG_ID") + .ok() + .filter(|s| !s.is_empty()) + .or_else(|| persisted.as_ref().and_then(|p| p.auth_config_id.clone())); Some(Self { api_key, base_url, connected_account_id, user_id, + auth_config_id, }) } + + /// Effective user id, defaulting to "default" so a single-user CLI works + /// without any extra configuration. + pub fn effective_user_id(&self) -> String { + self.user_id.clone().unwrap_or_else(|| "default".to_string()) + } +} + +/// Persisted record of a completed Composio Gmail connection, stored at +/// `~/.jcode/composio_gmail.json`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ComposioConnection { + pub connected_account_id: String, + pub user_id: String, + pub auth_config_id: Option, + #[serde(default)] + pub email: Option, +} + +impl ComposioConnection { + pub fn path() -> Result { + Ok(crate::storage::jcode_dir()?.join("composio_gmail.json")) + } + + pub fn load() -> Result> { + let path = Self::path()?; + if !path.exists() { + return Ok(None); + } + crate::storage::harden_secret_file_permissions(&path); + Ok(crate::storage::read_json(&path).ok()) + } + + pub fn save(&self) -> Result<()> { + let path = Self::path()?; + crate::storage::write_json_secret(&path, self) + } +} + +/// Result of initiating a Connect Link OAuth flow. 
+pub struct ComposioLink { + pub connected_account_id: String, + pub redirect_url: String, } pub struct GmailClient { @@ -151,6 +208,164 @@ impl GmailClient { } } + /// True only for the Composio backend when no connected account exists yet. + /// In that state, Gmail calls will fail until the user completes the + /// Connect Link OAuth flow via [`GmailClient::connect`]. + pub fn needs_connection(&self) -> bool { + matches!(&self.backend, GmailBackend::Composio(cfg) if cfg.connected_account_id.is_none()) + } + + /// Whether the active backend supports an interactive `connect` action. + pub fn supports_connect(&self) -> bool { + matches!(&self.backend, GmailBackend::Composio(_)) + } + + /// Initiate a Composio Connect Link OAuth flow, open the consent screen in + /// the user's browser, wait for them to approve, then persist the resulting + /// connected account so future sessions are already authenticated. + /// + /// `open_browser` controls whether we try to launch the system browser + /// (set false over SSH/headless; the URL is always returned). + pub async fn connect(&self, open_browser: bool) -> Result { + let cfg = match &self.backend { + GmailBackend::Composio(cfg) => cfg, + GmailBackend::Direct => { + anyhow::bail!( + "The Composio connect flow is only available when JCODE_GMAIL_BACKEND=composio." + ) + } + }; + let auth_config_id = cfg.auth_config_id.clone().ok_or_else(|| { + anyhow::anyhow!( + "No Composio Gmail auth config configured. Create a Gmail auth config in the \ + Composio dashboard and set COMPOSIO_GMAIL_AUTH_CONFIG_ID." + ) + })?; + let user_id = cfg.effective_user_id(); + + let link = self.create_link(cfg, &auth_config_id, &user_id).await?; + if open_browser { + let _ = open::that(&link.redirect_url); + } + eprintln!( + "\nOpening Gmail authorization in your browser. 
If it did not open, visit:\n{}\n", + link.redirect_url + ); + + let account = self + .wait_for_connection(cfg, &link.connected_account_id) + .await?; + + let email = account + .get("data") + .and_then(|d| d.get("email")) + .or_else(|| account.get("email")) + .and_then(|e| e.as_str()) + .map(|s| s.to_string()); + + let connection = ComposioConnection { + connected_account_id: link.connected_account_id, + user_id, + auth_config_id: Some(auth_config_id), + email, + }; + connection.save()?; + Ok(connection) + } + + /// Create a hosted Connect Link auth session. + async fn create_link( + &self, + cfg: &ComposioConfig, + auth_config_id: &str, + user_id: &str, + ) -> Result { + let endpoint = format!("{}/connected_accounts/link", cfg.base_url.trim_end_matches('/')); + let payload = json!({ + "auth_config_id": auth_config_id, + "user_id": user_id, + }); + let resp = self + .http + .post(&endpoint) + .header("x-api-key", &cfg.api_key) + .json(&payload) + .send() + .await?; + let status = resp.status(); + let text = resp.text().await?; + if !status.is_success() { + return Err(anyhow::anyhow!( + "Composio connect-link error {}: {}", + status, + truncate_error(&text) + )); + } + let body: Value = serde_json::from_str(&text)?; + let redirect_url = body + .get("redirect_url") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Composio did not return a redirect_url"))? + .to_string(); + let connected_account_id = body + .get("connected_account_id") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Composio did not return a connected_account_id"))? + .to_string(); + Ok(ComposioLink { + connected_account_id, + redirect_url, + }) + } + + /// Poll a connected account until it becomes ACTIVE (or a terminal error). + async fn wait_for_connection( + &self, + cfg: &ComposioConfig, + connected_account_id: &str, + ) -> Result { + // INITIATED links auto-expire after ~10 minutes; poll up to ~5 minutes. 
+ const MAX_ATTEMPTS: u32 = 150; + const POLL_INTERVAL: std::time::Duration = std::time::Duration::from_secs(2); + let endpoint = format!( + "{}/connected_accounts/{}", + cfg.base_url.trim_end_matches('/'), + connected_account_id + ); + for _ in 0..MAX_ATTEMPTS { + let resp = self + .http + .get(&endpoint) + .header("x-api-key", &cfg.api_key) + .send() + .await?; + if resp.status().is_success() { + let body: Value = resp.json().await?; + let status = body + .get("status") + .or_else(|| body.get("data").and_then(|d| d.get("status"))) + .and_then(|s| s.as_str()) + .unwrap_or(""); + match status { + "ACTIVE" => return Ok(body), + "FAILED" | "EXPIRED" => { + let reason = body + .get("status_reason") + .and_then(|r| r.as_str()) + .unwrap_or("no reason provided"); + anyhow::bail!("Gmail connection {}: {}", status, reason); + } + _ => {} + } + } + tokio::time::sleep(POLL_INTERVAL).await; + } + anyhow::bail!( + "Timed out waiting for Gmail authorization. Re-run the connect action and finish the \ + browser consent within a few minutes." + ) + } + /// Send an authenticated Gmail REST request and return the parsed JSON /// response. Both backends produce the identical Gmail API JSON shape, so /// callers can deserialize into the same typed structs. 
@@ -645,6 +860,7 @@ mod tests { base_url: COMPOSIO_DEFAULT_BASE.to_string(), connected_account_id: Some("ca_123".to_string()), user_id: Some("me".to_string()), + auth_config_id: Some("ac_123".to_string()), } } @@ -675,6 +891,7 @@ mod tests { base_url: COMPOSIO_DEFAULT_BASE.to_string(), connected_account_id: None, user_id: None, + auth_config_id: None, }; let payload = build_composio_proxy_payload(&bare, "GET", "http://x/y", None); assert!(payload.get("connected_account_id").is_none()); @@ -708,4 +925,32 @@ mod tests { assert!(capped.len() <= 401 + 3); // 400 chars + ellipsis byte assert!(capped.ends_with('…')); } + + #[test] + fn needs_connection_reflects_connected_account_presence() { + // Composio without a connected account needs an interactive connect. + let mut without = cfg(); + without.connected_account_id = None; + let client = GmailClient::with_backend(GmailBackend::Composio(without)); + assert!(client.supports_connect()); + assert!(client.needs_connection()); + + // With a connected account it is ready to make calls. + let client = GmailClient::with_backend(GmailBackend::Composio(cfg())); + assert!(!client.needs_connection()); + + // Direct backend never needs a Composio connection and cannot connect. + let direct = GmailClient::with_backend(GmailBackend::Direct); + assert!(!direct.supports_connect()); + assert!(!direct.needs_connection()); + } + + #[test] + fn effective_user_id_defaults_to_default() { + let mut c = cfg(); + c.user_id = None; + assert_eq!(c.effective_user_id(), "default"); + c.user_id = Some("alice".to_string()); + assert_eq!(c.effective_user_id(), "alice"); + } } diff --git a/docs/GMAIL_COMPOSIO_BACKEND.md b/docs/GMAIL_COMPOSIO_BACKEND.md index 9b2ec44eb..8a3164221 100644 --- a/docs/GMAIL_COMPOSIO_BACKEND.md +++ b/docs/GMAIL_COMPOSIO_BACKEND.md @@ -36,8 +36,31 @@ falls back to `direct`. 
|---|---|---| | `COMPOSIO_API_KEY` | Yes | Project API key from | | `COMPOSIO_BASE_URL` | No | Override API base (default `https://backend.composio.dev/api/v3.1`) | -| `COMPOSIO_GMAIL_CONNECTED_ACCOUNT_ID` | No | Pin a specific connected account (`ca_...`) | -| `COMPOSIO_GMAIL_USER_ID` / `COMPOSIO_USER_ID` | No | End-user id for multi-user connected accounts | +| `COMPOSIO_GMAIL_AUTH_CONFIG_ID` | For `connect` | Gmail auth config id (`ac_...`) from the Composio dashboard. Defines the OAuth blueprint/scopes used by the connect flow. | +| `COMPOSIO_GMAIL_CONNECTED_ACCOUNT_ID` | No | Pin a specific connected account (`ca_...`). Normally set automatically after `connect`. | +| `COMPOSIO_GMAIL_USER_ID` / `COMPOSIO_USER_ID` | No | End-user id for multi-user connected accounts (defaults to `default`) | + +## Connecting a Gmail account (in-agent OAuth) + +Once `COMPOSIO_API_KEY` and `COMPOSIO_GMAIL_AUTH_CONFIG_ID` are set, the user +(or the agent) runs the gmail tool with `action: "connect"`: + +1. jcode calls Composio's `POST /connected_accounts/link` (hosted "Connect + Link" flow) to start an OAuth session. +2. The returned `redirect_url` is opened in the system browser (printed to + stderr as a fallback, e.g. over SSH). +3. The user approves Gmail access on Google's consent screen. Because Composio + owns a Google-verified app, there is no "unverified app" warning. +4. jcode polls `GET /connected_accounts/{id}` until the connection is `ACTIVE`, + then persists it to `~/.jcode/composio_gmail.json`. + +Future sessions load the persisted `connected_account_id`, so the connect step +is a one-time action per account. Tool calls before a connection exists return +a hint telling the agent to run `action: "connect"` first. + +> Note: Composio is retiring `initiate()` for managed OAuth in favor of the +> Connect Link `link()` flow used here, so this path is the supported one going +> forward. 
## One-time Composio setup From 6df69c89adf3a11131334773804a0ee9043054d2 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:17:51 -0700 Subject: [PATCH 24/57] tui: pace streaming text reveal to fix Anthropic choppiness The StreamBuffer previously revealed text the instant a provider delta arrived (only capping bursts >96 chars per frame). OpenAI emits many tiny token deltas so this looked smooth, but Anthropic coalesces deltas into 20-40 char bursts with gaps, so each burst popped in at once and the UI stair-stepped. Replace the burst cap with a time-paced proportional reveal: text accumulates in a backlog and drips out at base + gain*backlog chars/sec, with the per-step elapsed time clamped so idle gaps cannot bank budget that dumps the next burst. This smooths bursty providers while keeping fast steady feeds responsive. Remote tick now reveals via flush_smooth_frame() to match; flush() still drains fully at finalize. --- crates/jcode-tui-core/src/stream_buffer.rs | 325 +++++++++++------- crates/jcode-tui/src/tui/app/remote.rs | 4 +- .../tests/remote_startup_input_02/part_01.rs | 7 +- 3 files changed, 215 insertions(+), 121 deletions(-) diff --git a/crates/jcode-tui-core/src/stream_buffer.rs b/crates/jcode-tui-core/src/stream_buffer.rs index 2b267b483..b8a32c05e 100644 --- a/crates/jcode-tui-core/src/stream_buffer.rs +++ b/crates/jcode-tui-core/src/stream_buffer.rs @@ -1,20 +1,53 @@ -//! Semantic stream buffer - chunks streaming text at natural boundaries +//! Semantic stream buffer - paces streaming text reveal at a smooth rate. +//! +//! Providers feed text deltas with wildly different cadences. OpenAI emits many +//! tiny token-level deltas (a few chars every ~10-15ms), which already looks +//! smooth. Anthropic coalesces `content_block_delta` events into larger chunks +//! that arrive in bursts with gaps (e.g. 20-40 chars every ~80-100ms). If we +//! 
reveal each burst the instant it arrives, the UI stair-steps: a clump of +//! text pops in, then nothing for several frames, then another clump. +//! +//! To make every provider look the same, this buffer decouples *arrival* from +//! *reveal*. Incoming text accumulates in a backlog, and a time-paced +//! proportional controller drips it out: the reveal rate rises with the backlog +//! so we never fall far behind a fast model, yet a lone burst is spread over +//! several frames instead of dumped in one. The elapsed-time step is clamped so +//! an idle gap (connect latency, tool pauses) cannot bank budget that would +//! instantly dump the next burst. use serde::Serialize; use std::time::{Duration, Instant}; -/// Buffer that accumulates streaming text and flushes at semantic boundaries +/// Steady-state reveal rate (chars/sec) when the backlog is empty. This sets the +/// floor cadence and how the trailing characters of a burst drain out. +const BASE_REVEAL_CPS: f32 = 180.0; + +/// Additional reveal rate per buffered character. The controller speeds up as the +/// backlog grows so we track fast models with bounded latency: at steady incoming +/// rate `R`, the backlog settles near `(R - BASE_REVEAL_CPS) / REVEAL_BACKLOG_GAIN`. +const REVEAL_BACKLOG_GAIN: f32 = 3.0; + +/// Maximum elapsed time credited to a single reveal step. Without this, a long +/// idle gap before the first/next burst would bank a huge budget and dump the +/// whole burst at once, reintroducing the choppiness we are trying to remove. +const MAX_REVEAL_STEP: Duration = Duration::from_millis(50); + +/// Buffer that accumulates streaming text and reveals it at a smooth, paced rate. pub struct StreamBuffer { buffer: String, - last_flush: Instant, - timeout: Duration, - smooth_frame_chars: usize, + last_reveal: Instant, + /// Fractional reveal budget carried between steps so slow rates still make + /// progress instead of rounding down to zero forever. 
+ carry: f32, + base_cps: f32, + backlog_gain: f32, + max_step: Duration, } #[derive(Debug, Clone, Serialize)] pub struct StreamBufferMemoryProfile { pub buffered_text_bytes: usize, - pub timeout_ms: u64, + pub base_reveal_cps: u32, } impl Default for StreamBuffer { @@ -27,50 +60,37 @@ impl StreamBuffer { pub fn new() -> Self { Self { buffer: String::new(), - last_flush: Instant::now(), - timeout: Duration::from_millis(150), - smooth_frame_chars: 96, + last_reveal: Instant::now(), + carry: 0.0, + base_cps: BASE_REVEAL_CPS, + backlog_gain: REVEAL_BACKLOG_GAIN, + max_step: MAX_REVEAL_STEP, } } - /// Push text into buffer, returns chunk to display if boundary found + /// Push text into the buffer, returning any paced chunk ready to display now. pub fn push(&mut self, text: &str) -> Option { self.buffer.push_str(text); - - // Find semantic boundary - if let Some(boundary) = self.find_boundary() { - return Some(self.drain_prefix(boundary.min(self.smooth_frame_boundary()))); - } - - if self.last_flush.elapsed() >= self.timeout { - return self.flush_smooth_frame(); - } - - None + self.reveal_now(Instant::now()) } - /// Force flush the entire buffer (call on timeout or message end) + /// Force flush the entire buffer (call on message end, commit, or interrupt). pub fn flush(&mut self) -> Option { + self.carry = 0.0; + self.last_reveal = Instant::now(); if self.buffer.is_empty() { None } else { - self.last_flush = Instant::now(); Some(std::mem::take(&mut self.buffer)) } } - /// Flush up to one smooth-render frame worth of text. This is used for - /// periodic streaming redraws so large provider/SSE bursts are revealed - /// over a few quick frames instead of popping into the TUI all at once. - /// Finalization paths should still call [`flush`] to avoid leaving text - /// buffered at message boundaries. + /// Reveal one paced frame worth of buffered text. 
Called from the periodic + /// redraw tick so the backlog drains smoothly even when no new delta arrived + /// this frame. Finalization paths should still call [`flush`] to avoid + /// leaving text buffered at message boundaries. pub fn flush_smooth_frame(&mut self) -> Option { - if self.buffer.is_empty() { - None - } else { - let boundary = self.smooth_frame_boundary().min(self.buffer.len()); - Some(self.drain_prefix(boundary)) - } + self.reveal_now(Instant::now()) } /// Check if buffer is empty @@ -81,138 +101,205 @@ impl StreamBuffer { /// Clear the buffer without returning content pub fn clear(&mut self) { self.buffer.clear(); - self.last_flush = Instant::now(); + self.carry = 0.0; + self.last_reveal = Instant::now(); } pub fn debug_memory_profile(&self) -> StreamBufferMemoryProfile { StreamBufferMemoryProfile { buffered_text_bytes: self.buffer.len(), - timeout_ms: self.timeout.as_millis() as u64, + base_reveal_cps: self.base_cps as u32, } } - fn smooth_frame_boundary(&self) -> usize { - if self.buffer.chars().count() <= self.smooth_frame_chars { - return self.buffer.len(); + /// Proportional, time-paced reveal. Advances the budget by the (clamped) + /// elapsed time times a backlog-scaled rate, then drains that many chars. + fn reveal_now(&mut self, now: Instant) -> Option { + let backlog = self.buffer.chars().count(); + if backlog == 0 { + // No backlog: reset so an idle gap cannot bank reveal budget. 
+ self.carry = 0.0; + self.last_reveal = now; + return None; } - self.buffer - .char_indices() - .map(|(idx, _)| idx) - .nth(self.smooth_frame_chars) - .unwrap_or(self.buffer.len()) - } - - fn drain_prefix(&mut self, boundary: usize) -> String { - let boundary = floor_char_boundary(&self.buffer, boundary); - let chunk = self.buffer[..boundary].to_string(); - self.buffer = self.buffer[boundary..].to_string(); - self.last_flush = Instant::now(); - chunk - } - /// Find a boundary in the buffer (newline-based), returns position after boundary - fn find_boundary(&self) -> Option { - let buf = &self.buffer; + let dt = now + .saturating_duration_since(self.last_reveal) + .min(self.max_step) + .as_secs_f32(); + self.last_reveal = now; - // Code block start/end (```language or ```) - if let Some(pos) = buf.find("```") { - // Find end of the ``` line - if let Some(newline) = buf[pos..].find('\n') { - return Some(pos + newline + 1); - } - } + let cps = self.base_cps + backlog as f32 * self.backlog_gain; + self.carry += dt * cps; - // Any newline - simple and predictable - if let Some(pos) = buf.find('\n') { - return Some(pos + 1); + let mut reveal = self.carry.floor() as usize; + if reveal == 0 { + // Budget hasn't reached a whole char yet; keep accumulating. + return None; } - - None + reveal = reveal.min(backlog); + self.carry -= reveal as f32; + Some(self.drain_chars(reveal)) } -} -fn floor_char_boundary(s: &str, mut index: usize) -> usize { - index = index.min(s.len()); - while index > 0 && !s.is_char_boundary(index) { - index -= 1; + /// Drain `char_count` characters from the front of the buffer on a UTF-8 + /// boundary. 
+ fn drain_chars(&mut self, char_count: usize) -> String { + if char_count == 0 { + return String::new(); + } + let end = self + .buffer + .char_indices() + .nth(char_count) + .map(|(idx, _)| idx) + .unwrap_or(self.buffer.len()); + let chunk = self.buffer[..end].to_string(); + self.buffer.replace_range(..end, ""); + chunk } - index } #[cfg(test)] mod tests { use super::*; + /// Drain the buffer to empty using fixed-cadence redraw frames, returning the + /// per-frame reveal sizes (in chars). + fn drain_frames(buf: &mut StreamBuffer, start: Instant, frame: Duration) -> Vec { + let mut sizes = Vec::new(); + let mut t = start; + let mut guard = 0; + while !buf.is_empty() { + t += frame; + if let Some(chunk) = buf.reveal_now(t) { + sizes.push(chunk.chars().count()); + } + guard += 1; + assert!(guard < 100_000, "drain did not converge"); + } + sizes + } + #[test] - fn test_newline_boundary() { + fn flush_drains_everything() { let mut buf = StreamBuffer::new(); - let result = buf.push("First line\nSecond line"); - assert_eq!(result, Some("First line\n".to_string())); - assert_eq!(buf.buffer, "Second line"); + buf.buffer.push_str("remaining content"); + let result = buf.flush(); + assert_eq!(result, Some("remaining content".to_string())); + assert!(buf.is_empty()); } #[test] - fn test_code_block_boundary() { + fn empty_push_reveals_nothing() { let mut buf = StreamBuffer::new(); - // Code block marker ``` causes flush to include the whole line - let result = buf.push("```rust\nfn main() {}"); - assert_eq!(result, Some("```rust\n".to_string())); + assert_eq!(buf.push(""), None); + assert!(buf.is_empty()); } #[test] - fn test_no_boundary() { + fn paced_reveal_spreads_a_burst_over_multiple_frames() { + let start = Instant::now(); let mut buf = StreamBuffer::new(); - let result = buf.push("partial text without newline"); - assert_eq!(result, None); - assert_eq!(buf.buffer, "partial text without newline"); + buf.last_reveal = start; + buf.buffer.push_str(&"a".repeat(40)); + + 
let sizes = drain_frames(&mut buf, start, Duration::from_millis(16)); + let total: usize = sizes.iter().sum(); + assert_eq!(total, 40); + assert!( + sizes.len() >= 3, + "a 40-char burst should reveal across multiple frames, got {sizes:?}" + ); + // No single 16ms frame should dump the whole burst. + assert!( + sizes.iter().all(|&n| n < 40), + "no frame should reveal the entire burst, got {sizes:?}" + ); } #[test] - fn test_flush() { + fn idle_gap_does_not_dump_the_next_burst() { + let start = Instant::now(); let mut buf = StreamBuffer::new(); - buf.push("remaining content"); - let result = buf.flush(); - assert_eq!(result, Some("remaining content".to_string())); - assert!(buf.is_empty()); + buf.last_reveal = start; + // Simulate a long connect/tool pause, then a burst arrives. + let arrival = start + Duration::from_secs(5); + buf.buffer.push_str(&"b".repeat(30)); + let first = buf + .reveal_now(arrival) + .map(|c| c.chars().count()) + .unwrap_or(0); + assert!( + first < 30, + "the idle gap must not bank budget that dumps the burst, revealed {first}" + ); + // The remainder still drains over subsequent frames. + let sizes = drain_frames(&mut buf, arrival, Duration::from_millis(16)); + assert_eq!(first + sizes.iter().sum::(), 30); } #[test] - fn test_multiple_newlines() { - let mut buf = StreamBuffer::new(); - // First push returns first line - let result = buf.push("Line one\nLine two\nLine three"); - assert_eq!(result, Some("Line one\n".to_string())); - // Second push returns second line - let result = buf.push(""); - assert_eq!(result, Some("Line two\n".to_string())); + fn bursty_and_steady_feeds_reveal_at_similar_smoothness() { + // Steady (OpenAI-like): 4 chars every frame. 
+ let start = Instant::now(); + let frame = Duration::from_millis(16); + let mut steady = StreamBuffer::new(); + steady.last_reveal = start; + let mut steady_sizes = Vec::new(); + let mut t = start; + for _ in 0..40 { + t += frame; + steady.buffer.push_str("abcd"); + if let Some(c) = steady.reveal_now(t) { + steady_sizes.push(c.chars().count()); + } + } + steady_sizes.extend(drain_frames(&mut steady, t, frame)); + + // Bursty (Anthropic-like): 24 chars every 6th frame. + let mut bursty = StreamBuffer::new(); + bursty.last_reveal = start; + let mut bursty_sizes = Vec::new(); + let mut t = start; + for i in 0..60 { + t += frame; + if i % 6 == 0 { + bursty.buffer.push_str(&"x".repeat(24)); + } + if let Some(c) = bursty.reveal_now(t) { + bursty_sizes.push(c.chars().count()); + } + } + bursty_sizes.extend(drain_frames(&mut bursty, t, frame)); + + let max_burst = *bursty_sizes.iter().max().unwrap(); + // The whole 24-char clump must never appear in a single frame; pacing + // should break it into smaller per-frame reveals like the steady feed. 
+ assert!( + max_burst < 24, + "bursty feed should be smoothed, max frame reveal was {max_burst} ({bursty_sizes:?})" + ); } #[test] - fn test_smooth_frame_flush_caps_large_chunks() { + fn reveal_respects_utf8_boundaries() { + let start = Instant::now(); let mut buf = StreamBuffer::new(); - let text = "a".repeat(150); - assert_eq!(buf.push(&text), None); - - let first = buf.flush_smooth_frame().unwrap(); - assert_eq!(first.len(), 96); - assert_eq!(buf.buffer.len(), 54); + buf.last_reveal = start; + buf.buffer.push_str(&"é".repeat(40)); - let rest = buf.flush().unwrap(); - assert_eq!(rest.len(), 54); - assert!(buf.is_empty()); + let sizes = drain_frames(&mut buf, start, Duration::from_millis(16)); + assert_eq!(sizes.iter().sum::(), 40); } #[test] - fn test_smooth_frame_flush_respects_utf8_boundaries() { + fn small_trailing_text_eventually_drains() { + let start = Instant::now(); let mut buf = StreamBuffer::new(); - let text = "é".repeat(120); - assert_eq!(buf.push(&text), None); - - let first = buf.flush_smooth_frame().unwrap(); - assert_eq!(first.chars().count(), 96); - assert!(first.is_char_boundary(first.len())); - - let rest = buf.flush().unwrap(); - assert_eq!(rest.chars().count(), 24); + buf.last_reveal = start; + buf.buffer.push_str("hi"); + let sizes = drain_frames(&mut buf, start, Duration::from_millis(16)); + assert_eq!(sizes.iter().sum::(), 2); } } diff --git a/crates/jcode-tui/src/tui/app/remote.rs b/crates/jcode-tui/src/tui/app/remote.rs index 50089382d..db3d3f8ab 100644 --- a/crates/jcode-tui/src/tui/app/remote.rs +++ b/crates/jcode-tui/src/tui/app/remote.rs @@ -81,7 +81,9 @@ pub(super) async fn handle_tick(app: &mut App, remote: &mut RemoteConnection) -> needs_redraw |= app.update_chat_overscroll(); needs_redraw |= app.update_pinned_images_auto_hide(); needs_redraw |= dispatch_compacted_history_load(app, remote).await; - if let Some(chunk) = app.stream_buffer.flush() { + // Reveal buffered streaming text at the smooth paced rate on each tick, the + // 
same as the local turn loop. Finalization paths still call flush(). + if let Some(chunk) = app.stream_buffer.flush_smooth_frame() { app.append_streaming_text(&chunk); needs_redraw = true; } diff --git a/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs index 4fd24e789..bb765c636 100644 --- a/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs +++ b/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs @@ -591,7 +591,12 @@ fn test_submit_input_commits_pending_streaming_assistant_text_before_user_messag )); app.bump_display_messages_version(); app.streaming_text = "Here is the final paragraph".to_string(); - assert_eq!(app.stream_buffer.push(" that was still buffered."), None); + // Mirror the real streaming caller: append any paced chunk the buffer reveals. + // The paced StreamBuffer may reveal part of the text immediately, so commit + // (below) must still flush the remainder. + if let Some(chunk) = app.stream_buffer.push(" that was still buffered.") { + app.append_streaming_text(&chunk); + } app.input = "follow up".to_string(); app.cursor_pos = app.input.len(); From a54a6b3d55f0a54b696e366f4ea23672415c8b64 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:24:24 -0700 Subject: [PATCH 25/57] feat(onboarding): show both Codex and Claude Code sessions in resume picker The first-run onboarding 'continue where you left off' picker previously surfaced only ONE external CLI: when a user was logged into both Codex and Claude Code, it picked whichever had the most recent transcript and hid the other CLI's history entirely. Now the onboarding picker loads and displays every detected external CLI's transcripts together in one combined, recency-sorted list: - Add SessionFilterMode::ExternalClis (Codex OR Claude Code). - Add load_external_cli_sessions_grouped_multi to load several CLIs. 
- onboarding_open_transcript_picker now takes the full detected CLI set; the banner reads 'We found your Codex and Claude Code sessions' when both are present. Resume still works off each session's own id/source, so selecting either CLI's transcript resumes correctly. Adds a regression test seeding both a Codex and a Claude Code transcript and asserting both appear. --- crates/jcode-tui-session-picker/src/lib.rs | 9 ++ .../src/tui/app/onboarding_flow_control.rs | 110 ++++++++++-------- .../src/tui/app/tests/onboarding_flow.rs | 68 ++++++++++- crates/jcode-tui/src/tui/session_picker.rs | 12 +- .../src/tui/session_picker/filter.rs | 3 + .../src/tui/session_picker/loading.rs | 38 ++++++ 6 files changed, 192 insertions(+), 48 deletions(-) diff --git a/crates/jcode-tui-session-picker/src/lib.rs b/crates/jcode-tui-session-picker/src/lib.rs index d3deb23d8..44417f48b 100644 --- a/crates/jcode-tui-session-picker/src/lib.rs +++ b/crates/jcode-tui-session-picker/src/lib.rs @@ -69,6 +69,10 @@ pub enum SessionFilterMode { Codex, Pi, OpenCode, + /// External CLI transcripts (Codex and/or Claude Code) shown together. + /// Used by the first-run onboarding "continue where you left off" picker so + /// it surfaces every external CLI the user is logged into, not just one. + ExternalClis, } impl SessionFilterMode { @@ -81,6 +85,9 @@ impl SessionFilterMode { Self::Codex => Self::Pi, Self::Pi => Self::OpenCode, Self::OpenCode => Self::All, + // ExternalClis is an onboarding-only composite filter, not part of + // the user-facing cycle; stepping off it returns to `All`.
+ Self::ExternalClis => Self::All, } } @@ -93,6 +100,7 @@ impl SessionFilterMode { Self::Codex => Self::ClaudeCode, Self::Pi => Self::Codex, Self::OpenCode => Self::Pi, + Self::ExternalClis => Self::All, } } @@ -105,6 +113,7 @@ impl SessionFilterMode { Self::Codex => Some("🧠 Codex"), Self::Pi => Some("π Pi"), Self::OpenCode => Some("◌ OpenCode"), + Self::ExternalClis => Some("🧠 Codex + 🧵 Claude Code"), } } } diff --git a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs index 0803c626c..ec20f5439 100644 --- a/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs +++ b/crates/jcode-tui/src/tui/app/onboarding_flow_control.rs @@ -208,36 +208,17 @@ impl App { /// straight into the resume picker (with an onboarding banner + a /// "Start a new session" option) instead of asking a separate Yes/No /// "continue where you left off" question. When both CLIs are present we - /// surface whichever one has the most recent transcript. + /// show *both* their transcripts together in one combined, recency-sorted + /// list rather than hiding one behind the other. pub(super) fn onboarding_after_model_select(&mut self) { if !matches!(self.onboarding_phase(), Some(OnboardingPhase::ModelSelect)) { return; } - match self.onboarding_most_recent_external_cli() { - Some(cli) => self.onboarding_open_transcript_picker(cli), - None => self.onboarding_show_suggestions(), - } - } - - /// Among the external CLIs whose OAuth credentials are present, pick the one - /// with the most recent transcript. Ties (or a CLI with no transcripts yet) - /// fall back to detection order (Codex first). Returns `None` when no - /// external CLI login is present. - fn onboarding_most_recent_external_cli(&self) -> Option { let present = crate::tui::app::onboarding_flow::detect_external_cli_oauths(); - match present.as_slice() { - [] => None, - [only] => Some(*only), - _ => { - // Multiple logins: rank by newest transcript mtime. 
- present - .iter() - .max_by_key(|cli| { - session_picker::latest_external_cli_session_secs(**cli).unwrap_or(0) - }) - .copied() - .or_else(|| present.first().copied()) - } + if present.is_empty() { + self.onboarding_show_suggestions(); + } else { + self.onboarding_open_transcript_picker(&present); } } @@ -283,7 +264,7 @@ impl App { _ => return, }; if wants_continue { - self.onboarding_open_transcript_picker(cli); + self.onboarding_open_transcript_picker(std::slice::from_ref(&cli)); } else { self.onboarding_show_suggestions(); } @@ -602,51 +583,89 @@ impl App { }); } - /// Open a single-select resume-style picker filtered to the external CLI's - /// transcripts. Falls back to the session-search prompt if none load. - pub(super) fn onboarding_open_transcript_picker(&mut self, cli: ExternalCli) { - let filter = match cli { - ExternalCli::Codex => SessionFilterMode::Codex, - ExternalCli::ClaudeCode => SessionFilterMode::ClaudeCode, + /// Open a single-select resume-style picker showing the transcripts of every + /// detected external CLI together (Codex and/or Claude Code), sorted by + /// recency. Falls back to the session-search prompt if none load. + /// + /// `clis` is the set of external CLIs the user is logged into. When more than + /// one is present we still show them in one combined list so the user never + /// has a CLI's history hidden behind the other. + pub(super) fn onboarding_open_transcript_picker(&mut self, clis: &[ExternalCli]) { + // Choose a representative CLI for the banner/mode headline: the one with + // the most recent transcript (falling back to detection order). 
+ let headline_cli = clis + .iter() + .copied() + .max_by_key(|cli| session_picker::latest_external_cli_session_secs(*cli).unwrap_or(0)) + .or_else(|| clis.first().copied()) + .unwrap_or(ExternalCli::Codex); + + let multi = clis.len() > 1; + let filter = if multi { + SessionFilterMode::ExternalClis + } else { + match headline_cli { + ExternalCli::Codex => SessionFilterMode::Codex, + ExternalCli::ClaudeCode => SessionFilterMode::ClaudeCode, + } }; - // The onboarding picker only ever shows this one external CLI's - // transcripts, so load just those instead of paying the full - // `load_sessions_grouped` cost (parsing every jcode snapshot, the other - // CLIs, and listing servers). This keeps first-run onboarding snappy. + // The onboarding picker only shows external CLI transcripts, so load just + // those instead of paying the full `load_sessions_grouped` cost (parsing + // every jcode snapshot and listing servers). This keeps first-run + // onboarding snappy while still surfacing every logged-in CLI. 
let (server_groups, orphan_sessions) = - session_picker::load_external_cli_sessions_grouped(cli); + session_picker::load_external_cli_sessions_grouped_multi(clis); let mut picker = SessionPicker::new_grouped(server_groups, orphan_sessions); picker.activate_external_cli_filter(filter); if picker.visible_session_count() == 0 { - self.onboarding_fallback_to_session_search(cli); + self.onboarding_fallback_to_session_search(headline_cli); return; } - picker.activate_onboarding_banner(Self::onboarding_resume_banner_lines(cli)); + picker.activate_onboarding_banner(Self::onboarding_resume_banner_lines(clis)); self.session_picker_overlay = Some(RefCell::new(picker)); - self.session_picker_mode = SessionPickerMode::Onboarding { cli }; + self.session_picker_mode = SessionPickerMode::Onboarding { cli: headline_cli }; if let Some(flow) = self.onboarding_flow.as_mut() { flow.phase = OnboardingPhase::TranscriptPick { - cli, + cli: headline_cli, shown_at: Instant::now(), }; } + let resume_label = if multi { + "Resume a Codex or Claude Code session".to_string() + } else { + format!("Resume a {} session", headline_cli.label()) + }; self.set_status_notice(format!( - "Resume a {} session (↑↓ to choose, Enter to resume) or pick \"Start a new session\"", - cli.label() + "{resume_label} (↑↓ to choose, Enter to resume) or pick \"Start a new session\"" )); } /// Formatted onboarding prompt shown in the reserved top band of the /// resume picker on first run. - fn onboarding_resume_banner_lines(cli: ExternalCli) -> Vec> { + fn onboarding_resume_banner_lines(clis: &[ExternalCli]) -> Vec> { use ratatui::style::{Color, Modifier, Style}; use ratatui::text::{Line, Span}; let accent = crate::tui::color_support::rgb(186, 139, 255); + // Describe whichever CLIs were detected: "Codex", "Claude Code", or + // "Codex and Claude Code" when both are present. 
+ let mut labels: Vec<&'static str> = Vec::new(); + for cli in clis { + let label = cli.label(); + if !labels.contains(&label) { + labels.push(label); + } + } + let found = match labels.as_slice() { + [] => "external".to_string(), + [only] => (*only).to_string(), + [first, second] => format!("{first} and {second}"), + _ => labels.join(", "), + }; vec![ Line::from(vec![Span::styled( "Welcome to jcode 🎉", @@ -654,8 +673,7 @@ impl App { )]), Line::from(vec![Span::styled( format!( - "We found your {} sessions. Pick one below to pick up right where you left off,", - cli.label() + "We found your {found} sessions. Pick one below to pick up right where you left off," ), Style::default().fg(Color::White), )]), diff --git a/crates/jcode-tui/src/tui/app/tests/onboarding_flow.rs b/crates/jcode-tui/src/tui/app/tests/onboarding_flow.rs index e0aa77f69..f93ecfc9c 100644 --- a/crates/jcode-tui/src/tui/app/tests/onboarding_flow.rs +++ b/crates/jcode-tui/src/tui/app/tests/onboarding_flow.rs @@ -411,7 +411,7 @@ fn no_external_transcripts_lands_on_suggestions_without_autosubmit() { // Temp home has no Codex transcripts, so opening the picker should land // the user on the clean new-session suggestion cards rather than // auto-submitting a "search for my last session" turn. - app.onboarding_open_transcript_picker(ExternalCli::Codex); + app.onboarding_open_transcript_picker(&[ExternalCli::Codex]); assert!(matches!( app.onboarding_phase(), Some(OnboardingPhase::Suggestions) @@ -432,6 +432,72 @@ fn onboarding_picker_mode_carries_cli() { assert_ne!(mode, SessionPickerMode::Resume); } +#[test] +fn onboarding_picker_shows_both_codex_and_claude_transcripts() { + use std::fs; + with_temp_jcode_home(|| { + // Seed one Codex transcript and one Claude Code transcript under the + // sandbox-aware external home ($JCODE_HOME/external/...), mirroring a + // user who is logged into BOTH CLIs. 
+ let home = std::env::var_os("JCODE_HOME").expect("JCODE_HOME"); + let external = std::path::Path::new(&home).join("external"); + + let codex_dir = external.join(".codex/sessions/2026/04/05"); + fs::create_dir_all(&codex_dir).expect("codex dir"); + fs::write( + codex_dir.join("rollout-2026-04-05T19-00-00-codextest.jsonl"), + concat!( + "{\"timestamp\":\"2026-04-05T19:00:00Z\",\"type\":\"session_meta\",\"payload\":{\"id\":\"019d-codex-both\",\"timestamp\":\"2026-04-05T18:59:00Z\",\"cwd\":\"/tmp/codex-demo\",\"source\":\"cli\"}}\n", + "{\"timestamp\":\"2026-04-05T19:00:03Z\",\"type\":\"response_item\",\"payload\":{\"type\":\"message\",\"role\":\"user\",\"content\":[{\"type\":\"input_text\",\"text\":\"CODEX_MARKER fix the widget\"}]}}\n", + ), + ) + .expect("write codex transcript"); + + let claude_dir = external.join(".claude/projects/demo-project"); + fs::create_dir_all(&claude_dir).expect("claude dir"); + fs::write( + claude_dir.join("claude-session-both.jsonl"), + concat!( + "{\"type\":\"user\",\"uuid\":\"u1\",\"message\":{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"CLAUDE_MARKER fix the flaky test\"}]}}\n", + "{\"type\":\"assistant\",\"uuid\":\"a1\",\"parentUuid\":\"u1\",\"message\":{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"done\"}]}}\n" + ), + ) + .expect("write claude transcript"); + + let mut app = onboarding_test_app(); + // Open the combined picker for BOTH detected CLIs. + app.onboarding_open_transcript_picker(&[ExternalCli::Codex, ExternalCli::ClaudeCode]); + + // The picker overlay should be up with both CLIs' sessions visible + // (not just one). 
+ let picker_cell = app + .session_picker_overlay + .as_ref() + .expect("picker overlay should be open"); + let picker = picker_cell.borrow(); + assert!( + picker.visible_session_count() >= 2, + "combined picker should list both CLIs' sessions, got {}", + picker.visible_session_count() + ); + + let mut saw_codex = false; + let mut saw_claude = false; + for session in picker.visible_session_iter_for_test() { + match session.source { + jcode_tui_session_picker::SessionSource::Codex => saw_codex = true, + jcode_tui_session_picker::SessionSource::ClaudeCode => saw_claude = true, + _ => {} + } + } + assert!(saw_codex, "Codex session should be present in combined picker"); + assert!( + saw_claude, + "Claude Code session should be present in combined picker" + ); + }); +} + #[test] fn startup_check_skips_when_session_already_has_activity() { with_temp_jcode_home(|| { diff --git a/crates/jcode-tui/src/tui/session_picker.rs b/crates/jcode-tui/src/tui/session_picker.rs index 988dfe8b1..4ce1dbeb2 100644 --- a/crates/jcode-tui/src/tui/session_picker.rs +++ b/crates/jcode-tui/src/tui/session_picker.rs @@ -34,7 +34,7 @@ mod render; #[cfg(test)] use loading::collect_recent_session_stems; pub(crate) use loading::latest_external_cli_session_secs; -pub(crate) use loading::load_external_cli_sessions_grouped; +pub(crate) use loading::load_external_cli_sessions_grouped_multi; use loading::{build_messages_preview, build_search_index, crashed_sessions_from_all_sessions}; pub use loading::{ invalidate_session_list_cache, load_cached_sessions_grouped, load_servers, load_sessions, @@ -525,6 +525,16 @@ impl SessionPicker { .filter_map(|session_ref| self.session_by_ref(*session_ref)) } + /// Test-only accessor: the source classification of every currently visible + /// session. Used by onboarding tests to assert the combined external-CLI + /// picker surfaces both Codex and Claude Code transcripts. 
+ #[cfg(test)] + pub(crate) fn visible_session_iter_for_test( + &self, + ) -> impl Iterator + '_ { + self.visible_session_iter() + } + fn load_preview_for_target( resume_target: ResumeTarget, external_path: Option, diff --git a/crates/jcode-tui/src/tui/session_picker/filter.rs b/crates/jcode-tui/src/tui/session_picker/filter.rs index d12da9141..82883b2f4 100644 --- a/crates/jcode-tui/src/tui/session_picker/filter.rs +++ b/crates/jcode-tui/src/tui/session_picker/filter.rs @@ -146,6 +146,9 @@ impl SessionPicker { SessionFilterMode::Codex => Self::session_is_codex(session), SessionFilterMode::Pi => Self::session_is_pi(session), SessionFilterMode::OpenCode => Self::session_is_open_code(session), + SessionFilterMode::ExternalClis => { + Self::session_is_codex(session) || Self::session_is_claude_code(session) + } } } diff --git a/crates/jcode-tui/src/tui/session_picker/loading.rs b/crates/jcode-tui/src/tui/session_picker/loading.rs index 4acc66aef..52e3a936f 100644 --- a/crates/jcode-tui/src/tui/session_picker/loading.rs +++ b/crates/jcode-tui/src/tui/session_picker/loading.rs @@ -2670,6 +2670,11 @@ pub fn load_sessions_grouped() -> Result<(Vec, Vec)> { /// jcode snapshot, the other CLIs, and listing servers) is wasted there. This /// scoped loader keeps onboarding responsive by touching only the relevant /// transcripts. +/// +/// The live onboarding flow now uses [`load_external_cli_sessions_grouped_multi`] +/// (it shows every logged-in CLI together), so this single-CLI variant is kept +/// only as a focused test helper. +#[cfg(test)] pub(crate) fn load_external_cli_sessions_grouped( cli: crate::tui::app::onboarding_flow::ExternalCli, ) -> (Vec, Vec) { @@ -2682,6 +2687,39 @@ pub(crate) fn load_external_cli_sessions_grouped( (Vec::new(), sessions) } +/// Load sessions for several external CLIs at once (Codex and/or Claude Code), +/// returned as a single combined orphan list compatible with +/// `SessionPicker::new_grouped`. 
+/// +/// First-run onboarding's "continue where you left off" picker shows every +/// external CLI the user is logged into, not just one, so it loads all of them +/// here. Each CLI is still scoped to its own transcripts (no jcode snapshots / +/// servers), keeping onboarding responsive. The picker sorts the merged result +/// by recency, so the newest session across all CLIs floats to the top. +pub(crate) fn load_external_cli_sessions_grouped_multi( + clis: &[crate::tui::app::onboarding_flow::ExternalCli], +) -> (Vec, Vec) { + use crate::tui::app::onboarding_flow::ExternalCli; + let scan_limit = session_scan_limit(); + let mut sessions = Vec::new(); + let mut seen_codex = false; + let mut seen_claude = false; + for cli in clis { + match cli { + ExternalCli::Codex if !seen_codex => { + seen_codex = true; + sessions.extend(load_external_codex_sessions(scan_limit)); + } + ExternalCli::ClaudeCode if !seen_claude => { + seen_claude = true; + sessions.extend(load_external_claude_code_sessions(scan_limit)); + } + _ => {} + } + } + (Vec::new(), sessions) +} + #[cfg(test)] #[path = "loading_tests.rs"] mod tests; From bd7bac86ce510d2109ee1a9f3965c4629a247a14 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:25:35 -0700 Subject: [PATCH 26/57] feat(display): default reasoning display to current Show the model's live reasoning out of the box. DisplayConfig now defaults reasoning_display to Current (with show_thinking=true to keep the provider request + streaming display paths in sync), and the generated default config documents reasoning_display = "current". 
--- crates/jcode-base/src/config/default_file.rs | 6 +++--- crates/jcode-config-types/src/lib.rs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/jcode-base/src/config/default_file.rs b/crates/jcode-base/src/config/default_file.rs index 9d05c4695..c4799c611 100644 --- a/crates/jcode-base/src/config/default_file.rs +++ b/crates/jcode-base/src/config/default_file.rs @@ -114,8 +114,8 @@ mouse_capture = true # Enable debug socket for external control/testing (default: false) debug_socket = false -# Show thinking/reasoning content (default: false) -show_thinking = false +# Show thinking/reasoning content (default: true) +show_thinking = true # How to display reasoning/thinking content: "off", "full", or "current". # off - never show reasoning @@ -123,7 +123,7 @@ show_thinking = false # current - show only the live reasoning; collapse it once the model commits # an assistant message or runs a tool, then show the next one # When unset, falls back to show_thinking (true => full, false => off). -# reasoning_display = "current" +reasoning_display = "current" # Markdown spacing style: "compact" (chat/TUI) or "document" (docs-like) # markdown_spacing = "compact" diff --git a/crates/jcode-config-types/src/lib.rs b/crates/jcode-config-types/src/lib.rs index 932377a1d..31785cba7 100644 --- a/crates/jcode-config-types/src/lib.rs +++ b/crates/jcode-config-types/src/lib.rs @@ -592,7 +592,7 @@ pub struct DisplayConfig { pub debug_socket: bool, /// Center all content (default: false) pub centered: bool, - /// Show thinking/reasoning content by default (default: false) + /// Show thinking/reasoning content by default (default: true) pub show_thinking: bool, /// How to display reasoning/thinking content (off/full/current). /// When unset, falls back to `show_thinking` (true => full, false => off). 
@@ -638,8 +638,8 @@ impl Default for DisplayConfig { mouse_capture: true, debug_socket: false, centered: false, - show_thinking: false, - reasoning_display: None, + show_thinking: true, + reasoning_display: Some(ReasoningDisplayMode::Current), diagram_mode: DiagramDisplayMode::default(), markdown_spacing: MarkdownSpacingMode::default(), idle_animation: true, From 66ac0eb4fd33bb8037033e7d5e9262bfb7fddff3 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:37:36 -0700 Subject: [PATCH 27/57] provider-doctor: add observe-only reasoning_capability checkpoint + parallel tool-call probe - New REASONING_CAPABILITY checkpoint (taxonomy v3), never required for user-readiness and excluded from strict coverage. A reasoning word problem is sent and the turn is classified streamed/opaque/none from StreamEvent signals (ThinkingDelta text, ThinkingSignatureDelta, OpenAIReasoning, and Gemini-3 tool thought_signature). Absence records 'none' and passes. - Shared native tool smoke gains a Phase 3 that asks for two tool calls in a single assistant message, replays both tool_use blocks (each with its own thought_signature) in one assistant turn and answers both results, recording parallel_tool_calls: verified|skipped (best-effort, never fails). - Wired reasoning into the antigravity, generic-native, and claude drivers; skipped on non-full tiers; surfaced in the doctor report detail. - Unit tests for classification, parallel replay shape, detail strings, and the observe-only contract (probe error -> skipped, never failed). 
--- .../src/auth/live_provider_probes.rs | 432 +++++++++++++++++- crates/jcode-base/src/auth/provider_e2e.rs | 187 +++++++- crates/jcode-base/src/live_tests.rs | 35 +- 3 files changed, 639 insertions(+), 15 deletions(-) diff --git a/crates/jcode-base/src/auth/live_provider_probes.rs b/crates/jcode-base/src/auth/live_provider_probes.rs index 749c43bee..fe6ae0b2d 100644 --- a/crates/jcode-base/src/auth/live_provider_probes.rs +++ b/crates/jcode-base/src/auth/live_provider_probes.rs @@ -258,6 +258,123 @@ mod tests { "gpt-5.1" ); } + + fn tool_call_with_signature(signature: Option<&str>) -> NativeClaudeToolCall { + NativeClaudeToolCall { + id: "call_1".to_string(), + name: "read".to_string(), + input_json: "{}".to_string(), + thought_signature: signature.map(str::to_string), + } + } + + #[test] + fn reasoning_capability_classifies_streamed_when_reasoning_text_present() { + let outcome = NativeClaudeStreamOutcome { + reasoning_text_len: 42, + saw_message_end: true, + ..Default::default() + }; + assert_eq!(outcome.reasoning_capability(), "streamed"); + } + + #[test] + fn reasoning_capability_classifies_opaque_from_thinking_signature() { + // No reasoning text, but a ThinkingSignatureDelta-style signal: opaque. + let outcome = NativeClaudeStreamOutcome { + saw_reasoning_signal: true, + saw_message_end: true, + ..Default::default() + }; + assert_eq!(outcome.reasoning_capability(), "opaque"); + } + + #[test] + fn reasoning_capability_classifies_opaque_from_tool_thought_signature() { + // A Gemini-3 tool call carrying a thought_signature is an opaque signal + // even when no reasoning text streamed. + let outcome = NativeClaudeStreamOutcome { + tool_calls: vec![tool_call_with_signature(Some("SIG_ABC"))], + saw_message_end: true, + ..Default::default() + }; + assert_eq!(outcome.reasoning_capability(), "opaque"); + } + + #[test] + fn reasoning_capability_classifies_none_without_any_signal() { + // A tool call with no signature is not a reasoning signal. 
+ let outcome = NativeClaudeStreamOutcome { + tool_calls: vec![tool_call_with_signature(None)], + saw_message_end: true, + ..Default::default() + }; + assert_eq!(outcome.reasoning_capability(), "none"); + } + + #[test] + fn reasoning_capability_prefers_streamed_over_opaque() { + // Streamed reasoning text wins even when an opaque signal is also present. + let outcome = NativeClaudeStreamOutcome { + reasoning_text_len: 10, + saw_reasoning_signal: true, + tool_calls: vec![tool_call_with_signature(Some("SIG"))], + saw_message_end: true, + ..Default::default() + }; + assert_eq!(outcome.reasoning_capability(), "streamed"); + } + + #[test] + fn parallel_tool_use_replays_every_signature_in_one_assistant_message() { + let calls = vec![ + NativeClaudeToolCall { + id: "a".to_string(), + name: "read".to_string(), + input_json: "{\"file_path\":\"/tmp/a\"}".to_string(), + thought_signature: Some("SIG_A".to_string()), + }, + NativeClaudeToolCall { + id: "b".to_string(), + name: "read".to_string(), + input_json: "{\"file_path\":\"/tmp/b\"}".to_string(), + thought_signature: Some("SIG_B".to_string()), + }, + ]; + let assistant = assistant_parallel_tool_uses(&calls); + assert!(matches!(assistant.role, Role::Assistant)); + // One assistant message must carry BOTH tool_use blocks, each with its + // own signature preserved. + assert_eq!(assistant.content.len(), 2); + let sigs: Vec> = assistant + .content + .iter() + .map(|block| match block { + ContentBlock::ToolUse { + thought_signature, .. + } => thought_signature.clone(), + other => panic!("expected ToolUse, got {other:?}"), + }) + .collect(); + assert_eq!( + sigs, + vec![Some("SIG_A".to_string()), Some("SIG_B".to_string())] + ); + + // The results message must answer every call with a matching id. 
+ let results = parallel_tool_results(&calls); + assert!(matches!(results.role, Role::User)); + assert_eq!(results.content.len(), 2); + let ids: Vec = results + .content + .iter() + .map(|block| match block { + ContentBlock::ToolResult { tool_use_id, .. } => tool_use_id.clone(), + other => panic!("expected ToolResult, got {other:?}"), + }) + .collect(); + assert_eq!(ids, vec!["a".to_string(), "b".to_string()]); + } } pub async fn run_live_openai_compatible_stream_smoke( @@ -528,6 +645,15 @@ struct NativeClaudeStreamOutcome { /// Number of thinking deltas seen (extended/adaptive thinking). Useful when /// a turn is consumed entirely by reasoning and emits no visible text. thinking_chunk_count: usize, + /// Length of streamed reasoning text (sum of `ThinkingDelta` payloads). + /// Distinct from `thinking_chunk_count`: a provider can emit a single empty + /// `ThinkingStart`/`ThinkingEnd` pair without ever streaming visible + /// reasoning text, which we must classify as `opaque`/`none`, not `streamed`. + reasoning_text_len: usize, + /// Saw an *opaque* reasoning signal: a `thought_signature` (Gemini-3), a + /// `ThinkingSignatureDelta`, or an `OpenAIReasoning` item. This is the + /// evidence that the model reasoned even though it never streamed the text. + saw_reasoning_signal: bool, /// Total stream events observed, for diagnosing empty/odd streams. total_events: usize, saw_message_end: bool, @@ -576,6 +702,33 @@ impl NativeClaudeStreamOutcome { self.tool_calls.len() ) } + + /// Did any captured tool call carry a Gemini-3 `thought_signature`? This is + /// an opaque reasoning signal even when the model streamed no reasoning text. + fn any_tool_signature(&self) -> bool { + self.tool_calls + .iter() + .any(|call| call.thought_signature.is_some()) + } + + /// Classify how this turn exposed the model's reasoning: + /// - `streamed`: streamed visible reasoning text (`ThinkingDelta`). 
+ /// - `opaque`: no reasoning text, but an opaque reasoning signal was present + /// (a `thought_signature`, a `ThinkingSignatureDelta`, or an + /// `OpenAIReasoning` item). Legitimate and common (Gemini-3, OpenAI). + /// - `none`: neither was observed. + /// + /// All three are valid; the reasoning checkpoint records the classification + /// and never fails on `none`. + fn reasoning_capability(&self) -> &'static str { + if self.reasoning_text_len > 0 { + "streamed" + } else if self.saw_reasoning_signal || self.any_tool_signature() { + "opaque" + } else { + "none" + } + } } /// Drive any native [`Provider`] runtime's `complete` and fold the resulting @@ -610,8 +763,19 @@ async fn consume_native_stream( outcome.chunk_count += 1; outcome.text.push_str(&text); } - StreamEvent::ThinkingDelta(_) => { + StreamEvent::ThinkingDelta(text) => { outcome.thinking_chunk_count += 1; + outcome.reasoning_text_len += text.len(); + } + // Opaque reasoning signals: the model reasoned but the runtime + // surfaces only a signature/encrypted item, not readable text. + StreamEvent::ThinkingSignatureDelta(signature) => { + if !signature.is_empty() { + outcome.saw_reasoning_signal = true; + } + } + StreamEvent::OpenAIReasoning { .. } => { + outcome.saw_reasoning_signal = true; } StreamEvent::ToolUseStart { id, name } => { pending_tool = Some(NativeClaudeToolCall { @@ -981,6 +1145,18 @@ pub async fn run_live_claude_native_tool_smoke( Ok(stage) } +/// Stage: reasoning capability (observe-only). +/// +/// Delegates to the shared [`run_live_native_provider_reasoning_smoke`] so the +/// native Claude runtime records whether the model streamed reasoning text +/// (extended thinking) or hid it behind an opaque signal. 
+pub async fn run_live_claude_native_reasoning_smoke( + model: &str, +) -> anyhow::Result { + let provider = build_native_claude_provider(model)?; + run_live_native_provider_reasoning_smoke(&provider, model, "Claude").await +} + // === Native Antigravity probes ============================================ // // Antigravity is a Google OAuth login provider whose `generateContent` runtime @@ -1162,6 +1338,18 @@ pub async fn run_live_antigravity_native_tool_smoke( run_live_native_provider_tool_smoke(&provider, model, "Antigravity").await } +/// Stage: reasoning capability (observe-only). +/// +/// Delegates to the shared [`run_live_native_provider_reasoning_smoke`] so +/// Antigravity records whether the resolved model streams reasoning text or +/// hides it behind an opaque signal (Gemini-3 thought signatures are opaque). +pub async fn run_live_antigravity_native_reasoning_smoke( + model: &str, +) -> anyhow::Result { + let provider = build_native_antigravity_provider(model)?; + run_live_native_provider_reasoning_smoke(&provider, model, "Antigravity").await +} + // === Generic native-runtime probes ======================================== // // The native Claude and native Antigravity probes above each build a concrete @@ -1317,10 +1505,110 @@ pub async fn run_live_native_provider_stream_smoke( Ok(stage) } +/// Stage: reasoning capability (observe-only). +/// +/// Sends a small multi-step logic/word problem that forces the model to reason +/// before answering, consumes the stream, and classifies how the model exposed +/// its reasoning: +/// +/// - `streamed`: the runtime streamed visible reasoning text (`ThinkingDelta`). +/// - `opaque`: no reasoning text, but an opaque reasoning signal was present (a +/// Gemini-3 `thought_signature`, a `ThinkingSignatureDelta`, or an +/// `OpenAIReasoning` item). This is legitimate and common (Gemini-3 and +/// OpenAI hide their reasoning), so it MUST be a pass. +/// - `none`: neither was observed. 
+/// +/// The checkpoint passes as long as the turn completes cleanly (a `MessageEnd` +/// plus a coherent answer); it never hard-fails just because reasoning was +/// hidden or absent. The classification is recorded as the `reasoning_capability` +/// evidence. Expected-to-reason gating (a capability list) can layer on later. +pub async fn run_live_native_provider_reasoning_smoke( + provider: &dyn Provider, + model: &str, + label: &str, +) -> anyhow::Result { + let started = std::time::Instant::now(); + // A small logic word problem with a single unambiguous numeric answer (2). + // The answer token `REASON_TEST_ANSWER=2` lets us assert a coherent result + // without depending on the model's prose. The problem requires at least one + // step of arithmetic/elimination so a reasoning model actually reasons. + let messages = vec![Message { + role: Role::User, + content: vec![ContentBlock::Text { + text: "Solve this step by step, then give the final answer. A farmer has chickens \ + and cows. Together they have 7 heads and 22 legs. How many cows are there? \ + After reasoning, end your reply with exactly REASON_TEST_ANSWER= on \ + its own final line." + .to_string(), + cache_control: None, + }], + timestamp: None, + tool_duration_ms: None, + }]; + let system = "You are a live provider reasoning smoke test. Think through the problem, then \ + finish with the required REASON_TEST_ANSWER= line."; + + let outcome = consume_native_stream( + provider, + &messages, + &[], + system, + std::time::Duration::from_secs(120), + ) + .await?; + + ensure!( + outcome.saw_message_end, + "native {label} reasoning smoke ended without a message_end event ({})", + outcome.diagnostics() + ); + // Coherence: the turn must produce a real final answer. We accept either the + // exact sentinel or the correct numeric answer (4 cows) appearing in the + // text, so a model that ignores the formatting instruction but still answers + // correctly is not penalized. 
The reasoning checkpoint is about completion, + // not about reasoning visibility. + let answered = outcome.text.contains("REASON_TEST_ANSWER=4") + || outcome.text.contains("REASON_TEST_ANSWER= 4") + || outcome.text.to_ascii_lowercase().contains("4 cows") + || outcome.text.contains("REASON_TEST_ANSWER"); + ensure!( + !outcome.text.trim().is_empty() && answered, + "native {label} reasoning smoke produced no coherent answer: {:?} ({})", + crate::util::truncate_str(outcome.text.trim(), 200), + outcome.diagnostics() + ); + + let classification = outcome.reasoning_capability(); + let mut stage = crate::live_tests::LiveVerificationStage::passed( + crate::live_tests::checkpoints::REASONING_CAPABILITY, + ) + .with_duration_ms(started.elapsed().as_millis() as u64) + .with_evidence("model", serde_json::json!(model)) + .with_evidence("reasoning_capability", serde_json::json!(classification)) + .with_evidence( + "reasoning_text_chars", + serde_json::json!(outcome.reasoning_text_len), + ) + .with_evidence( + "thinking_delta_count", + serde_json::json!(outcome.thinking_chunk_count), + ) + .with_evidence( + "saw_opaque_reasoning_signal", + serde_json::json!(outcome.saw_reasoning_signal), + ) + .with_evidence("total_events", serde_json::json!(outcome.total_events)) + .with_evidence("stop_reason", serde_json::json!(outcome.stop_reason.clone())); + if let Some(usage) = outcome.usage_evidence() { + stage = stage.with_evidence("usage", usage); + } + Ok(stage) +} + /// Stage: tool-call parse + execution loop + result follow-up against an /// arbitrary native provider. /// -/// Two phases: +/// Three phases: /// /// 1. 
**Single round-trip (gating):** ask the model to call a tool (assert a /// parseable tool_use), then feed a synthetic tool_result back (assert the @@ -1338,6 +1626,14 @@ pub async fn run_live_native_provider_stream_smoke( /// signatures at all), the phase records `multi_tool_replay: "skipped"` /// rather than failing, so it never turns a previously-green provider red /// for a non-signature reason. +/// 3. **Parallel tool calls in one turn (best-effort):** ask the model to call +/// the tool TWICE in a single assistant message, then replay BOTH `tool_use` +/// blocks (each with its own `thought_signature`) inside one assistant turn +/// and answer both `tool_result`s, asserting the backend accepts a single +/// assistant message carrying two `functionCall` parts. Distinct from the +/// sequential loop in phase 2. Records `parallel_tool_calls: "verified"` when +/// the model emitted >=2 calls in one turn and the follow-up was accepted, or +/// `"skipped"` when the model only emitted one (best-effort, never a fail). pub async fn run_live_native_provider_tool_smoke( provider: &dyn Provider, model: &str, @@ -1521,6 +1817,86 @@ pub async fn run_live_native_provider_tool_smoke( } } + // Phase 3 (best-effort): ask the model to call the tool TWICE in a single + // assistant turn (parallel/batch tool calls), then replay BOTH tool_use + // blocks inside ONE assistant message (each carrying its own captured + // thought_signature) and answer BOTH tool_results. A backend that accepts a + // single assistant message containing two `functionCall` parts completes the + // follow-up cleanly; one that rejects parallel calls surfaces here. If the + // model only emits a single call (common: many models serialize tool use), + // we record `parallel_tool_calls: "skipped"` rather than failing. 
+ let mut parallel_tool_calls = "skipped"; + let mut parallel_call_count = 0usize; + let parallel_turn = consume_native_stream( + provider, + &[Message { + role: Role::User, + content: vec![ContentBlock::Text { + text: "In this single turn, make TWO read tool calls at once (in parallel, in \ + one message): read /tmp/auth_tool_probe.txt AND read \ + /tmp/auth_tool_probe_2.txt. Emit both tool calls now; do not answer in \ + text and do not wait for the first result before making the second call." + .to_string(), + cache_control: None, + }], + timestamp: None, + tool_duration_ms: None, + }], + &tools, + system, + std::time::Duration::from_secs(120), + ) + .await?; + total_input += parallel_turn.input_tokens; + total_output += parallel_turn.output_tokens; + + if parallel_turn.tool_calls.len() >= 2 { + parallel_call_count = parallel_turn.tool_calls.len(); + // Build ONE assistant message holding every tool_use block (each with + // its own signature), then ONE user message holding every tool_result. + let assistant = assistant_parallel_tool_uses(&parallel_turn.tool_calls); + let results = parallel_tool_results(&parallel_turn.tool_calls); + let convo = vec![ + Message { + role: Role::User, + content: vec![ContentBlock::Text { + text: "In this single turn, make TWO read tool calls at once (in parallel, \ + in one message): read /tmp/auth_tool_probe.txt AND read \ + /tmp/auth_tool_probe_2.txt."
+ .to_string(), + cache_control: None, + }], + timestamp: None, + tool_duration_ms: None, + }, + assistant, + results, + ]; + let parallel_followup = consume_native_stream( + provider, + &convo, + &tools, + system, + std::time::Duration::from_secs(120), + ) + .await + .with_context(|| { + format!( + "native {label} parallel tool-call replay was rejected (one assistant message \ + carried {parallel_call_count} functionCall parts; a backend that does not \ + accept parallel tool calls in a single message fails here)" + ) + })?; + total_input += parallel_followup.input_tokens; + total_output += parallel_followup.output_tokens; + ensure!( + parallel_followup.saw_message_end, + "native {label} parallel tool-call follow-up ended without a message_end event ({})", + parallel_followup.diagnostics() + ); + parallel_tool_calls = "verified"; + } + let mut stage = crate::live_tests::LiveVerificationStage::passed( crate::live_tests::checkpoints::TOOL_CALL_PARSE, ) @@ -1538,6 +1914,14 @@ pub async fn run_live_native_provider_tool_smoke( "tool_call_signatures_present", serde_json::json!(signatures_present), ) + .with_evidence( + "parallel_tool_calls", + serde_json::json!(parallel_tool_calls), + ) + .with_evidence( + "parallel_tool_call_count", + serde_json::json!(parallel_call_count), + ) .with_evidence("followup_consumed_result", serde_json::json!(true)); if total_input != 0 || total_output != 0 { stage = stage.with_evidence("usage", usage_evidence(total_input, total_output, 0, 0)); @@ -1586,3 +1970,47 @@ fn tool_result_then_text(tool_use_id: &str, result: &str) -> Message { tool_duration_ms: None, } } + +/// Build a single assistant message that replays *every* captured tool call as a +/// parallel batch (multiple `ToolUse` blocks in one message), each preserving +/// its own `thought_signature`. This is the shape the parallel-tool-call phase +/// asserts the backend accepts as one assistant turn carrying N `functionCall` +/// parts. 
+fn assistant_parallel_tool_uses(calls: &[NativeClaudeToolCall]) -> Message { + let content = calls + .iter() + .map(|call| ContentBlock::ToolUse { + id: call.id.clone(), + name: call.name.clone(), + input: parse_tool_arguments(&call.input_json), + thought_signature: call.thought_signature.clone(), + }) + .collect(); + Message { + role: Role::Assistant, + content, + timestamp: None, + tool_duration_ms: None, + } +} + +/// Build a single user message answering *every* parallel tool call with a +/// synthetic `tool_result`, so a parallel assistant turn is fully resolved in +/// one follow-up message. +fn parallel_tool_results(calls: &[NativeClaudeToolCall]) -> Message { + let content = calls + .iter() + .enumerate() + .map(|(index, call)| ContentBlock::ToolResult { + tool_use_id: call.id.clone(), + content: format!("Contents of file {}: token_{index}.", index + 1), + is_error: Some(false), + }) + .collect(); + Message { + role: Role::User, + content, + timestamp: None, + tool_duration_ms: None, + } +} diff --git a/crates/jcode-base/src/auth/provider_e2e.rs b/crates/jcode-base/src/auth/provider_e2e.rs index d4356db5c..05ef8c40a 100644 --- a/crates/jcode-base/src/auth/provider_e2e.rs +++ b/crates/jcode-base/src/auth/provider_e2e.rs @@ -22,13 +22,14 @@ use crate::auth::lifecycle::{ AuthActivationRequest, activate_auth_change, validate_catalog_invariants, }; use crate::auth::live_provider_probes::{ - fetch_live_openai_compatible_models, run_live_antigravity_native_smoke, - run_live_antigravity_native_stream_smoke, run_live_antigravity_native_tool_smoke, + fetch_live_openai_compatible_models, run_live_antigravity_native_reasoning_smoke, + run_live_antigravity_native_smoke, run_live_antigravity_native_stream_smoke, + run_live_antigravity_native_tool_smoke, run_live_claude_native_reasoning_smoke, run_live_claude_native_smoke, run_live_claude_native_stream_smoke, - run_live_claude_native_tool_smoke, run_live_native_provider_smoke, - run_live_native_provider_stream_smoke, 
run_live_native_provider_tool_smoke, - run_live_openai_compatible_smoke, run_live_openai_compatible_stream_smoke, - run_live_openai_compatible_tool_smoke, + run_live_claude_native_tool_smoke, run_live_native_provider_reasoning_smoke, + run_live_native_provider_smoke, run_live_native_provider_stream_smoke, + run_live_native_provider_tool_smoke, run_live_openai_compatible_smoke, + run_live_openai_compatible_stream_smoke, run_live_openai_compatible_tool_smoke, }; use crate::live_tests::{ self, LiveVerificationAuth, LiveVerificationEvent, LiveVerificationResult, @@ -273,6 +274,7 @@ const FULL_PIPELINE_LABELS: &[(&str, &str)] = &[ (checkpoints::TOOL_EXECUTION_LOOP, "Tool execution loop"), (checkpoints::TOOL_RESULT_FOLLOWUP, "Tool-result followup"), (checkpoints::REAL_JCODE_TOOL_SMOKE, "Real Jcode tool smoke"), + (checkpoints::REASONING_CAPABILITY, "Reasoning capability"), ]; fn label_for(checkpoint: &str) -> &'static str { @@ -291,17 +293,89 @@ fn label_for(checkpoint: &str) -> &'static str { /// declined a second tool call). Surfacing it keeps the coverage observable in /// the doctor report instead of collapsing to a generic pass string. 
fn tool_stage_detail(stage: &crate::live_tests::LiveVerificationStage) -> String { - match stage + let multi = match stage .evidence .get("multi_tool_replay") .and_then(|value| value.as_str()) { - Some("verified") => "tool call parsed and executed; multi-call signature replay verified".to_string(), - Some("skipped") => { - "tool call parsed and executed; multi-call signature replay skipped (no 2nd tool call)" - .to_string() + Some("verified") => "multi-call signature replay verified", + Some("skipped") => "multi-call signature replay skipped (no 2nd tool call)", + _ => "", + }; + let parallel = match stage + .evidence + .get("parallel_tool_calls") + .and_then(|value| value.as_str()) + { + Some("verified") => "parallel tool calls verified", + Some("skipped") => "parallel tool calls skipped (single call)", + _ => "", + }; + let mut detail = "tool call parsed and executed".to_string(); + for part in [multi, parallel] { + if !part.is_empty() { + detail.push_str("; "); + detail.push_str(part); + } + } + detail +} + +/// Human-readable detail for a passed reasoning-capability stage. The stage +/// records `reasoning_capability` as `streamed` (visible reasoning text), +/// `opaque` (no text but a reasoning signal: thought signature, reasoning item, +/// or reasoning tokens), or `none` (neither). All three are passes; `opaque` and +/// `none` are legitimate because providers like Gemini-3 and OpenAI hide their +/// reasoning. Surfacing the classification keeps the observation visible in the +/// doctor report. 
+fn reasoning_stage_detail(stage: &crate::live_tests::LiveVerificationStage) -> String { + match stage + .evidence + .get("reasoning_capability") + .and_then(|value| value.as_str()) + { + Some("streamed") => "reasoning streamed (visible thinking text)".to_string(), + Some("opaque") => { + "reasoning hidden but signaled (opaque: thought signature / reasoning item)".to_string() + } + Some("none") => "no reasoning signal observed (model hides or skips reasoning)".to_string(), + _ => "reasoning turn completed".to_string(), + } +} + +/// Fold a reasoning-capability probe result into a [`DoctorCheck`], honoring the +/// observe-only contract. +/// +/// A clean turn records a passed checkpoint carrying the `streamed`/`opaque`/ +/// `none` classification (all three are passes; hiding reasoning is legitimate). +/// A probe *error* (network, or a turn that did not complete with a coherent +/// answer) is recorded as **skipped**, never failed: this checkpoint must never +/// flip a provider to "not user-ready", and it is not part of the strict +/// coverage ladder, so an observational miss should not fail the tier. The +/// broader chat/streaming checkpoints already guard turn completion. 
+fn push_reasoning_check( + result: anyhow::Result, + checks: &mut Vec, + spend: &mut DoctorSpend, +) { + match result { + Ok(stage) => { + spend.accumulate(stage.evidence.get("usage"), stage.evidence.get("cost")); + let detail = reasoning_stage_detail(&stage); + checks.push(DoctorCheck::passed( + checkpoints::REASONING_CAPABILITY, + label_for(checkpoints::REASONING_CAPABILITY), + detail, + )); } - _ => "tool call parsed and executed".to_string(), + Err(error) => checks.push(DoctorCheck::skipped( + checkpoints::REASONING_CAPABILITY, + label_for(checkpoints::REASONING_CAPABILITY), + format!( + "observe-only reasoning probe did not complete: {}", + format_error_chain(&error) + ), + )), } } @@ -314,6 +388,7 @@ const API_DEPENDENT_CHECKPOINTS: &[&str] = &[ checkpoints::TOOL_EXECUTION_LOOP, checkpoints::TOOL_RESULT_FOLLOWUP, checkpoints::REAL_JCODE_TOOL_SMOKE, + checkpoints::REASONING_CAPABILITY, ]; /// Run the strict provider/model diagnostic. @@ -846,6 +921,13 @@ async fn run_native_claude_api_checks( } } } + + // Reasoning capability (observe-only; never gates readiness). + push_reasoning_check( + run_live_claude_native_reasoning_smoke(selected).await, + checks, + spend, + ); } /// The wiring contract for the native Antigravity (Google OAuth Cloud Code) @@ -1190,6 +1272,13 @@ async fn run_native_antigravity_api_checks( } } } + + // Reasoning capability (observe-only; never gates readiness). + push_reasoning_check( + run_live_antigravity_native_reasoning_smoke(selected).await, + checks, + spend, + ); } // === Generic native-runtime doctor ========================================= @@ -1825,6 +1914,13 @@ async fn run_generic_native_api_checks( } } } + + // Reasoning capability (observe-only; never gates readiness). + push_reasoning_check( + run_live_native_provider_reasoning_smoke(provider, selected, label).await, + checks, + spend, + ); } /// The jcode-side wiring a given compat profile is expected to activate. 
@@ -2477,4 +2573,71 @@ mod tests { let anonymous = native_antigravity_auth(""); assert!(anonymous.source.contains("Antigravity Google OAuth")); } + + #[test] + fn tool_stage_detail_surfaces_multi_and_parallel_phases() { + let verified = LiveVerificationStage::passed(checkpoints::TOOL_CALL_PARSE) + .with_evidence("multi_tool_replay", serde_json::json!("verified")) + .with_evidence("parallel_tool_calls", serde_json::json!("verified")); + let detail = tool_stage_detail(&verified); + assert!(detail.contains("tool call parsed and executed")); + assert!(detail.contains("multi-call signature replay verified")); + assert!(detail.contains("parallel tool calls verified")); + + let skipped = LiveVerificationStage::passed(checkpoints::TOOL_CALL_PARSE) + .with_evidence("multi_tool_replay", serde_json::json!("skipped")) + .with_evidence("parallel_tool_calls", serde_json::json!("skipped")); + let detail = tool_stage_detail(&skipped); + assert!(detail.contains("multi-call signature replay skipped")); + assert!(detail.contains("parallel tool calls skipped")); + + // With no evidence the base string is unchanged (back-compat). 
+ let bare = LiveVerificationStage::passed(checkpoints::TOOL_CALL_PARSE); + assert_eq!(tool_stage_detail(&bare), "tool call parsed and executed"); + } + + #[test] + fn reasoning_stage_detail_describes_each_classification() { + for (value, needle) in [ + ("streamed", "reasoning streamed"), + ("opaque", "reasoning hidden but signaled"), + ("none", "no reasoning signal observed"), + ] { + let stage = LiveVerificationStage::passed(checkpoints::REASONING_CAPABILITY) + .with_evidence("reasoning_capability", serde_json::json!(value)); + assert!( + reasoning_stage_detail(&stage).contains(needle), + "classification {value} should mention {needle}" + ); + } + } + + #[test] + fn push_reasoning_check_records_pass_for_clean_turn() { + let mut checks = Vec::new(); + let mut spend = DoctorSpend::default(); + let stage = LiveVerificationStage::passed(checkpoints::REASONING_CAPABILITY) + .with_evidence("reasoning_capability", serde_json::json!("opaque")); + push_reasoning_check(Ok(stage), &mut checks, &mut spend); + assert_eq!(checks.len(), 1); + assert_eq!(checks[0].checkpoint, checkpoints::REASONING_CAPABILITY); + assert_eq!(checks[0].status, LiveVerificationStageStatus::Passed); + assert!(!checks[0].is_failure()); + } + + #[test] + fn push_reasoning_check_skips_never_fails_on_probe_error() { + // The observe-only reasoning checkpoint must never produce a failure that + // could flip the tier to not-ready; a probe error is recorded as skipped. 
+ let mut checks = Vec::new(); + let mut spend = DoctorSpend::default(); + push_reasoning_check( + Err(anyhow::anyhow!("network blip")), + &mut checks, + &mut spend, + ); + assert_eq!(checks.len(), 1); + assert_eq!(checks[0].status, LiveVerificationStageStatus::Skipped); + assert!(!checks[0].is_failure()); + } } diff --git a/crates/jcode-base/src/live_tests.rs b/crates/jcode-base/src/live_tests.rs index bc4935aa8..e320c73be 100644 --- a/crates/jcode-base/src/live_tests.rs +++ b/crates/jcode-base/src/live_tests.rs @@ -13,7 +13,7 @@ const DEFAULT_RETEST_DAYS: i64 = 14; const LEDGER_ENV: &str = "JCODE_LIVE_TEST_LEDGER"; const COVERAGE_ENV: &str = "JCODE_LIVE_TEST_COVERAGE"; -pub const CHECKPOINT_TAXONOMY_VERSION: u32 = 2; +pub const CHECKPOINT_TAXONOMY_VERSION: u32 = 3; pub mod checkpoints { pub const AUTH_UX_KEY_ENTRY: &str = "auth_ux_key_entry"; @@ -30,6 +30,10 @@ pub mod checkpoints { pub const TOOL_EXECUTION_LOOP: &str = "tool_execution_loop"; pub const TOOL_RESULT_FOLLOWUP: &str = "tool_result_followup"; pub const REAL_JCODE_TOOL_SMOKE: &str = "real_jcode_tool_smoke"; + /// Observe-only: did the model expose its reasoning (`streamed`), hide it + /// behind an opaque signal (`opaque`, e.g. Gemini-3 / OpenAI), or emit none + /// (`none`)? Never required for user-readiness; hiding reasoning is a pass. 
+ pub const REASONING_CAPABILITY: &str = "reasoning_capability"; pub const RESTART_PERSISTENCE: &str = "restart_persistence"; pub const NEGATIVE_ERROR_UX: &str = "negative_error_ux"; pub const MODEL_CAPABILITY_MATRIX: &str = "model_capability_matrix"; @@ -159,6 +163,16 @@ const END_TO_END_CHECKPOINTS: &[LiveVerificationCheckpointDefinition] = &[ spends_balance: true, description: "A normal Jcode agent turn uses the real streamed parser, advertised tool schema, registry execution, tool-result followup, and transcript validation without malformed tool calls.", }, + LiveVerificationCheckpointDefinition { + id: checkpoints::REASONING_CAPABILITY, + label: "Reasoning capability", + category: "reasoning", + // Observe-only: a provider that hides its reasoning (opaque) or emits + // none is still fully user-ready, so this must never gate readiness. + required_for_user_ready: false, + spends_balance: true, + description: "Records whether the model streams reasoning text, hides it behind an opaque signal (thought_signature/reasoning item/reasoning tokens), or emits none. Passes as long as the reasoning turn completes cleanly; absence of reasoning is recorded, not failed.", + }, LiveVerificationCheckpointDefinition { id: checkpoints::RESTART_PERSISTENCE, label: "Restart persistence", @@ -2514,6 +2528,7 @@ mod tests { checkpoints::TOOL_EXECUTION_LOOP, checkpoints::TOOL_RESULT_FOLLOWUP, checkpoints::REAL_JCODE_TOOL_SMOKE, + checkpoints::REASONING_CAPABILITY, checkpoints::RESTART_PERSISTENCE, checkpoints::NEGATIVE_ERROR_UX, checkpoints::MODEL_CAPABILITY_MATRIX, @@ -2527,6 +2542,24 @@ mod tests { .any(|checkpoint| checkpoint.spends_balance), "taxonomy should identify balance-spending checkpoints" ); + + // The reasoning_capability checkpoint is observe-only: it records what + // the model exposed (streamed/opaque/none) but a provider that hides its + // reasoning is still fully user-ready, so it must never gate readiness or + // strict coverage. 
+ let reasoning = end_to_end_checkpoint_definitions() + .iter() + .find(|checkpoint| checkpoint.id == checkpoints::REASONING_CAPABILITY) + .expect("reasoning_capability checkpoint must exist in the taxonomy"); + assert!( + !reasoning.required_for_user_ready, + "reasoning_capability must not be required for user-readiness" + ); + assert!( + !STRICT_PROVIDER_MODEL_COVERAGE_CHECKPOINTS + .contains(&checkpoints::REASONING_CAPABILITY), + "reasoning_capability must not be a strict-required checkpoint" + ); } #[test] From 9849d5b4d1a47340046bc2b6157152256e5b7558 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:37:40 -0700 Subject: [PATCH 28/57] fix(gemini/antigravity): surface leftover Gemini-3 thought signature as reasoning signal A Gemini-3 thoughtSignature that was not consumed by a following functionCall (e.g. a pure-text reasoning turn) was silently dropped. Emit it as a ThinkingSignatureDelta instead so reasoning-aware consumers (and the new provider-doctor reasoning probe) can observe that the model reasoned even when no reasoning text and no tool call were produced. --- crates/jcode-base/src/provider/antigravity.rs | 10 ++++++++++ crates/jcode-base/src/provider/gemini.rs | 9 +++++++++ 2 files changed, 19 insertions(+) diff --git a/crates/jcode-base/src/provider/antigravity.rs b/crates/jcode-base/src/provider/antigravity.rs index d6e3fa672..e64f09870 100644 --- a/crates/jcode-base/src/provider/antigravity.rs +++ b/crates/jcode-base/src/provider/antigravity.rs @@ -1084,6 +1084,16 @@ impl Provider for AntigravityProvider { pending_signature = Some(signature); } } + // A thought signature that was never consumed by a following + // function call (e.g. a pure-text reasoning turn) is still an + // opaque reasoning signal. Surface it as a ThinkingSignatureDelta + // rather than dropping it, so reasoning-aware consumers (and the + // provider-doctor reasoning probe) can see the model reasoned. 
+ if let Some(signature) = pending_signature.take() { + let _ = tx + .send(Ok(StreamEvent::ThinkingSignatureDelta(signature))) + .await; + } } let _ = tx diff --git a/crates/jcode-base/src/provider/gemini.rs b/crates/jcode-base/src/provider/gemini.rs index 0f6e6ae0b..b2aa15d8d 100644 --- a/crates/jcode-base/src/provider/gemini.rs +++ b/crates/jcode-base/src/provider/gemini.rs @@ -859,6 +859,15 @@ impl Provider for GeminiProvider { pending_signature = Some(signature); } } + // A thought signature not consumed by a following function + // call (e.g. a pure-text reasoning turn) is still an opaque + // reasoning signal. Surface it as a ThinkingSignatureDelta + // instead of dropping it. + if let Some(signature) = pending_signature.take() { + let _ = tx + .send(Ok(StreamEvent::ThinkingSignatureDelta(signature))) + .await; + } } } From 91620e1e5e20545faba72fffe5ea780f31696db1 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:44:46 -0700 Subject: [PATCH 29/57] docs(provider-doctor): correct reasoning probe answer comment (4 cows) --- crates/jcode-base/src/auth/live_provider_probes.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/jcode-base/src/auth/live_provider_probes.rs b/crates/jcode-base/src/auth/live_provider_probes.rs index fe6ae0b2d..a2bb0da03 100644 --- a/crates/jcode-base/src/auth/live_provider_probes.rs +++ b/crates/jcode-base/src/auth/live_provider_probes.rs @@ -1528,10 +1528,11 @@ pub async fn run_live_native_provider_reasoning_smoke( label: &str, ) -> anyhow::Result { let started = std::time::Instant::now(); - // A small logic word problem with a single unambiguous numeric answer (2). - // The answer token `REASON_TEST_ANSWER=2` lets us assert a coherent result - // without depending on the model's prose. The problem requires at least one - // step of arithmetic/elimination so a reasoning model actually reasons. 
+ // A small logic word problem with a single unambiguous numeric answer (4 + // cows: chickens c + cows w give c + w = 7 heads and 2c + 4w = 22 legs, so + // w = 4). The `REASON_TEST_ANSWER=` sentinel lets us assert a coherent + // result without depending on the model's prose, and the problem requires at + // least one elimination/arithmetic step so a reasoning model actually reasons. let messages = vec![Message { role: Role::User, content: vec![ContentBlock::Text { From 26984695f6aac90b9028892f36a4f5bb266eae9c Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:48:01 -0700 Subject: [PATCH 30/57] desktop: cache measurement FontSystem for inline-code pill geometry; add scroll diag instrumentation Building a fresh FontSystem every frame (rescanning all system fonts) inside the inline-code/math pill geometry builder caused multi-ms per-frame scroll spikes over code blocks. Reuse a thread-local measurement FontSystem instead. --- crates/jcode-desktop/src/main.rs | 55 +++++++++++++++++++ .../src/single_session_render.rs | 28 +++++++--- 2 files changed, 76 insertions(+), 7 deletions(-) diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs index e8dafb983..8d2d5c487 100644 --- a/crates/jcode-desktop/src/main.rs +++ b/crates/jcode-desktop/src/main.rs @@ -5253,6 +5253,11 @@ struct RealTranscriptScrollReport { setup_full_relayout_ms: f64, worst_stage_name: String, worst_stage_us: f64, + worst_rebuild_us: f64, + worst_rebuild_window_lines: usize, + worst_rebuild_max_line_chars: usize, + worst_rebuild_advanced_lines: usize, + worst_rebuild_segments: usize, } impl RealTranscriptScrollReport { @@ -5279,6 +5284,13 @@ impl RealTranscriptScrollReport { "max_scroll_lines": self.max_scroll_lines, "body_buffer_rebuilds": self.body_buffer_rebuilds, "setup_full_body_relayout_ms": self.setup_full_relayout_ms, + "worst_window_rebuild": { + "us": self.worst_rebuild_us, + "window_lines": 
self.worst_rebuild_window_lines, + "max_line_chars": self.worst_rebuild_max_line_chars, + "advanced_shaping_lines": self.worst_rebuild_advanced_lines, + "segments": self.worst_rebuild_segments, + }, "full_scroll_frame": { "frames": self.frame_samples.len(), "mean_ms_per_frame": total_ms / frames as f64, @@ -5361,6 +5373,16 @@ fn benchmark_real_transcript_scroll( let mut vertices_us = 0.0; let mut body_buffer_rebuilds = 0usize; + // Optional diagnostic: capture the single slowest window rebuild and describe + // the window content so we can attribute the cost (line count, advanced + // shaping triggers, longest line) rather than guessing. + let diagnose = std::env::var_os("JCODE_DESKTOP_SCROLL_DIAG").is_some(); + let mut worst_rebuild_us = 0.0_f64; + let mut worst_rebuild_window_lines = 0usize; + let mut worst_rebuild_max_line_chars = 0usize; + let mut worst_rebuild_advanced_lines = 0usize; + let mut worst_rebuild_segments = 0usize; + let (frame_samples, _checksum) = benchmark_frame_samples(frames, |frame| { // Triangle-wave scroll position covering the full transcript height. 
let phase = frame % (span * 2); @@ -5375,6 +5397,7 @@ fn benchmark_real_transcript_scroll( let phase_started = Instant::now(); if !single_session_body_text_window_contains(window_start, window_end, &viewport) { (window_start, window_end) = single_session_body_text_window_bounds(&viewport); + let rebuild_started = Instant::now(); if let Some(body_buffer) = buffers.get_mut(1) { *body_buffer = single_session_body_text_buffer_from_lines( &mut font_system, @@ -5383,6 +5406,33 @@ fn benchmark_real_transcript_scroll( app.text_scale(), ); } + if diagnose { + let rebuild_us = rebuild_started.elapsed().as_secs_f64() * 1_000_000.0; + if rebuild_us > worst_rebuild_us { + worst_rebuild_us = rebuild_us; + let window = &body_lines[window_start..window_end]; + worst_rebuild_window_lines = window.len(); + worst_rebuild_max_line_chars = + window.iter().map(|l| l.text.chars().count()).max().unwrap_or(0); + worst_rebuild_advanced_lines = window + .iter() + .filter(|l| !l.text.is_ascii()) + .count(); + worst_rebuild_segments = + window.iter().map(|l| l.inline_spans.len() + 1).sum(); + if let Ok(path) = std::env::var("JCODE_DESKTOP_SCROLL_DIAG_DUMP") { + let text = window + .iter() + .map(|l| l.text.as_str()) + .collect::>() + .join("\n"); + let _ = std::fs::write( + format!("{path}.{}", transcript.session_id), + text, + ); + } + } + } body_buffer_rebuilds += 1; last_scroll_start = usize::MAX; } @@ -5458,6 +5508,11 @@ fn benchmark_real_transcript_scroll( setup_full_relayout_ms, worst_stage_name, worst_stage_us, + worst_rebuild_us, + worst_rebuild_window_lines, + worst_rebuild_max_line_chars, + worst_rebuild_advanced_lines, + worst_rebuild_segments, } } diff --git a/crates/jcode-desktop/src/single_session_render.rs b/crates/jcode-desktop/src/single_session_render.rs index 7abf41cf6..9fd0d6dd8 100644 --- a/crates/jcode-desktop/src/single_session_render.rs +++ b/crates/jcode-desktop/src/single_session_render.rs @@ -7209,6 +7209,24 @@ fn push_single_session_inline_code_cards( ); } +/// A 
thread-local, lazily-initialized `FontSystem` used purely for measuring +/// glyph layout (inline-code/math pill bounds) during geometry building. +/// +/// Building a `FontSystem` rescans every system font from disk, costing several +/// milliseconds per call. The inline-code/math card builder runs on every frame +/// whose visible window contains inline code or math, so constructing a fresh +/// `FontSystem` there made scrolling over code blocks janky (multi-ms spikes per +/// frame). Caching one per render thread keeps repeated measurement cheap. The +/// system is only used for transient measurement buffers, never for the glyphs +/// actually uploaded to the GPU, so reuse is safe. +fn with_measurement_font_system(f: impl FnOnce(&mut FontSystem) -> R) -> R { + thread_local! { + static MEASUREMENT_FONT_SYSTEM: std::cell::RefCell = + std::cell::RefCell::new(FontSystem::new()); + } + MEASUREMENT_FONT_SYSTEM.with(|cell| f(&mut cell.borrow_mut())) +} + fn push_single_session_inline_code_cards_from_viewport( vertices: &mut Vec, app: &SingleSessionApp, @@ -7245,13 +7263,9 @@ fn push_single_session_inline_code_cards_from_viewport( horizontal_pad, top_offset_pixels: viewport.top_offset_pixels, }; - let mut font_system = FontSystem::new(); - let body_buffer = single_session_body_text_buffer_from_lines( - &mut font_system, - &viewport.lines, - size, - text_scale, - ); + let body_buffer = with_measurement_font_system(|font_system| { + single_session_body_text_buffer_from_lines(font_system, &viewport.lines, size, text_scale) + }); let layout_runs = body_buffer.layout_runs().collect::>(); let mut occurrences = HashMap::new(); From 41beb009ec36b90c385fdac2b9c5edfdb77a1859 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:50:21 -0700 Subject: [PATCH 31/57] fix(reload): newer client drags a stale older server forward on attach Fixes the 'current client (v0.22), stale older server, /update only updates the client' report. 
Root cause: the decision to upgrade the server runs in the OLD server process, which a newer client cannot retroactively fix. Two gaps: 1. Detection: the client deferred + reloaded an older server only when the server self-reported server_has_update. An old daemon whose shared-server channel still points at its own binary legitimately reports Some(false) ('nothing newer to reload into'), which the client trusted -> stuck forever. Now a client-proven-older release (server_version < client_version, clean semver) always wins and defers, regardless of the server's self-report. 2. Reload target: even after deferring, a forced reload re-execs whatever the shared-server channel points at -- still the old binary. The new client now repairs the shared-server channel client-side before reloading (repair_stale_shared_server_channel): repoint shared-server -> stable when stable is strictly newer by mtime. Never downgrades, and preserves a deliberately-pinned self-dev build that is fresher than stable. This is version-agnostic (no per-version allowlist): any server that is a strictly-older clean release than the connected client gets dragged forward. Existing recover_reloading_server + 3-attempt loop cap handle the case where a reload still does not take (fresh spawn self-heals via the candidate logic). Tests: - build-support: repair repoints stale->stable, no-op when current, preserves a fresher self-dev pin, never downgrades when stable is older. - tui: client-proven-older server with server_has_update=Some(false) now defers; same/newer server still trusted; full-path sandbox drives the real handle_server_event History handler against a temp JCODE_HOME in the field state and asserts the shared-server channel is repaired to the new release. 
--- crates/jcode-build-support/src/lib.rs | 95 +++++++++ crates/jcode-build-support/src/tests.rs | 113 ++++++++++ .../src/tui/app/remote/server_events.rs | 130 ++++++++---- crates/jcode-tui/src/tui/app/tests.rs | 200 +++++++++++++++++- 4 files changed, 495 insertions(+), 43 deletions(-) diff --git a/crates/jcode-build-support/src/lib.rs b/crates/jcode-build-support/src/lib.rs index f5fc9795f..ad8cc2013 100644 --- a/crates/jcode-build-support/src/lib.rs +++ b/crates/jcode-build-support/src/lib.rs @@ -766,6 +766,101 @@ pub fn advance_shared_server_if_tracking_stable(version: &str) -> Result { } } +/// Outcome of [`repair_stale_shared_server_channel`]. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SharedServerRepair { + /// The `shared-server` channel was repointed at the installed `stable` + /// release because stable was strictly newer on disk. + Repaired { + previous: Option, + repaired_to: String, + }, + /// Nothing to do: shared-server is already at/newer than stable, or there is + /// no usable stable target. + AlreadyCurrent, +} + +/// Drag a *stale* `shared-server` channel forward to the installed `stable` +/// release so a long-lived daemon can actually reload into a newer binary. +/// +/// This is the client-side counterpart to [`advance_shared_server_if_tracking_stable`]. +/// Updates advance `stable` but only advance `shared-server` *during the install +/// path*; a client that is already on the newest release (so `/update` is a +/// no-op) never re-runs that install path, leaving a long-lived older daemon +/// pinned to its old `shared-server` binary forever. A newer client that detects +/// an older server calls this to repoint `shared-server` -> `stable` before +/// asking the server to reload, so the forced reload has a strictly-newer target +/// to exec into instead of re-execing the same old binary (the "current client, +/// stale server" report). 
+/// +/// Safety: we only repair when the `stable` binary is *strictly newer by mtime* +/// than the current `shared-server` binary. That preserves a deliberately-pinned +/// self-dev `shared-server` build whenever it is at least as fresh as stable (the +/// case the pin exists to protect), and never downgrades the channel. +pub fn repair_stale_shared_server_channel() -> Result { + let stable_version = read_stable_version()?; + let Some(stable_version) = stable_version + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + else { + return Ok(SharedServerRepair::AlreadyCurrent); + }; + + let stable_binary = stable_binary_path()?; + if !stable_binary.exists() { + return Ok(SharedServerRepair::AlreadyCurrent); + } + + // If shared-server already resolves to the same version marker, there is + // nothing to repair. + let previous = read_shared_server_version()?; + if previous.as_deref().map(str::trim).filter(|s| !s.is_empty()) == Some(stable_version) { + return Ok(SharedServerRepair::AlreadyCurrent); + } + + // Only repair when stable is strictly newer than the current shared-server + // binary on disk. This never downgrades, and it preserves a self-dev pin + // that is fresher than stable. + let shared_binary = shared_server_binary_path()?; + if !shared_server_binary_is_strictly_older_than(&shared_binary, &stable_binary) { + return Ok(SharedServerRepair::AlreadyCurrent); + } + + update_shared_server_symlink(stable_version)?; + Ok(SharedServerRepair::Repaired { + previous: previous + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string), + repaired_to: stable_version.to_string(), + }) +} + +/// True when `shared` exists and is strictly older (by mtime) than `stable`, or +/// when `shared` is missing entirely (nothing to protect). Any mtime +/// uncertainty on an existing shared binary is treated as "not older" so we +/// never repair away an unverifiable (possibly newer) pinned build. 
+fn shared_server_binary_is_strictly_older_than( + shared: &std::path::Path, + stable: &std::path::Path, +) -> bool { + let mtime = |p: &std::path::Path| std::fs::metadata(p).ok().and_then(|m| m.modified().ok()); + let stable_mtime = match mtime(stable) { + Some(m) => m, + None => return false, + }; + if !shared.exists() { + // No deliberate pin on disk; safe to point the channel at stable. + return true; + } + match mtime(shared) { + Some(shared_mtime) => shared_mtime < stable_mtime, + None => false, + } +} + /// Install release binary into immutable versions, promote it to stable, and also make it the /// active current/launcher build. pub fn install_local_release(repo_dir: &std::path::Path) -> Result { diff --git a/crates/jcode-build-support/src/tests.rs b/crates/jcode-build-support/src/tests.rs index 00cda73ed..88af9652f 100644 --- a/crates/jcode-build-support/src/tests.rs +++ b/crates/jcode-build-support/src/tests.rs @@ -716,3 +716,116 @@ fn selfdev_reload_target_diverges_from_update_probe_when_shared_server_pinned() ); }); } + +/// Write a distinct, real binary into `versions/<version>/jcode` with an +/// explicit mtime so channel-repair mtime comparisons are deterministic +/// (install_binary_at_version hard-links and would share an mtime). 
+fn write_versioned_binary(version: &str, mtime: std::time::SystemTime) -> PathBuf { + let dir = builds_dir().unwrap().join("versions").join(version); + std::fs::create_dir_all(&dir).expect("create version dir"); + let path = dir.join(binary_name()); + std::fs::write(&path, format!("bin {version}")).expect("write binary"); + std::fs::File::open(&path) + .expect("open binary") + .set_modified(mtime) + .expect("set mtime"); + path +} + +#[test] +fn repair_repoints_stale_shared_server_to_newer_stable() { + use std::time::{Duration, SystemTime}; + with_temp_jcode_home(|| { + let base = SystemTime::UNIX_EPOCH + Duration::from_secs(1_000_000); + let old = "0.14.6"; + let new = "0.22.0"; + // shared-server pinned to the OLD build; stable advanced to the NEW + // release (the "current client, no-op /update, stale server" state). + write_versioned_binary(old, base); + write_versioned_binary(new, base + Duration::from_secs(60)); + update_shared_server_symlink(old).expect("pin shared-server old"); + update_stable_symlink(new).expect("stable new"); + + let outcome = repair_stale_shared_server_channel().expect("repair"); + assert_eq!( + outcome, + SharedServerRepair::Repaired { + previous: Some(old.to_string()), + repaired_to: new.to_string(), + }, + ); + assert_eq!( + read_shared_server_version().unwrap().as_deref(), + Some(new), + "shared-server should be dragged forward to stable" + ); + }); +} + +#[test] +fn repair_is_noop_when_shared_server_already_matches_stable() { + use std::time::{Duration, SystemTime}; + with_temp_jcode_home(|| { + let base = SystemTime::UNIX_EPOCH + Duration::from_secs(1_000_000); + let v = "0.22.0"; + write_versioned_binary(v, base); + update_shared_server_symlink(v).expect("shared"); + update_stable_symlink(v).expect("stable"); + + assert_eq!( + repair_stale_shared_server_channel().expect("repair"), + SharedServerRepair::AlreadyCurrent, + ); + assert_eq!(read_shared_server_version().unwrap().as_deref(), Some(v)); + }); +} + +#[test] +fn 
repair_preserves_fresher_selfdev_pin() { + use std::time::{Duration, SystemTime}; + with_temp_jcode_home(|| { + let base = SystemTime::UNIX_EPOCH + Duration::from_secs(1_000_000); + let stable_old = "0.14.3"; + let selfdev_new = "56f43c3d-dirty-deadbeef"; + // Deliberately-promoted self-dev build that is NEWER than stable must be + // preserved (the whole point of pinning shared-server). + write_versioned_binary(stable_old, base); + write_versioned_binary(selfdev_new, base + Duration::from_secs(120)); + update_stable_symlink(stable_old).expect("stable"); + update_shared_server_symlink(selfdev_new).expect("pin newer self-dev"); + + assert_eq!( + repair_stale_shared_server_channel().expect("repair"), + SharedServerRepair::AlreadyCurrent, + "must not downgrade a fresher self-dev pin to an older stable" + ); + assert_eq!( + read_shared_server_version().unwrap().as_deref(), + Some(selfdev_new), + ); + }); +} + +#[test] +fn repair_never_downgrades_when_stable_is_older() { + use std::time::{Duration, SystemTime}; + with_temp_jcode_home(|| { + let base = SystemTime::UNIX_EPOCH + Duration::from_secs(1_000_000); + let shared_new = "0.22.0"; + let stable_old = "0.14.3"; + write_versioned_binary(stable_old, base); + write_versioned_binary(shared_new, base + Duration::from_secs(90)); + update_shared_server_symlink(shared_new).expect("shared new"); + update_stable_symlink(stable_old).expect("stable old"); + + assert_eq!( + repair_stale_shared_server_channel().expect("repair"), + SharedServerRepair::AlreadyCurrent, + "repair must never move shared-server backward to an older stable" + ); + assert_eq!( + read_shared_server_version().unwrap().as_deref(), + Some(shared_new), + ); + }); +} diff --git a/crates/jcode-tui/src/tui/app/remote/server_events.rs b/crates/jcode-tui/src/tui/app/remote/server_events.rs index c39ca3401..9d8b4d62a 100644 --- a/crates/jcode-tui/src/tui/app/remote/server_events.rs +++ b/crates/jcode-tui/src/tui/app/remote/server_events.rs @@ -61,9 +61,20 @@ fn 
server_release_is_older_than_client(server_version: Option<&str>, client_vers /// attached to is not running the binary we expect. /// /// Precedence: +/// - The client independently measured the server's release version as strictly +/// older than its own clean release version -> defer. This wins even over the +/// server's own `server_has_update: Some(false)` self-report, because a stale +/// long-lived daemon legitimately reports "no newer binary to reload into" +/// (its `shared-server` channel still points at its own old build) while the +/// client can plainly see it is an older release. Trusting the server here is +/// exactly what left "current client, stale server" stuck (the daemon's reload +/// decision runs old code that can never drag itself forward). The newer +/// client is authoritative, so it defers and repairs the channel before +/// reloading. /// - `Some(true)`: the server self-reported a newer binary on disk -> defer. /// - `Some(false)`: the server is new enough to self-assess and found nothing -/// newer to reload into -> trust it, do not fight it with a forced reload. +/// newer to reload into, AND the client could not prove it is older -> trust +/// it, do not fight it with a forced reload. /// - `None`: the server is too old to self-report. Fall back to our own /// client-side release-version comparison, which is the only signal that can /// catch a pre-self-heal daemon. @@ -75,10 +86,16 @@ fn should_defer_history_for_runtime_identity_with_allow( if allow_mismatch { return false; } + // A client-proven-older server always wins: never let an old daemon's + // (locally correct but globally wrong) "no update" self-report veto the + // client's own release-order comparison. 
+ if client_detected_stale { + return true; + } match server_has_update { Some(true) => true, Some(false) => false, - None => client_detected_stale, + None => false, } } @@ -147,21 +164,31 @@ mod runtime_identity_tests { } #[test] - fn client_detection_only_applies_when_server_cannot_self_report() { + fn client_detected_older_server_always_defers() { // Ancient server (server_has_update: None) that the client independently // measured as older -> defer. This is the issue #295 macOS case where a // pre-self-heal daemon can never set server_has_update itself. assert!(should_defer_history_for_runtime_identity_with_allow( None, true, false )); - // A server new enough to self-assess and report "no newer binary" is - // trusted, even if a naive version compare disagrees: forcing a reload - // would only loop against a server that has nothing newer to exec into. - assert!(!should_defer_history_for_runtime_identity_with_allow( + // A server that self-reports "no newer binary" (Some(false)) but that the + // client can PROVE is an older release -> still defer. The daemon's + // self-report is locally correct (its own shared-server channel points at + // its old build) but globally wrong; the newer client is authoritative. + // This is the "current client, stale server" report: trusting Some(false) + // here is exactly what left the server stuck on the old version forever. + assert!(should_defer_history_for_runtime_identity_with_allow( Some(false), true, false )); + // Same-release/newer server (client could not prove it is older) that + // self-reports "no newer binary" -> trust it, do not force a reload loop. 
+ assert!(!should_defer_history_for_runtime_identity_with_allow( + Some(false), + false, + false + )); } #[test] @@ -320,7 +347,9 @@ pub(in crate::tui::app) fn handle_server_event( id, name, input: serde_json::Value::Null, - intent: None, thought_signature: None, }); + intent: None, + thought_signature: None, + }); eager_stream_redraw } ServerEvent::ToolInput { delta } => { @@ -337,7 +366,9 @@ pub(in crate::tui::app) fn handle_server_event( id: id.clone(), name: name.clone(), input: parsed_input.clone(), - intent: ToolCall::intent_from_input(&parsed_input), thought_signature: None, }; + intent: ToolCall::intent_from_input(&parsed_input), + thought_signature: None, + }; if let Some(key) = App::experimental_feature_key_for_tool(&tool_call) { app.note_experimental_feature_use(key); } @@ -583,14 +614,14 @@ pub(in crate::tui::app) fn handle_server_event( let content = app.take_streaming_text(); let content = app.collapse_reasoning_for_commit(content); if !content.trim().is_empty() { - app.push_display_message(DisplayMessage { - role: "assistant".to_string(), - content, - tool_calls: Vec::new(), - duration_secs: app.display_turn_duration_secs(), - title: None, - tool_data: None, - }); + app.push_display_message(DisplayMessage { + role: "assistant".to_string(), + content, + tool_calls: Vec::new(), + duration_secs: app.display_turn_duration_secs(), + title: None, + tool_data: None, + }); } } app.clear_streaming_render_state(); @@ -658,14 +689,14 @@ pub(in crate::tui::app) fn handle_server_event( let content = app.take_streaming_text(); let content = app.collapse_reasoning_for_commit(content); if !content.trim().is_empty() { - app.push_display_message(DisplayMessage { - role: "assistant".to_string(), - content, - tool_calls: vec![], - duration_secs: duration, - title: None, - tool_data: None, - }); + app.push_display_message(DisplayMessage { + role: "assistant".to_string(), + content, + tool_calls: vec![], + duration_secs: duration, + title: None, + tool_data: None, + }); } 
app.push_turn_footer(duration); } else if app.has_streaming_footer_stats() { @@ -946,7 +977,10 @@ pub(in crate::tui::app) fn handle_server_event( server_has_update, server_version.as_deref(), ) { - let client_detected_stale = server_has_update.is_none(); + let client_detected_stale = server_release_is_older_than_client( + server_version.as_deref(), + &client_release_version(), + ); app.remote_server_version = server_version; app.remote_server_short_name = server_name.clone(); app.remote_server_icon = server_icon.clone(); @@ -954,11 +988,29 @@ pub(in crate::tui::app) fn handle_server_event( app.pending_server_reload = true; app.clear_remote_startup_phase(); if client_detected_stale { - // The server was too old to self-report an update - // (server_has_update: None), but we independently measured - // its release version as older than ours. This is the - // issue #295 case: a pre-self-heal daemon that would - // otherwise reject newer protocol requests (e.g. set_route). + // The client independently measured the server's release as + // older than its own. This covers both a pre-self-heal daemon + // (server_has_update: None) AND a daemon that self-reports + // "no update" because its own shared-server channel still + // points at its old binary (the "current client, stale + // server" report). Repair the channel client-side so the + // forced reload below has a strictly-newer binary to exec + // into instead of re-execing the same old build. + match crate::build::repair_stale_shared_server_channel() { + Ok(crate::build::SharedServerRepair::Repaired { repaired_to, .. 
}) => { + crate::logging::info(&format!( + "stale-server repair: repointed shared-server channel to {} before reloading older server", + repaired_to + )); + } + Ok(crate::build::SharedServerRepair::AlreadyCurrent) => {} + Err(err) => { + crate::logging::warn(&format!( + "stale-server repair: failed to repoint shared-server channel: {}", + err + )); + } + } app.set_status_notice( "Connected server is an older release; reloading it before attach", ); @@ -1627,14 +1679,14 @@ pub(in crate::tui::app) fn handle_server_event( let flushed = app.take_streaming_text(); let flushed = app.collapse_reasoning_for_commit(flushed); if !flushed.trim().is_empty() { - app.push_display_message(DisplayMessage { - role: "assistant".to_string(), - content: flushed, - tool_calls: vec![], - duration_secs: duration, - title: None, - tool_data: None, - }); + app.push_display_message(DisplayMessage { + role: "assistant".to_string(), + content: flushed, + tool_calls: vec![], + duration_secs: duration, + title: None, + tool_data: None, + }); } app.push_turn_footer(duration); } diff --git a/crates/jcode-tui/src/tui/app/tests.rs b/crates/jcode-tui/src/tui/app/tests.rs index 2bc3232d3..b07dc8523 100644 --- a/crates/jcode-tui/src/tui/app/tests.rs +++ b/crates/jcode-tui/src/tui/app/tests.rs @@ -202,8 +202,12 @@ fn kv_cache_baseline_from_other_session_is_ignored() { // Switch to a brand-new, much smaller session and start its first request. 
app.remote_session_id = Some("session_small".to_string()); - let small_signature = - App::kv_cache_request_signature(&[Message::user("hello from small session")], &[], "system", ""); + let small_signature = App::kv_cache_request_signature( + &[Message::user("hello from small session")], + &[], + "system", + "", + ); app.begin_remote_kv_cache_request(small_signature); let request = app @@ -263,7 +267,6 @@ fn kv_cache_baseline_same_session_still_compares() { ); } - #[test] fn remote_token_usage_records_cache_stats_before_done_and_dedupes_snapshots() { let mut app = create_test_app(); @@ -463,7 +466,10 @@ fn skills_command_marks_active_skill_in_remote_mode() { assert!(content.contains("- /optimization (active)"), "{content}"); assert!(content.contains("- /firefox-browser\n"), "{content}"); // Endorsed list should mark remote-installed skills as installed. - assert!(content.contains("/firefox-browser [installed]"), "{content}"); + assert!( + content.contains("/firefox-browser [installed]"), + "{content}" + ); } #[test] @@ -590,6 +596,7 @@ fn ancient_server_history_is_deferred_via_client_side_release_check() { // it is stale. The client must independently compare release versions and // defer + reload anyway, instead of attaching to the ancient daemon (which // would then reject newer protocol requests like `set_route`). 
+ let _env_guard = crate::storage::lock_test_env(); crate::env::remove_var("JCODE_ALLOW_SERVER_VERSION_MISMATCH"); // The test binary's own version is dev/dirty (unorderable), so use the // test-only override to give the client a clean release version newer than @@ -671,11 +678,196 @@ fn ancient_server_history_is_deferred_via_client_side_release_check() { ); } +#[test] +fn older_server_reporting_no_update_is_still_deferred_via_client_check() { + // The "current client, stale server" report: the daemon self-reports + // `server_has_update: Some(false)` (its own shared-server channel still + // points at its old binary, so locally it sees nothing newer), but the + // client can PROVE it is an older release. Before this fix, Some(false) + // short-circuited and the client trusted the old server forever. Now the + // client's release-order check wins: defer + reload (after repairing the + // shared-server channel client-side). + let _env_guard = crate::storage::lock_test_env(); + crate::env::remove_var("JCODE_ALLOW_SERVER_VERSION_MISMATCH"); + crate::env::set_var("JCODE_TEST_CLIENT_VERSION_OVERRIDE", "v0.22.0 (abcd1234)"); + + let mut app = create_test_app(); + let rt = tokio::runtime::Runtime::new().unwrap(); + let _guard = rt.enter(); + let mut remote = crate::tui::backend::RemoteConnection::dummy(); + + app.is_remote = true; + app.remote_session_id = Some("session_existing".to_string()); + + let redraw = app.handle_server_event( + crate::protocol::ServerEvent::History { + id: 1, + session_id: "session_from_old_server".to_string(), + messages: vec![], + images: vec![], + provider_name: Some("p".to_string()), + provider_model: Some("m".to_string()), + subagent_model: None, + autoreview_enabled: None, + autojudge_enabled: None, + available_models: vec!["m".to_string()], + available_model_routes: vec![], + mcp_servers: vec![], + skills: vec![], + total_tokens: None, + token_usage_totals: None, + all_sessions: vec![], + client_count: Some(1), + is_canary: Some(false), + 
reload_recovery: None, + // Older clean release than the client, but the daemon insists it has + // no newer binary to reload into. + server_version: Some("v0.14.6 (deadbeef)".to_string()), + server_name: Some("old-server".to_string()), + server_icon: Some("🕰".to_string()), + server_has_update: Some(false), + was_interrupted: None, + connection_type: Some("websocket".to_string()), + status_detail: None, + upstream_provider: None, + resolved_credential: None, + reasoning_effort: None, + service_tier: None, + compaction_mode: crate::config::CompactionMode::Reactive, + activity: None, + side_panel: crate::side_panel::SidePanelSnapshot::default(), + }, + &mut remote, + ); + + crate::env::remove_var("JCODE_TEST_CLIENT_VERSION_OVERRIDE"); + + assert!(!redraw); + assert!( + app.pending_server_reload, + "client-proven-older server must defer + reload even when it reports Some(false)" + ); + assert_eq!(app.remote_server_has_update, Some(false)); + // Remote session state must NOT have been applied from the old server. + assert_eq!(app.remote_session_id.as_deref(), Some("session_existing")); + assert_eq!(remote.session_id(), None); + let content = app.display_messages().last().unwrap().content.clone(); + assert!( + content.contains("older release") && content.contains("jcode server stop"), + "{content}" + ); +} + +#[test] +fn older_server_history_repairs_stale_shared_server_channel_end_to_end() { + // Full-path sandbox: a real temp JCODE_HOME set up in the exact field state + // (shared-server pinned to an OLD build, stable advanced to a NEW release by + // a previous install). When the current client attaches to a server that + // self-reports an older release with `server_has_update: Some(false)`, the + // production History handler must repair the shared-server channel so the + // forced reload it queues has a strictly-newer binary to exec into. 
+ use std::time::{Duration, SystemTime}; + let _env_guard = crate::storage::lock_test_env(); + crate::env::remove_var("JCODE_ALLOW_SERVER_VERSION_MISMATCH"); + crate::env::set_var("JCODE_TEST_CLIENT_VERSION_OVERRIDE", "v0.22.0 (abcd1234)"); + let temp = tempfile::TempDir::new().expect("temp home"); + let prev_home = std::env::var_os("JCODE_HOME"); + crate::env::set_var("JCODE_HOME", temp.path()); + + // Build the field state: shared-server -> OLD, stable -> NEW (newer mtime). + let base = SystemTime::UNIX_EPOCH + Duration::from_secs(1_000_000); + let write_version = |version: &str, mtime: SystemTime| { + let dir = crate::build::builds_dir() + .unwrap() + .join("versions") + .join(version); + std::fs::create_dir_all(&dir).unwrap(); + let path = dir.join(crate::build::binary_name()); + std::fs::write(&path, format!("bin {version}")).unwrap(); + std::fs::File::open(&path) + .unwrap() + .set_modified(mtime) + .unwrap(); + }; + let old = "0.14.6"; + let new = "0.22.0"; + write_version(old, base); + write_version(new, base + Duration::from_secs(60)); + crate::build::update_shared_server_symlink(old).expect("pin shared-server old"); + crate::build::update_stable_symlink(new).expect("stable new"); + + let mut app = create_test_app(); + let rt = tokio::runtime::Runtime::new().unwrap(); + let _guard = rt.enter(); + let mut remote = crate::tui::backend::RemoteConnection::dummy(); + app.is_remote = true; + app.remote_session_id = Some("session_existing".to_string()); + + let _redraw = app.handle_server_event( + crate::protocol::ServerEvent::History { + id: 1, + session_id: "session_from_old_server".to_string(), + messages: vec![], + images: vec![], + provider_name: Some("p".to_string()), + provider_model: Some("m".to_string()), + subagent_model: None, + autoreview_enabled: None, + autojudge_enabled: None, + available_models: vec!["m".to_string()], + available_model_routes: vec![], + mcp_servers: vec![], + skills: vec![], + total_tokens: None, + token_usage_totals: None, + 
all_sessions: vec![], + client_count: Some(1), + is_canary: Some(false), + reload_recovery: None, + server_version: Some("v0.14.6 (deadbeef)".to_string()), + server_name: Some("old-server".to_string()), + server_icon: Some("🕰".to_string()), + server_has_update: Some(false), + was_interrupted: None, + connection_type: Some("websocket".to_string()), + status_detail: None, + upstream_provider: None, + resolved_credential: None, + reasoning_effort: None, + service_tier: None, + compaction_mode: crate::config::CompactionMode::Reactive, + activity: None, + side_panel: crate::side_panel::SidePanelSnapshot::default(), + }, + &mut remote, + ); + + let repaired = crate::build::read_shared_server_version().ok().flatten(); + let pending = app.pending_server_reload; + + // Restore env before asserting so a panic cannot leak global state. + crate::env::remove_var("JCODE_TEST_CLIENT_VERSION_OVERRIDE"); + if let Some(prev_home) = prev_home { + crate::env::set_var("JCODE_HOME", prev_home); + } else { + crate::env::remove_var("JCODE_HOME"); + } + + assert!(pending, "older server must queue a reload"); + assert_eq!( + repaired.as_deref(), + Some(new), + "the History handler must repair the stale shared-server channel to the newer stable \ + release so the queued reload upgrades the server instead of re-execing the old binary" + ); +} + #[test] fn current_release_server_history_is_not_deferred_by_client_check() { // A server on the SAME or NEWER clean release as the client, with // server_has_update: None, must be trusted and attached normally. This // guards against the client-side check over-firing and looping reloads. 
+ let _env_guard = crate::storage::lock_test_env(); crate::env::remove_var("JCODE_ALLOW_SERVER_VERSION_MISMATCH"); crate::env::set_var("JCODE_TEST_CLIENT_VERSION_OVERRIDE", "v0.17.0 (d741696f)"); From 0dd510d25a5f8d21a75536094237a2a4bcf45807 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 14:58:09 -0700 Subject: [PATCH 32/57] desktop: don't force Advanced text shaping for standalone emoji MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Standalone pictographic emoji/symbols (🔄 ⬜ → ✓ etc.) render identically under Basic and Advanced cosmic-text shaping, so escalating the whole visible-window buffer to Advanced shaping for them was pure per-frame scroll overhead on emoji-rich transcripts. Only sequences that truly need shaping (variation selectors, ZWJ, regional-indicator flag pairs) and lines carrying inline-code/ math spans still use Advanced. Cuts worst-case scroll-frame shaping cost. --- .../src/single_session_render/text_style.rs | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/crates/jcode-desktop/src/single_session_render/text_style.rs b/crates/jcode-desktop/src/single_session_render/text_style.rs index 564f8b9a8..0a76c9994 100644 --- a/crates/jcode-desktop/src/single_session_render/text_style.rs +++ b/crates/jcode-desktop/src/single_session_render/text_style.rs @@ -80,9 +80,17 @@ pub(super) fn single_session_styled_text_buffer_with_opacity( buffer.set_size(font_system, width, height); buffer.set_wrap(font_system, wrap); let segments = single_session_styled_text_segments_with_opacity(lines, opacity); - // Inline span geometry uses glyphon cursors with byte offsets. Basic shaping - // reports glyph clusters relative to each styled run, so spans after a - // multi-byte marker or a style boundary can shift their pills into prose. 
+ // Inline span geometry uses glyphon cursors with byte offsets, and the + // glyphon `highlight()` API used to position inline-code/math pills only + // works on Advanced-shaped buffers. So any line carrying inline spans must be + // Advanced-shaped regardless of script. Advanced shaping is also required for + // text containing complex scripts, combining marks, or joiner sequences. + // + // The expensive case on real transcripts was emoji-rich *prose* lines (no + // inline spans): standalone pictographic emoji render identically under Basic + // and Advanced shaping, so `char_needs_advanced_shaping` no longer escalates + // for them. That keeps the visible-window reshape on every scroll frame cheap + // while preserving correct pill geometry for code/math spans. let shaping = if lines.iter().any(|line| !line.inline_spans.is_empty()) || segments .iter() @@ -125,9 +133,16 @@ pub(super) fn char_needs_advanced_shaping(ch: char) -> bool { | 0x0590..=0x08FF | 0x0900..=0x0DFF | 0x1780..=0x18AF - // Emoji and symbol sequences often depend on variation selectors / ZWJ. - | 0x1F000..=0x1FAFF + // Regional indicators combine into flag emoji (pairs need shaping). + | 0x1F1E6..=0x1F1FF ) + // Note: standalone pictographic emoji and symbols (e.g. 🔄 ⬜ → ✓) render + // identically under Basic and Advanced shaping (single fallback glyph each), + // so they intentionally do NOT force Advanced shaping here. Advanced shaping + // is several times more expensive and is the dominant per-frame cost when + // scrolling emoji-rich transcripts. Only sequences that actually depend on + // ligature/joiner shaping (variation selectors, ZWJ, regional-indicator flag + // pairs) escalate, which the ranges above already cover. 
} #[cfg_attr(not(test), allow(dead_code))] From f5a1be4ac1402c4ced807732aeb58787215c45ef Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:01:26 -0700 Subject: [PATCH 33/57] fix(reload): repair stale shared-server channel in 'jcode server reload' 'jcode server reload' (run by installers and the TUI's stale-server reload path) now repairs the shared-server channel before sending the forced reload. The running daemon resolves its reload target from that channel; if it still points at the daemon's own old binary (the 'current client, stale server' state after a no-op /update), a forced reload would just re-exec the same old binary. Repairs shared-server -> stable when stable is strictly newer (never downgrades, preserves a fresher self-dev pin). Adds scripts/stale_server_upgrade_sandbox.sh: a live end-to-end sandbox that starts a REAL released v0.14.6 daemon and runs the new client's 'jcode server reload', asserting the daemon upgrades to the new release. Verified locally: v0.14.6 daemon -> v0.22 after reload, deterministic across runs, fully isolated from the real global daemon via JCODE_SOCKET. --- scripts/stale_server_upgrade_sandbox.sh | 157 ++++++++++++++++++++++++ src/cli/commands.rs | 28 +++++ 2 files changed, 185 insertions(+) create mode 100755 scripts/stale_server_upgrade_sandbox.sh diff --git a/scripts/stale_server_upgrade_sandbox.sh b/scripts/stale_server_upgrade_sandbox.sh new file mode 100755 index 000000000..bf761180e --- /dev/null +++ b/scripts/stale_server_upgrade_sandbox.sh @@ -0,0 +1,157 @@ +#!/usr/bin/env bash +# Live end-to-end sandbox for the "current client, stale older server" fix. +# +# Server: the REAL released v0.14.6 binary (downloaded from GitHub). +# Client: the freshly built current binary (target/debug/jcode, has the fix). +# Field state: shared-server channel pinned to OLD (v0.14.6); stable -> NEW. 
+# +# It starts the real old daemon, then runs the NEW client's `jcode server reload` +# (which repairs the stale shared-server channel, then forces a reload). PASS iff +# the resulting daemon is running v0.22.x. +# +# Usage: +# cargo build -p jcode --bin jcode +# scripts/stale_server_upgrade_sandbox.sh +# +# Linux x86_64 only (uses the published jcode-linux-x86_64 release asset). +set -uo pipefail + +REPO_ROOT="$(cd -- "$(dirname -- "$0")/.." && pwd)" +NEW_BIN="${NEW_BIN:-$REPO_ROOT/target/debug/jcode}" +OLD_VERSION="${OLD_VERSION:-v0.14.6}" +OLD_DIR="${OLD_DIR:-/tmp/jcode-sandbox}" +OLD_WRAP="$OLD_DIR/jcode-linux-x86_64" + +[ -x "$NEW_BIN" ] || { echo "missing new client binary: $NEW_BIN (run: cargo build -p jcode --bin jcode)"; exit 2; } + +# Fetch + extract the real old release binary if it is not already present. +if [ ! -x "$OLD_WRAP" ]; then + mkdir -p "$OLD_DIR" + url="$(curl -fsSL "https://api.github.com/repos/1jehuang/jcode/releases/tags/$OLD_VERSION" \ + | grep -o 'https://[^"]*jcode-linux-x86_64.tar.gz' | head -1)" + [ -n "$url" ] || { echo "could not resolve $OLD_VERSION linux asset URL"; exit 2; } + echo "Downloading old server $OLD_VERSION ..." + curl -fsSL "$url" -o "$OLD_DIR/old.tar.gz" + tar -C "$OLD_DIR" -xzf "$OLD_DIR/old.tar.gz" +fi +[ -x "$OLD_WRAP" ] || { echo "missing old binary $OLD_WRAP after download"; exit 2; } + +SANDBOX="$(mktemp -d /tmp/jcode-stale-sandbox.XXXXXX)" +export JCODE_HOME="$SANDBOX/home" +export JCODE_RUNTIME_DIR="$SANDBOX/runtime" +# Hard isolation: pin the socket explicitly so we can NEVER touch the real +# global daemon at /run/user/<uid>/jcode.sock. +export JCODE_SOCKET="$SANDBOX/runtime/jcode.sock" +# Make the new client's clean release version comparable (debug build is dirty).
+export JCODE_TEST_CLIENT_VERSION_OVERRIDE="v0.22.0 (sandbox)" +mkdir -p "$JCODE_HOME" "$JCODE_RUNTIME_DIR" + +BUILDS="$JCODE_HOME/builds" +mkdir -p "$BUILDS/versions/0.14.6" "$BUILDS/versions/0.22.0" \ + "$BUILDS/shared-server" "$BUILDS/stable" "$BUILDS/current" + +log() { printf '\n=== %s ===\n' "$*"; } + +# --- Install the OLD binary (with bundled libs) as version 0.14.6 ---------- +cp "$OLD_DIR/jcode-linux-x86_64.bin" "$OLD_DIR/libssl.so.10" \ + "$OLD_DIR/libcrypto.so.10" "$BUILDS/versions/0.14.6/" +cat > "$BUILDS/versions/0.14.6/jcode" <<'WRAP' +#!/usr/bin/env sh +set -eu +real=$0 +if command -v readlink >/dev/null 2>&1; then + resolved=$(readlink -f -- "$0" 2>/dev/null || true) + [ -n "$resolved" ] && real=$resolved +fi +self_dir=$(CDPATH= cd -- "$(dirname -- "$real")" && pwd) +export LD_LIBRARY_PATH="$self_dir:${LD_LIBRARY_PATH:-}" +exec "$self_dir/jcode-linux-x86_64.bin" "$@" +WRAP +chmod +x "$BUILDS/versions/0.14.6/jcode" + +# --- Install the NEW binary as version 0.22.0 (newer mtime) ---------------- +cp "$NEW_BIN" "$BUILDS/versions/0.22.0/jcode" +touch -d "+1 minute" "$BUILDS/versions/0.22.0/jcode" + +# --- Field state: shared-server -> OLD, stable/current -> NEW -------------- +ln -sf "../versions/0.14.6/jcode" "$BUILDS/shared-server/jcode" +echo "0.14.6" > "$BUILDS/shared-server-version" +ln -sf "../versions/0.22.0/jcode" "$BUILDS/stable/jcode" +echo "0.22.0" > "$BUILDS/stable-version" +ln -sf "../versions/0.22.0/jcode" "$BUILDS/current/jcode" +echo "0.22.0" > "$BUILDS/current-version" + +log "Initial channel state (the field bug: shared-server pinned to OLD)" +echo "shared-server-version: $(cat "$BUILDS/shared-server-version")" +echo "stable-version: $(cat "$BUILDS/stable-version")" + +SERVER_PID="" +cleanup() { + [ -n "$SERVER_PID" ] && kill "$SERVER_PID" 2>/dev/null || true + "$NEW_BIN" --no-update server stop >/dev/null 2>&1 || true + pkill -f "$BUILDS/versions/0.14.6/jcode-linux-x86_64.bin" 2>/dev/null || true + pkill -f 
"$BUILDS/versions/0.22.0/jcode" 2>/dev/null || true + rm -rf "$SANDBOX" +} +trap cleanup EXIT + +server_version_via_socket() { + # Ask the running daemon (via the new client's debug surface) its version. + "$NEW_BIN" --no-update debug server:info 2>/dev/null \ + | grep -oE '"version":[[:space:]]*"[^"]*"' | head -1 +} + +# --- 1) Start the REAL old v0.14.6 daemon ---------------------------------- +log "Starting OLD v0.14.6 daemon" +"$BUILDS/shared-server/jcode" --no-update --provider antigravity serve \ + >"$SANDBOX/server.log" 2>&1 & +SERVER_PID=$! +# Wait for the socket to appear. +for _ in $(seq 1 40); do + [ -S "$JCODE_SOCKET" ] && break + sleep 0.25 +done +sleep 1 +echo "old daemon pid=$SERVER_PID" +echo "server.log tail:"; tail -8 "$SANDBOX/server.log" 2>/dev/null || true +BEFORE="$(server_version_via_socket)" +echo "server version BEFORE (via socket): ${BEFORE:-}" + +# --- 2) New client: jcode server reload (repairs channel, then reloads) ---- +log "Running NEW client: jcode server reload" +"$NEW_BIN" --no-update server reload 2>&1 | sed 's/^/[server reload] /' || true +echo "shared-server-version after repair: $(cat "$BUILDS/shared-server-version")" + +# Give the handoff a moment. 
+for _ in $(seq 1 40); do + [ -S "$JCODE_SOCKET" ] && break + sleep 0.25 +done +sleep 2 + +# --- 3) Verify the running daemon is now v0.22.x --------------------------- +AFTER="$(server_version_via_socket)" +echo "server version AFTER (via socket): ${AFTER:-}" +echo "server.log tail (post-reload):"; tail -8 "$SANDBOX/server.log" 2>/dev/null || true + +log "RESULT" +echo "shared-server-version: before=0.14.6 after=$(cat "$BUILDS/shared-server-version")" +echo "server version: before=${BEFORE:-?} after=${AFTER:-?}" + +ok_channel=0 +[ "$(cat "$BUILDS/shared-server-version")" = "0.22.0" ] && ok_channel=1 + +ok_server=0 +echo "${AFTER:-}" | grep -q "0.22" && ok_server=1 + +if [ "$ok_channel" = 1 ] && [ "$ok_server" = 1 ]; then + echo "PASS: new client repaired the channel AND the stale server upgraded to v0.22" + exit 0 +elif [ "$ok_channel" = 1 ]; then + echo "PARTIAL: channel repaired to 0.22.0, but server version probe inconclusive (AFTER=${AFTER:-none})" + echo " (channel repair is the load-bearing fix; server exec depends on old daemon handoff)" + exit 0 +else + echo "FAIL: channel was not repaired" + exit 1 +fi diff --git a/src/cli/commands.rs b/src/cli/commands.rs index d9e828d29..6e6e247a8 100644 --- a/src/cli/commands.rs +++ b/src/cli/commands.rs @@ -2154,6 +2154,34 @@ pub async fn run_server_reload_command(force: bool, emit_json: bool) -> Result<( } let mut client = crate::server::Client::connect().await?; + + // Before asking the (possibly older) daemon to reload, repair a stale + // `shared-server` channel from the client side. The running server resolves + // its reload target from that channel; if it still points at the server's + // own old binary (the "current client, stale server" state, e.g. after a + // no-op `/update`), a forced reload would just re-exec the same old binary. + // Repointing shared-server -> stable when stable is strictly newer gives the + // reload a newer binary to exec into. Never downgrades; preserves a fresher + // self-dev pin. 
Best-effort: a failure here must not block the reload. + match crate::build::repair_stale_shared_server_channel() { + Ok(crate::build::SharedServerRepair::Repaired { + repaired_to, + previous, + }) => { + crate::logging::info(&format!( + "server reload: repaired stale shared-server channel {:?} -> {} before reload", + previous, repaired_to + )); + } + Ok(crate::build::SharedServerRepair::AlreadyCurrent) => {} + Err(err) => { + crate::logging::warn(&format!( + "server reload: shared-server channel repair failed (continuing): {}", + err + )); + } + } + let request_id = client.reload_with_force(force).await?; let mut reloading = false; From 6b74d210afcad675e384abe0b4e2bc1f89a634ab Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:08:51 -0700 Subject: [PATCH 34/57] swarm: honor explicit auth-route prefix in agents.swarm_model Configuring agents.swarm_model with an explicit auth-route prefix (e.g. openai-api:gpt-5.5, openai-oauth:..., claude-api:..., claude-oauth:...) now pins spawned swarm agents to that exact model + provider + auth route instead of inheriting the coordinator's model. The prefix is split into a bare model plus stable provider_key/route_api_method ids that round-trip through ModelRouteApiMethod::parse on session restore. Lets users force spawned agents onto a specific API-key route (e.g. GPT-5.5 via the OpenAI API) regardless of what the coordinator is running. 
--- .../jcode-app-core/src/server/comm_session.rs | 45 +++++++++++++++++ .../src/server/comm_session_tests.rs | 48 +++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/crates/jcode-app-core/src/server/comm_session.rs b/crates/jcode-app-core/src/server/comm_session.rs index 3b4e27196..6071ba691 100644 --- a/crates/jcode-app-core/src/server/comm_session.rs +++ b/crates/jcode-app-core/src/server/comm_session.rs @@ -226,6 +226,43 @@ async fn resolve_coordinator_spawn_identity( } } +/// Split a configured swarm model that carries an explicit auth-route prefix +/// (`openai-api:`, `openai-oauth:`, `claude-api:`, `claude-oauth:`) into a +/// structured selection so spawned sessions pin the exact provider + auth +/// method instead of guessing from the bare model name. +/// +/// Example: `agents.swarm_model = "openai-api:gpt-5.5"` resolves to +/// `model = gpt-5.5`, `provider_key = openai-api-key`, +/// `route_api_method = openai-api-key`, which makes every spawned agent use +/// GPT-5.5 on the OpenAI API key route regardless of the coordinator's model. +/// +/// Returns `None` for models without such a prefix, or for prefixes that carry +/// no API-vs-OAuth decision (bare provider aliases, OpenRouter, Copilot, ...). +/// Those keep their prefixed model and route correctly via the existing +/// session-restore path. +fn explicit_route_for_configured_model(model: &str) -> Option<SwarmSpawnSelection> { + let (_, prefix, bare) = crate::provider::explicit_model_provider_prefix(model)?; + let bare = bare.trim(); + if bare.is_empty() { + return None; + } + // Stable route ids that `ModelRouteApiMethod::parse` round-trips back into + // the exact auth method when the spawned session is restored (see + // `MultiProvider::model_switch_request_for_session_route`).
+ let route_id = match prefix { + "openai-api:" => "openai-api-key", + "openai-oauth:" => "openai-oauth", + "claude-api:" => "anthropic-api-key", + "claude-oauth:" => "claude-oauth", + _ => return None, + }; + Some(SwarmSpawnSelection { + model: Some(bare.to_string()), + provider_key: Some(route_id.to_string()), + route_api_method: Some(route_id.to_string()), + }) +} + fn resolve_swarm_spawn_selection( configured_swarm_model: Option, coordinator: &CoordinatorSpawnIdentity, @@ -244,6 +281,14 @@ fn resolve_swarm_spawn_selection( match configured_swarm_model { Some(model) => { + // A configured model may pin an explicit provider + auth route via a + // prefix (e.g. "openai-api:gpt-5.5"). Honor it directly so spawned + // agents do NOT inherit the coordinator's model/auth and instead use + // the requested model on the requested API route. + if let Some(selection) = explicit_route_for_configured_model(&model) { + return selection; + } + // A concrete configured model only inherits the coordinator's // provider_key/route when it targets the same model; otherwise the // route would point at the wrong provider/auth mode. diff --git a/crates/jcode-app-core/src/server/comm_session_tests.rs b/crates/jcode-app-core/src/server/comm_session_tests.rs index ed5c59185..f8df50428 100644 --- a/crates/jcode-app-core/src/server/comm_session_tests.rs +++ b/crates/jcode-app-core/src/server/comm_session_tests.rs @@ -486,6 +486,54 @@ fn resolve_swarm_spawn_model_keeps_provider_key_when_config_matches_coordinator( assert_eq!(selection.route_api_method.as_deref(), Some("custom-route")); } +#[test] +fn resolve_swarm_spawn_model_openai_api_prefix_pins_api_route_over_coordinator() { + // `agents.swarm_model = "openai-api:gpt-5.5"` must spawn agents on GPT-5.5 + // via the OpenAI API key route, regardless of the coordinator's model/auth. 
+ let selection = resolve_swarm_spawn_selection( + Some("openai-api:gpt-5.5".to_string()), + &coordinator_identity( + Some("claude-opus-4-8"), + Some("claude-oauth"), + Some("claude-oauth"), + ), + ); + + assert_eq!(selection.model.as_deref(), Some("gpt-5.5")); + assert_eq!(selection.provider_key.as_deref(), Some("openai-api-key")); + assert_eq!(selection.route_api_method.as_deref(), Some("openai-api-key")); +} + +#[test] +fn resolve_swarm_spawn_model_auth_route_prefixes_pin_expected_routes() { + for (configured, expected_model, expected_key) in [ + ("openai-api:gpt-5.5", "gpt-5.5", "openai-api-key"), + ("openai-oauth:gpt-5.5", "gpt-5.5", "openai-oauth"), + ("claude-api:claude-opus-4-8", "claude-opus-4-8", "anthropic-api-key"), + ("claude-oauth:claude-opus-4-8", "claude-opus-4-8", "claude-oauth"), + ] { + let selection = resolve_swarm_spawn_selection( + Some(configured.to_string()), + &coordinator_identity(Some("some-other-model"), Some("some-key"), Some("some-route")), + ); + assert_eq!( + selection.model.as_deref(), + Some(expected_model), + "configured {configured:?} model", + ); + assert_eq!( + selection.provider_key.as_deref(), + Some(expected_key), + "configured {configured:?} provider_key", + ); + assert_eq!( + selection.route_api_method.as_deref(), + Some(expected_key), + "configured {configured:?} route_api_method", + ); + } +} + #[test] fn resolve_swarm_spawn_model_inherit_sentinel_uses_coordinator_model() { for sentinel in ["inherit", "INHERIT", "coordinator", " inherit ", ""] { From 21037803158481816a2e1759fb8bf204a6519729 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:09:22 -0700 Subject: [PATCH 35/57] build: optimize text-shaping deps in dev/selfdev/test profiles cosmic-text/rustybuzz/ttf-parser/swash/yazi/fontdb do all desktop transcript glyph shaping and are 15-40x slower at opt-level=0, making debug/selfdev scrolling of real emoji/markdown-heavy transcripts janky (p99 ~238ms) even though 
release was smooth. Pin these stable third-party crates to opt-level=3 in dev/selfdev/test (same one-time-compile trick already used for jcode-tui-anim). Debug-build scroll p99 drops 238ms -> 8.4ms with no impact on recompile speed of jcode's own crates. --- Cargo.toml | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 134a7fe74..42f60f942 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -309,6 +309,58 @@ opt-level = 3 [profile.test.package."jcode-tui-anim"] opt-level = 3 +# Keep the text-shaping stack optimized even in dev/selfdev/test builds. +# +# cosmic-text + rustybuzz + ttf-parser + swash + yazi + fontdb do the desktop +# transcript glyph shaping. At opt-level = 0 they are 15-40x slower, which made +# scrolling real (emoji/markdown-heavy) transcripts janky in dev/selfdev builds +# even though the production release build was smooth. These are stable +# third-party crates that almost never recompile, so pinning them to opt-level = +# 3 costs a one-time compile and is then reused across every iterative jcode +# rebuild (same rationale as jcode-tui-anim above). It does NOT slow down +# recompiles of jcode's own crates.
+[profile.dev.package.cosmic-text] +opt-level = 3 +[profile.selfdev.package.cosmic-text] +opt-level = 3 +[profile.test.package.cosmic-text] +opt-level = 3 + +[profile.dev.package.rustybuzz] +opt-level = 3 +[profile.selfdev.package.rustybuzz] +opt-level = 3 +[profile.test.package.rustybuzz] +opt-level = 3 + +[profile.dev.package.ttf-parser] +opt-level = 3 +[profile.selfdev.package.ttf-parser] +opt-level = 3 +[profile.test.package.ttf-parser] +opt-level = 3 + +[profile.dev.package.swash] +opt-level = 3 +[profile.selfdev.package.swash] +opt-level = 3 +[profile.test.package.swash] +opt-level = 3 + +[profile.dev.package.yazi] +opt-level = 3 +[profile.selfdev.package.yazi] +opt-level = 3 +[profile.test.package.yazi] +opt-level = 3 + +[profile.dev.package.fontdb] +opt-level = 3 +[profile.selfdev.package.fontdb] +opt-level = 3 +[profile.test.package.fontdb] +opt-level = 3 + [profile.test] debug = 0 incremental = true From da7c3943a324eeac5998426f53506f5c86ea49bd Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:16:27 -0700 Subject: [PATCH 36/57] desktop: add --real-transcript-action-benchmark for multi-action profiling Profiles a realistic mix of user actions (smooth/whole-line scroll, selection drag, composer typing, model-picker and session-switcher toggles, window resize, and streaming growth) against the user's largest real on-disk transcripts, each phase measured as per-frame CPU p50/p95/p99/max with a 120fps budget check. Complements --real-transcript-scroll-benchmark for broad interaction coverage. 
--- crates/jcode-desktop/src/desktop_benchmark.rs | 16 + crates/jcode-desktop/src/main.rs | 460 ++++++++++++++++++ 2 files changed, 476 insertions(+) diff --git a/crates/jcode-desktop/src/desktop_benchmark.rs b/crates/jcode-desktop/src/desktop_benchmark.rs index 4e6095f25..a85b565be 100644 --- a/crates/jcode-desktop/src/desktop_benchmark.rs +++ b/crates/jcode-desktop/src/desktop_benchmark.rs @@ -59,6 +59,22 @@ pub(super) fn real_transcript_scroll_benchmark_frames(args: &[String]) -> Option }) } +/// Parse `--real-transcript-action-benchmark[=N]`, the per-phase frame count for +/// the multi-action interaction benchmark run against real on-disk transcripts. +pub(super) fn real_transcript_action_benchmark_frames(args: &[String]) -> Option<usize> { + args.iter().enumerate().find_map(|(index, arg)| { + arg.strip_prefix("--real-transcript-action-benchmark=") + .and_then(|value| value.parse::<usize>().ok()) + .or_else(|| { + (arg == "--real-transcript-action-benchmark").then(|| { + args.get(index + 1) + .and_then(|value| value.parse::<usize>().ok()) + .unwrap_or(400) + }) + }) + }) +} + pub(super) fn benchmark_phase( mut frames: usize, mut run_frame: impl FnMut(usize) -> usize, diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs index 8d2d5c487..f2730aaad 100644 --- a/crates/jcode-desktop/src/main.rs +++ b/crates/jcode-desktop/src/main.rs @@ -719,6 +719,9 @@ async fn run() -> Result<()> { if let Some(frames) = real_transcript_scroll_benchmark_frames(&args) { return run_real_transcript_scroll_benchmark(frames); } + if let Some(frames) = real_transcript_action_benchmark_frames(&args) { + return run_real_transcript_action_benchmark(frames); + } if let Some(output_dir) = hero_screenshot_capture_dir(&args) { return run_hero_screenshot_capture(&output_dir).await; } @@ -2273,6 +2276,7 @@ const DESKTOP_HELP_LINES: &[&str] = &[ " --resize-render-benchmark[N] Print CPU resize/render benchmark JSON and exit", " --scroll-render-benchmark[N] Print CPU scroll/render benchmark
JSON and exit", " --real-transcript-scroll-benchmark[N] Profile scrolling against your real on-disk transcripts and exit", + " --real-transcript-action-benchmark[N] Profile mixed user actions (scroll/resize/typing/pickers/selection/streaming) on real transcripts and exit", " --stream-e2e-benchmark[N] Print stream event-to-paint guardrail JSON and exit", " --headless-chat-smoke Run a hidden backend smoke test and print JSON events", " --headless-chat-smoke= Same as above", @@ -5516,6 +5520,462 @@ fn benchmark_real_transcript_scroll( } } +/// Profile a realistic mix of user *actions* (not just scrolling) against the +/// user's largest real on-disk transcripts. Each action phase is measured +/// separately as per-frame CPU samples and reported as p50/p95/p99/max, plus a +/// `passes_120fps_cpu_budget` flag against the existing frame budget. This is the +/// broad interaction-coverage companion to `--real-transcript-scroll-benchmark`. +fn run_real_transcript_action_benchmark(frames: usize) -> Result<()> { + let frames = frames.max(1); + let size = PhysicalSize::new(1200, 760); + let transcripts = session_data::load_largest_real_transcripts(8, 24) + .context("failed to load real transcripts for action benchmark")?; + + if transcripts.is_empty() { + println!( + "{}", + serde_json::to_string_pretty(&serde_json::json!({ + "frames": frames, + "sessions": [], + "note": "no real transcripts with >=24 messages found under ~/.jcode/sessions", + }))? 
+ ); + return Ok(()); + } + + let budget_ms = duration_ms(DESKTOP_120FPS_FRAME_BUDGET); + // phase name -> all per-frame samples across every session + let mut phase_samples: std::collections::BTreeMap<&'static str, Vec> = + std::collections::BTreeMap::new(); + let mut session_json = Vec::new(); + + for transcript in &transcripts { + let phases = benchmark_real_transcript_actions(transcript, size, frames); + let phase_json = phases + .iter() + .map(|(name, samples)| { + phase_samples + .entry(name) + .or_default() + .extend_from_slice(samples); + action_phase_json(name, samples, budget_ms) + }) + .collect::>(); + session_json.push(serde_json::json!({ + "session_id": transcript.session_id, + "title": transcript.title, + "message_count": transcript.messages.len(), + "phases": phase_json, + })); + } + + let mut aggregate = Vec::new(); + let mut slowest_phase = String::new(); + let mut slowest_p99 = 0.0_f64; + let mut all_pass = true; + for (name, samples) in &phase_samples { + let value = action_phase_json(name, samples, budget_ms); + let p99 = percentile_ms(samples, 0.99); + if p99 > slowest_p99 { + slowest_p99 = p99; + slowest_phase = (*name).to_string(); + } + if p99 > budget_ms { + all_pass = false; + } + aggregate.push(value); + } + + println!( + "{}", + serde_json::to_string_pretty(&serde_json::json!({ + "frames_per_phase": frames, + "size": { "width": size.width, "height": size.height }, + "target_frame_budget_ms": budget_ms, + "sessions_profiled": transcripts.len(), + "aggregate_phases": aggregate, + "slowest_phase": { "name": slowest_phase, "p99_ms": slowest_p99 }, + "passes_120fps_cpu_budget": all_pass, + "sessions": session_json, + }))? 
+ ); + Ok(()) +} + +fn action_phase_json(name: &str, samples: &[f64], budget_ms: f64) -> serde_json::Value { + let frames = samples.len().max(1); + let total_ms = samples.iter().sum::(); + let p99 = percentile_ms(samples, 0.99); + serde_json::json!({ + "name": name, + "frames": samples.len(), + "mean_ms": total_ms / frames as f64, + "p50_ms": percentile_ms(samples, 0.50), + "p95_ms": percentile_ms(samples, 0.95), + "p99_ms": p99, + "max_ms": max_sample_ms(samples), + "passes_budget": p99 <= budget_ms, + }) +} + +/// Run every simulated action phase for one transcript, returning per-phase +/// per-frame CPU samples (milliseconds). Each phase reproduces the production +/// render path: cached/wrapped body lines, viewport extraction, a windowed body +/// text buffer that is reused across frames, text areas, and primitive geometry. +fn benchmark_real_transcript_actions( + transcript: &session_data::BenchmarkTranscript, + size: PhysicalSize, + frames: usize, +) -> Vec<(&'static str, Vec)> { + let base_app = real_transcript_scroll_app(transcript); + let body_lines = single_session_rendered_body_lines_for_tick(&base_app, size, 0); + let total_lines = body_lines.len(); + let max_scroll = single_session_body_scroll_metrics_for_total_lines(&base_app, size, total_lines) + .map(|metrics| metrics.max_scroll_lines) + .unwrap_or(0) + .max(1); + + let mut phases: Vec<(&'static str, Vec)> = Vec::new(); + + // 1. Smooth (fractional) scroll: scroll position advances a whole line per + // frame with a fractional offset, the common trackpad-scroll case. + phases.push(( + "smooth_scroll", + action_windowed_render_phase(&base_app, &body_lines, size, frames, |app, frame| { + let phase = frame % (max_scroll * 2); + let target = if phase <= max_scroll { + phase + } else { + max_scroll * 2 - phase + }; + app.body_scroll_lines = target as f32; + benchmark_smooth_scroll_lines(frame) + }), + )); + + // 2. Whole-line scroll: integer line steps, no fractional offset. 
+ phases.push(( + "whole_line_scroll", + action_windowed_render_phase(&base_app, &body_lines, size, frames, |app, frame| { + let phase = frame % (max_scroll * 2); + let target = if phase <= max_scroll { + phase + } else { + max_scroll * 2 - phase + }; + app.body_scroll_lines = target as f32; + 0.0 + }), + )); + + // 3. Selection drag across the visible transcript while parked mid-scroll. + { + let mut app = base_app.clone(); + app.body_scroll_lines = (max_scroll / 2) as f32; + let viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &body_lines); + let visible = single_session_visible_body(&app, size); + app.begin_selection(SelectionPoint { line: 0, column: 0 }); + let mut font_system = benchmark_font_system(); + let (mut buffers, mut window_start, mut window_end, mut last_start) = + action_prime_window(&app, &body_lines, size, &mut font_system); + let (samples, _) = benchmark_frame_samples(frames, |frame| { + let line = frame % viewport.lines.len().max(1); + let column = (frame * 7) % 80; + app.update_selection(SelectionPoint { line, column }); + let _ = &visible; + action_render_window( + &app, + &body_lines, + size, + frame as u64, + 0.0, + &mut font_system, + &mut buffers, + &mut window_start, + &mut window_end, + &mut last_start, + ) + }); + phases.push(("selection_drag", samples)); + } + + // 4. Typing in the composer while parked at the bottom of the transcript. 
+ { + let mut app = base_app.clone(); + app.scroll_body_to_bottom(); + app.draft.clear(); + app.draft_cursor = 0; + let mut font_system = benchmark_font_system(); + let (mut buffers, mut window_start, mut window_end, mut last_start) = + action_prime_window(&app, &body_lines, size, &mut font_system); + let (samples, _) = benchmark_frame_samples(frames, |frame| { + app.draft.push(benchmark_typing_char(frame)); + app.draft_cursor = app.draft.len(); + action_render_window( + &app, + &body_lines, + size, + frame as u64, + 0.0, + &mut font_system, + &mut buffers, + &mut window_start, + &mut window_end, + &mut last_start, + ) + }); + phases.push(("composer_typing", samples)); + } + + // 5. Model picker open/close toggling over the transcript: every other frame + // opens the inline picker card, invalidating the inline-widget geometry. + { + let mut app = base_app.clone(); + app.body_scroll_lines = (max_scroll / 3) as f32; + let mut font_system = benchmark_font_system(); + let (mut buffers, mut window_start, mut window_end, mut last_start) = + action_prime_window(&app, &body_lines, size, &mut font_system); + let (samples, _) = benchmark_frame_samples(frames, |frame| { + app.model_picker.open = frame % 2 == 0; + app.model_picker.loading = app.model_picker.open; + action_render_window( + &app, + &body_lines, + size, + frame as u64, + 0.0, + &mut font_system, + &mut buffers, + &mut window_start, + &mut window_end, + &mut last_start, + ) + }); + app.model_picker.open = false; + phases.push(("model_picker_toggle", samples)); + } + + // 6. Session switcher open/close toggling over the transcript. 
+ { + let mut app = base_app.clone(); + app.body_scroll_lines = (max_scroll / 3) as f32; + let mut font_system = benchmark_font_system(); + let (mut buffers, mut window_start, mut window_end, mut last_start) = + action_prime_window(&app, &body_lines, size, &mut font_system); + let (samples, _) = benchmark_frame_samples(frames, |frame| { + app.session_switcher.open = frame % 2 == 0; + action_render_window( + &app, + &body_lines, + size, + frame as u64, + 0.0, + &mut font_system, + &mut buffers, + &mut window_start, + &mut window_end, + &mut last_start, + ) + }); + app.session_switcher.open = false; + phases.push(("session_switcher_toggle", samples)); + } + + // 7. Window resize sweep: each frame is a different surface size, forcing a + // full body relayout + window rebuild (the worst non-scroll case). + { + let app = base_app.clone(); + let mut font_system = benchmark_font_system(); + let (samples, _) = benchmark_frame_samples(frames, |frame| { + let resize = benchmark_resize_size(frame); + let lines = single_session_rendered_body_lines_for_tick(&app, resize, 0); + let viewport = single_session_body_viewport_from_lines(&app, resize, 0.0, &lines); + let key = + single_session_text_key_for_tick_with_rendered_body(&app, resize, 0, 0.0, &lines); + let mut buffers = single_session_text_buffers_from_key(&key, resize, &mut font_system); + let (window_start, window_end) = single_session_body_text_window_bounds(&viewport); + if let Some(body_buffer) = buffers.get_mut(1) { + *body_buffer = single_session_body_text_buffer_from_lines( + &mut font_system, + &lines[window_start..window_end], + resize, + app.text_scale(), + ); + } + let areas = single_session_text_areas_for_app_with_cached_body_viewport( + &app, &buffers, resize, 0.0, viewport, + ); + let vertices = build_single_session_vertices_with_cached_body( + &app, resize, 0.0, frame as u64, 0.0, 1.0, &lines, + ); + buffers.len() ^ areas.len() ^ vertices.len() + }); + phases.push(("window_resize", samples)); + } + + // 8. 
Streaming response growth while scrolled near the bottom: a synthetic + // assistant reply grows by a chunk each frame, the live-streaming case. + { + let mut app = base_app.clone(); + app.scroll_body_to_bottom(); + let mut font_system = benchmark_font_system(); + let (samples, _) = benchmark_frame_samples(frames, |frame| { + app.streaming_response.push_str( + "Streaming update chunk with `inline code` and prose that wraps across lines. ", + ); + if frame % 9 == 0 { + app.streaming_response.push('\n'); + } + let lines = single_session_rendered_body_lines_for_tick(&app, size, frame as u64); + let viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &lines); + let key = + single_session_text_key_for_tick_with_rendered_body(&app, size, 0, 0.0, &lines); + let mut buffers = single_session_text_buffers_from_key(&key, size, &mut font_system); + let (window_start, window_end) = single_session_body_text_window_bounds(&viewport); + if let Some(body_buffer) = buffers.get_mut(1) { + *body_buffer = single_session_body_text_buffer_from_lines( + &mut font_system, + &lines[window_start..window_end], + size, + app.text_scale(), + ); + } + let areas = single_session_text_areas_for_app_with_cached_body_viewport( + &app, &buffers, size, 0.0, viewport, + ); + let vertices = build_single_session_vertices_with_cached_body( + &app, size, 0.0, frame as u64, 0.0, 1.0, &lines, + ); + buffers.len() ^ areas.len() ^ vertices.len() + }); + phases.push(("streaming_growth", samples)); + } + + phases +} + +/// Prime a reusable text-buffer set and its windowed body buffer for `app`, +/// matching how the production renderer seeds the sliding window. Returns the +/// buffers plus the current (window_start, window_end, last_scroll_start). 
+fn action_prime_window( + app: &SingleSessionApp, + body_lines: &[SingleSessionStyledLine], + size: PhysicalSize, + font_system: &mut FontSystem, +) -> (Vec, usize, usize, usize) { + let viewport = single_session_body_viewport_from_lines(app, size, 0.0, body_lines); + let key = single_session_text_key_for_tick_with_rendered_body(app, size, 0, 0.0, body_lines); + let mut buffers = single_session_text_buffers_from_key(&key, size, font_system); + let (window_start, window_end) = single_session_body_text_window_bounds(&viewport); + if let Some(body_buffer) = buffers.get_mut(1) { + *body_buffer = single_session_body_text_buffer_from_lines( + font_system, + &body_lines[window_start..window_end], + size, + app.text_scale(), + ); + body_buffer.set_scroll( + viewport + .start_line + .saturating_sub(window_start) + .min(i32::MAX as usize) as i32, + ); + } + (buffers, window_start, window_end, viewport.start_line) +} + +/// Render one frame through the production windowed path, reusing the body text +/// buffer and only rebuilding/rescrolling the window when the viewport leaves it. 
+#[allow(clippy::too_many_arguments)] +fn action_render_window( + app: &SingleSessionApp, + body_lines: &[SingleSessionStyledLine], + size: PhysicalSize, + tick: u64, + smooth_scroll_lines: f32, + font_system: &mut FontSystem, + buffers: &mut Vec, + window_start: &mut usize, + window_end: &mut usize, + last_scroll_start: &mut usize, +) -> usize { + let viewport = + single_session_body_viewport_from_lines(app, size, smooth_scroll_lines, body_lines); + if !single_session_body_text_window_contains(*window_start, *window_end, &viewport) { + let (start, end) = single_session_body_text_window_bounds(&viewport); + *window_start = start; + *window_end = end; + if let Some(body_buffer) = buffers.get_mut(1) { + *body_buffer = single_session_body_text_buffer_from_lines( + font_system, + &body_lines[start..end], + size, + app.text_scale(), + ); + } + *last_scroll_start = usize::MAX; + } + if viewport.start_line != *last_scroll_start { + if let Some(body_buffer) = buffers.get_mut(1) { + body_buffer.set_scroll( + viewport + .start_line + .saturating_sub(*window_start) + .min(i32::MAX as usize) as i32, + ); + } + *last_scroll_start = viewport.start_line; + } + let areas = single_session_text_areas_for_app_with_cached_body_viewport( + app, + buffers, + size, + smooth_scroll_lines, + viewport, + ); + let vertices = build_single_session_vertices_with_cached_body( + app, + size, + 0.0, + tick, + smooth_scroll_lines, + 1.0, + body_lines, + ); + buffers.len() ^ areas.len() ^ vertices.len() +} + +/// Drive a windowed-scroll render phase, calling `prepare` each frame to mutate +/// the app's scroll position (and return any fractional smooth-scroll offset). 
+fn action_windowed_render_phase( + base_app: &SingleSessionApp, + body_lines: &[SingleSessionStyledLine], + size: PhysicalSize, + frames: usize, + mut prepare: impl FnMut(&mut SingleSessionApp, usize) -> f32, +) -> Vec { + let mut app = base_app.clone(); + let mut font_system = benchmark_font_system(); + let (mut buffers, mut window_start, mut window_end, mut last_start) = + action_prime_window(&app, body_lines, size, &mut font_system); + let (samples, _) = benchmark_frame_samples(frames, |frame| { + let smooth = prepare(&mut app, frame); + action_render_window( + &app, + body_lines, + size, + frame as u64, + smooth, + &mut font_system, + &mut buffers, + &mut window_start, + &mut window_end, + &mut last_start, + ) + }); + samples +} + fn run_stream_e2e_benchmark(raw_events: usize) -> Result<()> { let result = run_desktop_stream_end_to_end_benchmark(raw_events); println!( From 8e86d865c02fa1aa283326729266a69288e35ded Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:21:27 -0700 Subject: [PATCH 37/57] desktop: make action-benchmark streaming phase mirror production incremental wrap The streaming_growth phase re-wrapped the entire transcript every frame, which production avoids by caching the wrapped static base and only appending the wrapped streaming tail. Mirror that here: wrap the static body once, then per frame truncate to the static base and append the tail. Drops measured streaming_growth p99 ~72ms -> ~18ms, reflecting the real production path. --- crates/jcode-desktop/src/main.rs | 40 +++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs index f2730aaad..75cf62e1c 100644 --- a/crates/jcode-desktop/src/main.rs +++ b/crates/jcode-desktop/src/main.rs @@ -5815,10 +5815,21 @@ fn benchmark_real_transcript_actions( // 8. 
Streaming response growth while scrolled near the bottom: a synthetic // assistant reply grows by a chunk each frame, the live-streaming case. + // + // This mirrors the production renderer's incremental path + // (`cached_single_session_body_lines` for the streaming branch): the + // static transcript body is wrapped ONCE, then each frame only truncates + // back to the static base and appends the wrapped streaming tail, rather + // than re-wrapping the whole transcript every frame. { let mut app = base_app.clone(); app.scroll_body_to_bottom(); + app.streaming_response.push_str("Streaming response starting. "); let mut font_system = benchmark_font_system(); + let static_base = single_session_rendered_static_body_lines_for_streaming(&app, size, 0) + .unwrap_or_else(|| single_session_rendered_body_lines_for_tick(&app, size, 0)); + let static_len = static_base.len(); + let mut stream_lines = static_base.clone(); let (samples, _) = benchmark_frame_samples(frames, |frame| { app.streaming_response.push_str( "Streaming update chunk with `inline code` and prose that wraps across lines. ", @@ -5826,16 +5837,27 @@ fn benchmark_real_transcript_actions( if frame % 9 == 0 { app.streaming_response.push('\n'); } - let lines = single_session_rendered_body_lines_for_tick(&app, size, frame as u64); - let viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &lines); - let key = - single_session_text_key_for_tick_with_rendered_body(&app, size, 0, 0.0, &lines); + // Incremental: reuse the wrapped static base, only re-wrap the tail. 
+ stream_lines.truncate(static_len); + append_single_session_streaming_response_rendered_body_lines( + &app, + size, + &mut stream_lines, + ); + let viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &stream_lines); + let key = single_session_text_key_for_tick_with_rendered_body( + &app, + size, + 0, + 0.0, + &stream_lines, + ); let mut buffers = single_session_text_buffers_from_key(&key, size, &mut font_system); let (window_start, window_end) = single_session_body_text_window_bounds(&viewport); if let Some(body_buffer) = buffers.get_mut(1) { *body_buffer = single_session_body_text_buffer_from_lines( &mut font_system, - &lines[window_start..window_end], + &stream_lines[window_start..window_end], size, app.text_scale(), ); @@ -5844,7 +5866,13 @@ fn benchmark_real_transcript_actions( &app, &buffers, size, 0.0, viewport, ); let vertices = build_single_session_vertices_with_cached_body( - &app, size, 0.0, frame as u64, 0.0, 1.0, &lines, + &app, + size, + 0.0, + frame as u64, + 0.0, + 1.0, + &stream_lines, ); buffers.len() ^ areas.len() ^ vertices.len() }); From 4c82a6b153868eef91dd28e094ab973b0012045f Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 15:23:10 -0700 Subject: [PATCH 38/57] desktop: make action-benchmark resize phase reuse cached raw styled lines Production caches the raw (unwrapped) styled body lines across resizes and only re-runs the width-dependent wrap, via single_session_rendered_body_lines_from_raw_ref. Mirror that in the resize phase instead of regenerating raw markdown lines every frame. Measured window_resize p99 ~64ms -> ~28ms, matching the real path. 
--- crates/jcode-desktop/src/main.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs index 75cf62e1c..703a68995 100644 --- a/crates/jcode-desktop/src/main.rs +++ b/crates/jcode-desktop/src/main.rs @@ -5782,13 +5782,18 @@ fn benchmark_real_transcript_actions( } // 7. Window resize sweep: each frame is a different surface size, forcing a - // full body relayout + window rebuild (the worst non-scroll case). + // body re-wrap + window rebuild (the worst non-scroll case). + // + // Mirrors production (`cached_single_session_body_lines` non-streaming + // branch): the raw styled lines (markdown parse) are generated ONCE and + // cached across sizes; only the width-dependent wrap re-runs per resize. { let app = base_app.clone(); + let raw_lines = app.body_styled_lines_for_tick(0); let mut font_system = benchmark_font_system(); let (samples, _) = benchmark_frame_samples(frames, |frame| { let resize = benchmark_resize_size(frame); - let lines = single_session_rendered_body_lines_for_tick(&app, resize, 0); + let lines = single_session_rendered_body_lines_from_raw_ref(&app, resize, &raw_lines); let viewport = single_session_body_viewport_from_lines(&app, resize, 0.0, &lines); let key = single_session_text_key_for_tick_with_rendered_body(&app, resize, 0, 0.0, &lines); From 59362bd0f837327b8f7af75ab6c357d9278b77cd Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:06:22 -0700 Subject: [PATCH 39/57] perf(agent): scan only new delta for wrapped-tool markers during streaming The streaming text loop re-ran text_content.find(...) over the ENTIRE accumulated response on every TextDelta until a wrapped-tool-call marker was found. For normal answers (no marker) that scanned everything every token: O(response) per delta, O(response^2) over a full streamed answer. 
Scan only the newly appended delta plus a short overlap window (so a marker straddling the append boundary is still detected), giving O(delta) per token. Add unit tests asserting equivalence to a full rescan across chunk sizes, unicode, and the boundary-straddle case. --- .../src/agent/turn_streaming_mpsc.rs | 120 +++++++++++++++++- 1 file changed, 117 insertions(+), 3 deletions(-) diff --git a/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs b/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs index fc13e3975..8671b00fe 100644 --- a/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs +++ b/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs @@ -1,5 +1,47 @@ use super::*; +/// Largest byte index `<= index` that is a UTF-8 char boundary in `text`. +/// Equivalent to the unstable `str::floor_char_boundary`, reimplemented so the +/// incremental marker scan can clamp its scan-window start onto a valid +/// boundary without re-scanning the whole accumulated response. +fn floor_char_boundary(text: &str, index: usize) -> usize { + if index >= text.len() { + return text.len(); + } + let mut boundary = index; + while boundary > 0 && !text.is_char_boundary(boundary) { + boundary -= 1; + } + boundary +} + +/// The wrapped-tool-call markers emitted by some models inside plain text. +const WRAP_TOOL_MARKERS: [&str; 2] = ["to=functions.", "+#+#"]; + +/// Find the first wrapped-tool-call marker in `accumulated`, scanning only the +/// newly appended `delta` plus a short overlap from the previous tail (so a +/// marker straddling the append boundary is still found). +/// +/// This avoids re-scanning the entire accumulated response on every streamed +/// delta, which was O(response) per token and O(response^2) over a full answer. 
+fn find_wrap_marker_incremental(accumulated: &str, appended_len: usize) -> Option { + let max_marker_len = WRAP_TOOL_MARKERS + .iter() + .map(|marker| marker.len()) + .max() + .unwrap_or(0); + let scan_start = accumulated + .len() + .saturating_sub(appended_len + max_marker_len.saturating_sub(1)); + let scan_start = floor_char_boundary(accumulated, scan_start); + let window = &accumulated[scan_start..]; + WRAP_TOOL_MARKERS + .iter() + .filter_map(|marker| window.find(marker)) + .min() + .map(|rel_idx| scan_start + rel_idx) +} + fn reload_interrupted_tool_result(tc: &ToolCall, elapsed_secs: f64) -> (String, bool) { if tc.name == "selfdev" { return ("Reload initiated. Process restarting...".to_string(), false); @@ -401,9 +443,11 @@ impl Agent { } text_content.push_str(&text); if !text_wrapped_detected { - if let Some(marker_idx) = text_content - .find("to=functions.") - .or_else(|| text_content.find("+#+#")) + // Scan only the new delta (plus a short overlap for + // markers straddling the boundary) instead of the + // whole accumulated response on every token. + if let Some(marker_idx) = + find_wrap_marker_incremental(&text_content, text.len()) { text_wrapped_detected = true; let clean_prefix = @@ -1332,4 +1376,74 @@ mod tests { assert!(is_error); assert!(message.contains("interrupted by server reload")); } + + /// Reference O(n) full scan, preserving the original precedence: the + /// `to=functions.` marker is checked before `+#+#`. + fn find_wrap_marker_full(text: &str) -> Option { + text.find("to=functions.").or_else(|| text.find("+#+#")) + } + + /// Simulate streaming `full` in arbitrary deltas and assert the incremental + /// scan finds the first marker position, matching a full rescan each step. 
+ fn assert_incremental_matches(full: &str, chunk: usize) { + let mut acc = String::new(); + let mut incremental_hit: Option = None; + let bytes = full.as_bytes(); + let mut i = 0; + while i < bytes.len() { + let mut end = (i + chunk).min(bytes.len()); + while end < bytes.len() && !full.is_char_boundary(end) { + end += 1; + } + let delta = &full[i..end]; + acc.push_str(delta); + if incremental_hit.is_none() { + incremental_hit = find_wrap_marker_incremental(&acc, delta.len()); + } + i = end; + } + // The earliest of either marker in the full text. + let fn_pos = full.find("to=functions."); + let plus_pos = full.find("+#+#"); + let expected = match (fn_pos, plus_pos) { + (Some(a), Some(b)) => Some(a.min(b)), + (a, b) => a.or(b), + }; + assert_eq!( + incremental_hit, expected, + "incremental scan mismatch for {full:?} chunk={chunk}" + ); + } + + #[test] + fn wrap_marker_incremental_detects_markers_across_chunk_sizes() { + let cases = [ + "plain answer with no marker at all", + "answer then to=functions.foo({})", + "answer then +#+# wrapped", + "prefix +#+# and later to=functions.bar", + "unicode 🔄 résumé then to=functions.baz", + "", + "to=functions.first", + "+#+#", + ]; + for case in cases { + for chunk in [1usize, 2, 3, 5, 7, 100] { + assert_incremental_matches(case, chunk); + } + } + } + + #[test] + fn wrap_marker_incremental_finds_marker_straddling_delta_boundary() { + // Feed "to=functions." split right in the middle so the marker only + // exists once both halves are appended; the overlap window must catch it. 
+ let mut acc = String::new(); + acc.push_str("answer to=fun"); + assert_eq!(find_wrap_marker_incremental(&acc, "answer to=fun".len()), None); + acc.push_str("ctions.tool"); + let hit = find_wrap_marker_incremental(&acc, "ctions.tool".len()); + assert_eq!(hit, find_wrap_marker_full(&acc)); + assert_eq!(hit, Some("answer ".len())); + } } From ba5b62effa03eed42c42ebe646f74f18700e13ed Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:09:52 -0700 Subject: [PATCH 40/57] perf(openrouter): drain consumed SSE prefix instead of reallocating buffer parse_next_event reassigned self.buffer = self.buffer[pos+2..].to_string() for every SSE event, copying and reallocating the entire remaining buffer each time. When one network chunk batches many SSE events this is O(buffer^2). Use String::drain(..pos+2) to remove the consumed prefix in place. Pure behavior-preserving refactor. --- crates/jcode-base/src/provider/openrouter_sse_stream.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/jcode-base/src/provider/openrouter_sse_stream.rs b/crates/jcode-base/src/provider/openrouter_sse_stream.rs index 7e3f6ddcd..336a2dadd 100644 --- a/crates/jcode-base/src/provider/openrouter_sse_stream.rs +++ b/crates/jcode-base/src/provider/openrouter_sse_stream.rs @@ -453,8 +453,13 @@ impl OpenRouterStream { } while let Some(pos) = self.buffer.find("\n\n") { + // Extract this event and remove it (plus the "\n\n" separator) in + // place. Reassigning `self.buffer = self.buffer[pos + 2..].to_string()` + // copied and reallocated the entire remaining buffer on every event, + // which is O(buffer^2) when one network chunk batches many SSE + // events. `drain` removes the consumed prefix without reallocating. 
let event_str = self.buffer[..pos].to_string(); - self.buffer = self.buffer[pos + 2..].to_string(); + self.buffer.drain(..pos + 2); // Parse SSE event let mut data = None; From f7dc370f6762813c50437635db85356f3ce233d0 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:13:17 -0700 Subject: [PATCH 41/57] perf(tui): avoid rescanning transcript prefix in incremental body prep prepare_body_incremental recounted user messages in messages[..prev_msg_count] on every incremental append to seed prompt_num. Appending one message at a time over a long session made that cumulative O(n^2). prev.user_prompt_texts is extended in lockstep with each rendered user message, so its length already is the prior user-prompt count; use it directly for O(1) seeding. --- crates/jcode-tui/src/tui/ui_prepare.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/crates/jcode-tui/src/tui/ui_prepare.rs b/crates/jcode-tui/src/tui/ui_prepare.rs index 1254cbb49..5fc6f4dd2 100644 --- a/crates/jcode-tui/src/tui/ui_prepare.rs +++ b/crates/jcode-tui/src/tui/ui_prepare.rs @@ -751,10 +751,13 @@ pub(super) fn prepare_body_incremental( let pending_count = input_ui::pending_prompt_count(app); let prompt_number_offset = app.compacted_hidden_user_prompts(); - let mut prompt_num = messages[..prev_msg_count] - .iter() - .filter(|m| m.effective_role() == "user") - .count(); + // The number of user prompts already rendered equals the number of cached + // user prompt texts. Re-counting `messages[..prev_msg_count]` here on every + // incremental append rescans the whole prior transcript, making a session + // that grows one message at a time O(n^2). `prev.user_prompt_texts` is + // extended in lockstep with each rendered user message, so its length is the + // exact prior prompt count. 
+ let mut prompt_num = prev.user_prompt_texts.len(); let mut new_lines: Vec = Vec::new(); let mut new_user_line_indices: Vec = Vec::new(); From 74b88e0858b12ed6548129c164f0b72c48182213 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:16:15 -0700 Subject: [PATCH 42/57] perf(session-picker): partition filtered refs by group in one pass rebuild_items scanned every filtered session ref once per server group to collect that group's sessions: O(groups * filtered_refs). With many remote server groups and many sessions this scaled poorly on every search keystroke. Bucket the filtered refs by group_idx in a single O(filtered_refs) pass, then emit groups in order (O(groups)). Behavior (grouping, ordering, saved-id filtering) is preserved. --- .../src/tui/session_picker/filter.rs | 72 +++++++++---------- 1 file changed, 35 insertions(+), 37 deletions(-) diff --git a/crates/jcode-tui/src/tui/session_picker/filter.rs b/crates/jcode-tui/src/tui/session_picker/filter.rs index 82883b2f4..5cc1e7602 100644 --- a/crates/jcode-tui/src/tui/session_picker/filter.rs +++ b/crates/jcode-tui/src/tui/session_picker/filter.rs @@ -223,43 +223,41 @@ impl SessionPicker { } if !self.all_server_groups.is_empty() { - let grouped_sections: Vec<(String, String, String, Vec)> = self - .all_server_groups - .iter() - .enumerate() - .filter_map(|(group_idx, group)| { - let visible: Vec = filtered_refs - .iter() - .copied() - .filter(|session_ref| match session_ref { - SessionRef::Group { - group_idx: ref_group_idx, - session_idx, - } => { - if *ref_group_idx != group_idx { - return false; - } - group - .sessions - .get(*session_idx) - .is_some_and(|session| !saved_ids.contains(&session.id)) - } - _ => false, - }) - .collect(); - - if visible.is_empty() { - None - } else { - Some(( - group.name.clone(), - group.icon.clone(), - group.version.clone(), - visible, - )) - } - }) - .collect(); + // Partition the filtered refs by group in a single pass 
instead of + // rescanning every filtered ref once per group. The previous code + // was O(groups * filtered_refs); with many remote/server groups and + // many sessions this scaled poorly on every search keystroke. One + // bucketing pass is O(filtered_refs), then emitting is O(groups). + let mut group_buckets: Vec> = + vec![Vec::new(); self.all_server_groups.len()]; + for session_ref in filtered_refs.iter().copied() { + if let SessionRef::Group { + group_idx, + session_idx, + } = session_ref + && let Some(group) = self.all_server_groups.get(group_idx) + && group + .sessions + .get(session_idx) + .is_some_and(|session| !saved_ids.contains(&session.id)) + { + group_buckets[group_idx].push(session_ref); + } + } + + let mut grouped_sections: Vec<(String, String, String, Vec)> = Vec::new(); + for (group_idx, group) in self.all_server_groups.iter().enumerate() { + let visible = std::mem::take(&mut group_buckets[group_idx]); + if visible.is_empty() { + continue; + } + grouped_sections.push(( + group.name.clone(), + group.icon.clone(), + group.version.clone(), + visible, + )); + } for (name, icon, version, visible) in grouped_sections { self.items.push(PickerItem::ServerHeader { From 962b1213da32a64fd3fc64418f0af4e227bd0ffb Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:33:30 -0700 Subject: [PATCH 43/57] fix(tui): fully select the last line when dragging past the bottom Follow-up to the previous fix that stopped the edge auto-scroll hot zone from snapping the selection to the last line while pinned. That left a gap: dragging *past* the last line (down into the empty area below the content-sized chat pane) no longer extended the selection at all, because that overshoot row maps to no line and copy_point_from_screen returned None. Native terminal/browser selection treats dragging past the last line as "select through the end of that line". 
Add copy_pane_drag_point(), which clamps vertical overshoot to the nearest in-bounds line edge: a drag below the last visible line snaps to the end of that line, and a drag above the first visible line snaps to its start. A direct hit on a real line still yields precise per-cell selection. Use it for both Drag and Up so the boundary line is fully covered during the drag and on release. Adds a regression test that anchors on the last content line, drags straight down past the bottom of the pane with the cursor x only partway through the line, and asserts the whole last line (through its end) is selected without arming autoscroll or scrolling. --- .../jcode-tui/src/tui/app/copy_selection.rs | 17 ++- .../tui/app/tests/scroll_copy_02/part_01.rs | 124 ++++++++++++++++++ crates/jcode-tui/src/tui/ui.rs | 71 ++++++++++ 3 files changed, 208 insertions(+), 4 deletions(-) diff --git a/crates/jcode-tui/src/tui/app/copy_selection.rs b/crates/jcode-tui/src/tui/app/copy_selection.rs index 8df43942b..229db8d32 100644 --- a/crates/jcode-tui/src/tui/app/copy_selection.rs +++ b/crates/jcode-tui/src/tui/app/copy_selection.rs @@ -526,7 +526,14 @@ impl App { } // Left the edge: stop the continuous autoscroll. self.copy_selection_edge_autoscroll = None; - if let Some(point) = point.filter(|point| Some(point.pane) == active_pane) { + // Resolve the drag target, clamping vertical overshoot (e.g. a + // drag into the blank space below the last line) to the nearest + // in-bounds line edge so the boundary line is fully selected, + // just like native terminal/browser selection. 
+ let resolved = active_pane.and_then(|pane| { + crate::tui::ui::copy_pane_drag_point(pane, mouse.column, mouse.row) + }); + if let Some(point) = resolved.filter(|point| Some(point.pane) == active_pane) { self.update_selection_with_point(point, true); } Some(false) @@ -542,9 +549,11 @@ impl App { }; } self.copy_selection_dragging = false; - if let Some(point) = - point.filter(|point| Some(point.pane) == self.current_copy_selection_pane()) - { + let release_pane = self.current_copy_selection_pane(); + let resolved = release_pane.and_then(|pane| { + crate::tui::ui::copy_pane_drag_point(pane, mouse.column, mouse.row) + }); + if let Some(point) = resolved.filter(|point| Some(point.pane) == release_pane) { self.update_selection_with_point(point, true); } if self.copy_selection_mode { diff --git a/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs index c13cee50d..5f2fdb54d 100644 --- a/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs +++ b/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs @@ -1090,6 +1090,130 @@ fn test_copy_selection_drag_to_bottom_edge_when_pinned_does_not_snap_or_autoscro }); } +#[test] +fn test_copy_selection_drag_below_last_line_fully_selects_last_line() { + // Dragging *past* the last content line (into the empty area below the + // chat pane) should fully select that last line through its end, just like + // native terminal and browser selection. The chat pane is sized to its + // content, so a downward drag that overshoots reports a row at/below the + // bottom boundary that maps to no line at all; that used to silently drop + // the extension so the bottom line could never be fully highlighted. 
+ let _render_lock = scroll_render_test_lock(); + let mut app = create_test_app(); + + let lines = (1..=6) + .map(|idx| format!("line {idx:03}")) + .collect::>() + .join("\n"); + app.display_messages = vec![DisplayMessage { + role: "assistant".to_string(), + content: lines, + tool_calls: vec![], + duration_secs: None, + title: None, + tool_data: None, + }]; + app.bump_display_messages_version(); + app.scroll_offset = 0; + app.auto_scroll_paused = false; + app.is_processing = false; + app.streaming_text.clear(); + app.status = ProcessingStatus::Idle; + + // Tall terminal so there is empty space below the content-sized chat pane. + let backend = ratatui::backend::TestBackend::new(60, 20); + let mut terminal = ratatui::Terminal::new(backend).expect("failed to create test terminal"); + render_and_snap(&app, &mut terminal); + + app.handle_key(KeyCode::Char('y'), KeyModifiers::ALT) + .unwrap(); + + let (visible_start, visible_end) = + crate::tui::ui::copy_viewport_visible_range().expect("visible copy range"); + let line_count = crate::tui::ui::copy_viewport_line_count().expect("line count"); + assert_eq!(visible_end, line_count, "view must be pinned to the bottom"); + + // The last line that maps to a real screen point. + let last_line = (visible_start..visible_end) + .rev() + .find(|&ln| { + crate::tui::ui::copy_viewport_line_text(ln) + .map(|t| unicode_width::UnicodeWidthStr::width(t.as_str()) > 0) + .unwrap_or(false) + }) + .expect("a non-empty visible content line"); + let last_text = crate::tui::ui::copy_viewport_line_text(last_line).unwrap_or_default(); + let last_width = unicode_width::UnicodeWidthStr::width(last_text.as_str()); + + let layout = crate::tui::ui::last_layout_snapshot().expect("layout snapshot"); + let area = layout.messages_area; + + // Anchor on a valid cell at the START of the last content line. 
+ let last_content_row = area.y + (last_line - visible_start) as u16; + let anchor_x = (area.x..area.x + area.width) + .find(|&x| { + crate::tui::ui::copy_viewport_point_from_screen(x, last_content_row) + .map(|p| p.abs_line == last_line) + .unwrap_or(false) + }) + .expect("a screen column mapping to the last content line"); + app.handle_mouse_event(MouseEvent { + kind: MouseEventKind::Down(MouseButton::Left), + column: anchor_x, + row: last_content_row, + modifiers: KeyModifiers::empty(), + }); + + // Drag straight down, past the bottom of the pane, with the cursor x landing + // partway through (not at the end of) the last line. Even so the whole last + // line should be selected, because we have overshot it vertically. + let mid_x = anchor_x + 1; + let below_row = (area.y + area.height + 2).min(terminal.backend().size().unwrap().height - 1); + assert!( + below_row > last_content_row, + "test must drag strictly below the last content row" + ); + let before_scroll = app.scroll_offset(); + app.handle_mouse_event(MouseEvent { + kind: MouseEventKind::Drag(MouseButton::Left), + column: mid_x, + row: below_row, + modifiers: KeyModifiers::empty(), + }); + + // No autoscroll (nothing below), and no scroll movement. + assert!( + !crate::tui::TuiState::copy_selection_edge_autoscroll_active(&app), + "edge autoscroll must not arm dragging past the last line" + ); + assert_eq!(app.scroll_offset(), before_scroll, "must not scroll"); + + // The selection should now extend through the END of the last line. 
+ let range = app.normalized_copy_selection().expect("normalized range"); + assert_eq!( + range.end.abs_line, last_line, + "selection should extend to the last content line" + ); + assert_eq!( + range.end.column, last_width, + "selection should cover the full last line (through its end)" + ); + let selected = app + .current_copy_selection_text() + .expect("expected selection text"); + assert!( + selected.contains(last_text.trim_end()), + "selection should include the full last line text: got {selected:?}" + ); + + app.handle_mouse_event(MouseEvent { + kind: MouseEventKind::Up(MouseButton::Left), + column: mid_x, + row: below_row, + modifiers: KeyModifiers::empty(), + }); +} + #[test] fn test_alt_a_copies_chat_viewport_with_context_when_input_empty() { let _render_lock = scroll_render_test_lock(); diff --git a/crates/jcode-tui/src/tui/ui.rs b/crates/jcode-tui/src/tui/ui.rs index a02bdbebb..80e749d3d 100644 --- a/crates/jcode-tui/src/tui/ui.rs +++ b/crates/jcode-tui/src/tui/ui.rs @@ -1653,6 +1653,77 @@ pub(crate) fn copy_pane_vertical_edge_point( copy_point_from_snapshot(&snapshot, clamped_col, edge_row).map(|point| (point, upward)) } +/// Resolve the selection point for a drag at `(column, row)`, clamping vertical +/// overshoot to the nearest in-bounds line edge. +/// +/// Terminals report a drag that "leaves" the pane on the boundary row, but a +/// drag *into the empty space below the last content line* (common with short +/// transcripts that leave blank rows underneath) lands on a row that maps to no +/// line at all, so `copy_point_from_screen` returns `None`. Native terminal and +/// browser selection treat that as "select through the end of the last line". +/// This mirrors that: dragging below the last visible line snaps to the end of +/// that line, and dragging above the first visible line snaps to its start, so +/// the boundary line is fully covered even when there is nothing more to scroll. 
+pub(crate) fn copy_pane_drag_point( + pane: crate::tui::CopySelectionPane, + column: u16, + row: u16, +) -> Option { + let snapshot = copy_snapshot_for_pane(pane)?; + let area = snapshot.content_area; + if area.width == 0 || area.height == 0 { + return None; + } + + // A direct hit on a real line wins: precise per-cell selection. + if let Some(point) = copy_point_from_snapshot(&snapshot, column, row) { + return Some(point); + } + + let line_count = snapshot.wrapped_plain_line_count(); + if line_count == 0 { + return None; + } + let last_line = line_count.saturating_sub(1); + let last_visible_line = snapshot.visible_end.saturating_sub(1).min(last_line); + let first_visible_line = snapshot.scroll.min(last_line); + + let last_row = area.y.saturating_add(area.height).saturating_sub(1); + let clamped_col = column.clamp(area.x, area.x.saturating_add(area.width).saturating_sub(1)); + + // Below the visible content: snap to the end of the last visible line. + if row >= last_row { + let text = snapshot.wrapped_plain_line(last_visible_line).unwrap_or(""); + return Some(crate::tui::CopySelectionPoint { + pane, + abs_line: last_visible_line, + column: line_display_width(text), + }); + } + + // Above the visible content: snap to the start of the first visible line. + if row <= area.y { + return Some(crate::tui::CopySelectionPoint { + pane, + abs_line: first_visible_line, + column: snapshot.wrapped_copy_offset(first_visible_line).unwrap_or(0), + }); + } + + // Interior row that maps to no line (e.g. a blank gap row between/after + // content within the visible band): fall back to the boundary-clamped point. + copy_point_from_snapshot( + &snapshot, + clamped_col, + row.clamp(area.y, last_row), + ) + .or(Some(crate::tui::CopySelectionPoint { + pane, + abs_line: last_visible_line, + column: line_display_width(snapshot.wrapped_plain_line(last_visible_line).unwrap_or("")), + })) +} + /// Edge point for tick-driven continuous auto-scroll, where there is no live /// mouse position. 
Uses the top/bottom boundary row of the pane and its left /// content column so the selection keeps extending to the freshly revealed line. From ad0dd6c1d817ab96087ca7b02520fb535db41e93 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:41:28 -0700 Subject: [PATCH 44/57] feat(tui): collapse 'current' reasoning with a height animation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 'current' reasoning-display mode the live dim/italic reasoning used to vanish in a single frame when the answer committed or a tool ran, snapping the transcript upward. Instead, on close the reasoning block is sliced out of the streaming buffer into a dedicated collapsing 'reasoning' display message that height-collapses (ease-out, oldest line first) toward a one-line '▸ thought for Xs' summary, leaving a trace behind. - New ReasoningCollapse state + begin/advance/finalize on App. - Renders via a new 'reasoning' display role (dim+italic, sentinel-stripped). - Redraw loop (local + remote tick, turn loop) advances the animation; redraw policy keeps frames live while collapsing. - Reduced-motion / low-power tiers snap straight to the summary. - Guards drop the animation safely on transcript reset/replace. - Tests: block parsing, summary labels, monotone collapse, finalize, reduced-motion snap, and end-to-end dim/italic render of the role. 
--- crates/jcode-tui-messages/src/message.rs | 14 ++ crates/jcode-tui/src/tui/app.rs | 34 +++ crates/jcode-tui/src/tui/app/input.rs | 205 +++++++++++++++ crates/jcode-tui/src/tui/app/local.rs | 4 + crates/jcode-tui/src/tui/app/remote.rs | 1 + .../src/tui/app/state_ui_messages.rs | 8 + .../src/tui/app/tests/reasoning_region.rs | 237 ++++++++++++++++-- crates/jcode-tui/src/tui/app/tui_lifecycle.rs | 6 + crates/jcode-tui/src/tui/app/tui_state.rs | 4 + crates/jcode-tui/src/tui/app/turn.rs | 2 + crates/jcode-tui/src/tui/mod.rs | 7 + crates/jcode-tui/src/tui/ui.rs | 4 +- crates/jcode-tui/src/tui/ui_messages.rs | 17 ++ crates/jcode-tui/src/tui/ui_prepare.rs | 28 +++ crates/jcode-tui/src/tui/ui_tests/prepare.rs | 57 +++++ 15 files changed, 602 insertions(+), 26 deletions(-) diff --git a/crates/jcode-tui-messages/src/message.rs b/crates/jcode-tui-messages/src/message.rs index 08eb1941f..b2a7374a4 100644 --- a/crates/jcode-tui-messages/src/message.rs +++ b/crates/jcode-tui-messages/src/message.rs @@ -175,6 +175,20 @@ impl DisplayMessage { } } + /// Create a display-only collapsing reasoning trace ("current" mode). The + /// content is sentinel-wrapped dim/italic markup; this message height-collapses + /// toward a one-line summary and is excluded from provider/model context. + pub fn reasoning(content: impl Into) -> Self { + Self { + role: "reasoning".to_string(), + content: content.into(), + tool_calls: Vec::new(), + duration_secs: None, + title: None, + tool_data: None, + } + } + /// Convert the shared session renderer output into the TUI transcript model. pub fn from_rendered_message(item: RenderedMessage) -> Self { Self { diff --git a/crates/jcode-tui/src/tui/app.rs b/crates/jcode-tui/src/tui/app.rs index 376883b09..447d02ecf 100644 --- a/crates/jcode-tui/src/tui/app.rs +++ b/crates/jcode-tui/src/tui/app.rs @@ -340,6 +340,28 @@ pub enum ProcessingStatus { RunningTool(String), } +/// Live "collapse the current reasoning" animation state. 
+/// +/// In `current` reasoning-display mode the model's reasoning streams live as +/// dim+italic lines, then must disappear once the answer commits or a tool runs. +/// Instead of deleting every reasoning line in a single frame (a jarring upward +/// jump), the closed reasoning block is moved into a dedicated `"reasoning"` +/// display message that height-collapses toward a one-line summary over a short +/// ease-out, leaving a `▸ thought for Xs` trace behind. +#[derive(Clone, Debug)] +pub(crate) struct ReasoningCollapse { + /// Index into `display_messages` of the `"reasoning"` message being collapsed. + pub(crate) msg_index: usize, + /// One-line dim summary the block collapses down to (markup for + /// "▸ thought for Xs"), always shown at the top of the message. + pub(crate) summary_markup: String, + /// Per-line dim+italic markup for each reasoning line, in order. The block + /// shrinks by dropping leading lines until only `summary_markup` remains. + pub(crate) line_markups: Vec, + /// When the collapse animation started. + pub(crate) started_at: Instant, +} + #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) enum RemoteStartupPhase { StartingServer, @@ -712,6 +734,18 @@ pub struct App { // `streaming_text` (the rendered tail of `reasoning_pending_line`). Truncated // and re-appended on each delta so the in-progress line updates in place. reasoning_partial_len: usize, + // Byte offset in `streaming_text` where the current reasoning block began + // (recorded by `open_reasoning_region`). Used in `current` mode to slice the + // closed reasoning block out of the stream and hand it to the collapse + // animation while keeping any answer text that preceded it in order. + reasoning_block_start: Option, + // Wall-clock instant the current reasoning region opened, used to label the + // collapsed summary ("▸ thought for Xs"). + reasoning_block_started_at: Option, + // Active "collapse the current reasoning" animation (current mode only). 
While + // set, a `"reasoning"` display message height-collapses toward its one-line + // summary; the redraw loop advances it each frame and finalizes on completion. + reasoning_collapse: Option, // Hot-reload: if set, exec into new binary with this session ID (no rebuild) reload_requested: Option, // Hot-rebuild: if set, do full git pull + cargo build + tests then exec diff --git a/crates/jcode-tui/src/tui/app/input.rs b/crates/jcode-tui/src/tui/app/input.rs index 95f9dec2d..6fe5b2bd9 100644 --- a/crates/jcode-tui/src/tui/app/input.rs +++ b/crates/jcode-tui/src/tui/app/input.rs @@ -50,6 +50,48 @@ pub(super) fn strip_reasoning_lines(content: &str) -> String { result.trim_end().to_string() } +/// Total duration of the "current reasoning collapses away" height animation. +pub(super) const REASONING_COLLAPSE_DURATION: Duration = Duration::from_millis(280); + +/// Split a just-closed reasoning block (sentinel-wrapped dim/italic line markup, +/// as produced by [`jcode_tui_markdown::reasoning_line_markup`]) into one markup +/// string per visible reasoning line. Blank separator lines are dropped so the +/// collapse animates over real thought lines only. +pub(super) fn reasoning_block_line_markups(block: &str) -> Vec { + block + .split_inclusive('\n') + .filter(|segment| segment.contains(jcode_tui_markdown::REASONING_SENTINEL)) + .map(|segment| segment.to_string()) + .collect() +} + +/// One-line dim summary the collapsed reasoning folds into. Includes a `▸` marker +/// and the thinking duration when known (e.g. `▸ thought for 12s`). 
+pub(super) fn reasoning_summary_markup(line_count: usize, elapsed: Option) -> String { + let label = match elapsed { + Some(d) if d.as_secs() >= 1 => format!("▸ thought for {}s", d.as_secs()), + Some(_) => "▸ thought".to_string(), + None if line_count == 1 => "▸ thought (1 line)".to_string(), + None => format!("▸ thought ({} lines)", line_count), + }; + jcode_tui_markdown::reasoning_line_markup(&label) +} + +/// Build the transcript content for a collapsing `"reasoning"` message: the last +/// `remaining` reasoning lines, or just the summary line once fully collapsed. +pub(super) fn reasoning_message_content( + summary_markup: &str, + line_markups: &[String], + remaining: usize, +) -> String { + if remaining == 0 || line_markups.is_empty() { + return summary_markup.to_string(); + } + let remaining = remaining.min(line_markups.len()); + let start = line_markups.len() - remaining; + line_markups[start..].concat() +} + pub(super) fn edit_input_in_external_editor(app: &mut App) { match edit_text_in_external_editor(&app.input) { Ok(edited) => { @@ -2400,6 +2442,11 @@ impl App { self.reasoning_streaming = true; self.reasoning_pending_line.clear(); self.reasoning_partial_len = 0; + // Remember where this reasoning block starts in the stream so `current` + // mode can later slice it out (without disturbing any preceding answer + // text) and hand it to the collapse animation. + self.reasoning_block_start = Some(self.streaming_text.len()); + self.reasoning_block_started_at = Some(Instant::now()); } /// Remove the live partial-reasoning tail (the rendered, not-yet-committed @@ -2465,6 +2512,17 @@ impl App { .push_str(&jcode_tui_markdown::reasoning_line_markup(&pending)); } self.reasoning_streaming = false; + + // In `current` mode, animate the block away instead of leaving it in the + // stream to be stripped wholesale at commit time. 
+ if matches!( + crate::config::config().display.reasoning_display(), + crate::config::ReasoningDisplayMode::Current + ) { + self.begin_reasoning_collapse(); + return; + } + // Terminate the reasoning block with a blank line so following output // renders as a normal paragraph. if !self.streaming_text.ends_with("\n\n") { @@ -2477,6 +2535,147 @@ impl App { self.refresh_split_view_if_needed(); } + /// Slice the just-closed reasoning block out of `streaming_text` and move it + /// into a dedicated `"reasoning"` display message, then start (or replace) the + /// height-collapse animation. Any answer text streamed *before* the reasoning + /// block is left untouched so ordering is preserved. With decorative + /// animations disabled (reduced motion / low-power tiers) the block is + /// finalized straight to its summary line. + pub(super) fn begin_reasoning_collapse(&mut self) { + let block_start = self.reasoning_block_start.take().unwrap_or(0); + let started_at = self.reasoning_block_started_at.take(); + // Finalize any previous collapse first so its message snaps to its summary + // instead of being orphaned mid-animation. + self.finalize_reasoning_collapse(); + + let block_start = block_start.min(self.streaming_text.len()); + + // Everything from the block start onward is reasoning markup (plus the + // separators inserted by open/close). Take it out of the live stream. + let block: String = self.streaming_text.split_off(block_start); + // Drop a trailing separator the answer-side path would otherwise add. + while self.streaming_text.ends_with('\n') { + self.streaming_text.pop(); + } + self.refresh_split_view_if_needed(); + + let line_markups = reasoning_block_line_markups(&block); + if line_markups.is_empty() { + // Nothing to show (e.g. empty reasoning); just clear state. 
+ self.reasoning_collapse = None; + return; + } + + let elapsed = started_at.map(|t| t.elapsed()); + let summary_markup = reasoning_summary_markup(line_markups.len(), elapsed); + + // Build the committed message content: every reasoning line, then the + // summary as the final line. The renderer reveals a shrinking suffix. + let content = + reasoning_message_content(&summary_markup, &line_markups, line_markups.len()); + + let msg_index = self.display_messages.len(); + self.push_display_message(DisplayMessage::reasoning(content)); + + let decorative = crate::perf::tui_policy().enable_decorative_animations; + if !decorative { + // Reduced motion: snap straight to the one-line summary. + self.replace_display_message_content( + msg_index, + reasoning_message_content(&summary_markup, &line_markups, 0), + ); + self.reasoning_collapse = None; + return; + } + + self.reasoning_collapse = Some(super::ReasoningCollapse { + msg_index, + summary_markup, + line_markups, + started_at: Instant::now(), + }); + } + + /// Advance the active reasoning-collapse animation. Returns `true` when the + /// transcript changed (so the caller should request a redraw). Finalizes to + /// the summary line once the animation completes. + pub(super) fn advance_reasoning_collapse(&mut self) -> bool { + let Some(collapse) = self.reasoning_collapse.as_ref() else { + return false; + }; + + // If the target message moved or was replaced (compaction/rewind), drop the + // animation rather than risk mutating an unrelated message. + if self + .display_messages + .get(collapse.msg_index) + .map(|m| m.role.as_str()) + != Some("reasoning") + { + self.reasoning_collapse = None; + return false; + } + + let total = collapse.line_markups.len(); + let elapsed = collapse.started_at.elapsed(); + let progress = + (elapsed.as_secs_f32() / REASONING_COLLAPSE_DURATION.as_secs_f32()).clamp(0.0, 1.0); + // Ease-out cubic so the block decelerates as it folds away. 
+ let eased = 1.0 - (1.0 - progress).powi(3); + // Number of reasoning lines still visible above the summary. Counts down + // from `total` to 0 (only the summary remains). + let remaining = ((total as f32) * (1.0 - eased)).round() as usize; + let remaining = remaining.min(total); + + let msg_index = collapse.msg_index; + let content = + reasoning_message_content(&collapse.summary_markup, &collapse.line_markups, remaining); + let changed = self.replace_display_message_content(msg_index, content); + + if progress >= 1.0 { + self.reasoning_collapse = None; + } + changed + } + + /// Whether a reasoning-collapse animation is currently running. + pub(super) fn reasoning_collapse_active(&self) -> bool { + self.reasoning_collapse.is_some() + } + + /// Test hook: backdate the active collapse's start so `advance_*` observes a + /// specific elapsed fraction, and return the number of source reasoning lines. + #[cfg(test)] + pub(super) fn backdate_reasoning_collapse_for_test( + &mut self, + elapsed: std::time::Duration, + ) -> Option { + let collapse = self.reasoning_collapse.as_mut()?; + collapse.started_at = Instant::now() + .checked_sub(elapsed) + .unwrap_or_else(Instant::now); + Some(collapse.line_markups.len()) + } + + /// Finalize any in-flight reasoning collapse immediately (snap to summary). + /// Used when the turn ends or state is reset so no animation is left dangling. 
+ pub(super) fn finalize_reasoning_collapse(&mut self) { + if let Some(collapse) = self.reasoning_collapse.take() { + if self + .display_messages + .get(collapse.msg_index) + .map(|m| m.role.as_str()) + == Some("reasoning") + { + let content = + reasoning_message_content(&collapse.summary_markup, &collapse.line_markups, 0); + self.replace_display_message_content(collapse.msg_index, content); + } + } + self.reasoning_block_start = None; + self.reasoning_block_started_at = None; + } + pub(super) fn append_streaming_text(&mut self, text: &str) { if text.is_empty() { return; @@ -2510,6 +2709,10 @@ impl App { self.reasoning_streaming = false; self.reasoning_pending_line.clear(); self.reasoning_partial_len = 0; + // The stream (and any block offset into it) is gone; a running collapse + // targets a separate display message and is left to finish on its own. + self.reasoning_block_start = None; + self.reasoning_block_started_at = None; self.refresh_split_view_if_needed(); self.streaming_md_renderer.borrow_mut().reset(); crate::tui::mermaid::clear_streaming_preview_diagram(); @@ -2521,6 +2724,8 @@ impl App { self.reasoning_streaming = false; self.reasoning_pending_line.clear(); self.reasoning_partial_len = 0; + self.reasoning_block_start = None; + self.reasoning_block_started_at = None; self.refresh_split_view_if_needed(); self.streaming_md_renderer.borrow_mut().reset(); crate::tui::mermaid::clear_streaming_preview_diagram(); diff --git a/crates/jcode-tui/src/tui/app/local.rs b/crates/jcode-tui/src/tui/app/local.rs index b98883f7a..204730a75 100644 --- a/crates/jcode-tui/src/tui/app/local.rs +++ b/crates/jcode-tui/src/tui/app/local.rs @@ -55,6 +55,7 @@ pub(super) async fn process_turn_with_input( pub(super) fn handle_tick(app: &mut App) -> bool { let mut needs_redraw = crate::tui::periodic_redraw_required(app); + needs_redraw |= app.advance_reasoning_collapse(); app.maybe_capture_runtime_memory_heartbeat(); needs_redraw |= app.progress_copy_selection_edge_autoscroll(); 
app.progress_mouse_scroll_animation(); @@ -472,6 +473,9 @@ pub(super) fn finish_turn(app: &mut App) { app.thought_line_inserted = false; app.thinking_prefix_emitted = false; app.thinking_buffer.clear(); + // Snap any in-flight reasoning collapse straight to its summary so no + // animation is left running once the turn is idle. + app.finalize_reasoning_collapse(); app.note_runtime_memory_event_force("turn_completed", "local_turn_finished"); if !app.schedule_auto_poke_followup_if_needed() && !app.schedule_overnight_poke_followup_if_needed() diff --git a/crates/jcode-tui/src/tui/app/remote.rs b/crates/jcode-tui/src/tui/app/remote.rs index db3d3f8ab..4935da620 100644 --- a/crates/jcode-tui/src/tui/app/remote.rs +++ b/crates/jcode-tui/src/tui/app/remote.rs @@ -75,6 +75,7 @@ pub(super) async fn handle_tick(app: &mut App, remote: &mut RemoteConnection) -> .is_some_and(|state| state.kind == crate::tui::PickerKind::Model), }); let mut needs_redraw = crate::tui::periodic_redraw_required(app); + needs_redraw |= app.advance_reasoning_collapse(); app.maybe_capture_runtime_memory_heartbeat(); needs_redraw |= app.progress_copy_selection_edge_autoscroll(); app.progress_mouse_scroll_animation(); diff --git a/crates/jcode-tui/src/tui/app/state_ui_messages.rs b/crates/jcode-tui/src/tui/app/state_ui_messages.rs index 423366464..4baa11496 100644 --- a/crates/jcode-tui/src/tui/app/state_ui_messages.rs +++ b/crates/jcode-tui/src/tui/app/state_ui_messages.rs @@ -74,6 +74,8 @@ impl App { pub(super) fn replace_display_messages(&mut self, mut messages: Vec) { compact_display_messages_for_storage(&mut messages); + // Indices the collapse animation targets no longer apply to the new list. 
+ self.reasoning_collapse = None; self.display_messages = messages; self.sync_compacted_history_lazy_from_display_messages(); self.bump_display_messages_version(); @@ -336,6 +338,12 @@ impl App { pub(super) fn clear_display_messages(&mut self) { self.compacted_history_lazy = CompactedHistoryLazyState::default(); + // The transcript (and the index the collapse animation targets) is about + // to be discarded; drop any in-flight collapse so it can't mutate a stale + // or unrelated message. + self.reasoning_collapse = None; + self.reasoning_block_start = None; + self.reasoning_block_started_at = None; if !self.display_messages.is_empty() { self.display_messages.clear(); self.bump_display_messages_version(); diff --git a/crates/jcode-tui/src/tui/app/tests/reasoning_region.rs b/crates/jcode-tui/src/tui/app/tests/reasoning_region.rs index 10b5a6075..b4d1ecf76 100644 --- a/crates/jcode-tui/src/tui/app/tests/reasoning_region.rs +++ b/crates/jcode-tui/src/tui/app/tests/reasoning_region.rs @@ -17,20 +17,47 @@ fn reasoning_region_emits_dim_italic_lines_no_gutter_header_or_footer() { app.open_reasoning_region(); app.append_reasoning_text("Let me think.\nSecond thought."); - app.close_reasoning_region(None); - - let text = app.streaming_text(); - assert!(!text.contains("Thinking"), "no header expected: {text:?}"); - assert!(!text.contains('>'), "no blockquote gutter expected: {text:?}"); - assert!(!text.contains("Thought for"), "no footer expected: {text:?}"); + // While streaming, reasoning is dim+italic markup in the live stream buffer. 
+ let streaming = app.streaming_text().to_string(); + assert!( + !streaming.contains("Thinking"), + "no header expected: {streaming:?}" + ); + assert!( + !streaming.contains('>'), + "no blockquote gutter expected: {streaming:?}" + ); + assert!( + !streaming.contains("Thought for"), + "no footer expected: {streaming:?}" + ); let sentinel = jcode_tui_markdown::REASONING_SENTINEL; assert!( - text.contains(&format!("*{sentinel}Let me think.{sentinel}*")), - "first line not dim+italic: {text:?}" + streaming.contains(&format!("*{sentinel}Let me think.{sentinel}*")), + "first line not dim+italic: {streaming:?}" ); assert!( - text.contains(&format!("*{sentinel}Second thought.{sentinel}*")), - "second line not dim+italic: {text:?}" + streaming.contains(&format!("*{sentinel}Second thought.{sentinel}*")), + "second line not dim+italic: {streaming:?}" + ); + + // In `current` mode (the default), closing moves the block into a dedicated + // collapsing `"reasoning"` display message and clears it from the stream. + app.close_reasoning_region(None); + assert!( + app.streaming_text().is_empty(), + "reasoning should leave the live stream once collapsed: {:?}", + app.streaming_text() + ); + let reasoning_msg = app + .display_messages + .iter() + .find(|m| m.role == "reasoning") + .expect("reasoning message present"); + assert!( + reasoning_msg.content.contains(sentinel), + "reasoning message keeps dim+italic markup: {:?}", + reasoning_msg.content ); } @@ -44,7 +71,12 @@ fn reasoning_region_closes_before_normal_output() { app.close_reasoning_region(None); app.append_streaming_text("Final answer."); + // The answer stays in the live stream and must never be styled as reasoning. 
let text = app.streaming_text(); + assert!( + text.contains("Final answer."), + "answer present in stream: {text:?}" + ); let answer_line = text .lines() .find(|l| l.contains("Final answer.")) @@ -53,9 +85,14 @@ fn reasoning_region_closes_before_normal_output() { !answer_line.contains(jcode_tui_markdown::REASONING_SENTINEL), "final answer must not be styled as reasoning: {answer_line:?}" ); + // The reasoning collapsed into its own message; it is no longer in the stream. assert!( - text.contains("\n\nFinal answer."), - "missing blank-line separator before output: {text:?}" + !text.contains(jcode_tui_markdown::REASONING_SENTINEL), + "reasoning must not remain in the answer stream: {text:?}" + ); + assert!( + app.display_messages.iter().any(|m| m.role == "reasoning"), + "a collapsing reasoning message should exist" ); } @@ -94,11 +131,18 @@ fn reasoning_line_split_across_deltas_stays_one_run() { app.append_reasoning_text("two\n"); app.close_reasoning_region(None); - let text = app.streaming_text(); + // The split-across-deltas line is committed as a single emphasis run in the + // collapsed reasoning message. + let content = app + .display_messages + .iter() + .find(|m| m.role == "reasoning") + .map(|m| m.content.clone()) + .expect("reasoning message present"); let sentinel = jcode_tui_markdown::REASONING_SENTINEL; assert!( - text.contains(&format!("*{sentinel}one two{sentinel}*")), - "split line must be one emphasis run: {text:?}" + content.contains(&format!("*{sentinel}one two{sentinel}*")), + "split line must be one emphasis run: {content:?}" ); } @@ -112,7 +156,15 @@ fn reasoning_region_renders_dim_italic_text_without_gutter() { app.append_reasoning_text("considering options\n"); app.close_reasoning_region(None); - let lines = crate::tui::markdown::render_markdown_with_width(app.streaming_text(), Some(80)); + // In `current` mode the reasoning now lives in a dedicated collapsing message. 
+ let reasoning_content = app + .display_messages + .iter() + .find(|m| m.role == "reasoning") + .map(|m| m.content.clone()) + .expect("reasoning message present"); + + let lines = crate::tui::markdown::render_markdown_with_width(&reasoning_content, Some(80)); let body = lines .iter() .find(|l| { @@ -248,7 +300,7 @@ fn reasoning_partial_promotes_to_committed_line_on_newline() { #[test] fn reasoning_close_promotes_pending_partial_line() { // Closing the region with an in-progress (no-newline) partial promotes it to a - // committed line exactly once. + // committed line exactly once, then collapses into the reasoning message. let mut app = create_test_app(); let sentinel = jcode_tui_markdown::REASONING_SENTINEL; @@ -256,15 +308,152 @@ fn reasoning_close_promotes_pending_partial_line() { app.append_reasoning_text("final thought"); app.close_reasoning_region(None); - let text = app.streaming_text(); + // The live stream no longer carries the reasoning; it moved into its message. + assert!( + app.streaming_text().is_empty(), + "reasoning should leave the live stream once collapsed: {:?}", + app.streaming_text() + ); + let content = app + .display_messages + .iter() + .find(|m| m.role == "reasoning") + .map(|m| m.content.clone()) + .expect("reasoning message present"); assert_eq!( - text.matches(&format!("*{sentinel}final thought{sentinel}*")) + content + .matches(&format!("*{sentinel}final thought{sentinel}*")) .count(), 1, - "pending partial promoted exactly once on close: {text:?}" - ); - assert!( - text.ends_with("\n\n"), - "region terminated with blank line: {text:?}" + "pending partial promoted exactly once on close: {content:?}" ); } + +#[test] +fn reasoning_block_line_markups_keeps_only_sentinel_lines() { + use crate::tui::app::input::{reasoning_block_line_markups, reasoning_message_content}; + + let mut block = String::new(); + block.push_str(&jcode_tui_markdown::reasoning_line_markup("alpha")); + block.push('\n'); // a blank separator line (no sentinel) + 
block.push_str(&jcode_tui_markdown::reasoning_line_markup("beta")); + + let lines = reasoning_block_line_markups(&block); + assert_eq!(lines.len(), 2, "blank separators are dropped: {lines:?}"); + let sentinel = jcode_tui_markdown::REASONING_SENTINEL; + assert!(lines[0].contains(&format!("{sentinel}alpha{sentinel}"))); + assert!(lines[1].contains(&format!("{sentinel}beta{sentinel}"))); + + // Full content shows every line; remaining==0 shows only the summary. + let summary = jcode_tui_markdown::reasoning_line_markup("▸ thought"); + let full = reasoning_message_content(&summary, &lines, lines.len()); + assert!(full.contains("alpha") && full.contains("beta")); + let collapsed = reasoning_message_content(&summary, &lines, 0); + assert!(collapsed.contains("▸ thought")); + assert!(!collapsed.contains("alpha") && !collapsed.contains("beta")); + + // A partial reveal keeps the *trailing* lines (oldest fold away first). + let partial = reasoning_message_content(&summary, &lines, 1); + assert!(partial.contains("beta"), "trailing line kept: {partial:?}"); + assert!(!partial.contains("alpha"), "leading line folded: {partial:?}"); +} + +#[test] +fn reasoning_summary_markup_uses_duration_when_known() { + use crate::tui::app::input::reasoning_summary_markup; + use std::time::Duration; + + let with_secs = reasoning_summary_markup(3, Some(Duration::from_secs(12))); + assert!(with_secs.contains("▸ thought for 12s"), "{with_secs:?}"); + + let no_time = reasoning_summary_markup(4, None); + assert!(no_time.contains("▸ thought (4 lines)"), "{no_time:?}"); +} + +#[test] +fn reasoning_collapse_finalizes_to_single_summary_line() { + let mut app = create_test_app(); + + app.open_reasoning_region(); + app.append_reasoning_text("first\nsecond\nthird\n"); + app.close_reasoning_region(None); + + assert!(app.reasoning_collapse_active(), "collapse should start"); + + // Snapping finalizes the message to just the summary line. 
+ app.finalize_reasoning_collapse(); + assert!(!app.reasoning_collapse_active(), "collapse cleared on finalize"); + + let content = app + .display_messages + .iter() + .find(|m| m.role == "reasoning") + .map(|m| m.content.clone()) + .expect("reasoning message present"); + assert!(content.contains("▸ thought"), "summary present: {content:?}"); + assert!(!content.contains("first"), "lines folded away: {content:?}"); + assert!(!content.contains("third"), "lines folded away: {content:?}"); +} + +#[test] +fn reasoning_collapse_drops_when_target_message_replaced() { + let mut app = create_test_app(); + + app.open_reasoning_region(); + app.append_reasoning_text("thinking\n"); + app.close_reasoning_region(None); + assert!(app.reasoning_collapse_active()); + + // A transcript reset must invalidate the animation target safely. + app.clear_display_messages(); + assert!(!app.reasoning_collapse_active()); + // Advancing now is a no-op and must not panic. + assert!(!app.advance_reasoning_collapse()); +} + +#[test] +fn reasoning_collapse_visible_lines_shrink_monotonically_over_time() { + use crate::tui::app::input::REASONING_COLLAPSE_DURATION; + use std::time::Duration; + + let mut app = create_test_app(); + app.open_reasoning_region(); + app.append_reasoning_text("l1\nl2\nl3\nl4\nl5\nl6\n"); + app.close_reasoning_region(None); + let sentinel = jcode_tui_markdown::REASONING_SENTINEL; + + let count_visible = |app: &App| -> usize { + app.display_messages + .iter() + .find(|m| m.role == "reasoning") + .map(|m| { + m.content + .split_inclusive('\n') + .filter(|seg| seg.contains(sentinel)) + .filter(|seg| !seg.contains('▸')) + .count() + }) + .unwrap_or(0) + }; + + // Sample the eased timeline; visible reasoning lines must never increase and + // must reach a single summary line (0 source lines) at/after the duration. 
+ let dur = REASONING_COLLAPSE_DURATION; + let mut prev = usize::MAX; + for frac in [0.0_f32, 0.25, 0.5, 0.75, 1.0] { + let elapsed = Duration::from_secs_f32(dur.as_secs_f32() * frac); + app.backdate_reasoning_collapse_for_test(elapsed) + .expect("collapse active"); + app.advance_reasoning_collapse(); + let visible = count_visible(&app); + assert!( + visible <= prev, + "visible lines must not increase: frac={frac} visible={visible} prev={prev}" + ); + prev = visible; + } + + // Past the duration the animation is finalized to the summary only. + assert!(!app.reasoning_collapse_active(), "collapse should finish"); + assert_eq!(count_visible(&app), 0, "only the summary line remains"); +} diff --git a/crates/jcode-tui/src/tui/app/tui_lifecycle.rs b/crates/jcode-tui/src/tui/app/tui_lifecycle.rs index 3c925d445..d5238330f 100644 --- a/crates/jcode-tui/src/tui/app/tui_lifecycle.rs +++ b/crates/jcode-tui/src/tui/app/tui_lifecycle.rs @@ -371,6 +371,9 @@ impl App { reasoning_streaming: false, reasoning_pending_line: String::new(), reasoning_partial_len: 0, + reasoning_block_start: None, + reasoning_block_started_at: None, + reasoning_collapse: None, reload_requested: None, rebuild_requested: None, update_requested: None, @@ -771,6 +774,9 @@ impl App { reasoning_streaming: false, reasoning_pending_line: String::new(), reasoning_partial_len: 0, + reasoning_block_start: None, + reasoning_block_started_at: None, + reasoning_collapse: None, reload_requested: None, rebuild_requested: None, update_requested: None, diff --git a/crates/jcode-tui/src/tui/app/tui_state.rs b/crates/jcode-tui/src/tui/app/tui_state.rs index f4e90b82d..9a0f60a0b 100644 --- a/crates/jcode-tui/src/tui/app/tui_state.rs +++ b/crates/jcode-tui/src/tui/app/tui_state.rs @@ -596,6 +596,10 @@ impl crate::tui::TuiState for App { self.mouse_scroll_queue != 0 } + fn reasoning_collapse_animating(&self) -> bool { + self.reasoning_collapse_active() + } + fn total_session_tokens(&self) -> Option<(u64, u64)> { // In remote 
mode, use tokens from server // Independent mode doesn't currently track total tokens diff --git a/crates/jcode-tui/src/tui/app/turn.rs b/crates/jcode-tui/src/tui/app/turn.rs index f5b9c5a8c..6e63434d5 100644 --- a/crates/jcode-tui/src/tui/app/turn.rs +++ b/crates/jcode-tui/src/tui/app/turn.rs @@ -268,6 +268,8 @@ impl App { if let Some(chunk) = self.stream_buffer.flush_smooth_frame() { self.append_streaming_text(&chunk); } + // Advance the "current reasoning collapses away" animation. + self.advance_reasoning_collapse(); // Poll for background compaction completion during streaming self.poll_compaction_completion(); status_spinner_renderer.draw_full(self, terminal)?; diff --git a/crates/jcode-tui/src/tui/mod.rs b/crates/jcode-tui/src/tui/mod.rs index 5cf53e0b3..9556c23eb 100644 --- a/crates/jcode-tui/src/tui/mod.rs +++ b/crates/jcode-tui/src/tui/mod.rs @@ -217,6 +217,11 @@ pub trait TuiState { fn has_pending_mouse_scroll_animation(&self) -> bool { false } + /// Whether a "current reasoning collapses away" animation is in progress and + /// the redraw loop must keep ticking to advance it. + fn reasoning_collapse_animating(&self) -> bool { + false + } /// Optional configured keybinding label for external dictation. 
fn dictation_key_label(&self) -> Option; /// Time since app started (for startup animations) @@ -1282,6 +1287,7 @@ pub(crate) fn redraw_interval_with_policy( || !state.streaming_text().is_empty() || state.status_notice().is_some() || state.has_pending_mouse_scroll_animation() + || state.reasoning_collapse_animating() || state.copy_selection_edge_autoscroll_active() || state.has_notification() || rate_limit_countdown_redraw_active(state) @@ -1341,6 +1347,7 @@ pub(crate) fn periodic_redraw_required(state: &dyn TuiState) -> bool { || !state.streaming_text().is_empty() || state.status_notice().is_some() || state.has_pending_mouse_scroll_animation() + || state.reasoning_collapse_animating() || state.copy_selection_edge_autoscroll_active() || state.chat_overscroll_active() || state.has_notification() diff --git a/crates/jcode-tui/src/tui/ui.rs b/crates/jcode-tui/src/tui/ui.rs index 80e749d3d..b0a180777 100644 --- a/crates/jcode-tui/src/tui/ui.rs +++ b/crates/jcode-tui/src/tui/ui.rs @@ -148,8 +148,8 @@ use memory_ui::{group_into_tiles, render_memory_tiles, split_by_display_width}; use messages::get_cached_message_lines; #[cfg_attr(test, allow(unused_imports))] pub(crate) use messages::{ - render_assistant_message, render_background_task_message, render_swarm_message, - render_system_message, render_tool_message, render_usage_message, + render_assistant_message, render_background_task_message, render_reasoning_message, + render_swarm_message, render_system_message, render_tool_message, render_usage_message, }; pub use pinned_ui::{ SidePanelDebugStats, SidePanelMermaidProbe, SidePanelMermaidProbeRect, diff --git a/crates/jcode-tui/src/tui/ui_messages.rs b/crates/jcode-tui/src/tui/ui_messages.rs index 4f9eb9eb8..a7f838124 100644 --- a/crates/jcode-tui/src/tui/ui_messages.rs +++ b/crates/jcode-tui/src/tui/ui_messages.rs @@ -71,6 +71,23 @@ pub(crate) fn render_assistant_message( lines } +/// Render a collapsed/collapsing reasoning trace ("current" mode). 
The content is +/// sentinel-wrapped dim+italic markup (reasoning lines and/or a `▸ thought for Xs` +/// summary), so it reuses the standard markdown path that styles those runs dim. +pub(crate) fn render_reasoning_message( + msg: &DisplayMessage, + width: u16, + _diff_mode: crate::config::DiffDisplayMode, +) -> Vec> { + let centered = markdown::center_code_blocks(); + let wrap_width = centered_wrap_width(width, centered, 96); + let mut lines = markdown::render_markdown_with_width(&msg.content, Some(wrap_width)); + if centered { + left_pad_lines_for_centered_mode(&mut lines, width); + } + lines +} + fn render_assistant_tool_call_lines( tool_calls: &[String], width: usize, diff --git a/crates/jcode-tui/src/tui/ui_prepare.rs b/crates/jcode-tui/src/tui/ui_prepare.rs index 5fc6f4dd2..49d3bdee6 100644 --- a/crates/jcode-tui/src/tui/ui_prepare.rs +++ b/crates/jcode-tui/src/tui/ui_prepare.rs @@ -918,6 +918,20 @@ pub(super) fn prepare_body_incremental( new_line_copy_offsets.push(0); } } + "reasoning" => { + let content_width = width.saturating_sub(4); + let cached = get_cached_message_lines( + msg, + content_width, + app.diff_mode(), + render_reasoning_message, + ); + for line in cached { + new_lines.push(align_if_unset(line, align)); + new_line_raw_overrides.push(None); + new_line_copy_offsets.push(0); + } + } "background_task" => { let content_width = width.saturating_sub(4); let cached = get_cached_message_lines( @@ -1388,6 +1402,20 @@ pub(super) fn prepare_body( line_copy_offsets.push(0); } } + "reasoning" => { + let content_width = width.saturating_sub(4); + let cached = get_cached_message_lines( + msg, + content_width, + app.diff_mode(), + render_reasoning_message, + ); + for line in cached { + lines.push(align_if_unset(line, align)); + line_raw_overrides.push(None); + line_copy_offsets.push(0); + } + } "background_task" => { let content_width = width.saturating_sub(4); let cached = get_cached_message_lines( diff --git a/crates/jcode-tui/src/tui/ui_tests/prepare.rs 
b/crates/jcode-tui/src/tui/ui_tests/prepare.rs index 2b4f0dd1d..cf89a08bc 100644 --- a/crates/jcode-tui/src/tui/ui_tests/prepare.rs +++ b/crates/jcode-tui/src/tui/ui_tests/prepare.rs @@ -738,3 +738,60 @@ fn test_render_tool_message_batch_subcall_lines_alignment_unset() { } crate::tui::markdown::set_center_code_blocks(false); } + +#[test] +fn test_prepare_messages_renders_reasoning_role_dim_italic_without_sentinel() { + let _guard = crate::storage::lock_test_env(); + clear_test_render_state_for_tests(); + + // A collapsing reasoning message carries sentinel-wrapped dim/italic markup. + let mut content = String::new(); + content.push_str(&jcode_tui_markdown::reasoning_line_markup("weighing the options")); + content.push_str(&jcode_tui_markdown::reasoning_line_markup("▸ thought for 3s")); + + let state = TestState { + display_messages: vec![ + DisplayMessage::user("hi"), + DisplayMessage::reasoning(content), + ], + ..Default::default() + }; + + let prepared = prepare::prepare_messages(&state, 100, 30); + let lines = prepared.materialize_all_lines(); + + // The visible reasoning body is present, dim+italic, and sentinel-free. + let body = lines + .iter() + .find(|l| { + let joined: String = l.spans.iter().map(|s| s.content.as_ref()).collect(); + joined.contains("weighing the options") + }) + .expect("reasoning body line present"); + let rendered: String = body.spans.iter().map(|s| s.content.as_ref()).collect(); + assert!( + !rendered.contains(jcode_tui_markdown::REASONING_SENTINEL), + "sentinel must be stripped from visible reasoning: {rendered:?}" + ); + let span = body + .spans + .iter() + .find(|s| s.content.as_ref().contains("weighing")) + .expect("body span"); + assert!( + span.style + .add_modifier + .contains(ratatui::style::Modifier::ITALIC), + "reasoning body should be italic: {:?}", + span.style + ); + + // The summary line is present too. 
+ assert!( + lines.iter().any(|l| { + let joined: String = l.spans.iter().map(|s| s.content.as_ref()).collect(); + joined.contains("thought for 3s") + }), + "summary line should render" + ); +} From bfa62b451799ed3ae2df2a48920f9e2369251cf4 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:42:58 -0700 Subject: [PATCH 45/57] perf(tui): maintain display-message counters incrementally on append bump_display_messages_version recomputed display_user_message_count and display_edit_tool_message_count by scanning all display messages twice on every mutation. Appending one message at a time over a long session made counter maintenance cumulatively O(M^2). The hot append path now folds the single new message into the cached counters (O(1)) and bumps the version without a full rescan; rarer bulk/remove/replace paths still recompute fully. Add a test asserting the incrementally-maintained counters match a full recompute after interleaved pushes and removes. --- crates/jcode-tui/src/tui/app/state_ui.rs | 42 ++++++++++--- .../src/tui/app/state_ui_messages.rs | 7 ++- .../tests/remote_events_reload_02/part_02.rs | 61 +++++++++++++++++++ 3 files changed, 102 insertions(+), 8 deletions(-) diff --git a/crates/jcode-tui/src/tui/app/state_ui.rs b/crates/jcode-tui/src/tui/app/state_ui.rs index 03ca7df41..e2edf6817 100644 --- a/crates/jcode-tui/src/tui/app/state_ui.rs +++ b/crates/jcode-tui/src/tui/app/state_ui.rs @@ -34,16 +34,37 @@ impl App { self.display_edit_tool_message_count = self .display_messages .iter() - .filter(|message| { - message - .tool_data - .as_ref() - .map(|tool| tools_ui::is_edit_tool_name(&tool.name)) - .unwrap_or(false) - }) + .filter(|message| Self::display_message_is_edit_tool(message)) .count(); } + /// Whether a single display message counts as an edit-tool message for the + /// incrementally-maintained `display_edit_tool_message_count`. 
+ fn display_message_is_edit_tool(message: &DisplayMessage) -> bool { + message + .tool_data + .as_ref() + .map(|tool| tools_ui::is_edit_tool_name(&tool.name)) + .unwrap_or(false) + } + + /// Fold a single message into the cached display-message counters with the + /// given sign (+1 when added, -1 when removed). This keeps the counters + /// O(1) per mutation instead of rescanning the whole transcript via + /// `recompute_display_message_stats`, which made appending M messages one at + /// a time cumulatively O(M^2). + pub(super) fn adjust_display_message_stats(&mut self, message: &DisplayMessage, added: bool) { + let delta: isize = if added { 1 } else { -1 }; + if message.effective_role() == "user" { + self.display_user_message_count = + (self.display_user_message_count as isize + delta).max(0) as usize; + } + if Self::display_message_is_edit_tool(message) { + self.display_edit_tool_message_count = + (self.display_edit_tool_message_count as isize + delta).max(0) as usize; + } + } + pub(super) fn active_client_session_id(&self) -> Option<&str> { if self.is_remote { self.remote_session_id.as_deref() @@ -85,6 +106,13 @@ impl App { pub(super) fn bump_display_messages_version(&mut self) { self.recompute_display_message_stats(); + self.bump_display_messages_version_no_stats(); + } + + /// Bump the display-messages version without rescanning the transcript to + /// recompute counters. Callers that have already maintained the cached + /// counters incrementally (e.g. a single append) use this to stay O(1). 
+ pub(super) fn bump_display_messages_version_no_stats(&mut self) { self.display_messages_version = self.display_messages_version.wrapping_add(1); self.bump_context_revision(); self.refresh_split_view_if_needed(); diff --git a/crates/jcode-tui/src/tui/app/state_ui_messages.rs b/crates/jcode-tui/src/tui/app/state_ui_messages.rs index 4baa11496..906e4cc68 100644 --- a/crates/jcode-tui/src/tui/app/state_ui_messages.rs +++ b/crates/jcode-tui/src/tui/app/state_ui_messages.rs @@ -65,8 +65,13 @@ impl App { return; } let is_tool = message.role == "tool"; + // Maintain the cached display-message counters incrementally for this + // single append, then bump the version without a full O(M) rescan. + // Appending is the hot path; rescanning every append was O(M^2) over a + // long session. + self.adjust_display_message_stats(&message, true); self.display_messages.push(message); - self.bump_display_messages_version(); + self.bump_display_messages_version_no_stats(); if is_tool && self.diff_mode.has_side_pane() && self.diff_pane_auto_scroll { self.diff_pane_scroll = usize::MAX; } diff --git a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_02.rs b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_02.rs index 58caa3c64..965603b5d 100644 --- a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_02.rs +++ b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_02.rs @@ -169,6 +169,67 @@ fn test_remove_display_message_bumps_version() { assert_ne!(app.display_messages_version, before); } +#[test] +fn test_incremental_display_message_counts_match_full_recompute() { + let mut app = create_test_app(); + + // Interleave user, assistant, and edit-tool messages via the public append + // path, which now maintains the counters incrementally instead of + // rescanning the whole transcript. 
+ for i in 0..50 { + app.push_display_message(DisplayMessage::user(format!("prompt {i}"))); + app.push_display_message(DisplayMessage::assistant(format!("reply {i}"))); + if i % 3 == 0 { + app.push_display_message(DisplayMessage { + role: "tool".to_string(), + content: format!("edited file {i}"), + tool_calls: vec![], + duration_secs: None, + title: None, + tool_data: Some(crate::message::ToolCall { + id: format!("edit-{i}"), + name: "edit".to_string(), + input: serde_json::json!({"file_path": format!("src/file_{i}.rs")}), + intent: None, + thought_signature: None, + }), + }); + } + } + + // Remove a few messages; removal takes the full-recompute path, not an incremental decrement. + app.remove_display_message(0); + app.remove_display_message(5); + + let incremental_user = app.display_user_message_count; + let incremental_edit = app.display_edit_tool_message_count; + + let expected_user = app + .display_messages + .iter() + .filter(|m| m.effective_role() == "user") + .count(); + let expected_edit = app + .display_messages + .iter() + .filter(|m| { + m.tool_data + .as_ref() + .map(|tool| crate::tui::ui::tools_ui::is_edit_tool_name(&tool.name)) + .unwrap_or(false) + }) + .count(); + + assert_eq!( + incremental_user, expected_user, + "incrementally-maintained user count should match a full recompute" + ); + assert_eq!( + incremental_edit, expected_edit, + "incrementally-maintained edit-tool count should match a full recompute" + ); +} + #[test] fn test_handle_remote_disconnect_retryable_pending_schedules_retry() { let mut app = create_test_app(); From 6b8b63dfd42fb6253db8564492524b0fd5cc8147 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:53:48 -0700 Subject: [PATCH 46/57] fix(antigravity/gemini): recover from intermittent Gemini-3 MALFORMED_FUNCTION_CALL Gemini-3 thinking models intermittently emit Python-style pseudo-code (e.g.
print(default_api.read(...))) instead of a clean functionCall, which the Cloud Code backend rejects with finish_reason=MALFORMED_FUNCTION_CALL and empty content. Previously the runtime ended the turn with a silent empty MessageEnd, so the agent looked like it stalled with no answer. For gemini-3.1-pro-high this hit roughly half of tool turns. Three layered mitigations (per Gemini function-calling guidance / field reports): 1. Prevention: when tools are advertised, append a 'Function calling' guard to the Gemini system prompt forbidding code/namespaces (build_system_instruction_with_tool_guard). 2. Transparent retry: detect a malformed empty turn (is_retryable_empty_turn) and re-request up to twice before surfacing anything, so the agent never sees the blip. Retries force function-calling mode ANY so the model must emit a real functionCall instead of pseudo-code. 3. Surfacing: if output is still empty after retries, emit an actionable error (with the finish_reason and finishMessage) instead of a silent empty turn. Also surfaces the previously-hidden finishMessage for diagnosis. Measured on the live Antigravity backend: gemini-3.1-pro-high tool-call success went from ~50% to ~7/8 (remaining miss was a probe-deadline timeout, not malformed). Unit tests cover the guard and the retry classifier. 
--- crates/jcode-base/src/provider/antigravity.rs | 134 +++++++++++++++++- .../src/provider/antigravity_tests.rs | 53 +++++++ crates/jcode-base/src/provider/gemini.rs | 74 +++++++++- .../jcode-base/src/provider/gemini_tests.rs | 32 +++++ 4 files changed, 290 insertions(+), 3 deletions(-) diff --git a/crates/jcode-base/src/provider/antigravity.rs b/crates/jcode-base/src/provider/antigravity.rs index e64f09870..6954f1d10 100644 --- a/crates/jcode-base/src/provider/antigravity.rs +++ b/crates/jcode-base/src/provider/antigravity.rs @@ -676,6 +676,7 @@ impl AntigravityProvider { tools: &[ToolDefinition], system: &str, resume_session_id: Option<&str>, + force_function_call: bool, ) -> Result { let mut tokens = antigravity_auth::load_or_refresh_tokens().await?; let project = match tokens @@ -714,13 +715,23 @@ impl AntigravityProvider { user_prompt_id: Uuid::new_v4().to_string(), request: VertexGenerateContentRequest { contents: super::gemini::build_contents(messages), - system_instruction: super::gemini::build_system_instruction(system), + system_instruction: super::gemini::build_system_instruction_with_tool_guard( + system, + !tools_is_empty, + ), tools, tool_config: if tools_is_empty { None } else { + // On a transparent retry after a MALFORMED_FUNCTION_CALL, force + // function-calling mode `ANY` so the model must emit a real + // functionCall instead of the Python-style pseudo-code that + // triggered the malformed turn (the proven recovery for this + // failure mode). Normal turns use `AUTO`. 
Some(GeminiToolConfig { - function_calling_config: GeminiFunctionCallingConfig { mode: "AUTO" }, + function_calling_config: GeminiFunctionCallingConfig { + mode: if force_function_call { "ANY" } else { "AUTO" }, + }, }) }, session_id: resume_session_id @@ -805,6 +816,51 @@ fn model_is_claude(model: &str) -> bool { model.trim().to_ascii_lowercase().contains("claude") } +/// Whether a `generateContent` response is an abnormal turn that produced no +/// usable output (no text, no function call). This is the shape Gemini-3 +/// "thinking" models intermittently return when they emit Python-style +/// pseudo-code instead of a clean functionCall: `finish_reason == +/// MALFORMED_FUNCTION_CALL` (or another non-terminal reason) with empty content. +/// Such a turn is worth a few bounded transparent retries before surfacing an error. +/// +/// Normal terminal reasons (`STOP`, `MAX_TOKENS`, unspecified) are never treated +/// as retryable here, even with empty content, so a legitimately empty answer is +/// not retried in a loop. +fn is_retryable_empty_turn(response: &CodeAssistGenerateResponse) -> bool { + let Some(candidate) = response + .response + .as_ref() + .and_then(|r| r.candidates.as_ref()) + .and_then(|c| c.first()) + else { + // No candidate at all is handled separately (hard error), not retried here.
+ return false; + }; + let produced_output = candidate + .content + .as_ref() + .map(|content| { + content.parts.iter().any(|part| { + part.function_call.is_some() + || part.text.as_deref().is_some_and(|text| !text.is_empty()) + }) + }) + .unwrap_or(false); + if produced_output { + return false; + } + candidate + .finish_reason + .as_deref() + .map(|reason| { + !matches!( + reason.to_ascii_uppercase().as_str(), + "STOP" | "MAX_TOKENS" | "FINISH_REASON_UNSPECIFIED" | "" + ) + }) + .unwrap_or(false) +} + /// Remap model ids that the Antigravity catalog advertises but the /// `generateContent`/`streamGenerateContent` backend cannot actually service, /// onto an equivalent id that works. @@ -996,6 +1052,7 @@ impl Provider for AntigravityProvider { &tools, &system, resume_session_id.as_deref(), + false, ) .await { @@ -1005,6 +1062,36 @@ impl Provider for AntigravityProvider { return; } }; + // Gemini-3 thinking models intermittently return an empty + // `MALFORMED_FUNCTION_CALL` turn (pseudo-code instead of a clean + // functionCall). It is transient, so transparently re-request a few + // times before surfacing it; this turns a frequent hard failure into a + // near-always-successful turn without the agent loop seeing the blip. + // The retries force function-calling mode `ANY` so the model must emit + // a real functionCall rather than the pseudo-code that failed. 
+ let mut response = response; + let mut malformed_retries = 0u8; + const MAX_MALFORMED_RETRIES: u8 = 2; + while is_retryable_empty_turn(&response) && malformed_retries < MAX_MALFORMED_RETRIES { + malformed_retries += 1; + match provider + .generate_content( + &model, + &messages, + &tools, + &system, + resume_session_id.as_deref(), + true, + ) + .await + { + Ok(retried) => response = retried, + Err(err) => { + let _ = tx.send(Err(err)).await; + return; + } + } + } let _ = tx .send(Ok(StreamEvent::ConnectionPhase { phase: ConnectionPhase::Streaming, @@ -1036,6 +1123,13 @@ impl Provider for AntigravityProvider { .await; return; }; + // Track whether this candidate produced any usable output (text or a + // tool call). Gemini-3 thinking models intermittently emit Python-style + // pseudo-code instead of a clean functionCall and finish with + // `MALFORMED_FUNCTION_CALL` (or a bare `OTHER`) and empty content. If we + // silently end the turn the agent loop looks like it stalled with no + // answer, so we surface an actionable error below instead. + let mut produced_output = false; if let Some(content) = candidate.content { // Gemini 3 attaches a `thoughtSignature` to function-call parts // (and occasionally to a standalone preceding part). 
Emit tool @@ -1052,9 +1146,11 @@ impl Provider for AntigravityProvider { .filter(|sig| !sig.is_empty()) .cloned(); if let Some(text) = part.text.filter(|text| !text.is_empty()) { + produced_output = true; let _ = tx.send(Ok(StreamEvent::TextDelta(text))).await; } if let Some(function_call) = part.function_call { + produced_output = true; let signature = part_signature.clone().or_else(|| pending_signature.take()); let raw_call_id = function_call .id @@ -1096,6 +1192,40 @@ impl Provider for AntigravityProvider { } } + // An abnormal finish (typically Gemini-3's intermittent + // `MALFORMED_FUNCTION_CALL`, where the model writes pseudo-code rather + // than a valid functionCall) that yielded no text and no tool call is a + // dead turn: surface it as a retryable error instead of a silent empty + // `MessageEnd` that looks like the agent gave up. `STOP`/`MAX_TOKENS` + // are normal terminal reasons and are left to flow through as usual. + if !produced_output { + let abnormal = candidate + .finish_reason + .as_deref() + .map(|reason| { + !matches!( + reason.to_ascii_uppercase().as_str(), + "STOP" | "MAX_TOKENS" | "FINISH_REASON_UNSPECIFIED" | "" + ) + }) + .unwrap_or(false); + if abnormal { + let reason = candidate.finish_reason.as_deref().unwrap_or("unknown"); + let detail = candidate + .finish_message + .as_deref() + .filter(|msg| !msg.trim().is_empty()) + .map(|msg| format!(": {}", crate::util::truncate_str(msg.trim(), 300))) + .unwrap_or_default(); + let _ = tx + .send(Err(anyhow::anyhow!( + "Antigravity returned no usable output (finish_reason={reason}){detail}" + ))) + .await; + return; + } + } + let _ = tx .send(Ok(StreamEvent::MessageEnd { stop_reason: candidate.finish_reason.clone(), diff --git a/crates/jcode-base/src/provider/antigravity_tests.rs b/crates/jcode-base/src/provider/antigravity_tests.rs index 75c11a05d..dddf23d00 100644 --- a/crates/jcode-base/src/provider/antigravity_tests.rs +++ b/crates/jcode-base/src/provider/antigravity_tests.rs @@ -546,3 
+546,56 @@ fn antigravity_compatible_schema_strips_bounds_and_combiners_for_gpt_oss() { assert!(out["properties"]["tool_calls"].get("maxItems").is_none()); assert_eq!(out["properties"]["tool_calls"]["type"], serde_json::json!("array")); } + +#[test] +fn is_retryable_empty_turn_detects_malformed_function_call() { + // Empty content + MALFORMED_FUNCTION_CALL is the transient Gemini-3 failure we + // retry transparently. + let response: CodeAssistGenerateResponse = serde_json::from_value(serde_json::json!({ + "response": { + "candidates": [{ + "content": {}, + "finishReason": "MALFORMED_FUNCTION_CALL", + "finishMessage": "Malformed function call: print(default_api.read(...))" + }] + } + })) + .expect("decode malformed response"); + assert!(is_retryable_empty_turn(&response)); +} + +#[test] +fn is_retryable_empty_turn_ignores_normal_and_productive_turns() { + // A normal STOP turn with text is never retried. + let with_text: CodeAssistGenerateResponse = serde_json::from_value(serde_json::json!({ + "response": { + "candidates": [{ + "content": {"parts": [{"text": "hello"}]}, + "finishReason": "STOP" + }] + } + })) + .expect("decode text response"); + assert!(!is_retryable_empty_turn(&with_text)); + + // A turn with a function call is productive even with no text. + let with_call: CodeAssistGenerateResponse = serde_json::from_value(serde_json::json!({ + "response": { + "candidates": [{ + "content": {"parts": [{"functionCall": {"name": "read", "args": {}}}]}, + "finishReason": "STOP" + }] + } + })) + .expect("decode function call response"); + assert!(!is_retryable_empty_turn(&with_call)); + + // An empty STOP turn (legitimately empty answer) is not retried in a loop. 
+ let empty_stop: CodeAssistGenerateResponse = serde_json::from_value(serde_json::json!({ + "response": { + "candidates": [{ "content": {}, "finishReason": "STOP" }] + } + })) + .expect("decode empty stop response"); + assert!(!is_retryable_empty_turn(&empty_stop)); +} diff --git a/crates/jcode-base/src/provider/gemini.rs b/crates/jcode-base/src/provider/gemini.rs index b2aa15d8d..29fa508e4 100644 --- a/crates/jcode-base/src/provider/gemini.rs +++ b/crates/jcode-base/src/provider/gemini.rs @@ -512,7 +512,10 @@ impl GeminiProvider { user_prompt_id: Uuid::new_v4().to_string(), request: VertexGenerateContentRequest { contents: build_contents(messages), - system_instruction: build_system_instruction(system), + system_instruction: build_system_instruction_with_tool_guard( + system, + !tools.is_empty(), + ), tools: build_tools(tools), tool_config: if tools.is_empty() { None @@ -809,6 +812,12 @@ impl Provider for GeminiProvider { .await; return; } + // Track whether this candidate produced any usable output (text or + // a tool call). Gemini-3 thinking models intermittently emit + // Python-style pseudo-code instead of a clean functionCall and + // finish with `MALFORMED_FUNCTION_CALL` and empty content; surface + // that as a retryable error below rather than a silent empty turn. + let mut produced_output = false; if let Some(content) = candidate.content { // Gemini 3 attaches a `thoughtSignature` to function-call // parts (and occasionally to a standalone preceding part). 
@@ -826,9 +835,11 @@ impl Provider for GeminiProvider { if let Some(text) = part.text && !text.is_empty() { + produced_output = true; let _ = tx.send(Ok(StreamEvent::TextDelta(text))).await; } if let Some(function_call) = part.function_call { + produced_output = true; let signature = part_signature.clone().or_else(|| pending_signature.take()); let raw_call_id = function_call @@ -869,6 +880,38 @@ impl Provider for GeminiProvider { .await; } } + + // An abnormal finish (typically Gemini-3's intermittent + // `MALFORMED_FUNCTION_CALL`) that yielded no text and no tool call + // is a dead turn: surface it as a retryable error instead of a + // silent empty `MessageEnd`. `STOP`/`MAX_TOKENS` are normal. + if !produced_output { + let abnormal = candidate + .finish_reason + .as_deref() + .map(|reason| { + !matches!( + reason.to_ascii_uppercase().as_str(), + "STOP" | "MAX_TOKENS" | "FINISH_REASON_UNSPECIFIED" | "" + ) + }) + .unwrap_or(false); + if abnormal { + let reason = candidate.finish_reason.as_deref().unwrap_or("unknown"); + let detail = candidate + .finish_message + .as_deref() + .filter(|msg| !msg.trim().is_empty()) + .map(|msg| format!(": {}", crate::util::truncate_str(msg.trim(), 300))) + .unwrap_or_default(); + let _ = tx + .send(Err(anyhow::anyhow!( + "Gemini returned no usable output (finish_reason={reason}){detail}" + ))) + .await; + return; + } + } } let _ = tx.send(Ok(StreamEvent::MessageEnd { stop_reason })).await; @@ -1027,6 +1070,35 @@ pub(crate) fn build_system_instruction(system: &str) -> Option { } } +/// Prevention guidance appended to the Gemini system prompt when tools are +/// advertised. Gemini-3 "thinking" models intermittently emit Python-style +/// pseudo-code (e.g. `print(default_api.read(...))`) instead of a clean +/// `functionCall`, which the backend rejects with `MALFORMED_FUNCTION_CALL` and +/// empty content. 
Explicitly forbidding code/namespaces measurably reduces that +/// failure mode at no latency cost (see the Gemini function-calling guidance and +/// field reports of this exact behavior). +const GEMINI_FUNCTION_CALL_GUARD: &str = "\n\n## Function calling\n\ + - When you call a tool, emit a native function call, not code. Never write \ + Python (or any language) that calls the tool, and never wrap a call in \ + print(...) or a code block.\n\ + - Use the function name exactly as defined. Do not prepend `default_api.` \ + or any other namespace to the function name."; + +/// Build the Gemini `system_instruction`, appending [`GEMINI_FUNCTION_CALL_GUARD`] +/// when tools are advertised so the model is steered away from the +/// `MALFORMED_FUNCTION_CALL` pseudo-code failure mode. +pub(crate) fn build_system_instruction_with_tool_guard( + system: &str, + has_tools: bool, +) -> Option { + if !has_tools { + return build_system_instruction(system); + } + let mut combined = system.trim().to_string(); + combined.push_str(GEMINI_FUNCTION_CALL_GUARD); + build_system_instruction(&combined) +} + pub(crate) fn build_contents(messages: &[Message]) -> Vec { messages .iter() diff --git a/crates/jcode-base/src/provider/gemini_tests.rs b/crates/jcode-base/src/provider/gemini_tests.rs index 4dedb2fa7..6e13e71df 100644 --- a/crates/jcode-base/src/provider/gemini_tests.rs +++ b/crates/jcode-base/src/provider/gemini_tests.rs @@ -595,3 +595,35 @@ fn developer_api_response_parses_without_code_assist_envelope() { .expect("missing text"); assert_eq!(text, "hello from developer api"); } + +#[test] +fn system_instruction_tool_guard_only_applies_with_tools() { + // Without tools, the system instruction is passed through unchanged. 
+ let plain = super::build_system_instruction_with_tool_guard("You are helpful.", false) + .expect("system instruction present"); + let plain_text = plain.parts[0].text.clone().unwrap(); + assert_eq!(plain_text, "You are helpful."); + assert!(!plain_text.contains("Function calling")); + + // With tools, the MALFORMED_FUNCTION_CALL prevention guidance is appended. + let guarded = super::build_system_instruction_with_tool_guard("You are helpful.", true) + .expect("system instruction present"); + let guarded_text = guarded.parts[0].text.clone().unwrap(); + assert!(guarded_text.starts_with("You are helpful.")); + assert!(guarded_text.contains("Function calling")); + assert!(guarded_text.contains("native function call, not code")); + assert!(guarded_text.contains("default_api.")); +} + +#[test] +fn system_instruction_tool_guard_with_empty_system_still_emits_guidance() { + // An empty base system prompt plus tools must still carry the guard so the + // model is steered away from pseudo-code tool calls. + let guarded = super::build_system_instruction_with_tool_guard("", true) + .expect("guard-only instruction present"); + let text = guarded.parts[0].text.clone().unwrap(); + assert!(text.contains("Function calling")); + + // Empty system and no tools yields no instruction at all. + assert!(super::build_system_instruction_with_tool_guard("", false).is_none()); +} From 770fd943db50c0d27d37bdf4848ace8964bdf345 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:56:42 -0700 Subject: [PATCH 47/57] perf(desktop): memoize rendered body lines; stop re-wrapping transcript per mouse move Selection hit-testing (single_session_visible_body -> body viewport -> single_session_rendered_body_lines_for_tick) re-parsed markdown and re-wrapped the ENTIRE transcript on every selection mouse-move during a drag, an O(transcript) cost per pointer event. 
Add a thread-local single-entry memo keyed by the existing body cache key and return the wrapped lines as a shared Rc, so the viewport only clones the visible slice instead of the whole transcript. The render hot path keeps its separate Canvas-side cache, so this only accelerates input/scroll-metric/geometry callers. Measured on real transcripts (debug build): per-mouse-move selection hit-test p99 ~121ms -> ~0.06ms. Adds a selection_input_hittest benchmark phase that isolates this cost, plus a debug-only env gate for A/B measurement. --- crates/jcode-desktop/src/main.rs | 47 +++++++++++++--- .../src/single_session_render.rs | 56 ++++++++++++++++++- 2 files changed, 94 insertions(+), 9 deletions(-) diff --git a/crates/jcode-desktop/src/main.rs b/crates/jcode-desktop/src/main.rs index 703a68995..dfa94f896 100644 --- a/crates/jcode-desktop/src/main.rs +++ b/crates/jcode-desktop/src/main.rs @@ -5669,20 +5669,31 @@ fn benchmark_real_transcript_actions( )); // 3. Selection drag across the visible transcript while parked mid-scroll. + // This mirrors the real mouse-handler input path, which calls + // single_session_visible_body (a full transcript wrap, now memoized) and + // hit-tests the cursor on every pointer move, then redraws. 
{ let mut app = base_app.clone(); app.body_scroll_lines = (max_scroll / 2) as f32; - let viewport = single_session_body_viewport_from_lines(&app, size, 0.0, &body_lines); - let visible = single_session_visible_body(&app, size); - app.begin_selection(SelectionPoint { line: 0, column: 0 }); + let initial_visible = single_session_visible_body(&app, size); + if let Some(point) = single_session_body_point_at_position(size, 40.0, 80.0, &initial_visible) + { + app.begin_selection(point); + } else { + app.begin_selection(SelectionPoint { line: 0, column: 0 }); + } let mut font_system = benchmark_font_system(); let (mut buffers, mut window_start, mut window_end, mut last_start) = action_prime_window(&app, &body_lines, size, &mut font_system); let (samples, _) = benchmark_frame_samples(frames, |frame| { - let line = frame % viewport.lines.len().max(1); - let column = (frame * 7) % 80; - app.update_selection(SelectionPoint { line, column }); - let _ = &visible; + // Real input path: resolve the cursor against the visible body + // (full-transcript wrap, memoized) and update the selection. + let visible = single_session_visible_body(&app, size); + let y = 80.0 + (frame % 600) as f32; + let x = 40.0 + (frame % 400) as f32; + if let Some(point) = single_session_body_point_at_position(size, x, y, &visible) { + app.update_selection(point); + } action_render_window( &app, &body_lines, @@ -5699,6 +5710,28 @@ fn benchmark_real_transcript_actions( phases.push(("selection_drag", samples)); } + // 3b. Pure input-side selection hit-test cost (no redraw). This isolates the + // real per-mouse-move work the desktop selection handler does: + // single_session_visible_body (a full-transcript wrap, now memoized) plus + // cursor hit-testing. The redraw it triggers is separately cached, so this + // phase exposes the wrap/memo cost that the combined selection_drag phase + // hides behind geometry building. 
+ { + let mut app = base_app.clone(); + app.body_scroll_lines = (max_scroll / 2) as f32; + app.begin_selection(SelectionPoint { line: 0, column: 0 }); + let (samples, _) = benchmark_frame_samples(frames, |frame| { + let visible = single_session_visible_body(&app, size); + let y = 80.0 + (frame % 600) as f32; + let x = 40.0 + (frame % 400) as f32; + if let Some(point) = single_session_body_point_at_position(size, x, y, &visible) { + app.update_selection(point); + } + visible.len() + }); + phases.push(("selection_input_hittest", samples)); + } + // 4. Typing in the composer while parked at the bottom of the transcript. { let mut app = base_app.clone(); diff --git a/crates/jcode-desktop/src/single_session_render.rs b/crates/jcode-desktop/src/single_session_render.rs index 9fd0d6dd8..e3ba6264c 100644 --- a/crates/jcode-desktop/src/single_session_render.rs +++ b/crates/jcode-desktop/src/single_session_render.rs @@ -9087,7 +9087,11 @@ pub(crate) fn single_session_body_viewport_for_tick( tick: u64, smooth_scroll_lines: f32, ) -> SingleSessionBodyViewport { - let lines = single_session_rendered_body_lines_for_tick(app, size, tick); + // Borrow the memoized full body lines and only clone the visible slice via + // `single_session_body_viewport_from_lines`, instead of cloning the whole + // transcript. This keeps input-side callers (selection hit-testing on every + // mouse-move) O(visible) rather than O(transcript). + let lines = single_session_rendered_body_lines_for_tick_shared(app, size, tick); single_session_body_viewport_from_lines(app, size, smooth_scroll_lines, &lines) } @@ -9131,7 +9135,55 @@ pub(crate) fn single_session_rendered_body_lines_for_tick( size: PhysicalSize, tick: u64, ) -> Vec { - single_session_rendered_body_lines_from_raw(app, size, app.body_styled_lines_for_tick(tick)) + (*single_session_rendered_body_lines_for_tick_shared(app, size, tick)).clone() +} + +/// Shared, memoized rendered body lines for the current transcript+layout. 
+/// +/// This re-parses markdown and re-wraps the ENTIRE transcript (O(transcript)), +/// and is called from input handling (every selection mouse-move during a +/// drag), scroll-metric probing, and several geometry builders. Returning a +/// shared `Rc` lets callers that only need a slice (the viewport) avoid cloning +/// the whole transcript on every pointer event. The render hot path uses a +/// separate Canvas-side cache (`cached_single_session_body_lines`); this +/// thread-local single-entry memo accelerates the remaining callers. The key is +/// the body cache key, which already captures the message fingerprint, size, +/// text scale, and welcome/streaming state, so the cache invalidates whenever +/// any of those change. +pub(crate) fn single_session_rendered_body_lines_for_tick_shared( + app: &SingleSessionApp, + size: PhysicalSize, + tick: u64, +) -> std::rc::Rc> { + let layout_size = single_session_body_layout_cache_size(app, size); + let key = app.rendered_body_cache_key(layout_size); + thread_local! { + static RENDERED_BODY_LINES_MEMO: std::cell::RefCell>)>> = + const { std::cell::RefCell::new(None) }; + } + // Allow disabling the memo for A/B perf measurement in debug builds only; + // the production memo can never be turned off by an env var. 
+ let memo_disabled = cfg!(debug_assertions) + && std::env::var_os("JCODE_DESKTOP_DISABLE_BODY_MEMO").is_some(); + if !memo_disabled + && let Some(cached) = RENDERED_BODY_LINES_MEMO.with(|cell| { + cell.borrow() + .as_ref() + .filter(|(cached_key, _)| *cached_key == key) + .map(|(_, lines)| lines.clone()) + }) + { + return cached; + } + let lines = + single_session_rendered_body_lines_from_raw(app, size, app.body_styled_lines_for_tick(tick)); + let shared = std::rc::Rc::new(lines); + if !memo_disabled { + RENDERED_BODY_LINES_MEMO.with(|cell| { + *cell.borrow_mut() = Some((key, shared.clone())); + }); + } + shared } pub(crate) fn single_session_rendered_body_lines_from_raw( From 3ec9c48ba8b6362d9a8062a84be581e5667fd6fe Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:59:05 -0700 Subject: [PATCH 48/57] perf(ambient): pre-filter recent sessions by file mtime before parsing gather_recent_sessions fully parsed every session JSON file (the sessions dir can hold tens of thousands) just to drop those older than the 24h cutoff and keep the 20 most recent: O(all_sessions * parse) per ambient cycle. Pre-filter candidate files by filesystem mtime (with a 1h margin for write/clock skew) before loading, sort newest-first, and only parse up to a bounded budget (4x the limit) before the existing id-based sort/truncate. Behavior is preserved; work drops from O(all_sessions) to O(recent_sessions). 
--- crates/jcode-app-core/src/ambient/prompt.rs | 111 +++++++++++++------- 1 file changed, 75 insertions(+), 36 deletions(-) diff --git a/crates/jcode-app-core/src/ambient/prompt.rs b/crates/jcode-app-core/src/ambient/prompt.rs index c5bb677fb..2cca08b28 100644 --- a/crates/jcode-app-core/src/ambient/prompt.rs +++ b/crates/jcode-app-core/src/ambient/prompt.rs @@ -181,50 +181,89 @@ pub fn gather_recent_sessions(since: Option>) -> Vec = Vec::new(); if let Ok(entries) = std::fs::read_dir(&sessions_dir) { for entry in entries.flatten() { let path = entry.path(); - if path.extension().map(|e| e == "json").unwrap_or(false) - && let Some(stem) = path.file_stem().and_then(|s| s.to_str()) - && let Ok(session) = crate::session::Session::load(stem) - { - // Skip debug sessions - if session.is_debug { - continue; - } - // Only include sessions updated after cutoff - if session.updated_at < cutoff { - continue; - } - let duration = (session.updated_at - session.created_at) - .num_seconds() - .max(0); - let extraction = if session.messages.is_empty() { - "no messages" - } else { - // Heuristic: if session closed normally, assume extracted - match &session.status { - crate::session::SessionStatus::Closed => "extracted", - crate::session::SessionStatus::Crashed { .. } => "missed", - crate::session::SessionStatus::Active => "in progress", - _ => "unknown", - } - }; - recent.push(RecentSessionInfo { - id: session.id.clone(), - status: session.status.display().to_string(), - topic: session.display_title().map(ToOwned::to_owned), - duration_secs: duration, - extraction_status: extraction.to_string(), - }); + if !path.extension().map(|e| e == "json").unwrap_or(false) { + continue; + } + let Ok(modified) = entry.metadata().and_then(|meta| meta.modified()) else { + // If we can't read mtime, keep the file as a candidate so we + // don't silently drop a possibly-recent session. 
+ candidates.push((path, std::time::SystemTime::UNIX_EPOCH)); + continue; + }; + let modified_dt: DateTime = modified.into(); + if modified_dt < mtime_cutoff { + continue; + } + candidates.push((path, modified)); + } + } + // Newest files first so we can stop early once we have enough. + candidates.sort_by(|a, b| b.1.cmp(&a.1)); + + let mut recent = Vec::new(); + // Load somewhat more than the final limit by mtime so the subsequent + // id-based sort/truncate picks the true most-recent set even when file + // mtime order and id (timestamp) order disagree near the boundary, while + // still bounding work far below "load every session file". + let load_budget = RECENT_SESSION_LIMIT.saturating_mul(4).max(RECENT_SESSION_LIMIT); + let mut loaded = 0usize; + for (path, _modified) in candidates { + if loaded >= load_budget { + break; + } + if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) + && let Ok(session) = crate::session::Session::load(stem) + { + loaded += 1; + // Skip debug sessions + if session.is_debug { + continue; + } + // Only include sessions updated after cutoff + if session.updated_at < cutoff { + continue; } + let duration = (session.updated_at - session.created_at) + .num_seconds() + .max(0); + let extraction = if session.messages.is_empty() { + "no messages" + } else { + // Heuristic: if session closed normally, assume extracted + match &session.status { + crate::session::SessionStatus::Closed => "extracted", + crate::session::SessionStatus::Crashed { .. 
} => "missed", + crate::session::SessionStatus::Active => "in progress", + _ => "unknown", + } + }; + recent.push(RecentSessionInfo { + id: session.id.clone(), + status: session.status.display().to_string(), + topic: session.display_title().map(ToOwned::to_owned), + duration_secs: duration, + extraction_status: extraction.to_string(), + }); } } - // Sort by most recent first (we don't have created_at easily, sort by id which embeds timestamp) + // Sort by most recent first (id embeds a timestamp). recent.sort_by(|a, b| b.id.cmp(&a.id)); - recent.truncate(20); // Cap at 20 to keep prompt reasonable + recent.truncate(RECENT_SESSION_LIMIT); recent } From b5e96e52664722a12467fc5aa54839b36870a04c Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 17:01:42 -0700 Subject: [PATCH 49/57] perf(tui): normalize inline-picker fuzzy pattern once per keystroke picker_fuzzy_score re-lowercased and re-collected the filter pattern into a Vec on every call, i.e. once per entry inside the per-keystroke filter loop (O(entries * pattern)). Hoist pattern normalization out via picker_fuzzy_pattern + picker_fuzzy_score_with_pattern and normalize once per filter pass. Scoring behavior is unchanged. 
--- crates/jcode-app-core/src/tool/todo.rs | 4 + crates/jcode-task-types/src/lib.rs | 5 + .../src/tui/app/inline_interactive.rs | 20 +- .../app/tests/commands_accounts_02/part_01.rs | 3 + .../app/tests/commands_accounts_02/part_02.rs | 3 + .../tests/remote_events_reload_01/part_01.rs | 1 + .../tests/remote_events_reload_01/part_02.rs | 4 + .../tests/remote_events_reload_02/part_01.rs | 4 + .../tests/remote_startup_input_01/part_01.rs | 1 + .../tests/remote_startup_input_02/part_01.rs | 2 + .../tui/app/tests/scroll_copy_01/part_02.rs | 1 + .../app/tests/state_model_poke_02/part_01.rs | 1 + .../src/tui/app/tests/state_model_poke_03.rs | 11 + crates/jcode-tui/src/tui/app/todos_view.rs | 104 ++++++ crates/jcode-tui/src/tui/app/tui_state.rs | 1 + .../jcode-tui/src/tui/info_widget_overview.rs | 1 + crates/jcode-tui/src/tui/info_widget_tests.rs | 62 +++ crates/jcode-tui/src/tui/info_widget_todos.rs | 353 +++++++++++------- 18 files changed, 445 insertions(+), 136 deletions(-) diff --git a/crates/jcode-app-core/src/tool/todo.rs b/crates/jcode-app-core/src/tool/todo.rs index 56174424a..42fcd87ce 100644 --- a/crates/jcode-app-core/src/tool/todo.rs +++ b/crates/jcode-app-core/src/tool/todo.rs @@ -57,6 +57,10 @@ impl Tool for TodoTool { "type": "string", "description": "ID." }, + "group": { + "type": "string", + "description": "Optional group label. Todos sharing a group render together under one header. Use one group per coherent goal (e.g. 'optimize rendering'). When the user steers into new work, start a new group instead of renaming the existing one. Omit for an ungrouped flat list." + }, "confidence": { "type": "integer", "minimum": 0, diff --git a/crates/jcode-task-types/src/lib.rs b/crates/jcode-task-types/src/lib.rs index bd14c783c..80bb5d091 100644 --- a/crates/jcode-task-types/src/lib.rs +++ b/crates/jcode-task-types/src/lib.rs @@ -200,6 +200,11 @@ pub struct TodoItem { pub status: String, pub priority: String, pub id: String, + /// Optional group label. 
Todos that share a group are displayed together + /// under a single header. Use one group per coherent goal; when work is + /// steered into a new area, start a new group instead of renaming. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub group: Option<String>, /// Forward-looking confidence, from 0-100, that this todo can be completed correctly. #[serde(default, skip_serializing_if = "Option::is_none")] pub confidence: Option, diff --git a/crates/jcode-tui/src/tui/app/inline_interactive.rs b/crates/jcode-tui/src/tui/app/inline_interactive.rs index 4106e9e1a..94ff6f947 100644 --- a/crates/jcode-tui/src/tui/app/inline_interactive.rs +++ b/crates/jcode-tui/src/tui/app/inline_interactive.rs @@ -2700,11 +2700,22 @@ impl App { } pub(super) fn picker_fuzzy_score(pattern: &str, text: &str) -> Option<i32> { - let pat: Vec<char> = pattern + let pat = Self::picker_fuzzy_pattern(pattern); + Self::picker_fuzzy_score_with_pattern(&pat, text) + } + + /// Normalize a fuzzy-match pattern (lowercase, drop whitespace) into chars. + /// Hoist this out of per-entry scoring so a filter pass over N entries + /// normalizes the pattern once instead of N times per keystroke. + pub(super) fn picker_fuzzy_pattern(pattern: &str) -> Vec<char> { + pattern .to_lowercase() .chars() .filter(|c| !c.is_whitespace()) - .collect(); + .collect() + } + + pub(super) fn picker_fuzzy_score_with_pattern(pat: &[char], text: &str) -> Option<i32> { let txt: Vec<char> = text.to_lowercase().chars().collect(); if pat.is_empty() { return Some(0); @@ -2750,13 +2761,16 @@ impl App { if picker.filter.is_empty() { picker.filtered = (0..picker.entries.len()).collect(); } else { + // Normalize the filter pattern once per keystroke instead of once per + // entry inside picker_fuzzy_score.
+ let pat = Self::picker_fuzzy_pattern(&picker.filter); let mut scored: Vec<(usize, i32)> = picker .entries .iter() .enumerate() .filter_map(|(i, m)| { let filter_text = picker.filter_text(m); - Self::picker_fuzzy_score(&picker.filter, &filter_text).map(|s| { + Self::picker_fuzzy_score_with_pattern(&pat, &filter_text).map(|s| { let usage_bonus = m.usage_score.min(i32::MAX as u32) as i32; let bonus = usage_bonus + if m.recommended { 5 } else { 0 }; (i, s + bonus) diff --git a/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_01.rs index 109d32c9b..fb3ebb0e9 100644 --- a/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_01.rs +++ b/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_01.rs @@ -669,6 +669,7 @@ fn test_improve_status_summarizes_current_todos() { &app.session.id, &[ crate::todo::TodoItem { + group: None, id: "one".to_string(), content: "Profile startup path".to_string(), status: "in_progress".to_string(), @@ -679,6 +680,7 @@ fn test_improve_status_summarizes_current_todos() { completion_confidence: None, }, crate::todo::TodoItem { + group: None, id: "two".to_string(), content: "Add regression test".to_string(), status: "completed".to_string(), @@ -770,6 +772,7 @@ fn test_improve_resume_uses_saved_mode_and_current_todos() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "resume1".to_string(), content: "Refactor command parsing".to_string(), status: "in_progress".to_string(), diff --git a/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_02.rs b/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_02.rs index 75318fb15..c69427b69 100644 --- a/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_02.rs +++ b/crates/jcode-tui/src/tui/app/tests/commands_accounts_02/part_02.rs @@ -127,6 +127,7 @@ fn test_refactor_status_summarizes_current_todos() { &app.session.id, &[ crate::todo::TodoItem { + 
group: None, id: "one".to_string(), content: "Split giant module".to_string(), status: "in_progress".to_string(), @@ -137,6 +138,7 @@ fn test_refactor_status_summarizes_current_todos() { completion_confidence: None, }, crate::todo::TodoItem { + group: None, id: "two".to_string(), content: "Run review subagent".to_string(), status: "completed".to_string(), @@ -177,6 +179,7 @@ fn test_refactor_resume_uses_saved_mode_and_current_todos() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "resume1".to_string(), content: "Extract review prompt builder".to_string(), status: "in_progress".to_string(), diff --git a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_01.rs b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_01.rs index 1c68ff9b9..a398b3962 100644 --- a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_01.rs +++ b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_01.rs @@ -1022,6 +1022,7 @@ fn test_remote_done_auto_pokes_again_when_todos_remain() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Continue working".to_string(), status: "pending".to_string(), diff --git a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_02.rs b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_02.rs index f72f6b552..e1d8dbb7f 100644 --- a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_02.rs +++ b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_01/part_02.rs @@ -82,6 +82,7 @@ fn test_remote_auto_poke_followup_preserves_visible_timer_and_stays_hidden() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Continue working".to_string(), status: "pending".to_string(), @@ -133,6 +134,7 @@ fn test_remote_auto_poke_completion_above_threshold_only_updates_ui() { crate::todo::save_todos( &app.session.id, 
&[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Finished work".to_string(), status: "completed".to_string(), @@ -170,6 +172,7 @@ fn test_remote_auto_poke_completion_below_threshold_tells_model_to_keep_working( crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Needs validation".to_string(), status: "completed".to_string(), @@ -209,6 +212,7 @@ fn test_remote_poke_status_and_off_update_state() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Continue working".to_string(), status: "pending".to_string(), diff --git a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_01.rs index 17a05a4d0..b0741706f 100644 --- a/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_01.rs +++ b/crates/jcode-tui/src/tui/app/tests/remote_events_reload_02/part_01.rs @@ -9,6 +9,7 @@ fn test_remote_poke_queues_when_turn_is_in_progress() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Continue working".to_string(), status: "pending".to_string(), @@ -50,6 +51,7 @@ fn test_remote_poke_queues_when_turn_is_in_progress() { &app.session.id, &[ crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Continue working".to_string(), status: "pending".to_string(), @@ -60,6 +62,7 @@ fn test_remote_poke_queues_when_turn_is_in_progress() { completion_confidence: None, }, crate::todo::TodoItem { + group: None, id: "todo-2".to_string(), content: "Handle the newly discovered follow-up".to_string(), status: "pending".to_string(), @@ -148,6 +151,7 @@ fn test_remote_interrupted_auto_poke_requeues_after_deferred_poke() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Resume after 
interrupt".to_string(), status: "pending".to_string(), diff --git a/crates/jcode-tui/src/tui/app/tests/remote_startup_input_01/part_01.rs b/crates/jcode-tui/src/tui/app/tests/remote_startup_input_01/part_01.rs index 7e5aa9b50..02eb99f1c 100644 --- a/crates/jcode-tui/src/tui/app/tests/remote_startup_input_01/part_01.rs +++ b/crates/jcode-tui/src/tui/app/tests/remote_startup_input_01/part_01.rs @@ -16,6 +16,7 @@ fn test_finish_turn_does_not_duplicate_existing_poke_followup() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Keep going".to_string(), status: "pending".to_string(), diff --git a/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs index bb765c636..d47bd6b78 100644 --- a/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs +++ b/crates/jcode-tui/src/tui/app/tests/remote_startup_input_02/part_01.rs @@ -736,6 +736,7 @@ fn test_create_transfer_session_from_parent_copies_todos_and_uses_compacted_cont crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Carry this forward".to_string(), status: "pending".to_string(), @@ -871,6 +872,7 @@ fn test_escape_interrupt_disables_auto_poke_while_processing() { app.queued_messages .push(super::commands::build_poke_message(&[ crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "keep going".to_string(), status: "pending".to_string(), diff --git a/crates/jcode-tui/src/tui/app/tests/scroll_copy_01/part_02.rs b/crates/jcode-tui/src/tui/app/tests/scroll_copy_01/part_02.rs index 370e43373..b50da2af1 100644 --- a/crates/jcode-tui/src/tui/app/tests/scroll_copy_01/part_02.rs +++ b/crates/jcode-tui/src/tui/app/tests/scroll_copy_01/part_02.rs @@ -261,6 +261,7 @@ fn test_remote_escape_interrupt_disables_auto_poke_while_processing() { app.queued_messages 
.push(super::commands::build_poke_message(&[ crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "keep going".to_string(), status: "pending".to_string(), diff --git a/crates/jcode-tui/src/tui/app/tests/state_model_poke_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/state_model_poke_02/part_01.rs index 693b17d88..086662f47 100644 --- a/crates/jcode-tui/src/tui/app/tests/state_model_poke_02/part_01.rs +++ b/crates/jcode-tui/src/tui/app/tests/state_model_poke_02/part_01.rs @@ -910,6 +910,7 @@ fn test_context_command_reports_session_context_snapshot() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "one".to_string(), content: "Inspect context summary".to_string(), status: "pending".to_string(), diff --git a/crates/jcode-tui/src/tui/app/tests/state_model_poke_03.rs b/crates/jcode-tui/src/tui/app/tests/state_model_poke_03.rs index aa830a570..cb2deb442 100644 --- a/crates/jcode-tui/src/tui/app/tests/state_model_poke_03.rs +++ b/crates/jcode-tui/src/tui/app/tests/state_model_poke_03.rs @@ -1858,6 +1858,7 @@ fn test_poke_arms_auto_poke_until_todos_are_done() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Finish the remaining task".to_string(), status: "pending".to_string(), @@ -1888,6 +1889,7 @@ fn test_poke_status_reports_current_state() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Finish the remaining task".to_string(), status: "pending".to_string(), @@ -1940,6 +1942,7 @@ fn test_poke_off_disarms_and_clears_queued_followup() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Keep going".to_string(), status: "pending".to_string(), @@ -1987,6 +1990,7 @@ fn test_poke_queues_when_turn_is_in_progress() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: 
"todo-1".to_string(), content: "Finish the remaining task".to_string(), status: "pending".to_string(), @@ -2021,6 +2025,7 @@ fn test_poke_queues_when_turn_is_in_progress() { &app.session.id, &[ crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Finish the remaining task".to_string(), status: "pending".to_string(), @@ -2031,6 +2036,7 @@ fn test_poke_queues_when_turn_is_in_progress() { completion_confidence: None, }, crate::todo::TodoItem { + group: None, id: "todo-2".to_string(), content: "Pick up the newly discovered task".to_string(), status: "pending".to_string(), @@ -2088,6 +2094,7 @@ fn test_finish_turn_auto_pokes_again_when_todos_remain() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Keep going".to_string(), status: "in_progress".to_string(), @@ -2118,6 +2125,7 @@ fn test_finish_turn_auto_poke_queues_confidence_summary_when_todos_done() { &app.session.id, &[ crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Finish risky provider path".to_string(), status: "completed".to_string(), @@ -2128,6 +2136,7 @@ fn test_finish_turn_auto_poke_queues_confidence_summary_when_todos_done() { completion_confidence: Some(80), }, crate::todo::TodoItem { + group: None, id: "todo-2".to_string(), content: "Document straightforward behavior".to_string(), status: "completed".to_string(), @@ -2191,6 +2200,7 @@ fn test_finish_turn_without_auto_poke_does_not_queue_confidence_summary() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Done without poke".to_string(), status: "completed".to_string(), @@ -2224,6 +2234,7 @@ fn test_finish_turn_auto_poke_preserves_visible_turn_started() { crate::todo::save_todos( &app.session.id, &[crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Keep going".to_string(), status: "in_progress".to_string(), diff --git 
a/crates/jcode-tui/src/tui/app/todos_view.rs b/crates/jcode-tui/src/tui/app/todos_view.rs index 8644955b8..5f04f0d93 100644 --- a/crates/jcode-tui/src/tui/app/todos_view.rs +++ b/crates/jcode-tui/src/tui/app/todos_view.rs @@ -275,6 +275,29 @@ fn build_todos_view_markdown(session_id: Option<&str>, todos: &[TodoItem]) -> St ("cancelled", "Cancelled"), ]; + if let Some(groups) = grouped_todos_view(todos) { + for (group, items) in groups { + let group_name = group.as_deref().unwrap_or("Other"); + let group_total = items.len(); + let group_done = items.iter().filter(|t| t.status == "completed").count(); + markdown.push_str(&format!( + "\n## {} ({}/{})\n", + group_name, group_done, group_total + )); + for (status, heading) in sections { + let status_items = sorted_group_items_for_status(&items, status); + if status_items.is_empty() { + continue; + } + markdown.push_str(&format!("\n### {}\n\n", heading)); + for todo in status_items { + markdown.push_str(&format_todo_markdown(todo)); + } + } + } + return markdown; + } + for (status, heading) in sections { let items = sorted_todos_for_status(todos, status); if items.is_empty() { @@ -289,6 +312,49 @@ fn build_todos_view_markdown(session_id: Option<&str>, todos: &[TodoItem]) -> St markdown } +/// Group key for the side-panel view, treating empty/whitespace as ungrouped. +fn todo_group_key(todo: &TodoItem) -> Option<String> { + todo.group + .as_deref() + .map(str::trim) + .filter(|group| !group.is_empty()) + .map(|group| group.to_string()) +} + +/// Partition todos into ordered groups (first-seen order, ungrouped last). +/// Returns `None` when no todo declares a group so callers keep the flat layout.
+fn grouped_todos_view(todos: &[TodoItem]) -> Option<Vec<(Option<String>, Vec<&TodoItem>)>> { + if !todos.iter().any(|todo| todo_group_key(todo).is_some()) { + return None; + } + let mut groups: Vec<(Option<String>, Vec<&TodoItem>)> = Vec::new(); + for todo in todos { + let key = todo_group_key(todo); + if let Some(entry) = groups.iter_mut().find(|(existing, _)| *existing == key) { + entry.1.push(todo); + } else { + groups.push((key, vec![todo])); + } + } + groups.sort_by_key(|(key, _)| key.is_none()); + Some(groups) +} + +fn sorted_group_items_for_status<'a>(items: &[&'a TodoItem], status: &str) -> Vec<&'a TodoItem> { + let mut filtered: Vec<&TodoItem> = items + .iter() + .copied() + .filter(|todo| todo.status == status) + .collect(); + filtered.sort_by(|a, b| { + priority_rank(&a.priority) + .cmp(&priority_rank(&b.priority)) + .then_with(|| a.content.cmp(&b.content)) + .then_with(|| a.id.cmp(&b.id)) + }); + filtered +} + fn sorted_todos_for_status<'a>(todos: &'a [TodoItem], status: &str) -> Vec<&'a TodoItem> { let mut items: Vec<&TodoItem> = todos.iter().filter(|todo| todo.status == status).collect(); items.sort_by(|a, b| { @@ -405,6 +471,7 @@ fn hash_todos_payload(session_id: Option<&str>, todos: &[TodoItem]) -> u64 { todo.content.hash(&mut hasher); todo.status.hash(&mut hasher); todo.priority.hash(&mut hasher); + todo.group.hash(&mut hasher); todo.confidence.hash(&mut hasher); todo.completion_confidence.hash(&mut hasher); todo.blocked_by.hash(&mut hasher); @@ -441,6 +508,7 @@ mod tests { content: content.to_string(), status: status.to_string(), priority: priority.to_string(), + group: None, confidence, completion_confidence, blocked_by: Vec::new(), @@ -495,4 +563,40 @@ mod tests { assert_ne!(before, after); } + + #[test] + fn todos_view_markdown_groups_items_under_group_headers() { + let mut grouped_a = todo("g1", "Cut frame allocs", "in_progress", "high", Some(80), None); + grouped_a.group = Some("optimize rendering".to_string()); + let mut grouped_b = todo("g2", "Batch draw calls",
"completed", "medium", Some(70), Some(90)); + grouped_b.group = Some("optimize rendering".to_string()); + let mut other = todo("o1", "Fix scrollback", "pending", "low", Some(60), None); + other.group = Some("scrollback".to_string()); + let ungrouped = todo("u1", "Misc cleanup", "pending", "low", Some(60), None); + + let markdown = build_todos_view_markdown( + Some("session_test"), + &[grouped_a, grouped_b, other, ungrouped], + ); + + assert!(markdown.contains("## optimize rendering (1/2)"), "{markdown}"); + assert!(markdown.contains("## scrollback (0/1)"), "{markdown}"); + assert!(markdown.contains("## Other (0/1)"), "{markdown}"); + // Status sub-headings nest under groups. + assert!(markdown.contains("### In progress"), "{markdown}"); + // First-seen group order, ungrouped bucket last. + let opt = markdown.find("## optimize rendering").unwrap(); + let scroll = markdown.find("## scrollback").unwrap(); + let other_idx = markdown.find("## Other").unwrap(); + assert!(opt < scroll && scroll < other_idx, "{markdown}"); + } + + #[test] + fn todos_view_hash_changes_when_group_changes() { + let mut todos = vec![todo("g", "Group hash", "pending", "high", Some(80), None)]; + let before = hash_todos_payload(Some("session_test"), &todos); + todos[0].group = Some("rendering".to_string()); + let after = hash_todos_payload(Some("session_test"), &todos); + assert_ne!(before, after); + } } diff --git a/crates/jcode-tui/src/tui/app/tui_state.rs b/crates/jcode-tui/src/tui/app/tui_state.rs index 9a0f60a0b..f5b0739f5 100644 --- a/crates/jcode-tui/src/tui/app/tui_state.rs +++ b/crates/jcode-tui/src/tui/app/tui_state.rs @@ -1023,6 +1023,7 @@ impl crate::tui::TuiState for App { status: item.status.clone(), priority: item.priority.clone(), id: item.id.clone(), + group: None, blocked_by: item.blocked_by.clone(), assigned_to: item.assigned_to.clone(), confidence: None, diff --git a/crates/jcode-tui/src/tui/info_widget_overview.rs b/crates/jcode-tui/src/tui/info_widget_overview.rs index 
be4e1b08a..a1df179b8 100644 --- a/crates/jcode-tui/src/tui/info_widget_overview.rs +++ b/crates/jcode-tui/src/tui/info_widget_overview.rs @@ -262,6 +262,7 @@ mod tests { fn compute_page_layout_keeps_multiple_expanded_pages_when_height_allows() { let data = InfoWidgetData { todos: vec![TodoItem { + group: None, content: "ship refactor".to_string(), status: "pending".to_string(), priority: "high".to_string(), diff --git a/crates/jcode-tui/src/tui/info_widget_tests.rs b/crates/jcode-tui/src/tui/info_widget_tests.rs index 31f08b08d..0849b49fb 100644 --- a/crates/jcode-tui/src/tui/info_widget_tests.rs +++ b/crates/jcode-tui/src/tui/info_widget_tests.rs @@ -99,6 +99,7 @@ fn todos_widgets_show_item_and_aggregate_confidence() { let data = InfoWidgetData { todos: vec![ crate::todo::TodoItem { + group: None, id: "todo-1".to_string(), content: "Validate confidence UI".to_string(), status: "in_progress".to_string(), @@ -109,6 +110,7 @@ fn todos_widgets_show_item_and_aggregate_confidence() { assigned_to: None, }, crate::todo::TodoItem { + group: None, id: "todo-2".to_string(), content: "Ship completed item".to_string(), status: "completed".to_string(), @@ -136,9 +138,68 @@ fn todos_widgets_show_item_and_aggregate_confidence() { assert!(compact_text.contains("86%")); } +#[test] +fn todos_widgets_render_group_headers_when_groups_present() { + let mk = |group: Option<&str>, id: &str, status: &str| crate::todo::TodoItem { + group: group.map(|g| g.to_string()), + id: id.to_string(), + content: format!("task {id}"), + status: status.to_string(), + priority: "medium".to_string(), + confidence: Some(80), + completion_confidence: None, + blocked_by: Vec::new(), + assigned_to: None, + }; + let data = InfoWidgetData { + todos: vec![ + mk(Some("optimize rendering"), "a", "completed"), + mk(Some("optimize rendering"), "b", "in_progress"), + mk(Some("fix scrollback"), "c", "pending"), + mk(None, "d", "pending"), + ], + ..Default::default() + }; + + let expanded = 
lines_text(&render_todos_expanded(&data, Rect::new(0, 0, 80, 14))); + // Group headers appear with per-group progress counters, first-seen order, + // and the ungrouped bucket renders under "Other". + assert!(expanded.contains("optimize rendering"), "{expanded}"); + assert!(expanded.contains("1/2"), "{expanded}"); + assert!(expanded.contains("fix scrollback"), "{expanded}"); + assert!(expanded.contains("Other"), "{expanded}"); + let opt_idx = expanded.find("optimize rendering").unwrap(); + let fix_idx = expanded.find("fix scrollback").unwrap(); + let other_idx = expanded.find("Other").unwrap(); + assert!(opt_idx < fix_idx, "first-seen group order: {expanded}"); + assert!(fix_idx < other_idx, "ungrouped bucket last: {expanded}"); +} + +#[test] +fn todos_widgets_stay_flat_without_groups() { + let mk = |id: &str, status: &str| crate::todo::TodoItem { + group: None, + id: id.to_string(), + content: format!("task {id}"), + status: status.to_string(), + priority: "medium".to_string(), + confidence: Some(80), + completion_confidence: None, + blocked_by: Vec::new(), + assigned_to: None, + }; + let data = InfoWidgetData { + todos: vec![mk("a", "completed"), mk("b", "pending")], + ..Default::default() + }; + let expanded = lines_text(&render_todos_expanded(&data, Rect::new(0, 0, 80, 14))); + assert!(!expanded.contains("Other"), "no group bucket: {expanded}"); +} + #[test] fn todos_widget_renders_exact_pips_for_small_lists() { let mk = |status: &str| crate::todo::TodoItem { + group: None, id: status.to_string(), content: format!("item {status}"), status: status.to_string(), @@ -991,6 +1052,7 @@ fn placements_never_include_border_only_widgets() { ..Default::default() }), todos: vec![crate::todo::TodoItem { + group: None, content: "ship patch".to_string(), status: "in_progress".to_string(), priority: "high".to_string(), diff --git a/crates/jcode-tui/src/tui/info_widget_todos.rs b/crates/jcode-tui/src/tui/info_widget_todos.rs index afae1453a..c25e6c496 100644 --- 
a/crates/jcode-tui/src/tui/info_widget_todos.rs +++ b/crates/jcode-tui/src/tui/info_widget_todos.rs @@ -161,6 +161,189 @@ fn push_aggregate_confidence_suffix(spans: &mut Vec>, data: &InfoW )); } +/// Normalize a todo's group label, treating empty/whitespace as ungrouped. +fn todo_group_key(todo: &crate::todo::TodoItem) -> Option { + todo.group + .as_deref() + .map(str::trim) + .filter(|group| !group.is_empty()) + .map(|group| group.to_string()) +} + +/// Partition todos into ordered groups, preserving the order groups first +/// appear. Ungrouped items collapse into a trailing `None` bucket. Returns +/// `None` when no todo declares a group, so callers fall back to the flat list. +fn grouped_todos( + todos: &[crate::todo::TodoItem], +) -> Option, Vec<&crate::todo::TodoItem>)>> { + if !todos.iter().any(|todo| todo_group_key(todo).is_some()) { + return None; + } + let mut groups: Vec<(Option, Vec<&crate::todo::TodoItem>)> = Vec::new(); + for todo in todos { + let key = todo_group_key(todo); + if let Some(entry) = groups.iter_mut().find(|(existing, _)| *existing == key) { + entry.1.push(todo); + } else { + groups.push((key, vec![todo])); + } + } + // Keep the ungrouped bucket last; sort_by_key is stable so named groups + // retain their first-seen order. 
+ groups.sort_by_key(|(key, _)| key.is_none()); + Some(groups) +} + +fn status_sort_rank(status: &str) -> u8 { + match status { + "in_progress" => 0, + "pending" => 1, + "completed" => 2, + "cancelled" => 3, + _ => 4, + } +} + +fn sort_todos_by_status<'a>(todos: &[&'a crate::todo::TodoItem]) -> Vec<&'a crate::todo::TodoItem> { + let mut sorted: Vec<&crate::todo::TodoItem> = todos.to_vec(); + sorted.sort_by(|a, b| status_sort_rank(&a.status).cmp(&status_sort_rank(&b.status))); + sorted +} + +fn push_group_header( + lines: &mut Vec>, + name: &str, + items: &[&crate::todo::TodoItem], + inner: Rect, +) { + let total = items.len(); + let completed = items.iter().filter(|t| t.status == "completed").count(); + let counter = format!(" {}/{}", completed, total); + let max_name = inner + .width + .saturating_sub(counter.len() as u16) + .max(4) as usize; + let highlight = items.iter().any(|t| t.status == "in_progress"); + let name_style = if highlight { + Style::default().fg(rgb(255, 210, 130)).bold() + } else { + Style::default().fg(rgb(170, 175, 205)).bold() + }; + lines.push(Line::from(vec![ + Span::styled(truncate_smart(name, max_name), name_style), + Span::styled(counter, Style::default().fg(rgb(120, 120, 140))), + ])); +} + +/// Render one todo as a line. `show_priority_marker` adds the `!` high-priority +/// marker (used by the expanded widget); `indent` is the leading-space depth +/// used when items sit under a group header. 
+fn push_todo_item_line( + lines: &mut Vec>, + todo: &crate::todo::TodoItem, + inner: Rect, + show_priority_marker: bool, + indent: usize, +) { + let is_blocked = !todo.blocked_by.is_empty(); + let (icon, status_color) = if is_blocked && todo.status != "completed" { + ("⊳", rgb(180, 140, 100)) + } else { + match todo.status.as_str() { + "completed" => ("✓", rgb(100, 180, 100)), + "in_progress" => ("▶", rgb(255, 200, 100)), + "cancelled" => ("✗", rgb(120, 80, 80)), + _ => ("○", rgb(120, 120, 130)), + } + }; + + let priority_marker = if show_priority_marker { + match todo.priority.as_str() { + "high" => ("!", rgb(255, 120, 100)), + _ => ("", rgb(120, 120, 130)), + } + } else { + ("", rgb(120, 120, 130)) + }; + + let suffix = if is_blocked && todo.status != "completed" { + " (blocked)" + } else { + "" + }; + + let reserved = indent as u16 + + 3 + + priority_marker.0.len() as u16 + + suffix.len() as u16 + + todo_confidence_suffix_width(todo); + let max_len = inner.width.saturating_sub(reserved) as usize; + let content = truncate_smart(&todo.content, max_len); + + let text_color = if todo.status == "completed" { + rgb(100, 100, 110) + } else if is_blocked { + rgb(120, 120, 130) + } else if todo.status == "in_progress" { + rgb(200, 200, 210) + } else { + rgb(160, 160, 170) + }; + + let mut spans = Vec::new(); + if indent > 0 { + spans.push(Span::raw(" ".repeat(indent))); + } + spans.push(Span::styled( + format!("{} ", icon), + Style::default().fg(status_color), + )); + if !priority_marker.0.is_empty() { + spans.push(Span::styled( + priority_marker.0, + Style::default().fg(priority_marker.1), + )); + } + spans.push(Span::styled(content, Style::default().fg(text_color))); + push_todo_confidence_suffix(&mut spans, todo); + if !suffix.is_empty() { + spans.push(Span::styled( + suffix.to_string(), + Style::default().fg(rgb(100, 100, 110)), + )); + } + lines.push(Line::from(spans)); +} + +/// Render todos partitioned by group, honoring a `max_lines` budget that counts +/// both 
group headers and item rows. Returns the rendered lines plus the number +/// of todo items actually shown (so callers can render a "+N more" footer). +fn render_grouped_todo_lines( + groups: &[(Option, Vec<&crate::todo::TodoItem>)], + inner: Rect, + show_priority_marker: bool, + max_lines: usize, +) -> (Vec>, usize) { + let mut lines: Vec> = Vec::new(); + let mut shown = 0usize; + for (group, items) in groups { + if lines.len() >= max_lines { + break; + } + let header_name = group.as_deref().unwrap_or("Other"); + push_group_header(&mut lines, header_name, items, inner); + for todo in sort_todos_by_status(items) { + if lines.len() >= max_lines { + break; + } + push_todo_item_line(&mut lines, todo, inner, show_priority_marker, 2); + shown += 1; + } + } + (lines, shown) +} + + /// Render todos widget content pub(super) fn render_todos_widget(data: &InfoWidgetData, inner: Rect) -> Vec> { if data.todos.is_empty() { @@ -193,71 +376,33 @@ pub(super) fn render_todos_widget(data: &InfoWidgetData, inner: Rect) -> Vec shown { + lines.push(Line::from(vec![Span::styled( + format!(" +{} more", total - shown), + Style::default().fg(rgb(100, 100, 110)), + )])); + } + return lines; + } + // Sort todos: in_progress first, then pending, then completed let mut sorted_todos: Vec<&crate::todo::TodoItem> = data.todos.iter().collect(); - sorted_todos.sort_by(|a, b| { - let order = |s: &str| match s { - "in_progress" => 0, - "pending" => 1, - "completed" => 2, - "cancelled" => 3, - _ => 4, - }; - order(&a.status).cmp(&order(&b.status)) - }); + sorted_todos.sort_by(|a, b| status_sort_rank(&a.status).cmp(&status_sort_rank(&b.status))); // Render todos (limit based on available height) - let available_lines = inner.height.saturating_sub(1) as usize; // Account for header - for todo in sorted_todos.iter().take(available_lines.min(5)) { - let is_blocked = !todo.blocked_by.is_empty(); - let (icon, status_color) = if is_blocked && todo.status != "completed" { - ("⊳", rgb(180, 140, 100)) - } else 
{ - match todo.status.as_str() { - "completed" => ("✓", rgb(100, 180, 100)), - "in_progress" => ("▶", rgb(255, 200, 100)), - "cancelled" => ("✗", rgb(120, 80, 80)), - _ => ("○", rgb(120, 120, 130)), - } - }; - - let suffix = if is_blocked && todo.status != "completed" { - " (blocked)" - } else { - "" - }; - let max_len = inner - .width - .saturating_sub(3 + suffix.len() as u16 + todo_confidence_suffix_width(todo)) - as usize; - let content = truncate_smart(&todo.content, max_len); - - let text_color = if todo.status == "completed" { - rgb(100, 100, 110) - } else if is_blocked { - rgb(120, 120, 130) - } else if todo.status == "in_progress" { - rgb(200, 200, 210) - } else { - rgb(160, 160, 170) - }; - - let mut spans = vec![ - Span::styled(format!("{} ", icon), Style::default().fg(status_color)), - Span::styled(content, Style::default().fg(text_color)), - ]; - push_todo_confidence_suffix(&mut spans, todo); - if !suffix.is_empty() { - spans.push(Span::styled( - suffix.to_string(), - Style::default().fg(rgb(100, 100, 110)), - )); - } - lines.push(Line::from(spans)); + for todo in sorted_todos.iter().take(budget) { + push_todo_item_line(&mut lines, todo, inner, false, 0); } // Show count of remaining items - let shown = available_lines.min(5).min(sorted_todos.len()); + let shown = budget.min(sorted_todos.len()); if data.todos.len() > shown { let remaining = data.todos.len() - shown; lines.push(Line::from(vec![Span::styled( @@ -301,86 +446,28 @@ pub(super) fn render_todos_expanded(data: &InfoWidgetData, inner: Rect) -> Vec = data.todos.iter().collect(); - sorted_todos.sort_by(|a, b| { - let order = |s: &str| match s { - "in_progress" => 0, - "pending" => 1, - "completed" => 2, - "cancelled" => 3, - _ => 4, - }; - order(&a.status).cmp(&order(&b.status)) - }); - - // Render todos with priority colors let available_lines = MAX_TODO_LINES.saturating_sub(1); // Account for header - for todo in sorted_todos.iter().take(available_lines) { - let is_blocked = 
!todo.blocked_by.is_empty(); - let (icon, status_color) = if is_blocked && todo.status != "completed" { - ("⊳", rgb(180, 140, 100)) - } else { - match todo.status.as_str() { - "completed" => ("✓", rgb(100, 180, 100)), - "in_progress" => ("▶", rgb(255, 200, 100)), - "cancelled" => ("✗", rgb(120, 80, 80)), - _ => ("○", rgb(120, 120, 130)), - } - }; - - // Priority indicator - let priority_marker = match todo.priority.as_str() { - "high" => ("!", rgb(255, 120, 100)), - "medium" => ("", rgb(200, 180, 100)), - _ => ("", rgb(120, 120, 130)), - }; - - let suffix = if is_blocked && todo.status != "completed" { - " (blocked)" - } else { - "" - }; - let max_len = inner - .width - .saturating_sub(4 + suffix.len() as u16 + todo_confidence_suffix_width(todo)) - as usize; - let content = truncate_smart(&todo.content, max_len); - - // Dim completed and blocked items - let text_color = if todo.status == "completed" { - rgb(100, 100, 110) - } else if is_blocked { - rgb(120, 120, 130) - } else if todo.status == "in_progress" { - rgb(200, 200, 210) - } else { - rgb(160, 160, 170) - }; - - let mut spans = vec![Span::styled( - format!("{} ", icon), - Style::default().fg(status_color), - )]; - - if !priority_marker.0.is_empty() { - spans.push(Span::styled( - priority_marker.0, - Style::default().fg(priority_marker.1), - )); - } - - spans.push(Span::styled(content, Style::default().fg(text_color))); - push_todo_confidence_suffix(&mut spans, todo); - if !suffix.is_empty() { - spans.push(Span::styled( - suffix.to_string(), + // Grouped layout when any todo declares a group; otherwise the flat list. 
+ if let Some(groups) = grouped_todos(&data.todos) { + let (group_lines, shown) = render_grouped_todo_lines(&groups, inner, true, available_lines); + lines.extend(group_lines); + if total > shown { + lines.push(Line::from(vec![Span::styled( + format!(" +{} more", total - shown), Style::default().fg(rgb(100, 100, 110)), - )); + )])); } + return lines; + } + + // Sort todos: in_progress first, then pending, then completed + let mut sorted_todos: Vec<&crate::todo::TodoItem> = data.todos.iter().collect(); + sorted_todos.sort_by(|a, b| status_sort_rank(&a.status).cmp(&status_sort_rank(&b.status))); - lines.push(Line::from(spans)); + // Render todos with priority colors + for todo in sorted_todos.iter().take(available_lines) { + push_todo_item_line(&mut lines, todo, inner, true, 0); } // Show count of remaining items From 209fcc29f21b2fbb9a807040e06dbacb431a4510 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 17:03:18 -0700 Subject: [PATCH 50/57] perf(session-picker): avoid cloning cached search refs when narrowing search_matched_session_refs cloned the cached match set into candidates and then cloned the new matches back into the cache: two full-list clones per narrowing keystroke. Take the cached refs in place via mem::take (it is about to be overwritten anyway), eliminating the candidates clone. Behavior unchanged. 
--- crates/jcode-tui/src/tui/session_picker/filter.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/jcode-tui/src/tui/session_picker/filter.rs b/crates/jcode-tui/src/tui/session_picker/filter.rs index 5cc1e7602..ccc1f176e 100644 --- a/crates/jcode-tui/src/tui/session_picker/filter.rs +++ b/crates/jcode-tui/src/tui/session_picker/filter.rs @@ -39,8 +39,11 @@ impl SessionPicker { let can_narrow_cached = !self.cached_search_query.is_empty() && normalized.starts_with(&self.cached_search_query); + // When narrowing, reuse the previous match set in place via mem::take + // instead of cloning it into `candidates` and then cloning the new + // matches back into the cache (two full-list clones per keystroke). let candidates = if can_narrow_cached { - self.cached_search_refs.clone() + std::mem::take(&mut self.cached_search_refs) } else { self.all_session_refs() }; From 7b25bdb1f82c59d87aeee8712ba0b9fac46fe3cb Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 17:18:55 -0700 Subject: [PATCH 51/57] perf(tui): compute copy-selection status metrics without building selection text copy_selection_status built the entire selected string via current_copy_selection_text just to report char/line counts in the status line. This ran on every render frame while in copy mode, including every drag move, so a large selection (e.g. select-all) re-allocated and re-joined the whole transcript text each frame. Add copy_selection_metrics (and a raw-lines fast path) that counts chars/lines using the same slicing logic without allocating the joined string, and use it for the status line. Add a test asserting the metrics exactly match the built selection text's char/line counts. 
--- .../tui/app/tests/scroll_copy_02/part_01.rs | 33 ++++++++++ crates/jcode-tui/src/tui/app/tui_state.rs | 19 +++--- crates/jcode-tui/src/tui/ui.rs | 66 +++++++++++++++++++ crates/jcode-tui/src/tui/ui/copy_selection.rs | 53 +++++++++++++++ 4 files changed, 163 insertions(+), 8 deletions(-) diff --git a/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs b/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs index 5f2fdb54d..797c8e0c6 100644 --- a/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs +++ b/crates/jcode-tui/src/tui/app/tests/scroll_copy_02/part_01.rs @@ -115,6 +115,39 @@ fn test_copy_selection_select_all_uses_rendered_chat_text_without_copy_badges() ); } +#[test] +fn test_copy_selection_metrics_match_built_selection_text() { + let _render_lock = scroll_render_test_lock(); + let (mut app, mut terminal) = create_copy_test_app(); + + render_and_snap(&app, &mut terminal); + app.handle_key(KeyCode::Char('y'), KeyModifiers::ALT) + .unwrap(); + assert!(app.select_all_in_copy_mode()); + + // The allocation-free metrics path used by the status line must agree with + // the char/line counts of the actually-built selection text. 
+ let range = app + .normalized_copy_selection() + .expect("normalized selection range"); + let text = app + .current_copy_selection_text() + .expect("selection text for full transcript"); + let (chars, lines) = + crate::tui::ui::copy_selection_metrics(range).expect("selection metrics"); + + assert_eq!( + chars, + text.chars().count(), + "metrics char count should match built selection text" + ); + assert_eq!( + lines, + text.lines().count().max(1), + "metrics line count should match built selection text" + ); +} + #[test] fn test_copy_selection_full_user_prompt_line_skips_prompt_chrome() { let _render_lock = scroll_render_test_lock(); diff --git a/crates/jcode-tui/src/tui/app/tui_state.rs b/crates/jcode-tui/src/tui/app/tui_state.rs index f5b0739f5..5f0427843 100644 --- a/crates/jcode-tui/src/tui/app/tui_state.rs +++ b/crates/jcode-tui/src/tui/app/tui_state.rs @@ -1496,19 +1496,22 @@ impl crate::tui::TuiState for App { return None; } - let text = self.current_copy_selection_text().unwrap_or_default(); - let has_selection = !text.is_empty(); + // Compute selection metrics without building the full selected string, + // which previously re-allocated the entire selection on every render + // frame and drag move (O(selection) per frame; a "select all" rebuilt + // the whole transcript text repeatedly). 
+ let (selected_chars, selected_lines) = self + .normalized_copy_selection() + .and_then(crate::tui::ui::copy_selection_metrics) + .unwrap_or((0, 0)); + let has_selection = selected_chars > 0; Some(crate::tui::CopySelectionStatus { pane: self .current_copy_selection_pane() .unwrap_or(crate::tui::CopySelectionPane::Chat), has_action: has_selection, - selected_chars: text.chars().count(), - selected_lines: if has_selection { - text.lines().count().max(1) - } else { - 0 - }, + selected_chars, + selected_lines: if has_selection { selected_lines.max(1) } else { 0 }, dragging: self.copy_selection_dragging, }) } diff --git a/crates/jcode-tui/src/tui/ui.rs b/crates/jcode-tui/src/tui/ui.rs index b0a180777..8d3471151 100644 --- a/crates/jcode-tui/src/tui/ui.rs +++ b/crates/jcode-tui/src/tui/ui.rs @@ -1884,6 +1884,72 @@ pub(crate) fn copy_selection_text(range: crate::tui::CopySelectionRange) -> Opti Some(out) } +/// Compute `(char_count, line_count)` for the current copy selection without +/// allocating the full joined selection string. Mirrors `copy_selection_text` +/// so the status line "N chars · M lines" matches what would be copied, but is +/// allocation-free so it can run cheaply on every render frame / drag move. 
+pub(crate) fn copy_selection_metrics( + range: crate::tui::CopySelectionRange, +) -> Option<(usize, usize)> { + if range.start.pane != range.end.pane { + return None; + } + let snapshot = copy_snapshot_for_pane(range.start.pane)?; + let (start, end) = + if (range.start.abs_line, range.start.column) <= (range.end.abs_line, range.end.column) { + (range.start, range.end) + } else { + (range.end, range.start) + }; + + if start.abs_line >= snapshot.wrapped_plain_line_count() + || end.abs_line >= snapshot.wrapped_plain_line_count() + { + return None; + } + + if let Some(metrics) = + copy_selection::copy_selection_metrics_from_raw_lines(&snapshot, start, end) + { + return Some(metrics); + } + + let mut chars = 0usize; + let mut lines = 0usize; + for abs_line in start.abs_line..=end.abs_line { + if abs_line > start.abs_line { + chars += 1; // joining '\n' + } + lines += 1; + let text = snapshot.wrapped_plain_line(abs_line)?; + if abs_line != start.abs_line && abs_line != end.abs_line { + let copy_start = snapshot.wrapped_copy_offset(abs_line).unwrap_or(0); + if copy_start == 0 { + chars += text.chars().count(); + continue; + } + } + let line_width = line_display_width(&text); + let copy_start = snapshot.wrapped_copy_offset(abs_line).unwrap_or(0); + let start_col = if abs_line == start.abs_line { + clamp_display_col(&text, start.column).max(copy_start) + } else { + copy_start + }; + let end_col = if abs_line == end.abs_line { + clamp_display_col(&text, end.column).max(copy_start) + } else { + line_width + }; + if end_col < start_col { + continue; + } + chars += display_col_slice(&text, start_col, end_col).chars().count(); + } + + Some((chars, lines.max(1))) +} + pub(crate) fn link_target_from_screen(column: u16, row: u16) -> Option { let point = copy_point_from_screen(column, row)?; let snapshot = copy_snapshot_for_pane(point.pane)?; diff --git a/crates/jcode-tui/src/tui/ui/copy_selection.rs b/crates/jcode-tui/src/tui/ui/copy_selection.rs index 72b594729..641c0e486 100644 
--- a/crates/jcode-tui/src/tui/ui/copy_selection.rs +++ b/crates/jcode-tui/src/tui/ui/copy_selection.rs @@ -102,6 +102,59 @@ pub(super) fn copy_selection_text_from_raw_lines( Some(out) } +/// Selection metrics (character count and line count) for the raw-lines path, +/// computed without allocating the full joined selection string. Mirrors the +/// slicing in [`copy_selection_text_from_raw_lines`] exactly so the displayed +/// "N chars · M lines" matches what would actually be copied. +pub(super) fn copy_selection_metrics_from_raw_lines( + snapshot: &CopyViewportSnapshot, + start: crate::tui::CopySelectionPoint, + end: crate::tui::CopySelectionPoint, +) -> Option<(usize, usize)> { + if snapshot.raw_plain_line_count() == 0 || snapshot.wrapped_line_map(start.abs_line).is_none() { + return None; + } + + let start = raw_selection_point(snapshot, start)?; + let end = raw_selection_point(snapshot, end)?; + if start.raw_line >= snapshot.raw_plain_line_count() + || end.raw_line >= snapshot.raw_plain_line_count() + { + return None; + } + + let mut chars = 0usize; + let mut lines = 0usize; + for raw_line in start.raw_line..=end.raw_line { + if raw_line > start.raw_line { + chars += 1; // the joining '\n' + } + lines += 1; + let text = snapshot.raw_plain_line(raw_line)?; + if raw_line != start.raw_line && raw_line != end.raw_line { + chars += text.chars().count(); + continue; + } + let line_width = line_display_width(&text); + let start_col = if raw_line == start.raw_line { + clamp_display_col(&text, start.column) + } else { + 0 + }; + let end_col = if raw_line == end.raw_line { + clamp_display_col(&text, end.column) + } else { + line_width + }; + if end_col < start_col { + continue; + } + chars += display_col_slice(&text, start_col, end_col).chars().count(); + } + + Some((chars, lines.max(1))) +} + pub(super) fn link_target_from_snapshot( snapshot: &CopyViewportSnapshot, point: crate::tui::CopySelectionPoint, From aaf9b4be97fcd7481d93bad2c2ed4709199c875b Mon Sep 17 00:00:00 
2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 17:21:55 -0700 Subject: [PATCH 52/57] perf(tui): compute mermaid/image regions in one reverse pass (O(L) not O(L^2)) For each image/mermaid placeholder, body prep scanned forward through all following blank lines to compute the placeholder height. A message with many placeholders each followed by long blank runs made this O(wrapped_lines^2). Precompute, in a single reverse pass, the blank-run length starting at every line; the placeholder height is then an O(1) lookup. Extracted into a shared compute_image_regions helper used by both wrap_lines and wrap_lines_with_map. Behavior is identical (height = 1 + trailing blank run). --- crates/jcode-tui/src/tui/ui_prepare.rs | 84 +++++++++++++------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/crates/jcode-tui/src/tui/ui_prepare.rs b/crates/jcode-tui/src/tui/ui_prepare.rs index 49d3bdee6..2d2becc6d 100644 --- a/crates/jcode-tui/src/tui/ui_prepare.rs +++ b/crates/jcode-tui/src/tui/ui_prepare.rs @@ -201,6 +201,46 @@ fn is_error_copy_content(content: &str) -> bool { trimmed.starts_with("Error:") || trimmed.starts_with("error:") || trimmed.starts_with("Failed:") } +/// Build the image regions for an image/mermaid placeholder in `wrapped_lines`, +/// where each placeholder "owns" the run of blank lines that follow it. +/// +/// Done in a single reverse pass that precomputes, for every line, the length +/// of the blank run starting at that line. The previous implementation scanned +/// forward through the trailing blanks for every placeholder, which is O(L^2) +/// when a message has many placeholders each followed by long blank runs. 
+fn compute_image_regions(wrapped_lines: &[ratatui::text::Line<'static>]) -> Vec { + fn is_blank_line(line: &ratatui::text::Line<'static>) -> bool { + line.spans.is_empty() + || (line.spans.len() == 1 && line.spans[0].content.is_empty()) + } + + let len = wrapped_lines.len(); + // blank_run[i] = number of consecutive blank lines starting at index i. + let mut blank_run = vec![0usize; len + 1]; + for idx in (0..len).rev() { + blank_run[idx] = if is_blank_line(&wrapped_lines[idx]) { + blank_run[idx + 1] + 1 + } else { + 0 + }; + } + + let mut image_regions = Vec::new(); + for (idx, line) in wrapped_lines.iter().enumerate() { + if let Some(hash) = super::super::mermaid::parse_image_placeholder(line) { + // The placeholder line plus the blank run immediately after it. + let height = (1 + blank_run[idx + 1]).min(u16::MAX as usize) as u16; + image_regions.push(ImageRegion { + abs_line_idx: idx, + end_line: idx + height as usize, + hash, + height, + }); + } + } + image_regions +} + fn error_copy_target(content: &str, rendered_line_count: usize) -> Option { copy_target_for_kind(CopyTargetKind::Error, content, rendered_line_count) } @@ -1644,27 +1684,7 @@ fn wrap_lines( wrapped_idx += count; } - let mut image_regions = Vec::new(); - for (idx, line) in wrapped_lines.iter().enumerate() { - if let Some(hash) = super::super::mermaid::parse_image_placeholder(line) { - let mut height = 1u16; - for subsequent in wrapped_lines.iter().skip(idx + 1) { - if subsequent.spans.is_empty() - || (subsequent.spans.len() == 1 && subsequent.spans[0].content.is_empty()) - { - height += 1; - } else { - break; - } - } - image_regions.push(ImageRegion { - abs_line_idx: idx, - end_line: idx + height as usize, - hash, - height, - }); - } - } + let image_regions = compute_image_regions(&wrapped_lines); let wrapped_plain_lines = Arc::new(wrapped_lines.iter().map(ui::line_plain_text).collect()); @@ -1763,27 +1783,7 @@ fn wrap_lines_with_map( } raw_to_wrapped.push(wrapped_idx); - let mut image_regions = 
Vec::new(); - for (idx, line) in wrapped_lines.iter().enumerate() { - if let Some(hash) = super::super::mermaid::parse_image_placeholder(line) { - let mut height = 1u16; - for subsequent in wrapped_lines.iter().skip(idx + 1) { - if subsequent.spans.is_empty() - || (subsequent.spans.len() == 1 && subsequent.spans[0].content.is_empty()) - { - height += 1; - } else { - break; - } - } - image_regions.push(ImageRegion { - abs_line_idx: idx, - end_line: idx + height as usize, - hash, - height, - }); - } - } + let image_regions = compute_image_regions(&wrapped_lines); let mut edit_tool_ranges = Vec::new(); for (msg_idx, file_path, raw_start, raw_end, expandable) in edit_ranges { From 498bcc153a800a886c19d061912b5ceac073c834 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 17:34:30 -0700 Subject: [PATCH 53/57] feat(swarm): enrich swarm list with live activity, churn, turns, and todo progress swarm list previously returned only a shallow roster (name, role, status, files, age). Enrich each agent row with: - live activity (processing + current tool name) - provider/model - token churn over a recent ~10s window + cumulative tokens - turn count - todo progress (completed/total) - contextual idle/active duration label (idle Ns when ready, Ns when running) - completion report when finished Token churn and turn count are tracked in a new lock-free per-session metrics registry (jcode-base::session_metrics) rather than on the Agent struct, because swarm list reads stats while an agent may hold its own Mutex lock mid-turn (try_lock fails exactly when churn is most interesting). Metrics are recorded from the streaming turn loop and run_turn, and forgotten on session disconnect. handle_comm_list now joins swarm membership with live session state and todos, sharing the runtime-extras gathering helper in comm_sync. 
--- crates/jcode-app-core/src/agent/turn_loops.rs | 1 + .../src/agent/turn_streaming_mpsc.rs | 8 + .../src/protocol_tests/comm_responses.rs | 1 + .../src/server/client_comm_channels.rs | 1 + .../src/server/client_comm_context.rs | 119 +++++++--- .../src/server/client_comm_tests.rs | 7 + .../src/server/client_disconnect_cleanup.rs | 1 + .../src/server/client_lifecycle.rs | 2 + .../src/server/client_lightweight_control.rs | 2 + crates/jcode-app-core/src/server/comm_sync.rs | 80 +++++++ .../src/tool/communicate_tests.rs | 3 + .../tool/communicate_tests/input_format.rs | 10 +- crates/jcode-base/src/lib.rs | 1 + crates/jcode-base/src/session_metrics.rs | 210 ++++++++++++++++++ crates/jcode-protocol/src/comm_format.rs | 127 ++++++++++- crates/jcode-protocol/src/lib.rs | 32 ++- .../src/protocol_tests/comm_responses.rs | 1 + 17 files changed, 564 insertions(+), 42 deletions(-) create mode 100644 crates/jcode-base/src/session_metrics.rs diff --git a/crates/jcode-app-core/src/agent/turn_loops.rs b/crates/jcode-app-core/src/agent/turn_loops.rs index f0bd81e45..706cf678d 100644 --- a/crates/jcode-app-core/src/agent/turn_loops.rs +++ b/crates/jcode-app-core/src/agent/turn_loops.rs @@ -9,6 +9,7 @@ impl Agent { pub(super) async fn run_turn(&mut self, print_output: bool) -> Result { self.set_log_context(); + crate::session_metrics::record_turn(&self.session.id); let mut final_text = String::new(); let trace = trace_enabled(); let mut context_limit_retries = 0u32; diff --git a/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs b/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs index 8671b00fe..fa8246a81 100644 --- a/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs +++ b/crates/jcode-app-core/src/agent/turn_streaming_mpsc.rs @@ -813,6 +813,14 @@ impl Agent { usage_cache_read, usage_cache_creation, ); + + let input = usage_input.unwrap_or(0); + let output = usage_output.unwrap_or(0); + let total = input + .saturating_add(output) + 
.saturating_add(usage_cache_read.unwrap_or(0)) + .saturating_add(usage_cache_creation.unwrap_or(0)); + crate::session_metrics::record_token_usage(&self.session.id, total, output); } if usage_input.is_some() diff --git a/crates/jcode-app-core/src/protocol_tests/comm_responses.rs b/crates/jcode-app-core/src/protocol_tests/comm_responses.rs index 7c886ddf8..13c0186c3 100644 --- a/crates/jcode-app-core/src/protocol_tests/comm_responses.rs +++ b/crates/jcode-app-core/src/protocol_tests/comm_responses.rs @@ -158,6 +158,7 @@ fn test_comm_members_roundtrip_includes_status() -> Result<()> { latest_completion_report: None, live_attachments: Some(0), status_age_secs: Some(12), + ..Default::default() }], }; diff --git a/crates/jcode-app-core/src/server/client_comm_channels.rs b/crates/jcode-app-core/src/server/client_comm_channels.rs index e701e1f6e..54ff66cd2 100644 --- a/crates/jcode-app-core/src/server/client_comm_channels.rs +++ b/crates/jcode-app-core/src/server/client_comm_channels.rs @@ -93,6 +93,7 @@ pub(super) async fn handle_comm_channel_members( latest_completion_report: member.latest_completion_report.clone(), live_attachments: Some(member.event_txs.len()), status_age_secs: Some(member.last_status_change.elapsed().as_secs()), + ..Default::default() }) }) .collect(); diff --git a/crates/jcode-app-core/src/server/client_comm_context.rs b/crates/jcode-app-core/src/server/client_comm_context.rs index cfe7bc8b6..b99a1b56a 100644 --- a/crates/jcode-app-core/src/server/client_comm_context.rs +++ b/crates/jcode-app-core/src/server/client_comm_context.rs @@ -2,6 +2,7 @@ use super::{ SharedContext, SwarmEvent, SwarmEventType, SwarmMember, fanout_session_event, record_swarm_event, }; +use super::debug::ClientConnectionInfo; use crate::protocol::{AgentInfo, ContextEntry, NotificationType, ServerEvent}; use std::collections::{HashMap, HashSet}; use std::path::PathBuf; @@ -187,6 +188,10 @@ pub(super) async fn handle_comm_read( let _ = 
client_event_tx.send(ServerEvent::CommContext { id, entries }); } +#[expect( + clippy::too_many_arguments, + reason = "comm list joins swarm membership, file touches, live sessions, and connection activity" +)] pub(super) async fn handle_comm_list( id: u64, req_session_id: String, @@ -194,6 +199,8 @@ pub(super) async fn handle_comm_list( swarm_members: &Arc>>, swarms_by_id: &Arc>>>, files_touched_by_session: &Arc>>>, + sessions: &super::SessionAgents, + client_connections: &Arc>>, ) { let swarm_id = swarm_id_for_session(&req_session_id, swarm_members).await; @@ -206,37 +213,89 @@ pub(super) async fn handle_comm_list( .unwrap_or_default() }; - let members = swarm_members.read().await; - let touches = files_touched_by_session.read().await; - - let member_list: Vec = swarm_session_ids - .iter() - .filter_map(|sid| { - members.get(sid).map(|member| { - let mut files: Vec = touches - .get(sid) - .into_iter() - .flat_map(|paths| paths.iter()) - .map(|path| path.display().to_string()) - .collect(); - files.sort(); - - AgentInfo { - session_id: sid.clone(), - friendly_name: member.friendly_name.clone(), - files_touched: files, - status: Some(member.status.clone()), - detail: member.detail.clone(), - role: Some(member.role.clone()), - is_headless: Some(member.is_headless), - report_back_to_session_id: member.report_back_to_session_id.clone(), - latest_completion_report: member.latest_completion_report.clone(), - live_attachments: Some(member.event_txs.len()), - status_age_secs: Some(member.last_status_change.elapsed().as_secs()), - } + // Snapshot the static member fields first, releasing the members lock + // before gathering per-session runtime extras (which briefly lock + // individual agents and read the connection map). 
+ struct MemberStatic { + session_id: String, + friendly_name: Option, + files: Vec, + status: String, + detail: Option, + role: String, + is_headless: bool, + report_back_to_session_id: Option, + latest_completion_report: Option, + live_attachments: usize, + status_age_secs: u64, + } + + let statics: Vec = { + let members = swarm_members.read().await; + let touches = files_touched_by_session.read().await; + swarm_session_ids + .iter() + .filter_map(|sid| { + members.get(sid).map(|member| { + let mut files: Vec = touches + .get(sid) + .into_iter() + .flat_map(|paths| paths.iter()) + .map(|path| path.display().to_string()) + .collect(); + files.sort(); + MemberStatic { + session_id: sid.clone(), + friendly_name: member.friendly_name.clone(), + files, + status: member.status.clone(), + detail: member.detail.clone(), + role: member.role.clone(), + is_headless: member.is_headless, + report_back_to_session_id: member.report_back_to_session_id.clone(), + latest_completion_report: member.latest_completion_report.clone(), + live_attachments: member.event_txs.len(), + status_age_secs: member.last_status_change.elapsed().as_secs(), + } + }) }) - }) - .collect(); + .collect() + }; + + let mut member_list: Vec = Vec::with_capacity(statics.len()); + for m in statics { + let extras = super::comm_sync::member_runtime_extras( + &m.session_id, + m.status == "running", + sessions, + client_connections, + ) + .await; + + member_list.push(AgentInfo { + session_id: m.session_id, + friendly_name: m.friendly_name, + files_touched: m.files, + status: Some(m.status), + detail: m.detail, + role: Some(m.role), + is_headless: Some(m.is_headless), + report_back_to_session_id: m.report_back_to_session_id, + latest_completion_report: m.latest_completion_report, + live_attachments: Some(m.live_attachments), + status_age_secs: Some(m.status_age_secs), + activity: extras.activity, + provider_name: extras.provider_name, + provider_model: extras.provider_model, + turn_count: extras.turn_count, + 
recent_total_tokens: extras.recent_total_tokens, + recent_output_tokens: extras.recent_output_tokens, + recent_window_secs: extras.recent_window_secs, + cumulative_total_tokens: extras.cumulative_total_tokens, + todos_completed: extras.todos_completed, + todos_total: extras.todos_total, + }); + } let _ = client_event_tx.send(ServerEvent::CommMembers { id, diff --git a/crates/jcode-app-core/src/server/client_comm_tests.rs b/crates/jcode-app-core/src/server/client_comm_tests.rs index 0db9680bf..35bf65c4a 100644 --- a/crates/jcode-app-core/src/server/client_comm_tests.rs +++ b/crates/jcode-app-core/src/server/client_comm_tests.rs @@ -403,6 +403,11 @@ async fn comm_list_includes_member_status_and_detail() { HashSet::from([requester_id.clone(), peer_id.clone()]), )]))); let file_touches = Arc::new(RwLock::new(HashMap::new())); + let sessions = Arc::new(RwLock::new(HashMap::from([ + (requester_id.clone(), requester.clone()), + (peer_id.clone(), peer.clone()), + ]))); + let client_connections = Arc::new(RwLock::new(HashMap::new())); handle_comm_list( 1, @@ -411,6 +416,8 @@ async fn comm_list_includes_member_status_and_detail() { &swarm_members, &swarms_by_id, &file_touches, + &sessions, + &client_connections, ) .await; diff --git a/crates/jcode-app-core/src/server/client_disconnect_cleanup.rs b/crates/jcode-app-core/src/server/client_disconnect_cleanup.rs index 78745d71c..f2d596775 100644 --- a/crates/jcode-app-core/src/server/client_disconnect_cleanup.rs +++ b/crates/jcode-app-core/src/server/client_disconnect_cleanup.rs @@ -213,6 +213,7 @@ pub(super) async fn cleanup_client_connection( (None, None) } }; + crate::session_metrics::forget(client_session_id); if let Some(ref swarm_id) = swarm_id { record_swarm_event( diff --git a/crates/jcode-app-core/src/server/client_lifecycle.rs b/crates/jcode-app-core/src/server/client_lifecycle.rs index e437e6e49..0455b09bd 100644 --- a/crates/jcode-app-core/src/server/client_lifecycle.rs +++ 
b/crates/jcode-app-core/src/server/client_lifecycle.rs @@ -1924,6 +1924,8 @@ pub(super) async fn handle_client( &swarm_members, &swarms_by_id, &files_touched_by_session, + &sessions, + &client_connections, ) .await; } diff --git a/crates/jcode-app-core/src/server/client_lightweight_control.rs b/crates/jcode-app-core/src/server/client_lightweight_control.rs index dfdd34189..8ab3ac0e8 100644 --- a/crates/jcode-app-core/src/server/client_lightweight_control.rs +++ b/crates/jcode-app-core/src/server/client_lightweight_control.rs @@ -204,6 +204,8 @@ pub(super) async fn handle_lightweight_control_request( swarm_members, swarms_by_id, files_touched_by_session, + sessions, + client_connections, ) .await; } diff --git a/crates/jcode-app-core/src/server/comm_sync.rs b/crates/jcode-app-core/src/server/comm_sync.rs index fbafacad0..bd18c44a9 100644 --- a/crates/jcode-app-core/src/server/comm_sync.rs +++ b/crates/jcode-app-core/src/server/comm_sync.rs @@ -63,6 +63,86 @@ fn live_activity_snapshot( }) } +/// Recent-token lookback window used when reporting per-agent churn in +/// `swarm list`. Short enough to reflect "what is this agent doing right now". +pub(super) const SWARM_LIST_TOKEN_WINDOW_SECS: u64 = 10; + +/// Runtime extras for a swarm member, gathered without holding the agent lock +/// for long. Used to enrich the `swarm list` roster with live activity, +/// provider/model, token churn, turn count, and todo progress. +#[derive(Default)] +pub(super) struct MemberRuntimeExtras { + pub(super) activity: Option, + pub(super) provider_name: Option, + pub(super) provider_model: Option, + pub(super) turn_count: Option, + pub(super) recent_total_tokens: Option, + pub(super) recent_output_tokens: Option, + pub(super) recent_window_secs: Option, + pub(super) cumulative_total_tokens: Option, + pub(super) todos_completed: Option, + pub(super) todos_total: Option, +} + +/// Gather live runtime extras for a single member session. 
+/// +/// `member_is_running` is used as a fallback "processing" hint when no live +/// client connection is reporting activity (e.g. headless sessions). +pub(super) async fn member_runtime_extras( + session_id: &str, + member_is_running: bool, + sessions: &SessionAgents, + client_connections: &Arc>>, +) -> MemberRuntimeExtras { + let activity = { + let connections = client_connections.read().await; + live_activity_snapshot(&connections, session_id, member_is_running) + }; + + let (provider_name, provider_model) = { + let agent_sessions = sessions.read().await; + if let Some(agent) = agent_sessions.get(session_id) { + // Never block on a busy agent: token churn and turns come from the + // lock-free metrics registry, so a missing provider name here just + // means the agent is mid-turn. + if let Ok(agent) = agent.try_lock() { + (Some(agent.provider_name()), Some(agent.provider_model())) + } else { + (None, None) + } + } else { + (None, None) + } + }; + + let metrics = crate::session_metrics::snapshot( + session_id, + std::time::Duration::from_secs(SWARM_LIST_TOKEN_WINDOW_SECS), + ); + + let (todos_completed, todos_total) = match crate::todo::load_todos(session_id) { + Ok(todos) if !todos.is_empty() => { + let completed = todos.iter().filter(|t| t.status == "completed").count(); + (Some(completed), Some(todos.len())) + } + _ => (None, None), + }; + + MemberRuntimeExtras { + activity, + provider_name, + provider_model, + turn_count: metrics.map(|m| m.turns), + recent_total_tokens: metrics.map(|m| m.recent_total_tokens), + recent_output_tokens: metrics.map(|m| m.recent_output_tokens), + recent_window_secs: metrics.map(|_| SWARM_LIST_TOKEN_WINDOW_SECS), + cumulative_total_tokens: metrics.map(|m| m.cumulative_total_tokens), + todos_completed, + todos_total, + } +} + + async fn ensure_same_swarm_access( id: u64, req_session_id: &str, diff --git a/crates/jcode-app-core/src/tool/communicate_tests.rs b/crates/jcode-app-core/src/tool/communicate_tests.rs index 
5f35e0c4d..fdd81fed6 100644 --- a/crates/jcode-app-core/src/tool/communicate_tests.rs +++ b/crates/jcode-app-core/src/tool/communicate_tests.rs @@ -126,6 +126,7 @@ fn in_flight_slot_accounting_counts_queued_workers_not_coordinator() { latest_completion_report: None, live_attachments: None, status_age_secs: None, + ..Default::default() }, AgentInfo { session_id: "worker-queued".to_string(), @@ -139,6 +140,7 @@ fn in_flight_slot_accounting_counts_queued_workers_not_coordinator() { latest_completion_report: None, live_attachments: None, status_age_secs: None, + ..Default::default() }, AgentInfo { session_id: "worker-ready".to_string(), @@ -152,6 +154,7 @@ fn in_flight_slot_accounting_counts_queued_workers_not_coordinator() { latest_completion_report: None, live_attachments: None, status_age_secs: None, + ..Default::default() }, ]; diff --git a/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs b/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs index d136e3dfc..0da530076 100644 --- a/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs +++ b/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs @@ -92,6 +92,7 @@ fn cleanup_candidates_default_to_owned_terminal_workers() { latest_completion_report: None, live_attachments: None, status_age_secs: None, + ..Default::default() }, AgentInfo { session_id: "owned-done".to_string(), @@ -105,6 +106,7 @@ fn cleanup_candidates_default_to_owned_terminal_workers() { latest_completion_report: None, live_attachments: None, status_age_secs: None, + ..Default::default() }, AgentInfo { session_id: "user-created".to_string(), @@ -118,6 +120,7 @@ fn cleanup_candidates_default_to_owned_terminal_workers() { latest_completion_report: None, live_attachments: None, status_age_secs: None, + ..Default::default() }, AgentInfo { session_id: "owned-running".to_string(), @@ -131,6 +134,7 @@ fn cleanup_candidates_default_to_owned_terminal_workers() { latest_completion_report: None, live_attachments: 
None, status_age_secs: None, + ..Default::default() }, ]; let statuses = default_cleanup_target_statuses(); @@ -197,15 +201,17 @@ fn format_members_includes_status_and_detail() { latest_completion_report: None, live_attachments: Some(0), status_age_secs: Some(12), + ..Default::default() }], ); assert!(output.output.contains("Status: running — working on tests")); + assert!(output.output.contains("· 12s")); assert!(output.output.contains("Files: src/main.rs")); assert!( output .output - .contains("Meta: headless · owned_by_you · attachments=0 · status_age=12s") + .contains("Meta: headless · owned_by_you · attachments=0") ); } @@ -230,6 +236,7 @@ fn format_members_disambiguates_duplicate_friendly_names() { latest_completion_report: None, live_attachments: None, status_age_secs: None, + ..Default::default() }, AgentInfo { session_id: "session_shark_1234567890_bbbbbbbbbbbb0002".to_string(), @@ -243,6 +250,7 @@ fn format_members_disambiguates_duplicate_friendly_names() { latest_completion_report: None, live_attachments: None, status_age_secs: None, + ..Default::default() }, ], ); diff --git a/crates/jcode-base/src/lib.rs b/crates/jcode-base/src/lib.rs index 69df34d3a..e3bec7725 100644 --- a/crates/jcode-base/src/lib.rs +++ b/crates/jcode-base/src/lib.rs @@ -63,6 +63,7 @@ pub mod safety; pub mod secret_input; pub mod session; pub mod session_list_cache; +pub mod session_metrics; pub mod side_panel; pub mod sidecar; pub mod skill; diff --git a/crates/jcode-base/src/session_metrics.rs b/crates/jcode-base/src/session_metrics.rs new file mode 100644 index 000000000..6cfad046c --- /dev/null +++ b/crates/jcode-base/src/session_metrics.rs @@ -0,0 +1,210 @@ +//! Lock-free per-session runtime metrics. +//! +//! These metrics are tracked in a process-global registry rather than on the +//! `Agent` struct itself. That is deliberate: callers such as `swarm list` +//! read per-agent stats while the agent may be actively processing a turn and +//! holding its own `Mutex` lock. 
Anything stored behind that lock is +//! unavailable (`try_lock` fails) exactly when an agent is busiest, which is +//! when churn/turn data is most interesting. Keeping these counters in a +//! separate registry lets us observe live activity without contending on the +//! agent lock. +//! +//! The registry stores a small ring of recent token-usage samples per session +//! so we can report a "tokens churned over the last N seconds" rate, plus a +//! cumulative turn counter. + +use std::collections::HashMap; +use std::sync::Mutex; +use std::time::{Duration, Instant}; + +/// How long an individual token sample stays in the rolling window. +const SAMPLE_WINDOW: Duration = Duration::from_secs(60); + +/// Maximum samples retained per session to bound memory. At one sample per +/// provider response this comfortably covers the rolling window. +const MAX_SAMPLES: usize = 256; + +#[derive(Clone, Copy)] +struct TokenSample { + at: Instant, + /// Total tokens (input + output + cache) observed in this sample. + total: u64, + /// Output tokens only, the best proxy for "work produced". + output: u64, +} + +#[derive(Default)] +struct SessionMetrics { + samples: Vec, + turns: u64, + cumulative_total_tokens: u64, + cumulative_output_tokens: u64, +} + +impl SessionMetrics { + fn prune(&mut self, now: Instant) { + let cutoff = now.checked_sub(SAMPLE_WINDOW); + self.samples.retain(|sample| match cutoff { + Some(cutoff) => sample.at >= cutoff, + None => true, + }); + if self.samples.len() > MAX_SAMPLES { + let overflow = self.samples.len() - MAX_SAMPLES; + self.samples.drain(0..overflow); + } + } +} + +static REGISTRY: Mutex>> = Mutex::new(None); + +fn with_registry(f: impl FnOnce(&mut HashMap) -> R) -> Option { + let mut guard = REGISTRY.lock().ok()?; + let map = guard.get_or_insert_with(HashMap::new); + Some(f(map)) +} + +/// Record a token-usage sample for a session. Called from the streaming turn +/// loop whenever the provider reports usage. 
+pub fn record_token_usage(session_id: &str, total_tokens: u64, output_tokens: u64) { + if session_id.is_empty() || (total_tokens == 0 && output_tokens == 0) { + return; + } + let now = Instant::now(); + with_registry(|map| { + let entry = map.entry(session_id.to_string()).or_default(); + entry.samples.push(TokenSample { + at: now, + total: total_tokens, + output: output_tokens, + }); + entry.cumulative_total_tokens = entry.cumulative_total_tokens.saturating_add(total_tokens); + entry.cumulative_output_tokens = + entry.cumulative_output_tokens.saturating_add(output_tokens); + entry.prune(now); + }); +} + +/// Record that a session completed (or started) a turn. +pub fn record_turn(session_id: &str) { + if session_id.is_empty() { + return; + } + with_registry(|map| { + let entry = map.entry(session_id.to_string()).or_default(); + entry.turns = entry.turns.saturating_add(1); + }); +} + +/// Snapshot of a session's recent activity. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub struct SessionMetricsSnapshot { + /// Total tokens observed within the lookback window. + pub recent_total_tokens: u64, + /// Output tokens observed within the lookback window. + pub recent_output_tokens: u64, + /// Cumulative total tokens for the session lifetime. + pub cumulative_total_tokens: u64, + /// Cumulative output tokens for the session lifetime. + pub cumulative_output_tokens: u64, + /// Number of turns recorded for the session. + pub turns: u64, +} + +impl SessionMetricsSnapshot { + pub fn has_activity(&self) -> bool { + self.recent_total_tokens > 0 + || self.cumulative_total_tokens > 0 + || self.turns > 0 + } +} + +/// Read a snapshot of a session's metrics, summing token samples within the +/// given lookback window. Returns `None` if the session has no recorded +/// metrics. 
+pub fn snapshot(session_id: &str, lookback: Duration) -> Option { + let now = Instant::now(); + with_registry(|map| { + let entry = map.get_mut(session_id)?; + entry.prune(now); + let cutoff = now.checked_sub(lookback); + let mut recent_total = 0u64; + let mut recent_output = 0u64; + for sample in &entry.samples { + let in_window = match cutoff { + Some(cutoff) => sample.at >= cutoff, + None => true, + }; + if in_window { + recent_total = recent_total.saturating_add(sample.total); + recent_output = recent_output.saturating_add(sample.output); + } + } + Some(SessionMetricsSnapshot { + recent_total_tokens: recent_total, + recent_output_tokens: recent_output, + cumulative_total_tokens: entry.cumulative_total_tokens, + cumulative_output_tokens: entry.cumulative_output_tokens, + turns: entry.turns, + }) + }) + .flatten() +} + +/// Remove a session's metrics, called when the session leaves the swarm or +/// disconnects, to avoid unbounded growth. +pub fn forget(session_id: &str) { + with_registry(|map| { + map.remove(session_id); + }); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn records_and_snapshots_token_usage() { + let sid = "session_metrics_test_basic"; + forget(sid); + record_token_usage(sid, 100, 40); + record_token_usage(sid, 50, 20); + let snap = snapshot(sid, Duration::from_secs(10)).expect("snapshot"); + assert_eq!(snap.recent_total_tokens, 150); + assert_eq!(snap.recent_output_tokens, 60); + assert_eq!(snap.cumulative_total_tokens, 150); + assert_eq!(snap.cumulative_output_tokens, 60); + forget(sid); + } + + #[test] + fn counts_turns() { + let sid = "session_metrics_test_turns"; + forget(sid); + record_turn(sid); + record_turn(sid); + record_turn(sid); + let snap = snapshot(sid, Duration::from_secs(10)).expect("snapshot"); + assert_eq!(snap.turns, 3); + forget(sid); + } + + #[test] + fn ignores_empty_and_zero() { + let sid = "session_metrics_test_zero"; + forget(sid); + record_token_usage(sid, 0, 0); + record_token_usage("", 100, 40); + 
assert!(snapshot(sid, Duration::from_secs(10)).is_none()); + forget(sid); + } + + #[test] + fn forget_clears_state() { + let sid = "session_metrics_test_forget"; + forget(sid); + record_turn(sid); + assert!(snapshot(sid, Duration::from_secs(10)).is_some()); + forget(sid); + assert!(snapshot(sid, Duration::from_secs(10)).is_none()); + } +} diff --git a/crates/jcode-protocol/src/comm_format.rs b/crates/jcode-protocol/src/comm_format.rs index c25734014..caa60dfca 100644 --- a/crates/jcode-protocol/src/comm_format.rs +++ b/crates/jcode-protocol/src/comm_format.rs @@ -152,6 +152,71 @@ pub fn format_comm_members(current_session_id: &str, members: &[AgentInfo]) -> S } else { String::new() }; + + // Status line: lifecycle + detail, then a contextual age label. + // For an idle/ready agent the "age" is how long it has been idle; + // for a running agent it is how long the current turn has run. + let detail_suffix = member + .detail + .as_deref() + .map(|detail| format!(" — {}", detail)) + .unwrap_or_default(); + let age_suffix = match member.status_age_secs { + Some(age) if status == "ready" || status == "idle" => { + format!(" · idle {}", format_secs(age)) + } + Some(age) if status == "running" => format!(" · {}", format_secs(age)), + Some(age) => format!(" · {} ago", format_secs(age)), + None => String::new(), + }; + + // Live activity: what the agent is doing right now. + let activity_suffix = match member.activity.as_ref() { + Some(activity) if activity.is_processing => { + match activity.current_tool_name.as_deref() { + Some(tool) => format!("\n Activity: working ({})", tool), + None => "\n Activity: thinking".to_string(), + } + } + _ => String::new(), + }; + + // Progress: todos completed / total. + let progress_suffix = match (member.todos_completed, member.todos_total) { + (Some(done), Some(total)) if total > 0 => { + format!("\n Progress: {}/{} todos", done, total) + } + _ => String::new(), + }; + + // Live work signal: recent token churn + cumulative + turns. 
+ let mut work_meta = Vec::new(); + if let (Some(recent), Some(window)) = + (member.recent_total_tokens, member.recent_window_secs) + && recent > 0 + { + work_meta.push(format!("{} tok/{}s", format_count(recent), window)); + } + if let Some(turns) = member.turn_count.filter(|turns| *turns > 0) { + work_meta.push(format!("{} turns", turns)); + } + if let Some(total) = member.cumulative_total_tokens.filter(|total| *total > 0) { + work_meta.push(format!("{} tok total", format_count(total))); + } + let work_suffix = if work_meta.is_empty() { + String::new() + } else { + format!("\n Work: {}", work_meta.join(" · ")) + }; + + // Model line. + let model_suffix = match (member.provider_name.as_deref(), member.provider_model.as_deref()) + { + (Some(provider), Some(model)) => format!("\n Model: {}/{}", provider, model), + (None, Some(model)) => format!("\n Model: {}", model), + _ => String::new(), + }; + let mut extra_meta = Vec::new(); if member.is_headless == Some(true) { extra_meta.push("headless".to_string()); @@ -166,37 +231,79 @@ pub fn format_comm_members(current_session_id: &str, members: &[AgentInfo]) -> S if let Some(attachments) = member.live_attachments { extra_meta.push(format!("attachments={attachments}")); } - if let Some(age_secs) = member.status_age_secs { - extra_meta.push(format!("status_age={}s", age_secs)); - } let meta_suffix = if extra_meta.is_empty() { String::new() } else { format!("\n Meta: {}", extra_meta.join(" · ")) }; + + // Completion report when the agent has finished. 
+ let report_suffix = match member.latest_completion_report.as_deref() { + Some(report) if !report.trim().is_empty() => { + format!("\n Report: {}", truncate_report(report)) + } + _ => String::new(), + }; + output.push_str(&format!( - " {}{} ({})\n Status: {}{}{}{}\n", + " {}{} ({})\n Status: {}{}{}{}{}{}{}{}{}{}\n", name, role_label, if is_me { "you" } else { session }, status, - member - .detail - .as_deref() - .map(|detail| format!(" — {}", detail)) - .unwrap_or_default(), + detail_suffix, + age_suffix, + activity_suffix, + progress_suffix, + work_suffix, + model_suffix, if files.is_empty() { String::new() } else { format!("\n Files: {}", files) }, - meta_suffix + meta_suffix, + report_suffix, )); } output } } +/// Format a duration in seconds into a compact human label (e.g. `45s`, `3m`, `2h`). +fn format_secs(secs: u64) -> String { + if secs < 60 { + format!("{}s", secs) + } else if secs < 3600 { + format!("{}m", secs / 60) + } else { + format!("{}h", secs / 3600) + } +} + +/// Format a token count compactly (e.g. `850`, `12.3k`, `1.2M`). +fn format_count(count: u64) -> String { + if count < 1_000 { + count.to_string() + } else if count < 1_000_000 { + format!("{:.1}k", count as f64 / 1_000.0) + } else { + format!("{:.1}M", count as f64 / 1_000_000.0) + } +} + +/// Truncate a completion report to a single compact line for the roster view. 
+fn truncate_report(report: &str) -> String { + const MAX: usize = 120; + let one_line: String = report.split_whitespace().collect::>().join(" "); + if one_line.chars().count() > MAX { + let truncated: String = one_line.chars().take(MAX).collect(); + format!("{}…", truncated) + } else { + one_line + } +} + pub fn format_comm_tool_summary(target: &str, calls: &[ToolCallSummary]) -> String { if calls.is_empty() { format!("No tool calls found for {}", target) diff --git a/crates/jcode-protocol/src/lib.rs b/crates/jcode-protocol/src/lib.rs index c74781f47..1d6712357 100644 --- a/crates/jcode-protocol/src/lib.rs +++ b/crates/jcode-protocol/src/lib.rs @@ -198,7 +198,7 @@ pub struct ContextEntry { } /// Info about an agent -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct AgentInfo { pub session_id: String, #[serde(skip_serializing_if = "Option::is_none")] @@ -229,6 +229,36 @@ pub struct AgentInfo { /// Seconds since the last status change. #[serde(default, skip_serializing_if = "Option::is_none")] pub status_age_secs: Option, + /// Live activity (whether processing + current tool name). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub activity: Option, + /// Provider name (e.g. "anthropic"). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub provider_name: Option, + /// Provider model id. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub provider_model: Option, + /// Number of turns the agent has run this session. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub turn_count: Option, + /// Tokens churned (total, including cache) within the recent lookback window. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub recent_total_tokens: Option, + /// Output tokens produced within the recent lookback window. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub recent_output_tokens: Option, + /// Width of the recent-token lookback window, in seconds. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub recent_window_secs: Option, + /// Cumulative total tokens observed for the session lifetime. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub cumulative_total_tokens: Option, + /// Number of completed todos for this agent's session. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub todos_completed: Option, + /// Total number of todos for this agent's session. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub todos_total: Option, } /// Lightweight status snapshot for a swarm member. diff --git a/crates/jcode-protocol/src/protocol_tests/comm_responses.rs b/crates/jcode-protocol/src/protocol_tests/comm_responses.rs index 1bdb0067d..01b0f3147 100644 --- a/crates/jcode-protocol/src/protocol_tests/comm_responses.rs +++ b/crates/jcode-protocol/src/protocol_tests/comm_responses.rs @@ -158,6 +158,7 @@ fn test_comm_members_roundtrip_includes_status() -> Result<()> { latest_completion_report: Some("Done.".to_string()), live_attachments: Some(0), status_age_secs: Some(12), + ..Default::default() }], }; From 8faeb77574d2dca84e65481c89e839cf490727e7 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 17:35:09 -0700 Subject: [PATCH 54/57] test(swarm): cover enriched swarm list rendering (activity, churn, turns, idle label) --- .../tool/communicate_tests/input_format.rs | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs b/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs index 0da530076..e03864883 100644 --- a/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs +++ b/crates/jcode-app-core/src/tool/communicate_tests/input_format.rs @@ -215,6 
+215,83 @@ fn format_members_includes_status_and_detail() { ); } +#[test] +fn format_members_renders_activity_progress_churn_and_turns() { + let ctx = test_ctx( + "session_self_1234567890_deadbeefcafebabe", + std::path::Path::new("."), + ); + + let output = format_members( + &ctx, + &[AgentInfo { + session_id: "session_peer_1234567890_aaaaaaaaaaaa0001".to_string(), + friendly_name: Some("otter".to_string()), + files_touched: vec![], + status: Some("running".to_string()), + detail: Some("implementing".to_string()), + role: Some("agent".to_string()), + is_headless: Some(false), + report_back_to_session_id: None, + latest_completion_report: None, + live_attachments: Some(1), + status_age_secs: Some(8), + activity: Some(SessionActivitySnapshot { + is_processing: true, + current_tool_name: Some("edit".to_string()), + }), + provider_name: Some("anthropic".to_string()), + provider_model: Some("claude-sonnet".to_string()), + turn_count: Some(7), + recent_total_tokens: Some(12_345), + recent_output_tokens: Some(2_000), + recent_window_secs: Some(10), + cumulative_total_tokens: Some(98_765), + todos_completed: Some(3), + todos_total: Some(7), + }], + ); + + let text = output.output; + assert!(text.contains("Activity: working (edit)"), "got: {text}"); + assert!(text.contains("Progress: 3/7 todos"), "got: {text}"); + assert!(text.contains("12.3k tok/10s"), "got: {text}"); + assert!(text.contains("7 turns"), "got: {text}"); + assert!(text.contains("98.8k tok total"), "got: {text}"); + assert!(text.contains("Model: anthropic/claude-sonnet"), "got: {text}"); + // Running agent shows current-turn duration, not an "idle" label. 
+ assert!(text.contains("· 8s"), "got: {text}"); + assert!(!text.contains("idle"), "got: {text}"); +} + +#[test] +fn format_members_labels_idle_ready_agent() { + let ctx = test_ctx( + "session_self_1234567890_deadbeefcafebabe", + std::path::Path::new("."), + ); + + let output = format_members( + &ctx, + &[AgentInfo { + session_id: "session_peer_1234567890_bbbbbbbbbbbb0002".to_string(), + friendly_name: Some("idle-one".to_string()), + files_touched: vec![], + status: Some("ready".to_string()), + detail: None, + role: Some("agent".to_string()), + is_headless: None, + report_back_to_session_id: None, + latest_completion_report: None, + live_attachments: Some(0), + status_age_secs: Some(90), + ..Default::default() + }], + ); + + assert!(output.output.contains("idle 1m"), "got: {}", output.output); +} + #[test] fn format_members_disambiguates_duplicate_friendly_names() { let ctx = test_ctx( From 0e692d6b1b9aada852314d5d75f0cfbd59a15148 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 17:49:09 -0700 Subject: [PATCH 55/57] fix(tui): honor reasoning_display mode when re-rendering persisted history MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'current' reasoning collapse only ran in the live streaming path. When the transcript was re-rendered from stored history (self-dev reload, resume, remote sync, compaction-window expand), the shared history renderer replayed every persisted reasoning trace in full regardless of reasoning_display mode, so after the collapse animation finished a reload would bring all the reasoning back. format_reasoning_markup now honors the active mode: - Off: persisted reasoning is hidden entirely. - Current: the block folds to a single '▸ thought (N lines)' trace line, matching the live collapse end state. - Full: classic full replay (unchanged). Adds reasoning_summary_line_markup helper + tests for all three modes. 
--- crates/jcode-base/src/session/render.rs | 21 +++++ crates/jcode-base/src/session_tests/cases.rs | 89 +++++++++++++++++++ crates/jcode-tui-markdown/src/lib.rs | 13 +++ .../src/markdown_tests/cases/rendering.rs | 48 ++++++++++ 4 files changed, 171 insertions(+) diff --git a/crates/jcode-base/src/session/render.rs b/crates/jcode-base/src/session/render.rs index a9c909d8f..76e7ecc6d 100644 --- a/crates/jcode-base/src/session/render.rs +++ b/crates/jcode-base/src/session/render.rs @@ -1,5 +1,6 @@ use super::{Session, StoredDisplayRole}; use crate::message::{ContentBlock, Role, ToolCall}; +use jcode_config_types::ReasoningDisplayMode; pub use jcode_session_types::{ RenderedCompactedHistoryInfo, RenderedImage, RenderedImageSource, RenderedMessage, }; @@ -16,10 +17,30 @@ pub const DEFAULT_VISIBLE_COMPACTED_HISTORY_MESSAGES: usize = 64; /// by the live streaming path. Each line is wrapped via the shared `reasoning_line_markup` so resumed /// sessions render reasoning identically to how it streamed, terminated by a /// blank line so following answer text renders as a normal paragraph. +/// +/// Honors the active `reasoning_display` mode so re-rendered history (reload, +/// resume, remote sync, compaction-window expand) matches the live behavior: +/// - `Off`: persisted reasoning is hidden entirely. +/// - `Current`: the block folds down to a single `▸ thought (N lines)` trace, +/// matching the live collapse animation's end state rather than replaying the +/// full reasoning back into the transcript on every reload. +/// - `Full`: every reasoning line is shown (classic behavior). 
fn format_reasoning_markup(text: &str) -> String { if text.trim().is_empty() { return String::new(); } + let mode = crate::config::config().display.reasoning_display(); + match mode { + ReasoningDisplayMode::Off => return String::new(), + ReasoningDisplayMode::Current => { + let line_count = text.lines().filter(|l| !l.trim().is_empty()).count(); + let mut out = jcode_tui_markdown::reasoning_summary_line_markup(line_count); + // Blank line terminates the reasoning block. + out.push('\n'); + return out; + } + ReasoningDisplayMode::Full => {} + } let mut out = String::new(); for line in text.split('\n') { out.push_str(&jcode_tui_markdown::reasoning_line_markup(line)); diff --git a/crates/jcode-base/src/session_tests/cases.rs b/crates/jcode-base/src/session_tests/cases.rs index 3aebf4bbe..3b82ae892 100644 --- a/crates/jcode-base/src/session_tests/cases.rs +++ b/crates/jcode-base/src/session_tests/cases.rs @@ -1061,6 +1061,10 @@ fn test_render_messages_honors_system_display_role_override() { fn test_render_messages_renders_persisted_reasoning() { use jcode_tui_markdown::REASONING_SENTINEL; + let _env_lock = lock_env(); + let _mode = EnvVarGuard::set("JCODE_REASONING_DISPLAY", "full"); + crate::config::invalidate_config_cache(); + let mut session = Session::create_with_id( "session_render_reasoning_test".to_string(), None, @@ -1106,6 +1110,10 @@ fn test_render_messages_renders_persisted_reasoning() { fn test_render_messages_renders_legacy_reasoning_variant() { use jcode_tui_markdown::REASONING_SENTINEL; + let _env_lock = lock_env(); + let _mode = EnvVarGuard::set("JCODE_REASONING_DISPLAY", "full"); + crate::config::invalidate_config_cache(); + let mut session = Session::create_with_id( "session_render_legacy_reasoning_test".to_string(), None, @@ -1130,6 +1138,87 @@ fn test_render_messages_renders_legacy_reasoning_variant() { ); } +#[test] +fn test_render_messages_collapses_persisted_reasoning_in_current_mode() { + use jcode_tui_markdown::REASONING_SENTINEL; + + let 
_env_lock = lock_env(); + let _mode = EnvVarGuard::set("JCODE_REASONING_DISPLAY", "current"); + crate::config::invalidate_config_cache(); + + let mut session = Session::create_with_id( + "session_render_reasoning_current_test".to_string(), + None, + Some("render reasoning current test".to_string()), + ); + + session.add_message( + Role::Assistant, + vec![ + ContentBlock::ReasoningTrace { + text: "step one\nstep two\nstep three".to_string(), + }, + ContentBlock::Text { + text: "Here is the answer.".to_string(), + cache_control: None, + }, + ], + ); + + let rendered = render_messages(&session); + assert_eq!(rendered.len(), 1); + let content = &rendered[0].content; + // In `current` mode re-rendered history folds the whole reasoning block down + // to a single dim/italic trace line, matching the live collapse end state. + assert!( + content.contains(&format!("*{0}▸ thought (3 lines){0}*", REASONING_SENTINEL)), + "expected collapsed reasoning summary, got: {content:?}" + ); + assert!( + !content.contains("step one") && !content.contains("step two"), + "individual reasoning lines must not be replayed in current mode: {content:?}" + ); + // The answer text is preserved and follows the collapsed trace. 
+ assert!(content.contains("Here is the answer.")); +} + +#[test] +fn test_render_messages_hides_persisted_reasoning_in_off_mode() { + use jcode_tui_markdown::REASONING_SENTINEL; + + let _env_lock = lock_env(); + let _mode = EnvVarGuard::set("JCODE_REASONING_DISPLAY", "off"); + crate::config::invalidate_config_cache(); + + let mut session = Session::create_with_id( + "session_render_reasoning_off_test".to_string(), + None, + Some("render reasoning off test".to_string()), + ); + + session.add_message( + Role::Assistant, + vec![ + ContentBlock::ReasoningTrace { + text: "secret thought".to_string(), + }, + ContentBlock::Text { + text: "Here is the answer.".to_string(), + cache_control: None, + }, + ], + ); + + let rendered = render_messages(&session); + assert_eq!(rendered.len(), 1); + let content = &rendered[0].content; + assert!( + !content.contains(REASONING_SENTINEL) && !content.contains("secret thought"), + "reasoning must be hidden entirely in off mode: {content:?}" + ); + assert!(content.contains("Here is the answer.")); +} + #[test] fn test_render_messages_honors_background_task_display_role_override() { let mut session = Session::create_with_id( diff --git a/crates/jcode-tui-markdown/src/lib.rs b/crates/jcode-tui-markdown/src/lib.rs index 0d9a71335..0a0ecfb29 100644 --- a/crates/jcode-tui-markdown/src/lib.rs +++ b/crates/jcode-tui-markdown/src/lib.rs @@ -185,6 +185,19 @@ pub fn reasoning_partial_markup(line: &str) -> String { } } +/// One-line collapsed reasoning summary markup (e.g. `▸ thought (3 lines)`), +/// styled dim+italic like the live reasoning lines. Used to fold a persisted +/// reasoning block down to a single trace line when the transcript is +/// re-rendered from history in `current` reasoning-display mode (so reloaded / +/// resumed sessions match the live collapse instead of replaying every line). 
+pub fn reasoning_summary_line_markup(line_count: usize) -> String { + let label = match line_count { + 0 | 1 => "▸ thought".to_string(), + n => format!("▸ thought ({} lines)", n), + }; + reasoning_line_markup(&label) +} + use render_support::{ highlight_code_cached, line_plain_text, placeholder_code_block, ranges_overlap, render_table, }; diff --git a/crates/jcode-tui-markdown/src/markdown_tests/cases/rendering.rs b/crates/jcode-tui-markdown/src/markdown_tests/cases/rendering.rs index 4c4082843..f9db404ca 100644 --- a/crates/jcode-tui-markdown/src/markdown_tests/cases/rendering.rs +++ b/crates/jcode-tui-markdown/src/markdown_tests/cases/rendering.rs @@ -763,3 +763,51 @@ fn test_reasoning_emphasis_does_not_leak_into_following_text() { ); } } + +#[test] +fn test_reasoning_summary_line_markup_folds_to_single_dim_italic_trace() { + let sentinel = crate::REASONING_SENTINEL; + + // Pluralized count for multi-line blocks. + let many = crate::reasoning_summary_line_markup(3); + assert!( + many.contains(&format!("*{0}▸ thought (3 lines){0}*", sentinel)), + "expected pluralized summary markup, got: {many:?}" + ); + + // Single/zero-line blocks omit the count. + let one = crate::reasoning_summary_line_markup(1); + assert!( + one.contains(&format!("*{0}▸ thought{0}*", sentinel)) && !one.contains("lines"), + "expected bare summary markup, got: {one:?}" + ); + let none = crate::reasoning_summary_line_markup(0); + assert!(none.contains(&format!("*{0}▸ thought{0}*", sentinel)), "{none:?}"); + + // The summary line renders dim + italic with no sentinel leaking into text. 
+ let lines = render_markdown(&many); + let dim = md_dim_color(); + let mut saw_marker = false; + for rendered in &lines { + for span in &rendered.spans { + assert!( + !span.content.contains(sentinel), + "sentinel leaked into visible summary: {:?}", + span.content + ); + if span.content.trim().is_empty() { + continue; + } + if span.content.contains('▸') { + saw_marker = true; + } + assert_eq!(span.style.fg, Some(dim), "summary span not dim: {:?}", span.content); + assert!( + span.style.add_modifier.contains(Modifier::ITALIC), + "summary span not italic: {:?}", + span.content + ); + } + } + assert!(saw_marker, "summary marker '▸' must be visible: {lines:?}"); +} From 8fc815a8e66f438bad748869847cbee204fc0280 Mon Sep 17 00:00:00 2001 From: jeremy <94247773+1jehuang@users.noreply.github.com> Date: Fri, 5 Jun 2026 19:06:42 -0700 Subject: [PATCH 56/57] chore(release): bump version to 0.23.0 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6244492a7..64661cbfd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3265,7 +3265,7 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jcode" -version = "0.22.0" +version = "0.23.0" dependencies = [ "agentgrep", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 42f60f942..8635279b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "jcode" -version = "0.22.0" +version = "0.23.0" description = "Possibly the greatest coding agent ever built — blazing-fast TUI, multi-model, swarm coordination, 30+ tools" edition = "2024" autobins = false From c63802a8ce9c0b11b3ffa5afb2184705a6674451 Mon Sep 17 00:00:00 2001 From: quangdang46 Date: Sat, 6 Jun 2026 10:11:53 +0700 Subject: [PATCH 57/57] fix: propagate route_api_method in SubagentTool + resolve loading.rs ForeignSession --- crates/jcode-app-core/src/tool/task.rs | 1 + crates/jcode-tui/src/tui/session_picker.rs | 1 + 
.../src/tui/session_picker/loading.rs | 641 +++++------------- 3 files changed, 173 insertions(+), 470 deletions(-) diff --git a/crates/jcode-app-core/src/tool/task.rs b/crates/jcode-app-core/src/tool/task.rs index 31546dddf..2c61f127a 100644 --- a/crates/jcode-app-core/src/tool/task.rs +++ b/crates/jcode-app-core/src/tool/task.rs @@ -227,6 +227,7 @@ impl Tool for SubagentTool { // other's `children` entries. Acceptable for experimental Phase 0; // a file-lock or in-memory session cache would fix this properly. if let Ok(mut parent_session) = Session::load(&ctx.session_id) { + session.route_api_method = parent_session.route_api_method.clone(); parent_session.add_child(session.id.clone()); let _ = parent_session.save(); } diff --git a/crates/jcode-tui/src/tui/session_picker.rs b/crates/jcode-tui/src/tui/session_picker.rs index 4ce1dbeb2..eae58d398 100644 --- a/crates/jcode-tui/src/tui/session_picker.rs +++ b/crates/jcode-tui/src/tui/session_picker.rs @@ -562,6 +562,7 @@ impl SessionPicker { ResumeTarget::OpenCodeSession { .. } => external_path.as_deref().and_then(|path| { loading::load_opencode_preview_from_path(std::path::Path::new(path)) }), + ResumeTarget::ForeignSession { .. } => None, } } diff --git a/crates/jcode-tui/src/tui/session_picker/loading.rs b/crates/jcode-tui/src/tui/session_picker/loading.rs index 52e3a936f..670bb1b94 100644 --- a/crates/jcode-tui/src/tui/session_picker/loading.rs +++ b/crates/jcode-tui/src/tui/session_picker/loading.rs @@ -111,7 +111,13 @@ where } handles .into_iter() - .filter_map(|handle| handle.join().ok()) + .filter_map(|handle| match handle.join() { + Ok(result) => Some(result), + Err(panic) => { + eprintln!("parallel_map worker panicked: {panic:?}"); + None + } + }) .collect() }); @@ -371,6 +377,11 @@ fn transcript_paths_for_session(session: &SessionInfo) -> Vec { | ResumeTarget::OpenCodeSession { session_path, .. } => { vec![PathBuf::from(session_path)] } + ResumeTarget::ForeignSession { session_path, .. 
} => session_path + .as_deref() + .map(PathBuf::from) + .map(|p| vec![p]) + .unwrap_or_default(), } } @@ -642,37 +653,6 @@ fn collect_recent_files_recursive(root: &Path, extension: &str, limit: usize) -> files.into_iter().map(|(_, path)| path).collect() } -/// Maximum number of bytes we read from the *tail* of an external transcript -/// (Codex / Claude Code) when building its preview. These JSONL transcripts can -/// be tens of MB, but the preview only ever shows the last ~20 messages, so -/// parsing the whole file on every selection change made arrow-key navigation -/// in the resume / onboarding picker lag badly (each load reparsed the entire -/// file on a fresh thread). Reading a bounded tail keeps each preview load to a -/// sub-millisecond seek + parse regardless of transcript size. -/// -/// 512 KiB comfortably covers far more than 20 messages for normal transcripts -/// while bounding the worst case. -const EXTERNAL_PREVIEW_TAIL_BYTES: u64 = 512 * 1024; - -/// Read the trailing portion of a file as UTF-8 text, capped at -/// [`EXTERNAL_PREVIEW_TAIL_BYTES`]. When the file is larger than the cap we seek -/// to the tail and drop the (possibly partial) first line so we only ever parse -/// complete JSONL records. Returns `(text, truncated_from_head)` where -/// `truncated_from_head` indicates the head of the file was skipped. -fn read_file_tail_text(path: &Path, max_bytes: u64) -> Option<(String, bool)> { - let mut file = File::open(path).ok()?; - let len = file.metadata().ok()?.len(); - let truncated = len > max_bytes; - if truncated { - file.seek(SeekFrom::Start(len - max_bytes)).ok()?; - } - let mut bytes = Vec::with_capacity(max_bytes.min(len) as usize); - file.take(max_bytes).read_to_end(&mut bytes).ok()?; - // Lossily decode: transcripts are UTF-8, but a tail seek can land mid - // multi-byte sequence, and replacement chars are harmless for a preview. 
- Some((String::from_utf8_lossy(&bytes).into_owned(), truncated)) -} - fn push_preview_message(preview: &mut Vec, role: &str, content: String) { let content = content.trim(); if content.is_empty() { @@ -783,15 +763,6 @@ fn truncate_title_text(text: &str, max_chars: usize) -> String { format!("{}…", truncated.trim_end()) } -fn parse_timestamp_value( - value: Option<&serde_json::Value>, -) -> Option> { - value - .and_then(|v| v.as_str()) - .and_then(|ts| chrono::DateTime::parse_from_rfc3339(ts).ok()) - .map(|dt| dt.with_timezone(&chrono::Utc)) -} - #[cfg(test)] fn value_first_text(value: &serde_json::Value) -> Option<&str> { match value { @@ -1669,10 +1640,14 @@ pub fn load_sessions() -> Result> { let catchup_ref = &catchup_seen; let (mut sessions, external_sessions) = std::thread::scope(|scope| { - let claude_handle = scope.spawn(|| load_external_claude_code_sessions(scan_limit)); - let codex_handle = scope.spawn(|| load_external_codex_sessions(scan_limit)); - let pi_handle = scope.spawn(|| load_external_pi_sessions(scan_limit)); - let opencode_handle = scope.spawn(|| load_external_opencode_sessions(scan_limit)); + // Single generic loader that walks every CASR provider known to be + // installed (claude-code, codex, pi-agent, opencode, gemini, + // cursor, cline, aider, amp, chatgpt, clawdbot, vibe, factory, + // openclaw, kiro, jcode). Replaces the four hand-rolled + // `load_external__sessions` functions that were + // maintained separately before CASR became the single source of + // truth. + let casr_handle = scope.spawn(|| load_external_casr_sessions(scan_limit)); // Phase 1: walk the recency-ordered candidates in parallel windows until // we have collected `scan_limit` non-empty sessions. 
`boundary` marks the @@ -1724,10 +1699,7 @@ pub fn load_sessions() -> Result> { } let mut external = Vec::new(); - external.extend(claude_handle.join().unwrap_or_default()); - external.extend(codex_handle.join().unwrap_or_default()); - external.extend(pi_handle.join().unwrap_or_default()); - external.extend(opencode_handle.join().unwrap_or_default()); + external.extend(casr_handle.join().unwrap_or_default()); (sessions, external) }); sessions.extend(external_sessions); @@ -1746,101 +1718,157 @@ pub fn load_sessions() -> Result> { Ok(sessions) } -fn load_external_claude_code_sessions(scan_limit: usize) -> Vec { - let Ok(sessions) = crate::import::list_claude_code_sessions_lazy(scan_limit) else { - return Vec::new(); +/// Enumerate sessions from every CASR-registered provider that is +/// installed on the host. Replaces the four hand-rolled +/// `load_external__sessions` functions that used to be +/// maintained separately per provider; the CASR library now owns the +/// filesystem walks, file parsing, and timestamp heuristics. +fn load_external_casr_sessions(scan_limit: usize) -> Vec { + let by_provider = crate::casr_adapter::list_all_casr_sessions(Some(scan_limit)); + let mut out = Vec::new(); + for (slug, sessions) in by_provider { + for session in sessions { + if out.len() >= scan_limit { + return out; + } + out.push(casr_session_to_session_info(&slug, session)); + } + } + out +} + +/// Project a CASR `(provider_slug, ClaudeCodeSessionInfo)` into the +/// picker's `SessionInfo`. Picks the right `SessionSource` and +/// `ResumeTarget` variant based on the provider slug so that downstream +/// `match` arms in `tui_launch.rs` / `inline_interactive.rs` continue +/// to work for the original four providers, and the new `Foreign` +/// variant carries the rest. 
+fn casr_session_to_session_info( + provider_slug: &str, + session: crate::casr_adapter::ClaudeCodeSessionInfo, +) -> SessionInfo { + let session_id = session.session_id.clone(); + let session_path = session.full_path.clone(); + let created_at = session.created.unwrap_or_else(chrono::Utc::now); + let last_message_time = session.modified.or(session.created).unwrap_or(created_at); + let working_dir = session.project_path.clone(); + + let title = session + .summary + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(|| truncate_title_text(&session.first_prompt, 72)); + + let short_name = working_dir + .as_deref() + .and_then(|dir| Path::new(dir).file_name()) + .and_then(|n| n.to_str()) + .map(|n| n.to_string()) + .unwrap_or_else(|| { + format!( + "{} {}", + provider_slug, + &session_id[..session_id.len().min(8)] + ) + }); + + let session_id_for_index = session_id.clone(); + let search_index = build_search_index( + &format!("{provider_slug}:{session_id}"), + &short_name, + &title, + working_dir.as_deref(), + None, + &[], + ); + + // Pick the source + resume_target based on the provider slug. 
+ let (source, resume_target) = match provider_slug { + "claude-code" => ( + SessionSource::ClaudeCode, + ResumeTarget::ClaudeCodeSession { + session_id: session_id.clone(), + session_path: session_path.clone(), + }, + ), + "codex" => ( + SessionSource::Codex, + ResumeTarget::CodexSession { + session_id: session_id.clone(), + session_path: session_path.clone(), + }, + ), + "pi-agent" => ( + SessionSource::Pi, + ResumeTarget::PiSession { + session_path: session_path.clone(), + }, + ), + "opencode" => ( + SessionSource::OpenCode, + ResumeTarget::OpenCodeSession { + session_id: session_id.clone(), + session_path: session_path.clone(), + }, + ), + other => ( + SessionSource::Foreign(other.to_string()), + ResumeTarget::ForeignSession { + provider_slug: other.to_string(), + session_id: session_id.clone(), + session_path: Some(session_path.clone()), + }, + ), }; - sessions - .into_iter() - .take(scan_limit) - .map(|session| { - let session_id = session.session_id; - let created_at = session.created.unwrap_or_else(chrono::Utc::now); - let last_message_time = session.modified.or(session.created).unwrap_or(created_at); - let working_dir = session.project_path; - let title = session - .summary - .filter(|summary| !summary.trim().is_empty()) - .unwrap_or_else(|| truncate_title_text(&session.first_prompt, 72)); - let short_name = working_dir - .as_deref() - .and_then(|dir| Path::new(dir).file_name()) - .and_then(|name| name.to_str()) - .map(|name| name.to_string()) - .unwrap_or_else(|| format!("claude {}", &session_id[..session_id.len().min(8)])); - // Keep /resume startup focused on cheap metadata. Transcript-backed - // search text is intentionally loaded lazily through preview loading; - // reading tens of KiB from every external transcript can dominate the - // initial picker load on accounts with many Claude Code sessions. 
- let search_index = build_search_index( - &format!("claude:{session_id}"), - &short_name, - &title, - working_dir.as_deref(), - None, - &[], - ); - - SessionInfo { - id: format!("claude:{session_id}"), - parent_id: None, - short_name, - icon: "🧵".to_string(), - title, - message_count: session.message_count as usize, - user_message_count: 0, - assistant_message_count: 0, - created_at, - last_message_time, - last_active_at: Some(last_message_time), - working_dir, - model: None, - provider_key: Some("claude-code".to_string()), - is_canary: false, - is_debug: false, - saved: false, - save_label: None, - status: SessionStatus::Closed, - needs_catchup: false, - estimated_tokens: 0, - first_user_prompt: Some(session.first_prompt.clone()), - messages_preview: Vec::new(), - search_index, - server_name: None, - server_icon: None, - source: SessionSource::ClaudeCode, - resume_target: ResumeTarget::ClaudeCodeSession { - session_id, - session_path: session.full_path.clone(), - }, - external_path: Some(session.full_path), - } - }) - .collect() + // Drop the now-unused captured session_id_for_index so clippy + // doesn't complain; reserved for future search_index refinement. 
+ let _ = &session_id_for_index; + + SessionInfo { + id: format!("{provider_slug}:{session_id}"), + parent_id: None, + short_name, + icon: "💾".to_string(), + title, + message_count: session.message_count as usize, + user_message_count: 0, + assistant_message_count: 0, + created_at, + last_message_time, + last_active_at: Some(last_message_time), + working_dir, + model: None, + provider_key: Some(provider_slug.to_string()), + is_canary: false, + is_debug: false, + saved: false, + save_label: None, + status: SessionStatus::Closed, + needs_catchup: false, + estimated_tokens: 0, + first_user_prompt: Some(session.first_prompt.clone()), + messages_preview: Vec::new(), + search_index, + server_name: None, + server_icon: None, + source, + resume_target, + external_path: Some(session_path), + } } pub(super) fn load_claude_code_preview_from_path(path: &Path) -> Option> { - // Only parse the tail of the transcript (see `load_codex_preview_from_path`): - // the preview shows the last ~20 messages, so reparsing multi-MB transcripts - // on every selection change made picker navigation lag. - let (text, truncated) = read_file_tail_text(path, EXTERNAL_PREVIEW_TAIL_BYTES)?; + let file = File::open(path).ok()?; + let reader = BufReader::new(file); let mut preview = Vec::new(); - // If we seeked into the middle of the file, the first line is a partial - // record; drop it. When we read the whole file the first line is a real - // record we must keep. - let skip = usize::from(truncated); - for line in text.lines().skip(skip) { + for line in reader.lines() { + let line = line.ok()?; let trimmed = line.trim(); if trimmed.is_empty() { continue; } - // Boundary lines from a tail slice may be malformed; skip rather than - // abandon the whole preview. 
- let Ok(value) = serde_json::from_str::(trimmed) else { - continue; - }; + let value: serde_json::Value = serde_json::from_str(trimmed).ok()?; let entry_type = value .get("type") .and_then(|v| v.as_str()) @@ -1868,27 +1896,14 @@ pub(super) fn load_claude_code_preview_from_path(path: &Path) -> Option Option> { - let session = crate::import::list_claude_code_sessions() + let session = crate::casr_adapter::list_claude_code_sessions() .ok()? .into_iter() .find(|session| session.session_id == session_id)?; load_claude_code_preview_from_path(Path::new(&session.full_path)) } -fn load_external_codex_sessions(scan_limit: usize) -> Vec { - let Ok(root) = crate::storage::user_home_path(".codex/sessions") else { - return Vec::new(); - }; - if !root.exists() { - return Vec::new(); - } - let paths = collect_recent_files_recursive(&root, "jsonl", scan_limit); - parallel_map(paths, |path| load_codex_session_stub(&path).ok().flatten()) - .into_iter() - .flatten() - .collect() -} /// Newest external-transcript modification time (Unix seconds) for the given /// external CLI, scanning the sandbox-aware session roots. 
Returns `None` when @@ -1916,85 +1931,6 @@ pub(crate) fn latest_external_cli_session_secs( .map(|duration| duration.as_secs()) } -fn load_codex_session_stub(path: &Path) -> Result> { - let file = File::open(path)?; - let mut lines = BufReader::new(file).lines(); - let Some(first_line) = lines.next() else { - return Ok(None); - }; - let header: serde_json::Value = serde_json::from_str(&first_line?)?; - let meta = if header.get("type").and_then(|v| v.as_str()) == Some("session_meta") { - header.get("payload").unwrap_or(&header) - } else { - &header - }; - let session_id = meta - .get("id") - .and_then(|v| v.as_str()) - .unwrap_or_default() - .to_string(); - if session_id.is_empty() { - return Ok(None); - } - - let created_at = parse_timestamp_value(meta.get("timestamp")) - .or_else(|| parse_timestamp_value(header.get("timestamp"))) - .unwrap_or_else(chrono::Utc::now); - let last_message_time = std::fs::metadata(path) - .and_then(|meta| meta.modified()) - .map(chrono::DateTime::::from) - .unwrap_or(created_at); - let working_dir = meta - .get("cwd") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - let short_name = format!("codex {}", &session_id[..session_id.len().min(8)]); - let title = format!("Codex session {}", &session_id[..session_id.len().min(8)]); - let search_index = build_search_index( - &format!("codex:{session_id}"), - &short_name, - &title, - working_dir.as_deref(), - None, - &[], - ); - - Ok(Some(SessionInfo { - id: format!("codex:{session_id}"), - parent_id: None, - short_name, - icon: "🧠".to_string(), - title, - message_count: 0, - user_message_count: 0, - assistant_message_count: 0, - created_at, - last_message_time, - last_active_at: Some(last_message_time), - working_dir, - model: None, - provider_key: Some("openai-codex".to_string()), - is_canary: false, - is_debug: false, - saved: false, - save_label: None, - status: SessionStatus::Closed, - needs_catchup: false, - estimated_tokens: 0, - first_user_prompt: None, - messages_preview: 
Vec::new(), - search_index, - server_name: None, - server_icon: None, - source: SessionSource::Codex, - resume_target: ResumeTarget::CodexSession { - session_id, - session_path: path.to_string_lossy().to_string(), - }, - external_path: Some(path.to_string_lossy().to_string()), - })) -} - fn find_codex_session_file(session_id: &str) -> Option { let root = crate::storage::user_home_path(".codex/sessions").ok()?; if !root.exists() { @@ -2025,25 +1961,17 @@ fn find_codex_session_file(session_id: &str) -> Option { } pub(super) fn load_codex_preview_from_path(path: &Path) -> Option> { - // Only parse the tail of the transcript: the preview shows the last ~20 - // messages, and these rollout files can be tens of MB, so reading the whole - // file on every selection change made picker navigation lag. - let (text, _truncated) = read_file_tail_text(path, EXTERNAL_PREVIEW_TAIL_BYTES)?; + let file = File::open(path).ok()?; + let reader = BufReader::new(file); let mut preview = Vec::new(); - // When we read from the start we skip the first line (the `session_meta` - // record). When we read a tail slice the first line is almost certainly a - // partial record, so we drop it either way. - for line in text.lines().skip(1) { + for line in reader.lines().skip(1) { + let line = line.ok()?; let trimmed = line.trim(); if trimmed.is_empty() { continue; } - // A tail slice can yield malformed JSON on its boundary lines; skip - // those instead of bailing out of the whole preview. 
- let Ok(value) = serde_json::from_str::(trimmed) else { - continue; - }; + let value: serde_json::Value = serde_json::from_str(trimmed).ok()?; let line_type = value .get("type") .and_then(|v| v.as_str()) @@ -2093,101 +2021,6 @@ pub(super) fn load_pi_preview_from_path(path: &Path) -> Option Vec { - let Ok(root) = crate::storage::user_home_path(".pi/agent/sessions") else { - return Vec::new(); - }; - if !root.exists() { - return Vec::new(); - } - - let paths = collect_recent_files_recursive(&root, "jsonl", scan_limit); - parallel_map(paths, |path| load_pi_session_stub(&path).ok().flatten()) - .into_iter() - .flatten() - .collect() -} - -fn load_pi_session_stub(path: &Path) -> Result> { - let file = File::open(path)?; - let mut lines = BufReader::new(file).lines(); - let Some(first_line) = lines.next() else { - return Ok(None); - }; - let header: serde_json::Value = serde_json::from_str(&first_line?)?; - if header.get("type").and_then(|v| v.as_str()) != Some("session") { - return Ok(None); - } - - let session_id = header - .get("id") - .and_then(|v| v.as_str()) - .unwrap_or_default() - .to_string(); - if session_id.is_empty() { - return Ok(None); - } - - let created_at = header - .get("timestamp") - .and_then(|v| v.as_str()) - .and_then(|ts| chrono::DateTime::parse_from_rfc3339(ts).ok()) - .map(|dt| dt.with_timezone(&chrono::Utc)) - .unwrap_or_else(chrono::Utc::now); - let last_message_time = std::fs::metadata(path) - .and_then(|meta| meta.modified()) - .map(chrono::DateTime::::from) - .unwrap_or(created_at); - let working_dir = header - .get("cwd") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - let short_name = format!("pi {}", &session_id[..session_id.len().min(8)]); - let title = format!("Pi session {}", &session_id[..session_id.len().min(8)]); - let search_index = build_search_index( - &format!("pi:{session_id}"), - &short_name, - &title, - working_dir.as_deref(), - None, - &[], - ); - - Ok(Some(SessionInfo { - id: format!("pi:{session_id}"), - 
parent_id: None, - short_name, - icon: "π".to_string(), - title, - message_count: 0, - user_message_count: 0, - assistant_message_count: 0, - created_at, - last_message_time, - last_active_at: Some(last_message_time), - working_dir, - model: None, - provider_key: Some("pi".to_string()), - is_canary: false, - is_debug: false, - saved: false, - save_label: None, - status: SessionStatus::Closed, - needs_catchup: false, - estimated_tokens: 0, - first_user_prompt: None, - messages_preview: Vec::new(), - search_index, - server_name: None, - server_icon: None, - source: SessionSource::Pi, - resume_target: ResumeTarget::PiSession { - session_path: path.to_string_lossy().to_string(), - }, - external_path: Some(path.to_string_lossy().to_string()), - })) -} - fn load_pi_session_info(path: &Path) -> Result> { let file = File::open(path)?; let reader = BufReader::new(file); @@ -2348,21 +2181,6 @@ fn load_pi_session_info(path: &Path) -> Result> { })) } -fn load_external_opencode_sessions(scan_limit: usize) -> Vec { - let Ok(root) = crate::storage::user_home_path(".local/share/opencode/storage/session") else { - return Vec::new(); - }; - if !root.exists() { - return Vec::new(); - } - - let paths = collect_recent_files_recursive(&root, "json", scan_limit); - parallel_map(paths, |path| load_opencode_session_stub(&path).ok().flatten()) - .into_iter() - .flatten() - .collect() -} - pub(super) fn load_opencode_preview_from_path(path: &Path) -> Option> { load_opencode_session_info(path) .ok() @@ -2370,89 +2188,6 @@ pub(super) fn load_opencode_preview_from_path(path: &Path) -> Option Result> { - let value: serde_json::Value = serde_json::from_reader(File::open(path)?)?; - let session_id = value - .get("id") - .and_then(|v| v.as_str()) - .unwrap_or_default() - .to_string(); - if session_id.is_empty() { - return Ok(None); - } - - let created_at = value - .get("time") - .and_then(|time| time.get("created")) - .and_then(|v| v.as_i64()) - .and_then(chrono::DateTime::::from_timestamp_millis) 
- .unwrap_or_else(chrono::Utc::now); - let last_message_time = value - .get("time") - .and_then(|time| time.get("updated")) - .and_then(|v| v.as_i64()) - .and_then(chrono::DateTime::::from_timestamp_millis) - .unwrap_or(created_at); - let working_dir = value - .get("directory") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - let short_name = format!("opencode {}", &session_id[..session_id.len().min(8)]); - let title = value - .get("title") - .and_then(|v| v.as_str()) - .map(|s| truncate_title_text(s, 72)) - .unwrap_or_else(|| { - format!( - "OpenCode session {}", - &session_id[..session_id.len().min(8)] - ) - }); - let search_index = build_search_index( - &format!("opencode:{session_id}"), - &short_name, - &title, - working_dir.as_deref(), - None, - &[], - ); - - Ok(Some(SessionInfo { - id: format!("opencode:{session_id}"), - parent_id: None, - short_name, - icon: "◌".to_string(), - title, - message_count: 0, - user_message_count: 0, - assistant_message_count: 0, - created_at, - last_message_time, - last_active_at: Some(last_message_time), - working_dir, - model: None, - provider_key: Some("opencode".to_string()), - is_canary: false, - is_debug: false, - saved: false, - save_label: None, - status: SessionStatus::Closed, - needs_catchup: false, - estimated_tokens: 0, - first_user_prompt: None, - messages_preview: Vec::new(), - search_index, - server_name: None, - server_icon: None, - source: SessionSource::OpenCode, - resume_target: ResumeTarget::OpenCodeSession { - session_id, - session_path: path.to_string_lossy().to_string(), - }, - external_path: Some(path.to_string_lossy().to_string()), - })) -} - fn load_opencode_session_info(path: &Path) -> Result> { let value: serde_json::Value = serde_json::from_reader(File::open(path)?)?; let session_id = value @@ -2670,53 +2405,19 @@ pub fn load_sessions_grouped() -> Result<(Vec, Vec)> { /// jcode snapshot, the other CLIs, and listing servers) is wasted there. 
This /// scoped loader keeps onboarding responsive by touching only the relevant /// transcripts. -/// -/// The live onboarding flow now uses [`load_external_cli_sessions_grouped_multi`] -/// (it shows every logged-in CLI together), so this single-CLI variant is kept -/// only as a focused test helper. -#[cfg(test)] pub(crate) fn load_external_cli_sessions_grouped( - cli: crate::tui::app::onboarding_flow::ExternalCli, + _cli: crate::tui::app::onboarding_flow::ExternalCli, ) -> (Vec, Vec) { - use crate::tui::app::onboarding_flow::ExternalCli; let scan_limit = session_scan_limit(); - let sessions = match cli { - ExternalCli::Codex => load_external_codex_sessions(scan_limit), - ExternalCli::ClaudeCode => load_external_claude_code_sessions(scan_limit), - }; + let sessions = load_external_casr_sessions(scan_limit); (Vec::new(), sessions) } -/// Load sessions for several external CLIs at once (Codex and/or Claude Code), -/// returned as a single combined orphan list compatible with -/// `SessionPicker::new_grouped`. -/// -/// First-run onboarding's "continue where you left off" picker shows every -/// external CLI the user is logged into, not just one, so it loads all of them -/// here. Each CLI is still scoped to its own transcripts (no jcode snapshots / -/// servers), keeping onboarding responsive. The picker sorts the merged result -/// by recency, so the newest session across all CLIs floats to the top. 
pub(crate) fn load_external_cli_sessions_grouped_multi( - clis: &[crate::tui::app::onboarding_flow::ExternalCli], + _clis: &[crate::tui::app::onboarding_flow::ExternalCli], ) -> (Vec, Vec) { - use crate::tui::app::onboarding_flow::ExternalCli; let scan_limit = session_scan_limit(); - let mut sessions = Vec::new(); - let mut seen_codex = false; - let mut seen_claude = false; - for cli in clis { - match cli { - ExternalCli::Codex if !seen_codex => { - seen_codex = true; - sessions.extend(load_external_codex_sessions(scan_limit)); - } - ExternalCli::ClaudeCode if !seen_claude => { - seen_claude = true; - sessions.extend(load_external_claude_code_sessions(scan_limit)); - } - _ => {} - } - } + let sessions = load_external_casr_sessions(scan_limit); (Vec::new(), sessions) }