diff --git a/coderag/api.py b/coderag/api.py index 497993d..8be79b9 100644 --- a/coderag/api.py +++ b/coderag/api.py @@ -174,6 +174,15 @@ def delete_path(self, path: Union[str, Path]) -> int: self.vectors.save() return len(removed) + def warm(self) -> None: + """Eagerly load the provider, store, vectors, and embedding model. + + Done at server startup so the first query — and the demo UI's search-speed + badge — reflect warm performance, not the one-off lazy model load. + """ + self.status() # builds provider/store/vectors + self.provider.embed_query("warm up") # loads the model + JITs the query path + def status(self) -> dict: """Index statistics and provenance.""" stats = self.store.stats() diff --git a/coderag/surfaces/mcp_server.py b/coderag/surfaces/mcp_server.py index 5ae10cd..bd17ae3 100644 --- a/coderag/surfaces/mcp_server.py +++ b/coderag/surfaces/mcp_server.py @@ -212,8 +212,7 @@ def reindex(path: Optional[str] = None, full: bool = False) -> dict: def _warm_up(cr: "CodeRAG") -> None: """Load the engine + embedding model once at startup, not on the first query.""" try: - cr.status() # builds provider/store/vectors - cr.provider.embed_query("warm up") # loads the model and JITs the query path + cr.warm() except Exception: # pragma: no cover - warm-up is best-effort logger.exception("MCP warm-up failed (continuing).") diff --git a/coderag/surfaces/static/app.css b/coderag/surfaces/static/app.css index 32086b1..e8d4849 100644 --- a/coderag/surfaces/static/app.css +++ b/coderag/surfaces/static/app.css @@ -314,6 +314,10 @@ fieldset.field legend { padding: 0 0.3rem; color: var(--ink-3); font-family: var /* --- results --- */ .results-head { display: flex; align-items: center; gap: 0.6rem; margin: 0.4rem 0 1rem; } .results-head h2 { font-size: 1.02rem; color: var(--ink-2); font-weight: 650; margin: 0; } +/* demo-only: show how fast local retrieval was (separate from the AI answer) */ +.speed-badge { margin-left: auto; display: inline-flex; align-items: baseline; gap: 0.5rem; font-family: var(--mono); font-variant-numeric: tabular-nums; } +.speed-badge .speed-ms { font-size: 0.78rem; font-weight: 700; color: var(--accent-strong); background: var(--accent-soft); border-radius: 999px; padding: 0.1rem 0.55rem; white-space: nowrap; } +.speed-badge .speed-corpus { font-size: 0.72rem; color: var(--ink-3); } .results { list-style: none; margin: 0; padding: 0; display: flex; flex-direction: column; gap: 0.9rem; } .hit { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); diff --git a/coderag/surfaces/templates/base.html b/coderag/surfaces/templates/base.html index 86fba04..8283d4f 100644 --- a/coderag/surfaces/templates/base.html +++ b/coderag/surfaces/templates/base.html @@ -61,7 +61,7 @@ {% if demo %}
- Demo mode — search is unlimited; AI answers are capped at {{ demo_left }}/{{ demo_max }} this session ({{ demo_cooldown }}s between). + Demo mode — instant local hybrid search, unlimited. AI-generated answers are a separate, optional step, capped at {{ demo_left }}/{{ demo_max }} this session ({{ demo_cooldown }}s between).
{% endif %} {% block content %}{% endblock %} diff --git a/coderag/surfaces/templates/index.html b/coderag/surfaces/templates/index.html index 515ebb0..cb6beb3 100644 --- a/coderag/surfaces/templates/index.html +++ b/coderag/surfaces/templates/index.html @@ -62,7 +62,16 @@ {% endif %} -

{{ hits | length }} result{{ '' if hits | length == 1 else 's' }}

+
+

{{ hits | length }} result{{ '' if hits | length == 1 else 's' }}

+ {% if demo and search_ms is defined %} + + ⚡ {{ '%.0f' | format(search_ms) }} ms + {% if status %}over {{ status.total_chunks }} chunks · {{ status.total_files }} files{% endif %} + + {% endif %} +
{% include "_results.html" %} {% else %}
No results for {{ q }} — try fewer or broader terms, or relax the filters.
diff --git a/coderag/surfaces/webui.py b/coderag/surfaces/webui.py index 1b1fa19..a48fa46 100644 --- a/coderag/surfaces/webui.py +++ b/coderag/surfaces/webui.py @@ -192,16 +192,20 @@ def _run_search( langs: List[str], kinds: List[str], path: Optional[str], -) -> List[SearchHit]: +) -> Tuple[List[SearchHit], float]: """Search, then post-filter. Fetches extra candidates when filters are active. ``search`` has no server-side filtering, so to keep filtered results useful we pull a - larger candidate set and narrow it down to ``k`` here. + larger candidate set and narrow it down to ``k`` here. Also returns the wall-clock + retrieval time in milliseconds — timed around the ``.search()`` call only (not + filtering or highlighting) — so the demo UI can show how fast the index answers. """ filtering = bool(langs or kinds or path) fetch = max(k, 50) if filtering else k + t0 = time.perf_counter() hits = _searcher_for(cr, dense, lexical).search(query, fetch) - return _apply_filters(hits, langs, kinds, path)[:k] + elapsed_ms = (time.perf_counter() - t0) * 1000.0 + return _apply_filters(hits, langs, kinds, path)[:k], elapsed_ms # --- app factory --- @@ -368,7 +372,7 @@ def home( } ) if q and q.strip(): - hits = _run_search( + hits, search_ms = _run_search( cr, q.strip(), k, @@ -379,6 +383,7 @@ def home( path=path, ) ctx["hits"] = _hit_views(hits) + ctx["search_ms"] = search_ms ctx["answer_qs"] = urlencode({"q": q.strip(), "k": k}) resp = templates.TemplateResponse(request, "index.html", ctx) if demo: @@ -498,6 +503,10 @@ def healthz() -> Dict[str, str]: def run_ui(cr: "CodeRAG", host: str = "127.0.0.1", port: int = 8501) -> None: import uvicorn - # Warm the index/provider so the first request isn't slow. - cr.status() + # Warm the index, provider, AND embedding model so the first request — and the demo's + # search-speed badge — reflect warm performance, not the one-off lazy model load. + try: + cr.warm() + except Exception: # pragma: no cover - warm-up is best-effort + logger.exception("UI warm-up failed (continuing).") uvicorn.run(create_ui_app(cr), host=host, port=port) diff --git a/tests/test_webui.py b/tests/test_webui.py index 37e5b96..c391b14 100644 --- a/tests/test_webui.py +++ b/tests/test_webui.py @@ -49,6 +49,7 @@ def test_search_renders_highlighted_hits(ui): assert "auth.py" in r.text assert 'class="highlight"' in r.text # Pygments output present assert "/file?path=" in r.text # citation links into the file viewer + assert "data-search-ms=" not in r.text # speed badge is demo-only def test_filters_narrow_results(ui): @@ -153,6 +154,16 @@ def test_demo_mode_banner_caps_and_hidden_reindex(tmp_path): assert cr.store.total_chunks() == n0 +def test_demo_mode_shows_search_speed_badge(tmp_path): + cr, client = _demo_client(tmp_path) + r = client.get("/", params={"q": "authenticate"}) + # Demo mode surfaces retrieval speed and frames it as separate from AI answers. + assert "data-search-ms=" in r.text + assert "instant local" in r.text.lower() # reworded demo banner + # The empty landing (no query, no results) shows no badge. + assert "data-search-ms=" not in client.get("/").text + + def test_demo_answer_quota_is_enforced(tmp_path): # No LLM backend → each allowed answer streams an "unavailable" notice, but it # still charges the soft per-session quota (the gate charges on attempt).