diff --git a/coderag/api.py b/coderag/api.py
index 497993d..8be79b9 100644
--- a/coderag/api.py
+++ b/coderag/api.py
@@ -174,6 +174,15 @@ def delete_path(self, path: Union[str, Path]) -> int:
self.vectors.save()
return len(removed)
+ def warm(self) -> None:
+ """Eagerly load the provider, store, vectors, and embedding model.
+
+ Meant for server startup, so that the first query — and the demo UI's search-speed
+ badge — reflects warm performance, not the one-off lazy model load. Returns nothing.
+ """
+ self.status() # called only for its side effect: builds provider/store/vectors
+ self.provider.embed_query("warm up") # throwaway query: loads the model + JITs the query path
+
def status(self) -> dict:
"""Index statistics and provenance."""
stats = self.store.stats()
diff --git a/coderag/surfaces/mcp_server.py b/coderag/surfaces/mcp_server.py
index 5ae10cd..bd17ae3 100644
--- a/coderag/surfaces/mcp_server.py
+++ b/coderag/surfaces/mcp_server.py
@@ -212,8 +212,7 @@ def reindex(path: Optional[str] = None, full: bool = False) -> dict:
def _warm_up(cr: "CodeRAG") -> None:
"""Load the engine + embedding model once at startup, not on the first query."""
try:
- cr.status() # builds provider/store/vectors
- cr.provider.embed_query("warm up") # loads the model and JITs the query path
+ cr.warm() # status() + a throwaway embed_query: builds the engine and loads the model
except Exception: # pragma: no cover - warm-up is best-effort
logger.exception("MCP warm-up failed (continuing).")
diff --git a/coderag/surfaces/static/app.css b/coderag/surfaces/static/app.css
index 32086b1..e8d4849 100644
--- a/coderag/surfaces/static/app.css
+++ b/coderag/surfaces/static/app.css
@@ -314,6 +314,10 @@ fieldset.field legend { padding: 0 0.3rem; color: var(--ink-3); font-family: var
/* --- results --- */
.results-head { display: flex; align-items: center; gap: 0.6rem; margin: 0.4rem 0 1rem; }
.results-head h2 { font-size: 1.02rem; color: var(--ink-2); font-weight: 650; margin: 0; }
+/* demo-only: show how fast local retrieval was (separate from the AI answer) */
+.speed-badge { margin-left: auto; display: inline-flex; align-items: baseline; gap: 0.5rem; font-family: var(--mono); font-variant-numeric: tabular-nums; }
+.speed-badge .speed-ms { font-size: 0.78rem; font-weight: 700; color: var(--accent-strong); background: var(--accent-soft); border-radius: 999px; padding: 0.1rem 0.55rem; white-space: nowrap; }
+.speed-badge .speed-corpus { font-size: 0.72rem; color: var(--ink-3); }
.results { list-style: none; margin: 0; padding: 0; display: flex; flex-direction: column; gap: 0.9rem; }
.hit {
background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius);
diff --git a/coderag/surfaces/templates/base.html b/coderag/surfaces/templates/base.html
index 86fba04..8283d4f 100644
--- a/coderag/surfaces/templates/base.html
+++ b/coderag/surfaces/templates/base.html
@@ -61,7 +61,7 @@
{% if demo %}
- Demo mode — search is unlimited; AI answers are capped at {{ demo_left }}/{{ demo_max }} this session ({{ demo_cooldown }}s between).
+ Demo mode — instant local hybrid search, unlimited. AI-generated answers are a separate, optional step, capped at {{ demo_left }}/{{ demo_max }} this session ({{ demo_cooldown }}s between).
{% endif %}
{% block content %}{% endblock %}
diff --git a/coderag/surfaces/templates/index.html b/coderag/surfaces/templates/index.html
index 515ebb0..cb6beb3 100644
--- a/coderag/surfaces/templates/index.html
+++ b/coderag/surfaces/templates/index.html
@@ -62,7 +62,16 @@
{% endif %}
- {{ hits | length }} result{{ '' if hits | length == 1 else 's' }}
+
+
{{ hits | length }} result{{ '' if hits | length == 1 else 's' }}
+ {% if demo and search_ms is defined %}
+
+ ⚡ {{ '%.0f' | format(search_ms) }} ms
+ {% if status %}over {{ status.total_chunks }} chunks · {{ status.total_files }} files{% endif %}
+
+ {% endif %}
+
{% include "_results.html" %}
{% else %}
No results for {{ q }} — try fewer or broader terms, or relax the filters.
diff --git a/coderag/surfaces/webui.py b/coderag/surfaces/webui.py
index 1b1fa19..a48fa46 100644
--- a/coderag/surfaces/webui.py
+++ b/coderag/surfaces/webui.py
@@ -192,16 +192,20 @@ def _run_search(
langs: List[str],
kinds: List[str],
path: Optional[str],
-) -> List[SearchHit]:
+) -> Tuple[List[SearchHit], float]:
"""Search, then post-filter. Fetches extra candidates when filters are active.
``search`` has no server-side filtering, so to keep filtered results useful we pull a
- larger candidate set and narrow it down to ``k`` here.
+ larger candidate set and narrow it down to ``k`` here. Also returns the wall-clock
+ ms spent in ``.search()`` alone (no filtering or highlighting), for the demo UI badge.
"""
filtering = bool(langs or kinds or path)
fetch = max(k, 50) if filtering else k
- hits = _searcher_for(cr, dense, lexical).search(query, fetch)
- return _apply_filters(hits, langs, kinds, path)[:k]
+ searcher = _searcher_for(cr, dense, lexical) # resolved before the clock starts
+ t0 = time.perf_counter()
+ hits = searcher.search(query, fetch)
+ elapsed_ms = (time.perf_counter() - t0) * 1000.0
+ return _apply_filters(hits, langs, kinds, path)[:k], elapsed_ms
# --- app factory ---
@@ -368,7 +372,7 @@ def home(
}
)
if q and q.strip():
- hits = _run_search(
+ hits, search_ms = _run_search(
cr,
q.strip(),
k,
@@ -379,6 +383,7 @@ def home(
path=path,
)
ctx["hits"] = _hit_views(hits)
+ ctx["search_ms"] = search_ms
ctx["answer_qs"] = urlencode({"q": q.strip(), "k": k})
resp = templates.TemplateResponse(request, "index.html", ctx)
if demo:
@@ -498,6 +503,10 @@ def healthz() -> Dict[str, str]:
def run_ui(cr: "CodeRAG", host: str = "127.0.0.1", port: int = 8501) -> None:
import uvicorn
- # Warm the index/provider so the first request isn't slow.
- cr.status()
+ # Warm the index, provider, AND embedding model before serving, so the first request —
+ # and the demo's search-speed badge — shows warm performance, not the lazy model load.
+ try:
+ cr.warm()
+ except Exception: # pragma: no cover - warm-up is best-effort
+ logger.exception("UI warm-up failed (continuing).") # the server is still started below
uvicorn.run(create_ui_app(cr), host=host, port=port)
diff --git a/tests/test_webui.py b/tests/test_webui.py
index 37e5b96..c391b14 100644
--- a/tests/test_webui.py
+++ b/tests/test_webui.py
@@ -49,6 +49,7 @@ def test_search_renders_highlighted_hits(ui):
assert "auth.py" in r.text
assert 'class="highlight"' in r.text # Pygments output present
assert "/file?path=" in r.text # citation links into the file viewer
+ assert "data-search-ms=" not in r.text # speed badge is demo-only
def test_filters_narrow_results(ui):
@@ -153,6 +154,16 @@ def test_demo_mode_banner_caps_and_hidden_reindex(tmp_path):
assert cr.store.total_chunks() == n0
+def test_demo_mode_shows_search_speed_badge(tmp_path):
+ _, client = _demo_client(tmp_path)
+ searched = client.get("/", params={"q": "authenticate"}).text
+ # A demo search renders the retrieval-speed badge, framed as separate from AI answers.
+ assert "data-search-ms=" in searched
+ assert "instant local" in searched.lower() # reworded demo banner
+ # With no query there are no results, so the landing page carries no badge.
+ assert "data-search-ms=" not in client.get("/").text
+
+
def test_demo_answer_quota_is_enforced(tmp_path):
# No LLM backend → each allowed answer streams an "unavailable" notice, but it
# still charges the soft per-session quota (the gate charges on attempt).