Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions coderag/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,15 @@ def delete_path(self, path: Union[str, Path]) -> int:
self.vectors.save()
return len(removed)

def warm(self) -> None:
    """Force the lazy pieces (provider, store, vectors, embedding model) to load now.

    Intended for server startup, so that the first real query — and the demo UI's
    search-speed badge — measure warm performance instead of the one-off lazy
    model load.
    """
    # Called only for its side effect: it builds provider/store/vectors.
    self.status()
    # Embedding a throwaway query loads the model and JITs the query path.
    embedder = self.provider
    embedder.embed_query("warm up")

def status(self) -> dict:
"""Index statistics and provenance."""
stats = self.store.stats()
Expand Down
3 changes: 1 addition & 2 deletions coderag/surfaces/mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,7 @@ def reindex(path: Optional[str] = None, full: bool = False) -> dict:
def _warm_up(cr: "CodeRAG") -> None:
"""Load the engine + embedding model once at startup, not on the first query."""
try:
cr.status() # builds provider/store/vectors
cr.provider.embed_query("warm up") # loads the model and JITs the query path
cr.warm()
except Exception: # pragma: no cover - warm-up is best-effort
logger.exception("MCP warm-up failed (continuing).")

Expand Down
4 changes: 4 additions & 0 deletions coderag/surfaces/static/app.css
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,10 @@ fieldset.field legend { padding: 0 0.3rem; color: var(--ink-3); font-family: var
/* --- results --- */
.results-head { display: flex; align-items: center; gap: 0.6rem; margin: 0.4rem 0 1rem; }
.results-head h2 { font-size: 1.02rem; color: var(--ink-2); font-weight: 650; margin: 0; }
/* demo-only: show how fast local retrieval was (separate from the AI answer) */
.speed-badge { margin-left: auto; display: inline-flex; align-items: baseline; gap: 0.5rem; font-family: var(--mono); font-variant-numeric: tabular-nums; }
.speed-badge .speed-ms { font-size: 0.78rem; font-weight: 700; color: var(--accent-strong); background: var(--accent-soft); border-radius: 999px; padding: 0.1rem 0.55rem; white-space: nowrap; }
.speed-badge .speed-corpus { font-size: 0.72rem; color: var(--ink-3); }
.results { list-style: none; margin: 0; padding: 0; display: flex; flex-direction: column; gap: 0.9rem; }
.hit {
background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius);
Expand Down
2 changes: 1 addition & 1 deletion coderag/surfaces/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
{% if demo %}
<div class="demo-banner">
<span class="demo-dot" aria-hidden="true"></span>
<span><strong>Demo mode</strong> — search is unlimited; AI answers are capped at {{ demo_left }}/{{ demo_max }} this session ({{ demo_cooldown }}s between).</span>
<span><strong>Demo mode</strong> — instant local hybrid search, unlimited. AI-generated answers are a separate, optional step, capped at {{ demo_left }}/{{ demo_max }} this session ({{ demo_cooldown }}s between).</span>
</div>
{% endif %}
{% block content %}{% endblock %}
Expand Down
11 changes: 10 additions & 1 deletion coderag/surfaces/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,16 @@
<pre id="answer" class="answer" hidden></pre>
</section>
{% endif %}
<div class="results-head"><h2>{{ hits | length }} result{{ '' if hits | length == 1 else 's' }}</h2></div>
<div class="results-head">
<h2>{{ hits | length }} result{{ '' if hits | length == 1 else 's' }}</h2>
{% if demo and search_ms is defined %}
<span class="speed-badge" data-search-ms="{{ '%.0f' | format(search_ms) }}"
title="Local hybrid retrieval time — separate from the optional AI answer">
<span class="speed-ms">⚡ {{ '%.0f' | format(search_ms) }} ms</span>
{% if status %}<span class="speed-corpus">over {{ status.total_chunks }} chunks · {{ status.total_files }} files</span>{% endif %}
</span>
{% endif %}
</div>
{% include "_results.html" %}
{% else %}
<div class="notice">No results for <strong>{{ q }}</strong> — try fewer or broader terms, or relax the filters.</div>
Expand Down
21 changes: 15 additions & 6 deletions coderag/surfaces/webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,16 +192,20 @@ def _run_search(
langs: List[str],
kinds: List[str],
path: Optional[str],
) -> List[SearchHit]:
) -> Tuple[List[SearchHit], float]:
"""Search, then post-filter. Fetches extra candidates when filters are active.

``search`` has no server-side filtering, so to keep filtered results useful we pull a
larger candidate set and narrow it down to ``k`` here.
larger candidate set and narrow it down to ``k`` here. Also returns the wall-clock
retrieval time in milliseconds — timed around the ``.search()`` call only (not
filtering or highlighting) — so the demo UI can show how fast the index answers.
"""
filtering = bool(langs or kinds or path)
fetch = max(k, 50) if filtering else k
t0 = time.perf_counter()
hits = _searcher_for(cr, dense, lexical).search(query, fetch)
return _apply_filters(hits, langs, kinds, path)[:k]
elapsed_ms = (time.perf_counter() - t0) * 1000.0
return _apply_filters(hits, langs, kinds, path)[:k], elapsed_ms


# --- app factory ---
Expand Down Expand Up @@ -368,7 +372,7 @@ def home(
}
)
if q and q.strip():
hits = _run_search(
hits, search_ms = _run_search(
cr,
q.strip(),
k,
Expand All @@ -379,6 +383,7 @@ def home(
path=path,
)
ctx["hits"] = _hit_views(hits)
ctx["search_ms"] = search_ms
ctx["answer_qs"] = urlencode({"q": q.strip(), "k": k})
resp = templates.TemplateResponse(request, "index.html", ctx)
if demo:
Expand Down Expand Up @@ -498,6 +503,10 @@ def healthz() -> Dict[str, str]:
def run_ui(cr: "CodeRAG", host: str = "127.0.0.1", port: int = 8501) -> None:
    """Warm the engine, then serve the web UI with uvicorn on ``host``/``port``.

    Warm-up is best-effort: if it fails, the error is logged and the server
    still starts.
    """
    import uvicorn

    # Warm the index, provider, AND embedding model so the first request — and the demo's
    # search-speed badge — reflect warm performance, not the one-off lazy model load.
    # ``cr.warm()`` itself calls ``cr.status()``, so there is no separate status() call
    # outside the try: an unguarded call would turn a warm-up failure into a startup crash.
    try:
        cr.warm()
    except Exception:  # pragma: no cover - warm-up is best-effort
        logger.exception("UI warm-up failed (continuing).")
    uvicorn.run(create_ui_app(cr), host=host, port=port)
11 changes: 11 additions & 0 deletions tests/test_webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def test_search_renders_highlighted_hits(ui):
assert "auth.py" in r.text
assert 'class="highlight"' in r.text # Pygments output present
assert "/file?path=" in r.text # citation links into the file viewer
assert "data-search-ms=" not in r.text # speed badge is demo-only


def test_filters_narrow_results(ui):
Expand Down Expand Up @@ -153,6 +154,16 @@ def test_demo_mode_banner_caps_and_hidden_reindex(tmp_path):
assert cr.store.total_chunks() == n0


def test_demo_mode_shows_search_speed_badge(tmp_path):
    """Demo searches render the speed badge; the empty landing page does not."""
    _cr, client = _demo_client(tmp_path)
    badge_marker = "data-search-ms="

    search_page = client.get("/", params={"q": "authenticate"}).text
    # Demo mode surfaces retrieval speed and frames it as separate from AI answers.
    assert badge_marker in search_page
    assert "instant local" in search_page.lower()  # reworded demo banner

    # The empty landing (no query, no results) shows no badge.
    landing_page = client.get("/").text
    assert badge_marker not in landing_page


def test_demo_answer_quota_is_enforced(tmp_path):
# No LLM backend → each allowed answer streams an "unavailable" notice, but it
# still charges the soft per-session quota (the gate charges on attempt).
Expand Down
Loading