From 68a6127df42a6feea5601b29d289d552373b4b6f Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 19 Jun 2026 11:13:39 +0000
Subject: [PATCH] perf(warm): warm the full search path at startup, not just
 the model
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

warm() ran status() + embed_query(), so the store's vector/FTS/scalar indexes
and LanceDB's query path stayed cold until the first real query — which then
paid the entire index-load cost. With the new badge breakdown this is visible
as a large store_ms (e.g. embed 26ms vs store 363ms over 548 chunks on the
demo) while embed is already warm.

Run one representative search() in warm() so the retrieval indexes are resident
before the first user query. Measured locally (~550 chunks): first-query store
drops from ~35ms to ~14ms; on a slower deployed host the cold-load is far larger.
The search warm-up is best-effort and guarded: a failure there is logged and
startup continues (status() and embed_query() remain unguarded, as before).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Y1DfHPqxHppXF6zEYgFKi3
---
 coderag/api.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/coderag/api.py b/coderag/api.py
index f306da3..45f8845 100644
--- a/coderag/api.py
+++ b/coderag/api.py
@@ -196,13 +196,23 @@ def delete_path(self, path: Union[str, Path]) -> int:
             return self.store.delete_file(rel)
 
     def warm(self) -> None:
-        """Eagerly load the provider, store, and embedding model.
+        """Eagerly load the provider, store, and embedding model — and the search path.
 
         Done at server startup so the first query — and the demo UI's search-speed
-        badge — reflect warm performance, not the one-off lazy model load.
+        badge — reflect warm performance, not the one-off lazy load. A real search is
+        run (not just an embed) because the store's vector/FTS/scalar indexes and
+        LanceDB's query path are loaded lazily on first use; warming only the model
+        leaves that cold-load to land on the first user query, where it shows up as a
+        large ``store_ms``. Best-effort: warm-up failures must not block startup.
         """
         self.status()  # builds provider/store
         self.provider.embed_query("warm up")  # loads the model + JITs the query path
+        try:
+            # Exercise the full retrieval path (vector + lexical + hydrate) so the
+            # store's indexes are resident before the first real query.
+            self.search("warm up", top_k=1)
+        except Exception:  # pragma: no cover - warm-up is best-effort
+            logger.exception("Search warm-up failed (continuing).")
 
     def status(self) -> dict:
         """Index statistics and provenance."""