def warm(self) -> None:
    """Eagerly load the provider, store, embedding model, and search path.

    Done at server startup so the first query — and the demo UI's search-speed
    badge — reflect warm performance, not the one-off lazy load. A real search is
    run (not just an embed) because the store's vector/FTS/scalar indexes and
    LanceDB's query path are loaded lazily on first use; warming only the model
    leaves that cold-load to land on the first user query, where it shows up as a
    large ``store_ms``.

    Best-effort: warm-up failures must not block startup. Every stage is
    guarded, so any exception is logged with its traceback and swallowed
    rather than propagated to the caller.

    Returns:
        None. The method is called purely for its side effects.
    """
    try:
        self.status()  # builds provider/store
        self.provider.embed_query("warm up")  # loads the model + JITs the query path
    except Exception:  # pragma: no cover - warm-up is best-effort
        # The search below needs the provider, store and model that just failed
        # to load; retrying through it would only repeat the same failure.
        logger.exception("Model warm-up failed (continuing).")
        return

    try:
        # Exercise the full retrieval path (vector + lexical + hydrate) so the
        # store's indexes are resident before the first real query.
        self.search("warm up", top_k=1)
    except Exception:  # pragma: no cover - warm-up is best-effort
        logger.exception("Search warm-up failed (continuing).")