From 64c1b5f3eb9498bbd4440829e172c2899c13ae62 Mon Sep 17 00:00:00 2001 From: Captain Date: Sun, 24 May 2026 23:03:50 +0100 Subject: [PATCH 1/2] feat: add POST /search endpoint to HTTP server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exposes the hybrid retrieval pipeline as a single HTTP endpoint, enabling custom Python agents to query CCE without subprocess management. The HTTP server previously exposed only /ingest, /health and low-level storage primitives (/vector_search, /fts_search, /chunks_by_ids) — there was no single endpoint for the full hybrid retrieval pipeline. This adds /search as a thin wrapper around the existing HybridRetriever pipeline (the same path used by the context_search MCP tool). Accepts: {"query": "...", "top_k": 10, "confidence_threshold": 0.2} Returns: ranked chunks with file_path, line range, content, confidence_score --- src/context_engine/serve_http.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/context_engine/serve_http.py b/src/context_engine/serve_http.py index 08fa9a4..c894f9f 100644 --- a/src/context_engine/serve_http.py +++ b/src/context_engine/serve_http.py @@ -14,6 +14,7 @@ from context_engine.storage.local_backend import LocalBackend from context_engine.indexer.embedder import Embedder from context_engine.compression.compressor import Compressor +from context_engine.retrieval.retriever import HybridRetriever from context_engine.models import Chunk, ChunkType, GraphNode, GraphEdge, NodeType, EdgeType try: @@ -34,6 +35,7 @@ def __init__(self, backend: LocalBackend, embedder: Embedder, compressor: Compre self.backend = backend self.embedder = embedder self.compressor = compressor + self.retriever = HybridRetriever(backend=backend, embedder=embedder) async def handle_vector_search(self, request: web.Request) -> web.Response: data = await _read_json(request) @@ -88,6 +90,35 @@ async def handle_delete_file(self, request: web.Request) -> web.Response: await self.backend.delete_by_file(file_path) return web.json_response({"ok": True}) + async def 
handle_search(self, request: web.Request) -> web.Response: + data = await _read_json(request) + query = (data.get("query") or "").strip() + if not query: + return web.json_response({"error": "query cannot be empty"}, status=400) + top_k = int(data.get("top_k", 10)) + confidence_threshold = float(data.get("confidence_threshold", 0.2)) + chunks = await self.retriever.retrieve( + query, + top_k=top_k, + confidence_threshold=confidence_threshold, + ) + return web.json_response({ + "results": [ + { + "id": c.id, + "file_path": c.file_path, + "start_line": c.start_line, + "end_line": c.end_line, + "content": c.content, + "chunk_type": c.chunk_type.value, + "language": c.language, + "confidence_score": getattr(c, "confidence_score", None), + "metadata": c.metadata, + } + for c in chunks + ] + }) + async def handle_health(self, request: web.Request) -> web.Response: return web.json_response({"status": "ok"}) @@ -171,6 +202,7 @@ def create_app(backend, embedder, compressor, *, api_token: str | None = None) - middlewares=[_make_auth_middleware(api_token), _error_middleware], ) app.router.add_get("/health", handler.handle_health) + app.router.add_post("/search", handler.handle_search) app.router.add_post("/vector_search", handler.handle_vector_search) app.router.add_post("/fts_search", handler.handle_fts_search) app.router.add_post("/chunks_by_ids", handler.handle_chunks_by_ids) From 26f850bd749a900eff902e871af811378e102af0 Mon Sep 17 00:00:00 2001 From: Captain Date: Sun, 21 Jun 2026 03:00:45 +0100 Subject: [PATCH 2/2] Harden POST /search input handling Address review feedback on the new endpoint: clamp top_k (1-100) and confidence_threshold (0.0-1.0) and return 400 on non-numeric input instead of raising; cap query length at 10,000 chars to match the MCP server's guard; and note that /search does not record token savings (unlike the MCP context_search handler). 
--- src/context_engine/serve_http.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/context_engine/serve_http.py b/src/context_engine/serve_http.py index c894f9f..a55e0ba 100644 --- a/src/context_engine/serve_http.py +++ b/src/context_engine/serve_http.py @@ -27,6 +27,9 @@ _MAX_REQUEST_BYTES = 10 * 1024 * 1024 # 10 MB — generous for bulk ingest, not unbounded +# Mirror the MCP server's guard (mcp_server._MAX_QUERY_CHARS) so a buggy or +# malicious client can't submit a multi-MB query string for embedding. +_MAX_QUERY_CHARS = 10_000 _LOOPBACK_HOSTS = {"127.0.0.1", "::1", "localhost"} @@ -95,8 +98,24 @@ async def handle_search(self, request: web.Request) -> web.Response: query = (data.get("query") or "").strip() if not query: return web.json_response({"error": "query cannot be empty"}, status=400) - top_k = int(data.get("top_k", 10)) - confidence_threshold = float(data.get("confidence_threshold", 0.2)) + if len(query) > _MAX_QUERY_CHARS: + return web.json_response( + {"error": f"query too long (max {_MAX_QUERY_CHARS} characters)"}, + status=400, + ) + # Validate + clamp: non-numeric input would otherwise raise ValueError and + # surface as a 400 "missing field" via the generic handler; clamp to the same + # ranges the MCP context_search handler uses. + try: + top_k = max(1, min(int(data.get("top_k", 10)), 100)) + confidence_threshold = max(0.0, min(float(data.get("confidence_threshold", 0.2)), 1.0)) + except (TypeError, ValueError): + return web.json_response( + {"error": "top_k must be an int and confidence_threshold a float"}, + status=400, + ) + # NOTE: unlike the MCP context_search handler, this endpoint does not call + # _record(), so queries via /search are not reflected in `cce savings`. chunks = await self.retriever.retrieve( query, top_k=top_k,