From 64c1b5f3eb9498bbd4440829e172c2899c13ae62 Mon Sep 17 00:00:00 2001
From: Captain <ogtradewithmeai@gmail.com>
Date: Sun, 24 May 2026 23:03:50 +0100
Subject: [PATCH 1/2] feat: add POST /search endpoint to HTTP server
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Exposes the hybrid retrieval pipeline as a single HTTP endpoint,
enabling custom Python agents to query CCE without subprocess management.

The HTTP server previously exposed only ingest/storage-level endpoints
(e.g. /vector_search, /fts_search, /chunks_by_ids) and /health — there was
no endpoint for the full hybrid query pipeline. This adds /search as a thin
wrapper around the existing HybridRetriever pipeline (the same path used by
the context_search MCP tool).

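Accepts: {"query": "...", "top_k": 10, "confidence_threshold": 0.2}
         ("query" is required; top_k and confidence_threshold are optional
         and default to the values shown)
Returns: {"results": [...]} — ranked chunks, each with id, file_path,
         start_line/end_line, content, chunk_type, language,
         confidence_score, and metadata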
---
 src/context_engine/serve_http.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/src/context_engine/serve_http.py b/src/context_engine/serve_http.py
index 08fa9a4..c894f9f 100644
--- a/src/context_engine/serve_http.py
+++ b/src/context_engine/serve_http.py
@@ -14,6 +14,7 @@
 from context_engine.storage.local_backend import LocalBackend
 from context_engine.indexer.embedder import Embedder
 from context_engine.compression.compressor import Compressor
+from context_engine.retrieval.retriever import HybridRetriever
 from context_engine.models import Chunk, ChunkType, GraphNode, GraphEdge, NodeType, EdgeType
 
 try:
@@ -34,6 +35,7 @@ def __init__(self, backend: LocalBackend, embedder: Embedder, compressor: Compre
         self.backend = backend
         self.embedder = embedder
         self.compressor = compressor
+        self.retriever = HybridRetriever(backend=backend, embedder=embedder)
 
     async def handle_vector_search(self, request: web.Request) -> web.Response:
         data = await _read_json(request)
@@ -88,6 +90,35 @@ async def handle_delete_file(self, request: web.Request) -> web.Response:
         await self.backend.delete_by_file(file_path)
         return web.json_response({"ok": True})
 
+    async def handle_search(self, request: web.Request) -> web.Response:
+        data = await _read_json(request)
+        query = (data.get("query") or "").strip()
+        if not query:
+            return web.json_response({"error": "query cannot be empty"}, status=400)
+        top_k = int(data.get("top_k", 10))
+        confidence_threshold = float(data.get("confidence_threshold", 0.2))
+        chunks = await self.retriever.retrieve(
+            query,
+            top_k=top_k,
+            confidence_threshold=confidence_threshold,
+        )
+        return web.json_response({
+            "results": [
+                {
+                    "id": c.id,
+                    "file_path": c.file_path,
+                    "start_line": c.start_line,
+                    "end_line": c.end_line,
+                    "content": c.content,
+                    "chunk_type": c.chunk_type.value,
+                    "language": c.language,
+                    "confidence_score": getattr(c, "confidence_score", None),
+                    "metadata": c.metadata,
+                }
+                for c in chunks
+            ]
+        })
+
     async def handle_health(self, request: web.Request) -> web.Response:
         return web.json_response({"status": "ok"})
 
@@ -171,6 +202,7 @@ def create_app(backend, embedder, compressor, *, api_token: str | None = None) -
         middlewares=[_make_auth_middleware(api_token), _error_middleware],
     )
     app.router.add_get("/health", handler.handle_health)
+    app.router.add_post("/search", handler.handle_search)
     app.router.add_post("/vector_search", handler.handle_vector_search)
     app.router.add_post("/fts_search", handler.handle_fts_search)
     app.router.add_post("/chunks_by_ids", handler.handle_chunks_by_ids)

From 26f850bd749a900eff902e871af811378e102af0 Mon Sep 17 00:00:00 2001
From: Captain <ogtradewithmeai@gmail.com>
Date: Sun, 21 Jun 2026 03:00:45 +0100
Subject: [PATCH 2/2] Harden POST /search input handling

Address review feedback on the new endpoint: clamp top_k (1-100) and
confidence_threshold (0.0-1.0), and return a descriptive 400 on non-numeric
input instead of letting the conversion error reach the generic error
handler; cap query length at 10,000 characters to match the MCP server's
guard; and document in a code comment that /search does not record token
savings (unlike the MCP context_search handler).
---
 src/context_engine/serve_http.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/src/context_engine/serve_http.py b/src/context_engine/serve_http.py
index c894f9f..a55e0ba 100644
--- a/src/context_engine/serve_http.py
+++ b/src/context_engine/serve_http.py
@@ -27,6 +27,9 @@
 
 
 _MAX_REQUEST_BYTES = 10 * 1024 * 1024  # 10 MB — generous for bulk ingest, not unbounded
+# Mirror the MCP server's guard (mcp_server._MAX_QUERY_CHARS) so a buggy or
+# malicious client can't submit a multi-MB query string for embedding.
+_MAX_QUERY_CHARS = 10_000
 _LOOPBACK_HOSTS = {"127.0.0.1", "::1", "localhost"}
 
 
@@ -95,8 +98,24 @@ async def handle_search(self, request: web.Request) -> web.Response:
         query = (data.get("query") or "").strip()
         if not query:
             return web.json_response({"error": "query cannot be empty"}, status=400)
-        top_k = int(data.get("top_k", 10))
-        confidence_threshold = float(data.get("confidence_threshold", 0.2))
+        if len(query) > _MAX_QUERY_CHARS:
+            return web.json_response(
+                {"error": f"query too long (max {_MAX_QUERY_CHARS} characters)"},
+                status=400,
+            )
+        # Validate + clamp: non-numeric input would otherwise raise ValueError and
+        # surface as a 400 "missing field" via the generic handler; clamp to the same
+        # ranges the MCP context_search handler uses.
+        try:
+            top_k = max(1, min(int(data.get("top_k", 10)), 100))
+            confidence_threshold = max(0.0, min(float(data.get("confidence_threshold", 0.2)), 1.0))
+        except (TypeError, ValueError):
+            return web.json_response(
+                {"error": "top_k must be an int and confidence_threshold a float"},
+                status=400,
+            )
+        # NOTE: unlike the MCP context_search handler, this endpoint does not call
+        # _record(), so queries via /search are not reflected in `cce savings`.
         chunks = await self.retriever.retrieve(
             query,
             top_k=top_k,