FalkorDB · DvirDukhan · May 27, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
diff --git a/.gitignore b/.gitignore
@@ -58,3 +58,4 @@ htmlcov/
 pytest_cache/
 *.log
 repositories/
+logs/
diff --git a/api/analyzers/analyzer.py b/api/analyzers/analyzer.py
@@ -57,6 +57,11 @@ def resolve(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: P
             locations = lsp.request_definition(str(file_path), node.start_point.row, node.start_point.column)
             return [(files[Path(self.resolve_path(location['absolutePath'], path))], files[Path(self.resolve_path(location['absolutePath'], path))].tree.root_node.descendant_for_point_range(Point(location['range']['start']['line'], location['range']['start']['character']), Point(location['range']['end']['line'], location['range']['end']['character']))) for location in locations if location and Path(self.resolve_path(location['absolutePath'], path)) in files]
         except Exception as e:
+            import logging
+            logging.getLogger(__name__).warning(
+                "resolve() failed for %s @%d:%d: %s",
+                file_path, node.start_point.row, node.start_point.column, e,
+            )
             return []
 
     @abstractmethod

diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py
@@ -134,7 +134,27 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
         else:
             lsps[".java"] = NullLanguageServer()
         if any(path.rglob('*.py')):
-            config = MultilspyConfig.from_dict({"code_language": "python", "environment_path": f"{path}/venv"})
+            import sys
+            py_venv = path / "venv"
+            py_dotvenv = path / ".venv"
+            if py_venv.is_dir() and (py_venv / "bin" / "python").exists():
+                env_path = str(py_venv)
+            elif py_dotvenv.is_dir() and (py_dotvenv / "bin" / "python").exists():
+                env_path = str(py_dotvenv)
+            else:
+                # Fall back to the host's Python environment so jedi has a
+                # valid interpreter to introspect; otherwise every
+                # request_definition() raises InvalidPythonEnvironment and
+                # we'd silently produce a graph with zero CALLS edges.
+                env_path = str(Path(sys.executable).resolve().parent.parent)
+                logging.info(
+                    "No venv at %s; falling back to host env %s for jedi LSP",
+                    path, env_path,
+                )
+            config = MultilspyConfig.from_dict({
+                "code_language": "python",
+                "environment_path": env_path,
+            })
             lsps[".py"] = SyncLanguageServer.create(config, logger, str(path))
         else:
             lsps[".py"] = NullLanguageServer()
@@ -146,7 +166,16 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
         with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server():
             files_len = len(self.files)
             for i, file_path in enumerate(files):
-                file = self.files[file_path]
+                file = self.files.get(file_path)
+                if file is None:
+                    # first_pass skipped this file (e.g. parse error, empty,
+                    # or ignored after entering the candidate list). Skip
+                    # in second_pass too instead of crashing the whole index.
+                    logging.warning(
+                        "second_pass: %s not in files map (first_pass skipped it); skipping",
+                        file_path,
+                    )
+                    continue
                 logging.info(f'Processing file ({i + 1}/{files_len}): {file_path}')
                 for _, entity in file.entities.items():
                     entity.resolved_symbol(lambda key, symbol, fp=file_path: analyzers[fp.suffix].resolve_symbol(self.files, lsps[fp.suffix], fp, path, key, symbol))

diff --git a/bench/agents/code_graph_mcp_adapter.py b/bench/agents/code_graph_mcp_adapter.py
@@ -0,0 +1,163 @@
+"""MCP-transport adapter to cgraph-mcp for the benchmark.
+
+Sibling of `code_graph_adapter.py` (HTTP). Where the HTTP adapter talks
+to the host FastAPI service over the network, this one spawns the
+`cgraph-mcp` stdio MCP server in-process via the official MCP Python
+SDK and dispatches tool calls over JSON-RPC.
+
+This gives us a second, real-world benchmark track that exercises the
+exact same transport that agents (Claude Code, Cursor, …) will use in
+production. Tool names match the 8-tool MCP surface
+(`index_repo`, `search_code`, `get_callers`, `get_callees`,
+`get_dependencies`, `impact_analysis`, `find_path`, `ask`).
+
+Each call spawns a fresh server, runs the call, and exits. That's
+~0.5-1s overhead per call but keeps the adapter trivially safe to call
+from a bash shim (one process per invocation, no shared state).
+A future optimisation could persist the server across calls via a
+side-channel daemon, but per-call spawn matches how external agents
+actually use MCP servers today.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+from typing import Any
+
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.stdio import stdio_client
+
+
+DEFAULT_TIMEOUT_SEC = 60.0
+
+
+def _env_for_mcp() -> dict[str, str]:
+    """Return the environment handed to the spawned cgraph-mcp process.
+
+    The caller's environment is copied unchanged; FALKORDB_HOST and
+    FALKORDB_PORT are filled in with 127.0.0.1 / 6379 only when the
+    caller has not set them. os.environ itself is never modified.
+    """
+    child_env = dict(os.environ)
+    for name, fallback in (("FALKORDB_HOST", "127.0.0.1"), ("FALKORDB_PORT", "6379")):
+        child_env.setdefault(name, fallback)
+    return child_env
+
+
+def _extract(result: Any) -> Any:
+    """Turn a CallToolResult into a plain, JSON-serialisable value.
+
+    The first content chunk carrying non-empty text decides the result:
+    it is returned JSON-decoded, or as the raw string when it is not
+    valid JSON. Only when no chunk carries text is `structuredContent`
+    consulted; a dict whose single key is "result" is unwrapped to that
+    key's value, and anything else (including a missing attribute,
+    which yields None) is returned unchanged.
+    """
+    texts = (getattr(part, "text", None) for part in result.content)
+    first_text = next((text for text in texts if text), None)
+    if first_text is not None:
+        try:
+            return json.loads(first_text)
+        except json.JSONDecodeError:
+            return first_text
+    structured = getattr(result, "structuredContent", None)
+    wrapped = isinstance(structured, dict) and structured.keys() == {"result"}
+    return structured["result"] if wrapped else structured
+
+
+async def _call_tool_async(name: str, arguments: dict[str, Any], timeout: float) -> Any:
+    """Spawn `cgraph-mcp` over stdio, call tool `name` once, and return its payload.
+
+    `timeout` bounds the initialize handshake and the tool call separately.
+    A result flagged `isError` comes back as `{"error": <payload>}`.
+    """
+    server = StdioServerParameters(command="cgraph-mcp", args=[], env=_env_for_mcp())
+    async with stdio_client(server) as (reader, writer), ClientSession(reader, writer) as session:
+        await asyncio.wait_for(session.initialize(), timeout=timeout)
+        outcome = await asyncio.wait_for(session.call_tool(name, arguments), timeout=timeout)
+        body = _extract(outcome)
+        return {"error": body} if getattr(outcome, "isError", False) else body
+
+
+def call_tool(name: str, arguments: dict[str, Any], *, timeout: float = DEFAULT_TIMEOUT_SEC) -> Any:
+    """Blocking wrapper: drive one `_call_tool_async` invocation to completion via `asyncio.run`."""
+    return asyncio.run(_call_tool_async(name=name, arguments=arguments, timeout=timeout))
+
+
+# ── Top-level convenience wrappers ─────────────────────────────────────
+# Names map 1:1 onto MCP tool names (and onto bench/tools/code_graph_mcp/
+# tools.yaml entries). Kwargs mirror each tool's MCP arg schema.
+
+
+def index_repo(path_or_url: str, branch: str | None = None, ignore: list[str] | None = None) -> dict[str, Any]:
+    """Call the `index_repo` MCP tool, sending `branch`/`ignore` only when they are not None."""
+    optional = {"branch": branch, "ignore": ignore}
+    payload: dict[str, Any] = {"path_or_url": path_or_url}
+    # Optionals left at None are dropped from the request altogether.
+    payload.update({key: value for key, value in optional.items() if value is not None})
+    return call_tool("index_repo", payload)
+
+
+def search_code(prefix: str, project: str, branch: str | None = None, limit: int = 10) -> Any:
+    """Call the `search_code` MCP tool; `branch` is sent only when it is not None."""
+    payload: dict[str, Any] = dict(prefix=prefix, project=project, limit=limit)
+    payload.update({} if branch is None else {"branch": branch})
+    return call_tool("search_code", payload)
+
+
+def _neighbors(tool: str, symbol_id: int, project: str, branch: str | None, limit: int) -> Any:
+    """Shared body of get_callers / get_callees / get_dependencies: call `tool`, omitting `branch` when None."""
+    payload: dict[str, Any] = dict(symbol_id=symbol_id, project=project, limit=limit)
+    payload.update({} if branch is None else {"branch": branch})
+    return call_tool(tool, payload)
+
+
+def get_callers(symbol_id: int, project: str, branch: str | None = None, limit: int = 50) -> Any:  # thin wrapper over _neighbors
+    return _neighbors(tool="get_callers", symbol_id=symbol_id, project=project, branch=branch, limit=limit)
+
+
+def get_callees(symbol_id: int, project: str, branch: str | None = None, limit: int = 50) -> Any:  # thin wrapper over _neighbors
+    return _neighbors(tool="get_callees", symbol_id=symbol_id, project=project, branch=branch, limit=limit)
+
+
+def get_dependencies(symbol_id: int, project: str, branch: str | None = None, limit: int = 50) -> Any:  # thin wrapper over _neighbors
+    return _neighbors(tool="get_dependencies", symbol_id=symbol_id, project=project, branch=branch, limit=limit)
+
+
+def impact_analysis(
+    symbol_id: int,
+    project: str,
+    branch: str | None = None,
+    direction: str = "IN",
+    depth: int = 3,
+) -> Any:
+    """Call the `impact_analysis` MCP tool for one symbol.
+
+    `symbol_id`, `project`, `direction` and `depth` are always sent;
+    `branch` is added to the request only when it is not None.
+    """
+    payload: dict[str, Any] = dict(symbol_id=symbol_id, project=project, direction=direction, depth=depth)
+    if branch is None:
+        return call_tool("impact_analysis", payload)
+    return call_tool("impact_analysis", {**payload, "branch": branch})
+
+
+def find_path(source_id: int, dest_id: int, project: str, branch: str | None = None) -> Any:
+    """Call the `find_path` MCP tool for a source/destination id pair.
+
+    `branch` is added to the request only when it is not None.
+    """
+    payload: dict[str, Any] = dict(source_id=source_id, dest_id=dest_id, project=project)
+    if branch is None:
+        return call_tool("find_path", payload)
+    return call_tool("find_path", {**payload, "branch": branch})
+
+
+def ask(question: str, project: str, branch: str | None = None) -> Any:
+    """Call the `ask` MCP tool; `branch` is sent only when it is not None."""
+    payload: dict[str, Any] = dict(question=question, project=project)
+    payload.update({} if branch is None else {"branch": branch})
+    return call_tool("ask", payload)
diff --git a/bench/agents/lsp_adapter.py b/bench/agents/lsp_adapter.py
@@ -131,6 +131,7 @@ def __init__(self, repo_root: str | Path, language: str = "python",
         self.shim = shim
         self._env_path = environment_path
         self._server: Any | None = None  # SyncLanguageServer
+        self._cm: Any | None = None  # live start_server() context (persistent mode)
 
     # ----- lifecycle ------------------------------------------------------
 
@@ -166,6 +167,34 @@ def server_running(self) -> Iterator["LSPClient"]:
             finally:
                 self._server = None
 
+    # ----- persistent lifecycle (for a long-lived MCP server) -------------
+
+    def start(self) -> "LSPClient":
+        """Bring up one long-lived language server and keep it running.
+
+        Where ``server_running`` scopes the server to a ``with`` block,
+        this enters the ``start_server()`` context by hand and leaves it
+        open so later calls can reuse the same server. The caller must
+        invoke ``stop()`` to close it. If a server is already held, nothing
+        is started. Returns ``self`` either way.
+        """
+        if self._server is None:
+            language_server = self._build_server()
+            live_context = language_server.start_server()
+            live_context.__enter__()
+            # Publish state only once __enter__ has returned without raising.
+            self._server, self._cm = language_server, live_context
+        return self
+
+    def stop(self) -> None:
+        """Exit the ``start()`` context, if any; state is cleared even when ``__exit__`` raises."""
+        cm = getattr(self, "_cm", None)
+        try:
+            if cm is not None:
+                cm.__exit__(None, None, None)
+        finally:
+            self._cm = self._server = None  # else a later start() would reuse a dead server
+
     # ----- relative path normalization -----------------------------------
 
     def _rel(self, file_path: str) -> str:

diff --git a/bench/analysis/__init__.py b/bench/analysis/__init__.py
diff --git a/bench/analysis/adopt_audit/edit_critical_overrides.json b/bench/analysis/adopt_audit/edit_critical_overrides.json
@@ -0,0 +1 @@
+{}
-Original file line number
+Diff line change
@@ Expand Up / @@ -58,3 +58,4 @@ htmlcov/ @@
     pytest_cache/
     *.log
     repositories/
+    logs/