From 5e376e6647dada6c64628122c8f0366d0b0867f2 Mon Sep 17 00:00:00 2001 From: Dvir Dukhan <12258836+DvirDukhan@users.noreply.github.com> Date: Wed, 27 May 2026 14:26:45 +0300 Subject: [PATCH] =?UTF-8?q?feat(mcp):=20auto-init=20=E2=80=94=20ensure=20F?= =?UTF-8?q?alkorDB=20+=20opt-in=20auto-index=20(T12=20#660)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero-config startup so a fresh user doesn't need to run `cgraph ensure-db` and `index_repo` manually. api/mcp/auto_init.py - ensure_falkordb(): on server boot, ping FalkorDB; if it is unreachable and the host is local, shell out to `cgraph ensure-db` (reuses the existing CLI's Docker bootstrap rather than duplicating it). Subprocess (not in-process call) so the CLI's stdout JSON doesn't pollute the MCP server's stdio transport. Never raises — server start continues even on bootstrap failure so individual tools can surface their own errors. - maybe_auto_index(cwd=None, *, project=None, branch=None): opt-in via CODE_GRAPH_AUTO_INDEX env var (off by default — indexing a large repo can take minutes and surprising the user on first call is bad UX). Detects current branch via `git rev-parse`, falls back to `_default`. Per-(project, branch) idempotency via a module-level set; second call for the same key is a no-op. - _truthy helper accepts 1/true/yes/on (case insensitive). api/mcp/server.py - main() now runs ensure_falkordb() and maybe_auto_index() before app.run(). Module-level import behaviour unchanged (tests that `import api.mcp.server` don't trigger any I/O). tests/mcp/test_auto_init.py (9 tests) - ensure_falkordb: no-op when reachable, runs cgraph when not, skips Docker for remote hosts, handles missing CLI binary. - maybe_auto_index: skipped when env unset, indexes when opted in, idempotent across calls for same key, distinct branches each get one auto-index, _truthy semantics. All mocks — no Docker, no real FalkorDB writes — so the tests run in <2s without external dependencies. 
Out of scope per ticket: watch mode / re-indexing on FS change, auto-pulling Docker image (cgraph ensure-db handles that), cross-session state. Closes #660. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/mcp/auto_init.py | 172 ++++++++++++++++++++++++++++++++++++ api/mcp/server.py | 10 ++- tests/mcp/test_auto_init.py | 168 +++++++++++++++++++++++++++++++++++ 3 files changed, 349 insertions(+), 1 deletion(-) create mode 100644 api/mcp/auto_init.py create mode 100644 tests/mcp/test_auto_init.py diff --git a/api/mcp/auto_init.py b/api/mcp/auto_init.py new file mode 100644 index 00000000..86ac913d --- /dev/null +++ b/api/mcp/auto_init.py @@ -0,0 +1,172 @@ +"""Zero-config startup helpers for the MCP server (T12). + +Two automation behaviours: + +1. :func:`ensure_falkordb` — at server boot, ping FalkorDB; if it's + unreachable on a localhost host, run ``cgraph ensure-db`` to spin up + the existing Docker container. Reuses ``api.cli.ensure_db`` rather + than duplicating Docker logic. + +2. :func:`maybe_auto_index` — when ``CODE_GRAPH_AUTO_INDEX=true`` is set + (opt-in, off by default), index the current working directory into a + per-branch graph so the agent doesn't have to call ``index_repo`` + first. Idempotent within a single process — the second call for the + same ``(project, branch)`` is a no-op. + +Both are deliberately conservative: ensure-db only acts on localhost +hosts, and auto-index requires explicit opt-in because indexing a +large repo can take minutes and surprising the user with that on +first tool call is bad UX. 
+""" + +from __future__ import annotations + +import logging +import os +import socket +import subprocess +from pathlib import Path +from typing import Iterable, Optional + + +logger = logging.getLogger(__name__) + + +_LOCAL_HOSTS = {"localhost", "127.0.0.1", "::1"} +_AUTO_INDEXED: set[tuple[str, str]] = set() + + +# --------------------------------------------------------------------------- +# ensure_falkordb +# --------------------------------------------------------------------------- + + +def _falkordb_reachable(host: str, port: int, timeout: float = 1.0) -> bool: + try: + with socket.create_connection((host, port), timeout=timeout): + return True + except OSError: + return False + + +def ensure_falkordb() -> dict: + """Make sure FalkorDB is reachable; bootstrap Docker if not. + + Returns a small status dict so the caller can log it. Never raises — + the goal is to start the MCP server even if the bootstrap fails; + individual tools will then surface their own errors. + """ + host = os.getenv("FALKORDB_HOST", "localhost") + try: + port = int(os.getenv("FALKORDB_PORT", "6379")) + except ValueError: + return {"status": "error", "message": "invalid FALKORDB_PORT"} + + if _falkordb_reachable(host, port): + return {"status": "ok", "host": host, "port": port, "action": "none"} + + if host not in _LOCAL_HOSTS: + return { + "status": "error", + "host": host, + "port": port, + "message": "FalkorDB unreachable; auto-start only supports localhost", + } + + logger.info("FalkorDB unreachable on %s:%s — running `cgraph ensure-db`", host, port) + try: + # Subprocess so the CLI's stdout (which prints JSON) doesn't pollute + # the MCP server's own stdio transport. 
+ result = subprocess.run( + ["cgraph", "ensure-db"], + capture_output=True, + text=True, + check=False, + ) + except FileNotFoundError: + return {"status": "error", "message": "cgraph CLI not on PATH"} + + return { + "status": "ok" if result.returncode == 0 else "error", + "host": host, + "port": port, + "action": "started", + "stdout": result.stdout.strip(), + "stderr": result.stderr.strip(), + } + + +# --------------------------------------------------------------------------- +# maybe_auto_index +# --------------------------------------------------------------------------- + + +def _truthy(val: Optional[str]) -> bool: + return (val or "").strip().lower() in {"1", "true", "yes", "on"} + + +def _detect_branch(cwd: Path) -> str: + """Best-effort current-branch detection. Falls back to ``_default``.""" + try: + result = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + cwd=str(cwd), + capture_output=True, + text=True, + check=False, + ) + if result.returncode == 0 and result.stdout.strip(): + return result.stdout.strip() + except FileNotFoundError: + pass + return "_default" + + +def maybe_auto_index( + cwd: Optional[Path] = None, + *, + project: Optional[str] = None, + branch: Optional[str] = None, +) -> dict: + """If opt-in env var is set, index ``cwd`` into the per-branch graph. + + Caches "already auto-indexed this session" per ``(project, branch)`` + in the module-level :data:`_AUTO_INDEXED` set so subsequent calls + are no-ops. 
+ """ + if not _truthy(os.getenv("CODE_GRAPH_AUTO_INDEX")): + return {"status": "skipped", "reason": "CODE_GRAPH_AUTO_INDEX not set"} + + cwd_path = (cwd or Path.cwd()).resolve() + project_name = project or cwd_path.name + branch_name = branch or _detect_branch(cwd_path) + + key = (project_name, branch_name) + if key in _AUTO_INDEXED: + return {"status": "skipped", "reason": "already auto-indexed", "key": key} + + # Local imports so the MCP server can import this module without paying + # the analyzer-stack import cost at module load. + from api.analyzers.source_analyzer import SourceAnalyzer + from api.graph import Graph + + logger.info("Auto-indexing %s @ %s into code:%s:%s", cwd_path, branch_name, project_name, branch_name) + graph = Graph(project_name, branch=branch_name) + SourceAnalyzer().analyze_local_folder(str(cwd_path), graph) + + _AUTO_INDEXED.add(key) + return { + "status": "indexed", + "project": project_name, + "branch": branch_name, + "path": str(cwd_path), + } + + +def reset_auto_index_cache(keys: Optional[Iterable[tuple[str, str]]] = None) -> None: + """Drop the auto-index session cache. Tests only.""" + if keys is None: + _AUTO_INDEXED.clear() + else: + for k in keys: + _AUTO_INDEXED.discard(k) diff --git a/api/mcp/server.py b/api/mcp/server.py index 63ce5cfa..7be41464 100644 --- a/api/mcp/server.py +++ b/api/mcp/server.py @@ -22,8 +22,16 @@ def main() -> None: """Run the MCP server over stdio. - Console-script entry point for ``cgraph-mcp``. + Console-script entry point for ``cgraph-mcp``. Runs the T12 + auto-init helpers first so a freshly-cloned user gets a working + FalkorDB without manual `cgraph ensure-db`, and (opt-in via + ``CODE_GRAPH_AUTO_INDEX``) an indexed CWD without manual + `index_repo`. 
""" + from .auto_init import ensure_falkordb, maybe_auto_index + + ensure_falkordb() + maybe_auto_index() app.run(transport="stdio") diff --git a/tests/mcp/test_auto_init.py b/tests/mcp/test_auto_init.py new file mode 100644 index 00000000..f7db2f5f --- /dev/null +++ b/tests/mcp/test_auto_init.py @@ -0,0 +1,168 @@ +"""T12 — auto_init tests (mocked subprocess / graph).""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# ensure_falkordb +# --------------------------------------------------------------------------- + + +def test_ensure_falkordb_no_action_when_reachable(monkeypatch): + from api.mcp import auto_init + + monkeypatch.setenv("FALKORDB_HOST", "localhost") + monkeypatch.setenv("FALKORDB_PORT", "6379") + + with patch.object(auto_init, "_falkordb_reachable", return_value=True), \ + patch("api.mcp.auto_init.subprocess.run") as mock_run: + status = auto_init.ensure_falkordb() + + assert status["status"] == "ok" + assert status["action"] == "none" + mock_run.assert_not_called() + + +def test_ensure_falkordb_runs_cgraph_when_unreachable(monkeypatch): + from api.mcp import auto_init + + monkeypatch.setenv("FALKORDB_HOST", "localhost") + monkeypatch.setenv("FALKORDB_PORT", "6379") + + fake_result = MagicMock(returncode=0, stdout="ok", stderr="") + with patch.object(auto_init, "_falkordb_reachable", return_value=False), \ + patch("api.mcp.auto_init.subprocess.run", return_value=fake_result) as mock_run: + status = auto_init.ensure_falkordb() + + assert status["status"] == "ok" + assert status["action"] == "started" + mock_run.assert_called_once() + args = mock_run.call_args.args[0] + assert args == ["cgraph", "ensure-db"] + + +def test_ensure_falkordb_skips_docker_for_remote_host(monkeypatch): + """Auto-start is localhost-only by design.""" + from api.mcp import auto_init + + 
monkeypatch.setenv("FALKORDB_HOST", "graph.example.com") + monkeypatch.setenv("FALKORDB_PORT", "6379") + + with patch.object(auto_init, "_falkordb_reachable", return_value=False), \ + patch("api.mcp.auto_init.subprocess.run") as mock_run: + status = auto_init.ensure_falkordb() + + assert status["status"] == "error" + assert "localhost" in status["message"] + mock_run.assert_not_called() + + +def test_ensure_falkordb_handles_missing_cli(monkeypatch): + from api.mcp import auto_init + + monkeypatch.setenv("FALKORDB_HOST", "localhost") + monkeypatch.setenv("FALKORDB_PORT", "6379") + + with patch.object(auto_init, "_falkordb_reachable", return_value=False), \ + patch("api.mcp.auto_init.subprocess.run", side_effect=FileNotFoundError): + status = auto_init.ensure_falkordb() + + assert status["status"] == "error" + assert "PATH" in status["message"] + + +# --------------------------------------------------------------------------- +# maybe_auto_index +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _reset_cache(): + from api.mcp.auto_init import reset_auto_index_cache + + reset_auto_index_cache() + yield + reset_auto_index_cache() + + +def test_maybe_auto_index_skipped_when_env_unset(monkeypatch, tmp_path): + from api.mcp import auto_init + + monkeypatch.delenv("CODE_GRAPH_AUTO_INDEX", raising=False) + + with patch.object(auto_init, "SourceAnalyzer", None, create=True): + status = auto_init.maybe_auto_index(cwd=tmp_path) + + assert status["status"] == "skipped" + assert "CODE_GRAPH_AUTO_INDEX" in status["reason"] + + +def test_maybe_auto_index_indexes_when_opt_in(monkeypatch, tmp_path): + from api.mcp import auto_init + + monkeypatch.setenv("CODE_GRAPH_AUTO_INDEX", "true") + + fake_analyzer_instance = MagicMock() + fake_graph_instance = MagicMock() + with patch("api.analyzers.source_analyzer.SourceAnalyzer", return_value=fake_analyzer_instance), \ + patch("api.graph.Graph", 
return_value=fake_graph_instance), \ + patch.object(auto_init, "_detect_branch", return_value="main"): + status = auto_init.maybe_auto_index(cwd=tmp_path, project="myproj") + + assert status["status"] == "indexed" + assert status["project"] == "myproj" + assert status["branch"] == "main" + fake_analyzer_instance.analyze_local_folder.assert_called_once() + + +def test_maybe_auto_index_idempotent(monkeypatch, tmp_path): + """Second call for the same (project, branch) is a no-op.""" + from api.mcp import auto_init + + monkeypatch.setenv("CODE_GRAPH_AUTO_INDEX", "1") + + fake_analyzer = MagicMock() + with patch("api.analyzers.source_analyzer.SourceAnalyzer", return_value=fake_analyzer), \ + patch("api.graph.Graph", return_value=MagicMock()), \ + patch.object(auto_init, "_detect_branch", return_value="main"): + first = auto_init.maybe_auto_index(cwd=tmp_path, project="myproj") + second = auto_init.maybe_auto_index(cwd=tmp_path, project="myproj") + + assert first["status"] == "indexed" + assert second["status"] == "skipped" + assert "already" in second["reason"] + # Critical: the analyzer was invoked exactly once. 
+ assert fake_analyzer.analyze_local_folder.call_count == 1 + + +def test_maybe_auto_index_per_branch(monkeypatch, tmp_path): + """Different branches under the same project each get one auto-index.""" + from api.mcp import auto_init + + monkeypatch.setenv("CODE_GRAPH_AUTO_INDEX", "yes") + + fake_analyzer = MagicMock() + with patch("api.analyzers.source_analyzer.SourceAnalyzer", return_value=fake_analyzer), \ + patch("api.graph.Graph", return_value=MagicMock()): + a = auto_init.maybe_auto_index(cwd=tmp_path, project="p", branch="main") + b = auto_init.maybe_auto_index(cwd=tmp_path, project="p", branch="feature-x") + c = auto_init.maybe_auto_index(cwd=tmp_path, project="p", branch="main") + + assert a["status"] == "indexed" + assert b["status"] == "indexed" + assert c["status"] == "skipped" + assert fake_analyzer.analyze_local_folder.call_count == 2 + + +def test_truthy_helper(): + from api.mcp.auto_init import _truthy + + for v in ("1", "true", "TRUE", "yes", "YES", "on"): + assert _truthy(v) + for v in ("", "0", "false", "no", "off", None): + assert not _truthy(v)