Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 172 additions & 0 deletions api/mcp/auto_init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
"""Zero-config startup helpers for the MCP server (T12).

Two automation behaviours:

1. :func:`ensure_falkordb` — at server boot, ping FalkorDB; if it's
unreachable on a localhost host, run ``cgraph ensure-db`` to spin up
the existing Docker container. Reuses ``api.cli.ensure_db`` rather
than duplicating Docker logic.

2. :func:`maybe_auto_index` — when ``CODE_GRAPH_AUTO_INDEX=true`` is set
(opt-in, off by default), index the current working directory into a
per-branch graph so the agent doesn't have to call ``index_repo``
first. Idempotent within a single process — the second call for the
same ``(project, branch)`` is a no-op.

Both are deliberately conservative: ensure-db only acts on localhost
hosts, and auto-index requires explicit opt-in because indexing a
large repo can take minutes and surprising the user with that on
first tool call is bad UX.
"""

from __future__ import annotations

import logging
import os
import socket
import subprocess
from pathlib import Path
from typing import Iterable, Optional


# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


# Host names for which ensure_falkordb() is willing to run `cgraph ensure-db`.
_LOCAL_HOSTS = {"localhost", "127.0.0.1", "::1"}
# (project, branch) pairs that maybe_auto_index() has already indexed in this process.
_AUTO_INDEXED: set[tuple[str, str]] = set()


# ---------------------------------------------------------------------------
# ensure_falkordb
# ---------------------------------------------------------------------------


def _falkordb_reachable(host: str, port: int, timeout: float = 1.0) -> bool:
    """Report whether a TCP connection to ``host:port`` can be opened.

    The probe connection is closed straight away; nothing is sent over it.
    Any ``OSError`` raised while connecting or closing counts as
    "unreachable".

    Args:
        host: Host name or address to probe.
        port: TCP port to probe.
        timeout: Connection timeout in seconds.

    Returns:
        ``True`` if the connection was established, ``False`` otherwise.
    """
    try:
        probe = socket.create_connection((host, port), timeout=timeout)
        probe.close()
    except OSError:
        return False
    return True


def ensure_falkordb() -> dict:
    """Make sure FalkorDB is reachable; bootstrap Docker if not.

    Reads ``FALKORDB_HOST`` (default ``localhost``) and ``FALKORDB_PORT``
    (default ``6379``) from the environment, probes that endpoint, and if
    it is unreachable *and* the host is one of :data:`_LOCAL_HOSTS`, runs
    ``cgraph ensure-db`` in a subprocess.

    Returns:
        A small status dict so the caller can log it. ``"status"`` is
        ``"ok"`` or ``"error"``. Depending on the path taken the dict also
        carries ``"host"``, ``"port"``, ``"action"`` (``"none"`` or
        ``"started"``), ``"message"``, ``"stdout"`` and ``"stderr"``.

    Never raises for launch failures (missing CLI, permission problems,
    timeout) — the goal is to start the MCP server even if the bootstrap
    fails; individual tools will then surface their own errors.
    """
    # Upper bound on how long server boot may wait for the CLI, in seconds.
    bootstrap_timeout = 120.0

    host = os.getenv("FALKORDB_HOST", "localhost")
    try:
        port = int(os.getenv("FALKORDB_PORT", "6379"))
    except ValueError:
        return {"status": "error", "message": "invalid FALKORDB_PORT"}

    if _falkordb_reachable(host, port):
        return {"status": "ok", "host": host, "port": port, "action": "none"}

    if host not in _LOCAL_HOSTS:
        return {
            "status": "error",
            "host": host,
            "port": port,
            "message": "FalkorDB unreachable; auto-start only supports localhost",
        }

    logger.info("FalkorDB unreachable on %s:%s — running `cgraph ensure-db`", host, port)
    try:
        # Subprocess so the CLI's stdout (which prints JSON) doesn't pollute
        # the MCP server's own stdio transport. stdin is detached for the
        # same reason: the child must not consume bytes meant for the server.
        result = subprocess.run(
            ["cgraph", "ensure-db"],
            capture_output=True,
            text=True,
            check=False,
            stdin=subprocess.DEVNULL,
            timeout=bootstrap_timeout,
        )
    except FileNotFoundError:
        return {
            "status": "error",
            "host": host,
            "port": port,
            "message": "cgraph CLI not on PATH",
        }
    except subprocess.TimeoutExpired:
        # Without a timeout a wedged CLI would block server boot forever.
        return {
            "status": "error",
            "host": host,
            "port": port,
            "message": f"`cgraph ensure-db` timed out after {bootstrap_timeout:g}s",
        }
    except OSError as exc:
        # e.g. PermissionError when the CLI exists but is not executable.
        return {
            "status": "error",
            "host": host,
            "port": port,
            "message": f"could not run `cgraph ensure-db`: {exc}",
        }

    return {
        "status": "ok" if result.returncode == 0 else "error",
        "host": host,
        "port": port,
        "action": "started",
        "stdout": result.stdout.strip(),
        "stderr": result.stderr.strip(),
    }


# ---------------------------------------------------------------------------
# maybe_auto_index
# ---------------------------------------------------------------------------


def _truthy(val: Optional[str]) -> bool:
    """Return ``True`` when ``val`` spells an affirmative flag value.

    Surrounding whitespace and letter case are ignored. The accepted
    spellings are ``1``, ``true``, ``yes`` and ``on``; ``None``, the empty
    string and everything else yield ``False``.
    """
    if not val:
        return False
    normalized = val.strip().lower()
    return normalized in ("1", "true", "yes", "on")


def _detect_branch(cwd: Path) -> str:
    """Best-effort current-branch detection. Falls back to ``_default``.

    Runs ``git rev-parse --abbrev-ref HEAD`` with ``cwd`` as the working
    directory and returns its stripped stdout when git exits with status 0
    and printed something.

    The fallback ``"_default"`` is returned when git exits non-zero, prints
    nothing, cannot be launched at all (git missing, ``cwd`` missing or not
    a directory, permission problems) or does not finish within the
    timeout. This function does not raise for any of those conditions.

    NOTE(review): whatever git prints is returned verbatim; on a detached
    HEAD that is presumably the literal ``HEAD`` — confirm that is an
    acceptable graph name.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--abbrev-ref", "HEAD"],
            cwd=str(cwd),
            capture_output=True,
            text=True,
            check=False,
            # Never let git read from the server's stdin.
            stdin=subprocess.DEVNULL,
            # A local rev-parse is near-instant; bound it so a wedged
            # filesystem cannot stall the caller indefinitely.
            timeout=10,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers FileNotFoundError (git or cwd missing) as well as
        # NotADirectoryError / PermissionError from launching the child.
        return "_default"

    branch = result.stdout.strip()
    if result.returncode == 0 and branch:
        return branch
    return "_default"


def maybe_auto_index(
    cwd: Optional[Path] = None,
    *,
    project: Optional[str] = None,
    branch: Optional[str] = None,
) -> dict:
    """Index ``cwd`` into the per-branch graph when the opt-in env var is set.

    Nothing happens unless ``CODE_GRAPH_AUTO_INDEX`` holds a truthy value.
    Each ``(project, branch)`` pair is indexed at most once per process:
    the pair is recorded in :data:`_AUTO_INDEXED` after a successful run
    and later calls for it return a ``"skipped"`` status.

    Args:
        cwd: Folder to index; defaults to the current working directory.
        project: Graph project name; defaults to the folder's own name.
        branch: Branch name; defaults to the result of
            :func:`_detect_branch` for the folder.

    Returns:
        A status dict whose ``"status"`` is ``"skipped"`` (with a
        ``"reason"``) or ``"indexed"`` (with ``"project"``, ``"branch"``
        and ``"path"``).
    """
    opted_in = _truthy(os.getenv("CODE_GRAPH_AUTO_INDEX"))
    if not opted_in:
        return {"status": "skipped", "reason": "CODE_GRAPH_AUTO_INDEX not set"}

    root = (cwd if cwd else Path.cwd()).resolve()
    project_name = project if project else root.name
    branch_name = branch if branch else _detect_branch(root)

    cache_key = (project_name, branch_name)
    if cache_key in _AUTO_INDEXED:
        return {"status": "skipped", "reason": "already auto-indexed", "key": cache_key}

    # Deferred imports: the analyzer stack is only loaded once indexing is
    # actually going to happen, keeping plain module import cheap.
    from api.analyzers.source_analyzer import SourceAnalyzer
    from api.graph import Graph

    logger.info("Auto-indexing %s @ %s into code:%s:%s", root, branch_name, project_name, branch_name)
    target_graph = Graph(project_name, branch=branch_name)
    analyzer = SourceAnalyzer()
    analyzer.analyze_local_folder(str(root), target_graph)

    # Only remember the pair once indexing has completed without raising.
    _AUTO_INDEXED.add(cache_key)
    return {
        "status": "indexed",
        "project": project_name,
        "branch": branch_name,
        "path": str(root),
    }


def reset_auto_index_cache(keys: Optional[Iterable[tuple[str, str]]] = None) -> None:
    """Forget auto-index session state. Tests only.

    Args:
        keys: ``(project, branch)`` pairs to forget. Pairs that are not in
            the cache are ignored. When omitted (``None``) the whole cache
            is emptied.
    """
    if keys is not None:
        for stale in keys:
            _AUTO_INDEXED.discard(stale)
        return
    _AUTO_INDEXED.clear()
10 changes: 9 additions & 1 deletion api/mcp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,16 @@
def main() -> None:
    """Run the MCP server over stdio.

    Console-script entry point for ``cgraph-mcp``. Runs the T12
    auto-init helpers first so a freshly-cloned user gets a working
    FalkorDB without manual `cgraph ensure-db`, and (opt-in via
    ``CODE_GRAPH_AUTO_INDEX``) an indexed CWD without manual
    `index_repo`.

    Both helpers are best-effort: their status dicts are logged, and an
    exception from either one is logged and swallowed so the server still
    starts and individual tools can report their own errors.
    """
    import logging

    from .auto_init import ensure_falkordb, maybe_auto_index

    log = logging.getLogger(__name__)

    try:
        db_status = ensure_falkordb()
    except Exception:
        # Top-level boundary: a bootstrap failure must not prevent startup.
        log.exception("FalkorDB bootstrap failed; starting MCP server anyway")
    else:
        if db_status.get("status") == "ok":
            log.info("FalkorDB bootstrap: %s", db_status)
        else:
            log.warning("FalkorDB bootstrap did not succeed: %s", db_status)

    try:
        index_status = maybe_auto_index()
    except Exception:
        # Indexing touches the graph and the analyzer stack; either may fail.
        log.exception("Auto-index failed; starting MCP server without it")
    else:
        log.info("Auto-index: %s", index_status)

    app.run(transport="stdio")


Expand Down
168 changes: 168 additions & 0 deletions tests/mcp/test_auto_init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
"""T12 — auto_init tests (mocked subprocess / graph)."""

from __future__ import annotations

from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest


# ---------------------------------------------------------------------------
# ensure_falkordb
# ---------------------------------------------------------------------------


def test_ensure_falkordb_no_action_when_reachable(monkeypatch):
    """A reachable FalkorDB short-circuits before any subprocess is spawned."""
    from api.mcp import auto_init

    for name, value in (("FALKORDB_HOST", "localhost"), ("FALKORDB_PORT", "6379")):
        monkeypatch.setenv(name, value)

    probe = patch.object(auto_init, "_falkordb_reachable", return_value=True)
    spawn = patch("api.mcp.auto_init.subprocess.run")
    with probe, spawn as mock_run:
        status = auto_init.ensure_falkordb()

    assert (status["status"], status["action"]) == ("ok", "none")
    mock_run.assert_not_called()


def test_ensure_falkordb_runs_cgraph_when_unreachable(monkeypatch):
    """An unreachable localhost FalkorDB triggers exactly one `cgraph ensure-db`."""
    from api.mcp import auto_init

    for name, value in (("FALKORDB_HOST", "localhost"), ("FALKORDB_PORT", "6379")):
        monkeypatch.setenv(name, value)

    completed = MagicMock(returncode=0, stdout="ok", stderr="")
    probe = patch.object(auto_init, "_falkordb_reachable", return_value=False)
    spawn = patch("api.mcp.auto_init.subprocess.run", return_value=completed)
    with probe, spawn as mock_run:
        status = auto_init.ensure_falkordb()

    assert (status["status"], status["action"]) == ("ok", "started")
    assert mock_run.call_count == 1
    # The command line is passed as the first positional argument.
    assert mock_run.call_args.args[0] == ["cgraph", "ensure-db"]


def test_ensure_falkordb_skips_docker_for_remote_host(monkeypatch):
    """Auto-start is localhost-only by design: remote hosts never spawn the CLI."""
    from api.mcp import auto_init

    for name, value in (("FALKORDB_HOST", "graph.example.com"), ("FALKORDB_PORT", "6379")):
        monkeypatch.setenv(name, value)

    probe = patch.object(auto_init, "_falkordb_reachable", return_value=False)
    spawn = patch("api.mcp.auto_init.subprocess.run")
    with probe, spawn as mock_run:
        status = auto_init.ensure_falkordb()

    assert status["status"] == "error"
    assert "localhost" in status["message"]
    mock_run.assert_not_called()


def test_ensure_falkordb_handles_missing_cli(monkeypatch):
    """A missing `cgraph` executable is reported as an error status, not raised."""
    from api.mcp import auto_init

    for name, value in (("FALKORDB_HOST", "localhost"), ("FALKORDB_PORT", "6379")):
        monkeypatch.setenv(name, value)

    with patch.object(auto_init, "_falkordb_reachable", return_value=False):
        with patch("api.mcp.auto_init.subprocess.run", side_effect=FileNotFoundError):
            status = auto_init.ensure_falkordb()

    assert status["status"] == "error"
    assert "PATH" in status["message"]


# ---------------------------------------------------------------------------
# maybe_auto_index
# ---------------------------------------------------------------------------


@pytest.fixture(autouse=True)
def _reset_cache():
    """Empty the auto-index session cache before and after every test."""
    from api.mcp import auto_init

    auto_init.reset_auto_index_cache()
    yield
    auto_init.reset_auto_index_cache()


def test_maybe_auto_index_skipped_when_env_unset(monkeypatch, tmp_path):
    """Without the opt-in env var nothing is indexed and nothing is cached."""
    from api.mcp import auto_init

    monkeypatch.delenv("CODE_GRAPH_AUTO_INDEX", raising=False)

    # No patching of ``auto_init.SourceAnalyzer`` here: the analyzer is
    # imported inside ``maybe_auto_index`` itself, so a module-level patch
    # would never be consulted. Observe the session cache instead, which
    # is only written after a real indexing run.
    status = auto_init.maybe_auto_index(cwd=tmp_path)

    assert status["status"] == "skipped"
    assert "CODE_GRAPH_AUTO_INDEX" in status["reason"]
    assert not auto_init._AUTO_INDEXED


def test_maybe_auto_index_indexes_when_opt_in(monkeypatch, tmp_path):
    """With the env var set, the folder is analyzed once and reported as indexed."""
    from api.mcp import auto_init

    monkeypatch.setenv("CODE_GRAPH_AUTO_INDEX", "true")

    analyzer = MagicMock()
    graph = MagicMock()
    analyzer_patch = patch("api.analyzers.source_analyzer.SourceAnalyzer", return_value=analyzer)
    graph_patch = patch("api.graph.Graph", return_value=graph)
    branch_patch = patch.object(auto_init, "_detect_branch", return_value="main")
    with analyzer_patch, graph_patch, branch_patch:
        status = auto_init.maybe_auto_index(cwd=tmp_path, project="myproj")

    assert (status["status"], status["project"], status["branch"]) == ("indexed", "myproj", "main")
    analyzer.analyze_local_folder.assert_called_once()


def test_maybe_auto_index_idempotent(monkeypatch, tmp_path):
    """Second call for the same (project, branch) is a no-op."""
    from api.mcp import auto_init

    monkeypatch.setenv("CODE_GRAPH_AUTO_INDEX", "1")

    analyzer = MagicMock()
    analyzer_patch = patch("api.analyzers.source_analyzer.SourceAnalyzer", return_value=analyzer)
    graph_patch = patch("api.graph.Graph", return_value=MagicMock())
    branch_patch = patch.object(auto_init, "_detect_branch", return_value="main")
    with analyzer_patch, graph_patch, branch_patch:
        results = [
            auto_init.maybe_auto_index(cwd=tmp_path, project="myproj")
            for _ in range(2)
        ]

    first, second = results
    assert first["status"] == "indexed"
    assert second["status"] == "skipped"
    assert "already" in second["reason"]
    # Critical: the analyzer was invoked exactly once.
    assert analyzer.analyze_local_folder.call_count == 1


def test_maybe_auto_index_per_branch(monkeypatch, tmp_path):
    """Different branches under the same project each get one auto-index."""
    from api.mcp import auto_init

    monkeypatch.setenv("CODE_GRAPH_AUTO_INDEX", "yes")

    analyzer = MagicMock()
    analyzer_patch = patch("api.analyzers.source_analyzer.SourceAnalyzer", return_value=analyzer)
    graph_patch = patch("api.graph.Graph", return_value=MagicMock())
    with analyzer_patch, graph_patch:
        statuses = [
            auto_init.maybe_auto_index(cwd=tmp_path, project="p", branch=name)["status"]
            for name in ("main", "feature-x", "main")
        ]

    # The repeated "main" is served from the session cache.
    assert statuses == ["indexed", "indexed", "skipped"]
    assert analyzer.analyze_local_folder.call_count == 2


def test_truthy_helper():
    """``_truthy`` accepts the affirmative spellings and rejects everything else."""
    from api.mcp.auto_init import _truthy

    affirmative = ("1", "true", "TRUE", "yes", "YES", "on")
    negative = ("", "0", "false", "no", "off", None)

    assert all(_truthy(value) for value in affirmative)
    assert not any(_truthy(value) for value in negative)