Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/mcp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@

app: FastMCP = FastMCP("code-graph")

# Register tools on import so both direct ``import api.mcp.server`` and the
# stdio entry point see the same tool list. This import must stay below the
# ``app`` assignment: the tool modules run ``from ..server import app`` at
# import time, so ``app`` has to be bound before they are loaded.
from . import tools # noqa: F401, E402


def main() -> None:
"""Run the MCP server over stdio.
Expand Down
7 changes: 7 additions & 0 deletions api/mcp/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""MCP tool implementations for code-graph.

Each submodule registers tools against the shared FastMCP app exposed by
``api.mcp.server``. Import this package to register all tools.
"""

from . import structural # noqa: F401 (registers tools on import)
184 changes: 184 additions & 0 deletions api/mcp/tools/structural.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
"""Structural MCP tools (T4-T8).

These tools wrap the existing ``Project`` / ``Graph`` / ``AsyncGraphQuery``
operations so MCP-capable agents (Claude Code, Cursor, Copilot, Cline)
can drive code-graph over the standard stdio transport.

Conventions shared by all tools in this module:

* Every tool accepts an optional ``branch`` so the agent can scope queries
to a specific per-branch graph (see T17, issue #651). When omitted the
branch is either auto-detected from a local checkout (``index_repo``)
or defaults to ``_default``.
* Long-running synchronous operations are pushed into a thread via
``asyncio.get_running_loop().run_in_executor`` so the MCP event loop
stays responsive.
"""

from __future__ import annotations

import asyncio
import logging
import os
from pathlib import Path
from typing import Any, Optional

from ..server import app


logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _looks_like_url(spec: str) -> bool:
"""Return True for HTTP(S) / git URLs, False for local paths."""
return spec.startswith(("http://", "https://", "git@", "ssh://", "git://"))


def _languages_detected(graph) -> list[str]:
"""Best-effort enumeration of distinct ``File.ext`` values.

Returns a sorted list of extension strings (without the leading dot).
Empty when no files were indexed.
"""
try:
rows = graph.g.query(
"MATCH (f:File) RETURN DISTINCT f.ext AS ext"
).result_set
except Exception as e: # pragma: no cover — defensive
logger.warning("languages_detected query failed: %s", e)
return []
seen: set[str] = set()
for row in rows or []:
ext = (row[0] or "").lstrip(".")
if ext:
seen.add(ext)
return sorted(seen)


def _count(graph, label: str) -> int:
    """Return how many nodes in ``graph`` carry ``label``.

    The count is best-effort: any failure (query error, unexpected result
    shape) is logged as a warning and reported as ``0`` so that a broken
    query is distinguishable from an empty graph in the logs.

    Args:
        graph: Object exposing the underlying graph handle as ``graph.g``.
        label: Node label to count. It is interpolated directly into the
            Cypher text, so it must be a trusted identifier, never
            user-supplied input.

    Returns:
        The node count, or ``0`` when there is no result row or the query
        failed.
    """
    try:
        rows = graph.g.query(
            f"MATCH (n:{label}) RETURN count(n) AS c"
        ).result_set
        return int(rows[0][0]) if rows else 0
    except Exception as e:
        logger.warning("count query for label %s failed: %s", label, e)
        return 0


def _count_edges(graph) -> int:
    """Return the total number of relationships stored in ``graph``.

    The count is best-effort: any failure (query error, unexpected result
    shape) is logged as a warning and reported as ``0`` rather than being
    swallowed silently.

    Args:
        graph: Object exposing the underlying graph handle as ``graph.g``.

    Returns:
        The edge count, or ``0`` when there is no result row or the query
        failed.
    """
    try:
        rows = graph.g.query("MATCH ()-[r]->() RETURN count(r) AS c").result_set
        return int(rows[0][0]) if rows else 0
    except Exception as e:
        logger.warning("edge count query failed: %s", e)
        return 0


# ---------------------------------------------------------------------------
# T4 — index_repo
# ---------------------------------------------------------------------------


@app.tool(
    name="index_repo",
    description=(
        "Index a code repository into code-graph for subsequent navigation. "
        "Accepts a local path or a git URL. When `branch` is omitted, "
        "auto-detects the current branch from the local checkout (defaults "
        "to '_default' for non-git folders). Returns the indexed graph's "
        "node/edge counts, detected languages, and the (project, branch) "
        "identity callers should pass to other code-graph tools."
    ),
)
async def index_repo(
    path_or_url: str,
    branch: Optional[str] = None,
    incremental: bool = True,  # accepted now, fully honored once T18 lands
    ignore: Optional[list[str]] = None,
) -> dict[str, Any]:
    """Implementation for the ``index_repo`` MCP tool.

    Args:
        path_or_url: Filesystem path to a local repository **or** a clonable
            git URL (``https://...``, ``git@host:...``, ``ssh://...``).
            Must not be empty.
        branch: Branch identity for the indexed graph. When ``None``:
            auto-detect from the checkout via ``git rev-parse --abbrev-ref
            HEAD``; falls back to ``_default`` if not a git checkout.
        incremental: Accepted for forward-compatibility with T18; the
            current full-reindex path ignores it.
        ignore: List of relative paths to skip during analysis.

    Returns:
        A JSON-serialisable dict with the keys ``project_name``, ``branch``,
        ``graph_name``, ``num_nodes`` (File + Class + Function nodes),
        ``num_edges``, ``languages_detected`` and ``mode``.

    Raises:
        ValueError: If ``path_or_url`` is empty, the local path does not
            exist or is not a directory, or it lies outside
            ``ALLOWED_ANALYSIS_DIR`` when that variable is set.
    """
    # An empty string is not a URL and ``Path("").resolve()`` is the current
    # working directory, so without this guard an empty argument would
    # silently index whatever directory the server was started from.
    if not path_or_url or not path_or_url.strip():
        raise ValueError("path_or_url must be a non-empty path or git URL")

    from api.project import Project, detect_branch

    if ignore is None:
        ignore = []

    loop = asyncio.get_running_loop()

    def _payload(project) -> dict[str, Any]:
        # Summarise the indexed graph into the tool's response dict.
        g = project.graph
        return {
            "project_name": project.name,
            "branch": getattr(project, "branch", None),
            "graph_name": g.name,
            "num_nodes": (
                _count(g, "File") + _count(g, "Class") + _count(g, "Function")
            ),
            "num_edges": _count_edges(g),
            "languages_detected": _languages_detected(g),
            # T18 will flip this to "incremental" when only changed files
            # were re-analyzed.
            "mode": "full",
        }

    def _do_index() -> dict[str, Any]:
        # Synchronous indexing work; executed in a worker thread so the MCP
        # event loop stays responsive.
        if _looks_like_url(path_or_url):
            project = Project.from_git_repository(path_or_url, branch=branch)
        else:
            local_path = Path(path_or_url).expanduser().resolve()
            if not local_path.exists():
                raise ValueError(f"path does not exist: {local_path}")
            # Only folders can be analyzed; reject regular files up front
            # instead of handing them to the folder analyzer.
            if not local_path.is_dir():
                raise ValueError(f"path is not a directory: {local_path}")

            # Reject paths outside the allow-list when one is configured.
            allowed_root = os.getenv("ALLOWED_ANALYSIS_DIR")
            if allowed_root:
                allowed = Path(allowed_root).expanduser().resolve()
                try:
                    local_path.relative_to(allowed)
                except ValueError as e:
                    raise ValueError(
                        f"path {local_path} is outside ALLOWED_ANALYSIS_DIR={allowed}"
                    ) from e

            # Use Project for git-repo paths so commit metadata is saved,
            # otherwise drive SourceAnalyzer directly so non-git folders work.
            if (local_path / ".git").is_dir():
                project = Project.from_local_repository(local_path, branch=branch)
            else:
                # Synthesize a Project-like object so the return shape is uniform.
                from api.analyzers.source_analyzer import SourceAnalyzer
                from api.graph import Graph

                detected = branch if branch is not None else detect_branch(local_path)
                graph = Graph(local_path.name, branch=detected)
                analyzer = SourceAnalyzer()
                analyzer.analyze_local_folder(str(local_path), graph, ignore)

                class _Synth:  # tiny shim to mirror Project's surface
                    name = local_path.name

                    def __init__(self, g, b):
                        self.graph = g
                        self.branch = b

                return _payload(_Synth(graph, detected))

        project.analyze_sources(ignore)
        return _payload(project)

    return await loop.run_in_executor(None, _do_index)
80 changes: 80 additions & 0 deletions tests/mcp/test_index_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""T4 — ``index_repo`` MCP tool tests."""

from __future__ import annotations

import json
from pathlib import Path

import pytest


pytestmark = pytest.mark.anyio


@pytest.fixture
def anyio_backend() -> str:
    """Select ``asyncio`` as the backend for this module's anyio-marked tests."""
    backend = "asyncio"
    return backend


async def test_index_repo_local_path(sample_project_path: Path, expected_contract):
    """Indexing a local non-git folder returns the documented payload."""
    from api.mcp.tools.structural import index_repo

    branch_name = "t4-local-test"
    payload = await index_repo(str(sample_project_path), branch=branch_name)

    # Identity fields.
    assert payload["project_name"] == "sample_project"
    assert payload["branch"] == branch_name
    assert payload["graph_name"].startswith("code:sample_project:")
    assert payload["mode"] == "full"

    # Graph contents: at least the contract's minimum node counts, some
    # edges, and Python among the detected languages.
    minimum_nodes = sum(expected_contract["counts_min"].values())
    assert payload["num_nodes"] >= minimum_nodes
    assert payload["num_edges"] > 0
    assert "py" in payload["languages_detected"]


async def test_index_repo_rejects_missing_path():
    """A non-existent local path is reported to the agent as a ValueError."""
    from api.mcp.tools.structural import index_repo

    missing = "/this/path/definitely/does/not/exist/anywhere"
    with pytest.raises(ValueError, match="path does not exist"):
        await index_repo(missing)


async def test_index_repo_honors_allowed_analysis_dir(
    sample_project_path: Path,
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
):
    """Sandboxing: a path outside ALLOWED_ANALYSIS_DIR raises ValueError."""
    from api.mcp.tools import structural

    # Point the allow-list at tmp_path and then index the sample project.
    monkeypatch.setenv("ALLOWED_ANALYSIS_DIR", str(tmp_path))
    target = str(sample_project_path)

    with pytest.raises(ValueError, match="outside ALLOWED_ANALYSIS_DIR"):
        await structural.index_repo(target, branch="t4-sandbox")


async def test_index_repo_registered_via_app():
    """``index_repo`` is listed by ``app.list_tools()`` with its parameters."""
    from api.mcp.server import app

    registered = await app.list_tools()
    assert "index_repo" in {entry.name for entry in registered}

    index_tool = next(entry for entry in registered if entry.name == "index_repo")
    properties = index_tool.inputSchema["properties"]
    # The parameter schema is what gets surfaced to the agent.
    assert "path_or_url" in properties
    assert "branch" in properties
    assert "incremental" in properties


async def test_index_repo_response_serialises_to_json(
    sample_project_path: Path,
):
    """The tool response must survive ``json.dumps``, since MCP transports JSON."""
    from api.mcp.tools.structural import index_repo

    payload = await index_repo(str(sample_project_path), branch="t4-json-test")
    # The only requirement is that serialisation does not raise.
    json.dumps(payload)
13 changes: 9 additions & 4 deletions tests/mcp/test_scaffold.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,12 @@ def test_main_entry_point_exists() -> None:


@pytest.mark.anyio
async def test_stdio_server_lists_zero_tools() -> None:
async def test_stdio_server_lists_registered_tools() -> None:
"""Spawn ``cgraph-mcp`` over stdio and verify the protocol handshake.

The scaffold registers no tools, so ``list_tools`` must return an
empty list. Tool tickets (T4-T8, T11) extend this expectation.
Once tool tickets land (T4+), ``list_tools`` returns at least the
tools they register. This test only guards the *handshake* — per-tool
behavioural assertions live in the per-tool test modules.
"""
cgraph_mcp = shutil.which("cgraph-mcp")
assert cgraph_mcp is not None, (
Expand All @@ -62,4 +63,8 @@ async def test_stdio_server_lists_zero_tools() -> None:
async with ClientSession(read, write) as session:
await session.initialize()
result = await session.list_tools()
assert result.tools == []
# ``index_repo`` lands in T4; this assertion intentionally
# only checks for presence so it stays stable as more tools
# are registered in T5-T8 / T11.
names = {t.name for t in result.tools}
assert "index_repo" in names