diff --git a/api/mcp/server.py b/api/mcp/server.py index f0ae6f84..63ce5cfa 100644 --- a/api/mcp/server.py +++ b/api/mcp/server.py @@ -13,6 +13,11 @@ app: FastMCP = FastMCP("code-graph") +# Register tools on import so both direct ``import api.mcp.server`` and the +# stdio entry point see the same tool list. Imported below ``app`` because +# the tool modules need a reference to it. +from . import tools # noqa: F401, E402 + def main() -> None: """Run the MCP server over stdio. diff --git a/api/mcp/tools/__init__.py b/api/mcp/tools/__init__.py new file mode 100644 index 00000000..87b8b3a0 --- /dev/null +++ b/api/mcp/tools/__init__.py @@ -0,0 +1,7 @@ +"""MCP tool implementations for code-graph. + +Each submodule registers tools against the shared FastMCP app exposed by +``api.mcp.server``. Import this package to register all tools. +""" + +from . import structural # noqa: F401 (registers tools on import) diff --git a/api/mcp/tools/structural.py b/api/mcp/tools/structural.py new file mode 100644 index 00000000..30540f4b --- /dev/null +++ b/api/mcp/tools/structural.py @@ -0,0 +1,184 @@ +"""Structural MCP tools (T4-T8). + +These tools wrap the existing ``Project`` / ``Graph`` / ``AsyncGraphQuery`` +operations so MCP-capable agents (Claude Code, Cursor, Copilot, Cline) +can drive code-graph over the standard stdio transport. + +Conventions shared by all tools in this module: + +* Every tool accepts an optional ``branch`` so the agent can scope queries + to a specific per-branch graph (see T17, issue #651). When omitted the + branch is either auto-detected from a local checkout (``index_repo``) + or defaults to ``_default``. +* Long-running synchronous operations are pushed into a thread via + ``asyncio.get_running_loop().run_in_executor`` so the MCP event loop + stays responsive. +""" + +from __future__ import annotations + +import asyncio +import logging +import os +from pathlib import Path +from typing import Any, Optional + +from ..server import app + + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _looks_like_url(spec: str) -> bool: + """Return True for HTTP(S) / git URLs, False for local paths.""" + return spec.startswith(("http://", "https://", "git@", "ssh://", "git://")) + + +def _languages_detected(graph) -> list[str]: + """Best-effort enumeration of distinct ``File.ext`` values. + + Returns a sorted list of extension strings (without the leading dot). + Empty when no files were indexed. + """ + try: + rows = graph.g.query( + "MATCH (f:File) RETURN DISTINCT f.ext AS ext" + ).result_set + except Exception as e: # pragma: no cover — defensive + logger.warning("languages_detected query failed: %s", e) + return [] + seen: set[str] = set() + for row in rows or []: + ext = (row[0] or "").lstrip(".") + if ext: + seen.add(ext) + return sorted(seen) + + +def _count(graph, label: str) -> int: + try: + rows = graph.g.query( + f"MATCH (n:{label}) RETURN count(n) AS c" + ).result_set + return int(rows[0][0]) if rows else 0 + except Exception: + return 0 + + +def _count_edges(graph) -> int: + try: + rows = graph.g.query("MATCH ()-[r]->() RETURN count(r) AS c").result_set + return int(rows[0][0]) if rows else 0 + except Exception: + return 0 + + +# --------------------------------------------------------------------------- +# T4 — index_repo +# --------------------------------------------------------------------------- + + +@app.tool( + name="index_repo", + description=( + "Index a code repository into code-graph for subsequent navigation. " + "Accepts a local path or a git URL. When `branch` is omitted, " + "auto-detects the current branch from the local checkout (defaults " + "to '_default' for non-git folders). Returns the indexed graph's " + "node/edge counts, detected languages, and the (project, branch) " + "identity callers should pass to other code-graph tools." + ), +) +async def index_repo( + path_or_url: str, + branch: Optional[str] = None, + incremental: bool = True, # accepted now, fully honored once T18 lands + ignore: Optional[list[str]] = None, +) -> dict[str, Any]: + """Implementation for the ``index_repo`` MCP tool. + + Args: + path_or_url: Filesystem path to a local repository **or** a clonable + git URL (``https://...``, ``git@host:...``, ``ssh://...``). + branch: Branch identity for the indexed graph. When ``None``: + auto-detect from the checkout via ``git rev-parse --abbrev-ref + HEAD``; falls back to ``_default`` if not a git checkout. + incremental: Accepted for forward-compatibility with T18; the + current full-reindex path ignores it. + ignore: List of relative paths to skip during analysis. + """ + + from api.project import Project, detect_branch + + if ignore is None: + ignore = [] + + loop = asyncio.get_running_loop() + + def _do_index() -> dict[str, Any]: + if _looks_like_url(path_or_url): + project = Project.from_git_repository(path_or_url, branch=branch) + else: + local_path = Path(path_or_url).expanduser().resolve() + if not local_path.exists(): + raise ValueError(f"path does not exist: {local_path}") + + # Reject paths outside the allow-list when one is configured. + allowed_root = os.getenv("ALLOWED_ANALYSIS_DIR") + if allowed_root: + allowed = Path(allowed_root).expanduser().resolve() + try: + local_path.relative_to(allowed) + except ValueError as e: + raise ValueError( + f"path {local_path} is outside ALLOWED_ANALYSIS_DIR={allowed}" + ) from e + + # Use Project for git-repo paths so commit metadata is saved, + # otherwise drive SourceAnalyzer directly so non-git folders work. + if (local_path / ".git").is_dir(): + project = Project.from_local_repository(local_path, branch=branch) + else: + # Synthesize a Project-like object so the return shape is uniform. + from api.analyzers.source_analyzer import SourceAnalyzer + from api.graph import Graph + + detected = branch if branch is not None else detect_branch(local_path) + graph = Graph(local_path.name, branch=detected) + analyzer = SourceAnalyzer() + analyzer.analyze_local_folder(str(local_path), graph, ignore) + + class _Synth: # tiny shim to mirror Project's surface + name = local_path.name + + def __init__(self, g, b): + self.graph = g + self.branch = b + + return _payload(_Synth(graph, detected)) + + project.analyze_sources(ignore) + return _payload(project) + + def _payload(project) -> dict[str, Any]: + g = project.graph + return { + "project_name": project.name, + "branch": getattr(project, "branch", None), + "graph_name": g.name, + "num_nodes": ( + _count(g, "File") + _count(g, "Class") + _count(g, "Function") + ), + "num_edges": _count_edges(g), + "languages_detected": _languages_detected(g), + # T18 will flip this to "incremental" when only changed files + # were re-analyzed. + "mode": "full", + } + + return await loop.run_in_executor(None, _do_index) diff --git a/tests/mcp/test_index_repo.py b/tests/mcp/test_index_repo.py new file mode 100644 index 00000000..ebf88a19 --- /dev/null +++ b/tests/mcp/test_index_repo.py @@ -0,0 +1,80 @@ +"""T4 — ``index_repo`` MCP tool tests.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + + +pytestmark = pytest.mark.anyio + + +@pytest.fixture +def anyio_backend() -> str: + return "asyncio" + + +async def test_index_repo_local_path(sample_project_path: Path, expected_contract): + """Index a local non-git folder and verify the response shape.""" + from api.mcp.tools.structural import index_repo + + result = await index_repo(str(sample_project_path), branch="t4-local-test") + + assert result["project_name"] == "sample_project" + assert result["branch"] == "t4-local-test" + assert result["graph_name"].startswith("code:sample_project:") + assert result["mode"] == "full" + assert result["num_nodes"] >= sum(expected_contract["counts_min"].values()) + assert result["num_edges"] > 0 + assert "py" in result["languages_detected"] + + +async def test_index_repo_rejects_missing_path(): + """Missing local paths surface as a clear ValueError to the agent.""" + from api.mcp.tools.structural import index_repo + + with pytest.raises(ValueError, match="path does not exist"): + await index_repo("/this/path/definitely/does/not/exist/anywhere") + + +async def test_index_repo_honors_allowed_analysis_dir( + sample_project_path: Path, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +): + """Sandboxing: paths outside ALLOWED_ANALYSIS_DIR are rejected.""" + from api.mcp.tools import structural + + monkeypatch.setenv("ALLOWED_ANALYSIS_DIR", str(tmp_path)) + + with pytest.raises(ValueError, match="outside ALLOWED_ANALYSIS_DIR"): + await structural.index_repo(str(sample_project_path), branch="t4-sandbox") + + +async def test_index_repo_registered_via_app(): + """The tool is reachable via ``app.list_tools()`` (protocol parity).""" + from api.mcp.server import app + + tools = await app.list_tools() + names = {t.name for t in tools} + assert "index_repo" in names + + tool = next(t for t in tools if t.name == "index_repo") + schema = tool.inputSchema + # Description / param schema are surfaced to the agent. + assert "path_or_url" in schema["properties"] + assert "branch" in schema["properties"] + assert "incremental" in schema["properties"] + + +async def test_index_repo_response_serialises_to_json( + sample_project_path: Path, +): + """MCP transports JSON — the response dict must be JSON-serialisable.""" + from api.mcp.tools.structural import index_repo + + result = await index_repo(str(sample_project_path), branch="t4-json-test") + # Must not raise. + json.dumps(result) diff --git a/tests/mcp/test_scaffold.py b/tests/mcp/test_scaffold.py index 851f091a..b02a6780 100644 --- a/tests/mcp/test_scaffold.py +++ b/tests/mcp/test_scaffold.py @@ -45,11 +45,12 @@ def test_main_entry_point_exists() -> None: @pytest.mark.anyio -async def test_stdio_server_lists_zero_tools() -> None: +async def test_stdio_server_lists_registered_tools() -> None: """Spawn ``cgraph-mcp`` over stdio and verify the protocol handshake. - The scaffold registers no tools, so ``list_tools`` must return an - empty list. Tool tickets (T4-T8, T11) extend this expectation. + Once tool tickets land (T4+), ``list_tools`` returns at least the + tools they register. This test only guards the *handshake* — per-tool + behavioural assertions live in the per-tool test modules. """ cgraph_mcp = shutil.which("cgraph-mcp") assert cgraph_mcp is not None, ( @@ -62,4 +63,8 @@ async def test_stdio_server_lists_zero_tools() -> None: async with ClientSession(read, write) as session: await session.initialize() result = await session.list_tools() - assert result.tools == [] + # ``index_repo`` lands in T4; this assertion intentionally + # only checks for presence so it stays stable as more tools + # are registered in T5-T8 / T11. + names = {t.name for t in result.tools} + assert "index_repo" in names