From 60e2bd1d9c8afffa16e2e029f16ef1990edd76fd Mon Sep 17 00:00:00 2001 From: Dvir Dukhan <12258836+DvirDukhan@users.noreply.github.com> Date: Wed, 27 May 2026 14:48:01 +0300 Subject: [PATCH 1/7] bench: add MCP-transport sibling of the code_graph track MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a second bench track that exercises code-graph through the exact transport real-world agents use (Claude Code, Cursor, …) — JSON-RPC over stdio to a spawned `cgraph-mcp` server — instead of HTTP to the FastAPI service. Files: - bench/agents/code_graph_mcp_adapter.py — sync Python adapter that spawns cgraph-mcp per call via the official MCP Python SDK. Knows the 8-tool MCP surface (search_code, get_callers, get_callees, get_dependencies, impact_analysis, find_path, index_repo, ask). - bench/cli/cg-mcp + cg_mcp.py — bash-callable CLI shim mirroring the existing `cg` shim. mini-swe-agent only does bash, so each "tool" is one CLI invocation. - bench/tools/code_graph_mcp/{tools.yaml,system_preamble.md} — agent config for the MCP track. Mirrors code_graph; same Q2 decision to exclude `ask` (no nested LLM in the benchmarked tool set). - tests/bench/test_cg_mcp_adapter.py — 5 unit + 1 e2e test (FalkorDB-gated AND MCP-server-gated so it skips cleanly until the MCP stack lands on staging). Heavy e2e validated against the api/ subgraph (~6.3k nodes) over real stdio: search_code -> get_callers -> impact_analysis returned expected payloads. Depends on the MCP stack (PRs #675–#683) for cgraph-mcp itself. Lands cleanly once that stack merges. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- bench/agents/code_graph_mcp_adapter.py | 163 ++++++++++++++++ bench/cli/cg-mcp | 5 + bench/cli/cg_mcp.py | 140 ++++++++++++++ bench/tools/code_graph_mcp/system_preamble.md | 72 +++++++ bench/tools/code_graph_mcp/tools.yaml | 39 ++++ tests/bench/__init__.py | 0 tests/bench/test_cg_mcp_adapter.py | 178 ++++++++++++++++++ 7 files changed, 597 insertions(+) create mode 100644 bench/agents/code_graph_mcp_adapter.py create mode 100755 bench/cli/cg-mcp create mode 100644 bench/cli/cg_mcp.py create mode 100644 bench/tools/code_graph_mcp/system_preamble.md create mode 100644 bench/tools/code_graph_mcp/tools.yaml create mode 100644 tests/bench/__init__.py create mode 100644 tests/bench/test_cg_mcp_adapter.py diff --git a/bench/agents/code_graph_mcp_adapter.py b/bench/agents/code_graph_mcp_adapter.py new file mode 100644 index 00000000..9a6347bd --- /dev/null +++ b/bench/agents/code_graph_mcp_adapter.py @@ -0,0 +1,163 @@ +"""MCP-transport adapter to cgraph-mcp for the benchmark. + +Sibling of `code_graph_adapter.py` (HTTP). Where the HTTP adapter talks +to the host FastAPI service over the network, this one spawns the +`cgraph-mcp` stdio MCP server in-process via the official MCP Python +SDK and dispatches tool calls over JSON-RPC. + +This gives us a second, real-world benchmark track that exercises the +exact same transport agents (Claude Code, Cursor, …) will use in +production. Tool names match the 8-tool MCP surface +(`index_repo`, `search_code`, `get_callers`, `get_callees`, +`get_dependencies`, `impact_analysis`, `find_path`, `ask`). + +Each call spawns a fresh server, runs the call, and exits. That's +~0.5-1s overhead per call but keeps the model trivially safe to call +from a bash shim (one process per invocation, no shared state). 
+A future optimisation could persist the server across calls via a +side-channel daemon, but per-call spawn matches how external agents +actually use MCP servers today. +""" + +from __future__ import annotations + +import asyncio +import json +import os +from typing import Any + +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client + + +DEFAULT_TIMEOUT_SEC = 60.0 + + +def _env_for_mcp() -> dict[str, str]: + """Build the env for the spawned cgraph-mcp process. + + Pass through everything from the caller but make sure the FalkorDB + coordinates are present — the runner usually sets them to point at + the host FalkorDB container. + """ + env = dict(os.environ) + env.setdefault("FALKORDB_HOST", os.environ.get("FALKORDB_HOST", "127.0.0.1")) + env.setdefault("FALKORDB_PORT", os.environ.get("FALKORDB_PORT", "6379")) + return env + + +def _extract(result: Any) -> Any: + """Normalize a CallToolResult into a JSON-serialisable Python value. + + The MCP spec lets servers put the payload in `structuredContent` + and/or echo it as a JSON text chunk. Our 8 tools do both; agents + have historically preferred the text payload. We mirror that: + return the parsed text chunk when present, otherwise fall back to + structuredContent (unwrapping the spec's `{"result": ...}` wrapper + for collection-returning tools). 
+ """ + for chunk in result.content: + if hasattr(chunk, "text") and chunk.text: + try: + return json.loads(chunk.text) + except json.JSONDecodeError: + return chunk.text + struct = getattr(result, "structuredContent", None) + if isinstance(struct, dict) and set(struct.keys()) == {"result"}: + return struct["result"] + return struct + + +async def _call_tool_async(name: str, arguments: dict[str, Any], timeout: float) -> Any: + params = StdioServerParameters(command="cgraph-mcp", args=[], env=_env_for_mcp()) + async with stdio_client(params) as (read, write): + async with ClientSession(read, write) as session: + await asyncio.wait_for(session.initialize(), timeout=timeout) + result = await asyncio.wait_for( + session.call_tool(name, arguments), timeout=timeout + ) + payload = _extract(result) + if getattr(result, "isError", False): + return {"error": payload} + return payload + + +def call_tool(name: str, arguments: dict[str, Any], *, timeout: float = DEFAULT_TIMEOUT_SEC) -> Any: + """Sync entry point for the bash shim. One spawn per call.""" + return asyncio.run(_call_tool_async(name, arguments, timeout)) + + +# ── Top-level convenience wrappers ───────────────────────────────────── +# Names map 1:1 onto MCP tool names (and onto bench/tools/code_graph_mcp/ +# tools.yaml entries). Kwargs mirror each tool's MCP arg schema. 
+ + +def index_repo(path_or_url: str, branch: str | None = None, ignore: list[str] | None = None) -> dict[str, Any]: + args: dict[str, Any] = {"path_or_url": path_or_url} + if branch is not None: + args["branch"] = branch + if ignore is not None: + args["ignore"] = ignore + return call_tool("index_repo", args) + + +def search_code(prefix: str, project: str, branch: str | None = None, limit: int = 10) -> Any: + args: dict[str, Any] = {"prefix": prefix, "project": project, "limit": limit} + if branch is not None: + args["branch"] = branch + return call_tool("search_code", args) + + +def _neighbors(tool: str, symbol_id: int, project: str, branch: str | None, limit: int) -> Any: + args: dict[str, Any] = {"symbol_id": symbol_id, "project": project, "limit": limit} + if branch is not None: + args["branch"] = branch + return call_tool(tool, args) + + +def get_callers(symbol_id: int, project: str, branch: str | None = None, limit: int = 50) -> Any: + return _neighbors("get_callers", symbol_id, project, branch, limit) + + +def get_callees(symbol_id: int, project: str, branch: str | None = None, limit: int = 50) -> Any: + return _neighbors("get_callees", symbol_id, project, branch, limit) + + +def get_dependencies(symbol_id: int, project: str, branch: str | None = None, limit: int = 50) -> Any: + return _neighbors("get_dependencies", symbol_id, project, branch, limit) + + +def impact_analysis( + symbol_id: int, + project: str, + branch: str | None = None, + direction: str = "IN", + depth: int = 3, +) -> Any: + args: dict[str, Any] = { + "symbol_id": symbol_id, + "project": project, + "direction": direction, + "depth": depth, + } + if branch is not None: + args["branch"] = branch + return call_tool("impact_analysis", args) + + +def find_path(source_id: int, dest_id: int, project: str, branch: str | None = None) -> Any: + args: dict[str, Any] = { + "source_id": source_id, + "dest_id": dest_id, + "project": project, + } + if branch is not None: + args["branch"] = branch + 
return call_tool("find_path", args) + + +def ask(question: str, project: str, branch: str | None = None) -> Any: + args: dict[str, Any] = {"question": question, "project": project} + if branch is not None: + args["branch"] = branch + return call_tool("ask", args) diff --git a/bench/cli/cg-mcp b/bench/cli/cg-mcp new file mode 100755 index 00000000..be6c09bb --- /dev/null +++ b/bench/cli/cg-mcp @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +# Bash-callable entry point for the code-graph MCP CLI. Mirrors `cg` +# but speaks JSON-RPC over stdio to a spawned `cgraph-mcp` server +# instead of HTTP to the FastAPI service. Runner adds bench/cli to PATH. +exec "${BENCH_PYTHON:-python3}" -m bench.cli.cg_mcp "$@" diff --git a/bench/cli/cg_mcp.py b/bench/cli/cg_mcp.py new file mode 100644 index 00000000..95c91390 --- /dev/null +++ b/bench/cli/cg_mcp.py @@ -0,0 +1,140 @@ +"""`cg-mcp` — bash-callable CLI exposing code-graph's 8 MCP tools. + +This is the MCP-transport sibling of `cg`. Where `cg` calls the host +FastAPI service over HTTP, `cg-mcp` spawns the `cgraph-mcp` stdio +server (via the official MCP Python SDK) for every invocation and +dispatches one tool call. + +The MCP track is what external agents (Claude Code, Cursor, …) use +in production; benchmarking through it tells us how the *real-world* +integration behaves under SWE-bench, not just the in-process FastAPI +adapter. + +Subcommands mirror the MCP tool names: + + cg-mcp index_repo --path-or-url . [--branch B] [--ignore PAT ...] + cg-mcp search_code --project P --prefix STR [--branch B] [--limit N] + cg-mcp get_callers --project P --symbol-id ID [--branch B] [--limit N] + cg-mcp get_callees --project P --symbol-id ID [--branch B] [--limit N] + cg-mcp get_dependencies --project P --symbol-id ID [--branch B] [--limit N] + cg-mcp impact_analysis --project P --symbol-id ID [--direction IN|OUT] [--depth N] + cg-mcp find_path --project P --source-id ID --dest-id ID [--branch B] + cg-mcp ask --project P --question "..." 
[--branch B] + +Output: one JSON document per call on stdout. Errors print to stderr +and exit non-zero. + +Env: FALKORDB_HOST / FALKORDB_PORT are passed through to the spawned +server. Optionally set CGRAPH_MCP_TIMEOUT_SEC to override the +default 60s timeout. +""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +from typing import Any + +from bench.agents import code_graph_mcp_adapter as cgm + + +def _print(obj: Any) -> None: + json.dump(obj, sys.stdout, indent=2, sort_keys=True, default=str) + sys.stdout.write("\n") + + +def _timeout() -> float: + try: + return float(os.getenv("CGRAPH_MCP_TIMEOUT_SEC", "60")) + except ValueError: + return 60.0 + + +def _add_project(p: argparse.ArgumentParser) -> None: + p.add_argument("--project", required=True) + p.add_argument("--branch", default=None) + + +def _add_symbol(p: argparse.ArgumentParser) -> None: + p.add_argument("--symbol-id", type=int, required=True, dest="symbol_id") + p.add_argument("--limit", type=int, default=50) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(prog="cg-mcp", description=__doc__) + sub = parser.add_subparsers(dest="cmd", required=True) + + ir = sub.add_parser("index_repo") + ir.add_argument("--path-or-url", required=True, dest="path_or_url") + ir.add_argument("--branch", default=None) + ir.add_argument("--ignore", nargs="*", default=None) + + sc = sub.add_parser("search_code") + _add_project(sc) + sc.add_argument("--prefix", required=True) + sc.add_argument("--limit", type=int, default=10) + + for name in ("get_callers", "get_callees", "get_dependencies"): + p = sub.add_parser(name) + _add_project(p) + _add_symbol(p) + + ia = sub.add_parser("impact_analysis") + _add_project(ia) + ia.add_argument("--symbol-id", type=int, required=True, dest="symbol_id") + ia.add_argument("--direction", choices=["IN", "OUT"], default="IN") + ia.add_argument("--depth", type=int, default=3) + + fp = sub.add_parser("find_path") + 
_add_project(fp) + fp.add_argument("--source-id", type=int, required=True, dest="source_id") + fp.add_argument("--dest-id", type=int, required=True, dest="dest_id") + + aq = sub.add_parser("ask") + _add_project(aq) + aq.add_argument("--question", required=True) + + args = parser.parse_args(argv) + timeout = _timeout() + + # Inject timeout for adapter calls. + cgm.DEFAULT_TIMEOUT_SEC = timeout + + try: + if args.cmd == "index_repo": + _print(cgm.index_repo(args.path_or_url, branch=args.branch, ignore=args.ignore)) + elif args.cmd == "search_code": + _print(cgm.search_code(args.prefix, args.project, branch=args.branch, limit=args.limit)) + elif args.cmd == "get_callers": + _print(cgm.get_callers(args.symbol_id, args.project, branch=args.branch, limit=args.limit)) + elif args.cmd == "get_callees": + _print(cgm.get_callees(args.symbol_id, args.project, branch=args.branch, limit=args.limit)) + elif args.cmd == "get_dependencies": + _print(cgm.get_dependencies(args.symbol_id, args.project, branch=args.branch, limit=args.limit)) + elif args.cmd == "impact_analysis": + _print( + cgm.impact_analysis( + args.symbol_id, + args.project, + branch=args.branch, + direction=args.direction, + depth=args.depth, + ) + ) + elif args.cmd == "find_path": + _print(cgm.find_path(args.source_id, args.dest_id, args.project, branch=args.branch)) + elif args.cmd == "ask": + _print(cgm.ask(args.question, args.project, branch=args.branch)) + else: # pragma: no cover — argparse already enforces this + parser.error(f"unknown subcommand: {args.cmd}") + except Exception as e: # noqa: BLE001 — surface everything to the agent + print(f"cg-mcp error: {e}", file=sys.stderr) + return 1 + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bench/tools/code_graph_mcp/system_preamble.md b/bench/tools/code_graph_mcp/system_preamble.md new file mode 100644 index 00000000..bf5af4a1 --- /dev/null +++ b/bench/tools/code_graph_mcp/system_preamble.md @@ -0,0 +1,72 @@ +# code-graph (MCP) 
preamble + +You are an autonomous coding agent solving a software-engineering task. +Your sole tool is bash: every action you take is a shell command that +is executed in the repository's working directory. + +## Code-navigation workflow — use this BEFORE grep/find + +A code-graph **MCP server** (`cgraph-mcp`) is available for this repo. +**Before reading or editing code, locate the relevant symbols through +`cg-mcp` rather than grepping the file tree** — it's faster, returns +precise `{id, file, line}` records, and reveals caller / callee / +impact relationships you would otherwise reconstruct by hand. Fall +back to bash only when `cg-mcp` cannot answer the question. + +`$PROJECT_NAME` and `$BRANCH` are exported for you (do not guess). +The graph is already indexed against the current commit. + +Typical loop: + +1. `cg-mcp search_code --project "$PROJECT_NAME" --prefix STR` — + locate a function/class by name. Pick the `id` of the best hit. +2. `cg-mcp get_callers --project "$PROJECT_NAME" --symbol-id ID` — + "who calls this?" before refactoring. +3. `cg-mcp impact_analysis --project "$PROJECT_NAME" --symbol-id ID + --depth 3` — full transitive blast radius. Use this BEFORE + non-trivial edits. +4. Read the implicated file(s) with `sed -n` / `cat`, then edit. + +## Available `cg-mcp` sub-commands + +- `cg-mcp search_code --project P --prefix STR [--limit N]` — + prefix search; returns `[{id, name, label, file, line}, ...]`. +- `cg-mcp get_callers --project P --symbol-id ID [--limit N]` — + incoming CALLS edges (who calls X). +- `cg-mcp get_callees --project P --symbol-id ID [--limit N]` — + outgoing CALLS edges (what X calls). +- `cg-mcp get_dependencies --project P --symbol-id ID [--limit N]` — + all outgoing edges (CALLS + IMPORTS + DEFINES). +- `cg-mcp impact_analysis --project P --symbol-id ID + [--direction IN|OUT] [--depth N]` — + transitive blast radius (default IN, depth 3). 
+- `cg-mcp find_path --project P --source-id ID --dest-id ID` — + the call chain(s) between two symbols. +- `cg-mcp index_repo --path-or-url PATH [--branch B]` — + (re)index a folder or git URL. Only needed for repos that aren't + pre-indexed. + +You also have the usual Unix tools (`cat`, `grep`/`rg`, `find`, `sed`) +for cases the graph can't answer. + +## Rules of thumb + +1. **Always run `search_code` first** to turn a name into an `id`. +2. **`impact_analysis` before any non-trivial edit.** Even when you + think you know the answer — the transitive closure often surprises + you. +3. **Don't `grep` for callers.** `get_callers` is one cheap Cypher + hop; grep over a large repo costs tens of thousands of tokens. + +## Submission + +When you believe the task is complete, run a bash command whose first +line of stdout is exactly: + +``` +COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT +``` + +followed by your final answer or summary on subsequent lines. The +runner reads the working-tree `git diff` automatically; you do not +need to commit. diff --git a/bench/tools/code_graph_mcp/tools.yaml b/bench/tools/code_graph_mcp/tools.yaml new file mode 100644 index 00000000..3b676977 --- /dev/null +++ b/bench/tools/code_graph_mcp/tools.yaml @@ -0,0 +1,39 @@ +# SWE-agent tool bundle: code-graph MCP-transport config. +# +# This is the MCP-transport sibling of bench/tools/code_graph/tools.yaml. +# Same backend graph; different transport. Where `code_graph` calls the +# host FastAPI service over HTTP, `code_graph_mcp` spawns the +# `cgraph-mcp` stdio server for each tool call — the exact transport +# Claude Code / Cursor / Cline use in production. +# +# Tool names mirror the 8 MCP tools registered in api/mcp/tools/ +# (search_code, get_callers, get_callees, get_dependencies, +# impact_analysis, find_path, index_repo, ask). The bash agent calls +# them through the `cg-mcp ...` shim (see bench/cli/cg-mcp). +# +# IMPORTANT: `ask` (GraphRAG) is intentionally NOT in the tool list. 
+# Including it would double-count tokens (nested LLM agent). Same Q2 +# decision as the HTTP code_graph config — we benchmark the *graph*, +# not GraphRAG. + +extends: ../baseline/tools.yaml + +tools: + - index_repo # (path_or_url, branch?) -> indexing stats + - search_code # (project, prefix) -> [symbol] + - get_callers # (project, symbol_id) -> [caller] + - get_callees # (project, symbol_id) -> [callee] + - get_dependencies # (project, symbol_id) -> [dep] + - impact_analysis # (project, symbol_id, direction, depth) -> [impacted] + - find_path # (project, source_id, dest_id) -> [path] + +backend: + transport: mcp_stdio + command: cgraph-mcp + # Container has cgraph-mcp on PATH via `pip install -e .` against this + # repo. FALKORDB_HOST/PORT are passed through to the spawned MCP + # server, pointing at the same host FalkorDB the HTTP config uses. + env_passthrough: + - FALKORDB_HOST + - FALKORDB_PORT + - MODEL_NAME diff --git a/tests/bench/__init__.py b/tests/bench/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/bench/test_cg_mcp_adapter.py b/tests/bench/test_cg_mcp_adapter.py new file mode 100644 index 00000000..7d6f9274 --- /dev/null +++ b/tests/bench/test_cg_mcp_adapter.py @@ -0,0 +1,178 @@ +"""Tests for the MCP-transport bench adapter (`cg-mcp`). + +Heavy end-to-end test (talks to real cgraph-mcp + FalkorDB) is gated +behind the same `_falkordb_reachable` check as the existing MCP tests. +Light tests run unconditionally and validate the argparse surface and +`_extract` shape handling. +""" + +from __future__ import annotations + +import json +import os +import socket +import subprocess +import sys +from pathlib import Path + +import pytest + +from bench.agents import code_graph_mcp_adapter as cgm +from bench.cli import cg_mcp + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +def _mcp_server_available() -> bool: + """The benchmark MCP adapter requires the in-repo `cgraph-mcp` server. 
+ + On branches that pre-date the MCP stack (e.g. this branch's base, + `fix-find-symbol-nested-name`), `api.mcp.server` is absent. The + end-to-end test must skip there; it will run on staging once the + MCP stack lands. + """ + try: + import api.mcp.server # noqa: F401 + return True + except ImportError: + return False + + +def _falkordb_reachable() -> bool: + host = os.environ.get("FALKORDB_HOST", "127.0.0.1") + port = int(os.environ.get("FALKORDB_PORT", "6390")) + try: + with socket.create_connection((host, port), timeout=1): + return True + except OSError: + return False + + +# ── light unit tests ────────────────────────────────────────────────── + + +class _FakeChunk: + def __init__(self, text: str) -> None: + self.text = text + + +class _FakeResult: + def __init__(self, content, structured=None, is_error=False): + self.content = content + self.structuredContent = structured + self.isError = is_error + + +def test_extract_prefers_text_chunk_json(): + r = _FakeResult([_FakeChunk('{"id": 7, "name": "foo"}')]) + assert cgm._extract(r) == {"id": 7, "name": "foo"} + + +def test_extract_falls_back_to_structured_result_wrapper(): + r = _FakeResult(content=[], structured={"result": [1, 2, 3]}) + assert cgm._extract(r) == [1, 2, 3] + + +def test_extract_returns_raw_text_when_not_json(): + r = _FakeResult([_FakeChunk("not json at all")]) + assert cgm._extract(r) == "not json at all" + + +def test_cli_rejects_unknown_subcommand(capsys): + with pytest.raises(SystemExit): + cg_mcp.main(["totally_bogus"]) + + +def test_cli_index_repo_parses_ignore_list(monkeypatch): + captured: dict = {} + + def fake_index_repo(path_or_url, branch=None, ignore=None): + captured.update(path_or_url=path_or_url, branch=branch, ignore=ignore) + return {"ok": True, **captured} + + monkeypatch.setattr(cgm, "index_repo", fake_index_repo) + rc = cg_mcp.main( + [ + "index_repo", + "--path-or-url", + "/tmp/x", + "--branch", + "main", + "--ignore", + ".venv", + "node_modules", + ] + ) + assert rc == 
0 + assert captured["path_or_url"] == "/tmp/x" + assert captured["branch"] == "main" + assert captured["ignore"] == [".venv", "node_modules"] + + +# ── heavy end-to-end test ───────────────────────────────────────────── + + +@pytest.mark.skipif( + not _mcp_server_available(), + reason="api.mcp.server not present — requires MCP stack to be merged", +) +@pytest.mark.skipif(not _falkordb_reachable(), reason="FalkorDB unreachable") +def test_cg_mcp_search_code_end_to_end(tmp_path): + """Spawn the actual cg-mcp shim against a freshly-indexed fixture.""" + fixture = REPO_ROOT / "tests" / "mcp" / "fixtures" / "sample_project" + if not fixture.exists(): + pytest.skip("MCP sample fixture not present") + + env = os.environ.copy() + env["FALKORDB_HOST"] = os.environ.get("FALKORDB_HOST", "127.0.0.1") + env["FALKORDB_PORT"] = os.environ.get("FALKORDB_PORT", "6390") + env["BENCH_PYTHON"] = sys.executable + # Ensure cgraph-mcp is on PATH for the spawned subprocess. + venv_bin = str(Path(sys.executable).parent) + env["PATH"] = f"{venv_bin}:{env.get('PATH', '')}" + + # Index the fixture under a deterministic project/branch. + project = "sample_project" + branch = f"benchmcp-{os.getpid()}" + idx = subprocess.run( + [ + str(REPO_ROOT / "bench" / "cli" / "cg-mcp"), + "index_repo", + "--path-or-url", + str(fixture), + "--branch", + branch, + ], + env=env, + capture_output=True, + text=True, + timeout=120, + ) + assert idx.returncode == 0, idx.stderr + idx_payload = json.loads(idx.stdout) + assert "graph_name" in idx_payload + assert idx_payload["num_nodes"] > 0 + + # Then search for any known symbol from the fixture. 
+ sr = subprocess.run( + [ + str(REPO_ROOT / "bench" / "cli" / "cg-mcp"), + "search_code", + "--project", + project, + "--branch", + branch, + "--prefix", + "a", # broad prefix to match something in the fixture + "--limit", + "3", + ], + env=env, + capture_output=True, + text=True, + timeout=60, + ) + assert sr.returncode == 0, sr.stderr + out = json.loads(sr.stdout) + assert out is not None From f17d437b3abf856b96494890965ea63e5a169d83 Mon Sep 17 00:00:00 2001 From: Dvir Dukhan <12258836+DvirDukhan@users.noreply.github.com> Date: Wed, 27 May 2026 14:51:38 +0300 Subject: [PATCH 2/7] bench: wire code_graph_mcp into mini_runner dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The adapter and shim from the previous commit were inert from the runner's perspective — VALID_CONFIGS only knew baseline/lsp/code_graph. This commit makes `--config code_graph_mcp` a first-class track. Changes in bench/runners/mini_runner.py: - VALID_CONFIGS gains "code_graph_mcp" (passes argparse + help string). - New INSTANCE_TEMPLATE_CODE_GRAPH_MCP: mirrors the HTTP code_graph template but tells the agent to call `cg-mcp` with $PROJECT_NAME + $BRANCH, and to use impact_analysis before non-trivial edits. - load_instance_template dispatches the new template. - config_env("code_graph_mcp", ...) prepends venv bin to PATH (so cgraph-mcp is callable from the agent's bash), passes FALKORDB_* through to the spawned MCP server, and exports PROJECT_NAME + BRANCH which the preamble references. - New _ensure_indexed_mcp() mirrors _ensure_indexed but goes through the bench MCP adapter instead of HTTP. Skip-if-present probe hits FalkorDB's GRAPH.LIST directly (one trip, no MCP spawn). - Per-instance loop now dispatches to _ensure_indexed_mcp for the new config. 
Smoke-verified that: - VALID_CONFIGS == ('baseline','lsp','code_graph','code_graph_mcp') - load_instance_template('code_graph_mcp') contains 'cg-mcp' - config_env populates PROJECT_NAME/BRANCH/FALKORDB_HOST Unit tests for the adapter still pass (5 passed, 1 skipped — heavy e2e double-gated on FalkorDB + api.mcp.server availability). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- bench/runners/mini_runner.py | 87 +++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/bench/runners/mini_runner.py b/bench/runners/mini_runner.py index 3689c0aa..17b0cbaa 100644 --- a/bench/runners/mini_runner.py +++ b/bench/runners/mini_runner.py @@ -49,7 +49,7 @@ DEFAULT_CACHE_DIR = BENCH_DIR / "cache" DEFAULT_RESULTS = DEFAULT_CACHE_DIR / "results.jsonl" -VALID_CONFIGS = ("baseline", "lsp", "code_graph") +VALID_CONFIGS = ("baseline", "lsp", "code_graph", "code_graph_mcp") # --------------------------------------------------------------------------- @@ -155,11 +155,40 @@ class Task: """ +INSTANCE_TEMPLATE_CODE_GRAPH_MCP = """\ +You are working in the repository at {{cwd}}. +The code-graph MCP server has already indexed this repository under the +project name `$PROJECT_NAME` on branch `$BRANCH` (use the env vars +literally). + +The task to solve: + +{{task}} + +**Required workflow.** Before reading or editing any file, your first +bash command MUST be: + + `cg-mcp search_code --project "$PROJECT_NAME" --branch "$BRANCH" --prefix ` + +Then use `cg-mcp get_callers --project "$PROJECT_NAME" --branch "$BRANCH" --symbol-id ` +to expand relationships before doing any textual search. Use +`cg-mcp impact_analysis ... --symbol-id --depth 3` before +non-trivial edits. + +When you believe the task is complete, finish your turn with a final +message that contains a unified diff of your changes inside a fenced +``` block, then exit. Do not commit; the harness reads the diff via +`git diff`. 
+""" + + def load_instance_template(config: str) -> str: if config == "lsp": return INSTANCE_TEMPLATE_LSP if config == "code_graph": return INSTANCE_TEMPLATE_CODE_GRAPH + if config == "code_graph_mcp": + return INSTANCE_TEMPLATE_CODE_GRAPH_MCP return INSTANCE_TEMPLATE @@ -210,6 +239,23 @@ def config_env(config: str, repo_path: Path) -> dict[str, str]: # The agent's preamble references $REPO_NAME — set it to the # worktree dirname, which is what analyze_folder used as the id. env["REPO_NAME"] = repo_path.name + elif config == "code_graph_mcp": + # MCP transport: agent calls `cg-mcp …` which spawns the + # `cgraph-mcp` stdio server per call. FalkorDB coordinates + # are passed through verbatim. + env.setdefault("FALKORDB_HOST", os.environ.get("FALKORDB_HOST", "127.0.0.1")) + env.setdefault("FALKORDB_PORT", os.environ.get("FALKORDB_PORT", "6379")) + # `cgraph-mcp` must be on PATH; the runner installs the + # falkordb-code-graph package into the same interpreter, so + # prepending the venv bin gives us the entry point. + venv_bin = str(Path(sys.executable).parent) + env["PATH"] = f"{venv_bin}:{env['PATH']}" + # The preamble references $PROJECT_NAME and $BRANCH; project + # name matches what `index_repo` derives from the folder + # (= worktree dirname), and branch is the per-instance tag we + # used when indexing. + env["PROJECT_NAME"] = repo_path.name + env["BRANCH"] = os.environ.get("CGRAPH_MCP_BRANCH", "_default") return env @@ -248,6 +294,41 @@ def _ensure_indexed(repo_path: Path) -> None: print(f"[index] WARN failed to index {repo_name}: {exc!r}") +def _ensure_indexed_mcp(repo_path: Path) -> None: + """MCP-track equivalent of _ensure_indexed. + + Drives the `index_repo` MCP tool in-process via the bench adapter + (avoids spawning a second cgraph-mcp just to bootstrap; the agent + will spawn its own per call). Same skip-if-present optimization + as the HTTP path: cheap GRAPH.LIST scan against FalkorDB. 
+ """ + from bench.agents import code_graph_mcp_adapter as cgm + import redis + + repo_name = repo_path.name + branch = os.environ.get("CGRAPH_MCP_BRANCH", "_default") + host = os.environ.get("FALKORDB_HOST", "127.0.0.1") + port = int(os.environ.get("FALKORDB_PORT", "6379")) + expected_graph = f"code:{repo_name}:{branch}" + try: + r = redis.Redis(host=host, port=port, decode_responses=True, socket_timeout=2) + if expected_graph in (r.execute_command("GRAPH.LIST") or []): + print(f"[index-mcp] {expected_graph} already indexed; skip") + return + except Exception as exc: # noqa: BLE001 + print(f"[index-mcp] WARN list_graphs failed ({exc!r}); will attempt index anyway") + + print(f"[index-mcp] indexing {repo_path} as {expected_graph} ...") + try: + payload = cgm.index_repo(str(repo_path), branch=branch) + if isinstance(payload, dict) and payload.get("error"): + print(f"[index-mcp] WARN index_repo error: {payload['error']!r}") + else: + print(f"[index-mcp] indexed: {payload}") + except Exception as exc: # noqa: BLE001 + print(f"[index-mcp] WARN failed to index {repo_name}: {exc!r}") + + # --------------------------------------------------------------------------- # Dry-run stub model # --------------------------------------------------------------------------- @@ -569,7 +650,7 @@ def main(argv: list[str] | None = None) -> int: p = argparse.ArgumentParser(description="code-graph benchmark runner") p.add_argument("--config", choices=VALID_CONFIGS, action="append", - help="one of baseline / lsp / code_graph; repeatable. " + help="one of baseline / lsp / code_graph / code_graph_mcp; repeatable. " "Default: all three.") mode = p.add_mutually_exclusive_group(required=True) mode.add_argument("--dry-run", action="store_true", @@ -640,6 +721,8 @@ def main(argv: list[str] | None = None) -> int: # call returns nothing and the agent abandons the tool. 
if cfg == "code_graph": _ensure_indexed(cfg_wt) + elif cfg == "code_graph_mcp": + _ensure_indexed_mcp(cfg_wt) cfg_rows = run_batch( [task], [cfg], From b14432b9448c2b9cc3483b0f93a887bbc55e3813 Mon Sep 17 00:00:00 2001 From: Dvir Dukhan <12258836+DvirDukhan@users.noreply.github.com> Date: Wed, 27 May 2026 16:28:47 +0300 Subject: [PATCH 3/7] bench: fail loudly on indexing errors + bump analyze_folder timeout The mini_runner previously printed a [index] WARN line on analyze_folder errors and continued. This meant SWE-bench instances whose path falls outside ALLOWED_ANALYSIS_DIR (e.g. when the API server is started from a sibling worktree) would silently run the agent against a missing code-graph project. The agent's first cg call returns 400 'Missing project ...', the agent falls back to grep/sed, and we get a token count that looks bad for the code_graph track but actually reflects 'tool unavailable'. Two changes: * analyze_folder errors and httpx exceptions now raise RuntimeError with the offending path. This stops the run and surfaces the ALLOWED_ANALYSIS_DIR misconfiguration immediately. * analyze_folder timeout bumped 600s -> 1800s. The 600s default was tight for sympy (~5 MB of Python, ~5000 functions) and caused a timeout during indexing. This was discovered while running the first real 3-way SWE-bench smoke. With the fix and a corrected ALLOWED_ANALYSIS_DIR, the code_graph track produces sensible numbers (-11% input vs baseline across the smoke sample vs the prior bogus +4.7%). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- bench/runners/mini_runner.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/bench/runners/mini_runner.py b/bench/runners/mini_runner.py index 17b0cbaa..cb6b72a0 100644 --- a/bench/runners/mini_runner.py +++ b/bench/runners/mini_runner.py @@ -281,17 +281,19 @@ def _ensure_indexed(repo_path: Path) -> None: print(f"[index] {repo_name} already indexed; skip") return print(f"[index] analyzing {repo_path} ...") - with httpx.Client(timeout=600.0, headers=headers) as c: + with httpx.Client(timeout=1800.0, headers=headers) as c: r = c.post( f"{base}/api/analyze_folder", json={"path": str(repo_path), "ignore": []}, ) if r.status_code != 200: - print(f"[index] WARN analyze_folder returned {r.status_code}: {r.text[:200]}") - else: - print(f"[index] indexed {repo_name}") - except Exception as exc: # noqa: BLE001 - print(f"[index] WARN failed to index {repo_name}: {exc!r}") + raise RuntimeError( + f"analyze_folder returned {r.status_code}: {r.text[:300]}. " + f"Check ALLOWED_ANALYSIS_DIR on the API server covers {repo_path}." + ) + print(f"[index] indexed {repo_name}") + except Exception as exc: + raise RuntimeError(f"failed to index {repo_name} at {repo_path}: {exc}") from exc def _ensure_indexed_mcp(repo_path: Path) -> None: From 532d84957763cf1114681c5bdd75fc6c44889bfe Mon Sep 17 00:00:00 2001 From: dvirdukhan Date: Wed, 27 May 2026 16:51:42 +0300 Subject: [PATCH 4/7] fix(analyzer): resolve LSP CALLS edges on repos without a venv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Python analyzer hardcoded `environment_path={path}/venv` when starting jedi-language-server via multilspy. When the repo had no venv (the common case for cloned codebases like sphinx, sympy, anything from SWE-bench), jedi raised `InvalidPythonEnvironment` on every `request_definition()` call. 
analyzer.resolve() then swallowed the exception silently and the indexer produced a graph with DEFINES edges only — zero CALLS, zero EXTENDS. Benchmark validation showed sphinx (5K functions) and sympy (41K functions) had no resolved cross-references at all. Fix: - source_analyzer.py: prefer {repo}/venv, then {repo}/.venv, then fall back to the host interpreter's environment (sys.executable's prefix) so jedi always has a valid Python to introspect. - analyzer.py: log resolve() failures at WARN with file/line context instead of swallowing them silently, so the next regression is loud. Verified: re-indexed sphinx-doc/sphinx-9230 with the fix: DEFINES: 5640, CALLS: 4931, EXTENDS: 484 (was DEFINES-only). Fixes #685. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/analyzer.py | 5 +++++ api/analyzers/source_analyzer.py | 22 +++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/api/analyzers/analyzer.py b/api/analyzers/analyzer.py index 64d49004..0564606b 100644 --- a/api/analyzers/analyzer.py +++ b/api/analyzers/analyzer.py @@ -57,6 +57,11 @@ def resolve(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: P locations = lsp.request_definition(str(file_path), node.start_point.row, node.start_point.column) return [(files[Path(self.resolve_path(location['absolutePath'], path))], files[Path(self.resolve_path(location['absolutePath'], path))].tree.root_node.descendant_for_point_range(Point(location['range']['start']['line'], location['range']['start']['character']), Point(location['range']['end']['line'], location['range']['end']['character']))) for location in locations if location and Path(self.resolve_path(location['absolutePath'], path)) in files] except Exception as e: + import logging + logging.getLogger(__name__).warning( + "resolve() failed for %s @%d:%d: %s", + file_path, node.start_point.row, node.start_point.column, e, + ) return [] @abstractmethod diff --git 
a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py index 4186f358..1b8f85b1 100644 --- a/api/analyzers/source_analyzer.py +++ b/api/analyzers/source_analyzer.py @@ -134,7 +134,27 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: else: lsps[".java"] = NullLanguageServer() if any(path.rglob('*.py')): - config = MultilspyConfig.from_dict({"code_language": "python", "environment_path": f"{path}/venv"}) + import sys + py_venv = path / "venv" + py_dotvenv = path / ".venv" + if py_venv.is_dir() and (py_venv / "bin" / "python").exists(): + env_path = str(py_venv) + elif py_dotvenv.is_dir() and (py_dotvenv / "bin" / "python").exists(): + env_path = str(py_dotvenv) + else: + # Fall back to the host's Python environment so jedi has a + # valid interpreter to introspect; otherwise every + # request_definition() raises InvalidPythonEnvironment and + # we'd silently produce a graph with zero CALLS edges. + env_path = str(Path(sys.executable).resolve().parent.parent) + logging.info( + "No venv at %s; falling back to host env %s for jedi LSP", + path, env_path, + ) + config = MultilspyConfig.from_dict({ + "code_language": "python", + "environment_path": env_path, + }) lsps[".py"] = SyncLanguageServer.create(config, logger, str(path)) else: lsps[".py"] = NullLanguageServer() From 476bc73d4a3b029ee437e057e82bf05a64874230 Mon Sep 17 00:00:00 2001 From: dvirdukhan Date: Wed, 27 May 2026 20:25:26 +0300 Subject: [PATCH 5/7] bench: add resume support + ignore sympy rubi rules Two production-quality fixes from the calibration run that crashed at 14/30 trajectories: 1. Resume support: skip (instance, cfg) pairs whose trajectory file already exists. Lets us recover from crashes/kills without re-running completed work (avoids ~$3 of wasted compute on this run). 2. Ignore pathological files at index time: sympy/integrals/rubi/rules contains auto-generated 3000-line files with hundreds of unresolvable symbols per line. 
jedi spends hours and never makes progress. Adding it to the default ignore list unblocks sympy-19040 (and other sympy instances) without affecting graph quality. Also expanded default ignore set: __pycache__, build, dist, .tox, .eggs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- bench/runners/mini_runner.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/bench/runners/mini_runner.py b/bench/runners/mini_runner.py index cb6b72a0..03e32e7b 100644 --- a/bench/runners/mini_runner.py +++ b/bench/runners/mini_runner.py @@ -281,10 +281,19 @@ def _ensure_indexed(repo_path: Path) -> None: print(f"[index] {repo_name} already indexed; skip") return print(f"[index] analyzing {repo_path} ...") + # Default ignore set: auto-generated / vendored / pathological dirs + # that either contain no useful symbols or send jedi into a + # multi-hour resolve loop (e.g. sympy/integrals/rubi/rules has + # 3000-line files with hundreds of unresolvable symbols per line). + default_ignore = [ + ".git", "venv", ".venv", "node_modules", "__pycache__", + "rubi/rules", # sympy: blocks indexing for ~hours otherwise + "build", "dist", ".tox", ".eggs", + ] with httpx.Client(timeout=1800.0, headers=headers) as c: r = c.post( f"{base}/api/analyze_folder", - json={"path": str(repo_path), "ignore": []}, + json={"path": str(repo_path), "ignore": default_ignore}, ) if r.status_code != 200: raise RuntimeError( @@ -708,6 +717,13 @@ def main(argv: list[str] | None = None) -> int: f"x {len(configs)} configs = {len(insts) * len(configs)} trajectories") for inst in insts: for cfg in configs: + # Resume support: if a trajectory file for this (instance, cfg) + # already exists, skip the run entirely. Lets us recover from + # crashes / kills without re-spending tokens on completed work. 
+ existing_traj = args.trajectories / f"{inst.instance_id}__{cfg}.json" + if existing_traj.exists(): + print(f"[resume] {inst.instance_id}/{cfg}: trajectory exists, skip") + continue # Fresh worktree per (instance, config) to avoid cross-talk. wt = prepare_worktree(inst) # Rename so each cfg gets a distinct path. From d23ef79f80df790515309a28c15bf1590d9ef05f Mon Sep 17 00:00:00 2001 From: Dvir Dukhan <12258836+DvirDukhan@users.noreply.github.com> Date: Thu, 28 May 2026 07:29:12 +0300 Subject: [PATCH 6/7] fix(analyzer): defensive skip when second_pass references untracked file In source_analyzer.second_pass, the list of files we iterate can include paths that first_pass did not add to self.files (e.g. parse errors, LSP-induced timeouts, or rare edge cases where a candidate file is present in the input list but never makes it into the files map). Previously this raised KeyError and aborted the entire index. Hit on sympy/polys/distributedmodules.py during bench calibration of sympy-12481. Skip with a WARN log instead so a single bad file no longer takes down the whole index. Also bump mini_runner httpx timeout 1800s -> 7200s: indexing sympy-12481 was observed to take >30 min in the field, so with the old timeout the runner gave up early while the API server was still indexing successfully. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- api/analyzers/source_analyzer.py | 11 ++++++++++- bench/runners/mini_runner.py | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py index 1b8f85b1..ead8707a 100644 --- a/api/analyzers/source_analyzer.py +++ b/api/analyzers/source_analyzer.py @@ -166,7 +166,16 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None: with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server(): files_len = len(self.files) for i, file_path in enumerate(files): - file = self.files[file_path] + file = self.files.get(file_path) + if file is None: + # first_pass skipped this file (e.g. parse error, empty, + # or ignored after entering the candidate list). Skip + # in second_pass too instead of crashing the whole index. + logging.warning( + "second_pass: %s not in files map (first_pass skipped it); skipping", + file_path, + ) + continue logging.info(f'Processing file ({i + 1}/{files_len}): {file_path}') for _, entity in file.entities.items(): entity.resolved_symbol(lambda key, symbol, fp=file_path: analyzers[fp.suffix].resolve_symbol(self.files, lsps[fp.suffix], fp, path, key, symbol)) diff --git a/bench/runners/mini_runner.py b/bench/runners/mini_runner.py index 03e32e7b..3081bd6e 100644 --- a/bench/runners/mini_runner.py +++ b/bench/runners/mini_runner.py @@ -290,7 +290,7 @@ def _ensure_indexed(repo_path: Path) -> None: "rubi/rules", # sympy: blocks indexing for ~hours otherwise "build", "dist", ".tox", ".eggs", ] - with httpx.Client(timeout=1800.0, headers=headers) as c: + with httpx.Client(timeout=7200.0, headers=headers) as c: r = c.post( f"{base}/api/analyze_folder", json={"path": str(repo_path), "ignore": default_ignore}, From ec7fac6cc80e9bbdbfd6dc20696696fbb87fe35c Mon Sep 17 00:00:00 2001 From: Dvir Dukhan <12258836+DvirDukhan@users.noreply.github.com> Date: Thu, 28 May 2026 
09:19:47 +0300 Subject: [PATCH 7/7] bench: add start-api.sh helper enabling tree-sitter fast resolver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After T18 (#691) + query-cache (#692), code_graph indexing on pytest-6202 drops from 247s to 3.7s — but only if the API server is launched with CODE_GRAPH_PY_RESOLVER=tree_sitter. This helper bakes in that env plus the public/permissive flags the bench harness expects, so calibration runs hit the fast path without manual setup. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- bench/scripts/start-api.sh | 44 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100755 bench/scripts/start-api.sh diff --git a/bench/scripts/start-api.sh b/bench/scripts/start-api.sh new file mode 100755 index 00000000..4e55f673 --- /dev/null +++ b/bench/scripts/start-api.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Launch the code-graph API server with the fast tree-sitter Python +# resolver enabled (PR #691 + #692). This is what the bench harness +# expects to talk to at 127.0.0.1:5000. +# +# Usage: +# bench/scripts/start-api.sh # default port 5000 +# bench/scripts/start-api.sh --port 5001 +# +# Prereqs: +# - FalkorDB running. For native falkordb on 6380 set +# FALKORDB_HOST=127.0.0.1 FALKORDB_PORT=6380 before invoking. +# - uv on PATH. +# - cwd must be a code-graph worktree containing api/ with PR #691 +# and PR #692 applied (i.e. the dvirdukhan/query-cache branch tip +# or staging once those are merged). + +set -euo pipefail + +PORT=5000 +while [[ $# -gt 0 ]]; do + case "$1" in + --port) PORT="$2"; shift 2 ;; + *) echo "Unknown arg: $1" >&2; exit 1 ;; + esac +done + +# Tree-sitter static resolver — turns Python indexing from minutes to +# seconds. Default is still jedi, so callers must opt in explicitly. 
+export CODE_GRAPH_PY_RESOLVER="${CODE_GRAPH_PY_RESOLVER:-tree_sitter}" + +# Allow the bench harness to analyze any folder; the bench worktrees +# live under bench/cache/worktrees. +export ALLOWED_ANALYSIS_DIR="${ALLOWED_ANALYSIS_DIR:-/}" + +# Public mode: bench harness does not bother with bearer tokens. +export CODE_GRAPH_PUBLIC="${CODE_GRAPH_PUBLIC:-1}" + +echo "[start-api] CODE_GRAPH_PY_RESOLVER=$CODE_GRAPH_PY_RESOLVER" +echo "[start-api] CODE_GRAPH_PUBLIC=$CODE_GRAPH_PUBLIC" +echo "[start-api] FALKORDB_HOST=${FALKORDB_HOST:-127.0.0.1} FALKORDB_PORT=${FALKORDB_PORT:-6379}" +echo "[start-api] Listening on 127.0.0.1:$PORT" + +exec uv run uvicorn api.index:app --host 127.0.0.1 --port "$PORT"