From ae8066cca4b9a4f15845a6ee019be4a274959aaf Mon Sep 17 00:00:00 2001 From: PrashantUnity Date: Thu, 18 Jun 2026 20:37:25 +0530 Subject: [PATCH] Initial Phase Done --- .gitignore | 2 +- AGENT.md | 6 +- README.md | 4 +- docker-compose.prod.yml | 21 + docker-compose.yml | 21 + docs/MCP.md | 79 +- docs/OPS.md | 14 + requirements.txt | 6 +- src/website_profiling/mcp/http.py | 5 + src/website_profiling/mcp/http_server.py | 253 ++++++ src/website_profiling/mcp/server.py | 58 +- src/website_profiling/mcp/settings.py | 57 ++ tests/test_mcp_http_server.py | 752 ++++++++++++++++++ web/app/mcp/page.tsx | 7 + web/src/components/mcp/McpCopyBlock.tsx | 52 ++ .../secrets/SecretsSettingsPanel.tsx | 12 +- web/src/hooks/useMcpSettings.ts | 57 ++ web/src/lib/appNav.ts | 24 +- web/src/lib/mcpClientConfig.test.ts | 42 + web/src/lib/mcpClientConfig.ts | 104 +++ web/src/lib/pipelineConfigSchema.test.ts | 3 + web/src/lib/pipelineConfigSchema.ts | 41 + web/src/lib/secretsConfigSchema.ts | 85 +- web/src/server/pipelineConfig.ts | 5 +- web/src/server/secrets.test.ts | 51 ++ web/src/server/secrets.ts | 24 +- web/src/strings.json | 47 ++ web/src/views/McpSettings.tsx | 296 +++++++ web/src/views/Secrets.tsx | 14 +- 29 files changed, 2095 insertions(+), 47 deletions(-) create mode 100644 src/website_profiling/mcp/http.py create mode 100644 src/website_profiling/mcp/http_server.py create mode 100644 src/website_profiling/mcp/settings.py create mode 100644 tests/test_mcp_http_server.py create mode 100644 web/app/mcp/page.tsx create mode 100644 web/src/components/mcp/McpCopyBlock.tsx create mode 100644 web/src/hooks/useMcpSettings.ts create mode 100644 web/src/lib/mcpClientConfig.test.ts create mode 100644 web/src/lib/mcpClientConfig.ts create mode 100644 web/src/views/McpSettings.tsx diff --git a/.gitignore b/.gitignore index 162eb9b..094323a 100644 --- a/.gitignore +++ b/.gitignore @@ -31,4 +31,4 @@ pipeline-config.txt .coverage .agents/ skills-lock.json -.crawl_results.csv \ No newline at end 
of file +crawl_results.csv \ No newline at end of file diff --git a/AGENT.md b/AGENT.md index 31a5dab..116e28c 100644 --- a/AGENT.md +++ b/AGENT.md @@ -27,11 +27,11 @@ Developer reference for agents and contributors. User-facing overview: [README.m - **Pipeline storage** (crawl, edges, nodes, report payload, Lighthouse, keywords, warnings) lives in **PostgreSQL only**. Deliverables use the Export view, `GET /api/report/export`, or MCP `export_*` tools — not files written by the main pipeline step. - **Pool tuning:** `DB_POOL_MIN` / `DB_POOL_MAX` (Python), `PGPOOL_MAX` (Node). Bulk crawl writes via `executemany`; optional **`crawl_stream_to_db`** streams rows during fetch. Per-URL raw HTML: `crawl_page_html` table (migration `015`); API `GET/POST /api/crawl/page-html` (localhost). - **`web/` APIs:** `/api/report/*` read routes (payload, meta, history — not localhost-guarded; protect with `AUTH_*` when exposed); `/api/run` spawns Python (localhost); `/api/jobs`, `/api/jobs/[id]`, `/api/jobs/[id]/cancel` (localhost); `/api/crawl/browser-status`, `/api/crawl/page-html` (localhost); `/api/pipeline-config` GET/PUT; `/api/llm-config` GET/PUT; `/api/chat` POST (SSE); `/api/chat/sessions` GET/POST; `/api/ollama/status` (localhost); `/api/properties/{id}/google/links/import` POST; `PipelineRunnerFab` saves pipeline + LLM state before each run. Full route list: `web/app/api/**/route.ts`. -- **MCP:** `python -m website_profiling.mcp` (stdio, **340 read-only audit tools**, domain-scoped via `WP_MCP_DOMAIN`). See `docs/MCP.md`. Requires `pip install -r requirements.txt`. +- **MCP:** `python -m website_profiling.mcp` (stdio) or `python -m website_profiling.mcp.http` (remote Streamable HTTP). Configure at **`/mcp`** in the web UI. See `docs/MCP.md`. - **AI Chat UI:** `/chat` — property-scoped chat with saved sessions (`chat_sessions`, `chat_messages`; migration `012_chat_sessions`). 
- **Job store:** PostgreSQL `pipeline_jobs` when `DATABASE_URL` is set (`pipelineJobsDb.ts` — status, timestamps, truncated logs). In-memory map in `pipelineJobs.ts` holds live log tail and child process handles; stale rows reconciled via `PIPELINE_JOB_STALE_HOURS`. - **Schema head:** `015_crawl_page_html` (recent: `013` link_edges/discovery, `014` job log truncation, `015` per-URL HTML storage). -- **Docker:** `Dockerfile` + `docker-compose.yml` (postgres + web); **`docker-compose.prod.yml`** (production); **`docker-compose.pull.yml`** for pre-built images (`WEB_IMAGE`); **`LIGHTHOUSE_CHROME_FLAGS`** +- **Docker:** `Dockerfile` + `docker-compose.yml` (postgres + web); **`docker-compose.prod.yml`** (production + remote MCP on `:8000`); **`docker-compose.pull.yml`** for pre-built images (`WEB_IMAGE`); **`LIGHTHOUSE_CHROME_FLAGS`** **Where to edit** @@ -42,7 +42,7 @@ Developer reference for agents and contributors. User-facing overview: [README.m | DB schema | `alembic/versions/` | | Local analysis | `analysis/local.py`, `requirements.txt` | | AI insights (LLM) | `llm/enrich.py`, `llm/agent.py`, `llm_config.py`, `requirements.txt` | -| Audit query tools (MCP + chat) | `tools/audit_tools/`, `mcp/server.py`, `commands/chat_cmd.py` | +| Audit query tools (MCP + chat) | `tools/audit_tools/`, `mcp/server.py`, `mcp/http_server.py`, `commands/chat_cmd.py` | | Config / CLI | `config.py` (`load_config`, `load_config_from_db`), `cli.py`, `input.txt.example` | | UI pipeline schema | `web/src/lib/pipelineConfigSchema.ts` | | UI LLM schema | `web/src/lib/llmConfigSchema.ts` | diff --git a/README.md b/README.md index 74299f4..8fb9153 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ Site Audit focuses on **honest, self-hosted technical SEO**. 
It is not a drop-in -Also included: **AI chat** over audit data (optional), **Content studio** (write & optimize with live SEO scoring), **340 MCP tools** (domain-scoped servers), image SEO, GEO/AEO readiness, keyword explorer (GSC + on-site), backlinks (GSC Links import), compare runs, and portfolio management for agencies. +Also included: **AI chat** over audit data (optional), **Content studio** (write & optimize with live SEO scoring), **340 MCP tools** (local stdio or remote Streamable HTTP), image SEO, GEO/AEO readiness, keyword explorer (GSC + on-site), backlinks (GSC Links import), compare runs, and portfolio management for agencies. Site Audit — developer-friendly SEO audit preview @@ -109,7 +109,7 @@ WebsiteProfiling/ │ ├── integrations/ # Google Search Console, GA4, Bing, CrUX │ ├── llm/ # AI enrich + chat agent │ ├── tools/ # Exports, audit query tools, MCP helpers -│ ├── mcp/ # MCP server (340 read-only tools, domain bundles) +│ ├── mcp/ # MCP server (stdio + remote HTTP, domain bundles) │ ├── db/ # PostgreSQL storage layer │ ├── commands/ # CLI subcommands │ ├── cli.py # Pipeline entrypoint diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 6306d0e..0ba7a67 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -55,6 +55,27 @@ services: profiles: - worker + mcp: + build: + context: . 
+ dockerfile: Dockerfile + depends_on: + postgres: + condition: service_healthy + command: ['python', '-m', 'website_profiling.mcp.http'] + environment: + WEBSITE_PROFILING_ROOT: /app + DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-website_profiling} + WP_MCP_HTTP_HOST: 0.0.0.0 + WP_MCP_HTTP_PORT: 8000 + WP_MCP_TOKEN: ${WP_MCP_TOKEN:?set WP_MCP_TOKEN} + WP_MCP_ALLOWED_HOSTS: ${WP_MCP_ALLOWED_HOSTS:-} + WP_MCP_ALLOWED_ORIGINS: ${WP_MCP_ALLOWED_ORIGINS:-} + WP_MCP_DOMAIN: ${WP_MCP_DOMAIN:-core} + WP_PROPERTY_ID: ${WP_PROPERTY_ID:-} + ports: + - '${MCP_PORT:-8000}:8000' + volumes: pg-data: profiling-data: diff --git a/docker-compose.yml b/docker-compose.yml index 94d375d..b4ecaa4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -41,6 +41,27 @@ services: retries: 3 start_period: 15s + # Optional remote MCP (Streamable HTTP). Uncomment and set WP_MCP_TOKEN / WP_MCP_ALLOWED_HOSTS. + # mcp: + # build: + # context: . + # dockerfile: Dockerfile + # image: website-profiling:latest + # depends_on: + # postgres: + # condition: service_healthy + # command: ['python', '-m', 'website_profiling.mcp.http'] + # environment: + # WEBSITE_PROFILING_ROOT: /app + # DATABASE_URL: postgres://profiling:profiling@postgres:5432/website_profiling + # WP_MCP_HTTP_HOST: 0.0.0.0 + # WP_MCP_HTTP_PORT: 8000 + # WP_MCP_TOKEN: ${WP_MCP_TOKEN:-dev-mcp-token} + # WP_MCP_ALLOWED_HOSTS: localhost,127.0.0.1 + # WP_MCP_DOMAIN: core + # ports: + # - "8000:8000" + volumes: pg-data: profiling-data: diff --git a/docs/MCP.md b/docs/MCP.md index 420bef6..5a5c0db 100644 --- a/docs/MCP.md +++ b/docs/MCP.md @@ -13,6 +13,7 @@ The same tool catalog powers in-app **AI Chat** at `/chat`. 
- [Prerequisites](#prerequisites) - [Domain-scoped servers](#domain-scoped-servers) - [Configuration](#configuration) +- [Remote Streamable HTTP](#remote-streamable-http) - [MCP resources](#mcp-resources) - [Tool reference](#tool-reference) - [In-app chat](#in-app-chat) @@ -31,12 +32,14 @@ export DATABASE_URL=postgres://profiling:profiling@localhost:5432/website_profil export PYTHONPATH=src ``` -Start the server: +Start the local stdio server: ```bash python -m website_profiling.mcp ``` +For remote access over HTTP, see [Remote Streamable HTTP](#remote-streamable-http). + --- ## Domain-scoped servers @@ -111,6 +114,80 @@ Add to `.cursor/mcp.json` or your MCP client settings: --- +## Remote Streamable HTTP + +Use this when Site Audit runs on a hosted server and your MCP client (Cursor, Claude Desktop, etc.) connects over the network instead of spawning a local stdio subprocess. + +### Start the HTTP server + +Configure access on **MCP settings** (`/mcp`) in the web UI (recommended), or set environment variables. UI changes apply on the next MCP request without restarting the service. + +```bash +export DATABASE_URL=postgres://profiling:profiling@localhost:5432/website_profiling +export PYTHONPATH=src +export WP_MCP_HTTP_HOST=0.0.0.0 +export WP_MCP_HTTP_PORT=8000 +export WP_MCP_DOMAIN=core +export WP_PROPERTY_ID=1 + +python -m website_profiling.mcp.http +``` + +Set **MCP bearer token** and **Allowed hostnames** on the Secrets page (or via `WP_MCP_TOKEN` / `WP_MCP_ALLOWED_HOSTS`). Environment variables override saved values when set. + +The MCP endpoint is `http://<host>:8000/mcp` by default (`WP_MCP_HTTP_PATH=/mcp`). + +### Environment variables + +| Variable | Default | Purpose | +|----------|---------|---------| +| `WP_MCP_HTTP_HOST` | `127.0.0.1` | Bind address (`0.0.0.0` for Docker) | +| `WP_MCP_HTTP_PORT` | `8000` | Listen port | +| `WP_MCP_HTTP_PATH` | `/mcp` | Mount path | +| `WP_MCP_TOKEN` | unset | Bearer token (**required** when not binding localhost).
Save on **Secrets → Remote MCP** or set here (env wins). | +| `WP_MCP_ALLOWED_HOSTS` | unset | Comma-separated `Host` allowlist (**required** for non-localhost bind). Save on **Secrets → Remote MCP** or set here. | +| `WP_MCP_ALLOWED_ORIGINS` | unset | Comma-separated `Origin` allowlist for browser clients | +| `WP_MCP_JSON_RESPONSE` | `false` | JSON responses instead of SSE streams | +| `WP_MCP_DOMAIN` | `core` | Tool bundle (same as stdio) | +| `WP_PROPERTY_ID` | unset | Default property (same as stdio) | + +**Security:** `WP_MCP_TOKEN` is required when `WP_MCP_HTTP_HOST` is not localhost. Tools are read-only but expose audit, GSC, and GA4 data — treat the token like a database credential. + +**DNS rebinding protection:** Whenever a token **and** allowed hosts are configured — via the UI **or** environment variables — the HTTP service enforces the bearer token plus the Host/Origin allowlist in its own middleware, and the MCP SDK's built-in DNS-rebinding check is turned off (the middleware supersedes it). The SDK check only applies as a fallback on a non-localhost bind that has no remote access configured (a state the startup validation otherwise refuses to boot in). Either way, set `WP_MCP_ALLOWED_HOSTS` to the public hostname clients use (e.g. `audit.example.com`). + +### Cursor / Claude Desktop (remote) + +```json +{ + "mcpServers": { + "site-audit-remote": { + "url": "https://audit.example.com/mcp", + "headers": { + "Authorization": "Bearer your-long-random-token" + } + } + } +} +``` + +### Docker (production) + +`docker-compose.prod.yml` includes an `mcp` service. Set `WP_MCP_TOKEN` and `WP_MCP_ALLOWED_HOSTS` in the environment **or** configure them on **Secrets → Remote MCP** after deploy. + +Terminate TLS at your reverse proxy and route `/mcp` to the MCP container. Recommended proxy settings: `proxy_buffering off`, long `proxy_read_timeout`.
+ +### Troubleshooting + +| Symptom | Likely cause | +|---------|----------------| +| Server refuses to start | Missing token or allowed hosts on non-localhost bind (Secrets page or env) | +| 401 Unauthorized | Wrong or missing `Authorization: Bearer` header | +| 404 Not Found | Wrong path — endpoint is `/mcp` by default | +| Blocked / bad request behind proxy | Host not listed in allowed hostnames (Secrets → Remote MCP) | +| Connection refused | Firewall, wrong port, or MCP service not running | + +--- + ## MCP resources | URI | Content | diff --git a/docs/OPS.md b/docs/OPS.md index 765ffd0..8447ffa 100644 --- a/docs/OPS.md +++ b/docs/OPS.md @@ -117,6 +117,20 @@ AUTH_DEFAULT_ROLE=client-readonly Production also requires `AUTH_SECRET` and optionally `AUTH_USER` / `AUTH_PASSWORD` (see `docker-compose.prod.yml`). +### Remote MCP (Streamable HTTP) + +The `mcp` service in `docker-compose.prod.yml` exposes read-only audit tools over HTTP at `/mcp`. Configure on **Secrets → Remote MCP** (`/secrets`) or via environment variables (env overrides saved values): + +| Variable | Purpose | +|----------|---------| +| `WP_MCP_TOKEN` | Bearer token for MCP clients (`Authorization: Bearer …`) | +| `WP_MCP_ALLOWED_HOSTS` | Public hostname allowlist (e.g. `audit.example.com`) | +| `WP_MCP_ALLOWED_ORIGINS` | Optional `Origin` allowlist | +| `WP_MCP_DOMAIN` | Tool bundle (`core` recommended for remote) | +| `MCP_PORT` | Host port mapped to container `8000` (default `8000`) | + +Terminate TLS at your reverse proxy; do not expose plain HTTP publicly. Configure token and allowed hostnames on **Secrets → Remote MCP** (`/secrets`, Remote MCP section). + ### Read-only client dashboards Set `AUTH_DEFAULT_ROLE=client-readonly` so session logins cannot run audits or save settings. The API returns 403 on mutations; the UI hides **Run audit** and disables save controls. Use `viewer` instead if chat access should also be blocked. 
diff --git a/requirements.txt b/requirements.txt index ad01943..afb3055 100644 --- a/requirements.txt +++ b/requirements.txt @@ -43,8 +43,10 @@ groq==1.4.0 pyspellchecker==0.9.0 html5lib==1.1 -# MCP server for Cursor / Claude Desktop -mcp~=1.0.0 +# MCP server for Cursor / Claude Desktop (stdio + remote Streamable HTTP) +mcp>=1.19,<2 +uvicorn>=0.30 +starlette>=0.38 # Dev / test pytest==9.0.3 diff --git a/src/website_profiling/mcp/http.py b/src/website_profiling/mcp/http.py new file mode 100644 index 0000000..ee88f77 --- /dev/null +++ b/src/website_profiling/mcp/http.py @@ -0,0 +1,5 @@ +"""Entry point: python -m website_profiling.mcp.http""" +from .http_server import main + +if __name__ == "__main__": + main() diff --git a/src/website_profiling/mcp/http_server.py b/src/website_profiling/mcp/http_server.py new file mode 100644 index 0000000..88f4981 --- /dev/null +++ b/src/website_profiling/mcp/http_server.py @@ -0,0 +1,253 @@ +"""Streamable HTTP MCP server for remote Site Audit tool access.""" +from __future__ import annotations + +import contextlib +import hmac +import json +import os +from collections.abc import AsyncIterator, Callable +from typing import Any +from urllib.parse import urlparse + +from .settings import McpHttpSettings, load_mcp_http_settings + +_LOCALHOST_HOSTS = frozenset({"127.0.0.1", "localhost", "::1"}) + + +def _env(name: str, default: str = "") -> str: + return os.environ.get(name, default).strip() + + +def _bool_env(name: str, *, default: bool = False) -> bool: + raw = _env(name) + if not raw: + return default + return raw.lower() in {"1", "true", "yes", "on"} + + +def _http_host() -> str: + return _env("WP_MCP_HTTP_HOST", "127.0.0.1") or "127.0.0.1" + + +def _http_port() -> int: + raw = _env("WP_MCP_HTTP_PORT", "8000") or "8000" + try: + port = int(raw) + except ValueError as e: + raise SystemExit(f"Invalid WP_MCP_HTTP_PORT: {raw!r}") from e + if port < 1 or port > 65535: + raise SystemExit(f"Invalid WP_MCP_HTTP_PORT: {port}") + return port 
+ + +def _http_path() -> str: + path = _env("WP_MCP_HTTP_PATH", "/mcp") or "/mcp" + if not path.startswith("/"): + path = f"/{path}" + return path.rstrip("/") or "/mcp" + + +def _is_public_bind(host: str) -> bool: + return host not in _LOCALHOST_HOSTS + + +def _host_from_header(host_header: str) -> str: + host = host_header.strip().lower() + if not host: + return "" + if host.startswith("["): + end = host.index("]") if "]" in host else -1 + if end > 1: + return host[1:end] + return host.split(":")[0] + + +def _origin_host(origin: str) -> str: + parsed = urlparse(origin.strip()) + return (parsed.hostname or "").lower() + + +def _host_allowed(host: str, allowed_hosts: list[str]) -> bool: + if not allowed_hosts: + return True + host_lower = host.lower() + for entry in allowed_hosts: + pattern = entry.strip().lower() + if not pattern: + continue + if pattern.startswith("*."): + suffix = pattern[1:] + if host_lower == pattern[2:] or host_lower.endswith(suffix): + return True + continue + if host_lower == pattern or host_lower.endswith(f":{pattern}"): + return True + if pattern.endswith(":*") and host_lower == pattern[:-2]: + return True + return False + + +def _origin_allowed(origin: str, allowed_origins: list[str]) -> bool: + if not allowed_origins: + return True + if not origin.strip(): + return True + origin_host = _origin_host(origin) + for entry in allowed_origins: + pattern = entry.strip().lower() + if not pattern: + continue + if pattern == origin.strip().lower(): + return True + if pattern.startswith("http://") or pattern.startswith("https://"): + continue + if origin_host == pattern or origin_host.endswith(f".{pattern.removeprefix('*.')}"): + return True + return False + + +def _validate_startup_config() -> None: + host = _http_host() + if not _is_public_bind(host): + return + settings = load_mcp_http_settings() + if not settings.token: + raise SystemExit( + "Remote MCP token is required when WP_MCP_HTTP_HOST is not localhost " + f"(current host: {host!r}). 
Set WP_MCP_TOKEN or save mcp_token on the Secrets page.", + ) + if not settings.allowed_hosts: + raise SystemExit( + "Allowed MCP hosts are required when binding to a non-localhost address " + f"(current host: {host!r}). Set WP_MCP_ALLOWED_HOSTS or save mcp_allowed_hosts on the Secrets page.", + ) + + +def _transport_security_settings(host: str): + from mcp.server.transport_security import TransportSecuritySettings + + public = _is_public_bind(host) + settings = load_mcp_http_settings() + if settings.remote_access_configured: + return TransportSecuritySettings( + enable_dns_rebinding_protection=False, + allowed_hosts=[], + allowed_origins=[], + ) + return TransportSecuritySettings( + enable_dns_rebinding_protection=public, + allowed_hosts=settings.allowed_hosts or [], + allowed_origins=settings.allowed_origins or [], + ) + + +async def _reject_request(send: Any, status: int, message: str) -> None: + body = json.dumps({"error": message}).encode() + await send( + { + "type": "http.response.start", + "status": status, + "headers": [ + (b"content-type", b"application/json"), + (b"content-length", str(len(body)).encode("ascii")), + ], + }, + ) + await send({"type": "http.response.body", "body": body}) + + +class RemoteAccessMiddleware: + """Enforce bearer token and allowed Host/Origin using UI-managed settings.""" + + def __init__(self, app: Callable[..., Any]) -> None: + self.app = app + + async def __call__(self, scope: dict[str, Any], receive: Any, send: Any) -> None: + if scope.get("type") != "http": + await self.app(scope, receive, send) + return + + settings = load_mcp_http_settings() + headers = { + name.decode("latin-1").lower(): value.decode("latin-1") + for name, value in scope.get("headers", []) + } + host = _host_from_header(headers.get("host", "")) + + if settings.allowed_hosts and not _host_allowed(host, settings.allowed_hosts): + await _reject_request(send, 403, "Host not allowed for remote MCP") + return + + origin = headers.get("origin", "") + if 
settings.allowed_origins and not _origin_allowed(origin, settings.allowed_origins): + await _reject_request(send, 403, "Origin not allowed for remote MCP") + return + + if settings.token: + auth_value = headers.get("authorization", "") + expected = f"Bearer {settings.token}" + if not hmac.compare_digest(auth_value.encode("utf-8"), expected.encode("utf-8")): + await _reject_request(send, 401, "Unauthorized") + return + + await self.app(scope, receive, send) + + +def _with_remote_access(app: Callable[..., Any]) -> Callable[..., Any]: + return RemoteAccessMiddleware(app) + + +def build_app(): + try: + from mcp.server.streamable_http_manager import StreamableHTTPSessionManager + from starlette.applications import Starlette + from starlette.routing import Mount + from starlette.types import Receive, Scope, Send + except ImportError as e: + raise SystemExit( + "MCP HTTP dependencies not installed. Run: pip install -r requirements.txt", + ) from e + + from .server import create_server + + host = _http_host() + path = _http_path() + settings = load_mcp_http_settings() + server = create_server(domain=settings.domain) + security = _transport_security_settings(host) + + manager = StreamableHTTPSessionManager( + app=server, + event_store=None, + json_response=_bool_env("WP_MCP_JSON_RESPONSE", default=False), + stateless=True, + security_settings=security, + ) + + async def handle(scope: Scope, receive: Receive, send: Send) -> None: + await manager.handle_request(scope, receive, send) + + @contextlib.asynccontextmanager + async def lifespan(_: Starlette) -> AsyncIterator[None]: + async with manager.run(): + yield + + starlette_app = Starlette(routes=[Mount(path, app=handle)], lifespan=lifespan) + return _with_remote_access(starlette_app) + + +def main() -> None: + _validate_startup_config() + try: + import uvicorn + except ImportError as e: + raise SystemExit( + "uvicorn not installed. 
Run: pip install -r requirements.txt", + ) from e + + uvicorn.run( + build_app(), + host=_http_host(), + port=_http_port(), + log_level=_env("WP_MCP_LOG_LEVEL", "info").lower() or "info", + ) diff --git a/src/website_profiling/mcp/server.py b/src/website_profiling/mcp/server.py index 4ca3735..86090ba 100644 --- a/src/website_profiling/mcp/server.py +++ b/src/website_profiling/mcp/server.py @@ -75,13 +75,9 @@ def _mcp_domain() -> str: return (os.environ.get("WP_MCP_DOMAIN") or "core").strip().lower() -def _exposed_tool_names() -> set[str]: - return mcp_tool_names(_mcp_domain()) - - -def _tools_catalog_json() -> str: - domain = _mcp_domain() - exposed = _exposed_tool_names() +def _tools_catalog_json(domain: str | None = None) -> str: + effective = (domain or _mcp_domain()).strip().lower() or "core" + exposed = mcp_tool_names(effective) by_domain = tools_catalog_by_domain() scoped: dict[str, list[str]] = {} for d, names in by_domain.items(): @@ -89,7 +85,7 @@ def _tools_catalog_json() -> str: if filtered: scoped[d] = filtered return json.dumps({ - "mcp_domain": domain, + "mcp_domain": effective, "tool_count": len(exposed), "handlers": sorted(exposed), "domains": scoped, @@ -97,9 +93,10 @@ def _tools_catalog_json() -> str: }, indent=2) -def _domains_resource_json() -> str: +def _domains_resource_json(domain: str | None = None) -> str: + effective = (domain or _mcp_domain()).strip().lower() or "core" return json.dumps({ - "current_mcp_domain": _mcp_domain(), + "current_mcp_domain": effective, "bundles": { key: sorted(domains) for key, domains in MCP_DOMAIN_BUNDLES.items() @@ -108,7 +105,7 @@ def _domains_resource_json() -> str: }, indent=2) -def _resolve_resource(uri: str) -> str: +def _resolve_resource(uri: str, domain: str | None = None) -> str: if uri == "audit://properties": result = dispatch_tool("list_properties", {}) return json.dumps(result, indent=2, default=str) @@ -117,10 +114,10 @@ def _resolve_resource(uri: str) -> str: return _read_glossary_excerpt() if 
uri == "audit://tools": - return _tools_catalog_json() + return _tools_catalog_json(domain=domain) if uri == "audit://domains": - return _domains_resource_json() + return _domains_resource_json(domain=domain) m = _URI_PROPERTY.match(uri) if m: @@ -153,19 +150,25 @@ def _resolve_resource(uri: str) -> str: return json.dumps({"error": f"unknown resource: {uri}"}) -def main() -> None: +def _import_mcp_types(): try: from mcp.server import Server - from mcp.server.stdio import stdio_server from mcp.types import Resource, TextContent, Tool except ImportError as e: raise SystemExit( "MCP SDK not installed. Run: pip install -r requirements.txt", ) from e + return Server, Resource, TextContent, Tool + - server = Server(f"site-audit-{_mcp_domain()}") +def create_server(domain: str | None = None): + """Build transport-agnostic MCP server with Site Audit tools and resources.""" + Server, Resource, TextContent, Tool = _import_mcp_types() + + effective_domain = (domain or _mcp_domain()).strip().lower() or "core" + server = Server(f"site-audit-{effective_domain}") default_pid = _default_property_id() - exposed = _exposed_tool_names() + exposed = mcp_tool_names(effective_domain) @server.list_tools() async def list_tools() -> list[Tool]: @@ -186,7 +189,7 @@ async def list_tools() -> list[Tool]: async def call_tool(name: str, arguments: dict[str, Any] | None) -> list[TextContent]: if name not in exposed: result = { - "error": f"tool not exposed in MCP domain {_mcp_domain()}: {name}", + "error": f"tool not exposed in MCP domain {effective_domain}: {name}", "hint": "Connect WP_MCP_DOMAIN=full or the domain server that includes this tool.", } return [TextContent(type="text", text=json.dumps(result, indent=2, default=str))] @@ -223,7 +226,20 @@ async def list_resources() -> list[Resource]: @server.read_resource() async def read_resource(uri: str) -> str: - return _resolve_resource(uri) + return _resolve_resource(uri, domain=effective_domain) + + return server + + +def run_stdio() -> 
None: + try: + from mcp.server.stdio import stdio_server + except ImportError as e: + raise SystemExit( + "MCP SDK not installed. Run: pip install -r requirements.txt", + ) from e + + server = create_server() async def run() -> None: async with stdio_server() as (read_stream, write_stream): @@ -234,5 +250,9 @@ async def run() -> None: asyncio.run(run()) +def main() -> None: + run_stdio() + + if __name__ == "__main__": main() diff --git a/src/website_profiling/mcp/settings.py b/src/website_profiling/mcp/settings.py new file mode 100644 index 0000000..feceb07 --- /dev/null +++ b/src/website_profiling/mcp/settings.py @@ -0,0 +1,57 @@ +"""Load remote MCP HTTP settings from environment and pipeline_config.""" +from __future__ import annotations + +import os +from dataclasses import dataclass + +from ..db.config_store import read_pipeline_config +from ..db.storage import db_session + + +def _env(name: str, default: str = "") -> str: + return os.environ.get(name, default).strip() + + +def _parse_csv(raw: str) -> list[str]: + if not raw.strip(): + return [] + return [part.strip() for part in raw.split(",") if part.strip()] + + +@dataclass(frozen=True) +class McpHttpSettings: + token: str + allowed_hosts: list[str] + allowed_origins: list[str] + domain: str = "core" + + @property + def remote_access_configured(self) -> bool: + return bool(self.token and self.allowed_hosts) + + +def _load_pipeline_mcp_settings() -> dict[str, str]: + try: + with db_session() as conn: + known, _unknown = read_pipeline_config(conn) + return known + except Exception: + return {} + + +def load_mcp_http_settings() -> McpHttpSettings: + """Merge MCP HTTP settings: environment overrides database values.""" + pipeline = _load_pipeline_mcp_settings() + + token = _env("WP_MCP_TOKEN") or str(pipeline.get("mcp_token", "")).strip() + + hosts_raw = _env("WP_MCP_ALLOWED_HOSTS") or str(pipeline.get("mcp_allowed_hosts", "")).strip() + origins_raw = _env("WP_MCP_ALLOWED_ORIGINS") or 
str(pipeline.get("mcp_allowed_origins", "")).strip() + domain = _env("WP_MCP_DOMAIN") or str(pipeline.get("mcp_domain", "")).strip().lower() or "core" + + return McpHttpSettings( + token=token, + allowed_hosts=_parse_csv(hosts_raw), + allowed_origins=_parse_csv(origins_raw), + domain=domain, + ) diff --git a/tests/test_mcp_http_server.py b/tests/test_mcp_http_server.py new file mode 100644 index 0000000..56ed31e --- /dev/null +++ b/tests/test_mcp_http_server.py @@ -0,0 +1,752 @@ +"""MCP HTTP server auth, startup validation, and app wiring.""" +from __future__ import annotations + +import asyncio +import json +import os +import runpy +import sys +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from starlette.testclient import TestClient + +from website_profiling.mcp import http_server +from website_profiling.mcp import server as mcp_server +from website_profiling.mcp.settings import McpHttpSettings, load_mcp_http_settings + + +def test_validate_startup_public_bind_requires_token() -> None: + with patch.dict(os.environ, {"WP_MCP_HTTP_HOST": "0.0.0.0"}, clear=False): + with patch( + "website_profiling.mcp.http_server.load_mcp_http_settings", + return_value=McpHttpSettings(token="", allowed_hosts=["audit.example.com"], allowed_origins=[]), + ): + with pytest.raises(SystemExit, match="Remote MCP token"): + http_server._validate_startup_config() + + +def test_validate_startup_public_bind_requires_allowed_hosts() -> None: + with patch.dict(os.environ, {"WP_MCP_HTTP_HOST": "0.0.0.0"}, clear=False): + with patch( + "website_profiling.mcp.http_server.load_mcp_http_settings", + return_value=McpHttpSettings(token="secret", allowed_hosts=[], allowed_origins=[]), + ): + with pytest.raises(SystemExit, match="Allowed MCP hosts"): + http_server._validate_startup_config() + + +def test_validate_startup_localhost_without_token_ok() -> None: + with patch.dict(os.environ, {"WP_MCP_HTTP_HOST": "127.0.0.1"}, clear=False): + http_server._validate_startup_config() + 
+ +def test_transport_security_disables_sdk_checks_when_ui_configured() -> None: + with patch( + "website_profiling.mcp.http_server.load_mcp_http_settings", + return_value=McpHttpSettings(token="secret", allowed_hosts=["audit.example.com"], allowed_origins=[]), + ): + settings = http_server._transport_security_settings("0.0.0.0") + assert settings.enable_dns_rebinding_protection is False + + +def test_transport_security_localhost_without_ui_config() -> None: + with patch( + "website_profiling.mcp.http_server.load_mcp_http_settings", + return_value=McpHttpSettings(token="", allowed_hosts=[], allowed_origins=[]), + ): + settings = http_server._transport_security_settings("127.0.0.1") + assert settings.enable_dns_rebinding_protection is False + + +def test_host_and_origin_allowed_helpers() -> None: + assert http_server._host_allowed("audit.example.com", ["audit.example.com"]) + assert http_server._host_allowed("sub.example.com", ["*.example.com"]) + assert not http_server._host_allowed("evil.example.net", ["audit.example.com"]) + assert http_server._origin_allowed("https://audit.example.com", ["https://audit.example.com"]) + assert http_server._origin_allowed("", ["https://audit.example.com"]) + + +def test_remote_access_middleware_rejects_missing_token() -> None: + app = AsyncMock() + + async def run() -> None: + middleware = http_server.RemoteAccessMiddleware(app) + sent: list[dict] = [] + + async def capture_send(message: dict) -> None: + sent.append(message) + + with patch( + "website_profiling.mcp.http_server.load_mcp_http_settings", + return_value=McpHttpSettings(token="secret-token", allowed_hosts=[], allowed_origins=[]), + ): + await middleware( + {"type": "http", "headers": []}, + AsyncMock(), + capture_send, + ) + + assert app.await_count == 0 + assert sent[0]["status"] == 401 + + asyncio.run(run()) + + +def test_remote_access_middleware_rejects_wrong_token_with_json_body() -> None: + app = AsyncMock() + + async def run() -> None: + middleware = 
http_server.RemoteAccessMiddleware(app) + sent: list[dict] = [] + + async def capture_send(message: dict) -> None: + sent.append(message) + + with patch( + "website_profiling.mcp.http_server.load_mcp_http_settings", + return_value=McpHttpSettings(token="secret-token", allowed_hosts=[], allowed_origins=[]), + ): + await middleware( + {"type": "http", "headers": [(b"authorization", b"Bearer wrong-token")]}, + AsyncMock(), + capture_send, + ) + + assert app.await_count == 0 + assert sent[0]["status"] == 401 + # Regression: repr() produced single-quoted, non-parseable JSON. + assert json.loads(sent[1]["body"]) == {"error": "Unauthorized"} + + asyncio.run(run()) + + +def test_remote_access_middleware_non_ascii_auth_header_does_not_crash() -> None: + app = AsyncMock() + + async def run() -> None: + middleware = http_server.RemoteAccessMiddleware(app) + sent: list[dict] = [] + + async def capture_send(message: dict) -> None: + sent.append(message) + + with patch( + "website_profiling.mcp.http_server.load_mcp_http_settings", + return_value=McpHttpSettings(token="secret-token", allowed_hosts=[], allowed_origins=[]), + ): + # A non-ASCII Authorization header must not raise (hmac on str would). 
+ await middleware( + {"type": "http", "headers": [(b"authorization", "Bearer \xe9".encode("latin-1"))]}, + AsyncMock(), + capture_send, + ) + + assert app.await_count == 0 + assert sent[0]["status"] == 401 + + asyncio.run(run()) + + +def test_remote_access_middleware_accepts_valid_request() -> None: + app = AsyncMock() + middleware = http_server.RemoteAccessMiddleware(app) + settings = McpHttpSettings( + token="secret-token", + allowed_hosts=["audit.example.com"], + allowed_origins=[], + ) + + async def run() -> None: + with patch("website_profiling.mcp.http_server.load_mcp_http_settings", return_value=settings): + await middleware( + { + "type": "http", + "headers": [ + (b"authorization", b"Bearer secret-token"), + (b"host", b"audit.example.com"), + ], + }, + AsyncMock(), + AsyncMock(), + ) + + asyncio.run(run()) + assert app.await_count == 1 + + +def test_remote_access_middleware_rejects_bad_host() -> None: + app = AsyncMock() + + async def run() -> None: + middleware = http_server.RemoteAccessMiddleware(app) + sent: list[dict] = [] + + async def capture_send(message: dict) -> None: + sent.append(message) + + with patch( + "website_profiling.mcp.http_server.load_mcp_http_settings", + return_value=McpHttpSettings( + token="secret-token", + allowed_hosts=["audit.example.com"], + allowed_origins=[], + ), + ): + await middleware( + { + "type": "http", + "headers": [ + (b"authorization", b"Bearer secret-token"), + (b"host", b"evil.example.net"), + ], + }, + AsyncMock(), + capture_send, + ) + + assert app.await_count == 0 + assert sent[0]["status"] == 403 + + asyncio.run(run()) + + +def test_with_remote_access_wraps_app() -> None: + inner = MagicMock() + wrapped = http_server._with_remote_access(inner) + assert isinstance(wrapped, http_server.RemoteAccessMiddleware) + + +def test_load_mcp_http_settings_env_overrides_db() -> None: + with patch.dict( + os.environ, + { + "WP_MCP_TOKEN": "env-token", + "WP_MCP_ALLOWED_HOSTS": "host.example", + "WP_MCP_ALLOWED_ORIGINS": 
"https://host.example", + }, + clear=False, + ): + with patch( + "website_profiling.mcp.settings._load_pipeline_mcp_settings", + return_value={ + "mcp_token": "db-token", + "mcp_allowed_hosts": "db.example", + "mcp_allowed_origins": "https://db.example", + }, + ): + settings = load_mcp_http_settings() + assert settings.token == "env-token" + assert settings.allowed_hosts == ["host.example"] + assert settings.allowed_origins == ["https://host.example"] + + +def test_load_mcp_http_settings_from_db_when_env_empty() -> None: + with patch.dict(os.environ, {}, clear=True): + with patch( + "website_profiling.mcp.settings._load_pipeline_mcp_settings", + return_value={ + "mcp_token": "db-token", + "mcp_allowed_hosts": "one.example,two.example", + }, + ): + settings = load_mcp_http_settings() + assert settings.token == "db-token" + assert settings.allowed_hosts == ["one.example", "two.example"] + + +def test_build_app_smoke(monkeypatch) -> None: + captured: dict[str, object] = {} + + class FakeServer: + def __init__(self, name: str) -> None: + captured["name"] = name + + def list_tools(self): + def decorator(fn): + captured["list_tools"] = fn + return fn + return decorator + + def call_tool(self): + def decorator(fn): + captured["call_tool"] = fn + return fn + return decorator + + def list_resources(self): + def decorator(fn): + captured["list_resources"] = fn + return fn + return decorator + + def read_resource(self): + def decorator(fn): + captured["read_resource"] = fn + return fn + return decorator + + def create_initialization_options(self): + return {} + + async def run(self, *_args, **_kwargs) -> None: + return None + + class FakeManager: + def __init__(self, **_kwargs) -> None: + captured["manager_kwargs"] = _kwargs + + async def handle_request(self, *_args, **_kwargs) -> None: + captured["handled"] = True + + def run(self): + from contextlib import asynccontextmanager + + @asynccontextmanager + async def _cm(): + yield + + return _cm() + + fake_server_mod = 
MagicMock() + fake_server_mod.Server = FakeServer + fake_types_mod = MagicMock() + fake_types_mod.Tool = lambda **kwargs: kwargs + fake_types_mod.TextContent = lambda **kwargs: kwargs + fake_types_mod.Resource = lambda **kwargs: kwargs + fake_manager_mod = MagicMock() + fake_manager_mod.StreamableHTTPSessionManager = FakeManager + fake_security_mod = MagicMock() + fake_security_mod.TransportSecuritySettings = lambda **kwargs: kwargs + + monkeypatch.setitem(sys.modules, "mcp", MagicMock()) + monkeypatch.setitem(sys.modules, "mcp.server", fake_server_mod) + monkeypatch.setitem(sys.modules, "mcp.types", fake_types_mod) + monkeypatch.setitem(sys.modules, "mcp.server.streamable_http_manager", fake_manager_mod) + monkeypatch.setitem(sys.modules, "mcp.server.transport_security", fake_security_mod) + + with patch.dict( + os.environ, + { + "WP_MCP_HTTP_HOST": "127.0.0.1", + "WP_MCP_HTTP_PATH": "/mcp", + "WP_MCP_DOMAIN": "core", + }, + clear=False, + ): + app = http_server.build_app() + + assert captured["name"] == "site-audit-core" + tools = asyncio.run(captured["list_tools"]()) # type: ignore[arg-type] + assert isinstance(tools, list) + manager_kwargs = captured["manager_kwargs"] # type: ignore[assignment] + assert manager_kwargs["stateless"] is True + assert manager_kwargs["json_response"] is False + + +def test_create_server_registers_handlers(monkeypatch) -> None: + captured: dict[str, object] = {} + + class FakeServer: + def __init__(self, name: str) -> None: + captured["name"] = name + + def list_tools(self): + def decorator(fn): + captured["list_tools"] = fn + return fn + return decorator + + def call_tool(self): + def decorator(fn): + captured["call_tool"] = fn + return fn + return decorator + + def list_resources(self): + def decorator(fn): + captured["list_resources"] = fn + return fn + return decorator + + def read_resource(self): + def decorator(fn): + captured["read_resource"] = fn + return fn + return decorator + + def create_initialization_options(self): + 
return {} + + fake_server_mod = MagicMock() + fake_server_mod.Server = FakeServer + fake_types_mod = MagicMock() + fake_types_mod.Tool = lambda **kwargs: kwargs + fake_types_mod.TextContent = lambda **kwargs: kwargs + fake_types_mod.Resource = lambda **kwargs: kwargs + + monkeypatch.setitem(sys.modules, "mcp", MagicMock()) + monkeypatch.setitem(sys.modules, "mcp.server", fake_server_mod) + monkeypatch.setitem(sys.modules, "mcp.types", fake_types_mod) + + with patch.dict(os.environ, {"WP_PROPERTY_ID": "7", "WP_MCP_DOMAIN": "full"}, clear=False): + mcp_server.create_server() + + assert captured["name"] == "site-audit-full" + tools = asyncio.run(captured["list_tools"]()) # type: ignore[arg-type] + assert len(tools) >= 340 + + +def test_bool_env_helper() -> None: + with patch.dict(os.environ, {"WP_MCP_JSON_RESPONSE": "true"}, clear=False): + assert http_server._bool_env("WP_MCP_JSON_RESPONSE") is True + assert http_server._bool_env("WP_MCP_JSON_RESPONSE", default=False) is False + + +def test_http_port_invalid() -> None: + with patch.dict(os.environ, {"WP_MCP_HTTP_PORT": "bad"}, clear=False): + with pytest.raises(SystemExit, match="Invalid WP_MCP_HTTP_PORT"): + http_server._http_port() + + +def test_http_path_normalizes() -> None: + with patch.dict(os.environ, {"WP_MCP_HTTP_PATH": "mcp"}, clear=False): + assert http_server._http_path() == "/mcp" + + +def test_remote_access_passthrough_non_http() -> None: + app = AsyncMock() + middleware = http_server.RemoteAccessMiddleware(app) + + async def run() -> None: + await middleware({"type": "lifespan"}, AsyncMock(), AsyncMock()) + + asyncio.run(run()) + app.assert_awaited_once() + + +def test_http_main_runs_uvicorn(monkeypatch) -> None: + mock_uvicorn = MagicMock() + monkeypatch.setitem(sys.modules, "uvicorn", mock_uvicorn) + with patch.object(http_server, "build_app", return_value=MagicMock()): + with patch.dict( + os.environ, + {"WP_MCP_HTTP_HOST": "127.0.0.1", "WP_MCP_HTTP_PORT": "9001"}, + clear=False, + ): + 
http_server.main() + mock_uvicorn.run.assert_called_once() + assert mock_uvicorn.run.call_args.kwargs["port"] == 9001 + + +def test_http_main_missing_uvicorn() -> None: + with patch.dict(sys.modules, {"uvicorn": None}): + with patch.object(http_server, "_validate_startup_config"): + with pytest.raises(SystemExit, match="uvicorn"): + http_server.main() + + +def test_http_module_main() -> None: + with patch("website_profiling.mcp.http_server.main") as mock_main: + runpy.run_module("website_profiling.mcp.http", run_name="__main__") + mock_main.assert_called_once() + + +def test_http_port_out_of_range() -> None: + with patch.dict(os.environ, {"WP_MCP_HTTP_PORT": "70000"}, clear=False): + with pytest.raises(SystemExit, match="Invalid WP_MCP_HTTP_PORT"): + http_server._http_port() + + +def test_build_app_handle_and_lifespan() -> None: + empty_settings = McpHttpSettings(token="", allowed_hosts=[], allowed_origins=[], domain="core") + with patch.dict( + os.environ, + { + "WP_MCP_HTTP_HOST": "127.0.0.1", + "WP_MCP_HTTP_PATH": "/mcp", + "WP_MCP_DOMAIN": "core", + }, + clear=False, + ): + with patch( + "website_profiling.mcp.http_server.load_mcp_http_settings", + return_value=empty_settings, + ): + app = http_server.build_app() + + with TestClient(app) as client: + response = client.post( + "/mcp", + json={ + "jsonrpc": "2.0", + "id": 1, + "method": "initialize", + "params": { + "protocolVersion": "2024-11-05", + "capabilities": {}, + "clientInfo": {"name": "test", "version": "1.0"}, + }, + }, + headers={"Accept": "application/json, text/event-stream"}, + ) + assert response.status_code in {200, 202, 406} + + +def test_create_server_missing_sdk() -> None: + with patch.dict(sys.modules, {"mcp.types": None}): + with pytest.raises(SystemExit, match="MCP SDK"): + mcp_server.create_server() + + +def test_run_stdio_missing_sdk() -> None: + with patch.dict(sys.modules, {"mcp.server.stdio": None}): + with pytest.raises(SystemExit, match="MCP SDK"): + mcp_server.run_stdio() + + +def 
test_build_app_import_error(monkeypatch) -> None: + monkeypatch.setitem(sys.modules, "mcp.server.streamable_http_manager", None) + with patch.dict(os.environ, {"WP_MCP_HTTP_HOST": "127.0.0.1"}, clear=False): + with pytest.raises(SystemExit, match="MCP HTTP dependencies"): + http_server.build_app() + + +def test_load_pipeline_mcp_settings_db_error() -> None: + with patch("website_profiling.mcp.settings.db_session", side_effect=RuntimeError("no db")): + assert load_mcp_http_settings().token == "" + + +def test_load_pipeline_mcp_settings_success() -> None: + with patch( + "website_profiling.mcp.settings.read_pipeline_config", + return_value=({"mcp_token": "db"}, []), + ): + with patch("website_profiling.mcp.settings.db_session") as mock_db: + mock_db.return_value.__enter__.return_value = object() + from website_profiling.mcp.settings import _load_pipeline_mcp_settings + + assert _load_pipeline_mcp_settings()["mcp_token"] == "db" + + +def test_host_from_header_ipv6() -> None: + assert http_server._host_from_header("[::1]:8000") == "::1" + + +def test_host_allowed_port_suffix_pattern() -> None: + assert http_server._host_allowed("audit.example.com", ["audit.example.com:*"]) + + +def test_origin_allowed_url_and_hostname_patterns() -> None: + assert http_server._origin_allowed( + "https://audit.example.com", + ["https://audit.example.com"], + ) + assert http_server._origin_allowed( + "https://app.example.com", + ["example.com"], + ) + assert not http_server._origin_allowed( + "https://evil.example.net", + ["https://audit.example.com"], + ) + + +def test_remote_access_middleware_rejects_bad_origin() -> None: + app = AsyncMock() + middleware = http_server.RemoteAccessMiddleware(app) + + async def run() -> None: + sent: list[dict] = [] + + async def capture_send(message: dict) -> None: + sent.append(message) + + with patch( + "website_profiling.mcp.http_server.load_mcp_http_settings", + return_value=McpHttpSettings( + token="secret-token", + allowed_hosts=[], + 
allowed_origins=["https://audit.example.com"], + ), + ): + await middleware( + { + "type": "http", + "headers": [ + (b"authorization", b"Bearer secret-token"), + (b"origin", b"https://evil.example.net"), + ], + }, + AsyncMock(), + capture_send, + ) + + assert sent[0]["status"] == 403 + + asyncio.run(run()) + + +def test_transport_security_public_env_only() -> None: + with patch( + "website_profiling.mcp.http_server.load_mcp_http_settings", + return_value=McpHttpSettings(token="", allowed_hosts=["audit.example.com"], allowed_origins=[]), + ): + settings = http_server._transport_security_settings("0.0.0.0") + assert settings.enable_dns_rebinding_protection is True + +def test_host_allowed_wildcard_nomatch_then_exact() -> None: + assert http_server._host_allowed("allowed.example", ["*.other.example", "allowed.example"]) + + +def test_origin_allowed_http_nomatch_then_hostname() -> None: + assert http_server._origin_allowed( + "https://app.example.com", + ["https://other.example.com", "example.com"], + ) + + +def test_host_allowed_empty_list_and_blank_entries() -> None: + assert http_server._host_allowed("anything.example", []) + assert not http_server._host_allowed("anything.example", ["", "other.example"]) + + +def test_origin_allowed_multiple_https_patterns_miss() -> None: + assert not http_server._origin_allowed( + "https://app.example.com", + ["https://other.example.com", "https://third.example.com"], + ) + + +def test_origin_allowed_empty_and_blank_entries() -> None: + assert http_server._origin_allowed("https://x.example", []) + assert not http_server._origin_allowed("https://x.example", ["https://other.example"]) + assert http_server._origin_allowed("", ["https://other.example"]) + assert not http_server._origin_allowed("https://x.example", [""]) + + +def test_mcp_http_settings_domain_defaults_to_core() -> None: + settings = McpHttpSettings(token="t", allowed_hosts=[], allowed_origins=[]) + assert settings.domain == "core" + + +def 
test_load_mcp_http_settings_domain_env_wins() -> None: + with patch.dict(os.environ, {"WP_MCP_DOMAIN": "google"}, clear=False): + with patch( + "website_profiling.mcp.settings._load_pipeline_mcp_settings", + return_value={"mcp_domain": "full"}, + ): + settings = load_mcp_http_settings() + assert settings.domain == "google" + + +def test_load_mcp_http_settings_domain_from_db_when_env_unset() -> None: + with patch.dict(os.environ, {}, clear=True): + with patch( + "website_profiling.mcp.settings._load_pipeline_mcp_settings", + return_value={"mcp_domain": "links"}, + ): + settings = load_mcp_http_settings() + assert settings.domain == "links" + + +def test_load_mcp_http_settings_domain_defaults_core_when_absent() -> None: + with patch.dict(os.environ, {}, clear=True): + with patch( + "website_profiling.mcp.settings._load_pipeline_mcp_settings", + return_value={}, + ): + settings = load_mcp_http_settings() + assert settings.domain == "core" + + +def test_create_server_domain_param_overrides_env(monkeypatch) -> None: + captured: dict[str, object] = {} + + class FakeServer: + def __init__(self, name: str) -> None: + captured["name"] = name + + def list_tools(self): + return lambda fn: fn + + def call_tool(self): + return lambda fn: fn + + def list_resources(self): + return lambda fn: fn + + def read_resource(self): + return lambda fn: fn + + fake_server_mod = MagicMock() + fake_server_mod.Server = FakeServer + monkeypatch.setitem(sys.modules, "mcp", MagicMock()) + monkeypatch.setitem(sys.modules, "mcp.server", fake_server_mod) + monkeypatch.setitem(sys.modules, "mcp.types", MagicMock()) + + # Env says "core" but the explicit domain= arg should win. 
+ with patch.dict(os.environ, {"WP_MCP_DOMAIN": "core"}, clear=False): + mcp_server.create_server(domain="google") + + assert captured["name"] == "site-audit-google" + + +def test_build_app_passes_db_domain_to_create_server(monkeypatch) -> None: + captured: dict[str, object] = {} + + class FakeServer: + def __init__(self, name: str) -> None: + captured["name"] = name + + def list_tools(self): + return lambda fn: fn + + def call_tool(self): + return lambda fn: fn + + def list_resources(self): + return lambda fn: fn + + def read_resource(self): + return lambda fn: fn + + def create_initialization_options(self): + return {} + + async def run(self, *_args, **_kwargs) -> None: + return None + + class FakeManager: + def __init__(self, **_kwargs) -> None: + pass + + async def handle_request(self, *_args, **_kwargs) -> None: + pass + + def run(self): + from contextlib import asynccontextmanager + + @asynccontextmanager + async def _cm(): + yield + + return _cm() + + fake_server_mod = MagicMock() + fake_server_mod.Server = FakeServer + monkeypatch.setitem(sys.modules, "mcp", MagicMock()) + monkeypatch.setitem(sys.modules, "mcp.server", fake_server_mod) + monkeypatch.setitem(sys.modules, "mcp.types", MagicMock()) + monkeypatch.setitem(sys.modules, "mcp.server.streamable_http_manager", MagicMock(StreamableHTTPSessionManager=FakeManager)) + monkeypatch.setitem(sys.modules, "mcp.server.transport_security", MagicMock(TransportSecuritySettings=lambda **kw: kw)) + + # No WP_MCP_DOMAIN env var; DB returns "crawl" → build_app should use "crawl". 
+ with patch.dict(os.environ, {"WP_MCP_HTTP_HOST": "127.0.0.1"}, clear=True): + with patch( + "website_profiling.mcp.settings._load_pipeline_mcp_settings", + return_value={"mcp_domain": "crawl"}, + ): + http_server.build_app() + + assert captured["name"] == "site-audit-crawl" diff --git a/web/app/mcp/page.tsx b/web/app/mcp/page.tsx new file mode 100644 index 0000000..a4b597a --- /dev/null +++ b/web/app/mcp/page.tsx @@ -0,0 +1,7 @@ +import McpSettingsPage from '@/views/McpSettings'; + +export const dynamic = 'force-dynamic'; + +export default function McpRoutePage() { + return ; +} diff --git a/web/src/components/mcp/McpCopyBlock.tsx b/web/src/components/mcp/McpCopyBlock.tsx new file mode 100644 index 0000000..7d0c73c --- /dev/null +++ b/web/src/components/mcp/McpCopyBlock.tsx @@ -0,0 +1,52 @@ +'use client'; + +import { useCallback, useState } from 'react'; +import { Check, Copy } from 'lucide-react'; +import { strings } from '@/lib/strings'; + +const s = strings.mcpSettings; + +interface McpCopyBlockProps { + label: string; + description?: string; + value: string; + language?: 'json' | 'shell'; +} + +export default function McpCopyBlock({ label, description, value, language = 'json' }: McpCopyBlockProps) { + const [copied, setCopied] = useState(false); + + const handleCopy = useCallback(async () => { + try { + await navigator.clipboard.writeText(value); + setCopied(true); + window.setTimeout(() => setCopied(false), 2000); + } catch { + /* clipboard unavailable */ + } + }, [value]); + + return ( +
+
+
+

{label}

+ {description ? ( +

{description}

+ ) : null} +
+ +
+
+        {language === 'shell' ? `$ ${value}` : value}
+      
+
+ ); +} diff --git a/web/src/components/secrets/SecretsSettingsPanel.tsx b/web/src/components/secrets/SecretsSettingsPanel.tsx index f56fa7e..2c154b1 100644 --- a/web/src/components/secrets/SecretsSettingsPanel.tsx +++ b/web/src/components/secrets/SecretsSettingsPanel.tsx @@ -120,20 +120,28 @@ export function SecretsSaveBar({ saveMsg, readOnly, onSave, + saveHint, + saveButton, + savingLabel, }: { saving: boolean; loading: boolean; saveMsg: string; readOnly: boolean; onSave: () => void; + saveHint?: string; + saveButton?: string; + savingLabel?: string; }) { const saveFailed = saveMsg && !saveMsg.includes('saved'); + const hint = saveHint ?? s.saveHint; + const buttonLabel = readOnly ? strings.app.readonlyBanner : saving ? (savingLabel ?? s.saving) : (saveButton ?? s.saveButton); return (
- {saveMsg || s.saveHint} + {saveMsg || hint}
); diff --git a/web/src/hooks/useMcpSettings.ts b/web/src/hooks/useMcpSettings.ts new file mode 100644 index 0000000..6f2d05c --- /dev/null +++ b/web/src/hooks/useMcpSettings.ts @@ -0,0 +1,57 @@ +'use client'; + +import { useCallback, useMemo } from 'react'; +import { MCP_SETTINGS_FIELDS } from '@/lib/secretsConfigSchema'; +import { generateMcpToken } from '@/lib/mcpClientConfig'; +import { useSecrets } from '@/hooks/useSecrets'; + +const MCP_KEYS = MCP_SETTINGS_FIELDS.map((field) => field.key); + +export function useMcpSettings() { + const secrets = useSecrets(); + + const mcpState = useMemo(() => { + const out: Record = {}; + for (const key of MCP_KEYS) { + if (secrets.state[key] !== undefined) { + out[key] = secrets.state[key]; + } + } + return out; + }, [secrets.state]); + + const setField = useCallback( + (key: string, value: string | boolean) => { + secrets.setField(key, value); + }, + [secrets.setField], + ); + + const generateToken = useCallback(() => { + setField('mcp_token', generateMcpToken()); + }, [setField]); + + const suggestHostsFromUrl = useCallback(() => { + const raw = String(secrets.state.mcp_public_url || '').trim(); + if (!raw) return; + try { + const url = raw.startsWith('http') ? raw : `https://${raw}`; + const host = new URL(url).hostname; + if (host && !String(secrets.state.mcp_allowed_hosts || '').includes(host)) { + const existing = String(secrets.state.mcp_allowed_hosts || '').trim(); + setField('mcp_allowed_hosts', existing ? 
`${existing},${host}` : host); + } + } catch { + /* invalid url */ + } + }, [secrets.state.mcp_allowed_hosts, secrets.state.mcp_public_url, setField]); + + return { + ...secrets, + mcpState, + setField, + generateToken, + suggestHostsFromUrl, + tokenMasked: secrets.state.mcp_token_masked === true, + }; +} diff --git a/web/src/lib/appNav.ts b/web/src/lib/appNav.ts index fb6b670..5c9e469 100644 --- a/web/src/lib/appNav.ts +++ b/web/src/lib/appNav.ts @@ -16,6 +16,7 @@ import { Home as HomeIcon, Images, Key, + Plug, PenLine, LayoutDashboard, Link as LinkIcon, @@ -32,7 +33,7 @@ import { import { strings } from '@/lib/strings'; import { viewIdToPathSlug, type ViewId } from '@/routes'; -export type NavItemId = ViewId | 'pipeline' | 'secrets' | 'chat' | 'write'; +export type NavItemId = ViewId | 'pipeline' | 'secrets' | 'mcp' | 'chat' | 'write'; export interface AppNavItem { id: NavItemId; @@ -80,6 +81,7 @@ const NAV_DESCRIPTIONS: Partial> = { 'keywords-explorer': 'Keyword research & expansion', pipeline: 'Crawl a site and build a report', secrets: 'API keys and credentials', + mcp: 'Remote MCP client setup', chat: 'Ask questions about this audit', write: 'Draft content from audit data', }; @@ -133,6 +135,15 @@ const SECRETS_NAV: AppNavItem = { description: NAV_DESCRIPTIONS.secrets, }; +const MCP_NAV: AppNavItem = { + id: 'mcp', + label: strings.nav.mcp.label, + section: strings.nav.mcp.section, + icon: Plug, + hrefPath: '/mcp', + description: NAV_DESCRIPTIONS.mcp, +}; + const CHAT_NAV: AppNavItem = { id: 'chat', label: strings.nav.chat.label, @@ -162,6 +173,7 @@ export const APP_NAV_ITEMS: AppNavItem[] = [ })), PIPELINE_NAV, SECRETS_NAV, + MCP_NAV, WRITE_NAV, CHAT_NAV, ]; @@ -172,7 +184,7 @@ export const REPORT_VIEW_IDS: ViewId[] = VIEW_NAV.map(({ id }) => id); export const APP_NAV_SECTIONS = [...new Set(APP_NAV_ITEMS.map((item) => item.section))]; /** Routes with their own app pages — not resolved by `pathSlugToViewId`. 
*/ -export const STANDALONE_NAV_IDS = ['pipeline', 'secrets', 'chat', 'write'] as const satisfies readonly NavItemId[]; +export const STANDALONE_NAV_IDS = ['pipeline', 'secrets', 'mcp', 'chat', 'write'] as const satisfies readonly NavItemId[]; export type StandaloneNavId = (typeof STANDALONE_NAV_IDS)[number]; @@ -211,6 +223,7 @@ export const CHAT_SIDEBAR_NAV_IDS = [ 'links', 'pipeline', 'secrets', + 'mcp', 'write', ] as const satisfies readonly NavItemId[]; @@ -220,6 +233,7 @@ export const WRITE_SIDEBAR_NAV_IDS = [ 'links', 'pipeline', 'secrets', + 'mcp', 'chat', 'write', ] as const satisfies readonly NavItemId[]; @@ -230,6 +244,7 @@ export const PIPELINE_SIDEBAR_NAV_IDS = WRITE_SIDEBAR_NAV_IDS; export function isMiniNavLinkActive(href: string, pathname: string): boolean { if (href === '/secrets') return pathname.startsWith('/secrets'); + if (href === '/mcp') return pathname.startsWith('/mcp'); if (href === '/write') return pathname.startsWith('/write'); if (href === '/chat') return pathname.startsWith('/chat'); if (href === '/pipeline') return pathname.startsWith('/pipeline'); @@ -237,7 +252,7 @@ export function isMiniNavLinkActive(href: string, pathname: string): boolean { } export function navHref(item: AppNavItem, trailingQuery: string): string { - if (item.id === 'home' || item.id === 'pipeline' || item.id === 'secrets' || item.id === 'chat' || item.id === 'write') { + if (item.id === 'home' || item.id === 'pipeline' || item.id === 'secrets' || item.id === 'mcp' || item.id === 'chat' || item.id === 'write') { return item.hrefPath; } const raw = trailingQuery.startsWith('?') ? 
trailingQuery.slice(1) : trailingQuery; @@ -258,6 +273,9 @@ export function isNavItemActive(item: AppNavItem, pathname: string): boolean { if (item.id === 'secrets') { return pathname === '/secrets' || pathname.startsWith('/secrets/'); } + if (item.id === 'mcp') { + return pathname === '/mcp' || pathname.startsWith('/mcp/'); + } if (item.id === 'chat') { return pathname === '/chat' || pathname.startsWith('/chat/'); } diff --git a/web/src/lib/mcpClientConfig.test.ts b/web/src/lib/mcpClientConfig.test.ts new file mode 100644 index 0000000..67f36bd --- /dev/null +++ b/web/src/lib/mcpClientConfig.test.ts @@ -0,0 +1,42 @@ +import { describe, expect, it } from 'vitest'; +import { + buildRemoteCursorConfig, + generateMcpToken, + hostFromPublicUrl, + mcpEndpointUrl, + normalizeMcpDomain, + normalizePublicUrl, + tokenForSnippet, +} from '@/lib/mcpClientConfig'; + +describe('mcpClientConfig', () => { + it('normalizes public URL and endpoint', () => { + expect(normalizePublicUrl('audit.example.com')).toBe('https://audit.example.com'); + expect(mcpEndpointUrl('https://audit.example.com')).toBe('https://audit.example.com/mcp'); + expect(hostFromPublicUrl('https://audit.example.com/path')).toBe('audit.example.com'); + }); + + it('builds remote cursor json', () => { + const json = buildRemoteCursorConfig({ + publicUrl: 'https://audit.example.com', + token: 'secret', + domain: 'core', + }); + expect(json).toContain('"url": "https://audit.example.com/mcp"'); + expect(json).toContain('Bearer secret'); + }); + + it('generates token prefix', () => { + expect(generateMcpToken().startsWith('wp_mcp_')).toBe(true); + }); + + it('normalizes domain bundle', () => { + expect(normalizeMcpDomain('full')).toBe('full'); + expect(normalizeMcpDomain('bogus')).toBe('core'); + }); + + it('masks token in snippets', () => { + expect(tokenForSnippet('••••abcd', true)).toBe(''); + expect(tokenForSnippet('plain-token', false)).toBe('plain-token'); + }); +}); diff --git a/web/src/lib/mcpClientConfig.ts 
b/web/src/lib/mcpClientConfig.ts new file mode 100644 index 0000000..57c1b33 --- /dev/null +++ b/web/src/lib/mcpClientConfig.ts @@ -0,0 +1,104 @@ +export type McpDomainBundle = 'core' | 'crawl' | 'google' | 'links' | 'full'; + +export interface McpClientConfigInput { + publicUrl: string; + token: string; + domain: McpDomainBundle; + propertyId?: string; + databaseUrl?: string; +} + +const MCP_DOMAIN_BUNDLES: McpDomainBundle[] = ['core', 'crawl', 'google', 'links', 'full']; + +export function isMcpDomainBundle(value: string): value is McpDomainBundle { + return MCP_DOMAIN_BUNDLES.includes(value as McpDomainBundle); +} + +export function normalizeMcpDomain(value: string | undefined): McpDomainBundle { + const trimmed = String(value || 'core').trim().toLowerCase(); + return isMcpDomainBundle(trimmed) ? trimmed : 'core'; +} + +export function normalizePublicUrl(raw: string): string { + const trimmed = raw.trim().replace(/\/+$/, ''); + if (!trimmed) return ''; + if (/^https?:\/\//i.test(trimmed)) return trimmed; + return `https://${trimmed}`; +} + +export function mcpEndpointUrl(publicUrl: string): string { + const base = normalizePublicUrl(publicUrl); + if (!base) return 'https://your-host.example/mcp'; + return `${base}/mcp`; +} + +export function hostFromPublicUrl(publicUrl: string): string { + const base = normalizePublicUrl(publicUrl); + if (!base) return ''; + try { + return new URL(base).hostname; + } catch { + return ''; + } +} + +export function generateMcpToken(): string { + if (typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function') { + const a = crypto.randomUUID().replace(/-/g, ''); + const b = crypto.randomUUID().replace(/-/g, '').slice(0, 12); + return `wp_mcp_${a}${b}`; + } + return `wp_mcp_${Math.random().toString(36).slice(2)}${Date.now().toString(36)}`; +} + +export function buildRemoteCursorConfig(input: McpClientConfigInput): string { + const url = mcpEndpointUrl(input.publicUrl); + const token = input.token.trim() || ''; + const 
payload = { + mcpServers: { + 'site-audit-remote': { + url, + headers: { + Authorization: `Bearer ${token}`, + }, + }, + }, + }; + return `${JSON.stringify(payload, null, 2)}\n`; +} + +export function buildLocalStdioConfig(input: McpClientConfigInput): string { + const domain = normalizeMcpDomain(input.domain); + const propertyId = input.propertyId?.trim() || '1'; + const databaseUrl = input.databaseUrl?.trim() || 'postgres://USER:PASS@localhost:5432/website_profiling'; + const payload = { + mcpServers: { + 'site-audit-local': { + command: 'python', + args: ['-m', 'website_profiling.mcp'], + env: { + DATABASE_URL: databaseUrl, + PYTHONPATH: 'src', + WP_MCP_DOMAIN: domain, + WP_PROPERTY_ID: propertyId, + }, + }, + }, + }; + return `${JSON.stringify(payload, null, 2)}\n`; +} + +export function buildDockerStartCommand(): string { + return 'docker compose -f docker-compose.prod.yml up -d mcp'; +} + +export function buildHttpStartCommand(): string { + return 'python -m website_profiling.mcp.http'; +} + +export function tokenForSnippet(rawToken: string, masked: boolean): string { + if (masked || !rawToken.trim() || rawToken.startsWith('••••')) { + return ''; + } + return rawToken.trim(); +} diff --git a/web/src/lib/pipelineConfigSchema.test.ts b/web/src/lib/pipelineConfigSchema.test.ts index b21dee0..bfee88f 100644 --- a/web/src/lib/pipelineConfigSchema.test.ts +++ b/web/src/lib/pipelineConfigSchema.test.ts @@ -9,6 +9,7 @@ import { isPipelineFieldVisible, validatePipelineRun, } from '@/lib/pipelineConfigSchema'; +import { MCP_MANAGED_KEYS } from '@/lib/secretsConfigSchema'; function parseConfigKeys(raw: string): Set { const keys = new Set(); @@ -32,6 +33,8 @@ describe('pipelineConfigSchema', () => { const optionalOmitted = new Set([ 'enrich_keywords_after_report', ...INTERNAL_PIPELINE_KEYS, + // MCP keys are managed on /mcp (server-runtime settings), not in the CLI example. 
+ ...MCP_MANAGED_KEYS, ]); for (const key of ALL_SCHEMA_KEYS) { if (optionalOmitted.has(key)) continue; diff --git a/web/src/lib/pipelineConfigSchema.ts b/web/src/lib/pipelineConfigSchema.ts index a0e40af..010f22d 100644 --- a/web/src/lib/pipelineConfigSchema.ts +++ b/web/src/lib/pipelineConfigSchema.ts @@ -700,6 +700,47 @@ export const PIPELINE_CONFIG_SECTIONS: PipelineConfigSection[] = [ }, ], }, + { + id: 'mcp', + label: 'Remote MCP', + fields: [ + { + key: 'mcp_token', + label: 'MCP bearer token', + type: 'secret', + defaultValue: '', + help: 'Managed on Secrets → Remote MCP. Used by the Streamable HTTP MCP service.', + }, + { + key: 'mcp_allowed_hosts', + label: 'Allowed MCP hostnames', + type: 'text', + defaultValue: '', + help: 'Comma-separated public hostnames clients may use (e.g. audit.example.com).', + }, + { + key: 'mcp_allowed_origins', + label: 'Allowed MCP origins', + type: 'text', + defaultValue: '', + help: 'Optional comma-separated Origin values for browser MCP clients.', + }, + { + key: 'mcp_public_url', + label: 'Public MCP base URL', + type: 'text', + defaultValue: '', + help: 'Managed on MCP settings. Used for copy-paste client configs.', + }, + { + key: 'mcp_domain', + label: 'MCP tool bundle', + type: 'text', + defaultValue: 'core', + help: 'Managed on MCP settings (WP_MCP_DOMAIN).', + }, + ], + }, { id: 'advanced', label: 'Advanced', diff --git a/web/src/lib/secretsConfigSchema.ts b/web/src/lib/secretsConfigSchema.ts index 929db45..94c8b7a 100644 --- a/web/src/lib/secretsConfigSchema.ts +++ b/web/src/lib/secretsConfigSchema.ts @@ -38,6 +38,20 @@ export const PIPELINE_SECRET_KEYS = new Set([ 'google_rich_results_api_key', 'crawl_auth_password', 'crawl_cookies', + 'mcp_token', +]); + +/** + * Keys managed on the dedicated /mcp page. They are hidden from the generic + * Pipeline page and kept out of the CLI shadow file, but only `mcp_token` is a + * true secret (in PIPELINE_SECRET_KEYS) — the rest must round-trip as plain text. 
+ */ +export const MCP_MANAGED_KEYS = new Set([ + 'mcp_token', + 'mcp_allowed_hosts', + 'mcp_allowed_origins', + 'mcp_public_url', + 'mcp_domain', ]); export const SECRETS_SECTIONS: SecretsSection[] = [ @@ -135,9 +149,57 @@ export const SECRETS_SECTIONS: SecretsSection[] = [ }, ]; -export const ALL_SECRETS_KEYS = new Set( - SECRETS_SECTIONS.flatMap((s) => s.fields.map((f) => f.key)), -); +/** Managed on /mcp — stored in pipeline_config like other secrets. */ +export const MCP_SETTINGS_FIELDS: SecretsField[] = [ + { + key: 'mcp_token', + label: 'MCP bearer token', + type: 'secret', + storage: 'pipeline', + placeholder: 'Long random token', + help: 'Required for remote MCP clients. Sent as Authorization: Bearer … in Cursor or Claude Desktop.', + envVars: ['WP_MCP_TOKEN'], + }, + { + key: 'mcp_allowed_hosts', + label: 'Allowed hostnames', + type: 'text', + storage: 'pipeline', + placeholder: 'audit.example.com,*.example.com', + help: 'Comma-separated Host header values clients may use. Required when MCP binds beyond localhost.', + envVars: ['WP_MCP_ALLOWED_HOSTS'], + }, + { + key: 'mcp_allowed_origins', + label: 'Allowed origins (optional)', + type: 'text', + storage: 'pipeline', + placeholder: 'https://audit.example.com', + help: 'Comma-separated Origin values for browser MCP clients. Leave blank to skip Origin checks.', + envVars: ['WP_MCP_ALLOWED_ORIGINS'], + }, + { + key: 'mcp_public_url', + label: 'Public MCP base URL', + type: 'text', + storage: 'pipeline', + placeholder: 'https://audit.example.com', + help: 'Used to build copy-paste client configs. 
Include scheme, no trailing slash.', + }, + { + key: 'mcp_domain', + label: 'Tool bundle', + type: 'text', + storage: 'pipeline', + placeholder: 'core', + help: 'WP_MCP_DOMAIN bundle: core (default), crawl, google, links, or full.', + }, +]; + +export const ALL_SECRETS_KEYS = new Set([ + ...SECRETS_SECTIONS.flatMap((s) => s.fields.map((f) => f.key)), + ...MCP_SETTINGS_FIELDS.map((f) => f.key), +]); export const SECRETS_MASK_SENTINEL = '__MASKED__'; @@ -145,11 +207,18 @@ export function isPipelineSecretKey(key: string): boolean { return PIPELINE_SECRET_KEYS.has(key); } +/** Keys hidden from the generic Pipeline page: secrets plus /mcp-managed config. */ +export function isPipelineHiddenKey(key: string): boolean { + return isPipelineSecretKey(key) || MCP_MANAGED_KEYS.has(key); +} + export function isPipelineFieldVisibleOnPipeline(field: { key: string }): boolean { - return !isPipelineSecretKey(field.key); + return !isPipelineHiddenKey(field.key); } export function getSecretsFieldByKey(key: string): SecretsField | null { + const mcpField = MCP_SETTINGS_FIELDS.find((f) => f.key === key); + if (mcpField) return mcpField; for (const section of SECRETS_SECTIONS) { const field = section.fields.find((f) => f.key === key); if (field) return field; @@ -177,6 +246,9 @@ export function buildInitialSecretsState(): SecretsState { out[f.key] = ''; } } + for (const f of MCP_SETTINGS_FIELDS) { + out[f.key] = f.key === 'mcp_domain' ? 'core' : ''; + } return out; } @@ -189,6 +261,11 @@ export function collectEnvHints(): Record { } } } + for (const field of MCP_SETTINGS_FIELDS) { + for (const envVar of field.envVars ?? 
[]) { + vars.add(envVar); + } + } const hints: Record = {}; for (const name of vars) { hints[name] = Boolean(process.env[name]?.trim()); diff --git a/web/src/server/pipelineConfig.ts b/web/src/server/pipelineConfig.ts index 0a043f1..781b1c6 100644 --- a/web/src/server/pipelineConfig.ts +++ b/web/src/server/pipelineConfig.ts @@ -14,6 +14,7 @@ import { getFieldByKey, } from '@/lib/pipelineConfigSchema'; import { + isPipelineHiddenKey, isPipelineSecretKey, maskSecretForClient, SECRETS_MASK_SENTINEL, @@ -151,7 +152,7 @@ export function serializeConfig( seenIds.add(section.id); lines.push(`# --- ${section.label} ---`); for (const f of section.fields) { - if (isPipelineSecretKey(f.key)) continue; + if (isPipelineHiddenKey(f.key)) continue; const v = state[f.key]; if (f.type === 'bool') { lines.push(`${f.key} = ${v === true ? 'true' : 'false'}`); @@ -323,7 +324,7 @@ export async function savePipelineConfig( shadowState[key] = entries[key]; } for (const key of Object.keys(shadowState)) { - if (isPipelineSecretKey(key)) { + if (isPipelineHiddenKey(key)) { delete shadowState[key]; delete shadowState[`${key}_masked`]; } diff --git a/web/src/server/secrets.test.ts b/web/src/server/secrets.test.ts index 09d0565..e4136f2 100644 --- a/web/src/server/secrets.test.ts +++ b/web/src/server/secrets.test.ts @@ -28,6 +28,20 @@ describe('maskPipelineSecretsForClient', () => { expect(masked.bing_webmaster_api_key_masked).toBe(true); expect(masked.start_url).toBe('https://example.com'); }); + + it('masks the MCP token but not the non-secret MCP fields', () => { + const masked = maskPipelineSecretsForClient({ + mcp_token: 'wp_mcp_secrettoken', + mcp_allowed_hosts: 'audit.example.com', + mcp_domain: 'core', + }); + expect(masked.mcp_token).toBe('••••oken'); + expect(masked.mcp_token_masked).toBe(true); + // Regression: these were masked to "••••.com" / "••••" and shown in text inputs. 
+ expect(masked.mcp_allowed_hosts).toBe('audit.example.com'); + expect(masked.mcp_allowed_hosts_masked).toBeUndefined(); + expect(masked.mcp_domain).toBe('core'); + }); }); describe('serializeConfig', () => { @@ -87,3 +101,40 @@ describe('loadSecrets', () => { expect(result.envHints).toBeTypeOf('object'); }); }); + +describe('saveSecrets', () => { + beforeEach(() => { + vi.resetModules(); + }); + + it('persists MCP fields entered on the /mcp page', async () => { + const savePipelineConfig = vi.fn().mockResolvedValue('postgresql'); + vi.doMock('@/server/llmConfig', () => ({ + readLlmConfigRaw: vi.fn().mockResolvedValue({}), + saveLlmConfig: vi.fn(), + })); + vi.doMock('@/server/pipelineConfig', () => ({ + loadPipelineConfig: vi.fn().mockResolvedValue({ state: {}, unknownKeys: [] }), + savePipelineConfig, + })); + vi.doMock('@/server/googleAppSettings', () => ({ + loadGoogleAppSettings: vi + .fn() + .mockResolvedValue({ clientId: '', clientSecret: '', serviceAccount: null }), + saveGoogleAppSettings: vi.fn(), + })); + + const { saveSecrets } = await import('@/server/secrets'); + await saveSecrets({ + mcp_token: 'wp_mcp_newtoken123', + mcp_allowed_hosts: 'audit.example.com', + mcp_domain: 'google', + }); + + expect(savePipelineConfig).toHaveBeenCalledTimes(1); + const savedState = (savePipelineConfig.mock.calls[0] as unknown[])[0] as Record; + expect(savedState.mcp_token).toBe('wp_mcp_newtoken123'); + expect(savedState.mcp_allowed_hosts).toBe('audit.example.com'); + expect(savedState.mcp_domain).toBe('google'); + }); +}); diff --git a/web/src/server/secrets.ts b/web/src/server/secrets.ts index 27db0a1..dd07528 100644 --- a/web/src/server/secrets.ts +++ b/web/src/server/secrets.ts @@ -3,6 +3,7 @@ */ import { ALL_SECRETS_KEYS, + MCP_SETTINGS_FIELDS, SECRETS_MASK_SENTINEL, SECRETS_SECTIONS, buildInitialSecretsState, @@ -117,18 +118,27 @@ export async function saveSecrets(rawState: SecretsState): Promise { } const pipelineState = { ...pipelineLoaded.state }; + const 
copyPipelineField = (key: string): void => { + if (rawState[key] === undefined) return; + pipelineState[key] = String(rawState[key] ?? ''); + if (rawState[`${key}_masked`] === true) { + pipelineState[`${key}_masked`] = true; + } else { + delete pipelineState[`${key}_masked`]; + } + }; for (const section of SECRETS_SECTIONS) { for (const field of section.fields) { if (field.storage !== 'pipeline') continue; - if (rawState[field.key] === undefined) continue; - pipelineState[field.key] = String(rawState[field.key] ?? ''); - if (rawState[`${field.key}_masked`] === true) { - pipelineState[`${field.key}_masked`] = true; - } else { - delete pipelineState[`${field.key}_masked`]; - } + copyPipelineField(field.key); } } + // MCP fields live in a separate array (managed on /mcp), not in SECRETS_SECTIONS; + // copy them too or they are silently dropped on save. + for (const field of MCP_SETTINGS_FIELDS) { + if (field.storage !== 'pipeline') continue; + copyPipelineField(field.key); + } const googlePatch: Parameters[0] = {}; if (rawState.google_client_id !== undefined) { diff --git a/web/src/strings.json b/web/src/strings.json index 4964984..a9c89d8 100644 --- a/web/src/strings.json +++ b/web/src/strings.json @@ -717,6 +717,10 @@ "label": "Secrets", "section": "Overview" }, + "mcp": { + "label": "MCP", + "section": "Overview" + }, "chat": { "label": "AI Chat", "section": "AI" @@ -844,6 +848,8 @@ "saveButton": "Save secrets", "saving": "Saving…", "saveHint": "Changes are stored in PostgreSQL only — not in audit settings files.", + "mcpMovedHint": "Remote MCP token, allowed hosts, and copy-paste client configs live on the", + "mcpMovedLink": "MCP settings page", "loading": "Loading secrets…", "envConfigured": "Environment variables set", "serviceAccountSaved": "Service account key saved. 
Paste new JSON to replace.", @@ -857,6 +863,47 @@ "googleNotConfigured": "Google Cloud credentials are not configured yet.", "googleCredentialsHint": "Add OAuth Client ID, Client Secret, or service account JSON on the" }, + "mcpSettings": { + "pageTitle": "MCP settings", + "pageSubtitle": "Generate tokens, allow remote hosts, and copy client configs for Cursor or Claude Desktop.", + "sidebarTitle": "Navigation", + "loading": "Loading MCP settings…", + "accessTitle": "Remote access", + "accessSubtitle": "Saved in PostgreSQL and applied on the next MCP HTTP request.", + "tokenLabel": "Bearer token", + "tokenHelp": "Sent as Authorization: Bearer … by remote MCP clients.", + "tokenPlaceholder": "Click Generate token or paste your own", + "generateToken": "Generate token", + "publicUrlLabel": "Public base URL", + "publicUrlHelp": "Your public Site Audit URL (used in copy-paste configs). No trailing slash.", + "syncHosts": "Add hostname from URL to allowlist", + "endpointPreview": "MCP endpoint", + "allowedHostsLabel": "Allowed hostnames", + "allowedHostsHelp": "Comma-separated Host values clients may use (e.g. audit.example.com).", + "allowedOriginsLabel": "Allowed origins (optional)", + "allowedOriginsHelp": "Comma-separated Origin values for browser MCP clients.", + "domainLabel": "Tool bundle (WP_MCP_DOMAIN)", + "domainHelp": "Which audit tools this MCP server exposes.", + "copyTitle": "Copy client config", + "copySubtitle": "Paste into .cursor/mcp.json, Claude Desktop config, or your MCP client.", + "remoteConfigLabel": "Remote Streamable HTTP (hosted server)", + "remoteConfigHelp": "Use when Site Audit runs on a server you deploy. 
Replace placeholders after saving.", + "localConfigLabel": "Local stdio (same machine as Postgres)", + "localConfigHelp": "Use when Cursor runs on the same machine as the database.", + "startHttpLabel": "Start MCP HTTP service", + "startHttpHelp": "Run on the server after saving settings.", + "startDockerLabel": "Docker (production compose)", + "startDockerHelp": "Starts the mcp service from docker-compose.prod.yml.", + "copy": "Copy", + "copied": "Copied", + "envConfigured": "Environment variables set", + "envOverrides": "env overrides saved values", + "docsHint": "Full reference:", + "docsLink": "MCP remote setup guide", + "saveHint": "Save to apply token and host allowlist to the MCP HTTP service.", + "saveButton": "Save MCP settings", + "saving": "Saving…" + }, "pipelineRunner": { "fabTitle": "Run audit", "fabAriaOpen": "Open Run audit", diff --git a/web/src/views/McpSettings.tsx b/web/src/views/McpSettings.tsx new file mode 100644 index 0000000..40a26a7 --- /dev/null +++ b/web/src/views/McpSettings.tsx @@ -0,0 +1,296 @@ +'use client'; + +import Link from 'next/link'; +import { usePathname } from 'next/navigation'; +import { Loader2, Plug, RefreshCw, Sparkles } from 'lucide-react'; +import ConfigField from '@/components/pipeline/ConfigField'; +import McpCopyBlock from '@/components/mcp/McpCopyBlock'; +import ChatShell from '@/components/chat/ChatShell'; +import { SecretsSaveBar } from '@/components/secrets/SecretsSettingsPanel'; +import { useMcpSettings } from '@/hooks/useMcpSettings'; +import { useReadOnlySession } from '@/hooks/useReadOnlySession'; +import { MCP_SETTINGS_FIELDS } from '@/lib/secretsConfigSchema'; +import { + buildDockerStartCommand, + buildHttpStartCommand, + buildLocalStdioConfig, + buildRemoteCursorConfig, + hostFromPublicUrl, + normalizeMcpDomain, + normalizePublicUrl, + tokenForSnippet, +} from '@/lib/mcpClientConfig'; +import { CHAT_SIDEBAR_NAV_IDS, isMiniNavLinkActive, miniNavLinks } from '@/lib/appNav'; +import { strings } from 
'@/lib/strings'; + +const s = strings.mcpSettings; + +const MCP_DOMAIN_OPTIONS = [ + { value: 'core', label: 'core — router + insight (recommended)' }, + { value: 'crawl', label: 'crawl — technical crawl tools' }, + { value: 'google', label: 'google — GSC / GA4 tools' }, + { value: 'links', label: 'links — link architecture' }, + { value: 'full', label: 'full — all 340 tools' }, +]; + +export default function McpSettingsPage() { + const { + state, + envHints, + loading, + saving, + saveMsg, + loadError, + setField, + save, + generateToken, + suggestHostsFromUrl, + tokenMasked, + } = useMcpSettings(); + const { readOnly } = useReadOnlySession(); + const pathname = usePathname(); + + const publicUrl = normalizePublicUrl(String(state.mcp_public_url || '')); + const domain = normalizeMcpDomain(String(state.mcp_domain || 'core')); + const snippetToken = tokenForSnippet(String(state.mcp_token || ''), tokenMasked); + + const remoteConfig = buildRemoteCursorConfig({ + publicUrl, + token: snippetToken, + domain, + }); + + const localConfig = buildLocalStdioConfig({ + publicUrl, + token: snippetToken, + domain, + propertyId: '1', + }); + + const envHintNames = MCP_SETTINGS_FIELDS.flatMap((field) => + (field.envVars ?? []).filter((name) => envHints[name]), + ); + + return ( + ( + + )} + > +
+
+ +
+

{s.pageTitle}

+

{s.pageSubtitle}

+
+
+ +
+ {loading ? ( +
+ + {s.loading} +
+ ) : loadError ? ( +
{loadError}
+ ) : ( +
+ {envHintNames.length ? ( +
+ {s.envConfigured}: {envHintNames.join(', ')} ({s.envOverrides}) +
+ ) : null} + +
+
+

{s.accessTitle}

+

{s.accessSubtitle}

+
+ +
+
+
+ setField('mcp_token', value)} + /> +
+ +
+ + setField('mcp_public_url', value)} + /> + +
+ + {publicUrl ? ( + + {s.endpointPreview}: {publicUrl}/mcp + + ) : null} +
+ + setField('mcp_allowed_hosts', value)} + /> + + setField('mcp_allowed_origins', value)} + /> + + +
+
+ +
+
+

{s.copyTitle}

+

{s.copySubtitle}

+
+ + + + + +
+ +

+ {s.docsHint}{' '} + + {s.docsLink} + +

+
+ )} +
+ +
+ void save()} + saveHint={s.saveHint} + saveButton={s.saveButton} + savingLabel={s.saving} + /> +
+
+
+ ); +} diff --git a/web/src/views/Secrets.tsx b/web/src/views/Secrets.tsx index f17ee71..c193534 100644 --- a/web/src/views/Secrets.tsx +++ b/web/src/views/Secrets.tsx @@ -1,5 +1,6 @@ 'use client'; +import Link from 'next/link'; import { useState } from 'react'; import { Loader2 } from 'lucide-react'; import ChatShell from '@/components/chat/ChatShell'; @@ -42,13 +43,24 @@ export default function SecretsPage() { {loadError} ) : ( - +
+

+ {s.mcpMovedHint}{' '} + + {s.mcpMovedLink} + + . +

+
+ + )}