From 14f41755e9157f2acbce4feb5ccada1a0af94334 Mon Sep 17 00:00:00 2001 From: karansohi Date: Thu, 7 May 2026 11:03:53 -0700 Subject: [PATCH 1/2] feat(examples): add Agent Control Protect demo for Cursor Cursor before-submit hook that runs Galileo Protect (secrets + PII guardrails) on user prompts. Includes hook, setup script, 40-prompt test battery, docker-compose for local server, and README. Also adds .env / .env.local to root .gitignore so example .env files are never committed alongside their .env.example templates. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 5 + .../.cursor/hooks.json | 11 + .../hooks/before_submit_protect_guardrails.py | 326 +++++++++++++++++ .../agent-control-protect-cursor/.env.example | 13 + .../Dockerfile.server | 28 ++ .../agent-control-protect-cursor/README.md | 330 ++++++++++++++++++ .../docker-compose.yml | 66 ++++ .../requirements.txt | 3 + .../scripts/setup.py | 266 ++++++++++++++ .../scripts/test_hook.py | 211 +++++++++++ 10 files changed, 1259 insertions(+) create mode 100644 examples/agent-control-protect-cursor/.cursor/hooks.json create mode 100644 examples/agent-control-protect-cursor/.cursor/hooks/before_submit_protect_guardrails.py create mode 100644 examples/agent-control-protect-cursor/.env.example create mode 100644 examples/agent-control-protect-cursor/Dockerfile.server create mode 100644 examples/agent-control-protect-cursor/README.md create mode 100644 examples/agent-control-protect-cursor/docker-compose.yml create mode 100644 examples/agent-control-protect-cursor/requirements.txt create mode 100644 examples/agent-control-protect-cursor/scripts/setup.py create mode 100644 examples/agent-control-protect-cursor/scripts/test_hook.py diff --git a/.gitignore b/.gitignore index 41d1f9f6..46fc5bec 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,11 @@ env/ ENV/ .venv +# Local env files (never commit real secrets; .env.example is fine to track) +.env +.env.local +.env.*.local + # UV .uv/ uv.lock diff 
--git a/examples/agent-control-protect-cursor/.cursor/hooks.json b/examples/agent-control-protect-cursor/.cursor/hooks.json new file mode 100644 index 00000000..8a29cd88 --- /dev/null +++ b/examples/agent-control-protect-cursor/.cursor/hooks.json @@ -0,0 +1,11 @@ +{ + "version": 1, + "hooks": { + "beforeSubmitPrompt": [ + { + "command": ".venv/bin/python3 .cursor/hooks/before_submit_protect_guardrails.py", + "timeout": 15 + } + ] + } +} diff --git a/examples/agent-control-protect-cursor/.cursor/hooks/before_submit_protect_guardrails.py b/examples/agent-control-protect-cursor/.cursor/hooks/before_submit_protect_guardrails.py new file mode 100644 index 00000000..b151c059 --- /dev/null +++ b/examples/agent-control-protect-cursor/.cursor/hooks/before_submit_protect_guardrails.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python3 +"""Cursor beforeSubmitPrompt hook — protect guardrails (SDK end-to-end). + +Uses `agent_control.init()` + `@control()`. The decorator registers the step +with the AC server, evaluates every control attached to the agent (regex +secrets + galileo.luna2 PII via Galileo Protect with rules-in-payload), and +emits an OpenTelemetry-flavored observability event per evaluation so the AC +UI at http://localhost:4000 stays populated. + +After AC's verdict, we also write a project-log-stream trace via +`GalileoLogger.add_protect_span` so the prompt shows up in Galileo's project +view alongside its underlying Protect call. The Protect call is reused from +AC — no second roundtrip. + +Runs in the venv because of the SDK imports. 
+ +Stdin (Cursor): {"hook_event_name":"beforeSubmitPrompt","prompt":"…", …} +Stdout (Cursor): {"continue": true|false, "user_message":"…","agent_message":"…"} + +Env (loaded from .env in the current working directory, if present): + AGENT_CONTROL_URL default http://localhost:8000 + AC_AGENT_NAME default cursor-protect-v4 + GALILEO_PROJECT (required for project-log-stream traces) + GALILEO_LOG_STREAM default cursor-hooks + CURSOR_PROTECT_FAIL_MODE "allow" (default) or "deny" + CURSOR_PROTECT_LOG_RAW set to 1/true to skip PII redaction (debug) + CURSOR_PROTECT_DEBUG set to 1/true to print to stderr +""" + +from __future__ import annotations + +import asyncio +import json +import os +import re +import sys +from pathlib import Path + +from dotenv import load_dotenv + +import agent_control +from agent_control import control, ControlViolationError + + +# ── PII redactors (regex, local). Names/locations survive verbatim, but the +# redacted twin is what we send to Galileo's log stream so project-trace +# bodies don't leak raw PII even though AC already saw the original.
# Spellings treated as "on" by the boolean CURSOR_PROTECT_* environment flags.
_TRUTHY = frozenset({"1", "true", "yes", "on"})


def _env_flag(name: str) -> bool:
    """Return True when environment variable *name* holds a truthy spelling."""
    return os.environ.get(name, "").strip().lower() in _TRUTHY


def _fail_mode() -> str:
    """Return the configured fail mode, lower-cased ("allow" when unset).

    ``CURSOR_PROTECT_FAIL_MODE`` is the documented name. The shipped
    ``.env.example`` template sets ``PROTECT_FAIL_MODE`` instead, so that name
    is honoured as a fallback; otherwise ``deny`` configured through the
    template would be silently ignored and the hook would stay fail-open.
    Callers compare the result against ``"deny"``; any other value means allow.
    """
    for name in ("CURSOR_PROTECT_FAIL_MODE", "PROTECT_FAIL_MODE"):
        value = os.environ.get(name, "").strip().lower()
        if value:
            return value
    return "allow"


# (pattern, replacement) pairs applied in order by `_redact`.
# The replacement tokens mark *what* was removed so a redacted trace stays
# readable. The phone pattern accepts an optional country code (a bare
# "415-555-0182" must be redacted too) and refuses to start mid-number.
_REDACTORS: list[tuple[re.Pattern[str], str]] = [
    (re.compile(r"\b[\w.+\-]+@[\w\-]+(?:\.[\w\-]+)+\b"), "[EMAIL]"),
    (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[SSN]"),
    (re.compile(r"\b(?:\d[ -]*?){13,19}\b"), "[CARD]"),
    (
        re.compile(
            r"(?<!\d)(?:\+?\d{1,2}[\s\-.]?)?\(?\d{3}\)?[\s\-.]?\d{3}[\s\-.]?\d{4}\b"
        ),
        "[PHONE]",
    ),
    (re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b"), "[IP]"),
    (re.compile(r"\bhttps?://[^\s]+"), "[URL]"),
]


def _redact(text: str) -> str:
    """Return *text* with every `_REDACTORS` pattern replaced by its token.

    Returns *text* unchanged when ``CURSOR_PROTECT_LOG_RAW`` is truthy.
    """
    if _env_flag("CURSOR_PROTECT_LOG_RAW"):
        return text
    for pattern, replacement in _REDACTORS:
        text = pattern.sub(replacement, text)
    return text


# ── Cursor I/O ---------------------------------------------------------------

def _read_stdin_json() -> dict:
    """Read all of stdin and return it as a dict.

    Returns ``{}`` for empty input and ``{"_raw": <text>}`` when the input is
    not valid JSON or is valid JSON that is not an object, so callers can
    always use ``.get``.
    """
    raw = sys.stdin.read()
    if not raw.strip():
        return {}
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return {"_raw": raw}
    # A JSON list/string/number would crash every `payload.get(...)` below.
    return parsed if isinstance(parsed, dict) else {"_raw": raw}


# Set once a verdict has been written; later `_emit` calls become no-ops.
_verdict_emitted = False


def _emit(obj: dict) -> None:
    """Write *obj* to stdout as a single JSON document, at most once.

    Only the first verdict is written: a second JSON object on stdout (for
    example from the crash handler after a verdict was already sent) would
    make the combined output unparseable.
    """
    global _verdict_emitted
    if _verdict_emitted:
        return
    sys.stdout.write(json.dumps(obj, ensure_ascii=False))
    sys.stdout.flush()
    _verdict_emitted = True


def _allow() -> dict:
    """Return the allow verdict."""
    return {"continue": True}


def _deny(user_message: str, agent_message: str | None = None) -> dict:
    """Return a deny verdict carrying *user_message* and, if given, *agent_message*."""
    out: dict = {"continue": False, "user_message": user_message}
    if agent_message:
        out["agent_message"] = agent_message
    return out


def _extract_prompt(payload: dict) -> str:
    """Return the prompt text found in *payload*, or ``""`` when there is none.

    Accepted shapes, in order: ``payload["prompt"]`` as a string;
    ``payload["prompt"]["text"]`` / ``["content"]``; ``payload["input"]["prompt"]``.
    """
    p = payload.get("prompt")
    if isinstance(p, str):
        return p
    if isinstance(p, dict):
        for key in ("text", "content"):
            v = p.get(key)
            if isinstance(v, str):
                return v
    nested = payload.get("input")
    if isinstance(nested, dict):
        v = nested.get("prompt")
        if isinstance(v, str):
            return v
    return ""


def _debug(msg: str) -> None:
    """Print *msg* to stderr when ``CURSOR_PROTECT_DEBUG`` is truthy."""
    if _env_flag("CURSOR_PROTECT_DEBUG"):
        print(f"[guardrails-hook] {msg}", file=sys.stderr, flush=True)


_SECRETS_CONTROL = "block-secrets-v4"
_PII_CONTROL = "block-pii-v4"


def _deny_message(control_name: str | None) -> tuple[str, str]:
    """Return ``(user_message, agent_message)`` for the control that denied."""
    if control_name == _SECRETS_CONTROL:
        return (
            "🔒 Blocked: prompt looks like it contains a secret/API key.",
            "Agent Control blocked: secret detected.",
        )
    if control_name == _PII_CONTROL:
        return (
            "🪪 Blocked: prompt contains PII (Galileo Protect via Agent Control).",
            "Galileo Protect (via Agent Control) blocked: PII detected.",
        )
    return (
        f"🛑 Blocked by control: {control_name}",
        f"Blocked by {control_name}",
    )


# ── @control()-decorated step. The decorator names the step `check_prompt`,
# binds the `prompt` argument as `input` (per the SDK's input-name preference
# list), and triggers AC's pre/llm controls before the body runs. We just
# return the prompt unchanged — the work happens in the decorator.

@control()
async def check_prompt(prompt: str) -> str:
    return prompt


# ── GenAI trace per Cursor prompt → Galileo project log_stream view --------

def _log_to_galileo(
    *,
    prompt_original: str,
    decision: str,
    deny_control: str | None,
    deny_metadata: dict | None,
) -> None:
    """Write one cursor-hook trace per prompt to the Galileo project log stream.

    Reuses AC's existing Protect call: when the PII control fires, AC's
    `Luna2Evaluator` already invoked Protect and got back a real Galileo
    `trace_id`. We surface that here as a `add_protect_span` so the project's
    log_stream view shows TRIGGERED Protect spans linked to the same trace AC
    saw — no second Protect roundtrip.

    Secrets denies and allow paths still write a basic trace (no Protect span
    on the deny side because regex is server-local; no AC metadata is exposed
    to @control on the allow side, so we omit the span there).

    Best-effort. Any failure (missing SDK, missing project, network) is
    swallowed via debug.
    """
    project = os.environ.get("GALILEO_PROJECT", "").strip()
    if not project:
        _debug("logging skipped: GALILEO_PROJECT not set")
        return
    log_stream = os.environ.get("GALILEO_LOG_STREAM", "cursor-hooks").strip() or "cursor-hooks"

    # Imported lazily so the hook still works when the Galileo SDK is absent.
    try:
        from galileo import GalileoLogger
        from galileo_core.schemas.protect.payload import Payload
        from galileo_core.schemas.protect.response import Response
    except ImportError as exc:
        _debug(f"galileo SDK not importable; skipping log: {exc}")
        return

    try:
        prompt_redacted = _redact(prompt_original)
        tags = ["cursor-hook", "guardrails", f"decision:{decision}"]
        logger = GalileoLogger(project=project, log_stream=log_stream)
        logger.start_trace(
            input=prompt_redacted,
            name="cursor-hook",
            tags=list(tags),
        )

        # PII deny path: we have AC's evaluator metadata containing the real
        # Galileo Protect trace_id. Build a synthetic Response so the span
        # links back to the actual call and shows TRIGGERED.
        if deny_control == _PII_CONTROL and deny_metadata:
            status = (deny_metadata.get("status") or "not_triggered").lower()
            trace_id = deny_metadata.get("trace_id")
            trace_metadata = {"id": trace_id} if trace_id else {}
            response_obj = Response.model_validate({
                "status": status,
                "text": prompt_original,
                "trace_metadata": dict(trace_metadata),
            })
            redacted_response = Response.model_validate({
                "status": status,
                "text": prompt_redacted,
                "trace_metadata": dict(trace_metadata),
            })
            logger.add_protect_span(
                payload=Payload(input=prompt_original),
                redacted_payload=Payload(input=prompt_redacted),
                response=response_obj,
                redacted_response=redacted_response,
                metadata={
                    "source": "cursor-hook",
                    "control_name": deny_control,
                    "metric": str(deny_metadata.get("metric") or "input_pii"),
                    "execution_time_ms": str(deny_metadata.get("execution_time_ms") or ""),
                    "decision": decision,
                    "redaction": "regex",
                },
                tags=list(tags),
                status_code=200,
            )

        logger.conclude(
            output=f"{decision}: control={deny_control or 'none'}",
            status_code=200,
        )
        logger.flush()
        _debug(f"logged galileo trace project={project} stream={log_stream} decision={decision} control={deny_control}")
    except Exception as exc:
        # Deliberate best-effort: logging must never affect the verdict.
        _debug(f"galileo logging failed: {type(exc).__name__}: {exc}")


async def main() -> int:
    """Evaluate the prompt read from stdin and write the verdict to stdout.

    Always returns 0. Events other than ``beforeSubmitPrompt`` and empty
    prompts are allowed without contacting Agent Control. When Agent Control
    cannot be initialised or evaluation errors, the verdict follows the fail
    mode (see `_fail_mode`).
    """
    load_dotenv(Path.cwd() / ".env")
    fail_closed = _fail_mode() == "deny"
    server_url = os.environ.get("AGENT_CONTROL_URL", "http://localhost:8000")
    agent_name = os.environ.get("AC_AGENT_NAME", "cursor-protect-v4")

    payload = _read_stdin_json()
    event = payload.get("hook_event_name", "")
    if event and event != "beforeSubmitPrompt":
        _emit(_allow())
        return 0

    prompt = _extract_prompt(payload)
    if not prompt.strip():
        _emit(_allow())
        return 0

    try:
        agent_control.init(
            agent_name=agent_name,
            agent_description="Cursor guardrails — SDK + @control + observability.",
            server_url=server_url,
            observability_enabled=True,
        )
    except Exception as exc:
        _debug(f"AC init failed ({type(exc).__name__}): {exc}")
        if fail_closed:
            _emit(_deny("🛑 Agent Control unavailable (fail-closed).",
                        agent_message=f"AC init failed: {exc}"))
        else:
            _emit(_allow())
        return 0

    decision = "allow"
    deny_control: str | None = None
    deny_metadata: dict | None = None

    try:
        try:
            await check_prompt(prompt)
        except ControlViolationError as exc:
            decision = "deny"
            deny_control = getattr(exc, "control_name", None)
            deny_metadata = getattr(exc, "metadata", None) or {}
            user_msg, agent_msg = _deny_message(deny_control)
            # Emit the verdict before any diagnostics: a failure in a debug
            # line must not turn a deny into the crash handler's fail-open allow.
            _emit(_deny(user_msg, agent_message=agent_msg))
            _debug(f"AC deny: {deny_control}: {getattr(exc, 'message', exc)}")
        except Exception as exc:
            _debug(f"AC eval error ({type(exc).__name__}): {exc}")
            if fail_closed:
                _emit(_deny("🛑 Agent Control returned an error (fail-closed).",
                            agent_message=str(exc)))
            else:
                _emit(_allow())
            # Don't write a Galileo trace on infra error — it's not a policy event.
            return 0
        else:
            _debug("AC: allow")
            _emit(_allow())

        # Best-effort: write one trace per prompt to the project log_stream so
        # the AC verdict shows up in Galileo's project view alongside Protect.
        _log_to_galileo(
            prompt_original=prompt,
            decision=decision,
            deny_control=deny_control,
            deny_metadata=deny_metadata,
        )
        return 0
    finally:
        # Flush AC observability events to the AC server. Best-effort.
        try:
            await agent_control.ashutdown()
        except Exception as exc:
            _debug(f"AC shutdown failed: {exc}")


if __name__ == "__main__":
    try:
        exit_code = asyncio.run(main())
    except Exception as exc:
        # Last-resort boundary: the hook must always answer Cursor. `_emit` is
        # a no-op here if main() already wrote a verdict before crashing.
        if _fail_mode() == "deny":
            _emit({"continue": False, "user_message": "🛑 Prompt guard crashed.",
                   "agent_message": f"Hook crashed: {exc}"})
        else:
            _emit({"continue": True, "agent_message": f"Hook crashed (allowed): {exc}"})
        exit_code = 0
    raise SystemExit(exit_code)


# ---------------------------------------------------------------------------
# Not part of the hook. The text below is the patch content for other files
# that shares the same physical source line as the end of the hook. It is
# preserved verbatim (as comments, so this block stays valid Python); the
# Dockerfile comment it ends with continues on the following source line.
# ---------------------------------------------------------------------------
# diff --git a/examples/agent-control-protect-cursor/.env.example b/examples/agent-control-protect-cursor/.env.example
# new file mode 100644
# index 00000000..b90f6fd9
# --- /dev/null
# +++ b/examples/agent-control-protect-cursor/.env.example
# @@ -0,0 +1,13 @@
# +# Galileo
# +GALILEO_API_KEY=
# +GALILEO_PROJECT=
# +# GALILEO_CONSOLE_URL=https://console.demo-v2.galileocloud.io
# +
# +# Agent Control (started with: docker compose up -d)
# +AGENT_CONTROL_URL=http://localhost:8000
# +AC_AGENT_NAME=cursor-hook
# +
# +# Hook fail mode: allow (fail-open, default) | deny (fail-closed)
# +PROTECT_FAIL_MODE=allow
# +
# +# CURSOR_PROTECT_DEBUG=true
# diff --git a/examples/agent-control-protect-cursor/Dockerfile.server b/examples/agent-control-protect-cursor/Dockerfile.server
# new file mode 100644
# index 00000000..589a6e07
# --- /dev/null
# +++ b/examples/agent-control-protect-cursor/Dockerfile.server
# @@ -0,0 +1,28 @@
# +FROM galileoai/agent-control-server:latest
# +
# +# The base image ships the Galileo Luna-2 evaluator source at
/app/evaluators/contrib/galileo but does not install it into the venv, +# so AC's evaluator registry never discovers it. Install it from that +# in-image source with --no-deps — installing the PyPI package or its +# extra pulls a newer agent-control-models that breaks the server. +RUN /app/.venv/bin/python3 -m ensurepip --upgrade && \ + /app/.venv/bin/python3 -m pip install --no-deps /app/evaluators/contrib/galileo + +# Galileo Protect's REST API uses metric "input_pii" with operator "not_empty"; +# AC's Luna2EvaluatorConfig pins both fields to Pydantic Literals that only +# include "pii_detection" / "any" etc. Patch the Literals so a control with +# metric=input_pii / operator=not_empty validates at PUT time and is forwarded +# as-is to Protect. +RUN CFG=/app/.venv/lib/python3.12/site-packages/agent_control_evaluator_galileo/luna2/config.py && \ + sed -i 's/"pii_detection",/"pii_detection",\n "input_pii",/' "$CFG" && \ + sed -i 's/Luna2Operator = Literal\["gt", "lt", "gte", "lte", "eq", "contains", "any"\]/Luna2Operator = Literal["gt", "lt", "gte", "lte", "eq", "contains", "any", "not_empty"]/' "$CFG" + +# Galileo Protect requires a stage_name (the stage just hosts the project linkage; +# rules come from the payload). AC's _evaluate_local_stage doesn't forward +# stage_name from config — patch it in. The marker line is unique to local mode. +# Also: AC sends `target_value: 0` for the rule (because Rule requires a value +# and the config defaults to 0), but Protect's `not_empty` operator only +# triggers when target_value is null. Send null instead for not_empty. 
+RUN EVAL=/app/.venv/lib/python3.12/site-packages/agent_control_evaluator_galileo/luna2/evaluator.py && \ + sed -i 's|prioritized_rulesets=\[ruleset\],|prioritized_rulesets=[ruleset],\n stage_name=self.config.stage_name,|' "$EVAL" && \ + sed -i 's|target_value=self._get_numeric_target_value() or 0,|target_value=None if self.config.operator == "not_empty" else (self._get_numeric_target_value() or 0),|' "$EVAL" diff --git a/examples/agent-control-protect-cursor/README.md b/examples/agent-control-protect-cursor/README.md new file mode 100644 index 00000000..f4e51169 --- /dev/null +++ b/examples/agent-control-protect-cursor/README.md @@ -0,0 +1,330 @@ +# Cursor × Agent Control × Galileo Protect + +A `beforeSubmitPrompt` hook for Cursor that blocks prompts containing **secrets** +(AWS keys, GitHub tokens, etc.) or **PII** (emails, phone numbers, names, SSNs, +credit cards, …) before the prompt ever leaves the developer's machine. + +The hook is intentionally thin. **All policy lives in Agent Control.** Adding a +new check (toxicity, prompt injection, custom regex) is a config edit, not a +hook edit. + +--- + +## Architecture + +```mermaid +flowchart LR + subgraph Laptop["Developer's laptop"] + Cursor["Cursor IDE"] + Hook["before_submit_protect_guardrails.py
(@control + agent_control.init)"] + ACS[("Agent Control server
localhost:8000
(Docker)")] + PG[(postgres)] + UI["AC UI
localhost:4000
(Docker)"] + end + + subgraph Cloud["Galileo Cloud"] + Protect["Protect API
/v1/protect/invoke"] + Logs["Project log streams
+ Protect Traces"] + end + + Cursor -->|stdin: prompt JSON| Hook + Hook -->|"@control evaluation"| ACS + ACS --> PG + ACS --> UI + ACS -->|"galileo.luna2 evaluator
(stage_type=local + ruleset)"| Protect + ACS -->|"observability events"| ACS + Hook -.->|"GalileoLogger.add_protect_span
(reuses Protect trace_id)"| Logs + Hook -->|stdout: continue / deny| Cursor +``` + +Two guardrails are attached to one AC agent: + +| Control | Evaluator | Where it runs | What it catches | +|---|---|---|---| +| `block-secrets-v4` | `regex` | locally inside the AC server | AWS / GitHub / Anthropic / OpenAI / Stripe / Slack / JWT / PEM private key | +| `block-pii-v4` | `galileo.luna2` (`stage_type=local`) | AC server calls Galileo Protect with the ruleset in the request body | email, phone, name, SSN, credit card, address, IP, URL — anything Luna-2's `input_pii` detects | + +When AC's `Luna2Evaluator` calls Protect, Galileo records a real Protect trace. +The hook also writes a per-prompt trace to your project's log stream via +`GalileoLogger.add_protect_span`, **reusing the same `trace_id`** — so clicking +through in the project view lands on the actual Protect call. No second +Protect roundtrip. + +--- + +## Why "stage_type=local"? + +Galileo Protect supports two ways to ship rulesets: + +- **central** — ruleset is stored on Galileo against a stage. Every caller of + the stage gets the same rules. Good for org-wide policy you don't want clients + to redefine. +- **local** — caller sends `prioritized_rulesets` in the *payload* on every + invoke. The stage is just a project anchor (it owns no rules of its own). + Good for fast iteration on rules without re-deploying anything Galileo-side. + +We use **local** because the policy lives in AC's control config — AC's +`Luna2Evaluator` builds the ruleset from that config and ships it on every call. +One source of truth, no Galileo-side ruleset to keep in sync. The senior dev +note that drove this: *"the protect API seems like a good endpoint to continue +to use… stage==local mode that we can use to send RuleSets in the payload… +get rid of all other.. 
which adds latency in the hot path."* + +--- + +## What `@control()` does + +The hook is a few lines: + +```python +@control() +async def check_prompt(prompt: str) -> str: + return prompt # no-op — the work happens in the decorator + +await check_prompt(prompt_from_cursor) +``` + +The decorator: + +1. Takes the function's `prompt` argument (the SDK's input-name preference list + includes `prompt`) and binds it to AC's `selector.path = "input"`. +2. Calls AC's `/api/v1/evaluation` with `step={type: llm, name: check_prompt, input: prompt}`, `stage: pre`. +3. AC iterates **every** control attached to the agent. Regex runs locally in + the AC server; `galileo.luna2` calls out to Galileo Protect. +4. AC short-circuits on the first deny. +5. If anything denies → SDK raises `ControlViolationError(control_name=…)`. +6. SDK also POSTs an observability event per control evaluated to + `/api/v1/observability/events`, which is what populates the AC UI dashboard. + +The hook just translates `ControlViolationError` → Cursor's deny shape and +return-success → Cursor's allow shape. It doesn't know there's a regex check or +a Luna-2 check; it doesn't talk to Galileo. Adding a third control tomorrow +(toxicity, prompt injection, …) is a `setup.py` edit. The hook stays untouched. 
+ +--- + +## What happens when a Cursor prompt is submitted + +```mermaid +sequenceDiagram + participant U as Developer + participant C as Cursor + participant H as Hook (Python) + participant A as AC server + participant P as Galileo Protect + participant G as Galileo project + + U->>C: types prompt + Enter + C->>H: stdin {prompt} + H->>H: agent_control.init() + H->>A: @control() → /api/v1/evaluation + par run all attached controls + A->>A: regex (block-secrets-v4) — local + A->>P: galileo.luna2 → /v1/protect/invoke (ruleset in body) + P-->>A: {status, trace_id, metric_results.value} + end + A-->>H: {is_safe, matches:[...]} + H->>A: /api/v1/observability/events (per-control events) + H->>G: GalileoLogger.add_protect_span (reuses trace_id) + H->>C: stdout {continue: true | false, user_message} + C-->>U: prompt sent OR block message +``` + +If the verdict is **deny**: + +- Cursor shows the developer the user_message (`🔒 Blocked: prompt looks like + it contains a secret/API key.` or `🪪 Blocked: prompt contains PII`). +- AC UI (localhost:4000) shows a deny event under the right control. +- Galileo project's `cursor-hooks` log stream shows a `cursor-hook` trace with + `decision:deny`. PII denies include a Protect span linking to the Protect + trace. + +If the verdict is **allow**: + +- Prompt goes to Cursor's LLM as usual. +- AC UI shows non_match events for the controls that ran. +- Galileo project trace is written with `decision:allow`. 
+ +--- + +## Repo layout + +``` +.cursor/ + hooks.json # Cursor reads this on startup + hooks/ + before_submit_protect_guardrails.py # the hook +scripts/ + setup.py # provisions AC + Galileo entities (run once) + test_hook.py # 40-prompt battery (15 wrong + 5 right per guardrail) +Dockerfile.server # extends AC image w/ luna2 evaluator + 3 sed patches +docker-compose.yml # postgres + AC server + AC UI +requirements.txt # SDK deps for the venv +.env / .env.example +``` + +### Why the Dockerfile patches AC's source + +The AC base image (`galileoai/agent-control-server:latest`) ships with the +Galileo Luna-2 evaluator source at `/app/evaluators/contrib/galileo` but +doesn't install it into the venv, so AC's evaluator registry never sees it. We +install it from that in-image source with `--no-deps`. The PyPI package's +`[galileo]` extra pulls a newer `agent-control-models` that breaks the server. + +Three small `sed`s on top: + +1. Add `"input_pii"` to `Luna2Metric` Literal. Galileo Protect's REST API uses + `input_pii` as the metric name, but AC's evaluator config restricts metrics + to a whitelist that only includes `pii_detection`. Without this our control + would fail validation at PUT time. +2. Add `"not_empty"` to `Luna2Operator` Literal. Same reason — Protect uses + `not_empty`, the whitelist doesn't include it. +3. Patch `_evaluate_local_stage` to forward `stage_name` from config (Protect + requires it even for local stages — the stage is the project anchor) and + send `target_value: null` for `not_empty` (AC defaulted to `0`, which makes + Protect not trigger). + +These are tiny diffs in the Dockerfile itself, no upstream fork needed. + +--- + +## Why AC runs locally (per laptop) + +Each developer runs the AC stack (postgres + server + UI) in Docker on their +own laptop. The hook talks to `localhost:8000` — sub-millisecond round trip. +Every developer can tweak their controls in `localhost:4000` without affecting +others. 
Galileo Protect remains the centralized cloud service for the ML check. + +Trade-off: the AC observability data is per-laptop. If you want shared dashboards +across the team you'd run a shared AC server somewhere — code change is +just `AGENT_CONTROL_URL`. + +--- + +## Setup + +### Prerequisites + +- macOS or Linux +- Docker Desktop running +- Python 3.12 (for the venv used by `setup.py` and the hook) +- A Galileo API key + +### One-time, per developer + +```bash +# 1. Clone, enter the repo, create the venv +python3.12 -m venv .venv +.venv/bin/pip install -r requirements.txt + +# 2. Configure +cp .env.example .env +# Edit .env: GALILEO_API_KEY=…, GALILEO_PROJECT=protect-cursor (or your project) +# GALILEO_CONSOLE_URL=https://console.demo-v2.galileocloud.io (if self-hosted) + +# 3. Bring up Agent Control locally (postgres + server + UI) +docker compose up -d --build +curl -s http://localhost:8000/health # → {"status":"healthy", ...} + +# 4. Provision the agent + controls + Galileo local stage +.venv/bin/python3 scripts/setup.py +# creates: AC agent cursor-protect-v4 +# AC controls block-secrets-v4 (regex), block-pii-v4 (galileo.luna2) +# Galileo local stage Cursor Protect v4 (local) +# safe to re-run; updates in place + +# 5. Run the 40-prompt battery (15 wrong + 5 right for each guardrail) +.venv/bin/python3 scripts/test_hook.py +# → ALL PASS (40/40) + +# 6. Restart Cursor (Cmd+Q, then reopen the workspace) +# Cursor caches hooks.json on startup; until you restart, prompts won't be hooked. +``` + +### Try it in Cursor + +After Cmd+Q and reopen, paste any of these and watch them get blocked: + +``` +Why is AWS rejecting AKIAIOSFODNN7EXAMPLE in production? 
+ghp_aBcDeF1234567890aBcDeF1234567890aBcDeF leaked into the repo +Stripe webhook failing — sk_live_XXXXXXXXXXXXXXXXXXXXXXXX + +Customer alice.smith@example.com keeps getting 500s +Call John Doe at +1-415-555-0182 about the bug +Investigate the row with SSN 123-45-6789 +``` + +These should pass through: + +``` +Refactor this function to remove duplicated null checks. +Write unit tests for a JWT refresh token validator. +``` + +--- + +## Where to look at the data + +| What | Where | +|---|---| +| AC controls + per-control deny/allow counts | http://localhost:4000 → `cursor-protect-v4` | +| Per-prompt Galileo trace (allow + deny) | Galileo console → your project → log streams → `cursor-hooks` | +| Underlying Protect call (every PII check) | Galileo console → Protect Traces | + +--- + +## Adding new patterns / controls + +**Secrets regex:** edit `SECRETS_PATTERN` in [scripts/setup.py](scripts/setup.py) +and re-run `.venv/bin/python3 scripts/setup.py`. The script updates the +existing control in place. + +**PII categories:** Luna-2's `input_pii` already covers email / phone / name / +SSN / credit card / address / IP / URL out of the box (see the rule in +`PII_CONTROL`). To restrict to specific categories, change `operator` to `any` +and pass `target_value: ["email", "ssn"]` in the control config. + +**A new check entirely:** add another control spec in `setup.py`, attach it to +the agent, re-run `setup.py`. Then add a branch to `_deny_message` in +[the hook](.cursor/hooks/before_submit_protect_guardrails.py) so the user-facing +message is right. That's it — the hook code path doesn't change.
+ +--- + +## Environment variables + +| Var | Default | Purpose | +|---|---|---| +| `AGENT_CONTROL_URL` | `http://localhost:8000` | AC server base URL | +| `AC_AGENT_NAME` | `cursor-protect-v4` | AC agent the hook evaluates against | +| `GALILEO_API_KEY` | — | Required for any Galileo call (Protect + log_stream traces) | +| `GALILEO_CONSOLE_URL` | `https://console.galileo.ai` | Console URL; API host derived by replacing `console.` with `api.` | +| `GALILEO_PROJECT` | — | Galileo project for stages + traces | +| `GALILEO_PROTECT_LOCAL_STAGE` | `Cursor Protect v4 (local)` | Local stage AC's luna2 evaluator points at | +| `GALILEO_LOG_STREAM` | `cursor-hooks` | Project log stream the hook writes traces to | +| `CURSOR_PROTECT_FAIL_MODE` | `allow` | `allow` (fail-open) or `deny` (fail-closed) when a backend is unreachable | +| `CURSOR_PROTECT_DEBUG` | `false` | `true` → print diagnostics to stderr (visible in Cursor → Settings → Hooks) | +| `CURSOR_PROTECT_LOG_RAW` | `false` | `true` → skip PII redaction in logged traces (debug only) | + +--- + +## Troubleshooting + +**Prompts go through unblocked after switching hooks.** Cursor caches +`hooks.json` on startup — Cmd+Q (full quit) and reopen the workspace. + +**`AC server unreachable`.** `docker ps` for `agent_control_server`. If missing, +`docker compose up -d --build`. Health: `curl http://localhost:8000/health`. + +**`ModuleNotFoundError: agent_control` from the hook.** Use the venv: +`.venv/bin/python3` is what `hooks.json` already points at. System Python +(often 3.9 on macOS) doesn't have the SDK. + +**No events in AC UI.** The SDK only flushes when `agent_control.ashutdown()` +runs. The hook calls it in a `finally` block — if the process is killed +mid-flight events can be lost. Run `test_hook.py` to verify the path works. + +**Galileo project trace has `decision:deny` but no Protect span.** That's a +secrets deny — regex is local to AC, no Protect call to attach.
Only PII denies +carry a Protect span (linked via `trace_id` to the actual Protect call). diff --git a/examples/agent-control-protect-cursor/docker-compose.yml b/examples/agent-control-protect-cursor/docker-compose.yml new file mode 100644 index 00000000..c8ee9187 --- /dev/null +++ b/examples/agent-control-protect-cursor/docker-compose.yml @@ -0,0 +1,66 @@ +# Agent Control + Galileo Luna-2 — local FDE stack +# +# Usage: +# docker compose up -d # start +# docker compose down -v # stop + wipe data +# curl http://localhost:8000/health +# +# Reads GALILEO_API_KEY and GALILEO_CONSOLE_URL from .env automatically. +# Agent Control UI dashboard is available at http://localhost:4000. + +services: + postgres: + image: postgres:16-alpine + container_name: agent_control_postgres + ports: + - "${AGENT_CONTROL_DB_HOST_PORT:-5432}:5432" + environment: + POSTGRES_DB: agent_control + POSTGRES_USER: agent_control + POSTGRES_PASSWORD: "${AGENT_CONTROL_POSTGRES_PASSWORD:-agent_control}" + volumes: + - pgdata:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U agent_control -d agent_control"] + interval: 5s + timeout: 5s + retries: 10 + restart: unless-stopped + + server: + platform: linux/amd64 + build: + context: . 
+ dockerfile: Dockerfile.server + image: agent-control-server-galileo:local + container_name: agent_control_server + ports: + - "${AGENT_CONTROL_SERVER_HOST_PORT:-8000}:8000" + environment: + AGENT_CONTROL_DB_URL: "postgresql+psycopg://agent_control:${AGENT_CONTROL_POSTGRES_PASSWORD:-agent_control}@postgres:5432/agent_control" + AGENT_CONTROL_HOST: 0.0.0.0 + AGENT_CONTROL_PORT: 8000 + AGENT_CONTROL_API_KEY_ENABLED: "false" + AGENT_CONTROL_CORS_ORIGINS: "${AGENT_CONTROL_CORS_ORIGINS:-http://localhost:4000}" + # Galileo credentials — passed through so Luna-2 evaluator can call Galileo Protect + GALILEO_API_KEY: "${GALILEO_API_KEY:-}" + GALILEO_CONSOLE_URL: "${GALILEO_CONSOLE_URL:-https://console.demo-v2.galileocloud.io}" + depends_on: + postgres: + condition: service_healthy + restart: unless-stopped + + ui: + platform: linux/amd64 + image: galileoai/agent-control-ui:latest + container_name: agent_control_ui + ports: + - "${AGENT_CONTROL_UI_HOST_PORT:-4000}:4000" + environment: + AGENT_CONTROL_SERVER_URL: "http://localhost:8000" + depends_on: + - server + restart: unless-stopped + +volumes: + pgdata: diff --git a/examples/agent-control-protect-cursor/requirements.txt b/examples/agent-control-protect-cursor/requirements.txt new file mode 100644 index 00000000..4a5e9603 --- /dev/null +++ b/examples/agent-control-protect-cursor/requirements.txt @@ -0,0 +1,3 @@ +agent-control-sdk +agent-control-evaluators[galileo] +python-dotenv>=1.0 diff --git a/examples/agent-control-protect-cursor/scripts/setup.py b/examples/agent-control-protect-cursor/scripts/setup.py new file mode 100644 index 00000000..fdc4036f --- /dev/null +++ b/examples/agent-control-protect-cursor/scripts/setup.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python3 +"""Provision the Cursor guardrails hook — Agent Control agent + controls. 
+ +Defines two controls on a single AC agent so the runtime hook can issue one +@control() call and let AC orchestrate: + - block-secrets-v4 (regex) — server-local match on the prompt input + - block-pii-v4 (galileo.luna2) — calls Galileo Protect with rules in + payload (stage_type=local) against the + Galileo local stage created here + +Re-running is safe; existing rows are updated in place. Run once per machine +(or whenever you change the patterns/configs in this file). + +Usage: + .venv/bin/python3 scripts/setup.py +""" + +from __future__ import annotations + +import asyncio +import json +import os +import sys +import urllib.parse +import urllib.request +import urllib.error +from pathlib import Path +from typing import Any + +from dotenv import load_dotenv + +from agent_control import AgentControlClient + +load_dotenv(Path(__file__).resolve().parent.parent / ".env") + +AGENT_NAME = os.getenv("AC_AGENT_NAME", "cursor-protect-v4") +SERVER_URL = os.getenv("AGENT_CONTROL_URL", "http://localhost:8000") +GALILEO_PROJECT = os.getenv("GALILEO_PROJECT", "protect-cursor") +GALILEO_API_KEY = os.getenv("GALILEO_API_KEY", "") +GALILEO_CONSOLE_URL = os.getenv("GALILEO_CONSOLE_URL", "https://console.galileo.ai").rstrip("/") +GALILEO_LOCAL_STAGE = os.getenv("GALILEO_PROTECT_LOCAL_STAGE", "Cursor Protect v4 (local)") + + +SECRETS_PATTERN = "|".join([ + r"AKIA[0-9A-Z]{16}", + r"ASIA[0-9A-Z]{16}", + r"gh[pousr]_[A-Za-z0-9]{36,}", + r"github_pat_[A-Za-z0-9_]{82}", + r"sk-ant-[A-Za-z0-9_\-]{40,}", + r"sk-[A-Za-z0-9]{32,}", + r"sk_(?:live|test)_[A-Za-z0-9]{24,}", + r"xox[abposr]-[A-Za-z0-9\-]{10,}", + r"eyJ[A-Za-z0-9_\-]+\.eyJ[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+", + r"-----BEGIN (?:[A-Z ]+ )?PRIVATE KEY-----", +]) + + +SECRETS_CONTROL = { + "name": "block-secrets-v4", + "data": { + "description": "Block common secret formats in the Cursor prompt input.", + "enabled": True, + "execution": "server", + "scope": {"step_types": ["llm"], "stages": ["pre"]}, + "condition": { + "selector": 
{"path": "input"}, + "evaluator": { + "name": "regex", + "config": {"pattern": SECRETS_PATTERN, "flags": []}, + }, + }, + "action": {"decision": "deny"}, + "tags": ["cursor", "secrets", "input-filter", "guardrails", "sdk"], + }, +} + + +def PII_CONTROL(stage_name: str) -> dict: + return { + "name": "block-pii-v4", + "data": { + "description": "Block PII via Galileo Protect (luna2, local stage).", + "enabled": True, + "execution": "server", + "scope": {"step_types": ["llm"], "stages": ["pre"]}, + "condition": { + "selector": {"path": "input"}, + "evaluator": { + "name": "galileo.luna2", + "config": { + "stage_type": "local", + "stage_name": stage_name, + "metric": "input_pii", + "operator": "not_empty", + "target_value": 0, + "galileo_project": GALILEO_PROJECT, + "payload_field": "input", + "on_error": "allow", + "timeout_ms": 8000, + }, + }, + }, + "action": {"decision": "deny"}, + "tags": ["cursor", "pii", "luna2", "galileo-protect", "guardrails", "sdk"], + }, + } + + +# ── Galileo: ensure the local stage exists ---------------------------------- + +def _galileo_api() -> str: + return GALILEO_CONSOLE_URL.replace("console.", "api.") if "console." 
def _decode_json(raw: bytes) -> Any:
    """Decode *raw* as JSON, returning ``None`` for an empty or malformed body."""
    if not raw:
        return None
    try:
        return json.loads(raw)
    except ValueError:  # JSONDecodeError and UnicodeDecodeError are both ValueErrors
        return None


def _gal_request(method: str, path: str, body: dict | None = None) -> tuple[int, Any]:
    """Send one JSON request to the Galileo API.

    Args:
        method: HTTP verb to use.
        path: Path (plus optional query string) appended to ``_galileo_api()``.
        body: Optional JSON-serialisable request body.

    Returns:
        ``(status, payload)``. ``payload`` is the decoded JSON body, or ``None``
        when the body is empty or is not valid JSON. HTTP error responses are
        returned the same way rather than raised.

    Raises:
        urllib.error.URLError: If the request fails without an HTTP response
            (only ``HTTPError`` is handled here).
    """
    headers = {"Galileo-API-Key": GALILEO_API_KEY, "Content-Type": "application/json"}
    data = json.dumps(body).encode() if body is not None else None
    req = urllib.request.Request(f"{_galileo_api()}{path}", data=data, headers=headers, method=method)
    try:
        with urllib.request.urlopen(req, timeout=15) as r:
            # A success reply may legitimately carry no body; do not crash on it.
            return r.status, _decode_json(r.read())
    except urllib.error.HTTPError as exc:
        try:
            raw = exc.read()
        except OSError:
            return exc.code, None
        return exc.code, _decode_json(raw)


def _resolve_galileo_project_id() -> str | None:
    """Look up the id of the project named ``GALILEO_PROJECT``.

    Returns:
        The ``id`` of the first project whose ``name`` equals
        ``GALILEO_PROJECT``, or ``None`` when the lookup does not return 200,
        the payload is not a list, or no project matches.
    """
    # Encode the name so spaces, '&', '#', etc. cannot corrupt the query string
    # (the stage lookup elsewhere in this file quotes its value the same way).
    encoded_name = urllib.parse.quote(GALILEO_PROJECT, safe="")
    status, projects = _gal_request("GET", f"/projects/all?project_name={encoded_name}")
    if status != 200 or not isinstance(projects, list):
        return None
    for project in projects:
        if isinstance(project, dict) and project.get("name") == GALILEO_PROJECT:
            return project.get("id")
    return None
async def upsert_agent(client: AgentControlClient) -> None:
    """Register ``AGENT_NAME`` with the AC server and report the outcome.

    Posts the agent (with an empty step list) to the ``initAgent`` endpoint,
    raises on a non-success response, then prints ``created`` or
    ``already exists`` according to the ``created`` flag in the reply.
    """
    init_payload = {
        "agent": {
            "agent_name": AGENT_NAME,
            "agent_description": "Cursor guardrails hook (SDK + @control + observability).",
        },
        "steps": [],
    }
    reply = await client.http_client.post("/api/v1/agents/initAgent", json=init_payload)
    reply.raise_for_status()
    outcome = "created" if reply.json().get("created", False) else "already exists"
    print(f" agent {AGENT_NAME}: {outcome}")


async def upsert_control(client: AgentControlClient, spec: dict) -> int:
    """Create the control described by *spec*, or update it if it already exists.

    Args:
        client: Connected Agent Control client.
        spec: Control spec with ``name`` and ``data`` keys.

    Returns:
        The id of the created or updated control.

    Raises:
        RuntimeError: If the server answers 409 but no control with that name
            can be located afterwards.
    """
    name = spec["name"]
    put_reply = await client.http_client.put("/api/v1/controls", json=spec)

    # Anything other than a 409 conflict is the plain "create" path.
    if put_reply.status_code != 409:
        put_reply.raise_for_status()
        new_id = put_reply.json()["control_id"]
        print(f" control {name}: created (id={new_id})")
        return new_id

    # Conflict: look the control up by name and overwrite its data in place.
    existing_id = await _find_control_id(client, name)
    if existing_id is None:
        raise RuntimeError(f"control {name!r} reported 409 but cannot be found")
    data_reply = await client.http_client.put(
        f"/api/v1/controls/{existing_id}/data", json={"data": spec["data"]}
    )
    data_reply.raise_for_status()
    print(f" control {name}: updated (id={existing_id})")
    return existing_id


async def _find_control_id(client: AgentControlClient, name: str) -> int | None:
    """Return the id of the control called *name*, or ``None`` if none is listed.

    The agent's own control list is consulted first, then the global control
    list; the second request is only made when the first yields no match.
    """
    listing_urls = (f"/api/v1/agents/{AGENT_NAME}/controls", "/api/v1/controls")
    for url in listing_urls:
        listing = await client.http_client.get(url)
        if listing.status_code != 200:
            continue
        for entry in listing.json().get("controls", []):
            if entry.get("name") == name:
                return entry.get("id")
    return None
f"/api/v1/agents/{AGENT_NAME}/controls/{control_id}" + ) + if resp.status_code not in (200, 201, 409): + resp.raise_for_status() + + +async def main() -> int: + print(f"Agent Control: {SERVER_URL}") + print(f"Agent: {AGENT_NAME}") + print() + + print("[1/3] Ensure Galileo local stage") + pii_ready = ensure_galileo_local_stage() + print() + + print("[2/3] Ensure AC agent + controls") + async with AgentControlClient(base_url=SERVER_URL) as client: + try: + await client.health_check() + except Exception as exc: + print(f" ✗ AC server unreachable at {SERVER_URL}: {exc}") + print(" Start it with: docker compose up -d") + return 1 + + await upsert_agent(client) + + secrets_id = await upsert_control(client, SECRETS_CONTROL) + await associate(client, secrets_id) + + if pii_ready: + pii_id = await upsert_control(client, PII_CONTROL(GALILEO_LOCAL_STAGE)) + await associate(client, pii_id) + else: + print(" block-pii-v4: skipped (Galileo local stage not ready)") + print() + + print("[3/3] Done.") + print(f" UI: http://localhost:4000") + print(f" agent '{AGENT_NAME}'") + print(f" → block-secrets-v4 (regex)") + print(f" → block-pii-v4 (galileo.luna2, stage_type=local, metric=input_pii)") + return 0 + + +if __name__ == "__main__": + sys.exit(asyncio.run(main())) diff --git a/examples/agent-control-protect-cursor/scripts/test_hook.py b/examples/agent-control-protect-cursor/scripts/test_hook.py new file mode 100644 index 00000000..445f74d3 --- /dev/null +++ b/examples/agent-control-protect-cursor/scripts/test_hook.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +"""Drive the Cursor guardrails hook with a 40-prompt battery and print results. + +20 prompts target the secrets guardrail (15 with secrets that should deny, 5 +clean that should allow). 20 prompts target the PII guardrail (15 with PII +that should deny, 5 clean that should allow). 
+""" +import json +import subprocess +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +HOOK = str(ROOT / ".cursor/hooks/before_submit_protect_guardrails.py") +PY = str(ROOT / ".venv/bin/python3") + + +# Synthetic secret-shaped fixtures, assembled at runtime so source-scanning tools +# (GitHub push protection, etc.) don't flag this demo file. Values are not real +# credentials — they exist only to exercise the guardrail's regexes. +_STRIPE_LIVE = "_".join(["sk", "live", "4eC39HqLyjWDarjtT1zdp7dc"]) +_STRIPE_TEST = "_".join(["sk", "test", "4eC39HqLyjWDarjtT1zdp7dc"]) +_SLACK_BOT = "-".join(["xoxb", "1234567890", "aBcDeFgHiJkLmNoP123456"]) + + +# ── 20 prompts targeting the secrets guardrail ────────────────────────────── +SECRETS_CASES = [ + # 15 with secrets — every one of these should deny. + ("AWS access key id", + "Why is AWS rejecting AKIAIOSFODNN7EXAMPLE in production?", + "deny"), + ("AWS access key id (alt)", + "Config has AKIA1234567890ABCDEF — getting 403 from S3", + "deny"), + ("AWS STS temp credential", + "STS token ASIAQXJZK3FE9HK4LMNO not working for assumed role", + "deny"), + ("GitHub PAT", + "ghp_aBcDeF1234567890aBcDeF1234567890aBcDeF leaked into the repo", + "deny"), + ("GitHub OAuth token", + "Why is gho_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx returning 401?", + "deny"), + ("GitHub server-to-server", + "App auth failing with ghs_yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy", + "deny"), + ("GitHub user-to-server", + "Refresh ghu_zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz before next call", + "deny"), + ("GitHub fine-grained PAT", + "Token github_pat_11ABCDEFG0aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789ab is in env", + "deny"), + ("Anthropic key", + "Anthropic call failing with sk-ant-api03-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + "deny"), + ("OpenAI key", + "Why is sk-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA giving 429?", + "deny"), + ("Stripe live secret", + f"Stripe webhook failing — 
{_STRIPE_LIVE}", + "deny"), + ("Stripe test secret", + f"Test mode broken with {_STRIPE_TEST}", + "deny"), + ("Slack bot token", + f"{_SLACK_BOT} stopped posting", + "deny"), + ("JWT in body", + "Decode failure: eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTYifQ.signature_bytes_here", + "deny"), + ("PEM private key", + "Found this in a config -----BEGIN RSA PRIVATE KEY----- MIIEpAIBAAKCAQEA", + "deny"), + + # 5 clean — should allow. + ("clean — refactor", + "Refactor this function to remove duplicated null checks.", + "allow"), + ("clean — circuit breaker", + "Help me design a circuit breaker for downstream services.", + "allow"), + ("clean — index perf", + "Why is my postgres query slow on this composite index?", + "allow"), + ("clean — async port", + "Convert this callback-based function to async/await.", + "allow"), + ("clean — DI explainer", + "Explain how dependency injection works in TypeScript.", + "allow"), +] + + +# ── 20 prompts targeting the PII guardrail ────────────────────────────────── +PII_CASES = [ + # 15 with PII — every one of these should deny. 
+ ("email", + "Customer alice.smith@example.com keeps getting 500s — what's wrong?", + "deny"), + ("phone + name", + "Call John Doe at +1-415-555-0182 about the bug", + "deny"), + ("SSN", + "Investigate why the row with SSN 123-45-6789 has duplicate entries", + "deny"), + ("address", + "Ship the package to 1600 Pennsylvania Avenue NW, Washington DC", + "deny"), + ("two emails", + "Loop in bob@example.com and alice@example.com on this incident", + "deny"), + ("name + DOB", + "Maria Garcia born 1985-03-12 cannot access her account", + "deny"), + ("credit card", + "Customer card 4242 4242 4242 4242 was declined yesterday", + "deny"), + ("IP address", + "User from 192.168.1.105 is hitting consistent 500s on /api", + "deny"), + ("email + phone", + "Support ticket — jane@example.com phone 555-867-5309", + "deny"), + ("SSN + name", + "David Johnson SSN 987-65-4321 cannot login since this morning", + "deny"), + ("name + address", + "Sara Lee at 742 Evergreen Terrace, Springfield IL has a billing dispute", + "deny"), + ("two phones", + "Reach the on-call at 212-555-1234 or 415-555-5678", + "deny"), + ("name + email", + "Tom Hanks (thanks@example.com) reports a missing record", + "deny"), + ("DOB + SSN", + "DOB 1990-12-25, SSN 555-44-3333 — please verify", + "deny"), + ("name + phone", + "Priya Patel at +44 20 7946 0958 can't connect from London", + "deny"), + + # 5 clean — should allow. 
+ ("clean — refactor", + "Refactor this function to remove duplicated null checks.", + "allow"), + ("clean — JWT tests", + "Write unit tests for a JWT refresh token validator.", + "allow"), + ("clean — SQL tune", + "Help me optimize this SQL query for the orders table.", + "allow"), + ("clean — CSV dedup", + "Generate a Python script to deduplicate a CSV by column 3.", + "allow"), + ("clean — DI explainer", + "Explain how dependency injection works in TypeScript.", + "allow"), +] + + +def run(prompt: str) -> dict: + payload = json.dumps({"hook_event_name": "beforeSubmitPrompt", "prompt": prompt}) + result = subprocess.run( + [PY, HOOK], + input=payload, + capture_output=True, + text=True, + timeout=60, + cwd=str(ROOT), + ) + try: + return json.loads(result.stdout.strip().splitlines()[-1]) + except Exception: + return {"continue": None, "raw": result.stdout, "stderr": result.stderr} + + +def run_battery(label: str, cases: list) -> int: + fmt = " {icon} {name:<28} → {decision:<5}" + print(f"\n— {label} ({len(cases)} prompts: 15 wrong + 5 right) —") + fails = 0 + counts = {"deny": 0, "allow": 0} + for name, prompt, expected in cases: + result = run(prompt) + cont = result.get("continue", True) + decision = "allow" if cont else "deny" + counts[decision] = counts.get(decision, 0) + 1 + ok = decision == expected + if not ok: + fails += 1 + icon = "✅" if ok else "❌" + print(fmt.format(icon=icon, name=name, decision=decision)) + print(f" ── {len(cases)-fails}/{len(cases)} match expected | denied={counts['deny']} allowed={counts['allow']}") + return fails + + +def main() -> int: + secrets_fails = run_battery("Secrets guardrail", SECRETS_CASES) + pii_fails = run_battery("PII guardrail", PII_CASES) + total_fails = secrets_fails + pii_fails + total_cases = len(SECRETS_CASES) + len(PII_CASES) + print() + if total_fails == 0: + print(f"ALL PASS ({total_cases}/{total_cases})") + return 0 + print(f"{total_fails} FAILURE(S) ({total_cases - total_fails}/{total_cases})") + return 1 + 
def _redact(text: str) -> str:
    """Return *text* unchanged; redaction is currently switched off.

    Args:
        text: The string that would otherwise be passed through the redactors.

    Returns:
        The same string, unmodified.
    """
    # Redaction temporarily disabled: redacted payloads were causing Protect to
    # report "Protect not invoked" in the UI. Pass the original text through so
    # Protect spans show TRIGGERED with the real prompt.
    # NOTE(review): while this is disabled every caller receives the raw text,
    # including any secrets or PII it contains — confirm that is acceptable
    # wherever the result is logged, and track re-enabling the code below.
    return text
    # Previous implementation, kept for reference until redaction is restored:
    # if os.environ.get("CURSOR_PROTECT_LOG_RAW", "").strip().lower() in {"1", "true", "yes", "on"}:
    #     return text
    # for pattern, replacement in _REDACTORS:
    #     text = pattern.sub(replacement, text)
    # return text