diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 0000000000..1f6f35d1f7 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,11 @@ +{ + "permissions": { + "deny": [ + "mcp__github__merge_pull_request", + "mcp__github__delete_file", + "mcp__github__fork_repository", + "mcp__github__create_repository", + "mcp__github__actions_run_trigger" + ] + } +} diff --git a/.claude/skills/dev-loop/SKILL.md b/.claude/skills/dev-loop/SKILL.md new file mode 100644 index 0000000000..6b56237e3f --- /dev/null +++ b/.claude/skills/dev-loop/SKILL.md @@ -0,0 +1,84 @@ +--- +name: dev-loop +description: Full autonomous development loop — implement, build, test, commit, push, create PR, monitor CI, fix failures until green +disable-model-invocation: true +allowed-tools: Bash(cmake *), Bash(ctest *), Bash(nproc), Bash(git add *), Bash(git commit *), Bash(git diff *), Bash(git describe *), Bash(git branch *), Bash(git status), Bash(git log *), Bash(git rev-parse *), Bash(clang-format *), Bash(sleep *), Bash(date *), Read, Write, Edit, Glob, Grep, Agent +--- + +# Dev Loop + +Complete a development task end-to-end: implement, build, test, push, create PR, monitor CI, fix failures. +Do NOT stop until CI is green or you are blocked. + +## Phase 1: Implement + +1. Read and understand the task from $ARGUMENTS +2. Explore relevant code +3. Implement the changes +4. Build: `cmake -S . -B cmake-build -DCMAKE_BUILD_TYPE=Release -DCOLLECTOR_VERSION=$(git describe --tags --abbrev=10 --long) && cmake --build cmake-build -- -j$(nproc)` + - If build fails, fix and retry +5. Test: `ctest --no-tests=error -V --test-dir cmake-build` + - If tests fail, fix and retry +6. Format: `clang-format --style=file -i ` +7. Commit: `git add` changed files, `git commit` with a descriptive message + +## Phase 2: Push and create PR + +Use the GitHub MCP server to push files and create a PR. +Do NOT use `git push` — it will fail (no SSH keys in this container). + +1. 
Get the current branch name and the list of changed files: + - `git branch --show-current` for the branch + - `git diff --name-only origin/HEAD..HEAD` for changed files +2. Use the GitHub MCP `push_files` tool to push the changed files directly to + the remote branch. This creates a commit via the GitHub API using the file + contents from your local workspace — it does not sync git history. + - owner: stackrox, repo: collector, branch: + - Read each changed file and include its content + - Provide a commit message +3. Search for an open PR for this branch via GitHub MCP +4. If no PR exists, create a draft PR via GitHub MCP + +## Phase 3: Monitor CI + +Loop until all checks pass or blocked (max 6 cycles, ~3 hours): + +1. Wait 10 minutes: `sleep 600` +2. Check CI status via GitHub MCP (PR checks, workflow runs) +3. Update PR body with an `## Agent Status` section: + ``` + ## Agent Status + **Last updated:** <`date -u +"%Y-%m-%d %H:%M UTC"`> + **CI cycle:** N of 6 + **Status:** PENDING | PASSED | FIXED | FLAKE | BLOCKED + **Details:** + ``` +4. Evaluate: + - **All checks passed** → update PR body, report success, stop + - **Still running** → continue loop + - **Failed** → + - Get job logs via GitHub MCP + - Diagnose: build error, test assertion, lint, infra flake + - If fixable: fix → build → test → push changed files via MCP → continue + - If infra flake: note as FLAKE, continue + - If not fixable: update PR body, report BLOCKED, stop + +## Phase 4: Check PR comments + +Before each CI cycle, check if there are new PR review comments via GitHub MCP. 
+If a reviewer left feedback: +- Address the feedback (edit code, fix issues) +- Build and test +- Push changed files via MCP +- Note in the Agent Status section what feedback was addressed + +## Completion + +Print summary: +``` +STATUS: PASSED | BLOCKED | TIMEOUT +Branch: +PR: +Cycles: N +Changes: +``` diff --git a/.claude/skills/task/SKILL.md b/.claude/skills/task/SKILL.md new file mode 100644 index 0000000000..a3ec9779ef --- /dev/null +++ b/.claude/skills/task/SKILL.md @@ -0,0 +1,43 @@ +--- +name: task +description: Implement a change — edit code, build, test, format, commit locally. No push. +disable-model-invocation: true +allowed-tools: Bash(cmake *), Bash(ctest *), Bash(nproc), Bash(git add *), Bash(git commit *), Bash(git diff *), Bash(git describe *), Bash(git branch *), Bash(git status), Bash(clang-format *), Read, Write, Edit, Glob, Grep, Agent +--- + +# Task + +Implement a change locally: edit, build, test, format, commit. +Do NOT push or create PRs — use /watch-ci for that. + +## Steps + +1. Read and understand the task from $ARGUMENTS +2. Explore relevant code in the repository +3. Implement the changes +4. Build: + - `cmake -S . -B cmake-build -DCMAKE_BUILD_TYPE=Release -DCOLLECTOR_VERSION=$(git describe --tags --abbrev=10 --long) && cmake --build cmake-build -- -j$(nproc)` + - If build fails, fix and retry +5. Run unit tests: + - `ctest --no-tests=error -V --test-dir cmake-build` + - If tests fail, fix and retry +6. Format changed C++ files: + - `clang-format --style=file -i ` +7. Commit: + - `git add` the changed files + - `git commit` with a descriptive message + +## STOP here. Report and wait. + +Print this summary and then STOP. Do not continue with any other actions. + +``` +TASK COMPLETE +Branch: +Commit: +Files changed: +Tests: +``` + +The user will review and decide whether to run /watch-ci. +Do NOT push, create branches, or create PRs. 
diff --git a/.claude/skills/watch-ci/SKILL.md b/.claude/skills/watch-ci/SKILL.md new file mode 100644 index 0000000000..b9e6cc8772 --- /dev/null +++ b/.claude/skills/watch-ci/SKILL.md @@ -0,0 +1,68 @@ +--- +name: watch-ci +description: Push files to existing remote branch via GitHub MCP, create PR if needed, monitor CI, fix failures until green +disable-model-invocation: true +allowed-tools: Bash(cmake *), Bash(ctest *), Bash(nproc), Bash(git add *), Bash(git commit *), Bash(git diff *), Bash(git describe *), Bash(git branch *), Bash(git status), Bash(git log *), Bash(git rev-parse *), Bash(clang-format *), Bash(sleep *), Bash(date *), Read, Write, Edit, Glob, Grep +--- + +# Watch CI + +Push changed files via the GitHub MCP server, create PR if needed, and monitor CI until green. +Do NOT use `git push` — it will fail (no SSH keys in this container). + +## How pushing works + +Use the GitHub MCP `push_files` tool to send file contents directly to the remote +branch via the GitHub API. This does NOT sync local git history — it creates a new +commit on the remote with the file contents you provide. + +1. Get the branch name: `git branch --show-current` +2. Get changed files: `git diff --name-only origin/HEAD..HEAD` +3. Read each changed file's content +4. Call `push_files` with owner: stackrox, repo: collector, branch, files, and commit message + +## Steps + +1. **Push** changed files: + - Use the GitHub MCP `push_files` tool as described above + - If no files have changed since last push, skip + +2. **Find or create PR**: + - Use the GitHub MCP server to search for an open PR for this branch + - If no PR exists, create a draft PR via the GitHub MCP server + +3. 
**Monitor CI loop** (repeat until all checks pass or blocked): + - Wait 10 minutes: `sleep 600` + - Use the GitHub MCP server to get PR check status and workflow runs + - Update PR body with an `## Agent Status` section: + ``` + ## Agent Status + **Last updated:** <`date -u +"%Y-%m-%d %H:%M UTC"`> + **CI cycle:** N of 6 + **Status:** PENDING | PASSED | FIXED | FLAKE | BLOCKED + **Details:** + ``` + - Evaluate: + - **All checks passed** → update PR body, report success and stop + - **Checks still running** → report progress, continue loop + - **Checks failed** → + - Get job logs via the GitHub MCP server + - Diagnose: + - Build failure: read error, fix code + - Unit test failure: read assertion, fix code + - Lint failure: run `clang-format --style=file -i` + - Integration test infra flake (VM timeout, network): report as flake, continue + - Integration test real failure: analyze and fix code + - If fixable: fix → build → test → push changed files via MCP → continue loop + - If not fixable: update PR body, report diagnosis and stop + +4. **Safety limits**: + - Maximum 6 CI cycles (about 3 hours of monitoring) + - If exceeded, update PR body and stop + +5. **Summary**: end with a status line: + - `PASSED` — all checks green + - `PENDING` — checks still running + - `FIXED` — failure diagnosed and fix pushed + - `FLAKE` — infra failure, not a code issue + - `BLOCKED` — failure requires human intervention diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000000..c27184110d --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,82 @@ +# Collector development container +# Based on the collector-builder image which has all C++ dependencies pre-installed. +# Adds Claude Code, Go, and developer tooling for agent-driven development. +# +# Build environment: CentOS Stream 10 with clang, llvm, cmake, grpc, protobuf, +# libbpf, bpftool, and all other collector dependencies. 
+ +ARG COLLECTOR_BUILDER_TAG=master +FROM quay.io/stackrox-io/collector-builder:${COLLECTOR_BUILDER_TAG} + +# Install developer tooling not in the builder image +# Note: git, findutils, which, openssh-clients already in builder +# bubblewrap: Claude Code uses this for built-in command sandboxing +RUN dnf install -y \ + bubblewrap \ + clang-tools-extra \ + jq \ + socat \ + zsh \ + procps-ng \ + sudo \ + python3-pip \ + iptables \ + ipset \ + && dnf clean all + +# Determine architecture strings used by various download URLs +# uname -m gives aarch64 or x86_64 +# Go uses arm64/amd64, ripgrep/fd use aarch64/x86_64 +RUN ARCH=$(uname -m) \ + && GOARCH=$([ "$ARCH" = "aarch64" ] && echo "arm64" || echo "amd64") \ + # Install Go + && curl -fsSL "https://go.dev/dl/go1.23.6.linux-${GOARCH}.tar.gz" | tar -C /usr/local -xzf - \ + # Install ripgrep + && curl -fsSL "https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep-14.1.1-${ARCH}-unknown-linux-gnu.tar.gz" \ + | tar -xzf - --strip-components=1 -C /usr/local/bin "ripgrep-14.1.1-${ARCH}-unknown-linux-gnu/rg" \ + # Install fd + && curl -fsSL "https://github.com/sharkdp/fd/releases/download/v10.2.0/fd-v10.2.0-${ARCH}-unknown-linux-gnu.tar.gz" \ + | tar -xzf - --strip-components=1 -C /usr/local/bin "fd-v10.2.0-${ARCH}-unknown-linux-gnu/fd" + +ENV PATH="/usr/local/go/bin:${PATH}" +ENV GOPATH="/home/dev/go" +ENV PATH="${GOPATH}/bin:${PATH}" + +# Install Node.js (needed for Claude Code) +ARG NODE_VERSION=22 +RUN curl -fsSL https://rpm.nodesource.com/setup_${NODE_VERSION}.x | bash - \ + && dnf install -y nodejs \ + && dnf clean all + +# Install Claude Code +RUN npm install -g @anthropic-ai/claude-code + +# Install gcloud CLI (for Vertex AI auth and GCP VM management) +RUN curl -fsSL https://sdk.cloud.google.com > /tmp/install-gcloud.sh \ + && bash /tmp/install-gcloud.sh --disable-prompts --install-dir=/opt \ + && rm /tmp/install-gcloud.sh +ENV PATH="/opt/google-cloud-sdk/bin:${PATH}" + +# Create non-root dev user with 
passwordless sudo +RUN useradd -m -s /bin/zsh dev \ + && echo "dev ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers.d/dev \ + && mkdir -p /home/dev/.claude/debug /home/dev/.commandhistory \ + && chown -R dev:dev /home/dev/.claude /home/dev/.commandhistory + +# Install ansible for VM-based testing (optional, lightweight) +RUN pip3 install ansible-core + +# Firewall script for network isolation (optional, used with --dangerously-skip-permissions) +COPY --chmod=755 init-firewall.sh /usr/local/bin/init-firewall.sh +COPY --chmod=755 entrypoint.sh /usr/local/bin/entrypoint.sh + +USER dev +WORKDIR /workspace + +# Persist shell history and Claude state across rebuilds (volumes in devcontainer.json) +ENV HISTFILE=/home/dev/.commandhistory/.zsh_history + +ENV SHELL=/bin/zsh +ENV DEVCONTAINER=true + +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] diff --git a/.devcontainer/README.md b/.devcontainer/README.md new file mode 100644 index 0000000000..45c525f594 --- /dev/null +++ b/.devcontainer/README.md @@ -0,0 +1,140 @@ +# Collector Devcontainer + +Sandboxed Claude Code environment for developing collector. The agent works +in an isolated git worktree inside a container with no SSH keys — code is +pushed via GitHub MCP only. + +## Quick Start + +```bash +# 1. Build the image (one time) +docker build --platform linux/$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') \ + -t collector-dev:test -f .devcontainer/Dockerfile .devcontainer/ + +# 2. Set environment (add to shell profile) +export CLAUDE_CODE_USE_VERTEX=1 +export GOOGLE_CLOUD_PROJECT= +export GOOGLE_CLOUD_LOCATION=us-east5 +export ANTHROPIC_VERTEX_PROJECT_ID= +export GITHUB_TOKEN=github_pat_... + +# 3. Authenticate GCP +gcloud auth login +gcloud auth application-default login + +# 4. 
Run +.devcontainer/run.sh --interactive # manual control +.devcontainer/run.sh "add unit tests for ExternalIPsConfig" # autonomous +``` + +## Modes + +| Command | Description | +|---------|-------------| +| `run.sh "task"` | Autonomous `/dev-loop`: implement → push → PR → CI loop | +| `run.sh "/skill args"` | Run a specific skill | +| `run.sh --interactive` | Worktree + TUI, you drive | +| `run.sh --local` | Edit working tree directly, no worktree | +| `run.sh --shell` | Shell into the container | + +### Options + +| Flag | Description | +|------|-------------| +| `--branch ` | Custom branch name (default: `claude/agent-`) | +| `--no-tui` | Stream JSON output instead of TUI | +| `--debug` | Verbose MCP/auth logging | + +## Skills + +| Skill | Purpose | +|-------|---------| +| `/task` | Implement, build, test, format, commit. Stops after commit. | +| `/watch-ci` | Push via MCP, create PR, monitor CI, fix failures until green. | +| `/dev-loop` | Full cycle: `/task` then `/watch-ci` in one run. | + +## GitHub Token Setup + +Create a **fine-grained Personal Access Token** at: +https://github.com/settings/tokens?type=beta + +### Repository access + +Select the repositories the agent should work on (e.g., `stackrox/collector`). 
+ +### Required permissions + +| Permission | Access | Why | +|-----------|--------|-----| +| **Contents** | Read and write | Push files to branches via MCP | +| **Pull requests** | Read and write | Create/update PRs, read PR status | +| **Actions** | Read and write | List workflow runs, get job logs | +| **Commit statuses** | Read-only | Read CI check status | +| **Metadata** | Read-only | Required by GitHub for all PATs | + +### Optional permissions + +| Permission | Access | Why | +|-----------|--------|-----| +| Issues | Read-only | Read issue context if task references one | +| Discussions | Read-only | Read discussion context | + +### Permissions NOT needed + +Do not grant these — the agent does not need them. (`.claude/settings.json` separately denies the risky MCP tools, but it cannot restrict what the token itself is allowed to do.) + +- ~~Administration~~ — agent should not manage repo settings +- ~~Merge queues~~ — agent cannot merge PRs +- ~~Pages~~ — not relevant +- ~~Environments~~ — not relevant +- ~~Secrets~~ — agent should not access repo secrets + +## Security Model + +| Layer | Protection | +|-------|-----------| +| Container isolation | Agent can only access the host paths that are explicitly mounted | +| No SSH keys | `git push` over SSH fails — pushing is done through MCP `push_files` | +| Read-only mounts | gcloud credentials, gitconfig can't be modified | +| MCP deny rules | merge, delete, fork, create-repo, trigger-actions blocked | +| Worktree isolation | Agent works on a separate branch, can't touch your checkout | +| .git mount scoping | Worktree git dir writable, shared objects read-only* | + +*Note: `.git` is currently mounted read-write due to submodule init requirements. +The agent can't push over SSH (no SSH keys), so the risk is limited to local git state — which includes `.git/config` and `.git/hooks`, so review them if you do not trust a run.
+ +## Environment Variables + +| Variable | Required | Description | +|----------|----------|-------------| +| `GITHUB_TOKEN` | Yes (for MCP) | Fine-grained PAT (see above) | +| `CLAUDE_CODE_USE_VERTEX` | Yes | Set to `1` | +| `GOOGLE_CLOUD_PROJECT` | Yes | GCP project ID | +| `GOOGLE_CLOUD_LOCATION` | Yes | Vertex AI region (e.g., `us-east5`) | +| `ANTHROPIC_VERTEX_PROJECT_ID` | Yes | Usually same as `GOOGLE_CLOUD_PROJECT` | +| `COLLECTOR_DEV_IMAGE` | No | Docker image name (default: `collector-dev:test`) | + +## Worktree Management + +Worktrees are created in `/tmp/collector-worktrees/` and cleaned up on exit. + +```bash +# List active worktrees +git worktree list + +# Clean up stale worktrees +git worktree prune + +# Remove a specific worktree +git worktree remove /tmp/collector-worktrees/ +``` + +## Rebuilding the Image + +```bash +docker build --platform linux/$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') \ + -t collector-dev:test -f .devcontainer/Dockerfile .devcontainer/ + +# Clear cached Claude state (MCP registrations, theme, etc.) 
+docker volume rm collector-dev-claude +``` diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000000..863b8976da --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,66 @@ +{ + "name": "collector-dev", + "build": { + "dockerfile": "Dockerfile", + "args": { + "COLLECTOR_BUILDER_TAG": "master" + } + }, + "containerUser": "dev", + "workspaceMount": "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=delegated", + "workspaceFolder": "/workspace", + + "mounts": [ + "source=collector-dev-history,target=/home/dev/.commandhistory,type=volume", + "source=collector-dev-claude,target=/home/dev/.claude,type=volume", + "source=${localEnv:HOME}/.gitconfig,target=/home/dev/.gitconfig,type=bind,readonly", + "source=${localEnv:HOME}/.config/gcloud,target=/home/dev/.config/gcloud,type=bind,readonly", + "source=${localWorkspaceFolder}/.devcontainer,target=/workspace/.devcontainer,type=bind,readonly" + ], + + "runArgs": [ + "--init" + ], + + "customizations": { + "vscode": { + "extensions": [ + "llvm-vs-code-extensions.vscode-clangd", + "ms-vscode.cmake-tools", + "golang.go", + "ms-python.python" + ], + "settings": { + "cmake.sourceDirectory": "${workspaceFolder}", + "cmake.buildDirectory": "${workspaceFolder}/cmake-build/${buildType}", + "clangd.path": "/usr/bin/clangd", + "files.associations": { + "*.bpf.c": "c", + "*.skel.h": "c" + } + } + } + }, + + "postCreateCommand": "/usr/local/bin/init-firewall.sh || true", + + "containerEnv": { + "DEVCONTAINER": "true", + "NPM_CONFIG_IGNORE_SCRIPTS": "true", + "NPM_CONFIG_AUDIT": "true", + "NPM_CONFIG_FUND": "false", + "PYTHONDONTWRITEBYTECODE": "1" + }, + + "remoteEnv": { + "COLLECTOR_BUILDER_TAG": "master", + "CMAKE_BUILD_TYPE": "Release", + "CLOUDSDK_CONFIG": "/home/dev/.config/gcloud", + "GOOGLE_APPLICATION_CREDENTIALS": "/home/dev/.config/gcloud/application_default_credentials.json", + "CLAUDE_CODE_USE_VERTEX": "${localEnv:CLAUDE_CODE_USE_VERTEX}", 
+ "GOOGLE_CLOUD_PROJECT": "${localEnv:GOOGLE_CLOUD_PROJECT}", + "GOOGLE_CLOUD_LOCATION": "${localEnv:GOOGLE_CLOUD_LOCATION}", + "ANTHROPIC_VERTEX_PROJECT_ID": "${localEnv:ANTHROPIC_VERTEX_PROJECT_ID}", + "GITHUB_TOKEN": "${localEnv:GITHUB_TOKEN}" + } +} diff --git a/.devcontainer/entrypoint.sh b/.devcontainer/entrypoint.sh new file mode 100755 index 0000000000..2531b727ba --- /dev/null +++ b/.devcontainer/entrypoint.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Ensure Claude Code directories exist (volumes may mount as empty) +mkdir -p /home/dev/.claude/debug /home/dev/.commandhistory + +# Set defaults so Claude Code doesn't prompt on startup +claude config set --global theme dark 2> /dev/null || true +claude config set --global verbose false 2> /dev/null || true + +# Register GitHub MCP server if token is available +if [[ -n "${GITHUB_TOKEN:-}" ]]; then + if ! claude mcp add-json github \ + '{"type":"http","url":"https://api.githubcopilot.com/mcp","headers":{"Authorization":"Bearer '"$GITHUB_TOKEN"'","X-MCP-Toolsets":"context,repos,pull_requests,issues,actions,git"}}' \ + --scope user 2> /dev/null; then + echo "WARNING: Failed to register GitHub MCP server" >&2 + fi +else + echo "NOTE: GITHUB_TOKEN not set — GitHub MCP tools unavailable" >&2 +fi + +exec "$@" diff --git a/.devcontainer/init-firewall.sh b/.devcontainer/init-firewall.sh new file mode 100644 index 0000000000..b518fe41fd --- /dev/null +++ b/.devcontainer/init-firewall.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# Optional network firewall for use with --dangerously-skip-permissions mode. +# Restricts outbound traffic to only necessary services. +# Requires: --cap-add=NET_ADMIN on the container (not set by default). +# To enable: add "--cap-add=NET_ADMIN" to runArgs in devcontainer.json. + +set -euo pipefail + +if ! command -v iptables &> /dev/null; then + echo "iptables not available, skipping firewall setup" + exit 0 +fi + +if ! 
iptables -L &> /dev/null; then + echo "No NET_ADMIN capability, skipping firewall setup" + exit 0 +fi + +echo "Configuring network firewall..." + +# Allow loopback +iptables -A OUTPUT -o lo -j ACCEPT + +# Allow established connections +iptables -A OUTPUT -m state --state ESTABLISHED,RELATED -j ACCEPT + +# Allow DNS +iptables -A OUTPUT -p udp --dport 53 -j ACCEPT +iptables -A OUTPUT -p tcp --dport 53 -j ACCEPT + +# Allow GCP / Vertex AI (Claude Code backend + gcloud CLI + VM management) +# Vertex AI endpoints: https://{REGION}-aiplatform.googleapis.com +iptables -A OUTPUT -d oauth2.googleapis.com -j ACCEPT +iptables -A OUTPUT -d accounts.google.com -j ACCEPT +iptables -A OUTPUT -d www.googleapis.com -j ACCEPT +iptables -A OUTPUT -d storage.googleapis.com -j ACCEPT +iptables -A OUTPUT -d compute.googleapis.com -j ACCEPT +iptables -A OUTPUT -d cloudresourcemanager.googleapis.com -j ACCEPT +# Vertex AI regional endpoints (port 443; only the regions listed here are allowed — add yours if GOOGLE_CLOUD_LOCATION differs) +iptables -A OUTPUT -p tcp --dport 443 -d us-central1-aiplatform.googleapis.com -j ACCEPT +iptables -A OUTPUT -p tcp --dport 443 -d us-east5-aiplatform.googleapis.com -j ACCEPT +iptables -A OUTPUT -p tcp --dport 443 -d europe-west1-aiplatform.googleapis.com -j ACCEPT +iptables -A OUTPUT -d metadata.google.internal -j ACCEPT + +# Allow Claude API (direct Anthropic, if used alongside or instead of Vertex) +iptables -A OUTPUT -d api.anthropic.com -j ACCEPT +iptables -A OUTPUT -d statsig.anthropic.com -j ACCEPT +iptables -A OUTPUT -d sentry.io -j ACCEPT + +# Allow GitHub (git over HTTPS, gh CLI, REST API) and the hosted GitHub MCP endpoint that entrypoint.sh registers +iptables -A OUTPUT -d github.com -j ACCEPT +iptables -A OUTPUT -d api.github.com,api.githubcopilot.com -j ACCEPT + +# Allow container registries +iptables -A OUTPUT -d quay.io -j ACCEPT +iptables -A OUTPUT -d cdn.quay.io -j ACCEPT +iptables -A OUTPUT -d cdn01.quay.io -j ACCEPT +iptables -A OUTPUT -d cdn02.quay.io -j ACCEPT +iptables -A OUTPUT -d cdn03.quay.io -j ACCEPT +iptables -A OUTPUT -d registry.access.redhat.com -j ACCEPT + +# Allow SSH (for
GCP VM access during integration testing) +iptables -A OUTPUT -p tcp --dport 22 -j ACCEPT + +# Allow npm registry +iptables -A OUTPUT -d registry.npmjs.org -j ACCEPT + +# Allow Go module proxy +iptables -A OUTPUT -d proxy.golang.org -j ACCEPT +iptables -A OUTPUT -d sum.golang.org -j ACCEPT + +# Drop everything else +iptables -A OUTPUT -j DROP + +echo "Firewall configured." diff --git a/.devcontainer/mcp_protector_plan.md b/.devcontainer/mcp_protector_plan.md new file mode 100644 index 0000000000..08cd5279b0 --- /dev/null +++ b/.devcontainer/mcp_protector_plan.md @@ -0,0 +1,126 @@ +# MCP Security Proxy Plan + +Security proxy options for protecting Claude Code ↔ GitHub MCP communication. + +## Option 1: mcp-watchdog (Recommended) + +[bountyyfi/mcp-watchdog](https://github.com/bountyyfi/mcp-watchdog) — lightweight, +pattern-based security proxy. No ML models required. 273+ tests. + +### What it does + +- **Credential redaction** — catches 30+ secret patterns (GitHub PATs, AWS keys, JWTs, + etc.) in MCP responses. Prevents token leakage via PR bodies, commit messages, CI logs. +- **Prompt injection detection** — `` tag injection, role injection markers, + SANDWORM-style instructions, homoglyph evasion, HTML-encoded variants. +- **Tool integrity (rug pull detection)** — hashes tool definitions, alerts on schema changes. +- **ANSI/Unicode sanitization** — strips zero-width chars, bidirectional overrides, + escape sequences that hide instructions from users. +- **Command/SQL/SSRF injection** — shell metacharacters, reverse shell patterns, + cloud metadata access (AWS IMDS, GCP, Azure). +- **Filesystem scope enforcement** — blocks writes to `.git/config`, `.ssh/`, `.aws/`. +- **Rate limiting** — consent fatigue protection, notification injection blocking. +- **Cross-server flow tracking** — detects token propagation between MCP servers. 
+ +### Dependencies + +- Python 3.10+ (already in our container) +- `pip install mcp-watchdog` — core is pattern matching + entropy, no ML +- Optional `[semantic]` extra — adds Claude Haiku classifier (opt-in, not needed) +- Optional `[filesystem]` extra — adds inotify/FSEvents monitoring + +### Integration + +```bash +# In entrypoint.sh, wrap the GitHub MCP server: +claude mcp add-json github \ + '{"command":"mcp-watchdog","args":["--verbose","--url","https://api.githubcopilot.com/mcp","--headers","Authorization: Bearer TOKEN","--headers","X-MCP-Toolsets: repos,pull_requests,actions"]}' +``` + +### Open questions + +- [ ] Does `--url` mode support custom headers for HTTP upstream? +- [ ] Does `X-MCP-Toolsets` pass through the proxy correctly? +- [ ] Where does watchdog store tool integrity hashes? (needs persistence in volume) +- [ ] What's the per-request latency overhead for large CI log responses? +- [ ] How does `--verbose` output surface in headless/stream-json mode? + +### Install in Dockerfile + +```dockerfile +RUN pip3 install mcp-watchdog +``` + +Estimated size: ~10-20MB (pattern matching only, no ML). + +--- + +## Option 2: mcp-context-protector (Heavy) + +[trailofbits/mcp-context-protector](https://github.com/trailofbits/mcp-context-protector) — +Trail of Bits security wrapper with ML-based guardrails. + +### What it does + +- **TOFU pinning** — records tool definitions on first use, alerts on changes. +- **ANSI sanitization** — strips escape sequences. +- **LlamaFirewall guardrails** — ML-based prompt injection detection in responses. +- **Quarantine** — flags suspicious responses for manual review. + +### Why not (for now) + +- **LlamaFirewall is a hard dependency** — `llamafirewall>=1.0.3` in pyproject.toml, + not optional. Pulls in PyTorch, transformers, huggingface_hub, semgrep. +- **Adds ~3GB** to the container image. +- **Heavy per-request cost** — ML inference on every MCP response. 
+- **Better suited for centralized deployment** (shared proxy for team) rather + than per-devcontainer. + +### When to reconsider + +- If Trail of Bits makes LlamaFirewall optional (`pip install mcp-context-protector[guardrails]`) +- If deploying a shared MCP proxy for the whole team (not per-container) +- If prompt injection via CI logs becomes a demonstrated threat (not just theoretical) + +--- + +## Option 3: open-mcp-guardrails + +[interactive-inc/open-mcp-guardrails](https://github.com/interactive-inc/open-mcp-guardrails) — +policy-based guardrails proxy. Early stage (0 stars). + +- PII leak detection +- Secret exposure prevention +- Prompt injection blocking +- Policy-based access control + +Too early to evaluate. Worth watching. + +--- + +## Threat Model + +| Threat | No proxy | mcp-watchdog | mcp-context-protector | +|--------|----------|-------------|----------------------| +| Credential leak in MCP response | Unmitigated | 30+ patterns redacted | Guardrail may detect | +| Prompt injection in CI logs | Unmitigated | 70+ patterns blocked | ML-based detection | +| Tool definition change (rug pull) | Undetected | Hash-based detection | TOFU pinning | +| ANSI/Unicode hidden instructions | Unmitigated | Stripped | Stripped | +| SSRF to cloud metadata | Unmitigated | Blocked | Not covered | +| Command injection via MCP | Unmitigated | Shell/SQL patterns blocked | Not covered | +| Cross-server token propagation | Unmitigated | Flow tracking | Not covered | + +## Recommendation + +**Start with mcp-watchdog.** It covers more attack vectors than mcp-context-protector, +has no ML dependencies, and is designed for exactly our use case (wrapping MCP servers +for AI coding assistants). The credential redaction alone justifies the ~10MB install. + +### Implementation steps + +1. Test `mcp-watchdog --url` with custom headers locally +2. Add `pip3 install mcp-watchdog` to Dockerfile +3. Update entrypoint.sh to wrap GitHub MCP with watchdog +4. 
Verify `X-MCP-Toolsets` header passes through +5. Configure watchdog state persistence in `collector-dev-claude` volume +6. Test with `/watch-ci` to ensure CI log scanning works diff --git a/.devcontainer/run.sh b/.devcontainer/run.sh new file mode 100755 index 0000000000..b4dabc79e6 --- /dev/null +++ b/.devcontainer/run.sh @@ -0,0 +1,276 @@ +#!/usr/bin/env bash +# Launch Claude Code in the collector devcontainer with a task. +# +# Usage: +# .devcontainer/run.sh "task description" Autonomous: /task then /watch-ci +# .devcontainer/run.sh --interactive Worktree + TUI +# .devcontainer/run.sh --local ["task"] Edit working tree directly +# .devcontainer/run.sh --shell Shell into container +# +# Options: +# --branch <name> Branch name, also accepted as --branch=<name> (default: claude/agent-<epoch>-<pid>) +# --no-tui Stream JSON instead of TUI +# --debug Verbose MCP/auth logging +# +# Prerequisites: +# - Docker +# - gcloud auth login && gcloud auth application-default login +# - CLAUDE_CODE_USE_VERTEX=1 and related env vars (see CLAUDE.md) +# - GITHUB_TOKEN for GitHub MCP (PR creation, CI status) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +IMAGE="${COLLECTOR_DEV_IMAGE:-collector-dev:test}" +WORKTREE_BASE="/tmp/collector-worktrees" +DEBUG=false +NO_TUI=false +BRANCH_NAME="" +ACTIVE_WORKTREE="" + +# Parse global flags (accepted anywhere on the command line); everything else is kept, in order, as positional args +ARGS=() +for arg in "$@"; do + case "$arg" in + --debug) DEBUG=true ;; + --no-tui) NO_TUI=true ;; + --branch=*) BRANCH_NAME="${arg#--branch=}" ;; + --branch) BRANCH_NAME="__NEXT__" ;; + *) + if [[ "$BRANCH_NAME" == "__NEXT__" ]]; then + BRANCH_NAME="$arg" + else + ARGS+=("$arg") + fi + ;; + esac +done +# A --branch with no value after it would otherwise leave the sentinel behind and become the branch name +if [[ "$BRANCH_NAME" == "__NEXT__" ]]; then echo "ERROR: --branch requires a value" >&2; exit 1; fi +set -- "${ARGS[@]+"${ARGS[@]}"}" + +CLAUDE_CMD=(claude --dangerously-skip-permissions) + +if [[ "$DEBUG" == "true" ]]; then + CLAUDE_CMD+=(--debug) +fi + +if [[ "$NO_TUI" == "true" ]]; then + CLAUDE_CMD+=(--output-format stream-json --verbose) +fi + +# --- Preflight checks --- +check_docker() { + if !
command -v docker &> /dev/null; then + echo "ERROR: docker not found." >&2 + exit 1 + fi + if ! docker info &> /dev/null; then + echo "ERROR: Docker daemon not running." >&2 + exit 1 + fi +} + +check_image() { + if ! docker image inspect "$IMAGE" &> /dev/null; then + echo "ERROR: Image '$IMAGE' not found. Build with:" >&2 + echo " docker build --platform linux/$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') -t $IMAGE -f .devcontainer/Dockerfile .devcontainer/" >&2 + exit 1 + fi +} + +check_gcloud() { + if [[ ! -f "$HOME/.config/gcloud/application_default_credentials.json" ]]; then + echo "ERROR: Run: gcloud auth application-default login" >&2 + exit 1 + fi +} + +check_vertex_env() { + local missing=() + [[ -z "${CLAUDE_CODE_USE_VERTEX:-}" ]] && missing+=(CLAUDE_CODE_USE_VERTEX) + [[ -z "${GOOGLE_CLOUD_PROJECT:-}" ]] && missing+=(GOOGLE_CLOUD_PROJECT) + [[ -z "${GOOGLE_CLOUD_LOCATION:-}" ]] && missing+=(GOOGLE_CLOUD_LOCATION) + if [[ ${#missing[@]} -gt 0 ]]; then + echo "ERROR: Missing env vars: ${missing[*]} (see CLAUDE.md)" >&2 + exit 1 + fi +} + +preflight() { + check_docker + check_image + check_gcloud + check_vertex_env + if [[ -z "${GITHUB_TOKEN:-}" ]]; then + echo "WARNING: GITHUB_TOKEN not set. GitHub MCP (PR creation, CI) will not work." >&2 + fi +} + +# --- Worktree --- +setup_worktree() { + local branch + if [[ -n "$BRANCH_NAME" ]]; then + branch="$BRANCH_NAME" + else + branch="claude/agent-$(date +%s)-$$" + fi + local safe_name="${branch//\//-}" + local worktree_dir="${WORKTREE_BASE}/${safe_name}" + # Callers run this inside $(...), where bash clears errexit: fail explicitly, and keep git's stderr visible (stdout is reserved for the returned path) + mkdir -p "$WORKTREE_BASE" + git -C "$REPO_ROOT" worktree add -b "$branch" "$worktree_dir" HEAD > /dev/null || { echo "ERROR: could not create worktree '$worktree_dir' on branch '$branch'" >&2; exit 1; } + + # Make worktree git dir writable by container user (different uid) + local worktree_git_name + worktree_git_name=$(basename "$worktree_dir") + chmod -R a+rwX "$REPO_ROOT/.git/worktrees/$worktree_git_name" + + echo "Initializing submodules..."
>&2 + git -C "$worktree_dir" submodule update --init --depth 1 \ + falcosecurity-libs \ + collector/proto/third_party/stackrox \ + 2>&1 | sed 's/^/ /' >&2 + + echo "$worktree_dir" +} + +cleanup_worktree() { + local worktree_dir="$1" + if [[ -d "$worktree_dir" ]]; then + local branch + branch=$(git -C "$worktree_dir" branch --show-current 2> /dev/null || true) + git -C "$REPO_ROOT" worktree remove --force "$worktree_dir" 2> /dev/null || true + if [[ -n "$branch" ]]; then + if ! git -C "$REPO_ROOT" config "branch.${branch}.remote" &> /dev/null; then + git -C "$REPO_ROOT" branch -D "$branch" 2> /dev/null || true + fi + fi + fi +} + +on_exit() { + if [[ -n "$ACTIVE_WORKTREE" ]]; then + cleanup_worktree "$ACTIVE_WORKTREE" + fi +} + +# --- Docker --- +build_docker_args() { + local workspace="$1" + DOCKER_ARGS=( + --rm + -v "$workspace:/workspace" + -v "$HOME/.config/gcloud:/home/dev/.config/gcloud:ro" + -v "$HOME/.gitconfig:/home/dev/.gitconfig:ro" + -v "collector-dev-claude:/home/dev/.claude" + -e CLOUDSDK_CONFIG=/home/dev/.config/gcloud + -e GOOGLE_APPLICATION_CREDENTIALS=/home/dev/.config/gcloud/application_default_credentials.json + -w /workspace + ) + + # Mount .git so worktree resolves (agent can't push — no SSH keys) + DOCKER_ARGS+=(-v "$REPO_ROOT/.git:$REPO_ROOT/.git") + + for var in CLAUDE_CODE_USE_VERTEX GOOGLE_CLOUD_PROJECT GOOGLE_CLOUD_LOCATION ANTHROPIC_VERTEX_PROJECT_ID GITHUB_TOKEN; do + if [[ -n "${!var:-}" ]]; then + DOCKER_ARGS+=(-e "$var=${!var}") + fi + done +} + +# --- Main --- +case "${1:-}" in + --interactive | -i) + preflight + ACTIVE_WORKTREE=$(setup_worktree) + trap on_exit EXIT + BRANCH=$(git -C "$ACTIVE_WORKTREE" branch --show-current) + echo "Working in isolated worktree: $ACTIVE_WORKTREE" + echo "Branch: $BRANCH" + build_docker_args "$ACTIVE_WORKTREE" + docker run -it "${DOCKER_ARGS[@]}" "$IMAGE" "${CLAUDE_CMD[@]}" + ;; + + --local | -l) + shift + if [[ -n "$BRANCH_NAME" ]]; then + echo "ERROR: --branch cannot be used with --local" >&2 + 
exit 1 + fi + preflight + build_docker_args "$REPO_ROOT" + if [[ -z "${1:-}" ]]; then + docker run -it "${DOCKER_ARGS[@]}" "$IMAGE" "${CLAUDE_CMD[@]}" + else + docker run -it "${DOCKER_ARGS[@]}" "$IMAGE" "${CLAUDE_CMD[@]}" -p "$*" + fi + ;; + + --shell | -s) + check_docker + check_image + ACTIVE_WORKTREE=$(setup_worktree) + trap on_exit EXIT + echo "Working in isolated worktree: $ACTIVE_WORKTREE" + build_docker_args "$ACTIVE_WORKTREE" + docker run -it "${DOCKER_ARGS[@]}" "$IMAGE" zsh + ;; + + "" | --help | -h) + cat << USAGE +Usage: + $0 "task" Run /dev-loop with TUI (implement → PR → CI green) + $0 "/skill args" Run a specific skill with TUI + $0 --interactive Worktree + TUI (no task, manual control) + $0 --local ["task"] Edit working tree directly, TUI + $0 --shell Shell into the container + +Options: + --branch Branch name (default: claude/agent-) + --no-tui Stream JSON output instead of TUI + --debug Verbose MCP/auth logging + +Environment: + COLLECTOR_DEV_IMAGE Docker image (default: collector-dev:test) + GITHUB_TOKEN Fine-grained PAT for GitHub MCP + CLAUDE_CODE_USE_VERTEX=1 Enable Vertex AI + GOOGLE_CLOUD_PROJECT GCP project ID + GOOGLE_CLOUD_LOCATION Vertex AI region (e.g., us-east5) +USAGE + exit 0 + ;; + + *) + preflight + ACTIVE_WORKTREE=$(setup_worktree) + BRANCH=$(git -C "$ACTIVE_WORKTREE" branch --show-current) + TASK="$*" + + # Push branch for /dev-loop so the agent can use GitHub MCP push_files + if [[ "$TASK" != /* ]]; then + echo "Pushing branch $BRANCH..." 
>&2 + git -C "$ACTIVE_WORKTREE" push -u origin "$BRANCH" > /dev/null 2>&1 + fi + + echo "Working in isolated worktree: $ACTIVE_WORKTREE" + echo "Branch: $BRANCH" + echo "Task: $TASK" + echo "---" + + trap on_exit EXIT + + build_docker_args "$ACTIVE_WORKTREE" + PROMPT="/dev-loop $TASK" + if [[ "$TASK" == /* ]]; then + PROMPT="$TASK" + fi + + if [[ "$NO_TUI" == "true" ]]; then + docker run "${DOCKER_ARGS[@]}" "$IMAGE" "${CLAUDE_CMD[@]}" -p "$PROMPT" + else + docker run -it "${DOCKER_ARGS[@]}" "$IMAGE" "${CLAUDE_CMD[@]}" -p "$PROMPT" + fi + ;; +esac diff --git a/.gitignore b/.gitignore index 25842a918b..79b230ae66 100644 --- a/.gitignore +++ b/.gitignore @@ -22,8 +22,6 @@ cmake-build-*/ # vscode configuration files .vscode/ -.devcontainer/ -.devcontainer.json cmake-build/ out/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000000..2ac6351353 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,38 @@ +# Collector + +C++ eBPF runtime security agent. Captures process, network, and container +events via CO-RE BPF (falcosecurity-libs) and reports to StackRox Sensor +via gRPC. + +## Build (inside devcontainer) + +```bash +cmake -S . 
-B cmake-build -DCMAKE_BUILD_TYPE=Release \ + -DCOLLECTOR_VERSION=$(git describe --tags --abbrev=10 --long) +cmake --build cmake-build -- -j$(nproc) +ctest --no-tests=error -V --test-dir cmake-build +``` + +## Key Paths + +``` +collector/lib/ C++ core library (~108 files) +collector/test/ Unit tests (GTest/GMock, 17 suites) +collector/collector.cpp Main entrypoint +falcosecurity-libs/ Submodule: eBPF engine + CO-RE BPF programs +integration-tests/ Go test framework (26 suites, needs privileged) +``` + +## Testing Rules + +- Unit tests validate C++ logic only — no kernel needed +- eBPF changes CANNOT be tested locally — push PR, CI runs on real kernels +- CI matrix: rhel, ubuntu, cos, flatcar, fedora-coreos (amd64/arm64/s390x/ppc64le) + +## Git Rules + +- NEVER run `git push` unless you are explicitly executing the /watch-ci skill +- NEVER create new branches +- You may use: git add, git commit, git diff, git status, git describe, git branch, git log +- Do NOT create PRs unless executing /watch-ci +- C++17, clang, `clang-format --style=file`