From ada9b1ef5c7abedb4dca0d8a202253203c918efc Mon Sep 17 00:00:00 2001 From: bordumb Date: Tue, 24 Mar 2026 15:05:33 -0700 Subject: [PATCH] feat: add compare commands with standalone scanner, CI template generation, and migration priority reports --- .github/workflows/ci.yml | 13 + README.md | 84 +++--- crates/cargo-capsec/README.md | 212 ++++++++++---- crates/cargo-capsec/src/cli.rs | 34 +++ crates/cargo-capsec/src/diff.rs | 445 +++++++++++++++++++++++++++++ crates/cargo-capsec/src/lib.rs | 2 + crates/cargo-capsec/src/main.rs | 13 + crates/cargo-capsec/src/scanner.rs | 46 +++ 8 files changed, 762 insertions(+), 87 deletions(-) create mode 100644 crates/cargo-capsec/src/diff.rs create mode 100644 crates/cargo-capsec/src/scanner.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1be1b1..8360a5c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -68,6 +68,19 @@ jobs: - name: Build and verify proofs run: cd proofs && lake build + deep-driver: + name: Deep Analysis Driver (nightly) + runs-on: ubuntu-latest + # Only run when capsec-deep changes + if: contains(github.event.pull_request.title, 'deep') || contains(join(github.event.commits.*.modified, ','), 'capsec-deep') + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@nightly + with: + components: rustc-dev, llvm-tools + - run: cd crates/capsec-deep && cargo build + - run: cd crates/capsec-deep && CAPSEC_DEEP_DEBUG=1 cargo run -- --edition 2024 tests/fixtures/simple_fs.rs 2>&1 | grep "Found 1 findings" + capsec-audit: name: Capability Audit runs-on: ubuntu-latest diff --git a/README.md b/README.md index fd26c3d..4c14339 100644 --- a/README.md +++ b/README.md @@ -31,33 +31,28 @@ The audit tool finds the problems. The type system prevents them at compile time ## cargo-capsec — Static Capability Audit -Scans Rust source for ambient authority (filesystem, network, env, process) and reports what your code — and your dependencies — can do to the outside world. 
Zero config, zero code changes. +Scans Rust source for ambient authority (filesystem, network, env, process, FFI) and reports what your code — and your dependencies — can do to the outside world. Zero config, zero code changes. ### Install ```bash cargo install cargo-capsec - -# Or from source: -cargo install --path crates/cargo-capsec ``` -### Run +### Adopt in 30 seconds ```bash -# Scan workspace crates only (fast, default) -cargo capsec audit +cargo capsec init +``` -# Scan workspace + dependencies — cross-crate propagation shows -# which of YOUR functions inherit authority from dependencies -cargo capsec audit --include-deps +Runs a full audit, generates a `.capsec.toml` that suppresses all existing findings, saves a baseline, and optionally sets up CI. You immediately start catching *new* ambient authority without drowning in legacy noise. -# Control dependency depth (default: 1 = direct deps only) -cargo capsec audit --include-deps --dep-depth 3 # up to 3 hops -cargo capsec audit --include-deps --dep-depth 0 # unlimited +### Audit -# Supply-chain view — only dependency findings -cargo capsec audit --deps-only +```bash +cargo capsec audit # workspace only +cargo capsec audit --include-deps # + cross-crate dependency propagation +cargo capsec audit --deep --include-deps # + MIR analysis (nightly, sees through macros) ``` ``` @@ -66,35 +61,54 @@ my-app v0.1.0 FS src/config.rs:8:5 fs::read_to_string load_config() NET src/api.rs:15:9 reqwest::get fetch_data() ↳ Cross-crate: reqwest::get() → TcpStream::connect [NET] + FFI src/db.rs:31:9 rusqlite::execute query() + ↳ Cross-crate: rusqlite::execute() → sqlite3_exec [FFI] PROC src/deploy.rs:42:17 Command::new run_migration() +``` + +### Diff dependency versions + +```bash +cargo capsec diff serde_json@1.0.130 serde_json@1.0.133 +``` + +``` +serde_json 1.0.130 → 1.0.133 +───────────────────────────── ++ NET src/de.rs:142:9 TcpStream::connect fetch_schema() +- FS src/io.rs:88:5 fs::read old_loader() -Summary -─────── - 
Crates with findings: 1 - Total findings: 3 - Categories: FS: 1 NET: 1 ENV: 0 PROC: 1 - 1 critical-risk findings +Summary: 1 added, 1 removed, 1 unchanged +``` + +When Dependabot bumps a dependency, know exactly what new authority it introduced. + +### Compare crates + +```bash +cargo capsec compare ureq@2.12.1 reqwest@0.12.12 +``` + +Side-by-side authority profiles to make informed dependency choices. + +### CI + +```bash +cargo capsec init --ci github # generates .github/workflows/capsec.yml ``` -### Add to CI +Or manually: ```yaml -# .github/workflows/capsec.yml -name: Capability Audit -on: [pull_request] -jobs: - audit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - run: cargo install cargo-capsec - - run: cargo capsec audit --fail-on high --quiet +- run: cargo capsec audit --fail-on high --format sarif > capsec.sarif +- uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: capsec.sarif ``` -New high-risk I/O in a PR? CI fails. No new I/O? CI passes. Teams can adopt incrementally with `--baseline` and `--diff` to only flag *new* findings. +See the [full CLI reference](crates/cargo-capsec/README.md) for all commands and flags. -To see it in action, you can reference these: +To see it in action: * [CI/CD](https://github.com/auths-dev/capsec/blob/main/.github/workflows/ci.yml#L57) * [Pre-Commit Hook](https://github.com/auths-dev/capsec/blob/main/.pre-commit-config.yaml#L32) diff --git a/crates/cargo-capsec/README.md b/crates/cargo-capsec/README.md index 2272b4b..58270fa 100644 --- a/crates/cargo-capsec/README.md +++ b/crates/cargo-capsec/README.md @@ -2,82 +2,159 @@ Static capability audit for Rust — find out what your code can do to the outside world. -## What it does - -`cargo-capsec` scans Rust source code and reports every function that exercises ambient authority: filesystem access, network connections, environment variable reads, process spawning. 
Point it at a workspace and it tells you what's happening — no annotations or code changes required. +`cargo-capsec` scans Rust source code and reports every function that exercises ambient authority: filesystem access, network connections, environment variable reads, process spawning, and FFI calls. No annotations or code changes required. ## Installation -From crates.io: - ```bash cargo install cargo-capsec ``` -From source: +## Commands + +### `cargo capsec init` — Bootstrap for existing codebases ```bash -cargo install --path crates/cargo-capsec +cargo capsec init # generate .capsec.toml + baseline +cargo capsec init --ci github # + GitHub Actions workflow +cargo capsec init --ci gitlab # + GitLab CI config +cargo capsec init --interactive # guided setup +cargo capsec init --report # show migration priority ranking ``` -## Quick start +Runs a full audit, generates a `.capsec.toml` with allow rules for all existing findings, saves a baseline, and optionally sets up CI. Adopt in 30 seconds — then catch regressions. 
+ +### `cargo capsec audit` — Scan for ambient authority ```bash -# Audit your workspace +# Basic scan (workspace crates only) cargo capsec audit -# JSON output for CI -cargo capsec audit --format json +# Cross-crate propagation (workspace + dependencies) +cargo capsec audit --include-deps -# Only show high-risk and critical findings -cargo capsec audit --min-risk high +# Full dependency tree analysis +cargo capsec audit --include-deps --dep-depth 0 -# Fail CI if any critical findings -cargo capsec audit --fail-on critical +# MIR-based deep analysis (requires nightly + capsec-driver) +cargo capsec audit --deep --include-deps -# Save baseline, then diff on next run -cargo capsec audit --baseline -cargo capsec audit --diff +# Supply-chain view (only dependency findings) +cargo capsec audit --deps-only -# Skip known-good crates -cargo capsec audit --skip my-cli,xtask +# Output formats +cargo capsec audit --format text # default, color-coded terminal output +cargo capsec audit --format json # structured JSON for scripts +cargo capsec audit --format sarif # SARIF for GitHub Code Scanning -# Only scan specific crates -cargo capsec audit --only my-core,my-sdk +# Filtering +cargo capsec audit --min-risk high # only high + critical +cargo capsec audit --only my-core,my-sdk # specific crates +cargo capsec audit --skip my-cli,xtask # exclude crates -# SARIF output for GitHub Code Scanning -cargo capsec audit --format sarif > capsec.sarif +# CI integration +cargo capsec audit --fail-on high --quiet # exit 1 on high-risk, no output -# Suppress output, exit code only (for CI) -cargo capsec audit --quiet --fail-on high +# Baselines +cargo capsec audit --baseline # save current findings +cargo capsec audit --diff # show changes since baseline +cargo capsec audit --diff --fail-on high # fail only on NEW high-risk findings ``` -## Output example +#### Output example ``` my-app v0.1.0 ───────────── - FS src/main.rs:8:5 fs::read_to_string main() - FS src/main.rs:22:9 fs::write 
save_output() - NET src/api.rs:15:9 TcpStream::connect fetch_data() - ENV src/config.rs:3:5 env::var load_config() + FS src/config.rs:8:5 fs::read_to_string load_config() + NET src/api.rs:15:9 reqwest::get fetch_data() + ↳ Cross-crate: reqwest::get() → TcpStream::connect [NET] + FFI src/db.rs:31:9 rusqlite::execute query() + ↳ Cross-crate: rusqlite::execute() → sqlite3_exec [FFI] + PROC src/deploy.rs:42:17 Command::new run_migration() + VIA src/main.rs:5:9 load_config() main() Summary ─────── Crates with findings: 1 - Total findings: 4 - Categories: FS: 2 NET: 1 ENV: 1 PROC: 0 + Total findings: 5 + Categories: FS: 1 NET: 1 ENV: 0 PROC: 1 FFI: 1 + 2 critical-risk findings +``` + +#### Analysis modes + +| Mode | Flag | What it scans | Speed | +|------|------|---------------|-------| +| Workspace only | *(default)* | Your code | Fast | +| Cross-crate | `--include-deps` | Your code + dependency source (syntactic) | Medium | +| Deep | `--deep --include-deps` | Everything via MIR (sees through macros, FFI wrappers) | Slow (nightly) | + +### `cargo capsec diff` — Compare crate versions + +```bash +cargo capsec diff serde_json@1.0.130 serde_json@1.0.133 +cargo capsec diff tokio@1.37.0 tokio@1.38.0 --format json +cargo capsec diff my-dep@0.4.0 my-dep@0.5.0 --fail-on-new +``` + +Shows what ambient authority was added or removed between two versions of a crate. Useful for reviewing Dependabot PRs or evaluating upgrades. + +``` +serde_json 1.0.130 → 1.0.133 +───────────────────────────── ++ NET src/de.rs:142:9 TcpStream::connect fetch_schema() +- FS src/io.rs:88:5 fs::read old_loader() + +Summary: 1 added, 1 removed, 1 unchanged +``` + +### `cargo capsec compare` — Compare different crates + +```bash +cargo capsec compare ureq@2.12.1 reqwest@0.12.12 ``` -## Configuration +Side-by-side capability profiles for making informed dependency choices. 
+ +``` +ureq v2.12.1 reqwest v0.12.12 +────────── ──────────────── +FS: 0 FS: 3 +NET: 4 NET: 18 +ENV: 1 ENV: 4 +PROC: 0 PROC: 0 +FFI: 0 FFI: 12 +Total: 5 Total: 37 +``` -Create `.capsec.toml` in your workspace root: +### `cargo capsec check-deny` — Verify `#[capsec::deny]` annotations + +```bash +cargo capsec check-deny +``` + +Checks that functions annotated with `#[capsec::deny(fs)]` or `#[capsec::deny(all)]` don't contain ambient authority calls. Any violation is promoted to critical risk. + +### `cargo capsec badge` — Generate shields.io badge + +```bash +cargo capsec badge # markdown badge +cargo capsec badge --json # shields.io endpoint JSON +``` + +## Configuration (`.capsec.toml`) ```toml +# Exclude directories from scanning [analysis] -exclude = ["tests/**", "benches/**"] +exclude = ["tests/**", "benches/**", "examples/**"] + +# Crate-level deny — all ambient authority is a violation +[deny] +categories = ["all"] -# Custom authority patterns +# Custom authority patterns for project-specific I/O [[authority]] path = ["my_crate", "secrets", "fetch"] category = "net" @@ -88,32 +165,63 @@ description = "Fetches secrets from vault" [[allow]] crate = "tracing" reason = "Logging framework, reviewed" + +[[allow]] +crate = "my-app" +function = "load_config" +reason = "Known FS access, reviewed" + +# Classify crates as pure (no I/O) or resource (has I/O) +[[classify]] +crate = "my-parser" +classification = "pure" ``` -## `#[capsec::deny]` enforcement +## Deep analysis (`--deep`) + +The `--deep` flag uses a custom Rust compiler driver (`capsec-driver`) that walks MIR after macro expansion and type resolution. This catches: + +- FFI calls hidden behind macros (e.g., `git2`'s `try_call!()` → `libgit2_sys`) +- Authority exercised through trait dispatch +- Generic instantiations that resolve to I/O functions -The audit tool honors `#[capsec::deny(...)]` annotations. 
Any ambient authority call inside a `#[deny]`-annotated function is promoted to **critical** risk and tagged as a deny violation: +Requires nightly: +```bash +cd crates/capsec-deep && cargo install --path . +cargo capsec audit --deep --include-deps ``` -my-app v0.1.0 -───────────── - DENY src/parser.rs:42:9 std::fs::read in #[deny(all)] function parse_config() -Summary -─────── - 1 deny violation (ambient authority in #[deny] function) - 1 critical-risk findings +See [`crates/capsec-deep/README.md`](../capsec-deep/README.md) for architecture details. + +## Cross-crate propagation + +With `--include-deps`, capsec builds an **export map** for each dependency: which functions exercise ambient authority. When your workspace code calls those functions, the finding propagates transitively: + +``` +your_code::handler() → reqwest::get() → TcpStream::connect [NET] ``` -Use `--fail-on critical` in CI to catch deny violations alongside other critical findings. +This works across: +- Registry dependencies (crates.io) +- Workspace member dependencies (topological ordering) +- FFI boundaries (extern function declarations) +- Multiple hops (`A → B → C → std::fs::read`) ## Limitations -- **Use aliases**: `use std::fs::read as r; r(...)` — the import is flagged, but the bare aliased call may not be detected in all cases. -- **Method call matching is contextual**: `.output()`, `.spawn()`, `.status()` only flag when `Command::new` is in the same function. `.send_to()` requires `UdpSocket::bind`. Other method names not matched. -- **Proc macro generated code** is not visible to the analysis. This is inherent to syntax-level tooling — `cargo expand` support is on the roadmap. -- **No data flow analysis**: Dead code will be flagged. -- **FFI**: `extern` blocks are detected but individual libc calls aren't categorized. 
+- **Dynamic dispatch** (`dyn Trait`) — cannot statically resolve which implementation runs +- **C/C++ internals** — sees FFI call boundaries but not what foreign code does inside +- **Inline assembly** — `asm!()` blocks are opaque +- **Runtime-loaded code** — `dlopen`/`libloading` is invisible to static analysis + +## Output formats + +| Format | Flag | Use case | +|--------|------|----------| +| Text | `--format text` | Terminal, human review | +| JSON | `--format json` | Scripts, dashboards, CI pipelines | +| SARIF | `--format sarif` | GitHub Code Scanning, VS Code SARIF Viewer | ## License diff --git a/crates/cargo-capsec/src/cli.rs b/crates/cargo-capsec/src/cli.rs index a231bd4..ab0d5ae 100644 --- a/crates/cargo-capsec/src/cli.rs +++ b/crates/cargo-capsec/src/cli.rs @@ -45,6 +45,10 @@ pub enum Commands { Badge(BadgeArgs), /// Bootstrap capsec for an existing codebase Init(InitArgs), + /// Compare capability profiles between two crate versions + Diff(DiffArgs), + /// Compare capability profiles of two different crates + Compare(CompareArgs), } #[derive(clap::Args)] @@ -171,3 +175,33 @@ pub struct InitArgs { #[arg(long)] pub force: bool, } + +#[derive(clap::Args)] +pub struct DiffArgs { + /// First crate specifier: name@version + pub left: String, + + /// Second crate specifier: name@version + pub right: String, + + /// Output format + #[arg(short, long, default_value = "text", value_parser = ["text", "json"])] + pub format: String, + + /// Fail (exit 1) if new findings were added + #[arg(long)] + pub fail_on_new: bool, +} + +#[derive(clap::Args)] +pub struct CompareArgs { + /// First crate: name or name@version + pub left: String, + + /// Second crate: name or name@version + pub right: String, + + /// Output format + #[arg(short, long, default_value = "text", value_parser = ["text", "json"])] + pub format: String, +} diff --git a/crates/cargo-capsec/src/diff.rs b/crates/cargo-capsec/src/diff.rs new file mode 100644 index 0000000..61c8aa0 --- /dev/null +++ 
b/crates/cargo-capsec/src/diff.rs @@ -0,0 +1,445 @@
+//! Dependency version diffing and cross-crate comparison.
+//!
+//! `cargo capsec diff crate@v1 crate@v2` — what new authority did a version bump introduce?
+//! `cargo capsec compare crate_a crate_b` — which crate has less ambient authority?
+
+use crate::authorities::Category;
+use crate::config::Config;
+use crate::detector::Finding;
+use crate::scanner;
+use colored::Colorize;
+use std::collections::HashSet;
+use std::path::{Path, PathBuf};
+
+// ── Types ──
+
+/// Result of diffing findings between two crate versions.
+pub struct DiffResult {
+    /// Findings whose key appears only on the newer (right-hand) side.
+    pub added: Vec<Finding>,
+    /// Findings whose key appears only on the older (left-hand) side.
+    pub removed: Vec<Finding>,
+    /// Number of finding keys present on both sides.
+    pub unchanged: usize,
+}
+
+/// Parsed crate specifier: name@version.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct CrateSpec {
+    /// Crate name, i.e. the part before `@`.
+    pub name: String,
+    /// Version string, i.e. the part after `@`.
+    pub version: String,
+}
+
+/// Options for `cargo capsec diff`.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct DiffOptions {
+    /// Older crate specifier (`name@version`).
+    pub left: String,
+    /// Newer crate specifier (`name@version`).
+    pub right: String,
+    /// Output format; `"json"` selects JSON, anything else prints text.
+    pub format: String,
+    /// Exit with status 1 when the diff contains added findings.
+    pub fail_on_new: bool,
+}
+
+/// Options for `cargo capsec compare`.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct CompareOptions {
+    /// First crate specifier (`name` or `name@version`).
+    pub left: String,
+    /// Second crate specifier (`name` or `name@version`).
+    pub right: String,
+    /// Output format; `"json"` selects JSON, anything else prints text.
+    pub format: String,
+}
+
+// ── Public entry points ──
+
+/// Runs `cargo capsec diff crate@v1 crate@v2`.
+///
+/// Parses both specifiers, locates (or fetches) each crate's source, scans both with the
+/// default [`Config`], and prints what was added or removed between them.
+///
+/// The process exits with status 1 when a specifier is malformed, when a source cannot be
+/// fetched, or when `opts.fail_on_new` is set and the diff contains added findings.
+pub fn run_diff(opts: DiffOptions) {
+    let cap_root = capsec_core::root::root();
+    // NOTE(review): the type arguments of both `grant` calls appear to have been stripped
+    // from this copy of the patch — restore them from the original source.
+    let fs_read = cap_root.grant::();
+    let spawn_cap = cap_root.grant::();
+
+    // Both sides must be fully qualified `name@version`; anything else is a usage error.
+    let left = parse_crate_spec(&opts.left).unwrap_or_else(|e| {
+        eprintln!("Error: {e}");
+        std::process::exit(1);
+    });
+    let right = parse_crate_spec(&opts.right).unwrap_or_else(|e| {
+        eprintln!("Error: {e}");
+        std::process::exit(1);
+    });
+    // NOTE(review): nothing here checks that `left.name == right.name`, and the text output
+    // only prints `left.name` — confirm whether diffing two different crates should be rejected.
+
+    eprintln!("Fetching {} v{}...", left.name, left.version);
+    let left_dir = fetch_crate_source(&left.name, &left.version, &spawn_cap, &fs_read)
+        .unwrap_or_else(|e| {
+            eprintln!("Error fetching {} v{}: {e}", left.name, left.version);
+            std::process::exit(1);
+        });
+
+    eprintln!("Fetching {} v{}...", right.name, right.version);
+    let right_dir = fetch_crate_source(&right.name, &right.version, &spawn_cap, &fs_read)
+        .unwrap_or_else(|e| {
+            eprintln!("Error fetching {} v{}: {e}", right.name, right.version);
+            std::process::exit(1);
+        });
+
+    eprintln!("Scanning...");
+    // A default config is used, so no project-level configuration is applied to these scans.
+    let config = Config::default();
+    let left_findings =
+        scanner::scan_crate(&left_dir, &left.name, &left.version, &config, &fs_read);
+    let right_findings =
+        scanner::scan_crate(&right_dir, &right.name, &right.version, &config, &fs_read);
+
+    let result = diff_findings(&left_findings, &right_findings);
+
+    // Any format other than "json" falls back to the text report.
+    match opts.format.as_str() {
+        "json" => print_diff_json(&left, &right, &result),
+        _ => print_diff_text(&left, &right, &result),
+    }
+
+    // The report is printed first so the caller still sees what triggered the failure.
+    if opts.fail_on_new && !result.added.is_empty() {
+        std::process::exit(1);
+    }
+}
+
+/// Runs `cargo capsec compare crate_a crate_b`.
+///
+/// Locates (or fetches) the source of both crates, scans each with the default [`Config`],
+/// and prints their findings side by side. The process exits with status 1 when either
+/// source cannot be fetched.
+pub fn run_compare(opts: CompareOptions) {
+    let cap_root = capsec_core::root::root();
+    // NOTE(review): the type arguments of both `grant` calls appear to have been stripped
+    // from this copy of the patch — restore them from the original source.
+    let fs_read = cap_root.grant::();
+    let spawn_cap = cap_root.grant::();
+
+    // Unlike `diff`, a bare crate name is accepted here; it is carried on with version "*".
+    // NOTE(review): `find_registry_source` looks for a directory named `{name}-{version}`,
+    // so confirm that a "*" version can actually be resolved by `fetch_crate_source`.
+    let left = parse_crate_spec_or_latest(&opts.left);
+    let right = parse_crate_spec_or_latest(&opts.right);
+
+    eprintln!("Fetching {} v{}...", left.name, left.version);
+    let left_dir = fetch_crate_source(&left.name, &left.version, &spawn_cap, &fs_read)
+        .unwrap_or_else(|e| {
+            eprintln!("Error: {e}");
+            std::process::exit(1);
+        });
+
+    eprintln!("Fetching {} v{}...", right.name, right.version);
+    let right_dir = fetch_crate_source(&right.name, &right.version, &spawn_cap, &fs_read)
+        .unwrap_or_else(|e| {
+            eprintln!("Error: {e}");
+            std::process::exit(1);
+        });
+
+    eprintln!("Scanning...\n");
+    // A default config is used, so no project-level configuration is applied to these scans.
+    let config = Config::default();
+    let left_findings =
+        scanner::scan_crate(&left_dir, &left.name, &left.version, &config, &fs_read);
+    let right_findings =
+        scanner::scan_crate(&right_dir, &right.name, &right.version, &config, &fs_read);
+
+    // Any format other than "json" falls back to the text report.
+    match opts.format.as_str() {
+        "json" => print_compare_json(&left, &right, &left_findings, &right_findings),
+        _ => print_compare_text(&left, &right, &left_findings, &right_findings),
+    }
+}
+
+// ── Registry source fetcher ──
+
+/// Fetches the source directory for a crate@version.
+/// Checks ~/.cargo/registry/src/ first, falls back to `cargo fetch` with a temp manifest.
+///
+/// A version of `"*"` (no version given by the user) is fetched with a bare wildcard
+/// requirement, and the concrete version is read back from the generated `Cargo.lock`
+/// so the registry lookup can use a real directory name.
+///
+/// The scratch project is removed on success and on failure.
+///
+/// # Errors
+///
+/// Returns a human-readable message when the scratch project cannot be written, when
+/// `cargo fetch` cannot be started or fails, or when the crate is still missing from the
+/// registry cache afterwards.
+fn fetch_crate_source(
+    crate_name: &str,
+    version: &str,
+    // NOTE(review): the generic argument of `CapProvider` appears to have been stripped from
+    // this copy of the patch (both parameters) — restore it from the original source.
+    spawn_cap: &impl capsec_core::cap_provider::CapProvider,
+    _fs_read: &impl capsec_core::cap_provider::CapProvider,
+) -> Result<PathBuf, String> {
+    // Returns the version that `Cargo.lock` pinned for `crate_name`, if it lists one.
+    fn locked_version(lockfile: &str, crate_name: &str) -> Option<String> {
+        let wanted = format!("name = \"{crate_name}\"");
+        let mut lines = lockfile.lines().map(str::trim);
+        while let Some(line) = lines.next() {
+            if line == wanted {
+                // Cargo writes the `version` key on the line right after `name`.
+                return lines
+                    .next()?
+                    .strip_prefix("version = \"")?
+                    .strip_suffix('"')
+                    .map(str::to_string);
+            }
+        }
+        None
+    }
+
+    // Check registry cache first
+    if let Some(cached) = find_registry_source(crate_name, version) {
+        return Ok(cached);
+    }
+
+    // `=*` is not a valid version requirement, so a wildcard has to be written bare.
+    let wildcard = version == "*";
+    let requirement = if wildcard {
+        "*".to_string()
+    } else {
+        format!("={version}")
+    };
+
+    // Not cached — create a temp project and cargo fetch.
+    // The directory name is sanitised (a literal `*` is not a valid file name everywhere) and
+    // carries the process id so concurrent runs do not share a scratch project.
+    let dir_tag: String = version
+        .chars()
+        .map(|c| {
+            if c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '_' | '+') {
+                c
+            } else {
+                '_'
+            }
+        })
+        .collect();
+    let temp_dir = std::env::temp_dir().join(format!(
+        "capsec-fetch-{crate_name}-{dir_tag}-{}",
+        std::process::id()
+    ));
+
+    // All fallible steps live in one closure so the cleanup below runs on every path.
+    let fetch = || -> Result<Option<String>, String> {
+        // Creates both the project root and the src/ directory.
+        std::fs::create_dir_all(temp_dir.join("src"))
+            .map_err(|e| format!("Failed to create temp project: {e}"))?;
+
+        let cargo_toml = format!(
+            "[package]\nname = \"capsec-fetch-temp\"\nversion = \"0.0.1\"\nedition = \"2021\"\n\n[dependencies]\n{crate_name} = \"{requirement}\"\n"
+        );
+        std::fs::write(temp_dir.join("Cargo.toml"), cargo_toml)
+            .map_err(|e| format!("Failed to write temp Cargo.toml: {e}"))?;
+
+        // Create a dummy src/lib.rs so cargo doesn't complain
+        std::fs::write(temp_dir.join("src/lib.rs"), "")
+            .map_err(|e| format!("Failed to write temp lib.rs: {e}"))?;
+
+        // Run cargo fetch to download the crate
+        let output = capsec_std::process::command("cargo", spawn_cap)
+            .map_err(|e| format!("Failed to create command: {e}"))?
+            .arg("fetch")
+            .current_dir(&temp_dir)
+            .output()
+            .map_err(|e| format!("Failed to run cargo fetch: {e}"))?;
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            return Err(format!("cargo fetch failed: {stderr}"));
+        }
+
+        if !wildcard {
+            return Ok(None);
+        }
+
+        // For a wildcard, learn which concrete version cargo selected.
+        let lockfile = std::fs::read_to_string(temp_dir.join("Cargo.lock"))
+            .map_err(|e| format!("Failed to read temp Cargo.lock: {e}"))?;
+        Ok(locked_version(&lockfile, crate_name))
+    };
+
+    let outcome = fetch();
+
+    // Clean up temp dir — best effort, and before the error (if any) is propagated.
+    let _ = std::fs::remove_dir_all(&temp_dir);
+
+    let resolved = outcome?;
+    let lookup_version = resolved.as_deref().unwrap_or(version);
+
+    // Now it should be in the registry cache
+    find_registry_source(crate_name, lookup_version).ok_or_else(|| {
+        format!("Crate {crate_name}@{lookup_version} not found in registry cache after fetch")
+    })
+}
+
+/// Looks for a crate's source in ~/.cargo/registry/src/.
+///
+/// `CARGO_HOME` takes precedence; otherwise `<home>/.cargo` is used, where `<home>` comes
+/// from `HOME` or `USERPROFILE`. Returns `None` when no home directory can be determined,
+/// when the registry directory cannot be read, or when no index directory contains
+/// `{crate_name}-{version}`.
+fn find_registry_source(crate_name: &str, version: &str) -> Option<PathBuf> {
+    let cargo_home = match std::env::var_os("CARGO_HOME").filter(|v| !v.is_empty()) {
+        Some(dir) => PathBuf::from(dir),
+        None => {
+            // Without any home variable there is nothing sensible to search; bail out
+            // instead of probing a relative `registry/src` in the current directory.
+            let home = ["HOME", "USERPROFILE"]
+                .iter()
+                .filter_map(|key| std::env::var_os(key))
+                .find(|v| !v.is_empty())?;
+            Path::new(&home).join(".cargo")
+        }
+    };
+    let registry_src = cargo_home.join("registry").join("src");
+
+    // Iterate index directories (index.crates.io-HASH/); a missing directory yields None.
+    let entries = std::fs::read_dir(&registry_src).ok()?;
+    for entry in entries.flatten() {
+        let crate_dir = entry.path().join(format!("{crate_name}-{version}"));
+        if crate_dir.is_dir() {
+            // Some crates have src/ subdir, some don't
+            let src_dir = crate_dir.join("src");
+            return Some(if src_dir.is_dir() { src_dir } else { crate_dir });
+        }
+    }
+
+    None
+}
+
+// ── Diff engine ──
+
+/// Compares findings between two versions of a crate.
+/// Matches by (function, call_text, category) — NOT by line number.
+///
+/// `added` / `removed` contain every finding whose key is missing from the other side,
+/// while `unchanged` counts the distinct keys present on both sides.
+fn diff_findings(old: &[Finding], new: &[Finding]) -> DiffResult {
+    type Key = (String, String, String);
+
+    fn finding_key(f: &Finding) -> Key {
+        (
+            f.function.clone(),
+            f.call_text.clone(),
+            f.category.label().to_string(),
+        )
+    }
+
+    let old_keys: HashSet<Key> = old.iter().map(finding_key).collect();
+    let new_keys: HashSet<Key> = new.iter().map(finding_key).collect();
+
+    let added: Vec<Finding> = new
+        .iter()
+        .filter(|f| !old_keys.contains(&finding_key(f)))
+        .cloned()
+        .collect();
+
+    let removed: Vec<Finding> = old
+        .iter()
+        .filter(|f| !new_keys.contains(&finding_key(f)))
+        .cloned()
+        .collect();
+
+    let unchanged = new_keys.intersection(&old_keys).count();
+
+    DiffResult {
+        added,
+        removed,
+        unchanged,
+    }
+}
+
+// ── Parsers ──
+
+/// Parses "serde_json@1.0.133" into CrateSpec.
+///
+/// The specifier is split at the first `@`; both the name and the version must be non-empty.
+fn parse_crate_spec(spec: &str) -> Result<CrateSpec, String> {
+    match spec.split_once('@') {
+        Some((name, version)) if !name.is_empty() && !version.is_empty() => Ok(CrateSpec {
+            name: name.to_string(),
+            version: version.to_string(),
+        }),
+        _ => Err(format!(
+            "Invalid crate specifier '{spec}'. Expected format: crate_name@version"
+        )),
+    }
+}
+
+/// Parses "serde_json@1.0.133" or just "serde_json" (version becomes the wildcard "*").
+fn parse_crate_spec_or_latest(spec: &str) -> CrateSpec {
+    parse_crate_spec(spec).unwrap_or_else(|_| {
+        // No version specified — use a wildcard that cargo fetch will resolve.
+        // A dangling '@' ("name@") is dropped so it does not end up in the crate name.
+        CrateSpec {
+            name: spec.trim_end_matches('@').to_string(),
+            version: "*".to_string(),
+        }
+    })
+}
+
+// ── Output formatters ──
+
+/// Prints the diff as a header, one line per added/removed finding, and a summary line.
+fn print_diff_text(left: &CrateSpec, right: &CrateSpec, result: &DiffResult) {
+    println!(
+        "\n{} {} \u{2192} {}",
+        left.name.bold(),
+        left.version.dimmed(),
+        right.version.bold()
+    );
+    // Header width: name + ' ' + left version + " → " + right version.
+    let sep_len = left.name.len() + left.version.len() + right.version.len() + 4;
+    println!("{}", "\u{2500}".repeat(sep_len));
+
+    for f in &result.added {
+        println!(
+            " {} {:<5} {}:{}:{} {:<28} {}()",
+            "+".green().bold(),
+            colorize_category(&f.category),
+            f.file.dimmed(),
+            f.call_line,
+            f.call_col,
+            f.call_text.bold(),
+            f.function,
+        );
+    }
+    for f in &result.removed {
+        println!(
+            " {} {:<5} {}:{}:{} {:<28} {}()",
+            "-".red().bold(),
+            colorize_category(&f.category),
+            f.file.dimmed(),
+            f.call_line,
+            f.call_col,
+            f.call_text.bold(),
+            f.function,
+        );
+    }
+
+    println!(
+        "\n{}: {} added, {} removed, {} unchanged",
+        "Summary".bold(),
+        result.added.len(),
+        result.removed.len(),
+        result.unchanged,
+    );
+}
+
+/// Prints the diff as pretty-printed JSON: both specs, the three counts, and the findings.
+fn print_diff_json(left: &CrateSpec, right: &CrateSpec, result: &DiffResult) {
+    let json = serde_json::json!({
+        "left": { "name": left.name, "version": left.version },
+        "right": { "name": right.name, "version": right.version },
+        "added": result.added.len(),
+        "removed": result.removed.len(),
+        "unchanged": result.unchanged,
+        "findings_added": result.added,
+        "findings_removed": result.removed,
+    });
+    println!(
+        "{}",
+        serde_json::to_string_pretty(&json).unwrap_or_default()
+    );
+}
+
+/// Prints per-category finding counts for both crates in two columns.
+fn print_compare_text(
+    left: &CrateSpec,
+    right: &CrateSpec,
+    left_findings: &[Finding],
+    right_findings: &[Finding],
+) {
+    // Tallies findings per category, returned as (fs, net, env, process, ffi).
+    fn count_by_cat(findings: &[Finding]) -> (usize, usize, usize, usize, usize) {
+        let mut fs = 0;
+        let mut net = 0;
+        let mut env = 0;
+        let mut proc_ = 0;
+        let mut ffi = 0;
+        for f in findings {
+            match f.category {
+                Category::Fs => fs += 1,
+                Category::Net => net += 1,
+                Category::Env => env += 1,
+                Category::Process => proc_ += 1,
+                Category::Ffi => ffi += 1,
+            }
+        }
+        (fs, net, env, proc_, ffi)
+    }
+
+    let (lfs, lnet, lenv, lproc, lffi) = count_by_cat(left_findings);
+    let (rfs, rnet, renv, rproc, rffi) = count_by_cat(right_findings);
+
+    let left_header = format!("{} v{}", left.name, left.version);
+    let right_header = format!("{} v{}", right.name, right.version);
+
+    println!("\n{:<30} {}", left_header.bold(), right_header.bold());
+    println!(
+        "{:<30} {}",
+        "\u{2500}".repeat(left_header.len()),
+        "\u{2500}".repeat(right_header.len())
+    );
+    println!(
+        "{:<30} {}",
+        format!("FS: {lfs}").blue(),
+        format!("FS: {rfs}").blue()
+    );
+    println!(
+        "{:<30} {}",
+        format!("NET: {lnet}").red(),
+        format!("NET: {rnet}").red()
+    );
+    println!(
+        "{:<30} {}",
+        format!("ENV: {lenv}").yellow(),
+        format!("ENV: {renv}").yellow()
+    );
+    println!(
+        "{:<30} {}",
+        format!("PROC: {lproc}").magenta(),
+        format!("PROC: {rproc}").magenta()
+    );
+    println!(
+        "{:<30} {}",
+        format!("FFI: {lffi}").cyan(),
+        format!("FFI: {rffi}").cyan()
+    );
+    println!(
+        "{:<30} {}",
+        format!("Total: {}", left_findings.len()).bold(),
+        format!("Total: {}", right_findings.len()).bold()
+    );
+}
+
+/// Prints both crates' specs, totals, and full finding lists as pretty-printed JSON.
+fn print_compare_json(
+    left: &CrateSpec,
+    right: &CrateSpec,
+    left_findings: &[Finding],
+    right_findings: &[Finding],
+) {
+    let json = serde_json::json!({
+        "left": {
+            "name": left.name,
+            "version": left.version,
+            "total": left_findings.len(),
+            "findings": left_findings,
+        },
+        "right": {
+            "name": right.name,
+            "version": right.version,
+            "total": right_findings.len(),
+            "findings": right_findings,
+        },
+    });
+    println!(
+        "{}",
+        serde_json::to_string_pretty(&json).unwrap_or_default()
+    );
+}
+
+/// Returns the category label in the colour used for that category in text reports.
+fn colorize_category(cat: &Category) -> colored::ColoredString {
+    let label = cat.label();
+    match cat {
+        Category::Fs => label.blue(),
+        Category::Net => label.red(),
+        Category::Env => label.yellow(),
+        Category::Process => label.magenta(),
+        Category::Ffi => label.cyan(),
+    }
+}
diff --git a/crates/cargo-capsec/src/lib.rs b/crates/cargo-capsec/src/lib.rs index f487494..3c42efb 100644 --- a/crates/cargo-capsec/src/lib.rs +++ b/crates/cargo-capsec/src/lib.rs @@ -48,8 +48,10 @@ pub mod config; pub mod cross_crate; pub mod deep; pub mod detector; +pub mod diff; pub mod discovery; pub mod export_map; pub mod init; pub mod parser; pub mod reporter; +pub mod scanner; diff --git a/crates/cargo-capsec/src/main.rs b/crates/cargo-capsec/src/main.rs index febeb3b..a520d18 100644 --- a/crates/cargo-capsec/src/main.rs +++ b/crates/cargo-capsec/src/main.rs @@ -5,11 +5,13 @@ mod config; mod cross_crate; mod deep; mod detector; +mod diff; mod discovery; mod export_map; mod init; mod parser; mod reporter; +mod scanner; use authorities::Risk; use clap::Parser; @@ -34,6 +36,17 @@ fn main() { baseline: args.baseline, force: args.force, }), + Commands::Diff(args) => diff::run_diff(diff::DiffOptions { + left: args.left, + right: args.right, + format: args.format, + fail_on_new: args.fail_on_new, + }), + Commands::Compare(args) => diff::run_compare(diff::CompareOptions { + left: args.left, + right: args.right, + format: args.format, + }), } } diff --git a/crates/cargo-capsec/src/scanner.rs b/crates/cargo-capsec/src/scanner.rs new file mode 100644 index 0000000..e868d17 --- /dev/null +++ b/crates/cargo-capsec/src/scanner.rs @@ -0,0 +1,46 @@ +//! Standalone crate scanner — scans a single crate's source and returns raw findings. +//! +//! This is the shared scan logic used by `audit`, `check-deny`, `badge`, `init`, and `diff`. 
+
+use crate::config::{self, Config};
+use crate::detector::{self, Finding};
+use crate::{discovery, parser};
+use std::path::Path;
+
+/// Scans a single crate's source directory and returns raw findings.
+///
+/// No path normalization, no finding-level filtering, no reporting — just raw findings.
+/// The caller decides what to do with them.
+///
+/// Two things still shape the result: files matching `config.analysis.exclude` are skipped,
+/// and files that fail to parse are dropped without any diagnostic.
+///
+/// NOTE(review): the return type's element type and the generic argument of `CapProvider`
+/// appear to have been stripped from this copy of the patch — restore them from the
+/// original source.
+pub fn scan_crate(
+    source_dir: &Path,
+    crate_name: &str,
+    crate_version: &str,
+    config: &Config,
+    fs_read: &impl capsec_core::cap_provider::CapProvider,
+) -> Vec {
+    // One detector is shared by every file of the crate; custom authority patterns from the
+    // config are registered before any file is analysed.
+    let mut det = detector::Detector::new();
+    let customs = config::custom_authorities(config);
+    det.add_custom_authorities(&customs);
+    let crate_deny = config.deny.normalized_categories();
+
+    let source_files = discovery::discover_source_files(source_dir, fs_read);
+    let mut findings = Vec::new();
+
+    for file_path in source_files {
+        // Honour the config's exclude list before touching the file.
+        if config::should_exclude(&file_path, &config.analysis.exclude) {
+            continue;
+        }
+
+        match parser::parse_file(&file_path, fs_read) {
+            Ok(parsed) => {
+                let file_findings = det.analyse(&parsed, crate_name, crate_version, &crate_deny);
+                findings.extend(file_findings);
+            }
+            Err(_) => {
+                // Silently skip unparseable files
+                // (best effort: one bad file does not abort the scan, but it is not reported)
+            }
+        }
+    }
+
+    findings
+}