diff --git a/.capsec.toml b/.capsec.toml index b137cb2..42d7810 100644 --- a/.capsec.toml +++ b/.capsec.toml @@ -6,3 +6,8 @@ crate = "capsec-std" [[allow]] crate = "capsec-tokio" + +# cargo-capsec is the CLI tool — it necessarily performs I/O to scan, +# cache, and invoke the deep analysis driver. +[[allow]] +crate = "cargo-capsec" diff --git a/Cargo.toml b/Cargo.toml index 37fe5eb..6699376 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ ] exclude = [ "crates/capsec-example-db", + "crates/capsec-deep", ] [workspace.package] diff --git a/crates/capsec-deep/.gitignore b/crates/capsec-deep/.gitignore new file mode 100644 index 0000000..4d09b49 --- /dev/null +++ b/crates/capsec-deep/.gitignore @@ -0,0 +1,8 @@ +/target +*.o +*.d +# Binaries produced when testing the driver on fixture files +/clean +/simple_fs +/macro_ffi +/capsec_test diff --git a/crates/capsec-deep/Cargo.lock b/crates/capsec-deep/Cargo.lock new file mode 100644 index 0000000..3b0e4e2 --- /dev/null +++ b/crates/capsec-deep/Cargo.lock @@ -0,0 +1,107 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "capsec-deep" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + 
"serde_core", + "zmij", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/crates/capsec-deep/Cargo.toml b/crates/capsec-deep/Cargo.toml new file mode 100644 index 0000000..cc2741c --- /dev/null +++ b/crates/capsec-deep/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "capsec-deep" +version = "0.1.0" +edition = "2024" +license = "Apache-2.0" +description = "MIR-based deep analysis driver for capsec — requires nightly" +publish = false + +[[bin]] +name = "capsec-driver" +path = "src/main.rs" + +[dependencies] +serde = { version = "1", features = ["derive"] } +serde_json = "1" + +[package.metadata.rust-analyzer] +rustc_private = true diff --git a/crates/capsec-deep/README.md b/crates/capsec-deep/README.md new file mode 100644 index 0000000..8fb3be7 --- /dev/null +++ b/crates/capsec-deep/README.md @@ -0,0 +1,60 @@ +# capsec-deep + +MIR-based deep analysis driver for capsec. Uses `rustc`'s Mid-level IR to detect ambient authority usage that syntactic analysis misses — macro-expanded FFI calls, trait dispatch, and generic instantiation. + +## Requirements + +- Nightly Rust toolchain (pinned in `rust-toolchain.toml`) +- `rustc-dev` and `llvm-tools` components + +## Install + +```bash +cd crates/capsec-deep +cargo install --path . +``` + +This installs the `capsec-driver` binary, which `cargo capsec audit --deep` invokes automatically. 
+ +## How it works + +`capsec-driver` is a custom Rust compiler driver. When invoked via `RUSTC_WRAPPER`, it intercepts every crate compilation, runs the normal compiler pipeline through type checking, then walks the MIR of every function looking for: + +- **Authority calls** — `std::fs::*`, `std::net::*`, `std::env::*`, `std::process::*` resolved through the full type system (including macro expansion) +- **FFI calls** — any call to a `DefKind::ForeignFn` item (catches `-sys` crate wrappers like `libgit2-sys`, `sqlite3-sys`) + +Findings are written as JSONL to a temp file, which the main `cargo-capsec` CLI reads, merges with syntactic findings, and feeds into the cross-crate export map system for transitive propagation. + +## Architecture + +```mermaid +flowchart TD + A["cargo capsec audit --deep"] --> B["cargo check\n(RUSTC_WRAPPER=capsec-driver)"] + B --> C["capsec-driver replaces rustc\nfor each crate"] + C --> D["after_analysis callback"] + D --> E["Walk MIR BasicBlocks\nTerminatorKind::Call"] + E --> F["Extract callee DefId\ntcx.def_path_str()"] + F --> G{Classify call} + G -->|"std::fs, std::net,\nstd::env, std::process"| H["Authority finding\n(FS/NET/ENV/PROC)"] + G -->|"tcx.is_foreign_item()"| I["FFI finding"] + G -->|"No match"| J["Skip"] + H --> K["Write JSONL to\n$CAPSEC_DEEP_OUTPUT"] + I --> K + K --> L["cargo-capsec reads JSONL\nbuilds export maps"] + L --> M["Phase 2: workspace scan\nwith MIR export maps injected"] + M --> N["Unified cross-crate\ntransitive findings"] +``` + +## Standalone testing + +```bash +# Test on a single file +CAPSEC_DEEP_DEBUG=1 cargo run -- --edition 2024 tests/fixtures/simple_fs.rs + +# Test FFI detection through macros +CAPSEC_DEEP_DEBUG=1 cargo run -- --edition 2024 tests/fixtures/macro_ffi.rs +``` + +## Excluded from workspace + +This crate requires nightly and is listed in the workspace `exclude` list. It builds independently and does not affect `cargo test --workspace` or `cargo check --workspace` on stable. 
diff --git a/crates/capsec-deep/build.rs b/crates/capsec-deep/build.rs
new file mode 100644
index 0000000..04ec1f1
--- /dev/null
+++ b/crates/capsec-deep/build.rs
@@ -0,0 +1,30 @@
+/// Embeds the sysroot library path so the binary can find librustc_driver at runtime.
+///
+/// Asks the compiler named by `$RUSTC` (falling back to `rustc` on `PATH`)
+/// for its sysroot and emits an rpath link argument pointing at `<sysroot>/lib`.
+///
+/// # Panics
+///
+/// Panics (failing the build) if the compiler cannot be run, exits with a
+/// non-zero status, or prints a non-UTF-8 or empty sysroot.
+fn main() {
+    let rustc = std::env::var("RUSTC").unwrap_or_else(|_| "rustc".to_string());
+    let output = std::process::Command::new(&rustc)
+        .arg("--print=sysroot")
+        .output()
+        .expect("Failed to run rustc --print=sysroot");
+    // A failed probe leaves stdout empty, which would silently produce the
+    // bogus rpath "/lib" — fail the build loudly instead.
+    assert!(
+        output.status.success(),
+        "rustc --print=sysroot exited with {}",
+        output.status
+    );
+    let sysroot = String::from_utf8(output.stdout)
+        .expect("Invalid UTF-8 from rustc --print=sysroot");
+    let sysroot = sysroot.trim();
+    assert!(!sysroot.is_empty(), "rustc --print=sysroot printed an empty path");
+
+    // Link against the sysroot lib directory so librustc_driver.dylib/.so is found
+    println!("cargo:rustc-link-arg=-Wl,-rpath,{sysroot}/lib");
+}
diff --git a/crates/capsec-deep/rust-toolchain.toml b/crates/capsec-deep/rust-toolchain.toml
new file mode 100644
index 0000000..6b670b9
--- /dev/null
+++ b/crates/capsec-deep/rust-toolchain.toml
@@ -0,0 +1,4 @@
+[toolchain]
+channel = "nightly-2026-02-17"
+components = ["rustc-dev", "llvm-tools", "rust-src"]
+profile = "minimal"
diff --git a/crates/capsec-deep/src/main.rs b/crates/capsec-deep/src/main.rs
new file mode 100644
index 0000000..3505016
--- /dev/null
+++ b/crates/capsec-deep/src/main.rs
@@ -0,0 +1,414 @@
+//! capsec-driver — MIR-based deep analysis for capsec.
+//!
+//! This binary is used as `RUSTC_WRAPPER` to intercept compilation
+//! of all crates (workspace + dependencies) and analyze their MIR for
+//! ambient authority usage.
+//!
+//! It walks every function's MIR, extracts `TerminatorKind::Call` targets,
+//! classifies them against known authority patterns (FS, NET, ENV, PROC, FFI),
+//! and writes findings as JSONL to a file specified by `$CAPSEC_DEEP_OUTPUT`.
+//!
+//! Requires nightly Rust with the `rustc-dev` component installed.
+
+#![feature(rustc_private)]
+
+extern crate rustc_driver;
+extern crate rustc_hir;
+extern crate rustc_interface;
+extern crate rustc_middle;
+extern crate rustc_session;
+extern crate rustc_span;
+
+use rustc_driver::Compilation;
+use rustc_hir::def::DefKind;
+use rustc_hir::def_id::LOCAL_CRATE;
+use rustc_interface::interface::Compiler;
+use rustc_middle::mir::TerminatorKind;
+use rustc_middle::ty::TyCtxt;
+use serde::Serialize;
+use std::io::Write;
+
+/// A finding from MIR analysis — matches the capsec Finding JSON schema.
+///
+/// One value is produced per detected call site and serialized as a single
+/// JSONL record.
+#[derive(Debug, Clone, Serialize)]
+struct DeepFinding {
+    /// File containing the enclosing function: the local path when rustc has
+    /// one, otherwise the debug rendering of the file name.
+    file: String,
+    /// Item name of the enclosing function (no module path).
+    function: String,
+    /// Line of the start of the enclosing function's definition span.
+    function_line: usize,
+    /// Line of the start of the call terminator's span.
+    call_line: usize,
+    /// Display column of the start of the call terminator's span.
+    call_col: usize,
+    /// Resolved path of the callee, as rendered by `tcx.def_path_str`.
+    call_text: String,
+    /// Authority category: "Fs", "Net", "Env", "Process" or "Ffi".
+    category: String,
+    /// Finer-grained kind within the category, e.g. "read", "write", "ffi_call".
+    subcategory: String,
+    /// Risk level as a PascalCase string: "Medium", "High" or "Critical".
+    risk: String,
+    /// Human-readable description that includes the callee path.
+    description: String,
+    /// True when the crate name starts with `build_script_`.
+    is_build_script: bool,
+    /// rustc's name for the crate being compiled.
+    crate_name: String,
+    /// Value of `$CAPSEC_CRATE_VERSION`, or "0.0.0" when it is unset.
+    crate_version: String,
+    /// Always `false` when emitted by this driver.
+    is_deny_violation: bool,
+    /// Always `false` when emitted by this driver.
+    is_transitive: bool,
+}
+
+/// Authority category for a detected call.
+///
+/// All fields are static strings copied into a [`DeepFinding`] on a match.
+#[derive(Debug, Clone)]
+struct AuthorityMatch {
+    /// Authority category, e.g. "Fs" or "Net".
+    category: &'static str,
+    /// Finer-grained kind within the category.
+    subcategory: &'static str,
+    /// Risk level: "Medium", "High" or "Critical".
+    risk: &'static str,
+    /// Short label that is prefixed to the callee path in the finding description.
+    description: &'static str,
+}
+
+/// Classifies a resolved function path against known authority patterns.
+/// Risk values use PascalCase to match the `Risk` enum serialization in cargo-capsec.
+///
+/// `path` is the callee path as rendered by `tcx.def_path_str`. Patterns are
+/// tried in order and the first match wins; `None` means the call is not a
+/// recognised authority use.
+fn classify_authority(path: &str) -> Option<AuthorityMatch> {
+    // Filesystem
+    if path.starts_with("std::fs::") || path.starts_with("core::fs::") {
+        // Substrings that mark a call as mutating the filesystem. A single list
+        // drives both subcategory and risk so the two can never disagree
+        // (previously `rename` was a "write" but only Medium risk, and
+        // `copy`/`hard_link`/`set_*` were reported as reads).
+        const FS_MUTATING_OPS: [&str; 11] = [
+            "write",
+            "create",
+            "remove",
+            "rename",
+            "copy",
+            "hard_link",
+            "soft_link",
+            "set_permissions",
+            "set_len",
+            "set_modified",
+            "set_times",
+        ];
+        let mutates = FS_MUTATING_OPS.iter().any(|op| path.contains(*op));
+        let subcategory = if mutates { "write" } else { "read" };
+        let risk = if path.contains("remove_dir_all") {
+            "Critical"
+        } else if mutates {
+            "High"
+        } else {
+            "Medium"
+        };
+        return Some(AuthorityMatch {
+            category: "Fs",
+            subcategory,
+            risk,
+            description: "Filesystem access",
+        });
+    }
+
+    // File::open, File::create
+    if (path.contains("::File::open") || path.contains("::File::create"))
+        && (path.starts_with("std::") || path.contains("fs::"))
+    {
+        let (sub, risk) = if path.contains("create") {
+            ("write", "High")
+        } else {
+            ("read", "Medium")
+        };
+        return Some(AuthorityMatch {
+            category: "Fs",
+            subcategory: sub,
+            risk,
+            description: "File access",
+        });
+    }
+
+    // OpenOptions
+    if path.contains("OpenOptions") && path.contains("open") {
+        return Some(AuthorityMatch {
+            category: "Fs",
+            subcategory: "read+write",
+            risk: "Medium",
+            description: "File access with custom options",
+        });
+    }
+
+    // Tokio filesystem
+    if path.starts_with("tokio::fs::") {
+        return Some(AuthorityMatch {
+            category: "Fs",
+            subcategory: "async",
+            risk: "Medium",
+            description: "Async filesystem access",
+        });
+    }
+
+    // Network — std
+    if path.starts_with("std::net::") {
+        let risk = if path.contains("connect") || path.contains("bind") {
+            "High"
+        } else {
+            "Medium"
+        };
+        return Some(AuthorityMatch {
+            category: "Net",
+            subcategory: "connect",
+            risk,
+            description: "Network access",
+        });
+    }
+
+    // Network — tokio
+    if path.starts_with("tokio::net::") {
+        return Some(AuthorityMatch {
+            category: "Net",
+            subcategory: "async_connect",
+            risk: "High",
+            description: "Async network access",
+        });
+    }
+
+    // Network — reqwest
+    if path.starts_with("reqwest::") {
+        return Some(AuthorityMatch {
+            category: "Net",
+            subcategory: "http",
+            risk: "High",
+            description: "HTTP request",
+        });
+    }
+
+    // Network — hyper
+    if path.starts_with("hyper::") && (path.contains("request") || path.contains("bind") || path.contains("connect")) {
+        return Some(AuthorityMatch {
+            category: "Net",
+            subcategory: "http",
+            risk: "High",
+            description: "Hyper HTTP",
+        });
+    }
+
+    // Environment
+    if path.starts_with("std::env::") {
+        let (sub, risk) = if path.contains("set_var") || path.contains("remove_var") || path.contains("set_current_dir") {
+            ("write", "High")
+        } else {
+            ("read", "Medium")
+        };
+        return Some(AuthorityMatch {
+            category: "Env",
+            subcategory: sub,
+            risk,
+            description: "Environment access",
+        });
+    }
+
+    // Process
+    if path.starts_with("std::process::") {
+        return Some(AuthorityMatch {
+            category: "Process",
+            subcategory: "spawn",
+            risk: "Critical",
+            description: "Process spawning",
+        });
+    }
+
+    None
+}
+
+/// The capsec analysis callbacks — hooks into the compiler after type checking.
+struct CapsecCallbacks;
+
+impl rustc_driver::Callbacks for CapsecCallbacks {
+    /// Scans the MIR of every local `fn`/associated `fn` for direct calls that
+    /// match an authority pattern or target a foreign item, then appends the
+    /// findings as JSONL to `$CAPSEC_DEEP_OUTPUT` (or pretty-prints them to
+    /// stderr when only `$CAPSEC_DEEP_DEBUG` is set).
+    ///
+    /// Always returns `Compilation::Continue`, so the build itself is unaffected.
+    fn after_analysis<'tcx>(
+        &mut self,
+        _compiler: &Compiler,
+        tcx: TyCtxt<'tcx>,
+    ) -> Compilation {
+        let crate_name = tcx.crate_name(LOCAL_CRATE).to_string();
+        let crate_version = std::env::var("CAPSEC_CRATE_VERSION").unwrap_or_else(|_| "0.0.0".to_string());
+        let debug = std::env::var("CAPSEC_DEEP_DEBUG").is_ok();
+
+        if debug {
+            eprintln!("[capsec-deep] Analyzing crate: {crate_name}");
+        }
+
+        // Skip std/core/alloc — not useful for authority analysis
+        if matches!(
+            crate_name.as_str(),
+            "std" | "core" | "alloc" | "compiler_builtins"
+                | "rustc_std_workspace_core" | "rustc_std_workspace_alloc"
+                | "panic_unwind" | "panic_abort" | "unwind"
+                | "hashbrown" | "std_detect" | "rustc_demangle"
+                | "addr2line" | "gimli" | "miniz_oxide" | "adler2" | "object" | "memchr"
+                | "cfg_if" | "libc"
+        ) {
+            if debug {
+                eprintln!("[capsec-deep] Skipping stdlib/low-level crate: {crate_name}");
+            }
+            return Compilation::Continue;
+        }
+
+        // Skip proc-macro crates (compile-time only, not runtime authority)
+        if tcx.crate_types().contains(&rustc_session::config::CrateType::ProcMacro) {
+            if debug {
+                eprintln!("[capsec-deep] Skipping proc-macro crate: {crate_name}");
+            }
+            return Compilation::Continue;
+        }
+
+        // Detect build scripts (the prefix test also covers "build_script_build")
+        let is_build_script = crate_name.starts_with("build_script_");
+
+        let mut findings: Vec<DeepFinding> = Vec::new();
+        let source_map = tcx.sess.source_map();
+
+        // Walk all local function bodies
+        for local_def_id in tcx.hir_body_owners() {
+            let def_id = local_def_id.to_def_id();
+            let def_kind = tcx.def_kind(def_id);
+
+            // Only analyze functions and methods
+            // NOTE(review): closures are separate body owners and are filtered out
+            // here, so calls made inside a closure body are presumably not
+            // reported — confirm whether that is intended.
+            if !matches!(def_kind, DefKind::Fn | DefKind::AssocFn) {
+                continue;
+            }
+
+            // Get the function path and span
+            let fn_path = tcx.def_path_str(def_id);
+            let fn_span = tcx.def_span(def_id);
+            let fn_name = tcx.item_name(def_id).to_string();
+            let fn_loc = source_map.lookup_char_pos(fn_span.lo());
+            let fn_file = match &fn_loc.file.name {
+                rustc_span::FileName::Real(real) => real
+                    .local_path()
+                    .map(|p| p.display().to_string())
+                    .unwrap_or_else(|| format!("{real:?}")),
+                other => format!("{other:?}"),
+            };
+            let fn_line = fn_loc.line;
+
+            // Get the optimized MIR for this function
+            let mir = match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+                tcx.optimized_mir(def_id)
+            })) {
+                Ok(mir) => mir,
+                Err(_) => {
+                    if debug {
+                        eprintln!("[capsec-deep] Skipping {fn_path}: MIR unavailable");
+                    }
+                    continue;
+                }
+            };
+
+            // Walk all basic blocks looking for Call terminators
+            for (_bb, bb_data) in mir.basic_blocks.iter_enumerated() {
+                let Some(terminator) = &bb_data.terminator else {
+                    continue;
+                };
+
+                if let TerminatorKind::Call { func, .. } = &terminator.kind {
+                    // Extract the callee DefId from the function operand
+                    let Some((callee_def_id, _generic_args)) = func.const_fn_def() else {
+                        continue; // indirect call (fn pointer, vtable) — skip
+                    };
+
+                    let callee_path = tcx.def_path_str(callee_def_id);
+
+                    // Get call site location
+                    let call_span = terminator.source_info.span;
+                    let call_loc = source_map.lookup_char_pos(call_span.lo());
+                    let call_line = call_loc.line;
+                    let call_col = call_loc.col_display;
+
+                    // Both checks below report the same call site; only the
+                    // classification fields differ.
+                    let make_finding =
+                        |category: &str, subcategory: &str, risk: &str, description: String| {
+                            DeepFinding {
+                                file: fn_file.clone(),
+                                function: fn_name.clone(),
+                                function_line: fn_line,
+                                call_line,
+                                call_col,
+                                call_text: callee_path.clone(),
+                                category: category.to_string(),
+                                subcategory: subcategory.to_string(),
+                                risk: risk.to_string(),
+                                description,
+                                is_build_script,
+                                crate_name: crate_name.clone(),
+                                crate_version: crate_version.clone(),
+                                is_deny_violation: false,
+                                is_transitive: false,
+                            }
+                        };
+
+                    // Check 1: Authority pattern match (FS, NET, ENV, PROC)
+                    if let Some(auth) = classify_authority(&callee_path) {
+                        findings.push(make_finding(
+                            auth.category,
+                            auth.subcategory,
+                            auth.risk,
+                            format!("{}: {}", auth.description, callee_path),
+                        ));
+                    }
+
+                    // Check 2: FFI — calls to foreign functions
+                    if tcx.is_foreign_item(callee_def_id) {
+                        findings.push(make_finding(
+                            "Ffi",
+                            "ffi_call",
+                            "High",
+                            format!("Calls FFI function {callee_path}()"),
+                        ));
+                    }
+                }
+            }
+        }
+
+        if debug {
+            eprintln!("[capsec-deep] Found {} findings in {crate_name}", findings.len());
+        }
+
+        // Write findings as JSONL
+        if let Ok(output_path) = std::env::var("CAPSEC_DEEP_OUTPUT") {
+            // Cargo runs several driver processes in parallel, all appending to
+            // the same file. `writeln!` may split one record across multiple
+            // writes, so serialize everything first and append it with one
+            // `write_all` to keep records from different crates from interleaving.
+            let mut jsonl = String::new();
+            for finding in &findings {
+                match serde_json::to_string(finding) {
+                    Ok(json) => {
+                        jsonl.push_str(&json);
+                        jsonl.push('\n');
+                    }
+                    Err(e) => {
+                        eprintln!("[capsec-deep] Failed to serialize a finding in {crate_name}: {e}");
+                    }
+                }
+            }
+
+            // The file is opened (and created) even when there is nothing to
+            // write, as before.
+            match std::fs::OpenOptions::new()
+                .create(true)
+                .append(true)
+                .open(&output_path)
+            {
+                Ok(mut file) => {
+                    if let Err(e) = file.write_all(jsonl.as_bytes()) {
+                        eprintln!("[capsec-deep] Failed to write findings to {output_path}: {e}");
+                    }
+                }
+                Err(e) => {
+                    eprintln!("[capsec-deep] Failed to open {output_path}: {e}");
+                }
+            }
+        } else if debug {
+            // Print to stderr in debug mode when no output file specified
+            for finding in &findings {
+                if let Ok(json) = serde_json::to_string_pretty(finding) {
+                    eprintln!("{json}");
+                }
+            }
+        }
+
+        Compilation::Continue
+    }
+}
+
+/// Checks if this invocation is a cargo probe (e.g., `--print=cfg`) rather than
+/// an actual compilation. Cargo calls the wrapper with these flags to learn about
+/// the target — we must delegate to real rustc for these.
+fn is_target_info_query(args: &[String]) -> bool {
+    args.iter().any(|a| {
+        a.starts_with("--print")
+            || a == "-vV"
+            || a == "--version"
+    })
+}
+
+/// Entry point: normalizes the argument list, then runs rustc either plainly
+/// (for cargo's probe invocations) or with the capsec analysis callbacks.
+fn main() {
+    // Install the ICE hook for useful panic reports
+    rustc_driver::install_ice_hook(
+        "https://github.com/auths-dev/capsec/issues",
+        |_| (),
+    );
+
+    let mut args: Vec<String> = std::env::args().collect();
+
+    // When used as RUSTC_WRAPPER (or RUSTC_WORKSPACE_WRAPPER), cargo invokes us as:
+    //   capsec-driver /path/to/real/rustc [rustc args...]
+    // The second arg is the real rustc path — we need to strip it since
+    // run_compiler expects args[0] to be the binary name followed by rustc flags.
+    // Compare the file name only: a substring test also strips unrelated paths
+    // that merely contain "/rustc" and misses `rustc.exe`.
+    let wraps_rustc = args.get(1).is_some_and(|arg| {
+        std::path::Path::new(arg)
+            .file_name()
+            .is_some_and(|name| name == "rustc" || name == "rustc.exe")
+    });
+    if wraps_rustc {
+        args.remove(1);
+    }
+
+    // For cargo probe calls (--print=cfg, --version, etc.), run as plain rustc
+    // without our analysis callbacks. This is the pattern used by Miri and Clippy.
+    if is_target_info_query(&args) {
+        let mut callbacks = rustc_driver::TimePassesCallbacks::default();
+        rustc_driver::run_compiler(&args, &mut callbacks);
+        return;
+    }
+
+    // For actual compilations, run with our analysis callbacks
+    let mut callbacks = CapsecCallbacks;
+    rustc_driver::run_compiler(&args, &mut callbacks);
+}
diff --git a/crates/capsec-deep/tests/fixtures/clean.rs b/crates/capsec-deep/tests/fixtures/clean.rs
new file mode 100644
index 0000000..b19df81
--- /dev/null
+++ b/crates/capsec-deep/tests/fixtures/clean.rs
@@ -0,0 +1,7 @@
+fn add(a: i32, b: i32) -> i32 {
+    a + b
+}
+
+fn main() {
+    let _ = add(1, 2);
+}
diff --git a/crates/capsec-deep/tests/fixtures/macro_ffi.rs b/crates/capsec-deep/tests/fixtures/macro_ffi.rs
new file mode 100644
index 0000000..89cfbf1
--- /dev/null
+++ b/crates/capsec-deep/tests/fixtures/macro_ffi.rs
@@ -0,0 +1,20 @@
+// Simulates the pattern in git2/sqlite -sys crates:
+// a macro that expands to an FFI call.
+
+unsafe extern "C" {
+    fn sqlite3_open(filename: *const u8, db: *mut *mut u8) -> i32;
+}
+
+macro_rules! ffi_call {
+    ($fn:ident($($arg:expr),*)) => {
+        unsafe { $fn($($arg),*) }
+    };
+}
+
+fn open_database() -> i32 {
+    ffi_call!(sqlite3_open(std::ptr::null(), std::ptr::null_mut()))
+}
+
+fn main() {
+    let _ = open_database();
+}
diff --git a/crates/capsec-deep/tests/fixtures/simple_fs.rs b/crates/capsec-deep/tests/fixtures/simple_fs.rs
new file mode 100644
index 0000000..344fa1e
--- /dev/null
+++ b/crates/capsec-deep/tests/fixtures/simple_fs.rs
@@ -0,0 +1,9 @@
+use std::fs;
+
+fn load_config() -> Vec<u8> {
+    fs::read("config.toml").unwrap()
+}
+
+fn main() {
+    let _ = load_config();
+}
diff --git a/crates/cargo-capsec/src/cli.rs b/crates/cargo-capsec/src/cli.rs
index ad6b26d..9b91f65 100644
--- a/crates/cargo-capsec/src/cli.rs
+++ b/crates/cargo-capsec/src/cli.rs
@@ -70,6 +70,12 @@ pub struct AuditArgs {
     #[arg(long, default_value_t = 1)]
     pub dep_depth: usize,
 
+    /// Use MIR-based deep analysis (requires nightly toolchain + capsec-driver).
+    /// Catches macro-expanded FFI, trait dispatch, and generic instantiation
+    /// that syntactic analysis misses.
+    #[arg(long)]
+    pub deep: bool,
+
     /// Minimum risk level to report
     #[arg(long, default_value = "low", value_parser = ["low", "medium", "high", "critical"])]
     pub min_risk: String,
diff --git a/crates/cargo-capsec/src/detector.rs b/crates/cargo-capsec/src/detector.rs
index 2994030..ae598de 100644
--- a/crates/cargo-capsec/src/detector.rs
+++ b/crates/cargo-capsec/src/detector.rs
@@ -34,7 +34,7 @@ use std::collections::{HashMap, HashSet};
 /// The detector deduplicates findings by `(file, function, call_line, call_col)`,
 /// so each unique call site appears at most once even if multiple import paths
 /// could match it.
-#[derive(Debug, Clone, Serialize)]
+#[derive(Debug, Clone, Serialize, serde::Deserialize)]
 pub struct Finding {
     /// Source file path.
pub file: String, diff --git a/crates/cargo-capsec/src/main.rs b/crates/cargo-capsec/src/main.rs index a260021..07715c3 100644 --- a/crates/cargo-capsec/src/main.rs +++ b/crates/cargo-capsec/src/main.rs @@ -212,6 +212,160 @@ fn run_audit(args: AuditArgs) { } } + // ── Deep MIR analysis (runs before Phase 2 so findings feed into export maps) ── + if args.deep { + let output_path = + std::env::temp_dir().join(format!("capsec-deep-{}.jsonl", std::process::id())); + + let driver_available = capsec_std::process::command("which", &spawn_cap) + .ok() + .and_then(|mut cmd| cmd.arg("capsec-driver").output().ok()) + .map(|o| o.status.success()) + .unwrap_or(false); + + if !driver_available { + eprintln!("Error: --deep requires capsec-driver (MIR analysis driver)."); + eprintln!("Install with: cd crates/capsec-deep && cargo install --path ."); + eprintln!("Continuing with syntactic-only analysis..."); + } else { + let deep_target_dir = workspace_root.join("target/capsec-deep"); + let toolchain = { + let pinned = "nightly-2026-02-17"; + let has_pinned = capsec_std::process::command("rustup", &spawn_cap) + .ok() + .and_then(|mut cmd| { + cmd.arg("run") + .arg(pinned) + .arg("rustc") + .arg("--version") + .output() + .ok() + }) + .map(|o| o.status.success()) + .unwrap_or(false); + if has_pinned { pinned } else { "nightly" } + }; + + let _ = std::fs::remove_dir_all(&deep_target_dir); + + let deep_result = capsec_std::process::command("cargo", &spawn_cap) + .ok() + .and_then(|mut cmd| { + cmd.arg("check") + .current_dir(&path_arg) + .env("RUSTC_WRAPPER", "capsec-driver") + .env("CAPSEC_DEEP_OUTPUT", &output_path) + .env("CAPSEC_CRATE_VERSION", "0.0.0") + .env("CARGO_TARGET_DIR", &deep_target_dir) + .env("RUSTUP_TOOLCHAIN", toolchain) + .output() + .ok() + }); + + // Build name/version lookup for patching MIR findings + let crate_lookup: HashMap = workspace_crates + .iter() + .chain(dep_crates.iter()) + .map(|c| { + ( + discovery::normalize_crate_name(&c.name), + (c.name.clone(), 
c.version.clone()), + ) + }) + .collect(); + + let mut mir_findings: Vec = Vec::new(); + + match deep_result { + Some(output) if output.status.success() || output_path.exists() => { + if let Ok(contents) = capsec_std::fs::read_to_string(&output_path, &fs_read) + { + for line in contents.lines() { + if line.trim().is_empty() { + continue; + } + match serde_json::from_str::(line) { + Ok(mut finding) => { + let normalized = + discovery::normalize_crate_name(&finding.crate_name); + if let Some((cargo_name, ver)) = + crate_lookup.get(&normalized) + { + finding.crate_name = cargo_name.clone(); + if finding.crate_version == "0.0.0" { + finding.crate_version = ver.clone(); + } + } + mir_findings.push(finding); + } + Err(e) => { + eprintln!("Warning: Failed to parse deep finding: {e}"); + } + } + } + } + let _ = std::fs::remove_file(&output_path); + } + Some(output) => { + let stderr = String::from_utf8_lossy(&output.stderr); + eprintln!("Warning: Deep analysis failed (cargo check returned non-zero)."); + for line in stderr + .lines() + .filter(|l| l.contains("error") || l.contains("Error")) + .take(5) + { + eprintln!(" {line}"); + } + if stderr.contains("incompatible version of rustc") { + eprintln!( + " Hint: try `rm -rf target/capsec-deep` to clear stale artifacts." + ); + } + eprintln!("Continuing with syntactic-only findings."); + } + None => { + eprintln!("Warning: Could not invoke cargo check for deep analysis."); + eprintln!("Continuing with syntactic-only findings."); + } + } + + if !mir_findings.is_empty() { + eprintln!( + "Deep analysis: {} MIR-level findings. 
Building export maps...", + mir_findings.len() + ); + + // Build export maps from MIR findings so they propagate to Phase 2 + let mut mir_by_crate: HashMap> = HashMap::new(); + for f in &mir_findings { + mir_by_crate + .entry(f.crate_name.clone()) + .or_default() + .push(f.clone()); + } + for (crate_name, findings) in &mir_by_crate { + let normalized = discovery::normalize_crate_name(crate_name); + let src_dir = dep_crates + .iter() + .chain(workspace_crates.iter()) + .find(|c| discovery::normalize_crate_name(&c.name) == normalized) + .map(|c| c.source_dir.clone()) + .unwrap_or_default(); + let mir_emap = export_map::build_export_map( + &normalized, + &findings[0].crate_version, + findings, + &src_dir, + ); + export_maps.push(mir_emap); + } + + // Add MIR findings to the main collection + all_findings.extend(mir_findings); + } + } + } + // Phase 2: Scan workspace crates with dependency export maps injected. // Process in topological order so workspace-to-workspace findings propagate // (e.g., radicle-cli depends on radicle → radicle scanned first). 
@@ -357,6 +511,22 @@ fn run_audit(args: AuditArgs) {
         }
     }
 
+    // Dedup: if both syntactic and MIR found the same call site, keep one.
+    // Key on the workspace-relative path (the same normalization applied just
+    // below) so an absolute and a relative spelling of one file still collide.
+    {
+        let mut seen = std::collections::HashSet::new();
+        all_findings.retain(|f| {
+            seen.insert((
+                make_relative(&f.file, &workspace_root),
+                f.function.clone(),
+                f.call_line,
+                f.call_col,
+                f.category.label().to_string(),
+            ))
+        });
+    }
+
     // Normalize file paths to workspace-relative for portable baselines and output
     for f in &mut all_findings {
         f.file = make_relative(&f.file, &workspace_root);
diff --git a/crates/cargo-capsec/tests/integration.rs b/crates/cargo-capsec/tests/integration.rs
index b98d349..2b80bdd 100644
--- a/crates/cargo-capsec/tests/integration.rs
+++ b/crates/cargo-capsec/tests/integration.rs
@@ -579,3 +579,81 @@ fn workspace_to_workspace_propagation() {
         "app::init should get ws-to-ws FS finding from core_lib::read_config"
     );
 }
+
+#[test]
+fn mir_findings_feed_into_cross_crate_propagation() {
+    // Simulates the unified chain: MIR finds FFI in a dep, that finding becomes
+    // an export map entry, and workspace code calling the dep gets a cross-crate finding.
+    //
+    // This is the flow:
+    // 1. MIR driver scans "ffi_dep" → finds open_db() calls sqlite3_open (FFI)
+    // 2. That finding is built into an export map for ffi_dep
+    // 3. Workspace "app" calls ffi_dep::open_db()
+    // 4. The export map entry matches → app::init gets a cross-crate FFI finding
+
+    // Step 1: Simulate MIR findings from a dependency (as if read from JSONL)
+    let mir_finding = cargo_capsec::detector::Finding {
+        file: "src/lib.rs".to_string(),
+        function: "open_db".to_string(),
+        function_line: 5,
+        call_line: 6,
+        call_col: 5,
+        call_text: "sqlite3_open".to_string(),
+        category: Category::Ffi,
+        subcategory: "ffi_call".to_string(),
+        risk: cargo_capsec::authorities::Risk::High,
+        description: "Calls FFI function sqlite3_open()".to_string(),
+        is_build_script: false,
+        crate_name: "ffi_dep".to_string(),
+        crate_version: "1.0.0".to_string(),
+        is_deny_violation: false,
+        is_transitive: false,
+    };
+
+    // Step 2: Build export map from the MIR finding (same as main.rs does)
+    let mir_export_map = build_export_map("ffi_dep", "1.0.0", &[mir_finding], Path::new("src"));
+    assert!(
+        !mir_export_map.exports.is_empty(),
+        "MIR finding should produce an export map entry"
+    );
+
+    // Step 3: Convert to custom authorities and inject into detector
+    let customs = export_map_to_custom_authorities(&[mir_export_map]);
+    assert!(
+        !customs.is_empty(),
+        "Export map should produce custom authorities"
+    );
+
+    let mut det = Detector::new();
+    det.add_custom_authorities(&customs);
+
+    // Step 4: Scan workspace code that calls ffi_dep::open_db()
+    let app_source = r#"
+        pub fn init() -> i32 {
+            ffi_dep::open_db()
+        }
+    "#;
+    let app_parsed = parse_source(app_source, "src/lib.rs").unwrap();
+    let app_findings = det.analyse(&app_parsed, "app", "0.1.0", &[]);
+
+    // The unified chain: app::init → ffi_dep::open_db → sqlite3_open (FFI)
+    let init_findings: Vec<_> = app_findings
+        .iter()
+        .filter(|f| f.function == "init")
+        .collect();
+    assert!(
+        !init_findings.is_empty(),
+        "app::init should get cross-crate FFI finding from MIR-discovered ffi_dep::open_db. \
+         This proves MIR findings feed into the export map system and propagate transitively. \
+         Got: {app_findings:?}"
+    );
+    assert!(
+        init_findings[0].description.contains("Cross-crate"),
+        "Finding should be marked as cross-crate"
+    );
+    assert_eq!(
+        init_findings[0].category,
+        Category::Ffi,
+        "Finding should carry the FFI category from the MIR-discovered dep"
+    );
+}