diff --git a/crates/cargo-capsec/src/config.rs b/crates/cargo-capsec/src/config.rs index 88d6016..8bd2400 100644 --- a/crates/cargo-capsec/src/config.rs +++ b/crates/cargo-capsec/src/config.rs @@ -277,26 +277,58 @@ pub fn resolve_classification( cargo_toml_classification } +/// Pre-compiled exclude patterns for efficient repeated matching. +#[allow(dead_code)] +pub struct CompiledExcludes { + set: globset::GlobSet, +} + +#[allow(dead_code)] +impl CompiledExcludes { + /// Compiles exclude patterns once. Invalid patterns are silently skipped. + pub fn new(patterns: &[String]) -> Self { + let mut builder = globset::GlobSetBuilder::new(); + for p in patterns { + if let Ok(glob) = globset::Glob::new(p) { + builder.add(glob); + } + } + Self { + set: builder + .build() + .unwrap_or_else(|_| globset::GlobSetBuilder::new().build().unwrap()), + } + } + + /// Returns `true` if a file path matches any compiled exclude pattern. + pub fn is_excluded(&self, path: &Path) -> bool { + let path_str = path.display().to_string(); + self.set.is_match(&path_str) + || path + .file_name() + .and_then(|n| n.to_str()) + .is_some_and(|name| self.set.is_match(name)) + } +} + /// Returns `true` if a file path matches any `[analysis].exclude` glob pattern. /// /// Uses the [`globset`] crate for correct glob semantics (supports `**`, `*`, /// `?`, and character classes). pub fn should_exclude(path: &Path, excludes: &[String]) -> bool { let path_str = path.display().to_string(); - excludes.iter().any(|pattern| { - match globset::Glob::new(pattern) { + excludes + .iter() + .any(|pattern| match globset::Glob::new(pattern) { Ok(glob) => match glob.compile_matcher().is_match(&path_str) { true => true, - false => { - // Also try matching against just the file name for simple patterns - path.file_name() - .and_then(|n| n.to_str()) - .is_some_and(|name| glob.compile_matcher().is_match(name)) - } + false => path + .file_name() + .and_then(|n| n.to_str()) + .is_some_and(|name| glob.compile_matcher().is_match(name)), }, Err(_) => path_str.contains(pattern), - } - }) + }) } #[cfg(test)] diff --git a/crates/cargo-capsec/src/deep.rs b/crates/cargo-capsec/src/deep.rs new file mode 100644 index 0000000..f81e5fb --- /dev/null +++ b/crates/cargo-capsec/src/deep.rs @@ -0,0 +1,231 @@ +//! Deep MIR analysis integration. +//! +//! Invokes `capsec-driver` as `RUSTC_WRAPPER` to analyze all crates via MIR, +//! then reads findings from JSONL and builds export maps for cross-crate propagation. + +use crate::detector::Finding; +use crate::discovery::{self, CrateInfo}; +use crate::export_map::{self, CrateExportMap}; +use std::collections::HashMap; +use std::path::Path; + +/// Pinned nightly date for capsec-driver. Must match `crates/capsec-deep/rust-toolchain.toml`. +const PINNED_NIGHTLY: &str = "nightly-2026-02-17"; + +/// Result of running deep MIR analysis. +pub struct DeepResult { + /// Findings from the MIR driver, with crate names/versions patched to match Cargo metadata. + pub findings: Vec, + /// Export maps built from MIR findings, ready to inject into Phase 2. + pub export_maps: Vec, + /// Warnings encountered during analysis (driver missing, parse errors, etc.). + pub warnings: Vec, +} + +/// Runs the MIR-based deep analysis driver on the target project. +/// +/// Invokes `capsec-driver` via `RUSTC_WRAPPER` + `cargo check`, reads JSONL +/// findings, patches crate names/versions, and builds export maps. +/// +/// Returns an empty `DeepResult` if the driver is not available or fails. +/// Warnings are collected in `DeepResult::warnings` rather than printed directly. +pub fn run_deep_analysis( + path: &Path, + workspace_root: &Path, + workspace_crates: &[CrateInfo], + dep_crates: &[CrateInfo], + fs_read: &impl capsec_core::cap_provider::CapProvider, + spawn_cap: &impl capsec_core::cap_provider::CapProvider, +) -> DeepResult { + let mut warnings: Vec = Vec::new(); + let output_path = + std::env::temp_dir().join(format!("capsec-deep-{}.jsonl", std::process::id())); + + // Check if capsec-driver is available by trying to run it + let driver_available = capsec_std::process::command("capsec-driver", spawn_cap) + .ok() + .and_then(|mut cmd| cmd.arg("--version").output().ok()) + .map(|o| o.status.success()) + .unwrap_or(false); + + if !driver_available { + warnings.push( + "--deep requires capsec-driver. Install with: cd crates/capsec-deep && cargo install --path .".to_string() + ); + return DeepResult { + findings: Vec::new(), + export_maps: Vec::new(), + warnings, + }; + } + + let deep_target_dir = workspace_root.join("target/capsec-deep"); + let toolchain = detect_nightly_toolchain(spawn_cap); + + // Clean to force full rebuild (incremental cache prevents driver from running) + let _ = std::fs::remove_dir_all(&deep_target_dir); + + let deep_result = capsec_std::process::command("cargo", spawn_cap) + .ok() + .and_then(|mut cmd| { + cmd.arg("check") + .current_dir(path) + .env("RUSTC_WRAPPER", "capsec-driver") + .env("CAPSEC_DEEP_OUTPUT", &output_path) + .env("CAPSEC_CRATE_VERSION", "0.0.0") + .env("CARGO_TARGET_DIR", &deep_target_dir) + .env("RUSTUP_TOOLCHAIN", toolchain) + .output() + .ok() + }); + + // Build name/version lookup for patching MIR findings + let crate_lookup: HashMap = workspace_crates + .iter() + .chain(dep_crates.iter()) + .map(|c| { + ( + discovery::normalize_crate_name(&c.name), + (c.name.clone(), c.version.clone()), + ) + }) + .collect(); + + let mir_findings = match deep_result { + Some(output) if output.status.success() || output_path.exists() => { + let findings = + parse_findings_jsonl(&output_path, &crate_lookup, fs_read, &mut warnings); + let _ = std::fs::remove_file(&output_path); + findings + } + Some(output) => { + let stderr = String::from_utf8_lossy(&output.stderr); + let mut msg = "Deep analysis failed (cargo check returned non-zero).".to_string(); + for line in stderr + .lines() + .filter(|l| l.contains("error") || l.contains("Error")) + .take(5) + { + msg.push_str(&format!("\n {line}")); + } + if stderr.contains("incompatible version of rustc") { + msg.push_str("\n Hint: try `rm -rf target/capsec-deep` to clear stale artifacts."); + } + warnings.push(msg); + Vec::new() + } + None => { + warnings.push("Could not invoke cargo check for deep analysis.".to_string()); + Vec::new() + } + }; + + // Build export maps from MIR findings + let export_maps = build_mir_export_maps(&mir_findings, workspace_crates, dep_crates); + + DeepResult { + findings: mir_findings, + export_maps, + warnings, + } +} + +/// Parses JSONL findings from the MIR driver output file. +/// Patches crate names (rustc → Cargo) and versions (0.0.0 → real) using the lookup. +fn parse_findings_jsonl( + output_path: &Path, + crate_lookup: &HashMap, + fs_read: &impl capsec_core::cap_provider::CapProvider, + warnings: &mut Vec, +) -> Vec { + let mut findings = Vec::new(); + let Ok(contents) = capsec_std::fs::read_to_string(output_path, fs_read) else { + return findings; + }; + for line in contents.lines() { + if line.trim().is_empty() { + continue; + } + match serde_json::from_str::(line) { + Ok(mut finding) => { + let normalized = discovery::normalize_crate_name(&finding.crate_name); + if let Some((cargo_name, ver)) = crate_lookup.get(&normalized) { + finding.crate_name = cargo_name.clone(); + if finding.crate_version == "0.0.0" { + finding.crate_version = ver.clone(); + } + } + findings.push(finding); + } + Err(e) => { + warnings.push(format!("Failed to parse deep finding: {e}")); + } + } + } + findings +} + +/// Builds export maps from MIR findings, grouped by crate. +fn build_mir_export_maps( + findings: &[Finding], + workspace_crates: &[CrateInfo], + dep_crates: &[CrateInfo], +) -> Vec { + if findings.is_empty() { + return Vec::new(); + } + + // Group findings by crate name + let mut by_crate: HashMap> = HashMap::new(); + for f in findings { + by_crate.entry(f.crate_name.clone()).or_default().push(f); + } + + let all_crates: Vec<&CrateInfo> = dep_crates.iter().chain(workspace_crates.iter()).collect(); + + let mut export_maps = Vec::new(); + for (crate_name, crate_findings) in &by_crate { + let normalized = discovery::normalize_crate_name(crate_name); + let src_dir = all_crates + .iter() + .find(|c| discovery::normalize_crate_name(&c.name) == normalized) + .map(|c| &c.source_dir); + + let Some(src_dir) = src_dir else { + eprintln!( + "Warning: MIR findings for unknown crate '{crate_name}', skipping export map" + ); + continue; + }; + + // Collect owned findings for build_export_map (which takes &[Finding]) + let owned: Vec = crate_findings.iter().map(|f| (*f).clone()).collect(); + let mir_emap = + export_map::build_export_map(&normalized, &owned[0].crate_version, &owned, src_dir); + export_maps.push(mir_emap); + } + export_maps +} + +/// Detects the nightly toolchain to use for the MIR driver. +fn detect_nightly_toolchain( + spawn_cap: &impl capsec_core::cap_provider::CapProvider, +) -> &'static str { + let has_pinned = capsec_std::process::command("rustup", spawn_cap) + .ok() + .and_then(|mut cmd| { + cmd.arg("run") + .arg(PINNED_NIGHTLY) + .arg("rustc") + .arg("--version") + .output() + .ok() + }) + .map(|o| o.status.success()) + .unwrap_or(false); + if has_pinned { + PINNED_NIGHTLY + } else { + "nightly" + } +} diff --git a/crates/cargo-capsec/src/lib.rs b/crates/cargo-capsec/src/lib.rs index 0f531a7..84e8f21 100644 --- a/crates/cargo-capsec/src/lib.rs +++ b/crates/cargo-capsec/src/lib.rs @@ -46,6 +46,7 @@ pub mod authorities; pub mod baseline; pub mod config; pub mod cross_crate; +pub mod deep; pub mod detector; pub mod discovery; pub mod export_map; diff --git a/crates/cargo-capsec/src/main.rs b/crates/cargo-capsec/src/main.rs index 07715c3..a9b9bbf 100644 --- a/crates/cargo-capsec/src/main.rs +++ b/crates/cargo-capsec/src/main.rs @@ -3,6 +3,7 @@ mod baseline; mod cli; mod config; mod cross_crate; +mod deep; mod detector; mod discovery; mod export_map; @@ -214,156 +215,25 @@ fn run_audit(args: AuditArgs) { // ── Deep MIR analysis (runs before Phase 2 so findings feed into export maps) ── if args.deep { - let output_path = - std::env::temp_dir().join(format!("capsec-deep-{}.jsonl", std::process::id())); - - let driver_available = capsec_std::process::command("which", &spawn_cap) - .ok() - .and_then(|mut cmd| cmd.arg("capsec-driver").output().ok()) - .map(|o| o.status.success()) - .unwrap_or(false); - - if !driver_available { - eprintln!("Error: --deep requires capsec-driver (MIR analysis driver)."); - eprintln!("Install with: cd crates/capsec-deep && cargo install --path ."); - eprintln!("Continuing with syntactic-only analysis..."); - } else { - let deep_target_dir = workspace_root.join("target/capsec-deep"); - let toolchain = { - let pinned = "nightly-2026-02-17"; - let has_pinned = capsec_std::process::command("rustup", &spawn_cap) - .ok() - .and_then(|mut cmd| { - cmd.arg("run") - .arg(pinned) - .arg("rustc") - .arg("--version") - .output() - .ok() - }) - .map(|o| o.status.success()) - .unwrap_or(false); - if has_pinned { pinned } else { "nightly" } - }; - - let _ = std::fs::remove_dir_all(&deep_target_dir); - - let deep_result = capsec_std::process::command("cargo", &spawn_cap) - .ok() - .and_then(|mut cmd| { - cmd.arg("check") - .current_dir(&path_arg) - .env("RUSTC_WRAPPER", "capsec-driver") - .env("CAPSEC_DEEP_OUTPUT", &output_path) - .env("CAPSEC_CRATE_VERSION", "0.0.0") - .env("CARGO_TARGET_DIR", &deep_target_dir) - .env("RUSTUP_TOOLCHAIN", toolchain) - .output() - .ok() - }); - - // Build name/version lookup for patching MIR findings - let crate_lookup: HashMap = workspace_crates - .iter() - .chain(dep_crates.iter()) - .map(|c| { - ( - discovery::normalize_crate_name(&c.name), - (c.name.clone(), c.version.clone()), - ) - }) - .collect(); - - let mut mir_findings: Vec = Vec::new(); - - match deep_result { - Some(output) if output.status.success() || output_path.exists() => { - if let Ok(contents) = capsec_std::fs::read_to_string(&output_path, &fs_read) - { - for line in contents.lines() { - if line.trim().is_empty() { - continue; - } - match serde_json::from_str::(line) { - Ok(mut finding) => { - let normalized = - discovery::normalize_crate_name(&finding.crate_name); - if let Some((cargo_name, ver)) = - crate_lookup.get(&normalized) - { - finding.crate_name = cargo_name.clone(); - if finding.crate_version == "0.0.0" { - finding.crate_version = ver.clone(); - } - } - mir_findings.push(finding); - } - Err(e) => { - eprintln!("Warning: Failed to parse deep finding: {e}"); - } - } - } - } - let _ = std::fs::remove_file(&output_path); - } - Some(output) => { - let stderr = String::from_utf8_lossy(&output.stderr); - eprintln!("Warning: Deep analysis failed (cargo check returned non-zero)."); - for line in stderr - .lines() - .filter(|l| l.contains("error") || l.contains("Error")) - .take(5) - { - eprintln!(" {line}"); - } - if stderr.contains("incompatible version of rustc") { - eprintln!( - " Hint: try `rm -rf target/capsec-deep` to clear stale artifacts." - ); - } - eprintln!("Continuing with syntactic-only findings."); - } - None => { - eprintln!("Warning: Could not invoke cargo check for deep analysis."); - eprintln!("Continuing with syntactic-only findings."); - } - } - - if !mir_findings.is_empty() { - eprintln!( - "Deep analysis: {} MIR-level findings. Building export maps...", - mir_findings.len() - ); - - // Build export maps from MIR findings so they propagate to Phase 2 - let mut mir_by_crate: HashMap> = HashMap::new(); - for f in &mir_findings { - mir_by_crate - .entry(f.crate_name.clone()) - .or_default() - .push(f.clone()); - } - for (crate_name, findings) in &mir_by_crate { - let normalized = discovery::normalize_crate_name(crate_name); - let src_dir = dep_crates - .iter() - .chain(workspace_crates.iter()) - .find(|c| discovery::normalize_crate_name(&c.name) == normalized) - .map(|c| c.source_dir.clone()) - .unwrap_or_default(); - let mir_emap = export_map::build_export_map( - &normalized, - &findings[0].crate_version, - findings, - &src_dir, - ); - export_maps.push(mir_emap); - } - - // Add MIR findings to the main collection - all_findings.extend(mir_findings); - } + let deep_result = deep::run_deep_analysis( + &path_arg, + &workspace_root, + &workspace_crates, + &dep_crates, + &fs_read, + &spawn_cap, + ); + for warning in &deep_result.warnings { + eprintln!("Warning: {warning}"); } + if !deep_result.findings.is_empty() { + eprintln!( + "Deep analysis: {} MIR-level findings. Building export maps...", + deep_result.findings.len() + ); + } + export_maps.extend(deep_result.export_maps); + all_findings.extend(deep_result.findings); } // Phase 2: Scan workspace crates with dependency export maps injected. @@ -854,15 +724,8 @@ fn run_badge(args: BadgeArgs) { } fn make_relative(file_path: &str, workspace_root: &Path) -> String { - let root_str = workspace_root.to_string_lossy(); - let root_prefix = if root_str.ends_with('/') { - root_str.to_string() - } else { - format!("{root_str}/") - }; - if file_path.starts_with(&root_prefix) { - file_path[root_prefix.len()..].to_string() - } else { - file_path.to_string() - } + Path::new(file_path) + .strip_prefix(workspace_root) + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_else(|_| file_path.to_string()) } diff --git a/crates/cargo-capsec/src/reporter.rs b/crates/cargo-capsec/src/reporter.rs index d69bacf..d03f94d 100644 --- a/crates/cargo-capsec/src/reporter.rs +++ b/crates/cargo-capsec/src/reporter.rs @@ -543,17 +543,10 @@ fn risk_to_security_severity(risk: Risk) -> String { /// Strips `workspace_root` prefix from a file path to produce a repo-relative URI. fn make_relative(file_path: &str, workspace_root: &Path) -> String { - let root_str = workspace_root.to_string_lossy(); - let root_prefix = if root_str.ends_with('/') { - root_str.to_string() - } else { - format!("{root_str}/") - }; - if file_path.starts_with(&root_prefix) { - file_path[root_prefix.len()..].to_string() - } else { - file_path.to_string() - } + Path::new(file_path) + .strip_prefix(workspace_root) + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_else(|_| file_path.to_string()) } /// Maps risk to SARIF precision (how confident we are in the finding).