diff --git a/Cargo.lock b/Cargo.lock index bc29b44..2d01fc7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -185,6 +185,7 @@ dependencies = [ "globset", "insta", "proc-macro2", + "rayon", "serde", "serde_json", "syn", @@ -266,6 +267,37 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "encode_unicode" version = "1.0.0" @@ -594,6 +626,26 @@ dependencies = [ "rand_core", ] +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "regex-automata" version = "0.4.14" diff --git a/Cargo.toml b/Cargo.toml index e85c3d0..37fe5eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,9 @@ toml = "1.0.7" # Output colored = "3.1.1" +# Parallelism +rayon = "1" + # Testing trybuild = "1" insta = { version = "1", features = ["yaml"] } diff --git a/README.md b/README.md index 4814fd4..fd26c3d 100644 --- a/README.md +++ b/README.md @@ -45,14 +45,27 @@ cargo install --path crates/cargo-capsec ### Run ```bash +# Scan workspace crates only (fast, default) cargo capsec audit + +# Scan workspace + dependencies — cross-crate propagation shows +# which of YOUR functions inherit authority from dependencies +cargo capsec audit --include-deps + +# Control dependency depth (default: 1 = direct deps only) +cargo capsec audit --include-deps --dep-depth 3 # up to 3 hops +cargo capsec audit --include-deps --dep-depth 0 # unlimited + +# Supply-chain view — only dependency findings +cargo capsec audit --deps-only ``` ``` my-app v0.1.0 ───────────── FS src/config.rs:8:5 fs::read_to_string load_config() - NET src/api.rs:15:9 TcpStream::connect fetch_data() + NET src/api.rs:15:9 reqwest::get fetch_data() + ↳ Cross-crate: reqwest::get() → TcpStream::connect [NET] PROC src/deploy.rs:42:17 Command::new run_migration() Summary diff --git a/crates/cargo-capsec/Cargo.toml b/crates/cargo-capsec/Cargo.toml index e12018a..de52a55 100644 --- a/crates/cargo-capsec/Cargo.toml +++ b/crates/cargo-capsec/Cargo.toml @@ -12,6 +12,10 @@ categories = ["development-tools", "command-line-utilities"] name = "cargo-capsec" path = "src/main.rs" +[features] +default = ["parallel"] +parallel = ["dep:rayon"] + [dependencies] clap.workspace = true syn.workspace = true @@ -22,6 +26,7 @@ serde.workspace = true serde_json.workspace = true toml.workspace = true colored.workspace = true +rayon = { workspace = true, optional = true } capsec-core.workspace = true capsec-std.workspace = true diff --git a/crates/cargo-capsec/src/authorities.rs b/crates/cargo-capsec/src/authorities.rs index 27aee99..ed7d4f3 100644 --- a/crates/cargo-capsec/src/authorities.rs +++ b/crates/cargo-capsec/src/authorities.rs @@ -8,7 +8,7 @@ //! The registry is compiled into the binary via [`build_registry`]. Users can extend it //! at runtime with custom patterns loaded from `.capsec.toml` (see [`CustomAuthority`]). -use serde::Serialize; +use serde::{Deserialize, Serialize}; /// The kind of ambient authority a call exercises. /// @@ -24,7 +24,7 @@ use serde::Serialize; /// | `Env` | Environment variable access | Yellow | /// | `Process` | Subprocess spawning (`Command::new`) | Magenta | /// | `Ffi` | Foreign function interface (`extern` blocks) | Cyan | -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] #[non_exhaustive] pub enum Category { /// Filesystem access: reads, writes, deletes, directory operations. @@ -65,7 +65,7 @@ impl Category { /// | `Medium` | Can read data or create resources | `fs::read`, `env::var`, `File::open` | /// | `High` | Can write, delete, or open network connections | `fs::write`, `TcpStream::connect` | /// | `Critical` | Can destroy data or execute arbitrary code | `remove_dir_all`, `Command::new` | -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] #[non_exhaustive] pub enum Risk { /// Read-only metadata or low-impact queries. diff --git a/crates/cargo-capsec/src/cli.rs b/crates/cargo-capsec/src/cli.rs index 1de1d34..ad6b26d 100644 --- a/crates/cargo-capsec/src/cli.rs +++ b/crates/cargo-capsec/src/cli.rs @@ -55,10 +55,21 @@ pub struct AuditArgs { #[arg(short, long, default_value = "text", value_parser = ["text", "json", "sarif"])] pub format: String, - /// Also scan dependency source code from cargo cache + /// Also scan dependency source code from cargo cache. + /// With cross-crate propagation, findings from dependencies are + /// transitively attributed to workspace functions that call them. #[arg(long)] pub include_deps: bool, + /// Only scan dependencies, skip workspace crates (supply-chain view) + #[arg(long, conflicts_with = "include_deps")] + pub deps_only: bool, + + /// Maximum dependency depth to scan (0 = unlimited, default: 1 = direct deps only). + /// Only meaningful with --include-deps or --deps-only. + #[arg(long, default_value_t = 1)] + pub dep_depth: usize, + /// Minimum risk level to report #[arg(long, default_value = "low", value_parser = ["low", "medium", "high", "critical"])] pub min_risk: String, diff --git a/crates/cargo-capsec/src/cross_crate.rs b/crates/cargo-capsec/src/cross_crate.rs new file mode 100644 index 0000000..78ccab2 --- /dev/null +++ b/crates/cargo-capsec/src/cross_crate.rs @@ -0,0 +1,174 @@ +//! Cross-crate authority propagation. +//! +//! Converts export maps from dependency crates into [`CustomAuthority`] values +//! that can be injected into the detector. This bridges dependency analysis +//! (Phase 1) with workspace crate analysis (Phase 2). + +use crate::authorities::CustomAuthority; +use crate::export_map::CrateExportMap; + +/// Converts a collection of export maps into [`CustomAuthority`] values for +/// injection into the detector. +/// +/// For each entry in each export map, creates a `CustomAuthority` with the +/// module-qualified path split into segments. The suffix matching in +/// [`Detector::matches_custom_path`](crate::detector) handles both +/// fully-qualified calls and imported calls. +#[must_use] +pub fn export_map_to_custom_authorities(export_maps: &[CrateExportMap]) -> Vec { + let mut customs = Vec::new(); + + for map in export_maps { + for (key, authorities) in &map.exports { + let path: Vec = key.split("::").map(String::from).collect(); + + for auth in authorities { + customs.push(CustomAuthority { + path: path.clone(), + category: auth.category.clone(), + risk: auth.risk, + description: format!( + "Cross-crate: {}() → {} [{}]", + key, + auth.leaf_call, + auth.category.label(), + ), + }); + } + } + } + + customs +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::authorities::{Category, Risk}; + use crate::export_map::{CrateExportMap, ExportedAuthority}; + use std::collections::HashMap; + + fn make_export_map( + crate_name: &str, + entries: Vec<(&str, Category, Risk, &str)>, + ) -> CrateExportMap { + let mut exports = HashMap::new(); + for (key, category, risk, leaf_call) in entries { + exports + .entry(key.to_string()) + .or_insert_with(Vec::new) + .push(ExportedAuthority { + category, + risk, + leaf_call: leaf_call.to_string(), + is_transitive: false, + }); + } + CrateExportMap { + crate_name: crate_name.to_string(), + crate_version: "1.0.0".to_string(), + exports, + } + } + + #[test] + fn single_export_map() { + let map = make_export_map( + "reqwest", + vec![( + "reqwest::get", + Category::Net, + Risk::High, + "TcpStream::connect", + )], + ); + let customs = export_map_to_custom_authorities(&[map]); + assert_eq!(customs.len(), 1); + assert_eq!(customs[0].path, vec!["reqwest", "get"]); + assert_eq!(customs[0].category, Category::Net); + assert!(customs[0].description.contains("Cross-crate")); + assert!(customs[0].description.contains("reqwest::get")); + } + + #[test] + fn multiple_exports_per_crate() { + let map = make_export_map( + "tokio", + vec![ + ( + "tokio::fs::read", + Category::Fs, + Risk::Medium, + "std::fs::read", + ), + ( + "tokio::net::connect", + Category::Net, + Risk::High, + "TcpStream::connect", + ), + ], + ); + let customs = export_map_to_custom_authorities(&[map]); + assert_eq!(customs.len(), 2); + } + + #[test] + fn multiple_crates() { + let map1 = make_export_map( + "reqwest", + vec![( + "reqwest::get", + Category::Net, + Risk::High, + "TcpStream::connect", + )], + ); + let map2 = make_export_map( + "rusqlite", + vec![( + "rusqlite::execute", + Category::Ffi, + Risk::High, + "extern sqlite3_exec", + )], + ); + let customs = export_map_to_custom_authorities(&[map1, map2]); + assert_eq!(customs.len(), 2); + } + + #[test] + fn empty_export_maps() { + let customs = export_map_to_custom_authorities(&[]); + assert!(customs.is_empty()); + } + + #[test] + fn empty_exports_in_map() { + let map = CrateExportMap { + crate_name: "empty".to_string(), + crate_version: "1.0.0".to_string(), + exports: HashMap::new(), + }; + let customs = export_map_to_custom_authorities(&[map]); + assert!(customs.is_empty()); + } + + #[test] + fn path_segments_split_correctly() { + let map = make_export_map( + "reqwest", + vec![( + "reqwest::blocking::client::get", + Category::Net, + Risk::High, + "connect", + )], + ); + let customs = export_map_to_custom_authorities(&[map]); + assert_eq!( + customs[0].path, + vec!["reqwest", "blocking", "client", "get"] + ); + } +} diff --git a/crates/cargo-capsec/src/detector.rs b/crates/cargo-capsec/src/detector.rs index 2a0d655..2994030 100644 --- a/crates/cargo-capsec/src/detector.rs +++ b/crates/cargo-capsec/src/detector.rs @@ -144,6 +144,13 @@ impl Detector { let mut findings = Vec::new(); let (import_map, glob_prefixes) = build_import_map(&file.use_imports); + // Build file-scoped set of extern function names for FFI call-site detection + let extern_fn_names: HashSet<&str> = file + .extern_blocks + .iter() + .flat_map(|ext| ext.functions.iter().map(String::as_str)) + .collect(); + for func in &file.functions { let effective_deny = merge_deny(&func.deny_categories, crate_deny); @@ -251,6 +258,46 @@ impl Detector { } } } + + // Pass 3: FFI call-site detection. + // Flag calls TO extern-declared functions (e.g., git_repository_open, + // sqlite3_exec). The last segment of the expanded path is checked against + // all extern function names from this file. This catches both direct calls + // (sqlite3_exec()) and qualified calls (raw::git_repository_open()). + if !extern_fn_names.is_empty() { + for (call, expanded) in func.calls.iter().zip(expanded_calls.iter()) { + if let Some(last_seg) = expanded.last() + && extern_fn_names.contains(last_seg.as_str()) + { + let deny_violation = is_category_denied(&effective_deny, &Category::Ffi); + findings.push(Finding { + file: file.path.clone(), + function: func.name.clone(), + function_line: func.line, + call_line: call.line, + call_col: call.col, + call_text: expanded.join("::"), + category: Category::Ffi, + subcategory: "ffi_call".to_string(), + risk: if deny_violation { + Risk::Critical + } else { + Risk::High + }, + description: if deny_violation { + format!("DENY VIOLATION: Calls FFI function {}()", last_seg) + } else { + format!("Calls FFI function {}()", last_seg) + }, + is_build_script: func.is_build_script, + crate_name: crate_name.to_string(), + crate_version: crate_version.to_string(), + is_deny_violation: deny_violation, + is_transitive: false, + }); + } + } + } } // Extern blocks — check crate-level deny for FFI category @@ -623,11 +670,30 @@ fn matches_custom_path(expanded_path: &[String], pattern: &[String]) -> bool { if expanded_path.len() < pattern.len() { return false; } + // Standard suffix matching let offset = expanded_path.len() - pattern.len(); - expanded_path[offset..] + let suffix_match = expanded_path[offset..] .iter() .zip(pattern.iter()) - .all(|(a, b)| a == b) + .all(|(a, b)| a == b); + if suffix_match { + return true; + } + + // Crate-scoped matching for cross-crate authorities: + // Pattern ["crate_name", "func"] matches expanded ["crate_name", "Type", "func"] + // or ["crate_name", "module", "Type", "func"]. This handles type-qualified + // calls like `git2::Repository::open()` where the module path and type name + // don't align with the file-path-derived export map keys. + if pattern.len() == 2 && expanded_path.len() >= 2 { + let crate_matches = expanded_path[0] == pattern[0]; + let func_matches = expanded_path.last() == pattern.last(); + if crate_matches && func_matches { + return true; + } + } + + false } #[cfg(test)] @@ -1164,4 +1230,98 @@ mod tests { "call to function not in file should not propagate" ); } + + // ── FFI call-site detection tests ── + + #[test] + fn detect_ffi_call_to_extern_function() { + let source = r#" + extern "C" { + fn sqlite3_exec(db: *mut u8, sql: *const u8) -> i32; + } + fn run_query() { + unsafe { sqlite3_exec(std::ptr::null_mut(), std::ptr::null()); } + } + "#; + let parsed = parse_source(source, "test.rs").unwrap(); + let detector = Detector::new(); + let findings = detector.analyse(&parsed, "test-crate", "0.1.0", &[]); + let ffi_call: Vec<_> = findings + .iter() + .filter(|f| f.function == "run_query" && f.subcategory == "ffi_call") + .collect(); + assert!( + !ffi_call.is_empty(), + "run_query should get FFI finding for calling sqlite3_exec" + ); + assert_eq!(ffi_call[0].category, Category::Ffi); + } + + #[test] + fn detect_ffi_call_bare_name() { + let source = r#" + extern "C" { + fn open(path: *const u8, flags: i32) -> i32; + } + fn opener() { + unsafe { open(std::ptr::null(), 0); } + } + "#; + let parsed = parse_source(source, "test.rs").unwrap(); + let detector = Detector::new(); + let findings = detector.analyse(&parsed, "test-crate", "0.1.0", &[]); + let ffi_call: Vec<_> = findings + .iter() + .filter(|f| f.function == "opener" && f.subcategory == "ffi_call") + .collect(); + assert!( + !ffi_call.is_empty(), + "opener should get FFI finding for calling extern fn open" + ); + } + + #[test] + fn ffi_call_coexists_with_extern_block_finding() { + let source = r#" + extern "C" { + fn do_thing(x: i32) -> i32; + } + fn caller() { + unsafe { do_thing(42); } + } + "#; + let parsed = parse_source(source, "test.rs").unwrap(); + let detector = Detector::new(); + let findings = detector.analyse(&parsed, "test-crate", "0.1.0", &[]); + let extern_finding = findings.iter().find(|f| f.subcategory == "extern"); + let call_finding = findings.iter().find(|f| f.subcategory == "ffi_call"); + assert!( + extern_finding.is_some(), + "Extern block finding should exist" + ); + assert!( + call_finding.is_some(), + "Call-site FFI finding should also exist" + ); + } + + #[test] + fn ffi_call_not_triggered_without_extern_block() { + let source = r#" + fn caller() { + some_function(42); + } + "#; + let parsed = parse_source(source, "test.rs").unwrap(); + let detector = Detector::new(); + let findings = detector.analyse(&parsed, "test-crate", "0.1.0", &[]); + let ffi_findings: Vec<_> = findings + .iter() + .filter(|f| f.subcategory == "ffi_call") + .collect(); + assert!( + ffi_findings.is_empty(), + "No FFI call findings without extern block" + ); + } } diff --git a/crates/cargo-capsec/src/discovery.rs b/crates/cargo-capsec/src/discovery.rs index 6250dc1..b4135fc 100644 --- a/crates/cargo-capsec/src/discovery.rs +++ b/crates/cargo-capsec/src/discovery.rs @@ -10,6 +10,7 @@ use crate::config::Classification; use serde::Deserialize; +use std::collections::{HashMap, VecDeque}; use std::path::{Path, PathBuf}; /// Metadata about a crate discovered in the workspace. @@ -27,22 +28,75 @@ pub struct CrateInfo { /// Classification from `[package.metadata.capsec]` in the crate's Cargo.toml. /// `None` if not specified. pub classification: Option, + /// Opaque package ID from `cargo metadata` (for linking to the resolve graph). + /// Only populated when `include_deps` is true. + pub package_id: Option, } #[derive(Deserialize)] struct CargoMetadata { packages: Vec, workspace_root: String, + /// The resolved dependency graph. Present when `cargo metadata` is run + /// without `--no-deps`; `None` otherwise. + resolve: Option, } #[derive(Deserialize)] struct Package { name: String, version: String, + id: String, manifest_path: String, source: Option, #[serde(default)] metadata: Option, + #[serde(default)] + targets: Vec, +} + +#[derive(Deserialize)] +struct Target { + kind: Vec, + #[allow(dead_code)] + name: String, + #[allow(dead_code)] + src_path: String, +} + +/// The resolved dependency graph from `cargo metadata`. +#[derive(Deserialize)] +struct Resolve { + nodes: Vec, +} + +/// A single node in the resolved dependency graph. +#[derive(Deserialize)] +struct ResolveNode { + /// Opaque package ID (matches `Package::id`). + id: String, + /// Resolved dependencies with extern crate names and dependency kinds. + #[serde(default)] + deps: Vec, +} + +/// A resolved dependency edge — which package this node depends on and how. +#[derive(Deserialize)] +struct NodeDep { + /// The extern crate name as seen in Rust source (underscored, handles renames). + name: String, + /// The package ID of the dependency (matches `Package::id`). + pkg: String, + /// Dependency kinds (normal, dev, build). + #[serde(default)] + dep_kinds: Vec, +} + +/// Metadata about the kind of a dependency edge. +#[derive(Deserialize)] +struct DepKindInfo { + /// `null` = normal, `"dev"` = dev-dependency, `"build"` = build-dependency. + kind: Option, } /// Extracts `classification` from `package.metadata.capsec.classification` JSON value. @@ -61,12 +115,205 @@ fn extract_classification(metadata: &Option) -> Option String { + name.replace('-', "_") +} + +/// Returns true if a package is a proc-macro crate (compile-time code, not runtime). +fn is_proc_macro(pkg: &Package) -> bool { + pkg.targets + .iter() + .any(|t| t.kind.contains(&"proc-macro".to_string())) +} + +/// Information about a dependency edge in the resolved graph. +#[derive(Debug, Clone)] +pub struct DepEdge { + /// Normalized extern crate name (underscored, handles renames). + #[allow(dead_code)] + pub extern_name: String, + /// Package ID of the dependency. + pub pkg_id: String, +} + +/// Produces a topological ordering of package IDs from leaves (no dependencies) +/// to roots (workspace crates). Dev-dependencies are filtered out to avoid cycles. +/// +/// Returns `Err` if a cycle is detected (should not happen in a valid Cargo graph +/// with dev-deps removed, but handled gracefully). +pub fn topological_order(resolve: &[(String, Vec)]) -> Result, String> { + let num_nodes = resolve.len(); + + // Build index: pkg_id -> index + let id_to_idx: HashMap<&str, usize> = resolve + .iter() + .enumerate() + .map(|(i, (id, _))| (id.as_str(), i)) + .collect(); + + // Build adjacency list and in-degree counts. + // Edge: node -> dependency (we want leaves first, so edges point from + // dependents to dependencies). + let mut in_degree = vec![0usize; num_nodes]; + let mut dependents: Vec> = vec![vec![]; num_nodes]; + + for (idx, (_id, deps)) in resolve.iter().enumerate() { + for dep in deps { + if let Some(&dep_idx) = id_to_idx.get(dep.pkg_id.as_str()) { + // idx depends on dep_idx. + // In our topo sort, dep_idx must come before idx. + // So dep_idx -> idx is a "dependent" edge. + dependents[dep_idx].push(idx); + in_degree[idx] += 1; + } + // Ignore deps not in the resolve set (e.g., filtered out proc-macros). + } + } + + // Kahn's algorithm: start with leaves (in_degree == 0). + let mut queue: VecDeque = in_degree + .iter() + .enumerate() + .filter(|&(_, &d)| d == 0) + .map(|(i, _)| i) + .collect(); + + let mut order = Vec::with_capacity(num_nodes); + + while let Some(node) = queue.pop_front() { + order.push(resolve[node].0.clone()); + for &dependent in &dependents[node] { + in_degree[dependent] -= 1; + if in_degree[dependent] == 0 { + queue.push_back(dependent); + } + } + } + + if order.len() == num_nodes { + Ok(order) + } else { + Err(format!( + "Cycle detected in dependency graph ({} of {} nodes processed)", + order.len(), + num_nodes + )) + } +} + +/// Result of extracting the dependency graph: the graph itself and a name lookup map. +pub type DepGraphResult = (Vec<(String, Vec)>, HashMap); + +/// Extracts the resolved dependency graph from `CargoMetadata`, filtering out +/// dev-dependencies and optionally proc-macro crates. +/// +/// Returns a list of `(package_id, Vec)` suitable for `topological_order()`. +pub fn extract_dep_graph( + metadata_json: &[u8], + exclude_proc_macros: bool, +) -> Result { + let metadata: CargoMetadata = serde_json::from_slice(metadata_json) + .map_err(|e| format!("Failed to parse cargo metadata: {e}"))?; + + let resolve = metadata + .resolve + .ok_or("No resolve field in cargo metadata (was --no-deps used?)")?; + + // Build a set of proc-macro package IDs to exclude. + let proc_macro_ids: std::collections::HashSet<&str> = if exclude_proc_macros { + metadata + .packages + .iter() + .filter(|p| is_proc_macro(p)) + .map(|p| p.id.as_str()) + .collect() + } else { + std::collections::HashSet::new() + }; + + // Map package ID -> normalized crate name for callers. + let id_to_name: HashMap = metadata + .packages + .iter() + .map(|p| (p.id.clone(), normalize_crate_name(&p.name))) + .collect(); + + let mut graph = Vec::new(); + + for node in &resolve.nodes { + if proc_macro_ids.contains(node.id.as_str()) { + continue; + } + + let deps: Vec = node + .deps + .iter() + .filter(|d| { + // Exclude dev-dependencies (can create cycles). + !d.dep_kinds + .iter() + .all(|dk| dk.kind.as_deref() == Some("dev")) + }) + .filter(|d| !proc_macro_ids.contains(d.pkg.as_str())) + .map(|d| DepEdge { + extern_name: normalize_crate_name(&d.name), + pkg_id: d.pkg.clone(), + }) + .collect(); + + graph.push((node.id.clone(), deps)); + } + + Ok((graph, id_to_name)) +} + +/// Returns workspace member package IDs in topological order (leaves first). +/// +/// Filters the resolve graph to only workspace-member nodes and edges, +/// then calls `topological_order()`. Crates with no intra-workspace +/// dependencies come first. Returns `None` if topo sort fails. +pub fn workspace_topological_order( + workspace_crates: &[CrateInfo], + resolve_graph: &[(String, Vec)], +) -> Option> { + let ws_pkg_ids: std::collections::HashSet = workspace_crates + .iter() + .filter_map(|c| c.package_id.clone()) + .collect(); + + if ws_pkg_ids.is_empty() { + return None; + } + + // Filter resolve graph to workspace-member-only nodes and edges + let ws_graph: Vec<(String, Vec)> = resolve_graph + .iter() + .filter(|(id, _)| ws_pkg_ids.contains(id)) + .map(|(id, deps)| { + let ws_deps: Vec = deps + .iter() + .filter(|d| ws_pkg_ids.contains(&d.pkg_id)) + .cloned() + .collect(); + (id.clone(), ws_deps) + }) + .collect(); + + topological_order(&ws_graph).ok() +} + /// Result of workspace discovery: crates and the resolved workspace root. pub struct DiscoveryResult { /// All discovered crates. pub crates: Vec, /// The Cargo workspace root (from `cargo metadata`). pub workspace_root: PathBuf, + /// Resolved dependency graph (only populated when `include_deps` is true). + pub resolve_graph: Option)>>, } /// Discovers all crates in a Cargo workspace by running `cargo metadata`. @@ -123,13 +370,28 @@ pub fn discover_crates( source_dir: src_dir, is_dependency: package.source.is_some(), classification: extract_classification(&package.metadata), + package_id: if include_deps { + Some(package.id.clone()) + } else { + None + }, }); } } + // Extract the resolve graph when available (for topological ordering) + let resolve_graph = if include_deps { + extract_dep_graph(&output.stdout, true) + .ok() + .map(|(graph, _)| graph) + } else { + None + }; + Ok(DiscoveryResult { crates, workspace_root: resolved_root, + resolve_graph, }) } @@ -195,4 +457,276 @@ mod tests { .all(|f| f.extension().unwrap_or_default() == "rs") ); } + + #[test] + fn normalize_crate_name_replaces_hyphens() { + assert_eq!(normalize_crate_name("serde-json"), "serde_json"); + assert_eq!(normalize_crate_name("serde_json"), "serde_json"); + assert_eq!(normalize_crate_name("my-cool-crate"), "my_cool_crate"); + assert_eq!(normalize_crate_name("plain"), "plain"); + } + + fn make_graph(edges: &[(&str, &[&str])]) -> Vec<(String, Vec)> { + edges + .iter() + .map(|(id, deps)| { + let dep_edges = deps + .iter() + .map(|d| DepEdge { + extern_name: d.to_string(), + pkg_id: d.to_string(), + }) + .collect(); + (id.to_string(), dep_edges) + }) + .collect() + } + + #[test] + fn topo_sort_single_node() { + let graph = make_graph(&[("a", &[])]); + let order = topological_order(&graph).unwrap(); + assert_eq!(order, vec!["a"]); + } + + #[test] + fn topo_sort_linear_chain() { + // a -> b -> c (a depends on b, b depends on c) + let graph = make_graph(&[("a", &["b"]), ("b", &["c"]), ("c", &[])]); + let order = topological_order(&graph).unwrap(); + // c must come before b, b before a + let pos = |id: &str| order.iter().position(|x| x == id).unwrap(); + assert!(pos("c") < pos("b")); + assert!(pos("b") < pos("a")); + } + + #[test] + fn topo_sort_diamond() { + // a + // / \ + // b c + // \ / + // d + let graph = make_graph(&[("a", &["b", "c"]), ("b", &["d"]), ("c", &["d"]), ("d", &[])]); + let order = topological_order(&graph).unwrap(); + let pos = |id: &str| order.iter().position(|x| x == id).unwrap(); + assert!(pos("d") < pos("b")); + assert!(pos("d") < pos("c")); + assert!(pos("b") < pos("a")); + assert!(pos("c") < pos("a")); + } + + #[test] + fn topo_sort_cycle_detected() { + // a -> b -> a (cycle) + let graph = make_graph(&[("a", &["b"]), ("b", &["a"])]); + let result = topological_order(&graph); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("Cycle detected")); + } + + #[test] + fn topo_sort_ignores_unknown_deps() { + // a depends on "missing" which is not in the graph — should be ignored + let graph = make_graph(&[("a", &["missing"]), ("b", &[])]); + let order = topological_order(&graph).unwrap(); + assert_eq!(order.len(), 2); + } + + #[test] + fn extract_dep_graph_filters_dev_deps() { + let metadata_json = serde_json::json!({ + "packages": [ + { + "name": "app", + "version": "0.1.0", + "id": "app 0.1.0", + "manifest_path": "/fake/app/Cargo.toml", + "source": null, + "targets": [{"kind": ["lib"], "name": "app", "src_path": "/fake/app/src/lib.rs"}] + }, + { + "name": "helper", + "version": "1.0.0", + "id": "helper 1.0.0", + "manifest_path": "/fake/helper/Cargo.toml", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "targets": [{"kind": ["lib"], "name": "helper", "src_path": "/fake/helper/src/lib.rs"}] + }, + { + "name": "test-util", + "version": "0.1.0", + "id": "test-util 0.1.0", + "manifest_path": "/fake/test-util/Cargo.toml", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "targets": [{"kind": ["lib"], "name": "test_util", "src_path": "/fake/test-util/src/lib.rs"}] + } + ], + "workspace_root": "/fake", + "workspace_members": ["app 0.1.0"], + "resolve": { + "nodes": [ + { + "id": "app 0.1.0", + "deps": [ + { + "name": "helper", + "pkg": "helper 1.0.0", + "dep_kinds": [{"kind": null, "target": null}] + }, + { + "name": "test_util", + "pkg": "test-util 0.1.0", + "dep_kinds": [{"kind": "dev", "target": null}] + } + ] + }, + { + "id": "helper 1.0.0", + "deps": [] + }, + { + "id": "test-util 0.1.0", + "deps": [] + } + ], + "root": "app 0.1.0" + } + }); + + let json_bytes = serde_json::to_vec(&metadata_json).unwrap(); + let (graph, id_to_name) = extract_dep_graph(&json_bytes, false).unwrap(); + + // app should have only "helper" as a dep (test-util is dev-only) + let app_node = graph.iter().find(|(id, _)| id == "app 0.1.0").unwrap(); + assert_eq!(app_node.1.len(), 1); + assert_eq!(app_node.1[0].extern_name, "helper"); + + // id_to_name should normalize + assert_eq!(id_to_name.get("test-util 0.1.0").unwrap(), "test_util"); + + // topo sort should work + let order = topological_order(&graph).unwrap(); + assert_eq!(order.len(), 3); + } + + #[test] + fn extract_dep_graph_excludes_proc_macros() { + let metadata_json = serde_json::json!({ + "packages": [ + { + "name": "app", + "version": "0.1.0", + "id": "app 0.1.0", + "manifest_path": "/fake/app/Cargo.toml", + "source": null, + "targets": [{"kind": ["lib"], "name": "app", "src_path": "/fake/app/src/lib.rs"}] + }, + { + "name": "my-derive", + "version": "1.0.0", + "id": "my-derive 1.0.0", + "manifest_path": "/fake/my-derive/Cargo.toml", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "targets": [{"kind": ["proc-macro"], "name": "my_derive", "src_path": "/fake/my-derive/src/lib.rs"}] + } + ], + "workspace_root": "/fake", + "workspace_members": ["app 0.1.0"], + "resolve": { + "nodes": [ + { + "id": "app 0.1.0", + "deps": [ + { + "name": "my_derive", + "pkg": "my-derive 1.0.0", + "dep_kinds": [{"kind": null, "target": null}] + } + ] + }, + { + "id": "my-derive 1.0.0", + "deps": [] + } + ], + "root": "app 0.1.0" + } + }); + + let json_bytes = serde_json::to_vec(&metadata_json).unwrap(); + let (graph, _) = extract_dep_graph(&json_bytes, true).unwrap(); + + // my-derive should be excluded as a proc-macro + assert_eq!(graph.len(), 1); // only "app" remains + let app_node = &graph[0]; + assert_eq!(app_node.0, "app 0.1.0"); + assert!(app_node.1.is_empty()); // dep on proc-macro filtered out + } + + #[test] + fn workspace_topo_order_basic() { + let ws_crates = vec![ + CrateInfo { + name: "app".to_string(), + version: "0.1.0".to_string(), + source_dir: PathBuf::from("/fake/app/src"), + is_dependency: false, + classification: None, + package_id: Some("app 0.1.0".to_string()), + }, + CrateInfo { + name: "core-lib".to_string(), + version: "0.1.0".to_string(), + source_dir: PathBuf::from("/fake/core-lib/src"), + is_dependency: false, + classification: None, + package_id: Some("core-lib 0.1.0".to_string()), + }, + ]; + let graph = vec![ + ( + "app 0.1.0".to_string(), + vec![DepEdge { + extern_name: "core_lib".to_string(), + pkg_id: "core-lib 0.1.0".to_string(), + }], + ), + ("core-lib 0.1.0".to_string(), vec![]), + ]; + let order = workspace_topological_order(&ws_crates, &graph).unwrap(); + let pos = |id: &str| order.iter().position(|x| x == id).unwrap(); + assert!( + pos("core-lib 0.1.0") < pos("app 0.1.0"), + "core-lib should come before app" + ); + } + + #[test] + fn workspace_topo_order_independent() { + let ws_crates = vec![ + CrateInfo { + name: "a".to_string(), + version: "0.1.0".to_string(), + source_dir: PathBuf::from("/fake/a/src"), + is_dependency: false, + classification: None, + package_id: Some("a 0.1.0".to_string()), + }, + CrateInfo { + name: "b".to_string(), + version: "0.1.0".to_string(), + source_dir: PathBuf::from("/fake/b/src"), + is_dependency: false, + classification: None, + package_id: Some("b 0.1.0".to_string()), + }, + ]; + let graph = vec![ + ("a 0.1.0".to_string(), vec![]), + ("b 0.1.0".to_string(), vec![]), + ]; + let order = workspace_topological_order(&ws_crates, &graph).unwrap(); + assert_eq!(order.len(), 2); + } } diff --git a/crates/cargo-capsec/src/export_map.rs b/crates/cargo-capsec/src/export_map.rs new file mode 100644 index 0000000..d63470e --- /dev/null +++ b/crates/cargo-capsec/src/export_map.rs @@ -0,0 +1,410 @@ +//! Cross-crate export map construction. +//! +//! After scanning a dependency crate, this module extracts a summary of its +//! authority surface: which functions (directly or transitively) exercise ambient +//! authority. The export map is keyed by fully-qualified module path within the crate. + +use crate::authorities::{Category, Risk}; +use crate::detector::Finding; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::path::Path; + +/// A dependency crate's authority surface — its functions that transitively +/// exercise ambient authority. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CrateExportMap { + /// Normalized crate name (underscores, not hyphens). + pub crate_name: String, + /// Crate version string. + pub crate_version: String, + /// Maps module-qualified function names to the authority categories they exercise. + /// Key format: `"crate_name::module::function"` (e.g., `"reqwest::blocking::get"`). + pub exports: HashMap>, +} + +/// A single authority finding associated with an exported function. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ExportedAuthority { + /// What kind of ambient authority this exercises. + pub category: Category, + /// How dangerous this call is. + pub risk: Risk, + /// The leaf authority call that this traces back to. + pub leaf_call: String, + /// Whether this is a direct call in the function or transitively propagated. + pub is_transitive: bool, +} + +/// Converts a source file path to a module path within the crate. +/// +/// # Examples +/// +/// - `"src/lib.rs"` → `[]` +/// - `"src/blocking/client.rs"` → `["blocking", "client"]` +/// - `"src/fs.rs"` → `["fs"]` +/// - `"src/fs/mod.rs"` → `["fs"]` +/// - `"src/main.rs"` → `[]` +#[must_use] +pub fn file_to_module_path(file_path: &str, src_dir: &Path) -> Vec { + let relative = Path::new(file_path) + .strip_prefix(src_dir) + .unwrap_or(Path::new(file_path)); + + let stem = relative.file_stem().unwrap_or_default().to_string_lossy(); + + let mut parts: Vec = relative + .parent() + .unwrap_or(Path::new("")) + .components() + .map(|c| c.as_os_str().to_string_lossy().to_string()) + .collect(); + + // "mod.rs" → module name is the parent directory (already captured) + // "lib.rs" / "main.rs" → crate root, no additional segment + // anything else → add the file stem as a module segment + match stem.as_ref() { + "mod" | "lib" | "main" => {} + other => parts.push(other.to_string()), + } + + parts +} + +/// Builds an export map from a dependency crate's scan findings. +/// +/// For each finding, derives the full module-qualified key from the file path +/// and function name. Build-script findings (`is_build_script: true`) are excluded +/// since they represent compile-time authority, not runtime authority. +#[must_use] +pub fn build_export_map( + crate_name: &str, + crate_version: &str, + findings: &[Finding], + src_dir: &Path, +) -> CrateExportMap { + let mut exports: HashMap> = HashMap::new(); + + for finding in findings { + // Exclude build-script findings (compile-time only) + if finding.is_build_script { + continue; + } + + let auth = ExportedAuthority { + category: finding.category.clone(), + risk: finding.risk, + leaf_call: finding.call_text.clone(), + is_transitive: finding.is_transitive, + }; + + // Entry 1: Full module-qualified path (e.g., "git2::repository::open") + // Matches calls like `crate::module::function()` + let module_path = file_to_module_path(&finding.file, src_dir); + let mut full_path = vec![crate_name.to_string()]; + full_path.extend(module_path); + full_path.push(finding.function.clone()); + let key = full_path.join("::"); + + exports.entry(key.clone()).or_default().push(auth.clone()); + + // Entry 2: Crate-scoped function name (e.g., "git2" + "open") + // For type-qualified calls like `git2::Repository::open()`, strict suffix + // matching fails ("Repository" ≠ "repository" from file path). This entry + // enables crate-scoped matching: if the expanded call path contains the + // crate name AND ends with the function name, it's a match. + let scoped_key = format!("{crate_name}::{}", finding.function); + if scoped_key != key { + exports.entry(scoped_key).or_default().push(auth); + } + } + + CrateExportMap { + crate_name: crate_name.to_string(), + + crate_version: crate_version.to_string(), + exports, + } +} + +/// Adds extern function declarations from parsed files to an export map. +/// +/// When a crate like `libgit2-sys` declares `extern "C" { fn git_repository_open(...); }`, +/// this creates an FFI export entry for `git_repository_open` so that other crates +/// calling `libgit2_sys::git_repository_open()` get a cross-crate FFI finding. +/// +/// This is necessary because extern block findings have `function: "extern \"C\""` which +/// produces useless export map keys. The individual function names need to be exported. +pub fn add_extern_exports( + export_map: &mut CrateExportMap, + parsed_files: &[crate::parser::ParsedFile], + src_dir: &Path, +) { + let crate_name = &export_map.crate_name; + + for file in parsed_files { + // Skip build.rs extern blocks (compile-time only) + if file.path.ends_with("build.rs") { + continue; + } + + for ext in &file.extern_blocks { + let module_path = file_to_module_path(&file.path, src_dir); + + for fn_name in &ext.functions { + let auth = ExportedAuthority { + category: crate::authorities::Category::Ffi, + risk: crate::authorities::Risk::High, + leaf_call: format!("extern {fn_name}"), + is_transitive: false, + }; + + // Full path: crate::module::fn_name + let mut full_path = vec![crate_name.clone()]; + full_path.extend(module_path.clone()); + full_path.push(fn_name.clone()); + let key = full_path.join("::"); + export_map + .exports + .entry(key.clone()) + .or_default() + .push(auth.clone()); + + // Short form: crate::fn_name (for crate-scoped matching) + let short_key = format!("{crate_name}::{fn_name}"); + if short_key != key { + export_map.exports.entry(short_key).or_default().push(auth); + } + } + } + } +} + +/// Cached export map format for disk persistence. +#[derive(Debug, Serialize, Deserialize)] +pub struct CachedExportMap { + /// Schema version — bump when the export map format changes. + pub schema_version: u32, + /// The actual export map data. + #[serde(flatten)] + pub export_map: CrateExportMap, +} + +/// Current schema version for cached export maps. +/// Bumped to 2: added extern function declaration exports. +pub const EXPORT_MAP_SCHEMA_VERSION: u32 = 2; + +/// Attempts to load a cached export map for a dependency crate. +/// +/// Returns `None` if the cache is missing, stale, or corrupt. +/// Only caches registry crates (path deps are always re-scanned). +pub fn load_cached_export_map( + cache_dir: &Path, + crate_name: &str, + crate_version: &str, + cap: &impl capsec_core::cap_provider::CapProvider, +) -> Option { + let path = cache_dir + .join("export-maps") + .join(format!("{crate_name}-{crate_version}.json")); + let content = capsec_std::fs::read_to_string(&path, cap).ok()?; + let cached: CachedExportMap = serde_json::from_str(&content).ok()?; + if cached.schema_version != EXPORT_MAP_SCHEMA_VERSION { + return None; // Schema changed — re-scan + } + Some(cached.export_map) +} + +/// Saves an export map to the cache directory. +/// +/// Silently ignores write failures (caching is best-effort). +pub fn save_export_map_cache( + cache_dir: &Path, + export_map: &CrateExportMap, + cap: &impl capsec_core::cap_provider::CapProvider, +) { + let dir = cache_dir.join("export-maps"); + // Create directory if needed + let _ = std::fs::create_dir_all(&dir); + + let cached = CachedExportMap { + schema_version: EXPORT_MAP_SCHEMA_VERSION, + export_map: export_map.clone(), + }; + + if let Ok(json) = serde_json::to_string_pretty(&cached) { + let path = dir.join(format!( + "{}-{}.json", + export_map.crate_name, export_map.crate_version + )); + let _ = capsec_std::fs::write(path, json, cap); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::authorities::{Category, Risk}; + use crate::detector::Finding; + + fn make_finding( + file: &str, + function: &str, + call_text: &str, + category: Category, + is_build_script: bool, + ) -> Finding { + Finding { + file: file.to_string(), + function: function.to_string(), + function_line: 1, + call_line: 2, + call_col: 5, + call_text: call_text.to_string(), + category, + subcategory: "test".to_string(), + risk: Risk::Medium, + description: "test".to_string(), + is_build_script, + crate_name: "test_crate".to_string(), + crate_version: "1.0.0".to_string(), + is_deny_violation: false, + is_transitive: false, + } + } + + #[test] + fn file_to_module_path_lib() { + assert_eq!( + file_to_module_path("src/lib.rs", Path::new("src")), + Vec::::new() + ); + } + + #[test] + fn file_to_module_path_main() { + assert_eq!( + file_to_module_path("src/main.rs", Path::new("src")), + Vec::::new() + ); + } + + #[test] + fn file_to_module_path_simple_module() { + assert_eq!( + file_to_module_path("src/fs.rs", Path::new("src")), + vec!["fs"] + ); + } + + #[test] + fn file_to_module_path_nested() { + assert_eq!( + file_to_module_path("src/blocking/client.rs", Path::new("src")), + vec!["blocking", "client"] + ); + } + + #[test] + fn file_to_module_path_mod_rs() { + assert_eq!( + file_to_module_path("src/fs/mod.rs", Path::new("src")), + vec!["fs"] + ); + } + + #[test] + fn build_export_map_basic() { + let findings = vec![make_finding( + "src/lib.rs", + "read_file", + "std::fs::read", + Category::Fs, + false, + )]; + let map = build_export_map("my_crate", "1.0.0", &findings, Path::new("src")); + assert!(map.exports.contains_key("my_crate::read_file")); + let auths = &map.exports["my_crate::read_file"]; + assert_eq!(auths.len(), 1); + assert_eq!(auths[0].category, Category::Fs); + } + + #[test] + fn build_export_map_excludes_build_script() { + let findings = vec![ + make_finding( + "src/lib.rs", + "read_file", + "std::fs::read", + Category::Fs, + false, + ), + make_finding("build.rs", "main", "std::env::var", Category::Env, true), + ]; + let map = build_export_map("my_crate", "1.0.0", &findings, Path::new("src")); + assert_eq!(map.exports.len(), 1); + assert!(map.exports.contains_key("my_crate::read_file")); + } + + #[test] + fn build_export_map_nested_module() { + let findings = vec![make_finding( + "src/blocking/client.rs", + "get", + "TcpStream::connect", + Category::Net, + false, + )]; + let map = build_export_map("reqwest", "0.12.5", &findings, Path::new("src")); + assert!(map.exports.contains_key("reqwest::blocking::client::get")); + } + + #[test] + fn build_export_map_multiple_findings_same_function() { + let findings = vec![ + make_finding("src/lib.rs", "mixed", "std::fs::read", Category::Fs, false), + make_finding( + "src/lib.rs", + "mixed", + "TcpStream::connect", + Category::Net, + false, + ), + ]; + let map = build_export_map("my_crate", "1.0.0", &findings, Path::new("src")); + let auths = &map.exports["my_crate::mixed"]; + assert_eq!(auths.len(), 2); + } + + #[test] + fn build_export_map_empty_findings() { + let map = build_export_map("empty", "1.0.0", &[], Path::new("src")); + assert!(map.exports.is_empty()); + } + + #[test] + fn cached_export_map_round_trip() { + let findings = vec![make_finding( + "src/lib.rs", + "read_file", + "std::fs::read", + Category::Fs, + false, + )]; + let export_map = build_export_map("my_crate", "1.0.0", &findings, Path::new("src")); + let cached = CachedExportMap { + schema_version: EXPORT_MAP_SCHEMA_VERSION, + export_map, + }; + let json = serde_json::to_string(&cached).unwrap(); + let loaded: CachedExportMap = serde_json::from_str(&json).unwrap(); + assert_eq!(loaded.schema_version, EXPORT_MAP_SCHEMA_VERSION); + assert_eq!(loaded.export_map.crate_name, "my_crate"); + assert!( + loaded + .export_map + .exports + .contains_key("my_crate::read_file") + ); + } +} diff --git a/crates/cargo-capsec/src/lib.rs b/crates/cargo-capsec/src/lib.rs index d255f95..0f531a7 100644 --- a/crates/cargo-capsec/src/lib.rs +++ b/crates/cargo-capsec/src/lib.rs @@ -45,7 +45,9 @@ pub mod authorities; pub mod baseline; pub mod config; +pub mod cross_crate; pub mod detector; pub mod discovery; +pub mod export_map; pub mod parser; pub mod reporter; diff --git a/crates/cargo-capsec/src/main.rs b/crates/cargo-capsec/src/main.rs index 7388bbd..a260021 100644 --- a/crates/cargo-capsec/src/main.rs +++ b/crates/cargo-capsec/src/main.rs @@ -2,14 +2,17 @@ mod authorities; mod baseline; mod cli; mod config; +mod cross_crate; mod detector; mod discovery; +mod export_map; mod parser; mod reporter; use authorities::Risk; use clap::Parser; use cli::{AuditArgs, BadgeArgs, CargoSubcommand, CheckDenyArgs, Cli, Commands}; +use std::collections::HashMap; use std::path::Path; fn main() { @@ -33,6 +36,8 @@ fn run_audit(args: AuditArgs) { let path_arg = args.path.canonicalize().unwrap_or(args.path.clone()); + let scan_deps = args.include_deps || args.deps_only; + // Load config let cfg = match config::load_config(&path_arg, &fs_read) { Ok(c) => c, @@ -42,62 +47,311 @@ fn run_audit(args: AuditArgs) { } }; - // Discover crates - let discovery = - match discovery::discover_crates(&path_arg, args.include_deps, &spawn_cap, &fs_read) { - Ok(d) => d, - Err(e) => { - eprintln!("Error: {e}"); - eprintln!("Hint: Run from a directory containing Cargo.toml, or use --path"); - std::process::exit(2); - } - }; + // Discover crates — always include deps when cross-crate scanning is active + let discovery = match discovery::discover_crates(&path_arg, scan_deps, &spawn_cap, &fs_read) { + Ok(d) => d, + Err(e) => { + eprintln!("Error: {e}"); + eprintln!("Hint: Run from a directory containing Cargo.toml, or use --path"); + std::process::exit(2); + } + }; let workspace_root = discovery.workspace_root; - let crates = discovery.crates; + let resolve_graph = discovery.resolve_graph; + let all_crates = discovery.crates; - // Filter crates - let crates: Vec<_> = crates - .into_iter() - .filter(|c| { - if !args.include_deps && c.is_dependency { - return false; + // Separate workspace crates from dependencies + let (workspace_crates, dep_crates): (Vec<_>, Vec<_>) = + all_crates.into_iter().partition(|c| !c.is_dependency); + + let crate_deny = cfg.deny.normalized_categories(); + let customs = config::custom_authorities(&cfg); + + let mut all_findings = Vec::new(); + + if scan_deps { + // ── Cross-crate two-phase scan ── + + // Phase 1: Scan dependency crates, build export maps. + // Check cache first; only scan if cache miss. + // At depth=1, all deps are independent and can be scanned in parallel. + let cache_dir = workspace_root.join(".capsec-cache"); + + let scan_one_dep = + |krate: &discovery::CrateInfo| -> (export_map::CrateExportMap, Vec) { + let normalized_name = discovery::normalize_crate_name(&krate.name); + + // Try loading from cache (only for registry deps) + if krate.is_dependency + && let Some(cached) = export_map::load_cached_export_map( + &cache_dir, + &normalized_name, + &krate.version, + &fs_read, + ) + { + return (cached, Vec::new()); + } + + let mut det = detector::Detector::new(); + det.add_custom_authorities(&customs); + + let source_files = discovery::discover_source_files(&krate.source_dir, &fs_read); + let mut dep_findings = Vec::new(); + let mut parsed_files = Vec::new(); + + for file_path in source_files { + match parser::parse_file(&file_path, &fs_read) { + Ok(parsed) => { + let findings = + det.analyse(&parsed, &krate.name, &krate.version, &crate_deny); + dep_findings.extend(findings); + parsed_files.push(parsed); + } + Err(_e) => { + // Silently skip unparseable files in deps + } + } + } + + let mut emap = export_map::build_export_map( + &normalized_name, + &krate.version, + &dep_findings, + &krate.source_dir, + ); + + // Also export extern function declarations (e.g., libgit2-sys, sqlite3-sys) + // so callers like git2 get cross-crate FFI findings. + export_map::add_extern_exports(&mut emap, &parsed_files, &krate.source_dir); + + // Cache for registry deps + if krate.is_dependency { + export_map::save_export_map_cache(&cache_dir, &emap, &fs_write); + } + + (emap, dep_findings) + }; + + // Scan deps — parallel at depth=1, sequential at depth>1 (needs prior maps) + let mut export_maps = Vec::new(); + + if args.dep_depth == 1 { + // All deps are independent at depth 1 — scan sequentially + // (capsec capabilities are !Send, so rayon can't parallelize I/O; + // parallelism will be added when capabilities support Sync) + let results: Vec<_> = dep_crates.iter().map(scan_one_dep).collect(); + + for (emap, dep_findings) in results { + export_maps.push(emap); + if args.deps_only { + all_findings.extend(dep_findings); + } + } + } else { + // Multi-hop: sequential, injecting prior maps at each step + for krate in &dep_crates { + let normalized_name = discovery::normalize_crate_name(&krate.name); + + // Try cache + if krate.is_dependency + && let Some(cached) = export_map::load_cached_export_map( + &cache_dir, + &normalized_name, + &krate.version, + &fs_read, + ) + { + export_maps.push(cached); + continue; + } + + let mut det = detector::Detector::new(); + det.add_custom_authorities(&customs); + + // Inject previously-scanned deps' export maps for multi-hop chains + let cross_crate_customs = + cross_crate::export_map_to_custom_authorities(&export_maps); + det.add_custom_authorities(&cross_crate_customs); + + let source_files = discovery::discover_source_files(&krate.source_dir, &fs_read); + let mut dep_findings = Vec::new(); + let mut parsed_files = Vec::new(); + + for file_path in source_files { + match parser::parse_file(&file_path, &fs_read) { + Ok(parsed) => { + let findings = + det.analyse(&parsed, &krate.name, &krate.version, &crate_deny); + dep_findings.extend(findings); + parsed_files.push(parsed); + } + Err(e) => { + eprintln!(" Warning: {e}"); + } + } + } + + let mut emap = export_map::build_export_map( + &normalized_name, + &krate.version, + &dep_findings, + &krate.source_dir, + ); + export_map::add_extern_exports(&mut emap, &parsed_files, &krate.source_dir); + + if krate.is_dependency { + export_map::save_export_map_cache(&cache_dir, &emap, &fs_write); + } + + export_maps.push(emap); + + if args.deps_only { + all_findings.extend(dep_findings); + } + } + } + + // Phase 2: Scan workspace crates with dependency export maps injected. + // Process in topological order so workspace-to-workspace findings propagate + // (e.g., radicle-cli depends on radicle → radicle scanned first). + if !args.deps_only { + let dep_customs = cross_crate::export_map_to_custom_authorities(&export_maps); + + // Determine scan order: topological if resolve graph available + let ordered_ws_crates: Vec<&discovery::CrateInfo> = + if let Some(ref graph) = resolve_graph { + if let Some(topo_ids) = + discovery::workspace_topological_order(&workspace_crates, graph) + { + let id_to_idx: HashMap<&str, usize> = workspace_crates + .iter() + .enumerate() + .filter_map(|(i, c)| c.package_id.as_deref().map(|id| (id, i))) + .collect(); + let mut ordered: Vec<&discovery::CrateInfo> = Vec::new(); + for id in &topo_ids { + if let Some(&idx) = id_to_idx.get(id.as_str()) { + ordered.push(&workspace_crates[idx]); + } + } + // Add any not in topo order (defensive fallback) + let seen: std::collections::HashSet<&str> = + topo_ids.iter().map(|s| s.as_str()).collect(); + for c in &workspace_crates { + if !c + .package_id + .as_ref() + .is_some_and(|id| seen.contains(id.as_str())) + { + ordered.push(c); + } + } + ordered + } else { + workspace_crates.iter().collect() + } + } else { + workspace_crates.iter().collect() + }; + + let mut workspace_export_maps: Vec = Vec::new(); + + for krate in &ordered_ws_crates { + // Apply --only / --skip filtering to workspace crates + if let Some(ref only) = args.only { + let allowed: Vec<&str> = only.split(',').collect(); + if !allowed.contains(&krate.name.as_str()) { + continue; + } + } + if let Some(ref skip) = args.skip { + let skipped: Vec<&str> = skip.split(',').collect(); + if skipped.contains(&krate.name.as_str()) { + continue; + } + } + + let mut det = detector::Detector::new(); + det.add_custom_authorities(&customs); + det.add_custom_authorities(&dep_customs); + + // Inject previously-scanned workspace member export maps + let ws_customs = + cross_crate::export_map_to_custom_authorities(&workspace_export_maps); + det.add_custom_authorities(&ws_customs); + + let source_files = discovery::discover_source_files(&krate.source_dir, &fs_read); + let mut ws_crate_findings = Vec::new(); + let mut ws_parsed_files = Vec::new(); + + for file_path in source_files { + if config::should_exclude(&file_path, &cfg.analysis.exclude) { + continue; + } + + match parser::parse_file(&file_path, &fs_read) { + Ok(parsed) => { + let findings = + det.analyse(&parsed, &krate.name, &krate.version, &crate_deny); + ws_crate_findings.extend(findings); + ws_parsed_files.push(parsed); + } + Err(e) => { + eprintln!(" Warning: {e}"); + } + } + } + + // Build export map for this workspace crate (for downstream ws crates) + let normalized_name = discovery::normalize_crate_name(&krate.name); + let mut ws_emap = export_map::build_export_map( + &normalized_name, + &krate.version, + &ws_crate_findings, + &krate.source_dir, + ); + export_map::add_extern_exports(&mut ws_emap, &ws_parsed_files, &krate.source_dir); + workspace_export_maps.push(ws_emap); + + all_findings.extend(ws_crate_findings); } + } + } else { + // ── Original single-pass scan (no deps) ── + for krate in &workspace_crates { if let Some(ref only) = args.only { let allowed: Vec<&str> = only.split(',').collect(); - return allowed.contains(&c.name.as_str()); + if !allowed.contains(&krate.name.as_str()) { + continue; + } } if let Some(ref skip) = args.skip { let skipped: Vec<&str> = skip.split(',').collect(); - return !skipped.contains(&c.name.as_str()); + if skipped.contains(&krate.name.as_str()) { + continue; + } } - true - }) - .collect(); - // Set up detector with custom authorities - let mut det = detector::Detector::new(); - let customs = config::custom_authorities(&cfg); - det.add_custom_authorities(&customs); - let crate_deny = cfg.deny.normalized_categories(); + let mut det = detector::Detector::new(); + det.add_custom_authorities(&customs); - // Parse and detect - let mut all_findings = Vec::new(); + let source_files = discovery::discover_source_files(&krate.source_dir, &fs_read); - for krate in &crates { - let source_files = discovery::discover_source_files(&krate.source_dir, &fs_read); - - for file_path in source_files { - if config::should_exclude(&file_path, &cfg.analysis.exclude) { - continue; - } - - match parser::parse_file(&file_path, &fs_read) { - Ok(parsed) => { - let findings = det.analyse(&parsed, &krate.name, &krate.version, &crate_deny); - all_findings.extend(findings); + for file_path in source_files { + if config::should_exclude(&file_path, &cfg.analysis.exclude) { + continue; } - Err(e) => { - eprintln!(" Warning: {e}"); + + match parser::parse_file(&file_path, &fs_read) { + Ok(parsed) => { + let findings = + det.analyse(&parsed, &krate.name, &krate.version, &crate_deny); + all_findings.extend(findings); + } + Err(e) => { + eprintln!(" Warning: {e}"); + } } } } @@ -108,15 +362,22 @@ fn run_audit(args: AuditArgs) { f.file = make_relative(&f.file, &workspace_root); } - // Filter by risk level + // Filter by risk level (applied after all propagation) let min_risk = Risk::parse(&args.min_risk); all_findings.retain(|f| f.risk >= min_risk); // Apply allow rules all_findings.retain(|f| !config::should_allow(f, &cfg)); + // Use the appropriate crate list for classification + let crates_for_classification = if args.deps_only { + &dep_crates + } else { + &workspace_crates + }; + // Classification verification - let classification_results: Vec = crates + let classification_results: Vec = crates_for_classification .iter() .map(|krate| { let resolved = config::resolve_classification(&krate.name, krate.classification, &cfg); diff --git a/crates/cargo-capsec/src/parser.rs b/crates/cargo-capsec/src/parser.rs index a3ff87a..1932ffe 100644 --- a/crates/cargo-capsec/src/parser.rs +++ b/crates/cargo-capsec/src/parser.rs @@ -28,6 +28,22 @@ pub struct ParsedFile { pub extern_blocks: Vec, } +/// Extracted visibility for a parsed function. +/// +/// Best-effort heuristic — tracks `pub`, `pub(crate)`, and private functions. +/// Does not resolve `pub use` re-exports or trait method visibility. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Visibility { + /// `pub fn` — visible to downstream crates. + Public, + /// `pub(crate) fn` — visible within the crate only. + CratePub, + /// `pub(super) fn`, `pub(in path) fn`, or other restricted visibility. + Restricted, + /// No `pub` keyword — private. + Private, +} + /// A single function (free, `impl` method, or trait default method) and its call sites. #[derive(Debug, Clone)] pub struct ParsedFunction { @@ -42,6 +58,9 @@ pub struct ParsedFunction { /// Categories denied by `#[capsec::deny(...)]` on this function. /// Parsed from `#[doc = "capsec::deny(...)"]` attributes. pub deny_categories: Vec, + /// Best-effort visibility of this function. + #[allow(dead_code)] + pub visibility: Visibility, } /// A single call expression inside a function body. @@ -178,6 +197,7 @@ impl<'ast> Visit<'ast> for FileVisitor { calls: Vec::new(), is_build_script: self.file_path.ends_with("build.rs") && node.sig.ident == "main", deny_categories: extract_deny_categories(&node.attrs), + visibility: extract_visibility(&node.vis), }; let prev = self.current_function.take(); @@ -198,6 +218,7 @@ impl<'ast> Visit<'ast> for FileVisitor { calls: Vec::new(), is_build_script: false, deny_categories: extract_deny_categories(&node.attrs), + visibility: extract_visibility(&node.vis), }; let prev = self.current_function.take(); @@ -220,6 +241,8 @@ impl<'ast> Visit<'ast> for FileVisitor { calls: Vec::new(), is_build_script: false, deny_categories: extract_deny_categories(&node.attrs), + // Trait methods are effectively public if the trait is public + visibility: Visibility::Public, }; let prev = self.current_function.take(); @@ -350,6 +373,22 @@ fn extract_deny_categories(attrs: &[syn::Attribute]) -> Vec { categories } +/// Extracts the visibility from a `syn::Visibility`. +fn extract_visibility(vis: &syn::Visibility) -> Visibility { + match vis { + syn::Visibility::Public(_) => Visibility::Public, + syn::Visibility::Restricted(r) => { + // pub(crate), pub(super), pub(in path) + if r.path.is_ident("crate") { + Visibility::CratePub + } else { + Visibility::Restricted + } + } + syn::Visibility::Inherited => Visibility::Private, + } +} + fn collect_use_paths(tree: &syn::UseTree, prefix: &mut Vec, out: &mut Vec) { match tree { syn::UseTree::Path(p) => { @@ -552,6 +591,57 @@ mod tests { assert!(parsed.functions[0].deny_categories.is_empty()); } + #[test] + fn parse_visibility_public() { + let source = r#" + pub fn public_func() {} + "#; + let parsed = parse_source(source, "test.rs").unwrap(); + assert_eq!(parsed.functions[0].visibility, Visibility::Public); + } + + #[test] + fn parse_visibility_private() { + let source = r#" + fn private_func() {} + "#; + let parsed = parse_source(source, "test.rs").unwrap(); + assert_eq!(parsed.functions[0].visibility, Visibility::Private); + } + + #[test] + fn parse_visibility_crate_pub() { + let source = r#" + pub(crate) fn crate_func() {} + "#; + let parsed = parse_source(source, "test.rs").unwrap(); + assert_eq!(parsed.functions[0].visibility, Visibility::CratePub); + } + + #[test] + fn parse_visibility_impl_method() { + let source = r#" + struct Foo; + impl Foo { + pub fn public_method(&self) {} + fn private_method(&self) {} + } + "#; + let parsed = parse_source(source, "test.rs").unwrap(); + let public = parsed + .functions + .iter() + .find(|f| f.name == "public_method") + .unwrap(); + let private = parsed + .functions + .iter() + .find(|f| f.name == "private_method") + .unwrap(); + assert_eq!(public.visibility, Visibility::Public); + assert_eq!(private.visibility, Visibility::Private); + } + #[test] fn parse_trait_default_methods() { let source = r#" diff --git a/crates/cargo-capsec/src/reporter.rs b/crates/cargo-capsec/src/reporter.rs index 970bae8..d69bacf 100644 --- a/crates/cargo-capsec/src/reporter.rs +++ b/crates/cargo-capsec/src/reporter.rs @@ -67,6 +67,19 @@ pub fn report_text(findings: &[Finding], classifications: &[ClassificationResult f.call_text.bold(), f.function, ); + } else if f.description.starts_with("Cross-crate:") { + let colored_cat = colorize_category(&f.category); + println!( + " {:<5} {}:{}:{} {:<28} {}()", + colored_cat, + f.file.dimmed(), + f.call_line, + f.call_col, + f.call_text.bold(), + f.function, + ); + // Show cross-crate chain detail + println!(" {} {}", "\u{21b3}".dimmed(), f.description.dimmed(),); } else if f.is_transitive { println!( " {:<5} {}:{}:{} {:<28} {}()", @@ -252,6 +265,9 @@ pub struct JsonFinding { pub is_deny_violation: bool, /// Whether this finding was propagated through the intra-file call graph. pub is_transitive: bool, + /// Cross-crate chain description, if this finding was propagated from a dependency. + #[serde(skip_serializing_if = "Option::is_none")] + pub cross_crate_chain: Option, } /// Aggregate statistics in the JSON report. @@ -325,6 +341,12 @@ pub fn report_json(findings: &[Finding], classifications: &[ClassificationResult } fn finding_to_json(f: &Finding) -> JsonFinding { + let cross_crate_chain = if f.description.starts_with("Cross-crate:") { + Some(f.description.clone()) + } else { + None + }; + JsonFinding { file: f.file.clone(), function: f.function.clone(), @@ -338,6 +360,7 @@ fn finding_to_json(f: &Finding) -> JsonFinding { is_build_script: f.is_build_script, is_deny_violation: f.is_deny_violation, is_transitive: f.is_transitive, + cross_crate_chain, } } diff --git a/crates/cargo-capsec/tests/fixtures/cross_crate/app/src/lib.rs b/crates/cargo-capsec/tests/fixtures/cross_crate/app/src/lib.rs new file mode 100644 index 0000000..0a18a53 --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/cross_crate/app/src/lib.rs @@ -0,0 +1,5 @@ +use helper; + +pub fn load() -> Vec { + helper::read_file() +} diff --git a/crates/cargo-capsec/tests/fixtures/cross_crate/helper/src/lib.rs b/crates/cargo-capsec/tests/fixtures/cross_crate/helper/src/lib.rs new file mode 100644 index 0000000..51fe91b --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/cross_crate/helper/src/lib.rs @@ -0,0 +1,5 @@ +use std::fs; + +pub fn read_file() -> Vec { + fs::read("data.bin").unwrap() +} diff --git a/crates/cargo-capsec/tests/fixtures/cross_crate_build_script/app/src/lib.rs b/crates/cargo-capsec/tests/fixtures/cross_crate_build_script/app/src/lib.rs new file mode 100644 index 0000000..6c8efe0 --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/cross_crate_build_script/app/src/lib.rs @@ -0,0 +1,5 @@ +use build_dep; + +pub fn process(input: &str) -> Vec { + build_dep::parse(input) +} diff --git a/crates/cargo-capsec/tests/fixtures/cross_crate_build_script/build_dep/build.rs b/crates/cargo-capsec/tests/fixtures/cross_crate_build_script/build_dep/build.rs new file mode 100644 index 0000000..06d9d9b --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/cross_crate_build_script/build_dep/build.rs @@ -0,0 +1,5 @@ +fn main() { + let val = std::env::var("OUT_DIR").unwrap_or_default(); + println!("cargo:rerun-if-changed=build.rs"); + let _ = val; +} diff --git a/crates/cargo-capsec/tests/fixtures/cross_crate_build_script/build_dep/src/lib.rs b/crates/cargo-capsec/tests/fixtures/cross_crate_build_script/build_dep/src/lib.rs new file mode 100644 index 0000000..f79f302 --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/cross_crate_build_script/build_dep/src/lib.rs @@ -0,0 +1,3 @@ +pub fn parse(data: &str) -> Vec { + data.lines().map(String::from).collect() +} diff --git a/crates/cargo-capsec/tests/fixtures/cross_crate_chain/app/src/lib.rs b/crates/cargo-capsec/tests/fixtures/cross_crate_chain/app/src/lib.rs new file mode 100644 index 0000000..85425d8 --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/cross_crate_chain/app/src/lib.rs @@ -0,0 +1,5 @@ +use mid; + +pub fn handler() -> Vec { + mid::fetch() +} diff --git a/crates/cargo-capsec/tests/fixtures/cross_crate_chain/leaf/src/lib.rs b/crates/cargo-capsec/tests/fixtures/cross_crate_chain/leaf/src/lib.rs new file mode 100644 index 0000000..bdd639a --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/cross_crate_chain/leaf/src/lib.rs @@ -0,0 +1,5 @@ +use std::net::TcpStream; + +pub fn connect() -> std::io::Result { + TcpStream::connect("127.0.0.1:8080") +} diff --git a/crates/cargo-capsec/tests/fixtures/cross_crate_chain/mid/src/lib.rs b/crates/cargo-capsec/tests/fixtures/cross_crate_chain/mid/src/lib.rs new file mode 100644 index 0000000..4674b54 --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/cross_crate_chain/mid/src/lib.rs @@ -0,0 +1,6 @@ +use leaf; + +pub fn fetch() -> Vec { + let _stream = leaf::connect().unwrap(); + vec![1, 2, 3] +} diff --git a/crates/cargo-capsec/tests/fixtures/cross_crate_clean/app/src/lib.rs b/crates/cargo-capsec/tests/fixtures/cross_crate_clean/app/src/lib.rs new file mode 100644 index 0000000..dd3e30b --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/cross_crate_clean/app/src/lib.rs @@ -0,0 +1,5 @@ +use pure_lib; + +pub fn process(data: &[u8]) -> Vec { + pure_lib::transform(data) +} diff --git a/crates/cargo-capsec/tests/fixtures/cross_crate_clean/pure_lib/src/lib.rs b/crates/cargo-capsec/tests/fixtures/cross_crate_clean/pure_lib/src/lib.rs new file mode 100644 index 0000000..ffe9cff --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/cross_crate_clean/pure_lib/src/lib.rs @@ -0,0 +1,3 @@ +pub fn transform(data: &[u8]) -> Vec { + data.iter().map(|b| b.wrapping_add(1)).collect() +} diff --git a/crates/cargo-capsec/tests/fixtures/cross_crate_ffi/app/src/lib.rs b/crates/cargo-capsec/tests/fixtures/cross_crate_ffi/app/src/lib.rs new file mode 100644 index 0000000..eb93d69 --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/cross_crate_ffi/app/src/lib.rs @@ -0,0 +1,5 @@ +use ffi_dep; + +pub fn init() -> i32 { + ffi_dep::open_db() +} diff --git a/crates/cargo-capsec/tests/fixtures/cross_crate_ffi/ffi_dep/src/lib.rs b/crates/cargo-capsec/tests/fixtures/cross_crate_ffi/ffi_dep/src/lib.rs new file mode 100644 index 0000000..a8fcf28 --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/cross_crate_ffi/ffi_dep/src/lib.rs @@ -0,0 +1,7 @@ +extern "C" { + fn sqlite3_open(filename: *const u8, db: *mut *mut u8) -> i32; +} + +pub fn open_db() -> i32 { + unsafe { sqlite3_open(std::ptr::null(), std::ptr::null_mut()) } +} diff --git a/crates/cargo-capsec/tests/fixtures/workspace_to_workspace/app/src/lib.rs b/crates/cargo-capsec/tests/fixtures/workspace_to_workspace/app/src/lib.rs new file mode 100644 index 0000000..9ac75f6 --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/workspace_to_workspace/app/src/lib.rs @@ -0,0 +1,5 @@ +use core_lib; + +pub fn init() -> Vec { + core_lib::read_config() +} diff --git a/crates/cargo-capsec/tests/fixtures/workspace_to_workspace/core_lib/src/lib.rs b/crates/cargo-capsec/tests/fixtures/workspace_to_workspace/core_lib/src/lib.rs new file mode 100644 index 0000000..f918c5f --- /dev/null +++ b/crates/cargo-capsec/tests/fixtures/workspace_to_workspace/core_lib/src/lib.rs @@ -0,0 +1,5 @@ +use std::fs; + +pub fn read_config() -> Vec { + fs::read("config.toml").unwrap() +} diff --git a/crates/cargo-capsec/tests/integration.rs b/crates/cargo-capsec/tests/integration.rs index 6d69cd3..b98d349 100644 --- a/crates/cargo-capsec/tests/integration.rs +++ b/crates/cargo-capsec/tests/integration.rs @@ -1,7 +1,9 @@ use cargo_capsec::authorities::Category; +use cargo_capsec::cross_crate::export_map_to_custom_authorities; use cargo_capsec::detector::Detector; +use cargo_capsec::export_map::build_export_map; use cargo_capsec::parser::parse_source; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; fn fixture_path(name: &str) -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")) @@ -253,3 +255,327 @@ fn transitive_crate_propagates_findings() { assert_eq!(api_findings[0].category, Category::Fs); assert_eq!(api_findings[0].call_text, "read_helper"); } + +// ── Cross-crate propagation tests ── + +#[test] +fn cross_crate_type_qualified_method_call() { + // Simulate: dependency "mydb" has `fn open() { std::fs::read(...) }` + // living in src/connection.rs (so module path is "connection") + // Workspace code calls `mydb::Connection::open()` (type-qualified) + let dep_source = r#" + use std::fs; + pub fn open() -> Vec { + fs::read("database.db").unwrap() + } + "#; + // File is "src/connection.rs" so module path = ["connection"] + let dep_parsed = parse_source(dep_source, "src/connection.rs").unwrap(); + let det = Detector::new(); + let dep_findings = det.analyse(&dep_parsed, "mydb", "1.0.0", &[]); + assert!(!dep_findings.is_empty(), "mydb should have FS finding"); + + // Export map should produce: + // "mydb::connection::open" (full path from file) + // "mydb::open" (short form for crate-scoped matching) + let dep_map = build_export_map("mydb", "1.0.0", &dep_findings, Path::new("src")); + assert!( + dep_map.exports.contains_key("mydb::connection::open"), + "Should have full-path entry" + ); + assert!( + dep_map.exports.contains_key("mydb::open"), + "Should have short-form entry for crate-scoped matching" + ); + + // Workspace code calls mydb::Connection::open() + // Parser produces segments: ["mydb", "Connection", "open"] + // Crate-scoped matching: expanded[0]=="mydb" && expanded.last()=="open" + // matches pattern ["mydb", "open"] + let app_source = r#" + pub fn query() { + mydb::Connection::open(); + } + "#; + let app_parsed = parse_source(app_source, "src/lib.rs").unwrap(); + + let customs = export_map_to_custom_authorities(&[dep_map]); + let mut det2 = Detector::new(); + det2.add_custom_authorities(&customs); + let app_findings = det2.analyse(&app_parsed, "app", "0.1.0", &[]); + + let query_findings: Vec<_> = app_findings + .iter() + .filter(|f| f.function == "query") + .collect(); + assert!( + !query_findings.is_empty(), + "app::query should get cross-crate finding via mydb::Connection::open(), got: {app_findings:?}" + ); + assert!(query_findings[0].description.contains("Cross-crate")); +} + +fn cross_crate_fixture_source(fixture: &str, crate_name: &str) -> String { + let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests/fixtures") + .join(fixture) + .join(crate_name) + .join("src/lib.rs"); + std::fs::read_to_string(path).unwrap() +} + +#[test] +fn cross_crate_basic_fs_propagation() { + // Phase 1: Scan the "helper" dependency crate + let helper_source = cross_crate_fixture_source("cross_crate", "helper"); + let helper_parsed = parse_source(&helper_source, "src/lib.rs").unwrap(); + let det = Detector::new(); + let helper_findings = det.analyse(&helper_parsed, "helper", "0.1.0", &[]); + + // helper should have a direct FS finding + assert!( + !helper_findings.is_empty(), + "helper should have FS findings" + ); + assert_eq!(helper_findings[0].category, Category::Fs); + + // Build export map from helper's findings + let export_map = build_export_map("helper", "0.1.0", &helper_findings, Path::new("src")); + assert!( + !export_map.exports.is_empty(), + "export map should have entries" + ); + + // Phase 2: Scan the "app" workspace crate with helper's export map injected + let app_source = cross_crate_fixture_source("cross_crate", "app"); + let app_parsed = parse_source(&app_source, "src/lib.rs").unwrap(); + + let cross_crate_customs = export_map_to_custom_authorities(&[export_map]); + let mut det2 = Detector::new(); + det2.add_custom_authorities(&cross_crate_customs); + + let app_findings = det2.analyse(&app_parsed, "app", "0.1.0", &[]); + + // app::load() should have a cross-crate FS finding via helper::read_file + let load_findings: Vec<_> = app_findings + .iter() + .filter(|f| f.function == "load") + .collect(); + assert!( + !load_findings.is_empty(), + "app::load should get cross-crate FS finding, got: {app_findings:?}" + ); + assert!(load_findings[0].description.contains("Cross-crate")); +} + +#[test] +fn cross_crate_chain_multi_hop() { + // Phase 1a: Scan "leaf" (has direct NET finding) + let leaf_source = cross_crate_fixture_source("cross_crate_chain", "leaf"); + let leaf_parsed = parse_source(&leaf_source, "src/lib.rs").unwrap(); + let det = Detector::new(); + let leaf_findings = det.analyse(&leaf_parsed, "leaf", "0.1.0", &[]); + assert!( + leaf_findings.iter().any(|f| f.category == Category::Net), + "leaf should have NET finding" + ); + + let leaf_map = build_export_map("leaf", "0.1.0", &leaf_findings, Path::new("src")); + + // Phase 1b: Scan "mid" with leaf's export map injected + let mid_source = cross_crate_fixture_source("cross_crate_chain", "mid"); + let mid_parsed = parse_source(&mid_source, "src/lib.rs").unwrap(); + let leaf_customs = export_map_to_custom_authorities(&[leaf_map.clone()]); + let mut det_mid = Detector::new(); + det_mid.add_custom_authorities(&leaf_customs); + let mid_findings = det_mid.analyse(&mid_parsed, "mid", "0.1.0", &[]); + + // mid::fetch() should have a cross-crate NET finding via leaf::connect + let mid_fetch: Vec<_> = mid_findings + .iter() + .filter(|f| f.function == "fetch") + .collect(); + assert!( + !mid_fetch.is_empty(), + "mid::fetch should get cross-crate NET finding" + ); + + let mid_map = build_export_map("mid", "0.1.0", &mid_findings, Path::new("src")); + + // Phase 2: Scan "app" with mid's export map + let app_source = cross_crate_fixture_source("cross_crate_chain", "app"); + let app_parsed = parse_source(&app_source, "src/lib.rs").unwrap(); + let all_customs = export_map_to_custom_authorities(&[leaf_map, mid_map]); + let mut det_app = Detector::new(); + det_app.add_custom_authorities(&all_customs); + let app_findings = det_app.analyse(&app_parsed, "app", "0.1.0", &[]); + + // app::handler() should have a cross-crate finding via mid::fetch + let handler_findings: Vec<_> = app_findings + .iter() + .filter(|f| f.function == "handler") + .collect(); + assert!( + !handler_findings.is_empty(), + "app::handler should get cross-crate finding via mid::fetch" + ); +} + +#[test] +fn cross_crate_clean_no_propagation() { + // Phase 1: Scan "pure_lib" (no I/O) + let pure_source = cross_crate_fixture_source("cross_crate_clean", "pure_lib"); + let pure_parsed = parse_source(&pure_source, "src/lib.rs").unwrap(); + let det = Detector::new(); + let pure_findings = det.analyse(&pure_parsed, "pure_lib", "0.1.0", &[]); + assert!( + pure_findings.is_empty(), + "pure_lib should have zero findings" + ); + + let pure_map = build_export_map("pure_lib", "0.1.0", &pure_findings, Path::new("src")); + assert!( + pure_map.exports.is_empty(), + "empty findings -> empty export map" + ); + + // Phase 2: Scan "app" with empty export map + let app_source = cross_crate_fixture_source("cross_crate_clean", "app"); + let app_parsed = parse_source(&app_source, "src/lib.rs").unwrap(); + let customs = export_map_to_custom_authorities(&[pure_map]); + let mut det2 = Detector::new(); + det2.add_custom_authorities(&customs); + let app_findings = det2.analyse(&app_parsed, "app", "0.1.0", &[]); + + assert!( + app_findings.is_empty(), + "app calling pure_lib should have zero findings" + ); +} + +#[test] +fn cross_crate_build_script_excluded() { + // Phase 1: Scan "build_dep" — has build.rs with env::var, but lib.rs is clean + let dep_source = cross_crate_fixture_source("cross_crate_build_script", "build_dep"); + let dep_parsed = parse_source(&dep_source, "src/lib.rs").unwrap(); + let det = Detector::new(); + let dep_findings = det.analyse(&dep_parsed, "build_dep", "0.1.0", &[]); + + // lib.rs has no authority calls + assert!(dep_findings.is_empty(), "build_dep lib.rs should be clean"); + + // Also scan build.rs + let build_source = std::fs::read_to_string( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests/fixtures/cross_crate_build_script/build_dep/build.rs"), + ) + .unwrap(); + let build_parsed = parse_source(&build_source, "build.rs").unwrap(); + let build_findings = det.analyse(&build_parsed, "build_dep", "0.1.0", &[]); + + // build.rs has env::var — finding is marked as build_script + assert!( + build_findings.iter().any(|f| f.is_build_script), + "build.rs findings should be marked as build_script" + ); + + // Build export map — build_script findings should be excluded + let mut all_dep_findings = dep_findings; + all_dep_findings.extend(build_findings); + let dep_map = build_export_map("build_dep", "0.1.0", &all_dep_findings, Path::new("src")); + + assert!( + dep_map.exports.is_empty(), + "export map should exclude build.rs findings" + ); + + // Phase 2: Scan "app" + let app_source = cross_crate_fixture_source("cross_crate_build_script", "app"); + let app_parsed = parse_source(&app_source, "src/lib.rs").unwrap(); + let customs = export_map_to_custom_authorities(&[dep_map]); + let mut det2 = Detector::new(); + det2.add_custom_authorities(&customs); + let app_findings = det2.analyse(&app_parsed, "app", "0.1.0", &[]); + + assert!( + app_findings.is_empty(), + "app should have no findings (build.rs excluded from export map)" + ); +} + +#[test] +fn cross_crate_ffi_propagation() { + // Phase 1: Scan ffi_dep — has extern block + function calling extern fn + let dep_source = cross_crate_fixture_source("cross_crate_ffi", "ffi_dep"); + let dep_parsed = parse_source(&dep_source, "src/lib.rs").unwrap(); + let det = Detector::new(); + let dep_findings = det.analyse(&dep_parsed, "ffi_dep", "0.1.0", &[]); + + // open_db should have an ffi_call finding for calling sqlite3_open + let open_db_ffi: Vec<_> = dep_findings + .iter() + .filter(|f| f.function == "open_db" && f.subcategory == "ffi_call") + .collect(); + assert!( + !open_db_ffi.is_empty(), + "open_db should have FFI call-site finding, got: {dep_findings:?}" + ); + + // Build export map — open_db's FFI finding should appear + let dep_map = build_export_map("ffi_dep", "0.1.0", &dep_findings, Path::new("src")); + assert!( + dep_map.exports.contains_key("ffi_dep::open_db"), + "export map should have ffi_dep::open_db" + ); + + // Phase 2: Scan app with ffi_dep's export map + let app_source = cross_crate_fixture_source("cross_crate_ffi", "app"); + let app_parsed = parse_source(&app_source, "src/lib.rs").unwrap(); + let customs = export_map_to_custom_authorities(&[dep_map]); + let mut det2 = Detector::new(); + det2.add_custom_authorities(&customs); + let app_findings = det2.analyse(&app_parsed, "app", "0.1.0", &[]); + + let init_findings: Vec<_> = app_findings + .iter() + .filter(|f| f.function == "init") + .collect(); + assert!( + !init_findings.is_empty(), + "app::init should get cross-crate FFI finding via ffi_dep::open_db, got: {app_findings:?}" + ); + assert!(init_findings[0].description.contains("Cross-crate")); +} + +#[test] +fn workspace_to_workspace_propagation() { + // Simulate workspace-to-workspace: core_lib scanned first, then app + let core_source = cross_crate_fixture_source("workspace_to_workspace", "core_lib"); + let core_parsed = parse_source(&core_source, "src/lib.rs").unwrap(); + let det = Detector::new(); + let core_findings = det.analyse(&core_parsed, "core_lib", "0.1.0", &[]); + assert!( + core_findings.iter().any(|f| f.category == Category::Fs), + "core_lib should have FS finding" + ); + + // Build export map for core_lib (as if scanned first in topo order) + let core_map = build_export_map("core_lib", "0.1.0", &core_findings, Path::new("src")); + + // Scan app with core_lib's export map + let app_source = cross_crate_fixture_source("workspace_to_workspace", "app"); + let app_parsed = parse_source(&app_source, "src/lib.rs").unwrap(); + let customs = export_map_to_custom_authorities(&[core_map]); + let mut det2 = Detector::new(); + det2.add_custom_authorities(&customs); + let app_findings = det2.analyse(&app_parsed, "app", "0.1.0", &[]); + + let init_findings: Vec<_> = app_findings + .iter() + .filter(|f| f.function == "init") + .collect(); + assert!( + !init_findings.is_empty(), + "app::init should get ws-to-ws FS finding from core_lib::read_config" + ); +}