From db0722c1af523bde3742ec47198ab11155236851 Mon Sep 17 00:00:00 2001
From: bordumb <bordumbb@gmail.com>
Date: Tue, 24 Mar 2026 14:45:44 -0700
Subject: [PATCH] feat: add init onboarding

---
 crates/cargo-capsec/src/cli.rs  |  33 +++
 crates/cargo-capsec/src/init.rs | 378 ++++++++++++++++++++++++++++++++
 crates/cargo-capsec/src/lib.rs  |   1 +
 crates/cargo-capsec/src/main.rs |  16 +-
 4 files changed, 425 insertions(+), 3 deletions(-)
 create mode 100644 crates/cargo-capsec/src/init.rs
diff --git a/crates/cargo-capsec/src/cli.rs b/crates/cargo-capsec/src/cli.rs
index 9b91f65..a231bd4 100644
--- a/crates/cargo-capsec/src/cli.rs
+++ b/crates/cargo-capsec/src/cli.rs
@@ -43,6 +43,8 @@ pub enum Commands {
     CheckDeny(CheckDenyArgs),
     /// Generate a shields.io badge from audit results
     Badge(BadgeArgs),
+    /// Bootstrap capsec for an existing codebase
+    Init(InitArgs),
 }
 
 #[derive(clap::Args)]
@@ -138,3 +140,34 @@ pub struct BadgeArgs {
     #[arg(long, default_value = "high", value_parser = ["low", "medium", "high", "critical"])]
     pub fail_on: String,
 }
+
+#[derive(clap::Args)]
+pub struct InitArgs {
+    /// Path to workspace root
+    #[arg(short, long, default_value = ".")]
+    pub path: PathBuf,
+
+    /// Generate CI config (github, gitlab, generic)
+    #[arg(long, value_parser = ["github", "gitlab", "generic"])]
+    pub ci: Option<String>,
+
+    /// Run interactively (prompt for each choice)
+    #[arg(long)]
+    pub interactive: bool,
+
+    /// Show migration priority report after init
+    #[arg(long)]
+    pub report: bool,
+
+    /// Exclude test/bench/example directories; pass `--exclude-tests=false` to include them
+    #[arg(long, default_value_t = true, action = clap::ArgAction::Set)]
+    pub exclude_tests: bool,
+
+    /// Save baseline alongside config; pass `--baseline=false` to skip it
+    #[arg(long, default_value_t = true, action = clap::ArgAction::Set)]
+    pub baseline: bool,
+
+    /// Overwrite existing .capsec.toml
+    #[arg(long)]
+    pub force: bool,
+}
diff --git a/crates/cargo-capsec/src/init.rs b/crates/cargo-capsec/src/init.rs
new file mode 100644
index 0000000..a47fd56
--- /dev/null
+++ b/crates/cargo-capsec/src/init.rs
@@ -0,0 +1,378 @@
+//! `cargo capsec init` — bootstrap capsec for an existing codebase.
+//!
+//! Runs a full audit, then generates a `.capsec.toml` that suppresses all existing
+//! findings. This lets teams adopt capsec incrementally: accept the current state,
+//! then catch regressions in CI.
+
+use crate::authorities::Risk;
+use crate::detector::Finding;
+use crate::{baseline, config, detector, discovery, parser};
+use std::collections::{BTreeMap, HashSet};
+use std::fmt::Write;
+use std::io::BufRead;
+use std::path::{Path, PathBuf};
+
+/// Options for `cargo capsec init`, populated field-for-field from the CLI `InitArgs`.
+pub struct InitOptions {
+    pub path: PathBuf,
+    pub ci: Option<String>,
+    pub interactive: bool,
+    pub report: bool,
+    pub exclude_tests: bool,
+    pub baseline: bool,
+    pub force: bool,
+}
+
+/// Runs the `cargo capsec init` command.
+///
+/// Audits workspace crates, writes `.capsec.toml`, then optionally saves a baseline,
+/// writes a CI config and prints the migration report. Exits with status 1 if the
+/// config exists without `--force` or cannot be written, 2 if discovery fails.
+pub fn run_init(opts: InitOptions) {
+    let cap_root = capsec_core::root::root();
+    let fs_read = cap_root.grant::<capsec_core::permission::FsRead>();
+    let fs_write = cap_root.grant::<capsec_core::permission::FsWrite>();
+    let spawn_cap = cap_root.grant::<capsec_core::permission::Spawn>();
+
+    let path_arg = opts.path.canonicalize().unwrap_or_else(|_| opts.path.clone());
+    let config_path = path_arg.join(".capsec.toml");
+
+    // Check for existing config
+    if config_path.exists() && !opts.force {
+        eprintln!(
+            "Error: .capsec.toml already exists. Use --force to overwrite, or edit it manually."
+        );
+        std::process::exit(1);
+    }
+
+    // Interactive mode: prompt for choices
+    let (exclude_tests, save_baseline, ci_type, show_report) = if opts.interactive {
+        run_interactive(&opts)
+    } else {
+        (
+            opts.exclude_tests,
+            opts.baseline,
+            opts.ci.clone(),
+            opts.report,
+        )
+    };
+
+    eprintln!("Running audit...");
+    // Discover and scan (workspace only, no deps — init is about YOUR code)
+    let discovery = match discovery::discover_crates(&path_arg, false, &spawn_cap, &fs_read) {
+        Ok(d) => d,
+        Err(e) => {
+            eprintln!("Error: {e}");
+            eprintln!("Hint: Run from a directory containing Cargo.toml, or use --path");
+            std::process::exit(2);
+        }
+    };
+    let workspace_root = discovery.workspace_root;
+    let cfg = config::Config::default();
+    let customs = config::custom_authorities(&cfg);
+    let crate_deny = cfg.deny.normalized_categories();
+    let mut all_findings = Vec::new();
+    for krate in &discovery.crates {
+        if krate.is_dependency {
+            continue;
+        }
+        let mut det = detector::Detector::new();
+        det.add_custom_authorities(&customs);
+        let source_files = discovery::discover_source_files(&krate.source_dir, &fs_read);
+        for file_path in source_files {
+            match parser::parse_file(&file_path, &fs_read) {
+                Ok(parsed) => {
+                    let findings = det.analyse(&parsed, &krate.name, &krate.version, &crate_deny);
+                    all_findings.extend(findings);
+                }
+                Err(e) => {
+                    eprintln!("  Warning: {e}");
+                }
+            }
+        }
+    }
+
+    // Normalize paths
+    for f in &mut all_findings {
+        f.file = make_relative(&f.file, &workspace_root);
+    }
+
+    let crate_count = discovery.crates.iter().filter(|c| !c.is_dependency).count();
+    eprintln!(
+        "Found {} findings across {} crates.\n",
+        all_findings.len(),
+        crate_count
+    );
+
+    // Generate .capsec.toml; stop on failure rather than reporting a file that was not written
+    let toml_content = generate_capsec_toml(&all_findings, exclude_tests);
+    if let Err(e) = capsec_std::fs::write(&config_path, &toml_content, &fs_write) {
+        eprintln!("Error writing .capsec.toml: {e}");
+        std::process::exit(1);
+    }
+
+    let allow_count = all_findings
+        .iter()
+        .map(|f| (&f.crate_name, &f.function))
+        .collect::<HashSet<_>>()
+        .len();
+    eprintln!("Written: .capsec.toml ({allow_count} allow rules)");
+
+    // Save baseline
+    if save_baseline {
+        match baseline::save_baseline(&workspace_root, &all_findings, &fs_write) {
+            Ok(()) => eprintln!("Written: .capsec-baseline.json"),
+            Err(e) => eprintln!("Warning: Failed to save baseline: {e}"),
+        }
+    }
+
+    // Generate CI config
+    if let Some(ref ci) = ci_type {
+        write_ci_config(ci, &path_arg, &fs_write);
+    }
+
+    // Migration report
+    if show_report {
+        print_migration_report(&all_findings);
+    }
+
+    eprintln!("\nDone! New findings will be caught by CI.");
+    eprintln!("To see existing findings: cargo capsec audit --diff");
+}
+
+/// Generates `.capsec.toml` content from current audit findings.
+///
+/// The output starts with a two-line header comment, then (when `exclude_tests`
+/// is set) an `[analysis]` table excluding `tests/**`, `benches/**` and
+/// `examples/**`, then one `[[allow]]` table per distinct (crate, function)
+/// pair in first-seen order. Each table is followed by a comment giving the
+/// category, risk and location of the first finding for that pair:
+///
+/// ```toml
+/// [[allow]]
+/// crate = "my_crate"
+/// function = "load_config"
+/// # <category> <risk> — src/lib.rs:10
+/// ```
+fn generate_capsec_toml(findings: &[Finding], exclude_tests: bool) -> String {
+    // Deduplicate up front: the first finding for each (crate, function) wins.
+    let mut seen_pairs = HashSet::new();
+    let unique: Vec<&Finding> = findings
+        .iter()
+        .filter(|f| seen_pairs.insert((&f.crate_name, &f.function)))
+        .collect();
+
+    let mut out = String::new();
+    out.push_str("# Auto-generated by `cargo capsec init`\n");
+    out.push_str("# Remove allow rules as you migrate functions to capsec types.\n");
+    out.push('\n');
+
+    if exclude_tests {
+        out.push_str("[analysis]\n");
+        out.push_str("exclude = [\"tests/**\", \"benches/**\", \"examples/**\"]\n");
+        out.push('\n');
+    }
+
+    // The count is the number of allow rules, i.e. distinct (crate, function) pairs.
+    let _ = writeln!(out, "# {} existing findings suppressed:", unique.len());
+    out.push('\n');
+
+    for finding in unique {
+        out.push_str("[[allow]]\n");
+        let _ = writeln!(out, "crate = \"{}\"", finding.crate_name);
+        let _ = writeln!(out, "function = \"{}\"", finding.function);
+        let _ = writeln!(
+            out,
+            "# {} {} \u{2014} {}:{}",
+            finding.category.label(),
+            finding.risk.label(),
+            finding.file,
+            finding.call_line
+        );
+        out.push('\n');
+    }
+
+    out
+}
+
+// ── CI templates (written verbatim by `write_ci_config`) ──
+
+const GITHUB_CI_TEMPLATE: &str = r#"name: capsec audit
+on: [push, pull_request]
+jobs:
+  audit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+      - run: cargo install cargo-capsec
+      - run: cargo capsec audit --fail-on high --format sarif > capsec.sarif
+      - uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: capsec.sarif
+        if: always()
+"#;
+
+const GITLAB_CI_TEMPLATE: &str = r#"capsec-audit:
+  stage: test
+  image: rust:latest
+  script:
+    - cargo install cargo-capsec
+    - cargo capsec audit --fail-on high --quiet
+  rules:
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
+"#;
+
+const GENERIC_CI_TEMPLATE: &str = r#"#!/usr/bin/env bash
+set -euo pipefail
+
+# Install capsec
+cargo install cargo-capsec
+
+# Run audit — fails on high-risk or above
+cargo capsec audit --fail-on high --quiet
+
+echo "capsec audit passed."
+"#;
+
+/// Writes the CI template selected by `ci_type` below `workspace_root`, never
+/// overwriting an existing file; unrecognised `ci_type` values are ignored.
+fn write_ci_config(
+    ci_type: &str,
+    workspace_root: &Path,
+    fs_write: &impl capsec_core::cap_provider::CapProvider<capsec_core::permission::FsWrite>,
+) {
+    let (target, template) = if ci_type == "github" {
+        let workflows = workspace_root.join(".github/workflows");
+        let _ = std::fs::create_dir_all(&workflows);
+        (workflows.join("capsec.yml"), GITHUB_CI_TEMPLATE)
+    } else if ci_type == "gitlab" {
+        (workspace_root.join(".gitlab-ci-capsec.yml"), GITLAB_CI_TEMPLATE)
+    } else if ci_type == "generic" {
+        (workspace_root.join("capsec-audit.sh"), GENERIC_CI_TEMPLATE)
+    } else {
+        return;
+    };
+
+    if target.exists() {
+        eprintln!("Warning: {} already exists, skipping", target.display());
+        return;
+    }
+
+    match capsec_std::fs::write(&target, template, fs_write) {
+        Err(e) => eprintln!("Warning: Failed to write {}: {e}", target.display()),
+        Ok(()) => eprintln!("Written: {}", target.display()),
+    }
+}
+
+// ── Interactive mode ──
+
+/// Asks the init questions on the terminal, in a fixed order.
+///
+/// Returns `(exclude_tests, save_baseline, ci, show_report)`. `ci` is `None`
+/// when the answer is "none". The report question is only asked when
+/// `opts.report` is false.
+fn run_interactive(opts: &InitOptions) -> (bool, bool, Option<String>, bool) {
+    let exclude_tests = prompt_yn("Exclude test/bench/example files?", true);
+    let save_baseline = prompt_yn("Save baseline?", true);
+
+    let ci_options = ["none", "github", "gitlab", "generic"];
+    let picked = prompt_choice("Generate CI config?", &ci_options, "none");
+    let ci = Some(picked).filter(|name| name.as_str() != "none");
+
+    // `||` short-circuits, so an explicit `--report` suppresses the prompt.
+    let show_report = opts.report || prompt_yn("Show migration priority report?", false);
+    (exclude_tests, save_baseline, ci, show_report)
+}
+
+/// Asks a yes/no question on stderr; a blank reply or failed read yields `default`.
+fn prompt_yn(question: &str, default: bool) -> bool {
+    let hint = if default { "[Y/n]" } else { "[y/N]" };
+    eprint!("? {question} {hint} ");
+    let mut reply = String::new();
+    let _ = std::io::stdin().lock().read_line(&mut reply);
+    match reply.trim().to_lowercase().as_str() {
+        "" => default,
+        answer => answer.starts_with('y'),
+    }
+}
+
+/// Asks the user to pick one of `choices`; blank or unrecognised input yields `default`.
+fn prompt_choice(question: &str, choices: &[&str], default: &str) -> String {
+    eprint!("? {question} [{}] ", choices.join("/"));
+    let mut reply = String::new();
+    let _ = std::io::stdin().lock().read_line(&mut reply);
+    let answer = reply.trim().to_lowercase();
+    // The reply is lower-cased before comparison, so `choices` must be lower case to match.
+    let recognised = !answer.is_empty() && choices.contains(&answer.as_str());
+    if recognised {
+        answer
+    } else {
+        default.to_string()
+    }
+}
+
+// ── Migration priority report ──
+
+fn print_migration_report(findings: &[Finding]) {
+    // Group direct findings by (crate, function) with their risk + location
+    let mut functions: BTreeMap<(String, String), (Risk, String, usize, usize)> = BTreeMap::new();
+    // Count how many transitive findings reference each function
+    let mut transitive_refs: BTreeMap<String, usize> = BTreeMap::new();
+
+    for f in findings {
+        if f.is_transitive {
+            // call_text is the callee function name for transitive findings
+            *transitive_refs.entry(f.call_text.clone()).or_default() += 1;
+        } else {
+            functions
+                .entry((f.crate_name.clone(), f.function.clone()))
+                .or_insert((f.risk, f.file.clone(), f.call_line, 0));
+        }
+    }
+
+    // Merge transitive ref counts into function entries
+    for ((_, func_name), entry) in &mut functions {
+        if let Some(&count) = transitive_refs.get(func_name) {
+            entry.3 = count;
+        }
+    }
+
+    // Sort by risk (desc) then callers (desc)
+    let mut sorted: Vec<_> = functions.into_iter().collect();
+    sorted.sort_by(|a, b| b.1.0.cmp(&a.1.0).then_with(|| b.1.3.cmp(&a.1.3)));
+
+    eprintln!("\nMigration Priority (by risk \u{00d7} frequency)");
+    eprintln!(
+        "\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}\u{2500}"
+    );
+
+    for (i, ((crate_name, func), (risk, file, line, callers))) in sorted.iter().enumerate().take(20)
+    {
+        let caller_str = if *callers > 0 {
+            format!(" \u{2014} called from {callers} functions")
+        } else {
+            String::new()
+        };
+        eprintln!(
+            "  {:<3} {}()  {:<20} {}:{}{caller_str}",
+            format!("{}.", i + 1),
+            func,
+            format!("{crate_name}/{}", risk.label()),
+            file,
+            line,
+        );
+    }
+
+    if sorted.len() > 20 {
+        eprintln!("  ... and {} more", sorted.len() - 20);
+    }
+}
+
+/// Returns `file_path` relative to `workspace_root`, or unchanged when it is not under it.
+fn make_relative(file_path: &str, workspace_root: &Path) -> String {
+    let stripped = Path::new(file_path).strip_prefix(workspace_root);
+    let relative = stripped.map(|p| p.to_string_lossy().into_owned());
+    relative.unwrap_or_else(|_| file_path.to_string())
+}
diff --git a/crates/cargo-capsec/src/lib.rs b/crates/cargo-capsec/src/lib.rs
index 84e8f21..f487494 100644
--- a/crates/cargo-capsec/src/lib.rs
+++ b/crates/cargo-capsec/src/lib.rs
@@ -50,5 +50,6 @@ pub mod deep;
 pub mod detector;
 pub mod discovery;
 pub mod export_map;
+pub mod init;
 pub mod parser;
 pub mod reporter;
diff --git a/crates/cargo-capsec/src/main.rs b/crates/cargo-capsec/src/main.rs
index a9b9bbf..febeb3b 100644
--- a/crates/cargo-capsec/src/main.rs
+++ b/crates/cargo-capsec/src/main.rs
@@ -7,6 +7,7 @@ mod deep;
 mod detector;
 mod discovery;
 mod export_map;
+mod init;
 mod parser;
 mod reporter;
 
@@ -24,6 +25,15 @@ fn main() {
         Commands::Audit(args) => run_audit(args),
         Commands::CheckDeny(args) => run_check_deny(args),
         Commands::Badge(args) => run_badge(args),
+        Commands::Init(args) => init::run_init(init::InitOptions {
+            path: args.path,
+            ci: args.ci,
+            interactive: args.interactive,
+            report: args.report,
+            exclude_tests: args.exclude_tests,
+            baseline: args.baseline,
+            force: args.force,
+        }),
     }
 }
 
@@ -71,7 +81,7 @@ fn run_audit(args: AuditArgs) {
     let mut all_findings = Vec::new();
 
     if scan_deps {
-        // ── Cross-crate two-phase scan ──
+        // Cross-crate two-phase scan
 
         // Phase 1: Scan dependency crates, build export maps.
         // Check cache first; only scan if cache miss.
@@ -213,7 +223,7 @@ fn run_audit(args: AuditArgs) {
             }
         }
 
-        // ── Deep MIR analysis (runs before Phase 2 so findings feed into export maps) ──
+        // Deep MIR analysis (runs before Phase 2 so findings feed into export maps)
         if args.deep {
             let deep_result = deep::run_deep_analysis(
                 &path_arg,
@@ -342,7 +352,7 @@ fn run_audit(args: AuditArgs) {
             }
         }
     } else {
-        // ── Original single-pass scan (no deps) ──
+        // Original single-pass scan (no deps)
         for krate in &workspace_crates {
             if let Some(ref only) = args.only {
                 let allowed: Vec<&str> = only.split(',').collect();