From 13a65c78ee1894cc908102ae3feaca291ad6e21a Mon Sep 17 00:00:00 2001 From: Ahavili Date: Wed, 27 May 2026 12:09:09 +0800 Subject: [PATCH 1/2] perf: respect ignore rules in Swift module discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit discover_swift_packages_recursive hand-rolled a recursive directory walk that skipped only hidden dirs and node_modules/build/DerivedData/Pods. It did not skip Cargo's target/ nor honor .gitignore, so indexing a Rust project stat'd its way through the entire build-artifact tree (60G, 15k+ dirs here). Because module discovery runs in an untimed gap between the "discovered" and "extracting" phase prints, this showed up as a multi-second freeze that the per-phase timings did not explain. Replace the hand-rolled walk with ignore::WalkBuilder — the same ignore-aware walker discover.rs already uses for source files — honoring .gitignore (require_git(false) so it also applies in non-git checkouts), skipping hidden dirs, and explicitly pruning build trees including target/. Package registration semantics are preserved (Sources/Tests registration; a package nested inside another is registered once via lexical-sort ancestor dedup). Indexing this project drops from ~13-21s to 0.6s. Co-Authored-By: Claude Opus 4.7 (1M context) --- Cargo.lock | 1 + grapha-swift/Cargo.toml | 1 + grapha-swift/src/module_discovery.rs | 242 ++++++++++++++++++--------- 3 files changed, 169 insertions(+), 75 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e1f4420..ae4e454 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -817,6 +817,7 @@ version = "0.4.1" dependencies = [ "anyhow", "grapha-core", + "ignore", "libloading", "regex", "serde", diff --git a/grapha-swift/Cargo.toml b/grapha-swift/Cargo.toml index d8f2952..bc89086 100644 --- a/grapha-swift/Cargo.toml +++ b/grapha-swift/Cargo.toml @@ -16,6 +16,7 @@ anyhow = "1" tree-sitter = "0.26" tree-sitter-swift = "0.7" regex = "1" +ignore = "0.4" serde = { version = "1", features = ["derive"] } serde_json = "1" libloading = "0.9" diff --git a/grapha-swift/src/module_discovery.rs b/grapha-swift/src/module_discovery.rs index 971d168..45fbcdb 100644 --- a/grapha-swift/src/module_discovery.rs +++ b/grapha-swift/src/module_discovery.rs @@ -1,92 +1,122 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; use grapha_core::ModuleMap; +use ignore::WalkBuilder; + +/// Directories that never contain a project's own Swift packages. These are +/// build outputs and vendored dependency trees; descending into them is both +/// wasteful and a correctness hazard. We prune them explicitly so discovery +/// stays fast even when they are not listed in `.gitignore`. +const PRUNED_DIRS: &[&str] = &["node_modules", "build", "DerivedData", "Pods", "target"]; pub fn discover_swift_modules(root: &Path) -> ModuleMap { let mut modules = ModuleMap::new(); - discover_swift_packages_recursive(root, &mut modules); + for package_dir in find_package_dirs(root) { + register_package(&package_dir, &mut modules); + } modules } -fn discover_swift_packages_recursive(dir: &Path, modules: &mut ModuleMap) { - let package_swift = dir.join("Package.swift"); - if package_swift.is_file() { - let module_name = dir - .file_name() - .and_then(|name| name.to_str()) - .unwrap_or("unknown") - .to_string(); - let sources_dir = dir.join("Sources"); - let source_dir = if sources_dir.is_dir() { - sources_dir - } else { - dir.to_path_buf() - }; - modules - .modules - .entry(module_name) - .or_default() - .push(source_dir); - - // Register test targets: each subdirectory under Tests/ becomes a module. - // If Tests/ has no subdirectories, register it as "Tests". - let tests_dir = dir.join("Tests"); - if tests_dir.is_dir() { - let mut found_subdirs = false; - if let Ok(entries) = std::fs::read_dir(&tests_dir) { - for entry in entries.flatten() { - let path = entry.path(); - if path.is_dir() { - let name = entry.file_name(); - let name = name.to_string_lossy(); - if !name.starts_with('.') { - modules - .modules - .entry(name.to_string()) - .or_default() - .push(path); - found_subdirs = true; - } - } - } - } - if !found_subdirs { - let pkg_name = dir - .file_name() - .and_then(|n| n.to_str()) - .unwrap_or("unknown"); - modules - .modules - .entry(format!("{pkg_name}Tests")) - .or_default() - .push(tests_dir); - } - } +/// Locate every Swift package root (a directory containing `Package.swift`) +/// under `root`, honoring `.gitignore` and skipping hidden / build directories, +/// mirroring how source files are discovered elsewhere. Packages nested inside +/// another package's tree are dropped so a single package is registered once. +fn find_package_dirs(root: &Path) -> Vec { + let walker = WalkBuilder::new(root) + .hidden(true) + .git_ignore(true) + // Honor `.gitignore` even outside a git repo so build trees stay pruned + // for standalone checkouts and freshly cloned projects. + .require_git(false) + .filter_entry(|entry| { + !entry + .file_name() + .to_str() + .is_some_and(|name| PRUNED_DIRS.contains(&name)) + }) + .build(); - return; + let mut package_dirs: Vec = walker + .flatten() + .filter(|entry| { + entry.file_name() == "Package.swift" + && entry.file_type().is_some_and(|ft| ft.is_file()) + }) + .filter_map(|entry| entry.path().parent().map(Path::to_path_buf)) + .collect(); + + // Sorting lexically places an ancestor path immediately before its + // descendants, so a single pass can drop packages nested within a package + // that was already registered. + package_dirs.sort(); + let mut roots: Vec = Vec::with_capacity(package_dirs.len()); + for dir in package_dirs { + if roots.iter().any(|root| dir.starts_with(root)) { + continue; + } + roots.push(dir); } + roots +} - let entries = match std::fs::read_dir(dir) { - Ok(entries) => entries, - Err(_) => return, +fn register_package(dir: &Path, modules: &mut ModuleMap) { + let module_name = dir + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("unknown") + .to_string(); + let sources_dir = dir.join("Sources"); + let source_dir = if sources_dir.is_dir() { + sources_dir + } else { + dir.to_path_buf() }; + modules + .modules + .entry(module_name) + .or_default() + .push(source_dir); - for entry in entries.flatten() { - let path = entry.path(); - if !path.is_dir() { - continue; - } - let name = entry.file_name(); - let name = name.to_string_lossy(); - if name.starts_with('.') - || name == "node_modules" - || name == "build" - || name == "DerivedData" - || name == "Pods" - { - continue; + register_test_targets(dir, modules); +} + +/// Register test targets: each subdirectory under `Tests/` becomes a module. +/// If `Tests/` has no subdirectories, register it as `Tests`. +fn register_test_targets(dir: &Path, modules: &mut ModuleMap) { + let tests_dir = dir.join("Tests"); + if !tests_dir.is_dir() { + return; + } + + let mut found_subdirs = false; + if let Ok(entries) = std::fs::read_dir(&tests_dir) { + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + let name = entry.file_name(); + let name = name.to_string_lossy(); + if !name.starts_with('.') { + modules + .modules + .entry(name.to_string()) + .or_default() + .push(path); + found_subdirs = true; + } + } } - discover_swift_packages_recursive(&path, modules); + } + + if !found_subdirs { + let pkg_name = dir + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("unknown"); + modules + .modules + .entry(format!("{pkg_name}Tests")) + .or_default() + .push(tests_dir); } } @@ -144,4 +174,66 @@ mod tests { "Tests/ with no subdirs should fallback to Tests" ); } + + #[test] + fn prunes_build_artifact_directories() { + let dir = tempfile::tempdir().unwrap(); + // A real package the walk should find. + let pkg_dir = dir.path().join("RealPkg"); + fs::create_dir_all(pkg_dir.join("Sources")).unwrap(); + fs::write(pkg_dir.join("Package.swift"), "// swift-tools-version:5.5").unwrap(); + + // A Package.swift buried inside Cargo's target/ output must be ignored. + let target_pkg = dir.path().join("target").join("BuildArtifact"); + fs::create_dir_all(&target_pkg).unwrap(); + fs::write(target_pkg.join("Package.swift"), "// swift-tools-version:5.5").unwrap(); + + let modules = discover_swift_modules(dir.path()); + assert!(modules.modules.contains_key("RealPkg")); + assert!( + !modules.modules.contains_key("BuildArtifact"), + "packages under target/ must be pruned from discovery" + ); + } + + #[test] + fn skips_gitignored_directories() { + let dir = tempfile::tempdir().unwrap(); + fs::write(dir.path().join(".gitignore"), "vendor/\n").unwrap(); + + let pkg_dir = dir.path().join("App"); + fs::create_dir_all(pkg_dir.join("Sources")).unwrap(); + fs::write(pkg_dir.join("Package.swift"), "// swift-tools-version:5.5").unwrap(); + + let ignored = dir.path().join("vendor").join("Ignored"); + fs::create_dir_all(&ignored).unwrap(); + fs::write(ignored.join("Package.swift"), "// swift-tools-version:5.5").unwrap(); + + let modules = discover_swift_modules(dir.path()); + assert!(modules.modules.contains_key("App")); + assert!( + !modules.modules.contains_key("Ignored"), + "gitignored directories must be skipped during discovery" + ); + } + + #[test] + fn nested_package_is_registered_once() { + let dir = tempfile::tempdir().unwrap(); + let outer = dir.path().join("Outer"); + fs::create_dir_all(outer.join("Sources")).unwrap(); + fs::write(outer.join("Package.swift"), "// swift-tools-version:5.5").unwrap(); + + // A package nested inside another package's tree should not surface. + let inner = outer.join("Sources").join("Inner"); + fs::create_dir_all(&inner).unwrap(); + fs::write(inner.join("Package.swift"), "// swift-tools-version:5.5").unwrap(); + + let modules = discover_swift_modules(dir.path()); + assert!(modules.modules.contains_key("Outer")); + assert!( + !modules.modules.contains_key("Inner"), + "packages nested inside another package are not registered" + ); + } } From 9d8d8882290a005a45d1af5bb50caf8c2e042e33 Mon Sep 17 00:00:00 2001 From: Ahavili Date: Wed, 27 May 2026 13:58:18 +0800 Subject: [PATCH 2/2] style: apply rustfmt to module_discovery Co-Authored-By: Claude Opus 4.7 (1M context) --- grapha-swift/src/module_discovery.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/grapha-swift/src/module_discovery.rs b/grapha-swift/src/module_discovery.rs index 45fbcdb..731a7e1 100644 --- a/grapha-swift/src/module_discovery.rs +++ b/grapha-swift/src/module_discovery.rs @@ -39,8 +39,7 @@ fn find_package_dirs(root: &Path) -> Vec { let mut package_dirs: Vec = walker .flatten() .filter(|entry| { - entry.file_name() == "Package.swift" - && entry.file_type().is_some_and(|ft| ft.is_file()) + entry.file_name() == "Package.swift" && entry.file_type().is_some_and(|ft| ft.is_file()) }) .filter_map(|entry| entry.path().parent().map(Path::to_path_buf)) .collect(); @@ -186,7 +185,11 @@ mod tests { // A Package.swift buried inside Cargo's target/ output must be ignored. let target_pkg = dir.path().join("target").join("BuildArtifact"); fs::create_dir_all(&target_pkg).unwrap(); - fs::write(target_pkg.join("Package.swift"), "// swift-tools-version:5.5").unwrap(); + fs::write( + target_pkg.join("Package.swift"), + "// swift-tools-version:5.5", + ) + .unwrap(); let modules = discover_swift_modules(dir.path()); assert!(modules.modules.contains_key("RealPkg"));