From cce8e67c0d856855f7c7436764a8eacb65afa01c Mon Sep 17 00:00:00 2001 From: b4prog Date: Sat, 27 Jun 2026 13:30:04 +0200 Subject: [PATCH 1/2] [build] bump crate version to 0.7.1 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c9939c..e0b0d7c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -113,7 +113,7 @@ checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "codem8" -version = "0.7.0" +version = "0.7.1" dependencies = [ "clap", "ignore", diff --git a/Cargo.toml b/Cargo.toml index 4631915..ae70ba3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codem8" -version = "0.7.0" +version = "0.7.1" edition = "2021" rust-version = "1.85" license = "MIT" From e2240aac763a128ac7784c47338c473009cfa036 Mon Sep 17 00:00:00 2001 From: b4prog Date: Sat, 27 Jun 2026 15:04:13 +0200 Subject: [PATCH 2/2] [fix] scan all source files before filtering git-branch duplicates --- README.md | 9 ++- src/cli/help.rs | 5 +- src/lib.rs | 142 ++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 137 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 2c28c53..31e3ddd 100644 --- a/README.md +++ b/README.md @@ -84,11 +84,10 @@ trailing Unicode whitespace are removed before hashing and comparison. Empty trimmed lines are ignored. CodeM8 currently expects UTF-8 source files; invalid UTF-8 produces a clear error rather than lossy output. -Use `-git-branch` to analyze only files changed on the current local branch -compared to the origin base branch. CodeM8 resolves that base from `origin/HEAD` -with `origin/main` and `origin/master` fallbacks. This includes committed, -staged, unstaged, and untracked files that still exist in the worktree. The -option requires a Git repository and cannot be combined with `-files`. +Use `-git-branch` to search duplicate code only in files changed on the current +local branch. CodeM8 resolves that branch set from `origin/HEAD` with +`origin/main` and `origin/master` fallbacks. The option requires a Git +repository and cannot be combined with `-files`. Duplicate block weight is calculated as: diff --git a/src/cli/help.rs b/src/cli/help.rs index a73cdf7..5f9f495 100644 --- a/src/cli/help.rs +++ b/src/cli/help.rs @@ -29,9 +29,8 @@ OPTIONS: Example: -files=src/a.ts,src/b.js -git-branch - Analyze files changed on the current local Git branch compared to the - origin base branch, including committed, staged, unstaged, and untracked - files. Cannot be combined with -files. + Search duplicate code only in files changed on the current local Git + branch. Cannot be combined with -files. -verbose Include duplicate block metrics in report output. diff --git a/src/lib.rs b/src/lib.rs index 692f991..91e5f35 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,11 +7,14 @@ pub mod model; pub mod paths; pub mod report; +use std::collections::HashSet; use std::io::Write; use std::path::Path; use std::time::{Duration, Instant}; use crate::error::{CodeM8Error, Result}; +use crate::model::ProcessedFile; +use crate::paths::format_path; /// Runs the CLI workflow and writes the selected report to the provided writer. /// @@ -31,23 +34,31 @@ where .map_err(|error| CodeM8Error::new(format!("could not write help output: {error}")))?, cli::CliCommand::ReportDuplicate(config) => { let should_report_scanned_files = config.git_branch || config.files.is_some(); + let git_branch_files = if config.git_branch { + Some(discovery::changed_files_against_origin(current_dir)?) + } else { + None + }; let (source_files, discovery_duration) = time_result(config.verbose, || { - let git_branch_files = if config.git_branch { - Some(discovery::changed_files_against_origin(current_dir)?) - } else { - None - }; discovery::discover_source_files( current_dir, &config.file_extensions, - git_branch_files.as_deref().or(config.files.as_deref()), + if config.git_branch { + None + } else { + config.files.as_deref() + }, ) })?; let (processed_files, file_processing_duration) = time_result(config.verbose, || line::process_source_files(&source_files))?; + let duplicate_source_files = git_branch_files.as_deref().map_or_else( + || processed_files.clone(), + |git_branch_files| filtered_processed_files(&processed_files, git_branch_files), + ); let (duplicate_blocks, duplicate_detection_duration) = time_value(config.verbose, || { - report::detect_duplicate_blocks(&processed_files) + report::detect_duplicate_blocks(&duplicate_source_files) }); let report = report::DuplicateReport { analyzed_files: source_files.len(), @@ -99,10 +110,28 @@ fn time_value(enabled: bool, operation: impl FnOnce() -> T) -> (T, Option Vec { + let selected_files = selected_files + .iter() + .map(|path| format_path(path)) + .collect::>(); + processed_files + .iter() + .filter(|processed_file| { + selected_files.contains(&format_path(&processed_file.source.display_path)) + }) + .cloned() + .collect() +} + #[cfg(test)] mod tests { use std::fs; use std::path::{Path, PathBuf}; + use std::process::Command; use std::sync::atomic::{AtomicUsize, Ordering}; use super::*; @@ -132,24 +161,96 @@ mod tests { } fs::write(path, contents).expect("write test file"); } + } - fn path(&self) -> &Path { + impl Drop for TempProject { + fn drop(&mut self) { + let _ = fs::remove_dir_all(&self.path); + } + } + + impl AsRef for TempProject { + fn as_ref(&self) -> &Path { &self.path } } - impl Drop for TempProject { + struct TempGitRepo { + path: PathBuf, + } + + impl TempGitRepo { + fn new(name: &str) -> Self { + let id = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed); + let path = + std::env::temp_dir().join(format!("codem8-git-{name}-{}-{id}", std::process::id())); + if path.exists() { + fs::remove_dir_all(&path).expect("remove stale test directory"); + } + fs::create_dir_all(&path).expect("create test directory"); + Self { path } + } + + fn write(&self, relative_path: &str, contents: &str) { + let path = self.path.join(relative_path); + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).expect("create test parent directory"); + } + fs::write(path, contents).expect("write test file"); + } + + fn git(&self, args: &[&str]) { + let status = Command::new("git") + .arg("-C") + .arg(&self.path) + .args(args) + .status() + .expect("run git"); + assert!(status.success(), "git command failed: {args:?}"); + } + + fn commit(&self, message: &str) { + self.git(&["add", "."]); + self.git(&[ + "-c", + "user.name=CodeM8 Test", + "-c", + "user.email=codem8@example.invalid", + "commit", + "-m", + message, + ]); + } + } + + impl Drop for TempGitRepo { fn drop(&mut self) { let _ = fs::remove_dir_all(&self.path); } } - fn run_in(project: &TempProject, args: &[&str]) -> std::result::Result { + impl AsRef for TempGitRepo { + fn as_ref(&self) -> &Path { + &self.path + } + } + + fn run_in>( + project: P, + args: &[&str], + ) -> std::result::Result { let mut output = Vec::new(); - run(args.iter().copied(), project.path(), &mut output)?; + run(args.iter().copied(), project.as_ref(), &mut output)?; Ok(String::from_utf8(output).expect("report is UTF-8")) } + fn git_is_available() -> bool { + Command::new("git") + .arg("--version") + .status() + .is_ok_and(|status| status.success()) + } + #[test] fn duplicate_report_snapshot_is_stable() { let project = TempProject::new("snapshot"); @@ -258,6 +359,25 @@ mod tests { assert!(js_output.contains("Duplicate blocks found: 1")); } + #[test] + fn git_branch_mode_limits_duplicate_search_to_changed_files() { + if !git_is_available() { + return; + } + let project = TempGitRepo::new("git-branch-scope"); + project.git(&["init"]); + project.write("src/a.ts", "const original = 1;\n"); + project.write("src/b.ts", "const shared = 1;\n"); + project.commit("initial"); + project.git(&["update-ref", "refs/remotes/origin/main", "HEAD"]); + project.git(&["branch", "-M", "feature"]); + project.write("src/a.ts", "const shared = 1;\n"); + let output = + run_in(&project, &["--report-duplicate", "-git-branch"]).expect("report succeeds"); + assert!(output.contains("Number of files scanned: 2")); + assert!(output.contains("Duplicate blocks found: 0")); + } + #[test] fn invalid_explicit_file_returns_a_clear_error() { let project = TempProject::new("invalid-file");