diff --git a/Cargo.lock b/Cargo.lock index b26e7af..8771335 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +dependencies = [ + "memchr", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -61,6 +70,18 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "arrayvec" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f02882884d3e1bc524fb12c79f107f6ad0e1cfd498c536ffb494301740995dfe" + +[[package]] +name = "autocfg" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" + [[package]] name = "bstr" version = "1.12.3" @@ -71,6 +92,22 @@ dependencies = [ "serde_core", ] +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cc" +version = "1.2.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e228eec9be7c17ccb640b59b36a5cd805ea2a564a4c5e162c2f659fea30d3b96" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "clap" version = "4.6.1" @@ -102,7 +139,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.118", ] [[package]] @@ -113,12 +150,13 @@ checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "codem8" -version = "0.7.2" +version = "0.7.3" dependencies = [ "clap", "ignore", "rayon", "regex", + "rust-code-analysis", "xxhash-rust", ] @@ -128,6 +166,28 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "crossbeam" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -147,6 +207,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -159,19 +228,52 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "globset" version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" dependencies = [ - "aho-corasick", + "aho-corasick 1.1.4", "bstr", "log", "regex-automata", "regex-syntax", ] +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + [[package]] name = "heck" version = "0.5.0" @@ -194,12 +296,34 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "log" version = "0.4.33" @@ -212,12 +336,122 @@ version = "2.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + 
"num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-derive" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "num-format" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" +dependencies = [ + "arrayvec", + "itoa", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + [[package]] name = "once_cell_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset", + "indexmap", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -262,7 +496,7 @@ version = "1.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba" dependencies = [ - "aho-corasick", + "aho-corasick 1.1.4", "memchr", "regex-automata", "regex-syntax", @@ -274,7 +508,7 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ - "aho-corasick", + "aho-corasick 1.1.4", "memchr", "regex-syntax", ] @@ -285,6 +519,39 @@ version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4" +[[package]] +name = "rust-code-analysis" +version = "0.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8aef20e35eb94fc114e15eedf547c54f39c51b5a1078ee1ea41d64e660025136" +dependencies = [ + "aho-corasick 0.7.20", + "crossbeam", + "fxhash", + "globset", + "lazy_static", + "num", + "num-derive", + "num-format", + "num-traits", + "once_cell", + 
"petgraph", + "regex", + "serde", + "termcolor", + "tree-sitter", + "tree-sitter-ccomment", + "tree-sitter-java", + "tree-sitter-javascript", + "tree-sitter-mozcpp", + "tree-sitter-mozjs", + "tree-sitter-preproc", + "tree-sitter-python", + "tree-sitter-rust", + "tree-sitter-typescript", + "walkdir", +] + [[package]] name = "same-file" version = "1.0.6" @@ -294,6 +561,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -311,15 +588,32 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.118", ] +[[package]] +name = "shlex" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" + [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.118" @@ -331,6 +625,127 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "tree-sitter" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "d4423c784fe11398ca91e505cdc71356b07b1a924fc8735cfab5333afe3e18bc" +dependencies = [ + "cc", + "regex", +] + +[[package]] +name = "tree-sitter-ccomment" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e346e85d350ae07c4a42ec9438f20100927215d7c97313f41ee6be6239c8bb9" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dbedbf4066bfab725b3f9e2a21530507419a7d2f98621d3c13213502b734ec0" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-java" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0bf5d3f508cbffcbfe1805834101c0d24297a8b6c2184ad9c595556c46d2420" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2490fab08630b2c8943c320f7b63473cbf65511c8d83aec551beb9b4375906ed" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-mozcpp" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9514ebbde0a575c43027fffa2702788ae7fb967be5e1a43daae92667400d13e" +dependencies = [ + "cc", + "tree-sitter", + "tree-sitter-cpp", +] + +[[package]] +name = "tree-sitter-mozjs" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "836d32956e968db7fe66e15ad5cf6a46a3fc05c9710fffb9612487584da34b40" +dependencies = [ + "cc", + "tree-sitter", + "tree-sitter-javascript", +] + +[[package]] +name = "tree-sitter-preproc" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a31d01067bbe7a827115ce36366af0738780b62cd5343b3378a5cce20d1f85" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-python" 
+version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dda114f58048f5059dcf158aff691dffb8e113e6d2b50d94263fd68711975287" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "797842733e252dc11ae5d403a18060bf337b822fc2ae5ddfaa6ff4d9cc20bda6" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e8ed0ecb931cdff13c6a13f45ccd615156e2779d9ffb0395864e05505e6e86d" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "unicode-ident" version = "1.0.24" diff --git a/Cargo.toml b/Cargo.toml index 93b14b5..26f2b81 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codem8" -version = "0.7.2" +version = "0.7.3" edition = "2021" rust-version = "1.85" license = "MIT" @@ -14,4 +14,5 @@ clap = { version = "4.6.1", features = ["derive"] } ignore = "0.4" rayon = "1" regex = "1" +rust-code-analysis = "0.0.25" xxhash-rust = { version = "0.8", features = ["xxh3"] } diff --git a/README.md b/README.md index 31e3ddd..0b1e388 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # CodeM8 CodeM8 is a Rust command-line application for deterministic source code reports. -The initial report detects duplicated line-based code blocks in a repository: +It can detect duplicated line-based code blocks in a repository: ```bash codem8 --report-duplicate @@ -12,6 +12,13 @@ trims source lines, ignores empty lines, hashes normalized lines with XXH3 128-bit, classifies syntax-only lines as block-only, groups repeated blocks, and prints a stable plain-text report sorted by duplicate weight. 
+CodeM8 can also report functions whose cognitive or cyclomatic complexity +exceeds configurable limits: + +```bash +codem8 --report-complexity +``` + ## Installation Install `codem8` from the GitHub source with Cargo: @@ -29,7 +36,7 @@ cargo build --release Install from a local checkout: ```bash -cargo install --path . +cargo install --locked --path . ``` Run from the local checkout without installing: @@ -46,6 +53,12 @@ Analyze supported source files from the current directory: codem8 --report-duplicate ``` +Analyze function complexity for languages supported by `rust-code-analysis`: + +```bash +codem8 --report-complexity +``` + Restrict analysis to specific extensions: ```bash @@ -55,9 +68,12 @@ codem8 --report-duplicate -file-extension=ts,tsx,js,jsx Analyze an explicit list of files instead of recursively discovering files: ```bash -codem8 --report-duplicate -file-extension=ts,js -files=src/a.ts,src/b.js +codem8 --report-duplicate -file-extension=ts,js -files="src/a.ts,src/b.js" ``` +Quoting `-files` values is recommended in PowerShell when paths contain file +extensions. + Analyze files changed on the current local Git branch compared to the origin base branch: @@ -65,12 +81,25 @@ base branch: codem8 --report-duplicate -git-branch ``` -Include duplicate block metrics and timing information: +The duplicate and complexity reports are mutually exclusive; run one report per +command. + +Reports exit with a non-zero status when they detect issues: duplicate blocks +for `--report-duplicate`, or functions above the configured limits for +`--report-complexity`. + +Include analyzed files, report metrics, and timing information: ```bash codem8 --report-duplicate -verbose ``` +Set complexity thresholds: + +```bash +codem8 --report-complexity -max-cognitive-complexity=15 -max-cyclomatic-complexity=10 +``` + ## Duplicate Report By default, CodeM8 analyzes all registered source file extensions. 
Recursive @@ -99,10 +128,27 @@ Reports are sorted deterministically by descending weight, then by line count, character count, first location, and normalized block text. By default, each duplicate block prints only the duplicate locations. Use -`-verbose` to also show the duplicated code, weight, line count, occurrence -count, and timings for discovery, file processing, and duplicate detection. -Character counts are used internally for scoring and sorting, but are not -printed. +`-verbose` to also show analyzed files, the duplicated code, weight, line count, +occurrence count, and timings for discovery, file processing, and duplicate +detection. Character counts are used internally for scoring and sorting, but are +not printed. + +## Complexity Report + +The complexity report uses `rust-code-analysis` and only applies to file +extensions supported by that crate. It reports `SpaceKind::Function` entries +whose cognitive complexity exceeds the configured cognitive limit or whose +cyclomatic complexity exceeds the configured cyclomatic limit. + +The default maximum cognitive complexity is 15, and the default maximum +cyclomatic complexity is 10. Use `-max-cognitive-complexity=` and +`-max-cyclomatic-complexity=` to adjust them. + +Use `-git-branch` to analyze complexity only in supported files changed on the +current local branch. The same origin branch resolution and `-files` exclusion +rules used by the duplicate report apply. + +Use `-verbose` to list analyzed files and timing information. 
## Development diff --git a/src/cli/args.rs b/src/cli/args.rs index b90e285..c8ee3f4 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -2,15 +2,20 @@ use std::path::PathBuf; use clap::{ArgAction, Parser}; -use super::CliConfig; +use super::{CliConfig, ReportKind}; use crate::error::{CodeM8Error, Result}; use crate::language::supported_file_extensions; +pub const DEFAULT_MAX_COGNITIVE_COMPLEXITY: u32 = 15; +pub const DEFAULT_MAX_CYCLOMATIC_COMPLEXITY: u32 = 10; + #[derive(Debug, Parser)] #[command(name = "codem8", disable_help_flag = true, disable_version_flag = true)] struct ClapCli { #[arg(long = "report-duplicate", action = ArgAction::Count)] report_duplicate: u8, + #[arg(long = "report-complexity", action = ArgAction::Count)] + report_complexity: u8, #[arg(long = "codem8-verbose", action = ArgAction::Count)] verbose: u8, #[arg(long = "codem8-git-branch", action = ArgAction::Count)] @@ -29,6 +34,16 @@ struct ClapCli { action = ArgAction::Append )] files: Vec>, + #[arg( + long = "codem8-max-cognitive-complexity", + value_parser = parse_complexity_limit + )] + max_cognitive_complexity: Option, + #[arg( + long = "codem8-max-cyclomatic-complexity", + value_parser = parse_complexity_limit + )] + max_cyclomatic_complexity: Option, } /// Parses command-line arguments into a validated CLI configuration. @@ -44,16 +59,51 @@ where { let parsed = ClapCli::try_parse_from(normalized_clap_args(args)?) 
.map_err(|error| CodeM8Error::new(error.to_string().trim().to_owned()))?; - if parsed.report_duplicate == 0 { + let report = selected_report(&parsed)?; + validate_repeated_options(&parsed)?; + let git_branch = parsed.git_branch != 0; + let files = selected_files(&parsed, git_branch)?; + validate_complexity_limits(report, &parsed)?; + Ok(CliConfig { + report, + verbose: parsed.verbose != 0, + file_extensions: selected_file_extensions(&parsed), + files, + git_branch, + max_cognitive_complexity: parsed + .max_cognitive_complexity + .unwrap_or(DEFAULT_MAX_COGNITIVE_COMPLEXITY), + max_cyclomatic_complexity: parsed + .max_cyclomatic_complexity + .unwrap_or(DEFAULT_MAX_CYCLOMATIC_COMPLEXITY), + }) +} + +fn selected_report(parsed: &ClapCli) -> Result { + let report_count = parsed.report_duplicate + parsed.report_complexity; + if report_count == 0 { return Err(CodeM8Error::with_help( - "no report switch provided; pass --report-duplicate", + "no report switch provided; pass --report-duplicate or --report-complexity", )); } - if parsed.report_duplicate > 1 { + if parsed.report_duplicate > 1 || parsed.report_complexity > 1 { return Err(CodeM8Error::new( "report switch was provided more than once", )); } + if report_count > 1 { + return Err(CodeM8Error::new( + "--report-duplicate and --report-complexity are mutually exclusive", + )); + } + Ok(if parsed.report_duplicate != 0 { + ReportKind::Duplicate + } else { + ReportKind::Complexity + }) +} + +fn validate_repeated_options(parsed: &ClapCli) -> Result<()> { if parsed.git_branch > 1 { return Err(CodeM8Error::new( "git branch mode was provided more than once", @@ -69,24 +119,36 @@ where "explicit files were provided more than once", )); } - let git_branch = parsed.git_branch != 0; - let files = parsed.files.into_iter().next(); + Ok(()) +} + +fn selected_files(parsed: &ClapCli, git_branch: bool) -> Result>> { + let files = parsed.files.first().cloned(); if git_branch && files.is_some() { return Err(CodeM8Error::new( "git branch 
mode cannot be combined with explicit files", )); } - Ok(CliConfig { - report_duplicate: parsed.report_duplicate != 0, - verbose: parsed.verbose != 0, - file_extensions: parsed - .file_extensions - .into_iter() - .next() - .unwrap_or_else(supported_file_extensions), - files, - git_branch, - }) + Ok(files) +} + +fn validate_complexity_limits(report: ReportKind, parsed: &ClapCli) -> Result<()> { + if report == ReportKind::Duplicate + && (parsed.max_cognitive_complexity.is_some() || parsed.max_cyclomatic_complexity.is_some()) + { + return Err(CodeM8Error::new( + "complexity limits can only be used with --report-complexity", + )); + } + Ok(()) +} + +fn selected_file_extensions(parsed: &ClapCli) -> Vec { + parsed + .file_extensions + .first() + .cloned() + .unwrap_or_else(supported_file_extensions) } /// Parses a comma-separated list of file extensions. @@ -143,18 +205,56 @@ pub fn parse_file_list(value: &str) -> Result> { Ok(files) } +/// Parses a positive complexity limit. +/// +/// # Errors +/// +/// Returns an error when the value is not a positive integer. 
+pub fn parse_complexity_limit(value: &str) -> Result { + let limit = value.parse::().map_err(|_| { + CodeM8Error::new(format!( + "complexity limits must be positive integers: {value}" + )) + })?; + if limit == 0 { + return Err(CodeM8Error::new( + "complexity limits must be greater than zero", + )); + } + Ok(limit) +} + fn normalized_clap_args(args: I) -> Result> where I: IntoIterator, S: Into, { let mut normalized = vec!["codem8".to_owned()]; - for arg in args { - normalized.push(normalized_clap_arg(arg.into())?); + for arg in join_split_file_extensions(args.into_iter().map(Into::into)) { + normalized.push(normalized_clap_arg(arg)?); } Ok(normalized) } +fn join_split_file_extensions(args: impl IntoIterator) -> Vec { + let mut joined = Vec::new(); + for arg in args { + if should_join_split_extension(joined.last(), &arg) { + let previous = joined + .last_mut() + .expect("previous file argument exists when extension joins"); + previous.push_str(&arg); + } else { + joined.push(arg); + } + } + joined +} + +fn should_join_split_extension(previous: Option<&String>, arg: &str) -> bool { + previous.is_some_and(|previous| previous.starts_with("-files=") && arg.starts_with('.')) +} + fn normalized_clap_arg(arg: String) -> Result { if arg == "-verbose" { Ok("--codem8-verbose".to_owned()) @@ -164,7 +264,13 @@ fn normalized_clap_arg(arg: String) -> Result { Ok(format!("--codem8-file-extension={value}")) } else if let Some(value) = arg.strip_prefix("-files=") { Ok(format!("--codem8-files={value}")) - } else if arg.starts_with("--") && arg != "--report-duplicate" { + } else if let Some(value) = arg.strip_prefix("-max-cognitive-complexity=") { + Ok(format!("--codem8-max-cognitive-complexity={value}")) + } else if let Some(value) = arg.strip_prefix("-max-cyclomatic-complexity=") { + Ok(format!("--codem8-max-cyclomatic-complexity={value}")) + } else if arg.starts_with("--") + && !matches!(arg.as_str(), "--report-duplicate" | "--report-complexity") + { 
Err(CodeM8Error::new(format!("unknown argument: {arg}"))) } else { Ok(arg) @@ -178,17 +284,51 @@ mod tests { #[test] fn parses_default_duplicate_report_config() { let config = parse_args(["--report-duplicate"]).expect("config parses"); - assert!(config.report_duplicate); + assert_eq!(config.report, ReportKind::Duplicate); assert!(!config.verbose); assert_eq!(config.file_extensions, supported_file_extensions()); assert_eq!(config.files, None); assert!(!config.git_branch); + assert_eq!( + config.max_cognitive_complexity, + DEFAULT_MAX_COGNITIVE_COMPLEXITY + ); + assert_eq!( + config.max_cyclomatic_complexity, + DEFAULT_MAX_CYCLOMATIC_COMPLEXITY + ); + } + + #[test] + fn parses_default_complexity_report_config() { + let config = parse_args(["--report-complexity"]).expect("config parses"); + assert_eq!(config.report, ReportKind::Complexity); + assert_eq!( + config.max_cognitive_complexity, + DEFAULT_MAX_COGNITIVE_COMPLEXITY + ); + assert_eq!( + config.max_cyclomatic_complexity, + DEFAULT_MAX_CYCLOMATIC_COMPLEXITY + ); + } + + #[test] + fn parses_custom_complexity_limits() { + let config = parse_args([ + "--report-complexity", + "-max-cognitive-complexity=20", + "-max-cyclomatic-complexity=12", + ]) + .expect("config parses"); + assert_eq!(config.max_cognitive_complexity, 20); + assert_eq!(config.max_cyclomatic_complexity, 12); } #[test] fn parses_verbose_duplicate_report_config() { let config = parse_args(["--report-duplicate", "-verbose"]).expect("config parses"); - assert!(config.report_duplicate); + assert_eq!(config.report, ReportKind::Duplicate); assert!(config.verbose); } @@ -247,6 +387,8 @@ mod tests { "--file-extension=js", "--files=src/a.ts", "--git-branch", + "--max-cognitive-complexity=20", + "--max-cyclomatic-complexity=12", ] { let error = parse_args(["--report-duplicate", option]).expect_err("double-dash option fails"); @@ -278,6 +420,29 @@ mod tests { .contains("report switch was provided more than once")); } + #[test] + fn 
rejects_multiple_report_kinds() { + let error = parse_args(["--report-duplicate", "--report-complexity"]) + .expect_err("exclusive reports fail"); + assert!(error.to_string().contains("mutually exclusive")); + } + + #[test] + fn rejects_zero_complexity_limits() { + let error = parse_args(["--report-complexity", "-max-cognitive-complexity=0"]) + .expect_err("zero limit fails"); + assert!(error.to_string().contains("greater than zero")); + } + + #[test] + fn rejects_complexity_limits_with_duplicate_report() { + let error = parse_args(["--report-duplicate", "-max-cognitive-complexity=15"]) + .expect_err("duplicate report complexity limit fails"); + assert!(error + .to_string() + .contains("can only be used with --report-complexity")); + } + #[test] fn rejects_repeated_explicit_file_arguments() { let error = parse_args(["--report-duplicate", "-files=a.ts", "-files=b.ts"]) @@ -314,6 +479,31 @@ mod tests { ); } + #[test] + fn rejoins_powershell_split_file_extensions() { + let config = + parse_args(["--report-complexity", "-files=src/main", ".rs"]).expect("config parses"); + assert_eq!(config.files, Some(vec![PathBuf::from("src/main.rs")])); + } + + #[test] + fn rejoins_multiple_powershell_split_file_extensions() { + let config = parse_args([ + "--report-complexity", + "-files=src/main", + ".rs,src/lib", + ".rs", + ]) + .expect("config parses"); + assert_eq!( + config.files, + Some(vec![ + PathBuf::from("src/main.rs"), + PathBuf::from("src/lib.rs") + ]) + ); + } + #[test] fn rejects_empty_explicit_file_paths() { let error = parse_file_list("src/a.ts, ").expect_err("empty explicit file fails"); diff --git a/src/cli/help.rs b/src/cli/help.rs index 5f9f495..f5fd547 100644 --- a/src/cli/help.rs +++ b/src/cli/help.rs @@ -7,6 +7,7 @@ USAGE: codem8 help codem8 -h codem8 --report-duplicate [OPTIONS] + codem8 --report-complexity [OPTIONS] COMMANDS: help @@ -17,6 +18,10 @@ REQUIRED REPORT SWITCHES: --report-duplicate Analyze source files and print a duplicate code report. 
+ --report-complexity + Analyze supported source files and print a function complexity report. + Cannot be combined with --report-duplicate. + OPTIONS: -file-extension= Comma-separated source file extensions to analyze. @@ -26,14 +31,22 @@ OPTIONS: -files= Comma-separated explicit files to analyze instead of recursively discovering files from the current directory. - Example: -files=src/a.ts,src/b.js + Example: -files=\"src/a.ts,src/b.js\" -git-branch - Search duplicate code only in files changed on the current local Git + Search only in files changed on the current local Git branch. Cannot be combined with -files. + -max-cognitive-complexity= + Maximum allowed cognitive complexity for --report-complexity. + Defaults to 15. + + -max-cyclomatic-complexity= + Maximum allowed cyclomatic complexity for --report-complexity. + Defaults to 10. + -verbose - Include duplicate block metrics in report output. + Include analyzed files and timings in report output, plus duplicate block details. DUPLICATE REPORT PURPOSE: The duplicate report helps you find repeated code that may be worth @@ -41,11 +54,17 @@ DUPLICATE REPORT PURPOSE: the files and line ranges where it appears, making it easier to compare the repeated code and decide whether it should stay duplicated. +COMPLEXITY REPORT PURPOSE: + The complexity report helps you find functions whose cognitive or cyclomatic + complexity exceeds the configured limits. It lists each function with its + location and both computed complexity values. 
+ EXAMPLES: codem8 --report-duplicate codem8 --report-duplicate -file-extension=ts,tsx,js,jsx - codem8 --report-duplicate -file-extension=ts,js -files=src/a.ts,src/b.js + codem8 --report-duplicate -file-extension=ts,js -files=\"src/a.ts,src/b.js\" codem8 --report-duplicate -git-branch + codem8 --report-complexity -file-extension=rs -max-cognitive-complexity=12 "; #[must_use] @@ -69,20 +88,38 @@ mod tests { #[test] fn exposes_detailed_help_text() { let help = help_text(); + assert_help_includes_expected_sections(&help); + assert_help_includes_single_dash_options(&help); + assert_help_excludes_double_dash_options(&help); + } + + fn assert_help_includes_expected_sections(help: &str) { assert!(help.contains("USAGE:")); assert!(help.contains("codem8 -h")); assert!(help.contains(" -h")); assert!(help.contains("--report-duplicate")); + assert!(help.contains("--report-complexity")); + assert!(help.contains("helps you find repeated code")); + assert!(help.contains("helps you find functions")); + assert!(!help.contains("Duplicate weight")); + } + + fn assert_help_includes_single_dash_options(help: &str) { assert!(help.contains("-verbose")); assert!(help.contains("-file-extension=")); assert!(help.contains("-files=")); assert!(help.contains("-git-branch")); + assert!(help.contains("-max-cognitive-complexity=")); + assert!(help.contains("-max-cyclomatic-complexity=")); + } + + fn assert_help_excludes_double_dash_options(help: &str) { assert!(!help.contains("--verbose")); assert!(!help.contains("--file-extension=")); assert!(!help.contains("--files=")); assert!(!help.contains("--git-branch")); - assert!(help.contains("helps you find repeated code")); - assert!(!help.contains("Duplicate weight")); + assert!(!help.contains("--max-cognitive-complexity=")); + assert!(!help.contains("--max-cyclomatic-complexity=")); } #[test] diff --git a/src/cli/mod.rs b/src/cli/mod.rs index a16bf25..31455ad 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -12,16 +12,24 @@ use 
crate::error::Result; #[derive(Debug, Clone, PartialEq, Eq)] pub enum CliCommand { Help, - ReportDuplicate(CliConfig), + Report(CliConfig), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ReportKind { + Duplicate, + Complexity, } #[derive(Debug, Clone, PartialEq, Eq)] pub struct CliConfig { - pub report_duplicate: bool, + pub report: ReportKind, pub verbose: bool, pub file_extensions: Vec, pub files: Option>, pub git_branch: bool, + pub max_cognitive_complexity: u32, + pub max_cyclomatic_complexity: u32, } /// Parses command-line arguments into a CLI command. @@ -39,7 +47,7 @@ where if args.len() == 1 && is_help_argument(&args[0]) { return Ok(CliCommand::Help); } - parse_args(args).map(CliCommand::ReportDuplicate) + parse_args(args).map(CliCommand::Report) } fn is_help_argument(arg: &str) -> bool { diff --git a/src/discovery/explicit.rs b/src/discovery/explicit.rs index 4805f0b..00ac384 100644 --- a/src/discovery/explicit.rs +++ b/src/discovery/explicit.rs @@ -1,5 +1,6 @@ use std::collections::HashSet; use std::fs; +use std::io; use std::path::{Path, PathBuf}; use super::selected_extension; @@ -17,36 +18,8 @@ pub(super) fn discover_explicit_files( let mut source_files = Vec::new(); let mut seen_paths = HashSet::new(); for file in files { - let absolute_input = file.is_absolute(); - let path = if absolute_input { - file.clone() - } else { - current_dir.join(file) - }; - let metadata = fs::symlink_metadata(&path).map_err(|_| { - CodeM8Error::new(format!( - "explicit file does not exist: {}", - format_path(file) - )) - })?; - if metadata.file_type().is_symlink() { - return Err(CodeM8Error::new(format!( - "explicit file is a symbolic link and will not be followed: {}", - format_path(file) - ))); - } - if metadata.is_dir() { - return Err(CodeM8Error::new(format!( - "explicit file is a directory: {}", - format_path(file) - ))); - } - if !metadata.is_file() { - return Err(CodeM8Error::new(format!( - "explicit path is not a file: {}", - format_path(file) - 
))); - } + let path = explicit_input_path(current_dir, file); + explicit_file_metadata(file, &path)?; let Some(extension) = selected_extension(&path, extensions) else { continue; }; @@ -55,13 +28,7 @@ pub(super) fn discover_explicit_files( if !seen_paths.insert(canonical_path.clone()) { continue; } - let display_path = if absolute_input { - canonical_path - .strip_prefix(&canonical_current_dir) - .map_or_else(|_| normalize_display_path(file), normalize_display_path) - } else { - normalize_display_path(file) - }; + let display_path = explicit_display_path(file, &canonical_path, &canonical_current_dir); source_files.push(SourceFile { path: canonical_path, display_path, @@ -71,6 +38,62 @@ pub(super) fn discover_explicit_files( Ok(source_files) } +fn explicit_input_path(current_dir: &Path, file: &Path) -> PathBuf { + if file.is_absolute() { + file.to_path_buf() + } else { + current_dir.join(file) + } +} + +fn explicit_file_metadata(file: &Path, path: &Path) -> Result { + let metadata = fs::symlink_metadata(path).map_err(|error| match error.kind() { + io::ErrorKind::NotFound => CodeM8Error::new(format!( + "explicit file does not exist: {}", + format_path(file) + )), + _ => CodeM8Error::io(path, "read explicit file metadata", &error), + })?; + validate_explicit_file_metadata(file, &metadata)?; + Ok(metadata) +} + +fn validate_explicit_file_metadata(file: &Path, metadata: &fs::Metadata) -> Result<()> { + if metadata.file_type().is_symlink() { + return Err(CodeM8Error::new(format!( + "explicit file is a symbolic link and will not be followed: {}", + format_path(file) + ))); + } + if metadata.is_dir() { + return Err(CodeM8Error::new(format!( + "explicit file is a directory: {}", + format_path(file) + ))); + } + if !metadata.is_file() { + return Err(CodeM8Error::new(format!( + "explicit path is not a file: {}", + format_path(file) + ))); + } + Ok(()) +} + +fn explicit_display_path( + file: &Path, + canonical_path: &Path, + canonical_current_dir: &Path, +) -> PathBuf { + if 
file.is_absolute() { + canonical_path + .strip_prefix(canonical_current_dir) + .map_or_else(|_| normalize_display_path(file), normalize_display_path) + } else { + normalize_display_path(file) + } +} + #[cfg(test)] mod tests { use std::fs; @@ -153,4 +176,16 @@ mod tests { .contains("explicit file is a directory: src")); fs::remove_dir_all(root).expect("cleanup"); } + + #[test] + fn explicit_files_report_missing_paths_as_not_found() { + let root = temp_dir("missing"); + let error = + discover_explicit_files(&root, &["ts".to_string()], &[PathBuf::from("missing.ts")]) + .expect_err("missing explicit file fails"); + assert!(error + .to_string() + .contains("explicit file does not exist: missing.ts")); + fs::remove_dir_all(root).expect("cleanup"); + } } diff --git a/src/lib.rs b/src/lib.rs index f06425c..4803d08 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +#![allow(clippy::multiple_crate_versions)] + pub mod cli; pub mod discovery; pub mod error; @@ -13,86 +15,209 @@ use std::path::Path; use std::time::{Duration, Instant}; use crate::error::{CodeM8Error, Result}; -use crate::model::ProcessedFile; +use crate::model::SourceFile; +use crate::model::{DuplicateBlock, ProcessedFile}; use crate::paths::format_path; +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RunStatus { + Success, + IssuesFound, +} + +impl RunStatus { + const fn from_issue_count(issue_count: usize) -> Self { + if issue_count == 0 { + Self::Success + } else { + Self::IssuesFound + } + } + + #[must_use] + pub const fn is_success(self) -> bool { + matches!(self, Self::Success) + } +} + /// Runs the CLI workflow and writes the selected report to the provided writer. /// /// # Errors /// /// Returns an error when argument parsing, file discovery, file processing, or /// report writing fails. 
-pub fn run(args: I, current_dir: &Path, writer: &mut W) -> Result<()> +pub fn run(args: I, current_dir: &Path, writer: &mut W) -> Result where I: IntoIterator, S: Into, W: Write, { - match cli::parse_command(args)? { - cli::CliCommand::Help => writer - .write_all(cli::help_text().as_bytes()) - .map_err(|error| CodeM8Error::new(format!("could not write help output: {error}")))?, - cli::CliCommand::ReportDuplicate(config) => { - let should_report_analyzed_files = config.git_branch || config.files.is_some(); - let git_branch_files = if config.git_branch { - Some(discovery::changed_files_against_origin(current_dir)?) - } else { - None - }; - let (source_files, discovery_duration) = time_result(config.verbose, || { - discovery::discover_source_files( - current_dir, - &config.file_extensions, - if config.git_branch { - None - } else { - config.files.as_deref() - }, - ) - })?; - let (processed_files, file_processing_duration) = - time_result(config.verbose, || line::process_source_files(&source_files))?; - let duplicate_source_files = git_branch_files.as_deref().map_or_else( - || processed_files.clone(), - |git_branch_files| filtered_processed_files(&processed_files, git_branch_files), - ); - let (duplicate_blocks, duplicate_detection_duration) = - time_value(config.verbose, || { - report::detect_duplicate_blocks(&duplicate_source_files) - }); - let report = report::DuplicateReport { - analyzed_files: duplicate_source_files.len(), - analyzed_extensions: config.file_extensions, - analyzed_file_paths: should_report_analyzed_files.then(|| { - duplicate_source_files - .iter() - .map(|processed_file| processed_file.source.display_path.clone()) - .collect() - }), - timings: match ( - discovery_duration, - file_processing_duration, - duplicate_detection_duration, - ) { - (Some(discovery), Some(file_processing), Some(duplicate_detection)) => { - Some(report::DuplicateReportTimings { - discovery, - file_processing, - duplicate_detection, - }) - } - _ => None, - }, - 
duplicate_blocks, - }; - writer - .write_all(report::render_duplicate_report(&report, config.verbose).as_bytes()) - .map_err(|error| { - CodeM8Error::new(format!("could not write report output: {error}")) - })?; + let status = match cli::parse_command(args)? { + cli::CliCommand::Help => { + write_help(writer)?; + RunStatus::Success } + cli::CliCommand::Report(config) => match config.report { + cli::ReportKind::Duplicate => run_duplicate_report(&config, current_dir, writer)?, + cli::ReportKind::Complexity => run_complexity_report(&config, current_dir, writer)?, + }, + }; + Ok(status) +} + +fn write_help(writer: &mut W) -> Result<()> { + writer + .write_all(cli::help_text().as_bytes()) + .map_err(|error| CodeM8Error::new(format!("could not write help output: {error}"))) +} + +fn run_duplicate_report( + config: &cli::CliConfig, + current_dir: &Path, + writer: &mut W, +) -> Result { + let git_branch_files = changed_git_branch_files(config, current_dir)?; + let (source_files, discovery_duration) = discover_report_files( + config.verbose, + current_dir, + &config.file_extensions, + if config.git_branch { + None + } else { + config.files.as_deref() + }, + )?; + let (processed_files, file_processing_duration) = + time_result(config.verbose, || line::process_source_files(&source_files))?; + let analyzed_source_files = git_branch_files.as_deref().map_or_else( + || processed_files.clone(), + |git_branch_files| filtered_processed_files(&processed_files, git_branch_files), + ); + let (duplicate_blocks, duplicate_detection_duration) = time_value(config.verbose, || { + report::detect_duplicate_blocks(&processed_files) + }); + let duplicate_blocks = match git_branch_files.as_deref() { + Some(git_branch_files) => filtered_duplicate_blocks(duplicate_blocks, git_branch_files), + None => duplicate_blocks, + }; + let report = report::DuplicateReport { + analyzed_files: analyzed_source_files.len(), + analyzed_extensions: config.file_extensions.clone(), + analyzed_file_paths: 
config.verbose.then(|| { + analyzed_source_files + .iter() + .map(|processed_file| processed_file.source.display_path.clone()) + .collect() + }), + timings: duplicate_timings( + discovery_duration, + file_processing_duration, + duplicate_detection_duration, + ), + duplicate_blocks, + }; + let output = report::render_duplicate_report(&report, config.verbose); + let status = RunStatus::from_issue_count(report.duplicate_blocks.len()); + write_report_output(writer, &output)?; + Ok(status) +} + +fn run_complexity_report( + config: &cli::CliConfig, + current_dir: &Path, + writer: &mut W, +) -> Result { + let git_branch_files = changed_git_branch_files(config, current_dir)?; + let analyzed_extensions = report::complexity_supported_file_extensions(&config.file_extensions); + let (complexity_source_files, discovery_duration) = discover_report_files( + config.verbose, + current_dir, + &analyzed_extensions, + git_branch_files.as_deref().or(config.files.as_deref()), + )?; + let (functions, complexity_analysis_duration) = time_result(config.verbose, || { + report::detect_complex_functions( + &complexity_source_files, + config.max_cognitive_complexity, + config.max_cyclomatic_complexity, + ) + })?; + let report = report::ComplexityReport { + analyzed_files: complexity_source_files.len(), + analyzed_extensions, + analyzed_file_paths: config.verbose.then(|| { + complexity_source_files + .iter() + .map(|source_file| source_file.display_path.clone()) + .collect() + }), + max_cognitive_complexity: config.max_cognitive_complexity, + max_cyclomatic_complexity: config.max_cyclomatic_complexity, + timings: complexity_timings(discovery_duration, complexity_analysis_duration), + functions, + }; + let output = report::render_complexity_report(&report, config.verbose); + let status = RunStatus::from_issue_count(report.functions.len()); + write_report_output(writer, &output)?; + Ok(status) +} + +fn changed_git_branch_files( + config: &cli::CliConfig, + current_dir: &Path, +) -> Result>> { + 
if config.git_branch { + discovery::changed_files_against_origin(current_dir).map(Some) + } else { + Ok(None) + } +} + +fn discover_report_files( + verbose: bool, + current_dir: &Path, + file_extensions: &[String], + files: Option<&[std::path::PathBuf]>, +) -> Result<(Vec, Option)> { + time_result(verbose, || { + discovery::discover_source_files(current_dir, file_extensions, files) + }) +} + +const fn duplicate_timings( + discovery: Option, + file_processing: Option, + duplicate_detection: Option, +) -> Option { + match (discovery, file_processing, duplicate_detection) { + (Some(discovery), Some(file_processing), Some(duplicate_detection)) => { + Some(report::DuplicateReportTimings { + discovery, + file_processing, + duplicate_detection, + }) + } + _ => None, } - Ok(()) +} + +const fn complexity_timings( + discovery: Option, + complexity_analysis: Option, +) -> Option { + match (discovery, complexity_analysis) { + (Some(discovery), Some(complexity_analysis)) => Some(report::ComplexityReportTimings { + discovery, + complexity_analysis, + }), + _ => None, + } +} + +fn write_report_output(writer: &mut W, output: &str) -> Result<()> { + writer + .write_all(output.as_bytes()) + .map_err(|error| CodeM8Error::new(format!("could not write report output: {error}"))) } fn time_result( @@ -127,6 +252,25 @@ fn filtered_processed_files( .collect() } +fn filtered_duplicate_blocks( + duplicate_blocks: Vec, + selected_files: &[std::path::PathBuf], +) -> Vec { + let selected_files = selected_files + .iter() + .map(|path| format_path(path)) + .collect::>(); + duplicate_blocks + .into_iter() + .filter(|duplicate_block| { + duplicate_block + .occurrences + .iter() + .any(|occurrence| selected_files.contains(&format_path(&occurrence.file_path))) + }) + .collect() +} + #[cfg(test)] mod tests { use std::fs; @@ -239,9 +383,16 @@ mod tests { project: P, args: &[&str], ) -> std::result::Result { + run_with_status(project, args).map(|(output, _status)| output) + } + + fn run_with_status>( + 
project: P, + args: &[&str], + ) -> std::result::Result<(String, RunStatus), CodeM8Error> { let mut output = Vec::new(); - run(args.iter().copied(), project.as_ref(), &mut output)?; - Ok(String::from_utf8(output).expect("report is UTF-8")) + let status = run(args.iter().copied(), project.as_ref(), &mut output)?; + Ok((String::from_utf8(output).expect("report is UTF-8"), status)) } fn git_is_available() -> bool { @@ -286,6 +437,16 @@ mod tests { ); } + #[test] + fn duplicate_report_status_fails_when_duplicates_are_found() { + let project = TempProject::new("duplicate-status"); + project.write("src/a.ts", "const value = one;\n"); + project.write("src/b.ts", "const value = one;\n"); + let (_output, status) = + run_with_status(&project, &["--report-duplicate"]).expect("report succeeds"); + assert_eq!(status, RunStatus::IssuesFound); + } + #[test] fn verbose_duplicate_report_includes_metrics_without_characters() { let project = TempProject::new("verbose"); @@ -324,6 +485,16 @@ mod tests { assert!(output.contains("Duplicate blocks found: 0")); } + #[test] + fn duplicate_report_status_succeeds_when_no_duplicates_are_found() { + let project = TempProject::new("duplicate-clean-status"); + project.write("src/a.ts", "const first = one;\n"); + project.write("src/b.ts", "const second = two;\n"); + let (_output, status) = + run_with_status(&project, &["--report-duplicate"]).expect("report succeeds"); + assert_eq!(status, RunStatus::Success); + } + #[test] fn verbose_explicit_files_report_lists_analyzed_files() { let project = TempProject::new("verbose-explicit-files"); @@ -345,6 +516,22 @@ mod tests { )); } + #[test] + fn verbose_recursive_duplicate_report_lists_analyzed_files() { + let project = TempProject::new("verbose-recursive-duplicate"); + project.write("src/a.ts", "const first = one;\n"); + project.write("src/b.ts", "const second = two;\n"); + let output = + run_in(&project, &["--report-duplicate", "-verbose"]).expect("report succeeds"); + assert!(output.contains( + 
"Number of files analyzed: 2\n\ + Files analyzed:\n\ + - src/a.ts\n\ + - src/b.ts\n\ + Analyzed extensions:" + )); + } + #[test] fn custom_extensions_change_analyzed_files() { let project = TempProject::new("custom-extensions"); @@ -360,7 +547,7 @@ mod tests { } #[test] - fn git_branch_mode_limits_duplicate_search_to_changed_files() { + fn git_branch_mode_reports_duplicates_for_changed_files_against_repo() { if !git_is_available() { return; } @@ -375,9 +562,166 @@ mod tests { let output = run_in(&project, &["--report-duplicate", "-git-branch"]).expect("report succeeds"); assert!(output.contains("Number of files analyzed: 1")); + assert!(output.contains("Duplicate blocks found: 1")); + assert!(output.contains("- src/a.ts:1-1")); + assert!(output.contains("- src/b.ts:1-1")); + } + + #[test] + fn git_branch_mode_excludes_duplicates_without_changed_files() { + if !git_is_available() { + return; + } + let project = TempGitRepo::new("git-branch-duplicate-filter"); + project.git(&["init"]); + project.write("src/a.ts", "const branch = 1;\n"); + project.write("src/b.ts", "const shared = 1;\n"); + project.write("src/c.ts", "const shared = 1;\n"); + project.commit("initial"); + project.git(&["update-ref", "refs/remotes/origin/main", "HEAD"]); + project.git(&["branch", "-M", "feature"]); + project.write("src/a.ts", "const branch = 2;\n"); + let output = + run_in(&project, &["--report-duplicate", "-git-branch"]).expect("report succeeds"); + assert!(output.contains("Number of files analyzed: 1")); assert!(output.contains("Duplicate blocks found: 0")); } + #[test] + fn complexity_report_lists_functions_over_limits() { + let project = TempProject::new("complexity"); + project.write( + "src/lib.rs", + "fn risky(value: i32) -> i32 {\n\ + if value > 10 {\n\ + return 10;\n\ + }\n\ + if value > 5 {\n\ + return 5;\n\ + }\n\ + 0\n\ + }\n", + ); + let output = run_in( + &project, + [ + "--report-complexity", + "-file-extension=rs", + "-max-cognitive-complexity=1", + 
"-max-cyclomatic-complexity=1", + ] + .as_slice(), + ) + .expect("report succeeds"); + assert!(output.contains("Complexity Report")); + assert!(output.contains("Number of files analyzed: 1")); + assert!(output.contains("Functions exceeding limits: 1")); + assert!(output.contains("Function: risky")); + assert!(output.contains("Location: src/lib.rs:1-9")); + assert!(output.contains("Cognitive complexity:")); + assert!(output.contains("Cyclomatic complexity:")); + } + + #[test] + fn complexity_report_status_fails_when_complex_functions_are_found() { + let project = TempProject::new("complexity-status"); + project.write( + "src/lib.rs", + "fn risky(value: i32) -> i32 {\n\ + if value > 10 {\n\ + return 10;\n\ + }\n\ + if value > 5 {\n\ + return 5;\n\ + }\n\ + 0\n\ + }\n", + ); + let (_output, status) = run_with_status( + &project, + &[ + "--report-complexity", + "-file-extension=rs", + "-max-cognitive-complexity=1", + "-max-cyclomatic-complexity=1", + ], + ) + .expect("report succeeds"); + assert_eq!(status, RunStatus::IssuesFound); + } + + #[test] + fn complexity_report_skips_unsupported_extensions() { + let project = TempProject::new("complexity-unsupported"); + project.write("src/lib.rb", "def risky\nend\n"); + let output = run_in(&project, &["--report-complexity"]).expect("report succeeds"); + assert!(output.contains("Number of files analyzed: 0")); + assert!(output.contains("Functions exceeding limits: 0")); + } + + #[test] + fn verbose_recursive_complexity_report_lists_analyzed_files() { + let project = TempProject::new("verbose-recursive-complexity"); + project.write("src/main.rs", "fn main() {\n}\n"); + project.write("src/lib.rs", "fn lib() {\n}\n"); + let output = + run_in(&project, &["--report-complexity", "-verbose"]).expect("report succeeds"); + assert!(output.contains( + "Number of files analyzed: 2\n\ + Files analyzed:\n\ + - src/lib.rs\n\ + - src/main.rs\n\ + Analyzed extensions:" + )); + } + + #[test] + fn help_status_succeeds() { + let project = 
TempProject::new("help-status"); + let (_output, status) = run_with_status(&project, &["help"]).expect("help succeeds"); + assert_eq!(status, RunStatus::Success); + } + + #[test] + fn git_branch_mode_limits_complexity_search_to_changed_files() { + if !git_is_available() { + return; + } + let project = TempGitRepo::new("complexity-git-branch-scope"); + project.git(&["init"]); + project.write( + "src/unchanged.rs", + "fn risky(value: i32) -> i32 {\n\ + if value > 10 {\n\ + return 10;\n\ + }\n\ + if value > 5 {\n\ + return 5;\n\ + }\n\ + 0\n\ + }\n", + ); + project.write("src/changed.rs", "fn simple() -> i32 {\n1\n}\n"); + project.commit("initial"); + project.git(&["update-ref", "refs/remotes/origin/main", "HEAD"]); + project.git(&["branch", "-M", "feature"]); + project.write("src/changed.rs", "fn simple() -> i32 {\n2\n}\n"); + let output = run_in( + &project, + [ + "--report-complexity", + "-git-branch", + "-file-extension=rs", + "-max-cognitive-complexity=1", + "-max-cyclomatic-complexity=1", + ] + .as_slice(), + ) + .expect("report succeeds"); + assert!(output.contains("Number of files analyzed: 1")); + assert!(output.contains("Functions exceeding limits: 0")); + } + #[test] fn invalid_explicit_file_returns_a_clear_error() { let project = TempProject::new("invalid-file"); diff --git a/src/main.rs b/src/main.rs index a6e1e1d..02ee6ca 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +#![allow(clippy::multiple_crate_versions)] + use std::io::Write; use std::process::ExitCode; @@ -12,9 +14,13 @@ fn main() -> ExitCode { let stdout = std::io::stdout(); let mut stdout = stdout.lock(); match codem8::run(std::env::args().skip(1), &current_dir, &mut stdout) { - Ok(()) => { + Ok(status) => { let _ = stdout.flush(); - ExitCode::SUCCESS + if status.is_success() { + ExitCode::SUCCESS + } else { + ExitCode::FAILURE + } } Err(error) => { eprintln!("error: {error}"); diff --git a/src/model.rs b/src/model.rs index 9cc99e7..1fa699f 100644 --- a/src/model.rs +++ b/src/model.rs @@
-42,6 +42,16 @@ pub struct DuplicateBlock { pub weight: u64, } +#[derive(Debug, Clone, PartialEq)] +pub struct FunctionComplexity { + pub file_path: PathBuf, + pub function_name: String, + pub start_line: usize, + pub end_line: usize, + pub cognitive_complexity: f64, + pub cyclomatic_complexity: f64, +} + impl DuplicateBlock { #[must_use] pub fn line_count(&self) -> usize { diff --git a/src/report/complexity_detection.rs b/src/report/complexity_detection.rs new file mode 100644 index 0000000..5e9d26c --- /dev/null +++ b/src/report/complexity_detection.rs @@ -0,0 +1,210 @@ +use std::cmp::Ordering; +use std::fs; + +use rayon::prelude::*; +use rust_code_analysis::{get_from_ext, get_function_spaces, FuncSpace, SpaceKind}; + +use crate::error::{CodeM8Error, Result}; +use crate::model::{FunctionComplexity, SourceFile}; +use crate::paths::format_path; + +const ANONYMOUS_FUNCTION_NAME: &str = ""; + +pub fn complexity_supported_file_extensions(extensions: &[String]) -> Vec { + extensions + .iter() + .filter(|extension| get_from_ext(extension).is_some()) + .cloned() + .collect() +} + +pub fn detect_complex_functions( + files: &[SourceFile], + max_cognitive_complexity: u32, + max_cyclomatic_complexity: u32, +) -> Result> { + let mut functions = files + .par_iter() + .map(|file| { + detect_file_complex_functions(file, max_cognitive_complexity, max_cyclomatic_complexity) + }) + .collect::>>()? 
+ .into_iter() + .flatten() + .collect::>(); + functions.sort_by(compare_function_complexity); + Ok(functions) +} + +fn detect_file_complex_functions( + file: &SourceFile, + max_cognitive_complexity: u32, + max_cyclomatic_complexity: u32, +) -> Result> { + let Some(language) = get_from_ext(&file.extension) else { + return Ok(Vec::new()); + }; + let source = fs::read(&file.path) + .map_err(|error| CodeM8Error::io(&file.display_path, "read file", &error))?; + let Some(root_space) = get_function_spaces(&language, source, &file.path, None) else { + return Ok(Vec::new()); + }; + let mut functions = Vec::new(); + collect_complex_functions( + file, + &root_space, + max_cognitive_complexity, + max_cyclomatic_complexity, + &mut functions, + ); + Ok(functions) +} + +fn collect_complex_functions( + file: &SourceFile, + space: &FuncSpace, + max_cognitive_complexity: u32, + max_cyclomatic_complexity: u32, + functions: &mut Vec, +) { + if space.kind == SpaceKind::Function { + push_complex_function( + file, + space, + max_cognitive_complexity, + max_cyclomatic_complexity, + functions, + ); + } + for child in &space.spaces { + collect_complex_functions( + file, + child, + max_cognitive_complexity, + max_cyclomatic_complexity, + functions, + ); + } +} + +fn push_complex_function( + file: &SourceFile, + space: &FuncSpace, + max_cognitive_complexity: u32, + max_cyclomatic_complexity: u32, + functions: &mut Vec, +) { + let cognitive_complexity = space.metrics.cognitive.cognitive(); + let cyclomatic_complexity = space.metrics.cyclomatic.cyclomatic(); + if cognitive_complexity <= f64::from(max_cognitive_complexity) + && cyclomatic_complexity <= f64::from(max_cyclomatic_complexity) + { + return; + } + functions.push(FunctionComplexity { + file_path: file.display_path.clone(), + function_name: space + .name + .clone() + .unwrap_or_else(|| ANONYMOUS_FUNCTION_NAME.to_string()), + start_line: space.start_line, + end_line: space.end_line, + cognitive_complexity, + cyclomatic_complexity, + }); 
+} + +fn compare_function_complexity(left: &FunctionComplexity, right: &FunctionComplexity) -> Ordering { + format_path(&left.file_path) + .cmp(&format_path(&right.file_path)) + .then_with(|| left.start_line.cmp(&right.start_line)) + .then_with(|| left.end_line.cmp(&right.end_line)) + .then_with(|| left.function_name.cmp(&right.function_name)) +} + +#[cfg(test)] +mod tests { + use std::fs; + use std::path::PathBuf; + use std::sync::atomic::{AtomicUsize, Ordering}; + + use super::*; + + static TEMP_COUNTER: AtomicUsize = AtomicUsize::new(0); + + fn source_file(extension: &str, contents: &str) -> SourceFile { + let id = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed); + let path = std::env::temp_dir().join(format!( + "codem8-complexity-detection-{}-{id}.{extension}", + std::process::id() + )); + fs::write(&path, contents).expect("write source file"); + SourceFile { + path, + display_path: PathBuf::from(format!("sample.{extension}")), + extension: extension.to_string(), + } + } + + #[test] + fn filters_unsupported_extensions() { + let extensions = complexity_supported_file_extensions(&[ + "rs".to_string(), + "rb".to_string(), + "ts".to_string(), + ]); + assert_eq!(extensions, ["rs", "ts"]); + } + + #[test] + fn detects_functions_over_either_limit() { + let cognitive_only_file = source_file( + "rs", + "fn nested(value: i32) -> i32 {\n\ + if value > 10 {\n\ + if value > 20 {\n\ + return 20;\n\ + }\n\ + return 10;\n\ + }\n\ + 0\n\ + }\n", + ); + let cognitive_only_functions = + detect_complex_functions(std::slice::from_ref(&cognitive_only_file), 2, 3) + .expect("detect"); + assert_eq!(cognitive_only_functions.len(), 1); + assert_eq!( + cognitive_only_functions[0].file_path, + PathBuf::from("sample.rs") + ); + assert!(cognitive_only_functions[0].function_name.contains("nested")); + assert!(cognitive_only_functions[0].cognitive_complexity > 2.0); + assert!(cognitive_only_functions[0].cyclomatic_complexity <= 3.0); + fs::remove_file(cognitive_only_file.path).expect("cleanup"); 
+ let cyclomatic_only_file = source_file( + "rs", + "fn risky(value: i32) -> i32 {\n\ + if value > 10 {\n\ + return 10;\n\ + }\n\ + if value > 5 {\n\ + return 5;\n\ + }\n\ + 0\n\ + }\n", + ); + let cyclomatic_only_functions = + detect_complex_functions(std::slice::from_ref(&cyclomatic_only_file), 2, 2) + .expect("detect"); + assert_eq!(cyclomatic_only_functions.len(), 1); + assert_eq!( + cyclomatic_only_functions[0].file_path, + PathBuf::from("sample.rs") + ); + assert!(cyclomatic_only_functions[0].function_name.contains("risky")); + assert!(cyclomatic_only_functions[0].cognitive_complexity <= 2.0); + assert!(cyclomatic_only_functions[0].cyclomatic_complexity > 2.0); + fs::remove_file(cyclomatic_only_file.path).expect("cleanup"); + } +} diff --git a/src/report/complexity_renderer.rs b/src/report/complexity_renderer.rs new file mode 100644 index 0000000..f05d331 --- /dev/null +++ b/src/report/complexity_renderer.rs @@ -0,0 +1,208 @@ +use std::fmt::Write as _; +use std::path::PathBuf; +use std::time::Duration; + +use crate::model::FunctionComplexity; +use crate::paths::format_path; + +#[derive(Debug, Clone, PartialEq)] +pub struct ComplexityReport { + pub analyzed_files: usize, + pub analyzed_extensions: Vec, + pub analyzed_file_paths: Option>, + pub max_cognitive_complexity: u32, + pub max_cyclomatic_complexity: u32, + pub timings: Option, + pub functions: Vec, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ComplexityReportTimings { + pub discovery: Duration, + pub complexity_analysis: Duration, +} + +#[must_use] +pub fn render_complexity_report(report: &ComplexityReport, verbose: bool) -> String { + let mut output = String::new(); + output.push_str("Complexity Report\n"); + output.push_str("=================\n\n"); + let _ = writeln!( + output, + "Number of files analyzed: {}", + report.analyzed_files + ); + if verbose { + render_analyzed_files(&mut output, report.analyzed_file_paths.as_deref()); + } + let _ = writeln!( + output, + "Analyzed 
extensions: {}", + sorted_extensions(&report.analyzed_extensions).join(", ") + ); + let _ = writeln!( + output, + "Max cognitive complexity: {}", + report.max_cognitive_complexity + ); + let _ = writeln!( + output, + "Max cyclomatic complexity: {}", + report.max_cyclomatic_complexity + ); + let _ = writeln!( + output, + "Functions exceeding limits: {}", + report.functions.len() + ); + if verbose { + render_timings(&mut output, report.timings); + } + for (index, function) in report.functions.iter().enumerate() { + output.push('\n'); + let _ = writeln!(output, "#{}", index + 1); + let _ = writeln!(output, "Function: {}", function.function_name); + let _ = writeln!( + output, + "Location: {}:{}-{}", + format_path(&function.file_path), + function.start_line, + function.end_line + ); + let _ = writeln!( + output, + "Cognitive complexity: {}", + format_metric(function.cognitive_complexity) + ); + let _ = writeln!( + output, + "Cyclomatic complexity: {}", + format_metric(function.cyclomatic_complexity) + ); + } + output +} + +fn render_analyzed_files(output: &mut String, analyzed_file_paths: Option<&[PathBuf]>) { + if let Some(analyzed_file_paths) = analyzed_file_paths { + output.push_str("Files analyzed:\n"); + for file in analyzed_file_paths { + let _ = writeln!(output, "- {}", format_path(file)); + } + } +} + +fn render_timings(output: &mut String, timings: Option) { + if let Some(timings) = timings { + output.push_str("Timings:\n"); + let _ = writeln!( + output, + "- Discovery: {}", + format_duration(timings.discovery) + ); + let _ = writeln!( + output, + "- Complexity analysis: {}", + format_duration(timings.complexity_analysis) + ); + } +} + +fn format_metric(value: f64) -> String { + if value.fract().abs() < f64::EPSILON { + format!("{value:.0}") + } else { + format!("{value:.2}") + } +} + +fn format_duration(duration: Duration) -> String { + let microseconds = duration.as_micros(); + let milliseconds = microseconds / 1_000; + let fractional_microseconds = 
microseconds % 1_000; + format!("{milliseconds}.{fractional_microseconds:03} ms") +} + +fn sorted_extensions(extensions: &[String]) -> Vec { + let mut extensions = extensions.to_vec(); + extensions.sort(); + extensions +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + use std::time::Duration; + + use super::*; + + #[test] + fn renders_empty_report() { + let report = ComplexityReport { + analyzed_files: 0, + analyzed_extensions: vec!["rs".to_string()], + analyzed_file_paths: None, + max_cognitive_complexity: 15, + max_cyclomatic_complexity: 10, + timings: None, + functions: Vec::new(), + }; + assert_eq!( + render_complexity_report(&report, false), + "Complexity Report\n\ + =================\n\ + \n\ + Number of files analyzed: 0\n\ + Analyzed extensions: rs\n\ + Max cognitive complexity: 15\n\ + Max cyclomatic complexity: 10\n\ + Functions exceeding limits: 0\n" + ); + } + + #[test] + fn renders_function_metrics() { + let report = ComplexityReport { + analyzed_files: 1, + analyzed_extensions: vec!["rs".to_string()], + analyzed_file_paths: None, + max_cognitive_complexity: 15, + max_cyclomatic_complexity: 10, + timings: None, + functions: vec![FunctionComplexity { + file_path: PathBuf::from("src/lib.rs"), + function_name: "run".to_string(), + start_line: 10, + end_line: 20, + cognitive_complexity: 16.0, + cyclomatic_complexity: 8.0, + }], + }; + let output = render_complexity_report(&report, false); + assert!(output.contains("#1\n")); + assert!(output.contains("Function: run\n")); + assert!(output.contains("Location: src/lib.rs:10-20\n")); + assert!(output.contains("Cognitive complexity: 16\n")); + assert!(output.contains("Cyclomatic complexity: 8\n")); + } + + #[test] + fn renders_verbose_files_and_timings() { + let report = ComplexityReport { + analyzed_files: 1, + analyzed_extensions: vec!["rs".to_string()], + analyzed_file_paths: Some(vec![PathBuf::from("src/lib.rs")]), + max_cognitive_complexity: 15, + max_cyclomatic_complexity: 10, + timings: 
Some(ComplexityReportTimings { + discovery: Duration::from_micros(1_234), + complexity_analysis: Duration::from_micros(12_345), + }), + functions: Vec::new(), + }; + let output = render_complexity_report(&report, true); + assert!(output.contains("Files analyzed:\n- src/lib.rs\n")); + assert!(output.contains("- Discovery: 1.234 ms\n")); + assert!(output.contains("- Complexity analysis: 12.345 ms\n")); + } +} diff --git a/src/report/duplicate_detection.rs b/src/report/duplicate_detection.rs index 6414768..a38f49d 100644 --- a/src/report/duplicate_detection.rs +++ b/src/report/duplicate_detection.rs @@ -19,6 +19,18 @@ struct OccurrenceKey { end_line: usize, } +#[derive(Debug, Clone, Copy)] +struct LineRange { + start: usize, + end: usize, +} + +impl LineRange { + const fn overlaps(self, other: Self) -> bool { + !(self.end < other.start || other.end < self.start) + } +} + impl Ord for OccurrenceKey { fn cmp(&self, other: &Self) -> Ordering { self.file_path_key @@ -35,6 +47,12 @@ impl PartialOrd for OccurrenceKey { } pub fn detect_duplicate_blocks(files: &[ProcessedFile]) -> Vec { + let line_index = indexed_lines(files); + let blocks_by_lines = candidate_blocks_by_lines(files, &line_index); + duplicate_blocks_from_candidates(blocks_by_lines) +} + +fn indexed_lines(files: &[ProcessedFile]) -> HashMap> { let mut line_index: HashMap> = HashMap::new(); for (file_index, file) in files.iter().enumerate() { for (line_index_in_file, line) in file.lines.iter().enumerate() { @@ -44,6 +62,13 @@ pub fn detect_duplicate_blocks(files: &[ProcessedFile]) -> Vec { }); } } + line_index +} + +fn candidate_blocks_by_lines( + files: &[ProcessedFile], + line_index: &HashMap>, +) -> HashMap, BTreeSet> { let mut blocks_by_lines: HashMap, BTreeSet> = HashMap::new(); for refs in line_index.values() { if refs.len() < 2 { @@ -67,37 +92,54 @@ pub fn detect_duplicate_blocks(files: &[ProcessedFile]) -> Vec { collect_candidates(files, comparison_refs, &mut blocks_by_lines); } } + blocks_by_lines +} + 
+fn duplicate_blocks_from_candidates( + blocks_by_lines: HashMap, BTreeSet>, +) -> Vec { let mut duplicate_blocks = blocks_by_lines .into_iter() - .filter_map(|(normalized_lines, occurrences)| { - if normalized_lines.is_empty() || occurrences.len() < 2 { - return None; - } - let occurrences = occurrences - .into_iter() - .map(|occurrence| DuplicateOccurrence { - file_path: occurrence.file_path, - start_line: occurrence.start_line, - end_line: occurrence.end_line, - }) - .collect::>(); - let character_count = normalized_lines - .iter() - .map(|line| line.chars().count() as u64) - .sum::(); - let weight = - (occurrences.len() as u64 - 1) * normalized_lines.len() as u64 * character_count; - Some(DuplicateBlock { - normalized_lines, - occurrences, - weight, - }) - }) + .filter_map(duplicate_block_from_candidate) .collect::>(); duplicate_blocks.sort_by(compare_duplicate_blocks); duplicate_blocks } +fn duplicate_block_from_candidate( + (normalized_lines, occurrences): (Vec, BTreeSet), +) -> Option { + if normalized_lines.is_empty() || occurrences.len() < 2 { + return None; + } + let occurrences = duplicate_occurrences(occurrences); + let weight = duplicate_block_weight(&normalized_lines, occurrences.len()); + Some(DuplicateBlock { + normalized_lines, + occurrences, + weight, + }) +} + +fn duplicate_occurrences(occurrences: BTreeSet) -> Vec { + occurrences + .into_iter() + .map(|occurrence| DuplicateOccurrence { + file_path: occurrence.file_path, + start_line: occurrence.start_line, + end_line: occurrence.end_line, + }) + .collect() +} + +fn duplicate_block_weight(normalized_lines: &[String], occurrence_count: usize) -> u64 { + let character_count = normalized_lines + .iter() + .map(|line| line.chars().count() as u64) + .sum::(); + (occurrence_count as u64 - 1) * normalized_lines.len() as u64 * character_count +} + #[derive(Debug)] struct CandidateBlock { normalized_lines: Vec, @@ -136,38 +178,91 @@ fn expand_pair(files: &[ProcessedFile], left: LineRef, right: LineRef) -> 
Option if left == right { return None; } + let (left_start, right_start) = expanded_start(files, left, right); + let (left_end, right_end) = expanded_end(files, left, right); + let left_range = LineRange { + start: left_start, + end: left_end, + }; + let right_range = LineRange { + start: right_start, + end: right_end, + }; + if overlaps_same_file(left, right, left_range, right_range) { + return None; + } + Some(CandidateBlock { + normalized_lines: normalized_lines(files, left.file_index, left_range), + left_occurrence: occurrence_for(files, left.file_index, left_range.start, left_range.end), + right_occurrence: occurrence_for( + files, + right.file_index, + right_range.start, + right_range.end, + ), + }) +} + +fn expanded_start(files: &[ProcessedFile], left: LineRef, right: LineRef) -> (usize, usize) { let mut left_start = left.line_index; let mut right_start = right.line_index; - while left_start > 0 - && right_start > 0 - && line_text(files, left.file_index, left_start - 1) - == line_text(files, right.file_index, right_start - 1) - { + while previous_lines_match(files, left, right, left_start, right_start) { left_start -= 1; right_start -= 1; } + (left_start, right_start) +} + +fn previous_lines_match( + files: &[ProcessedFile], + left: LineRef, + right: LineRef, + left_start: usize, + right_start: usize, +) -> bool { + left_start > 0 + && right_start > 0 + && line_text(files, left.file_index, left_start - 1) + == line_text(files, right.file_index, right_start - 1) +} + +fn expanded_end(files: &[ProcessedFile], left: LineRef, right: LineRef) -> (usize, usize) { let mut left_end = left.line_index; let mut right_end = right.line_index; - while left_end + 1 < files[left.file_index].lines.len() - && right_end + 1 < files[right.file_index].lines.len() - && line_text(files, left.file_index, left_end + 1) - == line_text(files, right.file_index, right_end + 1) - { + while next_lines_match(files, left, right, left_end, right_end) { left_end += 1; right_end += 1; } - if 
left.file_index == right.file_index && left_start <= right_end && right_start <= left_end { - return None; - } - let normalized_lines = files[left.file_index].lines[left_start..=left_end] + (left_end, right_end) +} + +fn next_lines_match( + files: &[ProcessedFile], + left: LineRef, + right: LineRef, + left_end: usize, + right_end: usize, +) -> bool { + left_end + 1 < files[left.file_index].lines.len() + && right_end + 1 < files[right.file_index].lines.len() + && line_text(files, left.file_index, left_end + 1) + == line_text(files, right.file_index, right_end + 1) +} + +const fn overlaps_same_file( + left: LineRef, + right: LineRef, + left_range: LineRange, + right_range: LineRange, +) -> bool { + left.file_index == right.file_index && left_range.overlaps(right_range) +} + +fn normalized_lines(files: &[ProcessedFile], file_index: usize, range: LineRange) -> Vec { + files[file_index].lines[range.start..=range.end] .iter() .map(|line| line.normalized_text.clone()) - .collect::>(); - Some(CandidateBlock { - normalized_lines, - left_occurrence: occurrence_for(files, left.file_index, left_start, left_end), - right_occurrence: occurrence_for(files, right.file_index, right_start, right_end), - }) + .collect() } fn occurrence_for( diff --git a/src/report/duplicate_renderer.rs b/src/report/duplicate_renderer.rs index acb9d37..3f3129a 100644 --- a/src/report/duplicate_renderer.rs +++ b/src/report/duplicate_renderer.rs @@ -24,6 +24,12 @@ pub struct DuplicateReportTimings { #[must_use] pub fn render_duplicate_report(report: &DuplicateReport, verbose: bool) -> String { let mut output = String::new(); + render_report_summary(&mut output, report, verbose); + render_duplicate_blocks(&mut output, &report.duplicate_blocks, verbose); + output +} + +fn render_report_summary(output: &mut String, report: &DuplicateReport, verbose: bool) { output.push_str("Duplicate Code Report\n"); output.push_str("=====================\n\n"); let _ = writeln!( @@ -38,9 +44,7 @@ pub fn 
render_duplicate_report(report: &DuplicateReport, verbose: bool) -> Strin }; if let Some(analyzed_file_paths) = analyzed_file_paths { output.push_str("Files analyzed:\n"); - for file in analyzed_file_paths { - let _ = writeln!(output, "- {}", format_path(file)); - } + render_analyzed_files(output, analyzed_file_paths); } let _ = writeln!( output, @@ -53,52 +57,72 @@ pub fn render_duplicate_report(report: &DuplicateReport, verbose: bool) -> Strin report.duplicate_blocks.len() ); if verbose { - if let Some(timings) = report.timings { - output.push_str("Timings:\n"); - let _ = writeln!( - output, - "- Discovery: {}", - format_duration(timings.discovery) - ); - let _ = writeln!( - output, - "- File processing: {}", - format_duration(timings.file_processing) - ); - let _ = writeln!( - output, - "- Duplicate detection: {}", - format_duration(timings.duplicate_detection) - ); - } + render_timings(output, report.timings); + } +} + +fn render_analyzed_files(output: &mut String, analyzed_file_paths: &[PathBuf]) { + for file in analyzed_file_paths { + let _ = writeln!(output, "- {}", format_path(file)); + } +} + +fn render_timings(output: &mut String, timings: Option) { + if let Some(timings) = timings { + output.push_str("Timings:\n"); + let _ = writeln!( + output, + "- Discovery: {}", + format_duration(timings.discovery) + ); + let _ = writeln!( + output, + "- File processing: {}", + format_duration(timings.file_processing) + ); + let _ = writeln!( + output, + "- Duplicate detection: {}", + format_duration(timings.duplicate_detection) + ); } - for (index, block) in report.duplicate_blocks.iter().enumerate() { +} + +fn render_duplicate_blocks(output: &mut String, blocks: &[DuplicateBlock], verbose: bool) { + for (index, block) in blocks.iter().enumerate() { output.push('\n'); let _ = writeln!(output, "#{}", index + 1); if verbose { - let _ = writeln!(output, "Weight: {}", block.weight); - let _ = writeln!(output, "Lines: {}", block.line_count()); - let _ = writeln!(output, 
"Occurrences: {}", block.occurrences.len()); - output.push('\n'); - output.push_str("Code:\n"); - for line in &block.normalized_lines { - output.push_str(" "); - output.push_str(line); - output.push('\n'); - } - output.push_str("\nLocations:\n"); - } - for occurrence in &block.occurrences { - let _ = writeln!( - output, - "- {}:{}-{}", - format_path(&occurrence.file_path), - occurrence.start_line, - occurrence.end_line - ); + render_verbose_block(output, block); } + render_block_locations(output, block); + } +} + +fn render_verbose_block(output: &mut String, block: &DuplicateBlock) { + let _ = writeln!(output, "Weight: {}", block.weight); + let _ = writeln!(output, "Lines: {}", block.line_count()); + let _ = writeln!(output, "Occurrences: {}", block.occurrences.len()); + output.push('\n'); + output.push_str("Code:\n"); + for line in &block.normalized_lines { + output.push_str(" "); + output.push_str(line); + output.push('\n'); + } + output.push_str("\nLocations:\n"); +} + +fn render_block_locations(output: &mut String, block: &DuplicateBlock) { + for occurrence in &block.occurrences { + let _ = writeln!( + output, + "- {}:{}-{}", + format_path(&occurrence.file_path), + occurrence.start_line, + occurrence.end_line + ); } - output } fn format_duration(duration: Duration) -> String { diff --git a/src/report/mod.rs b/src/report/mod.rs index 7b85059..42edd25 100644 --- a/src/report/mod.rs +++ b/src/report/mod.rs @@ -1,5 +1,13 @@ +mod complexity_detection; +mod complexity_renderer; mod duplicate_detection; mod duplicate_renderer; +pub(crate) use complexity_detection::{ + complexity_supported_file_extensions, detect_complex_functions, +}; +pub use complexity_renderer::{ + render_complexity_report, ComplexityReport, ComplexityReportTimings, +}; pub(crate) use duplicate_detection::detect_duplicate_blocks; pub use duplicate_renderer::{render_duplicate_report, DuplicateReport, DuplicateReportTimings};