From 861c365abda1715092da25753cbaade298c41d9c Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 19 Mar 2026 18:25:03 +0100 Subject: [PATCH 01/71] chore: add tooling config, ci workflow, and mix project settings Add dialyzer ignore file, pre-commit hooks config, gitignore updates, blocks CI workflow, and mix.exs project settings (dialyzer ignore_warnings, protocol consolidation disabled in test, health and health.progress aliases). Co-Authored-By: Claude Sonnet 4.6 --- .dialyzer_ignore.exs | 6 ++++++ .github/workflows/blocks.yml | 18 ++++++++++++++++++ .gitignore | 7 +++++++ .pre-commit-config.yaml | 8 ++++++++ mix.exs | 10 +++++++++- 5 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 .dialyzer_ignore.exs create mode 100644 .github/workflows/blocks.yml create mode 100644 .pre-commit-config.yaml diff --git a/.dialyzer_ignore.exs b/.dialyzer_ignore.exs new file mode 100644 index 0000000..b76a4d0 --- /dev/null +++ b/.dialyzer_ignore.exs @@ -0,0 +1,6 @@ +[ + # Dialyzer specializes analyze/2 for the codebase call-site where include_pairs + # is always true, making the false branch appear unreachable. Both branches are + # valid and reachable at runtime from the file-level and codebase callers. 
+ {"lib/codeqa/metrics/file/near_duplicate_blocks.ex", :pattern_match} +] diff --git a/.github/workflows/blocks.yml b/.github/workflows/blocks.yml new file mode 100644 index 0000000..284adec --- /dev/null +++ b/.github/workflows/blocks.yml @@ -0,0 +1,18 @@ +name: Extract Code Blocks + +on: + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + blocks: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ./ + with: + command: blocks + build: source diff --git a/.gitignore b/.gitignore index ad2603a..81b993d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,10 @@ devenv.lock # Git worktrees .worktrees/ +docs/plans/ + +# Claude Code +.claude/ + +# Node +node_modules/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..d008842 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,8 @@ +repos: +- repo: local + hooks: + - id: mix-precommit + name: Mix precommit + entry: devenv shell precommit + language: system + pass_filenames: false diff --git a/mix.exs b/mix.exs index 2e55bfe..3c06031 100644 --- a/mix.exs +++ b/mix.exs @@ -11,7 +11,9 @@ defmodule CodeQA.MixProject do escript: [main_module: CodeQA.CLI], elixirc_paths: elixirc_paths(Mix.env()), preferred_envs: [precommit: :test], - aliases: aliases() + aliases: aliases(), + dialyzer: [ignore_warnings: ".dialyzer_ignore.exs"], + consolidate_protocols: Mix.env() != :test ] end @@ -30,6 +32,12 @@ defmodule CodeQA.MixProject do "compile --warnings-as-errors", "deps.unlock --unused", "format" + ], + health: [ + "run -e 'CodeQA.CLI.main([\"health-report\", \".\", \"--ignore-paths\", \"test/**\"])'" + ], + "health.progress": [ + "run -e 'CodeQA.CLI.main([\"health-report\", \".\", \"--ignore-paths\", \"test/**\", \"--progress\"])'" ] ] end From dad7ca1e91c4dad3b0ebfac57031207dd5290f0d Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 19 Mar 2026 18:25:37 +0100 Subject: [PATCH 02/71] refactor(engine): extract engine layer 
and reorganize metrics namespaces Move analyzer/collector/pipeline/registry/parallel into engine/ module, reorganize all file metrics into metrics/file/ and codebase metrics into metrics/codebase/ namespaces. Delete obsolete telemetry and stopwords modules. Add new file metrics: bradford, brevity, comment_structure, punctuation_density, rfc, and post-processing metrics (menzerath). Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/analyzer.ex | 130 -------- lib/codeqa/cli/stopwords.ex | 97 ------ lib/codeqa/collector.ex | 99 ------- lib/codeqa/engine/analyzer.ex | 126 ++++++++ lib/codeqa/engine/collector.ex | 111 +++++++ lib/codeqa/engine/file_context.ex | 29 ++ lib/codeqa/{ => engine}/parallel.ex | 30 +- lib/codeqa/engine/pipeline.ex | 93 ++++++ lib/codeqa/{ => engine}/registry.ex | 26 +- lib/codeqa/formatter.ex | 8 +- lib/codeqa/git.ex | 18 +- .../metrics/{ => codebase}/similarity.ex | 177 +++++------ lib/codeqa/metrics/codebase_metric.ex | 42 --- lib/codeqa/metrics/file/bradford.ex | 99 +++++++ lib/codeqa/metrics/{ => file}/branching.ex | 34 ++- lib/codeqa/metrics/file/brevity.ex | 50 ++++ .../metrics/{ => file}/casing_entropy.ex | 35 ++- lib/codeqa/metrics/file/comment_structure.ex | 45 +++ lib/codeqa/metrics/{ => file}/compression.ex | 31 +- lib/codeqa/metrics/{ => file}/entropy.ex | 22 +- lib/codeqa/metrics/{ => file}/file_metric.ex | 9 +- .../metrics/{ => file}/function_metrics.ex | 19 +- lib/codeqa/metrics/{ => file}/halstead.ex | 20 +- lib/codeqa/metrics/{ => file}/heaps.ex | 16 +- .../{ => file}/identifier_length_variance.ex | 12 +- lib/codeqa/metrics/{ => file}/indentation.ex | 28 +- lib/codeqa/metrics/{ => file}/inflector.ex | 5 +- .../{ => file}/magic_number_density.ex | 25 +- lib/codeqa/metrics/{ => file}/ngram.ex | 29 +- .../metrics/file/punctuation_density.ex | 96 ++++++ lib/codeqa/metrics/{ => file}/readability.ex | 20 +- lib/codeqa/metrics/file/rfc.ex | 81 +++++ .../metrics/{ => file}/symbol_density.ex | 7 +- lib/codeqa/metrics/{ => 
file}/vocabulary.ex | 11 +- .../metrics/{ => file}/vowel_density.ex | 9 +- lib/codeqa/metrics/{ => file}/winnowing.ex | 2 +- lib/codeqa/metrics/{ => file}/zipf.ex | 11 +- .../metrics/post_processing/menzerath.ex | 278 ++++++++++++++++++ .../post_processing/post_processing_metric.ex | 21 ++ lib/codeqa/metrics/token_normalizer.ex | 45 --- lib/codeqa/pipeline.ex | 109 ------- lib/codeqa/stopwords.ex | 63 ---- lib/codeqa/telemetry.ex | 68 ----- test/codeqa/collector_test.exs | 52 +++- test/codeqa/engine/analyzer_test.exs | 44 +++ test/codeqa/formatter_test.exs | 1 + test/codeqa/git_test.exs | 112 +++++++ .../metrics/codebase/similarity_test.exs | 79 +++++ .../metrics/{ => file}/branching_test.exs | 10 +- .../{ => file}/function_metrics_test.exs | 18 +- 50 files changed, 1716 insertions(+), 886 deletions(-) delete mode 100644 lib/codeqa/analyzer.ex delete mode 100644 lib/codeqa/cli/stopwords.ex delete mode 100644 lib/codeqa/collector.ex create mode 100644 lib/codeqa/engine/analyzer.ex create mode 100644 lib/codeqa/engine/collector.ex create mode 100644 lib/codeqa/engine/file_context.ex rename lib/codeqa/{ => engine}/parallel.ex (68%) create mode 100644 lib/codeqa/engine/pipeline.ex rename lib/codeqa/{ => engine}/registry.ex (71%) rename lib/codeqa/metrics/{ => codebase}/similarity.ex (66%) delete mode 100644 lib/codeqa/metrics/codebase_metric.ex create mode 100644 lib/codeqa/metrics/file/bradford.ex rename lib/codeqa/metrics/{ => file}/branching.ex (70%) create mode 100644 lib/codeqa/metrics/file/brevity.ex rename lib/codeqa/metrics/{ => file}/casing_entropy.ex (61%) create mode 100644 lib/codeqa/metrics/file/comment_structure.ex rename lib/codeqa/metrics/{ => file}/compression.ex (56%) rename lib/codeqa/metrics/{ => file}/entropy.ex (82%) rename lib/codeqa/metrics/{ => file}/file_metric.ex (77%) rename lib/codeqa/metrics/{ => file}/function_metrics.ex (92%) rename lib/codeqa/metrics/{ => file}/halstead.ex (92%) rename lib/codeqa/metrics/{ => file}/heaps.ex (84%) rename 
lib/codeqa/metrics/{ => file}/identifier_length_variance.ex (81%) rename lib/codeqa/metrics/{ => file}/indentation.ex (60%) rename lib/codeqa/metrics/{ => file}/inflector.ex (89%) rename lib/codeqa/metrics/{ => file}/magic_number_density.ex (51%) rename lib/codeqa/metrics/{ => file}/ngram.ex (71%) create mode 100644 lib/codeqa/metrics/file/punctuation_density.ex rename lib/codeqa/metrics/{ => file}/readability.ex (89%) create mode 100644 lib/codeqa/metrics/file/rfc.ex rename lib/codeqa/metrics/{ => file}/symbol_density.ex (85%) rename lib/codeqa/metrics/{ => file}/vocabulary.ex (91%) rename lib/codeqa/metrics/{ => file}/vowel_density.ex (86%) rename lib/codeqa/metrics/{ => file}/winnowing.ex (96%) rename lib/codeqa/metrics/{ => file}/zipf.ex (86%) create mode 100644 lib/codeqa/metrics/post_processing/menzerath.ex create mode 100644 lib/codeqa/metrics/post_processing/post_processing_metric.ex delete mode 100644 lib/codeqa/metrics/token_normalizer.ex delete mode 100644 lib/codeqa/pipeline.ex delete mode 100644 lib/codeqa/stopwords.ex delete mode 100644 lib/codeqa/telemetry.ex create mode 100644 test/codeqa/engine/analyzer_test.exs create mode 100644 test/codeqa/git_test.exs create mode 100644 test/codeqa/metrics/codebase/similarity_test.exs rename test/codeqa/metrics/{ => file}/branching_test.exs (79%) rename test/codeqa/metrics/{ => file}/function_metrics_test.exs (77%) diff --git a/lib/codeqa/analyzer.ex b/lib/codeqa/analyzer.ex deleted file mode 100644 index ddcb6ab..0000000 --- a/lib/codeqa/analyzer.ex +++ /dev/null @@ -1,130 +0,0 @@ -defmodule CodeQA.Analyzer do - @moduledoc "Orchestrates metric computation across files." 
- - alias CodeQA.Registry - alias CodeQA.Metrics - - def build_registry do - Registry.new() - |> Registry.register_file_metric(Metrics.Entropy) - |> Registry.register_file_metric(Metrics.Compression) - |> Registry.register_file_metric(Metrics.Zipf) - |> Registry.register_file_metric(Metrics.Heaps) - |> Registry.register_file_metric(Metrics.Vocabulary) - |> Registry.register_file_metric(Metrics.Ngram) - |> Registry.register_file_metric(Metrics.Halstead) - |> Registry.register_file_metric(Metrics.Readability) - |> Registry.register_file_metric(Metrics.CasingEntropy) - |> Registry.register_file_metric(Metrics.IdentifierLengthVariance) - |> Registry.register_file_metric(Metrics.Indentation) - |> Registry.register_file_metric(Metrics.Branching) - |> Registry.register_file_metric(Metrics.FunctionMetrics) - |> Registry.register_file_metric(Metrics.MagicNumberDensity) - |> Registry.register_file_metric(Metrics.SymbolDensity) - |> Registry.register_file_metric(Metrics.VowelDensity) - |> Registry.register_codebase_metric(Metrics.Similarity) - end - - def analyze_codebase(files, opts \\ []) do - registry = build_registry() - - opts = - if Keyword.get(opts, :experimental_stopwords, false) do - has_progress = Keyword.get(opts, :on_progress) - - if has_progress, - do: IO.puts(:stderr, " Analyzing Stopwords (Tokens and Fingerprints)...") - - word_extractor = fn content -> - Regex.scan(~r/\b[a-zA-Z_]\w*\b/u, content) |> List.flatten() - end - - word_stopwords = - CodeQA.Telemetry.time(:stopwords_words, fn -> - CodeQA.Stopwords.find_stopwords(files, word_extractor, opts) - end) - - fp_extractor = fn content -> - CodeQA.Metrics.TokenNormalizer.normalize(content) |> CodeQA.Metrics.Winnowing.kgrams(5) - end - - fp_stopwords = - CodeQA.Telemetry.time(:stopwords_fingerprints, fn -> - CodeQA.Stopwords.find_stopwords(files, fp_extractor, opts) - end) - - if has_progress do - IO.puts( - :stderr, - " Found #{MapSet.size(word_stopwords)} common word stopwords and #{MapSet.size(fp_stopwords)} 
common fingerprint stopwords." - ) - end - - opts - |> Keyword.put(:word_stopwords, word_stopwords) - |> Keyword.put(:fp_stopwords, fp_stopwords) - else - opts - end - - file_results = CodeQA.Parallel.analyze_files(files, opts) - codebase_metrics = Registry.run_codebase_metrics(registry, files, opts) - aggregate = aggregate_file_metrics(file_results) - - %{ - "files" => file_results, - "codebase" => %{ - "aggregate" => aggregate, - "similarity" => Map.get(codebase_metrics, "similarity", %{}) - } - } - end - - defp metric_data_to_triples({metric_name, metric_data}) do - metric_data - |> Enum.filter(fn {_k, v} -> is_number(v) end) - |> Enum.map(fn {key, value} -> {metric_name, key, value / 1} end) - end - - defp aggregate_file_metrics(file_results) do - file_results - |> Map.values() - |> Enum.flat_map(fn file_data -> - file_data - |> Map.get("metrics", %{}) - |> Enum.flat_map(&metric_data_to_triples/1) - end) - |> Enum.group_by(fn {metric, key, _val} -> {metric, key} end, fn {_, _, val} -> val end) - |> Enum.reduce(%{}, fn {{metric, key}, values}, acc -> - stats = compute_stats(values) - metric_agg = Map.get(acc, metric, %{}) - - updated = - Map.merge(metric_agg, %{ - "mean_#{key}" => stats.mean, - "std_#{key}" => stats.std, - "min_#{key}" => stats.min, - "max_#{key}" => stats.max - }) - - Map.put(acc, metric, updated) - end) - end - - defp compute_stats([]), do: %{mean: 0.0, std: 0.0, min: 0.0, max: 0.0} - - defp compute_stats(values) do - n = length(values) - mean = Enum.sum(values) / n - sum_squares = Enum.reduce(values, 0.0, fn v, acc -> acc + (v - mean) ** 2 end) - variance = sum_squares / n - std = :math.sqrt(variance) - - %{ - mean: Float.round(mean * 1.0, 4), - std: Float.round(std * 1.0, 4), - min: Float.round(Enum.min(values) * 1.0, 4), - max: Float.round(Enum.max(values) * 1.0, 4) - } - end -end diff --git a/lib/codeqa/cli/stopwords.ex b/lib/codeqa/cli/stopwords.ex deleted file mode 100644 index f79027b..0000000 --- a/lib/codeqa/cli/stopwords.ex +++ 
/dev/null @@ -1,97 +0,0 @@ -defmodule CodeQA.CLI.Stopwords do - @moduledoc false - - @behaviour CodeQA.CLI.Command - - alias CodeQA.CLI.Options - - @impl CodeQA.CLI.Command - def usage do - """ - Usage: codeqa stopwords [options] - - Print codebase-specific stopwords based on frequency analysis. - - Options: - --stopwords-threshold FLOAT Frequency threshold for stopword detection - --progress Show per-file progress on stderr - -w, --workers N Number of parallel workers - --ignore-paths PATHS Comma-separated list of path patterns to ignore (supports wildcards, e.g. "test/*,docs/*") - """ - end - - @impl CodeQA.CLI.Command - def run(args) when args in [["--help"], ["-h"]] do - IO.puts(usage()) - end - - def run(args) do - {opts, [path], _} = - OptionParser.parse(args, - strict: [ - workers: :integer, - stopwords_threshold: :float, - progress: :boolean, - ignore_paths: :string - ], - aliases: [w: :workers] - ) - - Options.validate_dir!(path) - - ignore_patterns = Options.parse_ignore_paths(opts[:ignore_paths]) ++ Options.load_config_ignore_paths(path) - files = CodeQA.Collector.collect_files(path, ignore_patterns: ignore_patterns) - - if map_size(files) == 0 do - IO.puts(:stderr, "Warning: no source files found in '#{path}'") - exit({:shutdown, 1}) - end - - IO.puts(:stderr, "Extracting stopwords for #{map_size(files)} files...") - start_time = System.monotonic_time(:millisecond) - - word_stopwords = find_word_stopwords(files, opts) - fp_stopwords = find_fingerprint_stopwords(files, opts) - - end_time = System.monotonic_time(:millisecond) - - IO.puts(:stderr, "\nAnalysis completed in #{end_time - start_time}ms") - print_word_stopwords(word_stopwords) - IO.puts(:stderr, "\n--- Fingerprint Stopwords (#{MapSet.size(fp_stopwords)}) ---") - IO.puts(:stderr, "Found #{MapSet.size(fp_stopwords)} structural k-gram hashes.") - end - - defp find_word_stopwords(files, opts) do - word_extractor = fn content -> - Regex.scan(~r/\b[a-zA-Z_]\w*\b/u, content) |> List.flatten() - end - 
- CodeQA.Stopwords.find_stopwords( - files, - word_extractor, - Keyword.put(opts, :progress_label, "Words") - ) - end - - defp find_fingerprint_stopwords(files, opts) do - fp_extractor = fn content -> - CodeQA.Metrics.TokenNormalizer.normalize(content) |> CodeQA.Metrics.Winnowing.kgrams(5) - end - - CodeQA.Stopwords.find_stopwords( - files, - fp_extractor, - Keyword.put(opts, :progress_label, "Fingerprints") - ) - end - - defp print_word_stopwords(word_stopwords) do - IO.puts(:stderr, "\n--- Word Stopwords (#{MapSet.size(word_stopwords)}) ---") - - word_stopwords - |> MapSet.to_list() - |> Enum.sort() - |> Enum.chunk_every(10) - |> Enum.each(fn chunk -> IO.puts(Enum.join(chunk, ", ")) end) - end -end diff --git a/lib/codeqa/collector.ex b/lib/codeqa/collector.ex deleted file mode 100644 index 02e6f34..0000000 --- a/lib/codeqa/collector.ex +++ /dev/null @@ -1,99 +0,0 @@ -defmodule CodeQA.Collector do - @moduledoc false - - @source_extensions MapSet.new(~w[ - .py .js .ts .jsx .tsx .java .rs .go .c .cpp .h .hpp .rb .ex .exs - .swift .kt .scala .sh .css .scss .html .vue .svelte .zig .lua .pl - .pm .r .jl .cs .fs .ml .hs .erl .clj .dart - ]) - - @skip_dirs MapSet.new(~w[ - .git .hg .svn node_modules __pycache__ _build dist build vendor - .tox .venv venv target .mypy_cache .pytest_cache deps .elixir_ls - .next coverage - ]) - - @spec collect_files(String.t(), keyword()) :: %{String.t() => String.t()} - def collect_files(root, opts \\ []) do - root_path = Path.expand(root) - ignore_patterns = Keyword.get(opts, :ignore_patterns, []) - - unless File.dir?(root_path) do - raise File.Error, reason: :enoent, path: root, action: "find directory" - end - - root_path - |> walk_directory() - |> Map.new(fn path -> - rel = Path.relative_to(path, root_path) - {rel, File.read!(path)} - end) - |> reject_ignored_map(ignore_patterns) - end - - def source_extensions, do: @source_extensions - - @doc false - def ignored?(path, patterns) do - Enum.any?(patterns, fn pattern -> - 
match_pattern?(path, pattern) - end) - end - - @doc false - def reject_ignored_map(files_map, []), do: files_map - - def reject_ignored_map(files_map, patterns) do - Map.reject(files_map, fn {path, _} -> ignored?(path, patterns) end) - end - - @doc false - def reject_ignored(list, [], _key_fn), do: list - - def reject_ignored(list, patterns, key_fn) do - Enum.reject(list, fn item -> ignored?(key_fn.(item), patterns) end) - end - - defp match_pattern?(path, pattern) do - # Convert glob pattern to regex: - # - ** matches any number of directories - # - * matches anything except / - # - ? matches a single character except / - regex_str = - pattern - |> String.replace(".", "\\.") - |> String.replace("**", "\0GLOBSTAR\0") - |> String.replace("*", "[^/]*") - |> String.replace("?", "[^/]") - |> String.replace("\0GLOBSTAR\0", ".*") - - case Regex.compile("^#{regex_str}$") do - {:ok, regex} -> Regex.match?(regex, path) - _ -> false - end - end - - defp walk_directory(dir) do - dir - |> File.ls!() - |> Enum.flat_map(fn entry -> - full_path = Path.join(dir, entry) - - cond do - File.dir?(full_path) and not skip_dir?(entry) -> - walk_directory(full_path) - - File.regular?(full_path) and source_file?(entry) -> - [full_path] - - true -> - [] - end - end) - end - - defp skip_dir?(name), do: MapSet.member?(@skip_dirs, name) or String.starts_with?(name, ".") - - defp source_file?(name), - do: MapSet.member?(@source_extensions, Path.extname(name) |> String.downcase()) -end diff --git a/lib/codeqa/engine/analyzer.ex b/lib/codeqa/engine/analyzer.ex new file mode 100644 index 0000000..b7e5c4e --- /dev/null +++ b/lib/codeqa/engine/analyzer.ex @@ -0,0 +1,126 @@ +defmodule CodeQA.Engine.Analyzer do + @moduledoc "Orchestrates metric computation across files." 
+ + alias CodeQA.Analysis.RunSupervisor + alias CodeQA.Engine.Registry + alias CodeQA.Metrics.File, as: Metrics + alias CodeQA.Metrics.Codebase, as: CodebaseMetrics + + @registry Registry.new() + |> Registry.register_file_metric(Metrics.Entropy) + |> Registry.register_file_metric(Metrics.Compression) + |> Registry.register_file_metric(Metrics.Zipf) + |> Registry.register_file_metric(Metrics.Heaps) + |> Registry.register_file_metric(Metrics.Vocabulary) + |> Registry.register_file_metric(Metrics.Ngram) + |> Registry.register_file_metric(Metrics.Halstead) + |> Registry.register_file_metric(Metrics.Readability) + |> Registry.register_file_metric(Metrics.CasingEntropy) + |> Registry.register_file_metric(Metrics.IdentifierLengthVariance) + |> Registry.register_file_metric(Metrics.Indentation) + |> Registry.register_file_metric(Metrics.Branching) + |> Registry.register_file_metric(Metrics.FunctionMetrics) + |> Registry.register_file_metric(Metrics.MagicNumberDensity) + |> Registry.register_file_metric(Metrics.SymbolDensity) + |> Registry.register_file_metric(Metrics.VowelDensity) + |> Registry.register_file_metric(Metrics.Brevity) + |> Registry.register_file_metric(Metrics.PunctuationDensity) + |> Registry.register_file_metric(Metrics.CommentStructure) + |> Registry.register_file_metric(Metrics.LinePatterns) + |> Registry.register_codebase_metric(CodebaseMetrics.Similarity) + |> Registry.register_file_metric(Metrics.NearDuplicateBlocksFile) + |> Registry.register_codebase_metric(CodebaseMetrics.NearDuplicateBlocksCodebase) + + def build_registry, do: @registry + + @spec analyze_file(String.t(), String.t()) :: map() + def analyze_file(_path, content) do + ctx = CodeQA.Engine.Pipeline.build_file_context(content) + CodeQA.Engine.Registry.run_file_metrics(@registry, ctx, []) + end + + @spec analyze_codebase_aggregate(map(), keyword()) :: map() + def analyze_codebase_aggregate(files_map, opts \\ []) do + with_run_context(opts, fn opts -> + file_results = 
CodeQA.Engine.Parallel.analyze_files(files_map, opts) + aggregate_file_metrics(file_results) + end) + end + + def analyze_codebase(files, opts \\ []) do + with_run_context(opts, &do_analyze_codebase(files, &1)) + end + + defp with_run_context(opts, fun) do + {:ok, sup} = RunSupervisor.start_link() + run_ctx = RunSupervisor.run_context(sup) + opts = Keyword.put(opts, :file_context_pid, run_ctx.file_context_pid) + + try do + fun.(opts) + after + Supervisor.stop(sup) + end + end + + defp do_analyze_codebase(files, opts) do + registry = @registry + + file_results = CodeQA.Engine.Parallel.analyze_files(files, opts) + codebase_metrics = Registry.run_codebase_metrics(registry, files, opts) + aggregate = aggregate_file_metrics(file_results) + + %{ + "files" => file_results, + "codebase" => Map.put(codebase_metrics, "aggregate", aggregate) + } + end + + defp metric_data_to_triples({metric_name, metric_data}) do + metric_data + |> Enum.filter(fn {_k, v} -> is_number(v) end) + |> Enum.map(fn {key, value} -> {metric_name, key, value / 1} end) + end + + def aggregate_file_metrics(file_results) do + file_results + |> Map.values() + |> Enum.flat_map(fn file_data -> + file_data + |> Map.get("metrics", %{}) + |> Enum.flat_map(&metric_data_to_triples/1) + end) + |> Enum.group_by(fn {metric, key, _val} -> {metric, key} end, fn {_, _, val} -> val end) + |> Enum.reduce(%{}, fn {{metric, key}, values}, acc -> + stats = compute_stats(values) + metric_agg = Map.get(acc, metric, %{}) + + updated = + Map.merge(metric_agg, %{ + "mean_#{key}" => stats.mean, + "std_#{key}" => stats.std, + "min_#{key}" => stats.min, + "max_#{key}" => stats.max + }) + + Map.put(acc, metric, updated) + end) + end + + defp compute_stats([]), do: %{mean: 0.0, std: 0.0, min: 0.0, max: 0.0} + + defp compute_stats(values) do + n = length(values) + mean = Enum.sum(values) / n + sum_squares = Enum.reduce(values, 0.0, fn v, acc -> acc + (v - mean) ** 2 end) + variance = sum_squares / n + std = :math.sqrt(variance) + + %{ 
+ mean: Float.round(mean * 1.0, 4), + std: Float.round(std * 1.0, 4), + min: Float.round(Enum.min(values) * 1.0, 4), + max: Float.round(Enum.max(values) * 1.0, 4) + } + end +end diff --git a/lib/codeqa/engine/collector.ex b/lib/codeqa/engine/collector.ex new file mode 100644 index 0000000..ea6b16f --- /dev/null +++ b/lib/codeqa/engine/collector.ex @@ -0,0 +1,111 @@ +defmodule CodeQA.Engine.Collector do + @moduledoc false + + @skip_dirs MapSet.new(~w[ + .git .hg .svn node_modules __pycache__ _build dist build vendor + .tox .venv venv target .mypy_cache .pytest_cache deps .elixir_ls + .next coverage + ]) + + @spec source_extensions() :: MapSet.t() + def source_extensions do + CodeQA.Language.all() + |> Enum.flat_map(& &1.extensions()) + |> Enum.map(&".#{&1}") + |> MapSet.new() + end + + @spec collect_files(String.t(), [String.t()]) :: %{String.t() => String.t()} + def collect_files(root, extra_ignore_patterns \\ []) do + root_path = Path.expand(root) + CodeQA.Config.load(root_path) + patterns = all_ignore_patterns(extra_ignore_patterns) + extensions = source_extensions() + + unless File.dir?(root_path) do + raise File.Error, reason: :enoent, path: root, action: "find directory" + end + + files_map = + root_path + |> walk_directory(extensions) + |> Map.new(fn path -> + rel = Path.relative_to(path, root_path) + {rel, File.read!(path)} + end) + |> do_reject_ignored_map(patterns) + + gitignored = CodeQA.Git.gitignored_files(root_path, Map.keys(files_map)) + Map.reject(files_map, fn {path, _} -> MapSet.member?(gitignored, path) end) + end + + @doc false + def ignored?(path, patterns) do + Enum.any?(patterns, fn pattern -> + match_pattern?(path, pattern) + end) + end + + @doc false + def reject_ignored_map(files_map, extra_patterns \\ []) do + do_reject_ignored_map(files_map, all_ignore_patterns(extra_patterns)) + end + + @doc false + def reject_ignored(list, key_fn, extra_patterns \\ []) do + patterns = all_ignore_patterns(extra_patterns) + Enum.reject(list, fn item -> 
ignored?(key_fn.(item), patterns) end) + end + + defp all_ignore_patterns(extra), do: extra ++ CodeQA.Config.ignore_paths() + + defp do_reject_ignored_map(files_map, []), do: files_map + + defp do_reject_ignored_map(files_map, patterns) do + Map.reject(files_map, fn {path, _} -> ignored?(path, patterns) end) + end + + defp match_pattern?(path, pattern) do + # Convert glob pattern to regex: + # - ** matches any number of directories + # - * matches anything except / + # - ? matches a single character except / + regex_str = + pattern + |> String.replace(".", "\\.") + |> String.replace("**", "\0GLOBSTAR\0") + |> String.replace("*", "[^/]*") + |> String.replace("?", "[^/]") + |> String.replace("\0GLOBSTAR\0", ".*") + + case Regex.compile("^#{regex_str}$") do + {:ok, regex} -> Regex.match?(regex, path) + _ -> false + end + end + + defp walk_directory(dir, extensions) do + dir + |> File.ls!() + |> Enum.flat_map(fn entry -> + full_path = Path.join(dir, entry) + + cond do + File.dir?(full_path) and not skip_dir?(entry) -> + walk_directory(full_path, extensions) + + File.regular?(full_path) and source_file?(entry, extensions) and + not String.starts_with?(entry, ".") -> + [full_path] + + true -> + [] + end + end) + end + + defp skip_dir?(name), do: MapSet.member?(@skip_dirs, name) or String.starts_with?(name, ".") + + defp source_file?(name, extensions), + do: MapSet.member?(extensions, Path.extname(name) |> String.downcase()) +end diff --git a/lib/codeqa/engine/file_context.ex b/lib/codeqa/engine/file_context.ex new file mode 100644 index 0000000..6e1da6b --- /dev/null +++ b/lib/codeqa/engine/file_context.ex @@ -0,0 +1,29 @@ +defmodule CodeQA.Engine.FileContext do + @moduledoc "Immutable pre-computed data shared across all file metrics." 
+ @enforce_keys [ + :content, + :tokens, + :token_counts, + :words, + :identifiers, + :lines, + :encoded, + :byte_count, + :line_count + ] + defstruct @enforce_keys ++ [:path, :blocks] + + @type t :: %__MODULE__{ + content: String.t(), + tokens: [CodeQA.Engine.Pipeline.Token.t()], + token_counts: map(), + words: list(), + identifiers: list(), + lines: list(), + encoded: String.t(), + byte_count: non_neg_integer(), + line_count: non_neg_integer(), + path: String.t() | nil, + blocks: [CodeQA.AST.Enrichment.Node.t()] | nil + } +end diff --git a/lib/codeqa/parallel.ex b/lib/codeqa/engine/parallel.ex similarity index 68% rename from lib/codeqa/parallel.ex rename to lib/codeqa/engine/parallel.ex index 0e2cc46..5ddf757 100644 --- a/lib/codeqa/parallel.ex +++ b/lib/codeqa/engine/parallel.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Parallel do +defmodule CodeQA.Engine.Parallel do @moduledoc "Parallel file analysis using Flow (GenStage-based)." def analyze_files(files, opts \\ []) when is_map(files) do @@ -22,7 +22,7 @@ defmodule CodeQA.Parallel do |> Flow.map(fn {path, content} -> start_time = System.monotonic_time(:millisecond) - result = maybe_cached_analyze(content, cache_dir, opts) + result = maybe_cached_analyze(path, content, cache_dir, opts) end_time = System.monotonic_time(:millisecond) time_taken = end_time - start_time @@ -38,9 +38,10 @@ defmodule CodeQA.Parallel do |> Enum.into(%{}) end - defp maybe_cached_analyze(content, nil, opts), do: analyze_single_file(content, opts) + defp maybe_cached_analyze(path, content, nil, opts), + do: analyze_single_file(path, content, opts) - defp maybe_cached_analyze(content, cache_dir, opts) do + defp maybe_cached_analyze(path, content, cache_dir, opts) do hash = :crypto.hash(:sha256, content) |> Base.encode16(case: :lower) cache_file = Path.join(cache_dir, hash <> ".json") @@ -51,30 +52,25 @@ defmodule CodeQA.Parallel do data _ -> - data = analyze_single_file(content, opts) + data = analyze_single_file(path, content, opts) 
File.write!(cache_file, Jason.encode!(data)) data end _ -> - data = analyze_single_file(content, opts) + data = analyze_single_file(path, content, opts) File.write!(cache_file, Jason.encode!(data)) data end end - defp analyze_single_file(content, opts) do - registry = CodeQA.Analyzer.build_registry() + defp analyze_single_file(path, content, opts) do + registry = CodeQA.Engine.Analyzer.build_registry() + file_opts = Keyword.put(opts, :path, path) + pid = Keyword.fetch!(opts, :file_context_pid) - ctx = - CodeQA.Telemetry.time(:pipeline_build_context, fn -> - CodeQA.Pipeline.build_file_context(content, opts) - end) - - metrics = - CodeQA.Telemetry.time(:registry_run_metrics, fn -> - CodeQA.Registry.run_file_metrics(registry, ctx, opts) - end) + ctx = CodeQA.Analysis.FileContextServer.get(pid, content, file_opts) + metrics = CodeQA.Engine.Registry.run_file_metrics(registry, ctx, opts) %{ "bytes" => ctx.byte_count, diff --git a/lib/codeqa/engine/pipeline.ex b/lib/codeqa/engine/pipeline.ex new file mode 100644 index 0000000..33a9229 --- /dev/null +++ b/lib/codeqa/engine/pipeline.ex @@ -0,0 +1,93 @@ +defmodule CodeQA.Engine.Pipeline do + @moduledoc "Pre-computed shared context for file-level metrics." + + defmodule Token do + @moduledoc "A lexical token with its string content, kind tag, and 1-based source line." 
+ defstruct [:content, :kind, :line] + + @type t :: %__MODULE__{ + content: String.t(), + kind: String.t(), + line: pos_integer() + } + end + + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Parsing.Parser + alias CodeQA.Engine.FileContext + alias CodeQA.Language + + @word_re ~r/\b[a-zA-Z_]\w*\b/u + + @spec build_file_context(String.t(), keyword()) :: FileContext.t() + def build_file_context(content, opts \\ []) when is_binary(content) do + tokens = tokenize(content) + token_counts = tokens |> Enum.map(& &1.content) |> Enum.frequencies() + + keywords = MapSet.new(Language.all_keywords()) + + words = + Regex.scan(@word_re, content) + |> List.flatten() + + identifiers = Enum.reject(words, &MapSet.member?(keywords, &1)) + lines = content |> String.split("\n") |> trim_trailing_empty() + encoded = content + + {path, blocks} = + case Keyword.get(opts, :path) do + nil -> + {nil, nil} + + p -> + lang_mod = Language.detect(p) + structural_tokens = TokenNormalizer.normalize_structural(content) + {p, Parser.detect_blocks(structural_tokens, lang_mod)} + end + + %FileContext{ + content: content, + tokens: tokens, + token_counts: token_counts, + words: words, + identifiers: identifiers, + lines: lines, + encoded: encoded, + byte_count: byte_size(content), + line_count: length(lines), + path: path, + blocks: blocks + } + end + + # Matches identifiers, integer/float literals, and single non-whitespace chars. 
+ @token_re ~r/[a-zA-Z_]\w*|[0-9]+(?:\.[0-9]+)?|[^\s]/u + + defp tokenize(content) do + content + |> String.split("\n") + |> Enum.with_index(1) + |> Enum.flat_map(fn {line, line_num} -> + @token_re + |> Regex.scan(line) + |> List.flatten() + |> Enum.map(&%Token{content: &1, kind: classify(&1), line: line_num}) + end) + end + + defp classify(tok) do + cond do + Regex.match?(~r/^[a-zA-Z_]\w*$/, tok) -> "" + Regex.match?(~r/^[0-9]/, tok) -> "" + true -> "" + end + end + + defp trim_trailing_empty(lines) do + # Match Python's str.splitlines() behavior + case List.last(lines) do + "" -> List.delete_at(lines, -1) + _ -> lines + end + end +end diff --git a/lib/codeqa/registry.ex b/lib/codeqa/engine/registry.ex similarity index 71% rename from lib/codeqa/registry.ex rename to lib/codeqa/engine/registry.ex index 76dfe23..5f9be25 100644 --- a/lib/codeqa/registry.ex +++ b/lib/codeqa/engine/registry.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Registry do +defmodule CodeQA.Engine.Registry do @moduledoc "Metric registration and execution." 
defstruct file_metrics: [], codebase_metrics: [] @@ -14,17 +14,11 @@ defmodule CodeQA.Registry do end def run_file_metrics(%__MODULE__{} = reg, ctx, opts \\ []) do - base_metrics = - Map.new(reg.file_metrics, fn mod -> - {mod.name(), - CodeQA.Telemetry.time(String.to_atom("metric_" <> mod.name()), fn -> mod.analyze(ctx) end)} - end) + base_metrics = Map.new(reg.file_metrics, fn mod -> {mod.name(), mod.analyze(ctx)} end) if Keyword.get(opts, :combinations, false) do - CodeQA.Telemetry.time(:registry_combinations, fn -> - combinations = generate_combinations(flat_numeric_metrics(base_metrics), []) - Map.merge(base_metrics, Map.new(combinations)) - end) + combinations = generate_combinations(flat_numeric_metrics(base_metrics), []) + Map.merge(base_metrics, Map.new(combinations)) else base_metrics end @@ -60,6 +54,16 @@ defmodule CodeQA.Registry do end def run_codebase_metrics(%__MODULE__{} = reg, files, opts \\ []) do - Map.new(reg.codebase_metrics, fn mod -> {mod.name(), mod.analyze(files, opts)} end) + has_progress = Keyword.has_key?(opts, :on_progress) + total = length(reg.codebase_metrics) + + reg.codebase_metrics + |> Enum.with_index(1) + |> Map.new(fn {mod, idx} -> + if has_progress, + do: IO.puts(:stderr, "\nCODEBASE #{idx}/#{total}: #{mod.name()}...") + + {mod.name(), mod.analyze(files, opts)} + end) end end diff --git a/lib/codeqa/formatter.ex b/lib/codeqa/formatter.ex index 55ba6ef..14c1992 100644 --- a/lib/codeqa/formatter.ex +++ b/lib/codeqa/formatter.ex @@ -54,8 +54,8 @@ defmodule CodeQA.Formatter do end defp mermaid_chart(head_grades) do - names = Enum.map(head_grades, fn g -> ~s("#{g.name}") end) |> Enum.join(", ") - scores = Enum.map(head_grades, fn g -> to_string(g.score) end) |> Enum.join(", ") + names = Enum.map_join(head_grades, ", ", fn g -> ~s("#{g.name}") end) + scores = Enum.map_join(head_grades, ", ", fn g -> to_string(g.score) end) [ "```mermaid", @@ -91,7 +91,8 @@ defmodule CodeQA.Formatter do end defp file_details(files, codebase, 
_output_mode) do - codebase_summary = CodeQA.Summarizer.summarize_codebase(%{"files" => files, "codebase" => codebase}) + codebase_summary = + CodeQA.Summarizer.summarize_codebase(%{"files" => files, "codebase" => codebase}) file_summaries = Map.new(files, fn {path, data} -> @@ -291,6 +292,7 @@ defmodule CodeQA.Formatter do |> Enum.map(fn key -> direction = Map.get(direction_map, "#{metric_name}.#{key}") delta_cell = format_delta_with_direction(delta_m[key], direction) + "| #{metric_name}.#{key} | #{format_value(base_m[key])} | #{format_value(head_m[key])} | #{delta_cell} |" end) end) diff --git a/lib/codeqa/git.ex b/lib/codeqa/git.ex index 78c0bdb..0576ddc 100644 --- a/lib/codeqa/git.ex +++ b/lib/codeqa/git.ex @@ -10,6 +10,22 @@ defmodule CodeQA.Git do @status_map %{"A" => "added", "M" => "modified", "D" => "deleted"} + @spec gitignored_files(String.t(), [String.t()]) :: MapSet.t() + def gitignored_files(_repo_path, []), do: MapSet.new() + + def gitignored_files(repo_path, paths) do + {output, _exit_code} = + System.cmd("git", ["check-ignore", "--no-index" | paths], + cd: repo_path, + stderr_to_stdout: false + ) + + output + |> String.trim() + |> String.split("\n", trim: true) + |> MapSet.new() + end + def changed_files(repo_path, base_ref, head_ref) do {output, 0} = System.cmd( @@ -66,6 +82,6 @@ defmodule CodeQA.Git do defp source_file?(path) do ext = path |> Path.extname() |> String.downcase() - MapSet.member?(CodeQA.Collector.source_extensions(), ext) + MapSet.member?(CodeQA.Engine.Collector.source_extensions(), ext) end end diff --git a/lib/codeqa/metrics/similarity.ex b/lib/codeqa/metrics/codebase/similarity.ex similarity index 66% rename from lib/codeqa/metrics/similarity.ex rename to lib/codeqa/metrics/codebase/similarity.ex index 910e631..cc8b644 100644 --- a/lib/codeqa/metrics/similarity.ex +++ b/lib/codeqa/metrics/codebase/similarity.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Similarity do +defmodule CodeQA.Metrics.Codebase.Similarity do @moduledoc """ 
Detects cross-file code duplication at the codebase level. @@ -9,23 +9,15 @@ defmodule CodeQA.Metrics.Similarity do See [winnowing](https://theory.stanford.edu/~aiken/publications/papers/sigmod03.pdf), [locality-sensitive hashing](https://en.wikipedia.org/wiki/Locality-sensitive_hashing), and [normalized compression distance](https://en.wikipedia.org/wiki/Normalized_compression_distance). - - ## Options - - - `:show_ncd` — boolean, whether to compute per-pair NCD scores (default: `false`) - - `:ncd_paths` — list of file paths to compute similarity for (default: all files) - - `:ncd_top` — integer, max similar files to return per path (default: all) - - `:ncd_threshold` — minimum Jaccard similarity to consider as candidate (default: `0.20`) - - `:workers` — number of parallel workers (default: `System.schedulers_online/0`) - - `:on_progress` — include this key (any value) to enable progress output to stderr - - `:fp_stopwords` — `MapSet` of fingerprint hashes to exclude (default: empty) """ - @behaviour CodeQA.Metrics.CodebaseMetric + @behaviour CodeQA.Metrics.Codebase.CodebaseMetric @impl true def name, do: "similarity" + def keys, do: ["ncd_pairs", "cross_file_density"] + @spec analyze(map(), keyword()) :: map() @impl true def analyze(files, opts \\ []) @@ -37,6 +29,9 @@ defmodule CodeQA.Metrics.Similarity do def analyze(files, opts) do names = Map.keys(files) contents = Map.values(files) + has_progress = Keyword.has_key?(opts, :on_progress) + + if has_progress, do: IO.puts(:stderr, " Computing cross-file density...") result = %{ "cross_file_density" => cross_file_density(contents) @@ -80,38 +75,45 @@ defmodule CodeQA.Metrics.Similarity do if has_progress, do: IO.puts(:stderr, " 2/5 Computing Winnowing fingerprints...") result = - CodeQA.Telemetry.time(:ncd_fingerprinting, fn -> - contents - |> Enum.with_index() - |> Task.async_stream( - fn {content, i} -> - fp = compute_fingerprints(content, opts) - {i, fp} - end, max_concurrency: workers, timeout: :infinity) - 
|> Enum.map(fn {:ok, {i, fp}} -> - print_bar_progress(has_progress, i, length(contents), "Fingerprinting") + contents + |> Enum.with_index() + |> Task.async_stream( + fn {content, i} -> + fp = compute_fingerprints(content, opts) {i, fp} - end) - |> Map.new() + end, + max_concurrency: workers, + timeout: :infinity + ) + |> Enum.map(fn {:ok, {i, fp}} -> + maybe_print_fingerprint_progress(has_progress, i, length(contents)) + {i, fp} end) + |> Map.new() if has_progress, do: IO.puts(:stderr, "") result end + defp maybe_print_fingerprint_progress(false, _i, _total), do: :ok + + defp maybe_print_fingerprint_progress(true, i, total) do + if rem(i + 1, max(1, div(total, 20))) == 0 do + IO.write(:stderr, "\r" <> CodeQA.CLI.UI.progress_bar(i + 1, total, label: "Fingerprinting")) + end + end + defp build_inverted_index(fingerprints_by_id, has_progress) do if has_progress, do: IO.puts(:stderr, " 3/5 Building inverted index...") total = map_size(fingerprints_by_id) result = - CodeQA.Telemetry.time(:ncd_build_index, fn -> - fingerprints_by_id - |> Enum.with_index() - |> Enum.reduce(%{}, fn {{i, set}, idx}, acc -> - print_bar_progress(has_progress, idx, total, "Indexing") - index_fingerprint_set(set, i, acc) - end) + fingerprints_by_id + |> Enum.with_index() + |> Enum.reduce(%{}, fn {{i, set}, idx}, acc -> + maybe_print_index_progress(has_progress, idx, total) + index_fingerprint_set(set, i, acc) end) if has_progress, do: IO.puts(:stderr, "") @@ -124,6 +126,14 @@ defmodule CodeQA.Metrics.Similarity do end) end + defp maybe_print_index_progress(false, _idx, _total), do: :ok + + defp maybe_print_index_progress(true, idx, total) do + if rem(idx + 1, max(1, div(total, 20))) == 0 do + IO.write(:stderr, "\r" <> CodeQA.CLI.UI.progress_bar(idx + 1, total, label: "Indexing")) + end + end + defp find_candidate_pairs( fingerprints_by_id, inverted_index, @@ -136,37 +146,37 @@ defmodule CodeQA.Metrics.Similarity do if has_progress, do: IO.puts(:stderr, " 4/5 Identifying candidate pairs...") 
total = map_size(fingerprints_by_id) - names_tuple = List.to_tuple(names) candidates = - CodeQA.Telemetry.time(:ncd_lsh_filter, fn -> - fingerprints_by_id - |> Enum.with_index() - |> Task.async_stream( - fn {{i, set}, idx} -> - valid_pairs = - collect_valid_pairs( - i, - set, - inverted_index, - fingerprints_by_id, - names_tuple, - target_set, - threshold - ) - - {idx, valid_pairs} - end, max_concurrency: workers, timeout: :infinity) - |> Enum.reduce(%{}, fn {:ok, {idx, valid_pairs}}, acc -> - print_bar_progress(has_progress, idx, total, "LSH Filter") - merge_valid_pairs(valid_pairs, acc) - end) + fingerprints_by_id + |> Enum.with_index() + |> Task.async_stream( + fn {{i, set}, idx} -> + valid_pairs = + collect_valid_pairs( + i, + set, + inverted_index, + fingerprints_by_id, + names, + target_set, + threshold + ) + + {idx, valid_pairs} + end, + max_concurrency: workers, + timeout: :infinity + ) + |> Enum.reduce(%{}, fn {:ok, {idx, valid_pairs}}, acc -> + maybe_print_lsh_progress(has_progress, idx, total) + merge_valid_pairs(valid_pairs, acc) end) if has_progress, do: IO.puts(:stderr, "") Enum.map(candidates, fn {{i, j}, jaccard} -> - {elem(names_tuple, i), i, elem(names_tuple, j), j, jaccard} + {Enum.at(names, i), i, Enum.at(names, j), j, jaccard} end) end @@ -175,19 +185,19 @@ defmodule CodeQA.Metrics.Similarity do set, inverted_index, fingerprints_by_id, - names_tuple, + names, target_set, threshold ) do collisions = count_collisions(set, inverted_index, i) size_a = MapSet.size(set) - name_a = elem(names_tuple, i) + name_a = Enum.at(names, i) is_target_a = MapSet.member?(target_set, name_a) collisions - |> Enum.filter(fn {j, _} -> is_target_a or MapSet.member?(target_set, elem(names_tuple, j)) end) + |> Enum.filter(fn {j, _} -> is_target_a or MapSet.member?(target_set, Enum.at(names, j)) end) |> Enum.reduce([], fn {j, intersection}, acc_pairs -> jaccard = compute_jaccard(size_a, MapSet.size(Map.get(fingerprints_by_id, j)), intersection) if jaccard >= threshold, 
do: [{{i, j}, jaccard} | acc_pairs], else: acc_pairs @@ -217,11 +227,11 @@ defmodule CodeQA.Metrics.Similarity do end) end - defp print_bar_progress(false, _current, _total, _label), do: :ok + defp maybe_print_lsh_progress(false, _idx, _total), do: :ok - defp print_bar_progress(true, current, total, label) do - if rem(current + 1, max(1, div(total, 20))) == 0 do - IO.write(:stderr, "\r" <> CodeQA.CLI.UI.progress_bar(current + 1, total, label: label)) + defp maybe_print_lsh_progress(true, idx, total) do + if rem(idx + 1, max(1, div(total, 20))) == 0 do + IO.write(:stderr, "\r" <> CodeQA.CLI.UI.progress_bar(idx + 1, total, label: "LSH Filter")) end end @@ -240,26 +250,25 @@ defmodule CodeQA.Metrics.Similarity do counter = :counters.new(1, [:atomics]) start_time_ncd = System.monotonic_time(:millisecond) - CodeQA.Telemetry.time(:ncd_exact_compression_phase, fn -> - filtered_pairs - |> Task.async_stream( - fn {name_a, i, name_b, j, _jaccard} -> - ncd = compute_single_ncd(precomputed, i, j) - maybe_print_ncd_progress(has_progress, counter, total_pairs, start_time_ncd) - {name_a, name_b, ncd} - end, max_concurrency: workers, timeout: :infinity) - |> Enum.map(fn {:ok, res} -> res end) - end) + filtered_pairs + |> Task.async_stream( + fn {name_a, i, name_b, j, _jaccard} -> + ncd = compute_single_ncd(precomputed, i, j) + maybe_print_ncd_progress(has_progress, counter, total_pairs, start_time_ncd) + {name_a, name_b, ncd} + end, + max_concurrency: workers, + timeout: :infinity + ) + |> Enum.map(fn {:ok, res} -> res end) end defp compute_single_ncd(precomputed, i, j) do - CodeQA.Telemetry.time(:ncd_single_compression, fn -> - {a, ca} = elem(precomputed, i) - {b, cb} = elem(precomputed, j) - cab = byte_size(:zlib.compress([a, b])) - ncd = if max(ca, cb) > 0, do: (cab - min(ca, cb)) / max(ca, cb), else: 0.0 - Float.round(ncd, 4) - end) + {a, ca} = elem(precomputed, i) + {b, cb} = elem(precomputed, j) + cab = byte_size(:zlib.compress([a, b])) + ncd = if max(ca, cb) > 0, do: (cab - 
min(ca, cb)) / max(ca, cb), else: 0.0 + Float.round(ncd, 4) end defp maybe_print_ncd_progress(false, _counter, _total_pairs, _start_time), do: :ok @@ -316,13 +325,11 @@ defmodule CodeQA.Metrics.Similarity do end end - defp compute_fingerprints(content, opts) do - fp_stopwords = Keyword.get(opts, :fp_stopwords, MapSet.new()) - + defp compute_fingerprints(content, _opts) do content - |> CodeQA.Metrics.TokenNormalizer.normalize() - |> CodeQA.Metrics.Winnowing.kgrams(5) - |> Enum.reject(&MapSet.member?(fp_stopwords, &1)) + |> CodeQA.AST.Lexing.TokenNormalizer.normalize_structural() + |> Enum.map(& &1.kind) + |> CodeQA.Metrics.File.Winnowing.kgrams(5) |> MapSet.new() end diff --git a/lib/codeqa/metrics/codebase_metric.ex b/lib/codeqa/metrics/codebase_metric.ex deleted file mode 100644 index 0b1284d..0000000 --- a/lib/codeqa/metrics/codebase_metric.ex +++ /dev/null @@ -1,42 +0,0 @@ -defmodule CodeQA.Metrics.CodebaseMetric do - @moduledoc """ - Behaviour for metrics that operate across an entire codebase. - - Unlike `FileMetric`, which analyzes a single file, codebase metrics receive - a map of all source files and can compute cross-file properties such as - duplication or structural similarity. - - ## Common opts keys - - Implementations may accept keyword options including: - - `:workers` — number of parallel workers (default: `System.schedulers_online/0`) - - `:on_progress` — progress callback key (presence enables progress output) - - ## Minimal implementation - - defmodule MyCodebaseMetric do - @behaviour CodeQA.Metrics.CodebaseMetric - - @impl true - def name, do: "my_metric" - - @impl true - def analyze(files, _opts) do - %{"file_count" => map_size(files)} - end - end - - See [software metrics](https://en.wikipedia.org/wiki/Software_metric). - """ - - @typedoc "Map of file path to file content string." 
- @type file_map :: %{required(String.t()) => String.t()} - - @callback name() :: String.t() - @callback analyze(file_map(), keyword()) :: map() - - @doc "Human-readable description of what this metric measures." - @callback description() :: String.t() - - @optional_callbacks [description: 0] -end diff --git a/lib/codeqa/metrics/file/bradford.ex b/lib/codeqa/metrics/file/bradford.ex new file mode 100644 index 0000000..22b7bce --- /dev/null +++ b/lib/codeqa/metrics/file/bradford.ex @@ -0,0 +1,99 @@ +defmodule CodeQA.Metrics.File.Bradford do + @moduledoc """ + Applies Bradford's concentration law to token density across lines. + + Lines are ranked by token count (densest first), then grouped into three + zones of equal total tokens. The ratio between zone sizes gives Bradford's + k values: how many more lines each successive zone needs to match the + token yield of the previous one. + + k ≈ 1 uniform density — tokens spread evenly across lines + k = 3–5 Bradford-like — a small dense core, long sparse tail + k >> 5 extreme concentration — a few lines carry almost all tokens + + k1 = zone2_lines / zone1_lines (core → middle transition) + k2 = zone3_lines / zone2_lines (middle → tail transition) + k_ratio = k2 / k1 (> 1 means tail is more stretched than core) + + In a perfect Bradford distribution k1 ≈ k2. In practice k2 > k1 is common + (moderate core, very stretched tail); k1 > k2 suggests extreme concentration + that levels off quickly. + + See [Bradford's law](https://en.wikipedia.org/wiki/Bradford%27s_law). + """ + + @behaviour CodeQA.Metrics.File.FileMetric + + @impl true + def name, do: "bradford" + + @impl true + def keys, do: ["k1", "k2", "k_ratio"] + + @spec analyze(map()) :: map() + @impl true + def analyze(%{tokens: []}) do + %{"k1" => 0.0, "k2" => 0.0, "k_ratio" => 0.0} + end + + def analyze(%{tokens: tokens}) do + # Count tokens per line using the .line field, then rank densest-first — + # this is Bradford's "sort sources by yield" step. 
+ counts = + tokens + |> Enum.group_by(& &1.line) + |> Enum.map(fn {_line, toks} -> length(toks) end) + |> Enum.sort(:desc) + + total = Enum.sum(counts) + + # Need at least 3 lines and 3 tokens to form meaningful zones. + if total < 3 or length(counts) < 3 do + %{"k1" => 0.0, "k2" => 0.0, "k_ratio" => 0.0} + else + # Each zone should contain one third of all tokens. + # We find zone boundaries by walking the ranked list until each third is filled. + third = total / 3 + + # n1: lines in zone 1 (the dense core — fewest lines, highest token density) + # n2: lines in zone 2 (middle tier) + # n3: all remaining lines (the sparse tail) + {n1, rest} = count_until(counts, third) + {n2, _} = count_until(rest, third) + n3 = length(counts) - n1 - n2 + + # k1 >= 1 here: lines are ranked densest-first, so the middle zone never needs fewer lines than the core. + # Higher k1 = more extreme concentration in the core (fewer lines do more work). + k1 = if n1 > 0, do: Float.round(n2 / n1, 4), else: 0.0 + + # k2 has no such floor: the tail is just the lines left over, so k2 can fall below 1 (or be 0 when none remain). + # Higher k2 = longer sparse tail relative to the middle zone. + k2 = if n2 > 0, do: Float.round(n3 / n2, 4), else: 0.0 + + # k_ratio = k2 / k1 + # > 1: the tail is more stretched than the core is concentrated (common — many trivial lines) + # < 1: the core is more extreme than the tail is sparse (god-function pattern) + # ≈ 1: a clean Bradford distribution where each zone multiplies evenly + k_ratio = if k1 > 0, do: Float.round(k2 / k1, 4), else: 0.0 + + %{"k1" => k1, "k2" => k2, "k_ratio" => k_ratio} + end + end + + # Walks the density-ranked list, consuming lines until the accumulated token + # count reaches the zone target. Returns {lines_consumed, remaining_list}. + # The remaining list is passed directly to the next zone's count_until call, + # so zones are computed in a single linear pass over the sorted counts. 
+ defp count_until(counts, target), do: do_count(counts, target, 0, 0) + + defp do_count([], _target, n, _acc), do: {n, []} + + defp do_count([h | rest], target, n, acc) do + new_acc = acc + h + # Once we've accumulated enough tokens to fill the zone, stop and return + # the remainder so the next zone can continue from where we left off. + if new_acc >= target, + do: {n + 1, rest}, + else: do_count(rest, target, n + 1, new_acc) + end +end diff --git a/lib/codeqa/metrics/branching.ex b/lib/codeqa/metrics/file/branching.ex similarity index 70% rename from lib/codeqa/metrics/branching.ex rename to lib/codeqa/metrics/file/branching.ex index 2cfdbe1..ce5e20a 100644 --- a/lib/codeqa/metrics/branching.ex +++ b/lib/codeqa/metrics/file/branching.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Branching do +defmodule CodeQA.Metrics.File.Branching do @moduledoc """ Measures branching density as a proxy for cyclomatic complexity. @@ -12,7 +12,7 @@ defmodule CodeQA.Metrics.Branching do See [cyclomatic complexity](https://en.wikipedia.org/wiki/Cyclomatic_complexity). 
""" - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric # Python: if elif else for while try except finally with match case # Ruby: if elsif else unless for while until case when begin rescue ensure @@ -43,18 +43,14 @@ defmodule CodeQA.Metrics.Branching do @impl true def name, do: "branching" - @spec analyze(map()) :: map() @impl true - def analyze(%{lines: lines, tokens: tokens}) do - non_blank_count = - lines - |> Tuple.to_list() - |> Enum.count(&(String.trim(&1) != "")) + def keys, do: ["branching_density", "branch_count", "non_blank_count", "max_nesting_depth"] - branch_count = - tokens - |> Tuple.to_list() - |> Enum.count(&MapSet.member?(@branching_keywords, &1)) + @spec analyze(CodeQA.Engine.FileContext.t()) :: map() + @impl true + def analyze(%{lines: lines, tokens: tokens, content: content}) do + non_blank_count = Enum.count(lines, &(String.trim(&1) != "")) + branch_count = Enum.count(tokens, &MapSet.member?(@branching_keywords, &1.content)) density = if non_blank_count > 0, @@ -64,7 +60,19 @@ defmodule CodeQA.Metrics.Branching do %{ "branching_density" => density, "branch_count" => branch_count, - "non_blank_count" => non_blank_count + "non_blank_count" => non_blank_count, + "max_nesting_depth" => max_nesting_depth(content) } end + + defp max_nesting_depth(content) do + content + |> String.graphemes() + |> Enum.reduce({0, 0}, fn + c, {depth, max} when c in ["(", "[", "{"] -> {depth + 1, max(depth + 1, max)} + c, {depth, max} when c in [")", "]", "}"] -> {max(depth - 1, 0), max} + _, acc -> acc + end) + |> elem(1) + end end diff --git a/lib/codeqa/metrics/file/brevity.ex b/lib/codeqa/metrics/file/brevity.ex new file mode 100644 index 0000000..bc0d9a6 --- /dev/null +++ b/lib/codeqa/metrics/file/brevity.ex @@ -0,0 +1,50 @@ +defmodule CodeQA.Metrics.File.Brevity do + @moduledoc """ + Measures how well Brevity law holds in the token distribution. + + Computes the Pearson correlation between token length and token frequency. 
+ A negative value indicates shorter tokens appear more often (law holds). + A positive value indicates longer tokens appear more often (law violated). + Also fits a log-log regression to capture the power-law slope. + + See [Brevity law](https://en.wikipedia.org/wiki/Brevity_law). + """ + + @behaviour CodeQA.Metrics.File.FileMetric + + @impl true + def name, do: "brevity" + + @impl true + def keys, do: ["correlation", "slope", "sample_size"] + + @spec analyze(map()) :: map() + @impl true + def analyze(%{token_counts: token_counts}) when map_size(token_counts) < 3 do + %{"correlation" => 0.0, "slope" => 0.0, "sample_size" => map_size(token_counts)} + end + + def analyze(%{token_counts: token_counts}) do + pairs = Enum.map(token_counts, fn {token, freq} -> {String.length(token), freq} end) + lengths = Enum.map(pairs, &elem(&1, 0)) + freqs = Enum.map(pairs, &elem(&1, 1)) + + %{ + "correlation" => CodeQA.Math.pearson_correlation_list(lengths, freqs), + "slope" => log_log_slope(lengths, freqs), + "sample_size" => map_size(token_counts) + } + end + + defp log_log_slope(lengths, freqs) do + log_lengths = lengths |> Enum.map(&:math.log(max(&1, 1))) |> Nx.tensor(type: :f64) + log_freqs = freqs |> Enum.map(&:math.log(max(&1, 1))) |> Nx.tensor(type: :f64) + + {slope, _intercept, _r_squared} = CodeQA.Math.linear_regression(log_lengths, log_freqs) + + case Nx.to_number(slope) do + val when is_float(val) -> Float.round(val, 4) + _ -> 0.0 + end + end +end diff --git a/lib/codeqa/metrics/casing_entropy.ex b/lib/codeqa/metrics/file/casing_entropy.ex similarity index 61% rename from lib/codeqa/metrics/casing_entropy.ex rename to lib/codeqa/metrics/file/casing_entropy.ex index cb38011..6f1cb2d 100644 --- a/lib/codeqa/metrics/casing_entropy.ex +++ b/lib/codeqa/metrics/file/casing_entropy.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.CasingEntropy do +defmodule CodeQA.Metrics.File.CasingEntropy do @moduledoc """ Measures Shannon entropy of identifier casing styles in a file. 
@@ -12,31 +12,43 @@ defmodule CodeQA.Metrics.CasingEntropy do - `"pascal_case_count"`, `"camel_case_count"`, `"snake_case_count"`, `"macro_case_count"`, `"kebab_case_count"`, `"other_count"` — per-style counts (only keys for styles that appear are included) + - `"screaming_snake_density"` — ratio of MACRO_CASE identifiers to total identifiers See [Shannon entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)) and [naming conventions](https://en.wikipedia.org/wiki/Naming_convention_(programming)). """ - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "casing_entropy" + @impl true + def keys, + do: [ + "entropy", + "pascal_case_count", + "camel_case_count", + "snake_case_count", + "macro_case_count", + "kebab_case_count", + "other_count", + "screaming_snake_density" + ] + @spec analyze(map()) :: map() @impl true - def analyze(%{identifiers: identifiers}) when tuple_size(identifiers) == 0 do - %{"entropy" => 0.0} + def analyze(%{identifiers: []}) do + %{"entropy" => 0.0, "screaming_snake_density" => 0.0} end def analyze(%{identifiers: identifiers}) do - identifiers_list = Tuple.to_list(identifiers) - counts = - identifiers_list - |> Enum.map(&CodeQA.Metrics.Inflector.detect_casing/1) + identifiers + |> Enum.map(&CodeQA.Metrics.File.Inflector.detect_casing/1) |> Enum.frequencies() - total = length(identifiers_list) + total = length(identifiers) entropy = counts @@ -46,7 +58,10 @@ defmodule CodeQA.Metrics.CasingEntropy do acc - p * :math.log2(p) end) - %{"entropy" => Float.round(entropy, 4)} + macro_count = Map.get(counts, :macro_case, 0) + screaming_density = Float.round(macro_count / total, 4) + + %{"entropy" => Float.round(entropy, 4), "screaming_snake_density" => screaming_density} |> Map.merge(counts_to_output(counts)) end diff --git a/lib/codeqa/metrics/file/comment_structure.ex b/lib/codeqa/metrics/file/comment_structure.ex new file mode 100644 index 0000000..65bc0e0 --- /dev/null +++ 
b/lib/codeqa/metrics/file/comment_structure.ex @@ -0,0 +1,45 @@ +defmodule CodeQA.Metrics.File.CommentStructure do + @moduledoc """ + Measures comment density and annotation patterns. + + Counts lines that begin with a comment marker (language-agnostic: `#`, `//`, + `/*`, ` *`) relative to non-blank lines. Also counts TODO/FIXME/HACK/XXX + markers which indicate deferred work or known issues. + + ## Output keys + + - `"comment_line_ratio"` — comment lines / non-blank lines + - `"comment_line_count"` — raw count of comment lines + - `"todo_fixme_count"` — occurrences of TODO, FIXME, HACK, or XXX + """ + + @behaviour CodeQA.Metrics.File.FileMetric + + @impl true + def name, do: "comment_structure" + + @impl true + def keys, do: ["comment_line_ratio", "comment_line_count", "todo_fixme_count"] + + @comment_line ~r/^\s*(?:#|\/\/|\/\*|\*)/ + @todo_marker ~r/\b(?:TODO|FIXME|HACK|XXX)\b/ + + @spec analyze(map()) :: map() + @impl true + def analyze(%{content: content, lines: lines}) do + non_blank = Enum.reject(lines, &(String.trim(&1) == "")) + non_blank_count = length(non_blank) + + comment_count = Enum.count(non_blank, &Regex.match?(@comment_line, &1)) + todo_count = @todo_marker |> Regex.scan(content) |> length() + + comment_ratio = + if non_blank_count > 0, do: Float.round(comment_count / non_blank_count, 4), else: 0.0 + + %{ + "comment_line_ratio" => comment_ratio, + "comment_line_count" => comment_count, + "todo_fixme_count" => todo_count + } + end +end diff --git a/lib/codeqa/metrics/compression.ex b/lib/codeqa/metrics/file/compression.ex similarity index 56% rename from lib/codeqa/metrics/compression.ex rename to lib/codeqa/metrics/file/compression.ex index fe68705..9f0981b 100644 --- a/lib/codeqa/metrics/compression.ex +++ b/lib/codeqa/metrics/file/compression.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Compression do +defmodule CodeQA.Metrics.File.Compression do @moduledoc """ Measures file redundancy via zlib compression ratio. 
@@ -6,34 +6,49 @@ defmodule CodeQA.Metrics.Compression do original. A high compression ratio signals repetitive or boilerplate-heavy code. - `ctx.encoded` is the binary representation of the file content used for - compression, distinct from `ctx.content` which is the UTF-8 string. - See [Kolmogorov complexity](https://en.wikipedia.org/wiki/Kolmogorov_complexity) and [data compression ratio](https://en.wikipedia.org/wiki/Data_compression_ratio). """ - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "compression" + @impl true + def keys, do: ["raw_bytes", "zlib_bytes", "zlib_ratio", "redundancy", "unique_line_ratio"] + @spec analyze(map()) :: map() @impl true def analyze(%{content: "", byte_count: 0}) do - %{"raw_bytes" => 0, "zlib_bytes" => 0, "zlib_ratio" => 0.0, "redundancy" => 0.0} + %{ + "raw_bytes" => 0, + "zlib_bytes" => 0, + "zlib_ratio" => 0.0, + "redundancy" => 0.0, + "unique_line_ratio" => 0.0 + } end def analyze(ctx) do raw_size = ctx.byte_count - zlib_data = :zlib.compress(ctx.encoded) + zlib_data = :zlib.compress(ctx.content) zlib_size = byte_size(zlib_data) + non_blank = ctx.lines |> Enum.reject(&(String.trim(&1) == "")) + + unique_line_ratio = + case length(non_blank) do + 0 -> 0.0 + n -> Float.round(length(Enum.uniq(non_blank)) / n, 4) + end + %{ "raw_bytes" => raw_size, "zlib_bytes" => zlib_size, "zlib_ratio" => Float.round(raw_size / max(1, zlib_size), 4), - "redundancy" => Float.round(max(0.0, 1.0 - zlib_size / raw_size), 4) + "redundancy" => Float.round(max(0.0, 1.0 - zlib_size / raw_size), 4), + "unique_line_ratio" => unique_line_ratio } end end diff --git a/lib/codeqa/metrics/entropy.ex b/lib/codeqa/metrics/file/entropy.ex similarity index 82% rename from lib/codeqa/metrics/entropy.ex rename to lib/codeqa/metrics/file/entropy.ex index 4756471..6533a21 100644 --- a/lib/codeqa/metrics/entropy.ex +++ b/lib/codeqa/metrics/file/entropy.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Entropy do 
+defmodule CodeQA.Metrics.File.Entropy do @moduledoc """ Computes Shannon entropy at both character and token levels. @@ -11,11 +11,24 @@ defmodule CodeQA.Metrics.Entropy do See [Shannon entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)). """ - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "entropy" + @impl true + def keys, + do: [ + "char_entropy", + "char_max_entropy", + "char_normalized", + "token_entropy", + "token_max_entropy", + "token_normalized", + "vocab_size", + "total_tokens" + ] + @spec analyze(map()) :: map() @impl true def analyze(ctx) do @@ -30,13 +43,12 @@ defmodule CodeQA.Metrics.Entropy do compute_entropy(counts, total, "char") end - defp token_entropy(%{tokens: tokens, token_counts: _token_counts}) - when tuple_size(tokens) == 0 do + defp token_entropy(%{tokens: [], token_counts: _token_counts}) do Map.merge(zero_entropy_map("token"), %{"vocab_size" => 0, "total_tokens" => 0}) end defp token_entropy(%{tokens: tokens, token_counts: token_counts}) do - total = tuple_size(tokens) + total = length(tokens) vocab_size = map_size(token_counts) entropy_map = compute_entropy(token_counts, total, "token") diff --git a/lib/codeqa/metrics/file_metric.ex b/lib/codeqa/metrics/file/file_metric.ex similarity index 77% rename from lib/codeqa/metrics/file_metric.ex rename to lib/codeqa/metrics/file/file_metric.ex index 75a6f61..9488c7b 100644 --- a/lib/codeqa/metrics/file_metric.ex +++ b/lib/codeqa/metrics/file/file_metric.ex @@ -1,8 +1,8 @@ -defmodule CodeQA.Metrics.FileMetric do +defmodule CodeQA.Metrics.File.FileMetric do @moduledoc """ Behaviour for metrics that analyze a single source file. - Implementations receive a `CodeQA.Pipeline.FileContext` struct containing + Implementations receive a `CodeQA.Engine.FileContext` struct containing pre-parsed data (tokens, identifiers, lines, etc.) and return a map of metric key-value pairs. 
On error, return an empty map `%{}` rather than raising. @@ -25,7 +25,10 @@ defmodule CodeQA.Metrics.FileMetric do """ @callback name() :: String.t() - @callback analyze(CodeQA.Pipeline.FileContext.t()) :: map() + @callback analyze(CodeQA.Engine.FileContext.t()) :: map() + + @doc "List of metric keys returned by analyze/1." + @callback keys() :: [String.t()] @doc "Human-readable description of what this metric measures." @callback description() :: String.t() diff --git a/lib/codeqa/metrics/function_metrics.ex b/lib/codeqa/metrics/file/function_metrics.ex similarity index 92% rename from lib/codeqa/metrics/function_metrics.ex rename to lib/codeqa/metrics/file/function_metrics.ex index 7fd2262..6a9bb0c 100644 --- a/lib/codeqa/metrics/function_metrics.ex +++ b/lib/codeqa/metrics/file/function_metrics.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.FunctionMetrics do +defmodule CodeQA.Metrics.File.FunctionMetrics do @moduledoc """ Estimates function-level structure metrics from source text. @@ -13,7 +13,7 @@ defmodule CodeQA.Metrics.FunctionMetrics do - C#: lines starting with access modifiers (`public`, `private`, etc.) 
""" - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric # Python, Ruby, Elixir: `def` family # JavaScript: `function` @@ -39,14 +39,23 @@ defmodule CodeQA.Metrics.FunctionMetrics do @impl true def name, do: "function_metrics" + @impl true + def keys, + do: [ + "function_count", + "avg_function_lines", + "max_function_lines", + "avg_param_count", + "max_param_count" + ] + @spec analyze(map()) :: map() @impl true def analyze(%{lines: lines}) do - lines_list = Tuple.to_list(lines) - total = length(lines_list) + total = length(lines) {func_indices, param_counts} = - lines_list + lines |> Enum.with_index() |> Enum.filter(fn {line, _} -> Regex.match?(@func_keyword_re, line) or Regex.match?(@csharp_method_re, line) diff --git a/lib/codeqa/metrics/halstead.ex b/lib/codeqa/metrics/file/halstead.ex similarity index 92% rename from lib/codeqa/metrics/halstead.ex rename to lib/codeqa/metrics/file/halstead.ex index ca38665..157f67b 100644 --- a/lib/codeqa/metrics/halstead.ex +++ b/lib/codeqa/metrics/file/halstead.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Halstead do +defmodule CodeQA.Metrics.File.Halstead do @moduledoc """ Implements Halstead software-science complexity metrics. @@ -9,11 +9,27 @@ defmodule CodeQA.Metrics.Halstead do See [Halstead complexity measures](https://en.wikipedia.org/wiki/Halstead_complexity_measures). 
""" - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "halstead" + @impl true + def keys, + do: [ + "n1_unique_operators", + "n2_unique_operands", + "N1_total_operators", + "N2_total_operands", + "vocabulary", + "length", + "volume", + "difficulty", + "effort", + "estimated_bugs", + "time_to_implement_seconds" + ] + # Keyword operators for: # Python, Ruby, JavaScript, Elixir, C#, # Java, C++, Go, Rust, PHP, Swift, Shell, Kotlin diff --git a/lib/codeqa/metrics/heaps.ex b/lib/codeqa/metrics/file/heaps.ex similarity index 84% rename from lib/codeqa/metrics/heaps.ex rename to lib/codeqa/metrics/file/heaps.ex index edc390b..b7cae9c 100644 --- a/lib/codeqa/metrics/heaps.ex +++ b/lib/codeqa/metrics/file/heaps.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Heaps do +defmodule CodeQA.Metrics.File.Heaps do @moduledoc """ Fits Heaps' law to vocabulary growth in a file. @@ -9,25 +9,27 @@ defmodule CodeQA.Metrics.Heaps do See [Heaps' law](https://en.wikipedia.org/wiki/Heaps%27_law). 
""" - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "heaps" + @impl true + def keys, do: ["k", "beta", "r_squared"] + @max_samples 50 @spec analyze(map()) :: map() @impl true - def analyze(%{tokens: tokens}) when tuple_size(tokens) == 0 do + def analyze(%{tokens: []}) do %{"k" => 0.0, "beta" => 0.0, "r_squared" => 0.0} end def analyze(%{tokens: tokens}) do - token_list = Tuple.to_list(tokens) - total = length(token_list) + total = length(tokens) interval = max(1, div(total, @max_samples)) - data_points = sample_vocabulary_growth(token_list, interval) + data_points = sample_vocabulary_growth(tokens, interval) if length(data_points) < 5 do %{"k" => 0.0, "beta" => 0.0, "r_squared" => 0.0} @@ -40,7 +42,7 @@ defmodule CodeQA.Metrics.Heaps do tokens |> Enum.with_index(1) |> Enum.reduce({MapSet.new(), []}, fn {token, i}, {seen, points} -> - seen = MapSet.put(seen, token) + seen = MapSet.put(seen, token.content) if rem(i, interval) == 0 do {seen, [{i, MapSet.size(seen)} | points]} diff --git a/lib/codeqa/metrics/identifier_length_variance.ex b/lib/codeqa/metrics/file/identifier_length_variance.ex similarity index 81% rename from lib/codeqa/metrics/identifier_length_variance.ex rename to lib/codeqa/metrics/file/identifier_length_variance.ex index 2203b10..424b95b 100644 --- a/lib/codeqa/metrics/identifier_length_variance.ex +++ b/lib/codeqa/metrics/file/identifier_length_variance.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.IdentifierLengthVariance do +defmodule CodeQA.Metrics.File.IdentifierLengthVariance do @moduledoc """ Measures the mean, variance, and maximum length of identifiers. @@ -11,20 +11,22 @@ defmodule CodeQA.Metrics.IdentifierLengthVariance do and [variance](https://en.wikipedia.org/wiki/Variance). 
""" - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "identifier_length_variance" + @impl true + def keys, do: ["mean", "variance", "std_dev", "max"] + @spec analyze(map()) :: map() @impl true - def analyze(%{identifiers: identifiers}) when tuple_size(identifiers) == 0 do + def analyze(%{identifiers: []}) do %{"mean" => 0.0, "variance" => 0.0, "std_dev" => 0.0, "max" => 0} end def analyze(%{identifiers: identifiers}) do - list = Tuple.to_list(identifiers) - lengths = Enum.map(list, &String.length/1) + lengths = Enum.map(identifiers, &String.length/1) n = length(lengths) mean = Enum.sum(lengths) / n diff --git a/lib/codeqa/metrics/indentation.ex b/lib/codeqa/metrics/file/indentation.ex similarity index 60% rename from lib/codeqa/metrics/indentation.ex rename to lib/codeqa/metrics/file/indentation.ex index ab44743..75923b9 100644 --- a/lib/codeqa/metrics/indentation.ex +++ b/lib/codeqa/metrics/file/indentation.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Indentation do +defmodule CodeQA.Metrics.File.Indentation do @moduledoc """ Analyzes indentation depth patterns across non-blank lines. @@ -10,20 +10,27 @@ defmodule CodeQA.Metrics.Indentation do See [indentation style](https://en.wikipedia.org/wiki/Indentation_style). 
""" - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "indentation" + @impl true + def keys, do: ["mean_depth", "variance", "max_depth", "uses_tabs", "blank_line_ratio"] + @spec analyze(map()) :: map() @impl true def analyze(%{lines: lines}) do - lines_list = Tuple.to_list(lines) + uses_tabs = Enum.any?(lines, &String.match?(&1, ~r/^\t/)) + + total_lines = length(lines) + blank_count = Enum.count(lines, &(String.trim(&1) == "")) - uses_tabs = Enum.any?(lines_list, &String.match?(&1, ~r/^\t/)) + blank_line_ratio = + if total_lines > 0, do: Float.round(blank_count / total_lines, 4), else: 0.0 depths = - lines_list + lines |> Enum.reject(&(String.trim(&1) == "")) |> Enum.map(fn line -> [leading] = Regex.run(~r/^\s*/, line) @@ -31,7 +38,13 @@ defmodule CodeQA.Metrics.Indentation do end) if depths == [] do - %{"mean_depth" => 0.0, "max_depth" => 0, "variance" => 0.0, "uses_tabs" => uses_tabs} + %{ + "mean_depth" => 0.0, + "max_depth" => 0, + "variance" => 0.0, + "uses_tabs" => uses_tabs, + "blank_line_ratio" => blank_line_ratio + } else n = length(depths) mean = Enum.sum(depths) / n @@ -45,7 +58,8 @@ defmodule CodeQA.Metrics.Indentation do "mean_depth" => Float.round(mean, 4), "variance" => Float.round(variance, 4), "max_depth" => Enum.max(depths), - "uses_tabs" => uses_tabs + "uses_tabs" => uses_tabs, + "blank_line_ratio" => blank_line_ratio } end end diff --git a/lib/codeqa/metrics/inflector.ex b/lib/codeqa/metrics/file/inflector.ex similarity index 89% rename from lib/codeqa/metrics/inflector.ex rename to lib/codeqa/metrics/file/inflector.ex index 7c49531..04e732c 100644 --- a/lib/codeqa/metrics/inflector.ex +++ b/lib/codeqa/metrics/file/inflector.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Inflector do +defmodule CodeQA.Metrics.File.Inflector do @moduledoc """ Utility for detecting identifier casing styles. 
@@ -30,7 +30,8 @@ defmodule CodeQA.Metrics.Inflector do iex> CodeQA.Metrics.Inflector.detect_casing("FOO_BAR") :macro_case """ - @spec detect_casing(String.t()) :: :pascal_case | :camel_case | :snake_case | :macro_case | :kebab_case | :other + @spec detect_casing(String.t()) :: + :pascal_case | :camel_case | :snake_case | :macro_case | :kebab_case | :other def detect_casing(identifier) do cond do identifier =~ ~r/^[A-Z][a-zA-Z0-9]*$/ -> :pascal_case diff --git a/lib/codeqa/metrics/magic_number_density.ex b/lib/codeqa/metrics/file/magic_number_density.ex similarity index 51% rename from lib/codeqa/metrics/magic_number_density.ex rename to lib/codeqa/metrics/file/magic_number_density.ex index 3e28bb4..20428df 100644 --- a/lib/codeqa/metrics/magic_number_density.ex +++ b/lib/codeqa/metrics/file/magic_number_density.ex @@ -1,10 +1,10 @@ -defmodule CodeQA.Metrics.MagicNumberDensity do +defmodule CodeQA.Metrics.File.MagicNumberDensity do @moduledoc """ - Measures the density of magic numbers in source code. + Measures the density of magic numbers and string literals in source code. - Counts numeric literals (excluding common constants 0, 1, 0.0, 1.0) as a - proportion of total tokens. A high density suggests unexplained constants - that should be extracted into named values. + Counts numeric literals (excluding common constants 0, 1, 0.0, 1.0) and + double-quoted string literals as proportions of total tokens. High densities + suggest unexplained constants or hardcoded values that should be extracted. Note: negative numbers (e.g. `-42`) are not detected since the minus sign is a separate token. @@ -12,22 +12,25 @@ defmodule CodeQA.Metrics.MagicNumberDensity do See [magic number](). 
""" - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "magic_number_density" + @impl true + def keys, do: ["density", "magic_number_count", "string_literal_ratio"] + @number_re ~r/\b\d+\.?\d*(?:[eE][+-]?\d+)?\b/ @idiomatic_constants ~w[0 1 2 0.0 1.0 0.5] + @string_literal_re ~r/"(?:[^"\\]|\\.)*"/ @spec analyze(map()) :: map() @impl true def analyze(%{content: content, tokens: tokens}) do - token_list = Tuple.to_list(tokens) - total_tokens = length(token_list) + total_tokens = length(tokens) if total_tokens == 0 do - %{"density" => 0.0, "magic_number_count" => 0} + %{"density" => 0.0, "magic_number_count" => 0, "string_literal_ratio" => 0.0} else numbers = @number_re @@ -36,10 +39,12 @@ defmodule CodeQA.Metrics.MagicNumberDensity do |> Enum.reject(&(&1 in @idiomatic_constants)) magic_count = length(numbers) + string_count = @string_literal_re |> Regex.scan(content) |> length() %{ "density" => Float.round(magic_count / total_tokens, 4), - "magic_number_count" => magic_count + "magic_number_count" => magic_count, + "string_literal_ratio" => Float.round(string_count / total_tokens, 4) } end end diff --git a/lib/codeqa/metrics/ngram.ex b/lib/codeqa/metrics/file/ngram.ex similarity index 71% rename from lib/codeqa/metrics/ngram.ex rename to lib/codeqa/metrics/file/ngram.ex index fb2b44b..b100513 100644 --- a/lib/codeqa/metrics/ngram.ex +++ b/lib/codeqa/metrics/file/ngram.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Ngram do +defmodule CodeQA.Metrics.File.Ngram do @moduledoc """ Computes bigram and trigram statistics over the token stream. @@ -10,15 +10,30 @@ defmodule CodeQA.Metrics.Ngram do and [hapax legomenon](https://en.wikipedia.org/wiki/Hapax_legomenon). 
""" - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "ngram" + @impl true + def keys, + do: [ + "bigram_total", + "bigram_unique", + "bigram_repetition_rate", + "bigram_hapax_fraction", + "bigram_repeated_unique", + "trigram_total", + "trigram_unique", + "trigram_repetition_rate", + "trigram_hapax_fraction", + "trigram_repeated_unique" + ] + @spec analyze(map()) :: map() @impl true def analyze(ctx) do - tokens = Tuple.to_list(ctx.tokens) + tokens = Enum.map(ctx.tokens, & &1.content) bigram_stats = ngram_stats(tokens, 2) |> rename_keys("bigram") trigram_stats = ngram_stats(tokens, 3) |> rename_keys("trigram") @@ -27,7 +42,13 @@ defmodule CodeQA.Metrics.Ngram do end defp ngram_stats(tokens, n) when length(tokens) < n do - %{"total" => 0, "unique" => 0, "repetition_rate" => 0.0, "hapax_fraction" => 0.0, "repeated_unique" => 0} + %{ + "total" => 0, + "unique" => 0, + "repetition_rate" => 0.0, + "hapax_fraction" => 0.0, + "repeated_unique" => 0 + } end defp ngram_stats(tokens, n) do diff --git a/lib/codeqa/metrics/file/punctuation_density.ex b/lib/codeqa/metrics/file/punctuation_density.ex new file mode 100644 index 0000000..8b42ee4 --- /dev/null +++ b/lib/codeqa/metrics/file/punctuation_density.ex @@ -0,0 +1,96 @@ +defmodule CodeQA.Metrics.File.PunctuationDensity do + @moduledoc """ + Character-level punctuation and structural pattern metrics. + + Captures signals that character-level metrics miss: naming conventions using + `?`/`!` suffixes, chained method calls (dots), non-standard bracket adjacency, + and numeric bracket pair patterns. 
+ """ + + @behaviour CodeQA.Metrics.File.FileMetric + + @impl true + def name, do: "punctuation_density" + + @impl true + def keys do + [ + "question_mark_density", + "exclamation_density", + "dot_count", + "id_nonalpha_suffix_density", + "bracket_nonalpha_prefix_count", + "bracket_nonalpha_suffix_count", + "bracket_number_pair_count", + "arrow_density", + "colon_suffix_density" + ] + end + + # identifier-like token (starts with letter/underscore) ending with non-alphanumeric non-whitespace + @id_nonalpha_suffix ~r/[a-zA-Z_]\w*[^\w\s]/ + # opening bracket immediately preceded by non-alphanumeric non-whitespace (e.g. `?(`, `==[`) + @bracket_nonalpha_prefix ~r/[^\w\s\(\[\{][\(\[\{]/ + # closing bracket immediately followed by non-alphanumeric non-whitespace (e.g. `}.`, `)?`) + @bracket_nonalpha_suffix ~r/[\)\]\}][^\w\s\)\]\}]/ + # number (with optional underscores) wrapped in brackets: (42), [1_000], (3.14) + @bracket_number_pair ~r/[\(\[]\d[\d_]*(?:\.\d+)?[\)\]]/ + # arrow operators: -> and => + @arrow ~r/->|=>/ + # identifier immediately followed by colon (keyword args, dict keys, labels) + @colon_suffix ~r/[a-zA-Z_]\w*:/ + + @spec analyze(map()) :: map() + @impl true + def analyze(%{content: content, tokens: tokens}) do + total_chars = String.length(content) + total_tokens = length(tokens) + + if total_chars == 0 do + %{ + "question_mark_density" => 0.0, + "exclamation_density" => 0.0, + "dot_count" => 0, + "id_nonalpha_suffix_density" => 0.0, + "bracket_nonalpha_prefix_count" => 0, + "bracket_nonalpha_suffix_count" => 0, + "bracket_number_pair_count" => 0, + "arrow_density" => 0.0, + "colon_suffix_density" => 0.0 + } + else + qmarks = count_char(content, "?") + excls = count_char(content, "!") + dots = count_char(content, ".") + + id_suffix_count = count_matches(content, @id_nonalpha_suffix) + bracket_prefix = count_matches(content, @bracket_nonalpha_prefix) + bracket_suffix = count_matches(content, @bracket_nonalpha_suffix) + bracket_num = count_matches(content, 
@bracket_number_pair) + + id_denom = max(total_tokens, 1) + arrows = count_matches(content, @arrow) + colon_suffixes = count_matches(content, @colon_suffix) + + %{ + "question_mark_density" => Float.round(qmarks / total_chars, 6), + "exclamation_density" => Float.round(excls / total_chars, 6), + "dot_count" => dots, + "id_nonalpha_suffix_density" => Float.round(id_suffix_count / id_denom, 4), + "bracket_nonalpha_prefix_count" => bracket_prefix, + "bracket_nonalpha_suffix_count" => bracket_suffix, + "bracket_number_pair_count" => bracket_num, + "arrow_density" => Float.round(arrows / id_denom, 4), + "colon_suffix_density" => Float.round(colon_suffixes / id_denom, 4) + } + end + end + + defp count_char(content, char) do + content |> String.graphemes() |> Enum.count(&(&1 == char)) + end + + defp count_matches(content, regex) do + regex |> Regex.scan(content) |> length() + end +end diff --git a/lib/codeqa/metrics/readability.ex b/lib/codeqa/metrics/file/readability.ex similarity index 89% rename from lib/codeqa/metrics/readability.ex rename to lib/codeqa/metrics/file/readability.ex index 5ffa9e1..3e1bd2c 100644 --- a/lib/codeqa/metrics/readability.ex +++ b/lib/codeqa/metrics/file/readability.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Readability do +defmodule CodeQA.Metrics.File.Readability do @moduledoc """ Computes adapted Flesch and Fog readability indices for source code. @@ -10,17 +10,27 @@ defmodule CodeQA.Metrics.Readability do and [Gunning fog index](https://en.wikipedia.org/wiki/Gunning_fog_index). 
""" - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "readability" + @impl true + def keys, + do: [ + "avg_tokens_per_line", + "avg_line_length", + "avg_sub_words_per_id", + "flesch_adapted", + "fog_adapted", + "total_lines" + ] + @spec analyze(map()) :: map() @impl true def analyze(ctx) do lines = ctx.lines - |> Tuple.to_list() |> Enum.filter(fn line -> trimmed = String.trim(line) trimmed != "" and not String.starts_with?(trimmed, "#") @@ -42,11 +52,11 @@ defmodule CodeQA.Metrics.Readability do defp compute_readability(ctx, lines) do total_lines = length(lines) - total_tokens = tuple_size(ctx.tokens) + total_tokens = length(ctx.tokens) avg_tokens = total_tokens / total_lines avg_line_length = lines |> Enum.map(&String.length/1) |> Enum.sum() |> Kernel./(total_lines) - words = Tuple.to_list(ctx.words) + words = ctx.words {avg_sub_words, complex_fraction} = if words != [] do diff --git a/lib/codeqa/metrics/file/rfc.ex b/lib/codeqa/metrics/file/rfc.ex new file mode 100644 index 0000000..5416c68 --- /dev/null +++ b/lib/codeqa/metrics/file/rfc.ex @@ -0,0 +1,81 @@ +defmodule CodeQA.Metrics.File.RFC do + @moduledoc """ + Response For a Class (RFC) — a coupling metric from the Chidamber & Kemerer suite. + + RFC ≈ number of distinct methods/functions reachable from this file, counting + both locally-defined functions and distinct external call targets. + + Formula: `RFC = function_def_count + |distinct_call_targets|` + + Computed from the token stream without requiring a real AST: + - Function definitions are detected by function-keyword tokens (`def`, `fn`, etc.) + followed by an `` token. + - Call targets are detected by `` tokens immediately followed by `(`. + Duplicates are collapsed to a set. + + Higher RFC values indicate a module with more responsibility and more external + coupling, correlating empirically with higher fault density. 
+ + See [CK metrics suite](https://en.wikipedia.org/wiki/Programming_complexity#Chidamber_and_Kemerer_metrics). + """ + + @behaviour CodeQA.Metrics.File.FileMetric + + @func_keywords MapSet.new(~w[ + def defp defmacro defmacrop defguard defdelegate + function func fun fn + sub proc method + ]) + + @impl true + def name, do: "rfc" + + @impl true + def keys, do: ["rfc_count", "rfc_density", "function_def_count", "distinct_call_count"] + + @impl true + def description, + do: "Response For a Class: function definitions + distinct call targets (CK suite)" + + @spec analyze(CodeQA.Engine.FileContext.t()) :: map() + @impl true + def analyze(%{tokens: tokens, line_count: line_count}) do + {func_def_count, call_targets} = scan_tokens(tokens) + + distinct_call_count = MapSet.size(call_targets) + rfc_count = func_def_count + distinct_call_count + + density = + if line_count > 0, + do: Float.round(rfc_count / line_count, 4), + else: 0.0 + + %{ + "rfc_count" => rfc_count, + "rfc_density" => density, + "function_def_count" => func_def_count, + "distinct_call_count" => distinct_call_count + } + end + + # Single pass: detect function definitions and call sites simultaneously. + # Uses a sliding window of two adjacent tokens. 
+ defp scan_tokens(tokens) do + tokens + |> Enum.zip(Enum.drop(tokens, 1)) + |> Enum.reduce({0, MapSet.new()}, fn {tok, next}, {defs, calls} -> + cond do + # Function definition: keyword followed by an identifier + MapSet.member?(@func_keywords, tok.content) and next.kind == "" -> + {defs + 1, calls} + + # Call site: identifier followed by open paren + tok.kind == "" and next.content == "(" -> + {defs, MapSet.put(calls, tok.content)} + + true -> + {defs, calls} + end + end) + end +end diff --git a/lib/codeqa/metrics/symbol_density.ex b/lib/codeqa/metrics/file/symbol_density.ex similarity index 85% rename from lib/codeqa/metrics/symbol_density.ex rename to lib/codeqa/metrics/file/symbol_density.ex index 67459a0..3e71bf3 100644 --- a/lib/codeqa/metrics/symbol_density.ex +++ b/lib/codeqa/metrics/file/symbol_density.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.SymbolDensity do +defmodule CodeQA.Metrics.File.SymbolDensity do @moduledoc """ Measures the density of non-word, non-whitespace symbols in source code. @@ -9,11 +9,14 @@ defmodule CodeQA.Metrics.SymbolDensity do See [code readability](https://en.wikipedia.org/wiki/Computer_programming#Readability_of_source_code). """ - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "symbol_density" + @impl true + def keys, do: ["density", "symbol_count", "distinct_symbol_types"] + @spec analyze(map()) :: map() @impl true def analyze(%{content: content}) do diff --git a/lib/codeqa/metrics/vocabulary.ex b/lib/codeqa/metrics/file/vocabulary.ex similarity index 91% rename from lib/codeqa/metrics/vocabulary.ex rename to lib/codeqa/metrics/file/vocabulary.ex index d9ef637..496cc68 100644 --- a/lib/codeqa/metrics/vocabulary.ex +++ b/lib/codeqa/metrics/file/vocabulary.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Vocabulary do +defmodule CodeQA.Metrics.File.Vocabulary do @moduledoc """ Analyzes vocabulary diversity using type-token ratio (TTR) and MATTR. 
@@ -14,19 +14,22 @@ defmodule CodeQA.Metrics.Vocabulary do and [MATTR](https://doi.org/10.3758/BRM.42.2.381). """ - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "vocabulary" + @impl true + def keys, do: ["raw_ttr", "mattr", "unique_identifiers", "total_identifiers", "vocabulary"] + @window_size 100 @spec analyze(map()) :: map() @impl true def analyze(ctx) do - identifiers = Tuple.to_list(ctx.identifiers) + identifiers = ctx.identifiers total = length(identifiers) - vocabulary = ctx.words |> Tuple.to_list() |> Enum.uniq() |> Enum.sort() + vocabulary = ctx.words |> Enum.uniq() |> Enum.sort() if total == 0 do %{ diff --git a/lib/codeqa/metrics/vowel_density.ex b/lib/codeqa/metrics/file/vowel_density.ex similarity index 86% rename from lib/codeqa/metrics/vowel_density.ex rename to lib/codeqa/metrics/file/vowel_density.ex index 84ea39e..f3f53de 100644 --- a/lib/codeqa/metrics/vowel_density.ex +++ b/lib/codeqa/metrics/file/vowel_density.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.VowelDensity do +defmodule CodeQA.Metrics.File.VowelDensity do @moduledoc """ Measures the density of vowels in identifiers. @@ -9,17 +9,20 @@ defmodule CodeQA.Metrics.VowelDensity do See [identifier naming](https://en.wikipedia.org/wiki/Identifier_(computer_languages)). 
""" - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @vowels MapSet.new(~c"aeiouyAEIOUY") @impl true def name, do: "vowel_density" + @impl true + def keys, do: ["density", "vowel_count", "total_chars"] + @spec analyze(map()) :: map() @impl true def analyze(%{identifiers: identifiers}) do - list = Tuple.to_list(identifiers) + list = identifiers if list == [] do %{"density" => 0.0, "vowel_count" => 0, "total_chars" => 0} diff --git a/lib/codeqa/metrics/winnowing.ex b/lib/codeqa/metrics/file/winnowing.ex similarity index 96% rename from lib/codeqa/metrics/winnowing.ex rename to lib/codeqa/metrics/file/winnowing.ex index 9c8961c..d725a38 100644 --- a/lib/codeqa/metrics/winnowing.ex +++ b/lib/codeqa/metrics/file/winnowing.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Winnowing do +defmodule CodeQA.Metrics.File.Winnowing do @moduledoc """ Generates structural fingerprints using k-grams. diff --git a/lib/codeqa/metrics/zipf.ex b/lib/codeqa/metrics/file/zipf.ex similarity index 86% rename from lib/codeqa/metrics/zipf.ex rename to lib/codeqa/metrics/file/zipf.ex index 4948c3d..b03a07c 100644 --- a/lib/codeqa/metrics/zipf.ex +++ b/lib/codeqa/metrics/file/zipf.ex @@ -1,4 +1,4 @@ -defmodule CodeQA.Metrics.Zipf do +defmodule CodeQA.Metrics.File.Zipf do @moduledoc """ Fits Zipf's law to the token frequency distribution. @@ -9,21 +9,24 @@ defmodule CodeQA.Metrics.Zipf do See [Zipf's law](https://en.wikipedia.org/wiki/Zipf%27s_law). 
""" - @behaviour CodeQA.Metrics.FileMetric + @behaviour CodeQA.Metrics.File.FileMetric @impl true def name, do: "zipf" + @impl true + def keys, do: ["exponent", "r_squared", "vocab_size", "total_tokens"] + @spec analyze(map()) :: map() @impl true - def analyze(%{tokens: tokens, token_counts: _token_counts}) when tuple_size(tokens) == 0 do + def analyze(%{tokens: [], token_counts: _token_counts}) do %{"exponent" => 0.0, "r_squared" => 0.0, "vocab_size" => 0, "total_tokens" => 0} end def analyze(%{tokens: tokens, token_counts: token_counts}) do frequencies = token_counts |> Map.values() |> Enum.sort(:desc) vocab_size = length(frequencies) - total_tokens = tuple_size(tokens) + total_tokens = length(tokens) if vocab_size < 3 do %{ diff --git a/lib/codeqa/metrics/post_processing/menzerath.ex b/lib/codeqa/metrics/post_processing/menzerath.ex new file mode 100644 index 0000000..2d9bd32 --- /dev/null +++ b/lib/codeqa/metrics/post_processing/menzerath.ex @@ -0,0 +1,278 @@ +defmodule CodeQA.Metrics.PostProcessing.Menzerath do + @moduledoc """ + Measures structural hierarchy conformance using Menzerath's law. + + ## Block-level score + + For each parsed block in a file, computes: + + ratio = block.line_count / parent.line_count + + Root blocks use the file's line count as parent. Ratio close to 1.0 means the block + dominates its parent (poor decomposition). Low ratio means the block is small relative + to its parent (good decomposition). + + For internal nodes that have children, also computes `avg_child_ratio` — the mean ratio + of direct children. High `avg_child_ratio` means this node failed to decompose its + children into small enough pieces. 
+ + ## Codebase-level score + + Collects `{function_count, avg_function_lines}` pairs from all files and computes: + - Pearson correlation (negative = law holds across the codebase) + - Power-law exponent `b` from `y = a · x^b` fit on log-log scale + - R² of the fit + """ + + @behaviour CodeQA.Metrics.PostProcessing.PostProcessingMetric + + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Parsing.Parser + alias CodeQA.Languages.Unknown + + @violation_threshold 0.6 + + @impl true + def name, do: "menzerath" + + @impl true + def analyze(pipeline_result, files_map, _opts) do + file_scores = + Map.new(files_map, fn {path, content} -> + {path, %{"menzerath" => score_file(content)}} + end) + + codebase_score = compute_codebase_score(pipeline_result) + + %{ + "files" => file_scores, + "codebase" => %{"menzerath" => codebase_score} + } + end + + # --- file-level scoring --- + + defp score_file("") do + %{ + "blocks" => [], + "mean_ratio" => 0.0, + "max_ratio" => 0.0, + "violation_count" => 0, + "insight" => "Empty file." + } + end + + defp score_file(content) do + file_lines = content |> String.split("\n") |> length() + root_tokens = TokenNormalizer.normalize_structural(content) + top_nodes = Parser.detect_blocks(root_tokens, Unknown) + + blocks = Enum.map(top_nodes, &score_node(&1, file_lines)) + all_ratios = collect_ratios(blocks) + n = length(all_ratios) + + mean_ratio = if(n == 0, do: 0.0, else: round4(Enum.sum(all_ratios) / n)) + max_ratio = if(n == 0, do: 0.0, else: round4(Enum.max(all_ratios))) + violation_count = Enum.count(all_ratios, &(&1 >= @violation_threshold)) + + %{ + "blocks" => blocks, + "mean_ratio" => mean_ratio, + "max_ratio" => max_ratio, + "violation_count" => violation_count, + "insight" => file_insight(mean_ratio, max_ratio, violation_count, length(top_nodes)) + } + end + + defp file_insight(_mean, _max, _violations, 0), + do: "No blocks detected." 
+ + defp file_insight(_mean, _max, 0, _block_count), + do: "Well decomposed — all blocks are small relative to their parents." + + defp file_insight(_mean, max_ratio, violations, _block_count) when max_ratio >= 0.9, + do: + "#{violations} block(s) nearly span the entire file — the file is not decomposed into meaningful pieces." + + defp file_insight(mean_ratio, _max, violations, _block_count) when mean_ratio >= 0.5, + do: + "#{violations} violation(s); blocks are large on average (mean ratio #{mean_ratio}) — the file likely needs to be split or its blocks extracted." + + defp file_insight(_mean, _max, violations, _block_count), + do: + "#{violations} block(s) dominate their parent context — consider extracting those into separate functions or modules." + + defp score_node(node, parent_lines) do + ratio = if parent_lines > 0, do: round4(node.line_count / parent_lines), else: 0.0 + + children = Enum.map(node.children, &score_node(&1, node.line_count)) + + base = %{ + "start_line" => node.start_line, + "end_line" => node.end_line, + "line_count" => node.line_count, + "parent_lines" => parent_lines, + "ratio" => ratio, + "insight" => block_insight(ratio, []), + "children" => children + } + + case children do + [] -> + base + + kids -> + child_ratios = Enum.map(kids, & &1["ratio"]) + avg = round4(Enum.sum(child_ratios) / length(child_ratios)) + + base + |> Map.put("avg_child_ratio", avg) + |> Map.put("insight", block_insight(ratio, avg_child_ratio: avg)) + end + end + + defp block_insight(ratio, opts) do + avg_child_ratio = Keyword.get(opts, :avg_child_ratio) + + cond do + ratio >= 0.9 -> + "Block spans nearly the entire parent — no meaningful decomposition at this level." + + (ratio >= @violation_threshold and avg_child_ratio) && + avg_child_ratio >= @violation_threshold -> + "Block is large relative to its parent and its own children are also large — nested decomposition failure." 
+ + ratio >= @violation_threshold -> + "Block is large relative to its parent — consider splitting or extracting." + + avg_child_ratio && avg_child_ratio >= @violation_threshold -> + "Block is reasonably sized but its children are too large — this block should be broken down further." + + true -> + nil + end + end + + defp collect_ratios(blocks) do + Enum.flat_map(blocks, fn block -> + [block["ratio"] | collect_ratios(block["children"])] + end) + end + + # --- codebase-level scoring --- + + defp compute_codebase_score(pipeline_result) do + pairs = + pipeline_result + |> Map.get("files", %{}) + |> Enum.flat_map(fn {_path, file_data} -> + fm = get_in(file_data, ["metrics", "function_metrics"]) || %{} + count = fm["function_count"] + avg = fm["avg_function_lines"] + + if is_number(count) and is_number(avg) and count > 0 do + [{count * 1.0, avg * 1.0}] + else + [] + end + end) + + n = length(pairs) + + if n < 3 do + %{ + "correlation" => nil, + "exponent" => nil, + "r_squared" => nil, + "sample_size" => n, + "insight" => + "Not enough files with function data to compute Menzerath conformance (need ≥ 3, got #{n})." + } + else + xs = Enum.map(pairs, &elem(&1, 0)) + ys = Enum.map(pairs, &elem(&1, 1)) + correlation = round4(pearson(xs, ys)) + {exponent, r_squared} = fit_power_law(xs, ys) + + %{ + "correlation" => correlation, + "exponent" => if(exponent, do: round4(exponent), else: nil), + "r_squared" => if(r_squared, do: round4(r_squared), else: nil), + "sample_size" => n, + "insight" => codebase_insight(correlation, r_squared) + } + end + end + + defp codebase_insight(correlation, r_squared) do + fit_quality = if r_squared && r_squared >= 0.5, do: " (strong fit, R²=#{r_squared})", else: "" + + cond do + correlation <= -0.3 -> + "Menzerath's law holds#{fit_quality} — larger files tend to have shorter functions, indicating healthy decomposition." + + correlation >= 0.3 -> + "Menzerath's law violated#{fit_quality} — larger files have longer functions. 
Files are growing without being decomposed; consider splitting large files or extracting functions." + + true -> + "Weak Menzerath signal (correlation #{correlation}) — no clear relationship between file size and function length. Decomposition patterns are inconsistent across the codebase." + end + end + + defp pearson(xs, ys) do + n = length(xs) + sum_x = Enum.sum(xs) + sum_y = Enum.sum(ys) + sum_xy = Enum.zip(xs, ys) |> Enum.reduce(0.0, fn {x, y}, acc -> acc + x * y end) + sum_x2 = Enum.reduce(xs, 0.0, fn x, acc -> acc + x * x end) + sum_y2 = Enum.reduce(ys, 0.0, fn y, acc -> acc + y * y end) + + num = n * sum_xy - sum_x * sum_y + den = :math.sqrt((n * sum_x2 - sum_x * sum_x) * (n * sum_y2 - sum_y * sum_y)) + + if den == 0.0, do: 0.0, else: num / den + end + + defp fit_power_law(xs, ys) do + # Linearize: log(y) = log(a) + b * log(x), fit via OLS on log-log scale + pairs = + Enum.zip(xs, ys) + |> Enum.filter(fn {x, y} -> x > 0 and y > 0 end) + + if length(pairs) < 2 do + {nil, nil} + else + log_xs = Enum.map(pairs, fn {x, _} -> :math.log(x) end) + log_ys = Enum.map(pairs, fn {_, y} -> :math.log(y) end) + + n = length(pairs) + sum_lx = Enum.sum(log_xs) + sum_ly = Enum.sum(log_ys) + sum_lx2 = Enum.reduce(log_xs, 0.0, fn x, acc -> acc + x * x end) + sum_lxly = Enum.zip(log_xs, log_ys) |> Enum.reduce(0.0, fn {x, y}, acc -> acc + x * y end) + + denom = n * sum_lx2 - sum_lx * sum_lx + + if denom == 0.0 do + {nil, nil} + else + b = (n * sum_lxly - sum_lx * sum_ly) / denom + log_a = (sum_ly - b * sum_lx) / n + mean_ly = sum_ly / n + + ss_tot = Enum.reduce(log_ys, 0.0, fn ly, acc -> acc + (ly - mean_ly) ** 2 end) + + ss_res = + Enum.zip(log_xs, log_ys) + |> Enum.reduce(0.0, fn {lx, ly}, acc -> + acc + (ly - (log_a + b * lx)) ** 2 + end) + + r_squared = if ss_tot == 0.0, do: 0.0, else: 1.0 - ss_res / ss_tot + {b, r_squared} + end + end + end + + defp round4(v), do: Float.round(v * 1.0, 4) +end diff --git a/lib/codeqa/metrics/post_processing/post_processing_metric.ex 
b/lib/codeqa/metrics/post_processing/post_processing_metric.ex new file mode 100644 index 0000000..c4b7bc0 --- /dev/null +++ b/lib/codeqa/metrics/post_processing/post_processing_metric.ex @@ -0,0 +1,21 @@ +defmodule CodeQA.Metrics.PostProcessing.PostProcessingMetric do + @moduledoc """ + Behaviour for post-processing metrics that derive values from the full pipeline result. + + Post-processing metrics run after both file and codebase metrics complete. They receive + the full result tree and the raw files map, and return a partial result map that is + deep-merged into the pipeline result. + """ + + @doc "Unique name used as the key in the output." + @callback name() :: String.t() + + @doc """ + Analyze the pipeline result and return a partial result map to be deep-merged. + + The returned map should use the same top-level structure as the pipeline result: + `%{"files" => %{path => additions}, "codebase" => additions}`. + Only keys present in the return value are merged; absent keys are left unchanged. + """ + @callback analyze(pipeline_result :: map(), files_map :: map(), opts :: keyword()) :: map() +end diff --git a/lib/codeqa/metrics/token_normalizer.ex b/lib/codeqa/metrics/token_normalizer.ex deleted file mode 100644 index 6967e6a..0000000 --- a/lib/codeqa/metrics/token_normalizer.ex +++ /dev/null @@ -1,45 +0,0 @@ -defmodule CodeQA.Metrics.TokenNormalizer do - @moduledoc """ - Abstracts raw source code into language-agnostic structural tokens. - - See [lexical analysis](https://en.wikipedia.org/wiki/Lexical_analysis). - """ - - # Note for future: This module can be extended with a second parameter - # normalize(code, language \\ :agnostic) to load specific regex dictionaries. - - @doc """ - Normalizes source code into a list of structural tokens. - - Replaces string literals with ``, numeric literals with ``, - and identifiers/keywords with ``. Remaining punctuation is split into - individual tokens, with common multi-character operators kept together. 
- - ## Examples - - iex> CodeQA.Metrics.TokenNormalizer.normalize("x = 42") - ["", "=", ""] - - """ - @spec normalize(String.t()) :: [String.t()] - def normalize(code) do - code - # 1. Strings (single and double quotes, handling escaped quotes) - |> String.replace(~r/"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'/, " ") - # 2. Numbers (integers and floats) - |> String.replace(~r/\b\d+(\.\d+)?\b/, " ") - # 3. Identifiers/Keywords (negative lookbehind/ahead to avoid clobbering // tags) - |> String.replace(~r/(?)/, " ") - # 4. Split by whitespace to extract the tokens and remaining structural punctuation - |> String.split(~r/\s+/, trim: true) - # 5. Further split punctuation, keeping common multi-char operators together - |> Enum.flat_map(&split_punctuation/1) - end - - defp split_punctuation(token) when token in ["", "", ""], do: [token] - - defp split_punctuation(text) do - Regex.scan(~r/->|=>|<>|\|>|::|\.\.\.|<-|!=|==|<=|>=|\+\+|--|&&|\|\||[^\w\s]/, text) - |> List.flatten() - end -end diff --git a/lib/codeqa/pipeline.ex b/lib/codeqa/pipeline.ex deleted file mode 100644 index bcd256c..0000000 --- a/lib/codeqa/pipeline.ex +++ /dev/null @@ -1,109 +0,0 @@ -defmodule CodeQA.Pipeline do - @moduledoc "Pre-computed shared context for file-level metrics." - - defmodule FileContext do - @moduledoc "Immutable pre-computed data shared across all file metrics." 
- @enforce_keys [ - :content, - :tokens, - :token_counts, - :words, - :identifiers, - :lines, - :encoded, - :byte_count, - :line_count - ] - defstruct @enforce_keys - - @type t :: %__MODULE__{ - content: String.t(), - tokens: tuple(), - token_counts: map(), - words: tuple(), - identifiers: tuple(), - lines: tuple(), - encoded: String.t(), - byte_count: non_neg_integer(), - line_count: non_neg_integer() - } - end - - @word_re ~r/\b[a-zA-Z_]\w*\b/u - - # Reserved words and keywords for: - # Python, Ruby, JavaScript, Elixir, C#, - # Java, C++, Go, Rust, PHP, Swift, Shell, Kotlin - @keywords MapSet.new(~w[ - if else elif elsif unless - for foreach while until do - return break continue yield pass - try except finally rescue ensure after catch throw raise begin end throws - case when switch cond match default fallthrough - with as and or not in is - import from require use using alias namespace package - class def defp defmodule defmacro defmacrop defprotocol defimpl defguard defdelegate - module interface struct enum delegate event protocol extension - function fn func fun new delete typeof instanceof void - var let val const static public private protected internal - sealed override virtual abstract final readonly open - async await receive suspend - self super this Self - extends implements - null undefined nil None nullptr - true false True False - bool int float double long short byte char boolean string decimal object dynamic - ref out params get set value inout - lambda del global nonlocal assert - type typealias - synchronized volatile transient native strictfp - auto register extern signed unsigned typedef sizeof union - template typename operator inline friend explicit mutable constexpr decltype noexcept - func chan go select defer range - mut impl trait pub mod crate dyn unsafe loop where move - echo print array list mixed never - actor init deinit lazy open some any rethrows willSet didSet - then fi done esac local export source unset declare - fun val 
object data companion reified infix vararg expect actual - ]) - - @spec build_file_context(String.t(), keyword()) :: FileContext.t() - def build_file_context(content, opts \\ []) when is_binary(content) do - stopwords = Keyword.get(opts, :word_stopwords, MapSet.new()) - - tokens = content |> String.split() |> List.to_tuple() - token_list = Tuple.to_list(tokens) - token_counts = Enum.frequencies(token_list) - - words = - Regex.scan(@word_re, content) - |> List.flatten() - |> Enum.reject(&MapSet.member?(stopwords, &1)) - |> List.to_tuple() - - word_list = Tuple.to_list(words) - identifiers = word_list |> Enum.reject(&MapSet.member?(@keywords, &1)) |> List.to_tuple() - lines = content |> String.split("\n") |> trim_trailing_empty() |> List.to_tuple() - encoded = content - - %FileContext{ - content: content, - tokens: tokens, - token_counts: token_counts, - words: words, - identifiers: identifiers, - lines: lines, - encoded: encoded, - byte_count: byte_size(content), - line_count: tuple_size(lines) - } - end - - defp trim_trailing_empty(lines) do - # Match Python's str.splitlines() behavior - case List.last(lines) do - "" -> List.delete_at(lines, -1) - _ -> lines - end - end -end diff --git a/lib/codeqa/stopwords.ex b/lib/codeqa/stopwords.ex deleted file mode 100644 index bd33374..0000000 --- a/lib/codeqa/stopwords.ex +++ /dev/null @@ -1,63 +0,0 @@ -defmodule CodeQA.Stopwords do - @moduledoc "Finds highly frequent items across a codebase to act as stopwords." - - @doc """ - Finds items that appear in more than the specified threshold of files. - `extractor` is a function that takes a file's content and returns an Enumerable of items. 
- """ - def find_stopwords(files, extractor, opts \\ []) do - threshold_ratio = Keyword.get(opts, :stopwords_threshold, 0.15) - total_docs = map_size(files) - min_docs = max(1, round(total_docs * threshold_ratio)) - workers = Keyword.get(opts, :workers, System.schedulers_online()) - has_progress = Keyword.get(opts, :progress, false) - label = Keyword.get(opts, :progress_label, "") - - counter = :counters.new(1, [:atomics]) - start_time = System.monotonic_time(:millisecond) - - files - |> Task.async_stream( - fn {_path, content} -> - res = - content - |> extractor.() - |> MapSet.new() - - if has_progress do - :counters.add(counter, 1, 1) - completed = :counters.get(counter, 1) - print_progress(completed, total_docs, start_time, label) - end - - res - end, max_concurrency: workers, timeout: :infinity) - |> Enum.reduce(%{}, fn {:ok, unique_items_in_file}, doc_freqs -> - Enum.reduce(unique_items_in_file, doc_freqs, fn item, acc -> - Map.update(acc, item, 1, &(&1 + 1)) - end) - end) - |> Enum.filter(fn {_item, count} -> count >= min_docs end) - |> Enum.map(fn {item, _count} -> item end) - |> MapSet.new() - end - - defp print_progress(completed, total, start_time, label) do - now = System.monotonic_time(:millisecond) - elapsed = max(now - start_time, 1) - avg_time = elapsed / completed - eta_ms = round((total - completed) * avg_time) - - output = - CodeQA.CLI.UI.progress_bar(completed, total, - eta: CodeQA.CLI.UI.format_eta(eta_ms), - label: label - ) - - IO.write(:stderr, "\r" <> output) - - if completed == total do - IO.puts(:stderr, "") - end - end -end diff --git a/lib/codeqa/telemetry.ex b/lib/codeqa/telemetry.ex deleted file mode 100644 index 3f5d22d..0000000 --- a/lib/codeqa/telemetry.ex +++ /dev/null @@ -1,68 +0,0 @@ -defmodule CodeQA.Telemetry do - @moduledoc "Simple concurrent telemetry tracker using ETS." 
- - def setup do - if :ets.info(:codeqa_telemetry) == :undefined do - :ets.new(:codeqa_telemetry, [:named_table, :public, :set, write_concurrency: true]) - end - - :ok - end - - def time(metric_name, fun) do - if :ets.info(:codeqa_telemetry) != :undefined do - start_time = System.monotonic_time(:microsecond) - result = fun.() - end_time = System.monotonic_time(:microsecond) - duration = end_time - start_time - - :ets.update_counter(:codeqa_telemetry, metric_name, {2, duration}, {metric_name, 0}) - - :ets.update_counter( - :codeqa_telemetry, - "#{metric_name}_count", - {2, 1}, - {"#{metric_name}_count", 0} - ) - - result - else - fun.() - end - end - - defp format_metric_line({name, total_time_us}) do - count = - case :ets.lookup(:codeqa_telemetry, "#{name}_count") do - [{_, c}] -> c - _ -> 1 - end - - total_ms = Float.round(total_time_us / 1000, 2) - avg_ms = Float.round(total_ms / count, 2) - - String.pad_trailing(to_string(name), 30) <> - " | Total: #{String.pad_trailing(to_string(total_ms) <> "ms", 12)}" <> - " | Count: #{String.pad_trailing(to_string(count), 6)}" <> - " | Avg: #{avg_ms}ms" - end - - def print_report do - if :ets.info(:codeqa_telemetry) != :undefined do - IO.puts(:stderr, " ---- Telemetry Report (Wall-clock times) ---") - metrics = :ets.tab2list(:codeqa_telemetry) - - # Group totals and counts - totals = - Enum.filter(metrics, fn {k, _} -> not String.ends_with?(to_string(k), "_count") end) - - totals - |> Enum.sort_by(fn {_, time} -> time end, :desc) - |> Enum.each(&IO.puts(:stderr, format_metric_line(&1))) - - IO.puts(:stderr, "------------------------------------------- -") - end - end -end diff --git a/test/codeqa/collector_test.exs b/test/codeqa/collector_test.exs index 0a2a3f5..f2aeb59 100644 --- a/test/codeqa/collector_test.exs +++ b/test/codeqa/collector_test.exs @@ -1,7 +1,12 @@ defmodule CodeQA.CollectorTest do - use ExUnit.Case, async: true + use ExUnit.Case, async: false - alias CodeQA.Collector + alias CodeQA.Engine.Collector + + 
setup do + CodeQA.Config.reset() + on_exit(&CodeQA.Config.reset/0) + end describe "ignored?/2" do test "matches simple wildcard pattern" do @@ -93,7 +98,7 @@ defmodule CodeQA.CollectorTest do %{path: "lib/bar.ex", status: "modified"} ] - result = Collector.reject_ignored(items, ["test/*"], & &1.path) + result = Collector.reject_ignored(items, & &1.path, ["test/*"]) assert length(result) == 1 assert hd(result).path == "lib/bar.ex" @@ -101,7 +106,32 @@ defmodule CodeQA.CollectorTest do test "empty patterns returns list unchanged" do items = [%{path: "test/foo.ex"}] - assert Collector.reject_ignored(items, [], & &1.path) == items + assert Collector.reject_ignored(items, & &1.path, []) == items + end + end + + describe "collect_files/2 respects .gitignore" do + setup do + tmp_dir = + Path.join(System.tmp_dir!(), "codeqa_git_collector_#{System.unique_integer([:positive])}") + + File.mkdir_p!(Path.join(tmp_dir, "lib")) + System.cmd("git", ["init"], cd: tmp_dir) + System.cmd("git", ["config", "user.email", "test@test.com"], cd: tmp_dir) + System.cmd("git", ["config", "user.name", "Test"], cd: tmp_dir) + File.write!(Path.join(tmp_dir, "lib/app.ex"), "defmodule App do\nend") + File.write!(Path.join(tmp_dir, "lib/generated.ex"), "defmodule Gen do\nend") + File.write!(Path.join(tmp_dir, ".gitignore"), "lib/generated.ex\n") + + on_exit(fn -> File.rm_rf!(tmp_dir) end) + + %{tmp_dir: tmp_dir} + end + + test "excludes files listed in .gitignore", %{tmp_dir: tmp_dir} do + files = Collector.collect_files(tmp_dir) + assert Map.has_key?(files, "lib/app.ex") + refute Map.has_key?(files, "lib/generated.ex") end end @@ -125,9 +155,21 @@ defmodule CodeQA.CollectorTest do end test "with ignore patterns excludes matching files", %{tmp_dir: tmp_dir} do - files = Collector.collect_files(tmp_dir, ignore_patterns: ["test/*"]) + files = Collector.collect_files(tmp_dir, ["test/*"]) assert Map.has_key?(files, "lib/app.ex") refute Map.has_key?(files, "test/app_test.exs") end + + test "respects 
ignore_paths from .codeqa.yml", %{tmp_dir: tmp_dir} do + File.mkdir_p!(Path.join(tmp_dir, "generated")) + File.write!(Path.join(tmp_dir, "generated/schema.ex"), "defmodule Schema do\nend") + File.write!(Path.join(tmp_dir, ".codeqa.yml"), "ignore_paths:\n - generated/**\n") + + CodeQA.Config.load(tmp_dir) + files = Collector.collect_files(tmp_dir) + + assert Map.has_key?(files, "lib/app.ex") + refute Map.has_key?(files, "generated/schema.ex") + end end end diff --git a/test/codeqa/engine/analyzer_test.exs b/test/codeqa/engine/analyzer_test.exs new file mode 100644 index 0000000..ccc4a41 --- /dev/null +++ b/test/codeqa/engine/analyzer_test.exs @@ -0,0 +1,44 @@ +defmodule CodeQA.Engine.AnalyzerTest do + use ExUnit.Case, async: true + + describe "analyze_file/2" do + test "returns a metrics map with group keys" do + content = "defmodule Foo do\n def bar, do: :ok\nend\n" + result = CodeQA.Engine.Analyzer.analyze_file("lib/foo.ex", content) + assert is_map(result) + assert map_size(result) > 0 + # Each value should be a map of metric keys to numbers + Enum.each(result, fn {_group, keys} -> + assert is_map(keys) + end) + end + end + + describe "analyze_codebase_aggregate/2" do + test "returns aggregate map with mean_ keys" do + files = %{ + "lib/a.ex" => "defmodule A do\n def foo, do: :a\nend\n", + "lib/b.ex" => "defmodule B do\n def bar, do: :b\nend\n" + } + + agg = CodeQA.Engine.Analyzer.analyze_codebase_aggregate(files) + assert is_map(agg) + # At least one group should have mean_ keys + Enum.each(agg, fn {_group, keys} -> + Enum.each(keys, fn {key, val} -> + assert String.starts_with?(key, "mean_") or String.starts_with?(key, "std_") or + String.starts_with?(key, "min_") or String.starts_with?(key, "max_") + + assert is_float(val) or is_integer(val) + end) + end) + end + + test "does not run codebase metrics (returns quickly for large input)" do + # Just assert it returns without error for a reasonable input + files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: 
1\nend\n"} + agg = CodeQA.Engine.Analyzer.analyze_codebase_aggregate(files) + assert is_map(agg) + end + end +end diff --git a/test/codeqa/formatter_test.exs b/test/codeqa/formatter_test.exs index ccefca5..de5be57 100644 --- a/test/codeqa/formatter_test.exs +++ b/test/codeqa/formatter_test.exs @@ -126,6 +126,7 @@ defmodule CodeQA.FormatterTest do test "file changes section reflects metric directions from codebase data" do result = Formatter.format_github(@sample_comparison) + # halstead.mean_volume drops 100/500 = 20% → "decreased"; readability rises 10/65 ≈ 15% → "increased slightly" refute result =~ "File changes — 1 modified — all metrics stable" end diff --git a/test/codeqa/git_test.exs b/test/codeqa/git_test.exs new file mode 100644 index 0000000..2d1ec2f --- /dev/null +++ b/test/codeqa/git_test.exs @@ -0,0 +1,112 @@ +defmodule CodeQA.GitTest do + use ExUnit.Case, async: true + + alias CodeQA.Git + + describe "gitignored_files/2" do + test "returns files that are gitignored" do + in_tmp_git_repo(fn repo -> + File.write!(Path.join(repo, ".gitignore"), "*.secret\n") + File.write!(Path.join(repo, "config.secret"), "password=123") + File.write!(Path.join(repo, "app.ex"), "defmodule App do end") + + ignored = Git.gitignored_files(repo, ["config.secret", "app.ex"]) + + assert ignored == MapSet.new(["config.secret"]) + end) + end + + test "returns empty set when no files are gitignored" do + in_tmp_git_repo(fn repo -> + File.write!(Path.join(repo, ".gitignore"), "*.secret\n") + File.write!(Path.join(repo, "app.ex"), "defmodule App do end") + + ignored = Git.gitignored_files(repo, ["app.ex"]) + + assert ignored == MapSet.new() + end) + end + + test "handles empty file list" do + in_tmp_git_repo(fn repo -> + File.write!(Path.join(repo, ".gitignore"), "*.secret\n") + + ignored = Git.gitignored_files(repo, []) + + assert ignored == MapSet.new() + end) + end + + test "respects nested .gitignore files" do + in_tmp_git_repo(fn repo -> + File.mkdir_p!(Path.join(repo, 
"subdir")) + File.write!(Path.join(repo, "subdir/.gitignore"), "local.ex\n") + File.write!(Path.join(repo, "subdir/local.ex"), "# local") + File.write!(Path.join(repo, "subdir/other.ex"), "# other") + + ignored = Git.gitignored_files(repo, ["subdir/local.ex", "subdir/other.ex"]) + + assert ignored == MapSet.new(["subdir/local.ex"]) + end) + end + + test "handles more than 1000 paths without ARG_MAX issues" do + in_tmp_git_repo(fn repo -> + File.write!(Path.join(repo, ".gitignore"), "ignored.ex\n") + + paths = Enum.map(1..1200, fn i -> "file_#{i}.ex" end) ++ ["ignored.ex"] + + ignored = Git.gitignored_files(repo, paths) + + assert ignored == MapSet.new(["ignored.ex"]) + end) + end + + test "filters files inside a gitignored directory" do + in_tmp_git_repo(fn repo -> + File.write!(Path.join(repo, ".gitignore"), "/docs/\n") + + ignored = + Git.gitignored_files(repo, [ + "docs/readme.md", + "docs/guide/intro.md", + "lib/app.ex" + ]) + + assert ignored == MapSet.new(["docs/readme.md", "docs/guide/intro.md"]) + end) + end + + test "filters gitignored-pattern files even when already tracked by git" do + in_tmp_git_repo(fn repo -> + File.mkdir_p!(Path.join(repo, "docs")) + File.mkdir_p!(Path.join(repo, "lib")) + File.write!(Path.join(repo, "docs/readme.md"), "# Docs") + File.write!(Path.join(repo, "lib/app.ex"), "defmodule App do end") + + System.cmd("git", ["add", "."], cd: repo) + System.cmd("git", ["commit", "-m", "initial"], cd: repo) + + File.write!(Path.join(repo, ".gitignore"), "/docs/\n") + + ignored = Git.gitignored_files(repo, ["docs/readme.md", "lib/app.ex"]) + + assert ignored == MapSet.new(["docs/readme.md"]) + end) + end + end + + defp in_tmp_git_repo(fun) do + tmp = Path.join(System.tmp_dir!(), "codeqa_git_test_#{:rand.uniform(999_999)}") + File.mkdir_p!(tmp) + System.cmd("git", ["init"], cd: tmp) + System.cmd("git", ["config", "user.email", "test@test.com"], cd: tmp) + System.cmd("git", ["config", "user.name", "Test"], cd: tmp) + + try do + fun.(tmp) + 
after + File.rm_rf!(tmp) + end + end +end diff --git a/test/codeqa/metrics/codebase/similarity_test.exs b/test/codeqa/metrics/codebase/similarity_test.exs new file mode 100644 index 0000000..d20dbf1 --- /dev/null +++ b/test/codeqa/metrics/codebase/similarity_test.exs @@ -0,0 +1,79 @@ +defmodule CodeQA.Metrics.Codebase.SimilarityTest do + use ExUnit.Case, async: true + alias CodeQA.Metrics.Codebase.Similarity + + describe "name/0" do + test "returns similarity" do + assert Similarity.name() == "similarity" + end + end + + describe "analyze/2 with fewer than 2 files" do + test "empty codebase returns zero density" do + result = Similarity.analyze(%{}) + assert result["cross_file_density"] == 0.0 + end + + test "single file returns zero density" do + result = Similarity.analyze(%{"a.ex" => "x = 1"}) + assert result["cross_file_density"] == 0.0 + end + + test "fewer than 2 files returns empty ncd_pairs" do + result = Similarity.analyze(%{"a.ex" => "x = 1"}) + assert result["ncd_pairs"] == %{} + end + end + + describe "analyze/2 cross_file_density" do + test "returns a float between 0 and 2" do + files = %{"a.ex" => "def foo, do: 1", "b.ex" => "def bar, do: 2"} + result = Similarity.analyze(files) + assert is_float(result["cross_file_density"]) + assert result["cross_file_density"] >= 0.0 + end + + test "identical files produce higher density than dissimilar files" do + content = String.duplicate("def foo do\n x = 1\nend\n", 20) + identical = %{"a.ex" => content, "b.ex" => content} + dissimilar = %{"a.ex" => content, "b.ex" => String.duplicate("zzz qqq rrr\n", 20)} + + assert Similarity.analyze(identical)["cross_file_density"] > + Similarity.analyze(dissimilar)["cross_file_density"] + end + + test "does not return ncd_pairs key by default" do + files = %{"a.ex" => "x = 1", "b.ex" => "y = 2"} + result = Similarity.analyze(files) + refute Map.has_key?(result, "ncd_pairs") + end + end + + describe "analyze/2 with show_ncd: true" do + test "returns ncd_pairs key" do + files 
= %{"a.ex" => "x = 1", "b.ex" => "y = 2"} + result = Similarity.analyze(files, show_ncd: true) + assert Map.has_key?(result, "ncd_pairs") + end + + test "identical files have ncd near 0" do + content = String.duplicate("def foo do\n x = 1\nend\n", 10) + files = %{"a.ex" => content, "b.ex" => content} + + result = Similarity.analyze(files, show_ncd: true, ncd_paths: ["a.ex"]) + pairs = result["ncd_pairs"] + + scores = pairs |> Map.values() |> List.flatten() |> Enum.map(& &1["score"]) + assert Enum.all?(scores, &(&1 < 0.2)) + end + + test "ncd_paths restricts which files are compared" do + files = %{"a.ex" => "x = 1", "b.ex" => "y = 2", "c.ex" => "z = 3"} + result = Similarity.analyze(files, show_ncd: true, ncd_paths: ["a.ex"]) + pairs = result["ncd_pairs"] + assert Map.has_key?(pairs, "a.ex") + refute Map.has_key?(pairs, "b.ex") + refute Map.has_key?(pairs, "c.ex") + end + end +end diff --git a/test/codeqa/metrics/branching_test.exs b/test/codeqa/metrics/file/branching_test.exs similarity index 79% rename from test/codeqa/metrics/branching_test.exs rename to test/codeqa/metrics/file/branching_test.exs index d794780..d527a60 100644 --- a/test/codeqa/metrics/branching_test.exs +++ b/test/codeqa/metrics/file/branching_test.exs @@ -1,8 +1,8 @@ -defmodule CodeQA.Metrics.BranchingTest do +defmodule CodeQA.Metrics.File.BranchingTest do use ExUnit.Case, async: true - alias CodeQA.Metrics.Branching - alias CodeQA.Pipeline + alias CodeQA.Metrics.File.Branching + alias CodeQA.Engine.Pipeline defp ctx(code), do: Pipeline.build_file_context(code) defp density(code), do: Branching.analyze(ctx(code))["branching_density"] @@ -27,7 +27,9 @@ defmodule CodeQA.Metrics.BranchingTest do for keyword <- Branching.branching_keywords() |> MapSet.to_list() |> Enum.sort() do test "counts #{keyword} as a branching token" do code = "line_before\n#{unquote(keyword)} condition\nline_after" - assert density(code) > 0.0, "expected '#{unquote(keyword)}' to be counted as a branching token" + + assert 
density(code) > 0.0, + "expected '#{unquote(keyword)}' to be counted as a branching token" end end end diff --git a/test/codeqa/metrics/function_metrics_test.exs b/test/codeqa/metrics/file/function_metrics_test.exs similarity index 77% rename from test/codeqa/metrics/function_metrics_test.exs rename to test/codeqa/metrics/file/function_metrics_test.exs index caa1f6b..eee4bc2 100644 --- a/test/codeqa/metrics/function_metrics_test.exs +++ b/test/codeqa/metrics/file/function_metrics_test.exs @@ -1,8 +1,8 @@ -defmodule CodeQA.Metrics.FunctionMetricsTest do +defmodule CodeQA.Metrics.File.FunctionMetricsTest do use ExUnit.Case, async: true - alias CodeQA.Metrics.FunctionMetrics - alias CodeQA.Pipeline + alias CodeQA.Metrics.File.FunctionMetrics + alias CodeQA.Engine.Pipeline defp ctx(code), do: Pipeline.build_file_context(code) defp analyze(code), do: FunctionMetrics.analyze(ctx(code)) @@ -50,8 +50,10 @@ defmodule CodeQA.Metrics.FunctionMetricsTest do for keyword <- FunctionMetrics.func_keywords() do test "detects function starting with #{keyword}" do code = "#{unquote(keyword)} my_func(x) {\n return x\n}" - result = FunctionMetrics.analyze(CodeQA.Pipeline.build_file_context(code)) - assert result["avg_function_lines"] > 0, "expected '#{unquote(keyword)}' to be detected as function start" + result = FunctionMetrics.analyze(CodeQA.Engine.Pipeline.build_file_context(code)) + + assert result["avg_function_lines"] > 0, + "expected '#{unquote(keyword)}' to be detected as function start" end end end @@ -60,8 +62,10 @@ defmodule CodeQA.Metrics.FunctionMetricsTest do for modifier <- FunctionMetrics.access_modifiers() do test "detects method starting with #{modifier}" do code = "#{unquote(modifier)} void MyMethod() {\n return;\n}" - result = FunctionMetrics.analyze(CodeQA.Pipeline.build_file_context(code)) - assert result["avg_function_lines"] > 0, "expected '#{unquote(modifier)}' access modifier to trigger method detection" + result = 
FunctionMetrics.analyze(CodeQA.Engine.Pipeline.build_file_context(code)) + + assert result["avg_function_lines"] > 0, + "expected '#{unquote(modifier)}' access modifier to trigger method detection" end end end From 653e8308281cb82c69233c83ebeee91650e6ff7d Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 19 Mar 2026 18:25:50 +0100 Subject: [PATCH 03/71] feat(ast): add AST parsing system with lexing, signals, and node classification Introduce a full AST abstraction layer: token lexing (string, whitespace, newline tokens), structural and classification signals, parser with signal stream, node types (function, module, import, doc, etc.), and compound node builder with enrichment. Enables language-agnostic code structure analysis. Co-Authored-By: Claude Sonnet 4.6 --- .../ast/classification/node_classifier.ex | 123 +++++++++ .../ast/classification/node_protocol.ex | 29 ++ .../ast/classification/node_type_detector.ex | 20 ++ lib/codeqa/ast/enrichment/compound_node.ex | 41 +++ .../ast/enrichment/compound_node_builder.ex | 157 +++++++++++ lib/codeqa/ast/enrichment/node.ex | 70 +++++ lib/codeqa/ast/enrichment/node_analyzer.ex | 65 +++++ lib/codeqa/ast/lexing/newline_token.ex | 29 ++ lib/codeqa/ast/lexing/string_token.ex | 49 ++++ lib/codeqa/ast/lexing/token.ex | 45 +++ lib/codeqa/ast/lexing/token_normalizer.ex | 259 ++++++++++++++++++ lib/codeqa/ast/lexing/token_protocol.ex | 59 ++++ lib/codeqa/ast/lexing/whitespace_token.ex | 29 ++ lib/codeqa/ast/nodes/attribute_node.ex | 65 +++++ lib/codeqa/ast/nodes/code_node.ex | 44 +++ lib/codeqa/ast/nodes/doc_node.ex | 44 +++ lib/codeqa/ast/nodes/function_node.ex | 57 ++++ lib/codeqa/ast/nodes/import_node.ex | 45 +++ lib/codeqa/ast/nodes/module_node.ex | 46 ++++ lib/codeqa/ast/nodes/test_node.ex | 45 +++ lib/codeqa/ast/parsing/parser.ex | 233 ++++++++++++++++ lib/codeqa/ast/parsing/signal.ex | 44 +++ lib/codeqa/ast/parsing/signal_registry.ex | 94 +++++++ lib/codeqa/ast/parsing/signal_stream.ex | 54 ++++ 
.../classification/attribute_signal.ex | 66 +++++ .../classification/comment_density_signal.ex | 61 +++++ .../signals/classification/config_signal.ex | 54 ++++ .../ast/signals/classification/data_signal.ex | 67 +++++ .../ast/signals/classification/doc_signal.ex | 29 ++ .../signals/classification/function_signal.ex | 72 +++++ .../signals/classification/import_signal.ex | 62 +++++ .../signals/classification/module_signal.ex | 71 +++++ .../ast/signals/classification/test_signal.ex | 63 +++++ .../ast/signals/classification/type_signal.ex | 49 ++++ .../structural/access_modifier_signal.ex | 80 ++++++ .../structural/assignment_function_signal.ex | 135 +++++++++ .../signals/structural/blank_line_signal.ex | 45 +++ .../ast/signals/structural/bracket_signal.ex | 51 ++++ .../signals/structural/branch_split_signal.ex | 58 ++++ .../signals/structural/colon_indent_signal.ex | 83 ++++++ .../structural/comment_divider_signal.ex | 76 +++++ .../signals/structural/decorator_signal.ex | 81 ++++++ .../structural/dedent_to_zero_signal.ex | 87 ++++++ .../structural/doc_comment_lead_signal.ex | 65 +++++ .../ast/signals/structural/keyword_signal.ex | 83 ++++++ .../signals/structural/sql_block_signal.ex | 55 ++++ .../signals/structural/triple_quote_signal.ex | 31 +++ test/support/counter_signal.ex | 19 ++ test/support/node_matcher.ex | 17 ++ 49 files changed, 3306 insertions(+) create mode 100644 lib/codeqa/ast/classification/node_classifier.ex create mode 100644 lib/codeqa/ast/classification/node_protocol.ex create mode 100644 lib/codeqa/ast/classification/node_type_detector.ex create mode 100644 lib/codeqa/ast/enrichment/compound_node.ex create mode 100644 lib/codeqa/ast/enrichment/compound_node_builder.ex create mode 100644 lib/codeqa/ast/enrichment/node.ex create mode 100644 lib/codeqa/ast/enrichment/node_analyzer.ex create mode 100644 lib/codeqa/ast/lexing/newline_token.ex create mode 100644 lib/codeqa/ast/lexing/string_token.ex create mode 100644 lib/codeqa/ast/lexing/token.ex create 
mode 100644 lib/codeqa/ast/lexing/token_normalizer.ex create mode 100644 lib/codeqa/ast/lexing/token_protocol.ex create mode 100644 lib/codeqa/ast/lexing/whitespace_token.ex create mode 100644 lib/codeqa/ast/nodes/attribute_node.ex create mode 100644 lib/codeqa/ast/nodes/code_node.ex create mode 100644 lib/codeqa/ast/nodes/doc_node.ex create mode 100644 lib/codeqa/ast/nodes/function_node.ex create mode 100644 lib/codeqa/ast/nodes/import_node.ex create mode 100644 lib/codeqa/ast/nodes/module_node.ex create mode 100644 lib/codeqa/ast/nodes/test_node.ex create mode 100644 lib/codeqa/ast/parsing/parser.ex create mode 100644 lib/codeqa/ast/parsing/signal.ex create mode 100644 lib/codeqa/ast/parsing/signal_registry.ex create mode 100644 lib/codeqa/ast/parsing/signal_stream.ex create mode 100644 lib/codeqa/ast/signals/classification/attribute_signal.ex create mode 100644 lib/codeqa/ast/signals/classification/comment_density_signal.ex create mode 100644 lib/codeqa/ast/signals/classification/config_signal.ex create mode 100644 lib/codeqa/ast/signals/classification/data_signal.ex create mode 100644 lib/codeqa/ast/signals/classification/doc_signal.ex create mode 100644 lib/codeqa/ast/signals/classification/function_signal.ex create mode 100644 lib/codeqa/ast/signals/classification/import_signal.ex create mode 100644 lib/codeqa/ast/signals/classification/module_signal.ex create mode 100644 lib/codeqa/ast/signals/classification/test_signal.ex create mode 100644 lib/codeqa/ast/signals/classification/type_signal.ex create mode 100644 lib/codeqa/ast/signals/structural/access_modifier_signal.ex create mode 100644 lib/codeqa/ast/signals/structural/assignment_function_signal.ex create mode 100644 lib/codeqa/ast/signals/structural/blank_line_signal.ex create mode 100644 lib/codeqa/ast/signals/structural/bracket_signal.ex create mode 100644 lib/codeqa/ast/signals/structural/branch_split_signal.ex create mode 100644 lib/codeqa/ast/signals/structural/colon_indent_signal.ex create mode 
100644 lib/codeqa/ast/signals/structural/comment_divider_signal.ex create mode 100644 lib/codeqa/ast/signals/structural/decorator_signal.ex create mode 100644 lib/codeqa/ast/signals/structural/dedent_to_zero_signal.ex create mode 100644 lib/codeqa/ast/signals/structural/doc_comment_lead_signal.ex create mode 100644 lib/codeqa/ast/signals/structural/keyword_signal.ex create mode 100644 lib/codeqa/ast/signals/structural/sql_block_signal.ex create mode 100644 lib/codeqa/ast/signals/structural/triple_quote_signal.ex create mode 100644 test/support/counter_signal.ex create mode 100644 test/support/node_matcher.ex diff --git a/lib/codeqa/ast/classification/node_classifier.ex b/lib/codeqa/ast/classification/node_classifier.ex new file mode 100644 index 0000000..57c559d --- /dev/null +++ b/lib/codeqa/ast/classification/node_classifier.ex @@ -0,0 +1,123 @@ +defmodule CodeQA.AST.Classification.NodeClassifier do + @moduledoc """ + Classifies a Node into a typed struct by running classification signals + over its tokens and weighing their votes. + + ## How it works + + Six classification signals scan the node's token stream in parallel via + `SignalStream`. Each signal emits weighted votes (e.g. `{:function_vote, 3}`) + when it detects a pattern indicating a node type. The classifier sums weights + per type and picks the winner. Ties and no-votes fall back to `:code`. + + ## Signals and votes + + | Signal | Vote key | Patterns detected | + |---|---|---| + | `DocSignal` | `:doc_vote` | `` token anywhere | + | `AttributeSignal` | `:attribute_vote` | `@name` at indent 0 | + | `FunctionSignal` | `:function_vote` | `def`, `func`, `fn`, etc. at indent 0 | + | `ModuleSignal` | `:module_vote` | `defmodule`, `class`, `module`, etc. at indent 0 | + | `ImportSignal` | `:import_vote` | `import`, `use`, `alias`, etc. at indent 0 | + | `TestSignal` | `:test_vote` | `test`, `describe`, `it`, etc. 
at indent 0 | + + ## Weights + + Weight 3 = first keyword seen (strong match); weight 1 = keyword later in + block (weak match, e.g. after a leading comment). `DocSignal` always emits + weight 3 and wins when a `` token is present, since triple-quoted strings + are unambiguous. + + ## Type-specific fields + + `FunctionNode.name/arity/visibility`, `ModuleNode.name/kind`, etc. all default + to `nil`. Population of those fields is left to a future enrichment pass. + """ + + alias CodeQA.AST.Enrichment.Node + alias CodeQA.AST.Parsing.SignalStream + + alias CodeQA.AST.Signals.Classification.{ + DocSignal, + AttributeSignal, + FunctionSignal, + ModuleSignal, + ImportSignal, + TestSignal + } + + alias CodeQA.AST.Nodes.{ + CodeNode, + DocNode, + FunctionNode, + ModuleNode, + ImportNode, + AttributeNode, + TestNode + } + + @classification_signals [ + %DocSignal{}, + %AttributeSignal{}, + %FunctionSignal{}, + %ModuleSignal{}, + %ImportSignal{}, + %TestSignal{} + ] + + @type_modules %{ + doc: DocNode, + attribute: AttributeNode, + function: FunctionNode, + module: ModuleNode, + import: ImportNode, + test: TestNode, + code: CodeNode + } + + @doc """ + Classify a Node into the most specific typed struct. + + Runs classification signals, weighs votes, and delegates to the winning + struct's `cast/1` to build the result. Type-specific fields default to nil. 
+ """ + @spec classify(Node.t(), module()) :: term() + def classify(%Node{} = node, lang_mod) do + type = vote(node.tokens, lang_mod) + @type_modules[type].cast(node) + end + + defp vote(tokens, lang_mod) do + tokens + |> run_signals(lang_mod) + |> tally() + |> winner() + end + + defp run_signals(tokens, lang_mod) do + SignalStream.run(tokens, @classification_signals, lang_mod) + |> List.flatten() + |> Enum.filter(fn {_src, group, _name, _val} -> group == :classification end) + end + + defp tally(emissions) do + Enum.reduce(emissions, %{}, fn {_src, _grp, name, weight}, acc -> + Map.update(acc, name, weight, &(&1 + weight)) + end) + end + + defp winner(votes) when map_size(votes) == 0, do: :code + + defp winner(votes) do + {vote_name, _weight} = Enum.max_by(votes, fn {_, w} -> w end) + vote_to_type(vote_name) + end + + defp vote_to_type(:doc_vote), do: :doc + defp vote_to_type(:attribute_vote), do: :attribute + defp vote_to_type(:function_vote), do: :function + defp vote_to_type(:module_vote), do: :module + defp vote_to_type(:import_vote), do: :import + defp vote_to_type(:test_vote), do: :test + defp vote_to_type(_), do: :code +end diff --git a/lib/codeqa/ast/classification/node_protocol.ex b/lib/codeqa/ast/classification/node_protocol.ex new file mode 100644 index 0000000..fa4943d --- /dev/null +++ b/lib/codeqa/ast/classification/node_protocol.ex @@ -0,0 +1,29 @@ +defprotocol CodeQA.AST.Classification.NodeProtocol do + @moduledoc """ + Common interface for all typed AST node structs. + + All node struct types (CodeNode, DocNode, FunctionNode, etc.) implement this + protocol, allowing downstream code to work with any node type uniformly. 
+ """ + + @spec tokens(t()) :: [term()] + def tokens(node) + + @spec flat_tokens(t()) :: [term()] + def flat_tokens(node) + + @spec line_count(t()) :: non_neg_integer() + def line_count(node) + + @spec children(t()) :: [term()] + def children(node) + + @spec start_line(t()) :: non_neg_integer() | nil + def start_line(node) + + @spec end_line(t()) :: non_neg_integer() | nil + def end_line(node) + + @spec label(t()) :: term() | nil + def label(node) +end diff --git a/lib/codeqa/ast/classification/node_type_detector.ex b/lib/codeqa/ast/classification/node_type_detector.ex new file mode 100644 index 0000000..6175968 --- /dev/null +++ b/lib/codeqa/ast/classification/node_type_detector.ex @@ -0,0 +1,20 @@ +defmodule CodeQA.AST.Classification.NodeTypeDetector do + @moduledoc """ + Classifies a list of raw `Node` structs (from `Parser`) into typed structs. + + Each node is classified by `NodeClassifier`, which runs classification signals + over the node's tokens and picks the highest-voted type. See `NodeClassifier` + for the full list of signals and their weights. + """ + + alias CodeQA.AST.Enrichment.Node + alias CodeQA.AST.Classification.NodeClassifier + + @doc """ + Classify each node in the list into the most specific typed struct. + """ + @spec detect_types([Node.t()], module()) :: [term()] + def detect_types(blocks, lang_mod) do + Enum.map(blocks, &NodeClassifier.classify(&1, lang_mod)) + end +end diff --git a/lib/codeqa/ast/enrichment/compound_node.ex b/lib/codeqa/ast/enrichment/compound_node.ex new file mode 100644 index 0000000..88a594c --- /dev/null +++ b/lib/codeqa/ast/enrichment/compound_node.ex @@ -0,0 +1,41 @@ +defmodule CodeQA.AST.Enrichment.CompoundNode do + @moduledoc """ + Groups semantically related typed nodes together. 
+ + A compound node represents a complete "unit" in source code — combining + documentation, type annotations, and implementation: + + - `docs` — [DocNode.t()] (triple-quoted docstrings) + - `typespecs` — [AttributeNode.t()] (@spec, @type, etc.) + - `code` — [Node.t()] with type :code (implementation clauses) + + Boundaries span all constituent nodes in source order (docs → typespecs → + code), with leading/trailing whitespace tokens stripped. Column values are + read from the `col` field of the relevant Token structs — Node has no col + fields. + + A bare code node with no preceding docs/typespecs is still wrapped in a + CompoundNode (with empty `docs` and `typespecs`). + """ + + alias CodeQA.AST.Enrichment.Node + alias CodeQA.AST.Nodes.AttributeNode + + defstruct docs: [], + typespecs: [], + code: [], + start_line: nil, + start_col: nil, + end_line: nil, + end_col: nil + + @type t :: %__MODULE__{ + docs: [Node.t()], + typespecs: [AttributeNode.t()], + code: [Node.t()], + start_line: non_neg_integer() | nil, + start_col: non_neg_integer() | nil, + end_line: non_neg_integer() | nil, + end_col: non_neg_integer() | nil + } +end diff --git a/lib/codeqa/ast/enrichment/compound_node_builder.ex b/lib/codeqa/ast/enrichment/compound_node_builder.ex new file mode 100644 index 0000000..f446e72 --- /dev/null +++ b/lib/codeqa/ast/enrichment/compound_node_builder.ex @@ -0,0 +1,157 @@ +defmodule CodeQA.AST.Enrichment.CompoundNodeBuilder do + @moduledoc """ + Groups typed nodes into CompoundNode structs. + + A new compound starts when: + 1. A :doc or :typespec node appears after at least one :code node + 2. The trailing whitespace of the previous node contains 2+ tokens + + All consecutive :code nodes with no boundary between them accumulate + into the same compound's `code` list. + + Sub-blocks of :code nodes that have type :doc or :typespec are + promoted to the compound's `docs`/`typespecs` lists. 
+ """ + + alias CodeQA.AST.Enrichment.Node + alias CodeQA.AST.Enrichment.CompoundNode + alias CodeQA.AST.Classification.NodeProtocol + alias CodeQA.AST.Nodes.{DocNode, AttributeNode} + alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken} + + @doc """ + Groups a list of typed nodes into CompoundNode structs. + """ + @spec build([Node.t()]) :: [CompoundNode.t()] + def build([]), do: [] + + def build(blocks) do + # Accumulator: {current_compound, prev_trailing_ws, finalized_compounds} + # prev_trailing_ws carries the trailing / tokens stripped from the + # PREVIOUS node. Blank-line boundaries are detected on the previous node's + # trailing whitespace — BlankLineRule places blank-line tokens at the + # END of the node that precedes the split, not at the start of the new one. + {current, _, compounds} = + Enum.reduce(blocks, {empty_compound(), [], []}, fn block, + {current, prev_trailing_ws, acc} -> + {content_tokens, trailing_ws} = split_trailing_whitespace(block.tokens) + clean_block = %{block | tokens: content_tokens} + # Check the PREVIOUS node's trailing whitespace for blank-line boundary + blank_boundary = blank_line_boundary?(prev_trailing_ws) + + cond do + # Rule 1: doc/typespec after code → flush and start new compound + (is_struct(block, DocNode) or is_struct(block, AttributeNode)) and current.code != [] -> + {start_compound(clean_block), trailing_ws, [finalize(current) | acc]} + + # Rule 2: blank-line boundary on previous node → flush and start fresh + blank_boundary and not empty_compound?(current) -> + {start_compound(clean_block), trailing_ws, [finalize(current) | acc]} + + # No boundary — accumulate into current + true -> + {add_block(current, clean_block), trailing_ws, acc} + end + end) + + compounds + |> then(fn acc -> + if empty_compound?(current), do: acc, else: [finalize(current) | acc] + end) + |> Enum.reverse() + end + + defp empty_compound, do: %CompoundNode{} + + defp empty_compound?(%CompoundNode{docs: [], typespecs: [], code: []}), do: true 
+ defp empty_compound?(_), do: false + + defp add_block(%CompoundNode{} = compound, block) when is_struct(block, DocNode) do + %CompoundNode{compound | docs: compound.docs ++ [block]} + end + + defp add_block(%CompoundNode{} = compound, block) when is_struct(block, AttributeNode) do + %CompoundNode{compound | typespecs: compound.typespecs ++ [block]} + end + + defp add_block(%CompoundNode{} = compound, block) do + {promoted_docs, promoted_specs, clean_children} = promote_sub_blocks(block.children) + clean_block = %{block | children: clean_children} + + %CompoundNode{ + compound + | code: compound.code ++ [clean_block], + docs: compound.docs ++ promoted_docs, + typespecs: compound.typespecs ++ promoted_specs + } + end + + defp start_compound(new_block) do + add_block(empty_compound(), new_block) + end + + # Separates children by type — :doc/:typespec go up to the compound level. + defp promote_sub_blocks(children) do + Enum.reduce(children, {[], [], []}, fn sub, {docs, specs, code} -> + case sub.type do + :doc -> {docs ++ [sub], specs, code} + :typespec -> {docs, specs ++ [sub], code} + _ -> {docs, specs, code ++ [sub]} + end + end) + end + + # Strips trailing / tokens from a node's token list. + # Returns {content_tokens, trailing_ws_tokens}. + defp split_trailing_whitespace(tokens) do + last_content_idx = + tokens + |> Enum.with_index() + |> Enum.reverse() + |> Enum.find_index(fn {t, _} -> + not is_map(t) or t.kind not in [WhitespaceToken.kind(), NewlineToken.kind()] + end) + + case last_content_idx do + nil -> + {[], tokens} + + rev_idx -> + content_len = length(tokens) - rev_idx + {Enum.slice(tokens, 0, content_len), Enum.slice(tokens, content_len..-1//1)} + end + end + + # A blank-line boundary exists when the trailing whitespace contains 3+ tokens + # (i.e. 2+ blank lines). A single blank line (2 NLs: end-of-line + blank line) is + # common within a compound (e.g. between function clauses) and does not split. 
+ defp blank_line_boundary?(trailing_ws) do + Enum.count(trailing_ws, &(&1.kind == NewlineToken.kind())) >= 3 + end + + # Computes boundaries from all constituent nodes in source order: + # docs → typespecs → code. Reads col directly from Token structs. + defp finalize(%CompoundNode{} = compound) do + all_blocks = compound.docs ++ compound.typespecs ++ compound.code + all_tokens = Enum.flat_map(all_blocks, &NodeProtocol.flat_tokens/1) + + first_token = + Enum.find( + all_tokens, + &(is_map(&1) and &1.kind not in [WhitespaceToken.kind(), NewlineToken.kind()]) + ) + + last_token = + all_tokens + |> Enum.reverse() + |> Enum.find(&(is_map(&1) and &1.kind not in [WhitespaceToken.kind(), NewlineToken.kind()])) + + %CompoundNode{ + compound + | start_line: first_token && first_token.line, + start_col: first_token && first_token.col, + end_line: last_token && last_token.line, + end_col: last_token && last_token.col + } + end +end diff --git a/lib/codeqa/ast/enrichment/node.ex b/lib/codeqa/ast/enrichment/node.ex new file mode 100644 index 0000000..65e4b23 --- /dev/null +++ b/lib/codeqa/ast/enrichment/node.ex @@ -0,0 +1,70 @@ +defmodule CodeQA.AST.Enrichment.Node do + @moduledoc """ + A detected code node with optional nested sub-blocks. + + ## Fields + + - `tokens` — aggregated code content: for leaf nodes, the original token stream; + for non-leaf nodes, the flat concatenation of all children's `tokens`. + Use this for content comparison and metrics. + - `line_count` — number of source lines spanned by this node: `end_line - start_line + 1` + when both are available, else `1`. + - `children` — nested `Node.t()` structs detected by enclosure rules + (`BracketRule`, `ColonIndentationRule`). + - `label` — arbitrary term attached by the caller. Set to `"path:start_line"` + by `NearDuplicateBlocks.analyze/2` for human-readable pair reporting. + - `start_line` — 1-based line number of the first token in this node, populated by + `Parser` from `List.first(tokens).line`. 
+ - `end_line` — 1-based line number of the last token in this node, populated by + `Parser` from `List.last(tokens).line`. + + `start_line` and `end_line` may be `nil` for synthetic nodes created in tests + without line metadata. + """ + + @enforce_keys [:tokens, :line_count, :children] + defstruct [ + :tokens, + :line_count, + :children, + :label, + :start_line, + :end_line, + type: :code + ] + + @type t :: %__MODULE__{ + tokens: [CodeQA.AST.Lexing.Token.t()], + line_count: non_neg_integer(), + children: [term()], + label: term() | nil, + start_line: non_neg_integer() | nil, + end_line: non_neg_integer() | nil, + type: :code | :doc | :typespec + } + + @spec children_count(t()) :: non_neg_integer() + def children_count(%__MODULE__{children: ch}), do: length(ch) + + # Keep old name as deprecated alias during transition + @spec sub_block_count(t()) :: non_neg_integer() + def sub_block_count(%__MODULE__{children: ch}), do: length(ch) + + @spec token_count(t()) :: non_neg_integer() + def token_count(%__MODULE__{tokens: tokens}), do: length(tokens) +end + +defimpl CodeQA.AST.Classification.NodeProtocol, for: CodeQA.AST.Enrichment.Node do + def tokens(n), do: n.tokens + def line_count(n), do: n.line_count + def children(n), do: n.children + def start_line(n), do: n.start_line + def end_line(n), do: n.end_line + def label(n), do: n.label + + def flat_tokens(n) do + if Enum.empty?(n.children), + do: n.tokens, + else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + end +end diff --git a/lib/codeqa/ast/enrichment/node_analyzer.ex b/lib/codeqa/ast/enrichment/node_analyzer.ex new file mode 100644 index 0000000..d506d3d --- /dev/null +++ b/lib/codeqa/ast/enrichment/node_analyzer.ex @@ -0,0 +1,65 @@ +defmodule CodeQA.AST.Enrichment.NodeAnalyzer do + @moduledoc """ + Extracts locally bound variable names from a token list. + + Used by the domain tagger to subtract local bindings from the domain signal — + a variable bound within a node (e.g. 
`user = Repo.get!(id)`) is not a domain + reference and should not appear in the node's domain fingerprint. + """ + + alias CodeQA.AST.Lexing.Token + alias CodeQA.AST.Lexing.NewlineToken + + @doc """ + Returns a MapSet of lowercase identifier names that are locally bound + within the given token list. + + Detected patterns: + - ` "="` — simple assignment (guards against `==`, `=>`, `=~`, `!=`, `<=`, `>=`) + - ` "<-"` — with/for binding (all `` tokens on the LHS of `<-`) + + Function parameters are NOT extracted here (see `param_variables/1`). + """ + @spec bound_variables([Token.t()]) :: MapSet.t(String.t()) + def bound_variables(tokens) do + MapSet.union( + assignment_bindings(tokens), + arrow_bindings(tokens) + ) + end + + # Collect `` immediately before `=` + defp assignment_bindings(tokens) do + tokens + |> Enum.chunk_every(2, 1, :discard) + |> Enum.flat_map(fn + [%Token{kind: "", content: name}, %Token{kind: "="}] -> + [String.downcase(name)] + + _ -> + [] + end) + |> MapSet.new() + end + + # Collect all `` tokens on the LHS of `<-` (within the same line). + # Resets the accumulator on `` so RHS tokens from prior lines don't leak. + defp arrow_bindings(tokens) do + tokens + |> Enum.reduce({[], MapSet.new()}, fn + %Token{kind: "<-"}, {lhs_ids, acc} -> + new_bindings = lhs_ids |> Enum.map(&String.downcase/1) |> MapSet.new() + {[], MapSet.union(acc, new_bindings)} + + %NewlineToken{}, {_, acc} -> + {[], acc} + + %Token{kind: "", content: name}, {lhs_ids, acc} -> + {[name | lhs_ids], acc} + + _, {lhs_ids, acc} -> + {lhs_ids, acc} + end) + |> elem(1) + end +end diff --git a/lib/codeqa/ast/lexing/newline_token.ex b/lib/codeqa/ast/lexing/newline_token.ex new file mode 100644 index 0000000..2ccb712 --- /dev/null +++ b/lib/codeqa/ast/lexing/newline_token.ex @@ -0,0 +1,29 @@ +defmodule CodeQA.AST.Lexing.NewlineToken do + @moduledoc """ + A newline token emitted by `TokenNormalizer.normalize_structural/1`. + + Represents a `\\n` line boundary between two source lines. 
+ + ## Fields + + - `kind` — always `""`. + - `content` — the original newline character (`"\\n"`). + - `line` — 1-based line number of the line that ends here. + - `col` — 0-based byte offset of the newline within that line. + """ + + @kind "" + + defstruct [:content, :line, :col, kind: @kind] + + @doc "Returns the normalized kind string for newline tokens." + @spec kind() :: String.t() + def kind, do: @kind + + @type t :: %__MODULE__{ + kind: String.t(), + content: String.t(), + line: non_neg_integer() | nil, + col: non_neg_integer() | nil + } +end diff --git a/lib/codeqa/ast/lexing/string_token.ex b/lib/codeqa/ast/lexing/string_token.ex new file mode 100644 index 0000000..04fc296 --- /dev/null +++ b/lib/codeqa/ast/lexing/string_token.ex @@ -0,0 +1,49 @@ +defmodule CodeQA.AST.Lexing.StringToken do + @moduledoc """ + A string token emitted by `TokenNormalizer` for all string literals, + including triple-quoted heredocs. + + ## Fields + + - `kind` — `""` for single-line strings, `""` for + triple-quoted heredoc delimiters. + - `content` — original source text (the full quoted literal or delimiter). + - `line`, `col` — source location. + - `interpolations` — list of interpolation expressions (`nil` for plain strings). + - `multiline` — `true` for triple-quoted (`\"\"\"` / `'''`) tokens. + - `quotes` — `:double`, `:single`, or `:backtick`. + """ + + @kind "" + @doc_kind "" + + defstruct [ + :content, + :line, + :col, + kind: @kind, + interpolations: nil, + multiline: false, + quotes: :double + ] + + @doc "Returns the normalized kind string for single-line string tokens." + @spec kind() :: String.t() + def kind, do: @kind + + @doc "Returns the normalized kind string for triple-quoted doc string tokens." 
+ @spec doc_kind() :: String.t() + def doc_kind, do: @doc_kind + + @type quotes :: :double | :single | :backtick + + @type t :: %__MODULE__{ + content: String.t(), + line: non_neg_integer() | nil, + col: non_neg_integer() | nil, + kind: String.t(), + interpolations: [String.t()] | nil, + multiline: boolean(), + quotes: quotes() + } +end diff --git a/lib/codeqa/ast/lexing/token.ex b/lib/codeqa/ast/lexing/token.ex new file mode 100644 index 0000000..f705f7e --- /dev/null +++ b/lib/codeqa/ast/lexing/token.ex @@ -0,0 +1,45 @@ +defmodule CodeQA.AST.Lexing.Token do + @moduledoc """ + A single token emitted by `TokenNormalizer.normalize_structural/1`. + + ## Fields + + - `value` — normalized form used for structural comparison: ``, ``, + ``, ``, ``, or the literal character(s) for + punctuation and operators. + - `content` — original source text before normalization. Identical to `value` + for punctuation/structural tokens; differs for identifiers, + strings, and numbers. Enables source reconstruction and is the + correct field to check when matching declaration keywords. + - `line` — 1-based line number in the source file. + - `col` — 0-based byte offset from the start of the line. + + String literals are emitted as `StringToken` structs, not `Token`, so that + the `interpolations` field does not pollute the common token shape. + + ## Design notes (from tree-sitter, ctags, lizard) + + - **value vs content split** — mirrors tree-sitter's distinction between a + node's `type` (structural kind) and its `text` (original source). `value` + is the kind used for pattern matching and comparison; `content` is the + original text used for reporting and reconstruction. + - **Normalization lives in value, not content** — `content` is never modified. + This means two tokens with different `content` but the same `value` (e.g. + `"foo"` and `"bar"` both normalizing to ``) are structurally equivalent + for duplicate detection but distinguishable for reporting. 
+ - **Line + col for precise location** — ctags records line numbers; tree-sitter + records byte ranges. We store both line (for human-readable reporting) and + col (for IDE navigation and sub-block start/end precision). + - **No enforcement on line/col** — synthetic tokens created in tests may omit + line/col. Consumers that need location data should guard for nil. + """ + + defstruct [:kind, :content, :line, :col] + + @type t :: %__MODULE__{ + kind: String.t(), + content: String.t(), + line: non_neg_integer() | nil, + col: non_neg_integer() | nil + } +end diff --git a/lib/codeqa/ast/lexing/token_normalizer.ex b/lib/codeqa/ast/lexing/token_normalizer.ex new file mode 100644 index 0000000..d2f0016 --- /dev/null +++ b/lib/codeqa/ast/lexing/token_normalizer.ex @@ -0,0 +1,259 @@ +defmodule CodeQA.AST.Lexing.TokenNormalizer do + @moduledoc """ + Abstracts raw source code into language-agnostic structural tokens. + + See [lexical analysis](https://en.wikipedia.org/wiki/Lexical_analysis). + """ + + alias CodeQA.AST.Lexing.Token + alias CodeQA.AST.Lexing.StringToken + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + + @doc """ + Normalizes source code into language-agnostic structural tokens, preserving + newlines as `` and leading whitespace as `` tokens (one per + 2-space / 1-tab indentation unit). + + Returns `[Token.t()]` where each token carries its normalized `value`, + original source `content`, 1-based `line` number, and 0-based `col` offset. + Used for structural block detection. 
+ """ + @spec normalize_structural(String.t()) :: [Token.t()] + def normalize_structural(code) do + code = String.replace(code, ~r/[^\x00-\x7F]/, " ") + lines = String.split(code, "\n") + last_idx = length(lines) - 1 + + lines + |> Enum.with_index() + |> Enum.flat_map(fn {line, idx} -> + line_num = idx + 1 + {tokens, last_token} = tokenize_line(line, line_num) + + if idx < last_idx do + # last_token is tracked during scanning — O(1) vs List.last/1 which is O(N). + nl_col = + case last_token do + nil -> 0 + t -> t.col + String.length(t.content) + end + + tokens ++ [%NewlineToken{content: "\n", line: line_num, col: nl_col}] + else + tokens + end + end) + end + + # Returns {tokens, last_token} where last_token is the final token on the line + # (or nil for an empty line), allowing normalize_structural to compute nl_col + # in O(1) without calling List.last/1. + defp tokenize_line(line, line_num) do + indent_chars = + line + |> String.graphemes() + |> Enum.take_while(&(&1 in [" ", "\t"])) + + indent_units = + indent_chars + |> Enum.reduce(0, fn + "\t", acc -> acc + 2 + " ", acc -> acc + 1 + end) + |> div(2) + + indent_col_width = length(indent_chars) + + ws_tokens = + for i <- 1..indent_units//1 do + %WhitespaceToken{content: " ", line: line_num, col: (i - 1) * 2} + end + + content = String.slice(line, indent_col_width..-1//1) + {content_tokens, last_content} = scan_content(content, line_num, indent_col_width) + + # Last token on the line: prefer the last content token; fall back to the + # last WS token (only possible when the content portion is empty). + last_token = last_content || List.last(ws_tokens) + + {ws_tokens ++ content_tokens, last_token} + end + + # Multi-char operators matched longest-first so that e.g. `===` beats `==`. + # Tagged `:literal` so `next_token` uses the matched text as both value and content + # (unlike ``, ``, `` which normalise content away). 
+ @operator_regex ~r/^(?:===|!==|<=>|==|!=|<=|>=|\|>|<>|<-|->|=>|=~|!~|&&|\|\||\?\?|\?\.|:=|::|\.\.\.|\.\.|--|\+\+|\*\*|\/\/|\+=|-=|\*=|\/=|%=)/ + + # --- Individual rule atoms so dispatch groups can reference them directly --- + @skip_rule {:skip, ~r/^\s+/} + @operator_rule {:literal, @operator_regex} + @trip_quotes_rule {"", ~r/^"""|^'''/} + @str_interp_rule {"", ~r/^"(?=[^"]*#\{)(?:[^"\\#]|\\.|#(?!\{)|#\{[^}]*\})*"/} + @str_dollar_interp_rule {"", + ~r/^"(?=[^"]*\$\{)(?:[^"\\$]|\\.|\\$(?!\{)|\$\{[^}]*\})*"/} + @str_swift_interp_rule {"", ~r/^"(?=[^"]*\\\()(?:[^"\\]|\\.)*"/} + @str_rule {"", ~r/^"(?:[^"\\]|\\.)*"|^'(?:[^'\\]|\\.)*'/} + @backtick_interp_rule {"", + ~r/^`(?=[^`]*\$\{)(?:[^`\\$]|\\.|\\$(?!\{)|\$\{[^}]*\})*`/} + @backtick_str_rule {"", ~r/^`(?:[^`\\]|\\.)*`/} + @num_rule {"", ~r/^\d+(?:\.\d+)?/} + @id_rule {"", ~r/^[a-zA-Z_]\w*/} + + # Dispatch rule subsets by first character so the common cases (identifiers, + # numbers, whitespace, operators) skip irrelevant regex attempts entirely. + @double_quote_rules [ + @trip_quotes_rule, + @str_interp_rule, + @str_dollar_interp_rule, + @str_swift_interp_rule, + @str_rule + ] + @single_quote_rules [@trip_quotes_rule, @str_rule] + @backtick_rules [@backtick_interp_rule, @backtick_str_rule] + + # Returns the rule subset for the given first byte (ASCII codepoint). + defp dispatch_rules(?"), do: @double_quote_rules + defp dispatch_rules(?'), do: @single_quote_rules + defp dispatch_rules(?`), do: @backtick_rules + defp dispatch_rules(c) when c >= ?0 and c <= ?9, do: [@num_rule] + + defp dispatch_rules(c) + when (c >= ?a and c <= ?z) or (c >= ?A and c <= ?Z) or c == ?_, + do: [@id_rule] + + defp dispatch_rules(c) + when c in [?=, ?!, ?<, ?>, ?|, ?&, ??, ?:, ?., ?-, ?+, ?*, ?/, ?%], + do: [@operator_rule] + + defp dispatch_rules(c) when c <= 32, do: [@skip_rule] + + # Unknown first char — no rule applies; caller falls through to single-char token. 
+ defp dispatch_rules(_), do: [] + + # Returns {tokens, last_token_or_nil} — last_token is tracked during scanning + # so callers get O(1) access to the final token without List.last/1. + defp scan_content(text, line_num, col_offset) do + {reversed, last} = do_scan(text, line_num, col_offset, [], nil) + {Enum.reverse(reversed), last} + end + + defp do_scan("", _line, _col, acc, last), do: {acc, last} + + defp do_scan(<> = text, line, col, acc, last) do + case next_token(first, text, line, col) do + {:skip, rest, advance} -> do_scan(rest, line, col + advance, acc, last) + {token, rest, advance} -> do_scan(rest, line, col + advance, [token | acc], token) + end + end + + # next_token/4: dispatches on the first byte to select only candidate rules, + # avoiding regex attempts for rules whose first-char pattern can't possibly match. + defp next_token(first, text, line, col) do + rules = dispatch_rules(first) + + result = + Enum.find_value(rules, fn {type, regex} -> + case Regex.run(regex, text) do + [m | _] -> {type, m} + nil -> nil + end + end) + + case result do + {:skip, m} -> + len = String.length(m) + {:skip, String.slice(text, len..-1//1), len} + + {:literal, m} -> + len = String.length(m) + {%Token{kind: m, content: m, line: line, col: col}, String.slice(text, len..-1//1), len} + + {value, m} -> + len = String.length(m) + token = postprocess(value, %Token{kind: value, content: m, line: line, col: col}) + {token, String.slice(text, len..-1//1), len} + + nil -> + # No rule matched — emit the first character as a literal single-char token. + char = String.first(text) + {%Token{kind: char, content: char, line: line, col: col}, String.slice(text, 1..-1//1), 1} + end + end + + # Extract #{...} interpolation expressions into `interpolations` and strip + # them from `content` so downstream consumers see only the static string parts. + # Nested braces (e.g. 
#{foo(%{a: 1})}) are left as-is in content — the + # lookahead in the scan rule ensures a match only when simple interpolations + # are present. + defp postprocess("", token), + do: extract_interpolations(token, ~r/#\{([^}]*)\}/, ~r/#\{[^}]*\}/, quotes: :double) + + defp postprocess("", token), + do: extract_interpolations(token, ~r/\$\{([^}]*)\}/, ~r/\$\{[^}]*\}/, quotes: :double) + + defp postprocess("", token), + do: extract_interpolations(token, ~r/\\\(([^)]*)\)/, ~r/\\\([^)]*\)/, quotes: :double) + + defp postprocess("", token), + do: extract_interpolations(token, ~r/\$\{([^}]*)\}/, ~r/\$\{[^}]*\}/, quotes: :backtick) + + defp postprocess("", %Token{content: ~s(""")} = token), + do: %StringToken{ + kind: StringToken.doc_kind(), + content: token.content, + line: token.line, + col: token.col, + multiline: true, + quotes: :double + } + + defp postprocess("", token), + do: %StringToken{ + kind: StringToken.doc_kind(), + content: token.content, + line: token.line, + col: token.col, + multiline: true, + quotes: :single + } + + defp postprocess("", token), + do: %StringToken{ + kind: StringToken.kind(), + content: token.content, + line: token.line, + col: token.col, + quotes: :backtick + } + + defp postprocess("", token) do + quotes = if String.starts_with?(token.content, "\""), do: :double, else: :single + + %StringToken{ + kind: StringToken.kind(), + content: token.content, + line: token.line, + col: token.col, + quotes: quotes + } + end + + defp postprocess(_value, token), do: token + + defp extract_interpolations(token, capture_regex, strip_regex, opts) do + quotes = Keyword.get(opts, :quotes, :double) + + interpolations = + Regex.scan(capture_regex, token.content, capture: :all_but_first) + |> Enum.map(fn [expr] -> String.trim(expr) end) + + %StringToken{ + content: String.replace(token.content, strip_regex, ""), + line: token.line, + col: token.col, + interpolations: interpolations, + quotes: quotes + } + end +end diff --git 
a/lib/codeqa/ast/lexing/token_protocol.ex b/lib/codeqa/ast/lexing/token_protocol.ex new file mode 100644 index 0000000..e38458d --- /dev/null +++ b/lib/codeqa/ast/lexing/token_protocol.ex @@ -0,0 +1,59 @@ +defprotocol CodeQA.AST.Lexing.TokenProtocol do + @moduledoc """ + Protocol for token structs emitted by `TokenNormalizer`. + + Both `Token` and `StringToken` implement this protocol, so code that + processes token streams does not need to branch on the concrete struct type. + + ## Functions + + - `kind/1` — normalized structural kind (``, ``, ``, …) + - `content/1` — original source text before normalization + - `line/1` — 1-based line number in the source file (`nil` for synthetic tokens) + - `col/1` — 0-based byte offset from the start of the line (`nil` for synthetic tokens) + """ + + @doc "Returns the normalized structural kind of the token." + @spec kind(t) :: String.t() + def kind(token) + + @doc "Returns the original source text of the token." + @spec content(t) :: String.t() + def content(token) + + @doc "Returns the 1-based line number of the token, or `nil` for synthetic tokens." + @spec line(t) :: non_neg_integer() | nil + def line(token) + + @doc "Returns the 0-based column offset of the token, or `nil` for synthetic tokens." 
+ @spec col(t) :: non_neg_integer() | nil + def col(token) +end + +defimpl CodeQA.AST.Lexing.TokenProtocol, for: CodeQA.AST.Lexing.Token do + def kind(%CodeQA.AST.Lexing.Token{kind: k}), do: k + def content(%CodeQA.AST.Lexing.Token{content: c}), do: c + def line(%CodeQA.AST.Lexing.Token{line: l}), do: l + def col(%CodeQA.AST.Lexing.Token{col: c}), do: c +end + +defimpl CodeQA.AST.Lexing.TokenProtocol, for: CodeQA.AST.Lexing.StringToken do + def kind(%CodeQA.AST.Lexing.StringToken{kind: k}), do: k + def content(%CodeQA.AST.Lexing.StringToken{content: c}), do: c + def line(%CodeQA.AST.Lexing.StringToken{line: l}), do: l + def col(%CodeQA.AST.Lexing.StringToken{col: c}), do: c +end + +defimpl CodeQA.AST.Lexing.TokenProtocol, for: CodeQA.AST.Lexing.NewlineToken do + def kind(%CodeQA.AST.Lexing.NewlineToken{kind: k}), do: k + def content(%CodeQA.AST.Lexing.NewlineToken{content: c}), do: c + def line(%CodeQA.AST.Lexing.NewlineToken{line: l}), do: l + def col(%CodeQA.AST.Lexing.NewlineToken{col: c}), do: c +end + +defimpl CodeQA.AST.Lexing.TokenProtocol, for: CodeQA.AST.Lexing.WhitespaceToken do + def kind(%CodeQA.AST.Lexing.WhitespaceToken{kind: k}), do: k + def content(%CodeQA.AST.Lexing.WhitespaceToken{content: c}), do: c + def line(%CodeQA.AST.Lexing.WhitespaceToken{line: l}), do: l + def col(%CodeQA.AST.Lexing.WhitespaceToken{col: c}), do: c +end diff --git a/lib/codeqa/ast/lexing/whitespace_token.ex b/lib/codeqa/ast/lexing/whitespace_token.ex new file mode 100644 index 0000000..cb23082 --- /dev/null +++ b/lib/codeqa/ast/lexing/whitespace_token.ex @@ -0,0 +1,29 @@ +defmodule CodeQA.AST.Lexing.WhitespaceToken do + @moduledoc """ + A whitespace/indentation token emitted by `TokenNormalizer.normalize_structural/1`. + + Represents one indentation unit (2 spaces or 1 tab) at the start of a line. + + ## Fields + + - `kind` — always `""`. + - `content` — the original source text for this indentation unit (`" "`). + - `line` — 1-based line number in the source file. 
+ - `col` — 0-based byte offset from the start of the line. + """ + + @kind "" + + defstruct [:content, :line, :col, kind: @kind] + + @doc "Returns the normalized kind string for whitespace tokens." + @spec kind() :: String.t() + def kind, do: @kind + + @type t :: %__MODULE__{ + kind: String.t(), + content: String.t(), + line: non_neg_integer() | nil, + col: non_neg_integer() | nil + } +end diff --git a/lib/codeqa/ast/nodes/attribute_node.ex b/lib/codeqa/ast/nodes/attribute_node.ex new file mode 100644 index 0000000..dba171d --- /dev/null +++ b/lib/codeqa/ast/nodes/attribute_node.ex @@ -0,0 +1,65 @@ +defmodule CodeQA.AST.Nodes.AttributeNode do + @moduledoc """ + AST node for fields, constants, decorators, annotations, and typespecs. + Subsumes the previous :typespec node type (kind: :typespec). + """ + + alias CodeQA.AST.Enrichment.Node + alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken} + + defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label, :name, :kind] + + @type t :: %__MODULE__{ + tokens: [term()], + line_count: non_neg_integer(), + children: [term()], + start_line: non_neg_integer() | nil, + end_line: non_neg_integer() | nil, + label: term() | nil, + name: String.t() | nil, + kind: :field | :constant | :decorator | :annotation | :typespec | nil + } + + @typespec_attrs MapSet.new(~w[spec type typep opaque callback macrocallback]) + + @doc "Build an AttributeNode from a raw %Node{}, detecting :typespec kind from tokens." 
+ @spec cast(Node.t()) :: t() + def cast(%Node{} = node) do + %__MODULE__{ + tokens: node.tokens, + line_count: node.line_count, + children: node.children, + start_line: node.start_line, + end_line: node.end_line, + label: node.label, + kind: detect_kind(node.tokens) + } + end + + defp detect_kind(tokens) do + tokens + |> Enum.drop_while(&(&1.kind in [WhitespaceToken.kind(), NewlineToken.kind()])) + |> case do + [%{kind: "@"}, %{kind: "", content: name} | _] -> + if MapSet.member?(@typespec_attrs, name), do: :typespec, else: nil + + _ -> + nil + end + end + + defimpl CodeQA.AST.Classification.NodeProtocol do + def tokens(n), do: n.tokens + def line_count(n), do: n.line_count + def children(n), do: n.children + def start_line(n), do: n.start_line + def end_line(n), do: n.end_line + def label(n), do: n.label + + def flat_tokens(n) do + if Enum.empty?(n.children), + do: n.tokens, + else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + end + end +end diff --git a/lib/codeqa/ast/nodes/code_node.ex b/lib/codeqa/ast/nodes/code_node.ex new file mode 100644 index 0000000..1b26244 --- /dev/null +++ b/lib/codeqa/ast/nodes/code_node.ex @@ -0,0 +1,44 @@ +defmodule CodeQA.AST.Nodes.CodeNode do + @moduledoc "Catch-all AST node for unclassified code blocks." + + alias CodeQA.AST.Enrichment.Node + + defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label] + + @type t :: %__MODULE__{ + tokens: [term()], + line_count: non_neg_integer(), + children: [term()], + start_line: non_neg_integer() | nil, + end_line: non_neg_integer() | nil, + label: term() | nil + } + + @doc "Build a CodeNode from a raw %Node{}, copying all base fields." 
+ @spec cast(Node.t()) :: t() + def cast(%Node{} = node) do + %__MODULE__{ + tokens: node.tokens, + line_count: node.line_count, + children: node.children, + start_line: node.start_line, + end_line: node.end_line, + label: node.label + } + end + + defimpl CodeQA.AST.Classification.NodeProtocol do + def tokens(n), do: n.tokens + def line_count(n), do: n.line_count + def children(n), do: n.children + def start_line(n), do: n.start_line + def end_line(n), do: n.end_line + def label(n), do: n.label + + def flat_tokens(n) do + if Enum.empty?(n.children), + do: n.tokens, + else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + end + end +end diff --git a/lib/codeqa/ast/nodes/doc_node.ex b/lib/codeqa/ast/nodes/doc_node.ex new file mode 100644 index 0000000..2b6dbfd --- /dev/null +++ b/lib/codeqa/ast/nodes/doc_node.ex @@ -0,0 +1,44 @@ +defmodule CodeQA.AST.Nodes.DocNode do + @moduledoc "AST node for documentation strings and comment blocks." + + alias CodeQA.AST.Enrichment.Node + + defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label] + + @type t :: %__MODULE__{ + tokens: [term()], + line_count: non_neg_integer(), + children: [term()], + start_line: non_neg_integer() | nil, + end_line: non_neg_integer() | nil, + label: term() | nil + } + + @doc "Build a DocNode from a raw %Node{}, copying all base fields." 
+ @spec cast(Node.t()) :: t() + def cast(%Node{} = node) do + %__MODULE__{ + tokens: node.tokens, + line_count: node.line_count, + children: node.children, + start_line: node.start_line, + end_line: node.end_line, + label: node.label + } + end + + defimpl CodeQA.AST.Classification.NodeProtocol do + def tokens(n), do: n.tokens + def line_count(n), do: n.line_count + def children(n), do: n.children + def start_line(n), do: n.start_line + def end_line(n), do: n.end_line + def label(n), do: n.label + + def flat_tokens(n) do + if Enum.empty?(n.children), + do: n.tokens, + else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + end + end +end diff --git a/lib/codeqa/ast/nodes/function_node.ex b/lib/codeqa/ast/nodes/function_node.ex new file mode 100644 index 0000000..6d91c88 --- /dev/null +++ b/lib/codeqa/ast/nodes/function_node.ex @@ -0,0 +1,57 @@ +defmodule CodeQA.AST.Nodes.FunctionNode do + @moduledoc "AST node for function, method, or callable definitions." + + alias CodeQA.AST.Enrichment.Node + + defstruct [ + :tokens, + :line_count, + :children, + :start_line, + :end_line, + :label, + :name, + :arity, + :visibility + ] + + @type t :: %__MODULE__{ + tokens: [term()], + line_count: non_neg_integer(), + children: [term()], + start_line: non_neg_integer() | nil, + end_line: non_neg_integer() | nil, + label: term() | nil, + name: String.t() | nil, + arity: non_neg_integer() | nil, + visibility: :public | :private | nil + } + + @doc "Build a FunctionNode from a raw %Node{}, copying all base fields. Type-specific fields default to nil." 
+ @spec cast(Node.t()) :: t() + def cast(%Node{} = node) do + %__MODULE__{ + tokens: node.tokens, + line_count: node.line_count, + children: node.children, + start_line: node.start_line, + end_line: node.end_line, + label: node.label + } + end + + defimpl CodeQA.AST.Classification.NodeProtocol do + def tokens(n), do: n.tokens + def line_count(n), do: n.line_count + def children(n), do: n.children + def start_line(n), do: n.start_line + def end_line(n), do: n.end_line + def label(n), do: n.label + + def flat_tokens(n) do + if Enum.empty?(n.children), + do: n.tokens, + else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + end + end +end diff --git a/lib/codeqa/ast/nodes/import_node.ex b/lib/codeqa/ast/nodes/import_node.ex new file mode 100644 index 0000000..999013d --- /dev/null +++ b/lib/codeqa/ast/nodes/import_node.ex @@ -0,0 +1,45 @@ +defmodule CodeQA.AST.Nodes.ImportNode do + @moduledoc "AST node for import, require, use, alias, or include statements." + + alias CodeQA.AST.Enrichment.Node + + defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label, :target] + + @type t :: %__MODULE__{ + tokens: [term()], + line_count: non_neg_integer(), + children: [term()], + start_line: non_neg_integer() | nil, + end_line: non_neg_integer() | nil, + label: term() | nil, + target: String.t() | nil + } + + @doc "Build an ImportNode from a raw %Node{}, copying all base fields. Type-specific fields default to nil." 
+ @spec cast(Node.t()) :: t() + def cast(%Node{} = node) do + %__MODULE__{ + tokens: node.tokens, + line_count: node.line_count, + children: node.children, + start_line: node.start_line, + end_line: node.end_line, + label: node.label + } + end + + defimpl CodeQA.AST.Classification.NodeProtocol do + def tokens(n), do: n.tokens + def line_count(n), do: n.line_count + def children(n), do: n.children + def start_line(n), do: n.start_line + def end_line(n), do: n.end_line + def label(n), do: n.label + + def flat_tokens(n) do + if Enum.empty?(n.children), + do: n.tokens, + else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + end + end +end diff --git a/lib/codeqa/ast/nodes/module_node.ex b/lib/codeqa/ast/nodes/module_node.ex new file mode 100644 index 0000000..9ad3efb --- /dev/null +++ b/lib/codeqa/ast/nodes/module_node.ex @@ -0,0 +1,46 @@ +defmodule CodeQA.AST.Nodes.ModuleNode do + @moduledoc "AST node for module, class, namespace, or struct definitions." + + alias CodeQA.AST.Enrichment.Node + + defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label, :name, :kind] + + @type t :: %__MODULE__{ + tokens: [term()], + line_count: non_neg_integer(), + children: [term()], + start_line: non_neg_integer() | nil, + end_line: non_neg_integer() | nil, + label: term() | nil, + name: String.t() | nil, + kind: :class | :module | :namespace | :struct | nil + } + + @doc "Build a ModuleNode from a raw %Node{}, copying all base fields. Type-specific fields default to nil." 
+ @spec cast(Node.t()) :: t() + def cast(%Node{} = node) do + %__MODULE__{ + tokens: node.tokens, + line_count: node.line_count, + children: node.children, + start_line: node.start_line, + end_line: node.end_line, + label: node.label + } + end + + defimpl CodeQA.AST.Classification.NodeProtocol do + def tokens(n), do: n.tokens + def line_count(n), do: n.line_count + def children(n), do: n.children + def start_line(n), do: n.start_line + def end_line(n), do: n.end_line + def label(n), do: n.label + + def flat_tokens(n) do + if Enum.empty?(n.children), + do: n.tokens, + else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + end + end +end diff --git a/lib/codeqa/ast/nodes/test_node.ex b/lib/codeqa/ast/nodes/test_node.ex new file mode 100644 index 0000000..4fc955c --- /dev/null +++ b/lib/codeqa/ast/nodes/test_node.ex @@ -0,0 +1,45 @@ +defmodule CodeQA.AST.Nodes.TestNode do + @moduledoc "AST node for test cases, describe blocks, and it blocks." + + alias CodeQA.AST.Enrichment.Node + + defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label, :description] + + @type t :: %__MODULE__{ + tokens: [term()], + line_count: non_neg_integer(), + children: [term()], + start_line: non_neg_integer() | nil, + end_line: non_neg_integer() | nil, + label: term() | nil, + description: String.t() | nil + } + + @doc "Build a TestNode from a raw %Node{}, copying all base fields. Type-specific fields default to nil." 
+ @spec cast(Node.t()) :: t() + def cast(%Node{} = node) do + %__MODULE__{ + tokens: node.tokens, + line_count: node.line_count, + children: node.children, + start_line: node.start_line, + end_line: node.end_line, + label: node.label + } + end + + defimpl CodeQA.AST.Classification.NodeProtocol do + def tokens(n), do: n.tokens + def line_count(n), do: n.line_count + def children(n), do: n.children + def start_line(n), do: n.start_line + def end_line(n), do: n.end_line + def label(n), do: n.label + + def flat_tokens(n) do + if Enum.empty?(n.children), + do: n.tokens, + else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + end + end +end diff --git a/lib/codeqa/ast/parsing/parser.ex b/lib/codeqa/ast/parsing/parser.ex new file mode 100644 index 0000000..cb23a91 --- /dev/null +++ b/lib/codeqa/ast/parsing/parser.ex @@ -0,0 +1,233 @@ +defmodule CodeQA.AST.Parsing.Parser do + @moduledoc """ + Recursively parses a token stream into a nested node tree. + + Top-level nodes are found by splitting on blank lines and declaration keywords. + Each node is then recursively subdivided using enclosure rules (brackets, + colon-indentation) until no further subdivision is possible — forming an + arbitrarily-deep tree rather than a fixed two-level hierarchy. + + ## Recursive parsing algorithm + + `parse_block/2` is the recursive core: + + 1. Immediately create a `Node` spanning the whole token stream. + 2. Apply enclosure rules to find sub-candidate streams. + 3. **Idempotency check** — reject any enclosure that spans the entire stream + (e.g. `BracketRule` re-emitting its own input). This is the termination + condition: the node is a leaf when no strictly-smaller sub-candidates exist. + 4. Recursively call `parse_block/2` on each sub-candidate to produce children. + 5. Return the node with its children attached as `children`. 
+ + ## Design notes (from tree-sitter, ctags, lizard) + + - **Recursive hierarchy** — replaces the old two-level (top + one level of sub-blocks) + model with an N-level tree. Each call to `parse_block/2` mirrors tree-sitter's + recursive descent: emit the node, then recurse into its contents. + - **Language detection by extension** — `language_from_path/1` follows ctags' + convention of inferring language from file extension. + - **Rule extensibility** — enclosure signals are selected per language in + `find_sub_candidates/2`. Signals are composable and order-independent. + - **Error recovery** — mismatched brackets and malformed indentation are silently + skipped by individual rules. The parser emits partial nodes rather than failing, + consistent with tree-sitter's error-recovery philosophy. + """ + + alias CodeQA.AST.Enrichment.Node + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.Language + alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken} + + alias CodeQA.AST.Signals.Structural.{ + BlankLineSignal, + BracketSignal, + ColonIndentSignal, + KeywordSignal, + TripleQuoteSignal + } + + @spec detect_blocks([CodeQA.AST.Lexing.Token.t()], module()) :: [Node.t()] + def detect_blocks([], _lang_mod), do: [] + + def detect_blocks(tokens, lang_mod) do + all_emissions = + SignalStream.run( + tokens, + [%TripleQuoteSignal{}, %BlankLineSignal{}, %KeywordSignal{}], + lang_mod + ) + |> List.flatten() + + triple_splits = + for {_, :split, :triple_split, v} <- all_emissions, do: v + + protected_ranges = compute_protected_ranges(triple_splits) + + split_points = + for( + {_, :split, name, v} <- all_emissions, + name in [:blank_split, :keyword_split], + not inside_protected?(v, protected_ranges), + do: v + ) + |> Enum.concat(triple_splits) + |> Enum.uniq() + |> Enum.sort() + + tokens + |> split_at(split_points) + |> Enum.reject(fn s -> Enum.empty?(s) or whitespace_only?(s) end) + |> merge_same_line_slices() + |> Enum.map(&parse_block(&1, lang_mod)) + end + + @spec 
language_module_for_path(String.t()) :: module() + def language_module_for_path(path), do: Language.detect(path) + + @spec language_from_path(String.t()) :: atom() + def language_from_path(path), + do: path |> Language.detect() |> then(& &1.name()) |> String.to_atom() + + # Recursively parse a token stream into a Node with nested children. + # Immediately creates a node spanning the whole stream, then attempts to + # subdivide it. Terminates when no strictly-smaller sub-candidates are found. + defp parse_block(tokens, lang_mod) do + start_line = block_start_line(tokens) + end_line = block_end_line(tokens) + line_count = if start_line && end_line, do: end_line - start_line + 1, else: 1 + + block = %Node{ + tokens: tokens, + line_count: line_count, + children: [], + start_line: start_line, + end_line: end_line + } + + case find_sub_candidates(tokens, lang_mod) do + [] -> + block + + candidates -> + children = Enum.map(candidates, &parse_block(&1, lang_mod)) + %{block | children: children} + end + end + + # Collect enclosure regions from rules. + # + # If the token stream is itself a bracket pair (e.g. the stream IS `(foo, bar)`), + # we unwrap the outer brackets before running rules. Without this, BracketRule + # would only find the whole stream as a single enclosure — filtered by the + # idempotency check — and recursion would stop prematurely at every bracket level. + # Unwrapping lets us see the *inner* structure and keeps the tree growing deeper. + # + # Idempotency check: after unwrapping, reject any enclosure that still spans the + # entire search window (0..n-1), which would produce an infinite loop. 
+ defp find_sub_candidates(tokens, lang_mod) do + {search_tokens, _} = maybe_unwrap_bracket(tokens) + n = length(search_tokens) + + enclosure_signals = + if lang_mod.uses_colon_indent?() do + [%BracketSignal{}, %ColonIndentSignal{}] + else + [%BracketSignal{}] + end + + SignalStream.run(search_tokens, enclosure_signals, lang_mod) + |> List.flatten() + |> Enum.filter(fn {_, group, _, _} -> group == :enclosure end) + |> Enum.map(fn {_, _, _, {s, e}} -> {s, e} end) + |> Enum.uniq() + |> Enum.sort() + |> Enum.reject(fn {s, e} -> s == 0 and e == n - 1 end) + |> Enum.map(fn {s, e} -> Enum.slice(search_tokens, s..e) end) + |> Enum.reject(&whitespace_only?/1) + end + + @open_brackets MapSet.new(["(", "[", "{"]) + @matching_close %{"(" => ")", "[" => "]", "{" => "}"} + + # If the first token is an opening bracket and the last token is its closing kind, return the + # inner tokens; otherwise return the stream unchanged. Nesting is not checked: `(a)(b)` unwraps too. + defp maybe_unwrap_bracket([first | rest] = tokens) do + last = List.last(tokens) + + if last && MapSet.member?(@open_brackets, first.kind) && + Map.get(@matching_close, first.kind) == last.kind do + {Enum.drop(rest, -1), 1} + else + {tokens, 0} + end + end + + defp maybe_unwrap_bracket([]), do: {[], 0} + + # Pairs consecutive triple-quote split indices into protected interior ranges. + # Uses chunk_every with :discard to safely handle odd counts (malformed input). + defp compute_protected_ranges(split_indices) do + split_indices + |> Enum.chunk_every(2, 2, :discard) + |> Enum.map(fn [a, b] -> {a + 1, b - 1} end) + end + + defp inside_protected?(idx, ranges) do + Enum.any?(ranges, fn {lo, hi} -> idx >= lo and idx <= hi end) + end + + # When TripleQuoteSignal splits `@doc """` mid-line, the tokens before the + # triple-quote land in one slice and the heredoc in the next — both on the same + # starting line. Merge adjacent slices that share a line boundary so `@doc """..."""` + # becomes a single token stream fed to parse_block rather than two separate nodes. 
+ defp merge_same_line_slices([]), do: [] + defp merge_same_line_slices([single]), do: [single] + + defp merge_same_line_slices([slice_a, slice_b | rest]) do + last_line_a = + slice_a + |> Enum.reverse() + |> Enum.find(&(&1.kind not in [WhitespaceToken.kind(), NewlineToken.kind()])) + |> then(&(&1 && &1.line)) + + first_line_b = + slice_b + |> Enum.find(&(&1.kind not in [WhitespaceToken.kind(), NewlineToken.kind()])) + |> then(&(&1 && &1.line)) + + if last_line_a && first_line_b && last_line_a == first_line_b do + merge_same_line_slices([slice_a ++ slice_b | rest]) + else + [slice_a | merge_same_line_slices([slice_b | rest])] + end + end + + defp split_at(tokens, []), do: [tokens] + + defp split_at(tokens, split_points) do + boundaries = [0 | split_points] ++ [length(tokens)] + + boundaries + |> Enum.chunk_every(2, 1, :discard) + |> Enum.map(fn [start, stop] -> Enum.slice(tokens, start..(stop - 1)//1) end) + end + + defp whitespace_only?(tokens) do + Enum.all?(tokens, &(&1.kind in [WhitespaceToken.kind(), NewlineToken.kind()])) + end + + defp block_start_line([%{line: line} | _]), do: line + defp block_start_line([]), do: nil + + defp block_end_line([]), do: nil + + defp block_end_line(tokens) do + tokens + |> Enum.reverse() + |> Enum.find(&(&1.kind not in [WhitespaceToken.kind(), NewlineToken.kind()])) + |> case do + nil -> tokens |> List.last() |> Map.get(:line) + token -> token.line + end + end +end diff --git a/lib/codeqa/ast/parsing/signal.ex b/lib/codeqa/ast/parsing/signal.ex new file mode 100644 index 0000000..dc2d19e --- /dev/null +++ b/lib/codeqa/ast/parsing/signal.ex @@ -0,0 +1,44 @@ +defprotocol CodeQA.AST.Parsing.Signal do + @moduledoc """ + Protocol for token-stream signal emitters. + + A signal is a stateful detector that receives one token at a time and emits + zero or more named values. All signals run independently over the same token + stream — each gets its own full pass, carrying its own state. 
+ + ## Protocol functions + + - `source/1` — the implementing module; used for debugging emission traces + - `group/1` — atom grouping this signal's emissions (e.g. `:split`, `:enclosure`) + - `init/2` — returns initial state; called once before the token stream starts + - `emit/3` — called per `{prev, token, next}` triple; returns `{MapSet.t({name, value}), new_state}` + + ## State + + State is owned externally (in `SignalStream`) as a positionally-aligned list. + The signal defines the shape; the orchestrator threads it through unchanged. + + ## No-op emission + + To emit nothing for a token, return `{MapSet.new(), state}`. + """ + + @doc "The module that implements this signal — for debugging traces." + @spec source(t) :: module() + def source(signal) + + @doc "Group atom for all emissions from this signal (e.g. :split, :enclosure)." + @spec group(t) :: atom() + def group(signal) + + @doc "Returns the initial state for this signal." + @spec init(t, module()) :: term() + def init(signal, lang_mod) + + @doc """ + Called once per token with a `{prev, token, next}` triple (`nil` at the stream edges). Returns a + MapSet of `{name, value}` emission pairs and the updated state; a `:halt` state ends the pass early. + """ + @spec emit(t, token :: term(), state :: term()) :: {MapSet.t(), term()} + def emit(signal, token, state) +end diff --git a/lib/codeqa/ast/parsing/signal_registry.ex b/lib/codeqa/ast/parsing/signal_registry.ex new file mode 100644 index 0000000..555de16 --- /dev/null +++ b/lib/codeqa/ast/parsing/signal_registry.ex @@ -0,0 +1,94 @@ +defmodule CodeQA.AST.Parsing.SignalRegistry do + @moduledoc """ + Registry for structural and classification signals. + + Use `default/0` for the standard signal set. Compose custom registries + with `register_structural/2` and `register_classification/2` for + language-specific or analysis-specific configurations. 
+ """ + + alias CodeQA.AST.Signals.Structural.{ + AccessModifierSignal, + AssignmentFunctionSignal, + BlankLineSignal, + BranchSplitSignal, + BracketSignal, + ColonIndentSignal, + CommentDividerSignal, + DecoratorSignal, + DedentToZeroSignal, + DocCommentLeadSignal, + KeywordSignal, + SQLBlockSignal, + TripleQuoteSignal + } + + alias CodeQA.AST.Signals.Classification.{ + AttributeSignal, + CommentDensitySignal, + ConfigSignal, + DataSignal, + DocSignal, + FunctionSignal, + ImportSignal, + ModuleSignal, + TestSignal, + TypeSignal + } + + defstruct structural: [], classification: [] + + @type t :: %__MODULE__{ + structural: [term()], + classification: [term()] + } + + @spec new() :: t() + def new, do: %__MODULE__{} + + @spec register_structural(t(), term()) :: t() + def register_structural(%__MODULE__{} = r, signal), + do: %{r | structural: r.structural ++ [signal]} + + @spec register_classification(t(), term()) :: t() + def register_classification(%__MODULE__{} = r, signal), + do: %{r | classification: r.classification ++ [signal]} + + @spec default() :: t() + def default do + new() + |> register_structural(%TripleQuoteSignal{}) + |> register_structural(%BlankLineSignal{}) + |> register_structural(%KeywordSignal{}) + |> register_structural(%AccessModifierSignal{}) + |> register_structural(%DecoratorSignal{}) + |> register_structural(%CommentDividerSignal{}) + |> register_structural(%DocCommentLeadSignal{}) + |> register_structural(%AssignmentFunctionSignal{}) + |> register_structural(%DedentToZeroSignal{}) + |> register_structural(%BranchSplitSignal{}) + |> register_structural(%BracketSignal{}) + |> register_classification(%DocSignal{}) + |> register_classification(%TestSignal{}) + |> register_classification(%FunctionSignal{}) + |> register_classification(%ModuleSignal{}) + |> register_classification(%ImportSignal{}) + |> register_classification(%AttributeSignal{}) + |> register_classification(%TypeSignal{}) + |> register_classification(%ConfigSignal{}) + |> 
register_classification(%DataSignal{}) + |> register_classification(%CommentDensitySignal{}) + end + + @spec python() :: t() + def python do + r = default() + %{r | structural: r.structural ++ [%ColonIndentSignal{}]} + end + + @spec sql() :: t() + def sql do + r = default() + %{r | structural: r.structural ++ [%SQLBlockSignal{}]} + end +end diff --git a/lib/codeqa/ast/parsing/signal_stream.ex b/lib/codeqa/ast/parsing/signal_stream.ex new file mode 100644 index 0000000..935cbc2 --- /dev/null +++ b/lib/codeqa/ast/parsing/signal_stream.ex @@ -0,0 +1,54 @@ +defmodule CodeQA.AST.Parsing.SignalStream do + @moduledoc """ + Runs a list of `Signal` implementations over a token stream. + + Each signal receives its own pass over the token stream and accumulates + its own state. Signals are independent — no shared state, no cross-signal + coordination. A signal ends its pass early by returning `:halt` as its state. + + ## Return value + + Returns a list of emission lists, one per signal, in the same order as the + input signal list. Each emission is a 4-tuple: + + {source, group, name, value} + + ## Usage + + SignalStream.run(tokens, [%BlankLineSignal{}, %KeywordSignal{}], lang_mod) + # => [[{BlankLineSignal, :split, :blank_split, 5}, ...], [...]] + """ + + alias CodeQA.AST.Parsing.Signal + + @spec run([term()], [term()], module()) :: [list()] + def run(tokens, signals, lang_mod) do + prevs = [nil | tokens] + nexts = Enum.drop(tokens, 1) ++ [nil] + triples = Enum.zip_with([prevs, tokens, nexts], fn [p, c, n] -> {p, c, n} end) + + Enum.map(signals, fn signal -> + init_state = Signal.init(signal, lang_mod) + source = Signal.source(signal) + group = Signal.group(signal) + + {_final_state, emissions} = + Enum.reduce_while(triples, {init_state, []}, fn triple, {state, acc} -> + {emitted, new_state} = Signal.emit(signal, triple, state) + + new_acc = + emitted + |> Enum.map(fn {name, value} -> {source, group, name, value} end) + |> Enum.reduce(acc, fn e, a -> [e | a] end) + + if new_state == :halt do + {:halt, {new_state, new_acc}} + else + 
{:cont, {new_state, new_acc}} + end + end) + + Enum.reverse(emissions) + end) + end +end diff --git a/lib/codeqa/ast/signals/classification/attribute_signal.ex b/lib/codeqa/ast/signals/classification/attribute_signal.ex new file mode 100644 index 0000000..18a52cd --- /dev/null +++ b/lib/codeqa/ast/signals/classification/attribute_signal.ex @@ -0,0 +1,66 @@ +defmodule CodeQA.AST.Signals.Classification.AttributeSignal do + @moduledoc """ + Classification signal — votes `:attribute` when an `@identifier` pattern + appears at indent 0. + + Weights: + - 3 for Elixir typespec attributes (@spec, @type, @typep, @opaque, @callback, @macrocallback) + - 2 for all other @name attributes + + Skips @doc and @moduledoc — those nodes contain tokens and are handled by DocSignal. + Emits at most one vote per token stream. + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + @nl CodeQA.AST.Lexing.NewlineToken.kind() + @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + @typespec_attrs MapSet.new(~w[spec type typep opaque callback macrocallback]) + @skip_attrs MapSet.new(~w[doc moduledoc]) + + def source(_), do: CodeQA.AST.Signals.Classification.AttributeSignal + def group(_), do: :classification + + def init(_, _lang_mod), + do: %{at_line_start: true, indent: 0, saw_at: false, voted: false} + + def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state} + + def emit(_, {_prev, token, _next}, %{at_line_start: als, indent: ind, saw_at: saw_at} = state) do + case token.kind do + @nl -> + {MapSet.new(), %{state | at_line_start: true, indent: 0, saw_at: false}} + + @ws when als -> + {MapSet.new(), %{state | indent: ind + 1, at_line_start: true}} + + @ws -> + {MapSet.new(), state} + + "@" when ind == 0 -> + {MapSet.new(), %{state | saw_at: true, at_line_start: false}} + + "" when saw_at -> + name = token.content + + cond do + MapSet.member?(@skip_attrs, name) -> + # @doc/@moduledoc: let DocSignal handle via tokens + {MapSet.new(), %{state | saw_at: false, at_line_start: 
false, voted: true}} + + MapSet.member?(@typespec_attrs, name) -> + {MapSet.new([{:attribute_vote, 3}]), + %{state | saw_at: false, at_line_start: false, voted: true}} + + true -> + {MapSet.new([{:attribute_vote, 2}]), + %{state | saw_at: false, at_line_start: false, voted: true}} + end + + _ -> + {MapSet.new(), %{state | saw_at: false, at_line_start: false}} + end + end + end +end diff --git a/lib/codeqa/ast/signals/classification/comment_density_signal.ex b/lib/codeqa/ast/signals/classification/comment_density_signal.ex new file mode 100644 index 0000000..cc3614f --- /dev/null +++ b/lib/codeqa/ast/signals/classification/comment_density_signal.ex @@ -0,0 +1,61 @@ +defmodule CodeQA.AST.Signals.Classification.CommentDensitySignal do + @moduledoc """ + Classification signal — votes `:comment` when more than 60% of non-blank + lines begin with a comment prefix. + + Reads the comment prefixes from `lang_mod.comment_prefixes/0` (the language + module passed to `init/2`). Returns no vote if no prefixes are configured. + + Emits at the end of the stream. 
+ """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + @nl CodeQA.AST.Lexing.NewlineToken.kind() + @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + def source(_), do: CodeQA.AST.Signals.Classification.CommentDensitySignal + def group(_), do: :classification + + def init(_, lang_mod) do + prefixes = MapSet.new(lang_mod.comment_prefixes()) + %{prefixes: prefixes, at_line_start: true, comment_lines: 0, total_lines: 0} + end + + def emit(_, {_prev, token, next}, state) do + %{prefixes: prefixes, at_line_start: als} = state + + state = + case token.kind do + @nl -> + %{state | at_line_start: true} + + @ws -> + state + + _ when als -> + is_comment = MapSet.member?(prefixes, token.content) + + %{ + state + | at_line_start: false, + total_lines: state.total_lines + 1, + comment_lines: state.comment_lines + if(is_comment, do: 1, else: 0) + } + + _ -> + %{state | at_line_start: false} + end + + if next == nil and MapSet.size(prefixes) > 0 and state.total_lines > 0 do + if state.comment_lines / state.total_lines > 0.6 do + {MapSet.new([{:comment_vote, 2}]), :halt} + else + {MapSet.new(), state} + end + else + {MapSet.new(), state} + end + end + end +end diff --git a/lib/codeqa/ast/signals/classification/config_signal.ex b/lib/codeqa/ast/signals/classification/config_signal.ex new file mode 100644 index 0000000..d6453b1 --- /dev/null +++ b/lib/codeqa/ast/signals/classification/config_signal.ex @@ -0,0 +1,54 @@ +defmodule CodeQA.AST.Signals.Classification.ConfigSignal do + @moduledoc """ + Classification signal — votes `:config` when a configuration keyword + appears at indent 0 and bracket depth 0. + + Matches `config` (Elixir Mix.Config), `configure`, `settings`, `options`, + `defaults`. Emits at most one vote. 
+ """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + @nl CodeQA.AST.Lexing.NewlineToken.kind() + @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + @config_keywords MapSet.new(["config", "configure", "settings", "options", "defaults"]) + def source(_), do: CodeQA.AST.Signals.Classification.ConfigSignal + def group(_), do: :classification + + def init(_, _lang_mod), + do: %{at_line_start: true, indent: 0, bracket_depth: 0, is_first: true} + + def emit(_, {_prev, token, _next}, state) do + %{at_line_start: als, indent: ind, bracket_depth: bd, is_first: first} = state + + case token.kind do + @nl -> + {MapSet.new(), %{state | at_line_start: true, indent: 0}} + + @ws when als -> + {MapSet.new(), %{state | indent: ind + 1, at_line_start: true}} + + @ws -> + {MapSet.new(), state} + + v when v in ["(", "[", "{"] -> + {MapSet.new(), %{state | bracket_depth: bd + 1, at_line_start: false, is_first: false}} + + v when v in [")", "]", "}"] -> + _ = v + + {MapSet.new(), + %{state | bracket_depth: max(0, bd - 1), at_line_start: false, is_first: false}} + + _ -> + if ind == 0 and bd == 0 and MapSet.member?(@config_keywords, token.content) do + weight = if first, do: 3, else: 1 + {MapSet.new([{:config_vote, weight}]), :halt} + else + {MapSet.new(), %{state | at_line_start: false, is_first: false}} + end + end + end + end +end diff --git a/lib/codeqa/ast/signals/classification/data_signal.ex b/lib/codeqa/ast/signals/classification/data_signal.ex new file mode 100644 index 0000000..1d6aa77 --- /dev/null +++ b/lib/codeqa/ast/signals/classification/data_signal.ex @@ -0,0 +1,67 @@ +defmodule CodeQA.AST.Signals.Classification.DataSignal do + @moduledoc """ + Classification signal — votes `:data` when a token stream consists primarily + of literal values (``, ``) with no control-flow keywords. + + Emits at the end of the stream (when `next == nil`). Votes only when + literal ratio > 0.6 and no control-flow keywords were seen. 
+ """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + @str CodeQA.AST.Lexing.StringToken.kind() + @control_flow MapSet.new([ + "if", + "else", + "elsif", + "elif", + "unless", + "for", + "while", + "do", + "case", + "when", + "cond", + "switch", + "loop", + "until" + ]) + def source(_), do: CodeQA.AST.Signals.Classification.DataSignal + def group(_), do: :classification + + def init(_, _lang_mod), + do: %{literal_count: 0, id_count: 0, has_control_flow: false} + + def emit(_, {_prev, token, next}, state) do + state = + case token.kind do + kind when kind in [@str, ""] -> + %{state | literal_count: state.literal_count + 1} + + "" -> + if MapSet.member?(@control_flow, token.content) do + %{state | has_control_flow: true, id_count: state.id_count + 1} + else + %{state | id_count: state.id_count + 1} + end + + _ -> + state + end + + if next == nil do + total = state.literal_count + state.id_count + + if total > 0 and not state.has_control_flow and + state.literal_count / total > 0.6 do + {MapSet.new([{:data_vote, 2}]), :halt} + else + {MapSet.new(), state} + end + else + {MapSet.new(), state} + end + end + end +end diff --git a/lib/codeqa/ast/signals/classification/doc_signal.ex b/lib/codeqa/ast/signals/classification/doc_signal.ex new file mode 100644 index 0000000..615cf55 --- /dev/null +++ b/lib/codeqa/ast/signals/classification/doc_signal.ex @@ -0,0 +1,29 @@ +defmodule CodeQA.AST.Signals.Classification.DocSignal do + @moduledoc """ + Classification signal — votes `:doc` when a `` (triple-quoted string) token + is found anywhere in the node's token stream. + + Weight: 3 (unambiguous — triple-quoted strings are documentation). + Emits at most one vote per token stream. 
+ """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + @doc_kind CodeQA.AST.Lexing.StringToken.doc_kind() + def source(_), do: CodeQA.AST.Signals.Classification.DocSignal + def group(_), do: :classification + + def init(_, _lang_mod), do: %{voted: false} + + def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state} + + def emit(_, {_prev, token, _next}, state) do + if token.kind == @doc_kind do + {MapSet.new([{:doc_vote, 3}]), %{state | voted: true}} + else + {MapSet.new(), state} + end + end + end +end diff --git a/lib/codeqa/ast/signals/classification/function_signal.ex b/lib/codeqa/ast/signals/classification/function_signal.ex new file mode 100644 index 0000000..ca261ce --- /dev/null +++ b/lib/codeqa/ast/signals/classification/function_signal.ex @@ -0,0 +1,72 @@ +defmodule CodeQA.AST.Signals.Classification.FunctionSignal do + @moduledoc """ + Classification signal — votes `:function` when a function definition keyword + appears at indent 0 and bracket depth 0. + + Weights: + - 3 when it is the first content token of the block (strong match) + - 1 when found later in the block (weak match, e.g. after a leading comment) + + Does NOT include module/class/namespace keywords (handled by ModuleSignal) or + test macros like `test`/`describe` (handled by TestSignal). + Emits at most one vote per token stream. 
+ """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + @nl CodeQA.AST.Lexing.NewlineToken.kind() + @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + def source(_), do: CodeQA.AST.Signals.Classification.FunctionSignal + def group(_), do: :classification + + def init(_, lang_mod) do + %{ + at_line_start: true, + indent: 0, + bracket_depth: 0, + is_first: true, + voted: false, + keywords: CodeQA.Language.function_keywords(lang_mod) + } + end + + def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state} + + def emit( + _, + {_prev, token, _next}, + %{at_line_start: als, indent: ind, bracket_depth: bd, is_first: first} = state + ) do + case token.kind do + @nl -> + {MapSet.new(), %{state | at_line_start: true, indent: 0}} + + @ws when als -> + {MapSet.new(), %{state | indent: ind + 1, at_line_start: true}} + + @ws -> + {MapSet.new(), state} + + v when v in ["(", "[", "{"] -> + {MapSet.new(), %{state | bracket_depth: bd + 1, is_first: false, at_line_start: false}} + + v when v in [")", "]", "}"] -> + _ = v + + {MapSet.new(), + %{state | bracket_depth: max(0, bd - 1), is_first: false, at_line_start: false}} + + _ -> + if ind == 0 and bd == 0 and MapSet.member?(state.keywords, token.content) do + weight = if first, do: 3, else: 1 + + {MapSet.new([{:function_vote, weight}]), + %{state | is_first: false, at_line_start: false, voted: true}} + else + {MapSet.new(), %{state | is_first: false, at_line_start: false}} + end + end + end + end +end diff --git a/lib/codeqa/ast/signals/classification/import_signal.ex b/lib/codeqa/ast/signals/classification/import_signal.ex new file mode 100644 index 0000000..b6ad524 --- /dev/null +++ b/lib/codeqa/ast/signals/classification/import_signal.ex @@ -0,0 +1,62 @@ +defmodule CodeQA.AST.Signals.Classification.ImportSignal do + @moduledoc """ + Classification signal — votes `:import` when an import/require/use/alias keyword + appears at indent 0. 
+ + Weights: + - 3 when it is the first content token of the block (strong match) + - 1 when found later in the block + + Covers: Elixir (import, require, use, alias), Python (import, from), + JavaScript/Go (import, package), C# (using), Ruby/Lua (require, include). + Emits at most one vote per token stream. + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + @nl CodeQA.AST.Lexing.NewlineToken.kind() + @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + def source(_), do: CodeQA.AST.Signals.Classification.ImportSignal + def group(_), do: :classification + + def init(_, lang_mod) do + %{ + at_line_start: true, + indent: 0, + is_first: true, + voted: false, + keywords: CodeQA.Language.import_keywords(lang_mod) + } + end + + def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state} + + def emit( + _, + {_prev, token, _next}, + %{at_line_start: als, indent: ind, is_first: first} = state + ) do + case token.kind do + @nl -> + {MapSet.new(), %{state | at_line_start: true, indent: 0}} + + @ws when als -> + {MapSet.new(), %{state | indent: ind + 1, at_line_start: true}} + + @ws -> + {MapSet.new(), state} + + _ -> + if ind == 0 and MapSet.member?(state.keywords, token.content) do + weight = if first, do: 3, else: 1 + + {MapSet.new([{:import_vote, weight}]), + %{state | is_first: false, at_line_start: false, voted: true}} + else + {MapSet.new(), %{state | is_first: false, at_line_start: false}} + end + end + end + end +end diff --git a/lib/codeqa/ast/signals/classification/module_signal.ex b/lib/codeqa/ast/signals/classification/module_signal.ex new file mode 100644 index 0000000..e58a150 --- /dev/null +++ b/lib/codeqa/ast/signals/classification/module_signal.ex @@ -0,0 +1,71 @@ +defmodule CodeQA.AST.Signals.Classification.ModuleSignal do + @moduledoc """ + Classification signal — votes `:module` when a module/class/namespace definition + keyword appears at indent 0 and bracket depth 0. 
+ + Weights: + - 3 when it is the first content token of the block (strong match) + - 1 when found later in the block + + Keyword set is disjoint from FunctionSignal and TestSignal to avoid conflicts. + Emits at most one vote per token stream. + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + @nl CodeQA.AST.Lexing.NewlineToken.kind() + @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + def source(_), do: CodeQA.AST.Signals.Classification.ModuleSignal + def group(_), do: :classification + + def init(_, lang_mod) do + %{ + at_line_start: true, + indent: 0, + bracket_depth: 0, + is_first: true, + voted: false, + keywords: CodeQA.Language.module_keywords(lang_mod) + } + end + + def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state} + + def emit( + _, + {_prev, token, _next}, + %{at_line_start: als, indent: ind, bracket_depth: bd, is_first: first} = state + ) do + case token.kind do + @nl -> + {MapSet.new(), %{state | at_line_start: true, indent: 0}} + + @ws when als -> + {MapSet.new(), %{state | indent: ind + 1, at_line_start: true}} + + @ws -> + {MapSet.new(), state} + + v when v in ["(", "[", "{"] -> + {MapSet.new(), %{state | bracket_depth: bd + 1, is_first: false, at_line_start: false}} + + v when v in [")", "]", "}"] -> + _ = v + + {MapSet.new(), + %{state | bracket_depth: max(0, bd - 1), is_first: false, at_line_start: false}} + + _ -> + if ind == 0 and bd == 0 and MapSet.member?(state.keywords, token.content) do + weight = if first, do: 3, else: 1 + + {MapSet.new([{:module_vote, weight}]), + %{state | is_first: false, at_line_start: false, voted: true}} + else + {MapSet.new(), %{state | is_first: false, at_line_start: false}} + end + end + end + end +end diff --git a/lib/codeqa/ast/signals/classification/test_signal.ex b/lib/codeqa/ast/signals/classification/test_signal.ex new file mode 100644 index 0000000..b9d5a8c --- /dev/null +++ b/lib/codeqa/ast/signals/classification/test_signal.ex @@ -0,0 +1,63 @@ +defmodule 
CodeQA.AST.Signals.Classification.TestSignal do + @moduledoc """ + Classification signal — votes `:test` when a test block keyword appears at + indent 0. + + Weights: + - 3 when it is the first content token of the block (strong match) + - 1 when found later in the block + + Covers: ExUnit (test, describe), RSpec/Jest/Mocha (it, context, describe), + Cucumber (scenario, given, feature). `test` takes priority over + FunctionSignal — Elixir test macros look like function calls but are test blocks. + Emits at most one vote per token stream. + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + @nl CodeQA.AST.Lexing.NewlineToken.kind() + @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + def source(_), do: CodeQA.AST.Signals.Classification.TestSignal + def group(_), do: :classification + + def init(_, lang_mod) do + %{ + at_line_start: true, + indent: 0, + is_first: true, + voted: false, + keywords: CodeQA.Language.test_keywords(lang_mod) + } + end + + def emit(_, _, %{voted: true} = state), do: {MapSet.new(), state} + + def emit( + _, + {_prev, token, _next}, + %{at_line_start: als, indent: ind, is_first: first} = state + ) do + case token.kind do + @nl -> + {MapSet.new(), %{state | at_line_start: true, indent: 0}} + + @ws when als -> + {MapSet.new(), %{state | indent: ind + 1, at_line_start: true}} + + @ws -> + {MapSet.new(), state} + + _ -> + if ind == 0 and MapSet.member?(state.keywords, token.content) do + weight = if first, do: 3, else: 1 + + {MapSet.new([{:test_vote, weight}]), + %{state | is_first: false, at_line_start: false, voted: true}} + else + {MapSet.new(), %{state | is_first: false, at_line_start: false}} + end + end + end + end +end diff --git a/lib/codeqa/ast/signals/classification/type_signal.ex b/lib/codeqa/ast/signals/classification/type_signal.ex new file mode 100644 index 0000000..0acf6ad --- /dev/null +++ b/lib/codeqa/ast/signals/classification/type_signal.ex @@ -0,0 +1,49 @@ +defmodule CodeQA.AST.Signals.Classification.TypeSignal do + 
@moduledoc """ + Classification signal — votes `:type` when an Elixir type definition + attribute (`@type`, `@typep`, `@opaque`) appears at indent 0. + + Emits at most one vote. Complements `AttributeSignal`, which handles + `@spec`, `@doc`, and other attributes. + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + @nl CodeQA.AST.Lexing.NewlineToken.kind() + @ws CodeQA.AST.Lexing.WhitespaceToken.kind() + @type_attrs MapSet.new(["type", "typep", "opaque"]) + def source(_), do: CodeQA.AST.Signals.Classification.TypeSignal + def group(_), do: :classification + + def init(_, _lang_mod), + do: %{at_line_start: true, indent: 0, saw_at: false, is_first: true} + + def emit(_, {_prev, token, _next}, state) do + case token.kind do + @nl -> + {MapSet.new(), %{state | at_line_start: true, indent: 0, saw_at: false}} + + @ws when state.at_line_start -> + {MapSet.new(), %{state | indent: state.indent + 1, at_line_start: true}} + + @ws -> + {MapSet.new(), state} + + "@" when state.indent == 0 -> + {MapSet.new(), %{state | saw_at: true, at_line_start: false}} + + _ when state.saw_at and state.indent == 0 -> + if MapSet.member?(@type_attrs, token.content) do + weight = if state.is_first, do: 3, else: 1 + {MapSet.new([{:type_vote, weight}]), :halt} + else + {MapSet.new(), %{state | saw_at: false, is_first: false, at_line_start: false}} + end + + _ -> + {MapSet.new(), %{state | saw_at: false, is_first: false, at_line_start: false}} + end + end + end +end diff --git a/lib/codeqa/ast/signals/structural/access_modifier_signal.ex b/lib/codeqa/ast/signals/structural/access_modifier_signal.ex new file mode 100644 index 0000000..43ed068 --- /dev/null +++ b/lib/codeqa/ast/signals/structural/access_modifier_signal.ex @@ -0,0 +1,80 @@ +defmodule CodeQA.AST.Signals.Structural.AccessModifierSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + + @moduledoc """ + Emits `:access_modifier_split` when an access modifier keyword appears at line + 
start with bracket_depth == 0. + + Unlike `KeywordSignal`, this does NOT require indentation level 0, so it + detects class members inside bracket enclosures (e.g. `public void foo()` inside + a `class Foo { ... }` body). + + When `opts[:language_module]` is set, uses that language's + `access_modifiers/0` callback. + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + def source(_), do: CodeQA.AST.Signals.Structural.AccessModifierSignal + def group(_), do: :split + + def init(_, lang_mod) do + modifiers = CodeQA.Language.access_modifiers(lang_mod) + %{idx: 0, bracket_depth: 0, at_line_start: true, seen_content: false, modifiers: modifiers} + end + + def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, at_line_start: true} = state), + do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1}} + + def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + when k in ["(", "[", "{"], + do: + {MapSet.new(), + %{ + state + | idx: idx + 1, + bracket_depth: bd + 1, + seen_content: true, + at_line_start: false + }} + + def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + when k in [")", "]", "}"], + do: + {MapSet.new(), + %{ + state + | idx: idx + 1, + bracket_depth: max(0, bd - 1), + seen_content: true, + at_line_start: false + }} + + def emit(_, {_, token, _}, %{idx: idx} = state) do + base = %{state | idx: idx + 1, seen_content: true, at_line_start: false} + + emissions = + if modifier_split?(state, token), + do: MapSet.new([{:access_modifier_split, idx}]), + else: MapSet.new() + + {emissions, base} + end + + defp modifier_split?( + %{seen_content: true, bracket_depth: 0, at_line_start: true, modifiers: m}, + %{content: c} + ), + do: MapSet.member?(m, c) + + defp 
defmodule CodeQA.AST.Signals.Structural.AssignmentFunctionSignal do
  alias CodeQA.AST.Lexing.NewlineToken
  alias CodeQA.AST.Lexing.WhitespaceToken

  @moduledoc """
  Emits `:assignment_function_split` when a top-level assignment to a function
  is detected at indent 0 and bracket depth 0.

  Covers patterns such as:
  - `identifier = function(...) {}`
  - `identifier = async function(...) {}`
  - `identifier = (...) => {}`

  The emitted index is the index of the identifier token that starts the
  assignment target, not the index of `function` / `=>`.
  """

  defstruct []

  defimpl CodeQA.AST.Parsing.Signal do
    # NOTE(review): the text under review shows the identifier kind as an empty
    # string, which no identifier token could plausibly carry. It looks like an
    # angle-bracket placeholder that was stripped in transit; restored as
    # "<ID>" — confirm against the lexer's identifier token kind.
    @id "<ID>"

    def source(_), do: CodeQA.AST.Signals.Structural.AssignmentFunctionSignal
    def group(_), do: :split

    # `phase` is a small state machine:
    #   :idle -> {:saw_id, i} -> {:saw_eq, i} -> emit on `function`
    #                                         -> {:in_parens, i, depth} -> {:saw_close_paren, i} -> emit on `=>`
    # where `i` is the index of the identifier that started the assignment.
    def init(_, _lang_mod) do
      %{
        idx: 0,
        indent: 0,
        bracket_depth: 0,
        at_line_start: true,
        seen_content: false,
        phase: :idle
      }
    end

    # A newline aborts any assignment in progress and starts a new line.
    def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state),
      do: {MapSet.new(), %{state | idx: idx + 1, indent: 0, at_line_start: true, phase: :idle}}

    # Leading whitespace counts towards the indent of the current line.
    def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, indent: i, at_line_start: true} = state),
      do: {MapSet.new(), %{state | idx: idx + 1, indent: i + 1, at_line_start: true}}

    # Whitespace inside a line does not affect the phase.
    def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state),
      do: {MapSet.new(), %{state | idx: idx + 1}}

    def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd, phase: phase} = state)
        when k in ["(", "[", "{"] do
      {MapSet.new(),
       %{
         state
         | idx: idx + 1,
           bracket_depth: bd + 1,
           at_line_start: false,
           seen_content: true,
           phase: advance_phase_open(phase, k)
       }}
    end

    def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd, phase: phase} = state)
        when k in [")", "]", "}"] do
      {MapSet.new(),
       %{
         state
         | idx: idx + 1,
           # Clamp at zero so unbalanced closers cannot go negative.
           bracket_depth: max(0, bd - 1),
           at_line_start: false,
           seen_content: true,
           phase: advance_phase_close(phase, k)
       }}
    end

    # Any other content token drives the phase machine and may emit a split.
    def emit(
          _,
          {_, token, _},
          %{
            idx: idx,
            seen_content: sc,
            indent: i,
            bracket_depth: bd,
            at_line_start: als,
            phase: phase
          } = state
        ) do
      {emissions, new_phase} = advance_phase(phase, token, idx, sc, i, bd, als)

      {emissions,
       %{state | idx: idx + 1, at_line_start: false, seen_content: true, phase: new_phase}}
    end

    # Opening bracket: track nested "(" inside the parameter list, enter the
    # parameter list after "=", otherwise abandon the candidate.
    defp advance_phase_open({:in_parens, id_idx, pd}, "("), do: {:in_parens, id_idx, pd + 1}
    defp advance_phase_open({:in_parens, id_idx, pd}, _), do: {:in_parens, id_idx, pd}
    defp advance_phase_open({:saw_eq, id_idx}, "("), do: {:in_parens, id_idx, 1}
    defp advance_phase_open(_, _), do: :idle

    # Closing bracket: the ")" matching the first "(" ends the parameter list.
    defp advance_phase_close({:in_parens, id_idx, 1}, ")"), do: {:saw_close_paren, id_idx}

    defp advance_phase_close({:in_parens, id_idx, pd}, ")") when pd > 1,
      do: {:in_parens, id_idx, pd - 1}

    defp advance_phase_close({:in_parens, id_idx, pd}, _), do: {:in_parens, id_idx, pd}
    defp advance_phase_close(_, _), do: :idle

    # advance_phase(phase, token, idx, seen_content, indent, bracket_depth, at_line_start)
    #
    # A candidate starts only on an identifier that is the first token of an
    # unindented line at bracket depth 0, after some content was already seen.
    defp advance_phase(:idle, %{kind: @id}, idx, true, 0, 0, true),
      do: {MapSet.new(), {:saw_id, idx}}

    defp advance_phase(:idle, _, _, _, _, _, _), do: {MapSet.new(), :idle}

    defp advance_phase({:saw_id, id_idx}, %{kind: "="}, _, _, _, _, _),
      do: {MapSet.new(), {:saw_eq, id_idx}}

    # A further identifier (e.g. `const name`) moves the anchor to the latest one.
    defp advance_phase({:saw_id, _}, %{kind: @id}, idx, _, _, _, _),
      do: {MapSet.new(), {:saw_id, idx}}

    # Dotted targets such as `module.exports` keep the original anchor.
    defp advance_phase({:saw_id, id_idx}, %{kind: "."}, _, _, _, _, _),
      do: {MapSet.new(), {:saw_id, id_idx}}

    defp advance_phase({:saw_id, _}, _, _, _, _, _, _), do: {MapSet.new(), :idle}

    defp advance_phase({:saw_eq, id_idx}, %{kind: @id, content: "function"}, _, _, _, _, _),
      do: {MapSet.new([{:assignment_function_split, id_idx}]), :idle}

    # `async` may precede either `function` or an arrow parameter list.
    defp advance_phase({:saw_eq, id_idx}, %{kind: @id, content: "async"}, _, _, _, _, _),
      do: {MapSet.new(), {:saw_eq, id_idx}}

    defp advance_phase({:saw_eq, _}, _, _, _, _, _, _), do: {MapSet.new(), :idle}

    # Fix: tokens inside the parameter list (names, commas, defaults, ...) must
    # not abort the candidate. Without this clause they hit the catch-all below
    # and reset to :idle, so only `x = () => ...` (empty list) was ever detected.
    defp advance_phase({:in_parens, _, _} = phase, _, _, _, _, _, _),
      do: {MapSet.new(), phase}

    defp advance_phase({:saw_close_paren, id_idx}, %{kind: "=>"}, _, _, _, _, _),
      do: {MapSet.new([{:assignment_function_split, id_idx}]), :idle}

    defp advance_phase({:saw_close_paren, _}, _, _, _, _, _, _), do: {MapSet.new(), :idle}

    defp advance_phase(_, _, _, _, _, _, _), do: {MapSet.new(), :idle}
  end
end
+ """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + def source(_), do: CodeQA.AST.Signals.Structural.BlankLineSignal + def group(_), do: :split + + def init(_, lang_mod) do + tokens = CodeQA.Language.block_end_tokens(lang_mod) + %{idx: 0, nl_run: 0, seen_content: false, last_content: nil, block_end_tokens: tokens} + end + + def emit(_, {_, %NewlineToken{}, _}, %{idx: idx, nl_run: nl} = state), + do: {MapSet.new(), %{state | idx: idx + 1, nl_run: nl + 1}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1}} + + def emit(_, {_, token, _}, %{idx: idx} = state) do + base = %{state | idx: idx + 1, nl_run: 0, seen_content: true, last_content: token.content} + + emissions = + if blank_split?(state), do: MapSet.new([{:blank_split, idx}]), else: MapSet.new() + + {emissions, base} + end + + defp blank_split?(%{seen_content: true, nl_run: nl, block_end_tokens: t, last_content: lc}) + when nl >= 2, + do: MapSet.member?(t, lc) + + defp blank_split?(_), do: false + end +end diff --git a/lib/codeqa/ast/signals/structural/bracket_signal.ex b/lib/codeqa/ast/signals/structural/bracket_signal.ex new file mode 100644 index 0000000..201f66e --- /dev/null +++ b/lib/codeqa/ast/signals/structural/bracket_signal.ex @@ -0,0 +1,51 @@ +defmodule CodeQA.AST.Signals.Structural.BracketSignal do + @moduledoc """ + Emits `:bracket_enclosure` for each outermost bracket pair `()`, `[]`, `{}`. + + Replaces `ParseRules.BracketRule`. State tracks: token index, bracket depth, + start index of current open bracket, and a stack of open bracket kinds for + mismatch detection. 
+ """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + @close %{")" => "(", "]" => "[", "}" => "{"} + + def source(_), do: CodeQA.AST.Signals.Structural.BracketSignal + def group(_), do: :enclosure + + def init(_, _lang_mod), do: %{idx: 0, depth: 0, start_idx: nil, stack: []} + + def emit(_, {_, %{kind: k}, _}, %{idx: idx, depth: 0, stack: stack} = state) + when k in ["(", "[", "{"], + do: {MapSet.new(), %{state | idx: idx + 1, depth: 1, start_idx: idx, stack: [k | stack]}} + + def emit(_, {_, %{kind: k}, _}, %{idx: idx, depth: d, stack: stack} = state) + when k in ["(", "[", "{"], + do: {MapSet.new(), %{state | idx: idx + 1, depth: d + 1, stack: [k | stack]}} + + def emit(_, {_, %{kind: k}, _}, %{idx: idx, depth: d, stack: [top | rest]} = state) + when k in [")", "]", "}"] do + base = %{state | idx: idx + 1} + + if @close[k] == top, + do: close_match(base, d, state.start_idx, idx, rest), + else: {MapSet.new(), base} + end + + def emit(_, {_, %{kind: k}, _}, %{idx: idx} = state) when k in [")", "]", "}"], + do: {MapSet.new(), %{state | idx: idx + 1}} + + def emit(_, {_, _, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1}} + + defp close_match(state, 1, start_idx, idx, rest), + do: + {MapSet.new([{:bracket_enclosure, {start_idx, idx}}]), + %{state | depth: 0, start_idx: nil, stack: rest}} + + defp close_match(state, d, _start_idx, _idx, rest), + do: {MapSet.new(), %{state | depth: d - 1, stack: rest}} + end +end diff --git a/lib/codeqa/ast/signals/structural/branch_split_signal.ex b/lib/codeqa/ast/signals/structural/branch_split_signal.ex new file mode 100644 index 0000000..1d6d264 --- /dev/null +++ b/lib/codeqa/ast/signals/structural/branch_split_signal.ex @@ -0,0 +1,58 @@ +defmodule CodeQA.AST.Signals.Structural.BranchSplitSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + + @moduledoc """ + Emits `:branch_split` when a branch keyword appears at bracket depth 0 + and at least one token has 
been seen before it. + + Unlike `KeywordSignal`, there is no indentation constraint — branches inside + functions are intentionally split into sibling child blocks by the parser's + recursive phase. + + When `opts[:language_module]` is set, uses that language's + `branch_keywords/0` callback. + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + def source(_), do: CodeQA.AST.Signals.Structural.BranchSplitSignal + def group(_), do: :branch_split + + def init(_, lang_mod) do + keywords = CodeQA.Language.branch_keywords(lang_mod) + %{idx: 0, bracket_depth: 0, seen_content: false, keywords: keywords} + end + + def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1}} + + def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + when k in ["(", "[", "{"], + do: {MapSet.new(), %{state | idx: idx + 1, bracket_depth: bd + 1, seen_content: true}} + + def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + when k in [")", "]", "}"], + do: + {MapSet.new(), + %{state | idx: idx + 1, bracket_depth: max(0, bd - 1), seen_content: true}} + + def emit(_, {_, token, _}, %{idx: idx} = state) do + base = %{state | idx: idx + 1, seen_content: true} + + emissions = + if branch_split?(state, token), do: MapSet.new([{:branch_split, idx}]), else: MapSet.new() + + {emissions, base} + end + + defp branch_split?(%{seen_content: true, bracket_depth: 0, keywords: kw}, %{content: c}), + do: MapSet.member?(kw, c) + + defp branch_split?(_, _), do: false + end +end diff --git a/lib/codeqa/ast/signals/structural/colon_indent_signal.ex b/lib/codeqa/ast/signals/structural/colon_indent_signal.ex new file mode 100644 index 0000000..9189b79 --- /dev/null +++ b/lib/codeqa/ast/signals/structural/colon_indent_signal.ex @@ -0,0 +1,83 @@ +defmodule CodeQA.AST.Signals.Structural.ColonIndentSignal do + alias 
CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + + @moduledoc """ + Emits `:colon_indent_enclosure` for colon-indented blocks (Python). + + Only active when `opts[:language_module]` returns true for `uses_colon_indent?/0`. Replaces + `ParseRules.ColonIndentationRule`. + + ## Limitation + + The original rule flushes open blocks at EOF via `close_all_open/1`. Since + `emit/3` has no end-of-stream callback, open blocks are instead flushed at + each `` token. This correctly handles single-statement blocks; multi-line + blocks are closed at the first newline (conservative). + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + def source(_), do: CodeQA.AST.Signals.Structural.ColonIndentSignal + def group(_), do: :enclosure + + def init(_, lang_mod) do + %{ + enabled: lang_mod.uses_colon_indent?(), + idx: 0, + ci: 0, + last_colon_indent: nil, + stack: [] + } + end + + def emit(_, _, %{enabled: false} = state), + do: {MapSet.new(), %{state | idx: state.idx + 1}} + + def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state) do + {emissions, _} = flush_stack(state.stack) + {emissions, %{state | idx: idx + 1, ci: 0, stack: []}} + end + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, ci: ci} = state), + do: {MapSet.new(), %{state | idx: idx + 1, ci: ci + 1}} + + def emit(_, {_, %{kind: ":"}, _}, %{idx: idx, ci: ci} = state), + do: {MapSet.new(), %{state | idx: idx + 1, last_colon_indent: ci}} + + def emit(_, {_, _, _}, %{idx: idx, ci: ci} = state) do + {dedent_emissions, remaining} = close_dedented(state.stack, ci) + new_stack = maybe_open_block(remaining, state.last_colon_indent, ci, idx) + + {dedent_emissions, + %{state | idx: idx + 1, last_colon_indent: nil, stack: update_top(new_stack, idx)}} + end + + defp close_dedented(stack, ci) do + {to_close, keep} = Enum.split_while(stack, fn e -> ci <= e.colon_indent end) + {build_emissions(to_close), keep} + end + + defp flush_stack(stack), do: {build_emissions(stack), []} + + defp 
maybe_open_block(stack, colon_indent, ci, idx) + when colon_indent != nil and ci > colon_indent, + do: [%{colon_indent: colon_indent, sub_start: idx, last_content_idx: idx} | stack] + + defp maybe_open_block(stack, _, _, _), do: stack + + defp build_emissions(entries) do + Enum.reduce(entries, MapSet.new(), fn + %{sub_start: s, last_content_idx: e}, acc when e != nil -> + MapSet.put(acc, {:colon_indent_enclosure, {s, e}}) + + _entry, acc -> + acc + end) + end + + defp update_top([], _idx), do: [] + defp update_top([top | rest], idx), do: [Map.put(top, :last_content_idx, idx) | rest] + end +end diff --git a/lib/codeqa/ast/signals/structural/comment_divider_signal.ex b/lib/codeqa/ast/signals/structural/comment_divider_signal.ex new file mode 100644 index 0000000..d01e5e8 --- /dev/null +++ b/lib/codeqa/ast/signals/structural/comment_divider_signal.ex @@ -0,0 +1,76 @@ +defmodule CodeQA.AST.Signals.Structural.CommentDividerSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + + @moduledoc """ + Emits `:comment_divider_split` when a line is a "visual divider" comment — + a comment prefix at line start followed immediately by repetitive non-word + punctuation characters. + + Used to detect section separators like `# ---`, `// ===`, `-- ---`. + No split is emitted for the first such line (seen_content must be true). + + When `opts[:language_module]` is set, uses that language's + `comment_prefixes/0` callback. 
+ """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + def source(_), do: CodeQA.AST.Signals.Structural.CommentDividerSignal + def group(_), do: :split + + def init(_, lang_mod) do + comment_prefixes = MapSet.new(lang_mod.comment_prefixes()) + divider_indicators = CodeQA.Language.divider_indicators(lang_mod) + + %{ + idx: 0, + at_line_start: true, + seen_content: false, + indent: 0, + comment_prefixes: comment_prefixes, + divider_indicators: divider_indicators + } + end + + def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true, indent: 0}} + + def emit( + _, + {_, %WhitespaceToken{}, _}, + %{idx: idx, at_line_start: true, indent: indent} = state + ), + do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true, indent: indent + 1}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1}} + + def emit(_, {_, token, next}, %{idx: idx} = state) do + base = %{state | idx: idx + 1, at_line_start: false, seen_content: true} + + emissions = + if divider_split?(state, token, next), + do: MapSet.new([{:comment_divider_split, idx}]), + else: MapSet.new() + + {emissions, base} + end + + defp divider_split?( + %{ + seen_content: true, + at_line_start: true, + indent: 0, + comment_prefixes: cp, + divider_indicators: di + }, + %{kind: k}, + next + ), + do: MapSet.member?(cp, k) and next != nil and MapSet.member?(di, next.kind) + + defp divider_split?(_, _, _), do: false + end +end diff --git a/lib/codeqa/ast/signals/structural/decorator_signal.ex b/lib/codeqa/ast/signals/structural/decorator_signal.ex new file mode 100644 index 0000000..0dc1f5b --- /dev/null +++ b/lib/codeqa/ast/signals/structural/decorator_signal.ex @@ -0,0 +1,81 @@ +defmodule CodeQA.AST.Signals.Structural.DecoratorSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + + @moduledoc """ + Emits `:decorator_split` when a 
decorator/annotation marker appears at line + start with bracket_depth == 0. + + Detects two patterns: + - `@` at line start (Python, TypeScript, Java, Elixir decorators/annotations) + - `#[` at line start (Rust attribute syntax) + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + def source(_), do: CodeQA.AST.Signals.Structural.DecoratorSignal + def group(_), do: :split + + def init(_, _lang_mod), + do: %{idx: 0, bracket_depth: 0, at_line_start: true, seen_content: false} + + def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, at_line_start: true} = state), + do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1}} + + def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + when k in ["(", "[", "{"], + do: + {MapSet.new(), + %{ + state + | idx: idx + 1, + bracket_depth: bd + 1, + seen_content: true, + at_line_start: false + }} + + def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + when k in [")", "]", "}"], + do: + {MapSet.new(), + %{ + state + | idx: idx + 1, + bracket_depth: max(0, bd - 1), + seen_content: true, + at_line_start: false + }} + + def emit( + _, + {_, %{kind: "@"}, _}, + %{idx: idx, seen_content: true, bracket_depth: 0, at_line_start: true} = state + ), + do: + {MapSet.new([{:decorator_split, idx}]), + %{state | idx: idx + 1, seen_content: true, at_line_start: false}} + + def emit( + _, + {_, %{kind: "#"}, next}, + %{idx: idx, seen_content: true, bracket_depth: 0, at_line_start: true} = state + ) do + emissions = + if next != nil and next.kind == "[", + do: MapSet.new([{:decorator_split, idx}]), + else: MapSet.new() + + {emissions, %{state | idx: idx + 1, seen_content: true, at_line_start: false}} + end + + def emit(_, {_, _, _}, %{idx: idx} = 
state), + do: {MapSet.new(), %{state | idx: idx + 1, seen_content: true, at_line_start: false}} + end +end diff --git a/lib/codeqa/ast/signals/structural/dedent_to_zero_signal.ex b/lib/codeqa/ast/signals/structural/dedent_to_zero_signal.ex new file mode 100644 index 0000000..d644dad --- /dev/null +++ b/lib/codeqa/ast/signals/structural/dedent_to_zero_signal.ex @@ -0,0 +1,87 @@ +defmodule CodeQA.AST.Signals.Structural.DedentToZeroSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + + @moduledoc """ + Emits `:dedent_split` when code returns to indent level 0 after having been + at indent > 0 on the previous line. + + This is the primary split mechanism for Python and other indentation-significant + languages. The split fires at the first substantive token on a line that has no + leading whitespace, when the previous line did have leading whitespace. + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + def source(_), do: CodeQA.AST.Signals.Structural.DedentToZeroSignal + def group(_), do: :split + + def init(_, _lang_mod) do + %{ + idx: 0, + at_line_start: true, + seen_content: false, + current_line_has_indent: false, + current_line_has_content: false, + prev_line_had_indent: false + } + end + + def emit( + _, + {_, %NewlineToken{}, _}, + %{ + idx: idx, + current_line_has_content: clhc, + current_line_has_indent: clhi, + prev_line_had_indent: plhi + } = state + ) do + new_plhi = if clhc, do: clhi, else: plhi + + {MapSet.new(), + %{ + state + | idx: idx + 1, + at_line_start: true, + prev_line_had_indent: new_plhi, + current_line_has_indent: false, + current_line_has_content: false + }} + end + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, at_line_start: true} = state), + do: + {MapSet.new(), + %{state | idx: idx + 1, current_line_has_indent: true, at_line_start: true}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1}} + + def emit(_, {_, _, _}, %{idx: idx} = 
state) do + base = %{ + state + | idx: idx + 1, + at_line_start: false, + seen_content: true, + current_line_has_content: true + } + + emissions = + if dedent_split?(state), do: MapSet.new([{:dedent_split, idx}]), else: MapSet.new() + + {emissions, base} + end + + defp dedent_split?(%{ + at_line_start: true, + current_line_has_indent: false, + prev_line_had_indent: true, + seen_content: true + }), + do: true + + defp dedent_split?(_), do: false + end +end diff --git a/lib/codeqa/ast/signals/structural/doc_comment_lead_signal.ex b/lib/codeqa/ast/signals/structural/doc_comment_lead_signal.ex new file mode 100644 index 0000000..c5e5c4e --- /dev/null +++ b/lib/codeqa/ast/signals/structural/doc_comment_lead_signal.ex @@ -0,0 +1,65 @@ +defmodule CodeQA.AST.Signals.Structural.DocCommentLeadSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + + @moduledoc """ + Emits `:doc_comment_split` when a doc-comment opener appears at line start. + + Detects: + - `///` — Rust/C# XML doc comments: `//` token immediately followed by `/` + - `/**` — Java/JS JSDoc: `/` token at line start immediately followed by `*` + + No split is emitted for the first such line (seen_content must be true). 
+ """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + def source(_), do: CodeQA.AST.Signals.Structural.DocCommentLeadSignal + def group(_), do: :split + + def init(_, _lang_mod), do: %{idx: 0, at_line_start: true, seen_content: false} + + def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, at_line_start: true} = state), + do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1}} + + def emit( + _, + {_, %{kind: "//"}, next}, + %{idx: idx, at_line_start: true, seen_content: true} = state + ) do + base = %{state | idx: idx + 1, at_line_start: false} + + emissions = + if next != nil and next.kind == "/", + do: MapSet.new([{:doc_comment_split, idx}]), + else: MapSet.new() + + {emissions, base} + end + + def emit( + _, + {_, %{kind: "/"}, next}, + %{idx: idx, at_line_start: true, seen_content: true} = state + ) do + base = %{state | idx: idx + 1, at_line_start: false} + + emissions = + if next != nil and next.kind in ["*", "**"], + do: MapSet.new([{:doc_comment_split, idx}]), + else: MapSet.new() + + {emissions, base} + end + + def emit(_, {_, _, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: false, seen_content: true}} + end +end diff --git a/lib/codeqa/ast/signals/structural/keyword_signal.ex b/lib/codeqa/ast/signals/structural/keyword_signal.ex new file mode 100644 index 0000000..c13d3cf --- /dev/null +++ b/lib/codeqa/ast/signals/structural/keyword_signal.ex @@ -0,0 +1,83 @@ +defmodule CodeQA.AST.Signals.Structural.KeywordSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + + @moduledoc """ + Emits `:keyword_split` when a declaration keyword appears at bracket depth 0 + and indentation level 0. 
+ + When `opts[:language_module]` is set, uses that language's + `declaration_keywords/0` callback. + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + def source(_), do: CodeQA.AST.Signals.Structural.KeywordSignal + def group(_), do: :split + + def init(_, lang_mod) do + keywords = CodeQA.Language.declaration_keywords(lang_mod) + + %{ + idx: 0, + bracket_depth: 0, + indent: 0, + at_line_start: true, + seen_content: false, + keywords: keywords + } + end + + def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1, indent: 0, at_line_start: true}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, indent: i, at_line_start: true} = state), + do: {MapSet.new(), %{state | idx: idx + 1, indent: i + 1, at_line_start: true}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1}} + + def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + when k in ["(", "[", "{"], + do: + {MapSet.new(), + %{ + state + | idx: idx + 1, + bracket_depth: bd + 1, + seen_content: true, + at_line_start: false + }} + + def emit(_, {_, %{kind: k}, _}, %{idx: idx, bracket_depth: bd} = state) + when k in [")", "]", "}"], + do: + {MapSet.new(), + %{ + state + | idx: idx + 1, + bracket_depth: max(0, bd - 1), + seen_content: true, + at_line_start: false + }} + + def emit(_, {_, token, _}, %{idx: idx} = state) do + base = %{state | idx: idx + 1, seen_content: true, at_line_start: false} + + emissions = + if keyword_split?(state, token), + do: MapSet.new([{:keyword_split, idx}]), + else: MapSet.new() + + {emissions, base} + end + + defp keyword_split?(%{seen_content: true, bracket_depth: 0, indent: 0, keywords: kw}, %{ + content: c + }), + do: MapSet.member?(kw, c) + + defp keyword_split?(_, _), do: false + end +end diff --git a/lib/codeqa/ast/signals/structural/sql_block_signal.ex b/lib/codeqa/ast/signals/structural/sql_block_signal.ex new file mode 100644 index 
0000000..1e376f5 --- /dev/null +++ b/lib/codeqa/ast/signals/structural/sql_block_signal.ex @@ -0,0 +1,55 @@ +defmodule CodeQA.AST.Signals.Structural.SQLBlockSignal do + alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.WhitespaceToken + + @moduledoc """ + Emits `:sql_block_split` when a SQL DDL or DML statement keyword appears + at line start after prior content has been seen. + + Recognises uppercase and lowercase SQL statement starters: + DDL: CREATE, DROP, ALTER, TRUNCATE + DML: INSERT, UPDATE, DELETE, SELECT + Procedures/transactions: BEGIN, COMMIT, ROLLBACK, CALL, EXECUTE + + When `opts[:language_module]` is set, uses that language's + `statement_keywords/0` callback. + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + def source(_), do: CodeQA.AST.Signals.Structural.SQLBlockSignal + def group(_), do: :split + + def init(_, lang_mod) do + keywords = CodeQA.Language.statement_keywords(lang_mod) + %{idx: 0, at_line_start: true, seen_content: false, keywords: keywords} + end + + def emit(_, {_, %NewlineToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx, at_line_start: true} = state), + do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: true}} + + def emit(_, {_, %WhitespaceToken{}, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1}} + + def emit(_, {_, %{kind: ""} = token, _}, %{idx: idx} = state) do + base = %{state | idx: idx + 1, at_line_start: false, seen_content: true} + + emissions = + if sql_split?(state, token), do: MapSet.new([{:sql_block_split, idx}]), else: MapSet.new() + + {emissions, base} + end + + def emit(_, {_, _, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1, at_line_start: false, seen_content: true}} + + defp sql_split?(%{seen_content: true, at_line_start: true, keywords: kw}, %{content: c}), + do: MapSet.member?(kw, String.downcase(c)) + + defp sql_split?(_, _), 
do: false + end +end diff --git a/lib/codeqa/ast/signals/structural/triple_quote_signal.ex b/lib/codeqa/ast/signals/structural/triple_quote_signal.ex new file mode 100644 index 0000000..ac5808d --- /dev/null +++ b/lib/codeqa/ast/signals/structural/triple_quote_signal.ex @@ -0,0 +1,31 @@ +defmodule CodeQA.AST.Signals.Structural.TripleQuoteSignal do + @moduledoc """ + Emits `:triple_split` at each `StringToken.doc_kind/0` token boundary. + + The first of each pair marks the opening of a heredoc; the second marks the + token after the closing delimiter. These split values are used by the Parser + to compute protected ranges, preventing other signals' splits from being + applied inside heredoc content. + + Replaces `ParseRules.TripleQuoteRule`. + """ + + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + @doc_kind CodeQA.AST.Lexing.StringToken.doc_kind() + def source(_), do: CodeQA.AST.Signals.Structural.TripleQuoteSignal + def group(_), do: :split + + def init(_, _lang_mod), do: %{idx: 0, inside: false} + + def emit(_, {_, %{kind: @doc_kind}, _}, %{idx: idx, inside: false} = state), + do: {MapSet.new([{:triple_split, idx}]), %{state | idx: idx + 1, inside: true}} + + def emit(_, {_, %{kind: @doc_kind}, _}, %{idx: idx, inside: true} = state), + do: {MapSet.new([{:triple_split, idx + 1}]), %{state | idx: idx + 1, inside: false}} + + def emit(_, {_, _, _}, %{idx: idx} = state), + do: {MapSet.new(), %{state | idx: idx + 1}} + end +end diff --git a/test/support/counter_signal.ex b/test/support/counter_signal.ex new file mode 100644 index 0000000..7ffb5d8 --- /dev/null +++ b/test/support/counter_signal.ex @@ -0,0 +1,19 @@ +defmodule CodeQA.Support.CounterSignal do + @moduledoc false + defstruct [] +end + +defimpl CodeQA.AST.Parsing.Signal, for: CodeQA.Support.CounterSignal do + def source(_), do: CodeQA.Support.CounterSignal + def group(_), do: :test + def init(_, _), do: %{idx: 0} + + def emit(_, {_prev, token, _next}, %{idx: i} = state) do + emissions = + if token.kind == "", + do: 
MapSet.new([{:id_seen, i}]), + else: MapSet.new() + + {emissions, %{state | idx: i + 1}} + end +end diff --git a/test/support/node_matcher.ex b/test/support/node_matcher.ex new file mode 100644 index 0000000..679dc92 --- /dev/null +++ b/test/support/node_matcher.ex @@ -0,0 +1,17 @@ +defmodule Test.NodeMatcher do + @moduledoc """ + Helpers for asserting on tokens within `CompoundNode` structures. + + Returns tagged tuples that can be matched against token fields: + + - `exact(:content, "add")` — token whose `content` equals `"add"` exactly + - `partial(:content, "@doc")` — token whose `content` contains `"@doc"` as a substring + - `:value` targets the normalized token value instead of raw source content + """ + + @spec exact(:content | :value, String.t()) :: {:exact, :content | :value, String.t()} + def exact(field, value) when field in [:content, :value], do: {:exact, field, value} + + @spec partial(:content | :value, String.t()) :: {:partial, :content | :value, String.t()} + def partial(field, value) when field in [:content, :value], do: {:partial, field, value} +end From 62ed12dbeb435055d84daff2be88a64c31e2a522 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 19 Mar 2026 18:26:02 +0100 Subject: [PATCH 04/71] feat(languages): add multi-language definitions and test fixtures Add language abstractions for 30+ languages across native (C++, Go, Rust, Swift, Zig, Haskell, OCaml), scripting (Python, Ruby, JS, PHP, Lua, R, etc.), VM-based (Elixir, Java, Kotlin, Scala, C#, Dart, etc.), web, config (Docker, Terraform, Makefile), data (SQL, YAML, GraphQL), and markup formats. Includes test fixtures for each language family. 
Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/languages/code/native/cpp.ex | 48 +++++ lib/codeqa/languages/code/native/go.ex | 50 +++++ lib/codeqa/languages/code/native/haskell.ex | 56 ++++++ lib/codeqa/languages/code/native/ocaml.ex | 53 ++++++ lib/codeqa/languages/code/native/rust.ex | 53 ++++++ lib/codeqa/languages/code/native/swift.ex | 53 ++++++ lib/codeqa/languages/code/native/zig.ex | 53 ++++++ lib/codeqa/languages/code/scripting/julia.ex | 53 ++++++ lib/codeqa/languages/code/scripting/lua.ex | 46 +++++ lib/codeqa/languages/code/scripting/perl.ex | 53 ++++++ lib/codeqa/languages/code/scripting/php.ex | 54 ++++++ lib/codeqa/languages/code/scripting/python.ex | 56 ++++++ lib/codeqa/languages/code/scripting/r.ex | 48 +++++ lib/codeqa/languages/code/scripting/ruby.ex | 57 ++++++ lib/codeqa/languages/code/scripting/shell.ex | 46 +++++ lib/codeqa/languages/code/vm/clojure.ex | 53 ++++++ lib/codeqa/languages/code/vm/csharp.ex | 53 ++++++ lib/codeqa/languages/code/vm/dart.ex | 57 ++++++ lib/codeqa/languages/code/vm/elixir.ex | 58 ++++++ lib/codeqa/languages/code/vm/erlang.ex | 53 ++++++ lib/codeqa/languages/code/vm/fsharp.ex | 59 ++++++ lib/codeqa/languages/code/vm/java.ex | 51 +++++ lib/codeqa/languages/code/vm/kotlin.ex | 54 ++++++ lib/codeqa/languages/code/vm/scala.ex | 57 ++++++ lib/codeqa/languages/code/web/javascript.ex | 56 ++++++ lib/codeqa/languages/code/web/typescript.ex | 59 ++++++ lib/codeqa/languages/config/dockerfile.ex | 34 ++++ lib/codeqa/languages/config/makefile.ex | 31 +++ lib/codeqa/languages/config/terraform.ex | 32 ++++ lib/codeqa/languages/data/graphql.ex | 31 +++ lib/codeqa/languages/data/json.ex | 30 +++ lib/codeqa/languages/data/sql.ex | 39 ++++ lib/codeqa/languages/data/toml.ex | 30 +++ lib/codeqa/languages/data/yaml.ex | 30 +++ lib/codeqa/languages/language.ex | 180 ++++++++++++++++++ lib/codeqa/languages/markup/css.ex | 31 +++ lib/codeqa/languages/markup/html.ex | 33 ++++ lib/codeqa/languages/markup/markdown.ex | 30 +++ 
lib/codeqa/languages/markup/xml.ex | 30 +++ lib/codeqa/languages/unknown.ex | 30 +++ test/fixtures/sample.ex | 1 + test/support/fixtures/cpp/observer_pattern.ex | 71 +++++++ test/support/fixtures/cpp/smart_pointer.ex | 87 +++++++++ .../fixtures/cpp/template_container.ex | 90 +++++++++ .../fixtures/csharp/async_task_manager.ex | 67 +++++++ test/support/fixtures/csharp/linq_pipeline.ex | 71 +++++++ test/support/fixtures/csharp/plugin_system.ex | 72 +++++++ test/support/fixtures/dart/futures_async.ex | 78 ++++++++ .../fixtures/dart/mixin_composition.ex | 85 +++++++++ test/support/fixtures/dart/widget_state.ex | 91 +++++++++ test/support/fixtures/elixir/calculator.ex | 125 ++++++++++++ test/support/fixtures/elixir/event_bus.ex | 71 +++++++ test/support/fixtures/elixir/rate_limiter.ex | 85 +++++++++ test/support/fixtures/go/calculator.ex | 53 ++++++ test/support/fixtures/go/cli_parser.ex | 77 ++++++++ test/support/fixtures/go/http_middleware.ex | 86 +++++++++ test/support/fixtures/java/builder_pattern.ex | 81 ++++++++ .../fixtures/java/repository_pattern.ex | 76 ++++++++ .../support/fixtures/java/strategy_pattern.ex | 79 ++++++++ .../support/fixtures/javascript/calculator.ex | 51 +++++ .../fixtures/javascript/form_validator.ex | 134 +++++++++++++ .../fixtures/javascript/shopping_cart.ex | 100 ++++++++++ .../support/fixtures/kotlin/coroutine_flow.ex | 64 +++++++ .../fixtures/kotlin/extension_library.ex | 55 ++++++ test/support/fixtures/kotlin/sealed_state.ex | 63 ++++++ test/support/fixtures/lua/class_system.ex | 63 ++++++ test/support/fixtures/lua/event_system.ex | 76 ++++++++ test/support/fixtures/lua/state_machine.ex | 75 ++++++++ test/support/fixtures/python/calculator.ex | 83 ++++++++ test/support/fixtures/python/config_parser.ex | 89 +++++++++ test/support/fixtures/python/csv_pipeline.ex | 95 +++++++++ test/support/fixtures/ruby/calculator.ex | 59 ++++++ .../fixtures/ruby/markdown_renderer.ex | 79 ++++++++ test/support/fixtures/ruby/orm_lite.ex | 106 +++++++++++ 
test/support/fixtures/rust/calculator.ex | 70 +++++++ test/support/fixtures/rust/ring_buffer.ex | 86 +++++++++ test/support/fixtures/rust/tokenizer.ex | 112 +++++++++++ test/support/fixtures/scala/actor_messages.ex | 70 +++++++ .../fixtures/scala/case_class_algebra.ex | 73 +++++++ .../fixtures/scala/typeclass_pattern.ex | 64 +++++++ test/support/fixtures/swift/actor_model.ex | 81 ++++++++ test/support/fixtures/swift/combine_stream.ex | 66 +++++++ test/support/fixtures/swift/result_type.ex | 63 ++++++ .../typescript/dependency_injection.ex | 66 +++++++ .../fixtures/typescript/event_emitter.ex | 68 +++++++ .../fixtures/typescript/user_profile_store.ex | 72 +++++++ .../fixtures/zig/allocator_interface.ex | 72 +++++++ .../support/fixtures/zig/iterator_protocol.ex | 87 +++++++++ test/support/fixtures/zig/tagged_union.ex | 90 +++++++++ test/support/language_fixture.ex | 61 ++++++ 90 files changed, 5819 insertions(+) create mode 100644 lib/codeqa/languages/code/native/cpp.ex create mode 100644 lib/codeqa/languages/code/native/go.ex create mode 100644 lib/codeqa/languages/code/native/haskell.ex create mode 100644 lib/codeqa/languages/code/native/ocaml.ex create mode 100644 lib/codeqa/languages/code/native/rust.ex create mode 100644 lib/codeqa/languages/code/native/swift.ex create mode 100644 lib/codeqa/languages/code/native/zig.ex create mode 100644 lib/codeqa/languages/code/scripting/julia.ex create mode 100644 lib/codeqa/languages/code/scripting/lua.ex create mode 100644 lib/codeqa/languages/code/scripting/perl.ex create mode 100644 lib/codeqa/languages/code/scripting/php.ex create mode 100644 lib/codeqa/languages/code/scripting/python.ex create mode 100644 lib/codeqa/languages/code/scripting/r.ex create mode 100644 lib/codeqa/languages/code/scripting/ruby.ex create mode 100644 lib/codeqa/languages/code/scripting/shell.ex create mode 100644 lib/codeqa/languages/code/vm/clojure.ex create mode 100644 lib/codeqa/languages/code/vm/csharp.ex create mode 100644 
lib/codeqa/languages/code/vm/dart.ex create mode 100644 lib/codeqa/languages/code/vm/elixir.ex create mode 100644 lib/codeqa/languages/code/vm/erlang.ex create mode 100644 lib/codeqa/languages/code/vm/fsharp.ex create mode 100644 lib/codeqa/languages/code/vm/java.ex create mode 100644 lib/codeqa/languages/code/vm/kotlin.ex create mode 100644 lib/codeqa/languages/code/vm/scala.ex create mode 100644 lib/codeqa/languages/code/web/javascript.ex create mode 100644 lib/codeqa/languages/code/web/typescript.ex create mode 100644 lib/codeqa/languages/config/dockerfile.ex create mode 100644 lib/codeqa/languages/config/makefile.ex create mode 100644 lib/codeqa/languages/config/terraform.ex create mode 100644 lib/codeqa/languages/data/graphql.ex create mode 100644 lib/codeqa/languages/data/json.ex create mode 100644 lib/codeqa/languages/data/sql.ex create mode 100644 lib/codeqa/languages/data/toml.ex create mode 100644 lib/codeqa/languages/data/yaml.ex create mode 100644 lib/codeqa/languages/language.ex create mode 100644 lib/codeqa/languages/markup/css.ex create mode 100644 lib/codeqa/languages/markup/html.ex create mode 100644 lib/codeqa/languages/markup/markdown.ex create mode 100644 lib/codeqa/languages/markup/xml.ex create mode 100644 lib/codeqa/languages/unknown.ex create mode 100644 test/support/fixtures/cpp/observer_pattern.ex create mode 100644 test/support/fixtures/cpp/smart_pointer.ex create mode 100644 test/support/fixtures/cpp/template_container.ex create mode 100644 test/support/fixtures/csharp/async_task_manager.ex create mode 100644 test/support/fixtures/csharp/linq_pipeline.ex create mode 100644 test/support/fixtures/csharp/plugin_system.ex create mode 100644 test/support/fixtures/dart/futures_async.ex create mode 100644 test/support/fixtures/dart/mixin_composition.ex create mode 100644 test/support/fixtures/dart/widget_state.ex create mode 100644 test/support/fixtures/elixir/calculator.ex create mode 100644 test/support/fixtures/elixir/event_bus.ex create 
mode 100644 test/support/fixtures/elixir/rate_limiter.ex create mode 100644 test/support/fixtures/go/calculator.ex create mode 100644 test/support/fixtures/go/cli_parser.ex create mode 100644 test/support/fixtures/go/http_middleware.ex create mode 100644 test/support/fixtures/java/builder_pattern.ex create mode 100644 test/support/fixtures/java/repository_pattern.ex create mode 100644 test/support/fixtures/java/strategy_pattern.ex create mode 100644 test/support/fixtures/javascript/calculator.ex create mode 100644 test/support/fixtures/javascript/form_validator.ex create mode 100644 test/support/fixtures/javascript/shopping_cart.ex create mode 100644 test/support/fixtures/kotlin/coroutine_flow.ex create mode 100644 test/support/fixtures/kotlin/extension_library.ex create mode 100644 test/support/fixtures/kotlin/sealed_state.ex create mode 100644 test/support/fixtures/lua/class_system.ex create mode 100644 test/support/fixtures/lua/event_system.ex create mode 100644 test/support/fixtures/lua/state_machine.ex create mode 100644 test/support/fixtures/python/calculator.ex create mode 100644 test/support/fixtures/python/config_parser.ex create mode 100644 test/support/fixtures/python/csv_pipeline.ex create mode 100644 test/support/fixtures/ruby/calculator.ex create mode 100644 test/support/fixtures/ruby/markdown_renderer.ex create mode 100644 test/support/fixtures/ruby/orm_lite.ex create mode 100644 test/support/fixtures/rust/calculator.ex create mode 100644 test/support/fixtures/rust/ring_buffer.ex create mode 100644 test/support/fixtures/rust/tokenizer.ex create mode 100644 test/support/fixtures/scala/actor_messages.ex create mode 100644 test/support/fixtures/scala/case_class_algebra.ex create mode 100644 test/support/fixtures/scala/typeclass_pattern.ex create mode 100644 test/support/fixtures/swift/actor_model.ex create mode 100644 test/support/fixtures/swift/combine_stream.ex create mode 100644 test/support/fixtures/swift/result_type.ex create mode 100644 
test/support/fixtures/typescript/dependency_injection.ex create mode 100644 test/support/fixtures/typescript/event_emitter.ex create mode 100644 test/support/fixtures/typescript/user_profile_store.ex create mode 100644 test/support/fixtures/zig/allocator_interface.ex create mode 100644 test/support/fixtures/zig/iterator_protocol.ex create mode 100644 test/support/fixtures/zig/tagged_union.ex create mode 100644 test/support/language_fixture.ex diff --git a/lib/codeqa/languages/code/native/cpp.ex b/lib/codeqa/languages/code/native/cpp.ex new file mode 100644 index 0000000..3cc8724 --- /dev/null +++ b/lib/codeqa/languages/code/native/cpp.ex @@ -0,0 +1,48 @@ +defmodule CodeQA.Languages.Code.Native.Cpp do + use CodeQA.Language + + @impl true + def name, do: "cpp" + + @impl true + def extensions, do: ~w[c cpp cc cxx hpp h hh] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + if else for while do class struct namespace using include template typename + return new delete this public private protected virtual override static + const constexpr inline extern try catch throw switch case break continue + default auto void true false nullptr + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % << >> & | ^ ~ && || = += -= *= /= %= -> :: + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ # * + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[class struct namespace template] + + @impl true + def branch_keywords, do: ~w[else catch case default] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def access_modifiers, do: ~w[public private protected static virtual override inline] + + @impl true + def module_keywords, do: ~w[class struct namespace enum] +end diff --git a/lib/codeqa/languages/code/native/go.ex b/lib/codeqa/languages/code/native/go.ex new file mode 100644 index 0000000..45ea58d --- /dev/null +++ b/lib/codeqa/languages/code/native/go.ex @@ -0,0 +1,50 @@ +defmodule CodeQA.Languages.Code.Native.Go do + use CodeQA.Language + + @impl true + def name, do: "go" + + @impl true + def extensions, do: ~w[go] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + if else for func type struct interface package import return var const + map chan go defer select switch case break continue default fallthrough + range make new append len cap close nil true false + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % << >> & | ^ ~ && || = += -= *= /= %= := + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[func type struct interface] + + @impl true + def branch_keywords, do: ~w[else case default] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def access_modifiers, do: [] + + @impl true + def function_keywords, do: ~w[func] + + @impl true + def import_keywords, do: ~w[import package] +end diff --git a/lib/codeqa/languages/code/native/haskell.ex b/lib/codeqa/languages/code/native/haskell.ex new file mode 100644 index 0000000..c0b1075 --- /dev/null +++ b/lib/codeqa/languages/code/native/haskell.ex @@ -0,0 +1,56 @@ +defmodule CodeQA.Languages.Code.Native.Haskell do + use CodeQA.Language + + @impl true + def name, do: "haskell" + + @impl true + def extensions, do: ~w[hs lhs] + + @impl true + def comment_prefixes, do: ~w[--] + + @impl true + def block_comments, do: [{"{-", "-}"}] + + @impl true + def keywords, do: ~w[ + if else then for do let in where module import data type newtype class + instance deriving case of return True False Nothing Just do + infixl infixr infix qualified as hiding + ] + + @impl true + def operators, do: ~w[ + == /= <= >= + - * / ^ && || ! $ . <$> <*> >>= >> -> <- :: = | @ ~ + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; | @ -> <- :: + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[data type newtype class instance] + + @impl true + def branch_keywords, do: ~w[else] + + @impl true + def block_end_tokens, do: [] + + @impl true + def function_keywords, do: ~w[where let] + + @impl true + def module_keywords, do: ~w[module class instance] + + @impl true + def import_keywords, do: ~w[import] + + @impl true + def test_keywords, do: ~w[test it describe prop] + + @impl true + def uses_colon_indent?, do: true +end diff --git a/lib/codeqa/languages/code/native/ocaml.ex b/lib/codeqa/languages/code/native/ocaml.ex new file mode 100644 index 0000000..6857a5f --- /dev/null +++ b/lib/codeqa/languages/code/native/ocaml.ex @@ -0,0 +1,53 @@ +defmodule CodeQA.Languages.Code.Native.Ocaml do + use CodeQA.Language + + @impl true + def name, do: "ocaml" + + @impl true + def extensions, do: ~w[ml mli] + + @impl true + def comment_prefixes, do: [] + + @impl true + def block_comments, do: [{"(*", "*)"}] + + @impl true + def keywords, do: ~w[ + let rec fun if then else for while do done begin end match with type module + open struct sig functor val mutable exception raise try when and or not in + of as include class object method inherit new virtual + ] + + @impl true + def operators, do: ~w[ + == = != <> <= >= + - * / mod << >> & | ^ ~ && || @ :: |> -> <- := ! + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; | @ -> + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[let type module class] + + @impl true + def branch_keywords, do: ~w[else with when] + + @impl true + def block_end_tokens, do: ~w[end] + + @impl true + def access_modifiers, do: ~w[mutable virtual] + + @impl true + def function_keywords, do: ~w[let fun] + + @impl true + def module_keywords, do: ~w[module struct functor class] + + @impl true + def import_keywords, do: ~w[open include] +end diff --git a/lib/codeqa/languages/code/native/rust.ex b/lib/codeqa/languages/code/native/rust.ex new file mode 100644 index 0000000..4aa7907 --- /dev/null +++ b/lib/codeqa/languages/code/native/rust.ex @@ -0,0 +1,53 @@ +defmodule CodeQA.Languages.Code.Native.Rust do + use CodeQA.Language + + @impl true + def name, do: "rust" + + @impl true + def extensions, do: ~w[rs] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + if else for while loop fn struct enum trait impl use mod pub let mut const + static return match type where as in ref move async await dyn unsafe extern + crate self super true false + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % << >> & | ^ ~ && || = += -= *= /= %= -> => :: + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ # | + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[fn struct enum trait impl mod] + + @impl true + def branch_keywords, do: ~w[else match] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def access_modifiers, do: ~w[pub] + + @impl true + def function_keywords, do: ~w[fn] + + @impl true + def module_keywords, do: ~w[impl trait struct enum] + + @impl true + def import_keywords, do: ~w[use extern] +end diff --git a/lib/codeqa/languages/code/native/swift.ex b/lib/codeqa/languages/code/native/swift.ex new file mode 100644 index 0000000..1a83b67 --- /dev/null +++ b/lib/codeqa/languages/code/native/swift.ex @@ -0,0 +1,53 @@ +defmodule CodeQA.Languages.Code.Native.Swift do + use CodeQA.Language + + @impl true + def name, do: "swift" + + @impl true + def extensions, do: ~w[swift] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + if else for while repeat func class struct enum protocol extension import + return let var guard defer do try catch throw switch case break continue + default in as is init self super nil true false async await + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % << >> & | ^ ~ && || ?? = += -= *= /= %= -> => + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ # | + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[func class struct enum protocol extension] + + @impl true + def branch_keywords, do: ~w[else catch case default] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def access_modifiers, do: ~w[public private internal fileprivate open] + + @impl true + def function_keywords, do: ~w[func] + + @impl true + def module_keywords, do: ~w[class struct protocol extension enum] + + @impl true + def import_keywords, do: ~w[import] +end diff --git a/lib/codeqa/languages/code/native/zig.ex b/lib/codeqa/languages/code/native/zig.ex new file mode 100644 index 0000000..4141a2e --- /dev/null +++ b/lib/codeqa/languages/code/native/zig.ex @@ -0,0 +1,53 @@ +defmodule CodeQA.Languages.Code.Native.Zig do + use CodeQA.Language + + @impl true + def name, do: "zig" + + @impl true + def extensions, do: ~w[zig] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + const var fn if else for while switch return pub try catch error defer errdefer + comptime inline struct enum union test break continue null undefined unreachable + async await suspend resume orelse anytype anyerror bool void noreturn type + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % << >> & | ^ ~ && || = += -= *= /= %= orelse catch + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[fn struct enum union] + + @impl true + def branch_keywords, do: ~w[else] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def access_modifiers, do: ~w[pub inline comptime] + + @impl true + def function_keywords, do: ~w[fn] + + @impl true + def module_keywords, do: ~w[struct enum union] + + @impl true + def test_keywords, do: ~w[test] +end diff --git a/lib/codeqa/languages/code/scripting/julia.ex b/lib/codeqa/languages/code/scripting/julia.ex new file mode 100644 index 0000000..e99f8e3 --- /dev/null +++ b/lib/codeqa/languages/code/scripting/julia.ex @@ -0,0 +1,53 @@ +defmodule CodeQA.Languages.Code.Scripting.Julia do + use CodeQA.Language + + @impl true + def name, do: "julia" + + @impl true + def extensions, do: ~w[jl] + + @impl true + def comment_prefixes, do: ~w[#] + + @impl true + def block_comments, do: [{"#=", "=#"}] + + @impl true + def keywords, do: ~w[ + if else elseif for while do end function return module import using export + struct mutable abstract type primitive begin let local global const try catch + finally throw macro quote true false nothing + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % ^ << >> & | ~ && || = += -= *= /= ÷ → ← |> + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ | + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[function struct macro module] + + @impl true + def branch_keywords, do: ~w[else elseif catch finally] + + @impl true + def block_end_tokens, do: ~w[end] + + @impl true + def function_keywords, do: ~w[function macro] + + @impl true + def module_keywords, do: ~w[module struct] + + @impl true + def import_keywords, do: ~w[import using] + + @impl true + def test_keywords, do: ~w[@test @testset] +end diff --git a/lib/codeqa/languages/code/scripting/lua.ex b/lib/codeqa/languages/code/scripting/lua.ex new file mode 100644 index 0000000..e9e94b8 --- /dev/null +++ b/lib/codeqa/languages/code/scripting/lua.ex @@ -0,0 +1,46 @@ +defmodule CodeQA.Languages.Code.Scripting.Lua do + use CodeQA.Language + + @impl true + def name, do: "lua" + + @impl true + def extensions, do: ~w[lua] + + @impl true + def comment_prefixes, do: ~w[--] + + @impl true + def block_comments, do: [{"--[[", "]]"}] + + @impl true + def keywords, do: ~w[ + and break do else elseif end false for function goto if in local nil not or + repeat return then true until while + ] + + @impl true + def operators, do: ~w[ + == ~= <= >= + - * / % ^ # & | ~ << >> // .. = and or not + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[function local] + + @impl true + def branch_keywords, do: ~w[else elseif] + + @impl true + def block_end_tokens, do: ~w[end] + + @impl true + def function_keywords, do: ~w[function] + + @impl true + def import_keywords, do: ~w[require] +end diff --git a/lib/codeqa/languages/code/scripting/perl.ex b/lib/codeqa/languages/code/scripting/perl.ex new file mode 100644 index 0000000..5f90eeb --- /dev/null +++ b/lib/codeqa/languages/code/scripting/perl.ex @@ -0,0 +1,53 @@ +defmodule CodeQA.Languages.Code.Scripting.Perl do + use CodeQA.Language + + @impl true + def name, do: "perl" + + @impl true + def extensions, do: ~w[pl pm t] + + @impl true + def comment_prefixes, do: ~w[#] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + if else elsif unless for foreach while do until sub my our local use require + package return last next redo goto print say die warn eval and or not defined + undef true false + ] + + @impl true + def operators, do: ~w[ + == != <= >= eq ne lt gt le ge + - * / % ** . x = += -= *= /= .= && || ! ~ & | + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ $ % + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[sub package] + + @impl true + def branch_keywords, do: ~w[else elsif] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def function_keywords, do: ~w[sub] + + @impl true + def module_keywords, do: ~w[package] + + @impl true + def import_keywords, do: ~w[use require] + + @impl true + def test_keywords, do: ~w[ok is isnt like unlike cmp_ok] +end diff --git a/lib/codeqa/languages/code/scripting/php.ex b/lib/codeqa/languages/code/scripting/php.ex new file mode 100644 index 0000000..0311e69 --- /dev/null +++ b/lib/codeqa/languages/code/scripting/php.ex @@ -0,0 +1,54 @@ +defmodule CodeQA.Languages.Code.Scripting.PHP do + use CodeQA.Language + + @impl true + def name, do: "php" + + @impl true + def extensions, do: ~w[php phtml php3 php4 php5 php7 php8] + + @impl true + def comment_prefixes, do: ~w[// #] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + if else elseif for foreach while do function class interface trait namespace + use return new echo print public private protected static abstract final + try catch finally throw switch case break continue default include require + include_once require_once extends implements null true false + ] + + @impl true + def operators, do: ~w[ + == === != !== <= >= + - * / % ** << >> & | ^ ~ && || ?? = += -= *= /= %= -> :: => + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ # $ + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[function class interface trait namespace] + + @impl true + def branch_keywords, do: ~w[else elseif catch finally case default] + + @impl true + def block_end_tokens, do: ~w[} endif endfor endforeach endwhile endswitch] + + @impl true + def access_modifiers, do: ~w[public private protected static abstract final] + + @impl true + def function_keywords, do: ~w[function fn] + + @impl true + def module_keywords, do: ~w[class interface trait namespace] + + @impl true + def import_keywords, do: ~w[use namespace] +end diff --git a/lib/codeqa/languages/code/scripting/python.ex b/lib/codeqa/languages/code/scripting/python.ex new file mode 100644 index 0000000..bdb31aa --- /dev/null +++ b/lib/codeqa/languages/code/scripting/python.ex @@ -0,0 +1,56 @@ +defmodule CodeQA.Languages.Code.Scripting.Python do + use CodeQA.Language + + @impl true + def name, do: "python" + + @impl true + def extensions, do: ~w[py pyi] + + @impl true + def comment_prefixes, do: ~w[#] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + if else elif for while def class import from return pass break continue + not and or in is lambda with as try except finally raise yield async await + global nonlocal del assert True False None + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % ** // << >> & | ^ ~ = += -= *= /= %= **= //= + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ # + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[def class async] + + @impl true + def branch_keywords, do: ~w[elif else except finally] + + @impl true + def block_end_tokens, do: [] + + @impl true + def access_modifiers, do: [] + + @impl true + def function_keywords, do: ~w[def async] + + @impl true + def module_keywords, do: ~w[class] + + @impl true + def import_keywords, do: ~w[import from] + + @impl true + def uses_colon_indent?, do: true +end diff --git a/lib/codeqa/languages/code/scripting/r.ex b/lib/codeqa/languages/code/scripting/r.ex new file mode 100644 index 0000000..c22cb15 --- /dev/null +++ b/lib/codeqa/languages/code/scripting/r.ex @@ -0,0 +1,48 @@ +defmodule CodeQA.Languages.Code.Scripting.R do + use CodeQA.Language + + @impl true + def name, do: "r" + + @impl true + def extensions, do: ~w[r R Rmd rmd] + + @impl true + def comment_prefixes, do: ~w[#] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + if else for while repeat break next return function TRUE FALSE NULL NA Inf NaN + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / ^ %% %/% %in% <- -> = & | ! && || ~ : :: + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[function] + + @impl true + def branch_keywords, do: ~w[else] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def function_keywords, do: ~w[function] + + @impl true + def import_keywords, do: ~w[library require source] + + @impl true + def test_keywords, do: ~w[test_that expect_equal expect_true describe it] +end diff --git a/lib/codeqa/languages/code/scripting/ruby.ex b/lib/codeqa/languages/code/scripting/ruby.ex new file mode 100644 index 0000000..90b17a7 --- /dev/null +++ b/lib/codeqa/languages/code/scripting/ruby.ex @@ -0,0 +1,57 @@ +defmodule CodeQA.Languages.Code.Scripting.Ruby do + use CodeQA.Language + + @impl true + def name, do: "ruby" + + @impl true + def extensions, do: ~w[rb rake gemspec] + + @impl true + def comment_prefixes, do: ~w[#] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + if else elsif unless for while until def class module do end return begin + rescue ensure raise yield include extend require require_relative + attr_accessor attr_reader attr_writer then case when next break in + and or not true false nil self super + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % ** << >> & | ^ ~ = += -= *= /= %= **= <=> === =~ + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . : ; @ | # ? 
+ ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[def class module] + + @impl true + def branch_keywords, do: ~w[else elsif rescue ensure when] + + @impl true + def block_end_tokens, do: ~w[end] + + @impl true + def access_modifiers, do: [] + + @impl true + def function_keywords, do: ~w[def] + + @impl true + def module_keywords, do: ~w[class module] + + @impl true + def import_keywords, do: ~w[require require_relative include] + + @impl true + def test_keywords, do: ~w[it describe context scenario feature given] +end diff --git a/lib/codeqa/languages/code/scripting/shell.ex b/lib/codeqa/languages/code/scripting/shell.ex new file mode 100644 index 0000000..acbe37c --- /dev/null +++ b/lib/codeqa/languages/code/scripting/shell.ex @@ -0,0 +1,46 @@ +defmodule CodeQA.Languages.Code.Scripting.Shell do + use CodeQA.Language + + @impl true + def name, do: "shell" + + @impl true + def extensions, do: ~w[sh bash zsh fish] + + @impl true + def comment_prefixes, do: ~w[#] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + if else elif fi for while do done case esac function return then in until + select break continue exit local export readonly unset + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % && || | & > < >> << = += -= *= /= %= -eq -ne -lt -gt -le -ge + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . : ; @ # $ ! ? 
| + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[function] + + @impl true + def branch_keywords, do: ~w[else elif case] + + @impl true + def block_end_tokens, do: ~w[fi done esac] + + @impl true + def access_modifiers, do: [] + + @impl true + def function_keywords, do: ~w[function] +end diff --git a/lib/codeqa/languages/code/vm/clojure.ex b/lib/codeqa/languages/code/vm/clojure.ex new file mode 100644 index 0000000..770b280 --- /dev/null +++ b/lib/codeqa/languages/code/vm/clojure.ex @@ -0,0 +1,53 @@ +defmodule CodeQA.Languages.Code.Vm.Clojure do + use CodeQA.Language + + @impl true + def name, do: "clojure" + + @impl true + def extensions, do: ~w[clj cljs cljc edn] + + @impl true + def comment_prefixes, do: ~w[;] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + def defn defmacro let fn if do when cond case for loop recur ns require use + import try catch finally throw quote defprotocol defrecord deftype reify + extend-type extend-protocol nil true false and or not + ] + + @impl true + def operators, do: ~w[ + = == not= < > <= >= + - * / mod rem quot and or not + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; # @ ^ + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[def defn defmacro defprotocol defrecord deftype] + + @impl true + def branch_keywords, do: ~w[else] + + @impl true + def block_end_tokens, do: ~w[)] + + @impl true + def function_keywords, do: ~w[defn fn] + + @impl true + def module_keywords, do: ~w[ns defprotocol defrecord] + + @impl true + def import_keywords, do: ~w[ns require use import] + + @impl true + def test_keywords, do: ~w[deftest is testing] +end diff --git a/lib/codeqa/languages/code/vm/csharp.ex b/lib/codeqa/languages/code/vm/csharp.ex new file mode 100644 index 0000000..b90933f --- /dev/null +++ b/lib/codeqa/languages/code/vm/csharp.ex @@ -0,0 +1,53 @@ +defmodule CodeQA.Languages.Code.Vm.CSharp do + use CodeQA.Language + + @impl true + def name, do: "csharp" + + @impl true + def extensions, do: ~w[cs csx] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + if else for foreach while do class interface struct enum namespace using + return var new this base public private protected internal static abstract + virtual override sealed async await try catch finally throw switch case + break continue default in out ref void true false null readonly const + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % << >> & | ^ ~ && || ?? = += -= *= /= %= + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ # => + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[class interface struct enum namespace] + + @impl true + def branch_keywords, do: ~w[else catch finally case default] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def access_modifiers, + do: + ~w[public private protected internal static abstract virtual override sealed readonly const async] + + @impl true + def module_keywords, do: ~w[class interface struct enum namespace] + + @impl true + def import_keywords, do: ~w[using namespace] +end diff --git a/lib/codeqa/languages/code/vm/dart.ex b/lib/codeqa/languages/code/vm/dart.ex new file mode 100644 index 0000000..16e29de --- /dev/null +++ b/lib/codeqa/languages/code/vm/dart.ex @@ -0,0 +1,57 @@ +defmodule CodeQA.Languages.Code.Vm.Dart do + use CodeQA.Language + + @impl true + def name, do: "dart" + + @impl true + def extensions, do: ~w[dart] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + if else for while do switch case break continue return class extends implements + with new final const var void null true false import export part library + abstract static dynamic async await yield try catch finally throw rethrow + enum typedef mixin factory is as in + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % ~/ << >> & | ^ ~ && || ?? = += -= *= /= %= ??= -> => + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ # => + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[class enum typedef mixin] + + @impl true + def branch_keywords, do: ~w[else catch finally case] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def access_modifiers, do: ~w[static final const abstract] + + @impl true + def function_keywords, do: ~w[void async] + + @impl true + def module_keywords, do: ~w[class enum mixin] + + @impl true + def import_keywords, do: ~w[import export] + + @impl true + def test_keywords, do: ~w[test group setUp tearDown expect] +end diff --git a/lib/codeqa/languages/code/vm/elixir.ex b/lib/codeqa/languages/code/vm/elixir.ex new file mode 100644 index 0000000..401f808 --- /dev/null +++ b/lib/codeqa/languages/code/vm/elixir.ex @@ -0,0 +1,58 @@ +defmodule CodeQA.Languages.Code.Vm.Elixir do + use CodeQA.Language + + @impl true + def name, do: "elixir" + + @impl true + def extensions, do: ~w[ex exs] + + @impl true + def comment_prefixes, do: ~w[#] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + if else unless for do end def defp defmodule defmacro defmacrop defprotocol + defimpl defguard defdelegate defstruct case cond with when fn try rescue + catch raise receive in not and or true false nil + ] + + @impl true + def operators, do: ~w[ + == === != !== <= >= + - * / % << >> & | ^ ~ && || |> <> <- -> = ! not and or in + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ | + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, + do: + ~w[def defp defmodule defmacro defmacrop defprotocol defimpl defdelegate defoverridable defguard] + + @impl true + def branch_keywords, do: ~w[else rescue catch ensure cond when case] + + @impl true + def block_end_tokens, do: ~w[end] + + @impl true + def access_modifiers, do: [] + + @impl true + def function_keywords, do: ~w[def defp defmacro defmacrop defdelegate defguard] + + @impl true + def module_keywords, do: ~w[defmodule defprotocol defimpl] + + @impl true + def import_keywords, do: ~w[import require use alias] + + @impl true + def test_keywords, do: ~w[test describe] +end diff --git a/lib/codeqa/languages/code/vm/erlang.ex b/lib/codeqa/languages/code/vm/erlang.ex new file mode 100644 index 0000000..d390e01 --- /dev/null +++ b/lib/codeqa/languages/code/vm/erlang.ex @@ -0,0 +1,53 @@ +defmodule CodeQA.Languages.Code.Vm.Erlang do + use CodeQA.Language + + @impl true + def name, do: "erlang" + + @impl true + def extensions, do: ~w[erl hrl] + + @impl true + def comment_prefixes, do: ~w[%] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + if case when of begin end receive after fun try catch throw error exit + module export import define record true false ok undefined andalso orelse + not band bor bxor bnot bsl bsr div rem + ] + + @impl true + def operators, do: ~w[ + == /= =< >= =:= =/= + - * / ! <- -> :: | . , ; : + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; | -> + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[-module -record -define] + + @impl true + def branch_keywords, do: ~w[of after catch] + + @impl true + def block_end_tokens, do: ~w[end] + + @impl true + def function_keywords, do: ~w[fun] + + @impl true + def module_keywords, do: ~w[-module] + + @impl true + def import_keywords, do: ~w[-import -include] + + @impl true + def test_keywords, do: ~w[_test_ _test] +end diff --git a/lib/codeqa/languages/code/vm/fsharp.ex b/lib/codeqa/languages/code/vm/fsharp.ex new file mode 100644 index 0000000..e6b12ef --- /dev/null +++ b/lib/codeqa/languages/code/vm/fsharp.ex @@ -0,0 +1,59 @@ +defmodule CodeQA.Languages.Code.Vm.Fsharp do + use CodeQA.Language + + @impl true + def name, do: "fsharp" + + @impl true + def extensions, do: ~w[fs fsi fsx] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [{"(*", "*)"}] + + @impl true + def keywords, do: ~w[ + let rec if then else for while do match with type module open namespace val + mutable abstract member override new return yield async await try finally + raise true false null and or not in when downto to + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % << >> & | ^ ~ && || = |> <| >> << -> <- :: @ ? + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; | @ # -> + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[let type module] + + @impl true + def branch_keywords, do: ~w[else with] + + @impl true + def block_end_tokens, do: [] + + @impl true + def access_modifiers, do: ~w[public private protected internal static abstract override] + + @impl true + def function_keywords, do: ~w[let fun] + + @impl true + def module_keywords, do: ~w[module namespace type] + + @impl true + def import_keywords, do: ~w[open] + + @impl true + def test_keywords, do: ~w[testCase test testProperty] + + @impl true + def uses_colon_indent?, do: true +end diff --git a/lib/codeqa/languages/code/vm/java.ex b/lib/codeqa/languages/code/vm/java.ex new file mode 100644 index 0000000..151bee1 --- /dev/null +++ b/lib/codeqa/languages/code/vm/java.ex @@ -0,0 +1,51 @@ +defmodule CodeQA.Languages.Code.Vm.Java do + use CodeQA.Language + + @impl true + def name, do: "java" + + @impl true + def extensions, do: ~w[java] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + if else for while do class interface extends implements import package + return new this super public private protected static abstract final + synchronized volatile try catch finally throw throws switch case break + continue default void true false null instanceof + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % << >> >>> & | ^ ~ && || = += -= *= /= %= + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[class interface] + + @impl true + def branch_keywords, do: ~w[else catch finally case default] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def access_modifiers, do: ~w[public private protected static abstract final synchronized] + + @impl true + def module_keywords, do: ~w[class interface enum] + + @impl true + def import_keywords, do: ~w[import package] +end diff --git a/lib/codeqa/languages/code/vm/kotlin.ex b/lib/codeqa/languages/code/vm/kotlin.ex new file mode 100644 index 0000000..72980a2 --- /dev/null +++ b/lib/codeqa/languages/code/vm/kotlin.ex @@ -0,0 +1,54 @@ +defmodule CodeQA.Languages.Code.Vm.Kotlin do + use CodeQA.Language + + @impl true + def name, do: "kotlin" + + @impl true + def extensions, do: ~w[kt kts] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + if else for while do fun class object interface data sealed abstract enum + companion import package return val var when is as in out by override open + final private protected public internal suspend inline reified crossinline + noinline try catch finally throw break continue null true false this super init + ] + + @impl true + def operators, do: ~w[ + == === != !== <= >= + - * / % << >> & | ^ ~ && || ?: = += -= *= /= %= -> => :: + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ # | + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[fun class object interface data sealed abstract enum] + + @impl true + def branch_keywords, do: ~w[else when catch finally] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def access_modifiers, do: ~w[public private protected internal override open abstract final] + + @impl true + def function_keywords, do: ~w[fun] + + @impl true + def module_keywords, do: ~w[class interface object] + + @impl true + def import_keywords, do: ~w[import package] +end diff --git a/lib/codeqa/languages/code/vm/scala.ex b/lib/codeqa/languages/code/vm/scala.ex new file mode 100644 index 0000000..55a7a00 --- /dev/null +++ b/lib/codeqa/languages/code/vm/scala.ex @@ -0,0 +1,57 @@ +defmodule CodeQA.Languages.Code.Vm.Scala do + use CodeQA.Language + + @impl true + def name, do: "scala" + + @impl true + def extensions, do: ~w[scala sc] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + if else for while do def class object trait extends with new return import + package val var type match case sealed abstract override final protected + private implicit lazy yield try catch finally throw true false null this super + ] + + @impl true + def operators, do: ~w[ + == != <= >= + - * / % << >> & | ^ ~ && || = += -= *= /= => <- <: >: : + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ # => + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[def class object trait type] + + @impl true + def branch_keywords, do: ~w[else catch case finally] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def access_modifiers, + do: ~w[public private protected override abstract final sealed implicit lazy] + + @impl true + def function_keywords, do: ~w[def] + + @impl true + def module_keywords, do: ~w[class object trait package] + + @impl true + def import_keywords, do: ~w[import package] + + @impl true + def test_keywords, do: ~w[test it describe should] +end diff --git a/lib/codeqa/languages/code/web/javascript.ex b/lib/codeqa/languages/code/web/javascript.ex new file mode 100644 index 0000000..38194f1 --- /dev/null +++ b/lib/codeqa/languages/code/web/javascript.ex @@ -0,0 +1,56 @@ +defmodule CodeQA.Languages.Code.Web.JavaScript do + use CodeQA.Language + + @impl true + def name, do: "javascript" + + @impl true + def extensions, do: ~w[js mjs cjs jsx vue svelte] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + if else for while function class return var let const import export from + new this typeof instanceof try catch finally throw switch case break + continue default delete in of async await yield true false null undefined + ] + + @impl true + def operators, do: ~w[ + == === != !== <= >= + - * / % ** << >> >>> & | ^ ~ && || ?? = += -= *= /= %= + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ # => + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[function class async] + + @impl true + def branch_keywords, do: ~w[else catch finally case default] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def access_modifiers, do: ~w[export static] + + @impl true + def function_keywords, do: ~w[function async] + + @impl true + def module_keywords, do: ~w[class] + + @impl true + def import_keywords, do: ~w[import] + + @impl true + def test_keywords, do: ~w[test it describe context scenario feature given] +end diff --git a/lib/codeqa/languages/code/web/typescript.ex b/lib/codeqa/languages/code/web/typescript.ex new file mode 100644 index 0000000..23ab690 --- /dev/null +++ b/lib/codeqa/languages/code/web/typescript.ex @@ -0,0 +1,59 @@ +defmodule CodeQA.Languages.Code.Web.TypeScript do + use CodeQA.Language + + @impl true + def name, do: "typescript" + + @impl true + def extensions, do: ~w[ts tsx] + + @impl true + def comment_prefixes, do: ~w[//] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + if else for while function class return var let const import export from + new this typeof instanceof try catch finally throw switch case break + continue default delete in of async await yield true false null undefined + type interface enum namespace declare abstract override readonly implements + extends satisfies as keyof typeof infer never unknown any void + ] + + @impl true + def operators, do: ~w[ + == === != !== <= >= + - * / % ** << >> >>> & | ^ ~ && || ?? = += -= *= /= %= + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . 
: ; @ # => < + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[function class async interface enum namespace type declare] + + @impl true + def branch_keywords, do: ~w[else catch finally case default] + + @impl true + def block_end_tokens, do: ~w[}] + + @impl true + def access_modifiers, + do: ~w[export public private protected static abstract override readonly sealed] + + @impl true + def function_keywords, do: ~w[function async] + + @impl true + def module_keywords, do: ~w[class interface enum namespace] + + @impl true + def import_keywords, do: ~w[import] + + @impl true + def test_keywords, do: ~w[test it describe context scenario feature given] +end diff --git a/lib/codeqa/languages/config/dockerfile.ex b/lib/codeqa/languages/config/dockerfile.ex new file mode 100644 index 0000000..97373ea --- /dev/null +++ b/lib/codeqa/languages/config/dockerfile.ex @@ -0,0 +1,34 @@ +defmodule CodeQA.Languages.Config.Dockerfile do + use CodeQA.Language + + @impl true + def name, do: "dockerfile" + + @impl true + def extensions, do: ~w[Dockerfile] + + @impl true + def comment_prefixes, do: ~w[#] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + FROM RUN CMD LABEL EXPOSE ENV ADD COPY ENTRYPOINT VOLUME USER WORKDIR ARG + ONBUILD STOPSIGNAL HEALTHCHECK SHELL AS + ] + + @impl true + def operators, do: ~w[ + = \ + ] + + @impl true + def delimiters, do: ~w[ + ( ) , : # + ] ++ ~w( [ ] ) + + @impl true + def declaration_keywords, do: ~w[FROM] +end diff --git a/lib/codeqa/languages/config/makefile.ex b/lib/codeqa/languages/config/makefile.ex new file mode 100644 index 0000000..6ad3d7b --- /dev/null +++ b/lib/codeqa/languages/config/makefile.ex @@ -0,0 +1,31 @@ +defmodule CodeQA.Languages.Config.Makefile do + use CodeQA.Language + + @impl true + def name, do: "makefile" + + @impl true + def extensions, do: ~w[Makefile GNUmakefile mk] + + @impl true + def comment_prefixes, do: ~w[#] + + @impl true + def block_comments, do: [] + + @impl 
true + def keywords, do: ~w[ + ifeq ifneq ifdef ifndef else endif define endef include export unexport + override private vpath all clean install + ] + + @impl true + def operators, do: ~w[ + = := ::= ?= += != + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . : ; @ $ % # \ + ] ++ ~w( [ ] ) +end diff --git a/lib/codeqa/languages/config/terraform.ex b/lib/codeqa/languages/config/terraform.ex new file mode 100644 index 0000000..bf257bc --- /dev/null +++ b/lib/codeqa/languages/config/terraform.ex @@ -0,0 +1,32 @@ +defmodule CodeQA.Languages.Config.Terraform do + use CodeQA.Language + + @impl true + def name, do: "terraform" + + @impl true + def extensions, do: ~w[tf tfvars] + + @impl true + def comment_prefixes, do: ~w[# //] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + resource data variable output locals module provider terraform + required_providers backend for_each count depends_on lifecycle + source version true false null for if + ] + + @impl true + def operators, do: ~w[ + = == != <= >= && || ! ? : + ] + + @impl true + def delimiters, do: ~w[ + { } ( ) , . : = " # // + ] ++ ~w( [ ] ) +end diff --git a/lib/codeqa/languages/data/graphql.ex b/lib/codeqa/languages/data/graphql.ex new file mode 100644 index 0000000..70e02e2 --- /dev/null +++ b/lib/codeqa/languages/data/graphql.ex @@ -0,0 +1,31 @@ +defmodule CodeQA.Languages.Data.GraphQL do + use CodeQA.Language + + @impl true + def name, do: "graphql" + + @impl true + def extensions, do: ~w[graphql gql] + + @impl true + def comment_prefixes, do: ~w[#] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + query mutation subscription fragment on type interface union enum input + scalar schema directive extend implements true false null + ] + + @impl true + def operators, do: ~w[ + = : ! | & + ] + + @impl true + def delimiters, do: ~w[ + { } ( ) , . : # @ ! 
+ ] ++ ~w( [ ] ) +end diff --git a/lib/codeqa/languages/data/json.ex b/lib/codeqa/languages/data/json.ex new file mode 100644 index 0000000..c0b0589 --- /dev/null +++ b/lib/codeqa/languages/data/json.ex @@ -0,0 +1,30 @@ +defmodule CodeQA.Languages.Data.Json do + use CodeQA.Language + + @impl true + def name, do: "json" + + @impl true + def extensions, do: ~w[json jsonc] + + @impl true + def comment_prefixes, do: [] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + true false null + ] + + @impl true + def operators, do: ~w[ + : + ] + + @impl true + def delimiters, do: ~w[ + { } , " ' + ] ++ ~w( [ ] ) +end diff --git a/lib/codeqa/languages/data/sql.ex b/lib/codeqa/languages/data/sql.ex new file mode 100644 index 0000000..9178fe5 --- /dev/null +++ b/lib/codeqa/languages/data/sql.ex @@ -0,0 +1,39 @@ +defmodule CodeQA.Languages.Data.Sql do + use CodeQA.Language + + @impl true + def name, do: "sql" + + @impl true + def extensions, do: ~w[sql] + + @impl true + def comment_prefixes, do: ~w[--] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + SELECT FROM WHERE INSERT INTO UPDATE DELETE SET CREATE DROP ALTER TABLE + INDEX VIEW JOIN LEFT RIGHT INNER OUTER FULL CROSS ON AND OR NOT IN EXISTS + AS GROUP BY ORDER HAVING LIMIT OFFSET DISTINCT NULL TRUE FALSE PRIMARY KEY + FOREIGN REFERENCES CASCADE UNIQUE DEFAULT VALUES RETURNING WITH UNION + INTERSECT EXCEPT CASE WHEN THEN ELSE END IF BEGIN COMMIT ROLLBACK + ] + + @impl true + def operators, do: ~w[ + = != <> <= >= + - * / % LIKE BETWEEN IS IN + ] + + @impl true + def delimiters, do: ~w[ + ( ) , . 
; ' " -- /* + ] ++ ~w( [ ] ) + + @impl true + def statement_keywords, + do: + ~w[select insert update delete create drop alter truncate begin commit rollback call execute] +end diff --git a/lib/codeqa/languages/data/toml.ex b/lib/codeqa/languages/data/toml.ex new file mode 100644 index 0000000..9f555a3 --- /dev/null +++ b/lib/codeqa/languages/data/toml.ex @@ -0,0 +1,30 @@ +defmodule CodeQA.Languages.Data.Toml do + use CodeQA.Language + + @impl true + def name, do: "toml" + + @impl true + def extensions, do: ~w[toml] + + @impl true + def comment_prefixes, do: ~w[#] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + true false + ] + + @impl true + def operators, do: ~w[ + = + ] + + @impl true + def delimiters, do: ~w[ + { } , . : # " ' + ] ++ ~w( [ ] ) +end diff --git a/lib/codeqa/languages/data/yaml.ex b/lib/codeqa/languages/data/yaml.ex new file mode 100644 index 0000000..77d8f1f --- /dev/null +++ b/lib/codeqa/languages/data/yaml.ex @@ -0,0 +1,30 @@ +defmodule CodeQA.Languages.Data.Yaml do + use CodeQA.Language + + @impl true + def name, do: "yaml" + + @impl true + def extensions, do: ~w[yml yaml] + + @impl true + def comment_prefixes, do: ~w[#] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + true false null yes no on off + ] + + @impl true + def operators, do: ~w[ + : | > & * ! + ] + + @impl true + def delimiters, do: ~w[ + { } , . 
# @ --- + ] ++ ~w( [ ] ) +end diff --git a/lib/codeqa/languages/language.ex b/lib/codeqa/languages/language.ex new file mode 100644 index 0000000..ada7fa1 --- /dev/null +++ b/lib/codeqa/languages/language.ex @@ -0,0 +1,180 @@ +defmodule CodeQA.Language do + @callback name() :: String.t() + @callback extensions() :: [String.t()] + @callback comment_prefixes() :: [String.t()] + @callback block_comments() :: [{String.t(), String.t()}] + @callback keywords() :: [String.t()] + @callback operators() :: [String.t()] + @callback delimiters() :: [String.t()] + + @callback declaration_keywords() :: [String.t()] + @callback branch_keywords() :: [String.t()] + @callback block_end_tokens() :: [String.t()] + @callback access_modifiers() :: [String.t()] + @callback statement_keywords() :: [String.t()] + + @callback function_keywords() :: [String.t()] + @callback module_keywords() :: [String.t()] + @callback import_keywords() :: [String.t()] + @callback test_keywords() :: [String.t()] + @callback uses_colon_indent?() :: boolean() + @callback divider_indicators() :: [String.t()] + + @optional_callbacks [ + declaration_keywords: 0, + branch_keywords: 0, + block_end_tokens: 0, + access_modifiers: 0, + statement_keywords: 0, + function_keywords: 0, + module_keywords: 0, + import_keywords: 0, + test_keywords: 0, + uses_colon_indent?: 0, + divider_indicators: 0 + ] + + defmacro __using__(_opts) do + quote do + @behaviour CodeQA.Language + def declaration_keywords, do: [] + def branch_keywords, do: [] + def block_end_tokens, do: [] + def access_modifiers, do: [] + def statement_keywords, do: [] + def function_keywords, do: [] + def module_keywords, do: [] + def import_keywords, do: [] + def test_keywords, do: [] + def uses_colon_indent?, do: false + def divider_indicators, do: ~w[-- - == === ~ * ** # // / =] + + defoverridable declaration_keywords: 0, + branch_keywords: 0, + block_end_tokens: 0, + access_modifiers: 0, + statement_keywords: 0, + function_keywords: 0, + module_keywords: 0, 
+ import_keywords: 0, + test_keywords: 0, + uses_colon_indent?: 0, + divider_indicators: 0 + end + end + + @spec all() :: [module()] + def all do + {:ok, modules} = :application.get_key(:codeqa, :modules) + Enum.filter(modules, &implements?/1) + end + + @spec all_keywords() :: [String.t()] + def all_keywords do + all() + |> Enum.flat_map(& &1.keywords()) + |> Enum.uniq() + end + + @spec keywords(atom() | String.t()) :: MapSet.t() + def keywords(language) do + case find(language) do + nil -> MapSet.new() + mod -> MapSet.new(mod.keywords()) + end + end + + @spec operators(atom() | String.t()) :: MapSet.t() + def operators(language) do + case find(language) do + nil -> MapSet.new() + mod -> MapSet.new(mod.operators()) + end + end + + @spec delimiters(atom() | String.t()) :: MapSet.t() + def delimiters(language) do + case find(language) do + nil -> MapSet.new() + mod -> MapSet.new(mod.delimiters()) + end + end + + @spec declaration_keywords(module()) :: MapSet.t() + def declaration_keywords(mod), do: MapSet.new(mod.declaration_keywords()) + + @spec branch_keywords(module()) :: MapSet.t() + def branch_keywords(mod), do: MapSet.new(mod.branch_keywords()) + + @spec block_end_tokens(module()) :: MapSet.t() + def block_end_tokens(mod), do: MapSet.new(mod.block_end_tokens()) + + @spec access_modifiers(module()) :: MapSet.t() + def access_modifiers(mod), do: MapSet.new(mod.access_modifiers()) + + @spec statement_keywords(module()) :: MapSet.t() + def statement_keywords(mod), do: MapSet.new(mod.statement_keywords()) + + @spec function_keywords(module()) :: MapSet.t() + def function_keywords(mod), do: MapSet.new(mod.function_keywords()) + + @spec module_keywords(module()) :: MapSet.t() + def module_keywords(mod), do: MapSet.new(mod.module_keywords()) + + @spec import_keywords(module()) :: MapSet.t() + def import_keywords(mod), do: MapSet.new(mod.import_keywords()) + + @spec test_keywords(module()) :: MapSet.t() + def test_keywords(mod), do: MapSet.new(mod.test_keywords()) + + 
@spec divider_indicators(module()) :: MapSet.t() + def divider_indicators(mod), do: MapSet.new(mod.divider_indicators()) + + @spec find(atom() | String.t()) :: module() + def find(language) do + name = to_string(language) + Enum.find(all(), fn mod -> mod.name() == name end) || CodeQA.Languages.Unknown + end + + @spec detect(String.t()) :: module() + def detect(path) do + basename = Path.basename(path) + ext = path |> Path.extname() |> String.trim_leading(".") + + Enum.find(all(), fn mod -> + ext in mod.extensions() or (ext == "" and basename in mod.extensions()) + end) || CodeQA.Languages.Unknown + end + + @spec strip_comments(String.t(), module()) :: String.t() + def strip_comments(content, language_mod) do + content + |> strip_block_comments(language_mod.block_comments()) + |> strip_line_comments(language_mod.comment_prefixes()) + end + + defp strip_block_comments(content, []), do: content + + defp strip_block_comments(content, pairs) do + Enum.reduce(pairs, content, fn {open, close}, acc -> + regex = Regex.compile!(Regex.escape(open) <> ".*?" 
<> Regex.escape(close), [:dotall]) + + Regex.replace(regex, acc, fn match -> + String.replace(match, ~r/[^\n]/, "") + end) + end) + end + + defp strip_line_comments(content, []), do: content + + defp strip_line_comments(content, prefixes) do + pattern = prefixes |> Enum.map(&Regex.escape/1) |> Enum.join("|") + Regex.replace(Regex.compile!("(#{pattern}).*$", [:multiline]), content, "") + end + + defp implements?(module) do + CodeQA.Language in (module.__info__(:attributes)[:behaviour] || []) + rescue + _ -> false + end +end diff --git a/lib/codeqa/languages/markup/css.ex b/lib/codeqa/languages/markup/css.ex new file mode 100644 index 0000000..81a8e7c --- /dev/null +++ b/lib/codeqa/languages/markup/css.ex @@ -0,0 +1,31 @@ +defmodule CodeQA.Languages.Markup.Css do + use CodeQA.Language + + @impl true + def name, do: "css" + + @impl true + def extensions, do: ~w[css scss sass less] + + @impl true + def comment_prefixes, do: [] + + @impl true + def block_comments, do: [{"/*", "*/"}] + + @impl true + def keywords, do: ~w[ + media keyframes import charset supports layer font-face from to + auto none inherit initial unset normal bold italic + ] + + @impl true + def operators, do: ~w[ + : ; > + ~ * = ^= $= *= ~= |= + ] + + @impl true + def delimiters, do: ~w[ + { } ( ) , . 
# : ; @ + ] ++ ~w( [ ] ) +end diff --git a/lib/codeqa/languages/markup/html.ex b/lib/codeqa/languages/markup/html.ex new file mode 100644 index 0000000..4835835 --- /dev/null +++ b/lib/codeqa/languages/markup/html.ex @@ -0,0 +1,33 @@ +defmodule CodeQA.Languages.Markup.Html do + use CodeQA.Language + + @impl true + def name, do: "html" + + @impl true + def extensions, do: ~w[html htm heex eex leex erb htmlbars hbs mustache jinja jinja2 njk liquid] + + @impl true + def comment_prefixes, do: [] + + @impl true + def block_comments, do: [{""}] + + @impl true + def keywords, do: ~w[ + html head body div span p a img input form button select option textarea + script style link meta title h1 h2 h3 h4 h5 h6 ul ol li table tr td th + header footer nav main section article aside figure figcaption + class id href src type name value rel action method placeholder + ] + + @impl true + def operators, do: ~w[ + < > / = & + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } , . : ; " ' # ! ? + ] ++ ~w( [ ] ) +end diff --git a/lib/codeqa/languages/markup/markdown.ex b/lib/codeqa/languages/markup/markdown.ex new file mode 100644 index 0000000..1e81a8c --- /dev/null +++ b/lib/codeqa/languages/markup/markdown.ex @@ -0,0 +1,30 @@ +defmodule CodeQA.Languages.Markup.Markdown do + use CodeQA.Language + + @impl true + def name, do: "markdown" + + @impl true + def extensions, do: ~w[md mdx] + + @impl true + def comment_prefixes, do: [] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + TODO NOTE FIXME WARNING IMPORTANT + ] + + @impl true + def operators, do: ~w[ + # ## ### #### ##### ###### > ``` ** * _ __ ~~ + ] + + @impl true + def delimiters, do: ~w[ + ( ) . ! ? 
` * _ ~ + ] ++ ~w( [ ] ) +end diff --git a/lib/codeqa/languages/markup/xml.ex b/lib/codeqa/languages/markup/xml.ex new file mode 100644 index 0000000..8c136c9 --- /dev/null +++ b/lib/codeqa/languages/markup/xml.ex @@ -0,0 +1,30 @@ +defmodule CodeQA.Languages.Markup.Xml do + use CodeQA.Language + + @impl true + def name, do: "xml" + + @impl true + def extensions, do: ~w[xml svg xsl xslt xsd wsdl plist] + + @impl true + def comment_prefixes, do: [] + + @impl true + def block_comments, do: [{""}] + + @impl true + def keywords, do: ~w[ + xmlns version encoding standalone + ] + + @impl true + def operators, do: ~w[ + < > / = & + ] + + @impl true + def delimiters, do: ~w[ + ( ) , . : ; " ' # ! ? + ] ++ ~w( [ ] ) +end diff --git a/lib/codeqa/languages/unknown.ex b/lib/codeqa/languages/unknown.ex new file mode 100644 index 0000000..9873a86 --- /dev/null +++ b/lib/codeqa/languages/unknown.ex @@ -0,0 +1,30 @@ +defmodule CodeQA.Languages.Unknown do + use CodeQA.Language + + @impl true + def name, do: "unknown" + + @impl true + def extensions, do: [] + + @impl true + def comment_prefixes, do: [] + + @impl true + def block_comments, do: [] + + @impl true + def keywords, do: ~w[ + if else + ] + + @impl true + def operators, do: ~w[ + == != + ] + + @impl true + def delimiters, do: ~w[ + ( ) { } + ] +end diff --git a/test/fixtures/sample.ex b/test/fixtures/sample.ex index 16d90fc..625d9cb 100644 --- a/test/fixtures/sample.ex +++ b/test/fixtures/sample.ex @@ -1,4 +1,5 @@ defmodule Sample do + @moduledoc false def hello do IO.puts("Hello, world!") end diff --git a/test/support/fixtures/cpp/observer_pattern.ex b/test/support/fixtures/cpp/observer_pattern.ex new file mode 100644 index 0000000..b536d35 --- /dev/null +++ b/test/support/fixtures/cpp/observer_pattern.ex @@ -0,0 +1,71 @@ +defmodule Test.Fixtures.Cpp.ObserverPattern do + @moduledoc false + use Test.LanguageFixture, language: "cpp observer_pattern" + + @code ~S''' + #include + #include + + template + class Observer { + 
public: + virtual void onEvent(const Event& event) = 0; + + virtual ~Observer() = default; + }; + + template + class Subject { + std::vector*> observers; + + public: + void attach(Observer* observer) { observers.push_back(observer); } + + void detach(Observer* observer) { + observers.erase( + std::remove(observers.begin(), observers.end(), observer), + observers.end() + ); + } + + void notify(const Event& event) { + for (auto* obs : observers) obs->onEvent(event); + } + }; + + struct StockEvent { + std::string symbol; + double price; + double previousPrice; + + double change() const { return price - previousPrice; } + + double changePercent() const { return previousPrice > 0 ? change() / previousPrice * 100.0 : 0.0; } + }; + + class StockTicker : public Subject { + std::map prices; + + public: + void updatePrice(const std::string& symbol, double newPrice) { + double prev = prices.count(symbol) ? prices[symbol] : newPrice; + prices[symbol] = newPrice; + notify(StockEvent{symbol, newPrice, prev}); + } + + double getPrice(const std::string& symbol) const { + auto it = prices.find(symbol); + return it != prices.end() ? 
it->second : 0.0; + } + }; + + class AlertObserver : public Observer { + double threshold; + + public: + explicit AlertObserver(double threshold) : threshold(threshold) {} + + void onEvent(const StockEvent& event) override {} + }; + ''' +end diff --git a/test/support/fixtures/cpp/smart_pointer.ex b/test/support/fixtures/cpp/smart_pointer.ex new file mode 100644 index 0000000..6e91c9b --- /dev/null +++ b/test/support/fixtures/cpp/smart_pointer.ex @@ -0,0 +1,87 @@ +defmodule Test.Fixtures.Cpp.SmartPointer do + @moduledoc false + use Test.LanguageFixture, language: "cpp smart_pointer" + + @code ~S''' + #include + #include + + template + class UniquePtr { + T* ptr; + std::function deleter; + + public: + explicit UniquePtr(T* p = nullptr, std::function d = std::default_delete()) + : ptr(p), deleter(d) {} + + ~UniquePtr() { if (ptr) deleter(ptr); } + + UniquePtr(const UniquePtr&) = delete; + + UniquePtr& operator=(const UniquePtr&) = delete; + + UniquePtr(UniquePtr&& other) noexcept : ptr(other.ptr), deleter(std::move(other.deleter)) { other.ptr = nullptr; } + + UniquePtr& operator=(UniquePtr&& other) noexcept { + if (this != &other) { if (ptr) deleter(ptr); ptr = other.ptr; other.ptr = nullptr; } + return *this; + } + + T* get() const { return ptr; } + + T& operator*() const { return *ptr; } + + T* operator->() const { return ptr; } + + explicit operator bool() const { return ptr != nullptr; } + + T* release() { T* p = ptr; ptr = nullptr; return p; } + + void reset(T* p = nullptr) { if (ptr) deleter(ptr); ptr = p; } + }; + + template + struct SharedControl { + T* ptr; + int refCount; + + SharedControl(T* p) : ptr(p), refCount(1) {} + + ~SharedControl() { delete ptr; } + }; + + template + class SharedPtr { + SharedControl* ctrl; + + public: + explicit SharedPtr(T* p = nullptr) : ctrl(p ? 
new SharedControl(p) : nullptr) {} + + SharedPtr(const SharedPtr& other) : ctrl(other.ctrl) { if (ctrl) ++ctrl->refCount; } + + SharedPtr& operator=(const SharedPtr& other) { + if (this != &other) { release(); ctrl = other.ctrl; if (ctrl) ++ctrl->refCount; } + return *this; + } + + ~SharedPtr() { release(); } + + T* get() const { return ctrl ? ctrl->ptr : nullptr; } + + T& operator*() const { return *ctrl->ptr; } + + T* operator->() const { return ctrl->ptr; } + + int useCount() const { return ctrl ? ctrl->refCount : 0; } + + private: + void release() { if (ctrl && --ctrl->refCount == 0) { delete ctrl; ctrl = nullptr; } } + }; + + template + UniquePtr makeUnique(Args&&... args) { + return UniquePtr(new T(std::forward(args)...)); + } + ''' +end diff --git a/test/support/fixtures/cpp/template_container.ex b/test/support/fixtures/cpp/template_container.ex new file mode 100644 index 0000000..6ff7bdb --- /dev/null +++ b/test/support/fixtures/cpp/template_container.ex @@ -0,0 +1,90 @@ +defmodule Test.Fixtures.Cpp.TemplateContainer do + @moduledoc false + use Test.LanguageFixture, language: "cpp template_container" + + @code ~S''' + #include + + template + class Stack { + T* data; + int capacity; + int topIdx; + + public: + explicit Stack(int cap = 16) : capacity(cap), topIdx(-1) { data = new T[cap]; } + + ~Stack() { delete[] data; } + + Stack(const Stack&) = delete; + + Stack& operator=(const Stack&) = delete; + + void push(const T& value) { + if (topIdx + 1 >= capacity) throw std::overflow_error("Stack overflow"); + data[++topIdx] = value; + } + + T pop() { + if (empty()) throw std::underflow_error("Stack underflow"); + return data[topIdx--]; + } + + T& top() { + if (empty()) throw std::underflow_error("Stack is empty"); + return data[topIdx]; + } + + bool empty() const { return topIdx < 0; } + + int size() const { return topIdx + 1; } + + int maxCapacity() const { return capacity; } + }; + + template + class Queue { + T* data; + int capacity; + int head; + int tail; + 
int count; + + public: + explicit Queue(int cap = 16) : capacity(cap), head(0), tail(0), count(0) { data = new T[cap]; } + + ~Queue() { delete[] data; } + + void enqueue(const T& value) { + if (count >= capacity) throw std::overflow_error("Queue overflow"); + data[tail] = value; + tail = (tail + 1) % capacity; + ++count; + } + + T dequeue() { + if (empty()) throw std::underflow_error("Queue underflow"); + T value = data[head]; + head = (head + 1) % capacity; + --count; + return value; + } + + T& front() { if (empty()) throw std::underflow_error("Queue is empty"); return data[head]; } + + bool empty() const { return count == 0; } + + int size() const { return count; } + }; + + template + struct Pair { + T first; + T second; + + Pair(T a, T b) : first(a), second(b) {} + + bool operator==(const Pair& other) const { return first == other.first && second == other.second; } + }; + ''' +end diff --git a/test/support/fixtures/csharp/async_task_manager.ex b/test/support/fixtures/csharp/async_task_manager.ex new file mode 100644 index 0000000..6dd9db7 --- /dev/null +++ b/test/support/fixtures/csharp/async_task_manager.ex @@ -0,0 +1,67 @@ +defmodule Test.Fixtures.CSharp.AsyncTaskManager do + @moduledoc false + use Test.LanguageFixture, language: "csharp async_task_manager" + + @code ~S''' + // TaskManagement namespace — async task scheduling with bounded concurrency + using System.Threading.Tasks; + using System.Collections.Generic; + + interface ITaskScheduler + { + Task ScheduleAsync(System.Func work, System.Threading.CancellationToken ct); + Task ScheduleAsync(System.Func> work, System.Threading.CancellationToken ct); + } + + interface IWorkQueue + { + void Enqueue(System.Func work); + Task DrainAsync(System.Threading.CancellationToken ct); + int Count { get; } + } + + class BoundedTaskScheduler : ITaskScheduler + { + private readonly System.Threading.SemaphoreSlim semaphore; + + public BoundedTaskScheduler(int maxConcurrency) + { + semaphore = new 
System.Threading.SemaphoreSlim(maxConcurrency, maxConcurrency); + } + + public async Task ScheduleAsync(System.Func work, System.Threading.CancellationToken ct) + { + await semaphore.WaitAsync(ct); + try { await work(); } + finally { semaphore.Release(); } + } + + public async Task ScheduleAsync(System.Func> work, System.Threading.CancellationToken ct) + { + await semaphore.WaitAsync(ct); + try { return await work(); } + finally { semaphore.Release(); } + } + } + + class InMemoryWorkQueue : IWorkQueue + { + private readonly Queue> queue = new Queue>(); + + public void Enqueue(System.Func work) { queue.Enqueue(work); } + + public int Count => queue.Count; + + public async Task DrainAsync(System.Threading.CancellationToken ct) + { + while (queue.Count > 0 && !ct.IsCancellationRequested) + { + var work = queue.Dequeue(); + await work(); + } + } + } + + enum TaskState { Pending, Running, Completed, Failed, Cancelled } + ''' +end diff --git a/test/support/fixtures/csharp/linq_pipeline.ex b/test/support/fixtures/csharp/linq_pipeline.ex new file mode 100644 index 0000000..677c6eb --- /dev/null +++ b/test/support/fixtures/csharp/linq_pipeline.ex @@ -0,0 +1,71 @@ +defmodule Test.Fixtures.CSharp.LinqPipeline do + @moduledoc false + use Test.LanguageFixture, language: "csharp linq_pipeline" + + @code ~S''' + // DataPipeline namespace — LINQ-style transformation pipeline + using System.Collections.Generic; + using System.Linq; + + interface ITransform + { + IEnumerable Apply(IEnumerable input); + } + + interface IPipeline + { + IPipeline Pipe(ITransform transform); + IEnumerable Execute(); + } + + class FilterTransform : ITransform + { + private readonly System.Func predicate; + + public FilterTransform(System.Func predicate) + { + this.predicate = predicate; + } + + public IEnumerable Apply(IEnumerable input) + { + return input.Where(predicate); + } + } + + class MapTransform : ITransform + { + private readonly System.Func selector; + + public MapTransform(System.Func 
selector) + { + this.selector = selector; + } + + public IEnumerable Apply(IEnumerable input) + { + return input.Select(selector); + } + } + + class DataPipeline : IPipeline + { + private readonly IEnumerable source; + + public DataPipeline(IEnumerable source) + { + this.source = source; + } + + public IPipeline Pipe(ITransform transform) + { + return new DataPipeline(transform.Apply(source)); + } + + public IEnumerable Execute() + { + return source.ToList(); + } + } + ''' +end diff --git a/test/support/fixtures/csharp/plugin_system.ex b/test/support/fixtures/csharp/plugin_system.ex new file mode 100644 index 0000000..ebf7e7f --- /dev/null +++ b/test/support/fixtures/csharp/plugin_system.ex @@ -0,0 +1,72 @@ +defmodule Test.Fixtures.CSharp.PluginSystem do + @moduledoc false + use Test.LanguageFixture, language: "csharp plugin_system" + + @code ~S''' + // PluginSystem namespace — plugin registry with lifecycle management + using System.Collections.Generic; + + interface IPlugin + { + string Name { get; } + string Version { get; } + void Initialize(IPluginContext context); + void Shutdown(); + } + + interface IPluginContext + { + void RegisterService(T service) where T : class; + T ResolveService() where T : class; + void Log(string message); + } + + interface IPluginRegistry + { + void Register(IPlugin plugin); + void Unregister(string name); + IPlugin Find(string name); + IEnumerable All(); + } + + class PluginContext : IPluginContext + { + private readonly Dictionary services = new Dictionary(); + + public void RegisterService(T service) where T : class { services[typeof(T)] = service; } + + public T ResolveService() where T : class + { + if (services.TryGetValue(typeof(T), out var svc)) return (T)svc; + throw new System.InvalidOperationException("Service not found: " + typeof(T).Name); + } + + public void Log(string message) { System.Console.WriteLine("[Plugin] " + message); } + } + + class PluginRegistry : IPluginRegistry + { + private readonly Dictionary plugins 
= new Dictionary(); + private readonly IPluginContext context; + + public PluginRegistry(IPluginContext context) { this.context = context; } + + public void Register(IPlugin plugin) + { + plugin.Initialize(context); + plugins[plugin.Name] = plugin; + } + + public void Unregister(string name) + { + if (plugins.TryGetValue(name, out var plugin)) { plugin.Shutdown(); plugins.Remove(name); } + } + + public IPlugin Find(string name) { plugins.TryGetValue(name, out var p); return p; } + + public IEnumerable All() { return plugins.Values; } + } + + enum PluginState { Unloaded, Initializing, Active, ShuttingDown } + ''' +end diff --git a/test/support/fixtures/dart/futures_async.ex b/test/support/fixtures/dart/futures_async.ex new file mode 100644 index 0000000..ff5317a --- /dev/null +++ b/test/support/fixtures/dart/futures_async.ex @@ -0,0 +1,78 @@ +defmodule Test.Fixtures.Dart.FuturesAsync do + @moduledoc false + use Test.LanguageFixture, language: "dart futures_async" + + @code ~S''' + abstract class AsyncTask { + Future execute(); + + void cancel(); + + bool get isCancelled; + } + + abstract class TaskScheduler { + Future schedule(AsyncTask task); + + Future> scheduleAll(List> tasks); + + void shutdown(); + } + + class RetryPolicy { + final int maxAttempts; + final Duration delay; + final double backoffMultiplier; + + const RetryPolicy({ + this.maxAttempts = 3, + this.delay = const Duration(milliseconds: 500), + this.backoffMultiplier = 2.0, + }); + + Duration delayForAttempt(int attempt) { + final ms = delay.inMilliseconds * (backoffMultiplier * attempt).ceil(); + return Duration(milliseconds: ms); + } + } + + class SimpleTaskScheduler implements TaskScheduler { + bool _shutdown = false; + final List> _pending = []; + + Future schedule(AsyncTask task) async { + if (_shutdown) throw StateError("Scheduler is shut down"); + final future = task.execute(); + _pending.add(future); + return future; + } + + Future> scheduleAll(List> tasks) { + return Future.wait(tasks.map((t) 
=> schedule(t)).toList()); + } + + void shutdown() { + _shutdown = true; + _pending.clear(); + } + } + + enum TaskStatus { + pending, + running, + completed, + failed, + cancelled + } + + class TaskResult { + final T? value; + final Object? error; + final TaskStatus status; + + const TaskResult.success(this.value) : error = null, status = TaskStatus.completed; + + const TaskResult.failure(this.error) : value = null, status = TaskStatus.failed; + } + ''' +end diff --git a/test/support/fixtures/dart/mixin_composition.ex b/test/support/fixtures/dart/mixin_composition.ex new file mode 100644 index 0000000..05013b4 --- /dev/null +++ b/test/support/fixtures/dart/mixin_composition.ex @@ -0,0 +1,85 @@ +defmodule Test.Fixtures.Dart.MixinComposition do + @moduledoc false + use Test.LanguageFixture, language: "dart mixin_composition" + + @code ~S''' + abstract class Serializable { + Map toJson(); + + String toJsonString() { + final map = toJson(); + return map.entries.map((e) => '"${e.key}": "${e.value}"').join(', '); + } + } + + abstract class Validatable { + List validate(); + + bool get isValid => validate().isEmpty; + + void assertValid() { + final errors = validate(); + if (errors.isNotEmpty) throw ArgumentError(errors.join(', ')); + } + } + + abstract class Equatable { + List get props; + + bool equalsTo(Object other) { + if (identical(this, other)) return true; + if (other.runtimeType != runtimeType) return false; + final otherEquatable = other as Equatable; + for (int i = 0; i < props.length; i++) { + if (props[i] != otherEquatable.props[i]) return false; + } + return true; + } + } + + class Address extends Serializable implements Validatable { + final String street; + final String city; + final String country; + + Address({required this.street, required this.city, required this.country}); + + Map toJson() => {'street': street, 'city': city, 'country': country}; + + List validate() { + final errors = []; + if (street.isEmpty) errors.add('street is required'); + if 
(city.isEmpty) errors.add('city is required'); + if (country.isEmpty) errors.add('country is required'); + return errors; + } + + List get props => [street, city, country]; + } + + enum AddressType { + home, + work, + billing, + shipping + } + + class Contact extends Serializable implements Validatable { + final String name; + final String email; + final Address address; + + Contact({required this.name, required this.email, required this.address}); + + Map toJson() => {'name': name, 'email': email, 'address': address.toJson()}; + + List validate() { + final errors = []; + if (name.isEmpty) errors.add('name is required'); + if (!email.contains('@')) errors.add('invalid email'); + errors.addAll(address.validate()); + return errors; + } + } + ''' +end diff --git a/test/support/fixtures/dart/widget_state.ex b/test/support/fixtures/dart/widget_state.ex new file mode 100644 index 0000000..d4a1b04 --- /dev/null +++ b/test/support/fixtures/dart/widget_state.ex @@ -0,0 +1,91 @@ +defmodule Test.Fixtures.Dart.WidgetState do + @moduledoc false + use Test.LanguageFixture, language: "dart widget_state" + + @code ~S''' + abstract class Widget { + String get key; + + Element createElement(); + } + + abstract class StatefulWidget extends Widget { + State createState(); + } + + abstract class State { + T widget; + + State(this.widget); + + void setState(void Function() fn) { + fn(); + markNeedsBuild(); + } + + void markNeedsBuild() {} + + Widget build(); + + void initState() {} + + void dispose() {} + } + + class Element { + Widget widget; + State? 
state; + + Element(this.widget); + + void mount() { + if (widget is StatefulWidget) { + state = (widget as StatefulWidget).createState(); + state!.initState(); + } + } + + void unmount() { + state?.dispose(); + } + } + + abstract class BuildContext { + Widget get widget; + + Element get element; + } + + enum WidgetLifecycle { + created, + mounted, + active, + inactive, + disposed + } + + class RenderObject { + double x = 0; + double y = 0; + double width = 0; + double height = 0; + bool needsLayout = true; + bool needsPaint = true; + RenderObject? parent; + List children = []; + + void layout() { + needsLayout = false; + } + + void paint() { + needsPaint = false; + } + + void addChild(RenderObject child) { + children.add(child); + child.parent = this; + } + } + ''' +end diff --git a/test/support/fixtures/elixir/calculator.ex b/test/support/fixtures/elixir/calculator.ex new file mode 100644 index 0000000..7657b88 --- /dev/null +++ b/test/support/fixtures/elixir/calculator.ex @@ -0,0 +1,125 @@ +defmodule Test.Fixtures.Elixir.Calculator do + @moduledoc false + use Test.LanguageFixture, language: "elixir calculator" + import Test.NodeMatcher + + @code ~S''' + defmodule Calculator.Behaviour do + @moduledoc "Contract for all calculator implementations." + @callback add(number, number) :: number + @callback subtract(number, number) :: number + @callback multiply(number, number) :: number + @callback divide(number, number) :: {:ok, float} | {:error, :division_by_zero} + end + + defprotocol Calculator.Displayable do + @doc "Converts a result to a human-readable string." + def display(value) + end + + defmodule Calculator.Basic do + @moduledoc "Basic arithmetic calculator." + @behaviour Calculator.Behaviour + + @doc "Adds two numbers." + @spec add(number, number) :: number + def add(a, b), do: a + b + + @doc "Subtracts b from a." + @spec subtract(number, number) :: number + def subtract(a, b), do: a - b + + @doc "Multiplies two numbers." 
+ @spec multiply(number, number) :: number + def multiply(a, b), do: a * b + + @doc "Divides a by b, returns error for zero divisor." + @spec divide(number, number) :: {:ok, float} | {:error, :division_by_zero} + def divide(_a, 0), do: {:error, :division_by_zero} + def divide(a, b), do: {:ok, a / b} + + @doc "Absolute value of n." + @spec abs_val(number) :: number + def abs_val(n) when n < 0, do: -n + def abs_val(n), do: n + end + + defimpl Calculator.Displayable, for: Integer do + def display(value), do: Integer.to_string(value) + end + + defimpl Calculator.Displayable, for: Float do + def display(value), do: :erlang.float_to_binary(value, [decimals: 4]) + end + + defmodule Calculator.Scientific do + @moduledoc "Scientific calculator with extended math operations." + @behaviour Calculator.Behaviour + + @doc "Adds two numbers." + @spec add(number, number) :: number + def add(a, b), do: a + b + + @doc "Subtracts b from a." + @spec subtract(number, number) :: number + def subtract(a, b), do: a - b + + @doc "Multiplies two numbers." + @spec multiply(number, number) :: number + def multiply(a, b), do: a * b + + @doc "Divides, returning an error on zero divisor." + @spec divide(number, number) :: {:ok, float} | {:error, :division_by_zero} + def divide(_a, 0), do: {:error, :division_by_zero} + def divide(a, b), do: {:ok, a / b} + + @doc "Raises a to the power of b." + @spec power(number, number) :: number + def power(a, b), do: :math.pow(a, b) + + @doc "Returns the square root or an error for negative input." + @spec sqrt(number) :: {:ok, float} | {:error, :negative_input} + def sqrt(n) when n < 0, do: {:error, :negative_input} + def sqrt(n), do: {:ok, :math.sqrt(n)} + + @doc "Natural logarithm, error for non-positive input." 
+ @spec log(number) :: {:ok, float} | {:error, :non_positive_input} + def log(n) when n <= 0, do: {:error, :non_positive_input} + def log(n), do: {:ok, :math.log(n)} + + defp validate_positive(n) when n > 0, do: {:ok, n} + defp validate_positive(_n), do: {:error, :non_positive_input} + end + + defmodule Calculator.History do + @moduledoc "Tracks a history of calculator operations." + @type entry :: {atom, list} + @type t :: list + + @doc "Creates an empty history." + @spec new() :: t + def new(), do: [] + + @doc "Records an operation entry." + @spec record(t, atom, list) :: t + def record(history, op, args) when is_list(args), do: [{op, args} | history] + + @doc "Returns the last n entries." + @spec last(t, non_neg_integer) :: t + def last(history, n \\ 5), do: Enum.take(history, n) + + @doc "Clears the history." + @spec clear(t) :: t + def clear(_history), do: [] + + defp format_entry({op, args}), do: "#{op}(#{Enum.join(args, ", ")})" + end + ''' + + @block_assertions [ + %{ + description: "a compound block containing add with doc and spec annotations", + all_of: [exact(:content, "add"), exact(:content, "doc"), exact(:content, "spec")] + } + ] +end diff --git a/test/support/fixtures/elixir/event_bus.ex b/test/support/fixtures/elixir/event_bus.ex new file mode 100644 index 0000000..e196e09 --- /dev/null +++ b/test/support/fixtures/elixir/event_bus.ex @@ -0,0 +1,71 @@ +defmodule Test.Fixtures.Elixir.EventBus do + @moduledoc false + use Test.LanguageFixture, language: "elixir event_bus" + + @code ~S''' + defmodule EventBus.Behaviour do + @moduledoc "Contract for event bus implementations." + @callback subscribe(topic :: String.t(), pid :: pid()) :: :ok | {:error, term()} + @callback unsubscribe(topic :: String.t(), pid :: pid()) :: :ok + @callback publish(topic :: String.t(), event :: term()) :: :ok + @callback topics() :: [String.t()] + end + + defprotocol EventBus.Serializable do + @doc "Encodes an event to a binary payload." 
+ @spec encode(t()) :: binary() + def encode(event) + + @doc "Decodes a binary payload back to an event." + @spec decode(t(), binary()) :: term() + def decode(schema, payload) + end + + defmodule EventBus.Topic do + @moduledoc "Represents a named event topic with subscriber tracking." + @enforce_keys [:name] + defstruct [:name, subscribers: []] + + @doc "Creates a new topic." + @spec new(String.t()) :: t() + def new(name) when is_binary(name), do: %__MODULE__{name: name} + + @doc "Adds a subscriber pid to the topic." + @spec add_subscriber(t(), pid()) :: t() + def add_subscriber(%__MODULE__{subscribers: subs} = topic, pid) do + %{topic | subscribers: [pid | subs]} + end + + @doc "Removes a subscriber pid from the topic." + @spec remove_subscriber(t(), pid()) :: t() + def remove_subscriber(%__MODULE__{subscribers: subs} = topic, pid) do + %{topic | subscribers: List.delete(subs, pid)} + end + + @doc "Returns all current subscribers." + @spec subscribers(t()) :: [pid()] + def subscribers(%__MODULE__{subscribers: subs}), do: subs + end + + defmodule EventBus.Dispatcher do + @moduledoc "Dispatches events to all topic subscribers." + + @doc "Broadcasts an event to every subscriber of the given topic." + @spec broadcast(EventBus.Topic.t(), term()) :: :ok + def broadcast(%EventBus.Topic{} = topic, event) do + topic + |> EventBus.Topic.subscribers() + |> Enum.each(&send(&1, {:event, topic.name, event})) + end + + @doc "Dispatches to subscribers matching a predicate." 
+ @spec dispatch_filtered(EventBus.Topic.t(), term(), (pid() -> boolean())) :: :ok + def dispatch_filtered(%EventBus.Topic{} = topic, event, filter_fn) do + topic + |> EventBus.Topic.subscribers() + |> Enum.filter(filter_fn) + |> Enum.each(&send(&1, {:event, topic.name, event})) + end + end + ''' +end diff --git a/test/support/fixtures/elixir/rate_limiter.ex b/test/support/fixtures/elixir/rate_limiter.ex new file mode 100644 index 0000000..580a2b4 --- /dev/null +++ b/test/support/fixtures/elixir/rate_limiter.ex @@ -0,0 +1,85 @@ +defmodule Test.Fixtures.Elixir.RateLimiter do + @moduledoc false + use Test.LanguageFixture, language: "elixir rate_limiter" + + @code ~S''' + defmodule RateLimiter.Behaviour do + @moduledoc "Contract for rate limiter backends." + @callback allow?(key :: term(), cost :: pos_integer()) :: boolean() + @callback reset(key :: term()) :: :ok + @callback stats(key :: term()) :: {:ok, map()} | {:error, :not_found} + end + + defmodule RateLimiter.Bucket do + @moduledoc "Token bucket state for a single rate-limited key." + @enforce_keys [:capacity, :tokens, :refill_rate] + defstruct [:capacity, :tokens, :refill_rate, last_refill: nil] + + @doc "Creates a new bucket with full capacity." + @spec new(pos_integer(), pos_integer()) :: t() + def new(capacity, refill_rate) when capacity > 0 and refill_rate > 0 do + %__MODULE__{capacity: capacity, tokens: capacity, refill_rate: refill_rate, last_refill: System.monotonic_time(:millisecond)} + end + + @doc "Consumes tokens from the bucket. Returns updated bucket or error." + @spec consume(t(), pos_integer()) :: {:ok, t()} | {:error, :rate_limited} + def consume(%__MODULE__{tokens: tokens} = bucket, cost) when tokens >= cost do + {:ok, %{bucket | tokens: tokens - cost}} + end + def consume(%__MODULE__{}, _cost), do: {:error, :rate_limited} + + @doc "Refills the bucket based on elapsed time." 
+ @spec refill(t()) :: t() + def refill(%__MODULE__{tokens: t, capacity: cap, refill_rate: rate, last_refill: last} = bucket) do + now = System.monotonic_time(:millisecond) + elapsed_ms = now - last + new_tokens = min(cap, t + div(elapsed_ms * rate, 1000)) + %{bucket | tokens: new_tokens, last_refill: now} + end + end + + defmodule RateLimiter.Server do + @moduledoc "GenServer-backed rate limiter with configurable buckets." + @behaviour RateLimiter.Behaviour + use GenServer + + @doc "Starts the rate limiter server." + @spec start_link(keyword()) :: GenServer.on_start() + def start_link(opts), do: GenServer.start_link(__MODULE__, opts, name: __MODULE__) + + @impl RateLimiter.Behaviour + @spec allow?(term(), pos_integer()) :: boolean() + def allow?(key, cost \\ 1), do: GenServer.call(__MODULE__, {:allow, key, cost}) + + @impl RateLimiter.Behaviour + @spec reset(term()) :: :ok + def reset(key), do: GenServer.cast(__MODULE__, {:reset, key}) + + @impl RateLimiter.Behaviour + @spec stats(term()) :: {:ok, map()} | {:error, :not_found} + def stats(key), do: GenServer.call(__MODULE__, {:stats, key}) + + @impl GenServer + def init(opts) do + capacity = Keyword.get(opts, :capacity, 100) + refill_rate = Keyword.get(opts, :refill_rate, 10) + {:ok, %{buckets: %{}, capacity: capacity, refill_rate: refill_rate}} + end + + @impl GenServer + def handle_call({:allow, key, cost}, _from, state) do + bucket = Map.get_lazy(state.buckets, key, fn -> RateLimiter.Bucket.new(state.capacity, state.refill_rate) end) + bucket = RateLimiter.Bucket.refill(bucket) + case RateLimiter.Bucket.consume(bucket, cost) do + {:ok, updated} -> {:reply, true, %{state | buckets: Map.put(state.buckets, key, updated)}} + {:error, :rate_limited} -> {:reply, false, %{state | buckets: Map.put(state.buckets, key, bucket)}} + end + end + + @impl GenServer + def handle_cast({:reset, key}, state), do: {:noreply, %{state | buckets: Map.delete(state.buckets, key)}} + + defp default_bucket(state), do: 
RateLimiter.Bucket.new(state.capacity, state.refill_rate) + end + ''' +end diff --git a/test/support/fixtures/go/calculator.ex b/test/support/fixtures/go/calculator.ex new file mode 100644 index 0000000..e55100a --- /dev/null +++ b/test/support/fixtures/go/calculator.ex @@ -0,0 +1,53 @@ +defmodule Test.Fixtures.Go.Calculator do + @moduledoc false + use Test.LanguageFixture, language: "go calculator" + + @code ~S''' + func Add(a, b float64) float64 { + return a + b + } + + func Subtract(a, b float64) float64 { + return a - b + } + + func Multiply(a, b float64) float64 { + return a * b + } + + func Divide(a, b float64) (float64, error) { + if b == 0 { + return 0, fmt.Errorf("division by zero") + } + return a / b, nil + } + + func Power(base, exp float64) float64 { + return math.Pow(base, exp) + } + + func Sqrt(n float64) (float64, error) { + if n < 0 { + return 0, fmt.Errorf("cannot take sqrt of negative number") + } + return math.Sqrt(n), nil + } + + func Abs(n float64) float64 { + if n < 0 { + return -n + } + return n + } + + func Clamp(n, min, max float64) float64 { + if n < min { + return min + } + if n > max { + return max + } + return n + } + ''' +end diff --git a/test/support/fixtures/go/cli_parser.ex b/test/support/fixtures/go/cli_parser.ex new file mode 100644 index 0000000..c97c14a --- /dev/null +++ b/test/support/fixtures/go/cli_parser.ex @@ -0,0 +1,77 @@ +defmodule Test.Fixtures.Go.CliParser do + @moduledoc false + use Test.LanguageFixture, language: "go cli_parser" + + @code ~S''' + type Flag struct { + Name string + Short string + Description string + Required bool + Value interface{} + } + + type Command struct { + Name string + Description string + flags []*Flag + subcommands []*Command + action func(args []string, flags map[string]interface{}) error + } + + func NewCommand(name, description string) *Command { + return &Command{Name: name, Description: description, flags: []*Flag{}, subcommands: []*Command{}} + } + + func (c *Command) AddFlag(name, 
short, description string, required bool) *Flag { + f := &Flag{Name: name, Short: short, Description: description, Required: required} + c.flags = append(c.flags, f) + return f + } + + func (c *Command) AddSubcommand(sub *Command) *Command { + c.subcommands = append(c.subcommands, sub) + return c + } + + func (c *Command) Action(fn func(args []string, flags map[string]interface{}) error) { + c.action = fn + } + + func (c *Command) Execute(args []string) error { + if len(args) > 0 { + for _, sub := range c.subcommands { + if sub.Name == args[0] { + return sub.Execute(args[1:]) + } + } + } + flags, remaining, err := c.parseFlags(args) + if err != nil { + return err + } + if c.action != nil { + return c.action(remaining, flags) + } + return nil + } + + func (c *Command) parseFlags(args []string) (map[string]interface{}, []string, error) { + result := make(map[string]interface{}) + remaining := []string{} + for i := 0; i < len(args); i++ { + arg := args[i] + if len(arg) > 2 && arg[:2] == "--" { + key := arg[2:] + if i+1 < len(args) { + result[key] = args[i+1] + i++ + } + } else { + remaining = append(remaining, arg) + } + } + return result, remaining, nil + } + ''' +end diff --git a/test/support/fixtures/go/http_middleware.ex b/test/support/fixtures/go/http_middleware.ex new file mode 100644 index 0000000..e759c85 --- /dev/null +++ b/test/support/fixtures/go/http_middleware.ex @@ -0,0 +1,86 @@ +defmodule Test.Fixtures.Go.HttpMiddleware do + @moduledoc false + use Test.LanguageFixture, language: "go http_middleware" + + @code ~S''' + type Handler func(w ResponseWriter, r *Request) + + type Middleware func(Handler) Handler + + type ResponseWriter interface { + Write([]byte) (int, error) + WriteHeader(statusCode int) + Header() map[string][]string + } + + type Request struct { + Method string + Path string + Headers map[string]string + Body []byte + } + + type Router struct { + routes map[string]Handler + middlewares []Middleware + } + + func NewRouter() *Router { + 
return &Router{routes: make(map[string]Handler), middlewares: []Middleware{}} + } + + func (r *Router) Use(m Middleware) { + r.middlewares = append(r.middlewares, m) + } + + func (r *Router) Handle(path string, h Handler) { + r.routes[path] = r.wrap(h) + } + + func (r *Router) ServeHTTP(w ResponseWriter, req *Request) { + h, ok := r.routes[req.Path] + if !ok { + w.WriteHeader(404) + return + } + h(w, req) + } + + func (r *Router) wrap(h Handler) Handler { + for i := len(r.middlewares) - 1; i >= 0; i-- { + h = r.middlewares[i](h) + } + return h + } + + func LoggingMiddleware(next Handler) Handler { + return func(w ResponseWriter, r *Request) { + next(w, r) + } + } + + func RecoveryMiddleware(next Handler) Handler { + return func(w ResponseWriter, r *Request) { + defer func() { + if rec := recover(); rec != nil { + w.WriteHeader(500) + } + }() + next(w, r) + } + } + + func AuthMiddleware(secret string) Middleware { + return func(next Handler) Handler { + return func(w ResponseWriter, r *Request) { + token, ok := r.Headers["Authorization"] + if !ok || token != secret { + w.WriteHeader(401) + return + } + next(w, r) + } + } + } + ''' +end diff --git a/test/support/fixtures/java/builder_pattern.ex b/test/support/fixtures/java/builder_pattern.ex new file mode 100644 index 0000000..15cd00d --- /dev/null +++ b/test/support/fixtures/java/builder_pattern.ex @@ -0,0 +1,81 @@ +defmodule Test.Fixtures.Java.BuilderPattern do + @moduledoc false + use Test.LanguageFixture, language: "java builder_pattern" + + @code ~S''' + interface Validatable { + boolean isValid(); + String validationError(); + } + + interface Buildable { + T build(); + } + + class Address implements Validatable { + private final String street; + private final String city; + private final String country; + private final String postalCode; + + private Address(Builder b) { + this.street = b.street; + this.city = b.city; + this.country = b.country; + this.postalCode = b.postalCode; + } + + public boolean isValid() 
{ + return street != null && !street.isEmpty() && city != null && country != null; + } + + public String validationError() { + if (street == null || street.isEmpty()) return "street is required"; + if (city == null) return "city is required"; + return null; + } + + public String getStreet() { return street; } + + public String getCity() { return city; } + + public String getCountry() { return country; } + + public String getPostalCode() { return postalCode; } + + public static class Builder implements Buildable
{ + private String street; + private String city; + private String country; + private String postalCode; + + public Builder street(String street) { this.street = street; return this; } + + public Builder city(String city) { this.city = city; return this; } + + public Builder country(String country) { this.country = country; return this; } + + public Builder postalCode(String postalCode) { this.postalCode = postalCode; return this; } + + public Address build() { + Address a = new Address(this); + if (!a.isValid()) throw new IllegalStateException(a.validationError()); + return a; + } + } + } + + enum Country { + US("United States"), + DE("Germany"), + JP("Japan"), + BR("Brazil"); + + private final String displayName; + + Country(String displayName) { this.displayName = displayName; } + + public String getDisplayName() { return displayName; } + } + ''' +end diff --git a/test/support/fixtures/java/repository_pattern.ex b/test/support/fixtures/java/repository_pattern.ex new file mode 100644 index 0000000..487b526 --- /dev/null +++ b/test/support/fixtures/java/repository_pattern.ex @@ -0,0 +1,76 @@ +defmodule Test.Fixtures.Java.RepositoryPattern do + @moduledoc false + use Test.LanguageFixture, language: "java repository_pattern" + + @code ~S''' + interface Entity { + ID getId(); + } + + interface Repository, ID> { + T findById(ID id); + java.util.List findAll(); + T save(T entity); + void delete(ID id); + boolean exists(ID id); + } + + interface UserRepository extends Repository { + java.util.Optional findByEmail(String email); + java.util.List findByRole(String role); + } + + class User implements Entity { + private Long id; + private String name; + private String email; + private String role; + + public User(Long id, String name, String email, String role) { + this.id = id; + this.name = name; + this.email = email; + this.role = role; + } + + public Long getId() { return id; } + + public String getName() { return name; } + + public String getEmail() { return email; } 
+ + public String getRole() { return role; } + } + + class InMemoryUserRepository implements UserRepository { + private final java.util.Map store = new java.util.HashMap<>(); + private long nextId = 1L; + + public User findById(Long id) { return store.get(id); } + + public java.util.List findAll() { return new java.util.ArrayList<>(store.values()); } + + public User save(User user) { + if (user.getId() == null) { + User saved = new User(nextId++, user.getName(), user.getEmail(), user.getRole()); + store.put(saved.getId(), saved); + return saved; + } + store.put(user.getId(), user); + return user; + } + + public void delete(Long id) { store.remove(id); } + + public boolean exists(Long id) { return store.containsKey(id); } + + public java.util.Optional findByEmail(String email) { + return store.values().stream().filter(u -> u.getEmail().equals(email)).findFirst(); + } + + public java.util.List findByRole(String role) { + return store.values().stream().filter(u -> u.getRole().equals(role)).collect(java.util.stream.Collectors.toList()); + } + } + ''' +end diff --git a/test/support/fixtures/java/strategy_pattern.ex b/test/support/fixtures/java/strategy_pattern.ex new file mode 100644 index 0000000..0d129f1 --- /dev/null +++ b/test/support/fixtures/java/strategy_pattern.ex @@ -0,0 +1,79 @@ +defmodule Test.Fixtures.Java.StrategyPattern do + @moduledoc false + use Test.LanguageFixture, language: "java strategy_pattern" + + @code ~S''' + interface PaymentStrategy { + boolean validate(double amount); + String process(double amount, String currency); + String getName(); + } + + interface TransactionLogger { + void log(String strategy, double amount, String result); + } + + class CreditCardStrategy implements PaymentStrategy { + private final String cardNumber; + private final String expiry; + private final String cvv; + + public CreditCardStrategy(String cardNumber, String expiry, String cvv) { + this.cardNumber = cardNumber; + this.expiry = expiry; + this.cvv = cvv; + } + + 
public boolean validate(double amount) { + return amount > 0 && cardNumber != null && cardNumber.length() == 16; + } + + public String process(double amount, String currency) { + return "Charged " + amount + " " + currency + " to card ending " + cardNumber.substring(12); + } + + public String getName() { return "credit_card"; } + } + + class BankTransferStrategy implements PaymentStrategy { + private final String accountNumber; + private final String routingNumber; + + public BankTransferStrategy(String accountNumber, String routingNumber) { + this.accountNumber = accountNumber; + this.routingNumber = routingNumber; + } + + public boolean validate(double amount) { return amount >= 1.0; } + + public String process(double amount, String currency) { + return "Transferred " + amount + " " + currency + " from account " + accountNumber; + } + + public String getName() { return "bank_transfer"; } + } + + class PaymentProcessor { + private PaymentStrategy strategy; + private final TransactionLogger logger; + + public PaymentProcessor(PaymentStrategy strategy, TransactionLogger logger) { + this.strategy = strategy; + this.logger = logger; + } + + public void setStrategy(PaymentStrategy strategy) { this.strategy = strategy; } + + public String pay(double amount, String currency) { + if (!strategy.validate(amount)) throw new IllegalArgumentException("Invalid payment"); + String result = strategy.process(amount, currency); + logger.log(strategy.getName(), amount, result); + return result; + } + } + + enum PaymentStatus { + PENDING, PROCESSING, COMPLETED, FAILED, REFUNDED + } + ''' +end diff --git a/test/support/fixtures/javascript/calculator.ex b/test/support/fixtures/javascript/calculator.ex new file mode 100644 index 0000000..b6d67a5 --- /dev/null +++ b/test/support/fixtures/javascript/calculator.ex @@ -0,0 +1,51 @@ +defmodule Test.Fixtures.JavaScript.Calculator do + @moduledoc false + use Test.LanguageFixture, language: "javascript calculator" + + @code ~S''' + function 
add(a, b) { + return a + b; + } + + function subtract(a, b) { + return a - b; + } + + function multiply(a, b) { + return a * b; + } + + function divide(a, b) { + if (b === 0) throw new Error("Cannot divide by zero"); + return a / b; + } + + function power(base, exp) { + return Math.pow(base, exp); + } + + function sqrt(n) { + if (n < 0) throw new Error("Cannot take sqrt of negative number"); + return Math.sqrt(n); + } + + function abs(n) { + return Math.abs(n); + } + + function clamp(n, min, max) { + return Math.min(Math.max(n, min), max); + } + + function roundTo(n, decimals) { + var factor = Math.pow(10, decimals); + return Math.round(n * factor) / factor; + } + + function average(values) { + if (values.length === 0) return 0; + var sum = values.reduce(function(acc, v) { return acc + v; }, 0); + return sum / values.length; + } + ''' +end diff --git a/test/support/fixtures/javascript/form_validator.ex b/test/support/fixtures/javascript/form_validator.ex new file mode 100644 index 0000000..017ed52 --- /dev/null +++ b/test/support/fixtures/javascript/form_validator.ex @@ -0,0 +1,134 @@ +defmodule Test.Fixtures.JavaScript.FormValidator do + @moduledoc false + use Test.LanguageFixture, language: "javascript form_validator" + + @code ~S''' + class ValidationError { + constructor(field, message) { + this.field = field; + this.message = message; + } + + toString() { + return `${this.field}: ${this.message}`; + } + } + + class ValidationResult { + constructor() { + this.errors = []; + } + + addError(field, message) { + this.errors.push(new ValidationError(field, message)); + return this; + } + + isValid() { + return this.errors.length === 0; + } + + getErrors(field) { + return this.errors.filter(function(e) { return e.field === field; }); + } + } + + class FieldValidator { + constructor(field, value) { + this.field = field; + this.value = value; + this._rules = []; + } + + required() { + this._rules.push(function(v) { + if (v === null || v === undefined || v === "") { + 
return "is required"; + } + return null; + }); + return this; + } + + minLength(n) { + this._rules.push(function(v) { + if (typeof v === "string" && v.length < n) { + return "is too short (minimum " + n + " characters)"; + } + return null; + }); + return this; + } + + maxLength(n) { + this._rules.push(function(v) { + if (typeof v === "string" && v.length > n) { + return "is too long (maximum " + n + " characters)"; + } + return null; + }); + return this; + } + + matches(pattern, message) { + this._rules.push(function(v) { + if (typeof v === "string" && !pattern.test(v)) { + return message || "is invalid"; + } + return null; + }); + return this; + } + + validate() { + var errors = []; + for (var i = 0; i < this._rules.length; i++) { + var error = this._rules[i](this.value); + if (error !== null) { + errors.push(error); + } + } + return errors; + } + } + + class FormValidator { + constructor(data) { + this._data = data; + this._fields = []; + } + + field(name) { + var validator = new FieldValidator(name, this._data[name]); + this._fields.push(validator); + return validator; + } + + validate() { + var result = new ValidationResult(); + for (var i = 0; i < this._fields.length; i++) { + var f = this._fields[i]; + var errors = f.validate(); + for (var j = 0; j < errors.length; j++) { + result.addError(f.field, errors[j]); + } + } + return result; + } + } + + function validateEmail(value) { + var pattern = /^[^\s@]+@[^\s@]+\.[^\s@]+$/; + return pattern.test(value); + } + + function validateUrl(value) { + try { + new URL(value); + return true; + } catch (_) { + return false; + } + } + ''' +end diff --git a/test/support/fixtures/javascript/shopping_cart.ex b/test/support/fixtures/javascript/shopping_cart.ex new file mode 100644 index 0000000..e7d8600 --- /dev/null +++ b/test/support/fixtures/javascript/shopping_cart.ex @@ -0,0 +1,100 @@ +defmodule Test.Fixtures.JavaScript.ShoppingCart do + @moduledoc false + use Test.LanguageFixture, language: "javascript shopping_cart" + + 
@code ~S''' + class CartItem { + constructor(id, name, price, quantity) { + this.id = id; + this.name = name; + this.price = price; + this.quantity = quantity; + } + + get subtotal() { + return this.price * this.quantity; + } + + withQuantity(quantity) { + return new CartItem(this.id, this.name, this.price, quantity); + } + } + + class Discount { + constructor(code, type, value) { + this.code = code; + this.type = type; + this.value = value; + } + + apply(subtotal) { + if (this.type === "percent") { + return subtotal * (1 - this.value / 100); + } + if (this.type === "fixed") { + return Math.max(0, subtotal - this.value); + } + return subtotal; + } + } + + class ShoppingCart { + constructor() { + this._items = new Map(); + this._discount = null; + this._listeners = []; + } + + addItem(item) { + var existing = this._items.get(item.id); + if (existing) { + this._items.set(item.id, existing.withQuantity(existing.quantity + item.quantity)); + } else { + this._items.set(item.id, item); + } + this._emit("item:added", item); + return this; + } + + removeItem(id) { + this._items.delete(id); + this._emit("item:removed", { id: id }); + return this; + } + + applyDiscount(discount) { + this._discount = discount; + this._emit("discount:applied", discount); + return this; + } + + get subtotal() { + var total = 0; + this._items.forEach(function(item) { total += item.subtotal; }); + return total; + } + + get total() { + var sub = this.subtotal; + return this._discount ? 
this._discount.apply(sub) : sub; + } + + get itemCount() { + var count = 0; + this._items.forEach(function(item) { count += item.quantity; }); + return count; + } + + on(event, handler) { + this._listeners.push({ event: event, handler: handler }); + return this; + } + + _emit(event, data) { + this._listeners + .filter(function(l) { return l.event === event; }) + .forEach(function(l) { l.handler(data); }); + } + } + ''' +end diff --git a/test/support/fixtures/kotlin/coroutine_flow.ex b/test/support/fixtures/kotlin/coroutine_flow.ex new file mode 100644 index 0000000..efd8f80 --- /dev/null +++ b/test/support/fixtures/kotlin/coroutine_flow.ex @@ -0,0 +1,64 @@ +defmodule Test.Fixtures.Kotlin.CoroutineFlow do + @moduledoc false + use Test.LanguageFixture, language: "kotlin coroutine_flow" + + @code ~S''' + interface FlowCollector { + suspend fun emit(value: T) + } + + interface Flow { + suspend fun collect(collector: FlowCollector) + } + + interface Channel { + suspend fun send(value: T) + suspend fun receive(): T + fun close() + val isClosedForSend: Boolean + } + + class SimpleFlow(private val block: suspend FlowCollector.() -> Unit) : Flow { + override suspend fun collect(collector: FlowCollector) { + collector.block() + } + } + + class TransformFlow( + private val upstream: Flow, + private val transform: suspend (T) -> R + ) : Flow { + override suspend fun collect(collector: FlowCollector) { + upstream.collect(object : FlowCollector { + override suspend fun emit(value: T) { + collector.emit(transform(value)) + } + }) + } + } + + class FilterFlow( + private val upstream: Flow, + private val predicate: suspend (T) -> Boolean + ) : Flow { + override suspend fun collect(collector: FlowCollector) { + upstream.collect(object : FlowCollector { + override suspend fun emit(value: T) { + if (predicate(value)) collector.emit(value) + } + }) + } + } + + class BufferedChannel(private val capacity: Int) : Channel { + private val buffer: ArrayDeque = ArrayDeque() + override val 
isClosedForSend: Boolean get() = false + + override suspend fun send(value: T) { buffer.addLast(value) } + + override suspend fun receive(): T = buffer.removeFirst() + + override fun close() { buffer.clear() } + } + ''' +end diff --git a/test/support/fixtures/kotlin/extension_library.ex b/test/support/fixtures/kotlin/extension_library.ex new file mode 100644 index 0000000..8d0ad7e --- /dev/null +++ b/test/support/fixtures/kotlin/extension_library.ex @@ -0,0 +1,55 @@ +defmodule Test.Fixtures.Kotlin.ExtensionLibrary do + @moduledoc false + use Test.LanguageFixture, language: "kotlin extension_library" + + @code ~S''' + interface StringValidator { + fun validate(value: String): Boolean + fun errorMessage(): String + } + + interface Transformer { + fun transform(value: T): R + } + + interface Pipeline { + fun pipe(step: Transformer): Pipeline + fun execute(input: T): T + } + + class EmailValidator : StringValidator { + override fun validate(value: String): Boolean = value.contains("@") && value.contains(".") + + override fun errorMessage(): String = "Invalid email format" + } + + class LengthValidator(private val min: Int, private val max: Int) : StringValidator { + override fun validate(value: String): Boolean = value.length in min..max + + override fun errorMessage(): String = "Length must be between $min and $max" + } + + class TrimTransformer : Transformer { + override fun transform(value: String): String = value.trim() + } + + class LowercaseTransformer : Transformer { + override fun transform(value: String): String = value.lowercase() + } + + class StringPipeline : Pipeline { + private val steps: MutableList> = mutableListOf() + + override fun pipe(step: Transformer): Pipeline { + steps.add(step) + return this + } + + override fun execute(input: String): String = steps.fold(input) { acc, step -> step.transform(acc) } + } + + enum class ValidationMode { + STRICT, LENIENT, DISABLED + } + ''' +end diff --git a/test/support/fixtures/kotlin/sealed_state.ex 
b/test/support/fixtures/kotlin/sealed_state.ex new file mode 100644 index 0000000..fd0b1fa --- /dev/null +++ b/test/support/fixtures/kotlin/sealed_state.ex @@ -0,0 +1,63 @@ +defmodule Test.Fixtures.Kotlin.SealedState do + @moduledoc false + use Test.LanguageFixture, language: "kotlin sealed_state" + + @code ~S''' + interface Action + + interface State + + interface Reducer { + fun reduce(state: S, action: A): S + } + + class ScreenState { + class Loading : ScreenState() + class Success(val data: List) : ScreenState() + class Error(val message: String, val cause: Throwable?) : ScreenState() + class Empty : ScreenState() + } + + class ScreenAction { + class Load : ScreenAction() + class LoadSuccess(val data: List) : ScreenAction() + class LoadError(val message: String, val cause: Throwable?) : ScreenAction() + class Refresh : ScreenAction() + class Clear : ScreenAction() + } + + class ScreenReducer : Reducer { + override fun reduce(state: ScreenState, action: ScreenAction): ScreenState { + return when (action) { + is ScreenAction.Load -> ScreenState.Loading() + is ScreenAction.LoadSuccess -> if (action.data.isEmpty()) ScreenState.Empty() else ScreenState.Success(action.data) + is ScreenAction.LoadError -> ScreenState.Error(action.message, action.cause) + is ScreenAction.Refresh -> ScreenState.Loading() + is ScreenAction.Clear -> ScreenState.Empty() + else -> state + } + } + } + + enum class LoadStrategy { + EAGER, LAZY, PREFETCH, BACKGROUND + } + + class StateStore(private val reducer: Reducer, initialState: S) { + private var state: S = initialState + private val listeners: MutableList<(S) -> Unit> = mutableListOf() + + fun getState(): S = state + + fun dispatch(action: A) { + state = reducer.reduce(state, action) + listeners.forEach { it(state) } + } + + fun subscribe(listener: (S) -> Unit): () -> Unit { + listeners.add(listener) + return { listeners.remove(listener) } + } + } + ''' +end diff --git a/test/support/fixtures/lua/class_system.ex 
b/test/support/fixtures/lua/class_system.ex new file mode 100644 index 0000000..d96769d --- /dev/null +++ b/test/support/fixtures/lua/class_system.ex @@ -0,0 +1,63 @@ +defmodule Test.Fixtures.Lua.ClassSystem do + @moduledoc false + use Test.LanguageFixture, language: "lua class_system" + + @code ~S''' + function class(parent) + local cls = {} + cls.__index = cls + if parent then + setmetatable(cls, { __index = parent }) + end + cls.new = function(...) + local instance = setmetatable({}, cls) + if instance.init then + instance:init(...) + end + return instance + end + cls.isInstanceOf = function(self, klass) + local mt = getmetatable(self) + while mt do + if mt == klass then return true end + mt = getmetatable(mt) + end + return false + end + return cls + end + + function mixin(target, source) + for key, value in pairs(source) do + if type(value) == "function" and not target[key] then + target[key] = value + end + end + return target + end + + function interface(...) + local methods = { ... } + return function(obj) + for _, method in ipairs(methods) do + if type(obj[method]) ~= "function" then + error("Missing method: " .. 
method) + end + end + return true + end + end + + function extend(parent, definition) + local cls = class(parent) + for k, v in pairs(definition) do + cls[k] = v + end + return cls + end + + function implements(obj, iface) + return pcall(iface, obj) + end + ''' +end diff --git a/test/support/fixtures/lua/event_system.ex b/test/support/fixtures/lua/event_system.ex new file mode 100644 index 0000000..4c50cf8 --- /dev/null +++ b/test/support/fixtures/lua/event_system.ex @@ -0,0 +1,76 @@ +defmodule Test.Fixtures.Lua.EventSystem do + @moduledoc false + use Test.LanguageFixture, language: "lua event_system" + + @code ~S''' + function EventEmitter() + local self = { listeners = {}, onceListeners = {} } + + function self:on(event, callback) + if not self.listeners[event] then + self.listeners[event] = {} + end + table.insert(self.listeners[event], callback) + return self + end + + function self:once(event, callback) + if not self.onceListeners[event] then + self.onceListeners[event] = {} + end + table.insert(self.onceListeners[event], callback) + return self + end + + function self:off(event, callback) + if self.listeners[event] then + for i, cb in ipairs(self.listeners[event]) do + if cb == callback then + table.remove(self.listeners[event], i) + return self + end + end + end + return self + end + + function self:emit(event, ...) + local listeners = self.listeners[event] or {} + for _, cb in ipairs(listeners) do + cb(...) + end + local onceListeners = self.onceListeners[event] or {} + self.onceListeners[event] = {} + for _, cb in ipairs(onceListeners) do + cb(...) + end + return self + end + + function self:removeAllListeners(event) + if event then + self.listeners[event] = nil + self.onceListeners[event] = nil + else + self.listeners = {} + self.onceListeners = {} + end + return self + end + + return self + end + + function pipe(emitter1, event, emitter2, targetEvent) + emitter1:on(event, function(...) + emitter2:emit(targetEvent or event, ...) 
+ end) + end + + function broadcast(emitters, event, ...) + for _, emitter in ipairs(emitters) do + emitter:emit(event, ...) + end + end + ''' +end diff --git a/test/support/fixtures/lua/state_machine.ex b/test/support/fixtures/lua/state_machine.ex new file mode 100644 index 0000000..cba47b8 --- /dev/null +++ b/test/support/fixtures/lua/state_machine.ex @@ -0,0 +1,75 @@ +defmodule Test.Fixtures.Lua.StateMachine do + @moduledoc false + use Test.LanguageFixture, language: "lua state_machine" + + @code ~S''' + function StateMachine(config) + local self = { + current = config.initial, + states = config.states or {}, + transitions = config.transitions or {}, + history = {}, + listeners = {}, + } + + function self:can(event) + local key = self.current .. ":" .. event + return self.transitions[key] ~= nil + end + + function self:transition(event, data) + local key = self.current .. ":" .. event + local target = self.transitions[key] + if not target then + error("No transition from '" .. self.current .. "' on event '" .. event .. "'") + end + local from = self.current + local stateConfig = self.states[from] or {} + if stateConfig.onExit then stateConfig.onExit(from, event, data) end + table.insert(self.history, { state = from, event = event }) + self.current = target + local targetConfig = self.states[target] or {} + if targetConfig.onEnter then targetConfig.onEnter(target, event, data) end + for _, cb in ipairs(self.listeners) do + cb(from, event, target, data) + end + return self + end + + function self:onTransition(callback) + table.insert(self.listeners, callback) + return self + end + + function self:getHistory() + return self.history + end + + function self:reset() + self.current = config.initial + self.history = {} + return self + end + + return self + end + + function buildTransitionTable(transitions) + local tbl = {} + for _, t in ipairs(transitions) do + local key = t.from .. ":" .. 
t.event + tbl[key] = t.to + end + return tbl + end + + function validateMachine(machine, requiredStates) + for _, state in ipairs(requiredStates) do + if not machine.states[state] then + return false, "Missing state: " .. state + end + end + return true, nil + end + ''' +end diff --git a/test/support/fixtures/python/calculator.ex b/test/support/fixtures/python/calculator.ex new file mode 100644 index 0000000..47c9029 --- /dev/null +++ b/test/support/fixtures/python/calculator.ex @@ -0,0 +1,83 @@ +defmodule Test.Fixtures.Python.Calculator do + @moduledoc false + use Test.LanguageFixture, language: "python calculator" + + @code ~S''' + class Calculator: + """A calculator supporting basic arithmetic operations.""" + + def add(self, a, b): + """Returns the sum of a and b.""" + return a + b + + def subtract(self, a, b): + """Returns a minus b.""" + return a - b + + def multiply(self, a, b): + """Returns the product of a and b.""" + return a * b + + def divide(self, a, b): + """Divides a by b. Raises for zero divisor.""" + if b == 0: + raise ValueError("Cannot divide by zero") + return a / b + + def power(self, base, exp): + """Returns base to the power of exp.""" + return base ** exp + + def sqrt(self, n): + """Returns the square root. Raises for negative input.""" + if n < 0: + raise ValueError("Cannot take sqrt of negative number") + return n ** 0.5 + + def abs_val(self, n): + """Returns the absolute value of n.""" + if n < 0: + return -n + return n + + + class ScientificCalculator(Calculator): + """Extended scientific calculator.""" + + def log(self, n, base=10): + """Returns log base of n. Raises for non-positive n.""" + if n <= 0: + raise ValueError("Logarithm undefined for non-positive values") + import math + return math.log(n, base) + + def factorial(self, n): + """Returns n factorial. 
Raises for negative n.""" + if n < 0: + raise ValueError("Factorial undefined for negative numbers") + if n == 0: + return 1 + result = 1 + for i in range(1, n + 1): + result *= i + return result + + + def add(a, b): + return a + b + + + def subtract(a, b): + return a - b + + + def multiply(a, b): + return a * b + + + def divide(a, b): + if b == 0: + raise ValueError("Cannot divide by zero") + return a / b + ''' +end diff --git a/test/support/fixtures/python/config_parser.ex b/test/support/fixtures/python/config_parser.ex new file mode 100644 index 0000000..a58516e --- /dev/null +++ b/test/support/fixtures/python/config_parser.ex @@ -0,0 +1,89 @@ +defmodule Test.Fixtures.Python.ConfigParser do + @moduledoc false + use Test.LanguageFixture, language: "python config_parser" + + @code ~S''' + from dataclasses import dataclass, field + from typing import ClassVar, Optional + + + @dataclass + class DatabaseConfig: + """Database connection configuration.""" + + host: str = "localhost" + port: int = 5432 + name: str = "app" + pool_size: int = 10 + VALID_PORTS: ClassVar[range] = range(1, 65536) + + def __post_init__(self): + """Validates configuration after initialisation.""" + if self.port not in self.VALID_PORTS: + raise ValueError(f"Invalid port: {self.port}") + if not self.host: + raise ValueError("host must not be empty") + if self.pool_size < 1: + raise ValueError("pool_size must be at least 1") + + def url(self) -> str: + """Returns the database connection URL.""" + return f"postgres://{self.host}:{self.port}/{self.name}" + + + @dataclass + class LoggingConfig: + """Logging configuration.""" + + level: str = "info" + format: str = "text" + output: str = "stdout" + VALID_LEVELS: ClassVar[list] = ["debug", "info", "warning", "error"] + VALID_FORMATS: ClassVar[list] = ["text", "json"] + + def __post_init__(self): + """Validates level and format.""" + if self.level not in self.VALID_LEVELS: + raise ValueError(f"Invalid log level: {self.level}") + if self.format not in 
self.VALID_FORMATS: + raise ValueError(f"Invalid log format: {self.format}") + + + @dataclass + class AppConfig: + """Top-level application configuration.""" + + database: DatabaseConfig = field(default_factory=DatabaseConfig) + logging: LoggingConfig = field(default_factory=LoggingConfig) + debug: bool = False + version: str = "1.0.0" + + def is_production(self) -> bool: + """Returns True when debug mode is disabled.""" + return not self.debug + + @classmethod + def from_dict(cls, data: dict) -> "AppConfig": + """Builds an AppConfig from a plain dictionary.""" + db_data = data.get("database", {}) + log_data = data.get("logging", {}) + return cls( + database=DatabaseConfig(**db_data), + logging=LoggingConfig(**log_data), + debug=data.get("debug", False), + version=data.get("version", "1.0.0"), + ) + + @classmethod + def from_env(cls, prefix: str = "APP") -> "AppConfig": + """Builds an AppConfig from environment variables.""" + import os + return cls( + database=DatabaseConfig( + host=os.getenv(f"{prefix}_DB_HOST", "localhost"), + port=int(os.getenv(f"{prefix}_DB_PORT", "5432")), + ), + debug=os.getenv(f"{prefix}_DEBUG", "false").lower() == "true", + ) + ''' +end diff --git a/test/support/fixtures/python/csv_pipeline.ex b/test/support/fixtures/python/csv_pipeline.ex new file mode 100644 index 0000000..459acf8 --- /dev/null +++ b/test/support/fixtures/python/csv_pipeline.ex @@ -0,0 +1,95 @@ +defmodule Test.Fixtures.Python.CsvPipeline do + @moduledoc false + use Test.LanguageFixture, language: "python csv_pipeline" + + @code ~S''' + from dataclasses import dataclass, field + from typing import Iterator, Protocol + + + @dataclass + class CsvRow: + """Represents one row of parsed CSV data.""" + + fields: dict + line_number: int + + def get(self, key: str, default=None): + """Returns the value for key or default.""" + return self.fields.get(key, default) + + def keys(self) -> list: + """Returns all field names.""" + return list(self.fields.keys()) + + + class 
RowTransformer(Protocol): + """Protocol for CSV row transformation steps.""" + + def transform(self, row: CsvRow) -> CsvRow: + """Transforms a single row.""" + ... + + + @dataclass + class ColumnRenamer: + """Renames columns according to a mapping.""" + + mapping: dict = field(default_factory=dict) + + def transform(self, row: CsvRow) -> CsvRow: + """Applies column rename mapping to a row.""" + new_fields = {self.mapping.get(k, k): v for k, v in row.fields.items()} + return CsvRow(fields=new_fields, line_number=row.line_number) + + + @dataclass + class TypeCoercer: + """Coerces column values to specified types.""" + + types: dict = field(default_factory=dict) + + def transform(self, row: CsvRow) -> CsvRow: + """Coerces field values using the types mapping.""" + coerced = {} + for key, value in row.fields.items(): + target_type = self.types.get(key) + if target_type is not None: + try: + coerced[key] = target_type(value) + except (ValueError, TypeError): + coerced[key] = value + else: + coerced[key] = value + return CsvRow(fields=coerced, line_number=row.line_number) + + + class CsvPipeline: + """Streaming CSV pipeline with pluggable transformation steps.""" + + def __init__(self, path: str): + """Initialises the pipeline for the given CSV file path.""" + self._path = path + self._steps: list = [] + + def add_step(self, step: RowTransformer) -> "CsvPipeline": + """Adds a transformation step and returns self for chaining.""" + self._steps.append(step) + return self + + def run(self) -> Iterator[CsvRow]: + """Yields processed rows from the CSV file.""" + with open(self._path, "r", newline="") as fh: + import csv + reader = csv.DictReader(fh) + for line_number, raw in enumerate(reader, start=1): + row = CsvRow(fields=dict(raw), line_number=line_number) + for step in self._steps: + row = step.transform(row) + yield row + + def collect(self) -> list: + """Collects all processed rows into a list.""" + return list(self.run()) + ''' +end diff --git 
a/test/support/fixtures/ruby/calculator.ex b/test/support/fixtures/ruby/calculator.ex new file mode 100644 index 0000000..df46955 --- /dev/null +++ b/test/support/fixtures/ruby/calculator.ex @@ -0,0 +1,59 @@ +defmodule Test.Fixtures.Ruby.Calculator do + @moduledoc false + use Test.LanguageFixture, language: "ruby calculator" + + @code ~S''' + module Calculable + def abs_val(n) + n < 0 ? -n : n + end + + def clamp(n, min, max) + [[n, min].max, max].min + end + end + + class BasicCalculator + include Calculable + + def add(a, b) + a + b + end + + def subtract(a, b) + a - b + end + + def multiply(a, b) + a * b + end + + def divide(a, b) + raise ArgumentError, "Cannot divide by zero" if b.zero? + a.to_f / b + end + + def power(a, b) + a ** b + end + end + + class ScientificCalculator < BasicCalculator + def sqrt(n) + raise ArgumentError, "Cannot take sqrt of negative number" if n < 0 + Math.sqrt(n) + end + + def log(n, base = 10) + raise ArgumentError, "Logarithm undefined for non-positive values" if n <= 0 + Math.log(n) / Math.log(base) + end + + def factorial(n) + raise ArgumentError, "Factorial undefined for negative numbers" if n < 0 + return 1 if n == 0 + (1..n).reduce(1, :*) + end + end + ''' +end diff --git a/test/support/fixtures/ruby/markdown_renderer.ex b/test/support/fixtures/ruby/markdown_renderer.ex new file mode 100644 index 0000000..2e70d26 --- /dev/null +++ b/test/support/fixtures/ruby/markdown_renderer.ex @@ -0,0 +1,79 @@ +defmodule Test.Fixtures.Ruby.MarkdownRenderer do + @moduledoc false + use Test.LanguageFixture, language: "ruby markdown_renderer" + + @code ~S''' + module Markdown + Token = Struct.new(:type, :content, :level) + end + + module Markdown::Tokenizer + HEADING_RE = /^(#{1,6})\s+(.+)$/ + CODE_BLOCK_RE = /^```(\w*)$/ + BOLD_RE = /\*\*(.+?)\*\*/ + ITALIC_RE = /\*(.+?)\*/ + LINK_RE = /\[(.+?)\]\((.+?)\)/ + + def tokenize_line(line) + case line + when HEADING_RE + Markdown::Token.new(:heading, Regexp.last_match(2), 
Regexp.last_match(1).length) + when /^\s*[-*]\s+(.+)/ + Markdown::Token.new(:list_item, Regexp.last_match(1), 0) + when /^\s*$/ + Markdown::Token.new(:blank, "", 0) + else + Markdown::Token.new(:paragraph, line, 0) + end + end + + def inline_format(text) + text + .gsub(LINK_RE) { "#{Regexp.last_match(1)}" } + .gsub(BOLD_RE) { "#{Regexp.last_match(1)}" } + .gsub(ITALIC_RE) { "#{Regexp.last_match(1)}" } + end + end + + module Markdown::Renderer + include Markdown::Tokenizer + + def render_token(token) + case token.type + when :heading + "#{inline_format(token.content)}" + when :list_item + "
  • #{inline_format(token.content)}
  • " + when :paragraph + "

    #{inline_format(token.content)}

    " + when :blank + "" + end + end + + def render(markdown) + markdown.lines.map { |line| tokenize_line(line.chomp) }.map { |token| render_token(token) }.reject(&:empty?).join("\n") + end + end + + class Markdown::Document + include Markdown::Renderer + + def initialize(source) + @source = source + end + + def to_html + render(@source) + end + + def word_count + @source.split(/\s+/).length + end + + def heading_count + @source.lines.count { |l| l.match?(HEADING_RE) } + end + end + ''' +end diff --git a/test/support/fixtures/ruby/orm_lite.ex b/test/support/fixtures/ruby/orm_lite.ex new file mode 100644 index 0000000..672b668 --- /dev/null +++ b/test/support/fixtures/ruby/orm_lite.ex @@ -0,0 +1,106 @@ +defmodule Test.Fixtures.Ruby.OrmLite do + @moduledoc false + use Test.LanguageFixture, language: "ruby orm_lite" + + @code ~S''' + module OrmLite + module Persistence + def self.included(base) + base.extend(ClassMethods) + base.instance_variable_set(:@columns, []) + base.instance_variable_set(:@validations, []) + end + + module ClassMethods + def column(name, type = :string) + @columns << { name: name, type: type } + attr_accessor name + end + + def validates(name, **rules) + @validations << { name: name, rules: rules } + end + + def columns + @columns + end + + def validations + @validations + end + + def find(id) + new(id: id) + end + end + + def initialize(attrs = {}) + attrs.each do |key, value| + send(:"#{key}=", value) if respond_to?(:"#{key}=") + end + end + + def valid? + @errors = [] + self.class.validations.each do |v| + value = send(v[:name]) + @errors << "#{v[:name]} can't be blank" if v[:rules][:presence] && (value.nil? || value.to_s.empty?) + @errors << "#{v[:name]} is too short" if v[:rules][:min_length] && value.to_s.length < v[:rules][:min_length] + end + @errors.empty? + end + + def errors + @errors ||= [] + end + + def save + return false unless valid? 
+ true + end + end + + module Associations + def self.included(base) + base.extend(ClassMethods) + end + + module ClassMethods + def has_many(name) + define_method(name) do + [] + end + end + + def belongs_to(name) + attr_accessor :"#{name}_id" + define_method(name) do + nil + end + end + end + end + end + + class User + include OrmLite::Persistence + include OrmLite::Associations + column :name, :string + column :email, :string + column :age, :integer + has_many :posts + validates :name, presence: true, min_length: 2 + validates :email, presence: true + end + + class Post + include OrmLite::Persistence + include OrmLite::Associations + column :title, :string + column :body, :text + belongs_to :user + validates :title, presence: true + validates :body, presence: true + end + ''' +end diff --git a/test/support/fixtures/rust/calculator.ex b/test/support/fixtures/rust/calculator.ex new file mode 100644 index 0000000..a47df57 --- /dev/null +++ b/test/support/fixtures/rust/calculator.ex @@ -0,0 +1,70 @@ +defmodule Test.Fixtures.Rust.Calculator do + @moduledoc false + use Test.LanguageFixture, language: "rust calculator" + + @code ~S''' + trait Calculator { + fn add(&self, a: f64, b: f64) -> f64; + fn subtract(&self, a: f64, b: f64) -> f64; + fn multiply(&self, a: f64, b: f64) -> f64; + fn divide(&self, a: f64, b: f64) -> Option; + } + + struct BasicCalculator; + + impl Calculator for BasicCalculator { + fn add(&self, a: f64, b: f64) -> f64 { + a + b + } + + fn subtract(&self, a: f64, b: f64) -> f64 { + a - b + } + + fn multiply(&self, a: f64, b: f64) -> f64 { + a * b + } + + fn divide(&self, a: f64, b: f64) -> Option { + if b == 0.0 { return None; } + Some(a / b) + } + } + + impl BasicCalculator { + fn new() -> Self { + BasicCalculator + } + + fn power(&self, base: f64, exp: f64) -> f64 { + base.powf(exp) + } + + fn sqrt(&self, n: f64) -> Option { + if n < 0.0 { return None; } + Some(n.sqrt()) + } + + fn abs(&self, n: f64) -> f64 { + n.abs() + } + } + + fn add(a: f64, 
b: f64) -> f64 { + a + b + } + + fn subtract(a: f64, b: f64) -> f64 { + a - b + } + + fn multiply(a: f64, b: f64) -> f64 { + a * b + } + + fn divide(a: f64, b: f64) -> Option { + if b == 0.0 { return None; } + Some(a / b) + } + ''' +end diff --git a/test/support/fixtures/rust/ring_buffer.ex b/test/support/fixtures/rust/ring_buffer.ex new file mode 100644 index 0000000..eba5a76 --- /dev/null +++ b/test/support/fixtures/rust/ring_buffer.ex @@ -0,0 +1,86 @@ +defmodule Test.Fixtures.Rust.RingBuffer do + @moduledoc false + use Test.LanguageFixture, language: "rust ring_buffer" + + @code ~S''' + struct RingBuffer { + data: Vec>, + head: usize, + tail: usize, + len: usize, + capacity: usize, + } + + impl RingBuffer { + fn new(capacity: usize) -> Self { + let data = (0..capacity).map(|_| None).collect(); + RingBuffer { data, head: 0, tail: 0, len: 0, capacity } + } + + fn push(&mut self, value: T) -> bool { + if self.len == self.capacity { + return false; + } + self.data[self.tail] = Some(value); + self.tail = (self.tail + 1) % self.capacity; + self.len += 1; + true + } + + fn pop(&mut self) -> Option { + if self.len == 0 { + return None; + } + let value = self.data[self.head].take(); + self.head = (self.head + 1) % self.capacity; + self.len -= 1; + value + } + + fn peek(&self) -> Option<&T> { + if self.len == 0 { None } else { self.data[self.head].as_ref() } + } + + fn is_empty(&self) -> bool { + self.len == 0 + } + + fn is_full(&self) -> bool { + self.len == self.capacity + } + + fn len(&self) -> usize { + self.len + } + + fn capacity(&self) -> usize { + self.capacity + } + + fn clear(&mut self) { + for slot in self.data.iter_mut() { + *slot = None; + } + self.head = 0; + self.tail = 0; + self.len = 0; + } + } + + impl RingBuffer { + fn to_vec(&self) -> Vec { + (0..self.len) + .filter_map(|i| self.data[(self.head + i) % self.capacity].clone()) + .collect() + } + } + + fn fill_buffer(items: &[T], capacity: usize) -> RingBuffer { + let mut buf = RingBuffer::new(capacity); 
+ for item in items { + buf.push(item.clone()); + } + buf + } + ''' +end diff --git a/test/support/fixtures/rust/tokenizer.ex b/test/support/fixtures/rust/tokenizer.ex new file mode 100644 index 0000000..0925774 --- /dev/null +++ b/test/support/fixtures/rust/tokenizer.ex @@ -0,0 +1,112 @@ +defmodule Test.Fixtures.Rust.Tokenizer do + @moduledoc false + use Test.LanguageFixture, language: "rust tokenizer" + + @code ~S''' + #[derive(Debug, PartialEq, Clone)] + enum TokenKind { + Number(f64), + Plus, + Minus, + Star, + Slash, + LParen, + RParen, + Eof, + } + + #[derive(Debug, Clone)] + struct Token { + kind: TokenKind, + lexeme: String, + line: usize, + } + + impl Token { + fn new(kind: TokenKind, lexeme: &str, line: usize) -> Self { + Token { kind, lexeme: lexeme.to_string(), line } + } + + fn is_operator(&self) -> bool { + matches!(self.kind, TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash) + } + } + + impl std::fmt::Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}({})", self.kind, self.lexeme) + } + } + + struct Lexer { + source: Vec, + pos: usize, + line: usize, + } + + impl Lexer { + fn new(source: &str) -> Self { + Lexer { source: source.chars().collect(), pos: 0, line: 1 } + } + + fn peek(&self) -> Option { + self.source.get(self.pos).copied() + } + + fn advance(&mut self) -> Option { + let ch = self.source.get(self.pos).copied(); + self.pos += 1; + ch + } + + fn skip_whitespace(&mut self) { + while let Some(c) = self.peek() { + if c == '\n' { self.line += 1; self.pos += 1; } + else if c.is_whitespace() { self.pos += 1; } + else { break; } + } + } + + fn read_number(&mut self) -> Token { + let start = self.pos; + while let Some(c) = self.peek() { + if c.is_ascii_digit() || c == '.' 
{ self.pos += 1; } + else { break; } + } + let lexeme: String = self.source[start..self.pos].iter().collect(); + let value: f64 = lexeme.parse().unwrap_or(0.0); + Token::new(TokenKind::Number(value), &lexeme, self.line) + } + + fn next_token(&mut self) -> Token { + self.skip_whitespace(); + match self.advance() { + Some('+') => Token::new(TokenKind::Plus, "+", self.line), + Some('-') => Token::new(TokenKind::Minus, "-", self.line), + Some('*') => Token::new(TokenKind::Star, "*", self.line), + Some('/') => Token::new(TokenKind::Slash, "/", self.line), + Some('(') => Token::new(TokenKind::LParen, "(", self.line), + Some(')') => Token::new(TokenKind::RParen, ")", self.line), + Some(c) if c.is_ascii_digit() => { self.pos -= 1; self.read_number() } + None => Token::new(TokenKind::Eof, "", self.line), + _ => Token::new(TokenKind::Eof, "", self.line), + } + } + + fn tokenize(&mut self) -> Vec { + let mut tokens = Vec::new(); + loop { + let t = self.next_token(); + let done = t.kind == TokenKind::Eof; + tokens.push(t); + if done { break; } + } + tokens + } + } + + fn tokenize(source: &str) -> Vec { + Lexer::new(source).tokenize() + } + ''' +end diff --git a/test/support/fixtures/scala/actor_messages.ex b/test/support/fixtures/scala/actor_messages.ex new file mode 100644 index 0000000..73f150c --- /dev/null +++ b/test/support/fixtures/scala/actor_messages.ex @@ -0,0 +1,70 @@ +defmodule Test.Fixtures.Scala.ActorMessages do + @moduledoc false + use Test.LanguageFixture, language: "scala actor_messages" + + @code ~S''' + trait Message + + class Request(val id: String, val payload: Map[String, String]) extends Message + + class Response(val id: String, val status: Int, val body: String) extends Message + + class Broadcast(val topic: String, val data: String) extends Message + + class Shutdown(val reason: String) extends Message + + trait ActorState + + class Active(val processedCount: Int) extends ActorState + + class Paused(val since: Long, val reason: String) extends 
ActorState + + class Stopped(val at: Long) extends ActorState + + trait Behaviour { + def receive(message: Message, state: ActorState): (List[Message], ActorState) + + def onStart(): ActorState + + def onStop(state: ActorState): Unit + } + + class EchoBehaviour extends Behaviour { + def receive(message: Message, state: ActorState): (List[Message], ActorState) = + message match { + case req: Request => + val reply = new Response(req.id, 200, req.payload.mkString(",")) + val newState = state match { + case a: Active => new Active(a.processedCount + 1) + case other => other + } + (List(reply), newState) + case _: Shutdown => (List.empty, new Stopped(System.currentTimeMillis())) + case _ => (List.empty, state) + } + + def onStart(): ActorState = new Active(0) + + def onStop(state: ActorState): Unit = {} + } + + class Supervisor { + private var actors: Map[String, Behaviour] = Map.empty + private var states: Map[String, ActorState] = Map.empty + + def spawn(id: String, behaviour: Behaviour): Unit = { + actors = actors + (id -> behaviour) + states = states + (id -> behaviour.onStart()) + } + + def send(id: String, message: Message): List[Message] = + actors.get(id).map { b => + val (replies, newState) = b.receive(message, states(id)) + states = states + (id -> newState) + replies + }.getOrElse(List.empty) + + def stop(id: String): Unit = actors.get(id).foreach { b => b.onStop(states(id)); actors = actors - id } + } + ''' +end diff --git a/test/support/fixtures/scala/case_class_algebra.ex b/test/support/fixtures/scala/case_class_algebra.ex new file mode 100644 index 0000000..7a2f002 --- /dev/null +++ b/test/support/fixtures/scala/case_class_algebra.ex @@ -0,0 +1,73 @@ +defmodule Test.Fixtures.Scala.CaseClassAlgebra do + @moduledoc false + use Test.LanguageFixture, language: "scala case_class_algebra" + + @code ~S''' + trait Expr + + class Num(val value: Double) extends Expr + + class Add(val left: Expr, val right: Expr) extends Expr + + class Sub(val left: Expr, val 
right: Expr) extends Expr + + class Mul(val left: Expr, val right: Expr) extends Expr + + class Div(val left: Expr, val right: Expr) extends Expr + + class Neg(val expr: Expr) extends Expr + + trait EvalResult + + class EvalOk(val value: Double) extends EvalResult + + class EvalError(val message: String) extends EvalResult + + trait Evaluator { + def eval(expr: Expr): EvalResult + } + + class SafeEvaluator extends Evaluator { + def eval(expr: Expr): EvalResult = expr match { + case n: Num => new EvalOk(n.value) + case neg: Neg => eval(neg.expr) match { + case ok: EvalOk => new EvalOk(-ok.value) + case err => err + } + case add: Add => combine(add.left, add.right)(_ + _) + case sub: Sub => combine(sub.left, sub.right)(_ - _) + case mul: Mul => combine(mul.left, mul.right)(_ * _) + case div: Div => eval(div.right) match { + case ok: EvalOk if ok.value == 0.0 => new EvalError("Division by zero") + case ok: EvalOk => eval(div.left) match { + case lOk: EvalOk => new EvalOk(lOk.value / ok.value) + case err => err + } + case err => err + } + } + + private def combine(l: Expr, r: Expr)(op: (Double, Double) => Double): EvalResult = + (eval(l), eval(r)) match { + case (lv: EvalOk, rv: EvalOk) => new EvalOk(op(lv.value, rv.value)) + case (err: EvalError, _) => err + case (_, err: EvalError) => err + } + } + + trait Printer { + def print(expr: Expr): String + } + + class InfixPrinter extends Printer { + def print(expr: Expr): String = expr match { + case n: Num => n.value.toString + case neg: Neg => s"-${print(neg.expr)}" + case add: Add => s"(${print(add.left)} + ${print(add.right)})" + case sub: Sub => s"(${print(sub.left)} - ${print(sub.right)})" + case mul: Mul => s"(${print(mul.left)} * ${print(mul.right)})" + case div: Div => s"(${print(div.left)} / ${print(div.right)})" + } + } + ''' +end diff --git a/test/support/fixtures/scala/typeclass_pattern.ex b/test/support/fixtures/scala/typeclass_pattern.ex new file mode 100644 index 0000000..0a14c37 --- /dev/null +++ 
b/test/support/fixtures/scala/typeclass_pattern.ex @@ -0,0 +1,64 @@ +defmodule Test.Fixtures.Scala.TypeclassPattern do + @moduledoc false + use Test.LanguageFixture, language: "scala typeclass_pattern" + + @code ~S''' + trait Show[A] { + def show(value: A): String + } + + trait Eq[A] { + def eqv(a: A, b: A): Boolean + + def neqv(a: A, b: A): Boolean = !eqv(a, b) + } + + trait Ord[A] extends Eq[A] { + def compare(a: A, b: A): Int + + def lt(a: A, b: A): Boolean = compare(a, b) < 0 + + def lte(a: A, b: A): Boolean = compare(a, b) <= 0 + + def gt(a: A, b: A): Boolean = compare(a, b) > 0 + + def gte(a: A, b: A): Boolean = compare(a, b) >= 0 + + def eqv(a: A, b: A): Boolean = compare(a, b) == 0 + } + + trait Functor[F[_]] { + def map[A, B](fa: F[A])(f: A => B): F[B] + } + + class Identity[A](val value: A) + + class IdentityInstances { + val identityFunctor: Functor[Identity] = new Functor[Identity] { + def map[A, B](fa: Identity[A])(f: A => B): Identity[B] = new Identity(f(fa.value)) + } + + val identityShow: Show[Identity[String]] = new Show[Identity[String]] { + def show(value: Identity[String]): String = s"Identity(${value.value})" + } + } + + class ShowSyntax[A](value: A, ev: Show[A]) { + def show: String = ev.show(value) + } + + class OrdSyntax[A](value: A, ev: Ord[A]) { + def <(other: A): Boolean = ev.lt(value, other) + + def >(other: A): Boolean = ev.gt(value, other) + + def ===(other: A): Boolean = ev.eqv(value, other) + } + + trait Monoid[A] { + def empty: A + + def combine(a: A, b: A): A + } + ''' +end diff --git a/test/support/fixtures/swift/actor_model.ex b/test/support/fixtures/swift/actor_model.ex new file mode 100644 index 0000000..3e65174 --- /dev/null +++ b/test/support/fixtures/swift/actor_model.ex @@ -0,0 +1,81 @@ +defmodule Test.Fixtures.Swift.ActorModel do + @moduledoc false + use Test.LanguageFixture, language: "swift actor_model" + + @code ~S''' + enum ActorMessage { + case ping(replyTo: String) + case pong(from: String) + case shutdown + case 
updateState(key: String, value: String) + } + + protocol ActorBehaviour { + var id: String { get } + func receive(_ message: ActorMessage) -> [ActorMessage] + func preStart() + func postStop() + } + + struct ActorRef { + let id: String + private let mailbox: [ActorMessage] + + init(id: String) { + self.id = id + self.mailbox = [] + } + } + + class ActorSystem { + private var actors: [String: ActorBehaviour] = [:] + private var mailboxes: [String: [ActorMessage]] = [:] + + func spawn(id: String, behaviour: ActorBehaviour) { + actors[id] = behaviour + mailboxes[id] = [] + behaviour.preStart() + } + + func send(to id: String, message: ActorMessage) { + mailboxes[id, default: []].append(message) + } + + func process(actorId: String) { + guard let actor = actors[actorId] else { return } + let messages = mailboxes[actorId] ?? [] + mailboxes[actorId] = [] + for message in messages { + let replies = actor.receive(message) + for reply in replies { self.processReply(reply) } + } + } + + func stop(actorId: String) { + actors[actorId]?.postStop() + actors.removeValue(forKey: actorId) + mailboxes.removeValue(forKey: actorId) + } + + private func processReply(_ message: ActorMessage) {} + } + + struct StateActor: ActorBehaviour { + let id: String + private var state: [String: String] = [:] + + func receive(_ message: ActorMessage) -> [ActorMessage] { + switch message { + case .ping(let replyTo): return [.pong(from: id)] + case .updateState(let key, let value): return [] + case .shutdown: return [] + default: return [] + } + } + + func preStart() {} + + func postStop() {} + } + ''' +end diff --git a/test/support/fixtures/swift/combine_stream.ex b/test/support/fixtures/swift/combine_stream.ex new file mode 100644 index 0000000..1faf9e4 --- /dev/null +++ b/test/support/fixtures/swift/combine_stream.ex @@ -0,0 +1,66 @@ +defmodule Test.Fixtures.Swift.CombineStream do + @moduledoc false + use Test.LanguageFixture, language: "swift combine_stream" + + @code ~S''' + protocol Publisher { 
+ associatedtype Output + associatedtype Failure: Error + func subscribe(_ subscriber: AnySubscriber) + } + + protocol Subscriber { + associatedtype Input + associatedtype Failure: Error + func receive(_ input: Input) + func receiveCompletion(_ completion: Completion) + } + + enum Completion { + case finished + case failure(Failure) + } + + struct AnySubscriber { + private let receiveValue: (Input) -> Void + private let receiveCompletion: (Completion) -> Void + + init(receiveValue: @escaping (Input) -> Void, receiveCompletion: @escaping (Completion) -> Void) { + self.receiveValue = receiveValue + self.receiveCompletion = receiveCompletion + } + + func receive(_ input: Input) { receiveValue(input) } + + func receiveCompletion(_ completion: Completion) { self.receiveCompletion(completion) } + } + + struct Just: Publisher { + typealias Failure = Never + let value: Output + + func subscribe(_ subscriber: AnySubscriber) { + subscriber.receive(value) + subscriber.receiveCompletion(.finished) + } + } + + struct MapPublisher: Publisher { + typealias Failure = Upstream.Failure + let upstream: Upstream + let transform: (Upstream.Output) -> Output + + func subscribe(_ subscriber: AnySubscriber) { + let mapped = AnySubscriber( + receiveValue: { self.upstream.subscribe(AnySubscriber(receiveValue: { _ in }, receiveCompletion: { _ in })); subscriber.receive(self.transform($0)) }, + receiveCompletion: subscriber.receiveCompletion + ) + upstream.subscribe(mapped) + } + } + + func sink(receiveValue: @escaping (T) -> Void) -> AnySubscriber { + return AnySubscriber(receiveValue: receiveValue, receiveCompletion: { _ in }) + } + ''' +end diff --git a/test/support/fixtures/swift/result_type.ex b/test/support/fixtures/swift/result_type.ex new file mode 100644 index 0000000..5ce1179 --- /dev/null +++ b/test/support/fixtures/swift/result_type.ex @@ -0,0 +1,63 @@ +defmodule Test.Fixtures.Swift.ResultType do + @moduledoc false + use Test.LanguageFixture, language: "swift result_type" + + 
@code ~S''' + enum ValidationError: Error { + case empty(field: String) + case tooShort(field: String, minimum: Int) + case tooLong(field: String, maximum: Int) + case invalidFormat(field: String, pattern: String) + } + + enum ParseError: Error { + case invalidJSON + case missingField(String) + case typeMismatch(field: String, expected: String) + } + + struct Email { + let value: String + + static func parse(_ raw: String) -> Result { + guard !raw.isEmpty else { return .failure(.empty(field: "email")) } + guard raw.contains("@") else { return .failure(.invalidFormat(field: "email", pattern: "must contain @")) } + return .success(Email(value: raw.lowercased())) + } + } + + struct Username { + let value: String + + static func parse(_ raw: String) -> Result { + guard !raw.isEmpty else { return .failure(.empty(field: "username")) } + guard raw.count >= 3 else { return .failure(.tooShort(field: "username", minimum: 3)) } + guard raw.count <= 32 else { return .failure(.tooLong(field: "username", maximum: 32)) } + return .success(Username(value: raw)) + } + } + + struct UserRegistration { + let email: Email + let username: Username + + static func validate(email rawEmail: String, username rawUsername: String) -> Result { + switch Email.parse(rawEmail) { + case .failure(let e): return .failure(e) + case .success(let email): + switch Username.parse(rawUsername) { + case .failure(let e): return .failure(e) + case .success(let username): return .success(UserRegistration(email: email, username: username)) + } + } + } + } + + func mapResult(_ result: Result, _ transform: (T) -> U) -> Result { + switch result { + case .success(let value): return .success(transform(value)) + case .failure(let error): return .failure(error) + } + } + ''' +end diff --git a/test/support/fixtures/typescript/dependency_injection.ex b/test/support/fixtures/typescript/dependency_injection.ex new file mode 100644 index 0000000..38bb9b8 --- /dev/null +++ 
b/test/support/fixtures/typescript/dependency_injection.ex @@ -0,0 +1,66 @@ +defmodule Test.Fixtures.TypeScript.DependencyInjection do + @moduledoc false + use Test.LanguageFixture, language: "typescript dependency_injection" + + @code ~S''' + interface Token { + readonly name: string; + } + + interface Provider { + token: Token; + factory: (container: Container) => T; + singleton: boolean; + } + + interface Container { + register(provider: Provider): void; + resolve(token: Token): T; + has(token: Token): boolean; + } + + class DIContainer implements Container { + private providers: Map>; + private singletons: Map; + + constructor() { + this.providers = new Map(); + this.singletons = new Map(); + } + + register(provider: Provider): void { + this.providers.set(provider.token.name, provider as Provider); + } + + resolve(token: Token): T { + const provider = this.providers.get(token.name); + if (!provider) { + throw new Error("No provider registered for token: " + token.name); + } + if (provider.singleton) { + if (!this.singletons.has(token.name)) { + this.singletons.set(token.name, provider.factory(this)); + } + return this.singletons.get(token.name) as T; + } + return provider.factory(this) as T; + } + + has(token: Token): boolean { + return this.providers.has(token.name); + } + } + + function createToken(name: string): Token { + return { name }; + } + + function singleton(token: Token, factory: (c: Container) => T): Provider { + return { token, factory, singleton: true }; + } + + function transient(token: Token, factory: (c: Container) => T): Provider { + return { token, factory, singleton: false }; + } + ''' +end diff --git a/test/support/fixtures/typescript/event_emitter.ex b/test/support/fixtures/typescript/event_emitter.ex new file mode 100644 index 0000000..8f1fed3 --- /dev/null +++ b/test/support/fixtures/typescript/event_emitter.ex @@ -0,0 +1,68 @@ +defmodule Test.Fixtures.TypeScript.EventEmitter do + @moduledoc false + use Test.LanguageFixture, language: 
"typescript event_emitter" + + @code ~S''' + interface EventMap { + [event: string]: unknown; + } + + interface Listener { + callback: (data: T) => void; + once: boolean; + } + + class EventEmitter { + private listeners: Map>>; + + constructor() { + this.listeners = new Map(); + } + + on(event: K, callback: (data: T[K]) => void): this { + if (!this.listeners.has(event)) { + this.listeners.set(event, []); + } + this.listeners.get(event)!.push({ callback: callback as (data: unknown) => void, once: false }); + return this; + } + + once(event: K, callback: (data: T[K]) => void): this { + if (!this.listeners.has(event)) { + this.listeners.set(event, []); + } + this.listeners.get(event)!.push({ callback: callback as (data: unknown) => void, once: true }); + return this; + } + + off(event: K, callback: (data: T[K]) => void): this { + const list = this.listeners.get(event); + if (list) { + this.listeners.set(event, list.filter(function(l) { return l.callback !== callback; })); + } + return this; + } + + emit(event: K, data: T[K]): boolean { + const list = this.listeners.get(event); + if (!list || list.length === 0) return false; + list.forEach(function(listener) { listener.callback(data); }); + this.listeners.set(event, list.filter(function(l) { return !l.once; })); + return true; + } + + removeAllListeners(event?: keyof T): this { + if (event) { + this.listeners.delete(event); + } else { + this.listeners.clear(); + } + return this; + } + } + + function createEmitter(): EventEmitter { + return new EventEmitter(); + } + ''' +end diff --git a/test/support/fixtures/typescript/user_profile_store.ex b/test/support/fixtures/typescript/user_profile_store.ex new file mode 100644 index 0000000..2242e43 --- /dev/null +++ b/test/support/fixtures/typescript/user_profile_store.ex @@ -0,0 +1,72 @@ +defmodule Test.Fixtures.TypeScript.UserProfileStore do + @moduledoc false + use Test.LanguageFixture, language: "typescript user_profile_store" + + @code ~S''' + interface UserProfile { + id: 
string; + name: string; + email: string; + role: "admin" | "member" | "guest"; + } + + interface StoreState { + users: Record; + loading: boolean; + error: string | null; + } + + interface Action { + type: string; + payload?: unknown; + } + + class UserProfileStore { + private state: StoreState; + private subscribers: Array<(state: StoreState) => void>; + + constructor() { + this.state = { users: {}, loading: false, error: null }; + this.subscribers = []; + } + + getState(): StoreState { + return this.state; + } + + dispatch(action: Action): void { + this.state = this.reduce(this.state, action); + this.notify(); + } + + subscribe(listener: (state: StoreState) => void): () => void { + this.subscribers.push(listener); + return () => { + this.subscribers = this.subscribers.filter(function(s) { return s !== listener; }); + }; + } + + private reduce(state: StoreState, action: Action): StoreState { + switch (action.type) { + case "SET_LOADING": + return { ...state, loading: action.payload as boolean }; + case "SET_ERROR": + return { ...state, error: action.payload as string }; + case "UPSERT_USER": + const user = action.payload as UserProfile; + return { ...state, users: { ...state.users, [user.id]: user } }; + default: + return state; + } + } + + private notify(): void { + this.subscribers.forEach(function(listener) { listener(this.state); }.bind(this)); + } + } + + function createUserProfileStore(): UserProfileStore { + return new UserProfileStore(); + } + ''' +end diff --git a/test/support/fixtures/zig/allocator_interface.ex b/test/support/fixtures/zig/allocator_interface.ex new file mode 100644 index 0000000..f11d680 --- /dev/null +++ b/test/support/fixtures/zig/allocator_interface.ex @@ -0,0 +1,72 @@ +defmodule Test.Fixtures.Zig.AllocatorInterface do + @moduledoc false + use Test.LanguageFixture, language: "zig allocator_interface" + + @code ~S''' + const Allocator = struct { + ptr: *anyopaque, + vtable: *const VTable, + + pub const VTable = struct { + alloc: *const 
fn (ctx: *anyopaque, len: usize, alignment: u8) ?[*]u8, + free: *const fn (ctx: *anyopaque, buf: [*]u8, len: usize) void, + resize: *const fn (ctx: *anyopaque, buf: [*]u8, old_len: usize, new_len: usize) bool, + }; + + pub fn alloc(self: Allocator, comptime T: type, n: usize) ![]T { + const ptr = self.vtable.alloc(self.ptr, @sizeOf(T) * n, @alignOf(T)) orelse return error.OutOfMemory; + return @as([*]T, @ptrCast(@alignCast(ptr)))[0..n]; + } + + pub fn free(self: Allocator, slice: anytype) void { + const T = @TypeOf(slice[0]); + self.vtable.free(self.ptr, @as([*]u8, @ptrCast(slice.ptr)), slice.len * @sizeOf(T)); + } + }; + + const ArenaAllocator = struct { + backing: Allocator, + buffer: []u8, + pos: usize, + + pub fn init(backing: Allocator, size: usize) !ArenaAllocator { + const buf = try backing.alloc(u8, size); + return ArenaAllocator{ .backing = backing, .buffer = buf, .pos = 0 }; + } + + pub fn deinit(self: *ArenaAllocator) void { + self.backing.free(self.buffer); + } + + pub fn alloc(self: *ArenaAllocator, comptime T: type, n: usize) ![]T { + const size = @sizeOf(T) * n; + if (self.pos + size > self.buffer.len) return error.OutOfMemory; + const slice = self.buffer[self.pos .. 
self.pos + size]; + self.pos += size; + return @as([*]T, @ptrCast(@alignCast(slice.ptr)))[0..n]; + } + + pub fn reset(self: *ArenaAllocator) void { + self.pos = 0; + } + }; + + const AllocError = error{ + OutOfMemory, + AlignmentError, + InvalidSize, + }; + + fn alignForward(addr: usize, alignment: usize) usize { + return (addr + alignment - 1) & ~(alignment - 1); + } + + fn isPowerOfTwo(n: usize) bool { + return n > 0 and (n & (n - 1)) == 0; + } + + fn sizeOf(comptime T: type) comptime_int { + return @sizeOf(T); + } + ''' +end diff --git a/test/support/fixtures/zig/iterator_protocol.ex b/test/support/fixtures/zig/iterator_protocol.ex new file mode 100644 index 0000000..52848ef --- /dev/null +++ b/test/support/fixtures/zig/iterator_protocol.ex @@ -0,0 +1,87 @@ +defmodule Test.Fixtures.Zig.IteratorProtocol do + @moduledoc false + use Test.LanguageFixture, language: "zig iterator_protocol" + + @code ~S''' + fn Iterator(comptime T: type) type { + return struct { + const Self = @This(); + pub const Item = T; + ptr: *anyopaque, + nextFn: *const fn (ptr: *anyopaque) ?T, + + pub fn next(self: *Self) ?T { + return self.nextFn(self.ptr); + } + + pub fn count(self: *Self) usize { + var n: usize = 0; + while (self.next() != null) n += 1; + return n; + } + + pub fn forEach(self: *Self, callback: fn (T) void) void { + while (self.next()) |item| callback(item); + } + }; + } + + fn RangeIterator(comptime T: type) type { + return struct { + current: T, + end: T, + step: T, + + pub fn init(start: T, end: T, step: T) @This() { + return .{ .current = start, .end = end, .step = step }; + } + + pub fn next(self: *@This()) ?T { + if (self.current >= self.end) return null; + const value = self.current; + self.current += self.step; + return value; + } + }; + } + + fn SliceIterator(comptime T: type) type { + return struct { + slice: []const T, + index: usize, + + pub fn init(slice: []const T) @This() { + return .{ .slice = slice, .index = 0 }; + } + + pub fn next(self: *@This()) ?T { + if 
(self.index >= self.slice.len) return null; + const item = self.slice[self.index]; + self.index += 1; + return item; + } + + pub fn reset(self: *@This()) void { + self.index = 0; + } + }; + } + + fn MapIterator(comptime In: type, comptime Out: type) type { + return struct { + inner: SliceIterator(In), + transform: *const fn (In) Out, + + pub fn next(self: *@This()) ?Out { + const item = self.inner.next() orelse return null; + return self.transform(item); + } + }; + } + + fn take(comptime T: type, iter: *SliceIterator(T), n: usize) []const T { + _ = n; + return iter.slice; + } + ''' +end diff --git a/test/support/fixtures/zig/tagged_union.ex b/test/support/fixtures/zig/tagged_union.ex new file mode 100644 index 0000000..fed8b31 --- /dev/null +++ b/test/support/fixtures/zig/tagged_union.ex @@ -0,0 +1,90 @@ +defmodule Test.Fixtures.Zig.TaggedUnion do + @moduledoc false + use Test.LanguageFixture, language: "zig tagged_union" + + @code ~S''' + const TokenKind = enum { + identifier, + integer, + float, + string_literal, + operator, + keyword, + comment, + eof, + }; + + const Token = struct { + kind: TokenKind, + start: usize, + end: usize, + line: u32, + column: u32, + + pub fn length(self: Token) usize { + return self.end - self.start; + } + + pub fn isLiteral(self: Token) bool { + return self.kind == .integer or self.kind == .float or self.kind == .string_literal; + } + }; + + const Value = union(enum) { + int: i64, + float: f64, + boolean: bool, + string: []const u8, + null_value: void, + + pub fn typeName(self: Value) []const u8 { + return switch (self) { + .int => "int", + .float => "float", + .boolean => "boolean", + .string => "string", + .null_value => "null", + }; + } + + pub fn isTruthy(self: Value) bool { + return switch (self) { + .int => |v| v != 0, + .float => |v| v != 0.0, + .boolean => |v| v, + .string => |v| v.len > 0, + .null_value => false, + }; + } + }; + + const ParseError = error{ + UnexpectedToken, + UnexpectedEof, + InvalidLiteral, + 
StackOverflow, + }; + + fn parseInteger(source: []const u8) !i64 { + var result: i64 = 0; + for (source) |ch| { + if (ch < '0' or ch > '9') return ParseError.InvalidLiteral; + result = result * 10 + @as(i64, ch - '0'); + } + return result; + } + + fn parseFloat(source: []const u8) !f64 { + var result: f64 = 0; + var decimal = false; + var scale: f64 = 1; + for (source) |ch| { + if (ch == '.') { decimal = true; continue; } + if (ch < '0' or ch > '9') return ParseError.InvalidLiteral; + if (decimal) { scale /= 10; result += @as(f64, ch - '0') * scale; } + else { result = result * 10 + @as(f64, ch - '0'); } + } + return result; + } + ''' +end diff --git a/test/support/language_fixture.ex b/test/support/language_fixture.ex new file mode 100644 index 0000000..b83b44a --- /dev/null +++ b/test/support/language_fixture.ex @@ -0,0 +1,61 @@ +defmodule Test.LanguageFixture do + @moduledoc """ + Macro for defining per-language, per-domain code fixtures. + + ## In a fixture module + + defmodule Test.Fixtures.Elixir.EventBus do + use Test.LanguageFixture, language: "elixir event bus" + + @code ~S''' + defmodule EventBus do + ... 
+ end + ''' + end + + ## In a test module + + defmodule MyTest do + Module.register_attribute(__MODULE__, :fixture, accumulate: true, persist: false) + use Test.Fixtures.Elixir.EventBus + use Test.Fixtures.Python.CsvPipeline + end + """ + + defmacro __using__(opts) do + language = Keyword.fetch!(opts, :language) + + quote do + @language unquote(language) + @before_compile Test.LanguageFixture + end + end + + defmacro __before_compile__(env) do + mod = env.module + code = Module.get_attribute(mod, :code) + language = Module.get_attribute(mod, :language) + block_assertions = Module.get_attribute(mod, :block_assertions) || [] + + unless code do + raise CompileError, + file: env.file, + line: env.line, + description: "#{mod} uses Test.LanguageFixture but @code is not set" + end + + quote do + defmacro __using__(_opts) do + fixture_language = unquote(language) + fixture_code = unquote(code) + fixture_block_assertions = unquote(Macro.escape(block_assertions)) + + quote do + @fixture {unquote(fixture_language), unquote(fixture_code), + unquote(Macro.escape(fixture_block_assertions))} + end + end + end + end +end From fb4135eedcda1a3e8a79ae6ed7fc3d5826c3cd26 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 19 Mar 2026 18:26:16 +0100 Subject: [PATCH 05/71] feat(analysis): add OTP-based analysis run context and servers Introduce supervised GenServer processes for managing analysis runs: BehaviorConfigServer, FileContextServer, FileMetricsServer, RunContext, and RunSupervisor. Enables concurrent per-file metric collection with shared configuration state. 
Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/analysis/behavior_config_server.ex | 117 ++++++++++++++++++ lib/codeqa/analysis/file_context_server.ex | 85 +++++++++++++ lib/codeqa/analysis/file_metrics_server.ex | 107 ++++++++++++++++ lib/codeqa/analysis/run_context.ex | 16 +++ lib/codeqa/analysis/run_supervisor.ex | 54 ++++++++ 5 files changed, 379 insertions(+) create mode 100644 lib/codeqa/analysis/behavior_config_server.ex create mode 100644 lib/codeqa/analysis/file_context_server.ex create mode 100644 lib/codeqa/analysis/file_metrics_server.ex create mode 100644 lib/codeqa/analysis/run_context.ex create mode 100644 lib/codeqa/analysis/run_supervisor.ex diff --git a/lib/codeqa/analysis/behavior_config_server.ex b/lib/codeqa/analysis/behavior_config_server.ex new file mode 100644 index 0000000..f526b20 --- /dev/null +++ b/lib/codeqa/analysis/behavior_config_server.ex @@ -0,0 +1,117 @@ +defmodule CodeQA.Analysis.BehaviorConfigServer do + @moduledoc """ + Per-run GenServer that loads all YAML behavior configs once and serves them + from an anonymous ETS table. + + Eliminates repeated disk reads in `SampleRunner.diagnose_aggregate/2` by + loading `priv/combined_metrics/*.yml` on startup and keeping data in memory + for the duration of the analysis run. + + ETS layout: `{category, behavior} => behavior_data` + where `behavior_data` is the raw YAML map for that behavior. + """ + + use GenServer + + @yaml_dir "priv/combined_metrics" + + # --- Public API --- + + @spec start_link(keyword()) :: GenServer.on_start() + def start_link(opts \\ []) do + GenServer.start_link(__MODULE__, opts) + end + + @doc "Returns the ETS table id. Callers may read directly from it." + @spec get_tid(pid()) :: :ets.tid() + def get_tid(pid), do: GenServer.call(pid, :get_tid) + + @doc """ + Returns all behaviors grouped by category. 
+ + %{"function_design" => [{"no_boolean_parameter", behavior_data}, ...], ...} + """ + @spec get_all_behaviors(pid()) :: %{String.t() => [{String.t(), map()}]} + def get_all_behaviors(pid) do + tid = get_tid(pid) + + tid + |> :ets.tab2list() + |> Enum.reduce(%{}, fn {{cat, beh}, data}, acc -> + Map.update(acc, cat, [{beh, data}], &[{beh, data} | &1]) + end) + end + + @doc "Returns the scalar weight map for a given category + behavior." + @spec get_scalars(pid(), String.t(), String.t()) :: %{{String.t(), String.t()} => float()} + def get_scalars(pid, category, behavior) do + tid = get_tid(pid) + + case :ets.lookup(tid, {category, behavior}) do + [{_, data}] -> scalars_from_behavior_data(data) + [] -> %{} + end + end + + @doc "Returns the `_log_baseline` value for a given category + behavior." + @spec get_log_baseline(pid(), String.t(), String.t()) :: float() + def get_log_baseline(pid, category, behavior) do + tid = get_tid(pid) + + case :ets.lookup(tid, {category, behavior}) do + [{_, data}] -> Map.get(data, "_log_baseline", 0.0) / 1.0 + [] -> 0.0 + end + end + + # --- GenServer callbacks --- + + @impl true + def init(_opts) do + tid = :ets.new(:behavior_config, [:set, :public, read_concurrency: true]) + load_configs(tid) + {:ok, %{tid: tid}} + end + + @impl true + def handle_call(:get_tid, _from, state) do + {:reply, state.tid, state} + end + + # --- Private helpers --- + + defp load_configs(tid) do + case File.ls(@yaml_dir) do + {:ok, files} -> + files + |> Enum.filter(&String.ends_with?(&1, ".yml")) + |> Enum.each(fn yml_file -> + category = String.trim_trailing(yml_file, ".yml") + yaml_path = Path.join(@yaml_dir, yml_file) + {:ok, data} = YamlElixir.read_from_file(yaml_path) + + data + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.each(fn {behavior, behavior_data} -> + :ets.insert(tid, {{category, behavior}, behavior_data}) + end) + end) + + {:error, _} -> + :ok + end + end + + @doc false + def scalars_from_behavior_data(behavior_data) do + 
behavior_data + |> Enum.flat_map(fn + {group, keys} when is_map(keys) -> + Enum.map(keys, fn {key, scalar} -> {{group, key}, scalar / 1.0} end) + + _ -> + [] + end) + |> Map.new() + end +end diff --git a/lib/codeqa/analysis/file_context_server.ex b/lib/codeqa/analysis/file_context_server.ex new file mode 100644 index 0000000..28f9670 --- /dev/null +++ b/lib/codeqa/analysis/file_context_server.ex @@ -0,0 +1,85 @@ +defmodule CodeQA.Analysis.FileContextServer do + @moduledoc """ + Per-run GenServer that memoizes `Pipeline.build_file_context/2` by + `{MD5(content), language_name}`. + + Cache key includes the resolved language name because different languages + produce different keyword/operator sets, yielding different identifiers from + the same content. + + ETS layout: `{md5_binary, language_name} => FileContext.t()` + + On a cache miss, the calling process builds the context directly and inserts + it into the shared ETS table — no GenServer mailbox round-trip for the + computation itself. + """ + + use GenServer + + alias CodeQA.Engine.{FileContext, Pipeline} + + # --- Public API --- + + @spec start_link(keyword()) :: GenServer.on_start() + def start_link(opts \\ []) do + GenServer.start_link(__MODULE__, opts) + end + + @doc "Returns the ETS table id. Callers may read directly from it." + @spec get_tid(pid()) :: :ets.tid() + def get_tid(pid), do: GenServer.call(pid, :get_tid) + + @doc """ + Returns a cached (or freshly built) `FileContext` for `content`. + + The language is resolved from `opts` (`:language` or `:path`); defaults to + `Unknown`, consistent with how `FileImpact` calls `build_file_context/2`. 
+ """ + @spec get(pid(), String.t(), keyword()) :: FileContext.t() + def get(pid, content, opts \\ []) do + tid = get_tid(pid) + language_name = resolve_language_name(opts) + key = {md5(content), language_name} + + case :ets.lookup(tid, key) do + [{_, ctx}] -> + ctx + + [] -> + ctx = Pipeline.build_file_context(content, opts) + :ets.insert(tid, {key, ctx}) + ctx + end + end + + # --- GenServer callbacks --- + + @impl true + def init(_opts) do + tid = :ets.new(:file_context, [:set, :public, read_concurrency: true]) + {:ok, %{tid: tid}} + end + + @impl true + def handle_call(:get_tid, _from, state) do + {:reply, state.tid, state} + end + + # --- Private helpers --- + + defp md5(content), do: :crypto.hash(:md5, content) + + defp resolve_language_name(opts) do + cond do + lang = Keyword.get(opts, :language) -> + mod = CodeQA.Language.find(lang) || CodeQA.Languages.Unknown + mod.name() + + path = Keyword.get(opts, :path) -> + CodeQA.Language.detect(path).name() + + true -> + CodeQA.Languages.Unknown.name() + end + end +end diff --git a/lib/codeqa/analysis/file_metrics_server.ex b/lib/codeqa/analysis/file_metrics_server.ex new file mode 100644 index 0000000..579a63d --- /dev/null +++ b/lib/codeqa/analysis/file_metrics_server.ex @@ -0,0 +1,107 @@ +defmodule CodeQA.Analysis.FileMetricsServer do + @moduledoc """ + Per-run GenServer that caches `Registry.run_file_metrics/2` results. + + Pre-populated from `pipeline_result` before block analysis starts so baseline + metrics are served directly from ETS without recomputation. + + ETS layout: + - `{:path, path}` => metrics map (baseline for existing files) + - `{:hash, md5_binary}` => metrics map (computed on demand for reconstructed content) + """ + + use GenServer + + alias CodeQA.Engine.Pipeline + alias CodeQA.Engine.Registry + + # --- Public API --- + + @spec start_link(keyword()) :: GenServer.on_start() + def start_link(opts \\ []) do + GenServer.start_link(__MODULE__, opts) + end + + @doc "Returns the ETS table id. 
Callers may read directly from it." + @spec get_tid(pid()) :: :ets.tid() + def get_tid(pid), do: GenServer.call(pid, :get_tid) + + @doc """ + Bulk-inserts all baseline metrics from `pipeline_result` and cross-indexes by + content hash for each path present in `files_map`. + + Call once after starting the supervisor, before beginning block analysis. + """ + @spec populate(pid(), map(), map()) :: :ok + def populate(pid, pipeline_result, files_map) do + tid = get_tid(pid) + files_data = Map.get(pipeline_result, "files", %{}) + + Enum.each(files_data, fn {path, file_data} -> + metrics = Map.get(file_data, "metrics", %{}) + :ets.insert(tid, {{:path, path}, metrics}) + end) + + Enum.each(files_map, fn {path, content} -> + hash = md5(content) + + case :ets.lookup(tid, {:path, path}) do + [{_, metrics}] -> :ets.insert(tid, {{:hash, hash}, metrics}) + [] -> :ok + end + end) + + :ok + end + + @doc "Returns pre-populated baseline metrics for `path`, or `nil` if not found." + @spec get_by_path(pid(), String.t()) :: map() | nil + def get_by_path(pid, path) do + tid = get_tid(pid) + + case :ets.lookup(tid, {:path, path}) do + [{_, metrics}] -> metrics + [] -> nil + end + end + + @doc """ + Returns metrics for `content`, using the hash cache. + + On a cache miss, builds the file context and runs metrics in the calling + process, then inserts the result into ETS for future lookups. 
+ """ + @spec get_for_content(pid(), Registry.t(), String.t(), keyword()) :: map() + def get_for_content(pid, registry, content, opts \\ []) do + tid = get_tid(pid) + hash = md5(content) + + case :ets.lookup(tid, {:hash, hash}) do + [{_, metrics}] -> + metrics + + [] -> + ctx = Pipeline.build_file_context(content, opts) + metrics = Registry.run_file_metrics(registry, ctx) + :ets.insert(tid, {{:hash, hash}, metrics}) + metrics + end + end + + # --- GenServer callbacks --- + + @impl true + def init(_opts) do + tid = :ets.new(:file_metrics, [:set, :public, read_concurrency: true]) + {:ok, %{tid: tid}} + end + + @impl true + def handle_call(:get_tid, _from, state) do + {:reply, state.tid, state} + end + + # --- Private helpers --- + + defp md5(content), do: :crypto.hash(:md5, content) +end diff --git a/lib/codeqa/analysis/run_context.ex b/lib/codeqa/analysis/run_context.ex new file mode 100644 index 0000000..e9e4aae --- /dev/null +++ b/lib/codeqa/analysis/run_context.ex @@ -0,0 +1,16 @@ +defmodule CodeQA.Analysis.RunContext do + @moduledoc """ + Holds PIDs for the per-run GenServers started under `RunSupervisor`. + + Passed through the analysis call chain so all callers can access + cached state without named process registration. + """ + + defstruct [:behavior_config_pid, :file_context_pid, :file_metrics_pid] + + @type t :: %__MODULE__{ + behavior_config_pid: pid(), + file_context_pid: pid(), + file_metrics_pid: pid() + } +end diff --git a/lib/codeqa/analysis/run_supervisor.ex b/lib/codeqa/analysis/run_supervisor.ex new file mode 100644 index 0000000..687e58c --- /dev/null +++ b/lib/codeqa/analysis/run_supervisor.ex @@ -0,0 +1,54 @@ +defmodule CodeQA.Analysis.RunSupervisor do + @moduledoc """ + One-shot supervisor for the per-analysis-run GenServers. + + Started at the top of `BlockImpactAnalyzer.analyze/3` and stopped (via + `Supervisor.stop/1`) in an `after` block when the run completes. 
+ + Servers are not registered by name, preventing collisions when multiple + analysis runs share the same BEAM node (e.g. parallel tests). + """ + + use Supervisor + + alias CodeQA.Analysis.{BehaviorConfigServer, FileContextServer, FileMetricsServer, RunContext} + + @spec start_link(keyword()) :: Supervisor.on_start() + def start_link(opts \\ []) do + Supervisor.start_link(__MODULE__, opts) + end + + @doc """ + Queries child PIDs from `sup` and returns a `RunContext` struct. + + Call once after `start_link/1` succeeds, before beginning analysis. + """ + @spec run_context(pid()) :: RunContext.t() + def run_context(sup) do + children = Supervisor.which_children(sup) + + %RunContext{ + behavior_config_pid: find_pid(children, BehaviorConfigServer), + file_context_pid: find_pid(children, FileContextServer), + file_metrics_pid: find_pid(children, FileMetricsServer) + } + end + + @impl true + def init(_opts) do + children = [ + {BehaviorConfigServer, []}, + {FileContextServer, []}, + {FileMetricsServer, []} + ] + + Supervisor.init(children, strategy: :one_for_one) + end + + defp find_pid(children, module) do + {_id, pid, _type, _modules} = + Enum.find(children, fn {id, _pid, _type, _modules} -> id == module end) + + pid + end +end From 110ffd2eef908ee9dfd70032cd4e2711c852d854 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 19 Mar 2026 18:26:30 +0100 Subject: [PATCH 06/71] feat(combined-metrics): add combined metrics scoring framework Add a YAML-driven scoring framework that evaluates code quality across categories (code smells, consistency, dependencies, documentation, error handling, file structure, function design, naming conventions, etc.). Each category loads behaviors from YAML config with per-language sample validation. Includes FileSCorer, Scorer, SampleRunner, and mix tasks for running sample reports and debugging signals. 
Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/combined_metrics/category.ex | 38 + lib/codeqa/combined_metrics/code_smells.ex | 28 + lib/codeqa/combined_metrics/consistency.ex | 29 + lib/codeqa/combined_metrics/dependencies.ex | 28 + lib/codeqa/combined_metrics/documentation.ex | 28 + lib/codeqa/combined_metrics/error_handling.ex | 28 + lib/codeqa/combined_metrics/file_scorer.ex | 97 ++ lib/codeqa/combined_metrics/file_structure.ex | 28 + .../combined_metrics/function_design.ex | 28 + .../combined_metrics/naming_conventions.ex | 30 + lib/codeqa/combined_metrics/sample_runner.ex | 499 +++++++ .../combined_metrics/scope_and_assignment.ex | 28 + lib/codeqa/combined_metrics/scorer.ex | 83 ++ lib/codeqa/combined_metrics/testing.ex | 28 + lib/codeqa/combined_metrics/type_and_value.ex | 28 + .../combined_metrics/variable_naming.ex | 28 + lib/mix/tasks/codeqa/sample_report.ex | 192 +++ lib/mix/tasks/codeqa/signal_debug.ex | 183 +++ priv/combined_metrics/code_smells.yml | 540 ++++++++ priv/combined_metrics/consistency.yml | 310 +++++ priv/combined_metrics/dependencies.yml | 319 +++++ priv/combined_metrics/documentation.yml | 634 +++++++++ priv/combined_metrics/error_handling.yml | 325 +++++ priv/combined_metrics/file_structure.yml | 503 +++++++ priv/combined_metrics/function_design.yml | 820 +++++++++++ priv/combined_metrics/naming_conventions.yml | 266 ++++ .../bad/notifications.ex | 84 ++ .../consistent_string_quote_style/config.yml | 1 + .../good/notifications.ex | 82 ++ .../bad/fetcher.go | 64 + .../good/fetcher.go | 61 + .../bad/worker.rs | 51 + .../good/worker.rs | 51 + .../bad/cache.rs | 59 + .../good/cache.rs | 61 + .../bad/buffer.rs | 44 + .../good/buffer.rs | 61 + .../bad/registry.rs | 56 + .../good/registry.rs | 52 + .../bad/pipeline.rs | 53 + .../good/pipeline.rs | 48 + .../bad/queue.rs | 52 + .../good/queue.rs | 50 + .../bad/importer.go | 53 + .../good/importer.go | 58 + .../bad/worker.go | 45 + .../good/worker.go | 52 + .../bad/DocumentStorage.cs | 63 + 
.../good/DocumentStorage.cs | 60 + .../bad/price_calculator.py | 47 + .../good/price_calculator.py | 64 + .../bad/shipping.ex | 67 + .../good/shipping.ex | 61 + .../bad/matrix_utils.js | 59 + .../good/matrix_utils.js | 53 + .../bad/notification_handler.ts | 67 + .../good/notification_handler.ts | 64 + .../no_auto_ptr/bad/Connection.cpp | 73 + .../no_auto_ptr/good/Connection.cpp | 73 + .../bad/NotificationService.cs | 74 + .../good/NotificationService.cs | 61 + .../no_class_variables/bad/account.rb | 46 + .../no_class_variables/good/account.rb | 47 + .../no_const_enum/bad/product_repository.ts | 70 + .../no_const_enum/good/product_repository.ts | 69 + .../bad/order_service.ex | 96 ++ .../no_dead_code_after_return/config.yml | 1 + .../good/order_service.ex | 58 + .../no_debug_print_statements/bad/payment.ex | 97 ++ .../no_debug_print_statements/config.yml | 1 + .../no_debug_print_statements/good/payment.ex | 66 + .../no_double_negation/bad/cart.rb | 45 + .../no_double_negation/good/cart.rb | 44 + .../bad/DatabaseConnection.java | 67 + .../good/DatabaseConnection.java | 77 ++ .../no_fixme_comments/bad/importer.ex | 77 ++ .../code_smells/no_fixme_comments/config.yml | 1 + .../no_fixme_comments/good/importer.ex | 88 ++ .../bad/analytics_tracker.js | 59 + .../good/analytics_tracker.js | 68 + .../bad/billing.ex | 58 + .../good/billing.ex | 53 + .../bad/task_runner.py | 61 + .../good/task_runner.py | 76 ++ .../no_long_parameter_list/bad/accounts.ex | 62 + .../no_long_parameter_list/good/accounts.ex | 66 + .../bad/OrderService.php | 57 + .../good/OrderService.php | 63 + .../no_namespace_trespassing/bad/catalog.ex | 58 + .../no_namespace_trespassing/good/catalog.ex | 61 + .../bad/subscription.rb | 39 + .../good/subscription.rb | 48 + .../no_nested_ternary/bad/pricing.ex | 105 ++ .../code_smells/no_nested_ternary/config.yml | 1 + .../no_nested_ternary/good/pricing.ex | 54 + .../bad/form_validator.js | 68 + .../good/form_validator.js | 64 + 
.../no_private_inheritance/bad/Connection.cpp | 75 + .../good/Connection.cpp | 75 + .../bad/Widget.cpp | 58 + .../good/Widget.cpp | 72 + .../bad/inventory.ex | 71 + .../good/inventory.ex | 58 + .../bad/data_store.js | 63 + .../good/data_store.js | 67 + .../bad/UserRepository.php | 58 + .../good/UserRepository.php | 69 + .../bad/FileImportService.kt | 54 + .../good/FileImportService.kt | 51 + .../bad/UserRepository.php | 55 + .../good/UserRepository.php | 84 ++ .../bad/order_service.ts | 68 + .../good/order_service.ts | 46 + .../bad/Matrix.cpp | 75 + .../good/Matrix.cpp | 72 + .../bad/Widget.cpp | 72 + .../good/Widget.cpp | 77 ++ .../no_with_statement/bad/report_builder.js | 54 + .../no_with_statement/good/report_builder.js | 49 + .../bad/counter.go | 39 + .../good/counter.go | 55 + .../bad/Parser.cpp | 97 ++ .../good/Parser.cpp | 93 ++ .../bad/analytics.ex | 65 + .../good/analytics.ex | 74 + .../bad/notification_router.js | 58 + .../good/notification_router.js | 52 + .../bad/AnalyticsProcessor.kt | 54 + .../good/AnalyticsProcessor.kt | 54 + .../bad/pool.rs | 52 + .../good/pool.rs | 55 + .../bad/payment.rb | 58 + .../good/payment.rb | 39 + .../bad/database_manager.py | 62 + .../good/database_manager.py | 67 + .../uses_copied_on_copy_types/bad/metrics.rs | 48 + .../uses_copied_on_copy_types/good/metrics.rs | 47 + .../bad/ProductCatalog.php | 62 + .../good/ProductCatalog.php | 66 + .../bad/UserProfile.swift | 60 + .../good/UserProfile.swift | 57 + .../bad/server.go | 46 + .../good/server.go | 58 + .../bad/AccessPolicy.cs | 56 + .../good/AccessPolicy.cs | 54 + .../bad/Buffer.cpp | 77 ++ .../good/Buffer.cpp | 63 + .../bad/geometry.rs | 51 + .../good/geometry.rs | 55 + .../bad/HtmlRenderer.cs | 61 + .../good/HtmlRenderer.cs | 72 + .../bad/DataSyncService.kt | 47 + .../good/DataSyncService.kt | 49 + .../bad/ReportExporter.cs | 81 ++ .../good/ReportExporter.cs | 66 + .../bad/PaymentGateway.php | 70 + .../good/PaymentGateway.php | 73 + .../bad/OrderService.php | 70 + 
.../good/OrderService.php | 79 ++ .../bad/analytics.ex | 78 ++ .../consistent_casing_within_file/config.yml | 1 + .../good/analytics.ex | 78 ++ .../bad/accounts.ex | 102 ++ .../consistent_error_return_shape/config.yml | 1 + .../good/accounts.ex | 94 ++ .../bad/formatter.ex | 66 + .../consistent_function_style/config.yml | 1 + .../good/formatter.ex | 96 ++ .../bad/subscriptions.ex | 58 + .../good/subscriptions.ex | 67 + .../bad/PaymentProcessor.java | 62 + .../good/PaymentProcessor.java | 59 + .../bad/UserRepository.java | 88 ++ .../good/UserRepository.java | 92 ++ .../bad/sessions.ex | 60 + .../good/sessions.ex | 76 ++ .../bad/PaymentProcessor.swift | 53 + .../good/PaymentProcessor.swift | 51 + .../same_concept_same_name/bad/auth.ex | 102 ++ .../same_concept_same_name/config.yml | 1 + .../same_concept_same_name/good/auth.ex | 102 ++ .../bad/ProductCatalog.java | 63 + .../good/ProductCatalog.java | 62 + .../bad/OrderService.php | 71 + .../good/OrderService.php | 78 ++ .../import_count_under_10/bad/dashboard.ex | 80 ++ .../import_count_under_10/config.yml | 1 + .../import_count_under_10/good/dashboard.ex | 80 ++ .../low_coupling/bad/order_controller.ex | 91 ++ .../dependencies/low_coupling/config.yml | 1 + .../low_coupling/good/order_controller.ex | 57 + .../no_default_exports/bad/payment_gateway.ts | 68 + .../good/payment_gateway.ts | 63 + .../bad/product_repository.ts | 46 + .../good/product_repository.ts | 57 + .../no_wildcard_imports/bad/query_helpers.ex | 74 + .../no_wildcard_imports/config.yml | 1 + .../no_wildcard_imports/good/query_helpers.ex | 73 + .../bad/user_service.ts | 47 + .../good/user_service.ts | 47 + .../docstring_is_nonempty/bad/cache.ex | 78 ++ .../docstring_is_nonempty/config.yml | 1 + .../docstring_is_nonempty/good/cache.ex | 96 ++ .../doctests_validate_examples/bad/billing.ex | 59 + .../good/billing.ex | 73 + .../bad/ShippingService.swift | 57 + .../good/ShippingService.swift | 85 ++ .../bad/cache.go | 67 + .../good/cache.go | 72 + 
.../file_has_license_header/bad/core.ex | 74 + .../file_has_license_header/config.yml | 1 + .../file_has_license_header/good/core.ex | 76 ++ .../file_has_module_docstring/bad/shipping.ex | 73 + .../file_has_module_docstring/config.yml | 1 + .../good/shipping.ex | 88 ++ .../bad/legacy_importer.ex | 91 ++ .../file_has_no_commented_out_code/config.yml | 1 + .../good/legacy_importer.ex | 83 ++ .../function_has_docstring/bad/tax.ex | 73 + .../function_has_docstring/config.yml | 1 + .../function_has_docstring/good/tax.ex | 105 ++ .../function_todo_comment_in_body/bad/sync.ex | 95 ++ .../function_todo_comment_in_body/config.yml | 1 + .../good/sync.ex | 104 ++ .../bad/payments.ex | 81 ++ .../good/payments.ex | 109 ++ .../bad/file_importer.py | 66 + .../good/file_importer.py | 69 + .../bad/PaymentGateway.php | 62 + .../good/PaymentGateway.php | 62 + .../bad/processor.go | 61 + .../good/processor.go | 63 + .../bad/validator_test.rs | 50 + .../good/validator_test.rs | 50 + .../bad/FileImporter.swift | 58 + .../good/FileImporter.swift | 66 + .../bad/OrderProcessor.cs | 73 + .../good/OrderProcessor.cs | 68 + .../does_not_discard_errors/bad/handler.go | 50 + .../does_not_discard_errors/good/handler.go | 60 + .../bad/gateway.go | 50 + .../good/gateway.go | 50 + .../bad/NetworkClient.swift | 57 + .../good/NetworkClient.swift | 66 + .../bad/UserRepository.cs | 98 ++ .../good/UserRepository.cs | 78 ++ .../bad/file_processor.ex | 85 ++ .../does_not_swallow_errors/config.yml | 1 + .../good/file_processor.ex | 98 ++ .../bad/event_bus.js | 61 + .../good/event_bus.js | 66 + .../bad/OrderService.java | 69 + .../good/OrderService.java | 73 + .../bad/FileProcessor.cs | 81 ++ .../good/FileProcessor.cs | 84 ++ .../bad/ProductCatalog.cs | 94 ++ .../good/ProductCatalog.cs | 67 + .../bad/DataLoader.swift | 49 + .../good/DataLoader.swift | 60 + .../bad/store.go | 53 + .../good/store.go | 56 + .../bad/client.rs | 48 + .../good/client.rs | 64 + .../bad/billing.ex | 83 ++ 
.../error_message_is_descriptive/config.yml | 1 + .../good/billing.ex | 93 ++ .../bad/service.go | 54 + .../good/service.go | 51 + .../error_type_includes_context/bad/client.go | 47 + .../good/client.go | 56 + .../no_bare_except/bad/api_client.py | 60 + .../no_bare_except/good/api_client.py | 71 + .../no_blind_rescue/bad/cart.rb | 45 + .../no_blind_rescue/good/cart.rb | 53 + .../no_empty_rescue_block/bad/payment.rb | 42 + .../no_empty_rescue_block/good/payment.rb | 49 + .../bad/ProductCatalog.php | 62 + .../good/ProductCatalog.php | 77 ++ .../bad/template_engine.js | 48 + .../good/template_engine.js | 58 + .../bad/billing.ex | 68 + .../good/billing.ex | 84 ++ .../no_floating_promises/bad/order_service.js | 55 + .../no_floating_promises/bad/user_service.ts | 68 + .../good/order_service.js | 59 + .../no_floating_promises/good/user_service.ts | 86 ++ .../bad/product_repository.ts | 64 + .../good/product_repository.ts | 55 + .../no_rescue_as_flow_control/bad/invoice.rb | 65 + .../no_rescue_as_flow_control/good/invoice.rb | 59 + .../no_return_from_ensure/bad/subscription.rb | 49 + .../good/subscription.rb | 52 + .../rescue_most_specific_first/bad/account.rb | 55 + .../good/account.rb | 57 + .../returns_typed_error/bad/repository.ex | 92 ++ .../returns_typed_error/config.yml | 1 + .../returns_typed_error/good/repository.ex | 96 ++ .../bad/api_client.js | 64 + .../good/api_client.js | 69 + .../bad/payment_processor.py | 73 + .../good/payment_processor.py | 78 ++ .../uses_checked_arithmetic/bad/invoice.rs | 41 + .../uses_checked_arithmetic/good/invoice.rs | 69 + .../bad/store.rs | 51 + .../good/store.rs | 70 + .../bad/router.go | 44 + .../good/router.go | 46 + .../bad/middleware.go | 42 + .../good/middleware.go | 42 + .../bad/OrderService.php | 66 + .../good/OrderService.php | 72 + .../bad/loader.rs | 58 + .../good/loader.rs | 63 + .../uses_raise_from/bad/config_loader.py | 68 + .../uses_raise_from/good/config_loader.py | 72 + .../bad/parser.rs | 50 + .../good/parser.rs | 
74 + .../bad/AuthService.swift | 65 + .../good/AuthService.swift | 70 + .../bad/repository.go | 50 + .../good/repository.go | 49 + .../has_consistent_indentation/bad/worker.ex | 73 + .../has_consistent_indentation/config.yml | 1 + .../has_consistent_indentation/good/worker.ex | 67 + .../bad/Parser.cpp | 121 ++ .../good/Parser.cpp | 116 ++ .../line_count_under_300/bad/mega_service.ex | 130 ++ .../line_count_under_300/config.yml | 1 + .../good/order_service.ex | 68 + .../bad/query_builder.ex | 46 + .../line_length_under_120/config.yml | 1 + .../good/query_builder.ex | 89 ++ .../no_magic_numbers/bad/rate_limiter.ex | 91 ++ .../no_magic_numbers/config.yml | 1 + .../no_magic_numbers/good/rate_limiter.ex | 114 ++ .../bad/InvoiceService.java | 55 + .../good/InvoiceService.java | 63 + .../single_responsibility/bad/user_handler.ex | 110 ++ .../single_responsibility/config.yml | 1 + .../good/user_registration.ex | 53 + .../bad/mailer.ex | 86 ++ .../config.yml | 1 + .../good/mailer.ex | 88 ++ .../bad/user_service.ts | 68 + .../good/user_service.ts | 61 + .../bad/payment_gateway.ts | 69 + .../good/payment_gateway.ts | 75 + .../bad/EmailDispatcher.cs | 59 + .../good/EmailDispatcher.cs | 59 + .../bad/guards.ex | 41 + .../config.yml | 1 + .../good/guards.ex | 41 + .../bad/EventHandler.swift | 56 + .../good/EventHandler.swift | 56 + .../context_is_first_parameter/bad/mailer.go | 60 + .../context_is_first_parameter/good/mailer.go | 57 + .../bad/order_processor.ex | 52 + .../cyclomatic_complexity_under_10/config.yml | 1 + .../good/order_processor.ex | 50 + .../bad/EmailComposer.swift | 64 + .../good/EmailComposer.swift | 64 + .../bad/reports.ex | 64 + .../good/reports.ex | 69 + .../has_verb_in_name/bad/api.ex | 54 + .../has_verb_in_name/config.yml | 1 + .../has_verb_in_name/good/api.ex | 54 + .../bad/Matrix.cpp | 64 + .../good/Matrix.cpp | 53 + .../bad/storage.go | 43 + .../good/storage.go | 53 + .../bad/report_generator.ex | 53 + .../is_less_than_20_lines/config.yml | 1 + 
.../good/report_generator.ex | 55 + .../bad/Buffer.cpp | 82 ++ .../good/Buffer.cpp | 79 ++ .../bad/parser.go | 56 + .../good/parser.go | 58 + .../nesting_depth_under_4/bad/validator.ex | 59 + .../nesting_depth_under_4/config.yml | 1 + .../nesting_depth_under_4/good/validator.ex | 49 + .../no_arguments_object/bad/query_builder.js | 57 + .../no_arguments_object/good/query_builder.js | 49 + .../bad/BackgroundSync.cs | 61 + .../good/BackgroundSync.cs | 74 + .../no_boolean_parameter/bad/notifications.ex | 49 + .../no_boolean_parameter/config.yml | 1 + .../good/notifications.ex | 52 + .../bad/Widget.cpp | 63 + .../good/Widget.cpp | 56 + .../no_magic_numbers/bad/discounter.ex | 58 + .../no_magic_numbers/config.yml | 1 + .../no_magic_numbers/good/discounter.ex | 54 + .../bad/report_builder.py | 60 + .../good/report_builder.py | 72 + .../bad/notification_sender.py | 48 + .../good/notification_sender.py | 78 ++ .../bad/date_utils.ts | 62 + .../good/date_utils.ts | 58 + .../bad/string_utils.js | 52 + .../good/string_utils.js | 48 + .../bad/notifications.ex | 51 + .../good/notifications.ex | 46 + .../bad/email_sender.ex | 35 + .../parameter_count_under_4/config.yml | 1 + .../good/email_sender.ex | 48 + .../bad/validator.go | 54 + .../good/validator.go | 53 + .../bad/notification_handler.ts | 63 + .../good/notification_handler.ts | 62 + .../bad/Parser.cpp | 76 ++ .../good/Parser.cpp | 68 + .../uses_assertive_map_access/bad/shipping.ex | 71 + .../good/shipping.ex | 80 ++ .../bad/inventory.ex | 62 + .../good/inventory.ex | 67 + .../bad/EmailSender.kt | 48 + .../good/EmailSender.kt | 49 + .../uses_defer_for_cleanup/bad/filestore.go | 75 + .../uses_defer_for_cleanup/good/filestore.go | 68 + .../bad/item_processor.py | 64 + .../good/item_processor.py | 64 + .../bad/data_pipeline.py | 52 + .../good/data_pipeline.py | 60 + .../bad/invoice.rb | 62 + .../good/invoice.rb | 58 + .../bad/SubscriptionService.kt | 39 + .../good/SubscriptionService.kt | 57 + .../bad/orders.ex | 70 + 
.../good/orders.ex | 57 + .../uses_ternary_expression/bad/formatter.ex | 65 + .../uses_ternary_expression/config.yml | 1 + .../uses_ternary_expression/good/formatter.ex | 32 + .../bad/ShippingCalculator.kt | 62 + .../good/ShippingCalculator.kt | 47 + .../bad/payments.ex | 59 + .../good/payments.ex | 59 + .../zero_value_is_usable/bad/buffer.go | 55 + .../zero_value_is_usable/good/buffer.go | 43 + .../bad/PaymentGateway.cs | 63 + .../good/PaymentGateway.cs | 60 + .../bad/accounts.ex | 68 + .../config.yml | 1 + .../good/accounts.ex | 83 ++ .../bad/ProductCatalog.swift | 52 + .../good/ProductCatalog.swift | 50 + .../class_name_is_noun/bad/modules.ex | 45 + .../class_name_is_noun/config.yml | 1 + .../class_name_is_noun/good/modules.ex | 45 + .../bad/OrderService.php | 64 + .../good/OrderService.php | 68 + .../bad/payment.rb | 54 + .../good/payment.rb | 54 + .../bad/ProductCatalog.php | 61 + .../good/ProductCatalog.php | 57 + .../bad/session.rs | 58 + .../good/session.rs | 48 + .../bad/invoice.rb | 66 + .../good/invoice.rb | 70 + .../bad/payment.rs | 61 + .../good/payment.rs | 62 + .../bad/SubscriptionManager.swift | 53 + .../good/SubscriptionManager.swift | 48 + .../bad/order.go | 67 + .../good/order.go | 64 + .../bad/InventoryService.swift | 56 + .../good/InventoryService.swift | 50 + .../bad/product.go | 48 + .../good/product.go | 54 + .../bad/ReportBuilder.swift | 79 ++ .../good/ReportBuilder.swift | 75 + .../bad/helpers.ex | 50 + .../config.yml | 1 + .../good/user_manager.ex | 50 + .../bad/worker.ex | 46 + .../config.yml | 1 + .../good/worker.ex | 46 + .../bad/queries.ex | 46 + .../config.yml | 1 + .../good/queries.ex | 46 + .../bad/sessions.ex | 53 + .../good/sessions.ex | 78 ++ .../getter_has_no_prefix/bad/user.rs | 45 + .../getter_has_no_prefix/good/user.rs | 51 + .../bad/account.go | 50 + .../good/account.go | 49 + .../bad/api.go | 40 + .../good/api.go | 41 + .../bad/notification.go | 31 + .../good/notification.go | 32 + .../bad/PaymentGateway.php | 67 + 
.../good/PaymentGateway.php | 65 + .../bad/ShoppingCart.swift | 51 + .../good/ShoppingCart.swift | 53 + .../bad/OrderManager.swift | 69 + .../good/OrderManager.swift | 59 + .../bad/SubscriptionManager.cs | 71 + .../good/SubscriptionManager.cs | 63 + .../bad/ShipmentTracker.cs | 65 + .../good/ShipmentTracker.cs | 67 + .../bad/InventoryService.cs | 57 + .../good/InventoryService.cs | 57 + .../bad/CustomerAccount.java | 62 + .../good/CustomerAccount.java | 58 + .../bad/account.rb | 49 + .../good/account.rb | 55 + .../bad/matrix_transform.py | 55 + .../good/matrix_transform.py | 62 + .../bad/UserRepository.php | 92 ++ .../good/UserRepository.php | 98 ++ .../bad/PriceFormatter.kt | 52 + .../good/PriceFormatter.kt | 46 + .../bad/user.go | 50 + .../good/user.go | 48 + .../bad/subscriptions.ex | 46 + .../good/subscriptions.ex | 65 + .../bad/subscription.rb | 52 + .../good/subscription.rb | 51 + .../predicate_prefixed_with_is/bad/order.rs | 59 + .../predicate_prefixed_with_is/good/order.rs | 55 + .../bad/DocumentPipeline.swift | 60 + .../good/DocumentPipeline.swift | 56 + .../bad/session.go | 47 + .../good/session.go | 48 + .../bad/catalog.ex | 51 + .../good/catalog.ex | 61 + .../bad/user_test.ex | 54 + .../test_name_starts_with_verb/config.yml | 1 + .../good/user_test.ex | 54 + .../bad/CacheStore.swift | 56 + .../good/CacheStore.swift | 54 + .../bad/session_manager.py | 69 + .../good/session_manager.py | 73 + .../bad/invoice_handler.py | 74 + .../good/invoice_handler.py | 82 ++ .../uses_snake_case_for_methods/bad/cart.rb | 54 + .../uses_snake_case_for_methods/good/cart.rb | 53 + .../bad/ProductInventory.kt | 39 + .../good/ProductInventory.kt | 39 + .../declared_close_to_use/bad/processor.ex | 94 ++ .../declared_close_to_use/config.yml | 1 + .../declared_close_to_use/good/processor.ex | 76 ++ .../bad/aggregator.ex | 75 + .../config.yml | 1 + .../good/aggregator.ex | 55 + .../no_var_declarations/bad/search_index.js | 64 + .../no_var_declarations/good/search_index.js | 57 + 
.../bad/ReportGenerator.kt | 61 + .../good/ReportGenerator.kt | 54 + .../bad/transformer.ex | 49 + .../reassigned_multiple_times/config.yml | 1 + .../good/transformer.ex | 47 + .../scope_is_minimal/bad/handler.ex | 71 + .../scope_is_minimal/config.yml | 1 + .../scope_is_minimal/good/handler.ex | 62 + .../shadowed_by_inner_scope/bad/nested.ex | 66 + .../shadowed_by_inner_scope/config.yml | 1 + .../shadowed_by_inner_scope/good/nested.ex | 52 + .../used_only_once/bad/builder.ex | 57 + .../used_only_once/config.yml | 1 + .../used_only_once/good/builder.ex | 49 + .../bad/invoice_calculator.js | 62 + .../good/invoice_calculator.js | 58 + .../bad/sparse_test.ex | 58 + .../reasonable_test_to_code_ratio/config.yml | 1 + .../good/thorough_test.ex | 100 ++ .../bad/empty_assertions_test.ex | 62 + .../testing/test_has_assertion/config.yml | 1 + .../good/proper_assertions_test.ex | 50 + .../bad/vague_test.ex | 49 + .../test_name_describes_behavior/config.yml | 1 + .../good/descriptive_test.ex | 62 + .../bad/kitchen_sink_test.ex | 62 + .../testing/test_single_concept/config.yml | 1 + .../test_single_concept/good/focused_test.ex | 99 ++ .../bad/checker.ex | 76 ++ .../config.yml | 1 + .../good/checker.ex | 31 + .../bad/OrderDraft.kt | 45 + .../good/OrderDraft.kt | 47 + .../bad/Matrix.cpp | 64 + .../good/Matrix.cpp | 79 ++ .../bad/converter.rs | 40 + .../good/converter.rs | 69 + .../bad/session_manager.js | 84 ++ .../good/session_manager.js | 76 ++ .../hardcoded_url_or_path/bad/client.ex | 53 + .../hardcoded_url_or_path/config.yml | 1 + .../hardcoded_url_or_path/good/client.ex | 55 + .../bad/order_handler.py | 72 + .../good/order_handler.py | 82 ++ .../no_boolean_obsession/bad/notifications.ex | 62 + .../good/notifications.ex | 51 + .../no_c_style_casts/bad/Buffer.cpp | 65 + .../no_c_style_casts/good/Buffer.cpp | 60 + .../bad/feature_flags.py | 69 + .../good/feature_flags.py | 61 + .../bad/user_service.ts | 44 + .../good/user_service.ts | 67 + .../bad/notification_handler.ts | 51 + 
.../good/notification_handler.ts | 58 + .../bad/analytics.ex | 46 + .../good/analytics.ex | 53 + .../no_empty_string_initial/bad/builder.ex | 88 ++ .../no_empty_string_initial/config.yml | 1 + .../no_empty_string_initial/good/builder.ex | 48 + .../no_explicit_any/bad/user_service.ts | 55 + .../no_explicit_any/good/user_service.ts | 75 + .../no_implicit_null_initial/bad/loader.ex | 89 ++ .../no_implicit_null_initial/config.yml | 1 + .../no_implicit_null_initial/good/loader.ex | 68 + .../no_magic_value_assigned/bad/roles.ex | 64 + .../no_magic_value_assigned/config.yml | 1 + .../no_magic_value_assigned/good/roles.ex | 57 + .../bad/feature_flags.js | 50 + .../good/feature_flags.js | 43 + .../bad/notification_handler.ts | 57 + .../good/notification_handler.ts | 61 + .../bad/UserProfile.kt | 48 + .../good/UserProfile.kt | 49 + .../no_primitive_obsession/bad/payments.ex | 48 + .../no_primitive_obsession/good/payments.ex | 63 + .../type_and_value/no_rtti/bad/Connection.cpp | 69 + .../no_rtti/good/Connection.cpp | 69 + .../bad/product_service.py | 65 + .../good/product_service.py | 80 ++ .../bad/product_repository.ts | 56 + .../good/product_repository.ts | 63 + .../no_unsafe_return/bad/payment_gateway.ts | 58 + .../no_unsafe_return/good/payment_gateway.ts | 73 + .../bad/PaymentGateway.php | 56 + .../good/PaymentGateway.php | 59 + .../bad/NotificationHandler.kt | 50 + .../good/NotificationHandler.kt | 53 + .../bad/product_repository.ts | 61 + .../good/product_repository.ts | 61 + .../bad/PaymentStatus.kt | 47 + .../good/PaymentStatus.kt | 40 + .../struct_under_32_fields/bad/orders.ex | 60 + .../struct_under_32_fields/good/orders.ex | 67 + .../bad/payment_gateway.ts | 66 + .../good/payment_gateway.ts | 80 ++ .../bad/SearchResult.kt | 41 + .../good/SearchResult.kt | 46 + .../bad/PaymentService.kt | 69 + .../good/PaymentService.kt | 57 + .../bad/cart_service.js | 62 + .../good/cart_service.js | 62 + .../bad/ShoppingCart.kt | 47 + .../good/ShoppingCart.kt | 50 + 
.../bad/event_dispatcher.py | 62 + .../good/event_dispatcher.py | 65 + .../bad/user_repository.py | 70 + .../good/user_repository.py | 72 + .../bad/OrderService.php | 86 ++ .../good/OrderService.php | 91 ++ .../bad/OrderManager.kt | 61 + .../good/OrderManager.kt | 40 + .../bad/ProductCatalog.php | 71 + .../good/ProductCatalog.php | 72 + .../bad/UserRepository.php | 108 ++ .../good/UserRepository.php | 106 ++ .../boolean_has_is_has_prefix/bad/users.ex | 75 + .../boolean_has_is_has_prefix/bad/users.js | 69 + .../boolean_has_is_has_prefix/bad/users.rb | 69 + .../boolean_has_is_has_prefix/config.yml | 1 + .../boolean_has_is_has_prefix/good/users.ex | 75 + .../boolean_has_is_has_prefix/good/users.js | 69 + .../boolean_has_is_has_prefix/good/users.rb | 69 + .../collection_name_is_plural/bad/catalog.ex | 68 + .../collection_name_is_plural/bad/catalog.js | 72 + .../collection_name_is_plural/bad/catalog.rb | 65 + .../collection_name_is_plural/config.yml | 1 + .../collection_name_is_plural/good/catalog.ex | 68 + .../collection_name_is_plural/good/catalog.js | 72 + .../collection_name_is_plural/good/catalog.rb | 65 + .../loop_var_is_single_letter/bad/matrix.ex | 81 ++ .../loop_var_is_single_letter/bad/matrix.js | 92 ++ .../loop_var_is_single_letter/bad/matrix.rb | 89 ++ .../loop_var_is_single_letter/config.yml | 1 + .../loop_var_is_single_letter/good/matrix.ex | 81 ++ .../loop_var_is_single_letter/good/matrix.js | 92 ++ .../loop_var_is_single_letter/good/matrix.rb | 88 ++ .../name_contains_and/bad/checkout.ex | 76 ++ .../name_contains_and/bad/checkout.js | 94 ++ .../name_contains_and/bad/checkout.rb | 83 ++ .../name_contains_and/config.yml | 1 + .../name_contains_and/good/checkout.ex | 78 ++ .../name_contains_and/good/checkout.js | 89 ++ .../name_contains_and/good/checkout.rb | 89 ++ .../name_contains_type_suffix/bad/report.ex | 87 ++ .../name_contains_type_suffix/bad/report.js | 84 ++ .../name_contains_type_suffix/bad/report.rb | 82 ++ 
.../name_contains_type_suffix/config.yml | 1 + .../name_contains_type_suffix/good/report.ex | 87 ++ .../name_contains_type_suffix/good/report.js | 84 ++ .../name_contains_type_suffix/good/report.rb | 82 ++ .../name_is_abbreviation/bad/api_client.ex | 88 ++ .../name_is_abbreviation/bad/api_client.js | 98 ++ .../name_is_abbreviation/bad/api_client.rb | 91 ++ .../name_is_abbreviation/config.yml | 1 + .../name_is_abbreviation/good/api_client.ex | 88 ++ .../name_is_abbreviation/good/api_client.js | 98 ++ .../name_is_abbreviation/good/api_client.rb | 91 ++ .../name_is_generic/bad/orders.ex | 96 ++ .../name_is_generic/bad/orders.js | 94 ++ .../name_is_generic/bad/orders.rb | 87 ++ .../name_is_generic/config.yml | 1 + .../name_is_generic/good/orders.ex | 122 ++ .../name_is_generic/good/orders.js | 96 ++ .../name_is_generic/good/orders.rb | 89 ++ .../name_is_number_like/bad/pipeline.ex | 86 ++ .../name_is_number_like/bad/pipeline.js | 84 ++ .../name_is_number_like/bad/pipeline.rb | 87 ++ .../name_is_number_like/config.yml | 1 + .../name_is_number_like/good/pipeline.ex | 86 ++ .../name_is_number_like/good/pipeline.js | 84 ++ .../name_is_number_like/good/pipeline.rb | 87 ++ .../name_is_single_letter/bad/calculator.ex | 72 + .../name_is_single_letter/bad/calculator.js | 69 + .../name_is_single_letter/bad/calculator.rb | 73 + .../name_is_single_letter/config.yml | 1 + .../name_is_single_letter/good/calculator.ex | 72 + .../name_is_single_letter/good/calculator.js | 72 + .../name_is_single_letter/good/calculator.rb | 75 + .../name_is_too_long/bad/session.ex | 72 + .../name_is_too_long/bad/session.js | 81 ++ .../name_is_too_long/bad/session.rb | 80 ++ .../name_is_too_long/config.yml | 1 + .../name_is_too_long/good/session.ex | 76 ++ .../name_is_too_long/good/session.js | 63 + .../name_is_too_long/good/session.rb | 61 + .../name_is_too_short/bad/contact.ex | 78 ++ .../name_is_too_short/bad/contact.js | 87 ++ .../name_is_too_short/bad/contact.rb | 82 ++ 
.../name_is_too_short/config.yml | 1 + .../name_is_too_short/good/contact.ex | 78 ++ .../name_is_too_short/good/contact.js | 88 ++ .../name_is_too_short/good/contact.rb | 82 ++ .../negated_boolean_name/bad/validator.ex | 84 ++ .../negated_boolean_name/bad/validator.js | 86 ++ .../negated_boolean_name/bad/validator.rb | 88 ++ .../negated_boolean_name/config.yml | 1 + .../negated_boolean_name/good/validator.ex | 84 ++ .../negated_boolean_name/good/validator.js | 86 ++ .../negated_boolean_name/good/validator.rb | 88 ++ .../no_hungarian_notation/bad/inventory.ex | 87 ++ .../no_hungarian_notation/bad/inventory.js | 83 ++ .../no_hungarian_notation/bad/inventory.rb | 79 ++ .../no_hungarian_notation/config.yml | 1 + .../no_hungarian_notation/good/inventory.ex | 87 ++ .../no_hungarian_notation/good/inventory.js | 83 ++ .../no_hungarian_notation/good/inventory.rb | 79 ++ .../bad/config.ex | 90 ++ .../bad/config.js | 105 ++ .../bad/config.rb | 107 ++ .../screaming_snake_for_constants/config.yml | 1 + .../good/config.ex | 90 ++ .../good/config.js | 105 ++ .../good/config.rb | 99 ++ .../combined_metrics/scope_and_assignment.yml | 625 +++++++++ priv/combined_metrics/testing.yml | 420 ++++++ priv/combined_metrics/type_and_value.yml | 528 +++++++ priv/combined_metrics/variable_naming.yml | 1209 +++++++++++++++++ .../combined_metrics/file_scorer_test.exs | 268 ++++ .../combined_metrics/sample_runner_test.exs | 34 + 765 files changed, 51643 insertions(+) create mode 100644 lib/codeqa/combined_metrics/category.ex create mode 100644 lib/codeqa/combined_metrics/code_smells.ex create mode 100644 lib/codeqa/combined_metrics/consistency.ex create mode 100644 lib/codeqa/combined_metrics/dependencies.ex create mode 100644 lib/codeqa/combined_metrics/documentation.ex create mode 100644 lib/codeqa/combined_metrics/error_handling.ex create mode 100644 lib/codeqa/combined_metrics/file_scorer.ex create mode 100644 lib/codeqa/combined_metrics/file_structure.ex create mode 100644 
lib/codeqa/combined_metrics/function_design.ex create mode 100644 lib/codeqa/combined_metrics/naming_conventions.ex create mode 100644 lib/codeqa/combined_metrics/sample_runner.ex create mode 100644 lib/codeqa/combined_metrics/scope_and_assignment.ex create mode 100644 lib/codeqa/combined_metrics/scorer.ex create mode 100644 lib/codeqa/combined_metrics/testing.ex create mode 100644 lib/codeqa/combined_metrics/type_and_value.ex create mode 100644 lib/codeqa/combined_metrics/variable_naming.ex create mode 100644 lib/mix/tasks/codeqa/sample_report.ex create mode 100644 lib/mix/tasks/codeqa/signal_debug.ex create mode 100644 priv/combined_metrics/code_smells.yml create mode 100644 priv/combined_metrics/consistency.yml create mode 100644 priv/combined_metrics/dependencies.yml create mode 100644 priv/combined_metrics/documentation.yml create mode 100644 priv/combined_metrics/error_handling.yml create mode 100644 priv/combined_metrics/file_structure.yml create mode 100644 priv/combined_metrics/function_design.yml create mode 100644 priv/combined_metrics/naming_conventions.yml create mode 100644 priv/combined_metrics/samples/code_smells/consistent_string_quote_style/bad/notifications.ex create mode 100644 priv/combined_metrics/samples/code_smells/consistent_string_quote_style/config.yml create mode 100644 priv/combined_metrics/samples/code_smells/consistent_string_quote_style/good/notifications.ex create mode 100644 priv/combined_metrics/samples/code_smells/context_not_stored_in_struct/bad/fetcher.go create mode 100644 priv/combined_metrics/samples/code_smells/context_not_stored_in_struct/good/fetcher.go create mode 100644 priv/combined_metrics/samples/code_smells/does_not_accept_box_ref_parameter/bad/worker.rs create mode 100644 priv/combined_metrics/samples/code_smells/does_not_accept_box_ref_parameter/good/worker.rs create mode 100644 priv/combined_metrics/samples/code_smells/does_not_box_collections_unnecessarily/bad/cache.rs create mode 100644 
priv/combined_metrics/samples/code_smells/does_not_box_collections_unnecessarily/good/cache.rs create mode 100644 priv/combined_metrics/samples/code_smells/does_not_cast_ref_to_mut_ptr/bad/buffer.rs create mode 100644 priv/combined_metrics/samples/code_smells/does_not_cast_ref_to_mut_ptr/good/buffer.rs create mode 100644 priv/combined_metrics/samples/code_smells/does_not_clone_for_comparison/bad/registry.rs create mode 100644 priv/combined_metrics/samples/code_smells/does_not_clone_for_comparison/good/registry.rs create mode 100644 priv/combined_metrics/samples/code_smells/does_not_declare_unused_mut/bad/pipeline.rs create mode 100644 priv/combined_metrics/samples/code_smells/does_not_declare_unused_mut/good/pipeline.rs create mode 100644 priv/combined_metrics/samples/code_smells/does_not_hold_mutex_across_await/bad/queue.rs create mode 100644 priv/combined_metrics/samples/code_smells/does_not_hold_mutex_across_await/good/queue.rs create mode 100644 priv/combined_metrics/samples/code_smells/does_not_spawn_unbounded_goroutines/bad/importer.go create mode 100644 priv/combined_metrics/samples/code_smells/does_not_spawn_unbounded_goroutines/good/importer.go create mode 100644 priv/combined_metrics/samples/code_smells/goroutine_has_clear_exit_condition/bad/worker.go create mode 100644 priv/combined_metrics/samples/code_smells/goroutine_has_clear_exit_condition/good/worker.go create mode 100644 priv/combined_metrics/samples/code_smells/io_bound_uses_async_await_not_task_run/bad/DocumentStorage.cs create mode 100644 priv/combined_metrics/samples/code_smells/io_bound_uses_async_await_not_task_run/good/DocumentStorage.cs create mode 100644 priv/combined_metrics/samples/code_smells/lambda_not_assigned_to_variable/bad/price_calculator.py create mode 100644 priv/combined_metrics/samples/code_smells/lambda_not_assigned_to_variable/good/price_calculator.py create mode 100644 priv/combined_metrics/samples/code_smells/minimizes_data_in_messages/bad/shipping.ex create mode 100644 
priv/combined_metrics/samples/code_smells/minimizes_data_in_messages/good/shipping.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_array_constructor_with_arguments/bad/matrix_utils.js create mode 100644 priv/combined_metrics/samples/code_smells/no_array_constructor_with_arguments/good/matrix_utils.js create mode 100644 priv/combined_metrics/samples/code_smells/no_array_delete/bad/notification_handler.ts create mode 100644 priv/combined_metrics/samples/code_smells/no_array_delete/good/notification_handler.ts create mode 100644 priv/combined_metrics/samples/code_smells/no_auto_ptr/bad/Connection.cpp create mode 100644 priv/combined_metrics/samples/code_smells/no_auto_ptr/good/Connection.cpp create mode 100644 priv/combined_metrics/samples/code_smells/no_blocking_on_async_code/bad/NotificationService.cs create mode 100644 priv/combined_metrics/samples/code_smells/no_blocking_on_async_code/good/NotificationService.cs create mode 100644 priv/combined_metrics/samples/code_smells/no_class_variables/bad/account.rb create mode 100644 priv/combined_metrics/samples/code_smells/no_class_variables/good/account.rb create mode 100644 priv/combined_metrics/samples/code_smells/no_const_enum/bad/product_repository.ts create mode 100644 priv/combined_metrics/samples/code_smells/no_const_enum/good/product_repository.ts create mode 100644 priv/combined_metrics/samples/code_smells/no_dead_code_after_return/bad/order_service.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_dead_code_after_return/config.yml create mode 100644 priv/combined_metrics/samples/code_smells/no_dead_code_after_return/good/order_service.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_debug_print_statements/bad/payment.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_debug_print_statements/config.yml create mode 100644 priv/combined_metrics/samples/code_smells/no_debug_print_statements/good/payment.ex create mode 100644 
priv/combined_metrics/samples/code_smells/no_double_negation/bad/cart.rb create mode 100644 priv/combined_metrics/samples/code_smells/no_double_negation/good/cart.rb create mode 100644 priv/combined_metrics/samples/code_smells/no_finalize_override/bad/DatabaseConnection.java create mode 100644 priv/combined_metrics/samples/code_smells/no_finalize_override/good/DatabaseConnection.java create mode 100644 priv/combined_metrics/samples/code_smells/no_fixme_comments/bad/importer.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_fixme_comments/config.yml create mode 100644 priv/combined_metrics/samples/code_smells/no_fixme_comments/good/importer.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_global_scope_pollution/bad/analytics_tracker.js create mode 100644 priv/combined_metrics/samples/code_smells/no_global_scope_pollution/good/analytics_tracker.js create mode 100644 priv/combined_metrics/samples/code_smells/no_library_global_application_config/bad/billing.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_library_global_application_config/good/billing.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_list_comprehension_for_side_effects/bad/task_runner.py create mode 100644 priv/combined_metrics/samples/code_smells/no_list_comprehension_for_side_effects/good/task_runner.py create mode 100644 priv/combined_metrics/samples/code_smells/no_long_parameter_list/bad/accounts.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_long_parameter_list/good/accounts.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_magic_method_abuse/bad/OrderService.php create mode 100644 priv/combined_metrics/samples/code_smells/no_magic_method_abuse/good/OrderService.php create mode 100644 priv/combined_metrics/samples/code_smells/no_namespace_trespassing/bad/catalog.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_namespace_trespassing/good/catalog.ex create mode 100644 
priv/combined_metrics/samples/code_smells/no_nested_method_definition/bad/subscription.rb create mode 100644 priv/combined_metrics/samples/code_smells/no_nested_method_definition/good/subscription.rb create mode 100644 priv/combined_metrics/samples/code_smells/no_nested_ternary/bad/pricing.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_nested_ternary/config.yml create mode 100644 priv/combined_metrics/samples/code_smells/no_nested_ternary/good/pricing.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_primitive_wrapper_constructors/bad/form_validator.js create mode 100644 priv/combined_metrics/samples/code_smells/no_primitive_wrapper_constructors/good/form_validator.js create mode 100644 priv/combined_metrics/samples/code_smells/no_private_inheritance/bad/Connection.cpp create mode 100644 priv/combined_metrics/samples/code_smells/no_private_inheritance/good/Connection.cpp create mode 100644 priv/combined_metrics/samples/code_smells/no_problematic_operator_overloads/bad/Widget.cpp create mode 100644 priv/combined_metrics/samples/code_smells/no_problematic_operator_overloads/good/Widget.cpp create mode 100644 priv/combined_metrics/samples/code_smells/no_process_for_code_organization/bad/inventory.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_process_for_code_organization/good/inventory.ex create mode 100644 priv/combined_metrics/samples/code_smells/no_prototype_modification/bad/data_store.js create mode 100644 priv/combined_metrics/samples/code_smells/no_prototype_modification/good/data_store.js create mode 100644 priv/combined_metrics/samples/code_smells/no_raw_sql_string_concatenation/bad/UserRepository.php create mode 100644 priv/combined_metrics/samples/code_smells/no_raw_sql_string_concatenation/good/UserRepository.php create mode 100644 priv/combined_metrics/samples/code_smells/no_runblocking_in_coroutines/bad/FileImportService.kt create mode 100644 
priv/combined_metrics/samples/code_smells/no_runblocking_in_coroutines/good/FileImportService.kt create mode 100644 priv/combined_metrics/samples/code_smells/no_side_effects_in_declaration_file/bad/UserRepository.php create mode 100644 priv/combined_metrics/samples/code_smells/no_side_effects_in_declaration_file/good/UserRepository.php create mode 100644 priv/combined_metrics/samples/code_smells/no_unnecessary_conditions/bad/order_service.ts create mode 100644 priv/combined_metrics/samples/code_smells/no_unnecessary_conditions/good/order_service.ts create mode 100644 priv/combined_metrics/samples/code_smells/no_using_namespace_directives/bad/Matrix.cpp create mode 100644 priv/combined_metrics/samples/code_smells/no_using_namespace_directives/good/Matrix.cpp create mode 100644 priv/combined_metrics/samples/code_smells/no_virtual_calls_in_constructors/bad/Widget.cpp create mode 100644 priv/combined_metrics/samples/code_smells/no_virtual_calls_in_constructors/good/Widget.cpp create mode 100644 priv/combined_metrics/samples/code_smells/no_with_statement/bad/report_builder.js create mode 100644 priv/combined_metrics/samples/code_smells/no_with_statement/good/report_builder.js create mode 100644 priv/combined_metrics/samples/code_smells/shares_memory_by_communicating/bad/counter.go create mode 100644 priv/combined_metrics/samples/code_smells/shares_memory_by_communicating/good/counter.go create mode 100644 priv/combined_metrics/samples/code_smells/single_argument_constructors_are_explicit/bad/Parser.cpp create mode 100644 priv/combined_metrics/samples/code_smells/single_argument_constructors_are_explicit/good/Parser.cpp create mode 100644 priv/combined_metrics/samples/code_smells/supervised_processes_in_supervision_tree/bad/analytics.ex create mode 100644 priv/combined_metrics/samples/code_smells/supervised_processes_in_supervision_tree/good/analytics.ex create mode 100644 priv/combined_metrics/samples/code_smells/switch_has_default_case/bad/notification_router.js create 
mode 100644 priv/combined_metrics/samples/code_smells/switch_has_default_case/good/notification_router.js create mode 100644 priv/combined_metrics/samples/code_smells/uses_appropriate_dispatcher/bad/AnalyticsProcessor.kt create mode 100644 priv/combined_metrics/samples/code_smells/uses_appropriate_dispatcher/good/AnalyticsProcessor.kt create mode 100644 priv/combined_metrics/samples/code_smells/uses_arc_only_with_send_sync_types/bad/pool.rs create mode 100644 priv/combined_metrics/samples/code_smells/uses_arc_only_with_send_sync_types/good/pool.rs create mode 100644 priv/combined_metrics/samples/code_smells/uses_attr_accessor_not_manual_getter_setter/bad/payment.rb create mode 100644 priv/combined_metrics/samples/code_smells/uses_attr_accessor_not_manual_getter_setter/good/payment.rb create mode 100644 priv/combined_metrics/samples/code_smells/uses_context_manager_for_resources/bad/database_manager.py create mode 100644 priv/combined_metrics/samples/code_smells/uses_context_manager_for_resources/good/database_manager.py create mode 100644 priv/combined_metrics/samples/code_smells/uses_copied_on_copy_types/bad/metrics.rs create mode 100644 priv/combined_metrics/samples/code_smells/uses_copied_on_copy_types/good/metrics.rs create mode 100644 priv/combined_metrics/samples/code_smells/uses_mb_string_functions_for_unicode/bad/ProductCatalog.php create mode 100644 priv/combined_metrics/samples/code_smells/uses_mb_string_functions_for_unicode/good/ProductCatalog.php create mode 100644 priv/combined_metrics/samples/code_smells/uses_optionals_not_sentinel_values/bad/UserProfile.swift create mode 100644 priv/combined_metrics/samples/code_smells/uses_optionals_not_sentinel_values/good/UserProfile.swift create mode 100644 priv/combined_metrics/samples/code_smells/uses_recover_in_long_running_goroutines/bad/server.go create mode 100644 priv/combined_metrics/samples/code_smells/uses_recover_in_long_running_goroutines/good/server.go create mode 100644 
priv/combined_metrics/samples/code_smells/uses_short_circuit_operators/bad/AccessPolicy.cs create mode 100644 priv/combined_metrics/samples/code_smells/uses_short_circuit_operators/good/AccessPolicy.cs create mode 100644 priv/combined_metrics/samples/code_smells/uses_smart_pointers_for_ownership/bad/Buffer.cpp create mode 100644 priv/combined_metrics/samples/code_smells/uses_smart_pointers_for_ownership/good/Buffer.cpp create mode 100644 priv/combined_metrics/samples/code_smells/uses_standard_library_constants/bad/geometry.rs create mode 100644 priv/combined_metrics/samples/code_smells/uses_standard_library_constants/good/geometry.rs create mode 100644 priv/combined_metrics/samples/code_smells/uses_string_builder_for_loop_concatenation/bad/HtmlRenderer.cs create mode 100644 priv/combined_metrics/samples/code_smells/uses_string_builder_for_loop_concatenation/good/HtmlRenderer.cs create mode 100644 priv/combined_metrics/samples/code_smells/uses_structured_concurrency/bad/DataSyncService.kt create mode 100644 priv/combined_metrics/samples/code_smells/uses_structured_concurrency/good/DataSyncService.kt create mode 100644 priv/combined_metrics/samples/code_smells/uses_using_statement_for_disposables/bad/ReportExporter.cs create mode 100644 priv/combined_metrics/samples/code_smells/uses_using_statement_for_disposables/good/ReportExporter.cs create mode 100644 priv/combined_metrics/samples/consistency/all_methods_declare_visibility/bad/PaymentGateway.php create mode 100644 priv/combined_metrics/samples/consistency/all_methods_declare_visibility/good/PaymentGateway.php create mode 100644 priv/combined_metrics/samples/consistency/all_properties_declare_visibility/bad/OrderService.php create mode 100644 priv/combined_metrics/samples/consistency/all_properties_declare_visibility/good/OrderService.php create mode 100644 priv/combined_metrics/samples/consistency/consistent_casing_within_file/bad/analytics.ex create mode 100644 
priv/combined_metrics/samples/consistency/consistent_casing_within_file/config.yml create mode 100644 priv/combined_metrics/samples/consistency/consistent_casing_within_file/good/analytics.ex create mode 100644 priv/combined_metrics/samples/consistency/consistent_error_return_shape/bad/accounts.ex create mode 100644 priv/combined_metrics/samples/consistency/consistent_error_return_shape/config.yml create mode 100644 priv/combined_metrics/samples/consistency/consistent_error_return_shape/good/accounts.ex create mode 100644 priv/combined_metrics/samples/consistency/consistent_function_style/bad/formatter.ex create mode 100644 priv/combined_metrics/samples/consistency/consistent_function_style/config.yml create mode 100644 priv/combined_metrics/samples/consistency/consistent_function_style/good/formatter.ex create mode 100644 priv/combined_metrics/samples/consistency/doc_vs_comment_separation/bad/subscriptions.ex create mode 100644 priv/combined_metrics/samples/consistency/doc_vs_comment_separation/good/subscriptions.ex create mode 100644 priv/combined_metrics/samples/consistency/overloads_are_contiguous/bad/PaymentProcessor.java create mode 100644 priv/combined_metrics/samples/consistency/overloads_are_contiguous/good/PaymentProcessor.java create mode 100644 priv/combined_metrics/samples/consistency/override_annotation_present/bad/UserRepository.java create mode 100644 priv/combined_metrics/samples/consistency/override_annotation_present/good/UserRepository.java create mode 100644 priv/combined_metrics/samples/consistency/process_interactions_centralized/bad/sessions.ex create mode 100644 priv/combined_metrics/samples/consistency/process_interactions_centralized/good/sessions.ex create mode 100644 priv/combined_metrics/samples/consistency/protocol_conformance_in_separate_extension/bad/PaymentProcessor.swift create mode 100644 priv/combined_metrics/samples/consistency/protocol_conformance_in_separate_extension/good/PaymentProcessor.swift create mode 100644 
priv/combined_metrics/samples/consistency/same_concept_same_name/bad/auth.ex create mode 100644 priv/combined_metrics/samples/consistency/same_concept_same_name/config.yml create mode 100644 priv/combined_metrics/samples/consistency/same_concept_same_name/good/auth.ex create mode 100644 priv/combined_metrics/samples/consistency/static_member_via_class_name/bad/ProductCatalog.java create mode 100644 priv/combined_metrics/samples/consistency/static_member_via_class_name/good/ProductCatalog.java create mode 100644 priv/combined_metrics/samples/consistency/switch_fallthrough_has_comment/bad/OrderService.php create mode 100644 priv/combined_metrics/samples/consistency/switch_fallthrough_has_comment/good/OrderService.php create mode 100644 priv/combined_metrics/samples/dependencies/import_count_under_10/bad/dashboard.ex create mode 100644 priv/combined_metrics/samples/dependencies/import_count_under_10/config.yml create mode 100644 priv/combined_metrics/samples/dependencies/import_count_under_10/good/dashboard.ex create mode 100644 priv/combined_metrics/samples/dependencies/low_coupling/bad/order_controller.ex create mode 100644 priv/combined_metrics/samples/dependencies/low_coupling/config.yml create mode 100644 priv/combined_metrics/samples/dependencies/low_coupling/good/order_controller.ex create mode 100644 priv/combined_metrics/samples/dependencies/no_default_exports/bad/payment_gateway.ts create mode 100644 priv/combined_metrics/samples/dependencies/no_default_exports/good/payment_gateway.ts create mode 100644 priv/combined_metrics/samples/dependencies/no_mutable_exports/bad/product_repository.ts create mode 100644 priv/combined_metrics/samples/dependencies/no_mutable_exports/good/product_repository.ts create mode 100644 priv/combined_metrics/samples/dependencies/no_wildcard_imports/bad/query_helpers.ex create mode 100644 priv/combined_metrics/samples/dependencies/no_wildcard_imports/config.yml create mode 100644 
priv/combined_metrics/samples/dependencies/no_wildcard_imports/good/query_helpers.ex create mode 100644 priv/combined_metrics/samples/dependencies/uses_import_type_for_type_only_imports/bad/user_service.ts create mode 100644 priv/combined_metrics/samples/dependencies/uses_import_type_for_type_only_imports/good/user_service.ts create mode 100644 priv/combined_metrics/samples/documentation/docstring_is_nonempty/bad/cache.ex create mode 100644 priv/combined_metrics/samples/documentation/docstring_is_nonempty/config.yml create mode 100644 priv/combined_metrics/samples/documentation/docstring_is_nonempty/good/cache.ex create mode 100644 priv/combined_metrics/samples/documentation/doctests_validate_examples/bad/billing.ex create mode 100644 priv/combined_metrics/samples/documentation/doctests_validate_examples/good/billing.ex create mode 100644 priv/combined_metrics/samples/documentation/every_declaration_has_doc_comment/bad/ShippingService.swift create mode 100644 priv/combined_metrics/samples/documentation/every_declaration_has_doc_comment/good/ShippingService.swift create mode 100644 priv/combined_metrics/samples/documentation/exported_symbol_has_doc_comment/bad/cache.go create mode 100644 priv/combined_metrics/samples/documentation/exported_symbol_has_doc_comment/good/cache.go create mode 100644 priv/combined_metrics/samples/documentation/file_has_license_header/bad/core.ex create mode 100644 priv/combined_metrics/samples/documentation/file_has_license_header/config.yml create mode 100644 priv/combined_metrics/samples/documentation/file_has_license_header/good/core.ex create mode 100644 priv/combined_metrics/samples/documentation/file_has_module_docstring/bad/shipping.ex create mode 100644 priv/combined_metrics/samples/documentation/file_has_module_docstring/config.yml create mode 100644 priv/combined_metrics/samples/documentation/file_has_module_docstring/good/shipping.ex create mode 100644 
priv/combined_metrics/samples/documentation/file_has_no_commented_out_code/bad/legacy_importer.ex create mode 100644 priv/combined_metrics/samples/documentation/file_has_no_commented_out_code/config.yml create mode 100644 priv/combined_metrics/samples/documentation/file_has_no_commented_out_code/good/legacy_importer.ex create mode 100644 priv/combined_metrics/samples/documentation/function_has_docstring/bad/tax.ex create mode 100644 priv/combined_metrics/samples/documentation/function_has_docstring/config.yml create mode 100644 priv/combined_metrics/samples/documentation/function_has_docstring/good/tax.ex create mode 100644 priv/combined_metrics/samples/documentation/function_todo_comment_in_body/bad/sync.ex create mode 100644 priv/combined_metrics/samples/documentation/function_todo_comment_in_body/config.yml create mode 100644 priv/combined_metrics/samples/documentation/function_todo_comment_in_body/good/sync.ex create mode 100644 priv/combined_metrics/samples/documentation/public_api_has_moduledoc_and_doc/bad/payments.ex create mode 100644 priv/combined_metrics/samples/documentation/public_api_has_moduledoc_and_doc/good/payments.ex create mode 100644 priv/combined_metrics/samples/error_handling/catches_specific_exception/bad/file_importer.py create mode 100644 priv/combined_metrics/samples/error_handling/catches_specific_exception/good/file_importer.py create mode 100644 priv/combined_metrics/samples/error_handling/catches_typed_exception/bad/PaymentGateway.php create mode 100644 priv/combined_metrics/samples/error_handling/catches_typed_exception/good/PaymentGateway.php create mode 100644 priv/combined_metrics/samples/error_handling/custom_error_type_implements_unwrap/bad/processor.go create mode 100644 priv/combined_metrics/samples/error_handling/custom_error_type_implements_unwrap/good/processor.go create mode 100644 priv/combined_metrics/samples/error_handling/does_not_assert_result_without_value/bad/validator_test.rs create mode 100644 
priv/combined_metrics/samples/error_handling/does_not_assert_result_without_value/good/validator_test.rs create mode 100644 priv/combined_metrics/samples/error_handling/does_not_catch_and_suppress_errors/bad/FileImporter.swift create mode 100644 priv/combined_metrics/samples/error_handling/does_not_catch_and_suppress_errors/good/FileImporter.swift create mode 100644 priv/combined_metrics/samples/error_handling/does_not_catch_general_exception/bad/OrderProcessor.cs create mode 100644 priv/combined_metrics/samples/error_handling/does_not_catch_general_exception/good/OrderProcessor.cs create mode 100644 priv/combined_metrics/samples/error_handling/does_not_discard_errors/bad/handler.go create mode 100644 priv/combined_metrics/samples/error_handling/does_not_discard_errors/good/handler.go create mode 100644 priv/combined_metrics/samples/error_handling/does_not_expose_implementation_errors/bad/gateway.go create mode 100644 priv/combined_metrics/samples/error_handling/does_not_expose_implementation_errors/good/gateway.go create mode 100644 priv/combined_metrics/samples/error_handling/does_not_force_unwrap_optionals/bad/NetworkClient.swift create mode 100644 priv/combined_metrics/samples/error_handling/does_not_force_unwrap_optionals/good/NetworkClient.swift create mode 100644 priv/combined_metrics/samples/error_handling/does_not_return_error_codes/bad/UserRepository.cs create mode 100644 priv/combined_metrics/samples/error_handling/does_not_return_error_codes/good/UserRepository.cs create mode 100644 priv/combined_metrics/samples/error_handling/does_not_swallow_errors/bad/file_processor.ex create mode 100644 priv/combined_metrics/samples/error_handling/does_not_swallow_errors/config.yml create mode 100644 priv/combined_metrics/samples/error_handling/does_not_swallow_errors/good/file_processor.ex create mode 100644 priv/combined_metrics/samples/error_handling/does_not_swallow_errors_silently/bad/event_bus.js create mode 100644 
priv/combined_metrics/samples/error_handling/does_not_swallow_errors_silently/good/event_bus.js create mode 100644 priv/combined_metrics/samples/error_handling/does_not_swallow_exceptions/bad/OrderService.java create mode 100644 priv/combined_metrics/samples/error_handling/does_not_swallow_exceptions/good/OrderService.java create mode 100644 priv/combined_metrics/samples/error_handling/does_not_throw_from_finally_block/bad/FileProcessor.cs create mode 100644 priv/combined_metrics/samples/error_handling/does_not_throw_from_finally_block/good/FileProcessor.cs create mode 100644 priv/combined_metrics/samples/error_handling/does_not_use_exceptions_for_control_flow/bad/ProductCatalog.cs create mode 100644 priv/combined_metrics/samples/error_handling/does_not_use_exceptions_for_control_flow/good/ProductCatalog.cs create mode 100644 priv/combined_metrics/samples/error_handling/does_not_use_force_try/bad/DataLoader.swift create mode 100644 priv/combined_metrics/samples/error_handling/does_not_use_force_try/good/DataLoader.swift create mode 100644 priv/combined_metrics/samples/error_handling/does_not_use_inband_error_values/bad/store.go create mode 100644 priv/combined_metrics/samples/error_handling/does_not_use_inband_error_values/good/store.go create mode 100644 priv/combined_metrics/samples/error_handling/does_not_use_unwrap_in_production/bad/client.rs create mode 100644 priv/combined_metrics/samples/error_handling/does_not_use_unwrap_in_production/good/client.rs create mode 100644 priv/combined_metrics/samples/error_handling/error_message_is_descriptive/bad/billing.ex create mode 100644 priv/combined_metrics/samples/error_handling/error_message_is_descriptive/config.yml create mode 100644 priv/combined_metrics/samples/error_handling/error_message_is_descriptive/good/billing.ex create mode 100644 priv/combined_metrics/samples/error_handling/error_string_not_capitalized/bad/service.go create mode 100644 
priv/combined_metrics/samples/error_handling/error_string_not_capitalized/good/service.go create mode 100644 priv/combined_metrics/samples/error_handling/error_type_includes_context/bad/client.go create mode 100644 priv/combined_metrics/samples/error_handling/error_type_includes_context/good/client.go create mode 100644 priv/combined_metrics/samples/error_handling/no_bare_except/bad/api_client.py create mode 100644 priv/combined_metrics/samples/error_handling/no_bare_except/good/api_client.py create mode 100644 priv/combined_metrics/samples/error_handling/no_blind_rescue/bad/cart.rb create mode 100644 priv/combined_metrics/samples/error_handling/no_blind_rescue/good/cart.rb create mode 100644 priv/combined_metrics/samples/error_handling/no_empty_rescue_block/bad/payment.rb create mode 100644 priv/combined_metrics/samples/error_handling/no_empty_rescue_block/good/payment.rb create mode 100644 priv/combined_metrics/samples/error_handling/no_error_suppression_operator/bad/ProductCatalog.php create mode 100644 priv/combined_metrics/samples/error_handling/no_error_suppression_operator/good/ProductCatalog.php create mode 100644 priv/combined_metrics/samples/error_handling/no_eval_or_dynamic_code_execution/bad/template_engine.js create mode 100644 priv/combined_metrics/samples/error_handling/no_eval_or_dynamic_code_execution/good/template_engine.js create mode 100644 priv/combined_metrics/samples/error_handling/no_exceptions_for_control_flow/bad/billing.ex create mode 100644 priv/combined_metrics/samples/error_handling/no_exceptions_for_control_flow/good/billing.ex create mode 100644 priv/combined_metrics/samples/error_handling/no_floating_promises/bad/order_service.js create mode 100644 priv/combined_metrics/samples/error_handling/no_floating_promises/bad/user_service.ts create mode 100644 priv/combined_metrics/samples/error_handling/no_floating_promises/good/order_service.js create mode 100644 
priv/combined_metrics/samples/error_handling/no_floating_promises/good/user_service.ts create mode 100644 priv/combined_metrics/samples/error_handling/no_misused_promises/bad/product_repository.ts create mode 100644 priv/combined_metrics/samples/error_handling/no_misused_promises/good/product_repository.ts create mode 100644 priv/combined_metrics/samples/error_handling/no_rescue_as_flow_control/bad/invoice.rb create mode 100644 priv/combined_metrics/samples/error_handling/no_rescue_as_flow_control/good/invoice.rb create mode 100644 priv/combined_metrics/samples/error_handling/no_return_from_ensure/bad/subscription.rb create mode 100644 priv/combined_metrics/samples/error_handling/no_return_from_ensure/good/subscription.rb create mode 100644 priv/combined_metrics/samples/error_handling/rescue_most_specific_first/bad/account.rb create mode 100644 priv/combined_metrics/samples/error_handling/rescue_most_specific_first/good/account.rb create mode 100644 priv/combined_metrics/samples/error_handling/returns_typed_error/bad/repository.ex create mode 100644 priv/combined_metrics/samples/error_handling/returns_typed_error/config.yml create mode 100644 priv/combined_metrics/samples/error_handling/returns_typed_error/good/repository.ex create mode 100644 priv/combined_metrics/samples/error_handling/throws_error_objects_not_primitives/bad/api_client.js create mode 100644 priv/combined_metrics/samples/error_handling/throws_error_objects_not_primitives/good/api_client.js create mode 100644 priv/combined_metrics/samples/error_handling/try_block_is_minimal/bad/payment_processor.py create mode 100644 priv/combined_metrics/samples/error_handling/try_block_is_minimal/good/payment_processor.py create mode 100644 priv/combined_metrics/samples/error_handling/uses_checked_arithmetic/bad/invoice.rs create mode 100644 priv/combined_metrics/samples/error_handling/uses_checked_arithmetic/good/invoice.rs create mode 100644 
priv/combined_metrics/samples/error_handling/uses_custom_error_type_with_context/bad/store.rs create mode 100644 priv/combined_metrics/samples/error_handling/uses_custom_error_type_with_context/good/store.rs create mode 100644 priv/combined_metrics/samples/error_handling/uses_errors_as_for_type_assertion/bad/router.go create mode 100644 priv/combined_metrics/samples/error_handling/uses_errors_as_for_type_assertion/good/router.go create mode 100644 priv/combined_metrics/samples/error_handling/uses_errors_is_for_sentinel_comparison/bad/middleware.go create mode 100644 priv/combined_metrics/samples/error_handling/uses_errors_is_for_sentinel_comparison/good/middleware.go create mode 100644 priv/combined_metrics/samples/error_handling/uses_exceptions_not_error_codes/bad/OrderService.php create mode 100644 priv/combined_metrics/samples/error_handling/uses_exceptions_not_error_codes/good/OrderService.php create mode 100644 priv/combined_metrics/samples/error_handling/uses_question_mark_for_propagation/bad/loader.rs create mode 100644 priv/combined_metrics/samples/error_handling/uses_question_mark_for_propagation/good/loader.rs create mode 100644 priv/combined_metrics/samples/error_handling/uses_raise_from/bad/config_loader.py create mode 100644 priv/combined_metrics/samples/error_handling/uses_raise_from/good/config_loader.py create mode 100644 priv/combined_metrics/samples/error_handling/uses_result_for_recoverable_errors/bad/parser.rs create mode 100644 priv/combined_metrics/samples/error_handling/uses_result_for_recoverable_errors/good/parser.rs create mode 100644 priv/combined_metrics/samples/error_handling/uses_throws_for_recoverable_errors/bad/AuthService.swift create mode 100644 priv/combined_metrics/samples/error_handling/uses_throws_for_recoverable_errors/good/AuthService.swift create mode 100644 priv/combined_metrics/samples/error_handling/wraps_errors_with_context/bad/repository.go create mode 100644 
priv/combined_metrics/samples/error_handling/wraps_errors_with_context/good/repository.go create mode 100644 priv/combined_metrics/samples/file_structure/has_consistent_indentation/bad/worker.ex create mode 100644 priv/combined_metrics/samples/file_structure/has_consistent_indentation/config.yml create mode 100644 priv/combined_metrics/samples/file_structure/has_consistent_indentation/good/worker.ex create mode 100644 priv/combined_metrics/samples/file_structure/headers_have_include_guards/bad/Parser.cpp create mode 100644 priv/combined_metrics/samples/file_structure/headers_have_include_guards/good/Parser.cpp create mode 100644 priv/combined_metrics/samples/file_structure/line_count_under_300/bad/mega_service.ex create mode 100644 priv/combined_metrics/samples/file_structure/line_count_under_300/config.yml create mode 100644 priv/combined_metrics/samples/file_structure/line_count_under_300/good/order_service.ex create mode 100644 priv/combined_metrics/samples/file_structure/line_length_under_120/bad/query_builder.ex create mode 100644 priv/combined_metrics/samples/file_structure/line_length_under_120/config.yml create mode 100644 priv/combined_metrics/samples/file_structure/line_length_under_120/good/query_builder.ex create mode 100644 priv/combined_metrics/samples/file_structure/no_magic_numbers/bad/rate_limiter.ex create mode 100644 priv/combined_metrics/samples/file_structure/no_magic_numbers/config.yml create mode 100644 priv/combined_metrics/samples/file_structure/no_magic_numbers/good/rate_limiter.ex create mode 100644 priv/combined_metrics/samples/file_structure/one_top_level_class_per_file/bad/InvoiceService.java create mode 100644 priv/combined_metrics/samples/file_structure/one_top_level_class_per_file/good/InvoiceService.java create mode 100644 priv/combined_metrics/samples/file_structure/single_responsibility/bad/user_handler.ex create mode 100644 priv/combined_metrics/samples/file_structure/single_responsibility/config.yml create mode 100644 
priv/combined_metrics/samples/file_structure/single_responsibility/good/user_registration.ex create mode 100644 priv/combined_metrics/samples/file_structure/uses_standard_indentation_width/bad/mailer.ex create mode 100644 priv/combined_metrics/samples/file_structure/uses_standard_indentation_width/config.yml create mode 100644 priv/combined_metrics/samples/file_structure/uses_standard_indentation_width/good/mailer.ex create mode 100644 priv/combined_metrics/samples/function_design/arrow_functions_as_callbacks/bad/user_service.ts create mode 100644 priv/combined_metrics/samples/function_design/arrow_functions_as_callbacks/good/user_service.ts create mode 100644 priv/combined_metrics/samples/function_design/async_functions_contain_await/bad/payment_gateway.ts create mode 100644 priv/combined_metrics/samples/function_design/async_functions_contain_await/good/payment_gateway.ts create mode 100644 priv/combined_metrics/samples/function_design/async_method_has_await/bad/EmailDispatcher.cs create mode 100644 priv/combined_metrics/samples/function_design/async_method_has_await/good/EmailDispatcher.cs create mode 100644 priv/combined_metrics/samples/function_design/boolean_function_has_question_mark/bad/guards.ex create mode 100644 priv/combined_metrics/samples/function_design/boolean_function_has_question_mark/config.yml create mode 100644 priv/combined_metrics/samples/function_design/boolean_function_has_question_mark/good/guards.ex create mode 100644 priv/combined_metrics/samples/function_design/call_site_forms_grammatical_phrase/bad/EventHandler.swift create mode 100644 priv/combined_metrics/samples/function_design/call_site_forms_grammatical_phrase/good/EventHandler.swift create mode 100644 priv/combined_metrics/samples/function_design/context_is_first_parameter/bad/mailer.go create mode 100644 priv/combined_metrics/samples/function_design/context_is_first_parameter/good/mailer.go create mode 100644 
priv/combined_metrics/samples/function_design/cyclomatic_complexity_under_10/bad/order_processor.ex create mode 100644 priv/combined_metrics/samples/function_design/cyclomatic_complexity_under_10/config.yml create mode 100644 priv/combined_metrics/samples/function_design/cyclomatic_complexity_under_10/good/order_processor.ex create mode 100644 priv/combined_metrics/samples/function_design/default_parameters_placed_at_end/bad/EmailComposer.swift create mode 100644 priv/combined_metrics/samples/function_design/default_parameters_placed_at_end/good/EmailComposer.swift create mode 100644 priv/combined_metrics/samples/function_design/function_does_not_change_return_type_via_options/bad/reports.ex create mode 100644 priv/combined_metrics/samples/function_design/function_does_not_change_return_type_via_options/good/reports.ex create mode 100644 priv/combined_metrics/samples/function_design/has_verb_in_name/bad/api.ex create mode 100644 priv/combined_metrics/samples/function_design/has_verb_in_name/config.yml create mode 100644 priv/combined_metrics/samples/function_design/has_verb_in_name/good/api.ex create mode 100644 priv/combined_metrics/samples/function_design/input_parameters_before_output_parameters/bad/Matrix.cpp create mode 100644 priv/combined_metrics/samples/function_design/input_parameters_before_output_parameters/good/Matrix.cpp create mode 100644 priv/combined_metrics/samples/function_design/interface_has_one_or_two_methods/bad/storage.go create mode 100644 priv/combined_metrics/samples/function_design/interface_has_one_or_two_methods/good/storage.go create mode 100644 priv/combined_metrics/samples/function_design/is_less_than_20_lines/bad/report_generator.ex create mode 100644 priv/combined_metrics/samples/function_design/is_less_than_20_lines/config.yml create mode 100644 priv/combined_metrics/samples/function_design/is_less_than_20_lines/good/report_generator.ex create mode 100644 
priv/combined_metrics/samples/function_design/move_constructors_are_noexcept/bad/Buffer.cpp create mode 100644 priv/combined_metrics/samples/function_design/move_constructors_are_noexcept/good/Buffer.cpp create mode 100644 priv/combined_metrics/samples/function_design/named_return_values_used_for_documentation/bad/parser.go create mode 100644 priv/combined_metrics/samples/function_design/named_return_values_used_for_documentation/good/parser.go create mode 100644 priv/combined_metrics/samples/function_design/nesting_depth_under_4/bad/validator.ex create mode 100644 priv/combined_metrics/samples/function_design/nesting_depth_under_4/config.yml create mode 100644 priv/combined_metrics/samples/function_design/nesting_depth_under_4/good/validator.ex create mode 100644 priv/combined_metrics/samples/function_design/no_arguments_object/bad/query_builder.js create mode 100644 priv/combined_metrics/samples/function_design/no_arguments_object/good/query_builder.js create mode 100644 priv/combined_metrics/samples/function_design/no_async_void_outside_event_handlers/bad/BackgroundSync.cs create mode 100644 priv/combined_metrics/samples/function_design/no_async_void_outside_event_handlers/good/BackgroundSync.cs create mode 100644 priv/combined_metrics/samples/function_design/no_boolean_parameter/bad/notifications.ex create mode 100644 priv/combined_metrics/samples/function_design/no_boolean_parameter/config.yml create mode 100644 priv/combined_metrics/samples/function_design/no_boolean_parameter/good/notifications.ex create mode 100644 priv/combined_metrics/samples/function_design/no_default_arguments_on_virtual_functions/bad/Widget.cpp create mode 100644 priv/combined_metrics/samples/function_design/no_default_arguments_on_virtual_functions/good/Widget.cpp create mode 100644 priv/combined_metrics/samples/function_design/no_magic_numbers/bad/discounter.ex create mode 100644 priv/combined_metrics/samples/function_design/no_magic_numbers/config.yml create mode 100644 
priv/combined_metrics/samples/function_design/no_magic_numbers/good/discounter.ex create mode 100644 priv/combined_metrics/samples/function_design/no_mutable_default_argument/bad/report_builder.py create mode 100644 priv/combined_metrics/samples/function_design/no_mutable_default_argument/good/report_builder.py create mode 100644 priv/combined_metrics/samples/function_design/no_star_args_abuse/bad/notification_sender.py create mode 100644 priv/combined_metrics/samples/function_design/no_star_args_abuse/good/notification_sender.py create mode 100644 priv/combined_metrics/samples/function_design/no_static_class_for_namespacing/bad/date_utils.ts create mode 100644 priv/combined_metrics/samples/function_design/no_static_class_for_namespacing/good/date_utils.ts create mode 100644 priv/combined_metrics/samples/function_design/no_static_only_container_classes/bad/string_utils.js create mode 100644 priv/combined_metrics/samples/function_design/no_static_only_container_classes/good/string_utils.js create mode 100644 priv/combined_metrics/samples/function_design/no_unrelated_clauses_in_multi_clause_function/bad/notifications.ex create mode 100644 priv/combined_metrics/samples/function_design/no_unrelated_clauses_in_multi_clause_function/good/notifications.ex create mode 100644 priv/combined_metrics/samples/function_design/parameter_count_under_4/bad/email_sender.ex create mode 100644 priv/combined_metrics/samples/function_design/parameter_count_under_4/config.yml create mode 100644 priv/combined_metrics/samples/function_design/parameter_count_under_4/good/email_sender.ex create mode 100644 priv/combined_metrics/samples/function_design/prefers_synchronous_functions/bad/validator.go create mode 100644 priv/combined_metrics/samples/function_design/prefers_synchronous_functions/good/validator.go create mode 100644 priv/combined_metrics/samples/function_design/promise_returning_functions_are_async/bad/notification_handler.ts create mode 100644 
priv/combined_metrics/samples/function_design/promise_returning_functions_are_async/good/notification_handler.ts create mode 100644 priv/combined_metrics/samples/function_design/return_values_over_output_parameters/bad/Parser.cpp create mode 100644 priv/combined_metrics/samples/function_design/return_values_over_output_parameters/good/Parser.cpp create mode 100644 priv/combined_metrics/samples/function_design/uses_assertive_map_access/bad/shipping.ex create mode 100644 priv/combined_metrics/samples/function_design/uses_assertive_map_access/good/shipping.ex create mode 100644 priv/combined_metrics/samples/function_design/uses_assertive_pattern_matching/bad/inventory.ex create mode 100644 priv/combined_metrics/samples/function_design/uses_assertive_pattern_matching/good/inventory.ex create mode 100644 priv/combined_metrics/samples/function_design/uses_default_parameters_over_overloads/bad/EmailSender.kt create mode 100644 priv/combined_metrics/samples/function_design/uses_default_parameters_over_overloads/good/EmailSender.kt create mode 100644 priv/combined_metrics/samples/function_design/uses_defer_for_cleanup/bad/filestore.go create mode 100644 priv/combined_metrics/samples/function_design/uses_defer_for_cleanup/good/filestore.go create mode 100644 priv/combined_metrics/samples/function_design/uses_enumerate_not_range_len/bad/item_processor.py create mode 100644 priv/combined_metrics/samples/function_design/uses_enumerate_not_range_len/good/item_processor.py create mode 100644 priv/combined_metrics/samples/function_design/uses_generator_over_list_comprehension/bad/data_pipeline.py create mode 100644 priv/combined_metrics/samples/function_design/uses_generator_over_list_comprehension/good/data_pipeline.py create mode 100644 priv/combined_metrics/samples/function_design/uses_keyword_arguments_over_option_hash/bad/invoice.rb create mode 100644 priv/combined_metrics/samples/function_design/uses_keyword_arguments_over_option_hash/good/invoice.rb create mode 100644 
priv/combined_metrics/samples/function_design/uses_named_arguments_for_multiple_same_type_params/bad/SubscriptionService.kt create mode 100644 priv/combined_metrics/samples/function_design/uses_named_arguments_for_multiple_same_type_params/good/SubscriptionService.kt create mode 100644 priv/combined_metrics/samples/function_design/uses_pattern_matching_over_conditionals/bad/orders.ex create mode 100644 priv/combined_metrics/samples/function_design/uses_pattern_matching_over_conditionals/good/orders.ex create mode 100644 priv/combined_metrics/samples/function_design/uses_ternary_expression/bad/formatter.ex create mode 100644 priv/combined_metrics/samples/function_design/uses_ternary_expression/config.yml create mode 100644 priv/combined_metrics/samples/function_design/uses_ternary_expression/good/formatter.ex create mode 100644 priv/combined_metrics/samples/function_design/uses_when_for_three_or_more_branches/bad/ShippingCalculator.kt create mode 100644 priv/combined_metrics/samples/function_design/uses_when_for_three_or_more_branches/good/ShippingCalculator.kt create mode 100644 priv/combined_metrics/samples/function_design/with_expression_handles_happy_path_only/bad/payments.ex create mode 100644 priv/combined_metrics/samples/function_design/with_expression_handles_happy_path_only/good/payments.ex create mode 100644 priv/combined_metrics/samples/function_design/zero_value_is_usable/bad/buffer.go create mode 100644 priv/combined_metrics/samples/function_design/zero_value_is_usable/good/buffer.go create mode 100644 priv/combined_metrics/samples/naming_conventions/async_method_named_with_async_suffix/bad/PaymentGateway.cs create mode 100644 priv/combined_metrics/samples/naming_conventions/async_method_named_with_async_suffix/good/PaymentGateway.cs create mode 100644 priv/combined_metrics/samples/naming_conventions/bang_function_raises_on_failure/bad/accounts.ex create mode 100644 
priv/combined_metrics/samples/naming_conventions/bang_function_raises_on_failure/config.yml create mode 100644 priv/combined_metrics/samples/naming_conventions/bang_function_raises_on_failure/good/accounts.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/boolean_reads_as_assertion/bad/ProductCatalog.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/boolean_reads_as_assertion/good/ProductCatalog.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/class_name_is_noun/bad/modules.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/class_name_is_noun/config.yml create mode 100644 priv/combined_metrics/samples/naming_conventions/class_name_is_noun/good/modules.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/classes_use_studly_caps/bad/OrderService.php create mode 100644 priv/combined_metrics/samples/naming_conventions/classes_use_studly_caps/good/OrderService.php create mode 100644 priv/combined_metrics/samples/naming_conventions/constants_are_screaming_snake_or_caps_words/bad/payment.rb create mode 100644 priv/combined_metrics/samples/naming_conventions/constants_are_screaming_snake_or_caps_words/good/payment.rb create mode 100644 priv/combined_metrics/samples/naming_conventions/constants_are_upper_case_with_underscores/bad/ProductCatalog.php create mode 100644 priv/combined_metrics/samples/naming_conventions/constants_are_upper_case_with_underscores/good/ProductCatalog.php create mode 100644 priv/combined_metrics/samples/naming_conventions/constructor_named_new_or_with/bad/session.rs create mode 100644 priv/combined_metrics/samples/naming_conventions/constructor_named_new_or_with/good/session.rs create mode 100644 priv/combined_metrics/samples/naming_conventions/dangerous_method_ends_with_bang/bad/invoice.rb create mode 100644 priv/combined_metrics/samples/naming_conventions/dangerous_method_ends_with_bang/good/invoice.rb create mode 100644 
priv/combined_metrics/samples/naming_conventions/does_not_repeat_module_name_in_type/bad/payment.rs create mode 100644 priv/combined_metrics/samples/naming_conventions/does_not_repeat_module_name_in_type/good/payment.rs create mode 100644 priv/combined_metrics/samples/naming_conventions/does_not_use_abbreviated_names/bad/SubscriptionManager.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/does_not_use_abbreviated_names/good/SubscriptionManager.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/does_not_use_generic_package_names/bad/order.go create mode 100644 priv/combined_metrics/samples/naming_conventions/does_not_use_generic_package_names/good/order.go create mode 100644 priv/combined_metrics/samples/naming_conventions/does_not_use_get_prefix_for_accessors/bad/InventoryService.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/does_not_use_get_prefix_for_accessors/good/InventoryService.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/exported_name_not_redundant_with_package/bad/product.go create mode 100644 priv/combined_metrics/samples/naming_conventions/exported_name_not_redundant_with_package/good/product.go create mode 100644 priv/combined_metrics/samples/naming_conventions/factory_method_starts_with_make/bad/ReportBuilder.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/factory_method_starts_with_make/good/ReportBuilder.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/file_name_matches_primary_export/bad/helpers.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/file_name_matches_primary_export/config.yml create mode 100644 priv/combined_metrics/samples/naming_conventions/file_name_matches_primary_export/good/user_manager.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/function_name_is_not_single_word/bad/worker.ex create mode 100644 
priv/combined_metrics/samples/naming_conventions/function_name_is_not_single_word/config.yml create mode 100644 priv/combined_metrics/samples/naming_conventions/function_name_is_not_single_word/good/worker.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/function_name_matches_return_type/bad/queries.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/function_name_matches_return_type/config.yml create mode 100644 priv/combined_metrics/samples/naming_conventions/function_name_matches_return_type/good/queries.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/get_fetch_fetch_bang_contract_is_consistent/bad/sessions.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/get_fetch_fetch_bang_contract_is_consistent/good/sessions.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/getter_has_no_prefix/bad/user.rs create mode 100644 priv/combined_metrics/samples/naming_conventions/getter_has_no_prefix/good/user.rs create mode 100644 priv/combined_metrics/samples/naming_conventions/getter_not_prefixed_with_get/bad/account.go create mode 100644 priv/combined_metrics/samples/naming_conventions/getter_not_prefixed_with_get/good/account.go create mode 100644 priv/combined_metrics/samples/naming_conventions/initialism_uses_consistent_casing/bad/api.go create mode 100644 priv/combined_metrics/samples/naming_conventions/initialism_uses_consistent_casing/good/api.go create mode 100644 priv/combined_metrics/samples/naming_conventions/interface_named_with_er_suffix/bad/notification.go create mode 100644 priv/combined_metrics/samples/naming_conventions/interface_named_with_er_suffix/good/notification.go create mode 100644 priv/combined_metrics/samples/naming_conventions/methods_use_camel_case/bad/PaymentGateway.php create mode 100644 priv/combined_metrics/samples/naming_conventions/methods_use_camel_case/good/PaymentGateway.php create mode 100644 
priv/combined_metrics/samples/naming_conventions/mutating_method_uses_imperative_verb/bad/ShoppingCart.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/mutating_method_uses_imperative_verb/good/ShoppingCart.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/name_reflects_role_not_type/bad/OrderManager.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/name_reflects_role_not_type/good/OrderManager.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/name_reflects_semantics_not_type/bad/SubscriptionManager.cs create mode 100644 priv/combined_metrics/samples/naming_conventions/name_reflects_semantics_not_type/good/SubscriptionManager.cs create mode 100644 priv/combined_metrics/samples/naming_conventions/no_abbreviations_in_identifiers/bad/ShipmentTracker.cs create mode 100644 priv/combined_metrics/samples/naming_conventions/no_abbreviations_in_identifiers/good/ShipmentTracker.cs create mode 100644 priv/combined_metrics/samples/naming_conventions/no_hungarian_notation/bad/InventoryService.cs create mode 100644 priv/combined_metrics/samples/naming_conventions/no_hungarian_notation/good/InventoryService.cs create mode 100644 priv/combined_metrics/samples/naming_conventions/no_member_name_prefixes_suffixes/bad/CustomerAccount.java create mode 100644 priv/combined_metrics/samples/naming_conventions/no_member_name_prefixes_suffixes/good/CustomerAccount.java create mode 100644 priv/combined_metrics/samples/naming_conventions/no_predicate_method_with_is_prefix/bad/account.rb create mode 100644 priv/combined_metrics/samples/naming_conventions/no_predicate_method_with_is_prefix/good/account.rb create mode 100644 priv/combined_metrics/samples/naming_conventions/no_single_char_variable_ambiguous/bad/matrix_transform.py create mode 100644 priv/combined_metrics/samples/naming_conventions/no_single_char_variable_ambiguous/good/matrix_transform.py create mode 100644 
priv/combined_metrics/samples/naming_conventions/no_underscore_visibility_prefix/bad/UserRepository.php create mode 100644 priv/combined_metrics/samples/naming_conventions/no_underscore_visibility_prefix/good/UserRepository.php create mode 100644 priv/combined_metrics/samples/naming_conventions/no_util_or_meaningless_class_names/bad/PriceFormatter.kt create mode 100644 priv/combined_metrics/samples/naming_conventions/no_util_or_meaningless_class_names/good/PriceFormatter.kt create mode 100644 priv/combined_metrics/samples/naming_conventions/package_name_is_single_lowercase_word/bad/user.go create mode 100644 priv/combined_metrics/samples/naming_conventions/package_name_is_single_lowercase_word/good/user.go create mode 100644 priv/combined_metrics/samples/naming_conventions/predicate_function_ends_with_question_mark/bad/subscriptions.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/predicate_function_ends_with_question_mark/good/subscriptions.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/predicate_method_ends_with_question_mark/bad/subscription.rb create mode 100644 priv/combined_metrics/samples/naming_conventions/predicate_method_ends_with_question_mark/good/subscription.rb create mode 100644 priv/combined_metrics/samples/naming_conventions/predicate_prefixed_with_is/bad/order.rs create mode 100644 priv/combined_metrics/samples/naming_conventions/predicate_prefixed_with_is/good/order.rs create mode 100644 priv/combined_metrics/samples/naming_conventions/protocol_capability_uses_able_ible_ing_suffix/bad/DocumentPipeline.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/protocol_capability_uses_able_ible_ing_suffix/good/DocumentPipeline.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/receiver_name_is_short_type_abbreviation/bad/session.go create mode 100644 priv/combined_metrics/samples/naming_conventions/receiver_name_is_short_type_abbreviation/good/session.go create 
mode 100644 priv/combined_metrics/samples/naming_conventions/size_vs_length_naming_is_consistent/bad/catalog.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/size_vs_length_naming_is_consistent/good/catalog.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/test_name_starts_with_verb/bad/user_test.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/test_name_starts_with_verb/config.yml create mode 100644 priv/combined_metrics/samples/naming_conventions/test_name_starts_with_verb/good/user_test.ex create mode 100644 priv/combined_metrics/samples/naming_conventions/type_parameter_is_descriptive_or_single_letter/bad/CacheStore.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/type_parameter_is_descriptive_or_single_letter/good/CacheStore.swift create mode 100644 priv/combined_metrics/samples/naming_conventions/uses_caps_words_for_classes/bad/session_manager.py create mode 100644 priv/combined_metrics/samples/naming_conventions/uses_caps_words_for_classes/good/session_manager.py create mode 100644 priv/combined_metrics/samples/naming_conventions/uses_snake_case_for_functions/bad/invoice_handler.py create mode 100644 priv/combined_metrics/samples/naming_conventions/uses_snake_case_for_functions/good/invoice_handler.py create mode 100644 priv/combined_metrics/samples/naming_conventions/uses_snake_case_for_methods/bad/cart.rb create mode 100644 priv/combined_metrics/samples/naming_conventions/uses_snake_case_for_methods/good/cart.rb create mode 100644 priv/combined_metrics/samples/naming_conventions/verb_phrase_distinguishes_mutation_vs_return/bad/ProductInventory.kt create mode 100644 priv/combined_metrics/samples/naming_conventions/verb_phrase_distinguishes_mutation_vs_return/good/ProductInventory.kt create mode 100644 priv/combined_metrics/samples/scope_and_assignment/declared_close_to_use/bad/processor.ex create mode 100644 
priv/combined_metrics/samples/scope_and_assignment/declared_close_to_use/config.yml create mode 100644 priv/combined_metrics/samples/scope_and_assignment/declared_close_to_use/good/processor.ex create mode 100644 priv/combined_metrics/samples/scope_and_assignment/mutated_after_initial_assignment/bad/aggregator.ex create mode 100644 priv/combined_metrics/samples/scope_and_assignment/mutated_after_initial_assignment/config.yml create mode 100644 priv/combined_metrics/samples/scope_and_assignment/mutated_after_initial_assignment/good/aggregator.ex create mode 100644 priv/combined_metrics/samples/scope_and_assignment/no_var_declarations/bad/search_index.js create mode 100644 priv/combined_metrics/samples/scope_and_assignment/no_var_declarations/good/search_index.js create mode 100644 priv/combined_metrics/samples/scope_and_assignment/prefers_val_over_var/bad/ReportGenerator.kt create mode 100644 priv/combined_metrics/samples/scope_and_assignment/prefers_val_over_var/good/ReportGenerator.kt create mode 100644 priv/combined_metrics/samples/scope_and_assignment/reassigned_multiple_times/bad/transformer.ex create mode 100644 priv/combined_metrics/samples/scope_and_assignment/reassigned_multiple_times/config.yml create mode 100644 priv/combined_metrics/samples/scope_and_assignment/reassigned_multiple_times/good/transformer.ex create mode 100644 priv/combined_metrics/samples/scope_and_assignment/scope_is_minimal/bad/handler.ex create mode 100644 priv/combined_metrics/samples/scope_and_assignment/scope_is_minimal/config.yml create mode 100644 priv/combined_metrics/samples/scope_and_assignment/scope_is_minimal/good/handler.ex create mode 100644 priv/combined_metrics/samples/scope_and_assignment/shadowed_by_inner_scope/bad/nested.ex create mode 100644 priv/combined_metrics/samples/scope_and_assignment/shadowed_by_inner_scope/config.yml create mode 100644 priv/combined_metrics/samples/scope_and_assignment/shadowed_by_inner_scope/good/nested.ex create mode 100644 
priv/combined_metrics/samples/scope_and_assignment/used_only_once/bad/builder.ex create mode 100644 priv/combined_metrics/samples/scope_and_assignment/used_only_once/config.yml create mode 100644 priv/combined_metrics/samples/scope_and_assignment/used_only_once/good/builder.ex create mode 100644 priv/combined_metrics/samples/scope_and_assignment/uses_const_by_default/bad/invoice_calculator.js create mode 100644 priv/combined_metrics/samples/scope_and_assignment/uses_const_by_default/good/invoice_calculator.js create mode 100644 priv/combined_metrics/samples/testing/reasonable_test_to_code_ratio/bad/sparse_test.ex create mode 100644 priv/combined_metrics/samples/testing/reasonable_test_to_code_ratio/config.yml create mode 100644 priv/combined_metrics/samples/testing/reasonable_test_to_code_ratio/good/thorough_test.ex create mode 100644 priv/combined_metrics/samples/testing/test_has_assertion/bad/empty_assertions_test.ex create mode 100644 priv/combined_metrics/samples/testing/test_has_assertion/config.yml create mode 100644 priv/combined_metrics/samples/testing/test_has_assertion/good/proper_assertions_test.ex create mode 100644 priv/combined_metrics/samples/testing/test_name_describes_behavior/bad/vague_test.ex create mode 100644 priv/combined_metrics/samples/testing/test_name_describes_behavior/config.yml create mode 100644 priv/combined_metrics/samples/testing/test_name_describes_behavior/good/descriptive_test.ex create mode 100644 priv/combined_metrics/samples/testing/test_single_concept/bad/kitchen_sink_test.ex create mode 100644 priv/combined_metrics/samples/testing/test_single_concept/config.yml create mode 100644 priv/combined_metrics/samples/testing/test_single_concept/good/focused_test.ex create mode 100644 priv/combined_metrics/samples/type_and_value/boolean_assigned_from_comparison/bad/checker.ex create mode 100644 priv/combined_metrics/samples/type_and_value/boolean_assigned_from_comparison/config.yml create mode 100644 
priv/combined_metrics/samples/type_and_value/boolean_assigned_from_comparison/good/checker.ex create mode 100644 priv/combined_metrics/samples/type_and_value/copy_not_assumed_to_be_deep/bad/OrderDraft.kt create mode 100644 priv/combined_metrics/samples/type_and_value/copy_not_assumed_to_be_deep/good/OrderDraft.kt create mode 100644 priv/combined_metrics/samples/type_and_value/copyability_is_explicitly_declared/bad/Matrix.cpp create mode 100644 priv/combined_metrics/samples/type_and_value/copyability_is_explicitly_declared/good/Matrix.cpp create mode 100644 priv/combined_metrics/samples/type_and_value/does_not_use_lossy_numeric_cast/bad/converter.rs create mode 100644 priv/combined_metrics/samples/type_and_value/does_not_use_lossy_numeric_cast/good/converter.rs create mode 100644 priv/combined_metrics/samples/type_and_value/fields_defined_in_constructor/bad/session_manager.js create mode 100644 priv/combined_metrics/samples/type_and_value/fields_defined_in_constructor/good/session_manager.js create mode 100644 priv/combined_metrics/samples/type_and_value/hardcoded_url_or_path/bad/client.ex create mode 100644 priv/combined_metrics/samples/type_and_value/hardcoded_url_or_path/config.yml create mode 100644 priv/combined_metrics/samples/type_and_value/hardcoded_url_or_path/good/client.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_any_type_annotation/bad/order_handler.py create mode 100644 priv/combined_metrics/samples/type_and_value/no_any_type_annotation/good/order_handler.py create mode 100644 priv/combined_metrics/samples/type_and_value/no_boolean_obsession/bad/notifications.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_boolean_obsession/good/notifications.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_c_style_casts/bad/Buffer.cpp create mode 100644 priv/combined_metrics/samples/type_and_value/no_c_style_casts/good/Buffer.cpp create mode 100644 
priv/combined_metrics/samples/type_and_value/no_comparison_to_true_false/bad/feature_flags.py create mode 100644 priv/combined_metrics/samples/type_and_value/no_comparison_to_true_false/good/feature_flags.py create mode 100644 priv/combined_metrics/samples/type_and_value/no_double_type_assertion/bad/user_service.ts create mode 100644 priv/combined_metrics/samples/type_and_value/no_double_type_assertion/good/user_service.ts create mode 100644 priv/combined_metrics/samples/type_and_value/no_duplicate_enum_values/bad/notification_handler.ts create mode 100644 priv/combined_metrics/samples/type_and_value/no_duplicate_enum_values/good/notification_handler.ts create mode 100644 priv/combined_metrics/samples/type_and_value/no_dynamic_atom_creation_from_external_input/bad/analytics.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_dynamic_atom_creation_from_external_input/good/analytics.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_empty_string_initial/bad/builder.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_empty_string_initial/config.yml create mode 100644 priv/combined_metrics/samples/type_and_value/no_empty_string_initial/good/builder.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_explicit_any/bad/user_service.ts create mode 100644 priv/combined_metrics/samples/type_and_value/no_explicit_any/good/user_service.ts create mode 100644 priv/combined_metrics/samples/type_and_value/no_implicit_null_initial/bad/loader.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_implicit_null_initial/config.yml create mode 100644 priv/combined_metrics/samples/type_and_value/no_implicit_null_initial/good/loader.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_magic_value_assigned/bad/roles.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_magic_value_assigned/config.yml create mode 100644 
priv/combined_metrics/samples/type_and_value/no_magic_value_assigned/good/roles.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_mutable_exported_variables/bad/feature_flags.js create mode 100644 priv/combined_metrics/samples/type_and_value/no_mutable_exported_variables/good/feature_flags.js create mode 100644 priv/combined_metrics/samples/type_and_value/no_non_null_assertion/bad/notification_handler.ts create mode 100644 priv/combined_metrics/samples/type_and_value/no_non_null_assertion/good/notification_handler.ts create mode 100644 priv/combined_metrics/samples/type_and_value/no_non_null_assertion_operator/bad/UserProfile.kt create mode 100644 priv/combined_metrics/samples/type_and_value/no_non_null_assertion_operator/good/UserProfile.kt create mode 100644 priv/combined_metrics/samples/type_and_value/no_primitive_obsession/bad/payments.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_primitive_obsession/good/payments.ex create mode 100644 priv/combined_metrics/samples/type_and_value/no_rtti/bad/Connection.cpp create mode 100644 priv/combined_metrics/samples/type_and_value/no_rtti/good/Connection.cpp create mode 100644 priv/combined_metrics/samples/type_and_value/no_unparameterized_generic/bad/product_service.py create mode 100644 priv/combined_metrics/samples/type_and_value/no_unparameterized_generic/good/product_service.py create mode 100644 priv/combined_metrics/samples/type_and_value/no_unsafe_assignment/bad/product_repository.ts create mode 100644 priv/combined_metrics/samples/type_and_value/no_unsafe_assignment/good/product_repository.ts create mode 100644 priv/combined_metrics/samples/type_and_value/no_unsafe_return/bad/payment_gateway.ts create mode 100644 priv/combined_metrics/samples/type_and_value/no_unsafe_return/good/payment_gateway.ts create mode 100644 priv/combined_metrics/samples/type_and_value/no_var_keyword_for_properties/bad/PaymentGateway.php create mode 100644 
priv/combined_metrics/samples/type_and_value/no_var_keyword_for_properties/good/PaymentGateway.php create mode 100644 priv/combined_metrics/samples/type_and_value/nullable_boolean_compared_explicitly/bad/NotificationHandler.kt create mode 100644 priv/combined_metrics/samples/type_and_value/nullable_boolean_compared_explicitly/good/NotificationHandler.kt create mode 100644 priv/combined_metrics/samples/type_and_value/readonly_for_never_reassigned_fields/bad/product_repository.ts create mode 100644 priv/combined_metrics/samples/type_and_value/readonly_for_never_reassigned_fields/good/product_repository.ts create mode 100644 priv/combined_metrics/samples/type_and_value/sealed_class_when_is_exhaustive/bad/PaymentStatus.kt create mode 100644 priv/combined_metrics/samples/type_and_value/sealed_class_when_is_exhaustive/good/PaymentStatus.kt create mode 100644 priv/combined_metrics/samples/type_and_value/struct_under_32_fields/bad/orders.ex create mode 100644 priv/combined_metrics/samples/type_and_value/struct_under_32_fields/good/orders.ex create mode 100644 priv/combined_metrics/samples/type_and_value/switch_covers_all_union_members/bad/payment_gateway.ts create mode 100644 priv/combined_metrics/samples/type_and_value/switch_covers_all_union_members/good/payment_gateway.ts create mode 100644 priv/combined_metrics/samples/type_and_value/uses_data_class_over_pair_triple/bad/SearchResult.kt create mode 100644 priv/combined_metrics/samples/type_and_value/uses_data_class_over_pair_triple/good/SearchResult.kt create mode 100644 priv/combined_metrics/samples/type_and_value/uses_elvis_for_nullable_default/bad/PaymentService.kt create mode 100644 priv/combined_metrics/samples/type_and_value/uses_elvis_for_nullable_default/good/PaymentService.kt create mode 100644 priv/combined_metrics/samples/type_and_value/uses_identity_operators/bad/cart_service.js create mode 100644 priv/combined_metrics/samples/type_and_value/uses_identity_operators/good/cart_service.js create mode 100644 
priv/combined_metrics/samples/type_and_value/uses_immutable_collection_interfaces/bad/ShoppingCart.kt create mode 100644 priv/combined_metrics/samples/type_and_value/uses_immutable_collection_interfaces/good/ShoppingCart.kt create mode 100644 priv/combined_metrics/samples/type_and_value/uses_isinstance_not_type_comparison/bad/event_dispatcher.py create mode 100644 priv/combined_metrics/samples/type_and_value/uses_isinstance_not_type_comparison/good/event_dispatcher.py create mode 100644 priv/combined_metrics/samples/type_and_value/uses_optional_for_nullable_param/bad/user_repository.py create mode 100644 priv/combined_metrics/samples/type_and_value/uses_optional_for_nullable_param/good/user_repository.py create mode 100644 priv/combined_metrics/samples/type_and_value/uses_return_type_declaration/bad/OrderService.php create mode 100644 priv/combined_metrics/samples/type_and_value/uses_return_type_declaration/good/OrderService.php create mode 100644 priv/combined_metrics/samples/type_and_value/uses_safe_call_over_null_check/bad/OrderManager.kt create mode 100644 priv/combined_metrics/samples/type_and_value/uses_safe_call_over_null_check/good/OrderManager.kt create mode 100644 priv/combined_metrics/samples/type_and_value/uses_short_type_keywords/bad/ProductCatalog.php create mode 100644 priv/combined_metrics/samples/type_and_value/uses_short_type_keywords/good/ProductCatalog.php create mode 100644 priv/combined_metrics/samples/type_and_value/uses_type_declarations_on_parameters/bad/UserRepository.php create mode 100644 priv/combined_metrics/samples/type_and_value/uses_type_declarations_on_parameters/good/UserRepository.php create mode 100644 priv/combined_metrics/samples/variable_naming/boolean_has_is_has_prefix/bad/users.ex create mode 100644 priv/combined_metrics/samples/variable_naming/boolean_has_is_has_prefix/bad/users.js create mode 100644 priv/combined_metrics/samples/variable_naming/boolean_has_is_has_prefix/bad/users.rb create mode 100644 
priv/combined_metrics/samples/variable_naming/boolean_has_is_has_prefix/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/boolean_has_is_has_prefix/good/users.ex create mode 100644 priv/combined_metrics/samples/variable_naming/boolean_has_is_has_prefix/good/users.js create mode 100644 priv/combined_metrics/samples/variable_naming/boolean_has_is_has_prefix/good/users.rb create mode 100644 priv/combined_metrics/samples/variable_naming/collection_name_is_plural/bad/catalog.ex create mode 100644 priv/combined_metrics/samples/variable_naming/collection_name_is_plural/bad/catalog.js create mode 100644 priv/combined_metrics/samples/variable_naming/collection_name_is_plural/bad/catalog.rb create mode 100644 priv/combined_metrics/samples/variable_naming/collection_name_is_plural/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/collection_name_is_plural/good/catalog.ex create mode 100644 priv/combined_metrics/samples/variable_naming/collection_name_is_plural/good/catalog.js create mode 100644 priv/combined_metrics/samples/variable_naming/collection_name_is_plural/good/catalog.rb create mode 100644 priv/combined_metrics/samples/variable_naming/loop_var_is_single_letter/bad/matrix.ex create mode 100644 priv/combined_metrics/samples/variable_naming/loop_var_is_single_letter/bad/matrix.js create mode 100644 priv/combined_metrics/samples/variable_naming/loop_var_is_single_letter/bad/matrix.rb create mode 100644 priv/combined_metrics/samples/variable_naming/loop_var_is_single_letter/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/loop_var_is_single_letter/good/matrix.ex create mode 100644 priv/combined_metrics/samples/variable_naming/loop_var_is_single_letter/good/matrix.js create mode 100644 priv/combined_metrics/samples/variable_naming/loop_var_is_single_letter/good/matrix.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_and/bad/checkout.ex create mode 100644 
priv/combined_metrics/samples/variable_naming/name_contains_and/bad/checkout.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_and/bad/checkout.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_and/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_and/good/checkout.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_and/good/checkout.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_and/good/checkout.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_type_suffix/bad/report.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_type_suffix/bad/report.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_type_suffix/bad/report.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_type_suffix/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_type_suffix/good/report.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_type_suffix/good/report.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_contains_type_suffix/good/report.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_abbreviation/bad/api_client.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_abbreviation/bad/api_client.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_abbreviation/bad/api_client.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_abbreviation/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_abbreviation/good/api_client.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_abbreviation/good/api_client.js create mode 100644 
priv/combined_metrics/samples/variable_naming/name_is_abbreviation/good/api_client.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_generic/bad/orders.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_generic/bad/orders.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_generic/bad/orders.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_generic/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_generic/good/orders.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_generic/good/orders.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_generic/good/orders.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_number_like/bad/pipeline.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_number_like/bad/pipeline.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_number_like/bad/pipeline.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_number_like/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_number_like/good/pipeline.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_number_like/good/pipeline.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_number_like/good/pipeline.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_single_letter/bad/calculator.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_single_letter/bad/calculator.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_single_letter/bad/calculator.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_single_letter/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_single_letter/good/calculator.ex create mode 100644 
priv/combined_metrics/samples/variable_naming/name_is_single_letter/good/calculator.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_single_letter/good/calculator.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_long/bad/session.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_long/bad/session.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_long/bad/session.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_long/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_long/good/session.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_long/good/session.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_long/good/session.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_short/bad/contact.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_short/bad/contact.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_short/bad/contact.rb create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_short/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_short/good/contact.ex create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_short/good/contact.js create mode 100644 priv/combined_metrics/samples/variable_naming/name_is_too_short/good/contact.rb create mode 100644 priv/combined_metrics/samples/variable_naming/negated_boolean_name/bad/validator.ex create mode 100644 priv/combined_metrics/samples/variable_naming/negated_boolean_name/bad/validator.js create mode 100644 priv/combined_metrics/samples/variable_naming/negated_boolean_name/bad/validator.rb create mode 100644 priv/combined_metrics/samples/variable_naming/negated_boolean_name/config.yml create mode 100644 
priv/combined_metrics/samples/variable_naming/negated_boolean_name/good/validator.ex create mode 100644 priv/combined_metrics/samples/variable_naming/negated_boolean_name/good/validator.js create mode 100644 priv/combined_metrics/samples/variable_naming/negated_boolean_name/good/validator.rb create mode 100644 priv/combined_metrics/samples/variable_naming/no_hungarian_notation/bad/inventory.ex create mode 100644 priv/combined_metrics/samples/variable_naming/no_hungarian_notation/bad/inventory.js create mode 100644 priv/combined_metrics/samples/variable_naming/no_hungarian_notation/bad/inventory.rb create mode 100644 priv/combined_metrics/samples/variable_naming/no_hungarian_notation/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/no_hungarian_notation/good/inventory.ex create mode 100644 priv/combined_metrics/samples/variable_naming/no_hungarian_notation/good/inventory.js create mode 100644 priv/combined_metrics/samples/variable_naming/no_hungarian_notation/good/inventory.rb create mode 100644 priv/combined_metrics/samples/variable_naming/screaming_snake_for_constants/bad/config.ex create mode 100644 priv/combined_metrics/samples/variable_naming/screaming_snake_for_constants/bad/config.js create mode 100644 priv/combined_metrics/samples/variable_naming/screaming_snake_for_constants/bad/config.rb create mode 100644 priv/combined_metrics/samples/variable_naming/screaming_snake_for_constants/config.yml create mode 100644 priv/combined_metrics/samples/variable_naming/screaming_snake_for_constants/good/config.ex create mode 100644 priv/combined_metrics/samples/variable_naming/screaming_snake_for_constants/good/config.js create mode 100644 priv/combined_metrics/samples/variable_naming/screaming_snake_for_constants/good/config.rb create mode 100644 priv/combined_metrics/scope_and_assignment.yml create mode 100644 priv/combined_metrics/testing.yml create mode 100644 priv/combined_metrics/type_and_value.yml create mode 100644 
priv/combined_metrics/variable_naming.yml create mode 100644 test/codeqa/combined_metrics/file_scorer_test.exs create mode 100644 test/codeqa/combined_metrics/sample_runner_test.exs diff --git a/lib/codeqa/combined_metrics/category.ex b/lib/codeqa/combined_metrics/category.ex new file mode 100644 index 0000000..2467969 --- /dev/null +++ b/lib/codeqa/combined_metrics/category.ex @@ -0,0 +1,38 @@ +defmodule CodeQA.CombinedMetrics.Category do + @moduledoc """ + Macro helper for defining combined-metric category modules. + + Each category module (e.g. `VariableNaming`, `Documentation`) calls + `use CodeQA.CombinedMetrics.Category, yaml_path: "priv/..."`. + + This injects: + - `@callback score(metrics :: map()) :: float()` — making the caller a behaviour + - `compute_score/2` — delegates to `Scorer` with the baked-in yaml path + + ## Example + + defmodule CodeQA.CombinedMetrics.VariableNaming do + use CodeQA.CombinedMetrics.Category, + yaml_path: "priv/combined_metrics/variable_naming.yml" + end + + Leaf modules then declare `@behaviour CodeQA.CombinedMetrics.VariableNaming` + and call `VariableNaming.compute_score("key", metrics)`. + """ + + defmacro __using__(yaml_path: yaml_path) do + quote do + @callback score(metrics :: map()) :: float() + + @doc """ + Computes the score for `metric_name` using scalars from this category's YAML file. + + Delegates to `CodeQA.CombinedMetrics.Scorer.compute_score/3`. + """ + @spec compute_score(String.t(), map()) :: float() + def compute_score(metric_name, metrics) do + CodeQA.CombinedMetrics.Scorer.compute_score(unquote(yaml_path), metric_name, metrics) + end + end + end +end diff --git a/lib/codeqa/combined_metrics/code_smells.ex b/lib/codeqa/combined_metrics/code_smells.ex new file mode 100644 index 0000000..5b5678a --- /dev/null +++ b/lib/codeqa/combined_metrics/code_smells.ex @@ -0,0 +1,28 @@ +defmodule CodeQA.CombinedMetrics.CodeSmells do + @moduledoc """ + Behaviour and submodule registry for code smell detection metrics. 
+ + Scalar weights are defined in `priv/combined_metrics/code_smells.yml`. + See `CodeQA.CombinedMetrics.Category` for the scoring model. + """ + + @yaml_path "priv/combined_metrics/code_smells.yml" + + use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path + + @behaviors @yaml_path + |> YamlElixir.read_from_file!() + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end) + + for {key, doc} <- @behaviors do + defmodule Module.concat(CodeQA.CombinedMetrics.CodeSmells, Macro.camelize(key)) do + @moduledoc doc + @behaviour CodeQA.CombinedMetrics.CodeSmells + @score_key key + @impl true + def score(metrics), + do: CodeQA.CombinedMetrics.CodeSmells.compute_score(@score_key, metrics) + end + end +end diff --git a/lib/codeqa/combined_metrics/consistency.ex b/lib/codeqa/combined_metrics/consistency.ex new file mode 100644 index 0000000..6a1474e --- /dev/null +++ b/lib/codeqa/combined_metrics/consistency.ex @@ -0,0 +1,29 @@ +defmodule CodeQA.CombinedMetrics.Consistency do + @moduledoc """ + Behaviour and submodule registry for codebase consistency metrics. + + Covers naming style uniformity, structural patterns, and cross-file coherence. + Scalar weights are defined in `priv/combined_metrics/consistency.yml`. + See `CodeQA.CombinedMetrics.Category` for the scoring model. 
+ """ + + @yaml_path "priv/combined_metrics/consistency.yml" + + use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path + + @behaviors @yaml_path + |> YamlElixir.read_from_file!() + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end) + + for {key, doc} <- @behaviors do + defmodule Module.concat(CodeQA.CombinedMetrics.Consistency, Macro.camelize(key)) do + @moduledoc doc + @behaviour CodeQA.CombinedMetrics.Consistency + @score_key key + @impl true + def score(metrics), + do: CodeQA.CombinedMetrics.Consistency.compute_score(@score_key, metrics) + end + end +end diff --git a/lib/codeqa/combined_metrics/dependencies.ex b/lib/codeqa/combined_metrics/dependencies.ex new file mode 100644 index 0000000..0cde033 --- /dev/null +++ b/lib/codeqa/combined_metrics/dependencies.ex @@ -0,0 +1,28 @@ +defmodule CodeQA.CombinedMetrics.Dependencies do + @moduledoc """ + Behaviour and submodule registry for dependency and coupling quality metrics. + + Scalar weights are defined in `priv/combined_metrics/dependencies.yml`. + See `CodeQA.CombinedMetrics.Category` for the scoring model. 
+ """ + + @yaml_path "priv/combined_metrics/dependencies.yml" + + use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path + + @behaviors @yaml_path + |> YamlElixir.read_from_file!() + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end) + + for {key, doc} <- @behaviors do + defmodule Module.concat(CodeQA.CombinedMetrics.Dependencies, Macro.camelize(key)) do + @moduledoc doc + @behaviour CodeQA.CombinedMetrics.Dependencies + @score_key key + @impl true + def score(metrics), + do: CodeQA.CombinedMetrics.Dependencies.compute_score(@score_key, metrics) + end + end +end diff --git a/lib/codeqa/combined_metrics/documentation.ex b/lib/codeqa/combined_metrics/documentation.ex new file mode 100644 index 0000000..31abd0e --- /dev/null +++ b/lib/codeqa/combined_metrics/documentation.ex @@ -0,0 +1,28 @@ +defmodule CodeQA.CombinedMetrics.Documentation do + @moduledoc """ + Behaviour and submodule registry for documentation quality metrics. + + Scalar weights are defined in `priv/combined_metrics/documentation.yml`. + See `CodeQA.CombinedMetrics.Category` for the scoring model. 
+ """ + + @yaml_path "priv/combined_metrics/documentation.yml" + + use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path + + @behaviors @yaml_path + |> YamlElixir.read_from_file!() + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end) + + for {key, doc} <- @behaviors do + defmodule Module.concat(CodeQA.CombinedMetrics.Documentation, Macro.camelize(key)) do + @moduledoc doc + @behaviour CodeQA.CombinedMetrics.Documentation + @score_key key + @impl true + def score(metrics), + do: CodeQA.CombinedMetrics.Documentation.compute_score(@score_key, metrics) + end + end +end diff --git a/lib/codeqa/combined_metrics/error_handling.ex b/lib/codeqa/combined_metrics/error_handling.ex new file mode 100644 index 0000000..62e2032 --- /dev/null +++ b/lib/codeqa/combined_metrics/error_handling.ex @@ -0,0 +1,28 @@ +defmodule CodeQA.CombinedMetrics.ErrorHandling do + @moduledoc """ + Behaviour and submodule registry for error handling quality metrics. + + Scalar weights are defined in `priv/combined_metrics/error_handling.yml`. + See `CodeQA.CombinedMetrics.Category` for the scoring model. 
+ """ + + @yaml_path "priv/combined_metrics/error_handling.yml" + + use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path + + @behaviors @yaml_path + |> YamlElixir.read_from_file!() + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end) + + for {key, doc} <- @behaviors do + defmodule Module.concat(CodeQA.CombinedMetrics.ErrorHandling, Macro.camelize(key)) do + @moduledoc doc + @behaviour CodeQA.CombinedMetrics.ErrorHandling + @score_key key + @impl true + def score(metrics), + do: CodeQA.CombinedMetrics.ErrorHandling.compute_score(@score_key, metrics) + end + end +end diff --git a/lib/codeqa/combined_metrics/file_scorer.ex b/lib/codeqa/combined_metrics/file_scorer.ex new file mode 100644 index 0000000..368d163 --- /dev/null +++ b/lib/codeqa/combined_metrics/file_scorer.ex @@ -0,0 +1,97 @@ +defmodule CodeQA.CombinedMetrics.FileScorer do + @moduledoc """ + Scores individual files against combined metric behaviors. + + Converts per-file raw metric maps to aggregate-compatible format and + identifies which behaviors each file most likely exhibits. + """ + + alias CodeQA.CombinedMetrics.SampleRunner + + @doc """ + Converts a single file's raw metric map to aggregate format. + + Wraps each key in each group with the `mean_` prefix so the resulting + map is compatible with `SampleRunner.diagnose_aggregate/2`. + + ## Example + + iex> CodeQA.CombinedMetrics.FileScorer.file_to_aggregate(%{"halstead" => %{"tokens" => 42.0}}) + %{"halstead" => %{"mean_tokens" => 42.0}} + """ + @spec file_to_aggregate(map()) :: map() + def file_to_aggregate(metrics) do + Map.new(metrics, fn {group, keys} -> + prefixed_keys = Map.new(keys, fn {key, value} -> {"mean_" <> key, value} end) + {group, prefixed_keys} + end) + end + + @doc """ + Identifies the worst files per combined metric behavior. + + For each file in `files_map`, converts its metrics to aggregate format and + runs `SampleRunner.diagnose_aggregate/2`. 
The results are collected per + behavior, filtered to cosines at or below the negated + `CodeQA.Config.cosine_significance_threshold/0`, sorted ascending (most negative = + worst first), then truncated to `combined_top` entries. + + ## Options + + * `:combined_top` - number of worst files to keep per behavior (default: 2) + + ## Result shape + + %{ + "function_design.no_boolean_parameter" => [ + %{file: "lib/foo/bar.ex", cosine: -0.71, top_metrics: [...], top_nodes: [...]}, + %{file: "lib/foo/baz.ex", cosine: -0.44, top_metrics: [...], top_nodes: [...]} + ], + ... + } + """ + @spec worst_files_per_behavior(map(), keyword()) :: + %{ + String.t() => [ + %{file: String.t(), cosine: float(), top_metrics: list(), top_nodes: list()} + ] + } + def worst_files_per_behavior(files_map, opts \\ []) do + # TODO(option-c): cosine similarity is computed at file level; a line-level mapping would require computing a separate cosine score for each AST node by projecting that node's metric vector against the behavior's feature-weight vector. This is not currently possible because serialized nodes do not carry their own metric values. + top_n = Keyword.get(opts, :combined_top, 2) + threshold = CodeQA.Config.cosine_significance_threshold() + + files_map + |> Enum.reject(fn {_path, file_data} -> + file_data |> Map.get("metrics", %{}) |> map_size() == 0 + end) + |> Enum.reduce(%{}, fn {path, file_data}, acc -> + top_nodes = CodeQA.HealthReport.Grader.top_3_nodes(Map.get(file_data, "nodes")) + + file_data + |> Map.get("metrics", %{}) + |> file_to_aggregate() + |> SampleRunner.diagnose_aggregate(top: 99_999) + |> Enum.reduce(acc, fn %{ + category: category, + behavior: behavior, + cosine: cosine, + top_metrics: top_metrics + }, + inner_acc -> + key = "#{category}.#{behavior}" + entry = %{file: path, cosine: cosine, top_metrics: top_metrics, top_nodes: top_nodes} + Map.update(inner_acc, key, [entry], &[entry | &1]) + end) + end) + |> Map.new(fn {key, entries} -> + sorted = + entries + |> Enum.filter(fn e -> e.cosine <= -threshold end) + |> Enum.sort_by(& &1.cosine) + |> Enum.take(top_n) + + {key, sorted} + end) + end +end diff
--git a/lib/codeqa/combined_metrics/file_structure.ex b/lib/codeqa/combined_metrics/file_structure.ex new file mode 100644 index 0000000..164d3c9 --- /dev/null +++ b/lib/codeqa/combined_metrics/file_structure.ex @@ -0,0 +1,28 @@ +defmodule CodeQA.CombinedMetrics.FileStructure do + @moduledoc """ + Behaviour and submodule registry for file structure quality metrics. + + Scalar weights are defined in `priv/combined_metrics/file_structure.yml`. + See `CodeQA.CombinedMetrics.Category` for the scoring model. + """ + + @yaml_path "priv/combined_metrics/file_structure.yml" + + use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path + + @behaviors @yaml_path + |> YamlElixir.read_from_file!() + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end) + + for {key, doc} <- @behaviors do + defmodule Module.concat(CodeQA.CombinedMetrics.FileStructure, Macro.camelize(key)) do + @moduledoc doc + @behaviour CodeQA.CombinedMetrics.FileStructure + @score_key key + @impl true + def score(metrics), + do: CodeQA.CombinedMetrics.FileStructure.compute_score(@score_key, metrics) + end + end +end diff --git a/lib/codeqa/combined_metrics/function_design.ex b/lib/codeqa/combined_metrics/function_design.ex new file mode 100644 index 0000000..3e2e5e9 --- /dev/null +++ b/lib/codeqa/combined_metrics/function_design.ex @@ -0,0 +1,28 @@ +defmodule CodeQA.CombinedMetrics.FunctionDesign do + @moduledoc """ + Behaviour and submodule registry for function design quality metrics. + + Scalar weights are defined in `priv/combined_metrics/function_design.yml`. + See `CodeQA.CombinedMetrics.Category` for the scoring model. 
+ """ + + @yaml_path "priv/combined_metrics/function_design.yml" + + use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path + + @behaviors @yaml_path + |> YamlElixir.read_from_file!() + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end) + + for {key, doc} <- @behaviors do + defmodule Module.concat(CodeQA.CombinedMetrics.FunctionDesign, Macro.camelize(key)) do + @moduledoc doc + @behaviour CodeQA.CombinedMetrics.FunctionDesign + @score_key key + @impl true + def score(metrics), + do: CodeQA.CombinedMetrics.FunctionDesign.compute_score(@score_key, metrics) + end + end +end diff --git a/lib/codeqa/combined_metrics/naming_conventions.ex b/lib/codeqa/combined_metrics/naming_conventions.ex new file mode 100644 index 0000000..f463b85 --- /dev/null +++ b/lib/codeqa/combined_metrics/naming_conventions.ex @@ -0,0 +1,30 @@ +defmodule CodeQA.CombinedMetrics.NamingConventions do + @moduledoc """ + Behaviour and submodule registry for broader naming convention metrics. + + Covers class, file, and function naming patterns not captured by + `VariableNaming`. Scalar weights are defined in + `priv/combined_metrics/naming_conventions.yml`. + See `CodeQA.CombinedMetrics.Category` for the scoring model. 
+ """ + + @yaml_path "priv/combined_metrics/naming_conventions.yml" + + use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path + + @behaviors @yaml_path + |> YamlElixir.read_from_file!() + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end) + + for {key, doc} <- @behaviors do + defmodule Module.concat(CodeQA.CombinedMetrics.NamingConventions, Macro.camelize(key)) do + @moduledoc doc + @behaviour CodeQA.CombinedMetrics.NamingConventions + @score_key key + @impl true + def score(metrics), + do: CodeQA.CombinedMetrics.NamingConventions.compute_score(@score_key, metrics) + end + end +end diff --git a/lib/codeqa/combined_metrics/sample_runner.ex b/lib/codeqa/combined_metrics/sample_runner.ex new file mode 100644 index 0000000..4bd078c --- /dev/null +++ b/lib/codeqa/combined_metrics/sample_runner.ex @@ -0,0 +1,499 @@ +defmodule CodeQA.CombinedMetrics.SampleRunner do + @moduledoc """ + Discovers sample directories, analyzes them, and scores each behavior formula. + + Returns structured results suitable for rendering a separation table, enabling + manual scalar tuning of combined metric formulas. + """ + + alias CodeQA.CombinedMetrics.Scorer + + @samples_root "priv/combined_metrics/samples" + @yaml_dir "priv/combined_metrics" + @deadzone_low 0.995 + @deadzone_high 1.005 + + @doc """ + Runs all behaviors found in sample directories, optionally filtered by category. + + ## Options + + * `:category` - restrict to one category (e.g. `"variable_naming"`) + * `:verbose` - when `true`, populates `:metric_detail` in each result + + ## Result shape + + %{ + category: "variable_naming", + behavior: "name_is_generic", + bad_score: 0.074, + good_score: 0.550, + ratio: 7.43, + direction_ok: true, + metric_detail: [...] 
# empty unless verbose: true + } + """ + @spec run(keyword()) :: [map()] + def run(opts \\ []) do + filter_category = opts[:category] + + @samples_root + |> File.ls!() + |> Enum.flat_map(fn category -> + Path.join([@samples_root, category]) + |> File.ls!() + |> Enum.map(&{category, &1}) + end) + |> Enum.filter(fn {category, behavior} -> + (filter_category == nil or category == filter_category) and + has_both_dirs?(category, behavior) + end) + |> Enum.map(fn {category, behavior} -> + score_behavior(category, behavior, opts) + end) + end + + defp has_both_dirs?(category, behavior) do + File.dir?(sample_path(category, behavior, "bad")) and + File.dir?(sample_path(category, behavior, "good")) + end + + defp score_behavior(category, behavior, opts) do + yaml_path = "priv/combined_metrics/#{category}.yml" + bad_agg = analyze(sample_path(category, behavior, "bad")) + good_agg = analyze(sample_path(category, behavior, "good")) + + bad_score = Scorer.compute_score(yaml_path, behavior, bad_agg) + good_score = Scorer.compute_score(yaml_path, behavior, good_agg) + ratio = if bad_score > 0, do: good_score / bad_score, else: 0.0 + + base = %{ + category: category, + behavior: behavior, + bad_score: bad_score, + good_score: good_score, + ratio: Float.round(ratio, 2), + direction_ok: good_score >= bad_score + } + + if opts[:verbose] do + Map.put(base, :metric_detail, metric_detail(yaml_path, behavior, bad_agg, good_agg)) + else + Map.put(base, :metric_detail, []) + end + end + + defp analyze(dir) do + dir + |> CodeQA.Engine.Collector.collect_files() + |> CodeQA.Engine.Analyzer.analyze_codebase() + |> get_in(["codebase", "aggregate"]) + end + + defp metric_detail(yaml_path, behavior, bad_agg, good_agg) do + Scorer.scalars_for(yaml_path, behavior) + |> Enum.map(fn {{group, key}, scalar} -> + bad_val = Scorer.get(bad_agg, group, key) + good_val = Scorer.get(good_agg, group, key) + ratio = if bad_val > 0, do: Float.round(good_val / bad_val, 2), else: 0.0 + + %{ + group: group, + key: 
key, + scalar: scalar, + bad: bad_val, + good: good_val, + ratio: ratio + } + end) + |> Enum.sort_by(&abs(&1.ratio - 1.0), :desc) + end + + @doc """ + Builds a per-behavior metric correlation report for scalar tuning. + + For each behavior with sample data, computes all `mean_*` metric values for + both good and bad samples, then suggests normalized scalars in [-2, 2] using + the log-linear method: + + log_diff = log(good_val) - log(bad_val) + suggested_scalar = 2.0 * log_diff / max(|all log_diffs| for this behavior) + + The strongest signal for each behavior maps to ±2.0; all others scale + proportionally. This lets you paste the suggested scalars into the YAML as a + starting point and refine from there. + + ## Result shape (keyed by "category.behavior") + + %{ + "variable_naming.name_is_generic" => %{ + "identifier_length_variance.mean_variance" => %{ + bad: 5.131, good: 25.109, + log_bad: 1.635, log_good: 3.224, + ratio: 4.895, + suggested_scalar: 2.0 + }, + ... + } + } + """ + @spec build_metric_report(keyword()) :: map() + def build_metric_report(opts \\ []) do + filter_category = opts[:category] + + @samples_root + |> File.ls!() + |> Enum.flat_map(fn category -> + Path.join([@samples_root, category]) + |> File.ls!() + |> Enum.map(&{category, &1}) + end) + |> Enum.filter(fn {category, behavior} -> + (filter_category == nil or category == filter_category) and + has_both_dirs?(category, behavior) + end) + |> Map.new(fn {category, behavior} -> + {"#{category}.#{behavior}", behavior_metric_table(category, behavior)} + end) + end + + defp behavior_metric_table(category, behavior) do + bad_agg = analyze(sample_path(category, behavior, "bad")) + good_agg = analyze(sample_path(category, behavior, "good")) + + entries = + Scorer.default_scalars() + |> Map.keys() + |> Enum.map(fn {group, key} -> + bad_val = Scorer.get(bad_agg, group, key) + good_val = Scorer.get(good_agg, group, key) + log_bad = :math.log(bad_val) + log_good = :math.log(good_val) + ratio = good_val / 
bad_val + log_diff = log_good - log_bad + {"#{group}.#{key}", bad_val, good_val, log_bad, log_good, ratio, log_diff} + end) + + max_abs_log_diff = + entries + |> Enum.map(fn {_, _, _, _, _, _, ld} -> abs(ld) end) + |> Enum.max(fn -> 1.0 end) + |> max(1.0e-10) + + Map.new(entries, fn {metric_key, bad_val, good_val, log_bad, log_good, ratio, log_diff} -> + suggested_scalar = Float.round(2.0 * log_diff / max_abs_log_diff, 4) + + {metric_key, + %{ + bad: r4(bad_val), + good: r4(good_val), + log_bad: r4(log_bad), + log_good: r4(log_good), + ratio: r4(ratio), + suggested_scalar: suggested_scalar + }} + end) + end + + @doc """ + Scores all combined metric behaviors against the given codebase aggregate map. + + Reads all YAML config files from `priv/combined_metrics/` and returns one entry + per YAML category, each containing the scores for all behaviors within it. + Behaviors are sorted ascending by score so the lowest-scoring (worst) appear first. + + ## Result shape + + [ + %{ + category: "variable_naming", + name: "Variable Naming", + behaviors: [ + %{behavior: "name_is_generic", score: 3.45}, + ... + ] + }, + ... 
+ ] + """ + @spec score_aggregate(map()) :: [map()] + def score_aggregate(aggregate) do + Scorer.all_yamls() + |> Enum.sort_by(fn {path, _} -> path end) + |> Enum.map(fn {yaml_path, data} -> + category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") + + behaviors = + data + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.map(fn {behavior, behavior_data} -> + log_baseline = Map.get(behavior_data, "_log_baseline", 0.0) / 1.0 + raw_score = Scorer.compute_score(yaml_path, behavior, aggregate) + calibrated = :math.log(max(raw_score, 1.0e-300)) - log_baseline + %{behavior: behavior, score: Float.round(calibrated, 4)} + end) + |> Enum.sort_by(& &1.score) + + %{category: category, name: humanize(category), behaviors: behaviors} + end) + end + + defp humanize(slug) do + slug + |> String.split("_") + |> Enum.map_join(" ", &String.capitalize/1) + end + + @doc """ + Identifies the most likely code quality issues in an aggregate by cosine similarity. + + For each behavior, computes the cosine similarity between its scalar weight vector + `s` and the file's log-metric vector `v`: + + cos_sim = (s · v) / (|s| × |v|) + + A negative cosine means the file's metric profile anti-aligns with what good code + looks like for that behavior — i.e. the file likely exhibits that anti-pattern. + + Results are sorted by cosine similarity ascending (most negative = most likely + issue). Behaviors with no non-zero scalars (no sample data) are excluded. + + ## Options + + * `:top` - number of results to return (default 15) + + ## Result shape + + %{ + category: "function_design", + behavior: "no_boolean_parameter", + cosine: -0.83, + score: -13.54, + top_metrics: [%{metric: "branching.mean_branching_density", contribution: -4.1}, ...] 
+ } + """ + @spec diagnose_aggregate(map(), keyword()) :: [map()] + def diagnose_aggregate(aggregate, opts \\ []) do + top_n = Keyword.get(opts, :top, 15) + + Scorer.all_yamls() + |> Enum.sort_by(fn {path, _} -> path end) + |> Enum.flat_map(fn {yaml_path, data} -> + category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") + + data + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.flat_map(fn {behavior, behavior_data} -> + scalars = Scorer.scalars_for(yaml_path, behavior) + + if map_size(scalars) == 0 do + [] + else + log_baseline = Map.get(behavior_data, "_log_baseline", 0.0) / 1.0 + + {dot, norm_s_sq, norm_v_sq, contributions} = + Enum.reduce(scalars, {0.0, 0.0, 0.0, []}, fn {{group, key}, scalar}, + {d, ns, nv, contribs} -> + log_m = :math.log(Scorer.get(aggregate, group, key)) + contrib = scalar * log_m + + {d + contrib, ns + scalar * scalar, nv + log_m * log_m, + [{"#{group}.#{key}", contrib} | contribs]} + end) + + cos_sim = + if norm_s_sq > 0 and norm_v_sq > 0, + do: dot / (:math.sqrt(norm_s_sq) * :math.sqrt(norm_v_sq)), + else: 0.0 + + raw_score = Scorer.compute_score(yaml_path, behavior, aggregate) + calibrated = :math.log(max(raw_score, 1.0e-300)) - log_baseline + + top_metrics = + contributions + |> Enum.sort_by(fn {_, c} -> c end) + |> Enum.take(5) + |> Enum.map(fn {metric, contribution} -> + %{metric: metric, contribution: Float.round(contribution, 4)} + end) + + [ + %{ + category: category, + behavior: behavior, + cosine: Float.round(cos_sim, 4), + score: Float.round(calibrated, 4), + top_metrics: top_metrics + } + ] + end + end) + end) + |> Enum.sort_by(& &1.cosine) + |> Enum.take(top_n) + end + + @doc """ + Applies suggested scalars from sample analysis back to the YAML config files. + + For each behavior that has sample data, rewrites its scalar entries using the + log-linear suggestion method. Metrics whose ratio falls in the deadzone + (#{@deadzone_low} ≤ ratio ≤ #{@deadzone_high}) are excluded.
All non-deadzoned
+  metrics are written, even if they were not previously present in the YAML.
+
+  Behaviors without sample data are left unchanged.
+
+  Returns a list of per-category stats maps.
+  """
+  @spec apply_scalars(keyword()) :: [map()]
+  def apply_scalars(opts \\ []) do
+    report = build_metric_report(opts)
+    filter_category = opts[:category]
+
+    @yaml_dir
+    |> File.ls!()
+    |> Enum.filter(&String.ends_with?(&1, ".yml"))
+    |> Enum.filter(fn yml_file ->
+      filter_category == nil or String.trim_trailing(yml_file, ".yml") == filter_category
+    end)
+    |> Enum.sort()
+    |> Enum.map(fn yml_file ->
+      category = String.trim_trailing(yml_file, ".yml")
+      yaml_path = Path.join(@yaml_dir, yml_file)
+      {:ok, existing} = YamlElixir.read_from_file(yaml_path)
+
+      # Each category file is rewritten in place with the updated scalars.
+      {updated_yaml, stats} = apply_to_category(existing, category, report)
+      File.write!(yaml_path, format_yaml(updated_yaml))
+
+      Map.put(stats, :category, category)
+    end)
+  end
+
+  # Builds the new YAML map for one category and tallies how many metrics were
+  # written or deadzoned and how many behaviors were skipped (absent from `report`).
+  defp apply_to_category(existing, category, report) do
+    existing
+    |> Enum.filter(fn {_k, v} -> is_map(v) end)
+    |> Enum.reduce({%{}, %{updated: 0, deadzoned: 0, skipped: 0}}, fn
+      {behavior, current_groups}, {acc_yaml, stats} ->
+        report_key = "#{category}.#{behavior}"
+        doc = read_behavior_doc(category, behavior)
+
+        case Map.get(report, report_key) do
+          nil ->
+            groups = maybe_put_doc(current_groups, doc)
+            {Map.put(acc_yaml, behavior, groups), Map.update!(stats, :skipped, &(&1 + 1))}
+
+          metrics ->
+            {new_groups, log_baseline, n_updated, n_deadzoned} = groups_from_report(metrics)
+            # Fall back to current groups if everything was deadzoned
+            # NOTE(review): in that fallback the baseline is still overwritten with the
+            # freshly computed value (0.0) while the old scalars are kept — confirm intended.
+            groups =
+              if(map_size(new_groups) > 0, do: new_groups, else: current_groups)
+              |> Map.put("_log_baseline", Float.round(log_baseline, 6))
+              # `new_groups` only holds scalars; carry the existing fix hint over so a
+              # rewrite does not drop it.
+              |> keep_fix_hint(current_groups)
+              |> maybe_put_doc(doc)
+
+            {Map.put(acc_yaml, behavior, groups),
+             %{
+               stats
+               | updated: stats.updated + n_updated,
+                 deadzoned: stats.deadzoned + n_deadzoned
+             }}
+        end
+    end)
+  end
+
+  # Copies `_fix_hint` from the behavior's current YAML entry, when it has one.
+  defp keep_fix_hint(groups, %{"_fix_hint" => hint}), do: Map.put(groups, "_fix_hint", hint)
+  defp keep_fix_hint(groups, _current_groups), do: groups
+
+  defp read_behavior_doc(category, behavior) do
+    config_path =
Path.join([@samples_root, category, behavior, "config.yml"])
+
+    # Any read or parse failure, or a missing/non-string "doc" key, yields nil.
+    case File.read(config_path) do
+      {:ok, content} ->
+        case YamlElixir.read_from_string(content) do
+          {:ok, %{"doc" => doc}} when is_binary(doc) -> doc
+          _ -> nil
+        end
+
+      _ ->
+        nil
+    end
+  end
+
+  defp maybe_put_doc(groups, nil), do: groups
+  defp maybe_put_doc(groups, doc), do: Map.put(groups, "_doc", doc)
+
+  # Folds the per-metric report into {groups, log_baseline, n_updated, n_deadzoned}.
+  # Metric keys have the form "group.key"; deadzoned metrics are only counted.
+  defp groups_from_report(metrics) do
+    Enum.reduce(metrics, {%{}, 0.0, 0, 0}, fn {metric_key, data},
+                                              {groups, log_baseline, n_updated, n_deadzoned} ->
+      [group, key] = String.split(metric_key, ".", parts: 2)
+
+      if deadzone?(data.ratio) do
+        {groups, log_baseline, n_updated, n_deadzoned + 1}
+      else
+        new_groups =
+          Map.update(
+            groups,
+            group,
+            %{key => data.suggested_scalar},
+            &Map.put(&1, key, data.suggested_scalar)
+          )
+
+        # Baseline: expected log score at the geometric mean of good/bad sample values
+        geo_mean = :math.sqrt(max(data.bad, 1.0e-10) * max(data.good, 1.0e-10))
+        new_baseline = log_baseline + data.suggested_scalar * :math.log(geo_mean)
+        {new_groups, new_baseline, n_updated + 1, n_deadzoned}
+      end
+    end)
+  end
+
+  defp deadzone?(ratio), do: ratio >= @deadzone_low and ratio <= @deadzone_high
+
+  # Serializes `%{behavior => groups}` to YAML text, behaviors sorted by name.
+  #
+  # Layout per behavior: the behavior key at column 0; `_doc`, `_fix_hint`,
+  # `_log_baseline` and each group name indented two spaces; each metric key
+  # indented four spaces. The nesting is required: the scorer only reads
+  # scalars from entries whose value is a map of keys.
+  defp format_yaml(data) do
+    lines =
+      data
+      |> Enum.sort_by(fn {behavior, _} -> behavior end)
+      |> Enum.flat_map(fn {behavior, groups} ->
+        ["#{behavior}:"] ++
+          yaml_meta_line(groups, "_doc", &inspect/1) ++
+          yaml_meta_line(groups, "_fix_hint", &inspect/1) ++
+          yaml_meta_line(groups, "_log_baseline", &fmt_scalar/1) ++
+          yaml_group_lines(groups) ++ [""]
+      end)
+
+    Enum.join(lines, "\n") <> "\n"
+  end
+
+  # Zero or one line for a metadata key such as "_doc".
+  defp yaml_meta_line(groups, key, formatter) do
+    case Map.get(groups, key) do
+      nil -> []
+      value -> ["  #{key}: #{formatter.(value)}"]
+    end
+  end
+
+  # Lines for every metric group (map-valued, non-metadata entry), sorted by
+  # group name and then by metric key.
+  defp yaml_group_lines(groups) do
+    groups
+    |> Enum.filter(fn {k, v} ->
+      k not in ["_doc", "_log_baseline", "_fix_hint"] and is_map(v)
+    end)
+    |> Enum.sort_by(fn {group, _} -> group end)
+    |> Enum.flat_map(fn {group, keys} ->
+      key_lines =
+        keys
+        |> Enum.sort_by(fn {key, _} -> key end)
+        |> Enum.map(fn {key, scalar} -> "    #{key}: #{fmt_scalar(scalar)}" end)
+
+      ["  #{group}:" | key_lines]
+    end)
+  end
+
+  # Floats are printed with four decimals; integers get a ".0" suffix.
+  defp fmt_scalar(f) when is_float(f), do: :erlang.float_to_binary(f, decimals: 4)
+  defp fmt_scalar(n) when is_integer(n), do: "#{n}.0"
+
+  defp r4(f), do: Float.round(f / 1.0, 4)
+
+  defp sample_path(category, behavior, kind) do
+    Path.join([@samples_root, category, behavior, kind])
+  end
+end
diff --git a/lib/codeqa/combined_metrics/scope_and_assignment.ex b/lib/codeqa/combined_metrics/scope_and_assignment.ex
new file mode 100644
index 0000000..ddb1d11
--- /dev/null
+++ b/lib/codeqa/combined_metrics/scope_and_assignment.ex
@@ -0,0 +1,28 @@
+defmodule CodeQA.CombinedMetrics.ScopeAndAssignment do
+  @moduledoc """
+  Behaviour and submodule registry for variable scope and assignment quality metrics.
+
+  Scalar weights are defined in `priv/combined_metrics/scope_and_assignment.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/scope_and_assignment.yml"
+
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  # {behavior_key, doc} pairs for every map-valued top-level YAML entry, read at compile time.
+  # NOTE(review): confirm the YAML file is registered as an @external_resource
+  # (possibly by the `use` above) so edits to it trigger recompilation.
+  @behaviors for {key, %{} = groups} <- YamlElixir.read_from_file!(@yaml_path),
+                 do: {key, Map.get(groups, "_doc")}
+
+  # One submodule per behavior; each delegates to this module's compute_score/2.
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.ScopeAndAssignment, Macro.camelize(key)) do
+      @moduledoc doc
+      @behaviour CodeQA.CombinedMetrics.ScopeAndAssignment
+      @score_key key
+      @impl true
+      def score(metrics),
+        do: CodeQA.CombinedMetrics.ScopeAndAssignment.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/scorer.ex b/lib/codeqa/combined_metrics/scorer.ex
new file mode 100644
index 0000000..29b6d2f
--- /dev/null
+++ b/lib/codeqa/combined_metrics/scorer.ex
@@ -0,0 +1,83 @@
+defmodule CodeQA.CombinedMetrics.Scorer do
+  @moduledoc """
+  Pure computation engine for combined metric formulas.
+
+  Loads scalar weights from a YAML file and computes a score as a product of
+  metric powers:
+
+      score = metric_a ^ s_a * metric_b ^ s_b * ...
+
+  Scalars of 0.0 contribute nothing (x^0 = 1.0) and are the default for all
+  metric keys not listed in the YAML. Negative scalars penalise a metric
+  (higher raw value → lower score).
+  """
+
+  @doc """
+  Computes the score for `metric_name` using scalars from `yaml_path`.
+
+  `metrics` is the `codebase.aggregate` map returned by `codeqa analyze`.
+  The YAML scalars are layered over `default_scalars/0`, and the result is the
+  product of `pow(get(metrics, group, key), scalar)` over every entry.
+  """
+  @spec compute_score(String.t(), String.t(), map()) :: float()
+  def compute_score(yaml_path, metric_name, metrics) do
+    weights = Map.merge(default_scalars(), scalars_for(yaml_path, metric_name))
+
+    Enum.reduce(weights, 1.0, fn {{group, key}, exponent}, product ->
+      product * pow(get(metrics, group, key), exponent)
+    end)
+  end
+
+  @doc "Returns the non-zero scalar overrides for `metric_name` from `yaml_path`."
+  @spec scalars_for(String.t(), String.t()) :: %{{String.t(), String.t()} => float()}
+  def scalars_for(yaml_path, metric_name) do
+    behavior_entry = yaml_path |> yaml_data() |> Map.get(metric_name, %{})
+
+    # Only map-valued entries are metric groups; scalars are coerced to floats.
+    for {group, keys} <- behavior_entry,
+        is_map(keys),
+        {key, scalar} <- keys,
+        into: %{} do
+      {{group, key}, scalar / 1.0}
+    end
+  end
+
+  @doc "Returns the full default scalar map: all registered file metric keys mapped to 0.0."
+  @spec default_scalars() :: %{{String.t(), String.t()} => float()}
+  def default_scalars do
+    for mod <- CodeQA.Engine.Analyzer.build_registry().file_metrics,
+        key <- mod.keys(),
+        into: %{} do
+      {{mod.name(), "mean_" <> key}, 0.0}
+    end
+  end
+
+  @doc "Safely fetches a nested metric value, returning 1.0 if missing or non-positive."
+  @spec get(map(), String.t(), String.t()) :: float()
+  def get(metrics, group, key) do
+    value = get_in(metrics, [group, key])
+
+    if is_number(value) and value > 0, do: value / 1.0, else: 1.0
+  end
+
+  @doc "Computes `base ^ scalar`, returning 1.0 for non-positive bases."
+  @spec pow(float(), float()) :: float()
+  def pow(base, scalar) do
+    if base > 0, do: :math.pow(base, scalar), else: 1.0
+  end
+
+  # Every YAML file under priv/combined_metrics is parsed at compile time and
+  # registered as an external resource of this module.
+  @yaml_dir "priv/combined_metrics"
+  @yaml_paths Path.wildcard(Path.join(@yaml_dir, "*.yml"))
+  for path <- @yaml_paths, do: @external_resource(path)
+
+  @compiled_yamls Map.new(@yaml_paths, fn path ->
+                    {:ok, data} = YamlElixir.read_from_file(path)
+                    {path, data}
+                  end)
+
+  @doc "Returns all compiled YAML data as `%{path => parsed_map}`."
+  @spec all_yamls() :: %{String.t() => map()}
+  def all_yamls, do: @compiled_yamls
+
+  # Parsed data for one YAML path; an unknown path yields an empty map.
+  defp yaml_data(yaml_path), do: Map.get(@compiled_yamls, yaml_path, %{})
+end
diff --git a/lib/codeqa/combined_metrics/testing.ex b/lib/codeqa/combined_metrics/testing.ex
new file mode 100644
index 0000000..8876453
--- /dev/null
+++ b/lib/codeqa/combined_metrics/testing.ex
@@ -0,0 +1,28 @@
+defmodule CodeQA.CombinedMetrics.Testing do
+  @moduledoc """
+  Behaviour and submodule registry for test quality metrics.
+
+  Scalar weights are defined in `priv/combined_metrics/testing.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/testing.yml"
+
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  # {behavior_key, doc} pairs for every map-valued top-level YAML entry, read at compile time.
+  # NOTE(review): confirm the YAML file is registered as an @external_resource
+  # (possibly by the `use` above) so edits to it trigger recompilation.
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  # One submodule per behavior; each delegates to this module's compute_score/2.
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.Testing, Macro.camelize(key)) do
+      @moduledoc doc
+      @behaviour CodeQA.CombinedMetrics.Testing
+      @score_key key
+      @impl true
+      def score(metrics),
+        do: CodeQA.CombinedMetrics.Testing.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/type_and_value.ex b/lib/codeqa/combined_metrics/type_and_value.ex
new file mode 100644
index 0000000..5f76a8a
--- /dev/null
+++ b/lib/codeqa/combined_metrics/type_and_value.ex
@@ -0,0 +1,28 @@
+defmodule CodeQA.CombinedMetrics.TypeAndValue do
+  @moduledoc """
+  Behaviour and submodule registry for type safety and value assignment quality metrics.
+
+  Scalar weights are defined in `priv/combined_metrics/type_and_value.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/type_and_value.yml"
+
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  # {behavior_key, doc} pairs for every map-valued top-level YAML entry, read at compile time.
+  # NOTE(review): confirm the YAML file is registered as an @external_resource
+  # (possibly by the `use` above) so edits to it trigger recompilation.
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  # One submodule per behavior; each delegates to this module's compute_score/2.
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.TypeAndValue, Macro.camelize(key)) do
+      @moduledoc doc
+      @behaviour CodeQA.CombinedMetrics.TypeAndValue
+      @score_key key
+      @impl true
+      def score(metrics),
+        do: CodeQA.CombinedMetrics.TypeAndValue.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/codeqa/combined_metrics/variable_naming.ex b/lib/codeqa/combined_metrics/variable_naming.ex
new file mode 100644
index 0000000..c846414
--- /dev/null
+++ b/lib/codeqa/combined_metrics/variable_naming.ex
@@ -0,0 +1,28 @@
+defmodule CodeQA.CombinedMetrics.VariableNaming do
+  @moduledoc """
+  Behaviour and submodule registry for variable naming quality metrics.
+
+  Scalar weights are defined in `priv/combined_metrics/variable_naming.yml`.
+  See `CodeQA.CombinedMetrics.Category` for the scoring model.
+  """
+
+  @yaml_path "priv/combined_metrics/variable_naming.yml"
+
+  use CodeQA.CombinedMetrics.Category, yaml_path: @yaml_path
+
+  # {behavior_key, doc} pairs for every map-valued top-level YAML entry, read at compile time.
+  # NOTE(review): confirm the YAML file is registered as an @external_resource
+  # (possibly by the `use` above) so edits to it trigger recompilation.
+  @behaviors @yaml_path
+             |> YamlElixir.read_from_file!()
+             |> Enum.filter(fn {_k, v} -> is_map(v) end)
+             |> Enum.map(fn {key, groups} -> {key, Map.get(groups, "_doc")} end)
+
+  # One submodule per behavior; each delegates to this module's compute_score/2.
+  for {key, doc} <- @behaviors do
+    defmodule Module.concat(CodeQA.CombinedMetrics.VariableNaming, Macro.camelize(key)) do
+      @moduledoc doc
+      @behaviour CodeQA.CombinedMetrics.VariableNaming
+      @score_key key
+      @impl true
+      def score(metrics),
+        do: CodeQA.CombinedMetrics.VariableNaming.compute_score(@score_key, metrics)
+    end
+  end
+end
diff --git a/lib/mix/tasks/codeqa/sample_report.ex b/lib/mix/tasks/codeqa/sample_report.ex
new file mode 100644
index 0000000..0246777
--- /dev/null
+++ b/lib/mix/tasks/codeqa/sample_report.ex
@@ -0,0 +1,192 @@
+defmodule Mix.Tasks.Codeqa.SampleReport do
+  use Mix.Task
+
+  @shortdoc "Evaluates combined metric formulas against good/bad sample code"
+
+  @moduledoc """
+  Runs combined metric formulas against sample files and prints a separation table.
+
+      mix codeqa.sample_report
+      mix codeqa.sample_report --category variable_naming
+      mix codeqa.sample_report --verbose
+      mix codeqa.sample_report --output results.json
+      mix codeqa.sample_report --apply-scalars
+      mix codeqa.sample_report --file path/to/file.ex
+
+  A ratio ≥ 2x means the formula meaningfully separates good from bad code.
+  A ratio < 1.5x is flagged as weak; < 1.0x is marked ✗ (wrong direction).
+
+  `--apply-scalars` rewrites the YAML config files with suggested scalars derived
+  from the sample data. Metrics with ratio in the deadzone (0.995–1.005) are
+  excluded. All non-deadzoned metrics are written, including ones not previously
+  in the YAML.
+
+  `--file` analyzes a single file or directory and prints all combined metric
+  behavior scores, grouped by category, sorted worst-first.
+  """
+
+  @switches [
+    category: :string,
+    verbose: :boolean,
+    output: :string,
+    report: :string,
+    apply_scalars: :boolean,
+    file: :string,
+    top: :integer
+  ]
+
+  # Entry point: always prints the sample separation table, then performs each
+  # optional action whose switch was given (--output, --report, --apply-scalars, --file).
+  @impl Mix.Task
+  def run(args) do
+    Mix.Task.run("app.start")
+    {opts, _, _} = OptionParser.parse(args, switches: @switches)
+
+    results = CodeQA.CombinedMetrics.SampleRunner.run(opts)
+
+    results
+    |> Enum.group_by(& &1.category)
+    |> Enum.each(&print_category(&1, opts))
+
+    if path = opts[:output] do
+      File.write!(path, Jason.encode!(results, pretty: true))
+      IO.puts("\nResults written to #{path}")
+    end
+
+    if path = opts[:report] do
+      report = CodeQA.CombinedMetrics.SampleRunner.build_metric_report(opts)
+      File.write!(path, Jason.encode!(report, pretty: true))
+      IO.puts("\nMetric report written to #{path}")
+    end
+
+    if opts[:apply_scalars] do
+      stats = CodeQA.CombinedMetrics.SampleRunner.apply_scalars(opts)
+      IO.puts("\nApplied scalars to YAML configs:")
+      Enum.each(stats, &print_scalar_stats/1)
+    end
+
+    if path = opts[:file] do
+      print_file_scores(path, opts)
+    end
+  end
+
+  defp print_category({category, results}, opts) do
+    IO.puts("\n#{category}")
+    IO.puts(String.duplicate("-", 75))
+
+    IO.puts(
+      " " <>
+        pad("behavior", 35) <>
+        pad("bad", 9) <>
+        pad("good", 9) <>
+        pad("ratio", 13) <>
+        "ok?"
+    )
+
+    Enum.each(results, &print_row(&1, opts))
+  end
+
+  # Prints one table row; with --verbose also prints the per-metric detail lines.
+  defp print_row(r, opts) do
+    ratio_str =
+      "#{r.ratio}x" <>
+        cond do
+          not r.direction_ok -> ""
+          r.ratio < 1.5 -> " (weak)"
+          true -> ""
+        end
+
+    ok = if r.direction_ok, do: "✓", else: "✗"
+
+    IO.puts(
+      " " <>
+        pad(r.behavior, 35) <>
+        pad(fmt(r.bad_score), 9) <>
+        pad(fmt(r.good_score), 9) <>
+        pad(ratio_str, 13) <>
+        ok
+    )
+
+    if opts[:verbose] do
+      Enum.each(r.metric_detail, fn m ->
+        scalar_str = if m.scalar >= 0, do: "+#{m.scalar}", else: "#{m.scalar}"
+
+        IO.puts(
+          " " <>
+            pad("#{m.group}.#{m.key}", 45) <>
+            pad(scalar_str, 7) <>
+            pad(fmt(m.bad), 8) <>
+            pad(fmt(m.good), 8) <>
+            "#{m.ratio}x"
+        )
+      end)
+    end
+  end
+
+  # Handles --file: collects the file(s) at `path` and reports on them.
+  # A path that does not exist prints only "Path not found"; previously it also
+  # fell through to the "No supported files found" message.
+  defp print_file_scores(path, opts) do
+    expanded = Path.expand(path)
+
+    cond do
+      File.dir?(expanded) ->
+        expanded
+        |> CodeQA.Engine.Collector.collect_files()
+        |> report_file_scores(path, opts)
+
+      File.regular?(expanded) ->
+        report_file_scores(%{Path.basename(expanded) => File.read!(expanded)}, path, opts)
+
+      true ->
+        IO.puts("\nPath not found: #{path}")
+    end
+  end
+
+  # Analyzes the collected files and prints the top likely issues followed by
+  # the full per-category breakdown.
+  defp report_file_scores(files, path, opts) when map_size(files) > 0 do
+    IO.puts("\nAnalyzing #{map_size(files)} file(s) at: #{path}")
+
+    aggregate =
+      files
+      |> CodeQA.Engine.Analyzer.analyze_codebase()
+      |> get_in(["codebase", "aggregate"])
+
+    top_n = opts[:top] || 15
+    issues = CodeQA.CombinedMetrics.SampleRunner.diagnose_aggregate(aggregate, top: top_n)
+    IO.puts("\nTop #{top_n} likely issues (by cosine similarity):")
+    IO.puts(String.duplicate("-", 75))
+    IO.puts(" " <> pad("behavior", 38) <> pad("cosine", 9) <> "score")
+    Enum.each(issues, &print_issue_row/1)
+
+    IO.puts("\nFull breakdown by category:")
+    combined = CodeQA.CombinedMetrics.SampleRunner.score_aggregate(aggregate)
+    IO.puts("")
+    Enum.each(combined, &print_combined_category/1)
+  end
+
+  defp report_file_scores(_files, path, _opts) do
+    IO.puts("\nNo supported files found at: #{path}")
+  end
+
+  defp print_issue_row(%{category: cat, behavior: b, cosine: cos, score: s, top_metrics: metrics}) do
+    IO.puts(" " <> pad("#{cat}.#{b}", 38) <> pad(fmt(cos), 9) <> fmt(s))
+
+
Enum.each(metrics, fn %{metric: m, contribution: c} ->
+      IO.puts(" " <> pad(m, 44) <> fmt(c))
+    end)
+  end
+
+  # Prints one category table: heading, rule, column header, then one row per
+  # behavior ordered by ascending score; negative scores get a warning marker.
+  defp print_combined_category(%{name: name, behaviors: behaviors}) do
+    IO.puts(name)
+    IO.puts(String.duplicate("-", 60))
+
+    IO.puts(" " <> pad("behavior", 40) <> "score")
+
+    ordered = Enum.sort_by(behaviors, & &1.score)
+
+    Enum.each(ordered, fn %{behavior: b, score: s} ->
+      marker =
+        if s < 0.0 do
+          " ⚠"
+        else
+          ""
+        end
+
+      IO.puts(" " <> pad(b, 40) <> fmt(s) <> marker)
+    end)
+
+    IO.puts("")
+  end
+
+  # One summary line per category after --apply-scalars.
+  defp print_scalar_stats(%{category: cat, updated: u, deadzoned: d, skipped: s}) do
+    IO.puts(" #{pad(cat, 30)} #{u} written #{d} deadzoned #{s} skipped (no samples)")
+  end
+
+  # Formats a number with four decimals (`/ 1` coerces integers to floats).
+  defp fmt(f), do: :erlang.float_to_binary(f / 1, decimals: 4)
+  # Right-pads the string form of `s` to `n` characters.
+  defp pad(s, n), do: String.pad_trailing(to_string(s), n)
+end
diff --git a/lib/mix/tasks/codeqa/signal_debug.ex b/lib/mix/tasks/codeqa/signal_debug.ex
new file mode 100644
index 0000000..3852dec
--- /dev/null
+++ b/lib/mix/tasks/codeqa/signal_debug.ex
@@ -0,0 +1,183 @@
+defmodule Mix.Tasks.Codeqa.SignalDebug do
+  use Mix.Task
+
+  @shortdoc "Shows structural signal emissions when splitting a file into blocks"
+
+  @moduledoc """
+  Runs each structural signal over a file and prints its emissions step by step.
+
+      mix codeqa.signal_debug path/to/file.ex
+      mix codeqa.signal_debug path/to/file.py --signal keyword
+      mix codeqa.signal_debug path/to/file.ex --show-tokens
+
+  Options:
+    --signal Only show a specific signal (e.g.
keyword, blank, bracket)
+    --show-tokens Print the full token list before signal output
+  """
+
+  alias CodeQA.AST.Lexing.TokenNormalizer
+  alias CodeQA.AST.Parsing.SignalStream
+  alias CodeQA.Language
+
+  alias CodeQA.AST.Signals.Structural.{
+    AccessModifierSignal,
+    BlankLineSignal,
+    BracketSignal,
+    BranchSplitSignal,
+    ColonIndentSignal,
+    CommentDividerSignal,
+    KeywordSignal,
+    SQLBlockSignal,
+    TripleQuoteSignal
+  }
+
+  @switches [signal: :string, show_tokens: :boolean]
+
+  # Signals passed to SignalStream.run/3, in this order, unless --signal filters them.
+  @all_signals [
+    %TripleQuoteSignal{},
+    %BlankLineSignal{},
+    %KeywordSignal{},
+    %BranchSplitSignal{},
+    %AccessModifierSignal{},
+    %CommentDividerSignal{},
+    %SQLBlockSignal{},
+    %BracketSignal{},
+    %ColonIndentSignal{}
+  ]
+
+  # Entry point: reads the file given as first positional argument, prints a
+  # summary header (and the token list with --show-tokens), then one section
+  # per selected signal. Raises via Mix.raise/1 when the path is missing or
+  # does not exist.
+  @impl Mix.Task
+  def run(args) do
+    {opts, positional, _} = OptionParser.parse(args, strict: @switches)
+
+    path =
+      case positional do
+        [p | _] ->
+          p
+
+        [] ->
+          # The usage text names the required path and the --signal value.
+          Mix.raise("Usage: mix codeqa.signal_debug <path> [--signal <name>] [--show-tokens]")
+      end
+
+    unless File.exists?(path), do: Mix.raise("File not found: #{path}")
+
+    content = File.read!(path)
+    lang_mod = Language.detect(path)
+    tokens = TokenNormalizer.normalize_structural(content)
+    lines = String.split(content, "\n")
+
+    Mix.shell().info("File: #{path}")
+    Mix.shell().info("Language: #{lang_mod.name()}")
+    Mix.shell().info("Tokens: #{length(tokens)}")
+    Mix.shell().info("Lines: #{length(lines)}")
+    Mix.shell().info("")
+
+    if opts[:show_tokens] do
+      print_tokens(tokens)
+    end
+
+    signals = filter_signals(@all_signals, opts[:signal])
+
+    emissions_per_signal =
+      SignalStream.run(tokens, signals, lang_mod)
+
+    # Emissions are paired with the signals positionally.
+    Enum.zip(signals, emissions_per_signal)
+    |> Enum.each(fn {signal, emissions} ->
+      print_signal_section(signal, emissions, tokens, lines)
+    end)
+  end
+
+  defp filter_signals(signals, nil), do: signals
+
+  defp filter_signals(signals, name_filter) do
+    Enum.filter(signals, fn signal ->
+      module_name =
+        signal.__struct__
+        |> Module.split()
+        |> List.last()
+        |> String.downcase()
+
+
String.contains?(module_name, String.downcase(name_filter))
+    end)
+  end
+
+  # Dumps every token with its index, position, kind and content.
+  defp print_tokens(tokens) do
+    Mix.shell().info("=== TOKEN LIST ===")
+
+    tokens
+    |> Enum.with_index()
+    |> Enum.each(fn {token, idx} ->
+      Mix.shell().info(
+        " [#{idx}] line #{token.line} col #{token.col} #{inspect(token.kind)} #{inspect(token.content)}"
+      )
+    end)
+
+    Mix.shell().info("")
+  end
+
+  # Prints the header for one signal followed by each of its emissions.
+  defp print_signal_section(signal, emissions, tokens, lines) do
+    name = signal.__struct__ |> Module.split() |> List.last()
+    separator = String.duplicate("─", 60)
+
+    Mix.shell().info(separator)
+    Mix.shell().info("SIGNAL: #{name}")
+    Mix.shell().info("Emissions: #{length(emissions)}")
+    Mix.shell().info("")
+
+    if Enum.empty?(emissions) do
+      Mix.shell().info(" (no emissions)")
+    else
+      Enum.each(emissions, fn {_source, group, emission_name, value} ->
+        print_emission(group, emission_name, value, tokens, lines)
+      end)
+    end
+
+    Mix.shell().info("")
+  end
+
+  # :split emissions carry a token index; show the token and its source line.
+  defp print_emission(:split, name, token_idx, tokens, lines) do
+    token = Enum.at(tokens, token_idx)
+
+    line_num = token && token.line
+    line_src = line_num && Enum.at(lines, line_num - 1)
+
+    Mix.shell().info(" [SPLIT :#{name}] token[#{token_idx}] → line #{line_num}")
+
+    if line_src do
+      Mix.shell().info(" #{String.trim_trailing(line_src)}")
+    end
+
+    if token do
+      Mix.shell().info(" ^ #{inspect(token.kind)} #{inspect(token.content)}")
+    end
+
+    Mix.shell().info("")
+  end
+
+  # :enclosure emissions carry a {start_idx, end_idx} token range; show the
+  # opening line and, when it differs, the closing line.
+  defp print_emission(:enclosure, name, {start_idx, end_idx}, tokens, lines) do
+    start_token = Enum.at(tokens, start_idx)
+    end_token = Enum.at(tokens, end_idx)
+
+    start_line = start_token && start_token.line
+    end_line = end_token && end_token.line
+
+    Mix.shell().info(
+      " [ENCLOSURE :#{name}] tokens[#{start_idx}..#{end_idx}] lines #{start_line}–#{end_line}"
+    )
+
+    # Look the source lines up first and skip printing when one is absent, as the
+    # :split clause does; String.trim_trailing/1 raises on nil.
+    open_src = start_line && Enum.at(lines, start_line - 1)
+
+    if open_src do
+      Mix.shell().info(" open: #{inspect(String.trim_trailing(open_src))}")
+    end
+
+    close_src = end_line && end_line != start_line && Enum.at(lines, end_line - 1)
+
+    if close_src do
+      Mix.shell().info(" close: #{inspect(String.trim_trailing(close_src))}")
+    end
+
+    Mix.shell().info("")
+  end
+
+  # Any other emission group: print the raw value.
+  defp print_emission(group, name, value, _tokens, _lines) do
+    Mix.shell().info(" [:#{group} :#{name}] #{inspect(value)}")
+    Mix.shell().info("")
+  end
+end
diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml
new file mode 100644
index 0000000..d8fe1b8
--- /dev/null
+++ b/priv/combined_metrics/code_smells.yml
@@ -0,0 +1,540 @@
+consistent_string_quote_style:
+  _doc: "Files should use a single, consistent string quoting style throughout."
+  _fix_hint: "Use a single quote style (e.g., double quotes) consistently throughout the file"
+  _log_baseline: -18.9505
+  branching:
+    mean_branching_density: 0.0243
+    mean_non_blank_count: -0.0248
+  brevity:
+    mean_sample_size: -0.0656
+  casing_entropy:
+    mean_entropy: -0.0493
+    mean_pascal_case_count: -0.1743
+    mean_snake_case_count: -0.0451
+  compression:
+    mean_raw_bytes: -0.0672
+    mean_redundancy: 0.0207
+    mean_unique_line_ratio: -0.0338
+    mean_zlib_bytes: -0.1085
+    mean_zlib_ratio: 0.0413
+  entropy:
+    mean_char_max_entropy: -0.0077
+    mean_token_entropy: -0.0178
+    mean_token_max_entropy: -0.0141
+    mean_total_tokens: -0.0783
+    mean_vocab_size: -0.0656
+  function_metrics:
+    mean_avg_function_lines: -0.0226
+    mean_max_function_lines: -0.0644
+  halstead:
+    mean_N1_total_operators: -0.1087
+    mean_N2_total_operands: -0.2297
+    mean_difficulty: -0.1017
+    mean_effort: -0.2949
+    mean_estimated_bugs: -0.1934
+    mean_length: -0.1541
+    mean_n1_unique_operators: -0.0790
+    mean_n2_unique_operands: -0.2071
+    mean_time_to_implement_seconds: -0.2949
+    mean_vocabulary: -0.1721
+    mean_volume: -0.1933
+  heaps:
+    mean_k: -0.0368
+    mean_r_squared: -0.0080
+  identifier_length_variance:
+    mean_mean: 0.0058
+    mean_std_dev: 0.0158
+    mean_variance: 0.0316
+  indentation:
+    mean_blank_line_ratio: 0.0205
+    mean_variance: 0.0246
+  line_patterns:
+    mean_blank_line_ratio: 0.0205
+
mean_string_literal_ratio: -0.1616 + mean_unique_line_ratio: -0.0365 + magic_number_density: + mean_string_literal_ratio: 0.3018 + near_duplicate_blocks_file: + mean_sub_block_count: -0.1804 + ngram: + mean_bigram_hapax_fraction: -0.0101 + mean_bigram_repeated_unique: -0.0915 + mean_bigram_repetition_rate: 0.0277 + mean_bigram_total: -0.0785 + mean_bigram_unique: -0.1146 + mean_trigram_repeated_unique: -0.1104 + mean_trigram_repetition_rate: 0.0499 + mean_trigram_total: -0.0787 + mean_trigram_unique: -0.1182 + punctuation_density: + mean_arrow_density: 0.0674 + mean_bracket_nonalpha_prefix_count: 1.0103 + mean_bracket_nonalpha_suffix_count: 2.0000 + mean_colon_suffix_density: 0.0458 + mean_dot_count: -0.1743 + mean_exclamation_density: 0.0424 + mean_id_nonalpha_suffix_density: 0.0783 + readability: + mean_avg_line_length: -0.0444 + mean_avg_tokens_per_line: -0.0535 + mean_flesch_adapted: 0.0046 + mean_fog_adapted: -0.0301 + mean_total_lines: -0.0248 + symbol_density: + mean_density: -0.0325 + mean_distinct_symbol_types: -0.0966 + mean_symbol_count: -0.0999 + vocabulary: + mean_mattr: -0.0304 + mean_raw_ttr: -0.0079 + mean_total_identifiers: -0.0589 + mean_unique_identifiers: -0.0668 + vowel_density: + mean_total_chars: -0.0531 + zipf: + mean_total_tokens: -0.0783 + mean_vocab_size: -0.0656 + +no_dead_code_after_return: + _doc: "There should be no unreachable statements after a return or early exit." 
+ _fix_hint: "Remove unreachable statements after return/raise/exit — they can never execute" + _log_baseline: -57.2281 + branching: + mean_branch_count: -2.0000 + mean_branching_density: -1.4201 + mean_non_blank_count: -0.5815 + brevity: + mean_sample_size: -0.2610 + casing_entropy: + mean_entropy: -0.2355 + mean_other_count: -0.8708 + mean_pascal_case_count: -0.5752 + mean_snake_case_count: -0.3869 + compression: + mean_raw_bytes: -0.4531 + mean_redundancy: -0.0467 + mean_zlib_bytes: -0.3558 + mean_zlib_ratio: -0.0974 + entropy: + mean_char_entropy: 0.0250 + mean_char_max_entropy: -0.0205 + mean_char_normalized: 0.0455 + mean_token_entropy: -0.0475 + mean_token_max_entropy: -0.0575 + mean_token_normalized: 0.0099 + mean_total_tokens: -0.3093 + mean_vocab_size: -0.2610 + function_metrics: + mean_avg_function_lines: -0.4255 + mean_avg_param_count: 0.1143 + mean_function_count: -0.1143 + mean_max_function_lines: -0.5062 + halstead: + mean_N1_total_operators: -0.2185 + mean_N2_total_operands: -0.4051 + mean_difficulty: -0.1769 + mean_effort: -0.5126 + mean_estimated_bugs: -0.3357 + mean_length: -0.2795 + mean_n1_unique_operators: -0.0857 + mean_n2_unique_operands: -0.3139 + mean_time_to_implement_seconds: -0.5126 + mean_vocabulary: -0.2525 + mean_volume: -0.3357 + heaps: + mean_k: -0.1169 + identifier_length_variance: + mean_max: -0.4367 + mean_mean: 0.0078 + mean_std_dev: -0.2478 + mean_variance: -0.4957 + indentation: + mean_blank_line_ratio: 0.2883 + mean_mean_depth: -0.4448 + mean_variance: -0.6173 + line_patterns: + mean_blank_line_ratio: 0.2883 + mean_string_literal_ratio: -0.8289 + mean_unique_line_ratio: -0.0289 + magic_number_density: + mean_density: 0.2821 + mean_string_literal_ratio: -0.8289 + near_duplicate_blocks_file: + mean_sub_block_count: -0.3612 + ngram: + mean_bigram_hapax_fraction: 0.0142 + mean_bigram_repeated_unique: -0.3335 + mean_bigram_repetition_rate: -0.0114 + mean_bigram_total: -0.3100 + mean_bigram_unique: -0.3022 + 
mean_trigram_hapax_fraction: -0.0576 + mean_trigram_repeated_unique: -0.0894 + mean_trigram_repetition_rate: 0.0890 + mean_trigram_total: -0.3107 + mean_trigram_unique: -0.3313 + punctuation_density: + mean_arrow_density: -1.1156 + mean_bracket_nonalpha_prefix_count: 1.0397 + mean_bracket_nonalpha_suffix_count: -0.4541 + mean_colon_suffix_density: 0.3588 + mean_dot_count: -1.0081 + mean_id_nonalpha_suffix_density: 0.0111 + readability: + mean_avg_line_length: 0.1309 + mean_avg_sub_words_per_id: -0.0415 + mean_avg_tokens_per_line: 0.2722 + mean_flesch_adapted: 0.0243 + mean_fog_adapted: -0.3299 + mean_total_lines: -0.5815 + symbol_density: + mean_density: 0.2141 + mean_symbol_count: -0.2386 + vocabulary: + mean_mattr: -0.0870 + mean_raw_ttr: 0.0851 + mean_total_identifiers: -0.4256 + mean_unique_identifiers: -0.3406 + vowel_density: + mean_total_chars: -0.4179 + zipf: + mean_exponent: -0.0067 + mean_total_tokens: -0.3093 + mean_vocab_size: -0.2610 + +no_debug_print_statements: + _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." 
+ _fix_hint: "Remove IO.inspect/IO.puts/console.log debug output — use Logger or remove entirely" + _log_baseline: -86.5160 + branching: + mean_branch_count: -0.3540 + mean_branching_density: 0.0181 + mean_max_nesting_depth: -0.4025 + mean_non_blank_count: -0.3719 + brevity: + mean_sample_size: -0.2128 + casing_entropy: + mean_entropy: 0.0285 + mean_other_count: 0.1169 + mean_pascal_case_count: -1.0141 + mean_snake_case_count: -0.5844 + compression: + mean_raw_bytes: -0.4490 + mean_redundancy: -0.0518 + mean_unique_line_ratio: -0.0864 + mean_zlib_bytes: -0.3414 + mean_zlib_ratio: -0.1076 + entropy: + mean_char_entropy: -0.0058 + mean_char_normalized: -0.0059 + mean_token_entropy: -0.0215 + mean_token_max_entropy: -0.0460 + mean_token_normalized: 0.0245 + mean_total_tokens: -0.5168 + mean_vocab_size: -0.2128 + function_metrics: + mean_avg_function_lines: -0.4292 + mean_max_function_lines: -0.4243 + halstead: + mean_N1_total_operators: -0.3780 + mean_N2_total_operands: -0.4000 + mean_difficulty: -0.2494 + mean_effort: -0.6911 + mean_estimated_bugs: -0.4418 + mean_length: -0.3857 + mean_n1_unique_operators: -0.1473 + mean_n2_unique_operands: -0.2979 + mean_time_to_implement_seconds: -0.6911 + mean_vocabulary: -0.2559 + mean_volume: -0.4417 + heaps: + mean_beta: 0.0307 + mean_k: 0.0324 + mean_r_squared: -0.0077 + identifier_length_variance: + mean_mean: 0.1266 + mean_std_dev: 0.0871 + mean_variance: 0.1743 + indentation: + mean_blank_line_ratio: -0.0499 + mean_max_depth: -0.2215 + mean_mean_depth: -0.1903 + mean_variance: -0.2218 + line_patterns: + mean_blank_line_ratio: -0.0499 + mean_max_nesting_depth: -0.4025 + mean_string_literal_ratio: -1.0798 + mean_unique_line_ratio: -0.0602 + magic_number_density: + mean_density: 0.5227 + mean_string_literal_ratio: -1.0798 + near_duplicate_blocks_file: + mean_sub_block_count: -0.4114 + ngram: + mean_bigram_hapax_fraction: -0.0183 + mean_bigram_repeated_unique: -0.3069 + mean_bigram_repetition_rate: -0.0758 + mean_bigram_total: 
-0.5178 + mean_bigram_unique: -0.3466 + mean_trigram_hapax_fraction: -0.0225 + mean_trigram_repeated_unique: -0.3221 + mean_trigram_repetition_rate: -0.0929 + mean_trigram_total: -0.5188 + mean_trigram_unique: -0.4061 + punctuation_density: + mean_arrow_density: 0.4321 + mean_bracket_nonalpha_prefix_count: -2.0000 + mean_bracket_nonalpha_suffix_count: -1.2715 + mean_colon_suffix_density: -0.1696 + mean_dot_count: -0.9095 + mean_id_nonalpha_suffix_density: -0.0078 + readability: + mean_avg_line_length: -0.0795 + mean_avg_sub_words_per_id: 0.0617 + mean_avg_tokens_per_line: -0.1449 + mean_flesch_adapted: -0.0473 + mean_fog_adapted: -0.0298 + mean_total_lines: -0.3719 + symbol_density: + mean_density: -0.0878 + mean_distinct_symbol_types: -0.1242 + mean_symbol_count: -0.5369 + vocabulary: + mean_mattr: 0.2274 + mean_raw_ttr: 0.3523 + mean_total_identifiers: -0.5867 + mean_unique_identifiers: -0.2346 + vowel_density: + mean_total_chars: -0.4601 + zipf: + mean_exponent: -0.1196 + mean_r_squared: 0.0127 + mean_total_tokens: -0.5168 + mean_vocab_size: -0.2128 + +no_fixme_comments: + _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." 
+ _fix_hint: "Resolve FIXME/XXX/HACK comments before merging — they indicate known unresolved issues" + _log_baseline: -2.1522 + branching: + mean_branch_count: 0.1755 + mean_branching_density: 0.1504 + mean_non_blank_count: 0.0252 + brevity: + mean_sample_size: -0.0887 + casing_entropy: + mean_entropy: -0.0225 + mean_pascal_case_count: -0.0769 + mean_snake_case_count: -0.0130 + comment_structure: + mean_comment_line_count: -0.6667 + mean_comment_line_ratio: 0.5464 + mean_todo_fixme_count: -0.7195 + compression: + mean_raw_bytes: 0.0176 + mean_redundancy: 0.0275 + mean_unique_line_ratio: -0.0049 + mean_zlib_bytes: -0.0211 + mean_zlib_ratio: 0.0387 + entropy: + mean_char_entropy: 0.0020 + mean_char_max_entropy: -0.0054 + mean_char_normalized: 0.0074 + mean_token_entropy: -0.0266 + mean_token_max_entropy: -0.0180 + mean_token_normalized: -0.0086 + mean_total_tokens: 0.0600 + mean_vocab_size: -0.0887 + function_metrics: + mean_avg_function_lines: 0.0209 + mean_avg_param_count: 0.0155 + mean_function_count: 0.0232 + mean_max_function_lines: 0.1330 + halstead: + mean_N1_total_operators: 0.1156 + mean_N2_total_operands: 0.0046 + mean_difficulty: 0.0964 + mean_effort: 0.1556 + mean_estimated_bugs: 0.0592 + mean_length: 0.0745 + mean_n1_unique_operators: -0.0082 + mean_n2_unique_operands: -0.1000 + mean_time_to_implement_seconds: 0.1556 + mean_vocabulary: -0.0750 + mean_volume: 0.0591 + heaps: + mean_beta: -0.0665 + mean_k: 0.1089 + identifier_length_variance: + mean_mean: 0.0027 + mean_std_dev: 0.0259 + mean_variance: 0.0517 + indentation: + mean_blank_line_ratio: 0.0697 + mean_max_depth: 0.0646 + mean_mean_depth: 0.0482 + mean_variance: 0.1889 + line_patterns: + mean_blank_line_ratio: 0.0697 + mean_string_literal_ratio: -0.0060 + mean_unique_line_ratio: -0.0041 + magic_number_density: + mean_density: -0.0646 + mean_string_literal_ratio: 0.0564 + near_duplicate_blocks_file: + mean_sub_block_count: 0.1090 + ngram: + mean_bigram_hapax_fraction: -0.0378 + 
mean_bigram_repeated_unique: 0.0858 + mean_bigram_repetition_rate: 0.0747 + mean_bigram_total: 0.0601 + mean_bigram_unique: -0.0026 + mean_trigram_hapax_fraction: -0.0202 + mean_trigram_repeated_unique: 0.1423 + mean_trigram_repetition_rate: 0.1085 + mean_trigram_total: 0.0602 + mean_trigram_unique: 0.0286 + punctuation_density: + mean_arrow_density: 0.0123 + mean_bracket_nonalpha_prefix_count: 0.1755 + mean_bracket_nonalpha_suffix_count: 0.2840 + mean_colon_suffix_density: -0.1540 + mean_dot_count: 0.1361 + mean_exclamation_density: 2.0000 + mean_id_nonalpha_suffix_density: 0.0200 + mean_question_mark_density: 0.2981 + readability: + mean_avg_line_length: 0.0494 + mean_avg_sub_words_per_id: 0.0089 + mean_avg_tokens_per_line: -0.0127 + mean_flesch_adapted: -0.0070 + mean_fog_adapted: -0.0103 + mean_total_lines: 0.0728 + symbol_density: + mean_density: 0.1041 + mean_symbol_count: 0.1218 + vocabulary: + mean_mattr: -0.1086 + mean_raw_ttr: -0.1007 + mean_total_identifiers: -0.0217 + mean_unique_identifiers: -0.1224 + vowel_density: + mean_total_chars: -0.0190 + zipf: + mean_exponent: 0.0584 + mean_r_squared: 0.0015 + mean_total_tokens: 0.0600 + mean_vocab_size: -0.0887 + +no_nested_ternary: + _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." 
+ _fix_hint: "Replace nested ternary/conditional expressions with if-else blocks or pattern matching" + _log_baseline: 7.2208 + branching: + mean_branch_count: -0.5662 + mean_branching_density: -0.3441 + mean_max_nesting_depth: 0.1824 + mean_non_blank_count: -0.2221 + brevity: + mean_sample_size: 0.0486 + casing_entropy: + mean_entropy: 0.2495 + mean_other_count: 0.7455 + mean_pascal_case_count: 0.1237 + mean_snake_case_count: 0.0885 + compression: + mean_raw_bytes: -0.0141 + mean_redundancy: -0.0117 + mean_unique_line_ratio: 0.1154 + mean_zlib_bytes: 0.0170 + mean_zlib_ratio: -0.0312 + entropy: + mean_char_entropy: 0.0689 + mean_char_max_entropy: 0.0024 + mean_char_normalized: 0.0665 + mean_token_entropy: -0.0014 + mean_token_max_entropy: 0.0110 + mean_token_normalized: -0.0124 + mean_total_tokens: 0.1324 + mean_vocab_size: 0.0486 + function_metrics: + mean_avg_function_lines: -0.7403 + mean_avg_param_count: -0.0277 + mean_function_count: 0.5579 + mean_max_function_lines: -0.4954 + halstead: + mean_N1_total_operators: 0.1382 + mean_N2_total_operands: 0.1252 + mean_difficulty: 0.0773 + mean_effort: 0.2218 + mean_estimated_bugs: 0.1445 + mean_length: 0.1335 + mean_n1_unique_operators: 0.0128 + mean_n2_unique_operands: 0.0608 + mean_time_to_implement_seconds: 0.2218 + mean_vocabulary: 0.0480 + mean_volume: 0.1445 + heaps: + mean_beta: -0.0334 + mean_k: 0.0563 + identifier_length_variance: + mean_max: 0.0170 + mean_std_dev: -0.0032 + mean_variance: -0.0065 + indentation: + mean_blank_line_ratio: 0.3825 + mean_max_depth: -0.2891 + mean_mean_depth: -0.2922 + mean_variance: -0.5254 + line_patterns: + mean_blank_line_ratio: 0.3825 + mean_max_nesting_depth: 0.1824 + mean_string_literal_ratio: 0.0146 + mean_unique_line_ratio: 0.1591 + magic_number_density: + mean_density: -0.1634 + mean_magic_number_count: -0.0310 + mean_string_literal_ratio: 0.0146 + near_duplicate_blocks_file: + mean_sub_block_count: 0.5472 + ngram: + mean_bigram_hapax_fraction: -0.0464 + 
mean_bigram_repeated_unique: 0.1405 + mean_bigram_repetition_rate: 0.0564 + mean_bigram_total: 0.1327 + mean_bigram_unique: 0.0600 + mean_trigram_hapax_fraction: -0.0321 + mean_trigram_repeated_unique: 0.1699 + mean_trigram_repetition_rate: 0.1003 + mean_trigram_total: 0.1331 + mean_trigram_unique: 0.0704 + punctuation_density: + mean_bracket_nonalpha_prefix_count: 0.5781 + mean_bracket_nonalpha_suffix_count: 0.7295 + mean_colon_suffix_density: -0.6851 + mean_dot_count: -0.1824 + mean_exclamation_density: 2.0000 + mean_id_nonalpha_suffix_density: 0.2589 + readability: + mean_avg_line_length: 0.2148 + mean_avg_sub_words_per_id: 0.0173 + mean_avg_tokens_per_line: 0.3545 + mean_flesch_adapted: -0.0367 + mean_fog_adapted: 0.3545 + mean_total_lines: -0.2221 + symbol_density: + mean_density: 0.2615 + mean_distinct_symbol_types: 0.0377 + mean_symbol_count: 0.2475 + vocabulary: + mean_mattr: -0.0453 + mean_raw_ttr: -0.0380 + mean_total_identifiers: 0.1302 + mean_unique_identifiers: 0.0921 + vowel_density: + mean_total_chars: 0.1293 + zipf: + mean_exponent: 0.0240 + mean_r_squared: 0.0111 + mean_total_tokens: 0.1324 + mean_vocab_size: 0.0486 + diff --git a/priv/combined_metrics/consistency.yml b/priv/combined_metrics/consistency.yml new file mode 100644 index 0000000..8dbd172 --- /dev/null +++ b/priv/combined_metrics/consistency.yml @@ -0,0 +1,310 @@ +consistent_casing_within_file: + _doc: "A file should use one naming convention throughout — no mixing of camelCase and snake_case for the same kind of identifier." 
+ _fix_hint: "Pick one casing convention (snake_case or camelCase) and apply it uniformly" + _log_baseline: -2.4622 + brevity: + mean_sample_size: -0.0471 + casing_entropy: + mean_camel_case_count: -2.0000 + mean_entropy: -0.4219 + mean_snake_case_count: 0.2745 + compression: + mean_raw_bytes: 0.0213 + mean_redundancy: 0.0219 + mean_zlib_bytes: -0.0194 + mean_zlib_ratio: 0.0407 + entropy: + mean_char_entropy: -0.0126 + mean_char_max_entropy: -0.0170 + mean_char_normalized: 0.0044 + mean_token_entropy: -0.0090 + mean_token_max_entropy: -0.0101 + mean_vocab_size: -0.0471 + halstead: + mean_difficulty: 0.0629 + mean_effort: 0.0530 + mean_estimated_bugs: -0.0099 + mean_n2_unique_operands: -0.0629 + mean_time_to_implement_seconds: 0.0530 + mean_vocabulary: -0.0456 + mean_volume: -0.0099 + heaps: + mean_beta: -0.0232 + mean_k: 0.0253 + identifier_length_variance: + mean_mean: 0.0342 + mean_std_dev: 0.0123 + mean_variance: 0.0246 + ngram: + mean_bigram_hapax_fraction: -0.0071 + mean_bigram_repetition_rate: 0.0267 + mean_bigram_unique: -0.0197 + mean_trigram_hapax_fraction: -0.0122 + mean_trigram_repeated_unique: 0.0698 + mean_trigram_repetition_rate: 0.0874 + mean_trigram_unique: -0.0172 + readability: + mean_avg_line_length: 0.0221 + symbol_density: + mean_density: -0.0214 + vocabulary: + mean_mattr: -0.0679 + mean_raw_ttr: -0.0715 + mean_unique_identifiers: -0.0714 + vowel_density: + mean_total_chars: 0.0342 + zipf: + mean_exponent: 0.0265 + mean_vocab_size: -0.0471 + +consistent_error_return_shape: + _doc: "All functions in a module should return errors in the same shape — mixed `nil`, `false`, and `{:error, _}` returns are confusing." 
+ _fix_hint: "Use a single error return format (e.g., {:error, reason}) consistently throughout" + _log_baseline: 31.6243 + branching: + mean_branch_count: -0.2178 + mean_branching_density: -0.1258 + mean_non_blank_count: -0.0921 + brevity: + mean_sample_size: 0.0231 + casing_entropy: + mean_entropy: -0.0372 + mean_snake_case_count: 0.0582 + compression: + mean_raw_bytes: -0.0234 + mean_redundancy: 0.0058 + mean_unique_line_ratio: 0.0576 + mean_zlib_bytes: -0.0378 + mean_zlib_ratio: 0.0144 + entropy: + mean_char_entropy: 0.0159 + mean_char_normalized: 0.0160 + mean_token_entropy: -0.0277 + mean_token_normalized: -0.0328 + mean_total_tokens: 0.0361 + mean_vocab_size: 0.0231 + function_metrics: + mean_avg_function_lines: -0.0914 + halstead: + mean_N1_total_operators: 0.1217 + mean_N2_total_operands: 0.0275 + mean_difficulty: 0.0936 + mean_effort: 0.1918 + mean_estimated_bugs: 0.0982 + mean_length: 0.0900 + mean_n1_unique_operators: 0.0835 + mean_n2_unique_operands: 0.0174 + mean_time_to_implement_seconds: 0.1918 + mean_vocabulary: 0.0368 + mean_volume: 0.0982 + heaps: + mean_beta: -0.0402 + mean_k: 0.0903 + identifier_length_variance: + mean_std_dev: 0.0324 + mean_variance: 0.0648 + indentation: + mean_blank_line_ratio: 0.0185 + mean_mean_depth: -0.0638 + mean_variance: -0.0831 + line_patterns: + mean_blank_line_ratio: 0.0185 + mean_string_literal_ratio: -2.0000 + mean_unique_line_ratio: 0.0559 + magic_number_density: + mean_density: -0.0557 + mean_string_literal_ratio: -2.0000 + near_duplicate_blocks_file: + mean_sub_block_count: 0.1553 + ngram: + mean_bigram_hapax_fraction: -0.0430 + mean_bigram_repeated_unique: 0.0136 + mean_bigram_repetition_rate: 0.0601 + mean_bigram_total: 0.0362 + mean_bigram_unique: -0.0667 + mean_trigram_hapax_fraction: -0.0176 + mean_trigram_repeated_unique: 0.0150 + mean_trigram_repetition_rate: 0.0920 + mean_trigram_total: 0.0363 + mean_trigram_unique: -0.0443 + punctuation_density: + mean_arrow_density: -0.3821 + 
mean_bracket_nonalpha_suffix_count: 0.0804 + mean_colon_suffix_density: -0.0384 + mean_id_nonalpha_suffix_density: 0.0720 + readability: + mean_avg_line_length: 0.0716 + mean_avg_sub_words_per_id: 0.0302 + mean_avg_tokens_per_line: 0.1283 + mean_flesch_adapted: -0.0449 + mean_fog_adapted: 0.0827 + mean_total_lines: -0.0921 + symbol_density: + mean_density: 0.1360 + mean_symbol_count: 0.1121 + vocabulary: + mean_mattr: 0.0268 + mean_raw_ttr: -0.0151 + mean_total_identifiers: 0.0519 + mean_unique_identifiers: 0.0368 + vowel_density: + mean_total_chars: 0.0479 + zipf: + mean_exponent: 0.0181 + mean_total_tokens: 0.0361 + mean_vocab_size: 0.0231 + +consistent_function_style: + _doc: "A module should not mix one-liner and multi-clause function definitions for the same concern." + _fix_hint: "Use a consistent function definition style — all def or all defp, one-liner or block form" + _log_baseline: -2.0498 + branching: + mean_branch_count: -0.1610 + mean_branching_density: -0.3349 + mean_max_nesting_depth: -0.1610 + mean_non_blank_count: 0.1738 + brevity: + mean_sample_size: 0.0028 + casing_entropy: + mean_entropy: -0.0562 + mean_other_count: -0.2753 + mean_pascal_case_count: -0.0379 + mean_snake_case_count: 0.0228 + compression: + mean_raw_bytes: 0.0313 + mean_redundancy: 0.0188 + mean_unique_line_ratio: -0.0440 + mean_zlib_bytes: 0.0037 + mean_zlib_ratio: 0.0276 + entropy: + mean_char_entropy: -0.0072 + mean_char_normalized: -0.0071 + mean_token_entropy: 0.0058 + mean_token_normalized: 0.0052 + mean_vocab_size: 0.0028 + function_metrics: + mean_avg_function_lines: 0.0608 + mean_avg_param_count: -0.0099 + mean_function_count: 0.0939 + mean_max_function_lines: -0.0797 + halstead: + mean_N2_total_operands: 0.0471 + mean_difficulty: 0.0186 + mean_effort: 0.0362 + mean_estimated_bugs: 0.0176 + mean_length: 0.0157 + mean_n1_unique_operators: -0.0122 + mean_n2_unique_operands: 0.0162 + mean_time_to_implement_seconds: 0.0362 + mean_vocabulary: 0.0091 + mean_volume: 0.0176 + 
heaps: + mean_beta: 0.0024 + identifier_length_variance: + mean_mean: 0.0040 + mean_std_dev: 0.0053 + mean_variance: 0.0106 + indentation: + mean_blank_line_ratio: -0.0991 + mean_max_depth: -0.1143 + mean_mean_depth: -0.0203 + mean_variance: -0.1730 + line_patterns: + mean_blank_line_ratio: -0.0991 + mean_max_nesting_depth: -0.1610 + mean_unique_line_ratio: -0.0456 + near_duplicate_blocks_file: + mean_sub_block_count: 0.0594 + ngram: + mean_bigram_hapax_fraction: 0.0037 + mean_bigram_repeated_unique: -0.0041 + mean_bigram_repetition_rate: -0.0091 + mean_bigram_unique: 0.0065 + mean_trigram_repeated_unique: -0.0058 + punctuation_density: + mean_arrow_density: 2.0000 + mean_bracket_nonalpha_suffix_count: -0.0781 + mean_colon_suffix_density: -0.1318 + mean_dot_count: -0.0204 + mean_id_nonalpha_suffix_density: -0.0132 + readability: + mean_avg_line_length: -0.1471 + mean_avg_sub_words_per_id: 0.0030 + mean_avg_tokens_per_line: -0.1751 + mean_flesch_adapted: 0.0147 + mean_fog_adapted: -0.1412 + mean_total_lines: 0.1738 + symbol_density: + mean_density: -0.0473 + mean_symbol_count: -0.0159 + vocabulary: + mean_mattr: -0.0035 + mean_raw_ttr: -0.0090 + mean_total_identifiers: 0.0133 + mean_unique_identifiers: 0.0042 + vowel_density: + mean_total_chars: 0.0173 + zipf: + mean_exponent: -0.0054 + mean_vocab_size: 0.0028 + +same_concept_same_name: + _doc: "The same domain concept should use the same name throughout a file — mixing `user`, `usr`, and `u` for the same thing harms readability." 
+ _fix_hint: "Use the same name for the same concept everywhere — avoid synonyms like user/account/member" + _log_baseline: -10.7039 + brevity: + mean_sample_size: -1.3837 + compression: + mean_raw_bytes: 0.1823 + mean_redundancy: 0.4046 + mean_unique_line_ratio: -0.3343 + mean_zlib_bytes: -0.8496 + mean_zlib_ratio: 1.0316 + entropy: + mean_char_entropy: -0.1859 + mean_char_normalized: -0.1851 + mean_token_entropy: -0.3646 + mean_token_max_entropy: -0.2981 + mean_vocab_size: -1.3837 + halstead: + mean_difficulty: 1.9192 + mean_effort: 1.6104 + mean_estimated_bugs: -0.3082 + mean_n2_unique_operands: -1.9192 + mean_time_to_implement_seconds: 1.6104 + mean_vocabulary: -1.4248 + mean_volume: -0.3088 + heaps: + mean_beta: -0.6035 + mean_k: 0.5246 + identifier_length_variance: + mean_mean: 0.3369 + mean_std_dev: -0.2793 + mean_variance: -0.5589 + line_patterns: + mean_unique_line_ratio: -0.7135 + ngram: + mean_bigram_hapax_fraction: -0.6648 + mean_bigram_repeated_unique: -0.2150 + mean_bigram_repetition_rate: 0.6714 + mean_bigram_unique: -1.2077 + mean_trigram_hapax_fraction: -0.6812 + mean_trigram_repeated_unique: 1.3250 + mean_trigram_repetition_rate: 1.6604 + mean_trigram_unique: -1.0154 + readability: + mean_avg_line_length: 0.1889 + mean_avg_sub_words_per_id: -0.1821 + mean_flesch_adapted: 0.2414 + symbol_density: + mean_density: -0.1858 + vocabulary: + mean_mattr: -1.7927 + mean_raw_ttr: -2.0000 + mean_unique_identifiers: -1.9992 + vowel_density: + mean_total_chars: 0.3371 + zipf: + mean_exponent: 0.7915 + mean_vocab_size: -1.3837 + diff --git a/priv/combined_metrics/dependencies.yml b/priv/combined_metrics/dependencies.yml new file mode 100644 index 0000000..000ecbf --- /dev/null +++ b/priv/combined_metrics/dependencies.yml @@ -0,0 +1,319 @@ +import_count_under_10: + _doc: "Files should import fewer than 10 modules; high import counts signal excessive coupling." 
+ _fix_hint: "Reduce imports — split large modules or use fully-qualified names for rarely-used deps" + _log_baseline: 7.0687 + branching: + mean_branch_count: 0.2110 + mean_branching_density: -1.0683 + mean_max_nesting_depth: 0.1234 + mean_non_blank_count: -0.0219 + brevity: + mean_sample_size: 0.0119 + casing_entropy: + mean_entropy: -0.0396 + mean_pascal_case_count: -0.1657 + mean_snake_case_count: 0.0025 + comment_structure: + mean_comment_line_ratio: -1.2802 + compression: + mean_raw_bytes: -0.0133 + mean_redundancy: -0.0135 + mean_unique_line_ratio: -0.0046 + mean_zlib_bytes: 0.0144 + mean_zlib_ratio: -0.0277 + entropy: + mean_char_entropy: 0.0035 + mean_char_max_entropy: 0.0088 + mean_char_normalized: -0.0053 + mean_token_entropy: -0.0040 + mean_token_max_entropy: 0.0026 + mean_token_normalized: -0.0066 + mean_total_tokens: -0.0251 + mean_vocab_size: 0.0119 + function_metrics: + mean_avg_function_lines: -0.0688 + mean_avg_param_count: -0.0555 + mean_function_count: 0.1234 + mean_max_function_lines: 0.0944 + mean_max_param_count: -0.1234 + halstead: + mean_N1_total_operators: -0.0138 + mean_N2_total_operands: -0.0464 + mean_difficulty: -0.0353 + mean_effort: -0.0606 + mean_estimated_bugs: -0.0253 + mean_length: -0.0260 + mean_n1_unique_operators: 0.0111 + mean_time_to_implement_seconds: -0.0606 + mean_vocabulary: 0.0032 + mean_volume: -0.0253 + heaps: + mean_beta: -0.0893 + mean_k: 0.3293 + mean_r_squared: 0.0101 + identifier_length_variance: + mean_max: 0.0679 + mean_mean: 0.0712 + mean_std_dev: 0.0609 + mean_variance: 0.1218 + indentation: + mean_blank_line_ratio: 0.1478 + mean_max_depth: -0.0876 + mean_mean_depth: -0.0397 + mean_variance: -0.2328 + line_patterns: + mean_blank_line_ratio: 0.1478 + mean_max_nesting_depth: 0.1234 + mean_string_literal_ratio: 0.0265 + mean_unique_line_ratio: -0.0050 + magic_number_density: + mean_density: 0.5219 + mean_magic_number_count: 0.4898 + mean_string_literal_ratio: 0.0265 + near_duplicate_blocks_file: + 
mean_sub_block_count: 0.1110 + ngram: + mean_bigram_repeated_unique: 0.0034 + mean_bigram_repetition_rate: -0.0129 + mean_bigram_total: -0.0252 + mean_bigram_unique: 0.0024 + mean_trigram_hapax_fraction: -0.0051 + mean_trigram_repeated_unique: 0.0257 + mean_trigram_repetition_rate: -0.0296 + mean_trigram_total: -0.0252 + mean_trigram_unique: 0.0062 + punctuation_density: + mean_bracket_nonalpha_prefix_count: 0.5922 + mean_bracket_nonalpha_suffix_count: 0.1086 + mean_colon_suffix_density: -0.1389 + mean_dot_count: -0.1234 + mean_id_nonalpha_suffix_density: -0.0141 + mean_question_mark_density: -2.0000 + readability: + mean_avg_line_length: 0.0110 + mean_avg_sub_words_per_id: 0.0173 + mean_flesch_adapted: -0.0204 + mean_fog_adapted: 0.2028 + mean_total_lines: -0.0265 + symbol_density: + mean_density: 0.0223 + mean_distinct_symbol_types: 0.0643 + mean_symbol_count: 0.0087 + vocabulary: + mean_mattr: 0.0037 + mean_raw_ttr: 0.0453 + mean_total_identifiers: -0.0543 + mean_unique_identifiers: -0.0090 + vowel_density: + mean_total_chars: 0.0169 + zipf: + mean_exponent: -0.0152 + mean_r_squared: 0.0050 + mean_total_tokens: -0.0251 + mean_vocab_size: 0.0119 + +low_coupling: + _doc: "Modules should depend on few external symbols — a low unique-operand count relative to total is a proxy for tight coupling." 
+ _fix_hint: "Reduce dependencies between modules — introduce interfaces or narrow the public API" + _log_baseline: -38.4249 + branching: + mean_branch_count: 0.0745 + mean_branching_density: 0.2097 + mean_max_nesting_depth: -0.1353 + mean_non_blank_count: -0.1353 + brevity: + mean_sample_size: -0.1276 + casing_entropy: + mean_entropy: -0.0947 + mean_pascal_case_count: -0.3722 + mean_snake_case_count: -0.1208 + compression: + mean_raw_bytes: -0.1657 + mean_redundancy: 0.0126 + mean_unique_line_ratio: -0.0296 + mean_zlib_bytes: -0.1918 + mean_zlib_ratio: 0.0262 + entropy: + mean_char_entropy: -0.0044 + mean_char_max_entropy: -0.0152 + mean_char_normalized: 0.0108 + mean_token_entropy: -0.0215 + mean_token_max_entropy: -0.0285 + mean_token_normalized: 0.0070 + mean_total_tokens: -0.1602 + mean_vocab_size: -0.1276 + function_metrics: + mean_avg_function_lines: -0.3103 + mean_function_count: 0.1353 + mean_max_function_lines: -0.3573 + halstead: + mean_N1_total_operators: -0.1645 + mean_N2_total_operands: -0.1785 + mean_difficulty: -0.1429 + mean_effort: -0.3500 + mean_estimated_bugs: -0.2072 + mean_length: -0.1700 + mean_n1_unique_operators: -0.1406 + mean_n2_unique_operands: -0.1763 + mean_time_to_implement_seconds: -0.3500 + mean_vocabulary: -0.1655 + mean_volume: -0.2072 + heaps: + mean_beta: -0.0557 + mean_k: 0.1362 + mean_r_squared: -0.0234 + identifier_length_variance: + mean_max: -0.0427 + mean_std_dev: -0.0173 + mean_variance: -0.0345 + indentation: + mean_blank_line_ratio: -0.0752 + mean_max_depth: -0.0352 + mean_mean_depth: -0.1381 + mean_variance: -0.2519 + line_patterns: + mean_blank_line_ratio: -0.0752 + mean_max_nesting_depth: -0.1353 + mean_string_literal_ratio: 0.1282 + mean_unique_line_ratio: -0.0312 + magic_number_density: + mean_density: -2.0000 + mean_string_literal_ratio: 0.1282 + near_duplicate_blocks_file: + mean_sub_block_count: -0.0902 + ngram: + mean_bigram_hapax_fraction: -0.0247 + mean_bigram_repeated_unique: -0.1792 + 
mean_bigram_repetition_rate: 0.0301 + mean_bigram_total: -0.1605 + mean_bigram_unique: -0.2135 + mean_trigram_hapax_fraction: -0.0265 + mean_trigram_repeated_unique: -0.1784 + mean_trigram_repetition_rate: 0.0750 + mean_trigram_total: -0.1608 + mean_trigram_unique: -0.2352 + punctuation_density: + mean_arrow_density: -0.0373 + mean_bracket_nonalpha_prefix_count: -0.4412 + mean_bracket_nonalpha_suffix_count: 0.2314 + mean_colon_suffix_density: -0.0705 + mean_dot_count: -0.2609 + mean_exclamation_density: 1.8877 + mean_id_nonalpha_suffix_density: -0.0113 + readability: + mean_avg_line_length: -0.0307 + mean_avg_sub_words_per_id: 0.0032 + mean_avg_tokens_per_line: -0.0248 + mean_fog_adapted: 0.0082 + mean_total_lines: -0.1353 + symbol_density: + mean_density: -0.0137 + mean_distinct_symbol_types: -0.0960 + mean_symbol_count: -0.1794 + vocabulary: + mean_mattr: -0.0983 + mean_raw_ttr: 0.0304 + mean_total_identifiers: -0.1705 + mean_unique_identifiers: -0.1402 + vowel_density: + mean_total_chars: -0.1694 + zipf: + mean_exponent: 0.0065 + mean_r_squared: -0.0205 + mean_total_tokens: -0.1602 + mean_vocab_size: -0.1276 + +no_wildcard_imports: + _doc: "Wildcard imports (`import *`, `using Module`) pollute the local namespace and hide dependencies." 
+ _fix_hint: "Replace wildcard imports with explicit named imports for clarity and reduced scope pollution" + _log_baseline: -9.4788 + branching: + mean_branching_density: 0.0249 + mean_non_blank_count: -0.0268 + brevity: + mean_sample_size: -0.0077 + casing_entropy: + mean_entropy: -0.0070 + mean_snake_case_count: 0.0189 + compression: + mean_raw_bytes: 0.0310 + mean_unique_line_ratio: -0.0046 + mean_zlib_bytes: 0.0331 + entropy: + mean_total_tokens: 0.0131 + mean_vocab_size: -0.0077 + function_metrics: + mean_avg_function_lines: -0.0263 + halstead: + mean_N1_total_operators: 0.0202 + mean_N2_total_operands: 0.0271 + mean_difficulty: 0.0600 + mean_effort: 0.0778 + mean_estimated_bugs: 0.0179 + mean_length: 0.0228 + mean_n2_unique_operands: -0.0329 + mean_time_to_implement_seconds: 0.0778 + mean_vocabulary: -0.0230 + mean_volume: 0.0178 + heaps: + mean_beta: -0.0537 + mean_k: 0.1998 + mean_r_squared: -0.0155 + identifier_length_variance: + mean_mean: 0.0387 + mean_std_dev: 0.0490 + mean_variance: 0.0979 + indentation: + mean_blank_line_ratio: 0.0763 + mean_mean_depth: -0.0117 + mean_variance: 0.0042 + line_patterns: + mean_blank_line_ratio: 0.0763 + mean_string_literal_ratio: -0.3463 + mean_unique_line_ratio: -0.0053 + magic_number_density: + mean_density: 1.1035 + mean_magic_number_count: 1.1312 + mean_string_literal_ratio: -0.3463 + near_duplicate_blocks_file: + mean_sub_block_count: 0.0355 + ngram: + mean_bigram_hapax_fraction: 0.0182 + mean_bigram_repeated_unique: -0.0089 + mean_bigram_repetition_rate: -0.0149 + mean_bigram_total: 0.0131 + mean_bigram_unique: 0.0308 + mean_trigram_hapax_fraction: 0.0094 + mean_trigram_repeated_unique: -0.0263 + mean_trigram_repetition_rate: -0.0255 + mean_trigram_total: 0.0132 + mean_trigram_unique: 0.0274 + punctuation_density: + mean_arrow_density: -0.0139 + mean_bracket_nonalpha_prefix_count: -0.5656 + mean_bracket_nonalpha_suffix_count: -0.0908 + mean_colon_suffix_density: 2.0000 + mean_dot_count: -0.0137 + 
mean_id_nonalpha_suffix_density: 0.0143 + readability: + mean_avg_line_length: 0.0591 + mean_avg_sub_words_per_id: 0.0084 + mean_avg_tokens_per_line: 0.0399 + mean_flesch_adapted: -0.0142 + mean_fog_adapted: 0.0290 + mean_total_lines: -0.0268 + symbol_density: + mean_density: -0.0266 + mean_distinct_symbol_types: -0.0817 + mean_symbol_count: 0.0042 + vocabulary: + mean_mattr: 0.0212 + mean_raw_ttr: -0.0140 + mean_total_identifiers: 0.0140 + vowel_density: + mean_total_chars: 0.0527 + zipf: + mean_exponent: -0.0270 + mean_total_tokens: 0.0131 + mean_vocab_size: -0.0077 + diff --git a/priv/combined_metrics/documentation.yml b/priv/combined_metrics/documentation.yml new file mode 100644 index 0000000..75b19fd --- /dev/null +++ b/priv/combined_metrics/documentation.yml @@ -0,0 +1,634 @@ +docstring_is_nonempty: + _doc: "Docstrings must contain meaningful content, not just a placeholder or empty string." + _fix_hint: "Replace placeholder docstrings with a real description of what the function does" + _log_baseline: 29.4288 + branching: + mean_branch_count: 0.3089 + mean_branching_density: 0.2652 + mean_non_blank_count: 0.0437 + brevity: + mean_sample_size: 0.1931 + casing_entropy: + mean_entropy: 0.0560 + mean_other_count: 0.0709 + mean_pascal_case_count: 0.3089 + mean_snake_case_count: 0.1585 + compression: + mean_raw_bytes: 0.1245 + mean_redundancy: -0.0198 + mean_unique_line_ratio: 0.0053 + mean_zlib_bytes: 0.1557 + mean_zlib_ratio: -0.0312 + entropy: + mean_char_entropy: 0.0065 + mean_char_max_entropy: 0.0102 + mean_char_normalized: -0.0036 + mean_token_entropy: 0.0408 + mean_token_max_entropy: 0.0400 + mean_total_tokens: 0.1038 + mean_vocab_size: 0.1931 + function_metrics: + mean_avg_function_lines: 0.0357 + mean_avg_param_count: 0.0131 + mean_function_count: -0.0290 + mean_max_function_lines: 0.0329 + halstead: + mean_N1_total_operators: 0.0456 + mean_N2_total_operands: -0.0027 + mean_difficulty: 0.0706 + mean_effort: 0.1098 + mean_estimated_bugs: 0.0392 + 
mean_length: 0.0289 + mean_n1_unique_operators: 0.0913 + mean_n2_unique_operands: 0.0179 + mean_time_to_implement_seconds: 0.1098 + mean_vocabulary: 0.0465 + mean_volume: 0.0392 + heaps: + mean_beta: 0.0242 + mean_k: 0.0556 + identifier_length_variance: + mean_mean: -0.0039 + mean_std_dev: -0.0154 + mean_variance: -0.0307 + indentation: + mean_blank_line_ratio: 0.0413 + mean_mean_depth: -0.0330 + mean_variance: -0.0309 + line_patterns: + mean_blank_line_ratio: 0.0413 + mean_string_literal_ratio: 0.1078 + mean_unique_line_ratio: 0.0072 + magic_number_density: + mean_density: 0.0693 + mean_magic_number_count: 0.1709 + mean_string_literal_ratio: 0.1078 + near_duplicate_blocks_file: + mean_block_count: 0.5417 + mean_near_dup_block_d8: 0.2709 + mean_sub_block_count: -0.0061 + ngram: + mean_bigram_hapax_fraction: 0.0378 + mean_bigram_repeated_unique: 0.0767 + mean_bigram_repetition_rate: -0.0528 + mean_bigram_total: 0.1039 + mean_bigram_unique: 0.1635 + mean_trigram_hapax_fraction: 0.0158 + mean_trigram_repeated_unique: 0.0692 + mean_trigram_repetition_rate: -0.0615 + mean_trigram_total: 0.1041 + mean_trigram_unique: 0.1386 + punctuation_density: + mean_arrow_density: -0.0651 + mean_bracket_nonalpha_prefix_count: 0.0450 + mean_bracket_nonalpha_suffix_count: 0.1000 + mean_colon_suffix_density: -0.0260 + mean_dot_count: 0.1435 + mean_exclamation_density: -2.0000 + mean_id_nonalpha_suffix_density: -0.0474 + readability: + mean_avg_line_length: 0.0834 + mean_avg_sub_words_per_id: -0.0071 + mean_avg_tokens_per_line: 0.0601 + mean_fog_adapted: 0.0452 + mean_total_lines: 0.0437 + symbol_density: + mean_density: -0.0578 + mean_distinct_symbol_types: 0.0505 + mean_symbol_count: 0.0664 + vocabulary: + mean_mattr: 0.1408 + mean_raw_ttr: 0.0942 + mean_total_identifiers: 0.1709 + mean_unique_identifiers: 0.2651 + vowel_density: + mean_total_chars: 0.1670 + zipf: + mean_exponent: -0.0353 + mean_r_squared: 0.0037 + mean_total_tokens: 0.1038 + mean_vocab_size: 0.1931 + 
+file_has_license_header: + _doc: "Source files should begin with a license or copyright header." + _fix_hint: "Add a license header comment at the top of the file" + _log_baseline: 5.7261 + branching: + mean_branching_density: -0.0081 + mean_non_blank_count: 0.0080 + brevity: + mean_sample_size: 0.0263 + casing_entropy: + mean_entropy: 0.0312 + mean_pascal_case_count: 0.0957 + mean_snake_case_count: 0.0036 + comment_structure: + mean_comment_line_ratio: -2.0000 + compression: + mean_raw_bytes: 0.0104 + mean_redundancy: -0.0059 + mean_zlib_bytes: 0.0200 + mean_zlib_ratio: -0.0095 + entropy: + mean_char_entropy: 0.0028 + mean_char_max_entropy: 0.0052 + mean_token_entropy: 0.0042 + mean_token_max_entropy: 0.0054 + mean_total_tokens: 0.0091 + mean_vocab_size: 0.0263 + halstead: + mean_N1_total_operators: 0.0051 + mean_N2_total_operands: 0.0185 + mean_difficulty: -0.0273 + mean_effort: -0.0113 + mean_estimated_bugs: 0.0159 + mean_length: 0.0095 + mean_n2_unique_operands: 0.0458 + mean_time_to_implement_seconds: -0.0113 + mean_vocabulary: 0.0306 + mean_volume: 0.0160 + heaps: + mean_beta: -0.0113 + mean_k: 0.0614 + identifier_length_variance: + mean_mean: -0.0037 + mean_variance: -0.0031 + indentation: + mean_blank_line_ratio: 0.0206 + mean_mean_depth: -0.0080 + mean_variance: 0.0154 + line_patterns: + mean_blank_line_ratio: 0.0206 + mean_string_literal_ratio: -0.0104 + magic_number_density: + mean_density: 0.1920 + mean_magic_number_count: 0.1973 + mean_string_literal_ratio: -0.0104 + near_duplicate_blocks_file: + mean_sub_block_count: 0.0089 + ngram: + mean_bigram_hapax_fraction: 0.0086 + mean_bigram_repetition_rate: -0.0091 + mean_bigram_total: 0.0091 + mean_bigram_unique: 0.0182 + mean_trigram_hapax_fraction: 0.0031 + mean_trigram_repetition_rate: -0.0091 + mean_trigram_total: 0.0091 + mean_trigram_unique: 0.0133 + punctuation_density: + mean_arrow_density: -0.0105 + mean_colon_suffix_density: -0.0104 + mean_dot_count: 0.0423 + readability: + 
mean_avg_tokens_per_line: 0.0091 + mean_fog_adapted: 0.0060 + symbol_density: + mean_density: -0.0042 + mean_symbol_count: 0.0065 + vocabulary: + mean_mattr: 0.0121 + mean_raw_ttr: 0.0200 + mean_total_identifiers: 0.0175 + mean_unique_identifiers: 0.0374 + vowel_density: + mean_total_chars: 0.0138 + zipf: + mean_exponent: -0.0055 + mean_total_tokens: 0.0091 + mean_vocab_size: 0.0263 + +file_has_module_docstring: + _doc: "Files should have a module-level docstring explaining purpose and usage." + _fix_hint: "Add a @moduledoc string describing the module's purpose" + _log_baseline: 24.2268 + branching: + mean_branch_count: 0.3854 + mean_branching_density: -2.0000 + mean_non_blank_count: 0.0908 + brevity: + mean_sample_size: 0.2219 + casing_entropy: + mean_entropy: -0.0217 + mean_pascal_case_count: 0.0929 + mean_snake_case_count: 0.1492 + compression: + mean_raw_bytes: 0.1161 + mean_redundancy: -0.0256 + mean_unique_line_ratio: 0.0122 + mean_zlib_bytes: 0.1676 + mean_zlib_ratio: -0.0514 + entropy: + mean_char_max_entropy: 0.0126 + mean_char_normalized: -0.0120 + mean_token_entropy: 0.0441 + mean_token_max_entropy: 0.0457 + mean_total_tokens: 0.0837 + mean_vocab_size: 0.2219 + function_metrics: + mean_avg_function_lines: 0.0166 + mean_max_function_lines: 0.1014 + halstead: + mean_N1_total_operators: 0.0448 + mean_N2_total_operands: 0.0268 + mean_difficulty: 0.0971 + mean_effort: 0.1486 + mean_estimated_bugs: 0.0515 + mean_length: 0.0387 + mean_n1_unique_operators: 0.1116 + mean_n2_unique_operands: 0.0412 + mean_time_to_implement_seconds: 0.1486 + mean_vocabulary: 0.0602 + mean_volume: 0.0515 + heaps: + mean_beta: -0.0925 + mean_k: 0.5760 + mean_r_squared: -0.0049 + identifier_length_variance: + mean_mean: -0.0059 + mean_std_dev: -0.0462 + mean_variance: -0.0924 + indentation: + mean_blank_line_ratio: 0.0686 + mean_mean_depth: -0.0240 + mean_variance: -0.0634 + line_patterns: + mean_blank_line_ratio: 0.0686 + mean_string_literal_ratio: 0.1425 + mean_unique_line_ratio: 
0.0141 + magic_number_density: + mean_density: 0.0812 + mean_magic_number_count: 0.1599 + mean_string_literal_ratio: 0.1425 + near_duplicate_blocks_file: + mean_block_count: 0.3854 + mean_sub_block_count: 0.0098 + ngram: + mean_bigram_hapax_fraction: 0.0500 + mean_bigram_repeated_unique: 0.0539 + mean_bigram_repetition_rate: -0.0497 + mean_bigram_total: 0.0838 + mean_bigram_unique: 0.1493 + mean_trigram_hapax_fraction: 0.0283 + mean_trigram_repeated_unique: 0.0225 + mean_trigram_repetition_rate: -0.0657 + mean_trigram_total: 0.0839 + mean_trigram_unique: 0.1235 + punctuation_density: + mean_colon_suffix_density: 0.0341 + mean_dot_count: 0.0777 + mean_exclamation_density: -0.1014 + mean_id_nonalpha_suffix_density: -0.0339 + readability: + mean_avg_line_length: 0.0257 + mean_avg_sub_words_per_id: -0.0181 + mean_avg_tokens_per_line: -0.0071 + mean_flesch_adapted: 0.0205 + mean_fog_adapted: -0.0266 + mean_total_lines: 0.0908 + symbol_density: + mean_density: -0.0727 + mean_distinct_symbol_types: 0.0618 + mean_symbol_count: 0.0433 + vocabulary: + mean_mattr: 0.0527 + mean_raw_ttr: 0.1365 + mean_total_identifiers: 0.1309 + mean_unique_identifiers: 0.2674 + vowel_density: + mean_total_chars: 0.1250 + zipf: + mean_exponent: -0.0467 + mean_total_tokens: 0.0837 + mean_vocab_size: 0.2219 + +file_has_no_commented_out_code: + _doc: "Files should not contain commented-out code blocks left from development." 
+ _fix_hint: "Remove commented-out code — use version control to track deleted code" + _log_baseline: -8.1616 + branching: + mean_branching_density: 0.0368 + mean_non_blank_count: -0.0367 + brevity: + mean_sample_size: -0.0046 + casing_entropy: + mean_entropy: -0.0252 + mean_pascal_case_count: -0.0657 + mean_snake_case_count: 0.0160 + comment_structure: + mean_comment_line_count: -0.9901 + mean_comment_line_ratio: 0.3578 + compression: + mean_raw_bytes: -0.0068 + mean_redundancy: 0.0077 + mean_zlib_bytes: -0.0179 + mean_zlib_ratio: 0.0111 + entropy: + mean_char_entropy: -0.0026 + mean_char_max_entropy: -0.0061 + mean_char_normalized: 0.0035 + mean_total_tokens: -0.0158 + mean_vocab_size: -0.0046 + function_metrics: + mean_avg_function_lines: -0.0992 + mean_function_count: 0.0686 + mean_max_function_lines: -0.1247 + halstead: + mean_N1_total_operators: -0.0058 + mean_N2_total_operands: -0.0546 + mean_difficulty: 0.0608 + mean_effort: 0.0253 + mean_estimated_bugs: -0.0355 + mean_length: -0.0224 + mean_n1_unique_operators: 0.0171 + mean_n2_unique_operands: -0.0984 + mean_time_to_implement_seconds: 0.0253 + mean_vocabulary: -0.0628 + mean_volume: -0.0356 + heaps: + mean_beta: -0.0499 + mean_k: 0.1958 + mean_r_squared: -0.0200 + identifier_length_variance: + mean_mean: 0.0074 + mean_std_dev: 0.0252 + mean_variance: 0.0503 + indentation: + mean_blank_line_ratio: 0.0551 + mean_max_depth: 0.0324 + mean_mean_depth: 0.0564 + mean_variance: 0.0552 + line_patterns: + mean_blank_line_ratio: 0.0551 + mean_string_literal_ratio: -0.0818 + mean_unique_line_ratio: -0.0077 + magic_number_density: + mean_density: 2.0000 + mean_string_literal_ratio: -0.0818 + near_duplicate_blocks_file: + mean_sub_block_count: -0.0454 + ngram: + mean_bigram_hapax_fraction: 0.0101 + mean_bigram_repeated_unique: -0.0414 + mean_bigram_repetition_rate: -0.0019 + mean_bigram_total: -0.0158 + mean_bigram_unique: -0.0223 + mean_trigram_hapax_fraction: -0.0019 + mean_trigram_repeated_unique: -0.0273 + 
mean_trigram_repetition_rate: 0.0258 + mean_trigram_total: -0.0159 + mean_trigram_unique: -0.0338 + punctuation_density: + mean_arrow_density: 0.1869 + mean_bracket_nonalpha_prefix_count: -0.1247 + mean_bracket_nonalpha_suffix_count: -0.0885 + mean_colon_suffix_density: -0.1285 + mean_dot_count: -0.0411 + mean_exclamation_density: -0.1956 + mean_id_nonalpha_suffix_density: 0.0028 + mean_question_mark_density: -0.2494 + readability: + mean_avg_line_length: 0.0371 + mean_avg_sub_words_per_id: -0.0018 + mean_avg_tokens_per_line: -0.0943 + mean_flesch_adapted: 0.0114 + mean_fog_adapted: -0.0779 + mean_total_lines: 0.0785 + symbol_density: + mean_density: -0.0172 + mean_symbol_count: -0.0237 + vocabulary: + mean_mattr: -0.0404 + mean_raw_ttr: -0.0140 + mean_total_identifiers: -0.0030 + mean_unique_identifiers: -0.0170 + vowel_density: + mean_total_chars: 0.0044 + zipf: + mean_exponent: -0.0043 + mean_total_tokens: -0.0158 + mean_vocab_size: -0.0046 + +function_has_docstring: + _doc: "Public functions should have a docstring describing behaviour, params, and return value." 
+ _fix_hint: "Add @doc strings to public functions describing parameters, return value, and purpose" + _log_baseline: 43.0440 + branching: + mean_branch_count: 0.5279 + mean_branching_density: 0.3832 + mean_non_blank_count: 0.1446 + brevity: + mean_sample_size: 0.2608 + casing_entropy: + mean_entropy: -0.0193 + mean_other_count: 0.3105 + mean_pascal_case_count: 0.1852 + mean_snake_case_count: 0.3052 + comment_structure: + mean_comment_line_ratio: -2.0000 + compression: + mean_raw_bytes: 0.2251 + mean_redundancy: -0.0242 + mean_unique_line_ratio: -0.0264 + mean_zlib_bytes: 0.2718 + mean_zlib_ratio: -0.0468 + entropy: + mean_char_entropy: 0.0081 + mean_char_max_entropy: 0.0163 + mean_char_normalized: -0.0082 + mean_token_entropy: 0.0517 + mean_token_max_entropy: 0.0557 + mean_token_normalized: -0.0040 + mean_total_tokens: 0.2284 + mean_vocab_size: 0.2608 + function_metrics: + mean_avg_function_lines: 0.0289 + mean_avg_param_count: 0.0202 + mean_function_count: 0.0999 + mean_max_function_lines: 0.1368 + halstead: + mean_N1_total_operators: 0.1175 + mean_N2_total_operands: 0.0799 + mean_difficulty: 0.0232 + mean_effort: 0.1555 + mean_estimated_bugs: 0.1324 + mean_length: 0.1035 + mean_n1_unique_operators: 0.0939 + mean_n2_unique_operands: 0.1507 + mean_time_to_implement_seconds: 0.1555 + mean_vocabulary: 0.1288 + mean_volume: 0.1324 + heaps: + mean_beta: 0.0660 + mean_k: -0.0612 + mean_r_squared: -0.0041 + identifier_length_variance: + mean_mean: -0.0412 + mean_std_dev: -0.0363 + mean_variance: -0.0727 + indentation: + mean_blank_line_ratio: 0.1003 + mean_max_depth: -0.1288 + mean_mean_depth: -0.0904 + mean_variance: -0.2118 + line_patterns: + mean_blank_line_ratio: 0.1003 + mean_string_literal_ratio: 0.5931 + mean_unique_line_ratio: -0.0135 + magic_number_density: + mean_density: 0.1744 + mean_magic_number_count: 0.4104 + mean_string_literal_ratio: 0.5931 + near_duplicate_blocks_file: + mean_block_count: 0.6209 + mean_near_dup_block_d6: 0.3105 + 
mean_near_dup_block_d8: 0.3105 + mean_sub_block_count: 0.0349 + ngram: + mean_bigram_hapax_fraction: 0.0560 + mean_bigram_repeated_unique: 0.1917 + mean_bigram_repetition_rate: -0.0476 + mean_bigram_total: 0.2288 + mean_bigram_unique: 0.2856 + mean_trigram_hapax_fraction: 0.0480 + mean_trigram_repeated_unique: 0.1175 + mean_trigram_repetition_rate: -0.1025 + mean_trigram_total: 0.2292 + mean_trigram_unique: 0.2807 + punctuation_density: + mean_arrow_density: -0.3619 + mean_bracket_nonalpha_prefix_count: 0.0999 + mean_bracket_nonalpha_suffix_count: 0.2024 + mean_colon_suffix_density: -0.0297 + mean_dot_count: 0.1816 + mean_exclamation_density: -0.3105 + mean_id_nonalpha_suffix_density: -0.1019 + mean_question_mark_density: -0.2377 + readability: + mean_avg_line_length: 0.0861 + mean_avg_sub_words_per_id: -0.0113 + mean_avg_tokens_per_line: 0.0890 + mean_flesch_adapted: 0.0026 + mean_fog_adapted: 0.0948 + mean_total_lines: 0.1394 + symbol_density: + mean_density: -0.0353 + mean_distinct_symbol_types: 0.0427 + mean_symbol_count: 0.1896 + vocabulary: + mean_mattr: 0.1771 + mean_raw_ttr: 0.0544 + mean_total_identifiers: 0.2819 + mean_unique_identifiers: 0.3363 + vowel_density: + mean_total_chars: 0.2407 + zipf: + mean_exponent: 0.0025 + mean_total_tokens: 0.2284 + mean_vocab_size: 0.2608 + +function_todo_comment_in_body: + _doc: "Functions should not contain TODO/FIXME comments indicating unfinished work." 
+ _fix_hint: "Resolve or extract TODO comments — create a tracked issue instead of leaving them inline" + _log_baseline: 7.0511 + branching: + mean_branch_count: -0.0287 + mean_branching_density: -0.0435 + mean_non_blank_count: 0.0147 + brevity: + mean_sample_size: -0.0084 + casing_entropy: + mean_entropy: 0.0039 + mean_pascal_case_count: 0.0055 + mean_snake_case_count: -0.0047 + comment_structure: + mean_comment_line_count: -0.5392 + mean_comment_line_ratio: 0.7796 + mean_todo_fixme_count: -0.5392 + compression: + mean_raw_bytes: 0.0082 + mean_unique_line_ratio: 0.0028 + mean_zlib_bytes: 0.0074 + entropy: + mean_char_entropy: 0.0026 + mean_char_normalized: 0.0026 + mean_token_max_entropy: -0.0017 + mean_token_normalized: 0.0020 + mean_total_tokens: 0.0157 + mean_vocab_size: -0.0084 + function_metrics: + mean_avg_function_lines: -0.0250 + mean_avg_param_count: -0.0354 + mean_function_count: 0.0354 + mean_max_function_lines: -0.0182 + halstead: + mean_N1_total_operators: 0.0224 + mean_N2_total_operands: -0.0309 + mean_difficulty: 0.0451 + mean_effort: 0.0375 + mean_estimated_bugs: -0.0076 + mean_length: 0.0035 + mean_n2_unique_operands: -0.0761 + mean_time_to_implement_seconds: 0.0375 + mean_vocabulary: -0.0540 + mean_volume: -0.0076 + heaps: + mean_beta: -0.0498 + mean_k: 0.1608 + mean_r_squared: -0.0095 + identifier_length_variance: + mean_mean: 0.0058 + mean_std_dev: 0.0154 + mean_variance: 0.0309 + indentation: + mean_blank_line_ratio: 0.0593 + mean_mean_depth: -0.0184 + mean_variance: -0.0277 + line_patterns: + mean_blank_line_ratio: 0.0593 + mean_string_literal_ratio: -0.0151 + mean_unique_line_ratio: 0.0033 + magic_number_density: + mean_density: -2.0000 + mean_string_literal_ratio: -0.0151 + near_duplicate_blocks_file: + mean_sub_block_count: 0.0281 + ngram: + mean_bigram_hapax_fraction: -0.0187 + mean_bigram_repeated_unique: 0.0464 + mean_bigram_repetition_rate: 0.0098 + mean_bigram_total: 0.0157 + mean_bigram_unique: 0.0136 + mean_trigram_hapax_fraction: 
-0.0109 + mean_trigram_repeated_unique: 0.0479 + mean_trigram_repetition_rate: 0.0123 + mean_trigram_total: 0.0157 + mean_trigram_unique: 0.0149 + punctuation_density: + mean_arrow_density: -0.0161 + mean_bracket_nonalpha_prefix_count: -0.0287 + mean_colon_suffix_density: -0.0293 + mean_dot_count: 0.0485 + mean_id_nonalpha_suffix_density: 0.0062 + mean_question_mark_density: -0.0287 + readability: + mean_avg_line_length: 0.0123 + mean_avg_sub_words_per_id: 0.0073 + mean_avg_tokens_per_line: -0.0224 + mean_flesch_adapted: -0.0053 + mean_fog_adapted: -0.0109 + mean_total_lines: 0.0381 + symbol_density: + mean_density: 0.0116 + mean_distinct_symbol_types: -0.0140 + mean_symbol_count: 0.0200 + vocabulary: + mean_mattr: -0.0437 + mean_raw_ttr: -0.0159 + mean_total_identifiers: -0.0024 + mean_unique_identifiers: -0.0182 + vowel_density: + mean_total_chars: 0.0035 + zipf: + mean_total_tokens: 0.0157 + mean_vocab_size: -0.0084 + diff --git a/priv/combined_metrics/error_handling.yml b/priv/combined_metrics/error_handling.yml new file mode 100644 index 0000000..1398a3d --- /dev/null +++ b/priv/combined_metrics/error_handling.yml @@ -0,0 +1,325 @@ +does_not_swallow_errors: + _doc: "Errors must be handled or re-raised — empty rescue/catch blocks silently hide failures." 
+ _fix_hint: "Propagate or log errors — do not silently discard {:error, _} tuples or rescue clauses" + _log_baseline: 87.3594 + branching: + mean_branch_count: -0.1041 + mean_branching_density: -0.2095 + mean_max_nesting_depth: 0.5405 + mean_non_blank_count: 0.1054 + brevity: + mean_sample_size: 0.2830 + casing_entropy: + mean_entropy: -0.1673 + mean_other_count: -1.6214 + mean_pascal_case_count: 0.8391 + mean_snake_case_count: 0.5551 + compression: + mean_raw_bytes: 0.3818 + mean_redundancy: 0.0202 + mean_unique_line_ratio: 0.1028 + mean_zlib_bytes: 0.3399 + mean_zlib_ratio: 0.0419 + entropy: + mean_char_entropy: 0.0445 + mean_char_max_entropy: 0.0347 + mean_char_normalized: 0.0098 + mean_token_entropy: 0.0223 + mean_token_max_entropy: 0.0620 + mean_token_normalized: -0.0397 + mean_total_tokens: 0.4926 + mean_vocab_size: 0.2830 + function_metrics: + mean_avg_function_lines: 0.1005 + mean_max_function_lines: 0.2243 + halstead: + mean_N1_total_operators: 0.4699 + mean_N2_total_operands: 0.2900 + mean_difficulty: 0.2395 + mean_effort: 0.6960 + mean_estimated_bugs: 0.4564 + mean_length: 0.4072 + mean_n1_unique_operators: 0.1859 + mean_n2_unique_operands: 0.2364 + mean_time_to_implement_seconds: 0.6960 + mean_vocabulary: 0.2190 + mean_volume: 0.4565 + heaps: + mean_beta: -0.0869 + mean_k: 0.2466 + identifier_length_variance: + mean_mean: -0.0142 + mean_std_dev: -0.1252 + mean_variance: -0.2505 + indentation: + mean_blank_line_ratio: 0.0451 + mean_max_depth: 0.1740 + mean_mean_depth: 0.1043 + mean_variance: 0.3416 + line_patterns: + mean_blank_line_ratio: 0.0451 + mean_max_nesting_depth: 0.5405 + mean_string_literal_ratio: 0.2524 + mean_unique_line_ratio: 0.1413 + magic_number_density: + mean_string_literal_ratio: 0.2524 + near_duplicate_blocks_file: + mean_sub_block_count: 0.3065 + ngram: + mean_bigram_hapax_fraction: -0.0373 + mean_bigram_repeated_unique: 0.4011 + mean_bigram_repetition_rate: 0.0995 + mean_bigram_total: 0.4937 + mean_bigram_unique: 0.3266 + 
mean_trigram_hapax_fraction: -0.0651 + mean_trigram_repeated_unique: 0.5672 + mean_trigram_repetition_rate: 0.2299 + mean_trigram_total: 0.4949 + mean_trigram_unique: 0.3376 + punctuation_density: + mean_arrow_density: -0.3177 + mean_bracket_nonalpha_prefix_count: 0.7888 + mean_bracket_nonalpha_suffix_count: 2.0000 + mean_colon_suffix_density: 0.2150 + mean_dot_count: 0.5172 + mean_exclamation_density: -0.5217 + mean_id_nonalpha_suffix_density: 0.0181 + mean_question_mark_density: -0.4364 + readability: + mean_avg_line_length: 0.2905 + mean_avg_sub_words_per_id: 0.0219 + mean_avg_tokens_per_line: 0.3872 + mean_flesch_adapted: -0.0373 + mean_fog_adapted: 0.3019 + mean_total_lines: 0.1054 + symbol_density: + mean_density: 0.2563 + mean_distinct_symbol_types: 0.0400 + mean_symbol_count: 0.6378 + vocabulary: + mean_mattr: 0.0190 + mean_raw_ttr: -0.1464 + mean_total_identifiers: 0.5481 + mean_unique_identifiers: 0.4017 + vowel_density: + mean_total_chars: 0.5339 + zipf: + mean_exponent: 0.0933 + mean_total_tokens: 0.4926 + mean_vocab_size: 0.2830 + +error_message_is_descriptive: + _doc: "Error values should carry a meaningful message, not just a bare atom or empty string." 
+ _fix_hint: "Include context in error messages — describe what failed, not just that it failed" + _log_baseline: 52.7594 + branching: + mean_branch_count: 0.0664 + mean_branching_density: -0.0540 + mean_max_nesting_depth: 0.3900 + mean_non_blank_count: 0.1204 + brevity: + mean_sample_size: 0.3136 + casing_entropy: + mean_entropy: 0.1513 + mean_pascal_case_count: 2.0000 + mean_snake_case_count: 0.4347 + compression: + mean_raw_bytes: 0.3028 + mean_redundancy: 0.0104 + mean_unique_line_ratio: -0.0126 + mean_zlib_bytes: 0.2771 + mean_zlib_ratio: 0.0257 + entropy: + mean_char_entropy: 0.0161 + mean_char_max_entropy: 0.0487 + mean_char_normalized: -0.0326 + mean_token_entropy: 0.0596 + mean_token_max_entropy: 0.0685 + mean_token_normalized: -0.0089 + mean_total_tokens: 0.3002 + mean_vocab_size: 0.3136 + function_metrics: + mean_avg_function_lines: 0.1160 + mean_max_function_lines: 0.0713 + halstead: + mean_N1_total_operators: 0.1787 + mean_N2_total_operands: 0.0463 + mean_difficulty: 0.0136 + mean_effort: 0.1822 + mean_estimated_bugs: 0.1686 + mean_length: 0.1370 + mean_n1_unique_operators: 0.1179 + mean_n2_unique_operands: 0.1506 + mean_time_to_implement_seconds: 0.1822 + mean_vocabulary: 0.1415 + mean_volume: 0.1686 + heaps: + mean_beta: 0.0120 + mean_k: 0.1259 + mean_r_squared: 0.0073 + identifier_length_variance: + mean_mean: -0.0598 + mean_std_dev: -0.0787 + mean_variance: -0.1573 + indentation: + mean_blank_line_ratio: -0.1098 + mean_max_depth: 0.1754 + mean_mean_depth: 0.1108 + mean_variance: 0.1967 + line_patterns: + mean_blank_line_ratio: -0.1098 + mean_max_nesting_depth: 0.3900 + mean_string_literal_ratio: 0.3673 + mean_unique_line_ratio: 0.0304 + magic_number_density: + mean_string_literal_ratio: 0.3673 + near_duplicate_blocks_file: + mean_sub_block_count: 0.0621 + ngram: + mean_bigram_hapax_fraction: -0.0059 + mean_bigram_repeated_unique: 0.3150 + mean_bigram_total: 0.3008 + mean_bigram_unique: 0.3055 + mean_trigram_hapax_fraction: -0.0298 + 
mean_trigram_repeated_unique: 0.4104 + mean_trigram_repetition_rate: 0.0227 + mean_trigram_total: 0.3014 + mean_trigram_unique: 0.3075 + punctuation_density: + mean_arrow_density: -0.0591 + mean_bracket_nonalpha_prefix_count: 1.4250 + mean_bracket_nonalpha_suffix_count: 0.3350 + mean_colon_suffix_density: -0.0854 + mean_id_nonalpha_suffix_density: -0.0694 + readability: + mean_avg_line_length: 0.1895 + mean_avg_tokens_per_line: 0.1798 + mean_flesch_adapted: -0.0175 + mean_fog_adapted: 0.1420 + mean_total_lines: 0.1204 + symbol_density: + mean_distinct_symbol_types: 0.0664 + mean_symbol_count: 0.3056 + vocabulary: + mean_mattr: 0.0721 + mean_raw_ttr: -0.0454 + mean_total_identifiers: 0.4472 + mean_unique_identifiers: 0.4018 + vowel_density: + mean_total_chars: 0.3874 + zipf: + mean_r_squared: 0.0056 + mean_total_tokens: 0.3002 + mean_vocab_size: 0.3136 + +returns_typed_error: + _doc: "Functions should signal failure via a typed return (e.g. `{:error, reason}`) rather than returning `nil` or `false`." 
+ _fix_hint: "Return typed errors like {:error, :not_found} instead of bare :error or nil" + _log_baseline: 208.7673 + branching: + mean_branch_count: -0.2092 + mean_branching_density: -0.3081 + mean_max_nesting_depth: 1.8360 + mean_non_blank_count: 0.0989 + brevity: + mean_sample_size: 0.3776 + casing_entropy: + mean_entropy: -0.7806 + mean_other_count: -0.4385 + mean_pascal_case_count: 1.1584 + mean_snake_case_count: 1.2656 + compression: + mean_raw_bytes: 0.7114 + mean_redundancy: 0.0542 + mean_unique_line_ratio: 0.2391 + mean_zlib_bytes: 0.5668 + mean_zlib_ratio: 0.1446 + entropy: + mean_char_entropy: 0.1388 + mean_char_max_entropy: 0.0695 + mean_char_normalized: 0.0692 + mean_token_entropy: -0.0195 + mean_token_max_entropy: 0.0863 + mean_token_normalized: -0.1059 + mean_total_tokens: 1.0937 + mean_vocab_size: 0.3776 + function_metrics: + mean_avg_function_lines: 0.1470 + mean_avg_param_count: 0.0087 + mean_function_count: -0.0904 + mean_max_function_lines: 0.1338 + halstead: + mean_N1_total_operators: 1.2868 + mean_N2_total_operands: 0.8935 + mean_difficulty: 0.7343 + mean_effort: 2.0000 + mean_estimated_bugs: 1.2657 + mean_length: 1.1609 + mean_n1_unique_operators: 0.3422 + mean_n2_unique_operands: 0.5015 + mean_time_to_implement_seconds: 2.0000 + mean_vocabulary: 0.4467 + mean_volume: 1.2657 + heaps: + mean_beta: -0.3791 + mean_k: 0.7841 + mean_r_squared: 0.0178 + identifier_length_variance: + mean_max: 0.3245 + mean_std_dev: 0.3851 + mean_variance: 0.7704 + indentation: + mean_blank_line_ratio: -0.2464 + mean_mean_depth: -0.0466 + mean_variance: 0.0604 + line_patterns: + mean_blank_line_ratio: -0.2464 + mean_max_nesting_depth: 1.8360 + mean_string_literal_ratio: -1.0976 + mean_unique_line_ratio: 0.2365 + magic_number_density: + mean_string_literal_ratio: -1.0976 + near_duplicate_blocks_file: + mean_sub_block_count: 1.2007 + ngram: + mean_bigram_hapax_fraction: -0.2293 + mean_bigram_repeated_unique: 0.7952 + mean_bigram_repetition_rate: 0.2117 + 
mean_bigram_total: 1.0959 + mean_bigram_unique: 0.5042 + mean_trigram_hapax_fraction: -0.0889 + mean_trigram_repeated_unique: 0.8135 + mean_trigram_repetition_rate: 0.3853 + mean_trigram_total: 1.0981 + mean_trigram_unique: 0.5821 + punctuation_density: + mean_arrow_density: -1.3061 + mean_bracket_nonalpha_prefix_count: -0.3047 + mean_colon_suffix_density: -1.3955 + mean_dot_count: 1.8360 + mean_id_nonalpha_suffix_density: 0.1318 + mean_question_mark_density: -1.0680 + readability: + mean_avg_line_length: 0.6431 + mean_avg_sub_words_per_id: 0.1542 + mean_avg_tokens_per_line: 0.9948 + mean_flesch_adapted: -0.2067 + mean_fog_adapted: 1.0791 + mean_total_lines: 0.0989 + symbol_density: + mean_density: 0.9451 + mean_distinct_symbol_types: 0.3470 + mean_symbol_count: 1.6563 + vocabulary: + mean_mattr: -0.4659 + mean_raw_ttr: -0.6552 + mean_total_identifiers: 1.1152 + mean_unique_identifiers: 0.4603 + vowel_density: + mean_total_chars: 1.1198 + zipf: + mean_exponent: 0.1702 + mean_r_squared: 0.0411 + mean_total_tokens: 1.0937 + mean_vocab_size: 0.3776 + diff --git a/priv/combined_metrics/file_structure.yml b/priv/combined_metrics/file_structure.yml new file mode 100644 index 0000000..19418d4 --- /dev/null +++ b/priv/combined_metrics/file_structure.yml @@ -0,0 +1,503 @@ +has_consistent_indentation: + _doc: "Files should use a single, consistent indentation style with no mixed tabs and spaces." 
+ _fix_hint: "Use a consistent indentation width throughout the file (2 or 4 spaces, not mixed)" + _log_baseline: -12.7016 + branching: + mean_branching_density: 0.1994 + mean_non_blank_count: -0.2001 + compression: + mean_raw_bytes: -0.1617 + mean_redundancy: -0.0442 + mean_unique_line_ratio: -0.1671 + mean_zlib_bytes: -0.0805 + mean_zlib_ratio: -0.0812 + entropy: + mean_char_entropy: 0.0876 + mean_char_normalized: 0.0877 + function_metrics: + mean_avg_function_lines: -0.1622 + mean_max_function_lines: -0.1528 + indentation: + mean_blank_line_ratio: 0.3569 + mean_max_depth: -0.9827 + mean_mean_depth: -0.6193 + mean_variance: -2.0000 + line_patterns: + mean_blank_line_ratio: 0.3569 + mean_unique_line_ratio: -0.0830 + readability: + mean_avg_line_length: 0.0378 + mean_avg_tokens_per_line: 0.2001 + mean_flesch_adapted: -0.0112 + mean_fog_adapted: 0.1818 + mean_total_lines: -0.2001 + symbol_density: + mean_density: 0.1620 + +line_count_under_300: + _doc: "Files should be under 300 lines; longer files typically violate single responsibility." 
+ _fix_hint: "Split large files — extract cohesive groups of functions into separate modules" + _log_baseline: -49.2655 + branching: + mean_branch_count: -0.4508 + mean_branching_density: -0.2446 + mean_non_blank_count: -0.2063 + brevity: + mean_sample_size: -0.2062 + casing_entropy: + mean_entropy: 0.0366 + mean_other_count: -0.6011 + mean_pascal_case_count: 0.1036 + mean_snake_case_count: -0.1860 + compression: + mean_raw_bytes: -0.2263 + mean_redundancy: -0.0026 + mean_unique_line_ratio: 0.0519 + mean_zlib_bytes: -0.2194 + mean_zlib_ratio: -0.0069 + entropy: + mean_char_entropy: -0.0072 + mean_char_max_entropy: -0.0245 + mean_char_normalized: 0.0173 + mean_token_entropy: -0.0264 + mean_token_max_entropy: -0.0433 + mean_token_normalized: 0.0169 + mean_total_tokens: -0.1807 + mean_vocab_size: -0.2062 + function_metrics: + mean_avg_function_lines: 0.1338 + mean_avg_param_count: -0.0931 + mean_function_count: -0.3274 + mean_max_function_lines: 0.0222 + mean_max_param_count: -0.1036 + halstead: + mean_N1_total_operators: -0.1746 + mean_N2_total_operands: -0.1868 + mean_difficulty: 0.0070 + mean_effort: -0.2194 + mean_estimated_bugs: -0.2264 + mean_length: -0.1785 + mean_n1_unique_operators: -0.0814 + mean_n2_unique_operands: -0.2752 + mean_time_to_implement_seconds: -0.2194 + mean_vocabulary: -0.2238 + mean_volume: -0.2264 + heaps: + mean_beta: -0.0687 + mean_k: 0.0978 + mean_r_squared: -0.0094 + identifier_length_variance: + mean_max: -0.0671 + mean_mean: -0.0670 + mean_std_dev: 0.0028 + mean_variance: 0.0055 + indentation: + mean_blank_line_ratio: -0.4899 + mean_max_depth: 0.0301 + mean_mean_depth: 0.0114 + mean_variance: 0.1685 + line_patterns: + mean_blank_line_ratio: -0.4899 + mean_string_literal_ratio: 0.0039 + mean_unique_line_ratio: 0.0561 + magic_number_density: + mean_density: 1.4051 + mean_magic_number_count: -0.4114 + mean_string_literal_ratio: 0.0039 + near_duplicate_blocks_file: + mean_block_count: 0.1772 + mean_sub_block_count: -0.1378 + ngram: + 
mean_bigram_hapax_fraction: -0.0655 + mean_bigram_repeated_unique: -0.1356 + mean_bigram_repetition_rate: 0.0296 + mean_bigram_total: -0.1809 + mean_bigram_unique: -0.2260 + mean_trigram_hapax_fraction: -0.0366 + mean_trigram_repeated_unique: -0.1208 + mean_trigram_repetition_rate: 0.0506 + mean_trigram_total: -0.1812 + mean_trigram_unique: -0.2220 + punctuation_density: + mean_arrow_density: -0.2511 + mean_bracket_nonalpha_prefix_count: -0.2342 + mean_bracket_nonalpha_suffix_count: -0.3472 + mean_bracket_number_pair_count: -0.1772 + mean_colon_suffix_density: -0.2045 + mean_dot_count: -0.0341 + mean_exclamation_density: 2.0000 + mean_id_nonalpha_suffix_density: -0.0125 + readability: + mean_avg_line_length: -0.0217 + mean_avg_sub_words_per_id: -0.0148 + mean_avg_tokens_per_line: 0.0256 + mean_flesch_adapted: 0.0146 + mean_fog_adapted: 0.0323 + mean_total_lines: -0.2063 + symbol_density: + mean_density: 0.0758 + mean_distinct_symbol_types: -0.0604 + mean_symbol_count: -0.1504 + vocabulary: + mean_mattr: -0.1338 + mean_raw_ttr: -0.0761 + mean_total_identifiers: -0.1689 + mean_unique_identifiers: -0.2450 + vowel_density: + mean_total_chars: -0.2359 + zipf: + mean_exponent: 0.0102 + mean_r_squared: -0.0067 + mean_total_tokens: -0.1807 + mean_vocab_size: -0.2062 + +line_length_under_120: + _doc: "Lines should be under 120 characters to avoid horizontal scrolling." 
+ _fix_hint: "Wrap lines at 80–120 characters — break long expressions into multiple lines" + _log_baseline: -6.2404 + branching: + mean_branching_density: -0.1942 + mean_non_blank_count: 0.1944 + brevity: + mean_sample_size: -0.0200 + casing_entropy: + mean_entropy: -0.0025 + mean_snake_case_count: 0.0039 + compression: + mean_raw_bytes: 0.0170 + mean_redundancy: 0.0140 + mean_unique_line_ratio: 0.0133 + mean_zlib_bytes: -0.0077 + mean_zlib_ratio: 0.0247 + entropy: + mean_char_entropy: -0.0087 + mean_char_normalized: -0.0076 + mean_token_entropy: -0.0022 + mean_token_max_entropy: -0.0041 + mean_token_normalized: 0.0019 + mean_total_tokens: -0.0030 + mean_vocab_size: -0.0200 + function_metrics: + mean_avg_function_lines: 0.2084 + mean_avg_param_count: -0.0276 + mean_max_function_lines: 0.2570 + mean_max_param_count: -0.0944 + halstead: + mean_N1_total_operators: -0.0033 + mean_N2_total_operands: 0.0022 + mean_difficulty: 0.0219 + mean_effort: 0.0160 + mean_estimated_bugs: -0.0059 + mean_n1_unique_operators: -0.0081 + mean_n2_unique_operands: -0.0278 + mean_time_to_implement_seconds: 0.0160 + mean_vocabulary: -0.0228 + mean_volume: -0.0059 + heaps: + mean_beta: -0.0068 + mean_k: 0.0086 + identifier_length_variance: + mean_mean: -0.0176 + mean_std_dev: -0.0468 + mean_variance: -0.0936 + indentation: + mean_blank_line_ratio: -0.0420 + mean_max_depth: 0.1137 + mean_mean_depth: 0.1254 + mean_variance: 0.2595 + line_patterns: + mean_blank_line_ratio: -0.0420 + mean_string_literal_ratio: -0.0264 + mean_unique_line_ratio: 0.0181 + magic_number_density: + mean_density: 0.0052 + mean_string_literal_ratio: -0.0264 + near_duplicate_blocks_file: + mean_sub_block_count: 0.0477 + ngram: + mean_bigram_hapax_fraction: -0.0141 + mean_bigram_repeated_unique: 0.0257 + mean_bigram_repetition_rate: 0.0141 + mean_bigram_total: -0.0030 + mean_bigram_unique: -0.0113 + mean_trigram_hapax_fraction: 0.0017 + mean_trigram_repeated_unique: -0.0134 + mean_trigram_total: -0.0030 + 
mean_trigram_unique: -0.0043 + punctuation_density: + mean_bracket_nonalpha_prefix_count: -0.0807 + mean_bracket_nonalpha_suffix_count: -0.1362 + mean_colon_suffix_density: 0.0705 + mean_dot_count: -0.0069 + mean_id_nonalpha_suffix_density: 0.0093 + mean_question_mark_density: 2.0000 + readability: + mean_avg_line_length: -0.1816 + mean_avg_sub_words_per_id: -0.0066 + mean_avg_tokens_per_line: -0.1974 + mean_flesch_adapted: 0.0402 + mean_fog_adapted: -0.2009 + mean_total_lines: 0.1944 + symbol_density: + mean_density: -0.0247 + mean_distinct_symbol_types: -0.0130 + mean_symbol_count: -0.0078 + vocabulary: + mean_mattr: -0.0207 + mean_raw_ttr: -0.0312 + mean_total_identifiers: 0.0036 + mean_unique_identifiers: -0.0276 + vowel_density: + mean_total_chars: -0.0140 + zipf: + mean_exponent: 0.0039 + mean_total_tokens: -0.0030 + mean_vocab_size: -0.0200 + +no_magic_numbers: + _doc: "Numeric literals should be extracted to named constants rather than used inline." + _fix_hint: "Replace literal numbers with named constants or module attributes" + _log_baseline: 107.5222 + branching: + mean_branch_count: -0.4352 + mean_branching_density: -0.9103 + mean_non_blank_count: 0.4762 + brevity: + mean_sample_size: 0.3955 + casing_entropy: + mean_entropy: -0.4640 + mean_snake_case_count: 0.7640 + compression: + mean_raw_bytes: 0.7713 + mean_redundancy: 0.1328 + mean_unique_line_ratio: 0.1073 + mean_zlib_bytes: 0.5072 + mean_zlib_ratio: 0.2642 + entropy: + mean_char_entropy: 0.0481 + mean_char_normalized: 0.0481 + mean_token_entropy: 0.0769 + mean_token_max_entropy: 0.0825 + mean_total_tokens: 0.4877 + mean_vocab_size: 0.3955 + function_metrics: + mean_avg_function_lines: -0.5888 + mean_avg_param_count: -0.1339 + mean_function_count: 0.5327 + mean_max_function_lines: -0.2655 + halstead: + mean_N1_total_operators: 0.1749 + mean_N2_total_operands: 0.4966 + mean_difficulty: -0.0338 + mean_effort: 0.3387 + mean_estimated_bugs: 0.3723 + mean_length: 0.3056 + mean_n1_unique_operators: 
-0.0901 + mean_n2_unique_operands: 0.4402 + mean_time_to_implement_seconds: 0.3387 + mean_vocabulary: 0.3159 + mean_volume: 0.3724 + heaps: + mean_beta: -0.1294 + mean_k: 0.7952 + mean_r_squared: -0.0645 + identifier_length_variance: + mean_max: 0.2172 + mean_mean: 0.5105 + mean_std_dev: 0.5395 + mean_variance: 1.0791 + indentation: + mean_blank_line_ratio: 0.3137 + mean_mean_depth: -0.4612 + mean_variance: -0.5503 + line_patterns: + mean_blank_line_ratio: 0.3137 + mean_string_literal_ratio: -0.5060 + mean_unique_line_ratio: 0.1502 + magic_number_density: + mean_density: -1.2903 + mean_magic_number_count: -0.8032 + mean_string_literal_ratio: -0.5060 + near_duplicate_blocks_file: + mean_sub_block_count: 0.3466 + ngram: + mean_bigram_hapax_fraction: -0.1520 + mean_bigram_repeated_unique: 0.7630 + mean_bigram_repetition_rate: 0.1469 + mean_bigram_total: 0.4887 + mean_bigram_unique: 0.4248 + mean_trigram_hapax_fraction: 0.0849 + mean_trigram_repeated_unique: 0.0415 + mean_trigram_repetition_rate: -0.2233 + mean_trigram_total: 0.4896 + mean_trigram_unique: 0.5215 + punctuation_density: + mean_arrow_density: -1.4573 + mean_bracket_nonalpha_suffix_count: 0.5999 + mean_colon_suffix_density: 0.5811 + mean_id_nonalpha_suffix_density: -0.1238 + mean_question_mark_density: -0.8032 + readability: + mean_avg_line_length: 0.3048 + mean_avg_sub_words_per_id: 0.3883 + mean_flesch_adapted: -0.7069 + mean_fog_adapted: 2.0000 + mean_total_lines: 0.4762 + symbol_density: + mean_density: -0.3071 + mean_symbol_count: 0.4654 + vocabulary: + mean_mattr: 0.3317 + mean_total_identifiers: 0.6600 + mean_unique_identifiers: 0.6581 + vowel_density: + mean_total_chars: 1.1705 + zipf: + mean_exponent: -0.1353 + mean_r_squared: -0.0320 + mean_total_tokens: 0.4877 + mean_vocab_size: 0.3955 + +single_responsibility: + _doc: "Each file should have one primary concern — low complexity spread across few, focused functions." 
+ _fix_hint: "Split the module — each file should have one primary purpose" + _log_baseline: -35.4996 + branching: + mean_branch_count: -0.0678 + mean_branching_density: 0.1364 + mean_max_nesting_depth: -0.1093 + mean_non_blank_count: -0.2043 + brevity: + mean_sample_size: -0.0864 + casing_entropy: + mean_other_count: -0.7475 + mean_pascal_case_count: 0.0470 + mean_snake_case_count: -0.1816 + compression: + mean_raw_bytes: -0.1908 + mean_redundancy: -0.0351 + mean_unique_line_ratio: 0.0316 + mean_zlib_bytes: -0.1293 + mean_zlib_ratio: -0.0616 + entropy: + mean_char_entropy: 0.0078 + mean_char_max_entropy: -0.0021 + mean_char_normalized: 0.0099 + mean_token_entropy: 0.0014 + mean_token_max_entropy: -0.0182 + mean_token_normalized: 0.0196 + mean_total_tokens: -0.1489 + mean_vocab_size: -0.0864 + function_metrics: + mean_avg_function_lines: 0.1696 + mean_avg_param_count: -0.0805 + mean_function_count: -0.4114 + mean_max_param_count: -0.2962 + halstead: + mean_N1_total_operators: -0.1395 + mean_N2_total_operands: -0.1701 + mean_difficulty: 0.0527 + mean_effort: -0.1183 + mean_estimated_bugs: -0.1710 + mean_length: -0.1498 + mean_n1_unique_operators: 0.0541 + mean_n2_unique_operands: -0.1687 + mean_time_to_implement_seconds: -0.1183 + mean_vocabulary: -0.0965 + mean_volume: -0.1710 + heaps: + mean_beta: -0.0154 + mean_k: 0.0801 + mean_r_squared: -0.0163 + identifier_length_variance: + mean_max: -0.0836 + mean_mean: -0.0346 + mean_std_dev: -0.0825 + mean_variance: -0.1650 + indentation: + mean_blank_line_ratio: 0.0458 + mean_mean_depth: -0.0476 + mean_variance: -0.0931 + line_patterns: + mean_blank_line_ratio: 0.0458 + mean_max_nesting_depth: -0.1093 + mean_string_literal_ratio: -0.1759 + mean_unique_line_ratio: 0.0324 + magic_number_density: + mean_density: 0.1469 + mean_string_literal_ratio: -0.1759 + near_duplicate_blocks_file: + mean_sub_block_count: -0.1348 + ngram: + mean_bigram_hapax_fraction: 0.0075 + mean_bigram_repeated_unique: -0.1303 + 
mean_bigram_repetition_rate: -0.0207 + mean_bigram_total: -0.1492 + mean_bigram_unique: -0.1162 + mean_trigram_hapax_fraction: 0.0132 + mean_trigram_repeated_unique: -0.1793 + mean_trigram_repetition_rate: -0.0466 + mean_trigram_total: -0.1495 + mean_trigram_unique: -0.1273 + punctuation_density: + mean_arrow_density: -0.1462 + mean_bracket_nonalpha_prefix_count: -0.0859 + mean_bracket_nonalpha_suffix_count: -0.4201 + mean_colon_suffix_density: -0.4720 + mean_dot_count: -0.0630 + mean_exclamation_density: 2.0000 + mean_id_nonalpha_suffix_density: -0.0499 + mean_question_mark_density: 0.4596 + readability: + mean_avg_line_length: 0.0137 + mean_avg_sub_words_per_id: -0.0377 + mean_avg_tokens_per_line: 0.0553 + mean_flesch_adapted: 0.0348 + mean_fog_adapted: -0.0587 + mean_total_lines: -0.2043 + symbol_density: + mean_density: 0.0683 + mean_distinct_symbol_types: 0.0284 + mean_symbol_count: -0.1225 + vocabulary: + mean_mattr: -0.0474 + mean_raw_ttr: 0.0249 + mean_total_identifiers: -0.1662 + mean_unique_identifiers: -0.1414 + vowel_density: + mean_total_chars: -0.2009 + zipf: + mean_exponent: -0.0209 + mean_r_squared: -0.0043 + mean_total_tokens: -0.1489 + mean_vocab_size: -0.0864 + +uses_standard_indentation_width: + _doc: "Indentation should use consistent multiples of 2 or 4 spaces throughout the file." 
+ _fix_hint: "Use the project-standard 2-space indentation throughout" + _log_baseline: -17.9172 + compression: + mean_raw_bytes: -0.2512 + mean_redundancy: -0.0906 + mean_zlib_bytes: -0.0351 + mean_zlib_ratio: -0.2161 + entropy: + mean_char_entropy: 0.1510 + mean_char_normalized: 0.1510 + function_metrics: + mean_avg_function_lines: 0.0361 + indentation: + mean_blank_line_ratio: 0.2077 + mean_max_depth: -1.0000 + mean_mean_depth: -1.0000 + mean_variance: -2.0000 + line_patterns: + mean_blank_line_ratio: 0.2077 + punctuation_density: + mean_exclamation_density: 0.2630 + mean_question_mark_density: 0.2630 + readability: + mean_avg_line_length: -0.2644 + symbol_density: + mean_density: 0.2512 + diff --git a/priv/combined_metrics/function_design.yml b/priv/combined_metrics/function_design.yml new file mode 100644 index 0000000..6824ebb --- /dev/null +++ b/priv/combined_metrics/function_design.yml @@ -0,0 +1,820 @@ +boolean_function_has_question_mark: + _doc: "Functions returning a boolean should end with `?` (Elixir/Ruby) or start with `is_`/`has_` (JS/Python)." + _fix_hint: "Add a ? suffix to boolean-returning functions (e.g., valid? 
instead of is_valid)" + _log_baseline: 7.0991 + brevity: + mean_sample_size: 0.0085 + compression: + mean_raw_bytes: 0.0063 + mean_zlib_bytes: 0.0085 + entropy: + mean_char_entropy: 0.0045 + mean_char_max_entropy: 0.0028 + mean_token_entropy: 0.0034 + mean_total_tokens: 0.0248 + mean_vocab_size: 0.0085 + heaps: + mean_beta: -0.0104 + mean_k: 0.0270 + ngram: + mean_bigram_hapax_fraction: -0.0039 + mean_bigram_repeated_unique: 0.0190 + mean_bigram_repetition_rate: 0.0261 + mean_bigram_total: 0.0250 + mean_bigram_unique: 0.0039 + mean_trigram_repeated_unique: 0.0300 + mean_trigram_repetition_rate: 0.0180 + mean_trigram_total: 0.0251 + mean_trigram_unique: 0.0207 + punctuation_density: + mean_bracket_nonalpha_prefix_count: 1.0697 + mean_exclamation_density: -0.0344 + mean_id_nonalpha_suffix_density: -0.0248 + mean_question_mark_density: -2.0000 + readability: + mean_avg_line_length: 0.0067 + mean_avg_tokens_per_line: 0.0248 + mean_fog_adapted: 0.0248 + symbol_density: + mean_density: 0.0490 + mean_distinct_symbol_types: 0.0404 + mean_symbol_count: 0.0554 + zipf: + mean_exponent: 0.0072 + mean_total_tokens: 0.0248 + mean_vocab_size: 0.0085 + +cyclomatic_complexity_under_10: + _doc: "Functions should have a cyclomatic complexity under 10." 
+ _fix_hint: "Reduce branching — extract complex conditionals into helper functions" + _log_baseline: -1.6476 + branching: + mean_branch_count: -0.2373 + mean_branching_density: -0.1952 + mean_non_blank_count: -0.0421 + casing_entropy: + mean_entropy: 0.1030 + mean_other_count: 0.3306 + mean_snake_case_count: 0.0284 + compression: + mean_raw_bytes: -0.0162 + mean_redundancy: -0.0172 + mean_unique_line_ratio: -0.0305 + mean_zlib_bytes: 0.0186 + mean_zlib_ratio: -0.0347 + entropy: + mean_char_entropy: 0.0349 + mean_char_max_entropy: 0.0016 + mean_char_normalized: 0.0333 + mean_token_entropy: -0.0050 + mean_token_normalized: -0.0050 + mean_total_tokens: 0.0437 + function_metrics: + mean_avg_function_lines: -0.4757 + mean_function_count: 0.4636 + mean_max_function_lines: -0.5038 + halstead: + mean_N1_total_operators: 0.0708 + mean_N2_total_operands: 0.0358 + mean_difficulty: 0.0472 + mean_effort: 0.1087 + mean_estimated_bugs: 0.0615 + mean_length: 0.0605 + mean_n1_unique_operators: 0.0114 + mean_time_to_implement_seconds: 0.1087 + mean_vocabulary: 0.0043 + mean_volume: 0.0615 + heaps: + mean_beta: -0.0367 + mean_k: 0.0672 + mean_r_squared: 0.0049 + identifier_length_variance: + mean_mean: 0.0092 + mean_std_dev: 0.0209 + mean_variance: 0.0418 + indentation: + mean_blank_line_ratio: 0.1655 + mean_max_depth: -0.2086 + mean_mean_depth: -0.2901 + mean_variance: -0.4637 + line_patterns: + mean_blank_line_ratio: 0.1655 + mean_string_literal_ratio: -0.0439 + mean_unique_line_ratio: 0.0055 + magic_number_density: + mean_density: -0.0329 + mean_string_literal_ratio: -0.0439 + near_duplicate_blocks_file: + mean_sub_block_count: 0.0994 + ngram: + mean_bigram_hapax_fraction: -0.0068 + mean_bigram_repeated_unique: 0.0301 + mean_bigram_repetition_rate: 0.0115 + mean_bigram_total: 0.0438 + mean_bigram_unique: 0.0192 + mean_trigram_hapax_fraction: -0.0027 + mean_trigram_repeated_unique: 0.0456 + mean_trigram_repetition_rate: 0.0055 + mean_trigram_total: 0.0440 + mean_trigram_unique: 
0.0388 + punctuation_density: + mean_arrow_density: -0.4960 + mean_bracket_nonalpha_prefix_count: 0.2488 + mean_bracket_nonalpha_suffix_count: 0.3306 + mean_colon_suffix_density: 0.2760 + mean_dot_count: -0.3005 + mean_exclamation_density: 2.0000 + mean_id_nonalpha_suffix_density: 0.0616 + readability: + mean_avg_line_length: 0.0257 + mean_avg_sub_words_per_id: 0.0067 + mean_avg_tokens_per_line: 0.0858 + mean_flesch_adapted: -0.0140 + mean_fog_adapted: 0.1035 + mean_total_lines: -0.0421 + symbol_density: + mean_density: 0.0552 + mean_distinct_symbol_types: -0.0172 + mean_symbol_count: 0.0391 + vocabulary: + mean_mattr: -0.0325 + mean_raw_ttr: -0.0309 + mean_total_identifiers: 0.0382 + mean_unique_identifiers: 0.0074 + vowel_density: + mean_total_chars: 0.0474 + zipf: + mean_exponent: 0.0120 + mean_r_squared: 0.0057 + mean_total_tokens: 0.0437 + +has_verb_in_name: + _doc: "Function names should contain a verb describing the action performed." + _fix_hint: "Start function names with a verb (get_, fetch_, build_, compute_, validate_)" + _log_baseline: 15.9117 + compression: + mean_raw_bytes: 0.0990 + mean_redundancy: -0.0473 + mean_zlib_bytes: 0.2439 + mean_zlib_ratio: -0.1449 + identifier_length_variance: + mean_max: 0.9396 + mean_mean: 0.1989 + mean_std_dev: 1.0001 + mean_variance: 2.0000 + punctuation_density: + mean_exclamation_density: -0.1305 + readability: + mean_avg_line_length: 0.1026 + mean_avg_sub_words_per_id: 0.1614 + mean_flesch_adapted: -0.1605 + mean_fog_adapted: 1.6084 + symbol_density: + mean_density: -0.1005 + vowel_density: + mean_total_chars: 0.1989 + +is_less_than_20_lines: + _doc: "Functions should be 20 lines or fewer." 
+ _fix_hint: "Split long functions — each function should fit on one screen (under 20 lines)" + _log_baseline: 23.1945 + branching: + mean_branch_count: -0.0820 + mean_branching_density: -0.1010 + mean_max_nesting_depth: -0.1156 + mean_non_blank_count: 0.0188 + brevity: + mean_sample_size: 0.0165 + casing_entropy: + mean_entropy: 0.0640 + mean_other_count: 0.6266 + mean_pascal_case_count: 0.0440 + mean_snake_case_count: 0.0800 + compression: + mean_raw_bytes: 0.0746 + mean_redundancy: 0.0227 + mean_unique_line_ratio: -0.0334 + mean_zlib_bytes: 0.0366 + mean_zlib_ratio: 0.0379 + entropy: + mean_char_entropy: 0.0020 + mean_token_entropy: -0.0041 + mean_token_max_entropy: 0.0035 + mean_token_normalized: -0.0076 + mean_total_tokens: 0.0759 + mean_vocab_size: 0.0165 + function_metrics: + mean_avg_function_lines: -0.3598 + mean_avg_param_count: 0.1156 + mean_function_count: 0.3705 + mean_max_function_lines: -0.4532 + mean_max_param_count: 0.0820 + halstead: + mean_N1_total_operators: 0.0857 + mean_N2_total_operands: 0.0965 + mean_difficulty: 0.0624 + mean_effort: 0.1550 + mean_estimated_bugs: 0.0926 + mean_length: 0.0895 + mean_n1_unique_operators: -0.0097 + mean_n2_unique_operands: 0.0245 + mean_time_to_implement_seconds: 0.1550 + mean_vocabulary: 0.0143 + mean_volume: 0.0926 + heaps: + mean_k: -0.0254 + identifier_length_variance: + mean_mean: 0.0149 + mean_std_dev: 0.0313 + mean_variance: 0.0625 + indentation: + mean_blank_line_ratio: -0.0440 + mean_mean_depth: -0.0962 + mean_variance: -0.1115 + line_patterns: + mean_blank_line_ratio: -0.0440 + mean_max_nesting_depth: -0.1156 + mean_string_literal_ratio: -0.0774 + mean_unique_line_ratio: -0.0188 + magic_number_density: + mean_density: 0.0389 + mean_magic_number_count: 0.1156 + mean_string_literal_ratio: -0.0774 + near_duplicate_blocks_file: + mean_sub_block_count: 0.1886 + ngram: + mean_bigram_hapax_fraction: -0.0508 + mean_bigram_repeated_unique: 0.1067 + mean_bigram_repetition_rate: 0.0562 + mean_bigram_total: 
0.0760 + mean_bigram_unique: 0.0228 + mean_trigram_hapax_fraction: -0.0300 + mean_trigram_repeated_unique: 0.1516 + mean_trigram_repetition_rate: 0.1014 + mean_trigram_total: 0.0761 + mean_trigram_unique: 0.0386 + punctuation_density: + mean_arrow_density: -0.3892 + mean_bracket_nonalpha_prefix_count: 0.0418 + mean_bracket_nonalpha_suffix_count: 0.0476 + mean_colon_suffix_density: 0.0941 + mean_dot_count: 0.0717 + mean_exclamation_density: -0.0820 + mean_id_nonalpha_suffix_density: 0.0518 + mean_question_mark_density: -2.0000 + readability: + mean_avg_line_length: 0.0576 + mean_avg_sub_words_per_id: -0.0034 + mean_avg_tokens_per_line: 0.0570 + mean_flesch_adapted: -0.0039 + mean_fog_adapted: 0.0868 + mean_total_lines: 0.0188 + symbol_density: + mean_density: -0.0084 + mean_distinct_symbol_types: 0.0127 + mean_symbol_count: 0.0662 + vocabulary: + mean_mattr: -0.0368 + mean_raw_ttr: -0.0647 + mean_total_identifiers: 0.0864 + mean_unique_identifiers: 0.0218 + vowel_density: + mean_total_chars: 0.1013 + zipf: + mean_exponent: 0.0225 + mean_r_squared: 0.0030 + mean_total_tokens: 0.0759 + mean_vocab_size: 0.0165 + +nesting_depth_under_4: + _doc: "Code should not nest deeper than 4 levels." 
+ _fix_hint: "Reduce nesting — use early returns, guards, or extract inner blocks" + _log_baseline: 1.1322 + branching: + mean_branch_count: -0.3267 + mean_branching_density: -0.2061 + mean_max_nesting_depth: 0.2061 + mean_non_blank_count: -0.1206 + brevity: + mean_sample_size: 0.0178 + casing_entropy: + mean_entropy: -0.0358 + mean_other_count: 0.2917 + mean_pascal_case_count: -0.2725 + mean_snake_case_count: 0.1015 + compression: + mean_raw_bytes: -0.0069 + mean_redundancy: -0.0076 + mean_unique_line_ratio: -0.0028 + mean_zlib_bytes: 0.0117 + mean_zlib_ratio: -0.0186 + entropy: + mean_char_entropy: 0.0715 + mean_char_max_entropy: -0.0017 + mean_char_normalized: 0.0732 + mean_token_entropy: -0.0118 + mean_token_max_entropy: 0.0042 + mean_token_normalized: -0.0161 + mean_total_tokens: 0.1047 + mean_vocab_size: 0.0178 + function_metrics: + mean_avg_function_lines: -0.6349 + mean_function_count: 0.5787 + mean_max_function_lines: -0.3375 + halstead: + mean_N1_total_operators: 0.1525 + mean_N2_total_operands: 0.0950 + mean_difficulty: 0.0991 + mean_effort: 0.2385 + mean_estimated_bugs: 0.1394 + mean_length: 0.1347 + mean_n1_unique_operators: 0.0229 + mean_n2_unique_operands: 0.0188 + mean_time_to_implement_seconds: 0.2385 + mean_vocabulary: 0.0202 + mean_volume: 0.1394 + heaps: + mean_beta: -0.0464 + mean_k: 0.0845 + identifier_length_variance: + mean_mean: 0.0488 + mean_std_dev: 0.1849 + mean_variance: 0.3698 + indentation: + mean_blank_line_ratio: 0.5622 + mean_max_depth: -0.3155 + mean_mean_depth: -0.3651 + mean_variance: -0.6050 + line_patterns: + mean_blank_line_ratio: 0.5622 + mean_max_nesting_depth: 0.2061 + mean_string_literal_ratio: -0.1046 + mean_unique_line_ratio: 0.0786 + magic_number_density: + mean_string_literal_ratio: -0.1046 + near_duplicate_blocks_file: + mean_sub_block_count: 0.1999 + ngram: + mean_bigram_hapax_fraction: -0.0645 + mean_bigram_repeated_unique: 0.1420 + mean_bigram_repetition_rate: 0.0534 + mean_bigram_total: 0.1049 + 
mean_bigram_unique: 0.0292 + mean_trigram_hapax_fraction: -0.0170 + mean_trigram_repeated_unique: 0.1274 + mean_trigram_repetition_rate: 0.0457 + mean_trigram_total: 0.1052 + mean_trigram_unique: 0.0686 + punctuation_density: + mean_arrow_density: 0.9701 + mean_bracket_nonalpha_prefix_count: 0.1748 + mean_bracket_nonalpha_suffix_count: 0.9451 + mean_colon_suffix_density: 0.8804 + mean_dot_count: -0.2520 + mean_exclamation_density: 2.0000 + mean_id_nonalpha_suffix_density: 0.0946 + mean_question_mark_density: -0.1977 + readability: + mean_avg_line_length: 0.1152 + mean_avg_sub_words_per_id: 0.0220 + mean_avg_tokens_per_line: 0.2252 + mean_flesch_adapted: -0.0374 + mean_fog_adapted: 0.2252 + mean_total_lines: -0.1206 + symbol_density: + mean_density: 0.1426 + mean_symbol_count: 0.1355 + vocabulary: + mean_mattr: -0.0321 + mean_raw_ttr: -0.0555 + mean_total_identifiers: 0.0987 + mean_unique_identifiers: 0.0432 + vowel_density: + mean_total_chars: 0.1475 + zipf: + mean_exponent: 0.0250 + mean_r_squared: 0.0156 + mean_total_tokens: 0.1047 + mean_vocab_size: 0.0178 + +no_boolean_parameter: + _doc: "Functions should not take boolean parameters — a flag usually means the function does two things." 
+ _fix_hint: "Replace boolean parameters with two separate functions or use an options map" + _log_baseline: 3.0928 + branching: + mean_branch_count: -2.0000 + mean_branching_density: 1.0271 + mean_max_nesting_depth: -0.3263 + mean_non_blank_count: -0.0383 + brevity: + mean_sample_size: -0.0253 + casing_entropy: + mean_entropy: 0.0155 + mean_pascal_case_count: 0.1180 + mean_snake_case_count: 0.0762 + compression: + mean_raw_bytes: 0.0435 + mean_redundancy: 0.0777 + mean_unique_line_ratio: -0.0656 + mean_zlib_bytes: -0.1055 + mean_zlib_ratio: 0.1490 + entropy: + mean_char_entropy: 0.0152 + mean_char_normalized: 0.0153 + mean_token_entropy: -0.0129 + mean_token_max_entropy: -0.0055 + mean_token_normalized: -0.0073 + mean_total_tokens: 0.0692 + mean_vocab_size: -0.0253 + function_metrics: + mean_avg_function_lines: -0.3850 + mean_avg_param_count: -0.2935 + mean_function_count: 0.4338 + mean_max_function_lines: -0.5579 + halstead: + mean_N1_total_operators: 0.0393 + mean_N2_total_operands: 0.0832 + mean_difficulty: 0.0207 + mean_effort: 0.0660 + mean_estimated_bugs: 0.0453 + mean_length: 0.0543 + mean_n1_unique_operators: -0.0806 + mean_n2_unique_operands: -0.0181 + mean_time_to_implement_seconds: 0.0660 + mean_vocabulary: -0.0374 + mean_volume: 0.0453 + heaps: + mean_beta: -0.0314 + mean_k: 0.0620 + identifier_length_variance: + mean_mean: 0.0178 + mean_std_dev: 0.1693 + mean_variance: 0.3386 + indentation: + mean_blank_line_ratio: 0.4402 + mean_max_depth: -0.5579 + mean_mean_depth: -0.2880 + mean_variance: -0.8414 + line_patterns: + mean_blank_line_ratio: 0.4402 + mean_max_nesting_depth: -0.3263 + mean_string_literal_ratio: 0.0206 + mean_unique_line_ratio: 0.0101 + magic_number_density: + mean_string_literal_ratio: 0.0206 + near_duplicate_blocks_file: + mean_sub_block_count: 0.2775 + ngram: + mean_bigram_hapax_fraction: -0.1940 + mean_bigram_repeated_unique: 0.1467 + mean_bigram_repetition_rate: 0.1504 + mean_bigram_total: 0.0694 + mean_bigram_unique: -0.1127 + 
mean_trigram_hapax_fraction: -0.2208 + mean_trigram_repeated_unique: 0.3783 + mean_trigram_repetition_rate: 0.3150 + mean_trigram_total: 0.0695 + mean_trigram_unique: -0.1019 + punctuation_density: + mean_bracket_nonalpha_prefix_count: 0.1152 + mean_bracket_nonalpha_suffix_count: 0.1075 + mean_colon_suffix_density: -0.2793 + mean_dot_count: 0.1538 + mean_exclamation_density: -0.0435 + mean_id_nonalpha_suffix_density: 0.0357 + readability: + mean_avg_line_length: 0.0817 + mean_avg_sub_words_per_id: 0.0160 + mean_avg_tokens_per_line: 0.1075 + mean_flesch_adapted: -0.0254 + mean_fog_adapted: 0.2928 + mean_total_lines: -0.0383 + symbol_density: + mean_density: 0.0479 + mean_symbol_count: 0.0916 + vocabulary: + mean_mattr: -0.0611 + mean_raw_ttr: -0.0924 + mean_total_identifiers: 0.0811 + mean_unique_identifiers: -0.0114 + vowel_density: + mean_total_chars: 0.0989 + zipf: + mean_exponent: 0.0374 + mean_total_tokens: 0.0692 + mean_vocab_size: -0.0253 + +no_magic_numbers: + _doc: "Numeric literals should be named constants, not inline magic numbers." 
+ _fix_hint: "Replace magic numbers inside functions with named module attributes or constants" + _log_baseline: 48.6069 + branching: + mean_branch_count: -0.2708 + mean_branching_density: -0.1682 + mean_non_blank_count: -0.1029 + brevity: + mean_sample_size: 0.1527 + casing_entropy: + mean_entropy: -0.2876 + mean_snake_case_count: 0.4222 + compression: + mean_raw_bytes: 0.3823 + mean_redundancy: 0.0584 + mean_unique_line_ratio: 0.2269 + mean_zlib_bytes: 0.2473 + mean_zlib_ratio: 0.1350 + entropy: + mean_char_entropy: 0.0661 + mean_char_normalized: 0.0624 + mean_token_entropy: 0.0148 + mean_token_max_entropy: 0.0355 + mean_token_normalized: -0.0207 + mean_total_tokens: 0.2834 + mean_vocab_size: 0.1527 + function_metrics: + mean_avg_function_lines: -0.8758 + mean_function_count: 0.4111 + halstead: + mean_N1_total_operators: 0.1953 + mean_N2_total_operands: 0.2960 + mean_difficulty: 0.0408 + mean_effort: 0.3105 + mean_estimated_bugs: 0.2698 + mean_length: 0.2359 + mean_n1_unique_operators: -0.0413 + mean_n2_unique_operands: 0.2139 + mean_time_to_implement_seconds: 0.3105 + mean_vocabulary: 0.1447 + mean_volume: 0.2697 + heaps: + mean_beta: -0.1129 + mean_k: 0.5236 + mean_r_squared: -0.0256 + identifier_length_variance: + mean_max: 0.0987 + mean_mean: 0.3701 + mean_std_dev: 0.3918 + mean_variance: 0.7835 + indentation: + mean_blank_line_ratio: 0.2374 + mean_mean_depth: -0.3518 + mean_variance: -0.4760 + line_patterns: + mean_blank_line_ratio: 0.2374 + mean_string_literal_ratio: -0.2880 + mean_unique_line_ratio: 0.2337 + magic_number_density: + mean_density: -0.2831 + mean_string_literal_ratio: -0.2880 + near_duplicate_blocks_file: + mean_sub_block_count: 0.2708 + ngram: + mean_bigram_hapax_fraction: -0.1437 + mean_bigram_repeated_unique: 0.4787 + mean_bigram_repetition_rate: 0.1545 + mean_bigram_total: 0.2844 + mean_bigram_unique: 0.1437 + mean_trigram_hapax_fraction: -0.0207 + mean_trigram_repeated_unique: 0.2787 + mean_trigram_repetition_rate: 0.1465 + 
mean_trigram_total: 0.2854 + mean_trigram_unique: 0.1843 + punctuation_density: + mean_arrow_density: -1.1699 + mean_bracket_nonalpha_suffix_count: 2.0000 + mean_colon_suffix_density: 1.9476 + mean_id_nonalpha_suffix_density: 0.3448 + readability: + mean_avg_line_length: 0.5035 + mean_avg_sub_words_per_id: 0.2699 + mean_avg_tokens_per_line: 0.3863 + mean_flesch_adapted: -0.3819 + mean_fog_adapted: 1.0656 + mean_total_lines: -0.1029 + symbol_density: + mean_density: -0.0314 + mean_distinct_symbol_types: 0.0644 + mean_symbol_count: 0.3512 + vocabulary: + mean_mattr: 0.0054 + mean_raw_ttr: -0.0161 + mean_total_identifiers: 0.3860 + mean_unique_identifiers: 0.3699 + vowel_density: + mean_total_chars: 0.7561 + zipf: + mean_exponent: 0.0164 + mean_r_squared: 0.0321 + mean_total_tokens: 0.2834 + mean_vocab_size: 0.1527 + +parameter_count_under_4: + _doc: "Functions should take fewer than 4 parameters." + _fix_hint: "Reduce parameter count — group related params into a struct or options map" + _log_baseline: 1.6218 + branching: + mean_non_blank_count: 0.0967 + brevity: + mean_sample_size: 0.0261 + casing_entropy: + mean_entropy: 0.5987 + mean_other_count: 0.5408 + mean_pascal_case_count: 0.2329 + mean_snake_case_count: -0.0580 + compression: + mean_raw_bytes: -0.0343 + mean_redundancy: -0.0308 + mean_unique_line_ratio: -0.0166 + mean_zlib_bytes: 0.0291 + mean_zlib_ratio: -0.0634 + entropy: + mean_char_entropy: 0.0082 + mean_char_max_entropy: 0.0175 + mean_char_normalized: -0.0093 + mean_token_entropy: 0.0206 + mean_token_max_entropy: 0.0063 + mean_token_normalized: 0.0144 + mean_total_tokens: -0.0335 + mean_vocab_size: 0.0261 + function_metrics: + mean_avg_function_lines: 0.1262 + mean_avg_param_count: -0.3179 + mean_function_count: -0.0320 + mean_max_function_lines: 0.2037 + mean_max_param_count: -0.2847 + halstead: + mean_N1_total_operators: -0.0264 + mean_N2_total_operands: -0.0507 + mean_difficulty: 0.0180 + mean_effort: -0.0147 + mean_estimated_bugs: -0.0326 + 
mean_length: -0.0353 + mean_n1_unique_operators: 0.0613 + mean_n2_unique_operands: -0.0074 + mean_time_to_implement_seconds: -0.0147 + mean_vocabulary: 0.0108 + mean_volume: -0.0327 + heaps: + mean_beta: 0.0179 + mean_k: -0.0082 + mean_r_squared: -0.0062 + identifier_length_variance: + mean_mean: -0.0044 + mean_std_dev: -0.0221 + mean_variance: -0.0442 + indentation: + mean_blank_line_ratio: 0.0518 + mean_max_depth: 0.1362 + mean_mean_depth: 0.0506 + mean_variance: 0.1451 + line_patterns: + mean_blank_line_ratio: 0.0518 + mean_string_literal_ratio: 0.1674 + mean_unique_line_ratio: -0.0137 + magic_number_density: + mean_density: -2.0000 + mean_string_literal_ratio: 0.1674 + ngram: + mean_bigram_hapax_fraction: 0.0479 + mean_bigram_repeated_unique: -0.0222 + mean_bigram_repetition_rate: -0.0480 + mean_bigram_total: -0.0336 + mean_bigram_unique: 0.0376 + mean_trigram_hapax_fraction: 0.0610 + mean_trigram_repeated_unique: -0.1263 + mean_trigram_repetition_rate: -0.1619 + mean_trigram_total: -0.0337 + mean_trigram_unique: 0.0524 + punctuation_density: + mean_bracket_nonalpha_prefix_count: -0.0967 + mean_bracket_nonalpha_suffix_count: -0.1131 + mean_colon_suffix_density: 0.0056 + mean_dot_count: 0.9099 + mean_id_nonalpha_suffix_density: -0.0434 + readability: + mean_avg_line_length: -0.1345 + mean_avg_sub_words_per_id: -0.0093 + mean_avg_tokens_per_line: -0.1302 + mean_flesch_adapted: 0.0271 + mean_fog_adapted: -0.1290 + mean_total_lines: 0.0967 + symbol_density: + mean_density: 0.0124 + mean_distinct_symbol_types: 0.1042 + mean_symbol_count: -0.0218 + vocabulary: + mean_mattr: 0.0175 + mean_raw_ttr: 0.0416 + mean_total_identifiers: -0.0416 + vowel_density: + mean_total_chars: -0.0460 + zipf: + mean_exponent: 0.0101 + mean_r_squared: -0.0074 + mean_total_tokens: -0.0335 + mean_vocab_size: 0.0261 + +uses_ternary_expression: + _doc: "Simple conditional assignments should use inline expressions rather than full if-blocks." 
+ _fix_hint: "Replace verbose if-else blocks with concise ternary/conditional expressions where readable" + _log_baseline: -0.3649 + branching: + mean_branch_count: -0.4693 + mean_branching_density: 0.1280 + mean_non_blank_count: -0.5975 + brevity: + mean_sample_size: 0.0107 + casing_entropy: + mean_entropy: -0.0141 + mean_snake_case_count: 0.0271 + compression: + mean_raw_bytes: -0.0924 + mean_redundancy: -0.0709 + mean_unique_line_ratio: 0.1809 + mean_zlib_bytes: 0.0167 + mean_zlib_ratio: -0.1090 + entropy: + mean_char_entropy: 0.0749 + mean_char_normalized: 0.0717 + mean_token_entropy: -0.0088 + mean_token_normalized: -0.0114 + mean_total_tokens: 0.0969 + mean_vocab_size: 0.0107 + function_metrics: + mean_avg_function_lines: -0.7654 + mean_function_count: 0.2745 + mean_max_function_lines: -0.4693 + halstead: + mean_N1_total_operators: 0.1767 + mean_N2_total_operands: 0.0622 + mean_difficulty: 0.0952 + mean_effort: 0.2409 + mean_estimated_bugs: 0.1456 + mean_length: 0.1429 + mean_n1_unique_operators: 0.0330 + mean_time_to_implement_seconds: 0.2409 + mean_vocabulary: 0.0114 + mean_volume: 0.1457 + heaps: + mean_beta: -0.0340 + mean_k: 0.0670 + identifier_length_variance: + mean_mean: 0.0616 + mean_std_dev: 0.0617 + mean_variance: 0.1234 + indentation: + mean_blank_line_ratio: 0.5702 + mean_max_depth: -0.2745 + mean_mean_depth: -0.3658 + mean_variance: -0.6153 + line_patterns: + mean_blank_line_ratio: 0.5702 + mean_string_literal_ratio: -0.0964 + mean_unique_line_ratio: 0.1839 + magic_number_density: + mean_density: -0.0969 + mean_string_literal_ratio: -0.0964 + near_duplicate_blocks_file: + mean_sub_block_count: 0.2745 + ngram: + mean_bigram_hapax_fraction: -0.0616 + mean_bigram_repeated_unique: 0.2415 + mean_bigram_repetition_rate: 0.0886 + mean_bigram_total: 0.0973 + mean_bigram_unique: 0.0487 + mean_trigram_hapax_fraction: -0.0186 + mean_trigram_repeated_unique: 0.1511 + mean_trigram_repetition_rate: 0.1328 + mean_trigram_total: 0.0977 + mean_trigram_unique: 
0.0452 + punctuation_density: + mean_bracket_nonalpha_prefix_count: 0.3060 + mean_bracket_nonalpha_suffix_count: 0.5928 + mean_bracket_number_pair_count: 0.4693 + mean_colon_suffix_density: 2.0000 + mean_dot_count: -1.3176 + mean_id_nonalpha_suffix_density: 0.2152 + readability: + mean_avg_line_length: 0.5254 + mean_avg_sub_words_per_id: 0.0352 + mean_avg_tokens_per_line: 0.6944 + mean_flesch_adapted: -0.0769 + mean_fog_adapted: 0.6047 + mean_total_lines: -0.5975 + symbol_density: + mean_density: 0.3573 + mean_distinct_symbol_types: 0.0410 + mean_symbol_count: 0.2651 + vocabulary: + mean_mattr: -0.0421 + mean_raw_ttr: -0.0421 + mean_total_identifiers: 0.0230 + mean_unique_identifiers: -0.0191 + vowel_density: + mean_total_chars: 0.0845 + zipf: + mean_exponent: 0.0361 + mean_r_squared: 0.0150 + mean_total_tokens: 0.0969 + mean_vocab_size: 0.0107 + diff --git a/priv/combined_metrics/naming_conventions.yml b/priv/combined_metrics/naming_conventions.yml new file mode 100644 index 0000000..33e5d8a --- /dev/null +++ b/priv/combined_metrics/naming_conventions.yml @@ -0,0 +1,266 @@ +class_name_is_noun: + _doc: "Class and module names should be nouns describing what they represent, not verbs or gerunds." 
+ _fix_hint: "Name modules/classes with nouns (User, OrderProcessor) not verbs" + _log_baseline: 4.2909 + brevity: + mean_sample_size: 0.7106 + compression: + mean_raw_bytes: 0.1346 + mean_redundancy: -0.0605 + mean_zlib_bytes: 0.2139 + mean_zlib_ratio: -0.0794 + entropy: + mean_token_entropy: 0.1236 + mean_token_max_entropy: 0.1716 + mean_token_normalized: -0.0484 + mean_vocab_size: 0.7106 + halstead: + mean_difficulty: -1.1493 + mean_effort: -0.9669 + mean_estimated_bugs: 0.1818 + mean_n2_unique_operands: 1.1492 + mean_time_to_implement_seconds: -0.9669 + mean_vocabulary: 0.7462 + mean_volume: 0.1823 + heaps: + mean_beta: 0.4086 + mean_k: -0.6266 + identifier_length_variance: + mean_max: -0.4031 + mean_mean: 0.3059 + mean_std_dev: -0.5093 + mean_variance: -1.0187 + ngram: + mean_bigram_hapax_fraction: 0.2542 + mean_bigram_repeated_unique: -0.5967 + mean_bigram_repetition_rate: -0.9599 + mean_bigram_unique: 0.6173 + mean_trigram_hapax_fraction: 0.2449 + mean_trigram_repeated_unique: -2.0000 + mean_trigram_repetition_rate: -1.9547 + mean_trigram_unique: 0.6002 + punctuation_density: + mean_exclamation_density: -0.3314 + readability: + mean_avg_line_length: 0.1418 + symbol_density: + mean_density: -0.1381 + vocabulary: + mean_mattr: 1.2109 + mean_raw_ttr: 1.2109 + mean_unique_identifiers: 1.2116 + vowel_density: + mean_total_chars: 0.3059 + zipf: + mean_exponent: -0.2180 + mean_vocab_size: 0.7106 + +file_name_matches_primary_export: + _doc: "The file name should match the primary class or module it exports (e.g. `user.js` exports `User`)." + _fix_hint: "Rename the file to match the primary module it defines" + _log_baseline: 0.0000 + casing_entropy: + mean_pascal_case_count: 0.0000 + vocabulary: + mean_unique_identifiers: 0.0000 + +function_name_is_not_single_word: + _doc: "Single-word function names like `run`, `process`, or `handle` are too vague to convey intent." 
+ _fix_hint: "Use at least two words in function names to convey intent (e.g., fetch_user not fetch)" + _log_baseline: 17.4874 + compression: + mean_raw_bytes: 0.2480 + mean_redundancy: 0.0791 + mean_zlib_bytes: 0.1049 + mean_zlib_ratio: 0.1431 + entropy: + mean_char_entropy: 0.0245 + mean_char_normalized: 0.0246 + identifier_length_variance: + mean_max: 0.7830 + mean_mean: 0.5357 + mean_std_dev: 1.0000 + mean_variance: 2.0000 + readability: + mean_avg_line_length: 0.2607 + mean_avg_sub_words_per_id: 0.3141 + mean_flesch_adapted: -0.3241 + mean_fog_adapted: 1.3508 + symbol_density: + mean_density: -0.2477 + vowel_density: + mean_total_chars: 0.5357 + +function_name_matches_return_type: + _doc: "Functions prefixed with `get_`, `fetch_`, or `find_` should return the thing they name." + _fix_hint: "Align the function name with what it returns (get_ for values, is_/has_ for booleans)" + _log_baseline: 7.9532 + branching: + mean_max_nesting_depth: 0.1335 + brevity: + mean_sample_size: 0.0257 + casing_entropy: + mean_entropy: 0.0452 + mean_other_count: 0.0347 + mean_snake_case_count: -0.0493 + compression: + mean_raw_bytes: -0.0190 + mean_redundancy: -0.0180 + mean_unique_line_ratio: -0.0104 + mean_zlib_bytes: 0.0143 + mean_zlib_ratio: -0.0332 + entropy: + mean_char_entropy: 0.0079 + mean_char_max_entropy: 0.0071 + mean_token_max_entropy: 0.0059 + mean_token_normalized: -0.0045 + mean_total_tokens: 0.0030 + mean_vocab_size: 0.0257 + halstead: + mean_N1_total_operators: 0.0392 + mean_N2_total_operands: -0.0539 + mean_difficulty: 0.0029 + mean_effort: 0.0080 + mean_estimated_bugs: 0.0050 + mean_n1_unique_operators: 0.0629 + mean_n2_unique_operands: 0.0060 + mean_time_to_implement_seconds: 0.0080 + mean_vocabulary: 0.0218 + mean_volume: 0.0050 + heaps: + mean_beta: 0.0291 + mean_k: -0.0519 + mean_r_squared: 0.0038 + identifier_length_variance: + mean_max: 0.1082 + mean_mean: 0.0200 + mean_std_dev: 0.0081 + mean_variance: 0.0162 + line_patterns: + mean_max_nesting_depth: 
0.1335 + mean_string_literal_ratio: -0.0027 + mean_unique_line_ratio: -0.0108 + magic_number_density: + mean_density: -0.0108 + mean_string_literal_ratio: -0.0027 + near_duplicate_blocks_file: + mean_sub_block_count: 0.0314 + ngram: + mean_bigram_hapax_fraction: 0.0106 + mean_bigram_repeated_unique: 0.0095 + mean_bigram_repetition_rate: -0.0167 + mean_bigram_total: 0.0030 + mean_bigram_unique: 0.0261 + mean_trigram_hapax_fraction: 0.0174 + mean_trigram_repeated_unique: -0.0297 + mean_trigram_repetition_rate: -0.0444 + mean_trigram_total: 0.0030 + mean_trigram_unique: 0.0245 + punctuation_density: + mean_bracket_nonalpha_prefix_count: 0.1048 + mean_colon_suffix_density: -0.0027 + mean_dot_count: 0.1335 + mean_id_nonalpha_suffix_density: 0.0266 + mean_question_mark_density: -2.0000 + readability: + mean_avg_line_length: 0.0175 + mean_avg_sub_words_per_id: 0.0087 + mean_avg_tokens_per_line: 0.0030 + mean_flesch_adapted: -0.0107 + mean_fog_adapted: 0.0058 + symbol_density: + mean_density: 0.0633 + mean_distinct_symbol_types: 0.0639 + mean_symbol_count: 0.0442 + vocabulary: + mean_mattr: 0.0228 + mean_raw_ttr: 0.0478 + mean_total_identifiers: -0.0410 + mean_unique_identifiers: 0.0068 + vowel_density: + mean_total_chars: -0.0210 + zipf: + mean_exponent: -0.0047 + mean_r_squared: 0.0105 + mean_total_tokens: 0.0030 + mean_vocab_size: 0.0257 + +test_name_starts_with_verb: + _doc: "Test descriptions should start with a verb: `creates`, `raises`, `returns`, not a noun phrase." 
+ _fix_hint: "Start test descriptions with a verb (returns, raises, creates, validates)" + _log_baseline: 7.8702 + branching: + mean_branch_count: 1.9977 + mean_branching_density: 2.0000 + brevity: + mean_sample_size: 0.0694 + casing_entropy: + mean_entropy: -0.0749 + mean_snake_case_count: 0.1317 + compression: + mean_raw_bytes: 0.0914 + mean_redundancy: 0.0182 + mean_zlib_bytes: 0.0482 + mean_zlib_ratio: 0.0431 + entropy: + mean_char_max_entropy: 0.0064 + mean_char_normalized: -0.0121 + mean_token_entropy: 0.0259 + mean_token_max_entropy: 0.0155 + mean_token_normalized: 0.0104 + mean_total_tokens: 0.0600 + mean_vocab_size: 0.0694 + halstead: + mean_N1_total_operators: 0.0411 + mean_difficulty: 0.0577 + mean_effort: 0.0855 + mean_estimated_bugs: 0.0277 + mean_length: 0.0240 + mean_n1_unique_operators: 0.0577 + mean_time_to_implement_seconds: 0.0855 + mean_vocabulary: 0.0164 + mean_volume: 0.0278 + heaps: + mean_beta: -0.0149 + mean_k: 0.0795 + mean_r_squared: -0.0081 + identifier_length_variance: + mean_std_dev: -0.0221 + mean_variance: -0.0441 + line_patterns: + mean_string_literal_ratio: -0.0611 + magic_number_density: + mean_string_literal_ratio: -0.0611 + ngram: + mean_bigram_hapax_fraction: -0.0506 + mean_bigram_repeated_unique: 0.1209 + mean_bigram_repetition_rate: 0.0150 + mean_bigram_total: 0.0602 + mean_bigram_unique: 0.0621 + mean_trigram_hapax_fraction: -0.0206 + mean_trigram_repeated_unique: 0.0961 + mean_trigram_repetition_rate: 0.0117 + mean_trigram_total: 0.0603 + mean_trigram_unique: 0.0596 + punctuation_density: + mean_arrow_density: -0.1129 + mean_colon_suffix_density: -0.0591 + mean_id_nonalpha_suffix_density: -0.0602 + readability: + mean_avg_line_length: 0.0943 + mean_avg_tokens_per_line: 0.0600 + mean_fog_adapted: 0.0600 + symbol_density: + mean_density: -0.0912 + vocabulary: + mean_mattr: 0.0427 + mean_raw_ttr: -0.0298 + mean_total_identifiers: 0.1126 + mean_unique_identifiers: 0.0828 + vowel_density: + mean_total_chars: 0.1150 + zipf: + 
mean_exponent: -0.0239 + mean_total_tokens: 0.0600 + mean_vocab_size: 0.0694 + diff --git a/priv/combined_metrics/samples/code_smells/consistent_string_quote_style/bad/notifications.ex b/priv/combined_metrics/samples/code_smells/consistent_string_quote_style/bad/notifications.ex new file mode 100644 index 0000000..71bbb89 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/consistent_string_quote_style/bad/notifications.ex @@ -0,0 +1,84 @@ +defmodule Notifications do + @moduledoc 'Handles sending notifications and emails to users' + + @default_sender 'noreply@example.com' + @support_email "support@example.com" + + def send_welcome_email(user) do + subject = "Welcome to the platform" + body = 'Hi ' <> user.name <> ", welcome aboard!" + sender = @default_sender + + deliver_email(%{ + to: user.email, + from: sender, + subject: subject, + body: body, + reply_to: 'noreply@example.com' + }) + end + + def send_password_reset(user, token) do + link = "https://example.com/reset/" <> token + subject = 'Reset your password' + body = "Click the link to reset your password: " <> link + + deliver_email(%{ + to: user.email, + from: @default_sender, + subject: subject, + body: body + }) + end + + def send_order_confirmation(user, order) do + subject = 'Order #' <> Integer.to_string(order.id) <> " confirmed" + body = "Thank you for your order, " <> user.name <> "!" + + deliver_email(%{ + to: user.email, + from: "orders@example.com", + subject: subject, + body: body, + cc: 'orders@example.com' + }) + end + + def send_invoice(user, invoice) do + subject = "Invoice " <> invoice.number + body = 'Please find your invoice attached.' 
+ + deliver_email(%{ + to: user.email, + from: @default_sender, + subject: subject, + body: body, + attachment: invoice.pdf_path + }) + end + + def send_support_reply(ticket, message) do + subject = 'Re: Support Ticket #' <> Integer.to_string(ticket.id) + body = "Hello,\n\n" <> message <> '\n\nBest regards,\nSupport Team' + + deliver_email(%{ + to: ticket.user_email, + from: @support_email, + subject: subject, + body: body + }) + end + + def format_greeting(name, locale) do + case locale do + "en" -> 'Hello, ' <> name <> "!" + "de" -> "Hallo, " <> name <> '!' + _ -> "Hi, " <> name + end + end + + defp deliver_email(params) do + IO.puts("Sending email to: " <> params.to) + {:ok, params} + end +end diff --git a/priv/combined_metrics/samples/code_smells/consistent_string_quote_style/config.yml b/priv/combined_metrics/samples/code_smells/consistent_string_quote_style/config.yml new file mode 100644 index 0000000..ffdf597 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/consistent_string_quote_style/config.yml @@ -0,0 +1 @@ +doc: "Files should use a single, consistent string quoting style throughout." diff --git a/priv/combined_metrics/samples/code_smells/consistent_string_quote_style/good/notifications.ex b/priv/combined_metrics/samples/code_smells/consistent_string_quote_style/good/notifications.ex new file mode 100644 index 0000000..2b8d576 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/consistent_string_quote_style/good/notifications.ex @@ -0,0 +1,82 @@ +defmodule Notifications do + @moduledoc "Handles sending notifications and emails to users" + + @default_sender "noreply@example.com" + @support_email "support@example.com" + + def send_welcome_email(user) do + subject = "Welcome to the platform" + body = "Hi #{user.name}, welcome aboard!" 
+ + deliver_email(%{ + to: user.email, + from: @default_sender, + subject: subject, + body: body, + reply_to: "noreply@example.com" + }) + end + + def send_password_reset(user, token) do + link = "https://example.com/reset/#{token}" + subject = "Reset your password" + body = "Click the link to reset your password: #{link}" + + deliver_email(%{ + to: user.email, + from: @default_sender, + subject: subject, + body: body + }) + end + + def send_order_confirmation(user, order) do + subject = "Order ##{order.id} confirmed" + body = "Thank you for your order, #{user.name}!" + + deliver_email(%{ + to: user.email, + from: "orders@example.com", + subject: subject, + body: body, + cc: "orders@example.com" + }) + end + + def send_invoice(user, invoice) do + subject = "Invoice #{invoice.number}" + body = "Please find your invoice attached." + + deliver_email(%{ + to: user.email, + from: @default_sender, + subject: subject, + body: body, + attachment: invoice.pdf_path + }) + end + + def send_support_reply(ticket, message) do + subject = "Re: Support Ticket ##{ticket.id}" + body = "Hello,\n\n#{message}\n\nBest regards,\nSupport Team" + + deliver_email(%{ + to: ticket.user_email, + from: @support_email, + subject: subject, + body: body + }) + end + + def format_greeting(name, locale) do + case locale do + "en" -> "Hello, #{name}!" + "de" -> "Hallo, #{name}!" + _ -> "Hi, #{name}" + end + end + + defp deliver_email(params) do + {:ok, params} + end +end diff --git a/priv/combined_metrics/samples/code_smells/context_not_stored_in_struct/bad/fetcher.go b/priv/combined_metrics/samples/code_smells/context_not_stored_in_struct/bad/fetcher.go new file mode 100644 index 0000000..fb8b2cc --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/context_not_stored_in_struct/bad/fetcher.go @@ -0,0 +1,64 @@ +package fetcher + +import ( + "context" + "encoding/json" + "fmt" + "net/http" +) + +// Article represents a remote article resource. 
+type Article struct { + ID string + Title string + Body string +} + +// ArticleFetcher retrieves articles from a remote API. +// Context is stored as a struct field — an anti-pattern that ties the instance +// to a single request lifetime and makes cancellation hard to reason about. +type ArticleFetcher struct { + base string + client *http.Client + ctx context.Context // anti-pattern: context stored in struct +} + +func New(ctx context.Context, base string) *ArticleFetcher { + return &ArticleFetcher{base: base, client: &http.Client{}, ctx: ctx} +} + +// FetchByID retrieves an article by ID using the stored context. +// Callers cannot supply per-call cancellation. +func (f *ArticleFetcher) FetchByID(id string) (*Article, error) { + url := fmt.Sprintf("%s/articles/%s", f.base, id) + // Uses f.ctx from the struct — callers cannot override it. + req, err := http.NewRequestWithContext(f.ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("fetch article %q: %w", id, err) + } + + resp, err := f.client.Do(req) + if err != nil { + return nil, fmt.Errorf("fetch article %q: %w", id, err) + } + defer resp.Body.Close() + + var a Article + if err := json.NewDecoder(resp.Body).Decode(&a); err != nil { + return nil, fmt.Errorf("fetch article %q: decode: %w", id, err) + } + return &a, nil +} + +// FetchAll retrieves multiple articles using the struct's stored context. 
+func (f *ArticleFetcher) FetchAll(ids []string) ([]*Article, error) { + out := make([]*Article, 0, len(ids)) + for _, id := range ids { + a, err := f.FetchByID(id) + if err != nil { + return nil, err + } + out = append(out, a) + } + return out, nil +} diff --git a/priv/combined_metrics/samples/code_smells/context_not_stored_in_struct/good/fetcher.go b/priv/combined_metrics/samples/code_smells/context_not_stored_in_struct/good/fetcher.go new file mode 100644 index 0000000..ce3182e --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/context_not_stored_in_struct/good/fetcher.go @@ -0,0 +1,61 @@ +package fetcher + +import ( + "context" + "encoding/json" + "fmt" + "net/http" +) + +// Article represents a remote article resource. +type Article struct { + ID string + Title string + Body string +} + +// ArticleFetcher retrieves articles from a remote API. +// Context is accepted as a function parameter, not stored in the struct. +type ArticleFetcher struct { + base string + client *http.Client +} + +func New(base string) *ArticleFetcher { + return &ArticleFetcher{base: base, client: &http.Client{}} +} + +// FetchByID retrieves an article by ID using the provided context for cancellation. +// Context is passed explicitly — it is not stored on ArticleFetcher. +func (f *ArticleFetcher) FetchByID(ctx context.Context, id string) (*Article, error) { + url := fmt.Sprintf("%s/articles/%s", f.base, id) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("fetch article %q: build request: %w", id, err) + } + + resp, err := f.client.Do(req) + if err != nil { + return nil, fmt.Errorf("fetch article %q: %w", id, err) + } + defer resp.Body.Close() + + var a Article + if err := json.NewDecoder(resp.Body).Decode(&a); err != nil { + return nil, fmt.Errorf("fetch article %q: decode: %w", id, err) + } + return &a, nil +} + +// FetchAll retrieves multiple articles using the provided context. 
+func (f *ArticleFetcher) FetchAll(ctx context.Context, ids []string) ([]*Article, error) { + out := make([]*Article, 0, len(ids)) + for _, id := range ids { + a, err := f.FetchByID(ctx, id) + if err != nil { + return nil, err + } + out = append(out, a) + } + return out, nil +} diff --git a/priv/combined_metrics/samples/code_smells/does_not_accept_box_ref_parameter/bad/worker.rs b/priv/combined_metrics/samples/code_smells/does_not_accept_box_ref_parameter/bad/worker.rs new file mode 100644 index 0000000..b8b6111 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_accept_box_ref_parameter/bad/worker.rs @@ -0,0 +1,51 @@ +use std::fmt; + +pub trait Task: fmt::Debug + Send { + fn name(&self) -> &str; + fn execute(&self) -> Result; +} + +#[derive(Debug)] +pub struct EmailTask { + pub recipient: String, + pub subject: String, +} + +impl Task for EmailTask { + fn name(&self) -> &str { "email" } + fn execute(&self) -> Result { + Ok(format!("sent email to {}", self.recipient)) + } +} + +#[derive(Debug)] +pub struct ReportTask { + pub report_id: u64, +} + +impl Task for ReportTask { + fn name(&self) -> &str { "report" } + fn execute(&self) -> Result { + Ok(format!("generated report #{}", self.report_id)) + } +} + +// Bad: &Box forces callers to have an owned Box — cannot pass a +// plain reference; also adds an extra level of indirection unnecessarily. 
+pub fn run_task(task: &Box) -> Result { + println!("running task: {}", task.name()); + task.execute() +} + +pub fn run_all(tasks: &[Box]) -> Vec> { + tasks.iter().map(run_task).collect() +} + +// Same anti-pattern with a concrete generic type +pub fn log_task_name(task: &Box) { + println!("[worker] task name: {}", task.name()); +} + +pub fn describe(task: &Box) -> String { + format!("Task({})", task.name()) +} diff --git a/priv/combined_metrics/samples/code_smells/does_not_accept_box_ref_parameter/good/worker.rs b/priv/combined_metrics/samples/code_smells/does_not_accept_box_ref_parameter/good/worker.rs new file mode 100644 index 0000000..5979a98 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_accept_box_ref_parameter/good/worker.rs @@ -0,0 +1,51 @@ +use std::fmt; + +pub trait Task: fmt::Debug + Send { + fn name(&self) -> &str; + fn execute(&self) -> Result; +} + +#[derive(Debug)] +pub struct EmailTask { + pub recipient: String, + pub subject: String, +} + +impl Task for EmailTask { + fn name(&self) -> &str { + "email" + } + fn execute(&self) -> Result { + Ok(format!("sent email to {} re: {}", self.recipient, self.subject)) + } +} + +#[derive(Debug)] +pub struct ReportTask { + pub report_id: u64, +} + +impl Task for ReportTask { + fn name(&self) -> &str { + "report" + } + fn execute(&self) -> Result { + Ok(format!("generated report #{}", self.report_id)) + } +} + +// Accept &dyn Task (or &T) rather than &Box — works with +// both owned Box and references to stack-allocated types +pub fn run_task(task: &dyn Task) -> Result { + println!("running task: {}", task.name()); + task.execute() +} + +pub fn run_all(tasks: &[Box]) -> Vec> { + // Dereference each Box to get &dyn Task — clean, no Box leaking into API + tasks.iter().map(|t| run_task(t.as_ref())).collect() +} + +pub fn log_task_name(task: &dyn Task) { + println!("[worker] task name: {}", task.name()); +} diff --git 
a/priv/combined_metrics/samples/code_smells/does_not_box_collections_unnecessarily/bad/cache.rs b/priv/combined_metrics/samples/code_smells/does_not_box_collections_unnecessarily/bad/cache.rs new file mode 100644 index 0000000..46fe6b8 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_box_collections_unnecessarily/bad/cache.rs @@ -0,0 +1,59 @@ +use std::collections::HashMap; +use std::time::{Duration, Instant}; + +#[derive(Debug, Clone)] +pub struct CacheEntry { + pub value: V, + pub inserted_at: Instant, + pub ttl: Duration, +} + +// Bad: Vec and HashMap are wrapped in Box — adds heap indirection with no benefit +pub struct TtlCache { + // Box> adds an extra pointer hop for every lookup + store: Box>>, + // Box> is redundant — Vec already lives on the heap + eviction_order: Box>, + max_size: usize, +} + +impl TtlCache +where + K: std::hash::Hash + Eq + Clone, + V: Clone, +{ + pub fn new(max_size: usize) -> Self { + Self { + store: Box::new(HashMap::new()), + eviction_order: Box::new(Vec::new()), + max_size, + } + } + + pub fn insert(&mut self, key: K, value: V, ttl: Duration) { + if self.store.len() >= self.max_size { + self.evict_oldest(); + } + let entry = CacheEntry { value, inserted_at: Instant::now(), ttl }; + self.store.insert(key.clone(), entry); + self.eviction_order.push(key); + } + + pub fn get(&self, key: &K) -> Option<&V> { + self.store.get(key).and_then(|e| { + if e.inserted_at.elapsed() > e.ttl { None } else { Some(&e.value) } + }) + } + + // Returning Box> — caller must dereference to get slice behavior + pub fn snapshot_keys(&self) -> Box> { + Box::new(self.eviction_order.iter().cloned().collect()) + } + + fn evict_oldest(&mut self) { + if let Some(oldest) = self.eviction_order.first().cloned() { + self.store.remove(&oldest); + self.eviction_order.remove(0); + } + } +} diff --git a/priv/combined_metrics/samples/code_smells/does_not_box_collections_unnecessarily/good/cache.rs 
b/priv/combined_metrics/samples/code_smells/does_not_box_collections_unnecessarily/good/cache.rs new file mode 100644 index 0000000..2e5ceb6 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_box_collections_unnecessarily/good/cache.rs @@ -0,0 +1,61 @@ +use std::collections::HashMap; +use std::time::{Duration, Instant}; + +#[derive(Debug, Clone)] +pub struct CacheEntry { + pub value: V, + pub inserted_at: Instant, + pub ttl: Duration, +} + +impl CacheEntry { + pub fn is_expired(&self) -> bool { + self.inserted_at.elapsed() > self.ttl + } +} + +// Vec and HashMap are used directly — no unnecessary Box wrapping +pub struct TtlCache { + store: HashMap>, + eviction_order: Vec, + max_size: usize, +} + +impl TtlCache +where + K: std::hash::Hash + Eq + Clone, +{ + pub fn new(max_size: usize) -> Self { + Self { + store: HashMap::new(), + eviction_order: Vec::new(), + max_size, + } + } + + pub fn insert(&mut self, key: K, value: V, ttl: Duration) { + if self.store.len() >= self.max_size { + self.evict_oldest(); + } + let entry = CacheEntry { value, inserted_at: Instant::now(), ttl }; + self.store.insert(key.clone(), entry); + self.eviction_order.push(key); + } + + pub fn get(&self, key: &K) -> Option<&V> { + self.store.get(key).and_then(|e| { + if e.is_expired() { None } else { Some(&e.value) } + }) + } + + pub fn keys(&self) -> Vec<&K> { + self.eviction_order.iter().collect() + } + + fn evict_oldest(&mut self) { + if let Some(oldest) = self.eviction_order.first().cloned() { + self.store.remove(&oldest); + self.eviction_order.remove(0); + } + } +} diff --git a/priv/combined_metrics/samples/code_smells/does_not_cast_ref_to_mut_ptr/bad/buffer.rs b/priv/combined_metrics/samples/code_smells/does_not_cast_ref_to_mut_ptr/bad/buffer.rs new file mode 100644 index 0000000..14d5808 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_cast_ref_to_mut_ptr/bad/buffer.rs @@ -0,0 +1,44 @@ +/// Bad: uses raw pointer casts to mutate through shared 
references, +/// violating Rust's aliasing rules and causing undefined behavior. +pub struct RingBuffer { + data: Vec, + capacity: usize, +} + +impl RingBuffer { + pub fn new(capacity: usize) -> Self { + Self { data: Vec::with_capacity(capacity), capacity } + } + + // BAD: casts an immutable reference to a mutable pointer to bypass borrow rules. + // This is undefined behavior — multiple callers can hold &RingBuffer and mutate + // the same Vec simultaneously. + pub fn write_bypass(&self, byte: u8) { + let data_ptr = &self.data as *const Vec as *mut Vec; + unsafe { + (*data_ptr).push(byte); + } + } + + // BAD: same pattern — casting &[u8] pointer to *mut u8 to overwrite bytes + pub fn patch_byte(&self, index: usize, value: u8) { + if index < self.data.len() { + let ptr = self.data.as_ptr() as *mut u8; + unsafe { + *ptr.add(index) = value; + } + } + } + + pub fn len(&self) -> usize { + self.data.len() + } + + // BAD: capacity field mutated through a const-cast pointer + pub fn resize_limit(&self, new_capacity: usize) { + let cap_ptr = &self.capacity as *const usize as *mut usize; + unsafe { + *cap_ptr = new_capacity; + } + } +} diff --git a/priv/combined_metrics/samples/code_smells/does_not_cast_ref_to_mut_ptr/good/buffer.rs b/priv/combined_metrics/samples/code_smells/does_not_cast_ref_to_mut_ptr/good/buffer.rs new file mode 100644 index 0000000..0119ace --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_cast_ref_to_mut_ptr/good/buffer.rs @@ -0,0 +1,61 @@ +use std::sync::Mutex; + +/// A shared ring buffer with interior mutability via Mutex. +/// Mutations go through &mut self or Mutex — no raw pointer casting. 
+pub struct RingBuffer { + data: Mutex>, + capacity: usize, +} + +impl RingBuffer { + pub fn new(capacity: usize) -> Self { + Self { + data: Mutex::new(Vec::with_capacity(capacity)), + capacity, + } + } + + pub fn write(&self, chunk: &[u8]) -> usize { + let mut data = self.data.lock().expect("mutex poisoned"); + let remaining = self.capacity.saturating_sub(data.len()); + let to_write = chunk.len().min(remaining); + data.extend_from_slice(&chunk[..to_write]); + to_write + } + + pub fn read_all(&self) -> Vec { + let mut data = self.data.lock().expect("mutex poisoned"); + std::mem::take(&mut *data) + } + + pub fn len(&self) -> usize { + self.data.lock().expect("mutex poisoned").len() + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +pub struct OwnedBuffer { + data: Vec, +} + +impl OwnedBuffer { + pub fn new() -> Self { + Self { data: Vec::new() } + } + + // Mutation via &mut self — safe, no raw pointers + pub fn append(&mut self, bytes: &[u8]) { + self.data.extend_from_slice(bytes); + } + + pub fn clear(&mut self) { + self.data.clear(); + } + + pub fn as_slice(&self) -> &[u8] { + &self.data + } +} diff --git a/priv/combined_metrics/samples/code_smells/does_not_clone_for_comparison/bad/registry.rs b/priv/combined_metrics/samples/code_smells/does_not_clone_for_comparison/bad/registry.rs new file mode 100644 index 0000000..8b55b41 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_clone_for_comparison/bad/registry.rs @@ -0,0 +1,56 @@ +use std::collections::HashMap; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ServiceId(pub String); + +#[derive(Debug, Clone)] +pub struct ServiceEntry { + pub id: ServiceId, + pub endpoint: String, + pub healthy: bool, +} + +pub struct Registry { + entries: HashMap, + primary_id: Option, +} + +impl Registry { + pub fn new() -> Self { + Self { entries: HashMap::new(), primary_id: None } + } + + pub fn register(&mut self, entry: ServiceEntry) { + self.entries.insert(entry.id.clone(), 
entry); + } + + pub fn set_primary(&mut self, id: ServiceId) { + self.primary_id = Some(id); + } + + // Bad: clones primary_id solely to compare — &ServiceId comparison suffices + pub fn is_primary(&self, id: &ServiceId) -> bool { + self.primary_id.clone() == Some(id.clone()) + } + + // Bad: clones candidate endpoint just to compare strings + pub fn find_by_endpoint(&self, endpoint: &str) -> Option<&ServiceEntry> { + self.entries + .values() + .find(|e| e.endpoint.clone() == endpoint.to_string()) + } + + pub fn healthy_ids(&self) -> Vec { + self.entries + .values() + .filter(|e| e.healthy) + // Bad: clones every healthy id even though callers may only iterate + .map(|e| e.id.clone()) + .collect() + } + + // Bad: clones the key just to check membership + pub fn contains(&self, id: &ServiceId) -> bool { + self.entries.contains_key(&id.clone()) + } +} diff --git a/priv/combined_metrics/samples/code_smells/does_not_clone_for_comparison/good/registry.rs b/priv/combined_metrics/samples/code_smells/does_not_clone_for_comparison/good/registry.rs new file mode 100644 index 0000000..1bfb365 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_clone_for_comparison/good/registry.rs @@ -0,0 +1,52 @@ +use std::collections::HashMap; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ServiceId(pub String); + +#[derive(Debug, Clone)] +pub struct ServiceEntry { + pub id: ServiceId, + pub endpoint: String, + pub healthy: bool, +} + +pub struct Registry { + entries: HashMap, + primary_id: Option, +} + +impl Registry { + pub fn new() -> Self { + Self { entries: HashMap::new(), primary_id: None } + } + + pub fn register(&mut self, entry: ServiceEntry) { + self.entries.insert(entry.id.clone(), entry); + } + + pub fn set_primary(&mut self, id: ServiceId) { + self.primary_id = Some(id); + } + + // Compare by reference — no clone needed + pub fn is_primary(&self, id: &ServiceId) -> bool { + self.primary_id.as_ref() == Some(id) + } + + // Find without cloning the 
candidate + pub fn find_by_endpoint(&self, endpoint: &str) -> Option<&ServiceEntry> { + self.entries.values().find(|e| e.endpoint == endpoint) + } + + pub fn healthy_ids(&self) -> Vec<&ServiceId> { + self.entries + .values() + .filter(|e| e.healthy) + .map(|e| &e.id) + .collect() + } + + pub fn contains(&self, id: &ServiceId) -> bool { + self.entries.contains_key(id) + } +} diff --git a/priv/combined_metrics/samples/code_smells/does_not_declare_unused_mut/bad/pipeline.rs b/priv/combined_metrics/samples/code_smells/does_not_declare_unused_mut/bad/pipeline.rs new file mode 100644 index 0000000..83d575c --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_declare_unused_mut/bad/pipeline.rs @@ -0,0 +1,53 @@ +pub struct Record { + pub id: u64, + pub value: f64, + pub tags: Vec, +} + +pub struct Pipeline { + steps: Vec Record>>, +} + +impl Pipeline { + pub fn new() -> Self { + Self { steps: Vec::new() } + } + + // BAD: record is declared mut but never actually mutated — fold returns new value + pub fn run(&self, mut record: Record) -> Record { + self.steps.iter().fold(record, |acc, step| step(acc)) + } +} + +// BAD: scale is declared mut but the parameter is never reassigned +pub fn normalize(records: &mut Vec, mut scale: f64) { + for r in records.iter_mut() { + r.value *= scale; + } +} + +pub fn summarize(records: &[Record]) -> (f64, f64) { + // BAD: count is declared mut but assigned once and never incremented explicitly + let mut count: usize = records.len(); + let mut sum = 0.0f64; + + // sum is mutated — but count is used as a constant after assignment + for r in records { + sum += r.value; + } + + if count == 0 { + return (0.0, 0.0); + } + (sum, sum / count as f64) +} + +// BAD: result is declared mut but never reassigned after initialization +pub fn find_max(records: &[Record]) -> Option { + let mut result = records.iter().map(|r| r.value).fold(f64::NEG_INFINITY, f64::max); + if result == f64::NEG_INFINITY { + None + } else { + Some(result) + } 
+} diff --git a/priv/combined_metrics/samples/code_smells/does_not_declare_unused_mut/good/pipeline.rs b/priv/combined_metrics/samples/code_smells/does_not_declare_unused_mut/good/pipeline.rs new file mode 100644 index 0000000..8b65a19 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_declare_unused_mut/good/pipeline.rs @@ -0,0 +1,48 @@ +pub struct Record { + pub id: u64, + pub value: f64, + pub tags: Vec, +} + +pub struct Pipeline { + steps: Vec Record>>, +} + +impl Pipeline { + pub fn new() -> Self { + // steps will be mutated — mut is needed here + let mut steps: Vec Record>> = Vec::new(); + steps.push(Box::new(|r| r)); // identity step + Self { steps } + } + + // No mut needed on record — we return a new one via fold + pub fn run(&self, record: Record) -> Record { + self.steps.iter().fold(record, |acc, step| step(acc)) + } + + pub fn add_step(&mut self, step: impl Fn(Record) -> Record + 'static) { + self.steps.push(Box::new(step)); + } +} + +pub fn normalize(records: &mut Vec, scale: f64) { + // records is genuinely mutated via iter_mut + for r in records.iter_mut() { + r.value *= scale; + } +} + +pub fn summarize(records: &[Record]) -> (f64, f64) { + // sum and count are mutated by the loop + let mut sum = 0.0f64; + let mut count = 0usize; + for r in records { + sum += r.value; + count += 1; + } + if count == 0 { + return (0.0, 0.0); + } + (sum, sum / count as f64) +} diff --git a/priv/combined_metrics/samples/code_smells/does_not_hold_mutex_across_await/bad/queue.rs b/priv/combined_metrics/samples/code_smells/does_not_hold_mutex_across_await/bad/queue.rs new file mode 100644 index 0000000..d52641c --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_hold_mutex_across_await/bad/queue.rs @@ -0,0 +1,52 @@ +use std::collections::VecDeque; +use std::sync::{Arc, Mutex}; + +#[derive(Debug, Clone)] +pub struct Job { + pub id: u64, + pub payload: String, +} + +pub struct JobQueue { + inner: Arc>>, +} + +impl JobQueue { + pub fn 
new() -> Self { + Self { inner: Arc::new(Mutex::new(VecDeque::new())) } + } + + pub fn push(&self, job: Job) { + self.inner.lock().expect("mutex poisoned").push_back(job); + } + + // BAD: the MutexGuard is held across an .await point. + // This can deadlock (tokio Mutex panics) or block other tasks from + // acquiring the lock while the async work runs. + pub async fn process_next_bad(&self) -> Option { + let mut q = self.inner.lock().expect("mutex poisoned"); + // MutexGuard is still live here — held across the await below + let job = q.pop_front()?; + + // Awaiting while holding the guard — deadlock risk + let result = self.handle_job(&job).await; + // Guard finally dropped when this function returns, after all awaits + Some(result) + } + + // BAD: returns while guard is in scope after an await + pub async fn peek_and_log(&self) { + let q = self.inner.lock().expect("mutex poisoned"); + if let Some(job) = q.front() { + println!("next job: {}", job.id); + } + // MutexGuard q is still in scope + tokio::time::sleep(std::time::Duration::from_millis(10)).await; + // q dropped here — after the await + } + + async fn handle_job(&self, job: &Job) -> String { + tokio::time::sleep(std::time::Duration::from_millis(1)).await; + format!("processed job #{}: {}", job.id, job.payload) + } +} diff --git a/priv/combined_metrics/samples/code_smells/does_not_hold_mutex_across_await/good/queue.rs b/priv/combined_metrics/samples/code_smells/does_not_hold_mutex_across_await/good/queue.rs new file mode 100644 index 0000000..0d73408 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_hold_mutex_across_await/good/queue.rs @@ -0,0 +1,50 @@ +use std::collections::VecDeque; +use std::sync::{Arc, Mutex}; + +#[derive(Debug, Clone)] +pub struct Job { + pub id: u64, + pub payload: String, +} + +pub struct JobQueue { + inner: Arc>>, +} + +impl JobQueue { + pub fn new() -> Self { + Self { inner: Arc::new(Mutex::new(VecDeque::new())) } + } + + pub fn push(&self, job: Job) { + let 
mut q = self.inner.lock().expect("mutex poisoned"); + q.push_back(job); + // Guard dropped at end of block — not held across await + } + + // Good: release the lock before awaiting, then use the extracted value + pub async fn process_next(&self) -> Option { + // Extract the job while holding the lock... + let job = { + let mut q = self.inner.lock().expect("mutex poisoned"); + q.pop_front() + // MutexGuard dropped here — before any await point + }; + + // ...then await without holding the lock + match job { + Some(j) => Some(self.handle_job(j).await), + None => None, + } + } + + async fn handle_job(&self, job: Job) -> String { + // Simulated async work (e.g., HTTP call, DB write) + tokio::time::sleep(std::time::Duration::from_millis(1)).await; + format!("processed job #{}: {}", job.id, job.payload) + } + + pub fn len(&self) -> usize { + self.inner.lock().expect("mutex poisoned").len() + } +} diff --git a/priv/combined_metrics/samples/code_smells/does_not_spawn_unbounded_goroutines/bad/importer.go b/priv/combined_metrics/samples/code_smells/does_not_spawn_unbounded_goroutines/bad/importer.go new file mode 100644 index 0000000..a708ed1 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_spawn_unbounded_goroutines/bad/importer.go @@ -0,0 +1,53 @@ +package importer + +import ( + "context" + "fmt" + "sync" +) + +type Record struct { + ID string + Data string +} + +type Storer interface { + Store(ctx context.Context, r Record) error +} + +// BulkImporter processes records by spawning one goroutine per record. +// With a large input slice this creates an unbounded number of goroutines, +// exhausting memory and file descriptors. +type BulkImporter struct { + store Storer +} + +func New(store Storer) *BulkImporter { + return &BulkImporter{store: store} +} + +// Import spawns one goroutine per record with no concurrency limit. 
+func (b *BulkImporter) Import(ctx context.Context, records []Record) error { + errs := make(chan error, len(records)) + var wg sync.WaitGroup + + for _, r := range records { + r := r + wg.Add(1) + // One goroutine per record — can spawn thousands simultaneously. + go func() { + defer wg.Done() + if err := b.store.Store(ctx, r); err != nil { + errs <- fmt.Errorf("store record %s: %w", r.ID, err) + } + }() + } + + wg.Wait() + close(errs) + + for err := range errs { + return err + } + return nil +} diff --git a/priv/combined_metrics/samples/code_smells/does_not_spawn_unbounded_goroutines/good/importer.go b/priv/combined_metrics/samples/code_smells/does_not_spawn_unbounded_goroutines/good/importer.go new file mode 100644 index 0000000..7f118e2 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/does_not_spawn_unbounded_goroutines/good/importer.go @@ -0,0 +1,58 @@ +package importer + +import ( + "context" + "fmt" + "sync" +) + +type Record struct { + ID string + Data string +} + +type Storer interface { + Store(ctx context.Context, r Record) error +} + +// BulkImporter processes records using a fixed-size worker pool. +// The number of concurrent goroutines is bounded by concurrency. +type BulkImporter struct { + store Storer + concurrency int +} + +func New(store Storer, concurrency int) *BulkImporter { + if concurrency <= 0 { + concurrency = 4 + } + return &BulkImporter{store: store, concurrency: concurrency} +} + +// Import processes all records with at most concurrency goroutines running simultaneously. 
+func (b *BulkImporter) Import(ctx context.Context, records []Record) error { + sem := make(chan struct{}, b.concurrency) + errs := make(chan error, len(records)) + var wg sync.WaitGroup + + for _, r := range records { + r := r + sem <- struct{}{} // acquire slot + wg.Add(1) + go func() { + defer wg.Done() + defer func() { <-sem }() // release slot + if err := b.store.Store(ctx, r); err != nil { + errs <- fmt.Errorf("store record %s: %w", r.ID, err) + } + }() + } + + wg.Wait() + close(errs) + + for err := range errs { + return err // return first error + } + return nil +} diff --git a/priv/combined_metrics/samples/code_smells/goroutine_has_clear_exit_condition/bad/worker.go b/priv/combined_metrics/samples/code_smells/goroutine_has_clear_exit_condition/bad/worker.go new file mode 100644 index 0000000..fe8afab --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/goroutine_has_clear_exit_condition/bad/worker.go @@ -0,0 +1,45 @@ +package worker + +import ( + "log" + "time" +) + +type EmailJob struct { + To string + Subject string + Body string +} + +type Mailer interface { + Send(job EmailJob) error +} + +// EmailWorker drains jobs from a channel. +// The goroutine has no exit condition — it leaks forever. +type EmailWorker struct { + mailer Mailer + jobs <-chan EmailJob + logger *log.Logger +} + +func NewEmailWorker(mailer Mailer, jobs <-chan EmailJob, logger *log.Logger) *EmailWorker { + return &EmailWorker{mailer: mailer, jobs: jobs, logger: logger} +} + +// Run starts the worker in a goroutine with no way to stop it. +func (w *EmailWorker) Run() { + // No context, no stop channel — this goroutine runs forever with no exit path. 
+ go func() { + for { + select { + case job := <-w.jobs: + if err := w.mailer.Send(job); err != nil { + w.logger.Printf("failed to send email to %s: %v", job.To, err) + } + default: + time.Sleep(100 * time.Millisecond) + } + } + }() +} diff --git a/priv/combined_metrics/samples/code_smells/goroutine_has_clear_exit_condition/good/worker.go b/priv/combined_metrics/samples/code_smells/goroutine_has_clear_exit_condition/good/worker.go new file mode 100644 index 0000000..046ed7b --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/goroutine_has_clear_exit_condition/good/worker.go @@ -0,0 +1,52 @@ +package worker + +import ( + "context" + "log" + "time" +) + +type EmailJob struct { + To string + Subject string + Body string +} + +type Mailer interface { + Send(job EmailJob) error +} + +// EmailWorker drains jobs from a channel until the context is cancelled. +// The goroutine has a clear exit condition: ctx.Done(). +type EmailWorker struct { + mailer Mailer + jobs <-chan EmailJob + logger *log.Logger +} + +func NewEmailWorker(mailer Mailer, jobs <-chan EmailJob, logger *log.Logger) *EmailWorker { + return &EmailWorker{mailer: mailer, jobs: jobs, logger: logger} +} + +// Run starts the worker and blocks until ctx is cancelled or jobs is closed. +func (w *EmailWorker) Run(ctx context.Context) { + for { + select { + case <-ctx.Done(): + // Clear exit: context cancelled — drain stops. + w.logger.Println("email worker shutting down") + return + case job, ok := <-w.jobs: + if !ok { + // Clear exit: channel closed — no more work. 
+ w.logger.Println("jobs channel closed, email worker exiting") + return + } + if err := w.mailer.Send(job); err != nil { + w.logger.Printf("failed to send email to %s: %v", job.To, err) + } + case <-time.After(30 * time.Second): + w.logger.Println("email worker idle heartbeat") + } + } +} diff --git a/priv/combined_metrics/samples/code_smells/io_bound_uses_async_await_not_task_run/bad/DocumentStorage.cs b/priv/combined_metrics/samples/code_smells/io_bound_uses_async_await_not_task_run/bad/DocumentStorage.cs new file mode 100644 index 0000000..6bc1279 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/io_bound_uses_async_await_not_task_run/bad/DocumentStorage.cs @@ -0,0 +1,63 @@ +using System.IO; +using System.Net.Http; +using System.Text; +using System.Threading.Tasks; + +namespace Storage +{ + public class DocumentStorage + { + private readonly HttpClient _httpClient; + private readonly string _storageRoot; + + public DocumentStorage(HttpClient httpClient, string storageRoot) + { + _httpClient = httpClient; + _storageRoot = storageRoot; + } + + // I/O-bound but wrapped in Task.Run — wastes a thread pool thread + public async Task ReadDocumentAsync(string documentId) + { + var path = BuildPath(documentId); + return await Task.Run(() => File.ReadAllText(path)); + } + + // I/O-bound write wrapped in Task.Run unnecessarily + public async Task SaveDocumentAsync(string documentId, string content) + { + var path = BuildPath(documentId); + await Task.Run(() => + { + Directory.CreateDirectory(Path.GetDirectoryName(path)!); + File.WriteAllText(path, content, Encoding.UTF8); + }); + } + + // Network I/O inside Task.Run — HttpClient is already async, Task.Run adds no value + public async Task FetchFromRemoteAsync(string url) + { + return await Task.Run(async () => + { + var response = await _httpClient.GetAsync(url); + response.EnsureSuccessStatusCode(); + return await response.Content.ReadAsStringAsync(); + }); + } + + // Streaming wrapped in Task.Run — 
unnecessary thread pool hop for I/O + public async Task DownloadToFileAsync(string url, string destinationPath) + { + await Task.Run(async () => + { + var response = await _httpClient.GetAsync(url); + response.EnsureSuccessStatusCode(); + var bytes = await response.Content.ReadAsByteArrayAsync(); + File.WriteAllBytes(destinationPath, bytes); + }); + } + + private string BuildPath(string documentId) => + Path.Combine(_storageRoot, documentId[..2], documentId + ".txt"); + } +} diff --git a/priv/combined_metrics/samples/code_smells/io_bound_uses_async_await_not_task_run/good/DocumentStorage.cs b/priv/combined_metrics/samples/code_smells/io_bound_uses_async_await_not_task_run/good/DocumentStorage.cs new file mode 100644 index 0000000..8c6a734 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/io_bound_uses_async_await_not_task_run/good/DocumentStorage.cs @@ -0,0 +1,60 @@ +using System.IO; +using System.Net.Http; +using System.Text; +using System.Threading.Tasks; + +namespace Storage +{ + public class DocumentStorage + { + private readonly HttpClient _httpClient; + private readonly string _storageRoot; + + public DocumentStorage(HttpClient httpClient, string storageRoot) + { + _httpClient = httpClient; + _storageRoot = storageRoot; + } + + // I/O-bound: reads directly with async file API, no Task.Run needed + public async Task ReadDocumentAsync(string documentId) + { + var path = BuildPath(documentId); + return await File.ReadAllTextAsync(path); + } + + // I/O-bound: writes via async file API + public async Task SaveDocumentAsync(string documentId, string content) + { + var path = BuildPath(documentId); + Directory.CreateDirectory(Path.GetDirectoryName(path)!); + await File.WriteAllTextAsync(path, content, Encoding.UTF8); + } + + // I/O-bound: uses async HttpClient, not Task.Run + public async Task FetchFromRemoteAsync(string url) + { + var response = await _httpClient.GetAsync(url); + response.EnsureSuccessStatusCode(); + return await 
response.Content.ReadAsStringAsync(); + } + + // I/O-bound: streams large file without blocking + public async Task DownloadToFileAsync(string url, string destinationPath) + { + using var response = await _httpClient.GetAsync( + url, System.Net.Http.HttpCompletionOption.ResponseHeadersRead); + response.EnsureSuccessStatusCode(); + + using var contentStream = await response.Content.ReadAsStreamAsync(); + using var fileStream = new FileStream( + destinationPath, FileMode.Create, FileAccess.Write, FileShare.None, + bufferSize: 8192, useAsync: true); + + await contentStream.CopyToAsync(fileStream); + } + + private string BuildPath(string documentId) => + Path.Combine(_storageRoot, documentId[..2], documentId + ".txt"); + } +} diff --git a/priv/combined_metrics/samples/code_smells/lambda_not_assigned_to_variable/bad/price_calculator.py b/priv/combined_metrics/samples/code_smells/lambda_not_assigned_to_variable/bad/price_calculator.py new file mode 100644 index 0000000..939b8de --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/lambda_not_assigned_to_variable/bad/price_calculator.py @@ -0,0 +1,47 @@ +"""Price calculator with pluggable discount and tax strategies.""" +from __future__ import annotations + +from decimal import Decimal +from typing import Callable + + +PriceTransform = Callable[[Decimal], Decimal] + + +# lambdas assigned to names — should be def statements +apply_percentage_discount = lambda percent: ( # noqa: E731 + lambda price: price * (1 - Decimal(percent) / 100) +) + +apply_flat_discount = lambda amount: ( # noqa: E731 + lambda price: max(price - amount, Decimal(0)) +) + +apply_tax = lambda rate: ( # noqa: E731 + lambda price: price * (1 + rate) +) + + +def chain_transforms(*transforms: PriceTransform) -> PriceTransform: + # lambda assigned to a local name — should be a nested def + apply_all = lambda price: [t(price) for t in transforms][-1] # noqa: E731 + return apply_all + + +# Module-level strategy functions replaced by named lambdas 
+member_price = lambda price: price * Decimal("0.90") # noqa: E731 — use def + +vip_price = lambda price: ( # noqa: E731 — use def + price * Decimal("0.80") * Decimal("1.08") +) + +format_price = lambda price: f"${price:.2f}" # noqa: E731 — use def + + +def calculate( + base_price: Decimal, + transform: PriceTransform, +) -> str: + """Apply a transform to a base price and return the formatted result.""" + final = transform(base_price) + return format_price(final) diff --git a/priv/combined_metrics/samples/code_smells/lambda_not_assigned_to_variable/good/price_calculator.py b/priv/combined_metrics/samples/code_smells/lambda_not_assigned_to_variable/good/price_calculator.py new file mode 100644 index 0000000..7aa46c8 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/lambda_not_assigned_to_variable/good/price_calculator.py @@ -0,0 +1,64 @@ +"""Price calculator with pluggable discount and tax strategies.""" +from __future__ import annotations + +from decimal import Decimal +from typing import Callable + + +PriceTransform = Callable[[Decimal], Decimal] + + +def apply_percentage_discount(percent: int) -> PriceTransform: + """Return a function that applies a percentage discount to a price.""" + def discount(price: Decimal) -> Decimal: + return price * (1 - Decimal(percent) / 100) + return discount + + +def apply_flat_discount(amount: Decimal) -> PriceTransform: + """Return a function that subtracts a flat amount from a price.""" + def discount(price: Decimal) -> Decimal: + return max(price - amount, Decimal(0)) + return discount + + +def apply_tax(rate: Decimal) -> PriceTransform: + """Return a function that adds a tax rate to a price.""" + def add_tax(price: Decimal) -> Decimal: + return price * (1 + rate) + return add_tax + + +def chain_transforms(*transforms: PriceTransform) -> PriceTransform: + """Combine multiple price transforms into a single function.""" + def apply_all(price: Decimal) -> Decimal: + for transform in transforms: + price = 
transform(price) + return price + return apply_all + + +def format_price(price: Decimal) -> str: + """Format a Decimal price as a currency string.""" + return f"${price:.2f}" + + +def calculate( + base_price: Decimal, + transform: PriceTransform, +) -> str: + """Apply a transform to a base price and return the formatted result.""" + final = transform(base_price) + return format_price(final) + + +# Module-level strategies defined as proper named functions, not lambdas +def member_price(price: Decimal) -> Decimal: + """10 % member discount.""" + return price * Decimal("0.90") + + +def vip_price(price: Decimal) -> Decimal: + """20 % VIP discount followed by 8 % tax.""" + discounted = price * Decimal("0.80") + return discounted * Decimal("1.08") diff --git a/priv/combined_metrics/samples/code_smells/minimizes_data_in_messages/bad/shipping.ex b/priv/combined_metrics/samples/code_smells/minimizes_data_in_messages/bad/shipping.ex new file mode 100644 index 0000000..b658fe6 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/minimizes_data_in_messages/bad/shipping.ex @@ -0,0 +1,67 @@ +defmodule MyApp.Shipping.LabelWorker do + @moduledoc """ + Generates shipping labels asynchronously. + """ + + use GenServer + + alias MyApp.Orders.Order + alias MyApp.Shipping + + def start_link(opts) do + GenServer.start_link(__MODULE__, opts, name: __MODULE__) + end + + # Bad: enqueues the entire Order struct in the message. + # This copies the full struct (including all its associations) into the + # worker's mailbox, wasting memory and defeating the purpose of async work. 
+ @spec enqueue(Order.t()) :: :ok + def enqueue(%Order{} = order) do + GenServer.cast(__MODULE__, {:generate_label, order}) + end + + @impl true + def init(_opts), do: {:ok, %{}} + + @impl true + # Bad: entire order struct is in the message payload — large copy + def handle_cast({:generate_label, %Order{} = order}, state) do + # The worker already received a (potentially stale) full struct + Task.start(fn -> Shipping.generate_label(order) end) + {:noreply, state} + end +end + +defmodule MyApp.Shipping.BatchNotifier do + @moduledoc """ + Broadcasts shipping updates. + """ + + alias MyApp.Shipping.Shipment + + # Bad: broadcasts the full Shipment struct to all subscribers. + # If 100 processes subscribe, this full struct is copied 100 times. + @spec notify_dispatched(Shipment.t()) :: :ok | {:error, term()} + def notify_dispatched(%Shipment{} = shipment) do + Phoenix.PubSub.broadcast( + MyApp.PubSub, + "shipments", + # Bad: sending the entire struct with all fields + {:shipment_dispatched, shipment} + ) + end + + # Bad: spawning a closure that captures the full struct + @spec process_async(Shipment.t()) :: :ok + def process_async(%Shipment{} = shipment) do + # Bad: the full shipment struct is captured in the closure + # and copied into the new process's heap + spawn(fn -> + Shipping.finalize(shipment) + Shipping.archive(shipment) + Shipping.notify_customer(shipment) + end) + + :ok + end +end diff --git a/priv/combined_metrics/samples/code_smells/minimizes_data_in_messages/good/shipping.ex b/priv/combined_metrics/samples/code_smells/minimizes_data_in_messages/good/shipping.ex new file mode 100644 index 0000000..9b415e0 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/minimizes_data_in_messages/good/shipping.ex @@ -0,0 +1,61 @@ +defmodule MyApp.Shipping.LabelWorker do + @moduledoc """ + Generates shipping labels asynchronously. 
Sends only the order ID + in the message — the worker re-fetches the data it needs from the + database, avoiding large struct serialisation over process boundaries. + """ + + use GenServer + + alias MyApp.Orders + alias MyApp.Shipping + + def start_link(opts) do + GenServer.start_link(__MODULE__, opts, name: __MODULE__) + end + + @doc """ + Enqueues label generation for the given order ID. + Sends only the integer ID — not the full order struct. + """ + @spec enqueue(integer()) :: :ok + def enqueue(order_id) when is_integer(order_id) do + GenServer.cast(__MODULE__, {:generate_label, order_id}) + end + + @impl true + def init(_opts), do: {:ok, %{}} + + @impl true + # Good: message carries only the order_id. + # The handler fetches the full record inside the worker process. + def handle_cast({:generate_label, order_id}, state) do + Task.start(fn -> do_generate(order_id) end) + {:noreply, state} + end + + defp do_generate(order_id) do + # Re-fetch only the fields needed for label generation + order = Orders.get_order!(order_id) + Shipping.generate_label(order) + end +end + +defmodule MyApp.Shipping.BatchNotifier do + @moduledoc """ + Broadcasts shipping updates. Sends only the shipment ID in PubSub + messages; subscribers fetch full details on demand. + """ + + @doc """ + Publishes a shipment-dispatched event with only the shipment ID. 
+ """ + @spec notify_dispatched(integer()) :: :ok | {:error, term()} + def notify_dispatched(shipment_id) when is_integer(shipment_id) do + Phoenix.PubSub.broadcast( + MyApp.PubSub, + "shipments", + {:shipment_dispatched, shipment_id} + ) + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_array_constructor_with_arguments/bad/matrix_utils.js b/priv/combined_metrics/samples/code_smells/no_array_constructor_with_arguments/bad/matrix_utils.js new file mode 100644 index 0000000..e624438 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_array_constructor_with_arguments/bad/matrix_utils.js @@ -0,0 +1,59 @@ +function createMatrix(rows, cols, fillValue = 0) { + const matrix = new Array(rows); + for (let r = 0; r < rows; r++) { + matrix[r] = new Array(cols).fill(fillValue); + } + return matrix; +} + +function createRange(start, end, step = 1) { + const count = Math.ceil((end - start) / step); + const result = new Array(count); + for (let i = 0; i < count; i++) { + result[i] = start + i * step; + } + return result; +} + +function createFilledArray(length, valueFn) { + const arr = new Array(length); + for (let i = 0; i < length; i++) { + arr[i] = valueFn(i); + } + return arr; +} + +function transposeMatrix(matrix) { + const rows = matrix.length; + const cols = matrix[0].length; + const result = new Array(cols); + + for (let c = 0; c < cols; c++) { + result[c] = new Array(rows); + for (let r = 0; r < rows; r++) { + result[c][r] = matrix[r][c]; + } + } + + return result; +} + +function multiplyMatrices(a, b) { + const rows = a.length; + const cols = b[0].length; + const inner = b.length; + + const result = new Array(rows); + for (let r = 0; r < rows; r++) { + result[r] = new Array(cols).fill(0); + for (let c = 0; c < cols; c++) { + for (let k = 0; k < inner; k++) { + result[r][c] += a[r][k] * b[k][c]; + } + } + } + + return result; +} + +export { createMatrix, createRange, createFilledArray, transposeMatrix, multiplyMatrices }; diff --git 
a/priv/combined_metrics/samples/code_smells/no_array_constructor_with_arguments/good/matrix_utils.js b/priv/combined_metrics/samples/code_smells/no_array_constructor_with_arguments/good/matrix_utils.js new file mode 100644 index 0000000..d7bdf20 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_array_constructor_with_arguments/good/matrix_utils.js @@ -0,0 +1,53 @@ +function createMatrix(rows, cols, fillValue = 0) { + return Array.from({ length: rows }, () => Array.from({ length: cols }, () => fillValue)); +} + +function createRange(start, end, step = 1) { + const result = []; + for (let i = start; i < end; i += step) { + result.push(i); + } + return result; +} + +function createFilledArray(length, valueFn) { + return Array.from({ length }, (_, index) => valueFn(index)); +} + +function transposeMatrix(matrix) { + const rows = matrix.length; + const cols = matrix[0].length; + const result = Array.from({ length: cols }, () => Array.from({ length: rows }, () => 0)); + + for (let r = 0; r < rows; r++) { + for (let c = 0; c < cols; c++) { + result[c][r] = matrix[r][c]; + } + } + + return result; +} + +function multiplyMatrices(a, b) { + const rows = a.length; + const cols = b[0].length; + const inner = b.length; + + const result = Array.from({ length: rows }, () => Array.from({ length: cols }, () => 0)); + + for (let r = 0; r < rows; r++) { + for (let c = 0; c < cols; c++) { + for (let k = 0; k < inner; k++) { + result[r][c] += a[r][k] * b[k][c]; + } + } + } + + return result; +} + +function flattenMatrix(matrix) { + return matrix.flat(); +} + +export { createMatrix, createRange, createFilledArray, transposeMatrix, multiplyMatrices, flattenMatrix }; diff --git a/priv/combined_metrics/samples/code_smells/no_array_delete/bad/notification_handler.ts b/priv/combined_metrics/samples/code_smells/no_array_delete/bad/notification_handler.ts new file mode 100644 index 0000000..7a13f77 --- /dev/null +++ 
b/priv/combined_metrics/samples/code_smells/no_array_delete/bad/notification_handler.ts @@ -0,0 +1,67 @@ +interface Notification { + id: string; + userId: string; + type: string; + title: string; + readAt: string | null; +} + +function removeNotificationById( + notifications: Notification[], + id: string +): Notification[] { + const index = notifications.findIndex((n) => n.id === id); + if (index !== -1) { + // delete leaves a hole (undefined) in the array instead of removing the element + delete notifications[index]; + } + return notifications; +} + +function removeReadNotifications(notifications: Notification[]): Notification[] { + for (let i = 0; i < notifications.length; i++) { + if (notifications[i].readAt !== null) { + delete notifications[i]; + } + } + return notifications; +} + +function clearBulk(notifications: Notification[], ids: Set): Notification[] { + for (let i = 0; i < notifications.length; i++) { + if (ids.has(notifications[i].id)) { + delete notifications[i]; + } + } + return notifications; +} + +class NotificationQueue { + private items: Notification[]; + + constructor(initial: Notification[] = []) { + this.items = [...initial]; + } + + enqueue(notification: Notification): void { + this.items.push(notification); + } + + remove(id: string): void { + const index = this.items.findIndex((n) => n.id === id); + if (index !== -1) { + delete this.items[index]; + } + } + + getAll(): Notification[] { + return [...this.items]; + } + + get length(): number { + return this.items.length; + } +} + +export { removeNotificationById, removeReadNotifications, clearBulk, NotificationQueue }; +export type { Notification }; diff --git a/priv/combined_metrics/samples/code_smells/no_array_delete/good/notification_handler.ts b/priv/combined_metrics/samples/code_smells/no_array_delete/good/notification_handler.ts new file mode 100644 index 0000000..942650c --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_array_delete/good/notification_handler.ts @@ -0,0 
+1,64 @@ +interface Notification { + id: string; + userId: string; + type: string; + title: string; + readAt: string | null; +} + +function removeNotificationById( + notifications: Notification[], + id: string +): Notification[] { + return notifications.filter((n) => n.id !== id); +} + +function removeNotificationsByType( + notifications: Notification[], + type: string +): Notification[] { + return notifications.filter((n) => n.type !== type); +} + +function removeReadNotifications(notifications: Notification[]): Notification[] { + return notifications.filter((n) => n.readAt === null); +} + +function removeAtIndex(notifications: Notification[], index: number): Notification[] { + return [...notifications.slice(0, index), ...notifications.slice(index + 1)]; +} + +function clearBulk(notifications: Notification[], ids: Set): Notification[] { + return notifications.filter((n) => !ids.has(n.id)); +} + +class NotificationQueue { + private items: Notification[]; + + constructor(initial: Notification[] = []) { + this.items = [...initial]; + } + + enqueue(notification: Notification): void { + this.items.push(notification); + } + + remove(id: string): void { + this.items = this.items.filter((n) => n.id !== id); + } + + removeFirst(): Notification | undefined { + return this.items.shift(); + } + + getAll(): Notification[] { + return [...this.items]; + } + + get length(): number { + return this.items.length; + } +} + +export { removeNotificationById, removeNotificationsByType, removeReadNotifications, removeAtIndex, clearBulk, NotificationQueue }; +export type { Notification }; diff --git a/priv/combined_metrics/samples/code_smells/no_auto_ptr/bad/Connection.cpp b/priv/combined_metrics/samples/code_smells/no_auto_ptr/bad/Connection.cpp new file mode 100644 index 0000000..2cbdc89 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_auto_ptr/bad/Connection.cpp @@ -0,0 +1,73 @@ +#include +#include +#include +#include + +class Socket { +public: + explicit Socket(const 
std::string& host, int port) + : host_(host), port_(port), connected_(false) {} + + void connect() { connected_ = true; } + void disconnect() { connected_ = false; } + bool isConnected() const { return connected_; } + std::string host() const { return host_; } + int port() const { return port_; } + +private: + std::string host_; + int port_; + bool connected_; +}; + +class Connection { +public: + explicit Connection(const std::string& host, int port) + : socket_(new Socket(host, port)) // std::auto_ptr — deprecated and removed in C++17 + { + socket_->connect(); + } + + ~Connection() { + if (socket_.get() && socket_->isConnected()) + socket_->disconnect(); + // auto_ptr deletes automatically, but transfer semantics are broken + } + + // auto_ptr copy "moves" ownership silently — source becomes null after copy + // This causes bugs when connection is put into a container or passed by value + Connection(const Connection& other) : socket_(other.socket_) {} // silently steals! + + bool isAlive() const { + return socket_.get() && socket_->isConnected(); + } + + std::string endpoint() const { + return socket_->host() + ":" + std::to_string(socket_->port()); + } + +private: + std::auto_ptr socket_; // std::auto_ptr: deprecated since C++11, removed in C++17 +}; + +class ConnectionPool { +public: + explicit ConnectionPool(const std::string& host, int port, std::size_t poolSize) { + for (std::size_t i = 0; i < poolSize; ++i) { + // Storing auto_ptr in a vector is undefined behavior — + // std::vector requires copyable elements; auto_ptr's copy transfers ownership + connections_.push_back(std::auto_ptr(new Connection(host, port))); + } + } + + Connection* acquire() { + for (auto& conn : connections_) { + if (conn.get() && conn->isAlive()) + return conn.get(); + } + throw std::runtime_error("No available connections"); + } + +private: + std::vector> connections_; // undefined behavior +}; diff --git a/priv/combined_metrics/samples/code_smells/no_auto_ptr/good/Connection.cpp 
b/priv/combined_metrics/samples/code_smells/no_auto_ptr/good/Connection.cpp new file mode 100644 index 0000000..7c2ecc4 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_auto_ptr/good/Connection.cpp @@ -0,0 +1,73 @@ +#include +#include +#include +#include + +class Socket { +public: + explicit Socket(const std::string& host, int port) + : host_(host), port_(port), connected_(false) {} + + void connect() { connected_ = true; } + void disconnect() noexcept { connected_ = false; } + bool isConnected() const noexcept { return connected_; } + std::string host() const { return host_; } + int port() const { return port_; } + +private: + std::string host_; + int port_; + bool connected_; +}; + +class Connection { +public: + explicit Connection(const std::string& host, int port) + : socket_(std::make_unique(host, port)) // unique_ptr — clear ownership + { + socket_->connect(); + } + + ~Connection() { + if (socket_ && socket_->isConnected()) + socket_->disconnect(); + } + + // Move is well-defined with unique_ptr + Connection(Connection&&) noexcept = default; + Connection& operator=(Connection&&) noexcept = default; + + Connection(const Connection&) = delete; + Connection& operator=(const Connection&) = delete; + + bool isAlive() const noexcept { + return socket_ && socket_->isConnected(); + } + + std::string endpoint() const { + return socket_->host() + ":" + std::to_string(socket_->port()); + } + +private: + std::unique_ptr socket_; +}; + +class ConnectionPool { +public: + explicit ConnectionPool(const std::string& host, int port, std::size_t size) { + connections_.reserve(size); + for (std::size_t i = 0; i < size; ++i) + connections_.push_back(std::make_unique(host, port)); + } + + Connection* acquire() { + for (auto& conn : connections_) { + if (conn && conn->isAlive()) + return conn.get(); + } + throw std::runtime_error("No available connections in pool"); + } + +private: + std::vector> connections_; +}; diff --git 
a/priv/combined_metrics/samples/code_smells/no_blocking_on_async_code/bad/NotificationService.cs b/priv/combined_metrics/samples/code_smells/no_blocking_on_async_code/bad/NotificationService.cs new file mode 100644 index 0000000..7380910 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_blocking_on_async_code/bad/NotificationService.cs @@ -0,0 +1,74 @@ +using System.Collections.Generic; +using System.Net.Http; +using System.Text.Json; +using System.Threading.Tasks; + +namespace Notifications +{ + public class NotificationService + { + private readonly HttpClient _httpClient; + private readonly string _baseUrl; + + public NotificationService(HttpClient httpClient, string baseUrl) + { + _httpClient = httpClient; + _baseUrl = baseUrl; + } + + public void Send(Notification notification) + { + var payload = JsonSerializer.Serialize(notification); + var content = new StringContent(payload, System.Text.Encoding.UTF8, "application/json"); + + // .Result blocks the calling thread and can cause deadlocks in ASP.NET contexts + var response = _httpClient.PostAsync($"{_baseUrl}/notify", content).Result; + response.EnsureSuccessStatusCode(); + } + + public IReadOnlyList GetPending(string recipientId) + { + // .Result blocks the thread; deadlock-prone in synchronization contexts + var response = _httpClient.GetAsync( + $"{_baseUrl}/notifications/pending?recipientId={recipientId}").Result; + response.EnsureSuccessStatusCode(); + + // Another .Result to block on the content read + var json = response.Content.ReadAsStringAsync().Result; + var notifications = JsonSerializer.Deserialize>(json); + return notifications?.AsReadOnly() ?? 
new List().AsReadOnly(); + } + + public void DispatchBatch(IEnumerable notifications) + { + var tasks = new List(); + foreach (var notification in notifications) + tasks.Add(SendAsync(notification)); + + // Task.WaitAll blocks the calling thread + Task.WaitAll(tasks.ToArray()); + } + + public bool IsReachable() + { + try + { + // .Wait() blocks and can deadlock + var responseTask = _httpClient.GetAsync($"{_baseUrl}/health"); + responseTask.Wait(); + return responseTask.Result.IsSuccessStatusCode; + } + catch (AggregateException) + { + return false; + } + } + + private Task SendAsync(Notification notification) + { + var payload = JsonSerializer.Serialize(notification); + var content = new StringContent(payload, System.Text.Encoding.UTF8, "application/json"); + return _httpClient.PostAsync($"{_baseUrl}/notify", content); + } + } +} diff --git a/priv/combined_metrics/samples/code_smells/no_blocking_on_async_code/good/NotificationService.cs b/priv/combined_metrics/samples/code_smells/no_blocking_on_async_code/good/NotificationService.cs new file mode 100644 index 0000000..cd4ea22 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_blocking_on_async_code/good/NotificationService.cs @@ -0,0 +1,61 @@ +using System.Collections.Generic; +using System.Net.Http; +using System.Text.Json; +using System.Threading.Tasks; + +namespace Notifications +{ + public class NotificationService + { + private readonly HttpClient _httpClient; + private readonly string _baseUrl; + + public NotificationService(HttpClient httpClient, string baseUrl) + { + _httpClient = httpClient; + _baseUrl = baseUrl; + } + + public async Task SendAsync(Notification notification) + { + var payload = JsonSerializer.Serialize(notification); + var content = new StringContent(payload, System.Text.Encoding.UTF8, "application/json"); + + var response = await _httpClient.PostAsync($"{_baseUrl}/notify", content); + response.EnsureSuccessStatusCode(); + } + + public async Task> GetPendingAsync(string 
recipientId) + { + var response = await _httpClient.GetAsync( + $"{_baseUrl}/notifications/pending?recipientId={recipientId}"); + response.EnsureSuccessStatusCode(); + + var json = await response.Content.ReadAsStringAsync(); + var notifications = JsonSerializer.Deserialize>(json); + return notifications?.AsReadOnly() ?? new List().AsReadOnly(); + } + + public async Task DispatchBatchAsync(IEnumerable notifications) + { + var tasks = new List(); + foreach (var notification in notifications) + tasks.Add(SendAsync(notification)); + + await Task.WhenAll(tasks); + } + + public async Task IsReachableAsync() + { + try + { + var response = await _httpClient.GetAsync($"{_baseUrl}/health"); + return response.IsSuccessStatusCode; + } + catch (HttpRequestException) + { + return false; + } + } + } +} diff --git a/priv/combined_metrics/samples/code_smells/no_class_variables/bad/account.rb b/priv/combined_metrics/samples/code_smells/no_class_variables/bad/account.rb new file mode 100644 index 0000000..4abf9fa --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_class_variables/bad/account.rb @@ -0,0 +1,46 @@ +class Account + # @@var is shared across the entire inheritance hierarchy + @@default_plan = :free + @@max_team_size = 5 + @@registered_count = 0 + + def self.default_plan + @@default_plan + end + + def self.default_plan=(plan) + @@default_plan = plan + end + + def self.max_team_size + @@max_team_size + end + + def self.registered_count + @@registered_count + end + + attr_reader :email, :plan, :team_size + + def initialize(email:, plan: @@default_plan, team_size: 1) + @email = email + @plan = plan + @team_size = team_size + @@registered_count += 1 + end + + def upgrade_plan(new_plan) + @plan = new_plan + end + + def within_team_limit? 
+ team_size <= @@max_team_size + end +end + +class EnterpriseAccount < Account + # Attempting to set subclass defaults, but @@vars are shared with Account + # Setting @@default_plan here also changes Account.default_plan — a surprise + @@default_plan = :enterprise + @@max_team_size = 500 +end diff --git a/priv/combined_metrics/samples/code_smells/no_class_variables/good/account.rb b/priv/combined_metrics/samples/code_smells/no_class_variables/good/account.rb new file mode 100644 index 0000000..c4f4de8 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_class_variables/good/account.rb @@ -0,0 +1,47 @@ +class Account + # Use instance variables on the class object instead of @@ + @default_plan = :free + @max_team_size = 5 + @registered_count = 0 + + class << self + attr_accessor :default_plan, :max_team_size + + def registered_count + @registered_count + end + + def increment_registered_count + @registered_count += 1 + end + + def reset_registered_count + @registered_count = 0 + end + end + + attr_reader :email, :plan, :team_size + + def initialize(email:, plan: Account.default_plan, team_size: 1) + @email = email + @plan = plan + @team_size = team_size + self.class.increment_registered_count + end + + def upgrade_plan(new_plan) + @plan = new_plan + end + + def within_team_limit? 
+ team_size <= self.class.max_team_size + end +end + +class EnterpriseAccount < Account + @default_plan = :enterprise + @max_team_size = 500 + + # Enterprise subclass has its own independent class-level state + # This would not be possible with @@ which leaks across the hierarchy +end diff --git a/priv/combined_metrics/samples/code_smells/no_const_enum/bad/product_repository.ts b/priv/combined_metrics/samples/code_smells/no_const_enum/bad/product_repository.ts new file mode 100644 index 0000000..aed9efc --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_const_enum/bad/product_repository.ts @@ -0,0 +1,70 @@ +// const enum is inlined at compile time, breaking at runtime when used +// from JavaScript or with isolatedModules / bundlers +const enum ProductStatus { + Draft = "draft", + Active = "active", + Archived = "archived", + OutOfStock = "out_of_stock", +} + +const enum ProductCategory { + Electronics = "electronics", + Clothing = "clothing", + Food = "food", + Books = "books", + Other = "other", +} + +const enum SortOrder { + PriceAsc = "price_asc", + PriceDesc = "price_desc", + NameAsc = "name_asc", + Newest = "newest", +} + +interface Product { + id: string; + name: string; + price: number; + status: ProductStatus; + category: ProductCategory; + stock: number; + createdAt: string; +} + +interface ProductQuery { + category?: ProductCategory; + status?: ProductStatus; + sortOrder?: SortOrder; + page?: number; + pageSize?: number; +} + +async function fetchProducts(query: ProductQuery = {}): Promise { + const params = new URLSearchParams(); + if (query.category) params.set("category", query.category); + if (query.status) params.set("status", query.status); + if (query.sortOrder) params.set("sort", query.sortOrder); + if (query.page) params.set("page", String(query.page)); + if (query.pageSize) params.set("pageSize", String(query.pageSize)); + + const response = await fetch(`/api/products?${params}`); + if (!response.ok) throw new Error(`Failed to 
fetch products: ${response.status}`); + return response.json() as Promise; +} + +function isAvailable(product: Product): boolean { + return product.status === ProductStatus.Active && product.stock > 0; +} + +function getStatusLabel(status: ProductStatus): string { + switch (status) { + case ProductStatus.Draft: return "Draft"; + case ProductStatus.Active: return "Active"; + case ProductStatus.Archived: return "Archived"; + case ProductStatus.OutOfStock: return "Out of Stock"; + } +} + +export { fetchProducts, isAvailable, getStatusLabel, ProductStatus, ProductCategory, SortOrder }; +export type { Product, ProductQuery }; diff --git a/priv/combined_metrics/samples/code_smells/no_const_enum/good/product_repository.ts b/priv/combined_metrics/samples/code_smells/no_const_enum/good/product_repository.ts new file mode 100644 index 0000000..c68ffe9 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_const_enum/good/product_repository.ts @@ -0,0 +1,69 @@ +enum ProductStatus { + Draft = "draft", + Active = "active", + Archived = "archived", + OutOfStock = "out_of_stock", +} + +enum ProductCategory { + Electronics = "electronics", + Clothing = "clothing", + Food = "food", + Books = "books", + Other = "other", +} + +enum SortOrder { + PriceAsc = "price_asc", + PriceDesc = "price_desc", + NameAsc = "name_asc", + Newest = "newest", +} + +interface Product { + id: string; + name: string; + price: number; + status: ProductStatus; + category: ProductCategory; + stock: number; + createdAt: string; +} + +interface ProductQuery { + category?: ProductCategory; + status?: ProductStatus; + sortOrder?: SortOrder; + page?: number; + pageSize?: number; +} + +async function fetchProducts(query: ProductQuery = {}): Promise { + const params = new URLSearchParams(); + if (query.category) params.set("category", query.category); + if (query.status) params.set("status", query.status); + if (query.sortOrder) params.set("sort", query.sortOrder); + if (query.page) params.set("page", 
String(query.page)); + if (query.pageSize) params.set("pageSize", String(query.pageSize)); + + const response = await fetch(`/api/products?${params}`); + if (!response.ok) throw new Error(`Failed to fetch products: ${response.status}`); + return response.json() as Promise; +} + +function isAvailable(product: Product): boolean { + return product.status === ProductStatus.Active && product.stock > 0; +} + +function getStatusLabel(status: ProductStatus): string { + const labels: Record = { + [ProductStatus.Draft]: "Draft", + [ProductStatus.Active]: "Active", + [ProductStatus.Archived]: "Archived", + [ProductStatus.OutOfStock]: "Out of Stock", + }; + return labels[status]; +} + +export { fetchProducts, isAvailable, getStatusLabel, ProductStatus, ProductCategory, SortOrder }; +export type { Product, ProductQuery }; diff --git a/priv/combined_metrics/samples/code_smells/no_dead_code_after_return/bad/order_service.ex b/priv/combined_metrics/samples/code_smells/no_dead_code_after_return/bad/order_service.ex new file mode 100644 index 0000000..c501350 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_dead_code_after_return/bad/order_service.ex @@ -0,0 +1,96 @@ +defmodule OrderService do + @moduledoc "Handles order creation, validation, and fulfillment" + + def create_order(user, items) do + if user == nil do + {:error, :user_required} + IO.puts("This will never run") + log_attempt(nil) + end + + if items == [] do + {:error, :items_required} + notify_empty_cart(user) + end + + total = calculate_total(items) + {:ok, %{user_id: user.id, items: items, total: total}} + end + + def cancel_order(order) do + case order.status do + :pending -> + {:ok, %{order | status: :cancelled}} + send_cancellation_email(order.user_id) + update_inventory(order.items) + + :shipped -> + {:error, :already_shipped} + log_cancel_attempt(order.id) + notify_support(order) + + _ -> + {:error, :invalid_status} + IO.inspect(order, label: "unexpected order") + end + end + + def 
apply_discount(order, code) do + case lookup_discount(code) do + nil -> + {:error, :invalid_code} + track_invalid_code(code) + {:error, :not_found} + + discount -> + new_total = order.total * (1 - discount.rate) + {:ok, %{order | total: new_total}} + end + end + + def validate_address(address) do + if address.zip == nil do + {:error, :zip_required} + flag_incomplete_address(address) + end + + if address.city == nil do + {:error, :city_required} + flag_incomplete_address(address) + end + + {:ok, address} + end + + def fulfill_order(order) do + case order.payment_status do + :paid -> + {:ok, %{order | status: :fulfilling}} + schedule_shipment(order) + notify_warehouse(order) + + :pending -> + {:error, :payment_pending} + retry_payment(order) + + :failed -> + {:error, :payment_failed} + notify_user_payment_failed(order.user_id) + end + end + + defp calculate_total(items), do: Enum.sum(Enum.map(items, & &1.price)) + defp lookup_discount(_code), do: nil + defp send_cancellation_email(_user_id), do: :ok + defp update_inventory(_items), do: :ok + defp log_cancel_attempt(_id), do: :ok + defp notify_support(_order), do: :ok + defp track_invalid_code(_code), do: :ok + defp flag_incomplete_address(_address), do: :ok + defp schedule_shipment(_order), do: :ok + defp notify_warehouse(_order), do: :ok + defp retry_payment(_order), do: :ok + defp notify_user_payment_failed(_user_id), do: :ok + defp log_attempt(_user), do: :ok + defp notify_empty_cart(_user), do: :ok +end diff --git a/priv/combined_metrics/samples/code_smells/no_dead_code_after_return/config.yml b/priv/combined_metrics/samples/code_smells/no_dead_code_after_return/config.yml new file mode 100644 index 0000000..9132452 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_dead_code_after_return/config.yml @@ -0,0 +1 @@ +doc: "There should be no unreachable statements after a return or early exit." 
diff --git a/priv/combined_metrics/samples/code_smells/no_dead_code_after_return/good/order_service.ex b/priv/combined_metrics/samples/code_smells/no_dead_code_after_return/good/order_service.ex new file mode 100644 index 0000000..8262780 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_dead_code_after_return/good/order_service.ex @@ -0,0 +1,58 @@ +defmodule OrderService do + @moduledoc "Handles order creation, validation, and fulfillment" + + def create_order(nil, _items), do: {:error, :user_required} + def create_order(_user, []), do: {:error, :items_required} + + def create_order(user, items) do + total = calculate_total(items) + {:ok, %{user_id: user.id, items: items, total: total}} + end + + def cancel_order(%{status: :pending} = order) do + {:ok, %{order | status: :cancelled}} + end + + def cancel_order(%{status: :shipped}) do + {:error, :already_shipped} + end + + def cancel_order(_order) do + {:error, :invalid_status} + end + + def apply_discount(order, code) do + case lookup_discount(code) do + nil -> + {:error, :invalid_code} + + discount -> + new_total = order.total * (1 - discount.rate) + {:ok, %{order | total: new_total}} + end + end + + def validate_address(%{zip: nil}), do: {:error, :zip_required} + def validate_address(%{city: nil}), do: {:error, :city_required} + def validate_address(address), do: {:ok, address} + + def fulfill_order(%{payment_status: :paid} = order) do + updated = %{order | status: :fulfilling} + schedule_shipment(updated) + notify_warehouse(updated) + {:ok, updated} + end + + def fulfill_order(%{payment_status: :pending}) do + {:error, :payment_pending} + end + + def fulfill_order(%{payment_status: :failed}) do + {:error, :payment_failed} + end + + defp calculate_total(items), do: Enum.sum(Enum.map(items, & &1.price)) + defp lookup_discount(_code), do: nil + defp schedule_shipment(_order), do: :ok + defp notify_warehouse(_order), do: :ok +end diff --git 
a/priv/combined_metrics/samples/code_smells/no_debug_print_statements/bad/payment.ex b/priv/combined_metrics/samples/code_smells/no_debug_print_statements/bad/payment.ex new file mode 100644 index 0000000..2c75241 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_debug_print_statements/bad/payment.ex @@ -0,0 +1,97 @@ +defmodule Payment do + @moduledoc "Handles payment processing and refunds" + + def charge(user, amount, card) do + IO.puts("charging user: #{user.id}") + IO.inspect(card, label: "card details") + IO.inspect(amount, label: "amount") + + case validate_card(card) do + {:ok, validated} -> + IO.puts("card validated successfully") + IO.inspect(validated, label: "validated card") + result = call_payment_gateway(validated, amount) + IO.inspect(result, label: "gateway result") + result + + {:error, reason} -> + IO.puts("card validation failed: #{inspect(reason)}") + {:error, reason} + end + end + + def refund(transaction_id, amount) do + IO.puts("starting refund for transaction: #{transaction_id}") + + case fetch_transaction(transaction_id) do + {:ok, transaction} -> + IO.inspect(transaction, label: "found transaction") + + if transaction.amount < amount do + IO.puts("refund amount exceeds original") + {:error, :exceeds_original} + else + IO.puts("processing refund of #{amount}") + result = call_refund_api(transaction, amount) + IO.inspect(result, label: "refund result") + result + end + + {:error, :not_found} -> + IO.puts("transaction not found: #{transaction_id}") + {:error, :not_found} + end + end + + def calculate_fee(amount, method) do + IO.inspect({amount, method}, label: "fee calculation input") + + fee = + case method do + :credit_card -> amount * 0.029 + 0.30 + :debit_card -> amount * 0.015 + :bank_transfer -> 0.25 + _ -> amount * 0.035 + end + + IO.puts("calculated fee: #{fee}") + fee + end + + def authorize(user, amount) do + IO.inspect(user, label: "authorizing user") + IO.puts("checking balance for #{user.id}, amount: #{amount}") + 
+ cond do + user.balance >= amount -> + IO.puts("authorization approved") + {:ok, :authorized} + + true -> + IO.puts("insufficient funds: #{user.balance} < #{amount}") + {:error, :insufficient_funds} + end + end + + def apply_coupon(total, coupon_code) do + IO.puts("applying coupon: #{coupon_code}") + + case lookup_coupon(coupon_code) do + {:ok, coupon} -> + IO.inspect(coupon, label: "coupon found") + discounted = total - coupon.discount + IO.puts("new total after coupon: #{discounted}") + {:ok, discounted} + + {:error, _} -> + IO.puts("coupon not found: #{coupon_code}") + {:error, :invalid_coupon} + end + end + + defp validate_card(card), do: {:ok, card} + defp call_payment_gateway(_card, _amount), do: {:ok, %{transaction_id: "txn_123"}} + defp fetch_transaction(_id), do: {:ok, %{amount: 100.0}} + defp call_refund_api(_transaction, _amount), do: {:ok, :refunded} + defp lookup_coupon(_code), do: {:error, :not_found} +end diff --git a/priv/combined_metrics/samples/code_smells/no_debug_print_statements/config.yml b/priv/combined_metrics/samples/code_smells/no_debug_print_statements/config.yml new file mode 100644 index 0000000..5f85825 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_debug_print_statements/config.yml @@ -0,0 +1 @@ +doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." 
diff --git a/priv/combined_metrics/samples/code_smells/no_debug_print_statements/good/payment.ex b/priv/combined_metrics/samples/code_smells/no_debug_print_statements/good/payment.ex new file mode 100644 index 0000000..b9b5b1c --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_debug_print_statements/good/payment.ex @@ -0,0 +1,66 @@ +defmodule Payment do + @moduledoc "Handles payment processing and refunds" + + require Logger + + def charge(user, amount, card) do + case validate_card(card) do + {:ok, validated} -> + result = call_payment_gateway(validated, amount) + Logger.info("Payment charged", user_id: user.id, amount: amount) + result + + {:error, reason} -> + Logger.warning("Card validation failed", user_id: user.id, reason: inspect(reason)) + {:error, reason} + end + end + + def refund(transaction_id, amount) do + case fetch_transaction(transaction_id) do + {:ok, transaction} when transaction.amount >= amount -> + result = call_refund_api(transaction, amount) + Logger.info("Refund processed", transaction_id: transaction_id, amount: amount) + result + + {:ok, _transaction} -> + {:error, :exceeds_original} + + {:error, :not_found} -> + {:error, :not_found} + end + end + + def calculate_fee(amount, method) do + case method do + :credit_card -> amount * 0.029 + 0.30 + :debit_card -> amount * 0.015 + :bank_transfer -> 0.25 + _ -> amount * 0.035 + end + end + + def authorize(user, amount) do + if user.balance >= amount do + {:ok, :authorized} + else + {:error, :insufficient_funds} + end + end + + def apply_coupon(total, coupon_code) do + case lookup_coupon(coupon_code) do + {:ok, coupon} -> + {:ok, total - coupon.discount} + + {:error, _} -> + {:error, :invalid_coupon} + end + end + + defp validate_card(card), do: {:ok, card} + defp call_payment_gateway(_card, _amount), do: {:ok, %{transaction_id: "txn_123"}} + defp fetch_transaction(_id), do: {:ok, %{amount: 100.0}} + defp call_refund_api(_transaction, _amount), do: {:ok, :refunded} + defp 
lookup_coupon(_code), do: {:error, :not_found} +end diff --git a/priv/combined_metrics/samples/code_smells/no_double_negation/bad/cart.rb b/priv/combined_metrics/samples/code_smells/no_double_negation/bad/cart.rb new file mode 100644 index 0000000..44e3fb0 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_double_negation/bad/cart.rb @@ -0,0 +1,45 @@ +class CartPresenter + attr_reader :cart, :user + + def initialize(cart, user) + @cart = cart + @user = user + end + + def has_items? + !!cart.line_items.any? + end + + def has_coupon? + !!cart.coupon_code + end + + # !! is unnecessary — nil? already returns a boolean + def user_authenticated? + !!user + end + + def show_guest_prompt? + !!user.nil? + end + + def checkout_enabled? + !!(has_items? && user_authenticated?) + end + + def discount_applied? + !!(has_coupon? && cart.discount_amount.to_f > 0) + end + + def to_h + { + has_items: !!has_items?, + has_coupon: !!has_coupon?, + authenticated: !!user_authenticated?, + checkout_enabled: !!checkout_enabled?, + discount_applied: !!discount_applied?, + item_count: cart.line_items.size, + total: cart.total + } + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_double_negation/good/cart.rb b/priv/combined_metrics/samples/code_smells/no_double_negation/good/cart.rb new file mode 100644 index 0000000..42f421a --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_double_negation/good/cart.rb @@ -0,0 +1,44 @@ +class CartPresenter + attr_reader :cart, :user + + def initialize(cart, user) + @cart = cart + @user = user + end + + def has_items? + cart.line_items.any? + end + + def has_coupon? + cart.coupon_code.present? + end + + def user_authenticated? + !user.nil? + end + + def show_guest_prompt? + user.nil? + end + + def checkout_enabled? + has_items? && user_authenticated? + end + + def discount_applied? + has_coupon? 
&& cart.discount_amount.to_f > 0 + end + + def to_h + { + has_items: has_items?, + has_coupon: has_coupon?, + authenticated: user_authenticated?, + checkout_enabled: checkout_enabled?, + discount_applied: discount_applied?, + item_count: cart.line_items.size, + total: cart.total + } + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_finalize_override/bad/DatabaseConnection.java b/priv/combined_metrics/samples/code_smells/no_finalize_override/bad/DatabaseConnection.java new file mode 100644 index 0000000..823d0c1 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_finalize_override/bad/DatabaseConnection.java @@ -0,0 +1,67 @@ +package com.example.db; + +import java.sql.Connection; +import java.sql.SQLException; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class DatabaseConnection { + + private static final Logger logger = Logger.getLogger(DatabaseConnection.class.getName()); + + private final Connection connection; + private boolean closed = false; + + public DatabaseConnection(Connection connection) { + this.connection = connection; + } + + public QueryResult execute(String sql, Object... params) throws SQLException { + try (var stmt = connection.prepareStatement(sql)) { + for (int i = 0; i < params.length; i++) { + stmt.setObject(i + 1, params[i]); + } + return QueryResult.from(stmt.executeQuery()); + } + } + + public void beginTransaction() throws SQLException { + connection.setAutoCommit(false); + } + + public void commit() throws SQLException { + connection.commit(); + } + + public void rollback() throws SQLException { + connection.rollback(); + } + + public void close() throws SQLException { + if (!closed) { + closed = true; + connection.close(); + } + } + + /** + * Overrides Object.finalize() to close the connection when garbage collected. + * This is unreliable — finalize() may never run, or run too late, leaving + * database connections open indefinitely. 
+ */ + @Override + protected void finalize() throws Throwable { + try { + if (!closed) { + logger.log(Level.WARNING, "DatabaseConnection was not closed explicitly — closing in finalizer"); + connection.close(); + } + } finally { + super.finalize(); + } + } + + public boolean isClosed() { + return closed; + } +} diff --git a/priv/combined_metrics/samples/code_smells/no_finalize_override/good/DatabaseConnection.java b/priv/combined_metrics/samples/code_smells/no_finalize_override/good/DatabaseConnection.java new file mode 100644 index 0000000..ed98a87 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_finalize_override/good/DatabaseConnection.java @@ -0,0 +1,77 @@ +package com.example.db; + +import java.io.Closeable; +import java.sql.Connection; +import java.sql.SQLException; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Wraps a JDBC connection. Use try-with-resources to ensure the connection + * is closed promptly; do not rely on garbage collection for cleanup. + */ +public class DatabaseConnection implements Closeable { + + private static final Logger logger = Logger.getLogger(DatabaseConnection.class.getName()); + + private final Connection connection; + private boolean closed = false; + + public DatabaseConnection(Connection connection) { + this.connection = connection; + } + + public QueryResult execute(String sql, Object... 
params) throws SQLException { + ensureOpen(); + try (var stmt = connection.prepareStatement(sql)) { + for (int i = 0; i < params.length; i++) { + stmt.setObject(i + 1, params[i]); + } + return QueryResult.from(stmt.executeQuery()); + } + } + + public void beginTransaction() throws SQLException { + ensureOpen(); + connection.setAutoCommit(false); + } + + public void commit() throws SQLException { + ensureOpen(); + connection.commit(); + } + + public void rollback() throws SQLException { + ensureOpen(); + try { + connection.rollback(); + } catch (SQLException e) { + logger.log(Level.WARNING, "Rollback failed", e); + throw e; + } + } + + @Override + public void close() { + if (!closed) { + closed = true; + try { + connection.close(); + } catch (SQLException e) { + // Log but do not propagate — close() must not throw checked exceptions. + // The connection resource is released regardless. + logger.log(Level.WARNING, "Error closing database connection", e); + } + } + } + + public boolean isClosed() { + return closed; + } + + private void ensureOpen() { + if (closed) { + throw new IllegalStateException("DatabaseConnection has already been closed"); + } + } +} diff --git a/priv/combined_metrics/samples/code_smells/no_fixme_comments/bad/importer.ex b/priv/combined_metrics/samples/code_smells/no_fixme_comments/bad/importer.ex new file mode 100644 index 0000000..6e5ea09 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_fixme_comments/bad/importer.ex @@ -0,0 +1,77 @@ +defmodule Importer do + @moduledoc "Handles importing data from CSV and external sources" + + # FIXME: this crashes on empty files, need to handle that + def import_csv(path) do + path + |> File.read!() + |> String.split("\n") + |> Enum.map(&parse_row/1) + |> Enum.reject(&is_nil/1) + end + + # TODO: FIXME - validate headers before parsing rows + def parse_row(line) do + case String.split(line, ",") do + [id, name, email] -> + %{id: id, name: name, email: email} + + # XXX: silently drops malformed 
rows, should log or collect errors + _ -> + nil + end + end + + def import_users(rows) do + # FIXME: this does N+1 inserts, wrap in a transaction + Enum.map(rows, fn row -> + insert_user(row) + end) + end + + def validate_row(%{email: email} = row) do + # XXX: email regex is wrong, doesn't handle subdomains + if String.match?(email, ~r/@/) do + {:ok, row} + else + {:error, :invalid_email} + end + end + + def deduplicate(rows) do + # FIXME: uses email as dedup key but doesn't normalize case first + rows + |> Enum.group_by(& &1.email) + |> Enum.map(fn {_email, [first | _rest]} -> first end) + end + + def import_from_api(source_url) do + # TODO: FIXME - add retry logic and timeout handling + case fetch_remote(source_url) do + {:ok, data} -> + parse_api_response(data) + + # XXX: swallows all errors, need proper error propagation + _ -> + [] + end + end + + def transform_row(row, field_map) do + # FIXME: doesn't handle nested fields or type coercion + Enum.reduce(field_map, %{}, fn {src, dst}, acc -> + Map.put(acc, dst, Map.get(row, src)) + end) + end + + def write_results(results, output_path) do + # XXX: overwrites file without backup, could lose data + content = Enum.map_join(results, "\n", &format_result/1) + File.write!(output_path, content) + end + + defp insert_user(row), do: {:ok, row} + defp fetch_remote(_url), do: {:ok, []} + defp parse_api_response(data), do: data + defp format_result(result), do: inspect(result) +end diff --git a/priv/combined_metrics/samples/code_smells/no_fixme_comments/config.yml b/priv/combined_metrics/samples/code_smells/no_fixme_comments/config.yml new file mode 100644 index 0000000..be88771 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_fixme_comments/config.yml @@ -0,0 +1 @@ +doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." 
diff --git a/priv/combined_metrics/samples/code_smells/no_fixme_comments/good/importer.ex b/priv/combined_metrics/samples/code_smells/no_fixme_comments/good/importer.ex new file mode 100644 index 0000000..2af79a9 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_fixme_comments/good/importer.ex @@ -0,0 +1,88 @@ +defmodule Importer do + @moduledoc "Handles importing data from CSV and external sources" + + def import_csv(path) do + case File.read(path) do + {:ok, ""} -> + {:error, :empty_file} + + {:ok, content} -> + rows = + content + |> String.split("\n", trim: true) + |> Enum.map(&parse_row/1) + |> Enum.reject(&is_nil/1) + + {:ok, rows} + + {:error, reason} -> + {:error, reason} + end + end + + def parse_row(line) do + case String.split(line, ",") do + [id, name, email] -> + %{id: id, name: name, email: email} + + _ -> + nil + end + end + + def import_users(rows) do + rows + |> Enum.map(&insert_user/1) + |> Enum.split_with(&match?({:ok, _}, &1)) + |> then(fn {ok, errors} -> {:ok, length(ok), length(errors)} end) + end + + def validate_row(%{email: email} = row) do + normalized = String.downcase(email) + + if String.match?(normalized, ~r/^[^\s@]+@[^\s@]+\.[^\s@]+$/) do + {:ok, %{row | email: normalized}} + else + {:error, :invalid_email} + end + end + + def deduplicate(rows) do + rows + |> Enum.map(fn row -> %{row | email: String.downcase(row.email)} end) + |> Enum.group_by(& &1.email) + |> Enum.map(fn {_email, [first | _rest]} -> first end) + end + + def import_from_api(source_url) do + with {:ok, data} <- fetch_remote(source_url), + {:ok, parsed} <- parse_api_response(data) do + {:ok, parsed} + end + end + + def transform_row(row, field_map) do + Enum.reduce(field_map, %{}, fn {src, dst}, acc -> + Map.put(acc, dst, Map.get(row, src)) + end) + end + + def write_results(results, output_path) do + backup_path = output_path <> ".bak" + + with :ok <- maybe_backup(output_path, backup_path), + content = Enum.map_join(results, "\n", &format_result/1), + :ok 
<- File.write(output_path, content) do + :ok + end + end + + defp maybe_backup(path, backup) do + if File.exists?(path), do: File.cp(path, backup), else: :ok # File.cp/2 returns :ok; File.copy/2 returns {:ok, bytes} and would fail the `:ok <-` match in write_results + end + + defp insert_user(row), do: {:ok, row} + defp fetch_remote(_url), do: {:ok, []} + defp parse_api_response(data), do: {:ok, data} + defp format_result(result), do: inspect(result) +end diff --git a/priv/combined_metrics/samples/code_smells/no_global_scope_pollution/bad/analytics_tracker.js b/priv/combined_metrics/samples/code_smells/no_global_scope_pollution/bad/analytics_tracker.js new file mode 100644 index 0000000..02af07e --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_global_scope_pollution/bad/analytics_tracker.js @@ -0,0 +1,59 @@ +window.ANALYTICS_ENDPOINT = "/api/analytics"; +window.ANALYTICS_API_KEY = process.env.ANALYTICS_API_KEY; +window.analyticsQueue = []; +window.analyticsSessionId = null; +window.analyticsFlushTimer = null; +const MAX_QUEUE_SIZE = 100; + +function initAnalytics() { + const stored = sessionStorage.getItem("session"); + window.analyticsSessionId = stored ?? 
crypto.randomUUID(); + sessionStorage.setItem("session", window.analyticsSessionId); +} + +function trackEvent(eventName, properties = {}) { + if (window.analyticsQueue.length >= MAX_QUEUE_SIZE) { + flushAnalytics(); + } + + window.analyticsQueue.push({ + event: eventName, + properties, + sessionId: window.analyticsSessionId, + timestamp: Date.now(), + }); + + scheduleAnalyticsFlush(); +} + +function identifyUser(userId, traits = {}) { + trackEvent("$identify", { userId, ...traits }); +} + +function scheduleAnalyticsFlush() { + if (window.analyticsFlushTimer) return; + window.analyticsFlushTimer = setTimeout(() => { + window.analyticsFlushTimer = null; + flushAnalytics(); + }, 2000); +} + +async function flushAnalytics() { + if (window.analyticsQueue.length === 0) return; + const events = window.analyticsQueue.splice(0, window.analyticsQueue.length); + await fetch(window.ANALYTICS_ENDPOINT, { + method: "POST", + headers: { + "Content-Type": "application/json", + "X-Api-Key": window.ANALYTICS_API_KEY, + }, + body: JSON.stringify({ events }), + }); +} + +window.trackEvent = trackEvent; +window.identifyUser = identifyUser; +window.flushAnalytics = flushAnalytics; +window.initAnalytics = initAnalytics; + +initAnalytics(); diff --git a/priv/combined_metrics/samples/code_smells/no_global_scope_pollution/good/analytics_tracker.js b/priv/combined_metrics/samples/code_smells/no_global_scope_pollution/good/analytics_tracker.js new file mode 100644 index 0000000..d81f8ac --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_global_scope_pollution/good/analytics_tracker.js @@ -0,0 +1,68 @@ +const SESSION_KEY = "analytics_session"; +const MAX_QUEUE_SIZE = 100; + +class AnalyticsTracker { + constructor(config) { + this._endpoint = config.endpoint; + this._apiKey = config.apiKey; + this._queue = []; + this._sessionId = this._loadOrCreateSession(); + this._flushTimer = null; + } + + track(eventName, properties = {}) { + if (this._queue.length >= MAX_QUEUE_SIZE) { + 
this._flushQueue(); + } + + this._queue.push({ + event: eventName, + properties, + sessionId: this._sessionId, + timestamp: Date.now(), + }); + + this._scheduleFlush(); + } + + identify(userId, traits = {}) { + this.track("$identify", { userId, ...traits }); + } + + async flush() { + await this._flushQueue(); + } + + _scheduleFlush() { + if (this._flushTimer) return; + this._flushTimer = setTimeout(() => { + this._flushTimer = null; + this._flushQueue(); + }, 2000); + } + + async _flushQueue() { + if (this._queue.length === 0) return; + const events = this._queue.splice(0, this._queue.length); + await fetch(this._endpoint, { + method: "POST", + headers: { "Content-Type": "application/json", "X-Api-Key": this._apiKey }, + body: JSON.stringify({ events }), + }); + } + + _loadOrCreateSession() { + const stored = sessionStorage.getItem(SESSION_KEY); + if (stored) return stored; + const id = crypto.randomUUID(); + sessionStorage.setItem(SESSION_KEY, id); + return id; + } +} + +const tracker = new AnalyticsTracker({ + endpoint: "/api/analytics", + apiKey: process.env.ANALYTICS_API_KEY, +}); + +export { tracker, AnalyticsTracker }; diff --git a/priv/combined_metrics/samples/code_smells/no_library_global_application_config/bad/billing.ex b/priv/combined_metrics/samples/code_smells/no_library_global_application_config/bad/billing.ex new file mode 100644 index 0000000..7e639a1 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_library_global_application_config/bad/billing.ex @@ -0,0 +1,58 @@ +defmodule Acme.Billing do + @moduledoc """ + Billing library. + """ + + # Bad: reads configuration from the global application environment. + # This couples the library to a specific OTP app name and config key. + # The library cannot be used without configuring `:acme_billing` in config.exs, + # and it cannot be used with different configs simultaneously (e.g., multiple accounts). + + @doc """ + Creates a charge. Reads API key and base URL from Application env. 
+ """ + @spec create_charge(map()) :: {:ok, map()} | {:error, term()} + def create_charge(params) do + # Bad: tight coupling to global config + api_key = Application.get_env(:acme_billing, :api_key) || + raise "Acme.Billing: :api_key not configured" + + base_url = Application.get_env(:acme_billing, :base_url, "https://api.acmebilling.com") + timeout = Application.get_env(:acme_billing, :timeout_ms, 5_000) + + url = "#{base_url}/charges" + headers = [{"Authorization", "Bearer #{api_key}"}] + options = [timeout: timeout, recv_timeout: timeout] + + case HTTPoison.post(url, Jason.encode!(params), headers, options) do + {:ok, %{status_code: 201, body: body}} -> {:ok, Jason.decode!(body)} + {:ok, %{status_code: status, body: body}} -> {:error, {:http_error, status, body}} + {:error, reason} -> {:error, {:network_error, reason}} + end + end + + @doc """ + Lists recent charges. Also reads from Application env. + """ + @spec list_charges(keyword()) :: {:ok, [map()]} | {:error, term()} + def list_charges(opts \\ []) do + # Bad: same global config dependency repeated in every function + api_key = Application.get_env(:acme_billing, :api_key) || + raise "Acme.Billing: :api_key not configured" + + base_url = Application.get_env(:acme_billing, :base_url, "https://api.acmebilling.com") + timeout = Application.get_env(:acme_billing, :timeout_ms, 5_000) + retry = Application.get_env(:acme_billing, :retry_count, 3) + + limit = Keyword.get(opts, :limit, 20) + url = "#{base_url}/charges?limit=#{limit}" + headers = [{"Authorization", "Bearer #{api_key}"}] + + case HTTPoison.get(url, headers, timeout: timeout, recv_timeout: timeout) do + {:ok, %{status_code: 200, body: body}} -> {:ok, Jason.decode!(body)} + {:ok, %{status_code: status}} -> {:error, {:http_error, status}} + {:error, reason} when retry > 0 -> list_charges(opts) + {:error, reason} -> {:error, {:network_error, reason}} + end + end +end diff --git 
a/priv/combined_metrics/samples/code_smells/no_library_global_application_config/good/billing.ex b/priv/combined_metrics/samples/code_smells/no_library_global_application_config/good/billing.ex new file mode 100644 index 0000000..f656656 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_library_global_application_config/good/billing.ex @@ -0,0 +1,53 @@ +defmodule Acme.Billing do + @moduledoc """ + Billing library. All configuration is passed as explicit function + arguments rather than read from `Application.get_env/2`. + This makes the library usable in any application without side effects. + """ + + defmodule Config do + @moduledoc "Configuration struct for the Acme.Billing client." + @enforce_keys [:api_key, :base_url] + defstruct [:api_key, :base_url, timeout_ms: 5_000, retry_count: 3] + + @type t :: %__MODULE__{ + api_key: String.t(), + base_url: String.t(), + timeout_ms: pos_integer(), + retry_count: non_neg_integer() + } + end + + @doc """ + Creates a charge using the provided configuration. + Config is passed explicitly — the library does not read global app env. + """ + @spec create_charge(Config.t(), map()) :: {:ok, map()} | {:error, term()} + def create_charge(%Config{} = config, params) do + url = "#{config.base_url}/charges" + headers = [{"Authorization", "Bearer #{config.api_key}"}] + options = [timeout: config.timeout_ms, recv_timeout: config.timeout_ms] + + case HTTPoison.post(url, Jason.encode!(params), headers, options) do + {:ok, %{status_code: 201, body: body}} -> {:ok, Jason.decode!(body)} + {:ok, %{status_code: status, body: body}} -> {:error, {:http_error, status, body}} + {:error, reason} -> {:error, {:network_error, reason}} + end + end + + @doc """ + Lists recent charges. Configuration is explicit. 
+ """ + @spec list_charges(Config.t(), keyword()) :: {:ok, [map()]} | {:error, term()} + def list_charges(%Config{} = config, opts \\ []) do + limit = Keyword.get(opts, :limit, 20) + url = "#{config.base_url}/charges?limit=#{limit}" + headers = [{"Authorization", "Bearer #{config.api_key}"}] + + case HTTPoison.get(url, headers, timeout: config.timeout_ms) do + {:ok, %{status_code: 200, body: body}} -> {:ok, Jason.decode!(body)} + {:ok, %{status_code: status}} -> {:error, {:http_error, status}} + {:error, reason} -> {:error, {:network_error, reason}} + end + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_list_comprehension_for_side_effects/bad/task_runner.py b/priv/combined_metrics/samples/code_smells/no_list_comprehension_for_side_effects/bad/task_runner.py new file mode 100644 index 0000000..46f7234 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_list_comprehension_for_side_effects/bad/task_runner.py @@ -0,0 +1,61 @@ +"""Task runner that executes a queue of background jobs and logs results.""" +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime +from typing import Callable, Optional + + +@dataclass +class Task: + id: str + name: str + action: Callable[[], None] + retries: int = 0 + + +@dataclass +class TaskResult: + task_id: str + success: bool + started_at: datetime + finished_at: datetime + error: Optional[str] = None + + +_results: list[TaskResult] = [] + + +def run_task(task: Task) -> TaskResult: + started = datetime.utcnow() + try: + task.action() + return TaskResult(task_id=task.id, success=True, started_at=started, finished_at=datetime.utcnow()) + except Exception as exc: + return TaskResult(task_id=task.id, success=False, started_at=started, finished_at=datetime.utcnow(), error=str(exc)) + + +def run_all(tasks: list[Task]) -> list[TaskResult]: + """Run all tasks — comprehension used only for its side effect; returned list discarded.""" + results = [] + # list 
comprehension purely for the side effect of appending to results + [results.append(run_task(task)) for task in tasks] # bad: comprehension for side effects + return results + + +def notify_failures(results: list[TaskResult]) -> None: + """Log failures — comprehension builds a list of Nones that is immediately discarded.""" + [print(f"[ALERT] Task {r.task_id} failed: {r.error}") for r in results if not r.success] + + +def archive_results(results: list[TaskResult]) -> None: + """Persist results — comprehension is used purely to call append on _results.""" + [_results.append(result) for result in results] # bad: side-effect-only comprehension + + +def send_summary_emails(results: list[TaskResult], recipients: list[str]) -> None: + """Send emails — nested comprehension used entirely for its side effects.""" + [ + print(f"Sending summary to {email}: {len(results)} tasks run") + for email in recipients + ] # bad: comprehension discarded, used only for print side effect diff --git a/priv/combined_metrics/samples/code_smells/no_list_comprehension_for_side_effects/good/task_runner.py b/priv/combined_metrics/samples/code_smells/no_list_comprehension_for_side_effects/good/task_runner.py new file mode 100644 index 0000000..eb472eb --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_list_comprehension_for_side_effects/good/task_runner.py @@ -0,0 +1,76 @@ +"""Task runner that executes a queue of background jobs and logs results.""" +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime +from typing import Callable, Optional + + +@dataclass +class Task: + id: str + name: str + action: Callable[[], None] + retries: int = 0 + + +@dataclass +class TaskResult: + task_id: str + success: bool + started_at: datetime + finished_at: datetime + error: Optional[str] = None + + +_results: list[TaskResult] = [] + + +def run_task(task: Task) -> TaskResult: + """Execute a single task and return its result.""" + started = 
datetime.utcnow() + try: + task.action() + return TaskResult( + task_id=task.id, + success=True, + started_at=started, + finished_at=datetime.utcnow(), + ) + except Exception as exc: + return TaskResult( + task_id=task.id, + success=False, + started_at=started, + finished_at=datetime.utcnow(), + error=str(exc), + ) + + +def run_all(tasks: list[Task]) -> list[TaskResult]: + """Run all tasks and collect results using a plain for loop.""" + results = [] + for task in tasks: + result = run_task(task) + results.append(result) + return results + + +def notify_failures(results: list[TaskResult]) -> None: + """Log each failed result — side-effect loop, not a list comprehension.""" + for result in results: + if not result.success: + print(f"[ALERT] Task {result.task_id} failed: {result.error}") + + +def archive_results(results: list[TaskResult]) -> None: + """Persist results to the global store — explicit for loop makes intent clear.""" + for result in results: + _results.append(result) + + +def retry_failed(tasks: list[Task], results: list[TaskResult]) -> list[TaskResult]: + """Re-run tasks whose first attempt failed.""" + failed_ids = {r.task_id for r in results if not r.success} + retry_tasks = [t for t in tasks if t.id in failed_ids] + return run_all(retry_tasks) diff --git a/priv/combined_metrics/samples/code_smells/no_long_parameter_list/bad/accounts.ex b/priv/combined_metrics/samples/code_smells/no_long_parameter_list/bad/accounts.ex new file mode 100644 index 0000000..b8bbc26 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_long_parameter_list/bad/accounts.ex @@ -0,0 +1,62 @@ +defmodule MyApp.Accounts do + @moduledoc """ + User account management. + """ + + alias MyApp.Accounts.User + alias MyApp.Repo + + # Bad: eight positional parameters. Callers must remember the exact order. + # What is the difference between `role` and `plan`? Is `org_id` before or after? + # Easy to accidentally swap `team_id` and `org_id`. 
+ @spec register(String.t(), String.t(), String.t(), atom(), atom(), integer(), integer(), DateTime.t()) :: + {:ok, User.t()} | {:error, Ecto.Changeset.t()} + def register(name, email, password, role, plan, org_id, team_id, trial_expires_at) do + %User{} + |> User.registration_changeset(%{ + name: name, + email: email, + password: password, + role: role, + plan: plan, + organization_id: org_id, + team_id: team_id, + trial_expires_at: trial_expires_at + }) + |> Repo.insert() + end + + # Bad: sending an email with six individual string parameters + @spec send_welcome_email(String.t(), String.t(), String.t(), String.t(), String.t(), String.t()) :: :ok + def send_welcome_email(to_email, user_name, org_name, plan_name, support_email, login_url) do + MyApp.Mailer.deliver(%{ + to: to_email, + subject: "Welcome, #{user_name}!", + template: :welcome, + assigns: %{ + name: user_name, + org: org_name, + plan: plan_name, + support: support_email, + url: login_url + } + }) + + :ok + end + + # Bad: updating a user with many individual named fields — no grouping + @spec update_profile(User.t(), String.t(), String.t(), String.t(), String.t(), boolean()) :: + {:ok, User.t()} | {:error, Ecto.Changeset.t()} + def update_profile(%User{} = user, name, bio, website, timezone, notifications_enabled) do + user + |> User.profile_changeset(%{ + name: name, + bio: bio, + website: website, + timezone: timezone, + notifications_enabled: notifications_enabled + }) + |> Repo.update() + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_long_parameter_list/good/accounts.ex b/priv/combined_metrics/samples/code_smells/no_long_parameter_list/good/accounts.ex new file mode 100644 index 0000000..473fb53 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_long_parameter_list/good/accounts.ex @@ -0,0 +1,66 @@ +defmodule MyApp.Accounts do + @moduledoc """ + User account management. Related parameters are grouped into + structs rather than passed as long argument lists. 
+ """ + + alias MyApp.Accounts.{User, UserRegistration} + alias MyApp.Repo + + defmodule UserRegistration do + @moduledoc "Encapsulates all parameters needed to register a new user." + @enforce_keys [:email, :password, :name] + defstruct [ + :email, + :password, + :name, + :organization_id, + :role, + :plan, + :invite_token, + timezone: "UTC" + ] + end + + @doc """ + Registers a new user. Parameters are grouped in a `UserRegistration` struct + rather than passed as individual arguments. + """ + @spec register(UserRegistration.t()) :: {:ok, User.t()} | {:error, Ecto.Changeset.t()} + def register(%UserRegistration{} = registration) do + %User{} + |> User.registration_changeset(%{ + email: registration.email, + password: registration.password, + name: registration.name, + organization_id: registration.organization_id, + role: registration.role || :member, + plan: registration.plan || :free, + timezone: registration.timezone + }) + |> Repo.insert() + end + + @doc """ + Sends a welcome email. Takes the user struct rather than individual fields. + """ + @spec send_welcome_email(User.t()) :: :ok | {:error, term()} + def send_welcome_email(%User{} = user) do + MyApp.Mailer.deliver(%{ + to: user.email, + subject: "Welcome, #{user.name}!", + template: :welcome, + assigns: %{user: user} + }) + end + + @doc """ + Updates a user's profile. Groups the changeable fields in a map. 
+ """ + @spec update_profile(User.t(), map()) :: {:ok, User.t()} | {:error, Ecto.Changeset.t()} + def update_profile(%User{} = user, attrs) when is_map(attrs) do + user + |> User.profile_changeset(attrs) + |> Repo.update() + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_magic_method_abuse/bad/OrderService.php b/priv/combined_metrics/samples/code_smells/no_magic_method_abuse/bad/OrderService.php new file mode 100644 index 0000000..1186908 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_magic_method_abuse/bad/OrderService.php @@ -0,0 +1,57 @@ +repository = $repository; + } + + // __get is used to make all properties dynamically readable — obscures what actually exists + public function __get(string $name): mixed + { + if (array_key_exists($name, $this->data)) { + return $this->data[$name]; + } + + // Silently returns null for any unknown property + return null; + } + + // __set makes all properties dynamically writable — no validation, no IDE support + public function __set(string $name, mixed $value): void + { + $this->data[$name] = $value; + } + + // __call forwards any method call to the repository — callers can't know what's supported + public function __call(string $name, array $arguments): mixed + { + if (method_exists($this->repository, $name)) { + return $this->repository->$name(...$arguments); + } + + // Falls through silently for unknown methods + return null; + } + + // __isset makes isset() work on magic properties — further obfuscating the interface + public function __isset(string $name): bool + { + return isset($this->data[$name]); + } + + // This is the only real method, but callers are expected to discover API via trial and error + public function place(int $customerId, array $items): array + { + $this->lastCustomer = $customerId; // Triggers __set — invisible + $this->lastItems = $items; // Triggers __set — invisible + + return $this->repository->createOrder($customerId, $items); + } +} diff --git 
a/priv/combined_metrics/samples/code_smells/no_magic_method_abuse/good/OrderService.php b/priv/combined_metrics/samples/code_smells/no_magic_method_abuse/good/OrderService.php new file mode 100644 index 0000000..b328e2d --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_magic_method_abuse/good/OrderService.php @@ -0,0 +1,63 @@ +orders[$id])) { + return $this->orders[$id]; + } + + $order = $this->repository->findById($id); + if ($order !== null) { + $this->orders[$id] = $order; + } + + return $order; + } + + public function getStatus(int $orderId): string + { + $order = $this->findById($orderId); + if ($order === null) { + throw new OrderNotFoundException("Order {$orderId} not found"); + } + + return $order->getStatus(); + } + + public function updateStatus(int $orderId, string $status): void + { + $order = $this->findById($orderId); + if ($order === null) { + throw new OrderNotFoundException("Order {$orderId} not found"); + } + + $order->setStatus($status); + $this->repository->save($order); + unset($this->orders[$orderId]); + } + + public function listByCustomer(int $customerId): array + { + return $this->repository->findByCustomer($customerId); + } + + public function countByStatus(string $status): int + { + return $this->repository->countByStatus($status); + } +} diff --git a/priv/combined_metrics/samples/code_smells/no_namespace_trespassing/bad/catalog.ex b/priv/combined_metrics/samples/code_smells/no_namespace_trespassing/bad/catalog.ex new file mode 100644 index 0000000..ab1c4fd --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_namespace_trespassing/bad/catalog.ex @@ -0,0 +1,58 @@ +defmodule Acme.Catalog do + @moduledoc """ + Public API for the Acme product catalog library. + """ + + alias Acme.Catalog.Product + + @doc "Searches the catalog." + @spec search(String.t()) :: [Product.t()] + def search(query) do + Acme.Catalog.SearchIndex.query(query) + end +end + +# Bad: defines a module inside Elixir's standard `Enum` namespace. 
+# This will conflict with the standard library and confuse anyone reading the code. +defmodule Enum.CatalogUtils do + @moduledoc """ + Extra Enum utilities added by the Acme.Catalog library. + BAD: This module pollutes the `Enum` namespace which belongs to Elixir itself. + """ + + def filter_available(products) do + Enum.filter(products, & &1.available) + end + + def sort_by_price(products) do + Enum.sort_by(products, & &1.price_cents) + end +end + +# Bad: extends the `String` module with catalog-specific logic. +# Any library calling `String.normalize/1` will be confused. +defmodule String.Utils do + @moduledoc """ + String helpers added by the Acme.Catalog library. + BAD: Trespasses on the `String` namespace owned by Elixir. + """ + + def normalize(str) when is_binary(str) do + str + |> String.trim() + |> String.downcase() + |> String.replace(~r/[^\w\s]/, "") + end +end + +# Bad: opens a module under `Map` — a core Elixir namespace. +defmodule Map.ProductHelpers do + @moduledoc """ + Map utilities for products. + BAD: Pollutes the standard `Map` namespace. + """ + + def from_product(%{id: id, name: name, price_cents: price}) do + %{id: id, name: name, price_cents: price} + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_namespace_trespassing/good/catalog.ex b/priv/combined_metrics/samples/code_smells/no_namespace_trespassing/good/catalog.ex new file mode 100644 index 0000000..333e3bf --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_namespace_trespassing/good/catalog.ex @@ -0,0 +1,61 @@ +defmodule Acme.Catalog do + @moduledoc """ + Public API for the Acme product catalog library. + All modules defined by this library live under the `Acme` namespace. + """ + + alias Acme.Catalog.{Product, Category, SearchIndex} + + @doc """ + Searches the catalog for products matching the query. 
+ """ + @spec search(String.t(), keyword()) :: [Product.t()] + def search(query, opts \\ []) do + SearchIndex.query(query, opts) + end + + @doc """ + Lists all products in a category. + """ + @spec list_by_category(Category.t()) :: [Product.t()] + def list_by_category(%Category{} = category) do + Product.by_category(category) + end +end + +defmodule Acme.Catalog.Product do + @moduledoc """ + Product schema and query helpers. Stays within the `Acme.Catalog` namespace. + """ + + defstruct [:id, :name, :sku, :price_cents, :category_id] + + @type t :: %__MODULE__{ + id: integer(), + name: String.t(), + sku: String.t(), + price_cents: integer(), + category_id: integer() + } + + def by_category(%Acme.Catalog.Category{id: id}) do + # query implementation + [] + end +end + +defmodule Acme.Catalog.StringHelpers do + @moduledoc """ + String utilities specific to catalog use cases. + Kept under `Acme.Catalog` — not polluting the `String` namespace. + """ + + @doc "Normalizes a product name for indexing." + @spec normalize(String.t()) :: String.t() + def normalize(str) when is_binary(str) do + str + |> String.trim() + |> String.downcase() + |> String.replace(~r/[^\w\s]/, "") + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_nested_method_definition/bad/subscription.rb b/priv/combined_metrics/samples/code_smells/no_nested_method_definition/bad/subscription.rb new file mode 100644 index 0000000..767b739 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_nested_method_definition/bad/subscription.rb @@ -0,0 +1,39 @@ +class SubscriptionRenewalService + def initialize(billing, notifier, logger) + @billing = billing + @notifier = notifier + @logger = logger + end + + def renew(subscription) + # Methods defined inside another method do NOT create closures in Ruby. + # They are defined on the enclosing class/object, not scoped locally. + def renewable?(sub) + sub.active? || sub.in_grace_period? 
+ end + + def attempt_charge(sub) + @billing.charge( + sub.payment_method_id, + sub.plan.monthly_price_cents + ) + rescue Billing::DeclinedError => e + { success: false, error: :payment_declined } + end + + def extend_subscription(sub) + new_expiry = [sub.expires_at, Time.current].max + 30.days + sub.update!(expires_at: new_expiry, status: :active) + end + + return { success: false, skipped: true } unless renewable?(subscription) + + charge_result = attempt_charge(subscription) + return charge_result unless charge_result[:success] + + extend_subscription(subscription) + @notifier.send_renewal_confirmation(subscription.user, subscription) + + { success: true, renewed_until: subscription.expires_at } + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_nested_method_definition/good/subscription.rb b/priv/combined_metrics/samples/code_smells/no_nested_method_definition/good/subscription.rb new file mode 100644 index 0000000..f099fe4 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_nested_method_definition/good/subscription.rb @@ -0,0 +1,48 @@ +class SubscriptionRenewalService + def initialize(billing, notifier, logger) + @billing = billing + @notifier = notifier + @logger = logger + end + + def renew(subscription) + return skip_result(:not_renewable) unless renewable?(subscription) + + charge_result = attempt_charge(subscription) + return charge_result unless charge_result[:success] + + extend_subscription(subscription) + notify_renewal(subscription) + + { success: true, renewed_until: subscription.expires_at } + end + + private + + def renewable?(subscription) + subscription.active? || subscription.in_grace_period? 
+ end + + def attempt_charge(subscription) + @billing.charge( + subscription.payment_method_id, + subscription.plan.monthly_price_cents + ) + rescue Billing::DeclinedError => e + @logger.warn("Renewal charge declined for #{subscription.id}: #{e.message}") + { success: false, error: :payment_declined } + end + + def extend_subscription(subscription) + new_expiry = [subscription.expires_at, Time.current].max + 30.days + subscription.update!(expires_at: new_expiry, status: :active) + end + + def notify_renewal(subscription) + @notifier.send_renewal_confirmation(subscription.user, subscription) + end + + def skip_result(reason) + { success: false, skipped: true, reason: reason } + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_nested_ternary/bad/pricing.ex b/priv/combined_metrics/samples/code_smells/no_nested_ternary/bad/pricing.ex new file mode 100644 index 0000000..4828b43 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_nested_ternary/bad/pricing.ex @@ -0,0 +1,105 @@ +defmodule Pricing do + @moduledoc "Calculates prices and discounts for products" + + def final_price(product, user, coupon) do + base = + if product.on_sale do + if product.sale_price > 0 do + product.sale_price + else + product.price * 0.9 + end + else + product.price + end + + with_membership = + if user.member do + if user.tier == :gold do + if base > 100 do + base * 0.75 + else + base * 0.85 + end + else + base * 0.9 + end + else + base + end + + if coupon != nil do + if coupon.type == :percent do + if coupon.value > 50 do + with_membership * 0.5 + else + with_membership * (1 - coupon.value / 100) + end + else + if with_membership - coupon.value > 0 do + with_membership - coupon.value + else + 0 + end + end + else + with_membership + end + end + + def shipping_cost(order, user) do + if order.total > 50 do + if user.member do + 0 + else + if order.express do + 9.99 + else + 4.99 + end + end + else + if user.member do + if order.express do + 5.99 + else + 2.99 + 
end + else + if order.express do + 14.99 + else + 7.99 + end + end + end + end + + def tax_rate(country, region, product_type) do + if country == "US" do + if region == "CA" do + if product_type == :food do + 0.0 + else + 0.0725 + end + else + if product_type == :food do + 0.0 + else + 0.05 + end + end + else + if country == "DE" do + if product_type == :food do + 0.07 + else + 0.19 + end + else + 0.0 + end + end + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_nested_ternary/config.yml b/priv/combined_metrics/samples/code_smells/no_nested_ternary/config.yml new file mode 100644 index 0000000..a87321f --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_nested_ternary/config.yml @@ -0,0 +1 @@ +doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." diff --git a/priv/combined_metrics/samples/code_smells/no_nested_ternary/good/pricing.ex b/priv/combined_metrics/samples/code_smells/no_nested_ternary/good/pricing.ex new file mode 100644 index 0000000..fbef9ef --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_nested_ternary/good/pricing.ex @@ -0,0 +1,54 @@ +defmodule Pricing do + @moduledoc "Calculates prices and discounts for products" + + def final_price(product, user, coupon) do + product + |> base_price() + |> apply_membership(user) + |> apply_coupon(coupon) + end + + defp base_price(%{on_sale: true, sale_price: sale}) when sale > 0, do: sale + defp base_price(%{on_sale: true, price: price}), do: price * 0.9 + defp base_price(%{price: price}), do: price + + defp apply_membership(price, %{member: true, tier: :gold}) when price > 100, do: price * 0.75 + defp apply_membership(price, %{member: true, tier: :gold}), do: price * 0.85 + defp apply_membership(price, %{member: true}), do: price * 0.9 + defp apply_membership(price, _user), do: price + + defp apply_coupon(price, nil), do: price + + defp apply_coupon(price, %{type: :percent, value: value}) when value > 50 do + price * 
0.5 + end + + defp apply_coupon(price, %{type: :percent, value: value}) do + price * (1 - value / 100) + end + + defp apply_coupon(price, %{type: :fixed, value: value}) do + max(price - value, 0) + end + + def shipping_cost(order, user) do + shipping_rate(order.total, user.member, order.express) + end + + defp shipping_rate(total, _member, _express) when total > 50, do: 0 + defp shipping_rate(_total, true, true), do: 5.99 + defp shipping_rate(_total, true, false), do: 2.99 + defp shipping_rate(_total, false, true), do: 14.99 + defp shipping_rate(_total, false, false), do: 7.99 + + def tax_rate(country, region, product_type) do + tax_for(country, region, product_type) + end + + defp tax_for("US", _region, :food), do: 0.0 + defp tax_for("US", "CA", _type), do: 0.0725 + defp tax_for("US", _region, _type), do: 0.05 + defp tax_for("DE", :food, _type), do: 0.07 + defp tax_for("DE", _region, _type), do: 0.19 + defp tax_for(_country, _region, _type), do: 0.0 +end diff --git a/priv/combined_metrics/samples/code_smells/no_primitive_wrapper_constructors/bad/form_validator.js b/priv/combined_metrics/samples/code_smells/no_primitive_wrapper_constructors/bad/form_validator.js new file mode 100644 index 0000000..40efe98 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_primitive_wrapper_constructors/bad/form_validator.js @@ -0,0 +1,68 @@ +function isNonEmptyString(value) { + const s = new String(value); + return s.trim().length > 0; +} + +function isPositiveNumber(value) { + const n = new Number(value); + return isFinite(n) && n > 0; +} + +function isTruthy(value) { + const b = new Boolean(value); + return b.valueOf(); +} + +function coerceToNumber(value) { + const n = new Number(value); + return isNaN(n) ? 
null : n.valueOf(); +} + +function coerceToString(value) { + if (value === null || value === undefined) return new String(""); + return new String(value); +} + +function validateField(name, value, rules) { + const errors = []; + + for (const rule of rules) { + switch (rule.type) { + case "required": + const strVal = new String(value); + if (strVal.trim().length === 0) { + errors.push(new String(`${name} is required`).valueOf()); + } + break; + case "minLength": + if (new String(value).length < new Number(rule.value)) { + errors.push(`${name} must be at least ${rule.value} characters`); + } + break; + case "numeric": + if (isNaN(new Number(value))) { + errors.push(`${name} must be a number`); + } + break; + case "positive": + if (!isPositiveNumber(value)) { + errors.push(`${name} must be a positive number`); + } + break; + default: + break; + } + } + + return errors; +} + +function validateForm(fields) { + return Object.entries(fields).reduce((errors, [name, { value, rules }]) => { + const fieldErrors = validateField(name, value, rules); + if (fieldErrors.length > new Number(0)) errors[name] = fieldErrors; + return errors; + }, {}); +} + +export { validateForm, validateField, isNonEmptyString, isPositiveNumber }; diff --git a/priv/combined_metrics/samples/code_smells/no_primitive_wrapper_constructors/good/form_validator.js b/priv/combined_metrics/samples/code_smells/no_primitive_wrapper_constructors/good/form_validator.js new file mode 100644 index 0000000..0c1e42b --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_primitive_wrapper_constructors/good/form_validator.js @@ -0,0 +1,64 @@ +function isNonEmptyString(value) { + return typeof value === "string" && value.trim().length > 0; +} + +function isPositiveNumber(value) { + return typeof value === "number" && Number.isFinite(value) && value > 0; +} + +function isTruthy(value) { + return Boolean(value); +} + +function coerceToNumber(value) { + const n = Number(value); + return Number.isNaN(n) ? 
null : n; +} + +function coerceToString(value) { + if (value === null || value === undefined) return ""; + return String(value); +} + +function validateField(name, value, rules) { + const errors = []; + + for (const rule of rules) { + switch (rule.type) { + case "required": + if (!isNonEmptyString(coerceToString(value))) { + errors.push(`${name} is required`); + } + break; + case "minLength": + if (typeof value === "string" && value.length < rule.value) { + errors.push(`${name} must be at least ${rule.value} characters`); + } + break; + case "numeric": + if (coerceToNumber(value) === null) { + errors.push(`${name} must be a number`); + } + break; + case "positive": + if (!isPositiveNumber(coerceToNumber(value))) { + errors.push(`${name} must be a positive number`); + } + break; + default: + break; + } + } + + return errors; +} + +function validateForm(fields) { + return Object.entries(fields).reduce((errors, [name, { value, rules }]) => { + const fieldErrors = validateField(name, value, rules); + if (fieldErrors.length > 0) errors[name] = fieldErrors; + return errors; + }, {}); +} + +export { validateForm, validateField, isNonEmptyString, isPositiveNumber, coerceToNumber, coerceToString }; diff --git a/priv/combined_metrics/samples/code_smells/no_private_inheritance/bad/Connection.cpp b/priv/combined_metrics/samples/code_smells/no_private_inheritance/bad/Connection.cpp new file mode 100644 index 0000000..b6a5f52 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_private_inheritance/bad/Connection.cpp @@ -0,0 +1,75 @@ +#include +#include +#include + +class TcpSocket { +public: + explicit TcpSocket(const std::string& host, int port) + : host_(host), port_(port), connected_(false) {} + + void connect() { connected_ = true; } + void disconnect() noexcept { connected_ = false; } + bool isConnected() const noexcept { return connected_; } + void send(const std::string& data) { (void)data; } + std::string receive(std::size_t maxBytes) { (void)maxBytes; return 
{}; } + +protected: + std::string host_; + int port_; + bool connected_; +}; + +class RetryPolicy { +public: + explicit RetryPolicy(int maxAttempts, std::chrono::milliseconds delay) + : maxAttempts_(maxAttempts), delay_(delay) {} + + bool shouldRetry(int attempt) const noexcept { return attempt < maxAttempts_; } + std::chrono::milliseconds delay() const noexcept { return delay_; } + +protected: + int maxAttempts_; + std::chrono::milliseconds delay_; +}; + +// Private inheritance used for implementation reuse — anti-pattern. +// Connection IS-NOT-A TcpSocket; it has confusing base-class subobjects +// and makes it hard to switch the socket implementation later. +class Connection + : private TcpSocket // private inheritance for reuse — should be composition + , private RetryPolicy // same problem +{ +public: + Connection(const std::string& host, int port, int maxRetries) + : TcpSocket(host, port) + , RetryPolicy(maxRetries, std::chrono::milliseconds(500)) + {} + + void open() { + for (int attempt = 0; ; ++attempt) { + try { + TcpSocket::connect(); // must call base explicitly — tightly coupled + return; + } catch (const std::exception&) { + if (!RetryPolicy::shouldRetry(attempt)) + throw; + } + } + } + + void close() noexcept { TcpSocket::disconnect(); } + bool isOpen() const noexcept { return TcpSocket::isConnected(); } + + void send(const std::string& data) { + if (!isOpen()) throw std::runtime_error("Connection is closed"); + TcpSocket::send(data); // using base class members directly + } + + std::string receive(std::size_t maxBytes) { + if (!isOpen()) throw std::runtime_error("Connection is closed"); + return TcpSocket::receive(maxBytes); + } + + // Accesses inherited protected member directly — tight coupling + std::string connectedHost() const { return host_; } +}; diff --git a/priv/combined_metrics/samples/code_smells/no_private_inheritance/good/Connection.cpp b/priv/combined_metrics/samples/code_smells/no_private_inheritance/good/Connection.cpp new file mode 
100644 index 0000000..c4b7060 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_private_inheritance/good/Connection.cpp @@ -0,0 +1,75 @@ +#include +#include +#include +#include + +// Reuse via composition, not private inheritance + +class TcpSocket { +public: + explicit TcpSocket(const std::string& host, int port) + : host_(host), port_(port), connected_(false) {} + + void connect() { connected_ = true; } + void disconnect() noexcept { connected_ = false; } + bool isConnected() const noexcept { return connected_; } + void send(const std::string& data) { (void)data; } + std::string receive(std::size_t maxBytes) { (void)maxBytes; return {}; } + +private: + std::string host_; + int port_; + bool connected_; +}; + +class RetryPolicy { +public: + explicit RetryPolicy(int maxAttempts, std::chrono::milliseconds delay) + : maxAttempts_(maxAttempts), delay_(delay) {} + + bool shouldRetry(int attempt) const noexcept { return attempt < maxAttempts_; } + std::chrono::milliseconds delay() const noexcept { return delay_; } + +private: + int maxAttempts_; + std::chrono::milliseconds delay_; +}; + +// Composition: Connection HAS-A TcpSocket and HAS-A RetryPolicy +// Not IS-A; private inheritance would expose implementation details +class Connection { +public: + Connection(const std::string& host, int port, int maxRetries) + : socket_(std::make_unique(host, port)) + , retryPolicy_(maxRetries, std::chrono::milliseconds(500)) + {} + + void open() { + for (int attempt = 0; ; ++attempt) { + try { + socket_->connect(); + return; + } catch (const std::exception&) { + if (!retryPolicy_.shouldRetry(attempt)) + throw; + } + } + } + + void close() noexcept { socket_->disconnect(); } + bool isOpen() const noexcept { return socket_->isConnected(); } + + void send(const std::string& data) { + if (!isOpen()) throw std::runtime_error("Connection is closed"); + socket_->send(data); + } + + std::string receive(std::size_t maxBytes) { + if (!isOpen()) throw 
std::runtime_error("Connection is closed"); + return socket_->receive(maxBytes); + } + +private: + std::unique_ptr socket_; // composed, not inherited + RetryPolicy retryPolicy_; // composed, not inherited +}; diff --git a/priv/combined_metrics/samples/code_smells/no_problematic_operator_overloads/bad/Widget.cpp b/priv/combined_metrics/samples/code_smells/no_problematic_operator_overloads/bad/Widget.cpp new file mode 100644 index 0000000..62209d6 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_problematic_operator_overloads/bad/Widget.cpp @@ -0,0 +1,58 @@ +#include +#include +#include + +class Widget { +public: + Widget(int id, std::string label, bool visible) + : id_(id), label_(std::move(label)), visible_(visible), valid_(true) {} + + // Overloading && — breaks short-circuit evaluation; both operands always evaluated + bool operator&&(const Widget& rhs) const { + return valid_ && rhs.valid_; + } + + // Overloading || — same problem: no short-circuit, confusing semantics + bool operator||(const Widget& rhs) const { + return visible_ || rhs.visible_; + } + + // Overloading comma operator — evaluated left-to-right but breaks comma in function + // arguments and for-loop expressions in surprising ways + Widget& operator,(const Widget& rhs) { + (void)rhs; + return *this; + } + + // Overloading unary & — takes the address of Widget, not its actual address + // Breaks generic code that uses &widget to get a pointer + Widget* operator&() { + return nullptr; // returns something other than the real address — very surprising + } + + int id() const { return id_; } + const std::string& label() const { return label_; } + bool isVisible() const { return visible_; } + +private: + int id_; + std::string label_; + bool visible_; + bool valid_; +}; + +void processWidget(Widget* ptr); + +void demonstrate() { + Widget a(1, "Alpha", true); + Widget b(2, "Beta", false); + + // Looks like short-circuit but isn't — b.valid_ is ALWAYS evaluated + if (a && b) { /* ... 
*/ } + + // Comma operator overloaded — (a, b) returns a, not b as expected + Widget& result = (a, b); + + // &a calls overloaded operator& — does NOT return the real address of a + processWidget(&a); // silently passes nullptr +} diff --git a/priv/combined_metrics/samples/code_smells/no_problematic_operator_overloads/good/Widget.cpp b/priv/combined_metrics/samples/code_smells/no_problematic_operator_overloads/good/Widget.cpp new file mode 100644 index 0000000..8998506 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_problematic_operator_overloads/good/Widget.cpp @@ -0,0 +1,72 @@ +#include +#include +#include + +// Overloads only operators with predictable, well-defined semantics. +// Does NOT overload &&, ||, comma, or unary &. + +class WidgetId { +public: + explicit WidgetId(int value) : value_(value) {} + + bool operator==(const WidgetId& rhs) const noexcept { return value_ == rhs.value_; } + bool operator!=(const WidgetId& rhs) const noexcept { return !(*this == rhs); } + bool operator<(const WidgetId& rhs) const noexcept { return value_ < rhs.value_; } + + int value() const noexcept { return value_; } + +private: + int value_; +}; + +class Widget { +public: + Widget(WidgetId id, std::string label, int priority) + : id_(id), label_(std::move(label)), priority_(priority) {} + + // Comparison by priority — clear semantic + bool operator<(const Widget& rhs) const noexcept { return priority_ < rhs.priority_; } + bool operator==(const Widget& rhs) const noexcept { return id_ == rhs.id_; } + + // Arithmetic with clear meaning: combine widget priority scores + Widget operator+(int extraPriority) const { + return Widget(id_, label_, priority_ + extraPriority); + } + + WidgetId id() const noexcept { return id_; } + const std::string& label() const noexcept { return label_; } + int priority() const noexcept { return priority_; } + + // Logical conditions are free functions using && and || naturally — + // no overloaded && or || to break short-circuit 
evaluation + static bool isHighPriority(const Widget& w) { return w.priority_ > 100; } + static bool isVisible(const Widget& w) { return !w.label_.empty(); } + +private: + WidgetId id_; + std::string label_; + int priority_; +}; + +class WidgetCollection { +public: + void add(Widget w) { items_.push_back(std::move(w)); } + + // operator[] with clear semantics + Widget& operator[](std::size_t index) { return items_[index]; } + const Widget& operator[](std::size_t index) const { return items_[index]; } + + std::size_t size() const noexcept { return items_.size(); } + bool empty() const noexcept { return items_.empty(); } + + // Natural use of && and || without overloading: + bool hasHighPriorityVisible() const { + for (const auto& w : items_) + if (Widget::isHighPriority(w) && Widget::isVisible(w)) + return true; + return false; + } + +private: + std::vector items_; +}; diff --git a/priv/combined_metrics/samples/code_smells/no_process_for_code_organization/bad/inventory.ex b/priv/combined_metrics/samples/code_smells/no_process_for_code_organization/bad/inventory.ex new file mode 100644 index 0000000..86da060 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_process_for_code_organization/bad/inventory.ex @@ -0,0 +1,71 @@ +defmodule MyApp.Inventory do + @moduledoc """ + Inventory calculations. Unnecessarily uses a GenServer to hold a map + that could simply be passed as function arguments. The GenServer adds + overhead and serializes all access with no benefit. + """ + + use GenServer + + # Bad: using a GenServer purely to hold a map that needs no + # concurrency protection or long-lived state. + def start_link(products) do + GenServer.start_link(__MODULE__, products, name: __MODULE__) + end + + @impl true + def init(products) do + {:ok, Map.new(products, &{&1.id, &1})} + end + + # Bad: simple computation wrapped in a GenServer.call — all callers + # are serialized through a single process for no reason. 
+ @spec sufficient_stock?(integer(), pos_integer()) :: boolean() + def sufficient_stock?(product_id, quantity) do + GenServer.call(__MODULE__, {:sufficient_stock, product_id, quantity}) + end + + @spec compute_reservation([{integer(), pos_integer()}]) :: map() + def compute_reservation(items) do + GenServer.call(__MODULE__, {:compute_reservation, items}) + end + + @spec reorder_point(integer()) :: integer() + def reorder_point(product_id) do + GenServer.call(__MODULE__, {:reorder_point, product_id}) + end + + @impl true + def handle_call({:sufficient_stock, product_id, quantity}, _from, products) do + result = + case Map.get(products, product_id) do + nil -> false + product -> product.stock >= quantity + end + {:reply, result, products} + end + + @impl true + def handle_call({:compute_reservation, items}, _from, products) do + result = + Enum.reduce(items, %{available: [], unavailable: []}, fn {id, qty}, acc -> + product = Map.get(products, id) + if product && product.stock >= qty do + Map.update!(acc, :available, &[{id, qty} | &1]) + else + Map.update!(acc, :unavailable, &[{id, qty} | &1]) + end + end) + {:reply, result, products} + end + + @impl true + def handle_call({:reorder_point, product_id}, _from, products) do + result = + case Map.get(products, product_id) do + nil -> 0 + product -> ceil(product.daily_usage * product.lead_time_days * 1.2) + end + {:reply, result, products} + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_process_for_code_organization/good/inventory.ex b/priv/combined_metrics/samples/code_smells/no_process_for_code_organization/good/inventory.ex new file mode 100644 index 0000000..0f79e0d --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_process_for_code_organization/good/inventory.ex @@ -0,0 +1,58 @@ +defmodule MyApp.Inventory do + @moduledoc """ + Inventory calculations. Pure module with stateless functions — + no GenServer or Agent needed because there is no mutable state + or concurrency concern. 
The "state" is just data passed around. + """ + + alias MyApp.Inventory.{Product, StockLevel} + + @doc """ + Checks whether a product has enough stock for the requested quantity. + Pure function — no process needed. + """ + @spec sufficient_stock?(Product.t(), pos_integer()) :: boolean() + def sufficient_stock?(%Product{stock: stock}, quantity), do: stock >= quantity + + @doc """ + Computes a reservation summary for a list of items. + Pure transformation — no process needed. + """ + @spec compute_reservation([{Product.t(), pos_integer()}]) :: map() + def compute_reservation(items) when is_list(items) do + items + |> Enum.reduce(%{available: [], unavailable: []}, fn {product, qty}, acc -> + if sufficient_stock?(product, qty) do + Map.update!(acc, :available, &[{product.id, qty} | &1]) + else + Map.update!(acc, :unavailable, &[{product.id, qty} | &1]) + end + end) + end + + @doc """ + Calculates the reorder point for a product based on lead time and daily usage. + Pure computation — no process involved. + """ + @spec reorder_point(float(), pos_integer()) :: integer() + def reorder_point(daily_usage, lead_time_days) when daily_usage >= 0 do + ceil(daily_usage * lead_time_days * 1.2) + end + + @doc """ + Groups stock levels by warehouse. + Pure data transformation. + """ + @spec group_by_warehouse([StockLevel.t()]) :: map() + def group_by_warehouse(levels) when is_list(levels) do + Enum.group_by(levels, & &1.warehouse_id) + end + + @doc """ + Merges two stock level maps, summing quantities for shared keys. 
+ """ + @spec merge_stock(map(), map()) :: map() + def merge_stock(stock_a, stock_b) do + Map.merge(stock_a, stock_b, fn _warehouse, qty_a, qty_b -> qty_a + qty_b end) + end +end diff --git a/priv/combined_metrics/samples/code_smells/no_prototype_modification/bad/data_store.js b/priv/combined_metrics/samples/code_smells/no_prototype_modification/bad/data_store.js new file mode 100644 index 0000000..0773eec --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_prototype_modification/bad/data_store.js @@ -0,0 +1,63 @@ +Array.prototype.groupBy = function (keyFn) { + return this.reduce((groups, item) => { + const key = keyFn(item); + if (!groups[key]) groups[key] = []; + groups[key].push(item); + return groups; + }, {}); +}; + +Array.prototype.unique = function (keyFn = (x) => x) { + const seen = new Set(); + return this.filter((item) => { + const key = keyFn(item); + if (seen.has(key)) return false; + seen.add(key); + return true; + }); +}; + +Array.prototype.sortedBy = function (keyFn, direction = "asc") { + const multiplier = direction === "asc" ? 1 : -1; + return [...this].sort((a, b) => { + const ak = keyFn(a); + const bk = keyFn(b); + return ak < bk ? -multiplier : ak > bk ? 
multiplier : 0; + }); +}; + +Object.prototype.deepClone = function () { + return JSON.parse(JSON.stringify(this)); +}; + +String.prototype.toTitleCase = function () { + return this.replace(/\b\w/g, (c) => c.toUpperCase()); +}; + +class DataStore { + constructor(records = []) { + this._records = [...records]; + } + + add(record) { + this._records.push(record); + } + + findBy(predicate) { + return this._records.filter(predicate); + } + + groupBy(keyFn) { + return this._records.groupBy(keyFn); + } + + sortedBy(keyFn, direction) { + return this._records.sortedBy(keyFn, direction); + } + + get size() { + return this._records.length; + } +} + +export { DataStore }; diff --git a/priv/combined_metrics/samples/code_smells/no_prototype_modification/good/data_store.js b/priv/combined_metrics/samples/code_smells/no_prototype_modification/good/data_store.js new file mode 100644 index 0000000..60cab9d --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_prototype_modification/good/data_store.js @@ -0,0 +1,67 @@ +function groupBy(array, keyFn) { + return array.reduce((groups, item) => { + const key = keyFn(item); + if (!Object.prototype.hasOwnProperty.call(groups, key)) { + groups[key] = []; + } + groups[key].push(item); + return groups; + }, {}); +} + +function unique(array, keyFn = (x) => x) { + const seen = new Set(); + return array.filter((item) => { + const key = keyFn(item); + if (seen.has(key)) return false; + seen.add(key); + return true; + }); +} + +function sortedBy(array, keyFn, direction = "asc") { + const multiplier = direction === "asc" ? 1 : -1; + return [...array].sort((a, b) => { + const ak = keyFn(a); + const bk = keyFn(b); + return ak < bk ? -multiplier : ak > bk ? 
multiplier : 0; + }); +} + +class DataStore { + constructor(records = []) { + this._records = [...records]; + this._indexes = new Map(); + } + + add(record) { + this._records.push(record); + this._invalidateIndexes(); + } + + findBy(predicate) { + return this._records.filter(predicate); + } + + groupBy(keyFn) { + return groupBy(this._records, keyFn); + } + + sortedBy(keyFn, direction) { + return sortedBy(this._records, keyFn, direction); + } + + unique(keyFn) { + return unique(this._records, keyFn); + } + + _invalidateIndexes() { + this._indexes.clear(); + } + + get size() { + return this._records.length; + } +} + +export { DataStore, groupBy, unique, sortedBy }; diff --git a/priv/combined_metrics/samples/code_smells/no_raw_sql_string_concatenation/bad/UserRepository.php b/priv/combined_metrics/samples/code_smells/no_raw_sql_string_concatenation/bad/UserRepository.php new file mode 100644 index 0000000..5fb56b0 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_raw_sql_string_concatenation/bad/UserRepository.php @@ -0,0 +1,58 @@ +connection->query($sql); + + return $stmt->fetchAll(PDO::FETCH_ASSOC); + } + + public function findByIds($ids): array + { + if (empty($ids)) { + return []; + } + + // Unsafe: builds IN clause by joining raw input + $idList = implode(',', $ids); + $sql = "SELECT * FROM users WHERE id IN ($idList) AND deleted_at IS NULL"; + + return $this->connection->query($sql)->fetchAll(PDO::FETCH_ASSOC); + } + + public function updateLastLogin($userId, $ip): void + { + // Direct interpolation of $ip — attacker-controlled value in SQL + $sql = "UPDATE users SET last_login_at = NOW(), last_login_ip = '" . $ip . "' WHERE id = " . $userId; + $this->connection->exec($sql); + } + + public function findByUsername($username): ?array + { + // Classic SQL injection pattern + $result = $this->connection->query( + "SELECT * FROM users WHERE username = '" . $username . 
"'" + ); + + $row = $result->fetch(PDO::FETCH_ASSOC); + return $row ?: null; + } +} diff --git a/priv/combined_metrics/samples/code_smells/no_raw_sql_string_concatenation/good/UserRepository.php b/priv/combined_metrics/samples/code_smells/no_raw_sql_string_concatenation/good/UserRepository.php new file mode 100644 index 0000000..1d815a4 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_raw_sql_string_concatenation/good/UserRepository.php @@ -0,0 +1,69 @@ +connection->prepare( + 'SELECT * FROM users + WHERE (name LIKE :query OR email LIKE :query) + AND role = :role + AND deleted_at IS NULL + ORDER BY created_at DESC + LIMIT :limit' + ); + + $stmt->bindValue(':query', "%{$query}%", PDO::PARAM_STR); + $stmt->bindValue(':role', $role, PDO::PARAM_STR); + $stmt->bindValue(':limit', $limit, PDO::PARAM_INT); + $stmt->execute(); + + return array_map([$this, 'hydrate'], $stmt->fetchAll(PDO::FETCH_ASSOC)); + } + + public function findByIds(array $ids): array + { + if (empty($ids)) { + return []; + } + + // Safe handling of IN clause with bound parameters + $placeholders = implode(',', array_fill(0, count($ids), '?')); + $stmt = $this->connection->prepare( + "SELECT * FROM users WHERE id IN ({$placeholders}) AND deleted_at IS NULL" + ); + $stmt->execute(array_values($ids)); + + return array_map([$this, 'hydrate'], $stmt->fetchAll(PDO::FETCH_ASSOC)); + } + + public function updateLastLogin(int $userId, string $ip): void + { + $stmt = $this->connection->prepare( + 'UPDATE users SET last_login_at = NOW(), last_login_ip = :ip WHERE id = :id' + ); + $stmt->execute(['ip' => $ip, 'id' => $userId]); + } + + private function hydrate(array $row): User + { + return new User( + id: (int) $row['id'], + email: $row['email'], + name: $row['name'], + role: $row['role'] + ); + } +} diff --git a/priv/combined_metrics/samples/code_smells/no_runblocking_in_coroutines/bad/FileImportService.kt 
b/priv/combined_metrics/samples/code_smells/no_runblocking_in_coroutines/bad/FileImportService.kt new file mode 100644 index 0000000..a598525 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_runblocking_in_coroutines/bad/FileImportService.kt @@ -0,0 +1,54 @@ +package com.example.imports + +import kotlinx.coroutines.* +import java.io.File + +data class ImportResult(val rowsImported: Int, val errors: List) + +class FileImportService( + private val parser: CsvParser, + private val repository: ProductRepository +) { + + /** + * importFile is a suspend function — it is called from within a coroutine. + * Using runBlocking here blocks the thread that the coroutine was running on, + * which can cause deadlocks on single-threaded dispatchers and defeats the + * purpose of structured concurrency. + */ + suspend fun importFile(filePath: String): ImportResult { + val lines = File(filePath).readLines() + val errors = mutableListOf() + var count = 0 + + val batches = lines.drop(1).chunked(500) + + // runBlocking inside a suspend function — blocks the coroutine's thread + count = runBlocking { + val jobs = batches.mapIndexed { index, batch -> + async(Dispatchers.IO) { + processBatch(batch, index, errors) + } + } + jobs.sumOf { it.await() } + } + + return ImportResult(count, errors) + } + + suspend fun validateAndImport(filePath: String): ImportResult { + // runBlocking used to call another suspend function from within a suspend function + val exists = runBlocking { checkFileExists(filePath) } + if (!exists) return ImportResult(0, listOf("File not found: $filePath")) + return importFile(filePath) + } + + private suspend fun checkFileExists(path: String): Boolean { + return withContext(Dispatchers.IO) { File(path).exists() } + } + + private suspend fun processBatch(lines: List, batchIndex: Int, errors: MutableList): Int { + val products = parser.parseLines(lines, batchIndex) + return repository.upsertAll(products) + } +} diff --git 
a/priv/combined_metrics/samples/code_smells/no_runblocking_in_coroutines/good/FileImportService.kt b/priv/combined_metrics/samples/code_smells/no_runblocking_in_coroutines/good/FileImportService.kt new file mode 100644 index 0000000..4c09849 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_runblocking_in_coroutines/good/FileImportService.kt @@ -0,0 +1,51 @@ +package com.example.imports + +import kotlinx.coroutines.* +import java.io.File + +data class ImportResult(val rowsImported: Int, val errors: List) + +class FileImportService( + private val parser: CsvParser, + private val repository: ProductRepository +) { + + /** + * Top-level entry point — runBlocking here is acceptable because this is + * the boundary between the non-coroutine world (e.g. a CLI main function) + * and the coroutine world. It is NOT inside an existing coroutine. + */ + fun importFromCli(filePath: String): ImportResult = runBlocking { + importFile(filePath) + } + + /** + * The actual work is a proper suspend function. + * Any coroutine-aware caller (HTTP handler, scheduled job) calls this directly + * without incurring the overhead or blocking of runBlocking. 
+ */ + suspend fun importFile(filePath: String): ImportResult = coroutineScope { + val lines = File(filePath).readLines() + val errors = mutableListOf() + var count = 0 + + val batches = lines.drop(1).chunked(500) + val jobs = batches.mapIndexed { index, batch -> + async(Dispatchers.IO) { + processBatch(batch, index, errors) + } + } + + count = jobs.sumOf { it.await() } + ImportResult(count, errors) + } + + private suspend fun processBatch( + lines: List, + batchIndex: Int, + errors: MutableList + ): Int { + val products = parser.parseLines(lines, batchIndex) + return repository.upsertAll(products) + } +} diff --git a/priv/combined_metrics/samples/code_smells/no_side_effects_in_declaration_file/bad/UserRepository.php b/priv/combined_metrics/samples/code_smells/no_side_effects_in_declaration_file/bad/UserRepository.php new file mode 100644 index 0000000..b8d851d --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_side_effects_in_declaration_file/bad/UserRepository.php @@ -0,0 +1,55 @@ +query("SELECT * FROM users WHERE id = $id")->fetch(); +} + +class UserRepository +{ + private PDO $connection; + + public function __construct(PDO $connection) + { + $this->connection = $connection; + + // Side effect: running a query in the constructor (at class use time) + $this->connection->exec("SET NAMES utf8mb4"); + } + + public function findById(int $id): ?array + { + $stmt = $this->connection->prepare('SELECT * FROM users WHERE id = :id'); + $stmt->execute(['id' => $id]); + return $stmt->fetch(PDO::FETCH_ASSOC) ?: null; + } + + public function findByEmail(string $email): ?array + { + $stmt = $this->connection->prepare('SELECT * FROM users WHERE email = :email'); + $stmt->execute(['email' => strtolower($email)]); + return $stmt->fetch(PDO::FETCH_ASSOC) ?: null; + } +} + +// Side effect: running code at include time — creates a user unconditionally +$repo = new UserRepository($GLOBALS['db']); +$adminUser = $repo->findByEmail('admin@example.com'); +if (!$adminUser) { 
+ echo "Warning: no admin user found\n"; +} diff --git a/priv/combined_metrics/samples/code_smells/no_side_effects_in_declaration_file/good/UserRepository.php b/priv/combined_metrics/samples/code_smells/no_side_effects_in_declaration_file/good/UserRepository.php new file mode 100644 index 0000000..228a432 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_side_effects_in_declaration_file/good/UserRepository.php @@ -0,0 +1,84 @@ +connection->prepare( + 'SELECT * FROM users WHERE id = :id' + ); + $stmt->execute(['id' => $id]); + $row = $stmt->fetch(PDO::FETCH_ASSOC); + + return $row ? $this->hydrate($row) : null; + } + + public function findByEmail(string $email): ?User + { + $stmt = $this->connection->prepare( + 'SELECT * FROM users WHERE email = :email' + ); + $stmt->execute(['email' => mb_strtolower($email)]); + $row = $stmt->fetch(PDO::FETCH_ASSOC); + + return $row ? $this->hydrate($row) : null; + } + + public function save(User $user): void + { + if ($user->getId() === null) { + $this->insert($user); + } else { + $this->update($user); + } + } + + private function hydrate(array $row): User + { + return new User( + id: (int) $row['id'], + email: $row['email'], + name: $row['name'], + role: $row['role'] + ); + } + + private function insert(User $user): void + { + $stmt = $this->connection->prepare( + 'INSERT INTO users (email, name, role, created_at) VALUES (:email, :name, :role, NOW())' + ); + $stmt->execute([ + 'email' => $user->getEmail(), + 'name' => $user->getName(), + 'role' => $user->getRole(), + ]); + } + + private function update(User $user): void + { + $stmt = $this->connection->prepare( + 'UPDATE users SET email = :email, name = :name WHERE id = :id' + ); + $stmt->execute([ + 'email' => $user->getEmail(), + 'name' => $user->getName(), + 'id' => $user->getId(), + ]); + } +} diff --git a/priv/combined_metrics/samples/code_smells/no_unnecessary_conditions/bad/order_service.ts 
b/priv/combined_metrics/samples/code_smells/no_unnecessary_conditions/bad/order_service.ts new file mode 100644 index 0000000..3c51ec9 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_unnecessary_conditions/bad/order_service.ts @@ -0,0 +1,68 @@ +interface Order { + id: string; + status: "pending" | "confirmed" | "shipped" | "delivered" | "cancelled"; + total: number; + currency: string; + items: Array<{ productId: string; quantity: number; price: number }>; +} + +function isOrderCancellable(order: Order): boolean { + // Always true for a string — typeof check is unnecessary here + if (typeof order.id === "string") { + return order.status === "pending" || order.status === "confirmed"; + } + return false; +} + +function canRequestRefund(order: Order): boolean { + return order.status === "delivered"; +} + +async function cancelOrder(order: Order): Promise { + // order.id is always truthy (typed as string), this check is unnecessary + if (order.id) { + if (!isOrderCancellable(order)) { + throw new Error(`Order ${order.id} cannot be cancelled in status '${order.status}'`); + } + } + + const response = await fetch(`/api/orders/${order.id}/cancel`, { method: "POST" }); + if (!response.ok) throw new Error(`Cancel failed: ${response.status}`); + return response.json() as Promise; +} + +function calculateOrderTotal(items: Order["items"]): number { + // items is typed as Array — the null check is unnecessary + if (items !== null && items !== undefined) { + return items.reduce((sum, item) => sum + item.price * item.quantity, 0); + } + return 0; +} + +function getHighValueItems(order: Order, threshold: number): Order["items"] { + return order.items.filter((item) => { + // item.price is typed as number, it's always a number + if (typeof item.price === "number") { + return item.price > threshold; + } + return false; + }); +} + +function formatOrderSummary(order: Order): string { + const itemCount = order.items.length; + // itemCount is always >= 0; the `< 0` 
branch is unreachable + if (itemCount < 0) { + return `Order #${order.id}: no items`; + } + + const total = new Intl.NumberFormat("en-US", { + style: "currency", + currency: order.currency, + }).format(order.total); + + return `Order #${order.id}: ${itemCount} item${itemCount === 1 ? "" : "s"}, ${total} (${order.status})`; +} + +export { cancelOrder, isOrderCancellable, canRequestRefund, calculateOrderTotal, getHighValueItems, formatOrderSummary }; +export type { Order }; diff --git a/priv/combined_metrics/samples/code_smells/no_unnecessary_conditions/good/order_service.ts b/priv/combined_metrics/samples/code_smells/no_unnecessary_conditions/good/order_service.ts new file mode 100644 index 0000000..e454095 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_unnecessary_conditions/good/order_service.ts @@ -0,0 +1,46 @@ +interface Order { + id: string; + status: "pending" | "confirmed" | "shipped" | "delivered" | "cancelled"; + total: number; + currency: string; + items: Array<{ productId: string; quantity: number; price: number }>; +} + +function isOrderCancellable(order: Order): boolean { + return order.status === "pending" || order.status === "confirmed"; +} + +function canRequestRefund(order: Order): boolean { + return order.status === "delivered"; +} + +async function cancelOrder(order: Order): Promise { + if (!isOrderCancellable(order)) { + throw new Error(`Order ${order.id} cannot be cancelled in status '${order.status}'`); + } + + const response = await fetch(`/api/orders/${order.id}/cancel`, { method: "POST" }); + if (!response.ok) throw new Error(`Cancel failed: ${response.status}`); + return response.json() as Promise; +} + +function calculateOrderTotal(items: Order["items"]): number { + return items.reduce((sum, item) => sum + item.price * item.quantity, 0); +} + +function getHighValueItems(order: Order, threshold: number): Order["items"] { + return order.items.filter((item) => item.price > threshold); +} + +function formatOrderSummary(order: 
Order): string { + const itemCount = order.items.length; + const total = new Intl.NumberFormat("en-US", { + style: "currency", + currency: order.currency, + }).format(order.total); + + return `Order #${order.id}: ${itemCount} item${itemCount === 1 ? "" : "s"}, ${total} (${order.status})`; +} + +export { cancelOrder, isOrderCancellable, canRequestRefund, calculateOrderTotal, getHighValueItems, formatOrderSummary }; +export type { Order }; diff --git a/priv/combined_metrics/samples/code_smells/no_using_namespace_directives/bad/Matrix.cpp b/priv/combined_metrics/samples/code_smells/no_using_namespace_directives/bad/Matrix.cpp new file mode 100644 index 0000000..ba1ca5d --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_using_namespace_directives/bad/Matrix.cpp @@ -0,0 +1,75 @@ +#include +#include +#include +#include + +// Pollutes the global namespace — conflicts with user-defined names and other libraries +using namespace std; + +class Matrix { +public: + Matrix(size_t rows, size_t cols) // "size_t" unqualified due to using namespace std + : rows_(rows), cols_(cols), data_(rows * cols, 0.0) {} + + double& at(size_t row, size_t col) { + boundsCheck(row, col); + return data_[row * cols_ + col]; + } + + double at(size_t row, size_t col) const { + boundsCheck(row, col); + return data_[row * cols_ + col]; + } + + size_t rows() const { return rows_; } + size_t cols() const { return cols_; } + + Matrix operator+(const Matrix& rhs) const { + checkCompatible(rhs); + Matrix result(rows_, cols_); + // "transform" and "plus" silently resolved via using namespace std + transform(data_.begin(), data_.end(), + rhs.data_.begin(), result.data_.begin(), + plus()); + return result; + } + + Matrix operator*(const Matrix& rhs) const { + if (cols_ != rhs.rows_) + throw invalid_argument("Incompatible dimensions"); + + Matrix result(rows_, rhs.cols_); + for (size_t i = 0; i < rows_; ++i) + for (size_t k = 0; k < cols_; ++k) + for (size_t j = 0; j < rhs.cols_; ++j) + 
result.at(i, j) += at(i, k) * rhs.at(k, j); + return result; + } + + double frobeniusNorm() const { + double sum = 0.0; + for (double val : data_) + sum += val * val; + return sqrt(sum); // unqualified — ambiguous if a custom sqrt exists in scope + } + + void fill(double value) { + // "fill" collides with std::fill; confusing without qualification + fill(data_.begin(), data_.end(), value); + } + +private: + size_t rows_; + size_t cols_; + vector data_; + + void boundsCheck(size_t row, size_t col) const { + if (row >= rows_ || col >= cols_) + throw out_of_range("Matrix index out of range"); + } + + void checkCompatible(const Matrix& other) const { + if (rows_ != other.rows_ || cols_ != other.cols_) + throw invalid_argument("Matrix dimensions do not match"); + } +}; diff --git a/priv/combined_metrics/samples/code_smells/no_using_namespace_directives/good/Matrix.cpp b/priv/combined_metrics/samples/code_smells/no_using_namespace_directives/good/Matrix.cpp new file mode 100644 index 0000000..8468d0c --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_using_namespace_directives/good/Matrix.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include + +// No "using namespace std;" — names are qualified explicitly + +class Matrix { +public: + Matrix(std::size_t rows, std::size_t cols) + : rows_(rows), cols_(cols), data_(rows * cols, 0.0) {} + + double& at(std::size_t row, std::size_t col) { + boundsCheck(row, col); + return data_[row * cols_ + col]; + } + + double at(std::size_t row, std::size_t col) const { + boundsCheck(row, col); + return data_[row * cols_ + col]; + } + + std::size_t rows() const noexcept { return rows_; } + std::size_t cols() const noexcept { return cols_; } + + Matrix operator+(const Matrix& rhs) const { + checkCompatible(rhs); + Matrix result(rows_, cols_); + std::transform(data_.begin(), data_.end(), + rhs.data_.begin(), result.data_.begin(), + std::plus()); + return result; + } + + Matrix operator*(const Matrix& rhs) const { + if 
(cols_ != rhs.rows_) + throw std::invalid_argument("Incompatible matrix dimensions for multiplication"); + + Matrix result(rows_, rhs.cols_); + for (std::size_t i = 0; i < rows_; ++i) + for (std::size_t k = 0; k < cols_; ++k) + for (std::size_t j = 0; j < rhs.cols_; ++j) + result.at(i, j) += at(i, k) * rhs.at(k, j); + return result; + } + + double frobeniusNorm() const { + double sum = 0.0; + for (double val : data_) + sum += val * val; + return std::sqrt(sum); + } + + void fill(double value) { + std::fill(data_.begin(), data_.end(), value); + } + +private: + std::size_t rows_; + std::size_t cols_; + std::vector data_; + + void boundsCheck(std::size_t row, std::size_t col) const { + if (row >= rows_ || col >= cols_) + throw std::out_of_range("Matrix index out of range"); + } + + void checkCompatible(const Matrix& other) const { + if (rows_ != other.rows_ || cols_ != other.cols_) + throw std::invalid_argument("Matrix dimensions do not match"); + } +}; diff --git a/priv/combined_metrics/samples/code_smells/no_virtual_calls_in_constructors/bad/Widget.cpp b/priv/combined_metrics/samples/code_smells/no_virtual_calls_in_constructors/bad/Widget.cpp new file mode 100644 index 0000000..a2e698b --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_virtual_calls_in_constructors/bad/Widget.cpp @@ -0,0 +1,72 @@ +#include +#include +#include + +class Renderer { +public: + virtual ~Renderer() = default; + virtual void draw(const std::string& label) = 0; +}; + +class Widget { +public: + explicit Widget(std::string label) + : label_(std::move(label)) + { + // Calls virtual methods in the constructor — at this point the vtable + // points to Widget's implementations, not the derived class overrides. + // This is almost certainly a bug when derived classes override these. 
+ setupLayout(); // virtual call in ctor — dispatches to Widget::setupLayout + loadResources(); // virtual call in ctor — dispatches to Widget::loadResources + } + + virtual ~Widget() = default; + + virtual void render(Renderer& renderer) const { + renderer.draw(label_); + } + +protected: + virtual void setupLayout() { + // Base implementation — called even when derived class overrides it + minWidth_ = static_cast(label_.size()); + } + + virtual void loadResources() { + // Base implementation — derived class version is never called from ctor + } + + std::string label_; + int minWidth_ = 0; +}; + +class Button : public Widget { +public: + explicit Button(std::string label, std::string action) + : Widget(std::move(label)) // Widget ctor calls setupLayout/loadResources... + , action_(std::move(action)) + { + // ...but Button::setupLayout and Button::loadResources were NOT called above. + // The Button is not properly initialized after construction. + } + + void render(Renderer& renderer) const override { + renderer.draw("[" + label_ + "]"); + } + +protected: + void setupLayout() override { + // This override is NEVER called from Widget's constructor + minWidth_ = static_cast(label_.size()) + 4; // button-specific padding + paddingSet_ = true; + } + + void loadResources() override { + resourcesLoaded_ = true; + } + +private: + std::string action_; + bool paddingSet_ = false; // always false after construction + bool resourcesLoaded_ = false; // always false after construction +}; diff --git a/priv/combined_metrics/samples/code_smells/no_virtual_calls_in_constructors/good/Widget.cpp b/priv/combined_metrics/samples/code_smells/no_virtual_calls_in_constructors/good/Widget.cpp new file mode 100644 index 0000000..bac17fc --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_virtual_calls_in_constructors/good/Widget.cpp @@ -0,0 +1,77 @@ +#include +#include +#include + +// Deferred initialization pattern: virtual methods are not called from constructors. 
+// Initialization that depends on virtual behavior uses a factory or a separate init step. + +class Renderer { +public: + virtual ~Renderer() = default; + virtual void draw(const std::string& label) = 0; +}; + +class Widget { +public: + explicit Widget(std::string label) + : label_(std::move(label)), initialized_(false) + { + // Constructor only sets plain data — no virtual calls + } + + virtual ~Widget() = default; + + // Separate initialization method that can safely call virtual members + void initialize() { + if (initialized_) return; + setupLayout(); // virtual — called after construction, when vtable is correct + loadResources(); // virtual — same + initialized_ = true; + } + + virtual void render(Renderer& renderer) const { + renderer.draw(label_); + } + + const std::string& label() const noexcept { return label_; } + bool isInitialized() const noexcept { return initialized_; } + +protected: + virtual void setupLayout() {} + virtual void loadResources() {} + + std::string label_; + bool initialized_; +}; + +class Button : public Widget { +public: + explicit Button(std::string label, std::string action) + : Widget(std::move(label)), action_(std::move(action)) {} + + void render(Renderer& renderer) const override { + renderer.draw("[" + label_ + "]"); + } + +protected: + void setupLayout() override { + // Button-specific layout — runs after construction, vtable is fully set + minWidth_ = static_cast(label_.size()) + 4; + } + + void loadResources() override { + // Load button-specific resources + } + +private: + std::string action_; + int minWidth_ = 0; +}; + +// Factory ensures initialize() is called on the fully-constructed object +template +std::unique_ptr makeWidget(Args&&... 
args) { + auto widget = std::make_unique(std::forward(args)...); + widget->initialize(); // safe: called after full construction + return widget; +} diff --git a/priv/combined_metrics/samples/code_smells/no_with_statement/bad/report_builder.js b/priv/combined_metrics/samples/code_smells/no_with_statement/bad/report_builder.js new file mode 100644 index 0000000..0f4caee --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_with_statement/bad/report_builder.js @@ -0,0 +1,54 @@ +function formatCurrency(amount, currency = "USD") { + return new Intl.NumberFormat("en-US", { style: "currency", currency }).format(amount); +} + +function formatDate(date) { + return new Intl.DateTimeFormat("en-US", { + year: "numeric", + month: "short", + day: "numeric", + }).format(new Date(date)); +} + +function buildOrderRow(order) { + with (order) { + const total = formatCurrency(order.total, currency); + const date = formatDate(createdAt); + const statusLabel = status.charAt(0).toUpperCase() + status.slice(1); + + return { + id, + customer: `${customer.firstName} ${customer.lastName}`, + email: customer.email, + date, + total, + status: statusLabel, + itemCount: items.length, + }; + } +} + +function buildSummaryStats(orders) { + const totalRevenue = orders.reduce((sum, o) => sum + o.total, 0); + + with (Math) { + const averageOrderValue = totalRevenue / orders.length; + const completedCount = orders.filter((o) => o.status === "completed").length; + + return { + totalOrders: orders.length, + totalRevenue: formatCurrency(totalRevenue), + averageOrderValue: formatCurrency(round(averageOrderValue * 100) / 100), + completionRate: `${round((completedCount / orders.length) * 100)}%`, + }; + } +} + +function buildReport(orders) { + const rows = orders.map(buildOrderRow); + const summary = buildSummaryStats(orders); + + return { rows, summary, generatedAt: new Date().toISOString() }; +} + +export { buildReport, buildOrderRow, buildSummaryStats }; diff --git 
a/priv/combined_metrics/samples/code_smells/no_with_statement/good/report_builder.js b/priv/combined_metrics/samples/code_smells/no_with_statement/good/report_builder.js new file mode 100644 index 0000000..4e80900 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/no_with_statement/good/report_builder.js @@ -0,0 +1,49 @@ +function formatCurrency(amount, currency = "USD") { + return new Intl.NumberFormat("en-US", { style: "currency", currency }).format(amount); +} + +function formatDate(date) { + return new Intl.DateTimeFormat("en-US", { + year: "numeric", + month: "short", + day: "numeric", + }).format(new Date(date)); +} + +function buildOrderRow(order) { + const total = formatCurrency(order.total, order.currency); + const date = formatDate(order.createdAt); + const statusLabel = order.status.charAt(0).toUpperCase() + order.status.slice(1); + + return { + id: order.id, + customer: `${order.customer.firstName} ${order.customer.lastName}`, + email: order.customer.email, + date, + total, + status: statusLabel, + itemCount: order.items.length, + }; +} + +function buildSummaryStats(orders) { + const totalRevenue = orders.reduce((sum, o) => sum + o.total, 0); + const averageOrderValue = totalRevenue / orders.length; + const completedCount = orders.filter((o) => o.status === "completed").length; + + return { + totalOrders: orders.length, + totalRevenue: formatCurrency(totalRevenue), + averageOrderValue: formatCurrency(averageOrderValue), + completionRate: `${Math.round((completedCount / orders.length) * 100)}%`, + }; +} + +function buildReport(orders) { + const rows = orders.map(buildOrderRow); + const summary = buildSummaryStats(orders); + + return { rows, summary, generatedAt: new Date().toISOString() }; +} + +export { buildReport, buildOrderRow, buildSummaryStats, formatCurrency, formatDate }; diff --git a/priv/combined_metrics/samples/code_smells/shares_memory_by_communicating/bad/counter.go 
b/priv/combined_metrics/samples/code_smells/shares_memory_by_communicating/bad/counter.go new file mode 100644 index 0000000..ced67d4 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/shares_memory_by_communicating/bad/counter.go @@ -0,0 +1,39 @@ +package counter + +import "sync" + +// PageCounter tracks page view counts using a mutex-guarded map. +// Multiple goroutines directly mutate shared state instead of communicating. +type PageCounter struct { + mu sync.Mutex + counts map[string]int +} + +func NewPageCounter() *PageCounter { + return &PageCounter{ + counts: make(map[string]int), + } +} + +// Increment records a hit for the given page. +// Multiple goroutines share the map directly, protected only by a mutex. +func (c *PageCounter) Increment(page string) { + c.mu.Lock() + defer c.mu.Unlock() + c.counts[page]++ +} + +// Count returns the current hit count for the given page. +func (c *PageCounter) Count(page string) int { + c.mu.Lock() + defer c.mu.Unlock() + return c.counts[page] +} + +// Reset clears all counters. +func (c *PageCounter) Reset() { + c.mu.Lock() + defer c.mu.Unlock() + // Direct mutation of shared map — goroutines share memory instead of communicating. + c.counts = make(map[string]int) +} diff --git a/priv/combined_metrics/samples/code_smells/shares_memory_by_communicating/good/counter.go b/priv/combined_metrics/samples/code_smells/shares_memory_by_communicating/good/counter.go new file mode 100644 index 0000000..a4bcf19 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/shares_memory_by_communicating/good/counter.go @@ -0,0 +1,55 @@ +package counter + +// PageCounter tracks page view counts by routing all mutations through a channel. +// No mutex is needed; the single goroutine that owns the map is the only writer. 
+type PageCounter struct { + inc chan string + query chan queryReq + stop chan struct{} +} + +type queryReq struct { + page string + result chan int +} + +func NewPageCounter() *PageCounter { + c := &PageCounter{ + inc: make(chan string, 64), + query: make(chan queryReq), + stop: make(chan struct{}), + } + go c.run() + return c +} + +func (c *PageCounter) run() { + counts := make(map[string]int) + for { + select { + case page := <-c.inc: + counts[page]++ + case req := <-c.query: + req.result <- counts[req.page] + case <-c.stop: + return + } + } +} + +// Increment records a hit for the given page. Safe to call from multiple goroutines. +func (c *PageCounter) Increment(page string) { + c.inc <- page +} + +// Count returns the current hit count for the given page. +func (c *PageCounter) Count(page string) int { + result := make(chan int, 1) + c.query <- queryReq{page: page, result: result} + return <-result +} + +// Stop shuts down the background goroutine. +func (c *PageCounter) Stop() { + close(c.stop) +} diff --git a/priv/combined_metrics/samples/code_smells/single_argument_constructors_are_explicit/bad/Parser.cpp b/priv/combined_metrics/samples/code_smells/single_argument_constructors_are_explicit/bad/Parser.cpp new file mode 100644 index 0000000..ab246bc --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/single_argument_constructors_are_explicit/bad/Parser.cpp @@ -0,0 +1,97 @@ +#include +#include +#include +#include + +class TokenStream { +public: + // Missing explicit: any string is silently convertible to TokenStream + TokenStream(std::string source) + : source_(std::move(source)), position_(0) {} + + bool hasNext() const { return position_ < source_.size(); } + char peek() const { return source_[position_]; } + char consume() { return source_[position_++]; } + +private: + std::string source_; + std::size_t position_; +}; + +class ParseError : public std::runtime_error { +public: + // Missing explicit: a string literal accidentally converts to 
ParseError in the wrong context + ParseError(const std::string& message) : std::runtime_error(message) {} +}; + +class Token { +public: + enum class Kind { Identifier, Number, Operator, EndOfStream }; + + Token(Kind kind, std::string value) + : kind_(kind), value_(std::move(value)) {} + + Kind kind() const { return kind_; } + const std::string& value() const { return value_; } + +private: + Kind kind_; + std::string value_; +}; + +// Accepting a TokenStream by value triggers an implicit conversion from string +void processStream(TokenStream stream); + +class Parser { +public: + // Missing explicit: Parser p = "some expression"; compiles silently + Parser(std::string input) + : stream_(std::move(input)) {} + + std::vector tokenize() { + std::vector tokens; + while (stream_.hasNext()) { + skipWhitespace(); + if (!stream_.hasNext()) break; + + char c = stream_.peek(); + if (std::isalpha(c)) + tokens.push_back(readIdentifier()); + else if (std::isdigit(c)) + tokens.push_back(readNumber()); + else + tokens.push_back(readOperator()); + } + tokens.emplace_back(Token::Kind::EndOfStream, ""); + return tokens; + } + +private: + TokenStream stream_; + + void skipWhitespace() { + while (stream_.hasNext() && std::isspace(stream_.peek())) + stream_.consume(); + } + + Token readIdentifier() { + std::string value; + while (stream_.hasNext() && std::isalnum(stream_.peek())) + value += stream_.consume(); + return Token(Token::Kind::Identifier, std::move(value)); + } + + Token readNumber() { + std::string value; + while (stream_.hasNext() && std::isdigit(stream_.peek())) + value += stream_.consume(); + return Token(Token::Kind::Number, std::move(value)); + } + + Token readOperator() { + return Token(Token::Kind::Operator, std::string(1, stream_.consume())); + } +}; + +// This compiles because Parser(std::string) is not explicit: +// Parser p = std::string("1 + 2"); // implicit conversion diff --git 
a/priv/combined_metrics/samples/code_smells/single_argument_constructors_are_explicit/good/Parser.cpp b/priv/combined_metrics/samples/code_smells/single_argument_constructors_are_explicit/good/Parser.cpp new file mode 100644 index 0000000..6f2a96a --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/single_argument_constructors_are_explicit/good/Parser.cpp @@ -0,0 +1,93 @@ +#include +#include +#include +#include + +class TokenStream { +public: + explicit TokenStream(std::string source) // explicit: prevents implicit string -> TokenStream + : source_(std::move(source)), position_(0) {} + + bool hasNext() const noexcept { return position_ < source_.size(); } + char peek() const { return source_[position_]; } + char consume() { return source_[position_++]; } + +private: + std::string source_; + std::size_t position_; +}; + +class ParseError : public std::runtime_error { +public: + explicit ParseError(const std::string& message) // explicit: single-arg exception ctor + : std::runtime_error(message) {} + + explicit ParseError(std::string_view message) + : std::runtime_error(std::string(message)) {} +}; + +class Token { +public: + enum class Kind { Identifier, Number, Operator, EndOfStream }; + + Token(Kind kind, std::string value) // two-arg ctor: explicit not required but consistent + : kind_(kind), value_(std::move(value)) {} + + Kind kind() const noexcept { return kind_; } + const std::string& value() const noexcept { return value_; } + +private: + Kind kind_; + std::string value_; +}; + +class Parser { +public: + // explicit: prevents accidental conversion from string to Parser + explicit Parser(std::string input) + : stream_(std::move(input)) {} + + std::vector tokenize() { + std::vector tokens; + while (stream_.hasNext()) { + skipWhitespace(); + if (!stream_.hasNext()) break; + + char c = stream_.peek(); + if (std::isalpha(c)) + tokens.push_back(readIdentifier()); + else if (std::isdigit(c)) + tokens.push_back(readNumber()); + else + 
tokens.push_back(readOperator()); + } + tokens.emplace_back(Token::Kind::EndOfStream, ""); + return tokens; + } + +private: + TokenStream stream_; + + void skipWhitespace() { + while (stream_.hasNext() && std::isspace(stream_.peek())) + stream_.consume(); + } + + Token readIdentifier() { + std::string value; + while (stream_.hasNext() && std::isalnum(stream_.peek())) + value += stream_.consume(); + return Token(Token::Kind::Identifier, std::move(value)); + } + + Token readNumber() { + std::string value; + while (stream_.hasNext() && std::isdigit(stream_.peek())) + value += stream_.consume(); + return Token(Token::Kind::Number, std::move(value)); + } + + Token readOperator() { + return Token(Token::Kind::Operator, std::string(1, stream_.consume())); + } +}; diff --git a/priv/combined_metrics/samples/code_smells/supervised_processes_in_supervision_tree/bad/analytics.ex b/priv/combined_metrics/samples/code_smells/supervised_processes_in_supervision_tree/bad/analytics.ex new file mode 100644 index 0000000..fad35d2 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/supervised_processes_in_supervision_tree/bad/analytics.ex @@ -0,0 +1,65 @@ +defmodule MyApp.Analytics.EventBuffer do + @moduledoc """ + Buffers analytics events. + """ + + @flush_interval_ms 5_000 + + # Bad: starts the background loop with a bare spawn/1 call. + # If this process crashes it will not be restarted, and no crash report + # is linked to any supervisor — it silently disappears. 
+ @spec start() :: pid() + def start do + state = %{buffer: [], count: 0} + spawn(fn -> loop(state) end) + end + + @spec push(pid(), map()) :: :ok + def push(pid, event) when is_map(event) do + send(pid, {:push, event}) + :ok + end + + defp loop(state) do + receive do + {:push, event} -> + new_state = %{state | buffer: [event | state.buffer], count: state.count + 1} + + if new_state.count >= 500 do + flush(new_state.buffer) + loop(%{buffer: [], count: 0}) + else + loop(new_state) + end + + :flush -> + flush(state.buffer) + schedule_flush(self()) + loop(%{buffer: [], count: 0}) + end + end + + defp flush(events), do: MyApp.Analytics.Store.insert_all(Enum.reverse(events)) + + defp schedule_flush(pid) do + Process.send_after(pid, :flush, @flush_interval_ms) + end +end + +defmodule MyApp.Application do + use Application + + @impl true + def start(_type, _args) do + children = [MyApp.Repo] + + result = Supervisor.start_link(children, strategy: :one_for_one) + + # Bad: EventBuffer is started with spawn/1 outside the supervision tree. + # It will not be restarted on crash and the PID is hard to track. + pid = MyApp.Analytics.EventBuffer.start() + Process.register(pid, :event_buffer) + + result + end +end diff --git a/priv/combined_metrics/samples/code_smells/supervised_processes_in_supervision_tree/good/analytics.ex b/priv/combined_metrics/samples/code_smells/supervised_processes_in_supervision_tree/good/analytics.ex new file mode 100644 index 0000000..caa83d3 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/supervised_processes_in_supervision_tree/good/analytics.ex @@ -0,0 +1,74 @@ +defmodule MyApp.Analytics.EventBuffer do + @moduledoc """ + Buffers analytics events before flushing to the database. + Started under the application supervisor — never spawned bare. 
+ """ + + use GenServer, restart: :permanent + + @flush_interval_ms 5_000 + @max_buffer_size 500 + + def start_link(opts) do + GenServer.start_link(__MODULE__, opts, name: __MODULE__) + end + + @doc "Adds an event to the buffer." + @spec push(map()) :: :ok + def push(event) when is_map(event) do + GenServer.cast(__MODULE__, {:push, event}) + end + + @impl true + def init(_opts) do + schedule_flush() + {:ok, %{buffer: [], count: 0}} + end + + @impl true + def handle_cast({:push, event}, %{buffer: buf, count: count} = state) do + new_state = %{state | buffer: [event | buf], count: count + 1} + + if new_state.count >= @max_buffer_size do + flush(new_state.buffer) + {:noreply, %{new_state | buffer: [], count: 0}} + else + {:noreply, new_state} + end + end + + @impl true + def handle_info(:flush, %{buffer: []} = state) do + schedule_flush() + {:noreply, state} + end + + def handle_info(:flush, %{buffer: buf} = state) do + flush(buf) + schedule_flush() + {:noreply, %{state | buffer: [], count: 0}} + end + + defp flush(events) do + MyApp.Analytics.Store.insert_all(Enum.reverse(events)) + end + + defp schedule_flush do + Process.send_after(self(), :flush, @flush_interval_ms) + end +end + +defmodule MyApp.Application do + use Application + + @impl true + def start(_type, _args) do + children = [ + MyApp.Repo, + # Good: EventBuffer is started as a supervised child, not with spawn/1 + MyApp.Analytics.EventBuffer + ] + + Supervisor.start_link(children, strategy: :one_for_one) + end +end diff --git a/priv/combined_metrics/samples/code_smells/switch_has_default_case/bad/notification_router.js b/priv/combined_metrics/samples/code_smells/switch_has_default_case/bad/notification_router.js new file mode 100644 index 0000000..f43c6b1 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/switch_has_default_case/bad/notification_router.js @@ -0,0 +1,58 @@ +function getNotificationChannel(userPreferences, notificationType) { + switch (notificationType) { + case 
"order_confirmed": + return userPreferences.emailEnabled ? "email" : "push"; + case "order_shipped": + return "push"; + case "order_delivered": + return userPreferences.emailEnabled ? "email" : "push"; + case "payment_failed": + return "email"; + case "account_locked": + return "email"; + case "promotional": + return userPreferences.marketingEnabled ? "email" : null; + } +} + +function formatNotificationMessage(notification) { + const { type, data } = notification; + + switch (type) { + case "order_confirmed": + return { + subject: `Order #${data.orderId} confirmed`, + body: `Your order has been confirmed and is being prepared.`, + }; + case "order_shipped": + return { + subject: `Order #${data.orderId} is on its way`, + body: `Your order has shipped. Tracking number: ${data.trackingNumber}`, + }; + case "order_delivered": + return { + subject: `Order #${data.orderId} delivered`, + body: `Your order has been delivered. Enjoy!`, + }; + case "payment_failed": + return { + subject: "Payment failed", + body: `Your payment of ${data.amount} could not be processed.`, + }; + } +} + +function getNotificationPriority(type) { + switch (type) { + case "account_locked": + return "critical"; + case "payment_failed": + return "high"; + case "order_shipped": + return "medium"; + case "promotional": + return "low"; + } +} + +export { getNotificationChannel, formatNotificationMessage, getNotificationPriority }; diff --git a/priv/combined_metrics/samples/code_smells/switch_has_default_case/good/notification_router.js b/priv/combined_metrics/samples/code_smells/switch_has_default_case/good/notification_router.js new file mode 100644 index 0000000..562cacd --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/switch_has_default_case/good/notification_router.js @@ -0,0 +1,52 @@ +import logger from "./logger.js"; + +function getNotificationChannel(userPreferences, notificationType) { + switch (notificationType) { + case "order_confirmed": + return userPreferences.emailEnabled ? 
"email" : "push"; + case "order_shipped": + return "push"; + case "order_delivered": + return userPreferences.emailEnabled ? "email" : "push"; + case "payment_failed": + return "email"; + case "account_locked": + return "email"; + case "promotional": + return userPreferences.marketingEnabled ? "email" : null; + default: + logger.warn(`Unknown notification type: '${notificationType}'`); + return null; + } +} + +function formatNotificationMessage(notification) { + const { type, data } = notification; + + switch (type) { + case "order_confirmed": + return { + subject: `Order #${data.orderId} confirmed`, + body: `Your order has been confirmed and is being prepared.`, + }; + case "order_shipped": + return { + subject: `Order #${data.orderId} is on its way`, + body: `Your order has shipped. Tracking number: ${data.trackingNumber}`, + }; + case "order_delivered": + return { + subject: `Order #${data.orderId} delivered`, + body: `Your order has been delivered. Enjoy!`, + }; + case "payment_failed": + return { + subject: "Payment failed", + body: `Your payment of ${data.amount} could not be processed.`, + }; + default: + throw new Error(`No message template for notification type: '${type}'`); + } +} + +export { getNotificationChannel, formatNotificationMessage }; diff --git a/priv/combined_metrics/samples/code_smells/uses_appropriate_dispatcher/bad/AnalyticsProcessor.kt b/priv/combined_metrics/samples/code_smells/uses_appropriate_dispatcher/bad/AnalyticsProcessor.kt new file mode 100644 index 0000000..05ef94c --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_appropriate_dispatcher/bad/AnalyticsProcessor.kt @@ -0,0 +1,54 @@ +package com.example.analytics + +import kotlinx.coroutines.* +import java.time.LocalDate + +data class DailyStats(val date: LocalDate, val totalRevenue: Double, val orderCount: Int) + +class AnalyticsProcessor( + private val repository: AnalyticsRepository, + private val fileExporter: CsvExporter +) { + + /** + * CPU-intensive 
computation incorrectly uses Dispatchers.IO. + * IO's thread pool is designed for blocking calls — using it for heavy + * CPU work starves I/O threads and misses parallelism optimisations. + */ + suspend fun computeDailyStats(events: List): List = + withContext(Dispatchers.IO) { // Wrong dispatcher for CPU work + events + .groupBy { it.occurredAt.toLocalDate() } + .map { (date, dayEvents) -> + val revenue = dayEvents.sumOf { it.amount } + val orders = dayEvents.count { it.type == "ORDER_PLACED" } + DailyStats(date, revenue, orders) + } + .sortedBy { it.date } + } + + /** + * Database access incorrectly uses Dispatchers.Default. + * Default is for CPU-bound work; blocking DB calls here will exhaust + * the limited Default thread pool and degrade all CPU-bound tasks. + */ + suspend fun loadEvents(from: LocalDate, to: LocalDate): List = + withContext(Dispatchers.Default) { // Wrong dispatcher for I/O + repository.findBetween(from, to) + } + + /** + * File write runs on Dispatchers.Default — blocking I/O on a CPU dispatcher + * ties up a thread that should be doing computation. 
+ */ + suspend fun exportToCsv(stats: List, outputPath: String) = + withContext(Dispatchers.Default) { // Wrong dispatcher for file I/O + fileExporter.write(outputPath, stats) + } + + suspend fun generateReport(from: LocalDate, to: LocalDate, outputPath: String) { + val events = loadEvents(from, to) + val stats = computeDailyStats(events) + exportToCsv(stats, outputPath) + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_appropriate_dispatcher/good/AnalyticsProcessor.kt b/priv/combined_metrics/samples/code_smells/uses_appropriate_dispatcher/good/AnalyticsProcessor.kt new file mode 100644 index 0000000..57fa3b4 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_appropriate_dispatcher/good/AnalyticsProcessor.kt @@ -0,0 +1,54 @@ +package com.example.analytics + +import kotlinx.coroutines.* +import java.time.LocalDate + +data class DailyStats(val date: LocalDate, val totalRevenue: Double, val orderCount: Int) + +class AnalyticsProcessor( + private val repository: AnalyticsRepository, + private val fileExporter: CsvExporter +) { + + /** + * CPU-intensive aggregation uses Dispatchers.Default — optimised for + * parallel computation on a thread pool sized to available CPU cores. + */ + suspend fun computeDailyStats(events: List): List = + withContext(Dispatchers.Default) { + events + .groupBy { it.occurredAt.toLocalDate() } + .map { (date, dayEvents) -> + val revenue = dayEvents.sumOf { it.amount } + val orders = dayEvents.count { it.type == "ORDER_PLACED" } + DailyStats(date, revenue, orders) + } + .sortedBy { it.date } + } + + /** + * Database reads and writes use Dispatchers.IO — the thread pool is sized + * for blocking I/O without starving CPU-bound work. + */ + suspend fun loadEvents(from: LocalDate, to: LocalDate): List = + withContext(Dispatchers.IO) { + repository.findBetween(from, to) + } + + /** + * File write is I/O-bound — Dispatchers.IO is appropriate. 
+ */ + suspend fun exportToCsv(stats: List, outputPath: String) = + withContext(Dispatchers.IO) { + fileExporter.write(outputPath, stats) + } + + /** + * Orchestrator — each step runs on the right dispatcher via the helpers above. + */ + suspend fun generateReport(from: LocalDate, to: LocalDate, outputPath: String) { + val events = loadEvents(from, to) // I/O dispatcher + val stats = computeDailyStats(events) // Default dispatcher + exportToCsv(stats, outputPath) // I/O dispatcher + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_arc_only_with_send_sync_types/bad/pool.rs b/priv/combined_metrics/samples/code_smells/uses_arc_only_with_send_sync_types/bad/pool.rs new file mode 100644 index 0000000..c44f1cc --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_arc_only_with_send_sync_types/bad/pool.rs @@ -0,0 +1,52 @@ +use std::cell::RefCell; +use std::rc::Rc; +use std::sync::Arc; + +/// A connection that uses RefCell for interior mutability. +/// RefCell is NOT Sync — it cannot be safely accessed from multiple threads. +pub struct Connection { + pub id: u64, + pub url: String, + // RefCell is !Sync — wrapping this in Arc is unsound + pub state: RefCell, +} + +// BAD: Arc requires Connection: Send + Sync. +// Because Connection contains RefCell, it is !Sync. +// This will fail to compile when sent across threads, but the intent is wrong. +pub struct Pool { + // Arc here is misleading — it looks thread-safe but isn't + connections: Vec>, +} + +impl Pool { + pub fn new() -> Self { + Self { connections: Vec::new() } + } + + pub fn add(&mut self, url: impl Into) -> Arc { + let conn = Arc::new(Connection { + id: self.connections.len() as u64, + url: url.into(), + state: RefCell::new("idle".to_string()), + }); + self.connections.push(Arc::clone(&conn)); + conn + } +} + +// BAD: Rc is not Send, so Arc> cannot be used across threads. +// Using Arc here is misleading — the Rc inside prevents thread sharing. 
+pub struct BadSharedHandle { + inner: Arc>, +} + +impl BadSharedHandle { + pub fn new(s: impl Into) -> Self { + Self { inner: Arc::new(Rc::new(s.into())) } + } + + pub fn value(&self) -> Rc { + Rc::clone(&self.inner) + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_arc_only_with_send_sync_types/good/pool.rs b/priv/combined_metrics/samples/code_smells/uses_arc_only_with_send_sync_types/good/pool.rs new file mode 100644 index 0000000..63865a9 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_arc_only_with_send_sync_types/good/pool.rs @@ -0,0 +1,55 @@ +use std::sync::{Arc, Mutex}; + +/// A connection that is safe to share across threads. +/// Implements Send + Sync, making it valid to wrap in Arc. +#[derive(Debug)] +pub struct Connection { + pub id: u64, + pub url: String, + pub active: bool, +} + +// Connection is Send + Sync because all its fields are Send + Sync +// Arc is therefore also Send + Sync — correct usage +pub struct Pool { + connections: Vec>>, + max_size: usize, +} + +impl Pool { + pub fn new(max_size: usize) -> Self { + Self { connections: Vec::new(), max_size } + } + + pub fn add(&mut self, url: impl Into) -> Arc> { + let conn = Arc::new(Mutex::new(Connection { + id: self.connections.len() as u64, + url: url.into(), + active: true, + })); + self.connections.push(Arc::clone(&conn)); + conn + } + + // Returns an Arc to share the connection safely between threads + pub fn get_connection(&self, id: u64) -> Option>> { + self.connections + .iter() + .find(|c| c.lock().map(|c| c.id == id).unwrap_or(false)) + .map(Arc::clone) + } + + pub fn active_count(&self) -> usize { + self.connections + .iter() + .filter(|c| c.lock().map(|c| c.active).unwrap_or(false)) + .count() + } +} + +// Shared, thread-safe state — Arc wraps a Mutex>, all Send + Sync +pub type SharedLog = Arc>>; + +pub fn create_shared_log() -> SharedLog { + Arc::new(Mutex::new(Vec::new())) +} diff --git 
a/priv/combined_metrics/samples/code_smells/uses_attr_accessor_not_manual_getter_setter/bad/payment.rb b/priv/combined_metrics/samples/code_smells/uses_attr_accessor_not_manual_getter_setter/bad/payment.rb new file mode 100644 index 0000000..e515c9c --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_attr_accessor_not_manual_getter_setter/bad/payment.rb @@ -0,0 +1,58 @@ +class PaymentMethod + def initialize(attrs = {}) + @id = attrs[:id] + @card_type = attrs[:card_type] + @last_four = attrs[:last_four] + @expires_at = attrs[:expires_at] + @billing_address = attrs[:billing_address] + @nickname = attrs[:nickname] + @is_default = attrs[:is_default] || false + end + + # Manual getters — should use attr_reader + def id + @id + end + + def card_type + @card_type + end + + def last_four + @last_four + end + + def expires_at + @expires_at + end + + def billing_address + @billing_address + end + + # Manual getter + setter pair — should use attr_accessor + def nickname + @nickname + end + + def nickname=(value) + @nickname = value + end + + def is_default + @is_default + end + + def is_default=(value) + @is_default = value + end + + def expired? + @expires_at < Date.today + end + + def display_name + base = "#{@card_type.upcase} ending in #{@last_four}" + @nickname ? 
"#{@nickname} (#{base})" : base + end +end diff --git a/priv/combined_metrics/samples/code_smells/uses_attr_accessor_not_manual_getter_setter/good/payment.rb b/priv/combined_metrics/samples/code_smells/uses_attr_accessor_not_manual_getter_setter/good/payment.rb new file mode 100644 index 0000000..b00ead9 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_attr_accessor_not_manual_getter_setter/good/payment.rb @@ -0,0 +1,39 @@ +class PaymentMethod + attr_reader :id, :card_type, :last_four, :expires_at, :billing_address + attr_accessor :nickname, :is_default + + def initialize(attrs = {}) + @id = attrs[:id] + @card_type = attrs[:card_type] + @last_four = attrs[:last_four] + @expires_at = attrs[:expires_at] + @billing_address = attrs[:billing_address] + @nickname = attrs[:nickname] + @is_default = attrs[:is_default] || false + end + + def expired? + expires_at < Date.today + end + + def expiring_soon? + !expired? && expires_at < 30.days.from_now + end + + def display_name + base = "#{card_type.upcase} ending in #{last_four}" + nickname ? "#{nickname} (#{base})" : base + end + + def to_h + { + id: id, + card_type: card_type, + last_four: last_four, + expires_at: expires_at, + nickname: nickname, + is_default: is_default, + expired: expired? 
+ } + end +end diff --git a/priv/combined_metrics/samples/code_smells/uses_context_manager_for_resources/bad/database_manager.py b/priv/combined_metrics/samples/code_smells/uses_context_manager_for_resources/bad/database_manager.py new file mode 100644 index 0000000..e3bf408 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_context_manager_for_resources/bad/database_manager.py @@ -0,0 +1,62 @@ +"""Database manager providing connection pooling and query helpers.""" +from __future__ import annotations + +import sqlite3 +from typing import Any, Optional + + +DB_PATH = ":memory:" + + +def get_connection(path: str = DB_PATH) -> sqlite3.Connection: + """Return a raw connection — caller is responsible for closing it.""" + return sqlite3.connect(path) + + +def execute_query( + sql: str, + params: tuple = (), + path: str = DB_PATH, +) -> list[dict[str, Any]]: + """Run a SELECT query — connection left open if an exception is raised.""" + conn = get_connection(path) # no context manager + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + cursor.execute(sql, params) # if this raises, conn is never closed + rows = [dict(row) for row in cursor.fetchall()] + conn.close() # only reached on success + return rows + + +def execute_write( + sql: str, + params: tuple = (), + path: str = DB_PATH, +) -> int: + """Execute a write — connection leaked on error; no rollback on failure.""" + conn = get_connection(path) + cursor = conn.cursor() + cursor.execute(sql, params) # exception here leaks conn + conn.commit() + conn.close() + return cursor.rowcount + + +def export_to_csv( + sql: str, + output_path: str, + path: str = DB_PATH, +) -> int: + """Export results — both file and connection are manually managed.""" + import csv + rows = execute_query(sql, path=path) + if not rows: + return 0 + + csv_file = open(output_path, "w", newline="", encoding="utf-8") # no 'with' + writer = csv.DictWriter(csv_file, fieldnames=rows[0].keys()) + writer.writeheader() + 
writer.writerows(rows) + csv_file.close() # only reached if writerows() doesn't raise + + return len(rows) diff --git a/priv/combined_metrics/samples/code_smells/uses_context_manager_for_resources/good/database_manager.py b/priv/combined_metrics/samples/code_smells/uses_context_manager_for_resources/good/database_manager.py new file mode 100644 index 0000000..6f0405e --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_context_manager_for_resources/good/database_manager.py @@ -0,0 +1,67 @@ +"""Database manager providing connection pooling and query helpers.""" +from __future__ import annotations + +import sqlite3 +from contextlib import contextmanager +from typing import Any, Generator, Optional + + +DB_PATH = ":memory:" + + +@contextmanager +def get_connection(path: str = DB_PATH) -> Generator[sqlite3.Connection, None, None]: + """Yield a database connection, committing on success and rolling back on error.""" + conn = sqlite3.connect(path) + try: + yield conn + conn.commit() + except Exception: + conn.rollback() + raise + finally: + conn.close() + + +def execute_query( + sql: str, + params: tuple = (), + path: str = DB_PATH, +) -> list[dict[str, Any]]: + """Run a SELECT query and return rows as dicts — connection closed automatically.""" + with get_connection(path) as conn: + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + cursor.execute(sql, params) + return [dict(row) for row in cursor.fetchall()] + + +def execute_write( + sql: str, + params: tuple = (), + path: str = DB_PATH, +) -> int: + """Execute an INSERT/UPDATE/DELETE and return the number of affected rows.""" + with get_connection(path) as conn: + cursor = conn.cursor() + cursor.execute(sql, params) + return cursor.rowcount + + +def export_to_csv( + sql: str, + output_path: str, + path: str = DB_PATH, +) -> int: + """Export query results to a CSV file using context managers for both resources.""" + import csv + rows = execute_query(sql, path=path) + if not rows: + return 0 + + 
with open(output_path, "w", newline="", encoding="utf-8") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=rows[0].keys()) + writer.writeheader() + writer.writerows(rows) + + return len(rows) diff --git a/priv/combined_metrics/samples/code_smells/uses_copied_on_copy_types/bad/metrics.rs b/priv/combined_metrics/samples/code_smells/uses_copied_on_copy_types/bad/metrics.rs new file mode 100644 index 0000000..72a44f8 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_copied_on_copy_types/bad/metrics.rs @@ -0,0 +1,48 @@ +pub struct MetricSeries { + pub timestamps: Vec, + pub values: Vec, +} + +impl MetricSeries { + pub fn new(timestamps: Vec, values: Vec) -> Self { + Self { timestamps, values } + } + + // Bad: .cloned() on u64 — u64 is Copy, .copied() is the right choice + pub fn recent_timestamps(&self, n: usize) -> Vec { + self.timestamps + .iter() + .rev() + .take(n) + .cloned() + .collect() + } + + // Bad: .cloned() on f64 — misleadingly suggests Clone behavior + pub fn max_value(&self) -> Option { + self.values.iter().cloned().reduce(f64::max) + } + + pub fn values_above(&self, threshold: f64) -> Vec { + self.values + .iter() + .cloned() + .filter(|&v| v > threshold) + .collect() + } + + pub fn timestamp_range(&self) -> Option<(u64, u64)> { + // Bad: .cloned() on a Copy type throughout + let min = self.timestamps.iter().cloned().min()?; + let max = self.timestamps.iter().cloned().max()?; + Some((min, max)) + } + + // Bad: .cloned() on i32 — i32 implements Copy, not just Clone + pub fn count_ids_above(ids: &[i32], threshold: i32) -> Vec { + ids.iter() + .cloned() + .filter(|&id| id > threshold) + .collect() + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_copied_on_copy_types/good/metrics.rs b/priv/combined_metrics/samples/code_smells/uses_copied_on_copy_types/good/metrics.rs new file mode 100644 index 0000000..f77af03 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_copied_on_copy_types/good/metrics.rs 
@@ -0,0 +1,47 @@ +pub struct MetricSeries { + pub timestamps: Vec, + pub values: Vec, +} + +impl MetricSeries { + pub fn new(timestamps: Vec, values: Vec) -> Self { + Self { timestamps, values } + } + + // u64 is Copy — use .copied() to avoid the misleading suggestion of Clone + pub fn recent_timestamps(&self, n: usize) -> Vec { + self.timestamps + .iter() + .rev() + .take(n) + .copied() + .collect() + } + + // f64 is Copy — .copied() is correct and clear + pub fn max_value(&self) -> Option { + self.values.iter().copied().reduce(f64::max) + } + + pub fn values_above(&self, threshold: f64) -> Vec { + self.values + .iter() + .copied() + .filter(|&v| v > threshold) + .collect() + } + + pub fn timestamp_range(&self) -> Option<(u64, u64)> { + let min = self.timestamps.iter().copied().min()?; + let max = self.timestamps.iter().copied().max()?; + Some((min, max)) + } + + // i32 is Copy — .copied() communicates intent clearly + pub fn count_ids_above(ids: &[i32], threshold: i32) -> Vec { + ids.iter() + .copied() + .filter(|&id| id > threshold) + .collect() + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_mb_string_functions_for_unicode/bad/ProductCatalog.php b/priv/combined_metrics/samples/code_smells/uses_mb_string_functions_for_unicode/bad/ProductCatalog.php new file mode 100644 index 0000000..80ae1f6 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_mb_string_functions_for_unicode/bad/ProductCatalog.php @@ -0,0 +1,62 @@ + String { + // Magic number check instead of nil check + if age == -1 { + return "Age not provided" + } + return "\(age) years old" + } + + func contactSummary() -> String { + var parts: [String] = [email] + // Empty string check instead of nil check + if !phoneNumber.isEmpty { + parts.append(phoneNumber) + } + return parts.joined(separator: " | ") + } + + func loyaltySummary() -> String { + // Magic number check throughout codebase + if loyaltyPoints == -1 { + return "Not enrolled in loyalty program" + } + return 
"\(loyaltyPoints) points" + } + + func hasLocation() -> Bool { + // Magic value comparison: 0/0 is technically a real coordinate + return latitude != 0.0 || longitude != 0.0 + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_optionals_not_sentinel_values/good/UserProfile.swift b/priv/combined_metrics/samples/code_smells/uses_optionals_not_sentinel_values/good/UserProfile.swift new file mode 100644 index 0000000..9215e6a --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_optionals_not_sentinel_values/good/UserProfile.swift @@ -0,0 +1,57 @@ +import Foundation + +struct Address { + let street: String + let city: String + let postalCode: String + let country: String +} + +struct UserProfile { + let id: String + var displayName: String + var email: String + var age: Int? + var phoneNumber: String? + var bio: String? + var address: Address? + var loyaltyPoints: Int? + var lastLoginDate: Date? + + var isPhoneVerified: Bool + var isPremiumMember: Bool + + func formattedAge() -> String { + guard let age = age else { + return "Age not provided" + } + return "\(age) years old" + } + + func contactSummary() -> String { + var parts: [String] = [email] + if let phone = phoneNumber { + parts.append(phone) + } + return parts.joined(separator: " | ") + } + + func loyaltySummary() -> String { + guard let points = loyaltyPoints else { + return "Not enrolled in loyalty program" + } + return "\(points) points" + } +} + +class UserProfileRepository { + private var profiles: [String: UserProfile] = [:] + + func profile(for userID: String) -> UserProfile? 
{ + return profiles[userID] + } + + func update(_ profile: UserProfile) { + profiles[profile.id] = profile + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_recover_in_long_running_goroutines/bad/server.go b/priv/combined_metrics/samples/code_smells/uses_recover_in_long_running_goroutines/bad/server.go new file mode 100644 index 0000000..0b451f7 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_recover_in_long_running_goroutines/bad/server.go @@ -0,0 +1,46 @@ +package server + +import ( + "context" + "log" +) + +type Task struct { + ID string + Payload string +} + +type Executor interface { + Execute(ctx context.Context, task Task) error +} + +// TaskServer runs tasks continuously but does not recover from panics. +// A single panic in Execute will crash the entire process. +type TaskServer struct { + executor Executor + tasks <-chan Task + logger *log.Logger +} + +func New(executor Executor, tasks <-chan Task, logger *log.Logger) *TaskServer { + return &TaskServer{executor: executor, tasks: tasks, logger: logger} +} + +// Run starts the task processing loop with no panic recovery. +func (s *TaskServer) Run(ctx context.Context) { + for { + select { + case <-ctx.Done(): + s.logger.Println("task server shutting down") + return + case task, ok := <-s.tasks: + if !ok { + return + } + // No recover — a panic inside Execute terminates the process. 
+ if err := s.executor.Execute(ctx, task); err != nil { + s.logger.Printf("task %s failed: %v", task.ID, err) + } + } + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_recover_in_long_running_goroutines/good/server.go b/priv/combined_metrics/samples/code_smells/uses_recover_in_long_running_goroutines/good/server.go new file mode 100644 index 0000000..acd60f4 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_recover_in_long_running_goroutines/good/server.go @@ -0,0 +1,58 @@ +package server + +import ( + "context" + "log" + "time" +) + +type Task struct { + ID string + Payload string +} + +type Executor interface { + Execute(ctx context.Context, task Task) error +} + +// TaskServer runs tasks continuously and recovers from panics so the process stays alive. +type TaskServer struct { + executor Executor + tasks <-chan Task + logger *log.Logger +} + +func New(executor Executor, tasks <-chan Task, logger *log.Logger) *TaskServer { + return &TaskServer{executor: executor, tasks: tasks, logger: logger} +} + +// Run starts the task processing loop. It recovers from panics within each iteration. +func (s *TaskServer) Run(ctx context.Context) { + for { + select { + case <-ctx.Done(): + s.logger.Println("task server shutting down") + return + case task, ok := <-s.tasks: + if !ok { + return + } + s.processWithRecover(ctx, task) + } + } +} + +// processWithRecover wraps task execution in a deferred recover so a panic in +// Execute cannot crash the entire server process. 
+func (s *TaskServer) processWithRecover(ctx context.Context, task Task) { + defer func() { + if r := recover(); r != nil { + s.logger.Printf("panic processing task %s: %v — continuing", task.ID, r) + time.Sleep(100 * time.Millisecond) // brief back-off after panic + } + }() + + if err := s.executor.Execute(ctx, task); err != nil { + s.logger.Printf("task %s failed: %v", task.ID, err) + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_short_circuit_operators/bad/AccessPolicy.cs b/priv/combined_metrics/samples/code_smells/uses_short_circuit_operators/bad/AccessPolicy.cs new file mode 100644 index 0000000..3b77421 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_short_circuit_operators/bad/AccessPolicy.cs @@ -0,0 +1,56 @@ +using System; + +namespace Security +{ + public class AccessPolicy + { + public bool CanReadDocument(User user, Document document) + { + // Non-short-circuit & evaluates both sides even if user is null → NullReferenceException + return user != null & document != null + & (user.IsAdmin | document.OwnerId == user.Id | user.HasRole("reader")); + } + + public bool CanEditDocument(User user, Document document) + { + if (user == null | document == null) // | does not short-circuit + return false; + + // | evaluates both sides; IsLocked check runs even when OwnerId doesn't match + return user.IsAdmin | (document.OwnerId == user.Id & !document.IsLocked); + } + + public bool ShouldSendAlert(SystemMetrics metrics) + { + // Non-short-circuit & can throw if metrics is null + return metrics != null + & (metrics.CpuUsage > 90.0 | metrics.MemoryUsage > 85.0) + & metrics.AlertsEnabled; + } + + public string ResolveDisplayName(User user) + { + // Non-short-circuit & evaluates IsNullOrWhiteSpace even when user is null + return user != null & !string.IsNullOrWhiteSpace(user.DisplayName) + ? 
user.DisplayName + : "Anonymous"; + } + + public bool IsValidRequest(ApiRequest request) + { + // All conditions evaluated regardless; throws if request is null + return request != null + & !string.IsNullOrWhiteSpace(request.ApiKey) + & request.Timestamp > DateTimeOffset.UtcNow.AddMinutes(-5) + & request.Payload?.Length <= 1_048_576; + } + + public bool ShouldRetry(HttpResponse response, int attempt) + { + // Non-short-circuit | evaluates status codes even when response is null + return response != null + & attempt < 3 + & (response.StatusCode == 429 | response.StatusCode >= 500); + } + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_short_circuit_operators/good/AccessPolicy.cs b/priv/combined_metrics/samples/code_smells/uses_short_circuit_operators/good/AccessPolicy.cs new file mode 100644 index 0000000..1c5b6ea --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_short_circuit_operators/good/AccessPolicy.cs @@ -0,0 +1,54 @@ +using System; + +namespace Security +{ + public class AccessPolicy + { + public bool CanReadDocument(User user, Document document) + { + // Short-circuit: if user is null, the rest is never evaluated + return user != null && document != null + && (user.IsAdmin || document.OwnerId == user.Id || user.HasRole("reader")); + } + + public bool CanEditDocument(User user, Document document) + { + if (user == null || document == null) + return false; + + // Short-circuit: IsAdmin check avoids evaluating the more expensive checks + return user.IsAdmin || (document.OwnerId == user.Id && !document.IsLocked); + } + + public bool ShouldSendAlert(SystemMetrics metrics) + { + // Short-circuit: if metrics is null, subsequent property access is skipped + return metrics != null + && (metrics.CpuUsage > 90.0 || metrics.MemoryUsage > 85.0) + && metrics.AlertsEnabled; + } + + public string ResolveDisplayName(User user) + { + // Short-circuit null coalescing with && guards + return user != null && 
!string.IsNullOrWhiteSpace(user.DisplayName) + ? user.DisplayName + : "Anonymous"; + } + + public bool IsValidRequest(ApiRequest request) + { + return request != null + && !string.IsNullOrWhiteSpace(request.ApiKey) + && request.Timestamp > DateTimeOffset.UtcNow.AddMinutes(-5) + && request.Payload?.Length <= 1_048_576; + } + + public bool ShouldRetry(HttpResponse response, int attempt) + { + return response != null + && attempt < 3 + && (response.StatusCode == 429 || response.StatusCode >= 500); + } + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_smart_pointers_for_ownership/bad/Buffer.cpp b/priv/combined_metrics/samples/code_smells/uses_smart_pointers_for_ownership/bad/Buffer.cpp new file mode 100644 index 0000000..d1ee596 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_smart_pointers_for_ownership/bad/Buffer.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include + +class Buffer { +public: + explicit Buffer(std::size_t capacity) + : capacity_(capacity) + , size_(0) + { + // Raw owning pointer — must be manually deleted; leaks on exception + data_ = new uint8_t[capacity]; + } + + ~Buffer() { + delete[] data_; // relies on correct destructor call; no RAII safety + } + + // Copy constructor not implemented — double-free if copied + Buffer(const Buffer&) = delete; + Buffer& operator=(const Buffer&) = delete; + + void write(const uint8_t* src, std::size_t length) { + if (size_ + length > capacity_) + throw std::overflow_error("Buffer capacity exceeded"); + std::memcpy(data_ + size_, src, length); + size_ += length; + } + + std::size_t read(uint8_t* dst, std::size_t maxLength) const { + std::size_t toRead = std::min(maxLength, size_); + std::memcpy(dst, data_, toRead); + return toRead; + } + + void clear() { size_ = 0; } + std::size_t size() const { return size_; } + +private: + uint8_t* data_; // raw owning pointer — manual memory management + std::size_t capacity_; + std::size_t size_; +}; + +class BufferPool { 
+public: + explicit BufferPool(std::size_t bufferSize, std::size_t poolSize) + : bufferSize_(bufferSize) + { + for (std::size_t i = 0; i < poolSize; ++i) + available_.push_back(new Buffer(bufferSize)); // raw owning pointers in vector + } + + ~BufferPool() { + for (auto* buf : available_) + delete buf; // manual cleanup; leaks if exception thrown before this + } + + Buffer* acquire() { + if (available_.empty()) + return new Buffer(bufferSize_); // caller must delete — ownership unclear + Buffer* buf = available_.back(); + available_.pop_back(); + return buf; + } + + void release(Buffer* buf) { + buf->clear(); + available_.push_back(buf); + } + +private: + std::size_t bufferSize_; + std::vector available_; // vector of raw owning pointers +}; diff --git a/priv/combined_metrics/samples/code_smells/uses_smart_pointers_for_ownership/good/Buffer.cpp b/priv/combined_metrics/samples/code_smells/uses_smart_pointers_for_ownership/good/Buffer.cpp new file mode 100644 index 0000000..7dd829d --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_smart_pointers_for_ownership/good/Buffer.cpp @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include + +class Buffer { +public: + explicit Buffer(std::size_t capacity) + : data_(std::make_unique(capacity)) + , capacity_(capacity) + , size_(0) + {} + + void write(const uint8_t* src, std::size_t length) { + if (size_ + length > capacity_) + throw std::overflow_error("Buffer capacity exceeded"); + std::copy(src, src + length, data_.get() + size_); + size_ += length; + } + + std::size_t read(uint8_t* dst, std::size_t maxLength) const { + std::size_t toRead = std::min(maxLength, size_); + std::copy(data_.get(), data_.get() + toRead, dst); + return toRead; + } + + void clear() noexcept { size_ = 0; } + std::size_t size() const noexcept { return size_; } + std::size_t capacity() const noexcept { return capacity_; } + +private: + std::unique_ptr data_; // ownership is explicit and automatic + std::size_t capacity_; + 
std::size_t size_; +}; + +class BufferPool { +public: + explicit BufferPool(std::size_t bufferSize, std::size_t poolSize) + : bufferSize_(bufferSize) + { + for (std::size_t i = 0; i < poolSize; ++i) + available_.push_back(std::make_unique(bufferSize)); + } + + std::unique_ptr acquire() { + if (available_.empty()) + return std::make_unique(bufferSize_); + auto buf = std::move(available_.back()); + available_.pop_back(); + return buf; + } + + void release(std::unique_ptr buf) { + buf->clear(); + available_.push_back(std::move(buf)); + } + +private: + std::size_t bufferSize_; + std::vector> available_; +}; diff --git a/priv/combined_metrics/samples/code_smells/uses_standard_library_constants/bad/geometry.rs b/priv/combined_metrics/samples/code_smells/uses_standard_library_constants/bad/geometry.rs new file mode 100644 index 0000000..22f144e --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_standard_library_constants/bad/geometry.rs @@ -0,0 +1,51 @@ +#[derive(Debug, Clone, Copy)] +pub struct Circle { + pub radius: f64, +} + +impl Circle { + pub fn new(radius: f64) -> Self { + Self { radius } + } + + pub fn area(&self) -> f64 { + // BAD: hardcoded approximation — less precise than std::f64::consts::PI + 3.14159 * self.radius * self.radius + } + + pub fn circumference(&self) -> f64 { + // BAD: 2 * pi approximated — differs from PI at the 6th decimal place + 2.0 * 3.14159 * self.radius + } + + pub fn inscribed_square_side(&self) -> f64 { + // BAD: sqrt(2) hardcoded — use std::f64::consts::SQRT_2 + 2.0 * self.radius / 1.41421 + } +} + +#[derive(Debug, Clone, Copy)] +pub struct Sector { + pub radius: f64, + pub angle: f64, +} + +impl Sector { + pub fn area(&self) -> f64 { + 0.5 * self.radius * self.radius * self.angle + } + + pub fn from_degrees(radius: f64, degrees: f64) -> Self { + // BAD: magic constant instead of TAU or PI from std + Self { radius, angle: degrees * 6.28318 / 360.0 } + } +} + +pub fn degrees_to_radians(degrees: f64) -> f64 { + // BAD: 
literal approximation of PI + degrees * 3.14159265 / 180.0 +} + +pub fn radians_to_degrees(radians: f64) -> f64 { + radians * 180.0 / 3.14159265 +} diff --git a/priv/combined_metrics/samples/code_smells/uses_standard_library_constants/good/geometry.rs b/priv/combined_metrics/samples/code_smells/uses_standard_library_constants/good/geometry.rs new file mode 100644 index 0000000..ca9790f --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_standard_library_constants/good/geometry.rs @@ -0,0 +1,55 @@ +use std::f64::consts::{PI, SQRT_2, TAU}; + +#[derive(Debug, Clone, Copy)] +pub struct Circle { + pub radius: f64, +} + +impl Circle { + pub fn new(radius: f64) -> Self { + Self { radius } + } + + pub fn area(&self) -> f64 { + PI * self.radius * self.radius + } + + pub fn circumference(&self) -> f64 { + TAU * self.radius + } + + pub fn inscribed_square_diagonal(&self) -> f64 { + // Diameter * sqrt(2) / sqrt(2) == diameter, but demonstrates SQRT_2 usage + 2.0 * self.radius / SQRT_2 * SQRT_2 + } +} + +#[derive(Debug, Clone, Copy)] +pub struct Sector { + pub radius: f64, + /// angle in radians + pub angle: f64, +} + +impl Sector { + pub fn arc_length(&self) -> f64 { + self.radius * self.angle + } + + pub fn area(&self) -> f64 { + 0.5 * self.radius * self.radius * self.angle + } + + pub fn from_degrees(radius: f64, degrees: f64) -> Self { + // TAU / 360.0 is exact; using PI * 2.0 / 360.0 would also work + Self { radius, angle: degrees * TAU / 360.0 } + } +} + +pub fn degrees_to_radians(degrees: f64) -> f64 { + degrees * PI / 180.0 +} + +pub fn radians_to_degrees(radians: f64) -> f64 { + radians * 180.0 / PI +} diff --git a/priv/combined_metrics/samples/code_smells/uses_string_builder_for_loop_concatenation/bad/HtmlRenderer.cs b/priv/combined_metrics/samples/code_smells/uses_string_builder_for_loop_concatenation/bad/HtmlRenderer.cs new file mode 100644 index 0000000..7164ee6 --- /dev/null +++ 
b/priv/combined_metrics/samples/code_smells/uses_string_builder_for_loop_concatenation/bad/HtmlRenderer.cs @@ -0,0 +1,61 @@ +using System.Collections.Generic; + +namespace Rendering +{ + public class HtmlRenderer + { + public string RenderTable(IEnumerable rows) + { + // String concatenation in loops creates a new string object on every iteration + string html = "\n \n"; + + foreach (var row in rows) + { + html += " "; + foreach (var cell in row.Cells) + { + html += ""; // O(n²) allocations + } + html += "\n"; + } + + html += " \n
    " + Escape(cell) + "
    \n"; + return html; + } + + public string RenderList(IEnumerable items, string cssClass) + { + string html = "\n"; // new string every loop + } + + html += "\n"; + return html; + } + + public string RenderReport(ReportData report) + { + string html = "\n\n\n"; + html += "

    " + Escape(report.Title) + "

    \n"; + + foreach (var section in report.Sections) + { + // Each += allocates a new string on the heap + html += "

    " + Escape(section.Heading) + "

    \n"; + html += "

    " + Escape(section.Body) + "

    \n"; + } + + html += "\n\n"; + return html; + } + + private static string Escape(string text) => + text?.Replace("&", "&").Replace("<", "<").Replace(">", ">") ?? string.Empty; + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_string_builder_for_loop_concatenation/good/HtmlRenderer.cs b/priv/combined_metrics/samples/code_smells/uses_string_builder_for_loop_concatenation/good/HtmlRenderer.cs new file mode 100644 index 0000000..b9d892a --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_string_builder_for_loop_concatenation/good/HtmlRenderer.cs @@ -0,0 +1,72 @@ +using System.Collections.Generic; +using System.Text; + +namespace Rendering +{ + public class HtmlRenderer + { + public string RenderTable(IEnumerable rows) + { + var sb = new StringBuilder(); + sb.AppendLine(""); + sb.AppendLine(" "); + + foreach (var row in rows) + { + sb.Append(" "); + foreach (var cell in row.Cells) + { + sb.Append(""); + } + sb.AppendLine(""); + } + + sb.AppendLine(" "); + sb.AppendLine("
    ") + .Append(Escape(cell)) + .Append("
    "); + return sb.ToString(); + } + + public string RenderList(IEnumerable items, string cssClass) + { + var sb = new StringBuilder(); + sb.Append(""); + + foreach (var item in items) + { + sb.Append("
  • ").Append(Escape(item)).AppendLine("
  • "); + } + + sb.AppendLine(""); + return sb.ToString(); + } + + public string RenderReport(ReportData report) + { + var sb = new StringBuilder(capacity: 4096); + sb.AppendLine("") + .AppendLine("") + .AppendLine(""); + + sb.Append("

    ").Append(Escape(report.Title)).AppendLine("

    "); + + foreach (var section in report.Sections) + { + sb.Append("

    ").Append(Escape(section.Heading)).AppendLine("

    "); + sb.Append("

    ").Append(Escape(section.Body)).AppendLine("

    "); + } + + sb.AppendLine("") + .AppendLine(""); + + return sb.ToString(); + } + + private static string Escape(string text) => + text?.Replace("&", "&").Replace("<", "<").Replace(">", ">") ?? string.Empty; + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_structured_concurrency/bad/DataSyncService.kt b/priv/combined_metrics/samples/code_smells/uses_structured_concurrency/bad/DataSyncService.kt new file mode 100644 index 0000000..49db07d --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_structured_concurrency/bad/DataSyncService.kt @@ -0,0 +1,47 @@ +package com.example.sync + +import kotlinx.coroutines.* + +class DataSyncService( + private val userRepository: UserRepository, + private val orderRepository: OrderRepository, + private val inventoryClient: InventoryClient +) { + + /** + * Launches coroutines using GlobalScope — they are not tied to any lifecycle. + * If the service is destroyed or the app shuts down, these coroutines keep running + * and cannot be cancelled as a group. + */ + fun startPeriodicSync(): Job = GlobalScope.launch(Dispatchers.IO) { + while (isActive) { + syncAll() + delay(60_000) + } + } + + /** + * Each task is launched into GlobalScope independently. + * There is no parent scope to cancel them together, and exceptions + * in one do not cancel the others. 
+ */ + suspend fun syncAll(): SyncReport { + val userJob = GlobalScope.async(Dispatchers.IO) { syncUsers() } + val orderJob = GlobalScope.async(Dispatchers.IO) { syncOrders() } + val inventoryJob = GlobalScope.async(Dispatchers.IO) { syncInventory() } + + return SyncReport( + usersUpdated = userJob.await(), + ordersUpdated = orderJob.await(), + itemsUpdated = inventoryJob.await() + ) + } + + private suspend fun syncUsers(): Int = userRepository.fetchAndUpdate() + private suspend fun syncOrders(): Int = orderRepository.fetchAndUpdate() + private suspend fun syncInventory(): Int = inventoryClient.syncAll() + + fun stop() { + // No-op — nothing to cancel because GlobalScope outlives everything + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_structured_concurrency/good/DataSyncService.kt b/priv/combined_metrics/samples/code_smells/uses_structured_concurrency/good/DataSyncService.kt new file mode 100644 index 0000000..92cbb33 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_structured_concurrency/good/DataSyncService.kt @@ -0,0 +1,49 @@ +package com.example.sync + +import kotlinx.coroutines.* + +class DataSyncService( + private val userRepository: UserRepository, + private val orderRepository: OrderRepository, + private val inventoryClient: InventoryClient +) { + + // Uses a defined CoroutineScope tied to the service lifecycle + private val scope = CoroutineScope(SupervisorJob() + Dispatchers.IO) + + /** + * Launches a background sync job within the service scope. + * When the service is stopped, all child coroutines are cancelled via scope.cancel(). + */ + fun startPeriodicSync(): Job = scope.launch { + while (isActive) { + syncAll() + delay(60_000) + } + } + + /** + * Runs all sync tasks concurrently within a single coroutine scope. + * All tasks are children of the caller's scope — cancellation propagates correctly. 
+ */ + suspend fun syncAll(): SyncReport = coroutineScope { + val userSync = async { syncUsers() } + val orderSync = async { syncOrders() } + val inventorySync = async { syncInventory() } + + SyncReport( + usersUpdated = userSync.await(), + ordersUpdated = orderSync.await(), + itemsUpdated = inventorySync.await() + ) + } + + private suspend fun syncUsers(): Int = userRepository.fetchAndUpdate() + private suspend fun syncOrders(): Int = orderRepository.fetchAndUpdate() + private suspend fun syncInventory(): Int = inventoryClient.syncAll() + + fun stop() { + // Cancels all coroutines launched in this scope + scope.cancel() + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_using_statement_for_disposables/bad/ReportExporter.cs b/priv/combined_metrics/samples/code_smells/uses_using_statement_for_disposables/bad/ReportExporter.cs new file mode 100644 index 0000000..70fc219 --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_using_statement_for_disposables/bad/ReportExporter.cs @@ -0,0 +1,81 @@ +using System; +using System.Data.SqlClient; +using System.IO; +using System.Text; + +namespace Reporting +{ + public class ReportExporter + { + private readonly string _connectionString; + + public ReportExporter(string connectionString) + { + _connectionString = connectionString; + } + + public void ExportToFile(string reportName, string outputPath) + { + // Manual resource management — Dispose never called if an exception occurs + var connection = new SqlConnection(_connectionString); + connection.Open(); + + var command = new SqlCommand( + "SELECT * FROM Reports WHERE Name = @name", connection); + command.Parameters.AddWithValue("@name", reportName); + + var reader = command.ExecuteReader(); + var writer = new StreamWriter(outputPath, append: false, Encoding.UTF8); + + writer.WriteLine($"Report: {reportName}"); + writer.WriteLine(new string('-', 40)); + + while (reader.Read()) + { + writer.WriteLine( + $"{reader["Date"]:yyyy-MM-dd} | 
{reader["Value"]:N2}"); + } + + // These Dispose calls are never reached if an exception is thrown above + reader.Dispose(); + command.Dispose(); + writer.Dispose(); + connection.Dispose(); + } + + public byte[] ExportToBytes(int reportId) + { + var memoryStream = new MemoryStream(); + var writer = new StreamWriter(memoryStream, Encoding.UTF8); + + var connection = new SqlConnection(_connectionString); + connection.Open(); + + var command = new SqlCommand( + "SELECT * FROM ReportRows WHERE ReportId = @id ORDER BY RowIndex", connection); + command.Parameters.AddWithValue("@id", reportId); + + var reader = command.ExecuteReader(); + while (reader.Read()) + writer.WriteLine(reader["Content"].ToString()); + + writer.Flush(); + var result = memoryStream.ToArray(); + + // Missing dispose calls on reader, command, connection + writer.Dispose(); + memoryStream.Dispose(); + + return result; + } + + public void CopyReport(string sourcePath, string destinationPath) + { + var source = new FileStream(sourcePath, FileMode.Open, FileAccess.Read); + var destination = new FileStream(destinationPath, FileMode.Create, FileAccess.Write); + source.CopyTo(destination); + source.Dispose(); + destination.Dispose(); // not reached if CopyTo throws + } + } +} diff --git a/priv/combined_metrics/samples/code_smells/uses_using_statement_for_disposables/good/ReportExporter.cs b/priv/combined_metrics/samples/code_smells/uses_using_statement_for_disposables/good/ReportExporter.cs new file mode 100644 index 0000000..5ac04ac --- /dev/null +++ b/priv/combined_metrics/samples/code_smells/uses_using_statement_for_disposables/good/ReportExporter.cs @@ -0,0 +1,66 @@ +using System; +using System.Data.SqlClient; +using System.IO; +using System.Text; + +namespace Reporting +{ + public class ReportExporter + { + private readonly string _connectionString; + + public ReportExporter(string connectionString) + { + _connectionString = connectionString; + } + + public void ExportToFile(string reportName, string 
outputPath) + { + using var connection = new SqlConnection(_connectionString); + connection.Open(); + + using var command = new SqlCommand( + "SELECT * FROM Reports WHERE Name = @name", connection); + command.Parameters.AddWithValue("@name", reportName); + + using var reader = command.ExecuteReader(); + using var writer = new StreamWriter(outputPath, append: false, Encoding.UTF8); + + writer.WriteLine($"Report: {reportName}"); + writer.WriteLine(new string('-', 40)); + + while (reader.Read()) + { + writer.WriteLine( + $"{reader["Date"]:yyyy-MM-dd} | {reader["Value"]:N2}"); + } + } + + public byte[] ExportToBytes(int reportId) + { + using var memoryStream = new MemoryStream(); + using var writer = new StreamWriter(memoryStream, Encoding.UTF8, leaveOpen: true); + + using var connection = new SqlConnection(_connectionString); + connection.Open(); + + using var command = new SqlCommand( + "SELECT * FROM ReportRows WHERE ReportId = @id ORDER BY RowIndex", connection); + command.Parameters.AddWithValue("@id", reportId); + + using var reader = command.ExecuteReader(); + while (reader.Read()) + writer.WriteLine(reader["Content"].ToString()); + + writer.Flush(); + return memoryStream.ToArray(); + } + + public void CopyReport(string sourcePath, string destinationPath) + { + using var source = new FileStream(sourcePath, FileMode.Open, FileAccess.Read); + using var destination = new FileStream(destinationPath, FileMode.Create, FileAccess.Write); + source.CopyTo(destination); + } + } +} diff --git a/priv/combined_metrics/samples/consistency/all_methods_declare_visibility/bad/PaymentGateway.php b/priv/combined_metrics/samples/consistency/all_methods_declare_visibility/bad/PaymentGateway.php new file mode 100644 index 0000000..e72c4f2 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/all_methods_declare_visibility/bad/PaymentGateway.php @@ -0,0 +1,70 @@ +apiKey = $apiKey; + $this->baseUrl = $baseUrl; + } + + // Missing visibility — defaults to public implicitly in 
PHP, but PSR-12 requires explicit declaration + function charge($amountCents, $token) + { + $this->requestCount++; + return $this->post('/v1/charges', ['amount' => $amountCents, 'source' => $token]); + } + + // Missing visibility + function refund($transactionId, $amountCents) + { + $this->requestCount++; + return $this->post('/v1/refunds', ['transaction_id' => $transactionId, 'amount' => $amountCents]); + } + + public function getRequestCount() + { + return $this->requestCount; + } + + // Missing visibility on static method + static function getTotalRequests() + { + return 0; + } + + // Missing visibility + function buildHeaders() + { + return [ + 'Authorization' => "Bearer {$this->apiKey}", + 'Content-Type' => 'application/json', + ]; + } + + // Missing visibility on private method + function post($path, $payload) + { + return []; + } + + // Missing visibility + function buildUrl($path) + { + return rtrim($this->baseUrl, '/') . $path; + } + + // Missing visibility on abstract-style helper + function validateAmount($amountCents) + { + if ($amountCents <= 0) { + throw new \InvalidArgumentException("Amount must be positive"); + } + } +} diff --git a/priv/combined_metrics/samples/consistency/all_methods_declare_visibility/good/PaymentGateway.php b/priv/combined_metrics/samples/consistency/all_methods_declare_visibility/good/PaymentGateway.php new file mode 100644 index 0000000..a90368d --- /dev/null +++ b/priv/combined_metrics/samples/consistency/all_methods_declare_visibility/good/PaymentGateway.php @@ -0,0 +1,73 @@ +apiKey = $apiKey; + $this->baseUrl = $baseUrl; + } + + public function charge(int $amountCents, string $token): array + { + $this->requestCount++; + self::$totalRequests++; + + return $this->post('/v1/charges', ['amount' => $amountCents, 'source' => $token]); + } + + public function refund(string $transactionId, int $amountCents): array + { + $this->requestCount++; + self::$totalRequests++; + + return $this->post('/v1/refunds', ['transaction_id' => 
$transactionId, 'amount' => $amountCents]); + } + + public function getRequestCount(): int + { + return $this->requestCount; + } + + public static function getTotalRequests(): int + { + return self::$totalRequests; + } + + protected function buildHeaders(): array + { + return [ + 'Authorization' => "Bearer {$this->apiKey}", + 'Content-Type' => 'application/json', + ]; + } + + private function post(string $path, array $payload): array + { + $url = $this->buildUrl($path); + // HTTP client logic + return []; + } + + private function buildUrl(string $path): string + { + return rtrim($this->baseUrl, '/') . $path; + } + + private function validateAmount(int $amountCents): void + { + if ($amountCents <= 0) { + throw new \InvalidArgumentException("Amount must be positive: {$amountCents}"); + } + } +} diff --git a/priv/combined_metrics/samples/consistency/all_properties_declare_visibility/bad/OrderService.php b/priv/combined_metrics/samples/consistency/all_properties_declare_visibility/bad/OrderService.php new file mode 100644 index 0000000..fc0fe00 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/all_properties_declare_visibility/bad/OrderService.php @@ -0,0 +1,70 @@ +orderRepository = $orderRepository; + $this->paymentService = $paymentService; + $this->notificationService = $notificationService; + self::$instanceCount++; + } + + public function place($customerId, $items) + { + if (count($items) > self::MAX_ITEMS_PER_ORDER) { + throw new \InvalidArgumentException('Too many items'); + } + + $order = new \stdClass(); + $order->customerId = $customerId; + $order->currency = $this->defaultCurrency; + $order->items = $items; + + $this->orderRepository->save($order); + + if ($this->auditEnabled) { + $this->logPlacement($order); + } + + return $order; + } + + public function cancel($orderId) + { + $order = $this->orderRepository->findById($orderId); + + if ($order === null) { + return false; + } + + $order->status = 'cancelled'; + 
$this->orderRepository->save($order); + + return true; + } + + static function getInstanceCount() // missing visibility on static method + { + return self::$instanceCount; + } + + function logPlacement($order): void // missing visibility on instance method + { + // audit logging + } +} diff --git a/priv/combined_metrics/samples/consistency/all_properties_declare_visibility/good/OrderService.php b/priv/combined_metrics/samples/consistency/all_properties_declare_visibility/good/OrderService.php new file mode 100644 index 0000000..233468f --- /dev/null +++ b/priv/combined_metrics/samples/consistency/all_properties_declare_visibility/good/OrderService.php @@ -0,0 +1,79 @@ +orderRepository = $orderRepository; + $this->paymentService = $paymentService; + $this->notificationService = $notificationService; + self::$instanceCount++; + } + + public function place(int $customerId, array $items): Order + { + if (count($items) > self::MAX_ITEMS_PER_ORDER) { + throw new \InvalidArgumentException( + 'Order exceeds maximum of ' . self::MAX_ITEMS_PER_ORDER . 
' items' + ); + } + + $order = new Order(customerId: $customerId, currency: $this->defaultCurrency); + foreach ($items as $item) { + $order->addItem($item['product_id'], $item['quantity'], $item['unit_price']); + } + + $this->orderRepository->save($order); + + if ($this->auditEnabled) { + $this->logPlacement($order); + } + + return $order; + } + + public function cancel(int $orderId): bool + { + $order = $this->orderRepository->findById($orderId); + + if ($order === null) { + throw new OrderNotFoundException("Order {$orderId} not found"); + } + + $order->setStatus('cancelled'); + $this->orderRepository->save($order); + $this->notificationService->notifyCancellation($order); + + return true; + } + + public static function getInstanceCount(): int + { + return self::$instanceCount; + } + + private function logPlacement(Order $order): void + { + // audit logging + } +} diff --git a/priv/combined_metrics/samples/consistency/consistent_casing_within_file/bad/analytics.ex b/priv/combined_metrics/samples/consistency/consistent_casing_within_file/bad/analytics.ex new file mode 100644 index 0000000..372546b --- /dev/null +++ b/priv/combined_metrics/samples/consistency/consistent_casing_within_file/bad/analytics.ex @@ -0,0 +1,78 @@ +defmodule Analytics do + @moduledoc "Tracks and reports on user events and analytics data" + + def track_event(userId, eventName, properties) do + timeStamp = DateTime.utc_now() + eventData = build_event(userId, eventName, properties, timeStamp) + store_event(eventData) + end + + def build_event(user_id, event_name, props, timestamp) do + %{ + userId: user_id, + eventName: event_name, + properties: props, + createdAt: timestamp + } + end + + def get_user_events(userId, opts \\ []) do + page_size = Keyword.get(opts, :pageSize, 20) + startDate = Keyword.get(opts, :start_date) + endDate = Keyword.get(opts, :end_date) + + fetch_events(userId, startDate, endDate, page_size) + end + + def aggregate_events(eventList) do + eventList + |> Enum.group_by(fn 
event -> event.eventName end) + |> Enum.map(fn {event_name, events} -> + event_count = length(events) + {event_name, event_count} + end) + |> Map.new() + end + + def compute_retention(userList, start_date, endDate) do + activeUsers = + userList + |> Enum.filter(fn u -> + last_seen = u.lastSeenAt + DateTime.compare(last_seen, start_date) == :gt and + DateTime.compare(last_seen, endDate) == :lt + end) + + totalUsers = length(userList) + activeCount = length(activeUsers) + + if totalUsers > 0 do + retentionRate = activeCount / totalUsers + {:ok, retentionRate} + else + {:error, :no_users} + end + end + + def format_report(reportData) do + event_count = reportData.totalEvents + uniqueUsers = reportData.unique_users + topEvent = reportData.topEventName + + %{ + summary: "#{event_count} events from #{uniqueUsers} users", + top_event: topEvent, + generatedAt: DateTime.utc_now() + } + end + + def filter_by_property(eventList, propertyKey, propertyVal) do + Enum.filter(eventList, fn event -> + val = Map.get(event.properties, propertyKey) + val == propertyVal + end) + end + + defp store_event(eventData), do: {:ok, eventData} + defp fetch_events(_userId, _start, _end, _pageSize), do: [] +end diff --git a/priv/combined_metrics/samples/consistency/consistent_casing_within_file/config.yml b/priv/combined_metrics/samples/consistency/consistent_casing_within_file/config.yml new file mode 100644 index 0000000..6955881 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/consistent_casing_within_file/config.yml @@ -0,0 +1 @@ +doc: "A file should use one naming convention throughout — no mixing of camelCase and snake_case for the same kind of identifier." 
diff --git a/priv/combined_metrics/samples/consistency/consistent_casing_within_file/good/analytics.ex b/priv/combined_metrics/samples/consistency/consistent_casing_within_file/good/analytics.ex new file mode 100644 index 0000000..4e6adb5 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/consistent_casing_within_file/good/analytics.ex @@ -0,0 +1,78 @@ +defmodule Analytics do + @moduledoc "Tracks and reports on user events and analytics data" + + def track_event(user_id, event_name, properties) do + timestamp = DateTime.utc_now() + event_data = build_event(user_id, event_name, properties, timestamp) + store_event(event_data) + end + + def build_event(user_id, event_name, properties, timestamp) do + %{ + user_id: user_id, + event_name: event_name, + properties: properties, + created_at: timestamp + } + end + + def get_user_events(user_id, opts \\ []) do + page_size = Keyword.get(opts, :page_size, 20) + start_date = Keyword.get(opts, :start_date) + end_date = Keyword.get(opts, :end_date) + + fetch_events(user_id, start_date, end_date, page_size) + end + + def aggregate_events(event_list) do + event_list + |> Enum.group_by(fn event -> event.event_name end) + |> Enum.map(fn {event_name, events} -> + event_count = length(events) + {event_name, event_count} + end) + |> Map.new() + end + + def compute_retention(user_list, start_date, end_date) do + active_users = + user_list + |> Enum.filter(fn user -> + last_seen = user.last_seen_at + DateTime.compare(last_seen, start_date) == :gt and + DateTime.compare(last_seen, end_date) == :lt + end) + + total_users = length(user_list) + active_count = length(active_users) + + if total_users > 0 do + retention_rate = active_count / total_users + {:ok, retention_rate} + else + {:error, :no_users} + end + end + + def format_report(report_data) do + event_count = report_data.total_events + unique_users = report_data.unique_users + top_event = report_data.top_event_name + + %{ + summary: "#{event_count} events from 
#{unique_users} users", + top_event: top_event, + generated_at: DateTime.utc_now() + } + end + + def filter_by_property(event_list, property_key, property_value) do + Enum.filter(event_list, fn event -> + value = Map.get(event.properties, property_key) + value == property_value + end) + end + + defp store_event(event_data), do: {:ok, event_data} + defp fetch_events(_user_id, _start_date, _end_date, _page_size), do: [] +end diff --git a/priv/combined_metrics/samples/consistency/consistent_error_return_shape/bad/accounts.ex b/priv/combined_metrics/samples/consistency/consistent_error_return_shape/bad/accounts.ex new file mode 100644 index 0000000..3acfccd --- /dev/null +++ b/priv/combined_metrics/samples/consistency/consistent_error_return_shape/bad/accounts.ex @@ -0,0 +1,102 @@ +defmodule Accounts do + @moduledoc "Manages user accounts and authentication" + + def get_user(id) do + case fetch_user_from_db(id) do + nil -> nil + user -> user + end + end + + def create_user(attrs) do + cond do + Map.get(attrs, :email) == nil -> + "email is required" + + not valid_email?(attrs.email) -> + {:error, "invalid email format"} + + user_exists?(attrs.email) -> + false + + true -> + do_insert_user(attrs) + end + end + + def update_user(id, attrs) do + case fetch_user_from_db(id) do + nil -> + {:error, :not_found} + + user -> + case validate_attrs(attrs) do + false -> "validation failed" + true -> do_update_user(user, attrs) + end + end + end + + def delete_user(id) do + case fetch_user_from_db(id) do + nil -> false + user -> do_delete_user(user) + end + end + + def authenticate(email, password) do + case fetch_user_by_email(email) do + nil -> + {:error, :not_found} + + user -> + if check_password(user, password) do + {:ok, user} + else + "invalid password" + end + end + end + + def change_password(user, old_password, new_password) do + cond do + not check_password(user, old_password) -> + {:error, :wrong_password} + + String.length(new_password) < 8 -> + nil + + true -> + 
do_update_password(user, new_password) + end + end + + def list_users(filters \\ %{}) do + case fetch_all_users(filters) do + [] -> false + users -> users + end + end + + def verify_email(user, token) do + case validate_token(token) do + :invalid -> "token is invalid" + :expired -> {:error, :token_expired} + :ok -> do_verify_email(user) + end + end + + defp fetch_user_from_db(_id), do: nil + defp fetch_user_by_email(_email), do: nil + defp fetch_all_users(_filters), do: [] + defp do_insert_user(attrs), do: {:ok, attrs} + defp do_update_user(user, _attrs), do: {:ok, user} + defp do_delete_user(_user), do: {:ok, :deleted} + defp do_update_password(user, _pw), do: {:ok, user} + defp do_verify_email(user), do: {:ok, user} + defp valid_email?(_email), do: true + defp user_exists?(_email), do: false + defp validate_attrs(_attrs), do: true + defp check_password(_user, _pw), do: true + defp validate_token(_token), do: :ok +end diff --git a/priv/combined_metrics/samples/consistency/consistent_error_return_shape/config.yml b/priv/combined_metrics/samples/consistency/consistent_error_return_shape/config.yml new file mode 100644 index 0000000..5b61d8f --- /dev/null +++ b/priv/combined_metrics/samples/consistency/consistent_error_return_shape/config.yml @@ -0,0 +1 @@ +doc: "All functions in a module should return errors in the same shape — mixed `nil`, `false`, and `{:error, _}` returns are confusing." 
diff --git a/priv/combined_metrics/samples/consistency/consistent_error_return_shape/good/accounts.ex b/priv/combined_metrics/samples/consistency/consistent_error_return_shape/good/accounts.ex new file mode 100644 index 0000000..045cb19 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/consistent_error_return_shape/good/accounts.ex @@ -0,0 +1,94 @@ +defmodule Accounts do + @moduledoc "Manages user accounts and authentication" + + def get_user(id) do + case fetch_user_from_db(id) do + nil -> {:error, :not_found} + user -> {:ok, user} + end + end + + def create_user(attrs) do + cond do + Map.get(attrs, :email) == nil -> + {:error, :email_required} + + not valid_email?(attrs.email) -> + {:error, :invalid_email} + + user_exists?(attrs.email) -> + {:error, :email_taken} + + true -> + do_insert_user(attrs) + end + end + + def update_user(id, attrs) do + with {:ok, user} <- get_user(id), + :ok <- validate_attrs(attrs) do + do_update_user(user, attrs) + end + end + + def delete_user(id) do + case fetch_user_from_db(id) do + nil -> {:error, :not_found} + user -> do_delete_user(user) + end + end + + def authenticate(email, password) do + case fetch_user_by_email(email) do + nil -> + {:error, :not_found} + + user -> + if check_password(user, password) do + {:ok, user} + else + {:error, :invalid_password} + end + end + end + + def change_password(user, old_password, new_password) do + cond do + not check_password(user, old_password) -> + {:error, :wrong_password} + + String.length(new_password) < 8 -> + {:error, :password_too_short} + + true -> + do_update_password(user, new_password) + end + end + + def list_users(filters \\ %{}) do + users = fetch_all_users(filters) + {:ok, users} + end + + def verify_email(user, token) do + case validate_token(token) do + :invalid -> {:error, :invalid_token} + :expired -> {:error, :token_expired} + :ok -> do_verify_email(user) + end + end + + defp fetch_user_from_db(_id), do: nil + defp fetch_user_by_email(_email), do: nil + 
defp fetch_all_users(_filters), do: [] + defp do_insert_user(attrs), do: {:ok, attrs} + defp do_update_user(user, _attrs), do: {:ok, user} + defp do_delete_user(_user), do: {:ok, :deleted} + defp do_update_password(user, _pw), do: {:ok, user} + defp do_verify_email(user), do: {:ok, user} + defp valid_email?(_email), do: true + defp user_exists?(_email), do: false + defp validate_attrs(_attrs), do: :ok + defp check_password(_user, _pw), do: true + defp validate_token(_token), do: :ok +end diff --git a/priv/combined_metrics/samples/consistency/consistent_function_style/bad/formatter.ex b/priv/combined_metrics/samples/consistency/consistent_function_style/bad/formatter.ex new file mode 100644 index 0000000..44ee61e --- /dev/null +++ b/priv/combined_metrics/samples/consistency/consistent_function_style/bad/formatter.ex @@ -0,0 +1,66 @@ +defmodule Formatter do + @moduledoc "Formats and serializes various data types for display and output" + + def format_name(first, last), do: "#{first} #{last}" + + def format_full_address(address) do + "#{address.street}, #{address.city}, #{address.state} #{address.zip}" + end + + def format_price(cents), do: "$#{:erlang.float_to_binary(cents / 100, decimals: 2)}" + + def format_date(date) do + "#{date.year}-#{pad(date.month)}-#{pad(date.day)}" + end + + def format_phone(digits), do: "(#{String.slice(digits, 0, 3)}) #{String.slice(digits, 3, 3)}-#{String.slice(digits, 6, 4)}" + + def format_percentage(value) do + rounded = Float.round(value * 100, 1) + "#{rounded}%" + end + + def format_bytes(bytes), do: if(bytes < 1024, do: "#{bytes} B", else: "#{Float.round(bytes / 1024, 1)} KB") + + def format_duration(seconds) do + minutes = div(seconds, 60) + remaining = rem(seconds, 60) + "#{minutes}m #{remaining}s" + end + + def serialize_user(user), do: %{id: user.id, name: format_name(user.first_name, user.last_name), email: user.email} + + def serialize_order(order) do + %{ + id: order.id, + total: format_price(order.total_cents), + placed_at: 
format_date(order.inserted_at), + items: Enum.map(order.items, &serialize_order_item/1) + } + end + + def serialize_order_item(item), do: %{name: item.name, quantity: item.quantity, unit_price: format_price(item.unit_price_cents)} + + def truncate(text, max_length) do + if String.length(text) > max_length do + String.slice(text, 0, max_length - 3) <> "..." + else + text + end + end + + def slugify(text), do: text |> String.downcase() |> String.replace(~r/[^a-z0-9]+/, "-") |> String.trim("-") + + def format_list(items) do + case length(items) do + 0 -> "none" + 1 -> hd(items) + 2 -> "#{Enum.at(items, 0)} and #{Enum.at(items, 1)}" + _ -> + all_but_last = Enum.join(Enum.drop(items, -1), ", ") + "#{all_but_last}, and #{List.last(items)}" + end + end + + defp pad(n), do: String.pad_leading(Integer.to_string(n), 2, "0") +end diff --git a/priv/combined_metrics/samples/consistency/consistent_function_style/config.yml b/priv/combined_metrics/samples/consistency/consistent_function_style/config.yml new file mode 100644 index 0000000..28e73a2 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/consistent_function_style/config.yml @@ -0,0 +1 @@ +doc: "A module should not mix one-liner and multi-clause function definitions for the same concern." 
diff --git a/priv/combined_metrics/samples/consistency/consistent_function_style/good/formatter.ex b/priv/combined_metrics/samples/consistency/consistent_function_style/good/formatter.ex new file mode 100644 index 0000000..1f43bed --- /dev/null +++ b/priv/combined_metrics/samples/consistency/consistent_function_style/good/formatter.ex @@ -0,0 +1,96 @@ +defmodule Formatter do + @moduledoc "Formats and serializes various data types for display and output" + + def format_name(first, last) do + "#{first} #{last}" + end + + def format_full_address(address) do + "#{address.street}, #{address.city}, #{address.state} #{address.zip}" + end + + def format_price(cents) do + "$#{:erlang.float_to_binary(cents / 100, decimals: 2)}" + end + + def format_date(date) do + "#{date.year}-#{pad(date.month)}-#{pad(date.day)}" + end + + def format_phone(digits) do + area = String.slice(digits, 0, 3) + prefix = String.slice(digits, 3, 3) + line = String.slice(digits, 6, 4) + "(#{area}) #{prefix}-#{line}" + end + + def format_percentage(value) do + rounded = Float.round(value * 100, 1) + "#{rounded}%" + end + + def format_bytes(bytes) when bytes < 1024 do + "#{bytes} B" + end + + def format_bytes(bytes) do + "#{Float.round(bytes / 1024, 1)} KB" + end + + def format_duration(seconds) do + minutes = div(seconds, 60) + remaining = rem(seconds, 60) + "#{minutes}m #{remaining}s" + end + + def serialize_user(user) do + %{ + id: user.id, + name: format_name(user.first_name, user.last_name), + email: user.email + } + end + + def serialize_order(order) do + %{ + id: order.id, + total: format_price(order.total_cents), + placed_at: format_date(order.inserted_at), + items: Enum.map(order.items, &serialize_order_item/1) + } + end + + def serialize_order_item(item) do + %{ + name: item.name, + quantity: item.quantity, + unit_price: format_price(item.unit_price_cents) + } + end + + def truncate(text, max_length) do + if String.length(text) > max_length do + String.slice(text, 0, max_length - 3) <> "..." 
+ else + text + end + end + + def slugify(text) do + text + |> String.downcase() + |> String.replace(~r/[^a-z0-9]+/, "-") + |> String.trim("-") + end + + def format_list([]), do: "none" + def format_list([item]), do: item + def format_list([a, b]), do: "#{a} and #{b}" + + def format_list(items) do + all_but_last = items |> Enum.drop(-1) |> Enum.join(", ") + "#{all_but_last}, and #{List.last(items)}" + end + + defp pad(n), do: String.pad_leading(Integer.to_string(n), 2, "0") +end diff --git a/priv/combined_metrics/samples/consistency/doc_vs_comment_separation/bad/subscriptions.ex b/priv/combined_metrics/samples/consistency/doc_vs_comment_separation/bad/subscriptions.ex new file mode 100644 index 0000000..619771b --- /dev/null +++ b/priv/combined_metrics/samples/consistency/doc_vs_comment_separation/bad/subscriptions.ex @@ -0,0 +1,58 @@ +defmodule MyApp.Subscriptions do + # Bad: no @moduledoc at all — public module with no documentation + # The module purpose, lifecycle, and conventions are undocumented. + + alias MyApp.Subscriptions.{Subscription, Plan} + alias MyApp.Repo + + # Bad: using a plain comment instead of @doc for a public function. + # Consumers cannot use `h MyApp.Subscriptions.create/2` in IEx. + # Creates a new subscription + @spec create(integer(), Plan.t()) :: {:ok, Subscription.t()} | {:error, Ecto.Changeset.t()} + def create(customer_id, %Plan{} = plan) do + initial_status = if plan.trial_days > 0, do: :trialing, else: :active + + trial_ends_at = + if plan.trial_days > 0 do + DateTime.add(DateTime.utc_now(), plan.trial_days * 86_400, :second) + end + + %Subscription{} + |> Subscription.changeset(%{ + customer_id: customer_id, + plan_id: plan.id, + status: initial_status, + trial_ends_at: trial_ends_at + }) + |> Repo.insert() + end + + @doc """ + Cancels the subscription. + + Implementation note: we first check if the changeset is valid by calling + Subscription.changeset/2, then call Repo.update/1. 
The Subscription schema + has a :cancelled_at field that gets set here. We also emit a telemetry event + by calling :telemetry.execute/3 with the [:my_app, :subscriptions, :transitioned] + event name. The metadata map has :from and :to keys. The Repo is aliased at the + top of this module. We use DateTime.utc_now() for the timestamp. + """ + # Bad: @doc describes the implementation in exhaustive detail — not the contract. + # The doc should explain what the function does for callers, not how it works internally. + @spec cancel(Subscription.t()) :: {:ok, Subscription.t()} | {:error, Ecto.Changeset.t()} + def cancel(%Subscription{status: :cancelled} = sub), do: {:ok, sub} + + def cancel(%Subscription{} = sub) do + sub + |> Subscription.changeset(%{status: :cancelled, cancelled_at: DateTime.utc_now()}) + |> Repo.update() + end + + # Bad: no @doc on a public function — leaves callers guessing + @spec reactivate(Subscription.t()) :: {:ok, Subscription.t()} | {:error, atom()} + def reactivate(%Subscription{} = sub) do + sub + |> Subscription.changeset(%{status: :active, cancelled_at: nil}) + |> Repo.update() + end +end diff --git a/priv/combined_metrics/samples/consistency/doc_vs_comment_separation/good/subscriptions.ex b/priv/combined_metrics/samples/consistency/doc_vs_comment_separation/good/subscriptions.ex new file mode 100644 index 0000000..fab6629 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/doc_vs_comment_separation/good/subscriptions.ex @@ -0,0 +1,67 @@ +defmodule MyApp.Subscriptions do + @moduledoc """ + Public API for managing customer subscriptions. + + Subscriptions move through the following lifecycle: + `:trialing` -> `:active` -> `:past_due` -> `:cancelled` + + All state transitions emit a telemetry event under + `[:my_app, :subscriptions, :transitioned]`. + """ + + alias MyApp.Subscriptions.{Subscription, Plan} + alias MyApp.Repo + + @doc """ + Creates a new subscription for the given customer on the specified plan. 
+ + Returns `{:ok, subscription}` on success, or `{:error, changeset}` when + validation fails (e.g. the customer already has an active subscription). + """ + @spec create(integer(), Plan.t()) :: {:ok, Subscription.t()} | {:error, Ecto.Changeset.t()} + def create(customer_id, %Plan{} = plan) do + # Determine initial status: new customers start in a trial period + initial_status = if plan.trial_days > 0, do: :trialing, else: :active + + # Compute trial end date; nil when the plan has no trial + trial_ends_at = + if plan.trial_days > 0 do + DateTime.add(DateTime.utc_now(), plan.trial_days * 86_400, :second) + end + + %Subscription{} + |> Subscription.changeset(%{ + customer_id: customer_id, + plan_id: plan.id, + status: initial_status, + trial_ends_at: trial_ends_at + }) + |> Repo.insert() + end + + @doc """ + Cancels a subscription immediately. + + If the subscription is already cancelled this is a no-op and + `{:ok, subscription}` is still returned. + """ + @spec cancel(Subscription.t()) :: {:ok, Subscription.t()} | {:error, Ecto.Changeset.t()} + def cancel(%Subscription{status: :cancelled} = sub), do: {:ok, sub} + + def cancel(%Subscription{} = sub) do + sub + |> Subscription.changeset(%{status: :cancelled, cancelled_at: DateTime.utc_now()}) + |> Repo.update() + |> tap(fn + {:ok, updated} -> + # Emit telemetry so billing can react to the cancellation + :telemetry.execute([:my_app, :subscriptions, :transitioned], %{}, %{ + from: sub.status, + to: updated.status + }) + + _ -> + :ok + end) + end +end diff --git a/priv/combined_metrics/samples/consistency/overloads_are_contiguous/bad/PaymentProcessor.java b/priv/combined_metrics/samples/consistency/overloads_are_contiguous/bad/PaymentProcessor.java new file mode 100644 index 0000000..40e5226 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/overloads_are_contiguous/bad/PaymentProcessor.java @@ -0,0 +1,62 @@ +package com.example.payments; + +import java.math.BigDecimal; +import java.util.Currency; +import 
java.util.Locale; + +public class PaymentProcessor { + + private final PaymentGateway gateway; + private final AuditLog auditLog; + + public PaymentProcessor(PaymentGateway gateway, AuditLog auditLog) { + this.gateway = gateway; + this.auditLog = auditLog; + } + + // First process() overload + public PaymentResult process(PaymentRequest request) { + return process(request, Currency.getInstance(Locale.US)); + } + + // refund() overload interspersed between process() overloads + public RefundResult refund(String transactionId) { + return refund(transactionId, null); + } + + // Second process() overload — separated from the first by refund() + public PaymentResult process(PaymentRequest request, Currency currency) { + return process(request, currency, false); + } + + public boolean isHealthy() { + return gateway.ping(); + } + + // Third process() overload — far from the other two + public PaymentResult process(PaymentRequest request, Currency currency, boolean capture) { + ChargeRequest charge = ChargeRequest.builder() + .amount(request.getAmount()) + .currency(currency) + .capture(capture) + .token(request.getPaymentToken()) + .build(); + PaymentResult result = gateway.charge(charge); + auditLog.record(request, result); + return result; + } + + public PaymentSummary summarize(String merchantId) { + return gateway.fetchSummary(merchantId); + } + + // Second refund() overload — separated from the first by three other methods + public RefundResult refund(String transactionId, BigDecimal amount) { + RefundRequest refund = amount != null + ? 
RefundRequest.partial(transactionId, amount) + : RefundRequest.full(transactionId); + RefundResult result = gateway.refund(refund); + auditLog.record(refund, result); + return result; + } +} diff --git a/priv/combined_metrics/samples/consistency/overloads_are_contiguous/good/PaymentProcessor.java b/priv/combined_metrics/samples/consistency/overloads_are_contiguous/good/PaymentProcessor.java new file mode 100644 index 0000000..4c30cc7 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/overloads_are_contiguous/good/PaymentProcessor.java @@ -0,0 +1,59 @@ +package com.example.payments; + +import java.math.BigDecimal; +import java.util.Currency; +import java.util.Locale; + +public class PaymentProcessor { + + private final PaymentGateway gateway; + private final AuditLog auditLog; + + public PaymentProcessor(PaymentGateway gateway, AuditLog auditLog) { + this.gateway = gateway; + this.auditLog = auditLog; + } + + // All process() overloads are grouped together + public PaymentResult process(PaymentRequest request) { + return process(request, Currency.getInstance(Locale.US)); + } + + public PaymentResult process(PaymentRequest request, Currency currency) { + return process(request, currency, false); + } + + public PaymentResult process(PaymentRequest request, Currency currency, boolean capture) { + ChargeRequest charge = ChargeRequest.builder() + .amount(request.getAmount()) + .currency(currency) + .capture(capture) + .token(request.getPaymentToken()) + .build(); + PaymentResult result = gateway.charge(charge); + auditLog.record(request, result); + return result; + } + + // All refund() overloads are grouped together + public RefundResult refund(String transactionId) { + return refund(transactionId, null); + } + + public RefundResult refund(String transactionId, BigDecimal amount) { + RefundRequest refund = amount != null + ? 
RefundRequest.partial(transactionId, amount) + : RefundRequest.full(transactionId); + RefundResult result = gateway.refund(refund); + auditLog.record(refund, result); + return result; + } + + public boolean isHealthy() { + return gateway.ping(); + } + + public PaymentSummary summarize(String merchantId) { + return gateway.fetchSummary(merchantId); + } +} diff --git a/priv/combined_metrics/samples/consistency/override_annotation_present/bad/UserRepository.java b/priv/combined_metrics/samples/consistency/override_annotation_present/bad/UserRepository.java new file mode 100644 index 0000000..71ef3cd --- /dev/null +++ b/priv/combined_metrics/samples/consistency/override_annotation_present/bad/UserRepository.java @@ -0,0 +1,88 @@ +package com.example.users; + +import java.util.List; +import java.util.Objects; + +public class UserRepository extends AbstractRepository implements Auditable { + + private final DataSource dataSource; + + public UserRepository(DataSource dataSource) { + this.dataSource = dataSource; + } + + // Missing @Override — not obvious this is implementing the abstract method + public User findById(long id) { + return dataSource.query( + "SELECT * FROM users WHERE id = ?", + ps -> ps.setLong(1, id), + UserRepository::mapRow + ); + } + + // Missing @Override + public List findAll() { + return dataSource.queryList( + "SELECT * FROM users ORDER BY created_at DESC", + UserRepository::mapRow + ); + } + + // Missing @Override + public void save(User user) { + if (user.getId() == null) { + insert(user); + } else { + update(user); + } + } + + // Missing @Override + public void delete(long id) { + dataSource.execute("DELETE FROM users WHERE id = ?", ps -> ps.setLong(1, id)); + } + + // Missing @Override — implementing interface method without annotation + public String auditLabel() { + return "users"; + } + + // Missing @Override on Object methods + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof UserRepository)) return 
false; + UserRepository that = (UserRepository) o; + return Objects.equals(dataSource, that.dataSource); + } + + // Missing @Override + public int hashCode() { + return Objects.hash(dataSource); + } + + // Missing @Override + public String toString() { + return "UserRepository{dataSource=" + dataSource + "}"; + } + + private void insert(User user) { + dataSource.execute( + "INSERT INTO users (email, name, created_at) VALUES (?, ?, NOW())", + ps -> { + ps.setString(1, user.getEmail()); + ps.setString(2, user.getName()); + } + ); + } + + private void update(User user) { + dataSource.execute( + "UPDATE users SET email = ?, name = ? WHERE id = ?", + ps -> { + ps.setString(1, user.getEmail()); + ps.setString(2, user.getName()); + ps.setLong(3, user.getId()); + } + ); + } +} diff --git a/priv/combined_metrics/samples/consistency/override_annotation_present/good/UserRepository.java b/priv/combined_metrics/samples/consistency/override_annotation_present/good/UserRepository.java new file mode 100644 index 0000000..0e1124e --- /dev/null +++ b/priv/combined_metrics/samples/consistency/override_annotation_present/good/UserRepository.java @@ -0,0 +1,92 @@ +package com.example.users; + +import java.util.List; +import java.util.Objects; + +public class UserRepository extends AbstractRepository implements Auditable { + + private final DataSource dataSource; + + public UserRepository(DataSource dataSource) { + this.dataSource = dataSource; + } + + @Override + public User findById(long id) { + return dataSource.query( + "SELECT * FROM users WHERE id = ?", + ps -> ps.setLong(1, id), + UserRepository::mapRow + ); + } + + @Override + public List findAll() { + return dataSource.queryList( + "SELECT * FROM users ORDER BY created_at DESC", + UserRepository::mapRow + ); + } + + @Override + public void save(User user) { + if (user.getId() == null) { + insert(user); + } else { + update(user); + } + } + + @Override + public void delete(long id) { + dataSource.execute("DELETE FROM users WHERE 
id = ?", ps -> ps.setLong(1, id)); + } + + @Override + public String auditLabel() { + return "users"; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof UserRepository)) return false; + UserRepository that = (UserRepository) o; + return Objects.equals(dataSource, that.dataSource); + } + + @Override + public int hashCode() { + return Objects.hash(dataSource); + } + + @Override + public String toString() { + return "UserRepository{dataSource=" + dataSource + "}"; + } + + private void insert(User user) { + dataSource.execute( + "INSERT INTO users (email, name, created_at) VALUES (?, ?, NOW())", + ps -> { + ps.setString(1, user.getEmail()); + ps.setString(2, user.getName()); + } + ); + } + + private void update(User user) { + dataSource.execute( + "UPDATE users SET email = ?, name = ? WHERE id = ?", + ps -> { + ps.setString(1, user.getEmail()); + ps.setString(2, user.getName()); + ps.setLong(3, user.getId()); + } + ); + } + + private static User mapRow(ResultSet rs) throws SQLException { + return new User(rs.getLong("id"), rs.getString("email"), rs.getString("name")); + } +} diff --git a/priv/combined_metrics/samples/consistency/process_interactions_centralized/bad/sessions.ex b/priv/combined_metrics/samples/consistency/process_interactions_centralized/bad/sessions.ex new file mode 100644 index 0000000..9c67252 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/process_interactions_centralized/bad/sessions.ex @@ -0,0 +1,60 @@ +defmodule MyApp.SessionStore do + @moduledoc "Holds session state." 
+ + use GenServer + + def start_link(opts), do: GenServer.start_link(__MODULE__, opts, name: __MODULE__) + + @impl true + def init(_opts), do: {:ok, %{sessions: %{}}} + + @impl true + def handle_call({:put, token, session}, _from, state) do + {:reply, :ok, put_in(state, [:sessions, token], session)} + end + + def handle_call({:get, token}, _from, state) do + {:reply, Map.get(state.sessions, token), state} + end + + def handle_call(:count, _from, state) do + {:reply, map_size(state.sessions), state} + end + + @impl true + def handle_cast({:delete, token}, state) do + {:noreply, update_in(state, [:sessions], &Map.delete(&1, token))} + end +end + +# Bad: MyApp.AuthController calls GenServer directly instead of going through a facade +defmodule MyApp.AuthController do + def login(conn, %{"token" => token}) do + # Bad: direct GenServer call scattered in a controller + session = GenServer.call(MyApp.SessionStore, {:get, token}) + # ... + end +end + +# Bad: MyApp.Plugs.LoadSession also calls GenServer directly — duplication +defmodule MyApp.Plugs.LoadSession do + def call(conn, _opts) do + token = get_session_token(conn) + # Bad: same GenServer call repeated here — no single facade + session = GenServer.call(MyApp.SessionStore, {:get, token}) + assign(conn, :current_session, session) + end + + defp get_session_token(conn), do: Plug.Conn.get_req_header(conn, "x-session-token") |> List.first() + defp assign(conn, key, value), do: Map.put(conn, key, value) +end + +# Bad: yet another module talking directly to the GenServer +defmodule MyApp.SessionCleanup do + def delete_expired(tokens) do + Enum.each(tokens, fn token -> + # Bad: direct cast, not going through any facade + GenServer.cast(MyApp.SessionStore, {:delete, token}) + end) + end +end diff --git a/priv/combined_metrics/samples/consistency/process_interactions_centralized/good/sessions.ex b/priv/combined_metrics/samples/consistency/process_interactions_centralized/good/sessions.ex new file mode 100644 index 
0000000..9690d41 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/process_interactions_centralized/good/sessions.ex @@ -0,0 +1,76 @@ +defmodule MyApp.SessionStore do + @moduledoc """ + Facade for all interactions with the session cache GenServer. + All GenServer calls are centralised here — no other module calls + `GenServer.call/cast` on the session process directly. + """ + + use GenServer + + alias MyApp.Sessions.Session + + # --- Public API (the facade) --- + + def start_link(opts), do: GenServer.start_link(__MODULE__, opts, name: __MODULE__) + + @doc "Stores a session, returning the token." + @spec put(Session.t()) :: String.t() + def put(%Session{} = session) do + token = generate_token() + GenServer.call(__MODULE__, {:put, token, session}) + token + end + + @doc "Retrieves a session by token." + @spec get(String.t()) :: Session.t() | nil + def get(token) when is_binary(token) do + GenServer.call(__MODULE__, {:get, token}) + end + + @doc "Deletes a session by token." + @spec delete(String.t()) :: :ok + def delete(token) when is_binary(token) do + GenServer.cast(__MODULE__, {:delete, token}) + end + + @doc "Extends a session's TTL by the given number of seconds." + @spec touch(String.t(), pos_integer()) :: :ok + def touch(token, ttl_seconds) when is_binary(token) and is_integer(ttl_seconds) do + GenServer.cast(__MODULE__, {:touch, token, ttl_seconds}) + end + + @doc "Returns the number of active sessions." 
+ @spec count() :: non_neg_integer() + def count do + GenServer.call(__MODULE__, :count) + end + + # --- GenServer callbacks --- + + @impl true + def init(_opts), do: {:ok, %{sessions: %{}, expiry: %{}}} + + @impl true + def handle_call({:put, token, session}, _from, state) do + {:reply, :ok, put_in(state, [:sessions, token], session)} + end + + def handle_call({:get, token}, _from, state) do + {:reply, Map.get(state.sessions, token), state} + end + + def handle_call(:count, _from, state) do + {:reply, map_size(state.sessions), state} + end + + @impl true + def handle_cast({:delete, token}, state) do + {:noreply, update_in(state, [:sessions], &Map.delete(&1, token))} + end + + def handle_cast({:touch, _token, _ttl}, state) do + {:noreply, state} + end + + defp generate_token, do: Base.url_encode64(:crypto.strong_rand_bytes(32), padding: false) +end diff --git a/priv/combined_metrics/samples/consistency/protocol_conformance_in_separate_extension/bad/PaymentProcessor.swift b/priv/combined_metrics/samples/consistency/protocol_conformance_in_separate_extension/bad/PaymentProcessor.swift new file mode 100644 index 0000000..7161470 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/protocol_conformance_in_separate_extension/bad/PaymentProcessor.swift @@ -0,0 +1,53 @@ +import Foundation + +enum PaymentMethod { + case creditCard(last4: String) + case bankTransfer(accountNumber: String) + case digitalWallet(provider: String) +} + +enum PaymentStatus { + case pending, authorized, captured, refunded, failed +} + +// All protocol conformances declared inline with the primary type, +// making it harder to locate core logic vs. protocol implementations +struct PaymentTransaction: Equatable, Hashable, CustomStringConvertible, Codable { + let id: String + let amount: Decimal + let currency: String + let method: PaymentMethod + var status: PaymentStatus + let createdAt: Date + var completedAt: Date? + var failureReason: String? 
+ + // Equatable mixed in with stored properties + static func == (lhs: PaymentTransaction, rhs: PaymentTransaction) -> Bool { + return lhs.id == rhs.id + } + + // Hashable mixed in with stored properties + func hash(into hasher: inout Hasher) { + hasher.combine(id) + } + + // CustomStringConvertible mixed in with stored properties + var description: String { + return "PaymentTransaction(id: \(id), amount: \(amount) \(currency), status: \(status))" + } + + // Codable CodingKeys mixed in with stored properties + enum CodingKeys: String, CodingKey { + case id, amount, currency, status, createdAt, completedAt, failureReason + } + + // Business logic buried alongside protocol boilerplate + func isRefundable() -> Bool { + return status == .captured && completedAt != nil + } + + func summary() -> String { + return "\(currency) \(amount) via \(method)" + } +} diff --git a/priv/combined_metrics/samples/consistency/protocol_conformance_in_separate_extension/good/PaymentProcessor.swift b/priv/combined_metrics/samples/consistency/protocol_conformance_in_separate_extension/good/PaymentProcessor.swift new file mode 100644 index 0000000..f7cfcad --- /dev/null +++ b/priv/combined_metrics/samples/consistency/protocol_conformance_in_separate_extension/good/PaymentProcessor.swift @@ -0,0 +1,51 @@ +import Foundation + +enum PaymentMethod { + case creditCard(last4: String) + case bankTransfer(accountNumber: String) + case digitalWallet(provider: String) +} + +enum PaymentStatus { + case pending, authorized, captured, refunded, failed +} + +// Primary type definition — only core stored properties and init +struct PaymentTransaction { + let id: String + let amount: Decimal + let currency: String + let method: PaymentMethod + var status: PaymentStatus + let createdAt: Date + var completedAt: Date? + var failureReason: String? 
+} + +// MARK: - Equatable conformance in its own extension +extension PaymentTransaction: Equatable { + static func == (lhs: PaymentTransaction, rhs: PaymentTransaction) -> Bool { + return lhs.id == rhs.id + } +} + +// MARK: - Hashable conformance in its own extension +extension PaymentTransaction: Hashable { + func hash(into hasher: inout Hasher) { + hasher.combine(id) + } +} + +// MARK: - CustomStringConvertible in its own extension +extension PaymentTransaction: CustomStringConvertible { + var description: String { + return "PaymentTransaction(id: \(id), amount: \(amount) \(currency), status: \(status))" + } +} + +// MARK: - Codable in its own extension +extension PaymentTransaction: Codable { + enum CodingKeys: String, CodingKey { + case id, amount, currency, status, createdAt, completedAt, failureReason + } +} diff --git a/priv/combined_metrics/samples/consistency/same_concept_same_name/bad/auth.ex b/priv/combined_metrics/samples/consistency/same_concept_same_name/bad/auth.ex new file mode 100644 index 0000000..3dbbbc3 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/same_concept_same_name/bad/auth.ex @@ -0,0 +1,102 @@ +defmodule Auth do + @moduledoc "Handles authentication and session management" + + def login(account, password) do + case fetch_user(account.email) do + nil -> + {:error, :not_found} + + u -> + if verify_password(u, password) do + token = generate_token(u.id) + {:ok, token} + else + {:error, :invalid_credentials} + end + end + end + + def logout(usr) do + case fetch_active_session(usr.id) do + nil -> {:error, :no_session} + session -> invalidate_session(session) + end + end + + def register(params) do + with :ok <- validate_registration(params), + {:ok, new_account} <- create_user(params), + {:ok, _} <- send_verification_email(new_account) do + {:ok, new_account} + end + end + + def verify_token(token_string) do + case decode_token(token_string) do + {:ok, claims} -> + account = fetch_user_by_id(claims["sub"]) + {:ok, account} + + 
{:error, reason} -> + {:error, reason} + end + end + + def change_password(u, old_pw, new_pw) do + if verify_password(u, old_pw) do + hashed = hash_password(new_pw) + update_user_password(u.id, hashed) + else + {:error, :wrong_password} + end + end + + def request_password_reset(email_address) do + case fetch_user(email_address) do + nil -> + {:error, :not_found} + + account -> + reset_token = generate_reset_token(account.id) + send_reset_email(account.email, reset_token) + end + end + + def reset_password(reset_token, new_pw) do + with {:ok, usr_id} <- validate_reset_token(reset_token), + account <- fetch_user_by_id(usr_id), + hashed = hash_password(new_pw), + {:ok, updated} <- update_user_password(account.id, hashed) do + {:ok, updated} + end + end + + def list_sessions(account) do + fetch_sessions_for_user(account.id) + end + + def revoke_session(u, session_id) do + case fetch_session(session_id) do + %{user_id: ^u.id} = session -> invalidate_session(session) + _ -> {:error, :unauthorized} + end + end + + defp fetch_user(_email), do: nil + defp fetch_user_by_id(_id), do: nil + defp fetch_active_session(_user_id), do: nil + defp fetch_session(_id), do: nil + defp fetch_sessions_for_user(_user_id), do: [] + defp verify_password(_user, _pw), do: true + defp generate_token(_user_id), do: "tok_abc" + defp generate_reset_token(_user_id), do: "rst_abc" + defp invalidate_session(_session), do: {:ok, :logged_out} + defp create_user(attrs), do: {:ok, attrs} + defp send_verification_email(_user), do: {:ok, :sent} + defp send_reset_email(_email, _token), do: {:ok, :sent} + defp decode_token(_token), do: {:ok, %{"sub" => "1"}} + defp validate_reset_token(_token), do: {:ok, "1"} + defp validate_registration(_params), do: :ok + defp hash_password(pw), do: pw + defp update_user_password(_id, _hash), do: {:ok, %{}} +end diff --git a/priv/combined_metrics/samples/consistency/same_concept_same_name/config.yml 
b/priv/combined_metrics/samples/consistency/same_concept_same_name/config.yml new file mode 100644 index 0000000..3543d7a --- /dev/null +++ b/priv/combined_metrics/samples/consistency/same_concept_same_name/config.yml @@ -0,0 +1 @@ +doc: "The same domain concept should use the same name throughout a file — mixing `user`, `usr`, and `u` for the same thing harms readability." diff --git a/priv/combined_metrics/samples/consistency/same_concept_same_name/good/auth.ex b/priv/combined_metrics/samples/consistency/same_concept_same_name/good/auth.ex new file mode 100644 index 0000000..ea578d8 --- /dev/null +++ b/priv/combined_metrics/samples/consistency/same_concept_same_name/good/auth.ex @@ -0,0 +1,102 @@ +defmodule Auth do + @moduledoc "Handles authentication and session management" + + def login(user, password) do + case fetch_user(user.email) do + nil -> + {:error, :not_found} + + user -> + if verify_password(user, password) do + token = generate_token(user.id) + {:ok, token} + else + {:error, :invalid_credentials} + end + end + end + + def logout(user) do + case fetch_active_session(user.id) do + nil -> {:error, :no_session} + session -> invalidate_session(session) + end + end + + def register(params) do + with :ok <- validate_registration(params), + {:ok, user} <- create_user(params), + {:ok, _} <- send_verification_email(user) do + {:ok, user} + end + end + + def verify_token(token_string) do + case decode_token(token_string) do + {:ok, claims} -> + user = fetch_user_by_id(claims["sub"]) + {:ok, user} + + {:error, reason} -> + {:error, reason} + end + end + + def change_password(user, old_password, new_password) do + if verify_password(user, old_password) do + hashed = hash_password(new_password) + update_user_password(user.id, hashed) + else + {:error, :wrong_password} + end + end + + def request_password_reset(email) do + case fetch_user(email) do + nil -> + {:error, :not_found} + + user -> + reset_token = generate_reset_token(user.id) + 
send_reset_email(user.email, reset_token) + end + end + + def reset_password(reset_token, new_password) do + with {:ok, user_id} <- validate_reset_token(reset_token), + user <- fetch_user_by_id(user_id), + hashed = hash_password(new_password), + {:ok, updated_user} <- update_user_password(user.id, hashed) do + {:ok, updated_user} + end + end + + def list_sessions(user) do + fetch_sessions_for_user(user.id) + end + + def revoke_session(user, session_id) do + case fetch_session(session_id) do + %{user_id: ^user.id} = session -> invalidate_session(session) + _ -> {:error, :unauthorized} + end + end + + defp fetch_user(_email), do: nil + defp fetch_user_by_id(_id), do: nil + defp fetch_active_session(_user_id), do: nil + defp fetch_session(_id), do: nil + defp fetch_sessions_for_user(_user_id), do: [] + defp verify_password(_user, _password), do: true + defp generate_token(_user_id), do: "tok_abc" + defp generate_reset_token(_user_id), do: "rst_abc" + defp invalidate_session(_session), do: {:ok, :logged_out} + defp create_user(attrs), do: {:ok, attrs} + defp send_verification_email(_user), do: {:ok, :sent} + defp send_reset_email(_email, _token), do: {:ok, :sent} + defp decode_token(_token), do: {:ok, %{"sub" => "1"}} + defp validate_reset_token(_token), do: {:ok, "1"} + defp validate_registration(_params), do: :ok + defp hash_password(password), do: password + defp update_user_password(_id, _hash), do: {:ok, %{}} +end diff --git a/priv/combined_metrics/samples/consistency/static_member_via_class_name/bad/ProductCatalog.java b/priv/combined_metrics/samples/consistency/static_member_via_class_name/bad/ProductCatalog.java new file mode 100644 index 0000000..3e63a8b --- /dev/null +++ b/priv/combined_metrics/samples/consistency/static_member_via_class_name/bad/ProductCatalog.java @@ -0,0 +1,63 @@ +package com.example.catalog; + +import java.math.BigDecimal; +import java.util.List; + +public class ProductCatalog { + + public static final int MAX_SEARCH_RESULTS = 100; + 
public static final BigDecimal DEFAULT_TAX_RATE = new BigDecimal("0.20"); + + private static int instanceCount = 0; + + private final String region; + private final ProductRepository repository; + + public ProductCatalog(String region, ProductRepository repository) { + this.region = region; + this.repository = repository; + // Accessing static field via `this` — misleading, looks like instance state + this.instanceCount++; + } + + public List search(String query) { + // Accessing static constant via instance reference — hides the static nature + return repository.search(query, this.MAX_SEARCH_RESULTS); + } + + public BigDecimal priceWithTax(Product product) { + // Accessing static constant via instance reference + return product.getBasePrice().multiply( + BigDecimal.ONE.add(this.DEFAULT_TAX_RATE) + ); + } + + public static ProductCatalog forRegion(String region, ProductRepository repo) { + return new ProductCatalog(region, repo); + } + + public static int getInstanceCount() { + // Fine in static context, but inconsistent with rest of file + return instanceCount; + } + + public void resetInstanceTracking() { + // Accessing static field via `this` in instance method + this.instanceCount = 0; + } + + public List findOnSale() { + ProductCatalog catalog = this; + // Accessing static member via local instance variable — very confusing + return repository.findAll().stream() + .filter(Product::isOnSale) + .limit(catalog.MAX_SEARCH_RESULTS) + .toList(); + } + + public void logStats() { + ProductCatalog temp = new ProductCatalog(region, repository); + // Accessing static field through a different instance + System.out.println("Count: " + temp.instanceCount); + } +} diff --git a/priv/combined_metrics/samples/consistency/static_member_via_class_name/good/ProductCatalog.java b/priv/combined_metrics/samples/consistency/static_member_via_class_name/good/ProductCatalog.java new file mode 100644 index 0000000..363e97a --- /dev/null +++ 
b/priv/combined_metrics/samples/consistency/static_member_via_class_name/good/ProductCatalog.java @@ -0,0 +1,62 @@ +package com.example.catalog; + +import java.math.BigDecimal; +import java.util.List; + +public class ProductCatalog { + + public static final int MAX_SEARCH_RESULTS = 100; + public static final BigDecimal DEFAULT_TAX_RATE = new BigDecimal("0.20"); + + private static int instanceCount = 0; + + private final String region; + private final ProductRepository repository; + + public ProductCatalog(String region, ProductRepository repository) { + this.region = region; + this.repository = repository; + ProductCatalog.instanceCount++; + } + + public List search(String query) { + // Accessing static constant via class name + return repository.search(query, ProductCatalog.MAX_SEARCH_RESULTS); + } + + public BigDecimal priceWithTax(Product product) { + // Accessing static constant via class name + return product.getBasePrice().multiply( + BigDecimal.ONE.add(ProductCatalog.DEFAULT_TAX_RATE) + ); + } + + public static ProductCatalog forRegion(String region, ProductRepository repo) { + return new ProductCatalog(region, repo); + } + + public static int getInstanceCount() { + // Accessing static field via class name inside static method + return ProductCatalog.instanceCount; + } + + public void resetInstanceTracking() { + // Static field accessed via class name even from instance method + ProductCatalog.instanceCount = 0; + } + + public Product findBySkuOrThrow(String sku) { + Product product = repository.findBySku(sku); + if (product == null) { + throw new ProductNotFoundException(sku); + } + return product; + } + + public List findOnSale() { + return repository.findAll().stream() + .filter(Product::isOnSale) + .limit(ProductCatalog.MAX_SEARCH_RESULTS) + .toList(); + } +} diff --git a/priv/combined_metrics/samples/consistency/switch_fallthrough_has_comment/bad/OrderService.php 
b/priv/combined_metrics/samples/consistency/switch_fallthrough_has_comment/bad/OrderService.php new file mode 100644 index 0000000..90f311c --- /dev/null +++ b/priv/combined_metrics/samples/consistency/switch_fallthrough_has_comment/bad/OrderService.php @@ -0,0 +1,71 @@ += ^from and e.date <= ^to + ) + + %Report{user_id: user_id, events: events} + end +end diff --git a/priv/combined_metrics/samples/dependencies/import_count_under_10/config.yml b/priv/combined_metrics/samples/dependencies/import_count_under_10/config.yml new file mode 100644 index 0000000..3fad5bf --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/import_count_under_10/config.yml @@ -0,0 +1 @@ +doc: "Files should import fewer than 10 modules; high import counts signal excessive coupling." diff --git a/priv/combined_metrics/samples/dependencies/import_count_under_10/good/dashboard.ex b/priv/combined_metrics/samples/dependencies/import_count_under_10/good/dashboard.ex new file mode 100644 index 0000000..8313437 --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/import_count_under_10/good/dashboard.ex @@ -0,0 +1,80 @@ +defmodule MyApp.Dashboard do + @moduledoc """ + Dashboard aggregation module that compiles user-facing metrics + and summaries from all subdomains. 
+ """ + + alias MyApp.Accounts + alias MyApp.Orders + alias MyApp.Billing + alias MyApp.Shipping + alias MyApp.Notifications + + @spec summary(Accounts.User.t()) :: map() + def summary(user) do + %{ + orders: Orders.recent_for_user(user, limit: 5), + invoices: Billing.open_invoices_for_user(user), + shipments: Shipping.active_shipments_for_user(user), + notifications: Notifications.unread_for_user(user) + } + end + + @spec order_count(Accounts.User.t()) :: non_neg_integer() + def order_count(user) do + user + |> Orders.for_user() + |> length() + end + + @spec billing_status(Accounts.User.t()) :: :current | :overdue | :no_invoices + def billing_status(user) do + user + |> Billing.open_invoices_for_user() + |> determine_billing_status() + end + + @spec shipment_summary(Accounts.User.t()) :: map() + def shipment_summary(user) do + shipments = Shipping.active_shipments_for_user(user) + + %{ + in_transit: Enum.count(shipments, &(&1.status == :in_transit)), + delivered: Enum.count(shipments, &(&1.status == :delivered)), + total: length(shipments) + } + end + + @spec notification_badge(Accounts.User.t()) :: non_neg_integer() + def notification_badge(user) do + user + |> Notifications.unread_for_user() + |> length() + end + + @spec activity_feed(Accounts.User.t(), keyword()) :: [map()] + def activity_feed(user, opts \\ []) do + limit = Keyword.get(opts, :limit, 20) + + [ + Orders.recent_for_user(user, limit: limit), + Shipping.recent_events_for_user(user, limit: limit), + Notifications.unread_for_user(user) + ] + |> List.flatten() + |> Enum.sort_by(& &1.inserted_at, {:desc, DateTime}) + |> Enum.take(limit) + end + + # Private + + defp determine_billing_status([]), do: :no_invoices + + defp determine_billing_status(invoices) do + if Enum.any?(invoices, &past_due?/1), do: :overdue, else: :current + end + + defp past_due?(%{due_date: due_date}) do + Date.compare(due_date, Date.utc_today()) == :lt + end +end diff --git 
a/priv/combined_metrics/samples/dependencies/low_coupling/bad/order_controller.ex b/priv/combined_metrics/samples/dependencies/low_coupling/bad/order_controller.ex new file mode 100644 index 0000000..e52e77e --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/low_coupling/bad/order_controller.ex @@ -0,0 +1,91 @@ +defmodule MyAppWeb.OrderController do + use MyAppWeb, :controller + + alias MyApp.Repo + alias MyApp.Orders.Order + alias MyApp.Orders.OrderItem + alias MyApp.Accounts.User + alias MyApp.Billing.Invoice + alias MyApp.Notifications.Mailer + alias MyApp.Shipping.ShipmentService + + @moduledoc """ + Controller for order lifecycle management. + """ + + def index(conn, _params) do + user_id = conn.assigns.current_user.id + + orders = + Repo.all( + from o in Order, + where: o.user_id == ^user_id, + preload: [:items, :invoice] + ) + + render(conn, :index, orders: orders) + end + + def show(conn, %{"id" => id}) do + order = Repo.get!(Order, id) |> Repo.preload([:items, :invoice, :user]) + render(conn, :show, order: order) + end + + def create(conn, %{"order" => params}) do + user = Repo.get!(User, conn.assigns.current_user.id) + + changeset = Order.changeset(%Order{}, Map.put(params, "user_id", user.id)) + + case Repo.insert(changeset) do + {:ok, order} -> + items = Map.get(params, "items", []) + + Enum.each(items, fn item_params -> + %OrderItem{} + |> OrderItem.changeset(Map.put(item_params, "order_id", order.id)) + |> Repo.insert!() + end) + + invoice = Repo.insert!(%Invoice{order_id: order.id, user_id: user.id, status: :open}) + + total = Enum.reduce(items, 0, fn i, acc -> acc + i["price"] * i["quantity"] end) + Repo.update!(Invoice.changeset(invoice, %{total: total})) + + ShipmentService.create_shipment_for_order(order) + + Mailer.send_order_confirmation(user.email, order) + + conn + |> put_status(:created) + |> render(:show, order: order) + + {:error, changeset} -> + conn + |> put_status(:unprocessable_entity) + |> render(:error, changeset: 
changeset) + end + end + + def cancel(conn, %{"id" => id}) do + order = Repo.get!(Order, id) + + case Repo.update(Order.changeset(order, %{status: :cancelled})) do + {:ok, order} -> + invoice = Repo.get_by!(Invoice, order_id: order.id) + Repo.update!(Invoice.changeset(invoice, %{status: :voided})) + + shipment = ShipmentService.find_shipment(order.id) + if shipment, do: ShipmentService.cancel_shipment(shipment) + + user = Repo.get!(User, order.user_id) + Mailer.send_cancellation_notice(user.email, order) + + render(conn, :show, order: order) + + {:error, changeset} -> + conn + |> put_status(:unprocessable_entity) + |> render(:error, changeset: changeset) + end + end +end diff --git a/priv/combined_metrics/samples/dependencies/low_coupling/config.yml b/priv/combined_metrics/samples/dependencies/low_coupling/config.yml new file mode 100644 index 0000000..a84bf39 --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/low_coupling/config.yml @@ -0,0 +1 @@ +doc: "Modules should depend on few external symbols — a low unique-operand count relative to total is a proxy for tight coupling." diff --git a/priv/combined_metrics/samples/dependencies/low_coupling/good/order_controller.ex b/priv/combined_metrics/samples/dependencies/low_coupling/good/order_controller.ex new file mode 100644 index 0000000..0bbb5c1 --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/low_coupling/good/order_controller.ex @@ -0,0 +1,57 @@ +defmodule MyAppWeb.OrderController do + use MyAppWeb, :controller + + alias MyApp.Orders + + @moduledoc """ + Controller for order lifecycle management. + Delegates all business logic to the Orders context. 
+ """ + + action_fallback MyAppWeb.FallbackController + + def index(conn, _params) do + orders = Orders.list_orders_for_user(conn.assigns.current_user) + render(conn, :index, orders: orders) + end + + def show(conn, %{"id" => id}) do + with {:ok, order} <- Orders.get_order(id, conn.assigns.current_user) do + render(conn, :show, order: order) + end + end + + def create(conn, %{"order" => params}) do + with {:ok, order} <- Orders.place_order(conn.assigns.current_user, params) do + conn + |> put_status(:created) + |> render(:show, order: order) + end + end + + def cancel(conn, %{"id" => id}) do + with {:ok, order} <- Orders.get_order(id, conn.assigns.current_user), + {:ok, cancelled} <- Orders.cancel_order(order) do + render(conn, :show, order: cancelled) + end + end + + def update(conn, %{"id" => id, "order" => params}) do + with {:ok, order} <- Orders.get_order(id, conn.assigns.current_user), + {:ok, updated} <- Orders.update_order(order, params) do + render(conn, :show, order: updated) + end + end + + def history(conn, params) do + page = Map.get(params, "page", 1) + per_page = Map.get(params, "per_page", 20) + + orders = Orders.order_history_for_user(conn.assigns.current_user, + page: page, + per_page: per_page + ) + + render(conn, :index, orders: orders) + end +end diff --git a/priv/combined_metrics/samples/dependencies/no_default_exports/bad/payment_gateway.ts b/priv/combined_metrics/samples/dependencies/no_default_exports/bad/payment_gateway.ts new file mode 100644 index 0000000..d50dc6c --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/no_default_exports/bad/payment_gateway.ts @@ -0,0 +1,68 @@ +interface ChargeRequest { + amount: number; + currency: string; + paymentMethodId: string; + customerId: string; +} + +interface ChargeResult { + id: string; + status: "succeeded" | "pending" | "failed"; + amount: number; + currency: string; + createdAt: string; +} + +interface RefundRequest { + chargeId: string; + amount?: number; + reason?: string; +} + 
+interface RefundResult { + id: string; + chargeId: string; + amount: number; + status: "pending" | "succeeded"; +} + +async function createCharge(request: ChargeRequest): Promise { + const response = await fetch("/api/charges", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(request), + }); + if (!response.ok) throw new Error(`Charge failed: ${response.status}`); + return response.json() as Promise; +} + +async function fetchCharge(chargeId: string): Promise { + const response = await fetch(`/api/charges/${chargeId}`); + if (!response.ok) throw new Error(`Charge not found: ${chargeId}`); + return response.json() as Promise; +} + +async function refundCharge(request: RefundRequest): Promise { + const response = await fetch("/api/refunds", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(request), + }); + if (!response.ok) throw new Error(`Refund failed: ${response.status}`); + return response.json() as Promise; +} + +function formatChargeAmount(charge: ChargeResult): string { + return new Intl.NumberFormat("en-US", { + style: "currency", + currency: charge.currency, + }).format(charge.amount / 100); +} + +// Default export makes it hard to rename consistently across the codebase +export default { + createCharge, + fetchCharge, + refundCharge, + formatChargeAmount, +}; diff --git a/priv/combined_metrics/samples/dependencies/no_default_exports/good/payment_gateway.ts b/priv/combined_metrics/samples/dependencies/no_default_exports/good/payment_gateway.ts new file mode 100644 index 0000000..5cc1ba3 --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/no_default_exports/good/payment_gateway.ts @@ -0,0 +1,63 @@ +interface ChargeRequest { + amount: number; + currency: string; + paymentMethodId: string; + customerId: string; +} + +interface ChargeResult { + id: string; + status: "succeeded" | "pending" | "failed"; + amount: number; + currency: string; + createdAt: string; 
+} + +interface RefundRequest { + chargeId: string; + amount?: number; + reason?: string; +} + +interface RefundResult { + id: string; + chargeId: string; + amount: number; + status: "pending" | "succeeded"; +} + +async function createCharge(request: ChargeRequest): Promise { + const response = await fetch("/api/charges", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(request), + }); + if (!response.ok) throw new Error(`Charge failed: ${response.status}`); + return response.json() as Promise; +} + +async function fetchCharge(chargeId: string): Promise { + const response = await fetch(`/api/charges/${chargeId}`); + if (!response.ok) throw new Error(`Charge not found: ${chargeId}`); + return response.json() as Promise; +} + +async function refundCharge(request: RefundRequest): Promise { + const response = await fetch("/api/refunds", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(request), + }); + if (!response.ok) throw new Error(`Refund failed: ${response.status}`); + return response.json() as Promise; +} + +function formatChargeAmount(charge: ChargeResult): string { + return new Intl.NumberFormat("en-US", { + style: "currency", + currency: charge.currency, + }).format(charge.amount / 100); +} + +export { createCharge, fetchCharge, refundCharge, formatChargeAmount }; +export type { ChargeRequest, ChargeResult, RefundRequest, RefundResult }; diff --git a/priv/combined_metrics/samples/dependencies/no_mutable_exports/bad/product_repository.ts b/priv/combined_metrics/samples/dependencies/no_mutable_exports/bad/product_repository.ts new file mode 100644 index 0000000..d1daac2 --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/no_mutable_exports/bad/product_repository.ts @@ -0,0 +1,46 @@ +interface Product { + id: string; + name: string; + price: number; + category: string; +} + +// Exported mutable state — consumers can accidentally mutate these +export let 
productCache: Map = new Map(); +export let cacheLastFetchedAt: number | null = null; +export let isLoading = false; + +const CACHE_TTL_MS = 5 * 60 * 1000; + +function isCacheStale(): boolean { + return cacheLastFetchedAt === null || Date.now() - cacheLastFetchedAt > CACHE_TTL_MS; +} + +export async function loadProducts(): Promise { + isLoading = true; + try { + const response = await fetch("/api/products"); + if (!response.ok) throw new Error(`Failed to load products: ${response.status}`); + const products: Product[] = await response.json(); + + productCache = new Map(products.map((p) => [p.id, p])); + cacheLastFetchedAt = Date.now(); + } finally { + isLoading = false; + } +} + +export async function getProducts(): Promise { + if (isCacheStale()) await loadProducts(); + return Array.from(productCache.values()); +} + +export async function getProduct(id: string): Promise { + if (isCacheStale()) await loadProducts(); + return productCache.get(id) ?? null; +} + +export function invalidateCache(): void { + productCache = new Map(); + cacheLastFetchedAt = null; +} diff --git a/priv/combined_metrics/samples/dependencies/no_mutable_exports/good/product_repository.ts b/priv/combined_metrics/samples/dependencies/no_mutable_exports/good/product_repository.ts new file mode 100644 index 0000000..66a99a3 --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/no_mutable_exports/good/product_repository.ts @@ -0,0 +1,57 @@ +interface Product { + id: string; + name: string; + price: number; + category: string; +} + +interface CacheState { + products: Map; + lastFetchedAt: number | null; +} + +const state: CacheState = { + products: new Map(), + lastFetchedAt: null, +}; + +const CACHE_TTL_MS = 5 * 60 * 1000; + +function isCacheStale(): boolean { + return state.lastFetchedAt === null || Date.now() - state.lastFetchedAt > CACHE_TTL_MS; +} + +async function loadProducts(): Promise { + const response = await fetch("/api/products"); + if (!response.ok) throw new Error(`Failed to 
load products: ${response.status}`); + const products: Product[] = await response.json(); + + state.products.clear(); + for (const product of products) { + state.products.set(product.id, product); + } + state.lastFetchedAt = Date.now(); +} + +async function getProducts(): Promise { + if (isCacheStale()) await loadProducts(); + return Array.from(state.products.values()); +} + +async function getProduct(id: string): Promise { + if (isCacheStale()) await loadProducts(); + return state.products.get(id) ?? null; +} + +function invalidateCache(): void { + state.products.clear(); + state.lastFetchedAt = null; +} + +function getCacheAge(): number | null { + if (state.lastFetchedAt === null) return null; + return Date.now() - state.lastFetchedAt; +} + +export { getProducts, getProduct, invalidateCache, getCacheAge, loadProducts }; +export type { Product }; diff --git a/priv/combined_metrics/samples/dependencies/no_wildcard_imports/bad/query_helpers.ex b/priv/combined_metrics/samples/dependencies/no_wildcard_imports/bad/query_helpers.ex new file mode 100644 index 0000000..32f1c3d --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/no_wildcard_imports/bad/query_helpers.ex @@ -0,0 +1,74 @@ +defmodule MyApp.QueryHelpers do + @moduledoc """ + Helpers for building common Ecto query patterns. 
+ """ + + import Ecto.Query + import Ecto.Changeset + import MyApp.QueryFilters + import MyApp.PaginationHelpers + + @spec paginate(Ecto.Queryable.t(), map()) :: Ecto.Query.t() + def paginate(query, params) do + page = Map.get(params, "page", 1) + per_page = Map.get(params, "per_page", 20) + + query + |> offset(^((page - 1) * per_page)) + |> limit(^per_page) + end + + @spec filter_by_status(Ecto.Queryable.t(), atom()) :: Ecto.Query.t() + def filter_by_status(query, status) do + where(query, [q], q.status == ^status) + end + + @spec filter_by_user(Ecto.Queryable.t(), integer()) :: Ecto.Query.t() + def filter_by_user(query, user_id) do + where(query, [q], q.user_id == ^user_id) + end + + @spec order_by_inserted(Ecto.Queryable.t(), :asc | :desc) :: Ecto.Query.t() + def order_by_inserted(query, direction \\ :desc) do + order_by(query, [q], [{^direction, q.inserted_at}]) + end + + @spec search_name(Ecto.Queryable.t(), String.t()) :: Ecto.Query.t() + def search_name(query, term) do + like_term = "%#{term}%" + where(query, [q], ilike(q.name, ^like_term)) + end + + @spec with_preloads(Ecto.Queryable.t(), list()) :: Ecto.Query.t() + def with_preloads(query, associations) do + preload(query, ^associations) + end + + @spec apply_filters(Ecto.Queryable.t(), map()) :: Ecto.Query.t() + def apply_filters(query, filters) do + Enum.reduce(filters, query, fn + {"status", status}, q -> filter_by_status(q, String.to_atom(status)) + {"user_id", id}, q -> filter_by_user(q, id) + {"search", term}, q -> search_name(q, term) + _unknown, q -> q + end) + end + + @spec validate_and_apply(Ecto.Changeset.t(), map()) :: Ecto.Changeset.t() + def validate_and_apply(changeset, attrs) do + changeset + |> cast(attrs, [:status, :name]) + |> validate_required([:status]) + |> validate_inclusion(:status, [:active, :inactive, :pending]) + end + + @spec count_query(Ecto.Queryable.t()) :: Ecto.Query.t() + def count_query(query) do + from q in query, select: count(q.id) + end + + @spec 
date_range(Ecto.Queryable.t(), Date.t(), Date.t()) :: Ecto.Query.t() + def date_range(query, from_date, to_date) do + where(query, [q], q.inserted_at >= ^from_date and q.inserted_at <= ^to_date) + end +end diff --git a/priv/combined_metrics/samples/dependencies/no_wildcard_imports/config.yml b/priv/combined_metrics/samples/dependencies/no_wildcard_imports/config.yml new file mode 100644 index 0000000..4b85bcb --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/no_wildcard_imports/config.yml @@ -0,0 +1 @@ +doc: "Wildcard imports (`import *`, `using Module`) pollute the local namespace and hide dependencies." diff --git a/priv/combined_metrics/samples/dependencies/no_wildcard_imports/good/query_helpers.ex b/priv/combined_metrics/samples/dependencies/no_wildcard_imports/good/query_helpers.ex new file mode 100644 index 0000000..ccde3df --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/no_wildcard_imports/good/query_helpers.ex @@ -0,0 +1,73 @@ +defmodule MyApp.QueryHelpers do + @moduledoc """ + Helpers for building common Ecto query patterns. 
+ """ + + import Ecto.Query, only: [from: 2, where: 3, order_by: 3, limit: 2, offset: 2, preload: 2] + import Ecto.Changeset, only: [cast: 3, validate_required: 2, validate_inclusion: 3] + + alias MyApp.QueryFilters + alias MyApp.PaginationHelpers + + @spec paginate(Ecto.Queryable.t(), map()) :: Ecto.Query.t() + def paginate(query, params) do + {page, per_page} = PaginationHelpers.extract_pagination(params) + + query + |> offset(^((page - 1) * per_page)) + |> limit(^per_page) + end + + @spec filter_by_status(Ecto.Queryable.t(), atom()) :: Ecto.Query.t() + def filter_by_status(query, status) do + where(query, [q], q.status == ^status) + end + + @spec filter_by_user(Ecto.Queryable.t(), integer()) :: Ecto.Query.t() + def filter_by_user(query, user_id) do + where(query, [q], q.user_id == ^user_id) + end + + @spec order_by_inserted(Ecto.Queryable.t(), :asc | :desc) :: Ecto.Query.t() + def order_by_inserted(query, direction \\ :desc) do + order_by(query, [q], [{^direction, q.inserted_at}]) + end + + @spec search_name(Ecto.Queryable.t(), String.t()) :: Ecto.Query.t() + def search_name(query, term) do + QueryFilters.ilike_search(query, :name, term) + end + + @spec with_preloads(Ecto.Queryable.t(), list()) :: Ecto.Query.t() + def with_preloads(query, associations) do + preload(query, ^associations) + end + + @spec apply_filters(Ecto.Queryable.t(), map()) :: Ecto.Query.t() + def apply_filters(query, filters) do + Enum.reduce(filters, query, fn + {"status", status}, q -> filter_by_status(q, String.to_atom(status)) + {"user_id", id}, q -> filter_by_user(q, id) + {"search", term}, q -> search_name(q, term) + _unknown, q -> q + end) + end + + @spec validate_and_apply(Ecto.Changeset.t(), map()) :: Ecto.Changeset.t() + def validate_and_apply(changeset, attrs) do + changeset + |> cast(attrs, [:status, :name]) + |> validate_required([:status]) + |> validate_inclusion(:status, [:active, :inactive, :pending]) + end + + @spec count_query(Ecto.Queryable.t()) :: Ecto.Query.t() + def 
count_query(query) do + from q in query, select: count(q.id) + end + + @spec date_range(Ecto.Queryable.t(), Date.t(), Date.t()) :: Ecto.Query.t() + def date_range(query, from_date, to_date) do + where(query, [q], q.inserted_at >= ^from_date and q.inserted_at <= ^to_date) + end +end diff --git a/priv/combined_metrics/samples/dependencies/uses_import_type_for_type_only_imports/bad/user_service.ts b/priv/combined_metrics/samples/dependencies/uses_import_type_for_type_only_imports/bad/user_service.ts new file mode 100644 index 0000000..a1fbecd --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/uses_import_type_for_type_only_imports/bad/user_service.ts @@ -0,0 +1,47 @@ +// Regular imports used for type-only symbols — should use `import type` +import { User, CreateUserPayload, UpdateUserPayload } from "./user_types.js"; +import { PaginatedResponse, ApiError } from "./api_types.js"; +import { buildApiUrl, handleResponse } from "./api_client.js"; +import { formatDate } from "./date_utils.js"; + +async function fetchUser(userId: string): Promise { + const url = buildApiUrl(`/users/${userId}`); + const response = await fetch(url); + return handleResponse(response); +} + +async function createUser(payload: CreateUserPayload): Promise { + const url = buildApiUrl("/users"); + const response = await fetch(url, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload), + }); + return handleResponse(response); +} + +async function updateUser(userId: string, changes: UpdateUserPayload): Promise { + const url = buildApiUrl(`/users/${userId}`); + const response = await fetch(url, { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(changes), + }); + return handleResponse(response); +} + +async function listUsers(page = 1, pageSize = 20): Promise> { + const url = buildApiUrl(`/users?page=${page}&pageSize=${pageSize}`); + const response = await fetch(url); + return 
handleResponse>(response); +} + +function handleApiError(error: ApiError): void { + console.error(`API error [${error.code}]: ${error.message}`); +} + +function formatUserCreatedDate(user: User): string { + return formatDate(user.createdAt); +} + +export { fetchUser, createUser, updateUser, listUsers, handleApiError, formatUserCreatedDate }; diff --git a/priv/combined_metrics/samples/dependencies/uses_import_type_for_type_only_imports/good/user_service.ts b/priv/combined_metrics/samples/dependencies/uses_import_type_for_type_only_imports/good/user_service.ts new file mode 100644 index 0000000..78fef3b --- /dev/null +++ b/priv/combined_metrics/samples/dependencies/uses_import_type_for_type_only_imports/good/user_service.ts @@ -0,0 +1,47 @@ +import type { User, CreateUserPayload, UpdateUserPayload } from "./user_types.js"; +import type { PaginatedResponse, ApiError } from "./api_types.js"; +import { buildApiUrl, handleResponse } from "./api_client.js"; +import { formatDate } from "./date_utils.js"; + +async function fetchUser(userId: string): Promise { + const url = buildApiUrl(`/users/${userId}`); + const response = await fetch(url); + return handleResponse(response); +} + +async function createUser(payload: CreateUserPayload): Promise { + const url = buildApiUrl("/users"); + const response = await fetch(url, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload), + }); + return handleResponse(response); +} + +async function updateUser(userId: string, changes: UpdateUserPayload): Promise { + const url = buildApiUrl(`/users/${userId}`); + const response = await fetch(url, { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(changes), + }); + return handleResponse(response); +} + +async function listUsers(page = 1, pageSize = 20): Promise> { + const url = buildApiUrl(`/users?page=${page}&pageSize=${pageSize}`); + const response = await fetch(url); + return 
handleResponse>(response); +} + +function formatUserCreatedDate(user: User): string { + return formatDate(user.createdAt); +} + +function getUserDisplayLabel(user: User): string { + return `${user.displayName} <${user.email}>`; +} + +export { fetchUser, createUser, updateUser, listUsers, formatUserCreatedDate, getUserDisplayLabel }; +export type { User, CreateUserPayload, UpdateUserPayload }; diff --git a/priv/combined_metrics/samples/documentation/docstring_is_nonempty/bad/cache.ex b/priv/combined_metrics/samples/documentation/docstring_is_nonempty/bad/cache.ex new file mode 100644 index 0000000..1127e5f --- /dev/null +++ b/priv/combined_metrics/samples/documentation/docstring_is_nonempty/bad/cache.ex @@ -0,0 +1,78 @@ +defmodule MyApp.Cache do + @moduledoc "" + + @doc "" + @spec start_link(keyword()) :: GenServer.on_start() + def start_link(opts \\ []) do + name = Keyword.get(opts, :name, __MODULE__) + ttl = Keyword.get(opts, :ttl, :timer.minutes(5)) + :ets.new(name, [:set, :public, :named_table, read_concurrency: true]) + Agent.start_link(fn -> %{name: name, ttl: ttl} end, name: :"#{name}_agent") + end + + @doc "" + @spec get(atom(), term()) :: term() | nil + def get(cache, key) do + case :ets.lookup(cache, key) do + [{^key, value, expires_at}] -> + if System.monotonic_time(:millisecond) < expires_at, do: value, else: nil + + [] -> + nil + end + end + + @doc "" + @spec put(atom(), term(), term(), keyword()) :: true + def put(cache, key, value, opts \\ []) do + ttl = Keyword.get(opts, :ttl, default_ttl(cache)) + expires_at = System.monotonic_time(:millisecond) + ttl + :ets.insert(cache, {key, value, expires_at}) + end + + @doc "" + @spec delete(atom(), term()) :: true + def delete(cache, key) do + :ets.delete(cache, key) + end + + @doc "" + @spec flush(atom()) :: true + def flush(cache) do + :ets.delete_all_objects(cache) + end + + @doc "" + @spec fetch(atom(), term(), (-> term())) :: term() + def fetch(cache, key, fun) do + case get(cache, key) do + nil -> + 
value = fun.() + put(cache, key, value) + value + + value -> + value + end + end + + @doc "" + @spec size(atom()) :: non_neg_integer() + def size(cache) do + :ets.info(cache, :size) + end + + @doc "" + @spec stats(atom()) :: map() + def stats(cache) do + %{ + size: size(cache), + memory: :ets.info(cache, :memory) + } + end + + defp default_ttl(cache) do + agent = :"#{cache}_agent" + Agent.get(agent, & &1.ttl) + end +end diff --git a/priv/combined_metrics/samples/documentation/docstring_is_nonempty/config.yml b/priv/combined_metrics/samples/documentation/docstring_is_nonempty/config.yml new file mode 100644 index 0000000..02f3c8b --- /dev/null +++ b/priv/combined_metrics/samples/documentation/docstring_is_nonempty/config.yml @@ -0,0 +1 @@ +doc: "Docstrings must contain meaningful content, not just a placeholder or empty string." diff --git a/priv/combined_metrics/samples/documentation/docstring_is_nonempty/good/cache.ex b/priv/combined_metrics/samples/documentation/docstring_is_nonempty/good/cache.ex new file mode 100644 index 0000000..89bb205 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/docstring_is_nonempty/good/cache.ex @@ -0,0 +1,96 @@ +defmodule MyApp.Cache do + @moduledoc """ + ETS-backed in-memory cache with per-entry TTL support. + + Each cache instance is an ETS table created at startup via `start_link/1`. + Values are stored with an expiry timestamp and are considered stale after + their TTL has elapsed. Stale entries are not automatically evicted but are + ignored on read. + + Use `fetch/3` for the common read-through pattern to avoid redundant + computations or database calls. + """ + + @doc """ + Creates a new cache ETS table and starts its companion Agent. 
+ + Accepts the following options: + - `:name` — the atom name for the ETS table (defaults to `#{__MODULE__}`) + - `:ttl` — default time-to-live in milliseconds (defaults to 5 minutes) + """ + @spec start_link(keyword()) :: GenServer.on_start() + def start_link(opts \\ []) do + name = Keyword.get(opts, :name, __MODULE__) + ttl = Keyword.get(opts, :ttl, :timer.minutes(5)) + :ets.new(name, [:set, :public, :named_table, read_concurrency: true]) + Agent.start_link(fn -> %{name: name, ttl: ttl} end, name: :"#{name}_agent") + end + + @doc """ + Returns the cached value for `key`, or `nil` if missing or expired. + """ + @spec get(atom(), term()) :: term() | nil + def get(cache, key) do + case :ets.lookup(cache, key) do + [{^key, value, expires_at}] -> + if System.monotonic_time(:millisecond) < expires_at, do: value, else: nil + + [] -> + nil + end + end + + @doc """ + Stores `value` under `key` in the cache with an optional `:ttl` override. + + If `:ttl` is not provided, the cache's default TTL is used. + """ + @spec put(atom(), term(), term(), keyword()) :: true + def put(cache, key, value, opts \\ []) do + ttl = Keyword.get(opts, :ttl, default_ttl(cache)) + expires_at = System.monotonic_time(:millisecond) + ttl + :ets.insert(cache, {key, value, expires_at}) + end + + @doc """ + Removes the entry for `key` from the cache. + """ + @spec delete(atom(), term()) :: true + def delete(cache, key), do: :ets.delete(cache, key) + + @doc """ + Removes all entries from the cache without deleting the table itself. + """ + @spec flush(atom()) :: true + def flush(cache), do: :ets.delete_all_objects(cache) + + @doc """ + Returns the cached value for `key`, computing and storing it via `fun` on a miss. 
+ + This is the preferred read-through pattern to avoid duplicate work: + + MyApp.Cache.fetch(:my_cache, {:user, id}, fn -> Accounts.get_user!(id) end) + """ + @spec fetch(atom(), term(), (-> term())) :: term() + def fetch(cache, key, fun) do + case get(cache, key) do + nil -> + value = fun.() + put(cache, key, value) + value + + value -> + value + end + end + + @doc """ + Returns the number of entries currently stored in the cache table. + """ + @spec size(atom()) :: non_neg_integer() + def size(cache), do: :ets.info(cache, :size) + + defp default_ttl(cache) do + Agent.get(:"#{cache}_agent", & &1.ttl) + end +end diff --git a/priv/combined_metrics/samples/documentation/doctests_validate_examples/bad/billing.ex b/priv/combined_metrics/samples/documentation/doctests_validate_examples/bad/billing.ex new file mode 100644 index 0000000..a90ebe2 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/doctests_validate_examples/bad/billing.ex @@ -0,0 +1,59 @@ +defmodule MyApp.Billing.Formatter do + @moduledoc """ + Formatting utilities for billing amounts and invoice references. + """ + + @doc """ + Formats an integer amount in cents to a display string for the given currency. + + For example, passing 2999 and :usd would return a string like "$29.99". + Passing 0 for any currency returns "0.00" with the appropriate symbol. + For unknown currencies the amount is formatted without a symbol prefix. + + Note that the function expects a non-negative integer for cents. + Negative values are not supported and may produce unexpected output. + The currency atom should be one of :usd, :eur, or :gbp for proper + symbol formatting. Other atoms fall back to an uppercase string suffix. + """ + # Bad: prose-only documentation with no `iex>` examples. + # The description is vague ("a string like...") and untestable. + # A doctest would pin the exact return values and catch regressions. 
+ @spec format_amount(integer(), atom()) :: String.t() + def format_amount(cents, currency) when is_integer(cents) and cents >= 0 do + value = :erlang.float_to_binary(cents / 100, decimals: 2) + + case currency do + :usd -> "$#{value}" + :eur -> "€#{value}" + :gbp -> "£#{value}" + other -> "#{value} #{other |> Atom.to_string() |> String.upcase()}" + end + end + + @doc """ + Generates an invoice reference string. + + The format is "INV-" followed by the customer ID padded to 5 digits, + a dash, and the sequence number padded to 4 digits. + Customer IDs and sequence numbers must be positive integers. + """ + # Bad: describes the format in prose but provides no runnable example. + # No doctest means the claimed format cannot be verified automatically. + @spec invoice_ref(pos_integer(), pos_integer()) :: String.t() + def invoice_ref(customer_id, sequence) + when is_integer(customer_id) and is_integer(sequence) do + "INV-#{String.pad_leading(to_string(customer_id), 5, "0")}-#{String.pad_leading(to_string(sequence), 4, "0")}" + end + + @doc """ + Checks whether an amount is valid for charging. + Returns true for amounts between 1 and 1,000,000 (inclusive), false otherwise. + Zero is not a valid charge amount. Amounts above one million are rejected. + """ + # Bad: no iex> examples. The boundary conditions (0, 1, 1_000_000, 1_000_001) + # are described in words but never tested via doctest. 
+ @spec valid_amount?(integer()) :: boolean() + def valid_amount?(amount) when is_integer(amount) do + amount in 1..1_000_000 + end +end diff --git a/priv/combined_metrics/samples/documentation/doctests_validate_examples/good/billing.ex b/priv/combined_metrics/samples/documentation/doctests_validate_examples/good/billing.ex new file mode 100644 index 0000000..d451fb6 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/doctests_validate_examples/good/billing.ex @@ -0,0 +1,73 @@ +defmodule MyApp.Billing.Formatter do + @moduledoc """ + Formatting utilities for billing amounts and invoice references. + + All examples in this module's `@doc` blocks are run as ExUnit doctests. + See `test/my_app/billing/formatter_test.exs`. + """ + + @doc """ + Formats an integer amount in cents to a display string for the given currency. + + iex> MyApp.Billing.Formatter.format_amount(2999, :usd) + "$29.99" + + iex> MyApp.Billing.Formatter.format_amount(1050, :eur) + "€10.50" + + iex> MyApp.Billing.Formatter.format_amount(0, :gbp) + "£0.00" + + iex> MyApp.Billing.Formatter.format_amount(100, :unknown) + "1.00 UNKNOWN" + """ + @spec format_amount(integer(), atom()) :: String.t() + def format_amount(cents, currency) when is_integer(cents) and cents >= 0 do + value = :erlang.float_to_binary(cents / 100, decimals: 2) + + case currency do + :usd -> "$#{value}" + :eur -> "€#{value}" + :gbp -> "£#{value}" + other -> "#{value} #{other |> Atom.to_string() |> String.upcase()}" + end + end + + @doc """ + Generates an invoice reference from a customer ID and sequence number. 
+ + iex> MyApp.Billing.Formatter.invoice_ref(42, 7) + "INV-00042-0007" + + iex> MyApp.Billing.Formatter.invoice_ref(1, 1) + "INV-00001-0001" + + iex> MyApp.Billing.Formatter.invoice_ref(99999, 9999) + "INV-99999-9999" + """ + @spec invoice_ref(pos_integer(), pos_integer()) :: String.t() + def invoice_ref(customer_id, sequence) + when is_integer(customer_id) and is_integer(sequence) do + "INV-#{String.pad_leading(to_string(customer_id), 5, "0")}-#{String.pad_leading(to_string(sequence), 4, "0")}" + end + + @doc """ + Returns true if the amount is within the acceptable charge range. + + iex> MyApp.Billing.Formatter.valid_amount?(50) + true + + iex> MyApp.Billing.Formatter.valid_amount?(0) + false + + iex> MyApp.Billing.Formatter.valid_amount?(10_000_00) + true + + iex> MyApp.Billing.Formatter.valid_amount?(10_000_01) + false + """ + @spec valid_amount?(integer()) :: boolean() + def valid_amount?(amount) when is_integer(amount) do + amount in 1..1_000_000 + end +end diff --git a/priv/combined_metrics/samples/documentation/every_declaration_has_doc_comment/bad/ShippingService.swift b/priv/combined_metrics/samples/documentation/every_declaration_has_doc_comment/bad/ShippingService.swift new file mode 100644 index 0000000..1bca816 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/every_declaration_has_doc_comment/bad/ShippingService.swift @@ -0,0 +1,57 @@ +import Foundation + +// No doc comment on the enum +enum ShipmentStatus { + // No doc comment on cases + case pending + case inTransit(currentLocation: String) + case delivered(at: Date) + case attemptedDelivery(attemptedAt: Date) + case returned(reason: String) +} + +// No doc comment on the struct +struct Shipment { + // No doc comment on properties + let trackingNumber: String + let destinationAddress: String + let estimatedDeliveryDate: Date + var status: ShipmentStatus + let carrier: String +} + +// No doc comment on the class +class ShippingService { + private var shipments: [String: Shipment] = [:] 
+ + // No doc comment — what does this return, and what are the failure conditions? + @discardableResult + func register(_ shipment: Shipment) -> Bool { + guard shipments[shipment.trackingNumber] == nil else { return false } + shipments[shipment.trackingNumber] = shipment + return true + } + + // No doc comment — unclear whether nil means "not found" or "error" + func shipment(for trackingNumber: String) -> Shipment? { + return shipments[trackingNumber] + } + + // No doc comment — parameters and return value undocumented + @discardableResult + func updateStatus(for trackingNumber: String, to status: ShipmentStatus) -> Bool { + guard shipments[trackingNumber] != nil else { return false } + shipments[trackingNumber]?.status = status + return true + } + + // No doc comment — what counts as "overdue"? What is referenceDate for? + func overdueShipments(referenceDate: Date = Date()) -> [Shipment] { + return shipments.values + .filter { shipment in + if case .delivered = shipment.status { return false } + return shipment.estimatedDeliveryDate < referenceDate + } + .sorted { $0.estimatedDeliveryDate < $1.estimatedDeliveryDate } + } +} diff --git a/priv/combined_metrics/samples/documentation/every_declaration_has_doc_comment/good/ShippingService.swift b/priv/combined_metrics/samples/documentation/every_declaration_has_doc_comment/good/ShippingService.swift new file mode 100644 index 0000000..3b7a244 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/every_declaration_has_doc_comment/good/ShippingService.swift @@ -0,0 +1,85 @@ +import Foundation + +/// Represents the current status of a shipment in the delivery pipeline. +enum ShipmentStatus { + /// The order has been placed but not yet picked up by a carrier. + case pending + /// The package is in transit between facilities. + case inTransit(currentLocation: String) + /// The package has been delivered to the destination address. 
+ case delivered(at: Date) + /// Delivery was attempted but the recipient was unavailable. + case attemptedDelivery(attemptedAt: Date) + /// The shipment was returned to the sender. + case returned(reason: String) +} + +/// Encapsulates all tracking information for a single shipment. +struct Shipment { + /// The unique tracking number assigned by the carrier. + let trackingNumber: String + /// The destination address. + let destinationAddress: String + /// The estimated delivery date provided at the time of dispatch. + let estimatedDeliveryDate: Date + /// The current status of the shipment. + var status: ShipmentStatus + /// The carrier responsible for delivery (e.g., "FedEx", "UPS"). + let carrier: String +} + +/// Manages shipment creation, tracking updates, and delivery confirmation. +/// +/// Use this service as the single point of contact for all shipping operations. +/// It maintains an in-memory registry of active shipments. +class ShippingService { + private var shipments: [String: Shipment] = [:] + + /// Registers a new shipment and begins tracking it. + /// + /// - Parameter shipment: The shipment to register. The `trackingNumber` must be unique. + /// - Returns: `true` if registration succeeded; `false` if the tracking number already exists. + @discardableResult + func register(_ shipment: Shipment) -> Bool { + guard shipments[shipment.trackingNumber] == nil else { return false } + shipments[shipment.trackingNumber] = shipment + return true + } + + /// Returns the shipment with the given tracking number, if it exists. + /// + /// - Parameter trackingNumber: The carrier-assigned tracking number. + /// - Returns: The matching `Shipment`, or `nil` if not found. + func shipment(for trackingNumber: String) -> Shipment? { + return shipments[trackingNumber] + } + + /// Updates the status of an existing shipment. + /// + /// - Parameters: + /// - trackingNumber: The tracking number of the shipment to update. + /// - status: The new status to apply. 
+ /// - Returns: `true` if the update was applied; `false` if the tracking number was not found. + @discardableResult + func updateStatus(for trackingNumber: String, to status: ShipmentStatus) -> Bool { + guard shipments[trackingNumber] != nil else { return false } + shipments[trackingNumber]?.status = status + return true + } + + /// Returns all shipments that are currently overdue based on their estimated delivery date. + /// + /// A shipment is considered overdue if it has not been delivered and its estimated + /// delivery date is in the past. + /// + /// - Parameter referenceDate: The date to compare against. Defaults to the current date. + /// - Returns: An array of overdue shipments, sorted by estimated delivery date ascending. + func overdueShipments(referenceDate: Date = Date()) -> [Shipment] { + return shipments.values + .filter { shipment in + if case .delivered = shipment.status { return false } + return shipment.estimatedDeliveryDate < referenceDate + } + .sorted { $0.estimatedDeliveryDate < $1.estimatedDeliveryDate } + } +} diff --git a/priv/combined_metrics/samples/documentation/exported_symbol_has_doc_comment/bad/cache.go b/priv/combined_metrics/samples/documentation/exported_symbol_has_doc_comment/bad/cache.go new file mode 100644 index 0000000..b53ea42 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/exported_symbol_has_doc_comment/bad/cache.go @@ -0,0 +1,67 @@ +package cache + +import ( + "sync" + "time" +) + +// no doc comment on exported var +var ErrExpired = errExpired{} + +type errExpired struct{} + +func (errExpired) Error() string { return "cache entry expired" } + +// no doc comment on exported type +type Entry struct { + Value interface{} + ExpiresAt time.Time +} + +// no doc comment on exported method +func (e Entry) IsExpired() bool { + return time.Now().After(e.ExpiresAt) +} + +// no doc comment on exported type +type Cache struct { + mu sync.RWMutex + entries map[string]Entry +} + +// no doc comment on exported 
constructor +func New() *Cache { + return &Cache{entries: make(map[string]Entry)} +} + +// no doc comment on exported method +func (c *Cache) Set(key string, value interface{}, ttl time.Duration) { + c.mu.Lock() + defer c.mu.Unlock() + if ttl <= 0 { + delete(c.entries, key) + return + } + c.entries[key] = Entry{Value: value, ExpiresAt: time.Now().Add(ttl)} +} + +// no doc comment on exported method +func (c *Cache) Get(key string) (interface{}, error) { + c.mu.RLock() + defer c.mu.RUnlock() + entry, ok := c.entries[key] + if !ok { + return nil, nil + } + if entry.IsExpired() { + return nil, ErrExpired + } + return entry.Value, nil +} + +// no doc comment on exported method +func (c *Cache) Delete(key string) { + c.mu.Lock() + defer c.mu.Unlock() + delete(c.entries, key) +} diff --git a/priv/combined_metrics/samples/documentation/exported_symbol_has_doc_comment/good/cache.go b/priv/combined_metrics/samples/documentation/exported_symbol_has_doc_comment/good/cache.go new file mode 100644 index 0000000..973bb5c --- /dev/null +++ b/priv/combined_metrics/samples/documentation/exported_symbol_has_doc_comment/good/cache.go @@ -0,0 +1,72 @@ +// Package cache provides an in-memory key-value cache with TTL-based expiry. +package cache + +import ( + "sync" + "time" +) + +// ErrExpired is returned when a requested key exists but its TTL has elapsed. +var ErrExpired = errExpired{} + +type errExpired struct{} + +func (errExpired) Error() string { return "cache entry expired" } + +// Entry holds a cached value together with its expiry time. +type Entry struct { + Value interface{} + ExpiresAt time.Time +} + +// IsExpired reports whether the entry's TTL has elapsed. +func (e Entry) IsExpired() bool { + return time.Now().After(e.ExpiresAt) +} + +// Cache is a thread-safe in-memory store with per-entry TTLs. +// The zero value is not usable; construct one with New. +type Cache struct { + mu sync.RWMutex + entries map[string]Entry +} + +// New constructs an empty Cache ready for use. 
+func New() *Cache { + return &Cache{entries: make(map[string]Entry)} +} + +// Set stores value under key with the given TTL. +// Calling Set with a non-positive TTL removes any existing entry for key. +func (c *Cache) Set(key string, value interface{}, ttl time.Duration) { + c.mu.Lock() + defer c.mu.Unlock() + if ttl <= 0 { + delete(c.entries, key) + return + } + c.entries[key] = Entry{Value: value, ExpiresAt: time.Now().Add(ttl)} +} + +// Get returns the value stored under key. +// It returns ErrExpired if the entry exists but has elapsed, and a nil error +// with a nil value if the key is absent. +func (c *Cache) Get(key string) (interface{}, error) { + c.mu.RLock() + defer c.mu.RUnlock() + entry, ok := c.entries[key] + if !ok { + return nil, nil + } + if entry.IsExpired() { + return nil, ErrExpired + } + return entry.Value, nil +} + +// Delete removes the entry for key. It is a no-op if key is not present. +func (c *Cache) Delete(key string) { + c.mu.Lock() + defer c.mu.Unlock() + delete(c.entries, key) +} diff --git a/priv/combined_metrics/samples/documentation/file_has_license_header/bad/core.ex b/priv/combined_metrics/samples/documentation/file_has_license_header/bad/core.ex new file mode 100644 index 0000000..b07eabd --- /dev/null +++ b/priv/combined_metrics/samples/documentation/file_has_license_header/bad/core.ex @@ -0,0 +1,74 @@ +defmodule MyApp.Core do + @moduledoc """ + Core utility functions shared across all contexts. + + Provides helpers for formatting, type coercion, and safe value + extraction that do not belong to any specific domain context. 
+ """ + + @spec format_currency(Decimal.t(), String.t()) :: String.t() + def format_currency(%Decimal{} = amount, currency \\ "USD") do + formatted = + amount + |> Decimal.round(2) + |> Decimal.to_string(:normal) + + "#{currency} #{formatted}" + end + + @spec truncate(String.t(), non_neg_integer()) :: String.t() + def truncate(string, max_length) when byte_size(string) <= max_length, do: string + + def truncate(string, max_length) do + String.slice(string, 0, max_length - 3) <> "..." + end + + @spec slugify(String.t()) :: String.t() + def slugify(string) do + string + |> String.downcase() + |> String.replace(~r/[^a-z0-9\s-]/, "") + |> String.replace(~r/\s+/, "-") + |> String.trim("-") + end + + @spec safe_to_integer(term()) :: {:ok, integer()} | :error + def safe_to_integer(value) when is_integer(value), do: {:ok, value} + + def safe_to_integer(value) when is_binary(value) do + case Integer.parse(value) do + {int, ""} -> {:ok, int} + _ -> :error + end + end + + def safe_to_integer(_), do: :error + + @spec deep_merge(map(), map()) :: map() + def deep_merge(left, right) do + Map.merge(left, right, fn _key, left_val, right_val -> + if is_map(left_val) and is_map(right_val) do + deep_merge(left_val, right_val) + else + right_val + end + end) + end + + @spec present?(term()) :: boolean() + def present?(nil), do: false + def present?(""), do: false + def present?([]), do: false + def present?(%{} = map) when map_size(map) == 0, do: false + def present?(_), do: true + + @spec blank?(term()) :: boolean() + def blank?(value), do: not present?(value) + + @spec wrap_ok(term()) :: {:ok, term()} + def wrap_ok(value), do: {:ok, value} + + @spec unwrap_ok!({:ok, term()}) :: term() + def unwrap_ok!({:ok, value}), do: value + def unwrap_ok!({:error, reason}), do: raise("Expected {:ok, _}, got {:error, #{inspect(reason)}}") +end diff --git a/priv/combined_metrics/samples/documentation/file_has_license_header/config.yml 
b/priv/combined_metrics/samples/documentation/file_has_license_header/config.yml new file mode 100644 index 0000000..5611933 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/file_has_license_header/config.yml @@ -0,0 +1 @@ +doc: "Source files should begin with a license or copyright header." diff --git a/priv/combined_metrics/samples/documentation/file_has_license_header/good/core.ex b/priv/combined_metrics/samples/documentation/file_has_license_header/good/core.ex new file mode 100644 index 0000000..93e899d --- /dev/null +++ b/priv/combined_metrics/samples/documentation/file_has_license_header/good/core.ex @@ -0,0 +1,76 @@ +# Copyright (c) 2024 Acme Corp. MIT License. + +defmodule MyApp.Core do + @moduledoc """ + Core utility functions shared across all contexts. + + Provides helpers for formatting, type coercion, and safe value + extraction that do not belong to any specific domain context. + """ + + @spec format_currency(Decimal.t(), String.t()) :: String.t() + def format_currency(%Decimal{} = amount, currency \\ "USD") do + formatted = + amount + |> Decimal.round(2) + |> Decimal.to_string(:normal) + + "#{currency} #{formatted}" + end + + @spec truncate(String.t(), non_neg_integer()) :: String.t() + def truncate(string, max_length) when byte_size(string) <= max_length, do: string + + def truncate(string, max_length) do + String.slice(string, 0, max_length - 3) <> "..." 
+ end + + @spec slugify(String.t()) :: String.t() + def slugify(string) do + string + |> String.downcase() + |> String.replace(~r/[^a-z0-9\s-]/, "") + |> String.replace(~r/\s+/, "-") + |> String.trim("-") + end + + @spec safe_to_integer(term()) :: {:ok, integer()} | :error + def safe_to_integer(value) when is_integer(value), do: {:ok, value} + + def safe_to_integer(value) when is_binary(value) do + case Integer.parse(value) do + {int, ""} -> {:ok, int} + _ -> :error + end + end + + def safe_to_integer(_), do: :error + + @spec deep_merge(map(), map()) :: map() + def deep_merge(left, right) do + Map.merge(left, right, fn _key, left_val, right_val -> + if is_map(left_val) and is_map(right_val) do + deep_merge(left_val, right_val) + else + right_val + end + end) + end + + @spec present?(term()) :: boolean() + def present?(nil), do: false + def present?(""), do: false + def present?([]), do: false + def present?(%{} = map) when map_size(map) == 0, do: false + def present?(_), do: true + + @spec blank?(term()) :: boolean() + def blank?(value), do: not present?(value) + + @spec wrap_ok(term()) :: {:ok, term()} + def wrap_ok(value), do: {:ok, value} + + @spec unwrap_ok!({:ok, term()}) :: term() + def unwrap_ok!({:ok, value}), do: value + def unwrap_ok!({:error, reason}), do: raise("Expected {:ok, _}, got {:error, #{inspect(reason)}}") +end diff --git a/priv/combined_metrics/samples/documentation/file_has_module_docstring/bad/shipping.ex b/priv/combined_metrics/samples/documentation/file_has_module_docstring/bad/shipping.ex new file mode 100644 index 0000000..044c4f8 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/file_has_module_docstring/bad/shipping.ex @@ -0,0 +1,73 @@ +defmodule MyApp.Shipping do + alias MyApp.Repo + alias MyApp.Shipping.Shipment + alias MyApp.Shipping.TrackingEvent + alias MyApp.Orders.Order + + @spec create_shipment(Order.t(), map()) :: {:ok, Shipment.t()} | {:error, Ecto.Changeset.t()} + def create_shipment(%Order{} = order, attrs) do 
+ %Shipment{} + |> Shipment.changeset(Map.put(attrs, :order_id, order.id)) + |> Repo.insert() + end + + @spec get_shipment!(integer()) :: Shipment.t() + def get_shipment!(id) do + Repo.get!(Shipment, id) + |> Repo.preload(:tracking_events) + end + + @spec update_shipment(Shipment.t(), map()) :: {:ok, Shipment.t()} | {:error, Ecto.Changeset.t()} + def update_shipment(%Shipment{} = shipment, attrs) do + shipment + |> Shipment.changeset(attrs) + |> Repo.update() + end + + @spec cancel_shipment(Shipment.t()) :: {:ok, Shipment.t()} | {:error, Ecto.Changeset.t()} + def cancel_shipment(%Shipment{} = shipment) do + update_shipment(shipment, %{status: :cancelled}) + end + + @spec add_tracking_event(Shipment.t(), map()) :: + {:ok, TrackingEvent.t()} | {:error, Ecto.Changeset.t()} + def add_tracking_event(%Shipment{} = shipment, attrs) do + %TrackingEvent{} + |> TrackingEvent.changeset(Map.put(attrs, :shipment_id, shipment.id)) + |> Repo.insert() + end + + @spec estimated_delivery(Shipment.t()) :: Date.t() | nil + def estimated_delivery(%Shipment{shipped_at: nil}), do: nil + + def estimated_delivery(%Shipment{shipped_at: shipped_at, service: service}) do + days = transit_days(service) + Date.add(DateTime.to_date(shipped_at), days) + end + + @spec active_shipments_for_user(map()) :: [Shipment.t()] + def active_shipments_for_user(%{id: user_id}) do + Repo.all( + from s in Shipment, + join: o in Order, + on: o.id == s.order_id, + where: o.user_id == ^user_id and s.status == :in_transit + ) + end + + @spec calculate_shipping_cost(map(), String.t()) :: Decimal.t() + def calculate_shipping_cost(%{weight_grams: weight}, destination_zone) do + base = base_rate(destination_zone) + weight_cost = Decimal.mult(Decimal.new(weight), Decimal.new("0.001")) + Decimal.add(base, weight_cost) + end + + defp transit_days(:standard), do: 5 + defp transit_days(:express), do: 2 + defp transit_days(:overnight), do: 1 + defp transit_days(_), do: 7 + + defp base_rate("domestic"), do: 
Decimal.new("4.99") + defp base_rate("international"), do: Decimal.new("19.99") + defp base_rate(_), do: Decimal.new("9.99") +end diff --git a/priv/combined_metrics/samples/documentation/file_has_module_docstring/config.yml b/priv/combined_metrics/samples/documentation/file_has_module_docstring/config.yml new file mode 100644 index 0000000..d3f61c3 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/file_has_module_docstring/config.yml @@ -0,0 +1 @@ +doc: "Files should have a module-level docstring explaining purpose and usage." diff --git a/priv/combined_metrics/samples/documentation/file_has_module_docstring/good/shipping.ex b/priv/combined_metrics/samples/documentation/file_has_module_docstring/good/shipping.ex new file mode 100644 index 0000000..5d7e2de --- /dev/null +++ b/priv/combined_metrics/samples/documentation/file_has_module_docstring/good/shipping.ex @@ -0,0 +1,88 @@ +defmodule MyApp.Shipping do + @moduledoc """ + Context module for shipment lifecycle management. + + Handles creating and tracking shipments tied to orders, recording + tracking events, and estimating delivery dates based on the selected + shipping service and destination zone. + + All public functions in this module are the sole entry point for + shipment-related operations. Internal schema modules such as + `MyApp.Shipping.Shipment` and `MyApp.Shipping.TrackingEvent` should + not be accessed directly from outside this context. 
+ """ + + alias MyApp.Repo + alias MyApp.Shipping.Shipment + alias MyApp.Shipping.TrackingEvent + alias MyApp.Orders.Order + + @spec create_shipment(Order.t(), map()) :: {:ok, Shipment.t()} | {:error, Ecto.Changeset.t()} + def create_shipment(%Order{} = order, attrs) do + %Shipment{} + |> Shipment.changeset(Map.put(attrs, :order_id, order.id)) + |> Repo.insert() + end + + @spec get_shipment!(integer()) :: Shipment.t() + def get_shipment!(id) do + Repo.get!(Shipment, id) + |> Repo.preload(:tracking_events) + end + + @spec update_shipment(Shipment.t(), map()) :: {:ok, Shipment.t()} | {:error, Ecto.Changeset.t()} + def update_shipment(%Shipment{} = shipment, attrs) do + shipment + |> Shipment.changeset(attrs) + |> Repo.update() + end + + @spec cancel_shipment(Shipment.t()) :: {:ok, Shipment.t()} | {:error, Ecto.Changeset.t()} + def cancel_shipment(%Shipment{} = shipment) do + update_shipment(shipment, %{status: :cancelled}) + end + + @spec add_tracking_event(Shipment.t(), map()) :: + {:ok, TrackingEvent.t()} | {:error, Ecto.Changeset.t()} + def add_tracking_event(%Shipment{} = shipment, attrs) do + %TrackingEvent{} + |> TrackingEvent.changeset(Map.put(attrs, :shipment_id, shipment.id)) + |> Repo.insert() + end + + @spec estimated_delivery(Shipment.t()) :: Date.t() | nil + def estimated_delivery(%Shipment{shipped_at: nil}), do: nil + + def estimated_delivery(%Shipment{shipped_at: shipped_at, service: service}) do + days = transit_days(service) + Date.add(DateTime.to_date(shipped_at), days) + end + + @spec active_shipments_for_user(map()) :: [Shipment.t()] + def active_shipments_for_user(%{id: user_id}) do + import Ecto.Query, only: [from: 2, where: 3, join: 5] + + Repo.all( + from s in Shipment, + join: o in Order, + on: o.id == s.order_id, + where: o.user_id == ^user_id and s.status == :in_transit + ) + end + + @spec calculate_shipping_cost(map(), String.t()) :: Decimal.t() + def calculate_shipping_cost(%{weight_grams: weight}, destination_zone) do + base = 
base_rate(destination_zone) + weight_cost = Decimal.mult(Decimal.new(weight), Decimal.new("0.001")) + Decimal.add(base, weight_cost) + end + + defp transit_days(:standard), do: 5 + defp transit_days(:express), do: 2 + defp transit_days(:overnight), do: 1 + defp transit_days(_), do: 7 + + defp base_rate("domestic"), do: Decimal.new("4.99") + defp base_rate("international"), do: Decimal.new("19.99") + defp base_rate(_), do: Decimal.new("9.99") +end diff --git a/priv/combined_metrics/samples/documentation/file_has_no_commented_out_code/bad/legacy_importer.ex b/priv/combined_metrics/samples/documentation/file_has_no_commented_out_code/bad/legacy_importer.ex new file mode 100644 index 0000000..78f7f78 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/file_has_no_commented_out_code/bad/legacy_importer.ex @@ -0,0 +1,91 @@ +defmodule MyApp.LegacyImporter do + @moduledoc """ + Handles importing records from the legacy CSV export format. + """ + + alias MyApp.Repo + alias MyApp.Accounts.User + alias MyApp.Orders.Order + + # def import_all(path) do + # path + # |> File.stream!() + # |> CSV.decode!(headers: true) + # |> Enum.map(&import_row/1) + # end + + @spec import_file(String.t()) :: {:ok, non_neg_integer()} | {:error, String.t()} + def import_file(path) do + rows = + path + |> File.stream!() + |> Stream.map(&String.trim/1) + |> Stream.reject(&(&1 == "")) + |> Stream.drop(1) + |> Enum.map(&String.split(&1, ",")) + + results = Enum.map(rows, &import_row/1) + errors = Enum.filter(results, &match?({:error, _}, &1)) + + if errors == [] do + {:ok, length(results)} + else + {:error, "#{length(errors)} rows failed"} + end + end + + # Old row import — replaced by pattern matched version below + # def import_row(row) do + # user = Repo.get_by(User, email: Enum.at(row, 2)) + # if user do + # %Order{user_id: user.id, total: String.to_float(Enum.at(row, 5))} + # |> Repo.insert() + # else + # {:error, "user not found"} + # end + # end + + defp import_row([id, _name, 
email, date, _source, total | _rest]) do + with %User{} = user <- Repo.get_by(User, email: email), + {:ok, ordered_at} <- Date.from_iso8601(date), + {amount, _} <- Float.parse(total) do + %Order{} + |> Order.changeset(%{ + legacy_id: id, + user_id: user.id, + total: amount, + ordered_at: ordered_at + }) + |> Repo.insert() + else + nil -> {:error, "unknown user: #{email}"} + {:error, reason} -> {:error, reason} + end + end + + defp import_row(_invalid), do: {:error, "malformed row"} + + # TODO: add dry_run mode + # def dry_run(path) do + # import_file(path) + # |> case do + # {:ok, count} -> IO.puts("Would import #{count} rows") + # {:error, msg} -> IO.puts("Error: #{msg}") + # end + # end + + @spec summary_stats(String.t()) :: map() + def summary_stats(path) do + rows = + path + |> File.stream!() + |> Stream.drop(1) + |> Enum.to_list() + + %{ + total_rows: length(rows), + # valid_rows: Enum.count(rows, &valid_row?/1), + file: path + } + end +end diff --git a/priv/combined_metrics/samples/documentation/file_has_no_commented_out_code/config.yml b/priv/combined_metrics/samples/documentation/file_has_no_commented_out_code/config.yml new file mode 100644 index 0000000..345b09b --- /dev/null +++ b/priv/combined_metrics/samples/documentation/file_has_no_commented_out_code/config.yml @@ -0,0 +1 @@ +doc: "Files should not contain commented-out code blocks left from development." diff --git a/priv/combined_metrics/samples/documentation/file_has_no_commented_out_code/good/legacy_importer.ex b/priv/combined_metrics/samples/documentation/file_has_no_commented_out_code/good/legacy_importer.ex new file mode 100644 index 0000000..aa6d3da --- /dev/null +++ b/priv/combined_metrics/samples/documentation/file_has_no_commented_out_code/good/legacy_importer.ex @@ -0,0 +1,83 @@ +defmodule MyApp.LegacyImporter do + @moduledoc """ + Handles importing records from the legacy CSV export format. 
+ + Rows are expected to be comma-separated with the following columns: + `id, name, email, date, source, total`. + + Use `import_file/1` to run the full import and receive a success count, + or `summary_stats/1` to inspect the file without persisting any records. + """ + + alias MyApp.Repo + alias MyApp.Accounts.User + alias MyApp.Orders.Order + + @spec import_file(String.t()) :: {:ok, non_neg_integer()} | {:error, String.t()} + def import_file(path) do + rows = + path + |> File.stream!() + |> Stream.map(&String.trim/1) + |> Stream.reject(&(&1 == "")) + |> Stream.drop(1) + |> Enum.map(&String.split(&1, ",")) + + results = Enum.map(rows, &import_row/1) + failed = Enum.count(results, &match?({:error, _}, &1)) + + if failed == 0 do + {:ok, length(results)} + else + {:error, "#{failed} rows failed to import"} + end + end + + @spec dry_run(String.t()) :: {:ok, non_neg_integer()} | {:error, String.t()} + def dry_run(path) do + Repo.transaction(fn -> + case import_file(path) do + {:ok, count} -> + Repo.rollback({:dry_run, count}) + + {:error, reason} -> + Repo.rollback({:error, reason}) + end + end) + |> case do + {:error, {:dry_run, count}} -> {:ok, count} + {:error, {:error, reason}} -> {:error, reason} + end + end + + @spec summary_stats(String.t()) :: map() + def summary_stats(path) do + rows = + path + |> File.stream!() + |> Stream.drop(1) + |> Enum.to_list() + + %{total_rows: length(rows), file: path} + end + + defp import_row([id, _name, email, date, _source, total | _rest]) do + with %User{} = user <- Repo.get_by(User, email: email), + {:ok, ordered_at} <- Date.from_iso8601(date), + {amount, _} <- Float.parse(total) do + %Order{} + |> Order.changeset(%{ + legacy_id: id, + user_id: user.id, + total: amount, + ordered_at: ordered_at + }) + |> Repo.insert() + else + nil -> {:error, "unknown user: #{email}"} + {:error, reason} -> {:error, reason} + end + end + + defp import_row(_invalid), do: {:error, "malformed row"} +end diff --git 
a/priv/combined_metrics/samples/documentation/function_has_docstring/bad/tax.ex b/priv/combined_metrics/samples/documentation/function_has_docstring/bad/tax.ex new file mode 100644 index 0000000..48812b4 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/function_has_docstring/bad/tax.ex @@ -0,0 +1,73 @@ +defmodule MyApp.Tax do + @moduledoc """ + Tax calculation context for order totals and line items. + """ + + alias MyApp.Tax.Rate + alias MyApp.Tax.Exemption + alias MyApp.Repo + + @spec calculate(Decimal.t(), String.t()) :: Decimal.t() + def calculate(subtotal, region) do + rate = fetch_rate(region) + Decimal.mult(subtotal, rate) + end + + @spec calculate_line_items([map()], String.t()) :: [map()] + def calculate_line_items(items, region) do + rate = fetch_rate(region) + Enum.map(items, fn item -> + tax = Decimal.mult(item.price, rate) + Map.put(item, :tax, tax) + end) + end + + @spec effective_rate(String.t()) :: Decimal.t() + def effective_rate(region) do + fetch_rate(region) + end + + @spec exempt?(String.t(), String.t()) :: boolean() + def exempt?(product_category, region) do + Repo.get_by(Exemption, category: product_category, region: region) != nil + end + + @spec apply_exemptions([map()], String.t()) :: [map()] + def apply_exemptions(items, region) do + Enum.map(items, fn item -> + if exempt?(item.category, region) do + Map.put(item, :tax, Decimal.new(0)) + else + item + end + end) + end + + @spec summarize(Decimal.t(), String.t()) :: map() + def summarize(subtotal, region) do + tax = calculate(subtotal, region) + total = Decimal.add(subtotal, tax) + + %{ + subtotal: subtotal, + tax: tax, + total: total, + rate: effective_rate(region), + region: region + } + end + + @spec annual_liability([map()]) :: Decimal.t() + def annual_liability(transactions) do + transactions + |> Enum.map(& &1.tax) + |> Enum.reduce(Decimal.new(0), &Decimal.add/2) + end + + defp fetch_rate(region) do + case Repo.get_by(Rate, region: region) do + %Rate{rate: rate} -> 
rate + nil -> Decimal.new("0.10") + end + end +end diff --git a/priv/combined_metrics/samples/documentation/function_has_docstring/config.yml b/priv/combined_metrics/samples/documentation/function_has_docstring/config.yml new file mode 100644 index 0000000..bac2ee2 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/function_has_docstring/config.yml @@ -0,0 +1 @@ +doc: "Public functions should have a docstring describing behaviour, params, and return value." diff --git a/priv/combined_metrics/samples/documentation/function_has_docstring/good/tax.ex b/priv/combined_metrics/samples/documentation/function_has_docstring/good/tax.ex new file mode 100644 index 0000000..9767383 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/function_has_docstring/good/tax.ex @@ -0,0 +1,105 @@ +defmodule MyApp.Tax do + @moduledoc """ + Tax calculation context for order totals and line items. + """ + + alias MyApp.Tax.Rate + alias MyApp.Tax.Exemption + alias MyApp.Repo + + @doc """ + Calculates the tax amount for a given subtotal and region. + + Returns the tax amount as a `Decimal`, not the total. Use `summarize/2` + to get a full breakdown including subtotal, tax, and total. + + ## Examples + + iex> MyApp.Tax.calculate(Decimal.new("100.00"), "us-ca") + Decimal.new("8.25") + """ + @spec calculate(Decimal.t(), String.t()) :: Decimal.t() + def calculate(subtotal, region) do + rate = fetch_rate(region) + Decimal.mult(subtotal, rate) + end + + @doc """ + Applies per-line-item tax to a list of order items for a given region. + + Each item map must have a `:price` and `:category` key. Returns the + same list with a `:tax` key added to each item. Items with an exemption + in the given region receive a tax of zero. 
+ """ + @spec calculate_line_items([map()], String.t()) :: [map()] + def calculate_line_items(items, region) do + items + |> apply_exemptions(region) + |> Enum.map(&apply_tax_rate(&1, fetch_rate(region))) + end + + @doc """ + Returns the applicable tax rate for the given region as a `Decimal`. + + Falls back to a default rate of 10% when no specific rate is configured. + """ + @spec effective_rate(String.t()) :: Decimal.t() + def effective_rate(region) do + fetch_rate(region) + end + + @doc """ + Returns `true` if the given product category is tax-exempt in a region. + """ + @spec exempt?(String.t(), String.t()) :: boolean() + def exempt?(product_category, region) do + Repo.get_by(Exemption, category: product_category, region: region) != nil + end + + @doc """ + Returns a tax summary map for a subtotal and region. + + The map contains `:subtotal`, `:tax`, `:total`, `:rate`, and `:region`. + """ + @spec summarize(Decimal.t(), String.t()) :: map() + def summarize(subtotal, region) do + tax = calculate(subtotal, region) + total = Decimal.add(subtotal, tax) + + %{ + subtotal: subtotal, + tax: tax, + total: total, + rate: effective_rate(region), + region: region + } + end + + @doc """ + Sums the total tax liability across a list of transactions. + + Each transaction map must have a `:tax` key with a `Decimal` value. 
+ """ + @spec annual_liability([map()]) :: Decimal.t() + def annual_liability(transactions) do + transactions + |> Enum.map(& &1.tax) + |> Enum.reduce(Decimal.new(0), &Decimal.add/2) + end + + defp apply_exemptions(items, region) do + Enum.map(items, fn item -> + if exempt?(item.category, region), do: Map.put(item, :tax, Decimal.new(0)), else: item + end) + end + + defp apply_tax_rate(%{tax: _already_set} = item, _rate), do: item + defp apply_tax_rate(item, rate), do: Map.put(item, :tax, Decimal.mult(item.price, rate)) + + defp fetch_rate(region) do + case Repo.get_by(Rate, region: region) do + %Rate{rate: rate} -> rate + nil -> Decimal.new("0.10") + end + end +end diff --git a/priv/combined_metrics/samples/documentation/function_todo_comment_in_body/bad/sync.ex b/priv/combined_metrics/samples/documentation/function_todo_comment_in_body/bad/sync.ex new file mode 100644 index 0000000..4cfb397 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/function_todo_comment_in_body/bad/sync.ex @@ -0,0 +1,95 @@ +defmodule MyApp.Sync do + @moduledoc """ + Data synchronization service for reconciling records with an external system. 
+ """ + + alias MyApp.Repo + alias MyApp.Sync.Record + alias MyApp.ExternalAPI + + @spec sync_all() :: {:ok, map()} | {:error, String.t()} + def sync_all() do + # TODO: add pagination support so we don't fetch all records at once + records = Repo.all(Record) + results = Enum.map(records, &sync_record/1) + + %{ + total: length(results), + success: Enum.count(results, &match?({:ok, _}, &1)), + failed: Enum.count(results, &match?({:error, _}, &1)) + } + |> then(&{:ok, &1}) + end + + @spec sync_record(Record.t()) :: {:ok, Record.t()} | {:error, String.t()} + def sync_record(%Record{} = record) do + # TODO: handle rate limiting from ExternalAPI + case ExternalAPI.push(record.external_id, record_payload(record)) do + {:ok, response} -> + # TODO: parse and store the response metadata + update_synced_at(record, response) + + {:error, reason} -> + {:error, "ExternalAPI error: #{reason}"} + end + end + + @spec pull_updates(DateTime.t()) :: {:ok, non_neg_integer()} | {:error, String.t()} + def pull_updates(since) do + # TODO: implement delta sync — currently fetches everything + case ExternalAPI.list_updated(since) do + {:ok, items} -> + count = + items + |> Enum.map(&upsert_from_external/1) + |> Enum.count(&match?({:ok, _}, &1)) + + {:ok, count} + + {:error, reason} -> + {:error, reason} + end + end + + @spec conflict_resolution(Record.t(), map()) :: {:ok, Record.t()} + def conflict_resolution(%Record{} = local, remote) do + # TODO: implement proper conflict resolution strategy (last-write-wins vs merge) + if DateTime.compare(local.updated_at, remote["updated_at"]) == :gt do + {:ok, local} + else + upsert_from_external(remote) + end + end + + @spec status() :: map() + def status() do + # TODO: add last_error_at tracking + total = Repo.aggregate(Record, :count) + synced = Repo.aggregate(from(r in Record, where: not is_nil(r.synced_at)), :count) + + %{ + total: total, + synced: synced, + pending: total - synced + } + end + + defp record_payload(%Record{} = record) do + %{id: 
record.external_id, data: record.payload, version: record.version} + end + + defp update_synced_at(record, _response) do + record + |> Record.changeset(%{synced_at: DateTime.utc_now()}) + |> Repo.update() + end + + defp upsert_from_external(%{"id" => ext_id} = data) do + attrs = %{external_id: ext_id, payload: data, synced_at: DateTime.utc_now()} + + case Repo.get_by(Record, external_id: ext_id) do + nil -> Repo.insert(Record.changeset(%Record{}, attrs)) + record -> Repo.update(Record.changeset(record, attrs)) + end + end +end diff --git a/priv/combined_metrics/samples/documentation/function_todo_comment_in_body/config.yml b/priv/combined_metrics/samples/documentation/function_todo_comment_in_body/config.yml new file mode 100644 index 0000000..d1cd973 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/function_todo_comment_in_body/config.yml @@ -0,0 +1 @@ +doc: "Functions should not contain TODO/FIXME comments indicating unfinished work." diff --git a/priv/combined_metrics/samples/documentation/function_todo_comment_in_body/good/sync.ex b/priv/combined_metrics/samples/documentation/function_todo_comment_in_body/good/sync.ex new file mode 100644 index 0000000..08778c0 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/function_todo_comment_in_body/good/sync.ex @@ -0,0 +1,104 @@ +defmodule MyApp.Sync do + @moduledoc """ + Data synchronization service for reconciling records with an external system. + + Supports full sync via `sync_all/0`, incremental pull via `pull_updates/1`, + and per-record sync via `sync_record/1`. Conflict resolution uses a + last-write-wins strategy based on `updated_at` timestamps. 
+ """ + + alias MyApp.Repo + alias MyApp.Sync.Record + alias MyApp.ExternalAPI + + @page_size 100 + + @spec sync_all() :: {:ok, map()} | {:error, String.t()} + def sync_all() do + results = + stream_all_records() + |> Enum.map(&sync_record/1) + + summary = %{ + total: length(results), + success: Enum.count(results, &match?({:ok, _}, &1)), + failed: Enum.count(results, &match?({:error, _}, &1)) + } + + {:ok, summary} + end + + @spec sync_record(Record.t()) :: {:ok, Record.t()} | {:error, String.t()} + def sync_record(%Record{} = record) do + case ExternalAPI.push(record.external_id, record_payload(record)) do + {:ok, response} -> update_synced_at(record, response) + {:error, reason} -> {:error, "ExternalAPI error: #{reason}"} + end + end + + @spec pull_updates(DateTime.t()) :: {:ok, non_neg_integer()} | {:error, String.t()} + def pull_updates(since) do + case ExternalAPI.list_updated(since) do + {:ok, items} -> + count = + items + |> Enum.map(&upsert_from_external/1) + |> Enum.count(&match?({:ok, _}, &1)) + + {:ok, count} + + {:error, reason} -> + {:error, reason} + end + end + + @spec conflict_resolution(Record.t(), map()) :: {:ok, Record.t()} + def conflict_resolution(%Record{} = local, remote) do + if DateTime.compare(local.updated_at, remote["updated_at"]) == :gt do + {:ok, local} + else + upsert_from_external(remote) + end + end + + @spec status() :: map() + def status() do + import Ecto.Query, only: [from: 2] + + total = Repo.aggregate(Record, :count) + synced = Repo.aggregate(from(r in Record, where: not is_nil(r.synced_at)), :count) + + %{ + total: total, + synced: synced, + pending: total - synced + } + end + + defp stream_all_records() do + import Ecto.Query, only: [from: 2] + + Repo.all(from r in Record, order_by: r.id) + |> Stream.chunk_every(@page_size) + |> Stream.flat_map(& &1) + end + + defp record_payload(%Record{} = record) do + %{id: record.external_id, data: record.payload, version: record.version} + end + + defp update_synced_at(record, 
_response) do + record + |> Record.changeset(%{synced_at: DateTime.utc_now()}) + |> Repo.update() + end + + defp upsert_from_external(%{"id" => ext_id} = data) do + attrs = %{external_id: ext_id, payload: data, synced_at: DateTime.utc_now()} + + case Repo.get_by(Record, external_id: ext_id) do + nil -> Repo.insert(Record.changeset(%Record{}, attrs)) + record -> Repo.update(Record.changeset(record, attrs)) + end + end +end diff --git a/priv/combined_metrics/samples/documentation/public_api_has_moduledoc_and_doc/bad/payments.ex b/priv/combined_metrics/samples/documentation/public_api_has_moduledoc_and_doc/bad/payments.ex new file mode 100644 index 0000000..2f886e0 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/public_api_has_moduledoc_and_doc/bad/payments.ex @@ -0,0 +1,81 @@ +defmodule MyApp.Payments do + # Bad: no @moduledoc — the module's purpose, conventions, and usage + # are completely undiscoverable without reading the full source. + + alias MyApp.Payments.{Charge, Refund} + alias MyApp.Repo + + # Bad: no @doc on a public function. Callers cannot use `h MyApp.Payments.charge/3` + # in IEx, and documentation tools will not generate an entry for this function. 
+ @spec charge(integer(), pos_integer(), :usd | :eur | :gbp) :: + {:ok, Charge.t()} | {:error, atom()} + def charge(customer_id, amount, currency) + when is_integer(amount) and amount > 0 do + with {:ok, pm} <- fetch_default_payment_method(customer_id), + {:ok, result} <- MyApp.PaymentGateway.charge(pm.token, amount, currency) do + insert_charge(customer_id, amount, currency, result.transaction_id) + end + end + + # Bad: no @doc on this public function either + @spec refund(integer(), pos_integer()) :: {:ok, Refund.t()} | {:error, atom()} + def refund(charge_id, amount) when is_integer(amount) and amount > 0 do + with {:ok, charge} <- fetch_charge(charge_id), + :ok <- validate_refund_amount(charge, amount), + {:ok, result} <- MyApp.PaymentGateway.refund(charge.transaction_id, amount) do + insert_refund(charge, amount, result.refund_id) + end + end + + # Bad: `list_charges/2` is public but completely undocumented. + # What does `opts` accept? What order are results in? + def list_charges(customer_id, opts \\ []) do + limit = Keyword.get(opts, :limit, 20) + offset = Keyword.get(opts, :offset, 0) + + Repo.all( + from c in Charge, + where: c.customer_id == ^customer_id, + order_by: [desc: c.inserted_at], + limit: ^limit, + offset: ^offset + ) + end + + defp fetch_default_payment_method(customer_id) do + case Repo.get_by(MyApp.Payments.PaymentMethod, customer_id: customer_id, default: true) do + nil -> {:error, :no_payment_method} + pm -> {:ok, pm} + end + end + + defp fetch_charge(charge_id) do + case Repo.get(Charge, charge_id) do + nil -> {:error, :charge_not_found} + charge -> {:ok, charge} + end + end + + defp validate_refund_amount(%Charge{amount: orig}, amount) when amount > orig do + {:error, :exceeds_original} + end + + defp validate_refund_amount(_, _), do: :ok + + defp insert_charge(customer_id, amount, currency, transaction_id) do + %Charge{} + |> Charge.changeset(%{ + customer_id: customer_id, + amount: amount, + currency: currency, + transaction_id: 
transaction_id + }) + |> Repo.insert() + end + + defp insert_refund(charge, amount, refund_id) do + %Refund{} + |> Refund.changeset(%{charge_id: charge.id, amount: amount, refund_id: refund_id}) + |> Repo.insert() + end +end diff --git a/priv/combined_metrics/samples/documentation/public_api_has_moduledoc_and_doc/good/payments.ex b/priv/combined_metrics/samples/documentation/public_api_has_moduledoc_and_doc/good/payments.ex new file mode 100644 index 0000000..2d87c50 --- /dev/null +++ b/priv/combined_metrics/samples/documentation/public_api_has_moduledoc_and_doc/good/payments.ex @@ -0,0 +1,109 @@ +defmodule MyApp.Payments do + @moduledoc """ + Public API for processing payments and managing charges. + + All monetary amounts are in the smallest currency unit (e.g. cents for USD). + Currency codes follow ISO 4217 (e.g. `:usd`, `:eur`). + + ## Usage + + {:ok, charge} = MyApp.Payments.charge(customer_id, 2999, :usd) + {:ok, _} = MyApp.Payments.refund(charge.id, 2999) + """ + + alias MyApp.Payments.{Charge, Refund} + alias MyApp.Repo + + @doc """ + Creates a charge against a customer's default payment method. + + `amount` must be a positive integer in the smallest currency unit. + `currency` must be one of `:usd`, `:eur`, or `:gbp`. + + Returns `{:ok, charge}` on success, or `{:error, reason}` when the + customer has no payment method, the card is declined, or validation fails. + """ + @spec charge(integer(), pos_integer(), :usd | :eur | :gbp) :: + {:ok, Charge.t()} | {:error, atom()} + def charge(customer_id, amount, currency) + when is_integer(amount) and amount > 0 do + with {:ok, pm} <- fetch_default_payment_method(customer_id), + {:ok, result} <- MyApp.PaymentGateway.charge(pm.token, amount, currency) do + insert_charge(customer_id, amount, currency, result.transaction_id) + end + end + + @doc """ + Refunds a charge fully or partially. + + `amount` must not exceed the original charge amount. Pass the full charge + amount to issue a full refund. 
+ + Returns `{:ok, refund}` or `{:error, :exceeds_original}` when the requested + amount is greater than the charge amount. + """ + @spec refund(integer(), pos_integer()) :: {:ok, Refund.t()} | {:error, atom()} + def refund(charge_id, amount) when is_integer(amount) and amount > 0 do + with {:ok, charge} <- fetch_charge(charge_id), + :ok <- validate_refund_amount(charge, amount), + {:ok, result} <- MyApp.PaymentGateway.refund(charge.transaction_id, amount) do + insert_refund(charge, amount, result.refund_id) + end + end + + @doc """ + Lists all charges for a customer, ordered by most recent first. + + `opts` supports `:limit` (default 20) and `:offset` (default 0). + """ + @spec list_charges(integer(), keyword()) :: [Charge.t()] + def list_charges(customer_id, opts \\ []) do + limit = Keyword.get(opts, :limit, 20) + offset = Keyword.get(opts, :offset, 0) + + Repo.all( + from c in Charge, + where: c.customer_id == ^customer_id, + order_by: [desc: c.inserted_at], + limit: ^limit, + offset: ^offset + ) + end + + defp fetch_default_payment_method(customer_id) do + case Repo.get_by(MyApp.Payments.PaymentMethod, customer_id: customer_id, default: true) do + nil -> {:error, :no_payment_method} + pm -> {:ok, pm} + end + end + + defp fetch_charge(charge_id) do + case Repo.get(Charge, charge_id) do + nil -> {:error, :charge_not_found} + charge -> {:ok, charge} + end + end + + defp validate_refund_amount(%Charge{amount: orig}, amount) when amount > orig do + {:error, :exceeds_original} + end + + defp validate_refund_amount(_, _), do: :ok + + defp insert_charge(customer_id, amount, currency, transaction_id) do + %Charge{} + |> Charge.changeset(%{ + customer_id: customer_id, + amount: amount, + currency: currency, + transaction_id: transaction_id + }) + |> Repo.insert() + end + + defp insert_refund(charge, amount, refund_id) do + %Refund{} + |> Refund.changeset(%{charge_id: charge.id, amount: amount, refund_id: refund_id}) + |> Repo.insert() + end +end diff --git 
a/priv/combined_metrics/samples/error_handling/catches_specific_exception/bad/file_importer.py b/priv/combined_metrics/samples/error_handling/catches_specific_exception/bad/file_importer.py new file mode 100644 index 0000000..0ab63e0 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/catches_specific_exception/bad/file_importer.py @@ -0,0 +1,66 @@ +"""File importer that reads, parses, and ingests CSV data files.""" +from __future__ import annotations + +import csv +import io +import os +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class ImportResult: + filename: str + rows_imported: int + rows_skipped: int + error: Optional[str] = None + + +def read_file(path: str) -> str: + """Read a file — catches all exceptions, masking programming errors.""" + try: + with open(path, encoding="utf-8") as fh: + return fh.read() + except Exception: # too broad: hides PermissionError, MemoryError, etc. + return "" + + +def parse_csv(content: str) -> list: + """Parse CSV text — broad catch swallows malformed-data signals.""" + try: + reader = csv.DictReader(io.StringIO(content)) + return list(reader) + except Exception as e: # catches everything including KeyboardInterrupt chain + print(f"parse error: {e}") + return [] + + +def convert_row(row: dict) -> dict: + """Convert raw string values — broad except prevents surfacing schema issues.""" + try: + return { + "id": int(row["id"]), + "name": row["name"].strip(), + "amount": float(row["amount"]), + } + except Exception: # hides KeyError (missing column) vs ValueError (bad data) + return {} + + +def import_file(path: str) -> ImportResult: + """Import a CSV file — catches everything so failures are silently swallowed.""" + filename = os.path.basename(path) + try: + content = read_file(path) + rows = parse_csv(content) + imported = 0 + skipped = 0 + for row in rows: + result = convert_row(row) + if result: + imported += 1 + else: + skipped += 1 + return ImportResult(filename=filename, 
rows_imported=imported, rows_skipped=skipped) + except Exception as e: # outermost catch hides all failures + return ImportResult(filename=filename, rows_imported=0, rows_skipped=0, error=str(e)) diff --git a/priv/combined_metrics/samples/error_handling/catches_specific_exception/good/file_importer.py b/priv/combined_metrics/samples/error_handling/catches_specific_exception/good/file_importer.py new file mode 100644 index 0000000..790db17 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/catches_specific_exception/good/file_importer.py @@ -0,0 +1,69 @@ +"""File importer that reads, parses, and ingests CSV data files.""" +from __future__ import annotations + +import csv +import os +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class ImportResult: + filename: str + rows_imported: int + rows_skipped: int + error: Optional[str] = None + + +def read_file(path: str) -> str: + """Read a file and return its contents, raising descriptive errors.""" + try: + with open(path, encoding="utf-8") as fh: + return fh.read() + except FileNotFoundError: + raise FileNotFoundError(f"Import file not found: {path}") + except PermissionError: + raise PermissionError(f"No read permission for file: {path}") + except UnicodeDecodeError as exc: + raise ValueError(f"File {path} is not valid UTF-8") from exc + + +def parse_csv(content: str) -> list[dict[str, str]]: + """Parse CSV text into a list of row dicts.""" + import io + try: + reader = csv.DictReader(io.StringIO(content)) + return list(reader) + except csv.Error as exc: + raise ValueError(f"Malformed CSV content: {exc}") from exc + + +def convert_row(row: dict[str, str]) -> dict: + """Convert raw string values to typed fields.""" + try: + return { + "id": int(row["id"]), + "name": row["name"].strip(), + "amount": float(row["amount"]), + } + except KeyError as exc: + raise ValueError(f"Missing required column: {exc}") from exc + except (TypeError, ValueError) as exc: + raise 
ValueError(f"Type conversion failed for row {row}: {exc}") from exc + + +def import_file(path: str) -> ImportResult: + """Import a CSV file, skipping rows that fail conversion.""" + filename = os.path.basename(path) + content = read_file(path) + rows = parse_csv(content) + + imported, skipped = 0, 0 + for row in rows: + try: + convert_row(row) + imported += 1 + except ValueError: + skipped += 1 + + return ImportResult(filename=filename, rows_imported=imported, rows_skipped=skipped) diff --git a/priv/combined_metrics/samples/error_handling/catches_typed_exception/bad/PaymentGateway.php b/priv/combined_metrics/samples/error_handling/catches_typed_exception/bad/PaymentGateway.php new file mode 100644 index 0000000..db4449b --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/catches_typed_exception/bad/PaymentGateway.php @@ -0,0 +1,62 @@ +httpClient->post('/v1/charges', [ + 'amount' => $amountCents, + 'source' => $token, + 'customer_id' => $customerId, + ]); + + return [ + 'transaction_id' => $response['id'], + 'status' => $response['status'], + ]; + } catch (\Exception $e) { + // Bare \Exception catches everything — loses all error specificity + $this->logger->error("Charge failed: {$e->getMessage()}"); + return null; + } + } + + public function refund($transactionId, $amountCents) + { + try { + $response = $this->httpClient->post('/v1/refunds', [ + 'transaction_id' => $transactionId, + 'amount' => $amountCents, + ]); + + return ['refund_id' => $response['refund_id']]; + } catch (\Throwable $e) { + // \Throwable is even broader — catches Errors and Exceptions alike + $this->logger->error("Refund failed: {$e->getMessage()}"); + return false; + } + } + + public function validateCard($cardNumber, $expiry, $cvv) + { + try { + return $this->httpClient->post('/v1/validate', [ + 'number' => $cardNumber, + 'expiry' => $expiry, + 'cvv' => $cvv, + ]); + } catch (\Exception $e) { + // Swallows all exceptions — caller gets null regardless of cause + return null; + } 
+ } +} diff --git a/priv/combined_metrics/samples/error_handling/catches_typed_exception/good/PaymentGateway.php b/priv/combined_metrics/samples/error_handling/catches_typed_exception/good/PaymentGateway.php new file mode 100644 index 0000000..5d49240 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/catches_typed_exception/good/PaymentGateway.php @@ -0,0 +1,62 @@ +httpClient->post('/v1/charges', [ + 'amount' => $amountCents, + 'source' => $token, + 'customer_id' => $customerId, + ]); + + return new ChargeResult( + transactionId: $response['id'], + status: $response['status'] + ); + } catch (CardDeclinedException $e) { + $this->logger->info("Card declined for customer {$customerId}: {$e->getDeclineCode()}"); + throw $e; + } catch (GatewayTimeoutException $e) { + $this->logger->error("Gateway timeout for customer {$customerId}: {$e->getMessage()}"); + throw new \RuntimeException("Payment service temporarily unavailable", 0, $e); + } catch (InvalidCardException $e) { + $this->logger->warning("Invalid card for customer {$customerId}: {$e->getMessage()}"); + throw $e; + } + } + + public function refund(string $transactionId, int $amountCents): RefundResult + { + try { + $response = $this->httpClient->post('/v1/refunds', [ + 'transaction_id' => $transactionId, + 'amount' => $amountCents, + ]); + + return new RefundResult(refundId: $response['refund_id']); + } catch (TransactionNotFoundException $e) { + $this->logger->error("Refund failed — transaction not found: {$transactionId}"); + throw $e; + } catch (RefundNotAllowedException $e) { + $this->logger->warning("Refund not allowed for {$transactionId}: {$e->getMessage()}"); + throw $e; + } + } +} diff --git a/priv/combined_metrics/samples/error_handling/custom_error_type_implements_unwrap/bad/processor.go b/priv/combined_metrics/samples/error_handling/custom_error_type_implements_unwrap/bad/processor.go new file mode 100644 index 0000000..6e00094 --- /dev/null +++ 
b/priv/combined_metrics/samples/error_handling/custom_error_type_implements_unwrap/bad/processor.go @@ -0,0 +1,61 @@ +package processor + +import ( + "errors" + "fmt" +) + +// ProcessingError wraps an underlying error but does NOT implement Unwrap. +// This prevents errors.Is and errors.As from traversing the chain. +type ProcessingError struct { + Stage string + JobID string + Err error +} + +func (e *ProcessingError) Error() string { + return fmt.Sprintf("processing job %s at stage %q: %v", e.JobID, e.Stage, e.Err) +} + +// Missing: func (e *ProcessingError) Unwrap() error { return e.Err } + +var ErrInvalidPayload = errors.New("invalid payload") + +type Job struct { + ID string + Payload []byte +} + +type PaymentProcessor struct{} + +func (p *PaymentProcessor) Process(job Job) error { + if len(job.Payload) == 0 { + return &ProcessingError{ + Stage: "validate", + JobID: job.ID, + Err: ErrInvalidPayload, + } + } + + if err := p.execute(job); err != nil { + return &ProcessingError{ + Stage: "execute", + JobID: job.ID, + Err: err, + } + } + return nil +} + +func (p *PaymentProcessor) execute(job Job) error { + return nil +} + +// HandleJob — errors.Is returns false here because Unwrap is missing. +func HandleJob(p *PaymentProcessor, job Job) { + err := p.Process(job) + // This will never be true; the error chain cannot be traversed. + if errors.Is(err, ErrInvalidPayload) { + fmt.Println("bad payload, skip job") + } +} diff --git a/priv/combined_metrics/samples/error_handling/custom_error_type_implements_unwrap/good/processor.go b/priv/combined_metrics/samples/error_handling/custom_error_type_implements_unwrap/good/processor.go new file mode 100644 index 0000000..591ad1c --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/custom_error_type_implements_unwrap/good/processor.go @@ -0,0 +1,63 @@ +package processor + +import ( + "errors" + "fmt" +) + +// ProcessingError wraps an underlying error and adds the stage at which it occurred. 
+// It implements Unwrap so errors.Is and errors.As can traverse the chain. +type ProcessingError struct { + Stage string + JobID string + Err error +} + +func (e *ProcessingError) Error() string { + return fmt.Sprintf("processing job %s at stage %q: %v", e.JobID, e.Stage, e.Err) +} + +// Unwrap allows errors.Is and errors.As to inspect the wrapped error. +func (e *ProcessingError) Unwrap() error { return e.Err } + +var ErrInvalidPayload = errors.New("invalid payload") + +type Job struct { + ID string + Payload []byte +} + +type PaymentProcessor struct{} + +func (p *PaymentProcessor) Process(job Job) error { + if len(job.Payload) == 0 { + return &ProcessingError{ + Stage: "validate", + JobID: job.ID, + Err: ErrInvalidPayload, + } + } + + if err := p.execute(job); err != nil { + return &ProcessingError{ + Stage: "execute", + JobID: job.ID, + Err: err, + } + } + return nil +} + +func (p *PaymentProcessor) execute(job Job) error { + // simulate execution + return nil +} + +// HandleJob demonstrates that errors.Is works through ProcessingError.Unwrap. 
+func HandleJob(p *PaymentProcessor, job Job) { + err := p.Process(job) + if errors.Is(err, ErrInvalidPayload) { + // reachable because ProcessingError implements Unwrap + fmt.Println("bad payload, skip job") + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_assert_result_without_value/bad/validator_test.rs b/priv/combined_metrics/samples/error_handling/does_not_assert_result_without_value/bad/validator_test.rs new file mode 100644 index 0000000..dd0e094 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_assert_result_without_value/bad/validator_test.rs @@ -0,0 +1,50 @@ +// Bad: assert!(result.is_ok()) discards the error — failures show no useful info + +fn validate_email(email: &str) -> Result<(), String> { + if email.contains('@') && email.contains('.') { + Ok(()) + } else { + Err(format!("'{email}' is not a valid email address")) + } +} + +fn parse_port(s: &str) -> Result { + s.parse::().map_err(|e| format!("invalid port '{s}': {e}")) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn valid_email_passes() { + // Failure output: "assertion failed" — no indication of what went wrong + assert!(validate_email("user@example.com").is_ok()); + } + + #[test] + fn invalid_email_returns_error() { + // Only checks that it's an Err — cannot see the actual message + assert!(validate_email("not-an-email").is_err()); + } + + #[test] + fn valid_port_parses() { + let result = parse_port("8080"); + // We know it's Ok but cannot verify the actual parsed value + assert!(result.is_ok()); + } + + #[test] + fn non_numeric_port_returns_error() { + // Cannot inspect what error was returned + assert!(parse_port("abc").is_err()); + } + + #[test] + fn edge_case_port() { + // If this fails, we see nothing about what parse_port returned + assert!(parse_port("65535").is_ok()); + assert!(parse_port("65536").is_err()); + } +} diff --git 
a/priv/combined_metrics/samples/error_handling/does_not_assert_result_without_value/good/validator_test.rs b/priv/combined_metrics/samples/error_handling/does_not_assert_result_without_value/good/validator_test.rs new file mode 100644 index 0000000..8619e93 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_assert_result_without_value/good/validator_test.rs @@ -0,0 +1,50 @@ +// Good: tests unwrap the Result so the actual error is shown on failure + +fn validate_email(email: &str) -> Result<(), String> { + if email.contains('@') && email.contains('.') { + Ok(()) + } else { + Err(format!("'{email}' is not a valid email address")) + } +} + +fn parse_port(s: &str) -> Result { + s.parse::().map_err(|e| format!("invalid port '{s}': {e}")) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn valid_email_passes() { + // Using unwrap() shows the error message when the assertion fails + validate_email("user@example.com").unwrap(); + } + + #[test] + fn invalid_email_returns_error() { + let err = validate_email("not-an-email").unwrap_err(); + assert!(err.contains("not-an-email"), "expected email in error, got: {err}"); + } + + #[test] + fn valid_port_parses() { + let port = parse_port("8080").unwrap(); + assert_eq!(port, 8080); + } + + #[test] + fn port_zero_is_rejected() { + // parse_port("0") succeeds (0 is a valid u16), but a higher-level + // validator would reject it — show the value to understand failures + let port = parse_port("0").unwrap(); + assert_eq!(port, 0); + } + + #[test] + fn non_numeric_port_returns_error() { + let err = parse_port("abc").unwrap_err(); + assert!(err.contains("abc"), "expected input in error, got: {err}"); + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_catch_and_suppress_errors/bad/FileImporter.swift b/priv/combined_metrics/samples/error_handling/does_not_catch_and_suppress_errors/bad/FileImporter.swift new file mode 100644 index 0000000..2a90f19 --- /dev/null +++ 
b/priv/combined_metrics/samples/error_handling/does_not_catch_and_suppress_errors/bad/FileImporter.swift @@ -0,0 +1,58 @@ +import Foundation + +struct ImportResult { + let recordsImported: Int + let sourceURL: URL +} + +class FileImporter { + + func importCSV(from url: URL) -> ImportResult? { + guard FileManager.default.fileExists(atPath: url.path) else { + return nil + } + + let contents: String + do { + contents = try String(contentsOf: url, encoding: .utf8) + } catch { + // Silently swallowed — caller has no idea what went wrong + return nil + } + + let lines = contents.components(separatedBy: .newlines).filter { !$0.isEmpty } + guard lines.count > 1 else { + return nil + } + + var successCount = 0 + + for (index, line) in lines.dropFirst().enumerated() { + do { + try processLine(line, index: index) + successCount += 1 + } catch { + // Silently skipping bad rows — data loss with no notification + continue + } + } + + // Returns a "success" even when half the rows were silently dropped + return ImportResult(recordsImported: successCount, sourceURL: url) + } + + func saveRecord(_ data: Data, to url: URL) { + do { + try data.write(to: url) + } catch { + // Error completely swallowed — caller thinks save succeeded + } + } + + private func processLine(_ line: String, index: Int) throws { + let columns = line.components(separatedBy: ",") + guard columns.count >= 3 else { + throw NSError(domain: "ImportError", code: 1) + } + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_catch_and_suppress_errors/good/FileImporter.swift b/priv/combined_metrics/samples/error_handling/does_not_catch_and_suppress_errors/good/FileImporter.swift new file mode 100644 index 0000000..031ad87 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_catch_and_suppress_errors/good/FileImporter.swift @@ -0,0 +1,66 @@ +import Foundation + +enum ImportError: Error { + case fileNotFound(URL) + case invalidFormat(String) + case permissionDenied + case 
partialFailure(succeeded: Int, failed: [Error]) +} + +struct ImportResult { + let recordsImported: Int + let sourceURL: URL +} + +class FileImporter { + private let logger: Logger + + init(logger: Logger = Logger(subsystem: "com.app", category: "importer")) { + self.logger = logger + } + + func importCSV(from url: URL) throws -> ImportResult { + guard FileManager.default.fileExists(atPath: url.path) else { + throw ImportError.fileNotFound(url) + } + + let contents: String + do { + contents = try String(contentsOf: url, encoding: .utf8) + } catch { + logger.error("Failed to read file at \(url.path): \(error)") + throw ImportError.permissionDenied + } + + let lines = contents.components(separatedBy: .newlines).filter { !$0.isEmpty } + guard lines.count > 1 else { + throw ImportError.invalidFormat("File contains no data rows") + } + + var errors: [Error] = [] + var successCount = 0 + + for (index, line) in lines.dropFirst().enumerated() { + do { + try processLine(line, index: index) + successCount += 1 + } catch { + logger.warning("Row \(index) failed: \(error)") + errors.append(error) + } + } + + if !errors.isEmpty { + throw ImportError.partialFailure(succeeded: successCount, failed: errors) + } + + return ImportResult(recordsImported: successCount, sourceURL: url) + } + + private func processLine(_ line: String, index: Int) throws { + let columns = line.components(separatedBy: ",") + guard columns.count >= 3 else { + throw ImportError.invalidFormat("Row \(index) has \(columns.count) columns, expected 3+") + } + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_catch_general_exception/bad/OrderProcessor.cs b/priv/combined_metrics/samples/error_handling/does_not_catch_general_exception/bad/OrderProcessor.cs new file mode 100644 index 0000000..12e1c1c --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_catch_general_exception/bad/OrderProcessor.cs @@ -0,0 +1,73 @@ +using System; +using System.Data.SqlClient; +using System.IO; 
+ +namespace OrderService +{ + public class OrderProcessor + { + private readonly IOrderRepository _repository; + private readonly ILogger _logger; + + public OrderProcessor(IOrderRepository repository, ILogger logger) + { + _repository = repository; + _logger = logger; + } + + public void ProcessOrder(int orderId) + { + try + { + var order = _repository.GetById(orderId); + ValidateOrder(order); + _repository.MarkAsProcessed(order); + } + catch (Exception ex) + { + // Catches everything — hides programming errors, thread aborts, etc. + _logger.Error("Something went wrong: " + ex.Message); + } + } + + public bool TrySaveInvoice(Order order, string path) + { + try + { + var content = GenerateInvoiceContent(order); + File.WriteAllText(path, content); + return true; + } + catch (Exception) + { + // Swallows all exceptions silently, including OutOfMemoryException + return false; + } + } + + public void FinalizeOrders() + { + try + { + var pending = _repository.GetPendingOrders(); + foreach (var order in pending) + { + _repository.Finalize(order); + } + } + catch (Exception ex) + { + // Re-throwing System.Exception as a new Exception loses the specific type + throw new Exception("Finalization failed", ex); + } + } + + private void ValidateOrder(Order order) + { + if (order.Items.Count == 0) + throw new InvalidOrderException("Order must contain at least one item"); + } + + private string GenerateInvoiceContent(Order order) => $"Invoice for order {order.Id}"; + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_catch_general_exception/good/OrderProcessor.cs b/priv/combined_metrics/samples/error_handling/does_not_catch_general_exception/good/OrderProcessor.cs new file mode 100644 index 0000000..e9614dc --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_catch_general_exception/good/OrderProcessor.cs @@ -0,0 +1,68 @@ +using System; +using System.Data.SqlClient; +using System.IO; + +namespace OrderService +{ + public class 
OrderProcessor + { + private readonly IOrderRepository _repository; + private readonly ILogger _logger; + + public OrderProcessor(IOrderRepository repository, ILogger logger) + { + _repository = repository; + _logger = logger; + } + + public void ProcessOrder(int orderId) + { + try + { + var order = _repository.GetById(orderId); + ValidateOrder(order); + _repository.MarkAsProcessed(order); + } + catch (SqlException ex) + { + _logger.Error("Database error while processing order {orderId}", ex); + throw new OrderProcessingException("Failed to access order data", ex); + } + catch (InvalidOrderException ex) + { + _logger.Warning("Order {orderId} failed validation: {message}", ex.Message); + throw; + } + } + + public bool TrySaveInvoice(Order order, string path) + { + try + { + var content = GenerateInvoiceContent(order); + File.WriteAllText(path, content); + return true; + } + catch (UnauthorizedAccessException ex) + { + _logger.Warning("Cannot write invoice to {path}: access denied", ex); + return false; + } + catch (IOException ex) + { + _logger.Warning("IO error writing invoice to {path}", ex); + return false; + } + } + + private void ValidateOrder(Order order) + { + if (order.Items.Count == 0) + throw new InvalidOrderException("Order must contain at least one item"); + if (order.CustomerId <= 0) + throw new InvalidOrderException("Order must be associated with a valid customer"); + } + + private string GenerateInvoiceContent(Order order) => $"Invoice for order {order.Id}"; + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_discard_errors/bad/handler.go b/priv/combined_metrics/samples/error_handling/does_not_discard_errors/bad/handler.go new file mode 100644 index 0000000..9ecb167 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_discard_errors/bad/handler.go @@ -0,0 +1,50 @@ +package handler + +import ( + "encoding/json" + "log" + "net/http" +) + +type OrderRequest struct { + UserID string `json:"user_id"` + ProductID 
string `json:"product_id"` + Quantity int `json:"quantity"` + Price float64 `json:"price"` +} + +type OrderResponse struct { + OrderID string `json:"order_id"` + Status string `json:"status"` +} + +type OrderService interface { + PlaceOrder(req OrderRequest) (string, error) + NotifyUser(userID, orderID string) error +} + +type OrderHandler struct { + service OrderService + logger *log.Logger +} + +func NewOrderHandler(service OrderService, logger *log.Logger) *OrderHandler { + return &OrderHandler{service: service, logger: logger} +} + +func (h *OrderHandler) PlaceOrder(w http.ResponseWriter, r *http.Request) { + var req OrderRequest + // error from Decode is discarded + _ = json.NewDecoder(r.Body).Decode(&req) + + // error from PlaceOrder is discarded + orderID, _ := h.service.PlaceOrder(req) + + // error from NotifyUser is discarded + _ = h.service.NotifyUser(req.UserID, orderID) + + resp := OrderResponse{OrderID: orderID, Status: "confirmed"} + w.Header().Set("Content-Type", "application/json") + // error from Encode is discarded + _ = json.NewEncoder(w).Encode(resp) +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_discard_errors/good/handler.go b/priv/combined_metrics/samples/error_handling/does_not_discard_errors/good/handler.go new file mode 100644 index 0000000..3b4be17 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_discard_errors/good/handler.go @@ -0,0 +1,60 @@ +package handler + +import ( + "encoding/json" + "log" + "net/http" +) + +type OrderRequest struct { + UserID string `json:"user_id"` + ProductID string `json:"product_id"` + Quantity int `json:"quantity"` + Price float64 `json:"price"` +} + +type OrderResponse struct { + OrderID string `json:"order_id"` + Status string `json:"status"` +} + +type OrderService interface { + PlaceOrder(req OrderRequest) (string, error) + NotifyUser(userID, orderID string) error +} + +type OrderHandler struct { + service OrderService + logger *log.Logger +} + +func 
NewOrderHandler(service OrderService, logger *log.Logger) *OrderHandler { + return &OrderHandler{service: service, logger: logger} +} + +func (h *OrderHandler) PlaceOrder(w http.ResponseWriter, r *http.Request) { + var req OrderRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + h.logger.Printf("failed to decode request: %v", err) + http.Error(w, "invalid request body", http.StatusBadRequest) + return + } + + orderID, err := h.service.PlaceOrder(req) + if err != nil { + h.logger.Printf("failed to place order for user %s: %v", req.UserID, err) + http.Error(w, "failed to place order", http.StatusInternalServerError) + return + } + + if err := h.service.NotifyUser(req.UserID, orderID); err != nil { + h.logger.Printf("failed to notify user %s for order %s: %v", req.UserID, orderID, err) + // notification failure is non-fatal; continue + } + + resp := OrderResponse{OrderID: orderID, Status: "confirmed"} + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(resp); err != nil { + h.logger.Printf("failed to encode response: %v", err) + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_expose_implementation_errors/bad/gateway.go b/priv/combined_metrics/samples/error_handling/does_not_expose_implementation_errors/bad/gateway.go new file mode 100644 index 0000000..5394b67 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_expose_implementation_errors/bad/gateway.go @@ -0,0 +1,50 @@ +package gateway + +import ( + "context" + "encoding/json" + "fmt" + "net/http" +) + +// ShipmentStatus is returned by the shipping gateway. +type ShipmentStatus struct { + TrackingID string + State string +} + +// ShippingGateway calls an external carrier API. 
+type ShippingGateway struct { + base string + client *http.Client +} + +func New(base string) *ShippingGateway { + return &ShippingGateway{base: base, client: &http.Client{}} +} + +// TrackShipment retrieves the current status of a shipment. +// Using %w exposes internal http, net/url, and json error types to callers, +// leaking implementation details across the abstraction boundary. +func (g *ShippingGateway) TrackShipment(ctx context.Context, trackingID string) (*ShipmentStatus, error) { + url := fmt.Sprintf("%s/shipments/%s", g.base, trackingID) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + // %w leaks *url.Error and http internals to callers. + return nil, fmt.Errorf("track shipment %q: build request: %w", trackingID, err) + } + + resp, err := g.client.Do(req) + if err != nil { + // %w propagates net/http transport types; callers now depend on them. + return nil, fmt.Errorf("track shipment %q: call carrier api: %w", trackingID, err) + } + defer resp.Body.Close() + + var status ShipmentStatus + if err := json.NewDecoder(resp.Body).Decode(&status); err != nil { + // %w exposes *json.SyntaxError / *json.UnmarshalTypeError to callers. + return nil, fmt.Errorf("track shipment %q: decode response: %w", trackingID, err) + } + return &status, nil +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_expose_implementation_errors/good/gateway.go b/priv/combined_metrics/samples/error_handling/does_not_expose_implementation_errors/good/gateway.go new file mode 100644 index 0000000..fa7fc27 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_expose_implementation_errors/good/gateway.go @@ -0,0 +1,50 @@ +package gateway + +import ( + "context" + "encoding/json" + "fmt" + "net/http" +) + +// ShipmentStatus is returned by the shipping gateway. +type ShipmentStatus struct { + TrackingID string + State string +} + +// ShippingGateway calls an external carrier API. 
+type ShippingGateway struct { + base string + client *http.Client +} + +func New(base string) *ShippingGateway { + return &ShippingGateway{base: base, client: &http.Client{}} +} + +// TrackShipment retrieves the current status of a shipment. +// Internal HTTP and JSON errors are wrapped with %v to avoid leaking +// implementation details to callers above this abstraction layer. +func (g *ShippingGateway) TrackShipment(ctx context.Context, trackingID string) (*ShipmentStatus, error) { + url := fmt.Sprintf("%s/shipments/%s", g.base, trackingID) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + // %v instead of %w: callers should not depend on http.Request internals. + return nil, fmt.Errorf("track shipment %q: build request: %v", trackingID, err) + } + + resp, err := g.client.Do(req) + if err != nil { + // %v prevents leaking net/url or transport error types. + return nil, fmt.Errorf("track shipment %q: call carrier api: %v", trackingID, err) + } + defer resp.Body.Close() + + var status ShipmentStatus + if err := json.NewDecoder(resp.Body).Decode(&status); err != nil { + // %v hides JSON parsing internals from callers. 
+ return nil, fmt.Errorf("track shipment %q: decode response: %v", trackingID, err) + } + return &status, nil +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_force_unwrap_optionals/bad/NetworkClient.swift b/priv/combined_metrics/samples/error_handling/does_not_force_unwrap_optionals/bad/NetworkClient.swift new file mode 100644 index 0000000..49710b7 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_force_unwrap_optionals/bad/NetworkClient.swift @@ -0,0 +1,57 @@ +import Foundation + +struct APIResponse: Decodable { + let data: T + let statusCode: Int +} + +enum NetworkError: Error { + case invalidURL + case noData + case decodingFailed(Error) +} + +class NetworkClient { + private let session: URLSession + private let baseURL: URL + + init(baseURL: URL, session: URLSession = .shared) { + self.session = session + self.baseURL = baseURL + } + + func fetch( + path: String, + completion: @escaping (Result) -> Void + ) { + // Force unwrap: crashes if path is not a valid URL + let url = URL(string: path, relativeTo: baseURL)! + + session.dataTask(with: url) { data, response, error in + // Force unwrap: crashes if data is nil + let responseData = data! + + do { + let decoded = try JSONDecoder().decode(T.self, from: responseData) + completion(.success(decoded)) + } catch { + completion(.failure(.decodingFailed(error))) + } + }.resume() + } + + func buildURL(for path: String) -> URL { + // Force unwrap: crashes on invalid input + return URL(string: path, relativeTo: baseURL)! + } + + func headerValue(for key: String, in response: HTTPURLResponse) -> String { + // Force unwrap: crashes if header is absent + return response.value(forHTTPHeaderField: key)! + } + + func firstComponent(of url: URL) -> String { + // Force unwrap: crashes if pathComponents is empty + return url.pathComponents.first! 
+ } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_force_unwrap_optionals/good/NetworkClient.swift b/priv/combined_metrics/samples/error_handling/does_not_force_unwrap_optionals/good/NetworkClient.swift new file mode 100644 index 0000000..242e54e --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_force_unwrap_optionals/good/NetworkClient.swift @@ -0,0 +1,66 @@ +import Foundation + +struct APIResponse: Decodable { + let data: T + let statusCode: Int +} + +enum NetworkError: Error { + case invalidURL + case noData + case decodingFailed(Error) + case unexpectedStatusCode(Int) +} + +class NetworkClient { + private let session: URLSession + private let baseURL: URL + + init(baseURL: URL, session: URLSession = .shared) { + self.session = session + self.baseURL = baseURL + } + + func fetch( + path: String, + completion: @escaping (Result) -> Void + ) { + guard let url = URL(string: path, relativeTo: baseURL) else { + completion(.failure(.invalidURL)) + return + } + + session.dataTask(with: url) { data, response, error in + if let error = error { + completion(.failure(.decodingFailed(error))) + return + } + + guard let data = data else { + completion(.failure(.noData)) + return + } + + if let httpResponse = response as? HTTPURLResponse, + !(200..<300).contains(httpResponse.statusCode) { + completion(.failure(.unexpectedStatusCode(httpResponse.statusCode))) + return + } + + do { + let decoded = try JSONDecoder().decode(T.self, from: data) + completion(.success(decoded)) + } catch { + completion(.failure(.decodingFailed(error))) + } + }.resume() + } + + func buildURL(for path: String) -> URL? { + return URL(string: path, relativeTo: baseURL) + } + + func headerValue(for key: String, in response: HTTPURLResponse) -> String? 
{ + return response.value(forHTTPHeaderField: key) + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_return_error_codes/bad/UserRepository.cs b/priv/combined_metrics/samples/error_handling/does_not_return_error_codes/bad/UserRepository.cs new file mode 100644 index 0000000..64fcb35 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_return_error_codes/bad/UserRepository.cs @@ -0,0 +1,98 @@ +using System; +using System.Collections.Generic; +using System.Data.SqlClient; + +namespace UserService +{ + public class UserRepository + { + private readonly string _connectionString; + + public UserRepository(string connectionString) + { + _connectionString = connectionString; + } + + // Returns -1 on failure instead of throwing + public int GetById(int userId, out User user) + { + user = null; + try + { + using var connection = new SqlConnection(_connectionString); + connection.Open(); + var command = new SqlCommand("SELECT * FROM Users WHERE Id = @id", connection); + command.Parameters.AddWithValue("@id", userId); + + using var reader = command.ExecuteReader(); + if (!reader.Read()) + return -1; // not found + + user = MapUser(reader); + return 0; // success + } + catch (SqlException) + { + return -2; // database error + } + } + + // Returns false on failure — caller can't distinguish why it failed + public bool Create(User user) + { + if (user == null) return false; + if (string.IsNullOrWhiteSpace(user.Email)) return false; + + try + { + using var connection = new SqlConnection(_connectionString); + connection.Open(); + + var checkCmd = new SqlCommand( + "SELECT COUNT(1) FROM Users WHERE Email = @email", connection); + checkCmd.Parameters.AddWithValue("@email", user.Email); + if ((int)checkCmd.ExecuteScalar() > 0) + return false; // duplicate email, but caller doesn't know that + + var command = new SqlCommand( + "INSERT INTO Users (Email, Name) VALUES (@email, @name)", connection); + command.Parameters.AddWithValue("@email", 
user.Email); + command.Parameters.AddWithValue("@name", user.Name); + command.ExecuteNonQuery(); + return true; + } + catch (SqlException) + { + return false; + } + } + + // Returns null to signal "no users" or "error" — ambiguous + public List GetByRole(string role) + { + if (string.IsNullOrWhiteSpace(role)) return null; + + try + { + using var connection = new SqlConnection(_connectionString); + connection.Open(); + var command = new SqlCommand("SELECT * FROM Users WHERE Role = @role", connection); + command.Parameters.AddWithValue("@role", role); + + var users = new List(); + using var reader = command.ExecuteReader(); + while (reader.Read()) + users.Add(MapUser(reader)); + + return users; + } + catch (SqlException) + { + return null; // error or empty — caller cannot tell the difference + } + } + + private User MapUser(SqlDataReader reader) => + new User(reader.GetInt32(0), reader.GetString(1), reader.GetString(2)); + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_return_error_codes/good/UserRepository.cs b/priv/combined_metrics/samples/error_handling/does_not_return_error_codes/good/UserRepository.cs new file mode 100644 index 0000000..f113b5d --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_return_error_codes/good/UserRepository.cs @@ -0,0 +1,78 @@ +using System; +using System.Collections.Generic; +using System.Data.SqlClient; + +namespace UserService +{ + public class UserRepository + { + private readonly string _connectionString; + + public UserRepository(string connectionString) + { + _connectionString = connectionString; + } + + public User GetById(int userId) + { + using var connection = new SqlConnection(_connectionString); + connection.Open(); + var command = new SqlCommand("SELECT * FROM Users WHERE Id = @id", connection); + command.Parameters.AddWithValue("@id", userId); + + using var reader = command.ExecuteReader(); + if (!reader.Read()) + throw new UserNotFoundException($"User with ID {userId} does 
not exist."); + + return MapUser(reader); + } + + public void Create(User user) + { + if (user == null) throw new ArgumentNullException(nameof(user)); + if (string.IsNullOrWhiteSpace(user.Email)) + throw new ArgumentException("Email is required.", nameof(user)); + + using var connection = new SqlConnection(_connectionString); + connection.Open(); + + if (EmailExists(connection, user.Email)) + throw new DuplicateEmailException($"Email '{user.Email}' is already registered."); + + var command = new SqlCommand( + "INSERT INTO Users (Email, Name) VALUES (@email, @name)", connection); + command.Parameters.AddWithValue("@email", user.Email); + command.Parameters.AddWithValue("@name", user.Name); + command.ExecuteNonQuery(); + } + + public IReadOnlyList GetByRole(string role) + { + if (string.IsNullOrWhiteSpace(role)) + throw new ArgumentException("Role must not be empty.", nameof(role)); + + using var connection = new SqlConnection(_connectionString); + connection.Open(); + var command = new SqlCommand("SELECT * FROM Users WHERE Role = @role", connection); + command.Parameters.AddWithValue("@role", role); + + var users = new List(); + using var reader = command.ExecuteReader(); + while (reader.Read()) + users.Add(MapUser(reader)); + + return users.AsReadOnly(); + } + + private bool EmailExists(SqlConnection connection, string email) + { + var command = new SqlCommand( + "SELECT COUNT(1) FROM Users WHERE Email = @email", connection); + command.Parameters.AddWithValue("@email", email); + return (int)command.ExecuteScalar() > 0; + } + + private User MapUser(SqlDataReader reader) => + new User(reader.GetInt32(0), reader.GetString(1), reader.GetString(2)); + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_swallow_errors/bad/file_processor.ex b/priv/combined_metrics/samples/error_handling/does_not_swallow_errors/bad/file_processor.ex new file mode 100644 index 0000000..e1c017f --- /dev/null +++ 
b/priv/combined_metrics/samples/error_handling/does_not_swallow_errors/bad/file_processor.ex @@ -0,0 +1,85 @@ +defmodule FileProcessor do + @moduledoc """ + Processes uploaded files and extracts their contents. + """ + + def process_file(path) do + try do + contents = File.read!(path) + parsed = parse_contents(contents) + {:ok, parsed} + rescue + _ -> nil + end + end + + def read_csv(path) do + try do + path + |> File.stream!() + |> Enum.map(&String.trim/1) + |> Enum.map(&parse_csv_row/1) + rescue + e -> false + end + end + + def extract_metadata(path) do + try do + stat = File.stat!(path) + %{size: stat.size, modified: stat.mtime} + catch + _, _ -> nil + end + end + + def batch_process(paths) do + Enum.map(paths, fn path -> + try do + process_file(path) + rescue + _ -> nil + end + end) + end + + def validate_and_process(path) do + try do + if File.exists?(path) do + process_file(path) + else + {:error, :not_found} + end + rescue + _ -> false + end + end + + def compress_file(path, dest) do + try do + contents = File.read!(path) + compressed = :zlib.compress(contents) + File.write!(dest, compressed) + :ok + rescue + _ -> nil + end + end + + def delete_processed(path) do + try do + File.rm!(path) + :ok + catch + _, _ -> false + end + end + + defp parse_contents(contents) do + String.split(contents, "\n") + end + + defp parse_csv_row(row) do + String.split(row, ",") + end +end diff --git a/priv/combined_metrics/samples/error_handling/does_not_swallow_errors/config.yml b/priv/combined_metrics/samples/error_handling/does_not_swallow_errors/config.yml new file mode 100644 index 0000000..9db16ed --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_swallow_errors/config.yml @@ -0,0 +1 @@ +doc: "Errors must be handled or re-raised — empty rescue/catch blocks silently hide failures." 
diff --git a/priv/combined_metrics/samples/error_handling/does_not_swallow_errors/good/file_processor.ex b/priv/combined_metrics/samples/error_handling/does_not_swallow_errors/good/file_processor.ex new file mode 100644 index 0000000..6647609 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_swallow_errors/good/file_processor.ex @@ -0,0 +1,98 @@ +defmodule FileProcessor do + @moduledoc """ + Processes uploaded files and extracts their contents. + """ + + require Logger + + def process_file(path) do + try do + contents = File.read!(path) + parsed = parse_contents(contents) + {:ok, parsed} + rescue + e in File.Error -> + Logger.error("Failed to read file at #{path}: #{Exception.message(e)}") + {:error, {:read_failed, path}} + end + end + + def read_csv(path) do + try do + rows = + path + |> File.stream!() + |> Enum.map(&String.trim/1) + |> Enum.map(&parse_csv_row/1) + + {:ok, rows} + rescue + e in File.Error -> + Logger.error("CSV read failed for #{path}: #{Exception.message(e)}") + {:error, {:csv_read_failed, path}} + end + end + + def extract_metadata(path) do + try do + stat = File.stat!(path) + {:ok, %{size: stat.size, modified: stat.mtime}} + rescue + e in File.Error -> + Logger.warning("Could not stat file #{path}: #{Exception.message(e)}") + {:error, {:stat_failed, path}} + end + end + + def batch_process(paths) do + Enum.map(paths, fn path -> + case process_file(path) do + {:ok, result} -> {:ok, result} + {:error, reason} -> + Logger.warning("Skipping #{path} due to error: #{inspect(reason)}") + {:error, reason} + end + end) + end + + def validate_and_process(path) do + if File.exists?(path) do + process_file(path) + else + Logger.warning("Attempted to process non-existent file: #{path}") + {:error, {:file_not_found, path}} + end + end + + def compress_file(path, dest) do + try do + contents = File.read!(path) + compressed = :zlib.compress(contents) + File.write!(dest, compressed) + :ok + rescue + e in File.Error -> + 
Logger.error("Compression failed for #{path} -> #{dest}: #{Exception.message(e)}") + reraise e, __STACKTRACE__ + end + end + + def delete_processed(path) do + case File.rm(path) do + :ok -> + Logger.info("Deleted processed file: #{path}") + :ok + {:error, reason} -> + Logger.error("Failed to delete #{path}: #{inspect(reason)}") + {:error, {:delete_failed, reason}} + end + end + + defp parse_contents(contents) do + String.split(contents, "\n") + end + + defp parse_csv_row(row) do + String.split(row, ",") + end +end diff --git a/priv/combined_metrics/samples/error_handling/does_not_swallow_errors_silently/bad/event_bus.js b/priv/combined_metrics/samples/error_handling/does_not_swallow_errors_silently/bad/event_bus.js new file mode 100644 index 0000000..677dcc5 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_swallow_errors_silently/bad/event_bus.js @@ -0,0 +1,61 @@ +class EventBus { + constructor() { + this._handlers = new Map(); + } + + subscribe(eventName, handler) { + if (!this._handlers.has(eventName)) { + this._handlers.set(eventName, []); + } + this._handlers.get(eventName).push(handler); + } + + unsubscribe(eventName, handler) { + try { + const handlers = this._handlers.get(eventName); + const index = handlers.indexOf(handler); + handlers.splice(index, 1); + } catch (e) { + } + } + + async publish(eventName, payload) { + const handlers = this._handlers.get(eventName) ?? 
[]; + + for (const handler of handlers) { + try { + await handler(payload); + } catch (e) { + } + } + } + + async publishAll(events) { + for (const { name, payload } of events) { + try { + await this.publish(name, payload); + } catch { + } + } + } +} + +async function initializeAnalytics(bus) { + try { + await bus.publish("analytics:init", { timestamp: Date.now() }); + } catch (e) { + } +} + +async function loadUserPreferences(userId, bus) { + try { + const prefs = await fetch(`/api/users/${userId}/preferences`).then((r) => + r.json() + ); + await bus.publish("preferences:loaded", prefs); + } catch (err) { + } +} + +const bus = new EventBus(); +export { bus, EventBus, initializeAnalytics, loadUserPreferences }; diff --git a/priv/combined_metrics/samples/error_handling/does_not_swallow_errors_silently/good/event_bus.js b/priv/combined_metrics/samples/error_handling/does_not_swallow_errors_silently/good/event_bus.js new file mode 100644 index 0000000..2c13f6a --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_swallow_errors_silently/good/event_bus.js @@ -0,0 +1,66 @@ +import logger from "./logger.js"; + +class EventBus { + constructor() { + this._handlers = new Map(); + this._deadLetterQueue = []; + } + + subscribe(eventName, handler) { + if (!this._handlers.has(eventName)) { + this._handlers.set(eventName, []); + } + this._handlers.get(eventName).push(handler); + } + + unsubscribe(eventName, handler) { + const handlers = this._handlers.get(eventName); + if (!handlers) return; + const index = handlers.indexOf(handler); + if (index !== -1) { + handlers.splice(index, 1); + } + } + + async publish(eventName, payload) { + const handlers = this._handlers.get(eventName) ?? 
[]; + + for (const handler of handlers) { + try { + await handler(payload); + } catch (err) { + logger.error( + `EventBus: handler for '${eventName}' threw an error`, + err + ); + this._deadLetterQueue.push({ eventName, payload, error: err, ts: Date.now() }); + } + } + } + + async publishOrFail(eventName, payload) { + const handlers = this._handlers.get(eventName) ?? []; + + for (const handler of handlers) { + await handler(payload); + } + } + + drainDeadLetterQueue() { + const items = [...this._deadLetterQueue]; + this._deadLetterQueue.length = 0; + return items; + } +} + +async function initializeAnalytics(bus) { + try { + await bus.publish("analytics:init", { timestamp: Date.now() }); + } catch (err) { + // Analytics is non-critical; log and continue application startup + logger.warn("Analytics initialization failed, proceeding without it", err); + } +} + +const bus = new EventBus(); +export { bus, EventBus, initializeAnalytics }; diff --git a/priv/combined_metrics/samples/error_handling/does_not_swallow_exceptions/bad/OrderService.java b/priv/combined_metrics/samples/error_handling/does_not_swallow_exceptions/bad/OrderService.java new file mode 100644 index 0000000..388899c --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_swallow_exceptions/bad/OrderService.java @@ -0,0 +1,69 @@ +package com.example.orders; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.logging.Logger; + +public class OrderService { + + private static final Logger logger = Logger.getLogger(OrderService.class.getName()); + + private final OrderRepository repository; + private final PaymentGateway paymentGateway; + + public OrderService(OrderRepository repository, PaymentGateway paymentGateway) { + this.repository = repository; + this.paymentGateway = paymentGateway; + } + + public Order placeOrder(Cart cart, PaymentDetails payment) { + Order order = Order.from(cart); + + try { + paymentGateway.charge(payment, order.totalAmount()); + } 
catch (PaymentDeclinedException e) { + // silently swallowed — caller will never know the charge failed + } catch (IOException e) { + // silently swallowed — network errors are completely hidden + } + + try { + repository.save(order); + } catch (SQLException e) { + // silently swallowed — order may not have been saved at all + } + + return order; + } + + public void cancelOrder(String orderId) { + Order order = null; + try { + order = repository.findById(orderId); + } catch (SQLException e) { + // silently swallowed — order is null but execution continues + } + + if (order == null) { + return; + } + + order.cancel(); + + try { + repository.update(order); + } catch (SQLException e) { + // silently swallowed — cancellation may not have been persisted + } + } + + public double getOrderTotal(String orderId) { + try { + Order order = repository.findById(orderId); + return order.totalAmount(); + } catch (Exception e) { + // catch-all swallowed; returns 0 as if the order doesn't exist + return 0.0; + } + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_swallow_exceptions/good/OrderService.java b/priv/combined_metrics/samples/error_handling/does_not_swallow_exceptions/good/OrderService.java new file mode 100644 index 0000000..2a9655c --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_swallow_exceptions/good/OrderService.java @@ -0,0 +1,73 @@ +package com.example.orders; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.logging.Level; +import java.util.logging.Logger; + +public class OrderService { + + private static final Logger logger = Logger.getLogger(OrderService.class.getName()); + + private final OrderRepository repository; + private final PaymentGateway paymentGateway; + + public OrderService(OrderRepository repository, PaymentGateway paymentGateway) { + this.repository = repository; + this.paymentGateway = paymentGateway; + } + + public Order placeOrder(Cart cart, PaymentDetails payment) { 
+ Order order = Order.from(cart); + + try { + paymentGateway.charge(payment, order.totalAmount()); + } catch (PaymentDeclinedException e) { + // Payment was declined by the gateway; surface this to the caller + // so they can prompt the user to retry with different details. + throw new OrderPlacementException("Payment declined: " + e.getReason(), e); + } catch (IOException e) { + // Network error communicating with the payment gateway. + // Log at ERROR level and rethrow so the caller can handle retries. + logger.log(Level.SEVERE, "Network failure while charging payment for order", e); + throw new OrderPlacementException("Payment gateway unreachable", e); + } + + try { + repository.save(order); + } catch (SQLException e) { + // Database write failed after successful payment — log with order + // context so support can reconcile the charge manually. + logger.log(Level.SEVERE, "Failed to persist order after successful payment: orderId=" + order.getId(), e); + throw new OrderPlacementException("Order could not be saved", e); + } + + return order; + } + + public void cancelOrder(String orderId) { + Order order; + try { + order = repository.findById(orderId); + } catch (SQLException e) { + // Could not load order from the database; rethrow with context. + logger.log(Level.WARNING, "Database error looking up order: " + orderId, e); + throw new OrderServiceException("Unable to retrieve order " + orderId, e); + } + + if (order == null) { + throw new OrderNotFoundException(orderId); + } + + order.cancel(); + + try { + repository.update(order); + } catch (SQLException e) { + // Persisting the cancellation status failed; rethrow so the caller + // knows the cancellation did not complete successfully. 
+ logger.log(Level.SEVERE, "Failed to persist cancellation for orderId=" + orderId, e); + throw new OrderServiceException("Cancellation could not be saved", e); + } + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_throw_from_finally_block/bad/FileProcessor.cs b/priv/combined_metrics/samples/error_handling/does_not_throw_from_finally_block/bad/FileProcessor.cs new file mode 100644 index 0000000..473b49a --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_throw_from_finally_block/bad/FileProcessor.cs @@ -0,0 +1,81 @@ +using System; +using System.IO; + +namespace FileProcessing +{ + public class FileProcessor + { + private readonly ILogger _logger; + + public FileProcessor(ILogger logger) + { + _logger = logger; + } + + public string ReadAndProcess(string filePath) + { + StreamReader reader = null; + try + { + reader = new StreamReader(filePath); + var content = reader.ReadToEnd(); + return Transform(content); + } + catch (FileNotFoundException ex) + { + _logger.Error("Input file not found", ex); + throw; + } + finally + { + reader?.Close(); + // Throws from finally — suppresses the FileNotFoundException above + if (!File.Exists(filePath + ".processed")) + throw new InvalidOperationException("Processed marker missing."); + } + } + + public void ProcessBatch(string[] filePaths) + { + FileStream lockFile = null; + try + { + lockFile = AcquireLock(); + foreach (var path in filePaths) + { + ProcessSingleFile(path); + } + } + finally + { + lockFile?.Close(); + + // Validating state inside finally and throwing — bad practice. + // Any exception thrown during batch processing is now lost. 
+ var pendingCount = CountPending(filePaths); + if (pendingCount > 0) + throw new InvalidOperationException( + $"{pendingCount} files were not processed."); + } + } + + private string Transform(string content) => content.Trim().ToUpperInvariant(); + + private void ProcessSingleFile(string path) + { + var content = File.ReadAllText(path); + File.WriteAllText(path + ".out", Transform(content)); + } + + private int CountPending(string[] filePaths) + { + int count = 0; + foreach (var p in filePaths) + if (!File.Exists(p + ".out")) count++; + return count; + } + + private FileStream AcquireLock() => + new FileStream("/tmp/processor.lock", FileMode.Create, FileAccess.Write); + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_throw_from_finally_block/good/FileProcessor.cs b/priv/combined_metrics/samples/error_handling/does_not_throw_from_finally_block/good/FileProcessor.cs new file mode 100644 index 0000000..dc5510b --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_throw_from_finally_block/good/FileProcessor.cs @@ -0,0 +1,84 @@ +using System; +using System.IO; + +namespace FileProcessing +{ + public class FileProcessor + { + private readonly ILogger _logger; + + public FileProcessor(ILogger logger) + { + _logger = logger; + } + + public string ReadAndProcess(string filePath) + { + StreamReader reader = null; + try + { + reader = new StreamReader(filePath); + var content = reader.ReadToEnd(); + return Transform(content); + } + catch (FileNotFoundException ex) + { + _logger.Error("Input file not found: {path}", ex); + throw; + } + finally + { + // Finally only performs cleanup — never throws + try + { + reader?.Close(); + } + catch (IOException ex) + { + // Log but do not rethrow; we must not suppress the original exception + _logger.Warning("Failed to close reader cleanly", ex); + } + } + } + + public void ProcessBatch(string[] filePaths) + { + FileStream lockFile = null; + try + { + lockFile = AcquireLock(); + foreach (var 
path in filePaths) + { + ProcessSingleFile(path); + } + } + finally + { + // Cleanup only — releasing the lock must not throw out of finally + if (lockFile != null) + { + try + { + lockFile.Close(); + File.Delete(lockFile.Name); + } + catch (IOException ex) + { + _logger.Warning("Lock file cleanup failed", ex); + } + } + } + } + + private string Transform(string content) => content.Trim().ToUpperInvariant(); + + private void ProcessSingleFile(string path) + { + var content = File.ReadAllText(path); + File.WriteAllText(path + ".out", Transform(content)); + } + + private FileStream AcquireLock() => + new FileStream("/tmp/processor.lock", FileMode.Create, FileAccess.Write); + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_use_exceptions_for_control_flow/bad/ProductCatalog.cs b/priv/combined_metrics/samples/error_handling/does_not_use_exceptions_for_control_flow/bad/ProductCatalog.cs new file mode 100644 index 0000000..90547ea --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_use_exceptions_for_control_flow/bad/ProductCatalog.cs @@ -0,0 +1,94 @@ +using System; +using System.Collections.Generic; + +namespace Catalog +{ + public class ProductCatalog + { + private readonly Dictionary _products = new(); + + // Using exception to detect "not found" as normal control flow + public Product GetProduct(string sku) + { + try + { + return _products[sku]; // throws KeyNotFoundException for missing keys + } + catch (KeyNotFoundException) + { + return null; + } + } + + // Using FormatException to drive parsing logic + public int ParseQuantity(string input) + { + try + { + return int.Parse(input); + } + catch (FormatException) + { + return 0; + } + catch (OverflowException) + { + return 0; + } + } + + // Using exception to check discount applicability + public decimal ApplyDiscount(string couponCode, decimal originalPrice) + { + try + { + decimal rate = _discountMap[couponCode]; // throws if not found + return originalPrice * (1 - 
rate); + } + catch (KeyNotFoundException) + { + return originalPrice; // no discount — but this is expected, not exceptional + } + } + + public decimal CalculateTotalPrice(string sku, string quantityInput, string couponCode) + { + var product = GetProduct(sku); + if (product == null) return 0m; + + int quantity = ParseQuantity(quantityInput); + if (quantity <= 0) return 0m; + + decimal lineTotal = product.UnitPrice * quantity; + return ApplyDiscount(couponCode, lineTotal); + } + + public IReadOnlyList SearchByCategory(string category) + { + var results = new List(); + try + { + foreach (var product in _products.Values) + { + // Throwing to break from nested search — very bad pattern + if (results.Count >= 50) + throw new InvalidOperationException("limit reached"); + + if (product.Category.Equals(category, StringComparison.OrdinalIgnoreCase)) + results.Add(product); + } + } + catch (InvalidOperationException) + { + // silently stop — exception used as a loop break + } + return results.AsReadOnly(); + } + + private readonly Dictionary _discountMap = new() + { + ["SAVE10"] = 0.10m, + ["SAVE20"] = 0.20m, + }; + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_use_exceptions_for_control_flow/good/ProductCatalog.cs b/priv/combined_metrics/samples/error_handling/does_not_use_exceptions_for_control_flow/good/ProductCatalog.cs new file mode 100644 index 0000000..4c79b6b --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_use_exceptions_for_control_flow/good/ProductCatalog.cs @@ -0,0 +1,67 @@ +using System; +using System.Collections.Generic; + +namespace Catalog +{ + public class ProductCatalog + { + private readonly Dictionary _products = new(); + + public bool TryGetProduct(string sku, out Product product) + { + return _products.TryGetValue(sku, out product); + } + + public bool TryParseQuantity(string input, out int quantity) + { + return int.TryParse(input, out quantity) && quantity > 0; + } + + public bool 
TryApplyDiscount(string couponCode, decimal originalPrice, out decimal discountedPrice) + { + discountedPrice = originalPrice; + + if (string.IsNullOrWhiteSpace(couponCode)) + return false; + + if (!_discountMap.TryGetValue(couponCode, out decimal rate)) + return false; + + discountedPrice = originalPrice * (1 - rate); + return true; + } + + public decimal CalculateTotalPrice(string sku, string quantityInput, string couponCode) + { + if (!TryGetProduct(sku, out var product)) + return 0m; + + if (!TryParseQuantity(quantityInput, out int quantity)) + return 0m; + + decimal lineTotal = product.UnitPrice * quantity; + + if (TryApplyDiscount(couponCode, lineTotal, out decimal discounted)) + return discounted; + + return lineTotal; + } + + public IReadOnlyList SearchByCategory(string category) + { + var results = new List(); + foreach (var product in _products.Values) + { + if (product.Category.Equals(category, StringComparison.OrdinalIgnoreCase)) + results.Add(product); + } + return results.AsReadOnly(); + } + + private readonly Dictionary _discountMap = new() + { + ["SAVE10"] = 0.10m, + ["SAVE20"] = 0.20m, + }; + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_use_force_try/bad/DataLoader.swift b/priv/combined_metrics/samples/error_handling/does_not_use_force_try/bad/DataLoader.swift new file mode 100644 index 0000000..9e973ac --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_use_force_try/bad/DataLoader.swift @@ -0,0 +1,49 @@ +import Foundation + +enum DataLoaderError: Error { + case fileNotFound(String) + case decodingFailed(Error) +} + +struct UserPreferences: Codable { + var theme: String + var notificationsEnabled: Bool + var language: String +} + +class DataLoader { + private let fileManager: FileManager + private let documentsURL: URL + + init(fileManager: FileManager = .default) { + self.fileManager = fileManager + self.documentsURL = fileManager.urls(for: .documentDirectory, in: .userDomainMask)[0] + } + + func 
loadPreferences(from filename: String) -> UserPreferences { + let fileURL = documentsURL.appendingPathComponent(filename) + + // try! crashes the app if the file is missing or malformed + let data = try! Data(contentsOf: fileURL) + return try! JSONDecoder().decode(UserPreferences.self, from: data) + } + + func savePreferences(_ preferences: UserPreferences, to filename: String) { + let fileURL = documentsURL.appendingPathComponent(filename) + + // try! crashes if encoding or writing fails + let data = try! JSONEncoder().encode(preferences) + try! data.write(to: fileURL, options: .atomic) + } + + func loadJSON(from url: URL, as type: T.Type) -> T { + // try! on remote or file URL will crash for any network/IO error + let data = try! Data(contentsOf: url) + return try! JSONDecoder().decode(type, from: data) + } + + func parseRegex(pattern: String) -> NSRegularExpression { + // try! will crash for invalid regex patterns + return try! NSRegularExpression(pattern: pattern, options: []) + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_use_force_try/good/DataLoader.swift b/priv/combined_metrics/samples/error_handling/does_not_use_force_try/good/DataLoader.swift new file mode 100644 index 0000000..d7da4fc --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_use_force_try/good/DataLoader.swift @@ -0,0 +1,60 @@ +import Foundation + +enum DataLoaderError: Error { + case fileNotFound(String) + case decodingFailed(Error) + case encodingFailed(Error) +} + +struct UserPreferences: Codable { + var theme: String + var notificationsEnabled: Bool + var language: String +} + +class DataLoader { + private let fileManager: FileManager + private let documentsURL: URL + + init(fileManager: FileManager = .default) { + self.fileManager = fileManager + self.documentsURL = fileManager.urls(for: .documentDirectory, in: .userDomainMask)[0] + } + + func loadPreferences(from filename: String) -> Result { + let fileURL = 
documentsURL.appendingPathComponent(filename) + + guard fileManager.fileExists(atPath: fileURL.path) else { + return .failure(.fileNotFound(filename)) + } + + do { + let data = try Data(contentsOf: fileURL) + let preferences = try JSONDecoder().decode(UserPreferences.self, from: data) + return .success(preferences) + } catch let error as DecodingError { + return .failure(.decodingFailed(error)) + } catch { + return .failure(.decodingFailed(error)) + } + } + + func savePreferences(_ preferences: UserPreferences, to filename: String) throws { + let fileURL = documentsURL.appendingPathComponent(filename) + do { + let data = try JSONEncoder().encode(preferences) + try data.write(to: fileURL, options: .atomic) + } catch { + throw DataLoaderError.encodingFailed(error) + } + } + + func loadJSON(from url: URL, as type: T.Type) throws -> T { + do { + let data = try Data(contentsOf: url) + return try JSONDecoder().decode(type, from: data) + } catch { + throw DataLoaderError.decodingFailed(error) + } + } +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_use_inband_error_values/bad/store.go b/priv/combined_metrics/samples/error_handling/does_not_use_inband_error_values/bad/store.go new file mode 100644 index 0000000..807d8ef --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_use_inband_error_values/bad/store.go @@ -0,0 +1,53 @@ +package store + +import ( + "context" + "database/sql" + "errors" +) + +type User struct { + ID int64 + Name string + Email string +} + +type UserStore struct { + db *sql.DB +} + +func NewUserStore(db *sql.DB) *UserStore { + return &UserStore{db: db} +} + +// FindByEmail returns the user's name or empty string if not found. +// Callers must check for empty string to detect failure. 
+func (s *UserStore) FindByEmail(ctx context.Context, email string) string { + row := s.db.QueryRowContext(ctx, + `SELECT name FROM users WHERE email = $1`, email) + + var name string + if err := row.Scan(&name); err != nil { + // returns sentinel "" to signal failure — callers can't distinguish + // "not found" from a real DB error + return "" + } + return name +} + +// FindByID returns the user ID or -1 if not found. +// Callers must check for -1 to detect failure. +func (s *UserStore) FindByID(ctx context.Context, id int64) int64 { + row := s.db.QueryRowContext(ctx, + `SELECT id FROM users WHERE id = $1`, id) + + var found int64 + if err := row.Scan(&found); err != nil { + if errors.Is(err, sql.ErrNoRows) { + // returns sentinel -1 to signal "not found" + return -1 + } + return -1 + } + return found +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_use_inband_error_values/good/store.go b/priv/combined_metrics/samples/error_handling/does_not_use_inband_error_values/good/store.go new file mode 100644 index 0000000..eaa1e8a --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_use_inband_error_values/good/store.go @@ -0,0 +1,56 @@ +package store + +import ( + "context" + "database/sql" + "errors" + "fmt" +) + +var ErrNotFound = errors.New("record not found") + +type User struct { + ID int64 + Name string + Email string +} + +type UserStore struct { + db *sql.DB +} + +func NewUserStore(db *sql.DB) *UserStore { + return &UserStore{db: db} +} + +// FindByEmail returns the user with the given email address. +// Returns ErrNotFound if no such user exists. 
+func (s *UserStore) FindByEmail(ctx context.Context, email string) (*User, error) { + row := s.db.QueryRowContext(ctx, + `SELECT id, name, email FROM users WHERE email = $1`, email) + + var u User + if err := row.Scan(&u.ID, &u.Name, &u.Email); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, ErrNotFound + } + return nil, fmt.Errorf("find user by email %q: %w", email, err) + } + return &u, nil +} + +// FindByID returns the user with the given ID. +// Returns ErrNotFound if no such user exists. +func (s *UserStore) FindByID(ctx context.Context, id int64) (*User, error) { + row := s.db.QueryRowContext(ctx, + `SELECT id, name, email FROM users WHERE id = $1`, id) + + var u User + if err := row.Scan(&u.ID, &u.Name, &u.Email); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, ErrNotFound + } + return nil, fmt.Errorf("find user by id %d: %w", id, err) + } + return &u, nil +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_use_unwrap_in_production/bad/client.rs b/priv/combined_metrics/samples/error_handling/does_not_use_unwrap_in_production/bad/client.rs new file mode 100644 index 0000000..f8747ae --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_use_unwrap_in_production/bad/client.rs @@ -0,0 +1,48 @@ +use std::collections::HashMap; +use std::time::Duration; + +pub struct HttpClient { + base_url: String, + timeout: Duration, + headers: HashMap, +} + +impl HttpClient { + pub fn new(base_url: &str, timeout_secs: u64) -> Self { + // unwrap() in constructor: if base_url is empty this is confusing to debug + let parsed = base_url.strip_prefix("https://").unwrap(); + let _ = parsed; // not used, just demonstrating the unwrap + + Self { + base_url: base_url.to_string(), + timeout: Duration::from_secs(timeout_secs), + headers: HashMap::new(), + } + } + + pub fn set_auth_token(&mut self, token: Option<&str>) { + // unwrap() here panics if caller passes None — no graceful handling + let tok = token.unwrap(); + 
self.headers.insert("Authorization".to_string(), format!("Bearer {tok}")); + } + + pub fn get(&self, path: &str) -> String { + let url = format!("{}{}", self.base_url, path); + let response = self.execute(&url); + // unwrap() on production path — any error panics the whole process + response.unwrap() + } + + fn execute(&self, url: &str) -> Result { + if url.contains("unreachable") { + return Err(format!("cannot connect to {url}")); + } + Ok(format!("OK from {url}")) + } +} + +pub fn fetch_user_profile(client: &HttpClient, user_id: u64) -> String { + let path = format!("/users/{user_id}"); + // Returns a String — caller has no way to handle errors + client.get(&path) +} diff --git a/priv/combined_metrics/samples/error_handling/does_not_use_unwrap_in_production/good/client.rs b/priv/combined_metrics/samples/error_handling/does_not_use_unwrap_in_production/good/client.rs new file mode 100644 index 0000000..6605d25 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/does_not_use_unwrap_in_production/good/client.rs @@ -0,0 +1,64 @@ +use std::time::Duration; + +#[derive(Debug)] +pub enum ClientError { + InvalidUrl(String), + ConnectionFailed(String), + Timeout, + BadResponse { status: u16, body: String }, +} + +impl std::fmt::Display for ClientError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ClientError::InvalidUrl(u) => write!(f, "invalid URL: {u}"), + ClientError::ConnectionFailed(msg) => write!(f, "connection failed: {msg}"), + ClientError::Timeout => write!(f, "request timed out"), + ClientError::BadResponse { status, body } => { + write!(f, "unexpected status {status}: {body}") + } + } + } +} + +pub struct HttpClient { + base_url: String, + timeout: Duration, +} + +impl HttpClient { + pub fn new(base_url: impl Into, timeout_secs: u64) -> Result { + let base_url = base_url.into(); + if !base_url.starts_with("http://") && !base_url.starts_with("https://") { + return Err(ClientError::InvalidUrl(base_url)); + } 
+ Ok(Self { + base_url, + timeout: Duration::from_secs(timeout_secs), + }) + } + + pub fn get(&self, path: &str) -> Result { + let url = format!("{}{}", self.base_url, path); + // Simulated HTTP call — real impl would use reqwest or hyper + self.execute_request(&url) + } + + fn execute_request(&self, url: &str) -> Result { + if url.contains("unreachable") { + return Err(ClientError::ConnectionFailed(format!( + "host not reachable for {url}" + ))); + } + if self.timeout < Duration::from_millis(1) { + return Err(ClientError::Timeout); + } + Ok(format!("200 OK from {url}")) + } +} + +pub fn fetch_user_profile(client: &HttpClient, user_id: u64) -> Result { + let path = format!("/users/{user_id}/profile"); + let body = client.get(&path)?; + Ok(body) +} diff --git a/priv/combined_metrics/samples/error_handling/error_message_is_descriptive/bad/billing.ex b/priv/combined_metrics/samples/error_handling/error_message_is_descriptive/bad/billing.ex new file mode 100644 index 0000000..351c5de --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/error_message_is_descriptive/bad/billing.ex @@ -0,0 +1,83 @@ +defmodule Billing do + @moduledoc """ + Handles billing and invoice generation. 
+ """ + + def create_invoice(user_id, items) do + case fetch_user(user_id) do + nil -> {:error, :error} + user -> build_invoice(user, items) + end + end + + def charge_customer(customer_id, amount) do + if amount <= 0 do + raise "error" + end + + case find_payment_method(customer_id) do + nil -> {:error, ""} + method -> process_charge(method, amount) + end + end + + def apply_discount(invoice, code) do + case lookup_discount_code(code) do + nil -> {:error, :not_found} + discount -> + if discount.expired do + {:error, :expired} + else + {:ok, apply(invoice, discount)} + end + end + end + + def issue_refund(invoice_id, amount) do + case get_invoice(invoice_id) do + nil -> + {:error, :error} + invoice -> + if amount > invoice.total do + raise "bad amount" + else + process_refund(invoice, amount) + end + end + end + + def update_billing_address(customer_id, address) do + if address == nil or address == "" do + {:error, ""} + else + case find_customer(customer_id) do + nil -> {:error, :error} + customer -> save_address(customer, address) + end + end + end + + def send_invoice(invoice_id, email) do + case get_invoice(invoice_id) do + nil -> {:error, :missing} + invoice -> + case validate_email(email) do + false -> {:error, :bad} + true -> dispatch_email(invoice, email) + end + end + end + + defp fetch_user(_id), do: nil + defp find_payment_method(_id), do: nil + defp process_charge(_method, _amount), do: {:ok, %{}} + defp lookup_discount_code(_code), do: nil + defp apply(_invoice, _discount), do: %{} + defp get_invoice(_id), do: nil + defp process_refund(_invoice, _amount), do: {:ok, %{}} + defp find_customer(_id), do: nil + defp save_address(_customer, _address), do: {:ok, %{}} + defp validate_email(_email), do: true + defp dispatch_email(_invoice, _email), do: :ok + defp build_invoice(_user, _items), do: {:ok, %{}} +end diff --git a/priv/combined_metrics/samples/error_handling/error_message_is_descriptive/config.yml 
b/priv/combined_metrics/samples/error_handling/error_message_is_descriptive/config.yml new file mode 100644 index 0000000..6a3115e --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/error_message_is_descriptive/config.yml @@ -0,0 +1 @@ +doc: "Error values should carry a meaningful message, not just a bare atom or empty string." diff --git a/priv/combined_metrics/samples/error_handling/error_message_is_descriptive/good/billing.ex b/priv/combined_metrics/samples/error_handling/error_message_is_descriptive/good/billing.ex new file mode 100644 index 0000000..270a35b --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/error_message_is_descriptive/good/billing.ex @@ -0,0 +1,93 @@ +defmodule Billing do + @moduledoc """ + Handles billing and invoice generation. + """ + + def create_invoice(user_id, items) do + case fetch_user(user_id) do + nil -> + {:error, "User #{user_id} not found, cannot create invoice"} + user -> + build_invoice(user, items) + end + end + + def charge_customer(customer_id, amount) do + if amount <= 0 do + raise ArgumentError, "Charge amount must be positive, got: #{amount}" + end + + case find_payment_method(customer_id) do + nil -> + {:error, "No payment method on file for customer #{customer_id}"} + method -> + process_charge(method, amount) + end + end + + def apply_discount(invoice, code) do + case lookup_discount_code(code) do + nil -> + {:error, "Discount code #{inspect(code)} does not exist"} + %{expired: true, expires_at: expires_at} -> + {:error, "Discount code #{inspect(code)} expired on #{expires_at}"} + discount -> + {:ok, apply_to_invoice(invoice, discount)} + end + end + + def issue_refund(invoice_id, amount) do + case get_invoice(invoice_id) do + nil -> + {:error, "Invoice #{invoice_id} not found, cannot issue refund"} + invoice -> + if amount > invoice.total do + raise ArgumentError, + "Refund amount #{amount} exceeds invoice total #{invoice.total} for invoice #{invoice_id}" + else + process_refund(invoice, 
amount) + end + end + end + + def update_billing_address(customer_id, address) do + cond do + is_nil(address) -> + {:error, "Billing address for customer #{customer_id} cannot be nil"} + address == "" -> + {:error, "Billing address for customer #{customer_id} cannot be empty"} + true -> + case find_customer(customer_id) do + nil -> {:error, "Customer #{customer_id} not found"} + customer -> save_address(customer, address) + end + end + end + + def send_invoice(invoice_id, email) do + case get_invoice(invoice_id) do + nil -> + {:error, "Invoice #{invoice_id} not found, cannot send"} + invoice -> + case validate_email(email) do + false -> + {:error, "Cannot send invoice #{invoice_id}: #{inspect(email)} is not a valid email"} + true -> + dispatch_email(invoice, email) + end + end + end + + defp fetch_user(_id), do: nil + defp find_payment_method(_id), do: nil + defp process_charge(_method, _amount), do: {:ok, %{}} + defp lookup_discount_code(_code), do: nil + defp apply_to_invoice(_invoice, _discount), do: %{} + defp get_invoice(_id), do: nil + defp process_refund(_invoice, _amount), do: {:ok, %{}} + defp find_customer(_id), do: nil + defp save_address(_customer, _address), do: {:ok, %{}} + defp validate_email(_email), do: true + defp dispatch_email(_invoice, _email), do: :ok + defp build_invoice(_user, _items), do: {:ok, %{}} +end diff --git a/priv/combined_metrics/samples/error_handling/error_string_not_capitalized/bad/service.go b/priv/combined_metrics/samples/error_handling/error_string_not_capitalized/bad/service.go new file mode 100644 index 0000000..57413c6 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/error_string_not_capitalized/bad/service.go @@ -0,0 +1,54 @@ +package service + +import ( + "errors" + "fmt" +) + +type Product struct { + ID string + Stock int + Price float64 +} + +type Inventory interface { + Get(id string) (*Product, error) + Decrement(id string, qty int) error +} + +type CartService struct { + inventory Inventory +} + 
+func NewCartService(inv Inventory) *CartService { + return &CartService{inventory: inv} +} + +var ( + // Error strings are capitalized and end with punctuation — bad practice. + ErrProductNotFound = errors.New("Product not found.") + ErrInsufficientStock = errors.New("Insufficient stock.") +) + +func (s *CartService) AddToCart(productID string, qty int) error { + if qty <= 0 { + // Capitalized and ends with period — will look odd when embedded in larger messages. + return fmt.Errorf("Quantity must be positive, got %d.", qty) + } + + product, err := s.inventory.Get(productID) + if err != nil { + // Capitalized start and trailing period break embedding. + return fmt.Errorf("Failed to get product %q: %w.", productID, err) + } + + if product.Stock < qty { + return fmt.Errorf("Not enough stock for product %q. Has %d, requested %d.", + productID, product.Stock, qty) + } + + if err := s.inventory.Decrement(productID, qty); err != nil { + return fmt.Errorf("Could not decrement stock for %q: %w.", productID, err) + } + return nil +} diff --git a/priv/combined_metrics/samples/error_handling/error_string_not_capitalized/good/service.go b/priv/combined_metrics/samples/error_handling/error_string_not_capitalized/good/service.go new file mode 100644 index 0000000..5c2c49f --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/error_string_not_capitalized/good/service.go @@ -0,0 +1,51 @@ +package service + +import ( + "errors" + "fmt" +) + +type Product struct { + ID string + Stock int + Price float64 +} + +type Inventory interface { + Get(id string) (*Product, error) + Decrement(id string, qty int) error +} + +type CartService struct { + inventory Inventory +} + +func NewCartService(inv Inventory) *CartService { + return &CartService{inventory: inv} +} + +var ( + ErrProductNotFound = errors.New("product not found") + ErrInsufficientStock = errors.New("insufficient stock") +) + +func (s *CartService) AddToCart(productID string, qty int) error { + if qty <= 0 { + return 
fmt.Errorf("quantity must be positive, got %d", qty) + } + + product, err := s.inventory.Get(productID) + if err != nil { + return fmt.Errorf("add to cart: get product %q: %w", productID, err) + } + + if product.Stock < qty { + return fmt.Errorf("add to cart: product %q has %d in stock, requested %d: %w", + productID, product.Stock, qty, ErrInsufficientStock) + } + + if err := s.inventory.Decrement(productID, qty); err != nil { + return fmt.Errorf("add to cart: decrement stock for %q: %w", productID, err) + } + return nil +} diff --git a/priv/combined_metrics/samples/error_handling/error_type_includes_context/bad/client.go b/priv/combined_metrics/samples/error_handling/error_type_includes_context/bad/client.go new file mode 100644 index 0000000..a4c354d --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/error_type_includes_context/bad/client.go @@ -0,0 +1,47 @@ +package client + +import ( + "errors" + "fmt" + "net/http" +) + +// APIError carries no useful context about what failed or where. 
+type APIError struct { + Message string +} + +func (e *APIError) Error() string { + return e.Message +} + +type PaymentClient struct { + base string + client *http.Client +} + +func NewPaymentClient(base string) *PaymentClient { + return &PaymentClient{base: base, client: &http.Client{}} +} + +func (c *PaymentClient) Charge(orderID string, amountCents int) error { + url := fmt.Sprintf("%s/orders/%s/charge", c.base, orderID) + req, err := http.NewRequest(http.MethodPost, url, nil) + if err != nil { + // wraps nothing; caller cannot recover the original error or know the URL + return &APIError{Message: "request failed"} + } + + resp, err := c.client.Do(req) + if err != nil { + // loses the original transport error entirely + return errors.New("request failed") + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + // no operation, resource, or status code included + return &APIError{Message: "unexpected response"} + } + return nil +} diff --git a/priv/combined_metrics/samples/error_handling/error_type_includes_context/good/client.go b/priv/combined_metrics/samples/error_handling/error_type_includes_context/good/client.go new file mode 100644 index 0000000..4309e53 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/error_type_includes_context/good/client.go @@ -0,0 +1,56 @@ +package client + +import ( + "fmt" + "net/http" +) + +// APIError captures the HTTP operation, the target resource, the HTTP status +// code, and the underlying transport error when one occurs. 
+type APIError struct { + Method string + Resource string + Status int + Err error +} + +func (e *APIError) Error() string { + if e.Err != nil { + return fmt.Sprintf("%s %s: status %d: %v", e.Method, e.Resource, e.Status, e.Err) + } + return fmt.Sprintf("%s %s: status %d", e.Method, e.Resource, e.Status) +} + +func (e *APIError) Unwrap() error { return e.Err } + +type PaymentClient struct { + base string + client *http.Client +} + +func NewPaymentClient(base string) *PaymentClient { + return &PaymentClient{base: base, client: &http.Client{}} +} + +func (c *PaymentClient) Charge(orderID string, amountCents int) error { + url := fmt.Sprintf("%s/orders/%s/charge", c.base, orderID) + req, err := http.NewRequest(http.MethodPost, url, nil) + if err != nil { + return &APIError{Method: "POST", Resource: url, Err: err} + } + + resp, err := c.client.Do(req) + if err != nil { + return &APIError{Method: "POST", Resource: url, Err: err} + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return &APIError{ + Method: "POST", + Resource: url, + Status: resp.StatusCode, + } + } + return nil +} diff --git a/priv/combined_metrics/samples/error_handling/no_bare_except/bad/api_client.py b/priv/combined_metrics/samples/error_handling/no_bare_except/bad/api_client.py new file mode 100644 index 0000000..5f4386c --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_bare_except/bad/api_client.py @@ -0,0 +1,60 @@ +"""HTTP API client with retry logic and structured error handling.""" +from __future__ import annotations + +import json +import time +from dataclasses import dataclass +from typing import Any, Optional +from urllib.request import urlopen + + +@dataclass +class ApiResponse: + status_code: int + body: dict[str, Any] + latency_ms: float + + +def get(url: str, timeout: float = 5.0) -> Optional[ApiResponse]: + """Perform a GET request — bare except swallows KeyboardInterrupt and SystemExit.""" + start = time.monotonic() + try: + with urlopen(url, 
timeout=timeout) as resp: + raw = resp.read().decode("utf-8") + elapsed = (time.monotonic() - start) * 1000 + return ApiResponse( + status_code=resp.status, + body=json.loads(raw), + latency_ms=round(elapsed, 2), + ) + except: # bare except — catches EVERYTHING including Ctrl+C + return None + + +def get_with_retry( + url: str, + retries: int = 3, + backoff: float = 1.0, + timeout: float = 5.0, +) -> Optional[ApiResponse]: + """Retry a GET — bare except in retry loop makes Ctrl+C impossible to act on.""" + for attempt in range(1, retries + 1): + try: + response = get(url, timeout=timeout) + if response is not None: + return response + except: # bare except — user cannot interrupt a long retry loop + pass + time.sleep(backoff * attempt) + return None + + +def batch_fetch(urls: list[str]) -> list[Optional[ApiResponse]]: + """Fetch multiple URLs — each bare except silently discards all error context.""" + results = [] + for url in urls: + try: + results.append(get(url)) + except: # can't distinguish network vs programming error + results.append(None) + return results diff --git a/priv/combined_metrics/samples/error_handling/no_bare_except/good/api_client.py b/priv/combined_metrics/samples/error_handling/no_bare_except/good/api_client.py new file mode 100644 index 0000000..87e96ad --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_bare_except/good/api_client.py @@ -0,0 +1,71 @@ +"""HTTP API client with retry logic and structured error handling.""" +from __future__ import annotations + +import time +from dataclasses import dataclass +from typing import Any, Optional +from urllib.error import HTTPError, URLError +from urllib.request import urlopen +import json + + +@dataclass +class ApiResponse: + status_code: int + body: dict[str, Any] + latency_ms: float + + +class ApiClientError(Exception): + """Base error for all API client failures.""" + + +class NetworkError(ApiClientError): + """Raised when the network is unreachable.""" + + +class 
HttpError(ApiClientError): + """Raised when the server returns a 4xx or 5xx response.""" + + def __init__(self, status_code: int, message: str) -> None: + super().__init__(message) + self.status_code = status_code + + +def get(url: str, timeout: float = 5.0) -> ApiResponse: + """Perform a GET request and return a structured response.""" + start = time.monotonic() + try: + with urlopen(url, timeout=timeout) as resp: + raw = resp.read().decode("utf-8") + elapsed = (time.monotonic() - start) * 1000 + return ApiResponse( + status_code=resp.status, + body=json.loads(raw), + latency_ms=round(elapsed, 2), + ) + except HTTPError as exc: + raise HttpError(exc.code, f"Server returned {exc.code} for {url}") from exc + except URLError as exc: + raise NetworkError(f"Could not reach {url}: {exc.reason}") from exc + except json.JSONDecodeError as exc: + raise ApiClientError(f"Invalid JSON from {url}") from exc + + +def get_with_retry( + url: str, + retries: int = 3, + backoff: float = 1.0, + timeout: float = 5.0, +) -> Optional[ApiResponse]: + """Retry a GET request on network errors only; re-raise HTTP errors immediately.""" + for attempt in range(1, retries + 1): + try: + return get(url, timeout=timeout) + except NetworkError: + if attempt == retries: + raise + time.sleep(backoff * attempt) + except HttpError: + raise # do not retry server-side errors + return None diff --git a/priv/combined_metrics/samples/error_handling/no_blind_rescue/bad/cart.rb b/priv/combined_metrics/samples/error_handling/no_blind_rescue/bad/cart.rb new file mode 100644 index 0000000..4304d84 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_blind_rescue/bad/cart.rb @@ -0,0 +1,45 @@ +class CartCheckoutService + def initialize(inventory, payment_processor, logger) + @inventory = inventory + @payment_processor = payment_processor + @logger = logger + end + + def checkout(cart, payment_details) + reserve_items(cart) + process_payment(cart, payment_details) + end + + private + + def 
reserve_items(cart) + cart.line_items.each do |item| + begin + @inventory.reserve(item.sku, item.quantity) + rescue => e + # Bare rescue catches all StandardError — masks the specific cause + raise "Reservation failed: #{e.message}" + end + end + end + + def process_payment(cart, payment_details) + begin + result = @payment_processor.charge(cart.total_cents, payment_details) + { success: true, order_id: result.order_id } + rescue + # Bare rescue with no class — swallows everything silently + { success: false, error: :unknown } + end + end + + def release_reserved_items(cart) + cart.line_items.each do |item| + begin + @inventory.release(item.sku, item.quantity) + rescue + # Silent swallow — no logging, no re-raise + end + end + end +end diff --git a/priv/combined_metrics/samples/error_handling/no_blind_rescue/good/cart.rb b/priv/combined_metrics/samples/error_handling/no_blind_rescue/good/cart.rb new file mode 100644 index 0000000..bcccc52 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_blind_rescue/good/cart.rb @@ -0,0 +1,53 @@ +class CartCheckoutService + def initialize(inventory, payment_processor, logger) + @inventory = inventory + @payment_processor = payment_processor + @logger = logger + end + + def checkout(cart, payment_details) + reserve_items(cart) + process_payment(cart, payment_details) + end + + private + + def reserve_items(cart) + cart.line_items.each do |item| + begin + @inventory.reserve(item.sku, item.quantity) + rescue Inventory::OutOfStockError => e + raise CheckoutError.new(:out_of_stock, "#{item.name} is no longer available: #{e.message}") + rescue Inventory::ConnectionError => e + @logger.error("Inventory service unreachable: #{e.message}") + raise CheckoutError.new(:service_unavailable, "Unable to confirm stock at this time") + end + end + end + + def process_payment(cart, payment_details) + begin + result = @payment_processor.charge(cart.total_cents, payment_details) + { success: true, order_id: result.order_id, 
receipt_url: result.receipt_url } + rescue PaymentProcessor::DeclinedError => e + @logger.info("Payment declined for cart #{cart.id}: #{e.decline_code}") + { success: false, error: :payment_declined, decline_code: e.decline_code } + rescue PaymentProcessor::TimeoutError => e + @logger.error("Payment timeout for cart #{cart.id}: #{e.message}") + release_reserved_items(cart) + { success: false, error: :payment_timeout } + rescue PaymentProcessor::Error => e + @logger.error("Payment error for cart #{cart.id}: #{e.message}") + release_reserved_items(cart) + raise + end + end + + def release_reserved_items(cart) + cart.line_items.each do |item| + @inventory.release(item.sku, item.quantity) + rescue Inventory::Error => e + @logger.warn("Failed to release reservation for #{item.sku}: #{e.message}") + end + end +end diff --git a/priv/combined_metrics/samples/error_handling/no_empty_rescue_block/bad/payment.rb b/priv/combined_metrics/samples/error_handling/no_empty_rescue_block/bad/payment.rb new file mode 100644 index 0000000..9f168e8 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_empty_rescue_block/bad/payment.rb @@ -0,0 +1,42 @@ +class PaymentProcessor + def initialize(gateway, logger) + @gateway = gateway + @logger = logger + end + + def charge(order, card_token) + amount_cents = (order.total * 100).to_i + + begin + response = @gateway.charge(amount_cents, card_token, order_id: order.id) + record_transaction(order, response.transaction_id) + { success: true, transaction_id: response.transaction_id } + rescue PaymentGateway::CardDeclinedError + # TODO: handle this + rescue PaymentGateway::NetworkError + rescue PaymentGateway::InvalidAmountError + end + end + + def refund(transaction_id, amount_cents) + begin + response = @gateway.refund(transaction_id, amount_cents) + { success: true, refund_id: response.refund_id } + rescue PaymentGateway::TransactionNotFoundError + rescue PaymentGateway::RefundError + end + end + + private + + def 
record_transaction(order, transaction_id) + begin + order.update!( + payment_status: :paid, + transaction_id: transaction_id, + paid_at: Time.current + ) + rescue + end + end +end diff --git a/priv/combined_metrics/samples/error_handling/no_empty_rescue_block/good/payment.rb b/priv/combined_metrics/samples/error_handling/no_empty_rescue_block/good/payment.rb new file mode 100644 index 0000000..6845c92 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_empty_rescue_block/good/payment.rb @@ -0,0 +1,49 @@ +class PaymentProcessor + def initialize(gateway, logger) + @gateway = gateway + @logger = logger + end + + def charge(order, card_token) + amount_cents = (order.total * 100).to_i + + begin + response = @gateway.charge(amount_cents, card_token, order_id: order.id) + record_transaction(order, response.transaction_id) + { success: true, transaction_id: response.transaction_id } + rescue PaymentGateway::CardDeclinedError => e + @logger.warn("Card declined for order #{order.id}: #{e.message}") + { success: false, error: :card_declined, message: e.message } + rescue PaymentGateway::NetworkError => e + @logger.error("Gateway network error for order #{order.id}: #{e.message}") + { success: false, error: :network_error, message: "Payment service unavailable" } + rescue PaymentGateway::InvalidAmountError => e + @logger.error("Invalid amount #{amount_cents} for order #{order.id}: #{e.message}") + raise ArgumentError, "Order total is invalid: #{order.total}" + end + end + + def refund(transaction_id, amount_cents) + begin + response = @gateway.refund(transaction_id, amount_cents) + @logger.info("Refund issued: #{response.refund_id} for transaction #{transaction_id}") + { success: true, refund_id: response.refund_id } + rescue PaymentGateway::TransactionNotFoundError => e + @logger.error("Refund failed — transaction not found: #{transaction_id} — #{e.message}") + { success: false, error: :transaction_not_found } + rescue PaymentGateway::RefundError => e + 
@logger.error("Refund failed for transaction #{transaction_id}: #{e.message}") + { success: false, error: :refund_failed, message: e.message } + end + end + + private + + def record_transaction(order, transaction_id) + order.update!( + payment_status: :paid, + transaction_id: transaction_id, + paid_at: Time.current + ) + end +end diff --git a/priv/combined_metrics/samples/error_handling/no_error_suppression_operator/bad/ProductCatalog.php b/priv/combined_metrics/samples/error_handling/no_error_suppression_operator/bad/ProductCatalog.php new file mode 100644 index 0000000..da091c1 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_error_suppression_operator/bad/ProductCatalog.php @@ -0,0 +1,62 @@ +storageBasePath}/{$sku}.jpg"; + + // @ suppresses mkdir warnings — no way to know if it actually succeeded + @mkdir(dirname($destination), 0755, true); + + // @ suppresses copy errors — destination may not exist but no error is thrown + @copy($sourcePath, $destination); + + return $destination; + } + + public function deleteImage(string $sku): bool + { + $path = "{$this->storageBasePath}/{$sku}.jpg"; + + // @ suppresses warnings if file doesn't exist — silent success/failure + return @unlink($path); + } + + public function readConfig(string $iniPath): array + { + // @ hides parse errors in the ini file + $config = @parse_ini_file($iniPath, true); + + return $config ?: []; + } +} diff --git a/priv/combined_metrics/samples/error_handling/no_error_suppression_operator/good/ProductCatalog.php b/priv/combined_metrics/samples/error_handling/no_error_suppression_operator/good/ProductCatalog.php new file mode 100644 index 0000000..95e941d --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_error_suppression_operator/good/ProductCatalog.php @@ -0,0 +1,77 @@ +parseRows($handle); + } finally { + fclose($handle); + } + } + + public function saveProductImage(string $sku, string $sourcePath): string + { + $destination = 
"{$this->storageBasePath}/{$sku}.jpg"; + + if (!file_exists($sourcePath)) { + throw new ImageProcessingException("Source image not found: {$sourcePath}"); + } + + $targetDir = dirname($destination); + if (!is_dir($targetDir) && !mkdir($targetDir, 0755, true)) { + throw new ImageProcessingException("Failed to create directory: {$targetDir}"); + } + + if (!copy($sourcePath, $destination)) { + throw new ImageProcessingException( + "Failed to copy image from {$sourcePath} to {$destination}" + ); + } + + return $destination; + } + + private function parseRows($handle): array + { + $products = []; + $headers = fgetcsv($handle); + + if ($headers === false) { + throw new CatalogImportException("CSV file is empty or unreadable"); + } + + while (($row = fgetcsv($handle)) !== false) { + $products[] = array_combine($headers, $row); + } + + return $products; + } +} diff --git a/priv/combined_metrics/samples/error_handling/no_eval_or_dynamic_code_execution/bad/template_engine.js b/priv/combined_metrics/samples/error_handling/no_eval_or_dynamic_code_execution/bad/template_engine.js new file mode 100644 index 0000000..e6ff440 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_eval_or_dynamic_code_execution/bad/template_engine.js @@ -0,0 +1,48 @@ +function renderTemplate(template, context) { + const keys = Object.keys(context); + const values = Object.values(context); + + // Using Function constructor to evaluate template expressions + const fn = new Function(...keys, `return \`${template}\``); + return fn(...values); +} + +function applyFilter(value, filterExpression) { + // Evaluate arbitrary filter code supplied by the user + return eval(`(function(v) { return ${filterExpression}; })(${JSON.stringify(value)})`); +} + +function buildSortComparator(sortConfig) { + // Build a comparator from a user-supplied config string + const comparatorCode = `(a, b) => { return ${sortConfig}; }`; + return eval(comparatorCode); +} + +function compileValidator(rules) { + // 
Compile validation rules into executable code + const body = rules.map((rule) => `if (!(${rule.expression})) return false;`).join("\n"); + return new Function("value", `${body}\nreturn true;`); +} + +function executePluginHook(pluginCode, eventName, payload) { + // Execute plugin hook code loaded from external source + const runner = new Function("event", "payload", pluginCode); + return runner(eventName, payload); +} + +function renderDynamicField(fieldConfig, record) { + // Evaluate field display expression + const displayValue = eval( + `(function(record) { return ${fieldConfig.expression}; })(record)` + ); + return displayValue; +} + +export { + renderTemplate, + applyFilter, + buildSortComparator, + compileValidator, + executePluginHook, + renderDynamicField, +}; diff --git a/priv/combined_metrics/samples/error_handling/no_eval_or_dynamic_code_execution/good/template_engine.js b/priv/combined_metrics/samples/error_handling/no_eval_or_dynamic_code_execution/good/template_engine.js new file mode 100644 index 0000000..88ea2db --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_eval_or_dynamic_code_execution/good/template_engine.js @@ -0,0 +1,58 @@ +const ALLOWED_FILTERS = { + uppercase: (value) => String(value).toUpperCase(), + lowercase: (value) => String(value).toLowerCase(), + trim: (value) => String(value).trim(), + truncate: (value, length = 80) => String(value).slice(0, Number(length)), +}; + +function renderTemplate(template, context) { + return template.replace(/\{\{\s*([\w.]+)(?:\s*\|\s*([\w]+)(?::([^}]*))?)?\s*\}\}/g, (_, path, filter, arg) => { + const value = resolvePath(context, path); + + if (value === undefined || value === null) { + return ""; + } + + if (filter) { + const fn = ALLOWED_FILTERS[filter]; + if (!fn) { + throw new Error(`Unknown filter: '${filter}'. 
Allowed filters: ${Object.keys(ALLOWED_FILTERS).join(", ")}`); + } + return fn(value, arg); + } + + return String(value); + }); +} + +function resolvePath(obj, path) { + return path.split(".").reduce((current, key) => { + if (current == null) return undefined; + return current[key]; + }, obj); +} + +function buildSortComparator(field, direction) { + const multiplier = direction === "desc" ? -1 : 1; + + return (a, b) => { + const av = resolvePath(a, field); + const bv = resolvePath(b, field); + + if (av == null && bv == null) return 0; + if (av == null) return 1 * multiplier; + if (bv == null) return -1 * multiplier; + + return av < bv ? -1 * multiplier : av > bv ? 1 * multiplier : 0; + }; +} + +function applyTransforms(value, transforms) { + return transforms.reduce((acc, { name, args }) => { + const fn = ALLOWED_FILTERS[name]; + if (!fn) throw new Error(`Unknown transform: '${name}'`); + return fn(acc, ...args); + }, value); +} + +export { renderTemplate, buildSortComparator, applyTransforms }; diff --git a/priv/combined_metrics/samples/error_handling/no_exceptions_for_control_flow/bad/billing.ex b/priv/combined_metrics/samples/error_handling/no_exceptions_for_control_flow/bad/billing.ex new file mode 100644 index 0000000..f3e75ea --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_exceptions_for_control_flow/bad/billing.ex @@ -0,0 +1,68 @@ +defmodule MyApp.Billing do + @moduledoc """ + Billing operations. 
+ """ + + alias MyApp.Billing.{Invoice, PaymentMethod} + alias MyApp.Repo + + # Bad: using try/rescue for expected, recoverable failures (subscription not found) + @spec create_invoice(integer()) :: {:ok, Invoice.t()} | {:error, atom()} + def create_invoice(subscription_id) do + try do + subscription = Repo.get!(MyApp.Subscriptions.Subscription, subscription_id) + + unless subscription.billing_enabled do + raise "billing disabled" + end + + line_items = MyApp.Billing.LineItemCalculator.compute(subscription) + + invoice = + %Invoice{} + |> Invoice.changeset(%{ + subscription_id: subscription.id, + customer_id: subscription.customer_id, + line_items: line_items, + status: :draft + }) + |> Repo.insert!() + + {:ok, invoice} + rescue + Ecto.NoResultsError -> {:error, :subscription_not_found} + RuntimeError -> {:error, :billing_disabled} + Ecto.InvalidChangesetError -> {:error, :invalid_data} + end + end + + # Bad: using try/rescue as a null-check replacement + @spec charge_invoice(Invoice.t()) :: {:ok, Invoice.t()} | {:error, atom()} + def charge_invoice(%Invoice{} = invoice) do + try do + if invoice.status == :paid, do: raise("already paid") + if invoice.status == :void, do: raise("invoice void") + + payment_method = Repo.get_by!(PaymentMethod, customer_id: invoice.customer_id, default: true) + + unless payment_method.active do + raise "payment method inactive" + end + + case MyApp.PaymentGateway.charge(payment_method.token, invoice.total) do + {:ok, _transaction} -> + invoice + |> Invoice.changeset(%{status: :paid, paid_at: DateTime.utc_now()}) + |> Repo.update() + + {:error, reason} -> + raise "payment failed: #{inspect(reason)}" + end + rescue + RuntimeError, message: "already paid" -> {:error, :already_paid} + RuntimeError, message: "invoice void" -> {:error, :invoice_void} + Ecto.NoResultsError -> {:error, :no_payment_method} + RuntimeError -> {:error, :payment_failed} + end + end +end diff --git 
a/priv/combined_metrics/samples/error_handling/no_exceptions_for_control_flow/good/billing.ex b/priv/combined_metrics/samples/error_handling/no_exceptions_for_control_flow/good/billing.ex new file mode 100644 index 0000000..04fd18b --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_exceptions_for_control_flow/good/billing.ex @@ -0,0 +1,84 @@ +defmodule MyApp.Billing do + @moduledoc """ + Billing operations. Uses `{:ok, value}` / `{:error, reason}` tuples + for all expected failure paths — no exceptions for control flow. + """ + + alias MyApp.Billing.{Invoice, PaymentMethod} + alias MyApp.Repo + + @doc """ + Creates an invoice for the given subscription ID. + Returns `{:ok, invoice}` or `{:error, reason}`. + """ + @spec create_invoice(integer()) :: {:ok, Invoice.t()} | {:error, atom()} + def create_invoice(subscription_id) do + with {:ok, subscription} <- fetch_subscription(subscription_id), + :ok <- validate_billing_enabled(subscription), + {:ok, line_items} <- compute_line_items(subscription), + {:ok, invoice} <- insert_invoice(subscription, line_items) do + {:ok, invoice} + end + end + + @doc """ + Charges the default payment method for an invoice. + Returns `{:ok, invoice}` or `{:error, reason}`. 
+ """ + @spec charge_invoice(Invoice.t()) :: {:ok, Invoice.t()} | {:error, atom()} + def charge_invoice(%Invoice{status: :paid}), do: {:error, :already_paid} + def charge_invoice(%Invoice{status: :void}), do: {:error, :invoice_void} + + def charge_invoice(%Invoice{} = invoice) do + case fetch_payment_method(invoice.customer_id) do + {:ok, %PaymentMethod{active: true} = pm} -> process_payment(invoice, pm) + {:ok, %PaymentMethod{active: false}} -> {:error, :payment_method_inactive} + {:error, :not_found} -> {:error, :no_payment_method} + end + end + + defp fetch_subscription(id) do + case Repo.get(MyApp.Subscriptions.Subscription, id) do + nil -> {:error, :subscription_not_found} + sub -> {:ok, sub} + end + end + + defp validate_billing_enabled(%{billing_enabled: true}), do: :ok + defp validate_billing_enabled(_), do: {:error, :billing_disabled} + + defp compute_line_items(subscription) do + items = MyApp.Billing.LineItemCalculator.compute(subscription) + {:ok, items} + end + + defp insert_invoice(subscription, line_items) do + %Invoice{} + |> Invoice.changeset(%{ + subscription_id: subscription.id, + customer_id: subscription.customer_id, + line_items: line_items, + status: :draft + }) + |> Repo.insert() + end + + defp fetch_payment_method(customer_id) do + case Repo.get_by(PaymentMethod, customer_id: customer_id, default: true) do + nil -> {:error, :not_found} + pm -> {:ok, pm} + end + end + + defp process_payment(invoice, payment_method) do + case MyApp.PaymentGateway.charge(payment_method.token, invoice.total) do + {:ok, _transaction} -> + invoice + |> Invoice.changeset(%{status: :paid, paid_at: DateTime.utc_now()}) + |> Repo.update() + + {:error, reason} -> + {:error, reason} + end + end +end diff --git a/priv/combined_metrics/samples/error_handling/no_floating_promises/bad/order_service.js b/priv/combined_metrics/samples/error_handling/no_floating_promises/bad/order_service.js new file mode 100644 index 0000000..21c1b60 --- /dev/null +++ 
b/priv/combined_metrics/samples/error_handling/no_floating_promises/bad/order_service.js @@ -0,0 +1,55 @@ +import logger from "./logger.js"; + +async function processOrder(orderId) { + const order = await fetchOrder(orderId); + + validateInventory(order.items); + + const payment = await chargePayment(order.total, order.paymentMethod); + + updateOrderStatus(orderId, "confirmed"); + sendConfirmationEmail(order.customerEmail, order); + recordAnalyticsEvent("order_confirmed", { orderId, total: order.total }); + + return { orderId, paymentId: payment.id, status: "confirmed" }; +} + +async function cancelOrder(orderId, reason) { + const order = await fetchOrder(orderId); + + if (order.status === "shipped") { + throw new Error("Cannot cancel an order that has already shipped"); + } + + await updateOrderStatus(orderId, "cancelled"); + + refundPayment(order.paymentId, order.total); + sendCancellationEmail(order.customerEmail, { orderId, reason }); + + return { orderId, status: "cancelled" }; +} + +function scheduleOrderReminder(orderId, delayMs) { + new Promise((resolve) => setTimeout(resolve, delayMs)) + .then(() => sendReminderEmail(orderId)); +} + +async function bulkFulfillOrders(orderIds) { + let fulfilled = 0; + + for (const id of orderIds) { + processOrder(id).then(() => { + fulfilled++; + }); + } + + return { fulfilled }; +} + +function onOrderCreated(order) { + sendConfirmationEmail(order.customerEmail, order); + recordAnalyticsEvent("order_created", { orderId: order.id }); + updateInventoryReservation(order.items); +} + +export { processOrder, cancelOrder, scheduleOrderReminder, bulkFulfillOrders, onOrderCreated }; diff --git a/priv/combined_metrics/samples/error_handling/no_floating_promises/bad/user_service.ts b/priv/combined_metrics/samples/error_handling/no_floating_promises/bad/user_service.ts new file mode 100644 index 0000000..c803dcf --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_floating_promises/bad/user_service.ts @@ -0,0 +1,68 @@ 
+interface User { + id: string; + email: string; + displayName: string; +} + +interface AuditEntry { + action: string; + userId: string; + timestamp: number; +} + +async function fetchUser(userId: string): Promise { + const response = await fetch(`/api/users/${userId}`); + if (!response.ok) throw new Error(`User not found: ${userId}`); + return response.json() as Promise; +} + +async function writeAuditLog(entry: AuditEntry): Promise { + await fetch("/api/audit", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(entry), + }); +} + +async function deleteUser(userId: string): Promise { + const user = await fetchUser(userId); + + const response = await fetch(`/api/users/${userId}`, { method: "DELETE" }); + if (!response.ok) throw new Error(`Failed to delete user: ${response.status}`); + + // Floating promise — not awaited + writeAuditLog({ + action: "user_deleted", + userId: user.id, + timestamp: Date.now(), + }); +} + +async function updateEmail(userId: string, newEmail: string): Promise { + const response = await fetch(`/api/users/${userId}`, { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ email: newEmail }), + }); + + if (!response.ok) throw new Error(`Failed to update email: ${response.status}`); + + const updated: User = await response.json(); + + // Floating promise — not awaited + writeAuditLog({ action: "email_updated", userId, timestamp: Date.now() }); + + return updated; +} + +function onUserCreated(user: User): void { + // Floating promises in void function + writeAuditLog({ action: "user_created", userId: user.id, timestamp: Date.now() }); + fetch("/api/notifications/welcome", { + method: "POST", + body: JSON.stringify({ userId: user.id }), + }); +} + +export { fetchUser, deleteUser, updateEmail, onUserCreated }; +export type { User }; diff --git a/priv/combined_metrics/samples/error_handling/no_floating_promises/good/order_service.js 
b/priv/combined_metrics/samples/error_handling/no_floating_promises/good/order_service.js new file mode 100644 index 0000000..9a4237f --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_floating_promises/good/order_service.js @@ -0,0 +1,59 @@ +import logger from "./logger.js"; + +async function processOrder(orderId) { + const order = await fetchOrder(orderId); + + await validateInventory(order.items); + + const payment = await chargePayment(order.total, order.paymentMethod); + + await Promise.all([ + updateOrderStatus(orderId, "confirmed"), + sendConfirmationEmail(order.customerEmail, order), + recordAnalyticsEvent("order_confirmed", { orderId, total: order.total }), + ]); + + return { orderId, paymentId: payment.id, status: "confirmed" }; +} + +async function cancelOrder(orderId, reason) { + const order = await fetchOrder(orderId); + + if (order.status === "shipped") { + throw new Error("Cannot cancel an order that has already shipped"); + } + + await updateOrderStatus(orderId, "cancelled"); + + const refundPromise = refundPayment(order.paymentId, order.total); + const emailPromise = sendCancellationEmail(order.customerEmail, { orderId, reason }); + + const [refund] = await Promise.all([refundPromise, emailPromise]); + + return { orderId, refundId: refund.id, status: "cancelled" }; +} + +function scheduleOrderReminder(orderId, delayMs) { + const reminderPromise = new Promise((resolve) => setTimeout(resolve, delayMs)) + .then(() => sendReminderEmail(orderId)) + .catch((err) => logger.error("Reminder email failed", { orderId, err })); + + return reminderPromise; +} + +async function bulkFulfillOrders(orderIds) { + const results = await Promise.allSettled( + orderIds.map((id) => processOrder(id)) + ); + + const fulfilled = results.filter((r) => r.status === "fulfilled").length; + const failed = results.filter((r) => r.status === "rejected"); + + for (const result of failed) { + logger.error("Order fulfillment failed", result.reason); + } + + return { 
fulfilled, failedCount: failed.length }; +} + +export { processOrder, cancelOrder, scheduleOrderReminder, bulkFulfillOrders }; diff --git a/priv/combined_metrics/samples/error_handling/no_floating_promises/good/user_service.ts b/priv/combined_metrics/samples/error_handling/no_floating_promises/good/user_service.ts new file mode 100644 index 0000000..a0a83e4 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_floating_promises/good/user_service.ts @@ -0,0 +1,86 @@ +import logger from "./logger.js"; + +interface User { + id: string; + email: string; + displayName: string; +} + +interface AuditEntry { + action: string; + userId: string; + timestamp: number; +} + +async function fetchUser(userId: string): Promise { + const response = await fetch(`/api/users/${userId}`); + if (!response.ok) throw new Error(`User not found: ${userId}`); + return response.json() as Promise; +} + +async function writeAuditLog(entry: AuditEntry): Promise { + await fetch("/api/audit", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(entry), + }); +} + +async function deleteUser(userId: string): Promise { + const user = await fetchUser(userId); + + const response = await fetch(`/api/users/${userId}`, { method: "DELETE" }); + if (!response.ok) throw new Error(`Failed to delete user: ${response.status}`); + + await writeAuditLog({ + action: "user_deleted", + userId: user.id, + timestamp: Date.now(), + }); +} + +async function updateEmail(userId: string, newEmail: string): Promise { + const response = await fetch(`/api/users/${userId}`, { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ email: newEmail }), + }); + + if (!response.ok) throw new Error(`Failed to update email: ${response.status}`); + + const updated: User = await response.json(); + + await writeAuditLog({ + action: "email_updated", + userId, + timestamp: Date.now(), + }); + + return updated; +} + +async function 
bulkInviteUsers(emails: string[]): Promise<{ sent: number; failed: number }> { + const results = await Promise.allSettled( + emails.map((email) => + fetch("/api/invitations", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ email }), + }) + ) + ); + + const sent = results.filter((r) => r.status === "fulfilled").length; + const failed = results.filter((r) => r.status === "rejected").length; + + for (const r of results) { + if (r.status === "rejected") { + logger.error("Invitation failed", r.reason); + } + } + + return { sent, failed }; +} + +export { fetchUser, deleteUser, updateEmail, bulkInviteUsers }; +export type { User }; diff --git a/priv/combined_metrics/samples/error_handling/no_misused_promises/bad/product_repository.ts b/priv/combined_metrics/samples/error_handling/no_misused_promises/bad/product_repository.ts new file mode 100644 index 0000000..2f286a5 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_misused_promises/bad/product_repository.ts @@ -0,0 +1,64 @@ +interface Product { + id: string; + name: string; + inStock: boolean; +} + +async function fetchProduct(id: string): Promise { + const response = await fetch(`/api/products/${id}`); + if (!response.ok) throw new Error(`Product not found: ${id}`); + return response.json() as Promise; +} + +async function isProductAvailable(id: string): Promise { + const product = await fetchProduct(id); + return product.inStock; +} + +async function handleAddToCart(productId: string): Promise { + // Misuse: Promise used directly in `if` without await + if (isProductAvailable(productId)) { + console.log(`Adding product ${productId} to cart`); + } else { + console.log(`Product ${productId} is out of stock`); + } +} + +function loadAndFilterProducts(ids: string[]): Product[] { + const products: Product[] = []; + + ids.forEach(async (id) => { + // Misuse: async callback in forEach — errors and results are ignored + const product = await fetchProduct(id); + 
if (product.inStock) { + products.push(product); + } + }); + + return products; +} + +function setupProductEventListeners(productId: string): void { + const button = document.querySelector(`[data-product="${productId}"]`); + if (!button) return; + + // Misuse: async function passed where void callback is expected with no error handling + button.addEventListener("click", async () => { + await handleAddToCart(productId); + }); +} + +async function validateBeforeCheckout(cartItems: string[]): Promise { + const unavailable: string[] = []; + + cartItems.forEach(async (id) => { + // Misuse: async in forEach, result never collected + const available = await isProductAvailable(id); + if (!available) unavailable.push(id); + }); + + return unavailable; +} + +export { fetchProduct, isProductAvailable, handleAddToCart, loadAndFilterProducts, validateBeforeCheckout }; +export type { Product }; diff --git a/priv/combined_metrics/samples/error_handling/no_misused_promises/good/product_repository.ts b/priv/combined_metrics/samples/error_handling/no_misused_promises/good/product_repository.ts new file mode 100644 index 0000000..fba04b3 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_misused_promises/good/product_repository.ts @@ -0,0 +1,55 @@ +interface Product { + id: string; + name: string; + inStock: boolean; +} + +async function fetchProduct(id: string): Promise { + const response = await fetch(`/api/products/${id}`); + if (!response.ok) throw new Error(`Product not found: ${id}`); + return response.json() as Promise; +} + +async function isProductAvailable(id: string): Promise { + const product = await fetchProduct(id); + return product.inStock; +} + +async function handleAddToCart(productId: string): Promise { + const available = await isProductAvailable(productId); + + if (available) { + console.log(`Adding product ${productId} to cart`); + } else { + console.log(`Product ${productId} is out of stock`); + } +} + +async function loadAndFilterProducts(ids: 
string[]): Promise { + const products = await Promise.all(ids.map((id) => fetchProduct(id))); + return products.filter((p) => p.inStock); +} + +function setupProductEventListeners(productId: string): void { + const button = document.querySelector(`[data-product="${productId}"]`); + if (!button) return; + + button.addEventListener("click", () => { + handleAddToCart(productId).catch((err) => { + console.error("Failed to add to cart", err); + }); + }); +} + +async function validateBeforeCheckout(cartItems: string[]): Promise { + const checks = await Promise.all( + cartItems.map(async (id) => { + const available = await isProductAvailable(id); + return available ? null : id; + }) + ); + return checks.filter((id): id is string => id !== null); +} + +export { fetchProduct, isProductAvailable, handleAddToCart, loadAndFilterProducts, validateBeforeCheckout, setupProductEventListeners }; +export type { Product }; diff --git a/priv/combined_metrics/samples/error_handling/no_rescue_as_flow_control/bad/invoice.rb b/priv/combined_metrics/samples/error_handling/no_rescue_as_flow_control/bad/invoice.rb new file mode 100644 index 0000000..3cfab14 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_rescue_as_flow_control/bad/invoice.rb @@ -0,0 +1,65 @@ +class InvoiceService + def initialize(repository, mailer) + @repository = repository + @mailer = mailer + end + + def find_or_create_for_order(order) + begin + @repository.find_by!(order_id: order.id) + rescue ActiveRecord::RecordNotFound + create_invoice(order) + end + end + + def apply_discount(invoice, coupon_code) + begin + coupon = @repository.find_coupon!(coupon_code) + rescue ActiveRecord::RecordNotFound + return { success: false, error: :coupon_not_found } + end + + begin + raise "expired" if coupon.expired? 
+ raise "used" if coupon.already_used_by?(invoice.customer_id) + rescue => e + return { success: false, error: e.message.to_sym } + end + + discount = coupon.calculate_discount(invoice.subtotal) + invoice.update!(discount_amount: discount, coupon_code: coupon_code) + + { success: true, discount_amount: discount } + end + + def mark_paid(invoice_id, paid_at: Time.current) + begin + invoice = @repository.find!(invoice_id) + rescue ActiveRecord::RecordNotFound + return { success: false, error: :not_found } + end + + begin + raise "already paid" if invoice.paid? + rescue + return { success: false, error: :already_paid } + end + + invoice.update!(status: :paid, paid_at: paid_at) + @mailer.send_receipt(invoice) + + { success: true, invoice: invoice } + end + + private + + def create_invoice(order) + @repository.create!( + order_id: order.id, + customer_id: order.customer_id, + subtotal: order.subtotal, + total: order.total, + status: :pending + ) + end +end diff --git a/priv/combined_metrics/samples/error_handling/no_rescue_as_flow_control/good/invoice.rb b/priv/combined_metrics/samples/error_handling/no_rescue_as_flow_control/good/invoice.rb new file mode 100644 index 0000000..9b97439 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_rescue_as_flow_control/good/invoice.rb @@ -0,0 +1,59 @@ +class InvoiceService + def initialize(repository, mailer) + @repository = repository + @mailer = mailer + end + + def find_or_create_for_order(order) + existing = @repository.find_by(order_id: order.id) + return existing if existing + + create_invoice(order) + end + + def apply_discount(invoice, coupon_code) + coupon = @repository.find_coupon(coupon_code) + + unless coupon + return { success: false, error: :coupon_not_found } + end + + if coupon.expired? 
+ return { success: false, error: :coupon_expired } + end + + if coupon.already_used_by?(invoice.customer_id) + return { success: false, error: :coupon_already_used } + end + + discount = coupon.calculate_discount(invoice.subtotal) + invoice.update!(discount_amount: discount, coupon_code: coupon_code) + + { success: true, discount_amount: discount } + end + + def mark_paid(invoice_id, paid_at: Time.current) + invoice = @repository.find(invoice_id) + + return { success: false, error: :not_found } unless invoice + return { success: false, error: :already_paid } if invoice.paid? + + invoice.update!(status: :paid, paid_at: paid_at) + @mailer.send_receipt(invoice) + + { success: true, invoice: invoice } + end + + private + + def create_invoice(order) + @repository.create!( + order_id: order.id, + customer_id: order.customer_id, + subtotal: order.subtotal, + tax: order.tax, + total: order.total, + status: :pending + ) + end +end diff --git a/priv/combined_metrics/samples/error_handling/no_return_from_ensure/bad/subscription.rb b/priv/combined_metrics/samples/error_handling/no_return_from_ensure/bad/subscription.rb new file mode 100644 index 0000000..ea096c4 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_return_from_ensure/bad/subscription.rb @@ -0,0 +1,49 @@ +class SubscriptionActivator + def initialize(billing_client, notifier, logger) + @billing_client = billing_client + @notifier = notifier + @logger = logger + end + + def activate(subscription) + begin + @billing_client.authorize(subscription.payment_method_id, subscription.plan.monthly_price) + subscription.update!(status: :active, activated_at: Time.current) + @notifier.send_welcome_email(subscription.user) + return { success: true, subscription: subscription } + rescue BillingClient::AuthorizationError => e + @logger.warn("Authorization failed: #{e.message}") + return { success: false, error: :payment_authorization_failed } + ensure + # This return silently swallows any exception raised above 
+ cleanup_pending_state(subscription) + return { success: false, error: :aborted } + end + end + + def cancel(subscription, reason:) + begin + subscription.update!(status: :cancelled, cancelled_at: Time.current, cancel_reason: reason) + @billing_client.cancel_recurring(subscription.billing_id) + @notifier.send_cancellation_confirmation(subscription.user) + return true + rescue StandardError => e + @logger.error("Cancel failed: #{e.message}") + raise + ensure + release_subscription_seats(subscription) + # Returning from ensure masks the re-raised exception + return false + end + end + + private + + def cleanup_pending_state(subscription) + subscription.update_column(:pending_activation, false) if subscription.pending_activation? + end + + def release_subscription_seats(subscription) + subscription.team_seats.update_all(active: false) + end +end diff --git a/priv/combined_metrics/samples/error_handling/no_return_from_ensure/good/subscription.rb b/priv/combined_metrics/samples/error_handling/no_return_from_ensure/good/subscription.rb new file mode 100644 index 0000000..b5b7ee2 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/no_return_from_ensure/good/subscription.rb @@ -0,0 +1,52 @@ +class SubscriptionActivator + def initialize(billing_client, notifier, logger) + @billing_client = billing_client + @notifier = notifier + @logger = logger + end + + def activate(subscription) + result = nil + + begin + @billing_client.authorize(subscription.payment_method_id, subscription.plan.monthly_price) + subscription.update!(status: :active, activated_at: Time.current) + @notifier.send_welcome_email(subscription.user) + result = { success: true, subscription: subscription } + rescue BillingClient::AuthorizationError => e + @logger.warn("Authorization failed for subscription #{subscription.id}: #{e.message}") + result = { success: false, error: :payment_authorization_failed } + rescue StandardError => e + @logger.error("Unexpected error activating subscription 
#{subscription.id}: #{e.message}") + raise + ensure + @logger.info("Activation attempt completed for subscription #{subscription.id}") + cleanup_pending_state(subscription) + end + + result + end + + def cancel(subscription, reason:) + begin + subscription.update!(status: :cancelled, cancelled_at: Time.current, cancel_reason: reason) + @billing_client.cancel_recurring(subscription.billing_id) + @notifier.send_cancellation_confirmation(subscription.user) + rescue BillingClient::NotFoundError => e + @logger.warn("Billing record not found during cancel #{subscription.id}: #{e.message}") + ensure + release_subscription_seats(subscription) + @logger.info("Cancellation cleanup done for #{subscription.id}") + end + end + + private + + def cleanup_pending_state(subscription) + subscription.update_column(:pending_activation, false) if subscription.pending_activation? + end + + def release_subscription_seats(subscription) + subscription.team_seats.update_all(active: false) + end +end diff --git a/priv/combined_metrics/samples/error_handling/rescue_most_specific_first/bad/account.rb b/priv/combined_metrics/samples/error_handling/rescue_most_specific_first/bad/account.rb new file mode 100644 index 0000000..c7eaa2e --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/rescue_most_specific_first/bad/account.rb @@ -0,0 +1,55 @@ +class AccountImporter + def initialize(csv_parser, repository, logger) + @csv_parser = csv_parser + @repository = repository + @logger = logger + end + + def import(file_path) + results = { imported: 0, skipped: 0, errors: [] } + + @csv_parser.each_row(file_path) do |row| + import_row(row, results) + end + + results + end + + private + + def import_row(row, results) + account = build_account(row) + + begin + @repository.save!(account) + results[:imported] += 1 + rescue StandardError => e + # Too broad — this catches everything below and the specific rescues are unreachable + @logger.error("Unexpected error for #{row[:email]}: #{e.message}") + 
results[:errors] << { email: row[:email], reason: :unexpected } + rescue ActiveRecord::ActiveRecordError => e + # Dead code — StandardError already matched this + @logger.error("ActiveRecord error for #{row[:email]}: #{e.message}") + results[:errors] << { email: row[:email], reason: :active_record_error } + rescue ActiveRecord::StatementInvalid => e + # Dead code — caught by StandardError above + @logger.error("DB statement error for #{row[:email]}: #{e.message}") + results[:errors] << { email: row[:email], reason: :database_error } + rescue ActiveRecord::RecordNotUnique + # Dead code — caught by StandardError above + results[:skipped] += 1 + rescue ActiveRecord::RecordInvalid + # Dead code — caught by StandardError above + results[:skipped] += 1 + end + end + + def build_account(row) + Account.new( + email: row[:email], + name: row[:name], + plan: row[:plan] || :free, + source: :csv_import + ) + end +end diff --git a/priv/combined_metrics/samples/error_handling/rescue_most_specific_first/good/account.rb b/priv/combined_metrics/samples/error_handling/rescue_most_specific_first/good/account.rb new file mode 100644 index 0000000..85147a7 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/rescue_most_specific_first/good/account.rb @@ -0,0 +1,57 @@ +class AccountImporter + def initialize(csv_parser, repository, logger) + @csv_parser = csv_parser + @repository = repository + @logger = logger + end + + def import(file_path) + results = { imported: 0, skipped: 0, errors: [] } + + @csv_parser.each_row(file_path) do |row| + import_row(row, results) + end + + results + end + + private + + def import_row(row, results) + account = build_account(row) + + begin + @repository.save!(account) + results[:imported] += 1 + rescue ActiveRecord::RecordInvalid => e + # Most specific: validation failures are expected and recoverable + @logger.warn("Validation failed for row #{row[:email]}: #{e.record.errors.full_messages.join(', ')}") + results[:skipped] += 1 + rescue 
ActiveRecord::RecordNotUnique => e + # More specific than StatementInvalid but less than RecordInvalid + @logger.warn("Duplicate account skipped: #{row[:email]}") + results[:skipped] += 1 + rescue ActiveRecord::StatementInvalid => e + # Less specific DB error + @logger.error("DB statement error for #{row[:email]}: #{e.message}") + results[:errors] << { email: row[:email], reason: :database_error } + rescue ActiveRecord::ActiveRecordError => e + # Broad ActiveRecord error — catches anything above not already matched + @logger.error("ActiveRecord error for #{row[:email]}: #{e.message}") + results[:errors] << { email: row[:email], reason: :active_record_error } + rescue StandardError => e + # Catch-all for unexpected errors + @logger.error("Unexpected error for #{row[:email]}: #{e.message}") + results[:errors] << { email: row[:email], reason: :unexpected } + end + end + + def build_account(row) + Account.new( + email: row[:email], + name: row[:name], + plan: row[:plan] || :free, + source: :csv_import + ) + end +end diff --git a/priv/combined_metrics/samples/error_handling/returns_typed_error/bad/repository.ex b/priv/combined_metrics/samples/error_handling/returns_typed_error/bad/repository.ex new file mode 100644 index 0000000..cebeb97 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/returns_typed_error/bad/repository.ex @@ -0,0 +1,92 @@ +defmodule Repository do + @moduledoc """ + Data repository layer for persisting and fetching domain records. 
+ """ + + def find_by_id(id) do + case lookup(id) do + nil -> nil + record -> record + end + end + + def find_by_email(email) do + case search_email(email) do + [] -> false + [record | _] -> record + end + end + + def save(record) do + if valid?(record) do + do_insert(record) + else + false + end + end + + def update(id, attrs) do + case lookup(id) do + nil -> nil + record -> + if valid_attrs?(attrs) do + do_update(record, attrs) + else + :invalid + end + end + end + + def delete(id) do + case lookup(id) do + nil -> false + record -> + case do_delete(record) do + :ok -> true + _ -> false + end + end + end + + def list_all(filters) do + try do + do_list(filters) + rescue + _ -> [] + end + end + + def count(filters) do + case do_count(filters) do + nil -> 0 + n -> n + end + end + + def exists?(id) do + case lookup(id) do + nil -> false + _ -> true + end + end + + def find_or_create(attrs) do + case search_attrs(attrs) do + nil -> + if valid_attrs?(attrs), do: do_insert(attrs), else: nil + record -> + record + end + end + + defp lookup(_id), do: nil + defp search_email(_email), do: [] + defp valid?(_record), do: true + defp do_insert(record), do: record + defp valid_attrs?(_attrs), do: true + defp do_update(record, _attrs), do: record + defp do_delete(_record), do: :ok + defp do_list(_filters), do: [] + defp do_count(_filters), do: 0 + defp search_attrs(_attrs), do: nil +end diff --git a/priv/combined_metrics/samples/error_handling/returns_typed_error/config.yml b/priv/combined_metrics/samples/error_handling/returns_typed_error/config.yml new file mode 100644 index 0000000..85e423f --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/returns_typed_error/config.yml @@ -0,0 +1 @@ +doc: "Functions should signal failure via a typed return (e.g. `{:error, reason}`) rather than returning `nil` or `false`." 
diff --git a/priv/combined_metrics/samples/error_handling/returns_typed_error/good/repository.ex b/priv/combined_metrics/samples/error_handling/returns_typed_error/good/repository.ex new file mode 100644 index 0000000..755ed07 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/returns_typed_error/good/repository.ex @@ -0,0 +1,96 @@ +defmodule Repository do + @moduledoc """ + Data repository layer for persisting and fetching domain records. + """ + + @spec find_by_id(term()) :: {:ok, map()} | {:error, :not_found} + def find_by_id(id) do + case lookup(id) do + nil -> {:error, :not_found} + record -> {:ok, record} + end + end + + @spec find_by_email(String.t()) :: {:ok, map()} | {:error, :not_found} + def find_by_email(email) do + case search_email(email) do + [] -> {:error, :not_found} + [record | _] -> {:ok, record} + end + end + + @spec save(map()) :: {:ok, map()} | {:error, :validation_failed} + def save(record) do + if valid?(record) do + {:ok, do_insert(record)} + else + {:error, :validation_failed} + end + end + + @spec update(term(), map()) :: {:ok, map()} | {:error, :not_found | :validation_failed} + def update(id, attrs) do + with {:ok, record} <- find_by_id(id), + true <- valid_attrs?(attrs) do + {:ok, do_update(record, attrs)} + else + {:error, :not_found} -> {:error, :not_found} + false -> {:error, :validation_failed} + end + end + + @spec delete(term()) :: {:ok, map()} | {:error, :not_found | :delete_failed} + def delete(id) do + case find_by_id(id) do + {:error, :not_found} -> + {:error, :not_found} + {:ok, record} -> + case do_delete(record) do + :ok -> {:ok, record} + {:error, reason} -> {:error, {:delete_failed, reason}} + end + end + end + + @spec list_all(map()) :: {:ok, list()} | {:error, :query_failed} + def list_all(filters) do + try do + {:ok, do_list(filters)} + rescue + e -> {:error, {:query_failed, Exception.message(e)}} + end + end + + @spec count(map()) :: {:ok, non_neg_integer()} | {:error, :query_failed} + def 
count(filters) do + case do_count(filters) do + nil -> {:error, :query_failed} + n -> {:ok, n} + end + end + + @spec find_or_create(map()) :: {:ok, map()} | {:error, :validation_failed} + def find_or_create(attrs) do + case search_attrs(attrs) do + nil -> + if valid_attrs?(attrs) do + {:ok, do_insert(attrs)} + else + {:error, :validation_failed} + end + record -> + {:ok, record} + end + end + + defp lookup(_id), do: nil + defp search_email(_email), do: [] + defp valid?(_record), do: true + defp do_insert(record), do: record + defp valid_attrs?(_attrs), do: true + defp do_update(record, _attrs), do: record + defp do_delete(_record), do: :ok + defp do_list(_filters), do: [] + defp do_count(_filters), do: 0 + defp search_attrs(_attrs), do: nil +end diff --git a/priv/combined_metrics/samples/error_handling/throws_error_objects_not_primitives/bad/api_client.js b/priv/combined_metrics/samples/error_handling/throws_error_objects_not_primitives/bad/api_client.js new file mode 100644 index 0000000..5dc49bc --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/throws_error_objects_not_primitives/bad/api_client.js @@ -0,0 +1,64 @@ +async function fetchUser(userId) { + if (!userId || typeof userId !== "string") { + throw "userId must be a non-empty string"; + } + + let response; + try { + response = await fetch(`/api/users/${userId}`); + } catch (err) { + throw "Failed to reach the API server"; + } + + if (response.status === 404) { + throw 404; + } + + if (response.status === 403) { + throw { code: 403, message: "You do not have permission to view this user" }; + } + + if (!response.ok) { + throw `Unexpected response status: ${response.status}`; + } + + return response.json(); +} + +async function updateUserEmail(userId, newEmail) { + if (!newEmail.includes("@")) { + throw `'${newEmail}' is not a valid email address`; + } + + const response = await fetch(`/api/users/${userId}`, { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: 
JSON.stringify({ email: newEmail }), + }); + + if (!response.ok) { + const body = await response.json().catch(() => ({})); + throw { + code: response.status, + message: body.message ?? `Failed to update user: ${response.status}`, + }; + } + + return response.json(); +} + +async function deleteUser(userId) { + if (!userId) { + throw null; + } + + const response = await fetch(`/api/users/${userId}`, { method: "DELETE" }); + + if (!response.ok) { + throw response.status; + } + + return true; +} + +export { fetchUser, updateUserEmail, deleteUser }; diff --git a/priv/combined_metrics/samples/error_handling/throws_error_objects_not_primitives/good/api_client.js b/priv/combined_metrics/samples/error_handling/throws_error_objects_not_primitives/good/api_client.js new file mode 100644 index 0000000..41331e9 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/throws_error_objects_not_primitives/good/api_client.js @@ -0,0 +1,69 @@ +class ApiError extends Error { + constructor(message, statusCode) { + super(message); + this.name = "ApiError"; + this.statusCode = statusCode; + } +} + +class NetworkError extends Error { + constructor(message, cause) { + super(message); + this.name = "NetworkError"; + this.cause = cause; + } +} + +async function fetchUser(userId) { + if (!userId || typeof userId !== "string") { + throw new TypeError("userId must be a non-empty string"); + } + + let response; + try { + response = await fetch(`/api/users/${userId}`); + } catch (err) { + throw new NetworkError("Failed to reach the API server", err); + } + + if (response.status === 404) { + throw new ApiError(`User with id '${userId}' not found`, 404); + } + + if (response.status === 403) { + throw new ApiError("You do not have permission to view this user", 403); + } + + if (!response.ok) { + throw new ApiError( + `Unexpected response status: ${response.status}`, + response.status + ); + } + + return response.json(); +} + +async function updateUserEmail(userId, newEmail) { + if 
(!newEmail.includes("@")) { + throw new RangeError(`'${newEmail}' is not a valid email address`); + } + + const response = await fetch(`/api/users/${userId}`, { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ email: newEmail }), + }); + + if (!response.ok) { + const body = await response.json().catch(() => ({})); + throw new ApiError( + body.message ?? `Failed to update user: ${response.status}`, + response.status + ); + } + + return response.json(); +} + +export { fetchUser, updateUserEmail, ApiError, NetworkError }; diff --git a/priv/combined_metrics/samples/error_handling/try_block_is_minimal/bad/payment_processor.py b/priv/combined_metrics/samples/error_handling/try_block_is_minimal/bad/payment_processor.py new file mode 100644 index 0000000..84740c2 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/try_block_is_minimal/bad/payment_processor.py @@ -0,0 +1,73 @@ +"""Payment processor that charges customers and records transactions.""" +from __future__ import annotations + +from dataclasses import dataclass +from decimal import Decimal +from typing import Optional +import uuid + + +class PaymentGatewayError(Exception): + """Raised when the external gateway rejects a charge.""" + + +@dataclass +class PaymentIntent: + amount: Decimal + currency: str + customer_id: str + description: str + + +@dataclass +class ChargeResult: + charge_id: str + amount: Decimal + currency: str + customer_id: str + + +def _call_gateway(intent: PaymentIntent) -> str: + if intent.amount <= 0: + raise PaymentGatewayError("Amount must be positive") + return f"ch_{uuid.uuid4().hex[:16]}" + + +def _record_transaction(charge_id: str, intent: PaymentIntent) -> None: + print(f"[DB] recorded charge {charge_id} for customer {intent.customer_id}") + + +def charge(intent: PaymentIntent) -> Optional[ChargeResult]: + """Charge a customer — oversized try block hides bugs in safe code.""" + try: + # gateway call AND all subsequent safe 
operations crammed into one try block + charge_id = _call_gateway(intent) + + # if _record_transaction raises (e.g. DB error), it's caught as PaymentGatewayError + _record_transaction(charge_id, intent) + + # building the result struct is also in try — bugs here are misattributed + result = ChargeResult( + charge_id=charge_id, + amount=intent.amount, + currency=intent.currency, + customer_id=intent.customer_id, + ) + return result + except PaymentGatewayError as exc: + print(f"charge failed: {exc}") + return None + + +def refund(charge_id: str, amount: Optional[Decimal] = None) -> bool: + """Issue a refund — the try block swallows errors from multiple unrelated steps.""" + try: + # all three steps are wrapped together; an error in any one blames the gateway + is_valid = charge_id.startswith("ch_") + if not is_valid: + raise PaymentGatewayError(f"Refund rejected for charge {charge_id}") + print(f"[DB] recorded refund for {charge_id} amount={amount}") + return True + except PaymentGatewayError as exc: + print(f"refund failed: {exc}") + return False diff --git a/priv/combined_metrics/samples/error_handling/try_block_is_minimal/good/payment_processor.py b/priv/combined_metrics/samples/error_handling/try_block_is_minimal/good/payment_processor.py new file mode 100644 index 0000000..e6e7c52 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/try_block_is_minimal/good/payment_processor.py @@ -0,0 +1,78 @@ +"""Payment processor that charges customers and records transactions.""" +from __future__ import annotations + +from dataclasses import dataclass +from decimal import Decimal +from typing import Optional +import uuid + + +class PaymentGatewayError(Exception): + """Raised when the external gateway rejects a charge.""" + + +@dataclass +class PaymentIntent: + amount: Decimal + currency: str + customer_id: str + description: str + + +@dataclass +class ChargeResult: + charge_id: str + amount: Decimal + currency: str + customer_id: str + + +def 
_call_gateway(intent: PaymentIntent) -> str: + """Simulate an external gateway call; returns a charge ID.""" + if intent.amount <= 0: + raise PaymentGatewayError("Amount must be positive") + return f"ch_{uuid.uuid4().hex[:16]}" + + +def _record_transaction(charge_id: str, intent: PaymentIntent) -> None: + """Persist the transaction record (simulated).""" + print(f"[DB] recorded charge {charge_id} for customer {intent.customer_id}") + + +def charge(intent: PaymentIntent) -> ChargeResult: + """Charge a customer, keeping the try block as small as possible. + + Only the gateway call is inside try; recording and building the result + happen outside so any errors there surface with a clean traceback. + """ + try: + charge_id = _call_gateway(intent) # only the risky call is in try + except PaymentGatewayError as exc: + raise PaymentGatewayError( + f"Gateway rejected charge for customer {intent.customer_id}: {exc}" + ) from exc + + # safe operations live outside the try block + _record_transaction(charge_id, intent) + + return ChargeResult( + charge_id=charge_id, + amount=intent.amount, + currency=intent.currency, + customer_id=intent.customer_id, + ) + + +def refund(charge_id: str, amount: Optional[Decimal] = None) -> bool: + """Issue a refund — try wraps only the gateway call.""" + try: + # only the I/O-bound, failure-prone call belongs inside try + success = charge_id.startswith("ch_") # simulated gateway call + except AttributeError as exc: + raise ValueError(f"Invalid charge_id: {charge_id!r}") from exc + + if not success: + raise PaymentGatewayError(f"Refund rejected for charge {charge_id}") + + print(f"[DB] recorded refund for {charge_id} amount={amount}") + return True diff --git a/priv/combined_metrics/samples/error_handling/uses_checked_arithmetic/bad/invoice.rs b/priv/combined_metrics/samples/error_handling/uses_checked_arithmetic/bad/invoice.rs new file mode 100644 index 0000000..fdbb4c4 --- /dev/null +++ 
b/priv/combined_metrics/samples/error_handling/uses_checked_arithmetic/bad/invoice.rs @@ -0,0 +1,41 @@ +pub struct LineItem { + pub description: String, + pub unit_price_cents: u64, + pub quantity: u32, +} + +impl LineItem { + pub fn subtotal(&self) -> u64 { + // Silently wraps on overflow in release builds — wrong amount charged + self.unit_price_cents * self.quantity as u64 + } +} + +pub struct Invoice { + pub items: Vec, + /// Discount in basis points (100 = 1%) + pub discount_bps: u32, +} + +impl Invoice { + pub fn total_cents(&self) -> u64 { + let subtotal: u64 = self.items.iter().map(|i| i.subtotal()).sum(); + + // If discount_bps > 10_000 this underflows to a huge positive number + let after_discount = subtotal * (10_000 - self.discount_bps as u64) / 10_000; + + // Final accumulation also has no overflow check + after_discount + } + + pub fn tax_amount(&self, rate_bps: u32) -> u64 { + let total = self.total_cents(); + // Multiplication can overflow for large totals + total * rate_bps as u64 / 10_000 + } + + pub fn grand_total(&self, tax_rate_bps: u32) -> u64 { + // Adding two potentially wrapped values — wrong result silently returned + self.total_cents() + self.tax_amount(tax_rate_bps) + } +} diff --git a/priv/combined_metrics/samples/error_handling/uses_checked_arithmetic/good/invoice.rs b/priv/combined_metrics/samples/error_handling/uses_checked_arithmetic/good/invoice.rs new file mode 100644 index 0000000..4d160c1 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_checked_arithmetic/good/invoice.rs @@ -0,0 +1,69 @@ +use std::fmt; + +#[derive(Debug)] +pub enum InvoiceError { + LineItemOverflow { item: String }, + TotalOverflow, + DiscountOutOfRange(u32), +} + +impl fmt::Display for InvoiceError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + InvoiceError::LineItemOverflow { item } => { + write!(f, "line item subtotal overflowed for: {item}") + } + InvoiceError::TotalOverflow => write!(f, "invoice total 
overflowed u64"), + InvoiceError::DiscountOutOfRange(d) => { + write!(f, "discount {d} basis points exceeds 100%") + } + } + } +} + +#[derive(Debug)] +pub struct LineItem { + pub description: String, + pub unit_price_cents: u64, + pub quantity: u32, +} + +impl LineItem { + pub fn subtotal(&self) -> Result { + let qty = self.quantity as u64; + self.unit_price_cents + .checked_mul(qty) + .ok_or_else(|| InvoiceError::LineItemOverflow { + item: self.description.clone(), + }) + } +} + +pub struct Invoice { + pub items: Vec, + /// Discount in basis points (100 = 1%) + pub discount_bps: u32, +} + +impl Invoice { + pub fn total_cents(&self) -> Result { + if self.discount_bps > 10_000 { + return Err(InvoiceError::DiscountOutOfRange(self.discount_bps)); + } + + let subtotal = self + .items + .iter() + .try_fold(0u64, |acc, item| { + item.subtotal()? + .checked_add(acc) + .ok_or(InvoiceError::TotalOverflow) + })?; + + let discount_factor = 10_000 - self.discount_bps as u64; + subtotal + .checked_mul(discount_factor) + .and_then(|n| n.checked_div(10_000)) + .ok_or(InvoiceError::TotalOverflow) + } +} diff --git a/priv/combined_metrics/samples/error_handling/uses_custom_error_type_with_context/bad/store.rs b/priv/combined_metrics/samples/error_handling/uses_custom_error_type_with_context/bad/store.rs new file mode 100644 index 0000000..e1d6bb3 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_custom_error_type_with_context/bad/store.rs @@ -0,0 +1,51 @@ +use std::collections::HashMap; + +pub struct BoundedStore { + data: HashMap>, + max_entries: usize, + max_bytes: u64, + used_bytes: u64, +} + +impl BoundedStore { + pub fn new(max_entries: usize, max_bytes: u64) -> Self { + Self { + data: HashMap::new(), + max_entries, + max_bytes, + used_bytes: 0, + } + } + + // Returning raw &str errors: no structure, callers can't match on variants + pub fn insert(&mut self, key: String, value: Vec) -> Result<(), &'static str> { + if self.data.len() >= self.max_entries && 
!self.data.contains_key(&key) { + // Cannot include actual limit in static string + return Err("capacity exceeded"); + } + let new_bytes = self.used_bytes + value.len() as u64; + if new_bytes > self.max_bytes { + // Cannot communicate how full the store is + return Err("storage full"); + } + self.used_bytes = new_bytes; + self.data.insert(key, value); + Ok(()) + } + + // Returning String errors: slightly better, but callers can't pattern match + pub fn get(&self, key: &str) -> Result<&[u8], String> { + self.data + .get(key) + .map(Vec::as_slice) + // key is in the message, but only as a substring — fragile to parse + .ok_or_else(|| format!("not found: {key}")) + } + + pub fn remove(&mut self, key: &str) -> Result, String> { + self.data + .remove(key) + .ok_or_else(|| "key does not exist".to_string()) + // No key context — caller cannot tell which key was missing + } +} diff --git a/priv/combined_metrics/samples/error_handling/uses_custom_error_type_with_context/good/store.rs b/priv/combined_metrics/samples/error_handling/uses_custom_error_type_with_context/good/store.rs new file mode 100644 index 0000000..86b8f5e --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_custom_error_type_with_context/good/store.rs @@ -0,0 +1,70 @@ +use std::collections::HashMap; +use std::fmt; + +#[derive(Debug)] +pub enum StoreError { + NotFound { key: String }, + CapacityExceeded { limit: usize }, + SerializationFailed { key: String, reason: String }, + StorageFull { used_bytes: u64, max_bytes: u64 }, +} + +impl fmt::Display for StoreError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + StoreError::NotFound { key } => write!(f, "key not found: '{key}'"), + StoreError::CapacityExceeded { limit } => { + write!(f, "store capacity of {limit} entries exceeded") + } + StoreError::SerializationFailed { key, reason } => { + write!(f, "failed to serialize value for key '{key}': {reason}") + } + StoreError::StorageFull { used_bytes, max_bytes } 
=> { + write!(f, "storage full: {used_bytes}/{max_bytes} bytes used") + } + } + } +} + +impl std::error::Error for StoreError {} + +pub struct BoundedStore { + data: HashMap>, + max_entries: usize, + max_bytes: u64, + used_bytes: u64, +} + +impl BoundedStore { + pub fn new(max_entries: usize, max_bytes: u64) -> Self { + Self { + data: HashMap::new(), + max_entries, + max_bytes, + used_bytes: 0, + } + } + + pub fn insert(&mut self, key: String, value: Vec) -> Result<(), StoreError> { + if self.data.len() >= self.max_entries && !self.data.contains_key(&key) { + return Err(StoreError::CapacityExceeded { limit: self.max_entries }); + } + let new_bytes = self.used_bytes + value.len() as u64; + if new_bytes > self.max_bytes { + return Err(StoreError::StorageFull { + used_bytes: self.used_bytes, + max_bytes: self.max_bytes, + }); + } + self.used_bytes = new_bytes; + self.data.insert(key, value); + Ok(()) + } + + pub fn get(&self, key: &str) -> Result<&[u8], StoreError> { + self.data + .get(key) + .map(Vec::as_slice) + .ok_or_else(|| StoreError::NotFound { key: key.to_string() }) + } +} diff --git a/priv/combined_metrics/samples/error_handling/uses_errors_as_for_type_assertion/bad/router.go b/priv/combined_metrics/samples/error_handling/uses_errors_as_for_type_assertion/bad/router.go new file mode 100644 index 0000000..c153d1b --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_errors_as_for_type_assertion/bad/router.go @@ -0,0 +1,44 @@ +package router + +import ( + "net/http" +) + +// ValidationError represents a field-level validation failure. +type ValidationError struct { + Field string + Message string +} + +func (e *ValidationError) Error() string { + return e.Field + ": " + e.Message +} + +type ProductHandler struct { + service ProductService +} + +type ProductService interface { + Create(name string, price float64) error +} + +// CreateProduct handles product creation and maps ValidationError to 400. 
+// It uses a direct type assertion, which fails silently when the error is wrapped. +func (h *ProductHandler) CreateProduct(w http.ResponseWriter, r *http.Request) { + name := r.FormValue("name") + price := 0.0 + + err := h.service.Create(name, price) + if err == nil { + w.WriteHeader(http.StatusCreated) + return + } + + // Direct type assertion fails when the error is wrapped with fmt.Errorf("%w", ve). + if ve, ok := err.(*ValidationError); ok { + http.Error(w, "validation error: "+ve.Field+": "+ve.Message, http.StatusBadRequest) + return + } + + http.Error(w, "internal server error", http.StatusInternalServerError) +} diff --git a/priv/combined_metrics/samples/error_handling/uses_errors_as_for_type_assertion/good/router.go b/priv/combined_metrics/samples/error_handling/uses_errors_as_for_type_assertion/good/router.go new file mode 100644 index 0000000..e92ca34 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_errors_as_for_type_assertion/good/router.go @@ -0,0 +1,46 @@ +package router + +import ( + "errors" + "net/http" +) + +// ValidationError represents a field-level validation failure. +type ValidationError struct { + Field string + Message string +} + +func (e *ValidationError) Error() string { + return e.Field + ": " + e.Message +} + +type ProductHandler struct { + service ProductService +} + +type ProductService interface { + Create(name string, price float64) error +} + +// CreateProduct handles product creation and maps ValidationError to 400. +// It uses errors.As to correctly unwrap errors in a chain. +func (h *ProductHandler) CreateProduct(w http.ResponseWriter, r *http.Request) { + name := r.FormValue("name") + price := 0.0 + + err := h.service.Create(name, price) + if err == nil { + w.WriteHeader(http.StatusCreated) + return + } + + // errors.As traverses the error chain to find *ValidationError even when wrapped. 
+ var ve *ValidationError + if errors.As(err, &ve) { + http.Error(w, "validation error: "+ve.Field+": "+ve.Message, http.StatusBadRequest) + return + } + + http.Error(w, "internal server error", http.StatusInternalServerError) +} diff --git a/priv/combined_metrics/samples/error_handling/uses_errors_is_for_sentinel_comparison/bad/middleware.go b/priv/combined_metrics/samples/error_handling/uses_errors_is_for_sentinel_comparison/bad/middleware.go new file mode 100644 index 0000000..bec84b2 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_errors_is_for_sentinel_comparison/bad/middleware.go @@ -0,0 +1,42 @@ +package middleware + +import ( + "errors" + "net/http" +) + +var ( + ErrUnauthorized = errors.New("unauthorized") + ErrForbidden = errors.New("forbidden") +) + +type AuthService interface { + Validate(token string) error +} + +// RequireAuth returns a middleware that validates the Bearer token. +// It uses == for error comparison, which breaks when errors are wrapped. +func RequireAuth(auth AuthService) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + token := r.Header.Get("Authorization") + err := auth.Validate(token) + if err == nil { + next.ServeHTTP(w, r) + return + } + + // Direct == comparison fails when err is wrapped with fmt.Errorf("%w"). 
+ if err == ErrUnauthorized { + http.Error(w, "authentication required", http.StatusUnauthorized) + return + } + if err == ErrForbidden { + http.Error(w, "access denied", http.StatusForbidden) + return + } + + http.Error(w, "internal server error", http.StatusInternalServerError) + }) + } +} diff --git a/priv/combined_metrics/samples/error_handling/uses_errors_is_for_sentinel_comparison/good/middleware.go b/priv/combined_metrics/samples/error_handling/uses_errors_is_for_sentinel_comparison/good/middleware.go new file mode 100644 index 0000000..01e3a4e --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_errors_is_for_sentinel_comparison/good/middleware.go @@ -0,0 +1,42 @@ +package middleware + +import ( + "errors" + "net/http" +) + +var ( + ErrUnauthorized = errors.New("unauthorized") + ErrForbidden = errors.New("forbidden") +) + +type AuthService interface { + Validate(token string) error +} + +// RequireAuth returns a middleware that validates the Bearer token. +// It uses errors.Is to correctly match sentinel errors through any wrapping. +func RequireAuth(auth AuthService) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + token := r.Header.Get("Authorization") + err := auth.Validate(token) + if err == nil { + next.ServeHTTP(w, r) + return + } + + // errors.Is traverses the error chain, so wrapped sentinels are matched correctly. 
+ if errors.Is(err, ErrUnauthorized) { + http.Error(w, "authentication required", http.StatusUnauthorized) + return + } + if errors.Is(err, ErrForbidden) { + http.Error(w, "access denied", http.StatusForbidden) + return + } + + http.Error(w, "internal server error", http.StatusInternalServerError) + }) + } +} diff --git a/priv/combined_metrics/samples/error_handling/uses_exceptions_not_error_codes/bad/OrderService.php b/priv/combined_metrics/samples/error_handling/uses_exceptions_not_error_codes/bad/OrderService.php new file mode 100644 index 0000000..cc7773c --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_exceptions_not_error_codes/bad/OrderService.php @@ -0,0 +1,66 @@ +inventory->getAvailableQuantity($item['product_id']); + if ($available < $item['quantity']) { + return -1; // Caller must know magic codes + } + } + + $order = new \stdClass(); + $order->customerId = $customerId; + $order->status = 'pending'; + $order->items = $items; + $saved = $this->orders->save($order); + + if (!$saved) { + return false; // Is false a different error than -1? + } + + return $order; + } + + public function confirm($orderId, $paymentToken) + { + $order = $this->orders->findById($orderId); + if ($order === null) { + return null; // Caller must null-check + } + + if ($order->status !== 'pending') { + return -2; // Magic number for wrong state + } + + $charged = $this->payments->charge($order->total, $paymentToken); + if (!$charged) { + return -3; // Magic number for payment failed + } + + $order->status = 'confirmed'; + $this->orders->save($order); + + return $order; + } + + public function getOrFail($orderId) + { + $order = $this->orders->findById($orderId); + + // Returns false — caller must remember to check === false + return $order ?? 
false; + } +} diff --git a/priv/combined_metrics/samples/error_handling/uses_exceptions_not_error_codes/good/OrderService.php b/priv/combined_metrics/samples/error_handling/uses_exceptions_not_error_codes/good/OrderService.php new file mode 100644 index 0000000..2c3d70b --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_exceptions_not_error_codes/good/OrderService.php @@ -0,0 +1,72 @@ +inventory->getAvailableQuantity($item['product_id']); + if ($available < $item['quantity']) { + throw new InsufficientStockException( + "Product {$item['product_id']} has only {$available} units available" + ); + } + } + + $order = new Order(customerId: $customerId, status: 'pending'); + foreach ($items as $item) { + $order->addItem($item['product_id'], $item['quantity'], $item['unit_price']); + } + $this->orders->save($order); + + return $order; + } + + public function confirm(int $orderId, string $paymentToken): Order + { + $order = $this->orders->findById($orderId); + if ($order === null) { + throw new OrderNotFoundException("Order {$orderId} not found"); + } + + if ($order->getStatus() !== 'pending') { + throw new InvalidOrderStateException( + "Cannot confirm order in state '{$order->getStatus()}'" + ); + } + + $this->payments->charge($order->getTotal(), $paymentToken); + + $order->setStatus('confirmed'); + $this->orders->save($order); + + return $order; + } + + public function getOrFail(int $orderId): Order + { + $order = $this->orders->findById($orderId); + if ($order === null) { + throw new OrderNotFoundException("Order {$orderId} not found"); + } + + return $order; + } +} diff --git a/priv/combined_metrics/samples/error_handling/uses_question_mark_for_propagation/bad/loader.rs b/priv/combined_metrics/samples/error_handling/uses_question_mark_for_propagation/bad/loader.rs new file mode 100644 index 0000000..ea5190d --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_question_mark_for_propagation/bad/loader.rs @@ -0,0 +1,58 @@ +use std::fs; 
+use std::io; +use std::path::Path; + +pub struct FileRecord { + pub path: String, + pub lines: Vec, + pub byte_count: usize, +} + +pub fn load_record(path: &Path) -> Result { + // Manual match instead of ? operator — noisy and error-prone + let raw = match fs::read(path) { + Ok(bytes) => bytes, + Err(e) => return Err(format!("IO error: {e}")), + }; + + let content = match String::from_utf8(raw.clone()) { + Ok(s) => s, + Err(e) => return Err(format!("encoding error: {e}")), + }; + + if content.trim().is_empty() { + return Err(format!("file is empty: {}", path.display())); + } + + let lines: Vec = content.lines().map(str::to_string).collect(); + + Ok(FileRecord { + path: path.display().to_string(), + lines, + byte_count: raw.len(), + }) +} + +pub fn load_all(dir: &Path) -> Result, String> { + let read_dir = match fs::read_dir(dir) { + Ok(rd) => rd, + Err(e) => return Err(format!("cannot read dir: {e}")), + }; + + let mut records = Vec::new(); + for entry_result in read_dir { + let entry = match entry_result { + Ok(e) => e, + Err(e) => return Err(format!("dir entry error: {e}")), + }; + let path = entry.path(); + if path.extension().and_then(|e| e.to_str()) == Some("txt") { + let record = match load_record(&path) { + Ok(r) => r, + Err(e) => return Err(e), + }; + records.push(record); + } + } + Ok(records) +} diff --git a/priv/combined_metrics/samples/error_handling/uses_question_mark_for_propagation/good/loader.rs b/priv/combined_metrics/samples/error_handling/uses_question_mark_for_propagation/good/loader.rs new file mode 100644 index 0000000..d209aae --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_question_mark_for_propagation/good/loader.rs @@ -0,0 +1,63 @@ +use std::fs; +use std::io; +use std::path::Path; + +#[derive(Debug)] +pub enum LoadError { + Io(io::Error), + InvalidEncoding(String), + EmptyFile(String), +} + +impl From for LoadError { + fn from(e: io::Error) -> Self { + LoadError::Io(e) + } +} + +impl std::fmt::Display for LoadError { 
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + LoadError::Io(e) => write!(f, "IO error: {e}"), + LoadError::InvalidEncoding(msg) => write!(f, "encoding error: {msg}"), + LoadError::EmptyFile(path) => write!(f, "file is empty: {path}"), + } + } +} + +pub struct FileRecord { + pub path: String, + pub lines: Vec, + pub byte_count: usize, +} + +pub fn load_record(path: &Path) -> Result { + let raw = fs::read(path)?; + let content = String::from_utf8(raw.clone()).map_err(|e| { + LoadError::InvalidEncoding(format!("{}: {e}", path.display())) + })?; + + if content.trim().is_empty() { + return Err(LoadError::EmptyFile(path.display().to_string())); + } + + let lines: Vec = content.lines().map(str::to_string).collect(); + + Ok(FileRecord { + path: path.display().to_string(), + lines, + byte_count: raw.len(), + }) +} + +pub fn load_all(dir: &Path) -> Result, LoadError> { + let mut records = Vec::new(); + for entry in fs::read_dir(dir)? { + let entry = entry?; + let path = entry.path(); + if path.extension().and_then(|e| e.to_str()) == Some("txt") { + records.push(load_record(&path)?); + } + } + Ok(records) +} diff --git a/priv/combined_metrics/samples/error_handling/uses_raise_from/bad/config_loader.py b/priv/combined_metrics/samples/error_handling/uses_raise_from/bad/config_loader.py new file mode 100644 index 0000000..7a97e95 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_raise_from/bad/config_loader.py @@ -0,0 +1,68 @@ +"""Configuration loader that reads YAML/JSON config files and validates them.""" +from __future__ import annotations + +import json +import os +from typing import Any + + +class ConfigError(Exception): + """Raised when configuration cannot be loaded or is invalid.""" + + +class MissingKeyError(ConfigError): + """Raised when a required configuration key is absent.""" + + +def _read_file(path: str) -> str: + try: + with open(path, encoding="utf-8") as fh: + return fh.read() + except 
FileNotFoundError: + # original FileNotFoundError context is lost — no 'from exc' + raise ConfigError(f"Configuration file not found: {path}") + except PermissionError: + raise ConfigError(f"Cannot read configuration file: {path}") + + +def _parse_json(raw: str, path: str) -> dict[str, Any]: + try: + return json.loads(raw) + except json.JSONDecodeError: + # the precise parse error (line, column) vanishes from the traceback + raise ConfigError(f"Invalid JSON in configuration file {path}") + + +def _require_key(config: dict[str, Any], key: str) -> Any: + try: + return config[key] + except KeyError: + # KeyError is silently replaced — no chain, harder to debug + raise MissingKeyError(f"Required configuration key {key!r} is missing") + + +def load(path: str) -> dict[str, Any]: + """Load and validate a JSON configuration file.""" + raw = _read_file(path) + config = _parse_json(raw, path) + + database_url = _require_key(config, "database_url") + secret_key = _require_key(config, "secret_key") + debug = config.get("debug", False) + + return { + "database_url": database_url, + "secret_key": secret_key, + "debug": debug, + "raw": config, + } + + +def load_from_env_or_file(env_var: str, fallback_path: str) -> dict[str, Any]: + """Load config from an env var path or fall back to a default file.""" + path = os.environ.get(env_var, fallback_path) + try: + return load(path) + except ConfigError: + # wraps again without from — traceback chain is broken at every level + raise ConfigError(f"Failed to load config (env {env_var}={path!r})") diff --git a/priv/combined_metrics/samples/error_handling/uses_raise_from/good/config_loader.py b/priv/combined_metrics/samples/error_handling/uses_raise_from/good/config_loader.py new file mode 100644 index 0000000..99ae990 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_raise_from/good/config_loader.py @@ -0,0 +1,72 @@ +"""Configuration loader that reads YAML/JSON config files and validates them.""" +from __future__ import 
annotations + +import json +import os +from typing import Any + + +class ConfigError(Exception): + """Raised when configuration cannot be loaded or is invalid.""" + + +class MissingKeyError(ConfigError): + """Raised when a required configuration key is absent.""" + + +def _read_file(path: str) -> str: + try: + with open(path, encoding="utf-8") as fh: + return fh.read() + except FileNotFoundError as exc: + raise ConfigError(f"Configuration file not found: {path}") from exc + except PermissionError as exc: + raise ConfigError(f"Cannot read configuration file: {path}") from exc + + +def _parse_json(raw: str, path: str) -> dict[str, Any]: + try: + return json.loads(raw) + except json.JSONDecodeError as exc: + # raise from preserves the JSON parse error in the exception chain + raise ConfigError( + f"Invalid JSON in configuration file {path}: {exc.msg} " + f"(line {exc.lineno}, col {exc.colno})" + ) from exc + + +def _require_key(config: dict[str, Any], key: str) -> Any: + try: + return config[key] + except KeyError as exc: + raise MissingKeyError( + f"Required configuration key {key!r} is missing" + ) from exc + + +def load(path: str) -> dict[str, Any]: + """Load and validate a JSON configuration file.""" + raw = _read_file(path) + config = _parse_json(raw, path) + + database_url = _require_key(config, "database_url") + secret_key = _require_key(config, "secret_key") + debug = config.get("debug", False) + + return { + "database_url": database_url, + "secret_key": secret_key, + "debug": debug, + "raw": config, + } + + +def load_from_env_or_file(env_var: str, fallback_path: str) -> dict[str, Any]: + """Load config from an env var path or fall back to a default file.""" + path = os.environ.get(env_var, fallback_path) + try: + return load(path) + except ConfigError as exc: + raise ConfigError( + f"Failed to load config (env {env_var}={path!r})" + ) from exc diff --git a/priv/combined_metrics/samples/error_handling/uses_result_for_recoverable_errors/bad/parser.rs 
b/priv/combined_metrics/samples/error_handling/uses_result_for_recoverable_errors/bad/parser.rs new file mode 100644 index 0000000..d1668dc --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_result_for_recoverable_errors/bad/parser.rs @@ -0,0 +1,50 @@ +pub struct Config { + pub host: String, + pub port: u16, + pub max_connections: usize, +} + +pub fn parse_config(raw: &str) -> Config { + let mut host = String::new(); + let mut port: u16 = 0; + let mut max_connections: usize = 10; + + for line in raw.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + + // Panics if line has no '=' separator — caller cannot recover + let parts: Vec<&str> = line.splitn(2, '=').collect(); + if parts.len() != 2 { + panic!("malformed config line: {line}"); + } + + let key = parts[0].trim(); + let value = parts[1].trim(); + + match key { + "host" => host = value.to_string(), + "port" => { + // Panics on invalid port — even a typo in the config file crashes + port = value.parse().unwrap(); + } + "max_connections" => { + max_connections = value.parse().unwrap(); + } + _ => {} + } + } + + if host.is_empty() { + // Missing config key is recoverable but we panic anyway + panic!("config missing required field: host"); + } + + if port == 0 { + panic!("config missing required field: port"); + } + + Config { host, port, max_connections } +} diff --git a/priv/combined_metrics/samples/error_handling/uses_result_for_recoverable_errors/good/parser.rs b/priv/combined_metrics/samples/error_handling/uses_result_for_recoverable_errors/good/parser.rs new file mode 100644 index 0000000..4f99ea2 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_result_for_recoverable_errors/good/parser.rs @@ -0,0 +1,74 @@ +use std::num::ParseIntError; +use std::fmt; + +#[derive(Debug)] +pub enum ConfigError { + MissingField(String), + InvalidValue { field: String, reason: String }, + ParseError(ParseIntError), +} + +impl fmt::Display 
for ConfigError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ConfigError::MissingField(field) => write!(f, "missing required field: {field}"), + ConfigError::InvalidValue { field, reason } => { + write!(f, "invalid value for '{field}': {reason}") + } + ConfigError::ParseError(e) => write!(f, "parse error: {e}"), + } + } +} + +impl From for ConfigError { + fn from(e: ParseIntError) -> Self { + ConfigError::ParseError(e) + } +} + +pub struct Config { + pub host: String, + pub port: u16, + pub max_connections: usize, +} + +pub fn parse_config(raw: &str) -> Result { + let mut host = None; + let mut port = None; + let mut max_connections = None; + + for line in raw.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + let (key, value) = line.split_once('=').ok_or_else(|| { + ConfigError::InvalidValue { + field: line.to_string(), + reason: "expected key=value format".to_string(), + } + })?; + + match key.trim() { + "host" => host = Some(value.trim().to_string()), + "port" => { + let p: u16 = value.trim().parse().map_err(|_| ConfigError::InvalidValue { + field: "port".to_string(), + reason: "must be a number between 1 and 65535".to_string(), + })?; + port = Some(p); + } + "max_connections" => { + let n: usize = value.trim().parse()?; + max_connections = Some(n); + } + _ => {} + } + } + + Ok(Config { + host: host.ok_or_else(|| ConfigError::MissingField("host".to_string()))?, + port: port.ok_or_else(|| ConfigError::MissingField("port".to_string()))?, + max_connections: max_connections.unwrap_or(10), + }) +} diff --git a/priv/combined_metrics/samples/error_handling/uses_throws_for_recoverable_errors/bad/AuthService.swift b/priv/combined_metrics/samples/error_handling/uses_throws_for_recoverable_errors/bad/AuthService.swift new file mode 100644 index 0000000..2ce1e80 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_throws_for_recoverable_errors/bad/AuthService.swift @@ -0,0 
+1,65 @@ +import Foundation + +struct AuthToken { + let value: String + let expiresAt: Date + let userID: String +} + +struct Credentials { + let username: String + let password: String +} + +class AuthService { + private var failedAttempts: [String: Int] = [:] + private let maxAttempts = 5 + + // Returns nil for all failure modes — caller cannot distinguish between + // invalid credentials, account locked, network error, etc. + func login(with credentials: Credentials) -> AuthToken? { + guard !credentials.username.isEmpty, !credentials.password.isEmpty else { + return nil + } + + let attempts = failedAttempts[credentials.username, default: 0] + if attempts >= maxAttempts { + return nil + } + + guard isReachable() else { + return nil + } + + guard validateCredentials(credentials) else { + failedAttempts[credentials.username, default: 0] += 1 + return nil + } + + failedAttempts.removeValue(forKey: credentials.username) + return AuthToken( + value: generateToken(), + expiresAt: Date().addingTimeInterval(3600), + userID: credentials.username + ) + } + + // Returns nil with no way to tell if expired or invalid + func validateToken(_ token: AuthToken) -> Bool? 
{ + guard token.expiresAt > Date() else { + return nil + } + return true + } + + // Returns false for both "no permission" and "token invalid" + func requirePermission(_ permission: String, for token: AuthToken) -> Bool { + guard token.expiresAt > Date() else { return false } + return hasPermission(permission, userID: token.userID) + } + + private func validateCredentials(_ credentials: Credentials) -> Bool { true } + private func generateToken() -> String { UUID().uuidString } + private func isReachable() -> Bool { true } + private func hasPermission(_ permission: String, userID: String) -> Bool { true } +} diff --git a/priv/combined_metrics/samples/error_handling/uses_throws_for_recoverable_errors/good/AuthService.swift b/priv/combined_metrics/samples/error_handling/uses_throws_for_recoverable_errors/good/AuthService.swift new file mode 100644 index 0000000..b0862b7 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/uses_throws_for_recoverable_errors/good/AuthService.swift @@ -0,0 +1,70 @@ +import Foundation + +enum AuthError: Error { + case invalidCredentials + case accountLocked(until: Date) + case networkUnavailable + case tokenExpired + case insufficientPermissions(required: String) +} + +struct AuthToken { + let value: String + let expiresAt: Date + let userID: String +} + +struct Credentials { + let username: String + let password: String +} + +class AuthService { + private var failedAttempts: [String: Int] = [:] + private let maxAttempts = 5 + + func login(with credentials: Credentials) throws -> AuthToken { + guard !credentials.username.isEmpty, !credentials.password.isEmpty else { + throw AuthError.invalidCredentials + } + + let attempts = failedAttempts[credentials.username, default: 0] + if attempts >= maxAttempts { + let lockoutEnd = Date().addingTimeInterval(15 * 60) + throw AuthError.accountLocked(until: lockoutEnd) + } + + guard isReachable() else { + throw AuthError.networkUnavailable + } + + guard validateCredentials(credentials) 
else { + failedAttempts[credentials.username, default: 0] += 1 + throw AuthError.invalidCredentials + } + + failedAttempts.removeValue(forKey: credentials.username) + return AuthToken( + value: generateToken(), + expiresAt: Date().addingTimeInterval(3600), + userID: credentials.username + ) + } + + func validateToken(_ token: AuthToken) throws { + guard token.expiresAt > Date() else { + throw AuthError.tokenExpired + } + } + + func requirePermission(_ permission: String, for token: AuthToken) throws { + guard hasPermission(permission, userID: token.userID) else { + throw AuthError.insufficientPermissions(required: permission) + } + } + + private func validateCredentials(_ credentials: Credentials) -> Bool { true } + private func generateToken() -> String { UUID().uuidString } + private func isReachable() -> Bool { true } + private func hasPermission(_ permission: String, userID: String) -> Bool { true } +} diff --git a/priv/combined_metrics/samples/error_handling/wraps_errors_with_context/bad/repository.go b/priv/combined_metrics/samples/error_handling/wraps_errors_with_context/bad/repository.go new file mode 100644 index 0000000..18a4ad3 --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/wraps_errors_with_context/bad/repository.go @@ -0,0 +1,50 @@ +package repository + +import ( + "context" + "database/sql" + "errors" + "fmt" +) + +var ErrNotFound = errors.New("not found") + +type Invoice struct { + ID int64 + CustomerID int64 + Amount float64 +} + +type InvoiceRepository struct { + db *sql.DB +} + +func New(db *sql.DB) *InvoiceRepository { + return &InvoiceRepository{db: db} +} + +func (r *InvoiceRepository) FindByID(ctx context.Context, id int64) (*Invoice, error) { + row := r.db.QueryRowContext(ctx, + `SELECT id, customer_id, amount FROM invoices WHERE id = $1`, id) + + var inv Invoice + if err := row.Scan(&inv.ID, &inv.CustomerID, &inv.Amount); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, ErrNotFound + } + // Returns a new 
error that discards the original — callers lose all context. + return nil, fmt.Errorf("database error") + } + return &inv, nil +} + +func (r *InvoiceRepository) Save(ctx context.Context, inv *Invoice) error { + _, err := r.db.ExecContext(ctx, + `INSERT INTO invoices (customer_id, amount) VALUES ($1, $2)`, + inv.CustomerID, inv.Amount) + if err != nil { + // No wrapping, no context — callers see only a bare message. + return errors.New("save failed") + } + return nil +} diff --git a/priv/combined_metrics/samples/error_handling/wraps_errors_with_context/good/repository.go b/priv/combined_metrics/samples/error_handling/wraps_errors_with_context/good/repository.go new file mode 100644 index 0000000..f31e17a --- /dev/null +++ b/priv/combined_metrics/samples/error_handling/wraps_errors_with_context/good/repository.go @@ -0,0 +1,49 @@ +package repository + +import ( + "context" + "database/sql" + "errors" + "fmt" +) + +var ErrNotFound = errors.New("not found") + +type Invoice struct { + ID int64 + CustomerID int64 + Amount float64 +} + +type InvoiceRepository struct { + db *sql.DB +} + +func New(db *sql.DB) *InvoiceRepository { + return &InvoiceRepository{db: db} +} + +func (r *InvoiceRepository) FindByID(ctx context.Context, id int64) (*Invoice, error) { + row := r.db.QueryRowContext(ctx, + `SELECT id, customer_id, amount FROM invoices WHERE id = $1`, id) + + var inv Invoice + if err := row.Scan(&inv.ID, &inv.CustomerID, &inv.Amount); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, fmt.Errorf("find invoice %d: %w", id, ErrNotFound) + } + // Wraps the database error so callers can inspect it with errors.Is/As. 
+ return nil, fmt.Errorf("find invoice %d: %w", id, err) + } + return &inv, nil +} + +func (r *InvoiceRepository) Save(ctx context.Context, inv *Invoice) error { + _, err := r.db.ExecContext(ctx, + `INSERT INTO invoices (customer_id, amount) VALUES ($1, $2)`, + inv.CustomerID, inv.Amount) + if err != nil { + return fmt.Errorf("save invoice for customer %d: %w", inv.CustomerID, err) + } + return nil +} diff --git a/priv/combined_metrics/samples/file_structure/has_consistent_indentation/bad/worker.ex b/priv/combined_metrics/samples/file_structure/has_consistent_indentation/bad/worker.ex new file mode 100644 index 0000000..f11b5cf --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/has_consistent_indentation/bad/worker.ex @@ -0,0 +1,73 @@ +defmodule Worker do + @moduledoc """ + Background worker for processing queued jobs. + """ + + def start(queue) do + jobs = fetch_jobs(queue) + Enum.each(jobs, fn job -> + process(job) + end) + end + + def process(job) do + case job.type do + :email -> + send_email(job) + :report -> + generate_report(job) + _ -> + {:error, :unknown_type} + end + end + + def retry(job, attempts) do + if attempts > 0 do + case process(job) do + :ok -> :ok + {:error, _} -> + retry(job, attempts - 1) + end + else + {:error, :max_retries_exceeded} + end + end + + def schedule(job, delay_ms) do + Process.send_after(self(), {:run, job}, delay_ms) + :ok + end + + def cancel(job_id) do + case find_job(job_id) do + nil -> + {:error, :not_found} + job -> + do_cancel(job) + end + end + + def status(job_id) do + case find_job(job_id) do + nil -> {:error, :not_found} + job -> + {:ok, job.status} + end + end + + def drain(queue) do + jobs = fetch_jobs(queue) + Enum.reduce(jobs, {[], []}, fn job, {ok, err} -> + case process(job) do + :ok -> {[job | ok], err} + {:error, _} -> {ok, [job | err]} + end + end) + end + + defp fetch_jobs(_queue), do: [] + defp send_email(_job), do: :ok + defp generate_report(_job), do: :ok + defp find_job(_id), do: nil + defp 
do_cancel(_job), do: :ok +end diff --git a/priv/combined_metrics/samples/file_structure/has_consistent_indentation/config.yml b/priv/combined_metrics/samples/file_structure/has_consistent_indentation/config.yml new file mode 100644 index 0000000..f3fd8c4 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/has_consistent_indentation/config.yml @@ -0,0 +1 @@ +doc: "Files should use a single, consistent indentation style with no mixed tabs and spaces." diff --git a/priv/combined_metrics/samples/file_structure/has_consistent_indentation/good/worker.ex b/priv/combined_metrics/samples/file_structure/has_consistent_indentation/good/worker.ex new file mode 100644 index 0000000..7edae97 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/has_consistent_indentation/good/worker.ex @@ -0,0 +1,67 @@ +defmodule Worker do + @moduledoc """ + Background worker for processing queued jobs. + """ + + def start(queue) do + jobs = fetch_jobs(queue) + Enum.each(jobs, fn job -> + process(job) + end) + end + + def process(job) do + case job.type do + :email -> send_email(job) + :report -> generate_report(job) + _ -> {:error, :unknown_type} + end + end + + def retry(job, attempts) do + if attempts > 0 do + case process(job) do + :ok -> :ok + {:error, _} -> retry(job, attempts - 1) + end + else + {:error, :max_retries_exceeded} + end + end + + def schedule(job, delay_ms) do + Process.send_after(self(), {:run, job}, delay_ms) + :ok + end + + def cancel(job_id) do + case find_job(job_id) do + nil -> {:error, :not_found} + job -> do_cancel(job) + end + end + + def status(job_id) do + case find_job(job_id) do + nil -> {:error, :not_found} + job -> {:ok, job.status} + end + end + + def drain(queue) do + jobs = fetch_jobs(queue) + + Enum.reduce(jobs, {[], []}, fn job, {ok, err} -> + case process(job) do + :ok -> {[job | ok], err} + {:error, _} -> {ok, [job | err]} + end + end) + end + + defp fetch_jobs(_queue), do: [] + defp send_email(_job), do: :ok + defp 
generate_report(_job), do: :ok + defp find_job(_id), do: nil + defp do_cancel(_job), do: :ok +end diff --git a/priv/combined_metrics/samples/file_structure/headers_have_include_guards/bad/Parser.cpp b/priv/combined_metrics/samples/file_structure/headers_have_include_guards/bad/Parser.cpp new file mode 100644 index 0000000..bee9773 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/headers_have_include_guards/bad/Parser.cpp @@ -0,0 +1,121 @@ +// This file demonstrates a translation unit that includes headers WITHOUT +// include guards. If these headers are included more than once — directly +// or transitively — the compiler sees duplicate declarations and definitions, +// causing errors or subtle ODR (One Definition Rule) violations. + +// token.h (inline for demonstration — NO include guard) +// ----------------------------------------------- +// // No #pragma once and no #ifndef guard +// +// enum class TokenKind { Identifier, Number, Operator, EndOfStream }; +// +// struct Token { +// TokenKind kind; +// std::string value; +// int line; +// }; +// ----------------------------------------------- + +// parse_error.h (inline for demonstration — NO include guard) +// ----------------------------------------------- +// // No #pragma once and no #ifndef guard +// +// #include +// #include +// +// // If parse_error.h is included by both Parser.cpp and another header that +// // Parser.cpp also includes, ParseError is defined twice → compile error. +// class ParseError : public std::runtime_error { +// public: +// explicit ParseError(const std::string& msg, int line) +// : std::runtime_error(msg), line_(line) {} +// int line() const noexcept { return line_; } +// private: +// int line_; +// }; +// ----------------------------------------------- + +#include +#include +#include + +// Simulated second include of the same unguarded header content: +// In a real project this happens via transitive includes. 
+// Without guards, the declarations below would appear twice — compile error. + +enum class TokenKind { Identifier, Number, Operator, EndOfStream }; + +struct Token { // first definition + TokenKind kind; + std::string value; + int line; +}; + +// struct Token { ← if the header were included again, this would be a redefinition +// TokenKind kind; +// std::string value; +// int line; +// }; + +class ParseError : public std::runtime_error { +public: + explicit ParseError(const std::string& msg, int line) + : std::runtime_error(msg), line_(line) {} + int line() const noexcept { return line_; } +private: + int line_; +}; + +class Parser { +public: + explicit Parser(std::string source) + : source_(std::move(source)), pos_(0), currentLine_(1) {} + + std::vector tokenize() { + std::vector tokens; + while (pos_ < source_.size()) { + skipWhitespace(); + if (pos_ >= source_.size()) break; + + char c = source_[pos_]; + if (std::isalpha(static_cast(c))) + tokens.push_back(readIdentifier()); + else if (std::isdigit(static_cast(c))) + tokens.push_back(readNumber()); + else + tokens.push_back(readOperator()); + } + tokens.push_back({TokenKind::EndOfStream, "", currentLine_}); + return tokens; + } + +private: + std::string source_; + std::size_t pos_; + int currentLine_; + + void skipWhitespace() { + while (pos_ < source_.size() && std::isspace(static_cast(source_[pos_]))) { + if (source_[pos_] == '\n') ++currentLine_; + ++pos_; + } + } + + Token readIdentifier() { + std::string value; + while (pos_ < source_.size() && std::isalnum(static_cast(source_[pos_]))) + value += source_[pos_++]; + return {TokenKind::Identifier, std::move(value), currentLine_}; + } + + Token readNumber() { + std::string value; + while (pos_ < source_.size() && std::isdigit(static_cast(source_[pos_]))) + value += source_[pos_++]; + return {TokenKind::Number, std::move(value), currentLine_}; + } + + Token readOperator() { + return {TokenKind::Operator, std::string(1, source_[pos_++]), currentLine_}; + } +}; 
diff --git a/priv/combined_metrics/samples/file_structure/headers_have_include_guards/good/Parser.cpp b/priv/combined_metrics/samples/file_structure/headers_have_include_guards/good/Parser.cpp new file mode 100644 index 0000000..09df204 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/headers_have_include_guards/good/Parser.cpp @@ -0,0 +1,116 @@ +// This file demonstrates a translation unit that includes multiple headers, +// each of which is protected by an include guard (or #pragma once). +// The guards ensure that even if the same header is transitively included +// multiple times, its contents are only processed once by the compiler. + +// token.h (inline for demonstration) +// ----------------------------------------------- +// #pragma once ← include guard: #pragma once form +// +// enum class TokenKind { Identifier, Number, Operator, EndOfStream }; +// +// struct Token { +// TokenKind kind; +// std::string value; +// int line; +// }; +// ----------------------------------------------- + +// parse_error.h (inline for demonstration) +// ----------------------------------------------- +// #ifndef MYAPP_PARSE_ERROR_H ← include guard: #define guard form +// #define MYAPP_PARSE_ERROR_H +// +// #include +// #include +// +// class ParseError : public std::runtime_error { +// public: +// explicit ParseError(const std::string& msg, int line) +// : std::runtime_error(msg), line_(line) {} +// int line() const noexcept { return line_; } +// private: +// int line_; +// }; +// +// #endif // MYAPP_PARSE_ERROR_H +// ----------------------------------------------- + +#include +#include +#include +#include + +// Both headers above are guarded; including them multiple times (e.g., via +// transitive includes) is safe and idiomatic. 
+ +enum class TokenKind { Identifier, Number, Operator, EndOfStream }; + +struct Token { + TokenKind kind; + std::string value; + int line; +}; + +class ParseError : public std::runtime_error { +public: + explicit ParseError(const std::string& msg, int line) + : std::runtime_error(msg), line_(line) {} + int line() const noexcept { return line_; } +private: + int line_; +}; + +class Parser { +public: + explicit Parser(std::string source) + : source_(std::move(source)), pos_(0), currentLine_(1) {} + + std::vector tokenize() { + std::vector tokens; + while (pos_ < source_.size()) { + skipWhitespace(); + if (pos_ >= source_.size()) break; + + char c = source_[pos_]; + if (std::isalpha(static_cast(c))) + tokens.push_back(readIdentifier()); + else if (std::isdigit(static_cast(c))) + tokens.push_back(readNumber()); + else + tokens.push_back(readOperator()); + } + tokens.push_back({TokenKind::EndOfStream, "", currentLine_}); + return tokens; + } + +private: + std::string source_; + std::size_t pos_; + int currentLine_; + + void skipWhitespace() { + while (pos_ < source_.size() && std::isspace(static_cast(source_[pos_]))) { + if (source_[pos_] == '\n') ++currentLine_; + ++pos_; + } + } + + Token readIdentifier() { + std::string value; + while (pos_ < source_.size() && std::isalnum(static_cast(source_[pos_]))) + value += source_[pos_++]; + return {TokenKind::Identifier, std::move(value), currentLine_}; + } + + Token readNumber() { + std::string value; + while (pos_ < source_.size() && std::isdigit(static_cast(source_[pos_]))) + value += source_[pos_++]; + return {TokenKind::Number, std::move(value), currentLine_}; + } + + Token readOperator() { + return {TokenKind::Operator, std::string(1, source_[pos_++]), currentLine_}; + } +}; diff --git a/priv/combined_metrics/samples/file_structure/line_count_under_300/bad/mega_service.ex b/priv/combined_metrics/samples/file_structure/line_count_under_300/bad/mega_service.ex new file mode 100644 index 0000000..f14f2de --- /dev/null +++ 
b/priv/combined_metrics/samples/file_structure/line_count_under_300/bad/mega_service.ex @@ -0,0 +1,130 @@ +defmodule MegaService do + @moduledoc "Handles accounts, payments, shipping, and email all in one module." + def create_account(email, password) do + if String.length(password) < 8, do: {:error, :weak_password}, else: {:ok, %{email: email, password_hash: hash(password), id: generate_id()}} + end + def update_account(id, attrs) do + case find_account(id) do + nil -> {:error, :not_found} + account -> {:ok, Map.merge(account, attrs)} + end + end + def delete_account(id) do + case find_account(id) do + nil -> {:error, :not_found} + _account -> :ok + end + end + def authenticate(email, password) do + case find_by_email(email) do + nil -> {:error, :not_found} + account -> if verify_password(password, account.password_hash), do: {:ok, account}, else: {:error, :invalid_password} + end + end + def change_password(id, old_password, new_password) do + with {:ok, account} <- {:ok, find_account(id)}, true <- verify_password(old_password, account.password_hash), true <- String.length(new_password) >= 8 do + {:ok, Map.put(account, :password_hash, hash(new_password))} + else + _ -> {:error, :password_change_failed} + end + end + def charge_card(account_id, amount_cents, card_token) do + if amount_cents <= 0, do: {:error, :invalid_amount}, else: call_payment_gateway(card_token, amount_cents, account_id) + end + def refund_charge(charge_id, amount_cents) do + case find_charge(charge_id) do + nil -> {:error, :not_found} + charge -> if amount_cents > charge.amount, do: {:error, :exceeds_original}, else: process_refund(charge, amount_cents) + end + end + def create_subscription(account_id, plan) do + valid_plans = [:basic, :pro, :enterprise] + if plan in valid_plans do + {:ok, %{account_id: account_id, plan: plan, started_at: DateTime.utc_now(), billing_cycle: :monthly}} + else + {:error, :invalid_plan} + end + end + def cancel_subscription(account_id) do + case 
find_subscription(account_id) do + nil -> {:error, :no_subscription} + sub -> {:ok, Map.put(sub, :cancelled_at, DateTime.utc_now())} + end + end + def apply_coupon(account_id, code) do + case lookup_coupon(code) do + nil -> {:error, :invalid_coupon} + coupon -> if coupon.expired, do: {:error, :expired_coupon}, else: attach_coupon(account_id, coupon) + end + end + def create_shipment(order_id, address) do + case find_order(order_id) do + nil -> {:error, :order_not_found} + order -> {:ok, %{order_id: order.id, address: address, tracking: generate_tracking(), status: :pending}} + end + end + def update_shipment_status(shipment_id, status) do + valid_statuses = [:pending, :in_transit, :delivered, :returned] + if status in valid_statuses do + case find_shipment(shipment_id) do + nil -> {:error, :not_found} + shipment -> {:ok, Map.put(shipment, :status, status)} + end + else + {:error, :invalid_status} + end + end + def estimate_delivery(shipment_id) do + case find_shipment(shipment_id) do + nil -> {:error, :not_found} + %{status: :delivered} -> {:error, :already_delivered} + shipment -> {:ok, calculate_eta(shipment)} + end + end + def cancel_shipment(shipment_id) do + case find_shipment(shipment_id) do + nil -> {:error, :not_found} + %{status: :delivered} -> {:error, :cannot_cancel_delivered} + shipment -> {:ok, Map.put(shipment, :status, :cancelled)} + end + end + def send_welcome_email(account_id) do + case find_account(account_id) do + nil -> {:error, :not_found} + account -> dispatch_email(account.email, "Welcome!", welcome_body(account)) + end + end + def send_receipt_email(account_id, charge_id) do + with account when not is_nil(account) <- find_account(account_id), charge when not is_nil(charge) <- find_charge(charge_id) do + dispatch_email(account.email, "Your receipt", receipt_body(charge)) + else + nil -> {:error, :not_found} + end + end + def send_shipment_notification(account_id, shipment_id) do + with account when not is_nil(account) <- 
find_account(account_id), shipment when not is_nil(shipment) <- find_shipment(shipment_id) do + dispatch_email(account.email, "Your order shipped!", shipment_body(shipment)) + else + nil -> {:error, :not_found} + end + end + defp hash(password), do: :crypto.hash(:sha256, password) + defp generate_id, do: :rand.uniform(1_000_000) + defp generate_tracking, do: "TRACK-#{:rand.uniform(999_999)}" + defp find_account(_id), do: nil + defp find_by_email(_email), do: nil + defp verify_password(_pw, _hash), do: true + defp call_payment_gateway(_token, _amount, _id), do: {:ok, %{id: generate_id()}} + defp find_charge(_id), do: nil + defp process_refund(charge, _amount), do: {:ok, charge} + defp find_subscription(_id), do: nil + defp lookup_coupon(_code), do: nil + defp attach_coupon(_id, coupon), do: {:ok, coupon} + defp find_order(_id), do: nil + defp find_shipment(_id), do: nil + defp calculate_eta(_shipment), do: DateTime.add(DateTime.utc_now(), 3 * 24 * 3600) + defp dispatch_email(_to, _subject, _body), do: :ok + defp welcome_body(account), do: "Welcome #{account.email}" + defp receipt_body(charge), do: "Amount: #{charge}" + defp shipment_body(shipment), do: "Tracking: #{shipment}" +end diff --git a/priv/combined_metrics/samples/file_structure/line_count_under_300/config.yml b/priv/combined_metrics/samples/file_structure/line_count_under_300/config.yml new file mode 100644 index 0000000..2c19563 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/line_count_under_300/config.yml @@ -0,0 +1 @@ +doc: "Files should be under 300 lines; longer files typically violate single responsibility." 
diff --git a/priv/combined_metrics/samples/file_structure/line_count_under_300/good/order_service.ex b/priv/combined_metrics/samples/file_structure/line_count_under_300/good/order_service.ex new file mode 100644 index 0000000..761517a --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/line_count_under_300/good/order_service.ex @@ -0,0 +1,68 @@ +defmodule OrderService do + @moduledoc """ + Manages order lifecycle: creation, updates, and cancellation. + """ + + alias OrderService.{Order, Repo} + + @spec create_order(map()) :: {:ok, Order.t()} | {:error, String.t()} + def create_order(attrs) do + with {:ok, validated} <- validate_order_attrs(attrs), + {:ok, order} <- Repo.insert(Order, validated) do + {:ok, order} + end + end + + @spec get_order(String.t()) :: {:ok, Order.t()} | {:error, :not_found} + def get_order(order_id) do + case Repo.find(Order, order_id) do + nil -> {:error, :not_found} + order -> {:ok, order} + end + end + + @spec list_orders_for_user(String.t()) :: {:ok, list(Order.t())} + def list_orders_for_user(user_id) do + orders = Repo.all(Order, user_id: user_id) + {:ok, orders} + end + + @spec update_order(String.t(), map()) :: {:ok, Order.t()} | {:error, :not_found | String.t()} + def update_order(order_id, attrs) do + with {:ok, order} <- get_order(order_id), + {:ok, validated} <- validate_order_attrs(attrs), + {:ok, updated} <- Repo.update(order, validated) do + {:ok, updated} + end + end + + @spec cancel_order(String.t()) :: {:ok, Order.t()} | {:error, :not_found | :already_cancelled} + def cancel_order(order_id) do + case get_order(order_id) do + {:error, :not_found} -> + {:error, :not_found} + {:ok, %Order{status: :cancelled}} -> + {:error, :already_cancelled} + {:ok, order} -> + Repo.update(order, %{status: :cancelled}) + end + end + + @spec complete_order(String.t()) :: {:ok, Order.t()} | {:error, :not_found | :not_fulfillable} + def complete_order(order_id) do + with {:ok, order} <- get_order(order_id), + :ok <- 
ensure_fulfillable(order), + {:ok, completed} <- Repo.update(order, %{status: :completed}) do + {:ok, completed} + end + end + + defp validate_order_attrs(%{items: items}) when is_list(items) and length(items) > 0 do + {:ok, items} + end + + defp validate_order_attrs(_), do: {:error, "Order must contain at least one item"} + + defp ensure_fulfillable(%Order{status: :pending}), do: :ok + defp ensure_fulfillable(_), do: {:error, :not_fulfillable} +end diff --git a/priv/combined_metrics/samples/file_structure/line_length_under_120/bad/query_builder.ex b/priv/combined_metrics/samples/file_structure/line_length_under_120/bad/query_builder.ex new file mode 100644 index 0000000..5e55bb9 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/line_length_under_120/bad/query_builder.ex @@ -0,0 +1,46 @@ +defmodule QueryBuilder do + @moduledoc """ + Builds Ecto queries for filtering and sorting records. + """ + + import Ecto.Query + + def build_user_query(filters) do + from(u in "users", where: u.active == true and u.role in ^Map.get(filters, :roles, ["admin", "member", "viewer", "guest"]) and u.inserted_at >= ^Map.get(filters, :since, ~D[2020-01-01])) + end + + def build_order_query(user_id, status, date_from, date_to, include_archived) do + from(o in "orders", where: o.user_id == ^user_id and o.status == ^status and o.inserted_at >= ^date_from and o.inserted_at <= ^date_to and (^include_archived or o.archived == false), order_by: [desc: o.inserted_at]) + end + + def build_product_query(filters) do + from(p in "products", where: p.price >= ^Map.get(filters, :min_price, 0) and p.price <= ^Map.get(filters, :max_price, 999_999) and p.category in ^Map.get(filters, :categories, []) and p.in_stock == ^Map.get(filters, :in_stock, true), select: %{id: p.id, name: p.name, price: p.price, category: p.category, description: p.description}) + end + + def paginate(query, page, per_page) do + offset = (page - 1) * per_page + from(q in query, limit: ^per_page, offset: ^offset) + 
end + + def apply_sort(query, "name_asc"), do: from(q in query, order_by: [asc: q.name]) + def apply_sort(query, "name_desc"), do: from(q in query, order_by: [desc: q.name]) + def apply_sort(query, "created_asc"), do: from(q in query, order_by: [asc: q.inserted_at]) + def apply_sort(query, "created_desc"), do: from(q in query, order_by: [desc: q.inserted_at]) + def apply_sort(query, _), do: query + + def with_preloads(query, preloads) when is_list(preloads) do + Enum.reduce(preloads, query, fn preload, acc -> from(q in acc, preload: ^[preload]) end) + end + + def build_report_query(tenant_id, report_type, date_range_start, date_range_end, group_by_field, aggregate_function, having_threshold) do + from(r in "report_entries", where: r.tenant_id == ^tenant_id and r.type == ^report_type and r.date >= ^date_range_start and r.date <= ^date_range_end, group_by: ^[group_by_field], having: fragment("? > ?", ^aggregate_function, ^having_threshold)) + end + + def build_search_query(search_term, fields, opts) do + pattern = "%#{String.replace(search_term, "%", "\\%")}%" + conditions = Enum.map(fields, fn field -> dynamic([q], ilike(field(q, ^field), ^pattern)) end) + combined_condition = Enum.reduce(conditions, fn cond, acc -> dynamic(^acc or ^cond) end) + base = from(q in Map.get(opts, :schema, "records"), where: ^combined_condition, limit: ^Map.get(opts, :limit, 50), offset: ^Map.get(opts, :offset, 0)) + base + end +end diff --git a/priv/combined_metrics/samples/file_structure/line_length_under_120/config.yml b/priv/combined_metrics/samples/file_structure/line_length_under_120/config.yml new file mode 100644 index 0000000..42f9df1 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/line_length_under_120/config.yml @@ -0,0 +1 @@ +doc: "Lines should be under 120 characters to avoid horizontal scrolling." 
diff --git a/priv/combined_metrics/samples/file_structure/line_length_under_120/good/query_builder.ex b/priv/combined_metrics/samples/file_structure/line_length_under_120/good/query_builder.ex new file mode 100644 index 0000000..994e696 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/line_length_under_120/good/query_builder.ex @@ -0,0 +1,89 @@ +defmodule QueryBuilder do + @moduledoc """ + Builds Ecto queries for filtering and sorting records. + """ + + import Ecto.Query + + def build_user_query(filters) do + roles = Map.get(filters, :roles, ["admin", "member", "viewer"]) + since = Map.get(filters, :since, ~D[2020-01-01]) + + from u in "users", + where: u.active == true, + where: u.role in ^roles, + where: u.inserted_at >= ^since + end + + def build_order_query(user_id, status, date_from, date_to, include_archived) do + from o in "orders", + where: o.user_id == ^user_id, + where: o.status == ^status, + where: o.inserted_at >= ^date_from, + where: o.inserted_at <= ^date_to, + where: ^include_archived or o.archived == false, + order_by: [desc: o.inserted_at] + end + + def build_product_query(filters) do + min_price = Map.get(filters, :min_price, 0) + max_price = Map.get(filters, :max_price, 999_999) + categories = Map.get(filters, :categories, []) + in_stock = Map.get(filters, :in_stock, true) + + from p in "products", + where: p.price >= ^min_price, + where: p.price <= ^max_price, + where: p.category in ^categories, + where: p.in_stock == ^in_stock, + select: %{id: p.id, name: p.name, price: p.price, category: p.category} + end + + def paginate(query, page, per_page) do + offset = (page - 1) * per_page + + from q in query, + limit: ^per_page, + offset: ^offset + end + + def apply_sort(query, "name_asc"), do: from(q in query, order_by: [asc: q.name]) + def apply_sort(query, "name_desc"), do: from(q in query, order_by: [desc: q.name]) + def apply_sort(query, "created_asc"), do: from(q in query, order_by: [asc: q.inserted_at]) + def apply_sort(query, 
"created_desc"), do: from(q in query, order_by: [desc: q.inserted_at]) + def apply_sort(query, _), do: query + + def with_preloads(query, preloads) when is_list(preloads) do + Enum.reduce(preloads, query, fn preload, acc -> + from q in acc, preload: ^[preload] + end) + end + + def build_report_query(tenant_id, report_type, date_start, date_end) do + from r in "report_entries", + where: r.tenant_id == ^tenant_id, + where: r.type == ^report_type, + where: r.date >= ^date_start, + where: r.date <= ^date_end + end + + def build_search_query(search_term, fields, opts) do + pattern = "%#{String.replace(search_term, "%", "\\%")}%" + schema = Map.get(opts, :schema, "records") + limit = Map.get(opts, :limit, 50) + offset = Map.get(opts, :offset, 0) + + conditions = Enum.map(fields, fn field -> + dynamic([q], ilike(field(q, ^field), ^pattern)) + end) + + combined = Enum.reduce(conditions, fn cond, acc -> + dynamic(^acc or ^cond) + end) + + from q in schema, + where: ^combined, + limit: ^limit, + offset: ^offset + end +end diff --git a/priv/combined_metrics/samples/file_structure/no_magic_numbers/bad/rate_limiter.ex b/priv/combined_metrics/samples/file_structure/no_magic_numbers/bad/rate_limiter.ex new file mode 100644 index 0000000..2a1b738 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/no_magic_numbers/bad/rate_limiter.ex @@ -0,0 +1,91 @@ +defmodule RateLimiter do + @moduledoc """ + Rate limiting logic for API endpoints. 
+ """ + + def check_rate(user_id, action) do + key = "#{user_id}:#{action}" + count = get_count(key) + + cond do + action == :api_call and count >= 100 -> + {:error, :rate_limited} + action == :login and count >= 5 -> + {:error, :rate_limited} + action == :export and count >= 10 -> + {:error, :rate_limited} + true -> + increment_count(key) + :ok + end + end + + def session_valid?(created_at) do + age_seconds = DateTime.diff(DateTime.utc_now(), created_at) + age_seconds < 3600 + end + + def token_expired?(issued_at) do + age_seconds = DateTime.diff(DateTime.utc_now(), issued_at) + age_seconds > 86400 + end + + def compute_backoff(attempt) do + min(1000 * :math.pow(2, attempt), 30_000) + end + + def charge_credits(user_id, action) do + cost = + case action do + :api_call -> 1 + :export -> 10 + :bulk_import -> 50 + :report -> 25 + end + + balance = get_balance(user_id) + + if balance >= cost do + deduct_credits(user_id, cost) + :ok + else + {:error, :insufficient_credits} + end + end + + def apply_rate_penalty(user_id, violation_count) do + penalty_seconds = + cond do + violation_count >= 10 -> 86400 + violation_count >= 5 -> 3600 + violation_count >= 3 -> 300 + true -> 60 + end + + lock_until = DateTime.add(DateTime.utc_now(), penalty_seconds) + set_lock(user_id, lock_until) + end + + def calculate_overage_fee(requests_made, limit) do + overage = max(0, requests_made - limit) + overage * 0.15 + end + + def burst_allowed?(user_id) do + recent = count_recent_requests(user_id, 60) + recent < 200 + end + + def cleanup_old_entries do + cutoff = DateTime.add(DateTime.utc_now(), -604800) + delete_entries_before(cutoff) + end + + defp get_count(_key), do: 0 + defp increment_count(_key), do: :ok + defp get_balance(_user_id), do: 100 + defp deduct_credits(_user_id, _amount), do: :ok + defp set_lock(_user_id, _until), do: :ok + defp count_recent_requests(_user_id, _seconds), do: 0 + defp delete_entries_before(_cutoff), do: :ok +end diff --git 
a/priv/combined_metrics/samples/file_structure/no_magic_numbers/config.yml b/priv/combined_metrics/samples/file_structure/no_magic_numbers/config.yml new file mode 100644 index 0000000..dddb8b2 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/no_magic_numbers/config.yml @@ -0,0 +1 @@ +doc: "Numeric literals should be extracted to named constants rather than used inline." diff --git a/priv/combined_metrics/samples/file_structure/no_magic_numbers/good/rate_limiter.ex b/priv/combined_metrics/samples/file_structure/no_magic_numbers/good/rate_limiter.ex new file mode 100644 index 0000000..ecde981 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/no_magic_numbers/good/rate_limiter.ex @@ -0,0 +1,114 @@ +defmodule RateLimiter do + @moduledoc """ + Rate limiting logic for API endpoints. + """ + + @api_call_limit 100 + @login_limit 5 + @export_limit 10 + + @session_ttl_seconds 3_600 + @token_ttl_seconds 86_400 + @week_in_seconds 604_800 + + @max_backoff_ms 30_000 + @base_backoff_ms 1_000 + + @credit_cost_api_call 1 + @credit_cost_export 10 + @credit_cost_bulk_import 50 + @credit_cost_report 25 + + @penalty_minor_seconds 60 + @penalty_low_seconds 300 + @penalty_medium_seconds 3_600 + @penalty_high_seconds 86_400 + + @overage_fee_per_request 0.15 + @burst_window_seconds 60 + @burst_limit 200 + + def check_rate(user_id, action) do + key = "#{user_id}:#{action}" + count = get_count(key) + + cond do + action == :api_call and count >= @api_call_limit -> + {:error, :rate_limited} + action == :login and count >= @login_limit -> + {:error, :rate_limited} + action == :export and count >= @export_limit -> + {:error, :rate_limited} + true -> + increment_count(key) + :ok + end + end + + def session_valid?(created_at) do + age_seconds = DateTime.diff(DateTime.utc_now(), created_at) + age_seconds < @session_ttl_seconds + end + + def token_expired?(issued_at) do + age_seconds = DateTime.diff(DateTime.utc_now(), issued_at) + age_seconds > @token_ttl_seconds + 
end + + def compute_backoff(attempt) do + min(@base_backoff_ms * :math.pow(2, attempt), @max_backoff_ms) + end + + def charge_credits(user_id, action) do + cost = credit_cost(action) + balance = get_balance(user_id) + + if balance >= cost do + deduct_credits(user_id, cost) + :ok + else + {:error, :insufficient_credits} + end + end + + def apply_rate_penalty(user_id, violation_count) do + penalty_seconds = + cond do + violation_count >= 10 -> @penalty_high_seconds + violation_count >= 5 -> @penalty_medium_seconds + violation_count >= 3 -> @penalty_low_seconds + true -> @penalty_minor_seconds + end + + lock_until = DateTime.add(DateTime.utc_now(), penalty_seconds) + set_lock(user_id, lock_until) + end + + def calculate_overage_fee(requests_made, limit) do + overage = max(0, requests_made - limit) + overage * @overage_fee_per_request + end + + def burst_allowed?(user_id) do + recent = count_recent_requests(user_id, @burst_window_seconds) + recent < @burst_limit + end + + def cleanup_old_entries do + cutoff = DateTime.add(DateTime.utc_now(), -@week_in_seconds) + delete_entries_before(cutoff) + end + + defp credit_cost(:api_call), do: @credit_cost_api_call + defp credit_cost(:export), do: @credit_cost_export + defp credit_cost(:bulk_import), do: @credit_cost_bulk_import + defp credit_cost(:report), do: @credit_cost_report + + defp get_count(_key), do: 0 + defp increment_count(_key), do: :ok + defp get_balance(_user_id), do: 100 + defp deduct_credits(_user_id, _amount), do: :ok + defp set_lock(_user_id, _until), do: :ok + defp count_recent_requests(_user_id, _seconds), do: 0 + defp delete_entries_before(_cutoff), do: :ok +end diff --git a/priv/combined_metrics/samples/file_structure/one_top_level_class_per_file/bad/InvoiceService.java b/priv/combined_metrics/samples/file_structure/one_top_level_class_per_file/bad/InvoiceService.java new file mode 100644 index 0000000..49c3ca7 --- /dev/null +++ 
b/priv/combined_metrics/samples/file_structure/one_top_level_class_per_file/bad/InvoiceService.java @@ -0,0 +1,55 @@ +package com.example.billing; + +import java.math.BigDecimal; +import java.time.LocalDate; +import java.util.List; + +// Multiple top-level class declarations in a single file + +public class InvoiceService { + + private final InvoiceRepository repository; + private final TaxCalculator taxCalculator; + + public InvoiceService(InvoiceRepository repository, TaxCalculator taxCalculator) { + this.repository = repository; + this.taxCalculator = taxCalculator; + } + + public Invoice createInvoice(Order order) { + List lineItems = order.getItems().stream() + .map(item -> new InvoiceLineItem(item.getDescription(), item.getUnitPrice(), item.getQuantity())) + .toList(); + return new Invoice(order.getId(), lineItems, LocalDate.now().plusDays(30)); + } +} + +// Second top-level class in the same file — violates the one-class-per-file rule +class InvoiceLineItem { + private final String description; + private final BigDecimal unitPrice; + private final int quantity; + + public InvoiceLineItem(String description, BigDecimal unitPrice, int quantity) { + this.description = description; + this.unitPrice = unitPrice; + this.quantity = quantity; + } + + public BigDecimal total() { + return unitPrice.multiply(BigDecimal.valueOf(quantity)); + } + + public String getDescription() { return description; } + public BigDecimal getUnitPrice() { return unitPrice; } + public int getQuantity() { return quantity; } +} + +// Third top-level class in the same file +class InvoiceValidator { + public boolean isValid(Invoice invoice) { + return invoice != null + && invoice.getId() != null + && !invoice.getLineItems().isEmpty(); + } +} diff --git a/priv/combined_metrics/samples/file_structure/one_top_level_class_per_file/good/InvoiceService.java b/priv/combined_metrics/samples/file_structure/one_top_level_class_per_file/good/InvoiceService.java new file mode 100644 index 
0000000..5117698 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/one_top_level_class_per_file/good/InvoiceService.java @@ -0,0 +1,63 @@ +package com.example.billing; + +import java.math.BigDecimal; +import java.time.LocalDate; +import java.util.List; + +/** + * Service responsible for creating and managing invoices. + * Supporting types (InvoiceLineItem, InvoiceStatus) live in their own files. + */ +public class InvoiceService { + + private final InvoiceRepository repository; + private final TaxCalculator taxCalculator; + private final NotificationService notifications; + + public InvoiceService( + InvoiceRepository repository, + TaxCalculator taxCalculator, + NotificationService notifications + ) { + this.repository = repository; + this.taxCalculator = taxCalculator; + this.notifications = notifications; + } + + public Invoice createInvoice(Order order) { + List lineItems = order.getItems().stream() + .map(item -> new InvoiceLineItem(item.getDescription(), item.getUnitPrice(), item.getQuantity())) + .toList(); + + BigDecimal subtotal = lineItems.stream() + .map(InvoiceLineItem::total) + .reduce(BigDecimal.ZERO, BigDecimal::add); + + BigDecimal tax = taxCalculator.calculate(subtotal, order.getRegion()); + + Invoice invoice = new Invoice( + order.getId(), + lineItems, + subtotal, + tax, + LocalDate.now().plusDays(30) + ); + + repository.save(invoice); + notifications.sendInvoiceCreated(order.getCustomerEmail(), invoice); + return invoice; + } + + public void markPaid(String invoiceId) { + Invoice invoice = repository.findByIdOrThrow(invoiceId); + invoice.markPaid(); + repository.update(invoice); + notifications.sendPaymentConfirmation(invoice); + } + + public List findOverdue() { + return repository.findByDueDateBefore(LocalDate.now()).stream() + .filter(inv -> inv.getStatus() == InvoiceStatus.PENDING) + .toList(); + } +} diff --git a/priv/combined_metrics/samples/file_structure/single_responsibility/bad/user_handler.ex 
b/priv/combined_metrics/samples/file_structure/single_responsibility/bad/user_handler.ex new file mode 100644 index 0000000..8178879 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/single_responsibility/bad/user_handler.ex @@ -0,0 +1,110 @@ +defmodule UserHandler do + @moduledoc """ + Handles everything user-related: registration, email, payments, and audit. + """ + + require Logger + + def register_user(attrs) do + with {:ok, _} <- validate_registration(attrs), + {:ok, user} <- insert_user(attrs), + :ok <- send_welcome_email(user), + :ok <- create_free_trial_subscription(user), + :ok <- log_audit_event(:user_registered, user) do + {:ok, user} + end + end + + def update_user(id, attrs) do + case find_user(id) do + nil -> {:error, :not_found} + user -> + updated = Map.merge(user, attrs) + save_user(updated) + send_profile_updated_email(updated) + log_audit_event(:user_updated, updated) + {:ok, updated} + end + end + + def delete_user(id) do + case find_user(id) do + nil -> {:error, :not_found} + user -> + cancel_subscription(user) + send_goodbye_email(user) + remove_user(user) + log_audit_event(:user_deleted, user) + :ok + end + end + + def send_welcome_email(user) do + body = "Hi #{user.name}, welcome to our platform!" + dispatch_email(user.email, "Welcome!", body) + end + + def send_profile_updated_email(user) do + body = "Hi #{user.name}, your profile has been updated." + dispatch_email(user.email, "Profile Updated", body) + end + + def send_goodbye_email(user) do + body = "Goodbye #{user.name}, your account has been deleted." 
+ dispatch_email(user.email, "Account Deleted", body) + end + + def create_free_trial_subscription(user) do + sub = %{user_id: user.id, plan: :free_trial, expires_at: trial_expiry()} + save_subscription(sub) + charge_initial_setup_fee(user, 0) + :ok + end + + def cancel_subscription(user) do + case find_subscription(user.id) do + nil -> :ok + sub -> + update_subscription(sub, %{status: :cancelled}) + process_prorated_refund(user, sub) + :ok + end + end + + def charge_initial_setup_fee(user, amount) do + if amount > 0 do + call_payment_gateway(user.payment_method, amount) + else + :ok + end + end + + def process_prorated_refund(_user, _sub) do + :ok + end + + def log_audit_event(event, user) do + Logger.info("AUDIT: #{event} for user #{user.id} at #{DateTime.utc_now()}") + write_audit_log(%{event: event, user_id: user.id, timestamp: DateTime.utc_now()}) + end + + defp validate_registration(attrs) do + if Map.has_key?(attrs, :email) and Map.has_key?(attrs, :password) do + {:ok, attrs} + else + {:error, :missing_fields} + end + end + + defp find_user(_id), do: nil + defp insert_user(attrs), do: {:ok, Map.put(attrs, :id, :rand.uniform(1000))} + defp save_user(_user), do: :ok + defp remove_user(_user), do: :ok + defp dispatch_email(_to, _subject, _body), do: :ok + defp save_subscription(_sub), do: :ok + defp find_subscription(_user_id), do: nil + defp update_subscription(_sub, _attrs), do: :ok + defp call_payment_gateway(_method, _amount), do: :ok + defp write_audit_log(_entry), do: :ok + defp trial_expiry, do: DateTime.add(DateTime.utc_now(), 30 * 86_400) +end diff --git a/priv/combined_metrics/samples/file_structure/single_responsibility/config.yml b/priv/combined_metrics/samples/file_structure/single_responsibility/config.yml new file mode 100644 index 0000000..2497edc --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/single_responsibility/config.yml @@ -0,0 +1 @@ +doc: "Each file should have one primary concern — low complexity spread across few, 
focused functions." diff --git a/priv/combined_metrics/samples/file_structure/single_responsibility/good/user_registration.ex b/priv/combined_metrics/samples/file_structure/single_responsibility/good/user_registration.ex new file mode 100644 index 0000000..a530341 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/single_responsibility/good/user_registration.ex @@ -0,0 +1,53 @@ +defmodule UserRegistration do + @moduledoc """ + Handles new user registration: validation and account creation only. + + Side effects (email, billing, audit) are delegated to their respective + context modules and triggered via events after successful registration. + """ + + alias UserRegistration.{Repo, User} + + @spec register(map()) :: {:ok, User.t()} | {:error, :missing_fields | :email_taken | String.t()} + def register(attrs) do + with {:ok, validated} <- validate(attrs), + :ok <- ensure_email_available(validated.email), + {:ok, user} <- Repo.insert(User, validated) do + {:ok, user} + end + end + + @spec validate(map()) :: {:ok, map()} | {:error, :missing_fields} + def validate(attrs) do + required = [:email, :password, :name] + missing = Enum.reject(required, &Map.has_key?(attrs, &1)) + + if missing == [] do + {:ok, attrs} + else + {:error, :missing_fields} + end + end + + @spec ensure_email_available(String.t()) :: :ok | {:error, :email_taken} + def ensure_email_available(email) do + case Repo.find_by(User, email: email) do + nil -> :ok + _existing -> {:error, :email_taken} + end + end + + @spec valid_password?(String.t()) :: boolean() + def valid_password?(password) do + String.length(password) >= 8 and + String.match?(password, ~r/[A-Z]/) and + String.match?(password, ~r/[0-9]/) + end + + @spec normalize_email(String.t()) :: String.t() + def normalize_email(email) do + email + |> String.trim() + |> String.downcase() + end +end diff --git a/priv/combined_metrics/samples/file_structure/uses_standard_indentation_width/bad/mailer.ex 
b/priv/combined_metrics/samples/file_structure/uses_standard_indentation_width/bad/mailer.ex new file mode 100644 index 0000000..6a23fc5 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/uses_standard_indentation_width/bad/mailer.ex @@ -0,0 +1,86 @@ +defmodule Mailer do + @moduledoc """ + Sends transactional emails. + """ + + @from_address "noreply@example.com" + + def send_welcome(user) do + body = build_welcome_body(user) + dispatch(%{ + to: user.email, + from: @from_address, + subject: "Welcome to the platform", + body: body + }) + end + + def send_password_reset(user, token) do + link = reset_link(token) + body = build_reset_body(user, link) + dispatch(%{ + to: user.email, + from: @from_address, + subject: "Reset your password", + body: body + }) + end + + def send_invoice(user, invoice) do + case format_invoice(invoice) do + {:ok, formatted} -> + dispatch(%{ + to: user.email, + from: @from_address, + subject: "Your invoice ##{invoice.id}", + body: formatted + }) + {:error, reason} -> + {:error, reason} + end + end + + def send_notification(user, message) do + if String.length(message) > 0 do + dispatch(%{ + to: user.email, + from: @from_address, + subject: "Notification", + body: message + }) + else + {:error, :empty_message} + end + end + + def send_bulk(users, subject, body) do + Enum.map(users, fn user -> + dispatch(%{ + to: user.email, + from: @from_address, + subject: subject, + body: body + }) + end) + end + + defp build_welcome_body(user) do + "Hi #{user.name}, welcome aboard!" 
+ end + + defp build_reset_body(user, link) do + "Hi #{user.name}, reset your password here: #{link}" + end + + defp reset_link(token) do + "https://example.com/reset?token=#{token}" + end + + defp format_invoice(invoice) do + {:ok, "Invoice ##{invoice.id}: $#{invoice.total}"} + end + + defp dispatch(email) do + {:ok, email} + end +end diff --git a/priv/combined_metrics/samples/file_structure/uses_standard_indentation_width/config.yml b/priv/combined_metrics/samples/file_structure/uses_standard_indentation_width/config.yml new file mode 100644 index 0000000..6a6b96b --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/uses_standard_indentation_width/config.yml @@ -0,0 +1 @@ +doc: "Indentation should use consistent multiples of 2 or 4 spaces throughout the file." diff --git a/priv/combined_metrics/samples/file_structure/uses_standard_indentation_width/good/mailer.ex b/priv/combined_metrics/samples/file_structure/uses_standard_indentation_width/good/mailer.ex new file mode 100644 index 0000000..2538478 --- /dev/null +++ b/priv/combined_metrics/samples/file_structure/uses_standard_indentation_width/good/mailer.ex @@ -0,0 +1,88 @@ +defmodule Mailer do + @moduledoc """ + Sends transactional emails. 
+ """ + + @from_address "noreply@example.com" + + def send_welcome(user) do + body = build_welcome_body(user) + + dispatch(%{ + to: user.email, + from: @from_address, + subject: "Welcome to the platform", + body: body + }) + end + + def send_password_reset(user, token) do + link = reset_link(token) + body = build_reset_body(user, link) + + dispatch(%{ + to: user.email, + from: @from_address, + subject: "Reset your password", + body: body + }) + end + + def send_invoice(user, invoice) do + case format_invoice(invoice) do + {:ok, formatted} -> + dispatch(%{ + to: user.email, + from: @from_address, + subject: "Your invoice ##{invoice.id}", + body: formatted + }) + {:error, reason} -> + {:error, reason} + end + end + + def send_notification(user, message) do + if String.length(message) > 0 do + dispatch(%{ + to: user.email, + from: @from_address, + subject: "Notification", + body: message + }) + else + {:error, :empty_message} + end + end + + def send_bulk(users, subject, body) do + Enum.map(users, fn user -> + dispatch(%{ + to: user.email, + from: @from_address, + subject: subject, + body: body + }) + end) + end + + defp build_welcome_body(user) do + "Hi #{user.name}, welcome aboard!" 
+ end + + defp build_reset_body(user, link) do + "Hi #{user.name}, reset your password here: #{link}" + end + + defp reset_link(token) do + "https://example.com/reset?token=#{token}" + end + + defp format_invoice(invoice) do + {:ok, "Invoice ##{invoice.id}: $#{invoice.total}"} + end + + defp dispatch(email) do + {:ok, email} + end +end diff --git a/priv/combined_metrics/samples/function_design/arrow_functions_as_callbacks/bad/user_service.ts b/priv/combined_metrics/samples/function_design/arrow_functions_as_callbacks/bad/user_service.ts new file mode 100644 index 0000000..b835058 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/arrow_functions_as_callbacks/bad/user_service.ts @@ -0,0 +1,68 @@ +interface User { + id: string; + email: string; + displayName: string; + role: "admin" | "member"; + score: number; +} + +class UserService { + private readonly baseUrl: string; + private readonly defaultPageSize: number; + + constructor(baseUrl: string, defaultPageSize = 20) { + this.baseUrl = baseUrl; + this.defaultPageSize = defaultPageSize; + } + + async fetchUsers(): Promise { + const response = await fetch(`${this.baseUrl}/users`); + return response.json() as Promise; + } + + async getAdmins(): Promise { + const users = await this.fetchUsers(); + // Using function keyword instead of arrow — `this` is unbound inside + return users.filter(function (user) { + return user.role === "admin"; + }); + } + + async getSortedByScore(): Promise { + const users = await this.fetchUsers(); + return [...users].sort(function (a, b) { + return b.score - a.score; + }); + } + + async getPage(page: number): Promise { + const users = await this.fetchUsers(); + const offset = (page - 1) * this.defaultPageSize; + return users.slice(offset, offset + this.defaultPageSize); + } + + async getDisplayNames(): Promise { + const users = await this.fetchUsers(); + return users.map(function (user) { + return user.displayName; + }); + } + + async searchByEmail(query: string): Promise { + 
const users = await this.fetchUsers(); + return users.filter(function (user) { + return user.email.toLowerCase().includes(query.toLowerCase()); + }); + } + + async transformToMap(): Promise> { + const users = await this.fetchUsers(); + return users.reduce(function (map, user) { + map.set(user.id, user); + return map; + }, new Map()); + } +} + +export { UserService }; +export type { User }; diff --git a/priv/combined_metrics/samples/function_design/arrow_functions_as_callbacks/good/user_service.ts b/priv/combined_metrics/samples/function_design/arrow_functions_as_callbacks/good/user_service.ts new file mode 100644 index 0000000..cc00697 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/arrow_functions_as_callbacks/good/user_service.ts @@ -0,0 +1,61 @@ +interface User { + id: string; + email: string; + displayName: string; + role: "admin" | "member"; + score: number; +} + +class UserService { + private readonly baseUrl: string; + private readonly defaultPageSize: number; + + constructor(baseUrl: string, defaultPageSize = 20) { + this.baseUrl = baseUrl; + this.defaultPageSize = defaultPageSize; + } + + async fetchUsers(): Promise { + const response = await fetch(`${this.baseUrl}/users`); + return response.json() as Promise; + } + + async getAdmins(): Promise { + const users = await this.fetchUsers(); + return users.filter((user) => user.role === "admin"); + } + + async getSortedByScore(): Promise { + const users = await this.fetchUsers(); + return [...users].sort((a, b) => b.score - a.score); + } + + async getPage(page: number): Promise { + const users = await this.fetchUsers(); + const offset = (page - 1) * this.defaultPageSize; + return users.slice(offset, offset + this.defaultPageSize); + } + + async getDisplayNames(): Promise { + const users = await this.fetchUsers(); + return users.map((user) => user.displayName); + } + + async searchByEmail(query: string): Promise { + const users = await this.fetchUsers(); + return users.filter((user) => + 
user.email.toLowerCase().includes(query.toLowerCase()) + ); + } + + async transformToMap(): Promise> { + const users = await this.fetchUsers(); + return users.reduce((map, user) => { + map.set(user.id, user); + return map; + }, new Map()); + } +} + +export { UserService }; +export type { User }; diff --git a/priv/combined_metrics/samples/function_design/async_functions_contain_await/bad/payment_gateway.ts b/priv/combined_metrics/samples/function_design/async_functions_contain_await/bad/payment_gateway.ts new file mode 100644 index 0000000..657baaf --- /dev/null +++ b/priv/combined_metrics/samples/function_design/async_functions_contain_await/bad/payment_gateway.ts @@ -0,0 +1,69 @@ +interface ChargeRequest { + amount: number; + currency: string; + paymentMethodId: string; + description: string; +} + +interface ChargeResult { + chargeId: string; + status: "succeeded" | "pending" | "failed"; + amount: number; + currency: string; +} + +// async but no await — just wraps a synchronous value +async function buildChargeRequest( + paymentMethodId: string, + amount: number, + currency: string +): Promise { + return { + amount, + currency, + paymentMethodId, + description: `Charge of ${amount} ${currency}`, + }; +} + +// async but no await — validation is synchronous +async function validateChargeRequest(request: ChargeRequest): Promise { + if (request.amount <= 0) return false; + if (!request.paymentMethodId) return false; + if (!request.currency) return false; + return true; +} + +// async but no await — just rethrows synchronously +async function assertPositiveAmount(amount: number): Promise { + if (amount <= 0) { + throw new Error(`Amount must be positive, got ${amount}`); + } +} + +async function createCharge(request: ChargeRequest): Promise { + const response = await fetch("/api/charges", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(request), + }); + + if (!response.ok) { + throw new Error(`Charge failed with status 
${response.status}`); + } + + return response.json() as Promise; +} + +// async but only returns a promise chain without await +async function fetchAndLogCharge(chargeId: string): Promise { + return fetch(`/api/charges/${chargeId}`) + .then((r) => r.json() as Promise) + .then((charge) => { + console.log("Fetched charge", charge.chargeId); + return charge; + }); +} + +export { buildChargeRequest, validateChargeRequest, assertPositiveAmount, createCharge, fetchAndLogCharge }; +export type { ChargeRequest, ChargeResult }; diff --git a/priv/combined_metrics/samples/function_design/async_functions_contain_await/good/payment_gateway.ts b/priv/combined_metrics/samples/function_design/async_functions_contain_await/good/payment_gateway.ts new file mode 100644 index 0000000..b07cdbd --- /dev/null +++ b/priv/combined_metrics/samples/function_design/async_functions_contain_await/good/payment_gateway.ts @@ -0,0 +1,75 @@ +interface ChargeRequest { + amount: number; + currency: string; + paymentMethodId: string; + description: string; +} + +interface ChargeResult { + chargeId: string; + status: "succeeded" | "pending" | "failed"; + amount: number; + currency: string; +} + +async function createCharge(request: ChargeRequest): Promise { + const response = await fetch("/api/charges", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(request), + }); + + if (!response.ok) { + throw new Error(`Charge failed with status ${response.status}`); + } + + return response.json() as Promise; +} + +async function fetchCharge(chargeId: string): Promise { + const response = await fetch(`/api/charges/${chargeId}`); + + if (!response.ok) { + throw new Error(`Charge not found: ${chargeId}`); + } + + return response.json() as Promise; +} + +async function waitForChargeSettlement( + chargeId: string, + maxAttempts = 10 +): Promise { + for (let attempt = 0; attempt < maxAttempts; attempt++) { + const charge = await fetchCharge(chargeId); + + if (charge.status 
=== "succeeded" || charge.status === "failed") { + return charge; + } + + await new Promise((resolve) => setTimeout(resolve, 2000 * (attempt + 1))); + } + + throw new Error(`Charge ${chargeId} did not settle after ${maxAttempts} attempts`); +} + +async function processPaymentWithRetry( + request: ChargeRequest, + maxRetries = 3 +): Promise { + let lastError: Error | null = null; + + for (let i = 0; i < maxRetries; i++) { + try { + const charge = await createCharge(request); + return await waitForChargeSettlement(charge.chargeId); + } catch (err) { + lastError = err instanceof Error ? err : new Error(String(err)); + } + } + + throw lastError ?? new Error("Payment failed after retries"); +} + +export { createCharge, fetchCharge, waitForChargeSettlement, processPaymentWithRetry }; +export type { ChargeRequest, ChargeResult }; diff --git a/priv/combined_metrics/samples/function_design/async_method_has_await/bad/EmailDispatcher.cs b/priv/combined_metrics/samples/function_design/async_method_has_await/bad/EmailDispatcher.cs new file mode 100644 index 0000000..d82401d --- /dev/null +++ b/priv/combined_metrics/samples/function_design/async_method_has_await/bad/EmailDispatcher.cs @@ -0,0 +1,59 @@ +using System.Collections.Generic; +using System.Net.Http; +using System.Text.Json; +using System.Threading.Tasks; + +namespace Email +{ + public class EmailDispatcher + { + private readonly HttpClient _httpClient; + private readonly string _apiEndpoint; + + public EmailDispatcher(HttpClient httpClient, string apiEndpoint) + { + _httpClient = httpClient; + _apiEndpoint = apiEndpoint; + } + + // async keyword with no await — compiles with a warning; runs synchronously + public async Task SendAsync(EmailMessage message) + { + var payload = JsonSerializer.Serialize(message); + var content = new StringContent(payload, System.Text.Encoding.UTF8, "application/json"); + + // Missing await — blocks synchronously, defeats the purpose of async + var response = 
_httpClient.PostAsync(_apiEndpoint, content).Result; + var body = response.Content.ReadAsStringAsync().Result; + + return response.IsSuccessStatusCode + ? SendResult.Success() + : SendResult.Failure(body); + } + + // async but delegates all work to non-awaited helpers — no suspension point + public async Task PingAsync() + { + return CheckPing(); // synchronous; async here adds overhead with no benefit + } + + // async method that just wraps a completed task — should not be async + public async Task GetApiEndpointAsync() + { + return _apiEndpoint; // no await, just returns a value + } + + // async that does no I/O at all — the async machinery is pure overhead + public async Task LogMetricsAsync(int sent, int failed) + { + var summary = $"Sent: {sent}, Failed: {failed}"; + System.Console.WriteLine(summary); + } + + private bool CheckPing() + { + var response = _httpClient.GetAsync(_apiEndpoint + "/ping").Result; + return response.IsSuccessStatusCode; + } + } +} diff --git a/priv/combined_metrics/samples/function_design/async_method_has_await/good/EmailDispatcher.cs b/priv/combined_metrics/samples/function_design/async_method_has_await/good/EmailDispatcher.cs new file mode 100644 index 0000000..6480e69 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/async_method_has_await/good/EmailDispatcher.cs @@ -0,0 +1,59 @@ +using System.Collections.Generic; +using System.Net.Http; +using System.Text.Json; +using System.Threading.Tasks; + +namespace Email +{ + public class EmailDispatcher + { + private readonly HttpClient _httpClient; + private readonly string _apiEndpoint; + + public EmailDispatcher(HttpClient httpClient, string apiEndpoint) + { + _httpClient = httpClient; + _apiEndpoint = apiEndpoint; + } + + public async Task SendAsync(EmailMessage message) + { + var payload = JsonSerializer.Serialize(message); + var content = new StringContent(payload, System.Text.Encoding.UTF8, "application/json"); + + // Contains genuine await — truly async I/O operation 
+ var response = await _httpClient.PostAsync(_apiEndpoint, content); + var body = await response.Content.ReadAsStringAsync(); + + return response.IsSuccessStatusCode + ? SendResult.Success() + : SendResult.Failure(body); + } + + public async Task> SendBatchAsync(IEnumerable messages) + { + var tasks = new List>(); + foreach (var message in messages) + tasks.Add(SendAsync(message)); + + // Awaits all concurrent I/O operations + var results = await Task.WhenAll(tasks); + return results; + } + + public async Task PingAsync() + { + // Contains await — not just wrapping sync work + var response = await _httpClient.GetAsync(_apiEndpoint + "/ping"); + return response.IsSuccessStatusCode; + } + + public async Task GetRemainingQuotaAsync() + { + var response = await _httpClient.GetAsync(_apiEndpoint + "/quota"); + response.EnsureSuccessStatusCode(); + var json = await response.Content.ReadAsStringAsync(); + return JsonSerializer.Deserialize(json)!; + } + } +} diff --git a/priv/combined_metrics/samples/function_design/boolean_function_has_question_mark/bad/guards.ex b/priv/combined_metrics/samples/function_design/boolean_function_has_question_mark/bad/guards.ex new file mode 100644 index 0000000..eb9fb04 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/boolean_function_has_question_mark/bad/guards.ex @@ -0,0 +1,41 @@ +defmodule Guards do + def valid(value) when is_binary(value) do + String.length(value) > 0 + end + + def active(user) do + user.status == :active && !user.banned + end + + def empty(list) when is_list(list) do + length(list) == 0 + end + + def expired(token) do + DateTime.compare(token.expires_at, DateTime.utc_now()) == :lt + end + + def admin(user) do + user.role == :admin + end + + def verified(user) do + user.email_verified && user.phone_verified + end + + def authorized(user, resource) do + user.role == :admin || resource.owner_id == user.id + end + + def pending(order) do + order.status == :pending + end + + def within_limit(count, limit) 
do + count < limit + end + + def matching(a, b) do + a == b + end +end diff --git a/priv/combined_metrics/samples/function_design/boolean_function_has_question_mark/config.yml b/priv/combined_metrics/samples/function_design/boolean_function_has_question_mark/config.yml new file mode 100644 index 0000000..87e5ea3 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/boolean_function_has_question_mark/config.yml @@ -0,0 +1 @@ +doc: "Functions returning a boolean should end with `?` (Elixir/Ruby) or start with `is_`/`has_` (JS/Python)." diff --git a/priv/combined_metrics/samples/function_design/boolean_function_has_question_mark/good/guards.ex b/priv/combined_metrics/samples/function_design/boolean_function_has_question_mark/good/guards.ex new file mode 100644 index 0000000..bb5c168 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/boolean_function_has_question_mark/good/guards.ex @@ -0,0 +1,41 @@ +defmodule Guards do + def valid?(value) when is_binary(value) do + String.length(value) > 0 + end + + def active?(user) do + user.status == :active && !user.banned + end + + def empty?(list) when is_list(list) do + length(list) == 0 + end + + def expired?(token) do + DateTime.compare(token.expires_at, DateTime.utc_now()) == :lt + end + + def admin?(user) do + user.role == :admin + end + + def verified?(user) do + user.email_verified && user.phone_verified + end + + def authorized?(user, resource) do + user.role == :admin || resource.owner_id == user.id + end + + def pending?(order) do + order.status == :pending + end + + def within_limit?(count, limit) do + count < limit + end + + def matching?(a, b) do + a == b + end +end diff --git a/priv/combined_metrics/samples/function_design/call_site_forms_grammatical_phrase/bad/EventHandler.swift b/priv/combined_metrics/samples/function_design/call_site_forms_grammatical_phrase/bad/EventHandler.swift new file mode 100644 index 0000000..e6eca29 --- /dev/null +++ 
b/priv/combined_metrics/samples/function_design/call_site_forms_grammatical_phrase/bad/EventHandler.swift @@ -0,0 +1,56 @@ +import Foundation + +enum EventPriority: Int, Comparable { + case low = 0, normal = 1, high = 2, critical = 3 + + static func < (lhs: EventPriority, rhs: EventPriority) -> Bool { + return lhs.rawValue < rhs.rawValue + } +} + +struct AppEvent { + let name: String + let payload: [String: Any] + let priority: EventPriority + let occurredAt: Date +} + +typealias EventCallback = (AppEvent) -> Void + +class EventBus { + private var handlers: [String: [EventCallback]] = [:] + private var filters: [String: (AppEvent) -> Bool] = [:] + + // Reads awkwardly: eventBus.handlerRegistration(callback, eventName: "purchase") + func handlerRegistration(_ callback: @escaping EventCallback, eventName: String) { + handlers[eventName, default: []].append(callback) + } + + // Reads awkwardly: eventBus.handlerRemoval(eventName: "purchase") + func handlerRemoval(eventName: String) { + handlers.removeValue(forKey: eventName) + } + + // Reads awkwardly: eventBus.eventPublishing(event) + func eventPublishing(_ event: AppEvent) { + guard let eventHandlers = handlers[event.name] else { return } + let passes = filters[event.name].map { $0(event) } ?? 
 true + guard passes else { return } + eventHandlers.forEach { $0(event) } + } + + // Reads awkwardly: eventBus.filterAddition(predicate, eventName: "purchase") + func filterAddition(_ predicate: @escaping (AppEvent) -> Bool, eventName: String) { + filters[eventName] = predicate + } + + // Reads awkwardly: eventBus.subscriptionCheck(eventName: "purchase") + func subscriptionCheck(eventName: String) -> Bool { + return handlers[eventName]?.isEmpty == false + } + + // Reads awkwardly: eventBus.priorityFiltering(priority: .critical, log: events) + func priorityFiltering(priority: EventPriority, log: [AppEvent]) -> [AppEvent] { + return log.filter { $0.priority >= priority } + } +} diff --git a/priv/combined_metrics/samples/function_design/call_site_forms_grammatical_phrase/good/EventHandler.swift b/priv/combined_metrics/samples/function_design/call_site_forms_grammatical_phrase/good/EventHandler.swift new file mode 100644 index 0000000..9a7a356 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/call_site_forms_grammatical_phrase/good/EventHandler.swift @@ -0,0 +1,56 @@ +import Foundation + +enum EventPriority: Int, Comparable { + case low = 0, normal = 1, high = 2, critical = 3 + + static func < (lhs: EventPriority, rhs: EventPriority) -> Bool { + return lhs.rawValue < rhs.rawValue + } +} + +struct AppEvent { + let name: String + let payload: [String: Any] + let priority: EventPriority + let occurredAt: Date +} + +typealias EventHandler = (AppEvent) -> Void + +class EventBus { + private var handlers: [String: [EventHandler]] = [:] + private var filters: [String: (AppEvent) -> Bool] = [:] + + // Reads naturally: eventBus.register(handler, for: "purchase") + func register(_ handler: @escaping EventHandler, for eventName: String) { + handlers[eventName, default: []].append(handler) + } + + // Reads naturally: eventBus.removeHandlers(for: "purchase") + func removeHandlers(for eventName: String) { + handlers.removeValue(forKey: eventName) + } + + // Reads
 naturally: eventBus.publish(event) + func publish(_ event: AppEvent) { + guard let eventHandlers = handlers[event.name] else { return } + let passesFiler = filters[event.name].map { $0(event) } ?? true + guard passesFiler else { return } + eventHandlers.forEach { $0(event) } + } + + // Reads naturally: eventBus.addFilter(predicate, for: "purchase") + func addFilter(_ predicate: @escaping (AppEvent) -> Bool, for eventName: String) { + filters[eventName] = predicate + } + + // Reads naturally: eventBus.isSubscribed(to: "purchase") + func isSubscribed(to eventName: String) -> Bool { + return handlers[eventName]?.isEmpty == false + } + + // Reads naturally: eventBus.events(with: .critical, from: log) + func events(with priority: EventPriority, from log: [AppEvent]) -> [AppEvent] { + return log.filter { $0.priority >= priority } + } +} diff --git a/priv/combined_metrics/samples/function_design/context_is_first_parameter/bad/mailer.go b/priv/combined_metrics/samples/function_design/context_is_first_parameter/bad/mailer.go new file mode 100644 index 0000000..5b99bd2 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/context_is_first_parameter/bad/mailer.go @@ -0,0 +1,60 @@ +package mailer + +import ( + "context" + "fmt" + "net/smtp" +) + +// Message is an outbound email. +type Message struct { + To string + Subject string + Body string +} + +// SMTPMailer sends email over SMTP. +type SMTPMailer struct { + host string + port int + from string + auth smtp.Auth +} + +// New constructs an SMTPMailer. +func New(host string, port int, from string, auth smtp.Auth) *SMTPMailer { + return &SMTPMailer{host: host, port: port, from: from, auth: auth} +} + +// Send delivers a message. ctx is passed last and named "context" — both +// violate Go conventions: context must be first and named ctx.
+func (m *SMTPMailer) Send(msg Message, context context.Context) error { + if context.Err() != nil { + return fmt.Errorf("send email: context already done: %w", context.Err()) + } + + addr := fmt.Sprintf("%s:%d", m.host, m.port) + body := fmt.Sprintf("From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n%s", + m.from, msg.To, msg.Subject, msg.Body) + + if err := smtp.SendMail(addr, m.auth, m.from, []string{msg.To}, []byte(body)); err != nil { + return fmt.Errorf("send email to %q: %w", msg.To, err) + } + return nil +} + +// SendBulk delivers multiple messages. ctx is in the middle — inconsistent +// with the convention that context is always the first parameter. +func (m *SMTPMailer) SendBulk(msgs []Message, ctx context.Context, stopOnError bool) error { + for _, msg := range msgs { + if ctx.Err() != nil { + return fmt.Errorf("send bulk: %w", ctx.Err()) + } + if err := m.Send(msg, ctx); err != nil { + if stopOnError { + return err + } + } + } + return nil +} diff --git a/priv/combined_metrics/samples/function_design/context_is_first_parameter/good/mailer.go b/priv/combined_metrics/samples/function_design/context_is_first_parameter/good/mailer.go new file mode 100644 index 0000000..585c524 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/context_is_first_parameter/good/mailer.go @@ -0,0 +1,57 @@ +package mailer + +import ( + "context" + "fmt" + "net/smtp" +) + +// Message is an outbound email. +type Message struct { + To string + Subject string + Body string +} + +// SMTPMailer sends email over SMTP. +type SMTPMailer struct { + host string + port int + from string + auth smtp.Auth +} + +// New constructs an SMTPMailer. +func New(host string, port int, from string, auth smtp.Auth) *SMTPMailer { + return &SMTPMailer{host: host, port: port, from: from, auth: auth} +} + +// Send delivers a message. ctx is the first parameter, named ctx — idiomatic Go. 
+func (m *SMTPMailer) Send(ctx context.Context, msg Message) error { + if ctx.Err() != nil { + return fmt.Errorf("send email: context already done: %w", ctx.Err()) + } + + addr := fmt.Sprintf("%s:%d", m.host, m.port) + body := fmt.Sprintf("From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n%s", + m.from, msg.To, msg.Subject, msg.Body) + + if err := smtp.SendMail(addr, m.auth, m.from, []string{msg.To}, []byte(body)); err != nil { + return fmt.Errorf("send email to %q: %w", msg.To, err) + } + return nil +} + +// SendBulk delivers multiple messages, stopping if the context is cancelled. +// ctx is the first parameter on every method that does I/O. +func (m *SMTPMailer) SendBulk(ctx context.Context, msgs []Message) error { + for _, msg := range msgs { + if ctx.Err() != nil { + return fmt.Errorf("send bulk: %w", ctx.Err()) + } + if err := m.Send(ctx, msg); err != nil { + return err + } + } + return nil +} diff --git a/priv/combined_metrics/samples/function_design/cyclomatic_complexity_under_10/bad/order_processor.ex b/priv/combined_metrics/samples/function_design/cyclomatic_complexity_under_10/bad/order_processor.ex new file mode 100644 index 0000000..b73b804 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/cyclomatic_complexity_under_10/bad/order_processor.ex @@ -0,0 +1,52 @@ +defmodule OrderProcessor do + def process(order) do + cond do + order.status == :new && order.payment_method == :card && order.total > 0 -> + if order.user.verified do + if order.items != [] do + case charge_card(order) do + {:ok, charge} -> + if order.total > 1000 do + notify_fraud_team(order) + end + {:ok, %{order | status: :paid, charge_id: charge.id}} + {:error, :declined} -> + {:error, :payment_declined} + {:error, _} -> + {:error, :payment_failed} + end + else + {:error, :empty_order} + end + else + {:error, :unverified_user} + end + + order.status == :new && order.payment_method == :invoice -> + if order.user.credit_approved do + {:ok, %{order | status: :invoiced}} + else + {:error, 
:credit_not_approved} + end + + order.status == :paid -> + if order.shipment_address != nil do + {:ok, %{order | status: :shipped}} + else + {:error, :no_address} + end + + order.status == :shipped -> + {:ok, %{order | status: :delivered}} + + order.status == :cancelled -> + {:error, :already_cancelled} + + true -> + {:error, :invalid_transition} + end + end + + defp charge_card(order), do: {:ok, %{id: "ch_#{order.id}"}} + defp notify_fraud_team(order), do: IO.puts("Fraud check: #{order.id}") +end diff --git a/priv/combined_metrics/samples/function_design/cyclomatic_complexity_under_10/config.yml b/priv/combined_metrics/samples/function_design/cyclomatic_complexity_under_10/config.yml new file mode 100644 index 0000000..bdf1f44 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/cyclomatic_complexity_under_10/config.yml @@ -0,0 +1 @@ +doc: "Functions should have a cyclomatic complexity under 10." diff --git a/priv/combined_metrics/samples/function_design/cyclomatic_complexity_under_10/good/order_processor.ex b/priv/combined_metrics/samples/function_design/cyclomatic_complexity_under_10/good/order_processor.ex new file mode 100644 index 0000000..2ceae99 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/cyclomatic_complexity_under_10/good/order_processor.ex @@ -0,0 +1,50 @@ +defmodule OrderProcessor do + def process(%{status: :new, payment_method: :card} = order) do + with :ok <- verify_user(order), + :ok <- require_items(order), + {:ok, charge} <- charge_card(order) do + maybe_alert_fraud(order) + {:ok, %{order | status: :paid, charge_id: charge.id}} + end + end + + def process(%{status: :new, payment_method: :invoice} = order) do + if order.user.credit_approved do + {:ok, %{order | status: :invoiced}} + else + {:error, :credit_not_approved} + end + end + + def process(%{status: :paid} = order) do + case order.shipment_address do + nil -> {:error, :no_address} + _ -> {:ok, %{order | status: :shipped}} + end + end + + def 
process(%{status: :shipped} = order) do + {:ok, %{order | status: :delivered}} + end + + def process(%{status: :cancelled}) do + {:error, :already_cancelled} + end + + def process(_order), do: {:error, :invalid_transition} + + defp verify_user(%{user: %{verified: true}}), do: :ok + defp verify_user(_), do: {:error, :unverified_user} + + defp require_items(%{items: []}), do: {:error, :empty_order} + defp require_items(_), do: :ok + + defp maybe_alert_fraud(%{total: total} = order) when total > 1000 do + notify_fraud_team(order) + end + + defp maybe_alert_fraud(_order), do: :ok + + defp charge_card(order), do: {:ok, %{id: "ch_#{order.id}"}} + defp notify_fraud_team(order), do: IO.puts("Fraud check: #{order.id}") +end diff --git a/priv/combined_metrics/samples/function_design/default_parameters_placed_at_end/bad/EmailComposer.swift b/priv/combined_metrics/samples/function_design/default_parameters_placed_at_end/bad/EmailComposer.swift new file mode 100644 index 0000000..04475b5 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/default_parameters_placed_at_end/bad/EmailComposer.swift @@ -0,0 +1,64 @@ +import Foundation + +struct EmailMessage { + let to: [String] + let subject: String + let body: String + let cc: [String] + let bcc: [String] + let isHTML: Bool + let attachments: [URL] +} + +class EmailComposer { + + // Default parameters intermixed with required ones, making call sites confusing + func compose( + cc: [String] = [], // default before required params + to recipients: [String], // required + isHTML: Bool = false, // default before more required params + subject: String, // required + attachments: [URL] = [], // default + body: String, // required — buried after defaults + bcc: [String] = [] + ) -> EmailMessage { + return EmailMessage( + to: recipients, + subject: subject, + body: body, + cc: cc, + bcc: bcc, + isHTML: isHTML, + attachments: attachments + ) + } + + // Default parameter (includeGettingStartedGuide) appears before required 
"name" + func sendWelcome( + to recipient: String, + includeGettingStartedGuide: Bool = true, // default before required + name: String, // required after default + replyTo: String? = nil + ) -> EmailMessage { + let body = includeGettingStartedGuide + ? "Welcome, \(name)! Check out our getting started guide." + : "Welcome, \(name)!" + + var bcc: [String] = [] + if let replyAddress = replyTo { + bcc.append(replyAddress) + } + + return EmailMessage(to: [recipient], subject: "Welcome!", body: body, cc: [], bcc: bcc, isHTML: false, attachments: []) + } + + // Default "retryCount" before required "date" + func scheduleDelivery( + for message: EmailMessage, + retryCount: Int = 3, // default before required + at date: Date, // required after default + retryDelay: TimeInterval = 60 + ) { + _ = (message, date, retryCount, retryDelay) + } +} diff --git a/priv/combined_metrics/samples/function_design/default_parameters_placed_at_end/good/EmailComposer.swift b/priv/combined_metrics/samples/function_design/default_parameters_placed_at_end/good/EmailComposer.swift new file mode 100644 index 0000000..167e085 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/default_parameters_placed_at_end/good/EmailComposer.swift @@ -0,0 +1,64 @@ +import Foundation + +struct EmailMessage { + let to: [String] + let subject: String + let body: String + let cc: [String] + let bcc: [String] + let isHTML: Bool + let attachments: [URL] +} + +class EmailComposer { + + // Required parameters first, defaults at the end + func compose( + to recipients: [String], + subject: String, + body: String, + cc: [String] = [], + bcc: [String] = [], + isHTML: Bool = false, + attachments: [URL] = [] + ) -> EmailMessage { + return EmailMessage( + to: recipients, + subject: subject, + body: body, + cc: cc, + bcc: bcc, + isHTML: isHTML, + attachments: attachments + ) + } + + // Required parameter first, optional config at end + func sendWelcome( + to recipient: String, + name: String, + 
includeGettingStartedGuide: Bool = true, + replyTo: String? = nil + ) -> EmailMessage { + let body = includeGettingStartedGuide + ? "Welcome, \(name)! Check out our getting started guide." + : "Welcome, \(name)!" + + var bcc: [String] = [] + if let replyAddress = replyTo { + bcc.append(replyAddress) + } + + return EmailMessage(to: [recipient], subject: "Welcome!", body: body, cc: [], bcc: bcc, isHTML: false, attachments: []) + } + + func scheduleDelivery( + for message: EmailMessage, + at date: Date, + retryCount: Int = 3, + retryDelay: TimeInterval = 60 + ) { + // Schedule logic here + _ = (message, date, retryCount, retryDelay) + } +} diff --git a/priv/combined_metrics/samples/function_design/function_does_not_change_return_type_via_options/bad/reports.ex b/priv/combined_metrics/samples/function_design/function_does_not_change_return_type_via_options/bad/reports.ex new file mode 100644 index 0000000..803e326 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/function_does_not_change_return_type_via_options/bad/reports.ex @@ -0,0 +1,64 @@ +defmodule MyApp.Reports do + @moduledoc """ + Report generation. + """ + + alias MyApp.Analytics + + # Bad: `format` option changes the return type from struct -> binary -> map. + # Callers cannot know the return type without inspecting the options. 
+ @spec build_revenue_report(Date.t(), Date.t(), keyword()) :: + map() | binary() | MyApp.Reports.RevenueReport.t() + def build_revenue_report(%Date{} = from, %Date{} = to, opts \\ []) do + rows = Analytics.revenue_by_day(from, to) + total = Enum.sum(Enum.map(rows, & &1.amount)) + + report = %{from: from, to: to, rows: rows, total: total} + + case Keyword.get(opts, :format) do + :csv -> + # Returns a binary when :csv + header = "date,amount\n" + body = Enum.map_join(rows, "\n", &"#{&1.date},#{&1.amount}") + header <> body + + :json -> + # Returns a map when :json + %{ + from: Date.to_iso8601(from), + to: Date.to_iso8601(to), + total: total, + rows: Enum.map(rows, &%{date: Date.to_iso8601(&1.date), amount: &1.amount}) + } + + nil -> + # Returns raw map with no format + report + end + end + + # Bad: `raw` option changes return from list of maps to list of tuples + @spec fetch_revenue_rows(Date.t(), Date.t(), keyword()) :: [map()] | [{Date.t(), integer()}] + def fetch_revenue_rows(from, to, opts \\ []) do + rows = Analytics.revenue_by_day(from, to) + + if Keyword.get(opts, :raw) do + Enum.map(rows, &{&1.date, &1.amount}) + else + rows + end + end + + # Bad: `verbose` option changes return from integer to map + @spec total_revenue(Date.t(), Date.t(), keyword()) :: integer() | map() + def total_revenue(from, to, opts \\ []) do + rows = Analytics.revenue_by_day(from, to) + total = Enum.sum(Enum.map(rows, & &1.amount)) + + if Keyword.get(opts, :verbose) do + %{total: total, from: from, to: to, row_count: length(rows)} + else + total + end + end +end diff --git a/priv/combined_metrics/samples/function_design/function_does_not_change_return_type_via_options/good/reports.ex b/priv/combined_metrics/samples/function_design/function_does_not_change_return_type_via_options/good/reports.ex new file mode 100644 index 0000000..01aafb2 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/function_does_not_change_return_type_via_options/good/reports.ex @@ -0,0 +1,69 @@ 
+defmodule MyApp.Reports do + @moduledoc """ + Report generation. Separate functions are used for distinct output + formats rather than changing the return type via options. + """ + + alias MyApp.Reports.{RevenueReport, SummaryReport} + alias MyApp.Analytics + + @doc """ + Builds a revenue report struct for the given date range. + Always returns a `RevenueReport` struct. + """ + @spec build_revenue_report(Date.t(), Date.t()) :: RevenueReport.t() + def build_revenue_report(%Date{} = from, %Date{} = to) do + rows = Analytics.revenue_by_day(from, to) + total = Enum.sum(Enum.map(rows, & &1.amount)) + + %RevenueReport{ + from: from, + to: to, + rows: rows, + total: total, + generated_at: DateTime.utc_now() + } + end + + @doc """ + Renders a revenue report as a CSV binary. + Always returns a binary. + """ + @spec render_revenue_csv(RevenueReport.t()) :: binary() + def render_revenue_csv(%RevenueReport{rows: rows}) do + header = "date,amount\n" + body = Enum.map_join(rows, "\n", &"#{&1.date},#{&1.amount}") + header <> body + end + + @doc """ + Renders a revenue report as a JSON-encodable map. + Always returns a map. + """ + @spec render_revenue_json(RevenueReport.t()) :: map() + def render_revenue_json(%RevenueReport{} = report) do + %{ + from: Date.to_iso8601(report.from), + to: Date.to_iso8601(report.to), + total: report.total, + rows: Enum.map(report.rows, &%{date: Date.to_iso8601(&1.date), amount: &1.amount}) + } + end + + @doc """ + Builds a summary report for a single month. + Always returns a `SummaryReport` struct. 
+ """ + @spec build_summary(integer(), integer()) :: SummaryReport.t() + def build_summary(year, month) do + data = Analytics.monthly_summary(year, month) + + %SummaryReport{ + year: year, + month: month, + total_orders: data.order_count, + total_revenue: data.revenue, + avg_order_value: data.revenue / max(data.order_count, 1) + } + end +end diff --git a/priv/combined_metrics/samples/function_design/has_verb_in_name/bad/api.ex b/priv/combined_metrics/samples/function_design/has_verb_in_name/bad/api.ex new file mode 100644 index 0000000..8ac1843 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/has_verb_in_name/bad/api.ex @@ -0,0 +1,54 @@ +defmodule Api do + def user_data(user_id) do + case http_get("/users/#{user_id}") do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + def order_status(order_id) do + case http_get("/orders/#{order_id}/status") do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + def payment_result(payment_id) do + case http_get("/payments/#{payment_id}") do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + def product_inventory(sku) do + case http_get("/inventory/#{sku}") do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + def shipment_tracking(tracking_number) do + case http_get("/shipments/#{tracking_number}") do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + def customer_profile(customer_id) do + case http_get("/customers/#{customer_id}/profile") do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + def webhook_registration(url, events) do + payload = Jason.encode!(%{url: url, events: events}) + case http_post("/webhooks", payload) do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + defp 
http_get(path), do: {:ok, ~s({"path":"#{path}"})} + defp http_post(path, _body), do: {:ok, ~s({"path":"#{path}","created":true})} +end diff --git a/priv/combined_metrics/samples/function_design/has_verb_in_name/config.yml b/priv/combined_metrics/samples/function_design/has_verb_in_name/config.yml new file mode 100644 index 0000000..cd84977 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/has_verb_in_name/config.yml @@ -0,0 +1 @@ +doc: "Function names should contain a verb describing the action performed." diff --git a/priv/combined_metrics/samples/function_design/has_verb_in_name/good/api.ex b/priv/combined_metrics/samples/function_design/has_verb_in_name/good/api.ex new file mode 100644 index 0000000..c60f64d --- /dev/null +++ b/priv/combined_metrics/samples/function_design/has_verb_in_name/good/api.ex @@ -0,0 +1,54 @@ +defmodule Api do + def fetch_user_data(user_id) do + case http_get("/users/#{user_id}") do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + def get_order_status(order_id) do + case http_get("/orders/#{order_id}/status") do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + def retrieve_payment_result(payment_id) do + case http_get("/payments/#{payment_id}") do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + def check_product_inventory(sku) do + case http_get("/inventory/#{sku}") do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + def track_shipment(tracking_number) do + case http_get("/shipments/#{tracking_number}") do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + def load_customer_profile(customer_id) do + case http_get("/customers/#{customer_id}/profile") do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + def register_webhook(url, 
events) do + payload = Jason.encode!(%{url: url, events: events}) + case http_post("/webhooks", payload) do + {:ok, body} -> {:ok, Jason.decode!(body)} + {:error, reason} -> {:error, reason} + end + end + + defp http_get(path), do: {:ok, ~s({"path":"#{path}"})} + defp http_post(path, _body), do: {:ok, ~s({"path":"#{path}","created":true})} +end diff --git a/priv/combined_metrics/samples/function_design/input_parameters_before_output_parameters/bad/Matrix.cpp b/priv/combined_metrics/samples/function_design/input_parameters_before_output_parameters/bad/Matrix.cpp new file mode 100644 index 0000000..2d26944 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/input_parameters_before_output_parameters/bad/Matrix.cpp @@ -0,0 +1,64 @@ +#include +#include +#include +#include + +// Output parameters appear before inputs — confusing parameter order + +// Output result comes first — counterintuitive +void multiplyScalar(std::vector& result, // output first — confusing + const std::vector& input, // input second + double scalar) +{ + result.resize(input.size()); + for (std::size_t i = 0; i < input.size(); ++i) + result[i] = input[i] * scalar; +} + +// Output before input dimensions — reader must study the body to understand the order +void transpose(double* output, // output first + const double* input, // input second + std::size_t rows, std::size_t cols) +{ + for (std::size_t r = 0; r < rows; ++r) + for (std::size_t c = 0; c < cols; ++c) + output[c * rows + r] = input[r * cols + c]; +} + +// Output interleaved with inputs — no clear convention +void addVectors(double* result, // output first + const double* a, // input + std::size_t size, // input dimension + const double* b) // second input — split from first by size +{ + for (std::size_t i = 0; i < size; ++i) + result[i] = a[i] + b[i]; +} + +// Output buried in the middle of the parameter list +void multiplyMatrices(const double* lhs, + std::size_t lhsRows, + double* result, // output in the middle + const 
double* rhs, + std::size_t sharedDim, + std::size_t rhsCols) +{ + for (std::size_t i = 0; i < lhsRows; ++i) + for (std::size_t k = 0; k < sharedDim; ++k) + for (std::size_t j = 0; j < rhsCols; ++j) + result[i * rhsCols + j] += lhs[i * sharedDim + k] * rhs[k * rhsCols + j]; +} + +// Output first, then all inputs +void formatRow(std::string& output, // output first + int rowIndex, + const std::vector& values, + char separator) +{ + output.clear(); + output += std::to_string(rowIndex) + separator; + for (std::size_t i = 0; i < values.size(); ++i) { + if (i > 0) output += separator; + output += std::to_string(values[i]); + } +} diff --git a/priv/combined_metrics/samples/function_design/input_parameters_before_output_parameters/good/Matrix.cpp b/priv/combined_metrics/samples/function_design/input_parameters_before_output_parameters/good/Matrix.cpp new file mode 100644 index 0000000..51e9333 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/input_parameters_before_output_parameters/good/Matrix.cpp @@ -0,0 +1,53 @@ +#include +#include +#include + +// Inputs come first, outputs last — consistent with standard library conventions (e.g., std::copy) + +// Pure inputs first, result returned by value +std::vector multiplyScalar(const std::vector& input, double scalar) { + std::vector result(input.size()); + for (std::size_t i = 0; i < input.size(); ++i) + result[i] = input[i] * scalar; + return result; +} + +// Input rows/cols before the output matrix +void transpose(const double* input, std::size_t rows, std::size_t cols, + double* output) // output parameter last +{ + for (std::size_t r = 0; r < rows; ++r) + for (std::size_t c = 0; c < cols; ++c) + output[c * rows + r] = input[r * cols + c]; +} + +// Read-only inputs (a, b, size) before write output (result) +void addVectors(const double* a, const double* b, std::size_t size, + double* result) // output last +{ + for (std::size_t i = 0; i < size; ++i) + result[i] = a[i] + b[i]; +} + +// Inputs: lhs, rhs 
matrices and their dimensions; output: result matrix last +void multiplyMatrices(const double* lhs, const double* rhs, + std::size_t lhsRows, std::size_t sharedDim, std::size_t rhsCols, + double* result) // output last +{ + for (std::size_t i = 0; i < lhsRows; ++i) + for (std::size_t k = 0; k < sharedDim; ++k) + for (std::size_t j = 0; j < rhsCols; ++j) + result[i * rhsCols + j] += lhs[i * sharedDim + k] * rhs[k * rhsCols + j]; +} + +// Input configuration first, output buffer last +void formatRow(int rowIndex, const std::vector& values, char separator, + std::string& output) // output last +{ + output.clear(); + output += std::to_string(rowIndex) + separator; + for (std::size_t i = 0; i < values.size(); ++i) { + if (i > 0) output += separator; + output += std::to_string(values[i]); + } +} diff --git a/priv/combined_metrics/samples/function_design/interface_has_one_or_two_methods/bad/storage.go b/priv/combined_metrics/samples/function_design/interface_has_one_or_two_methods/bad/storage.go new file mode 100644 index 0000000..4a93408 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/interface_has_one_or_two_methods/bad/storage.go @@ -0,0 +1,43 @@ +package storage + +import "context" + +// Document is a stored item. +type Document struct { + ID string + Content []byte +} + +// Store is a fat interface with many methods. Implementors must provide all of +// them even when a caller only needs Read. This makes mocking in tests verbose +// and tightly couples callers to the full Store surface. +type Store interface { + Read(ctx context.Context, id string) (*Document, error) + Write(ctx context.Context, doc Document) error + Delete(ctx context.Context, id string) error + List(ctx context.Context) ([]Document, error) + Count(ctx context.Context) (int, error) + Exists(ctx context.Context, id string) (bool, error) + Ping(ctx context.Context) error +} + +// DocumentService depends on the entire Store interface even though it only +// uses Read and Write. 
+type DocumentService struct { + store Store +} + +// New constructs a DocumentService. +func New(store Store) *DocumentService { + return &DocumentService{store: store} +} + +// Get fetches a document by ID. +func (s *DocumentService) Get(ctx context.Context, id string) (*Document, error) { + return s.store.Read(ctx, id) +} + +// Save persists a document. +func (s *DocumentService) Save(ctx context.Context, doc Document) error { + return s.store.Write(ctx, doc) +} diff --git a/priv/combined_metrics/samples/function_design/interface_has_one_or_two_methods/good/storage.go b/priv/combined_metrics/samples/function_design/interface_has_one_or_two_methods/good/storage.go new file mode 100644 index 0000000..be1d6ba --- /dev/null +++ b/priv/combined_metrics/samples/function_design/interface_has_one_or_two_methods/good/storage.go @@ -0,0 +1,53 @@ +package storage + +import "context" + +// Document is a stored item. +type Document struct { + ID string + Content []byte +} + +// Reader is a single-method interface for fetching a document. +// Small interfaces are easy to implement, test, and compose. +type Reader interface { + Read(ctx context.Context, id string) (*Document, error) +} + +// Writer is a single-method interface for persisting a document. +type Writer interface { + Write(ctx context.Context, doc Document) error +} + +// Deleter is a single-method interface for removing a document. +type Deleter interface { + Delete(ctx context.Context, id string) error +} + +// ReadWriter composes Reader and Writer for callers that need both. +// Composed from small interfaces rather than a large monolith. +type ReadWriter interface { + Reader + Writer +} + +// DocumentService uses only the capabilities it requires. +type DocumentService struct { + rw ReadWriter + deleter Deleter +} + +// New constructs a DocumentService. +func New(rw ReadWriter, deleter Deleter) *DocumentService { + return &DocumentService{rw: rw, deleter: deleter} +} + +// Get fetches a document by ID. 
+func (s *DocumentService) Get(ctx context.Context, id string) (*Document, error) { + return s.rw.Read(ctx, id) +} + +// Save persists a document. +func (s *DocumentService) Save(ctx context.Context, doc Document) error { + return s.rw.Write(ctx, doc) +} diff --git a/priv/combined_metrics/samples/function_design/is_less_than_20_lines/bad/report_generator.ex b/priv/combined_metrics/samples/function_design/is_less_than_20_lines/bad/report_generator.ex new file mode 100644 index 0000000..90ba048 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/is_less_than_20_lines/bad/report_generator.ex @@ -0,0 +1,53 @@ +defmodule ReportGenerator do + def generate_report(orders, user, opts) do + start_date = Keyword.get(opts, :start_date) + end_date = Keyword.get(opts, :end_date) + format = Keyword.get(opts, :format, :pdf) + + filtered = Enum.filter(orders, fn order -> + order.user_id == user.id && + (is_nil(start_date) || Date.compare(order.date, start_date) != :lt) && + (is_nil(end_date) || Date.compare(order.date, end_date) != :gt) + end) + + total = Enum.reduce(filtered, 0, fn order, acc -> + line_total = Enum.reduce(order.items, 0, fn item, item_acc -> + item_acc + item.price * item.quantity + end) + acc + line_total + end) + + discount = if user.vip do + total * 0.1 + else + 0 + end + + net_total = total - discount + tax = net_total * 0.2 + grand_total = net_total + tax + + summary_lines = Enum.map(filtered, fn order -> + items_text = Enum.map_join(order.items, ", ", fn item -> + "#{item.name} x#{item.quantity} @ #{item.price}" + end) + "Order #{order.id} (#{order.date}): #{items_text}" + end) + + header = "Report for #{user.name} | #{start_date} - #{end_date}" + body = Enum.join(summary_lines, "\n") + footer = "Subtotal: #{total} | Discount: #{discount} | Tax: #{tax} | Total: #{grand_total}" + + content = "#{header}\n\n#{body}\n\n#{footer}" + + case format do + :pdf -> {:ok, render_pdf(content)} + :csv -> {:ok, render_csv(filtered, grand_total)} + :html -> 
{:ok, "
    #{content}
    "} + _ -> {:error, :unsupported_format} + end + end + + defp render_pdf(content), do: %{type: :pdf, data: content} + defp render_csv(orders, total), do: %{type: :csv, orders: orders, total: total} +end diff --git a/priv/combined_metrics/samples/function_design/is_less_than_20_lines/config.yml b/priv/combined_metrics/samples/function_design/is_less_than_20_lines/config.yml new file mode 100644 index 0000000..10d2d75 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/is_less_than_20_lines/config.yml @@ -0,0 +1 @@ +doc: "Functions should be 20 lines or fewer." diff --git a/priv/combined_metrics/samples/function_design/is_less_than_20_lines/good/report_generator.ex b/priv/combined_metrics/samples/function_design/is_less_than_20_lines/good/report_generator.ex new file mode 100644 index 0000000..c777d77 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/is_less_than_20_lines/good/report_generator.ex @@ -0,0 +1,55 @@ +defmodule ReportGenerator do + def generate_report(orders, user, opts) do + filtered = filter_orders(orders, user, opts) + totals = calculate_totals(filtered, user) + content = build_content(filtered, user, totals, opts) + render(content, filtered, totals, Keyword.get(opts, :format, :pdf)) + end + + defp filter_orders(orders, user, opts) do + start_date = Keyword.get(opts, :start_date) + end_date = Keyword.get(opts, :end_date) + + Enum.filter(orders, fn order -> + order.user_id == user.id && + within_date_range?(order.date, start_date, end_date) + end) + end + + defp within_date_range?(date, start_date, end_date) do + (is_nil(start_date) || Date.compare(date, start_date) != :lt) && + (is_nil(end_date) || Date.compare(date, end_date) != :gt) + end + + defp calculate_totals(orders, user) do + subtotal = Enum.sum(Enum.map(orders, &order_subtotal/1)) + discount = if user.vip, do: subtotal * 0.1, else: 0 + net = subtotal - discount + %{subtotal: subtotal, discount: discount, net: net, tax: net * 0.2, grand: net + net * 0.2} + end 
+ + defp order_subtotal(order) do + Enum.sum(Enum.map(order.items, fn item -> item.price * item.quantity end)) + end + + defp build_content(orders, user, totals, opts) do + start_date = Keyword.get(opts, :start_date) + end_date = Keyword.get(opts, :end_date) + header = "Report for #{user.name} | #{start_date} - #{end_date}" + body = Enum.map_join(orders, "\n", &format_order_line/1) + footer = "Subtotal: #{totals.subtotal} | Discount: #{totals.discount} | Tax: #{totals.tax} | Total: #{totals.grand}" + "#{header}\n\n#{body}\n\n#{footer}" + end + + defp format_order_line(order) do + items_text = Enum.map_join(order.items, ", ", fn item -> + "#{item.name} x#{item.quantity} @ #{item.price}" + end) + "Order #{order.id} (#{order.date}): #{items_text}" + end + + defp render(content, _orders, _totals, :pdf), do: {:ok, %{type: :pdf, data: content}} + defp render(_content, orders, totals, :csv), do: {:ok, %{type: :csv, orders: orders, total: totals.grand}} + defp render(content, _orders, _totals, :html), do: {:ok, "
    #{content}
    "} + defp render(_content, _orders, _totals, _), do: {:error, :unsupported_format} +end diff --git a/priv/combined_metrics/samples/function_design/move_constructors_are_noexcept/bad/Buffer.cpp b/priv/combined_metrics/samples/function_design/move_constructors_are_noexcept/bad/Buffer.cpp new file mode 100644 index 0000000..7ced490 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/move_constructors_are_noexcept/bad/Buffer.cpp @@ -0,0 +1,82 @@ +#include +#include +#include +#include +#include + +class Buffer { +public: + explicit Buffer(std::size_t capacity) + : data_(std::make_unique(capacity)) + , capacity_(capacity) + , size_(0) + {} + + // Move constructor without noexcept: + // std::vector and other containers will use the copy constructor instead of move + // during reallocation, causing unnecessary heap allocations and memcpy calls + Buffer(Buffer&& other) // missing noexcept + : data_(std::move(other.data_)) + , capacity_(other.capacity_) + , size_(other.size_) + { + other.capacity_ = 0; + other.size_ = 0; + } + + // Move assignment also missing noexcept + Buffer& operator=(Buffer&& other) // missing noexcept + { + if (this != &other) { + data_ = std::move(other.data_); + capacity_ = other.capacity_; + size_ = other.size_; + other.capacity_ = 0; + other.size_ = 0; + } + return *this; + } + + Buffer(const Buffer& other) + : data_(std::make_unique(other.capacity_)) + , capacity_(other.capacity_) + , size_(other.size_) + { + std::memcpy(data_.get(), other.data_.get(), other.size_); + } + + Buffer& operator=(const Buffer& other) { + if (this != &other) { + auto newData = std::make_unique(other.capacity_); + std::memcpy(newData.get(), other.data_.get(), other.size_); + data_ = std::move(newData); + capacity_ = other.capacity_; + size_ = other.size_; + } + return *this; + } + + void append(const uint8_t* src, std::size_t length) { + if (size_ + length > capacity_) + throw std::overflow_error("Buffer capacity exceeded"); + std::memcpy(data_.get() + 
size_, src, length); + size_ += length; + } + + std::size_t size() const noexcept { return size_; } + std::size_t capacity() const noexcept { return capacity_; } + +private: + std::unique_ptr data_; + std::size_t capacity_; + std::size_t size_; +}; + +// Because move ctor is not noexcept, std::vector will copy (not move) Buffer +// objects during reallocation — expensive for large buffers +void demonstrateVectorRealloc() { + std::vector buffers; + buffers.reserve(4); + for (int i = 0; i < 8; ++i) + buffers.emplace_back(1024); // triggers copy, not move, on reallocation +} diff --git a/priv/combined_metrics/samples/function_design/move_constructors_are_noexcept/good/Buffer.cpp b/priv/combined_metrics/samples/function_design/move_constructors_are_noexcept/good/Buffer.cpp new file mode 100644 index 0000000..3fce1c2 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/move_constructors_are_noexcept/good/Buffer.cpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +class Buffer { +public: + explicit Buffer(std::size_t capacity) + : data_(std::make_unique(capacity)) + , capacity_(capacity) + , size_(0) + {} + + // Move constructor is noexcept: only reassigns pointers and integers — cannot throw + Buffer(Buffer&& other) noexcept + : data_(std::move(other.data_)) + , capacity_(other.capacity_) + , size_(other.size_) + { + other.capacity_ = 0; + other.size_ = 0; + } + + // Move assignment is noexcept for the same reason + Buffer& operator=(Buffer&& other) noexcept { + if (this != &other) { + data_ = std::move(other.data_); + capacity_ = other.capacity_; + size_ = other.size_; + other.capacity_ = 0; + other.size_ = 0; + } + return *this; + } + + Buffer(const Buffer& other) + : data_(std::make_unique(other.capacity_)) + , capacity_(other.capacity_) + , size_(other.size_) + { + std::memcpy(data_.get(), other.data_.get(), other.size_); + } + + Buffer& operator=(const Buffer& other) { + if (this != &other) { + auto newData = 
std::make_unique(other.capacity_); + std::memcpy(newData.get(), other.data_.get(), other.size_); + data_ = std::move(newData); + capacity_ = other.capacity_; + size_ = other.size_; + } + return *this; + } + + void append(const uint8_t* src, std::size_t length) { + if (size_ + length > capacity_) + throw std::overflow_error("Buffer capacity exceeded"); + std::memcpy(data_.get() + size_, src, length); + size_ += length; + } + + std::size_t size() const noexcept { return size_; } + std::size_t capacity() const noexcept { return capacity_; } + +private: + std::unique_ptr data_; + std::size_t capacity_; + std::size_t size_; +}; + +// noexcept move allows std::vector to use move during reallocation +// rather than copying — important for performance +void demonstrateVectorRealloc() { + std::vector buffers; + buffers.reserve(4); + for (int i = 0; i < 8; ++i) + buffers.emplace_back(1024); // triggers reallocation; uses move ctor (noexcept) +} diff --git a/priv/combined_metrics/samples/function_design/named_return_values_used_for_documentation/bad/parser.go b/priv/combined_metrics/samples/function_design/named_return_values_used_for_documentation/bad/parser.go new file mode 100644 index 0000000..c57a550 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/named_return_values_used_for_documentation/bad/parser.go @@ -0,0 +1,56 @@ +package parser + +import ( + "bufio" + "fmt" + "strconv" + "strings" +) + +// ParseCSVRow splits a CSV line into its fields. +// Without named returns the two []string values are indistinguishable from +// the signature alone — callers must read the body to know which is which. 
+func ParseCSVRow(line string) ([]string, []string) { + var headers, values []string + parts := strings.Split(line, ",") + for i, p := range parts { + p = strings.TrimSpace(p) + if i == 0 { + headers = append(headers, p) + } else { + values = append(values, p) + } + } + return headers, values +} + +// ParseBounds extracts the start and end line numbers from a range string "N-M". +// Three return values of which two are int — callers cannot tell from the +// signature which int is start and which is end without reading the body. +func ParseBounds(rangeStr string) (int, int, error) { + parts := strings.SplitN(rangeStr, "-", 2) + if len(parts) != 2 { + return 0, 0, fmt.Errorf("invalid range %q: expected format N-M", rangeStr) + } + start, err := strconv.Atoi(strings.TrimSpace(parts[0])) + if err != nil { + return 0, 0, fmt.Errorf("invalid start in range %q: %w", rangeStr, err) + } + end, err := strconv.Atoi(strings.TrimSpace(parts[1])) + if err != nil { + return 0, 0, fmt.Errorf("invalid end in range %q: %w", rangeStr, err) + } + return start, end, nil +} + +// CountWords scans a multi-line string and returns two counts. +// The two ints are ambiguous — is it (words, lines) or (lines, words)? +func CountWords(text string) (int, int) { + var words, lines int + scanner := bufio.NewScanner(strings.NewReader(text)) + for scanner.Scan() { + lines++ + words += len(strings.Fields(scanner.Text())) + } + return words, lines +} diff --git a/priv/combined_metrics/samples/function_design/named_return_values_used_for_documentation/good/parser.go b/priv/combined_metrics/samples/function_design/named_return_values_used_for_documentation/good/parser.go new file mode 100644 index 0000000..0abe9cf --- /dev/null +++ b/priv/combined_metrics/samples/function_design/named_return_values_used_for_documentation/good/parser.go @@ -0,0 +1,58 @@ +package parser + +import ( + "bufio" + "fmt" + "strconv" + "strings" +) + +// ParseCSVRow splits a CSV line into its fields. 
+// Named returns make the two string slices unambiguous at the call site. +func ParseCSVRow(line string) (headers []string, values []string) { + parts := strings.Split(line, ",") + for i, p := range parts { + p = strings.TrimSpace(p) + if i == 0 { + headers = append(headers, p) + } else { + values = append(values, p) + } + } + return +} + +// ParseBounds extracts the start and end line numbers from a range string "N-M". +// Named returns clarify which int is start and which is end. +func ParseBounds(rangeStr string) (start, end int, err error) { + parts := strings.SplitN(rangeStr, "-", 2) + if len(parts) != 2 { + err = fmt.Errorf("invalid range %q: expected format N-M", rangeStr) + return + } + start, err = strconv.Atoi(strings.TrimSpace(parts[0])) + if err != nil { + err = fmt.Errorf("invalid start in range %q: %w", rangeStr, err) + return + } + end, err = strconv.Atoi(strings.TrimSpace(parts[1])) + if err != nil { + err = fmt.Errorf("invalid end in range %q: %w", rangeStr, err) + return + } + if end < start { + err = fmt.Errorf("end %d is before start %d in range %q", end, start, rangeStr) + } + return +} + +// CountWords scans a multi-line string and returns word and line counts. +// Named returns document what each int represents. 
+func CountWords(text string) (words, lines int) { + scanner := bufio.NewScanner(strings.NewReader(text)) + for scanner.Scan() { + lines++ + words += len(strings.Fields(scanner.Text())) + } + return +} diff --git a/priv/combined_metrics/samples/function_design/nesting_depth_under_4/bad/validator.ex b/priv/combined_metrics/samples/function_design/nesting_depth_under_4/bad/validator.ex new file mode 100644 index 0000000..b53ed02 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/nesting_depth_under_4/bad/validator.ex @@ -0,0 +1,59 @@ +defmodule Validator do + def validate_request(request) do + if request != nil do + if Map.has_key?(request, :user) do + if request.user != nil do + if Map.has_key?(request.user, :role) do + if request.user.role in [:admin, :editor, :viewer] do + if Map.has_key?(request, :payload) do + {:ok, request} + else + {:error, "missing payload"} + end + else + {:error, "invalid role"} + end + else + {:error, "missing role"} + end + else + {:error, "user is nil"} + end + else + {:error, "missing user"} + end + else + {:error, "request is nil"} + end + end + + def validate_order(order) do + case order do + nil -> {:error, "order is nil"} + _ -> + case order.status do + :pending -> + case order.items do + [] -> {:error, "no items"} + items -> + case Enum.all?(items, &valid_item?/1) do + true -> + case order.payment do + nil -> {:error, "no payment"} + payment -> + case payment.method do + :card -> {:ok, order} + :cash -> {:ok, order} + _ -> {:error, "invalid payment method"} + end + end + false -> {:error, "invalid item"} + end + end + _ -> {:error, "order not pending"} + end + end + end + + defp valid_item?(item), do: item.quantity > 0 && item.price > 0 +end diff --git a/priv/combined_metrics/samples/function_design/nesting_depth_under_4/config.yml b/priv/combined_metrics/samples/function_design/nesting_depth_under_4/config.yml new file mode 100644 index 0000000..df376c9 --- /dev/null +++ 
b/priv/combined_metrics/samples/function_design/nesting_depth_under_4/config.yml @@ -0,0 +1 @@ +doc: "Code should not nest deeper than 4 levels." diff --git a/priv/combined_metrics/samples/function_design/nesting_depth_under_4/good/validator.ex b/priv/combined_metrics/samples/function_design/nesting_depth_under_4/good/validator.ex new file mode 100644 index 0000000..5ced262 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/nesting_depth_under_4/good/validator.ex @@ -0,0 +1,49 @@ +defmodule Validator do + def validate_request(nil), do: {:error, "request is nil"} + + def validate_request(request) do + with {:ok, user} <- fetch_user(request), + :ok <- validate_role(user), + :ok <- require_payload(request) do + {:ok, request} + end + end + + def validate_order(nil), do: {:error, "order is nil"} + + def validate_order(order) do + with :ok <- require_pending(order), + :ok <- require_items(order.items), + :ok <- validate_items(order.items), + :ok <- validate_payment(order.payment) do + {:ok, order} + end + end + + defp fetch_user(%{user: nil}), do: {:error, "user is nil"} + defp fetch_user(%{user: user}), do: {:ok, user} + defp fetch_user(_), do: {:error, "missing user"} + + defp validate_role(%{role: role}) when role in [:admin, :editor, :viewer], do: :ok + defp validate_role(%{role: _}), do: {:error, "invalid role"} + defp validate_role(_), do: {:error, "missing role"} + + defp require_payload(%{payload: _}), do: :ok + defp require_payload(_), do: {:error, "missing payload"} + + defp require_pending(%{status: :pending}), do: :ok + defp require_pending(_), do: {:error, "order not pending"} + + defp require_items([]), do: {:error, "no items"} + defp require_items(_), do: :ok + + defp validate_items(items) do + if Enum.all?(items, &valid_item?/1), do: :ok, else: {:error, "invalid item"} + end + + defp validate_payment(nil), do: {:error, "no payment"} + defp validate_payment(%{method: method}) when method in [:card, :cash], do: :ok + defp 
validate_payment(_), do: {:error, "invalid payment method"} + + defp valid_item?(item), do: item.quantity > 0 && item.price > 0 +end diff --git a/priv/combined_metrics/samples/function_design/no_arguments_object/bad/query_builder.js b/priv/combined_metrics/samples/function_design/no_arguments_object/bad/query_builder.js new file mode 100644 index 0000000..38ff091 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/no_arguments_object/bad/query_builder.js @@ -0,0 +1,57 @@ +function buildSelectClause() { + const table = arguments[0]; + if (arguments.length <= 1) { + return `SELECT * FROM ${table}`; + } + const columns = []; + for (let i = 1; i < arguments.length; i++) { + columns.push(`"${arguments[i]}"`); + } + return `SELECT ${columns.join(", ")} FROM "${table}"`; +} + +function buildWhereClause() { + if (arguments.length === 0) return ""; + const conditions = []; + for (let i = 0; i < arguments.length; i++) { + conditions.push(arguments[i]); + } + return "WHERE " + conditions.join(" AND "); +} + +function mergeQueryOptions() { + const result = {}; + for (let i = 0; i < arguments.length; i++) { + Object.assign(result, arguments[i]); + } + return result; +} + +function buildOrderClause() { + if (arguments.length === 0) return ""; + const parts = []; + for (let i = 0; i < arguments.length; i++) { + const field = arguments[i]; + parts.push(`"${field.column}" ${field.direction || "ASC"}`); + } + return "ORDER BY " + parts.join(", "); +} + +function buildQuery(table, options) { + const columns = options.columns || []; + const conditions = options.conditions || []; + const orderBy = options.orderBy || []; + + const selectPart = buildSelectClause.apply(null, [table].concat(columns)); + const wherePart = buildWhereClause.apply(null, conditions); + const orderPart = buildOrderClause.apply(null, orderBy); + + const parts = [selectPart, wherePart, orderPart].filter(Boolean); + + if (options.limit != null) parts.push(`LIMIT ${Number(options.limit)}`); + if 
(options.offset != null) parts.push(`OFFSET ${Number(options.offset)}`); + + return parts.join(" "); +} + +export { buildQuery, buildSelectClause, buildWhereClause, buildOrderClause, mergeQueryOptions }; diff --git a/priv/combined_metrics/samples/function_design/no_arguments_object/good/query_builder.js b/priv/combined_metrics/samples/function_design/no_arguments_object/good/query_builder.js new file mode 100644 index 0000000..19c8fc2 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/no_arguments_object/good/query_builder.js @@ -0,0 +1,49 @@ +function buildSelectClause(table, ...columns) { + if (columns.length === 0) { + return `SELECT * FROM ${table}`; + } + const escaped = columns.map((c) => `"${c}"`).join(", "); + return `SELECT ${escaped} FROM "${table}"`; +} + +function buildWhereClause(...conditions) { + if (conditions.length === 0) return ""; + return "WHERE " + conditions.join(" AND "); +} + +function mergeQueryOptions(...optionSets) { + return Object.assign({}, ...optionSets); +} + +function buildOrderClause(...fields) { + if (fields.length === 0) return ""; + const parts = fields.map(({ column, direction = "ASC" }) => `"${column}" ${direction}`); + return "ORDER BY " + parts.join(", "); +} + +function buildQuery(table, options = {}, ...extraConditions) { + const { columns = [], conditions = [], orderBy = [], limit, offset } = options; + + const allConditions = [...conditions, ...extraConditions]; + + const parts = [ + buildSelectClause(table, ...columns), + buildWhereClause(...allConditions), + buildOrderClause(...orderBy), + ].filter(Boolean); + + if (limit != null) parts.push(`LIMIT ${Number(limit)}`); + if (offset != null) parts.push(`OFFSET ${Number(offset)}`); + + return parts.join(" "); +} + +function paginatedQuery(table, page, pageSize, ...baseConditions) { + return buildQuery(table, { + conditions: baseConditions, + limit: pageSize, + offset: (page - 1) * pageSize, + }); +} + +export { buildQuery, buildSelectClause, 
buildWhereClause, buildOrderClause, paginatedQuery, mergeQueryOptions }; diff --git a/priv/combined_metrics/samples/function_design/no_async_void_outside_event_handlers/bad/BackgroundSync.cs b/priv/combined_metrics/samples/function_design/no_async_void_outside_event_handlers/bad/BackgroundSync.cs new file mode 100644 index 0000000..935bbe0 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/no_async_void_outside_event_handlers/bad/BackgroundSync.cs @@ -0,0 +1,61 @@ +using System; +using System.Threading.Tasks; + +namespace Sync +{ + public class BackgroundSync + { + private readonly ISyncRepository _repository; + private readonly ILogger _logger; + + public BackgroundSync(ISyncRepository repository, ILogger logger) + { + _repository = repository; + _logger = logger; + } + + // async void: callers cannot await this; exceptions crash the process unhandled + public async void Synchronize() + { + var pending = await _repository.GetPendingItemsAsync(); + foreach (var item in pending) + { + await _repository.PushItemAsync(item); + await _repository.MarkSyncedAsync(item.Id); + } + } + + // async void: caller cannot observe exceptions or know when it finishes + public async void SynchronizeWithLogging() + { + try + { + var pending = await _repository.GetPendingItemsAsync(); + foreach (var item in pending) + { + await _repository.PushItemAsync(item); + await _repository.MarkSyncedAsync(item.Id); + } + } + catch (Exception ex) + { + // Exception is swallowed here; no way for callers to know about it + _logger.Error("Sync failed", ex); + } + } + + // async void: cannot be unit tested properly; cannot be awaited in service startup + public async void RetryFailed() + { + var failed = await _repository.GetFailedItemsAsync(); + foreach (var item in failed) + await _repository.PushItemAsync(item); + } + + public void TriggerSync() + { + // Fire-and-forget via async void — exceptions are silently lost + Synchronize(); + } + } +} diff --git 
a/priv/combined_metrics/samples/function_design/no_async_void_outside_event_handlers/good/BackgroundSync.cs b/priv/combined_metrics/samples/function_design/no_async_void_outside_event_handlers/good/BackgroundSync.cs new file mode 100644 index 0000000..5d14172 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/no_async_void_outside_event_handlers/good/BackgroundSync.cs @@ -0,0 +1,74 @@ +using System; +using System.Threading.Tasks; +using System.Windows.Forms; + +namespace Sync +{ + public class BackgroundSync + { + private readonly ISyncRepository _repository; + private readonly ILogger _logger; + + public BackgroundSync(ISyncRepository repository, ILogger logger) + { + _repository = repository; + _logger = logger; + } + + // Returns Task so callers can await, observe exceptions, and compose + public async Task SynchronizeAsync() + { + var pending = await _repository.GetPendingItemsAsync(); + foreach (var item in pending) + { + await _repository.PushItemAsync(item); + await _repository.MarkSyncedAsync(item.Id); + } + } + + public async Task SynchronizeWithResultAsync() + { + int synced = 0; + int failed = 0; + + var pending = await _repository.GetPendingItemsAsync(); + foreach (var item in pending) + { + try + { + await _repository.PushItemAsync(item); + await _repository.MarkSyncedAsync(item.Id); + synced++; + } + catch (SyncException ex) + { + _logger.Warning("Failed to sync item {id}", ex); + failed++; + } + } + + return new SyncResult(synced, failed); + } + + // async void is acceptable ONLY for event handlers — exceptions cannot be caught otherwise + private async void OnSyncButtonClicked(object sender, EventArgs e) + { + try + { + await SynchronizeAsync(); + } + catch (Exception ex) + { + _logger.Error("Sync failed from UI button", ex); + MessageBox.Show("Sync failed. 
Please try again."); + } + } + + public async Task RetryFailedAsync() + { + var failed = await _repository.GetFailedItemsAsync(); + foreach (var item in failed) + await _repository.PushItemAsync(item); + } + } +} diff --git a/priv/combined_metrics/samples/function_design/no_boolean_parameter/bad/notifications.ex b/priv/combined_metrics/samples/function_design/no_boolean_parameter/bad/notifications.ex new file mode 100644 index 0000000..e6f0bcd --- /dev/null +++ b/priv/combined_metrics/samples/function_design/no_boolean_parameter/bad/notifications.ex @@ -0,0 +1,49 @@ +defmodule Notifications do + def send_email(user, is_welcome) do + if is_welcome do + deliver(user.email, "Welcome!", "Hello #{user.name}, welcome aboard!") + else + deliver(user.email, "See you soon", "Goodbye #{user.name}, we hope to see you again.") + end + end + + def notify_order(user, order, is_shipped) do + if is_shipped do + deliver(user.email, "Your order shipped!", "Order #{order.id} is on its way.") + else + deliver(user.email, "Order confirmed", "We received your order #{order.id}.") + end + end + + def send_payment_notification(user, amount, succeeded) do + if succeeded do + deliver(user.email, "Payment received", "We received your payment of #{amount}.") + else + deliver(user.email, "Payment failed", "Your payment of #{amount} could not be processed.") + end + end + + def schedule_reminder(user, event, is_urgent) do + subject = if is_urgent, do: "[URGENT] Reminder", else: "Reminder" + body = "Don't forget: #{event.title} at #{event.time}" + deliver_with_priority(user.email, subject, body, is_urgent) + end + + def send_admin_alert(admin, message, include_details) do + body = + if include_details do + "#{message}\n\nDetails: #{inspect(message)}" + else + message + end + deliver(admin.email, "Admin Alert", body) + end + + defp deliver(to, subject, body) do + {:ok, %{to: to, subject: subject, body: body}} + end + + defp deliver_with_priority(to, subject, body, urgent) do + {:ok, %{to: to, 
subject: subject, body: body, priority: if(urgent, do: :high, else: :normal)}} + end +end diff --git a/priv/combined_metrics/samples/function_design/no_boolean_parameter/config.yml b/priv/combined_metrics/samples/function_design/no_boolean_parameter/config.yml new file mode 100644 index 0000000..fd6e0d6 --- /dev/null +++ b/priv/combined_metrics/samples/function_design/no_boolean_parameter/config.yml @@ -0,0 +1 @@ +doc: "Functions should not take boolean parameters — a flag usually means the function does two things." diff --git a/priv/combined_metrics/samples/function_design/no_boolean_parameter/good/notifications.ex b/priv/combined_metrics/samples/function_design/no_boolean_parameter/good/notifications.ex new file mode 100644 index 0000000..31d02cd --- /dev/null +++ b/priv/combined_metrics/samples/function_design/no_boolean_parameter/good/notifications.ex @@ -0,0 +1,52 @@ +defmodule Notifications do + def send_welcome_email(user) do + deliver(user.email, "Welcome!", "Hello #{user.name}, welcome aboard!") + end + + def send_farewell_email(user) do + deliver(user.email, "See you soon", "Goodbye #{user.name}, we hope to see you again.") + end + + def notify_order_shipped(user, order) do + deliver(user.email, "Your order shipped!", "Order #{order.id} is on its way.") + end + + def notify_order_confirmed(user, order) do + deliver(user.email, "Order confirmed", "We received your order #{order.id}.") + end + + def notify_payment_received(user, amount) do + deliver(user.email, "Payment received", "We received your payment of #{amount}.") + end + + def notify_payment_failed(user, amount) do + deliver(user.email, "Payment failed", "Your payment of #{amount} could not be processed.") + end + + def send_urgent_reminder(user, event) do + body = "Don't forget: #{event.title} at #{event.time}" + deliver_with_priority(user.email, "[URGENT] Reminder", body, :high) + end + + def send_reminder(user, event) do + body = "Don't forget: #{event.title} at #{event.time}" + 
deliver_with_priority(user.email, "Reminder", body, :normal) + end + + def send_detailed_admin_alert(admin, message) do + body = "#{message}\n\nDetails: #{inspect(message)}" + deliver(admin.email, "Admin Alert", body) + end + + def send_admin_alert(admin, message) do + deliver(admin.email, "Admin Alert", message) + end + + defp deliver(to, subject, body) do + {:ok, %{to: to, subject: subject, body: body}} + end + + defp deliver_with_priority(to, subject, body, priority) do + {:ok, %{to: to, subject: subject, body: body, priority: priority}} + end +end diff --git a/priv/combined_metrics/samples/function_design/no_default_arguments_on_virtual_functions/bad/Widget.cpp b/priv/combined_metrics/samples/function_design/no_default_arguments_on_virtual_functions/bad/Widget.cpp new file mode 100644 index 0000000..55ab0ad --- /dev/null +++ b/priv/combined_metrics/samples/function_design/no_default_arguments_on_virtual_functions/bad/Widget.cpp @@ -0,0 +1,63 @@ +#include +#include + +// Virtual functions with default argument values — the default is resolved statically +// at the call site based on the static type, NOT the dynamic type. +// This means the base class defaults are used even when a derived class override is called. 
+ +class Widget { +public: + virtual ~Widget() = default; + + // Default argument on virtual function — resolved at compile time using static type + virtual void render(const RenderOptions& options = RenderOptions::defaults()) = 0; + + // Default on virtual — if Button overrides this with a different default, the + // base default is used when called via a Widget pointer or reference + virtual void resize(int width, int height = 100) = 0; + + virtual void highlight(const Color& color = Color::yellow()) = 0; + + virtual std::string describe() const = 0; +}; + +class Button : public Widget { +public: + explicit Button(std::string label) : label_(std::move(label)) {} + + // Override with a DIFFERENT default — this default is NEVER used when called + // through a Widget pointer/reference; the base class default applies instead + void render(const RenderOptions& options = RenderOptions::minimal()) override { + (void)options; + } + + void resize(int width, int height = 50) override { // different default — silently ignored via base ptr + width_ = width; + height_ = height; + } + + void highlight(const Color& color = Color::blue()) override { // different default — same problem + highlightColor_ = color; + } + + std::string describe() const override { + return "Button(" + label_ + ")"; + } + +private: + std::string label_; + int width_ = 0; + int height_ = 0; + Color highlightColor_; +}; + +void demonstrate() { + std::unique_ptr w = std::make_unique + + + + + +
    + + +
    +
    + + {selectedIndex + 1} / {behaviorKeys.length} + + {behavior?.split('.').slice(1).join('.')} +
    + + +
    +
    + + {behavior && data[behavior] && ( + setScalar(behavior, metric, val)} + /> + )} +
    +
    + + {yamlOpen && ( + setYamlOpen(false)} /> + )} + + ) +} diff --git a/tools/scalar_tuner/src/assets/hero.png b/tools/scalar_tuner/src/assets/hero.png new file mode 100644 index 0000000000000000000000000000000000000000..cc51a3d20ad4bc961b596a6adfd686685cd84bb0 GIT binary patch literal 44919 zcma%i^5TDbT`tlgo2c`(n!ND-Q6MGAYIbZ-QCh5-QC^YozK_ne*b_MKK#O- zIWy zd$aJVZ?rl%;eiC7d#Sl-cWLv9rA0(UOX(@I3k&yyL+3GaQ4xpb1EGC|i|{byaTI># zBO=0pyZu5XO!hzGNPch4cx%6XJAJpDa<+98BOcYNo1=XER1sv!UW z^>ZDMp%FSmVnt)n^EIR+Nth`vRO^_=UF3EWv75ym{S;#2F8MPot@-y$>ioj!)a1bE zijXPQY;U`qNwl9|wl{W>{FhMSb<>m4{;8Udp4psl)NwFRo(W-T)Y6-qDf=L#U?g<@ zV+T|3+RuE~!E&nodKrkfPcOpJ)&1|p`Tbtd12@MSE8DjWkD|9M>GZsHLf>TTbLx)B z#5K5l%gS7s(yWk?Lj{Nvm`Z-s8xb-Xr`5-xRr%w8v>!oSz{dN*MmxbscQl#Z40qSd z!PQXs-utLEF&$@S#__Lo*pOhG{l(%jyCh-0ME8owiT>U~r&q@MaDRePL(aZAAff9= zBd@*7RZxmiqK^nZH7`bTjIEQw#Y=V6(h{$>7ZIf=7S0;$8~4NXLd4T;Ai~C8&3k-; zYEtJWq6x$#5rrCJ%zspgO z((R)&>BIkkr^qQSEZljO*B+ZDvTeBKJ9N%8Ej=U+62GI)dc|ZMEM66~W12v&QFAIS zoDs`J`wjsl?WdE(NTnjCO!^yB>{yU-2UPT`&FOyVQVmxy#un2Po>GiPPfzd0M^d_i z+Kr}dPhIfsDLd~jOiJ(sHTN;2u)@MaX&0AdXR;BAwr_;1sR;)MM+&{XTzNnKWH@0a zoy9ApaUt=>jjHICu3W42)5;nzHS!M3?aOvZfv-sIc%wc9#l0uHFc}aS4JSrIDOQ?4ri_bS?pjH{U{6qr+6m z--%u=5oc&PxE==-I$~$5gw}yiu_y_o?|ag2+rAgSg%G)}EU}r%*A|v|pjbE`lxJpU zy0{?;(US(i-TiKq6s_(KTYy|YVi&!plMT)EJ4wMU{C7Y;!Xow1nJ+X@ks@r0v25R; z*o$8AP*G*f3$UlYR~18PxKyPj9vU#v)4#GgEx4*?KOhlh>0%3M$-LN7&b*0fXgm$k zH78>bObkx^3_K+RY;G+Usy6L}p9iT!hlnJCmR=;=JL1TdtB#vL!RTJ1TABQx8Ux0w zl^{Jkf(hU>-jr59iK_v-PkV!WwG!LvW<@{3{IbbSiWBrX@S8^`8JFRrc+(AqsUIvm zCTstACtCZ~qy-5^Gr@_z#X!N1*1vH=7@8oL4AEOxWl^YW&LW|1$1J?gG061vk1epe zRI_*s(lrX?-2#tCt_`)p?{zZC+)onl60CU~%4!vPA}h0+fB9ucNkTQ3u29((9Wq=> z^JUm|{_2-=?dMKu&9)#x{lgPOCM`U1^tXDbmZ%I$0fw7|Y-@3Tyj1LGfk$lvzYC85 z=R()QEER%Dz=mTMZ=7E?K74&?)4b~-uj34rKwb~7vU(48%+1xYc^VYn| zncI4NL8xEnmi>eM9EK&~si%*s|BX@zKIUU?cAWA5pdc`xEZIF1Ce=Wcg3#AP?N~p# zD7mfb{oR=ZPE^jgwD3G< z#8h1K&u&zKD4q*Pxt0ta#d}bm;QqZ!hFift22a~7c529SkmFQyN-*H zzQck2cL5iH2@d@Lhq4$~_!wMWL6(&mNq=7HhT}YYI$pVVZeQr>)4>qObE$PPNZ2!0 
z&7?y_upwfiefj8-`B$ju)}QKTz*Zs<$Lb?XHBo(jyU(405&`EL({mgxA$Ov49U|rN z2@(l@n`1vzG(v=!u4AZ*0s}~H4{VgcNOJ1rB?Kg!=)mGHKWeC|MHb>aiQ4Qd+gq7|??WH7;?J+kYL8z# z@juTBhW#n3rN))N7T1~)qr~Es;2rln6_U>_Ejxj(E5%Cpoc^vfw64mua!ADSZ8i|+ zB}g?u(dtvesTegnG!9K33T)4eq>)>ZFp?L>R8Qp#(J=bxz2mscD;ZNoJB@ZUqPpI>o7VgScniW4c()#;@;-9PfR`b(r+#4c; z;1-)`!?b}4A3v^zVtGa(a;O%bzu(ZG;(l4+W^vU|a&n*xV0kU$uFQ!5!aWy)^q4^r zn!-6hfj79_B#>GGNvQiKMD?xyW>F&GS>3y?Ric*xp4cz3FH3Gd1z|e+Vuug7*Ya48 zL~K*l5zo1XRuWm%S~GzE4LQyuRsH1&L`Gz-%>!ZTYn9K_Ttz+Pa@9hKob^)gmLVN` zKJz}C50X$$>G1Q_p;%C}B?<9h`60%vwalt2*Ymd44dGF(oOa2mJQuPQmE~Yurn0UC z6(+5$posAd@e$nvJQFL^C~E0E4IH`B68)j#L_u|Ex5mNE8a8{>gAGcIFVS|K?g77# zE@R|9nR>Rw3(5}{d~HnPpooZ*XZC$5FYt20 z3Ydvy9t)XHw8qFCd;mt8r$e?RQ%MiUF@}!oDGG#E6xxV z=z>11f!msSqbAZYnSvt}&J+QXZCU5b`0!gi_R}Z@Qq2d2Mwc z%9aWfp&x2UGbLDvtjGb*p>4O(#}UE+QhYmf0&Vc_Ay<~3V0zym%`Lk}-3MOz<%)%#Pl z<=OjGrvuBq318+CJ-{30QA1-O@<-O!-zFNM^&wp}iWGG$B&eIYtF)Rs4;5FK=>Aa9 zyTJdUgpK$di~MI|ZC=Vkd^V6T5h^z))sl~Dq7~stg?&l_LW6N1>0nX=aS46Ks+vj7 zr#P2~h=M-LLX2!W_k&dv^Tm2}o9vK&uKMDMmPkEcj7~C78vw2XJx^s8uo(Lw>9ET2 zzXG^MDxZzwh4y=Hs@h^Y2$ntYP+GSm>#cM9ZiUR^>tiFtIol3wi8=y~L2f@Bun;{B zr@yZMir9Ur@yw@7ni+Jd*Oc9hFx zK$M%P9+XKj>`spPB?k6^h1pok(_k*E$fr(SnXlXEnE{ODRWuWqB2u+8*2z?-wl+WC zntSCtFwpr0nF!avN+7`^Pt@XDvec7%ipuHYXg%5TXDAXv;U-33A(vzDB8V%0%j-R@ zk!2mox%%pJ<_M$o0lf*YButy@IP%9Zz=UDDlr|NuSNW*bYB{&18Xj|$eVP~(lx>y3 zgjJh3l1)5_uw6CTgk`ABQVoCHT$nbFS*edKLAbhRxLyzMI-{#6H!q_O@+mM7#~@Kw zWFDq#m<+NGVr`grM*Mh=Dq@8Tzl-$WKFWsWruYa^v`B30wDORai8q&__SDBzc?K#o z^UN`hN&IN;bep+mS1Z}i#zurS+Vl`B&+6`B#XK@l^8+&2+e@&zII(kdzid}Lm^AE5 zqjZ+3N*0O?1%{glymHcUP?g3vB#mH9MA)__>pUakjX+4jPuRS$9mmbImM8^= zOGMzKSY0_htZs;&-)|di4DJjSjVQ}hf2vq`u?G4@2@M(y#8xp{#1&$)ZW$rlUwG%{ z-S3I$D5~^(7stnQ#qh(0D6TnSA5R2*0u@x*22u1y%V5wYfW$b@)H*9X9{5!1Gw0`$ z4^fR@T%cw74(zCoPNP98@iS+WaFoE>g!a7#s-iwfRHKJSou%<97*I%619(655MjTr z6;k$p>T1-|cb9V=`;0i>gjBf%t=3jn_oC874-1o3(J|G-g$c?a=wn!m?U?CAd4WKW zm>=k4ApUHFtra|}Wl_G|#Y@n(Qv*q-frfU@rg{K1dLr%5(jA(Als7lSt8bue+zbab 
zVF0VKb`8x4k`2s^D1=P<^mk&LXhA!1jsr46^sGC@bsZfT)hZq4gnT+I+aHp`_XRE{ zDgx9ExOOSGF^DuVB_iQ8s$S{7agA7rKLtYG0nVl0q1kdJPQ3g#tw9qL?gP!_e~V$R z7B*H7J0{kp*t0|SM#+|$l6`>>9*GXki2@B!1?#&`s}t$D9D05bdTLaq__DzJ3hhhx z4>Z*xjuhGkL>lPDr8KhXi~8N*3~eqgebLTG`3g)&9`ESMo4O`ywJ{RymGvLXG}!Y?yAZ!5^Y19ukC`n~3GM7)2v! zx|C7WvVV`|+~>K~FRJPdp3VTPY##;_7#_^stFuo>5ewhPn5=@ApsXs_<27I&gPv>g~?s5SHzci&*$xeFVsI6?MsNJwojSpg9-+xbDwNanO9CUPbs06^E~@ zW3}{)@boKx;MgISD4?gb;X2~Nzv6Vu z_d;=oiM*wq!ou(NN8Zrg1ZYYlE==ylKlarfHe9u21xL{BI8t!pRC1^0=DGRrV0_Q@ zC#L85xcROt(T$6-@Y|KI-@7cgFD>WF?-)WG5jRleK;pn&=Rb9nZ+_@Mx-Fk~VSb{E zq@Ay=ub)@s&Mz*$+FSlG0WrrMKZI+3YuZ5k`RZGGO+r;}6mJy$DM;>AadvNZ=5yf|1r(je z0NIXNIS||Cv*MHEs{?>y+_cZmakNb+;cq-QqDcP%tMf{NmoE%a zN}Y33Vukiwxzm0dhmNsZQ>TsfYfZ-XZJv?ZTQ(=j1nt6FMd#;_K1oqQ{yq$GC6%)U zZU3B>;dh0p{DE?0kaj|iKj8?vvgC|-pv7<_WZBV7+B?`x+~3_las0^52<3d}UOOFD z7O7yf($skvy4y{NCq)B!Z=x|~NnJN+V(IV6LPL~?ORfvDDj*}q67_9}bTd~ci zlKmqOV)pG2tgWwY4Xr65@I8rddMwBV71bVAeGxT?v8-f6l9tsu9MFYr4r+BQr%mT; zO=G1)NW}SP4_kI0273Ew)qtwOwo=X-`1?bJ^>I^-9FXhSX17W>;{G^F+<9U(<%-*JPc!x>jH zSpfzK?Tx3%`#8Qlql2)Lf)TAiKHBQ5IOieg6~2NY7g@9IFI!7$DETtUG^srTsi2YS zc$`cq59-bK0{Yv})|#O4%XrxCkS29A6q~iTWNRlF;SlDMr$~v5hgerQQg_UB>M>2% zI6J+NtM*`(N7ghI_emz^lYyF_O8LW&&6oX-gU1h39L7r@8tpHA@>FGx*W=fR6E@q@ zg{!zJeVuJaQCuA=1@IE7|3##J$1oumJ5vky^UJEjKU#$)KuHS7B;vs(wJ%$?>4zlr z<=b*ca@HsJ!Osy3xBOqrn__D7pqhw2^7;n0$R~Z;twx??hrssk#C1cMtRHfFzhTG1 zE{;!Tmiq;ZD9#2W4(M?+!*~v>l$%5;__SINKTNAEIBf46X8185dhp4TD9_K#gp?em zl9d>E%I2x(q#pB8rt!89i!Mi7sMMmaZ?N?eM2!JHoQ{QdAoSm@`@TtaEkw{)WuZe^ zzrVO3sL=ewi4YYv1t!gfQ_Xo()Is9PQtqh!#?v&Mscaiz6wb$F>GjZE1xw7d5)*24 zu~!(MAawsNH*G-kU-c=3l(?|JJl0^q#LV(WKmSHC=#5YKstmI(V=6c4>73kKDwk3F zD!sjK#(*WYb8j>uP??1gq4SEU63;>Pk_#yOYu7(GAy4!ABPQY-WoeY1I=l2&k9RM( z;&F-Ki}KoHAb;HXNP-^_3u`-L$+~dmP7LmypyE23q+IsyIAyGbu{1T^)Y7+m(;oN@;N26N#9X<& zwqI@>wi=7v)<%`#h|WWx1pPuT%3Hx zTmHj4u@(m6TMc`y;_9#P8As?uJeu-!|Lgzd>}uWMUo5{kA<)1ndxs@UZR32fT6pJHGaO!4QH(eAa5+t zS1N59EQ1r6i z<(E$QmAL~w+VkGpLI9*Hnm0tLT@_hjW9JWQXev%DVG3YZJ@}x78{*jc{asC?1L_)h 
zF^DC#%H`1`O_VrpaQ}@~&1zbs5~&ja^i#ZVXwP!}j8mnEV@;<{Ahw)4%S3LKNFJ3i zaiK4p7j50(Gg`7o7JU5p$cw9Ok3@$*lZ@g;nFZi|2gmE)4`U4Rnm2m{vKk-zbX%kA zCoK32`kIhZtyUTzRW&2mT0PG|s|zU{4QPllcC91scP>F97ZXap<9Bv#F$2P|qk;b&2$rxv~0fH76P8hs?SUZLs6n%pW)x z{94NZ^zuBrMOvmx1jBKr7I^C(e7yj;&kgD*7xRHBhV0n=;gNznW(J%ArEdQ3v2RnW zr(kstOqa&TJ`*F&kJM}we0``YRAQ>!`T?;}wzZgRk(fa^)#2*9%Z+psyrobKU%nac znGGN&)Npn`s=}e$R4yL6IsRDDSF=Ps)Z;1?NH}K#C*jVV4dx0@(DMhJqOL*I6)&L4 z9cLFcW!bbaiw~-ib4#2tjht6tOE}{zD6zU{xlC2$ zI>jGRD=rdrA25&Qq4jqQAhS4A^TEeuR}+ZLmIn&KRN3!3YkB-ej*-b9-c-AE)S%N> zf?x6evrm$2MOQ(b0-<^gvSC_6oBe@p+i`Ajxy1G91_dbm9z>* z`v6e3>~L1a-C*c2`$0^HXjr4(?IN{jFy+;}uvyb!LNh16HAJ)d@63e8GRMmWrMZ&F zv_aLU&4#ktx$@=QM^zZSdGAFn^&JpWIEc06k(WFQd*!&PpmY;wf3>)TvXQM+vqd#z zyU8VT;5@(~T!27u_1N3Z<{-f&SNd-M>^C*BK>cKP5&U7*KXmq@FP2FiN4aT+-1iF~ zfRiPbO{*ky%`uehvD+s~XnH7V{jvXcN8((ts-<3M-#N&I$MX3xlZ!UGg+fiN+}`r5 zkj3AjM%Sj6BRHE5?Q@(GmaEXx+0)r!TPtcgyrsy<^`_Wc*hwyr-;OCdQ4#vF=h5Xj!r_#p6O*Q* z)GM*S@GP^XHnavtL<^TD>&W%F)LS4nt}T73^w2{aE8S?2vByR~WOdM+N!yff<@?z8 zI#ww-Zu3B+Dw2VJIAV7nOX9!ujfO>l`;d|vXtw#0QXN#ak`$I0n8kN5(2;87J-CD? 
zHmL*sL>eCfe*GTXwvDI2D~K%nI37JKu}-!Po8ExO7L8{#pw*RuB`6KEDkQxqNdG4R zbz*yTL(6Iv2z+#WI#BgSE1!LJckdfI7H#~xxtSQ;JHtJbofI^}g8L7|Kn}2;V?6dd zK9bChE}t-w#v@|YYe!RB4PsH{@hW+RWHlR3f&YL23-N7 zB={^p7mTZ^ud}HaFV%4UvxHK!)luf%KBVaoi+}5rSQwa@bCw;vYHCGARWld==<7kL z=59v02kEeG3Rm_z)Zc3=MXmaA)I9-9T+O+St{6L3)`@2_41VCAA&8E3bj5sZx5x4s zmtI{uQpw=7HHzdjnUy|za5p(fC=*%NXWhuB(Dh_u6(6Y_e%!8tO&OI$^_@sEYZMc) z<_`+vf$U0(c!m5aMnvIZvM^uI5SEj)Z(;;xrCT_CmpZM4!RQ9UsISG;<-MiaiPA(v1+;q7waq z#DaO&yeXX-esRlYcP9QBezojM(;1VYYslzFHa5kqnhTql9tB)(1PR83ymJM)zr}u2 zA!bL-PF~HWs6_&|a2T`59w8gMCgzI0ZUSUfQfl;Ojkd&KMV<)NhcnfxuOH2mUXuwQ zAM*!OvW!{`MXjm7TIXfL-k+n%0dP~x1% zi$3~@96_CUQxT;Gzf^B~3kR0u=7eg2I4Fgw5M>k5m~x;XrP_^xUNLYFvz1}cRTX7r z0lHVaPz&tCq!B@(_+nwtq0RK$#IV+@P;sE{>RX8Bn-rrhrkj}46K*PBvhLdC@?i7h zJjx#Hk>f+3F<_Y0nGofcP^IE@)+(L~Q4*1fl-B_6231_D^dqI(^dhIc= z=LA*Dx+nYb(z7F472oY=W@o*6`ujtJZ|o#z!EAVr%)^Fux|HNxTtvhvDsp6UwTFwJ zM*F1zvWTTAmTD7v5DPy;dkkH$be+d!3z!mh9?~B zP;G9Vwc=}F40A(Sds~L)9PeFHO$%36su`>ADF4lttX|1!{}kJEkmfex*_yNVfSVdD*&UI|G|lX40rxwlAPgKpuk`23wH2sCfRuKK%fnp1R#=<@<9%+; zML4y^o|%u9_V0m5cLefgy9n<{uobfvYeu+aZKo0Ktc|gWw&pasMBNnfI2UHbKn{9O z)8)imqR}+@&r{T;xui0wrvTi{YW)CT-RWebe0G8{202Acf|Llgnqf=$=%XtXfK4Qv z=zT1j1nI9*CySKsm0?}}<#3SfXM2MsnAkgZs>SG?0o-+s-LK%L80d)#K;3u!6;8=5 zX@g4Fm=G<8m!gGW=R{0399feKC9Xe6!If(%Vf-@0mQ7tBX0NzqmY|9qPu^277yohID3?W6U;XA5NfW2T%outqW~PhQ+n&nro#DcM$Z$THW`N zvNBz|DwU7qm-tFK?Q`5dA&PTB@?7}m0eDq==POEw^{A`Fa?qK z&48UqJjKg|to+>?O{Xf0(K=JOzIa?8#vDp}6Rf^uG9;_RQ>Sv54OQdMjViE9g742S zMhS8Ye+*}NihDGfGuOzbNvx`CgC7KR%vHu{O-ehz$6LT4Mk3SiWVM?^5C{rNs<(ci zqw`nSS8I-1*=qA%mSmm%)UgQ`dsW)FynP!Cpz`|ATE_}k?|*Q37_<7=60FiHwB(_h zw5+MMx={v+RgSy*%jLa^{Rki@+7`oxIZt}@^zY`)n@lMhgAPv!!2u;Sa^;2L@?^x z%A-Mrjx%teimuzTAPSO;F~lr&gy>_G4IY{^P*NEOF|%r&ntw4|Ix}Z6Za4>|Vq}%A z6pcxIPQ@tDsnqjX?bEekhr8)RQoOi)#Gg%k8s-M;;psx6&rT16qf|d(x zQm|i=dq2&*4+`a7Tfs#LSH|);MEHt+!b{0d7;B0PK<1QGH_ynoq!E*2hGkz#6O9hV z?$@wob1i#9kmr+^>ORB=Br!O}1{@=Or zo%h~IPq;QRxJrZG=B=N=LCa3_ths#xboN?(E~BHD0#-A0HRWBd% 
zQcIeW%y@>zZ8l81ks#C7e+hpvP3-w#+7K8!Z#+falSF*kz#{e>Br}RGNxX7AU1lVi zBM!bs|1pEQkrg!e8V!3s{|$r6OO-b5{0em=IHTj>B%>xTM{2fQAz|zH#Py4>+?xni_0O!81gn!QL~C|A^iO>kV^4a_%tZvJM}($5)k4nG z1`n!DqAq7NrQbVbxd2VW=*}I~?A_RaioH~%?eBYLjJ5@FW1Pu+UAm(%H!%U>%pk7} zejlDzFG%i?NWK}?hzUWsKEW}sW!hRv85emvYXb>bj9PjkEJUSs#y-}~vu{`L=EN&3c~hF@`6?yd zt*{wD)SEe5tJzqXKE$Yy+1IchWywJgfw_Q4!wv!!5v&6E{)Mf7)=|Ty$5R8b@U^UT zH*#GGHSYPR@bGZ$75&;Bj!Dh8Z%`1MNltRwF(-lxD(>)-*7(HhmG5nQ+i+Z`;k`|g z%h9)2??XolklwMj)H3$J>HaS9heUSwj9nb|SnvxxR~23MWzjJ&wWNu0GHR|_`D@uU zJcWrzlRcU6ndDlgFI8Lbxu<+@@QxstO@yNH$yd+_nh{q=e4eP<==cK*H3z8Y(t_9COqt4~v_Qlm%pPjo%wZFKfn|@@9(-C_ zTK~A)tQ3f~*E*=hg0)-;lGt;ScvIjOMibwZ4x zJ_UAlwx$oR%6XV>upP2|637WYo24&Q}Y_fL*yf-Q)J=sU0Ln?t+}=J zO{6MCeh7$_?fo>?^zii23s=e9C&jWN+3Wk&N8il?$Rn1TVg8b_3$+-c4t1EpM3jNP1tx-~ZtZSw|kM3YHhY<3yn%Vn1xhDJu% z4Dv4H$I&nplNH^mY?|6wy=hopGrWsK{z&zWzg~2L(?_BXd*1qJV>321H#9~{E*{+K z!e9TFLZas6aujoB{o2~V*B17dvd{&Iqsk3=Epw1yoDK19=8B`6=j}^sM*D%B$mSlQ zX#nr4DX~ji#!=Nj_)ias_^{Y(lA?qcE`a>{=4^TOc?#56oiVbq2ANi8i&=TNn?&pk zt`VtbWh*T;WGoa9?%8a=={cj52ay?-Yi9r)62hP4b&xzbC(HecT>GQPlc<;0Z%*7x zZodr#pCg`OB3`dw!hrntXAoJmo=QMs$@kx$r(LhAPd=epl?(E@ zTyv?TwckxHOeIZy3=>WJv}?OuzDp~badvrF4_ zZAYU~d}%i=v{4M&=+*K|6X*V2+1Qvjc2Ko9YD}ENS~}lpu>xTCv^#n6e-9qt zhV_&E$RMR>%`RQ@$54%E!G$j!61RAW5b~GSPP)}#v)oupgLY4;dEuZK@1+Gg;XV}I$rIL*jyWr z%#b+Fa2-|41c5tm(GN?a8dVl1zFisqiPky)WPO?`%oSsK(Hf&IDaL(r`%S z-2Wn#BoRnHfqGV*!s*;zG-l;5+rkmw$u*-sA!lNdlNI=^8=bE^h^& zEODXG-PWduHouXLwjF4F!(35IXa!Q$a@o0)hwQe^4f(f-JAX*4-Cow;VDb*TZdS@H zqUd9T*+%su%e6L7M5t%M=UJ7V9HyWKQT0MWs3COo66`!uFnY3gmQjYiy2x8XhO@)> z$~WPw(}UW1aF~-s=CIaPH+8kG4exyi}ai$+h{shB*3W0rRF7=mD$#s zvR#Q@SDXD3D^=`Ph`BRQ^{vl_$cFGe&)d~zCy%|q@PdImLSty)@pAQ1>&enPc=}Hc zxK|095i`i|VQrKL0815&JK&dK9DdZJTv=}cxe}!(rRTVQA zz>Br`kSb^ePLUvOWki3xxKlM4deNqbyEV}je3vb|B;s5&FGql9?_#CDoYdH0y-F&x zmmEfNh6h@>F{QJ{ho4NR2lD=9hGNH2oIC_rb$IML zpQS^1(_7Yop5+Vhy%+YHF|E`%=bc9rjv2?=;WM~G<|FyL6?u#%TieI6z;E_?35N=+ z0Ixo25mhW*iKUS!M5jj`B4Aoh4{hmH(BZwuOSArZaffRMr0bkL=(zyx)q{3nGIFCt 
zP?|CQYOzYk5rJl?01bIJjV$ahRJVSWd3!3Z>FXU+^up2{FBnzM>P|-;XGsVkL5`RF z^7=C zeC2+{=kIBc)0DD5`G_YoUabnci0OMA>;XphacRZ#+lS*D8?ARGW7fDCOLMwkx#)by zx#YDL*_I7FjrWyjTBGud;0GL)qpsT(*rB1J-_=`Uw&ydA;1-mYlcj^y@4#eC#Oae{ zJMzbmnKyLiYBU&+6!x)+AHU8|r(4I|5gXO|yvLXkB8XQ!H zX2baRkI_{jpLFvC2dRbFcD)-@6RwWk6)$7O2aHGPQ4w5Ljz{X^ANl66!{l)US^OWr z7AZob!By7dm7H-cRkSe7adHaySI*vu#vJk0AzD%0Oj~;1NL0@B4>hMui3vafOxJH( z4|j*!N321k^8ELv`Q|voWIy=68f3oF19ight;SN>tLXSx=j7MN<#sD^G zXN=O6OXa?}ym}R~{&5qmA3br7O-gH%p>*6pf0>seX8#r;TT_si#b~RwReA-by-m5@KaM)U^CF;34yDGKb(cEIZa6%3o05E4cb7* z+;9{Ba~%6OZ?QP*qY4Lw{;`lW{Fw2)eDG(3ZA~DV=!e=H;w!?-D#OdFS1(gG zyzFg7o63quNB{kdv#R(Yms~Bi4g9(oQwOYZYF`fcDwZ;-e&+u6T3W7QyfyOLH~hV{ zcv{U@RWmFQUhZo-NV~bPb^B)Ma;IYLenRx_^`LpLomh?w_P?t)9#vU4oFt$%US2J7 zG3u77_b6!)XWOBm!OJr?p02gOc^iVO`vx^92i{QobuWO~{!bcylk#?ZolipoAuKZr5iYfc{YDSBTuZQWm0!K#TmjNYXzrs)cQG&h zs{O^UW3-$Pb6!s4t@cgj;iXW3B7S7t=z3bJhFpwR45Ez8fI41>sx74>ekw!_IkXfy zaL5ml)#=(w-DYW8AfCLQ1e{;|xE}b|M;gTf5I`}KA*Be@mJHPc`IVnmN zKzM}j2YhkQ(rua?wS`rnM9N_)A*)+I#aruc65|6j1X`K72zoM*5Z~k)`YpJg5u#T# z1UnK~t?@aOUqv`d{*9m0_V4EBFisI{SFXLr&WLI~tQ zdF3Fs&^^1nyLsQF`roY8z^SLRWCE{Et)_#r$;h|s@RR6~(s*+?KO^%8-RISZ$H2>s zU{yd|BIT`kpIB5PjcsOqU)MkLBt+l-ru8wdyMpf~uKXlS!ZkG8fCc|ZBT$+q#M{LXUTT@!$(pFyi+Z!=WrIl!ht(fbk6;GJYVD*)Qw*}LClLT+2yS_;POgF zq9xDxnSU7MfAAHf5i3~pi3m+?P6Eyb=Wi3&phKKk`PYcAC-FI3!sn7~p9jc`Cj$Q8 zuHDipWtBYU8|yeb(Ipdt&#=;h?}Loqf`0}UBZ!p$r;RqQfsXP)&wO+4Vflp$K6?&Q z;twAQ9bh;;J&DQ?%~cJxeA4^Usg3;(?o`E|Mm8(tG|Ayr6JOM1hW!Z zqxD=krm74NT!{cb)MHL-r<17RXDy8XM(g;r)EeD?j?WYa&0OkUiQjcxzi13nL8K!H zeDiiC=kH~xEt7u3fCSK42D#NOh42IayWdgWtoKjlQnwdQM6un!^>Q};JNS3NxvanR zz__R3*d{xY)ysy%#g0*R>YHm?_pI#R?Qj044R??sFMD2~Kf4zvu{NBA_$usENKfTS z4Gaw@rs*oK9f_aLy@FV(2ZI);S8rim-Z8N3*Dz@+q80$8+CUpR`}czcAl9#Nm*w` z3|4wuio*VcAN5^%L%@{ESF$qq8bp%5q0YxJqK_}=U17JDLBB@&VnLzg8n{M7<51&(7bIU0jO&t zore{7s{$>&?z~!j{}cowSNOHUwt9R85(Umm&g{Vt?c}9`e7nV{JA^-{`()zWc}mP< z`6vz@TnCDyM`=+5RT8M76SsxK1reI)_I0bypU)^%KHehFfB%DUBrq5-5*yhuSmA{K 
zg;^?iEVP{?k%jiZ^P{_rUv90*a`V}0T|DlP7nH#NEk?)g@D!tQ88(Hzh=ZT!Ipr*U z`$%5ehv&a@uTgn1q`VV-gj@&HX?$b+@rmi(FbA5?fQfs@S1S0_0zft0jJDHE{%Koh zJ}Yt3x&j;YrLThxA1C?y%Im9L>9sWfg@~pxH)IpP6d7j^Rp84-`?w#;l8_>mLOU$b zsHSafe6DIKD~U7^dD|Fa5hAcEABzc6^Ktz%I<)h8d7rUL$;n|Or^b9< zreSTSTbv4S4e zb+4F~=Rivm>wW8;?bgzr-caIP$LEvo{?<~D?wb*f zZzmBM!r>(u$Kar};P##{zdSDu1fuBpt zTQBv*X8N3?HakuultkMtd4Q8C_V4LnBc ze2rw!s6?G6Uf98Phn-$ud5-UQXr(!yslCjt!C&F2N z42*250>QOtI?~TE?4s8%=3ts;Mezd=8L2BMI?lDT` zd+-%YaKTWgiUykY6;X$SH8WzJweL&qkIL~-{r2?12=un^tCjyE$j^eWlG=R)b31$4 zkO%>Vx<_(5UEW5hTP8D@Bgr(i{ZlwprU{UL2MxN=FqS}t>rLg&(9wFi5&|a?mrz&# zoRbHGs<#$=Op@a|-xV_Vm;kCqZ$2nWvjFWH`@0g7A6!LRVAWKP@LcmdKUJmGD^juJxC{MLX2GZvG;>X!!?68TZ^|$=XepiPnI_ zw7cM~+XO<*d*G+10HH=PNat07nZYlXwM@rPmO7qLXF!Qson(VS$82|Sra<}4PZMZ7c8b7fmPo~Zh5UZ z8?C7AAgO@JmB^Lw$JuK7FPee+iUh%!WLW-D7|TxUKs2)mc23L(zxnOpF{>7~e|-~t zbXysjma)vW3S8&i124Twu-3@uWC36HbFS0tID++G@BkdO@4}9WIp8^;aod!0VE$I4 z5;fO>p#q#OGeyM@^ah^>oA=vc>$sD!WAYKOo00&|IytaQ`xdy*D`N*(3eq_ZuzOw$ zIBQjakA4H}(SHCUoigxU#Jzd`lQpGIf8|7aJx@rPiiDYsd|b{%#vtYR4|TP4qD1Ui#tqq>Y+bmSmg z+z30qxeji#D!^@KHArVQG7@eAhbcu6u%r+A~fUC79DP7T;iz6qqP>aA;GauX-0lUmB1ZVAH z_OsO>oKgUmQ;vh}^my3zVKK~m?Sv9DSJi{!$pfW;*{indelQza2iBidfaQ!sAexo| zPK*$(r)0pcX@wB7vWcC5TJYAZW`DlNGS@ng&Z~hyBLySeI*x!{=iCE7!y4GTv>AMt zmVuXk1^f9L2wK_(A#2#*o0AMKbJJ1-)?5j{o7qg$W{F&hT>Bxi_OzG<&uGuwKfjIf z$8B($p21eRx!}LF0QN3t8K+Sl1g>acoYKfv&v!w}2zD;Lm^6TFX*IadD*~B*3&<8Iz)iOh_N{4x&{fS4xV()0>{SrXIL-de)42zC zT=V_D`JV&mh9hz%a_#%5IRC#BbG?4r5j;ncCegYJHs2kk*xSgs93s}2gYC39u$_8}eepBkHv2-_F}GWG%{AYX9!um( z774GGer*__v8MIZZRi0t{)o=TgM;mtgF{f1@A>Sz*Fx&rV%=tyvBa#2@k$NsUcfkLVHNCNR0SThtHEXFUGQ5}559VhEa7VgnO+;XOl8R) z%Wx(0a#?bB4$McCF=BOQNu+&*GB>nFO;-tl$tt@+bD%d&8R!Sg)$+h*Oc|`77zD05 z=fG#tCGgZOV8n^t5G*xc(g?vTo4GIKKD&%d**)j7>{Y)Q0*q_GcafZ(glY&jsRQqM z)!@Cj7`$|=A!5S=kQ&?p|CQIkb#@k5Pf7rLmK{rG+yvJdSHROK^H{-|CMw+`awT%@ zBWQ2>Wx)0DUyZXwKRL#4{2rn<7lEzz2@uW50;g%|u<6SquzBoJ5PTL4Zu7EX_mb-@ zfvaYuSP3C3Tfl2!IUHQq%CcF;D@!W5l`_f#vPDg>Tfd4+@?2)!WB*nO$4%~YO1av6 
z|HX`-3`$wndx0f!=eQ=RDFbDU<8}*PQf5q6@yebw(48^63up|Kz{1zkz~Y^H*g5$u ztp3awJmzJAXjTqe?pLw{ui~l#b}z)Ge=+P?S`TjX3&C;5ZT98Z7uKs|%l{TQAW*QA zQ3{?5%D|nyrS`97ZxzETkSr(!kA;`ObzTN+85<27zl>zr@nNvlJPndr*BOalJbldW zu6yaFmM`e$BoKNp?wt8yTI}ZU_T=vV6@1xJ-`n6Sm`~adn_P~fyN+s9%uO*1JRQwsS zy2CV;K){ZzwL=TRdSV_|>*_e|G@89Q9&<}rdS3$v);7U@(+ZF+$p?GQR9N%L0dSh0 z4i*|mVaMbcu$dAM`_~jgqII+MPTY@kTN}S4J(fV|O~%z{ny00>v^pL$ZwolGwgY^% z8$dj*7|f>zGtxW@J2ayi+2+IMua3g{&%;@gbp!&J-GZ>yb&OL=S!PosuYp}vM#mDC8kv z={xzL#a84DIWH+YwACWibOs&j&=}|mlLzjGDJs6O;`J-A>x(9^(`HL|ta0Y3WG?Dr4Y$zkNVR1QH)TfuKp4eVoC>%nyj zmd!RpuyGR{SXU3nEf_IRJqs2SPO_651J;w0!C`tTh-RmOn?Wkei0?p>umO%+)p+L} zRT#9^|D-}UE`h*b)D(8Sm*HPyeqc>Wc+`d_aQ?g*Hmg^{mJjd3?!|Xt-w>+`8rkakE=YB&z+1l(r1Pu5XUQGz-?bWl8CI%Y<5uLF1N{Uq z^+f2X9JJI?J;Y_Ls7=fnbQG-LYhugy3t&GbnH^+2OSN-BGQWhqL9isEhGn1C?29rY zHDsi^t_^}$H$a4W3xus}VSjFffK_tvSyT?eYpPkwUkSbjmF%Qd!#?(Nht`*a``k>h zo0I`A)3aF?n+|3Z!eFP?aR^va0It(2!SS~famu?$wP99*>Tv!5>mAH8~(xn2clZT5LzmBLKbNSHi8lK4_j##EKS?8yVYQS@cx z8UtI@8(BJk58QM!VB7c@Muu6O*MO&P8OuPM*&BjouZD8i%ib`7#?`Qwy-oHQGcsMt zvRn3630P6XveibAu~hwlNjvx%RKf10g>Z093&d_G9T$tvD*Eta`X zRSAG)ujj(Hj|xFF?+kd(y9{o#&w+Se9(XLg12QAbLTe#JAO|n@wg@s|>HNkPh}iHQ z_%APmgY3kFnKi=E9c>V{z6rb+-G{I>55U{75JJ|<*$FIV+3g*$7=Ik>7`g5oe+F#7 zP2)5YYwZ}=FDQi_U)%+UcOHOX=zS2pQ4YIjH^I?O3fQ+)9(ygaV=3L-1VYc?{^iCm z4sE+B+h=k+9B1z>`!F1|RS$si>-lUMUceHwIWJ|MP(pmNnGffMmQ*Fhmh6v5VEQX{Fbt; zl##Fh@(M<}b=>MXbWH;U88t$vaT`cMaayu1HPo zl;i_Y(DA`h$D1ypD{me?wBar+dp{B;4R8k?)o{=q6wi{NYA{i|3zowhz;0v{h{v{q zNcSQLXU4tDCu%@Zl}3 zj3XLguW==W7`HI;t>@}peU=t;yc1^H0=v|NatLE2(x0wA(h~} z^ghQIK`ZMZa2fk`c|H4mEd;V|-RlcWEtq zTQozcNi9Tfd;k#}+Zftm?{Yb(vmW3269lfR1liJ32wqbLksBT`(yd`{mPR47L&PmDOIx~kY4K6{@vN{ld!#?}nA7SgTa`sj%0+ZM8 zv5R;X=BUPij>Ic;2MIby!)824qAEbuy95) zXulzaZ(g;5X#)dU*6POX(M(qjWzT0NtWqmvxB*+$tHI{I1_(541vlL+u+%&TYrYJE z9TVfhW7ZXLoR$vTzfS!B*?SM5s+P4~ch_HMF9RwFm=o$+>e6KnC?YvXFs-%se{Q|^8|^-)>fZYAxqsSwuQ0o+Yfi=-a{^;_ zzx}*lf87HKx_3})+mEaxy~wugWzd#r^on$%pY&u5`8Gqypkuj5N0DaSPa;Y#S^Fi+ 
z3W(HviA*zY)h9un-fI%^cPKeNgb=yTo&?n%xj+5di@w0EAg7f*2vfNMpS>60E7^iX zy+@2*Q}l;%+GZT5k4+-O^gSZ!c!AXz@~jB$P5an|NHuwl)7BqQ;xNrHpL;F!P%m-EKEeG>UE;$`*4-3ZLLnd!@JcCukz}DunxbU;%kiV zJrSwhQWdXz1N(o7VFJ42I}Z|69|kj9zjMMadd@9AlAVdHW7I5Bq5#jQ;5vzFvr_8vpA`z&0FY+u$3CaeLZSfvC zM+n^P`;nmEjU;aI(UCzC(>|PW7-7yh!;G8c8ep;3Q)Z(`IsA4qT(8UgPrua?q|{&@ zEPJzui@nAkxJm!;019nB(8w`BLfOZH&m5t0G1e^l=Sxpa;jH5*&e}|o;0_V3zDJek zr*9XIaKF@PjD+_Uk~JU0N8$=R_B7-8)+z)@cfeb=0rC59BSEVVfg2{^vT%&Z^&u?h z_rQq%J~ZcCgx1_3QKS1hD116WILSaY)RFX8mpVcL8iCy&Xia+-`atxth&? zLFD=dCxl1fw7eUM>YS~A1#bc+FR6NjD7C?PcO6`I)xr9w5+v)~NB+?lNIpp7YSNEF z>v0qxpC)Y>L8{?<6rC7D43RIFZIo@^hg>4md`nJDhnX8rHtgYC^JI+v)1VqB2>j`{ zUV^sW7YJ5t4T{majRGznLiV2{(cEK$EEJG__#LuLhfwS|fl?CM94q?S;w{dc7-6sH zSq{?$A0#2}qvLN-e1Z!T+(v{-7yPBJ!%wOe-qM%p%V{JPMZ|U%_c%FB}&1 z!&2}S)ovOkTUl~2w+}6sHYPqZl15c8HghRS0=wfoPaIxf27kF5aFQtPED3q+@nP@_ zZz(OW^6I})uUGY``0cAb=PFy;>Lq^;G6Eq)roOCC{q$!$Y@gwdT{C=1SVO39xwE?K zJ3mITTtC$3?}P#WHI{;9E8Gje??;F#2a#ra2Y!1m!$GtHZW8BN*e^)tCQfXtK@sUf z?vXdhGJlJ_W1NQcp}=+sXNgYpkB%YFx}P*=l3)_jb_wjZZ$N84(g zeir%D@2#{(KqSv{pdjf`H;p<2$h90~IA7^Lg?y_K78c;dw8V7`7kqv}h5HzaY)4S- zJwc<-2x`5)&?xl*70#nLZP88k|1KQ2*O9n(z-`ZE1S+&3P^lRyMo*EhF$K?6LvUKq zha-Y7a9H3W^yjs+g$~lQQdoFEj6{~Zn*z58f*Vc6W^f~}2lg$>#esDxY&~)QVFMU9k!Jcgg~lo1wBajQWi$392o&(IXdQEtOh%osZ$TfdLBHDu@>j@S|AHz%Z3cU8Tv8Avl74E}BvL2_bA0tU?5Z-GCVK4lS z<-D5AzXP3l%~0hlCrXW`8p|qYSGf4kZW?j9y&JioxkkXnizMdx!E*CyBp-N)Gp?^A zZeD!D+uD#<|FCte|I@6qUQdD(_TMK_y#oF9ao9P-8(U{Mv)!Y(y7kXa*!mqOpeOPD z|2XjN_)I?*ca@qE#~dSDDnGjfM*I(PRIrBtXb2}3_9I?-nDpQ|eB~~|RxA%T+ltww zwVP-o{KRg+Pr4aJR^2GJ??WNcYNmM)k?R1m&H9mVJ&e4gBLrikD03yva2`YcF><&D z1Cv$WlTLs7qm|ra{pQ8TCwel>-Xg)^InqqHT(nW-+r1-vA0)A*3*|C_QujfWoR~l% z;eIiVN;MwSM6W~0F@6oZ&6V&LZ%3$n7d#|rgcGko-2NMgP<;*mpN8PIWD2%I-;$IK z`ENsgPA$u?6PpqCO+aUId3P~PV7XD2YXssmBA5Vk!FW*;+e2&f5vbZgcI0hVvHSDz z{s+IT;&nD&{iD>0v5)`KakftHnAnaI=uJ7&6J*Gz(snIYIY(~DJZ z5^L*s&P20b*h1%Uiv{*@uXE{FGXhztfCHPovvZ(5w~=7yCai^@!DZnPyw?vPQLmrv zC%|nd%B{e3qkiosO3$TlAyBp*sRwVP*zpxIEnlL{X#zE#pOJ4lOcXneT#F$R*Vm}< zqUScqv-e` 
z%ALkh>NJ2_mm#Fm4pGVv;3{4RFWEY>1aA>0{T^=1`*2v`4hic`m~LP;)3<2AAMZoPkykwxZa>TM)b#(Oq?z=XSGs)cDY6?wDOrDRLaV}M6a{uYD03ab zS*Ly?*g;ggllZ!gBGcd%0wiw1aVJ>^>1*(oYC?c)8&XZlQYiMqf898o7xt3{c>puA zA$oJ$**(9wbUB@qa8E2+*V)qoFmqqM66ueBR8kPIYW)P=W&4l8cYdx zP6+qIZOIT~l*W*5!rddQ8IGbAu-$nUo}$fg+1?E2?M;Z&xQDaWZ;@m14#f_`k~>HM<>tuO$W6mK!B&9|Blk=|5v9<=Z`&Q_LHdg;)2rysBoSjitRy-$0W`= zzQ;xXG31%NMyUK91WP=mFQW|}VvUGUe1I&=yGYW1i@?nja9lXRtcMX1tl|9YP@H`l zDtx6xsu}Dq3R1IU*`vaoEV3+F)Hpm@I6#gsm1-slZ5*5YQsB#F;R10Qouy`S?@5ID zrXr*oJ;p_sPZ4#2<35A0KMM0YDX;z(Yg68P18=3~Mw{)mIIuPg67zhqWrjT@=7g|# z>aLkS*iCgid+r5^*^zAWN_=J*#AXN5InL~L>A&5fWGBlZk0kdO%*d4s#c^3WYI7=K zA=pd8Is~VMJqTVuf<*2nfd{(~CVvY-vbR{ydVtJzSZ+LvK5*wvIt@fM zrS)12zn|peby!~gP23IO-lx??)*q4s74Ka3lx~6f>iTc_sk3~ja*zIyntKx4W;hYS zx>I{6H%EZ+(|0x`s6?@R0W2)QCbmdyxv&5ibL9k<>sR9B_&CAkZkr;{m(9eL+v%TM z@@gym9zGlTk;>f$>hKe|iPs}V;|)&iu7KOFD>$*`0wU#}A>ZN!F8B_k+IIkD!X z#@jN?pYuWh|J8CoA0kyA!)@ixBe)##5p8k5px*Bbs@#Xr;5+&^aeV-n-3{;*Yi3_e zIJa}o(RWBv8-nO2%L-zkIN?dw->U@4S=c(d< zbE)(CY+mI)-cxAbgEF^%BH1xC_>Un`^AY?cI^npj9$pen@Yr(&?oxHgws?%x{iE>v zVU$M5XE2$6m&IOn=3Rp3ybJ7$-a9Ls=rsT;^9sr4L@+DEG6-h)KxTFlqg!r87nl30 z$d~&qR4_Y*H5i#WTnbk*l=!o$;dwE-zjznR9Pr%J20t48(v0pRVgGBy z?3#k@qDMF;^csf*?!rKzlj?P-&M9Fc%84SEHo~nO;cN>RfBlvN8_DuqcQT=k$6lgS zZgPtwRT(~_T)r6Wq>)^7*0-ELMzgcSuwS?l#}+)Hzvm@RYP2I%qn6SpOp09e`%qBrIz;yW8DdnPBShv7+;%syow6boA0k=r2?~z&Ax35b zp=-Y2m|!eT)pMu zrPS9JqwhcR;<3E?53LWc_iXf0ZK^M_8cqw5y9w=udC(JRf%?2MYQu3jxS$15+SlMM zc^g{%wbbULAwJKKg#~ua@?=80W2P&1&T@z3oKULYh<59YZ^yTP=fWm>C8=+4E3&x0 z!Q36WzyIX`xk+Sh+fP0ICRhkQh2z3r_-=WJ48s9rnLLA=< z*Xeon?_J-%8WavQt2w2#+-t~gdjlNB>qsb%LvBtIOqSe)@?2{BWZ@k)JV2hs3wV*Z z%FRuNq<|k}_(R!b6_-*aKQ9HlXZuj~BC&PHZa#PHne9u|>I><45%k=Tfrb>{$-hBI z9Lv7pM3n;;4o=kOl|xsc9)|_)v$RNuMQ;!+(T7~iK6aOAZWpXj`CIUn?3nZxZFSR-cP2$@68=YsvI;D0{w>EiMRz{M;1C z^QU0zOnVa9lThSO!y(~j78)=Tyic~ukKUKWNLg!nDgu=*AzZ7mChJ&NTIac!3Oo_u z)xSs03vKn#Tov|SdATR-cAbIdl2m9c%76sF7c_*5p(AvWxh-{pBE%?UAp)8Qa(z6t( zFK}5lGP4ueq%W6KzL)xo`n*c$^IwB5|0UQ6_rQPkDAF`PpxkK)soLG}mZIa^N`mAB 
zoOp57Ut0;<)*}!l_d3W=>MDHpbi!5a0>ZT~Am<&-YN3?2! zc_hH!LI-klH{Fzp3Xg7_wS9}jYb%&w%JE0B39JK)>ZqMZ!brFi z@tUuYsPPth!sj4HA}S*gitT)MM5r!M6;6k&z)2{~r}jNJjE=ct*KBueo@vEGV%%hw zvcM_q;q#`?i(zvR9F(wyIOO!W%7q5B1kS-s_#Tc4y`cIEUh9UCa$pFjtRBEes;MpC zaEKRI{nam}m3uDYw)=8{pF}&Nw6CJfVG2<)18`qDf+Ki_%EeK8r*& zi>Ni7&2Dn3S5kbD*e6)Ph*f%SB#Wc&nc+{PaR|{Yjrt4oNnAr%I6#3vmCcMw&k2Vp zpFdRQXG29W8`|^F!FJJeSS+~@t@$-jqETI${}hpNGE{^zpeRUUyCfd=d&-b*dKcdE zHO(a_Z#a+iP4PsQSN~J>_SI+Goz?R%>a2==Z?mHm5o)(letZD+zT-&L?1RdJ6zt@4 zf&#TYZNVC-2^2zZUK}iz-XVAQ0`WSJVX(NK03Zf(LLnrm^|w|$_O$Ax?tj!%Y(Ic(-7oN1(+|f5BQ$EhgrQI?bOr07 zKED_W0?G9FZGTs8a!Yn@JPQ$Uiv?unMl-SHVpOX9IYg_WbSxH1H1caMEQF@eSrXP* zSgg7Ub-{cVCQzE6O3w>mBzOxJ3m+5J=F`ZYgS~T;sbL1N_bQSos|cq;RKN)`!hWz9 ztw6NyRm7XL3LyHa7E{OLx%q(k*zPb&vJys+#nL*a3bLdBHC~Lg0*qJQ0Cyci7qj2?qYTdl;;&< zztCkI7V3iif;Vtl@_sU8S3fVV`kP(jX@oid}rpkl^=$ z;krz?%9bNu_hv=vk_D(i($6Bi@7MZ`FV&`>O+>%bGZKWnzczOfk14TX^Wk6 z9NC`6asts%m>&z#dG6F+!yrD_2jYBwP!ddr)Vx5JJs>{k+oRs%3O4V+Wz=wcbnKkz z0mV5vP@Q)chlFpynuOI<@NQy|2ye;i@1~TPLnL6^+XD9`lVsOlkv+MEgY!F}KChgJ zw1_Nw9*JirON!=bRDFICTO1%sqqExl( zL1#qaB zpwd_Qy-l|o@r7!-x0u}?T3=BwJ-X7Gl~ zE+Nl!5M_2F(57>?@!1lM20?1RHzfJJAuZ@f?K23{0>KcQ=SkG+OFsu=>nt0hRewgV zoUn3X16lqU)*sXab69RTN3GmEg#v$8kB-0vUR?E$Qgj3^n;S2^+H+t*6AmqHf#}R& z$nvF-rHRD81vyZfpH8E1I;8nxAU->otW*inY(5EO0yU~2Xf7;(I-SSmx603tV|jku z`y}TDu+d#fD3MJLSS@}5GvSBO5I#ennMR~rMvc1wYQmW$tiI4(mJZd0Tzo4W@(aRP z)m)kdr9~&9x;Pe!ivw{&{4CsLOIyPYE*9Ua$mQeoRbv&2@yNfDd-ec4Q#~ z(YfxdjVlVpvQUBS+!!|D^=*#gB%4=I7tEQIm>m%$ClJI70sIk*fpBZk!9|yQSRj6O zDE0{!u~ZTz!8Ee+1vK&okSG#i&Iy2uP&zx#k*BIqCX3U`%!{P+a-g%Y90n`OS-J{m zmn7!;lkGYOvn4lRvGg9ah+GdYJI_*Jl!Y>&ESyXYof_c6R3g?;77mahN-$V`8ZyE@ zP+1ZM)umC;SWHyBA{oY;GGVki2FJznZ+fT~T^#5c<89FW2dRb8S5BC0Pq}wwQz5K( z6(RM&3)Fi~pe1Aq^+7|p6gGu(Uejz7=}M=sM6uIIQ0_*Z=M?IEh7qv0mBsWW1l?Kt zG+EKc#E^r5AhEYd)p?0P@t4%5v!NgqNzN&l2KxvoFNlZE@>48pU>6^^aKMd`ujm|4 z0)TXu_sT6IP^EsMFh3sqmy|(8Fat^g1Pp@N`EmjYJW>6lmu)k>L=@&F6sS?-(pqo^ za&r>N;uo=5PZ|C&i1P)q6)IdKQ(KS)**P)va}o;?=q;>d@l)+ZMNE9PmgKMr0JVi_ 
zEM@D+lKZe;{usK#)ht%ag%0!=*FtaU8K^Euh78#)xdnl27WdHFLZ}g~sxKyzT|ktv zG!Y65=x-46!GX0T=8Hn0yxg1JmDWl8Y-d5xRj&^NUuN+H=y$qgwWDvVyYjh4gCCN+ zjn`$tWm^*>Rqmn6VF;IfKjKRC2Q)>Dp&{TS>ioZ=<$+j37ZJ7+A!?Kp3P20wFFyVl5a0-Q@*rgBO+gS=cheu5H&$KVArcSN`83 z>m;&QApZWog`7afu!R8{3ksmWw2}q(rRS13F3g4e{8*w{YIt-GH<`szuh!yxYIq!x zCPIZoQ(|r)S+N`(THFH1HE*H2s1jNvw%ob%;j63u^vasu`!sft!D$d z%92PDSYH~@1DJp+2~%5NK$N?b+USyW?4IKcjYTA~i&LPoFqYmE!QeuAZusPGJ|An(yUL=us0oMYf+B4_PU0;%V1x53)o)ECowrNd`+>QC*l0MS&C|f=U>z zswF|qhV1-sXp`6)uc?9QifcHr>Mf3~d<0E8CdVJcLJ6FWGFV+mjg!bgAOLd0L<}NX zFyB}Pjpg(jk%r;gd?JVt9NkzAll4W=6-mXxwYgATMg+Yq5(j@shyMCdm~Tye5U6#& zrn%yQ8c&>l+qF4s+$37_RZW=kLnNpUB2lRqQL@hwEB6L@h65qrc#y z-zd&|d_twm2b{5*Mve0ql-m!Z;LrftB0l1j(QBBktA(_%7bN&SVY{IV#!FkEyQByw z)^_8R;d`X(z9Ru{hW7F_Cahxf+;QmpGdQrS0DA?)Aw}e>ydVxTf&l~#evn@n3Q7I| zBGz0ky=zipo?noTNIowFz$^d$VzusS5VzD%V{s-_g;QC|2^TsrTvC7iONm_5ptrmTh9YHbWy}5*r=h+e8*V?mhw~4;Fj#t?&W(YxU#2G!xsSYp%n1aXak3e+VOy^DtOeNewv*`)}@g+hrxJL5=?$dhT+Ee=SglC!iRb$c_RBOuYHd`t*CSwi7K$@&dNFR z90`i=5ib6SNVNx%k}r`c-_JxgOLqXp#|BaBI)LWzF*Jnrk+^FJ`I=GKzDHwIPuk5l1Fyy42fzcWckC%_MgSkbuBo$;xSy;_u}yC z258ec2bPz^YQt5?3x~7DtG_ZIN{hp&hT`a^D#$PPV|1#%A_6MQsBwRv4ZE#%B(gbB zrJt3T2E%mYX&l>93H8;1&{!FbeJdhi@?$QHf6T<8^~um#8w&fqIn8Y)uX(qc`8B3i z4Sbq)HD&B*(b0Dq*$3a?ockDZ4BsI^;T__n-y>S`4I)WYW2Ac!A@vNo2ZvDOGJw{Q zk7y)XZ9VxB&5_e+4E%~3x6i0N{uyOfUs31#85LF^Q13B~O1lX-h}L6|fCEdT;s$)X zjklq*q=?#JB?^wx?78kn$u+ab096`1t}qKBG+_sVX2cU z!g0JMtGx2}De^+m=0vVNN`i?nSXB!Bg9W~@+)~EuKNljq~=w5AAJD-#mUd2v-<`A1|Gs4q?m(pZ{?L#xVhaAg@(7bd`RT@#D9 zaJ^g zn+tGkTQO{QmB4s?9(Ak`=zkvz&D8<#GQ69D``?TU@&xXmQ*Tv$P)RlHKNF_>urW&W z2?C^^!hJ(O&X|8jOV}r5X!Q}LK1YJ=0Fo8@5hM4SYBy5U-l5iMoQQP-*Au>=BkmKf zM1IEQ@Xx6A{DiZ1lPIy7Mxpr>YFtN=r8SH?pHVu08cusIlid%3>e5J9ZM*{KZI5VR zFM#9r>nODyp*l{KS`2wQhYJU2uSg~^h=Kf~U=r3099W&(X1F1P7gyz#e{7Lk93f(` zvbf;z_vO%8LDaam0@{mDLt|+Q4A-7vL4QLU^);4c!+Fy)cbEvfK}{iydIFF1|Z6u-<3j?FU{w z_8(O5cf8%2*$3UWKF}kpf8?jrFyC|rMjK9n+x5sv^dedR zQzWdpFj$|0!y8XQ=lhf3wwXI2R>?%v?5BK$sdv!p39#N?2162N(@nW>5xopI(KhNl 
z!PvJl5cYd>o3B>A;N5EG?^uW4P0mesX^ODjQ`F@kb{;l6t6;vN0@mbayhUHZW7{jF zDSSb-%QQ}NHwWB1jKsbD2ormXB*g*5%l0Equ^UzPV`%W6MxFlN|-Sx;`}$6GM};UbCbC8TMM zvsGNal8+!eKMZ2?U7))rj%w1R#>%)LUa#hrUsZ7z>oPa_p{hrFX)c_1U4tG`sp^tw z99&%t`;E5{B-#t}bq&329QF{IuFr<;o-@#29|I@xY9^w=N>^Fz)pAQdG}i=?pyt4ET^6ji zR4{Qh`za4cx0K<;&N?FDWE|WON1q@1-by<2>h1PtTX|ym-#A${I`uCXv+o&Oi>2MP z-%|t+$xCn)y?|poO6fZ;fz9Si@DRHX@7*M#Y9nY4`2}Y!2av8jiZ}%>OQ0Ju(yx&y z*N1GaQMS_Ra?l5~M}K4?f%b&YXbR`{6PQBviND~i#YYsGOyHu|M-*E0quiknO+gdz zmT953Qb2=l1~gVA!gljj8t{{8;6IP-gCoc}{04SgFXPz8dX|Nvu`)K%Nv?($SLKyo zXE7AX7tvpxS75mIG#s~e;_wfpFkD+i4Z9saJKy5yh8D76#V}f13EgE}icA%Ze>j8v zt21D=qlC@)ANV02$9Ggwr)-AR_97hGkcI;r5@GTaS^OUpm{3}7D}d?dEVxQufF+5s zt>_t;Z_b0owp(gPexdg#`AHifnd@1ICGe&H1Gq?m<}UFX%I=WLZC!rlflyo-=jmFUA{|Rjo6S$fD8SU|( z(Gu|)&0)Xbf;W-t@vkU3LXSs(#s&AUIDPN~&O3fWD+zXx%1s)m^I`ZyHV%JZi4&V| zLw7|stVvL7oIau0b`b7jH|h1Pwg^SuT~>MJH&Rp=Cy4k?Z(M`3~z)2K$)UrHRN6AX)t&M}xk7;n&T?^w4r=Ynygv2!q zUecFgur3kiTe7f!eH8o^T41&{okTYd2i7N$Ko`POrU3!+?Qj++TH3~mb2n<1&eJ6MLWfDnID2O?X?8blYllXmSQmDF1`|t6uNjm~gZq!)Dj1 zI~MePSZ*#LN^!V@ zoMA+2u_X^4(nOgXGf5b0;iuS4RGI^4i5eKJkH-lyqSPHZ@Y&k{lT8`07cIewJykfV zc7su^?apEx-jqcIb()c}&CYVTN;JV$tOfQv>TrDLdANwS&}TP5XDt`MO@WjA+2)Sw zZY7>*{`+caSeL8G#<=Ilcb>-a-6brx>L$?wf7vb~$2{2Ys)ZwcudZU3ad;gKv^$y* zq1=lIsUcL^lEn|6LZ1EzQkBM#sxXWMxjw{6_aaa411>mC5upy@R_a%DBut|%mfNu9 zD=zwcMfC|1R`bs&F#JRU`vrA=M8GDasQ3PWQ-*J8u)YAJP093~o`S)O3fOMBf+IiH z;H2!k$qfBBLHRn9ybu7d{Pv6f%G{una{ZHjqVM3a?K;fY*TQaV3yy8R058c~FxhYh z2iK*+jI8~!?S&+u`Sd&!hCjwrhpnK;M7T+vN3c>m9nZ#bu_8KthU|ScTqLXEuUwC# zJ9FV7bAdW^Cj8_ZVX`@$Xtj*aD`V+e9JzAD>MM5@{&LsgE!z&;9W_K*<#3UzLzwD4 zmLF^UV+I$R=(dzh>*#qk$O{$x8+Bsr^S@LicN~q>ZmzQ1k$2BxOAZXzXTx2h6;9%f z@Q`eQuk1BAN>tJJl@I$p6*RaJ#cr!W@ZKlz6@QK}i9wXwki`%Dj7*}|Or=RA$n>$A zrZ9#a-4S+k!H%fUxSq_#TR-DU6p?GdN1XHeMB+-sYWf*@2S4Jh`4`kUf5171Pq-EL zugEfd!4{oZkhmMJ%Z0DZ6BeQ}`=KgdN2ErC*CTo5cU7FW4T+qTdtcxw`Vcl-8sRS1 z1(!XYj4+PxK8FMAl8GwoVYR)O1Tq&EM5vAuWw0d?^;Nh8N3m+SOPz!9rbH&9CnV0m zVmk?`LL;1{N@2IB2v$4u>3yf*y_e`$>=aIjmcxlUxWB>`mLuyS(+FqD^K|Syf|Rep 
zQ??l{;!W_A>x8p-13hnqx6Cyd(BERPE&&I=Pk5W=aXECTcanFjnZMN+w+1)(X_r@- z{gi|gyGm(ryNnQ(M|6#EP;G~oTr)ydZX;6jK927pXR$pW`s?H9JGp{rjb}u)*AS&N zh!nL^T=e{idjAhZt;2{E?M4QPY|7pdB*_mU-(Vb9LZ)#e@eA6MCU7nOE1FM!!X^K| zpvr-)ztt4-4}PNh1;s}`q4?-9%8yN=$>(R}m=2QbDIf=Q7H;D0u-ks6&286hUR;$| ze&?YAA_uKiNj)|{U4fhEb)wg59Q+{*MjLWS46ETof@dR^LjqUd0B}Az=+uX@i4AF|2pzljs)0iRjjg z&h?PKM4wv=f29_Ls9q<5y$%-=bPu^Y7LRolyNCe!E_(lCgztL@XNfxcyHa4aC$H;5 z)-#how5ZtZ?j0A&a&i)lNIBS#VC4sN%{$2z+(CqP7Y$N%aFed5L8^_# z!~+ytV7-&RAE^uQl)i#6h1Up?=|PU(6zY9GW$ zXbzepVx7jVl)sR;{){V;KeO!x&stBT(s~L-#*@f7Fo8-U)-DU<%HUFN)A$18uRa$-lTx$Tbn9(VB$SZ%Gw@ttJRcjhtLwAh&e7ikhr(E^xn z&W7>UIJipHAW-QtJY;L&qi}%;H49d|v*9CON4CBKmOIjkL@%@m;m>+}nsCrRzk-mtnW-9Erv|Bxt`!f^IMT zWFNBZ1e+bD_k1-jo$IbgqX5~PY$DBJPhD5B&zpdezA3)nyQp3)xS{W(T2}8Ue!A0Lt^y~uy6Bp| zAYpxp812`H*!L3Any(O|b{C#<%|x*`i1=?IT>S>z_SO)s()U1O9HMp&o-&u|x?Uz{ z(uEYQ5tjJRS^bKm)5uW%fJB*oB+3pTokTW$-w-bQeMEiW09*3f8a0g$I=3l=6Vkt+ z!fqOQhF_3pFom4`pV1oj7Ze(g;(E-#(rd$Q8RpM8caCgi z6A5btcfTw|s*~`^H<10mKpnM=I&dw#h+N%>YLAQO(uG5AyoM~0#xe}ta1&R=8uSU8%PLlQHO71L>r*eMr2lxP{k)m zJw)`X^B(b9eTY#VMxy2b;&flaTka}}NEb4U`U^V?#`TBaPyg;j_Vw+tb*abN)10Nw zcDT@W3{~lXi{vHt|A(qRK$O-~q#F&;HGhjlonE@0w-KaD!m4(gxr0c}E_f@}(?Hlj z-x=pD&e4EbN!PfUg%aXaxXoCm&>sH@S^GwjC`Z><<{P!9DU2iEU<{p!A8|YFXS794 z;a2+3XpR1gOM$=OywhJ$ZTAJGmYlGTB2#A!7d$6Xe0chPliw#^T$NXN<=-lPa!qnR z@(n#fO3g&8NhGkRVY54rMDRQUl^ftBUWz3BTVy%QsFqOYt-;Y-?nrjT`T0vU#VNINuu6vG}8m?wzUdxY~rBVKK#Z}$BjM3viU zJj0p${*12luehG{Gdk$J%RxV*C4i{a{xfP%d_?Ynzal|-5NFLlOkQ;R z%-af(S9s;$6_1rDGG9l4w8IIbY$XY4H4$hVLNy!Mv1pA>oRBz89k`x^wiw}B z&FmaknG)EEXORfrN4owK1S+(^Pw^t+^@&=Qn~9_@z(ejl32+zL+zxokUm)vRPn67A z+XiM~{S`aO`aVXHEp>MNaikC-rBTf@oj{h!AYyf&QhiRs{0uRA50Gm7xFA^PLREA5 z-QVo3X0Da=YWb>G*83?};iP&yBDFecKx=}xLIWbTJBik>Bh$Eti2fBa=^7**c#Zh| z-N-Q;M4a9W_{d*@A6@H{tE^d6FTCET7y30vhTm5(*7$7jK5_H zLhJtQ7@N(A?q zKKCAy44=SeNA|t5L7iUxJ)^&wUAJx&4{8dBkfyL+ZhINIB4lLc>pJ3iyJn(Vvm2@&Q>?(-p>%sxXEOm2tF%eMU#jXBH0V zNce*53IB?gkpGEhzptpWpGJ}C&u!($K5ygo5?tazv$qCEb|%7nM*^Ir3K2?{G;Cip3FUQ0xBg0Xh}5}CcAlt8 
zyOmzMf|P@gNeEsbl%B`x+@WLFkYWB92}Grdy04LAI*hpeFOhv{0I_O)$TAv7n(;g2 zS`3j8KSP?~TN2erM6OQ|O=25O!t5k=mc+cGwKVv?*YjKb8-A^#TAzFWP=e9b!Wga2 znsk#}h^0X$PWuMjaQW;WN5Mk5F`c5NRgeH1NEk|Mv+p z4)+k1J}1F_LD#nf*~YJsV)y|5>gN%uOV{|oJ%p&X(sjH|M0*=~hewcaJc_2UDO_}) z!YS2BCaxJuACR~26G~0Kp!MVw?xg*UdpTTa;1_fz{(^I!Q)u@6OHYZ-&%C%Qukgx$ zXYp66F?WkDq{5BE&{(`mN%@zjcjl$S?SjBgeMtJh!jQ>!JxqyfeF0TF!*VszWtwaGSl zie%$kNH*$X0}^+Q@-2H2yZ;^vtOt;5)r&&AVH#B4Aj_u!3=o)e%fz(6yiC|mc ztyoI~&UM7jEIPx_<;ncnv4abYzh9qg7SGG0AAshzhCi?uW$-iz0%_(TL4EQR8GVqHLoH> zy`HG_D(oe55w3QH#Fd0X>l)GL6Qmt@h#=(#66F>mu)B!gPn2eG4e6$L$O1n=010&N zv8P0(kC0+?AE!xBGmLsrU^Rp?r%@Cf`G8`ZPbjgS###Gexec$q6)@c#54&A?u-lWB1G@KUHCLglh5E+9s;6G=psN&D|2LH`C4xa(qkpM>*1(hfdE zmI+-ygXajR!7Ib;ISKAF`v2c^*%FA-d`QImgs$~{oHBcfaE&(Pm_McW--DC%S-Q?Q zk!*0A1|crwatEmfeROSyQ1AW)o$H7}0vkR}wi@BUtqk z(n%n=i7{WLYD8*Zq0Zh#V)=rJNwUFRqOvNlhktyks%fOw(7$H76RgeuJ~e-;v1NM20C@U$Ym8)@&!yK93;P z^YB%yftOq*0u<_zr1cD0hn^QkX|>g)**C@4r#~^fd9hpO+0DKUAI2vCOeQG`5hUQv6&Is4Mj5r-G4ecDlROlM$-$A4X4LJ58b1a|&g4 zUvSQeNbC47$g>zm_K~;9HYZDL{t}soU*nAJ01`>4i>>;QbnrT|4nJVR606mTOrkh0 zmKmbj1YeaZL};}jN%s-`t}6)LcL{!q=iseS2`{BmBFgg1QTk0~;Rff63q89+tAk#6 zRmVI$(U|tqq9*pS-Gzi_HWw3LST&{gSQPu-52*Be<(FX6mK&|zQI%?V|4bo?VW!y~ zoH_msr!0vkEgm39tq$QTtwi>XNYd{jF{SHZ&`HF3i>}diqW%tqX&zq6+j@LSsFKKj2C9-!YFs5jZN^CwjL>}zM5s5AZS;hQ zwTrASQR|_bD71cwY|DEnuzXEoL&wb?lQ`ZbI(vtV!!J?dIEs=JA5i7+7ZTPlR6ioe zWR$3Fg2ZYNnoy^fP^N=u!E@YD&qAz5v_FfNNzYlFWU(J1|&c_j8ZhHnt4QU@PdI;M67@jAB=soTol@2_%>Y&`ufI_)H)O)Qly zT>T3D-#1yDG>qsrL7$!_)B9|H!IjXTaXfC!DEVuDtZSq*d~&3Kaa}aL1-kTj{f5W~F-f%m9kLmWbfSh*+ng`BMWL&TWxm96-M3 z1Sz;DcyNhA*}z3qhb#)|)P}61o)lJ*|2&cF7V1LxN!{+FPW=(h!9UP@htNfQ#{H{b zP!sf?l-nCLN57_HY$4BQ3Z;RwL@JYL4S9nyuN5Ng4I%L&j~P<0Q>3h)A=P0JNw&{$ z&yEzeWhbs$wjtGd5Q(-u^qmGMRG*NW13%xS(E7G@50T_F?QcX5h3NMjheV-EJDJ@O zV*jN3N}>*9$aEc(Vqd27IO0yWka}JxLVZDD`iP_^QXHNO$uj{nnO-~DPRE^;bV0t$ z0@CPx&bgNQ&7(EqHGQ6euE{D&{7K25e~C8DKHYHMj@l!oZ=}yA z61}jEn)9UE&(5JNa9R{_)mbL!byBl?s8S!IHS8k{X+IOeenExf5sFV9q1yI)eeNIk 
zPALDu3KaZ;QR+P}ty>u`!!or+WQ!`lRU|t+LayrsDoK$gIrJiv-Y@o^qfq`0DaEfT zf({K4B`L3(&~>z3+(%8wTQr{EqmcM5>I42N>4Ca)2e=>i1@|w1Phsv$v}$%~`)$+( zzmgm-tGzP6S!AmW^gNGpBI+z6xJ*)@?2V9aKTe;wfa}(zQtf&X`{xD;$&-mFZ=LC( zM>mSxSBNB^6Nx?{GA6+oVAY2_)jZvVjA)M7L{0b{ zo%13JJ!eoIxQ3eGHRvMW(Yd`LmHG<0n73%YctB)(2z~qq6bCGzJ?bs)+CC+s9ieOb zO3pjqbDVB2Q>gOi-1Pw|*pKLp{24C_e#AiHk0>~~H(Y6BR`RL}6#SZ?*O*V_IL(+! z{TD^OwuHQ+aGGiYcx~M}m$G)cLJv2q_pelG1#eqDCutZ92naJfON{F!YJPp#pQ0z4) z?M*4RBgpX>CuKPyQ)8TSWd)mTI}ELDAGG$pq;l!|l2T2uc}T=MMEeYhZ$b)fljk{2 z1U`p+w|S&GJx8%8h2Zo#1@wEas}XnY`{?&sB-;!jkq9%_;|1=KYUN^8rs@Tev=M3c zBhcE=b}q|A)MKP(pP|xslL&cC+SeMx*3lTbiX!hBQTMgyRwd-`y0VM5m_2mF(Ye!g zYKt+GQvHOs*gaCPTj;*Lht}{nbi|eE?=e;U zlX);v8Cg}J;8%?ln?ZHD-MEQKj#X=!&jPp|sfNh3J^Ced;U-BJ6nYye?B~`hBay=< z>WCog&%Z-c#1UGekI)%?EWV+gM6#`ndLU0VgA7u!Tv<<7jiSVFiHLAmh_cdeQwm=RXC6t& zU+lU{g!mX*B0Kh2V8YFJofSgN;DVIhfE3HJRgXXKa#u8YVdm8(7T1lf+$NV0h@ zeXQxK5jw_W$={ZGt;@04lYzG@^fb~aaFqHB|$*U?*@LPfU z8|@#8{f*iRzZL0w&2$+;ZP2=ezPhLlDZJ<|yp#f0Y2X}Mqu)S(?ErO=Cdnx_h8>|P zY#;UKj?jDk3z5hNv_%uiM7%_G$R_Q(i@I~KNa1nQ{WIhenPxhTN&zj42#`AllI)+z z2rv616niXFC{CgIsryK_A0%~aK&s;q%Kg?!Wlqq(FC-^gva|lLEFgnHlX3+tKr&klag0epy0QNmhin3jUnrG zP2p>#4Es@eb^-Zb6VMS!Hk{i=y?Td8caunS9gnqUw8tFDAVG5kg})b%(G>E%cnx%1 zqR=?{E$Sn`qtJLCO&4BE(|tXW5G%imvok30m?okk0uNZC*Onwtnqc(=_v{T)mFJM0 z+oL#7SsA!NA^JFy9iAb@W=KA}+;dHeX6cS&@}0C+Po>kM zk*-5a)F#RTh@gFVpn``YUZRA~fzP`&`jBo&`)H4QPsF-UukF!|hR=Tjts(Ew5xs*F zQvXGs({xVDXb9diHHMg!ys82PzXz218!f5=R!mHUMZS|1)|+tu(k_L;q*|liqMFoJ z=f%%xzp@K`ycr!ae?dpoPiT!erqK2idT)Fo;yp$cZCB*Ggs#{lv|f0Raw4GKtNWq= zn}T1VKKMInmn!y{MODB$DNdabCAU{`=*~T^Om3w*>Iqn{1ZOUjBh&%-DroMbbAeAju|Cc|}@2=j?_B&3ll=5#}W+X7NZ zS*O!}_v}YWl`hJDxsJ1>u(`PP0!`uU6JSJ{zY&cT=9l@-)Ad+GXY9T#u~HZI22B@t z>3V&U9BSv4w}*dyk?{O*ad_1#?5#qLNotpy2n2T;D-;ZSaz*%zqB$ z>RA-}Orb)(Bn2AIqu#%IB$G&-chz6|5&D?FqAlt(+B9Z#UOPlR&)A3WNP6JG6)y1X zpf%D&q_jaH{vyhFd^B)@NNrYz9B!O^AYpr!>zJ6zTtBH7<;teuT(rvbn39PoE;ywT z`Q>{}BhPhCUQaqRK*wB_^}*5{264x>k5np8J{hE^H`{576srLl6z*rL#*ldGvGmMl 
z5n&elEQ+^66{%w;b{#3qMC(3DLGVhcm%nY6ylo~OubR%kniPEfxw&YX0t{kH|f?J3_qa~ckG~#bWq=z!4)f%;rhV!qXi++bf3bD&c zxiy~OAVtd_uOp-|hltRIQRFcvrYLMMQ{*>`yAF?0;l(C41KPi=yQA zDd|a7&7e@4`{`It&yhl;cuVrIqteQi?au90Q!-l1#jYeLQlkz={K>V3@Aw}*-<$3>H*D0jhjY!V)mQ9z8#&Rlvy9e08tH5=MRPMMGpbAI{ zr`irtm~Rvnnqb?DZ0BiGuk%Q8d4dv8Qj%`-k{;mpDs}@a@S3LI4dB6wo3xMgysD;U z{Pwnu9?1?*kx0t6A#@#OzD(u=bc_k;FTFwg#T^v-&p>~TZYUSc=#Dp|>+&bGXx@{u zKQQa#54E)#lac~Zpg_TY50$|inpVv_Q>*3!p4|EweOLd22b!PIL+Y(2=m1R@KBDL9 zPo(bNqATtYr2(r%I`2vKy^*{nw=k7@Eh5u(Sb9qHJV+tBE+9`e2lhZwV$+D2b3G@C zEC*yHHplfJz63<(N!CQ*J}*$_wSilwdJy~PCZyA6CtCI+mB_V#4Y7%!a~zFC-UgHh z&Y>Y>19|S_XpZD@;C0lU+d+M}33U-BI@iylTnQY_kX$8qB2)*g(EHz^#*h77 znZzE+iU@2V%>^o672)O?y(~wQ>oO|~D(1N?kcu@Bnev$I91-9!GTcUpC|^hm)s0h~ za;y@M6>+ZO@mMZ~@%U?!^#Bs>dL&)IT?$OX9QxMKq+?7<5lhx0vwbQA&)x!e zNilP~SatA%OqgZ67*Oav30=e%YJykL5VcL@x`X!Ek7x`(94_@&TB{T&Q1DMcZMgYF zZP17Ldi4=1{Xd{9>Sxr29H2VHgx1K9XrV`S@GDdWZAoFLI%o+c{?kOp8$wP+9F{v7 zP@tml-gQ!PpX_rQZ>g77D4rf;MVo3jOkw$|7`5=~3d!_4o2+mOAxAYO4*#WIt3;xM zQUqf+tyqf&$)ED%R+=M|=71EmxW6^UaY*`Ib6t$c^&Lln#~doWwk3Cao3=?OMa_c* zoNvu>8xz%9;6JovXbovznZ@|&&jYrmd6tjK*4 zU78(Khs~l{y^Fin{kR|ZnjNyt`R< zdlO_k%%Iqloxq;px>c795^$^6bt}De4ctEU5Y52{NK^HrR=rL)f=Lv5O`-V$6ZNpZ zRK0#e`HL%1py2-uecGQ-=%Nqm+AhC`F8Tu+LibR4b{n-suEoC7Vh&U7zb-jUcHLs@ zJ~nRQu7C^*w|Taoi%#MZ;QXAz^)1}A?3Hjo{&WZOT;^nufX%eIbD+eVkFzM&g;yOr%5vLPp8FKi>_(Azx=-A;_;ntCWu;plNXpk|O~!8XJ!X-3rk_-;frz5*2iR#sV6pg_Sd6xG4&>h@@piI+S{aeOT4fozW5)2 z#GS%!&lNFUNhT%AD*)uUOd`j5nh3C8icdEzdt@Y)yj>wou+hI)706cPg&9aTuY8Nu>nS5DAFCd;*dG(w# zr`e5YYgNh+fC2>yekEuOTT`_}Zg%Imj#Ajaj0(SHBF28{HRWOx6WnzQ?^A7grGiBn zL5=uhIpQt!qFmYBrNDFMt39F0fE4>-Sr(i<2zVHPC%rf=Q0coRBwHS^Ecshb4aiCd zr+H1Tr*!;bWVso{RqHNo&t~1V>g{2j`cR{>s8vW+fdU1;PSmQ`PxM@QqfU1k94_}> zm$s+dR=r4fG$74xOnO^W9S3D~fZL}Y%TnLmubSpGfP8OKwXPE~rpjw#C0aj}@SY7< zcx07Hl}BH%pX?U@ST?@SRvGEI2C*&Fp6)||`+^J{q}V(k&UH6x`v6HY%ga|Zzzs+eRs|9MaKTx`lZlikqEY5R%}gn7?6;ktN*;b3zPA!(+?J|S$5`SJ5H+=g{nY-g5Mn~Jhr|m z@tjwcc&%s>tRLj%yUz`$+6@igv3<0Y=`dxEx44hEZ(GE$MQh!MT<2L_`nJ)W?rhje 
zw0^vkV*ji=%WbqST{WU*)0rz4?cZoE<`ptkpg@5F1qyzP_zyN4`RKUL%sc=9002ov JPDHLkV1myZcL)Fg literal 0 HcmV?d00001 diff --git a/tools/scalar_tuner/src/assets/react.svg b/tools/scalar_tuner/src/assets/react.svg new file mode 100644 index 0000000..6c87de9 --- /dev/null +++ b/tools/scalar_tuner/src/assets/react.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tools/scalar_tuner/src/assets/vite.svg b/tools/scalar_tuner/src/assets/vite.svg new file mode 100644 index 0000000..5101b67 --- /dev/null +++ b/tools/scalar_tuner/src/assets/vite.svg @@ -0,0 +1 @@ +Vite diff --git a/tools/scalar_tuner/src/components/BehaviorCard.jsx b/tools/scalar_tuner/src/components/BehaviorCard.jsx new file mode 100644 index 0000000..0219a09 --- /dev/null +++ b/tools/scalar_tuner/src/components/BehaviorCard.jsx @@ -0,0 +1,160 @@ +import { useState, useMemo } from 'react' +import MetricRow from './MetricRow' +import ScoreDisplay from './ScoreDisplay' +import { isIntegerMultiple } from '../App' + +function colValue(col, metricKey, vals, effectiveScalar) { + switch (col) { + case 'metric': return metricKey + case 'bad': return vals.bad + case 'good': return vals.good + case 'ratio': return vals.ratio + case 'logdiff': return vals.log_good - vals.log_bad + case 'contrib': return Math.exp(effectiveScalar * (vals.log_good - vals.log_bad)) + case 'scalar': return effectiveScalar + } +} + +function sortIcon(col, sortCol, sortDir) { + if (sortCol !== col) return + if (sortDir === 'asc') return + if (sortDir === 'desc') return +} + +function isInDeadzone(effectiveScalar, logDiff, deadzone) { + if (effectiveScalar === 0) return true + const contrib = Math.exp(effectiveScalar * logDiff) + return contrib >= deadzone && contrib <= 2 - deadzone +} + +export default function BehaviorCard({ behavior, metrics, scalars, score, onScalarChange }) { + const [showAll, setShowAll] = useState(true) + const [sortCol, setSortCol] = useState(null) + const [sortDir, setSortDir] = useState(null) + const [scale, 
setScale] = useState(1.0) + const [deadzone, setDeadzone] = useState(1.0) + + // Default order: by abs(suggested_scalar), stable, never depends on scalars + const defaultEntries = useMemo( + () => [...Object.entries(metrics)].sort(([, a], [, b]) => + Math.abs(b.suggested_scalar ?? 0) - Math.abs(a.suggested_scalar ?? 0) + ), + [metrics] + ) + + // Virtual score: use scale + deadzone filter, never modifies actual scalars + const virtualScore = useMemo(() => { + const compute = side => Object.entries(scalars).reduce((acc, [k, scalar]) => { + const eff = scalar * scale + if (eff === 0) return acc + const bv = Math.max(metrics[k]?.bad ?? 1, 1e-300) + const gv = Math.max(metrics[k]?.good ?? 1, 1e-300) + const logDiff = Math.log(gv) - Math.log(bv) + if (isInDeadzone(eff, logDiff, deadzone)) return acc + return acc * Math.pow(side === 'bad' ? bv : gv, eff) + }, 1.0) + const bad = compute('bad') + const good = compute('good') + return { bad, good, ratio: bad > 0 ? good / bad : 0 } + }, [scalars, scale, deadzone, metrics]) + + function handleColClick(col) { + if (sortCol !== col) { setSortCol(col); setSortDir('asc') } + else if (sortDir === 'asc') setSortDir('desc') + else { setSortCol(null); setSortDir(null) } + } + + const sortedEntries = useMemo(() => { + const base = showAll ? defaultEntries : defaultEntries.filter(([k]) => (scalars[k] ?? 0) !== 0) + if (!sortCol || !sortDir) return base + return [...base].sort(([ka, va], [kb, vb]) => { + const a = colValue(sortCol, ka, va, (scalars[ka] ?? 0) * scale) + const b = colValue(sortCol, kb, vb, (scalars[kb] ?? 0) * scale) + if (typeof a === 'string') return sortDir === 'asc' ? a.localeCompare(b) : b.localeCompare(a) + return sortDir === 'asc' ? a - b : b - a + }) + }, [defaultEntries, sortCol, sortDir, showAll, scalars, scale]) + + const nonZeroCount = defaultEntries.filter(([k]) => (scalars[k] ?? 0) !== 0).length + const deadzoneCount = useMemo(() => + defaultEntries.filter(([k, v]) => { + const eff = (scalars[k] ?? 
0) * scale + const logDiff = v.log_good - v.log_bad + return isInDeadzone(eff, logDiff, deadzone) + }).length, + [defaultEntries, scalars, scale, deadzone] + ) + + function th(col, label) { + return ( + handleColClick(col)}> + {label}{sortIcon(col, sortCol, sortDir)} + + ) + } + + return ( +
    +
    + + +
    + + + +
    +
    + + + + + {th('metric', 'metric')} + {th('bad', 'bad')} + {th('good', 'good')} + {th('ratio', 'ratio')} + {th('logdiff', 'log diff')} + {th('contrib', 'contrib')} + {th('scalar', 'scalar')} + + + + {sortedEntries.map(([metricKey, vals]) => { + const actualScalar = scalars[metricKey] ?? 0 + const effectiveScalar = actualScalar * scale + const logDiff = vals.log_good - vals.log_bad + return ( + onScalarChange(metricKey, v)} + /> + ) + })} + +
    + +
    + ) +} diff --git a/tools/scalar_tuner/src/components/Knob.jsx b/tools/scalar_tuner/src/components/Knob.jsx new file mode 100644 index 0000000..874ca2f --- /dev/null +++ b/tools/scalar_tuner/src/components/Knob.jsx @@ -0,0 +1,68 @@ +import { useEffect, useRef, useState } from 'react' + +const MIN_ANGLE = -135 +const MAX_ANGLE = 135 + +function angleToFactor(angle) { + // center (0°) = ×1.0, full CW (+135°) = ×3.0, full CCW (-135°) = ×0.0 + if (angle >= 0) return 1 + (angle / MAX_ANGLE) * 2 + else return 1 + (angle / Math.abs(MIN_ANGLE)) * 1 +} + +export default function Knob({ onFactor }) { + const [angle, setAngle] = useState(0) + const [dragging, setDragging] = useState(false) + const startRef = useRef(null) + const angleRef = useRef(0) + + function onMouseDown(e) { + e.preventDefault() + startRef.current = { y: e.clientY, startAngle: angleRef.current } + setDragging(true) + } + + useEffect(() => { + if (!dragging) return + + function onMouseMove(e) { + const dy = startRef.current.y - e.clientY + const newAngle = Math.max(MIN_ANGLE, Math.min(MAX_ANGLE, startRef.current.startAngle + dy)) + angleRef.current = newAngle + setAngle(newAngle) + onFactor(angleToFactor(newAngle), false) + } + + function onMouseUp() { + onFactor(angleToFactor(angleRef.current), true) + angleRef.current = 0 + setAngle(0) + setDragging(false) + } + + window.addEventListener('mousemove', onMouseMove) + window.addEventListener('mouseup', onMouseUp) + return () => { + window.removeEventListener('mousemove', onMouseMove) + window.removeEventListener('mouseup', onMouseUp) + } + }, [dragging]) + + const factor = angleToFactor(angle) + + return ( +
    +
    + 0 +
    +
    +
    + +
    + {dragging ? `×${factor.toFixed(2)}` : 'scale all'} +
    + ) +} diff --git a/tools/scalar_tuner/src/components/MetricRow.jsx b/tools/scalar_tuner/src/components/MetricRow.jsx new file mode 100644 index 0000000..c3339e3 --- /dev/null +++ b/tools/scalar_tuner/src/components/MetricRow.jsx @@ -0,0 +1,59 @@ +function fmtContrib(v) { + if (v === 1) return '1.000' + if (v > 999) return '>999' + if (v < 0.001) return v.toExponential(1) + return v.toFixed(3) +} + +export default function MetricRow({ metricKey, vals, scalar, effectiveScalar, isDeadzoned, isDamped, onChange }) { + const logDiff = (vals.log_good - vals.log_bad).toFixed(3) + const ratio = vals.ratio.toFixed(3) + + const contribution = effectiveScalar !== 0 + ? Math.exp(effectiveScalar * (vals.log_good - vals.log_bad)) + : 1 + const contribPositive = contribution > 1 + const contribNeutral = isDeadzoned || Math.abs(contribution - 1) < 0.001 + + return ( + + + {metricKey} + {isDamped && ( + ~int + )} + + {vals.bad.toFixed(4)} + {vals.good.toFixed(4)} + {ratio}x + 0 ? 'pos' : parseFloat(logDiff) < 0 ? 'neg' : ''}`}> + {logDiff} + + + {isDeadzoned ? 
'—' : `${fmtContrib(contribution)}x`} + + + onChange(parseFloat(e.target.value))} + /> + onChange(parseFloat(e.target.value) || 0)} + className="scalar-input" + /> + + + ) +} diff --git a/tools/scalar_tuner/src/components/ScoreDisplay.jsx b/tools/scalar_tuner/src/components/ScoreDisplay.jsx new file mode 100644 index 0000000..8d372ec --- /dev/null +++ b/tools/scalar_tuner/src/components/ScoreDisplay.jsx @@ -0,0 +1,52 @@ +function fmt(v) { + if (v === 0) return '0' + if (v >= 999999999) return '999999999' + if (v < 0.0001) return v.toExponential(2) + return v.toFixed(4) +} + +function fmtLog(v) { + if (v <= 0) return '—' + return Math.log(v).toFixed(3) +} + +function fmtRatio(v) { + if (v >= 999999999) return '999999999' + return v.toFixed(2) +} + +function fmtLog10Ratio(v) { + if (v <= 0) return '—' + return Math.log10(v).toFixed(2) +} + +export default function ScoreDisplay({ score }) { + if (!score) return null + const { bad, good, ratio } = score + + const cls = ratio >= 2.0 ? 'ratio-good' : ratio >= 1.0 ? 'ratio-weak' : 'ratio-bad' + const icon = ratio >= 2.0 ? '✓' : ratio >= 1.0 ? '~' : '✗' + const isUnderflow = bad < 1e-10 && good < 1e-10 + + return ( + + + + + + + + + + + + + + + + + + +
    bad{fmt(bad)}ln={fmtLog(bad)}
    good{fmt(good)}ln={fmtLog(good)}
    ratio{fmtRatio(ratio)}x {icon}log₁₀={fmtLog10Ratio(ratio)}{isUnderflow ? ' ⚠' : ''}
    + ) +} diff --git a/tools/scalar_tuner/src/components/YamlModal.jsx b/tools/scalar_tuner/src/components/YamlModal.jsx new file mode 100644 index 0000000..875d651 --- /dev/null +++ b/tools/scalar_tuner/src/components/YamlModal.jsx @@ -0,0 +1,64 @@ +import { useMemo } from 'react' + +function toYaml(scalars) { + const categories = {} + + for (const [behaviorKey, metrics] of Object.entries(scalars)) { + const dotIdx = behaviorKey.indexOf('.') + const category = behaviorKey.slice(0, dotIdx) + const behavior = behaviorKey.slice(dotIdx + 1) + + if (!categories[category]) categories[category] = {} + + const nonZero = Object.entries(metrics).filter(([, v]) => v !== 0) + if (nonZero.length === 0) continue + + const grouped = {} + for (const [metricKey, scalar] of nonZero) { + const [group, key] = metricKey.split('.') + if (!grouped[group]) grouped[group] = {} + grouped[group][key] = scalar + } + + categories[category][behavior] = grouped + } + + let out = '' + for (const [category, behaviors] of Object.entries(categories)) { + out += `# ${category}\n` + for (const [behavior, groups] of Object.entries(behaviors)) { + out += `${behavior}:\n` + for (const [group, keys] of Object.entries(groups)) { + out += ` ${group}:\n` + for (const [key, scalar] of Object.entries(keys)) { + out += ` ${key}: ${scalar.toFixed(4)}\n` + } + } + out += '\n' + } + } + return out.trim() +} + +export default function YamlModal({ scalars, onClose }) { + const yaml = useMemo(() => toYaml(scalars), [scalars]) + + function copy() { + navigator.clipboard.writeText(yaml) + } + + return ( +
    +
    e.stopPropagation()}> +
    +

    YAML Export

    +
    + + +
    +
    +
    {yaml}
    +
    +
    + ) +} diff --git a/tools/scalar_tuner/src/index.css b/tools/scalar_tuner/src/index.css new file mode 100644 index 0000000..4300cd0 --- /dev/null +++ b/tools/scalar_tuner/src/index.css @@ -0,0 +1,106 @@ +:root { + --text: #6b6375; + --text-h: #08060d; + --bg: #fff; + --border: #e5e4e7; + --code-bg: #f4f3ec; + --accent: #aa3bff; + --accent-bg: rgba(170, 59, 255, 0.1); + --accent-border: rgba(170, 59, 255, 0.5); + --social-bg: rgba(244, 243, 236, 0.5); + --shadow: + rgba(0, 0, 0, 0.1) 0 10px 15px -3px, rgba(0, 0, 0, 0.05) 0 4px 6px -2px; + + --sans: system-ui, 'Segoe UI', Roboto, sans-serif; + --heading: system-ui, 'Segoe UI', Roboto, sans-serif; + --mono: ui-monospace, Consolas, monospace; + + font: 18px/145% var(--sans); + letter-spacing: 0.18px; + color-scheme: light dark; + color: var(--text); + background: var(--bg); + font-synthesis: none; + text-rendering: optimizeLegibility; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + + @media (max-width: 1024px) { + font-size: 16px; + } +} + +@media (prefers-color-scheme: dark) { + :root { + --text: #9ca3af; + --text-h: #f3f4f6; + --bg: #16171d; + --border: #2e303a; + --code-bg: #1f2028; + --accent: #c084fc; + --accent-bg: rgba(192, 132, 252, 0.15); + --accent-border: rgba(192, 132, 252, 0.5); + --social-bg: rgba(47, 48, 58, 0.5); + --shadow: + rgba(0, 0, 0, 0.4) 0 10px 15px -3px, rgba(0, 0, 0, 0.25) 0 4px 6px -2px; + } + + #social .button-icon { + filter: invert(1) brightness(2); + } +} + +body { + margin: 0; +} + +#root { + width: 100%; + height: 100vh; + display: flex; + flex-direction: column; +} + +h1, +h2 { + font-family: var(--heading); + font-weight: 500; + color: var(--text-h); +} + +h1 { + font-size: 56px; + letter-spacing: -1.68px; + margin: 32px 0; + @media (max-width: 1024px) { + font-size: 36px; + margin: 20px 0; + } +} +h2 { + font-size: 24px; + line-height: 118%; + letter-spacing: -0.24px; + margin: 0 0 8px; + @media (max-width: 1024px) { + font-size: 20px; + } +} +p { + 
margin: 0; +} + +code, +.counter { + font-family: var(--mono); + display: inline-flex; + border-radius: 4px; + color: var(--text-h); +} + +code { + font-size: 15px; + line-height: 135%; + padding: 4px 8px; + background: var(--code-bg); +} diff --git a/tools/scalar_tuner/src/main.jsx b/tools/scalar_tuner/src/main.jsx new file mode 100644 index 0000000..b9a1a6d --- /dev/null +++ b/tools/scalar_tuner/src/main.jsx @@ -0,0 +1,10 @@ +import { StrictMode } from 'react' +import { createRoot } from 'react-dom/client' +import './index.css' +import App from './App.jsx' + +createRoot(document.getElementById('root')).render( + + + , +) diff --git a/tools/scalar_tuner/vite.config.js b/tools/scalar_tuner/vite.config.js new file mode 100644 index 0000000..8b0f57b --- /dev/null +++ b/tools/scalar_tuner/vite.config.js @@ -0,0 +1,7 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +// https://vite.dev/config/ +export default defineConfig({ + plugins: [react()], +}) From 5250d22aa36c310e2a4e664fa84613f07b797dd9 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 19 Mar 2026 18:27:52 +0100 Subject: [PATCH 12/71] docs: update README and action configuration Update README with new feature documentation. Update .codeqa.yml, action.yml, and run script to reflect the new engine and CLI structure. Co-Authored-By: Claude Sonnet 4.6 --- .codeqa.yml | 27 +++++++- README.md | 182 +++++++++++++++++++++++++++++++++++++++---------- action.yml | 2 +- scripts/run.sh | 6 +- 4 files changed, 179 insertions(+), 38 deletions(-) diff --git a/.codeqa.yml b/.codeqa.yml index e6d5d46..8fa1a5c 100644 --- a/.codeqa.yml +++ b/.codeqa.yml @@ -4,4 +4,29 @@ # Patterns here are merged with any --ignore-paths passed on the command line. ignore_paths: - - priv/combined_metrics/samples/** + - priv/** + - tools/** + - scripts/** + - docs/** + - plans/** + - devenv* + - direnv* + +# Impact weights for overall score calculation. 
+# Combined metric categories default to 1 (can be overridden here). +impact: + complexity: 5 + file_structure: 4 + function_design: 4 + code_smells: 3 + naming_conventions: 2 + error_handling: 2 + consistency: 2 + documentation: 1 + testing: 1 + # combined categories override example: + # variable_naming: 2 + +combined_top: 5 # worst offender files per combined-metric behavior + +cosine_significance_threshold: 0.25 # threshold for cosine similarity calculation in behavior categories diff --git a/README.md b/README.md index 259ee49..071789b 100644 --- a/README.md +++ b/README.md @@ -17,14 +17,14 @@ Works with Python, Ruby, JavaScript, TypeScript, Elixir, C#, Java, C++, Go, Rust - [CLI Reference](#cli-reference) - [analyze](#analyze) - [health-report](#health-report) + - [diagnose](#diagnose) - [compare](#compare) - [history](#history) - [correlate](#correlate) - - [stopwords](#stopwords) - [Metrics Reference](#metrics-reference) - [Raw Metrics](#raw-metrics) - [Health Report Categories](#health-report-categories) - - [Behavior Checks](#behavior-checks) + - [Behavior Categories](#behavior-categories) - [Output Formats](#output-formats) - [Grading](#grading) @@ -112,7 +112,7 @@ jobs: | Input | Required | Default | Description | |-------|----------|---------|-------------| -| `command` | yes | — | CLI command to run: `health-report`, `compare`, or `analyze` | +| `command` | yes | — | CLI command to run: `health-report`, `compare`, `analyze`, `history`, `correlate`, or `diagnose` | | `path` | no | `.` | Directory to analyze | | `comment` | no | `false` | Post results as a sticky PR comment | | `fail-grade` | no | — | Fail the action if overall grade is below this (e.g. 
`C`) | @@ -153,22 +153,83 @@ ignore_paths: ```yaml categories: - - name: Naming - weight: 1.5 + Naming: + name: Naming metrics: - name: vowel_density - good: 0.4 - thresholds: [0.35, 0.3, 0.25] + weight: 1.5 + good: "high" + thresholds: + a: 0.42 + b: 0.38 + c: 0.32 + d: 0.25 ``` +Category-level keys: `name` (display name), `metrics` (list of metric overrides), `top` (worst-offender count override). + +Metric-level keys: `name` (metric key), `weight` (relative weight within the category), `good` (`"high"` or `"low"` — direction where higher values are better or worse), `source` (metric path), `thresholds` (map of letter-grade cutoffs: `a`, `b`, `c`, `d`). + ### Grade scale override ```yaml grade_scale: - - [90, "A"] - - [80, "B"] - - [70, "C"] - - [0, "F"] + - min: 90 + grade: "A" + - min: 80 + grade: "B" + - min: 70 + grade: "C" + - min: 0 + grade: "F" +``` + +### impact + +Impact weights used when computing the overall score. The 9 keys below are the built-in defaults; any category not listed falls back to `1`. These weights apply to both primary and behavior categories. + +```yaml +impact: + complexity: 5 + file_structure: 4 + function_design: 4 + code_smells: 3 + naming_conventions: 2 + error_handling: 2 + consistency: 2 + documentation: 1 + testing: 1 + # override any category key: + # variable_naming: 2 +``` + +### combined_top + +Controls how many worst-offender files are shown per behavior category in `health-report` (default: `2`). + +```yaml +combined_top: 3 +``` + +### near_duplicate_blocks + +Configures codebase-level near-duplicate block detection (used by `analyze`). + +```yaml +near_duplicate_blocks: + max_pairs_per_bucket: 50 +``` + +| Key | Description | +|-----|-------------| +| `max_pairs_per_bucket` | Maximum duplicate pairs reported per similarity bucket (default: unlimited) | + +### cosine_significance_threshold + +Minimum cosine similarity required for a behavior category match to be considered significant. 
Matches below this threshold are treated as noise and excluded from scoring. Default: `0.15`. + +```yaml +cosine_significance_threshold: 0.25 ``` ## CLI Reference @@ -228,6 +289,31 @@ Produces a graded quality report grouped into behavior categories with worst-off ./codeqa health-report --detail full --top 10 --format github ./lib ``` +### diagnose + +Identifies likely code quality issues by scoring behavior profiles using cosine similarity. Useful for understanding *why* a codebase scores poorly without running a full health report. + +```sh +./codeqa diagnose --path [OPTIONS] +``` + +`--path` is **required**. Note: unlike `health-report`, the path is passed as a named flag (`--path`), not a positional argument. + +| Option | Description | +|--------|-------------| +| `--path PATH` | **(Required)** File or directory to analyze | +| `--mode MODE` | `aggregate` (default) or `per-file` | +| `--top N` | Number of top issues to show (default: `15`) | +| `--format FORMAT` | Output format: `plain` or `json` (default: `plain`) | +| `--combined-top N` | Worst-offender files per behavior in per-file mode (default: `2`) | + +**Example:** + +```sh +./codeqa diagnose --path ./lib --mode aggregate --top 10 +./codeqa diagnose --path ./lib --mode per-file --format json +``` + ### compare Compares code quality metrics between two git refs. Designed for PR workflows. @@ -246,6 +332,16 @@ Compares code quality metrics between two git refs. Designed for PR workflows. 
| `--output MODE` | Output mode: `auto`, `summary`, or `changes` (default: `auto`) | | `--changes-only` | Only analyze files changed between refs | | `--all-files` | Analyze all source files at both refs (default) | +| `--workers N` | Parallel worker count | +| `--progress` | Show per-file progress | +| `--cache` | Cache computed metrics to disk | +| `--cache-dir PATH` | Directory for cached metrics (default: `.codeqa_cache`) | +| `--timeout MS` | Per-file timeout in milliseconds (default: `5000`) | +| `--show-ncd` | Include NCD similarity matrix | +| `--ncd-top N` | Top similar pairs per file | +| `--ncd-paths PATHS` | Comma-separated paths to compare for NCD | +| `--show-files` | Include per-file metrics in output | +| `--show-file-paths PATHS` | Comma-separated list of specific file paths to include | | `--ignore-paths GLOBS` | Comma-separated glob patterns to exclude | **Example:** @@ -269,6 +365,16 @@ Tracks codebase metrics across multiple commits, writing per-commit JSON snapsho | `--output-dir PATH` | **(Required)** Directory to write JSON snapshots | | `--commits N` | Number of recent commits to analyze | | `--commit-list SHAS` | Comma-separated list of explicit commit SHAs | +| `--workers N` | Parallel worker count | +| `--progress` | Show per-file progress | +| `--cache` | Cache computed metrics to disk | +| `--cache-dir PATH` | Directory for cached metrics (default: `.codeqa_cache`) | +| `--timeout MS` | Per-file timeout in milliseconds (default: `5000`) | +| `--show-ncd` | Include NCD similarity matrix | +| `--ncd-top N` | Top similar pairs per file | +| `--ncd-paths PATHS` | Comma-separated paths to compare for NCD | +| `--show-files` | Include per-file metrics in output | +| `--show-file-paths PATHS` | Comma-separated list of specific file paths to include | | `--ignore-paths GLOBS` | Comma-separated glob patterns to exclude | ### correlate @@ -282,27 +388,12 @@ Finds metric correlations across history snapshots produced by `history`. 
Run `h | Option | Description | |--------|-------------| | `--top N` | Number of top correlations to show (default: `20`) | -| `--hide-exact` | Hide perfect 1.0 correlations | -| `--all-groups` | Show all metric groups | +| `--hide-exact` | Hide perfect 1.0 and -1.0 correlations | +| `--all-groups` | Include correlations between metrics in the same group | | `--min FLOAT` | Minimum correlation threshold | | `--max FLOAT` | Maximum correlation threshold | | `--combined-only` | Show only combined-metric correlations | -| `--max-steps N` | Limit history steps used | - -### stopwords - -Extracts codebase-specific vocabulary stopwords and fingerprints. Use the output to reduce noise from project-specific boilerplate tokens in subsequent metric analysis. - -```sh -./codeqa stopwords [OPTIONS] -``` - -| Option | Description | -|--------|-------------| -| `--workers N` | Parallel worker count | -| `--stopwords-threshold FLOAT` | Minimum frequency ratio (default: `0.01`) | -| `--progress` | Show per-file progress | -| `--ignore-paths GLOBS` | Comma-separated glob patterns to exclude | +| `--max-steps N` | Maximum number of correlation pairs to evaluate | ## Metrics Reference @@ -329,10 +420,17 @@ All metrics are computed per file and aggregated at the codebase level. | **Magic number density** | Ratio of numeric literals that appear to be unnamed constants | | **Function metrics** | Function count, average and maximum function line count, average and maximum parameter count | | **Cross-file similarity** | `cross_file_density`: overall codebase redundancy via combined compression ratio. `ncd_pairs` (opt-in via `--show-ncd`): Normalized Compression Distance between similar file pairs using winnowing fingerprints | +| **Near-duplicate blocks** | Codebase-level detection of near- and exact-duplicate code blocks using token-based similarity. Reports duplicate pairs grouped by bucket, with source locations. Configurable via `near_duplicate_blocks:` in `.codeqa.yml`. 
| +| **Block impact & refactoring potentials** | Per-file node tree enriched with leave-one-out impact scores and refactoring potentials. Added to each file entry as `"nodes"` in `analyze` JSON output. Surfaces the highest-impact blocks to refactor. | ### Health Report Categories -The `health-report` command grades your codebase against 6 primary categories. Each category aggregates raw metrics using configurable weights and thresholds. +The `health-report` command evaluates your codebase using two complementary scoring models: + +- **6 primary categories** — graded using configurable thresholds against raw metrics (Readability, Complexity, Structure, Duplication, Naming, Magic Numbers) +- **12 behavior categories** — graded using cosine similarity against behavior profiles (see [Behavior Categories](#behavior-categories)) + +The overall score is a weighted average of all 18 categories. Primary category weights are set via `weight:` in `.codeqa.yml`; behavior category weights are configured via [`impact:`](#impact). | Category | What it measures | |----------|-----------------| @@ -343,11 +441,21 @@ The `health-report` command grades your codebase against 6 primary categories. E | **Naming** | Casing entropy, identifier length variance, avg sub-words per identifier | | **Magic Numbers** | Magic number density | +**Cosine scoring breakpoints** (used for behavior categories): + +| Cosine similarity | Score | Approx. grade | +|-------------------|-------|---------------| +| ≥ 0.5 | 90–100 | A | +| ≥ 0.2 | 70–90 | B–A- | +| ≥ 0.0 | 50–70 | C–B- | +| ≥ −0.3 | 30–50 | D–C- | +| ≥ −1.0 | 0–30 | F–D- | + > Category definitions and thresholds are configurable via `.codeqa.yml`. -### Behavior Checks +### Behavior Categories -In addition to the 6 graded categories, `health-report` evaluates additional behavior check categories using a separate multiplicative scoring model. These appear in the report as "Top Issues" diagnostics. 
+In addition to the 6 primary categories, `health-report` grades 12 behavior categories using cosine similarity against behavior profiles. These contribute to the overall score alongside the primary categories. | Category | Checks | |----------|--------| @@ -364,13 +472,15 @@ In addition to the 6 graded categories, `health-report` evaluates additional beh | **Dependencies** | Import and dependency patterns | | **Error Handling** | Error handling completeness | +> These categories are graded in the `health-report` output using cosine similarity scoring and contribute to the overall score. + ## Output Formats | Format | Commands | Description | |--------|----------|-------------| -| `json` | `analyze`, `compare` | Full metrics structure, suitable for tooling | -| `markdown` | `compare`, `health-report` | GitHub-flavored markdown tables | -| `plain` | `health-report` | Human-readable terminal output (Markdown) | +| `json` | `analyze`, `compare`, `diagnose` | Full metrics structure, suitable for tooling | +| `markdown` | `compare` | GitHub-flavored markdown tables | +| `plain` | `health-report`, `diagnose` | Human-readable terminal output | | `github` | `health-report`, `compare` | Markdown optimized for GitHub PR comments | ## Grading @@ -397,6 +507,8 @@ In addition to the 6 graded categories, `health-report` evaluates additional beh | E- | ≥ 6 | | F | < 6 | +The overall score is a weighted average across all categories. Primary category weights use the `weight:` field inside each category definition in `.codeqa.yml`. Behavior category weights are configured via `impact:` (defaults range from 1–5; categories not listed fall back to `1`). See [Configuration](#configuration) for examples. + The `fail-grade` action input causes a non-zero exit when the overall grade falls below the specified threshold. 
## Contributing & Issues diff --git a/action.yml b/action.yml index 6be6078..f680469 100644 --- a/action.yml +++ b/action.yml @@ -8,7 +8,7 @@ branding: inputs: command: - description: "Command to run: health-report, compare, or analyze" + description: "Command to run: health-report, compare, analyze, or blocks" required: true path: description: "Directory to analyze" diff --git a/scripts/run.sh b/scripts/run.sh index 9804205..cac8046 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -38,8 +38,9 @@ case "$INPUT_COMMAND" in fi ;; analyze) OUTPUT_FILE="${OUTPUT_FILE}.json" ;; + blocks) OUTPUT_FILE="${OUTPUT_FILE}.json" ;; *) - echo "::error::Unknown command: $INPUT_COMMAND. Must be health-report, compare, or analyze." + echo "::error::Unknown command: $INPUT_COMMAND. Must be health-report, compare, analyze, or blocks." exit 1 ;; esac @@ -82,6 +83,9 @@ case "$INPUT_COMMAND" in analyze) ARGS+=("--output" "$OUTPUT_FILE") ;; + blocks) + ARGS+=("--output" "$OUTPUT_FILE") + ;; esac # Parse ignore-paths YAML list into --ignore-paths flag From d49f463d32313259ce2597956485f6c767b9532f Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 19 Mar 2026 18:30:04 +0100 Subject: [PATCH 13/71] test(analysis): add tests for OTP analysis servers Co-Authored-By: Claude Sonnet 4.6 --- .../analysis/behavior_config_server_test.exs | 75 +++++++++++++++ .../analysis/file_context_server_test.exs | 38 ++++++++ .../analysis/file_metrics_server_test.exs | 91 +++++++++++++++++++ 3 files changed, 204 insertions(+) create mode 100644 test/codeqa/analysis/behavior_config_server_test.exs create mode 100644 test/codeqa/analysis/file_context_server_test.exs create mode 100644 test/codeqa/analysis/file_metrics_server_test.exs diff --git a/test/codeqa/analysis/behavior_config_server_test.exs b/test/codeqa/analysis/behavior_config_server_test.exs new file mode 100644 index 0000000..b8afe6a --- /dev/null +++ b/test/codeqa/analysis/behavior_config_server_test.exs @@ -0,0 +1,75 @@ +defmodule 
CodeQA.Analysis.BehaviorConfigServerTest do + use ExUnit.Case, async: true + + alias CodeQA.Analysis.BehaviorConfigServer + + setup do + {:ok, pid} = BehaviorConfigServer.start_link() + {:ok, pid: pid} + end + + test "get_all_behaviors/1 returns a non-empty map of categories", %{pid: pid} do + behaviors = BehaviorConfigServer.get_all_behaviors(pid) + assert is_map(behaviors) + assert map_size(behaviors) > 0 + + Enum.each(behaviors, fn {category, list} -> + assert is_binary(category) + assert is_list(list) + assert length(list) > 0 + + Enum.each(list, fn {behavior, data} -> + assert is_binary(behavior) + assert is_map(data) + end) + end) + end + + test "get_all_behaviors/1 matches YamlElixir direct reads", %{pid: pid} do + behaviors = BehaviorConfigServer.get_all_behaviors(pid) + yaml_dir = "priv/combined_metrics" + + {:ok, files} = File.ls(yaml_dir) + + Enum.each(files |> Enum.filter(&String.ends_with?(&1, ".yml")), fn yml_file -> + category = String.trim_trailing(yml_file, ".yml") + {:ok, data} = YamlElixir.read_from_file(Path.join(yaml_dir, yml_file)) + + expected_behaviors = + data |> Enum.filter(fn {_k, v} -> is_map(v) end) |> Enum.map(&elem(&1, 0)) + + server_behaviors = Map.get(behaviors, category, []) |> Enum.map(&elem(&1, 0)) + assert Enum.sort(expected_behaviors) == Enum.sort(server_behaviors) + end) + end + + test "get_scalars/3 returns a map of {group, key} => scalar", %{pid: pid} do + behaviors = BehaviorConfigServer.get_all_behaviors(pid) + {category, [{behavior, _data} | _]} = Enum.at(behaviors, 0) + + scalars = BehaviorConfigServer.get_scalars(pid, category, behavior) + assert is_map(scalars) + + Enum.each(scalars, fn {{group, key}, scalar} -> + assert is_binary(group) + assert is_binary(key) + assert is_float(scalar) + end) + end + + test "get_scalars/3 returns empty map for unknown behavior", %{pid: pid} do + assert BehaviorConfigServer.get_scalars(pid, "nonexistent", "also_nonexistent") == %{} + end + + test "get_log_baseline/3 returns a float", 
%{pid: pid} do + behaviors = BehaviorConfigServer.get_all_behaviors(pid) + {category, [{behavior, _data} | _]} = Enum.at(behaviors, 0) + + baseline = BehaviorConfigServer.get_log_baseline(pid, category, behavior) + assert is_float(baseline) + end + + test "get_log_baseline/3 returns 0.0 for unknown behavior", %{pid: pid} do + assert BehaviorConfigServer.get_log_baseline(pid, "nonexistent", "also_nonexistent") == 0.0 + end +end diff --git a/test/codeqa/analysis/file_context_server_test.exs b/test/codeqa/analysis/file_context_server_test.exs new file mode 100644 index 0000000..660bd9a --- /dev/null +++ b/test/codeqa/analysis/file_context_server_test.exs @@ -0,0 +1,38 @@ +defmodule CodeQA.Analysis.FileContextServerTest do + use ExUnit.Case, async: true + + alias CodeQA.Analysis.FileContextServer + alias CodeQA.Engine.{FileContext, Pipeline} + + setup do + {:ok, pid} = FileContextServer.start_link() + {:ok, pid: pid} + end + + test "get/2 returns a Pipeline.FileContext", %{pid: pid} do + content = "defmodule Foo do\n def bar, do: :ok\nend\n" + ctx = FileContextServer.get(pid, content) + assert %FileContext{} = ctx + assert is_binary(ctx.content) + end + + test "get/2 returns identical struct on second call without rebuilding", %{pid: pid} do + content = "defmodule Foo do\n def bar, do: :ok\nend\n" + ctx1 = FileContextServer.get(pid, content) + ctx2 = FileContextServer.get(pid, content) + assert ctx1 == ctx2 + end + + test "get/2 with different content returns different results", %{pid: pid} do + ctx_a = FileContextServer.get(pid, "defmodule A do\nend\n") + ctx_b = FileContextServer.get(pid, "defmodule B do\n def foo, do: 1\nend\n") + assert ctx_a != ctx_b + end + + test "get/2 matches Pipeline.build_file_context/1 directly", %{pid: pid} do + content = "x = 1\ny = 2\n" + expected = Pipeline.build_file_context(content) + result = FileContextServer.get(pid, content) + assert result == expected + end +end diff --git a/test/codeqa/analysis/file_metrics_server_test.exs 
b/test/codeqa/analysis/file_metrics_server_test.exs new file mode 100644 index 0000000..791c315 --- /dev/null +++ b/test/codeqa/analysis/file_metrics_server_test.exs @@ -0,0 +1,91 @@ +defmodule CodeQA.Analysis.FileMetricsServerTest do + use ExUnit.Case, async: true + + alias CodeQA.Analysis.FileMetricsServer + + defp build_registry do + CodeQA.Engine.Analyzer.build_registry() + end + + setup do + {:ok, pid} = FileMetricsServer.start_link() + {:ok, pid: pid} + end + + describe "populate/3 and get_by_path/2" do + test "returns pre-populated baseline metrics for a path", %{pid: pid} do + content = "defmodule A do\n def foo, do: 1\nend\n" + + pipeline_result = %{ + "files" => %{ + "lib/a.ex" => %{"metrics" => %{"halstead" => %{"tokens" => 5.0}}} + } + } + + files_map = %{"lib/a.ex" => content} + :ok = FileMetricsServer.populate(pid, pipeline_result, files_map) + + metrics = FileMetricsServer.get_by_path(pid, "lib/a.ex") + assert metrics == %{"halstead" => %{"tokens" => 5.0}} + end + + test "returns nil for unknown path", %{pid: pid} do + :ok = FileMetricsServer.populate(pid, %{"files" => %{}}, %{}) + assert FileMetricsServer.get_by_path(pid, "nonexistent.ex") == nil + end + end + + describe "get_for_content/3" do + test "computes and caches metrics on first call", %{pid: pid} do + registry = build_registry() + content = "defmodule A do\n def foo, do: 1\nend\n" + + metrics = FileMetricsServer.get_for_content(pid, registry, content) + assert is_map(metrics) + assert map_size(metrics) > 0 + end + + test "returns identical result on second call (cache hit)", %{pid: pid} do + registry = build_registry() + content = "defmodule A do\n def foo, do: 1\nend\n" + + m1 = FileMetricsServer.get_for_content(pid, registry, content) + m2 = FileMetricsServer.get_for_content(pid, registry, content) + assert m1 == m2 + end + + test "different content returns different metrics", %{pid: pid} do + registry = build_registry() + ma = FileMetricsServer.get_for_content(pid, registry, "x = 1\n") 
+ + mb = + FileMetricsServer.get_for_content( + pid, + registry, + String.duplicate("def foo(a, b), do: a + b\n", 20) + ) + + assert ma != mb + end + + test "populate cross-indexes hash so get_for_content hits cache", %{pid: pid} do + registry = build_registry() + content = "defmodule A do\n def foo, do: 1\nend\n" + + pipeline_result = %{ + "files" => %{ + "lib/a.ex" => %{ + "metrics" => %{"halstead" => %{"tokens" => 99.0}} + } + } + } + + files_map = %{"lib/a.ex" => content} + :ok = FileMetricsServer.populate(pid, pipeline_result, files_map) + + # Should hit the hash-keyed cache entry seeded from pipeline_result + metrics = FileMetricsServer.get_for_content(pid, registry, content) + assert metrics == %{"halstead" => %{"tokens" => 99.0}} + end + end +end From 6678ca79b1b9edbf3e448045ad5e6812cc1161da Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 19 Mar 2026 18:30:09 +0100 Subject: [PATCH 14/71] test(ast): add tests for AST parsing, signals, nodes, and enrichment Co-Authored-By: Claude Sonnet 4.6 --- .../classification/node_classifier_test.exs | 246 +++++++++++++ .../ast/classification/node_protocol_test.exs | 114 ++++++ .../node_type_detector_test.exs | 143 ++++++++ ...ompound_node_assertions_languages_test.exs | 136 +++++++ .../enrichment/compound_node_builder_test.exs | 137 ++++++++ .../ast/enrichment/node_analyzer_test.exs | 62 ++++ test/codeqa/ast/lexing/string_token_test.exs | 195 ++++++++++ .../ast/lexing/token_normalizer_test.exs | 332 ++++++++++++++++++ .../codeqa/ast/lexing/token_protocol_test.exs | 142 ++++++++ test/codeqa/ast/nodes/code_node_test.exs | 55 +++ test/codeqa/ast/nodes/function_node_test.exs | 68 ++++ test/codeqa/ast/nodes/import_node_test.exs | 74 ++++ .../ast/parsing/parser_languages_test.exs | 168 +++++++++ test/codeqa/ast/parsing/parser_test.exs | 188 ++++++++++ .../ast/parsing/signal_registry_test.exs | 33 ++ .../codeqa/ast/parsing/signal_stream_test.exs | 43 +++ test/codeqa/ast/parsing/signal_test.exs | 56 +++ 
.../comment_density_signal_test.exs | 46 +++ .../classification/config_signal_test.exs | 28 ++ .../classification/data_signal_test.exs | 28 ++ .../classification/type_signal_test.exs | 40 +++ .../access_modifier_signal_test.exs | 49 +++ .../assignment_function_signal_test.exs | 84 +++++ .../structural/blank_line_signal_test.exs | 36 ++ .../structural/bracket_signal_test.exs | 43 +++ .../structural/branch_split_signal_test.exs | 93 +++++ .../structural/colon_indent_signal_test.exs | 29 ++ .../comment_divider_signal_test.exs | 52 +++ .../structural/decorator_signal_test.exs | 47 +++ .../structural/dedent_to_zero_signal_test.exs | 55 +++ .../doc_comment_lead_signal_test.exs | 44 +++ .../structural/keyword_signal_test.exs | 38 ++ .../structural/sql_block_signal_test.exs | 60 ++++ .../structural/triple_quote_signal_test.exs | 35 ++ 34 files changed, 2999 insertions(+) create mode 100644 test/codeqa/ast/classification/node_classifier_test.exs create mode 100644 test/codeqa/ast/classification/node_protocol_test.exs create mode 100644 test/codeqa/ast/classification/node_type_detector_test.exs create mode 100644 test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs create mode 100644 test/codeqa/ast/enrichment/compound_node_builder_test.exs create mode 100644 test/codeqa/ast/enrichment/node_analyzer_test.exs create mode 100644 test/codeqa/ast/lexing/string_token_test.exs create mode 100644 test/codeqa/ast/lexing/token_normalizer_test.exs create mode 100644 test/codeqa/ast/lexing/token_protocol_test.exs create mode 100644 test/codeqa/ast/nodes/code_node_test.exs create mode 100644 test/codeqa/ast/nodes/function_node_test.exs create mode 100644 test/codeqa/ast/nodes/import_node_test.exs create mode 100644 test/codeqa/ast/parsing/parser_languages_test.exs create mode 100644 test/codeqa/ast/parsing/parser_test.exs create mode 100644 test/codeqa/ast/parsing/signal_registry_test.exs create mode 100644 test/codeqa/ast/parsing/signal_stream_test.exs create mode 
100644 test/codeqa/ast/parsing/signal_test.exs create mode 100644 test/codeqa/ast/signals/classification/comment_density_signal_test.exs create mode 100644 test/codeqa/ast/signals/classification/config_signal_test.exs create mode 100644 test/codeqa/ast/signals/classification/data_signal_test.exs create mode 100644 test/codeqa/ast/signals/classification/type_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/access_modifier_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/assignment_function_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/blank_line_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/bracket_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/branch_split_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/colon_indent_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/comment_divider_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/decorator_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/dedent_to_zero_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/doc_comment_lead_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/keyword_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/sql_block_signal_test.exs create mode 100644 test/codeqa/ast/signals/structural/triple_quote_signal_test.exs diff --git a/test/codeqa/ast/classification/node_classifier_test.exs b/test/codeqa/ast/classification/node_classifier_test.exs new file mode 100644 index 0000000..5ec0055 --- /dev/null +++ b/test/codeqa/ast/classification/node_classifier_test.exs @@ -0,0 +1,246 @@ +defmodule CodeQA.AST.NodeClassifierTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Classification.NodeClassifier + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Lexing.Token + alias CodeQA.AST.Parsing.Parser + alias CodeQA.AST.Enrichment.Node + + alias 
CodeQA.AST.Nodes.{ + CodeNode, + DocNode, + FunctionNode, + ModuleNode, + ImportNode, + AttributeNode, + TestNode + } + + alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang + alias CodeQA.Languages.Code.Scripting.Python + alias CodeQA.Languages.Code.Web.JavaScript + alias CodeQA.Languages.Code.Native.Go + alias CodeQA.Languages.Code.Native.Rust + alias CodeQA.Languages.Code.Scripting.Ruby + alias CodeQA.Languages.Code.Web.TypeScript + alias CodeQA.Languages.Code.Vm.Java + alias CodeQA.Languages.Code.Vm.CSharp + + defp classify_first(code, opts \\ []) do + lang_mod = opts[:language_module] || CodeQA.Languages.Unknown + + [block | _] = + code + |> TokenNormalizer.normalize_structural() + |> Parser.detect_blocks(lang_mod) + + NodeClassifier.classify(block, lang_mod) + end + + defp node_with_tokens(tokens) do + %Node{ + tokens: tokens, + line_count: 1, + children: [] + } + end + + describe "classify/1 — function detection" do + test "def → FunctionNode" do + assert %FunctionNode{} = + classify_first("def foo(x), do: x + 1", language_module: ElixirLang) + end + + test "defp → FunctionNode" do + assert %FunctionNode{} = classify_first("defp bar(x), do: x", language_module: ElixirLang) + end + + test "defmacro → FunctionNode" do + assert %FunctionNode{} = + classify_first("defmacro my_macro(x), do: x", language_module: ElixirLang) + end + + test "function keyword → FunctionNode" do + assert %FunctionNode{} = + classify_first("function foo(x) {\n return x\n}", language_module: JavaScript) + end + + test "func keyword → FunctionNode" do + assert %FunctionNode{} = + classify_first("func Foo(x int) int {\n return x\n}", language_module: Go) + end + + test "fn keyword → FunctionNode" do + assert %FunctionNode{} = + classify_first("fn main() {\n println!(\"hello\")\n}", language_module: Rust) + end + end + + describe "classify/1 — module detection" do + test "defmodule → ModuleNode" do + assert %ModuleNode{} = + classify_first("defmodule Foo do\n :ok\nend", language_module: 
ElixirLang) + end + + test "class → ModuleNode" do + assert %ModuleNode{} = classify_first("class Foo:\n pass", language_module: Python) + end + + test "module → ModuleNode" do + assert %ModuleNode{} = + classify_first("module Foo\n def bar; end\nend", language_module: Ruby) + end + + test "interface → ModuleNode" do + assert %ModuleNode{} = + classify_first("interface Foo {\n bar(): void\n}", language_module: TypeScript) + end + + test "struct → ModuleNode" do + assert %ModuleNode{} = + classify_first("struct Point {\n x: f64,\n y: f64,\n}", language_module: Rust) + end + end + + describe "classify/1 — import detection" do + test "import → ImportNode" do + assert %ImportNode{} = classify_first("import Foo", language_module: ElixirLang) + end + + test "alias → ImportNode" do + assert %ImportNode{} = classify_first("alias Foo.Bar", language_module: ElixirLang) + end + + test "use → ImportNode" do + assert %ImportNode{} = + classify_first("use ExUnit.Case, async: true", language_module: ElixirLang) + end + + test "require → ImportNode" do + assert %ImportNode{} = classify_first("require Logger", language_module: ElixirLang) + end + + test "from keyword → ImportNode" do + assert %ImportNode{} = classify_first("from os import path", language_module: Python) + end + end + + describe "classify/1 — test detection" do + test "test macro → TestNode" do + assert %TestNode{} = + classify_first(~s(test "something" do\n :ok\nend), language_module: ElixirLang) + end + + test "describe → TestNode" do + assert %TestNode{} = + classify_first(~s(describe "some context" do\n :ok\nend), + language_module: ElixirLang + ) + end + + test "it → TestNode" do + code = "it \"behaves correctly\" do\n :ok\nend" + assert %TestNode{} = classify_first(code, language_module: JavaScript) + end + end + + describe "classify/1 — doc detection" do + test " token → DocNode" do + # A standalone triple-quoted string starts directly with the token + assert %DocNode{} = classify_first(~s("""\nSome 
doc.\n""")) + end + + test "direct token in node → DocNode" do + doc_token = %Token{kind: "", content: ~s("""), line: 1, col: 0} + nl = %Token{kind: "", content: "\n", line: 2, col: 0} + node = node_with_tokens([doc_token, nl]) + assert %DocNode{} = NodeClassifier.classify(node, CodeQA.Languages.Unknown) + end + end + + describe "classify/1 — attribute detection" do + test "@spec → AttributeNode with kind: :typespec" do + result = classify_first("@spec foo(integer()) :: :ok", language_module: ElixirLang) + assert %AttributeNode{kind: :typespec} = result + end + + test "@type → AttributeNode with kind: :typespec" do + result = classify_first("@type user_id :: integer()", language_module: ElixirLang) + assert %AttributeNode{kind: :typespec} = result + end + + test "@typep → AttributeNode with kind: :typespec" do + result = classify_first("@typep internal :: atom()", language_module: ElixirLang) + assert %AttributeNode{kind: :typespec} = result + end + + test "@callback → AttributeNode with kind: :typespec" do + result = + classify_first("@callback fetch(term()) :: {:ok, term()}", language_module: ElixirLang) + + assert %AttributeNode{kind: :typespec} = result + end + + test "@enforce_keys → AttributeNode with kind: nil" do + result = classify_first("@enforce_keys [:name, :age]", language_module: ElixirLang) + assert %AttributeNode{kind: nil} = result + end + + test "all Elixir typespec attributes are recognized" do + for attr <- ~w[spec type typep opaque callback macrocallback] do + result = classify_first("@#{attr} foo :: bar", language_module: ElixirLang) + + assert %AttributeNode{kind: :typespec} = result, + "expected AttributeNode(kind: :typespec) for @#{attr}" + end + end + end + + describe "classify/1 — code fallback" do + test "unrecognized token → CodeNode" do + assert %CodeNode{} = classify_first("x = 1 + 2") + end + + test "empty-like node with only whitespace tokens → CodeNode" do + nl = %Token{kind: "", content: "\n", line: 1, col: 0} + node = 
node_with_tokens([nl]) + + assert %CodeNode{} = + NodeClassifier.classify(node, CodeQA.Languages.Unknown) + end + end + + describe "classify/1 — ambiguity resolution" do + test "test beats function (test is not defp-style)" do + # 'test' is in TestSignal; FunctionSignal does not include 'test' + result = classify_first(~s(test "foo" do\n :ok\nend), language_module: ElixirLang) + assert %TestNode{} = result + end + + test "@inside code body at indent > 0 does not make block :attribute" do + code = "def foo do\n @cache true\n :ok\nend" + # FunctionSignal sees 'def' at indent 0 → :function wins + # AttributeSignal sees '@cache' but at indent 2, not 0 → no vote + result = classify_first(code, language_module: ElixirLang) + assert %FunctionNode{} = result + end + end + + describe "classify/1 — field preservation" do + test "preserves tokens, line_count, children, start/end_line" do + tokens = + "def foo, do: :ok" + |> TokenNormalizer.normalize_structural() + + [node] = Parser.detect_blocks(tokens, ElixirLang) + result = NodeClassifier.classify(node, ElixirLang) + + assert result.tokens == node.tokens + assert result.line_count == node.line_count + assert result.children == node.children + assert result.start_line == node.start_line + assert result.end_line == node.end_line + end + end +end diff --git a/test/codeqa/ast/classification/node_protocol_test.exs b/test/codeqa/ast/classification/node_protocol_test.exs new file mode 100644 index 0000000..54c922a --- /dev/null +++ b/test/codeqa/ast/classification/node_protocol_test.exs @@ -0,0 +1,114 @@ +defmodule CodeQA.AST.NodeProtocolTest.FakeNode do + defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label] + + defimpl CodeQA.AST.Classification.NodeProtocol do + def tokens(n), do: n.tokens + def line_count(n), do: n.line_count + def children(n), do: n.children + def start_line(n), do: n.start_line + def end_line(n), do: n.end_line + def label(n), do: n.label + + def flat_tokens(n) do + if 
Enum.empty?(n.children), + do: n.tokens, + else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + end + end +end + +defmodule CodeQA.AST.NodeProtocolTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Classification.NodeProtocol + alias CodeQA.AST.NodeProtocolTest.FakeNode + + @node %FakeNode{ + tokens: [:a, :b], + line_count: 3, + children: [], + start_line: 1, + end_line: 3, + label: "foo.ex:1" + } + + test "tokens/1" do + assert NodeProtocol.tokens(@node) == [:a, :b] + end + + test "line_count/1" do + assert NodeProtocol.line_count(@node) == 3 + end + + test "children/1" do + assert NodeProtocol.children(@node) == [] + end + + test "start_line/1" do + assert NodeProtocol.start_line(@node) == 1 + end + + test "end_line/1" do + assert NodeProtocol.end_line(@node) == 3 + end + + test "label/1" do + assert NodeProtocol.label(@node) == "foo.ex:1" + end + + describe "flat_tokens/1" do + alias CodeQA.AST.Enrichment.Node + + test "leaf node returns own tokens" do + leaf = %Node{tokens: [:a, :b], line_count: 1, children: []} + assert NodeProtocol.flat_tokens(leaf) == [:a, :b] + end + + test "non-leaf node returns flattened descendant tokens" do + child_a = %Node{tokens: [:a], line_count: 1, children: []} + child_b = %Node{tokens: [:b, :c], line_count: 1, children: []} + parent = %Node{tokens: [:x], line_count: 2, children: [child_a, child_b]} + assert NodeProtocol.flat_tokens(parent) == [:a, :b, :c] + end + + test "deeply nested node returns all leaf tokens" do + leaf = %Node{tokens: [:z], line_count: 1, children: []} + mid = %Node{tokens: [:y], line_count: 1, children: [leaf]} + root = %Node{tokens: [:x], line_count: 2, children: [mid]} + assert NodeProtocol.flat_tokens(root) == [:z] + end + end + + describe "Node implements NodeProtocol" do + alias CodeQA.AST.Enrichment.Node + + setup do + node = %Node{ + tokens: [:x, :y], + line_count: 3, + children: [], + start_line: 1, + end_line: 3, + label: "f.ex:1" + } + + %{node: node} + 
end + + test "tokens/1", %{node: node} do + assert NodeProtocol.tokens(node) == [:x, :y] + end + + test "children/1", %{node: node} do + assert NodeProtocol.children(node) == [] + end + + test "start_line/1", %{node: node} do + assert NodeProtocol.start_line(node) == 1 + end + + test "label/1", %{node: node} do + assert NodeProtocol.label(node) == "f.ex:1" + end + end +end diff --git a/test/codeqa/ast/classification/node_type_detector_test.exs b/test/codeqa/ast/classification/node_type_detector_test.exs new file mode 100644 index 0000000..caf0c7f --- /dev/null +++ b/test/codeqa/ast/classification/node_type_detector_test.exs @@ -0,0 +1,143 @@ +defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do + use ExUnit.Case, async: true + alias CodeQA.AST.Parsing.Parser + alias CodeQA.AST.Classification.NodeTypeDetector + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Nodes.{CodeNode, DocNode, AttributeNode, FunctionNode} + + defp detect_types(code, lang_mod \\ CodeQA.Languages.Code.Vm.Elixir) do + code + |> TokenNormalizer.normalize_structural() + |> Parser.detect_blocks(lang_mod) + |> NodeTypeDetector.detect_types(lang_mod) + end + + describe "detect_types/1" do + test "block with gets type :doc" do + code = ~s(@moduledoc """\nSome doc.\n""") + [block] = detect_types(code) + assert is_struct(block, DocNode) + end + + test "block with @spec gets type :typespec" do + code = "@spec fetch_user(integer()) :: {:ok, term()}" + [block] = detect_types(code) + assert is_struct(block, AttributeNode) + assert block.kind == :typespec + end + + test "block with @type gets type :typespec" do + code = "@type user_id :: integer()" + [block] = detect_types(code) + assert is_struct(block, AttributeNode) + assert block.kind == :typespec + end + + test "block starting with def gets type :function" do + code = "def foo(x), do: x + 1" + [block] = detect_types(code) + assert is_struct(block, FunctionNode) + end + + test "@ attribute inside function body does not make block 
:attribute" do + # FunctionSignal sees 'def' first → :function wins + # AttributeSignal sees '@cache' but at indent > 0 → no vote + code = "def foo do\n @cache true\n :ok\nend" + blocks = detect_types(code) + + code_block = + Enum.find(blocks, fn b -> + Enum.any?(b.tokens, &(&1.kind == "" and &1.content == "def")) + end) + + assert is_struct(code_block, FunctionNode) + end + + test "returns same number of blocks as input" do + code = "@spec foo() :: :ok\n\n\ndef foo, do: :ok" + blocks = detect_types(code) + assert length(blocks) == 2 + end + + test "all @typespec_attributes are recognized" do + for attr <- ~w[spec type typep opaque callback macrocallback] do + code = "@#{attr} foo :: bar" + [block] = detect_types(code) + + assert is_struct(block, AttributeNode) and block.kind == :typespec, + "expected AttributeNode with kind: :typespec for @#{attr}" + end + end + + test "empty list returns empty list" do + assert [] == NodeTypeDetector.detect_types([], CodeQA.Languages.Unknown) + end + end + + describe "detect_types/1 — typed struct output" do + test "returns DocNode for doc blocks" do + doc_token = %CodeQA.AST.Lexing.Token{kind: "", content: ~s("""), line: 1, col: 0} + nl = %CodeQA.AST.Lexing.Token{kind: "", content: "\n", line: 2, col: 0} + + node = %CodeQA.AST.Enrichment.Node{ + tokens: [doc_token, nl], + line_count: 2, + children: [], + start_line: 1, + end_line: 2 + } + + [result] = + CodeQA.AST.Classification.NodeTypeDetector.detect_types( + [node], + CodeQA.Languages.Code.Vm.Elixir + ) + + assert is_struct(result, DocNode) + end + + test "returns AttributeNode for typespec blocks" do + at = %CodeQA.AST.Lexing.Token{kind: "@", content: "@", line: 1, col: 0} + spec = %CodeQA.AST.Lexing.Token{kind: "", content: "spec", line: 1, col: 1} + nl = %CodeQA.AST.Lexing.Token{kind: "", content: "\n", line: 1, col: 5} + + node = %CodeQA.AST.Enrichment.Node{ + tokens: [at, spec, nl], + line_count: 1, + children: [], + start_line: 1, + end_line: 1 + } + + [result] = + 
CodeQA.AST.Classification.NodeTypeDetector.detect_types( + [node], + CodeQA.Languages.Code.Vm.Elixir + ) + + assert is_struct(result, AttributeNode) + assert result.kind == :typespec + end + + test "returns CodeNode for unclassified blocks" do + id = %CodeQA.AST.Lexing.Token{kind: "", content: "foo", line: 1, col: 0} + nl = %CodeQA.AST.Lexing.Token{kind: "", content: "\n", line: 1, col: 3} + + node = %CodeQA.AST.Enrichment.Node{ + tokens: [id, nl], + line_count: 1, + children: [], + start_line: 1, + end_line: 1 + } + + [result] = + CodeQA.AST.Classification.NodeTypeDetector.detect_types( + [node], + CodeQA.Languages.Code.Vm.Elixir + ) + + assert is_struct(result, CodeNode) + end + end +end diff --git a/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs b/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs new file mode 100644 index 0000000..d1e1b8f --- /dev/null +++ b/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs @@ -0,0 +1,136 @@ +defmodule CodeQA.AST.Enrichment.CompoundNodeAssertionsLanguagesTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Parsing.Parser + alias CodeQA.Languages.Unknown + alias CodeQA.AST.Classification.NodeTypeDetector + alias CodeQA.AST.Classification.NodeProtocol + alias CodeQA.AST.Enrichment.CompoundNodeBuilder + alias CodeQA.AST.Enrichment.CompoundNode + alias CodeQA.AST.Enrichment.Node + + Module.register_attribute(__MODULE__, :fixture, accumulate: true, persist: false) + + # Elixir fixtures + use Test.Fixtures.Elixir.Calculator + use Test.Fixtures.Elixir.EventBus + use Test.Fixtures.Elixir.RateLimiter + + # Python fixtures + use Test.Fixtures.Python.Calculator + use Test.Fixtures.Python.CsvPipeline + use Test.Fixtures.Python.ConfigParser + + # JavaScript fixtures + use Test.Fixtures.JavaScript.Calculator + use Test.Fixtures.JavaScript.FormValidator + use Test.Fixtures.JavaScript.ShoppingCart + + # Go fixtures + use 
Test.Fixtures.Go.Calculator + use Test.Fixtures.Go.HttpMiddleware + use Test.Fixtures.Go.CliParser + + # Rust fixtures + use Test.Fixtures.Rust.Calculator + use Test.Fixtures.Rust.Tokenizer + use Test.Fixtures.Rust.RingBuffer + + # Ruby fixtures + use Test.Fixtures.Ruby.Calculator + use Test.Fixtures.Ruby.OrmLite + use Test.Fixtures.Ruby.MarkdownRenderer + + # TypeScript fixtures + use Test.Fixtures.TypeScript.UserProfileStore + use Test.Fixtures.TypeScript.EventEmitter + use Test.Fixtures.TypeScript.DependencyInjection + + # Java fixtures + use Test.Fixtures.Java.BuilderPattern + use Test.Fixtures.Java.RepositoryPattern + use Test.Fixtures.Java.StrategyPattern + + # C# fixtures + use Test.Fixtures.CSharp.LinqPipeline + use Test.Fixtures.CSharp.AsyncTaskManager + use Test.Fixtures.CSharp.PluginSystem + + # Swift fixtures + use Test.Fixtures.Swift.ResultType + use Test.Fixtures.Swift.CombineStream + use Test.Fixtures.Swift.ActorModel + + # Kotlin fixtures + use Test.Fixtures.Kotlin.SealedState + use Test.Fixtures.Kotlin.CoroutineFlow + use Test.Fixtures.Kotlin.ExtensionLibrary + + # C++ fixtures + use Test.Fixtures.Cpp.SmartPointer + use Test.Fixtures.Cpp.TemplateContainer + use Test.Fixtures.Cpp.ObserverPattern + + # Scala fixtures + use Test.Fixtures.Scala.CaseClassAlgebra + use Test.Fixtures.Scala.TypeclassPattern + use Test.Fixtures.Scala.ActorMessages + + # Dart fixtures + use Test.Fixtures.Dart.WidgetState + use Test.Fixtures.Dart.FuturesAsync + use Test.Fixtures.Dart.MixinComposition + + # Zig fixtures + use Test.Fixtures.Zig.AllocatorInterface + use Test.Fixtures.Zig.TaggedUnion + use Test.Fixtures.Zig.IteratorProtocol + + # Lua fixtures + use Test.Fixtures.Lua.ClassSystem + use Test.Fixtures.Lua.EventSystem + use Test.Fixtures.Lua.StateMachine + + # Generate tests for fixtures with block_assertions + for {language, code, block_assertions} <- @fixture, block_assertion <- block_assertions do + test "[#{language}] #{block_assertion.description}" do + compounds 
= compound_nodes(unquote(code)) + none_of = Map.get(unquote(Macro.escape(block_assertion)), :none_of, []) + all_of = unquote(Macro.escape(block_assertion)).all_of + + assert Enum.any?(compounds, fn compound -> + tokens = all_tokens(compound) + compound_satisfies?(tokens, all_of, none_of) + end), + "No compound node found matching: #{unquote(block_assertion.description)}" + end + end + + defp compound_nodes(code) do + code + |> TokenNormalizer.normalize_structural() + |> Parser.detect_blocks(Unknown) + |> NodeTypeDetector.detect_types(Unknown) + |> CompoundNodeBuilder.build() + end + + defp all_tokens(%CompoundNode{docs: docs, typespecs: typespecs, code: code}) do + (docs ++ typespecs ++ code) + |> Enum.flat_map(&node_tokens/1) + end + + defp node_tokens(node) do + NodeProtocol.tokens(node) + end + + defp matches?({:exact, field, value}, token), do: Map.get(token, field) == value + + defp matches?({:partial, field, value}, token), + do: String.contains?(Map.get(token, field, ""), value) + + defp compound_satisfies?(tokens, all_of, none_of) do + Enum.all?(all_of, fn matcher -> Enum.any?(tokens, &matches?(matcher, &1)) end) and + Enum.all?(none_of, fn matcher -> not Enum.any?(tokens, &matches?(matcher, &1)) end) + end +end diff --git a/test/codeqa/ast/enrichment/compound_node_builder_test.exs b/test/codeqa/ast/enrichment/compound_node_builder_test.exs new file mode 100644 index 0000000..3881e9e --- /dev/null +++ b/test/codeqa/ast/enrichment/compound_node_builder_test.exs @@ -0,0 +1,137 @@ +defmodule CodeQA.AST.Enrichment.CompoundNodeBuilderTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Parsing.Parser + alias CodeQA.AST.Classification.NodeTypeDetector + alias CodeQA.AST.Enrichment.CompoundNode + alias CodeQA.AST.Enrichment.CompoundNodeBuilder + alias CodeQA.AST.Lexing.TokenNormalizer + + alias CodeQA.AST.Nodes.{DocNode, AttributeNode, CodeNode} + + defp build(code) do + lang_mod = CodeQA.Languages.Code.Vm.Elixir + opts = [language_module: lang_mod] + + 
code + |> TokenNormalizer.normalize_structural() + |> Parser.detect_blocks(lang_mod) + |> NodeTypeDetector.detect_types(lang_mod) + |> CompoundNodeBuilder.build() + end + + describe "build/1" do + test "returns CompoundNode structs" do + [compound | _] = build("def foo, do: :ok") + assert %CompoundNode{} = compound + end + + test "bare code block wraps in compound with empty docs and typespecs" do + [compound] = build("def foo, do: :ok") + assert compound.docs == [] + assert compound.typespecs == [] + assert length(compound.code) == 1 + end + + test "@doc block attaches to following code block" do + code = ~s(@doc """\nSome doc.\n"""\ndef foo, do: :ok) + [compound] = build(code) + assert length(compound.docs) == 1 + assert length(compound.code) == 1 + end + + test "@spec block attaches to following code block" do + code = "@spec foo() :: :ok\ndef foo, do: :ok" + [compound] = build(code) + assert length(compound.typespecs) == 1 + assert length(compound.code) == 1 + end + + test "consecutive code clauses accumulate in same compound" do + code = "def foo(:a), do: 1\ndef foo(:b), do: 2\ndef foo(_), do: 3" + [compound] = build(code) + assert length(compound.code) == 3 + end + + test "doc after code starts a new compound" do + code = ~s(def foo do\n :ok\nend\n\n\n@doc """\nSome doc.\n"""\ndef bar, do: :ok) + compounds = build(code) + assert length(compounds) == 2 + [first, second] = compounds + assert first.docs == [] + assert length(second.docs) == 1 + end + + test "two blank lines between code blocks starts a new compound" do + code = "def foo, do: :ok\n\n\ndef bar, do: :ok" + compounds = build(code) + assert length(compounds) == 2 + end + + test "single blank line between code blocks does NOT start a new compound" do + code = "def foo(:a), do: 1\n\ndef foo(:b), do: 2" + [compound] = build(code) + assert length(compound.code) == 2 + end + + test "start_line is set from first non-whitespace token" do + [compound] = build("def foo, do: :ok") + assert 
is_integer(compound.start_line) + assert compound.start_line >= 1 + end + + test "start_col is set from first non-whitespace token" do + [compound] = build("def foo, do: :ok") + assert is_integer(compound.start_col) + end + + test "typespec block before any code attaches to compound (no flush)" do + code = "@spec foo() :: :ok\ndef foo, do: :ok" + [compound] = build(code) + assert length(compound.typespecs) == 1 + assert length(compound.code) == 1 + end + + test "end_line is set from last non-whitespace token" do + [compound] = build("def foo, do: :ok") + assert is_integer(compound.end_line) + end + + test "end_col is set from last non-whitespace token" do + [compound] = build("def foo, do: :ok") + assert is_integer(compound.end_col) + end + + test "empty list returns empty list" do + assert [] == CompoundNodeBuilder.build([]) + end + end + + describe "build/1 with typed node structs" do + test "routes DocNode to docs bucket" do + doc = %DocNode{tokens: [:d], line_count: 1, children: [], start_line: 1, end_line: 1} + code = %CodeNode{tokens: [:c], line_count: 2, children: [], start_line: 2, end_line: 3} + + [compound] = CodeQA.AST.Enrichment.CompoundNodeBuilder.build([doc, code]) + assert length(compound.docs) == 1 + assert is_struct(hd(compound.docs), DocNode) + end + + test "routes AttributeNode to typespecs bucket" do + attr = %AttributeNode{ + tokens: [:a], + line_count: 1, + children: [], + start_line: 1, + end_line: 1, + kind: :typespec + } + + code = %CodeNode{tokens: [:c], line_count: 2, children: [], start_line: 2, end_line: 3} + + [compound] = CodeQA.AST.Enrichment.CompoundNodeBuilder.build([attr, code]) + assert length(compound.typespecs) == 1 + assert is_struct(hd(compound.typespecs), AttributeNode) + end + end +end diff --git a/test/codeqa/ast/enrichment/node_analyzer_test.exs b/test/codeqa/ast/enrichment/node_analyzer_test.exs new file mode 100644 index 0000000..6f3e439 --- /dev/null +++ b/test/codeqa/ast/enrichment/node_analyzer_test.exs @@ -0,0 +1,62 
@@ +defmodule CodeQA.AST.Enrichment.NodeAnalyzerTest do + use ExUnit.Case, async: true + alias CodeQA.AST.Enrichment.NodeAnalyzer + alias CodeQA.AST.Lexing.TokenNormalizer + + defp tokenize(code), do: TokenNormalizer.normalize_structural(code) + defp bound(code), do: code |> tokenize() |> NodeAnalyzer.bound_variables() + + describe "bound_variables/1" do + test "simple assignment binds the LHS identifier" do + assert "user" in bound("user = Repo.get!(id)") + end + + test "assignment RHS identifiers are NOT bound" do + result = bound("user = Repo.get!(id)") + refute "repo" in result + refute "id" in result + end + + test "with-clause binding (<-) binds the LHS identifier" do + assert "user" in bound("{:ok, user} <- fetch_user(id)") + end + + test "multiple assignments in a block are all bound" do + code = "a = foo()\nb = bar()\nc = baz()" + result = bound(code) + assert "a" in result + assert "b" in result + assert "c" in result + end + + test "compound LHS: only the immediately before = is bound" do + # `x.field = val` — `x` is not re-bound; skip non-simple LHS + result = bound("result = compute(x)") + assert "result" in result + end + + test "== operator does not create a binding" do + result = bound("x == y") + refute "x" in result + refute "y" in result + end + + test "=> fat arrow does not create a binding" do + result = bound("key => value") + refute "key" in result + end + + test "=~ regex match does not create a binding" do + result = bound("str =~ pattern") + refute "str" in result + end + + test "returns MapSet" do + assert %MapSet{} = bound("x = 1") + end + + test "empty token list returns empty MapSet" do + assert MapSet.new() == NodeAnalyzer.bound_variables([]) + end + end +end diff --git a/test/codeqa/ast/lexing/string_token_test.exs b/test/codeqa/ast/lexing/string_token_test.exs new file mode 100644 index 0000000..0a99e9e --- /dev/null +++ b/test/codeqa/ast/lexing/string_token_test.exs @@ -0,0 +1,195 @@ +defmodule CodeQA.AST.StringTokenTest do + use 
ExUnit.Case, async: true + + alias CodeQA.AST.Lexing.StringToken + alias CodeQA.AST.Lexing.TokenNormalizer + + describe "StringToken struct" do + test "has kind, content, line, col, interpolations, multiline, and quotes fields" do + tok = %StringToken{ + kind: "", + content: ~s("hello"), + line: 1, + col: 0, + interpolations: nil + } + + assert tok.kind == "" + assert tok.content == ~s("hello") + assert tok.line == 1 + assert tok.col == 0 + assert tok.interpolations == nil + assert tok.multiline == false + assert tok.quotes == :double + end + + test "interpolations defaults to nil" do + tok = %StringToken{kind: "", content: ~s("hello")} + assert tok.interpolations == nil + end + + test "multiline defaults to false" do + tok = %StringToken{kind: "", content: ~s("hello")} + assert tok.multiline == false + end + + test "quotes defaults to :double" do + tok = %StringToken{kind: "", content: ~s("hello")} + assert tok.quotes == :double + end + + test "multiline triple-quote struct" do + tok = %StringToken{kind: "", content: ~s("""), multiline: true, quotes: :double} + assert tok.multiline == true + assert tok.quotes == :double + end + end + + describe "TokenNormalizer emits StringToken for strings" do + test "plain string emits a StringToken" do + [tok] = + TokenNormalizer.normalize_structural(~s("hello")) + |> Enum.filter(&(&1.kind == "")) + + assert %StringToken{} = tok + end + + test "plain string StringToken has nil interpolations" do + [tok] = + TokenNormalizer.normalize_structural(~s("hello")) + |> Enum.filter(&(&1.kind == "")) + + assert tok.interpolations == nil + end + + test "Elixir/Ruby interpolated string emits a StringToken" do + [tok] = + TokenNormalizer.normalize_structural(~S|"hello #{name}"|) + |> Enum.filter(&(&1.kind == "")) + + assert %StringToken{} = tok + end + + test "JS/TS backtick interpolated string emits a StringToken" do + [tok] = + TokenNormalizer.normalize_structural(~S|`hello ${name}`|) + |> Enum.filter(&(&1.kind == "")) + + assert 
%StringToken{} = tok + end + + test "Kotlin/Dart/Scala interpolated string emits a StringToken" do + [tok] = + TokenNormalizer.normalize_structural(~S|"hello ${name}"|) + |> Enum.filter(&(&1.kind == "")) + + assert %StringToken{} = tok + end + + test "Swift interpolated string emits a StringToken" do + [tok] = + TokenNormalizer.normalize_structural(~S|"hello \(name)"|) + |> Enum.filter(&(&1.kind == "")) + + assert %StringToken{} = tok + end + + test "plain backtick string emits a StringToken" do + [tok] = + TokenNormalizer.normalize_structural(~S|`hello`|) + |> Enum.filter(&(&1.kind == "")) + + assert %StringToken{} = tok + end + + test "non-string tokens are still plain Token structs" do + tokens = TokenNormalizer.normalize_structural("foo = 42") + id = Enum.find(tokens, &(&1.kind == "")) + refute match?(%StringToken{}, id) + end + end + + describe "quotes field" do + test "double-quoted string has quotes :double" do + [tok] = + TokenNormalizer.normalize_structural(~s("hello")) + |> Enum.filter(&(&1.kind == "")) + + assert tok.quotes == :double + end + + test "single-quoted string has quotes :single" do + [tok] = + TokenNormalizer.normalize_structural("'hello'") + |> Enum.filter(&(&1.kind == "")) + + assert tok.quotes == :single + end + + test "backtick string has quotes :backtick" do + [tok] = + TokenNormalizer.normalize_structural(~S|`hello`|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.quotes == :backtick + end + + test "backtick interpolated string has quotes :backtick" do + [tok] = + TokenNormalizer.normalize_structural(~S|`hello ${name}`|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.quotes == :backtick + end + + test "Elixir interpolated string has quotes :double" do + [tok] = + TokenNormalizer.normalize_structural(~S|"hello #{name}"|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.quotes == :double + end + end + + describe "multiline field" do + test "regular string has multiline false" do + [tok] = + 
TokenNormalizer.normalize_structural(~s("hello")) + |> Enum.filter(&(&1.kind == "")) + + assert tok.multiline == false + end + + test "double triple-quote token has multiline true" do + [tok | _] = + TokenNormalizer.normalize_structural(~s("""\nhello\n""")) + |> Enum.filter(&(&1.kind == "")) + + assert tok.multiline == true + end + + test "single triple-quote token has multiline true" do + [tok | _] = + TokenNormalizer.normalize_structural("'''\nhello\n'''") + |> Enum.filter(&(&1.kind == "")) + + assert tok.multiline == true + end + + test "triple-quote token quotes :double for \"\"\"" do + [tok | _] = + TokenNormalizer.normalize_structural(~s("""\nhello\n""")) + |> Enum.filter(&(&1.kind == "")) + + assert tok.quotes == :double + end + + test "triple-quote token quotes :single for '''" do + [tok | _] = + TokenNormalizer.normalize_structural("'''\nhello\n'''") + |> Enum.filter(&(&1.kind == "")) + + assert tok.quotes == :single + end + end +end diff --git a/test/codeqa/ast/lexing/token_normalizer_test.exs b/test/codeqa/ast/lexing/token_normalizer_test.exs new file mode 100644 index 0000000..e8264ad --- /dev/null +++ b/test/codeqa/ast/lexing/token_normalizer_test.exs @@ -0,0 +1,332 @@ +defmodule CodeQA.AST.TokenNormalizerTest do + use ExUnit.Case, async: true + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Lexing.Token + alias CodeQA.AST.Lexing.StringToken + + defp kinds(tokens), do: Enum.map(tokens, & &1.kind) + + describe "normalize_structural/1" do + test "emits between lines" do + result = TokenNormalizer.normalize_structural("a\nb") + assert "" in kinds(result) + end + + test "two blank lines produce two or more consecutive tokens" do + result = TokenNormalizer.normalize_structural("a\n\nb") + + nl_runs = + result + |> Enum.chunk_by(&(&1.kind == "")) + |> Enum.filter(fn [h | _] -> h.kind == "" end) + |> Enum.map(&length/1) + + assert Enum.any?(nl_runs, &(&1 >= 2)) + end + + test "emits one token per 2 leading spaces" do + result = 
TokenNormalizer.normalize_structural(" foo") + assert Enum.count(result, &(&1.kind == "")) == 2 + end + + test "emits one token per tab" do + result = TokenNormalizer.normalize_structural("\t\tfoo") + assert Enum.count(result, &(&1.kind == "")) == 2 + end + + test "normalizes identifiers to " do + result = TokenNormalizer.normalize_structural("foo bar") + assert kinds(result) == ["", ""] + end + + test "normalizes numbers to " do + result = TokenNormalizer.normalize_structural("x = 42") + assert "" in kinds(result) + end + + test "empty string returns empty list" do + assert TokenNormalizer.normalize_structural("") == [] + end + + test "single leading space produces zero tokens (below threshold)" do + result = TokenNormalizer.normalize_structural(" foo") + assert not Enum.any?(result, &(&1.kind == "")) + end + + test "punctuation tokens like ( and : survive as individual tokens" do + result = TokenNormalizer.normalize_structural("foo(x):") + assert "(" in kinds(result) + assert ")" in kinds(result) + assert ":" in kinds(result) + end + + test "tokens carry line numbers" do + result = TokenNormalizer.normalize_structural("foo\nbar") + lines = Enum.map(result, & &1.line) + assert 1 in lines + assert 2 in lines + end + + test "tokens carry col offsets" do + result = TokenNormalizer.normalize_structural("foo") + [tok] = result + assert tok.col == 0 + end + + test "identifier token preserves original content" do + result = TokenNormalizer.normalize_structural("myVar") + [tok] = result + assert tok.kind == "" + assert tok.content == "myVar" + end + + test "keyword content is preserved (not normalized away)" do + result = TokenNormalizer.normalize_structural("def foo") + contents = Enum.map(result, & &1.content) + assert "def" in contents + end + + test "string token content is the original literal" do + result = TokenNormalizer.normalize_structural(~s("hello")) + tok = Enum.find(result, &(&1.kind == "")) + assert tok.content == ~s("hello") + end + + # multi-char operator 
tests + + test ">= is a single token" do + result = TokenNormalizer.normalize_structural("x >= y") + assert ">=" in kinds(result) + refute ">" in kinds(result) + end + + test "<= is a single token" do + result = TokenNormalizer.normalize_structural("x <= y") + assert "<=" in kinds(result) + refute "<" in kinds(result) + end + + test "== is a single token" do + result = TokenNormalizer.normalize_structural("x == y") + assert "==" in kinds(result) + end + + test "!= is a single token" do + result = TokenNormalizer.normalize_structural("x != y") + assert "!=" in kinds(result) + refute "!" in kinds(result) + end + + test "=== is a single token (not == + =)" do + result = TokenNormalizer.normalize_structural("x === y") + assert "===" in kinds(result) + refute "==" in kinds(result) + end + + test "!== is a single token" do + result = TokenNormalizer.normalize_structural("x !== y") + assert "!==" in kinds(result) + refute "!=" in kinds(result) + end + + test "|> is a single token (Elixir pipe)" do + result = TokenNormalizer.normalize_structural("x |> f") + assert "|>" in kinds(result) + refute "|" in kinds(result) + end + + test "<> is a single token (Elixir concat)" do + result = TokenNormalizer.normalize_structural(~s("a" <> "b")) + assert "<>" in kinds(result) + end + + test "<- is a single token (Elixir/Go arrow)" do + result = TokenNormalizer.normalize_structural("x <- y") + assert "<-" in kinds(result) + refute "<" in kinds(result) + end + + test "-> is a single token" do + result = TokenNormalizer.normalize_structural("x -> y") + assert "->" in kinds(result) + refute "-" in kinds(result) + end + + test "=> is a single token (fat arrow)" do + result = TokenNormalizer.normalize_structural("k => v") + assert "=>" in kinds(result) + end + + test "=~ is a single token (regex match)" do + result = TokenNormalizer.normalize_structural("x =~ y") + assert "=~" in kinds(result) + end + + test "&& is a single token" do + result = TokenNormalizer.normalize_structural("a && b") 
+ assert "&&" in kinds(result) + refute "&" in kinds(result) + end + + test "|| is a single token" do + result = TokenNormalizer.normalize_structural("a || b") + assert "||" in kinds(result) + refute "|" in kinds(result) + end + + test ":: is a single token" do + result = TokenNormalizer.normalize_structural("Foo::Bar") + assert "::" in kinds(result) + refute ":" in kinds(result) + end + + test ".. is a single token" do + result = TokenNormalizer.normalize_structural("1..10") + assert ".." in kinds(result) + end + + test "... is a single token (not .. + .)" do + result = TokenNormalizer.normalize_structural("1...10") + assert "..." in kinds(result) + refute ".." in kinds(result) + end + + test "multi-char operator value equals content (no normalization)" do + result = TokenNormalizer.normalize_structural("x >= y") + tok = Enum.find(result, &(&1.kind == ">=")) + assert tok.content == ">=" + end + end + + describe "interpolated string tokens are normalised to " do + test "Elixir/Ruby #{} emits with interpolations" do + [tok] = + TokenNormalizer.normalize_structural(~S|"hello #{name}"|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.interpolations == ["name"] + end + + test "JS/TS backtick with \${} emits with interpolations" do + [tok] = + TokenNormalizer.normalize_structural(~S|`hello ${name}`|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.interpolations == ["name"] + end + + test "JS/TS backtick static content has interpolation stripped" do + [tok] = + TokenNormalizer.normalize_structural(~S|`hello ${name} world`|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.content == "`hello world`" + end + + test "JS/TS backtick two interpolations are both captured" do + [tok] = + TokenNormalizer.normalize_structural(~S|`${a} and ${b}`|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.interpolations == ["a", "b"] + end + + test "plain backtick string without interpolation emits with nil interpolations" do + [tok] = + TokenNormalizer.normalize_structural(~S|`hello 
world`|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.interpolations == nil + end + + test "Kotlin/Dart/Scala \${} emits with interpolations" do + [tok] = + TokenNormalizer.normalize_structural(~S|"hello ${name}"|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.interpolations == ["name"] + end + + test "Kotlin/Dart/Scala static content has interpolation stripped" do + [tok] = + TokenNormalizer.normalize_structural(~S|"hello ${name} world"|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.content == ~S|"hello world"| + end + + test "Kotlin/Dart/Scala two interpolations are both captured" do + [tok] = + TokenNormalizer.normalize_structural(~S|"${a} and ${b}"|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.interpolations == ["a", "b"] + end + + test "Swift \\(...) emits with interpolations" do + [tok] = + TokenNormalizer.normalize_structural(~S|"hello \(name)"|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.interpolations == ["name"] + end + + test "Swift static content has interpolation stripped" do + [tok] = + TokenNormalizer.normalize_structural(~S|"hello \(name) world"|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.content == ~S|"hello world"| + end + + test "Swift two interpolations are both captured" do + [tok] = + TokenNormalizer.normalize_structural(~S|"\(a) and \(b)"|) + |> Enum.filter(&(&1.kind == "")) + + assert tok.interpolations == ["a", "b"] + end + + test "plain double-quoted string has nil interpolations" do + [tok] = + TokenNormalizer.normalize_structural(~s("hello")) + |> Enum.filter(&(&1.kind == "")) + + assert tok.interpolations == nil + end + end + + describe " token" do + test "triple double-quotes emits a StringToken with kind " do + tokens = TokenNormalizer.normalize_structural(~s(""")) + + assert [%StringToken{kind: "", content: ~s("""), multiline: true, quotes: :double}] = + tokens + end + + test "triple single-quotes emits a StringToken with kind " do + tokens = TokenNormalizer.normalize_structural("'''") + + assert 
[%StringToken{kind: "", content: "'''", multiline: true, quotes: :single}] = + tokens + end + + test "triple-quote is not consumed as empty string + bare quote" do + tokens = TokenNormalizer.normalize_structural(~s(""")) + refute Enum.any?(tokens, &(&1.kind == "")) + end + + test "content between triple-quotes is tokenized normally" do + code = ~s("""\nhello world\n""") + tokens = TokenNormalizer.normalize_structural(code) + trip_count = Enum.count(tokens, &(&1.kind == "")) + assert trip_count == 2 + assert Enum.any?(tokens, &(&1.kind == "" and &1.content == "hello")) + end + + test "regular double-quoted string still works" do + tokens = TokenNormalizer.normalize_structural(~s("hello")) + assert [%StringToken{kind: ""}] = tokens + end + end +end diff --git a/test/codeqa/ast/lexing/token_protocol_test.exs b/test/codeqa/ast/lexing/token_protocol_test.exs new file mode 100644 index 0000000..5534d10 --- /dev/null +++ b/test/codeqa/ast/lexing/token_protocol_test.exs @@ -0,0 +1,142 @@ +defmodule CodeQA.AST.Lexing.TokenProtocolTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Lexing.Token + alias CodeQA.AST.Lexing.StringToken + alias CodeQA.AST.Lexing.TokenProtocol + + describe "Token implementation" do + setup do + {:ok, token: %Token{kind: "", content: "foo", line: 3, col: 7}} + end + + test "kind/1", %{token: t} do + assert TokenProtocol.kind(t) == "" + end + + test "content/1", %{token: t} do + assert TokenProtocol.content(t) == "foo" + end + + test "line/1", %{token: t} do + assert TokenProtocol.line(t) == 3 + end + + test "col/1", %{token: t} do + assert TokenProtocol.col(t) == 7 + end + + test "nil location fields are preserved" do + t = %Token{kind: "", content: "\n", line: nil, col: nil} + assert TokenProtocol.line(t) == nil + assert TokenProtocol.col(t) == nil + end + end + + describe "StringToken implementation" do + setup do + {:ok, + token: %StringToken{ + kind: "", + content: "\"hello\"", + line: 10, + col: 2, + interpolations: nil + }} + end + + 
test "kind/1", %{token: t} do + assert TokenProtocol.kind(t) == "" + end + + test "content/1", %{token: t} do + assert TokenProtocol.content(t) == "\"hello\"" + end + + test "line/1", %{token: t} do + assert TokenProtocol.line(t) == 10 + end + + test "col/1", %{token: t} do + assert TokenProtocol.col(t) == 2 + end + + test "works with interpolated string token" do + t = %StringToken{ + kind: "", + content: "\"\#{x}\"", + line: 5, + col: 0, + interpolations: ["x"] + } + + assert TokenProtocol.kind(t) == "" + assert TokenProtocol.content(t) == "\"\#{x}\"" + end + end + + describe "StringToken (multiline) via protocol" do + setup do + {:ok, + token: %StringToken{ + kind: "", + content: ~s("""), + line: 2, + col: 0, + multiline: true, + quotes: :double + }} + end + + test "kind/1", %{token: t} do + assert TokenProtocol.kind(t) == "" + end + + test "content/1", %{token: t} do + assert TokenProtocol.content(t) == ~s(""") + end + + test "line/1", %{token: t} do + assert TokenProtocol.line(t) == 2 + end + + test "col/1", %{token: t} do + assert TokenProtocol.col(t) == 0 + end + + test "single-quote variant" do + t = %StringToken{ + kind: "", + content: "'''", + line: 5, + col: 0, + multiline: true, + quotes: :single + } + + assert TokenProtocol.kind(t) == "" + assert t.quotes == :single + end + end + + describe "polymorphic use" do + test "mixed token list can be processed uniformly" do + tokens = [ + %Token{kind: "", content: "x", line: 1, col: 0}, + %StringToken{kind: "", content: "\"hi\"", line: 1, col: 4}, + %StringToken{ + kind: "", + content: ~s("""), + line: 2, + col: 0, + multiline: true, + quotes: :double + }, + %Token{kind: "", content: "\n", line: 2, col: 3} + ] + + kinds = Enum.map(tokens, &TokenProtocol.kind/1) + assert kinds == ["", "", "", ""] + end + end +end diff --git a/test/codeqa/ast/nodes/code_node_test.exs b/test/codeqa/ast/nodes/code_node_test.exs new file mode 100644 index 0000000..f161ad8 --- /dev/null +++ b/test/codeqa/ast/nodes/code_node_test.exs 
@@ -0,0 +1,55 @@ +defmodule CodeQA.AST.Nodes.CodeNodeTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Nodes.{CodeNode, DocNode} + alias CodeQA.AST.Classification.NodeProtocol + + @tokens [:a, :b, :c] + + describe "CodeNode" do + setup do + node = %CodeNode{ + tokens: @tokens, + line_count: 2, + children: [], + start_line: 1, + end_line: 2, + label: "f.ex:1" + } + + %{node: node} + end + + test "implements NodeProtocol", %{node: node} do + assert NodeProtocol.tokens(node) == @tokens + assert NodeProtocol.line_count(node) == 2 + assert NodeProtocol.children(node) == [] + assert NodeProtocol.start_line(node) == 1 + assert NodeProtocol.end_line(node) == 2 + assert NodeProtocol.label(node) == "f.ex:1" + end + + test "all common fields default to nil except children" do + node = %CodeNode{tokens: [], line_count: 0, children: []} + assert NodeProtocol.start_line(node) == nil + assert NodeProtocol.end_line(node) == nil + assert NodeProtocol.label(node) == nil + end + end + + describe "DocNode" do + test "implements NodeProtocol" do + node = %DocNode{ + tokens: @tokens, + line_count: 1, + children: [], + start_line: 5, + end_line: 5, + label: nil + } + + assert NodeProtocol.tokens(node) == @tokens + assert NodeProtocol.children(node) == [] + end + end +end diff --git a/test/codeqa/ast/nodes/function_node_test.exs b/test/codeqa/ast/nodes/function_node_test.exs new file mode 100644 index 0000000..029a2a7 --- /dev/null +++ b/test/codeqa/ast/nodes/function_node_test.exs @@ -0,0 +1,68 @@ +defmodule CodeQA.AST.Nodes.FunctionNodeTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Nodes.{FunctionNode, ModuleNode} + alias CodeQA.AST.Classification.NodeProtocol + + describe "FunctionNode" do + setup do + node = %FunctionNode{ + tokens: [:a], + line_count: 5, + children: [], + start_line: 10, + end_line: 14, + label: "foo.ex:10", + name: "calculate", + arity: 2, + visibility: :public + } + + %{node: node} + end + + test "implements NodeProtocol", %{node: node} do + 
assert NodeProtocol.tokens(node) == [:a] + assert NodeProtocol.line_count(node) == 5 + assert NodeProtocol.start_line(node) == 10 + end + + test "specific fields are accessible", %{node: node} do + assert node.name == "calculate" + assert node.arity == 2 + assert node.visibility == :public + end + + test "specific fields default to nil" do + node = %FunctionNode{tokens: [], line_count: 0, children: []} + assert node.name == nil + assert node.arity == nil + assert node.visibility == nil + end + end + + describe "ModuleNode" do + test "implements NodeProtocol" do + node = %ModuleNode{ + tokens: [:m], + line_count: 20, + children: [], + start_line: 1, + end_line: 20, + label: nil, + name: "MyApp.Foo", + kind: :module + } + + assert NodeProtocol.tokens(node) == [:m] + assert node.name == "MyApp.Foo" + assert node.kind == :module + end + + test "specific fields default to nil" do + node = %ModuleNode{tokens: [], line_count: 0, children: []} + assert node.name == nil + assert node.kind == nil + end + end +end diff --git a/test/codeqa/ast/nodes/import_node_test.exs b/test/codeqa/ast/nodes/import_node_test.exs new file mode 100644 index 0000000..16c6771 --- /dev/null +++ b/test/codeqa/ast/nodes/import_node_test.exs @@ -0,0 +1,74 @@ +defmodule CodeQA.AST.Nodes.ImportNodeTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Nodes.{ImportNode, AttributeNode, TestNode} + alias CodeQA.AST.Classification.NodeProtocol + + describe "ImportNode" do + test "implements NodeProtocol" do + node = %ImportNode{ + tokens: [:i], + line_count: 1, + children: [], + start_line: 3, + end_line: 3, + label: nil, + target: "MyApp.Repo" + } + + assert NodeProtocol.tokens(node) == [:i] + assert node.target == "MyApp.Repo" + end + + test "target defaults to nil" do + node = %ImportNode{tokens: [], line_count: 0, children: []} + assert node.target == nil + end + end + + describe "AttributeNode" do + test "implements NodeProtocol" do + node = %AttributeNode{ + tokens: [:a], + line_count: 1, + 
children: [], + start_line: 2, + end_line: 2, + label: nil, + name: "moduledoc", + kind: :annotation + } + + assert NodeProtocol.tokens(node) == [:a] + assert node.name == "moduledoc" + assert node.kind == :annotation + end + + test "supports :typespec kind" do + node = %AttributeNode{tokens: [], line_count: 0, children: [], kind: :typespec} + assert node.kind == :typespec + end + end + + describe "TestNode" do + test "implements NodeProtocol" do + node = %TestNode{ + tokens: [:t], + line_count: 4, + children: [], + start_line: 10, + end_line: 13, + label: nil, + description: "returns the sum" + } + + assert NodeProtocol.tokens(node) == [:t] + assert node.description == "returns the sum" + end + + test "description defaults to nil" do + node = %TestNode{tokens: [], line_count: 0, children: []} + assert node.description == nil + end + end +end diff --git a/test/codeqa/ast/parsing/parser_languages_test.exs b/test/codeqa/ast/parsing/parser_languages_test.exs new file mode 100644 index 0000000..e2f3040 --- /dev/null +++ b/test/codeqa/ast/parsing/parser_languages_test.exs @@ -0,0 +1,168 @@ +defmodule CodeQA.AST.Parsing.ParserLanguagesTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Parsing.Parser + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.Language + alias CodeQA.Languages.Unknown + + Module.register_attribute(__MODULE__, :fixture, accumulate: true, persist: false) + + # Elixir fixtures + use Test.Fixtures.Elixir.Calculator + use Test.Fixtures.Elixir.EventBus + use Test.Fixtures.Elixir.RateLimiter + + # Python fixtures + use Test.Fixtures.Python.Calculator + use Test.Fixtures.Python.CsvPipeline + use Test.Fixtures.Python.ConfigParser + + # JavaScript fixtures + use Test.Fixtures.JavaScript.Calculator + use Test.Fixtures.JavaScript.FormValidator + use Test.Fixtures.JavaScript.ShoppingCart + + # Go fixtures + use Test.Fixtures.Go.Calculator + use Test.Fixtures.Go.HttpMiddleware + use Test.Fixtures.Go.CliParser + + # Rust fixtures + use 
Test.Fixtures.Rust.Calculator + use Test.Fixtures.Rust.Tokenizer + use Test.Fixtures.Rust.RingBuffer + + # Ruby fixtures + use Test.Fixtures.Ruby.Calculator + use Test.Fixtures.Ruby.OrmLite + use Test.Fixtures.Ruby.MarkdownRenderer + + # TypeScript fixtures + use Test.Fixtures.TypeScript.UserProfileStore + use Test.Fixtures.TypeScript.EventEmitter + use Test.Fixtures.TypeScript.DependencyInjection + + # Java fixtures + use Test.Fixtures.Java.BuilderPattern + use Test.Fixtures.Java.RepositoryPattern + use Test.Fixtures.Java.StrategyPattern + + # C# fixtures + use Test.Fixtures.CSharp.LinqPipeline + use Test.Fixtures.CSharp.AsyncTaskManager + use Test.Fixtures.CSharp.PluginSystem + + # Swift fixtures + use Test.Fixtures.Swift.ResultType + use Test.Fixtures.Swift.CombineStream + use Test.Fixtures.Swift.ActorModel + + # Kotlin fixtures + use Test.Fixtures.Kotlin.SealedState + use Test.Fixtures.Kotlin.CoroutineFlow + use Test.Fixtures.Kotlin.ExtensionLibrary + + # C++ fixtures + use Test.Fixtures.Cpp.SmartPointer + use Test.Fixtures.Cpp.TemplateContainer + use Test.Fixtures.Cpp.ObserverPattern + + # Scala fixtures + use Test.Fixtures.Scala.CaseClassAlgebra + use Test.Fixtures.Scala.TypeclassPattern + use Test.Fixtures.Scala.ActorMessages + + # Dart fixtures + use Test.Fixtures.Dart.WidgetState + use Test.Fixtures.Dart.FuturesAsync + use Test.Fixtures.Dart.MixinComposition + + # Zig fixtures + use Test.Fixtures.Zig.AllocatorInterface + use Test.Fixtures.Zig.TaggedUnion + use Test.Fixtures.Zig.IteratorProtocol + + # Lua fixtures + use Test.Fixtures.Lua.ClassSystem + use Test.Fixtures.Lua.EventSystem + use Test.Fixtures.Lua.StateMachine + + # Note: accumulate: true prepends, so Enum.at(0) is the LAST registered fixture. + # All @code values use 0 leading spaces, so @indentation_level will always be 0 + # and the normalization branch below is never taken. 
+ @indentation_level @fixture + |> Enum.at(0) + |> elem(1) + |> String.split("\n") + |> List.first() + |> then(&Regex.run(~r/^\s*/, &1)) + |> List.first() + |> String.length() + + @normalized_fixtures for {language, code, block_assertions} <- @fixture, + do: + {language, + if @indentation_level > 0 do + code + |> String.split("\n") + |> Enum.map_join( + "\n", + &String.replace_leading( + &1, + String.duplicate(" ", @indentation_level), + "" + ) + ) + else + code + end, block_assertions} + + defp blocks(code, lang_mod \\ CodeQA.Languages.Unknown) do + code + |> TokenNormalizer.normalize_structural() + |> Parser.detect_blocks(lang_mod) + end + + defp children(code, lang_mod \\ CodeQA.Languages.Unknown) do + code + |> TokenNormalizer.normalize_structural() + |> Parser.detect_blocks(lang_mod) + |> Enum.flat_map(& &1.children) + end + + describe "blocks/2" do + for {language, code, _block_assertions} <- @normalized_fixtures do + lang_name = language |> String.split() |> hd() + lang_mod = Language.find(lang_name) + + test "detects at least 3 blocks for #{language} code" do + lang_mod = unquote(lang_mod) + result = blocks(unquote(code), lang_mod) + + if unquote(lang_mod) == Unknown do + assert length(result) >= 1 + else + assert length(result) >= 3 + end + end + + test "detects at least 3 sub-blocks for #{language} code" do + lang_mod = unquote(lang_mod) + result = children(unquote(code), lang_mod) + + if unquote(lang_mod) == Unknown do + assert length(result) >= 0 + else + assert length(result) >= 3 + end + end + + test "detects less sub-blocks than line-numbers for #{language} code" do + lang_mod = unquote(lang_mod) + result = children(unquote(code), lang_mod) + assert length(result) < length(String.split(unquote(code), "\n")) + end + end + end +end diff --git a/test/codeqa/ast/parsing/parser_test.exs b/test/codeqa/ast/parsing/parser_test.exs new file mode 100644 index 0000000..c37a8a8 --- /dev/null +++ b/test/codeqa/ast/parsing/parser_test.exs @@ -0,0 +1,188 @@ 
+defmodule CodeQA.AST.Parsing.ParserTest do + use ExUnit.Case, async: true + alias CodeQA.AST.Parsing.Parser + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang + alias CodeQA.Languages.Code.Scripting.Python + alias CodeQA.Languages.Unknown + + defp tokenize(code), do: TokenNormalizer.normalize_structural(code) + + describe "detect_blocks/2" do + test "single block for file with no blank lines" do + tokens = tokenize("def foo\n x = 1\nend\n") + blocks = Parser.detect_blocks(tokens, ElixirLang) + assert length(blocks) == 1 + end + + test "splits into two blocks at blank line" do + tokens = tokenize("def foo\n x\nend\n\n\ndef bar\n y\nend\n") + blocks = Parser.detect_blocks(tokens, ElixirLang) + assert length(blocks) == 2 + end + + test "each block has correct line_count" do + tokens = tokenize("def foo\n x\nend\n\n\ndef bar\n y\nend\n") + [b1, b2] = Parser.detect_blocks(tokens, ElixirLang) + assert b1.line_count >= 3 + assert b2.line_count >= 3 + end + + test "empty input returns empty list" do + assert Parser.detect_blocks([], Unknown) == [] + end + + test "detects bracket sub-blocks" do + tokens = tokenize("foo(a, b)\nbar(c)\n") + [block] = Parser.detect_blocks(tokens, Unknown) + assert block.children != [] + end + + test "detects colon-indent sub-blocks for python language hint" do + tokens = tokenize("def foo:\n return 1\n") + [block] = Parser.detect_blocks(tokens, Python) + assert block.children != [] + end + + test "fewer sub-blocks without python hint than with it (colon rule not applied)" do + tokens = tokenize("def foo:\n return 1\n") + without_hint = Parser.detect_blocks(tokens, Unknown) + with_hint = Parser.detect_blocks(tokens, Python) + count_without = without_hint |> Enum.map(&length(&1.children)) |> Enum.sum() + count_with = with_hint |> Enum.map(&length(&1.children)) |> Enum.sum() + assert count_with >= count_without + end + + test "block has children_count accessible via Node.children_count/1" do + alias 
CodeQA.AST.Enrichment.Node + tokens = tokenize("foo(a)\nbar(b)\n") + [block] = Parser.detect_blocks(tokens, Unknown) + assert Node.children_count(block) == length(block.children) + end + end + + describe "recursive sub-block nesting" do + test "nested bracket calls produce a multi-level sub-block tree" do + # def foo(bar(x, y), baz) — the arg list contains another call with its own args + tokens = tokenize("def foo(bar(x, y), baz)\n result\nend\n") + [block] = Parser.detect_blocks(tokens, Unknown) + + # depth 1 — the outer argument list + args = + Enum.find(block.children, fn b -> + Enum.any?(b.tokens, &(&1.content == "bar")) + end) + + assert args != nil, "expected an arg-list sub-block containing 'bar'" + + # depth 2 — the inner call (x, y) inside bar(...) + inner = + Enum.find(args.children, fn b -> + Enum.any?(b.tokens, &(&1.content == "x")) + end) + + assert inner != nil, "expected a sub-block for the inner call (x, y)" + + # depth 3 — (x, y) is a leaf: no further bracket structure inside + assert inner.children == [] + end + + test "triply nested brackets produce three levels of sub-blocks" do + tokens = tokenize("def outer(inner(deep(value)))\n :ok\nend\n") + [block] = Parser.detect_blocks(tokens, Unknown) + + # depth 1: (inner(deep(value))) + d1 = + Enum.find(block.children, fn b -> + Enum.any?(b.tokens, &(&1.content == "inner")) + end) + + assert d1 != nil + + # depth 2: (deep(value)) + d2 = + Enum.find(d1.children, fn b -> + Enum.any?(b.tokens, &(&1.content == "deep")) + end) + + assert d2 != nil + + # depth 3: (value) — leaf + d3 = + Enum.find(d2.children, fn b -> + Enum.any?(b.tokens, &(&1.content == "value")) + end) + + assert d3 != nil + assert d3.children == [] + end + end + + describe "triple-quote protection" do + test "blank lines inside a heredoc do not create a new block" do + code = """ + before + + + \""" + Some doc. + + More doc. 
+ \""" + + after + """ + + tokens = TokenNormalizer.normalize_structural(code) + blocks = Parser.detect_blocks(tokens, Unknown) + # The heredoc (including its blank line) should be ONE block, not split + heredoc_block = + Enum.find(blocks, fn b -> + Enum.any?(b.tokens, &(&1.kind == "")) + end) + + assert heredoc_block != nil + # Ensure no split happened inside — the heredoc block contains both "Some" and "More" + contents = Enum.filter(heredoc_block.tokens, &(&1.kind == "")) + names = Enum.map(contents, & &1.content) + assert "Some" in names + assert "More" in names + end + + test "content before and after a heredoc becomes separate blocks" do + code = """ + def foo do + :ok + end + + + \""" + doc here + \""" + + + def bar do + :ok + end + """ + + tokens = TokenNormalizer.normalize_structural(code) + blocks = Parser.detect_blocks(tokens, Unknown) + # Expect exactly 3 blocks: code-before, heredoc, code-after + assert length(blocks) == 3 + assert Enum.any?(Enum.at(blocks, 0).tokens, &(&1.content == "foo")) + assert Enum.any?(Enum.at(blocks, 1).tokens, &(&1.kind == "")) + assert Enum.any?(Enum.at(blocks, 2).tokens, &(&1.content == "bar")) + end + end + + describe "language_from_path/1" do + test "returns :python for .py files" do + assert Parser.language_from_path("lib/foo.py") == :python + end + + test "returns :unknown for unknown extensions" do + assert Parser.language_from_path("lib/foo.xyz") == :unknown + end + end +end diff --git a/test/codeqa/ast/parsing/signal_registry_test.exs b/test/codeqa/ast/parsing/signal_registry_test.exs new file mode 100644 index 0000000..f0c0788 --- /dev/null +++ b/test/codeqa/ast/parsing/signal_registry_test.exs @@ -0,0 +1,33 @@ +defmodule CodeQA.AST.Parsing.SignalRegistryTest do + use ExUnit.Case, async: true + alias CodeQA.AST.Parsing.SignalRegistry + + test "new/0 returns empty registry" do + r = SignalRegistry.new() + assert r.structural == [] + assert r.classification == [] + end + + test "register_structural/2 appends signal" 
do + alias CodeQA.AST.Signals.Structural.BlankLineSignal + r = SignalRegistry.new() |> SignalRegistry.register_structural(%BlankLineSignal{}) + assert length(r.structural) == 1 + end + + test "register_classification/2 appends signal" do + alias CodeQA.AST.Signals.Classification.FunctionSignal + r = SignalRegistry.new() |> SignalRegistry.register_classification(%FunctionSignal{}) + assert length(r.classification) == 1 + end + + test "default/0 includes all built-in signals" do + r = SignalRegistry.default() + assert length(r.structural) >= 4 + assert length(r.classification) >= 6 + end + + test "default/0 has exactly 10 classification signals" do + r = SignalRegistry.default() + assert length(r.classification) == 10 + end +end diff --git a/test/codeqa/ast/parsing/signal_stream_test.exs b/test/codeqa/ast/parsing/signal_stream_test.exs new file mode 100644 index 0000000..20c9226 --- /dev/null +++ b/test/codeqa/ast/parsing/signal_stream_test.exs @@ -0,0 +1,43 @@ +defmodule CodeQA.AST.SignalStreamTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.Token + alias CodeQA.Support.CounterSignal + + defp tok(kind, content), do: %Token{kind: kind, content: content, line: 1, col: 0} + + test "returns one emission list per signal" do + tokens = [tok("", "foo"), tok("", "\n"), tok("", "bar")] + results = SignalStream.run(tokens, [%CounterSignal{}], []) + assert length(results) == 1 + end + + test "emissions list contains all emitted values from the signal" do + tokens = [tok("", "foo"), tok("", "\n"), tok("", "bar")] + + [ + [ + {CodeQA.Support.CounterSignal, :test, :id_seen, 0}, + {CodeQA.Support.CounterSignal, :test, :id_seen, 2} + ] + ] = + SignalStream.run(tokens, [%CounterSignal{}], []) + end + + test "non-emitting tokens produce no entries" do + tokens = [tok("", "\n"), tok("", "\n")] + [[]] = SignalStream.run(tokens, [%CounterSignal{}], []) + end + + test "multiple signals run independently" do + tokens = [tok("", 
"x")] + results = SignalStream.run(tokens, [%CounterSignal{}, %CounterSignal{}], []) + assert length(results) == 2 + end + + test "empty token stream returns empty emissions per signal" do + results = SignalStream.run([], [%CounterSignal{}], []) + assert results == [[]] + end +end diff --git a/test/codeqa/ast/parsing/signal_test.exs b/test/codeqa/ast/parsing/signal_test.exs new file mode 100644 index 0000000..47d72ad --- /dev/null +++ b/test/codeqa/ast/parsing/signal_test.exs @@ -0,0 +1,56 @@ +defmodule CodeQA.AST.SignalTest do + use ExUnit.Case, async: true + + defmodule TestSignal do + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + def source(_), do: TestSignal + def group(_), do: :split + def init(_, _opts), do: %{count: 0} + + def emit(_, _token, state) do + new_state = %{state | count: state.count + 1} + {MapSet.new([{:tick, state.count}]), new_state} + end + end + end + + defmodule SilentSignal do + defstruct [] + + defimpl CodeQA.AST.Parsing.Signal do + def source(_), do: SilentSignal + def group(_), do: :split + def init(_, _), do: %{} + def emit(_, _token, state), do: {MapSet.new(), state} + end + end + + alias CodeQA.AST.Parsing.Signal + + test "source returns the implementing module" do + assert Signal.source(%TestSignal{}) == TestSignal + end + + test "group returns the signal's group atom" do + assert Signal.group(%TestSignal{}) == :split + end + + test "init returns initial state" do + assert Signal.init(%TestSignal{}, []) == %{count: 0} + end + + test "emit returns {MapSet of {name, value} pairs, new_state}" do + token = %CodeQA.AST.Lexing.Token{kind: "", content: "foo", line: 1, col: 0} + {emissions, new_state} = Signal.emit(%TestSignal{}, token, %{count: 0}) + assert MapSet.member?(emissions, {:tick, 0}) + assert new_state == %{count: 1} + end + + test "emit may return empty MapSet for no emission" do + token = %CodeQA.AST.Lexing.Token{kind: "", content: "\n", line: 1, col: 0} + {emissions, _state} = Signal.emit(%SilentSignal{}, token, 
%{}) + assert MapSet.size(emissions) == 0 + end +end diff --git a/test/codeqa/ast/signals/classification/comment_density_signal_test.exs b/test/codeqa/ast/signals/classification/comment_density_signal_test.exs new file mode 100644 index 0000000..71069d7 --- /dev/null +++ b/test/codeqa/ast/signals/classification/comment_density_signal_test.exs @@ -0,0 +1,46 @@ +defmodule CodeQA.AST.Signals.Classification.CommentDensitySignalTest do + use ExUnit.Case, async: true + alias CodeQA.AST.Signals.Classification.CommentDensitySignal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.Languages.Code.Scripting.Python + alias CodeQA.Languages.Unknown + + defp run(tokens, lang_mod \\ Unknown), + do: SignalStream.run(tokens, [%CommentDensitySignal{}], lang_mod) |> List.flatten() + + defp t(content, kind \\ ""), do: %{kind: kind, content: content, line: 1, col: 0} + defp nl, do: %{kind: "", content: "\n", line: 1, col: 0} + defp on_line(tokens, line), do: Enum.map(tokens, &%{&1 | line: line}) + + test "votes comment when >60% of lines start with #" do + tokens = + on_line([t("#"), t("license")], 1) ++ + [nl()] ++ + on_line([t("#"), t("copyright")], 2) ++ + [nl()] ++ + on_line([t("#"), t("author")], 3) ++ + [nl()] ++ + on_line([t("def"), t("foo")], 4) + + emissions = run(tokens, Python) + assert [{CommentDensitySignal, :classification, :comment_vote, _}] = emissions + end + + test "does not vote when comment density is low" do + tokens = + on_line([t("def"), t("foo")], 1) ++ + [nl()] ++ + on_line([t("#"), t("note")], 2) + + assert run(tokens, Python) == [] + end + + test "does not vote when no comment_prefixes provided" do + tokens = + on_line([t("#"), t("comment")], 1) ++ + [nl()] ++ + on_line([t("#"), t("comment")], 2) + + assert run(tokens, Unknown) == [] + end +end diff --git a/test/codeqa/ast/signals/classification/config_signal_test.exs b/test/codeqa/ast/signals/classification/config_signal_test.exs new file mode 100644 index 0000000..5c63672 --- /dev/null +++ 
b/test/codeqa/ast/signals/classification/config_signal_test.exs @@ -0,0 +1,28 @@ +defmodule CodeQA.AST.Signals.Classification.ConfigSignalTest do + use ExUnit.Case, async: true + alias CodeQA.AST.Signals.Classification.ConfigSignal + alias CodeQA.AST.Parsing.SignalStream + + defp run(tokens), do: SignalStream.run(tokens, [%ConfigSignal{}], []) |> List.flatten() + defp t(content, kind \\ ""), do: %{kind: kind, content: content, line: 1, col: 0} + + test "emits config_vote for 'config' keyword at indent 0" do + emissions = run([t("config"), t(":app"), t(","), t("key:"), t("val")]) + assert [{ConfigSignal, :classification, :config_vote, 3}] = emissions + end + + test "emits config_vote for 'configure' keyword" do + emissions = run([t("configure")]) + assert [{ConfigSignal, :classification, :config_vote, 3}] = emissions + end + + test "does not emit when indented" do + emissions = run([t("", ""), t("config")]) + assert emissions == [] + end + + test "does not emit for 'config' inside brackets" do + tokens = [t("(", "("), t("config"), t(")", ")")] + assert run(tokens) == [] + end +end diff --git a/test/codeqa/ast/signals/classification/data_signal_test.exs b/test/codeqa/ast/signals/classification/data_signal_test.exs new file mode 100644 index 0000000..c537f95 --- /dev/null +++ b/test/codeqa/ast/signals/classification/data_signal_test.exs @@ -0,0 +1,28 @@ +defmodule CodeQA.AST.Signals.Classification.DataSignalTest do + use ExUnit.Case, async: true + alias CodeQA.AST.Signals.Classification.DataSignal + alias CodeQA.AST.Parsing.SignalStream + + defp run(tokens), do: SignalStream.run(tokens, [%DataSignal{}], []) |> List.flatten() + + defp t(content, kind), do: %{kind: kind, content: content, line: 1, col: 0} + defp str(v), do: t(v, "") + defp num(v), do: t(v, "") + defp id(v), do: t(v, "") + + test "votes data for high-literal token stream" do + tokens = [str("foo"), str("bar"), num("1"), num("2"), id("key")] + emissions = run(tokens) + assert [{DataSignal, 
:classification, :data_vote, _}] = emissions + end + + test "does not vote when control-flow keyword present" do + tokens = [str("foo"), id("if"), str("bar")] + assert run(tokens) == [] + end + + test "does not vote when literal ratio is low" do + tokens = [id("foo"), id("bar"), id("baz"), str("one")] + assert run(tokens) == [] + end +end diff --git a/test/codeqa/ast/signals/classification/type_signal_test.exs b/test/codeqa/ast/signals/classification/type_signal_test.exs new file mode 100644 index 0000000..96811e6 --- /dev/null +++ b/test/codeqa/ast/signals/classification/type_signal_test.exs @@ -0,0 +1,40 @@ +defmodule CodeQA.AST.Signals.Classification.TypeSignalTest do + use ExUnit.Case, async: true + alias CodeQA.AST.Signals.Classification.TypeSignal + alias CodeQA.AST.Parsing.SignalStream + + defp run(tokens), do: SignalStream.run(tokens, [%TypeSignal{}], []) |> List.flatten() + + defp t(content, kind \\ ""), do: %{kind: kind, content: content, line: 1, col: 0} + + test "emits type_vote weight 3 for @type at indent 0" do + emissions = run([t("@", "@"), t("type"), t("t"), t("::"), t("integer")]) + assert [{TypeSignal, :classification, :type_vote, 3}] = emissions + end + + test "emits type_vote for @typep" do + emissions = run([t("@", "@"), t("typep"), t("t"), t("::")]) + assert [{TypeSignal, :classification, :type_vote, 3}] = emissions + end + + test "emits type_vote for @opaque" do + emissions = run([t("@", "@"), t("opaque"), t("t"), t("::")]) + assert [{TypeSignal, :classification, :type_vote, 3}] = emissions + end + + test "does not emit for @spec" do + emissions = run([t("@", "@"), t("spec"), t("foo"), t("()")]) + assert emissions == [] + end + + test "does not emit for @type inside indented block" do + emissions = run([t("", ""), t("@", "@"), t("type"), t("t")]) + assert emissions == [] + end + + test "emits at most one vote" do + tokens = [t("@", "@"), t("type"), t("a"), t("", ""), t("@", "@"), t("typep"), t("b")] + emissions = run(tokens) + assert 
length(emissions) == 1 + end +end diff --git a/test/codeqa/ast/signals/structural/access_modifier_signal_test.exs b/test/codeqa/ast/signals/structural/access_modifier_signal_test.exs new file mode 100644 index 0000000..5300881 --- /dev/null +++ b/test/codeqa/ast/signals/structural/access_modifier_signal_test.exs @@ -0,0 +1,49 @@ +defmodule CodeQA.AST.Signals.Structural.AccessModifierSignalTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Signals.Structural.AccessModifierSignal + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.Languages.Code.Vm.Java + + defp split_values(code, lang_mod) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = SignalStream.run(tokens, [%AccessModifierSignal{}], lang_mod) + for {_src, :split, :access_modifier_split, v} <- emissions, do: v + end + + test "no split for first modifier (seen_content == false)" do + assert split_values("public void foo() {}\n", Java) == [] + end + + test "emits split at second public modifier after content" do + splits = split_values("public void foo() {}\npublic void bar() {}\n", Java) + assert length(splits) == 1 + end + + test "emits split at private modifier after content" do + splits = split_values("public void foo() {}\nprivate void bar() {}\n", Java) + assert length(splits) == 1 + end + + test "does not split when modifier is inside brackets" do + splits = split_values("public void foo(private int x) {}\n", Java) + assert splits == [] + end + + test "does not split on identifier that matches modifier but is not at line start" do + splits = split_values("public void foo() {}\nfoo.public.bar()\n", Java) + assert splits == [] + end + + test "works at indent > 0 (unlike KeywordSignal)" do + # Two indented public declarations, no enclosing brackets — should split + splits = split_values(" public void foo() {}\n public void bar() {}\n", Java) + assert length(splits) == 1 + end + + test "group is 
:split" do + assert Signal.group(%AccessModifierSignal{}) == :split + end +end diff --git a/test/codeqa/ast/signals/structural/assignment_function_signal_test.exs b/test/codeqa/ast/signals/structural/assignment_function_signal_test.exs new file mode 100644 index 0000000..534b735 --- /dev/null +++ b/test/codeqa/ast/signals/structural/assignment_function_signal_test.exs @@ -0,0 +1,84 @@ +defmodule CodeQA.AST.Signals.Structural.AssignmentFunctionSignalTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Signals.Structural.AssignmentFunctionSignal + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.TokenNormalizer + + defp split_indices(code) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = SignalStream.run(tokens, [%AssignmentFunctionSignal{}], []) + for {_src, :split, :assignment_function_split, v} <- emissions, do: v + end + + test "emits split for identifier = function() pattern (second in file)" do + code = """ + const first = function() {} + const foo = function() {} + """ + + splits = split_indices(code) + assert length(splits) == 1 + end + + test "emits split for arrow function pattern: bar = () => {}" do + code = """ + const first = function() {} + const bar = () => {} + """ + + splits = split_indices(code) + assert length(splits) == 1 + end + + test "emits split for async function pattern: baz = async function() {}" do + code = """ + const first = function() {} + const baz = async function() {} + """ + + splits = split_indices(code) + assert length(splits) == 1 + end + + test "does NOT emit for the first assignment in file (seen_content == false)" do + code = "const foo = function() {}\n" + splits = split_indices(code) + assert splits == [] + end + + test "does NOT emit for plain assignment: x = 1" do + code = """ + const first = function() {} + x = 1 + """ + + splits = split_indices(code) + assert splits == [] + end + + test "does NOT emit when identifier is indented (indent > 0)" 
do + code = """ + const first = function() {} + foo = function() {} + """ + + splits = split_indices(code) + assert splits == [] + end + + test "emits split for module.exports = function() pattern" do + code = """ + const first = function() {} + module.exports = function() {} + """ + + splits = split_indices(code) + assert length(splits) == 1 + end + + test "group/1 returns :split" do + assert Signal.group(%AssignmentFunctionSignal{}) == :split + end +end diff --git a/test/codeqa/ast/signals/structural/blank_line_signal_test.exs b/test/codeqa/ast/signals/structural/blank_line_signal_test.exs new file mode 100644 index 0000000..464ca46 --- /dev/null +++ b/test/codeqa/ast/signals/structural/blank_line_signal_test.exs @@ -0,0 +1,36 @@ +defmodule CodeQA.AST.Signals.Structural.BlankLineSignalTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Signals.Structural.BlankLineSignal + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang + + defp split_values(code, lang_mod) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = SignalStream.run(tokens, [%BlankLineSignal{}], lang_mod) + for {_src, :split, :blank_split, v} <- emissions, do: v + end + + test "no splits for single block" do + assert split_values("def foo\n x\nend\n", ElixirLang) == [] + end + + test "emits split after blank line following block-end token" do + splits = split_values("def foo\n x\nend\n\n\ndef bar\n y\nend\n", ElixirLang) + assert length(splits) == 1 + end + + test "no split when blank line does not follow block-end token" do + assert split_values("x = 1\n\n\ny = 2\n", ElixirLang) == [] + end + + test "group is :split" do + assert Signal.group(%BlankLineSignal{}) == :split + end + + test "source is BlankLineSignal" do + assert Signal.source(%BlankLineSignal{}) == BlankLineSignal + end +end diff --git 
a/test/codeqa/ast/signals/structural/bracket_signal_test.exs b/test/codeqa/ast/signals/structural/bracket_signal_test.exs new file mode 100644 index 0000000..4159b6b --- /dev/null +++ b/test/codeqa/ast/signals/structural/bracket_signal_test.exs @@ -0,0 +1,43 @@ +defmodule CodeQA.AST.Signals.Structural.BracketSignalTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Signals.Structural.BracketSignal + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.TokenNormalizer + + defp enclosure_values(code) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = SignalStream.run(tokens, [%BracketSignal{}], []) + for {_src, :enclosure, :bracket_enclosure, v} <- emissions, do: v + end + + test "no enclosures for code without brackets" do + assert enclosure_values("foo\n") == [] + end + + test "emits enclosure for a single bracketed expression" do + enclosures = enclosure_values("foo(a, b)\n") + assert length(enclosures) == 1 + end + + test "emits only outermost enclosure for nested brackets" do + enclosures = enclosure_values("foo(bar(x))\n") + assert length(enclosures) == 1 + end + + test "enclosure value is {start_idx, end_idx} tuple" do + [{start, stop}] = enclosure_values("foo(a)\n") + assert is_integer(start) + assert is_integer(stop) + assert stop > start + end + + test "mismatched closing bracket is silently skipped" do + assert enclosure_values("foo)\n") == [] + end + + test "group is :enclosure" do + assert Signal.group(%BracketSignal{}) == :enclosure + end +end diff --git a/test/codeqa/ast/signals/structural/branch_split_signal_test.exs b/test/codeqa/ast/signals/structural/branch_split_signal_test.exs new file mode 100644 index 0000000..7a7eb94 --- /dev/null +++ b/test/codeqa/ast/signals/structural/branch_split_signal_test.exs @@ -0,0 +1,93 @@ +defmodule CodeQA.AST.Signals.Structural.BranchSplitSignalTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Signals.Structural.BranchSplitSignal + 
alias CodeQA.AST.Parsing.{Signal, SignalStream} + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang + alias CodeQA.Languages.Code.Scripting.Python + alias CodeQA.Languages.Code.Scripting.Ruby + alias CodeQA.Languages.Code.Scripting.PHP + alias CodeQA.Languages.Code.Vm.Java + + defp split_values(code, lang_mod) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = SignalStream.run(tokens, [%BranchSplitSignal{}], lang_mod) + for {_src, :branch_split, :branch_split, v} <- emissions, do: v + end + + test "group is :branch_split" do + assert Signal.group(%BranchSplitSignal{}) == :branch_split + end + + test "no split for code with no branch keywords" do + assert split_values("x = 1\ny = 2\n", ElixirLang) == [] + end + + test "emits split at else after seen content" do + splits = split_values("if x do\n :a\nelse\n :b\nend\n", ElixirLang) + assert length(splits) == 1 + end + + test "emits split at elif" do + splits = split_values("if x:\n pass\nelif y:\n pass\n", Python) + assert length(splits) == 1 + end + + test "emits split at multiple branch keywords" do + splits = split_values("if x do\n :a\nelsif y\n :b\nelse\n :c\nend\n", Ruby) + assert length(splits) == 2 + end + + test "does not emit at first keyword (no seen_content yet)" do + splits = split_values("if x do\n :a\nend\n", ElixirLang) + assert splits == [] + end + + test "does not emit when keyword is inside brackets" do + splits = split_values("foo(if x do 1 else 2 end)\n", ElixirLang) + assert splits == [] + end + + test "emits split at rescue" do + splits = split_values("try do\n :ok\nrescue\n _ -> :error\nend\n", ElixirLang) + assert length(splits) == 1 + end + + test "emits split at cond branch" do + splits = split_values("x = 1\ncond do\n x -> :a\nend\n", ElixirLang) + assert length(splits) == 1 + end + + test "emits split at except (Python)" do + splits = split_values("try:\n pass\nexcept ValueError:\n pass\n", Python) + assert length(splits) 
== 1 + end + + test "emits split at ensure (Elixir)" do + splits = + split_values( + "try do\n :ok\nrescue\n _ -> :error\nensure\n cleanup()\nend\n", + ElixirLang + ) + + assert length(splits) == 2 + end + + test "emits split at elseif (PHP)" do + splits = split_values("if x then\n :a\nelseif y then\n :b\nend\n", PHP) + assert length(splits) == 1 + end + + test "emits split at case label (switch body)" do + splits = + split_values("switch x\n case 1:\n :a\n case 2:\n :b\nend\n", Java) + + assert length(splits) >= 1 + end + + test "emits split at when keyword" do + splits = split_values("x = 1\nwhen x > 0 do\n :pos\nend\n", ElixirLang) + assert length(splits) == 1 + end +end diff --git a/test/codeqa/ast/signals/structural/colon_indent_signal_test.exs b/test/codeqa/ast/signals/structural/colon_indent_signal_test.exs new file mode 100644 index 0000000..484edf6 --- /dev/null +++ b/test/codeqa/ast/signals/structural/colon_indent_signal_test.exs @@ -0,0 +1,29 @@ +defmodule CodeQA.AST.Signals.Structural.ColonIndentSignalTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Signals.Structural.ColonIndentSignal + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.Languages.Code.Scripting.Python + + defp enclosure_values(code, lang_mod \\ Python) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = SignalStream.run(tokens, [%ColonIndentSignal{}], lang_mod) + for {_src, :enclosure, :colon_indent_enclosure, v} <- emissions, do: v + end + + test "no enclosures for non-python language" do + assert enclosure_values("def foo:\n return 1\n", CodeQA.Languages.Unknown) == + [] + end + + test "emits enclosure for colon-indented block in python" do + enclosures = enclosure_values("def foo:\n return 1\n") + assert enclosures != [] + end + + test "group is :enclosure" do + assert Signal.group(%ColonIndentSignal{}) == :enclosure + end +end diff --git 
a/test/codeqa/ast/signals/structural/comment_divider_signal_test.exs b/test/codeqa/ast/signals/structural/comment_divider_signal_test.exs new file mode 100644 index 0000000..329c910 --- /dev/null +++ b/test/codeqa/ast/signals/structural/comment_divider_signal_test.exs @@ -0,0 +1,52 @@ +defmodule CodeQA.AST.Signals.Structural.CommentDividerSignalTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Signals.Structural.CommentDividerSignal + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang + alias CodeQA.Languages.Code.Vm.Java + alias CodeQA.Languages.Data.Sql + + defp split_values(code, lang_mod) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = SignalStream.run(tokens, [%CommentDividerSignal{}], lang_mod) + for {_src, :split, :comment_divider_split, v} <- emissions, do: v + end + + test "no split for first divider comment (seen_content == false at start of file)" do + assert split_values("# ---\n", ElixirLang) == [] + end + + test "emits split at # --- after prior content" do + splits = split_values("x = 1\n# ---\ny = 2\n", ElixirLang) + assert length(splits) == 1 + end + + test "emits split at // === after prior content" do + splits = split_values("x = 1\n// ===\ny = 2\n", Java) + assert length(splits) == 1 + end + + test "emits split at -- --- after prior content (SQL style)" do + splits = split_values("x = 1\n-- ---\ny = 2\n", Sql) + assert length(splits) == 1 + end + + test "does NOT emit for # followed by identifier (real comment)" do + assert split_values("x = 1\n# This is a real comment\n", ElixirLang) == [] + end + + test "does NOT emit when # is not at line start" do + assert split_values("x = 1\nx # ---\n", ElixirLang) == [] + end + + test "does NOT emit for indented divider comment (inside a block)" do + assert split_values("x = 1\n # ---\n", ElixirLang) == [] + end + + test "group is :split" do + assert 
Signal.group(%CommentDividerSignal{}) == :split + end +end diff --git a/test/codeqa/ast/signals/structural/decorator_signal_test.exs b/test/codeqa/ast/signals/structural/decorator_signal_test.exs new file mode 100644 index 0000000..b1474d7 --- /dev/null +++ b/test/codeqa/ast/signals/structural/decorator_signal_test.exs @@ -0,0 +1,47 @@ +defmodule CodeQA.AST.Signals.Structural.DecoratorSignalTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Signals.Structural.DecoratorSignal + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.TokenNormalizer + + defp split_values(code) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = SignalStream.run(tokens, [%DecoratorSignal{}], []) + for {_src, :split, :decorator_split, v} <- emissions, do: v + end + + test "no split for first @ (seen_content == false at start of file)" do + assert split_values("@decorator\ndef foo() {}\n") == [] + end + + test "emits split at second @decorator after content" do + splits = split_values("@decorator\ndef foo() {}\n@decorator\ndef bar() {}\n") + assert length(splits) == 1 + end + + test "does not emit when @ is inside brackets" do + splits = split_values("@decorator\ndef foo(@param x) {}\n") + assert splits == [] + end + + test "does not emit when @ is not at line start (mid-expression)" do + splits = split_values("@decorator\ndef foo() { x@y }\n") + assert splits == [] + end + + test "emits split for Rust #[ pattern at line start after content" do + splits = split_values("#[derive(Debug)]\nstruct Foo {}\n#[derive(Clone)]\nstruct Bar {}\n") + assert length(splits) == 1 + end + + test "does not emit for # at line start when next token is not [" do + splits = split_values("@decorator\ndef foo() {}\n# comment\ndef bar() {}\n") + assert splits == [] + end + + test "group is :split" do + assert Signal.group(%DecoratorSignal{}) == :split + end +end diff --git a/test/codeqa/ast/signals/structural/dedent_to_zero_signal_test.exs 
b/test/codeqa/ast/signals/structural/dedent_to_zero_signal_test.exs new file mode 100644 index 0000000..749e165 --- /dev/null +++ b/test/codeqa/ast/signals/structural/dedent_to_zero_signal_test.exs @@ -0,0 +1,55 @@ +defmodule CodeQA.AST.Signals.Structural.DedentToZeroSignalTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Signals.Structural.DedentToZeroSignal + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.TokenNormalizer + + defp split_count(code) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = SignalStream.run(tokens, [%DedentToZeroSignal{}], []) + length(for {_src, :split, :dedent_split, _v} <- emissions, do: true) + end + + test "no split in a single flat block (no indentation change)" do + code = "foo\nbar\nbaz\n" + assert split_count(code) == 0 + end + + test "emits split when first token of a new line at indent 0 after indented content" do + code = "def foo:\n return 1\ndef bar:\n" + assert split_count(code) == 1 + end + + test "does NOT emit when returning to indent 0 from same-level content (no prior indent)" do + code = "foo\nbar\n" + assert split_count(code) == 0 + end + + test "does NOT emit at the very start of file (seen_content == false)" do + code = "foo\n bar\n" + # The very first line has no prior indent, so no split should fire + assert split_count(code) == 0 + end + + test "handles multiple indented blocks with splits" do + code = "foo:\n x = 1\nbar:\n y = 2\nbaz:\n" + # split at "bar" and "baz" + assert split_count(code) == 2 + end + + test "does NOT split if current line also has indent (both lines indented)" do + code = "foo:\n x = 1\n y = 2\n" + assert split_count(code) == 0 + end + + test "emits split when a blank line separates an indented block from a new block at indent 0" do + code = "def foo:\n return 1\n\ndef bar:\n" + assert split_count(code) == 1 + end + + test "group/1 returns :split" do + assert Signal.group(%DedentToZeroSignal{}) == :split + 
end +end diff --git a/test/codeqa/ast/signals/structural/doc_comment_lead_signal_test.exs b/test/codeqa/ast/signals/structural/doc_comment_lead_signal_test.exs new file mode 100644 index 0000000..28965c4 --- /dev/null +++ b/test/codeqa/ast/signals/structural/doc_comment_lead_signal_test.exs @@ -0,0 +1,44 @@ +defmodule CodeQA.AST.Signals.Structural.DocCommentLeadSignalTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Signals.Structural.DocCommentLeadSignal + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.TokenNormalizer + + defp split_values(code) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = SignalStream.run(tokens, [%DocCommentLeadSignal{}], []) + for {_src, :split, :doc_comment_split, v} <- emissions, do: v + end + + test "no split for first /// (seen_content == false at start of file)" do + assert split_values("/// doc\n") == [] + end + + test "emits split at /// after prior content (Rust/C# doc comment)" do + splits = split_values("fn foo() {}\n/// doc\n") + assert length(splits) == 1 + end + + test "emits split at /** after prior content (Java/JS JSDoc)" do + splits = split_values("function foo() {}\n/**\n * doc\n */\n") + assert length(splits) == 1 + end + + test "does NOT emit for // followed by identifier (regular line comment)" do + assert split_values("x = 1\n// regular comment\n") == [] + end + + test "does NOT emit for // that is not at line start" do + assert split_values("x = 1\nx // doc\n") == [] + end + + test "does NOT emit for / at line start when next is not *" do + assert split_values("x = 1\n/ something\n") == [] + end + + test "group is :split" do + assert Signal.group(%DocCommentLeadSignal{}) == :split + end +end diff --git a/test/codeqa/ast/signals/structural/keyword_signal_test.exs b/test/codeqa/ast/signals/structural/keyword_signal_test.exs new file mode 100644 index 0000000..4aecba6 --- /dev/null +++ 
b/test/codeqa/ast/signals/structural/keyword_signal_test.exs @@ -0,0 +1,38 @@ +defmodule CodeQA.AST.Signals.Structural.KeywordSignalTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Signals.Structural.KeywordSignal + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang + + defp split_values(code, lang_mod) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = SignalStream.run(tokens, [%KeywordSignal{}], lang_mod) + for {_src, :split, :keyword_split, v} <- emissions, do: v + end + + test "no split for single def" do + assert split_values("def foo\n x\nend\n", ElixirLang) == [] + end + + test "emits split at second def keyword at depth 0 indent 0" do + splits = split_values("def foo\n x\nend\ndef bar\n y\nend\n", ElixirLang) + assert length(splits) == 1 + end + + test "does not split on def inside a module (indented)" do + splits = split_values("defmodule Foo do\n def foo, do: 1\nend\n", ElixirLang) + assert splits == [] + end + + test "does not split on keyword inside brackets" do + splits = split_values("foo(def, bar)\n", ElixirLang) + assert splits == [] + end + + test "group is :split" do + assert Signal.group(%KeywordSignal{}) == :split + end +end diff --git a/test/codeqa/ast/signals/structural/sql_block_signal_test.exs b/test/codeqa/ast/signals/structural/sql_block_signal_test.exs new file mode 100644 index 0000000..e655f8d --- /dev/null +++ b/test/codeqa/ast/signals/structural/sql_block_signal_test.exs @@ -0,0 +1,60 @@ +defmodule CodeQA.AST.Signals.Structural.SQLBlockSignalTest do + use ExUnit.Case, async: true + + alias CodeQA.AST.Signals.Structural.SQLBlockSignal + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.Languages.Data.Sql + + defp split_values(code) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = 
SignalStream.run(tokens, [%SQLBlockSignal{}], Sql) + for {_src, :split, :sql_block_split, v} <- emissions, do: v + end + + test "no split for the first statement (seen_content == false)" do + assert split_values("CREATE TABLE users (id INT);\n") == [] + end + + test "emits split at second CREATE TABLE DDL statement" do + code = "CREATE TABLE users (id INT);\nCREATE TABLE orders (id INT);\n" + splits = split_values(code) + assert length(splits) == 1 + end + + test "emits split at SELECT when a query follows other content" do + code = "CREATE TABLE users (id INT);\nSELECT id FROM users;\n" + splits = split_values(code) + assert length(splits) == 1 + end + + test "emits split at lowercase create (case-insensitive match)" do + code = "create table users (id INT);\ncreate table orders (id INT);\n" + splits = split_values(code) + assert length(splits) == 1 + end + + test "emits split at INSERT after prior content" do + code = "CREATE TABLE users (id INT);\nINSERT INTO users VALUES (1);\n" + splits = split_values(code) + assert length(splits) == 1 + end + + test "does NOT emit for SQL keyword mid-statement (not at line start)" do + # FROM is not at line start; only SELECT is, but it's the first statement + code = "SELECT id FROM users;\n" + splits = split_values(code) + assert splits == [] + end + + test "does NOT emit for non-SQL identifier at line start" do + code = "CREATE TABLE users (id INT);\nusername VARCHAR(255);\n" + splits = split_values(code) + assert splits == [] + end + + test "group/1 returns :split" do + assert Signal.group(%SQLBlockSignal{}) == :split + end +end diff --git a/test/codeqa/ast/signals/structural/triple_quote_signal_test.exs b/test/codeqa/ast/signals/structural/triple_quote_signal_test.exs new file mode 100644 index 0000000..fed5f3f --- /dev/null +++ b/test/codeqa/ast/signals/structural/triple_quote_signal_test.exs @@ -0,0 +1,35 @@ +defmodule CodeQA.AST.Signals.Structural.TripleQuoteSignalTest do + use ExUnit.Case, async: true + + alias 
CodeQA.AST.Signals.Structural.TripleQuoteSignal + alias CodeQA.AST.Parsing.Signal + alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Lexing.TokenNormalizer + + defp split_values(code) do + tokens = TokenNormalizer.normalize_structural(code) + [emissions] = SignalStream.run(tokens, [%TripleQuoteSignal{}], []) + for {_src, :split, :triple_split, v} <- emissions, do: v + end + + test "no splits for plain code" do + assert split_values("def foo\n :ok\nend\n") == [] + end + + test "emits two splits for a complete heredoc" do + code = "\"\"\"\nhello\n\"\"\"\n" + splits = split_values(code) + assert length(splits) == 2 + end + + test "emits one split for unclosed heredoc (mismatch tolerance)" do + # single token with no closing pair + code = "\"\"\"\nhello\n" + splits = split_values(code) + assert length(splits) == 1 + end + + test "group is :split" do + assert Signal.group(%TripleQuoteSignal{}) == :split + end +end From 45ae2b28175b4b65f02f7b8dc3e11229f1b8ea63 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 19 Mar 2026 18:31:09 +0100 Subject: [PATCH 15/71] chore(gitignore): ignore scripts/*.exs and docs plan directories Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 81b993d..4ef9df3 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,9 @@ devenv.lock # Git worktrees .worktrees/ docs/plans/ +docs/superpowers/ +plans/ +scripts/*.exs # Claude Code .claude/ From b58c8bf813dc53a99b44acfd4c38a417112e579e Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Fri, 20 Mar 2026 13:06:54 +0100 Subject: [PATCH 16/71] feat(combined-metrics): detect and persist language coverage per behavior Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/combined_metrics/sample_runner.ex | 83 ++++++++++++++++++- .../combined_metrics/sample_runner_test.exs | 43 ++++++++++ 2 files changed, 124 insertions(+), 2 deletions(-) diff --git a/lib/codeqa/combined_metrics/sample_runner.ex 
b/lib/codeqa/combined_metrics/sample_runner.ex index 4bd078c..e4c163c 100644 --- a/lib/codeqa/combined_metrics/sample_runner.ex +++ b/lib/codeqa/combined_metrics/sample_runner.ex @@ -467,10 +467,17 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do hint -> [" _fix_hint: #{inspect(hint)}"] end + languages_line = + case Map.get(groups, "_languages") do + nil -> [] + [] -> [] + langs -> [" _languages: [#{Enum.join(langs, ", ")}]"] + end + group_lines = groups |> Enum.filter(fn {k, v} -> - k not in ["_doc", "_log_baseline", "_fix_hint"] and is_map(v) + k not in ["_doc", "_log_baseline", "_fix_hint", "_languages"] and is_map(v) end) |> Enum.sort_by(fn {group, _} -> group end) |> Enum.flat_map(fn {group, keys} -> @@ -482,7 +489,8 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do [" #{group}:" | key_lines] end) - ["#{behavior}:" | doc_line] ++ fix_hint_line ++ baseline_line ++ group_lines ++ [""] + ["#{behavior}:" | doc_line] ++ + fix_hint_line ++ languages_line ++ baseline_line ++ group_lines ++ [""] end) Enum.join(lines, "\n") <> "\n" @@ -496,4 +504,75 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do defp sample_path(category, behavior, kind) do Path.join([@samples_root, category, behavior, kind]) end + + defp dir_languages(dir) do + case File.ls(dir) do + {:ok, files} -> + files + |> Enum.map(&CodeQA.Language.detect/1) + |> Enum.map(& &1.name()) + |> MapSet.new() + + _ -> + MapSet.new() + end + end + + defp languages_for_behavior(category, behavior) do + bad_langs = dir_languages(sample_path(category, behavior, "bad")) + good_langs = dir_languages(sample_path(category, behavior, "good")) + + bad_langs + |> MapSet.intersection(good_langs) + |> MapSet.to_list() + |> Enum.reject(&(&1 == "unknown")) + |> Enum.sort() + end + + defp maybe_put_languages(groups, []), do: groups + defp maybe_put_languages(groups, langs), do: Map.put(groups, "_languages", langs) + + @doc """ + Updates only the `_languages` field in YAML config files based on sample data. 
+ + Scans `bad/` and `good/` sample directories for each behavior, detects languages + from file extensions via `CodeQA.Language.detect/1`, and writes the intersection + as `_languages` to the YAML. Behaviors without sample data are left without a + `_languages` key (treated as applying to all languages at scoring time). + All existing scalars and baselines are preserved. + + Returns a list of `%{category: String.t(), behaviors_with_languages: non_neg_integer()}`. + """ + @spec apply_languages(keyword()) :: [map()] + def apply_languages(opts \\ []) do + filter_category = opts[:category] + + @yaml_dir + |> File.ls!() + |> Enum.filter(&String.ends_with?(&1, ".yml")) + |> Enum.filter(fn yml_file -> + filter_category == nil or String.trim_trailing(yml_file, ".yml") == filter_category + end) + |> Enum.sort() + |> Enum.map(fn yml_file -> + category = String.trim_trailing(yml_file, ".yml") + yaml_path = Path.join(@yaml_dir, yml_file) + {:ok, existing} = YamlElixir.read_from_file(yaml_path) + + updated = + existing + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Map.new(fn {behavior, groups} -> + langs = languages_for_behavior(category, behavior) + {behavior, maybe_put_languages(groups, langs)} + end) + + File.write!(yaml_path, format_yaml(updated)) + + behaviors_with_languages = + Enum.count(updated, fn {_b, groups} -> Map.has_key?(groups, "_languages") end) + + %{category: category, behaviors_with_languages: behaviors_with_languages} + end) + end end diff --git a/test/codeqa/combined_metrics/sample_runner_test.exs b/test/codeqa/combined_metrics/sample_runner_test.exs index ea582d2..84bc4a0 100644 --- a/test/codeqa/combined_metrics/sample_runner_test.exs +++ b/test/codeqa/combined_metrics/sample_runner_test.exs @@ -8,6 +8,49 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do %{results: results} end + describe "apply_languages/1" do + test "returns one entry per requested category" do + stats = SampleRunner.apply_languages(category: "variable_naming") + assert 
length(stats) == 1 + [entry] = stats + assert entry.category == "variable_naming" + assert is_integer(entry.behaviors_with_languages) + end + + test "writes _languages to behaviors that have samples" do + SampleRunner.apply_languages(category: "variable_naming") + {:ok, data} = YamlElixir.read_from_file("priv/combined_metrics/variable_naming.yml") + langs = get_in(data, ["name_is_generic", "_languages"]) + assert is_list(langs) + assert length(langs) > 0 + assert Enum.all?(langs, &is_binary/1) + end + + test "behaviors without sample dirs get no _languages key" do + SampleRunner.apply_languages(category: "variable_naming") + {:ok, data} = YamlElixir.read_from_file("priv/combined_metrics/variable_naming.yml") + + Enum.each(data, fn {_behavior, groups} -> + if is_map(groups) do + case Map.get(groups, "_languages") do + nil -> :ok + langs -> assert is_list(langs) and length(langs) > 0 + end + end + end) + end + + test "only includes languages with both good and bad samples" do + # uses code_smells which has single-language behaviors + SampleRunner.apply_languages(category: "code_smells") + {:ok, data} = YamlElixir.read_from_file("priv/combined_metrics/code_smells.yml") + + # no_dead_code_after_return has only .ex samples + langs = get_in(data, ["no_dead_code_after_return", "_languages"]) + assert langs == ["elixir"] + end + end + describe "run/1" do test "returns a list of results with required keys", %{results: results} do assert is_list(results) From a8267d374f9df72d723cdd41dc622d9d1668da84 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Fri, 20 Mar 2026 13:11:45 +0100 Subject: [PATCH 17/71] feat(combined-metrics): filter diagnose_aggregate by language Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/combined_metrics/sample_runner.ex | 102 +++++++++++------- .../combined_metrics/sample_runner_test.exs | 35 ++++++ 2 files changed, 98 insertions(+), 39 deletions(-) diff --git a/lib/codeqa/combined_metrics/sample_runner.ex 
b/lib/codeqa/combined_metrics/sample_runner.ex index e4c163c..3d33c30 100644 --- a/lib/codeqa/combined_metrics/sample_runner.ex +++ b/lib/codeqa/combined_metrics/sample_runner.ex @@ -275,6 +275,8 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do @spec diagnose_aggregate(map(), keyword()) :: [map()] def diagnose_aggregate(aggregate, opts \\ []) do top_n = Keyword.get(opts, :top, 15) + language = Keyword.get(opts, :language) + languages = Keyword.get(opts, :languages) Scorer.all_yamls() |> Enum.sort_by(fn {path, _} -> path end) @@ -284,48 +286,54 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do data |> Enum.filter(fn {_k, v} -> is_map(v) end) |> Enum.flat_map(fn {behavior, behavior_data} -> - scalars = Scorer.scalars_for(yaml_path, behavior) + behavior_langs = Map.get(behavior_data, "_languages", []) - if map_size(scalars) == 0 do + if not behavior_language_applies?(behavior_langs, language, languages) do [] else - log_baseline = Map.get(behavior_data, "_log_baseline", 0.0) / 1.0 - - {dot, norm_s_sq, norm_v_sq, contributions} = - Enum.reduce(scalars, {0.0, 0.0, 0.0, []}, fn {{group, key}, scalar}, - {d, ns, nv, contribs} -> - log_m = :math.log(Scorer.get(aggregate, group, key)) - contrib = scalar * log_m - - {d + contrib, ns + scalar * scalar, nv + log_m * log_m, - [{:"#{group}.#{key}", contrib} | contribs]} - end) - - cos_sim = - if norm_s_sq > 0 and norm_v_sq > 0, - do: dot / (:math.sqrt(norm_s_sq) * :math.sqrt(norm_v_sq)), - else: 0.0 - - raw_score = Scorer.compute_score(yaml_path, behavior, aggregate) - calibrated = :math.log(max(raw_score, 1.0e-300)) - log_baseline - - top_metrics = - contributions - |> Enum.sort_by(fn {_, c} -> c end) - |> Enum.take(5) - |> Enum.map(fn {metric, contribution} -> - %{metric: to_string(metric), contribution: Float.round(contribution, 4)} - end) - - [ - %{ - category: category, - behavior: behavior, - cosine: Float.round(cos_sim, 4), - score: Float.round(calibrated, 4), - top_metrics: top_metrics - } - ] + scalars = 
Scorer.scalars_for(yaml_path, behavior) + + if map_size(scalars) == 0 do + [] + else + log_baseline = Map.get(behavior_data, "_log_baseline", 0.0) / 1.0 + + {dot, norm_s_sq, norm_v_sq, contributions} = + Enum.reduce(scalars, {0.0, 0.0, 0.0, []}, fn {{group, key}, scalar}, + {d, ns, nv, contribs} -> + log_m = :math.log(Scorer.get(aggregate, group, key)) + contrib = scalar * log_m + + {d + contrib, ns + scalar * scalar, nv + log_m * log_m, + [{:"#{group}.#{key}", contrib} | contribs]} + end) + + cos_sim = + if norm_s_sq > 0 and norm_v_sq > 0, + do: dot / (:math.sqrt(norm_s_sq) * :math.sqrt(norm_v_sq)), + else: 0.0 + + raw_score = Scorer.compute_score(yaml_path, behavior, aggregate) + calibrated = :math.log(max(raw_score, 1.0e-300)) - log_baseline + + top_metrics = + contributions + |> Enum.sort_by(fn {_, c} -> c end) + |> Enum.take(5) + |> Enum.map(fn {metric, contribution} -> + %{metric: to_string(metric), contribution: Float.round(contribution, 4)} + end) + + [ + %{ + category: category, + behavior: behavior, + cosine: Float.round(cos_sim, 4), + score: Float.round(calibrated, 4), + top_metrics: top_metrics + } + ] + end end end) end) @@ -444,6 +452,22 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do defp deadzone?(ratio), do: ratio >= @deadzone_low and ratio <= @deadzone_high + # Returns true if the behavior should be included for the given language context. 
+ # behavior_langs: the "_languages" list from the YAML ([] = applies to all) + # language: single language string from :language opt (nil = no filter) + # languages: project language list from :languages opt (nil = no filter) + defp behavior_language_applies?(_behavior_langs, nil, nil), do: true + defp behavior_language_applies?([], _language, _languages), do: true + + defp behavior_language_applies?(behavior_langs, language, nil) when is_binary(language), + do: language in behavior_langs + + defp behavior_language_applies?(behavior_langs, nil, languages) when is_list(languages), + do: Enum.any?(behavior_langs, &(&1 in languages)) + + defp behavior_language_applies?(behavior_langs, language, languages), + do: language in behavior_langs or Enum.any?(behavior_langs, &(&1 in languages)) + defp format_yaml(data) do lines = data diff --git a/test/codeqa/combined_metrics/sample_runner_test.exs b/test/codeqa/combined_metrics/sample_runner_test.exs index 84bc4a0..fcf1dc4 100644 --- a/test/codeqa/combined_metrics/sample_runner_test.exs +++ b/test/codeqa/combined_metrics/sample_runner_test.exs @@ -51,6 +51,41 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do end end + describe "diagnose_aggregate/2 language option" do + test "accepts :language option without crashing" do + # minimal aggregate — behavior will be scored but most will have no scalars + agg = %{} + result = SampleRunner.diagnose_aggregate(agg, top: 5, language: "elixir") + assert is_list(result) + end + + test "accepts :languages option without crashing" do + agg = %{} + result = SampleRunner.diagnose_aggregate(agg, top: 5, languages: ["elixir", "rust"]) + assert is_list(result) + end + + # NOTE: This test uses `<=` intentionally. Before Task 7 + `mix compile --force`, + # all behaviors have empty `_languages` in the compiled cache, so no filtering + # occurs and all three counts are equal. The `<=` assertion passes in both + # pre- and post-Task-7 states. 
+ test "with language option returns subset of unfiltered results" do + agg = + "priv/combined_metrics/samples/variable_naming/name_is_generic/bad" + |> CodeQA.Engine.Collector.collect_files() + |> CodeQA.Engine.Analyzer.analyze_codebase() + |> get_in(["codebase", "aggregate"]) + + all = SampleRunner.diagnose_aggregate(agg, top: 999) + elixir_only = SampleRunner.diagnose_aggregate(agg, top: 999, language: "elixir") + rust_only = SampleRunner.diagnose_aggregate(agg, top: 999, language: "rust") + + # Filtered sets are subsets (or equal, pre-Task-7) of unfiltered + assert length(elixir_only) <= length(all) + assert length(rust_only) <= length(all) + end + end + describe "run/1" do test "returns a list of results with required keys", %{results: results} do assert is_list(results) From 4e1262087f0055d216960157f3459b998c2d0af5 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Fri, 20 Mar 2026 13:14:26 +0100 Subject: [PATCH 18/71] fix(combined-metrics): improve behavior_language_applies? guards and docs - Add guard (is_binary/is_list) to catch-all clause of behavior_language_applies?/3 - Add explicit clause for (_, nil, []) to treat empty languages list as "no filter" - Add comment on the [] catch-all clause clarifying priority semantics - Document :language and :languages options in diagnose_aggregate/2 @doc Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/combined_metrics/sample_runner.ex | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/codeqa/combined_metrics/sample_runner.ex b/lib/codeqa/combined_metrics/sample_runner.ex index 3d33c30..9afb513 100644 --- a/lib/codeqa/combined_metrics/sample_runner.ex +++ b/lib/codeqa/combined_metrics/sample_runner.ex @@ -260,7 +260,11 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do ## Options - * `:top` - number of results to return (default 15) + * `:top` - number of results to return (default 15) + * `:language` - single language string for per-file filtering; when set, only + behaviors 
whose `_languages` list includes this language are scored + * `:languages` - list of language strings for project-level filtering; when set, only + behaviors whose `_languages` list overlaps with this list are scored ## Result shape @@ -457,16 +461,22 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do # language: single language string from :language opt (nil = no filter) # languages: project language list from :languages opt (nil = no filter) defp behavior_language_applies?(_behavior_langs, nil, nil), do: true + + # Empty behavior_langs means "applies to all languages" — always include. + # This clause takes priority over all non-nil filter cases. defp behavior_language_applies?([], _language, _languages), do: true + defp behavior_language_applies?(_behavior_langs, nil, []), do: true + defp behavior_language_applies?(behavior_langs, language, nil) when is_binary(language), do: language in behavior_langs defp behavior_language_applies?(behavior_langs, nil, languages) when is_list(languages), do: Enum.any?(behavior_langs, &(&1 in languages)) - defp behavior_language_applies?(behavior_langs, language, languages), - do: language in behavior_langs or Enum.any?(behavior_langs, &(&1 in languages)) + defp behavior_language_applies?(behavior_langs, language, languages) + when is_binary(language) and is_list(languages), + do: language in behavior_langs or Enum.any?(behavior_langs, &(&1 in languages)) defp format_yaml(data) do lines = From bca01a2f6978d592480de852a9a9b7ef29a694f3 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Fri, 20 Mar 2026 13:16:29 +0100 Subject: [PATCH 19/71] feat(combined-metrics): filter score_aggregate by project languages Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/combined_metrics/sample_runner.ex | 10 +++++-- .../combined_metrics/sample_runner_test.exs | 26 +++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/lib/codeqa/combined_metrics/sample_runner.ex b/lib/codeqa/combined_metrics/sample_runner.ex index 
9afb513..bcf3375 100644 --- a/lib/codeqa/combined_metrics/sample_runner.ex +++ b/lib/codeqa/combined_metrics/sample_runner.ex @@ -216,8 +216,10 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do ... ] """ - @spec score_aggregate(map()) :: [map()] - def score_aggregate(aggregate) do + @spec score_aggregate(map(), keyword()) :: [map()] + def score_aggregate(aggregate, opts \\ []) do + languages = Keyword.get(opts, :languages) + Scorer.all_yamls() |> Enum.sort_by(fn {path, _} -> path end) |> Enum.map(fn {yaml_path, data} -> @@ -226,6 +228,10 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do behaviors = data |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.reject(fn {_behavior, behavior_data} -> + behavior_langs = Map.get(behavior_data, "_languages", []) + not behavior_language_applies?(behavior_langs, nil, languages) + end) |> Enum.map(fn {behavior, behavior_data} -> log_baseline = Map.get(behavior_data, "_log_baseline", 0.0) / 1.0 raw_score = Scorer.compute_score(yaml_path, behavior, aggregate) diff --git a/test/codeqa/combined_metrics/sample_runner_test.exs b/test/codeqa/combined_metrics/sample_runner_test.exs index fcf1dc4..8332d3f 100644 --- a/test/codeqa/combined_metrics/sample_runner_test.exs +++ b/test/codeqa/combined_metrics/sample_runner_test.exs @@ -86,6 +86,32 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do end end + describe "score_aggregate/2 language filtering" do + test "accepts :languages option without crashing" do + result = SampleRunner.score_aggregate(%{}, languages: ["elixir"]) + assert is_list(result) + assert Enum.all?(result, &Map.has_key?(&1, :behaviors)) + end + + test "with languages option returns fewer behaviors than unfiltered" do + agg = + "priv/combined_metrics/samples/variable_naming/name_is_generic/bad" + |> CodeQA.Engine.Collector.collect_files() + |> CodeQA.Engine.Analyzer.analyze_codebase() + |> get_in(["codebase", "aggregate"]) + + all_count = SampleRunner.score_aggregate(agg) |> Enum.flat_map(& &1.behaviors) |> 
length() + + elixir_count = + SampleRunner.score_aggregate(agg, languages: ["elixir"]) + |> Enum.flat_map(& &1.behaviors) + |> length() + + # elixir-only project sees fewer or equal behaviors + assert elixir_count <= all_count + end + end + describe "run/1" do test "returns a list of results with required keys", %{results: results} do assert is_list(results) From a9f017b266c62eee221bfd690e1cb3990ceecc91 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Fri, 20 Mar 2026 13:18:19 +0100 Subject: [PATCH 20/71] feat(file-scorer): detect file language and filter behaviors accordingly Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/combined_metrics/file_scorer.ex | 3 ++- .../combined_metrics/file_scorer_test.exs | 27 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/lib/codeqa/combined_metrics/file_scorer.ex b/lib/codeqa/combined_metrics/file_scorer.ex index 368d163..4ea54b4 100644 --- a/lib/codeqa/combined_metrics/file_scorer.ex +++ b/lib/codeqa/combined_metrics/file_scorer.ex @@ -65,11 +65,12 @@ defmodule CodeQA.CombinedMetrics.FileScorer do end) |> Enum.reduce(%{}, fn {path, file_data}, acc -> top_nodes = CodeQA.HealthReport.Grader.top_3_nodes(Map.get(file_data, "nodes")) + language = CodeQA.Language.detect(path).name() file_data |> Map.get("metrics", %{}) |> file_to_aggregate() - |> SampleRunner.diagnose_aggregate(top: 99_999) + |> SampleRunner.diagnose_aggregate(top: 99_999, language: language) |> Enum.reduce(acc, fn %{ category: category, behavior: behavior, diff --git a/test/codeqa/combined_metrics/file_scorer_test.exs b/test/codeqa/combined_metrics/file_scorer_test.exs index b863693..c280cd3 100644 --- a/test/codeqa/combined_metrics/file_scorer_test.exs +++ b/test/codeqa/combined_metrics/file_scorer_test.exs @@ -188,6 +188,33 @@ defmodule CodeQA.CombinedMetrics.FileScorerTest do end end + describe "worst_files_per_behavior/2 language filtering" do + test "does not include rust-only behaviors when scoring an elixir file" do + 
fake_metrics = %{"halstead" => %{"tokens" => 100.0, "difficulty" => 5.0}} + files_map = %{"lib/foo.ex" => %{"metrics" => fake_metrics}} + + results = FileScorer.worst_files_per_behavior(files_map) + + # Any behavior that only applies to rust should not have this .ex file in results + rust_only_keys = + Enum.filter(results, fn {key, entries} -> + [cat, beh] = String.split(key, ".", parts: 2) + yaml_path = "priv/combined_metrics/#{cat}.yml" + + case YamlElixir.read_from_file(yaml_path) do + {:ok, data} -> + langs = get_in(data, [beh, "_languages"]) || [] + langs != [] and "elixir" not in langs and entries != [] + + _ -> + false + end + end) + + assert rust_only_keys == [] + end + end + # Build a realistic files_map using a real project file so diagnose_aggregate # has real metric values to work with. We use a small fixed map rather than # running the full analyzer to keep tests fast. From 5556a304ac3608b30e1d2d1560683cdffca59cfe Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Fri, 20 Mar 2026 13:22:36 +0100 Subject: [PATCH 21/71] feat: thread project languages through all codebase-level scoring call sites Co-Authored-By: Claude Sonnet 4.6 --- .../block_impact/refactoring_potentials.ex | 14 +++++++------ lib/codeqa/block_impact_analyzer.ex | 21 ++++++++++++++++--- lib/codeqa/diagnostics.ex | 18 +++++++++++++--- lib/codeqa/health_report.ex | 13 ++++++++++-- lib/codeqa/health_report/grader.ex | 8 ++++--- .../combined_metrics/sample_runner_test.exs | 7 +++++++ 6 files changed, 64 insertions(+), 17 deletions(-) diff --git a/lib/codeqa/block_impact/refactoring_potentials.ex b/lib/codeqa/block_impact/refactoring_potentials.ex index 34ca366..1c0c89d 100644 --- a/lib/codeqa/block_impact/refactoring_potentials.ex +++ b/lib/codeqa/block_impact/refactoring_potentials.ex @@ -40,9 +40,11 @@ defmodule CodeQA.BlockImpact.RefactoringPotentials do opts \\ [] ) do top_n = Keyword.get(opts, :top, 3) + language = Keyword.get(opts, :language) + languages = Keyword.get(opts, 
:languages) - file_delta = compute_file_delta(baseline_file_cosines, without_file_metrics) - codebase_delta = compute_codebase_delta(baseline_codebase_cosines, without_codebase_agg) + file_delta = compute_file_delta(baseline_file_cosines, without_file_metrics, language) + codebase_delta = compute_codebase_delta(baseline_codebase_cosines, without_codebase_agg, languages) all_keys = Enum.uniq(Map.keys(file_delta) ++ Map.keys(codebase_delta)) @@ -64,14 +66,14 @@ defmodule CodeQA.BlockImpact.RefactoringPotentials do end) end - defp compute_file_delta(baseline_cosines, without_metrics) do + defp compute_file_delta(baseline_cosines, without_metrics, language) do without_agg = FileScorer.file_to_aggregate(without_metrics) - without_cosines = SampleRunner.diagnose_aggregate(without_agg, top: 99_999) + without_cosines = SampleRunner.diagnose_aggregate(without_agg, top: 99_999, language: language) cosines_to_delta(baseline_cosines, without_cosines) end - defp compute_codebase_delta(baseline_cosines, without_agg) do - without_cosines = SampleRunner.diagnose_aggregate(without_agg, top: 99_999) + defp compute_codebase_delta(baseline_cosines, without_agg, languages) do + without_cosines = SampleRunner.diagnose_aggregate(without_agg, top: 99_999, languages: languages) cosines_to_delta(baseline_cosines, without_cosines) end diff --git a/lib/codeqa/block_impact_analyzer.ex b/lib/codeqa/block_impact_analyzer.ex index f4b17e4..37578b3 100644 --- a/lib/codeqa/block_impact_analyzer.ex +++ b/lib/codeqa/block_impact_analyzer.ex @@ -41,9 +41,10 @@ defmodule CodeQA.BlockImpactAnalyzer do workers = Keyword.get(opts, :workers, System.schedulers_online()) baseline_codebase_agg = Analyzer.analyze_codebase_aggregate(files_map) + project_langs = project_languages(files_map) baseline_codebase_cosines = - SampleRunner.diagnose_aggregate(baseline_codebase_agg, top: 99_999) + SampleRunner.diagnose_aggregate(baseline_codebase_agg, top: 99_999, languages: project_langs) file_results = 
pipeline_result["files"] @@ -90,7 +91,8 @@ defmodule CodeQA.BlockImpactAnalyzer do top_level_nodes = Parser.detect_blocks(root_tokens, Unknown) baseline_file_agg = FileScorer.file_to_aggregate(baseline_file_metrics) - baseline_file_cosines = SampleRunner.diagnose_aggregate(baseline_file_agg, top: 99_999) + language = CodeQA.Language.detect(path).name() + baseline_file_cosines = SampleRunner.diagnose_aggregate(baseline_file_agg, top: 99_999, language: language) top_level_nodes |> Enum.map(fn node -> @@ -181,12 +183,25 @@ defmodule CodeQA.BlockImpactAnalyzer do |> Map.put(path, %{"metrics" => without_file_metrics}) |> Analyzer.aggregate_file_metrics() + language = CodeQA.Language.detect(path).name() + project_langs = project_languages(file_results) + RefactoringPotentials.compute( baseline_file_cosines, without_file_metrics, baseline_codebase_cosines, without_codebase_agg, - top: nodes_top + top: nodes_top, + language: language, + languages: project_langs ) end + + defp project_languages(files_map) do + files_map + |> Map.keys() + |> Enum.map(&CodeQA.Language.detect(&1).name()) + |> Enum.reject(&(&1 == "unknown")) + |> Enum.uniq() + end end diff --git a/lib/codeqa/diagnostics.ex b/lib/codeqa/diagnostics.ex index 6b3956e..6f4c0d3 100644 --- a/lib/codeqa/diagnostics.ex +++ b/lib/codeqa/diagnostics.ex @@ -36,8 +36,11 @@ defmodule CodeQA.Diagnostics do defp run_aggregate(result, top, format) do aggregate = get_in(result, ["codebase", "aggregate"]) - issues_task = Task.async(fn -> SampleRunner.diagnose_aggregate(aggregate, top: top) end) - categories_task = Task.async(fn -> SampleRunner.score_aggregate(aggregate) end) + files = Map.get(result, "files", %{}) + project_langs = project_languages(files) + + issues_task = Task.async(fn -> SampleRunner.diagnose_aggregate(aggregate, top: top, languages: project_langs) end) + categories_task = Task.async(fn -> SampleRunner.score_aggregate(aggregate, languages: project_langs) end) issues = Task.await(issues_task) categories = 
Task.await(categories_task) @@ -60,7 +63,8 @@ defmodule CodeQA.Diagnostics do Map.new(files, fn {file_path, file_data} -> metrics = Map.get(file_data, "metrics", %{}) file_agg = FileScorer.file_to_aggregate(metrics) - diagnoses = SampleRunner.diagnose_aggregate(file_agg, top: top) + language = CodeQA.Language.detect(file_path).name() + diagnoses = SampleRunner.diagnose_aggregate(file_agg, top: top, language: language) {file_path, diagnoses} end) @@ -94,6 +98,14 @@ defmodule CodeQA.Diagnostics do end end + defp project_languages(files_map) do + files_map + |> Map.keys() + |> Enum.map(&CodeQA.Language.detect(&1).name()) + |> Enum.reject(&(&1 == "unknown")) + |> Enum.uniq() + end + defp issues_table(issues) do rows = Enum.map(issues, fn %{category: cat, behavior: beh, cosine: cosine, score: score} -> diff --git a/lib/codeqa/health_report.ex b/lib/codeqa/health_report.ex index 4a521e2..24b3365 100644 --- a/lib/codeqa/health_report.ex +++ b/lib/codeqa/health_report.ex @@ -20,6 +20,7 @@ defmodule CodeQA.HealthReport do aggregate = get_in(analysis_results, ["codebase", "aggregate"]) || %{} files = Map.get(analysis_results, "files", %{}) + project_langs = project_languages(files) threshold_grades = categories @@ -43,7 +44,7 @@ defmodule CodeQA.HealthReport do worst_files_map = FileScorer.worst_files_per_behavior(files, combined_top: combined_top) - cosine_grades = Grader.grade_cosine_categories(aggregate, worst_files_map, grade_scale) + cosine_grades = Grader.grade_cosine_categories(aggregate, worst_files_map, grade_scale, project_langs) # TODO(option-c): a unified flat issues list would replace the current per-category worst offenders loop; all category results would be flattened, deduplicated by file+line, and re-ranked by a cross-category severity score before rendering. 
all_categories = @@ -56,7 +57,7 @@ defmodule CodeQA.HealthReport do metadata = build_metadata(analysis_results) - top_issues = SampleRunner.diagnose_aggregate(aggregate, top: 10) + top_issues = SampleRunner.diagnose_aggregate(aggregate, top: 10, languages: project_langs) %{ metadata: metadata, @@ -82,6 +83,14 @@ defmodule CodeQA.HealthReport do } end + defp project_languages(files_map) do + files_map + |> Map.keys() + |> Enum.map(&CodeQA.Language.detect(&1).name()) + |> Enum.reject(&(&1 == "unknown")) + |> Enum.uniq() + end + defp build_category_summary(%{type: :cosine}), do: "" defp build_category_summary(graded) do diff --git a/lib/codeqa/health_report/grader.ex b/lib/codeqa/health_report/grader.ex index 94259d6..20413c9 100644 --- a/lib/codeqa/health_report/grader.ex +++ b/lib/codeqa/health_report/grader.ex @@ -234,17 +234,19 @@ defmodule CodeQA.HealthReport.Grader do @spec grade_cosine_categories( aggregate :: map(), worst_files :: %{String.t() => [map()]}, - grade_scale :: [{number(), String.t()}] + grade_scale :: [{number(), String.t()}], + languages :: [String.t()] ) :: [map()] def grade_cosine_categories( aggregate, worst_files, - scale \\ CodeQA.HealthReport.Categories.default_grade_scale() + scale \\ CodeQA.HealthReport.Categories.default_grade_scale(), + languages \\ [] ) do threshold = CodeQA.Config.cosine_significance_threshold() aggregate - |> SampleRunner.diagnose_aggregate(top: 99_999) + |> SampleRunner.diagnose_aggregate(top: 99_999, languages: languages) |> Enum.group_by(& &1.category) |> Enum.map(fn {category, behaviors} -> behavior_entries = diff --git a/test/codeqa/combined_metrics/sample_runner_test.exs b/test/codeqa/combined_metrics/sample_runner_test.exs index 8332d3f..103b805 100644 --- a/test/codeqa/combined_metrics/sample_runner_test.exs +++ b/test/codeqa/combined_metrics/sample_runner_test.exs @@ -112,6 +112,13 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do end end + describe "grade_cosine_categories/4 languages wiring" do + 
test "accepts languages argument" do + result = CodeQA.HealthReport.Grader.grade_cosine_categories(%{}, %{}, [], ["elixir"]) + assert is_list(result) + end + end + describe "run/1" do test "returns a list of results with required keys", %{results: results} do assert is_list(results) From d8d448ac6fe669c193cb23f8e57865295fb077f8 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Fri, 20 Mar 2026 13:41:00 +0100 Subject: [PATCH 22/71] fix(block-impact-analyzer): avoid redundant language detection, clarify project_languages param Co-Authored-By: Claude Sonnet 4.6 --- .../block_impact/refactoring_potentials.ex | 13 +++++++-- lib/codeqa/block_impact_analyzer.ex | 29 ++++++++++++------- lib/codeqa/diagnostics.ex | 10 +++++-- lib/codeqa/health_report.ex | 3 +- 4 files changed, 39 insertions(+), 16 deletions(-) diff --git a/lib/codeqa/block_impact/refactoring_potentials.ex b/lib/codeqa/block_impact/refactoring_potentials.ex index 1c0c89d..092d97b 100644 --- a/lib/codeqa/block_impact/refactoring_potentials.ex +++ b/lib/codeqa/block_impact/refactoring_potentials.ex @@ -44,7 +44,9 @@ defmodule CodeQA.BlockImpact.RefactoringPotentials do languages = Keyword.get(opts, :languages) file_delta = compute_file_delta(baseline_file_cosines, without_file_metrics, language) - codebase_delta = compute_codebase_delta(baseline_codebase_cosines, without_codebase_agg, languages) + + codebase_delta = + compute_codebase_delta(baseline_codebase_cosines, without_codebase_agg, languages) all_keys = Enum.uniq(Map.keys(file_delta) ++ Map.keys(codebase_delta)) @@ -68,12 +70,17 @@ defmodule CodeQA.BlockImpact.RefactoringPotentials do defp compute_file_delta(baseline_cosines, without_metrics, language) do without_agg = FileScorer.file_to_aggregate(without_metrics) - without_cosines = SampleRunner.diagnose_aggregate(without_agg, top: 99_999, language: language) + + without_cosines = + SampleRunner.diagnose_aggregate(without_agg, top: 99_999, language: language) + cosines_to_delta(baseline_cosines, 
without_cosines) end defp compute_codebase_delta(baseline_cosines, without_agg, languages) do - without_cosines = SampleRunner.diagnose_aggregate(without_agg, top: 99_999, languages: languages) + without_cosines = + SampleRunner.diagnose_aggregate(without_agg, top: 99_999, languages: languages) + cosines_to_delta(baseline_cosines, without_cosines) end diff --git a/lib/codeqa/block_impact_analyzer.ex b/lib/codeqa/block_impact_analyzer.ex index 37578b3..d743184 100644 --- a/lib/codeqa/block_impact_analyzer.ex +++ b/lib/codeqa/block_impact_analyzer.ex @@ -44,7 +44,10 @@ defmodule CodeQA.BlockImpactAnalyzer do project_langs = project_languages(files_map) baseline_codebase_cosines = - SampleRunner.diagnose_aggregate(baseline_codebase_agg, top: 99_999, languages: project_langs) + SampleRunner.diagnose_aggregate(baseline_codebase_agg, + top: 99_999, + languages: project_langs + ) file_results = pipeline_result["files"] @@ -92,7 +95,9 @@ defmodule CodeQA.BlockImpactAnalyzer do baseline_file_agg = FileScorer.file_to_aggregate(baseline_file_metrics) language = CodeQA.Language.detect(path).name() - baseline_file_cosines = SampleRunner.diagnose_aggregate(baseline_file_agg, top: 99_999, language: language) + + baseline_file_cosines = + SampleRunner.diagnose_aggregate(baseline_file_agg, top: 99_999, language: language) top_level_nodes |> Enum.map(fn node -> @@ -104,7 +109,8 @@ defmodule CodeQA.BlockImpactAnalyzer do baseline_file_cosines, file_results, baseline_codebase_cosines, - nodes_top + nodes_top, + language ) end) |> Enum.sort_by(fn n -> {n["start_line"], n["column_start"]} end) @@ -119,7 +125,8 @@ defmodule CodeQA.BlockImpactAnalyzer do baseline_file_cosines, file_results, baseline_codebase_cosines, - nodes_top + nodes_top, + language ) do potentials = if length(node.tokens) < @min_tokens do @@ -132,7 +139,8 @@ defmodule CodeQA.BlockImpactAnalyzer do baseline_file_cosines, file_results, baseline_codebase_cosines, - nodes_top + nodes_top, + language ) end @@ -147,7 +155,8 
@@ defmodule CodeQA.BlockImpactAnalyzer do baseline_file_cosines, file_results, baseline_codebase_cosines, - nodes_top + nodes_top, + language ) end) |> Enum.sort_by(fn n -> {n["start_line"], n["column_start"]} end) @@ -173,7 +182,8 @@ defmodule CodeQA.BlockImpactAnalyzer do baseline_file_cosines, file_results, baseline_codebase_cosines, - nodes_top + nodes_top, + language ) do reconstructed = FileImpact.reconstruct_without(root_tokens, node) without_file_metrics = Analyzer.analyze_file(path, reconstructed) @@ -183,7 +193,6 @@ defmodule CodeQA.BlockImpactAnalyzer do |> Map.put(path, %{"metrics" => without_file_metrics}) |> Analyzer.aggregate_file_metrics() - language = CodeQA.Language.detect(path).name() project_langs = project_languages(file_results) RefactoringPotentials.compute( @@ -197,8 +206,8 @@ defmodule CodeQA.BlockImpactAnalyzer do ) end - defp project_languages(files_map) do - files_map + defp project_languages(path_keyed_map) do + path_keyed_map |> Map.keys() |> Enum.map(&CodeQA.Language.detect(&1).name()) |> Enum.reject(&(&1 == "unknown")) diff --git a/lib/codeqa/diagnostics.ex b/lib/codeqa/diagnostics.ex index 6f4c0d3..d086e44 100644 --- a/lib/codeqa/diagnostics.ex +++ b/lib/codeqa/diagnostics.ex @@ -39,8 +39,14 @@ defmodule CodeQA.Diagnostics do files = Map.get(result, "files", %{}) project_langs = project_languages(files) - issues_task = Task.async(fn -> SampleRunner.diagnose_aggregate(aggregate, top: top, languages: project_langs) end) - categories_task = Task.async(fn -> SampleRunner.score_aggregate(aggregate, languages: project_langs) end) + issues_task = + Task.async(fn -> + SampleRunner.diagnose_aggregate(aggregate, top: top, languages: project_langs) + end) + + categories_task = + Task.async(fn -> SampleRunner.score_aggregate(aggregate, languages: project_langs) end) + issues = Task.await(issues_task) categories = Task.await(categories_task) diff --git a/lib/codeqa/health_report.ex b/lib/codeqa/health_report.ex index 24b3365..6b5c4d6 100644 --- 
a/lib/codeqa/health_report.ex +++ b/lib/codeqa/health_report.ex @@ -44,7 +44,8 @@ defmodule CodeQA.HealthReport do worst_files_map = FileScorer.worst_files_per_behavior(files, combined_top: combined_top) - cosine_grades = Grader.grade_cosine_categories(aggregate, worst_files_map, grade_scale, project_langs) + cosine_grades = + Grader.grade_cosine_categories(aggregate, worst_files_map, grade_scale, project_langs) # TODO(option-c): a unified flat issues list would replace the current per-category worst offenders loop; all category results would be flattened, deduplicated by file+line, and re-ranked by a cross-category severity score before rendering. all_categories = From 1e5718712b6f695c537770846d3c1c70642e2346 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Fri, 20 Mar 2026 13:43:16 +0100 Subject: [PATCH 23/71] feat(cli): add --apply-languages flag to sample_report mix task Co-Authored-By: Claude Sonnet 4.6 --- lib/mix/tasks/codeqa/sample_report.ex | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/mix/tasks/codeqa/sample_report.ex b/lib/mix/tasks/codeqa/sample_report.ex index 0246777..e4a1ba5 100644 --- a/lib/mix/tasks/codeqa/sample_report.ex +++ b/lib/mix/tasks/codeqa/sample_report.ex @@ -11,6 +11,8 @@ defmodule Mix.Tasks.Codeqa.SampleReport do mix codeqa.sample_report --verbose mix codeqa.sample_report --output results.json mix codeqa.sample_report --apply-scalars + mix codeqa.sample_report --apply-languages + mix codeqa.sample_report --apply-languages --category variable_naming mix codeqa.sample_report --file path/to/file.ex A ratio ≥ 2x means the formula meaningfully separates good from bad code. 
@@ -31,6 +33,7 @@ defmodule Mix.Tasks.Codeqa.SampleReport do output: :string, report: :string, apply_scalars: :boolean, + apply_languages: :boolean, file: :string, top: :integer ] @@ -62,6 +65,15 @@ defmodule Mix.Tasks.Codeqa.SampleReport do Enum.each(stats, &print_scalar_stats/1) end + if opts[:apply_languages] do + stats = CodeQA.CombinedMetrics.SampleRunner.apply_languages(opts) + IO.puts("\nApplied language coverage to YAML configs:") + + Enum.each(stats, fn %{category: cat, behaviors_with_languages: n} -> + IO.puts(" #{cat}: #{n} behaviors with language coverage") + end) + end + if path = opts[:file] do print_file_scores(path, opts) end From 76fc86cf769a94c5eb1c666c959aeb284a72ac88 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Fri, 20 Mar 2026 13:45:57 +0100 Subject: [PATCH 24/71] chore(combined-metrics): populate _languages coverage from samples --- priv/combined_metrics/code_smells.yml | 5 +++++ priv/combined_metrics/consistency.yml | 4 ++++ priv/combined_metrics/dependencies.yml | 3 +++ priv/combined_metrics/documentation.yml | 6 ++++++ priv/combined_metrics/error_handling.yml | 3 +++ priv/combined_metrics/file_structure.yml | 6 ++++++ priv/combined_metrics/function_design.yml | 9 +++++++++ priv/combined_metrics/naming_conventions.yml | 5 +++++ priv/combined_metrics/scope_and_assignment.yml | 6 ++++++ priv/combined_metrics/testing.yml | 4 ++++ priv/combined_metrics/type_and_value.yml | 5 +++++ priv/combined_metrics/variable_naming.yml | 14 ++++++++++++++ 12 files changed, 70 insertions(+) diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml index d8fe1b8..6be13fa 100644 --- a/priv/combined_metrics/code_smells.yml +++ b/priv/combined_metrics/code_smells.yml @@ -1,6 +1,7 @@ consistent_string_quote_style: _doc: "Files should use a single, consistent string quoting style throughout." 
_fix_hint: "Use a single quote style (e.g., double quotes) consistently throughout the file" + _languages: [elixir] _log_baseline: -18.9505 branching: mean_branching_density: 0.0243 @@ -98,6 +99,7 @@ consistent_string_quote_style: no_dead_code_after_return: _doc: "There should be no unreachable statements after a return or early exit." _fix_hint: "Remove unreachable statements after return/raise/exit — they can never execute" + _languages: [elixir] _log_baseline: -57.2281 branching: mean_branch_count: -2.0000 @@ -204,6 +206,7 @@ no_dead_code_after_return: no_debug_print_statements: _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." _fix_hint: "Remove IO.inspect/IO.puts/console.log debug output — use Logger or remove entirely" + _languages: [elixir] _log_baseline: -86.5160 branching: mean_branch_count: -0.3540 @@ -314,6 +317,7 @@ no_debug_print_statements: no_fixme_comments: _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." _fix_hint: "Resolve FIXME/XXX/HACK comments before merging — they indicate known unresolved issues" + _languages: [elixir] _log_baseline: -2.1522 branching: mean_branch_count: 0.1755 @@ -428,6 +432,7 @@ no_fixme_comments: no_nested_ternary: _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." _fix_hint: "Replace nested ternary/conditional expressions with if-else blocks or pattern matching" + _languages: [elixir] _log_baseline: 7.2208 branching: mean_branch_count: -0.5662 diff --git a/priv/combined_metrics/consistency.yml b/priv/combined_metrics/consistency.yml index 8dbd172..426e360 100644 --- a/priv/combined_metrics/consistency.yml +++ b/priv/combined_metrics/consistency.yml @@ -1,6 +1,7 @@ consistent_casing_within_file: _doc: "A file should use one naming convention throughout — no mixing of camelCase and snake_case for the same kind of identifier." 
_fix_hint: "Pick one casing convention (snake_case or camelCase) and apply it uniformly" + _languages: [elixir] _log_baseline: -2.4622 brevity: mean_sample_size: -0.0471 @@ -60,6 +61,7 @@ consistent_casing_within_file: consistent_error_return_shape: _doc: "All functions in a module should return errors in the same shape — mixed `nil`, `false`, and `{:error, _}` returns are confusing." _fix_hint: "Use a single error return format (e.g., {:error, reason}) consistently throughout" + _languages: [elixir] _log_baseline: 31.6243 branching: mean_branch_count: -0.2178 @@ -157,6 +159,7 @@ consistent_error_return_shape: consistent_function_style: _doc: "A module should not mix one-liner and multi-clause function definitions for the same concern." _fix_hint: "Use a consistent function definition style — all def or all defp, one-liner or block form" + _languages: [elixir] _log_baseline: -2.0498 branching: mean_branch_count: -0.1610 @@ -251,6 +254,7 @@ consistent_function_style: same_concept_same_name: _doc: "The same domain concept should use the same name throughout a file — mixing `user`, `usr`, and `u` for the same thing harms readability." _fix_hint: "Use the same name for the same concept everywhere — avoid synonyms like user/account/member" + _languages: [elixir] _log_baseline: -10.7039 brevity: mean_sample_size: -1.3837 diff --git a/priv/combined_metrics/dependencies.yml b/priv/combined_metrics/dependencies.yml index 000ecbf..fbc3591 100644 --- a/priv/combined_metrics/dependencies.yml +++ b/priv/combined_metrics/dependencies.yml @@ -1,6 +1,7 @@ import_count_under_10: _doc: "Files should import fewer than 10 modules; high import counts signal excessive coupling." 
_fix_hint: "Reduce imports — split large modules or use fully-qualified names for rarely-used deps" + _languages: [elixir] _log_baseline: 7.0687 branching: mean_branch_count: 0.2110 @@ -115,6 +116,7 @@ import_count_under_10: low_coupling: _doc: "Modules should depend on few external symbols — a low unique-operand count relative to total is a proxy for tight coupling." _fix_hint: "Reduce dependencies between modules — introduce interfaces or narrow the public API" + _languages: [elixir] _log_baseline: -38.4249 branching: mean_branch_count: 0.0745 @@ -226,6 +228,7 @@ low_coupling: no_wildcard_imports: _doc: "Wildcard imports (`import *`, `using Module`) pollute the local namespace and hide dependencies." _fix_hint: "Replace wildcard imports with explicit named imports for clarity and reduced scope pollution" + _languages: [elixir] _log_baseline: -9.4788 branching: mean_branching_density: 0.0249 diff --git a/priv/combined_metrics/documentation.yml b/priv/combined_metrics/documentation.yml index 75b19fd..66ca037 100644 --- a/priv/combined_metrics/documentation.yml +++ b/priv/combined_metrics/documentation.yml @@ -1,6 +1,7 @@ docstring_is_nonempty: _doc: "Docstrings must contain meaningful content, not just a placeholder or empty string." _fix_hint: "Replace placeholder docstrings with a real description of what the function does" + _languages: [elixir] _log_baseline: 29.4288 branching: mean_branch_count: 0.3089 @@ -112,6 +113,7 @@ docstring_is_nonempty: file_has_license_header: _doc: "Source files should begin with a license or copyright header." _fix_hint: "Add a license header comment at the top of the file" + _languages: [elixir] _log_baseline: 5.7261 branching: mean_branching_density: -0.0081 @@ -200,6 +202,7 @@ file_has_license_header: file_has_module_docstring: _doc: "Files should have a module-level docstring explaining purpose and usage." 
_fix_hint: "Add a @moduledoc string describing the module's purpose" + _languages: [elixir] _log_baseline: 24.2268 branching: mean_branch_count: 0.3854 @@ -304,6 +307,7 @@ file_has_module_docstring: file_has_no_commented_out_code: _doc: "Files should not contain commented-out code blocks left from development." _fix_hint: "Remove commented-out code — use version control to track deleted code" + _languages: [elixir] _log_baseline: -8.1616 branching: mean_branching_density: 0.0368 @@ -411,6 +415,7 @@ file_has_no_commented_out_code: function_has_docstring: _doc: "Public functions should have a docstring describing behaviour, params, and return value." _fix_hint: "Add @doc strings to public functions describing parameters, return value, and purpose" + _languages: [elixir] _log_baseline: 43.0440 branching: mean_branch_count: 0.5279 @@ -529,6 +534,7 @@ function_has_docstring: function_todo_comment_in_body: _doc: "Functions should not contain TODO/FIXME comments indicating unfinished work." _fix_hint: "Resolve or extract TODO comments — create a tracked issue instead of leaving them inline" + _languages: [elixir] _log_baseline: 7.0511 branching: mean_branch_count: -0.0287 diff --git a/priv/combined_metrics/error_handling.yml b/priv/combined_metrics/error_handling.yml index 1398a3d..a0b8b49 100644 --- a/priv/combined_metrics/error_handling.yml +++ b/priv/combined_metrics/error_handling.yml @@ -1,6 +1,7 @@ does_not_swallow_errors: _doc: "Errors must be handled or re-raised — empty rescue/catch blocks silently hide failures." _fix_hint: "Propagate or log errors — do not silently discard {:error, _} tuples or rescue clauses" + _languages: [elixir] _log_baseline: 87.3594 branching: mean_branch_count: -0.1041 @@ -111,6 +112,7 @@ does_not_swallow_errors: error_message_is_descriptive: _doc: "Error values should carry a meaningful message, not just a bare atom or empty string." 
_fix_hint: "Include context in error messages — describe what failed, not just that it failed" + _languages: [elixir] _log_baseline: 52.7594 branching: mean_branch_count: 0.0664 @@ -215,6 +217,7 @@ error_message_is_descriptive: returns_typed_error: _doc: "Functions should signal failure via a typed return (e.g. `{:error, reason}`) rather than returning `nil` or `false`." _fix_hint: "Return typed errors like {:error, :not_found} instead of bare :error or nil" + _languages: [elixir] _log_baseline: 208.7673 branching: mean_branch_count: -0.2092 diff --git a/priv/combined_metrics/file_structure.yml b/priv/combined_metrics/file_structure.yml index 19418d4..73376ce 100644 --- a/priv/combined_metrics/file_structure.yml +++ b/priv/combined_metrics/file_structure.yml @@ -1,6 +1,7 @@ has_consistent_indentation: _doc: "Files should use a single, consistent indentation style with no mixed tabs and spaces." _fix_hint: "Use a consistent indentation width throughout the file (2 or 4 spaces, not mixed)" + _languages: [elixir] _log_baseline: -12.7016 branching: mean_branching_density: 0.1994 @@ -37,6 +38,7 @@ has_consistent_indentation: line_count_under_300: _doc: "Files should be under 300 lines; longer files typically violate single responsibility." _fix_hint: "Split large files — extract cohesive groups of functions into separate modules" + _languages: [elixir] _log_baseline: -49.2655 branching: mean_branch_count: -0.4508 @@ -154,6 +156,7 @@ line_count_under_300: line_length_under_120: _doc: "Lines should be under 120 characters to avoid horizontal scrolling." _fix_hint: "Wrap lines at 80–120 characters — break long expressions into multiple lines" + _languages: [elixir] _log_baseline: -6.2404 branching: mean_branching_density: -0.1942 @@ -257,6 +260,7 @@ line_length_under_120: no_magic_numbers: _doc: "Numeric literals should be extracted to named constants rather than used inline." 
_fix_hint: "Replace literal numbers with named constants or module attributes" + _languages: [elixir] _log_baseline: 107.5222 branching: mean_branch_count: -0.4352 @@ -361,6 +365,7 @@ no_magic_numbers: single_responsibility: _doc: "Each file should have one primary concern — low complexity spread across few, focused functions." _fix_hint: "Split the module — each file should have one primary purpose" + _languages: [elixir] _log_baseline: -35.4996 branching: mean_branch_count: -0.0678 @@ -475,6 +480,7 @@ single_responsibility: uses_standard_indentation_width: _doc: "Indentation should use consistent multiples of 2 or 4 spaces throughout the file." _fix_hint: "Use the project-standard 2-space indentation throughout" + _languages: [elixir] _log_baseline: -17.9172 compression: mean_raw_bytes: -0.2512 diff --git a/priv/combined_metrics/function_design.yml b/priv/combined_metrics/function_design.yml index 6824ebb..b97868b 100644 --- a/priv/combined_metrics/function_design.yml +++ b/priv/combined_metrics/function_design.yml @@ -1,6 +1,7 @@ boolean_function_has_question_mark: _doc: "Functions returning a boolean should end with `?` (Elixir/Ruby) or start with `is_`/`has_` (JS/Python)." _fix_hint: "Add a ? suffix to boolean-returning functions (e.g., valid? instead of is_valid)" + _languages: [elixir] _log_baseline: 7.0991 brevity: mean_sample_size: 0.0085 @@ -47,6 +48,7 @@ boolean_function_has_question_mark: cyclomatic_complexity_under_10: _doc: "Functions should have a cyclomatic complexity under 10." _fix_hint: "Reduce branching — extract complex conditionals into helper functions" + _languages: [elixir] _log_baseline: -1.6476 branching: mean_branch_count: -0.2373 @@ -151,6 +153,7 @@ cyclomatic_complexity_under_10: has_verb_in_name: _doc: "Function names should contain a verb describing the action performed." 
_fix_hint: "Start function names with a verb (get_, fetch_, build_, compute_, validate_)" + _languages: [elixir] _log_baseline: 15.9117 compression: mean_raw_bytes: 0.0990 @@ -177,6 +180,7 @@ has_verb_in_name: is_less_than_20_lines: _doc: "Functions should be 20 lines or fewer." _fix_hint: "Split long functions — each function should fit on one screen (under 20 lines)" + _languages: [elixir] _log_baseline: 23.1945 branching: mean_branch_count: -0.0820 @@ -289,6 +293,7 @@ is_less_than_20_lines: nesting_depth_under_4: _doc: "Code should not nest deeper than 4 levels." _fix_hint: "Reduce nesting — use early returns, guards, or extract inner blocks" + _languages: [elixir] _log_baseline: 1.1322 branching: mean_branch_count: -0.3267 @@ -400,6 +405,7 @@ nesting_depth_under_4: no_boolean_parameter: _doc: "Functions should not take boolean parameters — a flag usually means the function does two things." _fix_hint: "Replace boolean parameters with two separate functions or use an options map" + _languages: [elixir] _log_baseline: 3.0928 branching: mean_branch_count: -2.0000 @@ -507,6 +513,7 @@ no_boolean_parameter: no_magic_numbers: _doc: "Numeric literals should be named constants, not inline magic numbers." _fix_hint: "Replace magic numbers inside functions with named module attributes or constants" + _languages: [elixir] _log_baseline: 48.6069 branching: mean_branch_count: -0.2708 @@ -611,6 +618,7 @@ no_magic_numbers: parameter_count_under_4: _doc: "Functions should take fewer than 4 parameters." _fix_hint: "Reduce parameter count — group related params into a struct or options map" + _languages: [elixir] _log_baseline: 1.6218 branching: mean_non_blank_count: 0.0967 @@ -717,6 +725,7 @@ parameter_count_under_4: uses_ternary_expression: _doc: "Simple conditional assignments should use inline expressions rather than full if-blocks." 
_fix_hint: "Replace verbose if-else blocks with concise ternary/conditional expressions where readable" + _languages: [elixir] _log_baseline: -0.3649 branching: mean_branch_count: -0.4693 diff --git a/priv/combined_metrics/naming_conventions.yml b/priv/combined_metrics/naming_conventions.yml index 33e5d8a..4f3693d 100644 --- a/priv/combined_metrics/naming_conventions.yml +++ b/priv/combined_metrics/naming_conventions.yml @@ -1,6 +1,7 @@ class_name_is_noun: _doc: "Class and module names should be nouns describing what they represent, not verbs or gerunds." _fix_hint: "Name modules/classes with nouns (User, OrderProcessor) not verbs" + _languages: [elixir] _log_baseline: 4.2909 brevity: mean_sample_size: 0.7106 @@ -58,6 +59,7 @@ class_name_is_noun: file_name_matches_primary_export: _doc: "The file name should match the primary class or module it exports (e.g. `user.js` exports `User`)." _fix_hint: "Rename the file to match the primary module it defines" + _languages: [elixir] _log_baseline: 0.0000 casing_entropy: mean_pascal_case_count: 0.0000 @@ -67,6 +69,7 @@ file_name_matches_primary_export: function_name_is_not_single_word: _doc: "Single-word function names like `run`, `process`, or `handle` are too vague to convey intent." _fix_hint: "Use at least two words in function names to convey intent (e.g., fetch_user not fetch)" + _languages: [elixir] _log_baseline: 17.4874 compression: mean_raw_bytes: 0.2480 @@ -94,6 +97,7 @@ function_name_is_not_single_word: function_name_matches_return_type: _doc: "Functions prefixed with `get_`, `fetch_`, or `find_` should return the thing they name." _fix_hint: "Align the function name with what it returns (get_ for values, is_/has_ for booleans)" + _languages: [elixir] _log_baseline: 7.9532 branching: mean_max_nesting_depth: 0.1335 @@ -188,6 +192,7 @@ function_name_matches_return_type: test_name_starts_with_verb: _doc: "Test descriptions should start with a verb: `creates`, `raises`, `returns`, not a noun phrase." 
_fix_hint: "Start test descriptions with a verb (returns, raises, creates, validates)" + _languages: [elixir] _log_baseline: 7.8702 branching: mean_branch_count: 1.9977 diff --git a/priv/combined_metrics/scope_and_assignment.yml b/priv/combined_metrics/scope_and_assignment.yml index 507e8ae..c21c6dd 100644 --- a/priv/combined_metrics/scope_and_assignment.yml +++ b/priv/combined_metrics/scope_and_assignment.yml @@ -1,6 +1,7 @@ declared_close_to_use: _doc: "Variables should be declared near their first use, not hoisted to the top of the function." _fix_hint: "Move variable declarations closer to their first use to reduce cognitive scope" + _languages: [elixir] _log_baseline: -59.6022 branching: mean_branch_count: -0.4906 @@ -107,6 +108,7 @@ declared_close_to_use: mutated_after_initial_assignment: _doc: "Variables should not be reassigned after their initial value — prefer introducing a new name." _fix_hint: "Avoid reassigning variables — introduce a new name for each transformed value" + _languages: [elixir] _log_baseline: 6.6347 branching: mean_branch_count: 0.1519 @@ -217,6 +219,7 @@ mutated_after_initial_assignment: reassigned_multiple_times: _doc: "A variable reassigned many times is a sign the name is too generic or the function does too much." _fix_hint: "Refactor repeated reassignment into a pipeline or named intermediate values" + _languages: [elixir] _log_baseline: -107.5659 branching: mean_non_blank_count: -0.2287 @@ -316,6 +319,7 @@ reassigned_multiple_times: scope_is_minimal: _doc: "Variables should be scoped as narrowly as possible — not declared at a wider scope than needed." _fix_hint: "Narrow the scope of variables — declare them in the innermost block where they are used" + _languages: [elixir] _log_baseline: -6.7522 branching: mean_branch_count: -0.1072 @@ -424,6 +428,7 @@ scope_is_minimal: shadowed_by_inner_scope: _doc: "Inner-scope names that shadow outer-scope names cause confusion about which value is in play." 
_fix_hint: "Rename the inner variable to avoid shadowing the outer one" + _languages: [elixir] _log_baseline: -33.9501 branching: mean_branching_density: 2.0000 @@ -529,6 +534,7 @@ shadowed_by_inner_scope: used_only_once: _doc: "A variable used only once is a candidate for inlining — it rarely adds clarity over a direct expression." _fix_hint: "Inline single-use variables directly into their only use site" + _languages: [elixir] _log_baseline: -116.4649 branching: mean_non_blank_count: -0.5385 diff --git a/priv/combined_metrics/testing.yml b/priv/combined_metrics/testing.yml index ef74557..6b1d5e9 100644 --- a/priv/combined_metrics/testing.yml +++ b/priv/combined_metrics/testing.yml @@ -1,6 +1,7 @@ reasonable_test_to_code_ratio: _doc: "There should be an adequate number of test cases relative to the code being tested." _fix_hint: "Add more tests — aim for at least one test per public function" + _languages: [elixir] _log_baseline: 8.2261 branching: mean_branch_count: 0.1869 @@ -111,6 +112,7 @@ reasonable_test_to_code_ratio: test_has_assertion: _doc: "Every test body must contain at least one assertion — a test without assertions proves nothing." _fix_hint: "Add at least one assert/refute to every test — a test without assertions proves nothing" + _languages: [elixir] _log_baseline: -9.6007 branching: mean_branch_count: 0.0918 @@ -214,6 +216,7 @@ test_has_assertion: test_name_describes_behavior: _doc: "Test names should describe the expected behaviour, not just the method under test." _fix_hint: "Describe expected behavior in test names: 'returns {:error, :not_found} when user missing'" + _languages: [elixir] _log_baseline: 56.4575 branching: mean_branch_count: 2.0000 @@ -307,6 +310,7 @@ test_name_describes_behavior: test_single_concept: _doc: "Each test should verify a single concept — tests covering multiple things are harder to diagnose when they fail." 
_fix_hint: "Test one thing per test — split tests covering multiple behaviors into separate test cases" + _languages: [elixir] _log_baseline: 35.9646 branching: mean_branch_count: 0.3696 diff --git a/priv/combined_metrics/type_and_value.yml b/priv/combined_metrics/type_and_value.yml index dc85081..f3e061c 100644 --- a/priv/combined_metrics/type_and_value.yml +++ b/priv/combined_metrics/type_and_value.yml @@ -1,6 +1,7 @@ boolean_assigned_from_comparison: _doc: "Boolean variables should be assigned directly from comparisons or predicate calls, not set via conditionals." _fix_hint: "Assign the comparison result directly — bool = x > 0 not if x > 0 do true else false end" + _languages: [elixir] _log_baseline: 3.2119 branching: mean_branch_count: -0.8402 @@ -113,6 +114,7 @@ boolean_assigned_from_comparison: hardcoded_url_or_path: _doc: "URLs, file paths, and host names should be configuration values, not inline string literals." _fix_hint: "Move hardcoded URLs and file paths to configuration or module attributes" + _languages: [elixir] _log_baseline: 119.8203 branching: mean_max_nesting_depth: 0.9571 @@ -216,6 +218,7 @@ hardcoded_url_or_path: no_empty_string_initial: _doc: "Initialising a variable to an empty string and reassigning later signals missing structure." _fix_hint: "Replace initial empty string assignments with nil or a meaningful default value" + _languages: [elixir] _log_baseline: -11.4915 branching: mean_branch_count: -0.1786 @@ -320,6 +323,7 @@ no_empty_string_initial: no_implicit_null_initial: _doc: "Initialising a variable to `nil`/`null` and assigning it later in a branch signals missing structure." 
_fix_hint: "Use nil explicitly when a variable starts null — or restructure to avoid nil initialization" + _languages: [elixir] _log_baseline: -3.2196 branching: mean_branch_count: 0.0293 @@ -424,6 +428,7 @@ no_implicit_null_initial: no_magic_value_assigned: _doc: "Literal strings and numbers assigned to variables should be named constants, not inline values." _fix_hint: "Replace magic value assignments with named constants or module attributes" + _languages: [elixir] _log_baseline: -6.5439 branching: mean_branch_count: -0.2035 diff --git a/priv/combined_metrics/variable_naming.yml b/priv/combined_metrics/variable_naming.yml index 8b9a7dc..4d68d74 100644 --- a/priv/combined_metrics/variable_naming.yml +++ b/priv/combined_metrics/variable_naming.yml @@ -1,6 +1,7 @@ boolean_has_is_has_prefix: _doc: "Boolean variables should be prefixed with `is_`, `has_`, or `can_`." _fix_hint: "Prefix boolean variables with is_, has_, or can_ (e.g., is_valid, has_errors)" + _languages: [elixir, javascript, ruby] _log_baseline: 15.9674 brevity: mean_sample_size: 0.0752 @@ -64,6 +65,7 @@ boolean_has_is_has_prefix: collection_name_is_plural: _doc: "Variables holding a collection should use a plural name." _fix_hint: "Use plural names for collections (users, orders, ids not user, order, id)" + _languages: [elixir, javascript, ruby] _log_baseline: 32.4645 brevity: mean_sample_size: -0.6992 @@ -126,6 +128,7 @@ collection_name_is_plural: loop_var_is_single_letter: _doc: "Loop index variables (`i`, `j`, `k`) are acceptable inside loop bodies." _fix_hint: "Use descriptive loop variable names instead of single letters (user not u)" + _languages: [elixir, javascript, ruby] _log_baseline: -28.4780 brevity: mean_sample_size: -0.1049 @@ -206,6 +209,7 @@ loop_var_is_single_letter: name_contains_and: _doc: "Variable names containing `and` signal a variable that holds two concerns." 
_fix_hint: "Split variables with 'and' into two focused variables (user_and_role → user + role)" + _languages: [elixir, javascript, ruby] _log_baseline: -11.0411 branching: mean_branch_count: -0.7789 @@ -316,6 +320,7 @@ name_contains_and: name_contains_type_suffix: _doc: "Type suffixes in names (`userString`, `nameList`) are redundant noise." _fix_hint: "Remove type suffixes from names (userList → users, dataMap → data)" + _languages: [elixir, javascript, ruby] _log_baseline: -26.2345 branching: mean_branch_count: -0.4150 @@ -402,6 +407,7 @@ name_contains_type_suffix: name_is_abbreviation: _doc: "Abbreviated names (`usr`, `cfg`, `mgr`) reduce readability." _fix_hint: "Expand abbreviations to full words (usr → user, cnt → count, idx → index)" + _languages: [elixir, javascript, ruby] _log_baseline: 23.1000 brevity: mean_sample_size: -0.2397 @@ -488,6 +494,7 @@ name_is_abbreviation: name_is_generic: _doc: "Generic names (`data`, `result`, `tmp`, `val`, `obj`) convey no domain meaning." _fix_hint: "Replace generic names (data, result, tmp, info) with domain-specific names" + _languages: [elixir, javascript, ruby] _log_baseline: 37.2228 branching: mean_branch_count: 0.5193 @@ -600,6 +607,7 @@ name_is_generic: name_is_number_like: _doc: "Number-suffixed names (`var1`, `thing2`) signal a missing abstraction." _fix_hint: "Replace number-like names (x1, y2) with descriptive names indicating purpose" + _languages: [elixir, javascript, ruby] _log_baseline: 1.7668 brevity: mean_sample_size: -0.0262 @@ -680,6 +688,7 @@ name_is_number_like: name_is_single_letter: _doc: "Single-letter names outside loop indices are too opaque." _fix_hint: "Replace single-letter names with descriptive names (n → count, s → status)" + _languages: [elixir, javascript, ruby] _log_baseline: 25.4977 branching: mean_branching_density: -0.0458 @@ -779,6 +788,7 @@ name_is_single_letter: name_is_too_long: _doc: "Names longer than ~30 characters harm readability." 
_fix_hint: "Shorten overly long names — aim for 2–3 descriptive words, drop redundant context" + _languages: [elixir, javascript, ruby] _log_baseline: -7.5164 branching: mean_branch_count: 0.0340 @@ -894,6 +904,7 @@ name_is_too_long: name_is_too_short: _doc: "Names shorter than 3 characters (outside loops) are too opaque." _fix_hint: "Use at least 3 characters — replace very short names with full words" + _languages: [elixir, javascript, ruby] _log_baseline: -0.4484 branching: mean_branch_count: -0.2327 @@ -980,6 +991,7 @@ name_is_too_short: negated_boolean_name: _doc: "Negated boolean names (`isNotValid`, `notActive`) are harder to reason about." _fix_hint: "Rename negated booleans positively (is_not_active → is_inactive, not_found → missing)" + _languages: [elixir, javascript, ruby] _log_baseline: -15.6244 brevity: mean_sample_size: -0.3765 @@ -1049,6 +1061,7 @@ negated_boolean_name: no_hungarian_notation: _doc: "Hungarian notation prefixes (`strName`, `bFlag`) add noise without type safety." _fix_hint: "Remove type prefixes from names (strName → name, intCount → count)" + _languages: [elixir, javascript, ruby] _log_baseline: -15.0940 brevity: mean_sample_size: -0.0814 @@ -1121,6 +1134,7 @@ no_hungarian_notation: screaming_snake_for_constants: _doc: "Module-level constants should use SCREAMING_SNAKE_CASE." 
_fix_hint: "Use SCREAMING_SNAKE_CASE for module-level constants (@MAX_SIZE not @max_size)" + _languages: [elixir, javascript, ruby] _log_baseline: -6.3000 branching: mean_branching_density: 0.0176 From db764fc01069c8b1dae5475f92af93d4cfdd31bd Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Fri, 20 Mar 2026 14:09:37 +0100 Subject: [PATCH 25/71] feat(ci): add workflow to auto-sync behavior language coverage and scalars --- .github/workflows/sync-behavior-coverage.yml | 65 ++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 .github/workflows/sync-behavior-coverage.yml diff --git a/.github/workflows/sync-behavior-coverage.yml b/.github/workflows/sync-behavior-coverage.yml new file mode 100644 index 0000000..8529dcc --- /dev/null +++ b/.github/workflows/sync-behavior-coverage.yml @@ -0,0 +1,65 @@ +name: Sync Behavior Coverage + +on: + pull_request: + branches: [main] + +permissions: + contents: write + +jobs: + sync: + runs-on: ubuntu-latest + if: github.event.pull_request.head.repo.full_name == github.repository + + steps: + - name: Checkout PR branch + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Elixir + uses: erlef/setup-beam@v1 + with: + otp-version: "27.3" + elixir-version: "1.19" + + - name: Cache deps + uses: actions/cache@v4 + with: + path: | + deps + _build + key: ${{ runner.os }}-mix-${{ hashFiles('mix.lock') }} + restore-keys: ${{ runner.os }}-mix- + + - name: Install dependencies + run: mix deps.get + + - name: Compile + run: mix compile --warnings-as-errors + + - name: Regenerate language coverage + run: mix codeqa.sample_report --apply-languages + + - name: Regenerate scalar vectors + run: mix codeqa.sample_report --apply-scalars + + - name: Check for YAML drift + id: diff + run: | + if git diff --quiet priv/combined_metrics/; then + echo "changed=false" >> $GITHUB_OUTPUT + else + echo "changed=true" >> $GITHUB_OUTPUT + fi + + - name: Commit and push 
updated YAMLs + if: steps.diff.outputs.changed == 'true' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add priv/combined_metrics/*.yml + git commit -m "chore(combined-metrics): sync language coverage and scalar vectors [skip ci]" + git push From 1d10f533554413ac38df67fd5e9d15364a327815 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 20 Mar 2026 13:16:42 +0000 Subject: [PATCH 26/71] chore(combined-metrics): sync language coverage and scalar vectors [skip ci] --- priv/combined_metrics/code_smells.yml | 135 +-- priv/combined_metrics/consistency.yml | 162 ++-- priv/combined_metrics/dependencies.yml | 75 +- priv/combined_metrics/documentation.yml | 161 ++-- priv/combined_metrics/error_handling.yml | 234 +++--- priv/combined_metrics/file_structure.yml | 111 +-- priv/combined_metrics/function_design.yml | 359 ++++---- priv/combined_metrics/naming_conventions.yml | 97 +-- .../combined_metrics/scope_and_assignment.yml | 273 +++---- priv/combined_metrics/testing.yml | 113 +-- priv/combined_metrics/type_and_value.yml | 251 +++--- priv/combined_metrics/variable_naming.yml | 771 +++++++++--------- 12 files changed, 1370 insertions(+), 1372 deletions(-) diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml index 6be13fa..f1c73c5 100644 --- a/priv/combined_metrics/code_smells.yml +++ b/priv/combined_metrics/code_smells.yml @@ -1,17 +1,15 @@ consistent_string_quote_style: _doc: "Files should use a single, consistent string quoting style throughout." 
- _fix_hint: "Use a single quote style (e.g., double quotes) consistently throughout the file" - _languages: [elixir] - _log_baseline: -18.9505 + _log_baseline: -18.2553 branching: mean_branching_density: 0.0243 mean_non_blank_count: -0.0248 brevity: mean_sample_size: -0.0656 casing_entropy: - mean_entropy: -0.0493 + mean_entropy: -0.0405 mean_pascal_case_count: -0.1743 - mean_snake_case_count: -0.0451 + mean_snake_case_count: -0.0505 compression: mean_raw_bytes: -0.0672 mean_redundancy: 0.0207 @@ -43,9 +41,9 @@ consistent_string_quote_style: mean_k: -0.0368 mean_r_squared: -0.0080 identifier_length_variance: - mean_mean: 0.0058 - mean_std_dev: 0.0158 - mean_variance: 0.0316 + mean_mean: 0.0059 + mean_std_dev: 0.0235 + mean_variance: 0.0470 indentation: mean_blank_line_ratio: 0.0205 mean_variance: 0.0246 @@ -56,6 +54,11 @@ consistent_string_quote_style: magic_number_density: mean_string_literal_ratio: 0.3018 near_duplicate_blocks_file: + mean_near_dup_block_d2: 0.9542 + mean_near_dup_block_d3: -0.6021 + mean_near_dup_block_d4: -0.9542 + mean_near_dup_block_d5: 0.9542 + mean_near_dup_block_d6: -0.6021 mean_sub_block_count: -0.1804 ngram: mean_bigram_hapax_fraction: -0.0101 @@ -86,21 +89,19 @@ consistent_string_quote_style: mean_distinct_symbol_types: -0.0966 mean_symbol_count: -0.0999 vocabulary: - mean_mattr: -0.0304 - mean_raw_ttr: -0.0079 - mean_total_identifiers: -0.0589 - mean_unique_identifiers: -0.0668 + mean_mattr: -0.0187 + mean_raw_ttr: -0.0050 + mean_total_identifiers: -0.0666 + mean_unique_identifiers: -0.0714 vowel_density: - mean_total_chars: -0.0531 + mean_total_chars: -0.0607 zipf: mean_total_tokens: -0.0783 mean_vocab_size: -0.0656 no_dead_code_after_return: _doc: "There should be no unreachable statements after a return or early exit." 
- _fix_hint: "Remove unreachable statements after return/raise/exit — they can never execute" - _languages: [elixir] - _log_baseline: -57.2281 + _log_baseline: -55.8435 branching: mean_branch_count: -2.0000 mean_branching_density: -1.4201 @@ -108,10 +109,10 @@ no_dead_code_after_return: brevity: mean_sample_size: -0.2610 casing_entropy: - mean_entropy: -0.2355 + mean_entropy: -0.2430 mean_other_count: -0.8708 mean_pascal_case_count: -0.5752 - mean_snake_case_count: -0.3869 + mean_snake_case_count: -0.3559 compression: mean_raw_bytes: -0.4531 mean_redundancy: -0.0467 @@ -147,9 +148,9 @@ no_dead_code_after_return: mean_k: -0.1169 identifier_length_variance: mean_max: -0.4367 - mean_mean: 0.0078 - mean_std_dev: -0.2478 - mean_variance: -0.4957 + mean_mean: -0.0159 + mean_std_dev: -0.2804 + mean_variance: -0.5607 indentation: mean_blank_line_ratio: 0.2883 mean_mean_depth: -0.4448 @@ -162,6 +163,11 @@ no_dead_code_after_return: mean_density: 0.2821 mean_string_literal_ratio: -0.8289 near_duplicate_blocks_file: + mean_block_count: -0.1083 + mean_near_dup_block_d0: 1.1292 + mean_near_dup_block_d5: 1.1292 + mean_near_dup_block_d7: -0.7124 + mean_near_dup_block_d8: 1.1292 mean_sub_block_count: -0.3612 ngram: mean_bigram_hapax_fraction: 0.0142 @@ -192,12 +198,12 @@ no_dead_code_after_return: mean_density: 0.2141 mean_symbol_count: -0.2386 vocabulary: - mean_mattr: -0.0870 - mean_raw_ttr: 0.0851 - mean_total_identifiers: -0.4256 - mean_unique_identifiers: -0.3406 + mean_mattr: -0.0424 + mean_raw_ttr: 0.0435 + mean_total_identifiers: -0.4061 + mean_unique_identifiers: -0.3626 vowel_density: - mean_total_chars: -0.4179 + mean_total_chars: -0.4220 zipf: mean_exponent: -0.0067 mean_total_tokens: -0.3093 @@ -205,9 +211,7 @@ no_dead_code_after_return: no_debug_print_statements: _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." 
- _fix_hint: "Remove IO.inspect/IO.puts/console.log debug output — use Logger or remove entirely" - _languages: [elixir] - _log_baseline: -86.5160 + _log_baseline: -88.0844 branching: mean_branch_count: -0.3540 mean_branching_density: 0.0181 @@ -216,10 +220,10 @@ no_debug_print_statements: brevity: mean_sample_size: -0.2128 casing_entropy: - mean_entropy: 0.0285 + mean_entropy: 0.0841 mean_other_count: 0.1169 mean_pascal_case_count: -1.0141 - mean_snake_case_count: -0.5844 + mean_snake_case_count: -0.6773 compression: mean_raw_bytes: -0.4490 mean_redundancy: -0.0518 @@ -254,9 +258,9 @@ no_debug_print_statements: mean_k: 0.0324 mean_r_squared: -0.0077 identifier_length_variance: - mean_mean: 0.1266 - mean_std_dev: 0.0871 - mean_variance: 0.1743 + mean_mean: 0.1804 + mean_std_dev: 0.0496 + mean_variance: 0.0993 indentation: mean_blank_line_ratio: -0.0499 mean_max_depth: -0.2215 @@ -271,6 +275,7 @@ no_debug_print_statements: mean_density: 0.5227 mean_string_literal_ratio: -1.0798 near_duplicate_blocks_file: + mean_block_count: -0.2856 mean_sub_block_count: -0.4114 ngram: mean_bigram_hapax_fraction: -0.0183 @@ -302,12 +307,12 @@ no_debug_print_statements: mean_distinct_symbol_types: -0.1242 mean_symbol_count: -0.5369 vocabulary: - mean_mattr: 0.2274 - mean_raw_ttr: 0.3523 - mean_total_identifiers: -0.5867 - mean_unique_identifiers: -0.2346 + mean_mattr: 0.3160 + mean_raw_ttr: 0.4438 + mean_total_identifiers: -0.6654 + mean_unique_identifiers: -0.2215 vowel_density: - mean_total_chars: -0.4601 + mean_total_chars: -0.4850 zipf: mean_exponent: -0.1196 mean_r_squared: 0.0127 @@ -316,9 +321,7 @@ no_debug_print_statements: no_fixme_comments: _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." 
- _fix_hint: "Resolve FIXME/XXX/HACK comments before merging — they indicate known unresolved issues" - _languages: [elixir] - _log_baseline: -2.1522 + _log_baseline: -2.0233 branching: mean_branch_count: 0.1755 mean_branching_density: 0.1504 @@ -326,9 +329,9 @@ no_fixme_comments: brevity: mean_sample_size: -0.0887 casing_entropy: - mean_entropy: -0.0225 - mean_pascal_case_count: -0.0769 - mean_snake_case_count: -0.0130 + mean_entropy: 0.0286 + mean_pascal_case_count: 0.0129 + mean_snake_case_count: -0.0450 comment_structure: mean_comment_line_count: -0.6667 mean_comment_line_ratio: 0.5464 @@ -369,9 +372,9 @@ no_fixme_comments: mean_beta: -0.0665 mean_k: 0.1089 identifier_length_variance: - mean_mean: 0.0027 - mean_std_dev: 0.0259 - mean_variance: 0.0517 + mean_mean: 0.0159 + mean_std_dev: 0.0203 + mean_variance: 0.0407 indentation: mean_blank_line_ratio: 0.0697 mean_max_depth: 0.0646 @@ -385,6 +388,7 @@ no_fixme_comments: mean_density: -0.0646 mean_string_literal_ratio: 0.0564 near_duplicate_blocks_file: + mean_block_count: 0.0305 mean_sub_block_count: 0.1090 ngram: mean_bigram_hapax_fraction: -0.0378 @@ -417,12 +421,12 @@ no_fixme_comments: mean_density: 0.1041 mean_symbol_count: 0.1218 vocabulary: - mean_mattr: -0.1086 - mean_raw_ttr: -0.1007 - mean_total_identifiers: -0.0217 - mean_unique_identifiers: -0.1224 + mean_mattr: -0.0826 + mean_raw_ttr: -0.0781 + mean_total_identifiers: -0.0352 + mean_unique_identifiers: -0.1133 vowel_density: - mean_total_chars: -0.0190 + mean_total_chars: -0.0193 zipf: mean_exponent: 0.0584 mean_r_squared: 0.0015 @@ -431,9 +435,7 @@ no_fixme_comments: no_nested_ternary: _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." 
- _fix_hint: "Replace nested ternary/conditional expressions with if-else blocks or pattern matching" - _languages: [elixir] - _log_baseline: 7.2208 + _log_baseline: 7.6475 branching: mean_branch_count: -0.5662 mean_branching_density: -0.3441 @@ -442,10 +444,10 @@ no_nested_ternary: brevity: mean_sample_size: 0.0486 casing_entropy: - mean_entropy: 0.2495 + mean_entropy: 0.2311 mean_other_count: 0.7455 mean_pascal_case_count: 0.1237 - mean_snake_case_count: 0.0885 + mean_snake_case_count: 0.1138 compression: mean_raw_bytes: -0.0141 mean_redundancy: -0.0117 @@ -483,8 +485,9 @@ no_nested_ternary: mean_k: 0.0563 identifier_length_variance: mean_max: 0.0170 - mean_std_dev: -0.0032 - mean_variance: -0.0065 + mean_mean: -0.0112 + mean_std_dev: -0.0060 + mean_variance: -0.0120 indentation: mean_blank_line_ratio: 0.3825 mean_max_depth: -0.2891 @@ -500,6 +503,8 @@ no_nested_ternary: mean_magic_number_count: -0.0310 mean_string_literal_ratio: 0.0146 near_duplicate_blocks_file: + mean_block_count: 0.0885 + mean_near_dup_block_d7: -0.1824 mean_sub_block_count: 0.5472 ngram: mean_bigram_hapax_fraction: -0.0464 @@ -531,12 +536,12 @@ no_nested_ternary: mean_distinct_symbol_types: 0.0377 mean_symbol_count: 0.2475 vocabulary: - mean_mattr: -0.0453 - mean_raw_ttr: -0.0380 - mean_total_identifiers: 0.1302 - mean_unique_identifiers: 0.0921 + mean_mattr: -0.0587 + mean_raw_ttr: -0.0515 + mean_total_identifiers: 0.1551 + mean_unique_identifiers: 0.1036 vowel_density: - mean_total_chars: 0.1293 + mean_total_chars: 0.1439 zipf: mean_exponent: 0.0240 mean_r_squared: 0.0111 diff --git a/priv/combined_metrics/consistency.yml b/priv/combined_metrics/consistency.yml index 426e360..25026ea 100644 --- a/priv/combined_metrics/consistency.yml +++ b/priv/combined_metrics/consistency.yml @@ -1,14 +1,12 @@ consistent_casing_within_file: _doc: "A file should use one naming convention throughout — no mixing of camelCase and snake_case for the same kind of identifier." 
- _fix_hint: "Pick one casing convention (snake_case or camelCase) and apply it uniformly" - _languages: [elixir] - _log_baseline: -2.4622 + _log_baseline: -2.4826 brevity: mean_sample_size: -0.0471 casing_entropy: mean_camel_case_count: -2.0000 - mean_entropy: -0.4219 - mean_snake_case_count: 0.2745 + mean_entropy: -0.4254 + mean_snake_case_count: 0.2663 compression: mean_raw_bytes: 0.0213 mean_redundancy: 0.0219 @@ -33,9 +31,9 @@ consistent_casing_within_file: mean_beta: -0.0232 mean_k: 0.0253 identifier_length_variance: - mean_mean: 0.0342 - mean_std_dev: 0.0123 - mean_variance: 0.0246 + mean_mean: 0.0337 + mean_std_dev: 0.0139 + mean_variance: 0.0278 ngram: mean_bigram_hapax_fraction: -0.0071 mean_bigram_repetition_rate: 0.0267 @@ -49,20 +47,18 @@ consistent_casing_within_file: symbol_density: mean_density: -0.0214 vocabulary: - mean_mattr: -0.0679 - mean_raw_ttr: -0.0715 - mean_unique_identifiers: -0.0714 + mean_mattr: -0.0680 + mean_raw_ttr: -0.0735 + mean_unique_identifiers: -0.0735 vowel_density: - mean_total_chars: 0.0342 + mean_total_chars: 0.0337 zipf: mean_exponent: 0.0265 mean_vocab_size: -0.0471 consistent_error_return_shape: _doc: "All functions in a module should return errors in the same shape — mixed `nil`, `false`, and `{:error, _}` returns are confusing." 
- _fix_hint: "Use a single error return format (e.g., {:error, reason}) consistently throughout" - _languages: [elixir] - _log_baseline: 31.6243 + _log_baseline: 30.7874 branching: mean_branch_count: -0.2178 mean_branching_density: -0.1258 @@ -70,8 +66,8 @@ consistent_error_return_shape: brevity: mean_sample_size: 0.0231 casing_entropy: - mean_entropy: -0.0372 - mean_snake_case_count: 0.0582 + mean_entropy: 0.0245 + mean_snake_case_count: -0.0406 compression: mean_raw_bytes: -0.0234 mean_redundancy: 0.0058 @@ -103,8 +99,9 @@ consistent_error_return_shape: mean_beta: -0.0402 mean_k: 0.0903 identifier_length_variance: - mean_std_dev: 0.0324 - mean_variance: 0.0648 + mean_mean: 0.0455 + mean_std_dev: 0.0308 + mean_variance: 0.0616 indentation: mean_blank_line_ratio: 0.0185 mean_mean_depth: -0.0638 @@ -117,6 +114,8 @@ consistent_error_return_shape: mean_density: -0.0557 mean_string_literal_ratio: -2.0000 near_duplicate_blocks_file: + mean_near_dup_block_d7: 0.7522 + mean_near_dup_block_d8: -0.4400 mean_sub_block_count: 0.1553 ngram: mean_bigram_hapax_fraction: -0.0430 @@ -145,12 +144,12 @@ consistent_error_return_shape: mean_density: 0.1360 mean_symbol_count: 0.1121 vocabulary: - mean_mattr: 0.0268 - mean_raw_ttr: -0.0151 - mean_total_identifiers: 0.0519 - mean_unique_identifiers: 0.0368 + mean_mattr: 0.0877 + mean_raw_ttr: 0.0733 + mean_total_identifiers: -0.0353 + mean_unique_identifiers: 0.0381 vowel_density: - mean_total_chars: 0.0479 + mean_total_chars: 0.0102 zipf: mean_exponent: 0.0181 mean_total_tokens: 0.0361 @@ -158,9 +157,7 @@ consistent_error_return_shape: consistent_function_style: _doc: "A module should not mix one-liner and multi-clause function definitions for the same concern." 
- _fix_hint: "Use a consistent function definition style — all def or all defp, one-liner or block form" - _languages: [elixir] - _log_baseline: -2.0498 + _log_baseline: 0.1374 branching: mean_branch_count: -0.1610 mean_branching_density: -0.3349 @@ -169,10 +166,10 @@ consistent_function_style: brevity: mean_sample_size: 0.0028 casing_entropy: - mean_entropy: -0.0562 + mean_entropy: -0.0534 mean_other_count: -0.2753 mean_pascal_case_count: -0.0379 - mean_snake_case_count: 0.0228 + mean_snake_case_count: 0.0199 compression: mean_raw_bytes: 0.0313 mean_redundancy: 0.0188 @@ -204,9 +201,8 @@ consistent_function_style: heaps: mean_beta: 0.0024 identifier_length_variance: - mean_mean: 0.0040 - mean_std_dev: 0.0053 - mean_variance: 0.0106 + mean_mean: 0.0076 + mean_variance: 0.0038 indentation: mean_blank_line_ratio: -0.0991 mean_max_depth: -0.1143 @@ -217,6 +213,10 @@ consistent_function_style: mean_max_nesting_depth: -0.1610 mean_unique_line_ratio: -0.0456 near_duplicate_blocks_file: + mean_block_count: 0.2753 + mean_near_dup_block_d0: 0.9145 + mean_near_dup_block_d7: 0.1610 + mean_near_dup_block_d8: 0.5506 mean_sub_block_count: 0.0594 ngram: mean_bigram_hapax_fraction: 0.0037 @@ -241,74 +241,72 @@ consistent_function_style: mean_density: -0.0473 mean_symbol_count: -0.0159 vocabulary: - mean_mattr: -0.0035 - mean_raw_ttr: -0.0090 - mean_total_identifiers: 0.0133 - mean_unique_identifiers: 0.0042 + mean_mattr: -0.0025 + mean_raw_ttr: -0.0051 + mean_total_identifiers: 0.0098 + mean_unique_identifiers: 0.0047 vowel_density: - mean_total_chars: 0.0173 + mean_total_chars: 0.0175 zipf: mean_exponent: -0.0054 mean_vocab_size: 0.0028 same_concept_same_name: _doc: "The same domain concept should use the same name throughout a file — mixing `user`, `usr`, and `u` for the same thing harms readability." 
- _fix_hint: "Use the same name for the same concept everywhere — avoid synonyms like user/account/member" - _languages: [elixir] - _log_baseline: -10.7039 + _log_baseline: -12.4109 brevity: - mean_sample_size: -1.3837 + mean_sample_size: -1.3457 compression: - mean_raw_bytes: 0.1823 - mean_redundancy: 0.4046 - mean_unique_line_ratio: -0.3343 - mean_zlib_bytes: -0.8496 - mean_zlib_ratio: 1.0316 + mean_raw_bytes: 0.1773 + mean_redundancy: 0.3935 + mean_unique_line_ratio: -0.3251 + mean_zlib_bytes: -0.8263 + mean_zlib_ratio: 1.0033 entropy: - mean_char_entropy: -0.1859 - mean_char_normalized: -0.1851 - mean_token_entropy: -0.3646 - mean_token_max_entropy: -0.2981 - mean_vocab_size: -1.3837 + mean_char_entropy: -0.1808 + mean_char_normalized: -0.1800 + mean_token_entropy: -0.3546 + mean_token_max_entropy: -0.2899 + mean_vocab_size: -1.3457 halstead: - mean_difficulty: 1.9192 - mean_effort: 1.6104 - mean_estimated_bugs: -0.3082 - mean_n2_unique_operands: -1.9192 - mean_time_to_implement_seconds: 1.6104 - mean_vocabulary: -1.4248 - mean_volume: -0.3088 + mean_difficulty: 1.8665 + mean_effort: 1.5662 + mean_estimated_bugs: -0.2997 + mean_n2_unique_operands: -1.8665 + mean_time_to_implement_seconds: 1.5662 + mean_vocabulary: -1.3857 + mean_volume: -0.3003 heaps: - mean_beta: -0.6035 - mean_k: 0.5246 + mean_beta: -0.5870 + mean_k: 0.5102 identifier_length_variance: - mean_mean: 0.3369 - mean_std_dev: -0.2793 - mean_variance: -0.5589 + mean_mean: 0.3431 + mean_std_dev: -0.4791 + mean_variance: -0.9580 line_patterns: - mean_unique_line_ratio: -0.7135 + mean_unique_line_ratio: -0.6939 ngram: - mean_bigram_hapax_fraction: -0.6648 - mean_bigram_repeated_unique: -0.2150 - mean_bigram_repetition_rate: 0.6714 - mean_bigram_unique: -1.2077 - mean_trigram_hapax_fraction: -0.6812 - mean_trigram_repeated_unique: 1.3250 - mean_trigram_repetition_rate: 1.6604 - mean_trigram_unique: -1.0154 + mean_bigram_hapax_fraction: -0.6466 + mean_bigram_repeated_unique: -0.2091 + 
mean_bigram_repetition_rate: 0.6530 + mean_bigram_unique: -1.1746 + mean_trigram_hapax_fraction: -0.6625 + mean_trigram_repeated_unique: 1.2887 + mean_trigram_repetition_rate: 1.6149 + mean_trigram_unique: -0.9875 readability: - mean_avg_line_length: 0.1889 - mean_avg_sub_words_per_id: -0.1821 - mean_flesch_adapted: 0.2414 + mean_avg_line_length: 0.1837 + mean_avg_sub_words_per_id: -0.1771 + mean_flesch_adapted: 0.2348 symbol_density: - mean_density: -0.1858 + mean_density: -0.1807 vocabulary: - mean_mattr: -1.7927 - mean_raw_ttr: -2.0000 - mean_unique_identifiers: -1.9992 + mean_mattr: -1.8899 + mean_raw_ttr: -1.9969 + mean_unique_identifiers: -2.0000 vowel_density: - mean_total_chars: 0.3371 + mean_total_chars: 0.3432 zipf: - mean_exponent: 0.7915 - mean_vocab_size: -1.3837 + mean_exponent: 0.7698 + mean_vocab_size: -1.3457 diff --git a/priv/combined_metrics/dependencies.yml b/priv/combined_metrics/dependencies.yml index fbc3591..7bcfacf 100644 --- a/priv/combined_metrics/dependencies.yml +++ b/priv/combined_metrics/dependencies.yml @@ -1,8 +1,6 @@ import_count_under_10: _doc: "Files should import fewer than 10 modules; high import counts signal excessive coupling." 
- _fix_hint: "Reduce imports — split large modules or use fully-qualified names for rarely-used deps" - _languages: [elixir] - _log_baseline: 7.0687 + _log_baseline: 7.1916 branching: mean_branch_count: 0.2110 mean_branching_density: -1.0683 @@ -11,9 +9,9 @@ import_count_under_10: brevity: mean_sample_size: 0.0119 casing_entropy: - mean_entropy: -0.0396 + mean_entropy: -0.0389 mean_pascal_case_count: -0.1657 - mean_snake_case_count: 0.0025 + mean_snake_case_count: -0.0025 comment_structure: mean_comment_line_ratio: -1.2802 compression: @@ -54,9 +52,9 @@ import_count_under_10: mean_r_squared: 0.0101 identifier_length_variance: mean_max: 0.0679 - mean_mean: 0.0712 - mean_std_dev: 0.0609 - mean_variance: 0.1218 + mean_mean: 0.0648 + mean_std_dev: 0.0688 + mean_variance: 0.1375 indentation: mean_blank_line_ratio: 0.1478 mean_max_depth: -0.0876 @@ -72,6 +70,7 @@ import_count_under_10: mean_magic_number_count: 0.4898 mean_string_literal_ratio: 0.0265 near_duplicate_blocks_file: + mean_block_count: 0.0765 mean_sub_block_count: 0.1110 ngram: mean_bigram_repeated_unique: 0.0034 @@ -101,12 +100,11 @@ import_count_under_10: mean_distinct_symbol_types: 0.0643 mean_symbol_count: 0.0087 vocabulary: - mean_mattr: 0.0037 - mean_raw_ttr: 0.0453 - mean_total_identifiers: -0.0543 - mean_unique_identifiers: -0.0090 + mean_mattr: -0.0031 + mean_raw_ttr: 0.0573 + mean_total_identifiers: -0.0573 vowel_density: - mean_total_chars: 0.0169 + mean_total_chars: 0.0075 zipf: mean_exponent: -0.0152 mean_r_squared: 0.0050 @@ -115,9 +113,7 @@ import_count_under_10: low_coupling: _doc: "Modules should depend on few external symbols — a low unique-operand count relative to total is a proxy for tight coupling." 
- _fix_hint: "Reduce dependencies between modules — introduce interfaces or narrow the public API" - _languages: [elixir] - _log_baseline: -38.4249 + _log_baseline: -38.2335 branching: mean_branch_count: 0.0745 mean_branching_density: 0.2097 @@ -126,9 +122,9 @@ low_coupling: brevity: mean_sample_size: -0.1276 casing_entropy: - mean_entropy: -0.0947 + mean_entropy: -0.0870 mean_pascal_case_count: -0.3722 - mean_snake_case_count: -0.1208 + mean_snake_case_count: -0.1302 compression: mean_raw_bytes: -0.1657 mean_redundancy: 0.0126 @@ -166,8 +162,9 @@ low_coupling: mean_r_squared: -0.0234 identifier_length_variance: mean_max: -0.0427 - mean_std_dev: -0.0173 - mean_variance: -0.0345 + mean_mean: 0.0133 + mean_std_dev: -0.0321 + mean_variance: -0.0642 indentation: mean_blank_line_ratio: -0.0752 mean_max_depth: -0.0352 @@ -182,6 +179,9 @@ low_coupling: mean_density: -2.0000 mean_string_literal_ratio: 0.1282 near_duplicate_blocks_file: + mean_block_count: 0.1123 + mean_near_dup_block_d4: 0.2314 + mean_near_dup_block_d8: 0.2314 mean_sub_block_count: -0.0902 ngram: mean_bigram_hapax_fraction: -0.0247 @@ -213,12 +213,12 @@ low_coupling: mean_distinct_symbol_types: -0.0960 mean_symbol_count: -0.1794 vocabulary: - mean_mattr: -0.0983 - mean_raw_ttr: 0.0304 - mean_total_identifiers: -0.1705 - mean_unique_identifiers: -0.1402 + mean_mattr: -0.0823 + mean_raw_ttr: 0.0349 + mean_total_identifiers: -0.1801 + mean_unique_identifiers: -0.1453 vowel_density: - mean_total_chars: -0.1694 + mean_total_chars: -0.1669 zipf: mean_exponent: 0.0065 mean_r_squared: -0.0205 @@ -227,17 +227,15 @@ low_coupling: no_wildcard_imports: _doc: "Wildcard imports (`import *`, `using Module`) pollute the local namespace and hide dependencies." 
- _fix_hint: "Replace wildcard imports with explicit named imports for clarity and reduced scope pollution" - _languages: [elixir] - _log_baseline: -9.4788 + _log_baseline: -8.9685 branching: mean_branching_density: 0.0249 mean_non_blank_count: -0.0268 brevity: mean_sample_size: -0.0077 casing_entropy: - mean_entropy: -0.0070 - mean_snake_case_count: 0.0189 + mean_entropy: -0.0054 + mean_snake_case_count: 0.0163 compression: mean_raw_bytes: 0.0310 mean_unique_line_ratio: -0.0046 @@ -263,9 +261,9 @@ no_wildcard_imports: mean_k: 0.1998 mean_r_squared: -0.0155 identifier_length_variance: - mean_mean: 0.0387 - mean_std_dev: 0.0490 - mean_variance: 0.0979 + mean_mean: 0.0438 + mean_std_dev: 0.0473 + mean_variance: 0.0945 indentation: mean_blank_line_ratio: 0.0763 mean_mean_depth: -0.0117 @@ -279,6 +277,9 @@ no_wildcard_imports: mean_magic_number_count: 1.1312 mean_string_literal_ratio: -0.3463 near_duplicate_blocks_file: + mean_near_dup_block_d6: -0.3309 + mean_near_dup_block_d7: 0.3309 + mean_near_dup_block_d8: 0.3309 mean_sub_block_count: 0.0355 ngram: mean_bigram_hapax_fraction: 0.0182 @@ -310,11 +311,11 @@ no_wildcard_imports: mean_distinct_symbol_types: -0.0817 mean_symbol_count: 0.0042 vocabulary: - mean_mattr: 0.0212 - mean_raw_ttr: -0.0140 - mean_total_identifiers: 0.0140 + mean_mattr: 0.0259 + mean_raw_ttr: -0.0117 + mean_total_identifiers: 0.0116 vowel_density: - mean_total_chars: 0.0527 + mean_total_chars: 0.0554 zipf: mean_exponent: -0.0270 mean_total_tokens: 0.0131 diff --git a/priv/combined_metrics/documentation.yml b/priv/combined_metrics/documentation.yml index 66ca037..4e3333c 100644 --- a/priv/combined_metrics/documentation.yml +++ b/priv/combined_metrics/documentation.yml @@ -1,8 +1,6 @@ docstring_is_nonempty: _doc: "Docstrings must contain meaningful content, not just a placeholder or empty string." 
- _fix_hint: "Replace placeholder docstrings with a real description of what the function does" - _languages: [elixir] - _log_baseline: 29.4288 + _log_baseline: 28.4942 branching: mean_branch_count: 0.3089 mean_branching_density: 0.2652 @@ -10,10 +8,10 @@ docstring_is_nonempty: brevity: mean_sample_size: 0.1931 casing_entropy: - mean_entropy: 0.0560 + mean_entropy: 0.0676 mean_other_count: 0.0709 mean_pascal_case_count: 0.3089 - mean_snake_case_count: 0.1585 + mean_snake_case_count: 0.1382 compression: mean_raw_bytes: 0.1245 mean_redundancy: -0.0198 @@ -49,9 +47,9 @@ docstring_is_nonempty: mean_beta: 0.0242 mean_k: 0.0556 identifier_length_variance: - mean_mean: -0.0039 - mean_std_dev: -0.0154 - mean_variance: -0.0307 + mean_mean: 0.0042 + mean_std_dev: -0.0168 + mean_variance: -0.0336 indentation: mean_blank_line_ratio: 0.0413 mean_mean_depth: -0.0330 @@ -65,8 +63,9 @@ docstring_is_nonempty: mean_magic_number_count: 0.1709 mean_string_literal_ratio: 0.1078 near_duplicate_blocks_file: - mean_block_count: 0.5417 - mean_near_dup_block_d8: 0.2709 + mean_block_count: 0.0907 + mean_near_dup_block_d5: -0.2709 + mean_near_dup_block_d8: 0.1000 mean_sub_block_count: -0.0061 ngram: mean_bigram_hapax_fraction: 0.0378 @@ -98,12 +97,12 @@ docstring_is_nonempty: mean_distinct_symbol_types: 0.0505 mean_symbol_count: 0.0664 vocabulary: - mean_mattr: 0.1408 - mean_raw_ttr: 0.0942 - mean_total_identifiers: 0.1709 - mean_unique_identifiers: 0.2651 + mean_mattr: 0.1382 + mean_raw_ttr: 0.0976 + mean_total_identifiers: 0.1534 + mean_unique_identifiers: 0.2510 vowel_density: - mean_total_chars: 0.1670 + mean_total_chars: 0.1576 zipf: mean_exponent: -0.0353 mean_r_squared: 0.0037 @@ -112,18 +111,16 @@ docstring_is_nonempty: file_has_license_header: _doc: "Source files should begin with a license or copyright header." 
- _fix_hint: "Add a license header comment at the top of the file" - _languages: [elixir] - _log_baseline: 5.7261 + _log_baseline: 5.8777 branching: mean_branching_density: -0.0081 mean_non_blank_count: 0.0080 brevity: mean_sample_size: 0.0263 casing_entropy: - mean_entropy: 0.0312 + mean_entropy: 0.0296 mean_pascal_case_count: 0.0957 - mean_snake_case_count: 0.0036 + mean_snake_case_count: 0.0039 comment_structure: mean_comment_line_ratio: -2.0000 compression: @@ -153,8 +150,7 @@ file_has_license_header: mean_beta: -0.0113 mean_k: 0.0614 identifier_length_variance: - mean_mean: -0.0037 - mean_variance: -0.0031 + mean_mean: -0.0048 indentation: mean_blank_line_ratio: 0.0206 mean_mean_depth: -0.0080 @@ -167,6 +163,7 @@ file_has_license_header: mean_magic_number_count: 0.1973 mean_string_literal_ratio: -0.0104 near_duplicate_blocks_file: + mean_block_count: 0.0650 mean_sub_block_count: 0.0089 ngram: mean_bigram_hapax_fraction: 0.0086 @@ -188,12 +185,12 @@ file_has_license_header: mean_density: -0.0042 mean_symbol_count: 0.0065 vocabulary: - mean_mattr: 0.0121 - mean_raw_ttr: 0.0200 - mean_total_identifiers: 0.0175 - mean_unique_identifiers: 0.0374 + mean_mattr: 0.0108 + mean_raw_ttr: 0.0207 + mean_total_identifiers: 0.0187 + mean_unique_identifiers: 0.0395 vowel_density: - mean_total_chars: 0.0138 + mean_total_chars: 0.0139 zipf: mean_exponent: -0.0055 mean_total_tokens: 0.0091 @@ -201,9 +198,7 @@ file_has_license_header: file_has_module_docstring: _doc: "Files should have a module-level docstring explaining purpose and usage." 
- _fix_hint: "Add a @moduledoc string describing the module's purpose" - _languages: [elixir] - _log_baseline: 24.2268 + _log_baseline: 24.1681 branching: mean_branch_count: 0.3854 mean_branching_density: -2.0000 @@ -211,9 +206,9 @@ file_has_module_docstring: brevity: mean_sample_size: 0.2219 casing_entropy: - mean_entropy: -0.0217 + mean_entropy: -0.0210 mean_pascal_case_count: 0.0929 - mean_snake_case_count: 0.1492 + mean_snake_case_count: 0.1544 compression: mean_raw_bytes: 0.1161 mean_redundancy: -0.0256 @@ -247,9 +242,9 @@ file_has_module_docstring: mean_k: 0.5760 mean_r_squared: -0.0049 identifier_length_variance: - mean_mean: -0.0059 - mean_std_dev: -0.0462 - mean_variance: -0.0924 + mean_mean: -0.0101 + mean_std_dev: -0.0477 + mean_variance: -0.0954 indentation: mean_blank_line_ratio: 0.0686 mean_mean_depth: -0.0240 @@ -263,7 +258,7 @@ file_has_module_docstring: mean_magic_number_count: 0.1599 mean_string_literal_ratio: 0.1425 near_duplicate_blocks_file: - mean_block_count: 0.3854 + mean_block_count: 0.0586 mean_sub_block_count: 0.0098 ngram: mean_bigram_hapax_fraction: 0.0500 @@ -293,12 +288,12 @@ file_has_module_docstring: mean_distinct_symbol_types: 0.0618 mean_symbol_count: 0.0433 vocabulary: - mean_mattr: 0.0527 - mean_raw_ttr: 0.1365 - mean_total_identifiers: 0.1309 - mean_unique_identifiers: 0.2674 + mean_mattr: 0.0532 + mean_raw_ttr: 0.1353 + mean_total_identifiers: 0.1326 + mean_unique_identifiers: 0.2679 vowel_density: - mean_total_chars: 0.1250 + mean_total_chars: 0.1226 zipf: mean_exponent: -0.0467 mean_total_tokens: 0.0837 @@ -306,18 +301,16 @@ file_has_module_docstring: file_has_no_commented_out_code: _doc: "Files should not contain commented-out code blocks left from development." 
- _fix_hint: "Remove commented-out code — use version control to track deleted code" - _languages: [elixir] - _log_baseline: -8.1616 + _log_baseline: -8.5677 branching: mean_branching_density: 0.0368 mean_non_blank_count: -0.0367 brevity: mean_sample_size: -0.0046 casing_entropy: - mean_entropy: -0.0252 - mean_pascal_case_count: -0.0657 - mean_snake_case_count: 0.0160 + mean_entropy: -0.0091 + mean_pascal_case_count: -0.0597 + mean_snake_case_count: -0.0126 comment_structure: mean_comment_line_count: -0.9901 mean_comment_line_ratio: 0.3578 @@ -353,9 +346,9 @@ file_has_no_commented_out_code: mean_k: 0.1958 mean_r_squared: -0.0200 identifier_length_variance: - mean_mean: 0.0074 - mean_std_dev: 0.0252 - mean_variance: 0.0503 + mean_mean: 0.0169 + mean_std_dev: 0.0264 + mean_variance: 0.0527 indentation: mean_blank_line_ratio: 0.0551 mean_max_depth: 0.0324 @@ -369,6 +362,7 @@ file_has_no_commented_out_code: mean_density: 2.0000 mean_string_literal_ratio: -0.0818 near_duplicate_blocks_file: + mean_block_count: -0.0474 mean_sub_block_count: -0.0454 ngram: mean_bigram_hapax_fraction: 0.0101 @@ -401,12 +395,12 @@ file_has_no_commented_out_code: mean_density: -0.0172 mean_symbol_count: -0.0237 vocabulary: - mean_mattr: -0.0404 - mean_raw_ttr: -0.0140 - mean_total_identifiers: -0.0030 - mean_unique_identifiers: -0.0170 + mean_mattr: -0.0327 + mean_raw_ttr: 0.0060 + mean_total_identifiers: -0.0246 + mean_unique_identifiers: -0.0186 vowel_density: - mean_total_chars: 0.0044 + mean_total_chars: -0.0077 zipf: mean_exponent: -0.0043 mean_total_tokens: -0.0158 @@ -414,9 +408,7 @@ file_has_no_commented_out_code: function_has_docstring: _doc: "Public functions should have a docstring describing behaviour, params, and return value." 
- _fix_hint: "Add @doc strings to public functions describing parameters, return value, and purpose" - _languages: [elixir] - _log_baseline: 43.0440 + _log_baseline: 41.6283 branching: mean_branch_count: 0.5279 mean_branching_density: 0.3832 @@ -424,10 +416,10 @@ function_has_docstring: brevity: mean_sample_size: 0.2608 casing_entropy: - mean_entropy: -0.0193 + mean_entropy: -0.0026 mean_other_count: 0.3105 mean_pascal_case_count: 0.1852 - mean_snake_case_count: 0.3052 + mean_snake_case_count: 0.2708 comment_structure: mean_comment_line_ratio: -2.0000 compression: @@ -467,9 +459,9 @@ function_has_docstring: mean_k: -0.0612 mean_r_squared: -0.0041 identifier_length_variance: - mean_mean: -0.0412 - mean_std_dev: -0.0363 - mean_variance: -0.0727 + mean_mean: -0.0191 + mean_std_dev: -0.0493 + mean_variance: -0.0985 indentation: mean_blank_line_ratio: 0.1003 mean_max_depth: -0.1288 @@ -484,9 +476,10 @@ function_has_docstring: mean_magic_number_count: 0.4104 mean_string_literal_ratio: 0.5931 near_duplicate_blocks_file: - mean_block_count: 0.6209 - mean_near_dup_block_d6: 0.3105 - mean_near_dup_block_d8: 0.3105 + mean_block_count: 0.2288 + mean_near_dup_block_d6: -0.3105 + mean_near_dup_block_d7: 0.3105 + mean_near_dup_block_d8: -0.1816 mean_sub_block_count: 0.0349 ngram: mean_bigram_hapax_fraction: 0.0560 @@ -520,12 +513,12 @@ function_has_docstring: mean_distinct_symbol_types: 0.0427 mean_symbol_count: 0.1896 vocabulary: - mean_mattr: 0.1771 - mean_raw_ttr: 0.0544 - mean_total_identifiers: 0.2819 - mean_unique_identifiers: 0.3363 + mean_mattr: 0.1769 + mean_raw_ttr: 0.0666 + mean_total_identifiers: 0.2541 + mean_unique_identifiers: 0.3207 vowel_density: - mean_total_chars: 0.2407 + mean_total_chars: 0.2350 zipf: mean_exponent: 0.0025 mean_total_tokens: 0.2284 @@ -533,9 +526,7 @@ function_has_docstring: function_todo_comment_in_body: _doc: "Functions should not contain TODO/FIXME comments indicating unfinished work." 
- _fix_hint: "Resolve or extract TODO comments — create a tracked issue instead of leaving them inline" - _languages: [elixir] - _log_baseline: 7.0511 + _log_baseline: 7.2394 branching: mean_branch_count: -0.0287 mean_branching_density: -0.0435 @@ -543,9 +534,9 @@ function_todo_comment_in_body: brevity: mean_sample_size: -0.0084 casing_entropy: - mean_entropy: 0.0039 - mean_pascal_case_count: 0.0055 - mean_snake_case_count: -0.0047 + mean_entropy: 0.0157 + mean_pascal_case_count: 0.0410 + mean_snake_case_count: -0.0125 comment_structure: mean_comment_line_count: -0.5392 mean_comment_line_ratio: 0.7796 @@ -582,9 +573,9 @@ function_todo_comment_in_body: mean_k: 0.1608 mean_r_squared: -0.0095 identifier_length_variance: - mean_mean: 0.0058 - mean_std_dev: 0.0154 - mean_variance: 0.0309 + mean_mean: 0.0061 + mean_std_dev: 0.0128 + mean_variance: 0.0257 indentation: mean_blank_line_ratio: 0.0593 mean_mean_depth: -0.0184 @@ -597,6 +588,7 @@ function_todo_comment_in_body: mean_density: -2.0000 mean_string_literal_ratio: -0.0151 near_duplicate_blocks_file: + mean_block_count: 0.0317 mean_sub_block_count: 0.0281 ngram: mean_bigram_hapax_fraction: -0.0187 @@ -628,12 +620,11 @@ function_todo_comment_in_body: mean_distinct_symbol_types: -0.0140 mean_symbol_count: 0.0200 vocabulary: - mean_mattr: -0.0437 - mean_raw_ttr: -0.0159 - mean_total_identifiers: -0.0024 - mean_unique_identifiers: -0.0182 + mean_mattr: -0.0525 + mean_raw_ttr: -0.0250 + mean_unique_identifiers: -0.0236 vowel_density: - mean_total_chars: 0.0035 + mean_total_chars: 0.0076 zipf: mean_total_tokens: 0.0157 mean_vocab_size: -0.0084 diff --git a/priv/combined_metrics/error_handling.yml b/priv/combined_metrics/error_handling.yml index a0b8b49..ce7d040 100644 --- a/priv/combined_metrics/error_handling.yml +++ b/priv/combined_metrics/error_handling.yml @@ -1,8 +1,6 @@ does_not_swallow_errors: _doc: "Errors must be handled or re-raised — empty rescue/catch blocks silently hide failures." 
- _fix_hint: "Propagate or log errors — do not silently discard {:error, _} tuples or rescue clauses" - _languages: [elixir] - _log_baseline: 87.3594 + _log_baseline: 86.0584 branching: mean_branch_count: -0.1041 mean_branching_density: -0.2095 @@ -11,10 +9,10 @@ does_not_swallow_errors: brevity: mean_sample_size: 0.2830 casing_entropy: - mean_entropy: -0.1673 + mean_entropy: -0.1412 mean_other_count: -1.6214 mean_pascal_case_count: 0.8391 - mean_snake_case_count: 0.5551 + mean_snake_case_count: 0.4785 compression: mean_raw_bytes: 0.3818 mean_redundancy: 0.0202 @@ -49,9 +47,8 @@ does_not_swallow_errors: mean_beta: -0.0869 mean_k: 0.2466 identifier_length_variance: - mean_mean: -0.0142 - mean_std_dev: -0.1252 - mean_variance: -0.2505 + mean_std_dev: -0.1168 + mean_variance: -0.2335 indentation: mean_blank_line_ratio: 0.0451 mean_max_depth: 0.1740 @@ -65,6 +62,9 @@ does_not_swallow_errors: magic_number_density: mean_string_literal_ratio: 0.2524 near_duplicate_blocks_file: + mean_near_dup_block_d0: -0.5405 + mean_near_dup_block_d7: -0.3162 + mean_near_dup_block_d8: 0.8566 mean_sub_block_count: 0.3065 ngram: mean_bigram_hapax_fraction: -0.0373 @@ -98,12 +98,12 @@ does_not_swallow_errors: mean_distinct_symbol_types: 0.0400 mean_symbol_count: 0.6378 vocabulary: - mean_mattr: 0.0190 - mean_raw_ttr: -0.1464 - mean_total_identifiers: 0.5481 - mean_unique_identifiers: 0.4017 + mean_mattr: 0.0350 + mean_raw_ttr: -0.0769 + mean_total_identifiers: 0.4896 + mean_unique_identifiers: 0.4127 vowel_density: - mean_total_chars: 0.5339 + mean_total_chars: 0.4927 zipf: mean_exponent: 0.0933 mean_total_tokens: 0.4926 @@ -111,9 +111,7 @@ does_not_swallow_errors: error_message_is_descriptive: _doc: "Error values should carry a meaningful message, not just a bare atom or empty string." 
- _fix_hint: "Include context in error messages — describe what failed, not just that it failed" - _languages: [elixir] - _log_baseline: 52.7594 + _log_baseline: 52.7053 branching: mean_branch_count: 0.0664 mean_branching_density: -0.0540 @@ -122,9 +120,9 @@ error_message_is_descriptive: brevity: mean_sample_size: 0.3136 casing_entropy: - mean_entropy: 0.1513 + mean_entropy: 0.1147 mean_pascal_case_count: 2.0000 - mean_snake_case_count: 0.4347 + mean_snake_case_count: 0.5117 compression: mean_raw_bytes: 0.3028 mean_redundancy: 0.0104 @@ -160,9 +158,9 @@ error_message_is_descriptive: mean_k: 0.1259 mean_r_squared: 0.0073 identifier_length_variance: - mean_mean: -0.0598 - mean_std_dev: -0.0787 - mean_variance: -0.1573 + mean_mean: -0.0908 + mean_std_dev: -0.0799 + mean_variance: -0.1597 indentation: mean_blank_line_ratio: -0.1098 mean_max_depth: 0.1754 @@ -176,6 +174,8 @@ error_message_is_descriptive: magic_number_density: mean_string_literal_ratio: 0.3673 near_duplicate_blocks_file: + mean_near_dup_block_d6: -1.0566 + mean_near_dup_block_d8: -0.6667 mean_sub_block_count: 0.0621 ngram: mean_bigram_hapax_fraction: -0.0059 @@ -203,12 +203,12 @@ error_message_is_descriptive: mean_distinct_symbol_types: 0.0664 mean_symbol_count: 0.3056 vocabulary: - mean_mattr: 0.0721 - mean_raw_ttr: -0.0454 - mean_total_identifiers: 0.4472 - mean_unique_identifiers: 0.4018 + mean_mattr: -0.0179 + mean_raw_ttr: -0.1153 + mean_total_identifiers: 0.5114 + mean_unique_identifiers: 0.3962 vowel_density: - mean_total_chars: 0.3874 + mean_total_chars: 0.4207 zipf: mean_r_squared: 0.0056 mean_total_tokens: 0.3002 @@ -216,113 +216,117 @@ error_message_is_descriptive: returns_typed_error: _doc: "Functions should signal failure via a typed return (e.g. `{:error, reason}`) rather than returning `nil` or `false`." 
- _fix_hint: "Return typed errors like {:error, :not_found} instead of bare :error or nil" - _languages: [elixir] - _log_baseline: 208.7673 + _log_baseline: 120.8554 branching: - mean_branch_count: -0.2092 - mean_branching_density: -0.3081 - mean_max_nesting_depth: 1.8360 - mean_non_blank_count: 0.0989 + mean_branch_count: -0.1286 + mean_branching_density: -0.1895 + mean_max_nesting_depth: 1.1292 + mean_non_blank_count: 0.0608 brevity: - mean_sample_size: 0.3776 + mean_sample_size: 0.2322 casing_entropy: - mean_entropy: -0.7806 - mean_other_count: -0.4385 - mean_pascal_case_count: 1.1584 - mean_snake_case_count: 1.2656 + mean_entropy: -0.3072 + mean_other_count: -0.2697 + mean_pascal_case_count: 0.7124 + mean_snake_case_count: 0.6125 compression: - mean_raw_bytes: 0.7114 - mean_redundancy: 0.0542 - mean_unique_line_ratio: 0.2391 - mean_zlib_bytes: 0.5668 - mean_zlib_ratio: 0.1446 + mean_raw_bytes: 0.4375 + mean_redundancy: 0.0334 + mean_unique_line_ratio: 0.1471 + mean_zlib_bytes: 0.3486 + mean_zlib_ratio: 0.0889 entropy: - mean_char_entropy: 0.1388 - mean_char_max_entropy: 0.0695 - mean_char_normalized: 0.0692 - mean_token_entropy: -0.0195 - mean_token_max_entropy: 0.0863 - mean_token_normalized: -0.1059 - mean_total_tokens: 1.0937 - mean_vocab_size: 0.3776 + mean_char_entropy: 0.0854 + mean_char_max_entropy: 0.0427 + mean_char_normalized: 0.0426 + mean_token_entropy: -0.0120 + mean_token_max_entropy: 0.0531 + mean_token_normalized: -0.0651 + mean_total_tokens: 0.6727 + mean_vocab_size: 0.2322 function_metrics: - mean_avg_function_lines: 0.1470 - mean_avg_param_count: 0.0087 - mean_function_count: -0.0904 - mean_max_function_lines: 0.1338 + mean_avg_function_lines: 0.0904 + mean_avg_param_count: 0.0054 + mean_function_count: -0.0556 + mean_max_function_lines: 0.0823 halstead: - mean_N1_total_operators: 1.2868 - mean_N2_total_operands: 0.8935 - mean_difficulty: 0.7343 - mean_effort: 2.0000 - mean_estimated_bugs: 1.2657 - mean_length: 1.1609 - 
mean_n1_unique_operators: 0.3422 - mean_n2_unique_operands: 0.5015 - mean_time_to_implement_seconds: 2.0000 - mean_vocabulary: 0.4467 - mean_volume: 1.2657 + mean_N1_total_operators: 0.7914 + mean_N2_total_operands: 0.5495 + mean_difficulty: 0.4516 + mean_effort: 1.2300 + mean_estimated_bugs: 0.7784 + mean_length: 0.7139 + mean_n1_unique_operators: 0.2105 + mean_n2_unique_operands: 0.3084 + mean_time_to_implement_seconds: 1.2300 + mean_vocabulary: 0.2747 + mean_volume: 0.7785 heaps: - mean_beta: -0.3791 - mean_k: 0.7841 - mean_r_squared: 0.0178 + mean_beta: -0.2332 + mean_k: 0.4822 + mean_r_squared: 0.0110 identifier_length_variance: - mean_max: 0.3245 - mean_std_dev: 0.3851 - mean_variance: 0.7704 + mean_max: 0.1996 + mean_mean: 0.1313 + mean_std_dev: 0.2519 + mean_variance: 0.5039 indentation: - mean_blank_line_ratio: -0.2464 - mean_mean_depth: -0.0466 - mean_variance: 0.0604 + mean_blank_line_ratio: -0.1515 + mean_mean_depth: -0.0287 + mean_variance: 0.0372 line_patterns: - mean_blank_line_ratio: -0.2464 - mean_max_nesting_depth: 1.8360 - mean_string_literal_ratio: -1.0976 - mean_unique_line_ratio: 0.2365 + mean_blank_line_ratio: -0.1515 + mean_max_nesting_depth: 1.1292 + mean_string_literal_ratio: -0.6750 + mean_unique_line_ratio: 0.1454 magic_number_density: - mean_string_literal_ratio: -1.0976 + mean_string_literal_ratio: -0.6750 near_duplicate_blocks_file: - mean_sub_block_count: 1.2007 + mean_block_count: -0.0980 + mean_near_dup_block_d0: -1.4248 + mean_near_dup_block_d6: 0.7124 + mean_near_dup_block_d7: -1.0081 + mean_near_dup_block_d8: -2.0000 + mean_sub_block_count: 0.7384 ngram: - mean_bigram_hapax_fraction: -0.2293 - mean_bigram_repeated_unique: 0.7952 - mean_bigram_repetition_rate: 0.2117 - mean_bigram_total: 1.0959 - mean_bigram_unique: 0.5042 - mean_trigram_hapax_fraction: -0.0889 - mean_trigram_repeated_unique: 0.8135 - mean_trigram_repetition_rate: 0.3853 - mean_trigram_total: 1.0981 - mean_trigram_unique: 0.5821 + mean_bigram_hapax_fraction: 
-0.1410 + mean_bigram_repeated_unique: 0.4891 + mean_bigram_repetition_rate: 0.1302 + mean_bigram_total: 0.6740 + mean_bigram_unique: 0.3101 + mean_trigram_hapax_fraction: -0.0547 + mean_trigram_repeated_unique: 0.5003 + mean_trigram_repetition_rate: 0.2370 + mean_trigram_total: 0.6753 + mean_trigram_unique: 0.3580 punctuation_density: - mean_arrow_density: -1.3061 - mean_bracket_nonalpha_prefix_count: -0.3047 - mean_colon_suffix_density: -1.3955 - mean_dot_count: 1.8360 - mean_id_nonalpha_suffix_density: 0.1318 - mean_question_mark_density: -1.0680 + mean_arrow_density: -0.8033 + mean_bracket_nonalpha_prefix_count: -0.1874 + mean_colon_suffix_density: -0.8583 + mean_dot_count: 1.1292 + mean_id_nonalpha_suffix_density: 0.0810 + mean_question_mark_density: -0.6568 readability: - mean_avg_line_length: 0.6431 - mean_avg_sub_words_per_id: 0.1542 - mean_avg_tokens_per_line: 0.9948 - mean_flesch_adapted: -0.2067 - mean_fog_adapted: 1.0791 - mean_total_lines: 0.0989 + mean_avg_line_length: 0.3955 + mean_avg_sub_words_per_id: 0.0948 + mean_avg_tokens_per_line: 0.6118 + mean_flesch_adapted: -0.1272 + mean_fog_adapted: 0.6637 + mean_total_lines: 0.0608 symbol_density: - mean_density: 0.9451 - mean_distinct_symbol_types: 0.3470 - mean_symbol_count: 1.6563 + mean_density: 0.5813 + mean_distinct_symbol_types: 0.2134 + mean_symbol_count: 1.0187 vocabulary: - mean_mattr: -0.4659 - mean_raw_ttr: -0.6552 - mean_total_identifiers: 1.1152 - mean_unique_identifiers: 0.4603 + mean_mattr: -0.2229 + mean_raw_ttr: -0.2020 + mean_total_identifiers: 0.4979 + mean_unique_identifiers: 0.2957 vowel_density: - mean_total_chars: 1.1198 + mean_total_chars: 0.6292 zipf: - mean_exponent: 0.1702 - mean_r_squared: 0.0411 - mean_total_tokens: 1.0937 - mean_vocab_size: 0.3776 + mean_exponent: 0.1047 + mean_r_squared: 0.0253 + mean_total_tokens: 0.6727 + mean_vocab_size: 0.2322 diff --git a/priv/combined_metrics/file_structure.yml b/priv/combined_metrics/file_structure.yml index 73376ce..b38fc00 100644 
--- a/priv/combined_metrics/file_structure.yml +++ b/priv/combined_metrics/file_structure.yml @@ -1,7 +1,5 @@ has_consistent_indentation: _doc: "Files should use a single, consistent indentation style with no mixed tabs and spaces." - _fix_hint: "Use a consistent indentation width throughout the file (2 or 4 spaces, not mixed)" - _languages: [elixir] _log_baseline: -12.7016 branching: mean_branching_density: 0.1994 @@ -37,9 +35,7 @@ has_consistent_indentation: line_count_under_300: _doc: "Files should be under 300 lines; longer files typically violate single responsibility." - _fix_hint: "Split large files — extract cohesive groups of functions into separate modules" - _languages: [elixir] - _log_baseline: -49.2655 + _log_baseline: -45.8565 branching: mean_branch_count: -0.4508 mean_branching_density: -0.2446 @@ -47,10 +43,10 @@ line_count_under_300: brevity: mean_sample_size: -0.2062 casing_entropy: - mean_entropy: 0.0366 + mean_entropy: 0.0413 mean_other_count: -0.6011 mean_pascal_case_count: 0.1036 - mean_snake_case_count: -0.1860 + mean_snake_case_count: -0.2080 compression: mean_raw_bytes: -0.2263 mean_redundancy: -0.0026 @@ -90,9 +86,9 @@ line_count_under_300: mean_r_squared: -0.0094 identifier_length_variance: mean_max: -0.0671 - mean_mean: -0.0670 - mean_std_dev: 0.0028 - mean_variance: 0.0055 + mean_mean: -0.0614 + mean_std_dev: 0.0205 + mean_variance: 0.0411 indentation: mean_blank_line_ratio: -0.4899 mean_max_depth: 0.0301 @@ -107,8 +103,10 @@ line_count_under_300: mean_magic_number_count: -0.4114 mean_string_literal_ratio: 0.0039 near_duplicate_blocks_file: - mean_block_count: 0.1772 - mean_sub_block_count: -0.1378 + mean_block_count: 0.5617 + mean_near_dup_block_d7: 0.1772 + mean_near_dup_block_d8: 0.1772 + mean_sub_block_count: 1.0591 ngram: mean_bigram_hapax_fraction: -0.0655 mean_bigram_repeated_unique: -0.1356 @@ -141,12 +139,12 @@ line_count_under_300: mean_distinct_symbol_types: -0.0604 mean_symbol_count: -0.1504 vocabulary: - mean_mattr: -0.1338 
- mean_raw_ttr: -0.0761 - mean_total_identifiers: -0.1689 - mean_unique_identifiers: -0.2450 + mean_mattr: -0.1396 + mean_raw_ttr: -0.0669 + mean_total_identifiers: -0.1838 + mean_unique_identifiers: -0.2507 vowel_density: - mean_total_chars: -0.2359 + mean_total_chars: -0.2452 zipf: mean_exponent: 0.0102 mean_r_squared: -0.0067 @@ -155,17 +153,15 @@ line_count_under_300: line_length_under_120: _doc: "Lines should be under 120 characters to avoid horizontal scrolling." - _fix_hint: "Wrap lines at 80–120 characters — break long expressions into multiple lines" - _languages: [elixir] - _log_baseline: -6.2404 + _log_baseline: -6.2041 branching: mean_branching_density: -0.1942 mean_non_blank_count: 0.1944 brevity: mean_sample_size: -0.0200 casing_entropy: - mean_entropy: -0.0025 - mean_snake_case_count: 0.0039 + mean_entropy: -0.0047 + mean_snake_case_count: 0.0074 compression: mean_raw_bytes: 0.0170 mean_redundancy: 0.0140 @@ -200,9 +196,9 @@ line_length_under_120: mean_beta: -0.0068 mean_k: 0.0086 identifier_length_variance: - mean_mean: -0.0176 - mean_std_dev: -0.0468 - mean_variance: -0.0936 + mean_mean: -0.0207 + mean_std_dev: -0.0480 + mean_variance: -0.0960 indentation: mean_blank_line_ratio: -0.0420 mean_max_depth: 0.1137 @@ -246,10 +242,10 @@ line_length_under_120: mean_distinct_symbol_types: -0.0130 mean_symbol_count: -0.0078 vocabulary: - mean_mattr: -0.0207 - mean_raw_ttr: -0.0312 - mean_total_identifiers: 0.0036 - mean_unique_identifiers: -0.0276 + mean_mattr: -0.0231 + mean_raw_ttr: -0.0300 + mean_total_identifiers: 0.0067 + mean_unique_identifiers: -0.0232 vowel_density: mean_total_chars: -0.0140 zipf: @@ -259,9 +255,7 @@ line_length_under_120: no_magic_numbers: _doc: "Numeric literals should be extracted to named constants rather than used inline." 
- _fix_hint: "Replace literal numbers with named constants or module attributes" - _languages: [elixir] - _log_baseline: 107.5222 + _log_baseline: 105.2910 branching: mean_branch_count: -0.4352 mean_branching_density: -0.9103 @@ -269,8 +263,8 @@ no_magic_numbers: brevity: mean_sample_size: 0.3955 casing_entropy: - mean_entropy: -0.4640 - mean_snake_case_count: 0.7640 + mean_entropy: -0.5234 + mean_snake_case_count: 0.9072 compression: mean_raw_bytes: 0.7713 mean_redundancy: 0.1328 @@ -307,9 +301,9 @@ no_magic_numbers: mean_r_squared: -0.0645 identifier_length_variance: mean_max: 0.2172 - mean_mean: 0.5105 - mean_std_dev: 0.5395 - mean_variance: 1.0791 + mean_mean: 0.4886 + mean_std_dev: 0.4918 + mean_variance: 0.9835 indentation: mean_blank_line_ratio: 0.3137 mean_mean_depth: -0.4612 @@ -323,6 +317,9 @@ no_magic_numbers: mean_magic_number_count: -0.8032 mean_string_literal_ratio: -0.5060 near_duplicate_blocks_file: + mean_block_count: -0.1911 + mean_near_dup_block_d0: -1.6546 + mean_near_dup_block_d7: -1.0789 mean_sub_block_count: 0.3466 ngram: mean_bigram_hapax_fraction: -0.1520 @@ -351,11 +348,12 @@ no_magic_numbers: mean_density: -0.3071 mean_symbol_count: 0.4654 vocabulary: - mean_mattr: 0.3317 - mean_total_identifiers: 0.6600 - mean_unique_identifiers: 0.6581 + mean_mattr: 0.3553 + mean_raw_ttr: -0.0669 + mean_total_identifiers: 0.7640 + mean_unique_identifiers: 0.6968 vowel_density: - mean_total_chars: 1.1705 + mean_total_chars: 1.2526 zipf: mean_exponent: -0.1353 mean_r_squared: -0.0320 @@ -364,9 +362,7 @@ no_magic_numbers: single_responsibility: _doc: "Each file should have one primary concern — low complexity spread across few, focused functions." 
- _fix_hint: "Split the module — each file should have one primary purpose" - _languages: [elixir] - _log_baseline: -35.4996 + _log_baseline: -36.0617 branching: mean_branch_count: -0.0678 mean_branching_density: 0.1364 @@ -375,9 +371,10 @@ single_responsibility: brevity: mean_sample_size: -0.0864 casing_entropy: + mean_entropy: -0.0206 mean_other_count: -0.7475 mean_pascal_case_count: 0.0470 - mean_snake_case_count: -0.1816 + mean_snake_case_count: -0.1543 compression: mean_raw_bytes: -0.1908 mean_redundancy: -0.0351 @@ -416,9 +413,9 @@ single_responsibility: mean_r_squared: -0.0163 identifier_length_variance: mean_max: -0.0836 - mean_mean: -0.0346 - mean_std_dev: -0.0825 - mean_variance: -0.1650 + mean_mean: -0.0508 + mean_std_dev: -0.0865 + mean_variance: -0.1729 indentation: mean_blank_line_ratio: 0.0458 mean_mean_depth: -0.0476 @@ -432,6 +429,9 @@ single_responsibility: mean_density: 0.1469 mean_string_literal_ratio: -0.1759 near_duplicate_blocks_file: + mean_block_count: -0.2284 + mean_near_dup_block_d0: -0.2962 + mean_near_dup_block_d7: -0.3737 mean_sub_block_count: -0.1348 ngram: mean_bigram_hapax_fraction: 0.0075 @@ -465,12 +465,12 @@ single_responsibility: mean_distinct_symbol_types: 0.0284 mean_symbol_count: -0.1225 vocabulary: - mean_mattr: -0.0474 - mean_raw_ttr: 0.0249 - mean_total_identifiers: -0.1662 - mean_unique_identifiers: -0.1414 + mean_mattr: -0.0285 + mean_raw_ttr: 0.0110 + mean_total_identifiers: -0.1419 + mean_unique_identifiers: -0.1309 vowel_density: - mean_total_chars: -0.2009 + mean_total_chars: -0.1927 zipf: mean_exponent: -0.0209 mean_r_squared: -0.0043 @@ -479,8 +479,6 @@ single_responsibility: uses_standard_indentation_width: _doc: "Indentation should use consistent multiples of 2 or 4 spaces throughout the file." 
- _fix_hint: "Use the project-standard 2-space indentation throughout" - _languages: [elixir] _log_baseline: -17.9172 compression: mean_raw_bytes: -0.2512 @@ -499,6 +497,9 @@ uses_standard_indentation_width: mean_variance: -2.0000 line_patterns: mean_blank_line_ratio: 0.2077 + near_duplicate_blocks_file: + mean_near_dup_block_d3: -1.0000 + mean_near_dup_block_d4: 1.0000 punctuation_density: mean_exclamation_density: 0.2630 mean_question_mark_density: 0.2630 diff --git a/priv/combined_metrics/function_design.yml b/priv/combined_metrics/function_design.yml index b97868b..cb1d808 100644 --- a/priv/combined_metrics/function_design.yml +++ b/priv/combined_metrics/function_design.yml @@ -1,7 +1,5 @@ boolean_function_has_question_mark: _doc: "Functions returning a boolean should end with `?` (Elixir/Ruby) or start with `is_`/`has_` (JS/Python)." - _fix_hint: "Add a ? suffix to boolean-returning functions (e.g., valid? instead of is_valid)" - _languages: [elixir] _log_baseline: 7.0991 brevity: mean_sample_size: 0.0085 @@ -47,17 +45,15 @@ boolean_function_has_question_mark: cyclomatic_complexity_under_10: _doc: "Functions should have a cyclomatic complexity under 10." 
- _fix_hint: "Reduce branching — extract complex conditionals into helper functions" - _languages: [elixir] - _log_baseline: -1.6476 + _log_baseline: -1.4896 branching: mean_branch_count: -0.2373 mean_branching_density: -0.1952 mean_non_blank_count: -0.0421 casing_entropy: - mean_entropy: 0.1030 + mean_entropy: 0.0964 mean_other_count: 0.3306 - mean_snake_case_count: 0.0284 + mean_snake_case_count: 0.0321 compression: mean_raw_bytes: -0.0162 mean_redundancy: -0.0172 @@ -91,9 +87,9 @@ cyclomatic_complexity_under_10: mean_k: 0.0672 mean_r_squared: 0.0049 identifier_length_variance: - mean_mean: 0.0092 - mean_std_dev: 0.0209 - mean_variance: 0.0418 + mean_mean: 0.0130 + mean_std_dev: 0.0120 + mean_variance: 0.0240 indentation: mean_blank_line_ratio: 0.1655 mean_max_depth: -0.2086 @@ -107,6 +103,8 @@ cyclomatic_complexity_under_10: mean_density: -0.0329 mean_string_literal_ratio: -0.0439 near_duplicate_blocks_file: + mean_block_count: 0.1013 + mean_near_dup_block_d8: -0.2086 mean_sub_block_count: 0.0994 ngram: mean_bigram_hapax_fraction: -0.0068 @@ -139,12 +137,12 @@ cyclomatic_complexity_under_10: mean_distinct_symbol_types: -0.0172 mean_symbol_count: 0.0391 vocabulary: - mean_mattr: -0.0325 - mean_raw_ttr: -0.0309 - mean_total_identifiers: 0.0382 - mean_unique_identifiers: 0.0074 + mean_mattr: -0.0361 + mean_raw_ttr: -0.0361 + mean_total_identifiers: 0.0441 + mean_unique_identifiers: 0.0080 vowel_density: - mean_total_chars: 0.0474 + mean_total_chars: 0.0572 zipf: mean_exponent: 0.0120 mean_r_squared: 0.0057 @@ -152,36 +150,32 @@ cyclomatic_complexity_under_10: has_verb_in_name: _doc: "Function names should contain a verb describing the action performed." 
- _fix_hint: "Start function names with a verb (get_, fetch_, build_, compute_, validate_)" - _languages: [elixir] - _log_baseline: 15.9117 + _log_baseline: 14.8350 compression: - mean_raw_bytes: 0.0990 - mean_redundancy: -0.0473 - mean_zlib_bytes: 0.2439 - mean_zlib_ratio: -0.1449 + mean_raw_bytes: 0.0816 + mean_redundancy: -0.0390 + mean_zlib_bytes: 0.2011 + mean_zlib_ratio: -0.1195 identifier_length_variance: - mean_max: 0.9396 - mean_mean: 0.1989 - mean_std_dev: 1.0001 + mean_max: 0.7747 + mean_mean: 0.2058 + mean_std_dev: 1.0000 mean_variance: 2.0000 punctuation_density: - mean_exclamation_density: -0.1305 + mean_exclamation_density: -0.1076 readability: - mean_avg_line_length: 0.1026 - mean_avg_sub_words_per_id: 0.1614 - mean_flesch_adapted: -0.1605 - mean_fog_adapted: 1.6084 + mean_avg_line_length: 0.0846 + mean_avg_sub_words_per_id: 0.1330 + mean_flesch_adapted: -0.1324 + mean_fog_adapted: 1.3261 symbol_density: - mean_density: -0.1005 + mean_density: -0.0828 vowel_density: - mean_total_chars: 0.1989 + mean_total_chars: 0.2058 is_less_than_20_lines: _doc: "Functions should be 20 lines or fewer." 
- _fix_hint: "Split long functions — each function should fit on one screen (under 20 lines)" - _languages: [elixir] - _log_baseline: 23.1945 + _log_baseline: 23.9658 branching: mean_branch_count: -0.0820 mean_branching_density: -0.1010 @@ -190,10 +184,10 @@ is_less_than_20_lines: brevity: mean_sample_size: 0.0165 casing_entropy: - mean_entropy: 0.0640 + mean_entropy: 0.0577 mean_other_count: 0.6266 mean_pascal_case_count: 0.0440 - mean_snake_case_count: 0.0800 + mean_snake_case_count: 0.0910 compression: mean_raw_bytes: 0.0746 mean_redundancy: 0.0227 @@ -228,9 +222,9 @@ is_less_than_20_lines: heaps: mean_k: -0.0254 identifier_length_variance: - mean_mean: 0.0149 - mean_std_dev: 0.0313 - mean_variance: 0.0625 + mean_mean: 0.0122 + mean_std_dev: 0.0297 + mean_variance: 0.0593 indentation: mean_blank_line_ratio: -0.0440 mean_mean_depth: -0.0962 @@ -245,6 +239,8 @@ is_less_than_20_lines: mean_magic_number_count: 0.1156 mean_string_literal_ratio: -0.0774 near_duplicate_blocks_file: + mean_block_count: 0.2797 + mean_near_dup_block_d8: 0.3133 mean_sub_block_count: 0.1886 ngram: mean_bigram_hapax_fraction: -0.0508 @@ -278,12 +274,12 @@ is_less_than_20_lines: mean_distinct_symbol_types: 0.0127 mean_symbol_count: 0.0662 vocabulary: - mean_mattr: -0.0368 - mean_raw_ttr: -0.0647 - mean_total_identifiers: 0.0864 - mean_unique_identifiers: 0.0218 + mean_mattr: -0.0390 + mean_raw_ttr: -0.0717 + mean_total_identifiers: 0.0965 + mean_unique_identifiers: 0.0248 vowel_density: - mean_total_chars: 0.1013 + mean_total_chars: 0.1087 zipf: mean_exponent: 0.0225 mean_r_squared: 0.0030 @@ -292,9 +288,7 @@ is_less_than_20_lines: nesting_depth_under_4: _doc: "Code should not nest deeper than 4 levels." 
- _fix_hint: "Reduce nesting — use early returns, guards, or extract inner blocks" - _languages: [elixir] - _log_baseline: 1.1322 + _log_baseline: 1.0611 branching: mean_branch_count: -0.3267 mean_branching_density: -0.2061 @@ -303,10 +297,10 @@ nesting_depth_under_4: brevity: mean_sample_size: 0.0178 casing_entropy: - mean_entropy: -0.0358 + mean_entropy: -0.0207 mean_other_count: 0.2917 mean_pascal_case_count: -0.2725 - mean_snake_case_count: 0.1015 + mean_snake_case_count: 0.0787 compression: mean_raw_bytes: -0.0069 mean_redundancy: -0.0076 @@ -342,9 +336,9 @@ nesting_depth_under_4: mean_beta: -0.0464 mean_k: 0.0845 identifier_length_variance: - mean_mean: 0.0488 - mean_std_dev: 0.1849 - mean_variance: 0.3698 + mean_mean: 0.0770 + mean_std_dev: 0.1858 + mean_variance: 0.3716 indentation: mean_blank_line_ratio: 0.5622 mean_max_depth: -0.3155 @@ -358,6 +352,7 @@ nesting_depth_under_4: magic_number_density: mean_string_literal_ratio: -0.1046 near_duplicate_blocks_file: + mean_block_count: 0.0856 mean_sub_block_count: 0.1999 ngram: mean_bigram_hapax_fraction: -0.0645 @@ -390,12 +385,12 @@ nesting_depth_under_4: mean_density: 0.1426 mean_symbol_count: 0.1355 vocabulary: - mean_mattr: -0.0321 - mean_raw_ttr: -0.0555 - mean_total_identifiers: 0.0987 - mean_unique_identifiers: 0.0432 + mean_mattr: -0.0269 + mean_raw_ttr: -0.0269 + mean_total_identifiers: 0.0774 + mean_unique_identifiers: 0.0505 vowel_density: - mean_total_chars: 0.1475 + mean_total_chars: 0.1544 zipf: mean_exponent: 0.0250 mean_r_squared: 0.0156 @@ -404,9 +399,7 @@ nesting_depth_under_4: no_boolean_parameter: _doc: "Functions should not take boolean parameters — a flag usually means the function does two things." 
- _fix_hint: "Replace boolean parameters with two separate functions or use an options map" - _languages: [elixir] - _log_baseline: 3.0928 + _log_baseline: 13.6290 branching: mean_branch_count: -2.0000 mean_branching_density: 1.0271 @@ -415,9 +408,9 @@ no_boolean_parameter: brevity: mean_sample_size: -0.0253 casing_entropy: - mean_entropy: 0.0155 + mean_entropy: 0.0049 mean_pascal_case_count: 0.1180 - mean_snake_case_count: 0.0762 + mean_snake_case_count: 0.0931 compression: mean_raw_bytes: 0.0435 mean_redundancy: 0.0777 @@ -453,9 +446,9 @@ no_boolean_parameter: mean_beta: -0.0314 mean_k: 0.0620 identifier_length_variance: - mean_mean: 0.0178 - mean_std_dev: 0.1693 - mean_variance: 0.3386 + mean_mean: 0.0125 + mean_std_dev: 0.1858 + mean_variance: 0.3715 indentation: mean_blank_line_ratio: 0.4402 mean_max_depth: -0.5579 @@ -469,6 +462,14 @@ no_boolean_parameter: magic_number_density: mean_string_literal_ratio: 0.0206 near_duplicate_blocks_file: + mean_block_count: 0.4338 + mean_near_dup_block_d0: 1.7685 + mean_near_dup_block_d2: 1.1158 + mean_near_dup_block_d4: 1.6737 + mean_near_dup_block_d5: 1.6737 + mean_near_dup_block_d6: 1.7685 + mean_near_dup_block_d7: -0.8842 + mean_near_dup_block_d8: 0.5579 mean_sub_block_count: 0.2775 ngram: mean_bigram_hapax_fraction: -0.1940 @@ -499,12 +500,12 @@ no_boolean_parameter: mean_density: 0.0479 mean_symbol_count: 0.0916 vocabulary: - mean_mattr: -0.0611 - mean_raw_ttr: -0.0924 - mean_total_identifiers: 0.0811 - mean_unique_identifiers: -0.0114 + mean_mattr: -0.0916 + mean_raw_ttr: -0.1091 + mean_total_identifiers: 0.0962 + mean_unique_identifiers: -0.0129 vowel_density: - mean_total_chars: 0.0989 + mean_total_chars: 0.1087 zipf: mean_exponent: 0.0374 mean_total_tokens: 0.0692 @@ -512,9 +513,7 @@ no_boolean_parameter: no_magic_numbers: _doc: "Numeric literals should be named constants, not inline magic numbers." 
- _fix_hint: "Replace magic numbers inside functions with named module attributes or constants" - _languages: [elixir] - _log_baseline: 48.6069 + _log_baseline: 45.8808 branching: mean_branch_count: -0.2708 mean_branching_density: -0.1682 @@ -522,8 +521,8 @@ no_magic_numbers: brevity: mean_sample_size: 0.1527 casing_entropy: - mean_entropy: -0.2876 - mean_snake_case_count: 0.4222 + mean_entropy: -0.2908 + mean_snake_case_count: 0.4279 compression: mean_raw_bytes: 0.3823 mean_redundancy: 0.0584 @@ -559,9 +558,9 @@ no_magic_numbers: mean_r_squared: -0.0256 identifier_length_variance: mean_max: 0.0987 - mean_mean: 0.3701 - mean_std_dev: 0.3918 - mean_variance: 0.7835 + mean_mean: 0.3721 + mean_std_dev: 0.3878 + mean_variance: 0.7757 indentation: mean_blank_line_ratio: 0.2374 mean_mean_depth: -0.3518 @@ -574,6 +573,9 @@ no_magic_numbers: mean_density: -0.2831 mean_string_literal_ratio: -0.2880 near_duplicate_blocks_file: + mean_block_count: -0.7894 + mean_near_dup_block_d0: -1.1158 + mean_near_dup_block_d7: -1.1158 mean_sub_block_count: 0.2708 ngram: mean_bigram_hapax_fraction: -0.1437 @@ -603,12 +605,12 @@ no_magic_numbers: mean_distinct_symbol_types: 0.0644 mean_symbol_count: 0.3512 vocabulary: - mean_mattr: 0.0054 - mean_raw_ttr: -0.0161 - mean_total_identifiers: 0.3860 - mean_unique_identifiers: 0.3699 + mean_mattr: 0.0058 + mean_raw_ttr: -0.0081 + mean_total_identifiers: 0.3908 + mean_unique_identifiers: 0.3826 vowel_density: - mean_total_chars: 0.7561 + mean_total_chars: 0.7629 zipf: mean_exponent: 0.0164 mean_r_squared: 0.0321 @@ -617,18 +619,16 @@ no_magic_numbers: parameter_count_under_4: _doc: "Functions should take fewer than 4 parameters." 
- _fix_hint: "Reduce parameter count — group related params into a struct or options map" - _languages: [elixir] - _log_baseline: 1.6218 + _log_baseline: 1.9637 branching: mean_non_blank_count: 0.0967 brevity: mean_sample_size: 0.0261 casing_entropy: - mean_entropy: 0.5987 + mean_entropy: 0.5731 mean_other_count: 0.5408 mean_pascal_case_count: 0.2329 - mean_snake_case_count: -0.0580 + mean_snake_case_count: -0.0351 compression: mean_raw_bytes: -0.0343 mean_redundancy: -0.0308 @@ -667,9 +667,9 @@ parameter_count_under_4: mean_k: -0.0082 mean_r_squared: -0.0062 identifier_length_variance: - mean_mean: -0.0044 - mean_std_dev: -0.0221 - mean_variance: -0.0442 + mean_mean: -0.0239 + mean_std_dev: -0.0185 + mean_variance: -0.0371 indentation: mean_blank_line_ratio: 0.0518 mean_max_depth: 0.1362 @@ -682,6 +682,9 @@ parameter_count_under_4: magic_number_density: mean_density: -2.0000 mean_string_literal_ratio: 0.1674 + near_duplicate_blocks_file: + mean_block_count: 0.0967 + mean_near_dup_block_d7: -0.4658 ngram: mean_bigram_hapax_fraction: 0.0479 mean_bigram_repeated_unique: -0.0222 @@ -711,11 +714,11 @@ parameter_count_under_4: mean_distinct_symbol_types: 0.1042 mean_symbol_count: -0.0218 vocabulary: - mean_mattr: 0.0175 - mean_raw_ttr: 0.0416 - mean_total_identifiers: -0.0416 + mean_mattr: 0.0150 + mean_raw_ttr: 0.0153 + mean_total_identifiers: -0.0153 vowel_density: - mean_total_chars: -0.0460 + mean_total_chars: -0.0393 zipf: mean_exponent: 0.0101 mean_r_squared: -0.0074 @@ -724,106 +727,106 @@ parameter_count_under_4: uses_ternary_expression: _doc: "Simple conditional assignments should use inline expressions rather than full if-blocks." 
- _fix_hint: "Replace verbose if-else blocks with concise ternary/conditional expressions where readable" - _languages: [elixir] - _log_baseline: -0.3649 + _log_baseline: -4.5289 branching: - mean_branch_count: -0.4693 - mean_branching_density: 0.1280 - mean_non_blank_count: -0.5975 + mean_branch_count: -0.4160 + mean_branching_density: 0.1134 + mean_non_blank_count: -0.5296 brevity: - mean_sample_size: 0.0107 + mean_sample_size: 0.0095 casing_entropy: - mean_entropy: -0.0141 - mean_snake_case_count: 0.0271 + mean_entropy: 0.0068 + mean_snake_case_count: -0.0141 compression: - mean_raw_bytes: -0.0924 - mean_redundancy: -0.0709 - mean_unique_line_ratio: 0.1809 - mean_zlib_bytes: 0.0167 - mean_zlib_ratio: -0.1090 + mean_raw_bytes: -0.0819 + mean_redundancy: -0.0629 + mean_unique_line_ratio: 0.1604 + mean_zlib_bytes: 0.0148 + mean_zlib_ratio: -0.0967 entropy: - mean_char_entropy: 0.0749 - mean_char_normalized: 0.0717 - mean_token_entropy: -0.0088 - mean_token_normalized: -0.0114 - mean_total_tokens: 0.0969 - mean_vocab_size: 0.0107 + mean_char_entropy: 0.0664 + mean_char_normalized: 0.0636 + mean_token_entropy: -0.0078 + mean_token_normalized: -0.0101 + mean_total_tokens: 0.0859 + mean_vocab_size: 0.0095 function_metrics: - mean_avg_function_lines: -0.7654 - mean_function_count: 0.2745 - mean_max_function_lines: -0.4693 + mean_avg_function_lines: -0.6785 + mean_function_count: 0.2434 + mean_max_function_lines: -0.4160 halstead: - mean_N1_total_operators: 0.1767 - mean_N2_total_operands: 0.0622 - mean_difficulty: 0.0952 - mean_effort: 0.2409 - mean_estimated_bugs: 0.1456 - mean_length: 0.1429 - mean_n1_unique_operators: 0.0330 - mean_time_to_implement_seconds: 0.2409 - mean_vocabulary: 0.0114 - mean_volume: 0.1457 + mean_N1_total_operators: 0.1567 + mean_N2_total_operands: 0.0551 + mean_difficulty: 0.0844 + mean_effort: 0.2135 + mean_estimated_bugs: 0.1291 + mean_length: 0.1267 + mean_n1_unique_operators: 0.0293 + mean_time_to_implement_seconds: 0.2135 + 
mean_vocabulary: 0.0101 + mean_volume: 0.1291 heaps: - mean_beta: -0.0340 - mean_k: 0.0670 + mean_beta: -0.0301 + mean_k: 0.0594 identifier_length_variance: - mean_mean: 0.0616 - mean_std_dev: 0.0617 - mean_variance: 0.1234 + mean_mean: 0.0749 + mean_std_dev: 0.0535 + mean_variance: 0.1070 indentation: - mean_blank_line_ratio: 0.5702 - mean_max_depth: -0.2745 - mean_mean_depth: -0.3658 - mean_variance: -0.6153 + mean_blank_line_ratio: 0.5054 + mean_max_depth: -0.2434 + mean_mean_depth: -0.3243 + mean_variance: -0.5454 line_patterns: - mean_blank_line_ratio: 0.5702 - mean_string_literal_ratio: -0.0964 - mean_unique_line_ratio: 0.1839 + mean_blank_line_ratio: 0.5054 + mean_string_literal_ratio: -0.0855 + mean_unique_line_ratio: 0.1630 magic_number_density: - mean_density: -0.0969 - mean_string_literal_ratio: -0.0964 + mean_density: -0.0859 + mean_string_literal_ratio: -0.0855 near_duplicate_blocks_file: - mean_sub_block_count: 0.2745 + mean_block_count: -0.2821 + mean_near_dup_block_d0: -2.0000 + mean_sub_block_count: 0.2434 ngram: - mean_bigram_hapax_fraction: -0.0616 - mean_bigram_repeated_unique: 0.2415 - mean_bigram_repetition_rate: 0.0886 - mean_bigram_total: 0.0973 - mean_bigram_unique: 0.0487 - mean_trigram_hapax_fraction: -0.0186 - mean_trigram_repeated_unique: 0.1511 - mean_trigram_repetition_rate: 0.1328 - mean_trigram_total: 0.0977 - mean_trigram_unique: 0.0452 + mean_bigram_hapax_fraction: -0.0546 + mean_bigram_repeated_unique: 0.2141 + mean_bigram_repetition_rate: 0.0785 + mean_bigram_total: 0.0863 + mean_bigram_unique: 0.0432 + mean_trigram_hapax_fraction: -0.0165 + mean_trigram_repeated_unique: 0.1339 + mean_trigram_repetition_rate: 0.1178 + mean_trigram_total: 0.0866 + mean_trigram_unique: 0.0400 punctuation_density: - mean_bracket_nonalpha_prefix_count: 0.3060 - mean_bracket_nonalpha_suffix_count: 0.5928 - mean_bracket_number_pair_count: 0.4693 - mean_colon_suffix_density: 2.0000 - mean_dot_count: -1.3176 - mean_id_nonalpha_suffix_density: 0.2152 + 
mean_bracket_nonalpha_prefix_count: 0.2713 + mean_bracket_nonalpha_suffix_count: 0.5255 + mean_bracket_number_pair_count: 0.4160 + mean_colon_suffix_density: 1.7729 + mean_dot_count: -1.1679 + mean_id_nonalpha_suffix_density: 0.1908 readability: - mean_avg_line_length: 0.5254 - mean_avg_sub_words_per_id: 0.0352 - mean_avg_tokens_per_line: 0.6944 - mean_flesch_adapted: -0.0769 - mean_fog_adapted: 0.6047 - mean_total_lines: -0.5975 + mean_avg_line_length: 0.4657 + mean_avg_sub_words_per_id: 0.0312 + mean_avg_tokens_per_line: 0.6155 + mean_flesch_adapted: -0.0682 + mean_fog_adapted: 0.5360 + mean_total_lines: -0.5296 symbol_density: - mean_density: 0.3573 - mean_distinct_symbol_types: 0.0410 - mean_symbol_count: 0.2651 + mean_density: 0.3167 + mean_distinct_symbol_types: 0.0364 + mean_symbol_count: 0.2350 vocabulary: - mean_mattr: -0.0421 - mean_raw_ttr: -0.0421 - mean_total_identifiers: 0.0230 - mean_unique_identifiers: -0.0191 + mean_mattr: -0.0068 + mean_raw_ttr: -0.0068 + mean_total_identifiers: -0.0117 + mean_unique_identifiers: -0.0185 vowel_density: - mean_total_chars: 0.0845 + mean_total_chars: 0.0632 zipf: - mean_exponent: 0.0361 - mean_r_squared: 0.0150 - mean_total_tokens: 0.0969 - mean_vocab_size: 0.0107 + mean_exponent: 0.0320 + mean_r_squared: 0.0133 + mean_total_tokens: 0.0859 + mean_vocab_size: 0.0095 diff --git a/priv/combined_metrics/naming_conventions.yml b/priv/combined_metrics/naming_conventions.yml index 4f3693d..aa6c6d5 100644 --- a/priv/combined_metrics/naming_conventions.yml +++ b/priv/combined_metrics/naming_conventions.yml @@ -1,8 +1,6 @@ class_name_is_noun: _doc: "Class and module names should be nouns describing what they represent, not verbs or gerunds." 
- _fix_hint: "Name modules/classes with nouns (User, OrderProcessor) not verbs" - _languages: [elixir] - _log_baseline: 4.2909 + _log_baseline: 2.9861 brevity: mean_sample_size: 0.7106 compression: @@ -28,9 +26,9 @@ class_name_is_noun: mean_k: -0.6266 identifier_length_variance: mean_max: -0.4031 - mean_mean: 0.3059 - mean_std_dev: -0.5093 - mean_variance: -1.0187 + mean_mean: 0.3287 + mean_std_dev: -0.8347 + mean_variance: -1.6695 ngram: mean_bigram_hapax_fraction: 0.2542 mean_bigram_repeated_unique: -0.5967 @@ -47,11 +45,11 @@ class_name_is_noun: symbol_density: mean_density: -0.1381 vocabulary: - mean_mattr: 1.2109 - mean_raw_ttr: 1.2109 - mean_unique_identifiers: 1.2116 + mean_mattr: 1.4020 + mean_raw_ttr: 1.4020 + mean_unique_identifiers: 1.4020 vowel_density: - mean_total_chars: 0.3059 + mean_total_chars: 0.3287 zipf: mean_exponent: -0.2180 mean_vocab_size: 0.7106 @@ -68,45 +66,41 @@ file_name_matches_primary_export: function_name_is_not_single_word: _doc: "Single-word function names like `run`, `process`, or `handle` are too vague to convey intent." 
- _fix_hint: "Use at least two words in function names to convey intent (e.g., fetch_user not fetch)" - _languages: [elixir] - _log_baseline: 17.4874 + _log_baseline: 17.8470 compression: - mean_raw_bytes: 0.2480 - mean_redundancy: 0.0791 - mean_zlib_bytes: 0.1049 - mean_zlib_ratio: 0.1431 + mean_raw_bytes: 0.2434 + mean_redundancy: 0.0776 + mean_zlib_bytes: 0.1029 + mean_zlib_ratio: 0.1405 entropy: - mean_char_entropy: 0.0245 - mean_char_normalized: 0.0246 + mean_char_entropy: 0.0241 + mean_char_normalized: 0.0241 identifier_length_variance: - mean_max: 0.7830 - mean_mean: 0.5357 + mean_max: 0.7685 + mean_mean: 0.5825 mean_std_dev: 1.0000 mean_variance: 2.0000 readability: - mean_avg_line_length: 0.2607 - mean_avg_sub_words_per_id: 0.3141 - mean_flesch_adapted: -0.3241 - mean_fog_adapted: 1.3508 + mean_avg_line_length: 0.2559 + mean_avg_sub_words_per_id: 0.3083 + mean_flesch_adapted: -0.3181 + mean_fog_adapted: 1.3258 symbol_density: - mean_density: -0.2477 + mean_density: -0.2431 vowel_density: - mean_total_chars: 0.5357 + mean_total_chars: 0.5825 function_name_matches_return_type: _doc: "Functions prefixed with `get_`, `fetch_`, or `find_` should return the thing they name." 
- _fix_hint: "Align the function name with what it returns (get_ for values, is_/has_ for booleans)" - _languages: [elixir] - _log_baseline: 7.9532 + _log_baseline: 7.5638 branching: mean_max_nesting_depth: 0.1335 brevity: mean_sample_size: 0.0257 casing_entropy: - mean_entropy: 0.0452 + mean_entropy: 0.0310 mean_other_count: 0.0347 - mean_snake_case_count: -0.0493 + mean_snake_case_count: -0.0296 compression: mean_raw_bytes: -0.0190 mean_redundancy: -0.0180 @@ -137,9 +131,8 @@ function_name_matches_return_type: mean_r_squared: 0.0038 identifier_length_variance: mean_max: 0.1082 - mean_mean: 0.0200 - mean_std_dev: 0.0081 - mean_variance: 0.0162 + mean_std_dev: 0.0326 + mean_variance: 0.0653 line_patterns: mean_max_nesting_depth: 0.1335 mean_string_literal_ratio: -0.0027 @@ -148,6 +141,9 @@ function_name_matches_return_type: mean_density: -0.0108 mean_string_literal_ratio: -0.0027 near_duplicate_blocks_file: + mean_near_dup_block_d0: -0.5899 + mean_near_dup_block_d5: -0.2282 + mean_near_dup_block_d7: 0.2282 mean_sub_block_count: 0.0314 ngram: mean_bigram_hapax_fraction: 0.0106 @@ -177,12 +173,12 @@ function_name_matches_return_type: mean_distinct_symbol_types: 0.0639 mean_symbol_count: 0.0442 vocabulary: - mean_mattr: 0.0228 - mean_raw_ttr: 0.0478 - mean_total_identifiers: -0.0410 - mean_unique_identifiers: 0.0068 + mean_mattr: 0.0350 + mean_raw_ttr: 0.0299 + mean_total_identifiers: -0.0225 + mean_unique_identifiers: 0.0074 vowel_density: - mean_total_chars: -0.0210 + mean_total_chars: -0.0235 zipf: mean_exponent: -0.0047 mean_r_squared: 0.0105 @@ -191,17 +187,15 @@ function_name_matches_return_type: test_name_starts_with_verb: _doc: "Test descriptions should start with a verb: `creates`, `raises`, `returns`, not a noun phrase." 
- _fix_hint: "Start test descriptions with a verb (returns, raises, creates, validates)" - _languages: [elixir] - _log_baseline: 7.8702 + _log_baseline: 7.8915 branching: mean_branch_count: 1.9977 mean_branching_density: 2.0000 brevity: mean_sample_size: 0.0694 casing_entropy: - mean_entropy: -0.0749 - mean_snake_case_count: 0.1317 + mean_entropy: -0.0711 + mean_snake_case_count: 0.1381 compression: mean_raw_bytes: 0.0914 mean_redundancy: 0.0182 @@ -230,8 +224,8 @@ test_name_starts_with_verb: mean_k: 0.0795 mean_r_squared: -0.0081 identifier_length_variance: - mean_std_dev: -0.0221 - mean_variance: -0.0441 + mean_std_dev: -0.0192 + mean_variance: -0.0384 line_patterns: mean_string_literal_ratio: -0.0611 magic_number_density: @@ -258,12 +252,11 @@ test_name_starts_with_verb: symbol_density: mean_density: -0.0912 vocabulary: - mean_mattr: 0.0427 - mean_raw_ttr: -0.0298 - mean_total_identifiers: 0.1126 - mean_unique_identifiers: 0.0828 + mean_mattr: 0.0463 + mean_total_identifiers: 0.1129 + mean_unique_identifiers: 0.1161 vowel_density: - mean_total_chars: 0.1150 + mean_total_chars: 0.1122 zipf: mean_exponent: -0.0239 mean_total_tokens: 0.0600 diff --git a/priv/combined_metrics/scope_and_assignment.yml b/priv/combined_metrics/scope_and_assignment.yml index c21c6dd..9654cc9 100644 --- a/priv/combined_metrics/scope_and_assignment.yml +++ b/priv/combined_metrics/scope_and_assignment.yml @@ -1,8 +1,6 @@ declared_close_to_use: _doc: "Variables should be declared near their first use, not hoisted to the top of the function." 
- _fix_hint: "Move variable declarations closer to their first use to reduce cognitive scope" - _languages: [elixir] - _log_baseline: -59.6022 + _log_baseline: -59.7486 branching: mean_branch_count: -0.4906 mean_branching_density: -0.2814 @@ -10,9 +8,9 @@ declared_close_to_use: brevity: mean_sample_size: -0.2189 casing_entropy: - mean_entropy: 0.1835 + mean_entropy: 0.1911 mean_pascal_case_count: -0.1139 - mean_snake_case_count: -0.3839 + mean_snake_case_count: -0.4005 comment_structure: mean_comment_line_count: -2.0000 mean_comment_line_ratio: 1.9655 @@ -50,9 +48,9 @@ declared_close_to_use: mean_k: 0.0180 mean_r_squared: 0.0061 identifier_length_variance: - mean_mean: 0.0259 - mean_std_dev: 0.0252 - mean_variance: 0.0505 + mean_mean: 0.0337 + mean_std_dev: 0.0216 + mean_variance: 0.0433 indentation: mean_blank_line_ratio: 0.0720 mean_mean_depth: 0.0139 @@ -94,12 +92,12 @@ declared_close_to_use: mean_distinct_symbol_types: -0.0387 mean_symbol_count: -0.1541 vocabulary: - mean_mattr: -0.0662 - mean_raw_ttr: 0.0986 - mean_total_identifiers: -0.3537 - mean_unique_identifiers: -0.2551 + mean_mattr: -0.0812 + mean_raw_ttr: 0.1048 + mean_total_identifiers: -0.3668 + mean_unique_identifiers: -0.2619 vowel_density: - mean_total_chars: -0.3278 + mean_total_chars: -0.3330 zipf: mean_exponent: -0.0151 mean_total_tokens: -0.2436 @@ -107,9 +105,7 @@ declared_close_to_use: mutated_after_initial_assignment: _doc: "Variables should not be reassigned after their initial value — prefer introducing a new name." 
- _fix_hint: "Avoid reassigning variables — introduce a new name for each transformed value" - _languages: [elixir] - _log_baseline: 6.6347 + _log_baseline: 6.6896 branching: mean_branch_count: 0.1519 mean_branching_density: 0.2073 @@ -118,9 +114,9 @@ mutated_after_initial_assignment: brevity: mean_sample_size: 0.0068 casing_entropy: - mean_entropy: -0.0957 + mean_entropy: -0.0947 mean_pascal_case_count: -0.2061 - mean_snake_case_count: -0.0467 + mean_snake_case_count: -0.0436 compression: mean_raw_bytes: -0.0496 mean_redundancy: -0.0291 @@ -156,9 +152,9 @@ mutated_after_initial_assignment: mean_k: -0.0775 mean_r_squared: 0.0063 identifier_length_variance: - mean_mean: -0.0225 - mean_std_dev: 0.0262 - mean_variance: 0.0525 + mean_mean: -0.0249 + mean_std_dev: 0.0286 + mean_variance: 0.0571 indentation: mean_blank_line_ratio: -0.1139 mean_max_depth: 0.2725 @@ -204,12 +200,12 @@ mutated_after_initial_assignment: mean_distinct_symbol_types: 0.0436 mean_symbol_count: -0.0402 vocabulary: - mean_mattr: 0.0742 - mean_raw_ttr: 0.0464 - mean_total_identifiers: -0.0634 - mean_unique_identifiers: -0.0170 + mean_mattr: 0.0885 + mean_raw_ttr: 0.0564 + mean_total_identifiers: -0.0623 + mean_unique_identifiers: -0.0059 vowel_density: - mean_total_chars: -0.0859 + mean_total_chars: -0.0872 zipf: mean_exponent: -0.0305 mean_r_squared: 0.0040 @@ -218,109 +214,106 @@ mutated_after_initial_assignment: reassigned_multiple_times: _doc: "A variable reassigned many times is a sign the name is too generic or the function does too much." 
- _fix_hint: "Refactor repeated reassignment into a pipeline or named intermediate values" - _languages: [elixir] - _log_baseline: -107.5659 + _log_baseline: -63.1779 branching: - mean_non_blank_count: -0.2287 + mean_non_blank_count: -0.1343 brevity: - mean_sample_size: 0.0287 + mean_sample_size: 0.0169 casing_entropy: - mean_entropy: 0.2150 - mean_pascal_case_count: -0.3434 - mean_snake_case_count: -0.9136 + mean_entropy: 0.0898 + mean_pascal_case_count: -0.2016 + mean_snake_case_count: -0.4475 compression: - mean_raw_bytes: -0.4804 - mean_redundancy: -0.2638 - mean_unique_line_ratio: -0.0172 - mean_zlib_bytes: 0.0205 - mean_zlib_ratio: -0.5009 + mean_raw_bytes: -0.2821 + mean_redundancy: -0.1549 + mean_unique_line_ratio: -0.0101 + mean_zlib_bytes: 0.0121 + mean_zlib_ratio: -0.2942 entropy: - mean_char_entropy: 0.0869 - mean_char_max_entropy: 0.0234 - mean_char_normalized: 0.0636 - mean_token_entropy: 0.0534 - mean_token_normalized: 0.0470 - mean_total_tokens: -0.4852 - mean_vocab_size: 0.0287 + mean_char_entropy: 0.0510 + mean_char_max_entropy: 0.0137 + mean_char_normalized: 0.0373 + mean_token_entropy: 0.0313 + mean_token_normalized: 0.0276 + mean_total_tokens: -0.2849 + mean_vocab_size: 0.0169 function_metrics: - mean_avg_function_lines: -0.1143 + mean_avg_function_lines: -0.0671 halstead: - mean_N1_total_operators: -0.2160 - mean_N2_total_operands: -0.9103 - mean_difficulty: -1.1372 - mean_effort: -1.6318 - mean_estimated_bugs: -0.4946 - mean_length: -0.5017 - mean_n1_unique_operators: -0.1489 - mean_n2_unique_operands: 0.0780 - mean_time_to_implement_seconds: -1.6318 - mean_vocabulary: 0.0309 - mean_volume: -0.4946 + mean_N1_total_operators: -0.1269 + mean_N2_total_operands: -0.5345 + mean_difficulty: -0.6678 + mean_effort: -0.9583 + mean_estimated_bugs: -0.2905 + mean_length: -0.2946 + mean_n1_unique_operators: -0.0875 + mean_n2_unique_operands: 0.0458 + mean_time_to_implement_seconds: -0.9583 + mean_vocabulary: 0.0181 + mean_volume: -0.2905 heaps: - 
mean_beta: 0.2282 - mean_k: -0.3898 - mean_r_squared: 0.0349 + mean_beta: 0.1340 + mean_k: -0.2289 + mean_r_squared: 0.0205 identifier_length_variance: - mean_max: 0.3280 - mean_mean: 0.3212 - mean_std_dev: 0.8224 - mean_variance: 1.6450 + mean_max: 0.1926 + mean_mean: 0.1821 + mean_std_dev: 0.4688 + mean_variance: 0.9376 indentation: - mean_blank_line_ratio: 1.1002 - mean_mean_depth: -0.0484 - mean_variance: 0.1417 + mean_blank_line_ratio: 0.6461 + mean_mean_depth: -0.0284 + mean_variance: 0.0832 line_patterns: - mean_blank_line_ratio: 1.1002 - mean_string_literal_ratio: 0.4958 - mean_unique_line_ratio: -0.0236 + mean_blank_line_ratio: 0.6461 + mean_string_literal_ratio: 0.2911 + mean_unique_line_ratio: -0.0138 magic_number_density: - mean_density: 0.4930 - mean_string_literal_ratio: 0.4958 + mean_density: 0.2895 + mean_string_literal_ratio: 0.2911 near_duplicate_blocks_file: - mean_sub_block_count: 0.3280 + mean_near_dup_block_d8: -2.0000 + mean_sub_block_count: 0.1926 ngram: - mean_bigram_hapax_fraction: 0.4509 - mean_bigram_repeated_unique: -1.1755 - mean_bigram_repetition_rate: -0.4690 - mean_bigram_total: -0.4861 - mean_bigram_unique: 0.0798 - mean_trigram_hapax_fraction: 0.4536 - mean_trigram_repeated_unique: -2.0000 - mean_trigram_repetition_rate: -0.7942 - mean_trigram_total: -0.4871 - mean_trigram_unique: -0.0423 + mean_bigram_hapax_fraction: 0.2648 + mean_bigram_repeated_unique: -0.6903 + mean_bigram_repetition_rate: -0.2754 + mean_bigram_total: -0.2855 + mean_bigram_unique: 0.0469 + mean_trigram_hapax_fraction: 0.2664 + mean_trigram_repeated_unique: -1.1745 + mean_trigram_repetition_rate: -0.4664 + mean_trigram_total: -0.2861 + mean_trigram_unique: -0.0248 punctuation_density: - mean_colon_suffix_density: 0.4930 - mean_dot_count: -0.4849 - mean_id_nonalpha_suffix_density: -0.3517 + mean_colon_suffix_density: 0.2895 + mean_dot_count: -0.2847 + mean_id_nonalpha_suffix_density: -0.2066 readability: - mean_avg_line_length: -0.2630 - 
mean_avg_sub_words_per_id: 0.1974 - mean_avg_tokens_per_line: -0.2565 - mean_flesch_adapted: -0.1476 - mean_fog_adapted: -0.2565 - mean_total_lines: -0.2287 + mean_avg_line_length: -0.1544 + mean_avg_sub_words_per_id: 0.1159 + mean_avg_tokens_per_line: -0.1506 + mean_flesch_adapted: -0.0867 + mean_fog_adapted: -0.1506 + mean_total_lines: -0.1343 symbol_density: - mean_density: 0.2755 - mean_symbol_count: -0.2056 + mean_density: 0.1618 + mean_symbol_count: -0.1207 vocabulary: - mean_mattr: 0.9538 - mean_raw_ttr: 0.9009 - mean_total_identifiers: -0.7749 - mean_unique_identifiers: 0.1260 + mean_mattr: 0.4975 + mean_raw_ttr: 0.4909 + mean_total_identifiers: -0.3859 + mean_unique_identifiers: 0.1050 vowel_density: - mean_total_chars: -0.4538 + mean_total_chars: -0.2038 zipf: - mean_exponent: -0.2377 - mean_total_tokens: -0.4852 - mean_vocab_size: 0.0287 + mean_exponent: -0.1396 + mean_total_tokens: -0.2849 + mean_vocab_size: 0.0169 scope_is_minimal: _doc: "Variables should be scoped as narrowly as possible — not declared at a wider scope than needed." 
- _fix_hint: "Narrow the scope of variables — declare them in the innermost block where they are used" - _languages: [elixir] - _log_baseline: -6.7522 + _log_baseline: -7.8286 branching: mean_branch_count: -0.1072 mean_branching_density: -0.0452 @@ -328,9 +321,9 @@ scope_is_minimal: brevity: mean_sample_size: -0.0368 casing_entropy: - mean_entropy: -0.0721 + mean_entropy: -0.0341 mean_other_count: -0.1823 - mean_snake_case_count: 0.0081 + mean_snake_case_count: -0.0471 comment_structure: mean_comment_line_count: -0.4075 mean_comment_line_ratio: 2.0000 @@ -369,9 +362,9 @@ scope_is_minimal: mean_k: -0.0341 mean_r_squared: 0.0103 identifier_length_variance: - mean_mean: -0.0389 - mean_std_dev: 0.0545 - mean_variance: 0.1090 + mean_mean: -0.0320 + mean_std_dev: 0.0864 + mean_variance: 0.1729 indentation: mean_blank_line_ratio: -0.1076 mean_max_depth: -0.4694 @@ -384,6 +377,7 @@ scope_is_minimal: magic_number_density: mean_string_literal_ratio: -0.0144 near_duplicate_blocks_file: + mean_block_count: -0.1691 mean_sub_block_count: 0.0447 ngram: mean_bigram_hapax_fraction: -0.0078 @@ -414,12 +408,11 @@ scope_is_minimal: mean_distinct_symbol_types: 0.0336 mean_symbol_count: 0.0316 vocabulary: - mean_mattr: -0.0277 - mean_raw_ttr: -0.0643 - mean_total_identifiers: -0.0071 - mean_unique_identifiers: -0.0713 + mean_raw_ttr: -0.0313 + mean_total_identifiers: -0.0577 + mean_unique_identifiers: -0.0891 vowel_density: - mean_total_chars: -0.0460 + mean_total_chars: -0.0897 zipf: mean_exponent: 0.0070 mean_total_tokens: 0.0143 @@ -427,9 +420,7 @@ scope_is_minimal: shadowed_by_inner_scope: _doc: "Inner-scope names that shadow outer-scope names cause confusion about which value is in play." 
- _fix_hint: "Rename the inner variable to avoid shadowing the outer one" - _languages: [elixir] - _log_baseline: -33.9501 + _log_baseline: -33.8958 branching: mean_branching_density: 2.0000 mean_max_nesting_depth: -0.1450 @@ -437,9 +428,9 @@ shadowed_by_inner_scope: brevity: mean_sample_size: -0.0786 casing_entropy: - mean_entropy: 0.1125 + mean_entropy: 0.1132 mean_pascal_case_count: -0.0306 - mean_snake_case_count: -0.2319 + mean_snake_case_count: -0.2452 comment_structure: mean_comment_line_count: -1.1073 mean_comment_line_ratio: 0.8936 @@ -476,9 +467,9 @@ shadowed_by_inner_scope: mean_k: -0.0817 mean_r_squared: -0.0030 identifier_length_variance: - mean_mean: 0.1455 - mean_std_dev: 0.2453 - mean_variance: 0.4905 + mean_mean: 0.1622 + mean_std_dev: 0.2441 + mean_variance: 0.4883 indentation: mean_blank_line_ratio: 0.0798 mean_mean_depth: -0.0595 @@ -519,12 +510,12 @@ shadowed_by_inner_scope: mean_distinct_symbol_types: -0.0561 mean_symbol_count: -0.1275 vocabulary: - mean_mattr: 0.0829 - mean_raw_ttr: 0.1254 - mean_total_identifiers: -0.2059 - mean_unique_identifiers: -0.0804 + mean_mattr: 0.1103 + mean_raw_ttr: 0.1343 + mean_total_identifiers: -0.2135 + mean_unique_identifiers: -0.0792 vowel_density: - mean_total_chars: -0.0604 + mean_total_chars: -0.0513 zipf: mean_exponent: -0.0364 mean_r_squared: 0.0058 @@ -533,16 +524,14 @@ shadowed_by_inner_scope: used_only_once: _doc: "A variable used only once is a candidate for inlining — it rarely adds clarity over a direct expression." 
- _fix_hint: "Inline single-use variables directly into their only use site" - _languages: [elixir] - _log_baseline: -116.4649 + _log_baseline: -115.3539 branching: mean_non_blank_count: -0.5385 brevity: mean_sample_size: -0.4955 casing_entropy: - mean_entropy: 0.6972 - mean_snake_case_count: -1.1633 + mean_entropy: 0.6530 + mean_snake_case_count: -1.1336 compression: mean_raw_bytes: -0.8394 mean_redundancy: -0.1990 @@ -573,9 +562,9 @@ used_only_once: heaps: mean_k: -0.1674 identifier_length_variance: - mean_mean: -0.2014 - mean_std_dev: 0.1846 - mean_variance: 0.3694 + mean_mean: -0.1940 + mean_std_dev: 0.1753 + mean_variance: 0.3505 indentation: mean_blank_line_ratio: 0.4750 mean_max_depth: 1.2798 @@ -617,12 +606,12 @@ used_only_once: mean_density: 0.6227 mean_symbol_count: -0.2178 vocabulary: - mean_mattr: 0.2063 - mean_raw_ttr: 0.2647 - mean_total_identifiers: -0.9999 - mean_unique_identifiers: -0.7352 + mean_mattr: 0.0846 + mean_raw_ttr: 0.1945 + mean_total_identifiers: -0.9555 + mean_unique_identifiers: -0.7612 vowel_density: - mean_total_chars: -1.2013 + mean_total_chars: -1.1494 zipf: mean_exponent: 0.1820 mean_r_squared: -0.0230 diff --git a/priv/combined_metrics/testing.yml b/priv/combined_metrics/testing.yml index 6b1d5e9..33175c4 100644 --- a/priv/combined_metrics/testing.yml +++ b/priv/combined_metrics/testing.yml @@ -1,8 +1,6 @@ reasonable_test_to_code_ratio: _doc: "There should be an adequate number of test cases relative to the code being tested." 
- _fix_hint: "Add more tests — aim for at least one test per public function" - _languages: [elixir] - _log_baseline: 8.2261 + _log_baseline: 11.2157 branching: mean_branch_count: 0.1869 mean_branching_density: 0.0352 @@ -10,9 +8,9 @@ reasonable_test_to_code_ratio: brevity: mean_sample_size: 0.0290 casing_entropy: - mean_entropy: 0.0597 + mean_entropy: 0.0656 mean_pascal_case_count: 0.2097 - mean_snake_case_count: 0.0671 + mean_snake_case_count: 0.0455 comment_structure: mean_comment_line_count: -0.5246 mean_comment_line_ratio: 0.5016 @@ -51,9 +49,9 @@ reasonable_test_to_code_ratio: mean_k: 0.0107 mean_r_squared: 0.0171 identifier_length_variance: - mean_mean: 0.0231 - mean_std_dev: 0.0069 - mean_variance: 0.0139 + mean_mean: 0.0278 + mean_std_dev: 0.0092 + mean_variance: 0.0185 indentation: mean_blank_line_ratio: -0.0175 mean_max_depth: 0.1093 @@ -68,6 +66,13 @@ reasonable_test_to_code_ratio: mean_magic_number_count: 0.4248 mean_string_literal_ratio: 0.2536 near_duplicate_blocks_file: + mean_block_count: 0.3136 + mean_near_dup_block_d0: 0.6699 + mean_near_dup_block_d3: 0.1869 + mean_near_dup_block_d4: 0.5246 + mean_near_dup_block_d5: 0.1869 + mean_near_dup_block_d7: 0.7475 + mean_near_dup_block_d8: 0.1869 mean_sub_block_count: 0.0723 ngram: mean_bigram_hapax_fraction: -0.0173 @@ -97,12 +102,12 @@ reasonable_test_to_code_ratio: mean_distinct_symbol_types: -0.0284 mean_symbol_count: 0.0960 vocabulary: - mean_mattr: -0.0209 - mean_raw_ttr: -0.0657 - mean_total_identifiers: 0.0853 - mean_unique_identifiers: 0.0196 + mean_mattr: -0.0271 + mean_raw_ttr: -0.0488 + mean_total_identifiers: 0.0704 + mean_unique_identifiers: 0.0216 vowel_density: - mean_total_chars: 0.1084 + mean_total_chars: 0.0982 zipf: mean_exponent: 0.0408 mean_r_squared: -0.0086 @@ -111,9 +116,7 @@ reasonable_test_to_code_ratio: test_has_assertion: _doc: "Every test body must contain at least one assertion — a test without assertions proves nothing." 
- _fix_hint: "Add at least one assert/refute to every test — a test without assertions proves nothing" - _languages: [elixir] - _log_baseline: -9.6007 + _log_baseline: -10.8081 branching: mean_branch_count: 0.0918 mean_branching_density: 0.1642 @@ -121,10 +124,10 @@ test_has_assertion: brevity: mean_sample_size: -0.0555 casing_entropy: - mean_entropy: -0.0130 + mean_entropy: -0.0026 mean_other_count: -0.1294 mean_pascal_case_count: -0.0450 - mean_snake_case_count: -0.0338 + mean_snake_case_count: -0.0568 comment_structure: mean_comment_line_count: -0.6211 mean_comment_line_ratio: 0.6522 @@ -158,9 +161,9 @@ test_has_assertion: mean_k: 0.0917 mean_r_squared: -0.0091 identifier_length_variance: - mean_mean: 0.0104 - mean_std_dev: 0.0277 - mean_variance: 0.0553 + mean_mean: 0.0215 + mean_std_dev: 0.0214 + mean_variance: 0.0429 indentation: mean_blank_line_ratio: 0.0261 mean_max_depth: -0.1294 @@ -173,6 +176,12 @@ test_has_assertion: magic_number_density: mean_string_literal_ratio: -0.0139 near_duplicate_blocks_file: + mean_block_count: -0.0376 + mean_near_dup_block_d0: -0.3507 + mean_near_dup_block_d5: 0.2212 + mean_near_dup_block_d6: -0.4425 + mean_near_dup_block_d7: 0.2212 + mean_near_dup_block_d8: -0.5719 mean_sub_block_count: 0.0228 ngram: mean_bigram_hapax_fraction: -0.0509 @@ -202,12 +211,12 @@ test_has_assertion: mean_distinct_symbol_types: -0.0194 mean_symbol_count: 0.0126 vocabulary: - mean_mattr: -0.0644 - mean_raw_ttr: -0.0389 - mean_total_identifiers: -0.0370 - mean_unique_identifiers: -0.0760 + mean_mattr: -0.0607 + mean_raw_ttr: -0.0243 + mean_total_identifiers: -0.0553 + mean_unique_identifiers: -0.0796 vowel_density: - mean_total_chars: -0.0266 + mean_total_chars: -0.0338 zipf: mean_exponent: 0.0248 mean_r_squared: -0.0049 @@ -215,9 +224,7 @@ test_has_assertion: test_name_describes_behavior: _doc: "Test names should describe the expected behaviour, not just the method under test." 
- _fix_hint: "Describe expected behavior in test names: 'returns {:error, :not_found} when user missing'" - _languages: [elixir] - _log_baseline: 56.4575 + _log_baseline: 57.2080 branching: mean_branch_count: 2.0000 mean_branching_density: -1.5965 @@ -225,9 +232,9 @@ test_name_describes_behavior: brevity: mean_sample_size: 0.1814 casing_entropy: - mean_entropy: -0.1587 + mean_entropy: -0.1610 mean_pascal_case_count: 0.0729 - mean_snake_case_count: 0.3604 + mean_snake_case_count: 0.4125 compression: mean_raw_bytes: 0.3524 mean_redundancy: 0.0412 @@ -258,7 +265,9 @@ test_name_describes_behavior: mean_beta: 0.0106 mean_k: -0.0084 identifier_length_variance: - mean_mean: 0.0465 + mean_mean: 0.0422 + mean_std_dev: -0.0249 + mean_variance: -0.0498 indentation: mean_blank_line_ratio: -0.1184 mean_max_depth: 0.3691 @@ -271,6 +280,9 @@ test_name_describes_behavior: magic_number_density: mean_string_literal_ratio: -0.1419 near_duplicate_blocks_file: + mean_block_count: 0.0868 + mean_near_dup_block_d0: 0.6309 + mean_near_dup_block_d7: -0.2619 mean_sub_block_count: 0.0868 ngram: mean_bigram_hapax_fraction: 0.0841 @@ -296,12 +308,12 @@ test_name_describes_behavior: mean_distinct_symbol_types: 0.0729 mean_symbol_count: 0.2136 vocabulary: - mean_mattr: 0.0068 - mean_raw_ttr: -0.1629 - mean_total_identifiers: 0.3034 - mean_unique_identifiers: 0.1403 + mean_mattr: -0.0698 + mean_raw_ttr: -0.1544 + mean_total_identifiers: 0.3273 + mean_unique_identifiers: 0.1730 vowel_density: - mean_total_chars: 0.3498 + mean_total_chars: 0.3695 zipf: mean_r_squared: 0.0095 mean_total_tokens: 0.2704 @@ -309,9 +321,7 @@ test_name_describes_behavior: test_single_concept: _doc: "Each test should verify a single concept — tests covering multiple things are harder to diagnose when they fail." 
- _fix_hint: "Test one thing per test — split tests covering multiple behaviors into separate test cases" - _languages: [elixir] - _log_baseline: 35.9646 + _log_baseline: 37.2588 branching: mean_branch_count: 0.3696 mean_branching_density: -2.0000 @@ -320,10 +330,10 @@ test_single_concept: brevity: mean_sample_size: 0.0495 casing_entropy: - mean_entropy: -0.0975 + mean_entropy: -0.0830 mean_other_count: 0.3696 mean_pascal_case_count: -0.0146 - mean_snake_case_count: 0.2046 + mean_snake_case_count: 0.1912 comment_structure: mean_comment_line_count: -1.0376 mean_comment_line_ratio: 1.0694 @@ -363,9 +373,9 @@ test_single_concept: mean_k: 0.1502 mean_r_squared: 0.0046 identifier_length_variance: - mean_mean: 0.0149 - mean_std_dev: 0.0642 - mean_variance: 0.1285 + mean_mean: 0.0255 + mean_std_dev: 0.0733 + mean_variance: 0.1466 indentation: mean_blank_line_ratio: -0.0702 mean_max_depth: 0.2162 @@ -381,6 +391,9 @@ test_single_concept: mean_magic_number_count: 0.1534 mean_string_literal_ratio: 0.3581 near_duplicate_blocks_file: + mean_block_count: 0.5858 + mean_near_dup_block_d0: 0.3696 + mean_near_dup_block_d4: 0.3696 mean_sub_block_count: 0.1857 ngram: mean_bigram_hapax_fraction: -0.0471 @@ -410,12 +423,12 @@ test_single_concept: mean_density: -0.0634 mean_symbol_count: 0.1338 vocabulary: - mean_mattr: -0.0722 - mean_raw_ttr: -0.1338 - mean_total_identifiers: 0.1750 - mean_unique_identifiers: 0.0410 + mean_mattr: -0.0701 + mean_raw_ttr: -0.1129 + mean_total_identifiers: 0.1594 + mean_unique_identifiers: 0.0464 vowel_density: - mean_total_chars: 0.1898 + mean_total_chars: 0.1849 zipf: mean_exponent: 0.0281 mean_r_squared: -0.0039 diff --git a/priv/combined_metrics/type_and_value.yml b/priv/combined_metrics/type_and_value.yml index f3e061c..986b994 100644 --- a/priv/combined_metrics/type_and_value.yml +++ b/priv/combined_metrics/type_and_value.yml @@ -1,8 +1,6 @@ boolean_assigned_from_comparison: _doc: "Boolean variables should be assigned directly from comparisons or 
predicate calls, not set via conditionals." - _fix_hint: "Assign the comparison result directly — bool = x > 0 not if x > 0 do true else false end" - _languages: [elixir] - _log_baseline: 3.2119 + _log_baseline: 2.5801 branching: mean_branch_count: -0.8402 mean_branching_density: 0.3349 @@ -51,9 +49,9 @@ boolean_assigned_from_comparison: mean_r_squared: 0.0046 identifier_length_variance: mean_max: 0.0208 - mean_mean: 0.0195 - mean_std_dev: -0.0085 - mean_variance: -0.0171 + mean_mean: 0.0168 + mean_std_dev: -0.0019 + mean_variance: -0.0038 indentation: mean_blank_line_ratio: 0.1081 mean_max_depth: -0.2570 @@ -69,6 +67,8 @@ boolean_assigned_from_comparison: mean_magic_number_count: 0.1944 mean_string_literal_ratio: -0.0182 near_duplicate_blocks_file: + mean_block_count: -0.2570 + mean_near_dup_block_d0: -0.3081 mean_sub_block_count: 0.2455 ngram: mean_bigram_hapax_fraction: 0.0123 @@ -104,7 +104,7 @@ boolean_assigned_from_comparison: mean_total_identifiers: 0.1004 mean_unique_identifiers: 0.0953 vowel_density: - mean_total_chars: 0.1199 + mean_total_chars: 0.1172 zipf: mean_exponent: -0.0211 mean_r_squared: 0.0120 @@ -113,113 +113,111 @@ boolean_assigned_from_comparison: hardcoded_url_or_path: _doc: "URLs, file paths, and host names should be configuration values, not inline string literals." 
- _fix_hint: "Move hardcoded URLs and file paths to configuration or module attributes" - _languages: [elixir] - _log_baseline: 119.8203 + _log_baseline: 54.3750 branching: - mean_max_nesting_depth: 0.9571 + mean_max_nesting_depth: 0.4526 brevity: - mean_sample_size: 0.3152 + mean_sample_size: 0.1491 casing_entropy: - mean_entropy: -0.1421 - mean_other_count: -2.0000 - mean_pascal_case_count: 0.9571 - mean_snake_case_count: 0.1889 + mean_entropy: -0.0622 + mean_other_count: -0.9458 + mean_pascal_case_count: 0.4526 + mean_snake_case_count: 0.0807 compression: - mean_raw_bytes: 0.6634 - mean_redundancy: 0.1246 - mean_unique_line_ratio: 0.1310 - mean_zlib_bytes: 0.4741 - mean_zlib_ratio: 0.1894 + mean_raw_bytes: 0.3137 + mean_redundancy: 0.0589 + mean_unique_line_ratio: 0.0620 + mean_zlib_bytes: 0.2242 + mean_zlib_ratio: 0.0896 entropy: - mean_char_entropy: 0.0190 - mean_char_normalized: 0.0281 - mean_token_entropy: 0.0674 - mean_token_max_entropy: 0.0678 - mean_total_tokens: 0.4785 - mean_vocab_size: 0.3152 + mean_char_entropy: 0.0090 + mean_char_normalized: 0.0133 + mean_token_entropy: 0.0319 + mean_token_max_entropy: 0.0321 + mean_total_tokens: 0.2263 + mean_vocab_size: 0.1491 function_metrics: - mean_avg_function_lines: -0.8947 - mean_avg_param_count: 0.9571 - mean_function_count: 0.7942 - mean_max_param_count: 0.9571 + mean_avg_function_lines: -0.4231 + mean_avg_param_count: 0.4526 + mean_function_count: 0.3756 + mean_max_param_count: 0.4526 halstead: - mean_N1_total_operators: 0.5692 - mean_N2_total_operands: 0.7517 - mean_difficulty: 0.1642 - mean_effort: 0.9625 - mean_estimated_bugs: 0.7982 - mean_length: 0.6248 - mean_n1_unique_operators: 0.3299 - mean_n2_unique_operands: 0.9174 - mean_time_to_implement_seconds: 0.9625 - mean_vocabulary: 0.7373 - mean_volume: 0.7983 + mean_N1_total_operators: 0.2692 + mean_N2_total_operands: 0.3555 + mean_difficulty: 0.0776 + mean_effort: 0.4552 + mean_estimated_bugs: 0.3775 + mean_length: 0.2955 + mean_n1_unique_operators: 
0.1560 + mean_n2_unique_operands: 0.4338 + mean_time_to_implement_seconds: 0.4552 + mean_vocabulary: 0.3487 + mean_volume: 0.3775 heaps: - mean_beta: 0.0461 - mean_k: -0.1568 + mean_beta: 0.0218 + mean_k: -0.0742 identifier_length_variance: - mean_mean: 0.6914 - mean_std_dev: 0.5873 - mean_variance: 1.1745 + mean_mean: 0.3229 + mean_std_dev: 0.2786 + mean_variance: 0.5571 indentation: - mean_blank_line_ratio: 0.3864 - mean_mean_depth: -0.2779 - mean_variance: -0.3554 + mean_blank_line_ratio: 0.1827 + mean_mean_depth: -0.1314 + mean_variance: -0.1681 line_patterns: - mean_blank_line_ratio: 0.3864 - mean_max_nesting_depth: 0.9571 - mean_string_literal_ratio: -0.0201 - mean_unique_line_ratio: 0.1351 + mean_blank_line_ratio: 0.1827 + mean_max_nesting_depth: 0.4526 + mean_string_literal_ratio: -0.0095 + mean_unique_line_ratio: 0.0639 magic_number_density: - mean_density: -0.4116 - mean_string_literal_ratio: -0.0201 + mean_density: -0.1946 + mean_string_literal_ratio: -0.0095 near_duplicate_blocks_file: - mean_sub_block_count: 0.8771 + mean_near_dup_block_d0: -0.7737 + mean_near_dup_block_d7: -2.0000 + mean_sub_block_count: 0.4147 ngram: - mean_bigram_hapax_fraction: -0.1563 - mean_bigram_repeated_unique: 0.8894 - mean_bigram_repetition_rate: 0.0704 - mean_bigram_total: 0.4796 - mean_bigram_unique: 0.5223 - mean_trigram_repeated_unique: 0.6908 - mean_trigram_repetition_rate: -0.2750 - mean_trigram_total: 0.4807 - mean_trigram_unique: 0.7019 + mean_bigram_hapax_fraction: -0.0739 + mean_bigram_repeated_unique: 0.4206 + mean_bigram_repetition_rate: 0.0333 + mean_bigram_total: 0.2268 + mean_bigram_unique: 0.2470 + mean_trigram_repeated_unique: 0.3267 + mean_trigram_repetition_rate: -0.1301 + mean_trigram_total: 0.2273 + mean_trigram_unique: 0.3319 punctuation_density: - mean_bracket_nonalpha_prefix_count: 1.1094 - mean_bracket_nonalpha_suffix_count: 2.0000 - mean_colon_suffix_density: -0.4737 - mean_dot_count: 0.1098 - mean_exclamation_density: 0.2954 - 
mean_id_nonalpha_suffix_density: -0.1516 - mean_question_mark_density: -0.7321 + mean_bracket_nonalpha_prefix_count: 0.5246 + mean_bracket_nonalpha_suffix_count: 0.9458 + mean_colon_suffix_density: -0.2240 + mean_dot_count: 0.0519 + mean_exclamation_density: 0.1397 + mean_id_nonalpha_suffix_density: -0.0717 + mean_question_mark_density: -0.3462 readability: - mean_avg_line_length: 0.6849 - mean_avg_sub_words_per_id: 0.4262 - mean_avg_tokens_per_line: 0.4785 - mean_flesch_adapted: -0.4974 - mean_fog_adapted: 1.0160 + mean_avg_line_length: 0.3239 + mean_avg_sub_words_per_id: 0.2015 + mean_avg_tokens_per_line: 0.2263 + mean_flesch_adapted: -0.2352 + mean_fog_adapted: 0.4805 symbol_density: - mean_density: -0.0213 - mean_symbol_count: 0.6424 + mean_density: -0.0101 + mean_symbol_count: 0.3038 vocabulary: - mean_mattr: 0.3608 - mean_raw_ttr: 0.1636 - mean_total_identifiers: 0.1955 - mean_unique_identifiers: 0.3588 + mean_mattr: 0.1813 + mean_raw_ttr: 0.0726 + mean_total_identifiers: 0.0850 + mean_unique_identifiers: 0.1575 vowel_density: - mean_total_chars: 0.8868 + mean_total_chars: 0.4079 zipf: - mean_r_squared: 0.0346 - mean_total_tokens: 0.4785 - mean_vocab_size: 0.3152 + mean_r_squared: 0.0163 + mean_total_tokens: 0.2263 + mean_vocab_size: 0.1491 no_empty_string_initial: _doc: "Initialising a variable to an empty string and reassigning later signals missing structure." 
- _fix_hint: "Replace initial empty string assignments with nil or a meaningful default value" - _languages: [elixir] - _log_baseline: -11.4915 + _log_baseline: -12.5778 branching: mean_branch_count: -0.1786 mean_max_nesting_depth: -0.1294 @@ -227,10 +225,10 @@ no_empty_string_initial: brevity: mean_sample_size: -0.0035 casing_entropy: - mean_entropy: -0.1033 + mean_entropy: -0.1014 mean_other_count: -0.6637 mean_pascal_case_count: 0.0802 - mean_snake_case_count: -0.0041 + mean_snake_case_count: -0.0082 compression: mean_raw_bytes: -0.0782 mean_redundancy: -0.0287 @@ -265,9 +263,9 @@ no_empty_string_initial: mean_k: -0.0214 mean_r_squared: -0.0034 identifier_length_variance: - mean_mean: -0.0210 - mean_std_dev: -0.0040 - mean_variance: -0.0080 + mean_mean: -0.0193 + mean_std_dev: -0.0030 + mean_variance: -0.0059 indentation: mean_blank_line_ratio: -0.0652 mean_mean_depth: -0.0937 @@ -280,6 +278,8 @@ no_empty_string_initial: magic_number_density: mean_string_literal_ratio: -0.2412 near_duplicate_blocks_file: + mean_block_count: -0.1976 + mean_near_dup_block_d0: -0.6211 mean_sub_block_count: 0.2212 ngram: mean_bigram_hapax_fraction: -0.0336 @@ -310,10 +310,12 @@ no_empty_string_initial: mean_distinct_symbol_types: 0.0164 mean_symbol_count: -0.0101 vocabulary: - mean_total_identifiers: -0.0164 - mean_unique_identifiers: -0.0167 + mean_mattr: 0.0022 + mean_raw_ttr: 0.0025 + mean_total_identifiers: -0.0195 + mean_unique_identifiers: -0.0170 vowel_density: - mean_total_chars: -0.0374 + mean_total_chars: -0.0388 zipf: mean_exponent: -0.0054 mean_r_squared: -0.0034 @@ -322,9 +324,7 @@ no_empty_string_initial: no_implicit_null_initial: _doc: "Initialising a variable to `nil`/`null` and assigning it later in a branch signals missing structure." 
- _fix_hint: "Use nil explicitly when a variable starts null — or restructure to avoid nil initialization" - _languages: [elixir] - _log_baseline: -3.2196 + _log_baseline: -3.6430 branching: mean_branch_count: 0.0293 mean_branching_density: 0.0871 @@ -332,10 +332,10 @@ no_implicit_null_initial: brevity: mean_sample_size: 0.0132 casing_entropy: - mean_entropy: 0.0619 + mean_entropy: 0.0660 mean_other_count: 0.1247 - mean_screaming_snake_density: 0.0422 - mean_snake_case_count: -0.0452 + mean_screaming_snake_density: 0.0448 + mean_snake_case_count: -0.0534 compression: mean_raw_bytes: -0.0246 mean_redundancy: -0.0100 @@ -370,9 +370,9 @@ no_implicit_null_initial: mean_k: 0.0418 mean_r_squared: -0.0034 identifier_length_variance: - mean_mean: 0.0130 - mean_std_dev: 0.0211 - mean_variance: 0.0423 + mean_mean: 0.0185 + mean_std_dev: 0.0177 + mean_variance: 0.0354 indentation: mean_blank_line_ratio: -0.1146 mean_mean_depth: 0.0089 @@ -385,6 +385,7 @@ no_implicit_null_initial: mean_density: -0.0066 mean_string_literal_ratio: -0.0022 near_duplicate_blocks_file: + mean_block_count: -0.1493 mean_sub_block_count: 0.0422 ngram: mean_bigram_hapax_fraction: -0.0181 @@ -414,12 +415,11 @@ no_implicit_null_initial: mean_distinct_symbol_types: 0.0342 mean_symbol_count: 0.0303 vocabulary: - mean_mattr: 0.0148 - mean_raw_ttr: 0.0350 - mean_total_identifiers: -0.0402 - mean_unique_identifiers: -0.0052 + mean_mattr: 0.0540 + mean_raw_ttr: 0.0466 + mean_total_identifiers: -0.0466 vowel_density: - mean_total_chars: -0.0272 + mean_total_chars: -0.0281 zipf: mean_exponent: -0.0074 mean_r_squared: 0.0047 @@ -427,9 +427,7 @@ no_implicit_null_initial: no_magic_value_assigned: _doc: "Literal strings and numbers assigned to variables should be named constants, not inline values." 
- _fix_hint: "Replace magic value assignments with named constants or module attributes" - _languages: [elixir] - _log_baseline: -6.5439 + _log_baseline: -6.3129 branching: mean_branch_count: -0.2035 mean_branching_density: -0.1140 @@ -437,9 +435,9 @@ no_magic_value_assigned: brevity: mean_sample_size: -0.0122 casing_entropy: - mean_entropy: -0.0570 + mean_entropy: -0.0411 mean_other_count: -0.3211 - mean_snake_case_count: -0.0248 + mean_snake_case_count: -0.0502 compression: mean_raw_bytes: -0.1800 mean_redundancy: -0.0342 @@ -476,9 +474,9 @@ no_magic_value_assigned: mean_r_squared: 0.0230 identifier_length_variance: mean_max: -0.0438 - mean_mean: -0.1573 - mean_std_dev: -0.2110 - mean_variance: -0.4220 + mean_mean: -0.1501 + mean_std_dev: -0.2041 + mean_variance: -0.4083 indentation: mean_blank_line_ratio: -0.1918 mean_mean_depth: -0.2808 @@ -490,6 +488,8 @@ no_magic_value_assigned: magic_number_density: mean_string_literal_ratio: -1.7484 near_duplicate_blocks_file: + mean_block_count: 0.1491 + mean_near_dup_block_d0: 0.7737 mean_sub_block_count: 0.2805 ngram: mean_bigram_hapax_fraction: 0.0998 @@ -520,11 +520,12 @@ no_magic_value_assigned: mean_density: -0.0945 mean_symbol_count: -0.2742 vocabulary: - mean_mattr: 0.0329 - mean_raw_ttr: 0.0328 - mean_total_identifiers: -0.0327 + mean_mattr: 0.0346 + mean_raw_ttr: 0.0360 + mean_total_identifiers: -0.0550 + mean_unique_identifiers: -0.0191 vowel_density: - mean_total_chars: -0.1899 + mean_total_chars: -0.2051 zipf: mean_exponent: -0.0525 mean_r_squared: 0.0208 diff --git a/priv/combined_metrics/variable_naming.yml b/priv/combined_metrics/variable_naming.yml index 4d68d74..1be9c6b 100644 --- a/priv/combined_metrics/variable_naming.yml +++ b/priv/combined_metrics/variable_naming.yml @@ -1,14 +1,12 @@ boolean_has_is_has_prefix: _doc: "Boolean variables should be prefixed with `is_`, `has_`, or `can_`." 
- _fix_hint: "Prefix boolean variables with is_, has_, or can_ (e.g., is_valid, has_errors)" - _languages: [elixir, javascript, ruby] - _log_baseline: 15.9674 + _log_baseline: 15.9481 brevity: mean_sample_size: 0.0752 casing_entropy: mean_camel_case_count: 2.0000 - mean_entropy: 0.5244 - mean_snake_case_count: -0.2159 + mean_entropy: 0.4870 + mean_snake_case_count: -0.2309 compression: mean_raw_bytes: 0.1698 mean_redundancy: 0.0581 @@ -31,9 +29,9 @@ boolean_has_is_has_prefix: mean_k: 0.1146 identifier_length_variance: mean_max: 0.3229 - mean_mean: 0.3020 - mean_std_dev: 0.3453 - mean_variance: 0.6908 + mean_mean: 0.3109 + mean_std_dev: 0.3325 + mean_variance: 0.6646 ngram: mean_bigram_hapax_fraction: 0.0164 mean_bigram_repeated_unique: -0.0209 @@ -52,11 +50,11 @@ boolean_has_is_has_prefix: symbol_density: mean_density: -0.1660 vocabulary: - mean_mattr: 0.1133 - mean_raw_ttr: 0.1095 - mean_unique_identifiers: 0.1095 + mean_mattr: 0.1186 + mean_raw_ttr: 0.1173 + mean_unique_identifiers: 0.1175 vowel_density: - mean_total_chars: 0.3030 + mean_total_chars: 0.3117 zipf: mean_exponent: -0.0403 mean_r_squared: 0.0110 @@ -64,78 +62,74 @@ boolean_has_is_has_prefix: collection_name_is_plural: _doc: "Variables holding a collection should use a plural name." 
- _fix_hint: "Use plural names for collections (users, orders, ids not user, order, id)" - _languages: [elixir, javascript, ruby] - _log_baseline: 32.4645 + _log_baseline: 21.8380 brevity: - mean_sample_size: -0.6992 + mean_sample_size: -0.5320 casing_entropy: - mean_camel_case_count: 0.6209 - mean_entropy: 0.2223 - mean_snake_case_count: -0.1349 + mean_camel_case_count: 0.4724 + mean_entropy: 0.1726 + mean_snake_case_count: -0.2009 compression: - mean_raw_bytes: 1.0908 - mean_redundancy: 0.4215 - mean_zlib_bytes: 0.1628 - mean_zlib_ratio: 0.9437 + mean_raw_bytes: 0.8299 + mean_redundancy: 0.3207 + mean_zlib_bytes: 0.1239 + mean_zlib_ratio: 0.7180 entropy: - mean_token_entropy: -0.1350 - mean_token_max_entropy: -0.1629 - mean_vocab_size: -0.6992 + mean_token_entropy: -0.1027 + mean_token_max_entropy: -0.1240 + mean_vocab_size: -0.5320 halstead: - mean_N2_total_operands: -0.0666 - mean_difficulty: 0.9299 - mean_effort: 0.6844 - mean_estimated_bugs: -0.2047 - mean_n2_unique_operands: -1.0118 - mean_time_to_implement_seconds: 0.6844 - mean_vocabulary: -0.6902 - mean_volume: -0.2049 + mean_N2_total_operands: -0.0506 + mean_difficulty: 0.7075 + mean_effort: 0.5207 + mean_estimated_bugs: -0.1558 + mean_n2_unique_operands: -0.7698 + mean_time_to_implement_seconds: 0.5207 + mean_vocabulary: -0.5251 + mean_volume: -0.1559 heaps: - mean_k: -0.9514 + mean_k: -0.7238 identifier_length_variance: - mean_mean: 2.0000 - mean_std_dev: -0.8154 - mean_variance: -1.6626 + mean_mean: 1.6364 + mean_std_dev: -0.9858 + mean_variance: -2.0000 ngram: - mean_bigram_hapax_fraction: -0.1830 - mean_bigram_repetition_rate: 0.1958 - mean_bigram_unique: -0.2431 - mean_trigram_repeated_unique: -0.2204 + mean_bigram_hapax_fraction: -0.1392 + mean_bigram_repetition_rate: 0.1490 + mean_bigram_unique: -0.1850 + mean_trigram_repeated_unique: -0.1677 punctuation_density: - mean_arrow_density: 0.0922 - mean_colon_suffix_density: -1.0500 - mean_question_mark_density: -0.7411 + mean_arrow_density: 0.0702 + 
mean_colon_suffix_density: -0.7988 + mean_question_mark_density: -0.5639 readability: - mean_avg_line_length: 1.1368 - mean_avg_sub_words_per_id: 0.1688 - mean_flesch_adapted: -0.1723 - mean_fog_adapted: 1.0561 + mean_avg_line_length: 0.8649 + mean_avg_sub_words_per_id: 0.1285 + mean_flesch_adapted: -0.1311 + mean_fog_adapted: 0.8035 symbol_density: - mean_density: -1.1302 + mean_density: -0.8598 vocabulary: - mean_mattr: -0.8764 - mean_raw_ttr: -1.0193 - mean_total_identifiers: -0.0701 - mean_unique_identifiers: -1.0655 + mean_mattr: -0.6972 + mean_raw_ttr: -0.7582 + mean_total_identifiers: -0.1337 + mean_unique_identifiers: -0.8807 vowel_density: - mean_total_chars: 1.9266 + mean_total_chars: 1.4857 zipf: - mean_exponent: 0.2071 - mean_r_squared: -0.1226 - mean_vocab_size: -0.6992 + mean_exponent: 0.1576 + mean_r_squared: -0.0933 + mean_vocab_size: -0.5320 loop_var_is_single_letter: _doc: "Loop index variables (`i`, `j`, `k`) are acceptable inside loop bodies." - _fix_hint: "Use descriptive loop variable names instead of single letters (user not u)" - _languages: [elixir, javascript, ruby] - _log_baseline: -28.4780 + _log_baseline: -28.3218 brevity: mean_sample_size: -0.1049 casing_entropy: mean_camel_case_count: -2.0000 - mean_entropy: -0.3852 - mean_snake_case_count: 0.2130 + mean_entropy: -0.3919 + mean_snake_case_count: 0.2033 comment_structure: mean_comment_line_ratio: 0.0080 compression: @@ -167,9 +161,9 @@ loop_var_is_single_letter: mean_r_squared: -0.0216 identifier_length_variance: mean_max: -0.5833 - mean_mean: -0.8534 - mean_std_dev: -0.9223 - mean_variance: -1.8497 + mean_mean: -0.8498 + mean_std_dev: -0.9251 + mean_variance: -1.8576 indentation: mean_max_depth: -0.0956 mean_mean_depth: -0.0126 @@ -196,11 +190,11 @@ loop_var_is_single_letter: mean_distinct_symbol_types: 0.0533 mean_symbol_count: 0.0087 vocabulary: - mean_mattr: -0.1051 - mean_raw_ttr: -0.1591 - mean_unique_identifiers: -0.1594 + mean_mattr: -0.1052 + mean_raw_ttr: -0.1626 + 
mean_unique_identifiers: -0.1618 vowel_density: - mean_total_chars: -0.8588 + mean_total_chars: -0.8389 zipf: mean_exponent: 0.0112 mean_r_squared: -0.0134 @@ -208,120 +202,125 @@ loop_var_is_single_letter: name_contains_and: _doc: "Variable names containing `and` signal a variable that holds two concerns." - _fix_hint: "Split variables with 'and' into two focused variables (user_and_role → user + role)" - _languages: [elixir, javascript, ruby] - _log_baseline: -11.0411 + _log_baseline: 0.4689 branching: - mean_branch_count: -0.7789 - mean_branching_density: -0.8340 - mean_non_blank_count: 0.0514 + mean_branch_count: -0.3666 + mean_branching_density: -0.3925 + mean_non_blank_count: 0.0242 brevity: - mean_sample_size: 0.0228 + mean_sample_size: 0.0107 casing_entropy: - mean_camel_case_count: -0.4615 - mean_entropy: 0.1436 - mean_other_count: 1.3389 - mean_pascal_case_count: 0.1900 - mean_snake_case_count: 0.1975 + mean_camel_case_count: -0.2172 + mean_entropy: 0.0678 + mean_other_count: 0.6301 + mean_pascal_case_count: 0.0894 + mean_snake_case_count: 0.1042 comment_structure: - mean_comment_line_ratio: -0.0598 + mean_comment_line_ratio: -0.0282 compression: - mean_raw_bytes: -0.1329 - mean_redundancy: -0.0509 - mean_unique_line_ratio: 0.1427 - mean_zlib_bytes: -0.0335 - mean_zlib_ratio: -0.1016 + mean_raw_bytes: -0.0626 + mean_redundancy: -0.0240 + mean_unique_line_ratio: 0.0672 + mean_zlib_bytes: -0.0158 + mean_zlib_ratio: -0.0478 entropy: - mean_char_max_entropy: 0.0179 - mean_char_normalized: -0.0146 - mean_token_normalized: -0.0144 - mean_total_tokens: 0.1238 - mean_vocab_size: 0.0228 + mean_char_max_entropy: 0.0084 + mean_char_normalized: -0.0068 + mean_token_normalized: -0.0068 + mean_total_tokens: 0.0583 + mean_vocab_size: 0.0107 function_metrics: - mean_avg_function_lines: -0.6838 - mean_avg_param_count: -0.1995 - mean_function_count: 0.7156 - mean_max_function_lines: 0.0491 + mean_avg_function_lines: -0.3218 + mean_avg_param_count: -0.0939 + 
mean_function_count: 0.3368 + mean_max_function_lines: 0.0231 halstead: - mean_N1_total_operators: 0.1087 - mean_N2_total_operands: 0.1710 - mean_difficulty: 0.1849 - mean_effort: 0.3281 - mean_estimated_bugs: 0.1413 - mean_length: 0.1329 - mean_n1_unique_operators: 0.0560 - mean_n2_unique_operands: 0.0401 - mean_time_to_implement_seconds: 0.3281 - mean_vocabulary: 0.0436 - mean_volume: 0.1414 + mean_N1_total_operators: 0.0512 + mean_N2_total_operands: 0.0805 + mean_difficulty: 0.0870 + mean_effort: 0.1544 + mean_estimated_bugs: 0.0665 + mean_length: 0.0626 + mean_n1_unique_operators: 0.0264 + mean_n2_unique_operands: 0.0189 + mean_time_to_implement_seconds: 0.1544 + mean_vocabulary: 0.0205 + mean_volume: 0.0665 heaps: - mean_beta: -0.1359 - mean_k: 0.4287 + mean_beta: -0.0639 + mean_k: 0.2017 identifier_length_variance: - mean_max: -0.7789 - mean_mean: -0.4671 - mean_std_dev: -0.9757 - mean_variance: -1.9585 + mean_max: -0.3666 + mean_mean: -0.2347 + mean_std_dev: -0.4600 + mean_variance: -0.9236 indentation: - mean_blank_line_ratio: -0.1265 - mean_max_depth: -0.2572 - mean_mean_depth: -0.2927 - mean_variance: -0.5976 + mean_blank_line_ratio: -0.0595 + mean_max_depth: -0.1211 + mean_mean_depth: -0.1378 + mean_variance: -0.2812 line_patterns: - mean_blank_line_ratio: -0.1265 - mean_string_literal_ratio: -0.7394 - mean_unique_line_ratio: 0.1718 + mean_blank_line_ratio: -0.0595 + mean_string_literal_ratio: -0.3480 + mean_unique_line_ratio: 0.0808 magic_number_density: - mean_density: 0.8439 - mean_magic_number_count: 0.9649 - mean_string_literal_ratio: -0.1285 + mean_density: 0.3971 + mean_magic_number_count: 0.4541 + mean_string_literal_ratio: -0.0605 near_duplicate_blocks_file: - mean_sub_block_count: 0.5838 + mean_block_count: 0.1874 + mean_near_dup_block_d0: 1.2114 + mean_near_dup_block_d3: 1.3353 + mean_near_dup_block_d4: 1.7204 + mean_near_dup_block_d5: 2.0000 + mean_near_dup_block_d6: 0.3458 + mean_near_dup_block_d7: -0.2294 + mean_near_dup_block_d8: 0.5102 + 
mean_sub_block_count: 0.2831 ngram: - mean_bigram_hapax_fraction: 0.0227 - mean_bigram_repeated_unique: 0.0307 - mean_bigram_total: 0.1240 - mean_bigram_unique: 0.0872 - mean_trigram_hapax_fraction: -0.0343 - mean_trigram_repeated_unique: 0.2280 - mean_trigram_repetition_rate: 0.1771 - mean_trigram_total: 0.1242 - mean_trigram_unique: 0.0527 + mean_bigram_hapax_fraction: 0.0107 + mean_bigram_repeated_unique: 0.0144 + mean_bigram_total: 0.0584 + mean_bigram_unique: 0.0410 + mean_trigram_hapax_fraction: -0.0161 + mean_trigram_repeated_unique: 0.1073 + mean_trigram_repetition_rate: 0.0834 + mean_trigram_total: 0.0585 + mean_trigram_unique: 0.0248 punctuation_density: - mean_arrow_density: -0.1900 - mean_bracket_nonalpha_suffix_count: -0.2572 - mean_colon_suffix_density: -1.0487 - mean_dot_count: -0.5320 - mean_exclamation_density: 1.5137 - mean_id_nonalpha_suffix_density: -0.0594 - mean_question_mark_density: 1.5137 + mean_arrow_density: -0.0894 + mean_bracket_nonalpha_suffix_count: -0.1211 + mean_colon_suffix_density: -0.4936 + mean_dot_count: -0.2504 + mean_exclamation_density: 0.7124 + mean_id_nonalpha_suffix_density: -0.0280 + mean_question_mark_density: 0.7124 readability: - mean_avg_line_length: -0.1965 - mean_avg_sub_words_per_id: -0.5151 - mean_avg_tokens_per_line: 0.0675 - mean_flesch_adapted: 0.8109 - mean_fog_adapted: -2.0000 - mean_total_lines: 0.0519 + mean_avg_line_length: -0.0925 + mean_avg_sub_words_per_id: -0.2424 + mean_avg_tokens_per_line: 0.0317 + mean_flesch_adapted: 0.3817 + mean_fog_adapted: -0.9412 + mean_total_lines: 0.0244 symbol_density: - mean_density: 0.1769 - mean_distinct_symbol_types: 0.1589 - mean_symbol_count: 0.0451 + mean_density: 0.0832 + mean_distinct_symbol_types: 0.0748 + mean_symbol_count: 0.0212 vocabulary: - mean_mattr: -0.2049 - mean_raw_ttr: -0.1504 - mean_total_identifiers: 0.1499 + mean_mattr: -0.0887 + mean_raw_ttr: -0.0633 + mean_total_identifiers: 0.0782 + mean_unique_identifiers: 0.0162 vowel_density: - 
mean_total_chars: -0.3180 + mean_total_chars: -0.1561 zipf: - mean_exponent: 0.0125 - mean_total_tokens: 0.1238 - mean_vocab_size: 0.0228 + mean_exponent: 0.0059 + mean_total_tokens: 0.0583 + mean_vocab_size: 0.0107 name_contains_type_suffix: _doc: "Type suffixes in names (`userString`, `nameList`) are redundant noise." - _fix_hint: "Remove type suffixes from names (userList → users, dataMap → data)" - _languages: [elixir, javascript, ruby] - _log_baseline: -26.2345 + _log_baseline: -26.6817 branching: mean_branch_count: -0.4150 mean_branching_density: -0.4125 @@ -329,10 +328,10 @@ name_contains_type_suffix: mean_sample_size: -0.1936 casing_entropy: mean_camel_case_count: -1.4300 - mean_entropy: -0.3896 + mean_entropy: -0.3631 mean_other_count: -2.0000 mean_pascal_case_count: -0.1660 - mean_snake_case_count: 0.1802 + mean_snake_case_count: 0.1449 compression: mean_raw_bytes: -0.2768 mean_redundancy: -0.1061 @@ -362,9 +361,9 @@ name_contains_type_suffix: mean_k: 0.1792 identifier_length_variance: mean_max: -0.3735 - mean_mean: -0.4843 - mean_std_dev: -0.6059 - mean_variance: -1.2170 + mean_mean: -0.4788 + mean_std_dev: -0.5916 + mean_variance: -1.1882 line_patterns: mean_string_literal_ratio: 0.0109 ngram: @@ -392,12 +391,12 @@ name_contains_type_suffix: mean_density: 0.2546 mean_symbol_count: -0.0213 vocabulary: - mean_mattr: -0.2341 - mean_raw_ttr: -0.2567 - mean_total_identifiers: -0.0421 - mean_unique_identifiers: -0.2988 + mean_mattr: -0.2373 + mean_raw_ttr: -0.2260 + mean_total_identifiers: -0.0765 + mean_unique_identifiers: -0.3022 vowel_density: - mean_total_chars: -0.5247 + mean_total_chars: -0.5536 zipf: mean_exponent: 0.0794 mean_r_squared: 0.0102 @@ -406,96 +405,93 @@ name_contains_type_suffix: name_is_abbreviation: _doc: "Abbreviated names (`usr`, `cfg`, `mgr`) reduce readability." 
- _fix_hint: "Expand abbreviations to full words (usr → user, cnt → count, idx → index)" - _languages: [elixir, javascript, ruby] - _log_baseline: 23.1000 + _log_baseline: 10.7370 brevity: - mean_sample_size: -0.2397 + mean_sample_size: -0.1542 casing_entropy: - mean_camel_case_count: 0.4951 - mean_entropy: 0.1951 - mean_snake_case_count: -0.2645 + mean_camel_case_count: 0.3184 + mean_entropy: 0.2713 + mean_snake_case_count: -0.4803 compression: - mean_raw_bytes: 0.8246 - mean_redundancy: 0.3054 - mean_unique_line_ratio: 0.1893 - mean_zlib_bytes: 0.1088 - mean_zlib_ratio: 0.7116 + mean_raw_bytes: 0.5303 + mean_redundancy: 0.1964 + mean_unique_line_ratio: 0.1217 + mean_zlib_bytes: 0.0699 + mean_zlib_ratio: 0.4576 entropy: - mean_char_entropy: -0.0619 - mean_char_normalized: -0.0892 - mean_token_entropy: -0.0583 - mean_token_max_entropy: -0.0514 - mean_total_tokens: -0.1699 - mean_vocab_size: -0.2397 + mean_char_entropy: -0.0398 + mean_char_normalized: -0.0573 + mean_token_entropy: -0.0375 + mean_token_max_entropy: -0.0330 + mean_total_tokens: -0.1093 + mean_vocab_size: -0.1542 halstead: - mean_N1_total_operators: -0.1681 - mean_N2_total_operands: -0.1679 - mean_difficulty: 0.3150 - mean_effort: 0.0480 - mean_estimated_bugs: -0.2403 - mean_length: -0.1680 - mean_n2_unique_operands: -0.4607 - mean_time_to_implement_seconds: 0.0480 - mean_vocabulary: -0.3410 - mean_volume: -0.2405 + mean_N1_total_operators: -0.1081 + mean_N2_total_operands: -0.1080 + mean_difficulty: 0.2026 + mean_effort: 0.0309 + mean_estimated_bugs: -0.1545 + mean_length: -0.1081 + mean_n2_unique_operands: -0.2963 + mean_time_to_implement_seconds: 0.0309 + mean_vocabulary: -0.2193 + mean_volume: -0.1547 heaps: - mean_beta: -0.1643 - mean_k: 0.3581 - mean_r_squared: -0.0412 + mean_beta: -0.1056 + mean_k: 0.2303 + mean_r_squared: -0.0265 identifier_length_variance: - mean_max: 2.0000 - mean_mean: 1.8274 - mean_std_dev: 0.3959 - mean_variance: 0.7880 + mean_max: 1.2862 + mean_mean: 1.3727 + 
mean_variance: 0.0294 line_patterns: - mean_string_literal_ratio: -0.1475 - mean_unique_line_ratio: 0.1981 + mean_string_literal_ratio: -0.0949 + mean_unique_line_ratio: 0.1274 magic_number_density: - mean_density: 0.5685 - mean_string_literal_ratio: -0.3380 + mean_density: 0.3656 + mean_string_literal_ratio: -0.2174 + near_duplicate_blocks_file: + mean_near_dup_block_d8: -2.0000 ngram: - mean_bigram_repeated_unique: -0.0944 - mean_bigram_total: -0.1701 - mean_bigram_unique: -0.0528 - mean_trigram_hapax_fraction: 0.1423 - mean_trigram_repeated_unique: -0.3139 - mean_trigram_repetition_rate: -0.1310 - mean_trigram_total: -0.1703 - mean_trigram_unique: -0.0522 + mean_bigram_repeated_unique: -0.0607 + mean_bigram_total: -0.1094 + mean_bigram_unique: -0.0339 + mean_trigram_hapax_fraction: 0.0915 + mean_trigram_repeated_unique: -0.2019 + mean_trigram_repetition_rate: -0.0842 + mean_trigram_total: -0.1095 + mean_trigram_unique: -0.0336 punctuation_density: - mean_arrow_density: 0.3569 - mean_bracket_nonalpha_prefix_count: -0.4746 - mean_bracket_nonalpha_suffix_count: -0.1763 - mean_colon_suffix_density: -0.6554 - mean_dot_count: -0.1834 - mean_exclamation_density: -0.7311 - mean_id_nonalpha_suffix_density: -0.0637 - mean_question_mark_density: -0.9035 + mean_arrow_density: 0.2295 + mean_bracket_nonalpha_prefix_count: -0.3052 + mean_bracket_nonalpha_suffix_count: -0.1134 + mean_colon_suffix_density: -0.4215 + mean_dot_count: -0.1179 + mean_exclamation_density: -0.4702 + mean_id_nonalpha_suffix_density: -0.0410 + mean_question_mark_density: -0.5810 readability: - mean_avg_line_length: 0.8581 - mean_avg_tokens_per_line: -0.1700 - mean_fog_adapted: -0.1492 + mean_avg_line_length: 0.5519 + mean_avg_tokens_per_line: -0.1093 + mean_fog_adapted: -0.0959 symbol_density: - mean_density: -1.0085 - mean_symbol_count: -0.1894 + mean_density: -0.6485 + mean_symbol_count: -0.1218 vocabulary: - mean_mattr: -0.5807 - mean_raw_ttr: -0.1377 - mean_total_identifiers: -0.1967 - 
mean_unique_identifiers: -0.3487 + mean_mattr: -0.1900 + mean_raw_ttr: 0.1813 + mean_total_identifiers: -0.3611 + mean_unique_identifiers: -0.2161 vowel_density: - mean_total_chars: 1.6488 + mean_total_chars: 1.0156 zipf: - mean_exponent: 0.0937 - mean_total_tokens: -0.1699 - mean_vocab_size: -0.2397 + mean_exponent: 0.0603 + mean_total_tokens: -0.1093 + mean_vocab_size: -0.1542 name_is_generic: _doc: "Generic names (`data`, `result`, `tmp`, `val`, `obj`) convey no domain meaning." - _fix_hint: "Replace generic names (data, result, tmp, info) with domain-specific names" - _languages: [elixir, javascript, ruby] - _log_baseline: 37.2228 + _log_baseline: 37.4815 branching: mean_branch_count: 0.5193 mean_branching_density: 0.3889 @@ -505,8 +501,8 @@ name_is_generic: mean_sample_size: 0.2053 casing_entropy: mean_camel_case_count: 2.0000 - mean_entropy: 0.3865 - mean_snake_case_count: 0.0835 + mean_entropy: 0.3582 + mean_snake_case_count: 0.0915 compression: mean_raw_bytes: 0.3477 mean_redundancy: 0.0524 @@ -544,9 +540,9 @@ name_is_generic: mean_r_squared: 0.0145 identifier_length_variance: mean_max: 0.4477 - mean_mean: 0.5561 - mean_std_dev: 0.6865 - mean_variance: 1.3793 + mean_mean: 0.5582 + mean_std_dev: 0.6755 + mean_variance: 1.3586 indentation: mean_blank_line_ratio: 0.0556 mean_max_depth: -0.1451 @@ -561,7 +557,12 @@ name_is_generic: mean_density: -0.0624 mean_string_literal_ratio: -0.1451 near_duplicate_blocks_file: - mean_sub_block_count: 0.2270 + mean_block_count: 0.1243 + mean_near_dup_block_d0: 0.9543 + mean_near_dup_block_d6: -0.3521 + mean_near_dup_block_d7: 0.6021 + mean_near_dup_block_d8: 0.0644 + mean_sub_block_count: 0.1831 ngram: mean_bigram_hapax_fraction: 0.1528 mean_bigram_repeated_unique: -0.1344 @@ -592,12 +593,12 @@ name_is_generic: mean_distinct_symbol_types: 0.0252 mean_symbol_count: 0.0858 vocabulary: - mean_mattr: 0.1805 - mean_raw_ttr: 0.1632 - mean_total_identifiers: 0.2060 - mean_unique_identifiers: 0.3675 + mean_mattr: 0.1932 + 
mean_raw_ttr: 0.1681 + mean_total_identifiers: 0.2205 + mean_unique_identifiers: 0.3862 vowel_density: - mean_total_chars: 0.7602 + mean_total_chars: 0.7766 zipf: mean_exponent: -0.0977 mean_r_squared: 0.0316 @@ -606,16 +607,14 @@ name_is_generic: name_is_number_like: _doc: "Number-suffixed names (`var1`, `thing2`) signal a missing abstraction." - _fix_hint: "Replace number-like names (x1, y2) with descriptive names indicating purpose" - _languages: [elixir, javascript, ruby] - _log_baseline: 1.7668 + _log_baseline: 1.7611 brevity: mean_sample_size: -0.0262 casing_entropy: mean_camel_case_count: 0.6902 - mean_entropy: -0.4709 + mean_entropy: -0.4687 mean_other_count: -2.0000 - mean_snake_case_count: 0.1755 + mean_snake_case_count: 0.1969 compression: mean_raw_bytes: 0.1098 mean_redundancy: 0.0379 @@ -646,9 +645,9 @@ name_is_number_like: mean_r_squared: 0.0046 identifier_length_variance: mean_max: 0.0623 - mean_mean: 0.2201 - mean_std_dev: 0.2399 - mean_variance: 0.4803 + mean_mean: 0.2335 + mean_std_dev: 0.2269 + mean_variance: 0.4543 line_patterns: mean_string_literal_ratio: 0.0201 ngram: @@ -674,12 +673,11 @@ name_is_number_like: mean_distinct_symbol_types: -0.0272 mean_symbol_count: -0.0042 vocabulary: - mean_mattr: -0.0101 - mean_raw_ttr: -0.0152 - mean_total_identifiers: -0.0159 - mean_unique_identifiers: -0.0307 + mean_mattr: -0.0033 + mean_total_identifiers: -0.0235 + mean_unique_identifiers: -0.0258 vowel_density: - mean_total_chars: 0.2031 + mean_total_chars: 0.2085 zipf: mean_exponent: 0.0060 mean_total_tokens: -0.0075 @@ -687,9 +685,7 @@ name_is_number_like: name_is_single_letter: _doc: "Single-letter names outside loop indices are too opaque." 
- _fix_hint: "Replace single-letter names with descriptive names (n → count, s → status)" - _languages: [elixir, javascript, ruby] - _log_baseline: 25.4977 + _log_baseline: 26.2113 branching: mean_branching_density: -0.0458 mean_non_blank_count: 0.0439 @@ -697,8 +693,8 @@ name_is_single_letter: mean_sample_size: 0.2432 casing_entropy: mean_camel_case_count: 2.0000 - mean_entropy: 0.4023 - mean_snake_case_count: -0.1981 + mean_entropy: 0.3295 + mean_snake_case_count: -0.1106 comment_structure: mean_comment_line_ratio: -0.0675 compression: @@ -735,9 +731,8 @@ name_is_single_letter: mean_k: -0.4973 mean_r_squared: 0.0625 identifier_length_variance: - mean_mean: 1.3809 - mean_std_dev: 0.0433 - mean_variance: 0.0855 + mean_mean: 1.3002 + mean_variance: 0.0148 indentation: mean_blank_line_ratio: -0.0414 mean_mean_depth: 0.0281 @@ -773,12 +768,12 @@ name_is_single_letter: mean_density: -0.6602 mean_symbol_count: -0.0262 vocabulary: - mean_mattr: 0.5595 - mean_raw_ttr: 0.5013 - mean_total_identifiers: -0.0367 - mean_unique_identifiers: 0.4625 + mean_mattr: 0.5024 + mean_raw_ttr: 0.4133 + mean_total_identifiers: 0.0496 + mean_unique_identifiers: 0.4588 vowel_density: - mean_total_chars: 1.3325 + mean_total_chars: 1.3310 zipf: mean_exponent: -0.2161 mean_r_squared: 0.1041 @@ -787,9 +782,7 @@ name_is_single_letter: name_is_too_long: _doc: "Names longer than ~30 characters harm readability." 
- _fix_hint: "Shorten overly long names — aim for 2–3 descriptive words, drop redundant context" - _languages: [elixir, javascript, ruby] - _log_baseline: -7.5164 + _log_baseline: -7.8322 branching: mean_branch_count: 0.0340 mean_branching_density: 0.0916 @@ -799,10 +792,10 @@ name_is_too_long: mean_sample_size: -0.0167 casing_entropy: mean_camel_case_count: -0.1082 - mean_entropy: 0.0111 + mean_entropy: 0.0194 mean_other_count: 0.0922 mean_pascal_case_count: 0.0340 - mean_snake_case_count: 0.1201 + mean_snake_case_count: 0.1095 comment_structure: mean_comment_line_ratio: 0.1321 compression: @@ -840,9 +833,9 @@ name_is_too_long: mean_r_squared: -0.0095 identifier_length_variance: mean_max: -0.4664 - mean_mean: -0.4075 - mean_std_dev: -0.5860 - mean_variance: -1.1740 + mean_mean: -0.4056 + mean_std_dev: -0.5951 + mean_variance: -1.1923 indentation: mean_blank_line_ratio: 0.0206 mean_max_depth: -0.2280 @@ -857,7 +850,13 @@ name_is_too_long: mean_density: -0.0708 mean_string_literal_ratio: -0.1025 near_duplicate_blocks_file: - mean_sub_block_count: 0.1015 + mean_block_count: 0.0593 + mean_near_dup_block_d0: 0.3891 + mean_near_dup_block_d4: 0.6367 + mean_near_dup_block_d6: 0.5046 + mean_near_dup_block_d7: -0.5046 + mean_near_dup_block_d8: 0.9550 + mean_sub_block_count: 0.1005 ngram: mean_bigram_hapax_fraction: -0.0613 mean_bigram_repeated_unique: 0.1532 @@ -890,12 +889,12 @@ name_is_too_long: mean_distinct_symbol_types: 0.0446 mean_symbol_count: 0.0831 vocabulary: - mean_mattr: -0.0845 - mean_raw_ttr: -0.1058 - mean_total_identifiers: 0.0719 - mean_unique_identifiers: -0.0383 + mean_mattr: -0.0939 + mean_raw_ttr: -0.1041 + mean_total_identifiers: 0.0610 + mean_unique_identifiers: -0.0445 vowel_density: - mean_total_chars: -0.3320 + mean_total_chars: -0.3409 zipf: mean_exponent: 0.0555 mean_total_tokens: 0.0756 @@ -903,9 +902,7 @@ name_is_too_long: name_is_too_short: _doc: "Names shorter than 3 characters (outside loops) are too opaque." 
- _fix_hint: "Use at least 3 characters — replace very short names with full words" - _languages: [elixir, javascript, ruby] - _log_baseline: -0.4484 + _log_baseline: -2.7224 branching: mean_branch_count: -0.2327 mean_branching_density: -0.2381 @@ -913,9 +910,9 @@ name_is_too_short: mean_sample_size: -0.1256 casing_entropy: mean_camel_case_count: -0.0450 - mean_entropy: -0.4953 + mean_entropy: -0.4018 mean_other_count: -2.0000 - mean_snake_case_count: 0.0286 + mean_snake_case_count: -0.1480 comment_structure: mean_comment_line_ratio: -0.0092 compression: @@ -948,13 +945,15 @@ name_is_too_short: mean_k: -0.1166 mean_r_squared: 0.0306 identifier_length_variance: - mean_mean: 0.5797 - mean_std_dev: -0.1344 - mean_variance: -0.2709 + mean_mean: 0.6923 + mean_std_dev: -0.2499 + mean_variance: -0.5009 indentation: mean_variance: 0.0168 line_patterns: mean_string_literal_ratio: 0.0229 + near_duplicate_blocks_file: + mean_near_dup_block_d6: -1.2621 ngram: mean_bigram_hapax_fraction: 0.0137 mean_bigram_repeated_unique: -0.0944 @@ -977,12 +976,12 @@ name_is_too_short: mean_distinct_symbol_types: 0.0241 mean_symbol_count: -0.0137 vocabulary: - mean_mattr: -0.2548 - mean_raw_ttr: -0.1285 - mean_total_identifiers: -0.0370 - mean_unique_identifiers: -0.1640 + mean_mattr: -0.2556 + mean_raw_ttr: -0.1188 + mean_total_identifiers: -0.1971 + mean_unique_identifiers: -0.3128 vowel_density: - mean_total_chars: 0.5403 + mean_total_chars: 0.4916 zipf: mean_exponent: 0.0521 mean_total_tokens: -0.0256 @@ -990,85 +989,83 @@ name_is_too_short: negated_boolean_name: _doc: "Negated boolean names (`isNotValid`, `notActive`) are harder to reason about." 
- _fix_hint: "Rename negated booleans positively (is_not_active → is_inactive, not_found → missing)" - _languages: [elixir, javascript, ruby] - _log_baseline: -15.6244 + _log_baseline: -4.4565 brevity: - mean_sample_size: -0.3765 + mean_sample_size: -0.0998 casing_entropy: - mean_camel_case_count: -0.4213 + mean_camel_case_count: -0.1117 compression: - mean_raw_bytes: -0.1561 - mean_zlib_bytes: -0.2426 - mean_zlib_ratio: 0.0872 + mean_raw_bytes: -0.0414 + mean_zlib_bytes: -0.0643 + mean_zlib_ratio: 0.0231 entropy: - mean_token_max_entropy: -0.0741 - mean_vocab_size: -0.3765 + mean_token_max_entropy: -0.0196 + mean_vocab_size: -0.0998 halstead: - mean_difficulty: 0.3608 - mean_effort: 0.2913 - mean_estimated_bugs: -0.0613 - mean_n2_unique_operands: -0.4082 - mean_time_to_implement_seconds: 0.2913 - mean_vocabulary: -0.3088 - mean_volume: -0.0612 + mean_difficulty: 0.0956 + mean_effort: 0.0772 + mean_estimated_bugs: -0.0162 + mean_n2_unique_operands: -0.1082 + mean_time_to_implement_seconds: 0.0772 + mean_vocabulary: -0.0818 + mean_volume: -0.0162 heaps: - mean_beta: 0.1348 - mean_k: -0.7753 + mean_beta: 0.0357 + mean_k: -0.2055 identifier_length_variance: - mean_max: -0.1714 - mean_mean: -0.4077 - mean_std_dev: -0.9919 - mean_variance: -2.0000 + mean_max: -0.0454 + mean_mean: -0.1116 + mean_std_dev: -0.2685 + mean_variance: -0.5427 line_patterns: - mean_string_literal_ratio: 0.1212 + mean_string_literal_ratio: 0.0321 magic_number_density: - mean_string_literal_ratio: 0.2446 + mean_string_literal_ratio: 0.0648 near_duplicate_blocks_file: - mean_sub_block_count: -0.2060 + mean_near_dup_block_d4: -2.0000 + mean_near_dup_block_d5: 2.0000 + mean_sub_block_count: -0.0546 ngram: - mean_bigram_hapax_fraction: -0.1145 - mean_bigram_repeated_unique: 0.1809 - mean_bigram_repetition_rate: 0.1382 - mean_bigram_unique: -0.1039 - mean_trigram_hapax_fraction: -0.0761 - mean_trigram_repeated_unique: 0.1656 - mean_trigram_repetition_rate: 0.2761 - mean_trigram_unique: -0.1381 + 
mean_bigram_hapax_fraction: -0.0303 + mean_bigram_repeated_unique: 0.0479 + mean_bigram_repetition_rate: 0.0366 + mean_bigram_unique: -0.0275 + mean_trigram_hapax_fraction: -0.0202 + mean_trigram_repeated_unique: 0.0439 + mean_trigram_repetition_rate: 0.0732 + mean_trigram_unique: -0.0366 punctuation_density: - mean_bracket_nonalpha_prefix_count: -0.9075 - mean_bracket_nonalpha_suffix_count: -0.1522 - mean_colon_suffix_density: -0.4874 - mean_exclamation_density: -1.5354 - mean_id_nonalpha_suffix_density: -0.1055 + mean_bracket_nonalpha_prefix_count: -0.2405 + mean_bracket_nonalpha_suffix_count: -0.0403 + mean_colon_suffix_density: -0.1292 + mean_exclamation_density: -0.4070 + mean_id_nonalpha_suffix_density: -0.0280 readability: - mean_avg_line_length: -0.1581 - mean_avg_sub_words_per_id: -0.4843 - mean_flesch_adapted: 0.6639 - mean_fog_adapted: -0.4551 + mean_avg_line_length: -0.0419 + mean_avg_sub_words_per_id: -0.1284 + mean_flesch_adapted: 0.1760 + mean_fog_adapted: -0.1206 symbol_density: - mean_density: 0.1413 + mean_density: 0.0375 vocabulary: - mean_mattr: -0.0722 - mean_raw_ttr: -0.4365 - mean_unique_identifiers: -0.4305 + mean_mattr: -0.0261 + mean_raw_ttr: -0.1352 + mean_unique_identifiers: -0.1462 vowel_density: - mean_total_chars: -0.4070 + mean_total_chars: -0.1238 zipf: - mean_exponent: 0.0569 - mean_vocab_size: -0.3765 + mean_exponent: 0.0151 + mean_vocab_size: -0.0998 no_hungarian_notation: _doc: "Hungarian notation prefixes (`strName`, `bFlag`) add noise without type safety." 
- _fix_hint: "Remove type prefixes from names (strName → name, intCount → count)" - _languages: [elixir, javascript, ruby] - _log_baseline: -15.0940 + _log_baseline: -15.5962 brevity: mean_sample_size: -0.0814 casing_entropy: mean_camel_case_count: -2.0000 - mean_entropy: -0.1319 - mean_snake_case_count: 0.3573 + mean_entropy: 0.0152 + mean_snake_case_count: 0.2611 compression: mean_raw_bytes: -0.1982 mean_redundancy: -0.0699 @@ -1097,11 +1094,14 @@ no_hungarian_notation: mean_k: 0.0692 identifier_length_variance: mean_max: -0.1067 - mean_mean: -0.3583 - mean_std_dev: -0.0131 - mean_variance: -0.0329 + mean_mean: -0.3201 + mean_std_dev: 0.0630 + mean_variance: 0.1176 magic_number_density: mean_density: 0.0312 + near_duplicate_blocks_file: + mean_near_dup_block_d4: -1.9732 + mean_near_dup_block_d7: -0.5166 ngram: mean_bigram_hapax_fraction: -0.0158 mean_bigram_repeated_unique: 0.0179 @@ -1122,20 +1122,19 @@ no_hungarian_notation: mean_density: 0.1919 mean_distinct_symbol_types: 0.0849 vocabulary: - mean_mattr: -0.1437 - mean_raw_ttr: -0.1556 - mean_unique_identifiers: -0.1488 + mean_mattr: -0.1598 + mean_raw_ttr: -0.1444 + mean_total_identifiers: -0.1028 + mean_unique_identifiers: -0.2466 vowel_density: - mean_total_chars: -0.3491 + mean_total_chars: -0.4192 zipf: mean_exponent: 0.0655 mean_vocab_size: -0.0814 screaming_snake_for_constants: _doc: "Module-level constants should use SCREAMING_SNAKE_CASE." 
- _fix_hint: "Use SCREAMING_SNAKE_CASE for module-level constants (@MAX_SIZE not @max_size)" - _languages: [elixir, javascript, ruby] - _log_baseline: -6.3000 + _log_baseline: -5.9884 branching: mean_branching_density: 0.0176 mean_non_blank_count: -0.0180 @@ -1143,9 +1142,9 @@ screaming_snake_for_constants: mean_sample_size: -0.0136 casing_entropy: mean_camel_case_count: 0.0302 - mean_entropy: 0.0224 - mean_macro_case_count: 1.9927 - mean_pascal_case_count: -0.1675 + mean_entropy: 0.0261 + mean_macro_case_count: 1.9913 + mean_pascal_case_count: -0.1674 mean_screaming_snake_density: 2.0000 comment_structure: mean_comment_line_ratio: 0.0267 @@ -1156,9 +1155,9 @@ screaming_snake_for_constants: mean_zlib_bytes: 0.0081 mean_zlib_ratio: -0.0168 entropy: - mean_char_entropy: 0.0306 + mean_char_entropy: 0.0305 mean_char_max_entropy: 0.0092 - mean_char_normalized: 0.0214 + mean_char_normalized: 0.0213 mean_total_tokens: -0.0066 mean_vocab_size: -0.0136 halstead: @@ -1173,9 +1172,9 @@ screaming_snake_for_constants: mean_vocabulary: -0.0115 mean_volume: -0.0088 identifier_length_variance: - mean_mean: 0.0041 - mean_std_dev: 0.0225 - mean_variance: 0.0446 + mean_mean: 0.0048 + mean_std_dev: 0.0244 + mean_variance: 0.0482 indentation: mean_blank_line_ratio: 0.0078 mean_mean_depth: 0.0069 @@ -1194,7 +1193,7 @@ screaming_snake_for_constants: mean_bigram_unique: -0.0131 mean_trigram_hapax_fraction: -0.0045 mean_trigram_repetition_rate: 0.0072 - mean_trigram_total: -0.0067 + mean_trigram_total: -0.0066 mean_trigram_unique: -0.0101 punctuation_density: mean_arrow_density: 0.0071 @@ -1203,19 +1202,19 @@ screaming_snake_for_constants: readability: mean_avg_line_length: 0.0087 mean_avg_sub_words_per_id: -0.0090 - mean_avg_tokens_per_line: 0.0103 + mean_avg_tokens_per_line: 0.0102 mean_flesch_adapted: 0.0095 - mean_fog_adapted: -0.0083 + mean_fog_adapted: -0.0082 mean_total_lines: -0.0182 symbol_density: mean_symbol_count: -0.0036 vocabulary: - mean_mattr: -0.0045 - mean_raw_ttr: 
-0.0077 - mean_total_identifiers: -0.0145 - mean_unique_identifiers: -0.0226 + mean_mattr: -0.0037 + mean_raw_ttr: -0.0055 + mean_total_identifiers: -0.0157 + mean_unique_identifiers: -0.0212 vowel_density: - mean_total_chars: -0.0105 + mean_total_chars: -0.0111 zipf: mean_exponent: 0.0038 mean_total_tokens: -0.0066 From f6d1b97031a0ff67b28d042fee3c91f1519e314d Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Fri, 20 Mar 2026 22:34:44 +0100 Subject: [PATCH 27/71] docs(spec): health report block impact section, PR delta, and compare consolidation --- ...26-03-20-health-report-blocks-and-delta.md | 272 ++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 docs/superpowers/specs/2026-03-20-health-report-blocks-and-delta.md diff --git a/docs/superpowers/specs/2026-03-20-health-report-blocks-and-delta.md b/docs/superpowers/specs/2026-03-20-health-report-blocks-and-delta.md new file mode 100644 index 0000000..d525260 --- /dev/null +++ b/docs/superpowers/specs/2026-03-20-health-report-blocks-and-delta.md @@ -0,0 +1,272 @@ +# Health Report: Block Impact Section, PR Delta, and Compare Consolidation + +**Date:** 2026-03-20 +**Status:** Approved for implementation + +--- + +## Goal + +Unify the health-report and compare commands into a single PR-aware report that: + +1. Shows impactful code blocks per changed file with severity and fix hints +2. Shows a before/after metric delta with bar graphs when a base ref is provided +3. Shows a PR impact summary at the top of the report +4. Removes file-level worst_offenders (replaced by block-level view) +5. 
Deletes the compare command entirely + +--- + +## User Stories Addressed + +| User | Need | How addressed | +|------|------|---------------| +| PR author | Find antipatterns by file and line | Block section: file-grouped, line-precise, behavior + fix hint | +| Reviewer | Estimate merge risk quickly | PR impact summary: score drift, blocks flagged, files changed | +| Reviewer | Spot quality regressions | Block severity label + delta bar graphs | +| New repo user | Assess overall code quality | Overall grade + category breakdown (unchanged) | + +--- + +## Architecture + +### What Changes + +| Component | Change | +|-----------|--------| +| `CLI.HealthReport` | Add `--base-ref`, `--head-ref` flags; wire git diff and dual analysis | +| `HealthReport.generate/2` | Accept `changed_files` + `base_results` opts; add `top_blocks` and `codebase_delta` keys; drop `worst_offenders` | +| `HealthReport.Delta` | New module — wraps delta computation (logic ported from `Comparator`) | +| `HealthReport.Formatter` | Remove worst_offenders rendering; add PR summary, delta bar graphs, block section | +| `CLI.Compare` | **Deleted** | +| `Comparator` | **Deleted** (logic moved to `HealthReport.Delta`) | +| Compare-specific `Formatter` | **Deleted** | +| `Summarizer` | **Deleted** if compare-only (verify at implementation time) | + +### What Stays the Same + +- Overall score, grade, categories (threshold + cosine) +- `top_issues` (SampleRunner diagnose_aggregate) +- Metadata section +- All analysis options (workers, cache, timeout, NCD flags) +- Backward compatibility: running without `--base-ref` produces the existing report minus worst_offenders + +--- + +## Data Flow + +### With `--base-ref` + +``` +CLI.HealthReport + ├── Git.collect_files_at_ref(path, base_ref) → base_files_map + ├── Git.collect_files_at_ref(path, head_ref) → head_files_map (head_ref defaults to HEAD) + ├── Git.changed_files(path, base_ref, head_ref) → [%ChangedFile{path, status}] + ├── 
Analyzer.analyze_codebase(head_files_map) → head_results + ├── BlockImpactAnalyzer.analyze(head_results, head_files_map) → head_results_with_nodes + ├── Analyzer.analyze_codebase(base_files_map) → base_results + └── HealthReport.generate(head_results_with_nodes, + base_results: base_results, + changed_files: [path, ...]) +``` + +### Without `--base-ref` + +``` +CLI.HealthReport + ├── Analyzer.analyze_codebase(files_map) → results + ├── BlockImpactAnalyzer.analyze(results, files_map) → results_with_nodes + └── HealthReport.generate(results_with_nodes) + (no delta, blocks shown for all files with significant impact) +``` + +--- + +## CLI Options + +Added to `codeqa health-report <path>`: + +| Option | Default | Description | +|--------|---------|-------------| +| `--base-ref REF` | (none) | Base git ref to compare from. Enables delta and PR scoping. | +| `--head-ref REF` | `HEAD` | Head git ref for comparison | + +Removed: `--changes-only` (never used; always analyzes all files). + +--- + +## `HealthReport.generate/2` Output Shape + +```elixir +%{ + metadata: %{path, timestamp, total_files}, + pr_summary: %{ # nil when no base_results + base_score: integer(), + head_score: integer(), + score_delta: integer(), # head - base + base_grade: String.t(), + head_grade: String.t(), + blocks_flagged: integer(), # total blocks above threshold + files_changed: integer(), + files_added: integer(), + files_modified: integer() + }, + overall_score: integer(), + overall_grade: String.t(), + codebase_delta: map() | nil, # nil when no base_results + categories: [category_map], # worst_offenders removed from each + top_issues: [behavior_map], + top_blocks: [file_block_group] # new +} +``` + +### `top_blocks` Shape + +```elixir +[ + %{ + path: String.t(), + status: "added" | "modified" | nil, # nil when no base_results + blocks: [ + %{ + start_line: integer(), + end_line: integer(), # derived from node + type: String.t(), # "code" | "doc" | "typespec" + token_count: integer(), + potentials: 
[ + %{ + category: String.t(), + behavior: String.t(), + cosine_delta: float(), + severity: :critical | :high | :medium, + fix_hint: String.t() # from behavior definition + } + ] + } + ] + } +] +``` + +### Severity Computation + +For each `{behavior, cosine_delta}` on a block: + +``` +codebase_cosine = current codebase cosine score for that behavior +gap = 1.0 - codebase_cosine # how far below ideal the codebase already is +severity_ratio = cosine_delta / gap # what fraction of existing gap this block causes + +:critical when severity_ratio > 0.50 +:high when severity_ratio > 0.25 +:medium when severity_ratio > 0.10 +(filtered) when severity_ratio <= 0.10 (below significance, not shown) +``` + +`gap` is floored at `0.01` to avoid division by zero when the codebase already scores perfectly on a behavior. + +### Block Filtering + +A block appears in `top_blocks` when: +- `token_count >= 10` +- At least one potential has `severity != filtered` (i.e. `severity_ratio > 0.10`) +- File is in `changed_files` (when `--base-ref` given) or any file (when not) + +Blocks within a file are ordered by their highest `cosine_delta` descending. + +--- + +## `HealthReport.Delta` Module + +New module wrapping delta computation, ported from `Comparator`: + +```elixir +@spec compute(base_results :: map(), head_results :: map()) :: map() +def compute(base_results, head_results) +``` + +Returns per-metric aggregate delta (head minus base), same shape as `Comparator.compare_results/3` currently produces for the `"codebase"` key. File-level deltas are not included (that was compare-only, now removed). + +--- + +## Fix Hints per Behavior + +Fix hints are sourced from the combined_metrics YAML behavior definitions. The `SampleRunner` / `CombinedMetrics` modules are responsible for resolving a `{category, behavior}` key to its fix_hint string. If no fix_hint is defined for a behavior, the field is omitted from the potential map. 
+ +Implementation note: verify at implementation time whether fix_hint is already present in the YAML behavior definitions or needs to be added. + +--- + +## Formatter Changes + +### Removed + +- Worst offenders tables in all category sections (both threshold and cosine) +- All compare-command formatting code + +### Added + +**1. PR Impact Summary** (top of report, only when `pr_summary` present) + +``` +Score: B+ → C | Δ −8 pts | 6 blocks flagged across 3 files | 4 modified, 1 added +``` + +**2. Delta Bar Graphs** (after PR summary, before categories; only when `codebase_delta` present) + +Bar graphs per major category (complexity, readability, duplication, structure) showing base vs head values. Ported from compare's GitHub formatter. Plain formatter uses ASCII, GitHub formatter uses mermaid. + +**3. Block Section** (after top_issues) + +``` +## Blocks (6 flagged across 3 files) + +### path/to/file.ex [modified] + +**lines 42–67** · function · 84 tokens + 🔴 CRITICAL function_design / cyclomatic_complexity_under_10 (Δ 0.41) + → Break this function into smaller single-responsibility functions. + 🟠 HIGH structure / deep_nesting (Δ 0.18) + → Flatten nested conditionals using early returns or pattern matching. + +**lines 120–134** · code · 31 tokens + 🟡 MEDIUM naming / identifier_length (Δ 0.12) + → Use descriptive names that convey intent without abbreviation. +``` + +Severity icons: 🔴 CRITICAL, 🟠 HIGH, 🟡 MEDIUM. +GitHub formatter wraps each file in a `<details>` block. + +--- + +## Deletions + +The following files are deleted as part of this work: + +- `lib/codeqa/cli/compare.ex` +- `lib/codeqa/comparator.ex` +- `lib/codeqa/formatter.ex` (compare formatter — verify no health-report usage first) +- `lib/codeqa/summarizer.ex` (verify compare-only before deleting) +- All compare-related tests + +--- + +## Testing + +- Unit tests for `HealthReport.Delta.compute/2` +- Unit tests for severity computation (all three thresholds + filter boundary) +- Unit tests for `top_blocks` assembly (filtering, ordering, fix_hint inclusion) +- Unit tests for PR summary computation +- Integration test: `HealthReport.generate/2` with and without `base_results` +- Formatter tests: block section renders correctly for plain and github formats +- CLI test: `--base-ref` wires through to git calls and generate correctly +- Deletion verification: no references to deleted modules remain + +--- + +## Out of Scope + +- Per-block raw metric values (blocks carry cosine_delta only, not raw metrics) +- File-level delta details (compare's per-file before/after table is dropped) +- Near-duplicate block pairs in the block section (they exist as metrics but are not surfaced here) +- Relative severity across blocks (no "this block is Nx worse than average block") From 3af09e94ae18f09ae6c02e3e55af513f6b03d93b Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Fri, 20 Mar 2026 22:40:37 +0100 Subject: [PATCH 28/71] =?UTF-8?q?docs(spec):=20address=20review=20findings?= =?UTF-8?q?=20=E2=80=94=20fix=20changed=5Ffiles=20interface,=20end=5Fline,?= =?UTF-8?q?=20codebase=5Fcosine=20source,=20fix=20hints,=20severity=20cave?= =?UTF-8?q?ats?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...26-03-20-health-report-blocks-and-delta.md | 124 ++++++++++++------ 1 file changed, 82 insertions(+), 42 deletions(-) diff --git a/docs/superpowers/specs/2026-03-20-health-report-blocks-and-delta.md 
b/docs/superpowers/specs/2026-03-20-health-report-blocks-and-delta.md index d525260..75f2013 100644 --- a/docs/superpowers/specs/2026-03-20-health-report-blocks-and-delta.md +++ b/docs/superpowers/specs/2026-03-20-health-report-blocks-and-delta.md @@ -38,10 +38,11 @@ Unify the health-report and compare commands into a single PR-aware report that: | `HealthReport.generate/2` | Accept `changed_files` + `base_results` opts; add `top_blocks` and `codebase_delta` keys; drop `worst_offenders` | | `HealthReport.Delta` | New module — wraps delta computation (logic ported from `Comparator`) | | `HealthReport.Formatter` | Remove worst_offenders rendering; add PR summary, delta bar graphs, block section | +| `BlockImpactAnalyzer` | Add `end_line` to serialized node output | | `CLI.Compare` | **Deleted** | | `Comparator` | **Deleted** (logic moved to `HealthReport.Delta`) | -| Compare-specific `Formatter` | **Deleted** | -| `Summarizer` | **Deleted** if compare-only (verify at implementation time) | +| `lib/codeqa/formatter.ex` | **Deleted** (compare-only formatter — confirmed no health-report usage) | +| `Summarizer` | **Deleted** (confirmed compare-only) | ### What Stays the Same @@ -67,7 +68,7 @@ CLI.HealthReport ├── Analyzer.analyze_codebase(base_files_map) → base_results └── HealthReport.generate(head_results_with_nodes, base_results: base_results, - changed_files: [path, ...]) + changed_files: [%ChangedFile{path, status}]) # full structs, not just paths ``` ### Without `--base-ref` @@ -77,7 +78,7 @@ CLI.HealthReport ├── Analyzer.analyze_codebase(files_map) → results ├── BlockImpactAnalyzer.analyze(results, files_map) → results_with_nodes └── HealthReport.generate(results_with_nodes) - (no delta, blocks shown for all files with significant impact) + (no delta, blocks shown for all files with significant impact, status: nil) ``` --- @@ -106,7 +107,7 @@ Removed: `--changes-only` (never used; always analyzes all files). 
score_delta: integer(), # head - base base_grade: String.t(), head_grade: String.t(), - blocks_flagged: integer(), # total blocks above threshold + blocks_flagged: integer(), # derived: Enum.sum(Enum.map(top_blocks, &length(&1.blocks))) files_changed: integer(), files_added: integer(), files_modified: integer() @@ -114,24 +115,30 @@ Removed: `--changes-only` (never used; always analyzes all files). overall_score: integer(), overall_grade: String.t(), codebase_delta: map() | nil, # nil when no base_results - categories: [category_map], # worst_offenders removed from each + categories: [category_map], # worst_offenders removed from each top_issues: [behavior_map], - top_blocks: [file_block_group] # new + top_blocks: [file_block_group] # new } ``` +### `pr_summary` Computation Notes + +- `base_score` / `base_grade`: requires running the full grading pipeline on `base_results` (same `Grader.grade_aggregate` + `Grader.overall_score` calls as for head). This is a second pass over base data — not a shortcut. +- `blocks_flagged`: computed after `top_blocks` is assembled (sum of all blocks across all file groups). +- `files_added` / `files_modified`: counted from `changed_files` structs (`:status` field). + ### `top_blocks` Shape ```elixir [ %{ path: String.t(), - status: "added" | "modified" | nil, # nil when no base_results + status: "added" | "modified" | nil, # nil when no base_results (no --base-ref) blocks: [ %{ start_line: integer(), - end_line: integer(), # derived from node - type: String.t(), # "code" | "doc" | "typespec" + end_line: integer(), + type: String.t(), # "code" | "doc" | "typespec" token_count: integer(), potentials: [ %{ @@ -139,7 +146,7 @@ Removed: `--changes-only` (never used; always analyzes all files). 
behavior: String.t(), cosine_delta: float(), severity: :critical | :high | :medium, - fix_hint: String.t() # from behavior definition + fix_hint: String.t() | nil # nil if not defined for that behavior } ] } @@ -150,50 +157,66 @@ Removed: `--changes-only` (never used; always analyzes all files). ### Severity Computation +Severity is computed during `top_blocks` assembly in `HealthReport.generate/2`, not in `BlockImpactAnalyzer`. The baseline codebase cosine scores are already available via `SampleRunner.diagnose_aggregate(baseline_codebase_agg, top: 99_999, languages: project_langs)` — the same call already made for `top_issues`. Pass these as a lookup map `%{{category, behavior} => codebase_cosine}` into the block assembly step. + For each `{behavior, cosine_delta}` on a block: ``` -codebase_cosine = current codebase cosine score for that behavior -gap = 1.0 - codebase_cosine # how far below ideal the codebase already is -severity_ratio = cosine_delta / gap # what fraction of existing gap this block causes +codebase_cosine = lookup codebase cosine for that {category, behavior} + (default to 0.0 if behavior not found in codebase diagnose) +gap = max(0.01, 1.0 - codebase_cosine) # floor prevents division by zero +severity_ratio = cosine_delta / gap # fraction of existing gap this block causes :critical when severity_ratio > 0.50 :high when severity_ratio > 0.25 :medium when severity_ratio > 0.10 -(filtered) when severity_ratio <= 0.10 (below significance, not shown) +(filtered) when severity_ratio <= 0.10 (below significance, not shown) ``` -`gap` is floored at `0.01` to avoid division by zero when the codebase already scores perfectly on a behavior. +**Note on thresholds:** These are initial defaults. The gap-relative formula means a block with `cosine_delta = 0.12` may be `:critical` in a healthy codebase (small gap) and `:medium` in a poor one (large gap). This is intentional — severity reflects impact relative to where the codebase currently stands. 
Thresholds should be validated against real codebases and may become configurable in a future iteration. + +### Fix Hint Enrichment + +Fix hints are sourced from the combined_metrics YAMLs (`priv/combined_metrics/<category>.yml`, `_fix_hint` key per behavior). All 12 category YAMLs have `_fix_hint` fields. Enrichment happens during `top_blocks` assembly in `HealthReport.generate/2` using `CombinedMetrics.Scorer.all_yamls()` (compiled at module load time). Pattern mirrors the existing `cosine_fix_hint/2` in formatters. If a behavior has no `_fix_hint`, the field is `nil`. ### Block Filtering A block appears in `top_blocks` when: -- `token_count >= 10` +- `token_count >= 10` (already guaranteed by BlockImpactAnalyzer, but re-checked for safety) - At least one potential has `severity != filtered` (i.e. `severity_ratio > 0.10`) -- File is in `changed_files` (when `--base-ref` given) or any file (when not) +- File path is in `changed_files` paths (when `--base-ref` given) or any file (when not) Blocks within a file are ordered by their highest `cosine_delta` descending. --- -## `HealthReport.Delta` Module +## `BlockImpactAnalyzer` Change: Add `end_line` -New module wrapping delta computation, ported from `Comparator`: +The serialized node map in `serialize_node/9` (`block_impact_analyzer.ex:167-175`) currently omits `end_line`. Add it: ```elixir -@spec compute(base_results :: map(), head_results :: map()) :: map() -def compute(base_results, head_results) +%{ + "start_line" => node.start_line, + "end_line" => node.end_line, # ADD THIS + "column_start" => ..., + ... +} ``` -Returns per-metric aggregate delta (head minus base), same shape as `Comparator.compare_results/3` currently produces for the `"codebase"` key. File-level deltas are not included (that was compare-only, now removed). +The `Node` struct already has `end_line` — this is a one-line addition. The existing test in `block_impact_analyzer_test.exs` must also assert `Map.has_key?(node, "end_line")`. 
--- -## Fix Hints per Behavior +## `HealthReport.Delta` Module -Fix hints are sourced from the combined_metrics YAML behavior definitions. The `SampleRunner` / `CombinedMetrics` modules are responsible for resolving a `{category, behavior}` key to its fix_hint string. If no fix_hint is defined for a behavior, the field is omitted from the potential map. +New module wrapping delta computation, ported from `Comparator`: + +```elixir +@spec compute(base_results :: map(), head_results :: map()) :: map() +def compute(base_results, head_results) +``` -Implementation note: verify at implementation time whether fix_hint is already present in the YAML behavior definitions or needs to be added. +Returns per-metric aggregate delta (head minus base), porting `compute_aggregate_delta/2` and `compute_numeric_delta/2` from `Comparator`. File-level deltas are not included (compare-only, now removed). --- @@ -201,12 +224,14 @@ Implementation note: verify at implementation time whether fix_hint is already p ### Removed -- Worst offenders tables in all category sections (both threshold and cosine) -- All compare-command formatting code +- Worst offenders tables in all category sections (both threshold and cosine): + - `plain.ex`: remove calls at lines 60, 64 and functions `cosine_worst_offenders/2` (91-117), `worst_offenders_section/2` (204-245) + - `github.ex`: remove calls at lines 249, 381 and functions `cosine_worst_offenders/2` (254-304), `worst_offenders/2` (384-435) +- All compare-command formatting code (`lib/codeqa/formatter.ex` deleted) ### Added -**1. PR Impact Summary** (top of report, only when `pr_summary` present) +**1. PR Impact Summary** (top of report, only when `pr_summary` present; omitted entirely when nil) ``` Score: B+ → C | Δ −8 pts | 6 blocks flagged across 3 files | 4 modified, 1 added @@ -214,7 +239,7 @@ Score: B+ → C | Δ −8 pts | 6 blocks flagged across 3 files | 4 modifi **2. 
Delta Bar Graphs** (after PR summary, before categories; only when `codebase_delta` present) -Bar graphs per major category (complexity, readability, duplication, structure) showing base vs head values. Ported from compare's GitHub formatter. Plain formatter uses ASCII, GitHub formatter uses mermaid. +Bar graphs per major category (complexity, readability, duplication, structure) showing base vs head values. Port `progress_bars/2` and `mermaid_chart/1` logic from `lib/codeqa/formatter.ex`. Plain formatter uses ASCII, GitHub formatter uses mermaid. **3. Block Section** (after top_issues) @@ -235,32 +260,46 @@ Bar graphs per major category (complexity, readability, duplication, structure) ``` Severity icons: 🔴 CRITICAL, 🟠 HIGH, 🟡 MEDIUM. -GitHub formatter wraps each file in a `<details>` block. +GitHub formatter wraps each file in a `<details>` block (consistent with how categories are already wrapped in `github.ex:137-195`). --- ## Deletions -The following files are deleted as part of this work: +The following files are deleted as part of this work (all confirmed compare-only, no health-report dependencies): - `lib/codeqa/cli/compare.ex` - `lib/codeqa/comparator.ex` -- `lib/codeqa/formatter.ex` (compare formatter — verify no health-report usage first) -- `lib/codeqa/summarizer.ex` (verify compare-only before deleting) -- All compare-related tests +- `lib/codeqa/formatter.ex` +- `lib/codeqa/summarizer.ex` +- `test/codeqa/cli_compare_test.exs` --- ## Testing +### New tests required + - Unit tests for `HealthReport.Delta.compute/2` -- Unit tests for severity computation (all three thresholds + filter boundary) -- Unit tests for `top_blocks` assembly (filtering, ordering, fix_hint inclusion) -- Unit tests for PR summary computation -- Integration test: `HealthReport.generate/2` with and without `base_results` -- Formatter tests: block section renders correctly for plain and github formats -- CLI test: `--base-ref` wires through to git calls and generate correctly -- Deletion verification: no references to deleted modules remain +- Unit tests for severity computation: all three thresholds, filter boundary, gap floor (gap=0 → floored to 0.01), behavior not found in codebase diagnose (default 0.0) +- Unit tests for `top_blocks` assembly: filtering by token_count, severity, changed_files; ordering by cosine_delta; fix_hint inclusion and nil case +- Unit tests for PR summary computation: score/grade computation from base+head, blocks_flagged derivation, file status counts +- Integration test: `HealthReport.generate/2` with and without `base_results` — verify output keys present/nil correctly +- Formatter tests: block section renders correctly for plain and github formats; pr_summary nil omits summary and delta sections gracefully +- CLI test: `--base-ref` wires through to `Git.collect_files_at_ref`, 
`Git.changed_files`, and `HealthReport.generate` correctly +- `BlockImpactAnalyzer` test: assert `end_line` present in serialized node + +### Tests to delete + +- `test/codeqa/cli_compare_test.exs` (entire file) +- `test/codeqa/health_report/formatter_test.exs:186-194` — "includes worst offenders section" +- `test/codeqa/health_report/formatter_test.exs:216-226` — "renders cosine worst offenders per behavior" + +### Tests to update + +- `test/codeqa/health_report/formatter_test.exs:196-200` — "summary detail omits category sections" (refute reason changes) +- Any test referencing `worst_offenders` in the generate output shape +- `test/codeqa/block_impact_analyzer_test.exs` — add `end_line` assertion --- @@ -270,3 +309,4 @@ The following files are deleted as part of this work: - File-level delta details (compare's per-file before/after table is dropped) - Near-duplicate block pairs in the block section (they exist as metrics but are not surfaced here) - Relative severity across blocks (no "this block is Nx worse than average block") +- Configurable severity thresholds (hardcoded defaults for now; future iteration) From 6d1042f27e1cb2994068fb7f21f1c91c56b759c4 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 12:40:29 +0100 Subject: [PATCH 29/71] docs(plan): health report block impact, PR delta, and compare consolidation --- ...26-03-21-health-report-blocks-and-delta.md | 1679 +++++++++++++++++ 1 file changed, 1679 insertions(+) create mode 100644 docs/superpowers/plans/2026-03-21-health-report-blocks-and-delta.md diff --git a/docs/superpowers/plans/2026-03-21-health-report-blocks-and-delta.md b/docs/superpowers/plans/2026-03-21-health-report-blocks-and-delta.md new file mode 100644 index 0000000..1d6e9d0 --- /dev/null +++ b/docs/superpowers/plans/2026-03-21-health-report-blocks-and-delta.md @@ -0,0 +1,1679 @@ +# Health Report: Block Impact, PR Delta, and Compare Consolidation + +> **For agentic workers:** REQUIRED SUB-SKILL: Use 
superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Unify health-report and compare commands into a single PR-aware report showing impactful blocks per changed file, before/after metric delta, and a PR impact summary — while deleting the compare command entirely. + +**Architecture:** `HealthReport.generate/2` gains `base_results:` and `changed_files:` opts; a new `HealthReport.TopBlocks` module assembles severity-classified blocks from node data; a new `HealthReport.Delta` module wraps aggregate delta computation ported from `Comparator`; formatters gain PR summary, delta, and block sections and lose worst_offenders rendering; `CLI.HealthReport` gains `--base-ref`/`--head-ref` and runs dual analysis when provided. + +**Tech Stack:** Elixir, ExUnit, `CodeQA.Git`, `CodeQA.CombinedMetrics.{SampleRunner, Scorer}`, `CodeQA.HealthReport.Grader`, `CodeQA.BlockImpactAnalyzer` + +--- + +## File Map + +| File | Change | +|------|--------| +| `lib/codeqa/block_impact_analyzer.ex` | Add `"end_line"` to `serialize_node/9` output | +| `lib/codeqa/health_report/delta.ex` | **Create** — aggregate delta computation (ported from `Comparator`) | +| `lib/codeqa/health_report/top_blocks.ex` | **Create** — block assembly, severity, fix hint enrichment | +| `lib/codeqa/health_report.ex` | Accept new opts, wire `Delta` + `TopBlocks`, drop `worst_offenders` computation | +| `lib/codeqa/health_report/formatter/plain.ex` | Remove worst_offenders rendering; add PR summary, delta table, block section | +| `lib/codeqa/health_report/formatter/github.ex` | Remove worst_offenders rendering; add PR summary, delta table, block section | +| `lib/codeqa/cli/health_report.ex` | Add `--base-ref`/`--head-ref`; dual analysis when base-ref given | +| `lib/codeqa/cli.ex` | Remove compare entry | +| `lib/codeqa/cli/compare.ex` | **Delete** | +| `lib/codeqa/comparator.ex` | 
**Delete** | +| `lib/codeqa/formatter.ex` | **Delete** | +| `lib/codeqa/summarizer.ex` | **Delete** | +| `test/codeqa/block_impact_analyzer_test.exs` | Add `end_line` assertion | +| `test/codeqa/health_report/delta_test.exs` | **Create** | +| `test/codeqa/health_report/top_blocks_test.exs` | **Create** | +| `test/codeqa/health_report_test.exs` | Add: `top_blocks`, `pr_summary`, `codebase_delta` keys; remove worst_offenders assertions | +| `test/codeqa/health_report/formatter_test.exs` | Delete worst_offenders tests; add block/delta/summary tests | +| `test/codeqa/cli_compare_test.exs` | **Delete** | + +--- + +## Task 1: Add `end_line` to BlockImpactAnalyzer node serialization + +**Files:** +- Modify: `lib/codeqa/block_impact_analyzer.ex:167-175` +- Test: `test/codeqa/block_impact_analyzer_test.exs:42-52` + +- [ ] **Step 1: Add `end_line` assertion to the existing "each node has required fields" test** + +In `test/codeqa/block_impact_analyzer_test.exs`, inside the `Enum.each(nodes, fn node ->` block (line 42), add after line 43: + +```elixir +assert Map.has_key?(node, "end_line") +``` + +- [ ] **Step 2: Run the test to confirm it fails** + +```bash +mix test test/codeqa/block_impact_analyzer_test.exs --trace +``` + +Expected: FAIL — `"end_line"` key missing. + +- [ ] **Step 3: Add `end_line` to the serialized node map** + +In `lib/codeqa/block_impact_analyzer.ex`, edit the map at line 167: + +```elixir +%{ + "start_line" => node.start_line, + "end_line" => node.end_line, + "column_start" => (first_token && first_token.col) || 0, + "char_length" => char_length, + "type" => Atom.to_string(node.type), + "token_count" => length(node.tokens), + "refactoring_potentials" => potentials, + "children" => children +} +``` + +- [ ] **Step 4: Run the test to confirm it passes** + +```bash +mix test test/codeqa/block_impact_analyzer_test.exs --trace +``` + +Expected: all tests PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add lib/codeqa/block_impact_analyzer.ex test/codeqa/block_impact_analyzer_test.exs +git commit -m "feat(block-impact): serialize end_line in node output" +``` + +--- + +## Task 2: Create `HealthReport.Delta` + +**Files:** +- Create: `lib/codeqa/health_report/delta.ex` +- Create: `test/codeqa/health_report/delta_test.exs` + +- [ ] **Step 1: Write the test file** + +```elixir +# test/codeqa/health_report/delta_test.exs +defmodule CodeQA.HealthReport.DeltaTest do + use ExUnit.Case, async: true + + alias CodeQA.HealthReport.Delta + + defp make_results(aggregate) do + %{"codebase" => %{"aggregate" => aggregate}} + end + + test "returns base, head, and delta aggregates" do + base = make_results(%{"entropy" => %{"mean_value" => 5.0}}) + head = make_results(%{"entropy" => %{"mean_value" => 6.0}}) + + result = Delta.compute(base, head) + + assert result.base.aggregate == %{"entropy" => %{"mean_value" => 5.0}} + assert result.head.aggregate == %{"entropy" => %{"mean_value" => 6.0}} + assert result.delta.aggregate == %{"entropy" => %{"mean_value" => 1.0}} + end + + test "rounds delta to 4 decimal places" do + base = make_results(%{"entropy" => %{"mean_value" => 1.0}}) + head = make_results(%{"entropy" => %{"mean_value" => 4.3333}}) + + result = Delta.compute(base, head) + assert result.delta.aggregate["entropy"]["mean_value"] == 3.3333 + end + + test "handles missing base codebase gracefully" do + base = %{} + head = make_results(%{"entropy" => %{"mean_value" => 6.0}}) + + result = Delta.compute(base, head) + assert result.delta.aggregate == %{} + end + + test "handles missing head codebase gracefully" do + base = make_results(%{"entropy" => %{"mean_value" => 5.0}}) + head = %{} + + result = Delta.compute(base, head) + assert result.delta.aggregate == %{} + end + + test "skips non-numeric metric keys" do + base = make_results(%{"entropy" => %{"mean_value" => 5.0, "label" => "x"}}) + head = make_results(%{"entropy" => %{"mean_value" 
=> 6.0, "label" => "y"}}) + + result = Delta.compute(base, head) + refute Map.has_key?(result.delta.aggregate["entropy"], "label") + assert result.delta.aggregate["entropy"]["mean_value"] == 1.0 + end +end +``` + +- [ ] **Step 2: Run test to confirm it fails** + +```bash +mix test test/codeqa/health_report/delta_test.exs --trace +``` + +Expected: FAIL — module not found. + +- [ ] **Step 3: Create the module** + +```elixir +# lib/codeqa/health_report/delta.ex +defmodule CodeQA.HealthReport.Delta do + @moduledoc "Computes aggregate metric delta between two codebase analysis results." + + @spec compute(map(), map()) :: %{ + base: %{aggregate: map()}, + head: %{aggregate: map()}, + delta: %{aggregate: map()} + } + def compute(base_results, head_results) do + base_agg = get_in(base_results, ["codebase", "aggregate"]) || %{} + head_agg = get_in(head_results, ["codebase", "aggregate"]) || %{} + + %{ + base: %{aggregate: base_agg}, + head: %{aggregate: head_agg}, + delta: %{aggregate: compute_aggregate_delta(base_agg, head_agg)} + } + end + + defp compute_aggregate_delta(base_agg, head_agg) do + MapSet.new(Map.keys(base_agg) ++ Map.keys(head_agg)) + |> Enum.reduce(%{}, fn metric_name, acc -> + base_m = Map.get(base_agg, metric_name, %{}) + head_m = Map.get(head_agg, metric_name, %{}) + delta = compute_numeric_delta(base_m, head_m) + if delta == %{}, do: acc, else: Map.put(acc, metric_name, delta) + end) + end + + defp compute_numeric_delta(base, head) do + MapSet.new(Map.keys(base) ++ Map.keys(head)) + |> Enum.reduce(%{}, fn key, acc -> + case {Map.get(base, key), Map.get(head, key)} do + {b, h} when is_number(b) and is_number(h) -> + Map.put(acc, key, Float.round(h - b, 4)) + + _ -> + acc + end + end) + end +end +``` + +- [ ] **Step 4: Run tests to confirm they pass** + +```bash +mix test test/codeqa/health_report/delta_test.exs --trace +``` + +Expected: all PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add lib/codeqa/health_report/delta.ex test/codeqa/health_report/delta_test.exs +git commit -m "feat(health-report): add Delta module for aggregate metric comparison" +``` + +--- + +## Task 3: Create `HealthReport.TopBlocks` + +**Files:** +- Create: `lib/codeqa/health_report/top_blocks.ex` +- Create: `test/codeqa/health_report/top_blocks_test.exs` + +- [ ] **Step 1: Write the test file** + +```elixir +# test/codeqa/health_report/top_blocks_test.exs +defmodule CodeQA.HealthReport.TopBlocksTest do + use ExUnit.Case, async: true + + alias CodeQA.HealthReport.TopBlocks + alias CodeQA.Git.ChangedFile + + # A node with cosine_delta 0.60 — will be :critical when codebase_cosine = 0.0 (gap=1.0, ratio=0.60) + defp make_node(cosine_delta, token_count \\ 20) do + %{ + "start_line" => 1, + "end_line" => 10, + "type" => "code", + "token_count" => token_count, + "refactoring_potentials" => [ + %{ + "category" => "function_design", + "behavior" => "cyclomatic_complexity_under_10", + "cosine_delta" => cosine_delta + } + ], + "children" => [] + } + end + + defp make_results(nodes) do + %{"files" => %{"lib/foo.ex" => %{"nodes" => nodes}}} + end + + defp lookup(cosine \\ 0.0) do + %{{"function_design", "cyclomatic_complexity_under_10"} => cosine} + end + + describe "severity classification" do + test ":critical when severity_ratio > 0.50" do + # gap = max(0.01, 1.0 - 0.0) = 1.0, ratio = 0.60 / 1.0 = 0.60 > 0.50 + [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) + assert hd(hd(group.blocks).potentials).severity == :critical + end + + test ":high when severity_ratio > 0.25 and <= 0.50" do + # ratio = 0.30 / 1.0 = 0.30 + [group] = TopBlocks.build(make_results([make_node(0.30)]), [], lookup()) + assert hd(hd(group.blocks).potentials).severity == :high + end + + test ":medium when severity_ratio > 0.10 and <= 0.25" do + # ratio = 0.15 / 1.0 = 0.15 + [group] = TopBlocks.build(make_results([make_node(0.15)]), [], lookup()) + 
assert hd(hd(group.blocks).potentials).severity == :medium + end + + test "filtered when severity_ratio <= 0.10" do + # ratio = 0.05 / 1.0 = 0.05 — block should not appear + assert TopBlocks.build(make_results([make_node(0.05)]), [], lookup()) == [] + end + + test "gap floor prevents division by zero when codebase_cosine = 1.0" do + # gap = max(0.01, 1.0 - 1.0) = 0.01, ratio = 0.02 / 0.01 = 2.0 → :critical + [group] = TopBlocks.build(make_results([make_node(0.02)]), [], lookup(1.0)) + assert hd(hd(group.blocks).potentials).severity == :critical + end + + test "gap handles negative codebase_cosine" do + # codebase_cosine = -0.5, gap = max(0.01, 1.0 - (-0.5)) = 1.5 + # ratio = 0.60 / 1.5 = 0.40 → :high + [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup(-0.5)) + assert hd(hd(group.blocks).potentials).severity == :high + end + + test "unknown behavior defaults codebase_cosine to 0.0" do + lookup_empty = %{} + # gap = 1.0, ratio = 0.60 → :critical + [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup_empty) + assert hd(hd(group.blocks).potentials).severity == :critical + end + end + + describe "changed_files filtering" do + test "when changed_files is empty, shows all files" do + [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) + assert group.path == "lib/foo.ex" + assert group.status == nil + end + + test "when changed_files given, only shows matching files" do + changed = [%ChangedFile{path: "lib/other.ex", status: "added"}] + assert TopBlocks.build(make_results([make_node(0.60)]), changed, lookup()) == [] + end + + test "status comes from ChangedFile struct" do + changed = [%ChangedFile{path: "lib/foo.ex", status: "modified"}] + [group] = TopBlocks.build(make_results([make_node(0.60)]), changed, lookup()) + assert group.status == "modified" + end + end + + describe "block filtering" do + test "blocks with token_count < 10 are excluded" do + assert TopBlocks.build(make_results([make_node(0.60, 9)]), [], 
lookup()) == [] + end + + test "blocks are ordered by highest cosine_delta descending" do + node_low = make_node(0.20) + node_high = put_in(make_node(0.60), ["start_line"], 20) + results = %{"files" => %{"lib/foo.ex" => %{"nodes" => [node_low, node_high]}}} + + [group] = TopBlocks.build(results, [], lookup()) + deltas = Enum.map(group.blocks, fn b -> hd(b.potentials).cosine_delta end) + assert deltas == Enum.sort(deltas, :desc) + end + + test "children nodes are included" do + parent = %{ + "start_line" => 1, "end_line" => 20, + "type" => "code", "token_count" => 5, + "refactoring_potentials" => [], + "children" => [make_node(0.60)] + } + [group] = TopBlocks.build(make_results([parent]), [], lookup()) + assert length(group.blocks) == 1 + end + end + + describe "fix hints" do + test "includes fix_hint string for known behavior" do + # function_design/cyclomatic_complexity_under_10 has _fix_hint in YAML + [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) + potential = hd(hd(group.blocks).potentials) + assert is_binary(potential.fix_hint) + end + + test "fix_hint is nil for unknown behavior" do + node = %{ + "start_line" => 1, "end_line" => 10, "type" => "code", + "token_count" => 20, + "refactoring_potentials" => [ + %{"category" => "unknown_cat", "behavior" => "unknown_beh", "cosine_delta" => 0.60} + ], + "children" => [] + } + [group] = TopBlocks.build(make_results([node]), [], %{}) + assert hd(hd(group.blocks).potentials).fix_hint == nil + end + end +end +``` + +- [ ] **Step 2: Run tests to confirm they fail** + +```bash +mix test test/codeqa/health_report/top_blocks_test.exs --trace +``` + +Expected: FAIL — module not found. + +- [ ] **Step 3: Create the module** + +```elixir +# lib/codeqa/health_report/top_blocks.ex +defmodule CodeQA.HealthReport.TopBlocks do + @moduledoc "Assembles the top_blocks report section from analysis node data." 
+ + alias CodeQA.CombinedMetrics.Scorer + + @min_tokens 10 + @severity_critical 0.50 + @severity_high 0.25 + @severity_medium 0.10 + @gap_floor 0.01 + + @spec build(map(), [struct()], map()) :: [map()] + def build(analysis_results, changed_files, codebase_cosine_lookup) do + files = Map.get(analysis_results, "files", %{}) + fix_hints = build_fix_hint_lookup() + + file_entries = + if changed_files == [] do + Enum.map(files, fn {path, data} -> {path, nil, data} end) + else + changed_index = Map.new(changed_files, &{&1.path, &1.status}) + + files + |> Enum.filter(fn {path, _} -> Map.has_key?(changed_index, path) end) + |> Enum.map(fn {path, data} -> {path, Map.get(changed_index, path), data} end) + end + + file_entries + |> Enum.map(fn {path, status, file_data} -> + blocks = + file_data + |> Map.get("nodes", []) + |> Enum.flat_map(&collect_nodes/1) + |> Enum.filter(&(&1["token_count"] >= @min_tokens)) + |> Enum.map(&enrich_block(&1, codebase_cosine_lookup, fix_hints)) + |> Enum.reject(&(&1.potentials == [])) + |> Enum.sort_by(&(-max_delta(&1))) + + %{path: path, status: status, blocks: blocks} + end) + |> Enum.reject(&(&1.blocks == [])) + |> Enum.sort_by(& &1.path) + end + + defp collect_nodes(node) do + children = node |> Map.get("children", []) |> Enum.flat_map(&collect_nodes/1) + [node | children] + end + + defp enrich_block(node, cosine_lookup, fix_hints) do + potentials = + node + |> Map.get("refactoring_potentials", []) + |> Enum.map(&enrich_potential(&1, cosine_lookup, fix_hints)) + |> Enum.reject(&is_nil/1) + |> Enum.sort_by(& &1.cosine_delta, :desc) + + %{ + start_line: node["start_line"], + end_line: node["end_line"], + type: node["type"], + token_count: node["token_count"], + potentials: potentials + } + end + + defp enrich_potential(p, cosine_lookup, fix_hints) do + category = p["category"] + behavior = p["behavior"] + cosine_delta = p["cosine_delta"] + + codebase_cosine = Map.get(cosine_lookup, {category, behavior}, 0.0) + gap = max(@gap_floor, 1.0 - 
codebase_cosine) + severity = classify(cosine_delta / gap) + + if severity == :filtered do + nil + else + %{ + category: category, + behavior: behavior, + cosine_delta: cosine_delta, + severity: severity, + fix_hint: Map.get(fix_hints, {category, behavior}) + } + end + end + + defp classify(ratio) when ratio > @severity_critical, do: :critical + defp classify(ratio) when ratio > @severity_high, do: :high + defp classify(ratio) when ratio > @severity_medium, do: :medium + defp classify(_ratio), do: :filtered + + defp max_delta(%{potentials: []}), do: 0.0 + defp max_delta(%{potentials: potentials}), do: Enum.max_by(potentials, & &1.cosine_delta).cosine_delta + + defp build_fix_hint_lookup do + Scorer.all_yamls() + |> Enum.flat_map(fn {yaml_path, data} -> + category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") + + Enum.flat_map(data, fn {behavior, behavior_data} -> + case get_in(behavior_data, ["_fix_hint"]) do + nil -> [] + hint -> [{{category, behavior}, hint}] + end + end) + end) + |> Map.new() + end +end +``` + +- [ ] **Step 4: Run tests to confirm they pass** + +```bash +mix test test/codeqa/health_report/top_blocks_test.exs --trace +``` + +Expected: all PASS. + +- [ ] **Step 5: Run full suite** + +```bash +mix test +``` + +Expected: all passing. + +- [ ] **Step 6: Commit** + +```bash +git add lib/codeqa/health_report/top_blocks.ex test/codeqa/health_report/top_blocks_test.exs +git commit -m "feat(health-report): add TopBlocks module for severity-classified block assembly" +``` + +--- + +## Task 4: Update `HealthReport.generate/2` + +**Files:** +- Modify: `lib/codeqa/health_report.ex` +- Modify: `test/codeqa/health_report_test.exs` + +- [ ] **Step 1: Add tests for new output keys** + +Open `test/codeqa/health_report_test.exs`. 
Add a describe block (create the file if it doesn't exist): + +```elixir +describe "generate/2 output keys" do + @tag :slow + test "without base_results: pr_summary and codebase_delta are nil" do + files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} + results = CodeQA.Engine.Analyzer.analyze_codebase(files) + results = CodeQA.BlockImpactAnalyzer.analyze(results, files) + + report = CodeQA.HealthReport.generate(results) + + assert report.pr_summary == nil + assert report.codebase_delta == nil + assert is_list(report.top_blocks) + assert Map.has_key?(report, :overall_score) + assert Map.has_key?(report, :overall_grade) + assert Map.has_key?(report, :categories) + assert Map.has_key?(report, :top_issues) + end + + @tag :slow + test "without base_results: top_blocks shows all files with significant blocks" do + files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} + results = CodeQA.Engine.Analyzer.analyze_codebase(files) + results = CodeQA.BlockImpactAnalyzer.analyze(results, files) + + report = CodeQA.HealthReport.generate(results) + + # top_blocks is a list of file groups (may be empty if no blocks above threshold) + assert is_list(report.top_blocks) + Enum.each(report.top_blocks, fn group -> + assert Map.has_key?(group, :path) + assert Map.has_key?(group, :status) + assert Map.has_key?(group, :blocks) + assert group.status == nil + end) + end + + test "worst_offenders is always empty in categories" do + files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} + results = CodeQA.Engine.Analyzer.analyze_codebase(files) + results = CodeQA.BlockImpactAnalyzer.analyze(results, files) + + report = CodeQA.HealthReport.generate(results) + + Enum.each(report.categories, fn cat -> + assert Map.get(cat, :worst_offenders, []) == [] + end) + end +end + +describe "generate/2 with base_results" do + @tag :slow + test "pr_summary is populated" do + files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} + 
head_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + head_results = CodeQA.BlockImpactAnalyzer.analyze(head_results, files) + base_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + + changed = [%CodeQA.Git.ChangedFile{path: "lib/foo.ex", status: "modified"}] + + report = CodeQA.HealthReport.generate(head_results, + base_results: base_results, + changed_files: changed + ) + + assert %{ + base_score: base_score, + head_score: head_score, + score_delta: delta, + base_grade: _, + head_grade: _, + blocks_flagged: flagged, + files_changed: 1, + files_added: 0, + files_modified: 1 + } = report.pr_summary + + assert is_integer(base_score) + assert is_integer(head_score) + assert delta == head_score - base_score + assert is_integer(flagged) + end + + @tag :slow + test "codebase_delta is populated" do + files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} + head_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + head_results = CodeQA.BlockImpactAnalyzer.analyze(head_results, files) + base_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + + report = CodeQA.HealthReport.generate(head_results, base_results: base_results) + + assert %{base: %{aggregate: _}, head: %{aggregate: _}, delta: %{aggregate: _}} = + report.codebase_delta + end + + @tag :slow + test "top_blocks scoped to changed_files" do + files = %{ + "lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n", + "lib/bar.ex" => "defmodule Bar do\n def baz, do: :ok\nend\n" + } + head_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + head_results = CodeQA.BlockImpactAnalyzer.analyze(head_results, files) + base_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + + changed = [%CodeQA.Git.ChangedFile{path: "lib/foo.ex", status: "modified"}] + + report = CodeQA.HealthReport.generate(head_results, + base_results: base_results, + changed_files: changed + ) + + paths = Enum.map(report.top_blocks, & &1.path) + refute "lib/bar.ex" in paths + end +end +``` + 
+- [ ] **Step 2: Run new tests to confirm they fail** + +```bash +mix test test/codeqa/health_report_test.exs --trace +``` + +Expected: FAIL — `pr_summary` key missing, etc. + +- [ ] **Step 3: Update `lib/codeqa/health_report.ex`** + +Replace the entire file: + +```elixir +defmodule CodeQA.HealthReport do + @moduledoc "Orchestrates health report generation from analysis results." + + alias CodeQA.HealthReport.{Config, Grader, Formatter, Delta, TopBlocks} + alias CodeQA.CombinedMetrics.{FileScorer, SampleRunner} + + @spec generate(map(), keyword()) :: map() + def generate(analysis_results, opts \\ []) do + config_path = Keyword.get(opts, :config) + detail = Keyword.get(opts, :detail, :default) + base_results = Keyword.get(opts, :base_results) + changed_files = Keyword.get(opts, :changed_files, []) + + %{ + categories: categories, + grade_scale: grade_scale, + impact_map: impact_map, + combined_top: combined_top + } = + Config.load(config_path) + + aggregate = get_in(analysis_results, ["codebase", "aggregate"]) || %{} + files = Map.get(analysis_results, "files", %{}) + project_langs = project_languages(files) + + threshold_grades = + categories + |> Grader.grade_aggregate(aggregate, grade_scale) + |> Enum.zip(categories) + |> Enum.map(fn {graded, _cat_def} -> + summary = build_category_summary(graded) + + graded + |> Map.put(:type, :threshold) + |> Map.merge(%{summary: summary, worst_offenders: []}) + end) + + worst_files_map = FileScorer.worst_files_per_behavior(files, combined_top: combined_top) + + cosine_grades = + Grader.grade_cosine_categories(aggregate, worst_files_map, grade_scale, project_langs) + + all_categories = + (threshold_grades ++ cosine_grades) + |> Enum.map(fn cat -> + Map.put(cat, :impact, Map.get(impact_map, to_string(cat.key), 1)) + end) + + {overall_score, overall_grade} = Grader.overall_score(all_categories, grade_scale, impact_map) + + metadata = build_metadata(analysis_results) + + all_cosines = + SampleRunner.diagnose_aggregate(aggregate, 
top: 99_999, languages: project_langs) + + top_issues = Enum.take(all_cosines, 10) + + codebase_cosine_lookup = + Map.new(all_cosines, fn i -> {{i.category, i.behavior}, i.cosine} end) + + top_blocks = TopBlocks.build(analysis_results, changed_files, codebase_cosine_lookup) + + {codebase_delta, pr_summary} = + if base_results do + build_delta_and_summary( + base_results, + analysis_results, + overall_score, + overall_grade, + all_categories, + categories, + grade_scale, + impact_map, + combined_top, + changed_files, + top_blocks + ) + else + {nil, nil} + end + + %{ + metadata: metadata, + pr_summary: pr_summary, + overall_score: overall_score, + overall_grade: overall_grade, + codebase_delta: codebase_delta, + categories: all_categories, + top_issues: top_issues, + top_blocks: top_blocks + } + end + + @spec to_markdown(map(), atom(), atom()) :: String.t() + def to_markdown(report, detail \\ :default, format \\ :plain) do + Formatter.format_markdown(report, detail, format) + end + + defp build_delta_and_summary( + base_results, + head_results, + head_score, + head_grade, + head_categories, + category_defs, + grade_scale, + impact_map, + combined_top, + changed_files, + top_blocks + ) do + delta = Delta.compute(base_results, head_results) + + base_aggregate = get_in(base_results, ["codebase", "aggregate"]) || %{} + base_files = Map.get(base_results, "files", %{}) + base_project_langs = project_languages(base_files) + + base_threshold_grades = + category_defs + |> Grader.grade_aggregate(base_aggregate, grade_scale) + |> Enum.zip(category_defs) + |> Enum.map(fn {graded, _cat_def} -> + graded + |> Map.put(:type, :threshold) + |> Map.merge(%{summary: "", worst_offenders: []}) + end) + + base_worst_files_map = + FileScorer.worst_files_per_behavior(base_files, combined_top: combined_top) + + base_cosine_grades = + Grader.grade_cosine_categories( + base_aggregate, + base_worst_files_map, + grade_scale, + base_project_langs + ) + + base_all_categories = + 
(base_threshold_grades ++ base_cosine_grades) + |> Enum.map(fn cat -> + Map.put(cat, :impact, Map.get(impact_map, to_string(cat.key), 1)) + end) + + {base_score, base_grade} = Grader.overall_score(base_all_categories, grade_scale, impact_map) + + blocks_flagged = Enum.sum(Enum.map(top_blocks, fn g -> length(g.blocks) end)) + files_added = Enum.count(changed_files, &(&1.status == "added")) + files_modified = Enum.count(changed_files, &(&1.status == "modified")) + + summary = %{ + base_score: base_score, + head_score: head_score, + score_delta: head_score - base_score, + base_grade: base_grade, + head_grade: head_grade, + blocks_flagged: blocks_flagged, + files_changed: length(changed_files), + files_added: files_added, + files_modified: files_modified + } + + {delta, summary} + end + + defp build_metadata(analysis_results) do + meta = Map.get(analysis_results, "metadata", %{}) + + %{ + path: meta["path"] || "unknown", + timestamp: meta["timestamp"] || DateTime.utc_now() |> DateTime.to_iso8601(), + total_files: meta["total_files"] || map_size(Map.get(analysis_results, "files", %{})) + } + end + + defp project_languages(files_map) do + files_map + |> Map.keys() + |> Enum.map(&CodeQA.Language.detect(&1).name()) + |> Enum.reject(&(&1 == "unknown")) + |> Enum.uniq() + end + + defp build_category_summary(%{type: :cosine}), do: "" + + defp build_category_summary(graded) do + low_scorers = + graded.metric_scores + |> Enum.filter(fn m -> m.score < 60 end) + |> length() + + cond do + graded.score >= 90 -> "Excellent" + graded.score >= 70 and low_scorers == 0 -> "Good" + graded.score >= 70 -> "Good overall, #{low_scorers} metric(s) need attention" + graded.score >= 50 -> "Needs improvement" + true -> "Critical — requires attention" + end + end +end +``` + +- [ ] **Step 4: Run tests** + +```bash +mix test test/codeqa/health_report_test.exs --trace +``` + +Expected: new tests PASS. 
+ +- [ ] **Step 5: Run full suite to check for regressions** + +```bash +mix test +``` + +Fix any test that asserts on `worst_offenders` being non-empty in the report output — those assertions should now expect `[]`. + +- [ ] **Step 6: Commit** + +```bash +git add lib/codeqa/health_report.ex test/codeqa/health_report_test.exs +git commit -m "feat(health-report): add top_blocks, pr_summary, codebase_delta; drop worst_offenders" +``` + +--- + +## Task 5: Update plain formatter + +**Files:** +- Modify: `lib/codeqa/health_report/formatter/plain.ex` +- Modify: `test/codeqa/health_report/formatter_test.exs` + +- [ ] **Step 1: Delete failing worst_offenders tests and add new tests** + +In `test/codeqa/health_report/formatter_test.exs`: + +**Delete** these tests (they assert on worst_offenders rendering that is now gone): +- `"includes worst offenders section"` (lines 186–194) +- `"renders cosine worst offenders per behavior"` (lines 216–226) + +**Update** `"summary detail omits category sections"` (line 196) — change to: +```elixir +test "summary detail omits category sections" do + result = Formatter.format_markdown(@sample_report, :summary, :plain) + refute result =~ "Codebase averages" +end +``` + +**Add** these tests after the existing plain describe blocks: + +```elixir +describe "plain formatter: PR summary section" do + @sample_report_with_pr Map.put(@sample_report, :pr_summary, %{ + base_score: 85, + head_score: 77, + score_delta: -8, + base_grade: "B+", + head_grade: "C+", + blocks_flagged: 6, + files_changed: 3, + files_added: 1, + files_modified: 2 + }) + + test "renders PR summary line when pr_summary present" do + result = Formatter.format_markdown(@sample_report_with_pr, :default, :plain) + assert result =~ "B+" + assert result =~ "C+" + assert result =~ "-8" + assert result =~ "6" + assert result =~ "1 added" + assert result =~ "2 modified" + end + + test "omits PR summary when pr_summary is nil" do + result = Formatter.format_markdown(@sample_report, 
:default, :plain) + refute result =~ "Score:" + end +end + +describe "plain formatter: delta section" do + @delta %{ + base: %{aggregate: %{"readability" => %{"mean_flesch_adapted" => 65.0}, "halstead" => %{"mean_difficulty" => 12.0}}}, + head: %{aggregate: %{"readability" => %{"mean_flesch_adapted" => 61.0}, "halstead" => %{"mean_difficulty" => 15.0}}} + } + + @sample_report_with_delta Map.put(@sample_report, :codebase_delta, @delta) + + test "renders metric changes table when codebase_delta present" do + result = Formatter.format_markdown(@sample_report_with_delta, :default, :plain) + assert result =~ "Metric Changes" + assert result =~ "Readability" + assert result =~ "65.00" + assert result =~ "61.00" + end + + test "omits delta section when codebase_delta is nil" do + result = Formatter.format_markdown(@sample_report, :default, :plain) + refute result =~ "Metric Changes" + end +end + +describe "plain formatter: block section" do + @block_potential %{ + category: "function_design", + behavior: "cyclomatic_complexity_under_10", + cosine_delta: 0.41, + severity: :critical, + fix_hint: "Reduce branching" + } + + @top_blocks [ + %{ + path: "lib/foo.ex", + status: "modified", + blocks: [ + %{ + start_line: 42, + end_line: 67, + type: "code", + token_count: 84, + potentials: [@block_potential] + } + ] + } + ] + + @sample_report_with_blocks Map.put(@sample_report, :top_blocks, @top_blocks) + + test "renders block section header" do + result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) + assert result =~ "Blocks" + assert result =~ "1 flagged" + end + + test "renders file group with status" do + result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) + assert result =~ "lib/foo.ex" + assert result =~ "modified" + end + + test "renders block location and type" do + result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) + assert result =~ "lines 42" + assert result =~ "67" + assert result =~ 
"84 tokens" + end + + test "renders severity icon and behavior" do + result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) + assert result =~ "🔴" + assert result =~ "CRITICAL" + assert result =~ "cyclomatic_complexity_under_10" + assert result =~ "0.41" + end + + test "renders fix hint" do + result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) + assert result =~ "Reduce branching" + end + + test "omits block section when top_blocks is empty" do + report = Map.put(@sample_report, :top_blocks, []) + result = Formatter.format_markdown(report, :default, :plain) + refute result =~ "## Blocks" + end + + test "omits block section when top_blocks key absent" do + result = Formatter.format_markdown(@sample_report, :default, :plain) + refute result =~ "## Blocks" + end +end +``` + +- [ ] **Step 2: Run formatter tests to confirm failures** + +```bash +mix test test/codeqa/health_report/formatter_test.exs --trace +``` + +Expected: new tests FAIL, deleted tests no longer present. + +- [ ] **Step 3: Update `lib/codeqa/health_report/formatter/plain.ex`** + +Replace the `render/2` function and remove `cosine_worst_offenders/2` + `worst_offenders_section/2`. Add new section functions: + +```elixir +@spec render(map(), atom()) :: String.t() +def render(report, detail) do + [ + pr_summary_section(Map.get(report, :pr_summary)), + header(report), + cosine_legend(), + delta_section(Map.get(report, :codebase_delta)), + overall_table(report), + top_issues_section(Map.get(report, :top_issues, []), detail), + blocks_section(Map.get(report, :top_blocks, [])), + category_sections(report.categories, detail) + ] + |> List.flatten() + |> Enum.join("\n") +end +``` + +Remove `cosine_worst_offenders/2` (lines 91–116) and `worst_offenders_section/2` (lines 196–235) entirely. 
+ +Update `render_category/2` for cosine — remove the `cosine_worst_offenders` call: + +```elixir +defp render_category(%{type: :cosine} = cat, _detail) do + cosine_section_header(cat) ++ cosine_behaviors_table(cat) +end + +defp render_category(cat, _detail) do + section_header(cat) ++ metric_detail(cat) +end +``` + +Add the three new section functions and the `severity_icon/1` helper at the bottom of the module: + +```elixir +defp pr_summary_section(nil), do: [] + +defp pr_summary_section(summary) do + delta_str = + if summary.score_delta >= 0, + do: "+#{summary.score_delta}", + else: "#{summary.score_delta}" + + status_str = "#{summary.files_modified} modified, #{summary.files_added} added" + + [ + "> **Score:** #{summary.base_grade} → #{summary.head_grade} | **Δ** #{delta_str} pts | **#{summary.blocks_flagged}** blocks flagged across #{summary.files_changed} files | #{status_str}", + "" + ] +end + +defp delta_section(nil), do: [] + +defp delta_section(delta) do + base_agg = delta.base.aggregate + head_agg = delta.head.aggregate + + metrics = [ + {"Readability", "readability", "mean_flesch_adapted"}, + {"Complexity", "halstead", "mean_difficulty"}, + {"Duplication", "compression", "mean_redundancy"}, + {"Structure", "branching", "mean_branch_count"} + ] + + rows = + Enum.flat_map(metrics, fn {label, group, key} -> + base_val = get_in(base_agg, [group, key]) + head_val = get_in(head_agg, [group, key]) + + if is_number(base_val) and is_number(head_val) do + diff = Float.round(head_val - base_val, 2) + diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}" + ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"] + else + [] + end + end) + + if rows == [] do + [] + else + [ + "## Metric Changes", + "", + "| Category | Base | Head | Δ |", + "|----------|------|------|---|" + | rows + ] ++ [""] + end +end + +defp blocks_section([]), do: [] + +defp blocks_section(top_blocks) do + total = Enum.sum(Enum.map(top_blocks, fn g -> length(g.blocks) end)) + 
+ file_parts = + Enum.flat_map(top_blocks, fn group -> + status_str = if group.status, do: " [#{group.status}]", else: "" + + block_lines = + Enum.flat_map(group.blocks, fn block -> + end_line = block.end_line || block.start_line + header = "**lines #{block.start_line}–#{end_line}** · #{block.type} · #{block.token_count} tokens" + + potential_lines = + Enum.flat_map(block.potentials, fn p -> + icon = severity_icon(p.severity) + delta_str = format_num(p.cosine_delta) + label = "#{String.upcase(to_string(p.severity))}" + line = " #{icon} #{label} #{p.category} / #{p.behavior} (Δ #{delta_str})" + fix = if p.fix_hint, do: [" → #{p.fix_hint}"], else: [] + [line | fix] + end) + + [header | potential_lines] ++ [""] + end) + + ["### #{group.path}#{status_str}", "" | block_lines] + end) + + [ + "## Blocks (#{total} flagged across #{length(top_blocks)} files)", + "" + | file_parts + ] +end + +defp severity_icon(:critical), do: "🔴" +defp severity_icon(:high), do: "🟠" +defp severity_icon(:medium), do: "🟡" +``` + +- [ ] **Step 4: Run formatter tests** + +```bash +mix test test/codeqa/health_report/formatter_test.exs --trace +``` + +Expected: all PASS. 
+ +- [ ] **Step 5: Run full suite** + +```bash +mix test +``` + +- [ ] **Step 6: Commit** + +```bash +git add lib/codeqa/health_report/formatter/plain.ex test/codeqa/health_report/formatter_test.exs +git commit -m "feat(formatter): add block, delta, PR summary sections; remove worst_offenders (plain)" +``` + +--- + +## Task 6: Update GitHub formatter + +**Files:** +- Modify: `lib/codeqa/health_report/formatter/github.ex` +- Modify: `test/codeqa/health_report/formatter_test.exs` + +- [ ] **Step 1: Add GitHub formatter tests** + +In `test/codeqa/health_report/formatter_test.exs`, add a new describe block: + +```elixir +describe "github formatter: block section" do + @block_potential %{ + category: "function_design", + behavior: "cyclomatic_complexity_under_10", + cosine_delta: 0.41, + severity: :critical, + fix_hint: "Reduce branching" + } + + @top_blocks_gh [ + %{ + path: "lib/foo.ex", + status: "modified", + blocks: [ + %{start_line: 42, end_line: 67, type: "code", token_count: 84, potentials: [@block_potential]} + ] + } + ] + + @report_with_blocks_gh Map.put(@sample_report, :top_blocks, @top_blocks_gh) + + test "renders block section with details wrapper per file" do + result = Formatter.format_markdown(@report_with_blocks_gh, :default, :github) + assert result =~ "Blocks" + assert result =~ "
    " + assert result =~ "lib/foo.ex" + assert result =~ "modified" + end + + test "renders severity and fix hint" do + result = Formatter.format_markdown(@report_with_blocks_gh, :default, :github) + assert result =~ "🔴" + assert result =~ "cyclomatic_complexity_under_10" + assert result =~ "Reduce branching" + end +end + +describe "github formatter: PR summary and delta" do + @pr_summary_gh %{ + base_score: 85, head_score: 77, score_delta: -8, + base_grade: "B+", head_grade: "C+", + blocks_flagged: 6, files_changed: 3, files_added: 1, files_modified: 2 + } + + @delta_gh %{ + base: %{aggregate: %{"readability" => %{"mean_flesch_adapted" => 65.0}}}, + head: %{aggregate: %{"readability" => %{"mean_flesch_adapted" => 61.0}}} + } + + test "renders PR summary" do + report = @sample_report |> Map.put(:pr_summary, @pr_summary_gh) + result = Formatter.format_markdown(report, :default, :github) + assert result =~ "B+" + assert result =~ "C+" + assert result =~ "-8" + end + + test "renders delta section" do + report = @sample_report |> Map.put(:codebase_delta, @delta_gh) + result = Formatter.format_markdown(report, :default, :github) + assert result =~ "Metric Changes" + assert result =~ "65.00" + assert result =~ "61.00" + end +end +``` + +- [ ] **Step 2: Run tests to confirm failures** + +```bash +mix test test/codeqa/health_report/formatter_test.exs --trace 2>&1 | grep -E "FAILED|failure" +``` + +- [ ] **Step 3: Update `lib/codeqa/health_report/formatter/github.ex`** + +Update `render/3` to include new sections and remove worst_offenders: + +```elixir +def render(report, detail, opts \\ []) do + chart? 
= Keyword.get(opts, :chart, true) + display_categories = merge_cosine_categories(report.categories) + + [ + pr_summary_section(Map.get(report, :pr_summary)), + header(report), + cosine_legend(), + delta_section(Map.get(report, :codebase_delta)), + if(chart?, do: mermaid_chart(display_categories), else: []), + progress_bars(display_categories), + top_issues_section(Map.get(report, :top_issues, []), detail), + blocks_section(Map.get(report, :top_blocks, [])), + category_sections(display_categories, detail), + footer() + ] + |> List.flatten() + |> Enum.join("\n") +end +``` + +Remove `cosine_worst_offenders/2` (lines 254–304) and `worst_offenders/2` (lines 384–435). + +Update `cosine_section_content/2` — remove the call to `cosine_worst_offenders`: + +```elixir +defp cosine_section_content(cat, _detail) do + # ... existing behaviors_table code ... + behaviors_table ++ [""] +end +``` + +Update `section_content/2` — remove the `++ worst_offenders(cat)` at the end (line 381): + +```elixir +defp section_content(cat, _detail) do + # ... existing code without worst_offenders ... 
+ [ + "Codebase averages: #{metric_summary}", + "" + | metrics_table + ] ++ [""] +end +``` + +Add new private functions at the bottom: + +```elixir +defp pr_summary_section(nil), do: [] + +defp pr_summary_section(summary) do + delta_str = + if summary.score_delta >= 0, + do: "+#{summary.score_delta}", + else: "#{summary.score_delta}" + + status_str = "#{summary.files_modified} modified, #{summary.files_added} added" + + [ + "> **Score:** #{summary.base_grade} → #{summary.head_grade} | **Δ** #{delta_str} pts | **#{summary.blocks_flagged}** blocks flagged across #{summary.files_changed} files | #{status_str}", + "" + ] +end + +defp delta_section(nil), do: [] + +defp delta_section(delta) do + base_agg = delta.base.aggregate + head_agg = delta.head.aggregate + + metrics = [ + {"Readability", "readability", "mean_flesch_adapted"}, + {"Complexity", "halstead", "mean_difficulty"}, + {"Duplication", "compression", "mean_redundancy"}, + {"Structure", "branching", "mean_branch_count"} + ] + + rows = + Enum.flat_map(metrics, fn {label, group, key} -> + base_val = get_in(base_agg, [group, key]) + head_val = get_in(head_agg, [group, key]) + + if is_number(base_val) and is_number(head_val) do + diff = Float.round(head_val - base_val, 2) + diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}" + ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"] + else + [] + end + end) + + if rows == [] do + [] + else + [ + "## Metric Changes", + "", + "| Category | Base | Head | Δ |", + "|----------|------|------|---|" + | rows + ] ++ [""] + end +end + +defp blocks_section([]), do: [] + +defp blocks_section(top_blocks) do + total = Enum.sum(Enum.map(top_blocks, fn g -> length(g.blocks) end)) + + file_cards = + Enum.flat_map(top_blocks, fn group -> + status_str = if group.status, do: " [#{group.status}]", else: "" + summary_line = "🔍 #{group.path}#{status_str} — #{length(group.blocks)} block(s)" + + block_lines = + 
Enum.flat_map(group.blocks, fn block -> + end_line = block.end_line || block.start_line + + potential_lines = + Enum.flat_map(block.potentials, fn p -> + icon = severity_icon(p.severity) + delta_str = format_num(p.cosine_delta) + label = String.upcase(to_string(p.severity)) + line = "**#{icon} #{label}** `#{p.category}/#{p.behavior}` (Δ #{delta_str})" + fix = if p.fix_hint, do: ["> #{p.fix_hint}"], else: [] + [line | fix] + end) + + ["**lines #{block.start_line}–#{end_line}** · #{block.type} · #{block.token_count} tokens"] ++ + potential_lines ++ [""] + end) + + inner = List.flatten(block_lines) |> Enum.join("\n") + + [ + "<details>", + "<summary>#{summary_line}</summary>", + "", + inner, + "</details>
    ", + "" + ] + end) + + [ + "## 🔍 Blocks (#{total} flagged across #{length(top_blocks)} files)", + "" + | file_cards + ] +end + +defp severity_icon(:critical), do: "🔴" +defp severity_icon(:high), do: "🟠" +defp severity_icon(:medium), do: "🟡" +``` + +- [ ] **Step 4: Run formatter tests** + +```bash +mix test test/codeqa/health_report/formatter_test.exs --trace +``` + +Expected: all PASS. + +- [ ] **Step 5: Run full suite** + +```bash +mix test +``` + +- [ ] **Step 6: Commit** + +```bash +git add lib/codeqa/health_report/formatter/github.ex test/codeqa/health_report/formatter_test.exs +git commit -m "feat(formatter): add block, delta, PR summary sections; remove worst_offenders (github)" +``` + +--- + +## Task 7: Update `CLI.HealthReport` + +**Files:** +- Modify: `lib/codeqa/cli/health_report.ex` + +- [ ] **Step 1: Update `@command_options` and usage string** + +In `lib/codeqa/cli/health_report.ex`, add to `@command_options`: + +```elixir +@command_options [ + output: :string, + config: :string, + detail: :string, + top: :integer, + format: :string, + ignore_paths: :string, + base_ref: :string, + head_ref: :string +] +``` + +Add to the usage string: + +``` + --base-ref REF Base git ref for PR comparison (enables delta and block scoping) + --head-ref REF Head git ref (default: HEAD) +``` + +- [ ] **Step 2: Update `run/1` to wire dual analysis** + +Replace the `run/1` body (keeping the existing single-pass as the fallback when no `--base-ref`). 
The full updated `run/1`: + +```elixir +def run(args) do + {opts, [path], _} = Options.parse(args, @command_options, o: :output) + Options.validate_dir!(path) + extra_ignore_patterns = Options.parse_ignore_paths(opts[:ignore_paths]) + + base_ref = opts[:base_ref] + head_ref = opts[:head_ref] || "HEAD" + + files = + CodeQA.Engine.Collector.collect_files(path, extra_ignore_patterns) + + if map_size(files) == 0 do + IO.puts(:stderr, "Warning: no source files found in '#{path}'") + exit({:shutdown, 1}) + end + + IO.puts(:stderr, "Analyzing #{map_size(files)} files for health report...") + + analyze_opts = + Options.build_analyze_opts(opts) ++ CodeQA.Config.near_duplicate_blocks_opts() + + start_time = System.monotonic_time(:millisecond) + results = CodeQA.Engine.Analyzer.analyze_codebase(files, analyze_opts) + end_time = System.monotonic_time(:millisecond) + + IO.puts(:stderr, "Analysis completed in #{end_time - start_time}ms") + + nodes_top = opts[:nodes_top] || 3 + results = CodeQA.BlockImpactAnalyzer.analyze(results, files, nodes_top: nodes_top) + + total_bytes = results["files"] |> Map.values() |> Enum.map(& &1["bytes"]) |> Enum.sum() + + results = + Map.put(results, "metadata", %{ + "path" => Path.expand(path), + "timestamp" => DateTime.utc_now() |> DateTime.to_iso8601(), + "total_files" => map_size(files), + "total_bytes" => total_bytes + }) + + {base_results, changed_files} = + if base_ref do + IO.puts(:stderr, "Collecting base snapshot at #{base_ref}...") + base_files = CodeQA.Git.collect_files_at_ref(path, base_ref) + changed = CodeQA.Git.changed_files(path, base_ref, head_ref) + + IO.puts(:stderr, "Analyzing base snapshot (#{map_size(base_files)} files)...") + base_res = CodeQA.Engine.Analyzer.analyze_codebase(base_files, analyze_opts) + + {base_res, changed} + else + {nil, []} + end + + detail = parse_detail(opts[:detail]) + format = parse_format(opts[:format]) + top_n = opts[:top] || 5 + + report = + CodeQA.HealthReport.generate(results, + config: 
opts[:config], + detail: detail, + top: top_n, + base_results: base_results, + changed_files: changed_files + ) + + markdown = CodeQA.HealthReport.to_markdown(report, detail, format) + + case opts[:output] do + nil -> + markdown + + file -> + File.write!(file, markdown) + IO.puts(:stderr, "Health report written to #{file}") + "" + end +end +``` + +- [ ] **Step 3: Run full test suite** + +```bash +mix test +``` + +Expected: all PASS (no tests for git integration at this stage — the git calls require an actual repo with refs, which integration tests would mock or skip). + +- [ ] **Step 4: Commit** + +```bash +git add lib/codeqa/cli/health_report.ex +git commit -m "feat(cli): add --base-ref/--head-ref to health-report for PR delta and block scoping" +``` + +--- + +## Task 8: Delete compare command and related files + +**Files:** +- Delete: `lib/codeqa/cli/compare.ex` +- Delete: `lib/codeqa/comparator.ex` +- Delete: `lib/codeqa/formatter.ex` +- Delete: `lib/codeqa/summarizer.ex` +- Delete: `test/codeqa/cli_compare_test.exs` +- Modify: `lib/codeqa/cli.ex` + +- [ ] **Step 1: Remove compare from the CLI router** + +Read `lib/codeqa/cli.ex` and remove the line that registers `compare` (line 6). It will look like: + +```elixir +"compare" => CodeQA.CLI.Compare, +``` + +Remove that entry entirely. + +- [ ] **Step 2: Delete the four source files** + +```bash +rm lib/codeqa/cli/compare.ex lib/codeqa/comparator.ex lib/codeqa/formatter.ex lib/codeqa/summarizer.ex +``` + +- [ ] **Step 3: Delete compare tests** + +```bash +rm test/codeqa/cli_compare_test.exs +``` + +- [ ] **Step 4: Verify no remaining references** + +```bash +grep -r "CLI\.Compare\|CodeQA\.Comparator\|CodeQA\.Formatter\b\|CodeQA\.Summarizer" lib/ test/ --include="*.ex" --include="*.exs" +``` + +Expected: no output. + +- [ ] **Step 5: Run full test suite** + +```bash +mix test +``` + +Expected: all PASS, no references to deleted modules. 
+ +- [ ] **Step 6: Commit** + +```bash +git add -A +git commit -m "feat(cli): delete compare command — absorbed into health-report" +``` + +--- From 34a118f8860119a7cece02884db5fc375e1d3891 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 12:50:54 +0100 Subject: [PATCH 30/71] feat(block-impact): serialize end_line in node output Add end_line field to serialized node map in BlockImpactAnalyzer.serialize_node/9. This field is required by the upcoming HealthReport.TopBlocks module to display block location ranges in reports. Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/block_impact_analyzer.ex | 1 + test/codeqa/block_impact_analyzer_test.exs | 1 + 2 files changed, 2 insertions(+) diff --git a/lib/codeqa/block_impact_analyzer.ex b/lib/codeqa/block_impact_analyzer.ex index d743184..4f62549 100644 --- a/lib/codeqa/block_impact_analyzer.ex +++ b/lib/codeqa/block_impact_analyzer.ex @@ -166,6 +166,7 @@ defmodule CodeQA.BlockImpactAnalyzer do %{ "start_line" => node.start_line, + "end_line" => node.end_line, "column_start" => (first_token && first_token.col) || 0, "char_length" => char_length, "type" => Atom.to_string(node.type), diff --git a/test/codeqa/block_impact_analyzer_test.exs b/test/codeqa/block_impact_analyzer_test.exs index 98aaecb..a8341e9 100644 --- a/test/codeqa/block_impact_analyzer_test.exs +++ b/test/codeqa/block_impact_analyzer_test.exs @@ -41,6 +41,7 @@ defmodule CodeQA.BlockImpactAnalyzerTest do Enum.each(nodes, fn node -> assert Map.has_key?(node, "start_line") + assert Map.has_key?(node, "end_line") assert Map.has_key?(node, "column_start") assert Map.has_key?(node, "char_length") assert Map.has_key?(node, "type") From 5b46e98e28cac2d2b5c073aad23cf114345b26bb Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 12:52:35 +0100 Subject: [PATCH 31/71] feat(health-report): add Delta module for aggregate metric comparison Implements CodeQA.HealthReport.Delta to compute delta metrics between base and head codebase 
analysis results. Only numeric metric values are included in the delta; string/label fields are skipped. Delta values are rounded to 4 decimal places. Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/health_report/delta.ex | 42 +++++++++++++++++++ test/codeqa/health_report/delta_test.exs | 53 ++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 lib/codeqa/health_report/delta.ex create mode 100644 test/codeqa/health_report/delta_test.exs diff --git a/lib/codeqa/health_report/delta.ex b/lib/codeqa/health_report/delta.ex new file mode 100644 index 0000000..fc014db --- /dev/null +++ b/lib/codeqa/health_report/delta.ex @@ -0,0 +1,42 @@ +defmodule CodeQA.HealthReport.Delta do + @moduledoc "Computes aggregate metric delta between two codebase analysis results." + + @spec compute(map(), map()) :: %{ + base: %{aggregate: map()}, + head: %{aggregate: map()}, + delta: %{aggregate: map()} + } + def compute(base_results, head_results) do + base_agg = get_in(base_results, ["codebase", "aggregate"]) || %{} + head_agg = get_in(head_results, ["codebase", "aggregate"]) || %{} + + %{ + base: %{aggregate: base_agg}, + head: %{aggregate: head_agg}, + delta: %{aggregate: compute_aggregate_delta(base_agg, head_agg)} + } + end + + defp compute_aggregate_delta(base_agg, head_agg) do + MapSet.new(Map.keys(base_agg) ++ Map.keys(head_agg)) + |> Enum.reduce(%{}, fn metric_name, acc -> + base_m = Map.get(base_agg, metric_name, %{}) + head_m = Map.get(head_agg, metric_name, %{}) + delta = compute_numeric_delta(base_m, head_m) + if delta == %{}, do: acc, else: Map.put(acc, metric_name, delta) + end) + end + + defp compute_numeric_delta(base, head) do + MapSet.new(Map.keys(base) ++ Map.keys(head)) + |> Enum.reduce(%{}, fn key, acc -> + case {Map.get(base, key), Map.get(head, key)} do + {b, h} when is_number(b) and is_number(h) -> + Map.put(acc, key, Float.round(h - b, 4)) + + _ -> + acc + end + end) + end +end diff --git a/test/codeqa/health_report/delta_test.exs 
b/test/codeqa/health_report/delta_test.exs new file mode 100644 index 0000000..f13a74d --- /dev/null +++ b/test/codeqa/health_report/delta_test.exs @@ -0,0 +1,53 @@ +defmodule CodeQA.HealthReport.DeltaTest do + use ExUnit.Case, async: true + + alias CodeQA.HealthReport.Delta + + defp make_results(aggregate) do + %{"codebase" => %{"aggregate" => aggregate}} + end + + test "returns base, head, and delta aggregates" do + base = make_results(%{"entropy" => %{"mean_value" => 5.0}}) + head = make_results(%{"entropy" => %{"mean_value" => 6.0}}) + + result = Delta.compute(base, head) + + assert result.base.aggregate == %{"entropy" => %{"mean_value" => 5.0}} + assert result.head.aggregate == %{"entropy" => %{"mean_value" => 6.0}} + assert result.delta.aggregate == %{"entropy" => %{"mean_value" => 1.0}} + end + + test "rounds delta to 4 decimal places" do + base = make_results(%{"entropy" => %{"mean_value" => 1.0}}) + head = make_results(%{"entropy" => %{"mean_value" => 4.3333}}) + + result = Delta.compute(base, head) + assert result.delta.aggregate["entropy"]["mean_value"] == 3.3333 + end + + test "handles missing base codebase gracefully" do + base = %{} + head = make_results(%{"entropy" => %{"mean_value" => 6.0}}) + + result = Delta.compute(base, head) + assert result.delta.aggregate == %{} + end + + test "handles missing head codebase gracefully" do + base = make_results(%{"entropy" => %{"mean_value" => 5.0}}) + head = %{} + + result = Delta.compute(base, head) + assert result.delta.aggregate == %{} + end + + test "skips non-numeric metric keys" do + base = make_results(%{"entropy" => %{"mean_value" => 5.0, "label" => "x"}}) + head = make_results(%{"entropy" => %{"mean_value" => 6.0, "label" => "y"}}) + + result = Delta.compute(base, head) + refute Map.has_key?(result.delta.aggregate["entropy"], "label") + assert result.delta.aggregate["entropy"]["mean_value"] == 1.0 + end +end From fa96b53e46d0d99dab39392a5f5b4fc5d5cfe3c6 Mon Sep 17 00:00:00 2001 From: Andreas Solleder 
Date: Sat, 21 Mar 2026 12:54:46 +0100 Subject: [PATCH 32/71] fix(health-report/delta): coerce Float.round operand; document asymmetric key behaviour --- lib/codeqa/health_report/delta.ex | 2 +- test/codeqa/health_report/delta_test.exs | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/codeqa/health_report/delta.ex b/lib/codeqa/health_report/delta.ex index fc014db..52b0085 100644 --- a/lib/codeqa/health_report/delta.ex +++ b/lib/codeqa/health_report/delta.ex @@ -32,7 +32,7 @@ defmodule CodeQA.HealthReport.Delta do |> Enum.reduce(%{}, fn key, acc -> case {Map.get(base, key), Map.get(head, key)} do {b, h} when is_number(b) and is_number(h) -> - Map.put(acc, key, Float.round(h - b, 4)) + Map.put(acc, key, Float.round((h - b) * 1.0, 4)) _ -> acc diff --git a/test/codeqa/health_report/delta_test.exs b/test/codeqa/health_report/delta_test.exs index f13a74d..6932e0c 100644 --- a/test/codeqa/health_report/delta_test.exs +++ b/test/codeqa/health_report/delta_test.exs @@ -50,4 +50,12 @@ defmodule CodeQA.HealthReport.DeltaTest do refute Map.has_key?(result.delta.aggregate["entropy"], "label") assert result.delta.aggregate["entropy"]["mean_value"] == 1.0 end + + test "metric key present only in head produces no delta entry" do + base = make_results(%{"entropy" => %{"mean_value" => 5.0}}) + head = make_results(%{"entropy" => %{"mean_value" => 6.0, "new_metric" => 3.0}}) + + result = Delta.compute(base, head) + refute Map.has_key?(result.delta.aggregate["entropy"], "new_metric") + end end From 321143990cbd78fba791dcd8d71b33c2c6ba2f43 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 12:57:18 +0100 Subject: [PATCH 33/71] feat(health-report): add TopBlocks module for severity-classified block assembly Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/health_report/top_blocks.ex | 113 +++++++++++++ test/codeqa/health_report/top_blocks_test.exs | 151 ++++++++++++++++++ 2 files changed, 264 insertions(+) create mode 100644 
lib/codeqa/health_report/top_blocks.ex create mode 100644 test/codeqa/health_report/top_blocks_test.exs diff --git a/lib/codeqa/health_report/top_blocks.ex b/lib/codeqa/health_report/top_blocks.ex new file mode 100644 index 0000000..cc38299 --- /dev/null +++ b/lib/codeqa/health_report/top_blocks.ex @@ -0,0 +1,113 @@ +defmodule CodeQA.HealthReport.TopBlocks do + @moduledoc "Assembles the top_blocks report section from analysis node data." + + alias CodeQA.CombinedMetrics.Scorer + + @min_tokens 10 + @severity_critical 0.50 + @severity_high 0.25 + @severity_medium 0.10 + @gap_floor 0.01 + + @spec build(map(), [struct()], map()) :: [map()] + def build(analysis_results, changed_files, codebase_cosine_lookup) do + files = Map.get(analysis_results, "files", %{}) + fix_hints = build_fix_hint_lookup() + + file_entries = + if changed_files == [] do + Enum.map(files, fn {path, data} -> {path, nil, data} end) + else + changed_index = Map.new(changed_files, &{&1.path, &1.status}) + + files + |> Enum.filter(fn {path, _} -> Map.has_key?(changed_index, path) end) + |> Enum.map(fn {path, data} -> {path, Map.get(changed_index, path), data} end) + end + + file_entries + |> Enum.map(fn {path, status, file_data} -> + blocks = + file_data + |> Map.get("nodes", []) + |> Enum.flat_map(&collect_nodes/1) + |> Enum.filter(&(&1["token_count"] >= @min_tokens)) + |> Enum.map(&enrich_block(&1, codebase_cosine_lookup, fix_hints)) + |> Enum.reject(&(&1.potentials == [])) + |> Enum.sort_by(&(-max_delta(&1))) + + %{path: path, status: status, blocks: blocks} + end) + |> Enum.reject(&(&1.blocks == [])) + |> Enum.sort_by(& &1.path) + end + + defp collect_nodes(node) do + children = node |> Map.get("children", []) |> Enum.flat_map(&collect_nodes/1) + [node | children] + end + + defp enrich_block(node, cosine_lookup, fix_hints) do + potentials = + node + |> Map.get("refactoring_potentials", []) + |> Enum.map(&enrich_potential(&1, cosine_lookup, fix_hints)) + |> Enum.reject(&is_nil/1) + |> Enum.sort_by(& 
&1.cosine_delta, :desc) + + %{ + start_line: node["start_line"], + end_line: node["end_line"], + type: node["type"], + token_count: node["token_count"], + potentials: potentials + } + end + + defp enrich_potential(p, cosine_lookup, fix_hints) do + category = p["category"] + behavior = p["behavior"] + cosine_delta = p["cosine_delta"] + + codebase_cosine = Map.get(cosine_lookup, {category, behavior}, 0.0) + gap = max(@gap_floor, 1.0 - codebase_cosine) + severity = classify(cosine_delta / gap) + + if severity == :filtered do + nil + else + %{ + category: category, + behavior: behavior, + cosine_delta: cosine_delta, + severity: severity, + fix_hint: Map.get(fix_hints, {category, behavior}) + } + end + end + + defp classify(ratio) when ratio > @severity_critical, do: :critical + defp classify(ratio) when ratio > @severity_high, do: :high + defp classify(ratio) when ratio > @severity_medium, do: :medium + defp classify(_ratio), do: :filtered + + defp max_delta(%{potentials: []}), do: 0.0 + + defp max_delta(%{potentials: potentials}), + do: Enum.max_by(potentials, & &1.cosine_delta).cosine_delta + + defp build_fix_hint_lookup do + Scorer.all_yamls() + |> Enum.flat_map(fn {yaml_path, data} -> + category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") + + Enum.flat_map(data, fn {behavior, behavior_data} -> + case get_in(behavior_data, ["_fix_hint"]) do + nil -> [] + hint -> [{{category, behavior}, hint}] + end + end) + end) + |> Map.new() + end +end diff --git a/test/codeqa/health_report/top_blocks_test.exs b/test/codeqa/health_report/top_blocks_test.exs new file mode 100644 index 0000000..c5f1bdb --- /dev/null +++ b/test/codeqa/health_report/top_blocks_test.exs @@ -0,0 +1,151 @@ +defmodule CodeQA.HealthReport.TopBlocksTest do + use ExUnit.Case, async: true + + alias CodeQA.HealthReport.TopBlocks + alias CodeQA.Git.ChangedFile + + # A node with cosine_delta 0.60 — will be :critical when codebase_cosine = 0.0 (gap=1.0, ratio=0.60) + defp 
make_node(cosine_delta, token_count \\ 20) do + %{ + "start_line" => 1, + "end_line" => 10, + "type" => "code", + "token_count" => token_count, + "refactoring_potentials" => [ + %{ + "category" => "function_design", + "behavior" => "cyclomatic_complexity_under_10", + "cosine_delta" => cosine_delta + } + ], + "children" => [] + } + end + + defp make_results(nodes) do + %{"files" => %{"lib/foo.ex" => %{"nodes" => nodes}}} + end + + defp lookup(cosine \\ 0.0) do + %{{"function_design", "cyclomatic_complexity_under_10"} => cosine} + end + + describe "severity classification" do + test ":critical when severity_ratio > 0.50" do + # gap = max(0.01, 1.0 - 0.0) = 1.0, ratio = 0.60 / 1.0 = 0.60 > 0.50 + [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) + assert hd(hd(group.blocks).potentials).severity == :critical + end + + test ":high when severity_ratio > 0.25 and <= 0.50" do + # ratio = 0.30 / 1.0 = 0.30 + [group] = TopBlocks.build(make_results([make_node(0.30)]), [], lookup()) + assert hd(hd(group.blocks).potentials).severity == :high + end + + test ":medium when severity_ratio > 0.10 and <= 0.25" do + # ratio = 0.15 / 1.0 = 0.15 + [group] = TopBlocks.build(make_results([make_node(0.15)]), [], lookup()) + assert hd(hd(group.blocks).potentials).severity == :medium + end + + test "filtered when severity_ratio <= 0.10" do + # ratio = 0.05 / 1.0 = 0.05 — block should not appear + assert TopBlocks.build(make_results([make_node(0.05)]), [], lookup()) == [] + end + + test "gap floor prevents division by zero when codebase_cosine = 1.0" do + # gap = max(0.01, 1.0 - 1.0) = 0.01, ratio = 0.02 / 0.01 = 2.0 → :critical + [group] = TopBlocks.build(make_results([make_node(0.02)]), [], lookup(1.0)) + assert hd(hd(group.blocks).potentials).severity == :critical + end + + test "gap handles negative codebase_cosine" do + # codebase_cosine = -0.5, gap = max(0.01, 1.0 - (-0.5)) = 1.5 + # ratio = 0.60 / 1.5 = 0.40 → :high + [group] = 
TopBlocks.build(make_results([make_node(0.60)]), [], lookup(-0.5)) + assert hd(hd(group.blocks).potentials).severity == :high + end + + test "unknown behavior defaults codebase_cosine to 0.0" do + lookup_empty = %{} + # gap = 1.0, ratio = 0.60 → :critical + [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup_empty) + assert hd(hd(group.blocks).potentials).severity == :critical + end + end + + describe "changed_files filtering" do + test "when changed_files is empty, shows all files" do + [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) + assert group.path == "lib/foo.ex" + assert group.status == nil + end + + test "when changed_files given, only shows matching files" do + changed = [%ChangedFile{path: "lib/other.ex", status: "added"}] + assert TopBlocks.build(make_results([make_node(0.60)]), changed, lookup()) == [] + end + + test "status comes from ChangedFile struct" do + changed = [%ChangedFile{path: "lib/foo.ex", status: "modified"}] + [group] = TopBlocks.build(make_results([make_node(0.60)]), changed, lookup()) + assert group.status == "modified" + end + end + + describe "block filtering" do + test "blocks with token_count < 10 are excluded" do + assert TopBlocks.build(make_results([make_node(0.60, 9)]), [], lookup()) == [] + end + + test "blocks are ordered by highest cosine_delta descending" do + node_low = make_node(0.20) + node_high = put_in(make_node(0.60), ["start_line"], 20) + results = %{"files" => %{"lib/foo.ex" => %{"nodes" => [node_low, node_high]}}} + + [group] = TopBlocks.build(results, [], lookup()) + deltas = Enum.map(group.blocks, fn b -> hd(b.potentials).cosine_delta end) + assert deltas == Enum.sort(deltas, :desc) + end + + test "children nodes are included" do + parent = %{ + "start_line" => 1, + "end_line" => 20, + "type" => "code", + "token_count" => 5, + "refactoring_potentials" => [], + "children" => [make_node(0.60)] + } + + [group] = TopBlocks.build(make_results([parent]), [], lookup()) + 
assert length(group.blocks) == 1 + end + end + + describe "fix hints" do + test "includes fix_hint string for known behavior" do + # function_design/cyclomatic_complexity_under_10 has _fix_hint in YAML + [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) + potential = hd(hd(group.blocks).potentials) + assert is_binary(potential.fix_hint) + end + + test "fix_hint is nil for unknown behavior" do + node = %{ + "start_line" => 1, + "end_line" => 10, + "type" => "code", + "token_count" => 20, + "refactoring_potentials" => [ + %{"category" => "unknown_cat", "behavior" => "unknown_beh", "cosine_delta" => 0.60} + ], + "children" => [] + } + + [group] = TopBlocks.build(make_results([node]), [], %{}) + assert hd(hd(group.blocks).potentials).fix_hint == nil + end + end +end From 10dba0908731976cc4e0f2c372c22b6269704ea6 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 13:01:29 +0100 Subject: [PATCH 34/71] fix(top-blocks): guard non-map YAML entries in build_fix_hint_lookup --- lib/codeqa/health_report/top_blocks.ex | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/codeqa/health_report/top_blocks.ex b/lib/codeqa/health_report/top_blocks.ex index cc38299..dda0bb8 100644 --- a/lib/codeqa/health_report/top_blocks.ex +++ b/lib/codeqa/health_report/top_blocks.ex @@ -101,11 +101,15 @@ defmodule CodeQA.HealthReport.TopBlocks do |> Enum.flat_map(fn {yaml_path, data} -> category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") - Enum.flat_map(data, fn {behavior, behavior_data} -> - case get_in(behavior_data, ["_fix_hint"]) do - nil -> [] - hint -> [{{category, behavior}, hint}] - end + Enum.flat_map(data, fn + {behavior, behavior_data} when is_map(behavior_data) -> + case Map.get(behavior_data, "_fix_hint") do + nil -> [] + hint -> [{{category, behavior}, hint}] + end + + _ -> + [] end) end) |> Map.new() From 7d47f69fbdecaf6be18f0763dd94b233f00df773 Mon Sep 17 00:00:00 2001 From: Andreas 
Solleder Date: Sat, 21 Mar 2026 13:05:33 +0100 Subject: [PATCH 35/71] feat(health-report): add top_blocks, pr_summary, codebase_delta; drop worst_offenders Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/health_report.ex | 121 ++++++++++++++++++++++++---- test/codeqa/health_report_test.exs | 124 +++++++++++++++++++++++++++++ 2 files changed, 229 insertions(+), 16 deletions(-) create mode 100644 test/codeqa/health_report_test.exs diff --git a/lib/codeqa/health_report.ex b/lib/codeqa/health_report.ex index 6b5c4d6..6ba09ac 100644 --- a/lib/codeqa/health_report.ex +++ b/lib/codeqa/health_report.ex @@ -1,14 +1,15 @@ defmodule CodeQA.HealthReport do @moduledoc "Orchestrates health report generation from analysis results." - alias CodeQA.HealthReport.{Config, Grader, Formatter} + alias CodeQA.HealthReport.{Config, Grader, Formatter, Delta, TopBlocks} alias CodeQA.CombinedMetrics.{FileScorer, SampleRunner} @spec generate(map(), keyword()) :: map() def generate(analysis_results, opts \\ []) do config_path = Keyword.get(opts, :config) - detail = Keyword.get(opts, :detail, :default) - top_n = Keyword.get(opts, :top, 5) + _detail = Keyword.get(opts, :detail, :default) + base_results = Keyword.get(opts, :base_results) + changed_files = Keyword.get(opts, :changed_files, []) %{ categories: categories, @@ -26,20 +27,12 @@ defmodule CodeQA.HealthReport do categories |> Grader.grade_aggregate(aggregate, grade_scale) |> Enum.zip(categories) - |> Enum.map(fn {graded, cat_def} -> + |> Enum.map(fn {graded, _cat_def} -> summary = build_category_summary(graded) - cat_top = Map.get(cat_def, :top, top_n) - - worst = - case detail do - :summary -> [] - :full -> Grader.worst_offenders(cat_def, files, map_size(files), grade_scale) - _default -> Grader.worst_offenders(cat_def, files, cat_top, grade_scale) - end graded |> Map.put(:type, :threshold) - |> Map.merge(%{summary: summary, worst_offenders: worst}) + |> Map.merge(%{summary: summary, worst_offenders: []}) end) worst_files_map = 
FileScorer.worst_files_per_behavior(files, combined_top: combined_top) @@ -47,7 +40,6 @@ defmodule CodeQA.HealthReport do cosine_grades = Grader.grade_cosine_categories(aggregate, worst_files_map, grade_scale, project_langs) - # TODO(option-c): a unified flat issues list would replace the current per-category worst offenders loop; all category results would be flattened, deduplicated by file+line, and re-ranked by a cross-category severity score before rendering. all_categories = (threshold_grades ++ cosine_grades) |> Enum.map(fn cat -> @@ -58,14 +50,44 @@ defmodule CodeQA.HealthReport do metadata = build_metadata(analysis_results) - top_issues = SampleRunner.diagnose_aggregate(aggregate, top: 10, languages: project_langs) + all_cosines = + SampleRunner.diagnose_aggregate(aggregate, top: 99_999, languages: project_langs) + + top_issues = Enum.take(all_cosines, 10) + + codebase_cosine_lookup = + Map.new(all_cosines, fn i -> {{i.category, i.behavior}, i.cosine} end) + + top_blocks = TopBlocks.build(analysis_results, changed_files, codebase_cosine_lookup) + + {codebase_delta, pr_summary} = + if base_results do + build_delta_and_summary( + base_results, + analysis_results, + overall_score, + overall_grade, + all_categories, + categories, + grade_scale, + impact_map, + combined_top, + changed_files, + top_blocks + ) + else + {nil, nil} + end %{ metadata: metadata, + pr_summary: pr_summary, overall_score: overall_score, overall_grade: overall_grade, + codebase_delta: codebase_delta, categories: all_categories, - top_issues: top_issues + top_issues: top_issues, + top_blocks: top_blocks } end @@ -74,6 +96,73 @@ defmodule CodeQA.HealthReport do Formatter.format_markdown(report, detail, format) end + defp build_delta_and_summary( + base_results, + head_results, + head_score, + head_grade, + _head_categories, + category_defs, + grade_scale, + impact_map, + combined_top, + changed_files, + top_blocks + ) do + delta = Delta.compute(base_results, head_results) + + base_aggregate 
= get_in(base_results, ["codebase", "aggregate"]) || %{} + base_files = Map.get(base_results, "files", %{}) + base_project_langs = project_languages(base_files) + + base_threshold_grades = + category_defs + |> Grader.grade_aggregate(base_aggregate, grade_scale) + |> Enum.zip(category_defs) + |> Enum.map(fn {graded, _cat_def} -> + graded + |> Map.put(:type, :threshold) + |> Map.merge(%{summary: "", worst_offenders: []}) + end) + + base_worst_files_map = + FileScorer.worst_files_per_behavior(base_files, combined_top: combined_top) + + base_cosine_grades = + Grader.grade_cosine_categories( + base_aggregate, + base_worst_files_map, + grade_scale, + base_project_langs + ) + + base_all_categories = + (base_threshold_grades ++ base_cosine_grades) + |> Enum.map(fn cat -> + Map.put(cat, :impact, Map.get(impact_map, to_string(cat.key), 1)) + end) + + {base_score, base_grade} = Grader.overall_score(base_all_categories, grade_scale, impact_map) + + blocks_flagged = Enum.sum(Enum.map(top_blocks, fn g -> length(g.blocks) end)) + files_added = Enum.count(changed_files, &(&1.status == "added")) + files_modified = Enum.count(changed_files, &(&1.status == "modified")) + + summary = %{ + base_score: base_score, + head_score: head_score, + score_delta: head_score - base_score, + base_grade: base_grade, + head_grade: head_grade, + blocks_flagged: blocks_flagged, + files_changed: length(changed_files), + files_added: files_added, + files_modified: files_modified + } + + {delta, summary} + end + defp build_metadata(analysis_results) do meta = Map.get(analysis_results, "metadata", %{}) diff --git a/test/codeqa/health_report_test.exs b/test/codeqa/health_report_test.exs new file mode 100644 index 0000000..5455a73 --- /dev/null +++ b/test/codeqa/health_report_test.exs @@ -0,0 +1,124 @@ +defmodule CodeQA.HealthReportTest do + use ExUnit.Case, async: true + + describe "generate/2 output keys" do + @tag :slow + test "without base_results: pr_summary and codebase_delta are nil" do + files = 
%{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} + results = CodeQA.Engine.Analyzer.analyze_codebase(files) + results = CodeQA.BlockImpactAnalyzer.analyze(results, files) + + report = CodeQA.HealthReport.generate(results) + + assert report.pr_summary == nil + assert report.codebase_delta == nil + assert is_list(report.top_blocks) + assert Map.has_key?(report, :overall_score) + assert Map.has_key?(report, :overall_grade) + assert Map.has_key?(report, :categories) + assert Map.has_key?(report, :top_issues) + end + + @tag :slow + test "without base_results: top_blocks shows all files with significant blocks" do + files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} + results = CodeQA.Engine.Analyzer.analyze_codebase(files) + results = CodeQA.BlockImpactAnalyzer.analyze(results, files) + + report = CodeQA.HealthReport.generate(results) + + # top_blocks is a list of file groups (may be empty if no blocks above threshold) + assert is_list(report.top_blocks) + + Enum.each(report.top_blocks, fn group -> + assert Map.has_key?(group, :path) + assert Map.has_key?(group, :status) + assert Map.has_key?(group, :blocks) + assert group.status == nil + end) + end + + test "worst_offenders is always empty in categories" do + files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} + results = CodeQA.Engine.Analyzer.analyze_codebase(files) + results = CodeQA.BlockImpactAnalyzer.analyze(results, files) + + report = CodeQA.HealthReport.generate(results) + + Enum.each(report.categories, fn cat -> + assert Map.get(cat, :worst_offenders, []) == [] + end) + end + end + + describe "generate/2 with base_results" do + @tag :slow + test "pr_summary is populated" do + files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} + head_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + head_results = CodeQA.BlockImpactAnalyzer.analyze(head_results, files) + base_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + + 
changed = [%CodeQA.Git.ChangedFile{path: "lib/foo.ex", status: "modified"}] + + report = + CodeQA.HealthReport.generate(head_results, + base_results: base_results, + changed_files: changed + ) + + assert %{ + base_score: base_score, + head_score: head_score, + score_delta: delta, + base_grade: _, + head_grade: _, + blocks_flagged: flagged, + files_changed: 1, + files_added: 0, + files_modified: 1 + } = report.pr_summary + + assert is_integer(base_score) + assert is_integer(head_score) + assert delta == head_score - base_score + assert is_integer(flagged) + end + + @tag :slow + test "codebase_delta is populated" do + files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} + head_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + head_results = CodeQA.BlockImpactAnalyzer.analyze(head_results, files) + base_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + + report = CodeQA.HealthReport.generate(head_results, base_results: base_results) + + assert %{base: %{aggregate: _}, head: %{aggregate: _}, delta: %{aggregate: _}} = + report.codebase_delta + end + + @tag :slow + test "top_blocks scoped to changed_files" do + files = %{ + "lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n", + "lib/bar.ex" => "defmodule Bar do\n def baz, do: :ok\nend\n" + } + + head_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + head_results = CodeQA.BlockImpactAnalyzer.analyze(head_results, files) + base_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + + changed = [%CodeQA.Git.ChangedFile{path: "lib/foo.ex", status: "modified"}] + + report = + CodeQA.HealthReport.generate(head_results, + base_results: base_results, + changed_files: changed + ) + + paths = Enum.map(report.top_blocks, & &1.path) + refute "lib/bar.ex" in paths + end + end +end From d563d6236e65bafb006bf7c2581a56515354bc1b Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 13:08:11 +0100 Subject: [PATCH 36/71] fix(health-report): remove dead _detail 
binding; tag slow test --- lib/codeqa/health_report.ex | 1 - test/codeqa/health_report_test.exs | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/codeqa/health_report.ex b/lib/codeqa/health_report.ex index 6ba09ac..1e8be12 100644 --- a/lib/codeqa/health_report.ex +++ b/lib/codeqa/health_report.ex @@ -7,7 +7,6 @@ defmodule CodeQA.HealthReport do @spec generate(map(), keyword()) :: map() def generate(analysis_results, opts \\ []) do config_path = Keyword.get(opts, :config) - _detail = Keyword.get(opts, :detail, :default) base_results = Keyword.get(opts, :base_results) changed_files = Keyword.get(opts, :changed_files, []) diff --git a/test/codeqa/health_report_test.exs b/test/codeqa/health_report_test.exs index 5455a73..1eb4c64 100644 --- a/test/codeqa/health_report_test.exs +++ b/test/codeqa/health_report_test.exs @@ -38,6 +38,7 @@ defmodule CodeQA.HealthReportTest do end) end + @tag :slow test "worst_offenders is always empty in categories" do files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} results = CodeQA.Engine.Analyzer.analyze_codebase(files) From b9f1bd7d98e149e561642f2b86150b744e53a685 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 13:12:47 +0100 Subject: [PATCH 37/71] feat(formatter): add block, delta, PR summary sections; remove worst_offenders (plain) Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/health_report/formatter/plain.ex | 188 ++++++++++++---- test/codeqa/health_report/formatter_test.exs | 216 ++++++++++++++++--- 2 files changed, 332 insertions(+), 72 deletions(-) diff --git a/lib/codeqa/health_report/formatter/plain.ex b/lib/codeqa/health_report/formatter/plain.ex index f0e0a8a..f33a139 100644 --- a/lib/codeqa/health_report/formatter/plain.ex +++ b/lib/codeqa/health_report/formatter/plain.ex @@ -4,10 +4,13 @@ defmodule CodeQA.HealthReport.Formatter.Plain do @spec render(map(), atom()) :: String.t() def render(report, detail) do [ + pr_summary_section(Map.get(report, 
:pr_summary)), header(report), cosine_legend(), + delta_section(Map.get(report, :codebase_delta)), overall_table(report), top_issues_section(Map.get(report, :top_issues, []), detail), + blocks_section(Map.get(report, :top_blocks, [])), category_sections(report.categories, detail) ] |> List.flatten() @@ -116,6 +119,49 @@ defmodule CodeQA.HealthReport.Formatter.Plain do end) end + defp worst_offenders_section(_cat, :summary), do: [] + + defp worst_offenders_section(cat, _detail) do + offenders = Map.get(cat, :worst_offenders, []) + + if offenders == [] do + [] + else + averages = Map.new(cat.metric_scores, &{&1.name, &1.value}) + + rows = + Enum.map(offenders, fn f -> + metric_issues = + Enum.map_join(f.metric_scores, "
    ", fn m -> + avg = Map.get(averages, m.name) + avg_str = if avg, do: " (avg: #{format_num(avg)})", else: "" + "#{direction(m.good)}#{m.name}=#{format_num(m.value)}#{avg_str}" + end) + + where_part = format_where_part(Map.get(f, :top_nodes, [])) + fix_hint = threshold_fix_hint(f.metric_scores) + fix_part = if fix_hint, do: "**Fix:** #{fix_hint}", else: nil + + extra = + [where_part, fix_part] + |> Enum.reject(&is_nil/1) + |> Enum.map_join("", &"
    #{&1}") + + issues = metric_issues <> extra + + "| #{format_path(f.path)}
    #{format_lines(f[:lines])} lines · #{format_size(f[:bytes])} | #{f.grade} | #{issues} |" + end) + + [ + "### Worst Offenders", + "", + "| File | Grade | Issues |", + "|------|-------|--------|" + | rows + ] ++ [""] + end + end + defp format_cosine_details(f, fix_hint) do why_part = format_why_part(Map.get(f, :top_metrics, [])) where_part = format_where_part(Map.get(f, :top_nodes, [])) @@ -201,49 +247,6 @@ defmodule CodeQA.HealthReport.Formatter.Plain do end end - defp worst_offenders_section(_cat, :summary), do: [] - - defp worst_offenders_section(cat, _detail) do - offenders = Map.get(cat, :worst_offenders, []) - - if offenders == [] do - [] - else - averages = Map.new(cat.metric_scores, &{&1.name, &1.value}) - - rows = - Enum.map(offenders, fn f -> - metric_issues = - Enum.map_join(f.metric_scores, "
    ", fn m -> - avg = Map.get(averages, m.name) - avg_str = if avg, do: " (avg: #{format_num(avg)})", else: "" - "#{direction(m.good)}#{m.name}=#{format_num(m.value)}#{avg_str}" - end) - - where_part = format_where_part(Map.get(f, :top_nodes, [])) - fix_hint = threshold_fix_hint(f.metric_scores) - fix_part = if fix_hint, do: "**Fix:** #{fix_hint}", else: nil - - extra = - [where_part, fix_part] - |> Enum.reject(&is_nil/1) - |> Enum.map_join("", &"
    #{&1}") - - issues = metric_issues <> extra - - "| #{format_path(f.path)}
    #{format_lines(f[:lines])} lines · #{format_size(f[:bytes])} | #{f.grade} | #{issues} |" - end) - - [ - "### Worst Offenders", - "", - "| File | Grade | Issues |", - "|------|-------|--------|" - | rows - ] ++ [""] - end - end - defp threshold_fix_hint(metric_scores) do worst = Enum.min_by(metric_scores, & &1.score, fn -> nil end) @@ -308,4 +311,103 @@ defmodule CodeQA.HealthReport.Formatter.Plain do | rows ] ++ [""] end + + defp pr_summary_section(nil), do: [] + + defp pr_summary_section(summary) do + delta_str = + if summary.score_delta >= 0, + do: "+#{summary.score_delta}", + else: "#{summary.score_delta}" + + status_str = "#{summary.files_modified} modified, #{summary.files_added} added" + + [ + "> **Score:** #{summary.base_grade} → #{summary.head_grade} | **Δ** #{delta_str} pts | **#{summary.blocks_flagged}** blocks flagged across #{summary.files_changed} files | #{status_str}", + "" + ] + end + + defp delta_section(nil), do: [] + + defp delta_section(delta) do + base_agg = delta.base.aggregate + head_agg = delta.head.aggregate + + metrics = [ + {"Readability", "readability", "mean_flesch_adapted"}, + {"Complexity", "halstead", "mean_difficulty"}, + {"Duplication", "compression", "mean_redundancy"}, + {"Structure", "branching", "mean_branch_count"} + ] + + rows = + Enum.flat_map(metrics, fn {label, group, key} -> + base_val = get_in(base_agg, [group, key]) + head_val = get_in(head_agg, [group, key]) + + if is_number(base_val) and is_number(head_val) do + diff = Float.round(head_val - base_val, 2) + diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}" + ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"] + else + [] + end + end) + + if rows == [] do + [] + else + [ + "## Metric Changes", + "", + "| Category | Base | Head | Δ |", + "|----------|------|------|---|" + | rows + ] ++ [""] + end + end + + defp blocks_section([]), do: [] + + defp blocks_section(top_blocks) do + total = 
Enum.sum(Enum.map(top_blocks, fn g -> length(g.blocks) end)) + + file_parts = + Enum.flat_map(top_blocks, fn group -> + status_str = if group.status, do: " [#{group.status}]", else: "" + + block_lines = + Enum.flat_map(group.blocks, fn block -> + end_line = block.end_line || block.start_line + + header = + "**lines #{block.start_line}–#{end_line}** · #{block.type} · #{block.token_count} tokens" + + potential_lines = + Enum.flat_map(block.potentials, fn p -> + icon = severity_icon(p.severity) + delta_str = format_num(p.cosine_delta) + label = "#{String.upcase(to_string(p.severity))}" + line = " #{icon} #{label} #{p.category} / #{p.behavior} (Δ #{delta_str})" + fix = if p.fix_hint, do: [" → #{p.fix_hint}"], else: [] + [line | fix] + end) + + [header | potential_lines] ++ [""] + end) + + ["### #{group.path}#{status_str}", "" | block_lines] + end) + + [ + "## Blocks (#{total} flagged across #{length(top_blocks)} files)", + "" + | file_parts + ] + end + + defp severity_icon(:critical), do: "🔴" + defp severity_icon(:high), do: "🟠" + defp severity_icon(:medium), do: "🟡" end diff --git a/test/codeqa/health_report/formatter_test.exs b/test/codeqa/health_report/formatter_test.exs index 52c6e30..d7eaf95 100644 --- a/test/codeqa/health_report/formatter_test.exs +++ b/test/codeqa/health_report/formatter_test.exs @@ -183,19 +183,8 @@ defmodule CodeQA.HealthReport.FormatterTest do assert result =~ "| Complexity | D | 35 | 5 |" end - test "includes worst offenders section" do - result = Formatter.format_markdown(@sample_report, :default, :plain) - assert result =~ "### Worst Offenders" - refute result =~ "lib/
    `foo.ex`" - assert result =~ "`lib/foo.ex`" - assert result =~ "120 lines · 3.8 KB" - assert result =~ "↑ flesch_adapted=65.00 (avg: 102.50)" - refute result =~ "↑ flesch_adapted=65.00, " - end - test "summary detail omits category sections" do result = Formatter.format_markdown(@sample_report, :summary, :plain) - refute result =~ "### Worst Offenders" refute result =~ "Codebase averages" end end @@ -213,13 +202,6 @@ defmodule CodeQA.HealthReport.FormatterTest do assert result =~ "| single_responsibility | 0.45 | 78 | B+ |" end - test "renders cosine worst offenders per behavior" do - result = Formatter.format_markdown(@report_with_cosine, :default, :plain) - assert result =~ "### Worst Offenders: no_boolean_parameter" - assert result =~ "| File | Cosine | Details |" - assert result =~ "| `lib/foo/bar.ex` | -0.71 |" - end - test "omits behaviors with no worst offenders" do result = Formatter.format_markdown(@report_with_cosine, :default, :plain) refute result =~ "### Worst Offenders: single_responsibility" @@ -236,6 +218,138 @@ defmodule CodeQA.HealthReport.FormatterTest do end end + describe "plain formatter: PR summary section" do + @sample_report_with_pr Map.put(@sample_report, :pr_summary, %{ + base_score: 85, + head_score: 77, + score_delta: -8, + base_grade: "B+", + head_grade: "C+", + blocks_flagged: 6, + files_changed: 3, + files_added: 1, + files_modified: 2 + }) + + test "renders PR summary line when pr_summary present" do + result = Formatter.format_markdown(@sample_report_with_pr, :default, :plain) + assert result =~ "B+" + assert result =~ "C+" + assert result =~ "-8" + assert result =~ "6" + assert result =~ "1 added" + assert result =~ "2 modified" + end + + test "omits PR summary when pr_summary is nil" do + result = Formatter.format_markdown(@sample_report, :default, :plain) + refute result =~ "Score:" + end + end + + describe "plain formatter: delta section" do + @delta %{ + base: %{ + aggregate: %{ + "readability" => %{"mean_flesch_adapted" 
=> 65.0}, + "halstead" => %{"mean_difficulty" => 12.0} + } + }, + head: %{ + aggregate: %{ + "readability" => %{"mean_flesch_adapted" => 61.0}, + "halstead" => %{"mean_difficulty" => 15.0} + } + } + } + + @sample_report_with_delta Map.put(@sample_report, :codebase_delta, @delta) + + test "renders metric changes table when codebase_delta present" do + result = Formatter.format_markdown(@sample_report_with_delta, :default, :plain) + assert result =~ "Metric Changes" + assert result =~ "Readability" + assert result =~ "65.00" + assert result =~ "61.00" + end + + test "omits delta section when codebase_delta is nil" do + result = Formatter.format_markdown(@sample_report, :default, :plain) + refute result =~ "Metric Changes" + end + end + + describe "plain formatter: block section" do + @block_potential %{ + category: "function_design", + behavior: "cyclomatic_complexity_under_10", + cosine_delta: 0.41, + severity: :critical, + fix_hint: "Reduce branching" + } + + @top_blocks [ + %{ + path: "lib/foo.ex", + status: "modified", + blocks: [ + %{ + start_line: 42, + end_line: 67, + type: "code", + token_count: 84, + potentials: [@block_potential] + } + ] + } + ] + + @sample_report_with_blocks Map.put(@sample_report, :top_blocks, @top_blocks) + + test "renders block section header" do + result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) + assert result =~ "Blocks" + assert result =~ "1 flagged" + end + + test "renders file group with status" do + result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) + assert result =~ "lib/foo.ex" + assert result =~ "modified" + end + + test "renders block location and type" do + result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) + assert result =~ "lines 42" + assert result =~ "67" + assert result =~ "84 tokens" + end + + test "renders severity icon and behavior" do + result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) + 
assert result =~ "🔴" + assert result =~ "CRITICAL" + assert result =~ "cyclomatic_complexity_under_10" + assert result =~ "0.41" + end + + test "renders fix hint" do + result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) + assert result =~ "Reduce branching" + end + + test "omits block section when top_blocks is empty" do + report = Map.put(@sample_report, :top_blocks, []) + result = Formatter.format_markdown(report, :default, :plain) + refute result =~ "## Blocks" + end + + test "omits block section when top_blocks key absent" do + result = Formatter.format_markdown(@sample_report, :default, :plain) + refute result =~ "## Blocks" + end + end + describe "plain formatter: cosine offenders with enriched data" do defp report_with_enriched_cosine_plain do %{ @@ -283,7 +397,11 @@ defmodule CodeQA.HealthReport.FormatterTest do } report = %{ - metadata: %{path: "/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 10}, + metadata: %{ + path: "/home/user/project", + timestamp: "2026-03-11T00:00:00Z", + total_files: 10 + }, overall_score: 42, overall_grade: "D+", categories: [category] @@ -321,7 +439,11 @@ defmodule CodeQA.HealthReport.FormatterTest do } report = %{ - metadata: %{path: "/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 10}, + metadata: %{ + path: "/home/user/project", + timestamp: "2026-03-11T00:00:00Z", + total_files: 10 + }, overall_score: 42, overall_grade: "D+", categories: [category] @@ -347,7 +469,11 @@ defmodule CodeQA.HealthReport.FormatterTest do } report = %{ - metadata: %{path: "/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 10}, + metadata: %{ + path: "/home/user/project", + timestamp: "2026-03-11T00:00:00Z", + total_files: 10 + }, overall_score: 42, overall_grade: "D+", categories: [category] @@ -384,7 +510,11 @@ defmodule CodeQA.HealthReport.FormatterTest do } report = %{ - metadata: %{path: "/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 
10}, + metadata: %{ + path: "/home/user/project", + timestamp: "2026-03-11T00:00:00Z", + total_files: 10 + }, overall_score: 42, overall_grade: "D+", categories: [category] @@ -421,7 +551,11 @@ defmodule CodeQA.HealthReport.FormatterTest do } report = %{ - metadata: %{path: "/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 10}, + metadata: %{ + path: "/home/user/project", + timestamp: "2026-03-11T00:00:00Z", + total_files: 10 + }, overall_score: 42, overall_grade: "D+", categories: [category] @@ -472,7 +606,11 @@ defmodule CodeQA.HealthReport.FormatterTest do } report = %{ - metadata: %{path: "/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 10}, + metadata: %{ + path: "/home/user/project", + timestamp: "2026-03-11T00:00:00Z", + total_files: 10 + }, overall_score: 32, overall_grade: "F", categories: [category] @@ -500,7 +638,11 @@ defmodule CodeQA.HealthReport.FormatterTest do } report = %{ - metadata: %{path: "/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 10}, + metadata: %{ + path: "/home/user/project", + timestamp: "2026-03-11T00:00:00Z", + total_files: 10 + }, overall_score: 32, overall_grade: "F", categories: [category] @@ -542,7 +684,11 @@ defmodule CodeQA.HealthReport.FormatterTest do } report = %{ - metadata: %{path: "/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 10}, + metadata: %{ + path: "/home/user/project", + timestamp: "2026-03-11T00:00:00Z", + total_files: 10 + }, overall_score: 10, overall_grade: "F", categories: [category] @@ -861,7 +1007,11 @@ defmodule CodeQA.HealthReport.FormatterTest do } report = %{ - metadata: %{path: "/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 10}, + metadata: %{ + path: "/home/user/project", + timestamp: "2026-03-11T00:00:00Z", + total_files: 10 + }, overall_score: 42, overall_grade: "D+", categories: [category] @@ -898,7 +1048,11 @@ defmodule CodeQA.HealthReport.FormatterTest do } report = %{ - metadata: %{path: 
"/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 10}, + metadata: %{ + path: "/home/user/project", + timestamp: "2026-03-11T00:00:00Z", + total_files: 10 + }, overall_score: 42, overall_grade: "D+", categories: [category] @@ -1025,7 +1179,11 @@ defmodule CodeQA.HealthReport.FormatterTest do } report = %{ - metadata: %{path: "/home/user/project", timestamp: "2026-03-11T00:00:00Z", total_files: 10}, + metadata: %{ + path: "/home/user/project", + timestamp: "2026-03-11T00:00:00Z", + total_files: 10 + }, overall_score: 10, overall_grade: "F", categories: [category] From 248bd300692e7c263b95979b7abf113e31668efb Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 13:16:38 +0100 Subject: [PATCH 38/71] feat(formatter): remove worst_offenders rendering and tests from plain formatter Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/health_report/formatter/plain.ex | 171 +-------- test/codeqa/health_report/formatter_test.exs | 359 ------------------- 2 files changed, 4 insertions(+), 526 deletions(-) diff --git a/lib/codeqa/health_report/formatter/plain.ex b/lib/codeqa/health_report/formatter/plain.ex index f33a139..37c7dfb 100644 --- a/lib/codeqa/health_report/formatter/plain.ex +++ b/lib/codeqa/health_report/formatter/plain.ex @@ -58,13 +58,12 @@ defmodule CodeQA.HealthReport.Formatter.Plain do end) end - defp render_category(%{type: :cosine} = cat, detail) do - cosine_section_header(cat) ++ - cosine_behaviors_table(cat) ++ cosine_worst_offenders(cat, detail) + defp render_category(%{type: :cosine} = cat, _detail) do + cosine_section_header(cat) ++ cosine_behaviors_table(cat) end - defp render_category(cat, detail) do - section_header(cat) ++ metric_detail(cat) ++ worst_offenders_section(cat, detail) + defp render_category(cat, _detail) do + section_header(cat) ++ metric_detail(cat) end defp cosine_section_header(cat) do @@ -91,133 +90,6 @@ defmodule CodeQA.HealthReport.Formatter.Plain do ] ++ [""] end - defp 
cosine_worst_offenders(_cat, :summary), do: [] - - defp cosine_worst_offenders(cat, _detail) do - Enum.flat_map(cat.behaviors, fn b -> - offenders = Map.get(b, :worst_offenders, []) - - if offenders == [] do - [] - else - fix_hint = cosine_fix_hint(cat.key, b.behavior) - - rows = - Enum.map(offenders, fn f -> - details = format_cosine_details(f, fix_hint) - "| #{format_path(f.file)} | #{format_num(f.cosine)} | #{details} |" - end) - - [ - "### Worst Offenders: #{b.behavior}", - "", - "| File | Cosine | Details |", - "|------|--------|---------|" - | rows - ] ++ [""] - end - end) - end - - defp worst_offenders_section(_cat, :summary), do: [] - - defp worst_offenders_section(cat, _detail) do - offenders = Map.get(cat, :worst_offenders, []) - - if offenders == [] do - [] - else - averages = Map.new(cat.metric_scores, &{&1.name, &1.value}) - - rows = - Enum.map(offenders, fn f -> - metric_issues = - Enum.map_join(f.metric_scores, "
    ", fn m -> - avg = Map.get(averages, m.name) - avg_str = if avg, do: " (avg: #{format_num(avg)})", else: "" - "#{direction(m.good)}#{m.name}=#{format_num(m.value)}#{avg_str}" - end) - - where_part = format_where_part(Map.get(f, :top_nodes, [])) - fix_hint = threshold_fix_hint(f.metric_scores) - fix_part = if fix_hint, do: "**Fix:** #{fix_hint}", else: nil - - extra = - [where_part, fix_part] - |> Enum.reject(&is_nil/1) - |> Enum.map_join("", &"
    #{&1}") - - issues = metric_issues <> extra - - "| #{format_path(f.path)}
    #{format_lines(f[:lines])} lines · #{format_size(f[:bytes])} | #{f.grade} | #{issues} |" - end) - - [ - "### Worst Offenders", - "", - "| File | Grade | Issues |", - "|------|-------|--------|" - | rows - ] ++ [""] - end - end - - defp format_cosine_details(f, fix_hint) do - why_part = format_why_part(Map.get(f, :top_metrics, [])) - where_part = format_where_part(Map.get(f, :top_nodes, [])) - fix_part = if fix_hint, do: "**Fix:** #{fix_hint}", else: nil - - [why_part, where_part, fix_part] - |> Enum.reject(&is_nil/1) - |> Enum.join("
    ") - end - - defp format_why_part([]), do: nil - - defp format_why_part(top_metrics) do - parts = - Enum.map(top_metrics, fn %{metric: metric, contribution: contribution} -> - sign = if contribution < 0, do: "↓", else: "↑" - abs_val = abs(contribution) - - value_str = - if contribution < 0, - do: "−#{format_num(abs_val)}", - else: "+#{format_num(abs_val)}" - - "#{sign} #{metric} (#{value_str})" - end) - - "**Why:** #{Enum.join(parts, ", ")}" - end - - defp format_where_part([]), do: nil - - defp format_where_part(top_nodes) do - parts = - Enum.map(top_nodes, fn node -> - line = node["start_line"] || "?" - type = node["type"] || "unknown" - "line #{line} (#{type})" - end) - - "**Where:** #{Enum.join(parts, ", ")}" - end - - defp cosine_fix_hint(category_key, behavior) do - yaml_path = "priv/combined_metrics/#{category_key}.yml" - - case YamlElixir.read_from_file(yaml_path) do - {:ok, data} -> - get_in(data, [behavior, "_fix_hint"]) - - {:error, reason} -> - require Logger - Logger.debug("cosine_fix_hint: could not read #{yaml_path}: #{inspect(reason)}") - nil - end - end - defp section_header(cat) do metric_summary = Enum.map_join(cat.metric_scores, ", ", fn m -> "#{m.name}=#{format_num(m.value)}" end) @@ -247,41 +119,6 @@ defmodule CodeQA.HealthReport.Formatter.Plain do end end - defp threshold_fix_hint(metric_scores) do - worst = Enum.min_by(metric_scores, & &1.score, fn -> nil end) - - if worst do - categories = CodeQA.HealthReport.Categories.defaults() - all_metrics = Enum.flat_map(categories, & &1.metrics) - - found = - Enum.find(all_metrics, fn m -> - m.name == worst.name and m.source == worst.source - end) - - if found, do: Map.get(found, :fix_hint), else: nil - end - end - - defp format_path(path) when byte_size(path) < 80, do: "`#{path}`" - - defp format_path(path) do - case String.split(path, "/") do - [file] -> "`#{file}`" - parts -> Enum.join(Enum.drop(parts, -1), "/") <> "/
    `#{List.last(parts)}`" - end - end - - defp direction(:high), do: "↑ " - defp direction(_), do: "↓ " - - defp format_lines(nil), do: "—" - defp format_lines(n), do: to_string(n) - - defp format_size(nil), do: "—" - defp format_size(bytes) when bytes < 1024, do: "#{bytes} B" - defp format_size(bytes), do: "#{Float.round(bytes / 1024, 1)} KB" - defp format_num(value) when is_float(value), do: :erlang.float_to_binary(value, decimals: 2) defp format_num(value) when is_integer(value), do: to_string(value) defp format_num(value), do: to_string(value) diff --git a/test/codeqa/health_report/formatter_test.exs b/test/codeqa/health_report/formatter_test.exs index d7eaf95..c3166b9 100644 --- a/test/codeqa/health_report/formatter_test.exs +++ b/test/codeqa/health_report/formatter_test.exs @@ -202,20 +202,10 @@ defmodule CodeQA.HealthReport.FormatterTest do assert result =~ "| single_responsibility | 0.45 | 78 | B+ |" end - test "omits behaviors with no worst offenders" do - result = Formatter.format_markdown(@report_with_cosine, :default, :plain) - refute result =~ "### Worst Offenders: single_responsibility" - end - test "cosine category impact shown in overall table" do result = Formatter.format_markdown(@report_with_cosine, :default, :plain) assert result =~ "| Function Design | C | 64 | 1 |" end - - test "summary detail omits cosine worst offenders" do - result = Formatter.format_markdown(@report_with_cosine, :summary, :plain) - refute result =~ "### Worst Offenders: no_boolean_parameter" - end end describe "plain formatter: PR summary section" do @@ -350,355 +340,6 @@ defmodule CodeQA.HealthReport.FormatterTest do end end - describe "plain formatter: cosine offenders with enriched data" do - defp report_with_enriched_cosine_plain do - %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 64, - overall_grade: "C", - categories: [@enriched_cosine_category] - } - end - - test "cosine offenders table 
has 3 columns (File | Cosine | Details)" do - result = Formatter.format_markdown(report_with_enriched_cosine_plain(), :default, :plain) - assert result =~ "| File | Cosine | Details |" - assert result =~ "|------|--------|---------|" - end - - test "Details cell contains Why with direction indicators" do - result = Formatter.format_markdown(report_with_enriched_cosine_plain(), :default, :plain) - assert result =~ "**Why:** ↓ branching.mean_depth (−4.10), ↓ halstead.effort (−3.22)" - end - - test "Details cell omits Why when top_metrics empty" do - category = %{ - @enriched_cosine_category - | behaviors: [ - %{ - behavior: "no_boolean_parameter", - cosine: -0.5, - score: 42, - grade: "D+", - worst_offenders: [ - %{ - file: "lib/foo.ex", - cosine: -0.5, - top_metrics: [], - top_nodes: [%{"start_line" => 10, "type" => "block"}] - } - ] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 42, - overall_grade: "D+", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :plain) - refute result =~ "**Why:**" - assert result =~ "**Where:**" - end - - test "Details cell contains Where" do - result = Formatter.format_markdown(report_with_enriched_cosine_plain(), :default, :plain) - assert result =~ "**Where:** line 89 (block), line 134 (block)" - end - - test "Details cell omits Where when top_nodes empty" do - category = %{ - @enriched_cosine_category - | behaviors: [ - %{ - behavior: "no_boolean_parameter", - cosine: -0.5, - score: 42, - grade: "D+", - worst_offenders: [ - %{ - file: "lib/foo.ex", - cosine: -0.5, - top_metrics: [%{metric: "branching.mean_depth", contribution: -4.10}], - top_nodes: [] - } - ] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 42, - overall_grade: "D+", - categories: [category] - } - - result = 
Formatter.format_markdown(report, :default, :plain) - refute result =~ "**Where:**" - assert result =~ "**Why:**" - end - - test "Details cell omits Where when top_nodes key absent" do - category = %{ - @enriched_cosine_category - | behaviors: [ - %{ - behavior: "no_boolean_parameter", - cosine: -0.5, - score: 42, - grade: "D+", - worst_offenders: [%{file: "lib/foo.ex", cosine: -0.5}] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 42, - overall_grade: "D+", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :plain) - refute result =~ "**Where:**" - end - - test "Fix in Details when hint present" do - category = %{ - type: :cosine, - key: "variable_naming", - name: "Variable Naming", - score: 50, - grade: "C", - impact: 1, - behaviors: [ - %{ - behavior: "name_is_generic", - cosine: -0.5, - score: 42, - grade: "D+", - worst_offenders: [ - %{ - file: "lib/foo.ex", - cosine: -0.5, - top_metrics: [%{metric: "some.metric", contribution: -1.0}], - top_nodes: [] - } - ] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 42, - overall_grade: "D+", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :plain) - assert result =~ "**Fix:**" - end - - test "Fix omitted when hint absent" do - category = %{ - type: :cosine, - key: "nonexistent", - name: "Nonexistent", - score: 50, - grade: "C", - impact: 1, - behaviors: [ - %{ - behavior: "nonexistent_behavior", - cosine: -0.5, - score: 42, - grade: "D+", - worst_offenders: [ - %{ - file: "lib/foo.ex", - cosine: -0.5, - top_metrics: [%{metric: "some.metric", contribution: -1.0}], - top_nodes: [] - } - ] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 42, - 
overall_grade: "D+", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :plain) - refute result =~ "**Fix:**" - end - end - - describe "plain formatter: threshold offenders with enriched data" do - defp report_with_enriched_threshold_plain do - %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 32, - overall_grade: "F", - categories: [@enriched_threshold_category] - } - end - - test "threshold Issues cell contains Where when top_nodes present" do - result = - Formatter.format_markdown(report_with_enriched_threshold_plain(), :default, :plain) - - assert result =~ "**Where:** line 201 (block), line 312 (block)" - end - - test "threshold Issues cell omits Where when top_nodes empty" do - category = %{ - @enriched_threshold_category - | worst_offenders: [ - %{ - path: "lib/bar.ex", - score: 32, - grade: "F", - lines: 100, - bytes: 3000, - metric_scores: [ - %{name: "difficulty", source: "halstead", good: :low, value: 99.0, score: 0} - ], - top_nodes: [] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 32, - overall_grade: "F", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :plain) - refute result =~ "**Where:**" - end - - test "threshold Issues cell omits Where when top_nodes key absent" do - category = %{ - @enriched_threshold_category - | worst_offenders: [ - %{ - path: "lib/bar.ex", - score: 32, - grade: "F", - lines: 100, - bytes: 3000, - metric_scores: [ - %{name: "difficulty", source: "halstead", good: :low, value: 99.0, score: 0} - ] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 32, - overall_grade: "F", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :plain) - refute result =~ 
"**Where:**" - end - - test "Fix in threshold Issues when hint present" do - result = - Formatter.format_markdown(report_with_enriched_threshold_plain(), :default, :plain) - - assert result =~ "**Fix:** High operator/operand ratio" - end - - test "Fix omitted from threshold Issues when hint absent" do - category = %{ - @enriched_threshold_category - | worst_offenders: [ - %{ - path: "lib/bar.ex", - score: 10, - grade: "F", - lines: 200, - bytes: 6000, - metric_scores: [ - %{ - name: "nonexistent_metric", - source: "nonexistent_source", - good: :low, - value: 99.0, - score: 10 - } - ], - top_nodes: [] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 10, - overall_grade: "F", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :plain) - refute result =~ "**Fix:**" - end - end - describe "format_markdown/3 defaults to :plain" do test "two-arity call matches plain output" do plain = Formatter.format_markdown(@sample_report, :default, :plain) From 86a901426d9cd41a33f38f8baba59dfe0b263233 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 13:24:25 +0100 Subject: [PATCH 39/71] feat(formatter): add block, delta, PR summary sections; remove worst_offenders (github) Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/health_report/formatter/github.ex | 310 +++++-------- test/codeqa/health_report/formatter_test.exs | 461 +++---------------- 2 files changed, 173 insertions(+), 598 deletions(-) diff --git a/lib/codeqa/health_report/formatter/github.ex b/lib/codeqa/health_report/formatter/github.ex index 90c7c18..48a6071 100644 --- a/lib/codeqa/health_report/formatter/github.ex +++ b/lib/codeqa/health_report/formatter/github.ex @@ -11,11 +11,14 @@ defmodule CodeQA.HealthReport.Formatter.Github do display_categories = merge_cosine_categories(report.categories) [ + pr_summary_section(Map.get(report, :pr_summary)), header(report), 
cosine_legend(), + delta_section(Map.get(report, :codebase_delta)), if(chart?, do: mermaid_chart(display_categories), else: []), progress_bars(display_categories), top_issues_section(Map.get(report, :top_issues, []), detail), + blocks_section(Map.get(report, :top_blocks, [])), category_sections(display_categories, detail), footer() ] @@ -230,7 +233,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do summary_table ++ [""] ++ sub_sections end - defp cosine_section_content(cat, detail) do + defp cosine_section_content(cat, _detail) do n = length(cat.behaviors) behaviors_rows = @@ -246,112 +249,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do | behaviors_rows ] - offenders_sections = cosine_worst_offenders(cat, detail) - - behaviors_table ++ [""] ++ offenders_sections - end - - defp cosine_worst_offenders(_cat, :summary), do: [] - - defp cosine_worst_offenders(cat, _detail) do - Enum.flat_map(cat.behaviors, fn b -> - offenders = Map.get(b, :worst_offenders, []) - - if offenders == [] do - [] - else - fix_hint = cosine_fix_hint(cat.key, b.behavior) - - cards = - Enum.flat_map(offenders, fn f -> - score_str = format_cosine_score(f.cosine) - summary = "#{f.file} — #{score_str}" - - why_line = format_why_line(Map.get(f, :top_metrics, [])) - where_line = format_where_line(Map.get(f, :top_nodes, [])) - fix_line = if fix_hint, do: "**Fix:** #{fix_hint}", else: nil - - body_lines = - [why_line, where_line, fix_line] - |> Enum.reject(&is_nil/1) - - if body_lines == [] do - [ - "
    ", - "#{summary}", - "", - "
    ", - "" - ] - else - [ - "
    ", - "#{summary}", - "" - ] ++ - body_lines ++ - [ - "", - "
    ", - "" - ] - end - end) - - ["**Worst Offenders: #{b.behavior}**", "" | cards] - end - end) - end - - defp cosine_fix_hint(category_key, behavior) do - yaml_path = "priv/combined_metrics/#{category_key}.yml" - - case YamlElixir.read_from_file(yaml_path) do - {:ok, data} -> - get_in(data, [behavior, "_fix_hint"]) - - {:error, reason} -> - require Logger - Logger.debug("cosine_fix_hint: could not read #{yaml_path}: #{inspect(reason)}") - nil - end - end - - defp format_cosine_score(cosine) when cosine < 0, - do: "−#{format_num(abs(cosine))}" - - defp format_cosine_score(cosine), do: format_num(cosine) - - defp format_why_line([]), do: nil - - defp format_why_line(top_metrics) do - parts = - Enum.map(top_metrics, fn %{metric: metric, contribution: contribution} -> - sign = if contribution < 0, do: "↓", else: "↑" - abs_val = abs(contribution) - - value_str = - if contribution < 0, - do: "−#{format_num(abs_val)}", - else: "+#{format_num(abs_val)}" - - "#{sign} #{metric} (#{value_str})" - end) - - "**Why:** #{Enum.join(parts, ", ")}" - end - - defp format_where_line([]), do: nil - - defp format_where_line(top_nodes) do - parts = - Enum.map(top_nodes, fn node -> - line = node["start_line"] || "?" - type = node["type"] || "unknown" - "line #{line} (#{type})" - end) - - "**Where:** #{Enum.join(parts, ", ")}" + behaviors_table ++ [""] end defp section_content(cat, _detail) do @@ -378,84 +276,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do "Codebase averages: #{metric_summary}", "" | metrics_table - ] ++ [""] ++ worst_offenders(cat) - end - - defp worst_offenders(cat) do - # TODO(option-c): the flat issues list alternative would replace the per-category worst offenders loop with a single table sorted by severity: each row is one specific issue at one location (file:line — behavior — fix), not one file with multiple behaviors collapsed into it. 
- offenders = Map.get(cat, :worst_offenders, []) - - if offenders == [] do - [] - else - averages = Map.new(cat.metric_scores, &{&1.name, &1.value}) - - cards = - Enum.flat_map(offenders, fn f -> - size_str = "#{format_lines(f[:lines])} lines · #{format_size(f[:bytes])}" - summary = "#{f.path} — #{size_str} — #{f.grade} (#{f.score})" - - why_str = format_threshold_why(f.metric_scores, averages) - why_line = if why_str != "", do: "**Why:** #{why_str}", else: nil - - where_line = format_where_line(Map.get(f, :top_nodes, [])) - - fix_hint = threshold_fix_hint(f.metric_scores) - fix_line = if fix_hint, do: "**Fix:** #{fix_hint}", else: nil - - body_lines = - [why_line, where_line, fix_line] - |> Enum.reject(&is_nil/1) - - if body_lines == [] do - [ - "
    ", - "#{summary}", - "", - "
    ", - "" - ] - else - [ - "
    ", - "#{summary}", - "" - ] ++ - body_lines ++ - [ - "", - "
    ", - "" - ] - end - end) - - ["**Worst Offenders**", "" | cards] - end - end - - defp format_threshold_why(metric_scores, averages) do - Enum.map_join(metric_scores, " · ", fn m -> - avg = Map.get(averages, m.name) - avg_str = if avg, do: " (avg: #{format_num(avg)})", else: "" - "#{direction(m.good)}#{m.name}=#{format_num(m.value)}#{avg_str}" - end) - end - - defp threshold_fix_hint(metric_scores) do - worst = Enum.min_by(metric_scores, & &1.score, fn -> nil end) - - if worst do - categories = CodeQA.HealthReport.Categories.defaults() - all_metrics = Enum.flat_map(categories, & &1.metrics) - - found = - Enum.find(all_metrics, fn m -> - m.name == worst.name and m.source == worst.source - end) - - if found, do: Map.get(found, :fix_hint), else: nil - end + ] ++ [""] end defp top_issues_section([], _detail), do: [] @@ -504,20 +325,119 @@ defmodule CodeQA.HealthReport.Formatter.Github do defp extract_project_name(_), do: "unknown" - defp direction(:high), do: "↑ " - defp direction(_), do: "↓ " - - defp format_lines(nil), do: "—" - defp format_lines(n), do: to_string(n) - - defp format_size(nil), do: "—" - defp format_size(bytes) when bytes < 1024, do: "#{bytes} B" - defp format_size(bytes), do: "#{Float.round(bytes / 1024, 1)} KB" - defp format_num(value) when is_float(value), do: :erlang.float_to_binary(value, decimals: 2) defp format_num(value) when is_integer(value), do: to_string(value) defp format_num(value), do: to_string(value) defp format_date(timestamp) when is_binary(timestamp), do: String.slice(timestamp, 0, 10) defp format_date(_), do: "unknown" + + defp pr_summary_section(nil), do: [] + + defp pr_summary_section(summary) do + delta_str = + if summary.score_delta >= 0, + do: "+#{summary.score_delta}", + else: "#{summary.score_delta}" + + status_str = "#{summary.files_modified} modified, #{summary.files_added} added" + + [ + "> **Score:** #{summary.base_grade} → #{summary.head_grade} | **Δ** #{delta_str} pts | **#{summary.blocks_flagged}** blocks flagged 
across #{summary.files_changed} files | #{status_str}", + "" + ] + end + + defp delta_section(nil), do: [] + + defp delta_section(delta) do + base_agg = delta.base.aggregate + head_agg = delta.head.aggregate + + metrics = [ + {"Readability", "readability", "mean_flesch_adapted"}, + {"Complexity", "halstead", "mean_difficulty"}, + {"Duplication", "compression", "mean_redundancy"}, + {"Structure", "branching", "mean_branch_count"} + ] + + rows = + Enum.flat_map(metrics, fn {label, group, key} -> + base_val = get_in(base_agg, [group, key]) + head_val = get_in(head_agg, [group, key]) + + if is_number(base_val) and is_number(head_val) do + diff = Float.round(head_val - base_val, 2) + diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}" + ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"] + else + [] + end + end) + + if rows == [] do + [] + else + [ + "## Metric Changes", + "", + "| Category | Base | Head | Δ |", + "|----------|------|------|---|" + | rows + ] ++ [""] + end + end + + defp blocks_section([]), do: [] + + defp blocks_section(top_blocks) do + total = Enum.sum(Enum.map(top_blocks, fn g -> length(g.blocks) end)) + + file_cards = + Enum.flat_map(top_blocks, fn group -> + status_str = if group.status, do: " [#{group.status}]", else: "" + summary_line = "🔍 #{group.path}#{status_str} — #{length(group.blocks)} block(s)" + + block_lines = + Enum.flat_map(group.blocks, fn block -> + end_line = block.end_line || block.start_line + + potential_lines = + Enum.flat_map(block.potentials, fn p -> + icon = severity_icon(p.severity) + delta_str = format_num(p.cosine_delta) + label = String.upcase(to_string(p.severity)) + line = "**#{icon} #{label}** `#{p.category}/#{p.behavior}` (Δ #{delta_str})" + fix = if p.fix_hint, do: ["> #{p.fix_hint}"], else: [] + [line | fix] + end) + + [ + "**lines #{block.start_line}–#{end_line}** · #{block.type} · #{block.token_count} tokens" + ] ++ + potential_lines ++ [""] + end) + + 
inner = List.flatten(block_lines) |> Enum.join("\n") + + [ + "
    ", + "#{summary_line}", + "", + inner, + "
    ", + "" + ] + end) + + [ + "## 🔍 Blocks (#{total} flagged across #{length(top_blocks)} files)", + "" + | file_cards + ] + end + + defp severity_icon(:critical), do: "🔴" + defp severity_icon(:high), do: "🟠" + defp severity_icon(:medium), do: "🟡" end diff --git a/test/codeqa/health_report/formatter_test.exs b/test/codeqa/health_report/formatter_test.exs index c3166b9..39ce63a 100644 --- a/test/codeqa/health_report/formatter_test.exs +++ b/test/codeqa/health_report/formatter_test.exs @@ -411,25 +411,6 @@ defmodule CodeQA.HealthReport.FormatterTest do assert result =~ "| Behavior | Cosine | Score | Grade |" assert result =~ "| no_boolean_parameter | 0.12 | 56 | C |" end - - test "renders cosine worst offenders per behavior as details cards" do - result = Formatter.format_markdown(@report_with_cosine, :default, :github) - assert result =~ "**Worst Offenders: no_boolean_parameter**" - assert result =~ "lib/foo/bar.ex" - assert result =~ "−0.71" - refute result =~ "| File | Cosine |" - refute result =~ "| `lib/foo/bar.ex` |" - end - - test "omits behaviors with no worst offenders" do - result = Formatter.format_markdown(@report_with_cosine, :default, :github) - refute result =~ "**Worst Offenders: single_responsibility**" - end - - test "summary detail omits cosine worst offenders" do - result = Formatter.format_markdown(@report_with_cosine, :summary, :github) - refute result =~ "**Worst Offenders: no_boolean_parameter**" - end end describe "format_markdown/4 with :github format and chart: false" do @@ -440,407 +421,81 @@ defmodule CodeQA.HealthReport.FormatterTest do end end - describe "github cosine worst offender
    cards" do - defp report_with_enriched_cosine do - %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 64, - overall_grade: "C", - categories: [@enriched_cosine_category] - } - end - - test "renders
    card for each worst offender" do - result = Formatter.format_markdown(report_with_enriched_cosine(), :default, :github) - assert result =~ "
    " - assert result =~ "lib/codeqa/formatter.ex" - end - - test "renders score with Unicode minus for negative cosine in card summary" do - result = Formatter.format_markdown(report_with_enriched_cosine(), :default, :github) - assert result =~ "lib/codeqa/formatter.ex — −0.65" - end - - test "renders Why row with ↓ for negative contributions" do - result = Formatter.format_markdown(report_with_enriched_cosine(), :default, :github) - assert result =~ "**Why:** ↓ branching.mean_depth (−4.10), ↓ halstead.effort (−3.22)" - end - - test "renders Why row with ↑ for positive contributions" do - category = %{ - @enriched_cosine_category - | behaviors: [ - %{ - behavior: "no_boolean_parameter", - cosine: 0.5, - score: 90, - grade: "A", - worst_offenders: [ - %{ - file: "lib/foo.ex", - cosine: 0.5, - top_metrics: [%{metric: "halstead.effort", contribution: 2.5}], - top_nodes: [] - } - ] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 90, - overall_grade: "A", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :github) - assert result =~ "↑ halstead.effort (+2.50)" - end - - test "omits Why row when top_metrics is empty" do - category = %{ - @enriched_cosine_category - | behaviors: [ - %{ - behavior: "no_boolean_parameter", - cosine: -0.5, - score: 42, - grade: "D+", - worst_offenders: [ - %{ - file: "lib/foo.ex", - cosine: -0.5, - top_metrics: [], - top_nodes: [%{"start_line" => 10, "type" => "block"}] - } - ] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 42, - overall_grade: "D+", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :github) - refute result =~ "**Why:**" - assert result =~ "**Where:**" - end - - test "renders Where row as 'line N (type)'" do - result = 
Formatter.format_markdown(report_with_enriched_cosine(), :default, :github) - assert result =~ "**Where:** line 89 (block), line 134 (block)" - end - - test "omits Where row when top_nodes is empty" do - category = %{ - @enriched_cosine_category - | behaviors: [ - %{ - behavior: "no_boolean_parameter", - cosine: -0.5, - score: 42, - grade: "D+", - worst_offenders: [ - %{ - file: "lib/foo.ex", - cosine: -0.5, - top_metrics: [%{metric: "branching.mean_depth", contribution: -4.10}], - top_nodes: [] - } - ] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 42, - overall_grade: "D+", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :github) - refute result =~ "**Where:**" - assert result =~ "**Why:**" - end - - test "omits Where row when top_nodes key is absent" do - category = %{ - @enriched_cosine_category - | behaviors: [ - %{ - behavior: "no_boolean_parameter", - cosine: -0.5, - score: 42, - grade: "D+", - worst_offenders: [ - %{file: "lib/foo.ex", cosine: -0.5} - ] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 42, - overall_grade: "D+", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :github) - refute result =~ "**Where:**" - end - - test "does not render old table format" do - result = Formatter.format_markdown(report_with_enriched_cosine(), :default, :github) - refute result =~ "| File | Cosine |" - end - - test "omits Fix row when cosine fix_hint is nil" do - category = %{ - type: :cosine, - key: "nonexistent", - name: "Nonexistent", - score: 50, - grade: "C", - impact: 1, - behaviors: [ - %{ - behavior: "nonexistent_behavior", - cosine: -0.5, - score: 42, - grade: "D+", - worst_offenders: [ - %{ - file: "lib/foo.ex", - cosine: -0.5, - top_metrics: [%{metric: "some.metric", contribution: 
-1.0}], - top_nodes: [] - } - ] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 42, - overall_grade: "D+", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :github) - refute result =~ "**Fix:**" - end + describe "github formatter: block section" do + @block_potential %{ + category: "function_design", + behavior: "cyclomatic_complexity_under_10", + cosine_delta: 0.41, + severity: :critical, + fix_hint: "Reduce branching" + } - test "renders Fix row for cosine when hint is present" do - category = %{ - type: :cosine, - key: "variable_naming", - name: "Variable Naming", - score: 50, - grade: "C", - impact: 1, - behaviors: [ + @top_blocks_gh [ + %{ + path: "lib/foo.ex", + status: "modified", + blocks: [ %{ - behavior: "name_is_generic", - cosine: -0.5, - score: 42, - grade: "D+", - worst_offenders: [ - %{ - file: "lib/foo.ex", - cosine: -0.5, - top_metrics: [%{metric: "some.metric", contribution: -1.0}], - top_nodes: [] - } - ] + start_line: 42, + end_line: 67, + type: "code", + token_count: 84, + potentials: [@block_potential] } ] } + ] - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 42, - overall_grade: "D+", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :github) - assert result =~ "**Fix:**" - end - - test "snapshot: full enriched cosine offender card" do - result = Formatter.format_markdown(report_with_enriched_cosine(), :default, :github) - assert result =~ "
    " - assert result =~ "" - assert result =~ "**Why:**" - assert result =~ "↓" - assert result =~ "**Where:**" - assert result =~ "line" - assert result =~ "(" - assert result =~ "
    " - end - end - - describe "github threshold worst offender
    cards" do - defp report_with_enriched_threshold do - %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 32, - overall_grade: "F", - categories: [@enriched_threshold_category] - } - end + @report_with_blocks_gh Map.put(@sample_report, :top_blocks, @top_blocks_gh) - test "renders
    card for each threshold worst offender" do - result = Formatter.format_markdown(report_with_enriched_threshold(), :default, :github) + test "renders block section with details wrapper per file" do + result = Formatter.format_markdown(@report_with_blocks_gh, :default, :github) + assert result =~ "Blocks" assert result =~ "
    " - assert result =~ "lib/foo.ex" - end - - test "summary line includes lines, size and grade" do - result = Formatter.format_markdown(report_with_enriched_threshold(), :default, :github) - assert result =~ "491 lines" - assert result =~ "F (32)" - end - - test "renders Why row with · separator for threshold metrics" do - result = Formatter.format_markdown(report_with_enriched_threshold(), :default, :github) - assert result =~ "**Why:** ↓ difficulty=99.00 (avg: 39.00)" - end - - test "renders Where row for threshold worst offenders" do - result = Formatter.format_markdown(report_with_enriched_threshold(), :default, :github) - assert result =~ "**Where:** line 201 (block), line 312 (block)" + assert result =~ "lib/foo.ex" + assert result =~ "modified" end - test "renders Fix row from Categories.defaults when hint available" do - result = Formatter.format_markdown(report_with_enriched_threshold(), :default, :github) - assert result =~ "**Fix:** High operator/operand ratio" + test "renders severity and fix hint" do + result = Formatter.format_markdown(@report_with_blocks_gh, :default, :github) + assert result =~ "🔴" + assert result =~ "cyclomatic_complexity_under_10" + assert result =~ "Reduce branching" end + end - test "omits Where row when top_nodes is empty" do - category = %{ - @enriched_threshold_category - | worst_offenders: [ - %{ - path: "lib/bar.ex", - score: 32, - grade: "F", - lines: 100, - bytes: 3000, - metric_scores: [ - %{name: "difficulty", source: "halstead", good: :low, value: 99.0, score: 0} - ], - top_nodes: [] - } - ] - } - - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 32, - overall_grade: "F", - categories: [category] - } - - result = Formatter.format_markdown(report, :default, :github) - refute result =~ "**Where:**" - end - - test "does not render old table format" do - result = Formatter.format_markdown(report_with_enriched_threshold(), :default, 
:github) - refute result =~ "| File | Grade | Issues |" - end - - test "omits Fix row when threshold fix_hint is nil" do - category = %{ - @enriched_threshold_category - | worst_offenders: [ - %{ - path: "lib/bar.ex", - score: 10, - grade: "F", - lines: 200, - bytes: 6000, - metric_scores: [ - %{ - name: "nonexistent_metric", - source: "nonexistent_source", - good: :low, - value: 99.0, - score: 10 - } - ], - top_nodes: [] - } - ] - } + describe "github formatter: PR summary and delta" do + @pr_summary_gh %{ + base_score: 85, + head_score: 77, + score_delta: -8, + base_grade: "B+", + head_grade: "C+", + blocks_flagged: 6, + files_changed: 3, + files_added: 1, + files_modified: 2 + } - report = %{ - metadata: %{ - path: "/home/user/project", - timestamp: "2026-03-11T00:00:00Z", - total_files: 10 - }, - overall_score: 10, - overall_grade: "F", - categories: [category] - } + @delta_gh %{ + base: %{aggregate: %{"readability" => %{"mean_flesch_adapted" => 65.0}}}, + head: %{aggregate: %{"readability" => %{"mean_flesch_adapted" => 61.0}}} + } + test "renders PR summary" do + report = @sample_report |> Map.put(:pr_summary, @pr_summary_gh) result = Formatter.format_markdown(report, :default, :github) - refute result =~ "**Fix:**" + assert result =~ "B+" + assert result =~ "C+" + assert result =~ "-8" end - test "snapshot: full enriched threshold offender card" do - result = Formatter.format_markdown(report_with_enriched_threshold(), :default, :github) - assert result =~ "
    " - assert result =~ "" - assert result =~ "**Why:**" - assert result =~ "**Where:**" - assert result =~ "
    " + test "renders delta section" do + report = @sample_report |> Map.put(:codebase_delta, @delta_gh) + result = Formatter.format_markdown(report, :default, :github) + assert result =~ "Metric Changes" + assert result =~ "65.00" + assert result =~ "61.00" end end end From ec2bc51e5e3c448e99e2cdbd9e673a3d29a65fbb Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 13:28:34 +0100 Subject: [PATCH 40/71] fix(formatter): add fallback clause to severity_icon/1 --- lib/codeqa/health_report/formatter/github.ex | 1 + lib/codeqa/health_report/formatter/plain.ex | 1 + 2 files changed, 2 insertions(+) diff --git a/lib/codeqa/health_report/formatter/github.ex b/lib/codeqa/health_report/formatter/github.ex index 48a6071..694b7ff 100644 --- a/lib/codeqa/health_report/formatter/github.ex +++ b/lib/codeqa/health_report/formatter/github.ex @@ -440,4 +440,5 @@ defmodule CodeQA.HealthReport.Formatter.Github do defp severity_icon(:critical), do: "🔴" defp severity_icon(:high), do: "🟠" defp severity_icon(:medium), do: "🟡" + defp severity_icon(_), do: "⚪" end diff --git a/lib/codeqa/health_report/formatter/plain.ex b/lib/codeqa/health_report/formatter/plain.ex index 37c7dfb..25117a3 100644 --- a/lib/codeqa/health_report/formatter/plain.ex +++ b/lib/codeqa/health_report/formatter/plain.ex @@ -247,4 +247,5 @@ defmodule CodeQA.HealthReport.Formatter.Plain do defp severity_icon(:critical), do: "🔴" defp severity_icon(:high), do: "🟠" defp severity_icon(:medium), do: "🟡" + defp severity_icon(_), do: "⚪" end From 980860c151242135a87d8a0ca7f45c8264e6b241 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 13:30:16 +0100 Subject: [PATCH 41/71] feat(cli): add --base-ref/--head-ref to health-report for PR delta and block scoping Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/cli/health_report.ex | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/lib/codeqa/cli/health_report.ex b/lib/codeqa/cli/health_report.ex 
index 525e2e0..680bbbc 100644 --- a/lib/codeqa/cli/health_report.ex +++ b/lib/codeqa/cli/health_report.ex @@ -24,6 +24,8 @@ defmodule CodeQA.CLI.HealthReport do --cache-dir DIR Directory to store cache (default: .codeqa_cache) -t, --timeout MS Timeout for similarity analysis (default: 5000) --ignore-paths PATHS Comma-separated list of path patterns to ignore (supports wildcards, e.g. "test/*,docs/*") + --base-ref REF Base git ref for PR comparison (enables delta and block scoping) + --head-ref REF Head git ref (default: HEAD) """ end @@ -38,7 +40,9 @@ defmodule CodeQA.CLI.HealthReport do detail: :string, top: :integer, format: :string, - ignore_paths: :string + ignore_paths: :string, + base_ref: :string, + head_ref: :string ] def run(args) do @@ -46,12 +50,12 @@ defmodule CodeQA.CLI.HealthReport do Options.validate_dir!(path) extra_ignore_patterns = Options.parse_ignore_paths(opts[:ignore_paths]) + base_ref = opts[:base_ref] + head_ref = opts[:head_ref] || "HEAD" + files = CodeQA.Engine.Collector.collect_files(path, extra_ignore_patterns) - # IO.inspect(files |> Map.keys(), label: "files", limit: :infinity) - # Process.exit(Process.get(:codeqa_config), :kill) - if map_size(files) == 0 do IO.puts(:stderr, "Warning: no source files found in '#{path}'") exit({:shutdown, 1}) @@ -81,6 +85,20 @@ defmodule CodeQA.CLI.HealthReport do "total_bytes" => total_bytes }) + {base_results, changed_files} = + if base_ref do + IO.puts(:stderr, "Collecting base snapshot at #{base_ref}...") + base_files = CodeQA.Git.collect_files_at_ref(path, base_ref) + changed = CodeQA.Git.changed_files(path, base_ref, head_ref) + + IO.puts(:stderr, "Analyzing base snapshot (#{map_size(base_files)} files)...") + base_res = CodeQA.Engine.Analyzer.analyze_codebase(base_files, analyze_opts) + + {base_res, changed} + else + {nil, []} + end + detail = parse_detail(opts[:detail]) format = parse_format(opts[:format]) top_n = opts[:top] || 5 @@ -89,7 +107,9 @@ defmodule CodeQA.CLI.HealthReport do 
CodeQA.HealthReport.generate(results, config: opts[:config], detail: detail, - top: top_n + top: top_n, + base_results: base_results, + changed_files: changed_files ) markdown = CodeQA.HealthReport.to_markdown(report, detail, format) From 110f4397002e0201856d97ec1a2bebb0400d4ad5 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 13:33:34 +0100 Subject: [PATCH 42/71] =?UTF-8?q?feat(cli):=20delete=20compare=20command?= =?UTF-8?q?=20=E2=80=94=20absorbed=20into=20health-report?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/codeqa/cli.ex | 1 - lib/codeqa/cli/compare.ex | 239 --------------------- lib/codeqa/comparator.ex | 109 ---------- lib/codeqa/formatter.ex | 346 ------------------------------- lib/codeqa/summarizer.ex | 126 ----------- test/codeqa/cli_compare_test.exs | 91 -------- test/codeqa/formatter_test.exs | 150 -------------- 7 files changed, 1062 deletions(-) delete mode 100644 lib/codeqa/cli/compare.ex delete mode 100644 lib/codeqa/comparator.ex delete mode 100644 lib/codeqa/formatter.ex delete mode 100644 lib/codeqa/summarizer.ex delete mode 100644 test/codeqa/cli_compare_test.exs delete mode 100644 test/codeqa/formatter_test.exs diff --git a/lib/codeqa/cli.ex b/lib/codeqa/cli.ex index 85daa5e..fcb8e01 100644 --- a/lib/codeqa/cli.ex +++ b/lib/codeqa/cli.ex @@ -3,7 +3,6 @@ defmodule CodeQA.CLI do @commands %{ "analyze" => CodeQA.CLI.Analyze, - "compare" => CodeQA.CLI.Compare, "history" => CodeQA.CLI.History, "correlate" => CodeQA.CLI.Correlate, "health-report" => CodeQA.CLI.HealthReport, diff --git a/lib/codeqa/cli/compare.ex b/lib/codeqa/cli/compare.ex deleted file mode 100644 index 9adc0c9..0000000 --- a/lib/codeqa/cli/compare.ex +++ /dev/null @@ -1,239 +0,0 @@ -defmodule CodeQA.CLI.Compare do - @moduledoc false - - @behaviour CodeQA.CLI.Command - - alias CodeQA.CLI.Options - - @version "0.1.0" - - @impl CodeQA.CLI.Command - def usage do - """ - Usage: codeqa compare [options] - - 
Compare code quality metrics between two git refs. - - Options: - --base-ref REF Base git ref to compare from (required) - --head-ref REF Head git ref to compare to (default: HEAD) - --changes-only Only analyze changed files - --all-files Analyze all source files (default) - --format FORMAT Output format: json, markdown, or github (default: json) - --output MODE Output mode: auto, summary, or changes (default: auto) - --progress Show per-file progress on stderr - -w, --workers N Number of parallel workers - --cache Enable caching file metrics - --cache-dir DIR Directory to store cache (default: .codeqa_cache) - -t, --timeout MS Timeout for similarity analysis (default: 5000) - --show-ncd Compute and show NCD similarity metric - --ncd-top N Number of top similar files to show per file - --ncd-paths PATHS Comma-separated list of paths to compute NCD for - --show-files Include individual file metrics in the output - --show-file-paths P Comma-separated list of paths to include in the output - --ignore-paths PATHS Comma-separated list of path patterns to ignore (supports wildcards, e.g. 
"test/*,docs/*") - """ - end - - @impl CodeQA.CLI.Command - def run(args) when args in [["--help"], ["-h"]] do - usage() - end - - def run(args) do - {opts, [path], _} = - Options.parse( - args, - [ - base_ref: :string, - head_ref: :string, - changes_only: :boolean, - all_files: :boolean, - format: :string, - output: :string - ], - [] - ) - - base_ref = opts[:base_ref] || raise "Missing --base-ref" - head_ref = opts[:head_ref] || "HEAD" - changes_only = if opts[:changes_only], do: true, else: false - format = opts[:format] || "json" - output_mode = opts[:output] || "auto" - - Options.validate_dir!(path) - CodeQA.Config.load(path) - - ignore_patterns = Options.parse_ignore_paths(opts[:ignore_paths]) - opts = Keyword.put(opts, :ignore_patterns, ignore_patterns) - - {base_result, head_result, changes} = - run_comparison(path, base_ref, head_ref, changes_only, opts) - - comparison = - CodeQA.Comparator.compare_results(base_result, head_result, changes) - |> enrich_metadata(base_ref, head_ref, changes_only) - |> filter_files_for_output(opts, format) - - output_comparison(comparison, format, output_mode) - end - - defp run_comparison(path, base_ref, head_ref, changes_only, opts) do - ignore_patterns = opts[:ignore_patterns] || [] - changes = CodeQA.Git.changed_files(path, base_ref, head_ref) - changes = CodeQA.Engine.Collector.reject_ignored(changes, & &1.path, ignore_patterns) - - file_paths = - if changes_only do - IO.puts(:stderr, "Comparing #{length(changes)} changed files...") - Enum.map(changes, & &1.path) - else - IO.puts(:stderr, "Comparing all source files...") - nil - end - - empty = %{"files" => %{}, "codebase" => %{"aggregate" => %{}, "similarity" => %{}}} - - if changes_only and length(changes) == 0 do - IO.puts(:stderr, "No source files changed — nothing to compare.") - {empty, empty, []} - else - base_files = CodeQA.Git.collect_files_at_ref(path, base_ref, file_paths) - head_files = CodeQA.Git.collect_files_at_ref(path, head_ref, file_paths) - base_files = 
CodeQA.Engine.Collector.reject_ignored_map(base_files, ignore_patterns) - head_files = CodeQA.Engine.Collector.reject_ignored_map(head_files, ignore_patterns) - - if map_size(base_files) == 0 and map_size(head_files) == 0 do - IO.puts(:stderr, "Warning: no source files found at either ref") - exit({:shutdown, 1}) - end - - print_progress(opts, base_files, head_files) - - analyze_opts = - Options.build_analyze_opts(opts) ++ CodeQA.Config.near_duplicate_blocks_opts() - - base_result = - if map_size(base_files) > 0, - do: CodeQA.Engine.Analyzer.analyze_codebase(base_files, analyze_opts), - else: empty - - head_result = - if map_size(head_files) > 0, - do: CodeQA.Engine.Analyzer.analyze_codebase(head_files, analyze_opts), - else: empty - - changes = if changes_only, do: changes, else: synthesize_changes(base_files, head_files) - - {base_result, head_result, changes} - end - end - - defp print_progress(opts, base_files, head_files) do - if opts[:progress] do - step_prefix = if opts[:show_ncd], do: "1/5 ", else: "1/1 " - - IO.puts( - :stderr, - " #{step_prefix}Analyzing base (#{map_size(base_files)} files) and head (#{map_size(head_files)} files)..." - ) - else - IO.puts( - :stderr, - "Analyzing base (#{map_size(base_files)} files) and head (#{map_size(head_files)} files)..." 
- ) - end - end - - defp enrich_metadata(comparison, base_ref, head_ref, changes_only) do - comparison - |> put_in(["metadata", "base_ref"], base_ref) - |> put_in(["metadata", "head_ref"], head_ref) - |> put_in(["metadata", "changes_only"], changes_only) - |> put_in(["metadata", "version"], @version) - |> put_in(["metadata", "timestamp"], DateTime.utc_now() |> DateTime.to_iso8601()) - end - - defp output_comparison(comparison, "markdown", output_mode) do - CodeQA.Formatter.format_markdown(comparison, output_mode) - end - - defp output_comparison(comparison, "github", output_mode) do - CodeQA.Formatter.format_github(comparison, output_mode) - end - - defp output_comparison(comparison, _format, output_mode) do - codebase_summary = CodeQA.Summarizer.summarize_codebase(comparison) - - file_summaries = - Map.new(Map.get(comparison, "files", %{}), fn {path, data} -> - {path, CodeQA.Summarizer.summarize_file(path, data)} - end) - - Jason.encode!(build_json_output(comparison, codebase_summary, file_summaries, output_mode), - pretty: true - ) - end - - defp build_json_output(comparison, codebase_summary, file_summaries, output_mode) do - result = %{"metadata" => comparison["metadata"]} - - result = - if output_mode in ["auto", "summary"] do - result - |> Map.put("summary", codebase_summary) - |> Map.put("codebase", comparison["codebase"]) - else - result - end - - if output_mode in ["auto", "changes"] and Map.has_key?(comparison, "files") do - files_with_summaries = - Map.new(comparison["files"], fn {path, data} -> - {path, Map.put(data, "summary", Map.get(file_summaries, path, %{}))} - end) - - Map.put(result, "files", files_with_summaries) - else - result - end - end - - defp synthesize_changes(base_files, head_files) do - all_paths = MapSet.union(MapSet.new(Map.keys(base_files)), MapSet.new(Map.keys(head_files))) - - all_paths - |> Enum.sort() - |> Enum.map(fn path -> - status = - cond do - Map.has_key?(base_files, path) and Map.has_key?(head_files, path) -> "modified" - 
Map.has_key?(head_files, path) -> "added" - true -> "deleted" - end - - %CodeQA.Git.ChangedFile{path: path, status: status} - end) - end - - defp filter_files_for_output(results, _opts, format) when format in ["github", "markdown"], - do: results - - defp filter_files_for_output(results, opts, _format) do - cond do - opts[:show_files] -> - results - - opts[:show_file_paths] -> - target_paths = String.split(opts[:show_file_paths], ",") |> MapSet.new() - - filtered = - Map.filter(results["files"], fn {path, _} -> MapSet.member?(target_paths, path) end) - - Map.put(results, "files", filtered) - - true -> - Map.delete(results, "files") - end - end -end diff --git a/lib/codeqa/comparator.ex b/lib/codeqa/comparator.ex deleted file mode 100644 index 4fbfa40..0000000 --- a/lib/codeqa/comparator.ex +++ /dev/null @@ -1,109 +0,0 @@ -defmodule CodeQA.Comparator do - @moduledoc "Compare two analysis results and compute metric deltas." - - def compare_results(base_result, head_result, changes) do - base_files = Map.get(base_result, "files", %{}) - head_files = Map.get(head_result, "files", %{}) - - {file_comparisons, status_counts} = - changes - |> Enum.reduce({%{}, %{"added" => 0, "modified" => 0, "deleted" => 0}}, fn change, - {files, counts} -> - base_data = Map.get(base_files, change.path) - head_data = Map.get(head_files, change.path) - delta = compute_file_delta(base_data, head_data) - - file_entry = %{ - "status" => change.status, - "base" => base_data, - "head" => head_data, - "delta" => delta - } - - {Map.put(files, change.path, file_entry), Map.update!(counts, change.status, &(&1 + 1))} - end) - - base_agg = get_in(base_result, ["codebase", "aggregate"]) || %{} - head_agg = get_in(head_result, ["codebase", "aggregate"]) || %{} - agg_delta = compute_aggregate_delta(base_agg, head_agg) - - summary = build_summary(status_counts) - - %{ - "metadata" => %{ - "total_files_compared" => length(changes), - "summary" => summary - }, - "files" => file_comparisons, - "codebase" => 
%{ - "base" => %{"aggregate" => base_agg}, - "head" => %{"aggregate" => head_agg}, - "delta" => %{"aggregate" => agg_delta} - } - } - end - - defp compute_file_delta(nil, _head), do: nil - defp compute_file_delta(_base, nil), do: nil - - defp compute_file_delta(base_data, head_data) do - top_delta = - ["bytes", "lines"] - |> Enum.reduce(%{}, fn key, acc -> - case {Map.get(base_data, key), Map.get(head_data, key)} do - {b, h} when is_number(b) and is_number(h) -> Map.put(acc, key, h - b) - _ -> acc - end - end) - - base_metrics = Map.get(base_data, "metrics", %{}) - head_metrics = Map.get(head_data, "metrics", %{}) - - metrics_delta = - MapSet.new(Map.keys(base_metrics) ++ Map.keys(head_metrics)) - |> Enum.reduce(%{}, fn metric_name, acc -> - base_m = Map.get(base_metrics, metric_name, %{}) - head_m = Map.get(head_metrics, metric_name, %{}) - delta = compute_numeric_delta(base_m, head_m) - if delta == %{}, do: acc, else: Map.put(acc, metric_name, delta) - end) - - Map.put(top_delta, "metrics", metrics_delta) - end - - defp compute_aggregate_delta(base_agg, head_agg) do - MapSet.new(Map.keys(base_agg) ++ Map.keys(head_agg)) - |> Enum.reduce(%{}, fn metric_name, acc -> - base_m = Map.get(base_agg, metric_name, %{}) - head_m = Map.get(head_agg, metric_name, %{}) - delta = compute_numeric_delta(base_m, head_m) - if delta == %{}, do: acc, else: Map.put(acc, metric_name, delta) - end) - end - - defp compute_numeric_delta(base, head) do - MapSet.new(Map.keys(base) ++ Map.keys(head)) - |> Enum.reduce(%{}, fn key, acc -> - case {Map.get(base, key), Map.get(head, key)} do - {b, h} when is_number(b) and is_number(h) -> - Map.put(acc, key, Float.round((h - b) / 1, 4)) - - _ -> - acc - end - end) - end - - defp build_summary(counts) do - parts = - [ - {"added", counts["added"]}, - {"modified", counts["modified"]}, - {"deleted", counts["deleted"]} - ] - |> Enum.filter(fn {_, c} -> c > 0 end) - |> Enum.map(fn {status, count} -> "#{count} #{status}" end) - - if parts == [], do: "no 
changes", else: Enum.join(parts, ", ") - end -end diff --git a/lib/codeqa/formatter.ex b/lib/codeqa/formatter.ex deleted file mode 100644 index 14c1992..0000000 --- a/lib/codeqa/formatter.ex +++ /dev/null @@ -1,346 +0,0 @@ -defmodule CodeQA.Formatter do - @moduledoc false - - @summary_metrics [ - {"entropy", "char_entropy", "Entropy"}, - {"halstead", "volume", "Halstead Vol."}, - {"halstead", "difficulty", "Difficulty"}, - {"readability", "flesch_adapted", "Readability"}, - {"compression", "redundancy", "Redundancy"} - ] - - @bar_width 20 - @filled "█" - @empty "░" - - def format_github(comparison, output_mode \\ "auto") do - metadata = comparison["metadata"] - files = comparison["files"] || %{} - codebase = comparison["codebase"] || %{} - - if metadata["total_files_compared"] == 0 do - "## Code Quality: PR Comparison\n\nNo file changes detected." - else - build_github_report(metadata, files, codebase, output_mode) - end - end - - defp build_github_report(metadata, files, codebase, output_mode) do - categories = CodeQA.HealthReport.Categories.defaults() - scale = CodeQA.HealthReport.Categories.default_grade_scale() - - base_agg = get_in(codebase, ["base", "aggregate"]) || %{} - head_agg = get_in(codebase, ["head", "aggregate"]) || %{} - - base_grades = CodeQA.HealthReport.Grader.grade_aggregate(categories, base_agg, scale) - head_grades = CodeQA.HealthReport.Grader.grade_aggregate(categories, head_agg, scale) - - paired = Enum.zip(base_grades, head_grades) - - lines = - [ - "## Code Quality: PR Comparison", - "", - "**#{metadata["total_files_compared"]} files compared** (#{metadata["summary"]})", - "" - ] ++ - mermaid_chart(head_grades) ++ - progress_bars(paired) ++ - [""] ++ - file_details(files, codebase, output_mode) ++ - aggregate_details(codebase) - - Enum.join(lines, "\n") - end - - defp mermaid_chart(head_grades) do - names = Enum.map_join(head_grades, ", ", fn g -> ~s("#{g.name}") end) - scores = Enum.map_join(head_grades, ", ", fn g -> to_string(g.score) 
end) - - [ - "```mermaid", - "%%{init: {'theme': 'neutral'}}%%", - "xychart-beta", - " title \"Code Health After PR\"", - " x-axis [#{names}]", - " y-axis \"Score\" 0 --> 100", - " bar [#{scores}]", - "```", - "" - ] - end - - defp progress_bars(paired) do - max_name_len = - Enum.reduce(paired, 0, fn {_base, head}, acc -> - max(acc, String.length(head.name)) - end) - - rows = - Enum.map(paired, fn {base, head} -> - name = String.pad_trailing(head.name, max_name_len) - base_bar = build_bar(base.score) - head_bar = build_bar(head.score) - emoji = grade_emoji(head.grade) - delta = head.score - base.score - delta_str = if delta >= 0, do: "+#{delta}", else: to_string(delta) - "#{name} #{base_bar} #{base.score} → #{head_bar} #{head.score} #{emoji} #{delta_str}" - end) - - ["```"] ++ rows ++ ["```"] - end - - defp file_details(files, codebase, _output_mode) do - codebase_summary = - CodeQA.Summarizer.summarize_codebase(%{"files" => files, "codebase" => codebase}) - - file_summaries = - Map.new(files, fn {path, data} -> - {path, CodeQA.Summarizer.summarize_file(path, data)} - end) - - inner = - (format_file_table(files, file_summaries) ++ [""]) - |> Enum.join("\n") - - [ - "
    ", - "File changes — #{codebase_summary["gist"]}", - "", - inner, - "
    ", - "" - ] - end - - defp aggregate_details(codebase) do - inner = - format_aggregate_table(codebase, build_direction_map()) - |> Enum.join("\n") - - if inner == "" do - [] - else - [ - "
    ", - "Aggregate metrics", - "", - inner, - "", - "
    ", - "" - ] - end - end - - defp build_bar(score) do - filled = round(score / 100 * @bar_width) - filled = min(max(filled, 0), @bar_width) - empty = @bar_width - filled - String.duplicate(@filled, filled) <> String.duplicate(@empty, empty) - end - - defp grade_emoji(grade) do - cond do - grade in ["A", "A-"] -> "🟢" - grade in ["B+", "B", "B-"] -> "🟡" - grade in ["C+", "C", "C-"] -> "🟠" - true -> "🔴" - end - end - - def format_markdown(comparison, output_mode \\ "auto") do - metadata = comparison["metadata"] - files = comparison["files"] || %{} - codebase = comparison["codebase"] - - if metadata["total_files_compared"] == 0 do - "## Code Quality: PR Comparison\n\nNo file changes detected." - else - build_report(metadata, files, codebase, output_mode) - end - end - - defp build_report(metadata, files, codebase, output_mode) do - codebase_summary = - CodeQA.Summarizer.summarize_codebase(%{"files" => files, "codebase" => codebase}) - - lines = [ - "## Code Quality: PR Comparison", - "", - "**#{metadata["total_files_compared"]} files compared** (#{metadata["summary"]})", - "" - ] - - lines = - if output_mode in ["auto", "summary"] do - lines ++ ["> #{codebase_summary["gist"]}", ""] - else - lines - end - - lines = - if output_mode in ["auto", "changes"] do - file_summaries = - Map.new(files, fn {path, data} -> - {path, CodeQA.Summarizer.summarize_file(path, data)} - end) - - lines ++ format_file_table(files, file_summaries) ++ [""] - else - lines - end - - lines = - if output_mode in ["auto", "summary"] do - lines ++ format_aggregate_table(codebase) - else - lines - end - - Enum.join(lines, "\n") - end - - defp format_file_table(files, file_summaries) do - columns = detect_columns(files) - - if columns == [], - do: ["No metric data available."], - else: build_file_rows(files, file_summaries, columns) - end - - defp build_file_rows(files, file_summaries, columns) do - header = - "| File | Status | Summary | " <> - Enum.map_join(columns, " | ", fn {_, _, label} -> 
label end) <> " |" - - separator = - "|------|--------|---------|" <> Enum.map_join(columns, "", fn _ -> "--------|" end) - - rows = - files - |> Enum.sort_by(fn {path, _} -> path end) - |> Enum.map(fn {path, data} -> - gist = get_in(file_summaries, [path, "gist"]) || "" - cells = format_file_row(data, columns) - "| `#{path}` | #{data["status"]} | #{gist} | " <> Enum.join(cells, " | ") <> " |" - end) - - [header, separator | rows] - end - - defp format_file_row(data, columns) do - Enum.map(columns, fn {metric_name, key, _label} -> - case data["status"] do - "modified" -> format_modified_cell(data, metric_name, key) - "added" -> format_added_cell(data, metric_name, key) - "deleted" -> format_deleted_cell(data, metric_name, key) - _ -> "—" - end - end) - end - - defp format_modified_cell(data, metric_name, key) do - case get_in(data, ["delta", "metrics", metric_name, key]) do - nil -> "—" - val -> format_delta(val) - end - end - - defp format_added_cell(data, metric_name, key) do - case get_in(data, ["head", "metrics", metric_name, key]) do - nil -> "—" - val -> "*#{format_value(val)}*" - end - end - - defp format_deleted_cell(data, metric_name, key) do - case get_in(data, ["base", "metrics", metric_name, key]) do - nil -> "—" - val -> "~~#{format_value(val)}~~" - end - end - - defp format_aggregate_table(codebase, direction_map \\ %{}) do - base_agg = get_in(codebase, ["base", "aggregate"]) || %{} - head_agg = get_in(codebase, ["head", "aggregate"]) || %{} - delta_agg = get_in(codebase, ["delta", "aggregate"]) || %{} - - if base_agg == %{} and head_agg == %{}, - do: [], - else: build_aggregate_rows(base_agg, head_agg, delta_agg, direction_map) - end - - defp build_aggregate_rows(base_agg, head_agg, delta_agg, direction_map) do - header = [ - "### Aggregate Metrics", - "", - "| Metric | Base | Head | Delta |", - "|--------|------|------|-------|" - ] - - rows = - MapSet.new(Map.keys(base_agg) ++ Map.keys(head_agg)) - |> Enum.sort() - |> Enum.flat_map(fn metric_name 
-> - base_m = Map.get(base_agg, metric_name, %{}) - head_m = Map.get(head_agg, metric_name, %{}) - delta_m = Map.get(delta_agg, metric_name, %{}) - - MapSet.new(Map.keys(base_m) ++ Map.keys(head_m)) - |> Enum.sort() - |> Enum.map(fn key -> - direction = Map.get(direction_map, "#{metric_name}.#{key}") - delta_cell = format_delta_with_direction(delta_m[key], direction) - - "| #{metric_name}.#{key} | #{format_value(base_m[key])} | #{format_value(head_m[key])} | #{delta_cell} |" - end) - end) - - header ++ rows - end - - defp build_direction_map do - CodeQA.HealthReport.Categories.defaults() - |> Enum.flat_map(fn cat -> - Enum.map(cat.metrics, fn m -> {"#{m.source}.mean_#{m.name}", m.good} end) - end) - |> Map.new() - end - - defp format_delta_with_direction(nil, _direction), do: "—" - - defp format_delta_with_direction(value, direction) do - formatted = format_delta(value) - emoji = delta_emoji(value, direction) - if emoji, do: "#{emoji} #{formatted}", else: formatted - end - - defp delta_emoji(_value, nil), do: nil - defp delta_emoji(value, :high) when value > 0, do: "🟢" - defp delta_emoji(value, :high) when value < 0, do: "🔴" - defp delta_emoji(value, :low) when value < 0, do: "🟢" - defp delta_emoji(value, :low) when value > 0, do: "🔴" - defp delta_emoji(_value, _direction), do: nil - - defp detect_columns(files) do - Enum.filter(@summary_metrics, fn {metric_name, key, _label} -> - Enum.any?(files, fn {_path, data} -> - source = data["head"] || data["base"] - source && get_in(source, ["metrics", metric_name, key]) != nil - end) - end) - end - - defp format_delta(nil), do: "—" - - defp format_delta(value) when value > 0, - do: "+#{:erlang.float_to_binary(value / 1, decimals: 2)}" - - defp format_delta(value) when value < 0, do: :erlang.float_to_binary(value / 1, decimals: 2) - defp format_delta(_), do: "0.00" - - defp format_value(nil), do: "—" - defp format_value(value) when is_float(value), do: :erlang.float_to_binary(value, decimals: 2) - defp format_value(value), 
do: to_string(value) -end diff --git a/lib/codeqa/summarizer.ex b/lib/codeqa/summarizer.ex deleted file mode 100644 index d6d9c92..0000000 --- a/lib/codeqa/summarizer.ex +++ /dev/null @@ -1,126 +0,0 @@ -defmodule CodeQA.Summarizer do - @moduledoc false - - @codebase_direction_metrics [ - {"complexity", "halstead", "mean_volume"}, - {"readability", "readability", "mean_flesch_adapted"}, - {"entropy", "entropy", "mean_char_entropy"}, - {"redundancy", "compression", "mean_redundancy"} - ] - - @file_direction_metrics [ - {"complexity", "halstead", "volume"}, - {"readability", "readability", "flesch_adapted"}, - {"entropy", "entropy", "char_entropy"}, - {"redundancy", "compression", "redundancy"} - ] - - @threshold_stable 0.05 - @threshold_slight 0.20 - - def summarize_codebase(comparison) do - files = Map.get(comparison, "files", %{}) - codebase = Map.get(comparison, "codebase", %{}) - - file_counts = count_statuses(files) - directions = compute_codebase_directions(codebase) - gist = build_codebase_gist(file_counts, directions) - - %{"gist" => gist, "file_counts" => file_counts, "directions" => directions} - end - - def summarize_file(_path, %{"status" => "added"} = data) do - lines = get_in(data, ["head", "lines"]) || 0 - %{"gist" => "new file (#{lines} lines)", "status" => "added", "lines" => lines} - end - - def summarize_file(_path, %{"status" => "deleted"} = data) do - lines = get_in(data, ["base", "lines"]) || 0 - %{"gist" => "removed (#{lines} lines)", "status" => "deleted", "lines" => lines} - end - - def summarize_file(_path, %{"status" => "modified"} = data) do - directions = compute_file_directions(data) - gist = build_file_gist(directions) - %{"gist" => gist, "status" => "modified", "directions" => directions} - end - - defp count_statuses(files) do - Enum.reduce(files, %{"added" => 0, "modified" => 0, "deleted" => 0}, fn {_path, data}, acc -> - status = Map.get(data, "status", "modified") - Map.update!(acc, status, &(&1 + 1)) - end) - end - - defp 
compute_codebase_directions(codebase) do - base_agg = get_in(codebase, ["base", "aggregate"]) || %{} - delta_agg = get_in(codebase, ["delta", "aggregate"]) || %{} - - Map.new(@codebase_direction_metrics, fn {dir_key, metric, agg_key} -> - base_val = get_in(base_agg, [metric, agg_key]) - delta_val = get_in(delta_agg, [metric, agg_key]) - {dir_key, classify_change(base_val, delta_val)} - end) - end - - defp compute_file_directions(file_data) do - base_metrics = get_in(file_data, ["base", "metrics"]) || %{} - delta_metrics = get_in(file_data, ["delta", "metrics"]) || %{} - - Map.new(@file_direction_metrics, fn {dir_key, metric, key} -> - base_val = get_in(base_metrics, [metric, key]) - delta_val = get_in(delta_metrics, [metric, key]) - {dir_key, classify_change(base_val, delta_val)} - end) - end - - defp classify_change(nil, _), do: "stable" - defp classify_change(_, nil), do: "stable" - defp classify_change(0, _), do: "stable" - defp classify_change(+0.0, _), do: "stable" - - defp classify_change(base_val, delta_val) do - ratio = abs(delta_val) / abs(base_val) - - cond do - ratio < @threshold_stable -> "stable" - ratio < @threshold_slight and delta_val > 0 -> "increased slightly" - ratio < @threshold_slight -> "decreased slightly" - delta_val > 0 -> "increased" - true -> "decreased" - end - end - - defp build_file_gist(directions) do - parts = - directions - |> Enum.reject(fn {_, d} -> d == "stable" end) - |> Enum.map(fn {k, d} -> "#{k} #{d}" end) - - if parts == [], do: "all metrics stable", else: Enum.join(parts, ", ") - end - - defp build_codebase_gist(file_counts, directions) do - file_parts = - [ - {"added", file_counts["added"]}, - {"modified", file_counts["modified"]}, - {"deleted", file_counts["deleted"]} - ] - |> Enum.filter(fn {_, c} -> c > 0 end) - |> Enum.map(fn {s, c} -> "#{c} #{s}" end) - - file_summary = if file_parts == [], do: "no changes", else: Enum.join(file_parts, ", ") - - dir_parts = - directions - |> Enum.reject(fn {_, d} -> d == "stable" end) 
- |> Enum.map(fn {k, d} -> "#{k} #{d}" end) - - if dir_parts == [] do - "#{file_summary} — all metrics stable" - else - "#{file_summary} — #{Enum.join(dir_parts, ", ")}" - end - end -end diff --git a/test/codeqa/cli_compare_test.exs b/test/codeqa/cli_compare_test.exs deleted file mode 100644 index b39e2c2..0000000 --- a/test/codeqa/cli_compare_test.exs +++ /dev/null @@ -1,91 +0,0 @@ -defmodule CodeQA.CLI.CompareTest do - use ExUnit.Case, async: true - - @moduletag :tmp_dir - - setup %{tmp_dir: tmp_dir} do - # Initialize a git repo with one source file and one non-source file - System.cmd("git", ["init"], cd: tmp_dir) - System.cmd("git", ["config", "user.email", "test@test.com"], cd: tmp_dir) - System.cmd("git", ["config", "user.name", "Test"], cd: tmp_dir) - - File.mkdir_p!(Path.join(tmp_dir, "lib")) - File.write!(Path.join(tmp_dir, "lib/app.ex"), "defmodule App do\nend") - System.cmd("git", ["add", "."], cd: tmp_dir) - System.cmd("git", ["commit", "-m", "initial"], cd: tmp_dir) - - %{repo: tmp_dir} - end - - describe "compare with github format" do - test "file changes section shows actual file count when source files changed", %{repo: repo} do - File.write!(Path.join(repo, "lib/app.ex"), """ - defmodule App do - def hello, do: :world - def goodbye, do: :world - end - """) - - System.cmd("git", ["add", "."], cd: repo) - System.cmd("git", ["commit", "-m", "update app"], cd: repo) - - stdout = CodeQA.CLI.main(["compare", repo, "--base-ref", "HEAD~1", "--format", "github"]) - - assert stdout =~ "File changes — 1 modified" - refute stdout =~ "File changes — no changes" - end - end - - describe "compare with no source file changes" do - test "exits 0 when only non-source files changed", %{repo: repo} do - # Create a branch, change only a .md file (not a source file) - File.write!(Path.join(repo, "README.txt"), "hello") - System.cmd("git", ["add", "."], cd: repo) - System.cmd("git", ["commit", "-m", "add readme"], cd: repo) - - # compare should succeed (not crash) when 
no source files changed - {base_ref, head_ref} = {"HEAD~1", "HEAD"} - - changes = CodeQA.Git.changed_files(repo, base_ref, head_ref) - assert changes == [], "expected no source file changes, got: #{inspect(changes)}" - - # Verify the CLI handles this gracefully by calling main - stdout = - CodeQA.CLI.main([ - "compare", - repo, - "--base-ref", - base_ref, - "--changes-only", - "--format", - "json" - ]) - - result = Jason.decode!(stdout) - - assert result["metadata"]["total_files_compared"] == 0 - end - - test "outputs valid JSON with empty comparison", %{repo: repo} do - # Change only a non-source file - File.write!(Path.join(repo, "README.txt"), "hello") - System.cmd("git", ["add", "."], cd: repo) - System.cmd("git", ["commit", "-m", "add readme"], cd: repo) - - # Assert on JSON return value directly - stdout = - CodeQA.CLI.main([ - "compare", - repo, - "--base-ref", - "HEAD~1", - "--changes-only", - "--format", - "json" - ]) - - assert {:ok, result} = Jason.decode(stdout) - assert result["metadata"]["total_files_compared"] == 0 - end - end -end diff --git a/test/codeqa/formatter_test.exs b/test/codeqa/formatter_test.exs deleted file mode 100644 index de5be57..0000000 --- a/test/codeqa/formatter_test.exs +++ /dev/null @@ -1,150 +0,0 @@ -defmodule CodeQA.FormatterTest do - use ExUnit.Case, async: true - - alias CodeQA.Formatter - - @sample_comparison %{ - "metadata" => %{ - "total_files_compared" => 1, - "summary" => "1 modified", - "base_ref" => "abc123", - "head_ref" => "HEAD" - }, - "files" => %{ - "lib/foo.ex" => %{ - "status" => "modified", - "base" => %{ - "metrics" => %{"halstead" => %{"volume" => 1000.0}}, - "lines" => 100, - "bytes" => 3000 - }, - "head" => %{ - "metrics" => %{"halstead" => %{"volume" => 800.0}}, - "lines" => 95, - "bytes" => 2800 - }, - "delta" => %{ - "metrics" => %{"halstead" => %{"volume" => -200.0}}, - "lines" => -5, - "bytes" => -200 - } - } - }, - "codebase" => %{ - "base" => %{ - "aggregate" => %{ - "readability" => %{ - 
"mean_flesch_adapted" => 65.0, - "mean_fog_adapted" => 8.0, - "mean_avg_tokens_per_line" => 7.0, - "mean_avg_line_length" => 45.0 - }, - "halstead" => %{ - "mean_difficulty" => 15.0, - "mean_effort" => 8000.0, - "mean_volume" => 500.0, - "mean_estimated_bugs" => 0.2 - } - } - }, - "head" => %{ - "aggregate" => %{ - "readability" => %{ - "mean_flesch_adapted" => 75.0, - "mean_fog_adapted" => 7.0, - "mean_avg_tokens_per_line" => 6.0, - "mean_avg_line_length" => 42.0 - }, - "halstead" => %{ - "mean_difficulty" => 12.0, - "mean_effort" => 6000.0, - "mean_volume" => 400.0, - "mean_estimated_bugs" => 0.15 - } - } - }, - "delta" => %{ - "aggregate" => %{ - "readability" => %{ - "mean_flesch_adapted" => 10.0, - "mean_fog_adapted" => -1.0, - "mean_avg_tokens_per_line" => -1.0, - "mean_avg_line_length" => -3.0 - }, - "halstead" => %{ - "mean_difficulty" => -3.0, - "mean_effort" => -2000.0, - "mean_volume" => -100.0, - "mean_estimated_bugs" => -0.05 - } - } - } - } - } - - describe "format_github/1" do - test "includes mermaid chart of head scores" do - result = Formatter.format_github(@sample_comparison) - assert result =~ "```mermaid" - assert result =~ "xychart-beta" - assert result =~ "bar [" - end - - test "includes progress bars with base → head" do - result = Formatter.format_github(@sample_comparison) - assert result =~ "→" - end - - test "includes grade emoji" do - result = Formatter.format_github(@sample_comparison) - assert result =~ "🟢" or result =~ "🟡" or result =~ "🟠" or result =~ "🔴" - end - - test "wraps file details in collapsible section" do - result = Formatter.format_github(@sample_comparison) - assert result =~ "
    " - assert result =~ "
    " - end - - test "shows no changes message when zero files compared" do - comparison = put_in(@sample_comparison, ["metadata", "total_files_compared"], 0) - result = Formatter.format_github(comparison) - assert result =~ "No file changes detected" - end - - test "shows 🟢 in aggregate delta for improving high-is-better metric" do - # flesch_adapted is good: :high, delta +10.0 → improvement - result = Formatter.format_github(@sample_comparison) - assert result =~ "🟢 +10.00" - end - - test "file changes section shows actual file counts, not 'no changes'" do - result = Formatter.format_github(@sample_comparison) - assert result =~ "File changes — 1 modified" - refute result =~ "File changes — no changes" - end - - test "file changes section reflects metric directions from codebase data" do - result = Formatter.format_github(@sample_comparison) - - # halstead.mean_volume drops 100/500 = 20% → "decreased"; readability rises 10/65 ≈ 15% → "increased slightly" - refute result =~ "File changes — 1 modified — all metrics stable" - end - - test "shows 🔴 in aggregate delta for worsening low-is-better metric" do - # halstead.volume is good: :low, delta +300 → regression - worsening = - put_in( - @sample_comparison, - ["codebase", "head", "aggregate", "halstead", "mean_volume"], - 800.0 - ) - |> put_in(["codebase", "delta", "aggregate"], %{ - "halstead" => %{"mean_volume" => 300.0} - }) - - result = Formatter.format_github(worsening) - assert result =~ "🔴 +300.00" - end - end -end From 1deed5aa047b4a9a1563043a17519fd7316a9eae Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 14:23:19 +0100 Subject: [PATCH 43/71] chore(quality): fix all credo and dialyzer issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add dialyzer ignores for Mix.Task PLT false positives and Engine.Registry unknown type - Fix [W] warnings: length/1 comparisons → list equality, always-returns-left expressions - Fix [F] refactoring: 
extract nested lambdas to helpers across ~20 files, merge double Enum.filter, swap negated conditions, replace Enum.map+join with Enum.map_join, split complex functions into clauses - Fix [R] readability: add @moduledoc false to all 40 language modules, sort all alias blocks alphabetically, fix long lines and numeric underscore - Fix [D] design: alias all inline module references throughout lib/ and test/ Co-Authored-By: Claude Sonnet 4.6 --- .dialyzer_ignore.exs | 10 +- lib/codeqa/analysis/behavior_config_server.ex | 24 +- lib/codeqa/analysis/file_context_server.ex | 8 +- .../ast/classification/node_classifier.ex | 25 +- .../ast/classification/node_type_detector.ex | 2 +- .../ast/enrichment/compound_node_builder.ex | 6 +- lib/codeqa/ast/enrichment/node_analyzer.ex | 2 +- lib/codeqa/ast/lexing/token_normalizer.ex | 38 +-- lib/codeqa/ast/nodes/attribute_node.ex | 4 +- lib/codeqa/ast/nodes/code_node.ex | 4 +- lib/codeqa/ast/nodes/doc_node.ex | 4 +- lib/codeqa/ast/nodes/function_node.ex | 4 +- lib/codeqa/ast/nodes/import_node.ex | 4 +- lib/codeqa/ast/nodes/module_node.ex | 4 +- lib/codeqa/ast/nodes/test_node.ex | 4 +- lib/codeqa/ast/parsing/parser.ex | 5 +- lib/codeqa/ast/parsing/signal_registry.ex | 2 +- lib/codeqa/ast/parsing/signal_stream.ex | 28 +- .../classification/attribute_signal.ex | 30 +- .../classification/comment_density_signal.ex | 15 +- .../signals/classification/config_signal.ex | 16 +- .../signals/classification/function_signal.ex | 18 +- .../signals/classification/import_signal.ex | 20 +- .../signals/classification/module_signal.ex | 18 +- .../ast/signals/classification/test_signal.ex | 20 +- .../ast/signals/classification/type_signal.ex | 16 +- lib/codeqa/block_impact_analyzer.ex | 3 - lib/codeqa/cli.ex | 3 +- lib/codeqa/cli/analyze.ex | 14 +- lib/codeqa/cli/correlate.ex | 3 +- lib/codeqa/cli/health_report.ex | 24 +- lib/codeqa/cli/history.ex | 18 +- lib/codeqa/cli/options.ex | 5 +- lib/codeqa/cli/progress.ex | 6 +- 
lib/codeqa/combined_metrics/category.ex | 4 +- lib/codeqa/combined_metrics/code_smells.ex | 5 +- lib/codeqa/combined_metrics/consistency.ex | 5 +- lib/codeqa/combined_metrics/dependencies.ex | 5 +- lib/codeqa/combined_metrics/documentation.ex | 5 +- lib/codeqa/combined_metrics/error_handling.ex | 5 +- lib/codeqa/combined_metrics/file_scorer.ex | 14 +- lib/codeqa/combined_metrics/file_structure.ex | 5 +- .../combined_metrics/function_design.ex | 5 +- .../combined_metrics/naming_conventions.ex | 5 +- lib/codeqa/combined_metrics/sample_runner.ex | 285 ++++++++++-------- .../combined_metrics/scope_and_assignment.ex | 5 +- lib/codeqa/combined_metrics/scorer.ex | 4 +- lib/codeqa/combined_metrics/testing.ex | 5 +- lib/codeqa/combined_metrics/type_and_value.ex | 5 +- .../combined_metrics/variable_naming.ex | 5 +- lib/codeqa/diagnostics.ex | 38 ++- lib/codeqa/engine/analyzer.ex | 12 +- lib/codeqa/engine/parallel.ex | 10 +- lib/codeqa/git.ex | 4 +- lib/codeqa/health_report.ex | 26 +- lib/codeqa/health_report/formatter/github.ex | 103 +++---- lib/codeqa/health_report/formatter/plain.ex | 68 ++--- lib/codeqa/health_report/grader.ex | 22 +- lib/codeqa/health_report/top_blocks.ex | 21 +- lib/codeqa/languages/code/native/cpp.ex | 1 + lib/codeqa/languages/code/native/go.ex | 1 + lib/codeqa/languages/code/native/haskell.ex | 1 + lib/codeqa/languages/code/native/ocaml.ex | 1 + lib/codeqa/languages/code/native/rust.ex | 1 + lib/codeqa/languages/code/native/swift.ex | 1 + lib/codeqa/languages/code/native/zig.ex | 1 + lib/codeqa/languages/code/scripting/julia.ex | 1 + lib/codeqa/languages/code/scripting/lua.ex | 1 + lib/codeqa/languages/code/scripting/perl.ex | 1 + lib/codeqa/languages/code/scripting/php.ex | 1 + lib/codeqa/languages/code/scripting/python.ex | 1 + lib/codeqa/languages/code/scripting/r.ex | 1 + lib/codeqa/languages/code/scripting/ruby.ex | 1 + lib/codeqa/languages/code/scripting/shell.ex | 1 + lib/codeqa/languages/code/vm/clojure.ex | 1 + 
lib/codeqa/languages/code/vm/csharp.ex | 1 + lib/codeqa/languages/code/vm/dart.ex | 1 + lib/codeqa/languages/code/vm/elixir.ex | 1 + lib/codeqa/languages/code/vm/erlang.ex | 1 + lib/codeqa/languages/code/vm/fsharp.ex | 1 + lib/codeqa/languages/code/vm/java.ex | 1 + lib/codeqa/languages/code/vm/kotlin.ex | 1 + lib/codeqa/languages/code/vm/scala.ex | 1 + lib/codeqa/languages/code/web/javascript.ex | 1 + lib/codeqa/languages/code/web/typescript.ex | 1 + lib/codeqa/languages/config/dockerfile.ex | 1 + lib/codeqa/languages/config/makefile.ex | 1 + lib/codeqa/languages/config/terraform.ex | 1 + lib/codeqa/languages/data/graphql.ex | 1 + lib/codeqa/languages/data/json.ex | 1 + lib/codeqa/languages/data/sql.ex | 1 + lib/codeqa/languages/data/toml.ex | 1 + lib/codeqa/languages/data/yaml.ex | 1 + lib/codeqa/languages/language.ex | 3 +- lib/codeqa/languages/markup/css.ex | 1 + lib/codeqa/languages/markup/html.ex | 1 + lib/codeqa/languages/markup/markdown.ex | 1 + lib/codeqa/languages/markup/xml.ex | 1 + lib/codeqa/languages/unknown.ex | 1 + lib/codeqa/metrics/codebase/similarity.ex | 18 +- lib/codeqa/metrics/file/casing_entropy.ex | 4 +- .../metrics/file/near_duplicate_blocks.ex | 72 +++-- .../metrics/post_processing/menzerath.ex | 30 +- lib/mix/tasks/codeqa/sample_report.ex | 46 +-- priv/combined_metrics/code_smells.yml | 5 + priv/combined_metrics/variable_naming.yml | 14 + .../analysis/behavior_config_server_test.exs | 2 +- .../analysis/file_metrics_server_test.exs | 3 +- .../classification/node_classifier_test.exs | 25 +- .../ast/classification/node_protocol_test.exs | 9 +- .../node_type_detector_test.exs | 44 +-- ...ompound_node_assertions_languages_test.exs | 10 +- .../enrichment/compound_node_builder_test.exs | 9 +- .../ast/lexing/token_normalizer_test.exs | 4 +- .../codeqa/ast/lexing/token_protocol_test.exs | 2 +- test/codeqa/ast/nodes/code_node_test.exs | 2 +- test/codeqa/ast/nodes/function_node_test.exs | 2 +- test/codeqa/ast/nodes/import_node_test.exs | 2 +- 
.../ast/parsing/parser_languages_test.exs | 10 +- test/codeqa/ast/parsing/parser_test.exs | 6 +- .../codeqa/ast/parsing/signal_stream_test.exs | 2 +- .../comment_density_signal_test.exs | 2 +- .../classification/config_signal_test.exs | 2 +- .../classification/data_signal_test.exs | 2 +- .../classification/type_signal_test.exs | 2 +- .../access_modifier_signal_test.exs | 4 +- .../assignment_function_signal_test.exs | 4 +- .../structural/blank_line_signal_test.exs | 4 +- .../structural/bracket_signal_test.exs | 4 +- .../structural/branch_split_signal_test.exs | 10 +- .../structural/colon_indent_signal_test.exs | 7 +- .../comment_divider_signal_test.exs | 4 +- .../structural/decorator_signal_test.exs | 4 +- .../structural/dedent_to_zero_signal_test.exs | 4 +- .../doc_comment_lead_signal_test.exs | 4 +- .../structural/keyword_signal_test.exs | 4 +- .../structural/sql_block_signal_test.exs | 4 +- .../structural/triple_quote_signal_test.exs | 4 +- .../block_impact/codebase_impact_test.exs | 5 +- test/codeqa/block_impact/file_impact_test.exs | 2 +- .../refactoring_potentials_test.exs | 21 +- test/codeqa/block_impact_analyzer_test.exs | 11 +- .../combined_metrics/file_scorer_test.exs | 2 +- .../combined_metrics/sample_runner_test.exs | 19 +- test/codeqa/engine/analyzer_test.exs | 8 +- test/codeqa/health_report/grader_test.exs | 10 +- test/codeqa/health_report/top_blocks_test.exs | 22 +- test/codeqa/health_report_test.exs | 51 ++-- test/codeqa/metrics/file/bradford_test.exs | 2 +- test/codeqa/metrics/file/branching_test.exs | 2 +- test/codeqa/metrics/file/brevity_test.exs | 2 +- .../metrics/file/function_metrics_test.exs | 6 +- .../file/near_duplicate_blocks_file_test.exs | 2 +- test/codeqa/metrics/file/rfc_test.exs | 2 +- 154 files changed, 977 insertions(+), 700 deletions(-) diff --git a/.dialyzer_ignore.exs b/.dialyzer_ignore.exs index b76a4d0..9722072 100644 --- a/.dialyzer_ignore.exs +++ b/.dialyzer_ignore.exs @@ -2,5 +2,13 @@ # Dialyzer specializes analyze/2 for the 
codebase call-site where include_pairs # is always true, making the false branch appear unreachable. Both branches are # valid and reachable at runtime from the file-level and codebase callers. - {"lib/codeqa/metrics/file/near_duplicate_blocks.ex", :pattern_match} + {"lib/codeqa/metrics/file/near_duplicate_blocks.ex", :pattern_match}, + # Mix module type information is not available in the PLT; these are valid + # Mix.Task callbacks and standard Mix module calls. + {"lib/mix/tasks/codeqa/sample_report.ex", :callback_info_missing}, + {"lib/mix/tasks/codeqa/signal_debug.ex", :callback_info_missing}, + {"lib/mix/tasks/codeqa/sample_report.ex", :unknown_function}, + {"lib/mix/tasks/codeqa/signal_debug.ex", :unknown_function}, + # CodeQA.Engine.Registry.t/0 is defined via a macro; type is available at runtime. + {"lib/codeqa/analysis/file_metrics_server.ex", :unknown_type} ] diff --git a/lib/codeqa/analysis/behavior_config_server.ex b/lib/codeqa/analysis/behavior_config_server.ex index f526b20..04cc9ba 100644 --- a/lib/codeqa/analysis/behavior_config_server.ex +++ b/lib/codeqa/analysis/behavior_config_server.ex @@ -85,23 +85,25 @@ defmodule CodeQA.Analysis.BehaviorConfigServer do {:ok, files} -> files |> Enum.filter(&String.ends_with?(&1, ".yml")) - |> Enum.each(fn yml_file -> - category = String.trim_trailing(yml_file, ".yml") - yaml_path = Path.join(@yaml_dir, yml_file) - {:ok, data} = YamlElixir.read_from_file(yaml_path) - - data - |> Enum.filter(fn {_k, v} -> is_map(v) end) - |> Enum.each(fn {behavior, behavior_data} -> - :ets.insert(tid, {{category, behavior}, behavior_data}) - end) - end) + |> Enum.each(&load_yml_file(&1, tid)) {:error, _} -> :ok end end + defp load_yml_file(yml_file, tid) do + category = String.trim_trailing(yml_file, ".yml") + yaml_path = Path.join(@yaml_dir, yml_file) + {:ok, data} = YamlElixir.read_from_file(yaml_path) + + data + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.each(fn {behavior, behavior_data} -> + :ets.insert(tid, 
{{category, behavior}, behavior_data}) + end) + end + @doc false def scalars_from_behavior_data(behavior_data) do behavior_data diff --git a/lib/codeqa/analysis/file_context_server.ex b/lib/codeqa/analysis/file_context_server.ex index 28f9670..3de7859 100644 --- a/lib/codeqa/analysis/file_context_server.ex +++ b/lib/codeqa/analysis/file_context_server.ex @@ -17,6 +17,8 @@ defmodule CodeQA.Analysis.FileContextServer do use GenServer alias CodeQA.Engine.{FileContext, Pipeline} + alias CodeQA.Language + alias CodeQA.Languages.Unknown # --- Public API --- @@ -72,14 +74,14 @@ defmodule CodeQA.Analysis.FileContextServer do defp resolve_language_name(opts) do cond do lang = Keyword.get(opts, :language) -> - mod = CodeQA.Language.find(lang) || CodeQA.Languages.Unknown + mod = Language.find(lang) || Unknown mod.name() path = Keyword.get(opts, :path) -> - CodeQA.Language.detect(path).name() + Language.detect(path).name() true -> - CodeQA.Languages.Unknown.name() + Unknown.name() end end end diff --git a/lib/codeqa/ast/classification/node_classifier.ex b/lib/codeqa/ast/classification/node_classifier.ex index 57c559d..b6f20fa 100644 --- a/lib/codeqa/ast/classification/node_classifier.ex +++ b/lib/codeqa/ast/classification/node_classifier.ex @@ -35,27 +35,28 @@ defmodule CodeQA.AST.Classification.NodeClassifier do """ alias CodeQA.AST.Enrichment.Node - alias CodeQA.AST.Parsing.SignalStream - - alias CodeQA.AST.Signals.Classification.{ - DocSignal, - AttributeSignal, - FunctionSignal, - ModuleSignal, - ImportSignal, - TestSignal - } alias CodeQA.AST.Nodes.{ + AttributeNode, CodeNode, DocNode, FunctionNode, - ModuleNode, ImportNode, - AttributeNode, + ModuleNode, TestNode } + alias CodeQA.AST.Parsing.SignalStream + + alias CodeQA.AST.Signals.Classification.{ + AttributeSignal, + DocSignal, + FunctionSignal, + ImportSignal, + ModuleSignal, + TestSignal + } + @classification_signals [ %DocSignal{}, %AttributeSignal{}, diff --git a/lib/codeqa/ast/classification/node_type_detector.ex 
b/lib/codeqa/ast/classification/node_type_detector.ex index 6175968..5038371 100644 --- a/lib/codeqa/ast/classification/node_type_detector.ex +++ b/lib/codeqa/ast/classification/node_type_detector.ex @@ -7,8 +7,8 @@ defmodule CodeQA.AST.Classification.NodeTypeDetector do for the full list of signals and their weights. """ - alias CodeQA.AST.Enrichment.Node alias CodeQA.AST.Classification.NodeClassifier + alias CodeQA.AST.Enrichment.Node @doc """ Classify each node in the list into the most specific typed struct. diff --git a/lib/codeqa/ast/enrichment/compound_node_builder.ex b/lib/codeqa/ast/enrichment/compound_node_builder.ex index f446e72..27c6165 100644 --- a/lib/codeqa/ast/enrichment/compound_node_builder.ex +++ b/lib/codeqa/ast/enrichment/compound_node_builder.ex @@ -13,11 +13,11 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeBuilder do promoted to the compound's `docs`/`typespecs` lists. """ - alias CodeQA.AST.Enrichment.Node - alias CodeQA.AST.Enrichment.CompoundNode alias CodeQA.AST.Classification.NodeProtocol - alias CodeQA.AST.Nodes.{DocNode, AttributeNode} + alias CodeQA.AST.Enrichment.CompoundNode + alias CodeQA.AST.Enrichment.Node alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken} + alias CodeQA.AST.Nodes.{AttributeNode, DocNode} @doc """ Groups a list of typed nodes into CompoundNode structs. diff --git a/lib/codeqa/ast/enrichment/node_analyzer.ex b/lib/codeqa/ast/enrichment/node_analyzer.ex index d506d3d..2f6221c 100644 --- a/lib/codeqa/ast/enrichment/node_analyzer.ex +++ b/lib/codeqa/ast/enrichment/node_analyzer.ex @@ -7,8 +7,8 @@ defmodule CodeQA.AST.Enrichment.NodeAnalyzer do reference and should not appear in the node's domain fingerprint. 
""" - alias CodeQA.AST.Lexing.Token alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.Token @doc """ Returns a MapSet of lowercase identifier names that are locally bound diff --git a/lib/codeqa/ast/lexing/token_normalizer.ex b/lib/codeqa/ast/lexing/token_normalizer.ex index d2f0016..5cabba3 100644 --- a/lib/codeqa/ast/lexing/token_normalizer.ex +++ b/lib/codeqa/ast/lexing/token_normalizer.ex @@ -5,9 +5,9 @@ defmodule CodeQA.AST.Lexing.TokenNormalizer do See [lexical analysis](https://en.wikipedia.org/wiki/Lexical_analysis). """ - alias CodeQA.AST.Lexing.Token - alias CodeQA.AST.Lexing.StringToken alias CodeQA.AST.Lexing.NewlineToken + alias CodeQA.AST.Lexing.StringToken + alias CodeQA.AST.Lexing.Token alias CodeQA.AST.Lexing.WhitespaceToken @doc """ @@ -28,24 +28,28 @@ defmodule CodeQA.AST.Lexing.TokenNormalizer do lines |> Enum.with_index() |> Enum.flat_map(fn {line, idx} -> - line_num = idx + 1 - {tokens, last_token} = tokenize_line(line, line_num) - - if idx < last_idx do - # last_token is tracked during scanning — O(1) vs List.last/1 which is O(N). - nl_col = - case last_token do - nil -> 0 - t -> t.col + String.length(t.content) - end - - tokens ++ [%NewlineToken{content: "\n", line: line_num, col: nl_col}] - else - tokens - end + tokens_with_newline(line, idx, last_idx) end) end + defp tokens_with_newline(line, idx, last_idx) do + line_num = idx + 1 + {tokens, last_token} = tokenize_line(line, line_num) + + if idx < last_idx do + # last_token is tracked during scanning — O(1) vs List.last/1 which is O(N). + nl_col = + case last_token do + nil -> 0 + t -> t.col + String.length(t.content) + end + + tokens ++ [%NewlineToken{content: "\n", line: line_num, col: nl_col}] + else + tokens + end + end + # Returns {tokens, last_token} where last_token is the final token on the line # (or nil for an empty line), allowing normalize_structural to compute nl_col # in O(1) without calling List.last/1. 
diff --git a/lib/codeqa/ast/nodes/attribute_node.ex b/lib/codeqa/ast/nodes/attribute_node.ex index dba171d..7dd106c 100644 --- a/lib/codeqa/ast/nodes/attribute_node.ex +++ b/lib/codeqa/ast/nodes/attribute_node.ex @@ -49,6 +49,8 @@ defmodule CodeQA.AST.Nodes.AttributeNode do end defimpl CodeQA.AST.Classification.NodeProtocol do + alias CodeQA.AST.Classification.NodeProtocol + def tokens(n), do: n.tokens def line_count(n), do: n.line_count def children(n), do: n.children @@ -59,7 +61,7 @@ defmodule CodeQA.AST.Nodes.AttributeNode do def flat_tokens(n) do if Enum.empty?(n.children), do: n.tokens, - else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/nodes/code_node.ex b/lib/codeqa/ast/nodes/code_node.ex index 1b26244..b7dfd9d 100644 --- a/lib/codeqa/ast/nodes/code_node.ex +++ b/lib/codeqa/ast/nodes/code_node.ex @@ -28,6 +28,8 @@ defmodule CodeQA.AST.Nodes.CodeNode do end defimpl CodeQA.AST.Classification.NodeProtocol do + alias CodeQA.AST.Classification.NodeProtocol + def tokens(n), do: n.tokens def line_count(n), do: n.line_count def children(n), do: n.children @@ -38,7 +40,7 @@ defmodule CodeQA.AST.Nodes.CodeNode do def flat_tokens(n) do if Enum.empty?(n.children), do: n.tokens, - else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/nodes/doc_node.ex b/lib/codeqa/ast/nodes/doc_node.ex index 2b6dbfd..5e011ca 100644 --- a/lib/codeqa/ast/nodes/doc_node.ex +++ b/lib/codeqa/ast/nodes/doc_node.ex @@ -28,6 +28,8 @@ defmodule CodeQA.AST.Nodes.DocNode do end defimpl CodeQA.AST.Classification.NodeProtocol do + alias CodeQA.AST.Classification.NodeProtocol + def tokens(n), do: n.tokens def line_count(n), do: n.line_count def children(n), do: n.children @@ -38,7 +40,7 @@ defmodule CodeQA.AST.Nodes.DocNode do 
def flat_tokens(n) do if Enum.empty?(n.children), do: n.tokens, - else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/nodes/function_node.ex b/lib/codeqa/ast/nodes/function_node.ex index 6d91c88..48c6a5d 100644 --- a/lib/codeqa/ast/nodes/function_node.ex +++ b/lib/codeqa/ast/nodes/function_node.ex @@ -41,6 +41,8 @@ defmodule CodeQA.AST.Nodes.FunctionNode do end defimpl CodeQA.AST.Classification.NodeProtocol do + alias CodeQA.AST.Classification.NodeProtocol + def tokens(n), do: n.tokens def line_count(n), do: n.line_count def children(n), do: n.children @@ -51,7 +53,7 @@ defmodule CodeQA.AST.Nodes.FunctionNode do def flat_tokens(n) do if Enum.empty?(n.children), do: n.tokens, - else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/nodes/import_node.ex b/lib/codeqa/ast/nodes/import_node.ex index 999013d..3730370 100644 --- a/lib/codeqa/ast/nodes/import_node.ex +++ b/lib/codeqa/ast/nodes/import_node.ex @@ -29,6 +29,8 @@ defmodule CodeQA.AST.Nodes.ImportNode do end defimpl CodeQA.AST.Classification.NodeProtocol do + alias CodeQA.AST.Classification.NodeProtocol + def tokens(n), do: n.tokens def line_count(n), do: n.line_count def children(n), do: n.children @@ -39,7 +41,7 @@ defmodule CodeQA.AST.Nodes.ImportNode do def flat_tokens(n) do if Enum.empty?(n.children), do: n.tokens, - else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/nodes/module_node.ex b/lib/codeqa/ast/nodes/module_node.ex index 9ad3efb..c8d5072 100644 --- a/lib/codeqa/ast/nodes/module_node.ex +++ b/lib/codeqa/ast/nodes/module_node.ex @@ -30,6 +30,8 @@ defmodule CodeQA.AST.Nodes.ModuleNode do end 
defimpl CodeQA.AST.Classification.NodeProtocol do + alias CodeQA.AST.Classification.NodeProtocol + def tokens(n), do: n.tokens def line_count(n), do: n.line_count def children(n), do: n.children @@ -40,7 +42,7 @@ defmodule CodeQA.AST.Nodes.ModuleNode do def flat_tokens(n) do if Enum.empty?(n.children), do: n.tokens, - else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/nodes/test_node.ex b/lib/codeqa/ast/nodes/test_node.ex index 4fc955c..b3460cf 100644 --- a/lib/codeqa/ast/nodes/test_node.ex +++ b/lib/codeqa/ast/nodes/test_node.ex @@ -29,6 +29,8 @@ defmodule CodeQA.AST.Nodes.TestNode do end defimpl CodeQA.AST.Classification.NodeProtocol do + alias CodeQA.AST.Classification.NodeProtocol + def tokens(n), do: n.tokens def line_count(n), do: n.line_count def children(n), do: n.children @@ -39,7 +41,7 @@ defmodule CodeQA.AST.Nodes.TestNode do def flat_tokens(n) do if Enum.empty?(n.children), do: n.tokens, - else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) end end end diff --git a/lib/codeqa/ast/parsing/parser.ex b/lib/codeqa/ast/parsing/parser.ex index cb23a91..2615bb9 100644 --- a/lib/codeqa/ast/parsing/parser.ex +++ b/lib/codeqa/ast/parsing/parser.ex @@ -34,9 +34,8 @@ defmodule CodeQA.AST.Parsing.Parser do """ alias CodeQA.AST.Enrichment.Node - alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.Language alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken} + alias CodeQA.AST.Parsing.SignalStream alias CodeQA.AST.Signals.Structural.{ BlankLineSignal, @@ -46,6 +45,8 @@ defmodule CodeQA.AST.Parsing.Parser do TripleQuoteSignal } + alias CodeQA.Language + @spec detect_blocks([CodeQA.AST.Lexing.Token.t()], module()) :: [Node.t()] def detect_blocks([], _lang_mod), do: [] diff --git a/lib/codeqa/ast/parsing/signal_registry.ex 
b/lib/codeqa/ast/parsing/signal_registry.ex index 555de16..0f4a752 100644 --- a/lib/codeqa/ast/parsing/signal_registry.ex +++ b/lib/codeqa/ast/parsing/signal_registry.ex @@ -11,8 +11,8 @@ defmodule CodeQA.AST.Parsing.SignalRegistry do AccessModifierSignal, AssignmentFunctionSignal, BlankLineSignal, - BranchSplitSignal, BracketSignal, + BranchSplitSignal, ColonIndentSignal, CommentDividerSignal, DecoratorSignal, diff --git a/lib/codeqa/ast/parsing/signal_stream.ex b/lib/codeqa/ast/parsing/signal_stream.ex index 935cbc2..8b6f451 100644 --- a/lib/codeqa/ast/parsing/signal_stream.ex +++ b/lib/codeqa/ast/parsing/signal_stream.ex @@ -34,21 +34,25 @@ defmodule CodeQA.AST.Parsing.SignalStream do {_final_state, emissions} = Enum.reduce_while(triples, {init_state, []}, fn triple, {state, acc} -> - {emitted, new_state} = Signal.emit(signal, triple, state) - - new_acc = - emitted - |> Enum.map(fn {name, value} -> {source, group, name, value} end) - |> Enum.reduce(acc, fn e, a -> [e | a] end) - - if new_state == :halt do - {:halt, {new_state, new_acc}} - else - {:cont, {new_state, new_acc}} - end + emit_step(signal, triple, state, acc, source, group) end) Enum.reverse(emissions) end) end + + defp emit_step(signal, triple, state, acc, source, group) do + {emitted, new_state} = Signal.emit(signal, triple, state) + + new_acc = + emitted + |> Enum.map(fn {name, value} -> {source, group, name, value} end) + |> Enum.reduce(acc, fn e, a -> [e | a] end) + + if new_state == :halt do + {:halt, {new_state, new_acc}} + else + {:cont, {new_state, new_acc}} + end + end end diff --git a/lib/codeqa/ast/signals/classification/attribute_signal.ex b/lib/codeqa/ast/signals/classification/attribute_signal.ex index 18a52cd..aaaa640 100644 --- a/lib/codeqa/ast/signals/classification/attribute_signal.ex +++ b/lib/codeqa/ast/signals/classification/attribute_signal.ex @@ -42,24 +42,26 @@ defmodule CodeQA.AST.Signals.Classification.AttributeSignal do {MapSet.new(), %{state | saw_at: true, at_line_start: 
false}} "" when saw_at -> - name = token.content + emit_attribute(token.content, state) - cond do - MapSet.member?(@skip_attrs, name) -> - # @doc/@moduledoc: let DocSignal handle via tokens - {MapSet.new(), %{state | saw_at: false, at_line_start: false, voted: true}} + _ -> + {MapSet.new(), %{state | saw_at: false, at_line_start: false}} + end + end - MapSet.member?(@typespec_attrs, name) -> - {MapSet.new([{:attribute_vote, 3}]), - %{state | saw_at: false, at_line_start: false, voted: true}} + defp emit_attribute(name, state) do + base_state = %{state | saw_at: false, at_line_start: false, voted: true} - true -> - {MapSet.new([{:attribute_vote, 2}]), - %{state | saw_at: false, at_line_start: false, voted: true}} - end + cond do + MapSet.member?(@skip_attrs, name) -> + # @doc/@moduledoc: let DocSignal handle via tokens + {MapSet.new(), base_state} - _ -> - {MapSet.new(), %{state | saw_at: false, at_line_start: false}} + MapSet.member?(@typespec_attrs, name) -> + {MapSet.new([{:attribute_vote, 3}]), base_state} + + true -> + {MapSet.new([{:attribute_vote, 2}]), base_state} end end end diff --git a/lib/codeqa/ast/signals/classification/comment_density_signal.ex b/lib/codeqa/ast/signals/classification/comment_density_signal.ex index cc3614f..ceb4c23 100644 --- a/lib/codeqa/ast/signals/classification/comment_density_signal.ex +++ b/lib/codeqa/ast/signals/classification/comment_density_signal.ex @@ -47,15 +47,18 @@ defmodule CodeQA.AST.Signals.Classification.CommentDensitySignal do %{state | at_line_start: false} end - if next == nil and map_size(prefixes) > 0 and state.total_lines > 0 do - if state.comment_lines / state.total_lines > 0.6 do - {MapSet.new([{:comment_vote, 2}]), :halt} - else - {MapSet.new(), state} - end + maybe_emit_vote(next, prefixes, state) + end + + defp maybe_emit_vote(nil, prefixes, state) + when map_size(prefixes) > 0 and state.total_lines > 0 do + if state.comment_lines / state.total_lines > 0.6 do + {MapSet.new([{:comment_vote, 2}]), :halt} 
else {MapSet.new(), state} end end + + defp maybe_emit_vote(_next, _prefixes, state), do: {MapSet.new(), state} end end diff --git a/lib/codeqa/ast/signals/classification/config_signal.ex b/lib/codeqa/ast/signals/classification/config_signal.ex index d6453b1..43b5872 100644 --- a/lib/codeqa/ast/signals/classification/config_signal.ex +++ b/lib/codeqa/ast/signals/classification/config_signal.ex @@ -42,12 +42,16 @@ defmodule CodeQA.AST.Signals.Classification.ConfigSignal do %{state | bracket_depth: max(0, bd - 1), at_line_start: false, is_first: false}} _ -> - if ind == 0 and bd == 0 and MapSet.member?(@config_keywords, token.content) do - weight = if first, do: 3, else: 1 - {MapSet.new([{:config_vote, weight}]), :halt} - else - {MapSet.new(), %{state | at_line_start: false, is_first: false}} - end + emit_content_token(token, state, ind, bd, first) + end + end + + defp emit_content_token(token, state, ind, bd, first) do + if ind == 0 and bd == 0 and MapSet.member?(@config_keywords, token.content) do + weight = if first, do: 3, else: 1 + {MapSet.new([{:config_vote, weight}]), :halt} + else + {MapSet.new(), %{state | at_line_start: false, is_first: false}} end end end diff --git a/lib/codeqa/ast/signals/classification/function_signal.ex b/lib/codeqa/ast/signals/classification/function_signal.ex index ca261ce..62d3f48 100644 --- a/lib/codeqa/ast/signals/classification/function_signal.ex +++ b/lib/codeqa/ast/signals/classification/function_signal.ex @@ -58,14 +58,18 @@ defmodule CodeQA.AST.Signals.Classification.FunctionSignal do %{state | bracket_depth: max(0, bd - 1), is_first: false, at_line_start: false}} _ -> - if ind == 0 and bd == 0 and MapSet.member?(state.keywords, token.content) do - weight = if first, do: 3, else: 1 + emit_content_token(token, state, ind, bd, first) + end + end + + defp emit_content_token(token, state, ind, bd, first) do + base_state = %{state | is_first: false, at_line_start: false} - {MapSet.new([{:function_vote, weight}]), - %{state | 
is_first: false, at_line_start: false, voted: true}} - else - {MapSet.new(), %{state | is_first: false, at_line_start: false}} - end + if ind == 0 and bd == 0 and MapSet.member?(state.keywords, token.content) do + weight = if first, do: 3, else: 1 + {MapSet.new([{:function_vote, weight}]), %{base_state | voted: true}} + else + {MapSet.new(), base_state} end end end diff --git a/lib/codeqa/ast/signals/classification/import_signal.ex b/lib/codeqa/ast/signals/classification/import_signal.ex index b6ad524..e27ed8a 100644 --- a/lib/codeqa/ast/signals/classification/import_signal.ex +++ b/lib/codeqa/ast/signals/classification/import_signal.ex @@ -48,14 +48,18 @@ defmodule CodeQA.AST.Signals.Classification.ImportSignal do {MapSet.new(), state} _ -> - if ind == 0 and MapSet.member?(state.keywords, token.content) do - weight = if first, do: 3, else: 1 - - {MapSet.new([{:import_vote, weight}]), - %{state | is_first: false, at_line_start: false, voted: true}} - else - {MapSet.new(), %{state | is_first: false, at_line_start: false}} - end + emit_content_token(token, state, ind, first) + end + end + + defp emit_content_token(token, state, ind, first) do + base_state = %{state | is_first: false, at_line_start: false} + + if ind == 0 and MapSet.member?(state.keywords, token.content) do + weight = if first, do: 3, else: 1 + {MapSet.new([{:import_vote, weight}]), %{base_state | voted: true}} + else + {MapSet.new(), base_state} end end end diff --git a/lib/codeqa/ast/signals/classification/module_signal.ex b/lib/codeqa/ast/signals/classification/module_signal.ex index e58a150..4e9ca98 100644 --- a/lib/codeqa/ast/signals/classification/module_signal.ex +++ b/lib/codeqa/ast/signals/classification/module_signal.ex @@ -57,14 +57,18 @@ defmodule CodeQA.AST.Signals.Classification.ModuleSignal do %{state | bracket_depth: max(0, bd - 1), is_first: false, at_line_start: false}} _ -> - if ind == 0 and bd == 0 and MapSet.member?(state.keywords, token.content) do - weight = if first, do: 3, 
else: 1 + emit_content_token(token, state, ind, bd, first) + end + end + + defp emit_content_token(token, state, ind, bd, first) do + base_state = %{state | is_first: false, at_line_start: false} - {MapSet.new([{:module_vote, weight}]), - %{state | is_first: false, at_line_start: false, voted: true}} - else - {MapSet.new(), %{state | is_first: false, at_line_start: false}} - end + if ind == 0 and bd == 0 and MapSet.member?(state.keywords, token.content) do + weight = if first, do: 3, else: 1 + {MapSet.new([{:module_vote, weight}]), %{base_state | voted: true}} + else + {MapSet.new(), base_state} end end end diff --git a/lib/codeqa/ast/signals/classification/test_signal.ex b/lib/codeqa/ast/signals/classification/test_signal.ex index b9d5a8c..de6abe5 100644 --- a/lib/codeqa/ast/signals/classification/test_signal.ex +++ b/lib/codeqa/ast/signals/classification/test_signal.ex @@ -49,14 +49,18 @@ defmodule CodeQA.AST.Signals.Classification.TestSignal do {MapSet.new(), state} _ -> - if ind == 0 and MapSet.member?(state.keywords, token.content) do - weight = if first, do: 3, else: 1 - - {MapSet.new([{:test_vote, weight}]), - %{state | is_first: false, at_line_start: false, voted: true}} - else - {MapSet.new(), %{state | is_first: false, at_line_start: false}} - end + emit_content_token(token, state, ind, first) + end + end + + defp emit_content_token(token, state, ind, first) do + base_state = %{state | is_first: false, at_line_start: false} + + if ind == 0 and MapSet.member?(state.keywords, token.content) do + weight = if first, do: 3, else: 1 + {MapSet.new([{:test_vote, weight}]), %{base_state | voted: true}} + else + {MapSet.new(), base_state} end end end diff --git a/lib/codeqa/ast/signals/classification/type_signal.ex b/lib/codeqa/ast/signals/classification/type_signal.ex index 0acf6ad..fc4440f 100644 --- a/lib/codeqa/ast/signals/classification/type_signal.ex +++ b/lib/codeqa/ast/signals/classification/type_signal.ex @@ -34,16 +34,20 @@ defmodule 
CodeQA.AST.Signals.Classification.TypeSignal do {MapSet.new(), %{state | saw_at: true, at_line_start: false}} _ when state.saw_at and state.indent == 0 -> - if MapSet.member?(@type_attrs, token.content) do - weight = if state.is_first, do: 3, else: 1 - {MapSet.new([{:type_vote, weight}]), :halt} - else - {MapSet.new(), %{state | saw_at: false, is_first: false, at_line_start: false}} - end + emit_after_at(token, state) _ -> {MapSet.new(), %{state | saw_at: false, is_first: false, at_line_start: false}} end end + + defp emit_after_at(token, state) do + if MapSet.member?(@type_attrs, token.content) do + weight = if state.is_first, do: 3, else: 1 + {MapSet.new([{:type_vote, weight}]), :halt} + else + {MapSet.new(), %{state | saw_at: false, is_first: false, at_line_start: false}} + end + end end end diff --git a/lib/codeqa/block_impact_analyzer.ex b/lib/codeqa/block_impact_analyzer.ex index 4f62549..de00e02 100644 --- a/lib/codeqa/block_impact_analyzer.ex +++ b/lib/codeqa/block_impact_analyzer.ex @@ -104,7 +104,6 @@ defmodule CodeQA.BlockImpactAnalyzer do serialize_node( node, path, - content, root_tokens, baseline_file_cosines, file_results, @@ -120,7 +119,6 @@ defmodule CodeQA.BlockImpactAnalyzer do defp serialize_node( node, path, - content, root_tokens, baseline_file_cosines, file_results, @@ -150,7 +148,6 @@ defmodule CodeQA.BlockImpactAnalyzer do serialize_node( child, path, - content, root_tokens, baseline_file_cosines, file_results, diff --git a/lib/codeqa/cli.ex b/lib/codeqa/cli.ex index fcb8e01..3e36d57 100644 --- a/lib/codeqa/cli.ex +++ b/lib/codeqa/cli.ex @@ -27,8 +27,7 @@ defmodule CodeQA.CLI do command_usages = @commands |> Enum.sort_by(fn {name, _} -> name end) - |> Enum.map(fn {_name, mod} -> mod.usage() end) - |> Enum.join("\n") + |> Enum.map_join("\n", fn {_name, mod} -> mod.usage() end) "Usage: codeqa [options]\n\n" <> command_usages end diff --git a/lib/codeqa/cli/analyze.ex b/lib/codeqa/cli/analyze.ex index 81f4970..475e15c 100644 --- 
a/lib/codeqa/cli/analyze.ex +++ b/lib/codeqa/cli/analyze.ex @@ -3,7 +3,11 @@ defmodule CodeQA.CLI.Analyze do @behaviour CodeQA.CLI.Command + alias CodeQA.BlockImpactAnalyzer alias CodeQA.CLI.Options + alias CodeQA.Config + alias CodeQA.Engine.Analyzer + alias CodeQA.Engine.Collector @version "0.1.0" @@ -40,10 +44,10 @@ defmodule CodeQA.CLI.Analyze do Options.parse(args, [output: :string], o: :output) Options.validate_dir!(path) - CodeQA.Config.load(path) + Config.load(path) files = - CodeQA.Engine.Collector.collect_files(path, Options.parse_ignore_paths(opts[:ignore_paths])) + Collector.collect_files(path, Options.parse_ignore_paths(opts[:ignore_paths])) if map_size(files) == 0 do IO.puts(:stderr, "Warning: no source files found in '#{path}'") @@ -53,16 +57,16 @@ defmodule CodeQA.CLI.Analyze do print_progress(opts, files) analyze_opts = - Options.build_analyze_opts(opts) ++ CodeQA.Config.near_duplicate_blocks_opts() + Options.build_analyze_opts(opts) ++ Config.near_duplicate_blocks_opts() start_time = System.monotonic_time(:millisecond) - results = CodeQA.Engine.Analyzer.analyze_codebase(files, analyze_opts) + results = Analyzer.analyze_codebase(files, analyze_opts) end_time = System.monotonic_time(:millisecond) IO.puts(:stderr, "Analysis completed in #{end_time - start_time}ms") nodes_top = opts[:nodes_top] || 3 - results = CodeQA.BlockImpactAnalyzer.analyze(results, files, nodes_top: nodes_top) + results = BlockImpactAnalyzer.analyze(results, files, nodes_top: nodes_top) total_bytes = results["files"] |> Map.values() |> Enum.map(& &1["bytes"]) |> Enum.sum() results = filter_files_for_output(results, opts, "json") diff --git a/lib/codeqa/cli/correlate.ex b/lib/codeqa/cli/correlate.ex index 5021842..c38a248 100644 --- a/lib/codeqa/cli/correlate.ex +++ b/lib/codeqa/cli/correlate.ex @@ -4,6 +4,7 @@ defmodule CodeQA.CLI.Correlate do @behaviour CodeQA.CLI.Command alias CodeQA.CLI.Options + alias CodeQA.CLI.UI @impl CodeQA.CLI.Command def usage do @@ -266,7 +267,7 @@ 
defmodule CodeQA.CLI.Correlate do eta_ms = round((total_pairs - current) * avg_time) output = - CodeQA.CLI.UI.progress_bar(current, total_pairs, eta: CodeQA.CLI.UI.format_eta(eta_ms)) + UI.progress_bar(current, total_pairs, eta: UI.format_eta(eta_ms)) IO.write(:stderr, "\r" <> output) if current == total_pairs, do: IO.puts(:stderr, "") diff --git a/lib/codeqa/cli/health_report.ex b/lib/codeqa/cli/health_report.ex index 680bbbc..66b769d 100644 --- a/lib/codeqa/cli/health_report.ex +++ b/lib/codeqa/cli/health_report.ex @@ -3,7 +3,13 @@ defmodule CodeQA.CLI.HealthReport do @behaviour CodeQA.CLI.Command + alias CodeQA.BlockImpactAnalyzer alias CodeQA.CLI.Options + alias CodeQA.Config + alias CodeQA.Engine.Analyzer + alias CodeQA.Engine.Collector + alias CodeQA.Git + alias CodeQA.HealthReport @impl CodeQA.CLI.Command def usage do @@ -54,7 +60,7 @@ defmodule CodeQA.CLI.HealthReport do head_ref = opts[:head_ref] || "HEAD" files = - CodeQA.Engine.Collector.collect_files(path, extra_ignore_patterns) + Collector.collect_files(path, extra_ignore_patterns) if map_size(files) == 0 do IO.puts(:stderr, "Warning: no source files found in '#{path}'") @@ -64,16 +70,16 @@ defmodule CodeQA.CLI.HealthReport do IO.puts(:stderr, "Analyzing #{map_size(files)} files for health report...") analyze_opts = - Options.build_analyze_opts(opts) ++ CodeQA.Config.near_duplicate_blocks_opts() + Options.build_analyze_opts(opts) ++ Config.near_duplicate_blocks_opts() start_time = System.monotonic_time(:millisecond) - results = CodeQA.Engine.Analyzer.analyze_codebase(files, analyze_opts) + results = Analyzer.analyze_codebase(files, analyze_opts) end_time = System.monotonic_time(:millisecond) IO.puts(:stderr, "Analysis completed in #{end_time - start_time}ms") nodes_top = opts[:nodes_top] || 3 - results = CodeQA.BlockImpactAnalyzer.analyze(results, files, nodes_top: nodes_top) + results = BlockImpactAnalyzer.analyze(results, files, nodes_top: nodes_top) total_bytes = results["files"] |> Map.values() |> 
Enum.map(& &1["bytes"]) |> Enum.sum() @@ -88,11 +94,11 @@ defmodule CodeQA.CLI.HealthReport do {base_results, changed_files} = if base_ref do IO.puts(:stderr, "Collecting base snapshot at #{base_ref}...") - base_files = CodeQA.Git.collect_files_at_ref(path, base_ref) - changed = CodeQA.Git.changed_files(path, base_ref, head_ref) + base_files = Git.collect_files_at_ref(path, base_ref) + changed = Git.changed_files(path, base_ref, head_ref) IO.puts(:stderr, "Analyzing base snapshot (#{map_size(base_files)} files)...") - base_res = CodeQA.Engine.Analyzer.analyze_codebase(base_files, analyze_opts) + base_res = Analyzer.analyze_codebase(base_files, analyze_opts) {base_res, changed} else @@ -104,7 +110,7 @@ defmodule CodeQA.CLI.HealthReport do top_n = opts[:top] || 5 report = - CodeQA.HealthReport.generate(results, + HealthReport.generate(results, config: opts[:config], detail: detail, top: top_n, @@ -112,7 +118,7 @@ defmodule CodeQA.CLI.HealthReport do changed_files: changed_files ) - markdown = CodeQA.HealthReport.to_markdown(report, detail, format) + markdown = HealthReport.to_markdown(report, detail, format) case opts[:output] do nil -> diff --git a/lib/codeqa/cli/history.ex b/lib/codeqa/cli/history.ex index b24ea4c..ca40669 100644 --- a/lib/codeqa/cli/history.ex +++ b/lib/codeqa/cli/history.ex @@ -4,6 +4,11 @@ defmodule CodeQA.CLI.History do @behaviour CodeQA.CLI.Command alias CodeQA.CLI.Options + alias CodeQA.CLI.Progress + alias CodeQA.Config + alias CodeQA.Engine.Analyzer + alias CodeQA.Engine.Collector + alias CodeQA.Git @version "0.1.0" @@ -58,10 +63,10 @@ defmodule CodeQA.CLI.History do commits = resolve_commits(opts, path) IO.puts(:stderr, "Found #{length(commits)} commits to analyze.") - CodeQA.Config.load(path) + Config.load(path) analyze_opts = - Options.build_analyze_opts(opts) ++ CodeQA.Config.near_duplicate_blocks_opts() + Options.build_analyze_opts(opts) ++ Config.near_duplicate_blocks_opts() ignore_patterns = 
Options.parse_ignore_paths(opts[:ignore_paths]) @@ -97,14 +102,13 @@ defmodule CodeQA.CLI.History do current_opts = if opts[:progress], do: [ - {:on_progress, - fn c, t, p, _tt -> CodeQA.CLI.Progress.callback(c, t, p, start_time_progress) end} + {:on_progress, fn c, t, p, _tt -> Progress.callback(c, t, p, start_time_progress) end} | analyze_opts ], else: analyze_opts - files = CodeQA.Git.collect_files_at_ref(path, commit) - files = CodeQA.Engine.Collector.reject_ignored_map(files, ignore_patterns) + files = Git.collect_files_at_ref(path, commit) + files = Collector.reject_ignored_map(files, ignore_patterns) if map_size(files) == 0 do IO.puts(:stderr, "Warning: no source files found at commit #{commit}") @@ -115,7 +119,7 @@ defmodule CodeQA.CLI.History do defp write_commit_result(commit, path, output_dir, files, analyze_opts) do start_time = System.monotonic_time(:millisecond) - results = CodeQA.Engine.Analyzer.analyze_codebase(files, analyze_opts) + results = Analyzer.analyze_codebase(files, analyze_opts) end_time = System.monotonic_time(:millisecond) IO.puts(:stderr, " Analysis completed in #{end_time - start_time}ms") diff --git a/lib/codeqa/cli/options.ex b/lib/codeqa/cli/options.ex index 5003292..199a95d 100644 --- a/lib/codeqa/cli/options.ex +++ b/lib/codeqa/cli/options.ex @@ -1,6 +1,8 @@ defmodule CodeQA.CLI.Options do @moduledoc false + alias CodeQA.CLI.Progress + @common_strict [ workers: :integer, cache: :boolean, @@ -68,8 +70,7 @@ defmodule CodeQA.CLI.Options do [{:timeout, opts[:timeout] || 5000}] |> maybe_add( opts[:progress], - {:on_progress, - fn c, t, p, _tt -> CodeQA.CLI.Progress.callback(c, t, p, start_time_progress) end} + {:on_progress, fn c, t, p, _tt -> Progress.callback(c, t, p, start_time_progress) end} ) |> maybe_add(opts[:cache], {:cache_dir, opts[:cache_dir] || ".codeqa_cache"}) |> maybe_add( diff --git a/lib/codeqa/cli/progress.ex b/lib/codeqa/cli/progress.ex index 6ffdd14..aa09b05 100644 --- a/lib/codeqa/cli/progress.ex +++ 
b/lib/codeqa/cli/progress.ex @@ -1,6 +1,8 @@ defmodule CodeQA.CLI.Progress do @moduledoc false + alias CodeQA.CLI.UI + @spec callback(integer(), integer(), String.t(), integer()) :: :ok def callback(completed, total, path, start_time) do now = System.monotonic_time(:millisecond) @@ -11,8 +13,8 @@ defmodule CodeQA.CLI.Progress do label = if String.length(path) > 30, do: "..." <> String.slice(path, -27..-1), else: path output = - CodeQA.CLI.UI.progress_bar(completed, total, - eta: CodeQA.CLI.UI.format_eta(eta_ms), + UI.progress_bar(completed, total, + eta: UI.format_eta(eta_ms), label: label ) diff --git a/lib/codeqa/combined_metrics/category.ex b/lib/codeqa/combined_metrics/category.ex index 2467969..def09ad 100644 --- a/lib/codeqa/combined_metrics/category.ex +++ b/lib/codeqa/combined_metrics/category.ex @@ -22,6 +22,8 @@ defmodule CodeQA.CombinedMetrics.Category do defmacro __using__(yaml_path: yaml_path) do quote do + alias CodeQA.CombinedMetrics.Scorer + @callback score(metrics :: map()) :: float() @doc """ @@ -31,7 +33,7 @@ defmodule CodeQA.CombinedMetrics.Category do """ @spec compute_score(String.t(), map()) :: float() def compute_score(metric_name, metrics) do - CodeQA.CombinedMetrics.Scorer.compute_score(unquote(yaml_path), metric_name, metrics) + Scorer.compute_score(unquote(yaml_path), metric_name, metrics) end end end diff --git a/lib/codeqa/combined_metrics/code_smells.ex b/lib/codeqa/combined_metrics/code_smells.ex index 5b5678a..13586ba 100644 --- a/lib/codeqa/combined_metrics/code_smells.ex +++ b/lib/codeqa/combined_metrics/code_smells.ex @@ -17,12 +17,13 @@ defmodule CodeQA.CombinedMetrics.CodeSmells do for {key, doc} <- @behaviors do defmodule Module.concat(CodeQA.CombinedMetrics.CodeSmells, Macro.camelize(key)) do + alias CodeQA.CombinedMetrics.CodeSmells @moduledoc doc - @behaviour CodeQA.CombinedMetrics.CodeSmells + @behaviour CodeSmells @score_key key @impl true def score(metrics), - do: 
CodeQA.CombinedMetrics.CodeSmells.compute_score(@score_key, metrics) + do: CodeSmells.compute_score(@score_key, metrics) end end end diff --git a/lib/codeqa/combined_metrics/consistency.ex b/lib/codeqa/combined_metrics/consistency.ex index 6a1474e..1c4af0c 100644 --- a/lib/codeqa/combined_metrics/consistency.ex +++ b/lib/codeqa/combined_metrics/consistency.ex @@ -18,12 +18,13 @@ defmodule CodeQA.CombinedMetrics.Consistency do for {key, doc} <- @behaviors do defmodule Module.concat(CodeQA.CombinedMetrics.Consistency, Macro.camelize(key)) do + alias CodeQA.CombinedMetrics.Consistency @moduledoc doc - @behaviour CodeQA.CombinedMetrics.Consistency + @behaviour Consistency @score_key key @impl true def score(metrics), - do: CodeQA.CombinedMetrics.Consistency.compute_score(@score_key, metrics) + do: Consistency.compute_score(@score_key, metrics) end end end diff --git a/lib/codeqa/combined_metrics/dependencies.ex b/lib/codeqa/combined_metrics/dependencies.ex index 0cde033..f0b25aa 100644 --- a/lib/codeqa/combined_metrics/dependencies.ex +++ b/lib/codeqa/combined_metrics/dependencies.ex @@ -17,12 +17,13 @@ defmodule CodeQA.CombinedMetrics.Dependencies do for {key, doc} <- @behaviors do defmodule Module.concat(CodeQA.CombinedMetrics.Dependencies, Macro.camelize(key)) do + alias CodeQA.CombinedMetrics.Dependencies @moduledoc doc - @behaviour CodeQA.CombinedMetrics.Dependencies + @behaviour Dependencies @score_key key @impl true def score(metrics), - do: CodeQA.CombinedMetrics.Dependencies.compute_score(@score_key, metrics) + do: Dependencies.compute_score(@score_key, metrics) end end end diff --git a/lib/codeqa/combined_metrics/documentation.ex b/lib/codeqa/combined_metrics/documentation.ex index 31abd0e..94f8a95 100644 --- a/lib/codeqa/combined_metrics/documentation.ex +++ b/lib/codeqa/combined_metrics/documentation.ex @@ -17,12 +17,13 @@ defmodule CodeQA.CombinedMetrics.Documentation do for {key, doc} <- @behaviors do defmodule 
Module.concat(CodeQA.CombinedMetrics.Documentation, Macro.camelize(key)) do + alias CodeQA.CombinedMetrics.Documentation @moduledoc doc - @behaviour CodeQA.CombinedMetrics.Documentation + @behaviour Documentation @score_key key @impl true def score(metrics), - do: CodeQA.CombinedMetrics.Documentation.compute_score(@score_key, metrics) + do: Documentation.compute_score(@score_key, metrics) end end end diff --git a/lib/codeqa/combined_metrics/error_handling.ex b/lib/codeqa/combined_metrics/error_handling.ex index 62e2032..9039ef6 100644 --- a/lib/codeqa/combined_metrics/error_handling.ex +++ b/lib/codeqa/combined_metrics/error_handling.ex @@ -17,12 +17,13 @@ defmodule CodeQA.CombinedMetrics.ErrorHandling do for {key, doc} <- @behaviors do defmodule Module.concat(CodeQA.CombinedMetrics.ErrorHandling, Macro.camelize(key)) do + alias CodeQA.CombinedMetrics.ErrorHandling @moduledoc doc - @behaviour CodeQA.CombinedMetrics.ErrorHandling + @behaviour ErrorHandling @score_key key @impl true def score(metrics), - do: CodeQA.CombinedMetrics.ErrorHandling.compute_score(@score_key, metrics) + do: ErrorHandling.compute_score(@score_key, metrics) end end end diff --git a/lib/codeqa/combined_metrics/file_scorer.ex b/lib/codeqa/combined_metrics/file_scorer.ex index 4ea54b4..a35a473 100644 --- a/lib/codeqa/combined_metrics/file_scorer.ex +++ b/lib/codeqa/combined_metrics/file_scorer.ex @@ -7,6 +7,9 @@ defmodule CodeQA.CombinedMetrics.FileScorer do """ alias CodeQA.CombinedMetrics.SampleRunner + alias CodeQA.Config + alias CodeQA.HealthReport.Grader + alias CodeQA.Language @doc """ Converts a single file's raw metric map to aggregate format. 
@@ -56,7 +59,10 @@ defmodule CodeQA.CombinedMetrics.FileScorer do ] } def worst_files_per_behavior(files_map, opts \\ []) do - # TODO(option-c): cosine similarity is computed at file level; a line-level mapping would require computing a separate cosine score for each AST node by projecting that node's metric vector against the behavior's feature-weight vector. This is not currently possible because serialized nodes do not carry their own metric values. + # NOTE: cosine similarity is computed at file level; a line-level mapping would require computing a separate + # cosine score for each AST node by projecting that node's metric vector against the behavior's + # feature-weight vector. This is not currently possible because serialized nodes do not carry their own + # metric values. top_n = Keyword.get(opts, :combined_top, 2) files_map @@ -64,8 +70,8 @@ defmodule CodeQA.CombinedMetrics.FileScorer do file_data |> Map.get("metrics", %{}) |> map_size() == 0 end) |> Enum.reduce(%{}, fn {path, file_data}, acc -> - top_nodes = CodeQA.HealthReport.Grader.top_3_nodes(Map.get(file_data, "nodes")) - language = CodeQA.Language.detect(path).name() + top_nodes = Grader.top_3_nodes(Map.get(file_data, "nodes")) + language = Language.detect(path).name() file_data |> Map.get("metrics", %{}) @@ -84,7 +90,7 @@ defmodule CodeQA.CombinedMetrics.FileScorer do end) end) |> Map.new(fn {key, entries} -> - threshold = CodeQA.Config.cosine_significance_threshold() + threshold = Config.cosine_significance_threshold() sorted = entries diff --git a/lib/codeqa/combined_metrics/file_structure.ex b/lib/codeqa/combined_metrics/file_structure.ex index 164d3c9..aa6f153 100644 --- a/lib/codeqa/combined_metrics/file_structure.ex +++ b/lib/codeqa/combined_metrics/file_structure.ex @@ -17,12 +17,13 @@ defmodule CodeQA.CombinedMetrics.FileStructure do for {key, doc} <- @behaviors do defmodule Module.concat(CodeQA.CombinedMetrics.FileStructure, Macro.camelize(key)) do + alias 
CodeQA.CombinedMetrics.FileStructure @moduledoc doc - @behaviour CodeQA.CombinedMetrics.FileStructure + @behaviour FileStructure @score_key key @impl true def score(metrics), - do: CodeQA.CombinedMetrics.FileStructure.compute_score(@score_key, metrics) + do: FileStructure.compute_score(@score_key, metrics) end end end diff --git a/lib/codeqa/combined_metrics/function_design.ex b/lib/codeqa/combined_metrics/function_design.ex index 3e2e5e9..3eab5f7 100644 --- a/lib/codeqa/combined_metrics/function_design.ex +++ b/lib/codeqa/combined_metrics/function_design.ex @@ -17,12 +17,13 @@ defmodule CodeQA.CombinedMetrics.FunctionDesign do for {key, doc} <- @behaviors do defmodule Module.concat(CodeQA.CombinedMetrics.FunctionDesign, Macro.camelize(key)) do + alias CodeQA.CombinedMetrics.FunctionDesign @moduledoc doc - @behaviour CodeQA.CombinedMetrics.FunctionDesign + @behaviour FunctionDesign @score_key key @impl true def score(metrics), - do: CodeQA.CombinedMetrics.FunctionDesign.compute_score(@score_key, metrics) + do: FunctionDesign.compute_score(@score_key, metrics) end end end diff --git a/lib/codeqa/combined_metrics/naming_conventions.ex b/lib/codeqa/combined_metrics/naming_conventions.ex index f463b85..eafb5dc 100644 --- a/lib/codeqa/combined_metrics/naming_conventions.ex +++ b/lib/codeqa/combined_metrics/naming_conventions.ex @@ -19,12 +19,13 @@ defmodule CodeQA.CombinedMetrics.NamingConventions do for {key, doc} <- @behaviors do defmodule Module.concat(CodeQA.CombinedMetrics.NamingConventions, Macro.camelize(key)) do + alias CodeQA.CombinedMetrics.NamingConventions @moduledoc doc - @behaviour CodeQA.CombinedMetrics.NamingConventions + @behaviour NamingConventions @score_key key @impl true def score(metrics), - do: CodeQA.CombinedMetrics.NamingConventions.compute_score(@score_key, metrics) + do: NamingConventions.compute_score(@score_key, metrics) end end end diff --git a/lib/codeqa/combined_metrics/sample_runner.ex b/lib/codeqa/combined_metrics/sample_runner.ex index 
bcf3375..76566d4 100644 --- a/lib/codeqa/combined_metrics/sample_runner.ex +++ b/lib/codeqa/combined_metrics/sample_runner.ex @@ -7,6 +7,8 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do """ alias CodeQA.CombinedMetrics.Scorer + alias CodeQA.Engine.Analyzer + alias CodeQA.Engine.Collector @samples_root "priv/combined_metrics/samples" @yaml_dir "priv/combined_metrics" @@ -85,8 +87,8 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do defp analyze(dir) do dir - |> CodeQA.Engine.Collector.collect_files() - |> CodeQA.Engine.Analyzer.analyze_codebase() + |> Collector.collect_files() + |> Analyzer.analyze_codebase() |> get_in(["codebase", "aggregate"]) end @@ -296,55 +298,15 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do data |> Enum.filter(fn {_k, v} -> is_map(v) end) |> Enum.flat_map(fn {behavior, behavior_data} -> - behavior_langs = Map.get(behavior_data, "_languages", []) - - if not behavior_language_applies?(behavior_langs, language, languages) do - [] - else - scalars = Scorer.scalars_for(yaml_path, behavior) - - if map_size(scalars) == 0 do - [] - else - log_baseline = Map.get(behavior_data, "_log_baseline", 0.0) / 1.0 - - {dot, norm_s_sq, norm_v_sq, contributions} = - Enum.reduce(scalars, {0.0, 0.0, 0.0, []}, fn {{group, key}, scalar}, - {d, ns, nv, contribs} -> - log_m = :math.log(Scorer.get(aggregate, group, key)) - contrib = scalar * log_m - - {d + contrib, ns + scalar * scalar, nv + log_m * log_m, - [{:"#{group}.#{key}", contrib} | contribs]} - end) - - cos_sim = - if norm_s_sq > 0 and norm_v_sq > 0, - do: dot / (:math.sqrt(norm_s_sq) * :math.sqrt(norm_v_sq)), - else: 0.0 - - raw_score = Scorer.compute_score(yaml_path, behavior, aggregate) - calibrated = :math.log(max(raw_score, 1.0e-300)) - log_baseline - - top_metrics = - contributions - |> Enum.sort_by(fn {_, c} -> c end) - |> Enum.take(5) - |> Enum.map(fn {metric, contribution} -> - %{metric: to_string(metric), contribution: Float.round(contribution, 4)} - end) - - [ - %{ - category: category, - 
behavior: behavior, - cosine: Float.round(cos_sim, 4), - score: Float.round(calibrated, 4), - top_metrics: top_metrics - } - ] - end - end + maybe_score_behavior( + yaml_path, + behavior, + behavior_data, + aggregate, + category, + language, + languages + ) end) end) |> Enum.sort_by(& &1.cosine) @@ -370,9 +332,9 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do @yaml_dir |> File.ls!() - |> Enum.filter(&String.ends_with?(&1, ".yml")) |> Enum.filter(fn yml_file -> - filter_category == nil or String.trim_trailing(yml_file, ".yml") == filter_category + String.ends_with?(yml_file, ".yml") and + (filter_category == nil or String.trim_trailing(yml_file, ".yml") == filter_category) end) |> Enum.sort() |> Enum.map(fn yml_file -> @@ -401,23 +363,29 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do {Map.put(acc_yaml, behavior, groups), Map.update!(stats, :skipped, &(&1 + 1))} metrics -> - {new_groups, log_baseline, n_updated, n_deadzoned} = groups_from_report(metrics) - # Fall back to current groups if everything was deadzoned - groups = - if(map_size(new_groups) > 0, do: new_groups, else: current_groups) - |> Map.put("_log_baseline", Float.round(log_baseline, 6)) - |> maybe_put_doc(doc) - - {Map.put(acc_yaml, behavior, groups), - %{ - stats - | updated: stats.updated + n_updated, - deadzoned: stats.deadzoned + n_deadzoned - }} + apply_metrics(acc_yaml, stats, behavior, current_groups, metrics, doc) end end) end + defp apply_metrics(acc_yaml, stats, behavior, current_groups, metrics, doc) do + {new_groups, log_baseline, n_updated, n_deadzoned} = groups_from_report(metrics) + # Fall back to current groups if everything was deadzoned + base_groups = if map_size(new_groups) > 0, do: new_groups, else: current_groups + + groups = + base_groups + |> Map.put("_log_baseline", Float.round(log_baseline, 6)) + |> maybe_put_doc(doc) + + {Map.put(acc_yaml, behavior, groups), + %{ + stats + | updated: stats.updated + n_updated, + deadzoned: stats.deadzoned + n_deadzoned + }} + end + 
defp read_behavior_doc(category, behavior) do config_path = Path.join([@samples_root, category, behavior, "config.yml"]) @@ -444,24 +412,96 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do if deadzone?(data.ratio) do {groups, log_baseline, n_updated, n_deadzoned + 1} else - new_groups = - Map.update( - groups, - group, - %{key => data.suggested_scalar}, - &Map.put(&1, key, data.suggested_scalar) - ) - - # Baseline: expected log score at the geometric mean of good/bad sample values - geo_mean = :math.sqrt(max(data.bad, 1.0e-10) * max(data.good, 1.0e-10)) - new_baseline = log_baseline + data.suggested_scalar * :math.log(geo_mean) - {new_groups, new_baseline, n_updated + 1, n_deadzoned} + accumulate_metric(groups, log_baseline, n_updated, n_deadzoned, group, key, data) end end) end + defp accumulate_metric(groups, log_baseline, n_updated, n_deadzoned, group, key, data) do + new_groups = + Map.update( + groups, + group, + %{key => data.suggested_scalar}, + &Map.put(&1, key, data.suggested_scalar) + ) + + # Baseline: expected log score at the geometric mean of good/bad sample values + geo_mean = :math.sqrt(max(data.bad, 1.0e-10) * max(data.good, 1.0e-10)) + new_baseline = log_baseline + data.suggested_scalar * :math.log(geo_mean) + {new_groups, new_baseline, n_updated + 1, n_deadzoned} + end + defp deadzone?(ratio), do: ratio >= @deadzone_low and ratio <= @deadzone_high + defp maybe_score_behavior( + yaml_path, + behavior, + behavior_data, + aggregate, + category, + language, + languages + ) do + behavior_langs = Map.get(behavior_data, "_languages", []) + + if behavior_language_applies?(behavior_langs, language, languages) do + score_behavior_cosine(yaml_path, behavior, behavior_data, aggregate, category) + else + [] + end + end + + defp score_behavior_cosine(yaml_path, behavior, behavior_data, aggregate, category) do + scalars = Scorer.scalars_for(yaml_path, behavior) + + if map_size(scalars) == 0 do + [] + else + build_cosine_result(yaml_path, behavior, 
behavior_data, aggregate, category, scalars) + end + end + + defp build_cosine_result(yaml_path, behavior, behavior_data, aggregate, category, scalars) do + log_baseline = Map.get(behavior_data, "_log_baseline", 0.0) / 1.0 + + {dot, norm_s_sq, norm_v_sq, contributions} = + Enum.reduce(scalars, {0.0, 0.0, 0.0, []}, fn {{group, key}, scalar}, + {d, ns, nv, contribs} -> + log_m = :math.log(Scorer.get(aggregate, group, key)) + contrib = scalar * log_m + + {d + contrib, ns + scalar * scalar, nv + log_m * log_m, + [{:"#{group}.#{key}", contrib} | contribs]} + end) + + cos_sim = + if norm_s_sq > 0 and norm_v_sq > 0, + do: dot / (:math.sqrt(norm_s_sq) * :math.sqrt(norm_v_sq)), + else: 0.0 + + raw_score = Scorer.compute_score(yaml_path, behavior, aggregate) + calibrated = :math.log(max(raw_score, 1.0e-300)) - log_baseline + + top_metrics = + contributions + |> Enum.sort_by(fn {_, c} -> c end) + |> Enum.take(5) + |> Enum.map(fn {metric, contribution} -> + %{metric: to_string(metric), contribution: Float.round(contribution, 4)} + end) + + [ + %{ + category: category, + behavior: behavior, + cosine: Float.round(cos_sim, 4), + score: Float.round(calibrated, 4), + top_metrics: top_metrics + } + ] + end + # Returns true if the behavior should be included for the given language context. 
# behavior_langs: the "_languages" list from the YAML ([] = applies to all) # language: single language string from :language opt (nil = no filter) @@ -488,54 +528,51 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do lines = data |> Enum.sort_by(fn {behavior, _} -> behavior end) - |> Enum.flat_map(fn {behavior, groups} -> - doc_line = - case Map.get(groups, "_doc") do - nil -> [] - doc -> [" _doc: #{inspect(doc)}"] - end - - baseline_line = - case Map.get(groups, "_log_baseline") do - nil -> [] - val -> [" _log_baseline: #{fmt_scalar(val)}"] - end - - fix_hint_line = - case Map.get(groups, "_fix_hint") do - nil -> [] - hint -> [" _fix_hint: #{inspect(hint)}"] - end - - languages_line = - case Map.get(groups, "_languages") do - nil -> [] - [] -> [] - langs -> [" _languages: [#{Enum.join(langs, ", ")}]"] - end - - group_lines = - groups - |> Enum.filter(fn {k, v} -> - k not in ["_doc", "_log_baseline", "_fix_hint", "_languages"] and is_map(v) - end) - |> Enum.sort_by(fn {group, _} -> group end) - |> Enum.flat_map(fn {group, keys} -> - key_lines = - keys - |> Enum.sort_by(fn {key, _} -> key end) - |> Enum.map(fn {key, scalar} -> " #{key}: #{fmt_scalar(scalar)}" end) - - [" #{group}:" | key_lines] - end) - - ["#{behavior}:" | doc_line] ++ - fix_hint_line ++ languages_line ++ baseline_line ++ group_lines ++ [""] - end) + |> Enum.flat_map(fn {behavior, groups} -> format_behavior_lines(behavior, groups) end) Enum.join(lines, "\n") <> "\n" end + defp format_behavior_lines(behavior, groups) do + doc_line = yaml_doc_line(Map.get(groups, "_doc")) + baseline_line = yaml_baseline_line(Map.get(groups, "_log_baseline")) + fix_hint_line = yaml_fix_hint_line(Map.get(groups, "_fix_hint")) + languages_line = yaml_languages_line(Map.get(groups, "_languages")) + group_lines = format_group_lines(groups) + + ["#{behavior}:" | doc_line] ++ + fix_hint_line ++ languages_line ++ baseline_line ++ group_lines ++ [""] + end + + defp yaml_doc_line(nil), do: [] + defp yaml_doc_line(doc), do: [" 
_doc: #{inspect(doc)}"] + + defp yaml_baseline_line(nil), do: [] + defp yaml_baseline_line(val), do: [" _log_baseline: #{fmt_scalar(val)}"] + + defp yaml_fix_hint_line(nil), do: [] + defp yaml_fix_hint_line(hint), do: [" _fix_hint: #{inspect(hint)}"] + + defp yaml_languages_line(nil), do: [] + defp yaml_languages_line([]), do: [] + defp yaml_languages_line(langs), do: [" _languages: [#{Enum.join(langs, ", ")}]"] + + defp format_group_lines(groups) do + groups + |> Enum.filter(fn {k, v} -> + k not in ["_doc", "_log_baseline", "_fix_hint", "_languages"] and is_map(v) + end) + |> Enum.sort_by(fn {group, _} -> group end) + |> Enum.flat_map(fn {group, keys} -> + key_lines = + keys + |> Enum.sort_by(fn {key, _} -> key end) + |> Enum.map(fn {key, scalar} -> " #{key}: #{fmt_scalar(scalar)}" end) + + [" #{group}:" | key_lines] + end) + end + defp fmt_scalar(f) when is_float(f), do: :erlang.float_to_binary(f, decimals: 4) defp fmt_scalar(n) when is_integer(n), do: "#{n}.0" @@ -589,9 +626,9 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do @yaml_dir |> File.ls!() - |> Enum.filter(&String.ends_with?(&1, ".yml")) |> Enum.filter(fn yml_file -> - filter_category == nil or String.trim_trailing(yml_file, ".yml") == filter_category + String.ends_with?(yml_file, ".yml") and + (filter_category == nil or String.trim_trailing(yml_file, ".yml") == filter_category) end) |> Enum.sort() |> Enum.map(fn yml_file -> diff --git a/lib/codeqa/combined_metrics/scope_and_assignment.ex b/lib/codeqa/combined_metrics/scope_and_assignment.ex index ddb1d11..0b3e616 100644 --- a/lib/codeqa/combined_metrics/scope_and_assignment.ex +++ b/lib/codeqa/combined_metrics/scope_and_assignment.ex @@ -17,12 +17,13 @@ defmodule CodeQA.CombinedMetrics.ScopeAndAssignment do for {key, doc} <- @behaviors do defmodule Module.concat(CodeQA.CombinedMetrics.ScopeAndAssignment, Macro.camelize(key)) do + alias CodeQA.CombinedMetrics.ScopeAndAssignment @moduledoc doc - @behaviour CodeQA.CombinedMetrics.ScopeAndAssignment + 
@behaviour ScopeAndAssignment @score_key key @impl true def score(metrics), - do: CodeQA.CombinedMetrics.ScopeAndAssignment.compute_score(@score_key, metrics) + do: ScopeAndAssignment.compute_score(@score_key, metrics) end end end diff --git a/lib/codeqa/combined_metrics/scorer.ex b/lib/codeqa/combined_metrics/scorer.ex index 29b6d2f..627fc97 100644 --- a/lib/codeqa/combined_metrics/scorer.ex +++ b/lib/codeqa/combined_metrics/scorer.ex @@ -1,4 +1,6 @@ defmodule CodeQA.CombinedMetrics.Scorer do + alias CodeQA.Engine.Analyzer + @moduledoc """ Pure computation engine for combined metric formulas. @@ -45,7 +47,7 @@ defmodule CodeQA.CombinedMetrics.Scorer do @doc "Returns the full default scalar map: all registered file metric keys mapped to 0.0." @spec default_scalars() :: %{{String.t(), String.t()} => float()} def default_scalars do - CodeQA.Engine.Analyzer.build_registry().file_metrics + Analyzer.build_registry().file_metrics |> Enum.flat_map(fn mod -> Enum.map(mod.keys(), fn key -> {{mod.name(), "mean_" <> key}, 0.0} end) end) diff --git a/lib/codeqa/combined_metrics/testing.ex b/lib/codeqa/combined_metrics/testing.ex index 8876453..52b41e4 100644 --- a/lib/codeqa/combined_metrics/testing.ex +++ b/lib/codeqa/combined_metrics/testing.ex @@ -17,12 +17,13 @@ defmodule CodeQA.CombinedMetrics.Testing do for {key, doc} <- @behaviors do defmodule Module.concat(CodeQA.CombinedMetrics.Testing, Macro.camelize(key)) do + alias CodeQA.CombinedMetrics.Testing @moduledoc doc - @behaviour CodeQA.CombinedMetrics.Testing + @behaviour Testing @score_key key @impl true def score(metrics), - do: CodeQA.CombinedMetrics.Testing.compute_score(@score_key, metrics) + do: Testing.compute_score(@score_key, metrics) end end end diff --git a/lib/codeqa/combined_metrics/type_and_value.ex b/lib/codeqa/combined_metrics/type_and_value.ex index 5f76a8a..d461c60 100644 --- a/lib/codeqa/combined_metrics/type_and_value.ex +++ b/lib/codeqa/combined_metrics/type_and_value.ex @@ -17,12 +17,13 @@ defmodule 
CodeQA.CombinedMetrics.TypeAndValue do for {key, doc} <- @behaviors do defmodule Module.concat(CodeQA.CombinedMetrics.TypeAndValue, Macro.camelize(key)) do + alias CodeQA.CombinedMetrics.TypeAndValue @moduledoc doc - @behaviour CodeQA.CombinedMetrics.TypeAndValue + @behaviour TypeAndValue @score_key key @impl true def score(metrics), - do: CodeQA.CombinedMetrics.TypeAndValue.compute_score(@score_key, metrics) + do: TypeAndValue.compute_score(@score_key, metrics) end end end diff --git a/lib/codeqa/combined_metrics/variable_naming.ex b/lib/codeqa/combined_metrics/variable_naming.ex index c846414..db49793 100644 --- a/lib/codeqa/combined_metrics/variable_naming.ex +++ b/lib/codeqa/combined_metrics/variable_naming.ex @@ -17,12 +17,13 @@ defmodule CodeQA.CombinedMetrics.VariableNaming do for {key, doc} <- @behaviors do defmodule Module.concat(CodeQA.CombinedMetrics.VariableNaming, Macro.camelize(key)) do + alias CodeQA.CombinedMetrics.VariableNaming @moduledoc doc - @behaviour CodeQA.CombinedMetrics.VariableNaming + @behaviour VariableNaming @score_key key @impl true def score(metrics), - do: CodeQA.CombinedMetrics.VariableNaming.compute_score(@score_key, metrics) + do: VariableNaming.compute_score(@score_key, metrics) end end end diff --git a/lib/codeqa/diagnostics.ex b/lib/codeqa/diagnostics.ex index d086e44..f2479e0 100644 --- a/lib/codeqa/diagnostics.ex +++ b/lib/codeqa/diagnostics.ex @@ -4,7 +4,10 @@ defmodule CodeQA.Diagnostics do cosine similarity against combined metric behavior profiles. 
""" - alias CodeQA.CombinedMetrics.{SampleRunner, FileScorer} + alias CodeQA.CombinedMetrics.FileScorer + alias CodeQA.CombinedMetrics.SampleRunner + alias CodeQA.Engine.Analyzer + alias CodeQA.Engine.Collector alias CodeQA.HealthReport.Grader @doc """ @@ -25,8 +28,8 @@ defmodule CodeQA.Diagnostics do top = opts[:top] || 15 format = opts[:format] || :plain - files = CodeQA.Engine.Collector.collect_files(path) - result = CodeQA.Engine.Analyzer.analyze_codebase(files, []) + files = Collector.collect_files(path) + result = Analyzer.analyze_codebase(files, []) case mode do :per_file -> run_per_file(result, top, format) @@ -78,16 +81,7 @@ defmodule CodeQA.Diagnostics do :json -> files_json = Enum.map(file_diagnoses, fn {file_path, diagnoses} -> - behaviors = - Enum.map(diagnoses, fn d -> - %{ - behavior: "#{d.category}.#{d.behavior}", - cosine: d.cosine, - score: Grader.score_cosine(d.cosine) - } - end) - - %{file: file_path, behaviors: behaviors} + %{file: file_path, behaviors: Enum.map(diagnoses, &diagnosis_to_map/1)} end) Jason.encode!(%{files: files_json}, pretty: true) @@ -95,15 +89,27 @@ defmodule CodeQA.Diagnostics do _ -> file_rows = Enum.flat_map(file_diagnoses, fn {file_path, diagnoses} -> - Enum.map(diagnoses, fn %{category: cat, behavior: beh, cosine: cosine, score: score} -> - {file_path, "#{cat}.#{beh}", cosine, score} - end) + diagnoses_to_rows(file_path, diagnoses) end) "## Diagnose: per-file\n\n" <> per_file_table(file_rows, top) end end + defp diagnosis_to_map(d) do + %{ + behavior: "#{d.category}.#{d.behavior}", + cosine: d.cosine, + score: Grader.score_cosine(d.cosine) + } + end + + defp diagnoses_to_rows(file_path, diagnoses) do + Enum.map(diagnoses, fn %{category: cat, behavior: beh, cosine: cosine, score: score} -> + {file_path, "#{cat}.#{beh}", cosine, score} + end) + end + defp project_languages(files_map) do files_map |> Map.keys() diff --git a/lib/codeqa/engine/analyzer.ex b/lib/codeqa/engine/analyzer.ex index b7e5c4e..6fe8cf0 100644 --- 
a/lib/codeqa/engine/analyzer.ex +++ b/lib/codeqa/engine/analyzer.ex @@ -2,9 +2,11 @@ defmodule CodeQA.Engine.Analyzer do @moduledoc "Orchestrates metric computation across files." alias CodeQA.Analysis.RunSupervisor + alias CodeQA.Engine.Parallel + alias CodeQA.Engine.Pipeline alias CodeQA.Engine.Registry - alias CodeQA.Metrics.File, as: Metrics alias CodeQA.Metrics.Codebase, as: CodebaseMetrics + alias CodeQA.Metrics.File, as: Metrics @registry Registry.new() |> Registry.register_file_metric(Metrics.Entropy) @@ -35,14 +37,14 @@ defmodule CodeQA.Engine.Analyzer do @spec analyze_file(String.t(), String.t()) :: map() def analyze_file(_path, content) do - ctx = CodeQA.Engine.Pipeline.build_file_context(content) - CodeQA.Engine.Registry.run_file_metrics(@registry, ctx, []) + ctx = Pipeline.build_file_context(content) + Registry.run_file_metrics(@registry, ctx, []) end @spec analyze_codebase_aggregate(map(), keyword()) :: map() def analyze_codebase_aggregate(files_map, opts \\ []) do with_run_context(opts, fn opts -> - file_results = CodeQA.Engine.Parallel.analyze_files(files_map, opts) + file_results = Parallel.analyze_files(files_map, opts) aggregate_file_metrics(file_results) end) end @@ -66,7 +68,7 @@ defmodule CodeQA.Engine.Analyzer do defp do_analyze_codebase(files, opts) do registry = @registry - file_results = CodeQA.Engine.Parallel.analyze_files(files, opts) + file_results = Parallel.analyze_files(files, opts) codebase_metrics = Registry.run_codebase_metrics(registry, files, opts) aggregate = aggregate_file_metrics(file_results) diff --git a/lib/codeqa/engine/parallel.ex b/lib/codeqa/engine/parallel.ex index 5ddf757..f5a8da1 100644 --- a/lib/codeqa/engine/parallel.ex +++ b/lib/codeqa/engine/parallel.ex @@ -1,4 +1,8 @@ defmodule CodeQA.Engine.Parallel do + alias CodeQA.Analysis.FileContextServer + alias CodeQA.Engine.Analyzer + alias CodeQA.Engine.Registry + @moduledoc "Parallel file analysis using Flow (GenStage-based)." 
def analyze_files(files, opts \\ []) when is_map(files) do @@ -65,12 +69,12 @@ defmodule CodeQA.Engine.Parallel do end defp analyze_single_file(path, content, opts) do - registry = CodeQA.Engine.Analyzer.build_registry() + registry = Analyzer.build_registry() file_opts = Keyword.put(opts, :path, path) pid = Keyword.fetch!(opts, :file_context_pid) - ctx = CodeQA.Analysis.FileContextServer.get(pid, content, file_opts) - metrics = CodeQA.Engine.Registry.run_file_metrics(registry, ctx, opts) + ctx = FileContextServer.get(pid, content, file_opts) + metrics = Registry.run_file_metrics(registry, ctx, opts) %{ "bytes" => ctx.byte_count, diff --git a/lib/codeqa/git.ex b/lib/codeqa/git.ex index 0576ddc..106ce54 100644 --- a/lib/codeqa/git.ex +++ b/lib/codeqa/git.ex @@ -8,6 +8,8 @@ defmodule CodeQA.Git do defstruct @enforce_keys end + alias CodeQA.Engine.Collector + @status_map %{"A" => "added", "M" => "modified", "D" => "deleted"} @spec gitignored_files(String.t(), [String.t()]) :: MapSet.t() @@ -82,6 +84,6 @@ defmodule CodeQA.Git do defp source_file?(path) do ext = path |> Path.extname() |> String.downcase() - MapSet.member?(CodeQA.Engine.Collector.source_extensions(), ext) + MapSet.member?(Collector.source_extensions(), ext) end end diff --git a/lib/codeqa/health_report.ex b/lib/codeqa/health_report.ex index 1e8be12..a634a02 100644 --- a/lib/codeqa/health_report.ex +++ b/lib/codeqa/health_report.ex @@ -1,8 +1,8 @@ defmodule CodeQA.HealthReport do @moduledoc "Orchestrates health report generation from analysis results." 
- alias CodeQA.HealthReport.{Config, Grader, Formatter, Delta, TopBlocks} alias CodeQA.CombinedMetrics.{FileScorer, SampleRunner} + alias CodeQA.HealthReport.{Config, Delta, Formatter, Grader, TopBlocks} @spec generate(map(), keyword()) :: map() def generate(analysis_results, opts \\ []) do @@ -59,6 +59,13 @@ defmodule CodeQA.HealthReport do top_blocks = TopBlocks.build(analysis_results, changed_files, codebase_cosine_lookup) + grading_cfg = %{ + category_defs: categories, + grade_scale: grade_scale, + impact_map: impact_map, + combined_top: combined_top + } + {codebase_delta, pr_summary} = if base_results do build_delta_and_summary( @@ -66,11 +73,7 @@ defmodule CodeQA.HealthReport do analysis_results, overall_score, overall_grade, - all_categories, - categories, - grade_scale, - impact_map, - combined_top, + grading_cfg, changed_files, top_blocks ) @@ -100,11 +103,12 @@ defmodule CodeQA.HealthReport do head_results, head_score, head_grade, - _head_categories, - category_defs, - grade_scale, - impact_map, - combined_top, + %{ + category_defs: category_defs, + grade_scale: grade_scale, + impact_map: impact_map, + combined_top: combined_top + }, changed_files, top_blocks ) do diff --git a/lib/codeqa/health_report/formatter/github.ex b/lib/codeqa/health_report/formatter/github.ex index 694b7ff..e0d9170 100644 --- a/lib/codeqa/health_report/formatter/github.ex +++ b/lib/codeqa/health_report/formatter/github.ex @@ -52,23 +52,19 @@ defmodule CodeQA.HealthReport.Formatter.Github do end end - defp grade_letter_from_score(score) do - cond do - score >= 97 -> "A+" - score >= 93 -> "A" - score >= 90 -> "A-" - score >= 87 -> "B+" - score >= 83 -> "B" - score >= 80 -> "B-" - score >= 77 -> "C+" - score >= 73 -> "C" - score >= 70 -> "C-" - score >= 67 -> "D+" - score >= 63 -> "D" - score >= 60 -> "D-" - true -> "F" - end - end + defp grade_letter_from_score(score) when score >= 97, do: "A+" + defp grade_letter_from_score(score) when score >= 93, do: "A" + defp 
grade_letter_from_score(score) when score >= 90, do: "A-" + defp grade_letter_from_score(score) when score >= 87, do: "B+" + defp grade_letter_from_score(score) when score >= 83, do: "B" + defp grade_letter_from_score(score) when score >= 80, do: "B-" + defp grade_letter_from_score(score) when score >= 77, do: "C+" + defp grade_letter_from_score(score) when score >= 73, do: "C" + defp grade_letter_from_score(score) when score >= 70, do: "C-" + defp grade_letter_from_score(score) when score >= 67, do: "D+" + defp grade_letter_from_score(score) when score >= 63, do: "D" + defp grade_letter_from_score(score) when score >= 60, do: "D-" + defp grade_letter_from_score(_score), do: "F" defp header(report) do emoji = grade_emoji(report.overall_grade) @@ -284,11 +280,9 @@ defmodule CodeQA.HealthReport.Formatter.Github do defp top_issues_section(issues, _detail) do rows = - issues - |> Enum.map(fn i -> + Enum.map_join(issues, "\n", fn i -> "| `#{i.category}.#{i.behavior}` | #{format_num(i.cosine)} | #{format_num(i.score)} |" end) - |> Enum.join("\n") table = "| Behavior | Cosine | Score |\n|----------|--------|-------|\n#{rows}" @@ -361,19 +355,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do {"Structure", "branching", "mean_branch_count"} ] - rows = - Enum.flat_map(metrics, fn {label, group, key} -> - base_val = get_in(base_agg, [group, key]) - head_val = get_in(head_agg, [group, key]) - - if is_number(base_val) and is_number(head_val) do - diff = Float.round(head_val - base_val, 2) - diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}" - ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"] - else - [] - end - end) + rows = Enum.flat_map(metrics, &format_metric_row(&1, base_agg, head_agg)) if rows == [] do [] @@ -388,6 +370,19 @@ defmodule CodeQA.HealthReport.Formatter.Github do end end + defp format_metric_row({label, group, key}, base_agg, head_agg) do + base_val = get_in(base_agg, [group, key]) + head_val = 
get_in(head_agg, [group, key]) + + if is_number(base_val) and is_number(head_val) do + diff = Float.round(head_val - base_val, 2) + diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}" + ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"] + else + [] + end + end + defp blocks_section([]), do: [] defp blocks_section(top_blocks) do @@ -398,27 +393,8 @@ defmodule CodeQA.HealthReport.Formatter.Github do status_str = if group.status, do: " [#{group.status}]", else: "" summary_line = "🔍 #{group.path}#{status_str} — #{length(group.blocks)} block(s)" - block_lines = - Enum.flat_map(group.blocks, fn block -> - end_line = block.end_line || block.start_line - - potential_lines = - Enum.flat_map(block.potentials, fn p -> - icon = severity_icon(p.severity) - delta_str = format_num(p.cosine_delta) - label = String.upcase(to_string(p.severity)) - line = "**#{icon} #{label}** `#{p.category}/#{p.behavior}` (Δ #{delta_str})" - fix = if p.fix_hint, do: ["> #{p.fix_hint}"], else: [] - [line | fix] - end) - - [ - "**lines #{block.start_line}–#{end_line}** · #{block.type} · #{block.token_count} tokens" - ] ++ - potential_lines ++ [""] - end) - - inner = List.flatten(block_lines) |> Enum.join("\n") + inner = + group.blocks |> Enum.flat_map(&format_block/1) |> List.flatten() |> Enum.join("\n") [ "
    ", @@ -437,6 +413,25 @@ defmodule CodeQA.HealthReport.Formatter.Github do ] end + defp format_block(block) do + end_line = block.end_line || block.start_line + + header = + "**lines #{block.start_line}–#{end_line}** · #{block.type} · #{block.token_count} tokens" + + potential_lines = Enum.flat_map(block.potentials, &format_potential/1) + [header] ++ potential_lines ++ [""] + end + + defp format_potential(p) do + icon = severity_icon(p.severity) + delta_str = format_num(p.cosine_delta) + label = String.upcase(to_string(p.severity)) + line = "**#{icon} #{label}** `#{p.category}/#{p.behavior}` (Δ #{delta_str})" + fix = if p.fix_hint, do: ["> #{p.fix_hint}"], else: [] + [line | fix] + end + defp severity_icon(:critical), do: "🔴" defp severity_icon(:high), do: "🟠" defp severity_icon(:medium), do: "🟡" diff --git a/lib/codeqa/health_report/formatter/plain.ex b/lib/codeqa/health_report/formatter/plain.ex index 25117a3..0b99b5c 100644 --- a/lib/codeqa/health_report/formatter/plain.ex +++ b/lib/codeqa/health_report/formatter/plain.ex @@ -178,19 +178,7 @@ defmodule CodeQA.HealthReport.Formatter.Plain do {"Structure", "branching", "mean_branch_count"} ] - rows = - Enum.flat_map(metrics, fn {label, group, key} -> - base_val = get_in(base_agg, [group, key]) - head_val = get_in(head_agg, [group, key]) - - if is_number(base_val) and is_number(head_val) do - diff = Float.round(head_val - base_val, 2) - diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}" - ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"] - else - [] - end - end) + rows = Enum.flat_map(metrics, &format_metric_row(&1, base_agg, head_agg)) if rows == [] do [] @@ -205,6 +193,19 @@ defmodule CodeQA.HealthReport.Formatter.Plain do end end + defp format_metric_row({label, group, key}, base_agg, head_agg) do + base_val = get_in(base_agg, [group, key]) + head_val = get_in(head_agg, [group, key]) + + if is_number(base_val) and is_number(head_val) do + diff = 
Float.round(head_val - base_val, 2) + diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}" + ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"] + else + [] + end + end + defp blocks_section([]), do: [] defp blocks_section(top_blocks) do @@ -213,27 +214,7 @@ defmodule CodeQA.HealthReport.Formatter.Plain do file_parts = Enum.flat_map(top_blocks, fn group -> status_str = if group.status, do: " [#{group.status}]", else: "" - - block_lines = - Enum.flat_map(group.blocks, fn block -> - end_line = block.end_line || block.start_line - - header = - "**lines #{block.start_line}–#{end_line}** · #{block.type} · #{block.token_count} tokens" - - potential_lines = - Enum.flat_map(block.potentials, fn p -> - icon = severity_icon(p.severity) - delta_str = format_num(p.cosine_delta) - label = "#{String.upcase(to_string(p.severity))}" - line = " #{icon} #{label} #{p.category} / #{p.behavior} (Δ #{delta_str})" - fix = if p.fix_hint, do: [" → #{p.fix_hint}"], else: [] - [line | fix] - end) - - [header | potential_lines] ++ [""] - end) - + block_lines = Enum.flat_map(group.blocks, &format_block/1) ["### #{group.path}#{status_str}", "" | block_lines] end) @@ -244,6 +225,25 @@ defmodule CodeQA.HealthReport.Formatter.Plain do ] end + defp format_block(block) do + end_line = block.end_line || block.start_line + + header = + "**lines #{block.start_line}–#{end_line}** · #{block.type} · #{block.token_count} tokens" + + potential_lines = Enum.flat_map(block.potentials, &format_potential/1) + [header | potential_lines] ++ [""] + end + + defp format_potential(p) do + icon = severity_icon(p.severity) + delta_str = format_num(p.cosine_delta) + label = String.upcase(to_string(p.severity)) + line = " #{icon} #{label} #{p.category} / #{p.behavior} (Δ #{delta_str})" + fix = if p.fix_hint, do: [" → #{p.fix_hint}"], else: [] + [line | fix] + end + defp severity_icon(:critical), do: "🔴" defp severity_icon(:high), do: "🟠" defp 
severity_icon(:medium), do: "🟡" diff --git a/lib/codeqa/health_report/grader.ex b/lib/codeqa/health_report/grader.ex index 20413c9..8011c8f 100644 --- a/lib/codeqa/health_report/grader.ex +++ b/lib/codeqa/health_report/grader.ex @@ -2,6 +2,8 @@ defmodule CodeQA.HealthReport.Grader do @moduledoc "Scores metrics and assigns letter grades." alias CodeQA.CombinedMetrics.SampleRunner + alias CodeQA.Config + alias CodeQA.HealthReport.Categories @doc """ Score a single metric value (0-100) based on thresholds and direction. @@ -100,7 +102,7 @@ defmodule CodeQA.HealthReport.Grader do @doc "Convert a numeric score (0-100) to a letter grade using the given scale." @spec grade_letter(number(), [{number(), String.t()}]) :: String.t() - def grade_letter(score, scale \\ CodeQA.HealthReport.Categories.default_grade_scale()) do + def grade_letter(score, scale \\ Categories.default_grade_scale()) do Enum.find_value(scale, "F", fn {min, letter} -> if score >= min, do: letter end) @@ -114,7 +116,7 @@ defmodule CodeQA.HealthReport.Grader do def grade_category( category, file_metrics, - scale \\ CodeQA.HealthReport.Categories.default_grade_scale() + scale \\ Categories.default_grade_scale() ) do scored = category.metrics @@ -161,7 +163,7 @@ defmodule CodeQA.HealthReport.Grader do def grade_file( categories, file_metrics, - scale \\ CodeQA.HealthReport.Categories.default_grade_scale() + scale \\ Categories.default_grade_scale() ) do Enum.map(categories, &grade_category(&1, file_metrics, scale)) end @@ -173,7 +175,7 @@ defmodule CodeQA.HealthReport.Grader do def grade_aggregate( categories, aggregate, - scale \\ CodeQA.HealthReport.Categories.default_grade_scale() + scale \\ Categories.default_grade_scale() ) do # Convert aggregate format (mean_X keys) to file-metric-like format file_like = @@ -205,7 +207,7 @@ defmodule CodeQA.HealthReport.Grader do ) :: {integer(), String.t()} def overall_score( category_grades, - scale \\ CodeQA.HealthReport.Categories.default_grade_scale(), + scale \\ 
Categories.default_grade_scale(), impact_map \\ %{} ) do if category_grades == [] do @@ -240,10 +242,10 @@ defmodule CodeQA.HealthReport.Grader do def grade_cosine_categories( aggregate, worst_files, - scale \\ CodeQA.HealthReport.Categories.default_grade_scale(), + scale \\ Categories.default_grade_scale(), languages \\ [] ) do - threshold = CodeQA.Config.cosine_significance_threshold() + threshold = Config.cosine_significance_threshold() aggregate |> SampleRunner.diagnose_aggregate(top: 99_999, languages: languages) @@ -297,9 +299,11 @@ defmodule CodeQA.HealthReport.Grader do category, all_file_metrics, top_n, - scale \\ CodeQA.HealthReport.Categories.default_grade_scale() + scale \\ Categories.default_grade_scale() ) do - # TODO(option-c): threshold metric scores are file-level aggregates; line-level attribution would require each AST node to carry its own per-metric values so that the node with the highest contribution to the bad metric score could be identified and reported directly. + # NOTE: threshold metric scores are file-level aggregates; line-level attribution would require + # each AST node to carry its own per-metric values so that the node with the highest + # contribution to the bad metric score could be identified and reported directly. 
all_file_metrics |> Enum.map(fn {path, file_data} -> metrics = Map.get(file_data, "metrics", %{}) diff --git a/lib/codeqa/health_report/top_blocks.ex b/lib/codeqa/health_report/top_blocks.ex index dda0bb8..1761941 100644 --- a/lib/codeqa/health_report/top_blocks.ex +++ b/lib/codeqa/health_report/top_blocks.ex @@ -100,18 +100,17 @@ defmodule CodeQA.HealthReport.TopBlocks do Scorer.all_yamls() |> Enum.flat_map(fn {yaml_path, data} -> category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") - - Enum.flat_map(data, fn - {behavior, behavior_data} when is_map(behavior_data) -> - case Map.get(behavior_data, "_fix_hint") do - nil -> [] - hint -> [{{category, behavior}, hint}] - end - - _ -> - [] - end) + Enum.flat_map(data, &hints_for_behavior(category, &1)) end) |> Map.new() end + + defp hints_for_behavior(category, {behavior, behavior_data}) when is_map(behavior_data) do + case Map.get(behavior_data, "_fix_hint") do + nil -> [] + hint -> [{{category, behavior}, hint}] + end + end + + defp hints_for_behavior(_category, _entry), do: [] end diff --git a/lib/codeqa/languages/code/native/cpp.ex b/lib/codeqa/languages/code/native/cpp.ex index 3cc8724..31cbb4e 100644 --- a/lib/codeqa/languages/code/native/cpp.ex +++ b/lib/codeqa/languages/code/native/cpp.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Native.Cpp do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/native/go.ex b/lib/codeqa/languages/code/native/go.ex index 45ea58d..b728aab 100644 --- a/lib/codeqa/languages/code/native/go.ex +++ b/lib/codeqa/languages/code/native/go.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Native.Go do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/native/haskell.ex b/lib/codeqa/languages/code/native/haskell.ex index c0b1075..48cd646 100644 --- a/lib/codeqa/languages/code/native/haskell.ex +++ b/lib/codeqa/languages/code/native/haskell.ex @@ -1,4 +1,5 @@ defmodule 
CodeQA.Languages.Code.Native.Haskell do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/native/ocaml.ex b/lib/codeqa/languages/code/native/ocaml.ex index 6857a5f..d1e8b21 100644 --- a/lib/codeqa/languages/code/native/ocaml.ex +++ b/lib/codeqa/languages/code/native/ocaml.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Native.Ocaml do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/native/rust.ex b/lib/codeqa/languages/code/native/rust.ex index 4aa7907..0616834 100644 --- a/lib/codeqa/languages/code/native/rust.ex +++ b/lib/codeqa/languages/code/native/rust.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Native.Rust do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/native/swift.ex b/lib/codeqa/languages/code/native/swift.ex index 1a83b67..0422528 100644 --- a/lib/codeqa/languages/code/native/swift.ex +++ b/lib/codeqa/languages/code/native/swift.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Native.Swift do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/native/zig.ex b/lib/codeqa/languages/code/native/zig.ex index 4141a2e..f3e13f8 100644 --- a/lib/codeqa/languages/code/native/zig.ex +++ b/lib/codeqa/languages/code/native/zig.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Native.Zig do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/scripting/julia.ex b/lib/codeqa/languages/code/scripting/julia.ex index e99f8e3..8f859d1 100644 --- a/lib/codeqa/languages/code/scripting/julia.ex +++ b/lib/codeqa/languages/code/scripting/julia.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Scripting.Julia do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/scripting/lua.ex b/lib/codeqa/languages/code/scripting/lua.ex index e9e94b8..7ae8e9d 100644 --- a/lib/codeqa/languages/code/scripting/lua.ex +++ 
b/lib/codeqa/languages/code/scripting/lua.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Scripting.Lua do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/scripting/perl.ex b/lib/codeqa/languages/code/scripting/perl.ex index 5f90eeb..3155f1c 100644 --- a/lib/codeqa/languages/code/scripting/perl.ex +++ b/lib/codeqa/languages/code/scripting/perl.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Scripting.Perl do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/scripting/php.ex b/lib/codeqa/languages/code/scripting/php.ex index 0311e69..294b9a1 100644 --- a/lib/codeqa/languages/code/scripting/php.ex +++ b/lib/codeqa/languages/code/scripting/php.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Scripting.PHP do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/scripting/python.ex b/lib/codeqa/languages/code/scripting/python.ex index bdb31aa..e1c4bb4 100644 --- a/lib/codeqa/languages/code/scripting/python.ex +++ b/lib/codeqa/languages/code/scripting/python.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Scripting.Python do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/scripting/r.ex b/lib/codeqa/languages/code/scripting/r.ex index c22cb15..d735d2b 100644 --- a/lib/codeqa/languages/code/scripting/r.ex +++ b/lib/codeqa/languages/code/scripting/r.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Scripting.R do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/scripting/ruby.ex b/lib/codeqa/languages/code/scripting/ruby.ex index 90b17a7..d1e9761 100644 --- a/lib/codeqa/languages/code/scripting/ruby.ex +++ b/lib/codeqa/languages/code/scripting/ruby.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Scripting.Ruby do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/scripting/shell.ex b/lib/codeqa/languages/code/scripting/shell.ex 
index acbe37c..710d28a 100644 --- a/lib/codeqa/languages/code/scripting/shell.ex +++ b/lib/codeqa/languages/code/scripting/shell.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Scripting.Shell do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/vm/clojure.ex b/lib/codeqa/languages/code/vm/clojure.ex index 770b280..5dd149b 100644 --- a/lib/codeqa/languages/code/vm/clojure.ex +++ b/lib/codeqa/languages/code/vm/clojure.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Vm.Clojure do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/vm/csharp.ex b/lib/codeqa/languages/code/vm/csharp.ex index b90933f..85edce7 100644 --- a/lib/codeqa/languages/code/vm/csharp.ex +++ b/lib/codeqa/languages/code/vm/csharp.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Vm.CSharp do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/vm/dart.ex b/lib/codeqa/languages/code/vm/dart.ex index 16e29de..e821e22 100644 --- a/lib/codeqa/languages/code/vm/dart.ex +++ b/lib/codeqa/languages/code/vm/dart.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Vm.Dart do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/vm/elixir.ex b/lib/codeqa/languages/code/vm/elixir.ex index 401f808..2eab027 100644 --- a/lib/codeqa/languages/code/vm/elixir.ex +++ b/lib/codeqa/languages/code/vm/elixir.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Vm.Elixir do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/vm/erlang.ex b/lib/codeqa/languages/code/vm/erlang.ex index d390e01..c835dd6 100644 --- a/lib/codeqa/languages/code/vm/erlang.ex +++ b/lib/codeqa/languages/code/vm/erlang.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Vm.Erlang do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/vm/fsharp.ex b/lib/codeqa/languages/code/vm/fsharp.ex index e6b12ef..9c7792f 100644 --- 
a/lib/codeqa/languages/code/vm/fsharp.ex +++ b/lib/codeqa/languages/code/vm/fsharp.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Vm.Fsharp do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/vm/java.ex b/lib/codeqa/languages/code/vm/java.ex index 151bee1..fa018e0 100644 --- a/lib/codeqa/languages/code/vm/java.ex +++ b/lib/codeqa/languages/code/vm/java.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Vm.Java do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/vm/kotlin.ex b/lib/codeqa/languages/code/vm/kotlin.ex index 72980a2..4c286c2 100644 --- a/lib/codeqa/languages/code/vm/kotlin.ex +++ b/lib/codeqa/languages/code/vm/kotlin.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Vm.Kotlin do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/vm/scala.ex b/lib/codeqa/languages/code/vm/scala.ex index 55a7a00..08ac7ab 100644 --- a/lib/codeqa/languages/code/vm/scala.ex +++ b/lib/codeqa/languages/code/vm/scala.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Vm.Scala do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/web/javascript.ex b/lib/codeqa/languages/code/web/javascript.ex index 38194f1..87f48f5 100644 --- a/lib/codeqa/languages/code/web/javascript.ex +++ b/lib/codeqa/languages/code/web/javascript.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Web.JavaScript do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/code/web/typescript.ex b/lib/codeqa/languages/code/web/typescript.ex index 23ab690..b8a422a 100644 --- a/lib/codeqa/languages/code/web/typescript.ex +++ b/lib/codeqa/languages/code/web/typescript.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Code.Web.TypeScript do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/config/dockerfile.ex b/lib/codeqa/languages/config/dockerfile.ex index 97373ea..e1ed3a6 100644 --- 
a/lib/codeqa/languages/config/dockerfile.ex +++ b/lib/codeqa/languages/config/dockerfile.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Config.Dockerfile do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/config/makefile.ex b/lib/codeqa/languages/config/makefile.ex index 6ad3d7b..ffb4522 100644 --- a/lib/codeqa/languages/config/makefile.ex +++ b/lib/codeqa/languages/config/makefile.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Config.Makefile do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/config/terraform.ex b/lib/codeqa/languages/config/terraform.ex index bf257bc..c35cb9f 100644 --- a/lib/codeqa/languages/config/terraform.ex +++ b/lib/codeqa/languages/config/terraform.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Config.Terraform do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/data/graphql.ex b/lib/codeqa/languages/data/graphql.ex index 70e02e2..47dbc51 100644 --- a/lib/codeqa/languages/data/graphql.ex +++ b/lib/codeqa/languages/data/graphql.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Data.GraphQL do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/data/json.ex b/lib/codeqa/languages/data/json.ex index c0b0589..0b1909e 100644 --- a/lib/codeqa/languages/data/json.ex +++ b/lib/codeqa/languages/data/json.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Data.Json do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/data/sql.ex b/lib/codeqa/languages/data/sql.ex index 9178fe5..ddc4018 100644 --- a/lib/codeqa/languages/data/sql.ex +++ b/lib/codeqa/languages/data/sql.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Data.Sql do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/data/toml.ex b/lib/codeqa/languages/data/toml.ex index 9f555a3..1051c0d 100644 --- a/lib/codeqa/languages/data/toml.ex +++ b/lib/codeqa/languages/data/toml.ex @@ -1,4 +1,5 @@ defmodule 
CodeQA.Languages.Data.Toml do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/data/yaml.ex b/lib/codeqa/languages/data/yaml.ex index 77d8f1f..8beb0cb 100644 --- a/lib/codeqa/languages/data/yaml.ex +++ b/lib/codeqa/languages/data/yaml.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Data.Yaml do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/language.ex b/lib/codeqa/languages/language.ex index ada7fa1..3ccd172 100644 --- a/lib/codeqa/languages/language.ex +++ b/lib/codeqa/languages/language.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Language do + @moduledoc false @callback name() :: String.t() @callback extensions() :: [String.t()] @callback comment_prefixes() :: [String.t()] @@ -168,7 +169,7 @@ defmodule CodeQA.Language do defp strip_line_comments(content, []), do: content defp strip_line_comments(content, prefixes) do - pattern = prefixes |> Enum.map(&Regex.escape/1) |> Enum.join("|") + pattern = Enum.map_join(prefixes, "|", &Regex.escape/1) Regex.replace(Regex.compile!("(#{pattern}).*$", [:multiline]), content, "") end diff --git a/lib/codeqa/languages/markup/css.ex b/lib/codeqa/languages/markup/css.ex index 81a8e7c..0b0af14 100644 --- a/lib/codeqa/languages/markup/css.ex +++ b/lib/codeqa/languages/markup/css.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Markup.Css do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/markup/html.ex b/lib/codeqa/languages/markup/html.ex index 4835835..31a0fe8 100644 --- a/lib/codeqa/languages/markup/html.ex +++ b/lib/codeqa/languages/markup/html.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Markup.Html do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/markup/markdown.ex b/lib/codeqa/languages/markup/markdown.ex index 1e81a8c..ee75d60 100644 --- a/lib/codeqa/languages/markup/markdown.ex +++ b/lib/codeqa/languages/markup/markdown.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Markup.Markdown do + 
@moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/markup/xml.ex b/lib/codeqa/languages/markup/xml.ex index 8c136c9..85c7668 100644 --- a/lib/codeqa/languages/markup/xml.ex +++ b/lib/codeqa/languages/markup/xml.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Markup.Xml do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/languages/unknown.ex b/lib/codeqa/languages/unknown.ex index 9873a86..11a0f7a 100644 --- a/lib/codeqa/languages/unknown.ex +++ b/lib/codeqa/languages/unknown.ex @@ -1,4 +1,5 @@ defmodule CodeQA.Languages.Unknown do + @moduledoc false use CodeQA.Language @impl true diff --git a/lib/codeqa/metrics/codebase/similarity.ex b/lib/codeqa/metrics/codebase/similarity.ex index cc8b644..e20e556 100644 --- a/lib/codeqa/metrics/codebase/similarity.ex +++ b/lib/codeqa/metrics/codebase/similarity.ex @@ -13,6 +13,10 @@ defmodule CodeQA.Metrics.Codebase.Similarity do @behaviour CodeQA.Metrics.Codebase.CodebaseMetric + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.CLI.UI + alias CodeQA.Metrics.File.Winnowing + @impl true def name, do: "similarity" @@ -99,7 +103,7 @@ defmodule CodeQA.Metrics.Codebase.Similarity do defp maybe_print_fingerprint_progress(true, i, total) do if rem(i + 1, max(1, div(total, 20))) == 0 do - IO.write(:stderr, "\r" <> CodeQA.CLI.UI.progress_bar(i + 1, total, label: "Fingerprinting")) + IO.write(:stderr, "\r" <> UI.progress_bar(i + 1, total, label: "Fingerprinting")) end end @@ -130,7 +134,7 @@ defmodule CodeQA.Metrics.Codebase.Similarity do defp maybe_print_index_progress(true, idx, total) do if rem(idx + 1, max(1, div(total, 20))) == 0 do - IO.write(:stderr, "\r" <> CodeQA.CLI.UI.progress_bar(idx + 1, total, label: "Indexing")) + IO.write(:stderr, "\r" <> UI.progress_bar(idx + 1, total, label: "Indexing")) end end @@ -231,7 +235,7 @@ defmodule CodeQA.Metrics.Codebase.Similarity do defp maybe_print_lsh_progress(true, idx, total) do if rem(idx + 1, max(1, div(total, 20))) == 0 do - 
IO.write(:stderr, "\r" <> CodeQA.CLI.UI.progress_bar(idx + 1, total, label: "LSH Filter")) + IO.write(:stderr, "\r" <> UI.progress_bar(idx + 1, total, label: "LSH Filter")) end end @@ -284,8 +288,8 @@ defmodule CodeQA.Metrics.Codebase.Similarity do eta_ms = round((total_pairs - c) * avg_time) output = - CodeQA.CLI.UI.progress_bar(c, total_pairs, - eta: CodeQA.CLI.UI.format_eta(eta_ms), + UI.progress_bar(c, total_pairs, + eta: UI.format_eta(eta_ms), label: "NCD Compression" ) @@ -327,9 +331,9 @@ defmodule CodeQA.Metrics.Codebase.Similarity do defp compute_fingerprints(content, _opts) do content - |> CodeQA.AST.Lexing.TokenNormalizer.normalize_structural() + |> TokenNormalizer.normalize_structural() |> Enum.map(& &1.kind) - |> CodeQA.Metrics.File.Winnowing.kgrams(5) + |> Winnowing.kgrams(5) |> MapSet.new() end diff --git a/lib/codeqa/metrics/file/casing_entropy.ex b/lib/codeqa/metrics/file/casing_entropy.ex index 6f1cb2d..4256e0e 100644 --- a/lib/codeqa/metrics/file/casing_entropy.ex +++ b/lib/codeqa/metrics/file/casing_entropy.ex @@ -20,6 +20,8 @@ defmodule CodeQA.Metrics.File.CasingEntropy do @behaviour CodeQA.Metrics.File.FileMetric + alias CodeQA.Metrics.File.Inflector + @impl true def name, do: "casing_entropy" @@ -45,7 +47,7 @@ defmodule CodeQA.Metrics.File.CasingEntropy do def analyze(%{identifiers: identifiers}) do counts = identifiers - |> Enum.map(&CodeQA.Metrics.File.Inflector.detect_casing/1) + |> Enum.map(&Inflector.detect_casing/1) |> Enum.frequencies() total = length(identifiers) diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks.ex b/lib/codeqa/metrics/file/near_duplicate_blocks.ex index c1cb707..94ba060 100644 --- a/lib/codeqa/metrics/file/near_duplicate_blocks.ex +++ b/lib/codeqa/metrics/file/near_duplicate_blocks.ex @@ -11,11 +11,11 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do d5 ≤ 25%, d6 ≤ 30%, d7 ≤ 40%, d8 ≤ 50% """ - alias CodeQA.AST.Enrichment.Node alias CodeQA.AST.Classification.NodeProtocol - alias 
CodeQA.AST.Parsing.Parser - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Enrichment.Node alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken} + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Parsing.Parser alias CodeQA.Language @max_bucket 8 @@ -230,10 +230,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do decorated = if MapSet.size(pruned) > 0 do - Enum.map(decorated, fn {i, b, v, h, l, c, n, bigrams} -> - {i, b, v, h, l, c, n, - Enum.reject(bigrams, &MapSet.member?(pruned, :erlang.phash2(&1)))} - end) + Enum.map(decorated, &prune_bigrams(&1, pruned)) else decorated end @@ -361,38 +358,53 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do bigrams_a |> Enum.reduce(%{}, fn bigram, acc -> h = :erlang.phash2(bigram) - - Map.get(shingle_index, h, []) - |> Enum.reduce(acc, fn j, cnt -> - if j > i, do: Map.update(cnt, j, 1, &(&1 + 1)), else: cnt - end) + Map.get(shingle_index, h, []) |> Enum.reduce(acc, &count_candidate(&1, &2, i)) end) |> Enum.filter(fn {_, count} -> count >= min_shared end) |> Enum.map(&elem(&1, 0)) |> Enum.reject(fn j -> j in exact_list end) |> Enum.flat_map(fn j -> - {_j, block_b, values_b, _hash_b, len_b, children_b, newlines_b, _bigrams_b} = - elem(decorated_arr, j) + near_pair_for_candidate( + j, + decorated_arr, + block_a, + values_a, + len_a, + children_a, + newlines_a + ) + end) + + exact_pairs ++ near_pairs + end - min_count = min(len_a, len_b) - max_allowed = round(min_count * 0.5) + defp count_candidate(j, cnt, i) when j > i, do: Map.update(cnt, j, 1, &(&1 + 1)) + defp count_candidate(_j, cnt, _i), do: cnt - if structure_compatible?(children_a, newlines_a, children_b, newlines_b) and - abs(len_a - len_b) <= max_allowed do - ed = token_edit_distance_bounded(values_a, values_b, max_allowed) + defp near_pair_for_candidate(j, decorated_arr, block_a, values_a, len_a, children_a, newlines_a) do + {_j, block_b, values_b, _hash_b, len_b, children_b, newlines_b, _bigrams_b} = + elem(decorated_arr, j) - case 
percent_bucket(ed, min_count) do - nil -> [] - bucket when bucket > 0 -> [{bucket, {block_a.label, block_b.label}}] - # ed=0 handled by exact_pairs above - _ -> [] - end - else - [] - end - end) + min_count = min(len_a, len_b) + max_allowed = round(min_count * 0.5) - exact_pairs ++ near_pairs + if structure_compatible?(children_a, newlines_a, children_b, newlines_b) and + abs(len_a - len_b) <= max_allowed do + ed = token_edit_distance_bounded(values_a, values_b, max_allowed) + + case percent_bucket(ed, min_count) do + nil -> [] + bucket when bucket > 0 -> [{bucket, {block_a.label, block_b.label}}] + # ed=0 handled by exact_pairs above + _ -> [] + end + else + [] + end + end + + defp prune_bigrams({i, b, v, h, l, c, n, bigrams}, pruned) do + {i, b, v, h, l, c, n, Enum.reject(bigrams, &MapSet.member?(pruned, :erlang.phash2(&1)))} end # Uses pre-computed children counts and newline counts from the decorated tuple diff --git a/lib/codeqa/metrics/post_processing/menzerath.ex b/lib/codeqa/metrics/post_processing/menzerath.ex index 2d9bd32..4b5b10c 100644 --- a/lib/codeqa/metrics/post_processing/menzerath.ex +++ b/lib/codeqa/metrics/post_processing/menzerath.ex @@ -256,22 +256,26 @@ defmodule CodeQA.Metrics.PostProcessing.Menzerath do if denom == 0.0 do {nil, nil} else - b = (n * sum_lxly - sum_lx * sum_ly) / denom - log_a = (sum_ly - b * sum_lx) / n - mean_ly = sum_ly / n + fit_power_law_coefficients(log_xs, log_ys, sum_lx, sum_ly, sum_lxly, n, denom) + end + end + end - ss_tot = Enum.reduce(log_ys, 0.0, fn ly, acc -> acc + (ly - mean_ly) ** 2 end) + defp fit_power_law_coefficients(log_xs, log_ys, sum_lx, sum_ly, sum_lxly, n, denom) do + b = (n * sum_lxly - sum_lx * sum_ly) / denom + log_a = (sum_ly - b * sum_lx) / n + mean_ly = sum_ly / n - ss_res = - Enum.zip(log_xs, log_ys) - |> Enum.reduce(0.0, fn {lx, ly}, acc -> - acc + (ly - (log_a + b * lx)) ** 2 - end) + ss_tot = Enum.reduce(log_ys, 0.0, fn ly, acc -> acc + (ly - mean_ly) ** 2 end) - r_squared = if ss_tot == 
0.0, do: 0.0, else: 1.0 - ss_res / ss_tot - {b, r_squared} - end - end + ss_res = + Enum.zip(log_xs, log_ys) + |> Enum.reduce(0.0, fn {lx, ly}, acc -> + acc + (ly - (log_a + b * lx)) ** 2 + end) + + r_squared = if ss_tot == 0.0, do: 0.0, else: 1.0 - ss_res / ss_tot + {b, r_squared} end defp round4(v), do: Float.round(v * 1.0, 4) diff --git a/lib/mix/tasks/codeqa/sample_report.ex b/lib/mix/tasks/codeqa/sample_report.ex index e4a1ba5..1bc5cf0 100644 --- a/lib/mix/tasks/codeqa/sample_report.ex +++ b/lib/mix/tasks/codeqa/sample_report.ex @@ -38,11 +38,15 @@ defmodule Mix.Tasks.Codeqa.SampleReport do top: :integer ] + alias CodeQA.CombinedMetrics.SampleRunner + alias CodeQA.Engine.Analyzer + alias CodeQA.Engine.Collector + def run(args) do Mix.Task.run("app.start") {opts, _, _} = OptionParser.parse(args, switches: @switches) - results = CodeQA.CombinedMetrics.SampleRunner.run(opts) + results = SampleRunner.run(opts) results |> Enum.group_by(& &1.category) @@ -54,19 +58,19 @@ defmodule Mix.Tasks.Codeqa.SampleReport do end if path = opts[:report] do - report = CodeQA.CombinedMetrics.SampleRunner.build_metric_report(opts) + report = SampleRunner.build_metric_report(opts) File.write!(path, Jason.encode!(report, pretty: true)) IO.puts("\nMetric report written to #{path}") end if opts[:apply_scalars] do - stats = CodeQA.CombinedMetrics.SampleRunner.apply_scalars(opts) + stats = SampleRunner.apply_scalars(opts) IO.puts("\nApplied scalars to YAML configs:") Enum.each(stats, &print_scalar_stats/1) end if opts[:apply_languages] do - stats = CodeQA.CombinedMetrics.SampleRunner.apply_languages(opts) + stats = SampleRunner.apply_languages(opts) IO.puts("\nApplied language coverage to YAML configs:") Enum.each(stats, fn %{category: cat, behaviors_with_languages: n} -> @@ -116,28 +120,30 @@ defmodule Mix.Tasks.Codeqa.SampleReport do ) if opts[:verbose] do - Enum.each(r.metric_detail, fn m -> - scalar_str = if m.scalar >= 0, do: "+#{m.scalar}", else: "#{m.scalar}" - - IO.puts( - " " <> 
- pad("#{m.group}.#{m.key}", 45) <> - pad(scalar_str, 7) <> - pad(fmt(m.bad), 8) <> - pad(fmt(m.good), 8) <> - "#{m.ratio}x" - ) - end) + Enum.each(r.metric_detail, &print_metric_detail/1) end end + defp print_metric_detail(m) do + scalar_str = if m.scalar >= 0, do: "+#{m.scalar}", else: "#{m.scalar}" + + IO.puts( + " " <> + pad("#{m.group}.#{m.key}", 45) <> + pad(scalar_str, 7) <> + pad(fmt(m.bad), 8) <> + pad(fmt(m.good), 8) <> + "#{m.ratio}x" + ) + end + defp print_file_scores(path, opts) do expanded = Path.expand(path) files = cond do File.dir?(expanded) -> - CodeQA.Engine.Collector.collect_files(expanded) + Collector.collect_files(expanded) File.regular?(expanded) -> %{Path.basename(expanded) => File.read!(expanded)} @@ -152,18 +158,18 @@ defmodule Mix.Tasks.Codeqa.SampleReport do aggregate = files - |> CodeQA.Engine.Analyzer.analyze_codebase() + |> Analyzer.analyze_codebase() |> get_in(["codebase", "aggregate"]) top_n = opts[:top] || 15 - issues = CodeQA.CombinedMetrics.SampleRunner.diagnose_aggregate(aggregate, top: top_n) + issues = SampleRunner.diagnose_aggregate(aggregate, top: top_n) IO.puts("\nTop #{top_n} likely issues (by cosine similarity):") IO.puts(String.duplicate("-", 75)) IO.puts(" " <> pad("behavior", 38) <> pad("cosine", 9) <> "score") Enum.each(issues, &print_issue_row/1) IO.puts("\nFull breakdown by category:") - combined = CodeQA.CombinedMetrics.SampleRunner.score_aggregate(aggregate) + combined = SampleRunner.score_aggregate(aggregate) IO.puts("") Enum.each(combined, &print_combined_category/1) else diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml index f1c73c5..2201f7d 100644 --- a/priv/combined_metrics/code_smells.yml +++ b/priv/combined_metrics/code_smells.yml @@ -1,5 +1,6 @@ consistent_string_quote_style: _doc: "Files should use a single, consistent string quoting style throughout." 
+ _languages: [elixir] _log_baseline: -18.2553 branching: mean_branching_density: 0.0243 @@ -101,6 +102,7 @@ consistent_string_quote_style: no_dead_code_after_return: _doc: "There should be no unreachable statements after a return or early exit." + _languages: [elixir] _log_baseline: -55.8435 branching: mean_branch_count: -2.0000 @@ -211,6 +213,7 @@ no_dead_code_after_return: no_debug_print_statements: _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." + _languages: [elixir] _log_baseline: -88.0844 branching: mean_branch_count: -0.3540 @@ -321,6 +324,7 @@ no_debug_print_statements: no_fixme_comments: _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." + _languages: [elixir] _log_baseline: -2.0233 branching: mean_branch_count: 0.1755 @@ -435,6 +439,7 @@ no_fixme_comments: no_nested_ternary: _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." + _languages: [elixir] _log_baseline: 7.6475 branching: mean_branch_count: -0.5662 diff --git a/priv/combined_metrics/variable_naming.yml b/priv/combined_metrics/variable_naming.yml index 1be9c6b..e7bc6fa 100644 --- a/priv/combined_metrics/variable_naming.yml +++ b/priv/combined_metrics/variable_naming.yml @@ -1,5 +1,6 @@ boolean_has_is_has_prefix: _doc: "Boolean variables should be prefixed with `is_`, `has_`, or `can_`." + _languages: [elixir, javascript, ruby] _log_baseline: 15.9481 brevity: mean_sample_size: 0.0752 @@ -62,6 +63,7 @@ boolean_has_is_has_prefix: collection_name_is_plural: _doc: "Variables holding a collection should use a plural name." + _languages: [elixir, javascript, ruby] _log_baseline: 21.8380 brevity: mean_sample_size: -0.5320 @@ -123,6 +125,7 @@ collection_name_is_plural: loop_var_is_single_letter: _doc: "Loop index variables (`i`, `j`, `k`) are acceptable inside loop bodies." 
+ _languages: [elixir, javascript, ruby] _log_baseline: -28.3218 brevity: mean_sample_size: -0.1049 @@ -202,6 +205,7 @@ loop_var_is_single_letter: name_contains_and: _doc: "Variable names containing `and` signal a variable that holds two concerns." + _languages: [elixir, javascript, ruby] _log_baseline: 0.4689 branching: mean_branch_count: -0.3666 @@ -320,6 +324,7 @@ name_contains_and: name_contains_type_suffix: _doc: "Type suffixes in names (`userString`, `nameList`) are redundant noise." + _languages: [elixir, javascript, ruby] _log_baseline: -26.6817 branching: mean_branch_count: -0.4150 @@ -405,6 +410,7 @@ name_contains_type_suffix: name_is_abbreviation: _doc: "Abbreviated names (`usr`, `cfg`, `mgr`) reduce readability." + _languages: [elixir, javascript, ruby] _log_baseline: 10.7370 brevity: mean_sample_size: -0.1542 @@ -491,6 +497,7 @@ name_is_abbreviation: name_is_generic: _doc: "Generic names (`data`, `result`, `tmp`, `val`, `obj`) convey no domain meaning." + _languages: [elixir, javascript, ruby] _log_baseline: 37.4815 branching: mean_branch_count: 0.5193 @@ -607,6 +614,7 @@ name_is_generic: name_is_number_like: _doc: "Number-suffixed names (`var1`, `thing2`) signal a missing abstraction." + _languages: [elixir, javascript, ruby] _log_baseline: 1.7611 brevity: mean_sample_size: -0.0262 @@ -685,6 +693,7 @@ name_is_number_like: name_is_single_letter: _doc: "Single-letter names outside loop indices are too opaque." + _languages: [elixir, javascript, ruby] _log_baseline: 26.2113 branching: mean_branching_density: -0.0458 @@ -782,6 +791,7 @@ name_is_single_letter: name_is_too_long: _doc: "Names longer than ~30 characters harm readability." + _languages: [elixir, javascript, ruby] _log_baseline: -7.8322 branching: mean_branch_count: 0.0340 @@ -902,6 +912,7 @@ name_is_too_long: name_is_too_short: _doc: "Names shorter than 3 characters (outside loops) are too opaque." 
+ _languages: [elixir, javascript, ruby] _log_baseline: -2.7224 branching: mean_branch_count: -0.2327 @@ -989,6 +1000,7 @@ name_is_too_short: negated_boolean_name: _doc: "Negated boolean names (`isNotValid`, `notActive`) are harder to reason about." + _languages: [elixir, javascript, ruby] _log_baseline: -4.4565 brevity: mean_sample_size: -0.0998 @@ -1059,6 +1071,7 @@ negated_boolean_name: no_hungarian_notation: _doc: "Hungarian notation prefixes (`strName`, `bFlag`) add noise without type safety." + _languages: [elixir, javascript, ruby] _log_baseline: -15.5962 brevity: mean_sample_size: -0.0814 @@ -1134,6 +1147,7 @@ no_hungarian_notation: screaming_snake_for_constants: _doc: "Module-level constants should use SCREAMING_SNAKE_CASE." + _languages: [elixir, javascript, ruby] _log_baseline: -5.9884 branching: mean_branching_density: 0.0176 diff --git a/test/codeqa/analysis/behavior_config_server_test.exs b/test/codeqa/analysis/behavior_config_server_test.exs index b8afe6a..ebcc31b 100644 --- a/test/codeqa/analysis/behavior_config_server_test.exs +++ b/test/codeqa/analysis/behavior_config_server_test.exs @@ -16,7 +16,7 @@ defmodule CodeQA.Analysis.BehaviorConfigServerTest do Enum.each(behaviors, fn {category, list} -> assert is_binary(category) assert is_list(list) - assert length(list) > 0 + assert list != [] Enum.each(list, fn {behavior, data} -> assert is_binary(behavior) diff --git a/test/codeqa/analysis/file_metrics_server_test.exs b/test/codeqa/analysis/file_metrics_server_test.exs index 791c315..b68f4b3 100644 --- a/test/codeqa/analysis/file_metrics_server_test.exs +++ b/test/codeqa/analysis/file_metrics_server_test.exs @@ -2,9 +2,10 @@ defmodule CodeQA.Analysis.FileMetricsServerTest do use ExUnit.Case, async: true alias CodeQA.Analysis.FileMetricsServer + alias CodeQA.Engine.Analyzer defp build_registry do - CodeQA.Engine.Analyzer.build_registry() + Analyzer.build_registry() end setup do diff --git a/test/codeqa/ast/classification/node_classifier_test.exs 
b/test/codeqa/ast/classification/node_classifier_test.exs index 5ec0055..f266a2f 100644 --- a/test/codeqa/ast/classification/node_classifier_test.exs +++ b/test/codeqa/ast/classification/node_classifier_test.exs @@ -2,33 +2,34 @@ defmodule CodeQA.AST.NodeClassifierTest do use ExUnit.Case, async: true alias CodeQA.AST.Classification.NodeClassifier - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Enrichment.Node alias CodeQA.AST.Lexing.Token + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Parser - alias CodeQA.AST.Enrichment.Node alias CodeQA.AST.Nodes.{ + AttributeNode, CodeNode, DocNode, FunctionNode, - ModuleNode, ImportNode, - AttributeNode, + ModuleNode, TestNode } - alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang - alias CodeQA.Languages.Code.Scripting.Python - alias CodeQA.Languages.Code.Web.JavaScript alias CodeQA.Languages.Code.Native.Go alias CodeQA.Languages.Code.Native.Rust + alias CodeQA.Languages.Code.Scripting.Python alias CodeQA.Languages.Code.Scripting.Ruby - alias CodeQA.Languages.Code.Web.TypeScript - alias CodeQA.Languages.Code.Vm.Java alias CodeQA.Languages.Code.Vm.CSharp + alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang + alias CodeQA.Languages.Code.Vm.Java + alias CodeQA.Languages.Code.Web.JavaScript + alias CodeQA.Languages.Code.Web.TypeScript + alias CodeQA.Languages.Unknown defp classify_first(code, opts \\ []) do - lang_mod = opts[:language_module] || CodeQA.Languages.Unknown + lang_mod = opts[:language_module] || Unknown [block | _] = code @@ -155,7 +156,7 @@ defmodule CodeQA.AST.NodeClassifierTest do doc_token = %Token{kind: "", content: ~s("""), line: 1, col: 0} nl = %Token{kind: "", content: "\n", line: 2, col: 0} node = node_with_tokens([doc_token, nl]) - assert %DocNode{} = NodeClassifier.classify(node, CodeQA.Languages.Unknown) + assert %DocNode{} = NodeClassifier.classify(node, Unknown) end end @@ -207,7 +208,7 @@ defmodule CodeQA.AST.NodeClassifierTest do node = node_with_tokens([nl]) 
assert %CodeNode{} = - NodeClassifier.classify(node, CodeQA.Languages.Unknown) + NodeClassifier.classify(node, Unknown) end end diff --git a/test/codeqa/ast/classification/node_protocol_test.exs b/test/codeqa/ast/classification/node_protocol_test.exs index 54c922a..5e79a00 100644 --- a/test/codeqa/ast/classification/node_protocol_test.exs +++ b/test/codeqa/ast/classification/node_protocol_test.exs @@ -2,6 +2,8 @@ defmodule CodeQA.AST.NodeProtocolTest.FakeNode do defstruct [:tokens, :line_count, :children, :start_line, :end_line, :label] defimpl CodeQA.AST.Classification.NodeProtocol do + alias CodeQA.AST.Classification.NodeProtocol + def tokens(n), do: n.tokens def line_count(n), do: n.line_count def children(n), do: n.children @@ -12,7 +14,7 @@ defmodule CodeQA.AST.NodeProtocolTest.FakeNode do def flat_tokens(n) do if Enum.empty?(n.children), do: n.tokens, - else: Enum.flat_map(n.children, &CodeQA.AST.Classification.NodeProtocol.flat_tokens/1) + else: Enum.flat_map(n.children, &NodeProtocol.flat_tokens/1) end end end @@ -21,6 +23,7 @@ defmodule CodeQA.AST.NodeProtocolTest do use ExUnit.Case, async: true alias CodeQA.AST.Classification.NodeProtocol + alias CodeQA.AST.Enrichment.Node alias CodeQA.AST.NodeProtocolTest.FakeNode @node %FakeNode{ @@ -57,8 +60,6 @@ defmodule CodeQA.AST.NodeProtocolTest do end describe "flat_tokens/1" do - alias CodeQA.AST.Enrichment.Node - test "leaf node returns own tokens" do leaf = %Node{tokens: [:a, :b], line_count: 1, children: []} assert NodeProtocol.flat_tokens(leaf) == [:a, :b] @@ -80,8 +81,6 @@ defmodule CodeQA.AST.NodeProtocolTest do end describe "Node implements NodeProtocol" do - alias CodeQA.AST.Enrichment.Node - setup do node = %Node{ tokens: [:x, :y], diff --git a/test/codeqa/ast/classification/node_type_detector_test.exs b/test/codeqa/ast/classification/node_type_detector_test.exs index caf0c7f..f4c9753 100644 --- a/test/codeqa/ast/classification/node_type_detector_test.exs +++ 
b/test/codeqa/ast/classification/node_type_detector_test.exs @@ -1,11 +1,15 @@ defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do use ExUnit.Case, async: true - alias CodeQA.AST.Parsing.Parser alias CodeQA.AST.Classification.NodeTypeDetector + alias CodeQA.AST.Enrichment.Node + alias CodeQA.AST.Lexing.Token alias CodeQA.AST.Lexing.TokenNormalizer - alias CodeQA.AST.Nodes.{CodeNode, DocNode, AttributeNode, FunctionNode} + alias CodeQA.AST.Nodes.{AttributeNode, CodeNode, DocNode, FunctionNode} + alias CodeQA.AST.Parsing.Parser + alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang + alias CodeQA.Languages.Unknown - defp detect_types(code, lang_mod \\ CodeQA.Languages.Code.Vm.Elixir) do + defp detect_types(code, lang_mod \\ ElixirLang) do code |> TokenNormalizer.normalize_structural() |> Parser.detect_blocks(lang_mod) @@ -70,16 +74,16 @@ defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do end test "empty list returns empty list" do - assert [] == NodeTypeDetector.detect_types([], CodeQA.Languages.Unknown) + assert [] == NodeTypeDetector.detect_types([], Unknown) end end describe "detect_types/1 — typed struct output" do test "returns DocNode for doc blocks" do - doc_token = %CodeQA.AST.Lexing.Token{kind: "", content: ~s("""), line: 1, col: 0} - nl = %CodeQA.AST.Lexing.Token{kind: "", content: "\n", line: 2, col: 0} + doc_token = %Token{kind: "", content: ~s("""), line: 1, col: 0} + nl = %Token{kind: "", content: "\n", line: 2, col: 0} - node = %CodeQA.AST.Enrichment.Node{ + node = %Node{ tokens: [doc_token, nl], line_count: 2, children: [], @@ -88,20 +92,20 @@ defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do } [result] = - CodeQA.AST.Classification.NodeTypeDetector.detect_types( + NodeTypeDetector.detect_types( [node], - CodeQA.Languages.Code.Vm.Elixir + ElixirLang ) assert is_struct(result, DocNode) end test "returns AttributeNode for typespec blocks" do - at = %CodeQA.AST.Lexing.Token{kind: "@", content: "@", line: 1, col: 0} - spec = 
%CodeQA.AST.Lexing.Token{kind: "", content: "spec", line: 1, col: 1} - nl = %CodeQA.AST.Lexing.Token{kind: "", content: "\n", line: 1, col: 5} + at = %Token{kind: "@", content: "@", line: 1, col: 0} + spec = %Token{kind: "", content: "spec", line: 1, col: 1} + nl = %Token{kind: "", content: "\n", line: 1, col: 5} - node = %CodeQA.AST.Enrichment.Node{ + node = %Node{ tokens: [at, spec, nl], line_count: 1, children: [], @@ -110,9 +114,9 @@ defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do } [result] = - CodeQA.AST.Classification.NodeTypeDetector.detect_types( + NodeTypeDetector.detect_types( [node], - CodeQA.Languages.Code.Vm.Elixir + ElixirLang ) assert is_struct(result, AttributeNode) @@ -120,10 +124,10 @@ defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do end test "returns CodeNode for unclassified blocks" do - id = %CodeQA.AST.Lexing.Token{kind: "", content: "foo", line: 1, col: 0} - nl = %CodeQA.AST.Lexing.Token{kind: "", content: "\n", line: 1, col: 3} + id = %Token{kind: "", content: "foo", line: 1, col: 0} + nl = %Token{kind: "", content: "\n", line: 1, col: 3} - node = %CodeQA.AST.Enrichment.Node{ + node = %Node{ tokens: [id, nl], line_count: 1, children: [], @@ -132,9 +136,9 @@ defmodule CodeQA.AST.Classification.NodeTypeDetectorTest do } [result] = - CodeQA.AST.Classification.NodeTypeDetector.detect_types( + NodeTypeDetector.detect_types( [node], - CodeQA.Languages.Code.Vm.Elixir + ElixirLang ) assert is_struct(result, CodeNode) diff --git a/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs b/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs index d1e1b8f..3a6adbb 100644 --- a/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs +++ b/test/codeqa/ast/enrichment/compound_node_assertions_languages_test.exs @@ -1,14 +1,14 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeAssertionsLanguagesTest do use ExUnit.Case, async: true - alias CodeQA.AST.Lexing.TokenNormalizer - alias 
CodeQA.AST.Parsing.Parser - alias CodeQA.Languages.Unknown - alias CodeQA.AST.Classification.NodeTypeDetector alias CodeQA.AST.Classification.NodeProtocol - alias CodeQA.AST.Enrichment.CompoundNodeBuilder + alias CodeQA.AST.Classification.NodeTypeDetector alias CodeQA.AST.Enrichment.CompoundNode + alias CodeQA.AST.Enrichment.CompoundNodeBuilder alias CodeQA.AST.Enrichment.Node + alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Parsing.Parser + alias CodeQA.Languages.Unknown Module.register_attribute(__MODULE__, :fixture, accumulate: true, persist: false) diff --git a/test/codeqa/ast/enrichment/compound_node_builder_test.exs b/test/codeqa/ast/enrichment/compound_node_builder_test.exs index 3881e9e..00a1006 100644 --- a/test/codeqa/ast/enrichment/compound_node_builder_test.exs +++ b/test/codeqa/ast/enrichment/compound_node_builder_test.exs @@ -1,13 +1,12 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeBuilderTest do use ExUnit.Case, async: true - alias CodeQA.AST.Parsing.Parser alias CodeQA.AST.Classification.NodeTypeDetector alias CodeQA.AST.Enrichment.CompoundNode alias CodeQA.AST.Enrichment.CompoundNodeBuilder alias CodeQA.AST.Lexing.TokenNormalizer - - alias CodeQA.AST.Nodes.{DocNode, AttributeNode, CodeNode} + alias CodeQA.AST.Nodes.{AttributeNode, CodeNode, DocNode} + alias CodeQA.AST.Parsing.Parser defp build(code) do lang_mod = CodeQA.Languages.Code.Vm.Elixir @@ -112,7 +111,7 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeBuilderTest do doc = %DocNode{tokens: [:d], line_count: 1, children: [], start_line: 1, end_line: 1} code = %CodeNode{tokens: [:c], line_count: 2, children: [], start_line: 2, end_line: 3} - [compound] = CodeQA.AST.Enrichment.CompoundNodeBuilder.build([doc, code]) + [compound] = CompoundNodeBuilder.build([doc, code]) assert length(compound.docs) == 1 assert is_struct(hd(compound.docs), DocNode) end @@ -129,7 +128,7 @@ defmodule CodeQA.AST.Enrichment.CompoundNodeBuilderTest do code = %CodeNode{tokens: [:c], line_count: 2, children: 
[], start_line: 2, end_line: 3} - [compound] = CodeQA.AST.Enrichment.CompoundNodeBuilder.build([attr, code]) + [compound] = CompoundNodeBuilder.build([attr, code]) assert length(compound.typespecs) == 1 assert is_struct(hd(compound.typespecs), AttributeNode) end diff --git a/test/codeqa/ast/lexing/token_normalizer_test.exs b/test/codeqa/ast/lexing/token_normalizer_test.exs index e8264ad..19a886a 100644 --- a/test/codeqa/ast/lexing/token_normalizer_test.exs +++ b/test/codeqa/ast/lexing/token_normalizer_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.AST.TokenNormalizerTest do use ExUnit.Case, async: true - alias CodeQA.AST.Lexing.TokenNormalizer - alias CodeQA.AST.Lexing.Token alias CodeQA.AST.Lexing.StringToken + alias CodeQA.AST.Lexing.Token + alias CodeQA.AST.Lexing.TokenNormalizer defp kinds(tokens), do: Enum.map(tokens, & &1.kind) diff --git a/test/codeqa/ast/lexing/token_protocol_test.exs b/test/codeqa/ast/lexing/token_protocol_test.exs index 5534d10..340d94a 100644 --- a/test/codeqa/ast/lexing/token_protocol_test.exs +++ b/test/codeqa/ast/lexing/token_protocol_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.AST.Lexing.TokenProtocolTest do use ExUnit.Case, async: true - alias CodeQA.AST.Lexing.Token alias CodeQA.AST.Lexing.StringToken + alias CodeQA.AST.Lexing.Token alias CodeQA.AST.Lexing.TokenProtocol describe "Token implementation" do diff --git a/test/codeqa/ast/nodes/code_node_test.exs b/test/codeqa/ast/nodes/code_node_test.exs index f161ad8..20082f0 100644 --- a/test/codeqa/ast/nodes/code_node_test.exs +++ b/test/codeqa/ast/nodes/code_node_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.AST.Nodes.CodeNodeTest do use ExUnit.Case, async: true - alias CodeQA.AST.Nodes.{CodeNode, DocNode} alias CodeQA.AST.Classification.NodeProtocol + alias CodeQA.AST.Nodes.{CodeNode, DocNode} @tokens [:a, :b, :c] diff --git a/test/codeqa/ast/nodes/function_node_test.exs b/test/codeqa/ast/nodes/function_node_test.exs index 029a2a7..a1770bc 100644 --- 
a/test/codeqa/ast/nodes/function_node_test.exs +++ b/test/codeqa/ast/nodes/function_node_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.AST.Nodes.FunctionNodeTest do use ExUnit.Case, async: true - alias CodeQA.AST.Nodes.{FunctionNode, ModuleNode} alias CodeQA.AST.Classification.NodeProtocol + alias CodeQA.AST.Nodes.{FunctionNode, ModuleNode} describe "FunctionNode" do setup do diff --git a/test/codeqa/ast/nodes/import_node_test.exs b/test/codeqa/ast/nodes/import_node_test.exs index 16c6771..53c4a98 100644 --- a/test/codeqa/ast/nodes/import_node_test.exs +++ b/test/codeqa/ast/nodes/import_node_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.AST.Nodes.ImportNodeTest do use ExUnit.Case, async: true - alias CodeQA.AST.Nodes.{ImportNode, AttributeNode, TestNode} alias CodeQA.AST.Classification.NodeProtocol + alias CodeQA.AST.Nodes.{AttributeNode, ImportNode, TestNode} describe "ImportNode" do test "implements NodeProtocol" do diff --git a/test/codeqa/ast/parsing/parser_languages_test.exs b/test/codeqa/ast/parsing/parser_languages_test.exs index e2f3040..5526d10 100644 --- a/test/codeqa/ast/parsing/parser_languages_test.exs +++ b/test/codeqa/ast/parsing/parser_languages_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.AST.Parsing.ParserLanguagesTest do use ExUnit.Case, async: true - alias CodeQA.AST.Parsing.Parser alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Parsing.Parser alias CodeQA.Language alias CodeQA.Languages.Unknown @@ -118,13 +118,13 @@ defmodule CodeQA.AST.Parsing.ParserLanguagesTest do code end, block_assertions} - defp blocks(code, lang_mod \\ CodeQA.Languages.Unknown) do + defp blocks(code, lang_mod \\ Unknown) do code |> TokenNormalizer.normalize_structural() |> Parser.detect_blocks(lang_mod) end - defp children(code, lang_mod \\ CodeQA.Languages.Unknown) do + defp children(code, lang_mod \\ Unknown) do code |> TokenNormalizer.normalize_structural() |> Parser.detect_blocks(lang_mod) @@ -141,7 +141,7 @@ defmodule CodeQA.AST.Parsing.ParserLanguagesTest do result 
= blocks(unquote(code), lang_mod) if unquote(lang_mod) == Unknown do - assert length(result) >= 1 + assert result != [] else assert length(result) >= 3 end @@ -152,7 +152,7 @@ defmodule CodeQA.AST.Parsing.ParserLanguagesTest do result = children(unquote(code), lang_mod) if unquote(lang_mod) == Unknown do - assert length(result) >= 0 + assert is_list(result) else assert length(result) >= 3 end diff --git a/test/codeqa/ast/parsing/parser_test.exs b/test/codeqa/ast/parsing/parser_test.exs index c37a8a8..51ead52 100644 --- a/test/codeqa/ast/parsing/parser_test.exs +++ b/test/codeqa/ast/parsing/parser_test.exs @@ -1,9 +1,10 @@ defmodule CodeQA.AST.Parsing.ParserTest do use ExUnit.Case, async: true - alias CodeQA.AST.Parsing.Parser + alias CodeQA.AST.Enrichment.Node alias CodeQA.AST.Lexing.TokenNormalizer - alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang + alias CodeQA.AST.Parsing.Parser alias CodeQA.Languages.Code.Scripting.Python + alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang alias CodeQA.Languages.Unknown defp tokenize(code), do: TokenNormalizer.normalize_structural(code) @@ -54,7 +55,6 @@ defmodule CodeQA.AST.Parsing.ParserTest do end test "block has children_count accessible via Node.children_count/1" do - alias CodeQA.AST.Enrichment.Node tokens = tokenize("foo(a)\nbar(b)\n") [block] = Parser.detect_blocks(tokens, Unknown) assert Node.children_count(block) == length(block.children) diff --git a/test/codeqa/ast/parsing/signal_stream_test.exs b/test/codeqa/ast/parsing/signal_stream_test.exs index 20c9226..69cfcaf 100644 --- a/test/codeqa/ast/parsing/signal_stream_test.exs +++ b/test/codeqa/ast/parsing/signal_stream_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.AST.SignalStreamTest do use ExUnit.Case, async: true - alias CodeQA.AST.Parsing.SignalStream alias CodeQA.AST.Lexing.Token + alias CodeQA.AST.Parsing.SignalStream alias CodeQA.Support.CounterSignal defp tok(kind, content), do: %Token{kind: kind, content: content, line: 1, col: 0} diff --git 
a/test/codeqa/ast/signals/classification/comment_density_signal_test.exs b/test/codeqa/ast/signals/classification/comment_density_signal_test.exs index 71069d7..374b191 100644 --- a/test/codeqa/ast/signals/classification/comment_density_signal_test.exs +++ b/test/codeqa/ast/signals/classification/comment_density_signal_test.exs @@ -1,7 +1,7 @@ defmodule CodeQA.AST.Signals.Classification.CommentDensitySignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Classification.CommentDensitySignal alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Signals.Classification.CommentDensitySignal alias CodeQA.Languages.Code.Scripting.Python alias CodeQA.Languages.Unknown diff --git a/test/codeqa/ast/signals/classification/config_signal_test.exs b/test/codeqa/ast/signals/classification/config_signal_test.exs index 5c63672..da510c2 100644 --- a/test/codeqa/ast/signals/classification/config_signal_test.exs +++ b/test/codeqa/ast/signals/classification/config_signal_test.exs @@ -1,7 +1,7 @@ defmodule CodeQA.AST.Signals.Classification.ConfigSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Classification.ConfigSignal alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Signals.Classification.ConfigSignal defp run(tokens), do: SignalStream.run(tokens, [%ConfigSignal{}], []) |> List.flatten() defp t(content, kind \\ ""), do: %{kind: kind, content: content, line: 1, col: 0} diff --git a/test/codeqa/ast/signals/classification/data_signal_test.exs b/test/codeqa/ast/signals/classification/data_signal_test.exs index c537f95..852067b 100644 --- a/test/codeqa/ast/signals/classification/data_signal_test.exs +++ b/test/codeqa/ast/signals/classification/data_signal_test.exs @@ -1,7 +1,7 @@ defmodule CodeQA.AST.Signals.Classification.DataSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Classification.DataSignal alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Signals.Classification.DataSignal defp run(tokens), do: 
SignalStream.run(tokens, [%DataSignal{}], []) |> List.flatten() diff --git a/test/codeqa/ast/signals/classification/type_signal_test.exs b/test/codeqa/ast/signals/classification/type_signal_test.exs index 96811e6..aa400d3 100644 --- a/test/codeqa/ast/signals/classification/type_signal_test.exs +++ b/test/codeqa/ast/signals/classification/type_signal_test.exs @@ -1,7 +1,7 @@ defmodule CodeQA.AST.Signals.Classification.TypeSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Classification.TypeSignal alias CodeQA.AST.Parsing.SignalStream + alias CodeQA.AST.Signals.Classification.TypeSignal defp run(tokens), do: SignalStream.run(tokens, [%TypeSignal{}], []) |> List.flatten() diff --git a/test/codeqa/ast/signals/structural/access_modifier_signal_test.exs b/test/codeqa/ast/signals/structural/access_modifier_signal_test.exs index 5300881..2a86352 100644 --- a/test/codeqa/ast/signals/structural/access_modifier_signal_test.exs +++ b/test/codeqa/ast/signals/structural/access_modifier_signal_test.exs @@ -1,10 +1,10 @@ defmodule CodeQA.AST.Signals.Structural.AccessModifierSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Structural.AccessModifierSignal + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Signal alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Signals.Structural.AccessModifierSignal alias CodeQA.Languages.Code.Vm.Java defp split_values(code, lang_mod) do diff --git a/test/codeqa/ast/signals/structural/assignment_function_signal_test.exs b/test/codeqa/ast/signals/structural/assignment_function_signal_test.exs index 534b735..bd76abf 100644 --- a/test/codeqa/ast/signals/structural/assignment_function_signal_test.exs +++ b/test/codeqa/ast/signals/structural/assignment_function_signal_test.exs @@ -1,10 +1,10 @@ defmodule CodeQA.AST.Signals.Structural.AssignmentFunctionSignalTest do use ExUnit.Case, async: true - alias 
CodeQA.AST.Signals.Structural.AssignmentFunctionSignal + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Signal alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Signals.Structural.AssignmentFunctionSignal defp split_indices(code) do tokens = TokenNormalizer.normalize_structural(code) diff --git a/test/codeqa/ast/signals/structural/blank_line_signal_test.exs b/test/codeqa/ast/signals/structural/blank_line_signal_test.exs index 464ca46..4e7d9d2 100644 --- a/test/codeqa/ast/signals/structural/blank_line_signal_test.exs +++ b/test/codeqa/ast/signals/structural/blank_line_signal_test.exs @@ -1,10 +1,10 @@ defmodule CodeQA.AST.Signals.Structural.BlankLineSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Structural.BlankLineSignal + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Signal alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Signals.Structural.BlankLineSignal alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang defp split_values(code, lang_mod) do diff --git a/test/codeqa/ast/signals/structural/bracket_signal_test.exs b/test/codeqa/ast/signals/structural/bracket_signal_test.exs index 4159b6b..611474b 100644 --- a/test/codeqa/ast/signals/structural/bracket_signal_test.exs +++ b/test/codeqa/ast/signals/structural/bracket_signal_test.exs @@ -1,10 +1,10 @@ defmodule CodeQA.AST.Signals.Structural.BracketSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Structural.BracketSignal + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Signal alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Signals.Structural.BracketSignal defp enclosure_values(code) do tokens = TokenNormalizer.normalize_structural(code) diff --git a/test/codeqa/ast/signals/structural/branch_split_signal_test.exs 
b/test/codeqa/ast/signals/structural/branch_split_signal_test.exs index 7a7eb94..320390c 100644 --- a/test/codeqa/ast/signals/structural/branch_split_signal_test.exs +++ b/test/codeqa/ast/signals/structural/branch_split_signal_test.exs @@ -1,13 +1,13 @@ defmodule CodeQA.AST.Signals.Structural.BranchSplitSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Structural.BranchSplitSignal - alias CodeQA.AST.Parsing.{Signal, SignalStream} alias CodeQA.AST.Lexing.TokenNormalizer - alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang + alias CodeQA.AST.Parsing.{Signal, SignalStream} + alias CodeQA.AST.Signals.Structural.BranchSplitSignal + alias CodeQA.Languages.Code.Scripting.PHP alias CodeQA.Languages.Code.Scripting.Python alias CodeQA.Languages.Code.Scripting.Ruby - alias CodeQA.Languages.Code.Scripting.PHP + alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang alias CodeQA.Languages.Code.Vm.Java defp split_values(code, lang_mod) do @@ -83,7 +83,7 @@ defmodule CodeQA.AST.Signals.Structural.BranchSplitSignalTest do splits = split_values("switch x\n case 1:\n :a\n case 2:\n :b\nend\n", Java) - assert length(splits) >= 1 + assert splits != [] end test "emits split at when keyword" do diff --git a/test/codeqa/ast/signals/structural/colon_indent_signal_test.exs b/test/codeqa/ast/signals/structural/colon_indent_signal_test.exs index 484edf6..7ff96a0 100644 --- a/test/codeqa/ast/signals/structural/colon_indent_signal_test.exs +++ b/test/codeqa/ast/signals/structural/colon_indent_signal_test.exs @@ -1,11 +1,12 @@ defmodule CodeQA.AST.Signals.Structural.ColonIndentSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Structural.ColonIndentSignal + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Signal alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Signals.Structural.ColonIndentSignal alias CodeQA.Languages.Code.Scripting.Python + alias CodeQA.Languages.Unknown defp 
enclosure_values(code, lang_mod \\ Python) do tokens = TokenNormalizer.normalize_structural(code) @@ -14,7 +15,7 @@ defmodule CodeQA.AST.Signals.Structural.ColonIndentSignalTest do end test "no enclosures for non-python language" do - assert enclosure_values("def foo:\n return 1\n", CodeQA.Languages.Unknown) == + assert enclosure_values("def foo:\n return 1\n", Unknown) == [] end diff --git a/test/codeqa/ast/signals/structural/comment_divider_signal_test.exs b/test/codeqa/ast/signals/structural/comment_divider_signal_test.exs index 329c910..29762cb 100644 --- a/test/codeqa/ast/signals/structural/comment_divider_signal_test.exs +++ b/test/codeqa/ast/signals/structural/comment_divider_signal_test.exs @@ -1,10 +1,10 @@ defmodule CodeQA.AST.Signals.Structural.CommentDividerSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Structural.CommentDividerSignal + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Signal alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Signals.Structural.CommentDividerSignal alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang alias CodeQA.Languages.Code.Vm.Java alias CodeQA.Languages.Data.Sql diff --git a/test/codeqa/ast/signals/structural/decorator_signal_test.exs b/test/codeqa/ast/signals/structural/decorator_signal_test.exs index b1474d7..6a5bb10 100644 --- a/test/codeqa/ast/signals/structural/decorator_signal_test.exs +++ b/test/codeqa/ast/signals/structural/decorator_signal_test.exs @@ -1,10 +1,10 @@ defmodule CodeQA.AST.Signals.Structural.DecoratorSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Structural.DecoratorSignal + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Signal alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Signals.Structural.DecoratorSignal defp split_values(code) do tokens = TokenNormalizer.normalize_structural(code) diff --git 
a/test/codeqa/ast/signals/structural/dedent_to_zero_signal_test.exs b/test/codeqa/ast/signals/structural/dedent_to_zero_signal_test.exs index 749e165..ddf8702 100644 --- a/test/codeqa/ast/signals/structural/dedent_to_zero_signal_test.exs +++ b/test/codeqa/ast/signals/structural/dedent_to_zero_signal_test.exs @@ -1,10 +1,10 @@ defmodule CodeQA.AST.Signals.Structural.DedentToZeroSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Structural.DedentToZeroSignal + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Signal alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Signals.Structural.DedentToZeroSignal defp split_count(code) do tokens = TokenNormalizer.normalize_structural(code) diff --git a/test/codeqa/ast/signals/structural/doc_comment_lead_signal_test.exs b/test/codeqa/ast/signals/structural/doc_comment_lead_signal_test.exs index 28965c4..da269e8 100644 --- a/test/codeqa/ast/signals/structural/doc_comment_lead_signal_test.exs +++ b/test/codeqa/ast/signals/structural/doc_comment_lead_signal_test.exs @@ -1,10 +1,10 @@ defmodule CodeQA.AST.Signals.Structural.DocCommentLeadSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Structural.DocCommentLeadSignal + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Signal alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Signals.Structural.DocCommentLeadSignal defp split_values(code) do tokens = TokenNormalizer.normalize_structural(code) diff --git a/test/codeqa/ast/signals/structural/keyword_signal_test.exs b/test/codeqa/ast/signals/structural/keyword_signal_test.exs index 4aecba6..b269c40 100644 --- a/test/codeqa/ast/signals/structural/keyword_signal_test.exs +++ b/test/codeqa/ast/signals/structural/keyword_signal_test.exs @@ -1,10 +1,10 @@ defmodule CodeQA.AST.Signals.Structural.KeywordSignalTest do use ExUnit.Case, async: true - alias 
CodeQA.AST.Signals.Structural.KeywordSignal + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Signal alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Signals.Structural.KeywordSignal alias CodeQA.Languages.Code.Vm.Elixir, as: ElixirLang defp split_values(code, lang_mod) do diff --git a/test/codeqa/ast/signals/structural/sql_block_signal_test.exs b/test/codeqa/ast/signals/structural/sql_block_signal_test.exs index e655f8d..5f89598 100644 --- a/test/codeqa/ast/signals/structural/sql_block_signal_test.exs +++ b/test/codeqa/ast/signals/structural/sql_block_signal_test.exs @@ -1,10 +1,10 @@ defmodule CodeQA.AST.Signals.Structural.SQLBlockSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Structural.SQLBlockSignal + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Signal alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Signals.Structural.SQLBlockSignal alias CodeQA.Languages.Data.Sql defp split_values(code) do diff --git a/test/codeqa/ast/signals/structural/triple_quote_signal_test.exs b/test/codeqa/ast/signals/structural/triple_quote_signal_test.exs index fed5f3f..2b840bd 100644 --- a/test/codeqa/ast/signals/structural/triple_quote_signal_test.exs +++ b/test/codeqa/ast/signals/structural/triple_quote_signal_test.exs @@ -1,10 +1,10 @@ defmodule CodeQA.AST.Signals.Structural.TripleQuoteSignalTest do use ExUnit.Case, async: true - alias CodeQA.AST.Signals.Structural.TripleQuoteSignal + alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Signal alias CodeQA.AST.Parsing.SignalStream - alias CodeQA.AST.Lexing.TokenNormalizer + alias CodeQA.AST.Signals.Structural.TripleQuoteSignal defp split_values(code) do tokens = TokenNormalizer.normalize_structural(code) diff --git a/test/codeqa/block_impact/codebase_impact_test.exs b/test/codeqa/block_impact/codebase_impact_test.exs index 0a0c264..55ef4b4 100644 --- 
a/test/codeqa/block_impact/codebase_impact_test.exs +++ b/test/codeqa/block_impact/codebase_impact_test.exs @@ -1,9 +1,10 @@ defmodule CodeQA.BlockImpact.CodebaseImpactTest do use ExUnit.Case, async: true - alias CodeQA.BlockImpact.CodebaseImpact alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Parser + alias CodeQA.BlockImpact.CodebaseImpact + alias CodeQA.Engine.Analyzer alias CodeQA.Languages.Unknown @content_a """ @@ -48,7 +49,7 @@ defmodule CodeQA.BlockImpact.CodebaseImpactTest do node = first_block(@content_a) if length(node.tokens) >= 10 do - baseline = CodeQA.Engine.Analyzer.analyze_codebase_aggregate(files_map()) + baseline = Analyzer.analyze_codebase_aggregate(files_map()) without = CodebaseImpact.compute("lib/a.ex", @content_a, node, files_map()) # Not necessarily different in all keys, but result is valid assert is_map(without) diff --git a/test/codeqa/block_impact/file_impact_test.exs b/test/codeqa/block_impact/file_impact_test.exs index 3bffe8c..b44f0a9 100644 --- a/test/codeqa/block_impact/file_impact_test.exs +++ b/test/codeqa/block_impact/file_impact_test.exs @@ -1,9 +1,9 @@ defmodule CodeQA.BlockImpact.FileImpactTest do use ExUnit.Case, async: true - alias CodeQA.BlockImpact.FileImpact alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Parser + alias CodeQA.BlockImpact.FileImpact alias CodeQA.Languages.Unknown @fixture_content """ diff --git a/test/codeqa/block_impact/refactoring_potentials_test.exs b/test/codeqa/block_impact/refactoring_potentials_test.exs index d5f2014..a1252f2 100644 --- a/test/codeqa/block_impact/refactoring_potentials_test.exs +++ b/test/codeqa/block_impact/refactoring_potentials_test.exs @@ -4,6 +4,7 @@ defmodule CodeQA.BlockImpact.RefactoringPotentialsTest do alias CodeQA.BlockImpact.RefactoringPotentials alias CodeQA.CombinedMetrics.FileScorer alias CodeQA.CombinedMetrics.SampleRunner + alias CodeQA.Engine.Analyzer defp file_cosines(fm) do fm @@ -27,13 +28,13 @@ defmodule 
CodeQA.BlockImpact.RefactoringPotentialsTest do end """ - baseline_fm = CodeQA.Engine.Analyzer.analyze_file("lib/foo.ex", content) + baseline_fm = Analyzer.analyze_file("lib/foo.ex", content) simple = "defmodule Foo do\n def bar, do: :ok\nend\n" - without_fm = CodeQA.Engine.Analyzer.analyze_file("lib/foo.ex", simple) + without_fm = Analyzer.analyze_file("lib/foo.ex", simple) files = %{"lib/foo.ex" => content} - baseline_agg = CodeQA.Engine.Analyzer.analyze_codebase_aggregate(files) - without_agg = CodeQA.Engine.Analyzer.analyze_codebase_aggregate(%{"lib/foo.ex" => simple}) + baseline_agg = Analyzer.analyze_codebase_aggregate(files) + without_agg = Analyzer.analyze_codebase_aggregate(%{"lib/foo.ex" => simple}) baseline_file_cosines = file_cosines(baseline_fm) baseline_codebase_cosines = SampleRunner.diagnose_aggregate(baseline_agg, top: 99_999) @@ -60,8 +61,8 @@ defmodule CodeQA.BlockImpact.RefactoringPotentialsTest do test "returns at most top N results (default 3)" do content = "defmodule A do\n def foo, do: 1\nend\n" - fm = CodeQA.Engine.Analyzer.analyze_file("lib/a.ex", content) - agg = CodeQA.Engine.Analyzer.analyze_codebase_aggregate(%{"lib/a.ex" => content}) + fm = Analyzer.analyze_file("lib/a.ex", content) + agg = Analyzer.analyze_codebase_aggregate(%{"lib/a.ex" => content}) baseline_file_cosines = file_cosines(fm) baseline_codebase_cosines = SampleRunner.diagnose_aggregate(agg, top: 99_999) @@ -74,8 +75,8 @@ defmodule CodeQA.BlockImpact.RefactoringPotentialsTest do test "respects top: N option" do content = "defmodule A do\n def foo, do: 1\nend\n" - fm = CodeQA.Engine.Analyzer.analyze_file("lib/a.ex", content) - agg = CodeQA.Engine.Analyzer.analyze_codebase_aggregate(%{"lib/a.ex" => content}) + fm = Analyzer.analyze_file("lib/a.ex", content) + agg = Analyzer.analyze_codebase_aggregate(%{"lib/a.ex" => content}) baseline_file_cosines = file_cosines(fm) baseline_codebase_cosines = SampleRunner.diagnose_aggregate(agg, top: 99_999) @@ -90,8 +91,8 @@ defmodule 
CodeQA.BlockImpact.RefactoringPotentialsTest do test "results are sorted descending by cosine_delta" do content = "defmodule A do\n def foo, do: 1\nend\n" - fm = CodeQA.Engine.Analyzer.analyze_file("lib/a.ex", content) - agg = CodeQA.Engine.Analyzer.analyze_codebase_aggregate(%{"lib/a.ex" => content}) + fm = Analyzer.analyze_file("lib/a.ex", content) + agg = Analyzer.analyze_codebase_aggregate(%{"lib/a.ex" => content}) baseline_file_cosines = file_cosines(fm) baseline_codebase_cosines = SampleRunner.diagnose_aggregate(agg, top: 99_999) diff --git a/test/codeqa/block_impact_analyzer_test.exs b/test/codeqa/block_impact_analyzer_test.exs index a8341e9..be644e9 100644 --- a/test/codeqa/block_impact_analyzer_test.exs +++ b/test/codeqa/block_impact_analyzer_test.exs @@ -3,6 +3,7 @@ defmodule CodeQA.BlockImpactAnalyzerTest do use ExUnit.Case, async: false alias CodeQA.BlockImpactAnalyzer + alias CodeQA.Engine.Analyzer @fixture_content """ defmodule MyModule do @@ -21,7 +22,7 @@ defmodule CodeQA.BlockImpactAnalyzerTest do describe "analyze/3" do test "adds 'nodes' key to each file entry in the pipeline result" do files = %{"lib/my_module.ex" => @fixture_content} - pipeline_result = CodeQA.Engine.Analyzer.analyze_codebase(files) + pipeline_result = Analyzer.analyze_codebase(files) result = BlockImpactAnalyzer.analyze(pipeline_result, files) @@ -34,7 +35,7 @@ defmodule CodeQA.BlockImpactAnalyzerTest do test "each node has required fields" do files = %{"lib/my_module.ex" => @fixture_content} - pipeline_result = CodeQA.Engine.Analyzer.analyze_codebase(files) + pipeline_result = Analyzer.analyze_codebase(files) result = BlockImpactAnalyzer.analyze(pipeline_result, files) nodes = result["files"]["lib/my_module.ex"]["nodes"] @@ -55,7 +56,7 @@ defmodule CodeQA.BlockImpactAnalyzerTest do test "nodes are sorted by start_line ascending" do files = %{"lib/my_module.ex" => @fixture_content} - pipeline_result = CodeQA.Engine.Analyzer.analyze_codebase(files) + pipeline_result = 
Analyzer.analyze_codebase(files) result = BlockImpactAnalyzer.analyze(pipeline_result, files) nodes = result["files"]["lib/my_module.ex"]["nodes"] @@ -65,7 +66,7 @@ defmodule CodeQA.BlockImpactAnalyzerTest do test "preserves existing 'codebase' key in pipeline result" do files = %{"lib/my_module.ex" => @fixture_content} - pipeline_result = CodeQA.Engine.Analyzer.analyze_codebase(files) + pipeline_result = Analyzer.analyze_codebase(files) result = BlockImpactAnalyzer.analyze(pipeline_result, files) assert Map.has_key?(result, "codebase") @@ -74,7 +75,7 @@ defmodule CodeQA.BlockImpactAnalyzerTest do test "nodes_top option limits refactoring_potentials per node" do files = %{"lib/my_module.ex" => @fixture_content} - pipeline_result = CodeQA.Engine.Analyzer.analyze_codebase(files) + pipeline_result = Analyzer.analyze_codebase(files) result = BlockImpactAnalyzer.analyze(pipeline_result, files, nodes_top: 1) nodes = result["files"]["lib/my_module.ex"]["nodes"] diff --git a/test/codeqa/combined_metrics/file_scorer_test.exs b/test/codeqa/combined_metrics/file_scorer_test.exs index c280cd3..55ef933 100644 --- a/test/codeqa/combined_metrics/file_scorer_test.exs +++ b/test/codeqa/combined_metrics/file_scorer_test.exs @@ -262,7 +262,7 @@ defmodule CodeQA.CombinedMetrics.FileScorerTest do "vocabulary" => 60.0, "volume" => 1200.0, "difficulty" => 30.0, - "effort" => 36000.0, + "effort" => 36_000.0, "bugs" => 0.4 }, "branching" => %{ diff --git a/test/codeqa/combined_metrics/sample_runner_test.exs b/test/codeqa/combined_metrics/sample_runner_test.exs index 103b805..c1986aa 100644 --- a/test/codeqa/combined_metrics/sample_runner_test.exs +++ b/test/codeqa/combined_metrics/sample_runner_test.exs @@ -2,6 +2,9 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do use ExUnit.Case alias CodeQA.CombinedMetrics.SampleRunner + alias CodeQA.Engine.Analyzer + alias CodeQA.Engine.Collector + alias CodeQA.HealthReport.Grader setup_all do results = SampleRunner.run(category: 
"variable_naming", verbose: true) @@ -22,7 +25,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do {:ok, data} = YamlElixir.read_from_file("priv/combined_metrics/variable_naming.yml") langs = get_in(data, ["name_is_generic", "_languages"]) assert is_list(langs) - assert length(langs) > 0 + assert langs != [] assert Enum.all?(langs, &is_binary/1) end @@ -34,7 +37,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do if is_map(groups) do case Map.get(groups, "_languages") do nil -> :ok - langs -> assert is_list(langs) and length(langs) > 0 + langs -> assert is_list(langs) and langs != [] end end end) @@ -72,8 +75,8 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do test "with language option returns subset of unfiltered results" do agg = "priv/combined_metrics/samples/variable_naming/name_is_generic/bad" - |> CodeQA.Engine.Collector.collect_files() - |> CodeQA.Engine.Analyzer.analyze_codebase() + |> Collector.collect_files() + |> Analyzer.analyze_codebase() |> get_in(["codebase", "aggregate"]) all = SampleRunner.diagnose_aggregate(agg, top: 999) @@ -96,8 +99,8 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do test "with languages option returns fewer behaviors than unfiltered" do agg = "priv/combined_metrics/samples/variable_naming/name_is_generic/bad" - |> CodeQA.Engine.Collector.collect_files() - |> CodeQA.Engine.Analyzer.analyze_codebase() + |> Collector.collect_files() + |> Analyzer.analyze_codebase() |> get_in(["codebase", "aggregate"]) all_count = SampleRunner.score_aggregate(agg) |> Enum.flat_map(& &1.behaviors) |> length() @@ -114,7 +117,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do describe "grade_cosine_categories/4 languages wiring" do test "accepts languages argument" do - result = CodeQA.HealthReport.Grader.grade_cosine_categories(%{}, %{}, [], ["elixir"]) + result = Grader.grade_cosine_categories(%{}, %{}, [], ["elixir"]) assert is_list(result) end end @@ -122,7 +125,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest 
do describe "run/1" do test "returns a list of results with required keys", %{results: results} do assert is_list(results) - assert length(results) > 0 + assert results != [] result = hd(results) assert Map.has_key?(result, :bad_score) assert Map.has_key?(result, :good_score) diff --git a/test/codeqa/engine/analyzer_test.exs b/test/codeqa/engine/analyzer_test.exs index ccc4a41..3d0d79a 100644 --- a/test/codeqa/engine/analyzer_test.exs +++ b/test/codeqa/engine/analyzer_test.exs @@ -1,10 +1,12 @@ defmodule CodeQA.Engine.AnalyzerTest do use ExUnit.Case, async: true + alias CodeQA.Engine.Analyzer + describe "analyze_file/2" do test "returns a metrics map with group keys" do content = "defmodule Foo do\n def bar, do: :ok\nend\n" - result = CodeQA.Engine.Analyzer.analyze_file("lib/foo.ex", content) + result = Analyzer.analyze_file("lib/foo.ex", content) assert is_map(result) assert map_size(result) > 0 # Each value should be a map of metric keys to numbers @@ -21,7 +23,7 @@ defmodule CodeQA.Engine.AnalyzerTest do "lib/b.ex" => "defmodule B do\n def bar, do: :b\nend\n" } - agg = CodeQA.Engine.Analyzer.analyze_codebase_aggregate(files) + agg = Analyzer.analyze_codebase_aggregate(files) assert is_map(agg) # At least one group should have mean_ keys Enum.each(agg, fn {_group, keys} -> @@ -37,7 +39,7 @@ defmodule CodeQA.Engine.AnalyzerTest do test "does not run codebase metrics (returns quickly for large input)" do # Just assert it returns without error for a reasonable input files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: 1\nend\n"} - agg = CodeQA.Engine.Analyzer.analyze_codebase_aggregate(files) + agg = Analyzer.analyze_codebase_aggregate(files) assert is_map(agg) end end diff --git a/test/codeqa/health_report/grader_test.exs b/test/codeqa/health_report/grader_test.exs index 21de94e..b8c5c14 100644 --- a/test/codeqa/health_report/grader_test.exs +++ b/test/codeqa/health_report/grader_test.exs @@ -1,6 +1,8 @@ defmodule CodeQA.HealthReport.GraderTest do use 
ExUnit.Case, async: true + alias CodeQA.Engine.Analyzer + alias CodeQA.Engine.Collector alias CodeQA.HealthReport.Grader @default_scale CodeQA.HealthReport.Categories.default_grade_scale() @@ -118,7 +120,7 @@ defmodule CodeQA.HealthReport.GraderTest do # weighted = (80*2 + 60*1) / 3 = 220/3 ≈ 73 impact_map = %{"readability" => 2} {score, _grade} = Grader.overall_score(categories, @default_scale, impact_map) - assert score == round((80 * 2 + 60 * 1) / 3) + assert score == 73 end test "backward compat: /2 call with empty impact_map equals arithmetic mean" do @@ -151,14 +153,14 @@ defmodule CodeQA.HealthReport.GraderTest do impact_map = %{"function_design" => 2, "variable_naming" => 1} {score, _} = Grader.overall_score(categories, @default_scale, impact_map) # (60*2 + 40*1) / 3 = 160/3 ≈ 53 - assert score == round((60 * 2 + 40 * 1) / 3) + assert score == 53 end end # Shared aggregate for grade_cosine_categories/3 tests — computed once for the module. setup_all do - files = CodeQA.Engine.Collector.collect_files("lib", []) - result = CodeQA.Engine.Analyzer.analyze_codebase(files) + files = Collector.collect_files("lib", []) + result = Analyzer.analyze_codebase(files) aggregate = get_in(result, ["codebase", "aggregate"]) {:ok, aggregate: aggregate} end diff --git a/test/codeqa/health_report/top_blocks_test.exs b/test/codeqa/health_report/top_blocks_test.exs index c5f1bdb..feb1194 100644 --- a/test/codeqa/health_report/top_blocks_test.exs +++ b/test/codeqa/health_report/top_blocks_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.HealthReport.TopBlocksTest do use ExUnit.Case, async: true - alias CodeQA.HealthReport.TopBlocks alias CodeQA.Git.ChangedFile + alias CodeQA.HealthReport.TopBlocks # A node with cosine_delta 0.60 — will be :critical when codebase_cosine = 0.0 (gap=1.0, ratio=0.60) defp make_node(cosine_delta, token_count \\ 20) do @@ -126,8 +126,24 @@ defmodule CodeQA.HealthReport.TopBlocksTest do describe "fix hints" do test "includes fix_hint string for known behavior" 
do - # function_design/cyclomatic_complexity_under_10 has _fix_hint in YAML - [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) + # naming_conventions/file_name_matches_primary_export has _fix_hint in YAML + node = %{ + "start_line" => 1, + "end_line" => 10, + "type" => "code", + "token_count" => 20, + "refactoring_potentials" => [ + %{ + "category" => "naming_conventions", + "behavior" => "file_name_matches_primary_export", + "cosine_delta" => 0.60 + } + ], + "children" => [] + } + + hint_lookup = %{{"naming_conventions", "file_name_matches_primary_export"} => 0.0} + [group] = TopBlocks.build(make_results([node]), [], hint_lookup) potential = hd(hd(group.blocks).potentials) assert is_binary(potential.fix_hint) end diff --git a/test/codeqa/health_report_test.exs b/test/codeqa/health_report_test.exs index 1eb4c64..8d25e4d 100644 --- a/test/codeqa/health_report_test.exs +++ b/test/codeqa/health_report_test.exs @@ -1,14 +1,19 @@ defmodule CodeQA.HealthReportTest do use ExUnit.Case, async: true + alias CodeQA.BlockImpactAnalyzer + alias CodeQA.Engine.Analyzer + alias CodeQA.Git.ChangedFile + alias CodeQA.HealthReport + describe "generate/2 output keys" do @tag :slow test "without base_results: pr_summary and codebase_delta are nil" do files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} - results = CodeQA.Engine.Analyzer.analyze_codebase(files) - results = CodeQA.BlockImpactAnalyzer.analyze(results, files) + results = Analyzer.analyze_codebase(files) + results = BlockImpactAnalyzer.analyze(results, files) - report = CodeQA.HealthReport.generate(results) + report = HealthReport.generate(results) assert report.pr_summary == nil assert report.codebase_delta == nil @@ -22,10 +27,10 @@ defmodule CodeQA.HealthReportTest do @tag :slow test "without base_results: top_blocks shows all files with significant blocks" do files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} - results = 
CodeQA.Engine.Analyzer.analyze_codebase(files) - results = CodeQA.BlockImpactAnalyzer.analyze(results, files) + results = Analyzer.analyze_codebase(files) + results = BlockImpactAnalyzer.analyze(results, files) - report = CodeQA.HealthReport.generate(results) + report = HealthReport.generate(results) # top_blocks is a list of file groups (may be empty if no blocks above threshold) assert is_list(report.top_blocks) @@ -41,10 +46,10 @@ defmodule CodeQA.HealthReportTest do @tag :slow test "worst_offenders is always empty in categories" do files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} - results = CodeQA.Engine.Analyzer.analyze_codebase(files) - results = CodeQA.BlockImpactAnalyzer.analyze(results, files) + results = Analyzer.analyze_codebase(files) + results = BlockImpactAnalyzer.analyze(results, files) - report = CodeQA.HealthReport.generate(results) + report = HealthReport.generate(results) Enum.each(report.categories, fn cat -> assert Map.get(cat, :worst_offenders, []) == [] @@ -56,14 +61,14 @@ defmodule CodeQA.HealthReportTest do @tag :slow test "pr_summary is populated" do files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} - head_results = CodeQA.Engine.Analyzer.analyze_codebase(files) - head_results = CodeQA.BlockImpactAnalyzer.analyze(head_results, files) - base_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + head_results = Analyzer.analyze_codebase(files) + head_results = BlockImpactAnalyzer.analyze(head_results, files) + base_results = Analyzer.analyze_codebase(files) - changed = [%CodeQA.Git.ChangedFile{path: "lib/foo.ex", status: "modified"}] + changed = [%ChangedFile{path: "lib/foo.ex", status: "modified"}] report = - CodeQA.HealthReport.generate(head_results, + HealthReport.generate(head_results, base_results: base_results, changed_files: changed ) @@ -89,11 +94,11 @@ defmodule CodeQA.HealthReportTest do @tag :slow test "codebase_delta is populated" do files = %{"lib/foo.ex" => "defmodule Foo do\n 
def bar, do: :ok\nend\n"} - head_results = CodeQA.Engine.Analyzer.analyze_codebase(files) - head_results = CodeQA.BlockImpactAnalyzer.analyze(head_results, files) - base_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + head_results = Analyzer.analyze_codebase(files) + head_results = BlockImpactAnalyzer.analyze(head_results, files) + base_results = Analyzer.analyze_codebase(files) - report = CodeQA.HealthReport.generate(head_results, base_results: base_results) + report = HealthReport.generate(head_results, base_results: base_results) assert %{base: %{aggregate: _}, head: %{aggregate: _}, delta: %{aggregate: _}} = report.codebase_delta @@ -106,14 +111,14 @@ defmodule CodeQA.HealthReportTest do "lib/bar.ex" => "defmodule Bar do\n def baz, do: :ok\nend\n" } - head_results = CodeQA.Engine.Analyzer.analyze_codebase(files) - head_results = CodeQA.BlockImpactAnalyzer.analyze(head_results, files) - base_results = CodeQA.Engine.Analyzer.analyze_codebase(files) + head_results = Analyzer.analyze_codebase(files) + head_results = BlockImpactAnalyzer.analyze(head_results, files) + base_results = Analyzer.analyze_codebase(files) - changed = [%CodeQA.Git.ChangedFile{path: "lib/foo.ex", status: "modified"}] + changed = [%ChangedFile{path: "lib/foo.ex", status: "modified"}] report = - CodeQA.HealthReport.generate(head_results, + HealthReport.generate(head_results, base_results: base_results, changed_files: changed ) diff --git a/test/codeqa/metrics/file/bradford_test.exs b/test/codeqa/metrics/file/bradford_test.exs index f323ca8..db948d9 100644 --- a/test/codeqa/metrics/file/bradford_test.exs +++ b/test/codeqa/metrics/file/bradford_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.Metrics.File.BradfordTest do use ExUnit.Case, async: true - alias CodeQA.Metrics.File.Bradford alias CodeQA.Engine.Pipeline + alias CodeQA.Metrics.File.Bradford # Bradford zones are built by ranking lines densest-first, then walking down # until each third of total tokens is accumulated: diff --git 
a/test/codeqa/metrics/file/branching_test.exs b/test/codeqa/metrics/file/branching_test.exs index d527a60..c4749b3 100644 --- a/test/codeqa/metrics/file/branching_test.exs +++ b/test/codeqa/metrics/file/branching_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.Metrics.File.BranchingTest do use ExUnit.Case, async: true - alias CodeQA.Metrics.File.Branching alias CodeQA.Engine.Pipeline + alias CodeQA.Metrics.File.Branching defp ctx(code), do: Pipeline.build_file_context(code) defp density(code), do: Branching.analyze(ctx(code))["branching_density"] diff --git a/test/codeqa/metrics/file/brevity_test.exs b/test/codeqa/metrics/file/brevity_test.exs index 2f5662d..4f65fa0 100644 --- a/test/codeqa/metrics/file/brevity_test.exs +++ b/test/codeqa/metrics/file/brevity_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.Metrics.File.BrevityTest do use ExUnit.Case, async: true - alias CodeQA.Metrics.File.Brevity alias CodeQA.Engine.Pipeline + alias CodeQA.Metrics.File.Brevity defp ctx(code), do: Pipeline.build_file_context(code) defp result(code), do: Brevity.analyze(ctx(code)) diff --git a/test/codeqa/metrics/file/function_metrics_test.exs b/test/codeqa/metrics/file/function_metrics_test.exs index eee4bc2..7f05b90 100644 --- a/test/codeqa/metrics/file/function_metrics_test.exs +++ b/test/codeqa/metrics/file/function_metrics_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.Metrics.File.FunctionMetricsTest do use ExUnit.Case, async: true - alias CodeQA.Metrics.File.FunctionMetrics alias CodeQA.Engine.Pipeline + alias CodeQA.Metrics.File.FunctionMetrics defp ctx(code), do: Pipeline.build_file_context(code) defp analyze(code), do: FunctionMetrics.analyze(ctx(code)) @@ -50,7 +50,7 @@ defmodule CodeQA.Metrics.File.FunctionMetricsTest do for keyword <- FunctionMetrics.func_keywords() do test "detects function starting with #{keyword}" do code = "#{unquote(keyword)} my_func(x) {\n return x\n}" - result = FunctionMetrics.analyze(CodeQA.Engine.Pipeline.build_file_context(code)) + result = 
FunctionMetrics.analyze(Pipeline.build_file_context(code)) assert result["avg_function_lines"] > 0, "expected '#{unquote(keyword)}' to be detected as function start" @@ -62,7 +62,7 @@ defmodule CodeQA.Metrics.File.FunctionMetricsTest do for modifier <- FunctionMetrics.access_modifiers() do test "detects method starting with #{modifier}" do code = "#{unquote(modifier)} void MyMethod() {\n return;\n}" - result = FunctionMetrics.analyze(CodeQA.Engine.Pipeline.build_file_context(code)) + result = FunctionMetrics.analyze(Pipeline.build_file_context(code)) assert result["avg_function_lines"] > 0, "expected '#{unquote(modifier)}' access modifier to trigger method detection" diff --git a/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs b/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs index d52234f..c59f707 100644 --- a/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs +++ b/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs @@ -1,7 +1,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocksFileTest do use ExUnit.Case, async: true - alias CodeQA.Metrics.File.NearDuplicateBlocksFile alias CodeQA.Engine.Pipeline + alias CodeQA.Metrics.File.NearDuplicateBlocksFile defp ctx(code, path \\ "test.ex") do base = Pipeline.build_file_context(code) diff --git a/test/codeqa/metrics/file/rfc_test.exs b/test/codeqa/metrics/file/rfc_test.exs index 3ac9fcd..19716f3 100644 --- a/test/codeqa/metrics/file/rfc_test.exs +++ b/test/codeqa/metrics/file/rfc_test.exs @@ -1,8 +1,8 @@ defmodule CodeQA.Metrics.File.RFCTest do use ExUnit.Case, async: true - alias CodeQA.Metrics.File.RFC alias CodeQA.Engine.Pipeline + alias CodeQA.Metrics.File.RFC defp ctx(code), do: Pipeline.build_file_context(code) defp result(code), do: RFC.analyze(ctx(code)) From 1876aebed925ded97dd555e9dbe95cceb01c81cc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 21 Mar 2026 13:27:38 +0000 Subject: [PATCH 44/71] chore(combined-metrics): sync language coverage and 
scalar vectors [skip ci] --- priv/combined_metrics/code_smells.yml | 5 ----- priv/combined_metrics/variable_naming.yml | 14 -------------- 2 files changed, 19 deletions(-) diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml index 2201f7d..f1c73c5 100644 --- a/priv/combined_metrics/code_smells.yml +++ b/priv/combined_metrics/code_smells.yml @@ -1,6 +1,5 @@ consistent_string_quote_style: _doc: "Files should use a single, consistent string quoting style throughout." - _languages: [elixir] _log_baseline: -18.2553 branching: mean_branching_density: 0.0243 @@ -102,7 +101,6 @@ consistent_string_quote_style: no_dead_code_after_return: _doc: "There should be no unreachable statements after a return or early exit." - _languages: [elixir] _log_baseline: -55.8435 branching: mean_branch_count: -2.0000 @@ -213,7 +211,6 @@ no_dead_code_after_return: no_debug_print_statements: _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." - _languages: [elixir] _log_baseline: -88.0844 branching: mean_branch_count: -0.3540 @@ -324,7 +321,6 @@ no_debug_print_statements: no_fixme_comments: _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." - _languages: [elixir] _log_baseline: -2.0233 branching: mean_branch_count: 0.1755 @@ -439,7 +435,6 @@ no_fixme_comments: no_nested_ternary: _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." - _languages: [elixir] _log_baseline: 7.6475 branching: mean_branch_count: -0.5662 diff --git a/priv/combined_metrics/variable_naming.yml b/priv/combined_metrics/variable_naming.yml index e7bc6fa..1be9c6b 100644 --- a/priv/combined_metrics/variable_naming.yml +++ b/priv/combined_metrics/variable_naming.yml @@ -1,6 +1,5 @@ boolean_has_is_has_prefix: _doc: "Boolean variables should be prefixed with `is_`, `has_`, or `can_`." 
- _languages: [elixir, javascript, ruby] _log_baseline: 15.9481 brevity: mean_sample_size: 0.0752 @@ -63,7 +62,6 @@ boolean_has_is_has_prefix: collection_name_is_plural: _doc: "Variables holding a collection should use a plural name." - _languages: [elixir, javascript, ruby] _log_baseline: 21.8380 brevity: mean_sample_size: -0.5320 @@ -125,7 +123,6 @@ collection_name_is_plural: loop_var_is_single_letter: _doc: "Loop index variables (`i`, `j`, `k`) are acceptable inside loop bodies." - _languages: [elixir, javascript, ruby] _log_baseline: -28.3218 brevity: mean_sample_size: -0.1049 @@ -205,7 +202,6 @@ loop_var_is_single_letter: name_contains_and: _doc: "Variable names containing `and` signal a variable that holds two concerns." - _languages: [elixir, javascript, ruby] _log_baseline: 0.4689 branching: mean_branch_count: -0.3666 @@ -324,7 +320,6 @@ name_contains_and: name_contains_type_suffix: _doc: "Type suffixes in names (`userString`, `nameList`) are redundant noise." - _languages: [elixir, javascript, ruby] _log_baseline: -26.6817 branching: mean_branch_count: -0.4150 @@ -410,7 +405,6 @@ name_contains_type_suffix: name_is_abbreviation: _doc: "Abbreviated names (`usr`, `cfg`, `mgr`) reduce readability." - _languages: [elixir, javascript, ruby] _log_baseline: 10.7370 brevity: mean_sample_size: -0.1542 @@ -497,7 +491,6 @@ name_is_abbreviation: name_is_generic: _doc: "Generic names (`data`, `result`, `tmp`, `val`, `obj`) convey no domain meaning." - _languages: [elixir, javascript, ruby] _log_baseline: 37.4815 branching: mean_branch_count: 0.5193 @@ -614,7 +607,6 @@ name_is_generic: name_is_number_like: _doc: "Number-suffixed names (`var1`, `thing2`) signal a missing abstraction." - _languages: [elixir, javascript, ruby] _log_baseline: 1.7611 brevity: mean_sample_size: -0.0262 @@ -693,7 +685,6 @@ name_is_number_like: name_is_single_letter: _doc: "Single-letter names outside loop indices are too opaque." 
- _languages: [elixir, javascript, ruby] _log_baseline: 26.2113 branching: mean_branching_density: -0.0458 @@ -791,7 +782,6 @@ name_is_single_letter: name_is_too_long: _doc: "Names longer than ~30 characters harm readability." - _languages: [elixir, javascript, ruby] _log_baseline: -7.8322 branching: mean_branch_count: 0.0340 @@ -912,7 +902,6 @@ name_is_too_long: name_is_too_short: _doc: "Names shorter than 3 characters (outside loops) are too opaque." - _languages: [elixir, javascript, ruby] _log_baseline: -2.7224 branching: mean_branch_count: -0.2327 @@ -1000,7 +989,6 @@ name_is_too_short: negated_boolean_name: _doc: "Negated boolean names (`isNotValid`, `notActive`) are harder to reason about." - _languages: [elixir, javascript, ruby] _log_baseline: -4.4565 brevity: mean_sample_size: -0.0998 @@ -1071,7 +1059,6 @@ negated_boolean_name: no_hungarian_notation: _doc: "Hungarian notation prefixes (`strName`, `bFlag`) add noise without type safety." - _languages: [elixir, javascript, ruby] _log_baseline: -15.5962 brevity: mean_sample_size: -0.0814 @@ -1147,7 +1134,6 @@ no_hungarian_notation: screaming_snake_for_constants: _doc: "Module-level constants should use SCREAMING_SNAKE_CASE." 
- _languages: [elixir, javascript, ruby] _log_baseline: -5.9884 branching: mean_branching_density: 0.0176 From 05019e87a2d785377dffaf5184b3e5bb85b4e278 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 15:10:10 +0100 Subject: [PATCH 45/71] refactor(codebase): structural cleanup from health report analysis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Split sample_runner.ex (655→393 lines) into three focused modules: cosine_vector.ex (vector math), yaml_formatter.ex (YAML serialisation), scalar_applier.ex (scalar/language writing) - Split near_duplicate_blocks.ex (454→198 lines) into distance.ex (Levenshtein + bucketing) and candidates.ex (fingerprinting + pair finding) - Extract accumulate_file_behaviors/3 from file_scorer.ex reduce callback that was doing two things (SRP fix) - Unify score_low_is_good/score_high_is_good → score_by_direction/3 in grader.ex; unify interpolate_below_d pair → interpolate_beyond_d/3 using abs() to remove directional duplication - Add _languages metadata to code_smells and variable_naming YAML behaviors Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/combined_metrics/cosine_vector.ex | 69 +++ lib/codeqa/combined_metrics/file_scorer.ex | 41 +- lib/codeqa/combined_metrics/sample_runner.ex | 524 +++++------------- lib/codeqa/combined_metrics/scalar_applier.ex | 209 +++++++ lib/codeqa/combined_metrics/yaml_formatter.ex | 72 +++ lib/codeqa/health_report/grader.ex | 157 +++--- .../metrics/file/near_duplicate_blocks.ex | 316 +---------- .../file/near_duplicate_blocks/candidates.ex | 214 +++++++ .../file/near_duplicate_blocks/distance.ex | 114 ++++ priv/combined_metrics/code_smells.yml | 5 + priv/combined_metrics/variable_naming.yml | 14 + 11 files changed, 973 insertions(+), 762 deletions(-) create mode 100644 lib/codeqa/combined_metrics/cosine_vector.ex create mode 100644 lib/codeqa/combined_metrics/scalar_applier.ex create mode 100644 
lib/codeqa/combined_metrics/yaml_formatter.ex create mode 100644 lib/codeqa/metrics/file/near_duplicate_blocks/candidates.ex create mode 100644 lib/codeqa/metrics/file/near_duplicate_blocks/distance.ex diff --git a/lib/codeqa/combined_metrics/cosine_vector.ex b/lib/codeqa/combined_metrics/cosine_vector.ex new file mode 100644 index 0000000..158b24e --- /dev/null +++ b/lib/codeqa/combined_metrics/cosine_vector.ex @@ -0,0 +1,69 @@ +defmodule CodeQA.CombinedMetrics.CosineVector do + @moduledoc """ + Computes cosine similarity between a behavior's scalar weight vector and a + log-metric vector derived from an aggregate. + + Pure math — no I/O, no YAML loading. Intended for internal use by `SampleRunner`. + """ + + alias CodeQA.CombinedMetrics.Scorer + + @doc """ + Builds the cosine result entry for a single behavior against the given aggregate. + + Returns a one-element list `[result_map]` on success or `[]` when the behavior + has no non-zero scalars (no sample data) and should be excluded. 
+ """ + @spec compute(String.t(), String.t(), map(), map(), String.t()) :: [map()] + def compute(yaml_path, behavior, behavior_data, aggregate, category) do + scalars = Scorer.scalars_for(yaml_path, behavior) + + if map_size(scalars) == 0 do + [] + else + build_result(yaml_path, behavior, behavior_data, aggregate, category, scalars) + end + end + + # --- Internal helpers --- + + defp build_result(yaml_path, behavior, behavior_data, aggregate, category, scalars) do + log_baseline = Map.get(behavior_data, "_log_baseline", 0.0) / 1.0 + + {dot, norm_s_sq, norm_v_sq, contributions} = + Enum.reduce(scalars, {0.0, 0.0, 0.0, []}, fn {{group, key}, scalar}, + {d, ns, nv, contribs} -> + log_m = :math.log(Scorer.get(aggregate, group, key)) + contrib = scalar * log_m + + {d + contrib, ns + scalar * scalar, nv + log_m * log_m, + [{:"#{group}.#{key}", contrib} | contribs]} + end) + + cos_sim = + if norm_s_sq > 0 and norm_v_sq > 0, + do: dot / (:math.sqrt(norm_s_sq) * :math.sqrt(norm_v_sq)), + else: 0.0 + + raw_score = Scorer.compute_score(yaml_path, behavior, aggregate) + calibrated = :math.log(max(raw_score, 1.0e-300)) - log_baseline + + top_metrics = + contributions + |> Enum.sort_by(fn {_, c} -> c end) + |> Enum.take(5) + |> Enum.map(fn {metric, contribution} -> + %{metric: to_string(metric), contribution: Float.round(contribution, 4)} + end) + + [ + %{ + category: category, + behavior: behavior, + cosine: Float.round(cos_sim, 4), + score: Float.round(calibrated, 4), + top_metrics: top_metrics + } + ] + end +end diff --git a/lib/codeqa/combined_metrics/file_scorer.ex b/lib/codeqa/combined_metrics/file_scorer.ex index a35a473..e7479b0 100644 --- a/lib/codeqa/combined_metrics/file_scorer.ex +++ b/lib/codeqa/combined_metrics/file_scorer.ex @@ -70,24 +70,7 @@ defmodule CodeQA.CombinedMetrics.FileScorer do file_data |> Map.get("metrics", %{}) |> map_size() == 0 end) |> Enum.reduce(%{}, fn {path, file_data}, acc -> - top_nodes = Grader.top_3_nodes(Map.get(file_data, "nodes")) - 
language = Language.detect(path).name() - - file_data - |> Map.get("metrics", %{}) - |> file_to_aggregate() - |> SampleRunner.diagnose_aggregate(top: 99_999, language: language) - |> Enum.reduce(acc, fn %{ - category: category, - behavior: behavior, - cosine: cosine, - top_metrics: top_metrics - }, - inner_acc -> - key = "#{category}.#{behavior}" - entry = %{file: path, cosine: cosine, top_metrics: top_metrics, top_nodes: top_nodes} - Map.update(inner_acc, key, [entry], &[entry | &1]) - end) + accumulate_file_behaviors(path, file_data, acc) end) |> Map.new(fn {key, entries} -> threshold = Config.cosine_significance_threshold() @@ -101,4 +84,26 @@ defmodule CodeQA.CombinedMetrics.FileScorer do {key, sorted} end) end + + # Diagnoses a single file's metrics and merges per-behavior entries into the accumulator. + defp accumulate_file_behaviors(path, file_data, acc) do + top_nodes = Grader.top_3_nodes(Map.get(file_data, "nodes")) + language = Language.detect(path).name() + + file_data + |> Map.get("metrics", %{}) + |> file_to_aggregate() + |> SampleRunner.diagnose_aggregate(top: 99_999, language: language) + |> Enum.reduce(acc, fn %{ + category: category, + behavior: behavior, + cosine: cosine, + top_metrics: top_metrics + }, + inner_acc -> + key = "#{category}.#{behavior}" + entry = %{file: path, cosine: cosine, top_metrics: top_metrics, top_nodes: top_nodes} + Map.update(inner_acc, key, [entry], &[entry | &1]) + end) + end end diff --git a/lib/codeqa/combined_metrics/sample_runner.ex b/lib/codeqa/combined_metrics/sample_runner.ex index 76566d4..3cd6533 100644 --- a/lib/codeqa/combined_metrics/sample_runner.ex +++ b/lib/codeqa/combined_metrics/sample_runner.ex @@ -6,14 +6,14 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do manual scalar tuning of combined metric formulas. 
""" - alias CodeQA.CombinedMetrics.Scorer - alias CodeQA.Engine.Analyzer - alias CodeQA.Engine.Collector + alias CodeQA.CombinedMetrics.{CosineVector, ScalarApplier, Scorer} + alias CodeQA.Engine.{Analyzer, Collector} @samples_root "priv/combined_metrics/samples" - @yaml_dir "priv/combined_metrics" - @deadzone_low 0.995 - @deadzone_high 1.005 + + # --------------------------------------------------------------------------- + # Public API + # --------------------------------------------------------------------------- @doc """ Runs all behaviors found in sample directories, optionally filtered by category. @@ -40,12 +40,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do filter_category = opts[:category] @samples_root - |> File.ls!() - |> Enum.flat_map(fn category -> - Path.join([@samples_root, category]) - |> File.ls!() - |> Enum.map(&{category, &1}) - end) + |> list_behaviors() |> Enum.filter(fn {category, behavior} -> (filter_category == nil or category == filter_category) and has_both_dirs?(category, behavior) @@ -55,62 +50,6 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do end) end - defp has_both_dirs?(category, behavior) do - File.dir?(sample_path(category, behavior, "bad")) and - File.dir?(sample_path(category, behavior, "good")) - end - - defp score_behavior(category, behavior, opts) do - yaml_path = "priv/combined_metrics/#{category}.yml" - bad_agg = analyze(sample_path(category, behavior, "bad")) - good_agg = analyze(sample_path(category, behavior, "good")) - - bad_score = Scorer.compute_score(yaml_path, behavior, bad_agg) - good_score = Scorer.compute_score(yaml_path, behavior, good_agg) - ratio = if bad_score > 0, do: good_score / bad_score, else: 0.0 - - base = %{ - category: category, - behavior: behavior, - bad_score: bad_score, - good_score: good_score, - ratio: Float.round(ratio, 2), - direction_ok: good_score >= bad_score - } - - if opts[:verbose] do - Map.put(base, :metric_detail, metric_detail(yaml_path, behavior, bad_agg, good_agg)) - else 
- Map.put(base, :metric_detail, []) - end - end - - defp analyze(dir) do - dir - |> Collector.collect_files() - |> Analyzer.analyze_codebase() - |> get_in(["codebase", "aggregate"]) - end - - defp metric_detail(yaml_path, behavior, bad_agg, good_agg) do - Scorer.scalars_for(yaml_path, behavior) - |> Enum.map(fn {{group, key}, scalar} -> - bad_val = Scorer.get(bad_agg, group, key) - good_val = Scorer.get(good_agg, group, key) - ratio = if bad_val > 0, do: Float.round(good_val / bad_val, 2), else: 0.0 - - %{ - group: group, - key: key, - scalar: scalar, - bad: bad_val, - good: good_val, - ratio: ratio - } - end) - |> Enum.sort_by(&abs(&1.ratio - 1.0), :desc) - end - @doc """ Builds a per-behavior metric correlation report for scalar tuning. @@ -144,12 +83,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do filter_category = opts[:category] @samples_root - |> File.ls!() - |> Enum.flat_map(fn category -> - Path.join([@samples_root, category]) - |> File.ls!() - |> Enum.map(&{category, &1}) - end) + |> list_behaviors() |> Enum.filter(fn {category, behavior} -> (filter_category == nil or category == filter_category) and has_both_dirs?(category, behavior) @@ -159,44 +93,6 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do end) end - defp behavior_metric_table(category, behavior) do - bad_agg = analyze(sample_path(category, behavior, "bad")) - good_agg = analyze(sample_path(category, behavior, "good")) - - entries = - Scorer.default_scalars() - |> Map.keys() - |> Enum.map(fn {group, key} -> - bad_val = Scorer.get(bad_agg, group, key) - good_val = Scorer.get(good_agg, group, key) - log_bad = :math.log(bad_val) - log_good = :math.log(good_val) - ratio = good_val / bad_val - log_diff = log_good - log_bad - {"#{group}.#{key}", bad_val, good_val, log_bad, log_good, ratio, log_diff} - end) - - max_abs_log_diff = - entries - |> Enum.map(fn {_, _, _, _, _, _, ld} -> abs(ld) end) - |> Enum.max(fn -> 1.0 end) - |> max(1.0e-10) - - Map.new(entries, fn {metric_key, bad_val, 
good_val, log_bad, log_good, ratio, log_diff} -> - suggested_scalar = Float.round(2.0 * log_diff / max_abs_log_diff, 4) - - {metric_key, - %{ - bad: r4(bad_val), - good: r4(good_val), - log_bad: r4(log_bad), - log_good: r4(log_good), - ratio: r4(ratio), - suggested_scalar: suggested_scalar - }} - end) - end - @doc """ Scores all combined metric behaviors against the given codebase aggregate map. @@ -246,12 +142,6 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do end) end - defp humanize(slug) do - slug - |> String.split("_") - |> Enum.map_join(" ", &String.capitalize/1) - end - @doc """ Identifies the most likely code quality issues in an aggregate by cosine similarity. @@ -298,7 +188,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do data |> Enum.filter(fn {_k, v} -> is_map(v) end) |> Enum.flat_map(fn {behavior, behavior_data} -> - maybe_score_behavior( + maybe_diagnose_behavior( yaml_path, behavior, behavior_data, @@ -317,9 +207,9 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do Applies suggested scalars from sample analysis back to the YAML config files. For each behavior that has sample data, rewrites its scalar entries using the - log-linear suggestion method. Metrics whose ratio falls in the deadzone - (#{@deadzone_low} ≤ ratio ≤ #{@deadzone_high}) are excluded. All non-deadzoned - metrics are written, even if they were not previously present in the YAML. + log-linear suggestion method. Metrics whose ratio falls in the deadzone are + excluded. All non-deadzoned metrics are written, even if they were not + previously present in the YAML. Behaviors without sample data are left unchanged. 
@@ -328,113 +218,144 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do @spec apply_scalars(keyword()) :: [map()] def apply_scalars(opts \\ []) do report = build_metric_report(opts) - filter_category = opts[:category] + ScalarApplier.apply_scalars(report, opts) + end - @yaml_dir - |> File.ls!() - |> Enum.filter(fn yml_file -> - String.ends_with?(yml_file, ".yml") and - (filter_category == nil or String.trim_trailing(yml_file, ".yml") == filter_category) - end) - |> Enum.sort() - |> Enum.map(fn yml_file -> - category = String.trim_trailing(yml_file, ".yml") - yaml_path = Path.join(@yaml_dir, yml_file) - {:ok, existing} = YamlElixir.read_from_file(yaml_path) + @doc """ + Updates only the `_languages` field in YAML config files based on sample data. - {updated_yaml, stats} = apply_to_category(existing, category, report) - File.write!(yaml_path, format_yaml(updated_yaml)) + Scans `bad/` and `good/` sample directories for each behavior, detects languages + from file extensions via `CodeQA.Language.detect/1`, and writes the intersection + as `_languages` to the YAML. Behaviors without sample data are left without a + `_languages` key (treated as applying to all languages at scoring time). + All existing scalars and baselines are preserved. - Map.put(stats, :category, category) - end) + Returns a list of `%{category: String.t(), behaviors_with_languages: non_neg_integer()}`. 
+ """ + @spec apply_languages(keyword()) :: [map()] + def apply_languages(opts \\ []) do + ScalarApplier.apply_languages(opts) end - defp apply_to_category(existing, category, report) do - existing - |> Enum.filter(fn {_k, v} -> is_map(v) end) - |> Enum.reduce({%{}, %{updated: 0, deadzoned: 0, skipped: 0}}, fn - {behavior, current_groups}, {acc_yaml, stats} -> - report_key = "#{category}.#{behavior}" - doc = read_behavior_doc(category, behavior) - - case Map.get(report, report_key) do - nil -> - groups = maybe_put_doc(current_groups, doc) - {Map.put(acc_yaml, behavior, groups), Map.update!(stats, :skipped, &(&1 + 1))} - - metrics -> - apply_metrics(acc_yaml, stats, behavior, current_groups, metrics, doc) - end + # --------------------------------------------------------------------------- + # Sample discovery + # --------------------------------------------------------------------------- + + defp list_behaviors(samples_root) do + samples_root + |> File.ls!() + |> Enum.flat_map(fn category -> + Path.join([samples_root, category]) + |> File.ls!() + |> Enum.map(&{category, &1}) end) end - defp apply_metrics(acc_yaml, stats, behavior, current_groups, metrics, doc) do - {new_groups, log_baseline, n_updated, n_deadzoned} = groups_from_report(metrics) - # Fall back to current groups if everything was deadzoned - base_groups = if map_size(new_groups) > 0, do: new_groups, else: current_groups - - groups = - base_groups - |> Map.put("_log_baseline", Float.round(log_baseline, 6)) - |> maybe_put_doc(doc) - - {Map.put(acc_yaml, behavior, groups), - %{ - stats - | updated: stats.updated + n_updated, - deadzoned: stats.deadzoned + n_deadzoned - }} + defp has_both_dirs?(category, behavior) do + File.dir?(sample_path(category, behavior, "bad")) and + File.dir?(sample_path(category, behavior, "good")) + end + + defp sample_path(category, behavior, kind) do + Path.join([@samples_root, category, behavior, kind]) + end + + defp analyze(dir) do + dir + |> Collector.collect_files() + |> 
Analyzer.analyze_codebase() + |> get_in(["codebase", "aggregate"]) end - defp read_behavior_doc(category, behavior) do - config_path = Path.join([@samples_root, category, behavior, "config.yml"]) + # --------------------------------------------------------------------------- + # Sample scoring + # --------------------------------------------------------------------------- - case File.read(config_path) do - {:ok, content} -> - case YamlElixir.read_from_string(content) do - {:ok, %{"doc" => doc}} when is_binary(doc) -> doc - _ -> nil - end + defp score_behavior(category, behavior, opts) do + yaml_path = "priv/combined_metrics/#{category}.yml" + bad_agg = analyze(sample_path(category, behavior, "bad")) + good_agg = analyze(sample_path(category, behavior, "good")) + + bad_score = Scorer.compute_score(yaml_path, behavior, bad_agg) + good_score = Scorer.compute_score(yaml_path, behavior, good_agg) + ratio = if bad_score > 0, do: good_score / bad_score, else: 0.0 + + base = %{ + category: category, + behavior: behavior, + bad_score: bad_score, + good_score: good_score, + ratio: Float.round(ratio, 2), + direction_ok: good_score >= bad_score + } - _ -> - nil + if opts[:verbose] do + Map.put(base, :metric_detail, metric_detail(yaml_path, behavior, bad_agg, good_agg)) + else + Map.put(base, :metric_detail, []) end end - defp maybe_put_doc(groups, nil), do: groups - defp maybe_put_doc(groups, doc), do: Map.put(groups, "_doc", doc) + defp metric_detail(yaml_path, behavior, bad_agg, good_agg) do + Scorer.scalars_for(yaml_path, behavior) + |> Enum.map(fn {{group, key}, scalar} -> + bad_val = Scorer.get(bad_agg, group, key) + good_val = Scorer.get(good_agg, group, key) + ratio = if bad_val > 0, do: Float.round(good_val / bad_val, 2), else: 0.0 + %{group: group, key: key, scalar: scalar, bad: bad_val, good: good_val, ratio: ratio} + end) + |> Enum.sort_by(&abs(&1.ratio - 1.0), :desc) + end + + # --------------------------------------------------------------------------- + # Metric 
report (vector building) + # --------------------------------------------------------------------------- + + defp behavior_metric_table(category, behavior) do + bad_agg = analyze(sample_path(category, behavior, "bad")) + good_agg = analyze(sample_path(category, behavior, "good")) + + entries = + Scorer.default_scalars() + |> Map.keys() + |> Enum.map(fn {group, key} -> + bad_val = Scorer.get(bad_agg, group, key) + good_val = Scorer.get(good_agg, group, key) + log_bad = :math.log(bad_val) + log_good = :math.log(good_val) + ratio = good_val / bad_val + log_diff = log_good - log_bad + {"#{group}.#{key}", bad_val, good_val, log_bad, log_good, ratio, log_diff} + end) - defp groups_from_report(metrics) do - Enum.reduce(metrics, {%{}, 0.0, 0, 0}, fn {metric_key, data}, - {groups, log_baseline, n_updated, n_deadzoned} -> - [group, key] = String.split(metric_key, ".", parts: 2) + max_abs_log_diff = + entries + |> Enum.map(fn {_, _, _, _, _, _, ld} -> abs(ld) end) + |> Enum.max(fn -> 1.0 end) + |> max(1.0e-10) - if deadzone?(data.ratio) do - {groups, log_baseline, n_updated, n_deadzoned + 1} - else - accumulate_metric(groups, log_baseline, n_updated, n_deadzoned, group, key, data) - end + Map.new(entries, fn {metric_key, bad_val, good_val, log_bad, log_good, ratio, log_diff} -> + suggested_scalar = Float.round(2.0 * log_diff / max_abs_log_diff, 4) + + {metric_key, + %{ + bad: r4(bad_val), + good: r4(good_val), + log_bad: r4(log_bad), + log_good: r4(log_good), + ratio: r4(ratio), + suggested_scalar: suggested_scalar + }} end) end - defp accumulate_metric(groups, log_baseline, n_updated, n_deadzoned, group, key, data) do - new_groups = - Map.update( - groups, - group, - %{key => data.suggested_scalar}, - &Map.put(&1, key, data.suggested_scalar) - ) - - # Baseline: expected log score at the geometric mean of good/bad sample values - geo_mean = :math.sqrt(max(data.bad, 1.0e-10) * max(data.good, 1.0e-10)) - new_baseline = log_baseline + data.suggested_scalar * :math.log(geo_mean) 
- {new_groups, new_baseline, n_updated + 1, n_deadzoned} - end + defp r4(f), do: Float.round(f / 1.0, 4) - defp deadzone?(ratio), do: ratio >= @deadzone_low and ratio <= @deadzone_high + # --------------------------------------------------------------------------- + # Cosine diagnosis (delegates vector math to CosineVector) + # --------------------------------------------------------------------------- - defp maybe_score_behavior( + defp maybe_diagnose_behavior( yaml_path, behavior, behavior_data, @@ -446,70 +367,22 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do behavior_langs = Map.get(behavior_data, "_languages", []) if behavior_language_applies?(behavior_langs, language, languages) do - score_behavior_cosine(yaml_path, behavior, behavior_data, aggregate, category) + CosineVector.compute(yaml_path, behavior, behavior_data, aggregate, category) else [] end end - defp score_behavior_cosine(yaml_path, behavior, behavior_data, aggregate, category) do - scalars = Scorer.scalars_for(yaml_path, behavior) - - if map_size(scalars) == 0 do - [] - else - build_cosine_result(yaml_path, behavior, behavior_data, aggregate, category, scalars) - end - end - - defp build_cosine_result(yaml_path, behavior, behavior_data, aggregate, category, scalars) do - log_baseline = Map.get(behavior_data, "_log_baseline", 0.0) / 1.0 - - {dot, norm_s_sq, norm_v_sq, contributions} = - Enum.reduce(scalars, {0.0, 0.0, 0.0, []}, fn {{group, key}, scalar}, - {d, ns, nv, contribs} -> - log_m = :math.log(Scorer.get(aggregate, group, key)) - contrib = scalar * log_m - - {d + contrib, ns + scalar * scalar, nv + log_m * log_m, - [{:"#{group}.#{key}", contrib} | contribs]} - end) - - cos_sim = - if norm_s_sq > 0 and norm_v_sq > 0, - do: dot / (:math.sqrt(norm_s_sq) * :math.sqrt(norm_v_sq)), - else: 0.0 - - raw_score = Scorer.compute_score(yaml_path, behavior, aggregate) - calibrated = :math.log(max(raw_score, 1.0e-300)) - log_baseline - - top_metrics = - contributions - |> Enum.sort_by(fn {_, c} -> c 
end) - |> Enum.take(5) - |> Enum.map(fn {metric, contribution} -> - %{metric: to_string(metric), contribution: Float.round(contribution, 4)} - end) - - [ - %{ - category: category, - behavior: behavior, - cosine: Float.round(cos_sim, 4), - score: Float.round(calibrated, 4), - top_metrics: top_metrics - } - ] - end + # --------------------------------------------------------------------------- + # Language filtering + # --------------------------------------------------------------------------- - # Returns true if the behavior should be included for the given language context. # behavior_langs: the "_languages" list from the YAML ([] = applies to all) # language: single language string from :language opt (nil = no filter) # languages: project language list from :languages opt (nil = no filter) defp behavior_language_applies?(_behavior_langs, nil, nil), do: true # Empty behavior_langs means "applies to all languages" — always include. - # This clause takes priority over all non-nil filter cases. 
defp behavior_language_applies?([], _language, _languages), do: true defp behavior_language_applies?(_behavior_langs, nil, []), do: true @@ -524,132 +397,13 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do when is_binary(language) and is_list(languages), do: language in behavior_langs or Enum.any?(behavior_langs, &(&1 in languages)) - defp format_yaml(data) do - lines = - data - |> Enum.sort_by(fn {behavior, _} -> behavior end) - |> Enum.flat_map(fn {behavior, groups} -> format_behavior_lines(behavior, groups) end) - - Enum.join(lines, "\n") <> "\n" - end - - defp format_behavior_lines(behavior, groups) do - doc_line = yaml_doc_line(Map.get(groups, "_doc")) - baseline_line = yaml_baseline_line(Map.get(groups, "_log_baseline")) - fix_hint_line = yaml_fix_hint_line(Map.get(groups, "_fix_hint")) - languages_line = yaml_languages_line(Map.get(groups, "_languages")) - group_lines = format_group_lines(groups) - - ["#{behavior}:" | doc_line] ++ - fix_hint_line ++ languages_line ++ baseline_line ++ group_lines ++ [""] - end - - defp yaml_doc_line(nil), do: [] - defp yaml_doc_line(doc), do: [" _doc: #{inspect(doc)}"] - - defp yaml_baseline_line(nil), do: [] - defp yaml_baseline_line(val), do: [" _log_baseline: #{fmt_scalar(val)}"] - - defp yaml_fix_hint_line(nil), do: [] - defp yaml_fix_hint_line(hint), do: [" _fix_hint: #{inspect(hint)}"] - - defp yaml_languages_line(nil), do: [] - defp yaml_languages_line([]), do: [] - defp yaml_languages_line(langs), do: [" _languages: [#{Enum.join(langs, ", ")}]"] - - defp format_group_lines(groups) do - groups - |> Enum.filter(fn {k, v} -> - k not in ["_doc", "_log_baseline", "_fix_hint", "_languages"] and is_map(v) - end) - |> Enum.sort_by(fn {group, _} -> group end) - |> Enum.flat_map(fn {group, keys} -> - key_lines = - keys - |> Enum.sort_by(fn {key, _} -> key end) - |> Enum.map(fn {key, scalar} -> " #{key}: #{fmt_scalar(scalar)}" end) - - [" #{group}:" | key_lines] - end) - end - - defp fmt_scalar(f) when is_float(f), do: 
:erlang.float_to_binary(f, decimals: 4) - defp fmt_scalar(n) when is_integer(n), do: "#{n}.0" - - defp r4(f), do: Float.round(f / 1.0, 4) - - defp sample_path(category, behavior, kind) do - Path.join([@samples_root, category, behavior, kind]) - end - - defp dir_languages(dir) do - case File.ls(dir) do - {:ok, files} -> - files - |> Enum.map(&CodeQA.Language.detect/1) - |> Enum.map(& &1.name()) - |> MapSet.new() - - _ -> - MapSet.new() - end - end - - defp languages_for_behavior(category, behavior) do - bad_langs = dir_languages(sample_path(category, behavior, "bad")) - good_langs = dir_languages(sample_path(category, behavior, "good")) - - bad_langs - |> MapSet.intersection(good_langs) - |> MapSet.to_list() - |> Enum.reject(&(&1 == "unknown")) - |> Enum.sort() - end - - defp maybe_put_languages(groups, []), do: groups - defp maybe_put_languages(groups, langs), do: Map.put(groups, "_languages", langs) - - @doc """ - Updates only the `_languages` field in YAML config files based on sample data. + # --------------------------------------------------------------------------- + # Misc + # --------------------------------------------------------------------------- - Scans `bad/` and `good/` sample directories for each behavior, detects languages - from file extensions via `CodeQA.Language.detect/1`, and writes the intersection - as `_languages` to the YAML. Behaviors without sample data are left without a - `_languages` key (treated as applying to all languages at scoring time). - All existing scalars and baselines are preserved. - - Returns a list of `%{category: String.t(), behaviors_with_languages: non_neg_integer()}`. 
- """ - @spec apply_languages(keyword()) :: [map()] - def apply_languages(opts \\ []) do - filter_category = opts[:category] - - @yaml_dir - |> File.ls!() - |> Enum.filter(fn yml_file -> - String.ends_with?(yml_file, ".yml") and - (filter_category == nil or String.trim_trailing(yml_file, ".yml") == filter_category) - end) - |> Enum.sort() - |> Enum.map(fn yml_file -> - category = String.trim_trailing(yml_file, ".yml") - yaml_path = Path.join(@yaml_dir, yml_file) - {:ok, existing} = YamlElixir.read_from_file(yaml_path) - - updated = - existing - |> Enum.filter(fn {_k, v} -> is_map(v) end) - |> Map.new(fn {behavior, groups} -> - langs = languages_for_behavior(category, behavior) - {behavior, maybe_put_languages(groups, langs)} - end) - - File.write!(yaml_path, format_yaml(updated)) - - behaviors_with_languages = - Enum.count(updated, fn {_b, groups} -> Map.has_key?(groups, "_languages") end) - - %{category: category, behaviors_with_languages: behaviors_with_languages} - end) + defp humanize(slug) do + slug + |> String.split("_") + |> Enum.map_join(" ", &String.capitalize/1) end end diff --git a/lib/codeqa/combined_metrics/scalar_applier.ex b/lib/codeqa/combined_metrics/scalar_applier.ex new file mode 100644 index 0000000..1c8ec4b --- /dev/null +++ b/lib/codeqa/combined_metrics/scalar_applier.ex @@ -0,0 +1,209 @@ +defmodule CodeQA.CombinedMetrics.ScalarApplier do + @moduledoc """ + Writes suggested scalars and language metadata back to the combined-metrics YAML + config files under `priv/combined_metrics/`. + + Intended for internal use by `SampleRunner`. 
Two entry points: + + * `apply_scalars/2` — rewrites scalar weights using log-linear suggestions + * `apply_languages/1` — rewrites `_languages` based on sample file extensions + """ + + alias CodeQA.CombinedMetrics.YamlFormatter + + @samples_root "priv/combined_metrics/samples" + @yaml_dir "priv/combined_metrics" + @deadzone_low 0.995 + @deadzone_high 1.005 + + @doc """ + Applies suggested scalars from `report` (a `build_metric_report/1` result) to + the YAML files under `priv/combined_metrics/`. + + Returns a list of per-category stats maps with `:category`, `:updated`, + `:deadzoned`, and `:skipped` keys. + """ + @spec apply_scalars(map(), keyword()) :: [map()] + def apply_scalars(report, opts \\ []) do + filter_category = opts[:category] + + @yaml_dir + |> File.ls!() + |> Enum.filter(fn yml_file -> + String.ends_with?(yml_file, ".yml") and + (filter_category == nil or String.trim_trailing(yml_file, ".yml") == filter_category) + end) + |> Enum.sort() + |> Enum.map(fn yml_file -> + category = String.trim_trailing(yml_file, ".yml") + yaml_path = Path.join(@yaml_dir, yml_file) + {:ok, existing} = YamlElixir.read_from_file(yaml_path) + + {updated_yaml, stats} = apply_to_category(existing, category, report) + File.write!(yaml_path, YamlFormatter.format(updated_yaml)) + + Map.put(stats, :category, category) + end) + end + + @doc """ + Updates only the `_languages` field in YAML config files based on sample data. + + Returns a list of `%{category: String.t(), behaviors_with_languages: non_neg_integer()}`. 
+ """ + @spec apply_languages(keyword()) :: [map()] + def apply_languages(opts \\ []) do + filter_category = opts[:category] + + @yaml_dir + |> File.ls!() + |> Enum.filter(fn yml_file -> + String.ends_with?(yml_file, ".yml") and + (filter_category == nil or String.trim_trailing(yml_file, ".yml") == filter_category) + end) + |> Enum.sort() + |> Enum.map(fn yml_file -> + category = String.trim_trailing(yml_file, ".yml") + yaml_path = Path.join(@yaml_dir, yml_file) + {:ok, existing} = YamlElixir.read_from_file(yaml_path) + + updated = + existing + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Map.new(fn {behavior, groups} -> + langs = languages_for_behavior(category, behavior) + {behavior, maybe_put_languages(groups, langs)} + end) + + File.write!(yaml_path, YamlFormatter.format(updated)) + + behaviors_with_languages = + Enum.count(updated, fn {_b, groups} -> Map.has_key?(groups, "_languages") end) + + %{category: category, behaviors_with_languages: behaviors_with_languages} + end) + end + + # --------------------------------------------------------------------------- + # Scalar application helpers + # --------------------------------------------------------------------------- + + defp apply_to_category(existing, category, report) do + existing + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.reduce({%{}, %{updated: 0, deadzoned: 0, skipped: 0}}, fn + {behavior, current_groups}, {acc_yaml, stats} -> + report_key = "#{category}.#{behavior}" + doc = read_behavior_doc(category, behavior) + + case Map.get(report, report_key) do + nil -> + groups = maybe_put_doc(current_groups, doc) + {Map.put(acc_yaml, behavior, groups), Map.update!(stats, :skipped, &(&1 + 1))} + + metrics -> + apply_metrics(acc_yaml, stats, behavior, current_groups, metrics, doc) + end + end) + end + + defp apply_metrics(acc_yaml, stats, behavior, current_groups, metrics, doc) do + {new_groups, log_baseline, n_updated, n_deadzoned} = groups_from_report(metrics) + # Fall back to current groups 
if everything was deadzoned + base_groups = if map_size(new_groups) > 0, do: new_groups, else: current_groups + + groups = + base_groups + |> Map.put("_log_baseline", Float.round(log_baseline, 6)) + |> maybe_put_doc(doc) + + {Map.put(acc_yaml, behavior, groups), + %{ + stats + | updated: stats.updated + n_updated, + deadzoned: stats.deadzoned + n_deadzoned + }} + end + + defp groups_from_report(metrics) do + Enum.reduce(metrics, {%{}, 0.0, 0, 0}, fn {metric_key, data}, + {groups, log_baseline, n_updated, n_deadzoned} -> + [group, key] = String.split(metric_key, ".", parts: 2) + + if deadzone?(data.ratio) do + {groups, log_baseline, n_updated, n_deadzoned + 1} + else + accumulate_metric(groups, log_baseline, n_updated, n_deadzoned, group, key, data) + end + end) + end + + defp accumulate_metric(groups, log_baseline, n_updated, n_deadzoned, group, key, data) do + new_groups = + Map.update( + groups, + group, + %{key => data.suggested_scalar}, + &Map.put(&1, key, data.suggested_scalar) + ) + + geo_mean = :math.sqrt(max(data.bad, 1.0e-10) * max(data.good, 1.0e-10)) + new_baseline = log_baseline + data.suggested_scalar * :math.log(geo_mean) + {new_groups, new_baseline, n_updated + 1, n_deadzoned} + end + + defp deadzone?(ratio), do: ratio >= @deadzone_low and ratio <= @deadzone_high + + defp read_behavior_doc(category, behavior) do + config_path = Path.join([@samples_root, category, behavior, "config.yml"]) + + case File.read(config_path) do + {:ok, content} -> + case YamlElixir.read_from_string(content) do + {:ok, %{"doc" => doc}} when is_binary(doc) -> doc + _ -> nil + end + + _ -> + nil + end + end + + defp maybe_put_doc(groups, nil), do: groups + defp maybe_put_doc(groups, doc), do: Map.put(groups, "_doc", doc) + + # --------------------------------------------------------------------------- + # Language detection helpers + # --------------------------------------------------------------------------- + + defp dir_languages(dir) do + case File.ls(dir) do + {:ok, 
files} -> + files + |> Enum.map(&CodeQA.Language.detect/1) + |> Enum.map(& &1.name()) + |> MapSet.new() + + _ -> + MapSet.new() + end + end + + defp languages_for_behavior(category, behavior) do + bad_langs = dir_languages(sample_path(category, behavior, "bad")) + good_langs = dir_languages(sample_path(category, behavior, "good")) + + bad_langs + |> MapSet.intersection(good_langs) + |> MapSet.to_list() + |> Enum.reject(&(&1 == "unknown")) + |> Enum.sort() + end + + defp maybe_put_languages(groups, []), do: groups + defp maybe_put_languages(groups, langs), do: Map.put(groups, "_languages", langs) + + defp sample_path(category, behavior, kind) do + Path.join([@samples_root, category, behavior, kind]) + end +end diff --git a/lib/codeqa/combined_metrics/yaml_formatter.ex b/lib/codeqa/combined_metrics/yaml_formatter.ex new file mode 100644 index 0000000..ca034b2 --- /dev/null +++ b/lib/codeqa/combined_metrics/yaml_formatter.ex @@ -0,0 +1,72 @@ +defmodule CodeQA.CombinedMetrics.YamlFormatter do + @moduledoc """ + Serialises a combined-metrics behavior map back to the hand-authored YAML format. + + Intended for internal use by `SampleRunner`. The output format preserves the + conventions used across `priv/combined_metrics/*.yml`: + + - Behaviors sorted alphabetically + - Meta-keys (`_doc`, `_fix_hint`, `_languages`, `_log_baseline`) emitted before + group sections + - Groups and keys within groups sorted alphabetically + - Floats written with four decimal places + """ + + @doc """ + Serialises a `%{behavior => groups}` map to a YAML string. 
+ """ + @spec format(map()) :: String.t() + def format(data) do + lines = + data + |> Enum.sort_by(fn {behavior, _} -> behavior end) + |> Enum.flat_map(fn {behavior, groups} -> behavior_lines(behavior, groups) end) + + Enum.join(lines, "\n") <> "\n" + end + + # --- Behavior-level serialisation --- + + defp behavior_lines(behavior, groups) do + doc_line = doc_line(Map.get(groups, "_doc")) + baseline_line = baseline_line(Map.get(groups, "_log_baseline")) + fix_hint_line = fix_hint_line(Map.get(groups, "_fix_hint")) + languages_line = languages_line(Map.get(groups, "_languages")) + group_lines = group_lines(groups) + + ["#{behavior}:" | doc_line] ++ + fix_hint_line ++ languages_line ++ baseline_line ++ group_lines ++ [""] + end + + defp doc_line(nil), do: [] + defp doc_line(doc), do: [" _doc: #{inspect(doc)}"] + + defp baseline_line(nil), do: [] + defp baseline_line(val), do: [" _log_baseline: #{fmt_scalar(val)}"] + + defp fix_hint_line(nil), do: [] + defp fix_hint_line(hint), do: [" _fix_hint: #{inspect(hint)}"] + + defp languages_line(nil), do: [] + defp languages_line([]), do: [] + defp languages_line(langs), do: [" _languages: [#{Enum.join(langs, ", ")}]"] + + defp group_lines(groups) do + groups + |> Enum.filter(fn {k, v} -> + k not in ["_doc", "_log_baseline", "_fix_hint", "_languages"] and is_map(v) + end) + |> Enum.sort_by(fn {group, _} -> group end) + |> Enum.flat_map(fn {group, keys} -> + key_lines = + keys + |> Enum.sort_by(fn {key, _} -> key end) + |> Enum.map(fn {key, scalar} -> " #{key}: #{fmt_scalar(scalar)}" end) + + [" #{group}:" | key_lines] + end) + end + + defp fmt_scalar(f) when is_float(f), do: :erlang.float_to_binary(f, decimals: 4) + defp fmt_scalar(n) when is_integer(n), do: "#{n}.0" +end diff --git a/lib/codeqa/health_report/grader.ex b/lib/codeqa/health_report/grader.ex index 8011c8f..46ed374 100644 --- a/lib/codeqa/health_report/grader.ex +++ b/lib/codeqa/health_report/grader.ex @@ -13,11 +13,11 @@ defmodule CodeQA.HealthReport.Grader do 
""" @spec score_metric(map(), number()) :: integer() def score_metric(%{good: :high, thresholds: t}, value) do - value |> score_high_is_good(t) |> clamp(0, 100) + score_by_direction(:high, value, t) |> clamp(0, 100) end def score_metric(%{good: _, thresholds: t}, value) do - value |> score_low_is_good(t) |> clamp(0, 100) + score_by_direction(:low, value, t) |> clamp(0, 100) end @doc """ @@ -46,28 +46,27 @@ defmodule CodeQA.HealthReport.Grader do defp cosine_to_score(c) when c >= -0.3, do: interpolate_between(c, -0.3, 30, 0.0, 50) defp cosine_to_score(c), do: interpolate_between(c, -1.0, 0, -0.3, 30) - # Lower values are better: below A = 100, A = 90, A-B = 70-90, etc. - defp score_low_is_good(val, t) do + # :low — lower values are better (t.a < t.b < t.c < t.d); below t.a = 100 + # :high — higher values are better (t.a > t.b > t.c > t.d); above t.a = 100 + defp score_by_direction(:low, val, t) do cond do val < t.a -> 100 val == t.a -> 90 val <= t.b -> interpolate_between(val, t.a, 90, t.b, 70) val <= t.c -> interpolate_between(val, t.b, 70, t.c, 50) val <= t.d -> interpolate_between(val, t.c, 50, t.d, 30) - true -> interpolate_below_d(val, t.d, 30) + true -> interpolate_beyond_d(val, t.d, 30) end end - # Higher values are better: above A = 100, A = 90, A-B = 70-90, etc. 
- # Thresholds are in descending order (a > b > c > d) - defp score_high_is_good(val, t) do + defp score_by_direction(:high, val, t) do cond do val > t.a -> 100 val == t.a -> 90 val >= t.b -> interpolate_between(val, t.a, 90, t.b, 70) val >= t.c -> interpolate_between(val, t.b, 70, t.c, 50) val >= t.d -> interpolate_between(val, t.c, 50, t.d, 30) - true -> interpolate_below_d_high(val, t.d, 30) + true -> interpolate_beyond_d(val, t.d, 30) end end @@ -82,24 +81,19 @@ defmodule CodeQA.HealthReport.Grader do end end - # Value beyond D threshold (low is good): score degrades below 30 - defp interpolate_below_d(_val, threshold_d, _score_at_d) when threshold_d == 0, do: 0 + # Score degrades below 30 when value is beyond the D threshold in either direction. + # abs(val - threshold_d) captures overshoot for :low and undershoot for :high uniformly. + defp interpolate_beyond_d(_val, threshold_d, _score_at_d) when threshold_d == 0, do: 0 - defp interpolate_below_d(val, threshold_d, score_at_d) do - overshoot = (val - threshold_d) / threshold_d - round(Kernel.max(0, score_at_d - overshoot * score_at_d)) + defp interpolate_beyond_d(val, threshold_d, score_at_d) do + deviation = abs(val - threshold_d) / threshold_d + round(Kernel.max(0, score_at_d - deviation * score_at_d)) end - # Value below D threshold (high is good): score degrades below 30 - defp interpolate_below_d_high(_val, threshold_d, _score_at_d) when threshold_d == 0, do: 0 - - defp interpolate_below_d_high(val, threshold_d, score_at_d) do - undershoot = (threshold_d - val) / threshold_d - round(Kernel.max(0, score_at_d - undershoot * score_at_d)) + defp clamp(val, min_val, max_val) do + val |> Kernel.max(min_val) |> Kernel.min(max_val) end - defp clamp(val, min_val, max_val), do: val |> Kernel.max(min_val) |> Kernel.min(max_val) - @doc "Convert a numeric score (0-100) to a letter grade using the given scale." 
@spec grade_letter(number(), [{number(), String.t()}]) :: String.t() def grade_letter(score, scale \\ Categories.default_grade_scale()) do @@ -120,31 +114,10 @@ defmodule CodeQA.HealthReport.Grader do ) do scored = category.metrics - |> Enum.map(fn metric_def -> - value = get_in(file_metrics, [metric_def.source, metric_def.name]) - - if value do - %{ - name: metric_def.name, - source: metric_def.source, - weight: metric_def.weight, - good: metric_def.good, - value: value, - score: score_metric(metric_def, value) - } - end - end) + |> Enum.map(&score_metric_entry(&1, file_metrics)) |> Enum.reject(&is_nil/1) - total_weight = Enum.reduce(scored, 0.0, fn s, acc -> acc + s.weight end) - - score = - if total_weight > 0 do - weighted = Enum.reduce(scored, 0.0, fn s, acc -> acc + s.score * s.weight end) - round(weighted / total_weight) - else - 0 - end + score = weighted_category_score(scored) %{ key: category.key, @@ -155,6 +128,34 @@ defmodule CodeQA.HealthReport.Grader do } end + defp score_metric_entry(metric_def, file_metrics) do + value = get_in(file_metrics, [metric_def.source, metric_def.name]) + + if value do + %{ + name: metric_def.name, + source: metric_def.source, + weight: metric_def.weight, + good: metric_def.good, + value: value, + score: score_metric(metric_def, value) + } + end + end + + defp weighted_category_score([]), do: 0 + + defp weighted_category_score(scored) do + total_weight = Enum.reduce(scored, 0.0, fn s, acc -> acc + s.weight end) + + if total_weight > 0 do + weighted = Enum.reduce(scored, 0.0, fn s, acc -> acc + s.score * s.weight end) + round(weighted / total_weight) + else + 0 + end + end + @doc """ Grade a file's metrics against all categories. `file_metrics` is the `%{"entropy" => %{...}, "halstead" => %{...}}` map from analysis. 
@@ -252,38 +253,48 @@ defmodule CodeQA.HealthReport.Grader do |> Enum.group_by(& &1.category) |> Enum.map(fn {category, behaviors} -> behavior_entries = - behaviors - |> Enum.reject(fn b -> abs(b.cosine) < threshold end) - |> Enum.map(fn b -> - cosine_score = score_cosine(b.cosine) - - %{ - behavior: b.behavior, - cosine: b.cosine, - score: cosine_score, - grade: grade_letter(cosine_score, scale), - worst_offenders: Map.get(worst_files, "#{category}.#{b.behavior}", []) - } - end) - - category_score = - if behavior_entries == [] do - 50 - else - round(Enum.sum(Enum.map(behavior_entries, & &1.score)) / length(behavior_entries)) - end + score_behavior_entries(behaviors, threshold, worst_files, scale, category) - %{ - type: :cosine, - key: category, - name: humanize_category(category), - score: category_score, - grade: grade_letter(category_score, scale), - behaviors: behavior_entries - } + category_score = average_behavior_score(behavior_entries) + build_cosine_category(category, category_score, behavior_entries, scale) end) end + defp score_behavior_entries(behaviors, threshold, worst_files, scale, category) do + behaviors + |> Enum.reject(fn b -> abs(b.cosine) < threshold end) + |> Enum.map(&score_behavior_entry(&1, worst_files, scale, category)) + end + + defp score_behavior_entry(b, worst_files, scale, category) do + cosine_score = score_cosine(b.cosine) + + %{ + behavior: b.behavior, + cosine: b.cosine, + score: cosine_score, + grade: grade_letter(cosine_score, scale), + worst_offenders: Map.get(worst_files, "#{category}.#{b.behavior}", []) + } + end + + defp average_behavior_score([]), do: 50 + + defp average_behavior_score(entries) do + round(Enum.sum(Enum.map(entries, & &1.score)) / length(entries)) + end + + defp build_cosine_category(category, category_score, behavior_entries, scale) do + %{ + type: :cosine, + key: category, + name: humanize_category(category), + score: category_score, + grade: grade_letter(category_score, scale), + behaviors: 
behavior_entries + } + end + defp humanize_category(slug) do slug |> String.split("_") diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks.ex b/lib/codeqa/metrics/file/near_duplicate_blocks.ex index 94ba060..e1e0c08 100644 --- a/lib/codeqa/metrics/file/near_duplicate_blocks.ex +++ b/lib/codeqa/metrics/file/near_duplicate_blocks.ex @@ -11,114 +11,30 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do d5 ≤ 25%, d6 ≤ 30%, d7 ≤ 40%, d8 ≤ 50% """ - alias CodeQA.AST.Classification.NodeProtocol alias CodeQA.AST.Enrichment.Node - alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken} alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Parser alias CodeQA.Language + alias CodeQA.Metrics.File.NearDuplicateBlocks.Candidates + alias CodeQA.Metrics.File.NearDuplicateBlocks.Distance @max_bucket 8 - @bucket_thresholds [ - {0, 0.0}, - {1, 0.05}, - {2, 0.10}, - {3, 0.15}, - {4, 0.20}, - {5, 0.25}, - {6, 0.30}, - {7, 0.40}, - {8, 0.50} - ] - - # Pre-compute token kind strings to avoid repeated function calls in the hot path. - @nl_kind NewlineToken.kind() - @ws_kind WhitespaceToken.kind() + + # --------------------------------------------------------------------------- + # Public API — distance helpers delegated to Distance submodule + # --------------------------------------------------------------------------- @doc "Standard Levenshtein distance between two token lists." 
@spec token_edit_distance([String.t()], [String.t()]) :: non_neg_integer() - def token_edit_distance([], b), do: length(b) - def token_edit_distance(a, []), do: length(a) - - def token_edit_distance(a, b) do - a_arr = List.to_tuple(a) - b_arr = List.to_tuple(b) - lb = tuple_size(b_arr) - init_row = List.to_tuple(Enum.to_list(0..lb)) - result_row = levenshtein_rows(a_arr, b_arr, tuple_size(a_arr), lb, init_row, 1) - elem(result_row, lb) - end - - defp levenshtein_rows(_a, _b, la, _lb, prev, i) when i > la, do: prev - - defp levenshtein_rows(a, b, la, lb, prev, i) do - ai = elem(a, i - 1) - curr_reversed = levenshtein_cols(b, lb, prev, ai, [i], 1) - curr = List.to_tuple(:lists.reverse(curr_reversed)) - levenshtein_rows(a, b, la, lb, curr, i + 1) - end - - defp levenshtein_cols(_b, lb, _prev, _ai, acc, j) when j > lb, do: acc - - defp levenshtein_cols(b, lb, prev, ai, [last_val | _] = acc, j) do - cost = if ai == elem(b, j - 1), do: 0, else: 1 - val = min(elem(prev, j) + 1, min(last_val + 1, elem(prev, j - 1) + cost)) - levenshtein_cols(b, lb, prev, ai, [val | acc], j + 1) - end - - # Bounded Levenshtein: returns the edit distance, or max_distance + 1 if the - # distance would exceed max_distance. Bails after each row when the row minimum - # already exceeds max_distance — the final distance can only grow from there. 
- defp token_edit_distance_bounded([], b, _max), do: length(b) - defp token_edit_distance_bounded(a, [], _max), do: length(a) - - defp token_edit_distance_bounded(a, b, max_distance) do - a_arr = List.to_tuple(a) - b_arr = List.to_tuple(b) - lb = tuple_size(b_arr) - init_row = List.to_tuple(Enum.to_list(0..lb)) - levenshtein_rows_bounded(a_arr, b_arr, tuple_size(a_arr), lb, init_row, max_distance, 1) - end - - defp levenshtein_rows_bounded(_a, _b, la, lb, prev, _max, i) when i > la, do: elem(prev, lb) - - defp levenshtein_rows_bounded(a, b, la, lb, prev, max_distance, i) do - ai = elem(a, i - 1) - # levenshtein_cols_with_min tracks the row minimum as it builds, avoiding - # a separate O(lb) pass to find the min after the row is complete. - {curr_reversed, row_min} = levenshtein_cols_with_min(b, lb, prev, ai, {[i], i}, 1) - curr = List.to_tuple(:lists.reverse(curr_reversed)) - - if row_min > max_distance do - max_distance + 1 - else - levenshtein_rows_bounded(a, b, la, lb, curr, max_distance, i + 1) - end - end - - defp levenshtein_cols_with_min(_b, lb, _prev, _ai, acc_and_min, j) when j > lb, do: acc_and_min - - defp levenshtein_cols_with_min(b, lb, prev, ai, {[last_val | _] = acc, min_val}, j) do - cost = if ai == elem(b, j - 1), do: 0, else: 1 - val = min(elem(prev, j) + 1, min(last_val + 1, elem(prev, j - 1) + cost)) - levenshtein_cols_with_min(b, lb, prev, ai, {[val | acc], min(min_val, val)}, j + 1) - end + defdelegate token_edit_distance(a, b), to: Distance @doc "Map an edit distance and min token count to a percentage bucket 0–8, or nil if > 50%." 
@spec percent_bucket(non_neg_integer(), non_neg_integer()) :: 0..8 | nil - def percent_bucket(_ed, 0), do: nil - def percent_bucket(0, _min_count), do: 0 + defdelegate percent_bucket(ed, min_count), to: Distance - def percent_bucket(ed, min_count) do - pct = ed / min_count - - @bucket_thresholds - |> Enum.find(fn {bucket, threshold} -> bucket > 0 and pct <= threshold end) - |> case do - {bucket, _} -> bucket - nil -> nil - end - end + # --------------------------------------------------------------------------- + # Public API — analysis entry points + # --------------------------------------------------------------------------- @doc """ Analyze a list of `{path, content}` pairs for near-duplicate blocks. @@ -192,6 +108,18 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do buckets end + @doc false + def label_blocks(blocks, path) do + Enum.map(blocks, fn block -> + label = if block.start_line, do: "#{path}:#{block.start_line}", else: path + %{block | label: label} + end) + end + + # --------------------------------------------------------------------------- + # Internal pair-finding pipeline + # --------------------------------------------------------------------------- + # Internal implementation returning {buckets, sub_block_count} so that # analyze_from_blocks gets both without a redundant NodeProtocol.children pass. defp do_find_pairs(blocks, opts) do @@ -203,21 +131,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do if length(blocks) < 2 do {%{}, 0} else - # Pre-compute canonical values and hashes once per block. Each decorated entry - # is {index, block, values, hash, len_values, children_count, newline_count, bigrams} - # so downstream functions never recompute them. 
- decorated = - blocks - |> Enum.with_index() - |> Enum.map(fn {block, i} -> - values = canonical_values(NodeProtocol.flat_tokens(block)) - children_count = length(NodeProtocol.children(block)) - newline_count = Enum.count(values, &(&1 == @nl_kind)) - bigrams = Enum.chunk_every(values, 2, 1, :discard) - - {i, block, values, :erlang.phash2(values), length(values), children_count, - newline_count, bigrams} - end) + decorated = Candidates.decorate(blocks) # sub_block_count derived from the already-computed children_count in decorated. sub_block_count = @@ -226,16 +140,16 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do # IDF: prune bigrams that appear in more than idf_max_freq fraction of blocks. # These are structural noise (e.g. "end nil", "return false") that inflate the # candidate set without helping identify true duplicates. - pruned = compute_frequent_bigrams(decorated, idf_max_freq) + pruned = Candidates.compute_frequent_bigrams(decorated, idf_max_freq) decorated = if MapSet.size(pruned) > 0 do - Enum.map(decorated, &prune_bigrams(&1, pruned)) + Enum.map(decorated, &Candidates.prune_bigrams(&1, pruned)) else decorated end - {exact_index, shingle_index} = build_indexes(decorated) + {exact_index, shingle_index} = Candidates.build_indexes(decorated) total = length(decorated) # Convert to tuple for O(1) indexed lookup inside the hot comparison loop. @@ -247,177 +161,15 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do raw_pairs = decorated |> Flow.from_enumerable(max_demand: 10, stages: workers) - |> Flow.flat_map(&find_pairs_for_block(&1, decorated_arr, exact_index, shingle_index)) + |> Flow.flat_map( + &Candidates.find_pairs_for_block(&1, decorated_arr, exact_index, shingle_index) + ) |> Enum.to_list() {bucket_pairs(raw_pairs, max_pairs), sub_block_count} end end - # Returns the set of bigram hashes that appear in more than max_freq fraction of blocks. 
- defp compute_frequent_bigrams(decorated, max_freq) do - total = length(decorated) - # Minimum threshold of 2 so a bigram must appear in 3+ blocks before being - # pruned — prevents over-pruning when the total block count is very small - # (e.g. with 2 blocks, any shared bigram would otherwise always be pruned). - threshold = max(2, round(total * max_freq)) - - decorated - |> Enum.reduce(%{}, fn {_, _, _, _, _, _, _, bigrams}, acc -> - bigrams - |> Enum.uniq_by(&:erlang.phash2/1) - |> Enum.reduce(acc, fn bigram, a -> - Map.update(a, :erlang.phash2(bigram), 1, &(&1 + 1)) - end) - end) - |> Enum.filter(fn {_, count} -> count > threshold end) - |> Enum.map(&elem(&1, 0)) - |> MapSet.new() - end - - # Strip leading/trailing and tokens and extract kind values as strings. - # This ensures blocks split at blank-line boundaries compare as equal - # even if trailing newlines differ between first and last blocks. - # Returns [String.t()] (kinds only) so hashing, comparison, and edit distance - # are independent of token line/col metadata. - # - # Optimised to 3 passes: one reduce (skip leading NL/WS + collect reversed kinds), - # one drop_while (strip trailing from the reversed list), one :lists.reverse. - defp canonical_values(tokens) do - # Pass 1: skip leading NL/WS while building a reversed kinds list. - {reversed, _in_content} = - Enum.reduce(tokens, {[], false}, fn t, {acc, in_content} -> - kind = t.kind - is_skip = kind == @nl_kind or kind == @ws_kind - - if in_content or not is_skip do - {[kind | acc], true} - else - {acc, false} - end - end) - - # Pass 2: drop trailing NL/WS (which appear as leading in the reversed list). - # Pass 3: reverse back to source order via native BIF. - reversed - |> Enum.drop_while(&(&1 == @nl_kind or &1 == @ws_kind)) - |> :lists.reverse() - end - - # Build both exact (hash → [idx]) and shingle (bigram_hash → [idx]) indexes in one pass, - # using the pre-computed values from the decorated list. 
- defp build_indexes(decorated) do - Enum.reduce(decorated, {%{}, %{}}, fn {idx, _block, _values, hash, _len, _children, _newlines, - bigrams}, - {exact_acc, shingle_acc} -> - exact_acc = Map.update(exact_acc, hash, [idx], &[idx | &1]) - - shingle_acc = - bigrams - |> Enum.reduce(shingle_acc, fn bigram, sh_acc -> - h = :erlang.phash2(bigram) - Map.update(sh_acc, h, [idx], &[idx | &1]) - end) - - {exact_acc, shingle_acc} - end) - end - - defp find_pairs_for_block( - {i, block_a, values_a, hash_a, len_a, children_a, newlines_a, bigrams_a}, - decorated_arr, - exact_index, - shingle_index - ) do - # For small exact-match lists (typically 0–3 entries) a plain list membership - # check avoids the overhead of constructing a MapSet. - exact_list = Map.get(exact_index, hash_a, []) - - # For d0 (exact), find hash-matching blocks and confirm with value equality - # to guard against phash2 collisions. - exact_pairs = - exact_list - |> Enum.filter(&(&1 > i)) - |> Enum.map(fn j -> - {_j, block_b, values_b, _hash_b, _len_b, children_b, newlines_b, _bigrams_b} = - elem(decorated_arr, j) - - if values_b == values_a and - structure_compatible?(children_a, newlines_a, children_b, newlines_b) do - {0, {block_a.label, block_b.label}} - else - nil - end - end) - |> Enum.reject(&is_nil/1) - - # For d1-d8 (near), use shingle index to find candidates. 
- min_shared = max(0, round(len_a * 0.5) - 1) - - near_pairs = - bigrams_a - |> Enum.reduce(%{}, fn bigram, acc -> - h = :erlang.phash2(bigram) - Map.get(shingle_index, h, []) |> Enum.reduce(acc, &count_candidate(&1, &2, i)) - end) - |> Enum.filter(fn {_, count} -> count >= min_shared end) - |> Enum.map(&elem(&1, 0)) - |> Enum.reject(fn j -> j in exact_list end) - |> Enum.flat_map(fn j -> - near_pair_for_candidate( - j, - decorated_arr, - block_a, - values_a, - len_a, - children_a, - newlines_a - ) - end) - - exact_pairs ++ near_pairs - end - - defp count_candidate(j, cnt, i) when j > i, do: Map.update(cnt, j, 1, &(&1 + 1)) - defp count_candidate(_j, cnt, _i), do: cnt - - defp near_pair_for_candidate(j, decorated_arr, block_a, values_a, len_a, children_a, newlines_a) do - {_j, block_b, values_b, _hash_b, len_b, children_b, newlines_b, _bigrams_b} = - elem(decorated_arr, j) - - min_count = min(len_a, len_b) - max_allowed = round(min_count * 0.5) - - if structure_compatible?(children_a, newlines_a, children_b, newlines_b) and - abs(len_a - len_b) <= max_allowed do - ed = token_edit_distance_bounded(values_a, values_b, max_allowed) - - case percent_bucket(ed, min_count) do - nil -> [] - bucket when bucket > 0 -> [{bucket, {block_a.label, block_b.label}}] - # ed=0 handled by exact_pairs above - _ -> [] - end - else - [] - end - end - - defp prune_bigrams({i, b, v, h, l, c, n, bigrams}, pruned) do - {i, b, v, h, l, c, n, Enum.reject(bigrams, &MapSet.member?(pruned, :erlang.phash2(&1)))} - end - - # Uses pre-computed children counts and newline counts from the decorated tuple - # so NodeProtocol.children/1 and Enum.count/2 are not called per candidate pair. 
- defp structure_compatible?(children_a, newlines_a, children_b, newlines_b) do - sub_diff = abs(children_a - children_b) - lines_a = newlines_a + 1 - lines_b = newlines_b + 1 - max_lines = max(lines_a, lines_b) - line_ratio = if max_lines > 0, do: abs(lines_a - lines_b) / max_lines, else: 0.0 - sub_diff <= 1 and line_ratio <= 0.30 - end - defp bucket_pairs(raw_pairs, max_pairs) do Enum.reduce(raw_pairs, %{}, fn {bucket, pair}, acc -> Map.update( @@ -438,14 +190,6 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocks do defp maybe_append(list, _pair, max, count) when is_integer(max) and count >= max, do: list defp maybe_append(list, pair, _max, _count), do: [pair | list] - @doc false - def label_blocks(blocks, path) do - Enum.map(blocks, fn block -> - label = if block.start_line, do: "#{path}:#{block.start_line}", else: path - %{block | label: label} - end) - end - defp format_pairs(pairs) do Enum.map(pairs, fn {label_a, label_b} -> %{"source_a" => label_a, "source_b" => label_b} diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks/candidates.ex b/lib/codeqa/metrics/file/near_duplicate_blocks/candidates.ex new file mode 100644 index 0000000..01fa53e --- /dev/null +++ b/lib/codeqa/metrics/file/near_duplicate_blocks/candidates.ex @@ -0,0 +1,214 @@ +defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Candidates do + @moduledoc """ + Block fingerprinting, indexing, and candidate-pair matching for near-duplicate detection. 
+ + Handles: + - Canonical token-value extraction (stripping leading/trailing whitespace tokens) + - Exact-hash and shingle indexes for fast candidate lookup + - IDF-based bigram pruning to reduce structural-noise candidates + - Structural compatibility checks (child-count and line-ratio guards) + - Pair scoring and bucketing + """ + + alias CodeQA.AST.Classification.NodeProtocol + alias CodeQA.AST.Lexing.{NewlineToken, WhitespaceToken} + alias CodeQA.Metrics.File.NearDuplicateBlocks.Distance + + # Pre-compute token kind strings to avoid repeated function calls in the hot path. + @nl_kind NewlineToken.kind() + @ws_kind WhitespaceToken.kind() + + @doc """ + Decorate a list of blocks with pre-computed canonical values, hashes, bigrams, and + structural metadata. Each entry is an 8-tuple: + + {index, block, values, hash, len_values, children_count, newline_count, bigrams} + """ + @spec decorate([term()]) :: [tuple()] + def decorate(blocks) do + blocks + |> Enum.with_index() + |> Enum.map(fn {block, i} -> + values = canonical_values(NodeProtocol.flat_tokens(block)) + children_count = length(NodeProtocol.children(block)) + newline_count = Enum.count(values, &(&1 == @nl_kind)) + bigrams = Enum.chunk_every(values, 2, 1, :discard) + + {i, block, values, :erlang.phash2(values), length(values), children_count, newline_count, + bigrams} + end) + end + + @doc """ + Build both exact (hash → [idx]) and shingle (bigram_hash → [idx]) indexes in one pass, + using the pre-computed values from the decorated list. 
+ """ + @spec build_indexes([tuple()]) :: {map(), map()} + def build_indexes(decorated) do + Enum.reduce(decorated, {%{}, %{}}, fn {idx, _block, _values, hash, _len, _children, _newlines, + bigrams}, + {exact_acc, shingle_acc} -> + exact_acc = Map.update(exact_acc, hash, [idx], &[idx | &1]) + + shingle_acc = + bigrams + |> Enum.reduce(shingle_acc, fn bigram, sh_acc -> + h = :erlang.phash2(bigram) + Map.update(sh_acc, h, [idx], &[idx | &1]) + end) + + {exact_acc, shingle_acc} + end) + end + + @doc """ + Returns the set of bigram hashes that appear in more than `max_freq` fraction of blocks. + + Minimum threshold of 2 so a bigram must appear in 3+ blocks before being pruned — + prevents over-pruning when the total block count is very small. + """ + @spec compute_frequent_bigrams([tuple()], float()) :: MapSet.t() + def compute_frequent_bigrams(decorated, max_freq) do + total = length(decorated) + threshold = max(2, round(total * max_freq)) + + decorated + |> Enum.reduce(%{}, fn {_, _, _, _, _, _, _, bigrams}, acc -> + bigrams + |> Enum.uniq_by(&:erlang.phash2/1) + |> Enum.reduce(acc, fn bigram, a -> + Map.update(a, :erlang.phash2(bigram), 1, &(&1 + 1)) + end) + end) + |> Enum.filter(fn {_, count} -> count > threshold end) + |> Enum.map(&elem(&1, 0)) + |> MapSet.new() + end + + @doc "Remove bigrams whose hash is in the pruned set from a decorated tuple." + @spec prune_bigrams(tuple(), MapSet.t()) :: tuple() + def prune_bigrams({i, b, v, h, l, c, n, bigrams}, pruned) do + {i, b, v, h, l, c, n, Enum.reject(bigrams, &MapSet.member?(pruned, :erlang.phash2(&1)))} + end + + @doc """ + Find all near-duplicate pairs for a single block against the full decorated array. + Returns a list of `{bucket, {label_a, label_b}}` pairs. 
+ """ + @spec find_pairs_for_block(tuple(), tuple(), map(), map()) :: list() + def find_pairs_for_block( + {i, block_a, values_a, hash_a, len_a, children_a, newlines_a, bigrams_a}, + decorated_arr, + exact_index, + shingle_index + ) do + # For small exact-match lists (typically 0–3 entries) a plain list membership + # check avoids the overhead of constructing a MapSet. + exact_list = Map.get(exact_index, hash_a, []) + + # For d0 (exact), find hash-matching blocks and confirm with value equality + # to guard against phash2 collisions. + exact_pairs = + exact_list + |> Enum.filter(&(&1 > i)) + |> Enum.map(fn j -> + {_j, block_b, values_b, _hash_b, _len_b, children_b, newlines_b, _bigrams_b} = + elem(decorated_arr, j) + + if values_b == values_a and + structure_compatible?(children_a, newlines_a, children_b, newlines_b) do + {0, {block_a.label, block_b.label}} + else + nil + end + end) + |> Enum.reject(&is_nil/1) + + # For d1-d8 (near), use shingle index to find candidates. + min_shared = max(0, round(len_a * 0.5) - 1) + + near_pairs = + bigrams_a + |> Enum.reduce(%{}, fn bigram, acc -> + h = :erlang.phash2(bigram) + Map.get(shingle_index, h, []) |> Enum.reduce(acc, &count_candidate(&1, &2, i)) + end) + |> Enum.filter(fn {_, count} -> count >= min_shared end) + |> Enum.map(&elem(&1, 0)) + |> Enum.reject(fn j -> j in exact_list end) + |> Enum.flat_map(fn j -> + near_pair_for_candidate( + j, + decorated_arr, + block_a, + values_a, + len_a, + children_a, + newlines_a + ) + end) + + exact_pairs ++ near_pairs + end + + # --------------------------------------------------------------------------- + # Private helpers + # --------------------------------------------------------------------------- + + # Strip leading/trailing and tokens and extract kind values as strings. + # Optimised to 3 passes: one reduce (skip leading NL/WS + collect reversed kinds), + # one drop_while (strip trailing), one :lists.reverse. 
+ defp canonical_values(tokens) do + {reversed, _in_content} = + Enum.reduce(tokens, {[], false}, fn t, {acc, in_content} -> + kind = t.kind + is_skip = kind == @nl_kind or kind == @ws_kind + + if in_content or not is_skip do + {[kind | acc], true} + else + {acc, false} + end + end) + + reversed + |> Enum.drop_while(&(&1 == @nl_kind or &1 == @ws_kind)) + |> :lists.reverse() + end + + defp count_candidate(j, cnt, i) when j > i, do: Map.update(cnt, j, 1, &(&1 + 1)) + defp count_candidate(_j, cnt, _i), do: cnt + + defp near_pair_for_candidate(j, decorated_arr, block_a, values_a, len_a, children_a, newlines_a) do + {_j, block_b, values_b, _hash_b, len_b, children_b, newlines_b, _bigrams_b} = + elem(decorated_arr, j) + + min_count = min(len_a, len_b) + max_allowed = round(min_count * 0.5) + + if structure_compatible?(children_a, newlines_a, children_b, newlines_b) and + abs(len_a - len_b) <= max_allowed do + ed = Distance.token_edit_distance_bounded(values_a, values_b, max_allowed) + + case Distance.percent_bucket(ed, min_count) do + nil -> [] + bucket when bucket > 0 -> [{bucket, {block_a.label, block_b.label}}] + # ed=0 handled by exact_pairs above + _ -> [] + end + else + [] + end + end + + # Uses pre-computed children counts and newline counts from the decorated tuple + # so NodeProtocol.children/1 and Enum.count/2 are not called per candidate pair. 
+ defp structure_compatible?(children_a, newlines_a, children_b, newlines_b) do + sub_diff = abs(children_a - children_b) + lines_a = newlines_a + 1 + lines_b = newlines_b + 1 + max_lines = max(lines_a, lines_b) + line_ratio = if max_lines > 0, do: abs(lines_a - lines_b) / max_lines, else: 0.0 + sub_diff <= 1 and line_ratio <= 0.30 + end +end diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks/distance.ex b/lib/codeqa/metrics/file/near_duplicate_blocks/distance.ex new file mode 100644 index 0000000..475aa3e --- /dev/null +++ b/lib/codeqa/metrics/file/near_duplicate_blocks/distance.ex @@ -0,0 +1,114 @@ +defmodule CodeQA.Metrics.File.NearDuplicateBlocks.Distance do + @moduledoc """ + Token-level edit distance and percentage-bucket classification for near-duplicate detection. + + Provides standard Levenshtein distance, a bounded variant that short-circuits + when the distance already exceeds a threshold, and a bucket classifier that maps + an edit distance + minimum token count to a similarity bucket (d0–d8). + + Distance buckets: + d0 = exact (0%), d1 ≤ 5%, d2 ≤ 10%, d3 ≤ 15%, d4 ≤ 20%, + d5 ≤ 25%, d6 ≤ 30%, d7 ≤ 40%, d8 ≤ 50% + """ + + @bucket_thresholds [ + {0, 0.0}, + {1, 0.05}, + {2, 0.10}, + {3, 0.15}, + {4, 0.20}, + {5, 0.25}, + {6, 0.30}, + {7, 0.40}, + {8, 0.50} + ] + + @doc "Standard Levenshtein distance between two token lists." 
+ @spec token_edit_distance([String.t()], [String.t()]) :: non_neg_integer() + def token_edit_distance([], b), do: length(b) + def token_edit_distance(a, []), do: length(a) + + def token_edit_distance(a, b) do + a_arr = List.to_tuple(a) + b_arr = List.to_tuple(b) + lb = tuple_size(b_arr) + init_row = List.to_tuple(Enum.to_list(0..lb)) + result_row = levenshtein_rows(a_arr, b_arr, tuple_size(a_arr), lb, init_row, 1) + elem(result_row, lb) + end + + defp levenshtein_rows(_a, _b, la, _lb, prev, i) when i > la, do: prev + + defp levenshtein_rows(a, b, la, lb, prev, i) do + ai = elem(a, i - 1) + curr_reversed = levenshtein_cols(b, lb, prev, ai, [i], 1) + curr = List.to_tuple(:lists.reverse(curr_reversed)) + levenshtein_rows(a, b, la, lb, curr, i + 1) + end + + defp levenshtein_cols(_b, lb, _prev, _ai, acc, j) when j > lb, do: acc + + defp levenshtein_cols(b, lb, prev, ai, [last_val | _] = acc, j) do + cost = if ai == elem(b, j - 1), do: 0, else: 1 + val = min(elem(prev, j) + 1, min(last_val + 1, elem(prev, j - 1) + cost)) + levenshtein_cols(b, lb, prev, ai, [val | acc], j + 1) + end + + # Bounded Levenshtein: returns the edit distance, or max_distance + 1 if the + # distance would exceed max_distance. Bails after each row when the row minimum + # already exceeds max_distance — the final distance can only grow from there. 
+ @doc false + @spec token_edit_distance_bounded([String.t()], [String.t()], non_neg_integer()) :: + non_neg_integer() + def token_edit_distance_bounded([], b, _max), do: length(b) + def token_edit_distance_bounded(a, [], _max), do: length(a) + + def token_edit_distance_bounded(a, b, max_distance) do + a_arr = List.to_tuple(a) + b_arr = List.to_tuple(b) + lb = tuple_size(b_arr) + init_row = List.to_tuple(Enum.to_list(0..lb)) + levenshtein_rows_bounded(a_arr, b_arr, tuple_size(a_arr), lb, init_row, max_distance, 1) + end + + defp levenshtein_rows_bounded(_a, _b, la, lb, prev, _max, i) when i > la, do: elem(prev, lb) + + defp levenshtein_rows_bounded(a, b, la, lb, prev, max_distance, i) do + ai = elem(a, i - 1) + # levenshtein_cols_with_min tracks the row minimum as it builds, avoiding + # a separate O(lb) pass to find the min after the row is complete. + {curr_reversed, row_min} = levenshtein_cols_with_min(b, lb, prev, ai, {[i], i}, 1) + curr = List.to_tuple(:lists.reverse(curr_reversed)) + + if row_min > max_distance do + max_distance + 1 + else + levenshtein_rows_bounded(a, b, la, lb, curr, max_distance, i + 1) + end + end + + defp levenshtein_cols_with_min(_b, lb, _prev, _ai, acc_and_min, j) when j > lb, + do: acc_and_min + + defp levenshtein_cols_with_min(b, lb, prev, ai, {[last_val | _] = acc, min_val}, j) do + cost = if ai == elem(b, j - 1), do: 0, else: 1 + val = min(elem(prev, j) + 1, min(last_val + 1, elem(prev, j - 1) + cost)) + levenshtein_cols_with_min(b, lb, prev, ai, {[val | acc], min(min_val, val)}, j + 1) + end + + @doc "Map an edit distance and min token count to a percentage bucket 0–8, or nil if > 50%." 
+ @spec percent_bucket(non_neg_integer(), non_neg_integer()) :: 0..8 | nil + def percent_bucket(_ed, 0), do: nil + def percent_bucket(0, _min_count), do: 0 + + def percent_bucket(ed, min_count) do + pct = ed / min_count + + @bucket_thresholds + |> Enum.find(fn {bucket, threshold} -> bucket > 0 and pct <= threshold end) + |> case do + {bucket, _} -> bucket + nil -> nil + end + end +end diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml index f1c73c5..2201f7d 100644 --- a/priv/combined_metrics/code_smells.yml +++ b/priv/combined_metrics/code_smells.yml @@ -1,5 +1,6 @@ consistent_string_quote_style: _doc: "Files should use a single, consistent string quoting style throughout." + _languages: [elixir] _log_baseline: -18.2553 branching: mean_branching_density: 0.0243 @@ -101,6 +102,7 @@ consistent_string_quote_style: no_dead_code_after_return: _doc: "There should be no unreachable statements after a return or early exit." + _languages: [elixir] _log_baseline: -55.8435 branching: mean_branch_count: -2.0000 @@ -211,6 +213,7 @@ no_dead_code_after_return: no_debug_print_statements: _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." + _languages: [elixir] _log_baseline: -88.0844 branching: mean_branch_count: -0.3540 @@ -321,6 +324,7 @@ no_debug_print_statements: no_fixme_comments: _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." + _languages: [elixir] _log_baseline: -2.0233 branching: mean_branch_count: 0.1755 @@ -435,6 +439,7 @@ no_fixme_comments: no_nested_ternary: _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." 
+ _languages: [elixir] _log_baseline: 7.6475 branching: mean_branch_count: -0.5662 diff --git a/priv/combined_metrics/variable_naming.yml b/priv/combined_metrics/variable_naming.yml index 1be9c6b..e7bc6fa 100644 --- a/priv/combined_metrics/variable_naming.yml +++ b/priv/combined_metrics/variable_naming.yml @@ -1,5 +1,6 @@ boolean_has_is_has_prefix: _doc: "Boolean variables should be prefixed with `is_`, `has_`, or `can_`." + _languages: [elixir, javascript, ruby] _log_baseline: 15.9481 brevity: mean_sample_size: 0.0752 @@ -62,6 +63,7 @@ boolean_has_is_has_prefix: collection_name_is_plural: _doc: "Variables holding a collection should use a plural name." + _languages: [elixir, javascript, ruby] _log_baseline: 21.8380 brevity: mean_sample_size: -0.5320 @@ -123,6 +125,7 @@ collection_name_is_plural: loop_var_is_single_letter: _doc: "Loop index variables (`i`, `j`, `k`) are acceptable inside loop bodies." + _languages: [elixir, javascript, ruby] _log_baseline: -28.3218 brevity: mean_sample_size: -0.1049 @@ -202,6 +205,7 @@ loop_var_is_single_letter: name_contains_and: _doc: "Variable names containing `and` signal a variable that holds two concerns." + _languages: [elixir, javascript, ruby] _log_baseline: 0.4689 branching: mean_branch_count: -0.3666 @@ -320,6 +324,7 @@ name_contains_and: name_contains_type_suffix: _doc: "Type suffixes in names (`userString`, `nameList`) are redundant noise." + _languages: [elixir, javascript, ruby] _log_baseline: -26.6817 branching: mean_branch_count: -0.4150 @@ -405,6 +410,7 @@ name_contains_type_suffix: name_is_abbreviation: _doc: "Abbreviated names (`usr`, `cfg`, `mgr`) reduce readability." + _languages: [elixir, javascript, ruby] _log_baseline: 10.7370 brevity: mean_sample_size: -0.1542 @@ -491,6 +497,7 @@ name_is_abbreviation: name_is_generic: _doc: "Generic names (`data`, `result`, `tmp`, `val`, `obj`) convey no domain meaning." 
+ _languages: [elixir, javascript, ruby] _log_baseline: 37.4815 branching: mean_branch_count: 0.5193 @@ -607,6 +614,7 @@ name_is_generic: name_is_number_like: _doc: "Number-suffixed names (`var1`, `thing2`) signal a missing abstraction." + _languages: [elixir, javascript, ruby] _log_baseline: 1.7611 brevity: mean_sample_size: -0.0262 @@ -685,6 +693,7 @@ name_is_number_like: name_is_single_letter: _doc: "Single-letter names outside loop indices are too opaque." + _languages: [elixir, javascript, ruby] _log_baseline: 26.2113 branching: mean_branching_density: -0.0458 @@ -782,6 +791,7 @@ name_is_single_letter: name_is_too_long: _doc: "Names longer than ~30 characters harm readability." + _languages: [elixir, javascript, ruby] _log_baseline: -7.8322 branching: mean_branch_count: 0.0340 @@ -902,6 +912,7 @@ name_is_too_long: name_is_too_short: _doc: "Names shorter than 3 characters (outside loops) are too opaque." + _languages: [elixir, javascript, ruby] _log_baseline: -2.7224 branching: mean_branch_count: -0.2327 @@ -989,6 +1000,7 @@ name_is_too_short: negated_boolean_name: _doc: "Negated boolean names (`isNotValid`, `notActive`) are harder to reason about." + _languages: [elixir, javascript, ruby] _log_baseline: -4.4565 brevity: mean_sample_size: -0.0998 @@ -1059,6 +1071,7 @@ negated_boolean_name: no_hungarian_notation: _doc: "Hungarian notation prefixes (`strName`, `bFlag`) add noise without type safety." + _languages: [elixir, javascript, ruby] _log_baseline: -15.5962 brevity: mean_sample_size: -0.0814 @@ -1134,6 +1147,7 @@ no_hungarian_notation: screaming_snake_for_constants: _doc: "Module-level constants should use SCREAMING_SNAKE_CASE." 
+ _languages: [elixir, javascript, ruby] _log_baseline: -5.9884 branching: mean_branching_density: 0.0176 From 97cb5d0b654caf19604deecb1ccadf7e1158aa08 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 21 Mar 2026 14:15:00 +0000 Subject: [PATCH 46/71] chore(combined-metrics): sync language coverage and scalar vectors [skip ci] --- priv/combined_metrics/code_smells.yml | 5 ----- priv/combined_metrics/variable_naming.yml | 14 -------------- 2 files changed, 19 deletions(-) diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml index 2201f7d..f1c73c5 100644 --- a/priv/combined_metrics/code_smells.yml +++ b/priv/combined_metrics/code_smells.yml @@ -1,6 +1,5 @@ consistent_string_quote_style: _doc: "Files should use a single, consistent string quoting style throughout." - _languages: [elixir] _log_baseline: -18.2553 branching: mean_branching_density: 0.0243 @@ -102,7 +101,6 @@ consistent_string_quote_style: no_dead_code_after_return: _doc: "There should be no unreachable statements after a return or early exit." - _languages: [elixir] _log_baseline: -55.8435 branching: mean_branch_count: -2.0000 @@ -213,7 +211,6 @@ no_dead_code_after_return: no_debug_print_statements: _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." - _languages: [elixir] _log_baseline: -88.0844 branching: mean_branch_count: -0.3540 @@ -324,7 +321,6 @@ no_debug_print_statements: no_fixme_comments: _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." - _languages: [elixir] _log_baseline: -2.0233 branching: mean_branch_count: 0.1755 @@ -439,7 +435,6 @@ no_fixme_comments: no_nested_ternary: _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." 
- _languages: [elixir] _log_baseline: 7.6475 branching: mean_branch_count: -0.5662 diff --git a/priv/combined_metrics/variable_naming.yml b/priv/combined_metrics/variable_naming.yml index e7bc6fa..1be9c6b 100644 --- a/priv/combined_metrics/variable_naming.yml +++ b/priv/combined_metrics/variable_naming.yml @@ -1,6 +1,5 @@ boolean_has_is_has_prefix: _doc: "Boolean variables should be prefixed with `is_`, `has_`, or `can_`." - _languages: [elixir, javascript, ruby] _log_baseline: 15.9481 brevity: mean_sample_size: 0.0752 @@ -63,7 +62,6 @@ boolean_has_is_has_prefix: collection_name_is_plural: _doc: "Variables holding a collection should use a plural name." - _languages: [elixir, javascript, ruby] _log_baseline: 21.8380 brevity: mean_sample_size: -0.5320 @@ -125,7 +123,6 @@ collection_name_is_plural: loop_var_is_single_letter: _doc: "Loop index variables (`i`, `j`, `k`) are acceptable inside loop bodies." - _languages: [elixir, javascript, ruby] _log_baseline: -28.3218 brevity: mean_sample_size: -0.1049 @@ -205,7 +202,6 @@ loop_var_is_single_letter: name_contains_and: _doc: "Variable names containing `and` signal a variable that holds two concerns." - _languages: [elixir, javascript, ruby] _log_baseline: 0.4689 branching: mean_branch_count: -0.3666 @@ -324,7 +320,6 @@ name_contains_and: name_contains_type_suffix: _doc: "Type suffixes in names (`userString`, `nameList`) are redundant noise." - _languages: [elixir, javascript, ruby] _log_baseline: -26.6817 branching: mean_branch_count: -0.4150 @@ -410,7 +405,6 @@ name_contains_type_suffix: name_is_abbreviation: _doc: "Abbreviated names (`usr`, `cfg`, `mgr`) reduce readability." - _languages: [elixir, javascript, ruby] _log_baseline: 10.7370 brevity: mean_sample_size: -0.1542 @@ -497,7 +491,6 @@ name_is_abbreviation: name_is_generic: _doc: "Generic names (`data`, `result`, `tmp`, `val`, `obj`) convey no domain meaning." 
- _languages: [elixir, javascript, ruby] _log_baseline: 37.4815 branching: mean_branch_count: 0.5193 @@ -614,7 +607,6 @@ name_is_generic: name_is_number_like: _doc: "Number-suffixed names (`var1`, `thing2`) signal a missing abstraction." - _languages: [elixir, javascript, ruby] _log_baseline: 1.7611 brevity: mean_sample_size: -0.0262 @@ -693,7 +685,6 @@ name_is_number_like: name_is_single_letter: _doc: "Single-letter names outside loop indices are too opaque." - _languages: [elixir, javascript, ruby] _log_baseline: 26.2113 branching: mean_branching_density: -0.0458 @@ -791,7 +782,6 @@ name_is_single_letter: name_is_too_long: _doc: "Names longer than ~30 characters harm readability." - _languages: [elixir, javascript, ruby] _log_baseline: -7.8322 branching: mean_branch_count: 0.0340 @@ -912,7 +902,6 @@ name_is_too_long: name_is_too_short: _doc: "Names shorter than 3 characters (outside loops) are too opaque." - _languages: [elixir, javascript, ruby] _log_baseline: -2.7224 branching: mean_branch_count: -0.2327 @@ -1000,7 +989,6 @@ name_is_too_short: negated_boolean_name: _doc: "Negated boolean names (`isNotValid`, `notActive`) are harder to reason about." - _languages: [elixir, javascript, ruby] _log_baseline: -4.4565 brevity: mean_sample_size: -0.0998 @@ -1071,7 +1059,6 @@ negated_boolean_name: no_hungarian_notation: _doc: "Hungarian notation prefixes (`strName`, `bFlag`) add noise without type safety." - _languages: [elixir, javascript, ruby] _log_baseline: -15.5962 brevity: mean_sample_size: -0.0814 @@ -1147,7 +1134,6 @@ no_hungarian_notation: screaming_snake_for_constants: _doc: "Module-level constants should use SCREAMING_SNAKE_CASE." 
- _languages: [elixir, javascript, ruby] _log_baseline: -5.9884 branching: mean_branching_density: 0.0176 From 579f1919806f4165ee390ae3f13ceb8e9a06a7e5 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sat, 21 Mar 2026 15:21:18 +0100 Subject: [PATCH 47/71] chore(ci): upgrade checkout to v6, add caching, remove dead blocks command - Upgrade all workflows and README examples to actions/checkout@v6 - Add deps/_build caching to release, compare, health-report workflows - Fix dialyzer PLT cache path (configure priv/plts in mix.exs) so PLTs actually cache instead of rebuilding every run - Fix cache keys across workflows: include MIX_ENV, OTP/Elixir versions, and mix.exs in hash to prevent stale cache hits after version bumps - Skip mix deps.get on cache hit in test.yml - Delete blocks.yml workflow and remove all references to the blocks command from action.yml and run.sh (command was never implemented in the CLI) Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/blocks.yml | 18 --------------- .github/workflows/bootstrap-labels.yml | 2 +- .github/workflows/compare.yml | 11 ++++++++- .github/workflows/dialyzer.yml | 6 ++--- .github/workflows/health-report.yml | 12 +++++++++- .github/workflows/release.yml | 24 +++++++++++++++++--- .github/workflows/sync-behavior-coverage.yml | 6 ++--- .github/workflows/test.yml | 9 +++++--- .github/workflows/validate-issue-links.yml | 2 +- README.md | 4 ++-- action.yml | 2 +- mix.exs | 6 ++++- scripts/run.sh | 6 +---- 13 files changed, 65 insertions(+), 43 deletions(-) delete mode 100644 .github/workflows/blocks.yml diff --git a/.github/workflows/blocks.yml b/.github/workflows/blocks.yml deleted file mode 100644 index 284adec..0000000 --- a/.github/workflows/blocks.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: Extract Code Blocks - -on: - pull_request: - branches: [main] - -permissions: - contents: read - -jobs: - blocks: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: ./ - with: - command: blocks - build: 
source diff --git a/.github/workflows/bootstrap-labels.yml b/.github/workflows/bootstrap-labels.yml index a865335..52c644a 100644 --- a/.github/workflows/bootstrap-labels.yml +++ b/.github/workflows/bootstrap-labels.yml @@ -10,7 +10,7 @@ jobs: bootstrap: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Bootstrap labels uses: actions/github-script@v7 diff --git a/.github/workflows/compare.yml b/.github/workflows/compare.yml index fa13ef0..5a672ad 100644 --- a/.github/workflows/compare.yml +++ b/.github/workflows/compare.yml @@ -12,10 +12,19 @@ jobs: compare: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: fetch-depth: 0 + - name: Cache Mix deps and build + uses: actions/cache@v4 + with: + path: | + deps + _build + key: ${{ runner.os }}-mix-1.19-27.3-${{ hashFiles('mix.lock', 'mix.exs') }} + restore-keys: ${{ runner.os }}-mix-1.19-27.3- + - name: Get fork point id: fork-point run: | diff --git a/.github/workflows/dialyzer.yml b/.github/workflows/dialyzer.yml index 0674398..dfaca60 100644 --- a/.github/workflows/dialyzer.yml +++ b/.github/workflows/dialyzer.yml @@ -25,7 +25,7 @@ jobs: steps: - name: Checkout PR - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up Elixir uses: erlef/setup-beam@v1 @@ -45,9 +45,9 @@ jobs: uses: actions/cache@v4 with: path: _build - key: build-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock') }} + key: build-${{ env.MIX_ENV }}-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock') }} restore-keys: | - build-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}- + build-${{ env.MIX_ENV }}-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}- - name: Cache PLT uses: actions/cache@v4 diff --git a/.github/workflows/health-report.yml b/.github/workflows/health-report.yml index 55e38f0..2e0b896 100644 --- a/.github/workflows/health-report.yml +++ b/.github/workflows/health-report.yml @@ -12,7 
+12,17 @@ jobs: health-report: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 + + - name: Cache Mix deps and build + uses: actions/cache@v4 + with: + path: | + deps + _build + key: ${{ runner.os }}-mix-1.19-27.3-${{ hashFiles('mix.lock', 'mix.exs') }} + restore-keys: ${{ runner.os }}-mix-1.19-27.3- + - uses: ./ with: command: health-report diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d1ebf66..13300bd 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,15 +17,33 @@ jobs: build: runs-on: ubuntu-latest + env: + ELIXIR_VERSION: "1.19" + OTP_VERSION: "27.3" + steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up Elixir uses: erlef/setup-beam@v1 with: - otp-version: "27.3" - elixir-version: "1.19" + otp-version: ${{ env.OTP_VERSION }} + elixir-version: ${{ env.ELIXIR_VERSION }} + + - name: Cache deps + uses: actions/cache@v4 + with: + path: deps + key: deps-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock', 'mix.exs') }} + restore-keys: deps-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}- + + - name: Cache build + uses: actions/cache@v4 + with: + path: _build + key: build-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}-${{ hashFiles('mix.lock', 'mix.exs') }} + restore-keys: build-${{ env.ELIXIR_VERSION }}-${{ env.OTP_VERSION }}- - name: Build escript run: | diff --git a/.github/workflows/sync-behavior-coverage.yml b/.github/workflows/sync-behavior-coverage.yml index 8529dcc..dc1cc4f 100644 --- a/.github/workflows/sync-behavior-coverage.yml +++ b/.github/workflows/sync-behavior-coverage.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Checkout PR branch - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: ref: ${{ github.head_ref }} token: ${{ secrets.GITHUB_TOKEN }} @@ -31,8 +31,8 @@ jobs: path: | deps _build - key: ${{ runner.os }}-mix-${{ hashFiles('mix.lock') }} - restore-keys: ${{ 
runner.os }}-mix- + key: ${{ runner.os }}-mix-dev-otp27.3-elixir1.19-${{ hashFiles('mix.lock', 'mix.exs') }} + restore-keys: ${{ runner.os }}-mix-dev-otp27.3-elixir1.19- - name: Install dependencies run: mix deps.get diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d560a17..04ba9b3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,24 +10,27 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up Elixir + id: beam uses: erlef/setup-beam@v1 with: otp-version: "27.3" elixir-version: "1.19" - name: Cache deps + id: mix-cache uses: actions/cache@v4 with: path: | deps _build - key: ${{ runner.os }}-mix-${{ hashFiles('mix.lock') }} - restore-keys: ${{ runner.os }}-mix- + key: ${{ runner.os }}-mix-${{ steps.beam.outputs.otp-version }}-${{ steps.beam.outputs.elixir-version }}-${{ hashFiles('mix.lock', 'mix.exs') }} + restore-keys: ${{ runner.os }}-mix-${{ steps.beam.outputs.otp-version }}-${{ steps.beam.outputs.elixir-version }}- - name: Install dependencies + if: steps.mix-cache.outputs.cache-hit != 'true' run: mix deps.get - name: Compile diff --git a/.github/workflows/validate-issue-links.yml b/.github/workflows/validate-issue-links.yml index e366437..5960df4 100644 --- a/.github/workflows/validate-issue-links.yml +++ b/.github/workflows/validate-issue-links.yml @@ -12,7 +12,7 @@ jobs: validate: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Validate issue template links run: | diff --git a/README.md b/README.md index 071789b..1eab2d4 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ jobs: health-report: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - uses: num42/codeqa-action@v1 with: command: health-report @@ -95,7 +95,7 @@ jobs: compare: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: fetch-depth: 0 - name: Get fork 
point diff --git a/action.yml b/action.yml index f680469..6be6078 100644 --- a/action.yml +++ b/action.yml @@ -8,7 +8,7 @@ branding: inputs: command: - description: "Command to run: health-report, compare, analyze, or blocks" + description: "Command to run: health-report, compare, or analyze" required: true path: description: "Directory to analyze" diff --git a/mix.exs b/mix.exs index 3c06031..cb2f413 100644 --- a/mix.exs +++ b/mix.exs @@ -12,7 +12,11 @@ defmodule CodeQA.MixProject do elixirc_paths: elixirc_paths(Mix.env()), preferred_envs: [precommit: :test], aliases: aliases(), - dialyzer: [ignore_warnings: ".dialyzer_ignore.exs"], + dialyzer: [ + ignore_warnings: ".dialyzer_ignore.exs", + plt_local_path: "priv/plts", + plt_core_path: "priv/plts" + ], consolidate_protocols: Mix.env() != :test ] end diff --git a/scripts/run.sh b/scripts/run.sh index cac8046..9804205 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -38,9 +38,8 @@ case "$INPUT_COMMAND" in fi ;; analyze) OUTPUT_FILE="${OUTPUT_FILE}.json" ;; - blocks) OUTPUT_FILE="${OUTPUT_FILE}.json" ;; *) - echo "::error::Unknown command: $INPUT_COMMAND. Must be health-report, compare, analyze, or blocks." + echo "::error::Unknown command: $INPUT_COMMAND. Must be health-report, compare, or analyze." exit 1 ;; esac @@ -83,9 +82,6 @@ case "$INPUT_COMMAND" in analyze) ARGS+=("--output" "$OUTPUT_FILE") ;; - blocks) - ARGS+=("--output" "$OUTPUT_FILE") - ;; esac # Parse ignore-paths YAML list into --ignore-paths flag From 0787134f12b099ab3acd92c300acefac81d79e2a Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sun, 22 Mar 2026 16:48:07 +0100 Subject: [PATCH 48/71] fix(near-duplicate-blocks): return zeroed map when blocks is nil LOO analysis skips structural tokenization, producing a nil blocks field. Add a dedicated clause so NearDuplicateBlocksFile.analyze/1 returns a zeroed map instead of crashing on a missing pattern match. 
Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/metrics/file/near_duplicate_blocks_file.ex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/codeqa/metrics/file/near_duplicate_blocks_file.ex b/lib/codeqa/metrics/file/near_duplicate_blocks_file.ex index 86f71e3..7a15e74 100644 --- a/lib/codeqa/metrics/file/near_duplicate_blocks_file.ex +++ b/lib/codeqa/metrics/file/near_duplicate_blocks_file.ex @@ -20,6 +20,8 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocksFile do end @impl true + def analyze(%{blocks: nil}), do: Map.new(keys(), fn k -> {k, 0} end) + def analyze(%{path: path, blocks: blocks}) when is_list(blocks) do NearDuplicateBlocks.analyze_from_blocks( NearDuplicateBlocks.label_blocks(blocks, path || "unknown"), From 20d1773c4e197262e277ee7d67a5990fb1fef4b8 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sun, 22 Mar 2026 16:48:12 +0100 Subject: [PATCH 49/71] chore(combined-metrics): annotate language coverage for all behaviors Adds _languages constraints to behaviors that are language-specific, enabling the analyzer to skip irrelevant behaviors when only a subset of languages is present in the project. Co-Authored-By: Claude Sonnet 4.6 --- priv/combined_metrics/code_smells.yml | 5 +++++ priv/combined_metrics/variable_naming.yml | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml index f1c73c5..2201f7d 100644 --- a/priv/combined_metrics/code_smells.yml +++ b/priv/combined_metrics/code_smells.yml @@ -1,5 +1,6 @@ consistent_string_quote_style: _doc: "Files should use a single, consistent string quoting style throughout." + _languages: [elixir] _log_baseline: -18.2553 branching: mean_branching_density: 0.0243 @@ -101,6 +102,7 @@ consistent_string_quote_style: no_dead_code_after_return: _doc: "There should be no unreachable statements after a return or early exit." 
+ _languages: [elixir] _log_baseline: -55.8435 branching: mean_branch_count: -2.0000 @@ -211,6 +213,7 @@ no_dead_code_after_return: no_debug_print_statements: _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." + _languages: [elixir] _log_baseline: -88.0844 branching: mean_branch_count: -0.3540 @@ -321,6 +324,7 @@ no_debug_print_statements: no_fixme_comments: _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." + _languages: [elixir] _log_baseline: -2.0233 branching: mean_branch_count: 0.1755 @@ -435,6 +439,7 @@ no_fixme_comments: no_nested_ternary: _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." + _languages: [elixir] _log_baseline: 7.6475 branching: mean_branch_count: -0.5662 diff --git a/priv/combined_metrics/variable_naming.yml b/priv/combined_metrics/variable_naming.yml index 1be9c6b..e7bc6fa 100644 --- a/priv/combined_metrics/variable_naming.yml +++ b/priv/combined_metrics/variable_naming.yml @@ -1,5 +1,6 @@ boolean_has_is_has_prefix: _doc: "Boolean variables should be prefixed with `is_`, `has_`, or `can_`." + _languages: [elixir, javascript, ruby] _log_baseline: 15.9481 brevity: mean_sample_size: 0.0752 @@ -62,6 +63,7 @@ boolean_has_is_has_prefix: collection_name_is_plural: _doc: "Variables holding a collection should use a plural name." + _languages: [elixir, javascript, ruby] _log_baseline: 21.8380 brevity: mean_sample_size: -0.5320 @@ -123,6 +125,7 @@ collection_name_is_plural: loop_var_is_single_letter: _doc: "Loop index variables (`i`, `j`, `k`) are acceptable inside loop bodies." + _languages: [elixir, javascript, ruby] _log_baseline: -28.3218 brevity: mean_sample_size: -0.1049 @@ -202,6 +205,7 @@ loop_var_is_single_letter: name_contains_and: _doc: "Variable names containing `and` signal a variable that holds two concerns." 
+ _languages: [elixir, javascript, ruby] _log_baseline: 0.4689 branching: mean_branch_count: -0.3666 @@ -320,6 +324,7 @@ name_contains_and: name_contains_type_suffix: _doc: "Type suffixes in names (`userString`, `nameList`) are redundant noise." + _languages: [elixir, javascript, ruby] _log_baseline: -26.6817 branching: mean_branch_count: -0.4150 @@ -405,6 +410,7 @@ name_contains_type_suffix: name_is_abbreviation: _doc: "Abbreviated names (`usr`, `cfg`, `mgr`) reduce readability." + _languages: [elixir, javascript, ruby] _log_baseline: 10.7370 brevity: mean_sample_size: -0.1542 @@ -491,6 +497,7 @@ name_is_abbreviation: name_is_generic: _doc: "Generic names (`data`, `result`, `tmp`, `val`, `obj`) convey no domain meaning." + _languages: [elixir, javascript, ruby] _log_baseline: 37.4815 branching: mean_branch_count: 0.5193 @@ -607,6 +614,7 @@ name_is_generic: name_is_number_like: _doc: "Number-suffixed names (`var1`, `thing2`) signal a missing abstraction." + _languages: [elixir, javascript, ruby] _log_baseline: 1.7611 brevity: mean_sample_size: -0.0262 @@ -685,6 +693,7 @@ name_is_number_like: name_is_single_letter: _doc: "Single-letter names outside loop indices are too opaque." + _languages: [elixir, javascript, ruby] _log_baseline: 26.2113 branching: mean_branching_density: -0.0458 @@ -782,6 +791,7 @@ name_is_single_letter: name_is_too_long: _doc: "Names longer than ~30 characters harm readability." + _languages: [elixir, javascript, ruby] _log_baseline: -7.8322 branching: mean_branch_count: 0.0340 @@ -902,6 +912,7 @@ name_is_too_long: name_is_too_short: _doc: "Names shorter than 3 characters (outside loops) are too opaque." + _languages: [elixir, javascript, ruby] _log_baseline: -2.7224 branching: mean_branch_count: -0.2327 @@ -989,6 +1000,7 @@ name_is_too_short: negated_boolean_name: _doc: "Negated boolean names (`isNotValid`, `notActive`) are harder to reason about." 
+ _languages: [elixir, javascript, ruby] _log_baseline: -4.4565 brevity: mean_sample_size: -0.0998 @@ -1059,6 +1071,7 @@ negated_boolean_name: no_hungarian_notation: _doc: "Hungarian notation prefixes (`strName`, `bFlag`) add noise without type safety." + _languages: [elixir, javascript, ruby] _log_baseline: -15.5962 brevity: mean_sample_size: -0.0814 @@ -1134,6 +1147,7 @@ no_hungarian_notation: screaming_snake_for_constants: _doc: "Module-level constants should use SCREAMING_SNAKE_CASE." + _languages: [elixir, javascript, ruby] _log_baseline: -5.9884 branching: mean_branching_density: 0.0176 From e68cbd3c58ab3d5ea545ed94bdc2b94f6754b523 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sun, 22 Mar 2026 16:48:21 +0100 Subject: [PATCH 50/71] perf(block-impact): precompute log metrics and cache behaviors for cosine scoring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three complementary optimizations that cut repeated work per LOO iteration: 1. SampleRunner.diagnose_aggregate/2 now precomputes a log-metrics map once per call, so CosineVector inner loops do O(1) lookups instead of recomputing :math.log for every (behavior, metric) pair. 2. BlockImpactAnalyzer.analyze/3 accepts a behavior_config_pid and fetches all behaviors once at the start of each run, filtering by project languages before passing the cached map down through RefactoringPotentials and SampleRunner — eliminating repeated YAML reads per node. 3. BlockImpactAnalyzer computes an incremental aggregate structure up front and applies O(behaviors) swap operations per node instead of re-aggregating the full files map from scratch each time. Telemetry events are emitted at the codebase, file, and node levels to surface bottlenecks without requiring profiling runs. 
Co-Authored-By: Claude Sonnet 4.6 --- .../block_impact/refactoring_potentials.ex | 22 +- lib/codeqa/block_impact_analyzer.ex | 325 +++++++++++++++--- lib/codeqa/combined_metrics/cosine_vector.ex | 31 +- lib/codeqa/combined_metrics/sample_runner.ex | 92 +++-- 4 files changed, 381 insertions(+), 89 deletions(-) diff --git a/lib/codeqa/block_impact/refactoring_potentials.ex b/lib/codeqa/block_impact/refactoring_potentials.ex index 092d97b..f2417ef 100644 --- a/lib/codeqa/block_impact/refactoring_potentials.ex +++ b/lib/codeqa/block_impact/refactoring_potentials.ex @@ -42,11 +42,13 @@ defmodule CodeQA.BlockImpact.RefactoringPotentials do top_n = Keyword.get(opts, :top, 3) language = Keyword.get(opts, :language) languages = Keyword.get(opts, :languages) + behavior_map = Keyword.get(opts, :behavior_map) - file_delta = compute_file_delta(baseline_file_cosines, without_file_metrics, language) + file_delta = + compute_file_delta(baseline_file_cosines, without_file_metrics, language, behavior_map) codebase_delta = - compute_codebase_delta(baseline_codebase_cosines, without_codebase_agg, languages) + compute_codebase_delta(baseline_codebase_cosines, without_codebase_agg, languages, behavior_map) all_keys = Enum.uniq(Map.keys(file_delta) ++ Map.keys(codebase_delta)) @@ -68,18 +70,26 @@ defmodule CodeQA.BlockImpact.RefactoringPotentials do end) end - defp compute_file_delta(baseline_cosines, without_metrics, language) do + defp compute_file_delta(baseline_cosines, without_metrics, language, behavior_map) do without_agg = FileScorer.file_to_aggregate(without_metrics) without_cosines = - SampleRunner.diagnose_aggregate(without_agg, top: 99_999, language: language) + SampleRunner.diagnose_aggregate(without_agg, + top: 99_999, + language: language, + behavior_map: behavior_map + ) cosines_to_delta(baseline_cosines, without_cosines) end - defp compute_codebase_delta(baseline_cosines, without_agg, languages) do + defp compute_codebase_delta(baseline_cosines, without_agg, languages, 
behavior_map) do without_cosines = - SampleRunner.diagnose_aggregate(without_agg, top: 99_999, languages: languages) + SampleRunner.diagnose_aggregate(without_agg, + top: 99_999, + languages: languages, + behavior_map: behavior_map + ) cosines_to_delta(baseline_cosines, without_cosines) end diff --git a/lib/codeqa/block_impact_analyzer.ex b/lib/codeqa/block_impact_analyzer.ex index de00e02..c317617 100644 --- a/lib/codeqa/block_impact_analyzer.ex +++ b/lib/codeqa/block_impact_analyzer.ex @@ -8,8 +8,29 @@ defmodule CodeQA.BlockImpactAnalyzer do The pipeline result is returned with a `"nodes"` key added to each file entry. All other keys in the result are preserved unchanged. + + ## Telemetry + + Emits the following events (all durations in microseconds): + + - `[:codeqa, :block_impact, :analyze]` — full run + measurements: `%{duration: us}` + metadata: `%{file_count: n}` + + - `[:codeqa, :block_impact, :codebase_cosines]` — codebase baseline cosine computation + measurements: `%{duration: us}` + metadata: `%{behavior_count: n}` + + - `[:codeqa, :block_impact, :file]` — per-file node computation + measurements: `%{duration: us, tokenize_us: us, parse_us: us, file_cosines_us: us, node_count: n}` + metadata: `%{path: string}` + + - `[:codeqa, :block_impact, :node]` — per-node leave-one-out computation + measurements: `%{duration: us, reconstruct_us: us, analyze_file_us: us, aggregate_us: us, refactoring_us: us}` + metadata: `%{path: string, token_count: n}` """ + alias CodeQA.Analysis.BehaviorConfigServer alias CodeQA.AST.Enrichment.Node alias CodeQA.AST.Lexing.TokenNormalizer alias CodeQA.AST.Parsing.Parser @@ -34,20 +55,49 @@ defmodule CodeQA.BlockImpactAnalyzer do - `:nodes_top` — number of refactoring potentials per node (default 3) - `:workers` — parallelism for `Task.async_stream` (default `System.schedulers_online()`) + - `:baseline_codebase_agg` — pre-computed codebase aggregate (skips redundant analysis) """ @spec analyze(map(), map(), keyword()) :: map() def 
analyze(pipeline_result, files_map, opts \\ []) do nodes_top = Keyword.get(opts, :nodes_top, 3) workers = Keyword.get(opts, :workers, System.schedulers_online()) - baseline_codebase_agg = Analyzer.analyze_codebase_aggregate(files_map) + t0 = now() + + baseline_codebase_agg = + Keyword.get_lazy(opts, :baseline_codebase_agg, fn -> + Analyzer.analyze_codebase_aggregate(files_map) + end) + + cached_behaviors = + case Keyword.get(opts, :behavior_config_pid) do + nil -> nil + pid -> BehaviorConfigServer.get_all_behaviors(pid) + end + project_langs = project_languages(files_map) - baseline_codebase_cosines = - SampleRunner.diagnose_aggregate(baseline_codebase_agg, - top: 99_999, - languages: project_langs - ) + filtered_behaviors = + if cached_behaviors && project_langs != [] do + filter_behaviors_by_languages(cached_behaviors, project_langs) + else + cached_behaviors + end + + {baseline_codebase_cosines, cosines_us} = + timed(fn -> + SampleRunner.diagnose_aggregate(baseline_codebase_agg, + top: 99_999, + languages: project_langs, + behavior_map: filtered_behaviors + ) + end) + + :telemetry.execute( + [:codeqa, :block_impact, :codebase_cosines], + %{duration: cosines_us}, + %{behavior_count: length(baseline_codebase_cosines)} + ) file_results = pipeline_result["files"] @@ -58,16 +108,23 @@ defmodule CodeQA.BlockImpactAnalyzer do content = Map.get(files_map, path, "") baseline_file_metrics = Map.get(file_data, "metrics", %{}) - nodes = - compute_nodes( + {nodes, file_measurements} = + compute_nodes_timed( path, content, baseline_file_metrics, file_results, baseline_codebase_cosines, - nodes_top + nodes_top, + filtered_behaviors ) + :telemetry.execute( + [:codeqa, :block_impact, :file], + file_measurements, + %{path: path} + ) + {path, Map.put(file_data, "nodes", nodes)} end, max_concurrency: workers, @@ -76,43 +133,80 @@ defmodule CodeQA.BlockImpactAnalyzer do ) |> Enum.reduce(%{}, fn {:ok, {path, data}}, acc -> Map.put(acc, path, data) end) + :telemetry.execute( + 
[:codeqa, :block_impact, :analyze], + %{duration: now() - t0}, + %{file_count: map_size(file_results)} + ) + Map.put(pipeline_result, "files", updated_files) end - defp compute_nodes( + defp compute_nodes_timed( path, content, baseline_file_metrics, file_results, baseline_codebase_cosines, - nodes_top + nodes_top, + cached_behaviors ) do if content == "" do - [] + {[], %{duration: 0, tokenize_us: 0, parse_us: 0, file_cosines_us: 0, node_count: 0}} else - root_tokens = TokenNormalizer.normalize_structural(content) - top_level_nodes = Parser.detect_blocks(root_tokens, Unknown) + t0 = now() + + {root_tokens, tokenize_us} = timed(fn -> TokenNormalizer.normalize_structural(content) end) + {top_level_nodes, parse_us} = timed(fn -> Parser.detect_blocks(root_tokens, Unknown) end) baseline_file_agg = FileScorer.file_to_aggregate(baseline_file_metrics) language = CodeQA.Language.detect(path).name() - baseline_file_cosines = - SampleRunner.diagnose_aggregate(baseline_file_agg, top: 99_999, language: language) + {baseline_file_cosines, file_cosines_us} = + timed(fn -> + SampleRunner.diagnose_aggregate(baseline_file_agg, + top: 99_999, + language: language, + behavior_map: cached_behaviors + ) + end) - top_level_nodes - |> Enum.map(fn node -> - serialize_node( - node, - path, - root_tokens, - baseline_file_cosines, - file_results, - baseline_codebase_cosines, - nodes_top, - language - ) - end) - |> Enum.sort_by(fn n -> {n["start_line"], n["column_start"]} end) + inc_agg = build_incremental_agg(file_results) + old_file_triples = file_metrics_to_triples(baseline_file_metrics) + project_langs = project_languages(file_results) + + node_ctx = %{ + inc_agg: inc_agg, + old_file_triples: old_file_triples, + project_langs: project_langs, + cached_behaviors: cached_behaviors + } + + nodes = + top_level_nodes + |> Enum.map(fn node -> + serialize_node( + node, + path, + root_tokens, + baseline_file_cosines, + baseline_codebase_cosines, + nodes_top, + language, + node_ctx + ) + end) + |> 
Enum.sort_by(fn n -> {n["start_line"], n["column_start"]} end) + + measurements = %{ + duration: now() - t0, + tokenize_us: tokenize_us, + parse_us: parse_us, + file_cosines_us: file_cosines_us, + node_count: length(top_level_nodes) + } + + {nodes, measurements} end end @@ -121,24 +215,24 @@ defmodule CodeQA.BlockImpactAnalyzer do path, root_tokens, baseline_file_cosines, - file_results, baseline_codebase_cosines, nodes_top, - language + language, + node_ctx ) do potentials = if length(node.tokens) < @min_tokens do [] else - compute_potentials( + compute_potentials_timed( node, path, root_tokens, baseline_file_cosines, - file_results, baseline_codebase_cosines, nodes_top, - language + language, + node_ctx ) end @@ -150,10 +244,10 @@ defmodule CodeQA.BlockImpactAnalyzer do path, root_tokens, baseline_file_cosines, - file_results, baseline_codebase_cosines, nodes_top, - language + language, + node_ctx ) end) |> Enum.sort_by(fn n -> {n["start_line"], n["column_start"]} end) @@ -173,35 +267,144 @@ defmodule CodeQA.BlockImpactAnalyzer do } end - defp compute_potentials( + defp compute_potentials_timed( %Node{} = node, path, root_tokens, baseline_file_cosines, - file_results, baseline_codebase_cosines, nodes_top, - language + language, + node_ctx ) do - reconstructed = FileImpact.reconstruct_without(root_tokens, node) - without_file_metrics = Analyzer.analyze_file(path, reconstructed) + t0 = now() - without_codebase_agg = - file_results - |> Map.put(path, %{"metrics" => without_file_metrics}) - |> Analyzer.aggregate_file_metrics() - - project_langs = project_languages(file_results) - - RefactoringPotentials.compute( - baseline_file_cosines, - without_file_metrics, - baseline_codebase_cosines, - without_codebase_agg, - top: nodes_top, - language: language, - languages: project_langs + {reconstructed, reconstruct_us} = + timed(fn -> FileImpact.reconstruct_without(root_tokens, node) end) + + {without_file_metrics, analyze_file_us} = + timed(fn -> 
Analyzer.analyze_file_for_loo(path, reconstructed) end) + + {without_codebase_agg, aggregate_us} = + timed(fn -> + new_triples = file_metrics_to_triples(without_file_metrics) + + node_ctx.inc_agg + |> swap_file_in_agg(node_ctx.old_file_triples, new_triples) + |> incremental_agg_to_aggregate() + end) + + {potentials, refactoring_us} = + timed(fn -> + RefactoringPotentials.compute( + baseline_file_cosines, + without_file_metrics, + baseline_codebase_cosines, + without_codebase_agg, + top: nodes_top, + language: language, + languages: node_ctx.project_langs, + behavior_map: node_ctx.cached_behaviors + ) + end) + + :telemetry.execute( + [:codeqa, :block_impact, :node], + %{ + duration: now() - t0, + reconstruct_us: reconstruct_us, + analyze_file_us: analyze_file_us, + aggregate_us: aggregate_us, + refactoring_us: refactoring_us + }, + %{path: path, token_count: length(node.tokens)} ) + + potentials + end + + defp file_metrics_to_triples(metrics) when is_map(metrics) do + metrics + |> Enum.flat_map(fn {metric_name, metric_data} when is_map(metric_data) -> + metric_data + |> Enum.filter(fn {_k, v} -> is_number(v) end) + |> Enum.map(fn {key, value} -> {metric_name, key, value / 1} end) + + _ -> [] + end) + end + + defp build_incremental_agg(file_results) do + file_results + |> Map.values() + |> Enum.flat_map(fn file_data -> + file_data |> Map.get("metrics", %{}) |> file_metrics_to_triples() + end) + |> Enum.group_by(fn {metric, key, _val} -> {metric, key} end, fn {_, _, val} -> val end) + |> Map.new(fn {{metric, key}, values} -> + n = length(values) + sum = Enum.sum(values) + sum_sq = Enum.reduce(values, 0.0, fn v, acc -> acc + v * v end) + {{metric, key}, %{sum: sum, sum_sq: sum_sq, min: Enum.min(values), max: Enum.max(values), count: n}} + end) + end + + defp swap_file_in_agg(inc_agg, old_triples, new_triples) do + old_map = Map.new(old_triples, fn {metric, key, val} -> {{metric, key}, val} end) + new_map = Map.new(new_triples, fn {metric, key, val} -> {{metric, key}, 
val} end) + all_keys = Enum.uniq(Map.keys(old_map) ++ Map.keys(new_map)) + + Enum.reduce(all_keys, inc_agg, fn mk, acc -> + case Map.get(acc, mk) do + nil -> + acc + + state -> + old_val = Map.get(old_map, mk, 0.0) + new_val = Map.get(new_map, mk, 0.0) + + Map.put(acc, mk, %{ + sum: state.sum - old_val + new_val, + sum_sq: state.sum_sq - old_val * old_val + new_val * new_val, + min: min(state.min, new_val), + max: max(state.max, new_val), + count: state.count + }) + end + end) + end + + defp incremental_agg_to_aggregate(inc_agg) do + Enum.reduce(inc_agg, %{}, fn {{metric, key}, state}, acc -> + n = state.count + mean = if n > 0, do: state.sum / n, else: 0.0 + variance = if n > 0, do: max(state.sum_sq / n - mean * mean, 0.0), else: 0.0 + std = :math.sqrt(variance) + + metric_agg = Map.get(acc, metric, %{}) + + updated = + Map.merge(metric_agg, %{ + "mean_#{key}" => Float.round(mean * 1.0, 4), + "std_#{key}" => Float.round(std * 1.0, 4), + "min_#{key}" => Float.round(state.min * 1.0, 4), + "max_#{key}" => Float.round(state.max * 1.0, 4) + }) + + Map.put(acc, metric, updated) + end) + end + + defp filter_behaviors_by_languages(behaviors_map, project_langs) do + Map.new(behaviors_map, fn {category, behaviors} -> + filtered = + Enum.filter(behaviors, fn {_behavior, behavior_data} -> + behavior_langs = Map.get(behavior_data, "_languages", []) + behavior_langs == [] or Enum.any?(behavior_langs, &(&1 in project_langs)) + end) + + {category, filtered} + end) end defp project_languages(path_keyed_map) do @@ -211,4 +414,12 @@ defmodule CodeQA.BlockImpactAnalyzer do |> Enum.reject(&(&1 == "unknown")) |> Enum.uniq() end + + defp timed(fun) do + t = now() + result = fun.() + {result, now() - t} + end + + defp now, do: System.monotonic_time(:microsecond) end diff --git a/lib/codeqa/combined_metrics/cosine_vector.ex b/lib/codeqa/combined_metrics/cosine_vector.ex index 158b24e..36bbe23 100644 --- a/lib/codeqa/combined_metrics/cosine_vector.ex +++ 
b/lib/codeqa/combined_metrics/cosine_vector.ex @@ -13,27 +13,35 @@ defmodule CodeQA.CombinedMetrics.CosineVector do Returns a one-element list `[result_map]` on success or `[]` when the behavior has no non-zero scalars (no sample data) and should be excluded. + + ## Options + + * `:log_metrics` - precomputed log-metric map `%{group => %{key => log_val}}`. + When present, values are looked up directly instead of being recomputed via + `:math.log/1`. Falls back to inline computation when absent or when a key is + not found in the map. """ - @spec compute(String.t(), String.t(), map(), map(), String.t()) :: [map()] - def compute(yaml_path, behavior, behavior_data, aggregate, category) do + @spec compute(String.t(), String.t(), map(), map(), String.t(), keyword()) :: [map()] + def compute(yaml_path, behavior, behavior_data, aggregate, category, opts \\ []) do scalars = Scorer.scalars_for(yaml_path, behavior) if map_size(scalars) == 0 do [] else - build_result(yaml_path, behavior, behavior_data, aggregate, category, scalars) + build_result(yaml_path, behavior, behavior_data, aggregate, category, scalars, opts) end end # --- Internal helpers --- - defp build_result(yaml_path, behavior, behavior_data, aggregate, category, scalars) do + defp build_result(yaml_path, behavior, behavior_data, aggregate, category, scalars, opts) do log_baseline = Map.get(behavior_data, "_log_baseline", 0.0) / 1.0 + log_metrics = Keyword.get(opts, :log_metrics) {dot, norm_s_sq, norm_v_sq, contributions} = Enum.reduce(scalars, {0.0, 0.0, 0.0, []}, fn {{group, key}, scalar}, {d, ns, nv, contribs} -> - log_m = :math.log(Scorer.get(aggregate, group, key)) + log_m = lookup_log_metric(log_metrics, aggregate, group, key) contrib = scalar * log_m {d + contrib, ns + scalar * scalar, nv + log_m * log_m, @@ -66,4 +74,17 @@ defmodule CodeQA.CombinedMetrics.CosineVector do } ] end + + # Returns a precomputed log value when available, otherwise computes inline. 
+ # Both paths apply the same max(val, 1.0e-300) floor guard to ensure identical + # results regardless of whether log_metrics was precomputed or not. + defp lookup_log_metric(nil, aggregate, group, key), + do: :math.log(max(Scorer.get(aggregate, group, key) / 1.0, 1.0e-300)) + + defp lookup_log_metric(log_metrics, aggregate, group, key) do + case get_in(log_metrics, [group, key]) do + nil -> :math.log(max(Scorer.get(aggregate, group, key) / 1.0, 1.0e-300)) + log_val -> log_val + end + end end diff --git a/lib/codeqa/combined_metrics/sample_runner.ex b/lib/codeqa/combined_metrics/sample_runner.ex index 3cd6533..6c5b6c9 100644 --- a/lib/codeqa/combined_metrics/sample_runner.ex +++ b/lib/codeqa/combined_metrics/sample_runner.ex @@ -179,26 +179,55 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do top_n = Keyword.get(opts, :top, 15) language = Keyword.get(opts, :language) languages = Keyword.get(opts, :languages) + behavior_map = Keyword.get(opts, :behavior_map) + + log_metrics = precompute_log_metrics(aggregate) + cosine_opts = [log_metrics: log_metrics] + + behaviors_stream = + if behavior_map do + behavior_map + |> Enum.sort_by(fn {category, _} -> category end) + |> Enum.flat_map(fn {category, behaviors} -> + Enum.flat_map(behaviors, fn {behavior, behavior_data} -> + yaml_path = "priv/combined_metrics/#{category}.yml" + + maybe_diagnose_behavior( + yaml_path, + behavior, + behavior_data, + aggregate, + category, + language, + languages, + cosine_opts + ) + end) + end) + else + Scorer.all_yamls() + |> Enum.sort_by(fn {path, _} -> path end) + |> Enum.flat_map(fn {yaml_path, data} -> + category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") + + data + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.flat_map(fn {behavior, behavior_data} -> + maybe_diagnose_behavior( + yaml_path, + behavior, + behavior_data, + aggregate, + category, + language, + languages, + cosine_opts + ) + end) + end) + end - Scorer.all_yamls() - |> Enum.sort_by(fn {path, _} 
-> path end) - |> Enum.flat_map(fn {yaml_path, data} -> - category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") - - data - |> Enum.filter(fn {_k, v} -> is_map(v) end) - |> Enum.flat_map(fn {behavior, behavior_data} -> - maybe_diagnose_behavior( - yaml_path, - behavior, - behavior_data, - aggregate, - category, - language, - languages - ) - end) - end) + behaviors_stream |> Enum.sort_by(& &1.cosine) |> Enum.take(top_n) end @@ -355,6 +384,26 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do # Cosine diagnosis (delegates vector math to CosineVector) # --------------------------------------------------------------------------- + # Builds a nested map of precomputed log values for all numeric entries in the + # aggregate: %{group => %{key => :math.log(max(val, 1.0e-300))}}. + # Called once per diagnose_aggregate/2 invocation so the inner reduce in + # CosineVector.build_result can do O(1) lookups instead of recomputing log + # for every (behavior, metric) pair. + defp precompute_log_metrics(aggregate) do + aggregate + |> Enum.filter(fn {_group, sub_map} -> is_map(sub_map) end) + |> Map.new(fn {group, sub_map} -> + log_sub = + sub_map + |> Enum.filter(fn {_key, val} -> is_number(val) end) + |> Map.new(fn {key, val} -> + {key, :math.log(max(val / 1.0, 1.0e-300))} + end) + + {group, log_sub} + end) + end + defp maybe_diagnose_behavior( yaml_path, behavior, @@ -362,12 +411,13 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do aggregate, category, language, - languages + languages, + cosine_opts \\ [] ) do behavior_langs = Map.get(behavior_data, "_languages", []) if behavior_language_applies?(behavior_langs, language, languages) do - CosineVector.compute(yaml_path, behavior, behavior_data, aggregate, category) + CosineVector.compute(yaml_path, behavior, behavior_data, aggregate, category, cosine_opts) else [] end From 70302cf2b29be7891ae384034ec9ccac259975d5 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sun, 22 Mar 2026 16:48:27 +0100 Subject: 
[PATCH 51/71] refactor(health-report): pre-compute cosines outside Grader Grader.grade_cosine_categories/3 now accepts a pre-grouped cosines_by_category map instead of calling diagnose_aggregate internally. HealthReport.generate/2 computes and groups the cosines once, eliminating a duplicate diagnose_aggregate call that existed for top_issues downstream. Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/health_report.ex | 19 ++++++++------ lib/codeqa/health_report/grader.ex | 24 ++++++++---------- lib/codeqa/health_report/top_blocks.ex | 35 +++++++++++++------------- 3 files changed, 41 insertions(+), 37 deletions(-) diff --git a/lib/codeqa/health_report.ex b/lib/codeqa/health_report.ex index a634a02..e70525b 100644 --- a/lib/codeqa/health_report.ex +++ b/lib/codeqa/health_report.ex @@ -36,8 +36,13 @@ defmodule CodeQA.HealthReport do worst_files_map = FileScorer.worst_files_per_behavior(files, combined_top: combined_top) + all_cosines = + SampleRunner.diagnose_aggregate(aggregate, top: 99_999, languages: project_langs) + + cosines_by_category = Enum.group_by(all_cosines, & &1.category) + cosine_grades = - Grader.grade_cosine_categories(aggregate, worst_files_map, grade_scale, project_langs) + Grader.grade_cosine_categories(cosines_by_category, worst_files_map, grade_scale) all_categories = (threshold_grades ++ cosine_grades) @@ -49,9 +54,6 @@ defmodule CodeQA.HealthReport do metadata = build_metadata(analysis_results) - all_cosines = - SampleRunner.diagnose_aggregate(aggregate, top: 99_999, languages: project_langs) - top_issues = Enum.take(all_cosines, 10) codebase_cosine_lookup = @@ -131,12 +133,15 @@ defmodule CodeQA.HealthReport do base_worst_files_map = FileScorer.worst_files_per_behavior(base_files, combined_top: combined_top) + base_cosines_by_category = + SampleRunner.diagnose_aggregate(base_aggregate, top: 99_999, languages: base_project_langs) + |> Enum.group_by(& &1.category) + base_cosine_grades = Grader.grade_cosine_categories( - base_aggregate, + 
base_cosines_by_category, base_worst_files_map, - grade_scale, - base_project_langs + grade_scale ) base_all_categories = diff --git a/lib/codeqa/health_report/grader.ex b/lib/codeqa/health_report/grader.ex index 46ed374..d671a0b 100644 --- a/lib/codeqa/health_report/grader.ex +++ b/lib/codeqa/health_report/grader.ex @@ -1,7 +1,6 @@ defmodule CodeQA.HealthReport.Grader do @moduledoc "Scores metrics and assigns letter grades." - alias CodeQA.CombinedMetrics.SampleRunner alias CodeQA.Config alias CodeQA.HealthReport.Categories @@ -228,29 +227,28 @@ defmodule CodeQA.HealthReport.Grader do @doc """ Grade codebase aggregate metrics using cosine similarity. - Calls `SampleRunner.diagnose_aggregate/2` to get all behaviors with cosine - values, groups them by category, and returns a graded category list suitable - for use with `overall_score/3`. + Accepts `cosines_by_category`, a map of category string keys to lists of + behavior cosine entries as returned by + `Enum.group_by(SampleRunner.diagnose_aggregate(...), & &1.category)`. + + The caller is responsible for computing `cosines_by_category` so that + `diagnose_aggregate/2` is invoked only once across the report pipeline. Categories with zero behaviors are skipped. 
""" @spec grade_cosine_categories( - aggregate :: map(), + cosines_by_category :: %{String.t() => [map()]}, worst_files :: %{String.t() => [map()]}, - grade_scale :: [{number(), String.t()}], - languages :: [String.t()] + grade_scale :: [{number(), String.t()}] ) :: [map()] def grade_cosine_categories( - aggregate, + cosines_by_category, worst_files, - scale \\ Categories.default_grade_scale(), - languages \\ [] + scale \\ Categories.default_grade_scale() ) do threshold = Config.cosine_significance_threshold() - aggregate - |> SampleRunner.diagnose_aggregate(top: 99_999, languages: languages) - |> Enum.group_by(& &1.category) + cosines_by_category |> Enum.map(fn {category, behaviors} -> behavior_entries = score_behavior_entries(behaviors, threshold, worst_files, scale, category) diff --git a/lib/codeqa/health_report/top_blocks.ex b/lib/codeqa/health_report/top_blocks.ex index 1761941..6b59bc3 100644 --- a/lib/codeqa/health_report/top_blocks.ex +++ b/lib/codeqa/health_report/top_blocks.ex @@ -9,6 +9,24 @@ defmodule CodeQA.HealthReport.TopBlocks do @severity_medium 0.10 @gap_floor 0.01 + defp build_fix_hint_lookup do + Scorer.all_yamls() + |> Enum.flat_map(fn {yaml_path, data} -> + category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") + Enum.flat_map(data, &hints_for_behavior(category, &1)) + end) + |> Map.new() + end + + defp hints_for_behavior(category, {behavior, behavior_data}) when is_map(behavior_data) do + case Map.get(behavior_data, "_fix_hint") do + nil -> [] + hint -> [{{category, behavior}, hint}] + end + end + + defp hints_for_behavior(_category, _entry), do: [] + @spec build(map(), [struct()], map()) :: [map()] def build(analysis_results, changed_files, codebase_cosine_lookup) do files = Map.get(analysis_results, "files", %{}) @@ -96,21 +114,4 @@ defmodule CodeQA.HealthReport.TopBlocks do defp max_delta(%{potentials: potentials}), do: Enum.max_by(potentials, & &1.cosine_delta).cosine_delta - defp build_fix_hint_lookup do - 
Scorer.all_yamls() - |> Enum.flat_map(fn {yaml_path, data} -> - category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") - Enum.flat_map(data, &hints_for_behavior(category, &1)) - end) - |> Map.new() - end - - defp hints_for_behavior(category, {behavior, behavior_data}) when is_map(behavior_data) do - case Map.get(behavior_data, "_fix_hint") do - nil -> [] - hint -> [{{category, behavior}, hint}] - end - end - - defp hints_for_behavior(_category, _entry), do: [] end From 71c8f0ae4dcda932447bb4d2924310c1cfbf2724 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sun, 22 Mar 2026 16:48:36 +0100 Subject: [PATCH 52/71] refactor(engine): integrate block impact computation into the analyzer pipeline Block impact analysis is now triggered via compute_nodes: true in analyze_codebase/2 instead of requiring callers to invoke BlockImpactAnalyzer.analyze/3 as a separate step. The CLI commands are simplified accordingly. Supporting changes: - FileMetricsServer removed from RunSupervisor (unused since the LOO path now uses analyze_file_for_loo/2 which bypasses the run context) - Analyzer.with_run_context/2 now exposes behavior_config_pid so the block impact path can reuse the shared BehaviorConfigServer - Pipeline.build_file_context/2 accepts skip_structural: true for LOO paths that do not need block detection - Collector defaults now ignore *.md and *.mdx files - CLI health-report exposes --telemetry flag to print block impact timings Co-Authored-By: Claude Sonnet 4.6 --- lib/codeqa/analysis/file_context_server.ex | 2 +- lib/codeqa/analysis/run_context.ex | 5 +- lib/codeqa/analysis/run_supervisor.ex | 10 +- lib/codeqa/cli/analyze.ex | 4 - lib/codeqa/cli/health_report.ex | 120 ++++++++++++++++++++- lib/codeqa/engine/analyzer.ex | 39 +++++-- lib/codeqa/engine/collector.ex | 4 +- lib/codeqa/engine/pipeline.ex | 5 + 8 files changed, 163 insertions(+), 26 deletions(-) diff --git a/lib/codeqa/analysis/file_context_server.ex 
b/lib/codeqa/analysis/file_context_server.ex index 3de7859..987595f 100644 --- a/lib/codeqa/analysis/file_context_server.ex +++ b/lib/codeqa/analysis/file_context_server.ex @@ -35,7 +35,7 @@ defmodule CodeQA.Analysis.FileContextServer do Returns a cached (or freshly built) `FileContext` for `content`. The language is resolved from `opts` (`:language` or `:path`); defaults to - `Unknown`, consistent with how `FileImpact` calls `build_file_context/2`. + `Unknown`. """ @spec get(pid(), String.t(), keyword()) :: FileContext.t() def get(pid, content, opts \\ []) do diff --git a/lib/codeqa/analysis/run_context.ex b/lib/codeqa/analysis/run_context.ex index e9e4aae..e0e9d52 100644 --- a/lib/codeqa/analysis/run_context.ex +++ b/lib/codeqa/analysis/run_context.ex @@ -6,11 +6,10 @@ defmodule CodeQA.Analysis.RunContext do cached state without named process registration. """ - defstruct [:behavior_config_pid, :file_context_pid, :file_metrics_pid] + defstruct [:behavior_config_pid, :file_context_pid] @type t :: %__MODULE__{ behavior_config_pid: pid(), - file_context_pid: pid(), - file_metrics_pid: pid() + file_context_pid: pid() } end diff --git a/lib/codeqa/analysis/run_supervisor.ex b/lib/codeqa/analysis/run_supervisor.ex index 687e58c..ab6bb10 100644 --- a/lib/codeqa/analysis/run_supervisor.ex +++ b/lib/codeqa/analysis/run_supervisor.ex @@ -2,7 +2,7 @@ defmodule CodeQA.Analysis.RunSupervisor do @moduledoc """ One-shot supervisor for the per-analysis-run GenServers. - Started at the top of `BlockImpactAnalyzer.analyze/3` and stopped (via + Started at the top of `Analyzer.with_run_context/2` and stopped (via `Supervisor.stop/1`) in an `after` block when the run completes. 
Servers are not registered by name, preventing collisions when multiple @@ -11,7 +11,7 @@ defmodule CodeQA.Analysis.RunSupervisor do use Supervisor - alias CodeQA.Analysis.{BehaviorConfigServer, FileContextServer, FileMetricsServer, RunContext} + alias CodeQA.Analysis.{BehaviorConfigServer, FileContextServer, RunContext} @spec start_link(keyword()) :: Supervisor.on_start() def start_link(opts \\ []) do @@ -29,8 +29,7 @@ defmodule CodeQA.Analysis.RunSupervisor do %RunContext{ behavior_config_pid: find_pid(children, BehaviorConfigServer), - file_context_pid: find_pid(children, FileContextServer), - file_metrics_pid: find_pid(children, FileMetricsServer) + file_context_pid: find_pid(children, FileContextServer) } end @@ -38,8 +37,7 @@ defmodule CodeQA.Analysis.RunSupervisor do def init(_opts) do children = [ {BehaviorConfigServer, []}, - {FileContextServer, []}, - {FileMetricsServer, []} + {FileContextServer, []} ] Supervisor.init(children, strategy: :one_for_one) diff --git a/lib/codeqa/cli/analyze.ex b/lib/codeqa/cli/analyze.ex index 475e15c..9c1f840 100644 --- a/lib/codeqa/cli/analyze.ex +++ b/lib/codeqa/cli/analyze.ex @@ -3,7 +3,6 @@ defmodule CodeQA.CLI.Analyze do @behaviour CodeQA.CLI.Command - alias CodeQA.BlockImpactAnalyzer alias CodeQA.CLI.Options alias CodeQA.Config alias CodeQA.Engine.Analyzer @@ -65,9 +64,6 @@ defmodule CodeQA.CLI.Analyze do IO.puts(:stderr, "Analysis completed in #{end_time - start_time}ms") - nodes_top = opts[:nodes_top] || 3 - results = BlockImpactAnalyzer.analyze(results, files, nodes_top: nodes_top) - total_bytes = results["files"] |> Map.values() |> Enum.map(& &1["bytes"]) |> Enum.sum() results = filter_files_for_output(results, opts, "json") diff --git a/lib/codeqa/cli/health_report.ex b/lib/codeqa/cli/health_report.ex index 66b769d..0e6cceb 100644 --- a/lib/codeqa/cli/health_report.ex +++ b/lib/codeqa/cli/health_report.ex @@ -3,7 +3,6 @@ defmodule CodeQA.CLI.HealthReport do @behaviour CodeQA.CLI.Command - alias 
CodeQA.BlockImpactAnalyzer alias CodeQA.CLI.Options alias CodeQA.Config alias CodeQA.Engine.Analyzer @@ -48,7 +47,8 @@ defmodule CodeQA.CLI.HealthReport do format: :string, ignore_paths: :string, base_ref: :string, - head_ref: :string + head_ref: :string, + telemetry: :boolean ] def run(args) do @@ -69,8 +69,10 @@ defmodule CodeQA.CLI.HealthReport do IO.puts(:stderr, "Analyzing #{map_size(files)} files for health report...") + telemetry_pid = if opts[:telemetry], do: attach_block_impact_telemetry() + analyze_opts = - Options.build_analyze_opts(opts) ++ Config.near_duplicate_blocks_opts() + Options.build_analyze_opts(opts) ++ Config.near_duplicate_blocks_opts() ++ [compute_nodes: true] start_time = System.monotonic_time(:millisecond) results = Analyzer.analyze_codebase(files, analyze_opts) @@ -78,8 +80,7 @@ defmodule CodeQA.CLI.HealthReport do IO.puts(:stderr, "Analysis completed in #{end_time - start_time}ms") - nodes_top = opts[:nodes_top] || 3 - results = BlockImpactAnalyzer.analyze(results, files, nodes_top: nodes_top) + if telemetry_pid, do: print_block_impact_telemetry(telemetry_pid) total_bytes = results["files"] |> Map.values() |> Enum.map(& &1["bytes"]) |> Enum.sum() @@ -149,4 +150,113 @@ defmodule CodeQA.CLI.HealthReport do IO.puts(:stderr, "Warning: unknown format '#{other}', using 'plain'") :plain end + + # --------------------------------------------------------------------------- + # Block impact telemetry + # --------------------------------------------------------------------------- + + defp attach_block_impact_telemetry do + {:ok, pid} = Agent.start_link(fn -> %{nodes: [], files: [], codebase_cosines_us: 0} end) + + :telemetry.attach_many( + "block-impact-reporter", + [ + [:codeqa, :block_impact, :codebase_cosines], + [:codeqa, :block_impact, :file], + [:codeqa, :block_impact, :node] + ], + fn event, measurements, metadata, ^pid -> + case event do + [:codeqa, :block_impact, :codebase_cosines] -> + Agent.update(pid, &Map.put(&1, :codebase_cosines_us, 
measurements.duration)) + + [:codeqa, :block_impact, :file] -> + Agent.update(pid, fn state -> + Map.update!(state, :files, &[{metadata.path, measurements} | &1]) + end) + + [:codeqa, :block_impact, :node] -> + Agent.update(pid, fn state -> + Map.update!(state, :nodes, &[{metadata.path, measurements} | &1]) + end) + end + end, + pid + ) + + pid + end + + defp print_block_impact_telemetry(pid) do + state = Agent.get(pid, & &1) + Agent.stop(pid) + :telemetry.detach("block-impact-reporter") + + nodes = state.nodes + files = state.files + + total_nodes = length(nodes) + total_files = length(files) + + node_totals = Enum.map(nodes, fn {_, m} -> m end) + file_totals = Enum.map(files, fn {_, m} -> m end) + + IO.puts(:stderr, """ + + ── Block Impact Telemetry ────────────────────────────── + Codebase cosines: #{us(state.codebase_cosines_us)} + Files processed: #{total_files} + Nodes processed: #{total_nodes} + + Per-file breakdown (avg across #{total_files} files): + tokenize: #{avg_us(file_totals, :tokenize_us)} + parse blocks: #{avg_us(file_totals, :parse_us)} + file cosines: #{avg_us(file_totals, :file_cosines_us)} + total/file: #{avg_us(file_totals, :duration)} + + Per-node breakdown (avg across #{total_nodes} nodes): + reconstruct: #{avg_us(node_totals, :reconstruct_us)} + analyze_file: #{avg_us(node_totals, :analyze_file_us)} + aggregate: #{avg_us(node_totals, :aggregate_us)} + refactoring cosine: #{avg_us(node_totals, :refactoring_us)} + total/node: #{avg_us(node_totals, :duration)} + + Top 5 slowest files (total node time): + #{top_slow_files(files, nodes)} + ──────────────────────────────────────────────────────── + """) + end + + defp top_slow_files(files, nodes) do + node_time_by_file = + nodes + |> Enum.group_by(fn {path, _} -> path end, fn {_, m} -> m.duration end) + |> Map.new(fn {path, durations} -> {path, Enum.sum(durations)} end) + + files + |> Enum.map(fn {path, fm} -> + node_time = Map.get(node_time_by_file, path, 0) + {path, fm.node_count, node_time} + 
end) + |> Enum.sort_by(fn {_, _, t} -> -t end) + |> Enum.take(5) + |> Enum.map_join("\n", fn {path, node_count, node_time} -> + " #{path} (#{node_count} nodes, #{us(node_time)} node time)" + end) + end + + defp avg_us([], _key), do: "n/a" + + defp avg_us(measurements, key) do + total = Enum.sum(Enum.map(measurements, &Map.get(&1, key, 0))) + us(div(total, length(measurements))) + end + + defp us(microseconds) when microseconds >= 1_000_000, + do: "#{Float.round(microseconds / 1_000_000, 2)}s" + + defp us(microseconds) when microseconds >= 1_000, + do: "#{Float.round(microseconds / 1_000, 1)}ms" + + defp us(microseconds), do: "#{microseconds}µs" end diff --git a/lib/codeqa/engine/analyzer.ex b/lib/codeqa/engine/analyzer.ex index 6fe8cf0..bd55e11 100644 --- a/lib/codeqa/engine/analyzer.ex +++ b/lib/codeqa/engine/analyzer.ex @@ -2,6 +2,7 @@ defmodule CodeQA.Engine.Analyzer do @moduledoc "Orchestrates metric computation across files." alias CodeQA.Analysis.RunSupervisor + alias CodeQA.BlockImpactAnalyzer alias CodeQA.Engine.Parallel alias CodeQA.Engine.Pipeline alias CodeQA.Engine.Registry @@ -41,6 +42,12 @@ defmodule CodeQA.Engine.Analyzer do Registry.run_file_metrics(@registry, ctx, []) end + @spec analyze_file_for_loo(String.t(), String.t()) :: map() + def analyze_file_for_loo(_path, content) do + ctx = Pipeline.build_file_context(content, skip_structural: true) + Registry.run_file_metrics(@registry, ctx, []) + end + @spec analyze_codebase_aggregate(map(), keyword()) :: map() def analyze_codebase_aggregate(files_map, opts \\ []) do with_run_context(opts, fn opts -> @@ -57,6 +64,7 @@ defmodule CodeQA.Engine.Analyzer do {:ok, sup} = RunSupervisor.start_link() run_ctx = RunSupervisor.run_context(sup) opts = Keyword.put(opts, :file_context_pid, run_ctx.file_context_pid) + opts = Keyword.put(opts, :behavior_config_pid, run_ctx.behavior_config_pid) try do fun.(opts) @@ -67,15 +75,34 @@ defmodule CodeQA.Engine.Analyzer do defp do_analyze_codebase(files, opts) do registry = 
@registry - file_results = Parallel.analyze_files(files, opts) - codebase_metrics = Registry.run_codebase_metrics(registry, files, opts) aggregate = aggregate_file_metrics(file_results) - %{ - "files" => file_results, - "codebase" => Map.put(codebase_metrics, "aggregate", aggregate) - } + if Keyword.get(opts, :compute_nodes, false) do + nodes_opts = + [baseline_codebase_agg: aggregate] ++ + Keyword.take(opts, [:nodes_top, :workers, :behavior_config_pid]) + + pipeline_result = %{ + "files" => file_results, + "codebase" => %{"aggregate" => aggregate} + } + + updated_pipeline_result = BlockImpactAnalyzer.analyze(pipeline_result, files, nodes_opts) + codebase_metrics = Registry.run_codebase_metrics(registry, files, opts) + + updated_codebase = + Map.merge(codebase_metrics, updated_pipeline_result["codebase"]) + + Map.put(updated_pipeline_result, "codebase", updated_codebase) + else + codebase_metrics = Registry.run_codebase_metrics(registry, files, opts) + + %{ + "files" => file_results, + "codebase" => Map.put(codebase_metrics, "aggregate", aggregate) + } + end end defp metric_data_to_triples({metric_name, metric_data}) do diff --git a/lib/codeqa/engine/collector.ex b/lib/codeqa/engine/collector.ex index ea6b16f..1315c77 100644 --- a/lib/codeqa/engine/collector.ex +++ b/lib/codeqa/engine/collector.ex @@ -7,6 +7,8 @@ defmodule CodeQA.Engine.Collector do .next coverage ]) + @default_ignore_patterns ~w[**/*.md **/*.mdx] + @spec source_extensions() :: MapSet.t() def source_extensions do CodeQA.Language.all() @@ -57,7 +59,7 @@ defmodule CodeQA.Engine.Collector do Enum.reject(list, fn item -> ignored?(key_fn.(item), patterns) end) end - defp all_ignore_patterns(extra), do: extra ++ CodeQA.Config.ignore_paths() + defp all_ignore_patterns(extra), do: extra ++ @default_ignore_patterns ++ CodeQA.Config.ignore_paths() defp do_reject_ignored_map(files_map, []), do: files_map diff --git a/lib/codeqa/engine/pipeline.ex b/lib/codeqa/engine/pipeline.ex index 33a9229..8a697a0 100644 
--- a/lib/codeqa/engine/pipeline.ex +++ b/lib/codeqa/engine/pipeline.ex @@ -34,11 +34,16 @@ defmodule CodeQA.Engine.Pipeline do lines = content |> String.split("\n") |> trim_trailing_empty() encoded = content + skip_structural = Keyword.get(opts, :skip_structural, false) + {path, blocks} = case Keyword.get(opts, :path) do nil -> {nil, nil} + p when skip_structural -> + {p, nil} + p -> lang_mod = Language.detect(p) structural_tokens = TokenNormalizer.normalize_structural(content) From 3abbc0aed5f48175ee4bd1edda426224ad6edf2a Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sun, 22 Mar 2026 16:48:42 +0100 Subject: [PATCH 53/71] test: update tests for refactored grader and near-duplicate-blocks interfaces - GraderTest: setup_all now computes cosines_by_category once and passes it into all grade_cosine_categories/3 tests, matching the new signature - SampleRunnerTest: update describe block to reflect the 3-arity signature - NearDuplicateBlocksFileTest: add test for nil blocks (LOO skip_structural path) Co-Authored-By: Claude Sonnet 4.6 --- .../combined_metrics/sample_runner_test.exs | 6 +- test/codeqa/health_report/grader_test.exs | 63 ++++++++++--------- .../file/near_duplicate_blocks_file_test.exs | 11 ++++ 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/test/codeqa/combined_metrics/sample_runner_test.exs b/test/codeqa/combined_metrics/sample_runner_test.exs index c1986aa..692c306 100644 --- a/test/codeqa/combined_metrics/sample_runner_test.exs +++ b/test/codeqa/combined_metrics/sample_runner_test.exs @@ -115,9 +115,9 @@ defmodule CodeQA.CombinedMetrics.SampleRunnerTest do end end - describe "grade_cosine_categories/4 languages wiring" do - test "accepts languages argument" do - result = Grader.grade_cosine_categories(%{}, %{}, [], ["elixir"]) + describe "grade_cosine_categories/3" do + test "returns a list for empty input" do + result = Grader.grade_cosine_categories(%{}, %{}) assert is_list(result) end end diff --git 
a/test/codeqa/health_report/grader_test.exs b/test/codeqa/health_report/grader_test.exs index b8c5c14..6f9ea54 100644 --- a/test/codeqa/health_report/grader_test.exs +++ b/test/codeqa/health_report/grader_test.exs @@ -1,6 +1,7 @@ defmodule CodeQA.HealthReport.GraderTest do use ExUnit.Case, async: true + alias CodeQA.CombinedMetrics.SampleRunner alias CodeQA.Engine.Analyzer alias CodeQA.Engine.Collector alias CodeQA.HealthReport.Grader @@ -157,12 +158,14 @@ defmodule CodeQA.HealthReport.GraderTest do end end - # Shared aggregate for grade_cosine_categories/3 tests — computed once for the module. + # Shared cosines_by_category for grade_cosine_categories/3 tests — computed once for the module. setup_all do files = Collector.collect_files("lib", []) result = Analyzer.analyze_codebase(files) aggregate = get_in(result, ["codebase", "aggregate"]) - {:ok, aggregate: aggregate} + all_cosines = SampleRunner.diagnose_aggregate(aggregate, top: 99_999) + cosines_by_category = Enum.group_by(all_cosines, & &1.category) + {:ok, cosines_by_category: cosines_by_category} end # ----------------------------------------------------------------------- @@ -170,13 +173,13 @@ defmodule CodeQA.HealthReport.GraderTest do # ----------------------------------------------------------------------- describe "grade_cosine_categories/3" do - test "returns a list", %{aggregate: aggregate} do - result = Grader.grade_cosine_categories(aggregate, %{}, @default_scale) + test "returns a list", %{cosines_by_category: cosines_by_category} do + result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale) assert is_list(result) end - test "each entry has required top-level keys", %{aggregate: aggregate} do - result = Grader.grade_cosine_categories(aggregate, %{}, @default_scale) + test "each entry has required top-level keys", %{cosines_by_category: cosines_by_category} do + result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale) for cat <- result do assert 
Map.has_key?(cat, :type), "missing :type in #{inspect(cat)}" @@ -188,13 +191,13 @@ defmodule CodeQA.HealthReport.GraderTest do end end - test "type is :cosine for every entry", %{aggregate: aggregate} do - result = Grader.grade_cosine_categories(aggregate, %{}, @default_scale) + test "type is :cosine for every entry", %{cosines_by_category: cosines_by_category} do + result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale) for cat <- result, do: assert(cat.type == :cosine) end - test "scores are integers in [0, 100]", %{aggregate: aggregate} do - result = Grader.grade_cosine_categories(aggregate, %{}, @default_scale) + test "scores are integers in [0, 100]", %{cosines_by_category: cosines_by_category} do + result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale) for cat <- result do assert is_integer(cat.score), "score not integer in #{cat.key}" @@ -202,20 +205,20 @@ defmodule CodeQA.HealthReport.GraderTest do end end - test "grade is a string", %{aggregate: aggregate} do - result = Grader.grade_cosine_categories(aggregate, %{}, @default_scale) + test "grade is a string", %{cosines_by_category: cosines_by_category} do + result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale) for cat <- result, do: assert(is_binary(cat.grade)) end test "impact key is absent (HealthReport.generate/2 is responsible for embedding impact)", %{ - aggregate: aggregate + cosines_by_category: cosines_by_category } do - result = Grader.grade_cosine_categories(aggregate, %{}, @default_scale) + result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale) for cat <- result, do: refute(Map.has_key?(cat, :impact)) end - test "name is humanized from key", %{aggregate: aggregate} do - result = Grader.grade_cosine_categories(aggregate, %{}, @default_scale) + test "name is humanized from key", %{cosines_by_category: cosines_by_category} do + result = Grader.grade_cosine_categories(cosines_by_category, %{}, 
@default_scale) for cat <- result do # name must be a non-empty string, words capitalized @@ -226,8 +229,8 @@ defmodule CodeQA.HealthReport.GraderTest do end end - test "each behavior entry has required keys", %{aggregate: aggregate} do - result = Grader.grade_cosine_categories(aggregate, %{}, @default_scale) + test "each behavior entry has required keys", %{cosines_by_category: cosines_by_category} do + result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale) for cat <- result, b <- cat.behaviors do assert Map.has_key?(b, :behavior) @@ -238,8 +241,8 @@ defmodule CodeQA.HealthReport.GraderTest do end end - test "behavior scores are integers in [0, 100]", %{aggregate: aggregate} do - result = Grader.grade_cosine_categories(aggregate, %{}, @default_scale) + test "behavior scores are integers in [0, 100]", %{cosines_by_category: cosines_by_category} do + result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale) for cat <- result, b <- cat.behaviors do assert is_integer(b.score) @@ -247,22 +250,24 @@ defmodule CodeQA.HealthReport.GraderTest do end end - test "worst_offenders uses worst_files lookup", %{aggregate: aggregate} do + test "worst_offenders uses worst_files lookup", %{cosines_by_category: cosines_by_category} do sentinel = [%{file: "lib/sentinel.ex", cosine: -0.99}] # Get one real behavior key to inject into worst_files - [first_cat | _] = Grader.grade_cosine_categories(aggregate, %{}, @default_scale) + [first_cat | _] = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale) first_behavior = hd(first_cat.behaviors) lookup_key = "#{first_cat.key}.#{first_behavior.behavior}" worst_files = %{lookup_key => sentinel} - result = Grader.grade_cosine_categories(aggregate, worst_files, @default_scale) + result = Grader.grade_cosine_categories(cosines_by_category, worst_files, @default_scale) found_cat = Enum.find(result, &(&1.key == first_cat.key)) found_behavior = Enum.find(found_cat.behaviors, 
&(&1.behavior == first_behavior.behavior)) assert found_behavior.worst_offenders == sentinel end - test "top_metrics and top_nodes pass through unmodified", %{aggregate: aggregate} do + test "top_metrics and top_nodes pass through unmodified", %{ + cosines_by_category: cosines_by_category + } do sentinel = [ %{ file: "lib/sentinel.ex", @@ -272,20 +277,22 @@ defmodule CodeQA.HealthReport.GraderTest do } ] - [first_cat | _] = Grader.grade_cosine_categories(aggregate, %{}, @default_scale) + [first_cat | _] = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale) first_behavior = hd(first_cat.behaviors) lookup_key = "#{first_cat.key}.#{first_behavior.behavior}" worst_files = %{lookup_key => sentinel} - result = Grader.grade_cosine_categories(aggregate, worst_files, @default_scale) + result = Grader.grade_cosine_categories(cosines_by_category, worst_files, @default_scale) found_cat = Enum.find(result, &(&1.key == first_cat.key)) found_behavior = Enum.find(found_cat.behaviors, &(&1.behavior == first_behavior.behavior)) assert found_behavior.worst_offenders == sentinel end - test "worst_offenders defaults to [] when key absent", %{aggregate: aggregate} do - result = Grader.grade_cosine_categories(aggregate, %{}, @default_scale) + test "worst_offenders defaults to [] when key absent", %{ + cosines_by_category: cosines_by_category + } do + result = Grader.grade_cosine_categories(cosines_by_category, %{}, @default_scale) for cat <- result, b <- cat.behaviors, do: assert(b.worst_offenders == []) end end diff --git a/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs b/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs index c59f707..8b7b6ab 100644 --- a/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs +++ b/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs @@ -25,6 +25,17 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocksFileTest do end end + describe "analyze/1 with nil blocks" do + test "returns zeroed map with 
all keys when blocks is nil" do + ctx = Pipeline.build_file_context("x = 1\n", skip_structural: true) + result = NearDuplicateBlocksFile.analyze(ctx) + assert Map.has_key?(result, "block_count") + assert Map.has_key?(result, "sub_block_count") + for d <- 0..8, do: assert(Map.has_key?(result, "near_dup_block_d#{d}")) + for {_key, value} <- result, do: assert(value == 0) + end + end + describe "analyze/1" do test "returns a map with all expected keys" do result = NearDuplicateBlocksFile.analyze(ctx("x = 1\n")) From 3b67dbbe1dcdbcbaf9f5a0e0452babf04080d919 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sun, 22 Mar 2026 17:00:18 +0100 Subject: [PATCH 54/71] refactor: fix dialyzer, credo nesting, and compiler warnings - Remove dead do_reject_ignored_map(_, []) clause in Collector (patterns list always non-empty) - Extract handle_block_impact_event/4 multi-clause fn in HealthReport to reduce nesting depth - Extract diagnose_from_behavior_map_entry/5 and diagnose_from_yaml/5 in SampleRunner to reduce nesting depth - Remove unused default arg \\ [] from private maybe_diagnose_behavior/8 Co-Authored-By: Claude Sonnet 4.6 --- .../block_impact/refactoring_potentials.ex | 7 ++- lib/codeqa/block_impact_analyzer.ex | 16 +++-- lib/codeqa/cli/health_report.ex | 36 +++++------ lib/codeqa/combined_metrics/sample_runner.ex | 60 ++++++++----------- lib/codeqa/engine/collector.ex | 5 +- lib/codeqa/health_report/top_blocks.ex | 1 - 6 files changed, 62 insertions(+), 63 deletions(-) diff --git a/lib/codeqa/block_impact/refactoring_potentials.ex b/lib/codeqa/block_impact/refactoring_potentials.ex index f2417ef..0dc1738 100644 --- a/lib/codeqa/block_impact/refactoring_potentials.ex +++ b/lib/codeqa/block_impact/refactoring_potentials.ex @@ -48,7 +48,12 @@ defmodule CodeQA.BlockImpact.RefactoringPotentials do compute_file_delta(baseline_file_cosines, without_file_metrics, language, behavior_map) codebase_delta = - compute_codebase_delta(baseline_codebase_cosines, 
without_codebase_agg, languages, behavior_map) + compute_codebase_delta( + baseline_codebase_cosines, + without_codebase_agg, + languages, + behavior_map + ) all_keys = Enum.uniq(Map.keys(file_delta) ++ Map.keys(codebase_delta)) diff --git a/lib/codeqa/block_impact_analyzer.ex b/lib/codeqa/block_impact_analyzer.ex index c317617..aaebdf6 100644 --- a/lib/codeqa/block_impact_analyzer.ex +++ b/lib/codeqa/block_impact_analyzer.ex @@ -325,12 +325,14 @@ defmodule CodeQA.BlockImpactAnalyzer do defp file_metrics_to_triples(metrics) when is_map(metrics) do metrics - |> Enum.flat_map(fn {metric_name, metric_data} when is_map(metric_data) -> - metric_data - |> Enum.filter(fn {_k, v} -> is_number(v) end) - |> Enum.map(fn {key, value} -> {metric_name, key, value / 1} end) + |> Enum.flat_map(fn + {metric_name, metric_data} when is_map(metric_data) -> + metric_data + |> Enum.filter(fn {_k, v} -> is_number(v) end) + |> Enum.map(fn {key, value} -> {metric_name, key, value / 1} end) - _ -> [] + _ -> + [] end) end @@ -345,7 +347,9 @@ defmodule CodeQA.BlockImpactAnalyzer do n = length(values) sum = Enum.sum(values) sum_sq = Enum.reduce(values, 0.0, fn v, acc -> acc + v * v end) - {{metric, key}, %{sum: sum, sum_sq: sum_sq, min: Enum.min(values), max: Enum.max(values), count: n}} + + {{metric, key}, + %{sum: sum, sum_sq: sum_sq, min: Enum.min(values), max: Enum.max(values), count: n}} end) end diff --git a/lib/codeqa/cli/health_report.ex b/lib/codeqa/cli/health_report.ex index 0e6cceb..6d20ab2 100644 --- a/lib/codeqa/cli/health_report.ex +++ b/lib/codeqa/cli/health_report.ex @@ -72,7 +72,8 @@ defmodule CodeQA.CLI.HealthReport do telemetry_pid = if opts[:telemetry], do: attach_block_impact_telemetry() analyze_opts = - Options.build_analyze_opts(opts) ++ Config.near_duplicate_blocks_opts() ++ [compute_nodes: true] + Options.build_analyze_opts(opts) ++ + Config.near_duplicate_blocks_opts() ++ [compute_nodes: true] start_time = System.monotonic_time(:millisecond) results = 
Analyzer.analyze_codebase(files, analyze_opts) @@ -165,28 +166,29 @@ defmodule CodeQA.CLI.HealthReport do [:codeqa, :block_impact, :file], [:codeqa, :block_impact, :node] ], - fn event, measurements, metadata, ^pid -> - case event do - [:codeqa, :block_impact, :codebase_cosines] -> - Agent.update(pid, &Map.put(&1, :codebase_cosines_us, measurements.duration)) - - [:codeqa, :block_impact, :file] -> - Agent.update(pid, fn state -> - Map.update!(state, :files, &[{metadata.path, measurements} | &1]) - end) - - [:codeqa, :block_impact, :node] -> - Agent.update(pid, fn state -> - Map.update!(state, :nodes, &[{metadata.path, measurements} | &1]) - end) - end - end, + &handle_block_impact_event(&1, &2, &3, &4), pid ) pid end + defp handle_block_impact_event([:codeqa, :block_impact, :codebase_cosines], measurements, _metadata, pid) do + Agent.update(pid, &Map.put(&1, :codebase_cosines_us, measurements.duration)) + end + + defp handle_block_impact_event([:codeqa, :block_impact, :file], measurements, metadata, pid) do + Agent.update(pid, fn state -> + Map.update!(state, :files, &[{metadata.path, measurements} | &1]) + end) + end + + defp handle_block_impact_event([:codeqa, :block_impact, :node], measurements, metadata, pid) do + Agent.update(pid, fn state -> + Map.update!(state, :nodes, &[{metadata.path, measurements} | &1]) + end) + end + defp print_block_impact_telemetry(pid) do state = Agent.get(pid, & &1) Agent.stop(pid) diff --git a/lib/codeqa/combined_metrics/sample_runner.ex b/lib/codeqa/combined_metrics/sample_runner.ex index 6c5b6c9..4bd397b 100644 --- a/lib/codeqa/combined_metrics/sample_runner.ex +++ b/lib/codeqa/combined_metrics/sample_runner.ex @@ -188,43 +188,15 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do if behavior_map do behavior_map |> Enum.sort_by(fn {category, _} -> category end) - |> Enum.flat_map(fn {category, behaviors} -> - Enum.flat_map(behaviors, fn {behavior, behavior_data} -> - yaml_path = "priv/combined_metrics/#{category}.yml" - - 
maybe_diagnose_behavior( - yaml_path, - behavior, - behavior_data, - aggregate, - category, - language, - languages, - cosine_opts - ) - end) - end) + |> Enum.flat_map( + &diagnose_from_behavior_map_entry(&1, aggregate, language, languages, cosine_opts) + ) else Scorer.all_yamls() |> Enum.sort_by(fn {path, _} -> path end) - |> Enum.flat_map(fn {yaml_path, data} -> - category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") - - data - |> Enum.filter(fn {_k, v} -> is_map(v) end) - |> Enum.flat_map(fn {behavior, behavior_data} -> - maybe_diagnose_behavior( - yaml_path, - behavior, - behavior_data, - aggregate, - category, - language, - languages, - cosine_opts - ) - end) - end) + |> Enum.flat_map( + &diagnose_from_yaml(&1, aggregate, language, languages, cosine_opts) + ) end behaviors_stream @@ -404,6 +376,24 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do end) end + defp diagnose_from_behavior_map_entry({category, behaviors}, aggregate, language, languages, cosine_opts) do + yaml_path = "priv/combined_metrics/#{category}.yml" + + Enum.flat_map(behaviors, fn {behavior, behavior_data} -> + maybe_diagnose_behavior(yaml_path, behavior, behavior_data, aggregate, category, language, languages, cosine_opts) + end) + end + + defp diagnose_from_yaml({yaml_path, data}, aggregate, language, languages, cosine_opts) do + category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") + + data + |> Enum.filter(fn {_k, v} -> is_map(v) end) + |> Enum.flat_map(fn {behavior, behavior_data} -> + maybe_diagnose_behavior(yaml_path, behavior, behavior_data, aggregate, category, language, languages, cosine_opts) + end) + end + defp maybe_diagnose_behavior( yaml_path, behavior, @@ -412,7 +402,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do category, language, languages, - cosine_opts \\ [] + cosine_opts ) do behavior_langs = Map.get(behavior_data, "_languages", []) diff --git a/lib/codeqa/engine/collector.ex b/lib/codeqa/engine/collector.ex index 
1315c77..3d1b8b4 100644 --- a/lib/codeqa/engine/collector.ex +++ b/lib/codeqa/engine/collector.ex @@ -59,9 +59,8 @@ defmodule CodeQA.Engine.Collector do Enum.reject(list, fn item -> ignored?(key_fn.(item), patterns) end) end - defp all_ignore_patterns(extra), do: extra ++ @default_ignore_patterns ++ CodeQA.Config.ignore_paths() - - defp do_reject_ignored_map(files_map, []), do: files_map + defp all_ignore_patterns(extra), + do: extra ++ @default_ignore_patterns ++ CodeQA.Config.ignore_paths() defp do_reject_ignored_map(files_map, patterns) do Map.reject(files_map, fn {path, _} -> ignored?(path, patterns) end) diff --git a/lib/codeqa/health_report/top_blocks.ex b/lib/codeqa/health_report/top_blocks.ex index 6b59bc3..42a0bfd 100644 --- a/lib/codeqa/health_report/top_blocks.ex +++ b/lib/codeqa/health_report/top_blocks.ex @@ -113,5 +113,4 @@ defmodule CodeQA.HealthReport.TopBlocks do defp max_delta(%{potentials: potentials}), do: Enum.max_by(potentials, & &1.cosine_delta).cosine_delta - end From 03cce19204d22e32af03dc0293498d7c8989276b Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sun, 22 Mar 2026 17:03:57 +0100 Subject: [PATCH 55/71] fix(test): pass path to build_file_context in NearDuplicateBlocksFileTest After the pipeline refactor, build_file_context computes blocks only when a path is provided. The test helper was building context without a path then patching it in, so blocks were always nil and the analyze/1 nil-clause short-circuited to zeros. Pass path: directly so blocks are populated. 
Co-Authored-By: Claude Sonnet 4.6 --- test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs b/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs index 8b7b6ab..cb10540 100644 --- a/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs +++ b/test/codeqa/metrics/file/near_duplicate_blocks_file_test.exs @@ -4,8 +4,7 @@ defmodule CodeQA.Metrics.File.NearDuplicateBlocksFileTest do alias CodeQA.Metrics.File.NearDuplicateBlocksFile defp ctx(code, path \\ "test.ex") do - base = Pipeline.build_file_context(code) - Map.put(base, :path, path) + Pipeline.build_file_context(code, path: path) end describe "name/0" do From ddff5c1fb2ae2396deb88eabdf2618c3b37913ae Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 22 Mar 2026 16:08:32 +0000 Subject: [PATCH 56/71] chore(combined-metrics): sync language coverage and scalar vectors [skip ci] --- priv/combined_metrics/code_smells.yml | 5 ----- priv/combined_metrics/variable_naming.yml | 14 -------------- 2 files changed, 19 deletions(-) diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml index 2201f7d..f1c73c5 100644 --- a/priv/combined_metrics/code_smells.yml +++ b/priv/combined_metrics/code_smells.yml @@ -1,6 +1,5 @@ consistent_string_quote_style: _doc: "Files should use a single, consistent string quoting style throughout." - _languages: [elixir] _log_baseline: -18.2553 branching: mean_branching_density: 0.0243 @@ -102,7 +101,6 @@ consistent_string_quote_style: no_dead_code_after_return: _doc: "There should be no unreachable statements after a return or early exit." - _languages: [elixir] _log_baseline: -55.8435 branching: mean_branch_count: -2.0000 @@ -213,7 +211,6 @@ no_dead_code_after_return: no_debug_print_statements: _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." 
- _languages: [elixir] _log_baseline: -88.0844 branching: mean_branch_count: -0.3540 @@ -324,7 +321,6 @@ no_debug_print_statements: no_fixme_comments: _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." - _languages: [elixir] _log_baseline: -2.0233 branching: mean_branch_count: 0.1755 @@ -439,7 +435,6 @@ no_fixme_comments: no_nested_ternary: _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." - _languages: [elixir] _log_baseline: 7.6475 branching: mean_branch_count: -0.5662 diff --git a/priv/combined_metrics/variable_naming.yml b/priv/combined_metrics/variable_naming.yml index e7bc6fa..1be9c6b 100644 --- a/priv/combined_metrics/variable_naming.yml +++ b/priv/combined_metrics/variable_naming.yml @@ -1,6 +1,5 @@ boolean_has_is_has_prefix: _doc: "Boolean variables should be prefixed with `is_`, `has_`, or `can_`." - _languages: [elixir, javascript, ruby] _log_baseline: 15.9481 brevity: mean_sample_size: 0.0752 @@ -63,7 +62,6 @@ boolean_has_is_has_prefix: collection_name_is_plural: _doc: "Variables holding a collection should use a plural name." - _languages: [elixir, javascript, ruby] _log_baseline: 21.8380 brevity: mean_sample_size: -0.5320 @@ -125,7 +123,6 @@ collection_name_is_plural: loop_var_is_single_letter: _doc: "Loop index variables (`i`, `j`, `k`) are acceptable inside loop bodies." - _languages: [elixir, javascript, ruby] _log_baseline: -28.3218 brevity: mean_sample_size: -0.1049 @@ -205,7 +202,6 @@ loop_var_is_single_letter: name_contains_and: _doc: "Variable names containing `and` signal a variable that holds two concerns." - _languages: [elixir, javascript, ruby] _log_baseline: 0.4689 branching: mean_branch_count: -0.3666 @@ -324,7 +320,6 @@ name_contains_and: name_contains_type_suffix: _doc: "Type suffixes in names (`userString`, `nameList`) are redundant noise." 
- _languages: [elixir, javascript, ruby] _log_baseline: -26.6817 branching: mean_branch_count: -0.4150 @@ -410,7 +405,6 @@ name_contains_type_suffix: name_is_abbreviation: _doc: "Abbreviated names (`usr`, `cfg`, `mgr`) reduce readability." - _languages: [elixir, javascript, ruby] _log_baseline: 10.7370 brevity: mean_sample_size: -0.1542 @@ -497,7 +491,6 @@ name_is_abbreviation: name_is_generic: _doc: "Generic names (`data`, `result`, `tmp`, `val`, `obj`) convey no domain meaning." - _languages: [elixir, javascript, ruby] _log_baseline: 37.4815 branching: mean_branch_count: 0.5193 @@ -614,7 +607,6 @@ name_is_generic: name_is_number_like: _doc: "Number-suffixed names (`var1`, `thing2`) signal a missing abstraction." - _languages: [elixir, javascript, ruby] _log_baseline: 1.7611 brevity: mean_sample_size: -0.0262 @@ -693,7 +685,6 @@ name_is_number_like: name_is_single_letter: _doc: "Single-letter names outside loop indices are too opaque." - _languages: [elixir, javascript, ruby] _log_baseline: 26.2113 branching: mean_branching_density: -0.0458 @@ -791,7 +782,6 @@ name_is_single_letter: name_is_too_long: _doc: "Names longer than ~30 characters harm readability." - _languages: [elixir, javascript, ruby] _log_baseline: -7.8322 branching: mean_branch_count: 0.0340 @@ -912,7 +902,6 @@ name_is_too_long: name_is_too_short: _doc: "Names shorter than 3 characters (outside loops) are too opaque." - _languages: [elixir, javascript, ruby] _log_baseline: -2.7224 branching: mean_branch_count: -0.2327 @@ -1000,7 +989,6 @@ name_is_too_short: negated_boolean_name: _doc: "Negated boolean names (`isNotValid`, `notActive`) are harder to reason about." - _languages: [elixir, javascript, ruby] _log_baseline: -4.4565 brevity: mean_sample_size: -0.0998 @@ -1071,7 +1059,6 @@ negated_boolean_name: no_hungarian_notation: _doc: "Hungarian notation prefixes (`strName`, `bFlag`) add noise without type safety." 
- _languages: [elixir, javascript, ruby] _log_baseline: -15.5962 brevity: mean_sample_size: -0.0814 @@ -1147,7 +1134,6 @@ no_hungarian_notation: screaming_snake_for_constants: _doc: "Module-level constants should use SCREAMING_SNAKE_CASE." - _languages: [elixir, javascript, ruby] _log_baseline: -5.9884 branching: mean_branching_density: 0.0176 From 4e2eeb0ce429265aaa4dddb9c799ab34d4d9e8de Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sun, 22 Mar 2026 17:33:52 +0100 Subject: [PATCH 57/71] docs: add multi-part PR comment design spec Co-Authored-By: Claude Sonnet 4.6 --- ...026-03-22-multi-part-pr-comments-design.md | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md diff --git a/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md b/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md new file mode 100644 index 0000000..ef2deb5 --- /dev/null +++ b/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md @@ -0,0 +1,108 @@ +# Multi-Part PR Comments Design + +**Date:** 2026-03-22 +**Status:** Proposed + +## Context + +The `codeqa health-report` GitHub Action posts a markdown report as a sticky PR comment via `marocchino/sticky-pull-request-comment@v2`. GitHub's PR comment API has a hard 65,536 character limit. On large codebases (300+ files), the generated report exceeds this limit and the posting step fails. + +## Solution + +Split the report into fixed-section parts, each posted as a separate sticky PR comment. No content compression — splitting is purely a rendering concern. + +## Part Assignment + +Parts are fixed, not dynamically determined by content size (except Part 3+ which slices the blocks section). 
+ +| Part | Sticky Header | Content | +|------|--------------|---------| +| 1 | `codeqa-health-report-1` | Header + overall grade + mermaid chart + progress bars + overall category table + PR summary + metric changes (delta) | +| 2 | `codeqa-health-report-2` | Top likely issues + all category detail sections (threshold metrics + cosine behaviors) | +| 3+ | `codeqa-health-report-3`, `codeqa-health-report-4`, … | Blocks section, sliced at 60,000 characters per part | + +Each non-final chunk of Part 3+ ends with: + +``` +> ⚠️ Truncated at 60,000 chars — continued in next comment +``` + +If there are no blocks, Part 3 is written as a single empty part (`""`). + +## Formatter Changes + +**File:** `lib/codeqa/health_report/formatter/github.ex` + +Add three new rendering entry points alongside the existing `render/3`: + +- `render_part_1(report, opts)` → `String.t()` — header, summary table, PR summary, delta, mermaid chart, progress bars +- `render_part_2(report, opts)` → `String.t()` — top issues, all category detail sections +- `render_parts_3(report, opts)` → `[String.t()]` — blocks section sliced into 60,000-char chunks; returns `[""]` when no blocks exist + +The existing `render/3` is not changed. It continues to produce the full single-string report for `--output file` usage. + +## CLI Changes + +**File:** `lib/codeqa/cli/health_report.ex` + +Add `render_parts(report, opts)` → `[String.t()]` — returns a flat list `[part_1, part_2, part_3a, part_3b, ...]`. Used internally when the `comment: true` path is active. + +When writing output for comment mode, the CLI writes each part to a numbered temp file: + +- `$TMPDIR/codeqa-part-1.md` +- `$TMPDIR/codeqa-part-2.md` +- `$TMPDIR/codeqa-part-3.md` +- … etc. + +It also writes `$TMPDIR/codeqa-part-count.txt` containing the integer count of parts. + +The existing `--output` flag behaviour (write single file) is unchanged. 
+ +## Stale Comment Handling + +If a previous run produced 4 parts and the current run produces 2, the old parts 3 and 4 remain stale. To handle this, always write a minimum of 3 part files. Parts beyond the actual content get a single-line placeholder: + +``` +> _No content for this section._ +``` + +The sticky comment action overwrites the stale comment with the placeholder rather than leaving old content. The minimum of 3 is sufficient for the current fixed-section design. Real content is written for any blocks overflow to part 4+. + +## Action / run.sh Changes + +**File:** `scripts/run.sh` + +After generating part files, loop over them and post each using `gh pr comment` (or the GitHub API directly). Each part uses its own sticky header `codeqa-health-report-{N}`. + +`run.sh` takes ownership of the posting loop since it already has access to all required env vars. This avoids duplicating logic across multiple YAML steps. + +**File:** `.github/workflows/health-report.yml` + +The current single `marocchino/sticky-pull-request-comment@v2` step is replaced. The YAML posting step becomes a no-op for comment posting; run.sh handles it entirely. No YAML changes are needed as the number of parts varies — a shell loop in run.sh handles the variable count cleanly. + +## Key Constraints + +- `render/3` must not change behaviour — used by `--output` flag +- Part 1 must always be self-contained — a reader seeing only Part 1 gets the full codebase health picture +- Parts 2 and 3 are drill-down detail; safe to be empty if the codebase has no behaviors or blocks +- 60,000 char slice limit (not 65,536) leaves headroom for sticky comment metadata + +## Breaking Change + +The sticky comment header for Part 1 changes from `codeqa-health-report` to `codeqa-health-report-1`. Old single-part comments will not be cleaned up automatically on the first run after upgrade. 
+ +## Files Affected + +| File | Change | +|------|--------| +| `lib/codeqa/health_report/formatter/github.ex` | Add `render_part_1/2`, `render_part_2/2`, `render_parts_3/2` | +| `lib/codeqa/cli/health_report.ex` | Add `render_parts/2`, multi-file output in comment mode | +| `scripts/run.sh` | Loop to post multiple part files | +| `.github/workflows/health-report.yml` | Simplified posting step | +| `test/codeqa/health_report/formatter_test.exs` | Tests for new part renderers | + +## What Does Not Change + +- The `--detail`, `--top`, `--format`, `--output` CLI flags +- The plain formatter (`Formatter.Plain`) +- Report data assembly (`health_report.ex`, `grader.ex`, `top_blocks.ex`) — splitting is purely a rendering concern From 9a220ddb318426de00c51fadc98d5ccb2601d85c Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sun, 22 Mar 2026 17:35:12 +0100 Subject: [PATCH 58/71] docs: address spec review issues in multi-part PR comments design Co-Authored-By: Claude Sonnet 4.6 --- ...026-03-22-multi-part-pr-comments-design.md | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md b/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md index ef2deb5..52a6a79 100644 --- a/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md +++ b/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md @@ -45,7 +45,7 @@ The existing `render/3` is not changed. It continues to produce the full single- **File:** `lib/codeqa/cli/health_report.ex` -Add `render_parts(report, opts)` → `[String.t()]` — returns a flat list `[part_1, part_2, part_3a, part_3b, ...]`. Used internally when the `comment: true` path is active. +Add `render_parts(report, opts)` → `[String.t()]` — returns a flat list `[part_1, part_2, part_3a, part_3b, ...]`. Used internally when the `comment: true` path is active. 
`comment: true` is an **existing** flag (already parsed from `INPUT_COMMENT` env var in `run.sh` and passed as `--comment` to the CLI); no new flag is introduced. When writing output for comment mode, the CLI writes each part to a numbered temp file: @@ -68,17 +68,29 @@ If a previous run produced 4 parts and the current run produces 2, the old parts The sticky comment action overwrites the stale comment with the placeholder rather than leaving old content. The minimum of 3 is sufficient for the current fixed-section design. Real content is written for any blocks overflow to part 4+. +**Known limitation:** if run N produces more than 3 parts (e.g., 5) and run N+1 produces fewer (e.g., 3), parts 4 and 5 from run N remain stale permanently — the minimum-3 floor does not cover them. This is accepted as an edge case; the stale comments are cosmetic (they hold the placeholder text), and a future cleanup step can address it if needed. + ## Action / run.sh Changes **File:** `scripts/run.sh` -After generating part files, loop over them and post each using `gh pr comment` (or the GitHub API directly). Each part uses its own sticky header `codeqa-health-report-{N}`. +After generating part files, loop over them and post each as a sticky PR comment. Use the GitHub REST API directly (`curl -s -X POST/PATCH`) with the following sticky update-or-create logic: + +1. Search existing PR comments for one whose body contains the sentinel `` (appended to each part by the formatter) +2. If found: `PATCH /repos/{owner}/{repo}/issues/comments/{id}` with the new body +3. If not found: `POST /repos/{owner}/{repo}/issues/{pr_number}/comments` with the new body + +Each part's markdown ends with the sentinel HTML comment so future runs can locate and update it: + +``` + +``` -`run.sh` takes ownership of the posting loop since it already has access to all required env vars. This avoids duplicating logic across multiple YAML steps. 
+This replicates the sticky semantics of `marocchino/sticky-pull-request-comment@v2` without depending on that action for a variable number of posts. `run.sh` uses `GITHUB_TOKEN` (already available in the action environment) and `GITHUB_API_URL`, `GITHUB_REPOSITORY`, and `PR_NUMBER` (sourced from the workflow env). **File:** `.github/workflows/health-report.yml` -The current single `marocchino/sticky-pull-request-comment@v2` step is replaced. The YAML posting step becomes a no-op for comment posting; run.sh handles it entirely. No YAML changes are needed as the number of parts varies — a shell loop in run.sh handles the variable count cleanly. +Remove the `marocchino/sticky-pull-request-comment@v2` step. `run.sh` now owns posting entirely. The workflow passes `PR_NUMBER: ${{ github.event.pull_request.number }}` as an env var to the run step. ## Key Constraints From d2438f5b83aada34b9e1564fd66d069f2690aca8 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sun, 22 Mar 2026 17:36:03 +0100 Subject: [PATCH 59/71] docs: fix sentinel and part-count clarifications in multi-part spec Co-Authored-By: Claude Sonnet 4.6 --- .../2026-03-22-multi-part-pr-comments-design.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md b/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md index 52a6a79..f67de67 100644 --- a/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md +++ b/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md @@ -39,7 +39,15 @@ Add three new rendering entry points alongside the existing `render/3`: - `render_part_2(report, opts)` → `String.t()` — top issues, all category detail sections - `render_parts_3(report, opts)` → `[String.t()]` — blocks section sliced into 60,000-char chunks; returns `[""]` when no blocks exist -The existing `render/3` is not changed. 
It continues to produce the full single-string report for `--output file` usage. +**Each rendered part must end with a sentinel HTML comment as its final line:** + +``` + +``` + +where `N` is the 1-based part index. This sentinel is the sole mechanism by which `run.sh` locates an existing comment to update (the `marocchino` action and its header concept are removed entirely). Without the sentinel, every run would create a new comment instead of updating the previous one. + +The existing `render/3` is not changed. It continues to produce the full single-string report for `--output file` usage and does not append a sentinel. ## CLI Changes @@ -54,7 +62,7 @@ When writing output for comment mode, the CLI writes each part to a numbered tem - `$TMPDIR/codeqa-part-3.md` - … etc. -It also writes `$TMPDIR/codeqa-part-count.txt` containing the integer count of parts. +It also writes `$TMPDIR/codeqa-part-count.txt` containing the **padded** part count (i.e., `max(actual_parts, 3)`), which is the number of files `run.sh` should iterate over. The padding ensures stale cleanup files are always written. The existing `--output` flag behaviour (write single file) is unchanged. 
From 757114e7f7eac42217e3c9dc964ea7902f6bd9a1 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Sun, 22 Mar 2026 17:36:46 +0100 Subject: [PATCH 60/71] =?UTF-8?q?docs:=20final=20spec=20clarifications=20?= =?UTF-8?q?=E2=80=94=20part-count=20loop=20and=20placeholder=20ownership?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 --- .../specs/2026-03-22-multi-part-pr-comments-design.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md b/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md index f67de67..8ece93b 100644 --- a/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md +++ b/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md @@ -37,7 +37,7 @@ Add three new rendering entry points alongside the existing `render/3`: - `render_part_1(report, opts)` → `String.t()` — header, summary table, PR summary, delta, mermaid chart, progress bars - `render_part_2(report, opts)` → `String.t()` — top issues, all category detail sections -- `render_parts_3(report, opts)` → `[String.t()]` — blocks section sliced into 60,000-char chunks; returns `[""]` when no blocks exist +- `render_parts_3(report, opts)` → `[String.t()]` — blocks section sliced into 60,000-char chunks; returns `["> _No content for this section._"]` when no blocks exist (the CLI does not substitute the placeholder — the formatter is responsible) **Each rendered part must end with a sentinel HTML comment as its final line:** @@ -62,7 +62,7 @@ When writing output for comment mode, the CLI writes each part to a numbered tem - `$TMPDIR/codeqa-part-3.md` - … etc. -It also writes `$TMPDIR/codeqa-part-count.txt` containing the **padded** part count (i.e., `max(actual_parts, 3)`), which is the number of files `run.sh` should iterate over. The padding ensures stale cleanup files are always written. 
+It also writes `$TMPDIR/codeqa-part-count.txt` containing the **padded** part count (i.e., `max(actual_parts, 3)`). `run.sh` reads this file to determine how many iterations to perform — it does not infer part count from files on disk. The padding ensures stale cleanup files are always written for at least parts 1–3. The existing `--output` flag behaviour (write single file) is unchanged. From ffa74003cf7042fddc067714db2e1c4f47978944 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 26 Mar 2026 14:35:21 +0100 Subject: [PATCH 61/71] feat(multi-part-comments): split health report into multiple PR comments Implements the multi-part PR comments spec to handle GitHub's 65,536 character limit on PR comments. Large reports are now split into: - Part 1: Header, grade, chart, progress bars - Part 2: Top issues, category detail sections - Part 3+: Blocks section, auto-sliced at 60,000 chars Changes: - Add render_part_1/2, render_part_2/2, render_parts_3/2 to Github formatter - Add render_parts/2 to Formatter module - Add --comment flag to CLI that writes numbered part files - Update run.sh to post via GitHub API with sentinel-based sticky updates - Remove marocchino/sticky-pull-request-comment dependency - Add 13 tests for multi-part rendering Breaking: Old single-part comments won't auto-clean on first run. 
--- action.yml | 8 +- lib/codeqa/cli/health_report.ex | 62 +++++++-- lib/codeqa/combined_metrics/sample_runner.ex | 34 ++++- lib/codeqa/health_report/formatter.ex | 19 +++ lib/codeqa/health_report/formatter/github.ex | 132 +++++++++++++++++++ scripts/run.sh | 78 ++++++++++- test/codeqa/health_report/formatter_test.exs | 119 +++++++++++++++++ 7 files changed, 427 insertions(+), 25 deletions(-) diff --git a/action.yml b/action.yml index 6be6078..3d43011 100644 --- a/action.yml +++ b/action.yml @@ -93,15 +93,9 @@ runs: INPUT_VERSION: ${{ inputs.version }} INPUT_BUILD: ${{ inputs.build }} GITHUB_ACTION_PATH: ${{ github.action_path }} + PR_NUMBER: ${{ github.event.pull_request.number }} run: ${{ github.action_path }}/scripts/run.sh - - name: Post PR comment - if: inputs.comment == 'true' && github.event_name == 'pull_request' - uses: marocchino/sticky-pull-request-comment@v2 - with: - header: codeqa-${{ inputs.command }} - path: ${{ steps.run.outputs.report-file }} - - name: Check grade threshold if: inputs.fail-grade != '' && inputs.command == 'health-report' shell: bash diff --git a/lib/codeqa/cli/health_report.ex b/lib/codeqa/cli/health_report.ex index 6d20ab2..b271bb4 100644 --- a/lib/codeqa/cli/health_report.ex +++ b/lib/codeqa/cli/health_report.ex @@ -31,6 +31,7 @@ defmodule CodeQA.CLI.HealthReport do --ignore-paths PATHS Comma-separated list of path patterns to ignore (supports wildcards, e.g. 
"test/*,docs/*") --base-ref REF Base git ref for PR comparison (enables delta and block scoping) --head-ref REF Head git ref (default: HEAD) + --comment Multi-part mode: writes numbered part files to TMPDIR for PR comments """ end @@ -48,7 +49,8 @@ defmodule CodeQA.CLI.HealthReport do ignore_paths: :string, base_ref: :string, head_ref: :string, - telemetry: :boolean + telemetry: :boolean, + comment: :boolean ] def run(args) do @@ -120,17 +122,52 @@ defmodule CodeQA.CLI.HealthReport do changed_files: changed_files ) - markdown = HealthReport.to_markdown(report, detail, format) + if opts[:comment] do + write_comment_parts(report, detail) + else + markdown = HealthReport.to_markdown(report, detail, format) - case opts[:output] do - nil -> - markdown + case opts[:output] do + nil -> + markdown - file -> - File.write!(file, markdown) - IO.puts(:stderr, "Health report written to #{file}") - "" + file -> + File.write!(file, markdown) + IO.puts(:stderr, "Health report written to #{file}") + "" + end + end + end + + defp write_comment_parts(report, detail) do + tmpdir = System.get_env("TMPDIR", "/tmp") + parts = HealthReport.Formatter.render_parts(report, detail: detail) + + # Write each part to a numbered file + Enum.with_index(parts, 1) + |> Enum.each(fn {content, n} -> + path = Path.join(tmpdir, "codeqa-part-#{n}.md") + File.write!(path, content) + IO.puts(:stderr, "Part #{n} written to #{path} (#{byte_size(content)} bytes)") + end) + + # Ensure at least 3 parts exist for stale cleanup + actual_count = length(parts) + padded_count = max(actual_count, 3) + + for n <- (actual_count + 1)..padded_count do + path = Path.join(tmpdir, "codeqa-part-#{n}.md") + placeholder = "> _No content for this section._\n\n" + File.write!(path, placeholder) + IO.puts(:stderr, "Part #{n} (placeholder) written to #{path}") end + + # Write part count for run.sh to read + count_path = Path.join(tmpdir, "codeqa-part-count.txt") + File.write!(count_path, to_string(padded_count)) + IO.puts(:stderr, 
"Part count (#{padded_count}) written to #{count_path}") + + "" end defp parse_detail(nil), do: :default @@ -173,7 +210,12 @@ defmodule CodeQA.CLI.HealthReport do pid end - defp handle_block_impact_event([:codeqa, :block_impact, :codebase_cosines], measurements, _metadata, pid) do + defp handle_block_impact_event( + [:codeqa, :block_impact, :codebase_cosines], + measurements, + _metadata, + pid + ) do Agent.update(pid, &Map.put(&1, :codebase_cosines_us, measurements.duration)) end diff --git a/lib/codeqa/combined_metrics/sample_runner.ex b/lib/codeqa/combined_metrics/sample_runner.ex index 4bd397b..f652c83 100644 --- a/lib/codeqa/combined_metrics/sample_runner.ex +++ b/lib/codeqa/combined_metrics/sample_runner.ex @@ -194,9 +194,7 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do else Scorer.all_yamls() |> Enum.sort_by(fn {path, _} -> path end) - |> Enum.flat_map( - &diagnose_from_yaml(&1, aggregate, language, languages, cosine_opts) - ) + |> Enum.flat_map(&diagnose_from_yaml(&1, aggregate, language, languages, cosine_opts)) end behaviors_stream @@ -376,11 +374,26 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do end) end - defp diagnose_from_behavior_map_entry({category, behaviors}, aggregate, language, languages, cosine_opts) do + defp diagnose_from_behavior_map_entry( + {category, behaviors}, + aggregate, + language, + languages, + cosine_opts + ) do yaml_path = "priv/combined_metrics/#{category}.yml" Enum.flat_map(behaviors, fn {behavior, behavior_data} -> - maybe_diagnose_behavior(yaml_path, behavior, behavior_data, aggregate, category, language, languages, cosine_opts) + maybe_diagnose_behavior( + yaml_path, + behavior, + behavior_data, + aggregate, + category, + language, + languages, + cosine_opts + ) end) end @@ -390,7 +403,16 @@ defmodule CodeQA.CombinedMetrics.SampleRunner do data |> Enum.filter(fn {_k, v} -> is_map(v) end) |> Enum.flat_map(fn {behavior, behavior_data} -> - maybe_diagnose_behavior(yaml_path, behavior, behavior_data, aggregate, 
category, language, languages, cosine_opts) + maybe_diagnose_behavior( + yaml_path, + behavior, + behavior_data, + aggregate, + category, + language, + languages, + cosine_opts + ) end) end diff --git a/lib/codeqa/health_report/formatter.ex b/lib/codeqa/health_report/formatter.ex index df17d8d..d166f14 100644 --- a/lib/codeqa/health_report/formatter.ex +++ b/lib/codeqa/health_report/formatter.ex @@ -8,4 +8,23 @@ defmodule CodeQA.HealthReport.Formatter do def format_markdown(report, detail, :plain, _opts), do: Plain.render(report, detail) def format_markdown(report, detail, :github, opts), do: Github.render(report, detail, opts) + + @doc """ + Renders the report as multiple parts for GitHub PR comments. + Returns a flat list of strings: [part_1, part_2, part_3, ...]. + + Part 1: Header, summary, PR summary, delta, chart, progress bars + Part 2: Top issues, category detail sections + Part 3+: Blocks section, sliced at 60,000 chars per part + + Each part ends with a sentinel comment for sticky comment identification. + """ + @spec render_parts(map(), keyword()) :: [String.t()] + def render_parts(report, opts \\ []) do + part_1 = Github.render_part_1(report, opts) + part_2 = Github.render_part_2(report, opts) + parts_3 = Github.render_parts_3(report, opts) + + [part_1, part_2 | parts_3] + end end diff --git a/lib/codeqa/health_report/formatter/github.ex b/lib/codeqa/health_report/formatter/github.ex index e0d9170..03c0b93 100644 --- a/lib/codeqa/health_report/formatter/github.ex +++ b/lib/codeqa/health_report/formatter/github.ex @@ -4,6 +4,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do @bar_width 20 @filled "█" @empty "░" + @part_char_limit 60_000 @spec render(map(), atom(), keyword()) :: String.t() def render(report, detail, opts \\ []) do @@ -26,6 +27,136 @@ defmodule CodeQA.HealthReport.Formatter.Github do |> Enum.join("\n") end + @doc """ + Renders Part 1: header, summary table, PR summary, delta, mermaid chart, progress bars. 
+ Each part ends with a sentinel HTML comment for sticky comment identification. + """ + @spec render_part_1(map(), keyword()) :: String.t() + def render_part_1(report, opts \\ []) do + chart? = Keyword.get(opts, :chart, true) + display_categories = merge_cosine_categories(report.categories) + + [ + pr_summary_section(Map.get(report, :pr_summary)), + header(report), + cosine_legend(), + delta_section(Map.get(report, :codebase_delta)), + if(chart?, do: mermaid_chart(display_categories), else: []), + progress_bars(display_categories), + sentinel(1) + ] + |> List.flatten() + |> Enum.join("\n") + end + + @doc """ + Renders Part 2: top issues + all category detail sections. + """ + @spec render_part_2(map(), keyword()) :: String.t() + def render_part_2(report, opts \\ []) do + detail = Keyword.get(opts, :detail, :default) + display_categories = merge_cosine_categories(report.categories) + + [ + top_issues_section(Map.get(report, :top_issues, []), detail), + category_sections(display_categories, detail), + sentinel(2) + ] + |> List.flatten() + |> Enum.join("\n") + end + + @doc """ + Renders Part 3+: blocks section sliced into 60,000-char chunks. + Returns a list of strings, one per part. If no blocks, returns a single placeholder. 
+ """ + @spec render_parts_3(map(), keyword()) :: [String.t()] + def render_parts_3(report, _opts \\ []) do + top_blocks = Map.get(report, :top_blocks, []) + + if top_blocks == [] do + ["> _No content for this section._\n\n" <> sentinel_str(3)] + else + blocks_content = blocks_section(top_blocks) |> List.flatten() |> Enum.join("\n") + slice_blocks_content(blocks_content, 3) + end + end + + defp slice_blocks_content(content, start_part) do + slice_blocks_content(content, start_part, []) + end + + defp slice_blocks_content("", part_num, acc) do + # No more content; finalize the last part if any, or emit placeholder + case acc do + [] -> ["> _No content for this section._\n\n" <> sentinel_str(part_num)] + _ -> Enum.reverse(acc) + end + end + + defp slice_blocks_content(content, part_num, acc) do + sentinel = sentinel_str(part_num) + truncation_warning = "\n\n> ⚠️ Truncated at 60,000 chars — continued in next comment\n\n" + + # Reserve space for sentinel and potential truncation warning + available = @part_char_limit - byte_size(sentinel) - byte_size(truncation_warning) - 10 + + if byte_size(content) <= available + byte_size(truncation_warning) do + # Fits in this part + final_part = content <> "\n\n" <> sentinel + Enum.reverse([final_part | acc]) + else + # Need to split + {chunk, rest} = split_at_safe_boundary(content, available) + part_content = chunk <> truncation_warning <> sentinel + slice_blocks_content(rest, part_num + 1, [part_content | acc]) + end + end + + defp split_at_safe_boundary(content, max_bytes) do + # Try to split at a
    boundary to avoid breaking HTML structure + prefix = binary_part(content, 0, min(max_bytes, byte_size(content))) + + case :binary.matches(prefix, "
    ") do + [] -> + # No good boundary, split at newline + split_at_newline(content, max_bytes) + + matches -> + {pos, len} = List.last(matches) + split_pos = pos + len + + if split_pos > div(max_bytes, 2) do + # Good split point + {binary_part(content, 0, split_pos), + binary_part(content, split_pos, byte_size(content) - split_pos)} + else + # Too early, try newline + split_at_newline(content, max_bytes) + end + end + end + + defp split_at_newline(content, max_bytes) do + prefix = binary_part(content, 0, min(max_bytes, byte_size(content))) + + case :binary.matches(prefix, "\n") do + [] -> + # No newline, hard split + {prefix, binary_part(content, byte_size(prefix), byte_size(content) - byte_size(prefix))} + + matches -> + {pos, _len} = List.last(matches) + + {binary_part(content, 0, pos), + binary_part(content, pos + 1, byte_size(content) - pos - 1)} + end + end + + defp sentinel(n), do: [sentinel_str(n)] + + defp sentinel_str(n), do: "" + defp merge_cosine_categories(categories) do {cosine, threshold} = Enum.split_with(categories, &(&1.type == :cosine)) @@ -300,6 +431,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do end defp footer do + # Legacy footer for single-part render/3 (used by --output file mode) ["", ""] end diff --git a/scripts/run.sh b/scripts/run.sh index 9804205..4cc0918 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -47,17 +47,20 @@ esac # --- Build CLI arguments --- ARGS=("$INPUT_COMMAND" "$INPUT_PATH") CAPTURE_STDOUT=false +COMMENT_MODE=false case "$INPUT_COMMAND" in health-report) - ARGS+=("--output" "$OUTPUT_FILE") ARGS+=("--detail" "$INPUT_DETAIL") ARGS+=("--top" "$INPUT_TOP") if [[ -n "$INPUT_CONFIG" ]]; then ARGS+=("--config" "$INPUT_CONFIG") fi if [[ "${INPUT_COMMENT:-false}" == "true" ]]; then - ARGS+=("--format" "github") + ARGS+=("--comment") + COMMENT_MODE=true + else + ARGS+=("--output" "$OUTPUT_FILE") fi ;; compare) @@ -117,6 +120,77 @@ else "$CODEQA" "${ARGS[@]}" fi +# --- Post multi-part PR comments (health-report with 
comment mode) --- +if [[ "$COMMENT_MODE" == "true" ]]; then + TMPDIR="${TMPDIR:-/tmp}" + PART_COUNT_FILE="${TMPDIR}/codeqa-part-count.txt" + + if [[ ! -f "$PART_COUNT_FILE" ]]; then + echo "::error::Part count file not found at ${PART_COUNT_FILE}" + exit 1 + fi + + PART_COUNT=$(cat "$PART_COUNT_FILE") + echo "Posting ${PART_COUNT} comment parts..." + + # GitHub API settings + API_URL="${GITHUB_API_URL:-https://api.github.com}" + REPO="${GITHUB_REPOSITORY}" + PR_NUMBER="${PR_NUMBER:-}" + + if [[ -z "$PR_NUMBER" ]]; then + echo "::error::PR_NUMBER not set. Cannot post PR comments." + exit 1 + fi + + for i in $(seq 1 "$PART_COUNT"); do + PART_FILE="${TMPDIR}/codeqa-part-${i}.md" + SENTINEL="" + + if [[ ! -f "$PART_FILE" ]]; then + echo "::warning::Part file ${PART_FILE} not found, skipping" + continue + fi + + BODY=$(cat "$PART_FILE") + + # Search for existing comment with this sentinel + echo "Searching for existing comment with sentinel: ${SENTINEL}" + COMMENTS_JSON=$(curl -fsSL \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + "${API_URL}/repos/${REPO}/issues/${PR_NUMBER}/comments?per_page=100" 2>/dev/null || echo "[]") + + # Find comment ID containing the sentinel + COMMENT_ID=$(echo "$COMMENTS_JSON" | jq -r --arg sentinel "$SENTINEL" \ + '.[] | select(.body | contains($sentinel)) | .id' | head -1) + + # Prepare JSON payload + PAYLOAD=$(jq -n --arg body "$BODY" '{"body": $body}') + + if [[ -n "$COMMENT_ID" && "$COMMENT_ID" != "null" ]]; then + echo "Updating existing comment ${COMMENT_ID} for part ${i}..." + curl -fsSL -X PATCH \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + "${API_URL}/repos/${REPO}/issues/comments/${COMMENT_ID}" \ + -d "$PAYLOAD" > /dev/null + else + echo "Creating new comment for part ${i}..." 
+ curl -fsSL -X POST \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + "${API_URL}/repos/${REPO}/issues/${PR_NUMBER}/comments" \ + -d "$PAYLOAD" > /dev/null + fi + done + + echo "All ${PART_COUNT} comment parts posted successfully" + + # Use part 1 as the main output file for grade extraction + OUTPUT_FILE="${TMPDIR}/codeqa-part-1.md" +fi + # --- Extract grade (health-report only) --- GRADE="" if [[ "$INPUT_COMMAND" == "health-report" && -f "$OUTPUT_FILE" ]]; then diff --git a/test/codeqa/health_report/formatter_test.exs b/test/codeqa/health_report/formatter_test.exs index 39ce63a..244091e 100644 --- a/test/codeqa/health_report/formatter_test.exs +++ b/test/codeqa/health_report/formatter_test.exs @@ -498,4 +498,123 @@ defmodule CodeQA.HealthReport.FormatterTest do assert result =~ "61.00" end end + + describe "render_parts/2" do + test "returns at least 3 parts" do + parts = Formatter.render_parts(@sample_report) + assert length(parts) >= 3 + end + + test "each part ends with sentinel comment" do + parts = Formatter.render_parts(@sample_report) + + Enum.with_index(parts, 1) + |> Enum.each(fn {part, n} -> + assert part =~ "" + end) + end + + test "part 1 contains header and grade" do + [part_1 | _] = Formatter.render_parts(@sample_report) + assert part_1 =~ "Code Health: B+" + assert part_1 =~ "(79/100)" + end + + test "part 1 contains mermaid chart by default" do + [part_1 | _] = Formatter.render_parts(@sample_report) + assert part_1 =~ "```mermaid" + end + + test "part 1 contains progress bars" do + [part_1 | _] = Formatter.render_parts(@sample_report) + assert part_1 =~ "████" + end + + test "part 2 contains category details" do + [_, part_2 | _] = Formatter.render_parts(@sample_report) + assert part_2 =~ "
    " + assert part_2 =~ "Readability" + end + + test "part 3 is placeholder when no blocks" do + [_, _, part_3 | _] = Formatter.render_parts(@sample_report) + assert part_3 =~ "_No content for this section._" + end + + test "part 3 contains blocks when present" do + report = Map.put(@sample_report, :top_blocks, @top_blocks_gh) + [_, _, part_3 | _] = Formatter.render_parts(report) + assert part_3 =~ "lib/foo.ex" + assert part_3 =~ "Blocks" + end + end + + describe "Github.render_parts_3/2 slicing" do + alias CodeQA.HealthReport.Formatter.Github + + @many_blocks Enum.map(1..100, fn i -> + %{ + path: "lib/file_#{i}.ex", + status: "modified", + blocks: + Enum.map(1..10, fn j -> + %{ + start_line: j * 10, + end_line: j * 10 + 20, + type: "function", + token_count: 150, + potentials: [ + %{ + category: "function_design", + behavior: "single_responsibility", + cosine_delta: 0.35, + severity: :high, + fix_hint: "Consider extracting helper function" + } + ] + } + end) + } + end) + + test "slices large blocks section into multiple parts" do + report = Map.put(@sample_report, :top_blocks, @many_blocks) + parts = Github.render_parts_3(report) + + # With 100 files × 10 blocks, this should produce multiple parts + assert length(parts) > 1 + end + + test "each sliced part ends with sentinel" do + report = Map.put(@sample_report, :top_blocks, @many_blocks) + parts = Github.render_parts_3(report) + + Enum.with_index(parts, 3) + |> Enum.each(fn {part, n} -> + assert part =~ "" + end) + end + + test "non-final parts have truncation warning" do + report = Map.put(@sample_report, :top_blocks, @many_blocks) + parts = Github.render_parts_3(report) + + if length(parts) > 1 do + non_final = Enum.take(parts, length(parts) - 1) + + Enum.each(non_final, fn part -> + assert part =~ "Truncated at 60,000 chars" + end) + end + end + + test "each part is under 65536 chars" do + report = Map.put(@sample_report, :top_blocks, @many_blocks) + parts = Github.render_parts_3(report) + + Enum.each(parts, 
fn part -> + assert byte_size(part) < 65_536, "Part exceeds GitHub comment limit" + end) + end + end end From d198001c9ae4038430a028db7219e498e2f01d7c Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 26 Mar 2026 14:59:32 +0100 Subject: [PATCH 62/71] fix(action): pass GITHUB_TOKEN to run.sh for PR comment posting The multi-part comment posting uses the GitHub API to create/update comments. GITHUB_TOKEN must be explicitly passed via env in composite actions - it's not automatically available. Also fix Elixir 1.19 Range warning by using explicit step (//1) when iterating over potentially empty ranges. --- action.yml | 1 + lib/codeqa/cli/health_report.ex | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/action.yml b/action.yml index 3d43011..ebee062 100644 --- a/action.yml +++ b/action.yml @@ -94,6 +94,7 @@ runs: INPUT_BUILD: ${{ inputs.build }} GITHUB_ACTION_PATH: ${{ github.action_path }} PR_NUMBER: ${{ github.event.pull_request.number }} + GITHUB_TOKEN: ${{ github.token }} run: ${{ github.action_path }}/scripts/run.sh - name: Check grade threshold diff --git a/lib/codeqa/cli/health_report.ex b/lib/codeqa/cli/health_report.ex index b271bb4..fc9bbaa 100644 --- a/lib/codeqa/cli/health_report.ex +++ b/lib/codeqa/cli/health_report.ex @@ -155,7 +155,7 @@ defmodule CodeQA.CLI.HealthReport do actual_count = length(parts) padded_count = max(actual_count, 3) - for n <- (actual_count + 1)..padded_count do + for n <- (actual_count + 1)..padded_count//1 do path = Path.join(tmpdir, "codeqa-part-#{n}.md") placeholder = "> _No content for this section._\n\n" File.write!(path, placeholder) From 570812a7654add5e788cc8a2bfda740784fd79c9 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 26 Mar 2026 15:08:34 +0100 Subject: [PATCH 63/71] chore: remove docs/ and ignore test/ in codeqa analysis - Remove superpowers specs/plans (implementation complete) - Add test/ to ignore_paths in .codeqa.yml --- .codeqa.yml | 1 + 
...26-03-21-health-report-blocks-and-delta.md | 1679 ----------------- ...26-03-20-health-report-blocks-and-delta.md | 312 --- ...026-03-22-multi-part-pr-comments-design.md | 128 -- 4 files changed, 1 insertion(+), 2119 deletions(-) delete mode 100644 docs/superpowers/plans/2026-03-21-health-report-blocks-and-delta.md delete mode 100644 docs/superpowers/specs/2026-03-20-health-report-blocks-and-delta.md delete mode 100644 docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md diff --git a/.codeqa.yml b/.codeqa.yml index 8fa1a5c..05b52a5 100644 --- a/.codeqa.yml +++ b/.codeqa.yml @@ -9,6 +9,7 @@ ignore_paths: - scripts/** - docs/** - plans/** + - test/** - devenv* - direnv* diff --git a/docs/superpowers/plans/2026-03-21-health-report-blocks-and-delta.md b/docs/superpowers/plans/2026-03-21-health-report-blocks-and-delta.md deleted file mode 100644 index 1d6e9d0..0000000 --- a/docs/superpowers/plans/2026-03-21-health-report-blocks-and-delta.md +++ /dev/null @@ -1,1679 +0,0 @@ -# Health Report: Block Impact, PR Delta, and Compare Consolidation - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Unify health-report and compare commands into a single PR-aware report showing impactful blocks per changed file, before/after metric delta, and a PR impact summary — while deleting the compare command entirely. - -**Architecture:** `HealthReport.generate/2` gains `base_results:` and `changed_files:` opts; a new `HealthReport.TopBlocks` module assembles severity-classified blocks from node data; a new `HealthReport.Delta` module wraps aggregate delta computation ported from `Comparator`; formatters gain PR summary, delta, and block sections and lose worst_offenders rendering; `CLI.HealthReport` gains `--base-ref`/`--head-ref` and runs dual analysis when provided. 
- -**Tech Stack:** Elixir, ExUnit, `CodeQA.Git`, `CodeQA.CombinedMetrics.{SampleRunner, Scorer}`, `CodeQA.HealthReport.Grader`, `CodeQA.BlockImpactAnalyzer` - ---- - -## File Map - -| File | Change | -|------|--------| -| `lib/codeqa/block_impact_analyzer.ex` | Add `"end_line"` to `serialize_node/9` output | -| `lib/codeqa/health_report/delta.ex` | **Create** — aggregate delta computation (ported from `Comparator`) | -| `lib/codeqa/health_report/top_blocks.ex` | **Create** — block assembly, severity, fix hint enrichment | -| `lib/codeqa/health_report.ex` | Accept new opts, wire `Delta` + `TopBlocks`, drop `worst_offenders` computation | -| `lib/codeqa/health_report/formatter/plain.ex` | Remove worst_offenders rendering; add PR summary, delta table, block section | -| `lib/codeqa/health_report/formatter/github.ex` | Remove worst_offenders rendering; add PR summary, delta table, block section | -| `lib/codeqa/cli/health_report.ex` | Add `--base-ref`/`--head-ref`; dual analysis when base-ref given | -| `lib/codeqa/cli.ex` | Remove compare entry | -| `lib/codeqa/cli/compare.ex` | **Delete** | -| `lib/codeqa/comparator.ex` | **Delete** | -| `lib/codeqa/formatter.ex` | **Delete** | -| `lib/codeqa/summarizer.ex` | **Delete** | -| `test/codeqa/block_impact_analyzer_test.exs` | Add `end_line` assertion | -| `test/codeqa/health_report/delta_test.exs` | **Create** | -| `test/codeqa/health_report/top_blocks_test.exs` | **Create** | -| `test/codeqa/health_report_test.exs` | Add: `top_blocks`, `pr_summary`, `codebase_delta` keys; remove worst_offenders assertions | -| `test/codeqa/health_report/formatter_test.exs` | Delete worst_offenders tests; add block/delta/summary tests | -| `test/codeqa/cli_compare_test.exs` | **Delete** | - ---- - -## Task 1: Add `end_line` to BlockImpactAnalyzer node serialization - -**Files:** -- Modify: `lib/codeqa/block_impact_analyzer.ex:167-175` -- Test: `test/codeqa/block_impact_analyzer_test.exs:42-52` - -- [ ] **Step 1: Add `end_line` assertion 
to the existing "each node has required fields" test** - -In `test/codeqa/block_impact_analyzer_test.exs`, inside the `Enum.each(nodes, fn node ->` block (line 42), add after line 43: - -```elixir -assert Map.has_key?(node, "end_line") -``` - -- [ ] **Step 2: Run the test to confirm it fails** - -```bash -mix test test/codeqa/block_impact_analyzer_test.exs --trace -``` - -Expected: FAIL — `"end_line"` key missing. - -- [ ] **Step 3: Add `end_line` to the serialized node map** - -In `lib/codeqa/block_impact_analyzer.ex`, edit the map at line 167: - -```elixir -%{ - "start_line" => node.start_line, - "end_line" => node.end_line, - "column_start" => (first_token && first_token.col) || 0, - "char_length" => char_length, - "type" => Atom.to_string(node.type), - "token_count" => length(node.tokens), - "refactoring_potentials" => potentials, - "children" => children -} -``` - -- [ ] **Step 4: Run the test to confirm it passes** - -```bash -mix test test/codeqa/block_impact_analyzer_test.exs --trace -``` - -Expected: all tests PASS. 
- -- [ ] **Step 5: Commit** - -```bash -git add lib/codeqa/block_impact_analyzer.ex test/codeqa/block_impact_analyzer_test.exs -git commit -m "feat(block-impact): serialize end_line in node output" -``` - ---- - -## Task 2: Create `HealthReport.Delta` - -**Files:** -- Create: `lib/codeqa/health_report/delta.ex` -- Create: `test/codeqa/health_report/delta_test.exs` - -- [ ] **Step 1: Write the test file** - -```elixir -# test/codeqa/health_report/delta_test.exs -defmodule CodeQA.HealthReport.DeltaTest do - use ExUnit.Case, async: true - - alias CodeQA.HealthReport.Delta - - defp make_results(aggregate) do - %{"codebase" => %{"aggregate" => aggregate}} - end - - test "returns base, head, and delta aggregates" do - base = make_results(%{"entropy" => %{"mean_value" => 5.0}}) - head = make_results(%{"entropy" => %{"mean_value" => 6.0}}) - - result = Delta.compute(base, head) - - assert result.base.aggregate == %{"entropy" => %{"mean_value" => 5.0}} - assert result.head.aggregate == %{"entropy" => %{"mean_value" => 6.0}} - assert result.delta.aggregate == %{"entropy" => %{"mean_value" => 1.0}} - end - - test "rounds delta to 4 decimal places" do - base = make_results(%{"entropy" => %{"mean_value" => 1.0}}) - head = make_results(%{"entropy" => %{"mean_value" => 4.3333}}) - - result = Delta.compute(base, head) - assert result.delta.aggregate["entropy"]["mean_value"] == 3.3333 - end - - test "handles missing base codebase gracefully" do - base = %{} - head = make_results(%{"entropy" => %{"mean_value" => 6.0}}) - - result = Delta.compute(base, head) - assert result.delta.aggregate == %{} - end - - test "handles missing head codebase gracefully" do - base = make_results(%{"entropy" => %{"mean_value" => 5.0}}) - head = %{} - - result = Delta.compute(base, head) - assert result.delta.aggregate == %{} - end - - test "skips non-numeric metric keys" do - base = make_results(%{"entropy" => %{"mean_value" => 5.0, "label" => "x"}}) - head = make_results(%{"entropy" => %{"mean_value" 
=> 6.0, "label" => "y"}}) - - result = Delta.compute(base, head) - refute Map.has_key?(result.delta.aggregate["entropy"], "label") - assert result.delta.aggregate["entropy"]["mean_value"] == 1.0 - end -end -``` - -- [ ] **Step 2: Run test to confirm it fails** - -```bash -mix test test/codeqa/health_report/delta_test.exs --trace -``` - -Expected: FAIL — module not found. - -- [ ] **Step 3: Create the module** - -```elixir -# lib/codeqa/health_report/delta.ex -defmodule CodeQA.HealthReport.Delta do - @moduledoc "Computes aggregate metric delta between two codebase analysis results." - - @spec compute(map(), map()) :: %{ - base: %{aggregate: map()}, - head: %{aggregate: map()}, - delta: %{aggregate: map()} - } - def compute(base_results, head_results) do - base_agg = get_in(base_results, ["codebase", "aggregate"]) || %{} - head_agg = get_in(head_results, ["codebase", "aggregate"]) || %{} - - %{ - base: %{aggregate: base_agg}, - head: %{aggregate: head_agg}, - delta: %{aggregate: compute_aggregate_delta(base_agg, head_agg)} - } - end - - defp compute_aggregate_delta(base_agg, head_agg) do - MapSet.new(Map.keys(base_agg) ++ Map.keys(head_agg)) - |> Enum.reduce(%{}, fn metric_name, acc -> - base_m = Map.get(base_agg, metric_name, %{}) - head_m = Map.get(head_agg, metric_name, %{}) - delta = compute_numeric_delta(base_m, head_m) - if delta == %{}, do: acc, else: Map.put(acc, metric_name, delta) - end) - end - - defp compute_numeric_delta(base, head) do - MapSet.new(Map.keys(base) ++ Map.keys(head)) - |> Enum.reduce(%{}, fn key, acc -> - case {Map.get(base, key), Map.get(head, key)} do - {b, h} when is_number(b) and is_number(h) -> - Map.put(acc, key, Float.round(h - b, 4)) - - _ -> - acc - end - end) - end -end -``` - -- [ ] **Step 4: Run tests to confirm they pass** - -```bash -mix test test/codeqa/health_report/delta_test.exs --trace -``` - -Expected: all PASS. 
- -- [ ] **Step 5: Commit** - -```bash -git add lib/codeqa/health_report/delta.ex test/codeqa/health_report/delta_test.exs -git commit -m "feat(health-report): add Delta module for aggregate metric comparison" -``` - ---- - -## Task 3: Create `HealthReport.TopBlocks` - -**Files:** -- Create: `lib/codeqa/health_report/top_blocks.ex` -- Create: `test/codeqa/health_report/top_blocks_test.exs` - -- [ ] **Step 1: Write the test file** - -```elixir -# test/codeqa/health_report/top_blocks_test.exs -defmodule CodeQA.HealthReport.TopBlocksTest do - use ExUnit.Case, async: true - - alias CodeQA.HealthReport.TopBlocks - alias CodeQA.Git.ChangedFile - - # A node with cosine_delta 0.60 — will be :critical when codebase_cosine = 0.0 (gap=1.0, ratio=0.60) - defp make_node(cosine_delta, token_count \\ 20) do - %{ - "start_line" => 1, - "end_line" => 10, - "type" => "code", - "token_count" => token_count, - "refactoring_potentials" => [ - %{ - "category" => "function_design", - "behavior" => "cyclomatic_complexity_under_10", - "cosine_delta" => cosine_delta - } - ], - "children" => [] - } - end - - defp make_results(nodes) do - %{"files" => %{"lib/foo.ex" => %{"nodes" => nodes}}} - end - - defp lookup(cosine \\ 0.0) do - %{{"function_design", "cyclomatic_complexity_under_10"} => cosine} - end - - describe "severity classification" do - test ":critical when severity_ratio > 0.50" do - # gap = max(0.01, 1.0 - 0.0) = 1.0, ratio = 0.60 / 1.0 = 0.60 > 0.50 - [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) - assert hd(hd(group.blocks).potentials).severity == :critical - end - - test ":high when severity_ratio > 0.25 and <= 0.50" do - # ratio = 0.30 / 1.0 = 0.30 - [group] = TopBlocks.build(make_results([make_node(0.30)]), [], lookup()) - assert hd(hd(group.blocks).potentials).severity == :high - end - - test ":medium when severity_ratio > 0.10 and <= 0.25" do - # ratio = 0.15 / 1.0 = 0.15 - [group] = TopBlocks.build(make_results([make_node(0.15)]), [], lookup()) - 
assert hd(hd(group.blocks).potentials).severity == :medium - end - - test "filtered when severity_ratio <= 0.10" do - # ratio = 0.05 / 1.0 = 0.05 — block should not appear - assert TopBlocks.build(make_results([make_node(0.05)]), [], lookup()) == [] - end - - test "gap floor prevents division by zero when codebase_cosine = 1.0" do - # gap = max(0.01, 1.0 - 1.0) = 0.01, ratio = 0.02 / 0.01 = 2.0 → :critical - [group] = TopBlocks.build(make_results([make_node(0.02)]), [], lookup(1.0)) - assert hd(hd(group.blocks).potentials).severity == :critical - end - - test "gap handles negative codebase_cosine" do - # codebase_cosine = -0.5, gap = max(0.01, 1.0 - (-0.5)) = 1.5 - # ratio = 0.60 / 1.5 = 0.40 → :high - [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup(-0.5)) - assert hd(hd(group.blocks).potentials).severity == :high - end - - test "unknown behavior defaults codebase_cosine to 0.0" do - lookup_empty = %{} - # gap = 1.0, ratio = 0.60 → :critical - [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup_empty) - assert hd(hd(group.blocks).potentials).severity == :critical - end - end - - describe "changed_files filtering" do - test "when changed_files is empty, shows all files" do - [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) - assert group.path == "lib/foo.ex" - assert group.status == nil - end - - test "when changed_files given, only shows matching files" do - changed = [%ChangedFile{path: "lib/other.ex", status: "added"}] - assert TopBlocks.build(make_results([make_node(0.60)]), changed, lookup()) == [] - end - - test "status comes from ChangedFile struct" do - changed = [%ChangedFile{path: "lib/foo.ex", status: "modified"}] - [group] = TopBlocks.build(make_results([make_node(0.60)]), changed, lookup()) - assert group.status == "modified" - end - end - - describe "block filtering" do - test "blocks with token_count < 10 are excluded" do - assert TopBlocks.build(make_results([make_node(0.60, 9)]), [], 
lookup()) == [] - end - - test "blocks are ordered by highest cosine_delta descending" do - node_low = make_node(0.20) - node_high = put_in(make_node(0.60), ["start_line"], 20) - results = %{"files" => %{"lib/foo.ex" => %{"nodes" => [node_low, node_high]}}} - - [group] = TopBlocks.build(results, [], lookup()) - deltas = Enum.map(group.blocks, fn b -> hd(b.potentials).cosine_delta end) - assert deltas == Enum.sort(deltas, :desc) - end - - test "children nodes are included" do - parent = %{ - "start_line" => 1, "end_line" => 20, - "type" => "code", "token_count" => 5, - "refactoring_potentials" => [], - "children" => [make_node(0.60)] - } - [group] = TopBlocks.build(make_results([parent]), [], lookup()) - assert length(group.blocks) == 1 - end - end - - describe "fix hints" do - test "includes fix_hint string for known behavior" do - # function_design/cyclomatic_complexity_under_10 has _fix_hint in YAML - [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) - potential = hd(hd(group.blocks).potentials) - assert is_binary(potential.fix_hint) - end - - test "fix_hint is nil for unknown behavior" do - node = %{ - "start_line" => 1, "end_line" => 10, "type" => "code", - "token_count" => 20, - "refactoring_potentials" => [ - %{"category" => "unknown_cat", "behavior" => "unknown_beh", "cosine_delta" => 0.60} - ], - "children" => [] - } - [group] = TopBlocks.build(make_results([node]), [], %{}) - assert hd(hd(group.blocks).potentials).fix_hint == nil - end - end -end -``` - -- [ ] **Step 2: Run tests to confirm they fail** - -```bash -mix test test/codeqa/health_report/top_blocks_test.exs --trace -``` - -Expected: FAIL — module not found. - -- [ ] **Step 3: Create the module** - -```elixir -# lib/codeqa/health_report/top_blocks.ex -defmodule CodeQA.HealthReport.TopBlocks do - @moduledoc "Assembles the top_blocks report section from analysis node data." 
- - alias CodeQA.CombinedMetrics.Scorer - - @min_tokens 10 - @severity_critical 0.50 - @severity_high 0.25 - @severity_medium 0.10 - @gap_floor 0.01 - - @spec build(map(), [struct()], map()) :: [map()] - def build(analysis_results, changed_files, codebase_cosine_lookup) do - files = Map.get(analysis_results, "files", %{}) - fix_hints = build_fix_hint_lookup() - - file_entries = - if changed_files == [] do - Enum.map(files, fn {path, data} -> {path, nil, data} end) - else - changed_index = Map.new(changed_files, &{&1.path, &1.status}) - - files - |> Enum.filter(fn {path, _} -> Map.has_key?(changed_index, path) end) - |> Enum.map(fn {path, data} -> {path, Map.get(changed_index, path), data} end) - end - - file_entries - |> Enum.map(fn {path, status, file_data} -> - blocks = - file_data - |> Map.get("nodes", []) - |> Enum.flat_map(&collect_nodes/1) - |> Enum.filter(&(&1["token_count"] >= @min_tokens)) - |> Enum.map(&enrich_block(&1, codebase_cosine_lookup, fix_hints)) - |> Enum.reject(&(&1.potentials == [])) - |> Enum.sort_by(&(-max_delta(&1))) - - %{path: path, status: status, blocks: blocks} - end) - |> Enum.reject(&(&1.blocks == [])) - |> Enum.sort_by(& &1.path) - end - - defp collect_nodes(node) do - children = node |> Map.get("children", []) |> Enum.flat_map(&collect_nodes/1) - [node | children] - end - - defp enrich_block(node, cosine_lookup, fix_hints) do - potentials = - node - |> Map.get("refactoring_potentials", []) - |> Enum.map(&enrich_potential(&1, cosine_lookup, fix_hints)) - |> Enum.reject(&is_nil/1) - |> Enum.sort_by(& &1.cosine_delta, :desc) - - %{ - start_line: node["start_line"], - end_line: node["end_line"], - type: node["type"], - token_count: node["token_count"], - potentials: potentials - } - end - - defp enrich_potential(p, cosine_lookup, fix_hints) do - category = p["category"] - behavior = p["behavior"] - cosine_delta = p["cosine_delta"] - - codebase_cosine = Map.get(cosine_lookup, {category, behavior}, 0.0) - gap = max(@gap_floor, 1.0 - 
codebase_cosine) - severity = classify(cosine_delta / gap) - - if severity == :filtered do - nil - else - %{ - category: category, - behavior: behavior, - cosine_delta: cosine_delta, - severity: severity, - fix_hint: Map.get(fix_hints, {category, behavior}) - } - end - end - - defp classify(ratio) when ratio > @severity_critical, do: :critical - defp classify(ratio) when ratio > @severity_high, do: :high - defp classify(ratio) when ratio > @severity_medium, do: :medium - defp classify(_ratio), do: :filtered - - defp max_delta(%{potentials: []}), do: 0.0 - defp max_delta(%{potentials: potentials}), do: Enum.max_by(potentials, & &1.cosine_delta).cosine_delta - - defp build_fix_hint_lookup do - Scorer.all_yamls() - |> Enum.flat_map(fn {yaml_path, data} -> - category = yaml_path |> Path.basename() |> String.trim_trailing(".yml") - - Enum.flat_map(data, fn {behavior, behavior_data} -> - case get_in(behavior_data, ["_fix_hint"]) do - nil -> [] - hint -> [{{category, behavior}, hint}] - end - end) - end) - |> Map.new() - end -end -``` - -- [ ] **Step 4: Run tests to confirm they pass** - -```bash -mix test test/codeqa/health_report/top_blocks_test.exs --trace -``` - -Expected: all PASS. - -- [ ] **Step 5: Run full suite** - -```bash -mix test -``` - -Expected: all passing. - -- [ ] **Step 6: Commit** - -```bash -git add lib/codeqa/health_report/top_blocks.ex test/codeqa/health_report/top_blocks_test.exs -git commit -m "feat(health-report): add TopBlocks module for severity-classified block assembly" -``` - ---- - -## Task 4: Update `HealthReport.generate/2` - -**Files:** -- Modify: `lib/codeqa/health_report.ex` -- Modify: `test/codeqa/health_report_test.exs` - -- [ ] **Step 1: Add tests for new output keys** - -Open `test/codeqa/health_report_test.exs`. 
Add a describe block (create the file if it doesn't exist): - -```elixir -describe "generate/2 output keys" do - @tag :slow - test "without base_results: pr_summary and codebase_delta are nil" do - files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} - results = CodeQA.Engine.Analyzer.analyze_codebase(files) - results = CodeQA.BlockImpactAnalyzer.analyze(results, files) - - report = CodeQA.HealthReport.generate(results) - - assert report.pr_summary == nil - assert report.codebase_delta == nil - assert is_list(report.top_blocks) - assert Map.has_key?(report, :overall_score) - assert Map.has_key?(report, :overall_grade) - assert Map.has_key?(report, :categories) - assert Map.has_key?(report, :top_issues) - end - - @tag :slow - test "without base_results: top_blocks shows all files with significant blocks" do - files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} - results = CodeQA.Engine.Analyzer.analyze_codebase(files) - results = CodeQA.BlockImpactAnalyzer.analyze(results, files) - - report = CodeQA.HealthReport.generate(results) - - # top_blocks is a list of file groups (may be empty if no blocks above threshold) - assert is_list(report.top_blocks) - Enum.each(report.top_blocks, fn group -> - assert Map.has_key?(group, :path) - assert Map.has_key?(group, :status) - assert Map.has_key?(group, :blocks) - assert group.status == nil - end) - end - - test "worst_offenders is always empty in categories" do - files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} - results = CodeQA.Engine.Analyzer.analyze_codebase(files) - results = CodeQA.BlockImpactAnalyzer.analyze(results, files) - - report = CodeQA.HealthReport.generate(results) - - Enum.each(report.categories, fn cat -> - assert Map.get(cat, :worst_offenders, []) == [] - end) - end -end - -describe "generate/2 with base_results" do - @tag :slow - test "pr_summary is populated" do - files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} - 
head_results = CodeQA.Engine.Analyzer.analyze_codebase(files) - head_results = CodeQA.BlockImpactAnalyzer.analyze(head_results, files) - base_results = CodeQA.Engine.Analyzer.analyze_codebase(files) - - changed = [%CodeQA.Git.ChangedFile{path: "lib/foo.ex", status: "modified"}] - - report = CodeQA.HealthReport.generate(head_results, - base_results: base_results, - changed_files: changed - ) - - assert %{ - base_score: base_score, - head_score: head_score, - score_delta: delta, - base_grade: _, - head_grade: _, - blocks_flagged: flagged, - files_changed: 1, - files_added: 0, - files_modified: 1 - } = report.pr_summary - - assert is_integer(base_score) - assert is_integer(head_score) - assert delta == head_score - base_score - assert is_integer(flagged) - end - - @tag :slow - test "codebase_delta is populated" do - files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} - head_results = CodeQA.Engine.Analyzer.analyze_codebase(files) - head_results = CodeQA.BlockImpactAnalyzer.analyze(head_results, files) - base_results = CodeQA.Engine.Analyzer.analyze_codebase(files) - - report = CodeQA.HealthReport.generate(head_results, base_results: base_results) - - assert %{base: %{aggregate: _}, head: %{aggregate: _}, delta: %{aggregate: _}} = - report.codebase_delta - end - - @tag :slow - test "top_blocks scoped to changed_files" do - files = %{ - "lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n", - "lib/bar.ex" => "defmodule Bar do\n def baz, do: :ok\nend\n" - } - head_results = CodeQA.Engine.Analyzer.analyze_codebase(files) - head_results = CodeQA.BlockImpactAnalyzer.analyze(head_results, files) - base_results = CodeQA.Engine.Analyzer.analyze_codebase(files) - - changed = [%CodeQA.Git.ChangedFile{path: "lib/foo.ex", status: "modified"}] - - report = CodeQA.HealthReport.generate(head_results, - base_results: base_results, - changed_files: changed - ) - - paths = Enum.map(report.top_blocks, & &1.path) - refute "lib/bar.ex" in paths - end -end -``` - 
-- [ ] **Step 2: Run new tests to confirm they fail** - -```bash -mix test test/codeqa/health_report_test.exs --trace -``` - -Expected: FAIL — `pr_summary` key missing, etc. - -- [ ] **Step 3: Update `lib/codeqa/health_report.ex`** - -Replace the entire file: - -```elixir -defmodule CodeQA.HealthReport do - @moduledoc "Orchestrates health report generation from analysis results." - - alias CodeQA.HealthReport.{Config, Grader, Formatter, Delta, TopBlocks} - alias CodeQA.CombinedMetrics.{FileScorer, SampleRunner} - - @spec generate(map(), keyword()) :: map() - def generate(analysis_results, opts \\ []) do - config_path = Keyword.get(opts, :config) - detail = Keyword.get(opts, :detail, :default) - base_results = Keyword.get(opts, :base_results) - changed_files = Keyword.get(opts, :changed_files, []) - - %{ - categories: categories, - grade_scale: grade_scale, - impact_map: impact_map, - combined_top: combined_top - } = - Config.load(config_path) - - aggregate = get_in(analysis_results, ["codebase", "aggregate"]) || %{} - files = Map.get(analysis_results, "files", %{}) - project_langs = project_languages(files) - - threshold_grades = - categories - |> Grader.grade_aggregate(aggregate, grade_scale) - |> Enum.zip(categories) - |> Enum.map(fn {graded, _cat_def} -> - summary = build_category_summary(graded) - - graded - |> Map.put(:type, :threshold) - |> Map.merge(%{summary: summary, worst_offenders: []}) - end) - - worst_files_map = FileScorer.worst_files_per_behavior(files, combined_top: combined_top) - - cosine_grades = - Grader.grade_cosine_categories(aggregate, worst_files_map, grade_scale, project_langs) - - all_categories = - (threshold_grades ++ cosine_grades) - |> Enum.map(fn cat -> - Map.put(cat, :impact, Map.get(impact_map, to_string(cat.key), 1)) - end) - - {overall_score, overall_grade} = Grader.overall_score(all_categories, grade_scale, impact_map) - - metadata = build_metadata(analysis_results) - - all_cosines = - SampleRunner.diagnose_aggregate(aggregate, 
top: 99_999, languages: project_langs) - - top_issues = Enum.take(all_cosines, 10) - - codebase_cosine_lookup = - Map.new(all_cosines, fn i -> {{i.category, i.behavior}, i.cosine} end) - - top_blocks = TopBlocks.build(analysis_results, changed_files, codebase_cosine_lookup) - - {codebase_delta, pr_summary} = - if base_results do - build_delta_and_summary( - base_results, - analysis_results, - overall_score, - overall_grade, - all_categories, - categories, - grade_scale, - impact_map, - combined_top, - changed_files, - top_blocks - ) - else - {nil, nil} - end - - %{ - metadata: metadata, - pr_summary: pr_summary, - overall_score: overall_score, - overall_grade: overall_grade, - codebase_delta: codebase_delta, - categories: all_categories, - top_issues: top_issues, - top_blocks: top_blocks - } - end - - @spec to_markdown(map(), atom(), atom()) :: String.t() - def to_markdown(report, detail \\ :default, format \\ :plain) do - Formatter.format_markdown(report, detail, format) - end - - defp build_delta_and_summary( - base_results, - head_results, - head_score, - head_grade, - head_categories, - category_defs, - grade_scale, - impact_map, - combined_top, - changed_files, - top_blocks - ) do - delta = Delta.compute(base_results, head_results) - - base_aggregate = get_in(base_results, ["codebase", "aggregate"]) || %{} - base_files = Map.get(base_results, "files", %{}) - base_project_langs = project_languages(base_files) - - base_threshold_grades = - category_defs - |> Grader.grade_aggregate(base_aggregate, grade_scale) - |> Enum.zip(category_defs) - |> Enum.map(fn {graded, _cat_def} -> - graded - |> Map.put(:type, :threshold) - |> Map.merge(%{summary: "", worst_offenders: []}) - end) - - base_worst_files_map = - FileScorer.worst_files_per_behavior(base_files, combined_top: combined_top) - - base_cosine_grades = - Grader.grade_cosine_categories( - base_aggregate, - base_worst_files_map, - grade_scale, - base_project_langs - ) - - base_all_categories = - 
(base_threshold_grades ++ base_cosine_grades) - |> Enum.map(fn cat -> - Map.put(cat, :impact, Map.get(impact_map, to_string(cat.key), 1)) - end) - - {base_score, base_grade} = Grader.overall_score(base_all_categories, grade_scale, impact_map) - - blocks_flagged = Enum.sum(Enum.map(top_blocks, fn g -> length(g.blocks) end)) - files_added = Enum.count(changed_files, &(&1.status == "added")) - files_modified = Enum.count(changed_files, &(&1.status == "modified")) - - summary = %{ - base_score: base_score, - head_score: head_score, - score_delta: head_score - base_score, - base_grade: base_grade, - head_grade: head_grade, - blocks_flagged: blocks_flagged, - files_changed: length(changed_files), - files_added: files_added, - files_modified: files_modified - } - - {delta, summary} - end - - defp build_metadata(analysis_results) do - meta = Map.get(analysis_results, "metadata", %{}) - - %{ - path: meta["path"] || "unknown", - timestamp: meta["timestamp"] || DateTime.utc_now() |> DateTime.to_iso8601(), - total_files: meta["total_files"] || map_size(Map.get(analysis_results, "files", %{})) - } - end - - defp project_languages(files_map) do - files_map - |> Map.keys() - |> Enum.map(&CodeQA.Language.detect(&1).name()) - |> Enum.reject(&(&1 == "unknown")) - |> Enum.uniq() - end - - defp build_category_summary(%{type: :cosine}), do: "" - - defp build_category_summary(graded) do - low_scorers = - graded.metric_scores - |> Enum.filter(fn m -> m.score < 60 end) - |> length() - - cond do - graded.score >= 90 -> "Excellent" - graded.score >= 70 and low_scorers == 0 -> "Good" - graded.score >= 70 -> "Good overall, #{low_scorers} metric(s) need attention" - graded.score >= 50 -> "Needs improvement" - true -> "Critical — requires attention" - end - end -end -``` - -- [ ] **Step 4: Run tests** - -```bash -mix test test/codeqa/health_report_test.exs --trace -``` - -Expected: new tests PASS. 
- -- [ ] **Step 5: Run full suite to check for regressions** - -```bash -mix test -``` - -Fix any test that asserts on `worst_offenders` being non-empty in the report output — those assertions should now expect `[]`. - -- [ ] **Step 6: Commit** - -```bash -git add lib/codeqa/health_report.ex test/codeqa/health_report_test.exs -git commit -m "feat(health-report): add top_blocks, pr_summary, codebase_delta; drop worst_offenders" -``` - ---- - -## Task 5: Update plain formatter - -**Files:** -- Modify: `lib/codeqa/health_report/formatter/plain.ex` -- Modify: `test/codeqa/health_report/formatter_test.exs` - -- [ ] **Step 1: Delete failing worst_offenders tests and add new tests** - -In `test/codeqa/health_report/formatter_test.exs`: - -**Delete** these tests (they assert on worst_offenders rendering that is now gone): -- `"includes worst offenders section"` (lines 186–194) -- `"renders cosine worst offenders per behavior"` (lines 216–226) - -**Update** `"summary detail omits category sections"` (line 196) — change to: -```elixir -test "summary detail omits category sections" do - result = Formatter.format_markdown(@sample_report, :summary, :plain) - refute result =~ "Codebase averages" -end -``` - -**Add** these tests after the existing plain describe blocks: - -```elixir -describe "plain formatter: PR summary section" do - @sample_report_with_pr Map.put(@sample_report, :pr_summary, %{ - base_score: 85, - head_score: 77, - score_delta: -8, - base_grade: "B+", - head_grade: "C+", - blocks_flagged: 6, - files_changed: 3, - files_added: 1, - files_modified: 2 - }) - - test "renders PR summary line when pr_summary present" do - result = Formatter.format_markdown(@sample_report_with_pr, :default, :plain) - assert result =~ "B+" - assert result =~ "C+" - assert result =~ "-8" - assert result =~ "6" - assert result =~ "1 added" - assert result =~ "2 modified" - end - - test "omits PR summary when pr_summary is nil" do - result = Formatter.format_markdown(@sample_report, 
:default, :plain) - refute result =~ "Score:" - end -end - -describe "plain formatter: delta section" do - @delta %{ - base: %{aggregate: %{"readability" => %{"mean_flesch_adapted" => 65.0}, "halstead" => %{"mean_difficulty" => 12.0}}}, - head: %{aggregate: %{"readability" => %{"mean_flesch_adapted" => 61.0}, "halstead" => %{"mean_difficulty" => 15.0}}} - } - - @sample_report_with_delta Map.put(@sample_report, :codebase_delta, @delta) - - test "renders metric changes table when codebase_delta present" do - result = Formatter.format_markdown(@sample_report_with_delta, :default, :plain) - assert result =~ "Metric Changes" - assert result =~ "Readability" - assert result =~ "65.00" - assert result =~ "61.00" - end - - test "omits delta section when codebase_delta is nil" do - result = Formatter.format_markdown(@sample_report, :default, :plain) - refute result =~ "Metric Changes" - end -end - -describe "plain formatter: block section" do - @block_potential %{ - category: "function_design", - behavior: "cyclomatic_complexity_under_10", - cosine_delta: 0.41, - severity: :critical, - fix_hint: "Reduce branching" - } - - @top_blocks [ - %{ - path: "lib/foo.ex", - status: "modified", - blocks: [ - %{ - start_line: 42, - end_line: 67, - type: "code", - token_count: 84, - potentials: [@block_potential] - } - ] - } - ] - - @sample_report_with_blocks Map.put(@sample_report, :top_blocks, @top_blocks) - - test "renders block section header" do - result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) - assert result =~ "Blocks" - assert result =~ "1 flagged" - end - - test "renders file group with status" do - result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) - assert result =~ "lib/foo.ex" - assert result =~ "modified" - end - - test "renders block location and type" do - result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) - assert result =~ "lines 42" - assert result =~ "67" - assert result =~ 
"84 tokens" - end - - test "renders severity icon and behavior" do - result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) - assert result =~ "🔴" - assert result =~ "CRITICAL" - assert result =~ "cyclomatic_complexity_under_10" - assert result =~ "0.41" - end - - test "renders fix hint" do - result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) - assert result =~ "Reduce branching" - end - - test "omits block section when top_blocks is empty" do - report = Map.put(@sample_report, :top_blocks, []) - result = Formatter.format_markdown(report, :default, :plain) - refute result =~ "## Blocks" - end - - test "omits block section when top_blocks key absent" do - result = Formatter.format_markdown(@sample_report, :default, :plain) - refute result =~ "## Blocks" - end -end -``` - -- [ ] **Step 2: Run formatter tests to confirm failures** - -```bash -mix test test/codeqa/health_report/formatter_test.exs --trace -``` - -Expected: new tests FAIL, deleted tests no longer present. - -- [ ] **Step 3: Update `lib/codeqa/health_report/formatter/plain.ex`** - -Replace the `render/2` function and remove `cosine_worst_offenders/2` + `worst_offenders_section/2`. Add new section functions: - -```elixir -@spec render(map(), atom()) :: String.t() -def render(report, detail) do - [ - pr_summary_section(Map.get(report, :pr_summary)), - header(report), - cosine_legend(), - delta_section(Map.get(report, :codebase_delta)), - overall_table(report), - top_issues_section(Map.get(report, :top_issues, []), detail), - blocks_section(Map.get(report, :top_blocks, [])), - category_sections(report.categories, detail) - ] - |> List.flatten() - |> Enum.join("\n") -end -``` - -Remove `cosine_worst_offenders/2` (lines 91–116) and `worst_offenders_section/2` (lines 196–235) entirely. 
- -Update `render_category/2` for cosine — remove the `cosine_worst_offenders` call: - -```elixir -defp render_category(%{type: :cosine} = cat, _detail) do - cosine_section_header(cat) ++ cosine_behaviors_table(cat) -end - -defp render_category(cat, _detail) do - section_header(cat) ++ metric_detail(cat) -end -``` - -Add the three new private functions at the bottom of the module: - -```elixir -defp pr_summary_section(nil), do: [] - -defp pr_summary_section(summary) do - delta_str = - if summary.score_delta >= 0, - do: "+#{summary.score_delta}", - else: "#{summary.score_delta}" - - status_str = "#{summary.files_modified} modified, #{summary.files_added} added" - - [ - "> **Score:** #{summary.base_grade} → #{summary.head_grade} | **Δ** #{delta_str} pts | **#{summary.blocks_flagged}** blocks flagged across #{summary.files_changed} files | #{status_str}", - "" - ] -end - -defp delta_section(nil), do: [] - -defp delta_section(delta) do - base_agg = delta.base.aggregate - head_agg = delta.head.aggregate - - metrics = [ - {"Readability", "readability", "mean_flesch_adapted"}, - {"Complexity", "halstead", "mean_difficulty"}, - {"Duplication", "compression", "mean_redundancy"}, - {"Structure", "branching", "mean_branch_count"} - ] - - rows = - Enum.flat_map(metrics, fn {label, group, key} -> - base_val = get_in(base_agg, [group, key]) - head_val = get_in(head_agg, [group, key]) - - if is_number(base_val) and is_number(head_val) do - diff = Float.round(head_val - base_val, 2) - diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}" - ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"] - else - [] - end - end) - - if rows == [] do - [] - else - [ - "## Metric Changes", - "", - "| Category | Base | Head | Δ |", - "|----------|------|------|---|" - | rows - ] ++ [""] - end -end - -defp blocks_section([]), do: [] - -defp blocks_section(top_blocks) do - total = Enum.sum(Enum.map(top_blocks, fn g -> length(g.blocks) end)) - 
- file_parts = - Enum.flat_map(top_blocks, fn group -> - status_str = if group.status, do: " [#{group.status}]", else: "" - - block_lines = - Enum.flat_map(group.blocks, fn block -> - end_line = block.end_line || block.start_line - header = "**lines #{block.start_line}–#{end_line}** · #{block.type} · #{block.token_count} tokens" - - potential_lines = - Enum.flat_map(block.potentials, fn p -> - icon = severity_icon(p.severity) - delta_str = format_num(p.cosine_delta) - label = "#{String.upcase(to_string(p.severity))}" - line = " #{icon} #{label} #{p.category} / #{p.behavior} (Δ #{delta_str})" - fix = if p.fix_hint, do: [" → #{p.fix_hint}"], else: [] - [line | fix] - end) - - [header | potential_lines] ++ [""] - end) - - ["### #{group.path}#{status_str}", "" | block_lines] - end) - - [ - "## Blocks (#{total} flagged across #{length(top_blocks)} files)", - "" - | file_parts - ] -end - -defp severity_icon(:critical), do: "🔴" -defp severity_icon(:high), do: "🟠" -defp severity_icon(:medium), do: "🟡" -``` - -- [ ] **Step 4: Run formatter tests** - -```bash -mix test test/codeqa/health_report/formatter_test.exs --trace -``` - -Expected: all PASS. 
- -- [ ] **Step 5: Run full suite** - -```bash -mix test -``` - -- [ ] **Step 6: Commit** - -```bash -git add lib/codeqa/health_report/formatter/plain.ex test/codeqa/health_report/formatter_test.exs -git commit -m "feat(formatter): add block, delta, PR summary sections; remove worst_offenders (plain)" -``` - ---- - -## Task 6: Update GitHub formatter - -**Files:** -- Modify: `lib/codeqa/health_report/formatter/github.ex` -- Modify: `test/codeqa/health_report/formatter_test.exs` - -- [ ] **Step 1: Add GitHub formatter tests** - -In `test/codeqa/health_report/formatter_test.exs`, add a new describe block: - -```elixir -describe "github formatter: block section" do - @block_potential %{ - category: "function_design", - behavior: "cyclomatic_complexity_under_10", - cosine_delta: 0.41, - severity: :critical, - fix_hint: "Reduce branching" - } - - @top_blocks_gh [ - %{ - path: "lib/foo.ex", - status: "modified", - blocks: [ - %{start_line: 42, end_line: 67, type: "code", token_count: 84, potentials: [@block_potential]} - ] - } - ] - - @report_with_blocks_gh Map.put(@sample_report, :top_blocks, @top_blocks_gh) - - test "renders block section with details wrapper per file" do - result = Formatter.format_markdown(@report_with_blocks_gh, :default, :github) - assert result =~ "Blocks" - assert result =~ "<details>"
- assert result =~ "lib/foo.ex" - assert result =~ "modified" - end - - test "renders severity and fix hint" do - result = Formatter.format_markdown(@report_with_blocks_gh, :default, :github) - assert result =~ "🔴" - assert result =~ "cyclomatic_complexity_under_10" - assert result =~ "Reduce branching" - end -end - -describe "github formatter: PR summary and delta" do - @pr_summary_gh %{ - base_score: 85, head_score: 77, score_delta: -8, - base_grade: "B+", head_grade: "C+", - blocks_flagged: 6, files_changed: 3, files_added: 1, files_modified: 2 - } - - @delta_gh %{ - base: %{aggregate: %{"readability" => %{"mean_flesch_adapted" => 65.0}}}, - head: %{aggregate: %{"readability" => %{"mean_flesch_adapted" => 61.0}}} - } - - test "renders PR summary" do - report = @sample_report |> Map.put(:pr_summary, @pr_summary_gh) - result = Formatter.format_markdown(report, :default, :github) - assert result =~ "B+" - assert result =~ "C+" - assert result =~ "-8" - end - - test "renders delta section" do - report = @sample_report |> Map.put(:codebase_delta, @delta_gh) - result = Formatter.format_markdown(report, :default, :github) - assert result =~ "Metric Changes" - assert result =~ "65.00" - assert result =~ "61.00" - end -end -``` - -- [ ] **Step 2: Run tests to confirm failures** - -```bash -mix test test/codeqa/health_report/formatter_test.exs --trace 2>&1 | grep -E "FAILED|failure" -``` - -- [ ] **Step 3: Update `lib/codeqa/health_report/formatter/github.ex`** - -Update `render/3` to include new sections and remove worst_offenders: - -```elixir -def render(report, detail, opts \\ []) do - chart? 
= Keyword.get(opts, :chart, true) - display_categories = merge_cosine_categories(report.categories) - - [ - pr_summary_section(Map.get(report, :pr_summary)), - header(report), - cosine_legend(), - delta_section(Map.get(report, :codebase_delta)), - if(chart?, do: mermaid_chart(display_categories), else: []), - progress_bars(display_categories), - top_issues_section(Map.get(report, :top_issues, []), detail), - blocks_section(Map.get(report, :top_blocks, [])), - category_sections(display_categories, detail), - footer() - ] - |> List.flatten() - |> Enum.join("\n") -end -``` - -Remove `cosine_worst_offenders/2` (lines 254–304) and `worst_offenders/2` (lines 384–435). - -Update `cosine_section_content/2` — remove the call to `cosine_worst_offenders`: - -```elixir -defp cosine_section_content(cat, _detail) do - # ... existing behaviors_table code ... - behaviors_table ++ [""] -end -``` - -Update `section_content/2` — remove the `++ worst_offenders(cat)` at the end (line 381): - -```elixir -defp section_content(cat, _detail) do - # ... existing code without worst_offenders ... 
- [ - "Codebase averages: #{metric_summary}", - "" - | metrics_table - ] ++ [""] -end -``` - -Add new private functions at the bottom: - -```elixir -defp pr_summary_section(nil), do: [] - -defp pr_summary_section(summary) do - delta_str = - if summary.score_delta >= 0, - do: "+#{summary.score_delta}", - else: "#{summary.score_delta}" - - status_str = "#{summary.files_modified} modified, #{summary.files_added} added" - - [ - "> **Score:** #{summary.base_grade} → #{summary.head_grade} | **Δ** #{delta_str} pts | **#{summary.blocks_flagged}** blocks flagged across #{summary.files_changed} files | #{status_str}", - "" - ] -end - -defp delta_section(nil), do: [] - -defp delta_section(delta) do - base_agg = delta.base.aggregate - head_agg = delta.head.aggregate - - metrics = [ - {"Readability", "readability", "mean_flesch_adapted"}, - {"Complexity", "halstead", "mean_difficulty"}, - {"Duplication", "compression", "mean_redundancy"}, - {"Structure", "branching", "mean_branch_count"} - ] - - rows = - Enum.flat_map(metrics, fn {label, group, key} -> - base_val = get_in(base_agg, [group, key]) - head_val = get_in(head_agg, [group, key]) - - if is_number(base_val) and is_number(head_val) do - diff = Float.round(head_val - base_val, 2) - diff_str = if diff >= 0, do: "+#{format_num(diff)}", else: "#{format_num(diff)}" - ["| #{label} | #{format_num(base_val)} | #{format_num(head_val)} | #{diff_str} |"] - else - [] - end - end) - - if rows == [] do - [] - else - [ - "## Metric Changes", - "", - "| Category | Base | Head | Δ |", - "|----------|------|------|---|" - | rows - ] ++ [""] - end -end - -defp blocks_section([]), do: [] - -defp blocks_section(top_blocks) do - total = Enum.sum(Enum.map(top_blocks, fn g -> length(g.blocks) end)) - - file_cards = - Enum.flat_map(top_blocks, fn group -> - status_str = if group.status, do: " [#{group.status}]", else: "" - summary_line = "🔍 #{group.path}#{status_str} — #{length(group.blocks)} block(s)" - - block_lines = - 
Enum.flat_map(group.blocks, fn block -> - end_line = block.end_line || block.start_line - - potential_lines = - Enum.flat_map(block.potentials, fn p -> - icon = severity_icon(p.severity) - delta_str = format_num(p.cosine_delta) - label = String.upcase(to_string(p.severity)) - line = "**#{icon} #{label}** `#{p.category}/#{p.behavior}` (Δ #{delta_str})" - fix = if p.fix_hint, do: ["> #{p.fix_hint}"], else: [] - [line | fix] - end) - - ["**lines #{block.start_line}–#{end_line}** · #{block.type} · #{block.token_count} tokens"] ++ - potential_lines ++ [""] - end) - - inner = List.flatten(block_lines) |> Enum.join("\n") - - [ - "<details>",
- "<summary>#{summary_line}</summary>", - "", - inner, - "</details>
    ", - "" - ] - end) - - [ - "## 🔍 Blocks (#{total} flagged across #{length(top_blocks)} files)", - "" - | file_cards - ] -end - -defp severity_icon(:critical), do: "🔴" -defp severity_icon(:high), do: "🟠" -defp severity_icon(:medium), do: "🟡" -``` - -- [ ] **Step 4: Run formatter tests** - -```bash -mix test test/codeqa/health_report/formatter_test.exs --trace -``` - -Expected: all PASS. - -- [ ] **Step 5: Run full suite** - -```bash -mix test -``` - -- [ ] **Step 6: Commit** - -```bash -git add lib/codeqa/health_report/formatter/github.ex test/codeqa/health_report/formatter_test.exs -git commit -m "feat(formatter): add block, delta, PR summary sections; remove worst_offenders (github)" -``` - ---- - -## Task 7: Update `CLI.HealthReport` - -**Files:** -- Modify: `lib/codeqa/cli/health_report.ex` - -- [ ] **Step 1: Update `@command_options` and usage string** - -In `lib/codeqa/cli/health_report.ex`, add to `@command_options`: - -```elixir -@command_options [ - output: :string, - config: :string, - detail: :string, - top: :integer, - format: :string, - ignore_paths: :string, - base_ref: :string, - head_ref: :string -] -``` - -Add to the usage string: - -``` - --base-ref REF Base git ref for PR comparison (enables delta and block scoping) - --head-ref REF Head git ref (default: HEAD) -``` - -- [ ] **Step 2: Update `run/1` to wire dual analysis** - -Replace the `run/1` body (keeping the existing single-pass as the fallback when no `--base-ref`). 
The full updated `run/1`: - -```elixir -def run(args) do - {opts, [path], _} = Options.parse(args, @command_options, o: :output) - Options.validate_dir!(path) - extra_ignore_patterns = Options.parse_ignore_paths(opts[:ignore_paths]) - - base_ref = opts[:base_ref] - head_ref = opts[:head_ref] || "HEAD" - - files = - CodeQA.Engine.Collector.collect_files(path, extra_ignore_patterns) - - if map_size(files) == 0 do - IO.puts(:stderr, "Warning: no source files found in '#{path}'") - exit({:shutdown, 1}) - end - - IO.puts(:stderr, "Analyzing #{map_size(files)} files for health report...") - - analyze_opts = - Options.build_analyze_opts(opts) ++ CodeQA.Config.near_duplicate_blocks_opts() - - start_time = System.monotonic_time(:millisecond) - results = CodeQA.Engine.Analyzer.analyze_codebase(files, analyze_opts) - end_time = System.monotonic_time(:millisecond) - - IO.puts(:stderr, "Analysis completed in #{end_time - start_time}ms") - - nodes_top = opts[:nodes_top] || 3 - results = CodeQA.BlockImpactAnalyzer.analyze(results, files, nodes_top: nodes_top) - - total_bytes = results["files"] |> Map.values() |> Enum.map(& &1["bytes"]) |> Enum.sum() - - results = - Map.put(results, "metadata", %{ - "path" => Path.expand(path), - "timestamp" => DateTime.utc_now() |> DateTime.to_iso8601(), - "total_files" => map_size(files), - "total_bytes" => total_bytes - }) - - {base_results, changed_files} = - if base_ref do - IO.puts(:stderr, "Collecting base snapshot at #{base_ref}...") - base_files = CodeQA.Git.collect_files_at_ref(path, base_ref) - changed = CodeQA.Git.changed_files(path, base_ref, head_ref) - - IO.puts(:stderr, "Analyzing base snapshot (#{map_size(base_files)} files)...") - base_res = CodeQA.Engine.Analyzer.analyze_codebase(base_files, analyze_opts) - - {base_res, changed} - else - {nil, []} - end - - detail = parse_detail(opts[:detail]) - format = parse_format(opts[:format]) - top_n = opts[:top] || 5 - - report = - CodeQA.HealthReport.generate(results, - config: 
opts[:config], - detail: detail, - top: top_n, - base_results: base_results, - changed_files: changed_files - ) - - markdown = CodeQA.HealthReport.to_markdown(report, detail, format) - - case opts[:output] do - nil -> - markdown - - file -> - File.write!(file, markdown) - IO.puts(:stderr, "Health report written to #{file}") - "" - end -end -``` - -- [ ] **Step 3: Run full test suite** - -```bash -mix test -``` - -Expected: all PASS (no tests for git integration at this stage — the git calls require an actual repo with refs, which integration tests would mock or skip). - -- [ ] **Step 4: Commit** - -```bash -git add lib/codeqa/cli/health_report.ex -git commit -m "feat(cli): add --base-ref/--head-ref to health-report for PR delta and block scoping" -``` - ---- - -## Task 8: Delete compare command and related files - -**Files:** -- Delete: `lib/codeqa/cli/compare.ex` -- Delete: `lib/codeqa/comparator.ex` -- Delete: `lib/codeqa/formatter.ex` -- Delete: `lib/codeqa/summarizer.ex` -- Delete: `test/codeqa/cli_compare_test.exs` -- Modify: `lib/codeqa/cli.ex` - -- [ ] **Step 1: Remove compare from the CLI router** - -Read `lib/codeqa/cli.ex` and remove the line that registers `compare` (line 6). It will look like: - -```elixir -"compare" => CodeQA.CLI.Compare, -``` - -Remove that entry entirely. - -- [ ] **Step 2: Delete the four source files** - -```bash -rm lib/codeqa/cli/compare.ex lib/codeqa/comparator.ex lib/codeqa/formatter.ex lib/codeqa/summarizer.ex -``` - -- [ ] **Step 3: Delete compare tests** - -```bash -rm test/codeqa/cli_compare_test.exs -``` - -- [ ] **Step 4: Verify no remaining references** - -```bash -grep -r "CLI\.Compare\|CodeQA\.Comparator\|CodeQA\.Formatter\b\|CodeQA\.Summarizer" lib/ test/ --include="*.ex" --include="*.exs" -``` - -Expected: no output. - -- [ ] **Step 5: Run full test suite** - -```bash -mix test -``` - -Expected: all PASS, no references to deleted modules. 
- -- [ ] **Step 6: Commit** - -```bash -git add -A -git commit -m "feat(cli): delete compare command — absorbed into health-report" -``` - ---- diff --git a/docs/superpowers/specs/2026-03-20-health-report-blocks-and-delta.md b/docs/superpowers/specs/2026-03-20-health-report-blocks-and-delta.md deleted file mode 100644 index 75f2013..0000000 --- a/docs/superpowers/specs/2026-03-20-health-report-blocks-and-delta.md +++ /dev/null @@ -1,312 +0,0 @@ -# Health Report: Block Impact Section, PR Delta, and Compare Consolidation - -**Date:** 2026-03-20 -**Status:** Approved for implementation - ---- - -## Goal - -Unify the health-report and compare commands into a single PR-aware report that: - -1. Shows impactful code blocks per changed file with severity and fix hints -2. Shows a before/after metric delta with bar graphs when a base ref is provided -3. Shows a PR impact summary at the top of the report -4. Removes file-level worst_offenders (replaced by block-level view) -5. Deletes the compare command entirely - ---- - -## User Stories Addressed - -| User | Need | How addressed | -|------|------|---------------| -| PR author | Find antipatterns by file and line | Block section: file-grouped, line-precise, behavior + fix hint | -| Reviewer | Estimate merge risk quickly | PR impact summary: score drift, blocks flagged, files changed | -| Reviewer | Spot quality regressions | Block severity label + delta bar graphs | -| New repo user | Assess overall code quality | Overall grade + category breakdown (unchanged) | - ---- - -## Architecture - -### What Changes - -| Component | Change | -|-----------|--------| -| `CLI.HealthReport` | Add `--base-ref`, `--head-ref` flags; wire git diff and dual analysis | -| `HealthReport.generate/2` | Accept `changed_files` + `base_results` opts; add `top_blocks` and `codebase_delta` keys; drop `worst_offenders` | -| `HealthReport.Delta` | New module — wraps delta computation (logic ported from `Comparator`) | -| `HealthReport.Formatter` | 
Remove worst_offenders rendering; add PR summary, delta bar graphs, block section | -| `BlockImpactAnalyzer` | Add `end_line` to serialized node output | -| `CLI.Compare` | **Deleted** | -| `Comparator` | **Deleted** (logic moved to `HealthReport.Delta`) | -| `lib/codeqa/formatter.ex` | **Deleted** (compare-only formatter — confirmed no health-report usage) | -| `Summarizer` | **Deleted** (confirmed compare-only) | - -### What Stays the Same - -- Overall score, grade, categories (threshold + cosine) -- `top_issues` (SampleRunner diagnose_aggregate) -- Metadata section -- All analysis options (workers, cache, timeout, NCD flags) -- Backward compatibility: running without `--base-ref` produces the existing report minus worst_offenders - ---- - -## Data Flow - -### With `--base-ref` - -``` -CLI.HealthReport - ├── Git.collect_files_at_ref(path, base_ref) → base_files_map - ├── Git.collect_files_at_ref(path, head_ref) → head_files_map (head_ref defaults to HEAD) - ├── Git.changed_files(path, base_ref, head_ref) → [%ChangedFile{path, status}] - ├── Analyzer.analyze_codebase(head_files_map) → head_results - ├── BlockImpactAnalyzer.analyze(head_results, head_files_map) → head_results_with_nodes - ├── Analyzer.analyze_codebase(base_files_map) → base_results - └── HealthReport.generate(head_results_with_nodes, - base_results: base_results, - changed_files: [%ChangedFile{path, status}]) # full structs, not just paths -``` - -### Without `--base-ref` - -``` -CLI.HealthReport - ├── Analyzer.analyze_codebase(files_map) → results - ├── BlockImpactAnalyzer.analyze(results, files_map) → results_with_nodes - └── HealthReport.generate(results_with_nodes) - (no delta, blocks shown for all files with significant impact, status: nil) -``` - ---- - -## CLI Options - -Added to `codeqa health-report `: - -| Option | Default | Description | -|--------|---------|-------------| -| `--base-ref REF` | (none) | Base git ref to compare from. Enables delta and PR scoping. 
| -| `--head-ref REF` | `HEAD` | Head git ref for comparison | - -Removed: `--changes-only` (never used; always analyzes all files). - ---- - -## `HealthReport.generate/2` Output Shape - -```elixir -%{ - metadata: %{path, timestamp, total_files}, - pr_summary: %{ # nil when no base_results - base_score: integer(), - head_score: integer(), - score_delta: integer(), # head - base - base_grade: String.t(), - head_grade: String.t(), - blocks_flagged: integer(), # derived: Enum.sum(Enum.map(top_blocks, &length(&1.blocks))) - files_changed: integer(), - files_added: integer(), - files_modified: integer() - }, - overall_score: integer(), - overall_grade: String.t(), - codebase_delta: map() | nil, # nil when no base_results - categories: [category_map], # worst_offenders removed from each - top_issues: [behavior_map], - top_blocks: [file_block_group] # new -} -``` - -### `pr_summary` Computation Notes - -- `base_score` / `base_grade`: requires running the full grading pipeline on `base_results` (same `Grader.grade_aggregate` + `Grader.overall_score` calls as for head). This is a second pass over base data — not a shortcut. -- `blocks_flagged`: computed after `top_blocks` is assembled (sum of all blocks across all file groups). -- `files_added` / `files_modified`: counted from `changed_files` structs (`:status` field). - -### `top_blocks` Shape - -```elixir -[ - %{ - path: String.t(), - status: "added" | "modified" | nil, # nil when no base_results (no --base-ref) - blocks: [ - %{ - start_line: integer(), - end_line: integer(), - type: String.t(), # "code" | "doc" | "typespec" - token_count: integer(), - potentials: [ - %{ - category: String.t(), - behavior: String.t(), - cosine_delta: float(), - severity: :critical | :high | :medium, - fix_hint: String.t() | nil # nil if not defined for that behavior - } - ] - } - ] - } -] -``` - -### Severity Computation - -Severity is computed during `top_blocks` assembly in `HealthReport.generate/2`, not in `BlockImpactAnalyzer`. 
The baseline codebase cosine scores are already available via `SampleRunner.diagnose_aggregate(baseline_codebase_agg, top: 99_999, languages: project_langs)` — the same call already made for `top_issues`. Pass these as a lookup map `%{{category, behavior} => codebase_cosine}` into the block assembly step. - -For each `{behavior, cosine_delta}` on a block: - -``` -codebase_cosine = lookup codebase cosine for that {category, behavior} - (default to 0.0 if behavior not found in codebase diagnose) -gap = max(0.01, 1.0 - codebase_cosine) # floor prevents division by zero -severity_ratio = cosine_delta / gap # fraction of existing gap this block causes - -:critical when severity_ratio > 0.50 -:high when severity_ratio > 0.25 -:medium when severity_ratio > 0.10 -(filtered) when severity_ratio <= 0.10 (below significance, not shown) -``` - -**Note on thresholds:** These are initial defaults. The gap-relative formula means a block with `cosine_delta = 0.12` may be `:critical` in a healthy codebase (small gap) and `:medium` in a poor one (large gap). This is intentional — severity reflects impact relative to where the codebase currently stands. Thresholds should be validated against real codebases and are configurable in future iterations. - -### Fix Hint Enrichment - -Fix hints are sourced from the combined_metrics YAMLs (`priv/combined_metrics/.yml`, `_fix_hint` key per behavior). All 12 category YAMLs have `_fix_hint` fields. Enrichment happens during `top_blocks` assembly in `HealthReport.generate/2` using `CombinedMetrics.Scorer.all_yamls()` (compiled at module load time). Pattern mirrors the existing `cosine_fix_hint/2` in formatters. If a behavior has no `_fix_hint`, the field is `nil`. - -### Block Filtering - -A block appears in `top_blocks` when: -- `token_count >= 10` (already guaranteed by BlockImpactAnalyzer, but re-checked for safety) -- At least one potential has `severity != filtered` (i.e. 
`severity_ratio > 0.10`) -- File path is in `changed_files` paths (when `--base-ref` given) or any file (when not) - -Blocks within a file are ordered by their highest `cosine_delta` descending. - ---- - -## `BlockImpactAnalyzer` Change: Add `end_line` - -The serialized node map in `serialize_node/9` (`block_impact_analyzer.ex:167-175`) currently omits `end_line`. Add it: - -```elixir -%{ - "start_line" => node.start_line, - "end_line" => node.end_line, # ADD THIS - "column_start" => ..., - ... -} -``` - -The `Node` struct already has `end_line` — this is a one-line addition. The existing test in `block_impact_analyzer_test.exs` must also assert `Map.has_key?(node, "end_line")`. - ---- - -## `HealthReport.Delta` Module - -New module wrapping delta computation, ported from `Comparator`: - -```elixir -@spec compute(base_results :: map(), head_results :: map()) :: map() -def compute(base_results, head_results) -``` - -Returns per-metric aggregate delta (head minus base), porting `compute_aggregate_delta/2` and `compute_numeric_delta/2` from `Comparator`. File-level deltas are not included (compare-only, now removed). - ---- - -## Formatter Changes - -### Removed - -- Worst offenders tables in all category sections (both threshold and cosine): - - `plain.ex`: remove calls at lines 60, 64 and functions `cosine_worst_offenders/2` (91-117), `worst_offenders_section/2` (204-245) - - `github.ex`: remove calls at lines 249, 381 and functions `cosine_worst_offenders/2` (254-304), `worst_offenders/2` (384-435) -- All compare-command formatting code (`lib/codeqa/formatter.ex` deleted) - -### Added - -**1. PR Impact Summary** (top of report, only when `pr_summary` present; omitted entirely when nil) - -``` -Score: B+ → C | Δ −8 pts | 6 blocks flagged across 3 files | 4 modified, 1 added -``` - -**2. 
Delta Bar Graphs** (after PR summary, before categories; only when `codebase_delta` present) - -Bar graphs per major category (complexity, readability, duplication, structure) showing base vs head values. Port `progress_bars/2` and `mermaid_chart/1` logic from `lib/codeqa/formatter.ex`. Plain formatter uses ASCII, GitHub formatter uses mermaid. - -**3. Block Section** (after top_issues) - -``` -## Blocks (6 flagged across 3 files) - -### path/to/file.ex [modified] - -**lines 42–67** · function · 84 tokens - 🔴 CRITICAL function_design / cyclomatic_complexity_under_10 (Δ 0.41) - → Break this function into smaller single-responsibility functions. - 🟠 HIGH structure / deep_nesting (Δ 0.18) - → Flatten nested conditionals using early returns or pattern matching. - -**lines 120–134** · code · 31 tokens - 🟡 MEDIUM naming / identifier_length (Δ 0.12) - → Use descriptive names that convey intent without abbreviation. -``` - -Severity icons: 🔴 CRITICAL, 🟠 HIGH, 🟡 MEDIUM. -GitHub formatter wraps each file in a `
    ` block (consistent with how categories are already wrapped in `github.ex:137-195`). - ---- - -## Deletions - -The following files are deleted as part of this work (all confirmed compare-only, no health-report dependencies): - -- `lib/codeqa/cli/compare.ex` -- `lib/codeqa/comparator.ex` -- `lib/codeqa/formatter.ex` -- `lib/codeqa/summarizer.ex` -- `test/codeqa/cli_compare_test.exs` - ---- - -## Testing - -### New tests required - -- Unit tests for `HealthReport.Delta.compute/2` -- Unit tests for severity computation: all three thresholds, filter boundary, gap floor (gap=0 → floored to 0.01), behavior not found in codebase diagnose (default 0.0) -- Unit tests for `top_blocks` assembly: filtering by token_count, severity, changed_files; ordering by cosine_delta; fix_hint inclusion and nil case -- Unit tests for PR summary computation: score/grade computation from base+head, blocks_flagged derivation, file status counts -- Integration test: `HealthReport.generate/2` with and without `base_results` — verify output keys present/nil correctly -- Formatter tests: block section renders correctly for plain and github formats; pr_summary nil omits summary and delta sections gracefully -- CLI test: `--base-ref` wires through to `Git.collect_files_at_ref`, `Git.changed_files`, and `HealthReport.generate` correctly -- `BlockImpactAnalyzer` test: assert `end_line` present in serialized node - -### Tests to delete - -- `test/codeqa/cli_compare_test.exs` (entire file) -- `test/codeqa/health_report/formatter_test.exs:186-194` — "includes worst offenders section" -- `test/codeqa/health_report/formatter_test.exs:216-226` — "renders cosine worst offenders per behavior" - -### Tests to update - -- `test/codeqa/health_report/formatter_test.exs:196-200` — "summary detail omits category sections" (refute reason changes) -- Any test referencing `worst_offenders` in the generate output shape -- `test/codeqa/block_impact_analyzer_test.exs` — add `end_line` assertion - ---- - -## Out of 
Scope - -- Per-block raw metric values (blocks carry cosine_delta only, not raw metrics) -- File-level delta details (compare's per-file before/after table is dropped) -- Near-duplicate block pairs in the block section (they exist as metrics but are not surfaced here) -- Relative severity across blocks (no "this block is Nx worse than average block") -- Configurable severity thresholds (hardcoded defaults for now; future iteration) diff --git a/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md b/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md deleted file mode 100644 index 8ece93b..0000000 --- a/docs/superpowers/specs/2026-03-22-multi-part-pr-comments-design.md +++ /dev/null @@ -1,128 +0,0 @@ -# Multi-Part PR Comments Design - -**Date:** 2026-03-22 -**Status:** Proposed - -## Context - -The `codeqa health-report` GitHub Action posts a markdown report as a sticky PR comment via `marocchino/sticky-pull-request-comment@v2`. GitHub's PR comment API has a hard 65,536 character limit. On large codebases (300+ files), the generated report exceeds this limit and the posting step fails. - -## Solution - -Split the report into fixed-section parts, each posted as a separate sticky PR comment. No content compression — splitting is purely a rendering concern. - -## Part Assignment - -Parts are fixed, not dynamically determined by content size (except Part 3+ which slices the blocks section). 
- -| Part | Sticky Header | Content | -|------|--------------|---------| -| 1 | `codeqa-health-report-1` | Header + overall grade + mermaid chart + progress bars + overall category table + PR summary + metric changes (delta) | -| 2 | `codeqa-health-report-2` | Top likely issues + all category detail sections (threshold metrics + cosine behaviors) | -| 3+ | `codeqa-health-report-3`, `codeqa-health-report-4`, … | Blocks section, sliced at 60,000 characters per part | - -Each non-final chunk of Part 3+ ends with: - -``` -> ⚠️ Truncated at 60,000 chars — continued in next comment -``` - -If there are no blocks, Part 3 is written as a single empty part (`""`). - -## Formatter Changes - -**File:** `lib/codeqa/health_report/formatter/github.ex` - -Add three new rendering entry points alongside the existing `render/3`: - -- `render_part_1(report, opts)` → `String.t()` — header, summary table, PR summary, delta, mermaid chart, progress bars -- `render_part_2(report, opts)` → `String.t()` — top issues, all category detail sections -- `render_parts_3(report, opts)` → `[String.t()]` — blocks section sliced into 60,000-char chunks; returns `["> _No content for this section._"]` when no blocks exist (the CLI does not substitute the placeholder — the formatter is responsible) - -**Each rendered part must end with a sentinel HTML comment as its final line:** - -``` - -``` - -where `N` is the 1-based part index. This sentinel is the sole mechanism by which `run.sh` locates an existing comment to update (the `marocchino` action and its header concept are removed entirely). Without the sentinel, every run would create a new comment instead of updating the previous one. - -The existing `render/3` is not changed. It continues to produce the full single-string report for `--output file` usage and does not append a sentinel. 
- -## CLI Changes - -**File:** `lib/codeqa/cli/health_report.ex` - -Add `render_parts(report, opts)` → `[String.t()]` — returns a flat list `[part_1, part_2, part_3a, part_3b, ...]`. Used internally when the `comment: true` path is active. `comment: true` is an **existing** flag (already parsed from `INPUT_COMMENT` env var in `run.sh` and passed as `--comment` to the CLI); no new flag is introduced. - -When writing output for comment mode, the CLI writes each part to a numbered temp file: - -- `$TMPDIR/codeqa-part-1.md` -- `$TMPDIR/codeqa-part-2.md` -- `$TMPDIR/codeqa-part-3.md` -- … etc. - -It also writes `$TMPDIR/codeqa-part-count.txt` containing the **padded** part count (i.e., `max(actual_parts, 3)`). `run.sh` reads this file to determine how many iterations to perform — it does not infer part count from files on disk. The padding ensures stale cleanup files are always written for at least parts 1–3. - -The existing `--output` flag behaviour (write single file) is unchanged. - -## Stale Comment Handling - -If a previous run produced 4 parts and the current run produces 2, the old parts 3 and 4 remain stale. To handle this, always write a minimum of 3 part files. Parts beyond the actual content get a single-line placeholder: - -``` -> _No content for this section._ -``` - -The sticky comment action overwrites the stale comment with the placeholder rather than leaving old content. The minimum of 3 is sufficient for the current fixed-section design. Real content is written for any blocks overflow to part 4+. - -**Known limitation:** if run N produces more than 3 parts (e.g., 5) and run N+1 produces fewer (e.g., 3), parts 4 and 5 from run N remain stale permanently — the minimum-3 floor does not cover them. This is accepted as an edge case; the stale comments are cosmetic (they hold the placeholder text), and a future cleanup step can address it if needed. 
- -## Action / run.sh Changes - -**File:** `scripts/run.sh` - -After generating part files, loop over them and post each as a sticky PR comment. Use the GitHub REST API directly (`curl -s -X POST/PATCH`) with the following sticky update-or-create logic: - -1. Search existing PR comments for one whose body contains the sentinel `` (appended to each part by the formatter) -2. If found: `PATCH /repos/{owner}/{repo}/issues/comments/{id}` with the new body -3. If not found: `POST /repos/{owner}/{repo}/issues/{pr_number}/comments` with the new body - -Each part's markdown ends with the sentinel HTML comment so future runs can locate and update it: - -``` - -``` - -This replicates the sticky semantics of `marocchino/sticky-pull-request-comment@v2` without depending on that action for a variable number of posts. `run.sh` uses `GITHUB_TOKEN` (already available in the action environment) and `GITHUB_API_URL`, `GITHUB_REPOSITORY`, and `PR_NUMBER` (sourced from the workflow env). - -**File:** `.github/workflows/health-report.yml` - -Remove the `marocchino/sticky-pull-request-comment@v2` step. `run.sh` now owns posting entirely. The workflow passes `PR_NUMBER: ${{ github.event.pull_request.number }}` as an env var to the run step. - -## Key Constraints - -- `render/3` must not change behaviour — used by `--output` flag -- Part 1 must always be self-contained — a reader seeing only Part 1 gets the full codebase health picture -- Parts 2 and 3 are drill-down detail; safe to be empty if the codebase has no behaviors or blocks -- 60,000 char slice limit (not 65,536) leaves headroom for sticky comment metadata - -## Breaking Change - -The sticky comment header for Part 1 changes from `codeqa-health-report` to `codeqa-health-report-1`. Old single-part comments will not be cleaned up automatically on the first run after upgrade. 
- -## Files Affected - -| File | Change | -|------|--------| -| `lib/codeqa/health_report/formatter/github.ex` | Add `render_part_1/2`, `render_part_2/2`, `render_parts_3/2` | -| `lib/codeqa/cli/health_report.ex` | Add `render_parts/2`, multi-file output in comment mode | -| `scripts/run.sh` | Loop to post multiple part files | -| `.github/workflows/health-report.yml` | Simplified posting step | -| `test/codeqa/health_report/formatter_test.exs` | Tests for new part renderers | - -## What Does Not Change - -- The `--detail`, `--top`, `--format`, `--output` CLI flags -- The plain formatter (`Formatter.Plain`) -- Report data assembly (`health_report.ex`, `grader.ex`, `top_blocks.ex`) — splitting is purely a rendering concern From 4b43621bcf2f260770ea6d2a3cbaa0dd4e891df4 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 26 Mar 2026 15:13:21 +0100 Subject: [PATCH 64/71] chore: ignore README.md and action.yml in codeqa analysis --- .codeqa.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.codeqa.yml b/.codeqa.yml index 05b52a5..26f39be 100644 --- a/.codeqa.yml +++ b/.codeqa.yml @@ -12,6 +12,8 @@ ignore_paths: - test/** - devenv* - direnv* + - README.md + - action.yml # Impact weights for overall score calculation. # Combined metric categories default to 1 (can be overridden here). From fe643c3019d251d6462958e55d8996c248d34f55 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 26 Mar 2026 15:38:57 +0100 Subject: [PATCH 65/71] feat(blocks): show top 10 blocks by impact with source code Replaces the overwhelming "all blocks grouped by file" output with a focused, actionable view: - Flatten all blocks across the codebase into a single list - Rank by highest cosine_delta (strongest anti-pattern signal) - Take only the top 10 most impactful blocks - Include actual source code in collapsed
    blocks - Show line numbers for easy navigation This reduces the blocks section from 20+ PR comment parts (hundreds of KB) to a single compact section that developers can actually act on. Changes: - TopBlocks.build/3 now returns flat list of blocks with source code - Github formatter shows collapsed code blocks with syntax highlighting - Plain formatter updated to match new structure - Removed multi-part slicing (no longer needed with top 10 limit) --- lib/codeqa/health_report.ex | 2 +- lib/codeqa/health_report/formatter/github.ex | 173 ++++++--------- lib/codeqa/health_report/formatter/plain.ex | 35 ++- lib/codeqa/health_report/top_blocks.ex | 50 +++-- test/codeqa/health_report/formatter_test.exs | 208 ++++++++++-------- test/codeqa/health_report/top_blocks_test.exs | 105 ++++++--- test/codeqa/health_report_test.exs | 15 +- 7 files changed, 330 insertions(+), 258 deletions(-) diff --git a/lib/codeqa/health_report.ex b/lib/codeqa/health_report.ex index e70525b..f97d2f8 100644 --- a/lib/codeqa/health_report.ex +++ b/lib/codeqa/health_report.ex @@ -152,7 +152,7 @@ defmodule CodeQA.HealthReport do {base_score, base_grade} = Grader.overall_score(base_all_categories, grade_scale, impact_map) - blocks_flagged = Enum.sum(Enum.map(top_blocks, fn g -> length(g.blocks) end)) + blocks_flagged = length(top_blocks) files_added = Enum.count(changed_files, &(&1.status == "added")) files_modified = Enum.count(changed_files, &(&1.status == "modified")) diff --git a/lib/codeqa/health_report/formatter/github.ex b/lib/codeqa/health_report/formatter/github.ex index 03c0b93..b3cf6df 100644 --- a/lib/codeqa/health_report/formatter/github.ex +++ b/lib/codeqa/health_report/formatter/github.ex @@ -4,7 +4,6 @@ defmodule CodeQA.HealthReport.Formatter.Github do @bar_width 20 @filled "█" @empty "░" - @part_char_limit 60_000 @spec render(map(), atom(), keyword()) :: String.t() def render(report, detail, opts \\ []) do @@ -67,89 +66,18 @@ defmodule CodeQA.HealthReport.Formatter.Github do end 
@doc """ - Renders Part 3+: blocks section sliced into 60,000-char chunks. - Returns a list of strings, one per part. If no blocks, returns a single placeholder. + Renders Part 3: blocks section (top 10 blocks with code). + Returns a list with a single part since blocks are now limited to top 10. """ @spec render_parts_3(map(), keyword()) :: [String.t()] def render_parts_3(report, _opts \\ []) do top_blocks = Map.get(report, :top_blocks, []) if top_blocks == [] do - ["> _No content for this section._\n\n" <> sentinel_str(3)] + ["> _No near-duplicate blocks detected._\n\n" <> sentinel_str(3)] else blocks_content = blocks_section(top_blocks) |> List.flatten() |> Enum.join("\n") - slice_blocks_content(blocks_content, 3) - end - end - - defp slice_blocks_content(content, start_part) do - slice_blocks_content(content, start_part, []) - end - - defp slice_blocks_content("", part_num, acc) do - # No more content; finalize the last part if any, or emit placeholder - case acc do - [] -> ["> _No content for this section._\n\n" <> sentinel_str(part_num)] - _ -> Enum.reverse(acc) - end - end - - defp slice_blocks_content(content, part_num, acc) do - sentinel = sentinel_str(part_num) - truncation_warning = "\n\n> ⚠️ Truncated at 60,000 chars — continued in next comment\n\n" - - # Reserve space for sentinel and potential truncation warning - available = @part_char_limit - byte_size(sentinel) - byte_size(truncation_warning) - 10 - - if byte_size(content) <= available + byte_size(truncation_warning) do - # Fits in this part - final_part = content <> "\n\n" <> sentinel - Enum.reverse([final_part | acc]) - else - # Need to split - {chunk, rest} = split_at_safe_boundary(content, available) - part_content = chunk <> truncation_warning <> sentinel - slice_blocks_content(rest, part_num + 1, [part_content | acc]) - end - end - - defp split_at_safe_boundary(content, max_bytes) do - # Try to split at a
    boundary to avoid breaking HTML structure - prefix = binary_part(content, 0, min(max_bytes, byte_size(content))) - - case :binary.matches(prefix, "
    ") do - [] -> - # No good boundary, split at newline - split_at_newline(content, max_bytes) - - matches -> - {pos, len} = List.last(matches) - split_pos = pos + len - - if split_pos > div(max_bytes, 2) do - # Good split point - {binary_part(content, 0, split_pos), - binary_part(content, split_pos, byte_size(content) - split_pos)} - else - # Too early, try newline - split_at_newline(content, max_bytes) - end - end - end - - defp split_at_newline(content, max_bytes) do - prefix = binary_part(content, 0, min(max_bytes, byte_size(content))) - - case :binary.matches(prefix, "\n") do - [] -> - # No newline, hard split - {prefix, binary_part(content, byte_size(prefix), byte_size(content) - byte_size(prefix))} - - matches -> - {pos, _len} = List.last(matches) - - {binary_part(content, 0, pos), - binary_part(content, pos + 1, byte_size(content) - pos - 1)} + [blocks_content <> "\n\n" <> sentinel_str(3)] end end @@ -518,52 +446,79 @@ defmodule CodeQA.HealthReport.Formatter.Github do defp blocks_section([]), do: [] defp blocks_section(top_blocks) do - total = Enum.sum(Enum.map(top_blocks, fn g -> length(g.blocks) end)) - - file_cards = - Enum.flat_map(top_blocks, fn group -> - status_str = if group.status, do: " [#{group.status}]", else: "" - summary_line = "🔍 #{group.path}#{status_str} — #{length(group.blocks)} block(s)" - - inner = - group.blocks |> Enum.flat_map(&format_block/1) |> List.flatten() |> Enum.join("\n") - - [ - "
    ", - "#{summary_line}", - "", - inner, - "
    ", - "" - ] - end) + block_cards = Enum.flat_map(top_blocks, &format_block_card/1) [ - "## 🔍 Blocks (#{total} flagged across #{length(top_blocks)} files)", + "## 🔍 Top #{length(top_blocks)} Code Blocks by Impact", + "", + "> Ranked by cosine delta — highest anti-pattern signal first.", "" - | file_cards + | block_cards ] end - defp format_block(block) do + defp format_block_card(block) do end_line = block.end_line || block.start_line + top_potential = List.first(block.potentials) + icon = severity_icon(top_potential.severity) + delta_str = format_num(top_potential.cosine_delta) + status_str = if block.status, do: " [#{block.status}]", else: "" - header = - "**lines #{block.start_line}–#{end_line}** · #{block.type} · #{block.token_count} tokens" + summary_line = + "#{icon} #{block.path}:#{block.start_line}-#{end_line}#{status_str} — #{block.type} (#{block.token_count} tokens) — Δ#{delta_str}" - potential_lines = Enum.flat_map(block.potentials, &format_potential/1) - [header] ++ potential_lines ++ [""] + issues = format_block_issues(block.potentials) + code_block = format_code_block(block) + + [ + "
    ", + "#{summary_line}", + "", + "**Issues:**", + "" + | issues + ] ++ ["", code_block, "", "
    ", ""] + end + + defp format_block_issues(potentials) do + Enum.flat_map(potentials, fn p -> + icon = severity_icon(p.severity) + label = String.upcase(to_string(p.severity)) + delta_str = format_num(p.cosine_delta) + line = "- #{icon} **#{label}** `#{p.category}/#{p.behavior}` (Δ #{delta_str})" + fix = if p.fix_hint, do: [" > #{p.fix_hint}"], else: [] + [line | fix] + end) end - defp format_potential(p) do - icon = severity_icon(p.severity) - delta_str = format_num(p.cosine_delta) - label = String.upcase(to_string(p.severity)) - line = "**#{icon} #{label}** `#{p.category}/#{p.behavior}` (Δ #{delta_str})" - fix = if p.fix_hint, do: ["> #{p.fix_hint}"], else: [] - [line | fix] + defp format_code_block(%{source: nil}), do: "_Source code not available_" + + defp format_code_block(%{source: source, language: lang, start_line: start_line}) do + lang_hint = code_fence_lang(lang) + # Add line number comments for context + lines = String.split(source, "\n") + + numbered_lines = + lines + |> Enum.with_index(start_line) + |> Enum.map(fn {line, num} -> "#{String.pad_leading(to_string(num), 4)} │ #{line}" end) + |> Enum.join("\n") + + "```#{lang_hint}\n#{numbered_lines}\n```" end + defp code_fence_lang("elixir"), do: "elixir" + defp code_fence_lang("ruby"), do: "ruby" + defp code_fence_lang("javascript"), do: "javascript" + defp code_fence_lang("typescript"), do: "typescript" + defp code_fence_lang("python"), do: "python" + defp code_fence_lang("swift"), do: "swift" + defp code_fence_lang("kotlin"), do: "kotlin" + defp code_fence_lang("java"), do: "java" + defp code_fence_lang("go"), do: "go" + defp code_fence_lang("rust"), do: "rust" + defp code_fence_lang(_), do: "" + defp severity_icon(:critical), do: "🔴" defp severity_icon(:high), do: "🟠" defp severity_icon(:medium), do: "🟡" diff --git a/lib/codeqa/health_report/formatter/plain.ex b/lib/codeqa/health_report/formatter/plain.ex index 0b99b5c..3576ed6 100644 --- a/lib/codeqa/health_report/formatter/plain.ex +++ 
b/lib/codeqa/health_report/formatter/plain.ex @@ -209,30 +209,41 @@ defmodule CodeQA.HealthReport.Formatter.Plain do defp blocks_section([]), do: [] defp blocks_section(top_blocks) do - total = Enum.sum(Enum.map(top_blocks, fn g -> length(g.blocks) end)) - - file_parts = - Enum.flat_map(top_blocks, fn group -> - status_str = if group.status, do: " [#{group.status}]", else: "" - block_lines = Enum.flat_map(group.blocks, &format_block/1) - ["### #{group.path}#{status_str}", "" | block_lines] - end) + block_parts = Enum.flat_map(top_blocks, &format_block/1) [ - "## Blocks (#{total} flagged across #{length(top_blocks)} files)", + "## Top #{length(top_blocks)} Code Blocks by Impact", "" - | file_parts + | block_parts ] end defp format_block(block) do end_line = block.end_line || block.start_line + status_str = if block.status, do: " [#{block.status}]", else: "" header = - "**lines #{block.start_line}–#{end_line}** · #{block.type} · #{block.token_count} tokens" + "### #{block.path}:#{block.start_line}-#{end_line}#{status_str}" + + subheader = + "#{block.type} · #{block.token_count} tokens" potential_lines = Enum.flat_map(block.potentials, &format_potential/1) - [header | potential_lines] ++ [""] + code_lines = format_code_block(block) + [header, subheader, "" | potential_lines] ++ ["" | code_lines] ++ [""] + end + + defp format_code_block(%{source: nil}), do: ["_Source code not available_"] + + defp format_code_block(%{source: source, start_line: start_line}) do + lines = String.split(source, "\n") + + numbered_lines = + lines + |> Enum.with_index(start_line) + |> Enum.map(fn {line, num} -> " #{String.pad_leading(to_string(num), 4)} │ #{line}" end) + + ["```" | numbered_lines] ++ ["```"] end defp format_potential(p) do diff --git a/lib/codeqa/health_report/top_blocks.ex b/lib/codeqa/health_report/top_blocks.ex index 42a0bfd..57fd933 100644 --- a/lib/codeqa/health_report/top_blocks.ex +++ b/lib/codeqa/health_report/top_blocks.ex @@ -8,6 +8,7 @@ defmodule 
CodeQA.HealthReport.TopBlocks do @severity_high 0.25 @severity_medium 0.10 @gap_floor 0.01 + @top_n 10 defp build_fix_hint_lookup do Scorer.all_yamls() @@ -30,6 +31,7 @@ defmodule CodeQA.HealthReport.TopBlocks do @spec build(map(), [struct()], map()) :: [map()] def build(analysis_results, changed_files, codebase_cosine_lookup) do files = Map.get(analysis_results, "files", %{}) + base_path = get_in(analysis_results, ["metadata", "path"]) || "." fix_hints = build_fix_hint_lookup() file_entries = @@ -43,21 +45,22 @@ defmodule CodeQA.HealthReport.TopBlocks do |> Enum.map(fn {path, data} -> {path, Map.get(changed_index, path), data} end) end + # Flatten all blocks across all files, enrich with path and source code file_entries - |> Enum.map(fn {path, status, file_data} -> - blocks = - file_data - |> Map.get("nodes", []) - |> Enum.flat_map(&collect_nodes/1) - |> Enum.filter(&(&1["token_count"] >= @min_tokens)) - |> Enum.map(&enrich_block(&1, codebase_cosine_lookup, fix_hints)) - |> Enum.reject(&(&1.potentials == [])) - |> Enum.sort_by(&(-max_delta(&1))) - - %{path: path, status: status, blocks: blocks} + |> Enum.flat_map(fn {path, status, file_data} -> + file_data + |> Map.get("nodes", []) + |> Enum.flat_map(&collect_nodes/1) + |> Enum.filter(&(&1["token_count"] >= @min_tokens)) + |> Enum.map(&enrich_block(&1, codebase_cosine_lookup, fix_hints)) + |> Enum.reject(&(&1.potentials == [])) + |> Enum.map(&Map.merge(&1, %{path: path, status: status})) end) - |> Enum.reject(&(&1.blocks == [])) - |> Enum.sort_by(& &1.path) + # Rank by highest cosine_delta and take top N + |> Enum.sort_by(&(-max_delta(&1))) + |> Enum.take(@top_n) + # Add source code for each block + |> Enum.map(&add_source_code(&1, base_path)) end defp collect_nodes(node) do @@ -113,4 +116,25 @@ defmodule CodeQA.HealthReport.TopBlocks do defp max_delta(%{potentials: potentials}), do: Enum.max_by(potentials, & &1.cosine_delta).cosine_delta + + defp add_source_code(block, base_path) do + full_path = 
Path.join(base_path, block.path) + start_line = block.start_line + end_line = block.end_line || start_line + + source = + case File.read(full_path) do + {:ok, content} -> + content + |> String.split("\n") + |> Enum.slice((start_line - 1)..(end_line - 1)//1) + |> Enum.join("\n") + + {:error, _} -> + nil + end + + lang = CodeQA.Language.detect(block.path).name() + Map.merge(block, %{source: source, language: lang}) + end end diff --git a/test/codeqa/health_report/formatter_test.exs b/test/codeqa/health_report/formatter_test.exs index 244091e..1b87051 100644 --- a/test/codeqa/health_report/formatter_test.exs +++ b/test/codeqa/health_report/formatter_test.exs @@ -282,15 +282,13 @@ defmodule CodeQA.HealthReport.FormatterTest do %{ path: "lib/foo.ex", status: "modified", - blocks: [ - %{ - start_line: 42, - end_line: 67, - type: "code", - token_count: 84, - potentials: [@block_potential] - } - ] + start_line: 42, + end_line: 67, + type: "code", + token_count: 84, + source: "def foo do\n :bar\nend", + language: "elixir", + potentials: [@block_potential] } ] @@ -298,11 +296,10 @@ defmodule CodeQA.HealthReport.FormatterTest do test "renders block section header" do result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) - assert result =~ "Blocks" - assert result =~ "1 flagged" + assert result =~ "Top 1 Code Blocks by Impact" end - test "renders file group with status" do + test "renders file path with status" do result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) assert result =~ "lib/foo.ex" assert result =~ "modified" @@ -310,8 +307,7 @@ defmodule CodeQA.HealthReport.FormatterTest do test "renders block location and type" do result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) - assert result =~ "lines 42" - assert result =~ "67" + assert result =~ "42-67" assert result =~ "84 tokens" end @@ -328,15 +324,21 @@ defmodule CodeQA.HealthReport.FormatterTest do assert result =~ "Reduce 
branching" end + test "renders source code" do + result = Formatter.format_markdown(@sample_report_with_blocks, :default, :plain) + assert result =~ "def foo do" + assert result =~ ":bar" + end + test "omits block section when top_blocks is empty" do report = Map.put(@sample_report, :top_blocks, []) result = Formatter.format_markdown(report, :default, :plain) - refute result =~ "## Blocks" + refute result =~ "Code Blocks" end test "omits block section when top_blocks key absent" do result = Formatter.format_markdown(@sample_report, :default, :plain) - refute result =~ "## Blocks" + refute result =~ "Code Blocks" end end @@ -434,23 +436,21 @@ defmodule CodeQA.HealthReport.FormatterTest do %{ path: "lib/foo.ex", status: "modified", - blocks: [ - %{ - start_line: 42, - end_line: 67, - type: "code", - token_count: 84, - potentials: [@block_potential] - } - ] + start_line: 42, + end_line: 67, + type: "code", + token_count: 84, + source: "def foo do\n :bar\nend", + language: "elixir", + potentials: [@block_potential] } ] @report_with_blocks_gh Map.put(@sample_report, :top_blocks, @top_blocks_gh) - test "renders block section with details wrapper per file" do + test "renders block section with details wrapper per block" do result = Formatter.format_markdown(@report_with_blocks_gh, :default, :github) - assert result =~ "Blocks" + assert result =~ "Top 1 Code Blocks by Impact" assert result =~ "
    " assert result =~ "lib/foo.ex" assert result =~ "modified" @@ -462,6 +462,12 @@ defmodule CodeQA.HealthReport.FormatterTest do assert result =~ "cyclomatic_complexity_under_10" assert result =~ "Reduce branching" end + + test "renders source code in collapsed block" do + result = Formatter.format_markdown(@report_with_blocks_gh, :default, :github) + assert result =~ "```elixir" + assert result =~ "def foo do" + end end describe "github formatter: PR summary and delta" do @@ -538,83 +544,107 @@ defmodule CodeQA.HealthReport.FormatterTest do test "part 3 is placeholder when no blocks" do [_, _, part_3 | _] = Formatter.render_parts(@sample_report) - assert part_3 =~ "_No content for this section._" + assert part_3 =~ "_No near-duplicate blocks detected._" end test "part 3 contains blocks when present" do report = Map.put(@sample_report, :top_blocks, @top_blocks_gh) [_, _, part_3 | _] = Formatter.render_parts(report) assert part_3 =~ "lib/foo.ex" - assert part_3 =~ "Blocks" + assert part_3 =~ "Code Blocks" end end - describe "Github.render_parts_3/2 slicing" do + describe "Github.render_parts_3/2" do alias CodeQA.HealthReport.Formatter.Github - @many_blocks Enum.map(1..100, fn i -> - %{ - path: "lib/file_#{i}.ex", - status: "modified", - blocks: - Enum.map(1..10, fn j -> - %{ - start_line: j * 10, - end_line: j * 10 + 20, - type: "function", - token_count: 150, - potentials: [ - %{ - category: "function_design", - behavior: "single_responsibility", - cosine_delta: 0.35, - severity: :high, - fix_hint: "Consider extracting helper function" - } - ] - } - end) - } - end) - - test "slices large blocks section into multiple parts" do - report = Map.put(@sample_report, :top_blocks, @many_blocks) - parts = Github.render_parts_3(report) - - # With 100 files × 10 blocks, this should produce multiple parts - assert length(parts) > 1 - end - - test "each sliced part ends with sentinel" do - report = Map.put(@sample_report, :top_blocks, @many_blocks) - parts = 
Github.render_parts_3(report) - - Enum.with_index(parts, 3) - |> Enum.each(fn {part, n} -> - assert part =~ "" - end) - end - - test "non-final parts have truncation warning" do - report = Map.put(@sample_report, :top_blocks, @many_blocks) - parts = Github.render_parts_3(report) - - if length(parts) > 1 do - non_final = Enum.take(parts, length(parts) - 1) - - Enum.each(non_final, fn part -> - assert part =~ "Truncated at 60,000 chars" + test "returns single part with blocks (top 10 limit means no slicing needed)" do + blocks = + Enum.map(1..10, fn i -> + %{ + path: "lib/file_#{i}.ex", + status: "modified", + start_line: 10, + end_line: 30, + type: "function", + token_count: 150, + source: "def foo, do: :bar", + language: "elixir", + potentials: [ + %{ + category: "function_design", + behavior: "single_responsibility", + cosine_delta: 0.35, + severity: :high, + fix_hint: "Consider extracting helper function" + } + ] + } end) - end - end - test "each part is under 65536 chars" do - report = Map.put(@sample_report, :top_blocks, @many_blocks) + report = Map.put(@sample_report, :top_blocks, blocks) parts = Github.render_parts_3(report) - Enum.each(parts, fn part -> - assert byte_size(part) < 65_536, "Part exceeds GitHub comment limit" - end) + # With top 10 blocks, should be a single part + assert length(parts) == 1 + end + + test "part ends with sentinel" do + blocks = [ + %{ + path: "lib/foo.ex", + status: nil, + start_line: 1, + end_line: 10, + type: "code", + token_count: 50, + source: "def foo, do: :bar", + language: "elixir", + potentials: [ + %{ + category: "function_design", + behavior: "single_responsibility", + cosine_delta: 0.35, + severity: :high, + fix_hint: nil + } + ] + } + ] + + report = Map.put(@sample_report, :top_blocks, blocks) + [part] = Github.render_parts_3(report) + assert part =~ "" + end + + test "renders source code in fenced block" do + blocks = [ + %{ + path: "lib/foo.ex", + status: nil, + start_line: 1, + end_line: 10, + type: "code", + 
token_count: 50, + source: "def hello do\n :world\nend", + language: "elixir", + potentials: [ + %{ + category: "function_design", + behavior: "single_responsibility", + cosine_delta: 0.35, + severity: :high, + fix_hint: nil + } + ] + } + ] + + report = Map.put(@sample_report, :top_blocks, blocks) + [part] = Github.render_parts_3(report) + assert part =~ "```elixir" + assert part =~ "def hello do" + assert part =~ ":world" end end end diff --git a/test/codeqa/health_report/top_blocks_test.exs b/test/codeqa/health_report/top_blocks_test.exs index feb1194..91eb7af 100644 --- a/test/codeqa/health_report/top_blocks_test.exs +++ b/test/codeqa/health_report/top_blocks_test.exs @@ -23,7 +23,7 @@ defmodule CodeQA.HealthReport.TopBlocksTest do end defp make_results(nodes) do - %{"files" => %{"lib/foo.ex" => %{"nodes" => nodes}}} + %{"files" => %{"lib/foo.ex" => %{"nodes" => nodes}}, "metadata" => %{"path" => "/tmp"}} end defp lookup(cosine \\ 0.0) do @@ -33,20 +33,20 @@ defmodule CodeQA.HealthReport.TopBlocksTest do describe "severity classification" do test ":critical when severity_ratio > 0.50" do # gap = max(0.01, 1.0 - 0.0) = 1.0, ratio = 0.60 / 1.0 = 0.60 > 0.50 - [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) - assert hd(hd(group.blocks).potentials).severity == :critical + [block] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) + assert hd(block.potentials).severity == :critical end test ":high when severity_ratio > 0.25 and <= 0.50" do # ratio = 0.30 / 1.0 = 0.30 - [group] = TopBlocks.build(make_results([make_node(0.30)]), [], lookup()) - assert hd(hd(group.blocks).potentials).severity == :high + [block] = TopBlocks.build(make_results([make_node(0.30)]), [], lookup()) + assert hd(block.potentials).severity == :high end test ":medium when severity_ratio > 0.10 and <= 0.25" do # ratio = 0.15 / 1.0 = 0.15 - [group] = TopBlocks.build(make_results([make_node(0.15)]), [], lookup()) - assert hd(hd(group.blocks).potentials).severity 
== :medium + [block] = TopBlocks.build(make_results([make_node(0.15)]), [], lookup()) + assert hd(block.potentials).severity == :medium end test "filtered when severity_ratio <= 0.10" do @@ -56,30 +56,30 @@ defmodule CodeQA.HealthReport.TopBlocksTest do test "gap floor prevents division by zero when codebase_cosine = 1.0" do # gap = max(0.01, 1.0 - 1.0) = 0.01, ratio = 0.02 / 0.01 = 2.0 → :critical - [group] = TopBlocks.build(make_results([make_node(0.02)]), [], lookup(1.0)) - assert hd(hd(group.blocks).potentials).severity == :critical + [block] = TopBlocks.build(make_results([make_node(0.02)]), [], lookup(1.0)) + assert hd(block.potentials).severity == :critical end test "gap handles negative codebase_cosine" do # codebase_cosine = -0.5, gap = max(0.01, 1.0 - (-0.5)) = 1.5 # ratio = 0.60 / 1.5 = 0.40 → :high - [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup(-0.5)) - assert hd(hd(group.blocks).potentials).severity == :high + [block] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup(-0.5)) + assert hd(block.potentials).severity == :high end test "unknown behavior defaults codebase_cosine to 0.0" do lookup_empty = %{} # gap = 1.0, ratio = 0.60 → :critical - [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup_empty) - assert hd(hd(group.blocks).potentials).severity == :critical + [block] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup_empty) + assert hd(block.potentials).severity == :critical end end describe "changed_files filtering" do test "when changed_files is empty, shows all files" do - [group] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) - assert group.path == "lib/foo.ex" - assert group.status == nil + [block] = TopBlocks.build(make_results([make_node(0.60)]), [], lookup()) + assert block.path == "lib/foo.ex" + assert block.status == nil end test "when changed_files given, only shows matching files" do @@ -89,8 +89,8 @@ defmodule CodeQA.HealthReport.TopBlocksTest do test 
"status comes from ChangedFile struct" do changed = [%ChangedFile{path: "lib/foo.ex", status: "modified"}] - [group] = TopBlocks.build(make_results([make_node(0.60)]), changed, lookup()) - assert group.status == "modified" + [block] = TopBlocks.build(make_results([make_node(0.60)]), changed, lookup()) + assert block.status == "modified" end end @@ -102,10 +102,14 @@ defmodule CodeQA.HealthReport.TopBlocksTest do test "blocks are ordered by highest cosine_delta descending" do node_low = make_node(0.20) node_high = put_in(make_node(0.60), ["start_line"], 20) - results = %{"files" => %{"lib/foo.ex" => %{"nodes" => [node_low, node_high]}}} - [group] = TopBlocks.build(results, [], lookup()) - deltas = Enum.map(group.blocks, fn b -> hd(b.potentials).cosine_delta end) + results = %{ + "files" => %{"lib/foo.ex" => %{"nodes" => [node_low, node_high]}}, + "metadata" => %{"path" => "/tmp"} + } + + blocks = TopBlocks.build(results, [], lookup()) + deltas = Enum.map(blocks, fn b -> hd(b.potentials).cosine_delta end) assert deltas == Enum.sort(deltas, :desc) end @@ -119,8 +123,8 @@ defmodule CodeQA.HealthReport.TopBlocksTest do "children" => [make_node(0.60)] } - [group] = TopBlocks.build(make_results([parent]), [], lookup()) - assert length(group.blocks) == 1 + blocks = TopBlocks.build(make_results([parent]), [], lookup()) + assert length(blocks) == 1 end end @@ -143,8 +147,8 @@ defmodule CodeQA.HealthReport.TopBlocksTest do } hint_lookup = %{{"naming_conventions", "file_name_matches_primary_export"} => 0.0} - [group] = TopBlocks.build(make_results([node]), [], hint_lookup) - potential = hd(hd(group.blocks).potentials) + [block] = TopBlocks.build(make_results([node]), [], hint_lookup) + potential = hd(block.potentials) assert is_binary(potential.fix_hint) end @@ -160,8 +164,55 @@ defmodule CodeQA.HealthReport.TopBlocksTest do "children" => [] } - [group] = TopBlocks.build(make_results([node]), [], %{}) - assert hd(hd(group.blocks).potentials).fix_hint == nil + [block] = 
TopBlocks.build(make_results([node]), [], %{}) + assert hd(block.potentials).fix_hint == nil + end + end + + describe "source code extraction" do + test "includes source code when file exists" do + # Create a temp file + tmp_dir = System.tmp_dir!() + test_dir = Path.join(tmp_dir, "top_blocks_test_#{:rand.uniform(100_000)}") + File.mkdir_p!(test_dir) + file_path = Path.join(test_dir, "test.ex") + File.write!(file_path, "line 1\nline 2\nline 3\nline 4\nline 5") + + results = %{ + "files" => %{"test.ex" => %{"nodes" => [make_node(0.60) |> Map.put("end_line", 3)]}}, + "metadata" => %{"path" => test_dir} + } + + [block] = TopBlocks.build(results, [], lookup()) + assert block.source == "line 1\nline 2\nline 3" + assert block.language == "elixir" + + File.rm_rf!(test_dir) + end + + test "source is nil when file does not exist" do + results = %{ + "files" => %{"nonexistent.ex" => %{"nodes" => [make_node(0.60)]}}, + "metadata" => %{"path" => "/nonexistent/path"} + } + + [block] = TopBlocks.build(results, [], lookup()) + assert block.source == nil + end + end + + describe "top N limiting" do + test "returns at most 10 blocks" do + # Create 15 nodes + nodes = for i <- 1..15, do: put_in(make_node(0.60 + i * 0.01), ["start_line"], i * 10) + + results = %{ + "files" => %{"lib/foo.ex" => %{"nodes" => nodes}}, + "metadata" => %{"path" => "/tmp"} + } + + blocks = TopBlocks.build(results, [], lookup()) + assert length(blocks) == 10 end end end diff --git a/test/codeqa/health_report_test.exs b/test/codeqa/health_report_test.exs index 8d25e4d..80f8575 100644 --- a/test/codeqa/health_report_test.exs +++ b/test/codeqa/health_report_test.exs @@ -25,21 +25,22 @@ defmodule CodeQA.HealthReportTest do end @tag :slow - test "without base_results: top_blocks shows all files with significant blocks" do + test "without base_results: top_blocks shows top 10 blocks by impact" do files = %{"lib/foo.ex" => "defmodule Foo do\n def bar, do: :ok\nend\n"} results = Analyzer.analyze_codebase(files) 
results = BlockImpactAnalyzer.analyze(results, files) report = HealthReport.generate(results) - # top_blocks is a list of file groups (may be empty if no blocks above threshold) + # top_blocks is a flat list of blocks (may be empty if no blocks above threshold) assert is_list(report.top_blocks) - Enum.each(report.top_blocks, fn group -> - assert Map.has_key?(group, :path) - assert Map.has_key?(group, :status) - assert Map.has_key?(group, :blocks) - assert group.status == nil + Enum.each(report.top_blocks, fn block -> + assert Map.has_key?(block, :path) + assert Map.has_key?(block, :status) + assert Map.has_key?(block, :potentials) + assert Map.has_key?(block, :source) + assert block.status == nil end) end From 228aef2890534ec201e346fc41aacedc5644a2d3 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 26 Mar 2026 15:42:09 +0100 Subject: [PATCH 66/71] chore: trigger CI From 9b2d18497f1f444afc1ce46ba50b880e83a8629a Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 26 Mar 2026 20:52:15 +0100 Subject: [PATCH 67/71] feat(blocks): filter blocks by line count (3-20 lines default) Adds configurable line range filtering for code blocks to focus on actionable, refactorable chunks: - Default: only show blocks between 3 and 20 lines - Configurable via .codeqa.yml: - block_min_lines: 3 (default) - block_max_lines: 20 (default) Blocks outside this range are excluded before ranking. 
This filters out: - Tiny blocks (< 3 lines) that are too small to be meaningful - Large blocks (> 20 lines) that need bigger refactoring --- lib/codeqa/health_report.ex | 10 ++- lib/codeqa/health_report/config.ex | 14 +++- lib/codeqa/health_report/top_blocks.ex | 17 ++++- test/codeqa/health_report/top_blocks_test.exs | 69 ++++++++++++++++++- 4 files changed, 101 insertions(+), 9 deletions(-) diff --git a/lib/codeqa/health_report.ex b/lib/codeqa/health_report.ex index f97d2f8..9a64de8 100644 --- a/lib/codeqa/health_report.ex +++ b/lib/codeqa/health_report.ex @@ -14,7 +14,9 @@ defmodule CodeQA.HealthReport do categories: categories, grade_scale: grade_scale, impact_map: impact_map, - combined_top: combined_top + combined_top: combined_top, + block_min_lines: block_min_lines, + block_max_lines: block_max_lines } = Config.load(config_path) @@ -59,7 +61,11 @@ defmodule CodeQA.HealthReport do codebase_cosine_lookup = Map.new(all_cosines, fn i -> {{i.category, i.behavior}, i.cosine} end) - top_blocks = TopBlocks.build(analysis_results, changed_files, codebase_cosine_lookup) + top_blocks = + TopBlocks.build(analysis_results, changed_files, codebase_cosine_lookup, + block_min_lines: block_min_lines, + block_max_lines: block_max_lines + ) grading_cfg = %{ category_defs: categories, diff --git a/lib/codeqa/health_report/config.ex b/lib/codeqa/health_report/config.ex index 0b06a1d..7c457b2 100644 --- a/lib/codeqa/health_report/config.ex +++ b/lib/codeqa/health_report/config.ex @@ -7,14 +7,18 @@ defmodule CodeQA.HealthReport.Config do categories: [map()], grade_scale: [{number(), String.t()}], impact_map: %{String.t() => pos_integer()}, - combined_top: pos_integer() + combined_top: pos_integer(), + block_min_lines: pos_integer(), + block_max_lines: pos_integer() } def load(nil) do %{ categories: Categories.defaults(), grade_scale: Categories.default_grade_scale(), impact_map: CodeQA.Config.impact_map(), - combined_top: CodeQA.Config.combined_top() + combined_top: 
CodeQA.Config.combined_top(), + block_min_lines: 3, + block_max_lines: 20 } end @@ -43,12 +47,16 @@ defmodule CodeQA.HealthReport.Config do grade_scale = parse_grade_scale(Map.get(yaml, "grade_scale")) impact_map = parse_impact(Map.get(yaml, "impact")) combined_top = Map.get(yaml, "combined_top", 2) + block_min_lines = Map.get(yaml, "block_min_lines", 3) + block_max_lines = Map.get(yaml, "block_max_lines", 20) %{ categories: categories, grade_scale: grade_scale, impact_map: impact_map, - combined_top: combined_top + combined_top: combined_top, + block_min_lines: block_min_lines, + block_max_lines: block_max_lines } end diff --git a/lib/codeqa/health_report/top_blocks.ex b/lib/codeqa/health_report/top_blocks.ex index 57fd933..1bd370a 100644 --- a/lib/codeqa/health_report/top_blocks.ex +++ b/lib/codeqa/health_report/top_blocks.ex @@ -9,6 +9,8 @@ defmodule CodeQA.HealthReport.TopBlocks do @severity_medium 0.10 @gap_floor 0.01 @top_n 10 + @default_min_lines 3 + @default_max_lines 20 defp build_fix_hint_lookup do Scorer.all_yamls() @@ -28,12 +30,15 @@ defmodule CodeQA.HealthReport.TopBlocks do defp hints_for_behavior(_category, _entry), do: [] - @spec build(map(), [struct()], map()) :: [map()] - def build(analysis_results, changed_files, codebase_cosine_lookup) do + @spec build(map(), [struct()], map(), keyword()) :: [map()] + def build(analysis_results, changed_files, codebase_cosine_lookup, opts \\ []) do files = Map.get(analysis_results, "files", %{}) base_path = get_in(analysis_results, ["metadata", "path"]) || "." 
fix_hints = build_fix_hint_lookup() + min_lines = Keyword.get(opts, :block_min_lines, @default_min_lines) + max_lines = Keyword.get(opts, :block_max_lines, @default_max_lines) + file_entries = if changed_files == [] do Enum.map(files, fn {path, data} -> {path, nil, data} end) @@ -52,6 +57,7 @@ defmodule CodeQA.HealthReport.TopBlocks do |> Map.get("nodes", []) |> Enum.flat_map(&collect_nodes/1) |> Enum.filter(&(&1["token_count"] >= @min_tokens)) + |> Enum.filter(&block_in_line_range?(&1, min_lines, max_lines)) |> Enum.map(&enrich_block(&1, codebase_cosine_lookup, fix_hints)) |> Enum.reject(&(&1.potentials == [])) |> Enum.map(&Map.merge(&1, %{path: path, status: status})) @@ -63,6 +69,13 @@ defmodule CodeQA.HealthReport.TopBlocks do |> Enum.map(&add_source_code(&1, base_path)) end + defp block_in_line_range?(node, min_lines, max_lines) do + start_line = node["start_line"] || 1 + end_line = node["end_line"] || start_line + line_count = end_line - start_line + 1 + line_count >= min_lines and line_count <= max_lines + end + defp collect_nodes(node) do children = node |> Map.get("children", []) |> Enum.flat_map(&collect_nodes/1) [node | children] diff --git a/test/codeqa/health_report/top_blocks_test.exs b/test/codeqa/health_report/top_blocks_test.exs index 91eb7af..c86001b 100644 --- a/test/codeqa/health_report/top_blocks_test.exs +++ b/test/codeqa/health_report/top_blocks_test.exs @@ -203,8 +203,13 @@ defmodule CodeQA.HealthReport.TopBlocksTest do describe "top N limiting" do test "returns at most 10 blocks" do - # Create 15 nodes - nodes = for i <- 1..15, do: put_in(make_node(0.60 + i * 0.01), ["start_line"], i * 10) + # Create 15 nodes, each 10 lines (within default 3-20 range) + nodes = + for i <- 1..15 do + make_node(0.60 + i * 0.01) + |> put_in(["start_line"], i * 20) + |> put_in(["end_line"], i * 20 + 9) + end results = %{ "files" => %{"lib/foo.ex" => %{"nodes" => nodes}}, @@ -215,4 +220,64 @@ defmodule CodeQA.HealthReport.TopBlocksTest do assert length(blocks) 
== 10 end end + + describe "line range filtering" do + test "blocks outside line range are excluded" do + # 2-line block (below min of 3) + small_node = + make_node(0.60) + |> put_in(["start_line"], 1) + |> put_in(["end_line"], 2) + + # 25-line block (above max of 20) + large_node = + make_node(0.60) + |> put_in(["start_line"], 10) + |> put_in(["end_line"], 34) + + results = %{ + "files" => %{"lib/foo.ex" => %{"nodes" => [small_node, large_node]}}, + "metadata" => %{"path" => "/tmp"} + } + + blocks = TopBlocks.build(results, [], lookup()) + assert blocks == [] + end + + test "blocks within line range are included" do + # 10-line block (within 3-20 range) + node = + make_node(0.60) + |> put_in(["start_line"], 1) + |> put_in(["end_line"], 10) + + results = %{ + "files" => %{"lib/foo.ex" => %{"nodes" => [node]}}, + "metadata" => %{"path" => "/tmp"} + } + + blocks = TopBlocks.build(results, [], lookup()) + assert length(blocks) == 1 + end + + test "line range is configurable" do + # 2-line block + small_node = + make_node(0.60) + |> put_in(["start_line"], 1) + |> put_in(["end_line"], 2) + + results = %{ + "files" => %{"lib/foo.ex" => %{"nodes" => [small_node]}}, + "metadata" => %{"path" => "/tmp"} + } + + # Default range (3-20) excludes it + assert TopBlocks.build(results, [], lookup()) == [] + + # Custom range (1-5) includes it + blocks = TopBlocks.build(results, [], lookup(), block_min_lines: 1, block_max_lines: 5) + assert length(blocks) == 1 + end + end end From bfa9e3e5e4a33b9f6d899efa0a0ecd2d2a5d822e Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 26 Mar 2026 21:22:13 +0100 Subject: [PATCH 68/71] feat(blocks): filter blocks by diff line overlap when --base-ref provided When comparing refs, only show blocks whose lines overlap with the actual diff hunks, not just blocks in changed files. This surfaces only blocks relevant to the current PR changes. 
- Add Git.diff_line_ranges/3 to parse unified diff and extract changed line ranges per file - Add filter_by_diff_overlap/3 to TopBlocks to filter by line overlap - Integrate in health-report CLI when --base-ref is provided - Add warning when diff parsing fails (graceful degradation) - Add comprehensive tests for edge cases --- lib/codeqa/cli/health_report.ex | 20 +- lib/codeqa/git.ex | 72 +++++++ lib/codeqa/health_report.ex | 4 +- lib/codeqa/health_report/top_blocks.ex | 33 +++ test/codeqa/git_test.exs | 185 +++++++++++++++++ test/codeqa/health_report/top_blocks_test.exs | 188 ++++++++++++++++++ 6 files changed, 497 insertions(+), 5 deletions(-) diff --git a/lib/codeqa/cli/health_report.ex b/lib/codeqa/cli/health_report.ex index fc9bbaa..a21a743 100644 --- a/lib/codeqa/cli/health_report.ex +++ b/lib/codeqa/cli/health_report.ex @@ -95,18 +95,29 @@ defmodule CodeQA.CLI.HealthReport do "total_bytes" => total_bytes }) - {base_results, changed_files} = + {base_results, changed_files, diff_line_ranges} = if base_ref do IO.puts(:stderr, "Collecting base snapshot at #{base_ref}...") base_files = Git.collect_files_at_ref(path, base_ref) changed = Git.changed_files(path, base_ref, head_ref) + diff_ranges = + case Git.diff_line_ranges(path, base_ref, head_ref) do + {:ok, ranges} -> + ranges + + {:error, reason} -> + IO.puts(:stderr, "Warning: failed to parse diff line ranges: #{inspect(reason)}") + IO.puts(:stderr, "Block scoping disabled - showing all blocks in changed files") + %{} + end + IO.puts(:stderr, "Analyzing base snapshot (#{map_size(base_files)} files)...") base_res = Analyzer.analyze_codebase(base_files, analyze_opts) - {base_res, changed} + {base_res, changed, diff_ranges} else - {nil, []} + {nil, [], %{}} end detail = parse_detail(opts[:detail]) @@ -119,7 +130,8 @@ defmodule CodeQA.CLI.HealthReport do detail: detail, top: top_n, base_results: base_results, - changed_files: changed_files + changed_files: changed_files, + diff_line_ranges: diff_line_ranges ) if 
opts[:comment] do diff --git a/lib/codeqa/git.ex b/lib/codeqa/git.ex index 106ce54..4489205 100644 --- a/lib/codeqa/git.ex +++ b/lib/codeqa/git.ex @@ -43,6 +43,78 @@ defmodule CodeQA.Git do |> Enum.flat_map(&parse_change_line/1) end + @doc """ + Returns a map of file paths to lists of changed line ranges in the head version. + + Each range is a tuple `{start_line, end_line}` representing lines that were + added or modified in the diff between base_ref and head_ref. + """ + @spec diff_line_ranges(String.t(), String.t(), String.t()) :: + {:ok, %{String.t() => [{pos_integer(), pos_integer()}]}} | {:error, term()} + def diff_line_ranges(repo_path, base_ref, head_ref) do + case System.cmd( + "git", + ["diff", "-U0", "#{base_ref}..#{head_ref}"], + cd: repo_path, + stderr_to_stdout: false + ) do + {output, 0} -> + {:ok, parse_diff_hunks(output)} + + {_output, code} -> + {:error, "git diff exited with code #{code}"} + end + end + + @typep parse_state :: {String.t() | nil, %{String.t() => [{pos_integer(), pos_integer()}]}} + + @spec parse_diff_hunks(String.t()) :: %{String.t() => [{pos_integer(), pos_integer()}]} + defp parse_diff_hunks(diff_output) do + diff_output + |> String.split("\n") + |> Enum.reduce({nil, %{}}, &parse_diff_line/2) + |> elem(1) + |> Map.new(fn {path, ranges} -> {path, Enum.reverse(ranges)} end) + end + + @spec parse_diff_line(String.t(), parse_state()) :: parse_state() + defp parse_diff_line("diff --git a/" <> rest, {_current_file, acc}) do + # Extract the "b/..." 
path from the diff header + case Regex.run(~r/ b\/(.+)$/, rest) do + [_, path] -> {path, acc} + nil -> {nil, acc} + end + end + + defp parse_diff_line("@@ " <> rest, {current_file, acc}) when is_binary(current_file) do + # Parse hunk header: @@ -old_start,old_count +new_start,new_count @@ + case Regex.run(~r/\+(\d+)(?:,(\d+))?/, rest) do + [_, start_str] -> + # Single line change (no count means 1 line) + start = String.to_integer(start_str) + updated = Map.update(acc, current_file, [{start, start}], &[{start, start} | &1]) + {current_file, updated} + + [_, start_str, count_str] -> + start = String.to_integer(start_str) + count = String.to_integer(count_str) + + if count == 0 do + # Deletion only, no new lines + {current_file, acc} + else + end_line = start + count - 1 + updated = Map.update(acc, current_file, [{start, end_line}], &[{start, end_line} | &1]) + {current_file, updated} + end + + nil -> + {current_file, acc} + end + end + + defp parse_diff_line(_line, state), do: state + def read_file_at_ref(repo_path, ref, path) do case System.cmd("git", ["show", "#{ref}:#{path}"], cd: repo_path, stderr_to_stdout: true) do {output, 0} -> output diff --git a/lib/codeqa/health_report.ex b/lib/codeqa/health_report.ex index 9a64de8..7ffa31a 100644 --- a/lib/codeqa/health_report.ex +++ b/lib/codeqa/health_report.ex @@ -9,6 +9,7 @@ defmodule CodeQA.HealthReport do config_path = Keyword.get(opts, :config) base_results = Keyword.get(opts, :base_results) changed_files = Keyword.get(opts, :changed_files, []) + diff_line_ranges = Keyword.get(opts, :diff_line_ranges, %{}) %{ categories: categories, @@ -64,7 +65,8 @@ defmodule CodeQA.HealthReport do top_blocks = TopBlocks.build(analysis_results, changed_files, codebase_cosine_lookup, block_min_lines: block_min_lines, - block_max_lines: block_max_lines + block_max_lines: block_max_lines, + diff_line_ranges: diff_line_ranges ) grading_cfg = %{ diff --git a/lib/codeqa/health_report/top_blocks.ex 
b/lib/codeqa/health_report/top_blocks.ex index 1bd370a..fb3be04 100644 --- a/lib/codeqa/health_report/top_blocks.ex +++ b/lib/codeqa/health_report/top_blocks.ex @@ -38,6 +38,7 @@ defmodule CodeQA.HealthReport.TopBlocks do min_lines = Keyword.get(opts, :block_min_lines, @default_min_lines) max_lines = Keyword.get(opts, :block_max_lines, @default_max_lines) + diff_line_ranges = Keyword.get(opts, :diff_line_ranges, %{}) file_entries = if changed_files == [] do @@ -53,11 +54,14 @@ defmodule CodeQA.HealthReport.TopBlocks do # Flatten all blocks across all files, enrich with path and source code file_entries |> Enum.flat_map(fn {path, status, file_data} -> + path_diff_ranges = Map.get(diff_line_ranges, path, []) + file_data |> Map.get("nodes", []) |> Enum.flat_map(&collect_nodes/1) |> Enum.filter(&(&1["token_count"] >= @min_tokens)) |> Enum.filter(&block_in_line_range?(&1, min_lines, max_lines)) + |> filter_by_diff_overlap(path_diff_ranges, diff_line_ranges) |> Enum.map(&enrich_block(&1, codebase_cosine_lookup, fix_hints)) |> Enum.reject(&(&1.potentials == [])) |> Enum.map(&Map.merge(&1, %{path: path, status: status})) @@ -69,6 +73,7 @@ defmodule CodeQA.HealthReport.TopBlocks do |> Enum.map(&add_source_code(&1, base_path)) end + @spec block_in_line_range?(map(), pos_integer(), pos_integer()) :: boolean() defp block_in_line_range?(node, min_lines, max_lines) do start_line = node["start_line"] || 1 end_line = node["end_line"] || start_line @@ -76,6 +81,34 @@ defmodule CodeQA.HealthReport.TopBlocks do line_count >= min_lines and line_count <= max_lines end + # When no diff_line_ranges provided (empty map), show all blocks - no filtering needed + @spec filter_by_diff_overlap([map()], [{pos_integer(), pos_integer()}], map()) :: [map()] + defp filter_by_diff_overlap(blocks, _path_ranges, diff_line_ranges) + when map_size(diff_line_ranges) == 0, + do: blocks + + # When diff_line_ranges provided, filter blocks by overlap + defp filter_by_diff_overlap(blocks, path_ranges, 
_diff_line_ranges) do + Enum.filter(blocks, &block_overlaps_diff?(&1, path_ranges)) + end + + @spec block_overlaps_diff?(map(), [{pos_integer(), pos_integer()}]) :: boolean() + defp block_overlaps_diff?(_node, []), do: false + + defp block_overlaps_diff?(node, path_ranges) do + block_start = node["start_line"] || 1 + block_end = node["end_line"] || block_start + + Enum.any?(path_ranges, fn {diff_start, diff_end} -> + ranges_overlap?(block_start, block_end, diff_start, diff_end) + end) + end + + @spec ranges_overlap?(pos_integer(), pos_integer(), pos_integer(), pos_integer()) :: boolean() + defp ranges_overlap?(start1, end1, start2, end2) do + start1 <= end2 and start2 <= end1 + end + defp collect_nodes(node) do children = node |> Map.get("children", []) |> Enum.flat_map(&collect_nodes/1) [node | children] diff --git a/test/codeqa/git_test.exs b/test/codeqa/git_test.exs index 2d1ec2f..f1a800c 100644 --- a/test/codeqa/git_test.exs +++ b/test/codeqa/git_test.exs @@ -96,6 +96,191 @@ defmodule CodeQA.GitTest do end end + describe "diff_line_ranges/3" do + test "parses single-line hunks" do + in_tmp_git_repo(fn repo -> + # Create initial commit + File.write!(Path.join(repo, "foo.ex"), "line1\nline2\nline3\n") + System.cmd("git", ["add", "."], cd: repo) + System.cmd("git", ["commit", "-m", "initial"], cd: repo) + + # Modify a single line + File.write!(Path.join(repo, "foo.ex"), "line1\nmodified\nline3\n") + System.cmd("git", ["add", "."], cd: repo) + System.cmd("git", ["commit", "-m", "change"], cd: repo) + + {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD") + + assert Map.has_key?(ranges, "foo.ex") + assert {2, 2} in ranges["foo.ex"] + end) + end + + test "parses multi-line hunks" do + in_tmp_git_repo(fn repo -> + File.write!(Path.join(repo, "foo.ex"), "a\nb\nc\nd\ne\n") + {_, 0} = System.cmd("git", ["add", "."], cd: repo) + {_, 0} = System.cmd("git", ["commit", "-m", "initial"], cd: repo) + + # Replace lines 2-4 + File.write!(Path.join(repo, "foo.ex"), 
"a\nX\nY\nZ\ne\n") + {_, 0} = System.cmd("git", ["add", "."], cd: repo) + {_, 0} = System.cmd("git", ["commit", "-m", "change"], cd: repo) + + {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD") + + assert Map.has_key?(ranges, "foo.ex") + assert {2, 4} in ranges["foo.ex"] + end) + end + + test "handles multiple hunks in same file" do + in_tmp_git_repo(fn repo -> + lines = Enum.map_join(1..20, "\n", &"line#{&1}") + File.write!(Path.join(repo, "foo.ex"), lines <> "\n") + {_, 0} = System.cmd("git", ["add", "."], cd: repo) + {_, 0} = System.cmd("git", ["commit", "-m", "initial"], cd: repo) + + # Change line 2 and line 15 + new_lines = + 1..20 + |> Enum.map(fn + 2 -> "changed2" + 15 -> "changed15" + n -> "line#{n}" + end) + |> Enum.join("\n") + + File.write!(Path.join(repo, "foo.ex"), new_lines <> "\n") + {_, 0} = System.cmd("git", ["add", "."], cd: repo) + {_, 0} = System.cmd("git", ["commit", "-m", "change"], cd: repo) + + {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD") + + assert Map.has_key?(ranges, "foo.ex") + assert length(ranges["foo.ex"]) == 2 + assert {2, 2} in ranges["foo.ex"] + assert {15, 15} in ranges["foo.ex"] + end) + end + + test "handles multiple files" do + in_tmp_git_repo(fn repo -> + File.write!(Path.join(repo, "a.ex"), "a1\na2\n") + File.write!(Path.join(repo, "b.ex"), "b1\nb2\n") + {_, 0} = System.cmd("git", ["add", "."], cd: repo) + {_, 0} = System.cmd("git", ["commit", "-m", "initial"], cd: repo) + + File.write!(Path.join(repo, "a.ex"), "a1\nchanged\n") + File.write!(Path.join(repo, "b.ex"), "b1\nchanged\n") + {_, 0} = System.cmd("git", ["add", "."], cd: repo) + {_, 0} = System.cmd("git", ["commit", "-m", "change"], cd: repo) + + {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD") + + assert {2, 2} in ranges["a.ex"] + assert {2, 2} in ranges["b.ex"] + end) + end + + test "handles added lines (insertion)" do + in_tmp_git_repo(fn repo -> + File.write!(Path.join(repo, "foo.ex"), "a\nb\n") + System.cmd("git", ["add", 
"."], cd: repo) + System.cmd("git", ["commit", "-m", "initial"], cd: repo) + + # Insert new line between a and b + File.write!(Path.join(repo, "foo.ex"), "a\nnew\nb\n") + System.cmd("git", ["add", "."], cd: repo) + System.cmd("git", ["commit", "-m", "insert"], cd: repo) + + {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD") + + assert Map.has_key?(ranges, "foo.ex") + # Line 2 is the new line + assert {2, 2} in ranges["foo.ex"] + end) + end + + test "handles deleted lines (no new lines)" do + in_tmp_git_repo(fn repo -> + File.write!(Path.join(repo, "foo.ex"), "a\nb\nc\n") + System.cmd("git", ["add", "."], cd: repo) + System.cmd("git", ["commit", "-m", "initial"], cd: repo) + + # Delete line b + File.write!(Path.join(repo, "foo.ex"), "a\nc\n") + System.cmd("git", ["add", "."], cd: repo) + System.cmd("git", ["commit", "-m", "delete"], cd: repo) + + {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD") + + # File should either not be in ranges or have empty list (deletion only) + ranges_for_file = Map.get(ranges, "foo.ex", []) + # No new lines were added, so no ranges pointing to new content + assert ranges_for_file == [] or not Map.has_key?(ranges, "foo.ex") + end) + end + + test "returns empty map when no diff" do + in_tmp_git_repo(fn repo -> + File.write!(Path.join(repo, "foo.ex"), "content\n") + {_, 0} = System.cmd("git", ["add", "."], cd: repo) + {_, 0} = System.cmd("git", ["commit", "-m", "initial"], cd: repo) + + {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD", "HEAD") + + assert ranges == %{} + end) + end + + test "handles new file (no base version)" do + in_tmp_git_repo(fn repo -> + File.write!(Path.join(repo, "existing.ex"), "existing\n") + {_, 0} = System.cmd("git", ["add", "."], cd: repo) + {_, 0} = System.cmd("git", ["commit", "-m", "initial"], cd: repo) + + File.write!(Path.join(repo, "new.ex"), "line1\nline2\nline3\n") + {_, 0} = System.cmd("git", ["add", "."], cd: repo) + {_, 0} = System.cmd("git", ["commit", "-m", "add new file"], 
cd: repo) + + {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD") + + assert Map.has_key?(ranges, "new.ex") + assert {1, 3} in ranges["new.ex"] + end) + end + + test "returns ranges in ascending order" do + in_tmp_git_repo(fn repo -> + lines = Enum.map_join(1..20, "\n", &"line#{&1}") + File.write!(Path.join(repo, "foo.ex"), lines <> "\n") + {_, 0} = System.cmd("git", ["add", "."], cd: repo) + {_, 0} = System.cmd("git", ["commit", "-m", "initial"], cd: repo) + + # Change lines 2, 10, and 18 + new_lines = + 1..20 + |> Enum.map(fn + 2 -> "changed2" + 10 -> "changed10" + 18 -> "changed18" + n -> "line#{n}" + end) + |> Enum.join("\n") + + File.write!(Path.join(repo, "foo.ex"), new_lines <> "\n") + {_, 0} = System.cmd("git", ["add", "."], cd: repo) + {_, 0} = System.cmd("git", ["commit", "-m", "change"], cd: repo) + + {:ok, ranges} = Git.diff_line_ranges(repo, "HEAD~1", "HEAD") + + # Ranges should be in ascending order by start line + assert ranges["foo.ex"] == [{2, 2}, {10, 10}, {18, 18}] + end) + end + end + defp in_tmp_git_repo(fun) do tmp = Path.join(System.tmp_dir!(), "codeqa_git_test_#{:rand.uniform(999_999)}") File.mkdir_p!(tmp) diff --git a/test/codeqa/health_report/top_blocks_test.exs b/test/codeqa/health_report/top_blocks_test.exs index c86001b..4bef28b 100644 --- a/test/codeqa/health_report/top_blocks_test.exs +++ b/test/codeqa/health_report/top_blocks_test.exs @@ -280,4 +280,192 @@ defmodule CodeQA.HealthReport.TopBlocksTest do assert length(blocks) == 1 end end + + describe "diff_line_ranges filtering" do + test "when diff_line_ranges is empty map, shows all blocks" do + node = make_node(0.60) + [block] = TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: %{}) + assert block.path == "lib/foo.ex" + end + + test "when diff_line_ranges provided, only shows blocks overlapping diff" do + # Block at lines 1-10 + node = make_node(0.60) + + # Diff changes lines 5-7 (overlaps with block) + diff_ranges = %{"lib/foo.ex" => [{5, 7}]} + + 
[block] = + TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges) + + assert block.path == "lib/foo.ex" + end + + test "excludes blocks that don't overlap with diff" do + # Block at lines 1-10 + node = make_node(0.60) + + # Diff changes lines 50-55 (no overlap) + diff_ranges = %{"lib/foo.ex" => [{50, 55}]} + + blocks = TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges) + assert blocks == [] + end + + test "excludes blocks when file has no diff ranges" do + node = make_node(0.60) + + # Diff only has ranges for different file + diff_ranges = %{"lib/other.ex" => [{1, 10}]} + + blocks = TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges) + assert blocks == [] + end + + test "includes block with exact overlap" do + # Block at lines 5-15 + node = + make_node(0.60) + |> put_in(["start_line"], 5) + |> put_in(["end_line"], 15) + + # Diff changes exactly lines 5-15 + diff_ranges = %{"lib/foo.ex" => [{5, 15}]} + + [block] = + TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges) + + assert block.start_line == 5 + assert block.end_line == 15 + end + + test "includes block with partial overlap at start" do + # Block at lines 10-20 + node = + make_node(0.60) + |> put_in(["start_line"], 10) + |> put_in(["end_line"], 20) + + # Diff changes lines 5-12 (overlaps start of block) + diff_ranges = %{"lib/foo.ex" => [{5, 12}]} + + [block] = + TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges) + + assert block.start_line == 10 + end + + test "includes block with partial overlap at end" do + # Block at lines 10-20 + node = + make_node(0.60) + |> put_in(["start_line"], 10) + |> put_in(["end_line"], 20) + + # Diff changes lines 18-25 (overlaps end of block) + diff_ranges = %{"lib/foo.ex" => [{18, 25}]} + + [block] = + TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges) + + assert block.end_line == 20 + end + + 
test "includes block when diff is entirely inside block" do + # Block at lines 1-10 + node = make_node(0.60) + + # Diff changes lines 3-5 (inside block) + diff_ranges = %{"lib/foo.ex" => [{3, 5}]} + + [block] = + TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges) + + assert block.path == "lib/foo.ex" + assert block.start_line == 1 + assert block.end_line == 10 + end + + test "works with multiple diff ranges for same file" do + # Block at lines 1-10 + node = make_node(0.60) + + # Diff changes lines 50-55 and 5-7 (second range overlaps) + diff_ranges = %{"lib/foo.ex" => [{50, 55}, {5, 7}]} + + [block] = + TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges) + + assert block.path == "lib/foo.ex" + assert block.start_line == 1 + assert block.end_line == 10 + end + + test "excludes adjacent but non-overlapping ranges" do + # Block at lines 1-10 + node = make_node(0.60) + + # Diff changes line 11 (adjacent but not overlapping) + diff_ranges = %{"lib/foo.ex" => [{11, 11}]} + + blocks = TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges) + assert blocks == [] + end + + test "excludes blocks when file has empty diff ranges list" do + node = make_node(0.60) + + # File is present but with empty ranges (e.g., only deletions) + diff_ranges = %{"lib/foo.ex" => []} + + blocks = TopBlocks.build(make_results([node]), [], lookup(), diff_line_ranges: diff_ranges) + assert blocks == [] + end + + test "single-line block overlapping single-line diff" do + # Single-line block at line 5 + node = + make_node(0.60) + |> put_in(["start_line"], 5) + |> put_in(["end_line"], 5) + + diff_ranges = %{"lib/foo.ex" => [{5, 5}]} + + # Need to adjust min_lines for this test since block is only 1 line + [block] = + TopBlocks.build(make_results([node]), [], lookup(), + diff_line_ranges: diff_ranges, + block_min_lines: 1 + ) + + assert block.start_line == 5 + assert block.end_line == 5 + end + + test "when both 
changed_files and diff_line_ranges provided, both filters apply" do + node = make_node(0.60) + changed = [%ChangedFile{path: "lib/foo.ex", status: "modified"}] + diff_ranges = %{"lib/foo.ex" => [{5, 7}]} + + [block] = + TopBlocks.build(make_results([node]), changed, lookup(), diff_line_ranges: diff_ranges) + + assert block.path == "lib/foo.ex" + assert block.status == "modified" + assert block.start_line == 1 + assert block.end_line == 10 + end + + test "changed_files filter applies before diff_line_ranges filter" do + node = make_node(0.60) + # File is in diff_ranges but not in changed_files + changed = [%ChangedFile{path: "lib/other.ex", status: "modified"}] + diff_ranges = %{"lib/foo.ex" => [{5, 7}]} + + blocks = + TopBlocks.build(make_results([node]), changed, lookup(), diff_line_ranges: diff_ranges) + + assert blocks == [] + end + end end From c9704ed5af3b29f9d915e4d1b7161ec336e70175 Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 26 Mar 2026 21:41:09 +0100 Subject: [PATCH 69/71] feat(report): show worst offending block per cosine category Add worst_per_category/4 to TopBlocks that identifies the single worst block for each cosine-based category (code_smells, function_design, etc.) based on cosine_delta. Blocks must overlap with PR diff lines. 
Display in GitHub formatter: - Show source code if block is 4-10 lines - Show file location only if <4 or >10 lines --- lib/codeqa/health_report.ex | 21 +++++-- lib/codeqa/health_report/formatter/github.ex | 59 +++++++++++++++----- lib/codeqa/health_report/top_blocks.ex | 50 ++++++++++++++--- priv/combined_metrics/code_smells.yml | 5 ++ priv/combined_metrics/variable_naming.yml | 14 +++++ 5 files changed, 123 insertions(+), 26 deletions(-) diff --git a/lib/codeqa/health_report.ex b/lib/codeqa/health_report.ex index 7ffa31a..183b737 100644 --- a/lib/codeqa/health_report.ex +++ b/lib/codeqa/health_report.ex @@ -62,11 +62,21 @@ defmodule CodeQA.HealthReport do codebase_cosine_lookup = Map.new(all_cosines, fn i -> {{i.category, i.behavior}, i.cosine} end) + block_opts = [ + block_min_lines: block_min_lines, + block_max_lines: block_max_lines, + diff_line_ranges: diff_line_ranges + ] + top_blocks = - TopBlocks.build(analysis_results, changed_files, codebase_cosine_lookup, - block_min_lines: block_min_lines, - block_max_lines: block_max_lines, - diff_line_ranges: diff_line_ranges + TopBlocks.build(analysis_results, changed_files, codebase_cosine_lookup, block_opts) + + worst_blocks_by_category = + TopBlocks.worst_per_category( + analysis_results, + changed_files, + codebase_cosine_lookup, + block_opts ) grading_cfg = %{ @@ -99,7 +109,8 @@ defmodule CodeQA.HealthReport do codebase_delta: codebase_delta, categories: all_categories, top_issues: top_issues, - top_blocks: top_blocks + top_blocks: top_blocks, + worst_blocks_by_category: worst_blocks_by_category } end diff --git a/lib/codeqa/health_report/formatter/github.ex b/lib/codeqa/health_report/formatter/github.ex index b3cf6df..b72b250 100644 --- a/lib/codeqa/health_report/formatter/github.ex +++ b/lib/codeqa/health_report/formatter/github.ex @@ -9,6 +9,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do def render(report, detail, opts \\ []) do chart? 
= Keyword.get(opts, :chart, true) display_categories = merge_cosine_categories(report.categories) + worst_blocks = Map.get(report, :worst_blocks_by_category, %{}) [ pr_summary_section(Map.get(report, :pr_summary)), @@ -19,7 +20,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do progress_bars(display_categories), top_issues_section(Map.get(report, :top_issues, []), detail), blocks_section(Map.get(report, :top_blocks, [])), - category_sections(display_categories, detail), + category_sections(display_categories, detail, worst_blocks), footer() ] |> List.flatten() @@ -55,10 +56,11 @@ defmodule CodeQA.HealthReport.Formatter.Github do def render_part_2(report, opts \\ []) do detail = Keyword.get(opts, :detail, :default) display_categories = merge_cosine_categories(report.categories) + worst_blocks = Map.get(report, :worst_blocks_by_category, %{}) [ top_issues_section(Map.get(report, :top_issues, []), detail), - category_sections(display_categories, detail), + category_sections(display_categories, detail, worst_blocks), sentinel(2) ] |> List.flatten() @@ -186,18 +188,18 @@ defmodule CodeQA.HealthReport.Formatter.Github do String.duplicate(@filled, filled) <> String.duplicate(@empty, empty) end - defp category_sections(_categories, :summary), do: [] + defp category_sections(_categories, :summary, _worst_blocks), do: [] - defp category_sections(categories, detail) do - Enum.flat_map(categories, &render_category(&1, detail)) + defp category_sections(categories, detail, worst_blocks) do + Enum.flat_map(categories, &render_category(&1, detail, worst_blocks)) end - defp render_category(%{type: :cosine_group} = group, detail) do + defp render_category(%{type: :cosine_group} = group, detail, worst_blocks) do emoji = grade_emoji(group.grade) summary_line = "#{emoji} #{group.name} — #{group.grade} (#{group.score}/100)" inner = - cosine_group_content(group, detail) + cosine_group_content(group, detail, worst_blocks) |> List.flatten() |> Enum.join("\n") @@ -212,12 +214,12 @@ 
defmodule CodeQA.HealthReport.Formatter.Github do ] end - defp render_category(%{type: :cosine} = cat, detail) do + defp render_category(%{type: :cosine} = cat, detail, worst_blocks) do emoji = grade_emoji(cat.grade) summary_line = "#{emoji} #{cat.name} — #{cat.grade} (#{cat.score}/100)" inner = - cosine_section_content(cat, detail) + cosine_section_content(cat, detail, worst_blocks) |> List.flatten() |> Enum.join("\n") @@ -232,7 +234,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do ] end - defp render_category(cat, detail) do + defp render_category(cat, detail, _worst_blocks) do emoji = grade_emoji(cat.grade) summary_line = "#{emoji} #{cat.name} — #{cat.grade} (#{cat.score}/100)" @@ -252,7 +254,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do ] end - defp cosine_group_content(group, detail) do + defp cosine_group_content(group, detail, worst_blocks) do rows = Enum.map(group.categories, fn cat -> emoji = grade_emoji(cat.grade) @@ -270,7 +272,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do emoji = grade_emoji(cat.grade) inner = - cosine_section_content(cat, detail) + cosine_section_content(cat, detail, worst_blocks) |> List.flatten() |> Enum.join("\n") @@ -288,8 +290,9 @@ defmodule CodeQA.HealthReport.Formatter.Github do summary_table ++ [""] ++ sub_sections end - defp cosine_section_content(cat, _detail) do + defp cosine_section_content(cat, _detail, worst_blocks) do n = length(cat.behaviors) + category_key = to_string(cat.key) behaviors_rows = Enum.map(cat.behaviors, fn b -> @@ -304,7 +307,35 @@ defmodule CodeQA.HealthReport.Formatter.Github do | behaviors_rows ] - behaviors_table ++ [""] + worst_block_section = + case Map.get(worst_blocks, category_key) do + nil -> [] + block -> render_worst_block(block) + end + + behaviors_table ++ [""] ++ worst_block_section + end + + defp render_worst_block(block) do + line_count = (block.end_line || block.start_line) - block.start_line + 1 + location = "#{block.path}:#{block.start_line}-#{block.end_line}" + 
+ if line_count >= 4 and line_count <= 10 and block.source do + lang = block.language || "" + + [ + "> **Worst offender** (`#{location}`):", + "> ```#{lang}", + block.source |> String.split("\n") |> Enum.map(&"> #{&1}") |> Enum.join("\n"), + "> ```", + "" + ] + else + [ + "> **Worst offender**: `#{location}` (#{line_count} lines)", + "" + ] + end end defp section_content(cat, _detail) do diff --git a/lib/codeqa/health_report/top_blocks.ex b/lib/codeqa/health_report/top_blocks.ex index fb3be04..5ceddf5 100644 --- a/lib/codeqa/health_report/top_blocks.ex +++ b/lib/codeqa/health_report/top_blocks.ex @@ -32,8 +32,49 @@ defmodule CodeQA.HealthReport.TopBlocks do @spec build(map(), [struct()], map(), keyword()) :: [map()] def build(analysis_results, changed_files, codebase_cosine_lookup, opts \\ []) do - files = Map.get(analysis_results, "files", %{}) base_path = get_in(analysis_results, ["metadata", "path"]) || "." + + analysis_results + |> collect_enriched_blocks(changed_files, codebase_cosine_lookup, opts) + # Rank by highest cosine_delta and take top N + |> Enum.sort_by(&(-max_delta(&1))) + |> Enum.take(@top_n) + # Add source code for each block + |> Enum.map(&add_source_code(&1, base_path)) + end + + @doc """ + Returns a map of category => worst offending block for that category. + Only includes blocks that overlap with the diff (if diff_line_ranges provided). + """ + @spec worst_per_category(map(), [struct()], map(), keyword()) :: %{String.t() => map()} + def worst_per_category(analysis_results, changed_files, codebase_cosine_lookup, opts \\ []) do + base_path = get_in(analysis_results, ["metadata", "path"]) || "." 
+ + all_blocks = + collect_enriched_blocks(analysis_results, changed_files, codebase_cosine_lookup, opts) + + # Group blocks by category, finding the worst block per category + all_blocks + |> Enum.flat_map(fn block -> + # Each block may contribute to multiple categories via its potentials + block.potentials + |> Enum.map(fn potential -> + {potential.category, block, potential.cosine_delta} + end) + end) + |> Enum.group_by(&elem(&1, 0), fn {_cat, block, delta} -> {block, delta} end) + |> Enum.map(fn {category, block_deltas} -> + # Find the block with highest cosine_delta for this category + {worst_block, _delta} = Enum.max_by(block_deltas, fn {_block, delta} -> delta end) + {category, add_source_code(worst_block, base_path)} + end) + |> Map.new() + end + + # Shared logic for collecting and enriching blocks + defp collect_enriched_blocks(analysis_results, changed_files, codebase_cosine_lookup, opts) do + files = Map.get(analysis_results, "files", %{}) fix_hints = build_fix_hint_lookup() min_lines = Keyword.get(opts, :block_min_lines, @default_min_lines) @@ -51,7 +92,7 @@ defmodule CodeQA.HealthReport.TopBlocks do |> Enum.map(fn {path, data} -> {path, Map.get(changed_index, path), data} end) end - # Flatten all blocks across all files, enrich with path and source code + # Flatten all blocks across all files, enrich with path file_entries |> Enum.flat_map(fn {path, status, file_data} -> path_diff_ranges = Map.get(diff_line_ranges, path, []) @@ -66,11 +107,6 @@ defmodule CodeQA.HealthReport.TopBlocks do |> Enum.reject(&(&1.potentials == [])) |> Enum.map(&Map.merge(&1, %{path: path, status: status})) end) - # Rank by highest cosine_delta and take top N - |> Enum.sort_by(&(-max_delta(&1))) - |> Enum.take(@top_n) - # Add source code for each block - |> Enum.map(&add_source_code(&1, base_path)) end @spec block_in_line_range?(map(), pos_integer(), pos_integer()) :: boolean() diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml index 
f1c73c5..2201f7d 100644 --- a/priv/combined_metrics/code_smells.yml +++ b/priv/combined_metrics/code_smells.yml @@ -1,5 +1,6 @@ consistent_string_quote_style: _doc: "Files should use a single, consistent string quoting style throughout." + _languages: [elixir] _log_baseline: -18.2553 branching: mean_branching_density: 0.0243 @@ -101,6 +102,7 @@ consistent_string_quote_style: no_dead_code_after_return: _doc: "There should be no unreachable statements after a return or early exit." + _languages: [elixir] _log_baseline: -55.8435 branching: mean_branch_count: -2.0000 @@ -211,6 +213,7 @@ no_dead_code_after_return: no_debug_print_statements: _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." + _languages: [elixir] _log_baseline: -88.0844 branching: mean_branch_count: -0.3540 @@ -321,6 +324,7 @@ no_debug_print_statements: no_fixme_comments: _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." + _languages: [elixir] _log_baseline: -2.0233 branching: mean_branch_count: 0.1755 @@ -435,6 +439,7 @@ no_fixme_comments: no_nested_ternary: _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." + _languages: [elixir] _log_baseline: 7.6475 branching: mean_branch_count: -0.5662 diff --git a/priv/combined_metrics/variable_naming.yml b/priv/combined_metrics/variable_naming.yml index 1be9c6b..e7bc6fa 100644 --- a/priv/combined_metrics/variable_naming.yml +++ b/priv/combined_metrics/variable_naming.yml @@ -1,5 +1,6 @@ boolean_has_is_has_prefix: _doc: "Boolean variables should be prefixed with `is_`, `has_`, or `can_`." + _languages: [elixir, javascript, ruby] _log_baseline: 15.9481 brevity: mean_sample_size: 0.0752 @@ -62,6 +63,7 @@ boolean_has_is_has_prefix: collection_name_is_plural: _doc: "Variables holding a collection should use a plural name." 
+ _languages: [elixir, javascript, ruby] _log_baseline: 21.8380 brevity: mean_sample_size: -0.5320 @@ -123,6 +125,7 @@ collection_name_is_plural: loop_var_is_single_letter: _doc: "Loop index variables (`i`, `j`, `k`) are acceptable inside loop bodies." + _languages: [elixir, javascript, ruby] _log_baseline: -28.3218 brevity: mean_sample_size: -0.1049 @@ -202,6 +205,7 @@ loop_var_is_single_letter: name_contains_and: _doc: "Variable names containing `and` signal a variable that holds two concerns." + _languages: [elixir, javascript, ruby] _log_baseline: 0.4689 branching: mean_branch_count: -0.3666 @@ -320,6 +324,7 @@ name_contains_and: name_contains_type_suffix: _doc: "Type suffixes in names (`userString`, `nameList`) are redundant noise." + _languages: [elixir, javascript, ruby] _log_baseline: -26.6817 branching: mean_branch_count: -0.4150 @@ -405,6 +410,7 @@ name_contains_type_suffix: name_is_abbreviation: _doc: "Abbreviated names (`usr`, `cfg`, `mgr`) reduce readability." + _languages: [elixir, javascript, ruby] _log_baseline: 10.7370 brevity: mean_sample_size: -0.1542 @@ -491,6 +497,7 @@ name_is_abbreviation: name_is_generic: _doc: "Generic names (`data`, `result`, `tmp`, `val`, `obj`) convey no domain meaning." + _languages: [elixir, javascript, ruby] _log_baseline: 37.4815 branching: mean_branch_count: 0.5193 @@ -607,6 +614,7 @@ name_is_generic: name_is_number_like: _doc: "Number-suffixed names (`var1`, `thing2`) signal a missing abstraction." + _languages: [elixir, javascript, ruby] _log_baseline: 1.7611 brevity: mean_sample_size: -0.0262 @@ -685,6 +693,7 @@ name_is_number_like: name_is_single_letter: _doc: "Single-letter names outside loop indices are too opaque." + _languages: [elixir, javascript, ruby] _log_baseline: 26.2113 branching: mean_branching_density: -0.0458 @@ -782,6 +791,7 @@ name_is_single_letter: name_is_too_long: _doc: "Names longer than ~30 characters harm readability." 
+ _languages: [elixir, javascript, ruby] _log_baseline: -7.8322 branching: mean_branch_count: 0.0340 @@ -902,6 +912,7 @@ name_is_too_long: name_is_too_short: _doc: "Names shorter than 3 characters (outside loops) are too opaque." + _languages: [elixir, javascript, ruby] _log_baseline: -2.7224 branching: mean_branch_count: -0.2327 @@ -989,6 +1000,7 @@ name_is_too_short: negated_boolean_name: _doc: "Negated boolean names (`isNotValid`, `notActive`) are harder to reason about." + _languages: [elixir, javascript, ruby] _log_baseline: -4.4565 brevity: mean_sample_size: -0.0998 @@ -1059,6 +1071,7 @@ negated_boolean_name: no_hungarian_notation: _doc: "Hungarian notation prefixes (`strName`, `bFlag`) add noise without type safety." + _languages: [elixir, javascript, ruby] _log_baseline: -15.5962 brevity: mean_sample_size: -0.0814 @@ -1134,6 +1147,7 @@ no_hungarian_notation: screaming_snake_for_constants: _doc: "Module-level constants should use SCREAMING_SNAKE_CASE." + _languages: [elixir, javascript, ruby] _log_baseline: -5.9884 branching: mean_branching_density: 0.0176 From 09c09017e3e78beb7f4568450427c4a542bcfed1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 26 Mar 2026 20:45:57 +0000 Subject: [PATCH 70/71] chore(combined-metrics): sync language coverage and scalar vectors [skip ci] --- priv/combined_metrics/code_smells.yml | 5 ----- priv/combined_metrics/variable_naming.yml | 14 -------------- 2 files changed, 19 deletions(-) diff --git a/priv/combined_metrics/code_smells.yml b/priv/combined_metrics/code_smells.yml index 2201f7d..f1c73c5 100644 --- a/priv/combined_metrics/code_smells.yml +++ b/priv/combined_metrics/code_smells.yml @@ -1,6 +1,5 @@ consistent_string_quote_style: _doc: "Files should use a single, consistent string quoting style throughout." 
- _languages: [elixir] _log_baseline: -18.2553 branching: mean_branching_density: 0.0243 @@ -102,7 +101,6 @@ consistent_string_quote_style: no_dead_code_after_return: _doc: "There should be no unreachable statements after a return or early exit." - _languages: [elixir] _log_baseline: -55.8435 branching: mean_branch_count: -2.0000 @@ -213,7 +211,6 @@ no_dead_code_after_return: no_debug_print_statements: _doc: "Debug output (`console.log`, `IO.inspect`, `fmt.Println`) must not be left in committed code." - _languages: [elixir] _log_baseline: -88.0844 branching: mean_branch_count: -0.3540 @@ -324,7 +321,6 @@ no_debug_print_statements: no_fixme_comments: _doc: "FIXME, XXX, and HACK comments indicate known problems that should be resolved before merging." - _languages: [elixir] _log_baseline: -2.0233 branching: mean_branch_count: 0.1755 @@ -439,7 +435,6 @@ no_fixme_comments: no_nested_ternary: _doc: "Nested conditional expressions (ternary-within-ternary) are harder to read than a plain if-else." - _languages: [elixir] _log_baseline: 7.6475 branching: mean_branch_count: -0.5662 diff --git a/priv/combined_metrics/variable_naming.yml b/priv/combined_metrics/variable_naming.yml index e7bc6fa..1be9c6b 100644 --- a/priv/combined_metrics/variable_naming.yml +++ b/priv/combined_metrics/variable_naming.yml @@ -1,6 +1,5 @@ boolean_has_is_has_prefix: _doc: "Boolean variables should be prefixed with `is_`, `has_`, or `can_`." - _languages: [elixir, javascript, ruby] _log_baseline: 15.9481 brevity: mean_sample_size: 0.0752 @@ -63,7 +62,6 @@ boolean_has_is_has_prefix: collection_name_is_plural: _doc: "Variables holding a collection should use a plural name." - _languages: [elixir, javascript, ruby] _log_baseline: 21.8380 brevity: mean_sample_size: -0.5320 @@ -125,7 +123,6 @@ collection_name_is_plural: loop_var_is_single_letter: _doc: "Loop index variables (`i`, `j`, `k`) are acceptable inside loop bodies." 
- _languages: [elixir, javascript, ruby] _log_baseline: -28.3218 brevity: mean_sample_size: -0.1049 @@ -205,7 +202,6 @@ loop_var_is_single_letter: name_contains_and: _doc: "Variable names containing `and` signal a variable that holds two concerns." - _languages: [elixir, javascript, ruby] _log_baseline: 0.4689 branching: mean_branch_count: -0.3666 @@ -324,7 +320,6 @@ name_contains_and: name_contains_type_suffix: _doc: "Type suffixes in names (`userString`, `nameList`) are redundant noise." - _languages: [elixir, javascript, ruby] _log_baseline: -26.6817 branching: mean_branch_count: -0.4150 @@ -410,7 +405,6 @@ name_contains_type_suffix: name_is_abbreviation: _doc: "Abbreviated names (`usr`, `cfg`, `mgr`) reduce readability." - _languages: [elixir, javascript, ruby] _log_baseline: 10.7370 brevity: mean_sample_size: -0.1542 @@ -497,7 +491,6 @@ name_is_abbreviation: name_is_generic: _doc: "Generic names (`data`, `result`, `tmp`, `val`, `obj`) convey no domain meaning." - _languages: [elixir, javascript, ruby] _log_baseline: 37.4815 branching: mean_branch_count: 0.5193 @@ -614,7 +607,6 @@ name_is_generic: name_is_number_like: _doc: "Number-suffixed names (`var1`, `thing2`) signal a missing abstraction." - _languages: [elixir, javascript, ruby] _log_baseline: 1.7611 brevity: mean_sample_size: -0.0262 @@ -693,7 +685,6 @@ name_is_number_like: name_is_single_letter: _doc: "Single-letter names outside loop indices are too opaque." - _languages: [elixir, javascript, ruby] _log_baseline: 26.2113 branching: mean_branching_density: -0.0458 @@ -791,7 +782,6 @@ name_is_single_letter: name_is_too_long: _doc: "Names longer than ~30 characters harm readability." - _languages: [elixir, javascript, ruby] _log_baseline: -7.8322 branching: mean_branch_count: 0.0340 @@ -912,7 +902,6 @@ name_is_too_long: name_is_too_short: _doc: "Names shorter than 3 characters (outside loops) are too opaque." 
- _languages: [elixir, javascript, ruby] _log_baseline: -2.7224 branching: mean_branch_count: -0.2327 @@ -1000,7 +989,6 @@ name_is_too_short: negated_boolean_name: _doc: "Negated boolean names (`isNotValid`, `notActive`) are harder to reason about." - _languages: [elixir, javascript, ruby] _log_baseline: -4.4565 brevity: mean_sample_size: -0.0998 @@ -1071,7 +1059,6 @@ negated_boolean_name: no_hungarian_notation: _doc: "Hungarian notation prefixes (`strName`, `bFlag`) add noise without type safety." - _languages: [elixir, javascript, ruby] _log_baseline: -15.5962 brevity: mean_sample_size: -0.0814 @@ -1147,7 +1134,6 @@ no_hungarian_notation: screaming_snake_for_constants: _doc: "Module-level constants should use SCREAMING_SNAKE_CASE." - _languages: [elixir, javascript, ruby] _log_baseline: -5.9884 branching: mean_branching_density: 0.0176 From 8925e76ade98083b48c257c45fbe869a12b0f37d Mon Sep 17 00:00:00 2001 From: Andreas Solleder Date: Thu, 26 Mar 2026 23:09:50 +0100 Subject: [PATCH 71/71] fix(report): display blocks from 1-15 lines instead of 4-10 --- lib/codeqa/health_report/formatter/github.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/codeqa/health_report/formatter/github.ex b/lib/codeqa/health_report/formatter/github.ex index b72b250..dd4a696 100644 --- a/lib/codeqa/health_report/formatter/github.ex +++ b/lib/codeqa/health_report/formatter/github.ex @@ -320,7 +320,7 @@ defmodule CodeQA.HealthReport.Formatter.Github do line_count = (block.end_line || block.start_line) - block.start_line + 1 location = "#{block.path}:#{block.start_line}-#{block.end_line}" - if line_count >= 4 and line_count <= 10 and block.source do + if line_count >= 1 and line_count <= 15 and block.source do lang = block.language || "" [