From 40fffae7206513e718a27cb37041260d3902e46a Mon Sep 17 00:00:00 2001
From: Nelson Osacky
Date: Fri, 29 May 2026 14:35:35 +0200
Subject: [PATCH 1/2] ci(benchmark): Add help task configuration benchmark on PRs

Add a pull_request workflow that benchmarks this project's `help` task
with the configuration cache disabled (2 warm-ups, 5 builds), comparing
the PR base commit against the head commit in a single gradle-profiler
run via the git-checkout mutator.

This is separate from the existing duckduckgo benchmark build: it
profiles configuration time of this project rather than the runtime cost
of applying the plugin to a sample app, and it runs automatically on
every PR.

Results are uploaded as an artifact and summarized as a sticky PR
comment.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 .github/workflows/benchmark-help-config.yml   | 72 +++++++++++++++++++
 scripts/benchmark/help-config-cache.scenarios | 35 +++++++++
 scripts/benchmark/help-config-comment.py      | 69 ++++++++++++++++++
 3 files changed, 176 insertions(+)
 create mode 100644 .github/workflows/benchmark-help-config.yml
 create mode 100644 scripts/benchmark/help-config-cache.scenarios
 create mode 100644 scripts/benchmark/help-config-comment.py

diff --git a/.github/workflows/benchmark-help-config.yml b/.github/workflows/benchmark-help-config.yml
new file mode 100644
index 000000000..4a2af2853
--- /dev/null
+++ b/.github/workflows/benchmark-help-config.yml
@@ -0,0 +1,72 @@
+name: Benchmark help task configuration
+
+on:
+  pull_request:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  benchmark-help-config:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
+        with:
+          # Full history so gradle-profiler's git-checkout can reach both the
+          # base and head commits of the PR.
+          fetch-depth: 0
+
+      - name: Set up Java
+        uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5
+        with:
+          distribution: 'temurin'
+          java-version: '17'
+
+      - name: Install Gradle Profiler and benchmark the help task
+        env:
+          BASE_REF: ${{ github.event.pull_request.base.sha }}
+          HEAD_REF: ${{ github.event.pull_request.head.sha }}
+        run: |
+          curl -s "https://get.sdkman.io" | bash
+          source "$HOME/.sdkman/bin/sdkman-init.sh"
+          sdk install gradleprofiler 0.24.0
+          gradle-profiler --benchmark \
+            --scenario-file scripts/benchmark/help-config-cache.scenarios \
+            --output-dir out/help-config
+
+      - name: Upload results
+        if: always()
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
+        with:
+          name: help-config-benchmark
+          path: out/help-config/
+
+      - name: Build PR comment
+        run: python3 scripts/benchmark/help-config-comment.py out/help-config/benchmark.csv comment.md
+
+      - name: Post comparison comment
+        # Fork PRs get a read-only GITHUB_TOKEN, so commenting would fail there.
+        if: github.event.pull_request.head.repo.full_name == github.repository
+        uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
+        with:
+          script: |
+            const fs = require('fs');
+            // Hidden marker identifying our sticky comment; must match MARKER in help-config-comment.py.
+            const marker = '<!-- help-config-benchmark -->';
+            const body = fs.readFileSync('comment.md', 'utf8');
+            const { owner, repo } = context.repo;
+            const issue_number = context.issue.number;
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner, repo, issue_number,
+            });
+            const existing = comments.find((c) => c.body.includes(marker));
+            if (existing) {
+              await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body });
+            } else {
+              await github.rest.issues.createComment({ owner, repo, issue_number, body });
+            }
diff --git a/scripts/benchmark/help-config-cache.scenarios b/scripts/benchmark/help-config-cache.scenarios
new file mode 100644
index 000000000..391fbb7e8
--- /dev/null
+++ b/scripts/benchmark/help-config-cache.scenarios
@@ -0,0 +1,35 @@
+# Benchmarks the configuration-phase performance of this project's `help` task
+# with the configuration cache disabled, comparing the PR base commit against
+# the PR head commit in a single gradle-profiler run via the git-checkout
+# mutator.
+#
+# BASE_REF and HEAD_REF are supplied as environment variables by the CI
+# workflow; HOCON falls back to environment variables for ${...} lookups.
+#
+# `help` configures the whole build while executing essentially nothing, so it
+# is a clean configuration-time signal. --no-configuration-cache forces a full
+# configuration on every build.
+
+help_base {
+    title = "help base"
+    tasks = ["help"]
+    gradle-args = ["--no-configuration-cache"]
+    warm-ups = 2
+    iterations = 5
+    git-checkout {
+        build = ${BASE_REF}
+        cleanup = ${HEAD_REF}
+    }
+}
+
+help_pr {
+    title = "help PR"
+    tasks = ["help"]
+    gradle-args = ["--no-configuration-cache"]
+    warm-ups = 2
+    iterations = 5
+    git-checkout {
+        build = ${HEAD_REF}
+        cleanup = ${HEAD_REF}
+    }
+}
diff --git a/scripts/benchmark/help-config-comment.py b/scripts/benchmark/help-config-comment.py
new file mode 100644
index 000000000..a85c149d1
--- /dev/null
+++ b/scripts/benchmark/help-config-comment.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+"""Render a gradle-profiler benchmark.csv into a Markdown comparison table.
+
+Usage: help-config-comment.py CSV_PATH OUT_PATH
+
+The CSV has one column per scenario; we read the `scenario` header row for the
+titles and the `mean` row for the mean total build time (milliseconds).
+"""
+import csv
+import sys
+
+# Hidden HTML comment the workflow searches for to find and update its own comment.
+MARKER = "<!-- help-config-benchmark -->"
+
+
+def main(csv_path: str, out_path: str) -> None:
+    header = None
+    mean = None
+    with open(csv_path, newline="") as f:
+        for row in csv.reader(f):
+            if not row:
+                continue
+            if row[0] == "scenario":
+                header = row
+            elif row[0] == "mean":
+                mean = row
+
+    if header is None or mean is None:
+        body = f"{MARKER}\n### `help` configuration benchmark\n\nCould not parse benchmark results."
+        with open(out_path, "w", encoding="utf-8") as f:
+            f.write(body)
+        return
+
+    titles = header[1:]
+    means = [float(v) for v in mean[1 : 1 + len(titles)]]
+    by_title = dict(zip(titles, means))
+
+    base = by_title.get("help base")
+    pr = by_title.get("help PR")
+    if base is None or pr is None:
+        # A renamed or missing scenario title must not crash the formatting below.
+        body = f"{MARKER}\n### `help` configuration benchmark\n\nCould not parse benchmark results."
+        with open(out_path, "w", encoding="utf-8") as f:
+            f.write(body)
+        return
+
+    lines = [
+        MARKER,
+        "### `help` configuration benchmark (configuration cache disabled)",
+        "",
+        "Mean of 5 builds after 2 warm-ups.",
+        "",
+        "| Scenario | Mean build time |",
+        "| --- | --- |",
+        f"| Base (`help base`) | {base:.0f} ms |",
+        f"| PR (`help PR`) | {pr:.0f} ms |",
+    ]
+
+    delta = pr - base
+    pct = (delta / base) * 100 if base else 0
+    sign = "🔺" if delta > 0 else "✅"
+    lines.append(f"| **Difference** | {sign} {delta:+.0f} ms ({pct:+.1f}%) |")
+
+    with open(out_path, "w", encoding="utf-8") as f:
+        f.write("\n".join(lines) + "\n")
+
+
+if __name__ == "__main__":
+    main(sys.argv[1], sys.argv[2])

From 30ae4ba4e0b57e74f823f2166d20c8052795a034 Mon Sep 17 00:00:00 2001
From: Nelson Osacky
Date: Fri, 29 May 2026 14:52:19 +0200
Subject: [PATCH 2/2] fix(benchmark): Compute help benchmark mean from measured builds

gradle-profiler does not write summary rows (mean, min, max) to
benchmark.csv; those only appear in the HTML report. The comment parser
looked for a non-existent `mean` row and always fell back to "Could not
parse benchmark results". Average the `measured build #N` rows instead.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 scripts/benchmark/help-config-comment.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/scripts/benchmark/help-config-comment.py b/scripts/benchmark/help-config-comment.py
index a85c149d1..d7b2ac8a7 100644
--- a/scripts/benchmark/help-config-comment.py
+++ b/scripts/benchmark/help-config-comment.py
@@ -3,8 +3,10 @@
 
 Usage: help-config-comment.py CSV_PATH OUT_PATH
 
-The CSV has one column per scenario; we read the `scenario` header row for the
-titles and the `mean` row for the mean total build time (milliseconds).
+The CSV has one column per scenario. gradle-profiler does not write summary
+rows to the CSV (those live in the HTML report), so we read the `scenario`
+header row for the titles and average the `measured build #N` rows ourselves to
+get the mean total build time in milliseconds.
 """
 import csv
 import sys
@@ -15,24 +17,27 @@
 
 def main(csv_path: str, out_path: str) -> None:
     header = None
-    mean = None
+    measured = []
     with open(csv_path, newline="") as f:
         for row in csv.reader(f):
             if not row:
                 continue
             if row[0] == "scenario":
                 header = row
-            elif row[0] == "mean":
-                mean = row
+            elif row[0].startswith("measured build"):
+                measured.append(row)
 
-    if header is None or mean is None:
+    if header is None or not measured:
         body = f"{MARKER}\n### `help` configuration benchmark\n\nCould not parse benchmark results."
         with open(out_path, "w", encoding="utf-8") as f:
             f.write(body)
         return
 
     titles = header[1:]
-    means = [float(v) for v in mean[1 : 1 + len(titles)]]
+    means = []
+    for col in range(1, 1 + len(titles)):
+        values = [float(r[col]) for r in measured]
+        means.append(sum(values) / len(values))
     by_title = dict(zip(titles, means))
 
     base = by_title.get("help base")