From 3e1bf73c037259da1359f8659eb53608842ffb49 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 8 Apr 2026 13:02:38 +0200 Subject: [PATCH 1/4] Add maintainer-approval workflow and OWNERS file Add a GitHub Actions workflow that blocks merge unless a core team member has approved. This is Step 1 of replacing CODEOWNERS with OWNERS. CODEOWNERS stays for now so existing behavior is unchanged. The OWNERS file is an exact copy of CODEOWNERS. The maintainer-approval workflow reads it at runtime to determine the core team from the * catch-all rule, then checks if any core team member has approved the PR. Domain-specific exemption: if all changed files are owned by non-core owners that include the PR author, any approval suffices. --- .github/OWNERS | 11 ++ .github/workflows/maintainer-approval.js | 121 ++++++++++++++++++++++ .github/workflows/maintainer-approval.yml | 31 ++++++ 3 files changed, 163 insertions(+) create mode 100644 .github/OWNERS create mode 100644 .github/workflows/maintainer-approval.js create mode 100644 .github/workflows/maintainer-approval.yml diff --git a/.github/OWNERS b/.github/OWNERS new file mode 100644 index 0000000000..d1618a2dee --- /dev/null +++ b/.github/OWNERS @@ -0,0 +1,11 @@ +* @andrewnester @anton-107 @denik @pietern @shreyas-goenka @simonfaltum +/cmd/bundle/bundle.go @andrewnester @anton-107 @denik @pietern @shreyas-goenka @simonfaltum @lennartkats-db +/libs/template/ @andrewnester @anton-107 @denik @pietern @shreyas-goenka @simonfaltum @lennartkats-db +/acceptance/pipelines/ @jefferycheng1 @kanterov @lennartkats-db +/cmd/pipelines/ @jefferycheng1 @kanterov @lennartkats-db +/cmd/labs/ @alexott @nfx +/cmd/apps/ @databricks/eng-apps-devex +/cmd/workspace/apps/ @databricks/eng-apps-devex +/libs/apps/ @databricks/eng-apps-devex +/acceptance/apps/ @databricks/eng-apps-devex +/experimental/aitools/ @databricks/eng-apps-devex @lennartkats-db diff --git a/.github/workflows/maintainer-approval.js b/.github/workflows/maintainer-approval.js new file mode 100644 index 0000000000..c787362b4f --- /dev/null +++ b/.github/workflows/maintainer-approval.js @@ -0,0 +1,121 @@ +const fs = require("fs"); +const path = require("path"); + +// Parse .github/OWNERS (same format as CODEOWNERS). +// Returns array of { pattern, owners } rules. +function parseOwners() { + const ownersPath = path.join( + process.env.GITHUB_WORKSPACE, + ".github", + "OWNERS" + ); + const lines = fs.readFileSync(ownersPath, "utf-8").split("\n"); + const rules = []; + for (const raw of lines) { + const line = raw.trim(); + if (!line || line.startsWith("#")) continue; + const parts = line.split(/\s+/); + if (parts.length < 2) continue; + const pattern = parts[0]; + // Strip @ prefix, skip team refs (org/team). + const owners = parts + .slice(1) + .filter((p) => p.startsWith("@") && !p.includes("/")) + .map((p) => p.slice(1)); + rules.push({ pattern, owners }); + } + return rules; +} + +// Get core team from the * catch-all rule. +function getCoreTeam(rules) { + const catchAll = rules.find((r) => r.pattern === "*"); + return catchAll ? catchAll.owners : []; +} + +// Match a filepath against an OWNERS pattern. +// Supports: "*" (catch-all), "/dir/" (prefix), "/path/file" (exact). +function ownersMatch(pattern, filepath) { + if (pattern === "*") return true; + let p = pattern; + if (p.startsWith("/")) p = p.slice(1); + if (p.endsWith("/")) return filepath.startsWith(p); + return filepath === p; +} + +// Find which owners match a given file (last match wins, like CODEOWNERS). +function findOwners(filepath, rules) { + let matched = []; + for (const rule of rules) { + if (ownersMatch(rule.pattern, filepath)) { + matched = rule.owners; + } + } + return matched; +} + +// Check if the PR author is exempted. +// If ALL changed files are owned by non-core-team owners that include the +// author, the PR can merge with any approval (not necessarily core team). +function isExempted(authorLogin, files, rules, coreTeam) { + const coreSet = new Set(coreTeam); + for (const { filename } of files) { + const owners = findOwners(filename, rules); + const nonCoreOwners = owners.filter((o) => !coreSet.has(o)); + if (nonCoreOwners.length === 0 || !nonCoreOwners.includes(authorLogin)) { + return false; + } + } + return true; +} + +module.exports = async ({ github, context, core }) => { + const rules = parseOwners(); + const coreTeam = getCoreTeam(rules); + + if (coreTeam.length === 0) { + core.setFailed( + "Could not determine core team from .github/OWNERS (no * rule found)." + ); + return; + } + + const reviews = await github.paginate(github.rest.pulls.listReviews, { + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number, + }); + + const coreTeamApproved = reviews.some( + ({ state, user: { login } }) => + state === "APPROVED" && coreTeam.includes(login) + ); + + if (coreTeamApproved) { + return; + } + + // Check exemption rules based on file ownership. + const { pull_request: pr } = context.payload; + const authorLogin = pr?.user?.login; + + const files = await github.paginate(github.rest.pulls.listFiles, { + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number, + }); + + if (authorLogin && isExempted(authorLogin, files, rules, coreTeam)) { + const hasAnyApproval = reviews.some(({ state }) => state === "APPROVED"); + if (!hasAnyApproval) { + core.setFailed( + "PR from exempted author still needs at least one approval." + ); + } + return; + } + + core.setFailed( + `Requires approval from a core team member: ${coreTeam.join(", ")}.` + ); +}; diff --git a/.github/workflows/maintainer-approval.yml b/.github/workflows/maintainer-approval.yml new file mode 100644 index 0000000000..37e93951d3 --- /dev/null +++ b/.github/workflows/maintainer-approval.yml @@ -0,0 +1,31 @@ +name: Maintainer approval + +on: + pull_request_target: + pull_request_review: + types: [submitted, dismissed] + +defaults: + run: + shell: bash + +jobs: + check: + runs-on: ubuntu-latest + timeout-minutes: 5 + permissions: + pull-requests: read + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + sparse-checkout: | + .github/workflows + .github/OWNERS + - name: Require core team approval + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + retries: 3 + script: | + const script = require('./.github/workflows/maintainer-approval.js'); + await script({ context, github, core }); From afff833d299450bc73eceac510dc6a6773c12462 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 8 Apr 2026 13:15:02 +0200 Subject: [PATCH 2/4] Fix review findings and add round-robin reviewer fallback Fix null user destructuring in maintainer-approval.js that would crash on reviews from deleted GitHub accounts. Add empty file list guard in the exemption check. Add success output for debugging. Add round-robin fallback to suggest_reviewers.py: when git history can't determine a reviewer, query the GitHub search API for each eligible reviewer's review count over the last 30 days and pick the person with the fewest reviews. Falls back to random if the API fails. Also fix yamlfmt: script block uses |- instead of |. --- .github/workflows/maintainer-approval.js | 9 ++- .github/workflows/maintainer-approval.yml | 2 +- tools/suggest_reviewers.py | 78 ++++++++++++++++++++--- 3 files changed, 77 insertions(+), 12 deletions(-) diff --git a/.github/workflows/maintainer-approval.js b/.github/workflows/maintainer-approval.js index c787362b4f..082e44af7a 100644 --- a/.github/workflows/maintainer-approval.js +++ b/.github/workflows/maintainer-approval.js @@ -58,6 +58,7 @@ function findOwners(filepath, rules) { // If ALL changed files are owned by non-core-team owners that include the // author, the PR can merge with any approval (not necessarily core team). function isExempted(authorLogin, files, rules, coreTeam) { + if (files.length === 0) return false; const coreSet = new Set(coreTeam); for (const { filename } of files) { const owners = findOwners(filename, rules); @@ -87,11 +88,15 @@ module.exports = async ({ github, context, core }) => { }); const coreTeamApproved = reviews.some( - ({ state, user: { login } }) => - state === "APPROVED" && coreTeam.includes(login) + ({ state, user }) => + state === "APPROVED" && user && coreTeam.includes(user.login) ); if (coreTeamApproved) { + const approver = reviews.find( + ({ state, user }) => state === "APPROVED" && user && coreTeam.includes(user.login) + ); + core.info(`Core team approval from @${approver.user.login}`); return; } diff --git a/.github/workflows/maintainer-approval.yml b/.github/workflows/maintainer-approval.yml index 37e93951d3..bb54eb8d75 100644 --- a/.github/workflows/maintainer-approval.yml +++ b/.github/workflows/maintainer-approval.yml @@ -26,6 +26,6 @@ jobs: uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: retries: 3 - script: | + script: |- const script = require('./.github/workflows/maintainer-approval.js'); await script({ context, github, core }); diff --git a/tools/suggest_reviewers.py b/tools/suggest_reviewers.py index 76609e5566..3928c21d75 100644 --- a/tools/suggest_reviewers.py +++ b/tools/suggest_reviewers.py @@ -5,9 +5,10 @@ import fnmatch import os +import random import subprocess import sys -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone from pathlib import Path MENTION_REVIEWERS = True @@ -189,6 +190,38 @@ def fmt_eligible(owners: list[str]) -> str: return ", ".join(o.lstrip("@") for o in owners) +def count_recent_reviews(repo: str, logins: list[str], days: int = 30) -> dict[str, int]: + since = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d") + counts: dict[str, int] = {} + for login in logins: + r = subprocess.run( + [ + "gh", + "api", + "search/issues", + "-f", + f"q=repo:{repo} reviewed-by:{login} is:pr created:>{since}", + "--jq", + ".total_count", + ], + capture_output=True, + encoding="utf-8", + ) + if r.returncode == 0 and r.stdout.strip().isdigit(): + counts[login] = int(r.stdout.strip()) + return counts + + +def select_round_robin(repo: str, eligible_owners: list[str], pr_author: str) -> str | None: + candidates = [o.lstrip("@") for o in eligible_owners if "/" not in o and o.lstrip("@").lower() != pr_author.lower()] + if not candidates: + return None + counts = count_recent_reviews(repo, candidates) + if not counts: + return random.choice(candidates) + return min(candidates, key=lambda c: counts.get(c, 0)) + + def build_comment( sorted_scores: list[tuple[str, float]], dir_scores: dict[str, dict[str, float]], @@ -196,6 +229,7 @@ def build_comment( scored_count: int, eligible_owners: list[str], pr_author: str, + round_robin_reviewer: str | None = None, ) -> str: reviewers = select_reviewers(sorted_scores) suggested_logins = {login.lower() for login, _ in reviewers} @@ -226,13 +260,34 @@ def build_comment( fmt_eligible(eligible), ] elif eligible: - lines += [ - "## Eligible reviewers", - "", - "Could not determine reviewers from git history. Based on CODEOWNERS, these people or teams could review:", - "", - fmt_eligible(eligible), - ] + if round_robin_reviewer: + mention = f"@{round_robin_reviewer}" if MENTION_REVIEWERS else round_robin_reviewer + remaining = [o for o in eligible if o.lstrip("@").lower() != round_robin_reviewer.lower()] + lines += [ + "## Suggested reviewer", + "", + "Could not determine reviewers from git history.", + "Round-robin suggestion (based on recent review load):", + "", + f"- {mention}", + ] + if remaining: + lines += [ + "", + "## Eligible reviewers", + "", + "Based on CODEOWNERS, these people or teams could also review:", + "", + fmt_eligible(remaining), + ] + else: + lines += [ + "## Eligible reviewers", + "", + "Could not determine reviewers from git history. Based on CODEOWNERS, these people or teams could review:", + "", + fmt_eligible(eligible), + ] else: lines += [ "## Suggested reviewers", @@ -286,7 +341,12 @@ def main(): scores, dir_scores, scored_count = score_contributors(files, pr_author, now, repo) sorted_scores = sorted(scores.items(), key=lambda x: -x[1]) eligible = parse_codeowners(files) - comment = build_comment(sorted_scores, dir_scores, len(files), scored_count, eligible, pr_author) + + round_robin = None + if not select_reviewers(sorted_scores) and eligible: + round_robin = select_round_robin(repo, eligible, pr_author) + + comment = build_comment(sorted_scores, dir_scores, len(files), scored_count, eligible, pr_author, round_robin) print(comment) existing_id = find_existing_comment(repo, pr_number) From 3f0f4e4fac8e15324ef21d6c5430912102b59163 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 8 Apr 2026 13:15:36 +0200 Subject: [PATCH 3/4] Widen /cmd/bundle/bundle.go to /cmd/bundle/ in OWNERS The CODEOWNERS entry only covered the single file. The intent was to include lennartkats-db on all bundle command changes, not just the root registration file. Since OWNERS is our new file and CODEOWNERS stays unchanged for now, we can fix this here. --- .github/OWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/OWNERS b/.github/OWNERS index d1618a2dee..72d835acd4 100644 --- a/.github/OWNERS +++ b/.github/OWNERS @@ -1,5 +1,5 @@ * @andrewnester @anton-107 @denik @pietern @shreyas-goenka @simonfaltum -/cmd/bundle/bundle.go @andrewnester @anton-107 @denik @pietern @shreyas-goenka @simonfaltum @lennartkats-db +/cmd/bundle/ @andrewnester @anton-107 @denik @pietern @shreyas-goenka @simonfaltum @lennartkats-db /libs/template/ @andrewnester @anton-107 @denik @pietern @shreyas-goenka @simonfaltum @lennartkats-db /acceptance/pipelines/ @jefferycheng1 @kanterov @lennartkats-db /cmd/pipelines/ @jefferycheng1 @kanterov @lennartkats-db From aa73dd69282db6c06238e1d4f3365d37a17e1032 Mon Sep 17 00:00:00 2001 From: simon Date: Wed, 8 Apr 2026 13:58:00 +0200 Subject: [PATCH 4/4] Rewrite suggest-reviewers in JS, extract shared OWNERS module Replace tools/suggest_reviewers.py with a JavaScript version that runs via actions/github-script. This drops the uv/Python dependency from the suggest-reviewers workflow. Extract shared OWNERS parsing into .github/scripts/owners.js, imported by both maintainer-approval.js and suggest-reviewers.js. Both scripts now read .github/OWNERS instead of .github/CODEOWNERS. The JS version uses Octokit for all GitHub API calls (no gh CLI) and child_process.execSync for git log only. Includes round-robin fallback: when git history can't determine a reviewer, queries the GitHub search API for recent review counts and picks the person with fewest reviews. --- .github/scripts/owners.js | 68 ++++ .github/workflows/maintainer-approval.js | 69 +--- .github/workflows/maintainer-approval.yml | 1 + .github/workflows/suggest-reviewers.js | 354 +++++++++++++++++++++ .github/workflows/suggest-reviewers.yml | 17 +- tools/suggest_reviewers.py | 366 ---------------------- 6 files changed, 442 insertions(+), 433 deletions(-) create mode 100644 .github/scripts/owners.js create mode 100644 .github/workflows/suggest-reviewers.js delete mode 100644 tools/suggest_reviewers.py diff --git a/.github/scripts/owners.js b/.github/scripts/owners.js new file mode 100644 index 0000000000..d8bd70a347 --- /dev/null +++ b/.github/scripts/owners.js @@ -0,0 +1,68 @@ +const fs = require("fs"); + +/** + * Parse an OWNERS file (same format as CODEOWNERS). + * Returns array of { pattern, owners } rules. + * + * By default, team refs (org/team) are filtered out and @ is stripped. + * Pass { includeTeams: true } to keep team refs (with @ stripped). + * + * @param {string} filePath - absolute path to the OWNERS file + * @param {{ includeTeams?: boolean }} [opts] + * @returns {Array<{pattern: string, owners: string[]}>} + */ +function parseOwnersFile(filePath, opts) { + const includeTeams = opts && opts.includeTeams; + const lines = fs.readFileSync(filePath, "utf-8").split("\n"); + const rules = []; + for (const raw of lines) { + const line = raw.trim(); + if (!line || line.startsWith("#")) continue; + const parts = line.split(/\s+/); + if (parts.length < 2) continue; + const pattern = parts[0]; + const owners = parts + .slice(1) + .filter((p) => p.startsWith("@") && (includeTeams || !p.includes("/"))) + .map((p) => p.slice(1)); + rules.push({ pattern, owners }); + } + return rules; +} + +/** + * Match a filepath against an OWNERS pattern. + * Supports: "*" (catch-all), "/dir/" (prefix), "/path/file" (exact). + */ +function ownersMatch(pattern, filepath) { + if (pattern === "*") return true; + let p = pattern; + if (p.startsWith("/")) p = p.slice(1); + if (p.endsWith("/")) return filepath.startsWith(p); + return filepath === p; +} + +/** + * Find owners for a file. Last match wins, like CODEOWNERS. + * @returns {string[]} owner logins + */ +function findOwners(filepath, rules) { + let matched = []; + for (const rule of rules) { + if (ownersMatch(rule.pattern, filepath)) { + matched = rule.owners; + } + } + return matched; +} + +/** + * Get core team from the * catch-all rule. + * @returns {string[]} logins + */ +function getCoreTeam(rules) { + const catchAll = rules.find((r) => r.pattern === "*"); + return catchAll ? catchAll.owners : []; +} + +module.exports = { parseOwnersFile, ownersMatch, findOwners, getCoreTeam }; diff --git a/.github/workflows/maintainer-approval.js b/.github/workflows/maintainer-approval.js index 082e44af7a..52c62bf631 100644 --- a/.github/workflows/maintainer-approval.js +++ b/.github/workflows/maintainer-approval.js @@ -1,58 +1,9 @@ -const fs = require("fs"); const path = require("path"); - -// Parse .github/OWNERS (same format as CODEOWNERS). -// Returns array of { pattern, owners } rules. -function parseOwners() { - const ownersPath = path.join( - process.env.GITHUB_WORKSPACE, - ".github", - "OWNERS" - ); - const lines = fs.readFileSync(ownersPath, "utf-8").split("\n"); - const rules = []; - for (const raw of lines) { - const line = raw.trim(); - if (!line || line.startsWith("#")) continue; - const parts = line.split(/\s+/); - if (parts.length < 2) continue; - const pattern = parts[0]; - // Strip @ prefix, skip team refs (org/team). - const owners = parts - .slice(1) - .filter((p) => p.startsWith("@") && !p.includes("/")) - .map((p) => p.slice(1)); - rules.push({ pattern, owners }); - } - return rules; -} - -// Get core team from the * catch-all rule. -function getCoreTeam(rules) { - const catchAll = rules.find((r) => r.pattern === "*"); - return catchAll ? catchAll.owners : []; -} - -// Match a filepath against an OWNERS pattern. -// Supports: "*" (catch-all), "/dir/" (prefix), "/path/file" (exact). -function ownersMatch(pattern, filepath) { - if (pattern === "*") return true; - let p = pattern; - if (p.startsWith("/")) p = p.slice(1); - if (p.endsWith("/")) return filepath.startsWith(p); - return filepath === p; -} - -// Find which owners match a given file (last match wins, like CODEOWNERS). -function findOwners(filepath, rules) { - let matched = []; - for (const rule of rules) { - if (ownersMatch(rule.pattern, filepath)) { - matched = rule.owners; - } - } - return matched; -} +const { + parseOwnersFile, + findOwners, + getCoreTeam, +} = require("../scripts/owners"); // Check if the PR author is exempted. // If ALL changed files are owned by non-core-team owners that include the @@ -71,7 +22,12 @@ function isExempted(authorLogin, files, rules, coreTeam) { } module.exports = async ({ github, context, core }) => { - const rules = parseOwners(); + const ownersPath = path.join( + process.env.GITHUB_WORKSPACE, + ".github", + "OWNERS" + ); + const rules = parseOwnersFile(ownersPath); const coreTeam = getCoreTeam(rules); if (coreTeam.length === 0) { @@ -94,7 +50,8 @@ module.exports = async ({ github, context, core }) => { if (coreTeamApproved) { const approver = reviews.find( - ({ state, user }) => state === "APPROVED" && user && coreTeam.includes(user.login) + ({ state, user }) => + state === "APPROVED" && user && coreTeam.includes(user.login) ); core.info(`Core team approval from @${approver.user.login}`); return; diff --git a/.github/workflows/maintainer-approval.yml b/.github/workflows/maintainer-approval.yml index bb54eb8d75..5ddec5a165 100644 --- a/.github/workflows/maintainer-approval.yml +++ b/.github/workflows/maintainer-approval.yml @@ -21,6 +21,7 @@ jobs: persist-credentials: false sparse-checkout: | .github/workflows + .github/scripts .github/OWNERS - name: Require core team approval uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 diff --git a/.github/workflows/suggest-reviewers.js b/.github/workflows/suggest-reviewers.js new file mode 100644 index 0000000000..4d38a1be23 --- /dev/null +++ b/.github/workflows/suggest-reviewers.js @@ -0,0 +1,354 @@ +const path = require("path"); +const { execSync } = require("child_process"); +const { + parseOwnersFile, + findOwners, +} = require("../scripts/owners"); + +const MENTION_REVIEWERS = true; +const OWNERS_LINK = "[OWNERS](.github/OWNERS)"; +const MARKER = ""; + +const loginCache = {}; + +function classifyFile(filepath, totalFiles) { + const base = path.basename(filepath); + if (base.startsWith("out.") || base === "output.txt") { + return 0.01 / Math.max(totalFiles, 1); + } + if (filepath.startsWith("acceptance/") || filepath.startsWith("integration/")) { + return 0.2; + } + if (filepath.endsWith("_test.go")) return 0.3; + return filepath.endsWith(".go") ? 1.0 : 0.5; +} + +async function getChangedFiles(github, owner, repo, prNumber) { + const files = await github.paginate(github.rest.pulls.listFiles, { + owner, + repo, + pull_number: prNumber, + }); + return files.map((f) => f.filename); +} + +function gitLog(filepath) { + try { + const out = execSync( + `git log -50 --no-merges --since="12 months ago" --format="%H|%an|%aI" -- "${filepath}"`, + { encoding: "utf-8" } + ); + const entries = []; + for (const line of out.split("\n")) { + const trimmed = line.trim(); + if (!trimmed) continue; + const parts = trimmed.split("|", 3); + if (parts.length !== 3) continue; + const date = new Date(parts[2]); + if (isNaN(date.getTime())) continue; + entries.push({ sha: parts[0], name: parts[1], date }); + } + return entries; + } catch { + return []; + } +} + +async function resolveLogin(github, owner, repo, sha, authorName) { + if (authorName in loginCache) return loginCache[authorName]; + try { + const { data } = await github.rest.repos.getCommit({ owner, repo, ref: sha }); + const login = data.author?.login || null; + loginCache[authorName] = login; + return login; + } catch { + loginCache[authorName] = null; + return null; + } +} + +function parseOwnersForFiles(changedFiles, ownersPath) { + const rules = parseOwnersFile(ownersPath, { includeTeams: true }); + const allOwners = new Set(); + for (const filepath of changedFiles) { + for (const o of findOwners(filepath, rules)) allOwners.add(o); + } + return Array.from(allOwners).sort(); +} + +async function scoreContributors(files, prAuthor, now, github, owner, repo) { + const scores = {}; + const dirScores = {}; + let scoredCount = 0; + const authorLogin = prAuthor.toLowerCase(); + const totalFiles = files.length; + + for (const filepath of files) { + const weight = classifyFile(filepath, totalFiles); + let history = gitLog(filepath); + if (history.length === 0) { + const parent = path.dirname(filepath); + if (parent && parent !== ".") { + history = gitLog(parent); + } + } + if (history.length === 0) continue; + + const topDir = path.dirname(filepath) || "."; + let fileContributed = false; + for (const { sha, name, date } of history) { + if (name.endsWith("[bot]")) continue; + const login = await resolveLogin(github, owner, repo, sha, name); + if (!login || login.toLowerCase() === authorLogin) continue; + const daysAgo = Math.max(0, (now - date) / 86400000); + const s = weight * Math.pow(0.5, daysAgo / 150); + scores[login] = (scores[login] || 0) + s; + if (!dirScores[login]) dirScores[login] = {}; + dirScores[login][topDir] = (dirScores[login][topDir] || 0) + s; + fileContributed = true; + } + if (fileContributed) scoredCount++; + } + return { scores, dirScores, scoredCount }; +} + +function topDirs(ds, n = 3) { + return Object.entries(ds || {}) + .sort((a, b) => b[1] - a[1]) + .slice(0, n) + .map(([d]) => d); +} + +function fmtReviewer(login, dirs) { + const mention = MENTION_REVIEWERS ? `@${login}` : login; + const dirList = dirs.map((d) => `\`${d}/\``).join(", "); + return `- ${mention} -- recent work in ${dirList}`; +} + +function selectReviewers(ss) { + if (ss.length === 0) return []; + const out = [ss[0]]; + if (ss.length >= 2 && ss[0][1] < 1.5 * ss[1][1]) { + out.push(ss[1]); + if (ss.length >= 3 && ss[1][1] < 1.5 * ss[2][1]) { + out.push(ss[2]); + } + } + return out; +} + +function computeConfidence(ss, scoredCount) { + if (ss.length === 0) return "low"; + if (ss.length === 1) return "high"; + if (ss.length === 2) { + if (ss[0][1] > 2 * ss[1][1]) return "high"; + if (ss[0][1] > 1.5 * ss[1][1]) return "medium"; + return "low"; + } + if (scoredCount < 3) return "low"; + if (ss[0][1] > 2 * ss[2][1]) return "high"; + if (ss[0][1] > 1.5 * ss[2][1]) return "medium"; + return "low"; +} + +function fmtEligible(owners) { + if (MENTION_REVIEWERS) return owners.map((o) => `@${o}`).join(", "); + return owners.join(", "); +} + +async function countRecentReviews(github, owner, repo, logins, days = 30) { + const since = new Date(Date.now() - days * 86400000) + .toISOString() + .slice(0, 10); + const counts = {}; + for (const login of logins) { + try { + const { data } = await github.rest.search.issuesAndPullRequests({ + q: `repo:${owner}/${repo} reviewed-by:${login} is:pr created:>${since}`, + }); + counts[login] = data.total_count; + } catch { + // skip on error + } + } + return counts; +} + +async function selectRoundRobin(github, owner, repo, eligibleOwners, prAuthor) { + const candidates = eligibleOwners + .filter((o) => !o.includes("/") && o.toLowerCase() !== prAuthor.toLowerCase()); + if (candidates.length === 0) return null; + const counts = await countRecentReviews(github, owner, repo, candidates); + if (Object.keys(counts).length === 0) { + return candidates[Math.floor(Math.random() * candidates.length)]; + } + return candidates.reduce((best, c) => + (counts[c] || 0) < (counts[best] || 0) ? c : best + ); +} + +function buildComment( + sortedScores, + dirScores, + totalFiles, + scoredCount, + eligibleOwners, + prAuthor, + roundRobinReviewer +) { + const reviewers = selectReviewers(sortedScores); + const suggestedLogins = new Set(reviewers.map(([login]) => login.toLowerCase())); + const eligible = eligibleOwners.filter( + (o) => + o.toLowerCase() !== prAuthor.toLowerCase() && + !suggestedLogins.has(o.toLowerCase()) + ); + + const lines = [MARKER]; + if (reviewers.length > 0) { + lines.push( + "## Suggested reviewers", + "", + "Based on git history of the changed files, these people are best suited to review:", + "" + ); + for (const [login] of reviewers) { + lines.push(fmtReviewer(login, topDirs(dirScores[login]))); + } + lines.push("", `Confidence: ${computeConfidence(sortedScores, scoredCount)}`); + if (eligible.length > 0) { + lines.push( + "", + "## Eligible reviewers", + "", + "Based on OWNERS, these people or teams could also review:", + "", + fmtEligible(eligible) + ); + } + } else if (eligible.length > 0) { + if (roundRobinReviewer) { + const mention = MENTION_REVIEWERS + ? `@${roundRobinReviewer}` + : roundRobinReviewer; + const remaining = eligible.filter( + (o) => o.toLowerCase() !== roundRobinReviewer.toLowerCase() + ); + lines.push( + "## Suggested reviewer", + "", + "Could not determine reviewers from git history.", + "Round-robin suggestion (based on recent review load):", + "", + `- ${mention}` + ); + if (remaining.length > 0) { + lines.push( + "", + "## Eligible reviewers", + "", + "Based on OWNERS, these people or teams could also review:", + "", + fmtEligible(remaining) + ); + } + } else { + lines.push( + "## Eligible reviewers", + "", + "Could not determine reviewers from git history. Based on OWNERS, these people or teams could review:", + "", + fmtEligible(eligible) + ); + } + } else { + lines.push( + "## Suggested reviewers", + "", + `Could not determine reviewers from git history. Please pick from ${OWNERS_LINK}.` + ); + } + + lines.push( + "", + `Suggestions based on git history of ${totalFiles} changed files ` + + `(${scoredCount} scored). ` + + `See ${OWNERS_LINK} for path-specific ownership rules.` + ); + return lines.join("\n") + "\n"; +} + +async function findExistingComment(github, owner, repo, prNumber) { + const comments = await github.paginate(github.rest.issues.listComments, { + owner, + repo, + issue_number: prNumber, + }); + const match = comments.find((c) => c.body && c.body.includes(MARKER)); + return match ? match.id : null; +} + +module.exports = async ({ github, context, core }) => { + const owner = context.repo.owner; + const repo = context.repo.repo; + const prNumber = context.payload.pull_request.number; + const prAuthor = context.payload.pull_request.user.login; + + const files = await getChangedFiles(github, owner, repo, prNumber); + if (files.length === 0) { + core.info("No changed files found."); + return; + } + + const now = new Date(); + const ownersPath = path.join( + process.env.GITHUB_WORKSPACE, + ".github", + "OWNERS" + ); + + const { scores, dirScores, scoredCount } = await scoreContributors( + files, + prAuthor, + now, + github, + owner, + repo + ); + const sortedScores = Object.entries(scores).sort((a, b) => b[1] - a[1]); + const eligible = parseOwnersForFiles(files, ownersPath); + + let roundRobin = null; + if (selectReviewers(sortedScores).length === 0 && eligible.length > 0) { + roundRobin = await selectRoundRobin(github, owner, repo, eligible, prAuthor); + } + + const comment = buildComment( + sortedScores, + dirScores, + files.length, + scoredCount, + eligible, + prAuthor, + roundRobin + ); + + core.info(comment); + + const existingId = await findExistingComment(github, owner, repo, prNumber); + if (existingId) { + await github.rest.issues.updateComment({ + owner, + repo, + comment_id: existingId, + body: comment, + }); + } else { + await github.rest.issues.createComment({ + owner, + repo, + issue_number: prNumber, + body: comment, + }); + } +}; diff --git a/.github/workflows/suggest-reviewers.yml b/.github/workflows/suggest-reviewers.yml index 15a32c2c7d..066bc6021b 100644 --- a/.github/workflows/suggest-reviewers.yml +++ b/.github/workflows/suggest-reviewers.yml @@ -21,15 +21,10 @@ jobs: with: fetch-depth: 0 - - name: Install uv - uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0 - with: - version: "0.6.5" - - name: Suggest reviewers - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - GITHUB_REPOSITORY: ${{ github.repository }} - PR_NUMBER: ${{ github.event.pull_request.number }} - PR_AUTHOR: ${{ github.event.pull_request.user.login }} - run: uv run tools/suggest_reviewers.py + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + retries: 3 + script: |- + const script = require('./.github/workflows/suggest-reviewers.js'); + await script({ context, github, core }); diff --git a/tools/suggest_reviewers.py b/tools/suggest_reviewers.py deleted file mode 100644 index 3928c21d75..0000000000 --- a/tools/suggest_reviewers.py +++ /dev/null @@ -1,366 +0,0 @@ -#!/usr/bin/env python3 -# /// script -# requires-python = ">=3.12" -# /// - -import fnmatch -import os -import random -import subprocess -import sys -from datetime import datetime, timedelta, timezone -from pathlib import Path - -MENTION_REVIEWERS = True -CODEOWNERS_LINK = "[CODEOWNERS](.github/CODEOWNERS)" -MARKER = "" -_login_cache: dict[str, str | None] = {} - - -def classify_file(path: str, total_files: int) -> float: - p = Path(path) - if p.name.startswith("out.") or p.name == "output.txt": - return 0.01 / max(total_files, 1) - if path.startswith(("acceptance/", "integration/")): - return 0.2 - if path.endswith("_test.go"): - return 0.3 - return 1.0 if path.endswith(".go") else 0.5 - - -def get_changed_files(pr_number: str) -> list[str]: - r = subprocess.run( - ["gh", "pr", "diff", "--name-only", pr_number], - capture_output=True, - encoding="utf-8", - ) - if r.returncode != 0: - print(f"gh pr diff failed: {r.stderr.strip()}", file=sys.stderr) - sys.exit(1) - return [f.strip() for f in r.stdout.splitlines() if f.strip()] - - -def git_log(path: str) -> list[tuple[str, str, datetime]]: - r = subprocess.run( - ["git", "log", "-50", "--no-merges", "--since=12 months ago", "--format=%H|%an|%aI", "--", path], - capture_output=True, - encoding="utf-8", - ) - if r.returncode != 0: - return [] - entries = [] - for line in r.stdout.splitlines(): - parts = line.strip().split("|", 2) - if len(parts) != 3: - continue - try: - entries.append((parts[0], parts[1], datetime.fromisoformat(parts[2]))) - except ValueError: - continue - return entries - - -def resolve_login(repo: str, sha: str, author_name: str) -> str | None: - if author_name in _login_cache: - return _login_cache[author_name] - r = subprocess.run( - ["gh", "api", f"repos/{repo}/commits/{sha}", "--jq", ".author.login"], - capture_output=True, - encoding="utf-8", - ) - login = r.stdout.strip() or None if r.returncode == 0 else None - _login_cache[author_name] = login - return login - - -def _codeowners_match(pattern: str, filepath: str) -> bool: - if pattern.startswith("/"): - pattern = pattern[1:] - if pattern.endswith("/"): - return filepath.startswith(pattern) - return fnmatch.fnmatch(filepath, pattern) or filepath == pattern - return fnmatch.fnmatch(filepath, pattern) or fnmatch.fnmatch(Path(filepath).name, pattern) - - -def parse_codeowners(changed_files: list[str]) -> list[str]: - path = Path(".github/CODEOWNERS") - if not path.exists(): - return [] - rules: list[tuple[str, list[str]]] = [] - for line in path.read_text().splitlines(): - line = line.strip() - if not line or line.startswith("#"): - continue - parts = line.split() - owners = [p for p in parts[1:] if p.startswith("@")] - if len(parts) >= 2 and owners: - rules.append((parts[0], owners)) - - all_owners: set[str] = set() - for filepath in changed_files: - matched = [] - for pattern, owners in rules: - if _codeowners_match(pattern, filepath): - matched = owners - all_owners.update(matched) - return sorted(all_owners) - - -def score_contributors( - files: list[str], pr_author: str, now: datetime, repo: str -) -> tuple[dict[str, float], dict[str, dict[str, float]], int]: - scores: dict[str, float] = {} - dir_scores: dict[str, dict[str, float]] = {} - scored_count = 0 - author_login = pr_author.lower() - - total_files = len(files) - for filepath in files: - weight = classify_file(filepath, total_files) - history = git_log(filepath) - if not history: - parent = str(Path(filepath).parent) - if parent and parent != ".": - history = git_log(parent) - if not history: - continue - - top_dir = str(Path(filepath).parent) or "." - file_contributed = False - for sha, name, commit_date in history: - if name.endswith("[bot]"): - continue - login = resolve_login(repo, sha, name) - if not login or login.lower() == author_login: - continue - days_ago = max(0, (now - commit_date).total_seconds() / 86400) - s = weight * (0.5 ** (days_ago / 150)) - scores[login] = scores.get(login, 0) + s - dir_scores.setdefault(login, {}) - dir_scores[login][top_dir] = dir_scores[login].get(top_dir, 0) + s - file_contributed = True - if file_contributed: - scored_count += 1 - return scores, dir_scores, scored_count - - -def top_dirs(ds: dict[str, float], n: int = 3) -> list[str]: - return [d for d, _ in sorted(ds.items(), key=lambda x: -x[1])[:n]] - - -def fmt_reviewer(login: str, dirs: list[str]) -> str: - mention = f"@{login}" if MENTION_REVIEWERS else login - return f"- {mention} -- recent work in {', '.join(f'`{d}/`' for d in dirs)}" - - -def select_reviewers(ss: list[tuple[str, float]]) -> list[tuple[str, float]]: - if not ss: - return [] - out = [ss[0]] - if len(ss) >= 2 and ss[0][1] < 1.5 * ss[1][1]: - out.append(ss[1]) - if len(ss) >= 3 and ss[1][1] < 1.5 * ss[2][1]: - out.append(ss[2]) - return out - - -def compute_confidence(ss: list[tuple[str, float]], scored_count: int) -> str: - if not ss: - return "low" - if len(ss) == 1: - return "high" - if len(ss) == 2: - if ss[0][1] > 2 * ss[1][1]: - return "high" - if ss[0][1] > 1.5 * ss[1][1]: - return "medium" - return "low" - if scored_count < 3: - return "low" - if ss[0][1] > 2 * ss[2][1]: - return "high" - if ss[0][1] > 1.5 * ss[2][1]: - return "medium" - return "low" - - -def fmt_eligible(owners: list[str]) -> str: - if MENTION_REVIEWERS: - return ", ".join(owners) - return ", ".join(o.lstrip("@") for o in owners) - - -def count_recent_reviews(repo: str, logins: list[str], days: int = 30) -> dict[str, int]: - since = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d") - counts: dict[str, int] = {} - for login in logins: - r = subprocess.run( - [ - "gh", - "api", - "search/issues", - "-f", - f"q=repo:{repo} reviewed-by:{login} is:pr created:>{since}", - "--jq", - ".total_count", - ], - capture_output=True, - encoding="utf-8", - ) - if r.returncode == 0 and r.stdout.strip().isdigit(): - counts[login] = int(r.stdout.strip()) - return counts - - -def select_round_robin(repo: str, eligible_owners: list[str], pr_author: str) -> str | None: - candidates = [o.lstrip("@") for o in eligible_owners if "/" not in o and o.lstrip("@").lower() != pr_author.lower()] - if not candidates: - return None - counts = count_recent_reviews(repo, candidates) - if not counts: - return random.choice(candidates) - return min(candidates, key=lambda c: counts.get(c, 0)) - - -def build_comment( - sorted_scores: list[tuple[str, float]], - dir_scores: dict[str, dict[str, float]], - total_files: int, - scored_count: int, - eligible_owners: list[str], - pr_author: str, - round_robin_reviewer: str | None = None, -) -> str: - reviewers = select_reviewers(sorted_scores) - suggested_logins = {login.lower() for login, _ in reviewers} - eligible = [ - o - for o in eligible_owners - if o.lstrip("@").lower() != pr_author.lower() and o.lstrip("@").lower() not in suggested_logins - ] - - lines = [MARKER] - if reviewers: - lines += [ - "## Suggested reviewers", - "", - "Based on git history of the changed files, these people are best suited to review:", - "", - ] - for login, _ in reviewers: - lines.append(fmt_reviewer(login, top_dirs(dir_scores.get(login, {})))) - lines += ["", f"Confidence: {compute_confidence(sorted_scores, scored_count)}"] - if eligible: - lines += [ - "", - "## Eligible reviewers", - "", - "Based on CODEOWNERS, these people or teams could also review:", - "", - fmt_eligible(eligible), - ] - elif eligible: - if round_robin_reviewer: - mention = f"@{round_robin_reviewer}" if MENTION_REVIEWERS else round_robin_reviewer - remaining = [o for o in eligible if o.lstrip("@").lower() != round_robin_reviewer.lower()] - lines += [ - "## Suggested reviewer", - "", - "Could not determine reviewers from git history.", - "Round-robin suggestion (based on recent review load):", - "", - f"- {mention}", - ] - if remaining: - lines += [ - "", - "## Eligible reviewers", - "", - "Based on CODEOWNERS, these people or teams could also review:", - "", - fmt_eligible(remaining), - ] - else: - lines += [ - "## Eligible reviewers", - "", - "Could not determine reviewers from git history. Based on CODEOWNERS, these people or teams could review:", - "", - fmt_eligible(eligible), - ] - else: - lines += [ - "## Suggested reviewers", - "", - f"Could not determine reviewers from git history. Please pick from {CODEOWNERS_LINK}.", - ] - - lines += [ - "", - f"Suggestions based on git history of {total_files} changed files " - f"({scored_count} scored). " - f"See {CODEOWNERS_LINK} for path-specific ownership rules.", - ] - return "\n".join(lines) + "\n" - - -def find_existing_comment(repo: str, pr_number: str) -> str | None: - r = subprocess.run( - [ - "gh", - "api", - f"repos/{repo}/issues/{pr_number}/comments", - "--paginate", - "--jq", - f'.[] | select(.body | contains("{MARKER}")) | .id', - ], - capture_output=True, - encoding="utf-8", - ) - if r.returncode != 0: - print(f"gh api comments failed: {r.stderr.strip()}", file=sys.stderr) - sys.exit(1) - for cid in r.stdout.splitlines(): - cid = cid.strip() - if cid: - return cid - return None - - -def main(): - repo = os.environ["GITHUB_REPOSITORY"] - pr_number = os.environ["PR_NUMBER"] - pr_author = os.environ["PR_AUTHOR"] - - files = get_changed_files(pr_number) - if not files: - print("No changed files found.") - return - - now = datetime.now(timezone.utc) - scores, dir_scores, scored_count = score_contributors(files, pr_author, now, repo) - sorted_scores = sorted(scores.items(), key=lambda x: -x[1]) - eligible = parse_codeowners(files) - - round_robin = None - if not select_reviewers(sorted_scores) and eligible: - round_robin = select_round_robin(repo, eligible, pr_author) - - comment = build_comment(sorted_scores, dir_scores, len(files), scored_count, eligible, pr_author, round_robin) - - print(comment) - existing_id = find_existing_comment(repo, pr_number) - if existing_id: - subprocess.run( - ["gh", "api", f"repos/{repo}/issues/comments/{existing_id}", "-X", "PATCH", "-f", f"body={comment}"], - check=True, - ) - else: - subprocess.run( - ["gh", "pr", "comment", pr_number, "--body", comment], - check=True, - ) - - -if __name__ == "__main__": - main()