|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Update PR references in changelog files. |
| 3 | +
|
| 4 | +1. Convert occurrences of `#1234` to the canonical markdown link |
| 5 | + `([#1234](https://github.com/databricks/cli/pull/1234))`. |
| 6 | +2. Validate that for existing converted references the PR number in the text |
| 7 | + and in the URL match. |
| 8 | +""" |
| 9 | + |
| 10 | +import argparse |
| 11 | +import pathlib |
| 12 | +import re |
| 13 | +import sys |
| 14 | + |
# Files handled when the command line names no files explicitly.
DEFAULT_FILES = ("NEXT_CHANGELOG.md", "CHANGELOG.md")

# An *already converted* reference has the shape
#     ([#1234](https://github.com/databricks/cli/pull/1234))
# The PR number appears twice: once in the visible link text (`num_text`) and
# once at the end of the URL (`num_url`).  Both are captured separately so the
# two can be checked against each other.
CONVERTED_LINK_RE = re.compile(
    r"\(\[#(?P<num_text>\d+)\]\(https://github\.com/databricks/cli/pull/"  # ([#1234](…/pull/
    r"(?P<num_url>\d+)\)\)"  # 1234))
)

# A *raw* reference is a bare `#1234`.  The negative look-behind rejects a `#`
# that directly follows `[`, which is how the link text of an already
# converted reference starts; the trailing `\b` requires the digits to end at
# a word boundary.
RAW_REF_RE = re.compile(r"(?<!\[)#(?P<num>\d+)\b")
| 31 | + |
| 32 | + |
def find_mismatched_links(text):
    """Report converted links whose text number and URL number disagree.

    Scans ``text`` with ``CONVERTED_LINK_RE`` and compares the two captured PR
    numbers as strings.

    Returns:
        A list with one human-readable message per inconsistent link, in
        order of appearance; empty when every link is consistent.
    """
    problems = []
    for link in CONVERTED_LINK_RE.finditer(text):
        shown = link.group("num_text")
        target = link.group("num_url")
        if shown == target:
            continue
        # Quote the link plus up to 20 characters on either side so the
        # offending spot is easy to locate in the file.
        lo = max(0, link.start() - 20)
        hi = link.end() + 20
        snippet = text[lo:hi]
        problems.append(f"Converted link numbers differ: text #{shown} vs URL #{target} — …{snippet}…")
    return problems
| 42 | + |
| 43 | + |
def convert_raw_references(text):
    """Convert raw `#1234` references to canonical markdown links.

    Every match of ``RAW_REF_RE`` is rewritten to
    ``([#1234](https://github.com/databricks/cli/pull/1234))``.  A reference
    that already sits directly inside its own parentheses, ``(#1234)``, reuses
    them, so the result is the canonical form rather than the doubled
    ``(([#1234](...)))``.

    Args:
        text: Markdown text to rewrite.

    Returns:
        The rewritten text; identical to ``text`` when nothing matched.
    """

    def _repl(match):
        num = match.group("num")
        link = f"[#{num}](https://github.com/databricks/cli/pull/{num})"

        # Look at the single characters surrounding the match.  Slicing (not
        # indexing) yields "" at either end of the string instead of raising.
        source = match.string
        before = source[max(0, match.start() - 1) : match.start()]
        after = source[match.end() : match.end() + 1]

        if before == "(" and after == ")":
            # The author already supplied the outer parentheses.
            return link
        return f"({link})"

    return RAW_REF_RE.sub(_repl, text)
| 52 | + |
| 53 | + |
def process_file(path):
    """Validate and rewrite the PR references of one file in place.

    The file is read as UTF-8.  Existing converted links are validated first;
    only if they are all consistent are raw references converted and the file
    written back (again as UTF-8), and only when the content changed.

    Returns:
        True if the file was *modified*, False otherwise.

    Raises:
        SystemExit: with status 1 when any converted link is inconsistent;
            each problem is printed to stderr prefixed by the file path.
    """
    contents = path.read_text(encoding="utf-8")

    problems = find_mismatched_links(contents)
    if problems:
        # One line per problem, written to stderr before aborting.
        print(*(f"{path}:{problem}" for problem in problems), sep="\n", file=sys.stderr)
        sys.exit(1)

    converted = convert_raw_references(contents)
    if converted == contents:
        return False

    path.write_text(converted, encoding="utf-8")
    print(f"Updated {path}")
    return True
| 75 | + |
| 76 | + |
def main(argv=None):
    """Command-line entry point.

    Processes every file named on the command line, or ``DEFAULT_FILES`` when
    none are given, by passing each one to ``process_file``.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Convert #PR references in changelogs to links.")
    parser.add_argument("files", nargs="*", help=f"Markdown files to process (default: {DEFAULT_FILES})")
    args = parser.parse_args(argv)

    # The per-file "modified" result is not needed here: process_file already
    # reports each change it makes.
    for file_path in args.files or DEFAULT_FILES:
        process_file(pathlib.Path(file_path))


if __name__ == "__main__":
    main()
0 commit comments