From 4f2147af345141d97ad691fef9aed8a616034042 Mon Sep 17 00:00:00 2001 From: qiancai Date: Mon, 27 Apr 2026 11:37:53 +0800 Subject: [PATCH 1/4] support generating release notes by ai --- scripts/release_notes_ai/__init__.py | 1 + scripts/release_notes_ai/ai_client.py | 296 +++++++ scripts/release_notes_ai/cli.py | 283 ++++++ scripts/release_notes_ai/constants.py | 98 +++ scripts/release_notes_ai/excel_workbook.py | 906 ++++++++++++++++++++ scripts/release_notes_ai/github_client.py | 321 +++++++ scripts/release_notes_ai/markdown_writer.py | 121 +++ scripts/release_notes_ai/models.py | 101 +++ scripts/release_notes_ai/requirements.txt | 3 + scripts/release_notes_ai/scope_filter.py | 366 ++++++++ scripts/release_notes_ai/utils.py | 87 ++ scripts/release_notes_generate_ai.py | 10 + 12 files changed, 2593 insertions(+) create mode 100644 scripts/release_notes_ai/__init__.py create mode 100644 scripts/release_notes_ai/ai_client.py create mode 100644 scripts/release_notes_ai/cli.py create mode 100644 scripts/release_notes_ai/constants.py create mode 100644 scripts/release_notes_ai/excel_workbook.py create mode 100644 scripts/release_notes_ai/github_client.py create mode 100644 scripts/release_notes_ai/markdown_writer.py create mode 100644 scripts/release_notes_ai/models.py create mode 100644 scripts/release_notes_ai/requirements.txt create mode 100644 scripts/release_notes_ai/scope_filter.py create mode 100644 scripts/release_notes_ai/utils.py create mode 100644 scripts/release_notes_generate_ai.py diff --git a/scripts/release_notes_ai/__init__.py b/scripts/release_notes_ai/__init__.py new file mode 100644 index 0000000000000..65f7e128c779b --- /dev/null +++ b/scripts/release_notes_ai/__init__.py @@ -0,0 +1 @@ +"""Helpers for generating TiDB release notes with AI.""" diff --git a/scripts/release_notes_ai/ai_client.py b/scripts/release_notes_ai/ai_client.py new file mode 100644 index 0000000000000..503e28b63023b --- /dev/null +++ b/scripts/release_notes_ai/ai_client.py @@ -0,0 +1,296 @@ +from __future__ import annotations + +import dataclasses +from functools import lru_cache +import json +import os +import shlex +import shutil +import subprocess +import tempfile +import textwrap +from pathlib import Path +from typing import Any + +from .constants import BUG_FIXES_REFERENCE, IMPROVEMENTS_REFERENCE +from .models import GeneratedNote, RowContext + + +class AIClient: + def __init__(self, command: str, model: str | None, timeout: int): + self.command = shlex.split(command) + self.model = model + self.timeout = timeout + + def generate(self, prompt: str, expected_links: list[str], contributors: list[str]) -> GeneratedNote: + result, errors = self._run_and_validate(prompt, expected_links, contributors) + if result: + return result + + repair_prompt = build_repair_prompt(prompt, errors) + result, repair_errors = self._run_and_validate(repair_prompt, expected_links, contributors) + if result: + return result + raise ValueError("; ".join(repair_errors)) + + def _run_and_validate( + self, prompt: str, expected_links: list[str], contributors: list[str] + ) -> tuple[GeneratedNote | None, list[str]]: + output = self._run(prompt) + try: + data = extract_json_object(output) + except ValueError as exc: + return None, [str(exc)] + return validate_ai_response(data, expected_links, contributors) + + def _run(self, prompt: str) -> str: + command = list(self.command) + if not command: + raise ValueError("AI command is empty. 
Pass a command with --ai-command.") + if not is_executable_available(command[0]): + raise FileNotFoundError( + f"AI command executable not found: {command[0]!r}. " + "Install it or pass a custom command with --ai-command." + ) + + with tempfile.TemporaryDirectory() as temp_dir: + output_path: Path | None = None + if self._is_codex_exec(command): + if self.model: + command.extend(["-m", self.model]) + temp_path = Path(temp_dir) + schema_path = temp_path / "ai-output-schema.json" + output_path = temp_path / "ai-output.txt" + schema_path.write_text(json.dumps(ai_output_schema()), encoding="utf-8") + output_path.touch() + command.extend(["--output-schema", str(schema_path)]) + command.extend(["--output-last-message", str(output_path)]) + + completed = subprocess.run( + command, + input=prompt, + text=True, + capture_output=True, + timeout=self.timeout, + check=False, + ) + if completed.returncode != 0: + raise RuntimeError( + "AI command failed with exit code " + f"{completed.returncode}: {summarize_process_output(completed)}" + ) + if output_path and output_path.exists(): + last_message = output_path.read_text(encoding="utf-8").strip() + if last_message: + return last_message + return completed.stdout.strip() + + @staticmethod + def _is_codex_exec(command: list[str]) -> bool: + if not command: + return False + executable = Path(command[0]).name + return executable == "codex" and "exec" in command[1:] + + +def is_executable_available(executable: str) -> bool: + if os.sep in executable or (os.altsep and os.altsep in executable): + return Path(executable).exists() + return shutil.which(executable) is not None + + +def ai_output_schema() -> dict[str, Any]: + return { + "type": "object", + "additionalProperties": False, + "required": ["type", "release_note", "needs_review", "reason"], + "properties": { + "type": {"type": "string", "enum": ["improvement", "bug_fix"]}, + "release_note": {"type": "string"}, + "needs_review": {"type": "boolean"}, + "reason": {"type": "string"}, + }, + } + + +def summarize_process_output(completed: subprocess.CompletedProcess[str]) -> str: + parts = [] + if completed.stderr.strip(): + parts.append("stderr:\n" + tail_output(completed.stderr)) + if completed.stdout.strip(): + parts.append("stdout:\n" + tail_output(completed.stdout)) + return "\n\n".join(parts) or "no output" + + +def tail_output(text: str, max_lines: int = 40, max_chars: int = 4000) -> str: + tail = "\n".join(text.strip().splitlines()[-max_lines:]) + if len(tail) > max_chars: + tail = "...[truncated]\n" + tail[-max_chars:] + return tail + + +def build_generation_prompt( + row_context: RowContext, + expected_links: list[str], + contributors: list[str], +) -> str: + improvements_reference = load_reference_file(IMPROVEMENTS_REFERENCE) + bug_fixes_reference = load_reference_file(BUG_FIXES_REFERENCE) + context = { + "row_number": row_context.row_number, + "component": row_context.component, + "raw_component_from_excel": row_context.raw_component, + "issue_type_from_excel": row_context.issue_type, + "pr_title_from_excel": row_context.pr_title, + "formatted_release_note_from_excel": row_context.formatted_release_note, + "expected_links": expected_links, + "contributors": contributors, + "issues": [dataclasses.asdict(issue) for issue in row_context.issues], + "pull_requests": [dataclasses.asdict(pull) for pull in row_context.pulls], + } + return textwrap.dedent( + f""" + You write exactly one English TiDB release note entry. 
+ + Return only a JSON object with exactly these keys: + - type: "improvement" or "bug_fix" + - release_note: one Markdown bullet that starts with "- " + - needs_review: true or false + - reason: a short reason for the type and wording + + Rules: + - Write from the user's perspective. + - Use the Excel issue_type as a strong signal, but decide the final type from the issue, + PR description, and code changes. + - For improvements, follow the Improvements reference below. + - For bug fixes, follow the Bug fixes reference below. + - Do not end the release note with a period. + - Include every expected link in Markdown release-note style. + - Include every contributor as @[user](https://github.com/user). + - If there is no issue URL, use the PR link as the suffix link. + - Do not expose internal function names unless they are the user-visible behavior. + - If the available context is insufficient, still draft the best note and set needs_review + to true. + + Expected links: + {json.dumps(expected_links, ensure_ascii=False, indent=2)} + + Contributors: + {json.dumps(contributors, ensure_ascii=False, indent=2)} + + Row context: + {json.dumps(context, ensure_ascii=False, indent=2)} + + Improvements reference: + {improvements_reference} + + Bug fixes reference: + {bug_fixes_reference} + """ + ).strip() + + +def build_repair_prompt(original_prompt: str, errors: list[str]) -> str: + return textwrap.dedent( + f""" + Your previous answer did not satisfy the required JSON schema or release-note rules. + + Validation errors: + {json.dumps(errors, ensure_ascii=False, indent=2)} + + Rewrite the answer. Return only the corrected JSON object. + + Original task: + {original_prompt} + """ + ).strip() + + +@lru_cache(maxsize=None) +def load_reference_file(path: Path) -> str: + try: + return path.read_text(encoding="utf-8") + except FileNotFoundError as exc: + raise FileNotFoundError( + f"Cannot find release-note reference file: {path}. " + "Make sure the repo-local write-review-translate-release-notes skill is present." 
+ ) from exc + + +def extract_json_object(output: str) -> dict[str, Any]: + output = output.strip() + if not output: + raise ValueError("AI command returned no output") + try: + data = json.loads(output) + except json.JSONDecodeError: + candidates = extract_json_object_candidates(output) + if not candidates: + raise ValueError("AI output did not contain a JSON object") from None + required_keys = {"type", "release_note", "needs_review", "reason"} + data = next( + (candidate for candidate in candidates if required_keys <= candidate.keys()), + candidates[0], + ) + if not isinstance(data, dict): + raise ValueError("AI output JSON is not an object") + return data + + +def extract_json_object_candidates(output: str) -> list[dict[str, Any]]: + decoder = json.JSONDecoder() + candidates: list[dict[str, Any]] = [] + for index, char in enumerate(output): + if char != "{": + continue + try: + data, _end = decoder.raw_decode(output[index:]) + except json.JSONDecodeError: + continue + if isinstance(data, dict): + candidates.append(data) + return candidates + + +def validate_ai_response( + data: dict[str, Any], + expected_links: list[str], + contributors: list[str], +) -> tuple[GeneratedNote | None, list[str]]: + errors: list[str] = [] + note_type = data.get("type") + release_note = data.get("release_note") + needs_review = data.get("needs_review") + reason = data.get("reason") + + if note_type not in {"improvement", "bug_fix"}: + errors.append('type must be "improvement" or "bug_fix"') + if not isinstance(release_note, str) or not release_note.startswith("- "): + errors.append('release_note must be a string that starts with "- "') + if isinstance(release_note, str) and release_note.rstrip().endswith("."): + errors.append("release_note must not end with a period") + if not isinstance(needs_review, bool): + errors.append("needs_review must be a boolean") + if not isinstance(reason, str): + errors.append("reason must be a string") + + if isinstance(release_note, str): + for link in expected_links: + if link and link not in release_note: + errors.append(f"release_note is missing expected link: {link}") + for contributor in contributors: + expected = f"@[{contributor}](https://github.com/{contributor})" + if contributor and expected not in release_note: + errors.append(f"release_note is missing contributor: {contributor}") + + if errors: + return None, errors + return ( + GeneratedNote( + note_type=str(note_type), + release_note=str(release_note).strip(), + needs_review=bool(needs_review), + reason=str(reason).strip(), + ), + [], + ) diff --git a/scripts/release_notes_ai/cli.py b/scripts/release_notes_ai/cli.py new file mode 100644 index 0000000000000..ee1d79a074c4a --- /dev/null +++ b/scripts/release_notes_ai/cli.py @@ -0,0 +1,283 @@ +from __future__ import annotations + +import argparse +import os +import tempfile +from pathlib import Path + +import openpyxl + +from .ai_client import AIClient +from .excel_workbook import ( + clear_output_columns, + generate_notes_without_ai, + generate_notes_for_sheet, + merge_rows_by_issue_and_component, + prepare_sheet_columns, + sort_sheet_rows_by_component, + store_existing_release_notes, + update_pr_authors_and_dup_notes, +) +from .github_client import GitHubClient +from .markdown_writer import write_release_file +from .scope_filter import move_prs_not_in_scope, parse_date_value + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Generate English release notes with AI from a tirelease workbook." 
+ ) + parser.add_argument("--version", required=True, help="Target TiDB version, for example 8.5.7.") + parser.add_argument("--excel", required=True, help="Path to the release note Excel workbook.") + parser.add_argument( + "--releases-dir", + required=True, + help="Path to the existing English release notes directory.", + ) + parser.add_argument("--sheet", default="pr_for_release_note", help="Workbook sheet name.") + parser.add_argument("--github-token-file", help="Path to a GitHub token file.") + parser.add_argument( + "--ai-command", + default="codex --ask-for-approval never exec --sandbox read-only --ephemeral", + help="Command-line AI command. The prompt is passed through stdin.", + ) + parser.add_argument( + "--ai-model", + default="gpt-5.4", + help="Model name passed to codex exec with -m.", + ) + parser.add_argument( + "--involve-ai-generation", + type=parse_on_off, + default="ON", + help=( + "Whether to use AI for non-dup release notes. Use ON to generate with AI, " + "or OFF to output the original formated_release_note values. Default: ON." + ), + ) + parser.add_argument( + "--output-release-file", + help="Output Markdown file. Defaults to release-{version}-updated-by-ai.md.", + ) + parser.add_argument( + "--ai-timeout", + type=int, + default=600, + help="Timeout in seconds for each AI command invocation.", + ) + parser.add_argument( + "--ai-workers", + type=int, + default=3, + help=( + "Number of concurrent AI command invocations. The default is conservative " + "for codex exec subprocesses." + ), + ) + parser.add_argument( + "--github-workers", + type=int, + default=8, + help="Number of concurrent GitHub API prefetch workers.", + ) + parser.add_argument( + "--author-workers", + type=int, + default=3, + help="Number of concurrent workers used to resolve bot-authored cherry-pick PR authors.", + ) + parser.add_argument( + "--checkpoint-interval", + type=int, + default=1, + help=( + "Save the Excel workbook after every N completed AI rows. " + "Default: 1. Use 0 to disable." + ), + ) + parser.add_argument( + "--force-regenerate", + action="store_true", + help="Clear existing AI release notes and regenerate all non-dup rows.", + ) + parser.add_argument( + "--release-date", + default="TBD", + help='Release date text for the Markdown header, for example "August 14, 2025".', + ) + parser.add_argument( + "--skip-scope-preprocess", + action="store_true", + help="Skip moving not-in-scope PR rows to the PRs_not_in_scope sheet.", + ) + parser.add_argument( + "--scope-base-branch-start-date", + help=( + "Override the estimated release-m.n branch start date for x.y.0 scope " + "preprocessing, in YYYY-MM-DD format." 
+ ), + ) + return parser.parse_args() + + +def main() -> int: + args = parse_args() + validate_positive_int("--ai-workers", args.ai_workers) + validate_positive_int("--github-workers", args.github_workers) + validate_positive_int("--author-workers", args.author_workers) + if args.checkpoint_interval < 0: + raise ValueError("--checkpoint-interval must be greater than or equal to 0") + base_branch_start_date = None + if args.scope_base_branch_start_date: + base_branch_start_date = parse_date_value(args.scope_base_branch_start_date) + if not base_branch_start_date: + raise ValueError("--scope-base-branch-start-date must use YYYY-MM-DD format") + + token = load_github_token(args.github_token_file) + github = GitHubClient(token) + involve_ai_generation = args.involve_ai_generation == "ON" + ai = AIClient(args.ai_command, args.ai_model, args.ai_timeout) if involve_ai_generation else None + + output_file = ( + Path(args.output_release_file) + if args.output_release_file + else Path(args.releases_dir) / f"release-{args.version}-updated-by-ai.md" + ) + + excel_path = Path(args.excel) + processed_excel_path = default_processed_excel_path(excel_path) + workbook = openpyxl.load_workbook(excel_path) + if args.sheet not in workbook.sheetnames: + raise ValueError(f"Cannot find sheet {args.sheet!r} in {args.excel}") + sheet = workbook[args.sheet] + if not args.skip_scope_preprocess: + move_prs_not_in_scope( + workbook, + sheet, + args.version, + Path(args.releases_dir), + github, + base_branch_start_date=base_branch_start_date, + ) + sort_sheet_rows_by_component(sheet) + header = prepare_sheet_columns(sheet) + clear_output_columns(sheet, header, clear_ai=args.force_regenerate) + + existing_notes = store_existing_release_notes(Path(args.releases_dir), args.version) + update_pr_authors_and_dup_notes( + sheet, + header, + existing_notes, + github, + author_workers=args.author_workers, + ) + merge_rows_by_issue_and_component(sheet, header) + + if involve_ai_generation: + checkpoint_callback = build_checkpoint_callback( + workbook, + processed_excel_path, + args.checkpoint_interval, + ) + markdown_entries = generate_notes_for_sheet( + sheet, + header, + github, + ai, + ai_workers=args.ai_workers, + github_workers=args.github_workers, + checkpoint_callback=checkpoint_callback, + ) + else: + markdown_entries = generate_notes_without_ai(sheet, header) + save_workbook_safely(workbook, processed_excel_path) + write_release_file(output_file, args.version, args.release_date, markdown_entries) + + print(f"Original Excel workbook unchanged: {excel_path}", flush=True) + print(f"Processed Excel workbook: {processed_excel_path}", flush=True) + print(f"Generated release note file: {output_file}", flush=True) + return 0 + + +def validate_positive_int(name: str, value: int) -> None: + if value < 1: + raise ValueError(f"{name} must be greater than or equal to 1") + + +def parse_on_off(value: str) -> str: + normalized = value.strip().upper() + if normalized not in {"ON", "OFF"}: + raise argparse.ArgumentTypeError("value must be ON or OFF") + return normalized + + +def default_processed_excel_path(excel_path: Path) -> Path: + return excel_path.with_name(f"{excel_path.stem}_processed{excel_path.suffix}") + + +def build_checkpoint_callback( + workbook: openpyxl.Workbook, + excel_path: Path, + checkpoint_interval: int, +): + if checkpoint_interval <= 0: + return None + + def checkpoint(completed: int, total: int) -> None: + if completed % checkpoint_interval != 0 and completed != total: + return + save_workbook_safely(workbook, 
excel_path) + print( + f"Checkpoint saved after {completed}/{total} AI row(s): {excel_path}", + flush=True, + ) + + return checkpoint + + +def save_workbook_safely(workbook: openpyxl.Workbook, excel_path: Path) -> None: + excel_path = excel_path.resolve() + temp_file = tempfile.NamedTemporaryFile( + prefix=f".{excel_path.stem}.", + suffix=excel_path.suffix, + dir=excel_path.parent, + delete=False, + ) + temp_path = Path(temp_file.name) + temp_file.close() + saved_temp = False + try: + workbook.save(temp_path) + saved_temp = True + os.replace(temp_path, excel_path) + except Exception as exc: + if saved_temp and temp_path.exists(): + raise RuntimeError( + f"Failed to replace {excel_path}: {exc}. " + f"A complete temporary workbook remains at {temp_path}." + ) from exc + temp_path.unlink(missing_ok=True) + raise RuntimeError(f"Failed to save workbook {excel_path}: {exc}") from exc + + +def load_github_token(token_file: str | None) -> str | None: + import shutil + import subprocess + + if token_file: + return Path(token_file).read_text(encoding="utf-8").strip() + if os.environ.get("GITHUB_TOKEN"): + return os.environ["GITHUB_TOKEN"].strip() + gh = shutil.which("gh") + if not gh: + return None + completed = subprocess.run( + [gh, "auth", "token"], + text=True, + capture_output=True, + timeout=10, + check=False, + ) + if completed.returncode == 0 and completed.stdout.strip(): + return completed.stdout.strip() + return None diff --git a/scripts/release_notes_ai/constants.py b/scripts/release_notes_ai/constants.py new file mode 100644 index 0000000000000..c3e947167a23b --- /dev/null +++ b/scripts/release_notes_ai/constants.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +import re +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parents[2] +IMPROVEMENTS_REFERENCE = ( + REPO_ROOT + / ".ai" + / "skills" + / "write-review-translate-release-notes" + / "references" + / "improvements.md" +) +BUG_FIXES_REFERENCE = ( + REPO_ROOT + / ".ai" + / "skills" + / "write-review-translate-release-notes" + / "references" + / "bug-fixes.md" +) + +BOT_AUTHORS = {"ti-chi-bot", "ti-srebot"} +# Keep the misspelled source column name because tirelease exports it this way. 
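+# prepare_sheet_columns() fails fast with a ValueError when any of these
+# headers is missing from the worksheet.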
+REQUIRED_HEADERS = { + "pr_author", + "pr_link", + "pr_title", + "formated_release_note", + "issue_type", +} +COMPONENT_HEADERS = ("component", "components") + +GITHUB_ITEM_URL_RE = re.compile( + r"https://github\.com/(?P[^/\s]+)/(?P[\w.-]+)/" + r"(?Pissues|pull)/(?P\d+)" +) +ISSUE_URL_RE = re.compile( + r"https://github\.com/(?P[^/\s]+)/(?P[\w.-]+)/issues/(?P\d+)" +) +PR_URL_RE = re.compile( + r"https://github\.com/(?P[^/\s]+)/(?P[\w.-]+)/pull/(?P\d+)" +) +AUTHOR_RE = re.compile(r"@\[([^\]]+)\]") + +TOP_LEVEL_COMPONENTS = ["TiDB", "TiKV", "PD", "TiFlash", "TiProxy"] +TOOL_COMPONENTS = [ + "Backup & Restore (BR)", + "TiCDC", + "TiDB Data Migration (DM)", + "TiDB Lightning", + "Dumpling", + "TiUP", + "TiDB Binlog", + "sync-diff-inspector", +] +COMPONENT_ALIASES = { + "tidb": "TiDB", + "tikv": "TiKV", + "pd": "PD", + "tiflash": "TiFlash", + "tiproxy": "TiProxy", + "br": "Backup & Restore (BR)", + "backup & restore": "Backup & Restore (BR)", + "backup & restore (br)": "Backup & Restore (BR)", + "cdc": "TiCDC", + "ticdc": "TiCDC", + "dm": "TiDB Data Migration (DM)", + "tidb data migration": "TiDB Data Migration (DM)", + "tidb data migration (dm)": "TiDB Data Migration (DM)", + "tidb lightning": "TiDB Lightning", + "lightning": "TiDB Lightning", + "dumpling": "Dumpling", + "tiup": "TiUP", + "tidb binlog": "TiDB Binlog", + "ng monitoring": "TiDB", + "sync_diff": "sync-diff-inspector", + "sync-diff-inspector": "sync-diff-inspector", + "sync diff inspector": "sync-diff-inspector", + "planner": "TiDB", + "execution": "TiDB", + "sql-infra": "TiDB", + "transaction": "TiDB", + "engine": "TiDB", + "observability": "TiDB", + "dxf": "TiDB", + "storage": "TiDB", + "tidb-dashboard": "TiDB", + "tidb dashboard": "TiDB", + "ddl": "TiDB", + "coprocessor": "TiDB", + "compute": "TiDB", + "scheduling": "TiDB", + "spm": "TiDB", + "ng-monitoring": "TiDB", +} diff --git a/scripts/release_notes_ai/excel_workbook.py b/scripts/release_notes_ai/excel_workbook.py new file mode 100644 index 0000000000000..260b4b807d04e --- /dev/null +++ b/scripts/release_notes_ai/excel_workbook.py @@ -0,0 +1,906 @@ +from __future__ import annotations + +import copy +import re +import sys +from concurrent.futures import ThreadPoolExecutor, as_completed +from collections import OrderedDict +from pathlib import Path +from typing import Any, Callable + +from openpyxl.styles import PatternFill + +from .ai_client import build_generation_prompt +from .constants import ( + AUTHOR_RE, + BOT_AUTHORS, + COMPONENT_HEADERS, + GITHUB_ITEM_URL_RE, + REQUIRED_HEADERS, + TOOL_COMPONENTS, + TOP_LEVEL_COMPONENTS, +) +from .models import ( + ExistingNote, + GitHubDataCache, + MarkdownEntry, + RowContext, + RowGenerationResult, + RowInput, +) +from .utils import ( + extract_issue_urls, + extract_pr_urls, + normalize_component, + normalize_raw_component, + normalized_release_component, + replace_author_markdown, + split_lines, + split_multi_value, + str_value, + unique_ordered, +) + + +GRAY_FILL = PatternFill(start_color="D3D3D3", end_color="D3D3D3", fill_type="solid") + + +def prepare_sheet_columns(sheet: Any) -> dict[str, int]: + header = get_header(sheet) + missing = sorted(REQUIRED_HEADERS - set(header)) + if missing: + raise ValueError(f"Missing required Excel columns: {', '.join(missing)}") + get_component_col(header) + + ai_col = header.get("release_notes_written_by_ai") + formatted_col = header["formated_release_note"] + if not ai_col: + sheet.insert_cols(formatted_col + 1) + sheet.cell(row=1, column=formatted_col + 1, 
value="release_notes_written_by_ai") + header = get_header(sheet) + + if "published_release_notes" not in header: + last_col = sheet.max_column + sheet.cell(row=1, column=last_col + 1, value="published_release_notes") + header = get_header(sheet) + return header + + +def get_header(sheet: Any) -> dict[str, int]: + header: dict[str, int] = {} + for index, cell in enumerate(sheet[1], start=1): + if cell.value: + header[str(cell.value).strip()] = index + return header + + +def clear_output_columns(sheet: Any, header: dict[str, int], clear_ai: bool = True) -> None: + for row_number in range(2, sheet.max_row + 1): + if clear_ai: + sheet.cell(row=row_number, column=header["release_notes_written_by_ai"]).value = None + sheet.cell(row=row_number, column=header["published_release_notes"]).value = None + + +def sort_sheet_rows_by_component(sheet: Any) -> None: + header = get_header(sheet) + component_col = get_component_col(header) + if sheet.max_row <= 2: + return + + snapshots = [ + (row_number, component_sort_key(sheet.cell(row=row_number, column=component_col).value), snapshot_row(sheet, row_number)) + for row_number in range(2, sheet.max_row + 1) + ] + sorted_snapshots = sorted(snapshots, key=lambda item: item[1]) + if [row_number for row_number, _key, _snapshot in snapshots] == [ + row_number for row_number, _key, _snapshot in sorted_snapshots + ]: + return + + for target_row, (_source_row, _key, snapshot) in enumerate(sorted_snapshots, start=2): + restore_row(sheet, target_row, snapshot) + + print("Sorted worksheet rows by component before release-note generation", flush=True) + + +def component_sort_key(value: Any) -> tuple[int, str]: + component = normalize_raw_component(value) + if not component: + return (1, "") + return (0, component.casefold()) + + +def snapshot_row(sheet: Any, row_number: int) -> dict[str, Any]: + row_dimension = sheet.row_dimensions[row_number] + return { + "height": row_dimension.height, + "hidden": row_dimension.hidden, + "outline_level": row_dimension.outlineLevel, + "collapsed": row_dimension.collapsed, + "cells": [snapshot_cell(sheet.cell(row=row_number, column=column)) for column in range(1, sheet.max_column + 1)], + } + + +def snapshot_cell(cell: Any) -> dict[str, Any]: + return { + "value": cell.value, + "style": copy.copy(cell._style), + "number_format": cell.number_format, + "hyperlink": copy.copy(cell.hyperlink) if cell.hyperlink else None, + "comment": copy.copy(cell.comment) if cell.comment else None, + } + + +def restore_row(sheet: Any, row_number: int, snapshot: dict[str, Any]) -> None: + row_dimension = sheet.row_dimensions[row_number] + row_dimension.height = snapshot["height"] + row_dimension.hidden = snapshot["hidden"] + row_dimension.outlineLevel = snapshot["outline_level"] + row_dimension.collapsed = snapshot["collapsed"] + for column, cell_snapshot in enumerate(snapshot["cells"], start=1): + cell = sheet.cell(row=row_number, column=column) + cell.value = cell_snapshot["value"] + cell._style = copy.copy(cell_snapshot["style"]) + cell.number_format = cell_snapshot["number_format"] + cell._hyperlink = copy.copy(cell_snapshot["hyperlink"]) if cell_snapshot["hyperlink"] else None + cell.comment = copy.copy(cell_snapshot["comment"]) if cell_snapshot["comment"] else None + + +def get_component_col(header: dict[str, int]) -> int: + for name in COMPONENT_HEADERS: + if name in header: + return header[name] + raise ValueError("Missing required Excel column: component or components") + + +def issue_urls_for_row(sheet: Any, header: dict[str, int], row_number: 
int) -> list[str]: + candidates: list[str] = [] + if "issue_url" in header: + candidates.append(str_value(sheet.cell(row=row_number, column=header["issue_url"]).value)) + candidates.append(str_value(sheet.cell(row=row_number, column=header["formated_release_note"]).value)) + return unique_ordered(url for text in candidates for url in extract_issue_urls(text)) + + +def first_issue_url_for_row(sheet: Any, header: dict[str, int], row_number: int) -> str | None: + issue_urls = issue_urls_for_row(sheet, header, row_number) + return issue_urls[0] if issue_urls else None + + +def store_existing_release_notes(releases_dir: Path, version: str) -> list[ExistingNote]: + existing_notes: list[ExistingNote] = [] + seen: set[tuple[str, tuple[str, ...]]] = set() + target_version = parse_semver_tuple(version) + + for file_path in sorted(releases_dir.rglob("*.md")): + if should_skip_release_file(file_path, target_version): + continue + level1 = level2 = level3 = "" + with file_path.open("r", encoding="utf-8") as file: + for raw_line in file: + line = raw_line.strip() + authors = AUTHOR_RE.findall(line) + item_url = GITHUB_ITEM_URL_RE.search(line) + if item_url: + key = (item_url.group(), tuple(authors)) + if key in seen: + continue + seen.add(key) + note_level = level1 + level2 + level3 + note_type, component = classify_note_level(note_level) + existing_notes.append( + ExistingNote( + url=item_url.group(), + line=line, + file_name=file_path.name, + note_level=note_level, + authors=authors, + note_type=note_type, + component=component, + ) + ) + continue + + heading = parse_release_note_heading(raw_line) + if not heading: + continue + heading_level, label = heading + if heading_level == 1: + level1 = "> " + label + level2 = level3 = "" + elif heading_level == 2: + level2 = "> " + label + level3 = "" + elif heading_level == 3: + level3 = "> " + label + return existing_notes + + +def should_skip_release_file(file_path: Path, target_version: tuple[int, int, int]) -> bool: + if "updated-by-ai" in file_path.stem: + return True + file_version = release_file_semver_tuple(file_path) + if not file_version: + return False + return file_version >= target_version + + +def parse_semver_tuple(version: str) -> tuple[int, int, int]: + match = re.match(r"^(?P\d+)\.(?P\d+)\.(?P\d+)", version) + if not match: + raise ValueError(f"Invalid TiDB version: {version}") + return ( + int(match.group("major")), + int(match.group("minor")), + int(match.group("patch")), + ) + + +def release_file_semver_tuple(file_path: Path) -> tuple[int, int, int] | None: + match = re.match( + r"^release-(?P\d+)\.(?P\d+)\.(?P\d+)", + file_path.stem, + ) + if not match: + return None + return ( + int(match.group("major")), + int(match.group("minor")), + int(match.group("patch")), + ) + + +def parse_release_note_heading(raw_line: str) -> tuple[int, str] | None: + line = raw_line.rstrip() + section = re.match(r"^##\s+(.+?)\s*$", line) + if section: + return 1, section.group(1).strip() + + top_component = re.match(r"^[+-]\s+(.+?)\s*$", line) + if top_component: + label = top_component.group(1).strip() + if label.lower() == "tools" or normalized_release_component(label): + return 2, label + + tool_component = re.match(r"^ {4}[+-]\s+(.+?)\s*$", line) + if tool_component: + label = tool_component.group(1).strip() + if normalized_release_component(label): + return 3, label + return None + + +def update_pr_authors_and_dup_notes( + sheet: Any, + header: dict[str, int], + existing_notes: list[ExistingNote], + github: Any, + author_workers: int = 1, +) -> None: + 
apply_bot_author_replacements(sheet, header, github, author_workers) + existing_notes_by_url = index_existing_notes_by_url(existing_notes) + + for row_number in range(2, sheet.max_row + 1): + author_cell = sheet.cell(row=row_number, column=header["pr_author"]) + current_author = str_value(author_cell.value) + + issue_url = first_issue_url_for_row(sheet, header, row_number) + if not issue_url: + continue + + current_authors = split_multi_value(current_author) + dup_notes = [] + for existing in existing_notes_by_url.get(issue_url, []): + if existing.authors and not set(current_authors).intersection(existing.authors): + continue + dup_notes.append(existing.dup_text) + + if dup_notes: + dup_col = header["published_release_notes"] + sheet.cell(row=row_number, column=dup_col, value="\n".join(unique_ordered(dup_notes))) + fill_row(sheet, row_number) + print(f"Row {row_number}: found duplicated release note for {issue_url}", flush=True) + + +def apply_bot_author_replacements( + sheet: Any, + header: dict[str, int], + github: Any, + author_workers: int, +) -> None: + requests = bot_author_requests(sheet, header) + if not requests: + return + print( + f"Resolving {len(requests)} bot-authored PR row(s) with {author_workers} worker(s)", + flush=True, + ) + + replacements = resolve_bot_author_replacements(requests, github, author_workers) + for row_number in sorted(replacements): + current_author, actual_author = replacements[row_number] + author_cell = sheet.cell(row=row_number, column=header["pr_author"]) + formatted_cell = sheet.cell(row=row_number, column=header["formated_release_note"]) + formatted_note = str_value(formatted_cell.value) + print( + f"Replacing bot author in row {row_number}: {current_author} -> {actual_author}", + flush=True, + ) + author_cell.value = actual_author + formatted_cell.value = replace_author_markdown( + formatted_note, current_author, actual_author + ) + + +def bot_author_requests(sheet: Any, header: dict[str, int]) -> list[tuple[int, str, str, str]]: + requests = [] + for row_number in range(2, sheet.max_row + 1): + current_author = str_value(sheet.cell(row=row_number, column=header["pr_author"]).value) + pr_link = str_value(sheet.cell(row=row_number, column=header["pr_link"]).value) + if current_author not in BOT_AUTHORS or not pr_link: + continue + pr_title = str_value(sheet.cell(row=row_number, column=header["pr_title"]).value) + requests.append((row_number, pr_link, pr_title, current_author)) + return requests + + +def resolve_bot_author_replacements( + requests: list[tuple[int, str, str, str]], + github: Any, + author_workers: int, +) -> dict[int, tuple[str, str]]: + replacements: dict[int, tuple[str, str]] = {} + total = len(requests) + if author_workers == 1: + for completed, request in enumerate(requests, start=1): + row_number, pr_link, pr_title, current_author = request + actual_author = resolve_bot_author(github, request) + print_bot_author_progress(completed, total, row_number, current_author, actual_author) + if actual_author != current_author: + replacements[row_number] = (current_author, actual_author) + return replacements + + with ThreadPoolExecutor(max_workers=author_workers) as executor: + futures = { + executor.submit(resolve_bot_author, github, request): request + for request in requests + } + for completed, future in enumerate(as_completed(futures), start=1): + row_number, _pr_link, _pr_title, current_author = futures[future] + actual_author = future.result() + print_bot_author_progress(completed, total, row_number, current_author, actual_author) 
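+            # as_completed() yields futures in completion order, not row order;
+            # only rows whose resolved author differs from the bot login are recorded.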
+ if actual_author != current_author: + replacements[row_number] = (current_author, actual_author) + return replacements + + +def print_bot_author_progress( + completed: int, + total: int, + row_number: int, + current_author: str, + actual_author: str, +) -> None: + status = "unchanged" if actual_author == current_author else f"{current_author} -> {actual_author}" + print( + f"Resolved bot author {completed}/{total}: row {row_number} ({status})", + flush=True, + ) + + +def resolve_bot_author(github: Any, request: tuple[int, str, str, str]) -> str: + row_number, pr_link, pr_title, current_author = request + try: + return github.get_original_author_for_cherry_pick( + row_number, + pr_link, + pr_title, + current_author, + ) + except Exception as exc: # noqa: BLE001 + print( + f"Row {row_number}: failed to resolve bot author for {pr_link}: {exc}", + file=sys.stderr, + flush=True, + ) + return current_author + + +def index_existing_notes_by_url(existing_notes: list[ExistingNote]) -> dict[str, list[ExistingNote]]: + indexed: dict[str, list[ExistingNote]] = {} + for existing in existing_notes: + indexed.setdefault(existing.url, []).append(existing) + return indexed + + +def merge_rows_by_issue_and_component(sheet: Any, header: dict[str, int]) -> None: + groups: OrderedDict[tuple[str, str], list[int]] = OrderedDict() + component_col = get_component_col(header) + for row_number in range(2, sheet.max_row + 1): + issue_url = first_issue_url_for_row(sheet, header, row_number) + if not issue_url: + continue + component = normalize_raw_component(sheet.cell(row=row_number, column=component_col).value) + if not component: + continue + groups.setdefault((issue_url, component), []).append(row_number) + + rows_to_delete: list[int] = [] + for (_issue_url, _component), rows in groups.items(): + if len(rows) <= 1: + continue + keep_row = rows[0] + merge_pr_links(sheet, header, keep_row, rows) + merge_authors(sheet, header, keep_row, rows) + merge_dup_notes(sheet, header, keep_row, rows) + fill_first_empty_values(sheet, header, keep_row, rows) + if str_value(sheet.cell(row=keep_row, column=header["published_release_notes"]).value): + fill_row(sheet, keep_row) + rows_to_delete.extend(rows[1:]) + + for row_number in sorted(rows_to_delete, reverse=True): + sheet.delete_rows(row_number, 1) + + +def merge_pr_links(sheet: Any, header: dict[str, int], keep_row: int, rows: list[int]) -> None: + links: list[str] = [] + for row in rows: + links.extend(split_multi_value(sheet.cell(row=row, column=header["pr_link"]).value)) + sheet.cell(row=keep_row, column=header["pr_link"], value=", ".join(unique_ordered(links))) + + +def merge_authors(sheet: Any, header: dict[str, int], keep_row: int, rows: list[int]) -> None: + authors: list[str] = [] + for row in rows: + authors.extend(split_multi_value(sheet.cell(row=row, column=header["pr_author"]).value)) + sheet.cell(row=keep_row, column=header["pr_author"], value=", ".join(unique_ordered(authors))) + + +def merge_dup_notes(sheet: Any, header: dict[str, int], keep_row: int, rows: list[int]) -> None: + notes: list[str] = [] + for row in rows: + notes.extend(split_lines(sheet.cell(row=row, column=header["published_release_notes"]).value)) + if notes: + sheet.cell(row=keep_row, column=header["published_release_notes"], value="\n".join(unique_ordered(notes))) + + +def fill_first_empty_values(sheet: Any, header: dict[str, int], keep_row: int, rows: list[int]) -> None: + columns_to_skip = { + header["pr_link"], + header["pr_author"], + header["published_release_notes"], + 
header["release_notes_written_by_ai"], + } + for col in range(1, sheet.max_column + 1): + if col in columns_to_skip: + continue + keep_cell = sheet.cell(row=keep_row, column=col) + if str_value(keep_cell.value): + continue + for row in rows[1:]: + value = sheet.cell(row=row, column=col).value + if str_value(value): + keep_cell.value = value + break + + +def generate_notes_for_sheet( + sheet: Any, + header: dict[str, int], + github: Any, + ai: Any, + ai_workers: int = 1, + github_workers: int = 1, + checkpoint_callback: Callable[[int, int], None] | None = None, +) -> list[MarkdownEntry]: + entries_by_row: dict[int, list[MarkdownEntry]] = {} + row_inputs = [ + build_row_input(sheet, header, row_number) + for row_number in range(2, sheet.max_row + 1) + ] + rows_to_generate: list[RowInput] = [] + + for row_input in row_inputs: + row_number = row_input.row_number + component = row_input.component + dup_text = str_value(sheet.cell(row=row_number, column=header["published_release_notes"]).value) + if dup_text: + sheet.cell(row=row_number, column=header["release_notes_written_by_ai"]).value = None + entries_by_row[row_number] = dup_entries_for_row(row_input, dup_text) + continue + + ai_cell = sheet.cell(row=row_number, column=header["release_notes_written_by_ai"]) + expected_links = row_input.issue_urls or row_input.pr_urls + if not expected_links: + ai_cell.value = "AI_GENERATION_FAILED: missing issue URL and PR URL" + continue + + existing_note = str_value(ai_cell.value) + if is_reusable_ai_note(existing_note): + note_type = classify_note_type_from_text(existing_note, row_input.issue_type) + entries_by_row[row_number] = [ + MarkdownEntry( + note_type or "improvement", + component, + existing_note, + row_input.raw_component, + ) + ] + print(f"Row {row_number}: skipped existing AI release note", flush=True) + continue + + rows_to_generate.append(row_input) + + github_cache = prefetch_github_data(rows_to_generate, github, github_workers) + total_to_generate = len(rows_to_generate) + if total_to_generate: + print( + f"Generating AI release notes for {total_to_generate} row(s) " + f"with {ai_workers} worker(s)", + flush=True, + ) + + completed = 0 + with ThreadPoolExecutor(max_workers=ai_workers) as executor: + futures = [ + executor.submit(generate_note_for_row, row_input, github_cache, ai) + for row_input in rows_to_generate + ] + for future in as_completed(futures): + result = future.result() + apply_generation_result(sheet, header, result, entries_by_row) + completed += 1 + if checkpoint_callback: + checkpoint_callback(completed, total_to_generate) + + entries: list[MarkdownEntry] = [] + for row_input in row_inputs: + entries.extend(entries_by_row.get(row_input.row_number, [])) + return entries + + +def generate_notes_without_ai(sheet: Any, header: dict[str, int]) -> list[MarkdownEntry]: + entries: list[MarkdownEntry] = [] + for row_number in range(2, sheet.max_row + 1): + row_input = build_row_input(sheet, header, row_number) + dup_text = str_value(sheet.cell(row=row_number, column=header["published_release_notes"]).value) + if dup_text: + entries.extend(dup_entries_for_row(row_input, dup_text)) + continue + + formatted_notes = split_lines(row_input.formatted_release_note) + if not formatted_notes: + print( + f"Row {row_number}: skipped non-dup row because formated_release_note is empty", + file=sys.stderr, + flush=True, + ) + continue + note_type = classify_note_type_from_text( + row_input.formatted_release_note, + row_input.issue_type, + ) + for note in formatted_notes: + entries.append( + 
MarkdownEntry( + note_type or "improvement", + row_input.component, + note, + row_input.raw_component, + ) + ) + + print( + f"AI generation is OFF; generated Markdown from formated_release_note for {len(entries)} note(s)", + flush=True, + ) + return entries + + +def dup_entries_for_row(row_input: RowInput, dup_text: str) -> list[MarkdownEntry]: + entries: list[MarkdownEntry] = [] + for dup_note in split_lines(dup_text): + note_type = classify_note_type_from_text( + dup_note, + row_input.issue_type, + ) + dup_component = parse_component_from_dup(dup_note) or row_input.component + if note_type in {"improvement", "bug_fix"}: + entries.append( + MarkdownEntry( + note_type, + normalize_component(dup_component), + dup_note, + row_input.raw_component, + ) + ) + return entries + + +def build_row_input(sheet: Any, header: dict[str, int], row_number: int) -> RowInput: + raw_component = normalize_raw_component( + sheet.cell(row=row_number, column=get_component_col(header)).value + ) + return RowInput( + row_number=row_number, + component=release_component_for_row(sheet, header, row_number), + raw_component=raw_component, + issue_type=str_value(sheet.cell(row=row_number, column=header["issue_type"]).value), + pr_title=str_value(sheet.cell(row=row_number, column=header["pr_title"]).value), + pr_authors=split_multi_value(sheet.cell(row=row_number, column=header["pr_author"]).value), + pr_urls=extract_pr_urls(str_value(sheet.cell(row=row_number, column=header["pr_link"]).value)), + issue_urls=issue_urls_for_row(sheet, header, row_number), + formatted_release_note=str_value( + sheet.cell(row=row_number, column=header["formated_release_note"]).value + ), + ) + + +def is_reusable_ai_note(note: str) -> bool: + return bool(note) and not note.startswith("AI_GENERATION_FAILED:") + + +def prefetch_github_data(row_inputs: list[RowInput], github: Any, github_workers: int) -> GitHubDataCache: + issue_urls = unique_ordered(url for row_input in row_inputs for url in row_input.issue_urls) + pr_urls = unique_ordered(url for row_input in row_inputs for url in row_input.pr_urls) + issues = {} + pulls = {} + + if not issue_urls and not pr_urls: + return GitHubDataCache(issues=issues, pulls=pulls) + + print( + f"Prefetching GitHub data: {len(issue_urls)} issue(s), {len(pr_urls)} PR(s) " + f"with {github_workers} worker(s)", + flush=True, + ) + + with ThreadPoolExecutor(max_workers=github_workers) as executor: + futures = { + executor.submit(github.get_issue, issue_url): ("issue", issue_url) + for issue_url in issue_urls + } + futures.update( + { + executor.submit(github.get_pull, pr_url): ("pull", pr_url) + for pr_url in pr_urls + } + ) + for future in as_completed(futures): + item_type, url = futures[future] + try: + data = future.result() + except Exception as exc: # noqa: BLE001 + print(f"Failed to prefetch GitHub {item_type} {url}: {exc}", file=sys.stderr, flush=True) + continue + if item_type == "issue": + issues[url] = data + else: + pulls[url] = data + return GitHubDataCache(issues=issues, pulls=pulls) + + +def generate_note_for_row( + row_input: RowInput, + github_cache: GitHubDataCache, + ai: Any, +) -> RowGenerationResult: + expected_links = row_input.issue_urls or row_input.pr_urls + row_context = build_row_context_from_cache(row_input, github_cache) + contributors = unique_ordered( + [author for author in row_context.pr_authors if author not in BOT_AUTHORS] + ) + try: + prompt = build_generation_prompt(row_context, expected_links, contributors) + generated = ai.generate(prompt, expected_links, contributors) + 
return RowGenerationResult( + row_number=row_input.row_number, + component=row_input.component, + raw_component=row_input.raw_component, + note_type=generated.note_type, + note=generated.release_note, + error=None, + needs_review=generated.needs_review, + reason=generated.reason, + ) + except Exception as exc: # noqa: BLE001 + return RowGenerationResult( + row_number=row_input.row_number, + component=row_input.component, + raw_component=row_input.raw_component, + note_type=None, + note=None, + error=str(exc), + ) + + +def build_row_context_from_cache(row_input: RowInput, github_cache: GitHubDataCache) -> RowContext: + pr_authors = list(row_input.pr_authors) + issues = [ + github_cache.issues[issue_url] + for issue_url in row_input.issue_urls + if issue_url in github_cache.issues + ] + pulls = [] + for pr_url in row_input.pr_urls: + pull = github_cache.pulls.get(pr_url) + if not pull: + continue + pulls.append(pull) + if pull.author: + pr_authors.append(pull.author) + return RowContext( + row_number=row_input.row_number, + component=row_input.component, + raw_component=row_input.raw_component, + issue_type=row_input.issue_type, + pr_title=row_input.pr_title, + pr_authors=unique_ordered(pr_authors), + pr_urls=row_input.pr_urls, + issue_urls=row_input.issue_urls, + formatted_release_note=row_input.formatted_release_note, + issues=issues, + pulls=pulls, + ) + + +def apply_generation_result( + sheet: Any, + header: dict[str, int], + result: RowGenerationResult, + entries_by_row: dict[int, list[MarkdownEntry]], +) -> None: + ai_cell = sheet.cell(row=result.row_number, column=header["release_notes_written_by_ai"]) + if result.error: + ai_cell.value = f"AI_GENERATION_FAILED: {result.error}" + print( + f"Row {result.row_number}: AI generation failed: {result.error}", + file=sys.stderr, + flush=True, + ) + return + if not result.note or not result.note_type: + ai_cell.value = "AI_GENERATION_FAILED: empty AI generation result" + print( + f"Row {result.row_number}: AI generation failed: empty AI generation result", + file=sys.stderr, + flush=True, + ) + return + + ai_cell.value = result.note + entries_by_row[result.row_number] = [ + MarkdownEntry(result.note_type, result.component, result.note, result.raw_component) + ] + review_marker = " (needs review)" if result.needs_review else "" + print( + f"Row {result.row_number}: generated {result.note_type}{review_marker}: {result.reason}", + flush=True, + ) + + +def release_component_for_row(sheet: Any, header: dict[str, int], row_number: int) -> str: + raw_component = normalize_raw_component( + sheet.cell(row=row_number, column=get_component_col(header)).value + ) + raw_lower = raw_component.lower() + raw_release_component = release_component_from_raw(raw_component) + if raw_release_component: + return raw_release_component + + urls = issue_urls_for_row(sheet, header, row_number) + urls.extend(extract_pr_urls(str_value(sheet.cell(row=row_number, column=header["pr_link"]).value))) + repos = {match.group("repo").lower() for url in urls for match in [GITHUB_ITEM_URL_RE.search(url)] if match} + + if "pd" in repos: + return "PD" + if "tikv" in repos: + return "TiKV" + if "tiflash" in repos: + return "TiFlash" + if "ng-monitoring" in repos: + return "TiDB" + if "tiup" in repos: + return "TiUP" + if repos.intersection({"tiflow", "ticdc"}): + if "dm" in raw_lower and "cdc" not in raw_lower: + return "TiDB Data Migration (DM)" + return "TiCDC" + if "tidb" in repos: + if "br" in raw_lower: + return "Backup & Restore (BR)" + if "lightning" in raw_lower: + return 
"TiDB Lightning" + if "dumpling" in raw_lower: + return "Dumpling" + return "TiDB" + if "tidb-dashboard" in repos: + return "TiDB" + return normalize_component(raw_component) + + +def release_component_from_raw(raw_component: str) -> str: + normalized_raw = normalize_component(raw_component) + if normalized_raw in TOP_LEVEL_COMPONENTS or normalized_raw in TOOL_COMPONENTS: + return normalized_raw + + token_components = [ + normalize_component(token) + for token in split_multi_value(raw_component) + ] + if not token_components: + return "" + + for component in [ + "Backup & Restore (BR)", + "TiDB Lightning", + "Dumpling", + "TiUP", + "sync-diff-inspector", + ]: + if component in token_components: + return component + + for component in TOP_LEVEL_COMPONENTS: + if component in token_components: + return component + + if "TiDB Data Migration (DM)" in token_components: + return "TiDB Data Migration (DM)" + if "TiCDC" in token_components: + return "TiCDC" + + return "" + + +def classify_note_level(note_level: str) -> tuple[str | None, str | None]: + labels = [label.strip() for label in re.findall(r">\s*([^>]+)", note_level)] + if not labels: + return None, None + section = labels[0].lower() + note_type = None + if "bug fixes" in section or "error fixes" in section: + note_type = "bug_fix" + elif "improvements" in section: + note_type = "improvement" + + component_labels = labels[1:] + if component_labels and component_labels[0].lower() == "tools": + component_labels = component_labels[1:] + for label in reversed(component_labels): + component = normalized_release_component(label) + if component: + return note_type, component + return note_type, None + + +def classify_note_type_from_text(note: str, issue_type: str) -> str | None: + note_lower = note.lower() + issue_type_lower = issue_type.lower() + if "> bug fixes" in note_lower or "> 错误修复" in note_lower: + return "bug_fix" + if "> improvements" in note_lower or "> 改进提升" in note_lower: + return "improvement" + if "bug" in issue_type_lower or "fix" in issue_type_lower: + return "bug_fix" + if "improvement" in issue_type_lower or "enhancement" in issue_type_lower: + return "improvement" + if note.strip().startswith("- Fix "): + return "bug_fix" + return "improvement" + + +def parse_component_from_dup(note: str) -> str | None: + labels = [label.strip() for label in re.findall(r">\s*([^>]+)", note)] + cleaned: list[str] = [] + for label in labels: + if " - " in label: + label = label.split(" - ", 1)[0] + cleaned.append(label.strip()) + if len(cleaned) < 2: + return None + return normalized_release_component(cleaned[-1]) + + +def fill_row(sheet: Any, row_number: int) -> None: + for column in range(1, sheet.max_column + 1): + sheet.cell(row=row_number, column=column).fill = copy.copy(GRAY_FILL) diff --git a/scripts/release_notes_ai/github_client.py b/scripts/release_notes_ai/github_client.py new file mode 100644 index 0000000000000..f0f4d1b5e2ff2 --- /dev/null +++ b/scripts/release_notes_ai/github_client.py @@ -0,0 +1,321 @@ +from __future__ import annotations + +import re +import sys +import threading +import time +from typing import Any + +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +from .constants import GITHUB_ITEM_URL_RE +from .models import IssueInfo, PullInfo +from .utils import parse_github_url + + +def create_retry_policy() -> Retry: + return Retry( + total=3, + connect=3, + read=3, + status=3, + backoff_factor=1, + status_forcelist=(500, 502, 503, 504), + 
allowed_methods=frozenset(["GET"]), + respect_retry_after_header=True, + raise_on_status=False, + ) + + +class GitHubClient: + def __init__( + self, + token: str | None, + max_rate_limit_retries: int = 3, + max_rate_limit_sleep: int = 600, + ): + self.max_rate_limit_retries = max_rate_limit_retries + self.max_rate_limit_sleep = max_rate_limit_sleep + self.headers = { + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + } + if token: + self.headers["Authorization"] = f"Bearer {token}" + self._thread_local = threading.local() + + def get_session(self) -> requests.Session: + session = getattr(self._thread_local, "session", None) + if session is None: + session = requests.Session() + session.headers.update(self.headers) + adapter = HTTPAdapter(max_retries=create_retry_policy()) + session.mount("https://", adapter) + self._thread_local.session = session + return session + + def get_json(self, api_path: str) -> dict[str, Any]: + data = self.get_api_json(api_path) + if not isinstance(data, dict): + raise ValueError(f"Expected object response from {api_path}") + return data + + def get_api_json(self, api_path: str, params: dict[str, Any] | None = None) -> Any: + return self.get_url_json(f"https://api.github.com{api_path}", params=params) + + def get_url_json(self, url: str, params: dict[str, Any] | None = None) -> Any: + last_response: requests.Response | None = None + for attempt in range(self.max_rate_limit_retries + 1): + response = self.get_session().get(url, params=params, timeout=30) + last_response = response + if self.is_rate_limited(response) and attempt < self.max_rate_limit_retries: + sleep_seconds = self.rate_limit_sleep_seconds(response, attempt) + print( + "GitHub API rate limit reached; retrying in " + f"{sleep_seconds} seconds: {url}", + file=sys.stderr, + flush=True, + ) + time.sleep(sleep_seconds) + continue + response.raise_for_status() + return response.json() + if last_response is not None: + last_response.raise_for_status() + raise RuntimeError(f"GitHub API request failed: {url}") + + def is_rate_limited(self, response: requests.Response) -> bool: + if response.status_code == 429: + return True + if response.status_code != 403: + return False + if response.headers.get("x-ratelimit-remaining") == "0": + return True + message = response.text.lower() + return "rate limit" in message or "abuse detection" in message + + def rate_limit_sleep_seconds(self, response: requests.Response, attempt: int) -> int: + retry_after = response.headers.get("retry-after") + if retry_after and retry_after.isdigit(): + return min(max(int(retry_after), 1), self.max_rate_limit_sleep) + reset = response.headers.get("x-ratelimit-reset") + if reset and reset.isdigit(): + wait_seconds = int(reset) - int(time.time()) + 5 + return min(max(wait_seconds, 1), self.max_rate_limit_sleep) + return min(2 ** attempt, self.max_rate_limit_sleep) + + def get_pull(self, pr_url: str) -> PullInfo: + owner, repo, number = parse_github_url(pr_url, "pull") + pull = self.get_json(f"/repos/{owner}/{repo}/pulls/{number}") + files_summary = self.get_pull_files_summary(owner, repo, number) + return PullInfo( + url=pr_url, + title=str(pull.get("title") or ""), + body=str(pull.get("body") or ""), + author=str((pull.get("user") or {}).get("login") or ""), + head_ref=str((pull.get("head") or {}).get("ref") or ""), + base_ref=str((pull.get("base") or {}).get("ref") or ""), + files_summary=files_summary, + merged_at=str(pull.get("merged_at") or ""), + created_at=str(pull.get("created_at") or ""), + ) + 
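+    # Label dicts are reduced to plain name strings so the prompt context stays compact.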
+ def get_issue(self, issue_url: str) -> IssueInfo: + owner, repo, number = parse_github_url(issue_url, "issues") + issue = self.get_json(f"/repos/{owner}/{repo}/issues/{number}") + labels = [ + str(label.get("name")) + for label in issue.get("labels", []) + if isinstance(label, dict) and label.get("name") + ] + return IssueInfo( + url=issue_url, + title=str(issue.get("title") or ""), + body=str(issue.get("body") or ""), + labels=labels, + ) + + def get_pull_files_summary( + self, + owner: str, + repo: str, + number: str, + max_files: int = 80, + max_patch_chars: int = 1200, + max_total_chars: int = 60000, + ) -> str: + lines: list[str] = [] + page = 1 + total_chars = 0 + while len(lines) < max_files: + files = self.get_api_json( + f"/repos/{owner}/{repo}/pulls/{number}/files", + params={"per_page": 100, "page": page}, + ) + if not isinstance(files, list) or not files: + break + for item in files: + if len(lines) >= max_files or total_chars >= max_total_chars: + break + if not isinstance(item, dict): + continue + patch = str(item.get("patch") or "") + if len(patch) > max_patch_chars: + patch = patch[:max_patch_chars] + "\n...[patch truncated]" + block = "\n".join( + [ + f"file: {item.get('filename', '')}", + f"status: {item.get('status', '')}", + f"additions: {item.get('additions', 0)}", + f"deletions: {item.get('deletions', 0)}", + "patch:", + patch, + ] + ) + lines.append(block) + total_chars += len(block) + page += 1 + if not lines: + return "No changed-file information is available." + if len(lines) >= max_files: + lines.append("...[file list truncated]") + return "\n\n".join(lines) + + def list_pulls_for_base( + self, + owner: str, + repo: str, + base: str, + state: str = "closed", + max_pages: int = 10, + ) -> list[PullInfo]: + pulls: list[PullInfo] = [] + for page in range(1, max_pages + 1): + data = self.get_api_json( + f"/repos/{owner}/{repo}/pulls", + params={ + "state": state, + "base": base, + "sort": "created", + "direction": "asc", + "per_page": 100, + "page": page, + }, + ) + if not isinstance(data, list) or not data: + break + for pull in data: + if not isinstance(pull, dict): + continue + pulls.append( + PullInfo( + url=str(pull.get("html_url") or ""), + title=str(pull.get("title") or ""), + body=str(pull.get("body") or ""), + author=str((pull.get("user") or {}).get("login") or ""), + head_ref=str((pull.get("head") or {}).get("ref") or ""), + base_ref=str((pull.get("base") or {}).get("ref") or ""), + files_summary="", + merged_at=str(pull.get("merged_at") or ""), + created_at=str(pull.get("created_at") or ""), + ) + ) + if len(data) < 100: + break + return pulls + + def get_original_author_for_cherry_pick( + self, row_number: int, cp_pr_link: str, cp_pr_title: str, current_author: str + ) -> str: + default_owner, default_repo, _cp_number = parse_github_url(cp_pr_link, "pull") + target_ref = find_original_pr_reference(cp_pr_title, default_owner, default_repo) + if not target_ref: + try: + cp_info = self.get_pull(cp_pr_link) + target_ref = ( + find_original_pr_reference(cp_info.head_ref, default_owner, default_repo) + or find_original_pr_reference(cp_info.title, default_owner, default_repo) + or find_original_pr_reference(cp_info.body, default_owner, default_repo) + ) + except Exception as exc: # noqa: BLE001 + print( + f"Row {row_number}: failed to inspect cherry-pick PR " + f"{cp_pr_link}: {exc}", + file=sys.stderr, + ) + return current_author + + if not target_ref: + print( + f"Row {row_number}: failed to find the original PR for " + f"{cp_pr_link} created by 
{current_author}.", + file=sys.stderr, + ) + return current_author + + target_owner, target_repo, target_number = target_ref + target_pr_link = f"https://github.com/{target_owner}/{target_repo}/pull/{target_number}" + try: + return self.get_pull(target_pr_link).author or current_author + except Exception as exc: # noqa: BLE001 + print( + f"Row {row_number}: failed to find the non-bot author for " + f"{cp_pr_link}: {exc}", + file=sys.stderr, + ) + return current_author + + +def find_original_pr_reference( + text: str, + default_owner: str, + default_repo: str, +) -> tuple[str, str, str] | None: + text = text or "" + marker_lines = [ + line + for line in text.splitlines() + if re.search(r"\b(backport|cherry[- ]?pick|original|source|from)\b", line, re.I) + ] + for line in marker_lines: + reference = find_pr_reference_in_text(line, default_owner, default_repo) + if reference: + return reference + + same_repo = re.search(r"\(#(?P\d+)\)\s*$", text) + if same_repo: + return default_owner, default_repo, same_repo.group("number") + + branch = re.search(r"(?:^|[/_-])cherry-pick-(?P\d+)(?:\D|$)", text) + if branch: + return default_owner, default_repo, branch.group("number") + + if "\n" not in text and len(text) <= 300: + return find_pr_reference_in_text(text, default_owner, default_repo) + + return None + + +def find_pr_reference_in_text( + text: str, + default_owner: str, + default_repo: str, +) -> tuple[str, str, str] | None: + for full_url in GITHUB_ITEM_URL_RE.finditer(text or ""): + if full_url.group("kind") == "pull": + return full_url.group("owner"), full_url.group("repo"), full_url.group("number") + + cross_repo = re.search( + r"(?[\w.-]+)/(?P[\w.-]+)#(?P\d+)\b", + text or "", + ) + if cross_repo: + return cross_repo.group("owner"), cross_repo.group("repo"), cross_repo.group("number") + + same_repo = re.search(r"\(#(?P\d+)\)\s*$", text or "") + if same_repo: + return default_owner, default_repo, same_repo.group("number") + + return None diff --git a/scripts/release_notes_ai/markdown_writer.py b/scripts/release_notes_ai/markdown_writer.py new file mode 100644 index 0000000000000..38d02cdf51950 --- /dev/null +++ b/scripts/release_notes_ai/markdown_writer.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +from collections import defaultdict +from pathlib import Path + +from .constants import TOOL_COMPONENTS, TOP_LEVEL_COMPONENTS +from .models import MarkdownEntry +from .utils import normalize_component, str_value + + +def write_release_file( + output_file: Path, + version: str, + release_date: str, + entries: list[MarkdownEntry], +) -> None: + major_minor = ".".join(version.split(".")[:2]) + grouped = group_markdown_entries(entries) + content: list[str] = [ + "---", + f"title: TiDB {version} Release Notes", + f"summary: Learn about the improvements and bug fixes in TiDB {version}.", + "---", + "", + f"# TiDB {version} Release Notes", + "", + f"Release date: {release_date}", + "", + f"TiDB version: {version}", + "", + "Quick access: " + f"[Quick start](https://docs.pingcap.com/tidb/v{major_minor}/quick-start-with-tidb) | " + f"[Production deployment](https://docs.pingcap.com/tidb/v{major_minor}/production-deployment-using-tiup)", + "", + ] + + content.extend(render_section("## Improvements", grouped["improvement"])) + content.append("") + content.extend(render_section("## Bug fixes", grouped["bug_fix"])) + content.append("") + while content and content[-1] == "": + content.pop() + + output_file.parent.mkdir(parents=True, exist_ok=True) + output_file.write_text("\n".join(content) + "\n", 
encoding="utf-8") + + +def group_markdown_entries(entries: list[MarkdownEntry]) -> dict[str, dict[str, list[MarkdownEntry]]]: + grouped: dict[str, dict[str, list[MarkdownEntry]]] = { + "improvement": defaultdict(list), + "bug_fix": defaultdict(list), + } + for entry in entries: + if entry.note_type not in grouped: + continue + component = normalize_component(entry.component) or "Other" + grouped[entry.note_type][component].append(entry) + return grouped + + +def render_section(title: str, entries_by_component: dict[str, list[MarkdownEntry]]) -> list[str]: + lines = [title, ""] + top_components = [ + component + for component in TOP_LEVEL_COMPONENTS + if component in entries_by_component and entries_by_component[component] + ] + unknown_top_components = sorted( + component + for component in entries_by_component + if component not in TOP_LEVEL_COMPONENTS + and component not in TOOL_COMPONENTS + and entries_by_component[component] + ) + tool_components = [ + component + for component in TOOL_COMPONENTS + if component in entries_by_component and entries_by_component[component] + ] + + for component in top_components + unknown_top_components: + lines.append(f"+ {component}") + lines.append("") + for entry in entries_by_component[component]: + lines.append(f" {note_with_component_marker(entry)}") + lines.append("") + + if tool_components: + lines.append("+ Tools") + lines.append("") + for component in tool_components: + lines.append(f" + {component}") + lines.append("") + for entry in entries_by_component[component]: + lines.append(f" {note_with_component_marker(entry)}") + lines.append("") + + while lines and lines[-1] == "": + lines.pop() + return lines + + +def note_with_component_marker(entry: MarkdownEntry) -> str: + note = ensure_release_note_bullet(entry.note) + raw_component = sanitize_component_marker(entry.raw_component) + if not raw_component or "" + + +def ensure_release_note_bullet(note: str) -> str: + note = str_value(note) + if note.startswith("- "): + return note + if note.startswith(("+ ", "* ")): + return "- " + note[2:].lstrip() + return f"- {note}" + + +def sanitize_component_marker(component: str) -> str: + return " ".join(str_value(component).replace("--", "- -").split()) diff --git a/scripts/release_notes_ai/models.py b/scripts/release_notes_ai/models.py new file mode 100644 index 0000000000000..7e89853cb3202 --- /dev/null +++ b/scripts/release_notes_ai/models.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import dataclasses + + +@dataclasses.dataclass +class ExistingNote: + url: str + line: str + file_name: str + note_level: str + authors: list[str] + note_type: str | None + component: str | None + + @property + def dup_text(self) -> str: + return f"- (dup): {self.file_name} {self.note_level} {self.line}" + + +@dataclasses.dataclass +class PullInfo: + url: str + title: str + body: str + author: str + head_ref: str + base_ref: str + files_summary: str + merged_at: str = "" + created_at: str = "" + + +@dataclasses.dataclass +class IssueInfo: + url: str + title: str + body: str + labels: list[str] + + +@dataclasses.dataclass +class GeneratedNote: + note_type: str + release_note: str + needs_review: bool + reason: str + + +@dataclasses.dataclass +class RowContext: + row_number: int + component: str + raw_component: str + issue_type: str + pr_title: str + pr_authors: list[str] + pr_urls: list[str] + issue_urls: list[str] + formatted_release_note: str + issues: list[IssueInfo] + pulls: list[PullInfo] + + +@dataclasses.dataclass +class RowInput: + row_number: int + 
component: str + raw_component: str + issue_type: str + pr_title: str + pr_authors: list[str] + pr_urls: list[str] + issue_urls: list[str] + formatted_release_note: str + + +@dataclasses.dataclass +class GitHubDataCache: + issues: dict[str, IssueInfo] + pulls: dict[str, PullInfo] + + +@dataclasses.dataclass +class MarkdownEntry: + note_type: str + component: str + note: str + raw_component: str = "" + + +@dataclasses.dataclass +class RowGenerationResult: + row_number: int + component: str + raw_component: str + note_type: str | None + note: str | None + error: str | None + needs_review: bool = False + reason: str = "" diff --git a/scripts/release_notes_ai/requirements.txt b/scripts/release_notes_ai/requirements.txt new file mode 100644 index 0000000000000..89cfc13a2a578 --- /dev/null +++ b/scripts/release_notes_ai/requirements.txt @@ -0,0 +1,3 @@ +openpyxl>=3.1 +requests>=2.31 +urllib3>=1.26 diff --git a/scripts/release_notes_ai/scope_filter.py b/scripts/release_notes_ai/scope_filter.py new file mode 100644 index 0000000000000..019824068d6e1 --- /dev/null +++ b/scripts/release_notes_ai/scope_filter.py @@ -0,0 +1,366 @@ +from __future__ import annotations + +import copy +import re +from dataclasses import dataclass +from datetime import date, datetime +from pathlib import Path +from typing import Any + +from .excel_workbook import get_header +from .models import PullInfo +from .utils import parse_github_url, str_value + + +OUT_OF_SCOPE_SHEET = "PRs_not_in_scope" +REASON_HEADER = "Reason" +SCOPE_REQUIRED_HEADERS = {"pr_status", "pr_merge_time", "pr_link"} + + +@dataclass(frozen=True) +class Version: + major: int + minor: int + patch: int + + @property + def release_branch(self) -> str: + return f"release-{self.major}.{self.minor}" + + @property + def text(self) -> str: + return f"{self.major}.{self.minor}.{self.patch}" + + @property + def previous_patch_text(self) -> str: + return f"{self.major}.{self.minor}.{self.patch - 1}" + + +@dataclass(frozen=True) +class TimelineRelease: + version: Version + display_version: str + release_date: date + + +@dataclass +class ScopeContext: + version: Version + releases_dir: Path + github: Any + base_branch_start_date: date | None = None + timeline: list[TimelineRelease] | None = None + release_branch_pulls: dict[str, list[PullInfo]] | None = None + + def __post_init__(self) -> None: + if self.timeline is None: + self.timeline = parse_release_timeline(self.releases_dir / "release-timeline.md") + if self.release_branch_pulls is None: + self.release_branch_pulls = {} + + +def move_prs_not_in_scope( + workbook: Any, + sheet: Any, + version: str, + releases_dir: Path, + github: Any, + base_branch_start_date: date | None = None, + target_sheet_name: str = OUT_OF_SCOPE_SHEET, +) -> int: + header = get_header(sheet) + missing = sorted(SCOPE_REQUIRED_HEADERS - set(header)) + if missing: + raise ValueError( + "Missing required Excel columns for scope preprocessing: " + + ", ".join(missing) + ) + + context = ScopeContext( + version=parse_version(version), + releases_dir=releases_dir, + github=github, + base_branch_start_date=base_branch_start_date, + ) + target = ensure_out_of_scope_sheet(workbook, sheet, target_sheet_name) + + rows_to_move: list[tuple[int, str]] = [] + for row_number in range(2, sheet.max_row + 1): + reason = out_of_scope_reason(sheet, header, row_number, context) + if reason: + rows_to_move.append((row_number, reason)) + + for row_number, reason in rows_to_move: + append_row_with_reason(sheet, target, row_number, reason) + + for row_number, 
_reason in reversed(rows_to_move): + sheet.delete_rows(row_number, 1) + + if rows_to_move: + print( + f"Moved {len(rows_to_move)} row(s) to {target_sheet_name} before release-note generation", + flush=True, + ) + return len(rows_to_move) + + +def ensure_out_of_scope_sheet(workbook: Any, source_sheet: Any, target_sheet_name: str) -> Any: + if target_sheet_name in workbook.sheetnames: + target = workbook[target_sheet_name] + if target.max_row == 0 or not target.cell(row=1, column=1).value: + copy_header(source_sheet, target) + else: + ensure_reason_header(source_sheet, target) + return target + + target = workbook.create_sheet(target_sheet_name) + copy_header(source_sheet, target) + return target + + +def copy_header(source_sheet: Any, target_sheet: Any) -> None: + for column in range(1, source_sheet.max_column + 1): + copy_cell(source_sheet.cell(row=1, column=column), target_sheet.cell(row=1, column=column)) + ensure_reason_header(source_sheet, target_sheet) + + +def ensure_reason_header(source_sheet: Any, target_sheet: Any) -> None: + target_sheet.cell(row=1, column=source_sheet.max_column + 1, value=REASON_HEADER) + + +def append_row_with_reason(source_sheet: Any, target_sheet: Any, row_number: int, reason: str) -> None: + target_row = target_sheet.max_row + 1 + for column in range(1, source_sheet.max_column + 1): + copy_cell( + source_sheet.cell(row=row_number, column=column), + target_sheet.cell(row=target_row, column=column), + ) + target_sheet.cell(row=target_row, column=source_sheet.max_column + 1, value=reason) + + +def copy_cell(source_cell: Any, target_cell: Any) -> None: + target_cell.value = source_cell.value + if source_cell.has_style: + target_cell._style = copy.copy(source_cell._style) + if source_cell.number_format: + target_cell.number_format = source_cell.number_format + if source_cell.hyperlink: + target_cell._hyperlink = copy.copy(source_cell.hyperlink) + if source_cell.comment: + target_cell.comment = copy.copy(source_cell.comment) + + +def out_of_scope_reason( + sheet: Any, + header: dict[str, int], + row_number: int, + context: ScopeContext, +) -> str | None: + status = str_value(sheet.cell(row=row_number, column=header["pr_status"]).value).lower() + if status != "merged": + return f"PR status is {status or 'empty'}, not merged" + + merge_date = parse_date_value(sheet.cell(row=row_number, column=header["pr_merge_time"]).value) + if not merge_date: + return None + + if context.version.patch >= 1: + previous_date = release_date_for_version(context.timeline or [], context.version.previous_patch_text) + if not previous_date: + raise ValueError( + f"Cannot find release date for previous version {context.version.previous_patch_text} " + "in releases/release-timeline.md" + ) + if merge_date < previous_date: + return ( + f"PR merged on {merge_date.isoformat()}, before previous release " + f"{context.version.previous_patch_text} date {previous_date.isoformat()}" + ) + return None + + return major_release_out_of_scope_reason(sheet, header, row_number, merge_date, context) + + +def major_release_out_of_scope_reason( + sheet: Any, + header: dict[str, int], + row_number: int, + merge_date: date, + context: ScopeContext, +) -> str | None: + latest_zero = latest_released_zero_patch(context.timeline or [], context.version.text) + if not latest_zero: + raise ValueError("Cannot find a previously released x.y.0 version in releases/release-timeline.md") + + if merge_date >= latest_zero.release_date: + return None + + branch_start = context.base_branch_start_date or 
estimated_release_branch_start_date(context, latest_zero)
+    if not branch_start:
+        return None
+    if merge_date < branch_start:
+        return (
+            f"PR merged on {merge_date.isoformat()}, before estimated {latest_zero.version.release_branch} "
+            f"branch start date {branch_start.isoformat()}"
+        )
+
+    pr_link = str_value(sheet.cell(row=row_number, column=header["pr_link"]).value)
+    cherry_pick = find_release_branch_cherry_pick(context, latest_zero, pr_link)
+    if not cherry_pick:
+        return None
+    cherry_pick_date = parse_date_value(cherry_pick.merged_at)
+    if cherry_pick_date and cherry_pick_date < latest_zero.release_date:
+        return (
+            f"Cherry-pick PR {cherry_pick.url} merged on {cherry_pick_date.isoformat()} "
+            f"before {latest_zero.display_version} release date {latest_zero.release_date.isoformat()}"
+        )
+    return None
+
+
+def estimated_release_branch_start_date(
+    context: ScopeContext,
+    latest_zero: TimelineRelease,
+) -> date | None:
+    branch_pulls = release_branch_pulls(context, latest_zero.version.release_branch)
+    created_dates = [parse_date_value(pull.created_at) for pull in branch_pulls]
+    created_dates = [value for value in created_dates if value]
+    return min(created_dates) if created_dates else None
+
+
+def find_release_branch_cherry_pick(
+    context: ScopeContext,
+    latest_zero: TimelineRelease,
+    pr_link: str,
+) -> PullInfo | None:
+    try:
+        owner, repo, number = parse_github_url(pr_link, "pull")
+    except ValueError:
+        return None
+    if (owner, repo) != ("pingcap", "tidb"):
+        return None
+
+    candidates = []
+    for pull in release_branch_pulls(context, latest_zero.version.release_branch):
+        haystack = "\n".join([pull.title, pull.body, pull.head_ref, pull.url])
+        if references_original_pr(haystack, owner, repo, number, pr_link):
+            candidates.append(pull)
+
+    merged_candidates = [
+        pull for pull in candidates if parse_date_value(pull.merged_at)
+    ]
+    if not merged_candidates:
+        return None
+    return min(
+        merged_candidates,
+        key=lambda pull: parse_date_value(pull.merged_at) or date.max,
+    )
+
+
+def references_original_pr(
+    text: str,
+    owner: str,
+    repo: str,
+    number: str,
+    pr_link: str,
+) -> bool:
+    text = text or ""
+    # Match the exact PR URL, owner/repo#number, a bare #number, or a
+    # cherry-pick branch reference.
+    patterns = [
+        re.escape(pr_link),
+        rf"(?<![\w./]){re.escape(owner)}/{re.escape(repo)}#{number}(?!\d)",
+        rf"(?<!\w)#{number}(?!\d)",
+        rf"cherry-pick-{number}(?!\d)",
+    ]
+    return any(re.search(pattern, text) for pattern in patterns)
+
+
+def release_branch_pulls(context: ScopeContext, branch: str) -> list[PullInfo]:
+    assert context.release_branch_pulls is not None
+    if branch not in context.release_branch_pulls:
+        context.release_branch_pulls[branch] = context.github.list_pulls_for_base(
+            "pingcap",
+            "tidb",
+            branch,
+            state="closed",
+        )
+    return context.release_branch_pulls[branch]
+
+
+def parse_release_timeline(path: Path) -> list[TimelineRelease]:
+    releases: list[TimelineRelease] = []
+    if not path.exists():
+        raise FileNotFoundError(f"Cannot find release timeline: {path}")
+    pattern = re.compile(
+        r"\|\s*\[(?P<version>[^\]]+)\]\([^)]+\)\s*\|\s*(?P<date>\d{4}-\d{2}-\d{2})\s*\|"
+    )
+    for line in path.read_text(encoding="utf-8").splitlines():
+        match = pattern.search(line)
+        if not match:
+            continue
+        try:
+            version = parse_version(match.group("version"))
+        except ValueError:
+            continue
+        release_date = date.fromisoformat(match.group("date"))
+        releases.append(TimelineRelease(version, match.group("version"), release_date))
+    return releases
+
+
+def release_date_for_version(timeline: list[TimelineRelease], version_text: str) -> date | None:
+    for release in timeline:
+        if release.version.text == version_text:
+            return release.release_date
+    return None
+
+
+def latest_released_zero_patch(
+    timeline: list[TimelineRelease],
+    target_version_text: str,
+) -> TimelineRelease | None:
+    zero_patch_releases = [
+        release
+        for release in timeline
+        if release.version.patch == 0 and release.version.text != target_version_text
+    ]
+    if not zero_patch_releases:
+        return None
+    return max(zero_patch_releases, key=lambda release: release.release_date)
+
+
+def parse_version(version: str) -> Version:
+    match = re.match(r"^(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)", version)
+    if not match:
+        raise ValueError(f"Invalid TiDB version: {version}")
+    return Version(
+        major=int(match.group("major")),
+        minor=int(match.group("minor")),
+        patch=int(match.group("patch")),
+    )
+
+
+def parse_date_value(value: Any) -> date | None:
+    if value is None:
+        return None
+    if isinstance(value, datetime):
+        return value.date()
+    if isinstance(value, date):
+        return value
+    text = str_value(value)
+    if not text:
+        return None
+    text = text.replace("Z", "+00:00")
+    try:
+        return datetime.fromisoformat(text).date()
+    except ValueError:
+        pass
+    match = re.search(r"\d{4}-\d{2}-\d{2}", text)
+    if match:
+        return date.fromisoformat(match.group())
+    return None
diff --git a/scripts/release_notes_ai/utils.py b/scripts/release_notes_ai/utils.py
new file mode 100644
index 0000000000000..1c0641787019c
--- /dev/null
+++ b/scripts/release_notes_ai/utils.py
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+from typing import Any, Iterable
+
+from .constants import (
+    COMPONENT_ALIASES,
+    GITHUB_ITEM_URL_RE,
+    ISSUE_URL_RE,
+    PR_URL_RE,
+    TOOL_COMPONENTS,
+    TOP_LEVEL_COMPONENTS,
+)
+
+
+def parse_github_url(url: str, expected_kind: str) -> tuple[str, str, str]:
+    match = GITHUB_ITEM_URL_RE.search(url)
+    if not match:
+        raise ValueError(f"Invalid GitHub URL: {url}")
+    if match.group("kind") != expected_kind:
+        raise ValueError(f"Expected a GitHub {expected_kind} URL, got: {url}")
+    return match.group("owner"), match.group("repo"), match.group("number")
+
+
+def extract_issue_urls(text: str) -> list[str]:
+    return unique_ordered(match.group() for match in ISSUE_URL_RE.finditer(text or ""))
+
+
+def extract_pr_urls(text: str) -> list[str]:
+    return unique_ordered(match.group() for match in PR_URL_RE.finditer(text or ""))
+
+
+def replace_author_markdown(text: str, old_author: str, new_author: str) -> str:
+    text = text or ""
+    return
text.replace( + f"[{old_author}](https://github.com/{old_author}", + f"[{new_author}](https://github.com/{new_author}", + ) + + +def normalize_component(component: str) -> str: + cleaned = " ".join(str_value(component).split()) + if not cleaned: + return "" + return COMPONENT_ALIASES.get(cleaned.lower(), cleaned) + + +def normalize_raw_component(component: Any) -> str: + return " ".join(str_value(component).split()) + + +def normalized_release_component(component: str) -> str | None: + normalized = normalize_component(component) + if normalized in TOP_LEVEL_COMPONENTS or normalized in TOOL_COMPONENTS: + return normalized + return None + + +def split_multi_value(value: Any) -> list[str]: + text = str_value(value) + if not text: + return [] + return [item.strip() for item in text.replace("\n", ",").split(",") if item.strip()] + + +def split_lines(value: Any) -> list[str]: + text = str_value(value) + if not text: + return [] + return [line.strip() for line in text.splitlines() if line.strip()] + + +def unique_ordered(values: Iterable[str]) -> list[str]: + result: list[str] = [] + seen: set[str] = set() + for value in values: + cleaned = str_value(value) + if not cleaned or cleaned in seen: + continue + seen.add(cleaned) + result.append(cleaned) + return result + + +def str_value(value: Any) -> str: + if value is None: + return "" + return str(value).strip() diff --git a/scripts/release_notes_generate_ai.py b/scripts/release_notes_generate_ai.py new file mode 100644 index 0000000000000..5d1e701f56cec --- /dev/null +++ b/scripts/release_notes_generate_ai.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +"""CLI entry point for generating English TiDB release notes with AI.""" + +from release_notes_ai.cli import main + + +if __name__ == "__main__": + raise SystemExit(main()) From 4583453ca945f965df345d18bbc7941e4ae045fb Mon Sep 17 00:00:00 2001 From: qiancai Date: Wed, 29 Apr 2026 17:55:48 +0800 Subject: [PATCH 2/4] improve the filter logic and move the prompt to an independent file --- scripts/release_notes_ai/ai_client.py | 83 +++--- scripts/release_notes_ai/cli.py | 8 + scripts/release_notes_ai/constants.py | 3 + scripts/release_notes_ai/excel_workbook.py | 240 ++++++++++++++++-- .../release_notes_ai/prompts/generation.md | 40 +++ scripts/release_notes_generate_ai.py | 37 ++- 6 files changed, 345 insertions(+), 66 deletions(-) create mode 100644 scripts/release_notes_ai/prompts/generation.md diff --git a/scripts/release_notes_ai/ai_client.py b/scripts/release_notes_ai/ai_client.py index 503e28b63023b..d2770e3fbc56c 100644 --- a/scripts/release_notes_ai/ai_client.py +++ b/scripts/release_notes_ai/ai_client.py @@ -12,7 +12,11 @@ from pathlib import Path from typing import Any -from .constants import BUG_FIXES_REFERENCE, IMPROVEMENTS_REFERENCE +from .constants import ( + BUG_FIXES_REFERENCE, + GENERATION_PROMPT_TEMPLATE, + IMPROVEMENTS_REFERENCE, +) from .models import GeneratedNote, RowContext @@ -134,6 +138,7 @@ def build_generation_prompt( expected_links: list[str], contributors: list[str], ) -> str: + prompt_template = load_prompt_template(GENERATION_PROMPT_TEMPLATE) improvements_reference = load_reference_file(IMPROVEMENTS_REFERENCE) bug_fixes_reference = load_reference_file(BUG_FIXES_REFERENCE) context = { @@ -148,46 +153,16 @@ def build_generation_prompt( "issues": [dataclasses.asdict(issue) for issue in row_context.issues], "pull_requests": [dataclasses.asdict(pull) for pull in row_context.pulls], } - return textwrap.dedent( - f""" - You write exactly one English 
TiDB release note entry. - - Return only a JSON object with exactly these keys: - - type: "improvement" or "bug_fix" - - release_note: one Markdown bullet that starts with "- " - - needs_review: true or false - - reason: a short reason for the type and wording - - Rules: - - Write from the user's perspective. - - Use the Excel issue_type as a strong signal, but decide the final type from the issue, - PR description, and code changes. - - For improvements, follow the Improvements reference below. - - For bug fixes, follow the Bug fixes reference below. - - Do not end the release note with a period. - - Include every expected link in Markdown release-note style. - - Include every contributor as @[user](https://github.com/user). - - If there is no issue URL, use the PR link as the suffix link. - - Do not expose internal function names unless they are the user-visible behavior. - - If the available context is insufficient, still draft the best note and set needs_review - to true. - - Expected links: - {json.dumps(expected_links, ensure_ascii=False, indent=2)} - - Contributors: - {json.dumps(contributors, ensure_ascii=False, indent=2)} - - Row context: - {json.dumps(context, ensure_ascii=False, indent=2)} - - Improvements reference: - {improvements_reference} - - Bug fixes reference: - {bug_fixes_reference} - """ - ).strip() + return render_prompt_template( + prompt_template, + { + "EXPECTED_LINKS": json.dumps(expected_links, ensure_ascii=False, indent=2), + "CONTRIBUTORS": json.dumps(contributors, ensure_ascii=False, indent=2), + "ROW_CONTEXT": json.dumps(context, ensure_ascii=False, indent=2), + "IMPROVEMENTS_REFERENCE": improvements_reference, + "BUG_FIXES_REFERENCE": bug_fixes_reference, + }, + ) def build_repair_prompt(original_prompt: str, errors: list[str]) -> str: @@ -206,6 +181,32 @@ def build_repair_prompt(original_prompt: str, errors: list[str]) -> str: ).strip() +def render_prompt_template(template: str, values: dict[str, str]) -> str: + for key, value in values.items(): + template = template.replace(f"{{{{{key}}}}}", value) + return template.strip() + + +@lru_cache(maxsize=None) +def load_prompt_template(path: Path) -> str: + try: + return strip_prompt_template_heading(path.read_text(encoding="utf-8")) + except FileNotFoundError as exc: + raise FileNotFoundError( + f"Cannot find release-note prompt template: {path}. " + "Make sure scripts/release_notes_ai/prompts/generation.md exists." 
+ ) from exc + + +def strip_prompt_template_heading(template: str) -> str: + lines = template.splitlines() + if lines and lines[0].startswith("# "): + lines = lines[1:] + if lines and not lines[0].strip(): + lines = lines[1:] + return "\n".join(lines) + + @lru_cache(maxsize=None) def load_reference_file(path: Path) -> str: try: diff --git a/scripts/release_notes_ai/cli.py b/scripts/release_notes_ai/cli.py index ee1d79a074c4a..fdeaccfda3efb 100644 --- a/scripts/release_notes_ai/cli.py +++ b/scripts/release_notes_ai/cli.py @@ -13,6 +13,7 @@ generate_notes_without_ai, generate_notes_for_sheet, merge_rows_by_issue_and_component, + move_rows_with_issues_already_in_same_series, prepare_sheet_columns, sort_sheet_rows_by_component, store_existing_release_notes, @@ -164,6 +165,13 @@ def main() -> int: clear_output_columns(sheet, header, clear_ai=args.force_regenerate) existing_notes = store_existing_release_notes(Path(args.releases_dir), args.version) + move_rows_with_issues_already_in_same_series( + workbook, + sheet, + header, + existing_notes, + args.version, + ) update_pr_authors_and_dup_notes( sheet, header, diff --git a/scripts/release_notes_ai/constants.py b/scripts/release_notes_ai/constants.py index c3e947167a23b..89cca90e52d2b 100644 --- a/scripts/release_notes_ai/constants.py +++ b/scripts/release_notes_ai/constants.py @@ -21,6 +21,9 @@ / "references" / "bug-fixes.md" ) +GENERATION_PROMPT_TEMPLATE = ( + REPO_ROOT / "scripts" / "release_notes_ai" / "prompts" / "generation.md" +) BOT_AUTHORS = {"ti-chi-bot", "ti-srebot"} # Keep the misspelled source column name because tirelease exports it this way. diff --git a/scripts/release_notes_ai/excel_workbook.py b/scripts/release_notes_ai/excel_workbook.py index 260b4b807d04e..177f28fd64c9f 100644 --- a/scripts/release_notes_ai/excel_workbook.py +++ b/scripts/release_notes_ai/excel_workbook.py @@ -43,6 +43,7 @@ GRAY_FILL = PatternFill(start_color="D3D3D3", end_color="D3D3D3", fill_type="solid") +SAME_SERIES_REASON_HEADER = "reason" def prepare_sheet_columns(sheet: Any) -> dict[str, int]: @@ -168,7 +169,7 @@ def first_issue_url_for_row(sheet: Any, header: dict[str, int], row_number: int) def store_existing_release_notes(releases_dir: Path, version: str) -> list[ExistingNote]: existing_notes: list[ExistingNote] = [] - seen: set[tuple[str, tuple[str, ...]]] = set() + seen: set[tuple[str, tuple[str, ...], str]] = set() target_version = parse_semver_tuple(version) for file_path in sorted(releases_dir.rglob("*.md")): @@ -179,25 +180,26 @@ def store_existing_release_notes(releases_dir: Path, version: str) -> list[Exist for raw_line in file: line = raw_line.strip() authors = AUTHOR_RE.findall(line) - item_url = GITHUB_ITEM_URL_RE.search(line) - if item_url: - key = (item_url.group(), tuple(authors)) - if key in seen: - continue - seen.add(key) + item_urls = [match.group() for match in GITHUB_ITEM_URL_RE.finditer(line)] + if item_urls: note_level = level1 + level2 + level3 note_type, component = classify_note_level(note_level) - existing_notes.append( - ExistingNote( - url=item_url.group(), - line=line, - file_name=file_path.name, - note_level=note_level, - authors=authors, - note_type=note_type, - component=component, + for item_url in item_urls: + key = (item_url, tuple(authors), file_path.name) + if key in seen: + continue + seen.add(key) + existing_notes.append( + ExistingNote( + url=item_url, + line=line, + file_name=file_path.name, + note_level=note_level, + authors=authors, + note_type=note_type, + component=component, + ) ) - ) continue heading = 
parse_release_note_heading(raw_line) @@ -283,22 +285,207 @@ def update_pr_authors_and_dup_notes( author_cell = sheet.cell(row=row_number, column=header["pr_author"]) current_author = str_value(author_cell.value) - issue_url = first_issue_url_for_row(sheet, header, row_number) - if not issue_url: + issue_urls = issue_urls_for_row(sheet, header, row_number) + if not issue_urls: continue current_authors = split_multi_value(current_author) dup_notes = [] - for existing in existing_notes_by_url.get(issue_url, []): - if existing.authors and not set(current_authors).intersection(existing.authors): - continue - dup_notes.append(existing.dup_text) + for issue_url in issue_urls: + for existing in existing_notes_by_url.get(issue_url, []): + if existing.authors and not set(current_authors).intersection(existing.authors): + continue + dup_notes.append(existing.dup_text) if dup_notes: dup_col = header["published_release_notes"] sheet.cell(row=row_number, column=dup_col, value="\n".join(unique_ordered(dup_notes))) fill_row(sheet, row_number) - print(f"Row {row_number}: found duplicated release note for {issue_url}", flush=True) + print( + f"Row {row_number}: found duplicated release note for {', '.join(issue_urls)}", + flush=True, + ) + + +def move_rows_with_issues_already_in_same_series( + workbook: Any, + sheet: Any, + header: dict[str, int], + existing_notes: list[ExistingNote], + version: str, +) -> int: + files_by_issue_url = same_series_release_files_by_issue_url(existing_notes, version) + if not files_by_issue_url: + return 0 + + target_sheet_name = same_series_issues_sheet_name(version) + target, reason_col = ensure_sheet_with_reason(workbook, sheet, target_sheet_name) + rows_to_move: list[tuple[int, str]] = [] + + for row_number in range(2, sheet.max_row + 1): + issue_urls = issue_urls_for_row(sheet, header, row_number) + reason = same_series_issue_reason(issue_urls, files_by_issue_url) + if reason: + rows_to_move.append((row_number, reason)) + + for row_number, reason in rows_to_move: + append_row_with_reason(sheet, target, row_number, reason, reason_col) + + for row_number, _reason in reversed(rows_to_move): + sheet.delete_rows(row_number, 1) + + if rows_to_move: + print( + f"Moved {len(rows_to_move)} row(s) to {target_sheet_name} because their issues " + "already appear in earlier release notes from the same major.minor series", + flush=True, + ) + return len(rows_to_move) + + +def same_series_release_files_by_issue_url( + existing_notes: list[ExistingNote], + version: str, +) -> dict[str, list[str]]: + target_version = parse_semver_tuple(version) + files_by_issue_url: dict[str, list[str]] = {} + + for existing in existing_notes: + match = GITHUB_ITEM_URL_RE.search(existing.url) + if not match or match.group("kind") != "issues": + continue + + file_version = release_file_semver_tuple(Path(existing.file_name)) + if not file_version: + continue + if file_version[:2] != target_version[:2] or file_version >= target_version: + continue + + files = files_by_issue_url.setdefault(existing.url, []) + if existing.file_name not in files: + files.append(existing.file_name) + + for issue_url, files in list(files_by_issue_url.items()): + files_by_issue_url[issue_url] = sorted(files, key=release_file_name_sort_key) + return files_by_issue_url + + +def same_series_issues_sheet_name(version: str) -> str: + major, minor, _patch = parse_semver_tuple(version) + return f"issues_already_in_earlier_v{major}.{minor}_notes" + + +def same_series_issue_reason( + issue_urls: list[str], + files_by_issue_url: dict[str, 
list[str]], +) -> str | None: + reasons = [] + for issue_url in issue_urls: + files = files_by_issue_url.get(issue_url) + if files: + reasons.append(f"{issue_url} appears in {', '.join(files)}") + return "; ".join(reasons) if reasons else None + + +def release_file_name_sort_key(file_name: str) -> tuple[int, int, int, str]: + version = release_file_semver_tuple(Path(file_name)) + if not version: + return (sys.maxsize, sys.maxsize, sys.maxsize, file_name) + return (*version, file_name) + + +def ensure_sheet_with_reason( + workbook: Any, + source_sheet: Any, + target_sheet_name: str, +) -> tuple[Any, int]: + if target_sheet_name in workbook.sheetnames: + target = workbook[target_sheet_name] + if not str_value(target.cell(row=1, column=1).value): + reason_col = copy_header_with_reason(source_sheet, target) + else: + reason_col = ensure_same_series_reason_header(source_sheet, target) + return target, reason_col + + target = workbook.create_sheet(target_sheet_name) + reason_col = copy_header_with_reason(source_sheet, target) + return target, reason_col + + +def copy_header_with_reason(source_sheet: Any, target_sheet: Any) -> int: + for column in range(1, source_sheet.max_column + 1): + copy_cell( + source_sheet.cell(row=1, column=column), + target_sheet.cell(row=1, column=column), + ) + return ensure_same_series_reason_header(source_sheet, target_sheet) + + +def ensure_same_series_reason_header(source_sheet: Any, target_sheet: Any) -> int: + reason_col = find_header_column(target_sheet, SAME_SERIES_REASON_HEADER) + if not reason_col: + reason_col = max(source_sheet.max_column, target_sheet.max_column) + 1 + copy_missing_header_cells(source_sheet, target_sheet) + target_sheet.cell(row=1, column=reason_col, value=SAME_SERIES_REASON_HEADER) + return reason_col + + while reason_col <= source_sheet.max_column: + target_sheet.insert_cols(reason_col) + reason_col += 1 + + copy_missing_header_cells(source_sheet, target_sheet) + return reason_col + + +def copy_missing_header_cells(source_sheet: Any, target_sheet: Any) -> None: + for column in range(1, source_sheet.max_column + 1): + if not str_value(target_sheet.cell(row=1, column=column).value): + copy_cell( + source_sheet.cell(row=1, column=column), + target_sheet.cell(row=1, column=column), + ) + + +def find_header_column(sheet: Any, header_name: str) -> int | None: + for column in range(1, sheet.max_column + 1): + if str_value(sheet.cell(row=1, column=column).value) == header_name: + return column + return None + + +def append_row_with_reason( + source_sheet: Any, + target_sheet: Any, + row_number: int, + reason: str, + reason_col: int, +) -> None: + target_row = target_sheet.max_row + 1 + source_dimension = source_sheet.row_dimensions[row_number] + target_dimension = target_sheet.row_dimensions[target_row] + target_dimension.height = source_dimension.height + target_dimension.hidden = source_dimension.hidden + target_dimension.outlineLevel = source_dimension.outlineLevel + target_dimension.collapsed = source_dimension.collapsed + + for column in range(1, source_sheet.max_column + 1): + copy_cell( + source_sheet.cell(row=row_number, column=column), + target_sheet.cell(row=target_row, column=column), + ) + target_sheet.cell(row=target_row, column=reason_col, value=reason) + + +def copy_cell(source_cell: Any, target_cell: Any) -> None: + target_cell.value = source_cell.value + if source_cell.has_style: + target_cell._style = copy.copy(source_cell._style) + if source_cell.number_format: + target_cell.number_format = source_cell.number_format + if 
source_cell.hyperlink:
+        target_cell._hyperlink = copy.copy(source_cell.hyperlink)
+    if source_cell.comment:
+        target_cell.comment = copy.copy(source_cell.comment)
 
 
 def apply_bot_author_replacements(
@@ -407,7 +594,12 @@ def resolve_bot_author(github: Any, request: tuple[int, str, str, str]) -> str:
 
 def index_existing_notes_by_url(existing_notes: list[ExistingNote]) -> dict[str, list[ExistingNote]]:
     indexed: dict[str, list[ExistingNote]] = {}
+    seen: set[tuple[str, tuple[str, ...]]] = set()
     for existing in existing_notes:
+        key = (existing.url, tuple(existing.authors))
+        if key in seen:
+            continue
+        seen.add(key)
         indexed.setdefault(existing.url, []).append(existing)
     return indexed
 
diff --git a/scripts/release_notes_ai/prompts/generation.md b/scripts/release_notes_ai/prompts/generation.md
new file mode 100644
index 0000000000000..8eb5b1e993381
--- /dev/null
+++ b/scripts/release_notes_ai/prompts/generation.md
@@ -0,0 +1,40 @@
+# Generation Prompt
+
+You are a senior technical writer with deep knowledge of TiDB.
+
+Your task is to write exactly one English release note entry for a TiDB issue or PR.
+
+Return only a JSON object with exactly these keys:
+
+- type: "improvement" or "bug_fix"
+- release_note: one Markdown bullet that starts with "- "
+- needs_review: true or false
+- reason: a short reason for the type and wording
+
+Rules:
+
+- Write from the user's perspective.
+- Use the Excel issue_type as a strong signal, but decide the final type from the issue, PR description, and code changes.
+- For improvements, follow the Improvements reference below.
+- For bug fixes, follow the Bug fixes reference below.
+- Do not end the release note with a period.
+- Include every expected link in Markdown release-note style.
+- Include every contributor as @[user](https://github.com/user).
+- If there is no issue URL, use the PR link as the suffix link.
+- Do not expose internal function names unless they are the user-visible behavior.
+- If the available context is insufficient, still draft the best note and set needs_review to true.
+
+Expected links:
+{{EXPECTED_LINKS}}
+
+Contributors:
+{{CONTRIBUTORS}}
+
+Row context:
+{{ROW_CONTEXT}}
+
+Improvements reference:
+{{IMPROVEMENTS_REFERENCE}}
+
+Bug fixes reference:
+{{BUG_FIXES_REFERENCE}}
diff --git a/scripts/release_notes_generate_ai.py b/scripts/release_notes_generate_ai.py
index 5d1e701f56cec..bdcb30ba8433b 100644
--- a/scripts/release_notes_generate_ai.py
+++ b/scripts/release_notes_generate_ai.py
@@ -1,7 +1,42 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-"""CLI entry point for generating English TiDB release notes with AI."""
+"""This script generates English TiDB release notes from a workbook with PR links and issue links of a specific release.
+
+What does this script do?
+
+  - Filter out the PRs and issues that are not in the target release scope. For example, PRs that were merged before the previous patch release.
+  - Move the issues that already appeared in earlier notes from the same major.minor series to a separate worksheet.
+  - Mark the release notes that are already published in other series as ``(dup)`` and reuse the release notes for the same issue.
+  - Generate the English release note using AI according to the release note draft provided in the PR, the description and code changes of the PR, and the description of the issue.
+  - Map components in the workbook to the corresponding release note components.
+  - Generate the release note file for the target release according to the release note template file.
+
+Typical usage:
+
+    python3 scripts/release_notes_generate_ai.py \
+        --version 8.5.7 \
+        --excel /path/to/tirelease.xlsx \
+        --releases-dir releases \
+        --github-token-file /path/to/github-token.txt
+
+Useful options:
+
+    --involve-ai-generation OFF
+        Skip AI generation and use the source ``formated_release_note`` values
+        for non-duplicate rows.
+
+    --force-regenerate
+        Clear existing AI-generated notes in the processed workbook and generate
+        them again.
+
+    --output-release-file /path/to/release-8.5.7.md
+        Write the generated Markdown to a custom path. By default, the output is
+        ``release-<version>-updated-by-ai.md`` under ``--releases-dir``.
+
+Run ``python3 scripts/release_notes_generate_ai.py --help`` for the full option
+list.
+"""
 
 from release_notes_ai.cli import main

From 195da0b95b759c8511ec1130213d4ee50ec68224 Mon Sep 17 00:00:00 2001
From: qiancai
Date: Wed, 6 May 2026 14:59:38 +0800
Subject: [PATCH 3/4] update the naming rule of the release note file

---
 scripts/release_notes_ai/cli.py      | 14 ++++++++++++--
 scripts/release_notes_generate_ai.py |  6 ++++--
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/scripts/release_notes_ai/cli.py b/scripts/release_notes_ai/cli.py
index fdeaccfda3efb..7aea9b9ee43db 100644
--- a/scripts/release_notes_ai/cli.py
+++ b/scripts/release_notes_ai/cli.py
@@ -58,7 +58,10 @@ def parse_args() -> argparse.Namespace:
     )
     parser.add_argument(
         "--output-release-file",
-        help="Output Markdown file. Defaults to release-{version}-updated-by-ai.md.",
+        help=(
+            "Output Markdown file. Defaults to release-{version}-updated-by-ai.md "
+            "if release-{version}.md already exists, otherwise release-{version}.md."
+        ),
     )
     parser.add_argument(
         "--ai-timeout",
@@ -142,7 +145,7 @@ def main() -> int:
     output_file = (
         Path(args.output_release_file)
         if args.output_release_file
-        else Path(args.releases_dir) / f"release-{args.version}-updated-by-ai.md"
+        else default_output_release_file(Path(args.releases_dir), args.version)
     )
 
     excel_path = Path(args.excel)
@@ -219,6 +222,13 @@ def parse_on_off(value: str) -> str:
     return normalized
 
 
+def default_output_release_file(releases_dir: Path, version: str) -> Path:
+    release_file = releases_dir / f"release-{version}.md"
+    if release_file.is_file():
+        return releases_dir / f"release-{version}-updated-by-ai.md"
+    return release_file
+
+
 def default_processed_excel_path(excel_path: Path) -> Path:
     return excel_path.with_name(f"{excel_path.stem}_processed{excel_path.suffix}")
 
diff --git a/scripts/release_notes_generate_ai.py b/scripts/release_notes_generate_ai.py
index bdcb30ba8433b..03d1ec7f8a59b 100644
--- a/scripts/release_notes_generate_ai.py
+++ b/scripts/release_notes_generate_ai.py
@@ -31,8 +31,10 @@
         them again.
 
     --output-release-file /path/to/release-8.5.7.md
-        Write the generated Markdown to a custom path. By default, the output is
-        ``release-<version>-updated-by-ai.md`` under ``--releases-dir``.
+        Write the generated Markdown to a custom path. By default, the output
+        under ``--releases-dir`` is ``release-<version>-updated-by-ai.md`` if
+        ``release-<version>.md`` already exists, otherwise
+        ``release-<version>.md``.
 
 Run ``python3 scripts/release_notes_generate_ai.py --help`` for the full option
 list.
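The new naming rule is easy to check in isolation. Below is a minimal sketch that reuses the `default_output_release_file` helper added in this patch; the temporary directory and version are illustrative only:

```python
import tempfile
from pathlib import Path


# Default output naming rule from this patch: reuse release-<version>.md when
# it does not exist yet, otherwise write a separate *-updated-by-ai.md file
# instead of overwriting the existing release note.
def default_output_release_file(releases_dir: Path, version: str) -> Path:
    release_file = releases_dir / f"release-{version}.md"
    if release_file.is_file():
        return releases_dir / f"release-{version}-updated-by-ai.md"
    return release_file


with tempfile.TemporaryDirectory() as tmp:
    releases_dir = Path(tmp)
    # No existing file yet: the default target is release-8.5.7.md.
    assert default_output_release_file(releases_dir, "8.5.7").name == "release-8.5.7.md"
    # Once release-8.5.7.md exists, the script avoids overwriting it.
    (releases_dir / "release-8.5.7.md").write_text("existing notes", encoding="utf-8")
    assert (
        default_output_release_file(releases_dir, "8.5.7").name
        == "release-8.5.7-updated-by-ai.md"
    )
```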
From 259870d8186173c3a079e96f33c9e28a169d75e1 Mon Sep 17 00:00:00 2001
From: qiancai
Date: Wed, 6 May 2026 16:24:10 +0800
Subject: [PATCH 4/4] add the usage descriptions for the scripts

---
 scripts/release-notes-generator-readme.md | 79 +++++++++++++++++++++++
 scripts/release_notes_ai/__init__.py      |  1 -
 scripts/release_notes_ai/cli.py           | 34 +++-------
 scripts/release_notes_ai/constants.py     |  2 +-
 scripts/release_notes_generate_ai.py      | 35 ++--------
 5 files changed, 94 insertions(+), 57 deletions(-)
 create mode 100644 scripts/release-notes-generator-readme.md
 delete mode 100644 scripts/release_notes_ai/__init__.py

diff --git a/scripts/release-notes-generator-readme.md b/scripts/release-notes-generator-readme.md
new file mode 100644
index 0000000000000..d966eaf3b83c4
--- /dev/null
+++ b/scripts/release-notes-generator-readme.md
@@ -0,0 +1,79 @@
+# Release notes generator
+
+`scripts/release_notes_generate_ai.py` generates English TiDB release notes based on the PRs and issues in a specified Excel file.
+
+## What it does
+
+**Scope filtering:**
+
+- Filters out PRs and issues that are not in the target release scope. For example, it filters out PRs that were merged before the previous patch release.
+- Moves issues that already appeared in earlier notes from the same major.minor series to a separate worksheet.
+
+**Duplicate handling:**
+
+- Marks release notes that are already published in other series as `(dup)` and reuses the release notes for the same issue.
+
+**Release note generation:**
+
+- Generates English release notes using AI, based on the release note draft provided in the PR, the PR description and code changes, and the issue description.
+- Maps components in the workbook to the corresponding release note components.
+
+**File output in Markdown:**
+
+- Generates the release note file for the target release according to the release note template file.
+- Adds the improvements and bug fixes of each component to the corresponding sections of the release note file.
+
+## Prerequisites
+
+- Install Python dependencies:
+
+    ```bash
+    python3 -m pip install -r scripts/release_notes_ai/requirements.txt
+    ```
+
+- Prepare a GitHub token with access to the relevant public repositories and set it in the `GITHUB_TOKEN` environment variable:
+
+    ```bash
+    export GITHUB_TOKEN=<your-github-token>
+    ```
+
+- Install and log in to the Codex CLI. The default `--ai-command` uses `codex exec`, so the installed Codex CLI must support `exec`, `--sandbox read-only`, `--ephemeral`, `--output-schema`, `--output-last-message`, and `-m <model>`.
+
+## Typical usage
+
+```bash
+python3 scripts/release_notes_generate_ai.py \
+    --version 8.5.7 \
+    --excel /path/to/release-note-excel.xlsx \
+    --releases-dir releases
+```
+
+## Option descriptions
+
+| Option | Required | Default value | Usage example | Description |
+| --- | --- | --- | --- | --- |
+| `--version <version>` | Yes | None | `--version 8.5.7` | Target TiDB version. This value is used for scope filtering, existing release-note lookup, generated Markdown front matter, and the default output file name. |
+| `--excel <path>` | Yes | None | `--excel /path/to/release-note-excel.xlsx` | Path to the source release note Excel file. The source workbook is not overwritten. The processed workbook is written to `<excel-name>_processed.xlsx`. |
+| `--releases-dir <path>` | Yes | None | `--releases-dir releases` | Path to the existing English release notes directory. The script scans this directory for historical release notes and writes the generated Markdown under this directory unless `--output-release-file` is specified. |
+| `--sheet <name>` | No | `pr_for_release_note` | `--sheet pr_for_release_note` | Workbook sheet to process. |
+| `--ai-command <command>` | No | `codex --ask-for-approval never exec --sandbox read-only --ephemeral` | `--ai-command "codex --ask-for-approval never exec --sandbox read-only --ephemeral"` | Command used to invoke the AI generator. The prompt is passed through standard input. When the command is `codex exec`, the script also passes `--output-schema` and `--output-last-message`. |
+| `--ai-model <model>` | No | `gpt-5.4` | `--ai-model gpt-5.4` | Model name passed to `codex exec` with `-m`. |
+| `--involve-ai-generation <ON\|OFF>` | No | `ON` | `--involve-ai-generation OFF` | Whether to generate non-duplicate release notes with AI. Use `ON` to invoke AI, or `OFF` to use the source `formated_release_note` values. |
+| `--output-release-file <path>` | No | Conditional | `--output-release-file /path/to/release-8.5.7.md` | Write the generated Markdown to a custom path. By default, the output under `--releases-dir` is `release-<version>-updated-by-ai.md` if `release-<version>.md` already exists, otherwise `release-<version>.md`. |
+| `--ai-timeout <seconds>` | No | `600` | `--ai-timeout 600` | Timeout in seconds for each AI command invocation. |
+| `--ai-workers <count>` | No | `3` | `--ai-workers 3` | Number of concurrent AI command invocations. |
+| `--github-workers <count>` | No | `8` | `--github-workers 8` | Number of concurrent GitHub API prefetch workers. |
+| `--author-workers <count>` | No | `3` | `--author-workers 3` | Number of concurrent workers used to resolve bot-authored cherry-pick PR authors. |
+| `--checkpoint-interval <N>` | No | `1` | `--checkpoint-interval 1` | Save the processed workbook after every N completed AI rows. Use `0` to disable checkpoint saves. |
+| `--force-regenerate` | No | Disabled | `--force-regenerate` | Clear existing AI-generated notes in the processed workbook and generate all non-duplicate rows again. |
+| `--release-date <date>` | No | `TBD` | `--release-date "August 14, 2025"` | Release date text for the generated Markdown header. |
+| `--skip-scope-preprocess` | No | Disabled | `--skip-scope-preprocess` | Skip moving not-in-scope PR rows to the `PRs_not_in_scope` sheet. |
+| `--scope-base-branch-start-date <YYYY-MM-DD>` | No | Estimated from release history | `--scope-base-branch-start-date 2025-01-01` | Override the estimated release-m.n branch start date for x.y.0 scope preprocessing. The value must use the `YYYY-MM-DD` format. |
+
+## Generated files
+
+- The source Excel file passed to `--excel` is not overwritten.
+- The processed Excel file is written to `<excel-name>_processed.xlsx` next to the source workbook.
+- The generated Markdown file is written to `--output-release-file` when that option is specified.
+- If `--output-release-file` is omitted and `release-<version>.md` already exists under `--releases-dir`, the generated Markdown file is written to `release-<version>-updated-by-ai.md`.
+- If `--output-release-file` is omitted and `release-<version>.md` does not exist under `--releases-dir`, the generated Markdown file is written to `release-<version>.md`.
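One detail worth knowing when customizing `scripts/release_notes_ai/prompts/generation.md`: the `{{KEY}}` placeholders are filled by plain string substitution, not by a templating engine. A minimal sketch of that behavior, reusing the `render_prompt_template` helper from `ai_client.py` (the sample template text below is illustrative, not the shipped prompt):

```python
# Sketch of how {{KEY}} placeholders in the prompt template are replaced.
def render_prompt_template(template: str, values: dict[str, str]) -> str:
    for key, value in values.items():
        # f"{{{{{key}}}}}" renders as "{{KEY}}": doubled braces are literal braces.
        template = template.replace(f"{{{{{key}}}}}", value)
    return template.strip()


prompt = render_prompt_template(
    "Expected links:\n{{EXPECTED_LINKS}}\n\nContributors:\n{{CONTRIBUTORS}}",
    {
        "EXPECTED_LINKS": '["https://github.com/pingcap/tidb/issues/12345"]',
        "CONTRIBUTORS": '["example-user"]',
    },
)
assert "{{EXPECTED_LINKS}}" not in prompt and "{{CONTRIBUTORS}}" not in prompt
```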
diff --git a/scripts/release_notes_ai/__init__.py b/scripts/release_notes_ai/__init__.py
deleted file mode 100644
index 65f7e128c779b..0000000000000
--- a/scripts/release_notes_ai/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Helpers for generating TiDB release notes with AI."""
diff --git a/scripts/release_notes_ai/cli.py b/scripts/release_notes_ai/cli.py
index 7aea9b9ee43db..b1e913a7bca0d 100644
--- a/scripts/release_notes_ai/cli.py
+++ b/scripts/release_notes_ai/cli.py
@@ -26,7 +26,7 @@
 
 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
-        description="Generate English release notes with AI from a tirelease workbook."
+        description="Generate English release notes with AI based on the PRs and issues in a specified Excel file."
     )
     parser.add_argument("--version", required=True, help="Target TiDB version, for example 8.5.7.")
     parser.add_argument("--excel", required=True, help="Path to the release note Excel workbook.")
@@ -36,7 +36,6 @@ def parse_args() -> argparse.Namespace:
         help="Path to the existing English release notes directory.",
     )
     parser.add_argument("--sheet", default="pr_for_release_note", help="Workbook sheet name.")
-    parser.add_argument("--github-token-file", help="Path to a GitHub token file.")
     parser.add_argument(
         "--ai-command",
@@ -137,7 +136,10 @@ def main() -> int:
         if not base_branch_start_date:
             raise ValueError("--scope-base-branch-start-date must use YYYY-MM-DD format")
 
-    token = load_github_token(args.github_token_file)
+    try:
+        token = load_github_token()
+    except ValueError as exc:
+        raise SystemExit(f"error: {exc}") from None
     github = GitHubClient(token)
     involve_ai_generation = args.involve_ai_generation == "ON"
     ai = AIClient(args.ai_command, args.ai_model, args.ai_timeout) if involve_ai_generation else None
@@ -278,24 +280,8 @@ def save_workbook_safely(workbook: openpyxl.Workbook, excel_path: Path) -> None:
         raise RuntimeError(f"Failed to save workbook {excel_path}: {exc}") from exc
 
 
-def load_github_token(token_file: str | None) -> str | None:
-    import shutil
-    import subprocess
-
-    if token_file:
-        return Path(token_file).read_text(encoding="utf-8").strip()
-    if os.environ.get("GITHUB_TOKEN"):
-        return os.environ["GITHUB_TOKEN"].strip()
-    gh = shutil.which("gh")
-    if not gh:
-        return None
-    completed = subprocess.run(
-        [gh, "auth", "token"],
-        text=True,
-        capture_output=True,
-        timeout=10,
-        check=False,
-    )
-    if completed.returncode == 0 and completed.stdout.strip():
-        return completed.stdout.strip()
-    return None
+def load_github_token() -> str:
+    token = os.environ.get("GITHUB_TOKEN", "").strip()
+    if not token:
+        raise ValueError("GITHUB_TOKEN environment variable is required")
+    return token
diff --git a/scripts/release_notes_ai/constants.py b/scripts/release_notes_ai/constants.py
index 89cca90e52d2b..12d0d9ca8dcee 100644
--- a/scripts/release_notes_ai/constants.py
+++ b/scripts/release_notes_ai/constants.py
@@ -26,7 +26,7 @@
 )
 BOT_AUTHORS = {"ti-chi-bot", "ti-srebot"}
 
-# Keep the misspelled source column name because tirelease exports it this way.
+# Keep the misspelled source column name because the release note Excel file exports it this way.
 REQUIRED_HEADERS = {
     "pr_author",
     "pr_link",
diff --git a/scripts/release_notes_generate_ai.py b/scripts/release_notes_generate_ai.py
index 03d1ec7f8a59b..0a6d3eb761268 100644
--- a/scripts/release_notes_generate_ai.py
+++ b/scripts/release_notes_generate_ai.py
@@ -1,43 +1,16 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-"""This script generates English TiDB release notes from a workbook with PR links and issue links of a specific release.
-
-What does this script do?
-
-  - Filter out the PRs and issues that are not in the target release scope. For example, PRs that were merged before the previous patch release.
-  - Move the issues that already appeared in earlier notes from the same major.minor series to a separate worksheet.
-  - Mark the release notes that are already published in other series as ``(dup)`` and reuse the release notes for the same issue.
-  - Generate the English release note using AI according to the release note draft provided in the PR, the description and code changes of the PR, and the description of the issue.
-  - Map components in the workbook to the corresponding release note components.
-  - Generate the release note file for the target release according to the release note template file.
+"""Generate TiDB improvements and bug fixes for release notes based on the PRs and issues in a specified Excel file.
 
 Typical usage:
 
     python3 scripts/release_notes_generate_ai.py \
         --version 8.5.7 \
-        --excel /path/to/tirelease.xlsx \
-        --releases-dir releases \
-        --github-token-file /path/to/github-token.txt
+        --excel /path/to/release-note-excel.xlsx \
+        --releases-dir releases
 
-Useful options:
-
-    --involve-ai-generation OFF
-        Skip AI generation and use the source ``formated_release_note`` values
-        for non-duplicate rows.
-
-    --force-regenerate
-        Clear existing AI-generated notes in the processed workbook and generate
-        them again.
-
-    --output-release-file /path/to/release-8.5.7.md
-        Write the generated Markdown to a custom path. By default, the output
-        under ``--releases-dir`` is ``release-<version>-updated-by-ai.md`` if
-        ``release-<version>.md`` already exists, otherwise
-        ``release-<version>.md``.
-
-Run ``python3 scripts/release_notes_generate_ai.py --help`` for the full option
-list.
+For detailed usage and options, see scripts/release-notes-generator-readme.md.
 """
 
 from release_notes_ai.cli import main
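For reference, an illustrative AI response that satisfies the JSON contract enforced by `ai_output_schema()` and described in `prompts/generation.md` might look like the sketch below; the issue link and wording are invented for the example:

```python
import json

# Illustrative response (not taken from the source): exactly the four keys
# required by ai_output_schema(), with "type" restricted to "improvement"
# or "bug_fix".
example_response = {
    "type": "bug_fix",
    "release_note": (
        "- Fix the issue that an example query returns wrong results in some cases "
        "[#12345](https://github.com/pingcap/tidb/issues/12345)"
    ),
    "needs_review": False,
    "reason": "The linked issue reports a user-visible wrong-result bug",
}
print(json.dumps(example_response, indent=2))
```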