From c3ac49025f12a5be765d3d83c08ad1f0a2e3cf16 Mon Sep 17 00:00:00 2001
From: xixirangrang <35301108+hfxsd@users.noreply.github.com>
Date: Wed, 23 Mar 2022 15:28:29 +0800
Subject: [PATCH 01/18] Update tidb-lightning-faq.md

---
 tidb-lightning/tidb-lightning-faq.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tidb-lightning/tidb-lightning-faq.md b/tidb-lightning/tidb-lightning-faq.md
index f12af703887db..45ce601ad200a 100644
--- a/tidb-lightning/tidb-lightning-faq.md
+++ b/tidb-lightning/tidb-lightning-faq.md
@@ -119,7 +119,7 @@ If `tidb-lightning` abnormally exited, the cluster might be stuck in the "import

{{< copyable "shell-regular" >}}

```sh
-tidb-lightning-ctl --fetch-mode
+tidb-lightning-ctl --config tidb-lightning.toml --fetch-mode
```

You can force the cluster back to "normal mode" using the following command:

{{< copyable "shell-regular" >}}

```sh
-tidb-lightning-ctl --switch-mode=normal
+tidb-lightning-ctl --config tidb-lightning.toml --switch-mode=normal
```

## Can TiDB Lightning be used with 1-Gigabit network card?

From c4e83db28e689e722b75b2e0fa2a7c08b15ac702 Mon Sep 17 00:00:00 2001
From: xixirangrang <35301108+hfxsd@users.noreply.github.com>
Date: Mon, 6 Jun 2022 10:41:48 +0800
Subject: [PATCH 02/18] Update alert-rules.md

---
 alert-rules.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/alert-rules.md b/alert-rules.md
index c06f3330addbc..abe9503b24b90 100644
--- a/alert-rules.md
+++ b/alert-rules.md
@@ -419,7 +419,7 @@ This section gives the alert rules for the TiKV component.

* Solution:

-    Adjust the `block-cache-size` value of both `rockdb.defaultcf` and `rocksdb.writecf`.
+    Adjust the `block-cache-size` value of both `rocksdb.defaultcf` and `rocksdb.writecf`.

#### `TiKV_GC_can_not_work`

From e11f2901d79cdf88b23d59bafd4de3b2c154587c Mon Sep 17 00:00:00 2001
From: xixirangrang <35301108+hfxsd@users.noreply.github.com>
Date: Mon, 6 Jun 2022 10:43:10 +0800
Subject: [PATCH 03/18] Revert "Update alert-rules.md"

This reverts commit c4e83db28e689e722b75b2e0fa2a7c08b15ac702.

---
 alert-rules.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/alert-rules.md b/alert-rules.md
index abe9503b24b90..c06f3330addbc 100644
--- a/alert-rules.md
+++ b/alert-rules.md
@@ -419,7 +419,7 @@ This section gives the alert rules for the TiKV component.

* Solution:

-    Adjust the `block-cache-size` value of both `rocksdb.defaultcf` and `rocksdb.writecf`.
+    Adjust the `block-cache-size` value of both `rockdb.defaultcf` and `rocksdb.writecf`.

#### `TiKV_GC_can_not_work`

From 0db7df99f2ab4562d4961b7fd2803491c9d5915d Mon Sep 17 00:00:00 2001
From: xixirangrang <35301108+hfxsd@users.noreply.github.com>
Date: Tue, 1 Aug 2023 11:26:57 +0800
Subject: [PATCH 04/18] Update dumpling-overview.md

---
 dumpling-overview.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dumpling-overview.md b/dumpling-overview.md
index de8bf19a336d9..f4cf5c6ace2a0 100644
--- a/dumpling-overview.md
+++ b/dumpling-overview.md
@@ -108,7 +108,7 @@ In the command above:

+ The `-t` option specifies the number of threads for the export. Increasing the number of threads improves the concurrency of Dumpling and the export speed, and also increases the database's memory consumption. Therefore, it is not recommended to set the number too large. Usually, it's less than 64.

-+ The `-r` option specifies the maximum number of rows in a single file. 
With this option specified, Dumpling enables the in-table concurrency to speed up the export and reduce the memory usage. When the upstream database is TiDB v3.0 or later versions, a `-r` value greater than 0 indicates that the TiDB region information is used for splitting and the specific `-r` value does not affect the split algorithm. When the upstream database is MySQL and the primary key is of the `int` type, specifying `-r` can also enable the in-table concurrency. ++ The `-r` option enables the in-table concurrency to speed up the export. When the source database is TiDB, a `-r` value greater than 0 indicates that the TiDB region information is used for splitting, and reduces the memory usage. The specific `-r` value does not affect the split algorithm. When the source database is MySQL and the primary key is of the `int` type, specifying `-r` can also enable the in-table concurrency. + The `-F` option is used to specify the maximum size of a single file (the unit here is `MiB`; inputs like `5GiB` or `8KB` are also acceptable). It is recommended to keep its value to 256 MiB or less if you plan to use TiDB Lightning to load this file into a TiDB instance. > **Note:** From ae9709d906e7ffdaec0e461810a5427061603e7a Mon Sep 17 00:00:00 2001 From: xixirangrang <35301108+hfxsd@users.noreply.github.com> Date: Tue, 12 Sep 2023 16:26:10 +0800 Subject: [PATCH 05/18] update go to v1.21 --- hardware-and-software-requirements.md | 2 +- pd-control.md | 2 +- pd-recover.md | 2 +- tidb-control.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hardware-and-software-requirements.md b/hardware-and-software-requirements.md index 15d824567af30..bb8e7ae2ecfd5 100644 --- a/hardware-and-software-requirements.md +++ b/hardware-and-software-requirements.md @@ -47,7 +47,7 @@ As an open-source distributed SQL database with high performance, TiDB can be de | Libraries required for compiling and running TiDB | Version | | :--- | :--- | -| Golang | 1.20 or later | +| Golang | 1.21 or later | | Rust | nightly-2022-07-31 or later | | GCC | 7.x | | LLVM | 13.0 or later | diff --git a/pd-control.md b/pd-control.md index 20e391c4b9852..53eb4a9321d6d 100644 --- a/pd-control.md +++ b/pd-control.md @@ -33,7 +33,7 @@ To obtain `pd-ctl` of the latest version, download the TiDB server installation ### Compile from source code -1. [Go](https://golang.org/) 1.20 or later is required because the Go modules are used. +1. [Go](https://golang.org/) 1.21 or later is required because the Go modules are used. 2. In the root directory of the [PD project](https://github.com/pingcap/pd), use the `make` or `make pd-ctl` command to compile and generate `bin/pd-ctl`. ## Usage diff --git a/pd-recover.md b/pd-recover.md index 1910e6c46ba24..24b3bd3049d80 100644 --- a/pd-recover.md +++ b/pd-recover.md @@ -10,7 +10,7 @@ PD Recover is a disaster recovery tool of PD, used to recover the PD cluster whi ## Compile from source code -+ [Go](https://golang.org/) 1.20 or later is required because the Go modules are used. ++ [Go](https://golang.org/) 1.21 or later is required because the Go modules are used. + In the root directory of the [PD project](https://github.com/pingcap/pd), use the `make pd-recover` command to compile and generate `bin/pd-recover`. 
> **Note:** diff --git a/tidb-control.md b/tidb-control.md index 921a27a761ded..2c79e6ff347f4 100644 --- a/tidb-control.md +++ b/tidb-control.md @@ -26,7 +26,7 @@ After installing TiUP, you can use `tiup ctl:v tidb` command to ### Compile from source code -- Compilation environment requirement: [Go](https://golang.org/) 1.20 or later +- Compilation environment requirement: [Go](https://golang.org/) 1.21 or later - Compilation procedures: Go to the root directory of the [TiDB Control project](https://github.com/pingcap/tidb-ctl), use the `make` command to compile, and generate `tidb-ctl`. - Compilation documentation: you can find the help files in the `doc` directory; if the help files are lost or you want to update them, use the `make doc` command to generate the help files. From f0b8b492a6596caf9b78d23428211449fe42b92e Mon Sep 17 00:00:00 2001 From: xixirangrang <35301108+hfxsd@users.noreply.github.com> Date: Tue, 12 Sep 2023 16:27:52 +0800 Subject: [PATCH 06/18] Revert "update go to v1.21" This reverts commit ae9709d906e7ffdaec0e461810a5427061603e7a. --- hardware-and-software-requirements.md | 2 +- pd-control.md | 2 +- pd-recover.md | 2 +- tidb-control.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hardware-and-software-requirements.md b/hardware-and-software-requirements.md index bb8e7ae2ecfd5..15d824567af30 100644 --- a/hardware-and-software-requirements.md +++ b/hardware-and-software-requirements.md @@ -47,7 +47,7 @@ As an open-source distributed SQL database with high performance, TiDB can be de | Libraries required for compiling and running TiDB | Version | | :--- | :--- | -| Golang | 1.21 or later | +| Golang | 1.20 or later | | Rust | nightly-2022-07-31 or later | | GCC | 7.x | | LLVM | 13.0 or later | diff --git a/pd-control.md b/pd-control.md index 53eb4a9321d6d..20e391c4b9852 100644 --- a/pd-control.md +++ b/pd-control.md @@ -33,7 +33,7 @@ To obtain `pd-ctl` of the latest version, download the TiDB server installation ### Compile from source code -1. [Go](https://golang.org/) 1.21 or later is required because the Go modules are used. +1. [Go](https://golang.org/) 1.20 or later is required because the Go modules are used. 2. In the root directory of the [PD project](https://github.com/pingcap/pd), use the `make` or `make pd-ctl` command to compile and generate `bin/pd-ctl`. ## Usage diff --git a/pd-recover.md b/pd-recover.md index 24b3bd3049d80..1910e6c46ba24 100644 --- a/pd-recover.md +++ b/pd-recover.md @@ -10,7 +10,7 @@ PD Recover is a disaster recovery tool of PD, used to recover the PD cluster whi ## Compile from source code -+ [Go](https://golang.org/) 1.21 or later is required because the Go modules are used. ++ [Go](https://golang.org/) 1.20 or later is required because the Go modules are used. + In the root directory of the [PD project](https://github.com/pingcap/pd), use the `make pd-recover` command to compile and generate `bin/pd-recover`. > **Note:** diff --git a/tidb-control.md b/tidb-control.md index 2c79e6ff347f4..921a27a761ded 100644 --- a/tidb-control.md +++ b/tidb-control.md @@ -26,7 +26,7 @@ After installing TiUP, you can use `tiup ctl:v tidb` command to ### Compile from source code -- Compilation environment requirement: [Go](https://golang.org/) 1.21 or later +- Compilation environment requirement: [Go](https://golang.org/) 1.20 or later - Compilation procedures: Go to the root directory of the [TiDB Control project](https://github.com/pingcap/tidb-ctl), use the `make` command to compile, and generate `tidb-ctl`. 
- Compilation documentation: you can find the help files in the `doc` directory; if the help files are lost or you want to update them, use the `make doc` command to generate the help files.

From f0b8b492a6596caf9b78d23428211449fe42b92e Mon Sep 17 00:00:00 2001
From: xixirangrang <35301108+hfxsd@users.noreply.github.com>
Date: Fri, 19 Apr 2024 10:14:53 +0800
Subject: [PATCH 07/18] add encoding='utf-8' for Windows

---
 scripts/release_notes_update_pr_author_info_add_dup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/release_notes_update_pr_author_info_add_dup.py b/scripts/release_notes_update_pr_author_info_add_dup.py
index 92039bf89380d..22f7c0cf56a49 100644
--- a/scripts/release_notes_update_pr_author_info_add_dup.py
+++ b/scripts/release_notes_update_pr_author_info_add_dup.py
@@ -174,7 +174,7 @@ def create_release_file(version, dup_notes_levels, dup_notes):
     release_file = os.path.join(ext_path, f'release-{version}.md')
     shutil.copyfile(template_file, release_file)
     # Replace the file content
-    with open(release_file, 'r+') as file:
+    with open(release_file, 'r+', encoding='utf-8') as file:
         content = file.read()
         content = content.replace('x.y.z', version)
         version_parts = version.split('.')

From e25b4afaf0a5c4f98a688d4aecd6409c53298579 Mon Sep 17 00:00:00 2001
From: houfaxin
Date: Mon, 23 Dec 2024 12:10:58 +0800
Subject: [PATCH 08/18] Merge remote-tracking branch 'upstream/master'

From 0c77222e5336b8a430a08f74d17f5bfbd3d5d8fe Mon Sep 17 00:00:00 2001
From: houfaxin
Date: Tue, 21 Jan 2025 18:09:45 +0800
Subject: [PATCH 09/18] Merge remote-tracking branch 'upstream/master'

From e6edea0666f177a89b4a32842a16bbd2f4632d72 Mon Sep 17 00:00:00 2001
From: houfaxin
Date: Thu, 7 Aug 2025 11:46:59 +0800
Subject: [PATCH 10/18] Update tidb-performance-tuning-config.md

---
 tidb-performance-tuning-config.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tidb-performance-tuning-config.md b/tidb-performance-tuning-config.md
index ed1c5fab3f63a..6bb43748c1ba8 100644
--- a/tidb-performance-tuning-config.md
+++ b/tidb-performance-tuning-config.md
@@ -272,8 +272,8 @@ The following table compares throughput (operations per second) between the base

| Item | Baseline (OPS) | Optimized (OPS) | Improvement |
| ---------| ---- | ----| ----|
-| load data | 2858.5 | 5074.3 | +77.59% |
-| workloada | 2243.0 | 12804.3 | +470.86% |
+| Load data | 2858.5 | 5074.3 | +77.59% |
+| Workload A | 2243.0 | 12804.3 | +470.86% |

#### Performance analysis

@@ -461,7 +461,7 @@ You can control the execution mode of DML statements using the [`tidb_dml_type`]

To use the bulk DML execution mode, set `tidb_dml_type` to `"bulk"`. This mode optimizes bulk data loading without conflicts and reduces memory usage during large write operations. Before using this mode, ensure that:

-- Auto-commit is enabled.
+- [`autocommit`](/system-variables.md#autocommit) is enabled.
- The [`pessimistic-auto-commit`](/tidb-configuration-file.md#pessimistic-auto-commit-new-in-v600) configuration item is set to `false`.
```sql From 15bf44e8888023ee0385181bd3d94f61b7cedfd9 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Fri, 24 Apr 2026 15:06:48 +0800 Subject: [PATCH 11/18] Weekly TiDB PR Doc Check --- .../workflows/tidb-pr-weekly-doc-check.yml | 102 ++++++ .../check_tidb_prs_and_create_docs_cn_pr.py | 309 ++++++++++++++++++ 2 files changed, 411 insertions(+) create mode 100644 .github/workflows/tidb-pr-weekly-doc-check.yml create mode 100644 scripts/check_tidb_prs_and_create_docs_cn_pr.py diff --git a/.github/workflows/tidb-pr-weekly-doc-check.yml b/.github/workflows/tidb-pr-weekly-doc-check.yml new file mode 100644 index 0000000000000..1a2db56ec3563 --- /dev/null +++ b/.github/workflows/tidb-pr-weekly-doc-check.yml @@ -0,0 +1,102 @@ +name: Weekly TiDB PR Doc Check (docs-cn) + +on: + schedule: + # 01:00 every Monday in Asia/Shanghai (UTC+8) => 17:00 every Sunday UTC + - cron: "0 17 * * 0" + workflow_dispatch: + +jobs: + weekly-check: + if: github.repository == 'pingcap/docs' + runs-on: ubuntu-latest + + permissions: + contents: read + + env: + SOURCE_REPO: pingcap/tidb + OUTPUT_DIR: tmp/tidb-doc-check + DOCS_CN_BASE_BRANCH: master + + steps: + - name: Checkout docs repo + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Scan merged TiDB PRs in last weekly window + id: scan + env: + GITHUB_TOKEN: ${{ secrets.DOCS_CN_BOT_TOKEN || github.token }} + SOURCE_REPO: ${{ env.SOURCE_REPO }} + OUTPUT_DIR: ${{ env.OUTPUT_DIR }} + DOCS_CN_BASE_BRANCH: ${{ env.DOCS_CN_BASE_BRANCH }} + run: | + set -euo pipefail + python scripts/check_tidb_prs_and_create_docs_cn_pr.py + + - name: Skip when no docs updates are needed + if: steps.scan.outputs.needs_update != 'true' + run: | + echo "No doc-impact PRs found in this weekly window." + + - name: Checkout docs-cn repo + if: steps.scan.outputs.needs_update == 'true' + uses: actions/checkout@v4 + with: + repository: pingcap/docs-cn + token: ${{ secrets.DOCS_CN_BOT_TOKEN }} + ref: ${{ steps.scan.outputs.docs_cn_base_branch }} + path: docs-cn + persist-credentials: false + + - name: Copy weekly report into docs-cn + if: steps.scan.outputs.needs_update == 'true' + shell: bash + run: | + set -euo pipefail + mkdir -p docs-cn/weekly-doc-sync + cp "${{ steps.scan.outputs.report_path }}" "docs-cn/weekly-doc-sync/${{ steps.scan.outputs.report_filename }}" + + - name: Create docs-cn PR + if: steps.scan.outputs.needs_update == 'true' + uses: peter-evans/create-pull-request@v7 + with: + path: docs-cn + token: ${{ secrets.DOCS_CN_BOT_TOKEN }} + branch: ${{ steps.scan.outputs.branch_name }} + base: ${{ steps.scan.outputs.docs_cn_base_branch }} + commit-message: "docs: weekly TiDB PR doc-impact check (${{ steps.scan.outputs.window_start_date }} to ${{ steps.scan.outputs.window_end_date }})" + title: "docs: weekly TiDB PR doc-impact check (${{ steps.scan.outputs.window_start_date }} to ${{ steps.scan.outputs.window_end_date }})" + body: | + ### What is changed, added or deleted? (Required) + + Add a weekly report that checks merged `pingcap/tidb` code PRs from the previous week and identifies PRs that likely require docs updates. + + - Source repo: `${{ env.SOURCE_REPO }}` + - Time window (Asia/Shanghai): `${{ steps.scan.outputs.window_start_date }} 00:00` to `${{ steps.scan.outputs.window_end_date }} 00:00` + - Report file: `weekly-doc-sync/${{ steps.scan.outputs.report_filename }}` + + This report is heuristic-based and requires maintainer confirmation before making detailed docs edits. 
+ + ### Which TiDB version(s) do your changes apply to? (Required) + + - [x] master + + ### What is the related PR or file link(s)? + + - TiDB merged PR search: https://github.com/pingcap/tidb/pulls?q=sort%3Aupdated-desc+is%3Apr+is%3Amerged + + ### Do your changes match any of the following descriptions? + + - [ ] Delete files + - [ ] Change aliases + - [ ] Need modification after applied to another branch + - [ ] Might cause conflicts after applied to another branch + add-paths: | + weekly-doc-sync/${{ steps.scan.outputs.report_filename }} + delete-branch: true diff --git a/scripts/check_tidb_prs_and_create_docs_cn_pr.py b/scripts/check_tidb_prs_and_create_docs_cn_pr.py new file mode 100644 index 0000000000000..27ea4704d8298 --- /dev/null +++ b/scripts/check_tidb_prs_and_create_docs_cn_pr.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +"""Weekly checker for merged TiDB PRs that might require docs updates. + +This script: +1. Collects merged PRs in pingcap/tidb during the previous Monday-to-Monday window + in Asia/Shanghai timezone. +2. Uses lightweight heuristics to decide whether a PR likely needs docs updates. +3. Writes a markdown report and json summary for downstream CI steps. +4. Exposes outputs for GitHub Actions via GITHUB_OUTPUT. +""" + +from __future__ import annotations + +import datetime as dt +import json +import os +import pathlib +import urllib.parse +import urllib.request +from typing import Dict, List, Tuple + + +SOURCE_REPO = os.environ.get("SOURCE_REPO", "pingcap/tidb") +OUTPUT_DIR = pathlib.Path(os.environ.get("OUTPUT_DIR", "tmp/tidb-doc-check")).resolve() +DOCS_CN_BASE_BRANCH = os.environ.get("DOCS_CN_BASE_BRANCH", "master") +TOKEN = os.environ.get("GITHUB_TOKEN", "").strip() + + +POSITIVE_LABELS = { + "type/compatibility", + "type/compatibility or feature change", + "type/feature", + "type/enhancement", + "release-note", +} + +NEGATIVE_LABELS = { + "type/ci", + "type/chore", + "type/refactor", + "type/test", + "type/build", +} + +POSITIVE_KEYWORDS = [ + "compatibility", + "deprecate", + "deprecated", + "new feature", + "sql", + "syntax", + "default value", + "system variable", + "configuration", + "config", + "api", + "planner", + "optimizer", + "ddl", +] + +WATCH_PATH_PREFIXES = [ + "pkg/sessionctx/variable/", + "pkg/config/", + "pkg/parser/", + "pkg/ddl/", + "pkg/planner/", + "pkg/executor/", + "br/", + "lightning/", + "dumpling/", +] + + +def gh_api_json(url: str) -> Dict: + headers = { + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + "User-Agent": "tidb-doc-weekly-checker", + } + if TOKEN: + headers["Authorization"] = f"Bearer {TOKEN}" + req = urllib.request.Request(url, headers=headers) + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read().decode("utf-8")) + + +def list_search_results(query: str) -> List[Dict]: + all_items: List[Dict] = [] + page = 1 + while True: + params = urllib.parse.urlencode( + { + "q": query, + "sort": "updated", + "order": "desc", + "per_page": 100, + "page": page, + } + ) + data = gh_api_json(f"https://api.github.com/search/issues?{params}") + items = data.get("items", []) + if not items: + break + all_items.extend(items) + if len(items) < 100: + break + page += 1 + return all_items + + +def list_pr_files(repo: str, number: int) -> List[str]: + files: List[str] = [] + page = 1 + while True: + url = ( + f"https://api.github.com/repos/{repo}/pulls/{number}/files" + f"?per_page=100&page={page}" + ) + data = gh_api_json(url) + if not data: + break + 
files.extend(item.get("filename", "") for item in data) + if len(data) < 100: + break + page += 1 + return [f for f in files if f] + + +def weekly_window_shanghai(now_utc: dt.datetime) -> Tuple[dt.datetime, dt.datetime]: + utc8 = dt.timezone(dt.timedelta(hours=8)) + now_sh = now_utc.astimezone(utc8) + monday_this_week = (now_sh - dt.timedelta(days=now_sh.weekday())).date() + end_sh = dt.datetime.combine(monday_this_week, dt.time(0, 0), tzinfo=utc8) + start_sh = end_sh - dt.timedelta(days=7) + return start_sh, end_sh + + +def classify_pr(pr: Dict, pr_files: List[str]) -> Tuple[bool, List[str], int]: + score = 0 + reasons: List[str] = [] + + labels = {label.get("name", "").lower() for label in pr.get("labels", [])} + title = (pr.get("title") or "").lower() + body = (pr.get("body") or "").lower() + text = f"{title}\n{body}" + + hit_positive_labels = sorted(POSITIVE_LABELS.intersection(labels)) + hit_negative_labels = sorted(NEGATIVE_LABELS.intersection(labels)) + + if hit_positive_labels: + score += 2 + reasons.append(f"Hit labels: {', '.join(hit_positive_labels)}") + if hit_negative_labels and not hit_positive_labels: + score -= 1 + reasons.append(f"Only maintenance labels: {', '.join(hit_negative_labels)}") + + kw_hits = sorted({kw for kw in POSITIVE_KEYWORDS if kw in text}) + if kw_hits: + score += 1 + reasons.append(f"Keyword hints: {', '.join(kw_hits[:5])}") + + path_hits = sorted( + { + path + for path in pr_files + if any(path.startswith(prefix) for prefix in WATCH_PATH_PREFIXES) + } + ) + if path_hits: + score += 1 + reasons.append(f"Core behavior paths touched (count={len(path_hits)})") + + only_tests_or_tools = bool(pr_files) and all( + path.startswith("tests/") + or path.startswith("pkg/util/") + or path.startswith(".github/") + for path in pr_files + ) + if only_tests_or_tools and not hit_positive_labels: + score -= 1 + reasons.append("Files look test/tooling-only") + + needs_docs_update = score >= 2 + if not reasons: + reasons.append("No clear doc-impact signal found") + return needs_docs_update, reasons, score + + +def write_github_output(kv: Dict[str, str]) -> None: + output_path = os.environ.get("GITHUB_OUTPUT", "").strip() + if not output_path: + return + with open(output_path, "a", encoding="utf-8") as f: + for k, v in kv.items(): + f.write(f"{k}={v}\n") + + +def main() -> None: + if not TOKEN: + raise SystemExit("GITHUB_TOKEN is required.") + + now_utc = dt.datetime.now(dt.timezone.utc) + start_sh, end_sh = weekly_window_shanghai(now_utc) + start_date = start_sh.date().isoformat() + end_date = end_sh.date().isoformat() + + query = f"repo:{SOURCE_REPO} is:pr is:merged merged:{start_date}..{end_date}" + merged_prs = list_search_results(query) + + results: List[Dict] = [] + needs_update_prs: List[Dict] = [] + for item in merged_prs: + number = item["number"] + pr_detail = gh_api_json(f"https://api.github.com/repos/{SOURCE_REPO}/pulls/{number}") + pr_files = list_pr_files(SOURCE_REPO, number) + + needs_docs_update, reasons, score = classify_pr(pr_detail, pr_files) + row = { + "number": number, + "title": pr_detail.get("title", ""), + "url": pr_detail.get("html_url", ""), + "merged_at": pr_detail.get("merged_at", ""), + "labels": [x.get("name", "") for x in pr_detail.get("labels", [])], + "score": score, + "needs_docs_update": needs_docs_update, + "reasons": reasons, + } + results.append(row) + if needs_docs_update: + needs_update_prs.append(row) + + OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + window_tag = f"{start_date}_to_{end_date}" + report_filename = 
f"tidb-weekly-doc-check-{window_tag}.md" + json_filename = f"tidb-weekly-doc-check-{window_tag}.json" + + report_path = OUTPUT_DIR / report_filename + json_path = OUTPUT_DIR / json_filename + + lines: List[str] = [] + lines.append("# TiDB weekly merged PR doc-impact check") + lines.append("") + lines.append(f"- Source repo: `{SOURCE_REPO}`") + lines.append(f"- Time window (Asia/Shanghai): `{start_date} 00:00` to `{end_date} 00:00`") + lines.append(f"- Total merged PRs found: `{len(results)}`") + lines.append(f"- PRs judged as docs-update-needed: `{len(needs_update_prs)}`") + lines.append("") + + if needs_update_prs: + lines.append("## PRs that likely need docs updates") + lines.append("") + for pr in needs_update_prs: + lines.append(f"### #{pr['number']} {pr['title']}") + lines.append(f"- PR: {pr['url']}") + lines.append(f"- Merged at: `{pr['merged_at']}`") + lines.append(f"- Labels: `{', '.join(pr['labels']) if pr['labels'] else 'none'}`") + lines.append(f"- Heuristic score: `{pr['score']}`") + lines.append(f"- Reasons: {'; '.join(pr['reasons'])}") + lines.append("") + lines.append("## Suggested next action") + lines.append("") + lines.append("- Confirm each candidate PR and update matching docs pages in `pingcap/docs-cn`.") + lines.append("- This report is heuristic-based and should be reviewed by a maintainer.") + lines.append("") + else: + lines.append("## Result") + lines.append("") + lines.append("No PR reached the docs-update threshold in this window.") + lines.append("") + + report_path.write_text("\n".join(lines), encoding="utf-8") + + json_payload = { + "source_repo": SOURCE_REPO, + "time_window": { + "timezone": "Asia/Shanghai", + "start": start_sh.isoformat(), + "end": end_sh.isoformat(), + }, + "total_merged_prs": len(results), + "docs_update_needed_count": len(needs_update_prs), + "pull_requests": results, + } + json_path.write_text(json.dumps(json_payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + + branch_tag = end_date.replace("-", "") + branch_name = f"weekly/tidb-doc-check-{branch_tag}" + + write_github_output( + { + "needs_update": "true" if needs_update_prs else "false", + "report_path": str(report_path), + "json_path": str(json_path), + "report_filename": report_filename, + "branch_name": branch_name, + "docs_cn_base_branch": DOCS_CN_BASE_BRANCH, + "window_start_date": start_date, + "window_end_date": end_date, + } + ) + + print(f"Report: {report_path}") + print(f"Summary JSON: {json_path}") + print(f"Needs update: {'yes' if needs_update_prs else 'no'}") + + +if __name__ == "__main__": + main() From dd28a15674cbb773908e2adbd8b6ece4b23881e8 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Wed, 6 May 2026 11:56:16 +0800 Subject: [PATCH 12/18] Update check_tidb_prs_and_create_docs_cn_pr.py --- .../check_tidb_prs_and_create_docs_cn_pr.py | 50 ++++++++++++++++--- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/scripts/check_tidb_prs_and_create_docs_cn_pr.py b/scripts/check_tidb_prs_and_create_docs_cn_pr.py index 27ea4704d8298..28fca63913e64 100644 --- a/scripts/check_tidb_prs_and_create_docs_cn_pr.py +++ b/scripts/check_tidb_prs_and_create_docs_cn_pr.py @@ -15,9 +15,11 @@ import json import os import pathlib +import re +import urllib.error import urllib.parse import urllib.request -from typing import Dict, List, Tuple +from typing import Dict, List, Pattern, Tuple SOURCE_REPO = os.environ.get("SOURCE_REPO", "pingcap/tidb") @@ -44,7 +46,6 @@ POSITIVE_KEYWORDS = [ "compatibility", - "deprecate", "deprecated", "new feature", "sql", @@ -52,7 +53,6 @@ 
"default value", "system variable", "configuration", - "config", "api", "planner", "optimizer", @@ -81,8 +81,14 @@ def gh_api_json(url: str) -> Dict: if TOKEN: headers["Authorization"] = f"Bearer {TOKEN}" req = urllib.request.Request(url, headers=headers) - with urllib.request.urlopen(req, timeout=30) as resp: - return json.loads(resp.read().decode("utf-8")) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read().decode("utf-8")) + except urllib.error.HTTPError as exc: + detail = exc.read().decode("utf-8", errors="replace") + raise RuntimeError(f"GitHub API HTTP error {exc.code} for {url}: {detail}") from exc + except urllib.error.URLError as exc: + raise RuntimeError(f"GitHub API network error for {url}: {exc.reason}") from exc def list_search_results(query: str) -> List[Dict]: @@ -136,6 +142,28 @@ def weekly_window_shanghai(now_utc: dt.datetime) -> Tuple[dt.datetime, dt.dateti return start_sh, end_sh +def format_iso8601_with_colon_offset(ts: dt.datetime) -> str: + return ts.isoformat(timespec="seconds") + + +def parse_merged_at(merged_at: str) -> dt.datetime: + if merged_at.endswith("Z"): + merged_at = merged_at.replace("Z", "+00:00") + return dt.datetime.fromisoformat(merged_at) + + +def build_keyword_patterns(keywords: List[str]) -> List[Tuple[str, Pattern[str]]]: + patterns: List[Tuple[str, Pattern[str]]] = [] + for keyword in keywords: + escaped = re.escape(keyword).replace(r"\ ", r"\s+") + pattern = re.compile(rf"\b{escaped}\b") + patterns.append((keyword, pattern)) + return patterns + + +KEYWORD_PATTERNS = build_keyword_patterns(POSITIVE_KEYWORDS) + + def classify_pr(pr: Dict, pr_files: List[str]) -> Tuple[bool, List[str], int]: score = 0 reasons: List[str] = [] @@ -155,7 +183,7 @@ def classify_pr(pr: Dict, pr_files: List[str]) -> Tuple[bool, List[str], int]: score -= 1 reasons.append(f"Only maintenance labels: {', '.join(hit_negative_labels)}") - kw_hits = sorted({kw for kw in POSITIVE_KEYWORDS if kw in text}) + kw_hits = sorted({keyword for keyword, pattern in KEYWORD_PATTERNS if pattern.search(text)}) if kw_hits: score += 1 reasons.append(f"Keyword hints: {', '.join(kw_hits[:5])}") @@ -204,8 +232,10 @@ def main() -> None: start_sh, end_sh = weekly_window_shanghai(now_utc) start_date = start_sh.date().isoformat() end_date = end_sh.date().isoformat() + start_iso = format_iso8601_with_colon_offset(start_sh) + end_iso = format_iso8601_with_colon_offset(end_sh) - query = f"repo:{SOURCE_REPO} is:pr is:merged merged:{start_date}..{end_date}" + query = f"repo:{SOURCE_REPO} is:pr is:merged merged:{start_iso}..{end_iso}" merged_prs = list_search_results(query) results: List[Dict] = [] @@ -213,6 +243,12 @@ def main() -> None: for item in merged_prs: number = item["number"] pr_detail = gh_api_json(f"https://api.github.com/repos/{SOURCE_REPO}/pulls/{number}") + merged_at_raw = pr_detail.get("merged_at", "") + if not merged_at_raw: + continue + merged_at = parse_merged_at(merged_at_raw).astimezone(start_sh.tzinfo) + if not (start_sh <= merged_at < end_sh): + continue pr_files = list_pr_files(SOURCE_REPO, number) needs_docs_update, reasons, score = classify_pr(pr_detail, pr_files) From 3cb57ae5d21681c83a3a74bfe834b70c3e390758 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Wed, 6 May 2026 14:32:50 +0800 Subject: [PATCH 13/18] Update check_tidb_prs_and_create_docs_cn_pr.py --- .../check_tidb_prs_and_create_docs_cn_pr.py | 107 ++++++++++++------ 1 file changed, 72 insertions(+), 35 deletions(-) diff --git a/scripts/check_tidb_prs_and_create_docs_cn_pr.py 
b/scripts/check_tidb_prs_and_create_docs_cn_pr.py index 28fca63913e64..698fffde0dfe2 100644 --- a/scripts/check_tidb_prs_and_create_docs_cn_pr.py +++ b/scripts/check_tidb_prs_and_create_docs_cn_pr.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 -"""Weekly checker for merged TiDB PRs that might require docs updates. +"""Weekly checker for merged PingCAP code PRs that might require docs updates. This script: -1. Collects merged PRs in pingcap/tidb during the previous Monday-to-Monday window - in Asia/Shanghai timezone. +1. Collects merged PRs in PingCAP source repositories during the previous + Monday-to-Monday window in Asia/Shanghai timezone. 2. Uses lightweight heuristics to decide whether a PR likely needs docs updates. 3. Writes a markdown report and json summary for downstream CI steps. 4. Exposes outputs for GitHub Actions via GITHUB_OUTPUT. @@ -22,7 +22,10 @@ from typing import Dict, List, Pattern, Tuple -SOURCE_REPO = os.environ.get("SOURCE_REPO", "pingcap/tidb") +SOURCE_ORG = os.environ.get("SOURCE_ORG", "pingcap") +EXCLUDED_REPOS = { + item.strip() for item in os.environ.get("EXCLUDED_REPOS", "pingcap/docs,pingcap/docs-cn").split(",") if item.strip() +} OUTPUT_DIR = pathlib.Path(os.environ.get("OUTPUT_DIR", "tmp/tidb-doc-check")).resolve() DOCS_CN_BASE_BRANCH = os.environ.get("DOCS_CN_BASE_BRANCH", "master") TOKEN = os.environ.get("GITHUB_TOKEN", "").strip() @@ -115,6 +118,34 @@ def list_search_results(query: str) -> List[Dict]: return all_items +def list_source_repos(org: str) -> List[str]: + repos: List[str] = [] + page = 1 + while True: + url = ( + f"https://api.github.com/orgs/{org}/repos" + f"?type=public&sort=updated&per_page=100&page={page}" + ) + data = gh_api_json(url) + if not data: + break + for repo in data: + full_name = repo.get("full_name", "") + if not full_name: + continue + if repo.get("fork", False): + continue + if repo.get("archived", False) or repo.get("disabled", False): + continue + if full_name in EXCLUDED_REPOS: + continue + repos.append(full_name) + if len(data) < 100: + break + page += 1 + return sorted(set(repos)) + + def list_pr_files(repo: str, number: int) -> List[str]: files: List[str] = [] page = 1 @@ -235,36 +266,38 @@ def main() -> None: start_iso = format_iso8601_with_colon_offset(start_sh) end_iso = format_iso8601_with_colon_offset(end_sh) - query = f"repo:{SOURCE_REPO} is:pr is:merged merged:{start_iso}..{end_iso}" - merged_prs = list_search_results(query) - results: List[Dict] = [] needs_update_prs: List[Dict] = [] - for item in merged_prs: - number = item["number"] - pr_detail = gh_api_json(f"https://api.github.com/repos/{SOURCE_REPO}/pulls/{number}") - merged_at_raw = pr_detail.get("merged_at", "") - if not merged_at_raw: - continue - merged_at = parse_merged_at(merged_at_raw).astimezone(start_sh.tzinfo) - if not (start_sh <= merged_at < end_sh): - continue - pr_files = list_pr_files(SOURCE_REPO, number) - - needs_docs_update, reasons, score = classify_pr(pr_detail, pr_files) - row = { - "number": number, - "title": pr_detail.get("title", ""), - "url": pr_detail.get("html_url", ""), - "merged_at": pr_detail.get("merged_at", ""), - "labels": [x.get("name", "") for x in pr_detail.get("labels", [])], - "score": score, - "needs_docs_update": needs_docs_update, - "reasons": reasons, - } - results.append(row) - if needs_docs_update: - needs_update_prs.append(row) + source_repos = list_source_repos(SOURCE_ORG) + for source_repo in source_repos: + query = f"repo:{source_repo} is:pr is:merged merged:{start_iso}..{end_iso}" + merged_prs = 
list_search_results(query) + for item in merged_prs: + number = item["number"] + pr_detail = gh_api_json(f"https://api.github.com/repos/{source_repo}/pulls/{number}") + merged_at_raw = pr_detail.get("merged_at", "") + if not merged_at_raw: + continue + merged_at = parse_merged_at(merged_at_raw).astimezone(start_sh.tzinfo) + if not (start_sh <= merged_at < end_sh): + continue + pr_files = list_pr_files(source_repo, number) + + needs_docs_update, reasons, score = classify_pr(pr_detail, pr_files) + row = { + "repo": source_repo, + "number": number, + "title": pr_detail.get("title", ""), + "url": pr_detail.get("html_url", ""), + "merged_at": pr_detail.get("merged_at", ""), + "labels": [x.get("name", "") for x in pr_detail.get("labels", [])], + "score": score, + "needs_docs_update": needs_docs_update, + "reasons": reasons, + } + results.append(row) + if needs_docs_update: + needs_update_prs.append(row) OUTPUT_DIR.mkdir(parents=True, exist_ok=True) window_tag = f"{start_date}_to_{end_date}" @@ -277,7 +310,9 @@ def main() -> None: lines: List[str] = [] lines.append("# TiDB weekly merged PR doc-impact check") lines.append("") - lines.append(f"- Source repo: `{SOURCE_REPO}`") + lines.append(f"- Source org: `{SOURCE_ORG}`") + lines.append(f"- Repositories scanned: `{len(source_repos)}`") + lines.append(f"- Excluded repositories: `{', '.join(sorted(EXCLUDED_REPOS))}`") lines.append(f"- Time window (Asia/Shanghai): `{start_date} 00:00` to `{end_date} 00:00`") lines.append(f"- Total merged PRs found: `{len(results)}`") lines.append(f"- PRs judged as docs-update-needed: `{len(needs_update_prs)}`") @@ -287,7 +322,7 @@ def main() -> None: lines.append("## PRs that likely need docs updates") lines.append("") for pr in needs_update_prs: - lines.append(f"### #{pr['number']} {pr['title']}") + lines.append(f"### {pr['repo']}#{pr['number']} {pr['title']}") lines.append(f"- PR: {pr['url']}") lines.append(f"- Merged at: `{pr['merged_at']}`") lines.append(f"- Labels: `{', '.join(pr['labels']) if pr['labels'] else 'none'}`") @@ -308,7 +343,9 @@ def main() -> None: report_path.write_text("\n".join(lines), encoding="utf-8") json_payload = { - "source_repo": SOURCE_REPO, + "source_org": SOURCE_ORG, + "scanned_repositories": source_repos, + "excluded_repositories": sorted(EXCLUDED_REPOS), "time_window": { "timezone": "Asia/Shanghai", "start": start_sh.isoformat(), From 970b272525fe9fc4c4e08f6dcc1886ac60164787 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Wed, 6 May 2026 14:33:10 +0800 Subject: [PATCH 14/18] Generalize weekly docs check to PingCAP org Update the weekly docs-check workflow to scan merged code PRs across the pingcap organization instead of only pingcap/tidb. Introduce SOURCE_ORG and EXCLUDED_REPOS env vars (defaulting to pingcap/docs and pingcap/docs-cn), update job/name strings, commit/title templates, and the PR body/link to reflect the org-wide scan and excluded repos. 
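
For a quick local sanity check of the new exclusion behavior, the parsing
logic can be exercised on its own. This is a minimal sketch that mirrors the
`EXCLUDED_REPOS` handling in the script; the sample repository names are
illustrative only:

```python
import os

# Mirrors how the scanner parses the comma-separated EXCLUDED_REPOS value.
os.environ.setdefault("EXCLUDED_REPOS", "pingcap/docs,pingcap/docs-cn")

excluded = {
    item.strip()
    for item in os.environ["EXCLUDED_REPOS"].split(",")
    if item.strip()
}

for repo in ("pingcap/tidb", "pingcap/docs", "pingcap/docs-cn"):
    print(repo, "->", "skip" if repo in excluded else "scan")
# pingcap/tidb -> scan
# pingcap/docs -> skip
# pingcap/docs-cn -> skip
```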
--- .../workflows/tidb-pr-weekly-doc-check.yml | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/tidb-pr-weekly-doc-check.yml b/.github/workflows/tidb-pr-weekly-doc-check.yml index 1a2db56ec3563..d817e35c8bff8 100644 --- a/.github/workflows/tidb-pr-weekly-doc-check.yml +++ b/.github/workflows/tidb-pr-weekly-doc-check.yml @@ -1,4 +1,4 @@ -name: Weekly TiDB PR Doc Check (docs-cn) +name: Weekly PingCAP Code PR Doc Check (docs-cn) on: schedule: @@ -15,7 +15,8 @@ jobs: contents: read env: - SOURCE_REPO: pingcap/tidb + SOURCE_ORG: pingcap + EXCLUDED_REPOS: pingcap/docs,pingcap/docs-cn OUTPUT_DIR: tmp/tidb-doc-check DOCS_CN_BASE_BRANCH: master @@ -28,11 +29,12 @@ jobs: with: python-version: "3.11" - - name: Scan merged TiDB PRs in last weekly window + - name: Scan merged PingCAP code PRs in last weekly window id: scan env: GITHUB_TOKEN: ${{ secrets.DOCS_CN_BOT_TOKEN || github.token }} - SOURCE_REPO: ${{ env.SOURCE_REPO }} + SOURCE_ORG: ${{ env.SOURCE_ORG }} + EXCLUDED_REPOS: ${{ env.EXCLUDED_REPOS }} OUTPUT_DIR: ${{ env.OUTPUT_DIR }} DOCS_CN_BASE_BRANCH: ${{ env.DOCS_CN_BASE_BRANCH }} run: | @@ -70,14 +72,15 @@ jobs: token: ${{ secrets.DOCS_CN_BOT_TOKEN }} branch: ${{ steps.scan.outputs.branch_name }} base: ${{ steps.scan.outputs.docs_cn_base_branch }} - commit-message: "docs: weekly TiDB PR doc-impact check (${{ steps.scan.outputs.window_start_date }} to ${{ steps.scan.outputs.window_end_date }})" - title: "docs: weekly TiDB PR doc-impact check (${{ steps.scan.outputs.window_start_date }} to ${{ steps.scan.outputs.window_end_date }})" + commit-message: "docs: weekly PingCAP code PR doc-impact check (${{ steps.scan.outputs.window_start_date }} to ${{ steps.scan.outputs.window_end_date }})" + title: "docs: weekly PingCAP code PR doc-impact check (${{ steps.scan.outputs.window_start_date }} to ${{ steps.scan.outputs.window_end_date }})" body: | ### What is changed, added or deleted? (Required) - Add a weekly report that checks merged `pingcap/tidb` code PRs from the previous week and identifies PRs that likely require docs updates. + Add a weekly report that checks merged code PRs across PingCAP source repositories (excluding documentation repositories) and identifies PRs that likely require docs updates. - - Source repo: `${{ env.SOURCE_REPO }}` + - Source org: `${{ env.SOURCE_ORG }}` + - Excluded repositories: `${{ env.EXCLUDED_REPOS }}` - Time window (Asia/Shanghai): `${{ steps.scan.outputs.window_start_date }} 00:00` to `${{ steps.scan.outputs.window_end_date }} 00:00` - Report file: `weekly-doc-sync/${{ steps.scan.outputs.report_filename }}` @@ -89,7 +92,7 @@ jobs: ### What is the related PR or file link(s)? - - TiDB merged PR search: https://github.com/pingcap/tidb/pulls?q=sort%3Aupdated-desc+is%3Apr+is%3Amerged + - PingCAP repositories: https://github.com/orgs/pingcap/repositories?type=source ### Do your changes match any of the following descriptions? 
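
As background for the patches above and below: the scan window they all share
comes from `weekly_window_shanghai` in PATCH 11. The sketch below restates that
helper so the Monday-to-Monday boundary can be checked in isolation; the input
timestamp is a hypothetical run time, not taken from the workflow:

```python
import datetime as dt

def weekly_window_shanghai(now_utc: dt.datetime):
    # Same logic as the helper in PATCH 11: the window runs from last Monday
    # 00:00 to this Monday 00:00 in Asia/Shanghai (UTC+8).
    utc8 = dt.timezone(dt.timedelta(hours=8))
    now_sh = now_utc.astimezone(utc8)
    monday_this_week = (now_sh - dt.timedelta(days=now_sh.weekday())).date()
    end_sh = dt.datetime.combine(monday_this_week, dt.time(0, 0), tzinfo=utc8)
    return end_sh - dt.timedelta(days=7), end_sh

# Hypothetical run time: Wednesday 2026-05-06 03:00 UTC.
start, end = weekly_window_shanghai(dt.datetime(2026, 5, 6, 3, 0, tzinfo=dt.timezone.utc))
print(start.isoformat())  # 2026-04-27T00:00:00+08:00
print(end.isoformat())    # 2026-05-04T00:00:00+08:00
```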
From 36dea3eae59d14fb1d036ee19b6dd33f84b56015 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Wed, 6 May 2026 14:33:13 +0800 Subject: [PATCH 15/18] Update check_tidb_prs_and_create_docs_cn_pr.py --- scripts/check_tidb_prs_and_create_docs_cn_pr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check_tidb_prs_and_create_docs_cn_pr.py b/scripts/check_tidb_prs_and_create_docs_cn_pr.py index 698fffde0dfe2..0b249730722a4 100644 --- a/scripts/check_tidb_prs_and_create_docs_cn_pr.py +++ b/scripts/check_tidb_prs_and_create_docs_cn_pr.py @@ -124,7 +124,7 @@ def list_source_repos(org: str) -> List[str]: while True: url = ( f"https://api.github.com/orgs/{org}/repos" - f"?type=public&sort=updated&per_page=100&page={page}" + f"?type=all&sort=updated&per_page=100&page={page}" ) data = gh_api_json(url) if not data: From 4934edcd9cb58993e5afea952a8b1307c9943c14 Mon Sep 17 00:00:00 2001 From: houfaxin Date: Wed, 6 May 2026 14:42:13 +0800 Subject: [PATCH 16/18] Auto-apply weekly docs-cn updates Add automation to apply heuristic weekly doc updates to docs-cn. - Add scripts/apply_weekly_docs_cn_updates.py: reads the scan JSON, maps PRs/paths to target docs, and appends a "weekly code sync" section to matched docs; writes a summary to docs-cn/weekly-doc-sync/applied-doc-updates.json. - Update workflow .github/workflows/tidb-pr-weekly-doc-check.yml to copy the scan JSON, run the new script when updates are needed, and include the update summary in the created PR (also adjust PR copy text). - Update scripts/check_tidb_prs_and_create_docs_cn_pr.py to include changed_files in the PR payload so the apply script can resolve target docs. This automates applying simple, heuristic doc notes from the weekly PR scan and generates a summary for maintainers to review. --- .../workflows/tidb-pr-weekly-doc-check.yml | 15 ++- scripts/apply_weekly_docs_cn_updates.py | 126 ++++++++++++++++++ .../check_tidb_prs_and_create_docs_cn_pr.py | 1 + 3 files changed, 139 insertions(+), 3 deletions(-) create mode 100644 scripts/apply_weekly_docs_cn_updates.py diff --git a/.github/workflows/tidb-pr-weekly-doc-check.yml b/.github/workflows/tidb-pr-weekly-doc-check.yml index d817e35c8bff8..07a54649fd3b9 100644 --- a/.github/workflows/tidb-pr-weekly-doc-check.yml +++ b/.github/workflows/tidb-pr-weekly-doc-check.yml @@ -63,6 +63,16 @@ jobs: set -euo pipefail mkdir -p docs-cn/weekly-doc-sync cp "${{ steps.scan.outputs.report_path }}" "docs-cn/weekly-doc-sync/${{ steps.scan.outputs.report_filename }}" + cp "${{ steps.scan.outputs.json_path }}" "docs-cn/weekly-doc-sync/" + + - name: Apply concrete docs updates in docs-cn + if: steps.scan.outputs.needs_update == 'true' + shell: bash + run: | + set -euo pipefail + python scripts/apply_weekly_docs_cn_updates.py \ + --report-json "${{ steps.scan.outputs.json_path }}" \ + --docs-cn-dir "docs-cn" - name: Create docs-cn PR if: steps.scan.outputs.needs_update == 'true' @@ -83,8 +93,9 @@ jobs: - Excluded repositories: `${{ env.EXCLUDED_REPOS }}` - Time window (Asia/Shanghai): `${{ steps.scan.outputs.window_start_date }} 00:00` to `${{ steps.scan.outputs.window_end_date }} 00:00` - Report file: `weekly-doc-sync/${{ steps.scan.outputs.report_filename }}` + - Update summary: `weekly-doc-sync/applied-doc-updates.json` - This report is heuristic-based and requires maintainer confirmation before making detailed docs edits. + This PR includes heuristic-based direct doc edits. Please review technical accuracy before merging. ### Which TiDB version(s) do your changes apply to? 
(Required)

@@ -100,6 +111,4 @@ jobs:
         - [ ] Change aliases
         - [ ] Need modification after applied to another branch
         - [ ] Might cause conflicts after applied to another branch
-        add-paths: |
-          weekly-doc-sync/${{ steps.scan.outputs.report_filename }}
         delete-branch: true
diff --git a/scripts/apply_weekly_docs_cn_updates.py b/scripts/apply_weekly_docs_cn_updates.py
new file mode 100644
index 0000000000000..a0f5e442c6231
--- /dev/null
+++ b/scripts/apply_weekly_docs_cn_updates.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+"""Apply concrete docs-cn updates from weekly code PR scan results.
+
+This script reads the JSON produced by check_tidb_prs_and_create_docs_cn_pr.py
+and directly updates matched docs files in docs-cn by appending a short
+"weekly code sync" section for each impacted page.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import pathlib
+from collections import defaultdict
+from typing import Dict, List
+
+
+PATH_TO_DOCS = {
+    "pkg/sessionctx/variable/": ["system-variables.md"],
+    "pkg/config/": ["tidb-configuration-file.md"],
+    "pkg/parser/": ["sql-statements/sql-statement-overview.md"],
+    "pkg/ddl/": ["ddl-introduction.md"],
+    "pkg/planner/": ["sql-optimization-concepts.md"],
+    "pkg/executor/": ["sql-optimization-concepts.md"],
+    "br/": ["br/backup-and-restore-overview.md"],
+    "lightning/": ["tidb-lightning/tidb-lightning-overview.md"],
+    "dumpling/": ["dumpling-overview.md"],
+}
+
+REPO_TO_DOCS = {
+    "pingcap/tiflow": ["ticdc/ticdc-overview.md"],
+    "pingcap/tikv": ["tikv-overview.md"],
+    "pingcap/pd": ["pd-overview.md"],
+    "pingcap/tidb-binlog": ["tidb-binlog/tidb-binlog-overview.md"],
+    "pingcap/tiup": ["tiup/tiup-overview.md"],
+}
+
+
+def resolve_target_docs(repo: str, changed_files: List[str]) -> List[str]:
+    targets = set(REPO_TO_DOCS.get(repo, []))
+    for file_path in changed_files:
+        for prefix, docs_list in PATH_TO_DOCS.items():
+            if file_path.startswith(prefix):
+                targets.update(docs_list)
+    return sorted(targets)
+
+
+def append_section(file_path: pathlib.Path, section_title: str, lines: List[str]) -> bool:
+    if not file_path.exists():
+        return False
+
+    content = file_path.read_text(encoding="utf-8")
+    marker = f"<!-- {section_title} -->"
+    if marker in content:
+        return False
+
+    block = [""]
+    block.append(marker)
+    block.append(f"## {section_title}")
+    block.append("")
+    block.extend(lines)
+    block.append("")
+    file_path.write_text(content.rstrip() + "\n" + "\n".join(block), encoding="utf-8")
+    return True
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--report-json", required=True)
+    parser.add_argument("--docs-cn-dir", required=True)
+    args = parser.parse_args()
+
+    report_json = pathlib.Path(args.report_json).resolve()
+    docs_cn_dir = pathlib.Path(args.docs_cn_dir).resolve()
+    payload = json.loads(report_json.read_text(encoding="utf-8"))
+
+    start = payload["time_window"]["start"][:10]
+    end = payload["time_window"]["end"][:10]
+    section_title = f"每周代码变更同步({start} 到 {end})"
+
+    doc_to_items: Dict[str, List[Dict]] = defaultdict(list)
+    for pr in payload.get("pull_requests", []):
+        if not pr.get("needs_docs_update"):
+            continue
+        targets = resolve_target_docs(pr.get("repo", ""), pr.get("changed_files", []))
+        for target in targets:
+            doc_to_items[target].append(pr)
+
+    changed_files: List[str] = []
+    missing_files: List[str] = []
+    for rel_path, items in sorted(doc_to_items.items()):
+        abs_path = docs_cn_dir / rel_path
+        lines = []
+        for item in items:
+            lines.append(
+                f"- 
[{item['repo']}#{item['number']}]({item['url']}): {item['title']}" + ) + ok = append_section(abs_path, section_title, lines) + if ok: + changed_files.append(rel_path) + elif not abs_path.exists(): + missing_files.append(rel_path) + + print("Changed docs files:") + for path in changed_files: + print(f"- {path}") + + if missing_files: + print("Missing mapped files:") + for path in sorted(set(missing_files)): + print(f"- {path}") + + summary = { + "changed_docs_files": changed_files, + "missing_mapped_files": sorted(set(missing_files)), + "mapped_docs_count": len(doc_to_items), + } + (docs_cn_dir / "weekly-doc-sync").mkdir(parents=True, exist_ok=True) + (docs_cn_dir / "weekly-doc-sync" / "applied-doc-updates.json").write_text( + json.dumps(summary, ensure_ascii=False, indent=2) + "\n", + encoding="utf-8", + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/check_tidb_prs_and_create_docs_cn_pr.py b/scripts/check_tidb_prs_and_create_docs_cn_pr.py index 0b249730722a4..563d85aab5900 100644 --- a/scripts/check_tidb_prs_and_create_docs_cn_pr.py +++ b/scripts/check_tidb_prs_and_create_docs_cn_pr.py @@ -291,6 +291,7 @@ def main() -> None: "url": pr_detail.get("html_url", ""), "merged_at": pr_detail.get("merged_at", ""), "labels": [x.get("name", "") for x in pr_detail.get("labels", [])], + "changed_files": pr_files, "score": score, "needs_docs_update": needs_docs_update, "reasons": reasons, From 86af603324d75c0d9177ef55abf3fefa5261ef6e Mon Sep 17 00:00:00 2001 From: houfaxin Date: Wed, 6 May 2026 15:02:17 +0800 Subject: [PATCH 17/18] Add scan job and per-PR create jobs Refactor the weekly doc-check workflow into a scan job that discovers candidate PRs and a downstream create-pr-per-source job that processes each candidate in parallel. The scan job now emits outputs (needs_update, candidates_count, candidates_matrix, paths, window dates) and uploads a JSON/report artifact; a TARGET_BRANCH_MAP env var is introduced to map source base branches to docs target branches. apply_weekly_docs_cn_updates.py was rewritten to apply updates for a single source PR, add a per-PR marker/note, and write an applied--.json summary. check_tidb_prs_and_create_docs_cn_pr.py now accepts TARGET_BRANCH_MAP, records source_base_branch for PRs, builds a matrix of candidates, and writes candidates_count/candidates_matrix outputs for the workflow. 
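
For illustration, the base-to-target branch resolution introduced here behaves
as follows. This is a sketch with a trimmed map; the full TARGET_BRANCH_MAP in
the workflow also covers release-8.1 through release-5.4, and the fallback
mirrors DOCS_CN_BASE_BRANCH:

```python
import json

# Trimmed example of the TARGET_BRANCH_MAP JSON the workflow exports.
target_branch_map = json.loads('{"master": "master", "release-8.5": "release-8.5"}')
fallback = "master"  # DOCS_CN_BASE_BRANCH

for source_base in ("master", "release-8.5", "feature/topic"):
    print(source_base, "->", target_branch_map.get(source_base, fallback))
# master -> master
# release-8.5 -> release-8.5
# feature/topic -> master (unmapped branches fall back)
```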
--- .../workflows/tidb-pr-weekly-doc-check.yml | 110 ++++++++++++------ scripts/apply_weekly_docs_cn_updates.py | 85 ++++++-------- .../check_tidb_prs_and_create_docs_cn_pr.py | 38 ++++++ 3 files changed, 149 insertions(+), 84 deletions(-) diff --git a/.github/workflows/tidb-pr-weekly-doc-check.yml b/.github/workflows/tidb-pr-weekly-doc-check.yml index 07a54649fd3b9..6e077560fc821 100644 --- a/.github/workflows/tidb-pr-weekly-doc-check.yml +++ b/.github/workflows/tidb-pr-weekly-doc-check.yml @@ -7,19 +7,25 @@ on: workflow_dispatch: jobs: - weekly-check: + scan: if: github.repository == 'pingcap/docs' runs-on: ubuntu-latest - permissions: contents: read - + outputs: + needs_update: ${{ steps.scan.outputs.needs_update }} + candidates_count: ${{ steps.scan.outputs.candidates_count }} + candidates_matrix: ${{ steps.scan.outputs.candidates_matrix }} + json_path: ${{ steps.scan.outputs.json_path }} + report_path: ${{ steps.scan.outputs.report_path }} + window_start_date: ${{ steps.scan.outputs.window_start_date }} + window_end_date: ${{ steps.scan.outputs.window_end_date }} env: SOURCE_ORG: pingcap EXCLUDED_REPOS: pingcap/docs,pingcap/docs-cn OUTPUT_DIR: tmp/tidb-doc-check DOCS_CN_BASE_BRANCH: master - + TARGET_BRANCH_MAP: '{"master":"master","release-8.5":"release-8.5","release-8.1":"release-8.1","release-7.5":"release-7.5","release-7.1":"release-7.1","release-6.5":"release-6.5","release-6.1":"release-6.1","release-5.4":"release-5.4"}' steps: - name: Checkout docs repo uses: actions/checkout@v4 @@ -37,73 +43,105 @@ jobs: EXCLUDED_REPOS: ${{ env.EXCLUDED_REPOS }} OUTPUT_DIR: ${{ env.OUTPUT_DIR }} DOCS_CN_BASE_BRANCH: ${{ env.DOCS_CN_BASE_BRANCH }} + TARGET_BRANCH_MAP: ${{ env.TARGET_BRANCH_MAP }} run: | set -euo pipefail python scripts/check_tidb_prs_and_create_docs_cn_pr.py - - name: Skip when no docs updates are needed - if: steps.scan.outputs.needs_update != 'true' + - name: Print summary run: | - echo "No doc-impact PRs found in this weekly window." 
+ echo "needs_update=${{ steps.scan.outputs.needs_update }}" + echo "candidates_count=${{ steps.scan.outputs.candidates_count }}" - - name: Checkout docs-cn repo + - name: Upload scan artifacts if: steps.scan.outputs.needs_update == 'true' + uses: actions/upload-artifact@v4 + with: + name: weekly-doc-check-report + path: | + ${{ steps.scan.outputs.json_path }} + ${{ steps.scan.outputs.report_path }} + + create-pr-per-source: + if: needs.scan.outputs.needs_update == 'true' + needs: scan + runs-on: ubuntu-latest + strategy: + fail-fast: false + max-parallel: 4 + matrix: ${{ fromJson(needs.scan.outputs.candidates_matrix) }} + permissions: + contents: read + steps: + - name: Checkout docs repo + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Download scan artifacts + uses: actions/download-artifact@v4 + with: + name: weekly-doc-check-report + path: tmp/tidb-doc-check + + - name: Checkout docs-cn repo target branch uses: actions/checkout@v4 with: repository: pingcap/docs-cn token: ${{ secrets.DOCS_CN_BOT_TOKEN }} - ref: ${{ steps.scan.outputs.docs_cn_base_branch }} + ref: ${{ matrix.target_branch }} path: docs-cn persist-credentials: false - - name: Copy weekly report into docs-cn - if: steps.scan.outputs.needs_update == 'true' - shell: bash + - name: Apply docs-cn updates for one source PR run: | set -euo pipefail - mkdir -p docs-cn/weekly-doc-sync - cp "${{ steps.scan.outputs.report_path }}" "docs-cn/weekly-doc-sync/${{ steps.scan.outputs.report_filename }}" - cp "${{ steps.scan.outputs.json_path }}" "docs-cn/weekly-doc-sync/" + python scripts/apply_weekly_docs_cn_updates.py \ + --report-json "$(ls tmp/tidb-doc-check/*.json | head -1)" \ + --docs-cn-dir "docs-cn" \ + --source-repo "${{ matrix.repo }}" \ + --source-pr-number "${{ matrix.number }}" - - name: Apply concrete docs updates in docs-cn - if: steps.scan.outputs.needs_update == 'true' + - name: Build branch metadata + id: meta shell: bash run: | set -euo pipefail - python scripts/apply_weekly_docs_cn_updates.py \ - --report-json "${{ steps.scan.outputs.json_path }}" \ - --docs-cn-dir "docs-cn" + safe_repo="$(echo "${{ matrix.repo }}" | tr '/' '-')" + echo "safe_repo=${safe_repo}" >> "$GITHUB_OUTPUT" + echo "branch=weekly/doc-sync/${safe_repo}-pr-${{ matrix.number }}-${{ needs.scan.outputs.window_end_date }}" >> "$GITHUB_OUTPUT" - - name: Create docs-cn PR - if: steps.scan.outputs.needs_update == 'true' + - name: Create docs-cn PR for one source PR uses: peter-evans/create-pull-request@v7 with: path: docs-cn token: ${{ secrets.DOCS_CN_BOT_TOKEN }} - branch: ${{ steps.scan.outputs.branch_name }} - base: ${{ steps.scan.outputs.docs_cn_base_branch }} - commit-message: "docs: weekly PingCAP code PR doc-impact check (${{ steps.scan.outputs.window_start_date }} to ${{ steps.scan.outputs.window_end_date }})" - title: "docs: weekly PingCAP code PR doc-impact check (${{ steps.scan.outputs.window_start_date }} to ${{ steps.scan.outputs.window_end_date }})" + branch: ${{ steps.meta.outputs.branch }} + base: ${{ matrix.target_branch }} + commit-message: "docs: sync from ${{ matrix.repo }}#${{ matrix.number }}" + title: "docs: sync from ${{ matrix.repo }}#${{ matrix.number }}" body: | ### What is changed, added or deleted? (Required) - Add a weekly report that checks merged code PRs across PingCAP source repositories (excluding documentation repositories) and identifies PRs that likely require docs updates. 
- - - Source org: `${{ env.SOURCE_ORG }}` - - Excluded repositories: `${{ env.EXCLUDED_REPOS }}` - - Time window (Asia/Shanghai): `${{ steps.scan.outputs.window_start_date }} 00:00` to `${{ steps.scan.outputs.window_end_date }} 00:00` - - Report file: `weekly-doc-sync/${{ steps.scan.outputs.report_filename }}` - - Update summary: `weekly-doc-sync/applied-doc-updates.json` + Sync docs-cn updates for one source code PR. - This PR includes heuristic-based direct doc edits. Please review technical accuracy before merging. + - Source PR: ${{ matrix.url }} + - Source repo: `${{ matrix.repo }}` + - Source PR number: `${{ matrix.number }}` + - Source base branch: `${{ matrix.source_base_branch }}` + - Target docs-cn branch: `${{ matrix.target_branch }}` + - Weekly window (Asia/Shanghai): `${{ needs.scan.outputs.window_start_date }} 00:00` to `${{ needs.scan.outputs.window_end_date }} 00:00` ### Which TiDB version(s) do your changes apply to? (Required) - - [x] master + - [x] ${{ matrix.target_branch }} ### What is the related PR or file link(s)? - - PingCAP repositories: https://github.com/orgs/pingcap/repositories?type=source + - This PR is translated from: ${{ matrix.url }} ### Do your changes match any of the following descriptions? diff --git a/scripts/apply_weekly_docs_cn_updates.py b/scripts/apply_weekly_docs_cn_updates.py index a0f5e442c6231..7fdf3bc8bc7f7 100644 --- a/scripts/apply_weekly_docs_cn_updates.py +++ b/scripts/apply_weekly_docs_cn_updates.py @@ -1,17 +1,11 @@ #!/usr/bin/env python3 -"""Apply concrete docs-cn updates from weekly code PR scan results. - -This script reads the JSON produced by check_tidb_prs_and_create_docs_cn_pr.py -and directly updates matched docs files in docs-cn by appending a short -"weekly code sync" section for each impacted page. 
-""" +"""Apply docs-cn updates for one source PR candidate.""" from __future__ import annotations import argparse import json import pathlib -from collections import defaultdict from typing import Dict, List @@ -45,81 +39,76 @@ def resolve_target_docs(repo: str, changed_files: List[str]) -> List[str]: return sorted(targets) -def append_section(file_path: pathlib.Path, section_title: str, lines: List[str]) -> bool: +def append_pr_note(file_path: pathlib.Path, marker: str, lines: List[str]) -> bool: if not file_path.exists(): return False - content = file_path.read_text(encoding="utf-8") - marker = f"" if marker in content: return False - - block = [""] - block.append(marker) - block.append(f"## {section_title}") - block.append("") + block = ["", marker, "## Weekly code sync note", ""] block.extend(lines) block.append("") file_path.write_text(content.rstrip() + "\n" + "\n".join(block), encoding="utf-8") return True +def load_candidate(report: Dict, repo: str, number: int) -> Dict: + for pr in report.get("pull_requests", []): + if pr.get("repo") == repo and int(pr.get("number", -1)) == number: + return pr + raise SystemExit(f"Candidate not found in report: {repo}#{number}") + + def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--report-json", required=True) parser.add_argument("--docs-cn-dir", required=True) + parser.add_argument("--source-repo", required=True) + parser.add_argument("--source-pr-number", required=True, type=int) args = parser.parse_args() report_json = pathlib.Path(args.report_json).resolve() docs_cn_dir = pathlib.Path(args.docs_cn_dir).resolve() payload = json.loads(report_json.read_text(encoding="utf-8")) + pr = load_candidate(payload, args.source_repo, args.source_pr_number) - start = payload["time_window"]["start"][:10] - end = payload["time_window"]["end"][:10] - section_title = f"每周代码变更同步({start} 到 {end})" + if not pr.get("needs_docs_update"): + return - doc_to_items: Dict[str, List[Dict]] = defaultdict(list) - for pr in payload.get("pull_requests", []): - if not pr.get("needs_docs_update"): - continue - targets = resolve_target_docs(pr.get("repo", ""), pr.get("changed_files", [])) - for target in targets: - doc_to_items[target].append(pr) + targets = resolve_target_docs(pr.get("repo", ""), pr.get("changed_files", [])) + marker = f"" + note_lines = [ + f"- Source PR: [{pr['repo']}#{pr['number']}]({pr['url']})", + f"- Title: {pr['title']}", + f"- Merged at: `{pr['merged_at']}`", + f"- Reasons: {'; '.join(pr.get('reasons', []))}", + ] changed_files: List[str] = [] missing_files: List[str] = [] - for rel_path, items in sorted(doc_to_items.items()): + for rel_path in targets: abs_path = docs_cn_dir / rel_path - lines = [] - for item in items: - lines.append( - f"- [{item['repo']}#{item['number']}]({item['url']}): {item['title']}" - ) - ok = append_section(abs_path, section_title, lines) - if ok: + changed = append_pr_note(abs_path, marker, note_lines) + if changed: changed_files.append(rel_path) elif not abs_path.exists(): missing_files.append(rel_path) - print("Changed docs files:") - for path in changed_files: - print(f"- {path}") - - if missing_files: - print("Missing mapped files:") - for path in sorted(set(missing_files)): - print(f"- {path}") - + out_dir = docs_cn_dir / "weekly-doc-sync" + out_dir.mkdir(parents=True, exist_ok=True) + summary_path = out_dir / f"applied-{pr['repo'].replace('/', '_')}-{pr['number']}.json" summary = { + "source_repo": pr["repo"], + "source_pr_number": pr["number"], + "target_docs_files": targets, 
"changed_docs_files": changed_files, "missing_mapped_files": sorted(set(missing_files)), - "mapped_docs_count": len(doc_to_items), } - (docs_cn_dir / "weekly-doc-sync").mkdir(parents=True, exist_ok=True) - (docs_cn_dir / "weekly-doc-sync" / "applied-doc-updates.json").write_text( - json.dumps(summary, ensure_ascii=False, indent=2) + "\n", - encoding="utf-8", - ) + summary_path.write_text(json.dumps(summary, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + + print(f"Changed docs files for {pr['repo']}#{pr['number']}:") + for item in changed_files: + print(f"- {item}") if __name__ == "__main__": diff --git a/scripts/check_tidb_prs_and_create_docs_cn_pr.py b/scripts/check_tidb_prs_and_create_docs_cn_pr.py index 563d85aab5900..086784b540122 100644 --- a/scripts/check_tidb_prs_and_create_docs_cn_pr.py +++ b/scripts/check_tidb_prs_and_create_docs_cn_pr.py @@ -29,6 +29,7 @@ OUTPUT_DIR = pathlib.Path(os.environ.get("OUTPUT_DIR", "tmp/tidb-doc-check")).resolve() DOCS_CN_BASE_BRANCH = os.environ.get("DOCS_CN_BASE_BRANCH", "master") TOKEN = os.environ.get("GITHUB_TOKEN", "").strip() +TARGET_BRANCH_MAP_RAW = os.environ.get("TARGET_BRANCH_MAP", "").strip() POSITIVE_LABELS = { @@ -255,10 +256,28 @@ def write_github_output(kv: Dict[str, str]) -> None: f.write(f"{k}={v}\n") +def load_target_branch_map(raw: str) -> Dict[str, str]: + if not raw: + return {} + try: + loaded = json.loads(raw) + except json.JSONDecodeError as exc: + raise SystemExit(f"Invalid TARGET_BRANCH_MAP JSON: {exc}") from exc + if not isinstance(loaded, dict): + raise SystemExit("TARGET_BRANCH_MAP must be a JSON object.") + out: Dict[str, str] = {} + for k, v in loaded.items(): + if isinstance(k, str) and isinstance(v, str) and k and v: + out[k] = v + return out + + def main() -> None: if not TOKEN: raise SystemExit("GITHUB_TOKEN is required.") + target_branch_map = load_target_branch_map(TARGET_BRANCH_MAP_RAW) + now_utc = dt.datetime.now(dt.timezone.utc) start_sh, end_sh = weekly_window_shanghai(now_utc) start_date = start_sh.date().isoformat() @@ -290,6 +309,7 @@ def main() -> None: "title": pr_detail.get("title", ""), "url": pr_detail.get("html_url", ""), "merged_at": pr_detail.get("merged_at", ""), + "source_base_branch": pr_detail.get("base", {}).get("ref", ""), "labels": [x.get("name", "") for x in pr_detail.get("labels", [])], "changed_files": pr_files, "score": score, @@ -360,10 +380,28 @@ def main() -> None: branch_tag = end_date.replace("-", "") branch_name = f"weekly/tidb-doc-check-{branch_tag}" + candidates: List[Dict] = [] + for pr in needs_update_prs: + source_base_branch = pr.get("source_base_branch") or "" + target_branch = target_branch_map.get(source_base_branch, DOCS_CN_BASE_BRANCH) + candidates.append( + { + "repo": pr["repo"], + "number": pr["number"], + "title": pr["title"], + "url": pr["url"], + "merged_at": pr["merged_at"], + "source_base_branch": source_base_branch, + "target_branch": target_branch, + } + ) + matrix_json = json.dumps({"include": candidates}, ensure_ascii=False, separators=(",", ":")) write_github_output( { "needs_update": "true" if needs_update_prs else "false", + "candidates_count": str(len(candidates)), + "candidates_matrix": matrix_json, "report_path": str(report_path), "json_path": str(json_path), "report_filename": report_filename, From 5894c19a6c1477999e17279efce509caaa83ceca Mon Sep 17 00:00:00 2001 From: houfaxin Date: Wed, 6 May 2026 16:47:48 +0800 Subject: [PATCH 18/18] Support EXTRA_REPOS in tidb doc check Add an EXTRA_REPOS env var to the workflow and job so additional repos 
(default tikv/tikv,tikv/pd) can be passed into the doc-check job. Parse EXTRA_REPOS in the script and merge it into the list of source repositories, then remove any EXCLUDED_REPOS before returning the final sorted repo list. This allows including extra repositories in the weekly TiDB PR documentation checks. --- .github/workflows/tidb-pr-weekly-doc-check.yml | 2 ++ scripts/check_tidb_prs_and_create_docs_cn_pr.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tidb-pr-weekly-doc-check.yml b/.github/workflows/tidb-pr-weekly-doc-check.yml index 6e077560fc821..3c63ade357b45 100644 --- a/.github/workflows/tidb-pr-weekly-doc-check.yml +++ b/.github/workflows/tidb-pr-weekly-doc-check.yml @@ -23,6 +23,7 @@ jobs: env: SOURCE_ORG: pingcap EXCLUDED_REPOS: pingcap/docs,pingcap/docs-cn + EXTRA_REPOS: tikv/tikv,tikv/pd OUTPUT_DIR: tmp/tidb-doc-check DOCS_CN_BASE_BRANCH: master TARGET_BRANCH_MAP: '{"master":"master","release-8.5":"release-8.5","release-8.1":"release-8.1","release-7.5":"release-7.5","release-7.1":"release-7.1","release-6.5":"release-6.5","release-6.1":"release-6.1","release-5.4":"release-5.4"}' @@ -41,6 +42,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.DOCS_CN_BOT_TOKEN || github.token }} SOURCE_ORG: ${{ env.SOURCE_ORG }} EXCLUDED_REPOS: ${{ env.EXCLUDED_REPOS }} + EXTRA_REPOS: ${{ env.EXTRA_REPOS }} OUTPUT_DIR: ${{ env.OUTPUT_DIR }} DOCS_CN_BASE_BRANCH: ${{ env.DOCS_CN_BASE_BRANCH }} TARGET_BRANCH_MAP: ${{ env.TARGET_BRANCH_MAP }} diff --git a/scripts/check_tidb_prs_and_create_docs_cn_pr.py b/scripts/check_tidb_prs_and_create_docs_cn_pr.py index 086784b540122..b4db43e4b5320 100644 --- a/scripts/check_tidb_prs_and_create_docs_cn_pr.py +++ b/scripts/check_tidb_prs_and_create_docs_cn_pr.py @@ -26,6 +26,9 @@ EXCLUDED_REPOS = { item.strip() for item in os.environ.get("EXCLUDED_REPOS", "pingcap/docs,pingcap/docs-cn").split(",") if item.strip() } +EXTRA_REPOS = { + item.strip() for item in os.environ.get("EXTRA_REPOS", "tikv/tikv,tikv/pd").split(",") if item.strip() +} OUTPUT_DIR = pathlib.Path(os.environ.get("OUTPUT_DIR", "tmp/tidb-doc-check")).resolve() DOCS_CN_BASE_BRANCH = os.environ.get("DOCS_CN_BASE_BRANCH", "master") TOKEN = os.environ.get("GITHUB_TOKEN", "").strip() @@ -144,7 +147,10 @@ def list_source_repos(org: str) -> List[str]: if len(data) < 100: break page += 1 - return sorted(set(repos)) + merged = set(repos) + merged.update(EXTRA_REPOS) + merged.difference_update(EXCLUDED_REPOS) + return sorted(merged) def list_pr_files(repo: str, number: int) -> List[str]:
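Note on the final patch above: the resulting repo list is the org's source
repositories, plus EXTRA_REPOS, minus EXCLUDED_REPOS. The following standalone
Python sketch is illustrative only and is not part of the patch series; the
repo names are made-up inputs rather than live GitHub API results, and
final_repo_list is a hypothetical stand-in for the patched list_source_repos:

    # Sketch of the merge logic the patch adds to list_source_repos().
    extra_repos = {"tikv/tikv", "tikv/pd"}
    excluded_repos = {"pingcap/docs", "pingcap/docs-cn"}

    def final_repo_list(org_repos):
        merged = set(org_repos)
        merged.update(extra_repos)                # pull in repos outside the source org
        merged.difference_update(excluded_repos)  # drop the documentation repos
        return sorted(merged)

    print(final_repo_list(["pingcap/tidb", "pingcap/docs", "pingcap/tiflash"]))
    # ['pingcap/tidb', 'pingcap/tiflash', 'tikv/pd', 'tikv/tikv']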