From a12866137c375b9a1a129f9b9896a281496048eb Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Sat, 23 May 2026 14:02:45 -0700 Subject: [PATCH 1/2] FIX: Sync docs TOC with generated API pages The Read the Docs build was failing because doc/myst.yml referenced api/pyrit_common_cli_helpers.md, which is never produced by build_scripts/gen_api_md.py. The generator only emits per-submodule pages when the parent module is a pure aggregate (only submodules, no direct API). pyrit.common has its own public API, so its cli_helpers submodule is not expanded into a separate page. Also adds the missing api/pyrit_output.md entry, which is generated by the script but was not listed in the TOC. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- doc/myst.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/myst.yml b/doc/myst.yml index d87593e3e..580b36a33 100644 --- a/doc/myst.yml +++ b/doc/myst.yml @@ -208,7 +208,6 @@ project: - file: api/pyrit_cli_pyrit_scan.md - file: api/pyrit_cli_pyrit_shell.md - file: api/pyrit_common.md - - file: api/pyrit_common_cli_helpers.md - file: api/pyrit_datasets.md - file: api/pyrit_embedding.md - file: api/pyrit_exceptions.md @@ -221,6 +220,7 @@ project: - file: api/pyrit_memory.md - file: api/pyrit_message_normalizer.md - file: api/pyrit_models.md + - file: api/pyrit_output.md - file: api/pyrit_prompt_converter.md - file: api/pyrit_prompt_normalizer.md - file: api/pyrit_prompt_target.md From 997df90b8a3dc7c35f6c966c8c4eee9949cc84b6 Mon Sep 17 00:00:00 2001 From: Roman Lutz Date: Sat, 23 May 2026 16:00:04 -0700 Subject: [PATCH 2/2] DOC: Fail docs build when api/ TOC and generated pages disagree The Read the Docs build for #1753 quietly failed because jupyter-book emits "Table of contents entry does not exist" as warnings rather than errors, and --strict does not promote them. The CI deploy-book job and every per-PR check passed; only RTD surfaced the failure. Move that check upstream into the generator itself: - validate_docs.py now auto-detects whether doc/api/ exists. Pre-commit (before any docs build) keeps skipping api/* like today; after gen_api_md.py runs, api/* entries are validated like any other TOC file and orphan detection covers generated pages too. - gen_api_md.py invokes validate_docs.main() after writing the pages and propagates a non-zero exit code, so Makefile / CI / RTD all fail fast when doc/myst.yml drifts from the generator output in either direction (stale entry or missing entry). - Extended test_validate_docs.py to cover the new conditional behavior in both directions. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- build_scripts/gen_api_md.py | 14 +++++++++ build_scripts/validate_docs.py | 28 ++++++++++++----- .../unit/build_scripts/test_validate_docs.py | 30 +++++++++++++++++++ 3 files changed, 65 insertions(+), 7 deletions(-) diff --git a/build_scripts/gen_api_md.py b/build_scripts/gen_api_md.py index 02225dec4..da128dc2d 100644 --- a/build_scripts/gen_api_md.py +++ b/build_scripts/gen_api_md.py @@ -17,8 +17,13 @@ """ import json +import sys from pathlib import Path +# Import sibling script for post-generation TOC validation. +sys.path.insert(0, str(Path(__file__).parent)) +import validate_docs # noqa: E402 + API_JSON_DIR = Path("doc/_api") API_MD_DIR = Path("doc/api") @@ -399,6 +404,15 @@ def main() -> None: index_path.write_text("\n".join(index_parts), encoding="utf-8") print(f"Written {index_path}") + # Fail loudly if doc/myst.yml's api/ TOC entries no longer match what we + # generated. Without this check, mismatches only manifest as easy-to-miss + # warnings in the jupyter-book log (--strict does not treat them as errors) + # and silently break the Read the Docs build downstream. + print("Validating doc/myst.yml stays in sync with generated API pages...") + rc = validate_docs.main() + if rc != 0: + sys.exit(rc) + if __name__ == "__main__": main() diff --git a/build_scripts/validate_docs.py b/build_scripts/validate_docs.py index c574bf9e7..e97d824c6 100644 --- a/build_scripts/validate_docs.py +++ b/build_scripts/validate_docs.py @@ -33,14 +33,21 @@ def parse_toc_files(toc_entries: list, files: set | None = None) -> set[str]: def validate_toc_files(toc_files: set[str], doc_root: Path) -> list[str]: - """Check that all files referenced in the TOC exist.""" - # Directories with auto-generated content (gitignored, created during build) - generated_dirs = {"api/", "api\\"} + """Check that all files referenced in the TOC exist. + + Auto-generated ``api/*.md`` pages are produced by + ``build_scripts/gen_api_md.py`` and are gitignored, so they are skipped + while the ``doc/api/`` directory has not been generated yet (e.g. during + pre-commit). Once that directory exists (i.e. after a docs build), the + api/ entries are validated like any other file so the TOC stays in sync + with the generator output. + """ + skip_generated_api = not (doc_root / "api").exists() + api_prefixes = ("api/", "api\\") errors = [] for file_ref in toc_files: - # Skip files in auto-generated directories - if any(file_ref.startswith(d) for d in generated_dirs): + if skip_generated_api and file_ref.startswith(api_prefixes): continue file_path = doc_root / file_ref if not file_path.exists(): @@ -49,17 +56,24 @@ def validate_toc_files(toc_files: set[str], doc_root: Path) -> list[str]: def find_orphaned_files(toc_files: set[str], doc_root: Path) -> list[str]: - """Find documentation files not referenced in the TOC.""" + """Find documentation files not referenced in the TOC. + + ``doc/api/`` holds auto-generated reference pages. They are skipped while + the directory does not yet exist (pre-commit, before any docs build), but + once present they are checked for orphans so the TOC reflects exactly the + set of files produced by ``build_scripts/gen_api_md.py``. + """ skip_dirs = { "_build", "_api", - "api", "css", ".ipynb_checkpoints", "__pycache__", "playwright_demo", "generate_docs", } + if not (doc_root / "api").exists(): + skip_dirs.add("api") skip_files = { "myst.yml", "roakey.png", diff --git a/tests/unit/build_scripts/test_validate_docs.py b/tests/unit/build_scripts/test_validate_docs.py index 7a048a8cc..68eaf5882 100644 --- a/tests/unit/build_scripts/test_validate_docs.py +++ b/tests/unit/build_scripts/test_validate_docs.py @@ -49,10 +49,25 @@ def test_validate_toc_files_error_when_file_missing(tmp_path: Path) -> None: def test_validate_toc_files_skips_api_generated_files(tmp_path: Path) -> None: + # When doc/api/ does not exist (e.g. pre-commit before any docs build), + # api/* TOC entries are skipped because they will be generated later. errors = validate_toc_files({"api/some_module"}, tmp_path) assert errors == [] +def test_validate_toc_files_validates_api_entries_when_api_dir_exists(tmp_path: Path) -> None: + # Once doc/api/ exists (post gen_api_md.py), api/* TOC entries are + # validated like any other file so stale entries are caught. + api_dir = tmp_path / "api" + api_dir.mkdir() + (api_dir / "pyrit_existing.md").write_text("# existing") + + errors = validate_toc_files({"api/pyrit_existing.md", "api/pyrit_missing.md"}, tmp_path) + + assert len(errors) == 1 + assert "pyrit_missing.md" in errors[0] + + def test_validate_toc_files_multiple_missing_files(tmp_path: Path) -> None: errors = validate_toc_files({"a.md", "b.md"}, tmp_path) assert len(errors) == 2 @@ -89,3 +104,18 @@ def test_find_orphaned_files_skips_py_companion_files(tmp_path: Path) -> None: (tmp_path / "notebook.py").write_text("# companion") orphaned = find_orphaned_files(set(), tmp_path) assert not any("notebook.py" in o for o in orphaned) + + +def test_find_orphaned_files_detects_orphaned_api_pages_when_dir_exists(tmp_path: Path) -> None: + # Post-build: doc/api/ exists. Any generated page that isn't in the TOC + # is flagged so stale or unlisted modules surface immediately instead of + # showing up later as a Read the Docs build failure. + api_dir = tmp_path / "api" + api_dir.mkdir() + (api_dir / "pyrit_listed.md").write_text("# listed") + (api_dir / "pyrit_orphan.md").write_text("# orphan") + + orphaned = find_orphaned_files({"api/pyrit_listed.md"}, tmp_path) + + assert any("pyrit_orphan.md" in o for o in orphaned) + assert not any("pyrit_listed.md" in o for o in orphaned)