diff --git a/build_scripts/gen_api_md.py b/build_scripts/gen_api_md.py index 02225dec4..da128dc2d 100644 --- a/build_scripts/gen_api_md.py +++ b/build_scripts/gen_api_md.py @@ -17,8 +17,13 @@ """ import json +import sys from pathlib import Path +# Import sibling script for post-generation TOC validation. +sys.path.insert(0, str(Path(__file__).parent)) +import validate_docs # noqa: E402 + API_JSON_DIR = Path("doc/_api") API_MD_DIR = Path("doc/api") @@ -399,6 +404,15 @@ def main() -> None: index_path.write_text("\n".join(index_parts), encoding="utf-8") print(f"Written {index_path}") + # Fail loudly if doc/myst.yml's api/ TOC entries no longer match what we + # generated. Without this check, mismatches only manifest as easy-to-miss + # warnings in the jupyter-book log (--strict does not treat them as errors) + # and silently break the Read the Docs build downstream. + print("Validating doc/myst.yml stays in sync with generated API pages...") + rc = validate_docs.main() + if rc != 0: + sys.exit(rc) + if __name__ == "__main__": main() diff --git a/build_scripts/validate_docs.py b/build_scripts/validate_docs.py index c574bf9e7..e97d824c6 100644 --- a/build_scripts/validate_docs.py +++ b/build_scripts/validate_docs.py @@ -33,14 +33,21 @@ def parse_toc_files(toc_entries: list, files: set | None = None) -> set[str]: def validate_toc_files(toc_files: set[str], doc_root: Path) -> list[str]: - """Check that all files referenced in the TOC exist.""" - # Directories with auto-generated content (gitignored, created during build) - generated_dirs = {"api/", "api\\"} + """Check that all files referenced in the TOC exist. + + Auto-generated ``api/*.md`` pages are produced by + ``build_scripts/gen_api_md.py`` and are gitignored, so they are skipped + while the ``doc/api/`` directory has not been generated yet (e.g. during + pre-commit). Once that directory exists (i.e. 
after a docs build), the + api/ entries are validated like any other file so the TOC stays in sync + with the generator output. + """ + skip_generated_api = not (doc_root / "api").exists() + api_prefixes = ("api/", "api\\") errors = [] for file_ref in toc_files: - # Skip files in auto-generated directories - if any(file_ref.startswith(d) for d in generated_dirs): + if skip_generated_api and file_ref.startswith(api_prefixes): continue file_path = doc_root / file_ref if not file_path.exists(): @@ -49,17 +56,24 @@ def validate_toc_files(toc_files: set[str], doc_root: Path) -> list[str]: def find_orphaned_files(toc_files: set[str], doc_root: Path) -> list[str]: - """Find documentation files not referenced in the TOC.""" + """Find documentation files not referenced in the TOC. + + ``doc/api/`` holds auto-generated reference pages. They are skipped while + the directory does not yet exist (pre-commit, before any docs build), but + once present they are checked for orphans so the TOC reflects exactly the + set of files produced by ``build_scripts/gen_api_md.py``. 
def test_validate_toc_files_skips_api_generated_files(tmp_path: Path) -> None:
    """``api/*`` TOC entries produce no errors while ``doc/api/`` is absent."""
    # No api/ directory is created under tmp_path here, which mirrors a
    # pre-commit run that happens before any docs build: the referenced
    # page is expected to be generated later, so it must not be reported.
    toc_entries = {"api/some_module"}
    reported = validate_toc_files(toc_entries, tmp_path)
    assert reported == []
def test_find_orphaned_files_detects_orphaned_api_pages_when_dir_exists(tmp_path: Path) -> None:
    """Generated ``api/`` pages missing from the TOC are reported as orphans.

    Simulates the post-build state in which ``doc/api/`` exists: one page is
    listed in the TOC and one is not. Only the unlisted page may show up in
    the orphan report.
    """
    generated_dir = tmp_path / "api"
    generated_dir.mkdir()
    listed_page = generated_dir / "pyrit_listed.md"
    unlisted_page = generated_dir / "pyrit_orphan.md"
    listed_page.write_text("# listed")
    unlisted_page.write_text("# orphan")

    toc_entries = {"api/pyrit_listed.md"}
    reported = find_orphaned_files(toc_entries, tmp_path)

    # Substring matching keeps the check independent of how the report
    # formats each path (separator, prefix, surrounding message text).
    mentions_orphan = [entry for entry in reported if "pyrit_orphan.md" in entry]
    mentions_listed = [entry for entry in reported if "pyrit_listed.md" in entry]
    assert mentions_orphan
    assert not mentions_listed