Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions build_scripts/gen_api_md.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,13 @@
"""

import json
import sys
from pathlib import Path

# Import sibling script for post-generation TOC validation.
sys.path.insert(0, str(Path(__file__).parent))
import validate_docs # noqa: E402

API_JSON_DIR = Path("doc/_api")
API_MD_DIR = Path("doc/api")

Expand Down Expand Up @@ -399,6 +404,15 @@ def main() -> None:
index_path.write_text("\n".join(index_parts), encoding="utf-8")
print(f"Written {index_path}")

# Fail loudly if doc/myst.yml's api/ TOC entries no longer match what we
# generated. Without this check, mismatches only manifest as easy-to-miss
# warnings in the jupyter-book log (--strict does not treat them as errors)
# and silently break the Read the Docs build downstream.
print("Validating doc/myst.yml stays in sync with generated API pages...")
rc = validate_docs.main()
if rc != 0:
sys.exit(rc)


if __name__ == "__main__":
main()
28 changes: 21 additions & 7 deletions build_scripts/validate_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,21 @@ def parse_toc_files(toc_entries: list, files: set | None = None) -> set[str]:


def validate_toc_files(toc_files: set[str], doc_root: Path) -> list[str]:
"""Check that all files referenced in the TOC exist."""
# Directories with auto-generated content (gitignored, created during build)
generated_dirs = {"api/", "api\\"}
"""Check that all files referenced in the TOC exist.

Auto-generated ``api/*.md`` pages are produced by
``build_scripts/gen_api_md.py`` and are gitignored, so they are skipped
while the ``doc/api/`` directory has not been generated yet (e.g. during
pre-commit). Once that directory exists (i.e. after a docs build), the
api/ entries are validated like any other file so the TOC stays in sync
with the generator output.
"""
skip_generated_api = not (doc_root / "api").exists()
api_prefixes = ("api/", "api\\")

errors = []
for file_ref in toc_files:
# Skip files in auto-generated directories
if any(file_ref.startswith(d) for d in generated_dirs):
if skip_generated_api and file_ref.startswith(api_prefixes):
continue
file_path = doc_root / file_ref
if not file_path.exists():
Expand All @@ -49,17 +56,24 @@ def validate_toc_files(toc_files: set[str], doc_root: Path) -> list[str]:


def find_orphaned_files(toc_files: set[str], doc_root: Path) -> list[str]:
"""Find documentation files not referenced in the TOC."""
"""Find documentation files not referenced in the TOC.

``doc/api/`` holds auto-generated reference pages. They are skipped while
the directory does not yet exist (pre-commit, before any docs build), but
once present they are checked for orphans so the TOC reflects exactly the
set of files produced by ``build_scripts/gen_api_md.py``.
"""
skip_dirs = {
"_build",
"_api",
"api",
"css",
".ipynb_checkpoints",
"__pycache__",
"playwright_demo",
"generate_docs",
}
if not (doc_root / "api").exists():
skip_dirs.add("api")
skip_files = {
"myst.yml",
"roakey.png",
Expand Down
2 changes: 1 addition & 1 deletion doc/myst.yml
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,6 @@ project:
- file: api/pyrit_cli_pyrit_scan.md
- file: api/pyrit_cli_pyrit_shell.md
- file: api/pyrit_common.md
- file: api/pyrit_common_cli_helpers.md
- file: api/pyrit_datasets.md
- file: api/pyrit_embedding.md
- file: api/pyrit_exceptions.md
Expand All @@ -221,6 +220,7 @@ project:
- file: api/pyrit_memory.md
- file: api/pyrit_message_normalizer.md
- file: api/pyrit_models.md
- file: api/pyrit_output.md
- file: api/pyrit_prompt_converter.md
- file: api/pyrit_prompt_normalizer.md
- file: api/pyrit_prompt_target.md
Expand Down
30 changes: 30 additions & 0 deletions tests/unit/build_scripts/test_validate_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,25 @@ def test_validate_toc_files_error_when_file_missing(tmp_path: Path) -> None:


def test_validate_toc_files_skips_api_generated_files(tmp_path: Path) -> None:
    # tmp_path contains no "api" directory, mirroring a run that happens
    # before any docs build (e.g. pre-commit). In that state api/* TOC
    # entries are not checked, since the pages are generated later.
    toc_entries = {"api/some_module"}
    assert validate_toc_files(toc_entries, tmp_path) == []


def test_validate_toc_files_validates_api_entries_when_api_dir_exists(tmp_path: Path) -> None:
    # With doc/api/ present (the state after gen_api_md.py has run), api/*
    # TOC entries are checked like any other file, so a stale entry that has
    # no backing page must be reported.
    generated_dir = tmp_path / "api"
    generated_dir.mkdir()
    existing_page = generated_dir / "pyrit_existing.md"
    existing_page.write_text("# existing")

    toc_entries = {"api/pyrit_existing.md", "api/pyrit_missing.md"}
    problems = validate_toc_files(toc_entries, tmp_path)

    # Exactly one error is expected, and it must name the missing page.
    assert len(problems) == 1
    assert "pyrit_missing.md" in problems[0]


def test_validate_toc_files_multiple_missing_files(tmp_path: Path) -> None:
errors = validate_toc_files({"a.md", "b.md"}, tmp_path)
assert len(errors) == 2
Expand Down Expand Up @@ -89,3 +104,18 @@ def test_find_orphaned_files_skips_py_companion_files(tmp_path: Path) -> None:
(tmp_path / "notebook.py").write_text("# companion")
orphaned = find_orphaned_files(set(), tmp_path)
assert not any("notebook.py" in o for o in orphaned)


def test_find_orphaned_files_detects_orphaned_api_pages_when_dir_exists(tmp_path: Path) -> None:
    # Post-build scenario: doc/api/ exists. A generated page that the TOC
    # does not list must be flagged right away, rather than surfacing later
    # as a Read the Docs build failure.
    generated_dir = tmp_path / "api"
    generated_dir.mkdir()
    pages = {
        "pyrit_listed.md": "# listed",
        "pyrit_orphan.md": "# orphan",
    }
    for filename, content in pages.items():
        (generated_dir / filename).write_text(content)

    orphans = find_orphaned_files({"api/pyrit_listed.md"}, tmp_path)

    # The unlisted page is reported; the listed one is not.
    assert any("pyrit_orphan.md" in entry for entry in orphans)
    assert all("pyrit_listed.md" not in entry for entry in orphans)
Loading