Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/MCP.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ Export tools write artifact files with a 24-hour TTL; in-app chat renders downlo

### Export and deliverables

`export_audit_report`, `export_compare_csv`, `export_list_as_csv`, `export_sitemap_xml`, `validate_rich_results`, `compose_custom_report`, `export_custom_report`, `list_export_formats`
`export_audit_report`, `export_compare_csv`, `export_list_as_csv`, `export_sitemap_xml`, `validate_rich_results`, `list_export_formats`

Full audit exports use the same generators as the Export view. PDF export requires `reportlab`.

Expand Down
1 change: 0 additions & 1 deletion src/website_profiling/llm/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ def _max_tool_rounds(cfg: dict[str, str]) -> int:
- Full audit PDF/HTML/CSV/JSON: export_audit_report with format pdf|html|csv|json
- Compare issue diff CSV: export_compare_csv with baseline_report_id
- Export a list as CSV: export_list_as_csv with tool_name and tool_args (e.g. list_broken_links)
- Custom client report: compose_custom_report with title and sections (executive_summary, category_scores, tool, notes), then export_custom_report format=pdf or html
- After export tools succeed, tell the user their download is ready; the UI renders file buttons automatically

Visualization playbook (chat UI renders charts and tables from tool JSON automatically):
Expand Down
15 changes: 15 additions & 0 deletions src/website_profiling/reporting/pdf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""PDF document model and export pipeline."""
from __future__ import annotations

from .builder import build_pdf_document
from .document import PdfDocument
from .options import PdfBuildOptions, PdfLimits
from .render import render_pdf_document

# Public API of the package: the names imported above that are re-exported
# to callers using ``from ... import *`` or reading ``__all__``.
__all__ = [
    "build_pdf_document",
    "render_pdf_document",
    "PdfDocument",
    "PdfBuildOptions",
    "PdfLimits",
]
29 changes: 29 additions & 0 deletions src/website_profiling/reporting/pdf/adapters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Section adapter registry.

Each adapter maps a section key to a function that accepts the raw payload
dict + PdfBuildOptions and returns a list of PdfSection objects. Adapters
that find no relevant data return an empty list.
"""
from __future__ import annotations

from typing import Any, Callable

from ..document import PdfSection
from ..options import PdfBuildOptions

# Signature shared by every section adapter:
# (raw payload dict, PdfBuildOptions) -> list of PdfSection.
SectionAdapterFn = Callable[[dict[str, Any], PdfBuildOptions], list[PdfSection]]

# Registry of section adapters keyed by section name; each adapter
# sub-module fills it in when imported, via the ``register`` decorator.
SECTION_ADAPTERS: dict[str, SectionAdapterFn] = {}


def register(key: str) -> Callable[[SectionAdapterFn], SectionAdapterFn]:
    """Return a decorator that files the decorated adapter under *key*.

    Usage: ``@register("lighthouse")`` above an adapter function. The
    function is stored in ``SECTION_ADAPTERS[key]`` (replacing any adapter
    previously stored under the same key) and handed back unchanged.
    """

    def _decorator(adapter: SectionAdapterFn) -> SectionAdapterFn:
        SECTION_ADAPTERS[key] = adapter
        return adapter

    return _decorator


# Import adapters so they self-register
from . import core, findings, appendix # noqa: E402, F401
68 changes: 68 additions & 0 deletions src/website_profiling/reporting/pdf/adapters/appendix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""Appendix adapter — crawled URL sample and data-source glossary."""
from __future__ import annotations

from typing import Any

from ....tools.export_audit_data import _GLOSSARY_ROWS
from ..document import (
KeyValueBlock,
PdfSection,
PdfTruncation,
SpacerBlock,
UrlListBlock,
)
from ..options import PdfBuildOptions
from . import register


@register("appendix")
def adapt_appendix(payload: dict[str, Any], opts: PdfBuildOptions) -> list[PdfSection]:
    """Build the appendix sections: a crawled-URL sample and a glossary.

    Args:
        payload: Raw audit payload; only the ``"links"`` entry is read here.
        opts: Build options. ``include_appendix`` gates the whole adapter,
            ``limits.urls_sample`` caps the URL sample, and
            ``include_glossary`` gates the glossary section.

    Returns:
        Zero, one or two sections. Empty when the appendix is disabled; the
        URL section is omitted when the payload holds no dict-shaped links.
    """
    if not opts.include_appendix:
        return []

    appendix: list[PdfSection] = []

    # --- Crawled URLs sample ---
    # Entries of payload["links"] that are not dicts are ignored.
    link_dicts = [entry for entry in (payload.get("links") or []) if isinstance(entry, dict)]
    if link_dicts:
        cap = opts.limits.urls_sample
        url_rows = []
        for entry in link_dicts[:cap]:
            url_rows.append(
                {
                    "url": str(entry.get("url") or ""),
                    "status": str(entry.get("status") or ""),
                    "title": str(entry.get("title") or "").strip(),
                }
            )

        # Only show the title column when at least one sampled row has one.
        any_title = any(row["title"] for row in url_rows)

        # Record a truncation notice only when links were actually dropped.
        if len(link_dicts) > cap:
            url_trunc = PdfTruncation(shown=len(url_rows), total=len(link_dicts))
        else:
            url_trunc = None

        url_block = UrlListBlock(
            id="appendix.url_list",
            rows=url_rows,
            show_title=any_title,
            truncation=url_trunc,
        )
        url_section = PdfSection(
            id="appendix.urls",
            section_key="links",
            title="Crawled URLs (sample)",
            priority=80,
            page_break_before=False,
            blocks=[url_block, SpacerBlock(id="appendix.url_spacer", height_pt=6)],
        )
        appendix.append(url_section)

    # --- Glossary ---
    if opts.include_glossary:
        glossary_pairs = []
        for term, desc in _GLOSSARY_ROWS:
            glossary_pairs.append((term, desc))
        glossary_block = KeyValueBlock(
            id="appendix.glossary_kv",
            rows=glossary_pairs,
            layout="glossary",
        )
        appendix.append(
            PdfSection(
                id="appendix.glossary",
                section_key="core",
                title="Data source glossary",
                priority=90,
                blocks=[glossary_block],
            )
        )

    return appendix
38 changes: 38 additions & 0 deletions src/website_profiling/reporting/pdf/adapters/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Core adapter — audit-details section (category scores live on cover)."""
from __future__ import annotations

from typing import Any

from ....tools.export_audit_data import _format_report_date, _summary_lines
from ..document import KeyValueBlock, PdfSection, SpacerBlock
from ..options import PdfBuildOptions
from . import register


@register("core")
def adapt_core(payload: dict[str, Any], opts: PdfBuildOptions) -> list[PdfSection]:
    """Build the "Audit details" key/value section from the payload summary.

    Category scores are rendered on the cover page, so they are not
    duplicated here.

    Args:
        payload: Raw audit payload, passed to ``_summary_lines``.
        opts: Build options; not consulted by this adapter.

    Returns:
        A single-section list, or an empty list when ``_summary_lines``
        yields no rows.
    """
    summary = _summary_lines(payload)
    if not summary:
        return []

    # The "Report generated" value is passed through _format_report_date;
    # every other row is kept verbatim.
    detail_rows: list[tuple[str, str]] = [
        (label, _format_report_date(value) if label == "Report generated" else value)
        for label, value in summary
    ]

    details_block = KeyValueBlock(id="core.audit_kv", rows=detail_rows, layout="audit")
    trailing_gap = SpacerBlock(id="core.audit_spacer", height_pt=6)
    return [
        PdfSection(
            id="core.audit_details",
            section_key="core",
            title="Audit details",
            priority=70,
            blocks=[details_block, trailing_gap],
        )
    ]
54 changes: 54 additions & 0 deletions src/website_profiling/reporting/pdf/adapters/findings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Findings adapter — normalizes and groups all audit issues."""
from __future__ import annotations

from typing import Any

from ....tools.export_audit_data import _issues_rows, _priority_sort_key
from ..document import PdfSection, PdfTruncation
from ..normalize import group_issues_for_pdf, normalize_issue_for_pdf
from ..options import PdfBuildOptions
from . import register


@register("findings")
def adapt_findings(payload: dict[str, Any], opts: PdfBuildOptions) -> list[PdfSection]:
    """Build the single "Findings" section from the payload's issue rows.

    Rows from ``_issues_rows`` are ordered with ``_priority_sort_key``, cut
    to ``opts.limits.issues_total``, normalized one by one and then grouped
    by ``group_issues_for_pdf``.

    Args:
        payload: Raw audit payload, passed to ``_issues_rows``.
        opts: Build options; supplies the issue limits and the
            ``include_recommendations`` flag.

    Returns:
        A single-section list, or an empty list when there are no issue
        rows or grouping produces nothing. The section carries a
        truncation notice when more rows existed than the total limit.
    """
    issue_rows = _issues_rows(payload)
    if not issue_rows:
        return []

    ordered = sorted(issue_rows, key=_priority_sort_key)
    total = len(ordered)
    limits = opts.limits
    max_issues = limits.issues_total

    pdf_issues = []
    for row in ordered[:max_issues]:
        pdf_issues.append(
            normalize_issue_for_pdf(row, include_recommendation=opts.include_recommendations)
        )

    groups = group_issues_for_pdf(
        pdf_issues,
        issues_per_group=limits.issues_per_group,
        issues_total=max_issues,
    )
    if not groups:
        return []

    # Only flag truncation when the cap actually dropped rows.
    section_trunc: PdfTruncation | None = (
        PdfTruncation(
            shown=max_issues,
            total=total,
            reason="limit",
            continue_in=["CSV", "workbook"],
        )
        if total > max_issues
        else None
    )

    findings_section = PdfSection(
        id="findings",
        section_key="findings",
        title="Findings",
        priority=20,
        page_break_before=False,
        blocks=list(groups),  # type: ignore[arg-type]
        truncation=section_trunc,
    )
    return [findings_section]
Loading
Loading