diff --git a/docs/MCP.md b/docs/MCP.md
index 5a5c0db..7a67883 100644
--- a/docs/MCP.md
+++ b/docs/MCP.md
@@ -214,7 +214,7 @@ Export tools write artifact files with a 24-hour TTL; in-app chat renders downlo
### Export and deliverables
-`export_audit_report`, `export_compare_csv`, `export_list_as_csv`, `export_sitemap_xml`, `validate_rich_results`, `compose_custom_report`, `export_custom_report`, `list_export_formats`
+`export_audit_report`, `export_compare_csv`, `export_list_as_csv`, `export_sitemap_xml`, `validate_rich_results`, `list_export_formats`
Full audit exports use the same generators as the Export view. PDF export requires `reportlab`.
diff --git a/src/website_profiling/llm/agent.py b/src/website_profiling/llm/agent.py
index 40418c6..985abdb 100644
--- a/src/website_profiling/llm/agent.py
+++ b/src/website_profiling/llm/agent.py
@@ -79,7 +79,6 @@ def _max_tool_rounds(cfg: dict[str, str]) -> int:
- Full audit PDF/HTML/CSV/JSON: export_audit_report with format pdf|html|csv|json
- Compare issue diff CSV: export_compare_csv with baseline_report_id
- Export a list as CSV: export_list_as_csv with tool_name and tool_args (e.g. list_broken_links)
-- Custom client report: compose_custom_report with title and sections (executive_summary, category_scores, tool, notes), then export_custom_report format=pdf or html
- After export tools succeed, tell the user their download is ready; the UI renders file buttons automatically
Visualization playbook (chat UI renders charts and tables from tool JSON automatically):
diff --git a/src/website_profiling/reporting/pdf/__init__.py b/src/website_profiling/reporting/pdf/__init__.py
new file mode 100644
index 0000000..c2aabc1
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/__init__.py
@@ -0,0 +1,15 @@
+"""PDF document model and export pipeline."""
+from __future__ import annotations
+
+from .builder import build_pdf_document
+from .document import PdfDocument
+from .options import PdfBuildOptions, PdfLimits
+from .render import render_pdf_document
+
+__all__ = [
+ "build_pdf_document",
+ "render_pdf_document",
+ "PdfDocument",
+ "PdfBuildOptions",
+ "PdfLimits",
+]
diff --git a/src/website_profiling/reporting/pdf/adapters/__init__.py b/src/website_profiling/reporting/pdf/adapters/__init__.py
new file mode 100644
index 0000000..50c1f7d
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/adapters/__init__.py
@@ -0,0 +1,29 @@
+"""Section adapter registry.
+
+Each adapter maps a section key to a function that accepts the raw payload
+dict + PdfBuildOptions and returns a list of PdfSection objects. Adapters
+that find no relevant data return an empty list.
+"""
+from __future__ import annotations
+
+from typing import Any, Callable
+
+from ..document import PdfSection
+from ..options import PdfBuildOptions
+
+SectionAdapterFn = Callable[[dict[str, Any], PdfBuildOptions], list[PdfSection]]
+
+# Populated by each sub-module at import time
+SECTION_ADAPTERS: dict[str, SectionAdapterFn] = {}
+
+
+def register(key: str) -> Callable[[SectionAdapterFn], SectionAdapterFn]:
+ """Return a decorator that stores the function in SECTION_ADAPTERS under ``key``, e.g. @register("lighthouse")."""
+ def _wrap(fn: SectionAdapterFn) -> SectionAdapterFn:
+ SECTION_ADAPTERS[key] = fn  # plain dict assignment: a later registration under the same key replaces this one
+ return fn  # handed back unchanged, so the decorated name still refers to the original function
+ return _wrap
+
+
+# Import adapters so they self-register
+from . import core, findings, appendix # noqa: E402, F401
diff --git a/src/website_profiling/reporting/pdf/adapters/appendix.py b/src/website_profiling/reporting/pdf/adapters/appendix.py
new file mode 100644
index 0000000..16329ff
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/adapters/appendix.py
@@ -0,0 +1,68 @@
+"""Appendix adapter — crawled URL sample and data-source glossary."""
+from __future__ import annotations
+
+from typing import Any
+
+from ....tools.export_audit_data import _GLOSSARY_ROWS
+from ..document import (
+ KeyValueBlock,
+ PdfSection,
+ PdfTruncation,
+ SpacerBlock,
+ UrlListBlock,
+)
+from ..options import PdfBuildOptions
+from . import register
+
+
+@register("appendix")
+def adapt_appendix(payload: dict[str, Any], opts: PdfBuildOptions) -> list[PdfSection]:
+    """Build the appendix sections: a crawled-URL sample and the glossary.
+
+    Returns an empty list when ``opts.include_appendix`` is false.
+    """
+    if not opts.include_appendix:
+        return []
+
+    out: list[PdfSection] = []
+    # --- Crawled URLs sample (dict entries under "links" only) ---
+    crawled = [entry for entry in (payload.get("links") or []) if isinstance(entry, dict)]
+    if crawled:
+        cap = opts.limits.urls_sample
+        url_rows = []
+        for entry in crawled[:cap]:
+            url_rows.append({
+                "url": str(entry.get("url") or ""),
+                "status": str(entry.get("status") or ""),
+                "title": str(entry.get("title") or "").strip(),
+            })
+        overflow = None
+        if len(crawled) > cap:
+            overflow = PdfTruncation(shown=len(url_rows), total=len(crawled))
+        url_block = UrlListBlock(
+            id="appendix.url_list",
+            rows=url_rows,
+            show_title=any(row["title"] for row in url_rows),
+            truncation=overflow,
+        )
+        out.append(PdfSection(
+            id="appendix.urls",
+            section_key="links",
+            title="Crawled URLs (sample)",
+            priority=80,
+            page_break_before=False,
+            blocks=[url_block, SpacerBlock(id="appendix.url_spacer", height_pt=6)],
+        ))
+
+    # --- Glossary ---
+    if opts.include_glossary:
+        glossary = KeyValueBlock(
+            id="appendix.glossary_kv",
+            rows=[(term, desc) for term, desc in _GLOSSARY_ROWS],
+            layout="glossary",
+        )
+        out.append(PdfSection(
+            id="appendix.glossary", section_key="core", title="Data source glossary",
+            priority=90, blocks=[glossary],
+        ))
+    return out
diff --git a/src/website_profiling/reporting/pdf/adapters/core.py b/src/website_profiling/reporting/pdf/adapters/core.py
new file mode 100644
index 0000000..27bef7e
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/adapters/core.py
@@ -0,0 +1,38 @@
+"""Core adapter — audit-details section (category scores live on cover)."""
+from __future__ import annotations
+
+from typing import Any
+
+from ....tools.export_audit_data import _format_report_date, _summary_lines
+from ..document import KeyValueBlock, PdfSection, SpacerBlock
+from ..options import PdfBuildOptions
+from . import register
+
+
+@register("core")
+def adapt_core(payload: dict[str, Any], opts: PdfBuildOptions) -> list[PdfSection]:
+    """Build the "Audit details" key/value section from the payload summary.
+
+    Category scores are rendered on the cover page and are not repeated here.
+    Returns an empty list when ``_summary_lines`` yields no rows; ``opts`` is
+    accepted for the adapter signature but not read.
+    """
+    summary = _summary_lines(payload)
+    if not summary:
+        return []
+    # Only the "Report generated" value is reformatted; other rows pass through.
+    detail_rows: list[tuple[str, str]] = [
+        (label, _format_report_date(value) if label == "Report generated" else value)
+        for label, value in summary
+    ]
+    details = PdfSection(
+        id="core.audit_details",
+        section_key="core",
+        title="Audit details",
+        priority=70,
+        blocks=[
+            KeyValueBlock(id="core.audit_kv", rows=detail_rows, layout="audit"),
+            SpacerBlock(id="core.audit_spacer", height_pt=6),
+        ],
+    )
+    return [details]
diff --git a/src/website_profiling/reporting/pdf/adapters/findings.py b/src/website_profiling/reporting/pdf/adapters/findings.py
new file mode 100644
index 0000000..ebc18e8
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/adapters/findings.py
@@ -0,0 +1,54 @@
+"""Findings adapter — normalizes and groups all audit issues."""
+from __future__ import annotations
+
+from typing import Any
+
+from ....tools.export_audit_data import _issues_rows, _priority_sort_key
+from ..document import PdfSection, PdfTruncation
+from ..normalize import group_issues_for_pdf, normalize_issue_for_pdf
+from ..options import PdfBuildOptions
+from . import register
+
+
+@register("findings")
+def adapt_findings(payload: dict[str, Any], opts: PdfBuildOptions) -> list[PdfSection]:
+    """Build the single "Findings" section from every issue in the payload.
+
+    Rows are ordered with ``_priority_sort_key``, capped at
+    ``opts.limits.issues_total``, normalized, and grouped into issue blocks.
+    Returns an empty list when there are no rows or no groups result.
+    """
+    rows = _issues_rows(payload)
+    if not rows:
+        return []
+
+    limits = opts.limits
+    ordered = sorted(rows, key=_priority_sort_key)
+    available = len(ordered)
+    normalized = []
+    for row in ordered[: limits.issues_total]:
+        normalized.append(
+            normalize_issue_for_pdf(row, include_recommendation=opts.include_recommendations)
+        )
+
+    blocks = group_issues_for_pdf(
+        normalized,
+        issues_per_group=limits.issues_per_group,
+        issues_total=limits.issues_total,
+    )
+    if not blocks:
+        return []
+
+    # Flag the section as truncated only when the total cap dropped rows.
+    overflow: PdfTruncation | None = None
+    if available > limits.issues_total:
+        overflow = PdfTruncation(
+            shown=limits.issues_total, total=available, reason="limit", continue_in=["CSV", "workbook"],
+        )
+    findings = PdfSection(
+        id="findings", section_key="findings", title="Findings", priority=20,
+        page_break_before=False,
+        blocks=list(blocks),  # type: ignore[arg-type]
+        truncation=overflow,
+    )
+    return [findings]
diff --git a/src/website_profiling/reporting/pdf/builder.py b/src/website_profiling/reporting/pdf/builder.py
new file mode 100644
index 0000000..8f2d30b
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/builder.py
@@ -0,0 +1,178 @@
+"""build_pdf_document — assembles a PdfDocument from a raw report payload."""
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import Any, Optional
+
+from ...tools.export_audit_data import (
+ _executive_export_data,
+ _executive_source_label,
+ _format_report_date,
+ _issue_priority_counts,
+ _issues_rows,
+ _overall_score,
+ _priority_sort_key,
+ _score_band,
+)
+from .document import (
+ SCHEMA_VERSION,
+ PdfCoverBlock,
+ PdfDocument,
+ PdfFooterBlock,
+ PdfIssue,
+ PdfMeta,
+ PdfScoreHero,
+ ScoreCard,
+ ScoreCardsBlock,
+ StatChip,
+ StatGridBlock,
+)
+from .normalize import normalize_issue_for_pdf
+from .options import PdfBuildOptions
+from .adapters import SECTION_ADAPTERS
+
+
+def _build_meta(
+    payload: dict[str, Any],
+    opts: PdfBuildOptions,
+    exported_at: str,
+    all_issue_counts: dict[str, int],
+    overall: Optional[int],
+    included_sections: list[str],
+) -> PdfMeta:
+    """Assemble the ``PdfMeta`` header from payload fields and precomputed values.
+
+    Missing payload values fall back to "Site Audit" (property) and
+    "Technical SEO Audit Report" (title).
+    """
+    report_meta = payload.get("report_meta") or {}
+    # Only a dict-shaped report_meta contributes data sources; anything else yields none.
+    raw_sources = (report_meta.get("data_sources") or []) if isinstance(report_meta, dict) else []
+    return PdfMeta(
+        report_id=opts.report_id,
+        property=str(payload.get("site_name") or "Site Audit"),
+        report_title=str(payload.get("report_title") or "Technical SEO Audit Report"),
+        generated_at=_format_report_date(str(payload.get("report_generated_at") or "")),
+        exported_at=exported_at,
+        data_sources=[str(source) for source in raw_sources],
+        health_score=overall,
+        issue_counts=all_issue_counts,
+        included_sections=included_sections,
+    )
+
+
+def _build_cover(
+    payload: dict[str, Any],
+    opts: PdfBuildOptions,
+    overall: Optional[int],
+    all_issue_counts: dict[str, int],
+) -> PdfCoverBlock:
+    """Build the cover block: score hero, priority strip, category cards, summary, top issues.
+
+    Priority counts missing from ``all_issue_counts`` are shown as 0. Top issues keep
+    one row per distinct headline, capped at ``opts.limits.top_issues_cover``.
+    """
+    # Imported once per call rather than once per category; kept function-local as before.
+    from ...reporting.terminology import category_display_name
+
+    site = str(payload.get("site_name") or "Site Audit")
+    report_title = str(payload.get("report_title") or "Technical SEO Audit Report")
+
+    score_txt, band = _score_band(float(overall) if overall is not None else None)
+    hero = PdfScoreHero(score=score_txt, band=band, label="Overall health score")  # type: ignore[arg-type]
+
+    priority_chips = [
+        StatChip(label=level.title(), value=str(all_issue_counts.get(level, 0)), tone=level)  # type: ignore[arg-type]
+        for level in ("critical", "high", "medium", "low")
+    ]
+    priority_strip = StatGridBlock(id="cover.priority_strip", chips=priority_chips, columns=4)
+
+    score_cards: list[ScoreCard] = []
+    for cat in payload.get("categories") or []:
+        if not isinstance(cat, dict):
+            continue
+        name = category_display_name(str(cat.get("name") or "Category"))
+        raw = cat.get("score")
+        sv: float | None = None
+        if raw is not None:
+            try:
+                sv = float(raw)
+            except (TypeError, ValueError):
+                sv = None  # unparseable score: hand None to _score_band
+        stxt, sband = _score_band(sv)
+        issue_n = len(cat.get("issues") or [])
+        score_cards.append(ScoreCard(name=name, score=stxt, issue_count=issue_n, tone=sband))  # type: ignore[arg-type]
+    cat_scores_block = ScoreCardsBlock(id="cover.category_scores", cards=score_cards)
+
+    # Executive summary
+    exec_data = _executive_export_data(payload)
+    exec_source_raw = exec_data.get("source")
+    exec_source = _executive_source_label(exec_source_raw) if exec_source_raw else None
+    priorities_list: list[str] = exec_data.get("priorities") or []
+
+    # Top issues for cover — one row per distinct headline; prefer rows with a URL.
+    # dicts preserve insertion order, so replacing a value keeps the first-seen position.
+    by_headline: dict[str, PdfIssue] = {}
+    for row in sorted(_issues_rows(payload), key=_priority_sort_key):
+        issue = normalize_issue_for_pdf(row, include_recommendation=False)
+        current = by_headline.get(issue.headline)
+        if current is None or (not current.url and issue.url):
+            by_headline[issue.headline] = issue
+    top_issues = list(by_headline.values())[: opts.limits.top_issues_cover]
+
+    return PdfCoverBlock(
+        headline=f"Site Audit — {site}",
+        subtitle=report_title,
+        hero=hero,
+        priority_strip=priority_strip,
+        category_scores=cat_scores_block,
+        executive_summary=exec_data.get("summary") or None,
+        executive_source=exec_source,
+        priorities_list=priorities_list[:8],
+        top_issues=top_issues,
+    )
+
+
+def build_pdf_document(
+    payload: dict[str, Any],
+    opts: Optional[PdfBuildOptions] = None,
+) -> PdfDocument:
+    """Transform a raw ReportPayload dict into a PdfDocument ready for rendering.
+
+    Section keys come from ``opts.effective_sections()`` (default options when
+    ``opts`` is None). Keys with no entry in ``SECTION_ADAPTERS`` are skipped,
+    and the collected sections are ordered by ascending ``priority``.
+    """
+    options = PdfBuildOptions() if opts is None else opts
+
+    stamp = datetime.now(timezone.utc).strftime("%d %B %Y, %H:%M UTC")
+    health = _overall_score(payload)
+    counts = _issue_priority_counts(_issues_rows(payload))
+    wanted = options.effective_sections()
+
+    # Run every requested adapter that is actually registered.
+    collected: list = []
+    for section_key in wanted:
+        adapt = SECTION_ADAPTERS.get(section_key)
+        if adapt is not None:
+            collected.extend(adapt(payload, options))
+
+    # Lower priority values are placed earlier in the document.
+    collected.sort(key=lambda section: section.priority)
+
+    meta = _build_meta(
+        payload, options, stamp, counts, health,
+        included_sections=wanted,
+    )
+    cover = _build_cover(payload, options, health, counts)
+    footer = PdfFooterBlock(exported_at=stamp)
+
+    return PdfDocument(
+        schema_version=SCHEMA_VERSION,
+        document_kind="audit",
+        meta=meta,
+        cover=cover,
+        sections=collected,
+        footer=footer,
+        appendix=None,  # appendix content is included as PdfSections in sections list
+    )
diff --git a/src/website_profiling/reporting/pdf/document.py b/src/website_profiling/reporting/pdf/document.py
new file mode 100644
index 0000000..94e9aa4
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/document.py
@@ -0,0 +1,322 @@
+"""PdfDocument v1 — versioned, block-based document model.
+
+All types are JSON-serializable dataclasses. The renderer consumes these;
+no ReportLab types appear here.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Literal, Optional
+
+SCHEMA_VERSION = "1.0"
+
+# ---------------------------------------------------------------------------
+# Primitive / shared
+# ---------------------------------------------------------------------------
+
+PriorityTone = Literal["critical", "high", "medium", "low", "neutral", "good", "fair", "poor"]
+DocumentKind = Literal["audit", "compare"]
+
+
+@dataclass
+class PdfTruncation:
+ shown: int
+ total: int
+ reason: Literal["limit", "page_budget", "empty"] = "limit"
+ continue_in: list[str] = field(default_factory=lambda: ["CSV", "workbook"])
+
+
+# ---------------------------------------------------------------------------
+# Block types — renderer handles each `type` discriminator
+# ---------------------------------------------------------------------------
+
+@dataclass
+class HeadingBlock:
+ type: str = field(default="heading", init=False)
+ id: str = ""
+ text: str = ""
+ level: int = 2 # 2 = section heading, 3 = sub-heading
+ visible: bool = True
+
+
+@dataclass
+class ParagraphBlock:
+ type: str = field(default="paragraph", init=False)
+ id: str = ""
+ text: str = ""
+ italic: bool = False
+ visible: bool = True
+
+
+@dataclass
+class CalloutBlock:
+ type: str = field(default="callout", init=False)
+ id: str = ""
+ text: str = ""
+ severity: Literal["info", "warn", "critical"] = "info"
+ visible: bool = True
+
+
+@dataclass
+class SpacerBlock:
+ type: str = field(default="spacer", init=False)
+ id: str = ""
+ height_pt: float = 8.0
+ visible: bool = True
+
+
+@dataclass
+class KpiItem:
+ label: str
+ value: str
+ delta: Optional[str] = None
+ tone: PriorityTone = "neutral"
+ help: Optional[str] = None
+
+
+@dataclass
+class KpiRowBlock:
+ type: str = field(default="kpi_row", init=False)
+ id: str = ""
+ items: list[KpiItem] = field(default_factory=list)
+ visible: bool = True
+
+
+@dataclass
+class StatChip:
+ label: str
+ value: str
+ tone: PriorityTone = "neutral"
+
+
+@dataclass
+class StatGridBlock:
+ type: str = field(default="stat_grid", init=False)
+ id: str = ""
+ chips: list[StatChip] = field(default_factory=list)
+ columns: int = 4
+ visible: bool = True
+
+
+@dataclass
+class KeyValueBlock:
+ type: str = field(default="key_value", init=False)
+ id: str = ""
+ rows: list[tuple[str, str]] = field(default_factory=list)
+ layout: Literal["default", "audit", "glossary"] = "default"
+ visible: bool = True
+
+
+@dataclass
+class ScoreCard:
+ name: str
+ score: Optional[str] # formatted string, e.g. "87" or "—"
+ issue_count: int = 0
+ tone: Literal["score-good", "score-fair", "score-poor", "score-na"] = "score-na"
+
+
+@dataclass
+class ScoreCardsBlock:
+ type: str = field(default="score_cards", init=False)
+ id: str = ""
+ cards: list[ScoreCard] = field(default_factory=list)
+ visible: bool = True
+
+
+@dataclass
+class TableColumn:
+ key: str
+ label: str
+ width: Literal["narrow", "medium", "wide", "url"] = "medium"
+ align: Literal["left", "center", "right"] = "left"
+
+
+@dataclass
+class MetricTableBlock:
+ type: str = field(default="metric_table", init=False)
+ id: str = ""
+ columns: list[TableColumn] = field(default_factory=list)
+ rows: list[dict[str, str]] = field(default_factory=list)
+ repeat_header: bool = True
+ truncation: Optional[PdfTruncation] = None
+ visible: bool = True
+
+
+@dataclass
+class UrlListBlock:
+ type: str = field(default="url_list", init=False)
+ id: str = ""
+ rows: list[dict[str, str]] = field(default_factory=list) # keys: url, status, title
+ show_title: bool = True
+ truncation: Optional[PdfTruncation] = None
+ visible: bool = True
+
+
+# ---------------------------------------------------------------------------
+# Issue blocks — primary findings format
+# ---------------------------------------------------------------------------
+
+@dataclass
+class PdfIssueMetrics:
+ gsc_clicks: Optional[int] = None
+ gsc_impressions: Optional[int] = None
+ ga4_sessions: Optional[int] = None
+ impact_score: Optional[float] = None
+ lh_audit_id: Optional[str] = None
+
+
+@dataclass
+class PdfIssue:
+ id: str  # identifier for the issue; normalize.py derives it from a hash of the raw row
+ priority: str  # grouping in normalize.py recognises "critical", "high", "medium", "low"
+ category: str
+ headline: str # cleaned display text; normalize.py shortens long headlines (over 100 chars) — no 80-char cap is enforced
+ url: Optional[str] = None
+ path: Optional[str] = None # display-only short path
+ detail: Optional[str] = None
+ recommendation: Optional[str] = None
+ metrics: Optional[PdfIssueMetrics] = None
+ tags: list[str] = field(default_factory=list)
+ related_urls: list[str] = field(default_factory=list) # collapsed duplicates
+
+
+@dataclass
+class IssueGroupBlock:
+ type: str = field(default="issue_group", init=False)
+ id: str = ""
+ title: str = ""
+ group_label: str = "" # e.g. "Critical — 1 issue"
+ issues: list[PdfIssue] = field(default_factory=list)
+ render_as: Literal["list", "compact_table"] = "list"
+ truncation: Optional[PdfTruncation] = None
+ visible: bool = True
+
+
+@dataclass
+class IssueTableBlock:
+ """Fallback tabular rendering for dense medium/low groups."""
+ type: str = field(default="issue_table", init=False)
+ id: str = ""
+ title: str = ""
+ issues: list[PdfIssue] = field(default_factory=list)
+ truncation: Optional[PdfTruncation] = None
+ visible: bool = True
+
+
+@dataclass
+class MarkdownBlock:
+ type: str = field(default="markdown", init=False)
+ id: str = ""
+ text: str = ""
+ visible: bool = True
+
+
+# Union type for IDE / type-checkers
+PdfBlock = (
+ HeadingBlock
+ | ParagraphBlock
+ | CalloutBlock
+ | SpacerBlock
+ | KpiRowBlock
+ | StatGridBlock
+ | KeyValueBlock
+ | ScoreCardsBlock
+ | MetricTableBlock
+ | UrlListBlock
+ | IssueGroupBlock
+ | IssueTableBlock
+ | MarkdownBlock
+)
+
+# ---------------------------------------------------------------------------
+# Cover
+# ---------------------------------------------------------------------------
+
+@dataclass
+class PdfScoreHero:
+ score: Optional[str]
+ band: Literal["score-good", "score-fair", "score-poor", "score-na"]
+ label: str # e.g. "Overall health score"
+
+
+@dataclass
+class PdfCoverBlock:
+ headline: str
+ subtitle: str
+ hero: PdfScoreHero
+ priority_strip: StatGridBlock
+ category_scores: ScoreCardsBlock
+ executive_summary: Optional[str] = None # prose paragraph
+ executive_source: Optional[str] = None
+ priorities_list: list[str] = field(default_factory=list)
+ top_issues: list[PdfIssue] = field(default_factory=list)
+
+
+# ---------------------------------------------------------------------------
+# Section
+# ---------------------------------------------------------------------------
+
+@dataclass
+class PdfSection:
+ id: str
+ section_key: str
+ title: str
+ priority: int = 50 # lower = earlier in document
+ page_break_before: bool = False
+ keep_with_next_blocks: int = 1
+ source_label: Optional[str] = None
+ provenance: Optional[str] = None
+ blocks: list[Any] = field(default_factory=list) # list[PdfBlock]
+ truncation: Optional[PdfTruncation] = None
+
+
+# ---------------------------------------------------------------------------
+# Appendix
+# ---------------------------------------------------------------------------
+
+@dataclass
+class PdfAppendix:
+ url_sample: Optional[UrlListBlock] = None
+ audit_details: Optional[KeyValueBlock] = None
+ glossary: Optional[KeyValueBlock] = None
+
+
+# ---------------------------------------------------------------------------
+# Meta / Footer
+# ---------------------------------------------------------------------------
+
+@dataclass
+class PdfMeta:
+ report_id: Optional[int]
+ property: str
+ report_title: str
+ generated_at: str # formatted for display
+ exported_at: str
+ data_sources: list[str]
+ health_score: Optional[int]
+ issue_counts: dict[str, int] # {critical, high, medium, low}
+ truncation_summary: list[str] = field(default_factory=list)
+ included_sections: list[str] = field(default_factory=list)
+ locale: str = "en"
+
+
+@dataclass
+class PdfFooterBlock:
+ confidential_note: str = "Confidential — prepared for client review."
+ generator: str = "Site Audit"
+ exported_at: str = ""
+
+
+# ---------------------------------------------------------------------------
+# Root document
+# ---------------------------------------------------------------------------
+
+@dataclass
+class PdfDocument:
+ schema_version: str
+ document_kind: DocumentKind
+ meta: PdfMeta
+ cover: PdfCoverBlock
+ sections: list[PdfSection]
+ footer: PdfFooterBlock
+ appendix: Optional[PdfAppendix] = None
diff --git a/src/website_profiling/reporting/pdf/normalize.py b/src/website_profiling/reporting/pdf/normalize.py
new file mode 100644
index 0000000..94cc4c7
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/normalize.py
@@ -0,0 +1,354 @@
+"""Issue normalization and grouping for PDF output.
+
+Transforms raw ``_issues_rows`` dicts (which mirror the DB payload) into
+``PdfIssue`` objects suited for print layout:
+- Strips duplicated URLs from headlines
+- Expands Lighthouse audit-id abbreviations into human labels
+- Groups by priority → category for use by IssueGroupBlock
+"""
+from __future__ import annotations
+
+import hashlib
+import re
+from typing import Any, Optional
+from urllib.parse import urlparse
+
+from .document import IssueGroupBlock, PdfIssue, PdfIssueMetrics, PdfTruncation
+
+# ---------------------------------------------------------------------------
+# Lighthouse audit-id → human label registry
+# ---------------------------------------------------------------------------
+
+_LH_AUDIT_LABELS: dict[str, str] = {
+    "cache-insight": "Serve assets with efficient cache policy",
+    "color-contrast": "Background and foreground colors lack sufficient contrast",
+    "unused-css-rules": "Remove unused CSS",
+    "errors-in-console": "Browser errors logged to the console",
+    "label-content-name-mismatch": "Button/link label does not match accessible name",
+    "network-dependency-tree-insight": "Minimize critical request chain depth",
+    "render-blocking-insight": "Eliminate render-blocking resources",
+    "unused-javascript": "Remove unused JavaScript",
+    "uses-optimized-images": "Efficiently encode images",
+    "uses-responsive-images": "Properly size images",
+    "uses-webp-images": "Serve images in next-gen formats",
+    "largest-contentful-paint-element": "Largest Contentful Paint element",
+    "total-blocking-time": "Total Blocking Time",
+    "cumulative-layout-shift": "Cumulative Layout Shift",
+    "first-contentful-paint": "First Contentful Paint",
+    "speed-index": "Speed Index",
+    "interactive": "Time to Interactive",
+    "server-response-time": "Reduce initial server response time",
+    "dom-size": "Avoid an excessive DOM size",
+    "long-tasks": "Avoid long main-thread tasks",
+    "layout-shifts": "Avoid large layout shifts",
+    "image-alt": "Image elements do not have alt attributes",
+    "link-name": "Links do not have a discernible name",
+    "button-name": "Buttons do not have an accessible name",
+    "duplicate-id-active": "Document has active focus elements with duplicate ID",
+    "heading-order": "Heading elements are not in a sequentially-descending order",
+    "meta-description": "Document does not have a meta description",
+    "document-title": "Document does not have a <title> element",  # literal tag name; it had been stripped, splitting the string
+    "hreflang": "Document does not have a valid hreflang",
+    "canonical": "Page is not canonical",
+    "robots-txt": "Robots.txt is not valid",
+    "tap-targets": "Touch targets are not sized appropriately",
+}
+
+_URL_IN_MSG_PATTERN = re.compile(
+ r"(https?://\S+|(?:^|[\s:])(/\S+))", re.IGNORECASE
+)
+
+# Colon at end of a known-bad audit id: "cache-insight:" → strip colon
+_AUDIT_ID_TRAILING_COLON = re.compile(r"^([\w-]+):$")
+
+
+def _lh_label(audit_id: str) -> str:
+    """Map a Lighthouse audit id to its registry label, else title-case the id with hyphens as spaces."""
+    key = audit_id.rstrip(":").strip().lower()
+    return _LH_AUDIT_LABELS[key] if key in _LH_AUDIT_LABELS else key.replace("-", " ").title()
+
+
+def _strip_url_from_headline(message: str, url: str) -> str:
+    """Remove *url* from *message* when it duplicates the dedicated url field.
+
+    Both ``url`` and its trailing-slash form are removed, longest first: stripping
+    the bare URL from text that carries ``url + "/"`` would leave a dangling "/".
+    Surrounding whitespace and a trailing colon are trimmed from the result.
+    Returns *message* unchanged when either argument is empty or when nothing
+    but the URL would remain.
+    """
+    if not url or not message:
+        return message
+
+    # Order matters: the slash form contains the bare form as a prefix.
+    without_url = message.replace(url.rstrip("/") + "/", "").replace(url, "")
+    cleaned = without_url.strip().rstrip(":").strip()
+
+    return cleaned or message
+
+
+def _extract_path(url: str) -> Optional[str]:
+    """Return the path component of *url* for compact display, or None when it is empty or unparseable."""
+    if not url:
+        return None
+    try:
+        path = urlparse(url).path
+    except ValueError:  # urlparse reports malformed input (e.g. an unbalanced "[" in the host) this way
+        return None
+    return path or None
+
+
+def _is_lighthouse_row(message: str, tags: list[str]) -> tuple[bool, str]:
+ """Detect Lighthouse issue rows and return (is_lh, audit_id); audit_id is "" for tag-only matches."""
+ # Pattern: the whole stripped message is a bare "audit-id:" token (the regex is anchored at both ends)
+ m = _AUDIT_ID_TRAILING_COLON.match(message.strip())
+ if m:
+ return True, m.group(1)
+ # Tag-based
+ if "lighthouse" in tags:
+ return True, ""
+ return False, ""
+
+
+def _issue_id(row: dict[str, Any]) -> str:  # 12-hex-char stable ID built from category/priority/message/url
+    key = f"{row.get('category','')}\x00{row.get('priority','')}\x00{row.get('message','')}\x00{row.get('url','')}"
+    return hashlib.md5(key.encode(), usedforsecurity=False).hexdigest()[:12]  # not a security hash; the flag keeps FIPS-restricted builds working
+
+
+def _shorten_headline(headline: str, raw_message: str, url: str) -> str:
+    """Condense a headline after URL stripping / Lighthouse label expansion.
+
+    Known message shapes (sitemap, redirect, "lighthouse:" and "axe:" prefixes)
+    get dedicated short forms; anything else over 100 characters is cut back to
+    the last space within 97 characters plus an ellipsis. ``url`` is not consulted.
+    """
+    folded = headline.lower()
+    folded_raw = raw_message.lower()
+
+    if "url in sitemap but not crawled" in folded:
+        return "In sitemap, not crawled"
+
+    redirect = re.match(r"redirect:\s*(\d{3})\s*to\b", folded_raw) if folded_raw.startswith("redirect:") else None
+    if redirect:
+        return f"{redirect.group(1)} redirect"
+
+    if folded.startswith("lighthouse:"):
+        return headline.partition(":")[2].strip()
+
+    if folded.startswith("axe:"):
+        text = headline.partition(":")[2].strip()
+        if len(text) <= 90:
+            return text
+        sentence_end = text.find(". ")
+        if sentence_end > 0:
+            return text[: sentence_end + 1]
+        return text[:87].rsplit(" ", 1)[0] + "…"
+
+    return headline[:97].rsplit(" ", 1)[0] + "…" if len(headline) > 100 else headline
+
+
+_GENERIC_CWV_REC = "See Performance (Core Web Vitals) in this audit, or re-run Lighthouse from Run audit."
+
+
+def _normalize_recommendation(rec: Optional[str]) -> Optional[str]:
+    """Trim *rec*; swap the generic Core Web Vitals hint for a shorter line; falsy input gives None."""
+    if not rec:
+        return None
+    trimmed = rec.strip()
+    return "Review Lighthouse audit details for this page." if trimmed == _GENERIC_CWV_REC else trimmed
+
+
+def collapse_duplicate_issues(issues: list[PdfIssue]) -> list[PdfIssue]:
+    """Merge rows that share the same headline + recommendation into one card with URL list.
+
+    Buckets keep first-seen order. A bucket with at most one distinct URL is
+    represented by its first issue unchanged; otherwise a new ``PdfIssue`` is
+    built from the first issue with ``url=None``, the distinct URLs in
+    ``related_urls``, and " (N URLs)" appended to the headline unless it
+    already ends with ")".
+    """
+    # dicts preserve insertion order, so no separate order list is needed.
+    buckets: dict[tuple[str, str], list[PdfIssue]] = {}
+    for iss in issues:
+        buckets.setdefault((iss.headline, iss.recommendation or ""), []).append(iss)
+
+    collapsed: list[PdfIssue] = []
+    for group in buckets.values():
+        first = group[0]
+        # Ordered de-duplication in one pass instead of repeated list membership scans.
+        urls = list(dict.fromkeys(item.url for item in group if item.url))
+        if len(urls) <= 1:
+            collapsed.append(first)
+            continue
+        headline = first.headline
+        if not headline.endswith(")"):
+            headline = f"{headline} ({len(urls)} URLs)"
+        collapsed.append(PdfIssue(
+            id=first.id,
+            priority=first.priority,
+            category=first.category,
+            headline=headline,
+            url=None,
+            path=first.path,
+            detail=first.detail,
+            recommendation=first.recommendation,
+            metrics=first.metrics,
+            tags=first.tags,
+            related_urls=urls,
+        ))
+    return collapsed
+
+
+def normalize_issue_for_pdf(
+ row: dict[str, Any],
+ include_recommendation: bool = True,
+) -> PdfIssue:
+ """Convert a raw issues_row dict → PdfIssue for print layout; missing or falsy fields are read as empty strings."""
+ priority = str(row.get("priority") or "").lower()
+ category = str(row.get("category") or "")
+ raw_message = str(row.get("message") or "").strip()
+ url = str(row.get("url") or "").strip()
+ recommendation = _normalize_recommendation(
+ str(row.get("recommendation") or "").strip() if include_recommendation else None
+ )
+
+ # Detect Lighthouse rows (audit-id only, no human label)
+ is_lh, audit_id = _is_lighthouse_row(raw_message, [])  # no tags are passed, so only the bare "audit-id:" message form can match
+ if is_lh and audit_id:
+ headline = _lh_label(audit_id)
+ else:
+ headline = _strip_url_from_headline(raw_message, url)
+
+ headline = _shorten_headline(headline, raw_message, url)
+
+ # Tags are derived from plain substring tests on the lower-cased raw message (and category for "security")
+ tags: list[str] = []
+ lower_msg = raw_message.lower()
+ if "sitemap" in lower_msg:
+ tags.append("sitemap")
+ if is_lh or "lighthouse" in lower_msg:
+ tags.append("lighthouse")
+ if "axe" in lower_msg or "wcag" in lower_msg or "contrast" in lower_msg:  # substring match: "axe" also hits inside longer words
+ tags.append("axe")
+ if "redirect" in lower_msg:
+ tags.append("redirect")
+ if "canonical" in lower_msg:
+ tags.append("canonical")
+ if "security" in category.lower():
+ tags.append("security")
+
+ # Metrics from issue dict (ReportIssue fields)
+ gsc_clicks = row.get("gsc_clicks")
+ gsc_imp = row.get("gsc_impressions")
+ impact = row.get("impact_score")
+ lh_id = audit_id if is_lh else row.get("lh_audit_id")
+ metrics = None  # stays None when the row carries none of the metric fields
+ if any(v is not None for v in (gsc_clicks, gsc_imp, impact, lh_id)):
+ metrics = PdfIssueMetrics(  # ga4_sessions is not set here, so it keeps its dataclass default
+ gsc_clicks=int(gsc_clicks) if gsc_clicks is not None else None,  # int()/float() raise on values they cannot convert
+ gsc_impressions=int(gsc_imp) if gsc_imp is not None else None,
+ impact_score=float(impact) if impact is not None else None,
+ lh_audit_id=str(lh_id) if lh_id else None,
+ )
+
+ return PdfIssue(
+ id=_issue_id(row),
+ priority=priority,
+ category=category,
+ headline=headline,
+ url=url or None,
+ path=_extract_path(url),
+ detail=None,  # no detail text is produced by this normalizer
+ recommendation=recommendation or None,
+ metrics=metrics,
+ tags=tags,
+ )
+
+
+# ---------------------------------------------------------------------------
+# Grouping
+# ---------------------------------------------------------------------------
+
+_PRIORITY_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3}
+_PRIORITY_LABELS = {
+ "critical": "Critical",
+ "high": "High",
+ "medium": "Medium",
+ "low": "Low",
+}
+
+# Above this count per priority, sub-group by category
+_SUBGROUP_THRESHOLD = 8
+
+# Always use stacked list layout — tables only for cover top-issues / URL inventory
+_COMPACT_TABLE_THRESHOLD = 999
+
+
+def group_issues_for_pdf(
+    issues: list[PdfIssue],
+    issues_per_group: int = 25,
+    issues_total: int = 120,
+) -> list[IssueGroupBlock]:
+    """Group PdfIssue list by priority → category, returning IssueGroupBlock list.
+
+    Issues are sorted by priority then category and capped at *issues_total*.
+    Each of the four known priorities yields one block, or one block per
+    category once it holds more than ``_SUBGROUP_THRESHOLD`` issues; issues
+    with any other priority value are not emitted. Every block keeps at most
+    *issues_per_group* rows, counted before duplicate collapsing.
+    """
+    ranked = sorted(issues, key=lambda i: (_PRIORITY_ORDER.get(i.priority, 9), i.category))
+    ranked = ranked[:issues_total]
+    by_priority: dict[str, list[PdfIssue]] = {}
+    for iss in ranked:
+        by_priority.setdefault(iss.priority, []).append(iss)
+
+    groups: list[IssueGroupBlock] = []
+    for pri in ("critical", "high", "medium", "low"):
+        pri_issues = by_priority.get(pri, [])
+        if not pri_issues:
+            continue
+        pri_label = _PRIORITY_LABELS.get(pri, pri.title())
+
+        if len(pri_issues) <= _SUBGROUP_THRESHOLD:
+            # Small enough: a single group for this priority.
+            groups.append(_issue_group_block(
+                f"findings.{pri}", f"{pri_label} findings", f"{pri_label} — ",
+                pri_issues, issues_per_group,
+            ))
+            continue
+
+        # Otherwise sub-group by category, in sorted category order.
+        by_cat: dict[str, list[PdfIssue]] = {}
+        for iss in pri_issues:
+            by_cat.setdefault(iss.category, []).append(iss)
+        for cat, cat_issues in sorted(by_cat.items()):
+            cat_id = cat.lower().replace(" ", "_").replace("&", "and")
+            groups.append(_issue_group_block(
+                f"findings.{pri}.{cat_id}", f"{pri_label} — {cat}", f"{pri_label} — {cat}: ",
+                cat_issues, issues_per_group,
+            ))
+
+    return groups
+
+
+def _issue_group_block(
+    block_id: str,
+    title: str,
+    label_stem: str,
+    rows: list[PdfIssue],
+    issues_per_group: int,
+) -> IssueGroupBlock:
+    """Cap *rows* at *issues_per_group*, collapse duplicates, and wrap them in a block."""
+    kept = rows[:issues_per_group]
+    shown = collapse_duplicate_issues(kept)
+    # Truncation reports rows dropped by the cap only. Merging duplicate rows into
+    # one card hides nothing, so the card count must not be compared with the total.
+    trunc = PdfTruncation(shown=len(kept), total=len(rows)) if len(rows) > len(kept) else None
+    return IssueGroupBlock(
+        id=block_id,
+        title=title,
+        group_label=f"{label_stem}{len(rows)} issue{'s' if len(rows) != 1 else ''}",
+        issues=shown,
+        render_as="compact_table" if len(shown) >= _COMPACT_TABLE_THRESHOLD else "list",
+        truncation=trunc,
+    )
diff --git a/src/website_profiling/reporting/pdf/options.py b/src/website_profiling/reporting/pdf/options.py
new file mode 100644
index 0000000..678a446
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/options.py
@@ -0,0 +1,43 @@
+"""PdfBuildOptions, PdfLimits, and document profiles."""
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Literal, Optional
+
+# Names of the document profiles accepted by PdfBuildOptions.profile.
+Profile = Literal["executive", "standard", "full"]
+
+# Default section ids for each profile. PdfBuildOptions.effective_sections()
+# falls back to this mapping when its ``sections`` field is None.
+_PROFILE_SECTIONS: dict[str, list[str]] = {
+ "executive": ["core"],
+ "standard": ["core", "findings", "appendix"],
+ "full": ["core", "findings", "lighthouse", "security", "traffic", "keywords",
+ "indexation", "content", "links", "appendix"],
+}
+
+
+@dataclass
+class PdfLimits:
+ """Integer caps carried by PdfBuildOptions.limits.
+
+ Only the default values are defined here; how each cap is applied is
+ decided by the code that reads it (presumably the document builder —
+ verify against callers).
+ """
+ issues_total: int = 120
+ issues_per_group: int = 25
+ top_issues_cover: int = 6
+ urls_sample: int = 20
+ metric_table_rows: int = 15
+ gsc_queries: int = 10
+ keyword_rows: int = 15
+ diagnostic_items: int = 20
+
+
+@dataclass
+class PdfBuildOptions:
+ profile: Profile = "standard"
+ sections: Optional[list[str]] = None # None → derive from profile
+ limits: PdfLimits = field(default_factory=PdfLimits)
+ include_appendix: bool = True
+ include_recommendations: bool = True
+ include_glossary: bool = True
+ report_id: Optional[int] = None
+
+ def effective_sections(self) -> list[str]:
+ if self.sections is not None:
+ return self.sections
+ return _PROFILE_SECTIONS.get(self.profile, _PROFILE_SECTIONS["standard"])
diff --git a/src/website_profiling/reporting/pdf/render/__init__.py b/src/website_profiling/reporting/pdf/render/__init__.py
new file mode 100644
index 0000000..fc2a79a
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/render/__init__.py
@@ -0,0 +1,7 @@
+"""PDF/HTML renderers."""
+from __future__ import annotations
+
+from .html import render_html_document
+from .reportlab import render_pdf_document
+
+__all__ = ["render_pdf_document", "render_html_document"]
diff --git a/src/website_profiling/reporting/pdf/render/html.py b/src/website_profiling/reporting/pdf/render/html.py
new file mode 100644
index 0000000..ee1eda8
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/render/html.py
@@ -0,0 +1,774 @@
+"""HTML renderer — converts PdfDocument → preview/print HTML matching the PDF layout."""
+from __future__ import annotations
+
+import html
+import re
+from typing import Any
+
+from ..document import (
+ CalloutBlock,
+ HeadingBlock,
+ IssueGroupBlock,
+ IssueTableBlock,
+ KeyValueBlock,
+ KpiRowBlock,
+ MarkdownBlock,
+ MetricTableBlock,
+ ParagraphBlock,
+ PdfCoverBlock,
+ PdfDocument,
+ PdfIssue,
+ PdfSection,
+ ScoreCardsBlock,
+ SpacerBlock,
+ StatGridBlock,
+ UrlListBlock,
+)
+from . import styles as S
+
+
+def html_styles() -> str:
+ """CSS shared by standard export preview HTML."""
+ return """
+ :root {
+ --ink: #0f172a;
+ --muted: #64748b;
+ --line: #e2e8f0;
+ --surface: #ffffff;
+ --surface-muted: #f8fafc;
+ --header-bg: #f1f5f9;
+ --brand-accent: #2563eb;
+ --good: #059669;
+ --good-bg: #ecfdf5;
+ --fair: #d97706;
+ --fair-bg: #fffbeb;
+ --poor: #dc2626;
+ --poor-bg: #fef2f2;
+ --critical-fg: #991b1b;
+ --critical-bg: #fee2e2;
+ --high-fg: #c2410c;
+ --high-bg: #ffedd5;
+ --medium-fg: #a16207;
+ --medium-bg: #fef3c7;
+ --low-fg: #475569;
+ --low-bg: #f1f5f9;
+ }
+ * { box-sizing: border-box; }
+ body {
+ margin: 0;
+ background: #eef2f7;
+ color: var(--ink);
+ font: 400 14px/1.45 "Segoe UI", system-ui, -apple-system, sans-serif;
+ }
+ .report {
+ max-width: 816px;
+ margin: 0 auto;
+ background: var(--surface);
+ box-shadow: 0 1px 3px rgba(15, 23, 42, 0.08);
+ }
+ .cover {
+ padding: 1.75rem 1.85rem 1.25rem;
+ background: var(--surface);
+ }
+ .cover-head {
+ display: flex;
+ justify-content: space-between;
+ align-items: flex-start;
+ gap: 1rem;
+ margin-bottom: 0.35rem;
+ }
+ .cover-head h1 {
+ margin: 0;
+ font-size: 1.35rem;
+ font-weight: 700;
+ line-height: 1.25;
+ }
+ .cover-subtitle {
+ margin: 0.25rem 0 0;
+ color: var(--muted);
+ font-size: 0.92rem;
+ }
+ .hero-score {
+ text-align: center;
+ min-width: 4.5rem;
+ }
+ .hero-score .score {
+ display: block;
+ font-size: 2rem;
+ font-weight: 700;
+ line-height: 1;
+ }
+ .hero-score .suffix {
+ display: block;
+ margin-top: 0.15rem;
+ font-size: 0.72rem;
+ color: var(--muted);
+ }
+ .hero-score.score-good .score { color: var(--good); }
+ .hero-score.score-fair .score { color: var(--fair); }
+ .hero-score.score-poor .score { color: var(--poor); }
+ .hero-score.score-na .score { color: var(--muted); }
+ .cover-meta-line {
+ margin: 0.5rem 0 1rem;
+ color: var(--muted);
+ font-size: 0.82rem;
+ }
+ .section-title {
+ margin: 1.1rem 0 0.35rem;
+ font-size: 0.82rem;
+ font-weight: 700;
+ color: var(--ink);
+ }
+ .section-rule {
+ border: none;
+ border-top: 1px solid var(--line);
+ margin: 0 0 0.65rem;
+ }
+ .section-lead {
+ margin: 0 0 0.65rem;
+ color: var(--muted);
+ font-size: 0.78rem;
+ }
+ .grid-table {
+ width: 100%;
+ border-collapse: collapse;
+ table-layout: fixed;
+ margin-bottom: 0.85rem;
+ font-size: 0.82rem;
+ }
+ .grid-table th,
+ .grid-table td {
+ border: 1px solid var(--line);
+ padding: 0.65rem 0.5rem;
+ text-align: center;
+ vertical-align: middle;
+ }
+ .stat-grid td.stat-critical { background: var(--critical-bg); color: var(--critical-fg); }
+ .stat-grid td.stat-high { background: var(--high-bg); color: var(--high-fg); }
+ .stat-grid td.stat-medium { background: var(--medium-bg); color: var(--medium-fg); }
+ .stat-grid td.stat-low { background: var(--low-bg); color: var(--low-fg); }
+ .stat-grid .stat-value {
+ display: block;
+ font-size: 1.15rem;
+ font-weight: 700;
+ line-height: 1.1;
+ }
+ .stat-grid .stat-label {
+ display: block;
+ margin-top: 0.2rem;
+ font-size: 0.72rem;
+ color: var(--muted);
+ }
+ .score-grid .score-value {
+ display: block;
+ font-size: 0.95rem;
+ font-weight: 700;
+ line-height: 1.1;
+ }
+ .score-grid .score-name {
+ display: block;
+ margin-top: 0.25rem;
+ font-size: 0.72rem;
+ font-weight: 600;
+ line-height: 1.25;
+ }
+ .score-grid .score-meta {
+ display: block;
+ margin-top: 0.15rem;
+ font-size: 0.68rem;
+ color: var(--muted);
+ }
+ .score-grid td { background: var(--surface-muted); }
+ .score-grid .score-good .score-value { color: var(--good); }
+ .score-grid .score-fair .score-value { color: var(--fair); }
+ .score-grid .score-poor .score-value { color: var(--poor); }
+ .score-grid .score-na .score-value { color: var(--muted); }
+ .exec-panel {
+ border: 1px solid var(--line);
+ border-left: 3px solid var(--brand-accent);
+ background: var(--surface-muted);
+ padding: 0.85rem 1rem;
+ margin-bottom: 1rem;
+ border-radius: 0 4px 4px 0;
+ }
+ .exec-source {
+ margin: 0 0 0.45rem;
+ font-size: 0.68rem;
+ font-weight: 700;
+ color: var(--brand-accent);
+ text-transform: uppercase;
+ letter-spacing: 0.04em;
+ }
+ .exec-body { margin: 0; font-size: 0.88rem; line-height: 1.5; }
+ .exec-subhead {
+ margin: 0.65rem 0 0.35rem;
+ font-size: 0.72rem;
+ font-weight: 700;
+ color: var(--muted);
+ }
+ .exec-priorities {
+ margin: 0;
+ padding-left: 1.1rem;
+ font-size: 0.82rem;
+ line-height: 1.45;
+ }
+ .data-table {
+ width: 100%;
+ border-collapse: collapse;
+ font-size: 0.82rem;
+ border: 1px solid var(--line);
+ margin-bottom: 0.85rem;
+ }
+ .data-table th,
+ .data-table td {
+ padding: 0.55rem 0.65rem;
+ text-align: left;
+ vertical-align: middle;
+ border-bottom: 1px solid var(--line);
+ }
+ .data-table thead th {
+ background: var(--header-bg);
+ font-size: 0.72rem;
+ font-weight: 700;
+ color: var(--muted);
+ }
+ .data-table tbody tr:nth-child(even) td { background: var(--surface-muted); }
+ .data-table tbody tr:last-child td { border-bottom: none; }
+ .data-table .col-status { text-align: center; width: 4.5rem; }
+ .data-table .col-priority { text-align: center; width: 5rem; }
+ .kv-audit th {
+ width: 23%;
+ font-weight: 700;
+ vertical-align: top;
+ }
+ .kv-glossary th {
+ width: 21%;
+ font-weight: 700;
+ vertical-align: top;
+ background: var(--header-bg);
+ }
+ .kv-glossary td { line-height: 1.45; }
+ .link { color: var(--brand-accent); word-break: break-all; }
+ .site-wide { color: var(--muted); font-style: italic; font-size: 0.78rem; }
+ .badge {
+ display: inline-block;
+ padding: 0.15rem 0.45rem;
+ border-radius: 3px;
+ font-size: 0.65rem;
+ font-weight: 700;
+ text-transform: uppercase;
+ letter-spacing: 0.03em;
+ border: 1px solid transparent;
+ }
+ .badge-critical { background: var(--critical-bg); color: var(--critical-fg); border-color: var(--critical-fg); }
+ .badge-high { background: var(--high-bg); color: var(--high-fg); border-color: var(--high-fg); }
+ .badge-medium { background: var(--medium-bg); color: var(--medium-fg); border-color: var(--medium-fg); }
+ .badge-low { background: var(--low-bg); color: var(--low-fg); border-color: var(--low-fg); }
+ .status-200 { background: var(--good-bg); color: var(--good); border-color: var(--good); }
+ .status-3xx { background: var(--fair-bg); color: var(--fair); border-color: var(--fair); }
+ .status-4xx, .status-5xx { background: var(--poor-bg); color: var(--poor); border-color: var(--poor); }
+ .status-other { background: var(--surface-muted); color: var(--muted); border-color: var(--line); }
+ .content { padding: 0 1.85rem 1.5rem; }
+ .doc-section { margin-bottom: 1.35rem; }
+ .doc-section > h2 {
+ margin: 0 0 0.35rem;
+ font-size: 0.82rem;
+ font-weight: 700;
+ }
+ .doc-section .source-label {
+ margin: 0 0 0.5rem;
+ font-size: 0.78rem;
+ color: var(--muted);
+ }
+ .group-label {
+ margin: 0.65rem 0 0.35rem;
+ font-size: 0.78rem;
+ font-weight: 700;
+ }
+ .issue-card {
+ border-left: 3px solid var(--line);
+ background: var(--surface-muted);
+ padding: 0.45rem 0.65rem;
+ margin-bottom: 0.45rem;
+ font-size: 0.82rem;
+ }
+ .issue-card.priority-critical { border-color: var(--critical-fg); background: var(--critical-bg); }
+ .issue-card.priority-high { border-color: var(--high-fg); background: var(--high-bg); }
+ .issue-card.priority-medium { border-color: var(--medium-fg); background: var(--medium-bg); }
+ .issue-card.priority-low { border-color: var(--low-fg); background: var(--low-bg); }
+ .issue-headline { margin: 0; font-weight: 700; line-height: 1.35; }
+ .issue-url {
+ margin: 0.2rem 0 0;
+ font-size: 0.76rem;
+ color: var(--brand-accent);
+ word-break: break-all;
+ }
+ .issue-rec {
+ margin: 0.25rem 0 0;
+ font-size: 0.76rem;
+ color: var(--muted);
+ font-style: italic;
+ }
+ .issue-url-list {
+ margin: 0.25rem 0 0;
+ padding-left: 1rem;
+ font-size: 0.76rem;
+ color: var(--brand-accent);
+ }
+ .muted-note {
+ margin: 0.35rem 0 0;
+ font-size: 0.76rem;
+ color: var(--muted);
+ }
+ .page-break {
+ break-before: page;
+ page-break-before: always;
+ height: 0;
+ margin: 0;
+ border-top: 1px dashed var(--line);
+ }
+ .report-footer {
+ border-top: 1px solid var(--line);
+ padding: 0.85rem 1.85rem 1.25rem;
+ color: var(--muted);
+ font-size: 0.72rem;
+ line-height: 1.45;
+ }
+ .content {
+ padding: 0 1.85rem 1.5rem;
+ }
+ .custom-section {
+ margin-bottom: 1.35rem;
+ }
+ .custom-section > h2 {
+ margin: 0 0 0.35rem;
+ font-size: 0.82rem;
+ font-weight: 700;
+ }
+ .callout {
+ border: 1px solid var(--line);
+ border-left: 3px solid var(--brand-accent);
+ background: var(--surface-muted);
+ padding: 0.85rem 1rem;
+ border-radius: 0 4px 4px 0;
+ margin: 0.5rem 0;
+ }
+ p.muted, .muted {
+ color: var(--muted);
+ font-size: 0.82rem;
+ margin: 0.35rem 0 0.65rem;
+ }
+ .url, td.url {
+ color: var(--brand-accent);
+ word-break: break-all;
+ font-size: 0.76rem;
+ }
+ table.data, .table-wrap table {
+ width: 100%;
+ border-collapse: collapse;
+ font-size: 0.82rem;
+ border: 1px solid var(--line);
+ margin: 0.5rem 0 0.85rem;
+ }
+ table.data th, table.data td,
+ .table-wrap table th, .table-wrap table td {
+ padding: 0.55rem 0.65rem;
+ text-align: left;
+ vertical-align: top;
+ border-bottom: 1px solid var(--line);
+ }
+ table.data thead th, .table-wrap table thead th {
+ background: var(--header-bg);
+ font-size: 0.72rem;
+ font-weight: 700;
+ color: var(--muted);
+ }
+ .category-cards {
+ display: flex;
+ flex-wrap: wrap;
+ gap: 0.75rem;
+ margin: 0.65rem 0;
+ }
+ article.score-card {
+ flex: 1 1 140px;
+ max-width: 180px;
+ border: 1px solid var(--line);
+ border-radius: 4px;
+ padding: 0.75rem;
+ background: var(--surface-muted);
+ text-align: center;
+ }
+ article.score-card .score-value {
+ font-size: 1.1rem;
+ font-weight: 700;
+ }
+ article.score-card .score-name {
+ margin-top: 0.35rem;
+ font-size: 0.72rem;
+ font-weight: 600;
+ }
+ article.score-card .score-meta {
+ margin-top: 0.2rem;
+ font-size: 0.68rem;
+ color: var(--muted);
+ }
+ article.score-card.score-good .score-value { color: var(--good); }
+ article.score-card.score-fair .score-value { color: var(--fair); }
+ article.score-card.score-poor .score-value { color: var(--poor); }
+ article.score-card.score-na .score-value { color: var(--muted); }
+ .notes, .json-preview {
+ line-height: 1.5;
+ font-size: 0.82rem;
+ }
+ .json-preview {
+ overflow-x: auto;
+ background: var(--surface-muted);
+ padding: 0.75rem;
+ border: 1px solid var(--line);
+ border-radius: 4px;
+ }
+ @media print {
+ body { background: #fff; }
+ .report { max-width: none; box-shadow: none; }
+ .cover, .content, .report-footer { padding-left: 0.65in; padding-right: 0.65in; }
+ .page-break { border: none; }
+ }
+"""
+
+
+def _esc(text: Any) -> str:
+ return html.escape(str(text) if text is not None else "")
+
+
+def _priority_badge(priority: str) -> str:
+ """Return the HTML-escaped priority text followed by a single space.
+
+ NOTE(review): ``cls`` is computed (a ``badge-<priority>`` class, with
+ ``badge-low`` as the fallback for unknown priorities) but is not
+ referenced by the returned string — the wrapping markup looks like it
+ was lost; confirm against the intended template.
+ """
+ key = priority.lower()
+ cls = f"badge badge-{key}" if key in {"critical", "high", "medium", "low"} else "badge badge-low"
+ return f'{_esc(priority)} '
+
+
+def _status_badge(code: str) -> str:
+ """Return the escaped status code (or an em dash when empty) plus a space.
+
+ The code is stringified and stripped first. A CSS class is chosen from
+ it: exactly "200", a leading "3", a leading "4" or "5", or "other".
+
+ NOTE(review): ``cls`` is never used in the returned string — the badge
+ markup appears to be missing; confirm against the intended template.
+ """
+ c = str(code or "").strip()
+ if c == "200":
+ cls = "badge status-200"
+ elif c.startswith("3"):
+ cls = "badge status-3xx"
+ elif c and c[0] in "45":
+ cls = "badge status-4xx" if c.startswith("4") else "badge status-5xx"
+ else:
+ cls = "badge status-other"
+ return f'{_esc(c or "—")} '
+
+
+def _issue_location(issue: PdfIssue) -> str:
+ """Return the display location for an issue.
+
+ Prefers the escaped ``issue.path``, then the escaped ``issue.url``, and
+ falls back to the literal "Site-wide " when neither is set.
+ """
+ if issue.path:
+ return f'{_esc(issue.path)} '
+ if issue.url:
+ return f'{_esc(issue.url)} '
+ return 'Site-wide '
+
+
+def _section_heading(title: str) -> str:
+ """Return the HTML-escaped section title followed by a single space."""
+ return f'{_esc(title)} '
+
+
+def _render_stat_grid(block: StatGridBlock) -> str:
+ """Build one cell per chip, padding the list up to ``block.columns``.
+
+ Returns "" when the block has no chips. Chip tones outside
+ critical/high/medium/low are treated as "low".
+
+ NOTE(review): as written the final return is an empty f-string, so
+ neither ``cells`` nor ``tone`` reaches the output — the table markup
+ appears to be missing; confirm against the intended template.
+ """
+ if not block.chips:
+ return ""
+ cells = []
+ for chip in block.chips:
+ tone = chip.tone if chip.tone in {"critical", "high", "medium", "low"} else "low"
+ cells.append(
+ f''
+ f'{_esc(chip.value)} '
+ f'{_esc(chip.label)} '
+ f" "
+ )
+ while len(cells) < block.columns:
+ cells.append(" ")
+ return f''
+
+
+def _render_score_cards(block: ScoreCardsBlock) -> str:
+ """Lay score cards out in rows of ``S.GRID_COLS`` cells.
+
+ Returns "" when the block has no cards. Each card contributes its score
+ (an em dash when falsy), name and a pluralised issue count; the last
+ row is padded to the full column count.
+
+ NOTE(review): the final return is an empty f-string, so ``rows_html``
+ is never emitted — the table markup appears to be missing; confirm
+ against the intended template.
+ """
+ if not block.cards:
+ return ""
+ cols = S.GRID_COLS
+ rows_html: list[str] = []
+ row: list[str] = []
+ for card in block.cards:
+ issue_label = f"{card.issue_count} issue{'s' if card.issue_count != 1 else ''}"
+ row.append(
+ f''
+ f'{_esc(card.score or "—")} '
+ f'{_esc(card.name)} '
+ f'{issue_label} '
+ f" "
+ )
+ if len(row) == cols:
+ rows_html.append(f"{''.join(row)} ")
+ row = []
+ if row:
+ # Pad the trailing partial row so every row has ``cols`` cells.
+ while len(row) < cols:
+ row.append(" ")
+ rows_html.append(f"{''.join(row)} ")
+ return f''
+
+
+def _render_executive_panel(cover: PdfCoverBlock) -> str:
+ """Assemble the executive panel from the cover's summary fields.
+
+ Returns "" when the cover has neither an executive summary nor a
+ priorities list. Otherwise appends, in order: the source line (if set),
+ the summary text (if set), and up to six priorities. All user text goes
+ through ``_esc``.
+
+ NOTE(review): several string literals below are split across lines and
+ carry no tags — the markup appears to have been stripped; confirm
+ against the intended template.
+ """
+ if not (cover.executive_summary or cover.priorities_list):
+ return ""
+ parts = ['']
+ if cover.executive_source:
+ parts.append(f'
Source · {_esc(cover.executive_source)}
')
+ if cover.executive_summary:
+ parts.append(f'
{_esc(cover.executive_summary)}
')
+ if cover.priorities_list:
+ parts.append('
Recommended priorities
')
+ parts.append('
')
+ for pri in cover.priorities_list[:6]:
+ parts.append(f"{_esc(pri)} ")
+ parts.append(" ")
+ parts.append("
")
+ return "".join(parts)
+
+
+def _render_top_issues(issues: list[PdfIssue]) -> str:
+ """Render the "Top traffic-impacting issues" block for the cover.
+
+ Returns "" for an empty list. Each issue yields its priority badge,
+ escaped headline and location; the rows follow the section heading, a
+ lead sentence and a "Priority Issue Location" header line.
+
+ NOTE(review): the literals carry no tags and two are split across lines
+ — the table markup appears to have been stripped; confirm against the
+ intended template.
+ """
+ if not issues:
+ return ""
+ rows = "".join(
+ f""
+ f'{_priority_badge(iss.priority)} '
+ f"{_esc(iss.headline)} "
+ f"{_issue_location(iss)} "
+ f" "
+ for iss in issues
+ )
+ return (
+ f"{_section_heading('Top traffic-impacting issues')}"
+ f'Ranked by severity and traffic impact — address critical and high items first.
'
+ f''
+ f"Priority Issue Location "
+ f"{rows}
"
+ )
+
+
+def _render_cover(cover: PdfCoverBlock, meta) -> str:
+ """Prepare the cover fragments: meta line, executive panel, top issues, scores.
+
+ ``meta.issue_counts`` is summed for the total and read per priority
+ (missing keys count as 0). The executive and category-score fragments
+ are built only when the cover has content for them.
+
+ NOTE(review): the returned f-string contains only whitespace, so
+ ``meta_line``, ``hero``, ``exec_html``, ``top_html`` and ``cat_html``
+ are computed but never emitted — the cover template appears to be
+ missing; confirm against the intended markup.
+ """
+ counts = meta.issue_counts
+ total = sum(counts.values())
+ meta_line = (
+ f"Report generated {meta.generated_at} · {total} findings "
+ f"(Critical {counts.get('critical', 0)}, High {counts.get('high', 0)}, "
+ f"Medium {counts.get('medium', 0)}, Low {counts.get('low', 0)})"
+ )
+ hero = cover.hero
+ exec_html = ""
+ if cover.executive_summary or cover.priorities_list:
+ exec_html = _section_heading("Executive summary") + _render_executive_panel(cover)
+ top_html = _render_top_issues(cover.top_issues)
+
+ cat_html = ""
+ if cover.category_scores.cards:
+ cat_html = _section_heading("Category scores") + _render_score_cards(cover.category_scores)
+
+ return f"""
+ """
+
+
+def _render_issue(issue: PdfIssue) -> str:
+ """Render one issue: headline, then related URLs or a single URL, then the fix.
+
+ At most ten related URLs are listed; any remainder is summarised as
+ "… and N more (see CSV export)". ``issue.url`` is used only when there
+ are no related URLs. The recommendation, when present, is prefixed
+ with "Fix: ".
+
+ NOTE(review): ``cls`` and ``items`` are computed but not referenced by
+ the appended strings, and several literals are split across lines —
+ the card markup appears to have been stripped; confirm against the
+ intended template.
+ """
+ pri = issue.priority.lower()
+ cls = f"issue-card priority-{pri}" if pri in {"critical", "high", "medium", "low"} else "issue-card"
+ parts = [f'', f'
{_esc(issue.headline)}
']
+ if issue.related_urls:
+ items = "".join(f"
{_esc(u)} " for u in issue.related_urls[:10])
+ extra = len(issue.related_urls) - 10
+ if extra > 0:
+ items += f'
… and {extra} more (see CSV export) '
+ parts.append(f'
')
+ elif issue.url:
+ parts.append(f'
{_esc(issue.url)}
')
+ if issue.recommendation:
+ parts.append(f'
Fix: {_esc(issue.recommendation)}
')
+ parts.append("
")
+ return "".join(parts)
+
+
+def _render_issue_group(block: IssueGroupBlock) -> str:
+ """Render an issue group: label, then a compact table or stacked issues.
+
+ ``block.render_as == "compact_table"`` builds headline/URL rows;
+ anything else renders each issue through ``_render_issue``. When the
+ block carries a truncation record, a "Showing X of Y" note naming
+ ``continue_in`` is appended.
+
+ NOTE(review): in the compact-table branch ``rows`` is not referenced by
+ the appended literal, and literals are split across lines — the markup
+ appears to have been stripped; confirm against the intended template.
+ """
+ parts = [f'{_esc(block.group_label)}
']
+ if block.render_as == "compact_table":
+ rows = "".join(
+ f"{_esc(iss.headline)} "
+ f'{_esc(iss.url or "")} '
+ for iss in block.issues
+ )
+ parts.append(
+ f'"
+ )
+ else:
+ for iss in block.issues:
+ parts.append(_render_issue(iss))
+ if block.truncation:
+ t = block.truncation
+ parts.append(
+ f'Showing {t.shown} of {t.total}. '
+ f"Full list in {', '.join(t.continue_in)}.
"
+ )
+ return "".join(parts)
+
+
+def _render_key_value(block: KeyValueBlock) -> str:
+ """Render key/value rows, choosing a table class from ``block.layout``.
+
+ Returns "" when there are no rows. "glossary" selects the glossary
+ class; "audit" and every other layout (including a missing attribute)
+ select the audit class.
+
+ NOTE(review): the final return is an empty f-string, so ``table_cls``
+ and ``rows`` are never emitted — the table markup appears to be
+ missing; confirm against the intended template.
+ """
+ if not block.rows:
+ return ""
+ layout = getattr(block, "layout", "default") or "default"
+ if layout == "audit":
+ table_cls = "data-table kv-audit"
+ elif layout == "glossary":
+ table_cls = "data-table kv-glossary"
+ else:
+ table_cls = "data-table kv-audit"
+ rows = "".join(
+ f"{_esc(k)} {_esc(v)} " for k, v in block.rows
+ )
+ return f''
+
+
+def _render_url_list(block: UrlListBlock) -> str:
+ """Render URL rows with a status badge and an optional title column.
+
+ Returns "" when there are no rows. Rows are read as mappings with
+ "url", "status" and "title" keys; missing values become "". The title
+ column is included unless ``block.show_title`` is false (it defaults to
+ True when the attribute is absent). A truncation record adds a
+ "Showing X of Y URLs" note.
+
+ NOTE(review): literals are split across lines and carry no tags — the
+ table markup appears to have been stripped; confirm against the
+ intended template.
+ """
+ if not block.rows:
+ return ""
+ show_title = getattr(block, "show_title", True)
+ head = "URL Status "
+ if show_title:
+ head += "Title "
+ body_rows: list[str] = []
+ for row in block.rows:
+ url = str(row.get("url") or "")
+ status = str(row.get("status") or "")
+ cells = (
+ f'{_esc(url)} '
+ f'{_status_badge(status)} '
+ )
+ if show_title:
+ title = str(row.get("title") or "").strip()
+ title_cell = _esc(title) if title else '— '
+ cells += f"{title_cell} "
+ body_rows.append(f"{cells} ")
+ note = ""
+ if block.truncation:
+ t = block.truncation
+ note = (
+ f'Showing {t.shown} of {t.total} URLs. '
+ f"Export CSV/workbook for full inventory.
"
+ )
+ return (
+ f'{head} '
+ f'{"".join(body_rows)}
{note}'
+ )
+
+
+def _render_block(block: Any) -> str:
+ """Dispatch a section block to its HTML renderer by ``block.type``.
+
+ Blocks whose ``visible`` attribute is false render as "". The types
+ "issue_group", "key_value" and "url_list" delegate to their helpers;
+ "issue_table", "paragraph", "heading", "callout", "markdown" and
+ "metric_table" are rendered inline. "spacer", "kpi_row", "stat_grid",
+ "score_cards" and unknown types render as "".
+
+ NOTE(review): several branches compute values (``rows``, ``head``,
+ ``body``) that the returned literal does not reference, and the heading
+ branch emits ``{tag}>`` without a closing-tag opener — the markup
+ appears to have been stripped; confirm against the intended template.
+ """
+ if not getattr(block, "visible", True):
+ return ""
+ btype = getattr(block, "type", None)
+ if btype == "issue_group":
+ return _render_issue_group(block)
+ if btype == "key_value":
+ return _render_key_value(block)
+ if btype == "url_list":
+ return _render_url_list(block)
+ if btype == "issue_table":
+ rows = "".join(
+ f"{_esc(iss.headline)} {_esc(iss.url or '')} "
+ for iss in block.issues
+ )
+ title = f"{_esc(block.title)}
" if block.title else ""
+ return (
+ f"{title}"
+ )
+ if btype == "paragraph":
+ return f"{_esc(block.text)}
"
+ if btype == "heading":
+ # Levels 3 and deeper map to h3; everything shallower maps to h2.
+ tag = "h3" if block.level >= 3 else "h2"
+ return f"<{tag}>{_esc(block.text)}{tag}>"
+ if btype == "callout":
+ return f''
+ if btype == "markdown":
+ # Replace anything tag-shaped with a space before escaping the rest.
+ text = re.sub(r"<[^>]+>", " ", block.text)
+ return f"{_esc(text)}
"
+ if btype == "metric_table":
+ cols = block.columns
+ if not cols:
+ return ""
+ head = "".join(f"{_esc(c.label)} " for c in cols)
+ body = ""
+ for row in block.rows:
+ body += "" + "".join(
+ f'{_esc(row.get(c.key, ""))} ' for c in cols
+ ) + " "
+ return f''
+ if btype in {"spacer", "kpi_row", "stat_grid", "score_cards"}:
+ return ""
+ return ""
+
+
+def _render_section(section: PdfSection) -> str:
+ """Render a section: title, optional source label, blocks, truncation note.
+
+ Every block goes through ``_render_block``. When the section carries a
+ truncation record, a "Showing X of Y issues" note is appended.
+
+ NOTE(review): the opening literal is empty and others are split across
+ lines — the wrapper markup appears to have been stripped; confirm
+ against the intended template.
+ """
+ parts = [f'']
+ parts.append(f"{_esc(section.title)} ")
+ if section.source_label:
+ parts.append(f'Source: {_esc(section.source_label)}
')
+ for block in section.blocks:
+ parts.append(_render_block(block))
+ if section.truncation:
+ t = section.truncation
+ parts.append(
+ f'Showing {t.shown} of {t.total} issues. '
+ f"Export CSV or workbook for full data.
"
+ )
+ parts.append(" ")
+ return "".join(parts)
+
+
+def render_html_document(doc: PdfDocument) -> str:
+ """Render a PdfDocument as HTML matching the PDF export layout.
+
+ Builds the cover from ``doc.cover`` and ``doc.meta``, concatenates all
+ rendered sections, and interpolates the escaped cover headline, the
+ cover and the sections into the page template.
+
+ NOTE(review): ``footer_text`` is built (without escaping) but the
+ template below never references it, and the template holds no tags or
+ stylesheet — the page markup appears to have been stripped; confirm
+ against the intended template.
+ """
+ cover_html = _render_cover(doc.cover, doc.meta)
+ sections_html = "".join(_render_section(s) for s in doc.sections)
+ footer = doc.footer
+ footer_text = (
+ f"{footer.confidential_note} "
+ f"Generated by {footer.generator} · {footer.exported_at}"
+ )
+ title = _esc(doc.cover.headline)
+ return f"""
+
+
+
+
+{title}
+
+
+
+
+{cover_html}
+
+
+{sections_html}
+
+
+
+
+"""
diff --git a/src/website_profiling/reporting/pdf/render/reportlab.py b/src/website_profiling/reporting/pdf/render/reportlab.py
new file mode 100644
index 0000000..e20ae69
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/render/reportlab.py
@@ -0,0 +1,944 @@
+"""ReportLab renderer — converts PdfDocument → PDF bytes.
+
+Layout rules:
+- Every table cell is wrapped in Paragraph (prevents column bleed/overflow).
+- Findings are rendered as stacked item blocks (issue_group), not 4-col tables.
+- LongTable + repeatRows=1 for metric/url tables.
+- Page numbers via onFirstPage / onLaterPages callbacks.
+"""
+from __future__ import annotations
+
+import html
+import io
+from typing import Any
+
+from ..document import (
+ CalloutBlock,
+ HeadingBlock,
+ IssueGroupBlock,
+ IssueTableBlock,
+ KeyValueBlock,
+ KpiRowBlock,
+ MarkdownBlock,
+ MetricTableBlock,
+ ParagraphBlock,
+ PdfCoverBlock,
+ PdfDocument,
+ PdfIssue,
+ PdfMeta,
+ PdfSection,
+ ScoreCardsBlock,
+ SpacerBlock,
+ StatGridBlock,
+ UrlListBlock,
+)
+from . import styles as S
+
+
+def _content_w_in() -> float:
+ return S.CONTENT_WIDTH_IN
+
+
+def _col_w_in(cols: int) -> float:
+ return _content_w_in() / cols
+
+
+def _content_w_pt() -> float:
+ from reportlab.lib.units import inch
+ return _content_w_in() * inch
+
+
+def _grid_table_style() -> Any:
+ from reportlab.platypus import TableStyle
+ style = TableStyle([
+ ("BOX", (0, 0), (-1, -1), 0.5, _hex(S.BORDER)),
+ ("INNERGRID", (0, 0), (-1, -1), 0.5, _hex(S.BORDER)),
+ ("ALIGN", (0, 0), (-1, -1), "CENTER"),
+ ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
+ ("TOPPADDING", (0, 0), (-1, -1), 10),
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 10),
+ ("LEFTPADDING", (0, 0), (-1, -1), 6),
+ ("RIGHTPADDING", (0, 0), (-1, -1), 6),
+ ])
+ return style
+
+
+def _require_reportlab() -> None:
+ try:
+ from reportlab.lib import colors # noqa: F401
+ except ImportError as exc:
+ raise RuntimeError("PDF export requires reportlab (pip install reportlab)") from exc
+
+
+# ---------------------------------------------------------------------------
+# ReportLab helpers
+# ---------------------------------------------------------------------------
+
+def _rl_colors():
+ from reportlab.lib import colors
+ return colors
+
+
+def _hex(color_str: str):
+ return _rl_colors().HexColor(color_str)
+
+
+def _make_styles():
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+ base = getSampleStyleSheet()
+
+ def ps(name: str, parent_name: str = "Normal", **kwargs) -> ParagraphStyle:
+ return ParagraphStyle(name, parent=base[parent_name], **kwargs)
+
+ return {
+ "title": ps("ATitle", "Heading1", fontSize=20, textColor=_hex(S.INK),
+ spaceAfter=2, leading=24, spaceBefore=0),
+ "subtitle": ps("ASubtitle", fontSize=11, textColor=_hex(S.MUTED), spaceAfter=4, leading=14),
+ "section": ps("ASection", "Heading2", fontSize=11, textColor=_hex(S.INK),
+ spaceBefore=12, spaceAfter=4, borderPad=0),
+ "subsection": ps("ASubsection", "Heading3", fontSize=10, textColor=_hex(S.INK),
+ spaceBefore=6, spaceAfter=4),
+ "body": ps("ABody", fontSize=9, leading=13, textColor=_hex(S.INK)),
+ "body_italic": ps("ABodyI", fontSize=9, leading=13, textColor=_hex(S.MUTED), italic=True),
+ "muted": ps("AMuted", fontSize=8, leading=11, textColor=_hex(S.MUTED)),
+ "url": ps("AUrl", fontName="Courier", fontSize=8, leading=10,
+ textColor=_hex(S.BRAND_ACCENT), wordWrap="CJK"),
+ "kv_key": ps("AKvKey", fontSize=9, leading=12, textColor=_hex(S.INK), fontName="Helvetica-Bold"),
+ "kv_val": ps("AKvVal", fontSize=9, leading=12, textColor=_hex(S.INK)),
+ "th": ps("ATh", fontSize=8, leading=10, textColor=_hex(S.MUTED), fontName="Helvetica-Bold"),
+ "td": ps("ATd", fontSize=9, leading=12, textColor=_hex(S.INK)),
+ "td_url": ps("ATdUrl", fontName="Courier", fontSize=8, leading=10,
+ textColor=_hex(S.BRAND_ACCENT), wordWrap="CJK"),
+ "td_link": ps("ATdLink", fontSize=8, leading=11, textColor=_hex(S.BRAND_ACCENT), wordWrap="CJK"),
+ "kv_desc": ps("AKvDesc", fontSize=9, leading=13, textColor=_hex(S.INK)),
+ "cover_title": ps("ACoverTitle", fontSize=22, textColor=_hex("#f8fafc"),
+ spaceAfter=4, leading=28, fontName="Helvetica-Bold"),
+ "cover_sub": ps("ACoverSub", fontSize=11, textColor=_hex("#cbd5e1"), spaceAfter=2),
+ "hero_score": ps("AHeroScore", fontSize=28, leading=32, fontName="Helvetica-Bold"),
+ "hero_suffix": ps("AHeroSuffix", fontSize=10, textColor=_hex(S.MUTED), alignment=2),
+ "score_value": ps("AScoreVal", fontSize=15, leading=18, fontName="Helvetica-Bold", alignment=1),
+ "score_name": ps("AScoreName", fontSize=8, leading=11, alignment=1, spaceAfter=2),
+ "score_meta": ps("AScoreMeta", fontSize=7, leading=9, textColor=_hex(S.MUTED), alignment=1),
+ "stat_value": ps("AStatVal", fontSize=18, leading=20, fontName="Helvetica-Bold", alignment=1),
+ "stat_label": ps("AStatLabel", fontSize=8, leading=10, textColor=_hex(S.MUTED), alignment=1),
+ "cover_meta": ps("ACoverMetaLine", fontSize=9, textColor=_hex(S.MUTED), spaceAfter=10, leading=12),
+ "badge": ps("ABadge", fontSize=8, leading=10, fontName="Helvetica-Bold"),
+ "footer": ps("AFooter", fontSize=7, textColor=_hex(S.MUTED), leading=9),
+ "issue_headline": ps("AIssHeadline", fontSize=9, leading=12,
+ textColor=_hex(S.INK), fontName="Helvetica-Bold"),
+ "issue_rec": ps("AIssRec", fontSize=8, leading=11, textColor=_hex(S.MUTED), italic=True),
+ "callout_info": ps("ACalloutInfo", fontSize=9, leading=12,
+ textColor=_hex(S.BRAND_ACCENT), leftIndent=8),
+ "callout_warn": ps("ACalloutWarn", fontSize=9, leading=12,
+ textColor=_hex(S.FAIR), leftIndent=8),
+ "callout_critical": ps("ACalloutCrit", fontSize=9, leading=12,
+ textColor=_hex(S.CRITICAL_FG), leftIndent=8),
+ "exec_body": ps("AExecBody", fontSize=10, leading=15, textColor=_hex(S.INK), spaceAfter=4),
+ "exec_subhead": ps("AExecSub", fontSize=8, leading=11, textColor=_hex(S.MUTED),
+ fontName="Helvetica-Bold", spaceBefore=6, spaceAfter=3),
+ "exec_bullet": ps("AExecBullet", fontSize=9, leading=13, textColor=_hex(S.INK), leftIndent=10),
+ "exec_source": ps("AExecSource", fontSize=7, leading=9, textColor=_hex(S.BRAND_ACCENT),
+ fontName="Helvetica-Bold", spaceAfter=4),
+ "section_lead": ps("ASectionLead", fontSize=8, leading=11, textColor=_hex(S.MUTED), spaceAfter=6),
+ "td_site": ps("ATdSite", fontSize=8, leading=10, textColor=_hex(S.MUTED), italic=True),
+ }
+
+
+def _p(text: str, style) -> Any:
+ """Plain-text paragraph — content is HTML-escaped."""
+ from reportlab.platypus import Paragraph
+ return Paragraph(html.escape(str(text)), style)
+
+
+def _p_html(markup: str, style) -> Any:
+ """Markup paragraph — caller must escape user content before embedding tags."""
+ from reportlab.platypus import Paragraph
+ return Paragraph(str(markup), style)
+
+
+def _safe_p(text: str, style, fallback: str = "—") -> Any:
+ return _p(text if text else fallback, style)
+
+
+def _table_style_base():
+ from reportlab.platypus import TableStyle
+ return TableStyle([
+ ("BACKGROUND", (0, 0), (-1, 0), _hex(S.HEADER_BG)),
+ ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
+ ("FONTSIZE", (0, 0), (-1, -1), 9),
+ ("GRID", (0, 0), (-1, -1), 0.3, _hex(S.BORDER)),
+ ("VALIGN", (0, 0), (-1, -1), "TOP"),
+ ("TOPPADDING", (0, 0), (-1, -1), 4),
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
+ ("LEFTPADDING", (0, 0), (-1, -1), 5),
+ ("RIGHTPADDING", (0, 0), (-1, -1), 5),
+ ])
+
+
+def _page_callback(canvas, doc, footer_text: str) -> None:
+ from reportlab.lib.units import inch
+ page_w, _ = doc.pagesize
+ canvas.saveState()
+ canvas.setFont("Helvetica", 7)
+ canvas.setFillColor(_hex(S.MUTED))
+ canvas.drawString(0.55 * inch, 0.35 * inch, footer_text)
+ page_num = f"Page {doc.page}"
+ canvas.drawRightString(page_w - 0.55 * inch, 0.35 * inch, page_num)
+ canvas.restoreState()
+
+
+# ---------------------------------------------------------------------------
+# Block renderers — each returns a list of flowables
+# ---------------------------------------------------------------------------
+
+def _render_heading(block: HeadingBlock, st: dict) -> list:
+ from reportlab.platypus import Spacer
+ style = st["section"] if block.level == 2 else st["subsection"]
+ return [_p(block.text, style), Spacer(1, 2)]
+
+
+def _render_paragraph(block: ParagraphBlock, st: dict) -> list:
+ style = st["body_italic"] if block.italic else st["body"]
+ return [_p(block.text, style)]
+
+
+def _render_callout(block: CalloutBlock, st: dict) -> list:
+ from reportlab.lib.units import inch
+ from reportlab.platypus import Spacer, Table, TableStyle
+ style_map = {"info": st["callout_info"], "warn": st["callout_warn"], "critical": st["callout_critical"]}
+ bg_map = {"info": "#eff6ff", "warn": S.FAIR_BG, "critical": S.CRITICAL_BG}
+ s = style_map.get(block.severity, st["body"])
+ bg = bg_map.get(block.severity, "#eff6ff")
+ cell = [[_p(block.text, s)]]
+ tbl = Table(cell, colWidths=[_content_w_in() * inch])
+ tbl.setStyle(TableStyle([
+ ("BACKGROUND", (0, 0), (-1, -1), _hex(bg)),
+ ("LEFTPADDING", (0, 0), (-1, -1), 10),
+ ("RIGHTPADDING", (0, 0), (-1, -1), 8),
+ ("TOPPADDING", (0, 0), (-1, -1), 6),
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 6),
+ ("BOX", (0, 0), (-1, -1), 2, _hex(S.BRAND_ACCENT)),
+ ]))
+ return [tbl, Spacer(1, 4)]
+
+
+def _render_spacer(block: SpacerBlock, _st: dict) -> list:
+ from reportlab.platypus import Spacer
+ return [Spacer(1, block.height_pt)]
+
+
+def _render_kpi_row(block: KpiRowBlock, st: dict) -> list:
+ from reportlab.lib.units import inch
+ from reportlab.platypus import Spacer, Table, TableStyle
+ if not block.items:
+ return []
+ n = len(block.items)
+ w = _col_w_in(n)
+ row_data = [[_p_html(f"{html.escape(i.value)} {html.escape(i.label)} ", st["body"]) for i in block.items]]
+ tbl = Table(row_data, colWidths=[w * inch] * n)
+ tbl.setStyle(TableStyle([
+ ("BOX", (0, 0), (-1, -1), 0.3, _hex(S.BORDER)),
+ ("INNERGRID", (0, 0), (-1, -1), 0.3, _hex(S.BORDER)),
+ ("ALIGN", (0, 0), (-1, -1), "CENTER"),
+ ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
+ ("TOPPADDING", (0, 0), (-1, -1), 6),
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 6),
+ ("BACKGROUND", (0, 0), (-1, -1), _hex(S.SURFACE_MUTED)),
+ ]))
+ return [tbl, Spacer(1, 8)]
+
+
+def _render_stat_grid(block: StatGridBlock, st: dict) -> list:
+    """Render stat chips as a grid of tinted cells, ``block.columns`` per row.
+
+    Each chip becomes a stacked value/label cell. The value text colour and
+    the cell fill come from ``S.PRIORITY_TONES`` keyed by ``chip.tone``, with
+    an ink-on-muted fallback for unknown tones.
+
+    Chips beyond ``block.columns`` wrap onto further rows instead of being
+    appended to a single row that would be wider than the declared columns.
+    With no more chips than columns the output is a single row, as before.
+
+    Returns an empty list when the block has no chips, otherwise the table
+    followed by a 12pt spacer.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import Spacer, Table
+
+    if not block.chips:
+        return []
+
+    # Guard against a zero/None column count so the width helper is never
+    # asked for zero columns.
+    n = max(1, int(block.columns or 1))
+    col_w = _col_w_in(n)
+    fallback_tone = (S.INK, S.SURFACE_MUTED)
+
+    ts = _grid_table_style()
+    grid_rows: list[list] = []
+    for idx, chip in enumerate(block.chips):
+        r_idx, c_idx = divmod(idx, n)
+        if c_idx == 0:
+            grid_rows.append([])
+        fg, bg = S.PRIORITY_TONES.get(chip.tone, fallback_tone)
+        val_style = ParagraphStyle_compat(st["stat_value"], textColor=_hex(fg))
+        grid_rows[-1].append(
+            _cell_stack([(chip.value, val_style), (chip.label, st["stat_label"])], col_w)
+        )
+        ts.add("BACKGROUND", (c_idx, r_idx), (c_idx, r_idx), _hex(bg))
+
+    # Pad the last row so every row has exactly n cells.
+    last_row = grid_rows[-1]
+    last_row.extend([""] * (n - len(last_row)))
+
+    tbl = Table(
+        grid_rows,
+        colWidths=[col_w * inch] * n,
+        rowHeights=[0.62 * inch] * len(grid_rows),
+    )
+    tbl.setStyle(ts)
+    return [tbl, Spacer(1, 12)]
+
+
+def ParagraphStyle_compat(base_style, **overrides):
+    """Return a new ParagraphStyle derived from ``base_style``.
+
+    The new style is named ``<base name>_override``, uses ``base_style`` as
+    its parent, and applies ``overrides`` as attribute values.
+    """
+    from reportlab.lib.styles import ParagraphStyle
+
+    derived_name = f"{base_style.name}_override"
+    return ParagraphStyle(derived_name, parent=base_style, **overrides)
+
+
+def _section_heading(text: str, st: dict) -> list:
+    """Return a section title paragraph followed by a thin content-width rule."""
+    from reportlab.platypus import HRFlowable, Spacer
+
+    title = _p(text, st["section"])
+    rule = HRFlowable(
+        width=_content_w_pt(),
+        thickness=0.5,
+        color=_hex(S.BORDER),
+        spaceBefore=0,
+        spaceAfter=8,
+    )
+    return [title, rule]
+
+
+def _cell_stack(rows: list[tuple[str, Any]], col_w_in: float):
+    """Stack ``(text, style)`` pairs as paragraphs in a one-column table.
+
+    The table is centred, has no border commands and only minimal padding,
+    so it can be used as the content of a single grid cell. ``col_w_in`` is
+    the column width in inches.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import Table, TableStyle
+
+    stacked = Table(
+        [[_p(text, style)] for text, style in rows],
+        colWidths=[col_w_in * inch],
+    )
+    commands = [
+        ("ALIGN", (0, 0), (-1, -1), "CENTER"),
+        ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
+    ]
+    paddings = {
+        "LEFTPADDING": 2,
+        "RIGHTPADDING": 2,
+        "TOPPADDING": 0,
+        "BOTTOMPADDING": 0,
+    }
+    commands.extend(
+        (side, (0, 0), (-1, -1), amount) for side, amount in paddings.items()
+    )
+    stacked.setStyle(TableStyle(commands))
+    return stacked
+
+
+def _data_table_style() -> Any:
+    """Return the shared TableStyle for header-less data tables.
+
+    Outer box, hairline rules below every row except the last, top-aligned
+    cells, 9pt vertical and 10pt horizontal padding.
+    """
+    from reportlab.platypus import TableStyle
+
+    border = _hex(S.BORDER)
+    commands = [
+        ("BOX", (0, 0), (-1, -1), 0.5, border),
+        ("LINEBELOW", (0, 0), (-1, -2), 0.35, border),
+        ("VALIGN", (0, 0), (-1, -1), "TOP"),
+    ]
+    for side, amount in (
+        ("TOPPADDING", 9),
+        ("BOTTOMPADDING", 9),
+        ("LEFTPADDING", 10),
+        ("RIGHTPADDING", 10),
+    ):
+        commands.append((side, (0, 0), (-1, -1), amount))
+    return TableStyle(commands)
+
+
+def _apply_row_zebra(ts: Any, row_count: int, start_row: int = 0) -> None:
+    """Add alternating row backgrounds to ``ts`` in place.
+
+    Stripes ``row_count`` rows beginning at ``start_row``: the first striped
+    row is white, the next uses the muted surface colour, and so on.
+    """
+    for offset in range(row_count):
+        shade = S.SURFACE_MUTED if offset % 2 else "#ffffff"
+        row = start_row + offset
+        ts.add("BACKGROUND", (0, row), (-1, row), _hex(shade))
+
+
+def _http_status_badge(code: str, st: dict) -> Any:
+    """Build a small colour-coded badge table for an HTTP status code.
+
+    Tone is chosen by status class: 2xx good, 3xx fair, 4xx/5xx poor, and
+    anything else (including a blank code, shown as an em dash) muted.
+
+    Args:
+        code: Status code text; falsy values are treated as blank.
+        st: Style dict; the ``"badge"`` entry is used as the base style.
+
+    Returns:
+        A one-cell table, 0.52in wide, with tinted fill and matching border.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import Table, TableStyle
+
+    c = str(code or "").strip()
+    status_class = c[:1]
+    if status_class == "2":
+        # Every 2xx response is a success (201, 204, 206, ...), not only
+        # the literal "200".
+        fg, bg = S.GOOD, S.GOOD_BG
+    elif status_class == "3":
+        fg, bg = S.FAIR, S.FAIR_BG
+    elif status_class and status_class in "45":
+        fg, bg = S.POOR, S.POOR_BG
+    else:
+        fg, bg = S.MUTED, S.SURFACE_MUTED
+
+    badge_style = ParagraphStyle_compat(st["badge"], textColor=_hex(fg), fontSize=8)
+    label = c or "—"
+    tbl = Table([[_p(label, badge_style)]], colWidths=[0.52 * inch])
+    tbl.setStyle(TableStyle([
+        ("BACKGROUND", (0, 0), (-1, -1), _hex(bg)),
+        ("ALIGN", (0, 0), (-1, -1), "CENTER"),
+        ("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
+        ("TOPPADDING", (0, 0), (-1, -1), 3),
+        ("BOTTOMPADDING", (0, 0), (-1, -1), 3),
+        ("BOX", (0, 0), (-1, -1), 0.4, _hex(fg)),
+    ]))
+    return tbl
+
+
+def _render_key_value(block: KeyValueBlock, st: dict) -> list:
+    """Dispatch a key/value block to the renderer for its ``layout``.
+
+    ``"audit"`` and ``"glossary"`` have dedicated renderers; a missing,
+    empty or unrecognised layout uses the default renderer.
+    """
+    layout = getattr(block, "layout", "default") or "default"
+    by_layout = {
+        "audit": _render_audit_kv,
+        "glossary": _render_glossary_kv,
+    }
+    render = by_layout.get(layout, _render_default_kv)
+    return render(block, st)
+
+
+def _render_default_kv(block: KeyValueBlock, st: dict) -> list:
+    """Render key/value rows as a two-column table with a bold, shaded key column.
+
+    The key column takes 30% of the content width and the value column the
+    rest. Returns an empty list when the block has no rows, otherwise the
+    table followed by a 6pt spacer.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import LongTable, Spacer
+
+    if not block.rows:
+        return []
+    data = [[_p(k, st["kv_key"]), _p(v, st["kv_val"])] for k, v in block.rows]
+    kv_key_w = _content_w_in() * 0.30
+    kv_val_w = _content_w_in() - kv_key_w
+    tbl = LongTable(data, colWidths=[kv_key_w * inch, kv_val_w * inch], repeatRows=0)
+    ts = _table_style_base()
+    # Fill every cell with the muted surface first, then override the key
+    # column with the header fill; later commands win over earlier ones.
+    ts.add("BACKGROUND", (0, 0), (-1, -1), _hex(S.SURFACE_MUTED))
+    ts.add("BACKGROUND", (0, 0), (0, -1), _hex(S.HEADER_BG))
+    ts.add("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold")
+    tbl.setStyle(ts)
+    return [tbl, Spacer(1, 6)]
+
+
+def _render_audit_kv(block: KeyValueBlock, st: dict) -> list:
+    """Render key/value rows in the "audit" layout.
+
+    Uses a fixed 1.65in key column, the shared data-table style and zebra
+    striping. Returns an empty list when the block has no rows.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import LongTable, Spacer
+
+    pairs = block.rows
+    if not pairs:
+        return []
+
+    key_w = 1.65
+    value_w = _content_w_in() - key_w
+    body = []
+    for key, value in pairs:
+        body.append([_p(key, st["kv_key"]), _p(value, st["kv_val"])])
+
+    table = LongTable(body, colWidths=[key_w * inch, value_w * inch], repeatRows=0)
+    style = _data_table_style()
+    _apply_row_zebra(style, len(pairs))
+    table.setStyle(style)
+    return [table, Spacer(1, 10)]
+
+
+def _render_glossary_kv(block: KeyValueBlock, st: dict) -> list:
+    """Render key/value rows in the "glossary" layout (term | description).
+
+    The 1.55in term column always gets the header fill; the description
+    column alternates white and muted fills by row. Returns an empty list
+    when the block has no rows.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import LongTable, Spacer
+
+    entries = block.rows
+    if not entries:
+        return []
+
+    term_w = 1.55
+    desc_w = _content_w_in() - term_w
+    body = [[_p(term, st["kv_key"]), _p(desc, st["kv_desc"])] for term, desc in entries]
+    table = LongTable(body, colWidths=[term_w * inch, desc_w * inch], repeatRows=0)
+
+    style = _data_table_style()
+    for row_idx, _entry in enumerate(entries):
+        style.add("BACKGROUND", (0, row_idx), (0, row_idx), _hex(S.HEADER_BG))
+        desc_fill = S.SURFACE_MUTED if row_idx % 2 else "#ffffff"
+        style.add("BACKGROUND", (1, row_idx), (1, row_idx), _hex(desc_fill))
+    table.setStyle(style)
+    return [table, Spacer(1, 10)]
+
+
+def _render_score_cards(block: ScoreCardsBlock, st: dict) -> list:
+    """Lay out category score cards in a grid ``S.GRID_COLS`` columns wide.
+
+    Each card stacks the score (coloured by its tone), the category name and
+    an issue-count line. The last row is padded with empty cells, and only
+    cells that hold a card get the muted background. Returns an empty list
+    when the block has no cards.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import Spacer, Table
+
+    if not block.cards:
+        return []
+
+    cols = S.GRID_COLS
+    col_w = _col_w_in(cols)
+
+    def _card_cell(card):
+        tone = S.SCORE_TONES.get(card.tone, S.MUTED)
+        value_style = ParagraphStyle_compat(st["score_value"], textColor=_hex(tone))
+        plural = "" if card.issue_count == 1 else "s"
+        return _cell_stack([
+            (card.score or "—", value_style),
+            (card.name, st["score_name"]),
+            (f"{card.issue_count} issue{plural}", st["score_meta"]),
+        ], col_w)
+
+    cells = [_card_cell(card) for card in block.cards]
+    grid_rows = [cells[start:start + cols] for start in range(0, len(cells), cols)]
+    # Remember how many real cards each row holds before padding the last one.
+    filled_per_row = [len(grid_row) for grid_row in grid_rows]
+    grid_rows[-1].extend([""] * (cols - len(grid_rows[-1])))
+
+    tbl = Table(
+        grid_rows,
+        colWidths=[col_w * inch] * cols,
+        rowHeights=[0.78 * inch] * len(grid_rows),
+    )
+    ts = _grid_table_style()
+    for r_idx, filled in enumerate(filled_per_row):
+        for c_idx in range(filled):
+            ts.add("BACKGROUND", (c_idx, r_idx), (c_idx, r_idx), _hex(S.SURFACE_MUTED))
+    tbl.setStyle(ts)
+    return [tbl, Spacer(1, 12)]
+
+
+def _url_list_table_style(col_count: int) -> Any:
+    """Return the TableStyle for the URL inventory table.
+
+    Styles a bold, shaded header row, hairline rules between rows and an
+    outer box. When the table has at least two columns, the second column is
+    centred.
+    """
+    from reportlab.platypus import TableStyle
+
+    header_row = ((0, 0), (-1, 0))
+    whole_table = ((0, 0), (-1, -1))
+    commands = [
+        ("BACKGROUND", *header_row, _hex(S.HEADER_BG)),
+        ("FONTNAME", *header_row, "Helvetica-Bold"),
+        ("FONTSIZE", *whole_table, 9),
+        ("TEXTCOLOR", *header_row, _hex(S.MUTED)),
+        ("BOX", *whole_table, 0.5, _hex(S.BORDER)),
+        ("LINEBELOW", *header_row, 0.8, _hex(S.BORDER)),
+        ("LINEBELOW", (0, 1), (-1, -1), 0.35, _hex(S.BORDER)),
+        ("VALIGN", *whole_table, "MIDDLE"),
+        ("TOPPADDING", *whole_table, 8),
+        ("BOTTOMPADDING", *whole_table, 8),
+        ("LEFTPADDING", *whole_table, 10),
+        ("RIGHTPADDING", *whole_table, 10),
+    ]
+    if col_count >= 2:
+        commands.append(("ALIGN", (1, 0), (1, -1), "CENTER"))
+    return TableStyle(commands)
+
+
+def _render_url_list(block: UrlListBlock, st: dict) -> list:
+    """Render a URL inventory table: URL, status badge and optional page title.
+
+    The Title column is shown unless ``block.show_title`` is falsy. Data rows
+    are zebra-striped and the header repeats on every page. When the block
+    carries truncation info, a "Showing X of Y" note follows the table.
+
+    Returns an empty list when the block has no rows.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import LongTable, Spacer
+
+    if not block.rows:
+        return []
+
+    show_title = getattr(block, "show_title", True)
+    status_w = 0.72
+    if show_title:
+        header = [_p("URL", st["th"]), _p("Status", st["th"]), _p("Title", st["th"])]
+        title_w = 1.85
+        url_w = _content_w_in() - status_w - title_w
+        col_widths = [url_w * inch, status_w * inch, title_w * inch]
+    else:
+        header = [_p("URL", st["th"]), _p("Status", st["th"])]
+        url_w = _content_w_in() - status_w
+        col_widths = [url_w * inch, status_w * inch]
+
+    data: list = [header]
+    for r in block.rows:
+        url_cell = _safe_p(r.get("url", ""), st["td_link"])
+        # A status that is present but None must produce the blank badge,
+        # not a badge reading "None".
+        raw_status = r.get("status")
+        status_cell = _http_status_badge("" if raw_status is None else str(raw_status), st)
+        if show_title:
+            title = str(r.get("title") or "").strip()
+            title_cell = _p(title, st["td"]) if title else _p("—", st["td_site"])
+            data.append([url_cell, status_cell, title_cell])
+        else:
+            data.append([url_cell, status_cell])
+
+    tbl = LongTable(data, colWidths=col_widths, repeatRows=1)
+    ts = _url_list_table_style(len(col_widths))
+    # Zebra only data rows (skip header)
+    _apply_row_zebra(ts, len(data) - 1, start_row=1)
+    tbl.setStyle(ts)
+
+    parts: list = [tbl]
+    if block.truncation:
+        t = block.truncation
+        note = f"Showing {t.shown} of {t.total} URLs. Export CSV/workbook for full inventory."
+        parts.append(Spacer(1, 4))
+        parts.append(_p(note, st["muted"]))
+    parts.append(Spacer(1, 10))
+    return parts
+
+
+def _render_metric_table(block: MetricTableBlock, st: dict) -> list:
+    """Render a generic metric table from column specs and row dicts.
+
+    Column widths come from each column's ``width`` keyword (``narrow``,
+    ``medium``, ``wide``, ``url``; unknown values fall back to medium) and
+    are scaled down proportionally when they exceed the content width.
+    Cells in ``url`` columns use the URL cell style. Missing or None values
+    render as an empty cell. When the block carries truncation info, a
+    "Showing X of Y" note follows the table.
+
+    Returns an empty list when the block has no columns or no rows.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import LongTable, Spacer
+
+    if not block.columns or not block.rows:
+        return []
+
+    # Share the width presets with the styles module instead of duplicating them.
+    width_map = {
+        "narrow": S.COL_NARROW,
+        "medium": S.COL_MEDIUM,
+        "wide": S.COL_WIDE,
+        "url": S.COL_URL,
+    }
+    available = _content_w_in()
+    col_widths = [width_map.get(c.width, S.COL_MEDIUM) * inch for c in block.columns]
+    # Scale to available width
+    total_specified = sum(col_widths)
+    if total_specified > available * inch:
+        scale = (available * inch) / total_specified
+        col_widths = [w * scale for w in col_widths]
+
+    # Resolve each column's cell style once rather than per row.
+    cell_styles = [st["td_url"] if c.width == "url" else st["td"] for c in block.columns]
+
+    header = [_p(c.label, st["th"]) for c in block.columns]
+    data: list = [header]
+    for r in block.rows:
+        cells = []
+        for c, style in zip(block.columns, cell_styles):
+            value = r.get(c.key, "")
+            # None would otherwise be printed as the text "None".
+            cells.append(_safe_p("" if value is None else str(value), style))
+        data.append(cells)
+
+    tbl = LongTable(data, colWidths=col_widths, repeatRows=1 if block.repeat_header else 0)
+    tbl.setStyle(_table_style_base())
+    parts: list = [tbl]
+    if block.truncation:
+        t = block.truncation
+        note = f"Showing {t.shown} of {t.total} rows. Full data in {', '.join(t.continue_in)}."
+        parts.append(Spacer(1, 3))
+        parts.append(_p(note, st["muted"]))
+    parts.append(Spacer(1, 8))
+    return parts
+
+
+def _priority_badge(priority: str, st: dict) -> Any:
+    """Build a small upper-cased priority badge coloured by ``S.PRIORITY_TONES``.
+
+    Unknown priorities fall back to ink text on the muted surface colour.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import Table, TableStyle
+
+    text_colour, fill_colour = S.PRIORITY_TONES.get(priority, (S.INK, S.SURFACE_MUTED))
+    label_style = ParagraphStyle_compat(st["badge"], textColor=_hex(text_colour), fontSize=7)
+    badge = Table([[_p(priority.upper(), label_style)]], colWidths=[0.62 * inch])
+
+    commands = [
+        ("BACKGROUND", (0, 0), (-1, -1), _hex(fill_colour)),
+        ("ALIGN", (0, 0), (-1, -1), "CENTER"),
+    ]
+    for side, amount in (
+        ("TOPPADDING", 3),
+        ("BOTTOMPADDING", 3),
+        ("LEFTPADDING", 2),
+        ("RIGHTPADDING", 2),
+    ):
+        commands.append((side, (0, 0), (-1, -1), amount))
+    commands.append(("BOX", (0, 0), (-1, -1), 0.5, _hex(text_colour)))
+    badge.setStyle(TableStyle(commands))
+    return badge
+
+
+def _issue_location_cell(issue: PdfIssue, st: dict) -> Any:
+    """Return the Location cell for an issue.
+
+    Prefers ``issue.path``, then ``issue.url``; with neither, the cell reads
+    "Site-wide" in the site style.
+    """
+    location = issue.path or issue.url
+    if not location:
+        return _p("Site-wide", st["td_site"])
+    return _p(location, st["td_url"])
+
+
+def _top_issues_table_style():
+    """Return the TableStyle for the cover's top-issues table.
+
+    Shaded bold header row, alternating white/muted data rows, hairline row
+    rules, a centred first column and an outer box.
+    """
+    from reportlab.platypus import TableStyle
+
+    header_row = ((0, 0), (-1, 0))
+    data_rows = ((0, 1), (-1, -1))
+    whole_table = ((0, 0), (-1, -1))
+    stripes = [_hex("#ffffff"), _hex(S.SURFACE_MUTED)]
+    commands = [
+        ("BACKGROUND", *header_row, _hex(S.HEADER_BG)),
+        ("FONTNAME", *header_row, "Helvetica-Bold"),
+        ("FONTSIZE", *whole_table, 9),
+        ("TEXTCOLOR", *header_row, _hex(S.MUTED)),
+        ("LINEBELOW", *header_row, 0.8, _hex(S.BORDER)),
+        ("LINEBELOW", *data_rows, 0.35, _hex(S.BORDER)),
+        ("ROWBACKGROUNDS", *data_rows, stripes),
+        ("VALIGN", *whole_table, "MIDDLE"),
+        ("ALIGN", (0, 0), (0, -1), "CENTER"),
+        ("TOPPADDING", *whole_table, 8),
+        ("BOTTOMPADDING", *whole_table, 8),
+        ("LEFTPADDING", *whole_table, 8),
+        ("RIGHTPADDING", *whole_table, 8),
+        ("BOX", *whole_table, 0.5, _hex(S.BORDER)),
+    ]
+    return TableStyle(commands)
+
+
+def _render_executive_panel(cover: PdfCoverBlock, st: dict) -> list:
+    """Render the executive summary as a shaded panel with an accent left rule.
+
+    The panel stacks, when present: the source line, the summary text, and a
+    "Recommended priorities" list limited to the first six entries. Returns
+    an empty list when none of those are available.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import Spacer, Table, TableStyle
+
+    paragraphs: list = []
+    if cover.executive_source:
+        paragraphs.append(_p(f"Source · {cover.executive_source}", st["exec_source"]))
+    if cover.executive_summary:
+        paragraphs.append(_p(cover.executive_summary, st["exec_body"]))
+    if cover.priorities_list:
+        paragraphs.append(_p("Recommended priorities", st["exec_subhead"]))
+        for rank, priority in enumerate(cover.priorities_list[:6], 1):
+            paragraphs.append(_p(f"{rank}. {priority}", st["exec_bullet"]))
+
+    if not paragraphs:
+        return []
+
+    panel_w = _content_w_in() * inch
+
+    # Inner table: one paragraph per row, no horizontal padding.
+    inner = Table([[para] for para in paragraphs], colWidths=[panel_w])
+    inner.setStyle(TableStyle([
+        ("LEFTPADDING", (0, 0), (-1, -1), 0),
+        ("RIGHTPADDING", (0, 0), (-1, -1), 0),
+        ("TOPPADDING", (0, 0), (-1, -1), 2),
+        ("BOTTOMPADDING", (0, 0), (-1, -1), 2),
+        ("VALIGN", (0, 0), (-1, -1), "TOP"),
+    ]))
+
+    # Outer table: supplies the fill, accent rule, border and padding.
+    panel = Table([[inner]], colWidths=[panel_w])
+    panel.setStyle(TableStyle([
+        ("BACKGROUND", (0, 0), (-1, -1), _hex(S.SURFACE_MUTED)),
+        ("LINEBEFORE", (0, 0), (0, -1), 3, _hex(S.BRAND_ACCENT)),
+        ("BOX", (0, 0), (-1, -1), 0.5, _hex(S.BORDER)),
+        ("LEFTPADDING", (0, 0), (-1, -1), 14),
+        ("RIGHTPADDING", (0, 0), (-1, -1), 12),
+        ("TOPPADDING", (0, 0), (-1, -1), 10),
+        ("BOTTOMPADDING", (0, 0), (-1, -1), 10),
+    ]))
+    return [panel, Spacer(1, 14)]
+
+
+def _render_top_issues_table(issues: list[PdfIssue], st: dict) -> list:
+    """Render the cover's Priority | Issue | Location table.
+
+    The priority and location columns have fixed widths and the issue column
+    takes the remaining content width. The header repeats on every page.
+    Returns an empty list when there are no issues.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import LongTable, Spacer
+
+    if not issues:
+        return []
+
+    priority_w = 0.78
+    location_w = 1.55
+    headline_w = _content_w_in() - priority_w - location_w
+
+    table_rows: list = [[_p(title, st["th"]) for title in ("Priority", "Issue", "Location")]]
+    table_rows.extend(
+        [
+            _priority_badge(issue.priority, st),
+            _p(issue.headline, st["td"]),
+            _issue_location_cell(issue, st),
+        ]
+        for issue in issues
+    )
+
+    widths = [priority_w * inch, headline_w * inch, location_w * inch]
+    table = LongTable(table_rows, colWidths=widths, repeatRows=1)
+    table.setStyle(_top_issues_table_style())
+    return [table, Spacer(1, 10)]
+
+
+def _render_single_issue(issue: PdfIssue, st: dict) -> list:
+    """Render one issue as a tinted card with a priority-coloured left rule.
+
+    The card shows the headline, then either up to ten related URLs (with an
+    "and N more" line when there are extras) or the single issue URL, then
+    the recommendation when present.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import Spacer, Table, TableStyle
+
+    fg, bg = S.PRIORITY_TONES.get(issue.priority, (S.INK, S.SURFACE_MUTED))
+
+    body = [_p(issue.headline, st["issue_headline"])]
+    related = issue.related_urls
+    if related:
+        limit = 10
+        for url in related[:limit]:
+            body.append(_p(f"• {url}", st["url"]))
+        hidden = len(related) - limit
+        if hidden > 0:
+            body.append(_p(f"• … and {hidden} more (see CSV export)", st["muted"]))
+    elif issue.url:
+        body.append(_p(issue.url, st["url"]))
+    if issue.recommendation:
+        body.append(_p(f"Fix: {issue.recommendation}", st["issue_rec"]))
+
+    inner = Table([[para] for para in body], colWidths=[(_content_w_in() - 0.3) * inch])
+    inner.setStyle(TableStyle([
+        ("LEFTPADDING", (0, 0), (-1, -1), 8),
+        ("RIGHTPADDING", (0, 0), (-1, -1), 4),
+        ("TOPPADDING", (0, 0), (-1, -1), 1),
+        ("BOTTOMPADDING", (0, 0), (-1, -1), 2),
+        ("VALIGN", (0, 0), (-1, -1), "TOP"),
+    ]))
+
+    card = Table([[inner]], colWidths=[_content_w_in() * inch])
+    card.setStyle(TableStyle([
+        ("LINEBEFORE", (0, 0), (0, -1), 3, _hex(fg)),
+        ("BACKGROUND", (0, 0), (-1, -1), _hex(bg)),
+        ("TOPPADDING", (0, 0), (-1, -1), 5),
+        ("BOTTOMPADDING", (0, 0), (-1, -1), 5),
+        ("VALIGN", (0, 0), (-1, -1), "TOP"),
+    ]))
+    return [card, Spacer(1, 6)]
+
+
+def _render_issue_group(block: IssueGroupBlock, st: dict) -> list:
+    """Render a labelled group of issues.
+
+    Issues are drawn as one compact table when ``block.render_as`` is
+    ``"compact_table"``, otherwise as individual issue cards. A truncation
+    note is added when the block carries truncation info, and the group
+    always ends with an 8pt spacer.
+    """
+    from reportlab.platypus import Spacer
+
+    flowables: list = [_p(block.group_label, st["subsection"])]
+
+    if block.render_as == "compact_table":
+        flowables += _render_issue_table_compact(block.issues, st)
+    else:
+        for issue in block.issues:
+            flowables += _render_single_issue(issue, st)
+
+    truncation = block.truncation
+    if truncation:
+        sources = ", ".join(truncation.continue_in)
+        note = f"Showing {truncation.shown} of {truncation.total}. Full list in {sources}."
+        flowables.append(_p(note, st["muted"]))
+
+    flowables.append(Spacer(1, 8))
+    return flowables
+
+
+def _render_issue_table_compact(issues: list[PdfIssue], st: dict) -> list:
+    """Render issues as a two-column Issue | URL table.
+
+    Priority is not shown; callers present it in the surrounding heading.
+    The issue column takes 52% of the content width and the header repeats
+    on every page.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import LongTable, Spacer
+
+    table_rows: list = [[_p("Issue", st["th"]), _p("URL", st["th"])]]
+    table_rows.extend(
+        [_p(issue.headline, st["td"]), _safe_p(issue.url or "", st["td_url"])]
+        for issue in issues
+    )
+
+    headline_w = _content_w_in() * 0.52
+    link_w = _content_w_in() - headline_w
+    table = LongTable(table_rows, colWidths=[headline_w * inch, link_w * inch], repeatRows=1)
+    table.setStyle(_table_style_base())
+    return [table, Spacer(1, 4)]
+
+
+def _render_issue_table(block: IssueTableBlock, st: dict) -> list:
+    """Render an issue table block: optional title, compact table, optional truncation note."""
+    flowables: list = []
+    if block.title:
+        flowables.append(_p(block.title, st["subsection"]))
+
+    flowables += _render_issue_table_compact(block.issues, st)
+
+    truncation = block.truncation
+    if truncation:
+        sources = ", ".join(truncation.continue_in)
+        note = f"Showing {truncation.shown} of {truncation.total}. Full list in {sources}."
+        flowables.append(_p(note, st["muted"]))
+    return flowables
+
+
+def _render_markdown(block: MarkdownBlock, st: dict) -> list:
+    """Render a markdown block as one plain body paragraph.
+
+    Anything that looks like an HTML tag (``<...>``) is replaced with a
+    space before the text is turned into a paragraph.
+    """
+    import re
+    from reportlab.platypus import Spacer
+
+    tag_like = re.compile(r"<[^>]+>")
+    plain = tag_like.sub(" ", block.text)
+    return [_p(plain, st["body"]), Spacer(1, 4)]
+
+
+# Registry mapping a block's ``type`` string to the function that renders it.
+# Each renderer is called as ``renderer(block, st)`` and returns a list of
+# flowables.
+BLOCK_RENDERERS = {
+ "heading": _render_heading,
+ "paragraph": _render_paragraph,
+ "callout": _render_callout,
+ "spacer": _render_spacer,
+ "kpi_row": _render_kpi_row,
+ "stat_grid": _render_stat_grid,
+ "key_value": _render_key_value,
+ "score_cards": _render_score_cards,
+ "url_list": _render_url_list,
+ "metric_table": _render_metric_table,
+ "issue_group": _render_issue_group,
+ "issue_table": _render_issue_table,
+ "markdown": _render_markdown,
+}
+
+
+def _flowables_for_block(block: Any, st: dict) -> list:
+    """Return the flowables for one block, or an empty list if it is skipped.
+
+    A block is skipped when its ``visible`` attribute is falsy or when its
+    ``type`` has no entry in ``BLOCK_RENDERERS``.
+    """
+    if not getattr(block, "visible", True):
+        return []
+    renderer = BLOCK_RENDERERS.get(getattr(block, "type", None))
+    return renderer(block, st) if renderer is not None else []
+
+
+# ---------------------------------------------------------------------------
+# Cover renderer
+# ---------------------------------------------------------------------------
+
+def _render_cover(cover: PdfCoverBlock, meta: PdfMeta, st: dict) -> list:
+    """Assemble the cover page flowables.
+
+    Order: headline beside the hero score, subtitle, generation / finding
+    count line, priority strip, then (each only when data is present) the
+    category scores, executive summary and top-issues table.
+    """
+    from reportlab.lib.units import inch
+    from reportlab.platypus import Spacer, Table, TableStyle
+
+    zero_padding = [
+        (side, (0, 0), (-1, -1), 0)
+        for side in ("LEFTPADDING", "RIGHTPADDING", "TOPPADDING", "BOTTOMPADDING")
+    ]
+
+    score_col = 1.35
+    title_col = _content_w_in() - score_col
+
+    # Hero score: big coloured number with a "/100" suffix underneath.
+    hero = cover.hero
+    score_style = ParagraphStyle_compat(
+        st["hero_score"],
+        textColor=_hex(S.SCORE_TONES.get(hero.band, S.MUTED)),
+        alignment=1,
+        fontSize=32,
+        leading=36,
+    )
+    suffix_style = ParagraphStyle_compat(st["hero_suffix"], alignment=1)
+    score_block = Table(
+        [[_p(hero.score or "—", score_style)], [_p("/100", suffix_style)]],
+        colWidths=[score_col * inch],
+    )
+    score_block.setStyle(TableStyle([
+        ("ALIGN", (0, 0), (-1, -1), "CENTER"),
+        ("VALIGN", (0, 0), (-1, -1), "TOP"),
+        *zero_padding,
+    ]))
+
+    # Headline on the left, score block on the right.
+    title_row = Table(
+        [[_p(cover.headline, st["title"]), score_block]],
+        colWidths=[title_col * inch, score_col * inch],
+    )
+    title_row.setStyle(TableStyle([
+        ("VALIGN", (0, 0), (-1, -1), "TOP"),
+        ("ALIGN", (0, 0), (0, 0), "LEFT"),
+        ("ALIGN", (1, 0), (1, 0), "RIGHT"),
+        *zero_padding,
+    ]))
+
+    counts = meta.issue_counts
+    total = sum(counts.values())
+    breakdown = ", ".join(
+        f"{label} {counts.get(key, 0)}"
+        for label, key in (
+            ("Critical", "critical"),
+            ("High", "high"),
+            ("Medium", "medium"),
+            ("Low", "low"),
+        )
+    )
+    meta_line = f"Report generated {meta.generated_at} · {total} findings ({breakdown})"
+
+    parts: list = [
+        title_row,
+        _p(cover.subtitle, st["subtitle"]),
+        _p(meta_line, st["cover_meta"]),
+    ]
+    parts += _flowables_for_block(cover.priority_strip, st)
+
+    if cover.category_scores.cards:
+        parts += _section_heading("Category scores", st)
+        parts += _render_score_cards(cover.category_scores, st)
+
+    if cover.executive_summary or cover.priorities_list:
+        parts += _section_heading("Executive summary", st)
+        parts += _render_executive_panel(cover, st)
+
+    if cover.top_issues:
+        parts += _section_heading("Top traffic-impacting issues", st)
+        parts.append(_p(
+            "Ranked by severity and traffic impact — address critical and high items first.",
+            st["section_lead"],
+        ))
+        parts += _render_top_issues_table(cover.top_issues, st)
+
+    return parts
+
+
+# ---------------------------------------------------------------------------
+# Section renderer
+# ---------------------------------------------------------------------------
+
+def _render_section(section: PdfSection, st: dict) -> list:
+ # Renders one report section into a flat list of flowables: an optional
+ # page break, the section heading, an optional "Source: ..." line, the
+ # flowables of every child block, and an optional truncation note.
+ from reportlab.platypus import PageBreak, Spacer
+ parts: list = []
+ if section.page_break_before:
+ parts.append(PageBreak())
+ parts.extend(_section_heading(section.title, st))
+ if section.source_label:
+ parts.append(_p(f"Source: {section.source_label}", st["muted"]))
+ # Blocks that are hidden or of an unknown type contribute nothing.
+ for block in section.blocks:
+ parts.extend(_flowables_for_block(block, st))
+ if section.truncation:
+ t = section.truncation
+ note = f"Showing {t.shown} of {t.total} issues. Export CSV or workbook for full data."
+ parts.append(_p(note, st["muted"]))
+ # NOTE(review): indentation is flattened in this view, so it is unclear
+ # whether this trailing spacer is added only with the truncation note or
+ # after every section — confirm against the real file.
+ parts.append(Spacer(1, 4))
+ return parts
+
+
+# ---------------------------------------------------------------------------
+# Main entry point
+# ---------------------------------------------------------------------------
+
+def render_pdf_document(doc: PdfDocument) -> bytes:
+    """Render a PdfDocument to PDF bytes.
+
+    The page is US Letter with uniform margins taken from
+    ``S.PAGE_MARGIN_IN``, the same constant the content width is derived
+    from, so flowable widths and page margins cannot drift apart. The cover
+    is rendered first, followed by a page break and then every section in
+    order. The same page callback, carrying the footer text, runs on the
+    first and all later pages.
+
+    Args:
+        doc: The document model to render.
+
+    Returns:
+        The complete PDF file contents.
+
+    Raises:
+        Whatever ``_require_reportlab()`` raises when ReportLab is not
+        available.
+    """
+    _require_reportlab()
+
+    from reportlab.lib.pagesizes import letter
+    from reportlab.lib.units import inch
+    from reportlab.platypus import PageBreak, SimpleDocTemplate
+
+    footer = doc.footer
+    generated = f"Generated by {footer.generator} · {footer.exported_at}"
+    # Skip an empty/None confidentiality note so the footer does not start
+    # with a stray space or the text "None".
+    if footer.confidential_note:
+        footer_text = f"{footer.confidential_note} {generated}"
+    else:
+        footer_text = generated
+
+    margin = S.PAGE_MARGIN_IN * inch
+    buf = io.BytesIO()
+    pdf_doc = SimpleDocTemplate(
+        buf,
+        pagesize=letter,
+        topMargin=margin,
+        bottomMargin=margin,
+        leftMargin=margin,
+        rightMargin=margin,
+        title=doc.cover.headline,
+        author=footer.generator,
+    )
+
+    st = _make_styles()
+    story: list = []
+
+    story.extend(_render_cover(doc.cover, doc.meta, st))
+    story.append(PageBreak())
+
+    for section in doc.sections:
+        story.extend(_render_section(section, st))
+
+    def on_page(canvas, d):
+        _page_callback(canvas, d, footer_text)
+
+    pdf_doc.build(story, onFirstPage=on_page, onLaterPages=on_page)
+    return buf.getvalue()
diff --git a/src/website_profiling/reporting/pdf/render/styles.py b/src/website_profiling/reporting/pdf/render/styles.py
new file mode 100644
index 0000000..ab9951e
--- /dev/null
+++ b/src/website_profiling/reporting/pdf/render/styles.py
@@ -0,0 +1,54 @@
+"""ReportLab style constants mirroring the HTML CSS design tokens."""
+from __future__ import annotations
+
+# Colour palette — mirrors _report_html_styles() CSS variables
+# All colours are "#rrggbb" hex strings.
+INK = "#0f172a"
+MUTED = "#64748b"
+BORDER = "#e2e8f0"
+SURFACE_MUTED = "#f8fafc"
+BRAND = "#0b0f19"
+BRAND_ACCENT = "#2563eb"
+
+# Good / fair / poor tones, each with a matching pale *_BG fill.
+GOOD = "#059669"
+GOOD_BG = "#ecfdf5"
+FAIR = "#d97706"
+FAIR_BG = "#fffbeb"
+POOR = "#dc2626"
+POOR_BG = "#fef2f2"
+
+# Foreground / background pairs per issue priority (grouped in PRIORITY_TONES).
+CRITICAL_FG = "#991b1b"
+CRITICAL_BG = "#fee2e2"
+HIGH_FG = "#c2410c"
+HIGH_BG = "#ffedd5"
+MEDIUM_FG = "#a16207"
+MEDIUM_BG = "#fef3c7"
+LOW_FG = "#475569"
+LOW_BG = "#f1f5f9"
+
+# Fill colour for table header cells.
+HEADER_BG = "#f1f5f9"
+
+# Column widths (inches) for common patterns
+COL_NARROW = 0.75
+COL_MEDIUM = 1.5
+COL_WIDE = 2.5
+COL_URL = 2.0
+
+# Letter page with 0.65" margins — keep all flowables on this width for alignment
+PAGE_MARGIN_IN = 0.65
+PAGE_WIDTH_IN = 8.5
+CONTENT_WIDTH_IN = PAGE_WIDTH_IN - 2 * PAGE_MARGIN_IN # 7.2
+# Number of columns in card grids.
+GRID_COLS = 4
+
+# Priority name -> (foreground, background) colour pair.
+PRIORITY_TONES = {
+ "critical": (CRITICAL_FG, CRITICAL_BG),
+ "high": (HIGH_FG, HIGH_BG),
+ "medium": (MEDIUM_FG, MEDIUM_BG),
+ "low": (LOW_FG, LOW_BG),
+}
+
+# Score band key -> text colour.
+SCORE_TONES = {
+ "score-good": GOOD,
+ "score-fair": FAIR,
+ "score-poor": POOR,
+ "score-na": MUTED,
+}
diff --git a/src/website_profiling/tools/audit_tools/export_tools.py b/src/website_profiling/tools/audit_tools/export_tools.py
index cc738ea..5c2ec4f 100644
--- a/src/website_profiling/tools/audit_tools/export_tools.py
+++ b/src/website_profiling/tools/audit_tools/export_tools.py
@@ -7,18 +7,10 @@
from ..export_artifacts import (
dicts_to_csv,
- read_report_spec,
rows_from_tool_result,
save_artifact,
- save_report_spec,
)
from ..export_compare import export_compare_issues_csv
-from ..export_custom import (
- render_custom_report_html,
- render_custom_report_pdf,
- resolve_section_results,
- validate_sections,
-)
from ..export_audit import (
export_audit_csv,
export_audit_html,
@@ -30,7 +22,6 @@
from .context import AuditToolContext
_EXPORT_FORMATS = {"pdf", "html", "csv", "json"}
-_CUSTOM_FORMATS = {"html", "pdf"}
_MIME = {
"pdf": "application/pdf",
"html": "text/html; charset=utf-8",
@@ -198,8 +189,6 @@
"export_audit_report",
"export_compare_csv",
"export_list_as_csv",
- "compose_custom_report",
- "export_custom_report",
"list_export_formats",
})
@@ -304,112 +293,6 @@ def export_list_as_csv(conn: Connection, ctx: AuditToolContext, args: dict[str,
}
-def _tool_allowed_for_custom(tool_name: str) -> bool:
- if tool_name in _EXPORT_TOOL_NAMES:
- return False
- from .registry import tool_handler_names
- return tool_name in tool_handler_names()
-
-
-def compose_custom_report(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
- title = str(args.get("title") or "").strip()
- if not title:
- return {"error": "title is required"}
- sections_raw = args.get("sections")
- sections, err = validate_sections(sections_raw)
- if err:
- return {"error": err}
- assert sections is not None
- for section in sections:
- if section.get("type") == "tool":
- tname = str(section.get("tool_name") or "")
- if not _tool_allowed_for_custom(tname):
- return {"error": f"tool not allowed in custom report: {tname}"}
- scoped = ctx.with_args(args)
- payload = scoped.load_payload(conn)
- if not payload:
- return {"error": "no report found"}
- spec = {
- "title": title,
- "sections": sections,
- "property_id": scoped.property_id,
- "report_id": scoped.report_id,
- }
- spec_id = save_report_spec(spec)
- preview_html = render_custom_report_html(
- title=title,
- payload=payload,
- sections=sections,
- section_results=[None] * len(sections),
- )
- snippet = preview_html[:400].replace("\n", " ")
- return {
- "report_spec_id": spec_id,
- "section_count": len(sections),
- "preview_html_snippet": snippet,
- "title": title,
- }
-
-
-def export_custom_report(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
- fmt = str(args.get("format") or "html").lower().strip()
- if fmt not in _CUSTOM_FORMATS:
- return {"error": f"format must be one of: {', '.join(sorted(_CUSTOM_FORMATS))}"}
- scoped = ctx.with_args(args)
- payload = scoped.load_payload(conn)
- if not payload:
- return {"error": "no report found"}
- spec_id = args.get("report_spec_id")
- title = str(args.get("title") or "").strip()
- sections: list[dict[str, Any]] | None = None
- if spec_id:
- spec = read_report_spec(str(spec_id))
- if not spec:
- return {"error": "report_spec_id not found"}
- title = str(spec.get("title") or title or "Custom Report")
- raw_sections = spec.get("sections")
- sections, err = validate_sections(raw_sections)
- if err:
- return {"error": err}
- else:
- sections, err = validate_sections(args.get("sections"))
- if err:
- return {"error": err}
- if not title:
- return {"error": "title is required when report_spec_id is omitted"}
- assert sections is not None
- for section in sections:
- if section.get("type") == "tool":
- tname = str(section.get("tool_name") or "")
- if not _tool_allowed_for_custom(tname):
- return {"error": f"tool not allowed in custom report: {tname}"}
- section_results = resolve_section_results(conn, scoped, payload, sections, _dispatch)
- html_doc = render_custom_report_html(
- title=title,
- payload=payload,
- sections=sections,
- section_results=section_results,
- )
- safe_title = "".join(c if c.isalnum() or c in "-_" else "-" for c in title.lower())[:40] or "custom-report"
- if fmt == "html":
- filename = f"{safe_title}.html"
- return {
- **_artifact_from_bytes(html_doc, filename=filename, mime_type=_MIME["html"], extra={"format": fmt, "title": title}),
- "format": fmt,
- "title": title,
- }
- try:
- pdf_bytes = render_custom_report_pdf(html_doc, title)
- except RuntimeError as exc:
- return {"error": str(exc)}
- filename = f"{safe_title}.pdf"
- return {
- **_artifact_from_bytes(pdf_bytes, filename=filename, mime_type=_MIME["pdf"], extra={"format": fmt, "title": title}),
- "format": fmt,
- "title": title,
- }
-
-
def list_export_formats(_conn: Connection, _ctx: AuditToolContext, _args: dict[str, Any]) -> dict[str, Any]:
return {
"formats": [
@@ -419,14 +302,11 @@ def list_export_formats(_conn: Connection, _ctx: AuditToolContext, _args: dict[s
{"tool": "export_audit_report", "format": "json", "description": "Full audit JSON payload"},
{"tool": "export_compare_csv", "format": "csv", "description": "Issue added/removed diff between two reports"},
{"tool": "export_list_as_csv", "format": "csv", "description": "CSV from any allowlisted list tool result"},
- {"tool": "compose_custom_report", "description": "Save a multi-section custom report spec"},
- {"tool": "export_custom_report", "format": "html|pdf", "description": "Render composed custom report"},
],
"example_prompts": [
"Download the audit as PDF",
"Export broken links as CSV",
"Compare this report to report 38 as CSV",
- "Build a client report with executive summary, category scores, and broken links",
],
"notes": [
"PDF requires reportlab (pip install reportlab)",
diff --git a/src/website_profiling/tools/audit_tools/registry.py b/src/website_profiling/tools/audit_tools/registry.py
index 6729156..d41ebe5 100644
--- a/src/website_profiling/tools/audit_tools/registry.py
+++ b/src/website_profiling/tools/audit_tools/registry.py
@@ -297,10 +297,8 @@
list_slow_pages,
)
from .export_tools import (
- compose_custom_report,
export_audit_report,
export_compare_csv,
- export_custom_report,
export_list_as_csv,
list_export_formats,
)
@@ -585,8 +583,6 @@
"export_audit_report": export_audit_report,
"export_compare_csv": export_compare_csv,
"export_list_as_csv": export_list_as_csv,
- "compose_custom_report": compose_custom_report,
- "export_custom_report": export_custom_report,
"list_export_formats": list_export_formats,
"export_sitemap_xml": export_sitemap_xml,
"validate_rich_results": validate_rich_results,
diff --git a/src/website_profiling/tools/audit_tools/tool_catalog.py b/src/website_profiling/tools/audit_tools/tool_catalog.py
index facbcad..b2c6023 100644
--- a/src/website_profiling/tools/audit_tools/tool_catalog.py
+++ b/src/website_profiling/tools/audit_tools/tool_catalog.py
@@ -275,29 +275,6 @@ def _tool(name: str, description: str, properties: dict[str, Any], required: lis
"Validate structured data / Rich Results for sample URLs (Estimated without API key).",
{"property_id": _PID, "report_id": _RID, "limit": {"type": "integer", "maximum": 50}, "api_key": {"type": "string"}},
),
- _tool(
- "compose_custom_report",
- "Compose a multi-section custom report spec (tool, executive_summary, category_scores, notes sections).",
- {
- "property_id": _PID,
- "report_id": _RID,
- "title": {"type": "string"},
- "sections": {"type": "array", "items": {"type": "object"}},
- },
- ["title", "sections"],
- ),
- _tool(
- "export_custom_report",
- "Render a composed custom report to HTML or PDF.",
- {
- "property_id": _PID,
- "report_id": _RID,
- "report_spec_id": {"type": "string"},
- "title": {"type": "string"},
- "sections": {"type": "array", "items": {"type": "object"}},
- "format": {"type": "string", "enum": ["html", "pdf"]},
- },
- ),
_tool("list_export_formats", "List supported export tools, formats, and example prompts.", {}),
# Image audit
_tool("get_image_audit_summary", "Site-wide image audit totals: alt, lazy-load, dimensions, OG, Lighthouse image diagnostics.", {"property_id": _PID, "report_id": _RID}),
diff --git a/src/website_profiling/tools/audit_tools/tool_domains.py b/src/website_profiling/tools/audit_tools/tool_domains.py
index 6f69d67..7a74e54 100644
--- a/src/website_profiling/tools/audit_tools/tool_domains.py
+++ b/src/website_profiling/tools/audit_tools/tool_domains.py
@@ -188,7 +188,7 @@ def classify_tool_domain(name: str) -> str:
if name in TIER_0_TOOLS:
return _DOMAIN_OVERRIDES.get(name, "core")
- if name.startswith("export_") or name in ("compose_custom_report", "list_export_formats"):
+ if name.startswith("export_") or name == "list_export_formats":
return "export"
if name.startswith((
"get_image_", "list_pages_without_lazy", "list_pages_with_images_missing",
diff --git a/src/website_profiling/tools/export_artifacts.py b/src/website_profiling/tools/export_artifacts.py
index fd2c015..6c270f5 100644
--- a/src/website_profiling/tools/export_artifacts.py
+++ b/src/website_profiling/tools/export_artifacts.py
@@ -12,7 +12,6 @@
from typing import Any
_ARTIFACT_ID_RE = re.compile(r"^[a-f0-9-]{36}$")
-_SPEC_ID_RE = re.compile(r"^[a-f0-9-]{36}$")
_TTL_SECONDS = 24 * 60 * 60
_INLINE_MAX_BYTES = 512 * 1024
_LIST_ROW_KEYS = (
@@ -30,6 +29,17 @@
"clusters",
"deltas",
"results",
+ "broken",
+ "redirects",
+ "diagnostics",
+ "categories",
+ "opportunities",
+ "violations_by_rule",
+ "poor_performance_pages",
+ "errors",
+ "daily",
+ "by_device",
+ "by_channel",
)
@@ -43,12 +53,6 @@ def exports_dir() -> str:
return path
-def specs_dir() -> str:
- path = os.path.join(exports_dir(), "specs")
- os.makedirs(path, exist_ok=True)
- return path
-
-
def _meta_path(artifact_id: str) -> str:
return os.path.join(exports_dir(), f"{artifact_id}.meta.json")
@@ -153,27 +157,6 @@ def delete_artifact(artifact_id: str) -> None:
pass
-def save_report_spec(spec: dict[str, Any]) -> str:
- spec_id = str(uuid.uuid4())
- spec["report_spec_id"] = spec_id
- spec["created_at"] = datetime.now(timezone.utc).isoformat()
- path = os.path.join(specs_dir(), f"{spec_id}.json")
- with open(path, "w", encoding="utf-8") as f:
- json.dump(spec, f)
- return spec_id
-
-
-def read_report_spec(spec_id: str) -> dict[str, Any] | None:
- if not _SPEC_ID_RE.match(spec_id):
- return None
- path = os.path.join(specs_dir(), f"{spec_id}.json")
- if not os.path.isfile(path):
- return None
- with open(path, encoding="utf-8") as f:
- data = json.load(f)
- return data if isinstance(data, dict) else None
-
-
def rows_from_tool_result(result: dict[str, Any]) -> list[dict[str, Any]]:
if result.get("error"):
return []
diff --git a/src/website_profiling/tools/export_audit.py b/src/website_profiling/tools/export_audit.py
index 03cfe76..771e701 100644
--- a/src/website_profiling/tools/export_audit.py
+++ b/src/website_profiling/tools/export_audit.py
@@ -2,19 +2,12 @@
from __future__ import annotations
import csv
-import html
import io
import json
-from datetime import datetime, timezone
from typing import Optional
from ..db import db_session, read_report_payload
-from ..reporting.terminology import category_display_name
from .export_audit_data import (
- _GLOSSARY_ROWS,
- _ISSUE_LIMIT_HTML,
- _ISSUE_LIMIT_PDF,
- _LINK_LIMIT,
_executive_export_data,
_executive_source_label,
_format_report_date,
@@ -93,370 +86,29 @@ def export_audit_json(report_id: Optional[int] = None) -> str:
return json.dumps(payload, indent=2, default=str)
-def export_audit_html(report_id: Optional[int] = None) -> str:
- payload = _load_payload(report_id)
- site_raw = str(payload.get("site_name") or "Site Audit")
- site = html.escape(site_raw)
- generated_raw = str(payload.get("report_generated_at") or "")
- generated = html.escape(_format_report_date(generated_raw))
- all_issues = _issues_rows(payload)
- issues = sorted(all_issues, key=_priority_sort_key)[:_ISSUE_LIMIT_HTML]
- issue_total = len(all_issues)
- priority_counts = _issue_priority_counts(all_issues)
- links = [l for l in (payload.get("links") or []) if isinstance(l, dict)][:_LINK_LIMIT]
- categories = payload.get("categories") or []
- overall = _overall_score(payload)
- overall_txt, overall_cls = _score_band(float(overall) if overall is not None else None)
-
- summary_html = "".join(
- f"{html.escape(k)} {html.escape(v)} "
- for k, v in _summary_lines(payload)
- )
-
- issue_rows = ""
- for row in issues:
- pri = row["priority"].lower()
- badge_cls = f"badge-{pri}" if pri in {"critical", "high", "medium", "low"} else "badge-low"
- issue_rows += (
- ""
- f"{html.escape(row['category'])} "
- f"{html.escape(row['priority'])} "
- f"{html.escape(row['message'])} "
- f"{html.escape(row['url'])} "
- f"{html.escape(row['recommendation'])} "
- " "
- )
-
- has_custom_extract = any(isinstance(l, dict) and l.get("custom_extract") for l in links)
- link_rows = ""
- for link in links:
- status = str(link.get("status") or "")
- status_cls = "badge-low"
- if status.startswith("2"):
- status_cls = "badge-medium"
- elif status.startswith("3"):
- status_cls = "badge-high"
- elif status.startswith("4") or status.startswith("5"):
- status_cls = "badge-critical"
- custom_cell = (
- f"{html.escape(str(link.get('custom_extract') or ''))} "
- if has_custom_extract
- else ""
- )
- link_rows += (
- ""
- f"{html.escape(str(link.get('url') or ''))} "
- f"{html.escape(status or '—')} "
- f"{html.escape(str(link.get('title') or ''))} "
- f"{html.escape(str(link.get('inlinks') or ''))} "
- f"{html.escape(str(link.get('word_count') or ''))} "
- f"{custom_cell}"
- " "
- )
-
- glossary = "".join(
- f"{html.escape(term)} {html.escape(desc)} "
- for term, desc in _GLOSSARY_ROWS
- )
-
- rec_html = _executive_summary_html(payload)
-
- truncated_note = ""
- if issue_total > len(issues):
- truncated_note = (
- f'Showing {len(issues)} of {issue_total} issues. '
- "Download CSV or JSON for the complete audit dataset.
"
- )
-
- exported_at = datetime.now(timezone.utc).strftime("%d %B %Y, %H:%M UTC")
- report_title = html.escape(str(payload.get("report_title") or "Technical SEO Audit Report"))
- report_meta = payload.get("report_meta") if isinstance(payload.get("report_meta"), dict) else {}
- logo_url = str(report_meta.get("export_logo_url") or "").strip()
- logo_html = (
- f' '
- if logo_url
- else ""
- )
- hero_copy = (
- f"{issue_total} findings across {len(categories)} audit categories."
- if categories
- else f"{issue_total} findings recorded in this audit."
- )
- if overall is not None:
- hero_copy = f"Overall health score {overall}/100. {hero_copy}"
-
- return f"""
-
-
-
-
-Site Audit — {site}
-
-
-
-
-
-
-
-
-
{overall_txt}
-
-
Audit health overview
-
{html.escape(hero_copy)}
-
-
-
- {_priority_stats_html(priority_counts)}
-
-
- Category scores
- {_category_cards_html(categories)}
-
-
- {rec_html}
-
-
-
-
- Findings
- {truncated_note}
-
-
- Category Priority Issue URL Recommendation
-
- {issue_rows or 'No issues recorded. '}
-
-
-
-
- Crawled URLs (sample)
- First {len(links)} URLs from the crawl. Export CSV for the full URL inventory.
-
- URL Status Title Inlinks Words {'Custom extract ' if has_custom_extract else ''}
- {link_rows or 'No URLs recorded. '}
-
-
-
-
- Data source glossary
-
-
-
-
-
-
-
-"""
-
-
-def export_audit_pdf(report_id: Optional[int] = None) -> bytes:
- try:
- from reportlab.lib import colors
- from reportlab.lib.pagesizes import letter
- from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
- from reportlab.lib.units import inch
- from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle
- except ImportError as exc:
- raise RuntimeError(
- "PDF export requires reportlab (pip install reportlab)"
- ) from exc
+def export_audit_html(report_id: Optional[int] = None, profile: str = "standard") -> str:
+ """Export audit report as HTML preview matching the PDF layout."""
+ from ..reporting.pdf import build_pdf_document
+ from ..reporting.pdf.options import PdfBuildOptions
+ from ..reporting.pdf.render.html import render_html_document
payload = _load_payload(report_id)
- site = str(payload.get("site_name") or "Site Audit")
- buf = io.BytesIO()
- doc = SimpleDocTemplate(buf, pagesize=letter, topMargin=0.55 * inch, bottomMargin=0.55 * inch)
- styles = getSampleStyleSheet()
- title_style = ParagraphStyle(
- "AuditTitle",
- parent=styles["Heading1"],
- fontSize=20,
- textColor=colors.HexColor("#0f172a"),
- spaceAfter=4,
- )
- subtitle_style = ParagraphStyle(
- "AuditSubtitle",
- parent=styles["Normal"],
- fontSize=10,
- textColor=colors.HexColor("#64748b"),
- spaceAfter=10,
- )
- section_style = ParagraphStyle(
- "SectionHeading",
- parent=styles["Heading2"],
- fontSize=11,
- textColor=colors.HexColor("#0b0f19"),
- spaceBefore=8,
- spaceAfter=6,
- )
- table_header = colors.HexColor("#f1f5f9")
- table_grid = colors.HexColor("#e2e8f0")
-
- story: list[Any] = []
- story.append(Paragraph(f"Site Audit — {html.escape(site)}", title_style))
- meta_line = _format_report_date(str(payload.get("report_generated_at") or ""))
- story.append(Paragraph(
- f"Technical SEO Audit Report · Generated {html.escape(meta_line)}",
- subtitle_style,
- ))
-
- overall = _overall_score(payload)
- all_issues = _issue_priority_counts(_issues_rows(payload))
- if overall is not None:
- story.append(Paragraph(
- f"Overall health score: {overall}/100 · "
- f"Findings: {sum(all_issues.values())} "
- f"(Critical {all_issues['critical']}, High {all_issues['high']}, "
- f"Medium {all_issues['medium']}, Low {all_issues['low']})",
- styles["Normal"],
- ))
- story.append(Spacer(1, 0.15 * inch))
+ opts = PdfBuildOptions(profile=profile, report_id=report_id) # type: ignore[arg-type]
+ doc = build_pdf_document(payload, opts)
+ return render_html_document(doc)
- categories = payload.get("categories") or []
- if categories:
- cat_data = [["Category", "Score", "Issues"]]
- for cat in categories:
- if not isinstance(cat, dict):
- continue
- name = category_display_name(str(cat.get("name") or "Category"))
- score = cat.get("score")
- score_txt = "—"
- if score is not None:
- try:
- score_txt = str(int(round(float(score))))
- except (TypeError, ValueError):
- score_txt = "—"
- cat_data.append([name, score_txt, str(len(cat.get("issues") or []))])
- cat_table = Table(cat_data, colWidths=[3.0 * inch, 0.9 * inch, 0.9 * inch])
- cat_table.setStyle(TableStyle([
- ("BACKGROUND", (0, 0), (-1, 0), table_header),
- ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
- ("FONTSIZE", (0, 0), (-1, -1), 9),
- ("GRID", (0, 0), (-1, -1), 0.25, table_grid),
- ("VALIGN", (0, 0), (-1, -1), "TOP"),
- ("ALIGN", (1, 1), (-1, -1), "CENTER"),
- ]))
- story.append(Paragraph("Category scores", section_style))
- story.append(cat_table)
- story.append(Spacer(1, 0.2 * inch))
- exec_data = _executive_export_data(payload)
- if exec_data["summary"] or exec_data["priorities"] or exec_data["top_issues"]:
- story.append(Paragraph("Executive summary", section_style))
- if exec_data["source"]:
- story.append(Paragraph(
- f"Source: {html.escape(_executive_source_label(exec_data['source']))} ",
- styles["Normal"],
- ))
- if exec_data["summary"]:
- summary_pdf = html.escape(exec_data["summary"]).replace("\n", " ")
- story.append(Paragraph(summary_pdf, styles["Normal"]))
- if exec_data["priorities"]:
- pri_items = "".join(f"• {html.escape(p)} " for p in exec_data["priorities"][:8])
- story.append(Paragraph(f"Priorities {pri_items}", styles["Normal"]))
- if exec_data["top_issues"]:
- top_data = [["Priority", "Issue", "URL"]]
- for iss in exec_data["top_issues"][:6]:
- msg = str(iss.get("message") or "")
- if len(msg) > 100:
- msg = msg[:97] + "..."
- url = str(iss.get("url") or "")
- if len(url) > 70:
- url = url[:67] + "..."
- top_data.append([str(iss.get("priority") or ""), msg, url])
- top_table = Table(top_data, colWidths=[0.85 * inch, 3.2 * inch, 2.45 * inch])
- top_table.setStyle(TableStyle([
- ("BACKGROUND", (0, 0), (-1, 0), table_header),
- ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
- ("FONTSIZE", (0, 0), (-1, -1), 8),
- ("GRID", (0, 0), (-1, -1), 0.25, table_grid),
- ("VALIGN", (0, 0), (-1, -1), "TOP"),
- ]))
- story.append(Paragraph("Top traffic-impacting issues ", styles["Normal"]))
- story.append(top_table)
- story.append(Spacer(1, 0.2 * inch))
-
- summary_data = [["Field", "Value"]] + [[k, v] for k, v in _summary_lines(payload)]
- summary_table = Table(summary_data, colWidths=[2.2 * inch, 4.3 * inch])
- summary_table.setStyle(TableStyle([
- ("BACKGROUND", (0, 0), (-1, 0), table_header),
- ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
- ("FONTSIZE", (0, 0), (-1, -1), 9),
- ("GRID", (0, 0), (-1, -1), 0.25, table_grid),
- ("VALIGN", (0, 0), (-1, -1), "TOP"),
- ]))
- story.append(Paragraph("Audit details", section_style))
- story.append(summary_table)
- story.append(Spacer(1, 0.2 * inch))
-
- issues = sorted(_issues_rows(payload), key=_priority_sort_key)[:_ISSUE_LIMIT_PDF]
- issue_data = [["Category", "Priority", "Issue", "URL"]]
- for row in issues:
- msg = row["message"]
- if len(msg) > 120:
- msg = msg[:117] + "..."
- url = row["url"]
- if len(url) > 80:
- url = url[:77] + "..."
- issue_data.append([row["category"], row["priority"], msg, url])
- if len(issue_data) == 1:
- issue_data.append(["—", "—", "No issues", "—"])
- issue_table = Table(issue_data, colWidths=[1.3 * inch, 0.75 * inch, 2.5 * inch, 2.0 * inch])
- issue_table.setStyle(TableStyle([
- ("BACKGROUND", (0, 0), (-1, 0), table_header),
- ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
- ("FONTSIZE", (0, 0), (-1, -1), 8),
- ("GRID", (0, 0), (-1, -1), 0.25, table_grid),
- ("VALIGN", (0, 0), (-1, -1), "TOP"),
- ]))
- story.append(Paragraph("Findings", section_style))
- total_issues = len(_issues_rows(payload))
- if total_issues > len(issues):
- story.append(Paragraph(
- f"Showing {len(issues)} of {total_issues} issues. Export CSV/JSON for full data.",
- styles["Italic"],
- ))
- story.append(issue_table)
- story.append(Spacer(1, 0.2 * inch))
+def export_audit_pdf(report_id: Optional[int] = None, profile: str = "standard") -> bytes:
+ """Export audit report as a formatted PDF using the PdfDocument pipeline.
- gloss_data = [["Source", "Meaning"]] + list(_GLOSSARY_ROWS)
- gloss_table = Table(gloss_data, colWidths=[1.4 * inch, 5.1 * inch])
- gloss_table.setStyle(TableStyle([
- ("BACKGROUND", (0, 0), (-1, 0), table_header),
- ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
- ("FONTSIZE", (0, 0), (-1, -1), 8),
- ("GRID", (0, 0), (-1, -1), 0.25, table_grid),
- ("VALIGN", (0, 0), (-1, -1), "TOP"),
- ]))
- story.append(Paragraph("Data source glossary", section_style))
- story.append(gloss_table)
+ Args:
+ report_id: Specific report ID to load (None = latest).
+ profile: "executive" | "standard" | "full" (default "standard").
+ """
+ from ..reporting.pdf import build_pdf_document, render_pdf_document
+ from ..reporting.pdf.options import PdfBuildOptions
- exported_at = datetime.now(timezone.utc).strftime("%d %B %Y, %H:%M UTC")
- story.append(Spacer(1, 0.25 * inch))
- story.append(Paragraph(
- f"Confidential — prepared for client review. Generated by Site Audit · {html.escape(exported_at)}",
- ParagraphStyle(
- "Footer",
- parent=styles["Normal"],
- fontSize=7,
- textColor=colors.HexColor("#64748b"),
- ),
- ))
-
- doc.build(story)
- return buf.getvalue()
+ payload = _load_payload(report_id)
+ opts = PdfBuildOptions(profile=profile, report_id=report_id) # type: ignore[arg-type]
+ doc = build_pdf_document(payload, opts)
+ return render_pdf_document(doc)
diff --git a/src/website_profiling/tools/export_audit_html.py b/src/website_profiling/tools/export_audit_html.py
index 2232e6b..e3b61db 100644
--- a/src/website_profiling/tools/export_audit_html.py
+++ b/src/website_profiling/tools/export_audit_html.py
@@ -115,229 +115,5 @@ def _priority_stats_html(counts: dict[str, int]) -> str:
def _report_html_styles() -> str:
- return """
- :root {
- --ink: #0f172a;
- --muted: #64748b;
- --line: #e2e8f0;
- --surface: #ffffff;
- --surface-muted: #f8fafc;
- --brand: #0b0f19;
- --brand-accent: #2563eb;
- --good: #059669;
- --good-bg: #ecfdf5;
- --fair: #d97706;
- --fair-bg: #fffbeb;
- --poor: #dc2626;
- --poor-bg: #fef2f2;
- --critical: #991b1b;
- --high: #c2410c;
- --medium: #a16207;
- --low: #475569;
- }
- * { box-sizing: border-box; }
- body {
- margin: 0;
- background: #eef2f7;
- color: var(--ink);
- font: 400 15px/1.55 "Segoe UI", system-ui, -apple-system, sans-serif;
- }
- .report { max-width: 920px; margin: 0 auto; background: var(--surface); }
- .cover {
- background: linear-gradient(135deg, #0b0f19 0%, #111827 55%, #1e3a5f 100%);
- color: #f8fafc;
- padding: 2.5rem 2.75rem 2rem;
- }
- .cover-brand {
- font-size: 0.72rem;
- letter-spacing: 0.14em;
- text-transform: uppercase;
- color: #93c5fd;
- font-weight: 700;
- margin-bottom: 1rem;
- }
- .cover h1 {
- margin: 0;
- font-size: clamp(1.6rem, 4vw, 2.1rem);
- font-weight: 700;
- line-height: 1.15;
- }
- .cover-subtitle {
- margin: 0.5rem 0 0;
- color: #cbd5e1;
- font-size: 1rem;
- }
- .cover-meta {
- display: grid;
- grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
- gap: 0.75rem 1.5rem;
- margin-top: 1.75rem;
- padding-top: 1.25rem;
- border-top: 1px solid rgba(255,255,255,0.12);
- font-size: 0.82rem;
- }
- .cover-meta dt { color: #94a3b8; margin: 0 0 0.15rem; font-weight: 500; }
- .cover-meta dd { margin: 0; color: #f1f5f9; font-weight: 600; }
- .content { padding: 2rem 2.75rem 2.5rem; }
- .hero-score {
- display: flex;
- flex-wrap: wrap;
- align-items: center;
- gap: 1.25rem 2rem;
- padding: 1.25rem 1.5rem;
- border: 1px solid var(--line);
- border-radius: 12px;
- background: var(--surface-muted);
- margin-bottom: 1.75rem;
- }
- .hero-score-ring {
- width: 88px;
- height: 88px;
- border-radius: 50%;
- display: grid;
- place-items: center;
- font-size: 1.65rem;
- font-weight: 800;
- border: 4px solid currentColor;
- flex-shrink: 0;
- }
- .hero-score-ring.score-good { color: var(--good); background: var(--good-bg); }
- .hero-score-ring.score-fair { color: var(--fair); background: var(--fair-bg); }
- .hero-score-ring.score-poor { color: var(--poor); background: var(--poor-bg); }
- .hero-score-ring.score-na { color: var(--muted); background: #f1f5f9; border-color: #cbd5e1; }
- .hero-score-copy h2 { margin: 0 0 0.35rem; font-size: 1.05rem; }
- .hero-score-copy p { margin: 0; color: var(--muted); font-size: 0.92rem; }
- .stats-row {
- display: grid;
- grid-template-columns: repeat(4, minmax(0, 1fr));
- gap: 0.75rem;
- margin-bottom: 1.75rem;
- }
- .stat {
- border: 1px solid var(--line);
- border-radius: 10px;
- padding: 0.85rem 0.75rem;
- text-align: center;
- background: var(--surface);
- }
- .stat-value { display: block; font-size: 1.35rem; font-weight: 800; line-height: 1.1; }
- .stat-label {
- display: block;
- margin-top: 0.25rem;
- font-size: 0.72rem;
- text-transform: uppercase;
- letter-spacing: 0.06em;
- color: var(--muted);
- font-weight: 600;
- }
- .stat-critical .stat-value { color: var(--critical); }
- .stat-high .stat-value { color: var(--high); }
- .stat-medium .stat-value { color: var(--medium); }
- .stat-low .stat-value { color: var(--low); }
- .score-grid {
- display: grid;
- grid-template-columns: repeat(auto-fill, minmax(140px, 1fr));
- gap: 0.75rem;
- margin-bottom: 1.75rem;
- }
- .score-card {
- border: 1px solid var(--line);
- border-radius: 10px;
- padding: 0.9rem 0.75rem;
- background: var(--surface);
- }
- .score-card .score-value { font-size: 1.5rem; font-weight: 800; line-height: 1; }
- .score-card .score-name { margin-top: 0.45rem; font-size: 0.78rem; font-weight: 600; line-height: 1.25; }
- .score-card .score-meta { margin-top: 0.25rem; font-size: 0.72rem; color: var(--muted); }
- .score-card.score-good .score-value { color: var(--good); }
- .score-card.score-fair .score-value { color: var(--fair); }
- .score-card.score-poor .score-value { color: var(--poor); }
- .score-card.score-na .score-value { color: var(--muted); }
- section { margin-bottom: 2rem; page-break-inside: avoid; }
- section h2 {
- margin: 0 0 0.85rem;
- font-size: 1rem;
- font-weight: 700;
- letter-spacing: 0.02em;
- text-transform: uppercase;
- color: var(--ink);
- padding-bottom: 0.45rem;
- border-bottom: 2px solid var(--brand);
- }
- .callout {
- border-left: 4px solid var(--brand-accent);
- background: #eff6ff;
- padding: 1rem 1.15rem;
- border-radius: 0 10px 10px 0;
- margin-bottom: 0.5rem;
- }
- .callout ul { margin: 0; padding-left: 1.15rem; }
- .callout li { margin: 0.35rem 0; }
- table.data {
- width: 100%;
- border-collapse: collapse;
- font-size: 0.84rem;
- border: 1px solid var(--line);
- border-radius: 10px;
- overflow: hidden;
- }
- table.data th,
- table.data td {
- padding: 0.55rem 0.65rem;
- text-align: left;
- vertical-align: top;
- border-bottom: 1px solid var(--line);
- }
- table.data th {
- background: var(--surface-muted);
- font-size: 0.72rem;
- text-transform: uppercase;
- letter-spacing: 0.05em;
- color: var(--muted);
- font-weight: 700;
- }
- table.data tbody tr:last-child td { border-bottom: none; }
- table.data tbody tr:nth-child(even) td { background: #fcfdff; }
- table.kv th {
- width: 34%;
- font-weight: 600;
- color: var(--ink);
- background: var(--surface-muted);
- }
- .url { word-break: break-all; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 0.78rem; }
- .badge {
- display: inline-block;
- padding: 0.15rem 0.5rem;
- border-radius: 999px;
- font-size: 0.68rem;
- font-weight: 700;
- text-transform: uppercase;
- letter-spacing: 0.04em;
- }
- .badge-critical { background: #fee2e2; color: var(--critical); }
- .badge-high { background: #ffedd5; color: var(--high); }
- .badge-medium { background: #fef3c7; color: var(--medium); }
- .badge-low { background: #f1f5f9; color: var(--low); }
- .muted { color: var(--muted); font-size: 0.86rem; margin: 0.35rem 0 0.75rem; }
- .report-footer {
- border-top: 1px solid var(--line);
- padding: 1.25rem 2.75rem 2rem;
- color: var(--muted);
- font-size: 0.78rem;
- line-height: 1.5;
- }
- @media print {
- body { background: #fff; }
- .report { max-width: none; }
- .cover { -webkit-print-color-adjust: exact; print-color-adjust: exact; }
- .content { padding: 1.2cm 1.4cm; }
- section { page-break-inside: auto; }
- table.data { page-break-inside: auto; }
- table.data tr { page-break-inside: avoid; }
- .report-footer { padding-left: 1.4cm; padding-right: 1.4cm; }
- }
- @media (max-width: 640px) {
- .cover, .content, .report-footer { padding-left: 1.25rem; padding-right: 1.25rem; }
- .stats-row { grid-template-columns: repeat(2, minmax(0, 1fr)); }
- }
-"""
+ from ..reporting.pdf.render.html import html_styles
+ return html_styles()
diff --git a/src/website_profiling/tools/export_custom.py b/src/website_profiling/tools/export_custom.py
deleted file mode 100644
index 95208e5..0000000
--- a/src/website_profiling/tools/export_custom.py
+++ /dev/null
@@ -1,248 +0,0 @@
-"""Custom composed report HTML/PDF builder."""
-from __future__ import annotations
-
-import html
-import io
-import re
-from typing import Any, Callable
-
-from psycopg import Connection
-
-from .export_artifacts import rows_from_tool_result
-from .export_audit import (
- _category_cards_html,
- _executive_export_data,
- _executive_source_label,
- _executive_summary_html,
- _format_report_date,
- _overall_score,
- _report_html_styles,
-)
-
-_MAX_SECTIONS = 12
-_NOTES_MAX_LEN = 8000
-
-_SECTION_TABLE_KEYS = (
- "pages",
- "items",
- "paths",
- "issues",
- "issue_deltas",
- "rows",
- "keywords",
- "queries",
- "links",
- "findings",
- "deltas",
-)
-
-
-def _sanitize_notes(text: str) -> str:
- cleaned = re.sub(r"", "", text, flags=re.I | re.S)
- cleaned = cleaned.replace("<", "<").replace(">", ">")
- return cleaned[:_NOTES_MAX_LEN]
-
-
-def _table_from_rows(rows: list[dict[str, Any]], max_rows: int = 50) -> str:
- if not rows:
- return 'No data.
'
- sample = rows[:max_rows]
- keys: list[str] = []
- seen: set[str] = set()
- for row in sample:
- for k in row:
- if k not in seen:
- seen.add(k)
- keys.append(k)
- if not keys:
- return 'No columns.
'
- head = "".join(f"{html.escape(k)} " for k in keys[:8])
- body_rows = []
- for row in sample:
- cells = "".join(
- f"{html.escape(str(row.get(k, ''))[:500])} " for k in keys[:8]
- )
- body_rows.append(f"{cells} ")
- note = ""
- if len(rows) > max_rows:
- note = f'Showing {max_rows} of {len(rows)} rows.
'
- return (
- f'{head} '
- f"{''.join(body_rows)}
{note}"
- )
-
-
-def _section_html_tool_result(heading: str, result: dict[str, Any]) -> str:
- h = html.escape(heading)
- if result.get("error"):
- return f"{h} {html.escape(str(result['error']))}
"
- rows = rows_from_tool_result(result)
- if rows:
- return f"{h} {_table_from_rows(rows)} "
- for key in _SECTION_TABLE_KEYS:
- raw = result.get(key)
- if isinstance(raw, list) and raw and isinstance(raw[0], dict):
- return f"{h} {_table_from_rows(raw)} "
- preview = html.escape(str(result)[:2000])
- return f""
-
-
-def _section_html_executive(payload: dict[str, Any]) -> str:
- return f"Executive summary {_executive_summary_html(payload)} "
-
-
-def _section_html_categories(payload: dict[str, Any]) -> str:
- cards = _category_cards_html(payload.get("categories") or [])
- overall = _overall_score(payload)
- score_txt = str(overall) if overall is not None else "—"
- return (
- f'Category scores '
- f'Overall health: {html.escape(score_txt)}/100
'
- f'{cards}
'
- )
-
-
-def _section_html_notes(heading: str, markdown: str) -> str:
- body = _sanitize_notes(markdown).replace("\n", " ")
- return f"{html.escape(heading)} {body}
"
-
-
-def render_custom_report_html(
- *,
- title: str,
- payload: dict[str, Any],
- sections: list[dict[str, Any]],
- section_results: list[dict[str, Any] | None],
-) -> str:
- site = html.escape(str(payload.get("site_name") or "Site Audit"))
- generated = html.escape(_format_report_date(str(payload.get("report_generated_at") or "")))
- title_esc = html.escape(title)
- parts: list[str] = []
- for section, result in zip(sections, section_results):
- stype = str(section.get("type") or "")
- if stype == "executive_summary":
- parts.append(_section_html_executive(payload))
- elif stype == "category_scores":
- parts.append(_section_html_categories(payload))
- elif stype == "notes":
- parts.append(_section_html_notes(
- str(section.get("heading") or "Notes"),
- str(section.get("markdown") or ""),
- ))
- elif stype == "tool" and result is not None:
- parts.append(_section_html_tool_result(
- str(section.get("heading") or section.get("tool_name") or "Section"),
- result,
- ))
- body = "\n".join(parts)
- styles = _report_html_styles()
- return f"""
-
-
-
- {title_esc} — {site}
-
-
-
-
-
- Site Audit
- {title_esc}
- {site} · {generated}
-
-
{body}
-
-
-"""
-
-
-def render_custom_report_pdf(html_doc: str, title: str) -> bytes:
- try:
- from reportlab.lib import colors
- from reportlab.lib.pagesizes import letter
- from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
- from reportlab.lib.units import inch
- from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer
- except ImportError as exc:
- raise RuntimeError("PDF export requires reportlab (pip install reportlab)") from exc
-
- buf = io.BytesIO()
- doc = SimpleDocTemplate(buf, pagesize=letter, topMargin=0.55 * inch, bottomMargin=0.55 * inch)
- styles = getSampleStyleSheet()
- title_style = ParagraphStyle(
- "CustomTitle",
- parent=styles["Heading1"],
- fontSize=18,
- textColor=colors.HexColor("#0f172a"),
- )
- story: list[Any] = [
- Paragraph(html.escape(title), title_style),
- Spacer(1, 0.2 * inch),
- Paragraph(
- "Custom report generated from selected audit sections. "
- "Open the HTML export for full tables and formatting.",
- styles["Normal"],
- ),
- ]
- text = re.sub(r"<[^>]+>", " ", html_doc)
- text = re.sub(r"\s+", " ", text).strip()
- chunk_size = 3000
- for i in range(0, min(len(text), 12000), chunk_size):
- story.append(Paragraph(html.escape(text[i : i + chunk_size]), styles["Normal"]))
- story.append(Spacer(1, 0.1 * inch))
- doc.build(story)
- return buf.getvalue()
-
-
-def validate_sections(sections: Any) -> tuple[list[dict[str, Any]] | None, str | None]:
- if not isinstance(sections, list) or not sections:
- return None, "sections must be a non-empty array"
- if len(sections) > _MAX_SECTIONS:
- return None, f"sections max {_MAX_SECTIONS}"
- normalized: list[dict[str, Any]] = []
- for raw in sections:
- if not isinstance(raw, dict):
- return None, "each section must be an object"
- stype = str(raw.get("type") or "")
- if stype == "tool":
- if not raw.get("tool_name"):
- return None, "tool sections require tool_name"
- normalized.append(raw)
- elif stype in ("executive_summary", "category_scores"):
- normalized.append({"type": stype})
- elif stype == "notes":
- if not raw.get("markdown"):
- return None, "notes sections require markdown"
- normalized.append(raw)
- else:
- return None, f"unknown section type: {stype}"
- return normalized, None
-
-
-def resolve_section_results(
- conn: Connection,
- ctx: Any,
- payload: dict[str, Any],
- sections: list[dict[str, Any]],
- dispatch_fn: Callable[..., dict[str, Any]],
-) -> list[dict[str, Any] | None]:
- results: list[dict[str, Any] | None] = []
- for section in sections:
- stype = section.get("type")
- if stype in ("executive_summary", "category_scores", "notes"):
- results.append(None)
- continue
- if stype == "tool":
- tool_args = dict(section.get("tool_args") or {})
- if ctx.property_id is not None and "property_id" not in tool_args:
- tool_args["property_id"] = ctx.property_id
- if ctx.report_id is not None and "report_id" not in tool_args:
- tool_args["report_id"] = ctx.report_id
- results.append(dispatch_fn(str(section["tool_name"]), tool_args, context=ctx, conn=conn))
- continue
- results.append(None)
- return results
diff --git a/tests/reporting/test_pdf_branch_coverage.py b/tests/reporting/test_pdf_branch_coverage.py
new file mode 100644
index 0000000..255628d
--- /dev/null
+++ b/tests/reporting/test_pdf_branch_coverage.py
@@ -0,0 +1,419 @@
+"""Branch-coverage tests for the PDF pipeline (adapters, normalize, renderers)."""
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import pytest
+
+pytest.importorskip("reportlab")
+
+from website_profiling.reporting.pdf.adapters.appendix import adapt_appendix
+from website_profiling.reporting.pdf.adapters.findings import adapt_findings
+from website_profiling.reporting.pdf.builder import build_pdf_document
+from website_profiling.reporting.pdf.document import (
+ SCHEMA_VERSION,
+ CalloutBlock,
+ HeadingBlock,
+ IssueGroupBlock,
+ IssueTableBlock,
+ KeyValueBlock,
+ KpiItem,
+ KpiRowBlock,
+ MarkdownBlock,
+ MetricTableBlock,
+ ParagraphBlock,
+ PdfCoverBlock,
+ PdfDocument,
+ PdfFooterBlock,
+ PdfIssue,
+ PdfMeta,
+ PdfScoreHero,
+ PdfSection,
+ PdfTruncation,
+ ScoreCard,
+ ScoreCardsBlock,
+ SpacerBlock,
+ StatChip,
+ StatGridBlock,
+ TableColumn,
+ UrlListBlock,
+)
+from website_profiling.reporting.pdf.normalize import (
+ _extract_path,
+ _is_lighthouse_row,
+ _strip_url_from_headline,
+ normalize_issue_for_pdf,
+)
+from website_profiling.reporting.pdf.options import PdfBuildOptions, PdfLimits
+from website_profiling.reporting.pdf.render.html import (
+ _render_executive_panel as _html_render_executive_panel,
+ _render_stat_grid as _html_render_stat_grid,
+ _render_score_cards as _html_render_score_cards,
+ _render_block as _html_render_block,
+ render_html_document,
+)
+from website_profiling.reporting.pdf.render.reportlab import (
+ _flowables_for_block,
+ _make_styles,
+ _p,
+ _p_html,
+ _render_executive_panel as _rl_render_executive_panel,
+ _render_top_issues_table,
+ _safe_p,
+ render_pdf_document,
+)
+
+
+def _row(message: str, **kwargs) -> dict:
+    """Return a raw issue row for *message*; keyword arguments override the defaults."""
+    return {
+        "category": "Technical SEO",
+        "priority": "high",
+        "message": message,
+        "url": "",
+        "recommendation": "Fix it",
+        **kwargs,
+    }
+
+
+def _issue(**kwargs) -> PdfIssue:
+ defaults = {
+ "id": "iss001",
+ "priority": "high",
+ "category": "Technical SEO",
+ "headline": "Sample issue",
+ "url": "https://example.com/a",
+ "path": "/a",
+ "recommendation": "Fix it",
+ }
+ defaults.update(kwargs)
+ return PdfIssue(**defaults)
+
+
+def _minimal_cover(**kwargs) -> PdfCoverBlock:
+ defaults = {
+ "headline": "Site Audit — example.com",
+ "subtitle": "Technical SEO Audit Report",
+ "hero": PdfScoreHero(score="80", band="score-good", label="Overall health score"),
+ "priority_strip": StatGridBlock(
+ id="cover.priority",
+ chips=[StatChip(label="High", value="1", tone="high")],
+ columns=4,
+ ),
+ "category_scores": ScoreCardsBlock(
+ id="cover.scores",
+ cards=[ScoreCard(name="Technical SEO", score="80", issue_count=1, tone="score-good")],
+ ),
+ }
+ defaults.update(kwargs)
+ return PdfCoverBlock(**defaults)
+
+
+def _minimal_meta() -> PdfMeta:
+ return PdfMeta(
+ report_id=1,
+ property="example.com",
+ report_title="Technical SEO Audit Report",
+ generated_at="18 June 2026",
+ exported_at="18 June 2026, 12:00 UTC",
+ data_sources=["crawl"],
+ health_score=80,
+ issue_counts={"critical": 0, "high": 1, "medium": 0, "low": 0},
+ )
+
+
+def _exhaustive_document() -> PdfDocument:
+ """Synthetic document exercising every block type and renderer edge path."""
+ related = [f"https://example.com/p{i}" for i in range(15)]
+ issue_with_urls = _issue(
+ headline="Collapsed duplicate",
+ related_urls=related,
+ url=None,
+ recommendation="Consolidate",
+ )
+ compact_group = IssueGroupBlock(
+ id="findings.compact",
+ group_label="Medium — compact table",
+ issues=[_issue(headline="Compact row", url="https://example.com/c")],
+ render_as="compact_table",
+ truncation=PdfTruncation(shown=1, total=5),
+ )
+ list_group = IssueGroupBlock(
+ id="findings.list",
+ group_label="High — list",
+ issues=[issue_with_urls, _issue(url=None, path=None, headline="Site-wide issue")],
+ )
+ return PdfDocument(
+ schema_version=SCHEMA_VERSION,
+ document_kind="audit",
+ meta=_minimal_meta(),
+ cover=_minimal_cover(
+ executive_summary="Executive overview text.",
+ executive_source="deterministic",
+ priorities_list=["Priority one", "Priority two"],
+ top_issues=[_issue(priority="critical", headline="Critical item")],
+ ),
+ sections=[
+ PdfSection(
+ id="blocks.all",
+ section_key="core",
+ title="All block types",
+ priority=10,
+ page_break_before=True,
+ source_label="crawl",
+ truncation=PdfTruncation(shown=2, total=10),
+ blocks=[
+ HeadingBlock(id="h2", text="Section heading", level=2),
+ HeadingBlock(id="h3", text="Sub heading", level=3),
+ ParagraphBlock(id="p", text="Body paragraph"),
+ ParagraphBlock(id="pi", text="Italic note", italic=True),
+ CalloutBlock(id="c-info", text="Info callout", severity="info"),
+ CalloutBlock(id="c-warn", text="Warn callout", severity="warn"),
+ CalloutBlock(id="c-crit", text="Critical callout", severity="critical"),
+ SpacerBlock(id="sp", height_pt=4),
+ KpiRowBlock(id="kpi", items=[KpiItem(label="Pages", value="42")]),
+ StatGridBlock(id="stat", chips=[], columns=4),
+ ScoreCardsBlock(id="scores", cards=[]),
+ KeyValueBlock(id="kv-default", rows=[("Key", "Value")], layout="default"),
+ KeyValueBlock(id="kv-empty", rows=[]),
+ MetricTableBlock(
+ id="metrics",
+ columns=[
+ TableColumn(key="url", label="URL", width="url"),
+ TableColumn(key="val", label="Value", width="wide"),
+ ],
+ rows=[{"url": "https://example.com", "val": "1"}],
+ truncation=PdfTruncation(shown=1, total=3),
+ ),
+ MetricTableBlock(id="metrics-empty", columns=[], rows=[]),
+ UrlListBlock(
+ id="urls",
+ rows=[
+ {"url": "https://example.com", "status": "200", "title": "Home"},
+ {"url": "https://example.com/old", "status": "301", "title": ""},
+ {"url": "https://example.com/missing", "status": "404", "title": "Missing"},
+ {"url": "https://example.com/error", "status": "500", "title": "Error"},
+ {"url": "https://example.com/unknown", "status": "", "title": ""},
+ ],
+ truncation=PdfTruncation(shown=5, total=12),
+ ),
+ UrlListBlock(id="urls-notitle", rows=[{"url": "https://x.com", "status": "200"}], show_title=False),
+ UrlListBlock(id="urls-empty", rows=[]),
+ list_group,
+ compact_group,
+ IssueTableBlock(
+ id="issue-table",
+ title="Issue table",
+ issues=[_issue(headline="Table row")],
+ truncation=PdfTruncation(shown=1, total=4),
+ ),
+ MarkdownBlock(id="md", text="Bold markdown snippet"),
+ ParagraphBlock(id="hidden", text="hidden", visible=False),
+ ],
+ ),
+ ],
+ footer=PdfFooterBlock(exported_at="18 June 2026, 12:00 UTC"),
+ )
+
+
+class TestNormalizeBranches:
+ def test_strip_url_trailing_slash_variant(self):
+ class _Msg(str):
+ def replace(self, old, new="", count=-1):
+ if old == "https://example.com/page":
+ return str(self)
+ return super().replace(old, new, count)
+
+ url = "https://example.com/page"
+ msg = _Msg("Not crawled: https://example.com/page/")
+ assert url not in _strip_url_from_headline(msg, url)
+
+ def test_extract_path_parse_error(self, monkeypatch):
+ def boom(_url):
+ raise ValueError("bad url")
+
+ monkeypatch.setattr(
+ "website_profiling.reporting.pdf.normalize.urlparse",
+ boom,
+ )
+ assert _extract_path("https://example.com") is None
+
+ def test_lighthouse_tag_detection(self):
+ is_lh, audit_id = _is_lighthouse_row("generic message", ["lighthouse"])
+ assert is_lh is True
+ assert audit_id == ""
+
+ def test_redirect_headline_shortening(self):
+ issue = normalize_issue_for_pdf(_row("redirect: 301 to https://example.com/new"))
+ assert issue.headline == "301 redirect"
+ assert "redirect" in issue.tags
+
+ def test_lighthouse_prefix_stripped(self):
+ issue = normalize_issue_for_pdf(_row("lighthouse: Long cache lifetime"))
+ assert issue.headline == "Long cache lifetime"
+
+ def test_axe_headline_truncated_at_sentence(self):
+ long_body = "A" * 50 + ". " + "B" * 60
+ issue = normalize_issue_for_pdf(_row(f"axe: {long_body}"))
+ assert issue.headline.endswith(".")
+ assert len(issue.headline) < len(long_body)
+
+
+class TestAdapterAndBuilderBranches:
+ def test_appendix_disabled(self):
+ payload = {"links": [{"url": "https://example.com", "status": "200"}]}
+ assert adapt_appendix(payload, PdfBuildOptions(include_appendix=False)) == []
+
+ def test_findings_empty_groups_after_normalize(self):
+ payload = {
+ "categories": [{"name": "Tech", "issues": [_row("issue one")]}],
+ }
+ with patch(
+ "website_profiling.reporting.pdf.adapters.findings.group_issues_for_pdf",
+ return_value=[],
+ ):
+ assert adapt_findings(payload, PdfBuildOptions()) == []
+
+ def test_findings_section_truncation_when_over_limit(self):
+ issues = [_row(f"issue {i}") for i in range(30)]
+ payload = {"categories": [{"name": "Tech", "issues": issues}]}
+ opts = PdfBuildOptions(limits=PdfLimits(issues_total=5, issues_per_group=5))
+ sections = adapt_findings(payload, opts)
+ assert sections[0].truncation is not None
+ assert sections[0].truncation.total == 30
+
+ def test_builder_skips_non_dict_categories_and_bad_scores(self):
+ payload = {
+ "site_name": "example.com",
+ "categories": [
+ "bad",
+ {"name": "Tech", "score": "not-a-number", "issues": []},
+ ],
+ "links": [],
+ }
+ doc = build_pdf_document(payload)
+ names = [c.name for c in doc.cover.category_scores.cards]
+ assert names == ["Tech"]
+
+ def test_builder_prefers_url_for_duplicate_headlines(self):
+ payload = {
+ "site_name": "example.com",
+ "categories": [{
+ "name": "Tech",
+ "score": 80,
+ "issues": [
+ _row("Missing title", url=""),
+ _row("Missing title", url="https://example.com/page"),
+ ],
+ }],
+ "links": [],
+ }
+ doc = build_pdf_document(payload)
+ assert doc.cover.top_issues[0].url == "https://example.com/page"
+
+ def test_builder_skips_unknown_section_adapters(self):
+ payload = {"site_name": "example.com", "categories": [], "links": []}
+ doc = build_pdf_document(payload, PdfBuildOptions(sections=["missing", "core"]))
+ assert any(s.id == "core.audit_details" for s in doc.sections)
+
+ def test_options_custom_sections_override_profile(self):
+ opts = PdfBuildOptions(profile="full", sections=["core"])
+ assert opts.effective_sections() == ["core"]
+
+
+class TestRendererBranches:
+ def test_html_renders_all_block_types(self):
+ html = render_html_document(_exhaustive_document())
+ assert "All block types" in html
+ assert "status-3xx" in html
+ assert "status-4xx" in html
+ assert "status-5xx" in html
+ assert "status-other" in html
+ assert "issue-card" in html
+ assert "compact_table" not in html # render_as is not echoed; table headers are
+ assert "IssueURL " in html
+ assert "Fix:" in html
+ assert "and 5 more" in html
+ assert "Source: crawl" in html
+ assert "Showing 2 of 10 issues" in html
+
+ def test_pdf_renders_all_block_types(self):
+ pdf = render_pdf_document(_exhaustive_document())
+ assert pdf[:4] == b"%PDF"
+ assert len(pdf) > 2_000
+
+ def test_reportlab_helper_functions(self):
+ st = _make_styles()
+ assert _p("plain", st["body"]) is not None
+ assert _p_html("markup ", st["body"]) is not None
+ assert _safe_p("", st["body"]) is not None
+
+ def test_reportlab_empty_executive_and_top_issues(self):
+ st = _make_styles()
+ cover = _minimal_cover(
+ executive_summary=None,
+ executive_source=None,
+ priorities_list=[],
+ top_issues=[],
+ )
+ assert _rl_render_executive_panel(cover, st) == []
+ assert _render_top_issues_table([], st) == []
+
+ def test_reportlab_empty_optional_blocks(self):
+ st = _make_styles()
+ assert _flowables_for_block(KpiRowBlock(id="k", items=[]), st) == []
+ assert _flowables_for_block(StatGridBlock(id="s", chips=[]), st) == []
+ assert _flowables_for_block(ScoreCardsBlock(id="sc", cards=[]), st) == []
+ assert _flowables_for_block(KeyValueBlock(id="kv", rows=[]), st) == []
+ assert _flowables_for_block(UrlListBlock(id="u", rows=[]), st) == []
+ assert _flowables_for_block(MetricTableBlock(id="m", columns=[], rows=[]), st) == []
+ assert _flowables_for_block(ParagraphBlock(id="h", text="x", visible=False), st) == []
+
+ def test_html_empty_cover_fragments(self):
+ doc = PdfDocument(
+ schema_version=SCHEMA_VERSION,
+ document_kind="audit",
+ meta=_minimal_meta(),
+ cover=_minimal_cover(
+ top_issues=[],
+ executive_summary=None,
+ executive_source=None,
+ priorities_list=[],
+ priority_strip=StatGridBlock(id="cover.priority", chips=[], columns=4),
+ category_scores=ScoreCardsBlock(id="cover.scores", cards=[]),
+ ),
+ sections=[],
+ footer=PdfFooterBlock(exported_at="now"),
+ )
+ html = render_html_document(doc)
+ assert "Top traffic-impacting issues" not in html
+ assert "Category scores" not in html
+
+ def test_html_renderer_empty_helpers(self):
+ cover = _minimal_cover(executive_summary=None, priorities_list=[])
+ assert _html_render_executive_panel(cover) == ""
+ assert _html_render_stat_grid(StatGridBlock(id="s", chips=[], columns=4)) == ""
+ assert _html_render_score_cards(ScoreCardsBlock(id="sc", cards=[])) == ""
+
+ class _Unknown:
+ type = "unknown"
+ visible = True
+
+ assert _html_render_block(_Unknown()) == ""
+
+ def test_reportlab_empty_kv_and_scaled_metric_table(self):
+ st = _make_styles()
+ assert _flowables_for_block(KeyValueBlock(id="a", rows=[], layout="audit"), st) == []
+ assert _flowables_for_block(KeyValueBlock(id="g", rows=[], layout="glossary"), st) == []
+ wide = MetricTableBlock(
+ id="wide",
+ columns=[TableColumn(key=f"c{i}", label=f"C{i}", width="wide") for i in range(8)],
+ rows=[{f"c{i}": "x" for i in range(8)}],
+ )
+ assert _flowables_for_block(wide, st)
+
+ class _Unknown:
+ type = "not_registered"
+ visible = True
+
+ assert _flowables_for_block(_Unknown(), st) == []
diff --git a/tests/reporting/test_pdf_builder.py b/tests/reporting/test_pdf_builder.py
new file mode 100644
index 0000000..09b9cff
--- /dev/null
+++ b/tests/reporting/test_pdf_builder.py
@@ -0,0 +1,229 @@
+"""Tests for build_pdf_document — document structure and metadata."""
+from __future__ import annotations
+
+import pytest
+
+from website_profiling.reporting.pdf.builder import build_pdf_document
+from website_profiling.reporting.pdf.document import SCHEMA_VERSION, IssueGroupBlock, KeyValueBlock, ScoreCardsBlock
+from website_profiling.reporting.pdf.options import PdfBuildOptions
+
+
+def _base_payload(**overrides) -> dict:
+ p = {
+ "site_name": "test.example",
+ "report_generated_at": "2026-06-18T04:38:27+00:00",
+ "categories": [
+ {
+ "name": "Technical SEO",
+ "score": 79,
+ "issues": [
+ {
+ "priority": "high",
+ "message": "URL in sitemap but not crawled: https://test.example/page",
+ "url": "https://test.example/page",
+ "recommendation": "Review sitemap",
+ },
+ {
+ "priority": "medium",
+ "message": "Missing canonical URL.",
+ "url": "https://test.example/llms.txt",
+ "recommendation": "Add canonical",
+ },
+ ],
+ },
+ {
+ "name": "Mobile SEO",
+ "score": 90,
+ "issues": [
+ {
+ "priority": "critical",
+ "message": "2 page(s) missing viewport meta tag.",
+ "url": "",
+ "recommendation": "Add viewport",
+ }
+ ],
+ },
+ ],
+ "links": [
+ {"url": "https://test.example", "status": "200", "title": "Home"},
+ {"url": "https://test.example/about", "status": "301", "title": "About"},
+ ],
+ "report_meta": {"data_sources": ["crawl", "lighthouse"]},
+ }
+ p.update(overrides)
+ return p
+
+
+class TestDocumentSchema:
+ def test_schema_version(self):
+ doc = build_pdf_document(_base_payload())
+ assert doc.schema_version == SCHEMA_VERSION
+
+ def test_document_kind_audit(self):
+ doc = build_pdf_document(_base_payload())
+ assert doc.document_kind == "audit"
+
+ def test_meta_property(self):
+ doc = build_pdf_document(_base_payload())
+ assert doc.meta.property == "test.example"
+
+ def test_meta_issue_counts(self):
+ doc = build_pdf_document(_base_payload())
+ assert doc.meta.issue_counts["critical"] == 1
+ assert doc.meta.issue_counts["high"] == 1
+ assert doc.meta.issue_counts["medium"] == 1
+ assert doc.meta.issue_counts["low"] == 0
+
+ def test_meta_health_score_present(self):
+ doc = build_pdf_document(_base_payload())
+ assert doc.meta.health_score is not None
+ assert 0 <= doc.meta.health_score <= 100
+
+ def test_footer_generated(self):
+ doc = build_pdf_document(_base_payload())
+ assert doc.footer.exported_at
+
+
+class TestCover:
+ def test_cover_headline(self):
+ doc = build_pdf_document(_base_payload())
+ assert doc.cover.headline == "Site Audit — test.example"
+
+ def test_cover_priority_strip_chips(self):
+ doc = build_pdf_document(_base_payload())
+ chips = {c.label: c.value for c in doc.cover.priority_strip.chips}
+ assert chips["Critical"] == "1"
+ assert chips["High"] == "1"
+ assert chips["Medium"] == "1"
+ assert chips["Low"] == "0"
+
+ def test_cover_category_scores(self):
+ doc = build_pdf_document(_base_payload())
+ names = [c.name for c in doc.cover.category_scores.cards]
+ assert "Technical SEO" in names
+ assert "Mobile SEO" in names
+
+ def test_cover_top_issues_capped(self):
+     """The cover lists at most ``limits.top_issues_cover`` issues."""
+     from website_profiling.reporting.pdf.options import PdfLimits  # real limits type, not a duck-typed stand-in
+     opts = PdfBuildOptions(limits=PdfLimits(top_issues_cover=2))
+     doc = build_pdf_document(_base_payload(), opts)
+     assert len(doc.cover.top_issues) <= 2
+
+ def test_cover_top_issues_critical_first(self):
+     """The critical issue in the base payload must lead the cover's top issues."""
+     top = build_pdf_document(_base_payload()).cover.top_issues
+     assert top and top[0].priority == "critical"  # non-empty guard: never pass vacuously
+
+ def test_cover_executive_summary_present(self):
+ payload = _base_payload(executive_summary={
+ "source": "deterministic",
+ "summary": "Looks good overall.",
+ "priorities": ["Fix viewport"],
+ })
+ doc = build_pdf_document(payload)
+ assert doc.cover.executive_summary == "Looks good overall."
+ assert doc.cover.priorities_list == ["Fix viewport"]
+
+ def test_cover_executive_summary_none_when_missing(self):
+ doc = build_pdf_document(_base_payload())
+ # no executive_summary in base payload
+ assert doc.cover.executive_summary is None or doc.cover.executive_summary == ""
+
+
+class TestSections:
+ def test_standard_has_findings(self):
+ doc = build_pdf_document(_base_payload())
+ section_ids = [s.id for s in doc.sections]
+ assert "findings" in section_ids
+
+ def test_standard_has_audit_details(self):
+ doc = build_pdf_document(_base_payload())
+ section_ids = [s.id for s in doc.sections]
+ assert "core.audit_details" in section_ids
+
+ def test_category_scores_on_cover_not_in_sections(self):
+ doc = build_pdf_document(_base_payload())
+ section_ids = [s.id for s in doc.sections]
+ assert "core.category_scores" not in section_ids
+ assert len(doc.cover.category_scores.cards) >= 1
+
+ def test_standard_has_url_sample(self):
+ doc = build_pdf_document(_base_payload())
+ section_ids = [s.id for s in doc.sections]
+ assert "appendix.urls" in section_ids
+
+ def test_standard_has_glossary(self):
+ doc = build_pdf_document(_base_payload())
+ section_ids = [s.id for s in doc.sections]
+ assert "appendix.glossary" in section_ids
+
+ def test_sections_sorted_by_priority(self):
+ doc = build_pdf_document(_base_payload())
+ priorities = [s.priority for s in doc.sections]
+ assert priorities == sorted(priorities)
+
+ def test_findings_section_has_issue_group_blocks(self):
+ doc = build_pdf_document(_base_payload())
+ findings = next(s for s in doc.sections if s.id == "findings")
+ assert any(isinstance(b, IssueGroupBlock) for b in findings.blocks)
+
+ def test_findings_starts_on_new_page_via_cover_break(self):
+ doc = build_pdf_document(_base_payload())
+ # Cover ends with explicit page break; findings section should not double-break
+ findings = next(s for s in doc.sections if s.id == "findings")
+ assert findings.page_break_before is False
+
+ def test_url_sample_truncation(self):
+ links = [{"url": f"https://x.com/p{i}", "status": "200", "title": f"P{i}"} for i in range(30)]
+ payload = _base_payload(links=links)
+ doc = build_pdf_document(payload, PdfBuildOptions())
+ url_section = next(s for s in doc.sections if s.id == "appendix.urls")
+ url_block = url_section.blocks[0]
+ assert len(url_block.rows) == 20 # default limit
+ assert url_block.truncation is not None
+ assert url_block.truncation.total == 30
+
+ def test_executive_profile_only_cover_sections(self):
+     """The executive profile (noted upstream as sections = ["core"]) emits no findings section."""
+     doc = build_pdf_document(_base_payload(), PdfBuildOptions(profile="executive"))
+     section_ids = [s.id for s in doc.sections]
+     assert "findings" not in section_ids
+
+ def test_no_findings_section_when_no_issues(self):
+ payload = _base_payload()
+ payload["categories"] = [{"name": "Technical SEO", "score": 100, "issues": []}]
+ doc = build_pdf_document(payload)
+ section_ids = [s.id for s in doc.sections]
+ assert "findings" not in section_ids
+
+ def test_issues_normalized_url_dedup(self):
+ doc = build_pdf_document(_base_payload())
+ findings = next(s for s in doc.sections if s.id == "findings")
+ all_issues = []
+ for blk in findings.blocks:
+ if isinstance(blk, IssueGroupBlock):
+ all_issues.extend(blk.issues)
+ sitemap_issue = next(
+ (i for i in all_issues if i.headline == "In sitemap, not crawled"), None
+ )
+ assert sitemap_issue is not None
+ # URL must not be embedded in the headline
+ if sitemap_issue.url:
+ assert sitemap_issue.url not in sitemap_issue.headline
+
+
+class TestEmptyPayload:
+ def test_empty_categories(self):
+ doc = build_pdf_document({"site_name": "empty.test", "categories": [], "links": []})
+ assert doc.cover.headline == "Site Audit — empty.test"
+ assert doc.meta.health_score is None
+
+ def test_empty_links_no_url_section(self):
+ doc = build_pdf_document({"site_name": "empty.test", "categories": [], "links": []})
+ ids = [s.id for s in doc.sections]
+ assert "appendix.urls" not in ids
+
+ def test_missing_keys_no_crash(self):
+ doc = build_pdf_document({})
+ assert doc.document_kind == "audit"
diff --git a/tests/reporting/test_pdf_normalize.py b/tests/reporting/test_pdf_normalize.py
new file mode 100644
index 0000000..4cdb4c3
--- /dev/null
+++ b/tests/reporting/test_pdf_normalize.py
@@ -0,0 +1,208 @@
+"""Unit tests for PDF issue normalization and grouping."""
+from __future__ import annotations
+
+import pytest
+
+from website_profiling.reporting.pdf.normalize import (
+ collapse_duplicate_issues,
+ group_issues_for_pdf,
+ normalize_issue_for_pdf,
+)
+
+
+def _row(message: str, url: str = "", priority: str = "high", category: str = "Technical SEO",
+ recommendation: str = "Fix it") -> dict:
+ return {
+ "category": category,
+ "priority": priority,
+ "message": message,
+ "url": url,
+ "recommendation": recommendation,
+ "llm_recommendation": "",
+ }
+
+
+class TestNormalizeIssue:
+ def test_url_dedup_from_message(self):
+ """URL embedded in message should be stripped from headline."""
+ issue = normalize_issue_for_pdf(_row(
+ message="URL in sitemap but not crawled: https://codefrydev.in/2048",
+ url="https://codefrydev.in/2048",
+ ))
+ assert "https://codefrydev.in/2048" not in issue.headline
+ assert issue.headline == "In sitemap, not crawled"
+ assert issue.url == "https://codefrydev.in/2048"
+
+ def test_url_dedup_no_change_when_url_blank(self):
+ issue = normalize_issue_for_pdf(_row(
+ message="2 page(s) missing viewport meta tag.",
+ url="",
+ ))
+ assert "viewport" in issue.headline
+ assert issue.url is None
+
+ def test_lighthouse_cache_insight_label(self):
+ issue = normalize_issue_for_pdf(_row(message="cache-insight:", url="https://example.com"))
+ assert issue.headline == "Serve assets with efficient cache policy"
+ assert "lighthouse" in issue.tags
+
+ def test_lighthouse_color_contrast_label(self):
+ issue = normalize_issue_for_pdf(_row(message="color-contrast:", url="https://example.com"))
+ assert issue.headline == "Background and foreground colors lack sufficient contrast"
+
+ def test_unknown_lighthouse_id_fallback(self):
+ """Unknown audit ids should be title-cased as fallback."""
+ issue = normalize_issue_for_pdf(_row(message="my-custom-check:", url="https://example.com"))
+ assert issue.headline == "My Custom Check"
+
+ def test_plain_message_unchanged(self):
+ issue = normalize_issue_for_pdf(_row(message="Missing H1 on homepage.", url=""))
+ assert issue.headline == "Missing H1 on homepage."
+
+ def test_recommendation_included(self):
+ issue = normalize_issue_for_pdf(_row(message="issue", recommendation="Do this"))
+ assert issue.recommendation == "Do this"
+
+ def test_recommendation_excluded(self):
+ issue = normalize_issue_for_pdf(_row(message="issue", recommendation="Do this"),
+ include_recommendation=False)
+ assert issue.recommendation is None
+
+ def test_sitemap_tag_applied(self):
+ issue = normalize_issue_for_pdf(_row(message="URL in sitemap but not crawled: https://x.com/p",
+ url="https://x.com/p"))
+ assert "sitemap" in issue.tags
+
+ def test_path_extracted_from_url(self):
+ issue = normalize_issue_for_pdf(_row(message="issue", url="https://example.com/blog/post"))
+ assert issue.path == "/blog/post"
+
+ def test_path_none_when_url_blank(self):
+ issue = normalize_issue_for_pdf(_row(message="issue", url=""))
+ assert issue.path is None
+
+ def test_unique_id_generated(self):
+ r = _row(message="Missing title", url="https://example.com")
+ issue = normalize_issue_for_pdf(r)
+ assert len(issue.id) == 12
+
+ def test_same_row_same_id(self):
+ r = _row(message="Missing title", url="https://example.com")
+ i1 = normalize_issue_for_pdf(r)
+ i2 = normalize_issue_for_pdf(r)
+ assert i1.id == i2.id
+
+ def test_different_rows_different_id(self):
+ r1 = _row(message="Missing title", url="https://example.com")
+ r2 = _row(message="Missing title", url="https://other.com")
+ assert normalize_issue_for_pdf(r1).id != normalize_issue_for_pdf(r2).id
+
+ def test_generic_cwv_recommendation_shortened(self):
+ generic = (
+ "See Performance (Core Web Vitals) in this audit, "
+ "or re-run Lighthouse from Run audit."
+ )
+ issue = normalize_issue_for_pdf(_row(message="largest-contentful-paint:", recommendation=generic))
+ assert issue.recommendation == "Review Lighthouse audit details for this page."
+
+
+class TestCollapseDuplicates:
+ def test_merges_same_headline_and_fix(self):
+ rows = [
+ _row("URL in sitemap but not crawled: https://a.com/1", url="https://a.com/1"),
+ _row("URL in sitemap but not crawled: https://a.com/2", url="https://a.com/2"),
+ ]
+ issues = [normalize_issue_for_pdf(r) for r in rows]
+ collapsed = collapse_duplicate_issues(issues)
+ assert len(collapsed) == 1
+ assert collapsed[0].related_urls == ["https://a.com/1", "https://a.com/2"]
+ assert "(2 URLs)" in collapsed[0].headline
+
+ def test_keeps_distinct_recommendations_separate(self):
+ rows = [
+ _row("issue", url="https://a.com/1", recommendation="Fix A"),
+ _row("issue", url="https://a.com/2", recommendation="Fix B"),
+ ]
+ issues = [normalize_issue_for_pdf(r) for r in rows]
+ assert len(collapse_duplicate_issues(issues)) == 2
+
+ def test_collapse_in_grouping(self):
+ rows = [
+ _row(f"URL in sitemap but not crawled: https://a.com/{i}", url=f"https://a.com/{i}")
+ for i in range(5)
+ ]
+ issues = [normalize_issue_for_pdf(r) for r in rows]
+ groups = group_issues_for_pdf(issues)
+ assert len(groups[0].issues) == 1
+ assert len(groups[0].issues[0].related_urls) == 5
+
+
+class TestGroupIssues:
+ def _make_issues(self, specs):
+     """Normalize ``(priority, category, message)`` triples into PDF issues, in order."""
+     return [
+         normalize_issue_for_pdf(_row(message=msg, priority=priority, category=category))
+         for priority, category, msg in specs
+     ]
+
+ def test_single_priority_single_group(self):
+ issues = self._make_issues([("critical", "Mobile SEO", "Missing viewport")])
+ groups = group_issues_for_pdf(issues)
+ assert len(groups) == 1
+ assert groups[0].id == "findings.critical"
+ assert len(groups[0].issues) == 1
+
+ def test_groups_sorted_critical_first(self):
+ issues = self._make_issues([
+ ("low", "Tech", "thing"),
+ ("critical", "Mobile", "viewport"),
+ ("high", "Technical SEO", "sitemap"),
+ ])
+ groups = group_issues_for_pdf(issues)
+ priorities = [g.id.split(".")[1] for g in groups]
+ assert priorities[0] == "critical"
+ assert priorities[1] == "high"
+ assert priorities[-1] == "low"
+
+ def test_subgroup_by_category_when_many(self):
+     """Twelve high-priority issues spread over three categories yield per-category sub-groups."""
+     # More than _SUBGROUP_THRESHOLD (8) issues in one priority → sub-groups by category
+     specs = [("high", f"Cat{i % 3}", f"Issue {i}") for i in range(12)]
+     groups = group_issues_for_pdf(self._make_issues(specs))
+     # Sub-group ids extend the priority id ("findings.high.<suffix>"); the prefix check
+     # already implies the dot, and the loop name no longer shadows the builtin ``id``.
+     group_ids = [g.id for g in groups]
+     assert any(group_id.startswith("findings.high.") for group_id in group_ids)
+
+ def test_truncation_applied(self):
+ issues = self._make_issues([("low", "Tech", f"issue {i}") for i in range(30)])
+ groups = group_issues_for_pdf(issues, issues_per_group=10)
+ low_group = next(g for g in groups if "low" in g.id)
+ assert low_group.truncation is not None
+ assert low_group.truncation.shown == 10
+ assert low_group.truncation.total == 30
+
+ def test_total_cap_respected(self):
+ issues = self._make_issues([("medium", "Tech", f"m{i}") for i in range(200)])
+ groups = group_issues_for_pdf(issues, issues_total=50)
+ total_shown = sum(len(g.issues) for g in groups)
+ assert total_shown <= 50
+
+ def test_empty_input_returns_empty(self):
+ assert group_issues_for_pdf([]) == []
+
+ def test_group_label_includes_count(self):
+ issues = self._make_issues([("critical", "Mobile", "viewport")])
+ groups = group_issues_for_pdf(issues)
+ assert "1 issue" in groups[0].group_label
+
+ def test_list_for_all_groups(self):
+ issues = self._make_issues([("low", "Tech", f"x{i}") for i in range(15)])
+ groups = group_issues_for_pdf(issues, issues_per_group=20)
+ low_group = next(g for g in groups if "low" in g.id)
+ assert low_group.render_as == "list"
+
+ def test_list_for_small_group(self):
+ issues = self._make_issues([("critical", "Mobile", f"x{i}") for i in range(3)])
+ groups = group_issues_for_pdf(issues)
+ assert groups[0].render_as == "list"
diff --git a/tests/reporting/test_pdf_render.py b/tests/reporting/test_pdf_render.py
new file mode 100644
index 0000000..25cef69
--- /dev/null
+++ b/tests/reporting/test_pdf_render.py
@@ -0,0 +1,300 @@
+"""Smoke and content regression tests for the PDF renderer.
+
+These tests verify:
+ 1. Render produces valid PDF bytes.
+ 2. The document model contains expected content and does NOT contain the old broken patterns.
+ 3. The export_audit.export_audit_pdf() entry point is backward-compatible.
+"""
+from __future__ import annotations
+
+import pytest
+
+pytest.importorskip("reportlab")
+
+from website_profiling.reporting.pdf.builder import build_pdf_document
+from website_profiling.reporting.pdf.render import render_pdf_document
+from website_profiling.reporting.pdf.options import PdfBuildOptions
+
+
+def _rich_payload() -> dict:  # shared fixture: five scored categories, three links, an executive summary
+ return {
+ "site_name": "codefrydev.in",
+ "report_generated_at": "2026-06-18T04:38:27+00:00",
+ "report_meta": {
+ "data_sources": ["crawl", "lighthouse", "search_console"],
+ "crawl_scope": {
+ "pages_crawled": 15,
+ "max_pages_configured": 15,
+ "crawl_limited": True,
+ "render_mode": "javascript",
+ "js_concurrency": 3,
+ },
+ },
+ "categories": [
+ {
+ "name": "Technical SEO",
+ "score": 79,
+ "issues": [
+ {
+ "priority": "high",
+ "message": "URL in sitemap but not crawled: https://codefrydev.in/2048",  # message embeds the same URL as "url"
+ "url": "https://codefrydev.in/2048",
+ "recommendation": "Add the page to the crawl scope.",
+ },
+ {
+ "priority": "medium",
+ "message": "Missing canonical URL.",
+ "url": "https://codefrydev.in/llms.txt",
+ "recommendation": "Add .",  # NOTE(review): text after "Add" looks stripped (possibly an HTML tag) - confirm against the repository
+ },
+ ],
+ },
+ {
+ "name": "Core Web Vitals",
+ "score": 100,
+ "issues": [
+ {
+ "priority": "high",
+ "message": "cache-insight:",  # bare audit id; TestPdfContent checks it is not used verbatim as a headline
+ "url": "https://codefrydev.in",
+ "recommendation": "Add Cache-Control headers.",
+ },
+ {
+ "priority": "high",
+ "message": "color-contrast:",
+ "url": "https://codefrydev.in",
+ "recommendation": "Increase contrast ratio to 4.5:1.",
+ },
+ ],
+ },
+ {
+ "name": "Accessibility & markup",
+ "score": 69,
+ "issues": [
+ {
+ "priority": "medium",
+ "message": (
+ "axe: Ensure the contrast between foreground and background "
+ "colors meets WCAG 2 AA minimum contrast ra"  # message text ends mid-word ("ra") in this fixture
+ ),
+ "url": "https://codefrydev.in",
+ "recommendation": "Raise text contrast.",
+ }
+ ],
+ },
+ {
+ "name": "Mobile SEO",
+ "score": 90,
+ "issues": [
+ {
+ "priority": "critical",
+ "message": "2 page(s) missing viewport meta tag.",
+ "url": "",  # issue without a URL
+ "recommendation": "Add .",  # NOTE(review): text after "Add" looks stripped (possibly an HTML tag) - confirm against the repository
+ }
+ ],
+ },
+ {
+ "name": "Security",
+ "score": 75,
+ "issues": [
+ {
+ "priority": "medium",
+ "message": "X-Content-Type-Options header not set.",
+ "url": "https://codefrydev.in",
+ "recommendation": "Add nosniff header.",
+ },
+ {
+ "priority": "medium",
+ "message": "X-Frame-Options header not set.",
+ "url": "https://codefrydev.in",
+ "recommendation": "Add X-Frame-Options: DENY.",
+ },
+ ],
+ },
+ ],
+ "links": [
+ {"url": "https://codefrydev.in", "status": "200", "title": "CodeFryDev"},
+ {"url": "https://codefrydev.in/games", "status": "301", "title": "Games"},
+ {"url": "https://codefrydev.in/about-us", "status": "301", "title": "About Us"},
+ ],
+ "summary": {"total_urls": 15},
+ "status_counts": {"301": 12, "200": 3},
+ "executive_summary": {
+ "source": "deterministic",
+ "summary": "Overall health is 87/100. Critical gap: viewport meta missing on 2 pages.",
+ "priorities": ["Fix missing viewport meta", "Expand crawl scope to cover sitemap URLs"],
+ "top_issues": [
+ {"priority": "critical", "message": "Missing viewport meta tag", "url": ""},
+ ],
+ },
+ }
+
+
+@pytest.fixture(scope="module")
+def rendered_pdf() -> bytes:
+    """Render the rich fixture payload once per module using the standard profile."""
+    document = build_pdf_document(_rich_payload(), PdfBuildOptions(profile="standard"))
+    return render_pdf_document(document)
+
+
+class TestPdfSmoke:
+    """Byte-level sanity checks: type, magic header and size of the rendered output."""
+
+    def test_returns_bytes(self, rendered_pdf):
+        assert isinstance(rendered_pdf, bytes)
+
+    def test_pdf_header(self, rendered_pdf):
+        assert rendered_pdf[:4] == b"%PDF"
+
+    def test_non_trivial_size(self, rendered_pdf):
+        assert len(rendered_pdf) > 1_000
+
+    def test_executive_profile_renders(self):
+        document = build_pdf_document(_rich_payload(), PdfBuildOptions(profile="executive"))
+        assert render_pdf_document(document)[:4] == b"%PDF"
+
+    def test_empty_payload_renders(self):
+        bare_payload = {"site_name": "empty", "categories": [], "links": []}
+        rendered = render_pdf_document(build_pdf_document(bare_payload))
+        assert rendered[:4] == b"%PDF"
+
+
+class TestPdfContent:
+    """Verify content in the PdfDocument model (document level, not raw PDF bytes).
+
+    Content assertions live here because the ReportLab output is FlateDecode
+    compressed. We test the document model which is what drives the render.
+    """
+
+    def _get_doc(self):
+        """Build the standard-profile document from the shared fixture payload."""
+        return build_pdf_document(_rich_payload(), PdfBuildOptions(profile="standard"))
+
+    def _finding_issues(self):
+        """Return every issue listed in the IssueGroupBlocks of the "findings" section.
+
+        Asserts the list is non-empty so the per-issue loops cannot pass vacuously.
+        """
+        from website_profiling.reporting.pdf.document import IssueGroupBlock
+        findings = next(s for s in self._get_doc().sections if s.id == "findings")
+        groups = [blk for blk in findings.blocks if isinstance(blk, IssueGroupBlock)]
+        issues = [issue for blk in groups for issue in blk.issues]
+        assert issues, "findings section contains no issues to check"
+        return issues
+
+    def test_site_name_in_cover_headline(self):
+        """The cover headline mentions the audited site."""
+        doc = self._get_doc()
+        assert "codefrydev.in" in doc.cover.headline
+
+    def test_no_ellipsis_truncation_in_issue_headlines(self):
+        """The new normalizer must NOT add '...' truncation that the old renderer applied."""
+        for issue in self._finding_issues():
+            assert not issue.headline.endswith("..."), (
+                f"Headline has hard '...' truncation from old code: {issue.headline!r}"
+            )
+
+    def test_lighthouse_label_expanded_in_headline(self):
+        """cache-insight: should be expanded to human label, not left as bare audit id."""
+        for issue in self._finding_issues():
+            assert issue.headline != "cache-insight:", (
+                f"Lighthouse audit id was not expanded: {issue.headline!r}"
+            )
+
+    def test_url_not_duplicated_in_headline(self):
+        """Sitemap URLs embedded in message should not appear in headline."""
+        for issue in self._finding_issues():
+            if issue.url:
+                assert issue.url not in issue.headline, (
+                    f"URL {issue.url!r} duplicated in headline {issue.headline!r}"
+                )
+
+    def test_glossary_section_present(self):
+        """The glossary appendix holds a key/value block with a "Crawl" entry."""
+        doc = self._get_doc()
+        from website_profiling.reporting.pdf.document import KeyValueBlock
+        gloss_section = next(s for s in doc.sections if s.id == "appendix.glossary")
+        gloss_block = next(b for b in gloss_section.blocks if isinstance(b, KeyValueBlock))
+        keys = [row[0] for row in gloss_block.rows]
+        assert "Crawl" in keys
+
+
+class TestHtmlPreviewParity:
+    def test_html_renders_from_same_document(self):
+        from website_profiling.reporting.pdf.render.html import render_html_document
+        doc = build_pdf_document(_rich_payload(), PdfBuildOptions(profile="standard"))
+        html_out = render_html_document(doc)
+        assert "Site Audit — codefrydev.in" in html_out
+        assert "Executive summary" in html_out
+        assert "Top traffic-impacting issues" in html_out
+        assert "Findings" in html_out
+        assert "Audit details" in html_out
+        for css_hook in ('class="issue-card', "cover-head", "grid-table stat-grid"):  # markup hooks
+            assert css_hook in html_out
+
+
+class TestExportAuditPdfBackwardCompat:
+    """Ensure export_audit.export_audit_pdf() remains backward-compatible."""
+
+    def test_backward_compat_no_args(self, monkeypatch):
+        # Zero-argument call: the stubbed loader supplies the fixture payload.
+        from website_profiling.tools import export_audit
+        monkeypatch.setattr(export_audit, "_load_payload", lambda _rid=None: _rich_payload())
+        output = export_audit.export_audit_pdf()
+        assert isinstance(output, bytes) and output[:4] == b"%PDF"
+
+    def test_backward_compat_report_id(self, monkeypatch):
+        # The payload loader is stubbed, so any report_id resolves to the fixture payload.
+        from website_profiling.tools import export_audit as exporter
+        monkeypatch.setattr(exporter, "_load_payload", lambda _rid=None: _rich_payload())
+        assert exporter.export_audit_pdf(report_id=42)[:4] == b"%PDF"
+
+    def test_profile_param_standard(self, monkeypatch):
+        # profile="standard" is accepted as a keyword by the public entry point.
+        from website_profiling.tools import export_audit as exporter
+        monkeypatch.setattr(exporter, "_load_payload", lambda _rid=None: _rich_payload())
+        assert exporter.export_audit_pdf(profile="standard")[:4] == b"%PDF"
+
+    def test_profile_param_executive(self, monkeypatch):
+        # profile="executive" is accepted as a keyword by the public entry point.
+        from website_profiling.tools import export_audit as exporter
+        monkeypatch.setattr(exporter, "_load_payload", lambda _rid=None: _rich_payload())
+        assert exporter.export_audit_pdf(profile="executive")[:4] == b"%PDF"
+
+    def test_requires_reportlab(self, monkeypatch):
+        import builtins
+        from website_profiling.tools import export_audit
+        monkeypatch.setattr(export_audit, "_load_payload", lambda _rid=None: _rich_payload())
+        original_import = builtins.__import__
+
+        def fake_import(name, *args, **kwargs):
+            if name.partition(".")[0] == "reportlab":
+                raise ImportError("no reportlab")
+            return original_import(name, *args, **kwargs)
+
+        # The import hook is installed only inside this block and undone on exit.
+        with monkeypatch.context() as scoped:
+            scoped.setattr(builtins, "__import__", fake_import)
+            with pytest.raises(RuntimeError, match="reportlab"):
+                export_audit.export_audit_pdf()
+
+    def test_large_payload_no_crash(self, monkeypatch):
+        """90 low-priority issues with 150-char messages and long URLs must still render."""
+        from website_profiling.tools import export_audit
+
+        long_message = "x" * 150
+        long_url = "https://example.com/" + ("path/" * 20)
+        issues = [
+            {"priority": "low", "message": long_message, "url": long_url, "recommendation": "fix"}
+            for _ in range(90)
+        ]
+        technical_seo = {"name": "Technical SEO", "score": 80, "issues": issues}
+        payload = {
+            "site_name": "Truncate PDF",
+            "categories": [technical_seo],
+            "links": [],
+        }
+        monkeypatch.setattr(export_audit, "_load_payload", lambda _rid=None: payload)
+        rendered = export_audit.export_audit_pdf()
+        assert rendered[:4] == b"%PDF"
diff --git a/tests/test_mcp_http_server.py b/tests/test_mcp_http_server.py
index 56ed31e..6209741 100644
--- a/tests/test_mcp_http_server.py
+++ b/tests/test_mcp_http_server.py
@@ -394,7 +394,7 @@ def create_initialization_options(self):
assert captured["name"] == "site-audit-full"
tools = asyncio.run(captured["list_tools"]()) # type: ignore[arg-type]
- assert len(tools) >= 340
+ assert len(tools) >= 338
def test_bool_env_helper() -> None:
diff --git a/tests/test_mcp_server_helpers.py b/tests/test_mcp_server_helpers.py
index ca2c142..08212e6 100644
--- a/tests/test_mcp_server_helpers.py
+++ b/tests/test_mcp_server_helpers.py
@@ -62,7 +62,7 @@ def test_read_glossary_excerpt_missing(monkeypatch) -> None:
def test_tools_catalog_json_includes_security_tools() -> None:
with patch.dict(os.environ, {"WP_MCP_DOMAIN": "full"}):
catalog = json.loads(mcp_server._tools_catalog_json())
- assert catalog["tool_count"] >= 340
+ assert catalog["tool_count"] >= 338
assert "get_security_findings" in catalog["domains"]["security"]
assert "get_geo_readiness_score" in catalog["domains"]["geo"]
assert "get_gsc_url_inspection" in catalog["domains"]["integrations"]
@@ -173,7 +173,7 @@ async def __aexit__(self, *_args):
assert captured["name"] == "site-audit-full"
assert captured["ran"] is True
tools = asyncio.run(captured["list_tools"]()) # type: ignore[arg-type]
- assert len(tools) >= 340
+ assert len(tools) >= 338
resources = asyncio.run(captured["list_resources"]()) # type: ignore[arg-type]
assert any(r["uri"] == "audit://property/7" for r in resources)
assert any(r["uri"] == "audit://domains" for r in resources)
@@ -247,7 +247,7 @@ async def __aexit__(self, *_args):
mcp_server.main()
tools = asyncio.run(captured["list_tools"]()) # type: ignore[arg-type]
- assert len(tools) < 340
+ assert len(tools) < 338
blocked = asyncio.run(captured["call_tool"]("export_audit_report", {"format": "pdf"})) # type: ignore[arg-type]
assert "not exposed" in blocked[0]["text"]
diff --git a/tests/tools/test_audit_tools_expanded.py b/tests/tools/test_audit_tools_expanded.py
index ead7e11..14851c2 100644
--- a/tests/tools/test_audit_tools_expanded.py
+++ b/tests/tools/test_audit_tools_expanded.py
@@ -178,7 +178,7 @@ def conn() -> MagicMock:
def test_handler_schema_parity() -> None:
names = {t["name"] for t in TOOL_DEFINITIONS}
assert names == tool_handler_names()
- assert len(TOOL_DEFINITIONS) == 340
+ assert len(TOOL_DEFINITIONS) == 338
def test_slice_helpers() -> None:
@@ -536,28 +536,6 @@ def test_export_tools(conn: MagicMock, ctx: AuditToolContext, tmp_path, monkeypa
)
assert csv_out.get("artifact_id")
assert csv_out.get("total") == 1
- with patch.object(Ctx, "load_payload", return_value=payload):
- spec = dispatch_tool(
- "compose_custom_report",
- {
- "title": "Client",
- "sections": [{"type": "category_scores"}, {"type": "notes", "heading": "N", "markdown": "Hi"}],
- },
- context=ctx,
- conn=conn,
- )
- assert spec.get("report_spec_id")
- with patch(
- "website_profiling.tools.audit_tools.export_tools.resolve_section_results",
- return_value=[None, None],
- ):
- html_out = dispatch_tool(
- "export_custom_report",
- {"report_spec_id": spec["report_spec_id"], "format": "html"},
- context=ctx,
- conn=conn,
- )
- assert html_out.get("artifact_id")
with patch("website_profiling.tools.audit_tools.export_tools.load_compare_pair") as mock_pair:
mock_pair.return_value = (payload, payload, 2, 1, None)
cmp_out = dispatch_tool("export_compare_csv", {"baseline_report_id": 1}, context=ctx, conn=conn)
diff --git a/tests/tools/test_export_artifacts.py b/tests/tools/test_export_artifacts.py
index c2ac3c9..dc8914a 100644
--- a/tests/tools/test_export_artifacts.py
+++ b/tests/tools/test_export_artifacts.py
@@ -46,14 +46,6 @@ def test_dicts_to_csv() -> None:
assert "https://ex.com" in csv_text
-def test_save_report_spec(artifact_dir) -> None:
- spec_id = export_artifacts.save_report_spec({"title": "T", "sections": []})
- spec = export_artifacts.read_report_spec(spec_id)
- assert spec is not None
- assert spec["title"] == "T"
-
-
-def test_sweep_expired_artifacts(artifact_dir) -> None:
env = export_artifacts.save_artifact(b"x", filename="old.bin", mime_type="application/octet-stream")
meta_path = os.path.join(export_artifacts.exports_dir(), f"{env['artifact_id']}.meta.json")
with open(meta_path, encoding="utf-8") as f:
diff --git a/tests/tools/test_export_artifacts_coverage.py b/tests/tools/test_export_artifacts_coverage.py
index db7aad0..aa09c9a 100644
--- a/tests/tools/test_export_artifacts_coverage.py
+++ b/tests/tools/test_export_artifacts_coverage.py
@@ -11,7 +11,6 @@
def test_export_artifacts_edge_cases(tmp_path, monkeypatch) -> None:
monkeypatch.setenv("DATA_DIR", str(tmp_path))
assert export_artifacts.read_artifact_meta("not-a-uuid") is None
- assert export_artifacts.read_report_spec("not-a-uuid") is None
env = export_artifacts.save_artifact(b"x", filename="b.bin", mime_type="application/octet-stream", meta={"k": 1})
meta_path = tmp_path / "exports" / f"{env['artifact_id']}.meta.json"
with open(meta_path, "w", encoding="utf-8") as f:
@@ -19,6 +18,7 @@ def test_export_artifacts_edge_cases(tmp_path, monkeypatch) -> None:
assert export_artifacts.sweep_expired_artifacts() >= 0
assert export_artifacts.rows_from_tool_result({"error": "x"}) == []
assert export_artifacts.rows_from_tool_result({"pages": ["a", {"url": "b"}]})[0]["value"] == "a"
+ assert export_artifacts.rows_from_tool_result({"broken": [{"url": "https://x.com"}]})[0]["url"] == "https://x.com"
assert export_artifacts.dicts_to_csv([]) == ""
assert export_artifacts.dicts_to_csv([{}]) == ""
csv_filtered = export_artifacts.dicts_to_csv([{"a": 1, "b": 2}], columns=[" ", "a"])
diff --git a/tests/tools/test_export_audit_coverage.py b/tests/tools/test_export_audit_coverage.py
index ed4bbee..061eaeb 100644
--- a/tests/tools/test_export_audit_coverage.py
+++ b/tests/tools/test_export_audit_coverage.py
@@ -145,6 +145,41 @@ def test_helper_functions_cover_branches() -> None:
assert export_audit._category_cards_html([]).startswith(" None:
+ from website_profiling.tools.export_audit_html import (
+ _executive_summary_html,
+ _priority_stats_html,
+ _report_html_styles,
+ )
+
+ assert _executive_summary_html({}) == ""
+ assert _executive_summary_html({"executive_summary": {}}) == ""
+
+ clicks_payload = {
+ "executive_summary": {
+ "top_issues": [
+ {
+ "priority": "high",
+ "message": "Traffic issue",
+ "url": "https://example.com/hot",
+ "gsc_clicks": 42,
+ }
+ ]
+ }
+ }
+ html_block = _executive_summary_html(clicks_payload)
+ assert "42" in html_block
+ assert "GSC clicks" in html_block
+
+ stats = _priority_stats_html({"critical": 1, "high": 2, "medium": 0, "low": 3})
+ assert "stat-critical" in stats
+ assert "Critical" in stats
+
+ styles = _report_html_styles()
+ assert isinstance(styles, str)
+ assert len(styles) > 0
+
+
def test_summary_lines_includes_scope_and_diagnostics() -> None:
lines = dict(export_audit._summary_lines(_rich_payload()))
assert lines["Property"] == "Coverage Site"
@@ -197,10 +232,11 @@ def test_export_json_csv_and_truncated_html(monkeypatch) -> None:
assert "Measured + Search Console" in csv_out
html_out = export_audit.export_audit_html()
- assert "Overall health score 70/100" in html_out
- assert "Showing 200 of" in html_out
- assert "Custom extract" in html_out
- assert "logo.png" in html_out
+ assert "Site Audit — Coverage Site" in html_out
+ assert "Showing 120 of" in html_out
+ assert "Audit details" in html_out
+ assert "Data source glossary" in html_out
+ assert "Crawled URLs (sample)" in html_out
def test_export_pdf_full_branches(monkeypatch) -> None:
diff --git a/tests/tools/test_export_custom.py b/tests/tools/test_export_custom.py
deleted file mode 100644
index b0cd0ff..0000000
--- a/tests/tools/test_export_custom.py
+++ /dev/null
@@ -1,50 +0,0 @@
-"""Tests for custom report builder."""
-from __future__ import annotations
-
-from unittest.mock import MagicMock
-
-import pytest
-
-from website_profiling.tools.export_custom import (
- render_custom_report_html,
- render_custom_report_pdf,
- validate_sections,
-)
-
-
-def test_validate_sections_ok() -> None:
- sections, err = validate_sections([
- {"type": "executive_summary"},
- {"type": "notes", "heading": "Summary", "markdown": "Hello"},
- {"type": "tool", "heading": "Broken", "tool_name": "list_broken_links", "tool_args": {}},
- ])
- assert err is None
- assert sections is not None
- assert len(sections) == 3
-
-
-def test_validate_sections_rejects_unknown_type() -> None:
- _, err = validate_sections([{"type": "unknown"}])
- assert err is not None
-
-
-def test_render_custom_report_html() -> None:
- payload = {"site_name": "Example", "report_generated_at": "2026-06-07T12:00:00Z", "categories": []}
- html_doc = render_custom_report_html(
- title="Client Report",
- payload=payload,
- sections=[{"type": "notes", "heading": "Notes", "markdown": "Line one"}],
- section_results=[None],
- )
- assert "Client Report" in html_doc
- assert "Example" in html_doc
- assert "Line one" in html_doc
-
-
-def test_render_custom_report_pdf_smoke() -> None:
- html_doc = "
Test
"
- try:
- pdf = render_custom_report_pdf(html_doc, "Test")
- except RuntimeError as exc:
- pytest.skip(str(exc))
- assert pdf[:4] == b"%PDF"
diff --git a/tests/tools/test_export_custom_coverage.py b/tests/tools/test_export_custom_coverage.py
deleted file mode 100644
index ed30f6f..0000000
--- a/tests/tools/test_export_custom_coverage.py
+++ /dev/null
@@ -1,101 +0,0 @@
-"""Line-coverage tests for export_custom helpers."""
-from __future__ import annotations
-
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-from website_profiling.tools.audit_tools.context import AuditToolContext as Ctx
-from website_profiling.tools.export_custom import (
- _section_html_tool_result,
- _table_from_rows,
- render_custom_report_html,
- render_custom_report_pdf,
- resolve_section_results,
- validate_sections,
-)
-
-
-@pytest.fixture
-def ctx() -> Ctx:
- return Ctx(property_id=1, report_id=1)
-
-
-@pytest.fixture
-def conn() -> MagicMock:
- return MagicMock()
-
-
-def _payload() -> dict:
- return {
- "site_name": "Example",
- "report_generated_at": "2026-06-07T12:00:00Z",
- "categories": [{"id": "tech", "name": "Tech", "score": 80, "issues": []}],
- "executive_summary": {"headline": "OK"},
- }
-
-
-
-
-def test_export_custom_helpers() -> None:
- assert "No data" in _table_from_rows([])
- assert "No columns" in _table_from_rows([{}])
- big = _table_from_rows([{"url": f"https://ex.com/{i}", "n": i} for i in range(60)], max_rows=50)
- assert "Showing 50 of 60" in big
-
- err_html = _section_html_tool_result("H", {"error": "boom"})
- assert "boom" in err_html
- rows_html = _section_html_tool_result("H", {"pages": [{"url": "u"}]})
- assert "u" in rows_html
- items_html = _section_html_tool_result("H", {"items": [{"k": "v"}]})
- assert "v" in items_html
- preview_html = _section_html_tool_result("H", {"meta": "x"})
- assert "json-preview" in preview_html
-
- sections, err = validate_sections([{"type": "notes", "markdown": "hi"}] * 13)
- assert err and "max" in err
- _, err2 = validate_sections([{"type": "tool"}])
- assert err2 and "tool_name" in err2
- _, err3 = validate_sections([{"type": "notes"}])
- assert err3 and "markdown" in err3
- _, err4 = validate_sections("bad")
- assert err4
-
- payload = _payload()
- html_doc = render_custom_report_html(
- title="T",
- payload=payload,
- sections=[
- {"type": "executive_summary"},
- {"type": "category_scores"},
- {"type": "notes", "heading": "N", "markdown": "line"},
- {"type": "tool", "heading": "Broken", "tool_name": "list_broken_links"},
- ],
- section_results=[None, None, None, {"pages": [{"url": "https://ex.com/x"}]}],
- )
- assert "Executive summary" in html_doc
- assert "Category scores" in html_doc
- assert "line" in html_doc
-
- try:
- pdf = render_custom_report_pdf(html_doc, "T")
- assert pdf[:4] == b"%PDF"
- except RuntimeError as exc:
- pytest.skip(str(exc))
-
-
-def test_export_custom_resolve_sections(conn: MagicMock, ctx: Ctx) -> None:
- sections = [
- {"type": "executive_summary"},
- {"type": "tool", "tool_name": "list_broken_links", "tool_args": {}},
- ]
- with patch.object(Ctx, "load_payload", return_value=_payload()):
- results = resolve_section_results(
- conn,
- ctx,
- _payload(),
- sections,
- lambda name, args, **kw: {"pages": [{"url": "https://ex.com"}]},
- )
- assert results[0] is None
- assert results[1]["pages"]
diff --git a/tests/tools/test_export_tools_coverage.py b/tests/tools/test_export_tools_coverage.py
index f16fe31..6544607 100644
--- a/tests/tools/test_export_tools_coverage.py
+++ b/tests/tools/test_export_tools_coverage.py
@@ -35,9 +35,6 @@ def test_export_tools_formats(conn: MagicMock, ctx: Ctx, tmp_path, monkeypatch)
assert dispatch_tool("export_audit_report", {"format": "bad"}, context=ctx, conn=conn)["error"]
assert dispatch_tool("export_list_as_csv", {}, context=ctx, conn=conn)["error"]
assert dispatch_tool("export_list_as_csv", {"tool_name": "nope"}, context=ctx, conn=conn)["error"]
- assert dispatch_tool("compose_custom_report", {"sections": []}, context=ctx, conn=conn)["error"]
- assert dispatch_tool("export_custom_report", {"format": "bad"}, context=ctx, conn=conn)["error"]
- assert dispatch_tool("export_custom_report", {"report_spec_id": "missing"}, context=ctx, conn=conn)["error"]
with patch.object(Ctx, "load_payload", return_value=payload), patch(
"website_profiling.tools.audit_tools.export_tools.export_audit_html",
@@ -76,64 +73,6 @@ def test_export_tools_formats(conn: MagicMock, ctx: Ctx, tmp_path, monkeypatch)
):
assert dispatch_tool("export_compare_csv", {"baseline_report_id": 1}, context=ctx, conn=conn)["error"]
- with patch.object(Ctx, "load_payload", return_value=payload):
- bad_tool = dispatch_tool(
- "compose_custom_report",
- {
- "title": "T",
- "sections": [{"type": "tool", "tool_name": "export_audit_report", "tool_args": {}}],
- },
- context=ctx,
- conn=conn,
- )
- assert "not allowed" in bad_tool["error"]
-
- spec = dispatch_tool(
- "compose_custom_report",
- {"title": "T", "sections": [{"type": "executive_summary"}]},
- context=ctx,
- conn=conn,
- )
- with patch(
- "website_profiling.tools.audit_tools.export_tools.resolve_section_results",
- return_value=[{"pages": [{"url": "https://ex.com"}]}],
- ):
- html_out = dispatch_tool(
- "export_custom_report",
- {
- "title": "Direct",
- "format": "html",
- "sections": [
- {"type": "tool", "tool_name": "list_broken_links", "tool_args": {}},
- ],
- },
- context=ctx,
- conn=conn,
- )
- assert html_out.get("artifact_id")
- pdf_out = dispatch_tool(
- "export_custom_report",
- {"report_spec_id": spec["report_spec_id"], "format": "pdf"},
- context=ctx,
- conn=conn,
- )
- if pdf_out.get("error"):
- pytest.skip(pdf_out["error"])
- assert pdf_out.get("format") == "pdf"
-
-def test_export_custom_report_pdf_error(conn: MagicMock, ctx: Ctx, tmp_path, monkeypatch) -> None:
- monkeypatch.setenv("DATA_DIR", str(tmp_path))
- with patch.object(Ctx, "load_payload", return_value=_payload()), patch(
- "website_profiling.tools.audit_tools.export_tools.render_custom_report_pdf",
- side_effect=RuntimeError("no pdf"),
- ):
- assert dispatch_tool(
- "export_custom_report",
- {"title": "T", "format": "pdf", "sections": [{"type": "executive_summary"}]},
- context=ctx,
- conn=conn,
- )["error"] == "no pdf"
-
def test_export_audit_report_paths(conn: MagicMock, ctx: Ctx, tmp_path, monkeypatch) -> None:
monkeypatch.setenv("DATA_DIR", str(tmp_path))
@@ -149,12 +88,3 @@ def test_export_audit_report_paths(conn: MagicMock, ctx: Ctx, tmp_path, monkeypa
side_effect=RuntimeError("export failed"),
):
assert "export failed" in dispatch_tool("export_audit_report", {"format": "csv"}, context=ctx, conn=conn)["error"]
-
- with patch.object(Ctx, "load_payload", return_value=_payload()):
- assert dispatch_tool("compose_custom_report", {"sections": [{"type": "notes", "markdown": "x"}]}, context=ctx, conn=conn)["error"]
- assert dispatch_tool(
- "export_custom_report",
- {"sections": [{"type": "executive_summary"}]},
- context=ctx,
- conn=conn,
- )["error"]
diff --git a/tests/tools/test_mcp_registry.py b/tests/tools/test_mcp_registry.py
index c693d50..63625a3 100644
--- a/tests/tools/test_mcp_registry.py
+++ b/tests/tools/test_mcp_registry.py
@@ -13,7 +13,7 @@
def test_tool_definitions_schema() -> None:
- assert len(TOOL_DEFINITIONS) == 340
+ assert len(TOOL_DEFINITIONS) == 338
for tool in TOOL_DEFINITIONS:
assert tool.get("name")
assert tool.get("description")
diff --git a/tests/tools/test_tools_branch_coverage.py b/tests/tools/test_tools_branch_coverage.py
index 42a8e69..5f4c5b2 100644
--- a/tests/tools/test_tools_branch_coverage.py
+++ b/tests/tools/test_tools_branch_coverage.py
@@ -387,10 +387,6 @@ def test_export_artifacts_workbook_and_custom(tmp_path, monkeypatch, conn: Magic
export_artifacts.delete_artifact(aid)
assert not meta_path.exists()
- spec_id = export_artifacts.save_report_spec({"title": "T"})
- assert export_artifacts.read_report_spec(spec_id)["title"] == "T"
- assert export_artifacts.read_report_spec("not-a-uuid") is None
-
from website_profiling.tools import export_crawl_workbook as wb_mod
assert wb_mod._parse_custom_fields({"price": 9.99}) == {"price": "9.99"}
@@ -404,13 +400,6 @@ def test_export_artifacts_workbook_and_custom(tmp_path, monkeypatch, conn: Magic
with zipfile.ZipFile(io.BytesIO(raw)) as zf:
assert "custom_fields.csv" in zf.namelist()
- from website_profiling.tools.export_custom import render_custom_report_pdf
-
- try:
- render_custom_report_pdf("T", {"site_name": "Ex"}, [], [])
- except Exception:
- pass
-
def test_tools_remaining_branch_coverage(conn: MagicMock, ctx: Ctx, tmp_path, monkeypatch) -> None:
from website_profiling.tools.audit_tools import backlinks as bl_mod
@@ -433,11 +422,6 @@ def test_tools_remaining_branch_coverage(conn: MagicMock, ctx: Ctx, tmp_path, mo
from website_profiling.tools.audit_tools import report_extras as rex_mod
from website_profiling.tools.audit_tools import security as sec_mod
from website_profiling.tools import export_crawl_workbook as wb_mod
- from website_profiling.tools.export_custom import (
- _section_html_tool_result,
- resolve_section_results,
- validate_sections,
- )
monkeypatch.setenv("DATA_DIR", str(tmp_path))
@@ -632,20 +616,11 @@ def test_tools_remaining_branch_coverage(conn: MagicMock, ctx: Ctx, tmp_path, mo
rows, cols = wb_mod._custom_field_rows([{"url": "", "custom_fields": '{"a":"1"}'}, {"custom_extract": "x"}])
assert rows == [] and cols
- assert export_artifacts.read_report_spec("00000000-0000-0000-0000-000000000000") is None
+ assert export_artifacts.read_artifact_bytes("00000000-0000-0000-0000-000000000000") is None
aid = export_artifacts.save_artifact(b"x", filename="y.bin", mime_type="application/octet-stream")["artifact_id"]
with patch("website_profiling.tools.export_artifacts.os.remove", side_effect=OSError("denied")):
export_artifacts.delete_artifact(aid)
- assert _section_html_tool_result("H", {"keywords": [{"k": "v"}]}) != ""
- _, err = validate_sections([{"type": "weird"}])
- assert err
- results = resolve_section_results(conn, ctx, {}, [{"type": "weird"}], lambda *a, **k: {})
- assert results == [None]
-
- with patch.object(Ctx, "load_payload", return_value={}):
- assert et_mod.compose_custom_report(conn, ctx, {"title": "T", "sections": [{"type": "executive_summary"}]})["error"]
- assert et_mod.export_custom_report(conn, ctx, {"title": "T", "sections": [{"type": "executive_summary"}]})["error"]
with patch.object(Ctx, "load_payload", return_value={"site_name": "Ex"}), patch(
"website_profiling.tools.audit_tools.export_tools._dispatch",
return_value={"error": "tool failed"},
@@ -657,36 +632,9 @@ def test_tools_remaining_branch_coverage(conn: MagicMock, ctx: Ctx, tmp_path, mo
):
out = et_mod.export_list_as_csv(conn, ctx, {"tool_name": "list_broken_links"})
assert out.get("total") == 1
- with patch.object(Ctx, "load_payload", return_value={"site_name": "Ex"}):
- assert et_mod.compose_custom_report(conn, ctx, {"title": "T", "sections": [{"type": "nope"}]})["error"]
- assert et_mod.export_custom_report(
- conn,
- ctx,
- {"title": "T", "sections": [{"type": "tool", "tool_name": "export_audit_report", "tool_args": {}}]},
- )["error"]
- assert et_mod.export_custom_report(conn, ctx, {"sections": [{"type": "nope"}]})["error"]
- bad_spec = export_artifacts.save_report_spec({"title": "Bad", "sections": [{"type": "nope"}]})
- assert et_mod.export_custom_report(conn, ctx, {"report_spec_id": bad_spec})["error"]
with patch.object(Ctx, "load_payload", return_value={"issues": {"broken": []}}):
assert isinstance(et_mod._dispatch("list_broken_links", {}, ctx, conn), dict)
- with patch("website_profiling.tools.export_custom.rows_from_tool_result", return_value=[]):
- assert "/a" in _section_html_tool_result("H", {"pages": [{"path": "/a"}]})
- _, err5 = validate_sections([{"type": "executive_summary"}, 42])
- assert err5
- import builtins
- real_import = builtins.__import__
-
- def _block_reportlab(name, globals=None, locals=None, fromlist=(), level=0):
- if name == "reportlab" or (fromlist and "reportlab" in name):
- raise ImportError("no reportlab")
- return real_import(name, globals, locals, fromlist, level)
-
- with patch("builtins.__import__", side_effect=_block_reportlab):
- with pytest.raises(RuntimeError, match="reportlab"):
- from website_profiling.tools.export_custom import render_custom_report_pdf as _pdf
- _pdf("", "T")
-
clusters_only_bad = ["bad", "bad2"]
with patch.object(Ctx, "load_payload", return_value={"content_duplicates": clusters_only_bad}):
assert content_mod.get_duplicate_cluster(conn, ctx, {"url": "https://ex.com/a"})["error"]
diff --git a/web/app/api/report/custom/compose/route.ts b/web/app/api/report/custom/compose/route.ts
deleted file mode 100644
index ed27113..0000000
--- a/web/app/api/report/custom/compose/route.ts
+++ /dev/null
@@ -1,45 +0,0 @@
-import { NextResponse, type NextRequest } from 'next/server';
-import { forbiddenIfNotLocal } from '@/server/localOnly';
-import { composeCustomReport } from '@/server/spawnCustomReport';
-import type { ApiRouteHandler } from '@/types/api';
-
-export const runtime = 'nodejs';
-export const dynamic = 'force-dynamic';
-
-export const POST: ApiRouteHandler = async (request: NextRequest): Promise => {
- const denied = forbiddenIfNotLocal(request);
- if (denied) return denied;
-
- let body: {
- title?: string;
- sections?: Array>;
- propertyId?: number;
- reportId?: number;
- };
- try {
- body = await request.json();
- } catch {
- return NextResponse.json({ error: 'Invalid JSON' }, { status: 400 });
- }
-
- const title = String(body.title || '').trim();
- const propertyId = Number(body.propertyId || 0);
- const sections = body.sections;
- if (!title || !propertyId || !Array.isArray(sections) || sections.length === 0) {
- return NextResponse.json({ error: 'title, propertyId, and sections required' }, { status: 400 });
- }
- if (sections.length > 12) {
- return NextResponse.json({ error: 'sections max 12' }, { status: 400 });
- }
-
- const result = await composeCustomReport({
- title,
- sections,
- propertyId,
- reportId: body.reportId,
- });
- if (!result.ok) {
- return NextResponse.json({ error: result.error, ...result.data }, { status: result.status });
- }
- return NextResponse.json(result.data);
-};
diff --git a/web/app/api/report/custom/export/route.ts b/web/app/api/report/custom/export/route.ts
deleted file mode 100644
index 4100abe..0000000
--- a/web/app/api/report/custom/export/route.ts
+++ /dev/null
@@ -1,51 +0,0 @@
-import { NextResponse, type NextRequest } from 'next/server';
-import { forbiddenIfNotLocal } from '@/server/localOnly';
-import { exportCustomReportArtifact } from '@/server/spawnCustomReport';
-import type { ApiRouteHandler } from '@/types/api';
-
-export const runtime = 'nodejs';
-export const dynamic = 'force-dynamic';
-
-export const GET: ApiRouteHandler = async (request: NextRequest): Promise => {
- const denied = forbiddenIfNotLocal(request);
- if (denied) return denied;
-
- const params = request.nextUrl.searchParams;
- const reportSpecId = String(params.get('specId') || '').trim();
- const format = (params.get('format') || 'html').toLowerCase();
- const propertyId = Number(params.get('propertyId') || '0');
- const reportIdRaw = params.get('reportId');
- const reportId = reportIdRaw && /^\d+$/.test(reportIdRaw) ? Number(reportIdRaw) : null;
-
- if (!reportSpecId || !propertyId) {
- return NextResponse.json({ error: 'specId and propertyId required' }, { status: 400 });
- }
- if (format !== 'html' && format !== 'pdf') {
- return NextResponse.json({ error: 'format must be html or pdf' }, { status: 400 });
- }
-
- const result = await exportCustomReportArtifact({
- reportSpecId,
- format,
- propertyId,
- reportId,
- });
- if (!result.ok) {
- return NextResponse.json({ error: result.error, ...result.data }, { status: result.status });
- }
-
- const filename = String(result.data.filename || `custom-report.${format}`);
- const mimeType = String(result.data.mime_type || (format === 'pdf' ? 'application/pdf' : 'text/html'));
- const b64 = String(result.data.data_b64 || '');
- const buf = Buffer.from(b64, 'base64');
- const dispositionParam = params.get('disposition');
- const inline = dispositionParam === 'inline';
-
- return new NextResponse(buf, {
- status: 200,
- headers: {
- 'Content-Type': mimeType,
- 'Content-Disposition': `${inline ? 'inline' : 'attachment'}; filename="${filename}"`,
- },
- });
-};
diff --git a/web/src/components/chat/deriveChatBlocks.ts b/web/src/components/chat/deriveChatBlocks.ts
index f2b683b..9492b40 100644
--- a/web/src/components/chat/deriveChatBlocks.ts
+++ b/web/src/components/chat/deriveChatBlocks.ts
@@ -179,7 +179,6 @@ const EXPORT_TOOLS = new Set([
'export_audit_report',
'export_compare_csv',
'export_list_as_csv',
- 'export_custom_report',
]);
const IMAGE_SUMMARY_TOOL = 'get_image_audit_summary';
diff --git a/web/src/components/chat/sanitizeChatProse.ts b/web/src/components/chat/sanitizeChatProse.ts
index 9d3ce0a..7a64fec 100644
--- a/web/src/components/chat/sanitizeChatProse.ts
+++ b/web/src/components/chat/sanitizeChatProse.ts
@@ -8,7 +8,6 @@ const TOOL_LABELS: Record = {
export_audit_report: 'export audit report',
export_compare_csv: 'export comparison CSV',
export_list_as_csv: 'export list as CSV',
- export_custom_report: 'export custom report',
get_category_recommendations: 'category recommendations',
get_report_summary: 'audit summary',
get_critical_issues: 'critical issues list',
diff --git a/web/src/components/export/CustomReportBuilder.tsx b/web/src/components/export/CustomReportBuilder.tsx
deleted file mode 100644
index 6dc76b8..0000000
--- a/web/src/components/export/CustomReportBuilder.tsx
+++ /dev/null
@@ -1,210 +0,0 @@
-'use client';
-
-import { useCallback, useState } from 'react';
-import { Loader2, Plus, Trash2 } from 'lucide-react';
-import Button from '@/components/Button';
-import { apiUrl } from '@/lib/publicBase';
-import {
- CUSTOM_REPORT_TOOLS,
- CUSTOM_SECTION_TYPES,
- sectionsToPayload,
- type CustomReportSection,
-} from '@/lib/customReportTools';
-import { strings } from '@/lib/strings';
-
-function newSection(): CustomReportSection {
- return {
- id: crypto.randomUUID(),
- type: 'executive_summary',
- };
-}
-
-export interface CustomReportBuilderProps {
- propertyId: number | null;
- reportId: number | null;
-}
-
-export default function CustomReportBuilder({ propertyId, reportId }: CustomReportBuilderProps) {
- const ve = strings.views.exportReport;
- const [title, setTitle] = useState('');
- const [sections, setSections] = useState([newSection()]);
- const [specId, setSpecId] = useState(null);
- const [busy, setBusy] = useState(false);
- const [error, setError] = useState(null);
-
- const compose = useCallback(async () => {
- if (!propertyId || !title.trim()) return null;
- setBusy(true);
- setError(null);
- try {
- const res = await fetch(apiUrl('/report/custom/compose'), {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({
- title: title.trim(),
- sections: sectionsToPayload(sections),
- propertyId,
- reportId,
- }),
- });
- const data = await res.json();
- if (!res.ok) throw new Error(String(data.error || ve.customSaveFailed));
- const id = String(data.report_spec_id || '');
- setSpecId(id || null);
- return id;
- } catch (err) {
- setError(err instanceof Error ? err.message : String(err));
- return null;
- } finally {
- setBusy(false);
- }
- }, [propertyId, reportId, sections, title, ve.customSaveFailed]);
-
- const exportUrl = (format: 'html' | 'pdf', inline = false) => {
- if (!specId || !propertyId) return '#';
- const p = new URLSearchParams({
- specId,
- format,
- propertyId: String(propertyId),
- });
- if (reportId != null) p.set('reportId', String(reportId));
- if (inline) p.set('disposition', 'inline');
- return apiUrl(`/report/custom/export?${p.toString()}`);
- };
-
- const handlePreview = async () => {
- const id = specId || (await compose());
- if (!id) return;
- window.open(exportUrl('html', true), '_blank', 'noopener,noreferrer');
- };
-
- const handleDownload = async (format: 'html' | 'pdf') => {
- const id = specId || (await compose());
- if (!id) return;
- window.location.href = exportUrl(format, false);
- };
-
- const updateSection = (id: string, patch: Partial) => {
- setSections((prev) => prev.map((s) => (s.id === id ? { ...s, ...patch } : s)));
- setSpecId(null);
- };
-
- const addSection = () => {
- if (sections.length >= 12) return;
- setSections((prev) => [...prev, newSection()]);
- setSpecId(null);
- };
-
- const removeSection = (id: string) => {
- setSections((prev) => (prev.length <= 1 ? prev : prev.filter((s) => s.id !== id)));
- setSpecId(null);
- };
-
- if (!propertyId) {
- return (
- {strings.views.issues.taskBoardNoProperty}
- );
- }
-
- return (
-
-
-
{ve.customTitle}
-
{ve.customHint}
-
-
-
- {ve.customReportTitleLabel}
- {
- setTitle(e.target.value);
- setSpecId(null);
- }}
- placeholder={ve.customReportTitlePlaceholder}
- className="w-full max-w-xl rounded-lg border border-default bg-brand-900 px-3 py-2 text-sm text-foreground"
- />
-
-
-
- {sections.map((section, index) => (
-
-
-
- {ve.customSectionType} {index + 1}
-
- removeSection(section.id)}
- disabled={sections.length <= 1}
- className="p-1 text-muted-foreground hover:text-red-400 disabled:opacity-40"
- aria-label="Remove section"
- >
-
-
-
-
- updateSection(section.id, {
- type: e.target.value as CustomReportSection['type'],
- })
- }
- className="w-full max-w-md rounded-lg border border-default bg-brand-900 px-2 py-1.5 text-sm"
- >
- {CUSTOM_SECTION_TYPES.map((opt) => (
-
- {opt.label}
-
- ))}
-
- {section.type === 'tool' ? (
-
updateSection(section.id, { tool_name: e.target.value })}
- className="w-full max-w-md rounded-lg border border-default bg-brand-900 px-2 py-1.5 text-sm"
- >
- {CUSTOM_REPORT_TOOLS.map((t) => (
-
- {t.label}
-
- ))}
-
- ) : null}
- {section.type === 'notes' ? (
-
- ))}
-
-
-
-
= 12}>
-
- {ve.customAddSection}
-
-
void handlePreview()} disabled={busy || !title.trim()}>
- {busy ? : null}
- {ve.customPreview}
-
-
void handleDownload('html')} disabled={busy || !title.trim()}>
- {ve.customDownloadHtml}
-
-
void handleDownload('pdf')} disabled={busy || !title.trim()}>
- {ve.customDownloadPdf}
-
-
- {sections.length >= 12 ? (
-
{ve.customMaxSections}
- ) : null}
- {error ?
{error}
: null}
-
- );
-}
diff --git a/web/src/components/links/tabs/OverviewTab.tsx b/web/src/components/links/tabs/OverviewTab.tsx
index 4385e21..706f9f9 100644
--- a/web/src/components/links/tabs/OverviewTab.tsx
+++ b/web/src/components/links/tabs/OverviewTab.tsx
@@ -1,6 +1,26 @@
-import { useMemo } from 'react';
-import { Check, ChevronRight, Gauge, X } from 'lucide-react';
-import { Badge, LabelWithHint } from '../../index';
+import { useMemo, type ReactNode } from 'react';
+import {
+ Activity,
+ ArrowDownLeft,
+ ArrowUpRight,
+ BookOpen,
+ Check,
+ ChevronRight,
+ ExternalLink,
+ FileCode,
+ FileText,
+ Gauge,
+ Image,
+ Layers,
+ Link2,
+ Route,
+ Timer,
+ X,
+ Zap,
+} from 'lucide-react';
+import { Badge, Card, StatCard } from '../../index';
+import { LighthouseScoreGrid } from '@/components/charts/LighthouseScoreGrid';
+import { metricHelpHint } from '@/lib/metricHelp';
import type { LinkDetail, LinkLighthouseData, PageAnalysis } from '@/types/report';
import { useReport } from '../../../context/useReport';
import { strings, format } from '../../../lib/strings';
@@ -15,7 +35,6 @@ import {
normaliseKw,
formatLhMetric,
} from '../../../utils/linkUtils';
-import { scoreBandColor } from '../../../utils/chartPalette';
import CopyBtn from '../CopyBtn';
import CharBar from '../CharBar';
@@ -49,6 +68,26 @@ function SectionLink({
);
}
+function SectionHeader({
+ title,
+ icon,
+ action,
+}: {
+ title: string;
+ icon?: ReactNode;
+ action?: ReactNode;
+}) {
+ return (
+
+
+ {icon}
+ {title}
+
+ {action}
+
+ );
+}
+
function SocialCheckItem({ label, present }: { label: string; present: boolean }) {
const lc = strings.components.linkTabs.content;
return (
@@ -85,74 +124,126 @@ export default function OverviewTab({ link, lhData, onOpenTab }: OverviewTabProp
const keywords = useMemo(() => parseKeywords(link.top_keywords).slice(0, 8), [link.top_keywords]);
const sslExp = (data?.site_ssl_expires_at || null) as string | null;
- const crawlStats = [
- { key: 'status', label: o.statStatus, value: , raw: true },
+ const crawlMetrics = [
+ {
+ key: 'status',
+ icon: ,
+ label: o.statStatus,
+ value: ,
+ },
{
key: 'responseTime',
- label: ,
- value: {formatMs(link.response_time_ms)} ,
- raw: true,
+ icon: ,
+ label: o.statResponseTime,
+ hint: metricHelpHint('shared.responseTime'),
+ value: formatMs(link.response_time_ms),
+ valueClassName: rtColor(link.response_time_ms),
+ },
+ {
+ key: 'depth',
+ icon: ,
+ label: o.statDepth,
+ hint: metricHelpHint('shared.crawlDepth'),
+ value: link.depth != null ? link.depth : sj.emDash,
+ },
+ {
+ key: 'inlinks',
+ icon: ,
+ label: o.statInlinks,
+ hint: metricHelpHint('shared.inlinks'),
+ value: link.inlinks ?? 0,
+ },
+ {
+ key: 'outlinks',
+ icon: ,
+ label: o.statOutlinks,
+ hint: metricHelpHint('shared.outlinks'),
+ value: link.outlinks ?? 0,
},
- { key: 'depth', label: , value: link.depth != null ? link.depth : sj.emDash },
- { key: 'inlinks', label: , value: link.inlinks ?? 0 },
- { key: 'outlinks', label: , value: link.outlinks ?? 0 },
{
key: 'words',
- label: ,
- value:
- wc > 0 ? (
-
- {wc.toLocaleString()} {wcInfo.label}
-
- ) : (
- sj.emDash
- ),
- raw: true,
+ icon: ,
+ label: o.statWords,
+ hint: metricHelpHint('shared.wordCount'),
+ value: wc > 0 ? wc.toLocaleString() : sj.emDash,
+ band: wc > 0 ? wcInfo.label : undefined,
+ bandClassName: wc > 0 ? wcInfo.color : undefined,
+ valueClassName: wc > 0 ? wcInfo.color : 'text-muted-foreground',
},
{
key: 'readingLevel',
- label: ,
- value:
- rl > 0 ? (
-
- {format(o.readingGrade, { n: rl })} {rlInfo.label}
-
- ) : (
- sj.emDash
- ),
- raw: true,
+ icon: ,
+ label: o.statReadingLevel,
+ hint: metricHelpHint('shared.readingLevel'),
+ value: rl > 0 ? format(o.readingGrade, { n: rl }) : sj.emDash,
+ band: rl > 0 ? rlInfo.label : undefined,
+ bandClassName: rl > 0 ? rlInfo.color : undefined,
+ valueClassName: rl > 0 ? rlInfo.color : 'text-muted-foreground',
},
{
key: 'redirects',
+ icon: ,
label: o.statRedirects,
- value:
- (link.redirect_chain_length ?? 0) > 0 ? (
- {link.redirect_chain_length}
- ) : (
- '0'
- ),
- raw: true,
+ value: link.redirect_chain_length ?? 0,
+ valueClassName:
+ (link.redirect_chain_length ?? 0) > 0
+ ? 'text-yellow-800 dark:text-yellow-400'
+ : 'text-bright',
},
];
- const compositionStats = [
+ const compositionMetrics = [
{
+ key: 'internal',
+ icon: ,
label: o.statInternalLinks,
value: pa.internal_link_count ?? link.internal_link_count ?? sj.emDash,
},
{
+ key: 'external',
+ icon: ,
label: o.statExternalLinks,
value: pa.external_link_count ?? link.external_link_count ?? sj.emDash,
},
- { label: o.statImages, value: link.images_total ?? sj.emDash },
- { label: o.statScripts, value: link.script_count ?? sj.emDash },
- { label: o.statStylesheets, value: link.link_stylesheet_count ?? sj.emDash },
{
+ key: 'images',
+ icon: ,
+ label: o.statImages,
+ value: link.images_total ?? sj.emDash,
+ },
+ {
+ key: 'scripts',
+ icon: ,
+ label: o.statScripts,
+ value: link.script_count ?? sj.emDash,
+ },
+ {
+ key: 'stylesheets',
+ icon: ,
+ label: o.statStylesheets,
+ value: link.link_stylesheet_count ?? sj.emDash,
+ },
+ {
+ key: 'preload',
+ icon: ,
label: o.statPreload,
value: `${pa.preload_count ?? 0} / ${pa.preconnect_count ?? 0}`,
},
];
+ const cwvMetrics = lh
+ ? ([
+ ['LCP', 'lcp_ms'],
+ ['FCP', 'fcp_ms'],
+ ['TBT', 'tbt_ms'],
+ ['CLS', 'cls'],
+ ] as const).map(([label, key]) => ({
+ key,
+ label,
+ value: formatLhMetric(key, lh.median_metrics?.[key]),
+ }))
+ : [];
+
const hasOg = !!(link.og_title && String(link.og_title).trim());
const hasTwitter = !!(link.twitter_title && String(link.twitter_title).trim());
const hasOgImg = !!(link.og_image && String(link.og_image).trim());
@@ -160,34 +251,39 @@ export default function OverviewTab({ link, lhData, onOpenTab }: OverviewTabProp
return (
-
{o.crawlHeading}
-
- {crawlStats.map(({ key, label, value, raw }) => (
-
-
{label}
-
- {raw ? value : {value} }
-
-
- ))}
+
+
+ {crawlMetrics.map(
+ ({ key, icon, label, hint, value, valueClassName, band, bandClassName }) => (
+
+ ),
+ )}
-
-
{o.compositionHeading}
-
-
-
- {compositionStats.map(({ label, value }) => (
-
+
}
+ />
+
+ {compositionMetrics.map(({ key, icon, label, value }) => (
+
))}
{sslExp && (
-
+
{o.sslExpires}: {sslExp.slice(0, 10)}
)}
@@ -195,41 +291,37 @@ export default function OverviewTab({ link, lhData, onOpenTab }: OverviewTabProp
{lh && (
-
-
-
- {o.lighthouseHeading}
-
-
-
-
- {['performance', 'accessibility', 'best-practices', 'seo'].map((cat) => {
- const cs = lh.category_scores || {};
- const score = cs[cat] != null ? Number(cs[cat]) : null;
- const color = score != null ? scoreBandColor(score) : 'rgb(71,85,105)';
- return (
-
-
- {lhLabels[cat] || cat.replace('-', ' ')}
-
-
- {score != null ? score : sj.emDash}
-
-
- );
- })}
-
-
- {[['LCP', 'lcp_ms'], ['FCP', 'fcp_ms'], ['TBT', 'tbt_ms'], ['CLS', 'cls']].map(([label, key]) => {
- const mm = lh.median_metrics || {};
- return (
-
-
{label}
-
{formatLhMetric(key, mm[key])}
+
}
+ action={
}
+ />
+
+
+
+
+ {cwvMetrics.length > 0 ? (
+
+
+ {cwvMetrics.map(({ key, label, value }) => (
+
+ ))}
- );
- })}
-
+
+ ) : null}
+
)}
diff --git a/web/src/components/overview/OverviewSummaryTab.tsx b/web/src/components/overview/OverviewSummaryTab.tsx
index 14fd3e0..c74893e 100644
--- a/web/src/components/overview/OverviewSummaryTab.tsx
+++ b/web/src/components/overview/OverviewSummaryTab.tsx
@@ -27,14 +27,12 @@ import {
OverviewKeywordOpportunitiesCard,
buildKeywordsHref,
} from './OverviewKeywordOpportunitiesCard';
-import { OverviewAtAGlance } from './OverviewAtAGlance';
export interface OverviewSummaryTabProps {
data: ReportPayload;
exportHref: string;
compareHref: string;
reportCount: number;
- lighthouseScores?: Record
| null;
}
export function OverviewSummaryTab({
@@ -42,7 +40,6 @@ export function OverviewSummaryTab({
exportHref,
compareHref,
reportCount,
- lighthouseScores,
}: OverviewSummaryTabProps) {
const vo = strings.views.overview;
const searchParams = useSearchParams();
@@ -100,14 +97,6 @@ export function OverviewSummaryTab({
reportCount={reportCount}
querySuffix={querySuffix}
/>
- }>
-
-
}
diff --git a/web/src/lib/customReportTools.ts b/web/src/lib/customReportTools.ts
deleted file mode 100644
index 660c4a5..0000000
--- a/web/src/lib/customReportTools.ts
+++ /dev/null
@@ -1,40 +0,0 @@
-/** Tools allowed in custom report builder UI (subset of audit-tool allowlist). */
-export const CUSTOM_REPORT_TOOLS = [
- { value: 'get_report_summary', label: 'Report summary' },
- { value: 'get_category_scores', label: 'Category scores' },
- { value: 'get_critical_issues', label: 'Critical issues' },
- { value: 'list_broken_links', label: 'Broken links' },
- { value: 'get_lighthouse_summary', label: 'Lighthouse summary' },
- { value: 'get_google_summary', label: 'Google summary' },
- { value: 'get_image_audit_summary', label: 'Image audit summary' },
- { value: 'get_geo_readiness_score', label: 'GEO readiness score' },
- { value: 'get_axe_audit_summary', label: 'Axe accessibility summary' },
-] as const;
-
-export type CustomSectionType = 'executive_summary' | 'category_scores' | 'tool' | 'notes';
-
-export interface CustomReportSection {
- id: string;
- type: CustomSectionType;
- tool_name?: string;
- markdown?: string;
-}
-
-export const CUSTOM_SECTION_TYPES: { value: CustomSectionType; label: string }[] = [
- { value: 'executive_summary', label: 'Executive summary' },
- { value: 'category_scores', label: 'Category scores' },
- { value: 'tool', label: 'Audit data (tool)' },
- { value: 'notes', label: 'Notes' },
-];
-
-export function sectionsToPayload(sections: CustomReportSection[]): Array> {
- return sections.map((s) => {
- if (s.type === 'tool') {
- return { type: 'tool', tool_name: s.tool_name || 'get_report_summary' };
- }
- if (s.type === 'notes') {
- return { type: 'notes', markdown: s.markdown || '' };
- }
- return { type: s.type };
- });
-}
diff --git a/web/src/server/auditToolAllowlist.ts b/web/src/server/auditToolAllowlist.ts
index 6d97ff9..c6d5b2d 100644
--- a/web/src/server/auditToolAllowlist.ts
+++ b/web/src/server/auditToolAllowlist.ts
@@ -16,7 +16,6 @@ export const AUDIT_TOOL_ALLOWLIST = new Set([
'get_faq_schema_coverage',
'list_pages_missing_faq_schema',
'get_eeat_signals_summary',
- // Custom report builder (curated subset)
'get_report_summary',
'get_category_scores',
'get_critical_issues',
diff --git a/web/src/server/customReportRoute.test.ts b/web/src/server/customReportRoute.test.ts
deleted file mode 100644
index 92db50c..0000000
--- a/web/src/server/customReportRoute.test.ts
+++ /dev/null
@@ -1,82 +0,0 @@
-import { describe, expect, it, vi, beforeEach } from 'vitest';
-import { localRequest, remoteRequest, makeSpawnChild } from '@/server/testHelpers/routeTestUtils';
-
-const composeMock = vi.fn();
-const exportMock = vi.fn();
-
-vi.mock('@/server/spawnCustomReport', () => ({
- composeCustomReport: (...args: unknown[]) => composeMock(...args),
- exportCustomReportArtifact: (...args: unknown[]) => exportMock(...args),
-}));
-
-describe('report/custom routes', () => {
- beforeEach(() => {
- composeMock.mockReset();
- exportMock.mockReset();
- vi.resetModules();
- });
-
- it('compose returns 403 for non-local', async () => {
- const { POST } = await import('../../app/api/report/custom/compose/route');
- const res = await POST(
- remoteRequest('/api/report/custom/compose', {
- method: 'POST',
- body: JSON.stringify({ title: 'T', propertyId: 1, sections: [{ type: 'notes', markdown: 'x' }] }),
- }),
- );
- expect(res.status).toBe(403);
- });
-
- it('compose validates payload', async () => {
- const { POST } = await import('../../app/api/report/custom/compose/route');
- const res = await POST(
- localRequest('/api/report/custom/compose', {
- method: 'POST',
- body: JSON.stringify({ title: 'T' }),
- }),
- );
- expect(res.status).toBe(400);
- });
-
- it('compose returns spec id', async () => {
- composeMock.mockResolvedValue({
- ok: true,
- status: 200,
- data: { report_spec_id: 'abc-123' },
- });
- const { POST } = await import('../../app/api/report/custom/compose/route');
- const res = await POST(
- localRequest('/api/report/custom/compose', {
- method: 'POST',
- body: JSON.stringify({
- title: 'Client report',
- propertyId: 2,
- sections: [{ type: 'executive_summary' }],
- }),
- }),
- );
- expect(res.status).toBe(200);
- const body = await res.json();
- expect(body.report_spec_id).toBe('abc-123');
- });
-
- it('export returns file bytes', async () => {
- exportMock.mockResolvedValue({
- ok: true,
- status: 200,
- data: {
- filename: 'client.html',
- mime_type: 'text/html',
- data_b64: Buffer.from('').toString('base64'),
- },
- });
- const { GET } = await import('../../app/api/report/custom/export/route');
- const res = await GET(
- localRequest('/api/report/custom/export?specId=abc&format=html&propertyId=1'),
- );
- expect(res.status).toBe(200);
- expect(res.headers.get('Content-Type')).toContain('text/html');
- const text = await res.text();
- expect(text).toContain('');
- });
-});
diff --git a/web/src/server/spawnCustomReport.ts b/web/src/server/spawnCustomReport.ts
deleted file mode 100644
index ede4fc5..0000000
--- a/web/src/server/spawnCustomReport.ts
+++ /dev/null
@@ -1,151 +0,0 @@
-import { spawn } from 'child_process';
-import { formatPythonSpawnError, resolvePythonExecutable } from '@/server/resolvePython';
-import { getPipelineSpawnEnv, getRepoRoot } from '@/server/pipelineSpawnEnv';
-
-export interface ComposeCustomReportInput {
- title: string;
- sections: Array>;
- propertyId: number;
- reportId?: number | null;
-}
-
-export interface ExportCustomReportInput {
- reportSpecId: string;
- format: 'html' | 'pdf';
- propertyId: number;
- reportId?: number | null;
-}
-
-function runPythonJson(script: string, argvPayload: string, propertyId: number): Promise<{
- ok: boolean;
- status: number;
- data: Record;
- error?: string;
-}> {
- const repoRoot = getRepoRoot();
- const pythonExe = resolvePythonExecutable(null, repoRoot);
- return new Promise((resolve) => {
- const proc = spawn(pythonExe, ['-c', script, argvPayload], {
- cwd: repoRoot,
- env: getPipelineSpawnEnv(repoRoot, propertyId),
- shell: false,
- });
- let stdout = '';
- let stderr = '';
- proc.stdout?.on('data', (c: Buffer | string) => {
- stdout += c.toString();
- });
- proc.stderr?.on('data', (c: Buffer | string) => {
- stderr += c.toString();
- });
- proc.on('error', (err: Error) => {
- resolve({
- ok: false,
- status: 500,
- data: {},
- error: formatPythonSpawnError(err, pythonExe, repoRoot),
- });
- });
- proc.on('close', (code) => {
- try {
- const data = JSON.parse(stdout.trim() || '{}') as Record;
- if (code !== 0 || data.error) {
- resolve({
- ok: false,
- status: 500,
- data,
- error: String(data.error || stderr.trim() || 'Custom report failed'),
- });
- return;
- }
- resolve({ ok: true, status: 200, data });
- } catch {
- resolve({
- ok: false,
- status: 500,
- data: {},
- error: stderr.trim() || stdout.trim() || 'Invalid custom report response',
- });
- }
- });
- });
-}
-
-const COMPOSE_SCRIPT = `
-import json, sys
-from website_profiling.tools.audit_tools.export_tools import compose_custom_report
-from website_profiling.tools.audit_tools.context import AuditToolContext
-from website_profiling.db.storage import db_session
-
-payload = json.loads(sys.argv[1])
-ctx = AuditToolContext(property_id=int(payload["propertyId"]), report_id=payload.get("reportId"))
-with db_session() as conn:
- result = compose_custom_report(conn, ctx, {
- "title": payload["title"],
- "sections": payload["sections"],
- "property_id": payload["propertyId"],
- "report_id": payload.get("reportId"),
- })
-print(json.dumps(result))
-`;
-
-const EXPORT_SCRIPT = `
-import json, sys, base64
-from website_profiling.tools.audit_tools.export_tools import export_custom_report
-from website_profiling.tools.audit_tools.context import AuditToolContext
-from website_profiling.db.storage import db_session
-from website_profiling.tools.export_artifacts import read_artifact_bytes
-
-payload = json.loads(sys.argv[1])
-ctx = AuditToolContext(property_id=int(payload["propertyId"]), report_id=payload.get("reportId"))
-with db_session() as conn:
- result = export_custom_report(conn, ctx, {
- "format": payload["format"],
- "report_spec_id": payload["reportSpecId"],
- "property_id": payload["propertyId"],
- "report_id": payload.get("reportId"),
- })
-if result.get("error"):
- print(json.dumps(result))
- sys.exit(1)
-aid = result.get("artifact_id")
-if not aid:
- print(json.dumps({"error": "no artifact_id"}))
- sys.exit(1)
-loaded = read_artifact_bytes(str(aid))
-if not loaded:
- print(json.dumps({"error": "artifact not found"}))
- sys.exit(1)
-meta, raw = loaded
-print(json.dumps({
- "filename": meta.get("filename") or result.get("filename"),
- "mime_type": meta.get("mime_type") or result.get("mime_type"),
- "data_b64": base64.b64encode(raw).decode("ascii"),
-}))
-`;
-
-export function composeCustomReport(input: ComposeCustomReportInput) {
- return runPythonJson(
- COMPOSE_SCRIPT,
- JSON.stringify({
- title: input.title,
- sections: input.sections,
- propertyId: input.propertyId,
- reportId: input.reportId ?? null,
- }),
- input.propertyId,
- );
-}
-
-export function exportCustomReportArtifact(input: ExportCustomReportInput) {
- return runPythonJson(
- EXPORT_SCRIPT,
- JSON.stringify({
- reportSpecId: input.reportSpecId,
- format: input.format,
- propertyId: input.propertyId,
- reportId: input.reportId ?? null,
- }),
- input.propertyId,
- );
-}
diff --git a/web/src/strings.json b/web/src/strings.json
index a9c89d8..3a99aca 100644
--- a/web/src/strings.json
+++ b/web/src/strings.json
@@ -3425,27 +3425,7 @@
"downloadCsv": "Download CSV",
"downloadJson": "Download JSON",
"downloadWorkbook": "Download crawl workbook (ZIP)",
- "downloadSitemap": "Download XML sitemap",
- "tabStandard": "Standard export",
- "tabCustom": "Custom report",
- "customTitle": "Build a custom client report",
- "customHint": "Combine executive summary, category scores, audit tool sections, and notes. Export as HTML or PDF.",
- "customReportTitleLabel": "Report title",
- "customReportTitlePlaceholder": "Client audit — June 2026",
- "customAddSection": "Add section",
- "customSectionType": "Section type",
- "customSectionExecutive": "Executive summary",
- "customSectionCategories": "Category scores",
- "customSectionTool": "Audit data (tool)",
- "customSectionNotes": "Notes (markdown)",
- "customToolLabel": "Tool",
- "customNotesLabel": "Notes markdown",
- "customNotesPlaceholder": "Optional client-facing notes…",
- "customPreview": "Preview HTML",
- "customDownloadHtml": "Download HTML",
- "customDownloadPdf": "Download PDF",
- "customSaveFailed": "Could not compose report",
- "customMaxSections": "Maximum 12 sections"
+ "downloadSitemap": "Download XML sitemap"
},
"overview": {
"openExportPage": "Export report",
diff --git a/web/src/views/ExportReport.tsx b/web/src/views/ExportReport.tsx
index d310a8c..dbaf9fc 100644
--- a/web/src/views/ExportReport.tsx
+++ b/web/src/views/ExportReport.tsx
@@ -1,42 +1,22 @@
'use client';
-import { useCallback, useRef, useState } from 'react';
-import { Download, FileText, Printer } from 'lucide-react';
-import Button from '@/components/Button';
-import CustomReportBuilder from '@/components/export/CustomReportBuilder';
+import { useState } from 'react';
+import { Download, FileText } from 'lucide-react';
import { useReport } from '@/context/useReport';
-import { useOptionalPipeline } from '@/context/PipelineContext';
-import { buildAuditExportUrl, buildWorkbookExportUrl, buildSitemapExportUrl } from '@/lib/exportAudit';
+import { buildAuditExportUrl } from '@/lib/exportAudit';
import { strings } from '@/lib/strings';
-import { ViewTabs, ViewTabPanel } from '@/components';
-import type { ViewProps } from '@/types/report';
const ve = strings.views.exportReport;
-const EXPORT_TABS = ['standard', 'custom'] as const;
-type ExportTabId = (typeof EXPORT_TABS)[number];
-export default function ExportReport(_props: ViewProps) {
- const { selectedReportId, reportList, data } = useReport();
- const pipeline = useOptionalPipeline();
- const propertyId = Number(pipeline?.configState.active_property_id || 0) || null;
+export default function ExportReport() {
+ const { selectedReportId, reportList } = useReport();
const reportId = selectedReportId ?? reportList?.[0]?.id ?? null;
const [previewError, setPreviewError] = useState(null);
- const [activeTab, setActiveTab] = useState('standard');
- const iframeRef = useRef(null);
const previewUrl = buildAuditExportUrl('html', reportId, { inline: true });
const pdfUrl = buildAuditExportUrl('pdf', reportId);
const csvUrl = buildAuditExportUrl('csv', reportId);
const jsonUrl = buildAuditExportUrl('json', reportId);
- const workbookUrl = buildWorkbookExportUrl(reportId);
- const sitemapUrl = buildSitemapExportUrl(reportId);
-
- const siteLabel = data?.site_name || strings.app.defaultSiteName;
- const generated = data?.report_generated_at;
-
- const handlePrint = useCallback(() => {
- iframeRef.current?.contentWindow?.print();
- }, []);
return (
@@ -44,18 +24,8 @@ export default function ExportReport(_props: ViewProps) {
{ve.title}
-
- {siteLabel}
- {generated ? ` · ${ve.generatedLabel} ${generated}` : ''}
-
-
{ve.description}
- {activeTab === 'standard' ? (
- ) : null}
-
setActiveTab(id as ExportTabId)}
- ariaLabel={ve.title}
- idPrefix="export-report"
- className="mt-4"
- />
- {activeTab === 'custom' ? (
-
- ) : null}
- {activeTab === 'standard' ? (
-
{previewError ? (
{previewError}
) : (
)}
-
- ) : null}
);
}
diff --git a/web/src/views/Overview.tsx b/web/src/views/Overview.tsx
index 743542a..a47c76e 100644
--- a/web/src/views/Overview.tsx
+++ b/web/src/views/Overview.tsx
@@ -150,7 +150,6 @@ export default function Overview({ searchQuery = '' }: ViewProps) {
exportHref={exportHref}
compareHref={compareHref}
reportCount={reportList.length}
- lighthouseScores={charts.lighthouseScores?.scores}
/>
)}