|
| 1 | +"""Content contracts for example markdown sources. |
| 2 | +
|
| 3 | +These contracts check pedagogical correctness rather than geometry: |
| 4 | +does each cell's prose explain the code in that cell? Does the |
| 5 | +unsupported-cell prose lead with the lesson or with a runtime |
| 6 | +caveat? |
| 7 | +
|
| 8 | +The contracts complement tests/test_marginalia_geometry.py |
| 9 | +(geometry, palette, registration) by catching content-shaped bugs |
| 10 | +that geometry contracts can't see. |
| 11 | +""" |
| 12 | +from __future__ import annotations |
| 13 | + |
| 14 | +import re |
| 15 | +import unittest |
| 16 | + |
| 17 | +from src.example_loader import load_examples |
| 18 | + |
| 19 | + |
# Load the examples once, at import time. load_examples() returns a pair;
# only its second element is used by the contracts in this module.
_, EXAMPLES = load_examples()
| 21 | + |
| 22 | + |
| 23 | +def _code_identifiers(code: str) -> set[str]: |
| 24 | + """Return identifier-like tokens from a code block. |
| 25 | +
|
| 26 | + Strips Python keywords and the common stdlib names that appear |
| 27 | + in nearly every example (`print`, `import`) so the audit |
| 28 | + measures the lesson-specific vocabulary, not the language. |
| 29 | + """ |
| 30 | + keywords = { |
| 31 | + "def", "return", "import", "from", "as", "if", "else", |
| 32 | + "elif", "try", "except", "finally", "with", "for", "in", |
| 33 | + "and", "or", "not", "is", "true", "false", "none", |
| 34 | + "print", "pass", "raise", "while", "break", "continue", |
| 35 | + "class", "lambda", "yield", "global", "nonlocal", |
| 36 | + } |
| 37 | + return {w for w in re.findall(r"\b[a-z_][a-z_0-9]+\b", code.lower()) |
| 38 | + if w not in keywords and len(w) > 1} |
| 39 | + |
| 40 | + |
class UnsupportedCellProseContract(unittest.TestCase):
    """Contract 11: every :::unsupported cell's prose explains the
    code, not just the runtime constraint.

    The :::unsupported block is rendered on production pages as a
    walkthrough cell with prose + code. When the prose only says
    'Dynamic Workers do not provide X', the reader sees the
    constraint but no pedagogical content. The fix is to lead with
    what the code does and move the runtime caveat to a closing
    parenthetical (or to the Notes section).

    Heuristic: each unsupported cell's prose must mention at least
    two code identifiers — variable names, function calls, or
    method names from the code block. Two is the minimum that
    proves the prose discusses *this specific code* rather than
    a generic note about Workers.

    A mention is a whole-word match: an identifier that merely occurs
    inside a longer word (``os`` in "most", ``re`` in "there") does not
    count.
    """

    MIN_IDENT_OVERLAP = 2

    def test_unsupported_prose_mentions_code(self):
        """Fail when an unsupported cell's prose names too few identifiers.

        Collects one message per offending cell and asserts that the
        collected list is empty, so a single run reports every failure.
        """
        failures: list[str] = []
        for ex in EXAMPLES:
            for cell in ex.get("cells", []):
                if cell.get("kind") != "unsupported":
                    continue
                prose = " ".join(cell.get("prose", [])).lower()
                idents = _code_identifiers(cell.get("code", ""))
                # Tokenise the prose with the same identifier pattern used
                # for the code and intersect the sets. A plain substring
                # test would let short identifiers match inside unrelated
                # words and pass generic prose.
                prose_words = set(re.findall(r"\b[a-z_][a-z_0-9]+\b", prose))
                hits = len(idents & prose_words)
                if hits < self.MIN_IDENT_OVERLAP:
                    failures.append(
                        f"{ex['slug']}: unsupported cell prose references "
                        f"{hits} code identifier(s) (need ≥ {self.MIN_IDENT_OVERLAP}); "
                        f"prose looks generic. idents: {sorted(idents)[:5]}…"
                    )
        self.assertEqual(failures, [], "\n " + "\n ".join(failures))
| 77 | + |
| 78 | + |
# Run the contracts with unittest's own runner when this module is
# executed directly as a script.
if __name__ == "__main__":
    unittest.main()
0 commit comments