Skip to content

Commit b508a41

Browse files
Update workflow
1 parent f863931 commit b508a41

17 files changed

Lines changed: 1007 additions & 348 deletions

src/main/README.md

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@ python src/main/main.py <command> [args...]
1414
Examples:
1515

1616
```bash
17+
python src/main/main.py check
18+
python src/main/main.py build
1719
python src/main/main.py blog
1820
python src/main/main.py slides en/slides/example.html
1921
python src/main/main.py convert-thumbnails input.html output.html
@@ -24,7 +26,9 @@ python src/main/main.py verify-usage . --exclude-dirs templates analysis
2426

2527
- `batch-translate`: translation workflow helper.
2628
- `blog`: generates blog indexes and feeds.
29+
- `build`: runs the default static-site build workflow.
2730
- `build-all-search-indexes`: builds search indexes for all languages.
31+
- `check`: runs the default static-site validation workflow.
2832
- `check-db`: prints translation path mappings stored in `translations.db`.
2933
- `ci-internal-links`: CI-focused internal link verification.
3034
- `convert-thumbnails`: rewrites Wikimedia thumbnail URLs in HTML.
@@ -43,10 +47,15 @@ python src/main/main.py verify-usage . --exclude-dirs templates analysis
4347

4448
- `main.py`: single dispatcher for executable tools in this folder.
4549
- `file_rewrite.py`: shared helper for safe text-file rewrites.
50+
- `config.py` and `paths.py`: shared configuration and repository path helpers.
51+
- `manifest.py`: tracks source fingerprints so generators can skip unchanged outputs.
52+
- `translation_rules.py`: shared translation parsing rules used by extraction and generation.
4653
- Library-style modules such as `analyse.py`, `links.py`, and `translate_manager.py` provide reusable functionality used by the command scripts.
4754

4855
## Development Notes
4956

5057
- Prefer adding new command-line tools through `main.py` so `src/main` keeps a single stable entry point.
5158
- Avoid top-level argument parsing in reusable modules; expose `main(argv=None)` instead.
5259
- Keep file rewrite logic centralized when possible to reduce duplication and accidental partial writes.
60+
- Prefer offline-first behavior for static-site checks and make network access explicit when needed.
61+
- Prefer incremental generators that can skip work when inputs and outputs have not changed.

src/main/blog.py

Lines changed: 43 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
# Generate an up-to-date list of articles in reverse chronological order
99
#!/usr/bin/env python3
1010

11+
import argparse
1112
import sys
1213
from typing import Dict, List, Set, Tuple
1314
from dataclasses import dataclass
@@ -20,7 +21,11 @@
2021
from pytz import timezone
2122

2223
from analyse import HTMLTextAnalysis, WebsiteAnalysis
24+
from config import SITE_AUTHOR, SITE_URL
2325
from git import get_first_latest_modification
26+
from manifest import BuildManifest
27+
from paths import REPO_ROOT
28+
from site_text import strip_author_from_title
2429

2530

2631
@dataclass
@@ -66,6 +71,7 @@ class BlogGenerator:
6671
@staticmethod
6772
def clean_title(title: str) -> str:
6873
"""Remove author name variations from title."""
74+
return strip_author_from_title(title)
6975
replacements = [
7076
": John Samuel",
7177
": ജോൺ ശമൂവേൽ",
@@ -463,12 +469,12 @@ def generate_feed(df: pd.DataFrame, feed_count: int = 20) -> None:
463469

464470
articleset: Set[str] = set()
465471
fg = FeedGenerator()
466-
fg.id("https://johnsamuel.info")
467-
fg.title("John Samuel")
468-
fg.description("Personal Blog of John Samuel")
469-
fg.author({"name": "John Samuel"})
472+
fg.id(SITE_URL)
473+
fg.title(SITE_AUTHOR)
474+
fg.description(f"Personal Blog of {SITE_AUTHOR}")
475+
fg.author({"name": SITE_AUTHOR})
470476
fg.language("en")
471-
fg.link(href="https://johnsamuel.info")
477+
fg.link(href=SITE_URL)
472478

473479
for _, row in df.iterrows():
474480
if row["filepath"] in articleset:
@@ -479,15 +485,15 @@ def generate_feed(df: pd.DataFrame, feed_count: int = 20) -> None:
479485
metadata = BlogGenerator.extract_article_metadata(row["filepath"])
480486

481487
fe = fg.add_entry(order="append")
482-
fe.id(f"https://johnsamuel.info/{row['filepath']}")
488+
fe.id(f"{SITE_URL}/{row['filepath']}")
483489
fe.title(metadata.title)
484490
fe.pubDate(
485491
datetime.fromtimestamp(
486492
metadata.modification_time, tz=timezone("Europe/Amsterdam")
487493
)
488494
)
489495
fe.description(metadata.title)
490-
fe.link(href=f"https://johnsamuel.info/{row['filepath']}")
496+
fe.link(href=f"{SITE_URL}/{row['filepath']}")
491497
except Exception as e:
492498
print(f"Error adding to feed {row['filepath']}: {e}")
493499
continue
@@ -499,21 +505,43 @@ def generate_feed(df: pd.DataFrame, feed_count: int = 20) -> None:
499505

500506
def main(argv=None):
501507
"""Main entry point."""
502-
argv = list(sys.argv[1:] if argv is None else argv)
503-
if argv:
504-
print("This program takes no input")
505-
return 1
508+
parser = argparse.ArgumentParser(
509+
description="Generate blog pages and feeds."
510+
)
511+
parser.add_argument(
512+
"--force",
513+
action="store_true",
514+
help="Regenerate outputs even when the build manifest says they are current.",
515+
)
516+
args = parser.parse_args(sys.argv[1:] if argv is None else argv)
506517

507-
# Get articles dataframe
508518
df = WebsiteAnalysis.get_articles_list_dataframe()
519+
article_sources = [REPO_ROOT / str(filepath) for filepath in df["filepath"].tolist()]
520+
template_sources = list((REPO_ROOT / "templates").glob("blog.html"))
521+
template_sources.extend((REPO_ROOT / "templates" / "blog").glob("*.html"))
522+
manifest = BuildManifest()
523+
output_paths = [
524+
REPO_ROOT / "blog" / "index.html",
525+
REPO_ROOT / "atom.xml",
526+
REPO_ROOT / "rss.xml",
527+
REPO_ROOT / "en" / "blog.html",
528+
REPO_ROOT / "fr" / "blog.html",
529+
REPO_ROOT / "ml" / "ബ്ലോഗ്.html",
530+
REPO_ROOT / "pa" / "ਬਲਾਗ.html",
531+
REPO_ROOT / "hi" / "ब्लॉग.html",
532+
]
533+
sources = [Path(__file__), *template_sources, *article_sources]
534+
535+
if not args.force and manifest.is_current("blog", sources, output_paths):
536+
print("[SKIP] Blog outputs are up to date.")
537+
return 0
509538

510-
# Generate complete article list with year organization
511539
BlogGenerator.generate_complete_list(df)
512-
513-
# Generate feeds (latest N articles)
514540
BlogGenerator.generate_feed(df, feed_count=20)
541+
manifest.update("blog", sources, output_paths)
515542

516543
print("✓ Blog generation complete!")
544+
return 0
517545

518546

519547
if __name__ == "__main__":

0 commit comments

Comments
 (0)