88# Generate an up-to-date list of articles in reverse chronological order
99#!/usr/bin/env python3
1010
11+ import argparse
1112import sys
1213from typing import Dict , List , Set , Tuple
1314from dataclasses import dataclass
2021from pytz import timezone
2122
2223from analyse import HTMLTextAnalysis , WebsiteAnalysis
24+ from config import SITE_AUTHOR , SITE_URL
2325from git import get_first_latest_modification
26+ from manifest import BuildManifest
27+ from paths import REPO_ROOT
28+ from site_text import strip_author_from_title
2429
2530
2631@dataclass
@@ -66,6 +71,7 @@ class BlogGenerator:
6671 @staticmethod
6772 def clean_title (title : str ) -> str :
6873 """Remove author name variations from title."""
74+ return strip_author_from_title (title )
6975 replacements = [
7076 ": John Samuel" ,
7177 ": ജോൺ ശമൂവേൽ" ,
@@ -463,12 +469,12 @@ def generate_feed(df: pd.DataFrame, feed_count: int = 20) -> None:
463469
464470 articleset : Set [str ] = set ()
465471 fg = FeedGenerator ()
466- fg .id ("https://johnsamuel.info" )
467- fg .title ("John Samuel" )
468- fg .description ("Personal Blog of John Samuel " )
469- fg .author ({"name" : "John Samuel" })
472+ fg .id (SITE_URL )
473+ fg .title (SITE_AUTHOR )
474+ fg .description (f "Personal Blog of { SITE_AUTHOR } " )
475+ fg .author ({"name" : SITE_AUTHOR })
470476 fg .language ("en" )
471- fg .link (href = "https://johnsamuel.info" )
477+ fg .link (href = SITE_URL )
472478
473479 for _ , row in df .iterrows ():
474480 if row ["filepath" ] in articleset :
@@ -479,15 +485,15 @@ def generate_feed(df: pd.DataFrame, feed_count: int = 20) -> None:
479485 metadata = BlogGenerator .extract_article_metadata (row ["filepath" ])
480486
481487 fe = fg .add_entry (order = "append" )
482- fe .id (f"https://johnsamuel.info /{ row ['filepath' ]} " )
488+ fe .id (f"{ SITE_URL } /{ row ['filepath' ]} " )
483489 fe .title (metadata .title )
484490 fe .pubDate (
485491 datetime .fromtimestamp (
486492 metadata .modification_time , tz = timezone ("Europe/Amsterdam" )
487493 )
488494 )
489495 fe .description (metadata .title )
490- fe .link (href = f"https://johnsamuel.info /{ row ['filepath' ]} " )
496+ fe .link (href = f"{ SITE_URL } /{ row ['filepath' ]} " )
491497 except Exception as e :
492498 print (f"Error adding to feed { row ['filepath' ]} : { e } " )
493499 continue
@@ -499,21 +505,43 @@ def generate_feed(df: pd.DataFrame, feed_count: int = 20) -> None:
499505
def main(argv=None) -> int:
    """Generate the blog pages and feeds unless the manifest says they are current.

    Args:
        argv: Command-line arguments without the program name. When ``None``,
            argparse reads ``sys.argv[1:]`` itself.

    Returns:
        ``0`` both when the outputs were regenerated and when generation was
        skipped. Invalid arguments make argparse exit via ``SystemExit``.
    """
    parser = argparse.ArgumentParser(
        description="Generate blog pages and feeds."
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Regenerate outputs even when the build manifest says they are current.",
    )
    # parse_args(None) already falls back to sys.argv[1:].
    args = parser.parse_args(argv)

    df = WebsiteAnalysis.get_articles_list_dataframe()

    # Inputs whose changes should trigger a rebuild: this script, the blog
    # templates, and every article listed in the dataframe.
    article_sources = [
        REPO_ROOT / str(filepath) for filepath in df["filepath"].tolist()
    ]
    templates_dir = REPO_ROOT / "templates"
    # glob() is used even for the single file so a missing template yields an
    # empty list instead of a path that does not exist.
    template_sources = [
        *templates_dir.glob("blog.html"),
        *(templates_dir / "blog").glob("*.html"),
    ]
    sources = [Path(__file__), *template_sources, *article_sources]

    # Files this run is expected to produce. The Malayalam and Hindi names were
    # previously mojibake (UTF-8 read as cp1252) and named files that are never
    # written; they are restored to the intended Unicode names here.
    output_paths = [
        REPO_ROOT / "blog" / "index.html",
        REPO_ROOT / "atom.xml",
        REPO_ROOT / "rss.xml",
        REPO_ROOT / "en" / "blog.html",
        REPO_ROOT / "fr" / "blog.html",
        REPO_ROOT / "ml" / "ബ്ലോഗ്.html",
        REPO_ROOT / "pa" / "ਬਲਾਗ.html",
        REPO_ROOT / "hi" / "ब्लॉग.html",
    ]

    manifest = BuildManifest()
    # NOTE(review): is_current presumably compares sources against outputs
    # recorded under the "blog" key; confirm against manifest.py.
    if not args.force and manifest.is_current("blog", sources, output_paths):
        print("[SKIP] Blog outputs are up to date.")
        return 0

    # Full article list first, then the feeds for the latest 20 articles.
    BlogGenerator.generate_complete_list(df)
    BlogGenerator.generate_feed(df, feed_count=20)
    manifest.update("blog", sources, output_paths)

    print("✓ Blog generation complete!")
    return 0
517545
518546
519547if __name__ == "__main__" :
0 commit comments