Hack23 · pethers · Mar 1, 2026 · Mar 1, 2026 · Mar 1, 2026 · Mar 1, 2026
diff --git a/scripts/extract-news-metadata.ts b/scripts/extract-news-metadata.ts
@@ -10,7 +10,7 @@
  */
 
 import { readFileSync, writeFileSync, readdirSync, mkdirSync } from 'fs';
-import { join, dirname } from 'path';
+import { join, dirname, relative } from 'path';
 import { fileURLToPath } from 'url';
 
 const __filename = fileURLToPath(import.meta.url);
@@ -56,10 +56,22 @@ interface JsonLdArticle {
   url?: string;
 }
 
+/** Recursively list regular `.html` files under `dir`, excluding names that start with `index`. */
+function collectNewsFiles(dir: string): string[] {
+  return readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
+    const entryPath = join(dir, entry.name);
+    // Nested folders are walked and their matches flattened into the result.
+    if (entry.isDirectory()) return collectNewsFiles(entryPath);
+    // isFile() rejects symlinks and other special entries that only carry an .html name.
+    const isArticle = entry.isFile() && entry.name.endsWith('.html') && !entry.name.startsWith('index');
+    return isArticle ? [entryPath] : [];
+  });
+}
+
 function extractMetadata(): void {
   const newsDir = join(ROOT, 'news');
-  const files = readdirSync(newsDir)
-    .filter((f: string) => f.endsWith('.html') && !f.startsWith('index'));
+  const allFilePaths = collectNewsFiles(newsDir);
+  const files = allFilePaths.map((fp) => relative(newsDir, fp));
 
   const articles: ArticleMetadata[] = [];
 

diff --git a/scripts/generate-news-indexes/helpers.ts b/scripts/generate-news-indexes/helpers.ts
@@ -311,29 +311,49 @@ export function extractTags(content: string): string[] {
   return tags.slice(0, 4); // Max 4 tags for display
 }
 
+/**
+ * Walk `dir` depth-first and return the path of every article page beneath it.
+ * An article page is a regular file whose name ends in `.html` and does not
+ * begin with `index`; sub-folders (news/{year}/{month}/...) are descended into.
+ */
+function collectArticleFiles(dir: string): string[] {
+  return fs.readdirSync(dir, { withFileTypes: true }).flatMap((dirent) => {
+    const fullPath = path.join(dir, dirent.name);
+    if (dirent.isDirectory()) {
+      return collectArticleFiles(fullPath);
+    }
+    // Anything that is not a regular, non-index .html file contributes nothing.
+    const namedLikeArticle = dirent.name.endsWith('.html') && !dirent.name.startsWith('index');
+    return dirent.isFile() && namedLikeArticle ? [fullPath] : [];
+  });
+}
+
 /**
  * Scan news directory and group articles by language.
+ * Supports date-based subdirectory structure: news/{year}/{month}/article.html
  */
 export function scanNewsArticles(): Record<string, NewsArticleMetadata[]> {
   console.log('\n📰 Scanning for articles...');
 
-  const files: string[] = fs.readdirSync(NEWS_DIR)
-    .filter((file) => file.endsWith('.html'))
-    .filter((file) => !file.startsWith('index')); // Exclude index files
+  const filePaths: string[] = collectArticleFiles(NEWS_DIR);
 
-  console.log(`  Found ${files.length} article files`);
+  console.log(`  Found ${filePaths.length} article files`);
 
   // Initialize buckets for all 14 supported languages
   const articlesByLang: Record<string, NewsArticleMetadata[]> = Object.fromEntries(
     Object.keys(LANGUAGES).map((lang) => [lang, []]),
   );
 
-  files.forEach((file) => {
-    const filePath: string = path.join(NEWS_DIR, file);
+  filePaths.forEach((filePath) => {
     const metadata: NewsArticleMetadata | null = parseArticleMetadata(filePath);
 
-    if (metadata && articlesByLang[metadata.lang]) {
-      articlesByLang[metadata.lang]!.push(metadata);
+    if (metadata) {
+      // Set slug to relative path from NEWS_DIR (e.g., "2026/02/2026-02-13-article-en.html")
+      metadata.slug = path.relative(NEWS_DIR, filePath).split(path.sep).join('/');
+
+      if (articlesByLang[metadata.lang]) {
+        articlesByLang[metadata.lang]!.push(metadata);
+      }
     }
   });
 

diff --git a/scripts/generate-sitemap.ts b/scripts/generate-sitemap.ts
@@ -135,6 +135,7 @@ function getFileModTime(filePath: string): string {
 
 /**
  * Get news articles with metadata.
+ * Supports date-based subdirectory structure: news/{year}/{month}/article.html
  */
 function getNewsArticles(): ArticleGroup[] {
   console.log('📰 Scanning news directory...');
@@ -144,39 +145,49 @@ function getNewsArticles(): ArticleGroup[] {
     return [];
   }
 
-  const files = fs
-    .readdirSync(NEWS_DIR)
-    .filter((file) => file.endsWith('.html') && file !== 'index.html' && !file.startsWith('index_'));
-
-  console.log(`  Found ${files.length} news articles`);
-
   // Group articles by base slug (without language suffix)
   const articles = new Map<string, ArticleGroup>();
 
-  files.forEach((file) => {
-    const match = file.match(/^(.+?)-(en|sv|da|no|fi|de|fr|es|nl|ar|he|ja|ko|zh)\.html$/);
-    if (match) {
-      const baseSlug = match[1]!;
-      const lang = match[2]!;
-      const filePath = path.join(NEWS_DIR, file);
-      const fileModTime = getFileModTime(filePath);
-
-      if (!articles.has(baseSlug)) {
-        articles.set(baseSlug, {
-          baseSlug,
-          languages: [],
-          lastmod: fileModTime,
-        });
-      } else {
-        const article = articles.get(baseSlug)!;
-        if (!article.lastmod || new Date(fileModTime) > new Date(article.lastmod)) {
-          article.lastmod = fileModTime;
+  function scanDir(dir: string): void { // recursive walk; fills the enclosing `articles` map
+    const entries = fs.readdirSync(dir, { withFileTypes: true });
+    for (const entry of entries) {
+      if (entry.isDirectory()) {
+        scanDir(path.join(dir, entry.name)); // descend into nested folders
+      } else if (entry.isFile() && entry.name !== 'index.html' && !entry.name.startsWith('index_') && entry.name.endsWith('.html')) {
+        const file = entry.name;
+        const match = file.match(/^(.+?)-(en|sv|da|no|fi|de|fr|es|nl|ar|he|ja|ko|zh)\.html$/); // [1] = base slug, [2] = language code
+        if (match) {
+          const baseSlug = match[1]!;
+          const lang = match[2]!;
+          const filePath = path.join(dir, file);
+          const fileModTime = getFileModTime(filePath);
+
+          // Include subdirectory prefix in baseSlug (e.g., "2026/02/2026-02-13-article")
+          const relDir = path.relative(NEWS_DIR, dir).split(path.sep).join('/'); // '' when dir is NEWS_DIR itself
+          const fullBaseSlug = relDir ? `${relDir}/${baseSlug}` : baseSlug;
+          // The first file seen for a group creates its entry; later files only advance lastmod.
+          if (!articles.has(fullBaseSlug)) {
+            articles.set(fullBaseSlug, {
+              baseSlug: fullBaseSlug,
+              languages: [],
+              lastmod: fileModTime,
+            });
+          } else {
+            const article = articles.get(fullBaseSlug)!;
+            if (!article.lastmod || new Date(fileModTime) > new Date(article.lastmod)) {
+              article.lastmod = fileModTime; // keep the most recent timestamp in the group
+            }
+          }
+          // The group entry exists at this point, so record this language variant on it.
+          articles.get(fullBaseSlug)!.languages.push(lang);
         }
       }
-
-      articles.get(baseSlug)!.languages.push(lang);
     }
-  });
+  }
+
+  scanDir(NEWS_DIR);
+
+  console.log(`  Found ${articles.size} news article groups`);
 
   return Array.from(articles.values());
 }

diff --git a/tests/sitemap-generation.test.ts b/tests/sitemap-generation.test.ts
@@ -118,12 +118,12 @@ describe('Sitemap Generation', () => {
     });
 
     it('should include articles in multiple languages', () => {
-      // Check for language-specific news articles (year-agnostic)
+      // Check for language-specific news articles (flat or date-based directory structure)
       const languages: readonly string[] = ['en', 'sv', 'da', 'no', 'fi', 'de', 'fr', 'es', 'nl', 'ar', 'he', 'ja', 'ko', 'zh'];
       const foundLanguages = new Set<string>();
 
       languages.forEach(lang => {
-        const pattern = new RegExp(`news/\\d{4}-\\d{2}-\\d{2}-.+-${lang}\\.html`);
+        const pattern = new RegExp(`news/(?:\\d{4}/\\d{2}/)?\\d{4}-\\d{2}-\\d{2}-.+-${lang}\\.html`);
         if (pattern.test(sitemapContent)) {
           foundLanguages.add(lang);
         }

diff --git a/vite.config.js b/vite.config.js
@@ -11,23 +11,32 @@
 import { defineConfig } from 'vite';
 import sri from 'vite-plugin-sri-gen';
 import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
 
 /**
- * Auto-discover news article HTML files from the news/ directory.
+ * Auto-discover news article HTML files from the news/ directory recursively.
+ * Supports date-based subdirectory structure: news/{year}/{month}/article.html
  * This prevents new articles from being excluded from the Vite build
  * (and thus missing from S3 deployment).
  */
 function discoverNewsArticles() {
-  const newsDir = new URL('./news', import.meta.url);
+  const newsDir = fileURLToPath(new URL('./news', import.meta.url)); // plain path string so path.join works below
   const entries = {};
-  if (fs.existsSync(newsDir)) {
-    for (const file of fs.readdirSync(newsDir)) {
-      if (file.endsWith('.html') && !file.startsWith('index')) {
-        const name = file.replace('.html', '');
-        entries[`news/${name}`] = `./news/${file}`;
+  // relPrefix is the '/'-terminated path of dir relative to news/ ('' at the top level).
+  function scanDir(dir, relPrefix) {
+    if (!fs.existsSync(dir)) return; // a missing news/ directory yields no entries
+    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
+      if (entry.isDirectory()) {
+        scanDir(path.join(dir, entry.name), relPrefix + entry.name + '/');
+      } else if (entry.isFile() && entry.name.endsWith('.html') && !entry.name.startsWith('index')) {
+        const name = relPrefix + entry.name.slice(0, -'.html'.length); // strip only the trailing extension
+        entries[`news/${name}`] = `./news/${relPrefix}${entry.name}`;
       }
     }
   }
+
+  scanDir(newsDir, '');
   return entries;
 }