-
Notifications
You must be signed in to change notification settings - Fork 2
Keep recursive news scanning; revert existing articles to flat news/ (URL preservation) #698
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
7dcee44
e56aa55
fc8e3fb
a8f6b2e
57c2fcb
bcb38c2
9ff2641
338170c
c3a60cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -135,6 +135,7 @@ function getFileModTime(filePath: string): string { | |||||
|
|
||||||
| /** | ||||||
| * Get news articles with metadata. | ||||||
| * Supports date-based subdirectory structure: news/{year}/{month}/article.html | ||||||
| */ | ||||||
| function getNewsArticles(): ArticleGroup[] { | ||||||
| console.log('📰 Scanning news directory...'); | ||||||
|
|
@@ -144,39 +145,49 @@ function getNewsArticles(): ArticleGroup[] { | |||||
| return []; | ||||||
| } | ||||||
|
|
||||||
| const files = fs | ||||||
| .readdirSync(NEWS_DIR) | ||||||
| .filter((file) => file.endsWith('.html') && file !== 'index.html' && !file.startsWith('index_')); | ||||||
|
|
||||||
| console.log(` Found ${files.length} news articles`); | ||||||
|
|
||||||
| // Group articles by base slug (without language suffix) | ||||||
| const articles = new Map<string, ArticleGroup>(); | ||||||
|
|
||||||
| files.forEach((file) => { | ||||||
| const match = file.match(/^(.+?)-(en|sv|da|no|fi|de|fr|es|nl|ar|he|ja|ko|zh)\.html$/); | ||||||
| if (match) { | ||||||
| const baseSlug = match[1]!; | ||||||
| const lang = match[2]!; | ||||||
| const filePath = path.join(NEWS_DIR, file); | ||||||
| const fileModTime = getFileModTime(filePath); | ||||||
|
|
||||||
| if (!articles.has(baseSlug)) { | ||||||
| articles.set(baseSlug, { | ||||||
| baseSlug, | ||||||
| languages: [], | ||||||
| lastmod: fileModTime, | ||||||
| }); | ||||||
| } else { | ||||||
| const article = articles.get(baseSlug)!; | ||||||
| if (!article.lastmod || new Date(fileModTime) > new Date(article.lastmod)) { | ||||||
| article.lastmod = fileModTime; | ||||||
| function scanDir(dir: string): void { | ||||||
| const entries = fs.readdirSync(dir, { withFileTypes: true }); | ||||||
| for (const entry of entries) { | ||||||
| if (entry.isDirectory()) { | ||||||
| scanDir(path.join(dir, entry.name)); | ||||||
|
Comment on lines
+151
to
+155
|
||||||
| } else if (entry.isFile() && entry.name !== 'index.html' && !entry.name.startsWith('index_') && entry.name.endsWith('.html')) { | ||||||
| const file = entry.name; | ||||||
| const match = file.match(/^(.+?)-(en|sv|da|no|fi|de|fr|es|nl|ar|he|ja|ko|zh)\.html$/); | ||||||
| if (match) { | ||||||
| const baseSlug = match[1]!; | ||||||
| const lang = match[2]!; | ||||||
| const filePath = path.join(dir, file); | ||||||
| const fileModTime = getFileModTime(filePath); | ||||||
|
|
||||||
| // Include subdirectory prefix in baseSlug (e.g., "2026/02/2026-02-13-article") | ||||||
| const relDir = path.relative(NEWS_DIR, dir).split(path.sep).join('/'); | ||||||
| const fullBaseSlug = relDir ? `${relDir}/${baseSlug}` : baseSlug; | ||||||
|
|
||||||
| if (!articles.has(fullBaseSlug)) { | ||||||
| articles.set(fullBaseSlug, { | ||||||
| baseSlug: fullBaseSlug, | ||||||
| languages: [], | ||||||
| lastmod: fileModTime, | ||||||
| }); | ||||||
| } else { | ||||||
| const article = articles.get(fullBaseSlug)!; | ||||||
| if (!article.lastmod || new Date(fileModTime) > new Date(article.lastmod)) { | ||||||
| article.lastmod = fileModTime; | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| articles.get(fullBaseSlug)!.languages.push(lang); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| articles.get(baseSlug)!.languages.push(lang); | ||||||
| } | ||||||
| }); | ||||||
| } | ||||||
|
|
||||||
| scanDir(NEWS_DIR); | ||||||
|
|
||||||
| console.log(` Found ${articles.size} news articles`); | ||||||
|
||||||
| console.log(` Found ${articles.size} news articles`); | |
| console.log(` Found ${articles.size} news article groups`); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in bcb38c2. The log line now reads `Found ${articles.size} news article groups` to make clear that the count is the language-group (base-slug) count, not the individual HTML file count.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -118,12 +118,12 @@ describe('Sitemap Generation', () => { | |
| }); | ||
|
|
||
| it('should include articles in multiple languages', () => { | ||
| // Check for language-specific news articles (year-agnostic) | ||
| // Check for language-specific news articles (flat or date-based directory structure) | ||
| const languages: readonly string[] = ['en', 'sv', 'da', 'no', 'fi', 'de', 'fr', 'es', 'nl', 'ar', 'he', 'ja', 'ko', 'zh']; | ||
| const foundLanguages = new Set<string>(); | ||
|
|
||
| languages.forEach(lang => { | ||
| const pattern = new RegExp(`news/\\d{4}-\\d{2}-\\d{2}-.+-${lang}\\.html`); | ||
| const pattern = new RegExp(`news/(?:\\d{4}/\\d{2}/)?\\d{4}-\\d{2}-\\d{2}-.+-${lang}\\.html`); | ||
| if (pattern.test(sitemapContent)) { | ||
|
Comment on lines
+121
to
127
|
||
| foundLanguages.add(lang); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`relative(newsDir, fp)` will return platform-specific separators (e.g., `\` on Windows). Since `file` and derived `slug` are persisted into `data/news-articles.json` and used to represent URL-like paths, this can produce backslash-containing slugs and inconsistent outputs across OSes. Normalize the relative paths to POSIX-style (`/`) before storing them (and before computing `slug`). There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in 338170c. Added `sep` to the `path` named imports and changed line 74 to
`.split(sep).join('/')` so `file` and derived `slug` always use forward-slash separators in `news-articles.json`, regardless of OS.