Skip to content

Commit 8d0464f

Browse files
authored
Merge branch 'main' into copilot/add-centralized-error-boundary
2 parents c5304aa + f38f0a5 commit 8d0464f

5 files changed

Lines changed: 67 additions & 6 deletions

File tree

scripts/article-template.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@
1717
* @author Hack23 AB
1818
* @license Apache-2.0
1919
*/
20-
export { generateArticleHTML, generateEventCalendar, generateWatchSection, generateArticleLanguageSwitcher, generateSiteFooter, default } from './article-template/index.js';
20+
export { generateArticleHTML, generateEventCalendar, generateWatchSection, generateArticleLanguageSwitcher, generateSiteFooter, fixHtmlNesting, default } from './article-template/index.js';

scripts/article-template/helpers.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,22 @@ export function getNewsIndexFilename(lang: Language | string): string {
4646
return `index_${lang}.html`;
4747
}
4848

49+
/**
 * Remove orphaned `</p>` tags that appear immediately after `</ul>` or `</ol>`.
 *
 * Browsers auto-close `<p>` before block-level list elements, so AI-generated
 * markup of the form `<p>intro</p><ul>…</ul></p>` leaves a dangling `</p>`.
 * This function drops that dangling `</p>` together with any whitespace that
 * separates it from the list's closing tag. It does not attempt any other
 * HTML repair.
 *
 * Matching is case-insensitive and tolerates whitespace before the closing
 * `>` (e.g. `</UL></P>` or `</ul ></p >`), because HTML tag names are
 * case-insensitive and end tags may contain trailing whitespace.
 *
 * @param htmlContent - Raw (not yet HTML-escaped) markup to clean up.
 * @returns The markup with every "list end tag followed by `</p>`" sequence
 *   reduced to the list end tag, which is kept exactly as it was written.
 */
export function fixHtmlNesting(htmlContent: string): string {
  // Group 1 captures the list's own end tag so it is re-emitted verbatim
  // (original casing and spacing); only the whitespace and the stray </p>
  // that follow it are discarded.
  return htmlContent.replace(/(<\/(?:ul|ol)\s*>)\s*<\/p\s*>/gi, '$1');
}
59+
4960
/**
5061
* Sanitize article body content for JSON-LD structured data.
5162
* Removes newlines and normalizes whitespace to prevent invalid JSON.
63+
* Callers should apply {@link fixHtmlNesting} to the raw HTML *before*
64+
* escaping it and passing it here: once the tags have been HTML-escaped, the pattern used by {@link fixHtmlNesting} can no longer match.
5265
*/
5366
export function sanitizeArticleBody(htmlContent: string): string {
5467
return htmlContent

scripts/article-template/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*/
1616

1717
export { generateArticleHTML } from './template.js';
18-
export { generateEventCalendar, generateWatchSection, generateArticleLanguageSwitcher, generateSiteFooter } from './helpers.js';
18+
export { generateEventCalendar, generateWatchSection, generateArticleLanguageSwitcher, generateSiteFooter, fixHtmlNesting } from './helpers.js';
1919

2020
import { generateArticleHTML } from './template.js';
2121
import { generateEventCalendar, generateWatchSection, generateArticleLanguageSwitcher, generateSiteFooter } from './helpers.js';

scripts/article-template/template.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import {
1818
getFooterLabel,
1919
getNewsIndexFilename,
2020
sanitizeArticleBody,
21+
fixHtmlNesting,
2122
formatDate,
2223
generateEventCalendar,
2324
generateWatchSection,
@@ -57,6 +58,9 @@ export function generateArticleHTML(data: ArticleData): string {
5758
const formattedDate: string = formatDate(dateObj, lang);
5859
const isoDate: string = dateObj.toISOString().split('T')[0] ?? '';
5960

61+
// Fix invalid HTML nesting once so both the rendered body and JSON-LD are consistent
62+
const fixedContent: string = fixHtmlNesting(content);
63+
6064
// Fall back to English labels if language not supported
6165
const typeLabel: string = TYPE_LABELS[lang]?.[type] || TYPE_LABELS.en[type] || 'News';
6266

@@ -158,8 +162,8 @@ ${ALL_LANG_CODES.map(l => ` <link rel="alternate" hreflang="${l}" href="https:/
158162
"height": 630
159163
},
160164
"articleSection": "${typeLabel}",
161-
"articleBody": "${sanitizeArticleBody(escapeHtml(content))}...",
162-
"wordCount": ${Math.ceil(content.length / 5)},
165+
"articleBody": "${sanitizeArticleBody(escapeHtml(fixedContent))}...",
166+
"wordCount": ${Math.ceil(fixedContent.length / 5)},
163167
"inLanguage": "${lang}",
164168
"keywords": "${keywords.join(', ')}",
165169
"about": {
@@ -270,7 +274,7 @@ ${events.length > 0 ? generateEventCalendar(events as ReadonlyArray<EventGridIte
270274
${subtitle}
271275
</p>
272276
273-
${content}
277+
${fixedContent}
274278
275279
${watchPoints.length > 0 ? generateWatchSection(watchPoints as ReadonlyArray<WatchPoint>, lang) : ''}
276280

tests/article-template.test.ts

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
*/
55

66
import { describe, it, expect, afterEach, vi } from 'vitest';
7-
import { generateArticleHTML, generateArticleLanguageSwitcher, generateSiteFooter } from '../scripts/article-template.js';
7+
import { generateArticleHTML, generateArticleLanguageSwitcher, generateSiteFooter, fixHtmlNesting } from '../scripts/article-template.js';
88
import articleTemplateDefault from '../scripts/article-template.js';
99
import type { Language } from '../scripts/types/language.js';
1010
import type { ArticleData, ArticleCategory, EventGridItem, WatchPoint } from '../scripts/types/article.js';
@@ -890,4 +890,48 @@ describe('Article Template', () => {
890890
expect(typeof articleTemplateDefault.generateSiteFooter).toBe('function');
891891
});
892892
});
893+
894+
describe('fixHtmlNesting', () => {
895+
it('should remove orphaned </p> after </ul>', () => {
896+
const input = '<p>intro</p><ul><li>item</li></ul></p>';
897+
expect(fixHtmlNesting(input)).toBe('<p>intro</p><ul><li>item</li></ul>');
898+
});
899+
900+
it('should remove orphaned </p> after </ol>', () => {
901+
const input = '<p>intro</p><ol><li>step</li></ol></p>';
902+
expect(fixHtmlNesting(input)).toBe('<p>intro</p><ol><li>step</li></ol>');
903+
});
904+
905+
it('should handle whitespace between </ul> and </p>', () => {
906+
const input = '<ul><li>x</li></ul> </p>';
907+
expect(fixHtmlNesting(input)).toBe('<ul><li>x</li></ul>');
908+
});
909+
910+
it('should leave valid </p> tags unchanged', () => {
911+
const input = '<p>valid paragraph</p><p>another</p>';
912+
expect(fixHtmlNesting(input)).toBe('<p>valid paragraph</p><p>another</p>');
913+
});
914+
915+
it('should fix the pattern in rendered article HTML body', () => {
916+
const data: MockArticleData = {
917+
...mockArticleData,
918+
content: '<p>Key proposals include:</p><ul><li><strong>Item A</strong></li></ul></p>'
919+
};
920+
const html = generateArticleHTML(data as unknown as ArticleData) as string;
921+
expect(html).not.toContain('</ul></p>');
922+
expect(html).not.toContain('</ol></p>');
923+
});
924+
925+
it('should fix the pattern in JSON-LD articleBody', () => {
926+
const data: MockArticleData = {
927+
...mockArticleData,
928+
content: '<p>Key proposals include:</p><ul><li><strong>Item A</strong></li></ul></p>'
929+
};
930+
const html = generateArticleHTML(data as unknown as ArticleData) as string;
931+
const jsonLdMatch = html.match(/<script type="application\/ld\+json">([\s\S]*?)<\/script>/);
932+
expect(jsonLdMatch).not.toBeNull();
933+
// The escaped form of </ul></p> should not appear in JSON-LD articleBody
934+
expect(jsonLdMatch![1]).not.toContain('&lt;/ul&gt;&lt;/p&gt;');
935+
});
936+
});
893937
});

0 commit comments

Comments
 (0)