@@ -21,39 +21,28 @@ const { Content } = await post.render();
2121const postUrl = ` https://blog.x7md.net/posts/${post .slug }/ ` ;
2222const postContent = await post .render ();
2323
24- // Extract article body text from markdown content
25- // Remove markdown syntax and get clean text for schema.org
26- function extractTextFromMarkdown(markdown : string ) {
27- if (! markdown ) return ' ' ;
24+ // Function to clean and escape text for JSON-LD
25+ function cleanTextForJsonLd(text : string ) {
26+ if (! text ) return ' ' ;
2827
29- // Remove markdown headers, links, code blocks, etc.
30- let text = markdown
31- .replace (/ ^ #{1,6} \s + / gm , ' ' ) // Remove headers
32- .replace (/ \[ ([^ \] ] + )\]\( [^ )] + \) / g , ' $1' ) // Convert links to text
33- .replace (/ ```[\s\S ] *? ```/ g , ' ' ) // Remove code blocks
34- .replace (/ `([^ `] + )`/ g , ' $1' ) // Remove inline code
35- .replace (/ \*\* ([^ *] + )\*\* / g , ' $1' ) // Remove bold
36- .replace (/ \* ([^ *] + )\* / g , ' $1' ) // Remove italic
37- .replace (/ !\[ ([^ \] ] * )\]\( [^ )] + \) / g , ' ' ) // Remove images
38- .replace (/ ^ \s * [-*+] \s + / gm , ' ' ) // Remove list markers
39- .replace (/ ^ \s * \d + \. \s + / gm , ' ' ) // Remove numbered list markers
40- .replace (/ ^ \s * >\s + / gm , ' ' ) // Remove blockquotes
41- .replace (/ \n {3,} / g , ' \n\n ' ) // Normalize line breaks
28+ return text
29+ .replace (/ <[^ >] * >/ g , ' ' ) // Remove HTML tags
30+ .replace (/ &[a-zA-Z0-9 #] + ;/ g , ' ' ) // Remove HTML entities
31+ .replace (/ \s + / g , ' ' ) // Normalize whitespace
32+ .replace (/ \\ / g , ' \\\\ ' ) // Escape backslashes
33+ .replace (/ "/ g , ' \\ "' ) // Escape quotes
34+ .replace (/ \n / g , ' ' ) // Replace newlines with spaces
35+ .replace (/ \r / g , ' ' ) // Remove carriage returns
36+ .replace (/ \t / g , ' ' ) // Replace tabs with spaces
4237 .trim ();
43-
44- // Truncate to reasonable length for schema.org (first 500 words)
45- const words = text .split (/ \s + / ).filter (word => word .length > 0 );
46- if (words .length > 500 ) {
47- text = words .slice (0 , 500 ).join (' ' ) + ' ...' ;
48- }
49-
50- return text ;
5138}
5239
53- const articleBodyText = extractTextFromMarkdown (post .body ) || post .data .description ;
40+ // Use description as articleBody for schema.org (clean and safe approach)
41+ // In the future, we could enhance this by rendering content server-side
42+ const articleBodyText = cleanTextForJsonLd (post .data .description );
5443
55- // Count words in the content (more accurate )
56- const wordCount = post .body ? post .body .split (/ \s + / ).filter (word => word .trim ().length > 0 ).length : 0 ;
44+ // Count words in the description (what we're using for articleBody )
45+ const wordCount = post .data . description ? post .data . description .split (/ \s + / ).filter (word => word .trim ().length > 0 ).length : 0 ;
5746
5847// Calculate reading time (average 200 words per minute)
5948const readingTimeMinutes = Math .ceil (wordCount / 200 );
0 commit comments