From 1e9dcab01059f75431fe3495020875d737544ca8 Mon Sep 17 00:00:00 2001 From: Adam Argyle Date: Mon, 19 Jan 2026 10:18:35 -0800 Subject: [PATCH 1/2] fixes rss plaintext --- src/lib/rss.ts | 91 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 2 deletions(-) diff --git a/src/lib/rss.ts b/src/lib/rss.ts index 5aceb54..6af0ed1 100644 --- a/src/lib/rss.ts +++ b/src/lib/rss.ts @@ -7,6 +7,91 @@ import { dasherize } from '../utils/dasherize'; import { truncate } from '../utils/truncate'; import starpodConfig from '../../starpod.config'; +/** + * Starting around episode 223, the RSS feed changed from HTML to plain text. + * This transformer converts the new plain-text format to match the old HTML structure exactly. + */ +function transformPlainTextToHtml(text: string): string { + const lines = text.split('\n').map(l => l.trim()).filter(Boolean); + const html: string[] = []; + let i = 0; + + while (i < lines.length) { + const line = lines[i]; + + // Check if this is a timestamp line like "(00:00) - Intro" + if (/^\(\d{2}:\d{2}(?::\d{2})?\)\s*-/.test(line)) { + // Start collecting all consecutive timestamp lines into a list + const listItems: string[] = []; + while (i < lines.length && /^\(\d{2}:\d{2}(?::\d{2})?\)\s*-/.test(lines[i])) { + listItems.push(escapeHtml(lines[i])); + i++; + } + html.push(''); + continue; + } + + // Check if this is a section header (bold text like "**Links**" or just "Links") + if (/^\*\*(.+?)\*\*$/.test(line)) { + const text = line.replace(/^\*\*(.+?)\*\*$/, '$1'); + html.push(`

${escapeHtml(text)}

`); + i++; + continue; + } + + // Check if this looks like a link list item (e.g., "CodeRabbit: https://...") + if (/:?\s*https?:\/\//.test(line)) { + // Collect all consecutive link lines into a list + const linkItems: string[] = []; + while (i < lines.length && /:?\s*https?:\/\//.test(lines[i])) { + linkItems.push(lines[i]); + i++; + } + html.push(''); + continue; + } + + // Default: regular paragraph + html.push(`

${escapeHtml(line)}

`); + i++; + } + + return html.join('\n'); +} + +function escapeHtml(str: string): string { + return str + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') + .replace(/'/g, '''); +} + +function looksLikeHtml(text: string): boolean { + return /<[a-z][\s\S]*>/i.test(text.trim()); +} + export interface Show { title: string; description: string; @@ -102,7 +187,9 @@ export async function getAllEpisodes() { return { id, title: `${title}`, - content: description, + content: looksLikeHtml(description) + ? description + : transformPlainTextToHtml(description), description: truncate(htmlToText(description), 260), duration: itunes_duration, episodeImage: itunes_image?.href, @@ -121,4 +208,4 @@ export async function getAllEpisodes() { episodesCache = episodes; return episodes; -} +} \ No newline at end of file From e4bf56103898e8423684ebca939f4a6e6d10b2bd Mon Sep 17 00:00:00 2001 From: Adam Argyle Date: Mon, 19 Jan 2026 10:39:37 -0800 Subject: [PATCH 2/2] adds tests, abstracts the primary function out of rss --- src/lib/rss-transform.ts | 84 +++++++++++++++++++++++++++++++++++++ src/lib/rss.ts | 89 +++------------------------------------- tests/unit/rss.test.ts | 82 ++++++++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 84 deletions(-) create mode 100644 src/lib/rss-transform.ts create mode 100644 tests/unit/rss.test.ts diff --git a/src/lib/rss-transform.ts b/src/lib/rss-transform.ts new file mode 100644 index 0000000..b99e790 --- /dev/null +++ b/src/lib/rss-transform.ts @@ -0,0 +1,84 @@ +/** + * Starting around episode 223, the RSS feed changed from HTML to plain text. + * This transformer converts the new plain-text format to match the old HTML structure exactly. + */ +export function transformPlainTextToHtml(text: string): string { + const lines = text.split('\n').map(l => l.trim()).filter(Boolean); + const html: string[] = []; + let i = 0; + + while (i < lines.length) { + const line = lines[i]; + + // Check if this is a timestamp line like "(00:00) - Intro" + if (/^\(\d{2}:\d{2}(?::\d{2})?\)\s*-/.test(line)) { + // Start collecting all consecutive timestamp lines into a list + const listItems: string[] = []; + while (i < lines.length && /^\(\d{2}:\d{2}(?::\d{2})?\)\s*-/.test(lines[i])) { + listItems.push(escapeHtml(lines[i])); + i++; + } + html.push(''); + continue; + } + + // Check if this is a section header (bold text like "**Links**" or just "Links") + if (/^\*\*(.+?)\*\*$/.test(line)) { + const text = line.replace(/^\*\*(.+?)\*\*$/, '$1'); + html.push(`

${escapeHtml(text)}

`); + i++; + continue; + } + + // Check if this looks like a link list item (e.g., "CodeRabbit: https://...") + if (/:?\s*https?:\/\//.test(line)) { + // Collect all consecutive link lines into a list + const linkItems: string[] = []; + while (i < lines.length && /:?\s*https?:\/\//.test(lines[i])) { + linkItems.push(lines[i]); + i++; + } + html.push(''); + continue; + } + + // Default: regular paragraph + html.push(`

${escapeHtml(line)}

`); + i++; + } + + return html.join('\n'); +} + +export function escapeHtml(str: string): string { + return str + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/\"/g, '"') + .replace(/'/g, '''); +} + +export function looksLikeHtml(text: string): boolean { + return /<[a-z][\s\S]*>/i.test(text.trim()); +} diff --git a/src/lib/rss.ts b/src/lib/rss.ts index 6af0ed1..8d01441 100644 --- a/src/lib/rss.ts +++ b/src/lib/rss.ts @@ -3,94 +3,15 @@ import parseFeed from 'rss-to-json'; import { array, number, object, optional, parse, string } from 'valibot'; import { optimizeImage } from './optimize-episode-image'; +import { + escapeHtml, + looksLikeHtml, + transformPlainTextToHtml +} from './rss-transform'; import { dasherize } from '../utils/dasherize'; import { truncate } from '../utils/truncate'; import starpodConfig from '../../starpod.config'; -/** - * Starting around episode 223, the RSS feed changed from HTML to plain text. - * This transformer converts the new plain-text format to match the old HTML structure exactly. - */ -function transformPlainTextToHtml(text: string): string { - const lines = text.split('\n').map(l => l.trim()).filter(Boolean); - const html: string[] = []; - let i = 0; - - while (i < lines.length) { - const line = lines[i]; - - // Check if this is a timestamp line like "(00:00) - Intro" - if (/^\(\d{2}:\d{2}(?::\d{2})?\)\s*-/.test(line)) { - // Start collecting all consecutive timestamp lines into a list - const listItems: string[] = []; - while (i < lines.length && /^\(\d{2}:\d{2}(?::\d{2})?\)\s*-/.test(lines[i])) { - listItems.push(escapeHtml(lines[i])); - i++; - } - html.push(''); - continue; - } - - // Check if this is a section header (bold text like "**Links**" or just "Links") - if (/^\*\*(.+?)\*\*$/.test(line)) { - const text = line.replace(/^\*\*(.+?)\*\*$/, '$1'); - html.push(`

${escapeHtml(text)}

`); - i++; - continue; - } - - // Check if this looks like a link list item (e.g., "CodeRabbit: https://...") - if (/:?\s*https?:\/\//.test(line)) { - // Collect all consecutive link lines into a list - const linkItems: string[] = []; - while (i < lines.length && /:?\s*https?:\/\//.test(lines[i])) { - linkItems.push(lines[i]); - i++; - } - html.push(''); - continue; - } - - // Default: regular paragraph - html.push(`

${escapeHtml(line)}

`); - i++; - } - - return html.join('\n'); -} - -function escapeHtml(str: string): string { - return str - .replace(/&/g, '&') - .replace(//g, '>') - .replace(/"/g, '"') - .replace(/'/g, '''); -} - -function looksLikeHtml(text: string): boolean { - return /<[a-z][\s\S]*>/i.test(text.trim()); -} export interface Show { title: string; diff --git a/tests/unit/rss.test.ts b/tests/unit/rss.test.ts new file mode 100644 index 0000000..e78b58a --- /dev/null +++ b/tests/unit/rss.test.ts @@ -0,0 +1,82 @@ +import { describe, expect, it } from 'vitest'; +import { + escapeHtml, + looksLikeHtml, + transformPlainTextToHtml +} from '../../src/lib/rss-transform'; + +describe('RSS Transformation Functions', () => { + describe('escapeHtml', () => { + it('escapes multiple special characters', () => { + expect(escapeHtml('')).toBe( + '<script>alert("XSS & stuff")</script>' + ); + }); + + it('returns unchanged string without special characters', () => { + expect(escapeHtml('Hello World')).toBe('Hello World'); + }); + }); + + describe('looksLikeHtml', () => { + it('detects HTML tags vs plain text', () => { + expect(looksLikeHtml('

Hello

')).toBe(true); + expect(looksLikeHtml('Just plain text')).toBe(false); + expect(looksLikeHtml('5 < 10 and 10 > 5')).toBe(false); + }); + }); + + describe('transformPlainTextToHtml', () => { + it('handles mixed content with lists, headers, and paragraphs', () => { + const input = `**Episode Summary** + +This episode covers many topics & details. + +**Timestamps** + +(00:00) - Introduction +(05:30) - Main discussion +(00:00:15) - With seconds + +**Links** + +GitHub: https://github.com/example +https://example.com +Company & Co: https://example.com?foo=bar&baz=qux + +Thanks for listening!`; + + const output = transformPlainTextToHtml(input); + + expect(output).toContain('

Episode Summary

'); + expect(output).toContain('

This episode covers many topics & details.

'); + expect(output).toContain('
  • (00:00) - Introduction
  • '); + expect(output).toContain('
  • (00:00:15) - With seconds
  • '); + expect(output.match(/