import { describe, it, expect, vi, afterEach } from 'vitest';

/**
 * Minimal 36kr-style RSS 2.0 fixture: three items whose links are wrapped in
 * CDATA and whose pubDate values start with a `YYYY-MM-DD` date.
 */
const SAMPLE_RSS = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel><title>36氪</title>
<item>
  <title>红杉中国领投AI公司「示例」,金额近2亿元</title>
  <link><![CDATA[https://36kr.com/p/1111111111111111?f=rss]]></link>
  <pubDate>2026-03-26 10:00:00 +0800</pubDate>
</item>
<item>
  <title>马斯克旗下xAI估值突破1000亿美元</title>
  <link><![CDATA[https://36kr.com/p/2222222222222222?f=rss]]></link>
  <pubDate>2026-03-26 09:00:00 +0800</pubDate>
</item>
<item>
  <title>OpenAI发布GPT-5,多模态能力大幅提升</title>
  <link><![CDATA[https://36kr.com/p/3333333333333333?f=rss]]></link>
  <pubDate>2026-03-25 20:00:00 +0800</pubDate>
</item>
</channel></rss>`;

/** One parsed feed entry, in the shape the assertions below check. */
interface NewsItem {
  rank: number;
  title: string;
  date: string;
  url: string;
}

/**
 * Extracts up to `limit` news items from an RSS document using regexes.
 *
 * NOTE(review): this is intended to mirror the regex logic in news.ts; that
 * module is not imported here, so the two can drift. Prefer importing the real
 * parser once it is exported.
 *
 * @param xml - Raw RSS document text.
 * @param limit - Maximum number of items to return (defaults to 10).
 * @returns Items in document order, ranked from 1. Items whose title is empty
 *   after trimming are skipped and do not consume a rank.
 */
function parseRssItems(xml: string, limit = 10): NewsItem[] {
  const items: NewsItem[] = [];

  for (const match of xml.matchAll(/<item>([\s\S]*?)<\/item>/g)) {
    if (items.length >= limit) break;

    const block = match[1] ?? '';
    const title = block.match(/<title>([\s\S]*?)<\/title>/)?.[1]?.trim() ?? '';
    if (!title) continue;

    // Prefer the CDATA-wrapped link; fall back to a plain <link> element.
    const url =
      block.match(/<link><!\[CDATA\[(.*?)\]\]>/)?.[1] ??
      block.match(/<link>(.*?)<\/link>/)?.[1] ??
      '';
    const pubDate = block.match(/<pubDate>(.*?)<\/pubDate>/)?.[1]?.trim() ?? '';

    items.push({
      rank: items.length + 1,
      title,
      // Only the leading YYYY-MM-DD portion of pubDate is kept.
      date: pubDate.slice(0, 10),
      url: url.trim(),
    });
  }

  return items;
}

// Defensive: undo any spies a test may install so mocks cannot leak between cases.
afterEach(() => {
  vi.restoreAllMocks();
});

describe('36kr/news RSS parsing', () => {
  it('parses RSS feed into ranked news items', () => {
    const items = parseRssItems(SAMPLE_RSS);

    expect(items).toHaveLength(3);
    expect(items[0]).toEqual({
      rank: 1,
      title: '红杉中国领投AI公司「示例」,金额近2亿元',
      date: '2026-03-26',
      url: 'https://36kr.com/p/1111111111111111?f=rss',
    });
  });

  it('respects limit — returns at most N items', () => {
    const items = parseRssItems(SAMPLE_RSS, 2);

    expect(items).toHaveLength(2);
    expect(items.map((item) => item.rank)).toEqual([1, 2]);
  });

  it('skips items with empty title', () => {
    const xml = `<rss><channel>
      <item><title></title><link>https://36kr.com/p/0</link><pubDate>2026-01-01</pubDate></item>
      <item><title>有标题的文章</title><link>https://36kr.com/p/1</link><pubDate>2026-01-01</pubDate></item>
    </channel></rss>`;

    const items = parseRssItems(xml);

    // The untitled item is dropped, so the survivor is ranked first; its plain
    // (non-CDATA) <link> is picked up by the fallback pattern.
    expect(items).toEqual([
      {
        rank: 1,
        title: '有标题的文章',
        date: '2026-01-01',
        url: 'https://36kr.com/p/1',
      },
    ]);
  });
});