Skip to content

Commit 22399ce

Browse files
Conn-Hoclaudejackwener
authored
feat(36kr): add 36氪 CLI adapter (#461)
* feat(36kr): add 36氪 CLI adapter with 4 commands - news: latest articles via public RSS feed (no browser needed), includes title/summary/date/url - hot: trending articles via INTERCEPT strategy, supports --type renqi/zonghe/shoucang/catalog - search: keyword search via INTERCEPT + DOM scraping - article: fetch article detail (title/author/date/body) by ID or URL Also adds vitest adapter project entry for 36kr tests. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * docs(36kr): add adapter documentation Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix(36kr): use Shanghai hot-list dates and complete docs --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: jackwener <jakevingoo@gmail.com>
1 parent ed89157 commit 22399ce

11 files changed

Lines changed: 464 additions & 0 deletions

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ Run `opencli list` for the live registry.
170170
| **douban** | `search` `top250` `subject` `marks` `reviews` `movie-hot` `book-hot` | Browser |
171171
| **facebook** | `feed` `profile` `search` `friends` `groups` `events` `notifications` `memories` `add-friend` `join-group` | Browser |
172172
| **google** | `news` `search` `suggest` `trends` | Public |
173+
| **36kr** | `news` `hot` `search` `article` | Public / Browser |
173174
| **instagram** | `explore` `profile` `search` `user` `followers` `following` `follow` `unfollow` `like` `unlike` `comment` `save` `unsave` `saved` | Browser |
174175
| **lobsters** | `hot` `newest` `active` `tag` | Public |
175176
| **medium** | `feed` `search` `user` | Browser |

README.zh-CN.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ npm install -g @jackwener/opencli@latest
172172
| **douban** | `search` `top250` `subject` `marks` `reviews` `movie-hot` `book-hot` | 浏览器 |
173173
| **facebook** | `feed` `profile` `search` `friends` `groups` `events` `notifications` `memories` `add-friend` `join-group` | 浏览器 |
174174
| **google** | `news` `search` `suggest` `trends` | 公开 |
175+
| **36kr** | `news` `hot` `search` `article` | 公开 / 浏览器 |
175176
| **instagram** | `explore` `profile` `search` `user` `followers` `following` `follow` `unfollow` `like` `unlike` `comment` `save` `unsave` `saved` | 浏览器 |
176177
| **lobsters** | `hot` `newest` `active` `tag` | 公开 |
177178
| **medium** | `feed` `search` `user` | 浏览器 |

docs/adapters/browser/36kr.md

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# 36kr (36氪)
2+
3+
**Mode**: 🌐 Public / 🔐 Browser · **Domain**: `36kr.com`
4+
5+
## Commands
6+
7+
| Command | Description |
8+
|---------|-------------|
9+
| `opencli 36kr hot` | 36氪热榜 — trending articles |
10+
| `opencli 36kr news` | Latest tech/startup news from 36kr |
11+
| `opencli 36kr search <query>` | Search 36kr articles |
12+
| `opencli 36kr article <id-or-url>` | Read full article content |
13+
14+
## Usage Examples
15+
16+
```bash
17+
# Trending articles
18+
opencli 36kr hot --limit 10
19+
20+
# Hot by type
21+
opencli 36kr hot --type renqi --limit 10
22+
opencli 36kr hot --type zonghe --limit 10
23+
24+
# Latest news
25+
opencli 36kr news --limit 20
26+
27+
# Search articles
28+
opencli 36kr search "AI" --limit 10
29+
opencli 36kr search "OpenAI" --limit 5
30+
31+
# Read full article (by ID or URL)
32+
opencli 36kr article 3000000123456
33+
opencli 36kr article https://36kr.com/p/3000000123456
34+
35+
# JSON output
36+
opencli 36kr hot -f json
37+
```
38+
39+
## Notes
40+
41+
- `news` uses the public RSS feed and works without Browser Bridge.
42+
- `hot`, `search`, and `article` use Browser Bridge and are best run with Chrome open.
43+
- `hot --type` accepts `catalog`, `renqi`, `zonghe`, and `shoucang`.
44+
45+
## Prerequisites
46+
47+
- `news` needs no browser — it reads the public RSS feed. `hot`, `search`, and `article` require Browser Bridge with Chrome running.

docs/adapters/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Run `opencli list` for the live registry.
4242
| **[jd](/adapters/browser/jd)** | `item` | 🔐 Browser |
4343
| **[web](/adapters/browser/web)** | `read` | 🔐 Browser |
4444
| **[weixin](/adapters/browser/weixin)** | `download` | 🔐 Browser |
45+
| **[36kr](/adapters/browser/36kr)** | `news` `hot` `search` `article` | 🌐 / 🔐 |
4546

4647
## Public API Adapters
4748

src/clis/36kr/article.ts

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/**
2+
* 36kr article detail — INTERCEPT strategy.
3+
*
4+
* Fetches the full content of a 36kr article given its ID or URL.
5+
*/
6+
import { cli, Strategy } from '../../registry.js';
7+
import { CliError } from '../../errors.js';
8+
import type { IPage } from '../../types.js';
9+
10+
/** Extract article ID from a full URL or a bare numeric ID string */
11+
/**
 * Extract the numeric article ID from user input.
 *
 * Accepts either a URL containing a `/p/<digits>` path segment or a bare
 * all-digit ID (surrounding whitespace is ignored).
 *
 * @param input - Raw CLI argument: an article URL or a numeric ID.
 * @returns The article ID, or `''` when the input is neither form, so the
 *   caller can reject it instead of navigating to a fabricated ID.
 */
function parseArticleId(input: string): string {
  const trimmed = input.trim();

  const fromUrl = trimmed.match(/\/p\/(\d+)/);
  if (fromUrl) return fromUrl[1];

  // Only a pure digit string counts as a bare ID. Stripping every non-digit
  // would turn e.g. "https://36kr.com/newsflashes/42" into "3642".
  return /^\d+$/.test(trimmed) ? trimmed : '';
}
15+
16+
// Registers the `36kr article` command: opens the article page and scrapes
// title / author / date / body from the rendered DOM, returned as field/value rows.
cli({
  site: '36kr',
  name: 'article',
  description: '获取36氪文章正文内容',
  domain: 'www.36kr.com',
  strategy: Strategy.INTERCEPT,
  args: [
    { name: 'id', positional: true, required: true, help: 'Article ID or full 36kr article URL' },
  ],
  columns: ['field', 'value'],
  func: async (page: IPage, args) => {
    const articleId = parseArticleId(String(args.id ?? ''));
    if (!articleId) {
      throw new CliError('INVALID_ARGUMENT', 'Invalid article ID or URL');
    }

    // NOTE(review): the interceptor is installed, but nothing below reads what it
    // captures — all fields come from the DOM scrape.
    await page.installInterceptor('36kr.com/api');
    await page.goto(`https://www.36kr.com/p/${articleId}`);
    // Fixed pause before scraping (unit is whatever IPage.wait defines).
    await page.wait(5);

    const data: any = await page.evaluate(`
      (() => {
        // Title: prefer .article-title and only fall back to the first h1.
        // A combined selector list would return whichever element comes first
        // in document order, so an unrelated earlier h1 could win.
        const titleEl = document.querySelector('.article-title') || document.querySelector('h1');
        const title = titleEl?.textContent?.trim() || '';
        // Author: first .author-name element that has non-empty text
        const authorEls = document.querySelectorAll('.author-name');
        const author = Array.from(authorEls).map(el => el.textContent?.trim()).filter(Boolean)[0] || '';
        // Date: text of .item-time with any leading "·" / whitespace removed
        const dateRaw = document.querySelector('.item-time')?.textContent?.trim() || '';
        const date = dateRaw.replace(/^[·\\s]+/, '').trim();
        // Body: paragraphs longer than 10 chars, joined and capped at 800 chars
        const bodyEls = document.querySelectorAll('[class*="article-content"] p, [class*="rich-text"] p, .article p');
        const body = Array.from(bodyEls)
          .map(el => el.textContent?.trim())
          .filter(t => t && t.length > 10)
          .join(' ')
          .slice(0, 800);
        return { title, author, date, body };
      })()
    `);

    // No title means the page did not render an article.
    if (!data?.title) {
      throw new CliError('NOT_FOUND', 'Article not found or failed to load', 'Check the article ID');
    }

    return [
      { field: 'title', value: data.title },
      { field: 'author', value: data.author || '-' },
      { field: 'date', value: data.date || '-' },
      { field: 'url', value: `https://36kr.com/p/${articleId}` },
      { field: 'body', value: data.body || '-' },
    ];
  },
});

src/clis/36kr/hot.test.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import { describe, expect, it } from 'vitest';
2+
3+
import { buildHotListUrl, getShanghaiDate } from './hot.js';
4+
5+
describe('36kr/hot date routing', () => {
  // 18:30 UTC on 25 March is already 02:30 on 26 March in Asia/Shanghai (UTC+8).
  const lateEveningUtc = new Date('2026-03-25T18:30:00.000Z');

  it('formats dates in Asia/Shanghai instead of UTC', () => {
    const formatted = getShanghaiDate(lateEveningUtc);
    expect(formatted).toBe('2026-03-26');
  });

  it('builds dated hot-list routes with Shanghai-local date', () => {
    const route = buildHotListUrl('renqi', lateEveningUtc);
    expect(route).toBe('https://www.36kr.com/hot-list/renqi/2026-03-26/1');
  });

  it('keeps catalog on the static route', () => {
    const route = buildHotListUrl('catalog');
    expect(route).toBe('https://www.36kr.com/hot-list/catalog');
  });
});

src/clis/36kr/hot.ts

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/**
2+
* 36kr hot-list — INTERCEPT strategy.
3+
*
4+
* Navigates to the 36kr hot-list page and scrapes rendered article links.
5+
* Supports category types: renqi (人气), zonghe (综合), shoucang (收藏), catalog (综合热门).
6+
*/
7+
import { cli, Strategy } from '../../registry.js';
8+
import { CliError } from '../../errors.js';
9+
import type { IPage } from '../../types.js';
10+
11+
const TYPE_MAP: Record<string, string> = {
12+
renqi: '人气榜',
13+
zonghe: '综合榜',
14+
shoucang: '收藏榜',
15+
catalog: '热门资讯',
16+
};
17+
18+
function getShanghaiDate(date = new Date()): string {
19+
return new Intl.DateTimeFormat('en-CA', {
20+
timeZone: 'Asia/Shanghai',
21+
year: 'numeric',
22+
month: '2-digit',
23+
day: '2-digit',
24+
}).format(date);
25+
}
26+
27+
function buildHotListUrl(listType: string, date = new Date()): string {
28+
if (listType === 'catalog') {
29+
return 'https://www.36kr.com/hot-list/catalog';
30+
}
31+
32+
return `https://www.36kr.com/hot-list/${listType}/${getShanghaiDate(date)}/1`;
33+
}
34+
35+
cli({
36+
site: '36kr',
37+
name: 'hot',
38+
description: '36氪热榜 — trending articles (renqi/zonghe/shoucang/catalog)',
39+
domain: 'www.36kr.com',
40+
strategy: Strategy.INTERCEPT,
41+
args: [
42+
{ name: 'limit', type: 'int', default: 20, help: 'Number of items (max 50)' },
43+
{
44+
name: 'type',
45+
type: 'string',
46+
default: 'catalog',
47+
help: 'List type: renqi (人气), zonghe (综合), shoucang (收藏), catalog (热门资讯)',
48+
},
49+
],
50+
columns: ['rank', 'title', 'url'],
51+
func: async (page: IPage, args) => {
52+
const count = Math.min(Number(args.limit) || 20, 50);
53+
const listType = String(args.type ?? 'catalog');
54+
55+
if (!TYPE_MAP[listType]) {
56+
throw new CliError(
57+
'INVALID_ARGUMENT',
58+
`Unknown type "${listType}". Valid types: ${Object.keys(TYPE_MAP).join(', ')}`,
59+
);
60+
}
61+
62+
const url = buildHotListUrl(listType);
63+
64+
await page.installInterceptor('36kr.com/api');
65+
await page.goto(url);
66+
await page.wait(6);
67+
68+
// Scrape rendered article links from DOM (deduplicated)
69+
const domItems: any = await page.evaluate(`
70+
(() => {
71+
const seen = new Set();
72+
const results = [];
73+
const links = document.querySelectorAll('a[href*="/p/"]');
74+
for (const el of links) {
75+
const href = el.getAttribute('href') || '';
76+
const title = el.textContent?.trim() || '';
77+
if (!title || title.length < 5 || seen.has(href) || seen.has(title)) continue;
78+
seen.add(href);
79+
seen.add(title);
80+
results.push({ title, url: href.startsWith('http') ? href : 'https://36kr.com' + href });
81+
}
82+
return results;
83+
})()
84+
`);
85+
86+
const items = Array.isArray(domItems) ? (domItems as any[]) : [];
87+
if (items.length === 0) {
88+
throw new CliError(
89+
'NO_DATA',
90+
'Could not retrieve 36kr hot list',
91+
'36kr may have changed its DOM structure',
92+
);
93+
}
94+
95+
return items.slice(0, count).map((item: any, i: number) => ({
96+
rank: i + 1,
97+
title: item.title,
98+
url: item.url,
99+
}));
100+
},
101+
});
102+
103+
export { buildHotListUrl, getShanghaiDate };

src/clis/36kr/news.test.ts

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import { describe, it, expect, vi, afterEach } from 'vitest';
2+
3+
// Minimal RSS 2.0 fixture: three <item> entries, each with a title, a CDATA-wrapped
// link carrying a ?f=rss query, and a "YYYY-MM-DD HH:mm:ss +0800" pubDate.
const SAMPLE_RSS = `<?xml version="1.0" encoding="UTF-8"?>
4+
<rss version="2.0"><channel><title>36氪</title>
5+
<item>
6+
<title>红杉中国领投AI公司「示例」,金额近2亿元</title>
7+
<link><![CDATA[https://36kr.com/p/1111111111111111?f=rss]]></link>
8+
<pubDate>2026-03-26 10:00:00 +0800</pubDate>
9+
</item>
10+
<item>
11+
<title>马斯克旗下xAI估值突破1000亿美元</title>
12+
<link><![CDATA[https://36kr.com/p/2222222222222222?f=rss]]></link>
13+
<pubDate>2026-03-26 09:00:00 +0800</pubDate>
14+
</item>
15+
<item>
16+
<title>OpenAI发布GPT-5,多模态能力大幅提升</title>
17+
<link><![CDATA[https://36kr.com/p/3333333333333333?f=rss]]></link>
18+
<pubDate>2026-03-25 20:00:00 +0800</pubDate>
19+
</item>
20+
</channel></rss>`;
21+
22+
// Undo any spies/mocks installed during a test so they do not leak into the next one.
afterEach(() => {
23+
vi.restoreAllMocks();
24+
});
25+
26+
/** One parsed RSS entry as asserted by the tests below. */
interface ParsedNewsItem {
  rank: number;
  title: string;
  date: string;
  url: string;
}

/**
 * Regex-based RSS `<item>` extraction shared by every test in this file.
 *
 * Items with an empty title are skipped; `rank` is the 1-based position among
 * kept items; `date` is the first 10 characters of `<pubDate>`; the link is
 * read from a CDATA-wrapped `<link>` first, then from a plain `<link>`.
 *
 * NOTE(review): this is a test-local copy of the approach the adapter is said
 * to use; it does not import news.ts, so these tests will not notice if the
 * adapter's parser drifts. Prefer exporting and testing the real parser.
 *
 * @param xml - Raw RSS document.
 * @param limit - Maximum number of items to return (default: no limit).
 */
function parseRssItems(xml: string, limit = Number.POSITIVE_INFINITY): ParsedNewsItem[] {
  const items: ParsedNewsItem[] = [];
  for (const match of xml.matchAll(/<item>([\s\S]*?)<\/item>/g)) {
    if (items.length >= limit) break;
    const block = match[1] ?? '';
    const title = block.match(/<title>([\s\S]*?)<\/title>/)?.[1]?.trim() ?? '';
    if (!title) continue;
    const url =
      block.match(/<link><!\[CDATA\[(.*?)\]\]>/)?.[1] ??
      block.match(/<link>(.*?)<\/link>/)?.[1] ??
      '';
    const pubDate = block.match(/<pubDate>(.*?)<\/pubDate>/)?.[1]?.trim() ?? '';
    items.push({ rank: items.length + 1, title, date: pubDate.slice(0, 10), url: url.trim() });
  }
  return items;
}

describe('36kr/news RSS parsing', () => {
  it('parses RSS feed into ranked news items', () => {
    const items = parseRssItems(SAMPLE_RSS, 10);

    expect(items).toHaveLength(3);
    expect(items[0].rank).toBe(1);
    expect(items[0].title).toBe('红杉中国领投AI公司「示例」,金额近2亿元');
    expect(items[0].date).toBe('2026-03-26');
    expect(items[0].url).toBe('https://36kr.com/p/1111111111111111?f=rss');
  });

  it('respects limit — returns at most N items', () => {
    const items = parseRssItems(SAMPLE_RSS, 2);

    expect(items).toHaveLength(2);
    expect(items.map((item) => item.rank)).toEqual([1, 2]);
  });

  it('skips items with empty title', () => {
    const xml = `<rss><channel>
<item><title></title><link>https://36kr.com/p/0</link><pubDate>2026-01-01</pubDate></item>
<item><title>有标题的文章</title><link>https://36kr.com/p/1</link><pubDate>2026-01-01</pubDate></item>
</channel></rss>`;

    const items = parseRssItems(xml);

    expect(items).toHaveLength(1);
    expect(items[0].title).toBe('有标题的文章');
    // Plain (non-CDATA) <link> fallback.
    expect(items[0].url).toBe('https://36kr.com/p/1');
  });
});

0 commit comments

Comments
 (0)