feat(36kr): add 36氪 CLI adapter (#461)

Conn-Ho · claude · jackwener · web-flow · commit 22399cee1a96 · 2026-03-26T15:34:46.000+08:00
* feat(36kr): add 36氪 CLI adapter with 4 commands

- news: latest articles via public RSS feed (no browser needed), includes title/summary/date/url
- hot: trending articles via INTERCEPT strategy, supports --type renqi/zonghe/shoucang/catalog
- search: keyword search via INTERCEPT + DOM scraping
- article: fetch article detail (title/author/date/body) by ID or URL

Also adds vitest adapter project entry for 36kr tests.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* docs(36kr): add adapter documentation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(36kr): use Shanghai hot-list dates and complete docs

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: jackwener <jakevingoo@gmail.com>
diff --git a/README.md b/README.md
@@ -170,6 +170,7 @@ Run `opencli list` for the live registry.
 | **douban** | `search` `top250` `subject` `marks` `reviews` `movie-hot` `book-hot` | Browser |
 | **facebook** | `feed` `profile` `search` `friends` `groups` `events` `notifications` `memories` `add-friend` `join-group` | Browser |
 | **google** | `news` `search` `suggest` `trends` | Public |
+| **36kr** | `news` `hot` `search` `article` | Public / Browser |
 | **instagram** | `explore` `profile` `search` `user` `followers` `following` `follow` `unfollow` `like` `unlike` `comment` `save` `unsave` `saved` | Browser |
 | **lobsters** | `hot` `newest` `active` `tag` | Public |
 | **medium** | `feed` `search` `user` | Browser |
diff --git a/README.zh-CN.md b/README.zh-CN.md
@@ -172,6 +172,7 @@ npm install -g @jackwener/opencli@latest
 | **douban** | `search` `top250` `subject` `marks` `reviews` `movie-hot` `book-hot` | 浏览器 |
 | **facebook** | `feed` `profile` `search` `friends` `groups` `events` `notifications` `memories` `add-friend` `join-group` | 浏览器 |
 | **google** | `news` `search` `suggest` `trends` | 公开 |
+| **36kr** | `news` `hot` `search` `article` | 公开 / 浏览器 |
 | **instagram** | `explore` `profile` `search` `user` `followers` `following` `follow` `unfollow` `like` `unlike` `comment` `save` `unsave` `saved` | 浏览器 |
 | **lobsters** | `hot` `newest` `active` `tag` | 公开 |
 | **medium** | `feed` `search` `user` | 浏览器 |
diff --git a/docs/adapters/browser/36kr.md b/docs/adapters/browser/36kr.md
@@ -0,0 +1,47 @@
+# 36kr (36氪)
+
+**Mode**: 🌐 Public / 🔐 Browser · **Domain**: `36kr.com`
+
+## Commands
+
+| Command | Description |
+|---------|-------------|
+| `opencli 36kr hot` | 36氪热榜 — trending articles |
+| `opencli 36kr news` | Latest tech/startup news from 36kr |
+| `opencli 36kr search <query>` | Search 36kr articles |
+| `opencli 36kr article <id-or-url>` | Read full article content |
+
+## Usage Examples
+
+```bash
+# Trending articles
+opencli 36kr hot --limit 10
+
+# Hot by type
+opencli 36kr hot --type renqi --limit 10
+opencli 36kr hot --type zonghe --limit 10
+
+# Latest news
+opencli 36kr news --limit 20
+
+# Search articles
+opencli 36kr search "AI" --limit 10
+opencli 36kr search "OpenAI" --limit 5
+
+# Read full article (by ID or URL)
+opencli 36kr article 3000000123456
+opencli 36kr article https://36kr.com/p/3000000123456
+
+# JSON output
+opencli 36kr hot -f json
+```
+
+## Notes
+
+- `news` uses the public RSS feed and works without Browser Bridge.
+- `hot`, `search`, and `article` use Browser Bridge and are best run with Chrome open.
+- `hot --type` accepts `catalog`, `renqi`, `zonghe`, and `shoucang`.
+
+## Prerequisites
+
+- `news`: no browser required (public RSS feed). `hot`, `search`, and `article`: Chrome running with Browser Bridge.
diff --git a/docs/adapters/index.md b/docs/adapters/index.md
@@ -42,6 +42,7 @@ Run `opencli list` for the live registry.
 | **[jd](/adapters/browser/jd)** | `item` | 🔐 Browser |
 | **[web](/adapters/browser/web)** | `read` | 🔐 Browser |
 | **[weixin](/adapters/browser/weixin)** | `download` | 🔐 Browser |
+| **[36kr](/adapters/browser/36kr)** | `news` `hot` `search` `article` | 🌐 / 🔐 |
 
 ## Public API Adapters
 
diff --git a/src/clis/36kr/article.ts b/src/clis/36kr/article.ts
@@ -0,0 +1,69 @@
+/**
+ * 36kr article detail — INTERCEPT strategy.
+ *
+ * Fetches the title, author, date and a body excerpt (capped at 800 characters) of a 36kr article given its ID or URL.
+ */
+import { cli, Strategy } from '../../registry.js';
+import { CliError } from '../../errors.js';
+import type { IPage } from '../../types.js';
+
+/** Article ID from a `/p/<digits>` URL or an all-digit string; '' otherwise (digits in e.g. "36kr.com" are never harvested). */
+function parseArticleId(input: string): string {
+  const m = input.trim().match(/\/p\/(\d+)|^(\d+)$/);
+  return m ? (m[1] ?? m[2]) : '';
+}
+
+// `36kr article`: open the article page and scrape title, author, date and a body excerpt from its DOM.
+// Throws INVALID_ARGUMENT when no article ID can be parsed and NOT_FOUND when the scraped title is empty.
+cli({
+  site: '36kr',
+  name: 'article',
+  description: '获取36氪文章正文内容',
+  domain: 'www.36kr.com',
+  strategy: Strategy.INTERCEPT,
+  args: [{ name: 'id', positional: true, required: true, help: 'Article ID or full 36kr article URL' }],
+  columns: ['field', 'value'],
+  func: async (page: IPage, args) => {
+    const articleId = parseArticleId(String(args.id ?? ''));
+    if (!articleId) {
+      throw new CliError('INVALID_ARGUMENT', 'Invalid article ID or URL');
+    }
+
+    await page.installInterceptor('36kr.com/api');
+    await page.goto(`https://www.36kr.com/p/${articleId}`);
+    await page.wait(5);
+
+    const data: any = await page.evaluate(`
+      (() => {
+        // Title: first element matching .article-title or h1
+        const title = document.querySelector('.article-title, h1')?.textContent?.trim() || '';
+        // Author: first non-empty .author-name text (blank matches are filtered out)
+        const authorEls = document.querySelectorAll('.author-name');
+        const author = Array.from(authorEls).map(el => el.textContent?.trim()).filter(Boolean)[0] || '';
+        // Date: first .item-time text minus leading "·"/whitespace (backslash doubled: this is a template literal)
+        const dateRaw = document.querySelector('.item-time')?.textContent?.trim() || '';
+        const date = dateRaw.replace(/^[·\\s]+/, '').trim();
+        // Body: paragraphs longer than 10 characters, joined with spaces
+        const bodyEls = document.querySelectorAll('[class*="article-content"] p, [class*="rich-text"] p, .article p');
+        const text = Array.from(bodyEls)
+          .map(el => el.textContent?.trim())
+          .filter(t => t && t.length > 10).join(' ');
+        // Cap at 800 code points; slicing UTF-16 units could leave half of a surrogate pair at the end
+        const body = Array.from(text).slice(0, 800).join('');
+        return { title, author, date, body };
+      })()
+    `);
+
+    if (!data?.title) {
+      throw new CliError('NOT_FOUND', 'Article not found or failed to load', 'Check the article ID');
+    }
+
+    return [
+      { field: 'title',  value: data.title },
+      { field: 'author', value: data.author || '-' },
+      { field: 'date',   value: data.date || '-' },
+      { field: 'url',    value: `https://36kr.com/p/${articleId}` },
+      { field: 'body',   value: data.body || '-' },
+    ];
+  },
+});
diff --git a/src/clis/36kr/hot.test.ts b/src/clis/36kr/hot.test.ts
@@ -0,0 +1,19 @@
+import { describe, expect, it } from 'vitest';
+
+import { buildHotListUrl, getShanghaiDate } from './hot.js';
+
+describe('36kr/hot date routing', () => {
+  const utcEvening = new Date('2026-03-25T18:30:00.000Z'); // already 02:30 on 03-26 at UTC+8
+
+  it('formats dates in Asia/Shanghai instead of UTC', () => {
+    expect(getShanghaiDate(utcEvening)).toBe('2026-03-26');
+  });
+
+  it('builds dated hot-list routes with Shanghai-local date', () => {
+    expect(buildHotListUrl('renqi', utcEvening)).toBe('https://www.36kr.com/hot-list/renqi/2026-03-26/1');
+  });
+
+  it('keeps catalog on the static route', () => {
+    expect(buildHotListUrl('catalog')).toBe('https://www.36kr.com/hot-list/catalog');
+  });
+});
diff --git a/src/clis/36kr/hot.ts b/src/clis/36kr/hot.ts
@@ -0,0 +1,103 @@
+/**
+ * 36kr hot-list — INTERCEPT strategy.
+ *
+ * Navigates to the 36kr hot-list page and scrapes rendered article links.
+ * Supports category types: renqi (人气), zonghe (综合), shoucang (收藏), catalog (热门资讯).
+ */
+import { cli, Strategy } from '../../registry.js';
+import { CliError } from '../../errors.js';
+import type { IPage } from '../../types.js';
+
+const TYPE_MAP: Record<string, string> = { // accepted --type values -> Chinese list label
+  renqi:   '人气榜', // popularity ranking
+  zonghe:  '综合榜', // overall ranking
+  shoucang: '收藏榜', // favorites ranking
+  catalog: '热门资讯', // hot news
+};
+
+/** Calendar date (YYYY-MM-DD) of `date` in Asia/Shanghai, the day used in dated hot-list routes. */
+function getShanghaiDate(date = new Date()): string {
+  const options = { timeZone: 'Asia/Shanghai', year: 'numeric', month: '2-digit', day: '2-digit' } as const;
+  // Read the fields via formatToParts instead of trusting a locale ("en-CA") to print ISO order.
+  const fields: Record<string, string> = {};
+  for (const part of new Intl.DateTimeFormat('en-US', options).formatToParts(date)) fields[part.type] = part.value;
+  return `${fields.year}-${fields.month}-${fields.day}`;
+}
+
+function buildHotListUrl(listType: string, date = new Date()): string {
+  const base = 'https://www.36kr.com/hot-list';
+  // `catalog` lives on a fixed route; every other list gets a Shanghai-local date segment plus a trailing `/1`.
+  return listType === 'catalog'
+    ? `${base}/catalog`
+    : `${base}/${listType}/${getShanghaiDate(date)}/1`;
+}
+
+// `36kr hot`: open the selected hot-list page and return the article links rendered in its DOM.
+// Throws INVALID_ARGUMENT for an unknown --type and NO_DATA when the page yields no article links.
+cli({
+  site: '36kr',
+  name: 'hot',
+  description: '36氪热榜 — trending articles (renqi/zonghe/shoucang/catalog)',
+  domain: 'www.36kr.com',
+  strategy: Strategy.INTERCEPT,
+  args: [
+    { name: 'limit', type: 'int', default: 20, help: 'Number of items (max 50)' },
+    {
+      name: 'type',
+      type: 'string',
+      default: 'catalog',
+      help: 'List type: renqi (人气), zonghe (综合), shoucang (收藏), catalog (热门资讯)',
+    },
+  ],
+  columns: ['rank', 'title', 'url'],
+  func: async (page: IPage, args) => {
+    // NaN/0/missing fall back to 20; clamp to 1..50 so a negative limit cannot make slice() trim from the end.
+    const count = Math.max(1, Math.min(Number(args.limit) || 20, 50));
+    const listType = String(args.type ?? 'catalog');
+
+    // Own-property check: a plain lookup would accept inherited keys such as "constructor".
+    if (!Object.prototype.hasOwnProperty.call(TYPE_MAP, listType)) {
+      const valid = Object.keys(TYPE_MAP).join(', ');
+      throw new CliError('INVALID_ARGUMENT', `Unknown type "${listType}". Valid types: ${valid}`);
+    }
+
+    await page.installInterceptor('36kr.com/api'); // NOTE(review): intercepted responses are never read below — confirm this is still needed
+    await page.goto(buildHotListUrl(listType));
+    await page.wait(6);
+
+    // Scrape rendered article links from the DOM, skipping short titles (< 5 chars), repeated hrefs and repeated titles.
+    const domItems: any = await page.evaluate(`
+      (() => {
+        const seen = new Set();
+        const results = [];
+        const links = document.querySelectorAll('a[href*="/p/"]');
+        for (const el of links) {
+          const href = el.getAttribute('href') || '';
+          const title = el.textContent?.trim() || '';
+          if (!title || title.length < 5 || seen.has(href) || seen.has(title)) continue;
+          seen.add(href);
+          seen.add(title);
+          results.push({ title, url: href.startsWith('http') ? href : 'https://36kr.com' + href });
+        }
+        return results;
+      })()
+    `);
+
+    const items = Array.isArray(domItems) ? (domItems as any[]) : [];
+    if (items.length === 0) {
+      throw new CliError(
+        'NO_DATA',
+        'Could not retrieve 36kr hot list',
+        '36kr may have changed its DOM structure',
+      );
+    }
+
+    return items.slice(0, count).map((item: any, i: number) => ({
+      rank: i + 1,
+      title: item.title,
+      url: item.url,
+    }));
+  },
+});
+
+export { buildHotListUrl, getShanghaiDate };
diff --git a/src/clis/36kr/news.test.ts b/src/clis/36kr/news.test.ts
@@ -0,0 +1,90 @@
+import { describe, it, expect, vi, afterEach } from 'vitest';
+// RSS fixture: three <item>s with CDATA-wrapped links and "YYYY-MM-DD HH:mm:ss  +0800" pubDates.
+const SAMPLE_RSS = `<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0"><channel><title>36氪</title>
+<item>
+  <title>红杉中国领投AI公司「示例」，金额近2亿元</title>
+  <link><![CDATA[https://36kr.com/p/1111111111111111?f=rss]]></link>
+  <pubDate>2026-03-26 10:00:00  +0800</pubDate>
+</item>
+<item>
+  <title>马斯克旗下xAI估值突破1000亿美元</title>
+  <link><![CDATA[https://36kr.com/p/2222222222222222?f=rss]]></link>
+  <pubDate>2026-03-26 09:00:00  +0800</pubDate>
+</item>
+<item>
+  <title>OpenAI发布GPT-5，多模态能力大幅提升</title>
+  <link><![CDATA[https://36kr.com/p/3333333333333333?f=rss]]></link>
+  <pubDate>2026-03-25 20:00:00  +0800</pubDate>
+</item>
+</channel></rss>`;
+
+afterEach(() => {
+  vi.restoreAllMocks(); // undo the fetch spy installed by the first test so it cannot leak into later cases
+});
+
+describe('36kr/news RSS parsing', () => {
+  it('parses RSS feed into ranked news items', async () => {
+    vi.spyOn(globalThis, 'fetch').mockResolvedValue({ // NOTE(review): nothing below calls fetch — confirm this stub is needed
+      ok: true,
+      text: async () => SAMPLE_RSS,
+    } as Response);
+
+    // Inline copy of the parsing regexes (said to mirror news.ts — not verifiable from this file, which never imports it).
+    const xml = SAMPLE_RSS;
+    const items: { rank: number; title: string; date: string; url: string }[] = [];
+    const itemRegex = /<item>([\s\S]*?)<\/item>/g;
+    let match;
+    while ((match = itemRegex.exec(xml)) && items.length < 10) {
+      const block = match[1];
+      const title = block.match(/<title>([\s\S]*?)<\/title>/)?.[1]?.trim() ?? '';
+      const url =
+        block.match(/<link><!\[CDATA\[(.*?)\]\]>/)?.[1] ?? // CDATA-wrapped link first
+        block.match(/<link>(.*?)<\/link>/)?.[1] ?? // plain <link> fallback
+        '';
+      const pubDate = block.match(/<pubDate>(.*?)<\/pubDate>/)?.[1]?.trim() ?? '';
+      const date = pubDate.slice(0, 10); // keep only the leading YYYY-MM-DD
+      if (title) items.push({ rank: items.length + 1, title, date, url: url.trim() });
+    }
+
+    expect(items).toHaveLength(3);
+    expect(items[0].rank).toBe(1);
+    expect(items[0].title).toBe('红杉中国领投AI公司「示例」，金额近2亿元');
+    expect(items[0].date).toBe('2026-03-26');
+    expect(items[0].url).toBe('https://36kr.com/p/1111111111111111?f=rss');
+  });
+
+  it('respects limit — returns at most N items', async () => {
+    const xml = SAMPLE_RSS;
+    const limit = 2;
+    const items: { rank: number; title: string; date: string; url: string }[] = [];
+    const itemRegex = /<item>([\s\S]*?)<\/item>/g;
+    let match;
+    while ((match = itemRegex.exec(xml)) && items.length < limit) {
+      const block = match[1];
+      const title = block.match(/<title>([\s\S]*?)<\/title>/)?.[1]?.trim() ?? '';
+      const url = block.match(/<link><!\[CDATA\[(.*?)\]\]>/)?.[1] ?? ''; // CDATA form only — no plain <link> fallback in this test
+      const pubDate = block.match(/<pubDate>(.*?)<\/pubDate>/)?.[1]?.trim() ?? '';
+      const date = pubDate.slice(0, 10);
+      if (title) items.push({ rank: items.length + 1, title, date, url: url.trim() });
+    }
+    expect(items).toHaveLength(2);
+  });
+
+  it('skips items with empty title', async () => {
+    const xml = `<rss><channel>
+      <item><title></title><link>https://36kr.com/p/0</link><pubDate>2026-01-01</pubDate></item>
+      <item><title>有标题的文章</title><link>https://36kr.com/p/1</link><pubDate>2026-01-01</pubDate></item>
+    </channel></rss>`;
+    const items: any[] = [];
+    const itemRegex = /<item>([\s\S]*?)<\/item>/g;
+    let match;
+    while ((match = itemRegex.exec(xml))) { // no limit here: every <item> is scanned
+      const block = match[1];
+      const title = block.match(/<title>([\s\S]*?)<\/title>/)?.[1]?.trim() ?? '';
+      if (title) items.push({ title }); // only the title is extracted in this case
+    }
+    expect(items).toHaveLength(1);
+    expect(items[0].title).toBe('有标题的文章');
+  });
+});
diff --git a/src/clis/36kr/news.ts b/src/clis/36kr/news.ts
diff --git a/src/clis/36kr/search.ts b/src/clis/36kr/search.ts
diff --git a/vitest.config.ts b/vitest.config.ts