-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsmart-scraper.ts
More file actions
57 lines (52 loc) · 2.04 KB
/
smart-scraper.ts
File metadata and controls
57 lines (52 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import { defineCommand } from "citty";
import * as scrapegraphai from "scrapegraph-js";
import { resolveApiKey } from "../lib/folders.js";
import * as log from "../lib/log.js";
/**
 * Parses the value of a CLI flag that is expected to hold JSON text.
 *
 * The return type is intentionally left as whatever `JSON.parse` yields so the
 * result can be assigned to the SDK's parameter fields exactly as before.
 *
 * @param flag - Flag name without the leading dashes; used only in the error message.
 * @param raw - Raw string supplied on the command line.
 * @returns The parsed JSON value.
 * @throws SyntaxError when `raw` is not valid JSON; the message names the offending flag.
 */
function parseJsonFlag(flag: string, raw: string) {
  try {
    return JSON.parse(raw);
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new SyntaxError(`Invalid JSON for --${flag}: ${reason}`);
  }
}

/**
 * Like {@link parseJsonFlag}, but additionally requires the parsed value to be
 * a JSON object (not `null`, an array, or a primitive).
 *
 * @param flag - Flag name without the leading dashes; used only in the error message.
 * @param raw - Raw string supplied on the command line.
 * @returns The parsed JSON object.
 * @throws SyntaxError when `raw` is not valid JSON.
 * @throws TypeError when the parsed value is not an object.
 */
function parseJsonObjectFlag(flag: string, raw: string) {
  const parsed = parseJsonFlag(flag, raw);
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new TypeError(`--${flag} must be a JSON object, e.g. '{"name":"value"}'`);
  }
  return parsed;
}

/**
 * Parses the value of a numeric CLI flag as an integer within `[min, max]`.
 *
 * @param flag - Flag name without the leading dashes; used only in the error message.
 * @param raw - Raw string supplied on the command line.
 * @param min - Smallest accepted value (inclusive).
 * @param max - Largest accepted value (inclusive).
 * @returns The parsed integer.
 * @throws RangeError when `raw` is blank, not an integer, or outside the range.
 */
function parseIntFlag(flag: string, raw: string, min: number, max: number): number {
  // Number("   ") is 0, so blank input has to be rejected explicitly.
  const value = raw.trim() === "" ? Number.NaN : Number(raw);
  if (!Number.isInteger(value) || value < min || value > max) {
    throw new RangeError(
      `--${flag} must be an integer between ${min} and ${max}, got "${raw}"`,
    );
  }
  return value;
}

/**
 * `smart-scraper` command: sends a URL and an extraction prompt to
 * `scrapegraphai.smartScraper` and reports the outcome through the logger
 * created by `log.create`.
 *
 * Optional flags are only forwarded when they were supplied (non-empty).
 * JSON and numeric flags are validated locally, so a bad value fails with an
 * error naming the flag instead of a bare `SyntaxError` or a silent `NaN`.
 */
export default defineCommand({
  meta: {
    name: "smart-scraper",
    description: "Extract structured data from a URL using AI",
  },
  args: {
    url: {
      type: "positional",
      description: "Website URL to scrape",
      required: true,
    },
    prompt: {
      type: "string",
      alias: "p",
      description: "Extraction prompt",
      required: true,
    },
    schema: { type: "string", description: "Output JSON schema (as JSON string)" },
    scrolls: { type: "string", description: "Number of infinite scrolls (0-100)" },
    pages: { type: "string", description: "Total pages to scrape (1-100)" },
    stealth: { type: "boolean", description: "Bypass bot detection (+4 credits)" },
    cookies: { type: "string", description: "Cookies as JSON object string" },
    headers: { type: "string", description: "Custom headers as JSON object string" },
    "plain-text": { type: "boolean", description: "Return plain text instead of JSON" },
    json: { type: "boolean", description: "Output raw JSON (pipeable)" },
  },
  run: async ({ args }) => {
    const out = log.create(!!args.json);
    out.docs("https://docs.scrapegraphai.com/services/smartscraper");

    // Build and validate the request before resolving the API key, so invalid
    // input is reported without first going through key resolution.
    const params: scrapegraphai.SmartScraperParams = {
      website_url: args.url,
      user_prompt: args.prompt,
    };
    if (args.schema) params.output_schema = parseJsonFlag("schema", args.schema);
    // Ranges mirror the limits advertised in the flag descriptions above.
    if (args.scrolls) params.number_of_scrolls = parseIntFlag("scrolls", args.scrolls, 0, 100);
    if (args.pages) params.total_pages = parseIntFlag("pages", args.pages, 1, 100);
    if (args.stealth) params.stealth = true;
    if (args.cookies) params.cookies = parseJsonObjectFlag("cookies", args.cookies);
    if (args.headers) params.headers = parseJsonObjectFlag("headers", args.headers);
    if (args["plain-text"]) params.plain_text = true;

    const key = await resolveApiKey(!!args.json);

    out.start("Scraping");
    const result = await scrapegraphai.smartScraper(key, params);
    out.stop(result.elapsedMs);

    // A truthy `data` is treated as success; anything else is reported as an error.
    if (result.data) out.result(result.data);
    else out.error(result.error);
  },
});