-
Notifications
You must be signed in to change notification settings - Fork 32
Expand file tree
/
Copy pathbrowser-pool-config.ts
More file actions
74 lines (63 loc) · 2.26 KB
/
browser-pool-config.ts
File metadata and controls
74 lines (63 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env node
/**
* Browser Pool Configuration Example
*
* Demonstrates configuring the browser pool for high-throughput scraping.
* Useful when scraping many URLs to optimize performance and resource usage.
*/
import { ReaderClient } from "@vakra-dev/reader";
/**
 * Runs the browser pool example end to end.
 *
 * Creates a `ReaderClient` with an explicit browser pool configuration,
 * scrapes a small fixed list of URLs as one batch, then prints the per-page
 * results and the batch summary to stdout.
 *
 * On a scrape failure the error is logged and the process exit code is set
 * to 1; the reader is closed in every case.
 */
async function main(): Promise<void> {
  // Kept as named constants so the log line below cannot drift from the
  // values actually passed to the client and to the scrape call.
  const poolSize = 5;
  const batchConcurrency = 3;

  console.log("Starting browser pool configuration example\n");

  // Configure browser pool for high-throughput scraping
  const reader = new ReaderClient({
    verbose: true,
    // Browser pool configuration
    browserPool: {
      size: poolSize, // Run 5 browser instances in parallel
      retireAfterPages: 50, // Recycle browser after 50 pages (prevents memory leaks)
      retireAfterMinutes: 15, // Recycle browser after 15 minutes
      maxQueueSize: 200, // Allow up to 200 pending requests in queue
    },
  });

  // Sample URLs to scrape
  const urls = [
    "https://example.com",
    "https://example.org",
    "https://example.net",
  ];

  console.log(
    `Scraping ${urls.length} URLs with pool size=${poolSize}, concurrency=${batchConcurrency}\n`
  );

  try {
    const result = await reader.scrape({
      urls,
      formats: ["markdown"],
      batchConcurrency, // Process 3 URLs in parallel
      onProgress: (progress) => {
        console.log(`Progress: ${progress.completed}/${progress.total} - ${progress.currentUrl}`);
      },
    });

    console.log("\nScrape completed!\n");
    console.log("Results:");
    for (const page of result.data) {
      console.log(`\n ${page.metadata.baseUrl}`);
      console.log(` Title: ${page.metadata.website.title}`);
      console.log(` Duration: ${page.metadata.duration}ms`);
      console.log(` Content: ${page.markdown?.length ?? 0} chars`);
    }

    const { totalUrls, successfulUrls, failedUrls, totalDuration } = result.batchMetadata;
    // Guard the division so an empty batch prints 0ms rather than NaNms.
    const avgPerUrl = totalUrls > 0 ? Math.round(totalDuration / totalUrls) : 0;

    console.log("\nBatch Metadata:");
    console.log(` Total URLs: ${totalUrls}`);
    console.log(` Successful: ${successfulUrls}`);
    console.log(` Failed: ${failedUrls}`);
    console.log(` Total Duration: ${totalDuration}ms`);
    console.log(` Avg Per URL: ${avgPerUrl}ms`);
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error, so narrow before
    // reading `.message`.
    const message = error instanceof Error ? error.message : String(error);
    console.error("Error:", message);
    // Do not call process.exit() here: it terminates immediately and would
    // skip the `finally` block, leaving the reader unclosed. Setting
    // exitCode lets cleanup run and still reports failure to the shell.
    process.exitCode = 1;
  } finally {
    await reader.close();
  }
}
// Entry point. main() handles scrape errors itself; this handler covers
// rejections raised outside its try/catch (client construction, or closing
// the reader) so the promise is never left floating.
main().catch((error: unknown) => {
  console.error("Fatal error:", error);
  process.exitCode = 1;
});