|
| 1 | +import { createLogger } from '@sim/logger' |
1 | 2 | import type { BrightDataDiscoverParams, BrightDataDiscoverResponse } from '@/tools/brightdata/types' |
2 | 3 | import type { ToolConfig } from '@/tools/types' |
3 | 4 |
|
| 5 | +const logger = createLogger('tools:brightdata:discover') |
| 6 | + |
| 7 | +const POLL_INTERVAL_MS = 3000 |
| 8 | +const MAX_POLL_TIME_MS = 120000 |
| 9 | + |
4 | 10 | export const brightDataDiscoverTool: ToolConfig< |
5 | 11 | BrightDataDiscoverParams, |
6 | 12 | BrightDataDiscoverResponse |
@@ -92,35 +98,79 @@ export const brightDataDiscoverTool: ToolConfig< |
92 | 98 |
|
93 | 99 | const data = await response.json() |
94 | 100 |
|
95 | | - let results: Array<{ |
96 | | - url: string | null |
97 | | - title: string | null |
98 | | - description: string | null |
99 | | - relevanceScore: number | null |
100 | | - content: string | null |
101 | | - }> = [] |
102 | | - |
103 | | - const items = Array.isArray(data) ? data : (data?.results ?? data?.data ?? []) |
104 | | - |
105 | | - if (Array.isArray(items)) { |
106 | | - results = items.map((item: Record<string, unknown>) => ({ |
107 | | - url: (item.link as string) ?? (item.url as string) ?? null, |
108 | | - title: (item.title as string) ?? null, |
109 | | - description: (item.description as string) ?? (item.snippet as string) ?? null, |
110 | | - relevanceScore: (item.relevance_score as number) ?? null, |
111 | | - content: |
112 | | - (item.content as string) ?? (item.text as string) ?? (item.markdown as string) ?? null, |
113 | | - })) |
114 | | - } |
115 | | - |
116 | 101 | return { |
117 | 102 | success: true, |
118 | 103 | output: { |
119 | | - results, |
| 104 | + results: [], |
120 | 105 | query: params?.query ?? null, |
121 | | - totalResults: results.length, |
| 106 | + totalResults: 0, |
| 107 | + taskId: data.task_id ?? null, |
122 | 108 | }, |
| 109 | + } as BrightDataDiscoverResponse |
| 110 | + }, |
| 111 | + |
| 112 | + postProcess: async (result, params) => { |
| 113 | + if (!result.success) return result |
| 114 | + |
| 115 | + const taskId = (result.output as Record<string, unknown>).taskId as string | null |
| 116 | + if (!taskId) { |
| 117 | + throw new Error('Discover API did not return a task_id. Cannot poll for results.') |
| 118 | + } |
| 119 | + |
| 120 | + logger.info(`Bright Data Discover task ${taskId} created, polling for results...`) |
| 121 | + |
| 122 | + let elapsedTime = 0 |
| 123 | + |
| 124 | + while (elapsedTime < MAX_POLL_TIME_MS) { |
| 125 | + const pollResponse = await fetch( |
| 126 | + `https://api.brightdata.com/discover?task_id=${encodeURIComponent(taskId)}`, |
| 127 | + { |
| 128 | + method: 'GET', |
| 129 | + headers: { |
| 130 | + Authorization: `Bearer ${params.apiKey}`, |
| 131 | + }, |
| 132 | + } |
| 133 | + ) |
| 134 | + |
| 135 | + if (!pollResponse.ok) { |
| 136 | + throw new Error(`Failed to poll discover results: ${pollResponse.statusText}`) |
| 137 | + } |
| 138 | + |
| 139 | + const data = await pollResponse.json() |
| 140 | + logger.info(`Bright Data Discover task ${taskId} status: ${data.status}`) |
| 141 | + |
| 142 | + if (data.status === 'done') { |
| 143 | + const items = Array.isArray(data.results) ? data.results : [] |
| 144 | + |
| 145 | + const results = items.map((item: Record<string, unknown>) => ({ |
| 146 | + url: (item.link as string) ?? (item.url as string) ?? null, |
| 147 | + title: (item.title as string) ?? null, |
| 148 | + description: (item.description as string) ?? (item.snippet as string) ?? null, |
| 149 | + relevanceScore: (item.relevance_score as number) ?? null, |
| 150 | + content: (item.content as string) ?? null, |
| 151 | + })) |
| 152 | + |
| 153 | + return { |
| 154 | + success: true, |
| 155 | + output: { |
| 156 | + results, |
| 157 | + query: params.query ?? null, |
| 158 | + totalResults: results.length, |
| 159 | + }, |
| 160 | + } as BrightDataDiscoverResponse |
| 161 | + } |
| 162 | + |
| 163 | + if (data.status === 'failed' || data.status === 'error') { |
| 164 | + throw new Error(`Discover task failed: ${data.error ?? 'Unknown error'}`) |
| 165 | + } |
| 166 | + |
| 167 | + await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)) |
| 168 | + elapsedTime += POLL_INTERVAL_MS |
123 | 169 | } |
| 170 | + |
| 171 | + throw new Error( |
| 172 | + `Discover task ${taskId} timed out after ${MAX_POLL_TIME_MS / 1000}s. Check status manually.` |
| 173 | + ) |
124 | 174 | }, |
125 | 175 |
|
126 | 176 | outputs: { |
|
0 commit comments