Skip to content

Commit 594bc96

Browse files
waleedlatif1claude
andcommitted
fix(brightdata): handle async Discover API with polling
The Bright Data Discover API is asynchronous — POST /discover returns a task_id, and results must be polled via GET /discover?task_id=... The previous implementation incorrectly treated it as synchronous, always returning empty results. Uses postProcess (matching Firecrawl crawl pattern) to poll every 3s with a 120s timeout until status is "done". Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 22df446 commit 594bc96

1 file changed

Lines changed: 73 additions & 23 deletions

File tree

apps/sim/tools/brightdata/discover.ts

Lines changed: 73 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
1+
import { createLogger } from '@sim/logger'
12
import type { BrightDataDiscoverParams, BrightDataDiscoverResponse } from '@/tools/brightdata/types'
23
import type { ToolConfig } from '@/tools/types'
34

5+
const logger = createLogger('tools:brightdata:discover')
6+
7+
const POLL_INTERVAL_MS = 3000
8+
const MAX_POLL_TIME_MS = 120000
9+
410
export const brightDataDiscoverTool: ToolConfig<
511
BrightDataDiscoverParams,
612
BrightDataDiscoverResponse
@@ -92,35 +98,79 @@ export const brightDataDiscoverTool: ToolConfig<
9298

9399
const data = await response.json()
94100

95-
let results: Array<{
96-
url: string | null
97-
title: string | null
98-
description: string | null
99-
relevanceScore: number | null
100-
content: string | null
101-
}> = []
102-
103-
const items = Array.isArray(data) ? data : (data?.results ?? data?.data ?? [])
104-
105-
if (Array.isArray(items)) {
106-
results = items.map((item: Record<string, unknown>) => ({
107-
url: (item.link as string) ?? (item.url as string) ?? null,
108-
title: (item.title as string) ?? null,
109-
description: (item.description as string) ?? (item.snippet as string) ?? null,
110-
relevanceScore: (item.relevance_score as number) ?? null,
111-
content:
112-
(item.content as string) ?? (item.text as string) ?? (item.markdown as string) ?? null,
113-
}))
114-
}
115-
116101
return {
117102
success: true,
118103
output: {
119-
results,
104+
results: [],
120105
query: params?.query ?? null,
121-
totalResults: results.length,
106+
totalResults: 0,
107+
taskId: data.task_id ?? null,
122108
},
109+
} as BrightDataDiscoverResponse
110+
},
111+
112+
postProcess: async (result, params) => {
113+
if (!result.success) return result
114+
115+
const taskId = (result.output as Record<string, unknown>).taskId as string | null
116+
if (!taskId) {
117+
throw new Error('Discover API did not return a task_id. Cannot poll for results.')
118+
}
119+
120+
logger.info(`Bright Data Discover task ${taskId} created, polling for results...`)
121+
122+
let elapsedTime = 0
123+
124+
while (elapsedTime < MAX_POLL_TIME_MS) {
125+
const pollResponse = await fetch(
126+
`https://api.brightdata.com/discover?task_id=${encodeURIComponent(taskId)}`,
127+
{
128+
method: 'GET',
129+
headers: {
130+
Authorization: `Bearer ${params.apiKey}`,
131+
},
132+
}
133+
)
134+
135+
if (!pollResponse.ok) {
136+
throw new Error(`Failed to poll discover results: ${pollResponse.statusText}`)
137+
}
138+
139+
const data = await pollResponse.json()
140+
logger.info(`Bright Data Discover task ${taskId} status: ${data.status}`)
141+
142+
if (data.status === 'done') {
143+
const items = Array.isArray(data.results) ? data.results : []
144+
145+
const results = items.map((item: Record<string, unknown>) => ({
146+
url: (item.link as string) ?? (item.url as string) ?? null,
147+
title: (item.title as string) ?? null,
148+
description: (item.description as string) ?? (item.snippet as string) ?? null,
149+
relevanceScore: (item.relevance_score as number) ?? null,
150+
content: (item.content as string) ?? null,
151+
}))
152+
153+
return {
154+
success: true,
155+
output: {
156+
results,
157+
query: params.query ?? null,
158+
totalResults: results.length,
159+
},
160+
} as BrightDataDiscoverResponse
161+
}
162+
163+
if (data.status === 'failed' || data.status === 'error') {
164+
throw new Error(`Discover task failed: ${data.error ?? 'Unknown error'}`)
165+
}
166+
167+
await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
168+
elapsedTime += POLL_INTERVAL_MS
123169
}
170+
171+
throw new Error(
172+
`Discover task ${taskId} timed out after ${MAX_POLL_TIME_MS / 1000}s. Check status manually.`
173+
)
124174
},
125175

126176
outputs: {

0 commit comments

Comments
 (0)