From c13aaf5d1d240d8d82294b93bfdf3b02723a13cb Mon Sep 17 00:00:00 2001 From: rakshith48 Date: Thu, 4 Jun 2026 13:00:13 +0530 Subject: [PATCH 1/6] feat(firecrawl): migrate FireCrawl loader to Firecrawl v2 (v4) SDK Replace the hand-rolled v1 REST client in the FireCrawl document loader with the official @mendable/firecrawl-js v2 API (Firecrawl class) and bump the dependency from ^1.18.2 to ^4.25.2. - Use `new Firecrawl({ apiKey, apiUrl })` and its `.scrape` / `.crawl` / `.search` / `.extract` methods instead of manual axios calls to /v1/*. - Adapt to v2 response shapes: scrape/crawl return Document(s) directly (no { success, data } envelope); crawl returns a CrawlJob with `.data`; search returns results grouped by source (use `.web`). - Preserve the node's inputs, modes, defaults, and Document/Text output shape. Search `country` now maps to v2's single `location` field, since v1's separate `lang`/`country` params were removed in v2. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../documentloaders/FireCrawl/FireCrawl.ts | 717 ++++-------------- packages/components/package.json | 2 +- 2 files changed, 138 insertions(+), 581 deletions(-) diff --git a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts index 654fd59e8f3..6c01b618d80 100644 --- a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts +++ b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts @@ -1,542 +1,58 @@ import { TextSplitter } from '@langchain/textsplitters' import { Document, DocumentInterface } from '@langchain/core/documents' import { BaseDocumentLoader } from '@langchain/classic/document_loaders/base' +import Firecrawl, { + type Document as FirecrawlDocument, + type ScrapeOptions, + type CrawlOptions, + type SearchRequest, + type SearchResultWeb +} from '@mendable/firecrawl-js' import { INode, INodeData, INodeParams, ICommonObject, INodeOutputsValue } from '../../../src/Interface' import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils' -import { AxiosResponse, AxiosRequestHeaders } from 'axios' -import { secureAxiosRequest } from '../../../src/httpSecurity' -import { z } from 'zod/v3' -// FirecrawlApp interfaces -interface FirecrawlAppConfig { - apiKey?: string | null - apiUrl?: string | null +// Identifies Firecrawl requests originating from Flowise (carried through on every call). +const FIRECRAWL_INTEGRATION = 'flowise' + +// Loader-level parameters bundled by the node before delegating to the v2 SDK. +interface ScrapeParams { + includeTags?: string | string[] + excludeTags?: string | string[] + includePaths?: string | string[] + excludePaths?: string | string[] + onlyMainContent?: boolean + mobile?: boolean + skipTlsVerification?: boolean + timeout?: number + limit?: number } -interface FirecrawlDocumentMetadata { - title?: string - description?: string - language?: string - sourceURL?: string - statusCode?: number - error?: string +interface LoaderParams { [key: string]: any -} - -interface FirecrawlDocument { - markdown?: string - html?: string - rawHtml?: string - screenshot?: string - links?: string[] - actions?: { - screenshots?: string[] - } - metadata: FirecrawlDocumentMetadata - llm_extraction?: Record - warning?: string -} - -interface ScrapeResponse { - success: boolean - data?: FirecrawlDocument - error?: string -} - -interface CrawlResponse { - success: boolean - id: string - url: string - error?: string - data?: FirecrawlDocument -} - -interface CrawlStatusResponse { - status: string - total: number - completed: number - creditsUsed: number - expiresAt: string - next?: string - data?: FirecrawlDocument[] -} - -interface ExtractResponse { - success: boolean - id: string - url: string - data?: Record -} - -interface SearchResult { - url: string - title: string - description: string -} - -interface SearchResponse { - success: boolean - data?: SearchResult[] - warning?: string -} - -interface SearchRequest { - query: string + scrapeOptions?: ScrapeParams + // crawl limit?: number + maxDepth?: number + maxDiscoveryDepth?: number + ignoreQueryParameters?: boolean + allowExternalLinks?: boolean + delay?: number + // extract + schema?: Record + prompt?: string + // search tbs?: string - lang?: string - country?: string location?: string - timeout?: number + country?: string ignoreInvalidURLs?: boolean } -interface Params { - [key: string]: any - extractorOptions?: { - extractionSchema: z.ZodSchema | any - mode?: 'llm-extraction' - extractionPrompt?: string - } -} - -interface ExtractRequest { - urls: string[] - prompt?: string - schema?: Record - enableWebSearch?: boolean - ignoreSitemap?: boolean - includeSubdomains?: boolean - showSources?: boolean - scrapeOptions?: { - formats?: string[] - onlyMainContent?: boolean - includeTags?: string | string[] - excludeTags?: string | string[] - mobile?: boolean - skipTlsVerification?: boolean - timeout?: number - jsonOptions?: { - schema?: Record - prompt?: string - } - } -} - -interface ExtractStatusResponse { - success: boolean - data: any - status: 'completed' | 'pending' | 'processing' | 'failed' | 'cancelled' - expiresAt: string -} - -// FirecrawlApp class (not exported) -class FirecrawlApp { - private apiKey: string - private apiUrl: string - - constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) { - this.apiKey = apiKey || '' - this.apiUrl = apiUrl || 'https://api.firecrawl.dev' - if (!this.apiKey) { - throw new Error('No API key provided') - } - } - - async scrapeUrl(url: string, params: Params | null = null): Promise { - const headers = this.prepareHeaders() - - // Create a clean payload with only valid parameters - const validParams: any = { - url, - formats: ['markdown'], - onlyMainContent: true - } - - // Add optional parameters if they exist - if (params?.scrapeOptions) { - if (params.scrapeOptions.includeTags) { - validParams.includeTags = Array.isArray(params.scrapeOptions.includeTags) - ? params.scrapeOptions.includeTags - : params.scrapeOptions.includeTags.split(',') - } - if (params.scrapeOptions.excludeTags) { - validParams.excludeTags = Array.isArray(params.scrapeOptions.excludeTags) - ? params.scrapeOptions.excludeTags - : params.scrapeOptions.excludeTags.split(',') - } - if (params.scrapeOptions.mobile !== undefined) { - validParams.mobile = params.scrapeOptions.mobile - } - if (params.scrapeOptions.skipTlsVerification !== undefined) { - validParams.skipTlsVerification = params.scrapeOptions.skipTlsVerification - } - if (params.scrapeOptions.timeout) { - validParams.timeout = params.scrapeOptions.timeout - } - } - - // Add JSON options if they exist - if (params?.extractorOptions) { - validParams.jsonOptions = { - schema: params.extractorOptions.extractionSchema, - prompt: params.extractorOptions.extractionPrompt - } - } - - try { - const parameters = { - ...validParams, - integration: 'flowise' - } - const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/scrape', parameters, headers) - if (response.status === 200) { - const responseData = response.data - if (responseData.success) { - return responseData - } else { - throw new Error(`Failed to scrape URL. Error: ${responseData.error}`) - } - } else { - this.handleError(response, 'scrape URL') - } - } catch (error: any) { - throw new Error(error.message) - } - return { success: false, error: 'Internal server error.' } - } - - async crawlUrl( - url: string, - params: Params | null = null, - waitUntilDone: boolean = true, - pollInterval: number = 2, - idempotencyKey?: string - ): Promise { - const headers = this.prepareHeaders(idempotencyKey) - - // Create a clean payload with only valid parameters - const validParams: any = { - url - } - - // Add scrape options with only non-empty values - const scrapeOptions: any = { - formats: ['markdown'], - onlyMainContent: true - } - - // Add crawl-specific parameters if they exist and are not empty - if (params) { - const validCrawlParams = [ - 'excludePaths', - 'includePaths', - 'maxDepth', - 'maxDiscoveryDepth', - 'ignoreSitemap', - 'ignoreQueryParameters', - 'limit', - 'allowBackwardLinks', - 'allowExternalLinks', - 'delay' - ] - - validCrawlParams.forEach((param) => { - if (params[param] !== undefined && params[param] !== null && params[param] !== '') { - validParams[param] = params[param] - } - }) - } - - // Add scrape options if they exist and are not empty - if (params?.scrapeOptions) { - if (params.scrapeOptions.includePaths) { - const includePaths = Array.isArray(params.scrapeOptions.includePaths) - ? params.scrapeOptions.includePaths - : params.scrapeOptions.includePaths.split(',') - if (includePaths.length > 0) { - validParams.includePaths = includePaths - } - } - - if (params.scrapeOptions.excludePaths) { - const excludePaths = Array.isArray(params.scrapeOptions.excludePaths) - ? params.scrapeOptions.excludePaths - : params.scrapeOptions.excludePaths.split(',') - if (excludePaths.length > 0) { - validParams.excludePaths = excludePaths - } - } - - if (params.scrapeOptions.limit) { - validParams.limit = params.scrapeOptions.limit - } - - const validScrapeParams = ['mobile', 'skipTlsVerification', 'timeout', 'includeTags', 'excludeTags', 'onlyMainContent'] - - validScrapeParams.forEach((param) => { - if (params.scrapeOptions[param] !== undefined && params.scrapeOptions[param] !== null) { - scrapeOptions[param] = params.scrapeOptions[param] - } - }) - } - - // Only add scrapeOptions if it has more than just the default values - if (Object.keys(scrapeOptions).length > 2) { - validParams.scrapeOptions = scrapeOptions - } - - try { - const parameters = { - ...validParams, - integration: 'flowise' - } - const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/crawl', parameters, headers) - if (response.status === 200) { - const crawlResponse = response.data as CrawlResponse - if (!crawlResponse.success) { - throw new Error(`Crawl request failed: ${crawlResponse.error || 'Unknown error'}`) - } - - if (waitUntilDone) { - return this.monitorJobStatus(crawlResponse.id, headers, pollInterval) - } else { - return crawlResponse - } - } else { - this.handleError(response, 'start crawl job') - } - } catch (error: any) { - if (error.response?.data?.error) { - throw new Error(`Crawl failed: ${error.response.data.error}`) - } - throw new Error(`Crawl failed: ${error.message}`) - } - - return { success: false, id: '', url: '' } - } - - async extract( - request: ExtractRequest, - waitUntilDone: boolean = true, - pollInterval: number = 2 - ): Promise { - const headers = this.prepareHeaders() - - // Create a clean payload with only valid parameters - const validParams: any = { - urls: request.urls - } - - // Add optional parameters if they exist and are not empty - if (request.prompt) { - validParams.prompt = request.prompt - } - - if (request.schema) { - validParams.schema = request.schema - } - - const validExtractParams = ['enableWebSearch', 'ignoreSitemap', 'includeSubdomains', 'showSources'] as const - - validExtractParams.forEach((param) => { - if (request[param] !== undefined && request[param] !== null) { - validParams[param] = request[param] - } - }) - - // Add scrape options if they exist - if (request.scrapeOptions) { - const scrapeOptions: any = { - formats: ['markdown'], - onlyMainContent: true - } - - // Handle includeTags - if (request.scrapeOptions.includeTags) { - const includeTags = Array.isArray(request.scrapeOptions.includeTags) - ? request.scrapeOptions.includeTags - : request.scrapeOptions.includeTags.split(',') - if (includeTags.length > 0) { - scrapeOptions.includeTags = includeTags - } - } - - // Handle excludeTags - if (request.scrapeOptions.excludeTags) { - const excludeTags = Array.isArray(request.scrapeOptions.excludeTags) - ? request.scrapeOptions.excludeTags - : request.scrapeOptions.excludeTags.split(',') - if (excludeTags.length > 0) { - scrapeOptions.excludeTags = excludeTags - } - } - - // Add other scrape options if they exist and are not empty - const validScrapeParams = ['mobile', 'skipTlsVerification', 'timeout'] as const - - validScrapeParams.forEach((param) => { - if (request.scrapeOptions?.[param] !== undefined && request.scrapeOptions?.[param] !== null) { - scrapeOptions[param] = request.scrapeOptions[param] - } - }) - - // Add JSON options if they exist - if (request.scrapeOptions.jsonOptions) { - scrapeOptions.jsonOptions = {} - if (request.scrapeOptions.jsonOptions.schema) { - scrapeOptions.jsonOptions.schema = request.scrapeOptions.jsonOptions.schema - } - if (request.scrapeOptions.jsonOptions.prompt) { - scrapeOptions.jsonOptions.prompt = request.scrapeOptions.jsonOptions.prompt - } - } - - // Only add scrapeOptions if it has more than just the default values - if (Object.keys(scrapeOptions).length > 2) { - validParams.scrapeOptions = scrapeOptions - } - } - - try { - const parameters = { - ...validParams, - integration: 'flowise' - } - const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/extract', parameters, headers) - if (response.status === 200) { - const extractResponse = response.data as ExtractResponse - if (waitUntilDone) { - return this.monitorExtractStatus(extractResponse.id, headers, pollInterval) - } else { - return extractResponse - } - } else { - this.handleError(response, 'start extract job') - } - } catch (error: any) { - throw new Error(error.message) - } - return { success: false, id: '', url: '' } - } - - async search(request: SearchRequest): Promise { - const headers = this.prepareHeaders() - - // Create a clean payload with only valid parameters - const validParams: any = { - query: request.query - } - - // Add optional parameters if they exist and are not empty - const validSearchParams = ['limit', 'tbs', 'lang', 'country', 'location', 'timeout', 'ignoreInvalidURLs'] as const - - validSearchParams.forEach((param) => { - if (request[param] !== undefined && request[param] !== null) { - validParams[param] = request[param] - } - }) - - try { - const parameters = { - ...validParams, - integration: 'flowise' - } - const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/search', parameters, headers) - if (response.status === 200) { - const searchResponse = response.data as SearchResponse - if (!searchResponse.success) { - throw new Error(`Search request failed: ${searchResponse.warning || 'Unknown error'}`) - } - return searchResponse - } else { - this.handleError(response, 'perform search') - } - } catch (error: any) { - throw new Error(error.message) - } - return { success: false } - } - - private prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders { - return { - 'Content-Type': 'application/json', - Authorization: `Bearer ${this.apiKey}`, - ...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}) - } as AxiosRequestHeaders & { 'x-idempotency-key'?: string } - } - - private async postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise { - const result = await secureAxiosRequest({ method: 'POST', url, data, headers }) - return result - } - - private getRequest(url: string, headers: AxiosRequestHeaders): Promise { - return secureAxiosRequest({ method: 'GET', url, headers }) - } - - private async monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, checkInterval: number): Promise { - let isJobCompleted = false - while (!isJobCompleted) { - const statusResponse: AxiosResponse = await this.getRequest(this.apiUrl + `/v1/crawl/${jobId}`, headers) - if (statusResponse.status === 200) { - const statusData = statusResponse.data as CrawlStatusResponse - switch (statusData.status) { - case 'completed': - isJobCompleted = true - return statusData - case 'scraping': - case 'failed': - if (statusData.status === 'failed') { - throw new Error('Crawl job failed') - } - await new Promise((resolve) => setTimeout(resolve, Math.max(checkInterval, 2) * 1000)) - break - default: - throw new Error(`Unknown crawl status: ${statusData.status}`) - } - } else { - this.handleError(statusResponse, 'check crawl status') - } - } - throw new Error('Failed to monitor job status') - } - - private async monitorExtractStatus(jobId: string, headers: AxiosRequestHeaders, checkInterval: number): Promise { - let isJobCompleted = false - while (!isJobCompleted) { - const statusResponse: AxiosResponse = await this.getRequest(this.apiUrl + `/v1/extract/${jobId}`, headers) - if (statusResponse.status === 200) { - const statusData = statusResponse.data as ExtractStatusResponse - switch (statusData.status) { - case 'completed': - isJobCompleted = true - return statusData - case 'processing': - case 'failed': - if (statusData.status === 'failed') { - throw new Error('Extract job failed') - } - await new Promise((resolve) => setTimeout(resolve, Math.max(checkInterval, 2) * 1000)) - break - default: - throw new Error(`Unknown extract status: ${statusData.status}`) - } - } else { - this.handleError(statusResponse, 'check extract status') - } - } - throw new Error('Failed to monitor extract status') - } - - private handleError(response: AxiosResponse, action: string): void { - if ([402, 408, 409, 500].includes(response.status)) { - const errorMessage: string = response.data.error || 'Unknown error occurred' - throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`) - } else { - throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`) - } - } +// Normalize a value that may be a comma-separated string or an array into a string array. +function toStringArray(value?: string | string[]): string[] | undefined { + if (value === undefined || value === null) return undefined + const arr = Array.isArray(value) ? value : value.split(',') + const cleaned = arr.map((v) => v.trim()).filter((v) => v.length > 0) + return cleaned.length > 0 ? cleaned : undefined } // FireCrawl Loader @@ -546,7 +62,7 @@ interface FirecrawlLoaderParameters { apiKey?: string apiUrl?: string mode?: 'crawl' | 'scrape' | 'extract' | 'search' - params?: Record + params?: LoaderParams } export class FireCrawlLoader extends BaseDocumentLoader { @@ -555,7 +71,7 @@ export class FireCrawlLoader extends BaseDocumentLoader { private url?: string private query?: string private mode: 'crawl' | 'scrape' | 'extract' | 'search' - private params?: Record + private params?: LoaderParams constructor(loaderParams: FirecrawlLoaderParameters) { super() @@ -572,84 +88,127 @@ export class FireCrawlLoader extends BaseDocumentLoader { this.apiUrl = apiUrl || 'https://api.firecrawl.dev' } + // Build the v2 scrape options shared by scrape/crawl modes. + private buildScrapeOptions(): ScrapeOptions { + const scrapeOptions: ScrapeOptions = { + formats: ['markdown'], + onlyMainContent: true, + integration: FIRECRAWL_INTEGRATION + } + + const opts = this.params?.scrapeOptions + if (opts) { + const includeTags = toStringArray(opts.includeTags) + if (includeTags) scrapeOptions.includeTags = includeTags + + const excludeTags = toStringArray(opts.excludeTags) + if (excludeTags) scrapeOptions.excludeTags = excludeTags + + if (opts.onlyMainContent !== undefined) scrapeOptions.onlyMainContent = opts.onlyMainContent + if (opts.mobile !== undefined) scrapeOptions.mobile = opts.mobile + if (opts.skipTlsVerification !== undefined) scrapeOptions.skipTlsVerification = opts.skipTlsVerification + if (opts.timeout) scrapeOptions.timeout = opts.timeout + } + + return scrapeOptions + } + public async load(): Promise { - const app = new FirecrawlApp({ apiKey: this.apiKey, apiUrl: this.apiUrl }) + const app = new Firecrawl({ apiKey: this.apiKey, apiUrl: this.apiUrl }) let firecrawlDocs: FirecrawlDocument[] if (this.mode === 'search') { if (!this.query) { throw new Error('Firecrawl: Query is required for search mode') } - const response = await app.search({ query: this.query, ...this.params }) - if (!response.success) { - throw new Error(`Firecrawl: Failed to search. Warning: ${response.warning}`) - } - // Convert search results to FirecrawlDocument format - firecrawlDocs = (response.data || []).map((result) => ({ - markdown: result.description, - metadata: { - title: result.title, - sourceURL: result.url, - description: result.description - } - })) + const searchReq: Omit = { + integration: FIRECRAWL_INTEGRATION + } + if (this.params?.limit !== undefined) searchReq.limit = this.params.limit + if (this.params?.tbs) searchReq.tbs = this.params.tbs + // v2 search exposes a single `location` string (v1's separate `country`/`lang` were removed). + // Fall back to the country code so existing node configurations still influence results. + const location = this.params?.location || this.params?.country + if (location) searchReq.location = location + if (this.params?.timeout !== undefined) searchReq.timeout = this.params.timeout + if (this.params?.ignoreInvalidURLs !== undefined) searchReq.ignoreInvalidURLs = this.params.ignoreInvalidURLs + + const response = await app.search(this.query, searchReq) + + // v2 returns results grouped by source. Use web results and normalize each entry + // (which may be a lightweight SearchResultWeb or a full Document when scrapeOptions are set). + const webResults = response.web ?? [] + firecrawlDocs = webResults.map((result: SearchResultWeb | FirecrawlDocument) => { + if ('markdown' in result || 'html' in result || 'metadata' in result) { + return result as FirecrawlDocument + } + const web = result as SearchResultWeb + return { + markdown: web.description, + metadata: { + title: web.title, + sourceURL: web.url, + description: web.description + } + } as FirecrawlDocument + }) } else if (this.mode === 'scrape') { if (!this.url) { throw new Error('Firecrawl: URL is required for scrape mode') } - const response = await app.scrapeUrl(this.url, this.params) - if (!response.success) { - throw new Error(`Firecrawl: Failed to scrape URL. Error: ${response.error}`) - } - firecrawlDocs = [response.data as FirecrawlDocument] + const response = await app.scrape(this.url, this.buildScrapeOptions()) + firecrawlDocs = [response] } else if (this.mode === 'crawl') { if (!this.url) { throw new Error('Firecrawl: URL is required for crawl mode') } - const response = await app.crawlUrl(this.url, this.params) - if ('status' in response) { - if (response.status === 'failed') { - throw new Error('Firecrawl: Crawl job failed') - } - firecrawlDocs = response.data || [] - } else { - if (!response.success) { - throw new Error(`Firecrawl: Failed to scrape URL. Error: ${response.error}`) - } - firecrawlDocs = [response.data as FirecrawlDocument] + + const crawlOptions: CrawlOptions & { pollInterval?: number } = { + integration: FIRECRAWL_INTEGRATION, + pollInterval: 2, + scrapeOptions: this.buildScrapeOptions() + } + + const includePaths = toStringArray(this.params?.scrapeOptions?.includePaths) + if (includePaths) crawlOptions.includePaths = includePaths + + const excludePaths = toStringArray(this.params?.scrapeOptions?.excludePaths) + if (excludePaths) crawlOptions.excludePaths = excludePaths + + const limit = this.params?.scrapeOptions?.limit ?? this.params?.limit + if (limit !== undefined && limit !== null) crawlOptions.limit = limit + if (this.params?.maxDiscoveryDepth !== undefined) crawlOptions.maxDiscoveryDepth = this.params.maxDiscoveryDepth + if (this.params?.ignoreQueryParameters !== undefined) crawlOptions.ignoreQueryParameters = this.params.ignoreQueryParameters + if (this.params?.allowExternalLinks !== undefined) crawlOptions.allowExternalLinks = this.params.allowExternalLinks + if (this.params?.delay !== undefined) crawlOptions.delay = this.params.delay + + const response = await app.crawl(this.url, crawlOptions) + if (response.status === 'failed') { + throw new Error('Firecrawl: Crawl job failed') } + firecrawlDocs = response.data || [] } else if (this.mode === 'extract') { if (!this.url) { throw new Error('Firecrawl: URL is required for extract mode') } - this.params!.urls = [this.url] - const response = await app.extract(this.params as any as ExtractRequest) - if (!response.success) { - throw new Error(`Firecrawl: Failed to extract URL.`) - } - // Convert extract response to document format - if ('data' in response && response.data) { - // Create a document from the extracted data - const extractedData = response.data - const content = JSON.stringify(extractedData, null, 2) + const response = await app.extract({ + urls: [this.url], + prompt: this.params?.prompt, + schema: this.params?.schema, + integration: FIRECRAWL_INTEGRATION + }) + if (response.data) { + const content = JSON.stringify(response.data, null, 2) const metadata: Record = { source: this.url, - type: 'extracted_data' - } - - // Add status and expiresAt if they exist in the response - if ('status' in response) { - metadata.status = response.status - } - if ('data' in response) { - metadata.data = response.data - } - if ('expiresAt' in response) { - metadata.expiresAt = response.expiresAt + type: 'extracted_data', + data: response.data } + if (response.status) metadata.status = response.status + if (response.expiresAt) metadata.expiresAt = response.expiresAt return [ new Document({ @@ -960,7 +519,6 @@ class FireCrawl_DocumentLoaders implements INode { const searchQuery = nodeData.inputs?.searchQuery as string const searchLimit = nodeData.inputs?.searchLimit as string - const searchLang = nodeData.inputs?.searchLang as string const searchCountry = nodeData.inputs?.searchCountry as string const searchTimeout = nodeData.inputs?.searchTimeout as number @@ -990,7 +548,6 @@ class FireCrawl_DocumentLoaders implements INode { } input.params = { limit: searchLimit ? parseInt(searchLimit, 10) : 5, - lang: searchLang, country: searchCountry, timeout: searchTimeout } diff --git a/packages/components/package.json b/packages/components/package.json index bebf3c06f3e..68306e22d68 100644 --- a/packages/components/package.json +++ b/packages/components/package.json @@ -85,7 +85,7 @@ "@langchain/weaviate": "1.0.1", "@langchain/xai": "1.3.1", "@mem0/community": "^0.0.1", - "@mendable/firecrawl-js": "^1.18.2", + "@mendable/firecrawl-js": "^4.25.2", "@mistralai/mistralai": "1.14.0", "@modelcontextprotocol/sdk": "1.29.0", "@modelcontextprotocol/server-postgres": "^0.6.2", From 3b46907ca9695f7756f52715259589b8b6078204 Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Thu, 4 Jun 2026 13:08:26 +0530 Subject: [PATCH 2/6] fix: use canonical `firecrawl` npm package (not legacy @mendable/firecrawl-js) Both names dual-publish the identical v4 SDK; `firecrawl` is the current canonical package. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../components/nodes/documentloaders/FireCrawl/FireCrawl.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts index 6c01b618d80..6d9f3e2d791 100644 --- a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts +++ b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts @@ -7,7 +7,7 @@ import Firecrawl, { type CrawlOptions, type SearchRequest, type SearchResultWeb -} from '@mendable/firecrawl-js' +} from 'firecrawl' import { INode, INodeData, INodeParams, ICommonObject, INodeOutputsValue } from '../../../src/Interface' import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils' From 64413d9322edebfbb1d3fc9a23d870514e10cac6 Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Thu, 4 Jun 2026 13:08:28 +0530 Subject: [PATCH 3/6] fix: use canonical `firecrawl` npm package (not legacy @mendable/firecrawl-js) Both names dual-publish the identical v4 SDK; `firecrawl` is the current canonical package. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/components/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/components/package.json b/packages/components/package.json index 68306e22d68..3ce348bde57 100644 --- a/packages/components/package.json +++ b/packages/components/package.json @@ -85,7 +85,7 @@ "@langchain/weaviate": "1.0.1", "@langchain/xai": "1.3.1", "@mem0/community": "^0.0.1", - "@mendable/firecrawl-js": "^4.25.2", + "firecrawl": "^4.25.2", "@mistralai/mistralai": "1.14.0", "@modelcontextprotocol/sdk": "1.29.0", "@modelcontextprotocol/server-postgres": "^0.6.2", From 88635fc8e9f67f5936591a2f56d0b1e34e49d68c Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Thu, 4 Jun 2026 13:35:05 +0530 Subject: [PATCH 4/6] =?UTF-8?q?fix(firecrawl):=20back-compat=20=E2=80=94?= =?UTF-8?q?=20map=20legacy=20crawlerOptions.maxDepth=20to=20v2=20maxDiscov?= =?UTF-8?q?eryDepth?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts index 6d9f3e2d791..b091461219e 100644 --- a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts +++ b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts @@ -179,6 +179,7 @@ export class FireCrawlLoader extends BaseDocumentLoader { const limit = this.params?.scrapeOptions?.limit ?? this.params?.limit if (limit !== undefined && limit !== null) crawlOptions.limit = limit if (this.params?.maxDiscoveryDepth !== undefined) crawlOptions.maxDiscoveryDepth = this.params.maxDiscoveryDepth + else if (this.params?.maxDepth !== undefined) crawlOptions.maxDiscoveryDepth = this.params.maxDepth // back-compat: v1 crawlerOptions used maxDepth if (this.params?.ignoreQueryParameters !== undefined) crawlOptions.ignoreQueryParameters = this.params.ignoreQueryParameters if (this.params?.allowExternalLinks !== undefined) crawlOptions.allowExternalLinks = this.params.allowExternalLinks if (this.params?.delay !== undefined) crawlOptions.delay = this.params.delay From 2ad2c9f23163938ec09da3d7ebaff161e6fa4d05 Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Thu, 4 Jun 2026 16:11:44 +0530 Subject: [PATCH 5/6] fix(firecrawl): throw on failed extract response (ExtractResponse has success/status/error) Co-Authored-By: Claude Opus 4.8 (1M context) --- .../components/nodes/documentloaders/FireCrawl/FireCrawl.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts index b091461219e..1bf31168c42 100644 --- a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts +++ b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts @@ -201,6 +201,10 @@ export class FireCrawlLoader extends BaseDocumentLoader { integration: FIRECRAWL_INTEGRATION }) + if (response.success === false || response.status === 'failed') { + throw new Error('Firecrawl: extract failed. Error: ' + (response.error ?? 'unknown error')) + } + if (response.data) { const content = JSON.stringify(response.data, null, 2) const metadata: Record = { From acf49f8e86abeb5ff97f4ab47ddcc542fd50faf3 Mon Sep 17 00:00:00 2001 From: Rakshith Ramprakash Date: Thu, 4 Jun 2026 17:33:12 +0530 Subject: [PATCH 6/6] refactor(firecrawl): remove deprecated extract mode from the node Co-Authored-By: Claude Opus 4.8 (1M context) --- .../documentloaders/FireCrawl/FireCrawl.ts | 80 ++----------------- 1 file changed, 6 insertions(+), 74 deletions(-) diff --git a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts index 1bf31168c42..76e3a71ed6f 100644 --- a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts +++ b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts @@ -37,9 +37,6 @@ interface LoaderParams { ignoreQueryParameters?: boolean allowExternalLinks?: boolean delay?: number - // extract - schema?: Record - prompt?: string // search tbs?: string location?: string @@ -61,7 +58,7 @@ interface FirecrawlLoaderParameters { query?: string apiKey?: string apiUrl?: string - mode?: 'crawl' | 'scrape' | 'extract' | 'search' + mode?: 'crawl' | 'scrape' | 'search' params?: LoaderParams } @@ -70,7 +67,7 @@ export class FireCrawlLoader extends BaseDocumentLoader { private apiUrl: string private url?: string private query?: string - private mode: 'crawl' | 'scrape' | 'extract' | 'search' + private mode: 'crawl' | 'scrape' | 'search' private params?: LoaderParams constructor(loaderParams: FirecrawlLoaderParameters) { @@ -189,42 +186,8 @@ export class FireCrawlLoader extends BaseDocumentLoader { throw new Error('Firecrawl: Crawl job failed') } firecrawlDocs = response.data || [] - } else if (this.mode === 'extract') { - if (!this.url) { - throw new Error('Firecrawl: URL is required for extract mode') - } - - const response = await app.extract({ - urls: [this.url], - prompt: this.params?.prompt, - schema: this.params?.schema, - integration: FIRECRAWL_INTEGRATION - }) - - if (response.success === false || response.status === 'failed') { - throw new Error('Firecrawl: extract failed. Error: ' + (response.error ?? 'unknown error')) - } - - if (response.data) { - const content = JSON.stringify(response.data, null, 2) - const metadata: Record = { - source: this.url, - type: 'extracted_data', - data: response.data - } - if (response.status) metadata.status = response.status - if (response.expiresAt) metadata.expiresAt = response.expiresAt - - return [ - new Document({ - pageContent: content, - metadata - }) - ] - } - return [] } else { - throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape', 'extract', 'search'.`) + throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape', 'search'.`) } // Convert Firecrawl documents to LangChain documents @@ -301,11 +264,6 @@ class FireCrawl_DocumentLoaders implements INode { name: 'scrape', description: 'Scrape a URL and get its content' }, - { - label: 'Extract', - name: 'extract', - description: 'Extract data from a URL' - }, { label: 'Search', name: 'search', @@ -318,11 +276,11 @@ class FireCrawl_DocumentLoaders implements INode { label: 'URLs', name: 'url', type: 'string', - description: 'URL to be crawled/scraped/extracted', + description: 'URL to be crawled/scraped', placeholder: 'https://docs.flowiseai.com', optional: true, show: { - crawlerType: ['crawl', 'scrape', 'extract'] + crawlerType: ['crawl', 'scrape'] } }, { @@ -399,28 +357,6 @@ class FireCrawl_DocumentLoaders implements INode { crawlerType: ['crawl'] } }, - { - label: 'Schema', - name: 'extractSchema', - type: 'json', - description: 'JSON schema for data extraction', - optional: true, - additionalParams: true, - show: { - crawlerType: ['extract'] - } - }, - { - label: 'Prompt', - name: 'extractPrompt', - type: 'string', - description: 'Prompt for data extraction', - optional: true, - additionalParams: true, - show: { - crawlerType: ['extract'] - } - }, { label: 'Query', name: 'searchQuery', @@ -519,8 +455,6 @@ class FireCrawl_DocumentLoaders implements INode { const includeTags = nodeData.inputs?.includeTags ? (nodeData.inputs.includeTags.split(',') as string[]) : undefined const excludeTags = nodeData.inputs?.excludeTags ? (nodeData.inputs.excludeTags.split(',') as string[]) : undefined - const extractSchema = nodeData.inputs?.extractSchema - const extractPrompt = nodeData.inputs?.extractPrompt as string const searchQuery = nodeData.inputs?.searchQuery as string const searchLimit = nodeData.inputs?.searchLimit as string @@ -530,7 +464,7 @@ class FireCrawl_DocumentLoaders implements INode { const input: FirecrawlLoaderParameters = { url, query: searchQuery, - mode: crawlerType as 'crawl' | 'scrape' | 'extract' | 'search', + mode: crawlerType as 'crawl' | 'scrape' | 'search', apiKey: firecrawlApiToken, apiUrl: firecrawlApiUrl, params: { @@ -541,8 +475,6 @@ class FireCrawl_DocumentLoaders implements INode { includeTags, excludeTags }, - schema: extractSchema || undefined, - prompt: extractPrompt || undefined } }