From c13aaf5d1d240d8d82294b93bfdf3b02723a13cb Mon Sep 17 00:00:00 2001
From: rakshith48 <rak@sideguide.dev>
Date: Thu, 4 Jun 2026 13:00:13 +0530
Subject: [PATCH 1/6] feat(firecrawl): migrate FireCrawl loader to Firecrawl v2
 (v4) SDK

Replace the hand-rolled v1 REST client in the FireCrawl document loader
with the official @mendable/firecrawl-js v2 API (Firecrawl class) and bump
the dependency from ^1.18.2 to ^4.25.2.

- Use `new Firecrawl({ apiKey, apiUrl })` and its `.scrape` / `.crawl` /
  `.search` / `.extract` methods instead of manual axios calls to /v1/*.
- Adapt to v2 response shapes: scrape/crawl return Document(s) directly
  (no { success, data } envelope); crawl returns a CrawlJob with `.data`;
  search returns results grouped by source (use `.web`).
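- Preserve the node's inputs, modes, defaults, and Document/Text output
  shape. Search `country` now maps to v2's single `location` field (used
  only when no explicit `location` is set), since v1's separate
  `lang`/`country` params were removed in v2; `searchLang` is no longer read.
- Behavior change: crawl no longer forwards `maxDepth`, `ignoreSitemap`, or
  `allowBackwardLinks`, and extract sends only `urls`, `prompt`, and
  `schema` (plus the integration tag).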

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../documentloaders/FireCrawl/FireCrawl.ts    | 717 ++++--------------
 packages/components/package.json              |   2 +-
 2 files changed, 138 insertions(+), 581 deletions(-)

diff --git a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
index 654fd59e8f3..6c01b618d80 100644
--- a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
+++ b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
@@ -1,542 +1,58 @@
 import { TextSplitter } from '@langchain/textsplitters'
 import { Document, DocumentInterface } from '@langchain/core/documents'
 import { BaseDocumentLoader } from '@langchain/classic/document_loaders/base'
+import Firecrawl, {
+    type Document as FirecrawlDocument,
+    type ScrapeOptions,
+    type CrawlOptions,
+    type SearchRequest,
+    type SearchResultWeb
+} from '@mendable/firecrawl-js'
 import { INode, INodeData, INodeParams, ICommonObject, INodeOutputsValue } from '../../../src/Interface'
 import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
-import { AxiosResponse, AxiosRequestHeaders } from 'axios'
-import { secureAxiosRequest } from '../../../src/httpSecurity'
-import { z } from 'zod/v3'
 
-// FirecrawlApp interfaces
-interface FirecrawlAppConfig {
-    apiKey?: string | null
-    apiUrl?: string | null
+// Identifies Firecrawl requests originating from Flowise (carried through on every call).
+const FIRECRAWL_INTEGRATION = 'flowise'
+
+// Loader-level parameters bundled by the node before delegating to the v2 SDK.
+interface ScrapeParams {
+    includeTags?: string | string[] // comma-separated string or array; normalized by toStringArray
+    excludeTags?: string | string[] // comma-separated string or array; normalized by toStringArray
+    includePaths?: string | string[] // crawl mode: forwarded as CrawlOptions.includePaths
+    excludePaths?: string | string[] // crawl mode: forwarded as CrawlOptions.excludePaths
+    onlyMainContent?: boolean // overrides the loader default of true when defined
+    mobile?: boolean
+    skipTlsVerification?: boolean
+    timeout?: number // copied to ScrapeOptions.timeout only when truthy
+    limit?: number // crawl mode: takes precedence over the top-level params.limit
 }
 
-interface FirecrawlDocumentMetadata {
-    title?: string
-    description?: string
-    language?: string
-    sourceURL?: string
-    statusCode?: number
-    error?: string
+interface LoaderParams {
     [key: string]: any
-}
-
-interface FirecrawlDocument {
-    markdown?: string
-    html?: string
-    rawHtml?: string
-    screenshot?: string
-    links?: string[]
-    actions?: {
-        screenshots?: string[]
-    }
-    metadata: FirecrawlDocumentMetadata
-    llm_extraction?: Record<string, any>
-    warning?: string
-}
-
-interface ScrapeResponse {
-    success: boolean
-    data?: FirecrawlDocument
-    error?: string
-}
-
-interface CrawlResponse {
-    success: boolean
-    id: string
-    url: string
-    error?: string
-    data?: FirecrawlDocument
-}
-
-interface CrawlStatusResponse {
-    status: string
-    total: number
-    completed: number
-    creditsUsed: number
-    expiresAt: string
-    next?: string
-    data?: FirecrawlDocument[]
-}
-
-interface ExtractResponse {
-    success: boolean
-    id: string
-    url: string
-    data?: Record<string, any>
-}
-
-interface SearchResult {
-    url: string
-    title: string
-    description: string
-}
-
-interface SearchResponse {
-    success: boolean
-    data?: SearchResult[]
-    warning?: string
-}
-
-interface SearchRequest {
-    query: string
+    scrapeOptions?: ScrapeParams
+    // crawl (NOTE(review): load() forwards maxDiscoveryDepth but never maxDepth — confirm this is intended)
     limit?: number
+    maxDepth?: number
+    maxDiscoveryDepth?: number
+    ignoreQueryParameters?: boolean
+    allowExternalLinks?: boolean
+    delay?: number
+    // extract
+    schema?: Record<string, any>
+    prompt?: string
+    // search (country is only a fallback for location; timeout is read via the index signature)
     tbs?: string
-    lang?: string
-    country?: string
     location?: string
-    timeout?: number
+    country?: string
     ignoreInvalidURLs?: boolean
 }
 
-interface Params {
-    [key: string]: any
-    extractorOptions?: {
-        extractionSchema: z.ZodSchema | any
-        mode?: 'llm-extraction'
-        extractionPrompt?: string
-    }
-}
-
-interface ExtractRequest {
-    urls: string[]
-    prompt?: string
-    schema?: Record<string, any>
-    enableWebSearch?: boolean
-    ignoreSitemap?: boolean
-    includeSubdomains?: boolean
-    showSources?: boolean
-    scrapeOptions?: {
-        formats?: string[]
-        onlyMainContent?: boolean
-        includeTags?: string | string[]
-        excludeTags?: string | string[]
-        mobile?: boolean
-        skipTlsVerification?: boolean
-        timeout?: number
-        jsonOptions?: {
-            schema?: Record<string, any>
-            prompt?: string
-        }
-    }
-}
-
-interface ExtractStatusResponse {
-    success: boolean
-    data: any
-    status: 'completed' | 'pending' | 'processing' | 'failed' | 'cancelled'
-    expiresAt: string
-}
-
-// FirecrawlApp class (not exported)
-class FirecrawlApp {
-    private apiKey: string
-    private apiUrl: string
-
-    constructor({ apiKey = null, apiUrl = null }: FirecrawlAppConfig) {
-        this.apiKey = apiKey || ''
-        this.apiUrl = apiUrl || 'https://api.firecrawl.dev'
-        if (!this.apiKey) {
-            throw new Error('No API key provided')
-        }
-    }
-
-    async scrapeUrl(url: string, params: Params | null = null): Promise<ScrapeResponse> {
-        const headers = this.prepareHeaders()
-
-        // Create a clean payload with only valid parameters
-        const validParams: any = {
-            url,
-            formats: ['markdown'],
-            onlyMainContent: true
-        }
-
-        // Add optional parameters if they exist
-        if (params?.scrapeOptions) {
-            if (params.scrapeOptions.includeTags) {
-                validParams.includeTags = Array.isArray(params.scrapeOptions.includeTags)
-                    ? params.scrapeOptions.includeTags
-                    : params.scrapeOptions.includeTags.split(',')
-            }
-            if (params.scrapeOptions.excludeTags) {
-                validParams.excludeTags = Array.isArray(params.scrapeOptions.excludeTags)
-                    ? params.scrapeOptions.excludeTags
-                    : params.scrapeOptions.excludeTags.split(',')
-            }
-            if (params.scrapeOptions.mobile !== undefined) {
-                validParams.mobile = params.scrapeOptions.mobile
-            }
-            if (params.scrapeOptions.skipTlsVerification !== undefined) {
-                validParams.skipTlsVerification = params.scrapeOptions.skipTlsVerification
-            }
-            if (params.scrapeOptions.timeout) {
-                validParams.timeout = params.scrapeOptions.timeout
-            }
-        }
-
-        // Add JSON options if they exist
-        if (params?.extractorOptions) {
-            validParams.jsonOptions = {
-                schema: params.extractorOptions.extractionSchema,
-                prompt: params.extractorOptions.extractionPrompt
-            }
-        }
-
-        try {
-            const parameters = {
-                ...validParams,
-                integration: 'flowise'
-            }
-            const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/scrape', parameters, headers)
-            if (response.status === 200) {
-                const responseData = response.data
-                if (responseData.success) {
-                    return responseData
-                } else {
-                    throw new Error(`Failed to scrape URL. Error: ${responseData.error}`)
-                }
-            } else {
-                this.handleError(response, 'scrape URL')
-            }
-        } catch (error: any) {
-            throw new Error(error.message)
-        }
-        return { success: false, error: 'Internal server error.' }
-    }
-
-    async crawlUrl(
-        url: string,
-        params: Params | null = null,
-        waitUntilDone: boolean = true,
-        pollInterval: number = 2,
-        idempotencyKey?: string
-    ): Promise<CrawlResponse | CrawlStatusResponse> {
-        const headers = this.prepareHeaders(idempotencyKey)
-
-        // Create a clean payload with only valid parameters
-        const validParams: any = {
-            url
-        }
-
-        // Add scrape options with only non-empty values
-        const scrapeOptions: any = {
-            formats: ['markdown'],
-            onlyMainContent: true
-        }
-
-        // Add crawl-specific parameters if they exist and are not empty
-        if (params) {
-            const validCrawlParams = [
-                'excludePaths',
-                'includePaths',
-                'maxDepth',
-                'maxDiscoveryDepth',
-                'ignoreSitemap',
-                'ignoreQueryParameters',
-                'limit',
-                'allowBackwardLinks',
-                'allowExternalLinks',
-                'delay'
-            ]
-
-            validCrawlParams.forEach((param) => {
-                if (params[param] !== undefined && params[param] !== null && params[param] !== '') {
-                    validParams[param] = params[param]
-                }
-            })
-        }
-
-        // Add scrape options if they exist and are not empty
-        if (params?.scrapeOptions) {
-            if (params.scrapeOptions.includePaths) {
-                const includePaths = Array.isArray(params.scrapeOptions.includePaths)
-                    ? params.scrapeOptions.includePaths
-                    : params.scrapeOptions.includePaths.split(',')
-                if (includePaths.length > 0) {
-                    validParams.includePaths = includePaths
-                }
-            }
-
-            if (params.scrapeOptions.excludePaths) {
-                const excludePaths = Array.isArray(params.scrapeOptions.excludePaths)
-                    ? params.scrapeOptions.excludePaths
-                    : params.scrapeOptions.excludePaths.split(',')
-                if (excludePaths.length > 0) {
-                    validParams.excludePaths = excludePaths
-                }
-            }
-
-            if (params.scrapeOptions.limit) {
-                validParams.limit = params.scrapeOptions.limit
-            }
-
-            const validScrapeParams = ['mobile', 'skipTlsVerification', 'timeout', 'includeTags', 'excludeTags', 'onlyMainContent']
-
-            validScrapeParams.forEach((param) => {
-                if (params.scrapeOptions[param] !== undefined && params.scrapeOptions[param] !== null) {
-                    scrapeOptions[param] = params.scrapeOptions[param]
-                }
-            })
-        }
-
-        // Only add scrapeOptions if it has more than just the default values
-        if (Object.keys(scrapeOptions).length > 2) {
-            validParams.scrapeOptions = scrapeOptions
-        }
-
-        try {
-            const parameters = {
-                ...validParams,
-                integration: 'flowise'
-            }
-            const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/crawl', parameters, headers)
-            if (response.status === 200) {
-                const crawlResponse = response.data as CrawlResponse
-                if (!crawlResponse.success) {
-                    throw new Error(`Crawl request failed: ${crawlResponse.error || 'Unknown error'}`)
-                }
-
-                if (waitUntilDone) {
-                    return this.monitorJobStatus(crawlResponse.id, headers, pollInterval)
-                } else {
-                    return crawlResponse
-                }
-            } else {
-                this.handleError(response, 'start crawl job')
-            }
-        } catch (error: any) {
-            if (error.response?.data?.error) {
-                throw new Error(`Crawl failed: ${error.response.data.error}`)
-            }
-            throw new Error(`Crawl failed: ${error.message}`)
-        }
-
-        return { success: false, id: '', url: '' }
-    }
-
-    async extract(
-        request: ExtractRequest,
-        waitUntilDone: boolean = true,
-        pollInterval: number = 2
-    ): Promise<ExtractResponse | ExtractStatusResponse> {
-        const headers = this.prepareHeaders()
-
-        // Create a clean payload with only valid parameters
-        const validParams: any = {
-            urls: request.urls
-        }
-
-        // Add optional parameters if they exist and are not empty
-        if (request.prompt) {
-            validParams.prompt = request.prompt
-        }
-
-        if (request.schema) {
-            validParams.schema = request.schema
-        }
-
-        const validExtractParams = ['enableWebSearch', 'ignoreSitemap', 'includeSubdomains', 'showSources'] as const
-
-        validExtractParams.forEach((param) => {
-            if (request[param] !== undefined && request[param] !== null) {
-                validParams[param] = request[param]
-            }
-        })
-
-        // Add scrape options if they exist
-        if (request.scrapeOptions) {
-            const scrapeOptions: any = {
-                formats: ['markdown'],
-                onlyMainContent: true
-            }
-
-            // Handle includeTags
-            if (request.scrapeOptions.includeTags) {
-                const includeTags = Array.isArray(request.scrapeOptions.includeTags)
-                    ? request.scrapeOptions.includeTags
-                    : request.scrapeOptions.includeTags.split(',')
-                if (includeTags.length > 0) {
-                    scrapeOptions.includeTags = includeTags
-                }
-            }
-
-            // Handle excludeTags
-            if (request.scrapeOptions.excludeTags) {
-                const excludeTags = Array.isArray(request.scrapeOptions.excludeTags)
-                    ? request.scrapeOptions.excludeTags
-                    : request.scrapeOptions.excludeTags.split(',')
-                if (excludeTags.length > 0) {
-                    scrapeOptions.excludeTags = excludeTags
-                }
-            }
-
-            // Add other scrape options if they exist and are not empty
-            const validScrapeParams = ['mobile', 'skipTlsVerification', 'timeout'] as const
-
-            validScrapeParams.forEach((param) => {
-                if (request.scrapeOptions?.[param] !== undefined && request.scrapeOptions?.[param] !== null) {
-                    scrapeOptions[param] = request.scrapeOptions[param]
-                }
-            })
-
-            // Add JSON options if they exist
-            if (request.scrapeOptions.jsonOptions) {
-                scrapeOptions.jsonOptions = {}
-                if (request.scrapeOptions.jsonOptions.schema) {
-                    scrapeOptions.jsonOptions.schema = request.scrapeOptions.jsonOptions.schema
-                }
-                if (request.scrapeOptions.jsonOptions.prompt) {
-                    scrapeOptions.jsonOptions.prompt = request.scrapeOptions.jsonOptions.prompt
-                }
-            }
-
-            // Only add scrapeOptions if it has more than just the default values
-            if (Object.keys(scrapeOptions).length > 2) {
-                validParams.scrapeOptions = scrapeOptions
-            }
-        }
-
-        try {
-            const parameters = {
-                ...validParams,
-                integration: 'flowise'
-            }
-            const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/extract', parameters, headers)
-            if (response.status === 200) {
-                const extractResponse = response.data as ExtractResponse
-                if (waitUntilDone) {
-                    return this.monitorExtractStatus(extractResponse.id, headers, pollInterval)
-                } else {
-                    return extractResponse
-                }
-            } else {
-                this.handleError(response, 'start extract job')
-            }
-        } catch (error: any) {
-            throw new Error(error.message)
-        }
-        return { success: false, id: '', url: '' }
-    }
-
-    async search(request: SearchRequest): Promise<SearchResponse> {
-        const headers = this.prepareHeaders()
-
-        // Create a clean payload with only valid parameters
-        const validParams: any = {
-            query: request.query
-        }
-
-        // Add optional parameters if they exist and are not empty
-        const validSearchParams = ['limit', 'tbs', 'lang', 'country', 'location', 'timeout', 'ignoreInvalidURLs'] as const
-
-        validSearchParams.forEach((param) => {
-            if (request[param] !== undefined && request[param] !== null) {
-                validParams[param] = request[param]
-            }
-        })
-
-        try {
-            const parameters = {
-                ...validParams,
-                integration: 'flowise'
-            }
-            const response: AxiosResponse = await this.postRequest(this.apiUrl + '/v1/search', parameters, headers)
-            if (response.status === 200) {
-                const searchResponse = response.data as SearchResponse
-                if (!searchResponse.success) {
-                    throw new Error(`Search request failed: ${searchResponse.warning || 'Unknown error'}`)
-                }
-                return searchResponse
-            } else {
-                this.handleError(response, 'perform search')
-            }
-        } catch (error: any) {
-            throw new Error(error.message)
-        }
-        return { success: false }
-    }
-
-    private prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
-        return {
-            'Content-Type': 'application/json',
-            Authorization: `Bearer ${this.apiKey}`,
-            ...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {})
-        } as AxiosRequestHeaders & { 'x-idempotency-key'?: string }
-    }
-
-    private async postRequest(url: string, data: Params, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
-        const result = await secureAxiosRequest({ method: 'POST', url, data, headers })
-        return result
-    }
-
-    private getRequest(url: string, headers: AxiosRequestHeaders): Promise<AxiosResponse> {
-        return secureAxiosRequest({ method: 'GET', url, headers })
-    }
-
-    private async monitorJobStatus(jobId: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<CrawlStatusResponse> {
-        let isJobCompleted = false
-        while (!isJobCompleted) {
-            const statusResponse: AxiosResponse = await this.getRequest(this.apiUrl + `/v1/crawl/${jobId}`, headers)
-            if (statusResponse.status === 200) {
-                const statusData = statusResponse.data as CrawlStatusResponse
-                switch (statusData.status) {
-                    case 'completed':
-                        isJobCompleted = true
-                        return statusData
-                    case 'scraping':
-                    case 'failed':
-                        if (statusData.status === 'failed') {
-                            throw new Error('Crawl job failed')
-                        }
-                        await new Promise((resolve) => setTimeout(resolve, Math.max(checkInterval, 2) * 1000))
-                        break
-                    default:
-                        throw new Error(`Unknown crawl status: ${statusData.status}`)
-                }
-            } else {
-                this.handleError(statusResponse, 'check crawl status')
-            }
-        }
-        throw new Error('Failed to monitor job status')
-    }
-
-    private async monitorExtractStatus(jobId: string, headers: AxiosRequestHeaders, checkInterval: number): Promise<ExtractStatusResponse> {
-        let isJobCompleted = false
-        while (!isJobCompleted) {
-            const statusResponse: AxiosResponse = await this.getRequest(this.apiUrl + `/v1/extract/${jobId}`, headers)
-            if (statusResponse.status === 200) {
-                const statusData = statusResponse.data as ExtractStatusResponse
-                switch (statusData.status) {
-                    case 'completed':
-                        isJobCompleted = true
-                        return statusData
-                    case 'processing':
-                    case 'failed':
-                        if (statusData.status === 'failed') {
-                            throw new Error('Extract job failed')
-                        }
-                        await new Promise((resolve) => setTimeout(resolve, Math.max(checkInterval, 2) * 1000))
-                        break
-                    default:
-                        throw new Error(`Unknown extract status: ${statusData.status}`)
-                }
-            } else {
-                this.handleError(statusResponse, 'check extract status')
-            }
-        }
-        throw new Error('Failed to monitor extract status')
-    }
-
-    private handleError(response: AxiosResponse, action: string): void {
-        if ([402, 408, 409, 500].includes(response.status)) {
-            const errorMessage: string = response.data.error || 'Unknown error occurred'
-            throw new Error(`Failed to ${action}. Status code: ${response.status}. Error: ${errorMessage}`)
-        } else {
-            throw new Error(`Unexpected error occurred while trying to ${action}. Status code: ${response.status}`)
-        }
-    }
+// Coerce a comma-separated string or an array into trimmed, non-empty entries (undefined if none remain).
+function toStringArray(value?: string | string[]): string[] | undefined {
+    if (value === null || value === undefined) return undefined
+    const pieces = Array.isArray(value) ? value : value.split(',')
+    const kept = pieces.map((piece) => piece.trim()).filter((piece) => piece !== '')
+    return kept.length === 0 ? undefined : kept
 }
 
 // FireCrawl Loader
@@ -546,7 +62,7 @@ interface FirecrawlLoaderParameters {
     apiKey?: string
     apiUrl?: string
     mode?: 'crawl' | 'scrape' | 'extract' | 'search'
-    params?: Record<string, unknown>
+    params?: LoaderParams
 }
 
 export class FireCrawlLoader extends BaseDocumentLoader {
@@ -555,7 +71,7 @@ export class FireCrawlLoader extends BaseDocumentLoader {
     private url?: string
     private query?: string
     private mode: 'crawl' | 'scrape' | 'extract' | 'search'
-    private params?: Record<string, unknown>
+    private params?: LoaderParams
 
     constructor(loaderParams: FirecrawlLoaderParameters) {
         super()
@@ -572,84 +88,127 @@ export class FireCrawlLoader extends BaseDocumentLoader {
         this.apiUrl = apiUrl || 'https://api.firecrawl.dev'
     }
 
+    // Assemble the v2 ScrapeOptions used by both the scrape and crawl branches of load().
+    private buildScrapeOptions(): ScrapeOptions {
+        // Baseline: markdown output, main content only, tagged with the Flowise integration id.
+        const built: ScrapeOptions = {
+            formats: ['markdown'],
+            onlyMainContent: true,
+            integration: FIRECRAWL_INTEGRATION
+        }
+        const overrides = this.params?.scrapeOptions
+        if (!overrides) return built
+
+        // Tag filters may arrive as comma-separated strings; empty lists are left unset.
+        const includeTags = toStringArray(overrides.includeTags)
+        const excludeTags = toStringArray(overrides.excludeTags)
+        if (includeTags) built.includeTags = includeTags
+        if (excludeTags) built.excludeTags = excludeTags
+
+        // Boolean flags are copied whenever defined (false included); timeout only when truthy.
+        if (overrides.onlyMainContent !== undefined) built.onlyMainContent = overrides.onlyMainContent
+        if (overrides.mobile !== undefined) built.mobile = overrides.mobile
+        if (overrides.skipTlsVerification !== undefined) built.skipTlsVerification = overrides.skipTlsVerification
+        if (overrides.timeout) built.timeout = overrides.timeout
+        return built
+    }
+
     public async load(): Promise<DocumentInterface[]> {
-        const app = new FirecrawlApp({ apiKey: this.apiKey, apiUrl: this.apiUrl })
+        const app = new Firecrawl({ apiKey: this.apiKey, apiUrl: this.apiUrl })
         let firecrawlDocs: FirecrawlDocument[]
 
         if (this.mode === 'search') {
             if (!this.query) {
                 throw new Error('Firecrawl: Query is required for search mode')
             }
-            const response = await app.search({ query: this.query, ...this.params })
-            if (!response.success) {
-                throw new Error(`Firecrawl: Failed to search. Warning: ${response.warning}`)
-            }
 
-            // Convert search results to FirecrawlDocument format
-            firecrawlDocs = (response.data || []).map((result) => ({
-                markdown: result.description,
-                metadata: {
-                    title: result.title,
-                    sourceURL: result.url,
-                    description: result.description
-                }
-            }))
+            const searchReq: Omit<SearchRequest, 'query'> = {
+                integration: FIRECRAWL_INTEGRATION
+            }
+            if (this.params?.limit !== undefined) searchReq.limit = this.params.limit
+            if (this.params?.tbs) searchReq.tbs = this.params.tbs
+            // v2 search exposes a single `location` string (v1's separate `country`/`lang` were removed).
+            // Fall back to the country code so existing node configurations still influence results.
+            const location = this.params?.location || this.params?.country
+            if (location) searchReq.location = location
+            if (this.params?.timeout !== undefined) searchReq.timeout = this.params.timeout
+            if (this.params?.ignoreInvalidURLs !== undefined) searchReq.ignoreInvalidURLs = this.params.ignoreInvalidURLs
+
+            const response = await app.search(this.query, searchReq)
+
+            // v2 returns results grouped by source. Use web results and normalize each entry
+            // (which may be a lightweight SearchResultWeb or a full Document when scrapeOptions are set).
+            const webResults = response.web ?? []
+            firecrawlDocs = webResults.map((result: SearchResultWeb | FirecrawlDocument) => {
+                if ('markdown' in result || 'html' in result || 'metadata' in result) {
+                    return result as FirecrawlDocument
+                }
+                const web = result as SearchResultWeb
+                return {
+                    markdown: web.description,
+                    metadata: {
+                        title: web.title,
+                        sourceURL: web.url,
+                        description: web.description
+                    }
+                } as FirecrawlDocument
+            })
         } else if (this.mode === 'scrape') {
             if (!this.url) {
                 throw new Error('Firecrawl: URL is required for scrape mode')
             }
-            const response = await app.scrapeUrl(this.url, this.params)
-            if (!response.success) {
-                throw new Error(`Firecrawl: Failed to scrape URL. Error: ${response.error}`)
-            }
-            firecrawlDocs = [response.data as FirecrawlDocument]
+            const response = await app.scrape(this.url, this.buildScrapeOptions())
+            firecrawlDocs = [response]
         } else if (this.mode === 'crawl') {
             if (!this.url) {
                 throw new Error('Firecrawl: URL is required for crawl mode')
             }
-            const response = await app.crawlUrl(this.url, this.params)
-            if ('status' in response) {
-                if (response.status === 'failed') {
-                    throw new Error('Firecrawl: Crawl job failed')
-                }
-                firecrawlDocs = response.data || []
-            } else {
-                if (!response.success) {
-                    throw new Error(`Firecrawl: Failed to scrape URL. Error: ${response.error}`)
-                }
-                firecrawlDocs = [response.data as FirecrawlDocument]
+
+            const crawlOptions: CrawlOptions & { pollInterval?: number } = {
+                integration: FIRECRAWL_INTEGRATION,
+                pollInterval: 2,
+                scrapeOptions: this.buildScrapeOptions()
+            }
+
+            const includePaths = toStringArray(this.params?.scrapeOptions?.includePaths)
+            if (includePaths) crawlOptions.includePaths = includePaths
+
+            const excludePaths = toStringArray(this.params?.scrapeOptions?.excludePaths)
+            if (excludePaths) crawlOptions.excludePaths = excludePaths
+
+            const limit = this.params?.scrapeOptions?.limit ?? this.params?.limit
+            if (limit !== undefined && limit !== null) crawlOptions.limit = limit
+            if (this.params?.maxDiscoveryDepth !== undefined) crawlOptions.maxDiscoveryDepth = this.params.maxDiscoveryDepth
+            if (this.params?.ignoreQueryParameters !== undefined) crawlOptions.ignoreQueryParameters = this.params.ignoreQueryParameters
+            if (this.params?.allowExternalLinks !== undefined) crawlOptions.allowExternalLinks = this.params.allowExternalLinks
+            if (this.params?.delay !== undefined) crawlOptions.delay = this.params.delay
+
+            const response = await app.crawl(this.url, crawlOptions)
+            if (response.status === 'failed') {
+                throw new Error('Firecrawl: Crawl job failed')
             }
+            firecrawlDocs = response.data || []
         } else if (this.mode === 'extract') {
             if (!this.url) {
                 throw new Error('Firecrawl: URL is required for extract mode')
             }
-            this.params!.urls = [this.url]
-            const response = await app.extract(this.params as any as ExtractRequest)
-            if (!response.success) {
-                throw new Error(`Firecrawl: Failed to extract URL.`)
-            }
 
-            // Convert extract response to document format
-            if ('data' in response && response.data) {
-                // Create a document from the extracted data
-                const extractedData = response.data
-                const content = JSON.stringify(extractedData, null, 2)
+            const response = await app.extract({
+                urls: [this.url],
+                prompt: this.params?.prompt,
+                schema: this.params?.schema,
+                integration: FIRECRAWL_INTEGRATION
+            })
 
+            if (response.data) {
+                const content = JSON.stringify(response.data, null, 2)
                 const metadata: Record<string, any> = {
                     source: this.url,
-                    type: 'extracted_data'
-                }
-
-                // Add status and expiresAt if they exist in the response
-                if ('status' in response) {
-                    metadata.status = response.status
-                }
-                if ('data' in response) {
-                    metadata.data = response.data
-                }
-                if ('expiresAt' in response) {
-                    metadata.expiresAt = response.expiresAt
+                    type: 'extracted_data',
+                    data: response.data
                 }
+                if (response.status) metadata.status = response.status
+                if (response.expiresAt) metadata.expiresAt = response.expiresAt
 
                 return [
                     new Document({
@@ -960,7 +519,6 @@ class FireCrawl_DocumentLoaders implements INode {
 
         const searchQuery = nodeData.inputs?.searchQuery as string
         const searchLimit = nodeData.inputs?.searchLimit as string
-        const searchLang = nodeData.inputs?.searchLang as string
         const searchCountry = nodeData.inputs?.searchCountry as string
         const searchTimeout = nodeData.inputs?.searchTimeout as number
 
@@ -990,7 +548,6 @@ class FireCrawl_DocumentLoaders implements INode {
             }
             input.params = {
                 limit: searchLimit ? parseInt(searchLimit, 10) : 5,
-                lang: searchLang,
                 country: searchCountry,
                 timeout: searchTimeout
             }
diff --git a/packages/components/package.json b/packages/components/package.json
index bebf3c06f3e..68306e22d68 100644
--- a/packages/components/package.json
+++ b/packages/components/package.json
@@ -85,7 +85,7 @@
         "@langchain/weaviate": "1.0.1",
         "@langchain/xai": "1.3.1",
         "@mem0/community": "^0.0.1",
-        "@mendable/firecrawl-js": "^1.18.2",
+        "@mendable/firecrawl-js": "^4.25.2",
         "@mistralai/mistralai": "1.14.0",
         "@modelcontextprotocol/sdk": "1.29.0",
         "@modelcontextprotocol/server-postgres": "^0.6.2",

From 3b46907ca9695f7756f52715259589b8b6078204 Mon Sep 17 00:00:00 2001
From: Rakshith Ramprakash <rakshithramprakash@gmail.com>
Date: Thu, 4 Jun 2026 13:08:26 +0530
Subject: [PATCH 2/6] fix: use canonical `firecrawl` npm package (not legacy
 @mendable/firecrawl-js)

The identical v4 SDK is dual-published under both package names; `firecrawl` is the current canonical package.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../components/nodes/documentloaders/FireCrawl/FireCrawl.ts     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
index 6c01b618d80..6d9f3e2d791 100644
--- a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
+++ b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
@@ -7,7 +7,7 @@ import Firecrawl, {
     type CrawlOptions,
     type SearchRequest,
     type SearchResultWeb
-} from '@mendable/firecrawl-js'
+} from 'firecrawl'
 import { INode, INodeData, INodeParams, ICommonObject, INodeOutputsValue } from '../../../src/Interface'
 import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
 

From 64413d9322edebfbb1d3fc9a23d870514e10cac6 Mon Sep 17 00:00:00 2001
From: Rakshith Ramprakash <rakshithramprakash@gmail.com>
Date: Thu, 4 Jun 2026 13:08:28 +0530
Subject: [PATCH 3/6] fix: use canonical `firecrawl` npm package (not legacy
 @mendable/firecrawl-js)

The identical v4 SDK is dual-published under both package names; `firecrawl` is the current canonical package.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/components/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/components/package.json b/packages/components/package.json
index 68306e22d68..3ce348bde57 100644
--- a/packages/components/package.json
+++ b/packages/components/package.json
@@ -85,7 +85,7 @@
         "@langchain/weaviate": "1.0.1",
         "@langchain/xai": "1.3.1",
         "@mem0/community": "^0.0.1",
-        "@mendable/firecrawl-js": "^4.25.2",
+        "firecrawl": "^4.25.2",
         "@mistralai/mistralai": "1.14.0",
         "@modelcontextprotocol/sdk": "1.29.0",
         "@modelcontextprotocol/server-postgres": "^0.6.2",

From 88635fc8e9f67f5936591a2f56d0b1e34e49d68c Mon Sep 17 00:00:00 2001
From: Rakshith Ramprakash <rakshithramprakash@gmail.com>
Date: Thu, 4 Jun 2026 13:35:05 +0530
Subject: [PATCH 4/6] =?UTF-8?q?fix(firecrawl):=20back-compat=20=E2=80=94?=
 =?UTF-8?q?=20map=20legacy=20crawlerOptions.maxDepth=20to=20v2=20maxDiscov?=
 =?UTF-8?q?eryDepth?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
index 6d9f3e2d791..b091461219e 100644
--- a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
+++ b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
@@ -179,6 +179,7 @@ export class FireCrawlLoader extends BaseDocumentLoader {
             const limit = this.params?.scrapeOptions?.limit ?? this.params?.limit
             if (limit !== undefined && limit !== null) crawlOptions.limit = limit
             if (this.params?.maxDiscoveryDepth !== undefined) crawlOptions.maxDiscoveryDepth = this.params.maxDiscoveryDepth
+            else if (this.params?.maxDepth !== undefined) crawlOptions.maxDiscoveryDepth = this.params.maxDepth // back-compat: v1 crawlerOptions used maxDepth
             if (this.params?.ignoreQueryParameters !== undefined) crawlOptions.ignoreQueryParameters = this.params.ignoreQueryParameters
             if (this.params?.allowExternalLinks !== undefined) crawlOptions.allowExternalLinks = this.params.allowExternalLinks
             if (this.params?.delay !== undefined) crawlOptions.delay = this.params.delay

From 2ad2c9f23163938ec09da3d7ebaff161e6fa4d05 Mon Sep 17 00:00:00 2001
From: Rakshith Ramprakash <rakshithramprakash@gmail.com>
Date: Thu, 4 Jun 2026 16:11:44 +0530
Subject: [PATCH 5/6] fix(firecrawl): throw on failed extract response
 (ExtractResponse has success/status/error)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../components/nodes/documentloaders/FireCrawl/FireCrawl.ts   | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
index b091461219e..1bf31168c42 100644
--- a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
+++ b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
@@ -201,6 +201,10 @@ export class FireCrawlLoader extends BaseDocumentLoader {
                 integration: FIRECRAWL_INTEGRATION
             })
 
+            if (response.success === false || response.status === 'failed') {
+                throw new Error('Firecrawl: extract failed. Error: ' + (response.error ?? 'unknown error'))
+            }
+
             if (response.data) {
                 const content = JSON.stringify(response.data, null, 2)
                 const metadata: Record<string, any> = {

From acf49f8e86abeb5ff97f4ab47ddcc542fd50faf3 Mon Sep 17 00:00:00 2001
From: Rakshith Ramprakash <rakshithramprakash@gmail.com>
Date: Thu, 4 Jun 2026 17:33:12 +0530
Subject: [PATCH 6/6] refactor(firecrawl): remove deprecated extract mode from
 the node

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../documentloaders/FireCrawl/FireCrawl.ts    | 80 ++-----------------
 1 file changed, 6 insertions(+), 74 deletions(-)

diff --git a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
index 1bf31168c42..76e3a71ed6f 100644
--- a/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
+++ b/packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts
@@ -37,9 +37,6 @@ interface LoaderParams {
     ignoreQueryParameters?: boolean
     allowExternalLinks?: boolean
     delay?: number
-    // extract
-    schema?: Record<string, any>
-    prompt?: string
     // search
     tbs?: string
     location?: string
@@ -61,7 +58,7 @@ interface FirecrawlLoaderParameters {
     query?: string
     apiKey?: string
     apiUrl?: string
-    mode?: 'crawl' | 'scrape' | 'extract' | 'search'
+    mode?: 'crawl' | 'scrape' | 'search'
     params?: LoaderParams
 }
 
@@ -70,7 +67,7 @@ export class FireCrawlLoader extends BaseDocumentLoader {
     private apiUrl: string
     private url?: string
     private query?: string
-    private mode: 'crawl' | 'scrape' | 'extract' | 'search'
+    private mode: 'crawl' | 'scrape' | 'search'
     private params?: LoaderParams
 
     constructor(loaderParams: FirecrawlLoaderParameters) {
@@ -189,42 +186,8 @@ export class FireCrawlLoader extends BaseDocumentLoader {
                 throw new Error('Firecrawl: Crawl job failed')
             }
             firecrawlDocs = response.data || []
-        } else if (this.mode === 'extract') {
-            if (!this.url) {
-                throw new Error('Firecrawl: URL is required for extract mode')
-            }
-
-            const response = await app.extract({
-                urls: [this.url],
-                prompt: this.params?.prompt,
-                schema: this.params?.schema,
-                integration: FIRECRAWL_INTEGRATION
-            })
-
-            if (response.success === false || response.status === 'failed') {
-                throw new Error('Firecrawl: extract failed. Error: ' + (response.error ?? 'unknown error'))
-            }
-
-            if (response.data) {
-                const content = JSON.stringify(response.data, null, 2)
-                const metadata: Record<string, any> = {
-                    source: this.url,
-                    type: 'extracted_data',
-                    data: response.data
-                }
-                if (response.status) metadata.status = response.status
-                if (response.expiresAt) metadata.expiresAt = response.expiresAt
-
-                return [
-                    new Document({
-                        pageContent: content,
-                        metadata
-                    })
-                ]
-            }
-            return []
         } else {
-            throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape', 'extract', 'search'.`)
+            throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape', 'search'.`)
         }
 
         // Convert Firecrawl documents to LangChain documents
@@ -301,11 +264,6 @@ class FireCrawl_DocumentLoaders implements INode {
                         name: 'scrape',
                         description: 'Scrape a URL and get its content'
                     },
-                    {
-                        label: 'Extract',
-                        name: 'extract',
-                        description: 'Extract data from a URL'
-                    },
                     {
                         label: 'Search',
                         name: 'search',
@@ -318,11 +276,11 @@ class FireCrawl_DocumentLoaders implements INode {
                 label: 'URLs',
                 name: 'url',
                 type: 'string',
-                description: 'URL to be crawled/scraped/extracted',
+                description: 'URL to be crawled/scraped',
                 placeholder: 'https://docs.flowiseai.com',
                 optional: true,
                 show: {
-                    crawlerType: ['crawl', 'scrape', 'extract']
+                    crawlerType: ['crawl', 'scrape']
                 }
             },
             {
@@ -399,28 +357,6 @@ class FireCrawl_DocumentLoaders implements INode {
                     crawlerType: ['crawl']
                 }
             },
-            {
-                label: 'Schema',
-                name: 'extractSchema',
-                type: 'json',
-                description: 'JSON schema for data extraction',
-                optional: true,
-                additionalParams: true,
-                show: {
-                    crawlerType: ['extract']
-                }
-            },
-            {
-                label: 'Prompt',
-                name: 'extractPrompt',
-                type: 'string',
-                description: 'Prompt for data extraction',
-                optional: true,
-                additionalParams: true,
-                show: {
-                    crawlerType: ['extract']
-                }
-            },
             {
                 label: 'Query',
                 name: 'searchQuery',
@@ -519,8 +455,6 @@ class FireCrawl_DocumentLoaders implements INode {
         const includeTags = nodeData.inputs?.includeTags ? (nodeData.inputs.includeTags.split(',') as string[]) : undefined
         const excludeTags = nodeData.inputs?.excludeTags ? (nodeData.inputs.excludeTags.split(',') as string[]) : undefined
 
-        const extractSchema = nodeData.inputs?.extractSchema
-        const extractPrompt = nodeData.inputs?.extractPrompt as string
 
         const searchQuery = nodeData.inputs?.searchQuery as string
         const searchLimit = nodeData.inputs?.searchLimit as string
@@ -530,7 +464,7 @@ class FireCrawl_DocumentLoaders implements INode {
         const input: FirecrawlLoaderParameters = {
             url,
             query: searchQuery,
-            mode: crawlerType as 'crawl' | 'scrape' | 'extract' | 'search',
+            mode: crawlerType as 'crawl' | 'scrape' | 'search',
             apiKey: firecrawlApiToken,
             apiUrl: firecrawlApiUrl,
             params: {
@@ -541,8 +475,6 @@ class FireCrawl_DocumentLoaders implements INode {
                     includeTags,
                     excludeTags
                 },
-                schema: extractSchema || undefined,
-                prompt: extractPrompt || undefined
             }
         }