From a6fe1ffe969aa914d77818df6a017d6d5c8ffbe9 Mon Sep 17 00:00:00 2001 From: Nicolas <20311743+nickscamara@users.noreply.github.com> Date: Fri, 27 Feb 2026 10:55:44 -0800 Subject: [PATCH 1/4] Update browser.ts --- apps/api/src/controllers/v2/browser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/controllers/v2/browser.ts b/apps/api/src/controllers/v2/browser.ts index d2698db90c..8961268086 100644 --- a/apps/api/src/controllers/v2/browser.ts +++ b/apps/api/src/controllers/v2/browser.ts @@ -22,7 +22,7 @@ import { billTeam } from "../../services/billing/credit_billing"; import { enqueueBrowserSessionActivity } from "../../lib/browser-session-activity"; import { logRequest } from "../../services/logging/log_job"; -const BROWSER_CREDITS_PER_HOUR = 100; +const BROWSER_CREDITS_PER_HOUR = 120; /** * Calculate credits to bill for a browser session based on its duration. From e946bd0300fa944ef3f112428304ae62fc69724a Mon Sep 17 00:00:00 2001 From: tomsideguide Date: Fri, 27 Feb 2026 21:00:21 +0000 Subject: [PATCH 2/4] feat(api): promote from reconciler (#2943) * feat(api): promote from reconciler * fix(api): unbounded loop --- apps/api/src/lib/concurrency-limit.ts | 154 +++++++++--------- .../src/lib/concurrency-queue-reconciler.ts | 131 ++++++++++++++- 2 files changed, 207 insertions(+), 78 deletions(-) diff --git a/apps/api/src/lib/concurrency-limit.ts b/apps/api/src/lib/concurrency-limit.ts index 778ad0fe01..267c4738f8 100644 --- a/apps/api/src/lib/concurrency-limit.ts +++ b/apps/api/src/lib/concurrency-limit.ts @@ -173,7 +173,7 @@ export async function pushCrawlConcurrencyLimitActiveJob( ); } -async function removeCrawlConcurrencyLimitActiveJob( +export async function removeCrawlConcurrencyLimitActiveJob( crawl_id: string, id: string, ) { @@ -188,7 +188,7 @@ async function removeCrawlConcurrencyLimitActiveJob( * @param teamId * @returns A job that can be run, or null if there are no more jobs to run. 
*/ -async function getNextConcurrentJob(teamId: string): Promise<{ +export async function getNextConcurrentJob(teamId: string): Promise<{ job: ConcurrencyLimitedJob; timeout: number; } | null> { @@ -283,6 +283,11 @@ async function getNextConcurrentJob(teamId: string): Promise<{ export async function concurrentJobDone(job: NuQJob) { if (job.id && job.data && job.data.team_id) { await removeConcurrencyLimitActiveJob(job.data.team_id, job.id); + await getRedisConnection().zrem( + constructQueueKey(job.data.team_id), + job.id, + ); + await getRedisConnection().del(constructJobKey(job.id)); await cleanOldConcurrencyLimitEntries(job.data.team_id); await cleanOldConcurrencyLimitedJobs(job.data.team_id); @@ -291,89 +296,92 @@ export async function concurrentJobDone(job: NuQJob) { await cleanOldCrawlConcurrencyLimitEntries(job.data.crawl_id); } - let i = 0; - for (; i < 10; i++) { - const maxTeamConcurrency = - ( - await getACUCTeam( - job.data.team_id, - false, - true, - job.data.is_extract - ? RateLimiterMode.Extract - : RateLimiterMode.Crawl, - ) - )?.concurrency ?? 2; + const maxTeamConcurrency = + ( + await getACUCTeam( + job.data.team_id, + false, + true, + job.data.is_extract ? RateLimiterMode.Extract : RateLimiterMode.Crawl, + ) + )?.concurrency ?? 
2; + + let staleSkipped = 0; + while (staleSkipped < 100) { const currentActiveConcurrency = ( await getConcurrencyLimitActiveJobs(job.data.team_id) ).length; - if (currentActiveConcurrency < maxTeamConcurrency) { - const nextJob = await getNextConcurrentJob(job.data.team_id); - if (nextJob !== null) { - await pushConcurrencyLimitActiveJob( - job.data.team_id, - nextJob.job.id, - 60 * 1000, - ); + if (currentActiveConcurrency >= maxTeamConcurrency) break; - if (nextJob.job.data.crawl_id) { - await pushCrawlConcurrencyLimitActiveJob( - nextJob.job.data.crawl_id, - nextJob.job.id, - 60 * 1000, - ); - - const sc = await getCrawl(nextJob.job.data.crawl_id); - if (sc !== null && typeof sc.crawlerOptions?.delay === "number") { - await new Promise(resolve => - setTimeout(resolve, sc.crawlerOptions.delay * 1000), - ); - } - } + const nextJob = await getNextConcurrentJob(job.data.team_id); + if (nextJob === null) break; - abTestJob(nextJob.job.data); - - const promotedSuccessfully = - (await scrapeQueue.promoteJobFromBacklogOrAdd( - nextJob.job.id, - nextJob.job.data, - { - priority: nextJob.job.priority, - listenable: nextJob.job.listenable, - ownerId: nextJob.job.data.team_id ?? undefined, - groupId: nextJob.job.data.crawl_id ?? 
undefined, - }, - )) !== null; - - if (promotedSuccessfully) { - logger.debug("Successfully promoted concurrent queued job", { - teamId: job.data.team_id, - jobId: nextJob.job.id, - zeroDataRetention: nextJob.job.data?.zeroDataRetention, - }); - break; - } else { - logger.warn( - "Was unable to promote concurrent queued job as it already exists in the database", - { - teamId: job.data.team_id, - jobId: nextJob.job.id, - zeroDataRetention: nextJob.job.data?.zeroDataRetention, - }, - ); - } - } else { - break; + await pushConcurrencyLimitActiveJob( + job.data.team_id, + nextJob.job.id, + 60 * 1000, + ); + + if (nextJob.job.data.crawl_id) { + await pushCrawlConcurrencyLimitActiveJob( + nextJob.job.data.crawl_id, + nextJob.job.id, + 60 * 1000, + ); + + const sc = await getCrawl(nextJob.job.data.crawl_id); + if (sc !== null && typeof sc.crawlerOptions?.delay === "number") { + await new Promise(resolve => + setTimeout(resolve, sc.crawlerOptions.delay * 1000), + ); } - } else { + } + + abTestJob(nextJob.job.data); + + const promotedSuccessfully = + (await scrapeQueue.promoteJobFromBacklogOrAdd( + nextJob.job.id, + nextJob.job.data, + { + priority: nextJob.job.priority, + listenable: nextJob.job.listenable, + ownerId: nextJob.job.data.team_id ?? undefined, + groupId: nextJob.job.data.crawl_id ?? 
undefined, + }, + )) !== null; + + if (promotedSuccessfully) { + logger.debug("Successfully promoted concurrent queued job", { + teamId: job.data.team_id, + jobId: nextJob.job.id, + zeroDataRetention: nextJob.job.data?.zeroDataRetention, + }); break; + } else { + logger.warn( + "Was unable to promote concurrent queued job as it already exists in the database", + { + teamId: job.data.team_id, + jobId: nextJob.job.id, + zeroDataRetention: nextJob.job.data?.zeroDataRetention, + }, + ); + await removeConcurrencyLimitActiveJob(job.data.team_id, nextJob.job.id); + if (nextJob.job.data.crawl_id) { + await removeCrawlConcurrencyLimitActiveJob( + nextJob.job.data.crawl_id, + nextJob.job.id, + ); + } + staleSkipped++; } } - if (i === 10) { + if (staleSkipped >= 100) { logger.warn( - "Failed to promote a concurrent job after 10 iterations, bailing!", + "Skipped 100 stale entries in concurrency queue without a successful promotion", { teamId: job.data.team_id, }, diff --git a/apps/api/src/lib/concurrency-queue-reconciler.ts b/apps/api/src/lib/concurrency-queue-reconciler.ts index b22bf4e1e9..3edc047b96 100644 --- a/apps/api/src/lib/concurrency-queue-reconciler.ts +++ b/apps/api/src/lib/concurrency-queue-reconciler.ts @@ -5,9 +5,12 @@ import { scrapeQueue, type NuQJob } from "../services/worker/nuq"; import { RateLimiterMode, type ScrapeJobData } from "../types"; import { getConcurrencyLimitActiveJobs, + getNextConcurrentJob, pushConcurrencyLimitActiveJob, pushConcurrencyLimitedJob, pushCrawlConcurrencyLimitActiveJob, + removeConcurrencyLimitActiveJob, + removeCrawlConcurrencyLimitActiveJob, } from "./concurrency-limit"; import { getCrawl } from "./crawl-redis"; import { logger as _logger } from "./logger"; @@ -192,6 +195,105 @@ async function reconcileTeam( return { jobsStarted, jobsRequeued }; } +async function drainQueue( + ownerId: string, + teamLogger: Logger, +): Promise<{ jobsPromoted: number; staleSkipped: number }> { + const maxCrawlConcurrency = + (await 
getACUCTeam(ownerId, false, true, RateLimiterMode.Crawl)) + ?.concurrency ?? 2; + const maxExtractConcurrency = + (await getACUCTeam(ownerId, false, true, RateLimiterMode.Extract)) + ?.concurrency ?? 2; + + const activeIds = await getConcurrencyLimitActiveJobs(ownerId); + const activeJobs = await scrapeQueue.getJobs(activeIds, teamLogger); + let crawlCount = 0; + let extractCount = 0; + for (const aj of activeJobs) { + if (isExtractJob(aj.data)) extractCount++; + else crawlCount++; + } + + let jobsPromoted = 0; + let staleSkipped = 0; + let typeBlocked = 0; + + while (staleSkipped + typeBlocked < 100) { + if ( + crawlCount >= maxCrawlConcurrency && + extractCount >= maxExtractConcurrency + ) + break; + + const nextJob = await getNextConcurrentJob(ownerId); + if (nextJob === null) break; + + const isExtract = isExtractJob(nextJob.job.data); + const typeLimit = isExtract ? maxExtractConcurrency : maxCrawlConcurrency; + const typeCount = isExtract ? extractCount : crawlCount; + + if (typeCount >= typeLimit) { + await pushConcurrencyLimitedJob( + ownerId, + { + id: nextJob.job.id, + data: nextJob.job.data, + priority: nextJob.job.priority, + listenable: nextJob.job.listenable, + }, + nextJob.timeout === Infinity ? 172800000 : nextJob.timeout, + ); + typeBlocked++; + continue; + } + + await pushConcurrencyLimitActiveJob(ownerId, nextJob.job.id, 60 * 1000); + if (nextJob.job.data.crawl_id) { + await pushCrawlConcurrencyLimitActiveJob( + nextJob.job.data.crawl_id, + nextJob.job.id, + 60 * 1000, + ); + } + + const promoted = await scrapeQueue.promoteJobFromBacklogOrAdd( + nextJob.job.id, + nextJob.job.data, + { + priority: nextJob.job.priority, + listenable: nextJob.job.listenable, + ownerId: nextJob.job.data.team_id ?? undefined, + groupId: nextJob.job.data.crawl_id ?? 
undefined, + }, + ); + + if (promoted !== null) { + if (isExtract) extractCount++; + else crawlCount++; + jobsPromoted++; + } else { + await removeConcurrencyLimitActiveJob(ownerId, nextJob.job.id); + if (nextJob.job.data.crawl_id) { + await removeCrawlConcurrencyLimitActiveJob( + nextJob.job.data.crawl_id, + nextJob.job.id, + ); + } + staleSkipped++; + } + } + + if (staleSkipped >= 100) { + teamLogger.warn( + "Queue drain hit 100 stale entries without fully draining", + { ownerId }, + ); + } + + return { jobsPromoted, staleSkipped }; +} + export async function reconcileConcurrencyQueue( options: ReconcileOptions = {}, ): Promise { @@ -200,11 +302,21 @@ export async function reconcileConcurrencyQueue( scopedTeamId: options.teamId, }); - const owners = options.teamId - ? [options.teamId] - : await scrapeQueue.getBackloggedOwnerIDs(logger); - - const ownerIds = owners.filter((x): x is string => typeof x === "string"); + let ownerIds: string[]; + if (options.teamId) { + ownerIds = [options.teamId]; + } else { + const backlogOwners = ( + await scrapeQueue.getBackloggedOwnerIDs(logger) + ).filter((x): x is string => typeof x === "string"); + const queueKeys = await getRedisConnection().smembers( + "concurrency-limit-queues", + ); + const queueOwners = queueKeys + .map(k => k.replace("concurrency-limit-queue:", "")) + .filter(id => id.length > 0); + ownerIds = [...new Set([...backlogOwners, ...queueOwners])]; + } const result: ReconcileResult = { teamsScanned: ownerIds.length, @@ -223,6 +335,15 @@ export async function reconcileConcurrencyQueue( result.jobsStarted += teamResult.jobsStarted; result.jobsRequeued += teamResult.jobsRequeued; } + + const drainResult = await drainQueue(ownerId, teamLogger); + if (drainResult.jobsPromoted > 0 || drainResult.staleSkipped > 0) { + result.jobsStarted += drainResult.jobsPromoted; + teamLogger.info("Queue drain promoted jobs", { + jobsPromoted: drainResult.jobsPromoted, + staleSkipped: drainResult.staleSkipped, + }); + } } catch 
(error) { teamLogger.error("Failed to reconcile team, skipping", { error }); } From 9cc10386c7b591e3999f7437a3f8bd303f3cd75e Mon Sep 17 00:00:00 2001 From: tomsideguide Date: Fri, 27 Feb 2026 21:24:29 +0000 Subject: [PATCH 3/4] chore(api): validate owner IDs as UUIDs (#2944) --- apps/api/src/lib/concurrency-queue-reconciler.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/api/src/lib/concurrency-queue-reconciler.ts b/apps/api/src/lib/concurrency-queue-reconciler.ts index 3edc047b96..b6bb041ba4 100644 --- a/apps/api/src/lib/concurrency-queue-reconciler.ts +++ b/apps/api/src/lib/concurrency-queue-reconciler.ts @@ -1,4 +1,5 @@ import { Logger } from "winston"; +import { validate as isUUID } from "uuid"; import { getACUCTeam } from "../controllers/auth"; import { getRedisConnection } from "../services/queue-service"; import { scrapeQueue, type NuQJob } from "../services/worker/nuq"; @@ -314,7 +315,7 @@ export async function reconcileConcurrencyQueue( ); const queueOwners = queueKeys .map(k => k.replace("concurrency-limit-queue:", "")) - .filter(id => id.length > 0); + .filter(id => id.length > 0 && isUUID(id)); ownerIds = [...new Set([...backlogOwners, ...queueOwners])]; } From 3b777bfd47609341f164ef927d487fc7a92b443f Mon Sep 17 00:00:00 2001 From: "firecrawl-spring[bot]" <254786068+firecrawl-spring[bot]@users.noreply.github.com> Date: Fri, 27 Feb 2026 17:00:51 -0500 Subject: [PATCH 4/4] feat(sdk): add Java SDK for Firecrawl v2 API (#2904) Signed-off-by: Gaurav Chadha Co-authored-by: firecrawl-spring[bot] <254786068+firecrawl-spring[bot]@users.noreply.github.com> Co-authored-by: Gaurav Chadha Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Gaurav Chadha <65453826+Chadha93@users.noreply.github.com> Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> --- .github/workflows/test-java-sdk.yml | 70 ++ apps/java-sdk/.gitignore | 17 + apps/java-sdk/README.md 
| 434 +++++++++ apps/java-sdk/build.gradle.kts | 67 ++ .../gradle/wrapper/gradle-wrapper.jar | Bin 0 -> 43583 bytes .../gradle/wrapper/gradle-wrapper.properties | 7 + apps/java-sdk/gradlew | 120 +++ apps/java-sdk/settings.gradle.kts | 1 + .../com/firecrawl/client/FirecrawlClient.java | 841 ++++++++++++++++++ .../firecrawl/client/FirecrawlHttpClient.java | 215 +++++ .../errors/AuthenticationException.java | 15 + .../firecrawl/errors/FirecrawlException.java | 42 + .../firecrawl/errors/JobTimeoutException.java | 22 + .../firecrawl/errors/RateLimitException.java | 15 + .../com/firecrawl/models/AgentOptions.java | 80 ++ .../com/firecrawl/models/AgentResponse.java | 23 + .../firecrawl/models/AgentStatusResponse.java | 35 + .../com/firecrawl/models/BatchScrapeJob.java | 39 + .../firecrawl/models/BatchScrapeOptions.java | 78 ++ .../firecrawl/models/BatchScrapeResponse.java | 24 + .../models/BrowserCreateResponse.java | 29 + .../models/BrowserDeleteResponse.java | 25 + .../models/BrowserExecuteResponse.java | 31 + .../firecrawl/models/BrowserListResponse.java | 25 + .../com/firecrawl/models/BrowserSession.java | 31 + .../firecrawl/models/ConcurrencyCheck.java | 21 + .../java/com/firecrawl/models/CrawlJob.java | 40 + .../com/firecrawl/models/CrawlOptions.java | 154 ++++ .../com/firecrawl/models/CrawlResponse.java | 21 + .../com/firecrawl/models/CreditUsage.java | 25 + .../java/com/firecrawl/models/Document.java | 49 + .../java/com/firecrawl/models/JsonFormat.java | 57 ++ .../com/firecrawl/models/LocationConfig.java | 38 + .../java/com/firecrawl/models/MapData.java | 53 ++ .../java/com/firecrawl/models/MapOptions.java | 76 ++ .../com/firecrawl/models/ScrapeOptions.java | 186 ++++ .../java/com/firecrawl/models/SearchData.java | 37 + .../com/firecrawl/models/SearchOptions.java | 82 ++ .../com/firecrawl/models/WebhookConfig.java | 57 ++ .../main/java/com/firecrawl/package-info.java | 23 + .../test/java/com/firecrawl/AgentTest.java | 285 ++++++ 
.../test/java/com/firecrawl/BrowserTest.java | 310 +++++++ .../test/java/com/firecrawl/CrawlTest.java | 302 +++++++ .../com/firecrawl/FirecrawlClientTest.java | 194 ++++ .../com/firecrawl/FirecrawlLiveSiteTest.java | 131 +++ .../src/test/java/com/firecrawl/MapTest.java | 279 ++++++ .../test/java/com/firecrawl/ScrapeTest.java | 177 ++++ .../test/java/com/firecrawl/SearchTest.java | 337 +++++++ 48 files changed, 5220 insertions(+) create mode 100644 .github/workflows/test-java-sdk.yml create mode 100644 apps/java-sdk/.gitignore create mode 100644 apps/java-sdk/README.md create mode 100644 apps/java-sdk/build.gradle.kts create mode 100644 apps/java-sdk/gradle/wrapper/gradle-wrapper.jar create mode 100644 apps/java-sdk/gradle/wrapper/gradle-wrapper.properties create mode 100755 apps/java-sdk/gradlew create mode 100644 apps/java-sdk/settings.gradle.kts create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/client/FirecrawlClient.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/client/FirecrawlHttpClient.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/errors/AuthenticationException.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/errors/FirecrawlException.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/errors/JobTimeoutException.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/errors/RateLimitException.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/AgentOptions.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/AgentResponse.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/AgentStatusResponse.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/BatchScrapeJob.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/BatchScrapeOptions.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/BatchScrapeResponse.java create mode 100644 
apps/java-sdk/src/main/java/com/firecrawl/models/BrowserCreateResponse.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/BrowserDeleteResponse.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/BrowserExecuteResponse.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/BrowserListResponse.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/BrowserSession.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/ConcurrencyCheck.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/CrawlJob.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/CrawlOptions.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/CrawlResponse.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/CreditUsage.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/Document.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/JsonFormat.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/LocationConfig.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/MapData.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/MapOptions.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/ScrapeOptions.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/SearchData.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/SearchOptions.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/models/WebhookConfig.java create mode 100644 apps/java-sdk/src/main/java/com/firecrawl/package-info.java create mode 100644 apps/java-sdk/src/test/java/com/firecrawl/AgentTest.java create mode 100644 apps/java-sdk/src/test/java/com/firecrawl/BrowserTest.java create mode 100644 apps/java-sdk/src/test/java/com/firecrawl/CrawlTest.java create mode 100644 
apps/java-sdk/src/test/java/com/firecrawl/FirecrawlClientTest.java create mode 100644 apps/java-sdk/src/test/java/com/firecrawl/FirecrawlLiveSiteTest.java create mode 100644 apps/java-sdk/src/test/java/com/firecrawl/MapTest.java create mode 100644 apps/java-sdk/src/test/java/com/firecrawl/ScrapeTest.java create mode 100644 apps/java-sdk/src/test/java/com/firecrawl/SearchTest.java diff --git a/.github/workflows/test-java-sdk.yml b/.github/workflows/test-java-sdk.yml new file mode 100644 index 0000000000..c5bef8d6a2 --- /dev/null +++ b/.github/workflows/test-java-sdk.yml @@ -0,0 +1,70 @@ +name: Java SDK Test Suite + +on: + pull_request: + branches: + - main + paths: + - apps/java-sdk/** + - .github/workflows/test-java-sdk.yml + push: + branches: + - main + paths: + - apps/java-sdk/** + workflow_dispatch: + +jobs: + build-and-test: + name: Build and Test + runs-on: blacksmith-4vcpu-ubuntu-2404 + if: >- + github.event_name == 'workflow_dispatch' || + github.event_name == 'pull_request' || + (github.event_name == 'push' && github.ref == 'refs/heads/main') + steps: + - uses: actions/checkout@v4 + + - name: Set up JDK 11 + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: "11" + + - name: Cache Gradle packages + uses: actions/cache@v4 + with: + path: | + ~/.gradle/caches + ~/.gradle/wrapper + key: ${{ runner.os }}-gradle-${{ hashFiles('apps/java-sdk/**/*.gradle.kts', 'apps/java-sdk/gradle/wrapper/gradle-wrapper.properties') }} + restore-keys: | + ${{ runner.os }}-gradle- + + - name: Grant execute permission for gradlew + working-directory: ./apps/java-sdk + run: chmod +x gradlew + + - name: Build + working-directory: ./apps/java-sdk + run: ./gradlew build -x test + + - name: Run unit tests + working-directory: ./apps/java-sdk + run: ./gradlew test + + - name: Run E2E tests + if: env.FIRECRAWL_API_KEY != '' + working-directory: ./apps/java-sdk + env: + FIRECRAWL_API_KEY: ${{ secrets.FIRECRAWL_API_KEY }} + run: ./gradlew test + + - name: Publish 
test report + if: always() && (github.event_name != 'pull_request' || github.event.pull_request.head.repo.fork == false) + uses: dorny/test-reporter@v1 + with: + name: Java SDK Test Report + path: apps/java-sdk/build/test-results/test/*.xml + reporter: java-junit + fail-on-error: true diff --git a/apps/java-sdk/.gitignore b/apps/java-sdk/.gitignore new file mode 100644 index 0000000000..8cc913d551 --- /dev/null +++ b/apps/java-sdk/.gitignore @@ -0,0 +1,17 @@ +.gradle/ +build/ +*.class +*.jar +!gradle/wrapper/gradle-wrapper.jar +*.war +*.ear +*.iml +.idea/ +*.ipr +*.iws +out/ +.settings/ +.classpath +.project +bin/ +local.properties diff --git a/apps/java-sdk/README.md b/apps/java-sdk/README.md new file mode 100644 index 0000000000..c205c738e6 --- /dev/null +++ b/apps/java-sdk/README.md @@ -0,0 +1,434 @@ +# Firecrawl Java SDK + +Java SDK for the [Firecrawl](https://firecrawl.dev) v2 web scraping API. + +## Prerequisites + +Before using the Java SDK, ensure you have the following installed: + +### Java Development Kit (JDK) + +- **Required:** Java 11 or later +- **Installation (macOS):** + ```bash + brew install openjdk + ``` + + Then add Java to your PATH: + ```bash + echo 'export PATH="/opt/homebrew/opt/openjdk/bin:$PATH"' >> ~/.zshrc + source ~/.zshrc + ``` + +- **Installation (Linux):** + ```bash + # Ubuntu/Debian + sudo apt-get update + sudo apt-get install openjdk-11-jdk + + # Fedora/RHEL + sudo dnf install java-11-openjdk-devel + ``` + +- **Verify Installation:** + ```bash + java --version + ``` + +### Gradle (for building from source) + +- **Required:** Gradle 8+ +- **Installation (macOS):** + ```bash + brew install gradle + ``` + +- **Installation (Linux):** + ```bash + # Ubuntu/Debian + sudo apt-get install gradle + + # Or use SDKMAN + curl -s "https://get.sdkman.io" | bash + sdk install gradle + ``` + +- **Verify Installation:** + ```bash + gradle --version + ``` + +### API Key Setup + +1. 
Get your API key from [Firecrawl Dashboard](https://firecrawl.dev) +2. Set it as an environment variable: + ```bash + export FIRECRAWL_API_KEY="fc-your-api-key-here" + ``` + +3. **Or** add it to your shell profile for persistence: + ```bash + # For Zsh (macOS/Linux) + echo 'export FIRECRAWL_API_KEY="fc-your-api-key-here"' >> ~/.zshrc + source ~/.zshrc + + # For Bash + echo 'export FIRECRAWL_API_KEY="fc-your-api-key-here"' >> ~/.bashrc + source ~/.bashrc + ``` + +## Installation + +### Gradle (Kotlin DSL) + +```kotlin +implementation("com.firecrawl:firecrawl-java:1.0.0") +``` + +### Gradle (Groovy) + +```groovy +implementation 'com.firecrawl:firecrawl-java:1.0.0' +``` + +### Maven + +```xml + + com.firecrawl + firecrawl-java + 1.0.0 + +``` + +## Quick Start + +```java +import com.firecrawl.client.FirecrawlClient; +import com.firecrawl.models.*; +import java.util.List; + +// Create client with explicit API key +FirecrawlClient client = FirecrawlClient.builder() + .apiKey("fc-your-api-key") + .build(); + +// Scrape a page +Document doc = client.scrape("https://example.com", + ScrapeOptions.builder() + .formats(List.of("markdown")) + .build()); + +System.out.println(doc.getMarkdown()); +``` + +Or create a client from the environment variable: + +```java +// export FIRECRAWL_API_KEY=fc-your-api-key +FirecrawlClient client = FirecrawlClient.fromEnv(); +``` + +## API Reference + +### Scrape + +Scrape a single URL and get the content in various formats. 
+ +```java +Document doc = client.scrape("https://example.com", + ScrapeOptions.builder() + .formats(List.of("markdown", "html")) + .onlyMainContent(true) + .waitFor(5000) + .build()); + +System.out.println(doc.getMarkdown()); +System.out.println(doc.getMetadata().get("title")); +``` + +#### JSON Extraction + +```java +import com.firecrawl.models.JsonFormat; + +JsonFormat jsonFmt = JsonFormat.builder() + .prompt("Extract the product name and price") + .schema(Map.of( + "type", "object", + "properties", Map.of( + "name", Map.of("type", "string"), + "price", Map.of("type", "number") + ) + )) + .build(); + +Document doc = client.scrape("https://example.com/product", + ScrapeOptions.builder() + .formats(List.of(jsonFmt)) + .build()); + +System.out.println(doc.getJson()); +``` + +### Crawl + +Crawl an entire website. The `crawl()` method polls until completion. + +```java +// Convenience method — polls until done +CrawlJob job = client.crawl("https://example.com", + CrawlOptions.builder() + .limit(50) + .maxDiscoveryDepth(3) + .scrapeOptions(ScrapeOptions.builder() + .formats(List.of("markdown")) + .build()) + .build()); + +for (Document doc : job.getData()) { + System.out.println(doc.getMetadata().get("sourceURL")); +} +``` + +#### Async Crawl (manual polling) + +```java +CrawlResponse start = client.startCrawl("https://example.com", + CrawlOptions.builder().limit(100).build()); + +System.out.println("Job started: " + start.getId()); + +// Poll manually +CrawlJob status; +do { + try { Thread.sleep(2000); } catch (InterruptedException e) { Thread.currentThread().interrupt(); break; } + status = client.getCrawlStatus(start.getId()); + System.out.println(status.getCompleted() + "/" + status.getTotal()); +} while (!status.isDone()); +``` + +### Batch Scrape + +Scrape multiple URLs in parallel. 
+ +```java +BatchScrapeJob job = client.batchScrape( + List.of("https://example.com", "https://example.org"), + BatchScrapeOptions.builder() + .options(ScrapeOptions.builder() + .formats(List.of("markdown")) + .build()) + .build()); + +for (Document doc : job.getData()) { + System.out.println(doc.getMarkdown()); +} +``` + +### Map + +Discover all URLs on a website. + +```java +MapData data = client.map("https://example.com", + MapOptions.builder() + .limit(100) + .search("blog") + .build()); + +for (Map link : data.getLinks()) { + System.out.println(link.get("url") + " - " + link.get("title")); +} +``` + +### Search + +Search the web and optionally scrape results. + +```java +SearchData results = client.search("firecrawl web scraping", + SearchOptions.builder() + .limit(10) + .build()); + +if (results.getWeb() != null) { + for (Map result : results.getWeb()) { + System.out.println(result.get("title") + " — " + result.get("url")); + } +} +``` + +### Agent + +Run an AI-powered agent to research and extract data from the web. 
+ +```java +AgentStatusResponse result = client.agent( + AgentOptions.builder() + .prompt("Find the pricing plans for Firecrawl and compare them") + .build()); + +System.out.println(result.getData()); +``` + +### Usage & Metrics + +```java +ConcurrencyCheck conc = client.getConcurrency(); +System.out.println("Concurrency: " + conc.getConcurrency() + "/" + conc.getMaxConcurrency()); + +CreditUsage credits = client.getCreditUsage(); +System.out.println("Remaining credits: " + credits.getRemainingCredits()); +``` + +## Async Support + +All methods have async variants that return `CompletableFuture`: + +```java +import java.util.concurrent.CompletableFuture; + +CompletableFuture future = client.scrapeAsync( + "https://example.com", + ScrapeOptions.builder().formats(List.of("markdown")).build()); + +future.thenAccept(doc -> System.out.println(doc.getMarkdown())); +``` + +## Error Handling + +The SDK throws unchecked exceptions: + +```java +import com.firecrawl.errors.*; + +try { + Document doc = client.scrape("https://example.com"); +} catch (AuthenticationException e) { + // 401 — invalid API key + System.err.println("Auth failed: " + e.getMessage()); +} catch (RateLimitException e) { + // 429 — too many requests + System.err.println("Rate limited: " + e.getMessage()); +} catch (JobTimeoutException e) { + // Async job timed out + System.err.println("Job " + e.getJobId() + " timed out after " + e.getTimeoutSeconds() + "s"); +} catch (FirecrawlException e) { + // All other API errors + System.err.println("Error " + e.getStatusCode() + ": " + e.getMessage()); +} +``` + +## Configuration + +```java +FirecrawlClient client = FirecrawlClient.builder() + .apiKey("fc-your-api-key") // Required (or set FIRECRAWL_API_KEY env var) + .apiUrl("https://api.firecrawl.dev") // Optional (or set FIRECRAWL_API_URL env var) + .timeoutMs(300_000) // HTTP timeout: 5 min default + .maxRetries(3) // Auto-retries for transient failures + .backoffFactor(0.5) // Exponential backoff factor 
(seconds) + .asyncExecutor(myExecutor) // Custom executor for async methods + .build(); +``` + +## Building from Source + +### Clone and Build + +```bash +# Clone the repository (if you haven't already) +git clone https://github.com/mendableai/firecrawl.git +cd firecrawl/apps/java-sdk + +# Build the project +gradle build +``` + +### Generate JAR + +```bash +gradle jar +# Output: build/libs/firecrawl-java-1.0.0.jar +``` + +### Install Locally + +```bash +gradle publishToMavenLocal +# Now available as: com.firecrawl:firecrawl-java:1.0.0 in local Maven repository +``` + +## Running Tests + +The SDK includes both unit tests and E2E integration tests. + +### Unit Tests (No API Key Required) + +Unit tests verify SDK functionality without making actual API calls: + +```bash +gradle test +``` + +### E2E Integration Tests (API Key Required) + +E2E tests make real API calls and require a valid API key. These tests will be **skipped** if `FIRECRAWL_API_KEY` is not set: + +```bash +# Set your API key +export FIRECRAWL_API_KEY="fc-your-api-key-here" + +# Run all tests including E2E +gradle test +``` + +### Run Specific Tests + +```bash +# Run only scrape tests +gradle test --tests "*testScrape*" + +# Run only E2E tests +gradle test --tests "*E2E" + +# Run specific test class +gradle test --tests "com.firecrawl.FirecrawlClientTest" +``` + +### View Test Results + +After running tests, view the detailed report: + +```bash +open build/reports/tests/test/index.html # macOS +xdg-open build/reports/tests/test/index.html # Linux +``` + +## Development Setup + +If you're contributing to the SDK or testing local changes: + +1. **Install Prerequisites** (see Prerequisites section above) + +2. **Set Environment Variables:** + ```bash + export FIRECRAWL_API_KEY="fc-your-api-key" + # Optional: use local API server + export FIRECRAWL_API_URL="http://localhost:3002" + ``` + +3. **Build and Test:** + ```bash + gradle clean build test + ``` + +4. 
**Make Changes and Retest:** + ```bash + # Quick compilation check + gradle compileJava + + # Run tests + gradle test --tests "*testYourFeature*" + ``` diff --git a/apps/java-sdk/build.gradle.kts b/apps/java-sdk/build.gradle.kts new file mode 100644 index 0000000000..3ec9671476 --- /dev/null +++ b/apps/java-sdk/build.gradle.kts @@ -0,0 +1,67 @@ +plugins { + `java-library` + `maven-publish` +} + +group = "com.firecrawl" +version = "1.0.0" + +java { + sourceCompatibility = JavaVersion.VERSION_11 + targetCompatibility = JavaVersion.VERSION_11 + withSourcesJar() + withJavadocJar() +} + +repositories { + mavenCentral() +} + +dependencies { + api("com.squareup.okhttp3:okhttp:4.12.0") + api("com.fasterxml.jackson.core:jackson-databind:2.17.2") + api("com.fasterxml.jackson.core:jackson-annotations:2.17.2") + api("com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.17.2") + + testImplementation("org.junit.jupiter:junit-jupiter:5.10.3") + testRuntimeOnly("org.junit.platform:junit-platform-launcher:1.10.3") +} + +tasks.test { + useJUnitPlatform() +} + +tasks.withType { + options { + (this as StandardJavadocDocletOptions).apply { + addStringOption("Xdoclint:none", "-quiet") + } + } +} + +publishing { + publications { + create("mavenJava") { + from(components["java"]) + + pom { + name.set("Firecrawl Java SDK") + description.set("Java SDK for the Firecrawl web scraping API") + url.set("https://github.com/mendableai/firecrawl") + + licenses { + license { + name.set("MIT License") + url.set("https://opensource.org/licenses/MIT") + } + } + + scm { + url.set("https://github.com/mendableai/firecrawl") + connection.set("scm:git:git://github.com/mendableai/firecrawl.git") + developerConnection.set("scm:git:ssh://github.com/mendableai/firecrawl.git") + } + } + } + } +} diff --git a/apps/java-sdk/gradle/wrapper/gradle-wrapper.jar b/apps/java-sdk/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 
0000000000000000000000000000000000000000..a4b76b9530d66f5e68d973ea569d8e19de379189 GIT binary patch literal 43583 zcma&N1CXTcmMvW9vTb(Rwr$&4wr$(C?dmSu>@vG-+vuvg^_??!{yS%8zW-#zn-LkA z5&1^$^{lnmUON?}LBF8_K|(?T0Ra(xUH{($5eN!MR#ZihR#HxkUPe+_R8Cn`RRs(P z_^*#_XlXmGv7!4;*Y%p4nw?{bNp@UZHv1?Um8r6)Fei3p@ClJn0ECfg1hkeuUU@Or zDaPa;U3fE=3L}DooL;8f;P0ipPt0Z~9P0)lbStMS)ag54=uL9ia-Lm3nh|@(Y?B`; zx_#arJIpXH!U{fbCbI^17}6Ri*H<>OLR%c|^mh8+)*h~K8Z!9)DPf zR2h?lbDZQ`p9P;&DQ4F0sur@TMa!Y}S8irn(%d-gi0*WxxCSk*A?3lGh=gcYN?FGl z7D=Js!i~0=u3rox^eO3i@$0=n{K1lPNU zwmfjRVmLOCRfe=seV&P*1Iq=^i`502keY8Uy-WNPwVNNtJFx?IwAyRPZo2Wo1+S(xF37LJZ~%i)kpFQ3Fw=mXfd@>%+)RpYQLnr}B~~zoof(JVm^^&f zxKV^+3D3$A1G;qh4gPVjhrC8e(VYUHv#dy^)(RoUFM?o%W-EHxufuWf(l*@-l+7vt z=l`qmR56K~F|v<^Pd*p~1_y^P0P^aPC##d8+HqX4IR1gu+7w#~TBFphJxF)T$2WEa zxa?H&6=Qe7d(#tha?_1uQys2KtHQ{)Qco)qwGjrdNL7thd^G5i8Os)CHqc>iOidS} z%nFEDdm=GXBw=yXe1W-ShHHFb?Cc70+$W~z_+}nAoHFYI1MV1wZegw*0y^tC*s%3h zhD3tN8b=Gv&rj}!SUM6|ajSPp*58KR7MPpI{oAJCtY~JECm)*m_x>AZEu>DFgUcby z1Qaw8lU4jZpQ_$;*7RME+gq1KySGG#Wql>aL~k9tLrSO()LWn*q&YxHEuzmwd1?aAtI zBJ>P=&$=l1efe1CDU;`Fd+_;&wI07?V0aAIgc(!{a z0Jg6Y=inXc3^n!U0Atk`iCFIQooHqcWhO(qrieUOW8X(x?(RD}iYDLMjSwffH2~tB z)oDgNBLB^AJBM1M^c5HdRx6fBfka`(LD-qrlh5jqH~);#nw|iyp)()xVYak3;Ybik z0j`(+69aK*B>)e_p%=wu8XC&9e{AO4c~O1U`5X9}?0mrd*m$_EUek{R?DNSh(=br# z#Q61gBzEpmy`$pA*6!87 zSDD+=@fTY7<4A?GLqpA?Pb2z$pbCc4B4zL{BeZ?F-8`s$?>*lXXtn*NC61>|*w7J* z$?!iB{6R-0=KFmyp1nnEmLsA-H0a6l+1uaH^g%c(p{iT&YFrbQ$&PRb8Up#X3@Zsk zD^^&LK~111%cqlP%!_gFNa^dTYT?rhkGl}5=fL{a`UViaXWI$k-UcHJwmaH1s=S$4 z%4)PdWJX;hh5UoK?6aWoyLxX&NhNRqKam7tcOkLh{%j3K^4Mgx1@i|Pi&}<^5>hs5 zm8?uOS>%)NzT(%PjVPGa?X%`N2TQCKbeH2l;cTnHiHppPSJ<7y-yEIiC!P*ikl&!B z%+?>VttCOQM@ShFguHVjxX^?mHX^hSaO_;pnyh^v9EumqSZTi+#f&_Vaija0Q-e*| z7ulQj6Fs*bbmsWp{`auM04gGwsYYdNNZcg|ph0OgD>7O}Asn7^Z=eI>`$2*v78;sj-}oMoEj&@)9+ycEOo92xSyY344^ z11Hb8^kdOvbf^GNAK++bYioknrpdN>+u8R?JxG=!2Kd9r=YWCOJYXYuM0cOq^FhEd zBg2puKy__7VT3-r*dG4c62Wgxi52EMCQ`bKgf*#*ou(D4-ZN$+mg&7$u!! 
z-^+Z%;-3IDwqZ|K=ah85OLwkO zKxNBh+4QHh)u9D?MFtpbl)us}9+V!D%w9jfAMYEb>%$A;u)rrI zuBudh;5PN}_6J_}l55P3l_)&RMlH{m!)ai-i$g)&*M`eN$XQMw{v^r@-125^RRCF0 z^2>|DxhQw(mtNEI2Kj(;KblC7x=JlK$@78`O~>V!`|1Lm-^JR$-5pUANAnb(5}B}JGjBsliK4& zk6y(;$e&h)lh2)L=bvZKbvh@>vLlreBdH8No2>$#%_Wp1U0N7Ank!6$dFSi#xzh|( zRi{Uw%-4W!{IXZ)fWx@XX6;&(m_F%c6~X8hx=BN1&q}*( zoaNjWabE{oUPb!Bt$eyd#$5j9rItB-h*5JiNi(v^e|XKAj*8(k<5-2$&ZBR5fF|JA z9&m4fbzNQnAU}r8ab>fFV%J0z5awe#UZ|bz?Ur)U9bCIKWEzi2%A+5CLqh?}K4JHi z4vtM;+uPsVz{Lfr;78W78gC;z*yTch~4YkLr&m-7%-xc ztw6Mh2d>_iO*$Rd8(-Cr1_V8EO1f*^@wRoSozS) zy1UoC@pruAaC8Z_7~_w4Q6n*&B0AjOmMWa;sIav&gu z|J5&|{=a@vR!~k-OjKEgPFCzcJ>#A1uL&7xTDn;{XBdeM}V=l3B8fE1--DHjSaxoSjNKEM9|U9#m2<3>n{Iuo`r3UZp;>GkT2YBNAh|b z^jTq-hJp(ebZh#Lk8hVBP%qXwv-@vbvoREX$TqRGTgEi$%_F9tZES@z8Bx}$#5eeG zk^UsLBH{bc2VBW)*EdS({yw=?qmevwi?BL6*=12k9zM5gJv1>y#ML4!)iiPzVaH9% zgSImetD@dam~e>{LvVh!phhzpW+iFvWpGT#CVE5TQ40n%F|p(sP5mXxna+Ev7PDwA zamaV4m*^~*xV+&p;W749xhb_X=$|LD;FHuB&JL5?*Y2-oIT(wYY2;73<^#46S~Gx| z^cez%V7x$81}UWqS13Gz80379Rj;6~WdiXWOSsdmzY39L;Hg3MH43o*y8ibNBBH`(av4|u;YPq%{R;IuYow<+GEsf@R?=@tT@!}?#>zIIn0CoyV!hq3mw zHj>OOjfJM3F{RG#6ujzo?y32m^tgSXf@v=J$ELdJ+=5j|=F-~hP$G&}tDZsZE?5rX ztGj`!S>)CFmdkccxM9eGIcGnS2AfK#gXwj%esuIBNJQP1WV~b~+D7PJTmWGTSDrR` zEAu4B8l>NPuhsk5a`rReSya2nfV1EK01+G!x8aBdTs3Io$u5!6n6KX%uv@DxAp3F@{4UYg4SWJtQ-W~0MDb|j-$lwVn znAm*Pl!?Ps&3wO=R115RWKb*JKoexo*)uhhHBncEDMSVa_PyA>k{Zm2(wMQ(5NM3# z)jkza|GoWEQo4^s*wE(gHz?Xsg4`}HUAcs42cM1-qq_=+=!Gk^y710j=66(cSWqUe zklbm8+zB_syQv5A2rj!Vbw8;|$@C!vfNmNV!yJIWDQ>{+2x zKjuFX`~~HKG~^6h5FntRpnnHt=D&rq0>IJ9#F0eM)Y-)GpRjiN7gkA8wvnG#K=q{q z9dBn8_~wm4J<3J_vl|9H{7q6u2A!cW{bp#r*-f{gOV^e=8S{nc1DxMHFwuM$;aVI^ zz6A*}m8N-&x8;aunp1w7_vtB*pa+OYBw=TMc6QK=mbA-|Cf* zvyh8D4LRJImooUaSb7t*fVfih<97Gf@VE0|z>NcBwBQze);Rh!k3K_sfunToZY;f2 z^HmC4KjHRVg+eKYj;PRN^|E0>Gj_zagfRbrki68I^#~6-HaHg3BUW%+clM1xQEdPYt_g<2K+z!$>*$9nQ>; zf9Bei{?zY^-e{q_*|W#2rJG`2fy@{%6u0i_VEWTq$*(ZN37|8lFFFt)nCG({r!q#9 z5VK_kkSJ3?zOH)OezMT{!YkCuSSn!K#-Rhl$uUM(bq*jY? 
zi1xbMVthJ`E>d>(f3)~fozjg^@eheMF6<)I`oeJYx4*+M&%c9VArn(OM-wp%M<-`x z7sLP1&3^%Nld9Dhm@$3f2}87!quhI@nwd@3~fZl_3LYW-B?Ia>ui`ELg z&Qfe!7m6ze=mZ`Ia9$z|ARSw|IdMpooY4YiPN8K z4B(ts3p%2i(Td=tgEHX z0UQ_>URBtG+-?0E;E7Ld^dyZ;jjw0}XZ(}-QzC6+NN=40oDb2^v!L1g9xRvE#@IBR zO!b-2N7wVfLV;mhEaXQ9XAU+>=XVA6f&T4Z-@AX!leJ8obP^P^wP0aICND?~w&NykJ#54x3_@r7IDMdRNy4Hh;h*!u(Ol(#0bJdwEo$5437-UBjQ+j=Ic>Q2z` zJNDf0yO6@mr6y1#n3)s(W|$iE_i8r@Gd@!DWDqZ7J&~gAm1#~maIGJ1sls^gxL9LLG_NhU!pTGty!TbhzQnu)I*S^54U6Yu%ZeCg`R>Q zhBv$n5j0v%O_j{QYWG!R9W?5_b&67KB$t}&e2LdMvd(PxN6Ir!H4>PNlerpBL>Zvyy!yw z-SOo8caEpDt(}|gKPBd$qND5#a5nju^O>V&;f890?yEOfkSG^HQVmEbM3Ugzu+UtH zC(INPDdraBN?P%kE;*Ae%Wto&sgw(crfZ#Qy(<4nk;S|hD3j{IQRI6Yq|f^basLY; z-HB&Je%Gg}Jt@={_C{L$!RM;$$|iD6vu#3w?v?*;&()uB|I-XqEKqZPS!reW9JkLewLb!70T7n`i!gNtb1%vN- zySZj{8-1>6E%H&=V}LM#xmt`J3XQoaD|@XygXjdZ1+P77-=;=eYpoEQ01B@L*a(uW zrZeZz?HJsw_4g0vhUgkg@VF8<-X$B8pOqCuWAl28uB|@r`19DTUQQsb^pfqB6QtiT z*`_UZ`fT}vtUY#%sq2{rchyfu*pCg;uec2$-$N_xgjZcoumE5vSI{+s@iLWoz^Mf; zuI8kDP{!XY6OP~q5}%1&L}CtfH^N<3o4L@J@zg1-mt{9L`s^z$Vgb|mr{@WiwAqKg zp#t-lhrU>F8o0s1q_9y`gQNf~Vb!F%70f}$>i7o4ho$`uciNf=xgJ>&!gSt0g;M>*x4-`U)ysFW&Vs^Vk6m%?iuWU+o&m(2Jm26Y(3%TL; zA7T)BP{WS!&xmxNw%J=$MPfn(9*^*TV;$JwRy8Zl*yUZi8jWYF>==j~&S|Xinsb%c z2?B+kpet*muEW7@AzjBA^wAJBY8i|#C{WtO_or&Nj2{=6JTTX05}|H>N2B|Wf!*3_ z7hW*j6p3TvpghEc6-wufFiY!%-GvOx*bZrhZu+7?iSrZL5q9}igiF^*R3%DE4aCHZ zqu>xS8LkW+Auv%z-<1Xs92u23R$nk@Pk}MU5!gT|c7vGlEA%G^2th&Q*zfg%-D^=f z&J_}jskj|Q;73NP4<4k*Y%pXPU2Thoqr+5uH1yEYM|VtBPW6lXaetokD0u z9qVek6Q&wk)tFbQ8(^HGf3Wp16gKmr>G;#G(HRBx?F`9AIRboK+;OfHaLJ(P>IP0w zyTbTkx_THEOs%Q&aPrxbZrJlio+hCC_HK<4%f3ZoSAyG7Dn`=X=&h@m*|UYO-4Hq0 z-Bq&+Ie!S##4A6OGoC~>ZW`Y5J)*ouaFl_e9GA*VSL!O_@xGiBw!AF}1{tB)z(w%c zS1Hmrb9OC8>0a_$BzeiN?rkPLc9%&;1CZW*4}CDDNr2gcl_3z+WC15&H1Zc2{o~i) z)LLW=WQ{?ricmC`G1GfJ0Yp4Dy~Ba;j6ZV4r{8xRs`13{dD!xXmr^Aga|C=iSmor% z8hi|pTXH)5Yf&v~exp3o+sY4B^^b*eYkkCYl*T{*=-0HniSA_1F53eCb{x~1k3*`W zr~};p1A`k{1DV9=UPnLDgz{aJH=-LQo<5%+Em!DNN252xwIf*wF_zS^!(XSm(9eoj z=*dXG&n0>)_)N5oc6v!>-bd(2ragD8O=M|wGW 
z!xJQS<)u70m&6OmrF0WSsr@I%T*c#Qo#Ha4d3COcX+9}hM5!7JIGF>7<~C(Ear^Sn zm^ZFkV6~Ula6+8S?oOROOA6$C&q&dp`>oR-2Ym3(HT@O7Sd5c~+kjrmM)YmgPH*tL zX+znN>`tv;5eOfX?h{AuX^LK~V#gPCu=)Tigtq9&?7Xh$qN|%A$?V*v=&-2F$zTUv z`C#WyIrChS5|Kgm_GeudCFf;)!WH7FI60j^0o#65o6`w*S7R@)88n$1nrgU(oU0M9 zx+EuMkC>(4j1;m6NoGqEkpJYJ?vc|B zOlwT3t&UgL!pX_P*6g36`ZXQ; z9~Cv}ANFnJGp(;ZhS(@FT;3e)0)Kp;h^x;$*xZn*k0U6-&FwI=uOGaODdrsp-!K$Ac32^c{+FhI-HkYd5v=`PGsg%6I`4d9Jy)uW0y%) zm&j^9WBAp*P8#kGJUhB!L?a%h$hJgQrx!6KCB_TRo%9{t0J7KW8!o1B!NC)VGLM5! zpZy5Jc{`r{1e(jd%jsG7k%I+m#CGS*BPA65ZVW~fLYw0dA-H_}O zrkGFL&P1PG9p2(%QiEWm6x;U-U&I#;Em$nx-_I^wtgw3xUPVVu zqSuKnx&dIT-XT+T10p;yjo1Y)z(x1fb8Dzfn8e yu?e%!_ptzGB|8GrCfu%p?(_ zQccdaaVK$5bz;*rnyK{_SQYM>;aES6Qs^lj9lEs6_J+%nIiuQC*fN;z8md>r_~Mfl zU%p5Dt_YT>gQqfr@`cR!$NWr~+`CZb%dn;WtzrAOI>P_JtsB76PYe*<%H(y>qx-`Kq!X_; z<{RpAqYhE=L1r*M)gNF3B8r(<%8mo*SR2hu zccLRZwGARt)Hlo1euqTyM>^!HK*!Q2P;4UYrysje@;(<|$&%vQekbn|0Ruu_Io(w4#%p6ld2Yp7tlA`Y$cciThP zKzNGIMPXX%&Ud0uQh!uQZz|FB`4KGD?3!ND?wQt6!n*f4EmCoJUh&b?;B{|lxs#F- z31~HQ`SF4x$&v00@(P+j1pAaj5!s`)b2RDBp*PB=2IB>oBF!*6vwr7Dp%zpAx*dPr zb@Zjq^XjN?O4QcZ*O+8>)|HlrR>oD*?WQl5ri3R#2?*W6iJ>>kH%KnnME&TT@ZzrHS$Q%LC?n|e>V+D+8D zYc4)QddFz7I8#}y#Wj6>4P%34dZH~OUDb?uP%-E zwjXM(?Sg~1!|wI(RVuxbu)-rH+O=igSho_pDCw(c6b=P zKk4ATlB?bj9+HHlh<_!&z0rx13K3ZrAR8W)!@Y}o`?a*JJsD+twZIv`W)@Y?Amu_u zz``@-e2X}27$i(2=9rvIu5uTUOVhzwu%mNazS|lZb&PT;XE2|B&W1>=B58#*!~D&) zfVmJGg8UdP*fx(>Cj^?yS^zH#o-$Q-*$SnK(ZVFkw+er=>N^7!)FtP3y~Xxnu^nzY zikgB>Nj0%;WOltWIob|}%lo?_C7<``a5hEkx&1ku$|)i>Rh6@3h*`slY=9U}(Ql_< zaNG*J8vb&@zpdhAvv`?{=zDedJ23TD&Zg__snRAH4eh~^oawdYi6A3w8<Ozh@Kw)#bdktM^GVb zrG08?0bG?|NG+w^&JvD*7LAbjED{_Zkc`3H!My>0u5Q}m!+6VokMLXxl`Mkd=g&Xx z-a>m*#G3SLlhbKB!)tnzfWOBV;u;ftU}S!NdD5+YtOjLg?X}dl>7m^gOpihrf1;PY zvll&>dIuUGs{Qnd- zwIR3oIrct8Va^Tm0t#(bJD7c$Z7DO9*7NnRZorrSm`b`cxz>OIC;jSE3DO8`hX955ui`s%||YQtt2 z5DNA&pG-V+4oI2s*x^>-$6J?p=I>C|9wZF8z;VjR??Icg?1w2v5Me+FgAeGGa8(3S z4vg*$>zC-WIVZtJ7}o9{D-7d>zCe|z#<9>CFve-OPAYsneTb^JH!Enaza#j}^mXy1 z+ULn^10+rWLF6j2>Ya@@Kq?26>AqK{A_| 
zQKb*~F1>sE*=d?A?W7N2j?L09_7n+HGi{VY;MoTGr_)G9)ot$p!-UY5zZ2Xtbm=t z@dpPSGwgH=QtIcEulQNI>S-#ifbnO5EWkI;$A|pxJd885oM+ zGZ0_0gDvG8q2xebj+fbCHYfAXuZStH2j~|d^sBAzo46(K8n59+T6rzBwK)^rfPT+B zyIFw)9YC-V^rhtK`!3jrhmW-sTmM+tPH+;nwjL#-SjQPUZ53L@A>y*rt(#M(qsiB2 zx6B)dI}6Wlsw%bJ8h|(lhkJVogQZA&n{?Vgs6gNSXzuZpEyu*xySy8ro07QZ7Vk1!3tJphN_5V7qOiyK8p z#@jcDD8nmtYi1^l8ml;AF<#IPK?!pqf9D4moYk>d99Im}Jtwj6c#+A;f)CQ*f-hZ< z=p_T86jog%!p)D&5g9taSwYi&eP z#JuEK%+NULWus;0w32-SYFku#i}d~+{Pkho&^{;RxzP&0!RCm3-9K6`>KZpnzS6?L z^H^V*s!8<>x8bomvD%rh>Zp3>Db%kyin;qtl+jAv8Oo~1g~mqGAC&Qi_wy|xEt2iz zWAJEfTV%cl2Cs<1L&DLRVVH05EDq`pH7Oh7sR`NNkL%wi}8n>IXcO40hp+J+sC!W?!krJf!GJNE8uj zg-y~Ns-<~D?yqbzVRB}G>0A^f0!^N7l=$m0OdZuqAOQqLc zX?AEGr1Ht+inZ-Qiwnl@Z0qukd__a!C*CKuGdy5#nD7VUBM^6OCpxCa2A(X;e0&V4 zM&WR8+wErQ7UIc6LY~Q9x%Sn*Tn>>P`^t&idaOEnOd(Ufw#>NoR^1QdhJ8s`h^|R_ zXX`c5*O~Xdvh%q;7L!_!ohf$NfEBmCde|#uVZvEo>OfEq%+Ns7&_f$OR9xsihRpBb z+cjk8LyDm@U{YN>+r46?nn{7Gh(;WhFw6GAxtcKD+YWV?uge>;+q#Xx4!GpRkVZYu zzsF}1)7$?%s9g9CH=Zs+B%M_)+~*j3L0&Q9u7!|+T`^O{xE6qvAP?XWv9_MrZKdo& z%IyU)$Q95AB4!#hT!_dA>4e@zjOBD*Y=XjtMm)V|+IXzjuM;(l+8aA5#Kaz_$rR6! zj>#&^DidYD$nUY(D$mH`9eb|dtV0b{S>H6FBfq>t5`;OxA4Nn{J(+XihF(stSche7$es&~N$epi&PDM_N`As;*9D^L==2Q7Z2zD+CiU(|+-kL*VG+&9!Yb3LgPy?A zm7Z&^qRG_JIxK7-FBzZI3Q<;{`DIxtc48k> zc|0dmX;Z=W$+)qE)~`yn6MdoJ4co;%!`ddy+FV538Y)j(vg}5*k(WK)KWZ3WaOG!8 z!syGn=s{H$odtpqFrT#JGM*utN7B((abXnpDM6w56nhw}OY}0TiTG1#f*VFZr+^-g zbP10`$LPq_;PvrA1XXlyx2uM^mrjTzX}w{yuLo-cOClE8MMk47T25G8M!9Z5ypOSV zAJUBGEg5L2fY)ZGJb^E34R2zJ?}Vf>{~gB!8=5Z) z9y$>5c)=;o0HeHHSuE4U)#vG&KF|I%-cF6f$~pdYJWk_dD}iOA>iA$O$+4%@>JU08 zS`ep)$XLPJ+n0_i@PkF#ri6T8?ZeAot$6JIYHm&P6EB=BiaNY|aA$W0I+nz*zkz_z zkEru!tj!QUffq%)8y0y`T&`fuus-1p>=^hnBiBqD^hXrPs`PY9tU3m0np~rISY09> z`P3s=-kt_cYcxWd{de@}TwSqg*xVhp;E9zCsnXo6z z?f&Sv^U7n4`xr=mXle94HzOdN!2kB~4=%)u&N!+2;z6UYKUDqi-s6AZ!haB;@&B`? 
z_TRX0%@suz^TRdCb?!vNJYPY8L_}&07uySH9%W^Tc&1pia6y1q#?*Drf}GjGbPjBS zbOPcUY#*$3sL2x4v_i*Y=N7E$mR}J%|GUI(>WEr+28+V z%v5{#e!UF*6~G&%;l*q*$V?&r$Pp^sE^i-0$+RH3ERUUdQ0>rAq2(2QAbG}$y{de( z>{qD~GGuOk559Y@%$?N^1ApVL_a704>8OD%8Y%8B;FCt%AoPu8*D1 zLB5X>b}Syz81pn;xnB}%0FnwazlWfUV)Z-~rZg6~b z6!9J$EcE&sEbzcy?CI~=boWA&eeIa%z(7SE^qgVLz??1Vbc1*aRvc%Mri)AJaAG!p z$X!_9Ds;Zz)f+;%s&dRcJt2==P{^j3bf0M=nJd&xwUGlUFn?H=2W(*2I2Gdu zv!gYCwM10aeus)`RIZSrCK=&oKaO_Ry~D1B5!y0R=%!i2*KfXGYX&gNv_u+n9wiR5 z*e$Zjju&ODRW3phN925%S(jL+bCHv6rZtc?!*`1TyYXT6%Ju=|X;6D@lq$8T zW{Y|e39ioPez(pBH%k)HzFITXHvnD6hw^lIoUMA;qAJ^CU?top1fo@s7xT13Fvn1H z6JWa-6+FJF#x>~+A;D~;VDs26>^oH0EI`IYT2iagy23?nyJ==i{g4%HrAf1-*v zK1)~@&(KkwR7TL}L(A@C_S0G;-GMDy=MJn2$FP5s<%wC)4jC5PXoxrQBFZ_k0P{{s@sz+gX`-!=T8rcB(=7vW}^K6oLWMmp(rwDh}b zwaGGd>yEy6fHv%jM$yJXo5oMAQ>c9j`**}F?MCry;T@47@r?&sKHgVe$MCqk#Z_3S z1GZI~nOEN*P~+UaFGnj{{Jo@16`(qVNtbU>O0Hf57-P>x8Jikp=`s8xWs^dAJ9lCQ z)GFm+=OV%AMVqVATtN@|vp61VVAHRn87}%PC^RAzJ%JngmZTasWBAWsoAqBU+8L8u z4A&Pe?fmTm0?mK-BL9t+{y7o(7jm+RpOhL9KnY#E&qu^}B6=K_dB}*VlSEiC9fn)+V=J;OnN)Ta5v66ic1rG+dGAJ1 z1%Zb_+!$=tQ~lxQrzv3x#CPb?CekEkA}0MYSgx$Jdd}q8+R=ma$|&1a#)TQ=l$1tQ z=tL9&_^vJ)Pk}EDO-va`UCT1m#Uty1{v^A3P~83_#v^ozH}6*9mIjIr;t3Uv%@VeW zGL6(CwCUp)Jq%G0bIG%?{_*Y#5IHf*5M@wPo6A{$Um++Co$wLC=J1aoG93&T7Ho}P z=mGEPP7GbvoG!uD$k(H3A$Z))+i{Hy?QHdk>3xSBXR0j!11O^mEe9RHmw!pvzv?Ua~2_l2Yh~_!s1qS`|0~0)YsbHSz8!mG)WiJE| z2f($6TQtt6L_f~ApQYQKSb=`053LgrQq7G@98#igV>y#i==-nEjQ!XNu9 z~;mE+gtj4IDDNQJ~JVk5Ux6&LCSFL!y=>79kE9=V}J7tD==Ga+IW zX)r7>VZ9dY=V&}DR))xUoV!u(Z|%3ciQi_2jl}3=$Agc(`RPb z8kEBpvY>1FGQ9W$n>Cq=DIpski};nE)`p3IUw1Oz0|wxll^)4dq3;CCY@RyJgFgc# zKouFh!`?Xuo{IMz^xi-h=StCis_M7yq$u) z?XHvw*HP0VgR+KR6wI)jEMX|ssqYvSf*_3W8zVTQzD?3>H!#>InzpSO)@SC8q*ii- z%%h}_#0{4JG;Jm`4zg};BPTGkYamx$Xo#O~lBirRY)q=5M45n{GCfV7h9qwyu1NxOMoP4)jjZMxmT|IQQh0U7C$EbnMN<3)Kk?fFHYq$d|ICu>KbY_hO zTZM+uKHe(cIZfEqyzyYSUBZa8;Fcut-GN!HSA9ius`ltNebF46ZX_BbZNU}}ZOm{M2&nANL9@0qvih15(|`S~z}m&h!u4x~(%MAO$jHRWNfuxWF#B)E&g3ghSQ9|> 
z(MFaLQj)NE0lowyjvg8z0#m6FIuKE9lDO~Glg}nSb7`~^&#(Lw{}GVOS>U)m8bF}x zVjbXljBm34Cs-yM6TVusr+3kYFjr28STT3g056y3cH5Tmge~ASxBj z%|yb>$eF;WgrcOZf569sDZOVwoo%8>XO>XQOX1OyN9I-SQgrm;U;+#3OI(zrWyow3 zk==|{lt2xrQ%FIXOTejR>;wv(Pb8u8}BUpx?yd(Abh6? zsoO3VYWkeLnF43&@*#MQ9-i-d0t*xN-UEyNKeyNMHw|A(k(_6QKO=nKMCxD(W(Yop zsRQ)QeL4X3Lxp^L%wzi2-WVSsf61dqliPUM7srDB?Wm6Lzn0&{*}|IsKQW;02(Y&| zaTKv|`U(pSzuvR6Rduu$wzK_W-Y-7>7s?G$)U}&uK;<>vU}^^ns@Z!p+9?St1s)dG zK%y6xkPyyS1$~&6v{kl?Md6gwM|>mt6Upm>oa8RLD^8T{0?HC!Z>;(Bob7el(DV6x zi`I)$&E&ngwFS@bi4^xFLAn`=fzTC;aimE^!cMI2n@Vo%Ae-ne`RF((&5y6xsjjAZ zVguVoQ?Z9uk$2ON;ersE%PU*xGO@T*;j1BO5#TuZKEf(mB7|g7pcEA=nYJ{s3vlbg zd4-DUlD{*6o%Gc^N!Nptgay>j6E5;3psI+C3Q!1ZIbeCubW%w4pq9)MSDyB{HLm|k zxv-{$$A*pS@csolri$Ge<4VZ}e~78JOL-EVyrbxKra^d{?|NnPp86!q>t<&IP07?Z z^>~IK^k#OEKgRH+LjllZXk7iA>2cfH6+(e&9ku5poo~6y{GC5>(bRK7hwjiurqAiZ zg*DmtgY}v83IjE&AbiWgMyFbaRUPZ{lYiz$U^&Zt2YjG<%m((&_JUbZcfJ22(>bi5 z!J?<7AySj0JZ&<-qXX;mcV!f~>G=sB0KnjWca4}vrtunD^1TrpfeS^4dvFr!65knK zZh`d;*VOkPs4*-9kL>$GP0`(M!j~B;#x?Ba~&s6CopvO86oM?-? 
zOw#dIRc;6A6T?B`Qp%^<U5 z19x(ywSH$_N+Io!6;e?`tWaM$`=Db!gzx|lQ${DG!zb1Zl&|{kX0y6xvO1o z220r<-oaS^^R2pEyY;=Qllqpmue|5yI~D|iI!IGt@iod{Opz@*ml^w2bNs)p`M(Io z|E;;m*Xpjd9l)4G#KaWfV(t8YUn@A;nK^#xgv=LtnArX|vWQVuw3}B${h+frU2>9^ z!l6)!Uo4`5k`<<;E(ido7M6lKTgWezNLq>U*=uz&s=cc$1%>VrAeOoUtA|T6gO4>UNqsdK=NF*8|~*sl&wI=x9-EGiq*aqV!(VVXA57 zw9*o6Ir8Lj1npUXvlevtn(_+^X5rzdR>#(}4YcB9O50q97%rW2me5_L=%ffYPUSRc z!vv?Kv>dH994Qi>U(a<0KF6NH5b16enCp+mw^Hb3Xs1^tThFpz!3QuN#}KBbww`(h z7GO)1olDqy6?T$()R7y%NYx*B0k_2IBiZ14&8|JPFxeMF{vW>HF-Vi3+ZOI=+qP}n zw(+!WcTd~4ZJX1!ZM&y!+uyt=&i!+~d(V%GjH;-NsEEv6nS1TERt|RHh!0>W4+4pp z1-*EzAM~i`+1f(VEHI8So`S`akPfPTfq*`l{Fz`hS%k#JS0cjT2mS0#QLGf=J?1`he3W*;m4)ce8*WFq1sdP=~$5RlH1EdWm|~dCvKOi4*I_96{^95p#B<(n!d?B z=o`0{t+&OMwKcxiBECznJcfH!fL(z3OvmxP#oWd48|mMjpE||zdiTBdWelj8&Qosv zZFp@&UgXuvJw5y=q6*28AtxZzo-UUpkRW%ne+Ylf!V-0+uQXBW=5S1o#6LXNtY5!I z%Rkz#(S8Pjz*P7bqB6L|M#Er{|QLae-Y{KA>`^} z@lPjeX>90X|34S-7}ZVXe{wEei1<{*e8T-Nbj8JmD4iwcE+Hg_zhkPVm#=@b$;)h6 z<<6y`nPa`f3I6`!28d@kdM{uJOgM%`EvlQ5B2bL)Sl=|y@YB3KeOzz=9cUW3clPAU z^sYc}xf9{4Oj?L5MOlYxR{+>w=vJjvbyO5}ptT(o6dR|ygO$)nVCvNGnq(6;bHlBd zl?w-|plD8spjDF03g5ip;W3Z z><0{BCq!Dw;h5~#1BuQilq*TwEu)qy50@+BE4bX28+7erX{BD4H)N+7U`AVEuREE8 z;X?~fyhF-x_sRfHIj~6f(+^@H)D=ngP;mwJjxhQUbUdzk8f94Ab%59-eRIq?ZKrwD z(BFI=)xrUlgu(b|hAysqK<}8bslmNNeD=#JW*}^~Nrswn^xw*nL@Tx!49bfJecV&KC2G4q5a!NSv)06A_5N3Y?veAz;Gv+@U3R% z)~UA8-0LvVE{}8LVDOHzp~2twReqf}ODIyXMM6=W>kL|OHcx9P%+aJGYi_Om)b!xe zF40Vntn0+VP>o<$AtP&JANjXBn7$}C@{+@3I@cqlwR2MdwGhVPxlTIcRVu@Ho-wO` z_~Or~IMG)A_`6-p)KPS@cT9mu9RGA>dVh5wY$NM9-^c@N=hcNaw4ITjm;iWSP^ZX| z)_XpaI61<+La+U&&%2a z0za$)-wZP@mwSELo#3!PGTt$uy0C(nTT@9NX*r3Ctw6J~7A(m#8fE)0RBd`TdKfAT zCf@$MAxjP`O(u9s@c0Fd@|}UQ6qp)O5Q5DPCeE6mSIh|Rj{$cAVIWsA=xPKVKxdhg zLzPZ`3CS+KIO;T}0Ip!fAUaNU>++ZJZRk@I(h<)RsJUhZ&Ru9*!4Ptn;gX^~4E8W^TSR&~3BAZc#HquXn)OW|TJ`CTahk+{qe`5+ixON^zA9IFd8)kc%*!AiLu z>`SFoZ5bW-%7}xZ>gpJcx_hpF$2l+533{gW{a7ce^B9sIdmLrI0)4yivZ^(Vh@-1q zFT!NQK$Iz^xu%|EOK=n>ug;(7J4OnS$;yWmq>A;hsD_0oAbLYhW^1Vdt9>;(JIYjf 
zdb+&f&D4@4AS?!*XpH>8egQvSVX`36jMd>$+RgI|pEg))^djhGSo&#lhS~9%NuWfX zDDH;3T*GzRT@5=7ibO>N-6_XPBYxno@mD_3I#rDD?iADxX`! zh*v8^i*JEMzyN#bGEBz7;UYXki*Xr(9xXax(_1qVW=Ml)kSuvK$coq2A(5ZGhs_pF z$*w}FbN6+QDseuB9=fdp_MTs)nQf!2SlROQ!gBJBCXD&@-VurqHj0wm@LWX-TDmS= z71M__vAok|@!qgi#H&H%Vg-((ZfxPAL8AI{x|VV!9)ZE}_l>iWk8UPTGHs*?u7RfP z5MC&=c6X;XlUzrz5q?(!eO@~* zoh2I*%J7dF!!_!vXoSIn5o|wj1#_>K*&CIn{qSaRc&iFVxt*^20ngCL;QonIS>I5^ zMw8HXm>W0PGd*}Ko)f|~dDd%;Wu_RWI_d;&2g6R3S63Uzjd7dn%Svu-OKpx*o|N>F zZg=-~qLb~VRLpv`k zWSdfHh@?dp=s_X`{yxOlxE$4iuyS;Z-x!*E6eqmEm*j2bE@=ZI0YZ5%Yj29!5+J$4h{s($nakA`xgbO8w zi=*r}PWz#lTL_DSAu1?f%-2OjD}NHXp4pXOsCW;DS@BC3h-q4_l`<))8WgzkdXg3! zs1WMt32kS2E#L0p_|x+x**TFV=gn`m9BWlzF{b%6j-odf4{7a4y4Uaef@YaeuPhU8 zHBvRqN^;$Jizy+ z=zW{E5<>2gp$pH{M@S*!sJVQU)b*J5*bX4h>5VJve#Q6ga}cQ&iL#=(u+KroWrxa%8&~p{WEUF0il=db;-$=A;&9M{Rq`ouZ5m%BHT6%st%saGsD6)fQgLN}x@d3q>FC;=f%O3Cyg=Ke@Gh`XW za@RajqOE9UB6eE=zhG%|dYS)IW)&y&Id2n7r)6p_)vlRP7NJL(x4UbhlcFXWT8?K=%s7;z?Vjts?y2+r|uk8Wt(DM*73^W%pAkZa1Jd zNoE)8FvQA>Z`eR5Z@Ig6kS5?0h;`Y&OL2D&xnnAUzQz{YSdh0k zB3exx%A2TyI)M*EM6htrxSlep!Kk(P(VP`$p0G~f$smld6W1r_Z+o?=IB@^weq>5VYsYZZR@` z&XJFxd5{|KPZmVOSxc@^%71C@;z}}WhbF9p!%yLj3j%YOlPL5s>7I3vj25 z@xmf=*z%Wb4;Va6SDk9cv|r*lhZ`(y_*M@>q;wrn)oQx%B(2A$9(74>;$zmQ!4fN; z>XurIk-7@wZys<+7XL@0Fhe-f%*=(weaQEdR9Eh6>Kl-EcI({qoZqyzziGwpg-GM#251sK_ z=3|kitS!j%;fpc@oWn65SEL73^N&t>Ix37xgs= zYG%eQDJc|rqHFia0!_sm7`@lvcv)gfy(+KXA@E{3t1DaZ$DijWAcA)E0@X?2ziJ{v z&KOYZ|DdkM{}t+@{@*6ge}m%xfjIxi%qh`=^2Rwz@w0cCvZ&Tc#UmCDbVwABrON^x zEBK43FO@weA8s7zggCOWhMvGGE`baZ62cC)VHyy!5Zbt%ieH+XN|OLbAFPZWyC6)p z4P3%8sq9HdS3=ih^0OOlqTPbKuzQ?lBEI{w^ReUO{V?@`ARsL|S*%yOS=Z%sF)>-y z(LAQdhgAcuF6LQjRYfdbD1g4o%tV4EiK&ElLB&^VZHbrV1K>tHTO{#XTo>)2UMm`2 z^t4s;vnMQgf-njU-RVBRw0P0-m#d-u`(kq7NL&2T)TjI_@iKuPAK-@oH(J8?%(e!0Ir$yG32@CGUPn5w4)+9@8c&pGx z+K3GKESI4*`tYlmMHt@br;jBWTei&(a=iYslc^c#RU3Q&sYp zSG){)V<(g7+8W!Wxeb5zJb4XE{I|&Y4UrFWr%LHkdQ;~XU zgy^dH-Z3lmY+0G~?DrC_S4@=>0oM8Isw%g(id10gWkoz2Q%7W$bFk@mIzTCcIB(K8 
zc<5h&ZzCdT=9n-D>&a8vl+=ZF*`uTvQviG_bLde*k>{^)&0o*b05x$MO3gVLUx`xZ z43j+>!u?XV)Yp@MmG%Y`+COH2?nQcMrQ%k~6#O%PeD_WvFO~Kct za4XoCM_X!c5vhRkIdV=xUB3xI2NNStK*8_Zl!cFjOvp-AY=D;5{uXj}GV{LK1~IE2 z|KffUiBaStRr;10R~K2VVtf{TzM7FaPm;Y(zQjILn+tIPSrJh&EMf6evaBKIvi42-WYU9Vhj~3< zZSM-B;E`g_o8_XTM9IzEL=9Lb^SPhe(f(-`Yh=X6O7+6ALXnTcUFpI>ekl6v)ZQeNCg2 z^H|{SKXHU*%nBQ@I3It0m^h+6tvI@FS=MYS$ZpBaG7j#V@P2ZuYySbp@hA# ze(kc;P4i_-_UDP?%<6>%tTRih6VBgScKU^BV6Aoeg6Uh(W^#J^V$Xo^4#Ekp ztqQVK^g9gKMTHvV7nb64UU7p~!B?>Y0oFH5T7#BSW#YfSB@5PtE~#SCCg3p^o=NkMk$<8- z6PT*yIKGrvne7+y3}_!AC8NNeI?iTY(&nakN>>U-zT0wzZf-RuyZk^X9H-DT_*wk= z;&0}6LsGtfVa1q)CEUPlx#(ED@-?H<1_FrHU#z5^P3lEB|qsxEyn%FOpjx z3S?~gvoXy~L(Q{Jh6*i~=f%9kM1>RGjBzQh_SaIDfSU_9!<>*Pm>l)cJD@wlyxpBV z4Fmhc2q=R_wHCEK69<*wG%}mgD1=FHi4h!98B-*vMu4ZGW~%IrYSLGU{^TuseqVgV zLP<%wirIL`VLyJv9XG_p8w@Q4HzNt-o;U@Au{7%Ji;53!7V8Rv0^Lu^Vf*sL>R(;c zQG_ZuFl)Mh-xEIkGu}?_(HwkB2jS;HdPLSxVU&Jxy9*XRG~^HY(f0g8Q}iqnVmgjI zfd=``2&8GsycjR?M%(zMjn;tn9agcq;&rR!Hp z$B*gzHsQ~aXw8c|a(L^LW(|`yGc!qOnV(ZjU_Q-4z1&0;jG&vAKuNG=F|H?@m5^N@ zq{E!1n;)kNTJ>|Hb2ODt-7U~-MOIFo%9I)_@7fnX+eMMNh>)V$IXesJpBn|uo8f~#aOFytCT zf9&%MCLf8mp4kwHTcojWmM3LU=#|{3L>E}SKwOd?%{HogCZ_Z1BSA}P#O(%H$;z7XyJ^sjGX;j5 zrzp>|Ud;*&VAU3x#f{CKwY7Vc{%TKKqmB@oTHA9;>?!nvMA;8+Jh=cambHz#J18x~ zs!dF>$*AnsQ{{82r5Aw&^7eRCdvcgyxH?*DV5(I$qXh^zS>us*I66_MbL8y4d3ULj z{S(ipo+T3Ag!+5`NU2sc+@*m{_X|&p#O-SAqF&g_n7ObB82~$p%fXA5GLHMC+#qqL zdt`sJC&6C2)=juQ_!NeD>U8lDVpAOkW*khf7MCcs$A(wiIl#B9HM%~GtQ^}yBPjT@ z+E=|A!Z?A(rwzZ;T}o6pOVqHzTr*i;Wrc%&36kc@jXq~+w8kVrs;%=IFdACoLAcCAmhFNpbP8;s`zG|HC2Gv?I~w4ITy=g$`0qMQdkijLSOtX6xW%Z9Nw<;M- zMN`c7=$QxN00DiSjbVt9Mi6-pjv*j(_8PyV-il8Q-&TwBwH1gz1uoxs6~uU}PrgWB zIAE_I-a1EqlIaGQNbcp@iI8W1sm9fBBNOk(k&iLBe%MCo#?xI$%ZmGA?=)M9D=0t7 zc)Q0LnI)kCy{`jCGy9lYX%mUsDWwsY`;jE(;Us@gmWPqjmXL+Hu#^;k%eT>{nMtzj zsV`Iy6leTA8-PndszF;N^X@CJrTw5IIm!GPeu)H2#FQitR{1p;MasQVAG3*+=9FYK zw*k!HT(YQorfQj+1*mCV458(T5=fH`um$gS38hw(OqVMyunQ;rW5aPbF##A3fGH6h z@W)i9Uff?qz`YbK4c}JzQpuxuE3pcQO)%xBRZp{zJ^-*|oryTxJ-rR+MXJ)!f=+pp 
z10H|DdGd2exhi+hftcYbM0_}C0ZI-2vh+$fU1acsB-YXid7O|=9L!3e@$H*6?G*Zp z%qFB(sgl=FcC=E4CYGp4CN>=M8#5r!RU!u+FJVlH6=gI5xHVD&k;Ta*M28BsxfMV~ zLz+@6TxnfLhF@5=yQo^1&S}cmTN@m!7*c6z;}~*!hNBjuE>NLVl2EwN!F+)0$R1S! zR|lF%n!9fkZ@gPW|x|B={V6x3`=jS*$Pu0+5OWf?wnIy>Y1MbbGSncpKO0qE(qO=ts z!~@&!N`10S593pVQu4FzpOh!tvg}p%zCU(aV5=~K#bKi zHdJ1>tQSrhW%KOky;iW+O_n;`l9~omqM%sdxdLtI`TrJzN6BQz+7xOl*rM>xVI2~# z)7FJ^Dc{DC<%~VS?@WXzuOG$YPLC;>#vUJ^MmtbSL`_yXtNKa$Hk+l-c!aC7gn(Cg ze?YPYZ(2Jw{SF6MiO5(%_pTo7j@&DHNW`|lD`~{iH+_eSTS&OC*2WTT*a`?|9w1dh zh1nh@$a}T#WE5$7Od~NvSEU)T(W$p$s5fe^GpG+7fdJ9=enRT9$wEk+ZaB>G3$KQO zgq?-rZZnIv!p#>Ty~}c*Lb_jxJg$eGM*XwHUwuQ|o^}b3^T6Bxx{!?va8aC@-xK*H ztJBFvFfsSWu89%@b^l3-B~O!CXs)I6Y}y#0C0U0R0WG zybjroj$io0j}3%P7zADXOwHwafT#uu*zfM!oD$6aJx7+WL%t-@6^rD_a_M?S^>c;z zMK580bZXo1f*L$CuMeM4Mp!;P@}b~$cd(s5*q~FP+NHSq;nw3fbWyH)i2)-;gQl{S zZO!T}A}fC}vUdskGSq&{`oxt~0i?0xhr6I47_tBc`fqaSrMOzR4>0H^;A zF)hX1nfHs)%Zb-(YGX;=#2R6C{BG;k=?FfP?9{_uFLri~-~AJ;jw({4MU7e*d)?P@ zXX*GkNY9ItFjhwgAIWq7Y!ksbMzfqpG)IrqKx9q{zu%Mdl+{Dis#p9q`02pr1LG8R z@As?eG!>IoROgS!@J*to<27coFc1zpkh?w=)h9CbYe%^Q!Ui46Y*HO0mr% zEff-*$ndMNw}H2a5@BsGj5oFfd!T(F&0$<{GO!Qdd?McKkorh=5{EIjDTHU`So>8V zBA-fqVLb2;u7UhDV1xMI?y>fe3~4urv3%PX)lDw+HYa;HFkaLqi4c~VtCm&Ca+9C~ zge+67hp#R9`+Euq59WhHX&7~RlXn=--m8$iZ~~1C8cv^2(qO#X0?vl91gzUKBeR1J z^p4!!&7)3#@@X&2aF2-)1Ffcc^F8r|RtdL2X%HgN&XU-KH2SLCbpw?J5xJ*!F-ypZ zMG%AJ!Pr&}`LW?E!K~=(NJxuSVTRCGJ$2a*Ao=uUDSys!OFYu!Vs2IT;xQ6EubLIl z+?+nMGeQQhh~??0!s4iQ#gm3!BpMpnY?04kK375e((Uc7B3RMj;wE?BCoQGu=UlZt!EZ1Q*auI)dj3Jj{Ujgt zW5hd~-HWBLI_3HuO) zNrb^XzPsTIb=*a69wAAA3J6AAZZ1VsYbIG}a`=d6?PjM)3EPaDpW2YP$|GrBX{q*! 
z$KBHNif)OKMBCFP5>!1d=DK>8u+Upm-{hj5o|Wn$vh1&K!lVfDB&47lw$tJ?d5|=B z^(_9=(1T3Fte)z^>|3**n}mIX;mMN5v2F#l(q*CvU{Ga`@VMp#%rQkDBy7kYbmb-q z<5!4iuB#Q_lLZ8}h|hPODI^U6`gzLJre9u3k3c#%86IKI*^H-@I48Bi*@avYm4v!n0+v zWu{M{&F8#p9cx+gF0yTB_<2QUrjMPo9*7^-uP#~gGW~y3nfPAoV%amgr>PSyVAd@l)}8#X zR5zV6t*uKJZL}?NYvPVK6J0v4iVpwiN|>+t3aYiZSp;m0!(1`bHO}TEtWR1tY%BPB z(W!0DmXbZAsT$iC13p4f>u*ZAy@JoLAkJhzFf1#4;#1deO8#8d&89}en&z!W&A3++^1(;>0SB1*54d@y&9Pn;^IAf3GiXbfT`_>{R+Xv; zQvgL>+0#8-laO!j#-WB~(I>l0NCMt_;@Gp_f0#^c)t?&#Xh1-7RR0@zPyBz!U#0Av zT?}n({(p?p7!4S2ZBw)#KdCG)uPnZe+U|0{BW!m)9 zi_9$F?m<`2!`JNFv+w8MK_K)qJ^aO@7-Ig>cM4-r0bi=>?B_2mFNJ}aE3<+QCzRr*NA!QjHw# z`1OsvcoD0?%jq{*7b!l|L1+Tw0TTAM4XMq7*ntc-Ived>Sj_ZtS|uVdpfg1_I9knY z2{GM_j5sDC7(W&}#s{jqbybqJWyn?{PW*&cQIU|*v8YGOKKlGl@?c#TCnmnAkAzV- zmK={|1G90zz=YUvC}+fMqts0d4vgA%t6Jhjv?d;(Z}(Ep8fTZfHA9``fdUHkA+z3+ zhh{ohP%Bj?T~{i0sYCQ}uC#5BwN`skI7`|c%kqkyWIQ;!ysvA8H`b-t()n6>GJj6xlYDu~8qX{AFo$Cm3d|XFL=4uvc?Keb zzb0ZmMoXca6Mob>JqkNuoP>B2Z>D`Q(TvrG6m`j}-1rGP!g|qoL=$FVQYxJQjFn33lODt3Wb1j8VR zlR++vIT6^DtYxAv_hxupbLLN3e0%A%a+hWTKDV3!Fjr^cWJ{scsAdfhpI)`Bms^M6 zQG$waKgFr=c|p9Piug=fcJvZ1ThMnNhQvBAg-8~b1?6wL*WyqXhtj^g(Ke}mEfZVM zJuLNTUVh#WsE*a6uqiz`b#9ZYg3+2%=C(6AvZGc=u&<6??!slB1a9K)=VL zY9EL^mfyKnD zSJyYBc_>G;5RRnrNgzJz#Rkn3S1`mZgO`(r5;Hw6MveN(URf_XS-r58Cn80K)ArH4 z#Rrd~LG1W&@ttw85cjp8xV&>$b%nSXH_*W}7Ch2pg$$c0BdEo-HWRTZcxngIBJad> z;C>b{jIXjb_9Jis?NZJsdm^EG}e*pR&DAy0EaSGi3XWTa(>C%tz1n$u?5Fb z1qtl?;_yjYo)(gB^iQq?=jusF%kywm?CJP~zEHi0NbZ);$(H$w(Hy@{i>$wcVRD_X|w-~(0Z9BJyh zhNh;+eQ9BEIs;tPz%jSVnfCP!3L&9YtEP;svoj_bNzeGSQIAjd zBss@A;)R^WAu-37RQrM%{DfBNRx>v!G31Z}8-El9IOJlb_MSoMu2}GDYycNaf>uny z+8xykD-7ONCM!APry_Lw6-yT>5!tR}W;W`C)1>pxSs5o1z#j7%m=&=7O4hz+Lsqm` z*>{+xsabZPr&X=}G@obTb{nPTkccJX8w3CG7X+1+t{JcMabv~UNv+G?txRqXib~c^Mo}`q{$`;EBNJ;#F*{gvS12kV?AZ%O0SFB$^ zn+}!HbmEj}w{Vq(G)OGAzH}R~kS^;(-s&=ectz8vN!_)Yl$$U@HNTI-pV`LSj7Opu zTZ5zZ)-S_{GcEQPIQXLQ#oMS`HPu{`SQiAZ)m1at*Hy%3xma|>o`h%E%8BEbi9p0r zVjcsh<{NBKQ4eKlXU|}@XJ#@uQw*$4BxKn6#W~I4T<^f99~(=}a`&3(ur8R9t+|AQ 
zWkQx7l}wa48-jO@ft2h+7qn%SJtL%~890FG0s5g*kNbL3I&@brh&f6)TlM`K^(bhr zJWM6N6x3flOw$@|C@kPi7yP&SP?bzP-E|HSXQXG>7gk|R9BTj`e=4de9C6+H7H7n# z#GJeVs1mtHhLDmVO?LkYRQc`DVOJ_vdl8VUihO-j#t=0T3%Fc1f9F73ufJz*adn*p zc%&vi(4NqHu^R>sAT_0EDjVR8bc%wTz#$;%NU-kbDyL_dg0%TFafZwZ?5KZpcuaO54Z9hX zD$u>q!-9`U6-D`E#`W~fIfiIF5_m6{fvM)b1NG3xf4Auw;Go~Fu7cth#DlUn{@~yu z=B;RT*dp?bO}o%4x7k9v{r=Y@^YQ^UUm(Qmliw8brO^=NP+UOohLYiaEB3^DB56&V zK?4jV61B|1Uj_5fBKW;8LdwOFZKWp)g{B%7g1~DgO&N& z#lisxf?R~Z@?3E$Mms$$JK8oe@X`5m98V*aV6Ua}8Xs2#A!{x?IP|N(%nxsH?^c{& z@vY&R1QmQs83BW28qAmJfS7MYi=h(YK??@EhjL-t*5W!p z^gYX!Q6-vBqcv~ruw@oMaU&qp0Fb(dbVzm5xJN%0o_^@fWq$oa3X?9s%+b)x4w-q5Koe(@j6Ez7V@~NRFvd zfBH~)U5!ix3isg`6be__wBJp=1@yfsCMw1C@y+9WYD9_C%{Q~7^0AF2KFryfLlUP# zwrtJEcH)jm48!6tUcxiurAMaiD04C&tPe6DI0#aoqz#Bt0_7_*X*TsF7u*zv(iEfA z;$@?XVu~oX#1YXtceQL{dSneL&*nDug^OW$DSLF0M1Im|sSX8R26&)<0Fbh^*l6!5wfSu8MpMoh=2l z^^0Sr$UpZp*9oqa23fcCfm7`ya2<4wzJ`Axt7e4jJrRFVf?nY~2&tRL* zd;6_njcz01c>$IvN=?K}9ie%Z(BO@JG2J}fT#BJQ+f5LFSgup7i!xWRKw6)iITjZU z%l6hPZia>R!`aZjwCp}I zg)%20;}f+&@t;(%5;RHL>K_&7MH^S+7<|(SZH!u zznW|jz$uA`P9@ZWtJgv$EFp>)K&Gt+4C6#*khZQXS*S~6N%JDT$r`aJDs9|uXWdbg zBwho$phWx}x!qy8&}6y5Vr$G{yGSE*r$^r{}pw zVTZKvikRZ`J_IJrjc=X1uw?estdwm&bEahku&D04HD+0Bm~q#YGS6gp!KLf$A{%Qd z&&yX@Hp>~(wU{|(#U&Bf92+1i&Q*-S+=y=3pSZy$#8Uc$#7oiJUuO{cE6=tsPhwPe| zxQpK>`Dbka`V)$}e6_OXKLB%i76~4N*zA?X+PrhH<&)}prET;kel24kW%+9))G^JI zsq7L{P}^#QsZViX%KgxBvEugr>ZmFqe^oAg?{EI=&_O#e)F3V#rc z8$4}0Zr19qd3tE4#$3_f=Bbx9oV6VO!d3(R===i-7p=Vj`520w0D3W6lQfY48}!D* z&)lZMG;~er2qBoI2gsX+Ts-hnpS~NYRDtPd^FPzn!^&yxRy#CSz(b&E*tL|jIkq|l zf%>)7Dtu>jCf`-7R#*GhGn4FkYf;B$+9IxmqH|lf6$4irg{0ept__%)V*R_OK=T06 zyT_m-o@Kp6U{l5h>W1hGq*X#8*y@<;vsOFqEjTQXFEotR+{3}ODDnj;o0@!bB5x=N z394FojuGOtVKBlVRLtHp%EJv_G5q=AgF)SKyRN5=cGBjDWv4LDn$IL`*=~J7u&Dy5 zrMc83y+w^F&{?X(KOOAl-sWZDb{9X9#jrQtmrEXD?;h-}SYT7yM(X_6qksM=K_a;Z z3u0qT0TtaNvDER_8x*rxXw&C^|h{P1qxK|@pS7vdlZ#P z7PdB7MmC2}%sdzAxt>;WM1s0??`1983O4nFK|hVAbHcZ3x{PzytQLkCVk7hA!Lo` zEJH?4qw|}WH{dc4z%aB=0XqsFW?^p=X}4xnCJXK%c#ItOSjdSO`UXJyuc8bh^Cf}8 
z@Ht|vXd^6{Fgai8*tmyRGmD_s_nv~r^Fy7j`Bu`6=G)5H$i7Q7lvQnmea&TGvJp9a|qOrUymZ$6G|Ly z#zOCg++$3iB$!6!>215A4!iryregKuUT344X)jQb3|9qY>c0LO{6Vby05n~VFzd?q zgGZv&FGlkiH*`fTurp>B8v&nSxNz)=5IF$=@rgND4d`!AaaX;_lK~)-U8la_Wa8i?NJC@BURO*sUW)E9oyv3RG^YGfN%BmxzjlT)bp*$<| zX3tt?EAy<&K+bhIuMs-g#=d1}N_?isY)6Ay$mDOKRh z4v1asEGWoAp=srraLW^h&_Uw|6O+r;wns=uwYm=JN4Q!quD8SQRSeEcGh|Eb5Jg8m zOT}u;N|x@aq)=&;wufCc^#)5U^VcZw;d_wwaoh9$p@Xrc{DD6GZUqZ ziC6OT^zSq@-lhbgR8B+e;7_Giv;DK5gn^$bs<6~SUadiosfewWDJu`XsBfOd1|p=q zE>m=zF}!lObA%ePey~gqU8S6h-^J2Y?>7)L2+%8kV}Gp=h`Xm_}rlm)SyUS=`=S7msKu zC|T!gPiI1rWGb1z$Md?0YJQ;%>uPLOXf1Z>N~`~JHJ!^@D5kSXQ4ugnFZ>^`zH8CAiZmp z6Ms|#2gcGsQ{{u7+Nb9sA?U>(0e$5V1|WVwY`Kn)rsnnZ4=1u=7u!4WexZD^IQ1Jk zfF#NLe>W$3m&C^ULjdw+5|)-BSHwpegdyt9NYC{3@QtMfd8GrIWDu`gd0nv-3LpGCh@wgBaG z176tikL!_NXM+Bv#7q^cyn9$XSeZR6#!B4JE@GVH zoobHZN_*RF#@_SVYKkQ_igme-Y5U}cV(hkR#k1c{bQNMji zU7aE`?dHyx=1`kOYZo_8U7?3-7vHOp`Qe%Z*i+FX!s?6huNp0iCEW-Z7E&jRWmUW_ z67j>)Ew!yq)hhG4o?^z}HWH-e=es#xJUhDRc4B51M4~E-l5VZ!&zQq`gWe`?}#b~7w1LH4Xa-UCT5LXkXQWheBa2YJYbyQ zl1pXR%b(KCXMO0OsXgl0P0Og<{(@&z1aokU-Pq`eQq*JYgt8xdFQ6S z6Z3IFSua8W&M#`~*L#r>Jfd6*BzJ?JFdBR#bDv$_0N!_5vnmo@!>vULcDm`MFU823 zpG9pqjqz^FE5zMDoGqhs5OMmC{Y3iVcl>F}5Rs24Y5B^mYQ;1T&ks@pIApHOdrzXF z-SdX}Hf{X;TaSxG_T$0~#RhqKISGKNK47}0*x&nRIPtmdwxc&QT3$8&!3fWu1eZ_P zJveQj^hJL#Sn!*4k`3}(d(aasl&7G0j0-*_2xtAnoX1@9+h zO#c>YQg60Z;o{Bi=3i7S`Ic+ZE>K{(u|#)9y}q*j8uKQ1^>+(BI}m%1v3$=4ojGBc zm+o1*!T&b}-lVvZqIUBc8V}QyFEgm#oyIuC{8WqUNV{Toz`oxhYpP!_p2oHHh5P@iB*NVo~2=GQm+8Yrkm2Xjc_VyHg1c0>+o~@>*Qzo zHVBJS>$$}$_4EniTI;b1WShX<5-p#TPB&!;lP!lBVBbLOOxh6FuYloD%m;n{r|;MU3!q4AVkua~fieeWu2 zQAQ$ue(IklX6+V;F1vCu-&V?I3d42FgWgsb_e^29ol}HYft?{SLf>DrmOp9o!t>I^ zY7fBCk+E8n_|apgM|-;^=#B?6RnFKlN`oR)`e$+;D=yO-(U^jV;rft^G_zl`n7qnM zL z*-Y4Phq+ZI1$j$F-f;`CD#|`-T~OM5Q>x}a>B~Gb3-+9i>Lfr|Ca6S^8g*{*?_5!x zH_N!SoRP=gX1?)q%>QTY!r77e2j9W(I!uAz{T`NdNmPBBUzi2{`XMB^zJGGwFWeA9 z{fk33#*9SO0)DjROug+(M)I-pKA!CX;IY(#gE!UxXVsa)X!UftIN98{pt#4MJHOhY 
zM$_l}-TJlxY?LS6Nuz1T<44m<4i^8k@D$zuCPrkmz@sdv+{ciyFJG2Zwy&%c7;atIeTdh!a(R^QXnu1Oq1b42*OQFWnyQ zWeQrdvP|w_idy53Wa<{QH^lFmEd+VlJkyiC>6B#s)F;w-{c;aKIm;Kp50HnA-o3lY z9B~F$gJ@yYE#g#X&3ADx&tO+P_@mnQTz9gv30_sTsaGXkfNYXY{$(>*PEN3QL>I!k zp)KibPhrfX3%Z$H6SY`rXGYS~143wZrG2;=FLj50+VM6soI~up_>fU(2Wl@{BRsMi zO%sL3x?2l1cXTF)k&moNsHfQrQ+wu(gBt{sk#CU=UhrvJIncy@tJX5klLjgMn>~h= zg|FR&;@eh|C7`>s_9c~0-{IAPV){l|Ts`i=)AW;d9&KPc3fMeoTS%8@V~D8*h;&(^>yjT84MM}=%#LS7shLAuuj(0VAYoozhWjq z4LEr?wUe2^WGwdTIgWBkDUJa>YP@5d9^Rs$kCXmMRxuF*YMVrn?0NFyPl}>`&dqZb z<5eqR=ZG3>n2{6v6BvJ`YBZeeTtB88TAY(x0a58EWyuf>+^|x8Qa6wA|1Nb_p|nA zWWa}|z8a)--Wj`LqyFk_a3gN2>5{Rl_wbW?#by7&i*^hRknK%jwIH6=dQ8*-_{*x0j^DUfMX0`|K@6C<|1cgZ~D(e5vBFFm;HTZF(!vT8=T$K+|F)x3kqzBV4-=p1V(lzi(s7jdu0>LD#N=$Lk#3HkG!a zIF<7>%B7sRNzJ66KrFV76J<2bdYhxll0y2^_rdG=I%AgW4~)1Nvz=$1UkE^J%BxLo z+lUci`UcU062os*=`-j4IfSQA{w@y|3}Vk?i;&SSdh8n+$iHA#%ERL{;EpXl6u&8@ zzg}?hkEOUOJt?ZL=pWZFJ19mI1@P=$U5*Im1e_8Z${JsM>Ov?nh8Z zP5QvI!{Jy@&BP48%P2{Jr_VgzW;P@7)M9n|lDT|Ep#}7C$&ud&6>C^5ZiwKIg2McPU(4jhM!BD@@L(Gd*Nu$ji(ljZ<{FIeW_1Mmf;76{LU z-ywN~=uNN)Xi6$<12A9y)K%X|(W0p|&>>4OXB?IiYr||WKDOJPxiSe01NSV-h24^L z_>m$;|C+q!Mj**-qQ$L-*++en(g|hw;M!^%_h-iDjFHLo-n3JpB;p?+o2;`*jpvJU zLY^lt)Un4joij^^)O(CKs@7E%*!w>!HA4Q?0}oBJ7Nr8NQ7QmY^4~jvf0-`%waOLn zdNjAPaC0_7c|RVhw)+71NWjRi!y>C+Bl;Z`NiL^zn2*0kmj5gyhCLCxts*cWCdRI| zjsd=sT5BVJc^$GxP~YF$-U{-?kW6r@^vHXB%{CqYzU@1>dzf#3SYedJG-Rm6^RB7s zGM5PR(yKPKR)>?~vpUIeTP7A1sc8-knnJk*9)3t^e%izbdm>Y=W{$wm(cy1RB-19i za#828DMBY+ps#7Y8^6t)=Ea@%Nkt)O6JCx|ybC;Ap}Z@Zw~*}3P>MZLPb4Enxz9Wf zssobT^(R@KuShj8>@!1M7tm|2%-pYYDxz-5`rCbaTCG5{;Uxm z*g=+H1X8{NUvFGzz~wXa%Eo};I;~`37*WrRU&K0dPSB$yk(Z*@K&+mFal^?c zurbqB-+|Kb5|sznT;?Pj!+kgFY1#Dr;_%A(GIQC{3ct|{*Bji%FNa6c-thbpBkA;U zURV!Dr&X{0J}iht#-Qp2=xzuh(fM>zRoiGrYl5ttw2#r34gC41CCOC31m~^UPTK@s z6;A@)7O7_%C)>bnAXerYuAHdE93>j2N}H${zEc6&SbZ|-fiG*-qtGuy-qDelH(|u$ zorf8_T6Zqe#Ub!+e3oSyrskt_HyW_^5lrWt#30l)tHk|j$@YyEkXUOV;6B51L;M@=NIWZXU;GrAa(LGxO%|im%7F<-6N;en0Cr zLH>l*y?pMwt`1*cH~LdBPFY_l;~`N!Clyfr;7w<^X;&(ZiVdF1S5e(+Q%60zgh)s4 
zn2yj$+mE=miVERP(g8}G4<85^-5f@qxh2ec?n+$A_`?qN=iyT1?U@t?V6DM~BIlBB z>u~eXm-aE>R0sQy!-I4xtCNi!!qh?R1!kKf6BoH2GG{L4%PAz0{Sh6xpuyI%*~u)s z%rLuFl)uQUCBQAtMyN;%)zFMx4loh7uTfKeB2Xif`lN?2gq6NhWhfz0u5WP9J>=V2 zo{mLtSy&BA!mSzs&CrKWq^y40JF5a&GSXIi2= z{EYb59J4}VwikL4P=>+mc6{($FNE@e=VUwG+KV21;<@lrN`mnz5jYGASyvz7BOG_6(p^eTxD-4O#lROgon;R35=|nj#eHIfJBYPWG>H>`dHKCDZ3`R{-?HO0mE~(5_WYcFmp8sU?wr*UkAQiNDGc6T zA%}GOLXlOWqL?WwfHO8MB#8M8*~Y*gz;1rWWoVSXP&IbKxbQ8+s%4Jnt?kDsq7btI zCDr0PZ)b;B%!lu&CT#RJzm{l{2fq|BcY85`w~3LSK<><@(2EdzFLt9Y_`;WXL6x`0 zDoQ?=?I@Hbr;*VVll1Gmd8*%tiXggMK81a+T(5Gx6;eNb8=uYn z5BG-0g>pP21NPn>$ntBh>`*})Fl|38oC^9Qz>~MAazH%3Q~Qb!ALMf$srexgPZ2@&c~+hxRi1;}+)-06)!#Mq<6GhP z-Q?qmgo${aFBApb5p}$1OJKTClfi8%PpnczyVKkoHw7Ml9e7ikrF0d~UB}i3vizos zXW4DN$SiEV9{faLt5bHy2a>33K%7Td-n5C*N;f&ZqAg#2hIqEb(y<&f4u5BWJ>2^4 z414GosL=Aom#m&=x_v<0-fp1r%oVJ{T-(xnomNJ(Dryv zh?vj+%=II_nV+@NR+(!fZZVM&(W6{6%9cm+o+Z6}KqzLw{(>E86uA1`_K$HqINlb1 zKelh3-jr2I9V?ych`{hta9wQ2c9=MM`2cC{m6^MhlL2{DLv7C^j z$xXBCnDl_;l|bPGMX@*tV)B!c|4oZyftUlP*?$YU9C_eAsuVHJ58?)zpbr30P*C`T z7y#ao`uE-SOG(Pi+`$=e^mle~)pRrdwL5)N;o{gpW21of(QE#U6w%*C~`v-z0QqBML!!5EeYA5IQB0 z^l01c;L6E(iytN!LhL}wfwP7W9PNAkb+)Cst?qg#$n;z41O4&v+8-zPs+XNb-q zIeeBCh#ivnFLUCwfS;p{LC0O7tm+Sf9Jn)~b%uwP{%69;QC)Ok0t%*a5M+=;y8j=v z#!*pp$9@!x;UMIs4~hP#pnfVc!%-D<+wsG@R2+J&%73lK|2G!EQC)O05TCV=&3g)C!lT=czLpZ@Sa%TYuoE?v8T8`V;e$#Zf2_Nj6nvBgh1)2 GZ~q4|mN%#X literal 0 HcmV?d00001 diff --git a/apps/java-sdk/gradle/wrapper/gradle-wrapper.properties b/apps/java-sdk/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000000..9355b41557 --- /dev/null +++ b/apps/java-sdk/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,7 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/apps/java-sdk/gradlew b/apps/java-sdk/gradlew new file mode 100755 
index 0000000000..4c8789e1c5 --- /dev/null +++ b/apps/java-sdk/gradlew @@ -0,0 +1,120 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +app_path=$0 +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld -- "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). 
+cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NonStop* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 ; then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is://undefined. That's://why the redirect is://done to /dev/null 2>&1 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + ;; + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is://undefined. 
That's://why the redirect is://done to /dev/null 2>&1 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + ;; + esac +fi + +# Collect all arguments for the java command, stracks://the style://of://arguments://after://the://class name +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$@" + +exec "$JAVACMD" "$@" diff --git a/apps/java-sdk/settings.gradle.kts b/apps/java-sdk/settings.gradle.kts new file mode 100644 index 0000000000..d9c10b0235 --- /dev/null +++ b/apps/java-sdk/settings.gradle.kts @@ -0,0 +1 @@ +rootProject.name = "firecrawl-java" diff --git a/apps/java-sdk/src/main/java/com/firecrawl/client/FirecrawlClient.java b/apps/java-sdk/src/main/java/com/firecrawl/client/FirecrawlClient.java new file mode 100644 index 0000000000..efbb4724e9 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/client/FirecrawlClient.java @@ -0,0 +1,841 @@ +package com.firecrawl.client; + +import com.firecrawl.errors.FirecrawlException; +import com.firecrawl.errors.JobTimeoutException; +import com.firecrawl.models.*; + +import java.util.*; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executor; +import java.util.concurrent.ForkJoinPool; + +/** + * Client for the Firecrawl v2 API. + * + *

Example usage: + *

{@code
+ * FirecrawlClient client = FirecrawlClient.builder()
+ *     .apiKey("fc-your-api-key")
+ *     .build();
+ *
+ * // Scrape a single page
+ * Document doc = client.scrape("https://example.com",
+ *     ScrapeOptions.builder()
+ *         .formats(List.of("markdown"))
+ *         .build());
+ *
+ * // Crawl a website
+ * CrawlJob job = client.crawl("https://example.com",
+ *     CrawlOptions.builder()
+ *         .limit(50)
+ *         .build());
+ * }
+ */ +public class FirecrawlClient { + + private static final String DEFAULT_API_URL = "https://api.firecrawl.dev"; + private static final long DEFAULT_TIMEOUT_MS = 300_000; // 5 minutes + private static final int DEFAULT_MAX_RETRIES = 3; + private static final double DEFAULT_BACKOFF_FACTOR = 0.5; + private static final int DEFAULT_POLL_INTERVAL = 2; // seconds + private static final int DEFAULT_JOB_TIMEOUT = 300; // seconds + + private final FirecrawlHttpClient http; + private final Executor asyncExecutor; + + private FirecrawlClient(FirecrawlHttpClient http, Executor asyncExecutor) { + this.http = http; + this.asyncExecutor = asyncExecutor; + } + + /** + * Creates a new builder for constructing a FirecrawlClient. + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Creates a client from the FIRECRAWL_API_KEY environment variable. + */ + public static FirecrawlClient fromEnv() { + String apiKey = System.getenv("FIRECRAWL_API_KEY"); + if (apiKey == null || apiKey.isBlank()) { + String sysProp = System.getProperty("firecrawl.apiKey"); + if (sysProp == null || sysProp.isBlank()) { + throw new FirecrawlException("FIRECRAWL_API_KEY environment variable or firecrawl.apiKey system property is required"); + } + apiKey = sysProp; + } + return builder().apiKey(apiKey).build(); + } + + // ================================================================ + // SCRAPE + // ================================================================ + + /** + * Scrapes a single URL and returns the document. + * + * @param url the URL to scrape + * @return the scraped document + */ + public Document scrape(String url) { + return scrape(url, null); + } + + /** + * Scrapes a single URL with options. 
+ * + * @param url the URL to scrape + * @param options scrape configuration options + * @return the scraped document + */ + public Document scrape(String url, ScrapeOptions options) { + Objects.requireNonNull(url, "URL is required"); + Map body = new LinkedHashMap<>(); + body.put("url", url); + if (options != null) { + mergeOptions(body, options); + } + return extractData(http.post("/v2/scrape", body, Map.class), Document.class); + } + + // ================================================================ + // CRAWL + // ================================================================ + + /** + * Starts an async crawl job and returns immediately. + * + * @param url the URL to start crawling from + * @param options crawl configuration options + * @return the crawl job reference with ID + */ + public CrawlResponse startCrawl(String url, CrawlOptions options) { + Objects.requireNonNull(url, "URL is required"); + Map body = new LinkedHashMap<>(); + body.put("url", url); + if (options != null) { + mergeOptions(body, options); + } + return http.post("/v2/crawl", body, CrawlResponse.class); + } + + /** + * Gets the status and results of a crawl job. + * + * @param jobId the crawl job ID + * @return the crawl job status + */ + public CrawlJob getCrawlStatus(String jobId) { + Objects.requireNonNull(jobId, "Job ID is required"); + return http.get("/v2/crawl/" + jobId, CrawlJob.class); + } + + /** + * Crawls a website and waits for completion (auto-polling). + * + * @param url the URL to crawl + * @param options crawl configuration options + * @return the completed crawl job with all documents + */ + public CrawlJob crawl(String url, CrawlOptions options) { + return crawl(url, options, DEFAULT_POLL_INTERVAL, DEFAULT_JOB_TIMEOUT); + } + + /** + * Crawls a website and waits for completion with custom polling settings. 
+ * + * @param url the URL to crawl + * @param options crawl configuration options + * @param pollIntervalSec seconds between status checks + * @param timeoutSec maximum seconds to wait + * @return the completed crawl job with all documents + */ + public CrawlJob crawl(String url, CrawlOptions options, int pollIntervalSec, int timeoutSec) { + CrawlResponse start = startCrawl(url, options); + return pollCrawl(start.getId(), pollIntervalSec, timeoutSec); + } + + /** + * Cancels a running crawl job. + * + * @param jobId the crawl job ID + * @return the cancellation response + */ + @SuppressWarnings("unchecked") + public Map cancelCrawl(String jobId) { + Objects.requireNonNull(jobId, "Job ID is required"); + return http.delete("/v2/crawl/" + jobId, Map.class); + } + + /** + * Gets errors from a crawl job. + * + * @param jobId the crawl job ID + * @return error details + */ + @SuppressWarnings("unchecked") + public Map getCrawlErrors(String jobId) { + Objects.requireNonNull(jobId, "Job ID is required"); + return http.get("/v2/crawl/" + jobId + "/errors", Map.class); + } + + // ================================================================ + // BATCH SCRAPE + // ================================================================ + + /** + * Starts an async batch scrape job. + * + * @param urls the URLs to scrape + * @param options batch scrape configuration options + * @return the batch job reference with ID + */ + @SuppressWarnings("unchecked") + public BatchScrapeResponse startBatchScrape(List urls, BatchScrapeOptions options) { + Objects.requireNonNull(urls, "URLs list is required"); + Map body = new LinkedHashMap<>(); + body.put("urls", urls); + Map extraHeaders = Collections.emptyMap(); + if (options != null) { + // Extract idempotencyKey before serialization — it must be sent as an + // HTTP header (x-idempotency-key), not in the JSON body. 
+ String idempotencyKey = options.getIdempotencyKey(); + if (idempotencyKey != null && !idempotencyKey.isEmpty()) { + extraHeaders = Collections.singletonMap("x-idempotency-key", idempotencyKey); + } + + mergeOptions(body, options); + // The API expects scrape options flattened at the top level, not nested + // under an "options" key. Extract and flatten them, but preserve + // batch-level fields so they are not overwritten by scrape options. + Map nested = (Map) body.remove("options"); + if (nested != null) { + Map batchFields = new LinkedHashMap<>(body); + body.putAll(nested); + body.putAll(batchFields); + } + } + return http.post("/v2/batch/scrape", body, BatchScrapeResponse.class, extraHeaders); + } + + /** + * Gets the status and results of a batch scrape job. + * + * @param jobId the batch scrape job ID + * @return the batch scrape job status + */ + public BatchScrapeJob getBatchScrapeStatus(String jobId) { + Objects.requireNonNull(jobId, "Job ID is required"); + return http.get("/v2/batch/scrape/" + jobId, BatchScrapeJob.class); + } + + /** + * Batch-scrapes URLs and waits for completion (auto-polling). + * + * @param urls the URLs to scrape + * @param options batch scrape configuration options + * @return the completed batch scrape job with all documents + */ + public BatchScrapeJob batchScrape(List urls, BatchScrapeOptions options) { + return batchScrape(urls, options, DEFAULT_POLL_INTERVAL, DEFAULT_JOB_TIMEOUT); + } + + /** + * Batch-scrapes URLs and waits for completion with custom polling settings. 
+ * + * @param urls the URLs to scrape + * @param options batch scrape configuration options + * @param pollIntervalSec seconds between status checks + * @param timeoutSec maximum seconds to wait + * @return the completed batch scrape job with all documents + */ + public BatchScrapeJob batchScrape(List urls, BatchScrapeOptions options, + int pollIntervalSec, int timeoutSec) { + BatchScrapeResponse start = startBatchScrape(urls, options); + return pollBatchScrape(start.getId(), pollIntervalSec, timeoutSec); + } + + /** + * Cancels a running batch scrape job. + * + * @param jobId the batch scrape job ID + * @return the cancellation response + */ + @SuppressWarnings("unchecked") + public Map cancelBatchScrape(String jobId) { + Objects.requireNonNull(jobId, "Job ID is required"); + return http.delete("/v2/batch/scrape/" + jobId, Map.class); + } + + // ================================================================ + // MAP + // ================================================================ + + /** + * Discovers URLs on a website. + * + * @param url the URL to map + * @return the discovered URLs + */ + public MapData map(String url) { + return map(url, null); + } + + /** + * Discovers URLs on a website with options. + * + * @param url the URL to map + * @param options map configuration options + * @return the discovered URLs + */ + public MapData map(String url, MapOptions options) { + Objects.requireNonNull(url, "URL is required"); + Map body = new LinkedHashMap<>(); + body.put("url", url); + if (options != null) { + mergeOptions(body, options); + } + return extractData(http.post("/v2/map", body, Map.class), MapData.class); + } + + // ================================================================ + // SEARCH + // ================================================================ + + /** + * Performs a web search. 
+ * + * @param query the search query + * @return search results + */ + public SearchData search(String query) { + return search(query, null); + } + + /** + * Performs a web search with options. + * + * @param query the search query + * @param options search configuration options + * @return search results + */ + public SearchData search(String query, SearchOptions options) { + Objects.requireNonNull(query, "Query is required"); + Map body = new LinkedHashMap<>(); + body.put("query", query); + if (options != null) { + mergeOptions(body, options); + } + return extractData(http.post("/v2/search", body, Map.class), SearchData.class); + } + + // ================================================================ + // AGENT + // ================================================================ + + /** + * Starts an async agent task. + * + * @param options agent configuration options + * @return the agent response with job ID + */ + public AgentResponse startAgent(AgentOptions options) { + Objects.requireNonNull(options, "Agent options are required"); + return http.post("/v2/agent", options, AgentResponse.class); + } + + /** + * Gets the status of an agent task. + * + * @param jobId the agent job ID + * @return the agent status response + */ + public AgentStatusResponse getAgentStatus(String jobId) { + Objects.requireNonNull(jobId, "Job ID is required"); + return http.get("/v2/agent/" + jobId, AgentStatusResponse.class); + } + + /** + * Runs an agent task and waits for completion (auto-polling). + * + * @param options agent configuration options + * @return the completed agent status response + */ + public AgentStatusResponse agent(AgentOptions options) { + return agent(options, DEFAULT_POLL_INTERVAL, DEFAULT_JOB_TIMEOUT); + } + + /** + * Runs an agent task and waits for completion with custom polling settings. 
+ * + * @param options agent configuration options + * @param pollIntervalSec seconds between status checks + * @param timeoutSec maximum seconds to wait + * @return the completed agent status response + */ + public AgentStatusResponse agent(AgentOptions options, int pollIntervalSec, int timeoutSec) { + AgentResponse start = startAgent(options); + if (start.getId() == null) { + throw new FirecrawlException("Agent start did not return a job ID"); + } + long deadline = System.currentTimeMillis() + (timeoutSec * 1000L); + while (System.currentTimeMillis() < deadline) { + AgentStatusResponse status = getAgentStatus(start.getId()); + if (status.isDone()) { + return status; + } + sleep(pollIntervalSec); + } + throw new JobTimeoutException(start.getId(), timeoutSec, "Agent"); + } + + /** + * Cancels a running agent task. + * + * @param jobId the agent job ID + * @return the cancellation response + */ + @SuppressWarnings("unchecked") + public Map cancelAgent(String jobId) { + Objects.requireNonNull(jobId, "Job ID is required"); + return http.delete("/v2/agent/" + jobId, Map.class); + } + + // ================================================================ + // BROWSER + // ================================================================ + + /** + * Creates a new browser session with default settings. + * + * @return the browser session details including id, CDP URL, and live view URL + */ + public BrowserCreateResponse browser() { + return browser(null, null, null); + } + + /** + * Creates a new browser session with options. 
+ * + * @param ttl total session lifetime in seconds (30-3600), or null for default + * @param activityTtl idle timeout in seconds (10-3600), or null for default + * @param streamWebView whether to enable live view streaming, or null for default + * @return the browser session details + */ + public BrowserCreateResponse browser(Integer ttl, Integer activityTtl, Boolean streamWebView) { + Map body = new LinkedHashMap<>(); + if (ttl != null) body.put("ttl", ttl); + if (activityTtl != null) body.put("activityTtl", activityTtl); + if (streamWebView != null) body.put("streamWebView", streamWebView); + return http.post("/v2/browser", body, BrowserCreateResponse.class); + } + + /** + * Executes code in a browser session using the default language (bash). + * + * @param sessionId the browser session ID + * @param code the code to execute + * @return the execution result including stdout, stderr, and exit code + */ + public BrowserExecuteResponse browserExecute(String sessionId, String code) { + return browserExecute(sessionId, code, "bash", null); + } + + /** + * Executes code in a browser session with options. + * + * @param sessionId the browser session ID + * @param code the code to execute + * @param language the language: "python", "node", or "bash" (default: "bash") + * @param timeout execution timeout in seconds (1-300), or null for default (30) + * @return the execution result including stdout, stderr, and exit code + */ + public BrowserExecuteResponse browserExecute(String sessionId, String code, + String language, Integer timeout) { + Objects.requireNonNull(sessionId, "Session ID is required"); + Objects.requireNonNull(code, "Code is required"); + Map body = new LinkedHashMap<>(); + body.put("code", code); + body.put("language", language != null ? language : "bash"); + if (timeout != null) body.put("timeout", timeout); + return http.post("/v2/browser/" + sessionId + "/execute", body, BrowserExecuteResponse.class); + } + + /** + * Deletes a browser session. 
+ * + * @param sessionId the browser session ID + * @return the deletion response with session duration and billing info + */ + public BrowserDeleteResponse deleteBrowser(String sessionId) { + Objects.requireNonNull(sessionId, "Session ID is required"); + return http.delete("/v2/browser/" + sessionId, BrowserDeleteResponse.class); + } + + /** + * Lists all browser sessions. + * + * @return the list of browser sessions + */ + public BrowserListResponse listBrowsers() { + return listBrowsers(null); + } + + /** + * Lists browser sessions with optional status filter. + * + * @param status optional filter: "active" or "destroyed", or null for all + * @return the list of browser sessions + */ + public BrowserListResponse listBrowsers(String status) { + String endpoint = "/v2/browser"; + if (status != null && !status.isEmpty()) { + endpoint += "?status=" + status; + } + return http.get(endpoint, BrowserListResponse.class); + } + + // ================================================================ + // USAGE & METRICS + // ================================================================ + + /** + * Gets current concurrency usage. + */ + public ConcurrencyCheck getConcurrency() { + return http.get("/v2/concurrency-check", ConcurrencyCheck.class); + } + + /** + * Gets current credit usage. + */ + public CreditUsage getCreditUsage() { + return http.get("/v2/team/credit-usage", CreditUsage.class); + } + + // ================================================================ + // ASYNC CONVENIENCE METHODS + // ================================================================ + + /** + * Asynchronously scrapes a URL. 
+ * + * @param url the URL to scrape + * @param options scrape configuration options + * @return a CompletableFuture that resolves to the scraped Document + */ + public CompletableFuture scrapeAsync(String url, ScrapeOptions options) { + return CompletableFuture.supplyAsync(() -> scrape(url, options), asyncExecutor); + } + + /** + * Asynchronously crawls a website and waits for completion. + * + * @param url the URL to crawl + * @param options crawl configuration options + * @return a CompletableFuture that resolves to the completed CrawlJob + */ + public CompletableFuture crawlAsync(String url, CrawlOptions options) { + return CompletableFuture.supplyAsync(() -> crawl(url, options), asyncExecutor); + } + + /** + * Asynchronously crawls with custom polling settings. + * + * @param url the URL to crawl + * @param options crawl configuration options + * @param pollIntervalSec seconds between status checks + * @param timeoutSec maximum seconds to wait + * @return a CompletableFuture that resolves to the completed CrawlJob + */ + public CompletableFuture crawlAsync(String url, CrawlOptions options, + int pollIntervalSec, int timeoutSec) { + return CompletableFuture.supplyAsync(() -> crawl(url, options, pollIntervalSec, timeoutSec), asyncExecutor); + } + + /** + * Asynchronously batch-scrapes URLs and waits for completion. + * + * @param urls the URLs to scrape + * @param options batch scrape configuration options + * @return a CompletableFuture that resolves to the completed BatchScrapeJob + */ + public CompletableFuture batchScrapeAsync(List urls, BatchScrapeOptions options) { + return CompletableFuture.supplyAsync(() -> batchScrape(urls, options), asyncExecutor); + } + + /** + * Asynchronously runs a search. 
+ * + * @param query the search query + * @param options search configuration options + * @return a CompletableFuture that resolves to the SearchData + */ + public CompletableFuture searchAsync(String query, SearchOptions options) { + return CompletableFuture.supplyAsync(() -> search(query, options), asyncExecutor); + } + + /** + * Asynchronously runs a map operation. + * + * @param url the URL to map + * @param options map configuration options + * @return a CompletableFuture that resolves to the MapData + */ + public CompletableFuture mapAsync(String url, MapOptions options) { + return CompletableFuture.supplyAsync(() -> map(url, options), asyncExecutor); + } + + /** + * Asynchronously runs an agent task and waits for completion. + * + * @param options agent configuration options + * @return a CompletableFuture that resolves to the AgentStatusResponse + */ + public CompletableFuture agentAsync(AgentOptions options) { + return CompletableFuture.supplyAsync(() -> agent(options), asyncExecutor); + } + + /** + * Asynchronously creates a new browser session. + * + * @param ttl total session lifetime in seconds, or null for default + * @param activityTtl idle timeout in seconds, or null for default + * @param streamWebView whether to enable live view streaming, or null for default + * @return a CompletableFuture that resolves to the BrowserCreateResponse + */ + public CompletableFuture browserAsync(Integer ttl, Integer activityTtl, + Boolean streamWebView) { + return CompletableFuture.supplyAsync(() -> browser(ttl, activityTtl, streamWebView), asyncExecutor); + } + + /** + * Asynchronously executes code in a browser session. 
+ * + * @param sessionId the browser session ID + * @param code the code to execute + * @param language the language: "python", "node", or "bash" + * @param timeout execution timeout in seconds, or null for default + * @return a CompletableFuture that resolves to the BrowserExecuteResponse + */ + public CompletableFuture browserExecuteAsync(String sessionId, String code, + String language, Integer timeout) { + return CompletableFuture.supplyAsync(() -> browserExecute(sessionId, code, language, timeout), asyncExecutor); + } + + /** + * Asynchronously deletes a browser session. + * + * @param sessionId the browser session ID + * @return a CompletableFuture that resolves to the BrowserDeleteResponse + */ + public CompletableFuture deleteBrowserAsync(String sessionId) { + return CompletableFuture.supplyAsync(() -> deleteBrowser(sessionId), asyncExecutor); + } + + /** + * Asynchronously lists browser sessions. + * + * @param status optional filter: "active" or "destroyed", or null for all + * @return a CompletableFuture that resolves to the BrowserListResponse + */ + public CompletableFuture listBrowsersAsync(String status) { + return CompletableFuture.supplyAsync(() -> listBrowsers(status), asyncExecutor); + } + + // ================================================================ + // INTERNAL POLLING HELPERS + // ================================================================ + + private CrawlJob pollCrawl(String jobId, int pollIntervalSec, int timeoutSec) { + long deadline = System.currentTimeMillis() + (timeoutSec * 1000L); + while (System.currentTimeMillis() < deadline) { + CrawlJob job = getCrawlStatus(jobId); + if (job.isDone()) { + return paginateCrawl(job); + } + sleep(pollIntervalSec); + } + throw new JobTimeoutException(jobId, timeoutSec, "Crawl"); + } + + private BatchScrapeJob pollBatchScrape(String jobId, int pollIntervalSec, int timeoutSec) { + long deadline = System.currentTimeMillis() + (timeoutSec * 1000L); + while (System.currentTimeMillis() < 
deadline) { + BatchScrapeJob job = getBatchScrapeStatus(jobId); + if (job.isDone()) { + return paginateBatchScrape(job); + } + sleep(pollIntervalSec); + } + throw new JobTimeoutException(jobId, timeoutSec, "Batch scrape"); + } + + /** + * Auto-paginates crawl results by following the "next" cursor. + */ + private CrawlJob paginateCrawl(CrawlJob job) { + if (job.getData() == null) { + job.setData(new ArrayList<>()); + } + CrawlJob current = job; + while (current.getNext() != null && !current.getNext().isEmpty()) { + CrawlJob nextPage = http.getAbsolute(current.getNext(), CrawlJob.class); + if (nextPage.getData() != null && !nextPage.getData().isEmpty()) { + job.getData().addAll(nextPage.getData()); + } + current = nextPage; + } + return job; + } + + /** + * Auto-paginates batch scrape results by following the "next" cursor. + */ + private BatchScrapeJob paginateBatchScrape(BatchScrapeJob job) { + if (job.getData() == null) { + job.setData(new ArrayList<>()); + } + BatchScrapeJob current = job; + while (current.getNext() != null && !current.getNext().isEmpty()) { + BatchScrapeJob nextPage = http.getAbsolute(current.getNext(), BatchScrapeJob.class); + if (nextPage.getData() != null && !nextPage.getData().isEmpty()) { + job.getData().addAll(nextPage.getData()); + } + current = nextPage; + } + return job; + } + + // ================================================================ + // INTERNAL UTILITIES + // ================================================================ + + /** + * Extracts the "data" field from a raw API response map and deserializes it. 
+ */ + @SuppressWarnings("unchecked") + private T extractData(Map rawResponse, Class type) { + Object data = rawResponse.get("data"); + if (data == null) { + // Some endpoints return the data at the top level + return http.objectMapper.convertValue(rawResponse, type); + } + return http.objectMapper.convertValue(data, type); + } + + /** + * Merges a typed options object into a request body map, using Jackson serialization. + */ + @SuppressWarnings("unchecked") + private void mergeOptions(Map body, Object options) { + Map optionsMap = http.objectMapper.convertValue(options, Map.class); + body.putAll(optionsMap); + } + + private void sleep(int seconds) { + try { + Thread.sleep(seconds * 1000L); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new FirecrawlException("Polling interrupted", e); + } + } + + // ================================================================ + // BUILDER + // ================================================================ + + public static final class Builder { + + private String apiKey; + private String apiUrl = DEFAULT_API_URL; + private long timeoutMs = DEFAULT_TIMEOUT_MS; + private int maxRetries = DEFAULT_MAX_RETRIES; + private double backoffFactor = DEFAULT_BACKOFF_FACTOR; + private Executor asyncExecutor; + + private Builder() {} + + /** + * Sets the API key. Falls back to FIRECRAWL_API_KEY env var or + * firecrawl.apiKey system property if not provided. + */ + public Builder apiKey(String apiKey) { + this.apiKey = apiKey; + return this; + } + + /** + * Sets the API base URL. Defaults to https://api.firecrawl.dev. + * Falls back to FIRECRAWL_API_URL env var if not provided. + */ + public Builder apiUrl(String apiUrl) { + this.apiUrl = apiUrl; + return this; + } + + /** + * Sets the HTTP request timeout in milliseconds. Default: 300000 (5 minutes). 
+ */ + public Builder timeoutMs(long timeoutMs) { + this.timeoutMs = timeoutMs; + return this; + } + + /** + * Sets the maximum number of automatic retries for transient failures. Default: 3. + */ + public Builder maxRetries(int maxRetries) { + this.maxRetries = maxRetries; + return this; + } + + /** + * Sets the exponential backoff factor in seconds. Default: 0.5. + */ + public Builder backoffFactor(double backoffFactor) { + this.backoffFactor = backoffFactor; + return this; + } + + /** + * Sets a custom executor for async operations. Default: ForkJoinPool.commonPool(). + */ + public Builder asyncExecutor(Executor asyncExecutor) { + this.asyncExecutor = asyncExecutor; + return this; + } + + public FirecrawlClient build() { + String resolvedKey = apiKey; + if (resolvedKey == null || resolvedKey.isBlank()) { + resolvedKey = System.getenv("FIRECRAWL_API_KEY"); + } + if (resolvedKey == null || resolvedKey.isBlank()) { + resolvedKey = System.getProperty("firecrawl.apiKey"); + } + if (resolvedKey == null || resolvedKey.isBlank()) { + throw new FirecrawlException( + "API key is required. Set it via builder.apiKey(), " + + "FIRECRAWL_API_KEY environment variable, or firecrawl.apiKey system property."); + } + + String resolvedUrl = apiUrl; + if (resolvedUrl == null || resolvedUrl.equals(DEFAULT_API_URL)) { + String envUrl = System.getenv("FIRECRAWL_API_URL"); + if (envUrl != null && !envUrl.isEmpty()) { + resolvedUrl = envUrl; + } + } + + Executor executor = asyncExecutor != null ? 
asyncExecutor : ForkJoinPool.commonPool(); + FirecrawlHttpClient http = new FirecrawlHttpClient( + resolvedKey, resolvedUrl, timeoutMs, maxRetries, backoffFactor); + return new FirecrawlClient(http, executor); + } + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/client/FirecrawlHttpClient.java b/apps/java-sdk/src/main/java/com/firecrawl/client/FirecrawlHttpClient.java new file mode 100644 index 0000000000..7fbc37f0d5 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/client/FirecrawlHttpClient.java @@ -0,0 +1,215 @@ +package com.firecrawl.client; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.datatype.jdk8.Jdk8Module; +import com.firecrawl.errors.AuthenticationException; +import com.firecrawl.errors.FirecrawlException; +import com.firecrawl.errors.RateLimitException; +import okhttp3.*; + +import java.io.IOException; +import java.util.Collections; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +/** + * Internal HTTP client for making authenticated requests to the Firecrawl API. + * Handles retry logic with exponential backoff. + */ +class FirecrawlHttpClient { + + private static final MediaType JSON = MediaType.get("application/json; charset=utf-8"); + + private final OkHttpClient httpClient; + private final String apiKey; + private final String baseUrl; + private final int maxRetries; + private final double backoffFactor; + final ObjectMapper objectMapper; + + FirecrawlHttpClient(String apiKey, String baseUrl, long timeoutMs, int maxRetries, double backoffFactor) { + this.apiKey = apiKey; + this.baseUrl = baseUrl.endsWith("/") ? 
baseUrl.substring(0, baseUrl.length() - 1) : baseUrl; + this.maxRetries = maxRetries; + this.backoffFactor = backoffFactor; + + this.httpClient = new OkHttpClient.Builder() + .connectTimeout(timeoutMs, TimeUnit.MILLISECONDS) + .readTimeout(timeoutMs, TimeUnit.MILLISECONDS) + .writeTimeout(timeoutMs, TimeUnit.MILLISECONDS) + .build(); + + this.objectMapper = new ObjectMapper() + .registerModule(new Jdk8Module()) + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + } + + /** + * Sends a POST request with JSON body. + */ + T post(String path, Object body, Class responseType) { + return post(path, body, responseType, Collections.emptyMap()); + } + + /** + * Sends a POST request with JSON body and extra headers. + */ + T post(String path, Object body, Class responseType, Map extraHeaders) { + String url = baseUrl + path; + String json; + try { + json = objectMapper.writeValueAsString(body); + } catch (JsonProcessingException e) { + throw new FirecrawlException("Failed to serialize request body", e); + } + RequestBody requestBody = RequestBody.create(json, JSON); + Request.Builder builder = new Request.Builder() + .url(url) + .header("Authorization", "Bearer " + apiKey) + .header("Content-Type", "application/json") + .post(requestBody); + for (Map.Entry entry : extraHeaders.entrySet()) { + builder.header(entry.getKey(), entry.getValue()); + } + Request request = builder.build(); + return executeWithRetry(request, responseType); + } + + /** + * Sends a GET request. + */ + T get(String path, Class responseType) { + String url = baseUrl + path; + Request request = new Request.Builder() + .url(url) + .header("Authorization", "Bearer " + apiKey) + .get() + .build(); + return executeWithRetry(request, responseType); + } + + /** + * Sends a GET request with full URL (for following next-page cursors). 
+ */ + T getAbsolute(String absoluteUrl, Class responseType) { + Request request = new Request.Builder() + .url(absoluteUrl) + .header("Authorization", "Bearer " + apiKey) + .get() + .build(); + return executeWithRetry(request, responseType); + } + + /** + * Sends a DELETE request. + */ + T delete(String path, Class responseType) { + String url = baseUrl + path; + Request request = new Request.Builder() + .url(url) + .header("Authorization", "Bearer " + apiKey) + .delete() + .build(); + return executeWithRetry(request, responseType); + } + + /** + * Sends a raw GET request and returns the response body as a parsed Map. + */ + @SuppressWarnings("unchecked") + Map getRaw(String path) { + return get(path, Map.class); + } + + private T executeWithRetry(Request request, Class responseType) { + int attempt = 0; + while (true) { + try { + try (Response response = httpClient.newCall(request).execute()) { + ResponseBody responseBody = response.body(); + String bodyStr = responseBody != null ? responseBody.string() : ""; + + if (response.isSuccessful()) { + if (responseType == Void.class || responseType == void.class) { + return null; + } + return objectMapper.readValue(bodyStr, responseType); + } + + int code = response.code(); + + // Parse error details from response + String errorMessage = extractErrorMessage(bodyStr, code); + String errorCode = extractErrorCode(bodyStr); + + // Non-retryable client errors + if (code == 401) { + throw new AuthenticationException(errorMessage, errorCode, null); + } + if (code == 429) { + throw new RateLimitException(errorMessage, errorCode, null); + } + if (code >= 400 && code < 500 && code != 408 && code != 409) { + throw new FirecrawlException(errorMessage, code, errorCode, null); + } + + // Retryable errors: 408, 409, 502, 5xx + if (attempt < maxRetries) { + attempt++; + sleepWithBackoff(attempt); + continue; + } + + throw new FirecrawlException(errorMessage, code, errorCode, null); + } + } catch (FirecrawlException e) { + throw e; + } 
catch (IOException e) { + if (attempt < maxRetries) { + attempt++; + sleepWithBackoff(attempt); + continue; + } + throw new FirecrawlException("Request failed: " + e.getMessage(), e); + } + } + } + + @SuppressWarnings("unchecked") + private String extractErrorMessage(String body, int statusCode) { + try { + Map<String, Object> parsed = objectMapper.readValue(body, Map.class); + if (parsed.containsKey("error")) { + return String.valueOf(parsed.get("error")); + } + if (parsed.containsKey("message")) { + return String.valueOf(parsed.get("message")); + } + } catch (Exception ignored) { + } + return "HTTP " + statusCode + " error"; + } + + @SuppressWarnings("unchecked") + private String extractErrorCode(String body) { + try { + Map<String, Object> parsed = objectMapper.readValue(body, Map.class); + Object code = parsed.get("code"); + return code != null ? String.valueOf(code) : null; + } catch (Exception ignored) { + } + return null; + } + + private void sleepWithBackoff(int attempt) { + long delayMs = (long) (backoffFactor * 1000 * Math.pow(2, attempt - 1)); + try { + Thread.sleep(delayMs); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new FirecrawlException("Request interrupted during retry backoff", e); + } + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/errors/AuthenticationException.java b/apps/java-sdk/src/main/java/com/firecrawl/errors/AuthenticationException.java new file mode 100644 index 0000000000..c88cfe642d --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/errors/AuthenticationException.java @@ -0,0 +1,15 @@ +package com.firecrawl.errors; + +/** + * Thrown when the API returns a 401 Unauthorized response. 
+ */ +public class AuthenticationException extends FirecrawlException { + + public AuthenticationException(String message) { + super(message, 401); + } + + public AuthenticationException(String message, String errorCode, Object details) { + super(message, 401, errorCode, details); + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/errors/FirecrawlException.java b/apps/java-sdk/src/main/java/com/firecrawl/errors/FirecrawlException.java new file mode 100644 index 0000000000..9c14d4051f --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/errors/FirecrawlException.java @@ -0,0 +1,42 @@ +package com.firecrawl.errors; + +/** + * Base exception for all Firecrawl SDK errors. + */ +public class FirecrawlException extends RuntimeException { + + private final int statusCode; + private final String errorCode; + private final Object details; + + public FirecrawlException(String message) { + this(message, 0, null, null); + } + + public FirecrawlException(String message, int statusCode) { + this(message, statusCode, null, null); + } + + public FirecrawlException(String message, int statusCode, String errorCode, Object details) { + super(message); + this.statusCode = statusCode; + this.errorCode = errorCode; + this.details = details; + } + + public FirecrawlException(String message, Throwable cause) { + super(message, cause); + this.statusCode = 0; + this.errorCode = null; + this.details = null; + } + + /** HTTP status code (0 if not an HTTP error). */ + public int getStatusCode() { return statusCode; } + + /** Error code from the API response, if any. */ + public String getErrorCode() { return errorCode; } + + /** Additional error details from the API response, if any. 
*/ + public Object getDetails() { return details; } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/errors/JobTimeoutException.java b/apps/java-sdk/src/main/java/com/firecrawl/errors/JobTimeoutException.java new file mode 100644 index 0000000000..1e2f5955f6 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/errors/JobTimeoutException.java @@ -0,0 +1,22 @@ +package com.firecrawl.errors; + +/** + * Thrown when an async job (crawl, batch, agent) does not complete within the specified timeout. + */ +public class JobTimeoutException extends FirecrawlException { + + private final String jobId; + private final int timeoutSeconds; + + public JobTimeoutException(String jobId, int timeoutSeconds, String jobType) { + super(jobType + " job " + jobId + " did not complete within " + timeoutSeconds + " seconds"); + this.jobId = jobId; + this.timeoutSeconds = timeoutSeconds; + } + + /** The ID of the timed-out job. */ + public String getJobId() { return jobId; } + + /** The timeout in seconds that was exceeded. */ + public int getTimeoutSeconds() { return timeoutSeconds; } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/errors/RateLimitException.java b/apps/java-sdk/src/main/java/com/firecrawl/errors/RateLimitException.java new file mode 100644 index 0000000000..9a7271efe9 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/errors/RateLimitException.java @@ -0,0 +1,15 @@ +package com.firecrawl.errors; + +/** + * Thrown when the API returns a 429 Too Many Requests response. 
+ */ +public class RateLimitException extends FirecrawlException { + + public RateLimitException(String message) { + super(message, 429); + } + + public RateLimitException(String message, String errorCode, Object details) { + super(message, 429, errorCode, details); + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/AgentOptions.java b/apps/java-sdk/src/main/java/com/firecrawl/models/AgentOptions.java new file mode 100644 index 0000000000..e0e25fd62a --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/AgentOptions.java @@ -0,0 +1,80 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonInclude; +import java.util.List; +import java.util.Map; + +/** + * Options for starting an agent task. + */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class AgentOptions { + + private List urls; + private String prompt; + private Map schema; + private String integration; + private Integer maxCredits; + private Boolean strictConstrainToURLs; + private String model; + private WebhookConfig webhook; + + private AgentOptions() {} + + public List getUrls() { return urls; } + public String getPrompt() { return prompt; } + public Map getSchema() { return schema; } + public String getIntegration() { return integration; } + public Integer getMaxCredits() { return maxCredits; } + public Boolean getStrictConstrainToURLs() { return strictConstrainToURLs; } + public String getModel() { return model; } + public WebhookConfig getWebhook() { return webhook; } + + public static Builder builder() { return new Builder(); } + + public static final class Builder { + private List urls; + private String prompt; + private Map schema; + private String integration; + private Integer maxCredits; + private Boolean strictConstrainToURLs; + private String model; + private WebhookConfig webhook; + + private Builder() {} + + /** Optional URLs to constrain the agent to. 
*/ + public Builder urls(List urls) { this.urls = urls; return this; } + /** Natural language prompt describing what data to find. */ + public Builder prompt(String prompt) { this.prompt = prompt; return this; } + /** JSON Schema for structured output. */ + public Builder schema(Map schema) { this.schema = schema; return this; } + /** Integration identifier. */ + public Builder integration(String integration) { this.integration = integration; return this; } + /** Maximum credits to spend. */ + public Builder maxCredits(Integer maxCredits) { this.maxCredits = maxCredits; return this; } + /** Don't navigate outside provided URLs. */ + public Builder strictConstrainToURLs(Boolean strictConstrainToURLs) { this.strictConstrainToURLs = strictConstrainToURLs; return this; } + /** Agent model: "spark-1-pro" or "spark-1-mini". */ + public Builder model(String model) { this.model = model; return this; } + /** Webhook configuration. */ + public Builder webhook(WebhookConfig webhook) { this.webhook = webhook; return this; } + + public AgentOptions build() { + if (prompt == null || prompt.isEmpty()) { + throw new IllegalArgumentException("Agent prompt is required"); + } + AgentOptions o = new AgentOptions(); + o.urls = this.urls; + o.prompt = this.prompt; + o.schema = this.schema; + o.integration = this.integration; + o.maxCredits = this.maxCredits; + o.strictConstrainToURLs = this.strictConstrainToURLs; + o.model = this.model; + o.webhook = this.webhook; + return o; + } + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/AgentResponse.java b/apps/java-sdk/src/main/java/com/firecrawl/models/AgentResponse.java new file mode 100644 index 0000000000..5f5581204e --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/AgentResponse.java @@ -0,0 +1,23 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * Response from starting an agent task. 
+ */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class AgentResponse { + + private boolean success; + private String id; + private String error; + + public boolean isSuccess() { return success; } + public String getId() { return id; } + public String getError() { return error; } + + @Override + public String toString() { + return "AgentResponse{success=" + success + ", id=" + id + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/AgentStatusResponse.java b/apps/java-sdk/src/main/java/com/firecrawl/models/AgentStatusResponse.java new file mode 100644 index 0000000000..4a761013f5 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/AgentStatusResponse.java @@ -0,0 +1,35 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * Status response for an agent task. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class AgentStatusResponse { + + private boolean success; + private String status; + private String error; + private Object data; + private String model; + private String expiresAt; + private Integer creditsUsed; + + public boolean isSuccess() { return success; } + public String getStatus() { return status; } + public String getError() { return error; } + public Object getData() { return data; } + public String getModel() { return model; } + public String getExpiresAt() { return expiresAt; } + public Integer getCreditsUsed() { return creditsUsed; } + + public boolean isDone() { + return "completed".equals(status) || "failed".equals(status) || "cancelled".equals(status); + } + + @Override + public String toString() { + return "AgentStatusResponse{status=" + status + ", model=" + model + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/BatchScrapeJob.java b/apps/java-sdk/src/main/java/com/firecrawl/models/BatchScrapeJob.java new file mode 100644 index 0000000000..d6555b03f1 --- /dev/null +++ 
b/apps/java-sdk/src/main/java/com/firecrawl/models/BatchScrapeJob.java @@ -0,0 +1,39 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.List; + +/** + * Status and results of a batch scrape job. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class BatchScrapeJob { + + private String id; + private String status; + private int completed; + private int total; + private Integer creditsUsed; + private String expiresAt; + private String next; + private List data; + + public String getId() { return id; } + public String getStatus() { return status; } + public int getCompleted() { return completed; } + public int getTotal() { return total; } + public Integer getCreditsUsed() { return creditsUsed; } + public String getExpiresAt() { return expiresAt; } + public String getNext() { return next; } + public List getData() { return data; } + public void setData(List data) { this.data = data; } + + public boolean isDone() { + return "completed".equals(status) || "failed".equals(status) || "cancelled".equals(status); + } + + @Override + public String toString() { + return "BatchScrapeJob{id=" + id + ", status=" + status + ", completed=" + completed + "/" + total + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/BatchScrapeOptions.java b/apps/java-sdk/src/main/java/com/firecrawl/models/BatchScrapeOptions.java new file mode 100644 index 0000000000..0119eebdc0 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/BatchScrapeOptions.java @@ -0,0 +1,78 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; + +/** + * Options for a batch scrape job. 
+ */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class BatchScrapeOptions { + + private ScrapeOptions options; + private Object webhook; + private String appendToId; + private Boolean ignoreInvalidURLs; + private Integer maxConcurrency; + private Boolean zeroDataRetention; + @JsonIgnore + private String idempotencyKey; + private String integration; + + private BatchScrapeOptions() {} + + public ScrapeOptions getOptions() { return options; } + public Object getWebhook() { return webhook; } + public String getAppendToId() { return appendToId; } + public Boolean getIgnoreInvalidURLs() { return ignoreInvalidURLs; } + public Integer getMaxConcurrency() { return maxConcurrency; } + public Boolean getZeroDataRetention() { return zeroDataRetention; } + @JsonIgnore + public String getIdempotencyKey() { return idempotencyKey; } + public String getIntegration() { return integration; } + + public static Builder builder() { return new Builder(); } + + public static final class Builder { + private ScrapeOptions options; + private Object webhook; + private String appendToId; + private Boolean ignoreInvalidURLs; + private Integer maxConcurrency; + private Boolean zeroDataRetention; + private String idempotencyKey; + private String integration; + + private Builder() {} + + /** Scrape options applied to each URL. */ + public Builder options(ScrapeOptions options) { this.options = options; return this; } + /** Webhook URL string or {@link WebhookConfig} object. */ + public Builder webhook(Object webhook) { this.webhook = webhook; return this; } + /** Append URLs to an existing batch job. */ + public Builder appendToId(String appendToId) { this.appendToId = appendToId; return this; } + /** Ignore invalid URLs instead of failing. */ + public Builder ignoreInvalidURLs(Boolean ignoreInvalidURLs) { this.ignoreInvalidURLs = ignoreInvalidURLs; return this; } + /** Max concurrent scrapes. 
*/ + public Builder maxConcurrency(Integer maxConcurrency) { this.maxConcurrency = maxConcurrency; return this; } + /** Do not store any data on Firecrawl servers. */ + public Builder zeroDataRetention(Boolean zeroDataRetention) { this.zeroDataRetention = zeroDataRetention; return this; } + /** Idempotency key to prevent duplicate batch jobs. */ + public Builder idempotencyKey(String idempotencyKey) { this.idempotencyKey = idempotencyKey; return this; } + /** Integration identifier. */ + public Builder integration(String integration) { this.integration = integration; return this; } + + public BatchScrapeOptions build() { + BatchScrapeOptions o = new BatchScrapeOptions(); + o.options = this.options; + o.webhook = this.webhook; + o.appendToId = this.appendToId; + o.ignoreInvalidURLs = this.ignoreInvalidURLs; + o.maxConcurrency = this.maxConcurrency; + o.zeroDataRetention = this.zeroDataRetention; + o.idempotencyKey = this.idempotencyKey; + o.integration = this.integration; + return o; + } + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/BatchScrapeResponse.java b/apps/java-sdk/src/main/java/com/firecrawl/models/BatchScrapeResponse.java new file mode 100644 index 0000000000..1c1084ddc0 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/BatchScrapeResponse.java @@ -0,0 +1,24 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.List; + +/** + * Response from starting an async batch scrape job. 
+ */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class BatchScrapeResponse { + + private String id; + private String url; + private List invalidURLs; + + public String getId() { return id; } + public String getUrl() { return url; } + public List getInvalidURLs() { return invalidURLs; } + + @Override + public String toString() { + return "BatchScrapeResponse{id=" + id + ", url=" + url + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserCreateResponse.java b/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserCreateResponse.java new file mode 100644 index 0000000000..99dff26141 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserCreateResponse.java @@ -0,0 +1,29 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * Response from creating a new browser session. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class BrowserCreateResponse { + + private boolean success; + private String id; + private String cdpUrl; + private String liveViewUrl; + private String expiresAt; + private String error; + + public boolean isSuccess() { return success; } + public String getId() { return id; } + public String getCdpUrl() { return cdpUrl; } + public String getLiveViewUrl() { return liveViewUrl; } + public String getExpiresAt() { return expiresAt; } + public String getError() { return error; } + + @Override + public String toString() { + return "BrowserCreateResponse{id=" + id + ", success=" + success + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserDeleteResponse.java b/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserDeleteResponse.java new file mode 100644 index 0000000000..1be1c458ac --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserDeleteResponse.java @@ -0,0 +1,25 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * Response 
from deleting a browser session. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class BrowserDeleteResponse { + + private boolean success; + private Long sessionDurationMs; + private Integer creditsBilled; + private String error; + + public boolean isSuccess() { return success; } + public Long getSessionDurationMs() { return sessionDurationMs; } + public Integer getCreditsBilled() { return creditsBilled; } + public String getError() { return error; } + + @Override + public String toString() { + return "BrowserDeleteResponse{success=" + success + ", creditsBilled=" + creditsBilled + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserExecuteResponse.java b/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserExecuteResponse.java new file mode 100644 index 0000000000..cab26e0bce --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserExecuteResponse.java @@ -0,0 +1,31 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * Response from executing code in a browser session. 
+ */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class BrowserExecuteResponse { + + private boolean success; + private String stdout; + private String result; + private String stderr; + private Integer exitCode; + private Boolean killed; + private String error; + + public boolean isSuccess() { return success; } + public String getStdout() { return stdout; } + public String getResult() { return result; } + public String getStderr() { return stderr; } + public Integer getExitCode() { return exitCode; } + public Boolean getKilled() { return killed; } + public String getError() { return error; } + + @Override + public String toString() { + return "BrowserExecuteResponse{success=" + success + ", exitCode=" + exitCode + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserListResponse.java b/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserListResponse.java new file mode 100644 index 0000000000..9aba6a2e5b --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserListResponse.java @@ -0,0 +1,25 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.List; + +/** + * Response from listing browser sessions. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class BrowserListResponse { + + private boolean success; + private List sessions; + private String error; + + public boolean isSuccess() { return success; } + public List getSessions() { return sessions; } + public String getError() { return error; } + + @Override + public String toString() { + int count = sessions != null ? 
sessions.size() : 0; + return "BrowserListResponse{success=" + success + ", sessions=" + count + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserSession.java b/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserSession.java new file mode 100644 index 0000000000..2f4b4e5a4d --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/BrowserSession.java @@ -0,0 +1,31 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * Represents a browser session's metadata. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class BrowserSession { + + private String id; + private String status; + private String cdpUrl; + private String liveViewUrl; + private boolean streamWebView; + private String createdAt; + private String lastActivity; + + public String getId() { return id; } + public String getStatus() { return status; } + public String getCdpUrl() { return cdpUrl; } + public String getLiveViewUrl() { return liveViewUrl; } + public boolean isStreamWebView() { return streamWebView; } + public String getCreatedAt() { return createdAt; } + public String getLastActivity() { return lastActivity; } + + @Override + public String toString() { + return "BrowserSession{id=" + id + ", status=" + status + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/ConcurrencyCheck.java b/apps/java-sdk/src/main/java/com/firecrawl/models/ConcurrencyCheck.java new file mode 100644 index 0000000000..55fc65935e --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/ConcurrencyCheck.java @@ -0,0 +1,21 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * Current concurrency usage. 
+ */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class ConcurrencyCheck { + + private int concurrency; + private int maxConcurrency; + + public int getConcurrency() { return concurrency; } + public int getMaxConcurrency() { return maxConcurrency; } + + @Override + public String toString() { + return "ConcurrencyCheck{concurrency=" + concurrency + "/" + maxConcurrency + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/CrawlJob.java b/apps/java-sdk/src/main/java/com/firecrawl/models/CrawlJob.java new file mode 100644 index 0000000000..3efbec882e --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/CrawlJob.java @@ -0,0 +1,40 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.List; + +/** + * Status and results of a crawl job. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class CrawlJob { + + private String id; + private String status; + private int total; + private int completed; + private Integer creditsUsed; + private String expiresAt; + private String next; + private List data; + + public String getId() { return id; } + public String getStatus() { return status; } + public int getTotal() { return total; } + public int getCompleted() { return completed; } + public Integer getCreditsUsed() { return creditsUsed; } + public String getExpiresAt() { return expiresAt; } + public String getNext() { return next; } + public List getData() { return data; } + public void setData(List data) { this.data = data; } + + /** Returns true if the job has finished (completed, failed, or cancelled). 
*/ + public boolean isDone() { + return "completed".equals(status) || "failed".equals(status) || "cancelled".equals(status); + } + + @Override + public String toString() { + return "CrawlJob{id=" + id + ", status=" + status + ", completed=" + completed + "/" + total + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/CrawlOptions.java b/apps/java-sdk/src/main/java/com/firecrawl/models/CrawlOptions.java new file mode 100644 index 0000000000..68bcb6ca75 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/CrawlOptions.java @@ -0,0 +1,154 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonInclude; +import java.util.List; +import java.util.Map; + +/** + * Options for crawling a website. + */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class CrawlOptions { + + private String prompt; + private List excludePaths; + private List includePaths; + private Integer maxDiscoveryDepth; + private String sitemap; + private Boolean ignoreQueryParameters; + private Boolean deduplicateSimilarURLs; + private Integer limit; + private Boolean crawlEntireDomain; + private Boolean allowExternalLinks; + private Boolean allowSubdomains; + private Integer delay; + private Integer maxConcurrency; + private Object webhook; + private ScrapeOptions scrapeOptions; + private Boolean regexOnFullURL; + private Boolean zeroDataRetention; + private String integration; + + private CrawlOptions() {} + + public String getPrompt() { return prompt; } + public List getExcludePaths() { return excludePaths; } + public List getIncludePaths() { return includePaths; } + public Integer getMaxDiscoveryDepth() { return maxDiscoveryDepth; } + public String getSitemap() { return sitemap; } + public Boolean getIgnoreQueryParameters() { return ignoreQueryParameters; } + public Boolean getDeduplicateSimilarURLs() { return deduplicateSimilarURLs; } + public Integer getLimit() { return limit; } + public Boolean getCrawlEntireDomain() { return 
crawlEntireDomain; } + public Boolean getAllowExternalLinks() { return allowExternalLinks; } + public Boolean getAllowSubdomains() { return allowSubdomains; } + public Integer getDelay() { return delay; } + public Integer getMaxConcurrency() { return maxConcurrency; } + public Object getWebhook() { return webhook; } + public ScrapeOptions getScrapeOptions() { return scrapeOptions; } + public Boolean getRegexOnFullURL() { return regexOnFullURL; } + public Boolean getZeroDataRetention() { return zeroDataRetention; } + public String getIntegration() { return integration; } + + public static Builder builder() { return new Builder(); } + + public static final class Builder { + private String prompt; + private List excludePaths; + private List includePaths; + private Integer maxDiscoveryDepth; + private String sitemap; + private Boolean ignoreQueryParameters; + private Boolean deduplicateSimilarURLs; + private Integer limit; + private Boolean crawlEntireDomain; + private Boolean allowExternalLinks; + private Boolean allowSubdomains; + private Integer delay; + private Integer maxConcurrency; + private Object webhook; + private ScrapeOptions scrapeOptions; + private Boolean regexOnFullURL; + private Boolean zeroDataRetention; + private String integration; + + private Builder() {} + + /** Natural language prompt to guide crawling. */ + public Builder prompt(String prompt) { this.prompt = prompt; return this; } + + /** URL path patterns to exclude from crawling. */ + public Builder excludePaths(List excludePaths) { this.excludePaths = excludePaths; return this; } + + /** URL path patterns to include in crawling. */ + public Builder includePaths(List includePaths) { this.includePaths = includePaths; return this; } + + /** Maximum depth to discover links. */ + public Builder maxDiscoveryDepth(Integer maxDiscoveryDepth) { this.maxDiscoveryDepth = maxDiscoveryDepth; return this; } + + /** Sitemap handling: "skip", "include", or "only". 
*/ + public Builder sitemap(String sitemap) { this.sitemap = sitemap; return this; } + + /** Ignore query parameters when deduplicating URLs. */ + public Builder ignoreQueryParameters(Boolean ignoreQueryParameters) { this.ignoreQueryParameters = ignoreQueryParameters; return this; } + + /** Deduplicate URLs that are similar. */ + public Builder deduplicateSimilarURLs(Boolean deduplicateSimilarURLs) { this.deduplicateSimilarURLs = deduplicateSimilarURLs; return this; } + + /** Maximum number of pages to crawl. */ + public Builder limit(Integer limit) { this.limit = limit; return this; } + + /** Whether to crawl the entire domain. */ + public Builder crawlEntireDomain(Boolean crawlEntireDomain) { this.crawlEntireDomain = crawlEntireDomain; return this; } + + /** Follow external links. */ + public Builder allowExternalLinks(Boolean allowExternalLinks) { this.allowExternalLinks = allowExternalLinks; return this; } + + /** Follow subdomains. */ + public Builder allowSubdomains(Boolean allowSubdomains) { this.allowSubdomains = allowSubdomains; return this; } + + /** Delay in milliseconds between requests. */ + public Builder delay(Integer delay) { this.delay = delay; return this; } + + /** Maximum concurrent requests. */ + public Builder maxConcurrency(Integer maxConcurrency) { this.maxConcurrency = maxConcurrency; return this; } + + /** Webhook URL string or {@link WebhookConfig} object. */ + public Builder webhook(Object webhook) { this.webhook = webhook; return this; } + + /** Scrape options applied to each crawled page. */ + public Builder scrapeOptions(ScrapeOptions scrapeOptions) { this.scrapeOptions = scrapeOptions; return this; } + + /** Apply regex patterns to the full URL, not just the path. */ + public Builder regexOnFullURL(Boolean regexOnFullURL) { this.regexOnFullURL = regexOnFullURL; return this; } + + /** Do not store any scraped data on Firecrawl servers. 
*/ + public Builder zeroDataRetention(Boolean zeroDataRetention) { this.zeroDataRetention = zeroDataRetention; return this; } + + /** Integration identifier. */ + public Builder integration(String integration) { this.integration = integration; return this; } + + public CrawlOptions build() { + CrawlOptions o = new CrawlOptions(); + o.prompt = this.prompt; + o.excludePaths = this.excludePaths; + o.includePaths = this.includePaths; + o.maxDiscoveryDepth = this.maxDiscoveryDepth; + o.sitemap = this.sitemap; + o.ignoreQueryParameters = this.ignoreQueryParameters; + o.deduplicateSimilarURLs = this.deduplicateSimilarURLs; + o.limit = this.limit; + o.crawlEntireDomain = this.crawlEntireDomain; + o.allowExternalLinks = this.allowExternalLinks; + o.allowSubdomains = this.allowSubdomains; + o.delay = this.delay; + o.maxConcurrency = this.maxConcurrency; + o.webhook = this.webhook; + o.scrapeOptions = this.scrapeOptions; + o.regexOnFullURL = this.regexOnFullURL; + o.zeroDataRetention = this.zeroDataRetention; + o.integration = this.integration; + return o; + } + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/CrawlResponse.java b/apps/java-sdk/src/main/java/com/firecrawl/models/CrawlResponse.java new file mode 100644 index 0000000000..4ea20c732d --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/CrawlResponse.java @@ -0,0 +1,21 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * Response from starting an async crawl job. 
+ */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class CrawlResponse { + + private String id; + private String url; + + public String getId() { return id; } + public String getUrl() { return url; } + + @Override + public String toString() { + return "CrawlResponse{id=" + id + ", url=" + url + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/CreditUsage.java b/apps/java-sdk/src/main/java/com/firecrawl/models/CreditUsage.java new file mode 100644 index 0000000000..554c15ab39 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/CreditUsage.java @@ -0,0 +1,25 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * Current credit usage information. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class CreditUsage { + + private int remainingCredits; + private Integer planCredits; + private String billingPeriodStart; + private String billingPeriodEnd; + + public int getRemainingCredits() { return remainingCredits; } + public Integer getPlanCredits() { return planCredits; } + public String getBillingPeriodStart() { return billingPeriodStart; } + public String getBillingPeriodEnd() { return billingPeriodEnd; } + + @Override + public String toString() { + return "CreditUsage{remaining=" + remainingCredits + ", plan=" + planCredits + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/Document.java b/apps/java-sdk/src/main/java/com/firecrawl/models/Document.java new file mode 100644 index 0000000000..beb41fb0fd --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/Document.java @@ -0,0 +1,49 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.List; +import java.util.Map; + +/** + * A scraped document returned by scrape, crawl, and batch endpoints. 
+ */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class Document { + + private String markdown; + private String html; + private String rawHtml; + private Object json; + private String summary; + private Map<String, Object> metadata; + private List<String> links; + private List<String> images; + private String screenshot; + private List<Map<String, Object>> attributes; + private Map<String, Object> actions; + private String warning; + private Map<String, Object> changeTracking; + private Map<String, Object> branding; + + public String getMarkdown() { return markdown; } + public String getHtml() { return html; } + public String getRawHtml() { return rawHtml; } + public Object getJson() { return json; } + public String getSummary() { return summary; } + public Map<String, Object> getMetadata() { return metadata; } + public List<String> getLinks() { return links; } + public List<String> getImages() { return images; } + public String getScreenshot() { return screenshot; } + public List<Map<String, Object>> getAttributes() { return attributes; } + public Map<String, Object> getActions() { return actions; } + public String getWarning() { return warning; } + public Map<String, Object> getChangeTracking() { return changeTracking; } + public Map<String, Object> getBranding() { return branding; } + + @Override + public String toString() { + String title = metadata != null ? String.valueOf(metadata.get("title")) : "untitled"; + String url = metadata != null ? String.valueOf(metadata.get("sourceURL")) : "unknown"; + return "Document{title=" + title + ", url=" + url + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/JsonFormat.java b/apps/java-sdk/src/main/java/com/firecrawl/models/JsonFormat.java new file mode 100644 index 0000000000..1d53485ebf --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/JsonFormat.java @@ -0,0 +1,57 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonInclude; +import java.util.Map; + +/** + * JSON extraction format with optional schema and prompt. 

+ * <p>Usage:
+ *
+ * <pre>{@code
+ * JsonFormat jsonFmt = JsonFormat.builder()
+ *     .prompt("Extract the product name and price")
+ *     .schema(Map.of(
+ *         "type", "object",
+ *         "properties", Map.of(
+ *             "name", Map.of("type", "string"),
+ *             "price", Map.of("type", "number")
+ *         )
+ *     ))
+ *     .build();
+ * }
+ */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class JsonFormat { + + private final String type = "json"; + private String prompt; + private Map schema; + + private JsonFormat() {} + + public String getType() { return type; } + public String getPrompt() { return prompt; } + public Map getSchema() { return schema; } + + public static Builder builder() { return new Builder(); } + + public static final class Builder { + private String prompt; + private Map schema; + + private Builder() {} + + /** LLM prompt for extraction. */ + public Builder prompt(String prompt) { this.prompt = prompt; return this; } + + /** JSON Schema for structured extraction. */ + public Builder schema(Map schema) { this.schema = schema; return this; } + + public JsonFormat build() { + JsonFormat f = new JsonFormat(); + f.prompt = this.prompt; + f.schema = this.schema; + return f; + } + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/LocationConfig.java b/apps/java-sdk/src/main/java/com/firecrawl/models/LocationConfig.java new file mode 100644 index 0000000000..6e0705dd4f --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/LocationConfig.java @@ -0,0 +1,38 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonInclude; +import java.util.List; + +/** + * Geolocation configuration for requests. 
+ */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class LocationConfig { + + private String country; + private List languages; + + private LocationConfig() {} + + public String getCountry() { return country; } + public List getLanguages() { return languages; } + + public static Builder builder() { return new Builder(); } + + public static final class Builder { + private String country; + private List languages; + + private Builder() {} + + public Builder country(String country) { this.country = country; return this; } + public Builder languages(List languages) { this.languages = languages; return this; } + + public LocationConfig build() { + LocationConfig c = new LocationConfig(); + c.country = this.country; + c.languages = this.languages; + return c; + } + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/MapData.java b/apps/java-sdk/src/main/java/com/firecrawl/models/MapData.java new file mode 100644 index 0000000000..b9908352a6 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/MapData.java @@ -0,0 +1,53 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Result of a map operation containing discovered URLs. + * + *

The v2 API may return {@code links} as either plain URL strings or + * objects with {@code url}, {@code title}, and {@code description} fields. + * This class normalises both representations into a uniform + * {@code List>} where each entry always contains at + * least a {@code "url"} key. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class MapData { + + private List links; + + /** + * Returns the discovered links, normalised so that every entry is a + * {@code Map} containing at least a {@code "url"} key. + * Plain-string entries returned by the API are wrapped as + * {@code {"url": ""}}. + */ + @SuppressWarnings("unchecked") + public List> getLinks() { + if (links == null) { + return null; + } + List> result = new ArrayList<>(links.size()); + for (Object item : links) { + if (item instanceof Map) { + result.add((Map) item); + } else if (item instanceof String) { + Map wrapped = new LinkedHashMap<>(); + wrapped.put("url", item); + result.add(wrapped); + } + } + return Collections.unmodifiableList(result); + } + + @Override + public String toString() { + int count = links != null ? links.size() : 0; + return "MapData{links=" + count + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/MapOptions.java b/apps/java-sdk/src/main/java/com/firecrawl/models/MapOptions.java new file mode 100644 index 0000000000..06940939dd --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/MapOptions.java @@ -0,0 +1,76 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonInclude; + +/** + * Options for mapping (discovering URLs on) a website. 
+ */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class MapOptions { + + private String search; + private String sitemap; + private Boolean includeSubdomains; + private Boolean ignoreQueryParameters; + private Integer limit; + private Integer timeout; + private String integration; + private LocationConfig location; + + private MapOptions() {} + + public String getSearch() { return search; } + /** Sitemap mode: "only", "include", or "skip". */ + public String getSitemap() { return sitemap; } + public Boolean getIncludeSubdomains() { return includeSubdomains; } + public Boolean getIgnoreQueryParameters() { return ignoreQueryParameters; } + public Integer getLimit() { return limit; } + public Integer getTimeout() { return timeout; } + public String getIntegration() { return integration; } + public LocationConfig getLocation() { return location; } + + public static Builder builder() { return new Builder(); } + + public static final class Builder { + private String search; + private String sitemap; + private Boolean includeSubdomains; + private Boolean ignoreQueryParameters; + private Integer limit; + private Integer timeout; + private String integration; + private LocationConfig location; + + private Builder() {} + + /** Filter discovered URLs by keyword. */ + public Builder search(String search) { this.search = search; return this; } + /** Sitemap mode: "only", "include", or "skip". */ + public Builder sitemap(String sitemap) { this.sitemap = sitemap; return this; } + /** Include subdomains. */ + public Builder includeSubdomains(Boolean includeSubdomains) { this.includeSubdomains = includeSubdomains; return this; } + /** Ignore query parameters when deduplicating URLs. */ + public Builder ignoreQueryParameters(Boolean ignoreQueryParameters) { this.ignoreQueryParameters = ignoreQueryParameters; return this; } + /** Maximum number of URLs to return. */ + public Builder limit(Integer limit) { this.limit = limit; return this; } + /** Timeout in milliseconds. 
*/ + public Builder timeout(Integer timeout) { this.timeout = timeout; return this; } + /** Integration identifier. */ + public Builder integration(String integration) { this.integration = integration; return this; } + /** Geolocation configuration. */ + public Builder location(LocationConfig location) { this.location = location; return this; } + + public MapOptions build() { + MapOptions o = new MapOptions(); + o.search = this.search; + o.sitemap = this.sitemap; + o.includeSubdomains = this.includeSubdomains; + o.ignoreQueryParameters = this.ignoreQueryParameters; + o.limit = this.limit; + o.timeout = this.timeout; + o.integration = this.integration; + o.location = this.location; + return o; + } + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/ScrapeOptions.java b/apps/java-sdk/src/main/java/com/firecrawl/models/ScrapeOptions.java new file mode 100644 index 0000000000..b7ab815d24 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/ScrapeOptions.java @@ -0,0 +1,186 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Options for scraping a single URL. 
+ */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class ScrapeOptions { + + private List formats; + private Map headers; + private List includeTags; + private List excludeTags; + private Boolean onlyMainContent; + private Integer timeout; + private Integer waitFor; + private Boolean mobile; + private List parsers; + private List> actions; + private LocationConfig location; + private Boolean skipTlsVerification; + private Boolean removeBase64Images; + private Boolean blockAds; + private String proxy; + @JsonProperty("maxAge") + private Long maxAge; + private Boolean storeInCache; + private String integration; + + private ScrapeOptions() {} + + public List getFormats() { return formats; } + public Map getHeaders() { return headers; } + public List getIncludeTags() { return includeTags; } + public List getExcludeTags() { return excludeTags; } + public Boolean getOnlyMainContent() { return onlyMainContent; } + public Integer getTimeout() { return timeout; } + public Integer getWaitFor() { return waitFor; } + public Boolean getMobile() { return mobile; } + public List getParsers() { return parsers; } + public List> getActions() { return actions; } + public LocationConfig getLocation() { return location; } + public Boolean getSkipTlsVerification() { return skipTlsVerification; } + public Boolean getRemoveBase64Images() { return removeBase64Images; } + public Boolean getBlockAds() { return blockAds; } + public String getProxy() { return proxy; } + public Long getMaxAge() { return maxAge; } + public Boolean getStoreInCache() { return storeInCache; } + public String getIntegration() { return integration; } + + public static Builder builder() { return new Builder(); } + + public Builder toBuilder() { + Builder b = new Builder(); + b.formats = this.formats != null ? new ArrayList<>(this.formats) : null; + b.headers = this.headers != null ? new HashMap<>(this.headers) : null; + b.includeTags = this.includeTags != null ? 
new ArrayList<>(this.includeTags) : null; + b.excludeTags = this.excludeTags != null ? new ArrayList<>(this.excludeTags) : null; + b.onlyMainContent = this.onlyMainContent; + b.timeout = this.timeout; + b.waitFor = this.waitFor; + b.mobile = this.mobile; + b.parsers = this.parsers != null ? new ArrayList<>(this.parsers) : null; + b.actions = this.actions != null ? new ArrayList<>(this.actions) : null; + b.location = this.location; + b.skipTlsVerification = this.skipTlsVerification; + b.removeBase64Images = this.removeBase64Images; + b.blockAds = this.blockAds; + b.proxy = this.proxy; + b.maxAge = this.maxAge; + b.storeInCache = this.storeInCache; + b.integration = this.integration; + return b; + } + + public static final class Builder { + private List formats; + private Map headers; + private List includeTags; + private List excludeTags; + private Boolean onlyMainContent; + private Integer timeout; + private Integer waitFor; + private Boolean mobile; + private List parsers; + private List> actions; + private LocationConfig location; + private Boolean skipTlsVerification; + private Boolean removeBase64Images; + private Boolean blockAds; + private String proxy; + private Long maxAge; + private Boolean storeInCache; + private String integration; + + private Builder() {} + + /** + * Output formats to request. Accepts strings like "markdown", "html", "rawHtml", + * "links", "screenshot", "json", etc., or format configuration maps for advanced + * formats (e.g., JsonFormat, ScreenshotFormat). + */ + public Builder formats(List formats) { this.formats = formats; return this; } + + /** Custom HTTP headers to send with the request. */ + public Builder headers(Map headers) { this.headers = headers; return this; } + + /** Only include content from these HTML tags. */ + public Builder includeTags(List includeTags) { this.includeTags = includeTags; return this; } + + /** Exclude content from these HTML tags. 
*/ + public Builder excludeTags(List excludeTags) { this.excludeTags = excludeTags; return this; } + + /** Only return the main content of the page, excluding navbars/footers. */ + public Builder onlyMainContent(Boolean onlyMainContent) { this.onlyMainContent = onlyMainContent; return this; } + + /** Timeout in milliseconds for the scrape request. */ + public Builder timeout(Integer timeout) { this.timeout = timeout; return this; } + + /** Wait time in milliseconds before scraping (for JS rendering). */ + public Builder waitFor(Integer waitFor) { this.waitFor = waitFor; return this; } + + /** Scrape as a mobile device. */ + public Builder mobile(Boolean mobile) { this.mobile = mobile; return this; } + + /** Parsers to use (e.g., "pdf" or {"type": "pdf", "maxPages": 10}). */ + public Builder parsers(List parsers) { this.parsers = parsers; return this; } + + /** Actions to execute before/during scraping. */ + public Builder actions(List> actions) { this.actions = actions; return this; } + + /** Geolocation configuration. */ + public Builder location(LocationConfig location) { this.location = location; return this; } + + /** Skip TLS certificate verification. */ + public Builder skipTlsVerification(Boolean skipTlsVerification) { this.skipTlsVerification = skipTlsVerification; return this; } + + /** Remove base64-encoded images from the response. */ + public Builder removeBase64Images(Boolean removeBase64Images) { this.removeBase64Images = removeBase64Images; return this; } + + /** Block advertisements during scraping. */ + public Builder blockAds(Boolean blockAds) { this.blockAds = blockAds; return this; } + + /** Proxy mode: "basic", "stealth", "enhanced", "auto", or a custom proxy URL. */ + public Builder proxy(String proxy) { this.proxy = proxy; return this; } + + /** Use cached result if younger than this many milliseconds. */ + public Builder maxAge(Long maxAge) { this.maxAge = maxAge; return this; } + + /** Whether to cache the result. 
*/ + public Builder storeInCache(Boolean storeInCache) { this.storeInCache = storeInCache; return this; } + + /** Integration identifier. */ + public Builder integration(String integration) { this.integration = integration; return this; } + + public ScrapeOptions build() { + ScrapeOptions o = new ScrapeOptions(); + o.formats = this.formats != null ? Collections.unmodifiableList(new ArrayList<>(this.formats)) : null; + o.headers = this.headers != null ? Collections.unmodifiableMap(new HashMap<>(this.headers)) : null; + o.includeTags = this.includeTags != null ? Collections.unmodifiableList(new ArrayList<>(this.includeTags)) : null; + o.excludeTags = this.excludeTags != null ? Collections.unmodifiableList(new ArrayList<>(this.excludeTags)) : null; + o.onlyMainContent = this.onlyMainContent; + o.timeout = this.timeout; + o.waitFor = this.waitFor; + o.mobile = this.mobile; + o.parsers = this.parsers != null ? Collections.unmodifiableList(new ArrayList<>(this.parsers)) : null; + o.actions = this.actions != null ? Collections.unmodifiableList(new ArrayList<>(this.actions)) : null; + o.location = this.location; + o.skipTlsVerification = this.skipTlsVerification; + o.removeBase64Images = this.removeBase64Images; + o.blockAds = this.blockAds; + o.proxy = this.proxy; + o.maxAge = this.maxAge; + o.storeInCache = this.storeInCache; + o.integration = this.integration; + return o; + } + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/SearchData.java b/apps/java-sdk/src/main/java/com/firecrawl/models/SearchData.java new file mode 100644 index 0000000000..cc65cb4680 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/SearchData.java @@ -0,0 +1,37 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.List; +import java.util.Map; + +/** + * Search results from the v2 search API. + * The API returns an object with web, news, and images arrays. 
+ */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class SearchData { + + private List> web; + private List> news; + private List> images; + + /** Web search results. */ + public List> getWeb() { return web; } + public void setWeb(List> web) { this.web = web; } + + /** News search results. */ + public List> getNews() { return news; } + public void setNews(List> news) { this.news = news; } + + /** Image search results. */ + public List> getImages() { return images; } + public void setImages(List> images) { this.images = images; } + + @Override + public String toString() { + int webCount = web != null ? web.size() : 0; + int newsCount = news != null ? news.size() : 0; + int imageCount = images != null ? images.size() : 0; + return "SearchData{web=" + webCount + ", news=" + newsCount + ", images=" + imageCount + "}"; + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/SearchOptions.java b/apps/java-sdk/src/main/java/com/firecrawl/models/SearchOptions.java new file mode 100644 index 0000000000..c58447f532 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/SearchOptions.java @@ -0,0 +1,82 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonInclude; +import java.util.List; + +/** + * Options for a web search request. 
+ */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class SearchOptions { + + private List sources; + private List categories; + private Integer limit; + private String tbs; + private String location; + private Boolean ignoreInvalidURLs; + private Integer timeout; + private ScrapeOptions scrapeOptions; + private String integration; + + private SearchOptions() {} + + public List getSources() { return sources; } + public List getCategories() { return categories; } + public Integer getLimit() { return limit; } + public String getTbs() { return tbs; } + public String getLocation() { return location; } + public Boolean getIgnoreInvalidURLs() { return ignoreInvalidURLs; } + public Integer getTimeout() { return timeout; } + public ScrapeOptions getScrapeOptions() { return scrapeOptions; } + public String getIntegration() { return integration; } + + public static Builder builder() { return new Builder(); } + + public static final class Builder { + private List sources; + private List categories; + private Integer limit; + private String tbs; + private String location; + private Boolean ignoreInvalidURLs; + private Integer timeout; + private ScrapeOptions scrapeOptions; + private String integration; + + private Builder() {} + + /** Source types: "web", "news", "images" as strings or {type: "web"} maps. */ + public Builder sources(List sources) { this.sources = sources; return this; } + /** Categories: "github", "research", "pdf". */ + public Builder categories(List categories) { this.categories = categories; return this; } + /** Maximum number of results. */ + public Builder limit(Integer limit) { this.limit = limit; return this; } + /** Time-based search filter (e.g., "qdr:d" for past day, "qdr:w" for past week). */ + public Builder tbs(String tbs) { this.tbs = tbs; return this; } + /** Location for search results (e.g., "US"). */ + public Builder location(String location) { this.location = location; return this; } + /** Ignore invalid URLs in results. 
*/ + public Builder ignoreInvalidURLs(Boolean ignoreInvalidURLs) { this.ignoreInvalidURLs = ignoreInvalidURLs; return this; } + /** Timeout in milliseconds. */ + public Builder timeout(Integer timeout) { this.timeout = timeout; return this; } + /** Scrape options applied to search result pages. */ + public Builder scrapeOptions(ScrapeOptions scrapeOptions) { this.scrapeOptions = scrapeOptions; return this; } + /** Integration identifier. */ + public Builder integration(String integration) { this.integration = integration; return this; } + + public SearchOptions build() { + SearchOptions o = new SearchOptions(); + o.sources = this.sources; + o.categories = this.categories; + o.limit = this.limit; + o.tbs = this.tbs; + o.location = this.location; + o.ignoreInvalidURLs = this.ignoreInvalidURLs; + o.timeout = this.timeout; + o.scrapeOptions = this.scrapeOptions; + o.integration = this.integration; + return o; + } + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/models/WebhookConfig.java b/apps/java-sdk/src/main/java/com/firecrawl/models/WebhookConfig.java new file mode 100644 index 0000000000..96d0b3fd18 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/models/WebhookConfig.java @@ -0,0 +1,57 @@ +package com.firecrawl.models; + +import com.fasterxml.jackson.annotation.JsonInclude; +import java.util.List; +import java.util.Map; + +/** + * Webhook configuration for async jobs. 
+ */ +@JsonInclude(JsonInclude.Include.NON_NULL) +public class WebhookConfig { + + private String url; + private Map headers; + private Map metadata; + private List events; + + private WebhookConfig() {} + + public String getUrl() { return url; } + public Map getHeaders() { return headers; } + public Map getMetadata() { return metadata; } + public List getEvents() { return events; } + + public static Builder builder() { return new Builder(); } + + public static final class Builder { + private String url; + private Map headers; + private Map metadata; + private List events; + + private Builder() {} + + public Builder url(String url) { this.url = url; return this; } + public Builder headers(Map headers) { this.headers = headers; return this; } + public Builder metadata(Map metadata) { this.metadata = metadata; return this; } + + /** + * Events to subscribe to. Crawl/batch events: "completed", "failed", "page", "started". + * Agent events: "started", "action", "completed", "failed", "cancelled". + */ + public Builder events(List events) { this.events = events; return this; } + + public WebhookConfig build() { + if (url == null || url.isEmpty()) { + throw new IllegalArgumentException("Webhook URL is required"); + } + WebhookConfig c = new WebhookConfig(); + c.url = this.url; + c.headers = this.headers; + c.metadata = this.metadata; + c.events = this.events; + return c; + } + } +} diff --git a/apps/java-sdk/src/main/java/com/firecrawl/package-info.java b/apps/java-sdk/src/main/java/com/firecrawl/package-info.java new file mode 100644 index 0000000000..415cf33c47 --- /dev/null +++ b/apps/java-sdk/src/main/java/com/firecrawl/package-info.java @@ -0,0 +1,23 @@ +/** + * Firecrawl Java SDK — a type-safe client for the Firecrawl v2 web scraping API. + * + *

+ * <p>Quick start:
+ *
+ * <pre>{@code
+ * import com.firecrawl.client.FirecrawlClient;
+ * import com.firecrawl.models.*;
+ *
+ * FirecrawlClient client = FirecrawlClient.builder()
+ *     .apiKey("fc-your-api-key")
+ *     .build();
+ *
+ * Document doc = client.scrape("https://example.com",
+ *     ScrapeOptions.builder()
+ *         .formats(List.of("markdown"))
+ *         .build());
+ *
+ * System.out.println(doc.getMarkdown());
+ * }
+ * + * @see com.firecrawl.client.FirecrawlClient + */ +package com.firecrawl; diff --git a/apps/java-sdk/src/test/java/com/firecrawl/AgentTest.java b/apps/java-sdk/src/test/java/com/firecrawl/AgentTest.java new file mode 100644 index 0000000000..8cae94f131 --- /dev/null +++ b/apps/java-sdk/src/test/java/com/firecrawl/AgentTest.java @@ -0,0 +1,285 @@ +package com.firecrawl; + +import com.firecrawl.client.FirecrawlClient; +import com.firecrawl.models.AgentOptions; +import com.firecrawl.models.AgentResponse; +import com.firecrawl.models.AgentStatusResponse; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Comprehensive Agent Tests + * + * Tests the AI agent functionality with various configurations. + * Based on Node.js SDK patterns and tested against live firecrawl.dev. + * + * Run with: FIRECRAWL_API_KEY=fc-xxx gradle test --tests "com.firecrawl.AgentTest" + */ +class AgentTest { + + private static FirecrawlClient client; + + @BeforeAll + static void setup() { + String apiKey = System.getenv("FIRECRAWL_API_KEY"); + if (apiKey != null && !apiKey.isBlank()) { + client = FirecrawlClient.fromEnv(); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testAgentWithPrompt() { + System.out.println("\n=== Test: Agent with Prompt ==="); + + AgentStatusResponse result = client.agent( + AgentOptions.builder() + .prompt("Find information about Firecrawl's main features and pricing") + .build()); + + assertNotNull(result, "Agent result should not be null"); + assertNotNull(result.getStatus(), "Status should not be null"); + assertTrue(List.of("completed", "failed").contains(result.getStatus()), + "Status should be completed or failed: " + result.getStatus()); + + System.out.println("✓ Agent task completed"); + 
System.out.println(" Status: " + result.getStatus()); + if (result.getData() != null) { + System.out.println(" Data returned: ✓"); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testAgentWithURLs() { + System.out.println("\n=== Test: Agent with Specific URLs ==="); + + AgentStatusResponse result = client.agent( + AgentOptions.builder() + .urls(List.of("https://firecrawl.dev", "https://docs.firecrawl.dev")) + .prompt("What are the main features of Firecrawl?") + .build()); + + assertNotNull(result, "Agent result should not be null"); + assertTrue(List.of("completed", "failed").contains(result.getStatus()), + "Status should be completed or failed"); + + System.out.println("✓ Agent with URLs completed"); + System.out.println(" URLs provided: 2"); + System.out.println(" Status: " + result.getStatus()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testAgentWithSchema() { + System.out.println("\n=== Test: Agent with Schema ==="); + + Map schema = Map.of( + "type", "object", + "properties", Map.of( + "features", Map.of( + "type", "array", + "items", Map.of("type", "string") + ), + "pricing", Map.of( + "type", "object", + "properties", Map.of( + "plans", Map.of("type", "array") + ) + ) + ), + "required", List.of("features") + ); + + AgentStatusResponse result = client.agent( + AgentOptions.builder() + .urls(List.of("https://firecrawl.dev")) + .prompt("Extract features and pricing information") + .schema(schema) + .build()); + + assertNotNull(result, "Agent result should not be null"); + assertTrue(List.of("completed", "failed").contains(result.getStatus()), + "Status should be completed or failed"); + + System.out.println("✓ Agent with schema completed"); + System.out.println(" Schema provided: ✓"); + System.out.println(" Status: " + result.getStatus()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void 
testStartAgent() { + System.out.println("\n=== Test: Start Agent (Async) ==="); + + AgentResponse response = client.startAgent( + AgentOptions.builder() + .prompt("Research Firecrawl features") + .build()); + + assertNotNull(response, "Agent response should not be null"); + assertNotNull(response.getId(), "Agent ID should not be null"); + assertTrue(response.isSuccess(), "Response should be successful"); + + System.out.println("✓ Agent started successfully"); + System.out.println(" Job ID: " + response.getId()); + System.out.println(" Success: " + response.isSuccess()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testAgentStatusCheck() { + System.out.println("\n=== Test: Check Agent Status ==="); + + // Start an agent + AgentResponse start = client.startAgent( + AgentOptions.builder() + .prompt("Find information about web scraping") + .build()); + + // Check status + AgentStatusResponse status = client.getAgentStatus(start.getId()); + + assertNotNull(status, "Status should not be null"); + assertNotNull(status.getStatus(), "Status field should not be null"); + assertTrue(List.of("scraping", "completed", "failed", "cancelled").contains(status.getStatus()), + "Status should be valid: " + status.getStatus()); + + System.out.println("✓ Agent status retrieved"); + System.out.println(" Status: " + status.getStatus()); + System.out.println(" Job ID: " + start.getId()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testCancelAgent() { + System.out.println("\n=== Test: Cancel Agent ==="); + + AgentResponse start = client.startAgent( + AgentOptions.builder() + .prompt("Long-running research task") + .build()); + + Map result = client.cancelAgent(start.getId()); + + assertNotNull(result, "Cancel result should not be null"); + + System.out.println("✓ Agent cancelled successfully"); + System.out.println(" Job ID: " + start.getId()); + } + + @Test + 
@EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testAgentWithStrictURLConstraints() { + System.out.println("\n=== Test: Agent with Strict URL Constraints ==="); + + AgentStatusResponse result = client.agent( + AgentOptions.builder() + .urls(List.of("https://docs.firecrawl.dev")) + .prompt("Extract API documentation structure") + .strictConstrainToURLs(true) + .build()); + + assertNotNull(result, "Agent result should not be null"); + assertTrue(List.of("completed", "failed").contains(result.getStatus()), + "Status should be completed or failed"); + + System.out.println("✓ Agent with strict constraints completed"); + System.out.println(" Strict URL constraint: true"); + System.out.println(" Status: " + result.getStatus()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testAgentWithMaxCredits() { + System.out.println("\n=== Test: Agent with Max Credits Limit ==="); + + AgentStatusResponse result = client.agent( + AgentOptions.builder() + .prompt("Quick research on Firecrawl") + .maxCredits(10) + .build()); + + assertNotNull(result, "Agent result should not be null"); + + System.out.println("✓ Agent with credit limit completed"); + System.out.println(" Max credits: 10"); + System.out.println(" Status: " + result.getStatus()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testAgentResearchTask() { + System.out.println("\n=== Test: Agent Research - Firecrawl Features ==="); + + AgentStatusResponse result = client.agent( + AgentOptions.builder() + .urls(List.of("https://firecrawl.dev", "https://docs.firecrawl.dev")) + .prompt("Research and summarize the key features of Firecrawl, including scraping, crawling, and extraction capabilities") + .build()); + + assertNotNull(result, "Agent result should not be null"); + assertEquals("completed", result.getStatus(), "Agent should complete successfully"); + 
assertNotNull(result.getData(), "Agent should return data"); + + System.out.println("✓ Research task completed"); + System.out.println(" Status: " + result.getStatus()); + System.out.println(" Data collected: ✓"); + + if (result.getData() != null) { + System.out.println(" Data summary: " + + result.getData().toString().substring(0, + Math.min(200, result.getData().toString().length())) + "..."); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testAgentComprehensive() { + System.out.println("\n=== Test: Agent with All Options ==="); + + Map schema = Map.of( + "type", "object", + "properties", Map.of( + "product_name", Map.of("type", "string"), + "features", Map.of( + "type", "array", + "items", Map.of("type", "string") + ), + "pricing", Map.of("type", "string") + ), + "required", List.of("product_name", "features") + ); + + AgentStatusResponse result = client.agent( + AgentOptions.builder() + .urls(List.of("https://firecrawl.dev")) + .prompt("Extract comprehensive product information including name, features, and pricing") + .schema(schema) + .maxCredits(20) + .strictConstrainToURLs(true) + .build()); + + assertNotNull(result, "Agent result should not be null"); + assertTrue(List.of("completed", "failed").contains(result.getStatus()), + "Status should be completed or failed"); + + System.out.println("✓ Comprehensive agent task completed"); + System.out.println(" Configuration:"); + System.out.println(" - URLs: 1"); + System.out.println(" - Schema: ✓"); + System.out.println(" - Max credits: 20"); + System.out.println(" - Strict constraints: true"); + System.out.println(" Results:"); + System.out.println(" - Status: " + result.getStatus()); + if (result.getData() != null) { + System.out.println(" - Data returned: ✓"); + } + } +} diff --git a/apps/java-sdk/src/test/java/com/firecrawl/BrowserTest.java b/apps/java-sdk/src/test/java/com/firecrawl/BrowserTest.java new file mode 100644 index 0000000000..9ea7b6aa6b 
--- /dev/null +++ b/apps/java-sdk/src/test/java/com/firecrawl/BrowserTest.java @@ -0,0 +1,310 @@ +package com.firecrawl; + +import com.firecrawl.client.FirecrawlClient; +import com.firecrawl.models.BrowserCreateResponse; +import com.firecrawl.models.BrowserDeleteResponse; +import com.firecrawl.models.BrowserExecuteResponse; +import com.firecrawl.models.BrowserListResponse; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Browser Sandbox Endpoint Tests + * + * Tests the browser session management functionality of the Firecrawl Java SDK. + * These tests require FIRECRAWL_API_KEY environment variable to be set. + * + * Run with: FIRECRAWL_API_KEY=fc-xxx gradle test --tests "com.firecrawl.BrowserTest" + */ +class BrowserTest { + + private static FirecrawlClient client; + + @BeforeAll + static void setup() { + String apiKey = System.getenv("FIRECRAWL_API_KEY"); + if (apiKey != null && !apiKey.isBlank()) { + client = FirecrawlClient.fromEnv(); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testBrowserCreateAndDelete() { + System.out.println("Testing browser session create and delete..."); + + // Create a browser session + BrowserCreateResponse createRes = client.browser(); + assertNotNull(createRes, "Create response should not be null"); + assertTrue(createRes.isSuccess(), "Create should succeed"); + assertNotNull(createRes.getId(), "Session ID should not be null"); + + String sessionId = createRes.getId(); + System.out.println(" Created session: " + sessionId); + + // Delete the browser session + BrowserDeleteResponse deleteRes = client.deleteBrowser(sessionId); + assertNotNull(deleteRes, "Delete response should not be null"); + assertTrue(deleteRes.isSuccess(), "Delete should succeed"); + + System.out.println("✓ Browser create and delete test 
passed"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testBrowserCreateWithOptions() { + System.out.println("Testing browser session create with options..."); + + // Create a session with custom TTL and activity TTL + BrowserCreateResponse createRes = client.browser(300, 120, true); + assertNotNull(createRes, "Create response should not be null"); + assertTrue(createRes.isSuccess(), "Create should succeed"); + assertNotNull(createRes.getId(), "Session ID should not be null"); + + String sessionId = createRes.getId(); + System.out.println(" Created session with options: " + sessionId); + + // Clean up + client.deleteBrowser(sessionId); + + System.out.println("✓ Browser create with options test passed"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testBrowserExecuteBash() { + System.out.println("Testing browser execute with bash..."); + + // Create a session + BrowserCreateResponse createRes = client.browser(); + assertTrue(createRes.isSuccess(), "Create should succeed"); + String sessionId = createRes.getId(); + + try { + // Execute bash code + BrowserExecuteResponse execRes = client.browserExecute(sessionId, "echo 'hello from java sdk'"); + assertNotNull(execRes, "Execute response should not be null"); + assertTrue(execRes.isSuccess(), "Execute should succeed"); + assertNotNull(execRes.getStdout(), "Stdout should not be null"); + assertTrue(execRes.getStdout().contains("hello from java sdk"), + "Stdout should contain our echo output"); + + System.out.println(" Stdout: " + execRes.getStdout().trim()); + System.out.println(" Exit code: " + execRes.getExitCode()); + } finally { + // Clean up + client.deleteBrowser(sessionId); + } + + System.out.println("✓ Browser execute bash test passed"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testBrowserExecuteNode() { + System.out.println("Testing 
browser execute with node..."); + + // Create a session + BrowserCreateResponse createRes = client.browser(); + assertTrue(createRes.isSuccess(), "Create should succeed"); + String sessionId = createRes.getId(); + + try { + // Execute node code + BrowserExecuteResponse execRes = client.browserExecute( + sessionId, "console.log(1 + 2)", "node", null); + assertNotNull(execRes, "Execute response should not be null"); + assertTrue(execRes.isSuccess(), "Execute should succeed"); + + System.out.println(" Stdout: " + (execRes.getStdout() != null ? execRes.getStdout().trim() : "null")); + } finally { + client.deleteBrowser(sessionId); + } + + System.out.println("✓ Browser execute node test passed"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testBrowserExecutePython() { + System.out.println("Testing browser execute with python..."); + + // Create a session + BrowserCreateResponse createRes = client.browser(); + assertTrue(createRes.isSuccess(), "Create should succeed"); + String sessionId = createRes.getId(); + + try { + // Execute python code + BrowserExecuteResponse execRes = client.browserExecute( + sessionId, "print('hello from python')", "python", null); + assertNotNull(execRes, "Execute response should not be null"); + assertTrue(execRes.isSuccess(), "Execute should succeed"); + + System.out.println(" Stdout: " + (execRes.getStdout() != null ? 
execRes.getStdout().trim() : "null")); + } finally { + client.deleteBrowser(sessionId); + } + + System.out.println("✓ Browser execute python test passed"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testBrowserExecuteWithTimeout() { + System.out.println("Testing browser execute with custom timeout..."); + + // Create a session + BrowserCreateResponse createRes = client.browser(); + assertTrue(createRes.isSuccess(), "Create should succeed"); + String sessionId = createRes.getId(); + + try { + // Execute with custom timeout (60 seconds) + BrowserExecuteResponse execRes = client.browserExecute( + sessionId, "echo 'timeout test'", "bash", 60); + assertNotNull(execRes, "Execute response should not be null"); + assertTrue(execRes.isSuccess(), "Execute should succeed"); + + System.out.println(" Stdout: " + (execRes.getStdout() != null ? execRes.getStdout().trim() : "null")); + } finally { + client.deleteBrowser(sessionId); + } + + System.out.println("✓ Browser execute with timeout test passed"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testBrowserListSessions() { + System.out.println("Testing list browser sessions..."); + + // List all sessions + BrowserListResponse listRes = client.listBrowsers(); + assertNotNull(listRes, "List response should not be null"); + assertTrue(listRes.isSuccess(), "List should succeed"); + + System.out.println(" Total sessions: " + (listRes.getSessions() != null ? 
listRes.getSessions().size() : 0)); + + System.out.println("✓ List browser sessions test passed"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testBrowserListActiveFilter() { + System.out.println("Testing list browser sessions with active filter..."); + + // Create a session so we have at least one active + BrowserCreateResponse createRes = client.browser(); + assertTrue(createRes.isSuccess(), "Create should succeed"); + String sessionId = createRes.getId(); + + try { + // List only active sessions + BrowserListResponse listRes = client.listBrowsers("active"); + assertNotNull(listRes, "List response should not be null"); + assertTrue(listRes.isSuccess(), "List should succeed"); + assertNotNull(listRes.getSessions(), "Sessions list should not be null"); + assertFalse(listRes.getSessions().isEmpty(), "Should have at least one active session"); + + System.out.println(" Active sessions: " + listRes.getSessions().size()); + } finally { + client.deleteBrowser(sessionId); + } + + System.out.println("✓ List active browser sessions test passed"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testBrowserFullLifecycle() { + System.out.println("Testing full browser session lifecycle..."); + + // 1. Create session + BrowserCreateResponse createRes = client.browser(300, 120, true); + assertTrue(createRes.isSuccess(), "Create should succeed"); + assertNotNull(createRes.getId(), "Should have session ID"); + String sessionId = createRes.getId(); + System.out.println(" 1. Created session: " + sessionId); + + // CDP URL and live view URL may be present + if (createRes.getCdpUrl() != null) { + System.out.println(" CDP URL present: true"); + } + if (createRes.getLiveViewUrl() != null) { + System.out.println(" Live View URL present: true"); + } + + // 2. 
Navigate to a page + BrowserExecuteResponse navRes = client.browserExecute( + sessionId, "agent-browser open https://example.com", "bash", 30); + assertTrue(navRes.isSuccess(), "Navigation should succeed"); + System.out.println(" 2. Navigated to example.com"); + + // 3. Take a snapshot + BrowserExecuteResponse snapRes = client.browserExecute( + sessionId, "agent-browser snapshot -i -c", "bash", 30); + assertTrue(snapRes.isSuccess(), "Snapshot should succeed"); + System.out.println(" 3. Took snapshot"); + + // 4. Get page title + BrowserExecuteResponse titleRes = client.browserExecute( + sessionId, "agent-browser get title", "bash", 30); + assertTrue(titleRes.isSuccess(), "Get title should succeed"); + System.out.println(" 4. Page title: " + (titleRes.getStdout() != null ? titleRes.getStdout().trim() : "null")); + + // 5. Verify session is active + BrowserListResponse listRes = client.listBrowsers("active"); + assertTrue(listRes.isSuccess(), "List should succeed"); + System.out.println(" 5. Active sessions: " + (listRes.getSessions() != null ? listRes.getSessions().size() : 0)); + + // 6. Delete session + BrowserDeleteResponse deleteRes = client.deleteBrowser(sessionId); + assertTrue(deleteRes.isSuccess(), "Delete should succeed"); + System.out.println(" 6. 
Deleted session"); + if (deleteRes.getSessionDurationMs() != null) { + System.out.println(" Session duration: " + deleteRes.getSessionDurationMs() + "ms"); + } + if (deleteRes.getCreditsBilled() != null) { + System.out.println(" Credits billed: " + deleteRes.getCreditsBilled()); + } + + System.out.println("✓ Full browser session lifecycle test passed"); + } + + @Test + void testBrowserExecuteRequiresSessionId() { + FirecrawlClient testClient = FirecrawlClient.builder() + .apiKey("fc-test-key") + .build(); + + assertThrows(NullPointerException.class, () -> + testClient.browserExecute(null, "echo test") + ); + } + + @Test + void testBrowserExecuteRequiresCode() { + FirecrawlClient testClient = FirecrawlClient.builder() + .apiKey("fc-test-key") + .build(); + + assertThrows(NullPointerException.class, () -> + testClient.browserExecute("some-session-id", null) + ); + } + + @Test + void testBrowserDeleteRequiresSessionId() { + FirecrawlClient testClient = FirecrawlClient.builder() + .apiKey("fc-test-key") + .build(); + + assertThrows(NullPointerException.class, () -> + testClient.deleteBrowser(null) + ); + } +} diff --git a/apps/java-sdk/src/test/java/com/firecrawl/CrawlTest.java b/apps/java-sdk/src/test/java/com/firecrawl/CrawlTest.java new file mode 100644 index 0000000000..efe984829b --- /dev/null +++ b/apps/java-sdk/src/test/java/com/firecrawl/CrawlTest.java @@ -0,0 +1,302 @@ +package com.firecrawl; + +import com.firecrawl.client.FirecrawlClient; +import com.firecrawl.errors.FirecrawlException; +import com.firecrawl.models.*; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Comprehensive Crawl Tests + * + * Tests the crawl functionality with various configurations. + * Based on Node.js SDK patterns and tested against live firecrawl.dev. 
+ * + * Run with: FIRECRAWL_API_KEY=fc-xxx gradle test --tests "com.firecrawl.CrawlTest" + */ +class CrawlTest { + + private static FirecrawlClient client; + + @BeforeAll + static void setup() { + String apiKey = System.getenv("FIRECRAWL_API_KEY"); + if (apiKey != null && !apiKey.isBlank()) { + client = FirecrawlClient.fromEnv(); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testStartCrawlMinimal() { + System.out.println("\n=== Test: Start Crawl - Minimal Request ==="); + + CrawlResponse response = client.startCrawl("https://docs.firecrawl.dev", + CrawlOptions.builder() + .limit(3) + .build()); + + assertNotNull(response, "Crawl response should not be null"); + assertNotNull(response.getId(), "Crawl ID should not be null"); + assertNotNull(response.getUrl(), "Crawl URL should not be null"); + + System.out.println("✓ Crawl started successfully"); + System.out.println(" Job ID: " + response.getId()); + System.out.println(" Status URL: " + response.getUrl()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testStartCrawlWithOptions() { + System.out.println("\n=== Test: Start Crawl - With Options ==="); + + CrawlResponse response = client.startCrawl("https://docs.firecrawl.dev", + CrawlOptions.builder() + .limit(5) + .maxDiscoveryDepth(2) + .build()); + + assertNotNull(response.getId(), "Job ID should not be null"); + assertNotNull(response.getUrl(), "Status URL should not be null"); + + System.out.println("✓ Crawl with options started"); + System.out.println(" Limit: 5 pages"); + System.out.println(" Max depth: 2"); + System.out.println(" Job ID: " + response.getId()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testGetCrawlStatus() { + System.out.println("\n=== Test: Get Crawl Status ==="); + + // Start a crawl + CrawlResponse start = client.startCrawl("https://docs.firecrawl.dev", + 
CrawlOptions.builder() + .limit(3) + .build()); + + System.out.println("CrawlResponse: " + start); + System.out.println("ID: " + start.getId()); + assertNotNull(start, "CrawlResponse should not be null"); + assertNotNull(start.getId(), "Crawl ID should not be null"); + + // Get status + CrawlJob status = client.getCrawlStatus(start.getId()); + + assertNotNull(status, "Status should not be null"); + assertNotNull(status.getStatus(), "Status should not be null"); + assertTrue(List.of("scraping", "completed", "failed", "cancelled").contains(status.getStatus()), + "Status should be valid: " + status.getStatus()); + assertTrue(status.getCompleted() >= 0, "Completed count should be non-negative"); + // Data may be null while crawl is still in progress (status=scraping) + if ("completed".equals(status.getStatus())) { + assertNotNull(status.getData(), "Data should not be null when completed"); + } + + System.out.println("✓ Status retrieved successfully"); + System.out.println(" Status: " + status.getStatus()); + System.out.println(" Completed: " + status.getCompleted() + "/" + status.getTotal()); + System.out.println(" Documents: " + (status.getData() != null ? 
status.getData().size() : 0)); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testCancelCrawl() { + System.out.println("\n=== Test: Cancel Crawl ==="); + + CrawlResponse start = client.startCrawl("https://docs.firecrawl.dev", + CrawlOptions.builder() + .limit(10) + .build()); + + Map result = client.cancelCrawl(start.getId()); + + assertNotNull(result, "Cancel result should not be null"); + + System.out.println("✓ Crawl cancelled successfully"); + System.out.println(" Job ID: " + start.getId()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testCrawlWithWait() { + System.out.println("\n=== Test: Crawl with Wait (Blocking) ==="); + + CrawlJob job = client.crawl("https://firecrawl.dev", + CrawlOptions.builder() + .limit(3) + .maxDiscoveryDepth(1) + .build(), + 2, // pollInterval in seconds + 120 // timeout in seconds + ); + + assertNotNull(job, "Job should not be null"); + assertTrue(List.of("completed", "failed").contains(job.getStatus()), + "Final status should be completed or failed: " + job.getStatus()); + assertTrue(job.getCompleted() >= 0, "Completed count should be non-negative"); + assertTrue(job.getTotal() >= 0, "Total count should be non-negative"); + assertNotNull(job.getData(), "Data should not be null"); + + System.out.println("✓ Crawl completed (with wait)"); + System.out.println(" Final status: " + job.getStatus()); + System.out.println(" Pages crawled: " + job.getCompleted() + "/" + job.getTotal()); + System.out.println(" Documents returned: " + job.getData().size()); + + if (!job.getData().isEmpty()) { + Document firstDoc = job.getData().get(0); + System.out.println(" Sample URL: " + firstDoc.getMetadata().get("sourceURL")); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testCrawlWithScrapeOptions() { + System.out.println("\n=== Test: Crawl with Scrape Options ==="); + + 
CrawlResponse response = client.startCrawl("https://docs.firecrawl.dev", + CrawlOptions.builder() + .limit(2) + .scrapeOptions(ScrapeOptions.builder() + .formats(List.of("markdown", "links")) + .onlyMainContent(true) + .build()) + .build()); + + assertNotNull(response.getId(), "Job ID should not be null"); + + System.out.println("✓ Crawl with scrape options started"); + System.out.println(" Formats: markdown, links"); + System.out.println(" Only main content: true"); + System.out.println(" Job ID: " + response.getId()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testCrawlWithExcludePaths() { + System.out.println("\n=== Test: Crawl with Exclude Paths ==="); + + CrawlResponse response = client.startCrawl("https://docs.firecrawl.dev", + CrawlOptions.builder() + .limit(5) + .excludePaths(List.of("/blog/*", "/admin/*")) + .build()); + + assertNotNull(response.getId(), "Job ID should not be null"); + + System.out.println("✓ Crawl with exclude paths started"); + System.out.println(" Excluding: /blog/*, /admin/*"); + System.out.println(" Job ID: " + response.getId()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testCrawlWithIncludePaths() { + System.out.println("\n=== Test: Crawl with Include Paths ==="); + + CrawlResponse response = client.startCrawl("https://docs.firecrawl.dev", + CrawlOptions.builder() + .limit(5) + .includePaths(List.of("/docs/*")) + .build()); + + assertNotNull(response.getId(), "Job ID should not be null"); + + System.out.println("✓ Crawl with include paths started"); + System.out.println(" Including only: /docs/*"); + System.out.println(" Job ID: " + response.getId()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testCrawlWithAllowExternalLinks() { + System.out.println("\n=== Test: Crawl with Allow External Links ==="); + + CrawlResponse response = 
client.startCrawl("https://docs.firecrawl.dev", + CrawlOptions.builder() + .limit(5) + .allowExternalLinks(true) + .build()); + + assertNotNull(response.getId(), "Job ID should not be null"); + + System.out.println("✓ Crawl with external links allowed"); + System.out.println(" Job ID: " + response.getId()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testCrawlWithWebhookConfig() { + System.out.println("\n=== Test: Crawl with Webhook (if available) ==="); + + try { + // Using a test webhook URL (requestbin, webhook.site, etc.) + CrawlResponse response = client.startCrawl("https://firecrawl.dev", + CrawlOptions.builder() + .limit(2) + .webhook(WebhookConfig.builder() + .url("https://webhook.site/test") + .build()) + .build()); + + assertNotNull(response.getId(), "Job ID should not be null"); + + System.out.println("✓ Crawl with webhook started"); + System.out.println(" Job ID: " + response.getId()); + } catch (Exception e) { + System.out.println("⚠ Webhook test skipped or failed: " + e.getMessage()); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testCrawlFirecrawlHomepage() { + System.out.println("\n=== Test: Crawl Firecrawl.dev Homepage ==="); + + CrawlJob job = client.crawl("https://firecrawl.dev", + CrawlOptions.builder() + .limit(5) + .maxDiscoveryDepth(2) + .scrapeOptions(ScrapeOptions.builder() + .formats(List.of("markdown")) + .onlyMainContent(true) + .build()) + .build(), + 2, + 120 + ); + + assertNotNull(job, "Job should not be null"); + assertTrue(job.getData() != null && !job.getData().isEmpty(), + "Should have crawled at least one page"); + + // Verify content from Firecrawl site + boolean hasFirecrawlContent = job.getData().stream() + .anyMatch(doc -> { + String markdown = doc.getMarkdown(); + return markdown != null && + (markdown.toLowerCase().contains("firecrawl") || + markdown.toLowerCase().contains("scrape") || + 
markdown.toLowerCase().contains("crawl")); + }); + + assertTrue(hasFirecrawlContent, "Should contain Firecrawl-related content"); + + System.out.println("✓ Successfully crawled Firecrawl homepage"); + System.out.println(" Pages crawled: " + job.getData().size()); + System.out.println(" Status: " + job.getStatus()); + + // Print sample URLs + System.out.println(" Sample pages:"); + job.getData().stream() + .limit(3) + .forEach(doc -> System.out.println(" - " + doc.getMetadata().get("sourceURL"))); + } +} diff --git a/apps/java-sdk/src/test/java/com/firecrawl/FirecrawlClientTest.java b/apps/java-sdk/src/test/java/com/firecrawl/FirecrawlClientTest.java new file mode 100644 index 0000000000..bebed14ff0 --- /dev/null +++ b/apps/java-sdk/src/test/java/com/firecrawl/FirecrawlClientTest.java @@ -0,0 +1,194 @@ +package com.firecrawl; + +import com.firecrawl.client.FirecrawlClient; +import com.firecrawl.errors.FirecrawlException; +import com.firecrawl.models.*; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Integration tests for the Firecrawl Java SDK. + * + *

These tests require a valid FIRECRAWL_API_KEY environment variable. + * Run with: FIRECRAWL_API_KEY=fc-xxx ./gradlew test + */ +class FirecrawlClientTest { + + @Test + void testBuilderRequiresApiKey() { + assertThrows(FirecrawlException.class, () -> + FirecrawlClient.builder().apiKey("").build() + ); + } + + @Test + void testBuilderAcceptsApiKey() { + // Should not throw — just validates construction + FirecrawlClient client = FirecrawlClient.builder() + .apiKey("fc-test-key") + .build(); + assertNotNull(client); + } + + @Test + void testScrapeOptionsBuilder() { + ScrapeOptions options = ScrapeOptions.builder() + .formats(List.of("markdown", "html")) + .onlyMainContent(true) + .timeout(30000) + .mobile(false) + .build(); + + assertEquals(List.of("markdown", "html"), options.getFormats()); + assertTrue(options.getOnlyMainContent()); + assertEquals(30000, options.getTimeout()); + assertFalse(options.getMobile()); + } + + @Test + void testCrawlOptionsBuilder() { + CrawlOptions options = CrawlOptions.builder() + .limit(100) + .maxDiscoveryDepth(3) + .sitemap("include") + .excludePaths(List.of("/admin/*")) + .build(); + + assertEquals(100, options.getLimit()); + assertEquals(3, options.getMaxDiscoveryDepth()); + assertEquals("include", options.getSitemap()); + assertEquals(List.of("/admin/*"), options.getExcludePaths()); + } + + @Test + void testAgentOptionsRequiresPrompt() { + assertThrows(IllegalArgumentException.class, () -> + AgentOptions.builder().build() + ); + } + + @Test + void testWebhookConfigRequiresUrl() { + assertThrows(IllegalArgumentException.class, () -> + WebhookConfig.builder().build() + ); + } + + @Test + void testScrapeOptionsToBuilder() { + ScrapeOptions original = ScrapeOptions.builder() + .formats(List.of("markdown")) + .timeout(5000) + .build(); + + ScrapeOptions modified = original.toBuilder() + .timeout(10000) + .build(); + + assertEquals(5000, original.getTimeout()); + assertEquals(10000, modified.getTimeout()); + 
assertEquals(List.of("markdown"), modified.getFormats()); + } + + @Test + void testBrowserExecuteRequiresSessionId() { + FirecrawlClient client = FirecrawlClient.builder() + .apiKey("fc-test-key") + .build(); + assertThrows(NullPointerException.class, () -> + client.browserExecute(null, "echo test") + ); + } + + @Test + void testBrowserDeleteRequiresSessionId() { + FirecrawlClient client = FirecrawlClient.builder() + .apiKey("fc-test-key") + .build(); + assertThrows(NullPointerException.class, () -> + client.deleteBrowser(null) + ); + } + + // ================================================================ + // E2E TESTS (require FIRECRAWL_API_KEY) + // ================================================================ + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testScrapeE2E() { + FirecrawlClient client = FirecrawlClient.fromEnv(); + Document doc = client.scrape("https://example.com", + ScrapeOptions.builder() + .formats(List.of("markdown")) + .build()); + + assertNotNull(doc); + assertNotNull(doc.getMarkdown()); + assertFalse(doc.getMarkdown().isEmpty()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testMapE2E() { + FirecrawlClient client = FirecrawlClient.fromEnv(); + MapData data = client.map("https://example.com", + MapOptions.builder() + .limit(10) + .build()); + + assertNotNull(data); + assertNotNull(data.getLinks()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testCrawlE2E() { + FirecrawlClient client = FirecrawlClient.fromEnv(); + CrawlJob job = client.crawl("https://example.com", + CrawlOptions.builder() + .limit(3) + .build(), + 2, 60); + + assertNotNull(job); + assertEquals("completed", job.getStatus()); + assertNotNull(job.getData()); + assertFalse(job.getData().isEmpty()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void 
testSearchE2E() { + FirecrawlClient client = FirecrawlClient.fromEnv(); + SearchData data = client.search("firecrawl web scraping", + SearchOptions.builder() + .limit(5) + .build()); + + assertNotNull(data); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testConcurrencyE2E() { + FirecrawlClient client = FirecrawlClient.fromEnv(); + ConcurrencyCheck check = client.getConcurrency(); + + assertNotNull(check); + assertTrue(check.getMaxConcurrency() > 0); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testCreditUsageE2E() { + FirecrawlClient client = FirecrawlClient.fromEnv(); + CreditUsage usage = client.getCreditUsage(); + + assertNotNull(usage); + } +} diff --git a/apps/java-sdk/src/test/java/com/firecrawl/FirecrawlLiveSiteTest.java b/apps/java-sdk/src/test/java/com/firecrawl/FirecrawlLiveSiteTest.java new file mode 100644 index 0000000000..54b3651ce8 --- /dev/null +++ b/apps/java-sdk/src/test/java/com/firecrawl/FirecrawlLiveSiteTest.java @@ -0,0 +1,131 @@ +package com.firecrawl; + +import com.firecrawl.client.FirecrawlClient; +import com.firecrawl.models.Document; +import com.firecrawl.models.ScrapeOptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Live Site Test - Firecrawl.dev + * + * Tests the Java SDK against the actual Firecrawl production website. + * This demonstrates real-world usage of the API against live content. 
+ * + * Run with: FIRECRAWL_API_KEY=fc-xxx gradle test --tests "com.firecrawl.FirecrawlLiveSiteTest" + */ +class FirecrawlLiveSiteTest { + + private static FirecrawlClient client; + + @BeforeAll + static void setup() { + String apiKey = System.getenv("FIRECRAWL_API_KEY"); + if (apiKey != null && !apiKey.isBlank()) { + client = FirecrawlClient.fromEnv(); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testScrapeFirecrawlHomepage() { + System.out.println("\n=== Testing against LIVE Firecrawl.dev website ===\n"); + System.out.println("Scraping: https://firecrawl.dev"); + + Document doc = client.scrape("https://firecrawl.dev", + ScrapeOptions.builder() + .formats(List.of("markdown", "html")) + .onlyMainContent(true) + .build()); + + // Assertions + assertNotNull(doc, "Document should not be null"); + assertNotNull(doc.getMarkdown(), "Markdown content should not be null"); + assertNotNull(doc.getHtml(), "HTML content should not be null"); + assertNotNull(doc.getMetadata(), "Metadata should not be null"); + + // Verify it's actually the Firecrawl site + String markdown = doc.getMarkdown().toLowerCase(); + assertTrue(markdown.contains("firecrawl") || markdown.contains("scrape") || markdown.contains("crawl"), + "Content should mention Firecrawl features"); + + // Check metadata + String sourceUrl = doc.getMetadata().get("sourceURL").toString(); + assertTrue(sourceUrl.contains("firecrawl.dev"), "Source URL should be firecrawl.dev"); + + // Display results + System.out.println("\n✓ Successfully scraped Firecrawl.dev!"); + System.out.println("\nMetadata:"); + System.out.println(" Source URL: " + sourceUrl); + if (doc.getMetadata().get("title") != null) { + System.out.println(" Title: " + doc.getMetadata().get("title")); + } + System.out.println(" Status Code: " + doc.getMetadata().get("statusCode")); + + System.out.println("\nContent Stats:"); + System.out.println(" Markdown length: " + doc.getMarkdown().length() + " 
characters"); + System.out.println(" HTML length: " + doc.getHtml().length() + " characters"); + + System.out.println("\nFirst 500 characters of markdown:"); + System.out.println(" " + doc.getMarkdown().substring(0, Math.min(500, doc.getMarkdown().length())).replace("\n", "\n ")); + + System.out.println("\n=== Live site test completed successfully! ===\n"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testScrapeFirecrawlPricing() { + System.out.println("\n=== Testing Firecrawl Pricing Page ===\n"); + System.out.println("Scraping: https://firecrawl.dev/pricing"); + + Document doc = client.scrape("https://firecrawl.dev/pricing", + ScrapeOptions.builder() + .formats(List.of("markdown")) + .build()); + + // Assertions + assertNotNull(doc, "Document should not be null"); + assertNotNull(doc.getMarkdown(), "Markdown content should not be null"); + + String markdown = doc.getMarkdown().toLowerCase(); + assertTrue(markdown.contains("pricing") || markdown.contains("plan") || markdown.contains("price"), + "Pricing page should contain pricing information"); + + System.out.println("✓ Successfully scraped pricing page!"); + System.out.println(" Content length: " + doc.getMarkdown().length() + " characters"); + System.out.println(" Source: " + doc.getMetadata().get("sourceURL")); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testScrapeFirecrawlDocs() { + System.out.println("\n=== Testing Firecrawl Documentation ===\n"); + System.out.println("Scraping: https://docs.firecrawl.dev"); + + Document doc = client.scrape("https://docs.firecrawl.dev", + ScrapeOptions.builder() + .formats(List.of("markdown")) + .waitFor(2000) // Wait for docs to load + .build()); + + // Assertions + assertNotNull(doc, "Document should not be null"); + assertNotNull(doc.getMarkdown(), "Markdown content should not be null"); + assertFalse(doc.getMarkdown().isEmpty(), "Markdown should not be 
empty"); + + String markdown = doc.getMarkdown().toLowerCase(); + assertTrue(markdown.contains("document") || markdown.contains("api") || markdown.contains("firecrawl"), + "Docs should contain documentation content"); + + System.out.println("✓ Successfully scraped documentation!"); + System.out.println(" Content length: " + doc.getMarkdown().length() + " characters"); + System.out.println(" Source: " + doc.getMetadata().get("sourceURL")); + + System.out.println("\n=== All Firecrawl.dev tests passed! ===\n"); + } +} diff --git a/apps/java-sdk/src/test/java/com/firecrawl/MapTest.java b/apps/java-sdk/src/test/java/com/firecrawl/MapTest.java new file mode 100644 index 0000000000..731de37eea --- /dev/null +++ b/apps/java-sdk/src/test/java/com/firecrawl/MapTest.java @@ -0,0 +1,279 @@ +package com.firecrawl; + +import com.firecrawl.client.FirecrawlClient; +import com.firecrawl.models.MapData; +import com.firecrawl.models.MapOptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Comprehensive Map Tests + * + * Tests the map functionality with various configurations. + * Based on Node.js SDK patterns and tested against live firecrawl.dev. 
+ * + * Run with: FIRECRAWL_API_KEY=fc-xxx gradle test --tests "com.firecrawl.MapTest" + */ +class MapTest { + + private static FirecrawlClient client; + + @BeforeAll + static void setup() { + String apiKey = System.getenv("FIRECRAWL_API_KEY"); + if (apiKey != null && !apiKey.isBlank()) { + client = FirecrawlClient.fromEnv(); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testMapMinimal() { + System.out.println("\n=== Test: Map - Minimal Request ==="); + + MapData data = client.map("https://docs.firecrawl.dev"); + + assertNotNull(data, "Map data should not be null"); + assertNotNull(data.getLinks(), "Links should not be null"); + assertTrue(!data.getLinks().isEmpty(), "Should have at least one link"); + + // Verify link structure (v2 links are MapDocument objects with url, title, description) + Map firstLink = data.getLinks().get(0); + assertNotNull(firstLink, "Link should not be null"); + assertNotNull(firstLink.get("url"), "Link should have url"); + assertTrue(firstLink.get("url").toString().startsWith("http"), "URL should start with http"); + + System.out.println("✓ Map completed successfully"); + System.out.println(" Total links found: " + data.getLinks().size()); + System.out.println(" Sample URL: " + firstLink.get("url")); + if (firstLink.get("title") != null) { + System.out.println(" Title: " + firstLink.get("title")); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testMapWithLimit() { + System.out.println("\n=== Test: Map with Limit ==="); + + MapData data = client.map("https://docs.firecrawl.dev", + MapOptions.builder() + .limit(10) + .build()); + + assertNotNull(data.getLinks(), "Links should not be null"); + assertTrue(data.getLinks().size() <= 10, + "Should respect limit of 10: got " + data.getLinks().size()); + + System.out.println("✓ Map with limit completed"); + System.out.println(" Requested limit: 10"); + System.out.println(" 
Actual links: " + data.getLinks().size()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testMapWithSearch() { + System.out.println("\n=== Test: Map with Search Filter ==="); + + MapData data = client.map("https://docs.firecrawl.dev", + MapOptions.builder() + .search("api") + .limit(20) + .build()); + + assertNotNull(data.getLinks(), "Links should not be null"); + + // Verify that filtered results contain the search term + long matchingLinks = data.getLinks().stream() + .filter(link -> { + String url = link.get("url") != null ? link.get("url").toString().toLowerCase() : ""; + String title = link.get("title") != null ? link.get("title").toString().toLowerCase() : ""; + return url.contains("api") || title.contains("api"); + }) + .count(); + + System.out.println("✓ Map with search completed"); + System.out.println(" Total links: " + data.getLinks().size()); + System.out.println(" Links matching 'api': " + matchingLinks); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testMapWithSkipSitemap() { + System.out.println("\n=== Test: Map with Sitemap Skip ==="); + + MapData data = client.map("https://firecrawl.dev", + MapOptions.builder() + .sitemap("skip") + .limit(15) + .build()); + + assertNotNull(data.getLinks(), "Links should not be null"); + assertTrue(data.getLinks().size() <= 15, "Should respect limit"); + + // Verify all links are valid HTTP(S) URLs + boolean allValidUrls = data.getLinks().stream() + .allMatch(link -> { + String url = link.get("url") != null ? 
link.get("url").toString() : ""; + return url.startsWith("http://") || url.startsWith("https://"); + }); + + assertTrue(allValidUrls, "All URLs should be valid HTTP(S)"); + + System.out.println("✓ Map with sitemap=skip completed"); + System.out.println(" Links found: " + data.getLinks().size()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testMapWithSitemapOnly() { + System.out.println("\n=== Test: Map with Sitemap Only ==="); + + MapData data = client.map("https://firecrawl.dev", + MapOptions.builder() + .sitemap("only") + .limit(50) + .build()); + + assertNotNull(data.getLinks(), "Links should not be null"); + // Note: sitemapOnly may not always respect the limit strictly + + // Verify all links are valid HTTP(S) URLs + boolean allValidUrls = data.getLinks().stream() + .allMatch(link -> { + String url = link.get("url") != null ? link.get("url").toString() : ""; + return url.startsWith("http://") || url.startsWith("https://"); + }); + + assertTrue(allValidUrls, "All URLs should be valid HTTP(S)"); + + System.out.println("✓ Map with sitemap=only completed"); + System.out.println(" Links found: " + data.getLinks().size()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testMapWithIncludeSubdomains() { + System.out.println("\n=== Test: Map with Include Subdomains ==="); + + MapData data = client.map("https://firecrawl.dev", + MapOptions.builder() + .includeSubdomains(true) + .limit(20) + .build()); + + assertNotNull(data.getLinks(), "Links should not be null"); + + System.out.println("✓ Map with subdomains completed"); + System.out.println(" Total links: " + data.getLinks().size()); + + // Check if any subdomains were found + boolean hasSubdomains = data.getLinks().stream() + .anyMatch(link -> { + String url = link.get("url") != null ? 
link.get("url").toString() : ""; + return url.contains("docs.firecrawl.dev") || + url.contains("api.firecrawl.dev") || + (url.contains(".firecrawl.dev") && !url.contains("www.firecrawl.dev")); + }); + + if (hasSubdomains) { + System.out.println(" ✓ Found subdomain links"); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testMapFirecrawlDocs() { + System.out.println("\n=== Test: Map Firecrawl Documentation ==="); + + MapData data = client.map("https://docs.firecrawl.dev", + MapOptions.builder() + .limit(50) + .build()); + + assertNotNull(data.getLinks(), "Links should not be null"); + assertFalse(data.getLinks().isEmpty(), "Should find documentation links"); + + System.out.println("✓ Mapped Firecrawl documentation"); + System.out.println(" Total links: " + data.getLinks().size()); + + // Print sample links + System.out.println(" Sample documentation pages:"); + data.getLinks().stream() + .limit(5) + .forEach(link -> System.out.println(" - " + link.get("url"))); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testMapLinkStructure() { + System.out.println("\n=== Test: Verify Map Link Structure ==="); + + MapData data = client.map("https://firecrawl.dev", + MapOptions.builder() + .limit(5) + .build()); + + assertNotNull(data.getLinks(), "Links should not be null"); + assertFalse(data.getLinks().isEmpty(), "Should have links"); + + // Verify each link is a valid URL with expected fields + for (Map link : data.getLinks()) { + assertNotNull(link, "Link should not be null"); + assertNotNull(link.get("url"), "Link should have url field"); + assertTrue(link.get("url").toString().startsWith("http"), "URL should be valid: " + link.get("url")); + } + + System.out.println("✓ All links have correct structure"); + System.out.println(" Verified " + data.getLinks().size() + " links"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = 
".*\\S.*") + void testMapWithTimeout() { + System.out.println("\n=== Test: Map with Timeout ==="); + + MapData data = client.map("https://firecrawl.dev", + MapOptions.builder() + .timeout(15000) // 15 seconds + .limit(10) + .build()); + + assertNotNull(data.getLinks(), "Links should not be null"); + + System.out.println("✓ Map with timeout completed"); + System.out.println(" Timeout: 15000ms"); + System.out.println(" Links found: " + data.getLinks().size()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testMapComprehensive() { + System.out.println("\n=== Test: Map with All Options ==="); + + MapData data = client.map("https://docs.firecrawl.dev", + MapOptions.builder() + .includeSubdomains(false) + .limit(25) + .sitemap("include") + .timeout(20000) + .build()); + + assertNotNull(data.getLinks(), "Links should not be null"); + assertTrue(data.getLinks().size() <= 25, "Should respect limit"); + + System.out.println("✓ Comprehensive map completed"); + System.out.println(" Configuration:"); + System.out.println(" - Include subdomains: false"); + System.out.println(" - Limit: 25"); + System.out.println(" - Ignore sitemap: false"); + System.out.println(" - Timeout: 20000ms"); + System.out.println(" Results:"); + System.out.println(" - Links found: " + data.getLinks().size()); + } +} diff --git a/apps/java-sdk/src/test/java/com/firecrawl/ScrapeTest.java b/apps/java-sdk/src/test/java/com/firecrawl/ScrapeTest.java new file mode 100644 index 0000000000..c91cbc30a3 --- /dev/null +++ b/apps/java-sdk/src/test/java/com/firecrawl/ScrapeTest.java @@ -0,0 +1,177 @@ +package com.firecrawl; + +import com.firecrawl.client.FirecrawlClient; +import com.firecrawl.errors.FirecrawlException; +import com.firecrawl.models.Document; +import com.firecrawl.models.ScrapeOptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import 
java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Scrape Endpoint Tests + * + * Tests the scrape functionality of the Firecrawl Java SDK. + * These tests require FIRECRAWL_API_KEY environment variable to be set. + * + * Run with: FIRECRAWL_API_KEY=fc-xxx gradle test --tests "com.firecrawl.ScrapeTest" + */ +class ScrapeTest { + + private static FirecrawlClient client; + + @BeforeAll + static void setup() { + // Initialize client from environment variable + String apiKey = System.getenv("FIRECRAWL_API_KEY"); + if (apiKey != null && !apiKey.isBlank()) { + client = FirecrawlClient.fromEnv(); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testScrapeBasic() { + // Test basic scraping with markdown format + System.out.println("Testing basic scrape with markdown format..."); + + Document doc = client.scrape("https://example.com", + ScrapeOptions.builder() + .formats(List.of("markdown")) + .build()); + + // Assertions + assertNotNull(doc, "Document should not be null"); + assertNotNull(doc.getMarkdown(), "Markdown content should not be null"); + assertFalse(doc.getMarkdown().isEmpty(), "Markdown content should not be empty"); + + System.out.println("✓ Basic scrape test passed"); + System.out.println(" Markdown length: " + doc.getMarkdown().length() + " characters"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testScrapeWithMultipleFormats() { + // Test scraping with multiple formats + System.out.println("Testing scrape with multiple formats (markdown + html)..."); + + Document doc = client.scrape("https://example.com", + ScrapeOptions.builder() + .formats(List.of("markdown", "html")) + .build()); + + // Assertions + assertNotNull(doc, "Document should not be null"); + assertNotNull(doc.getMarkdown(), "Markdown content should not be null"); + assertNotNull(doc.getHtml(), "HTML content should not be null"); + 
assertFalse(doc.getMarkdown().isEmpty(), "Markdown should not be empty"); + assertFalse(doc.getHtml().isEmpty(), "HTML should not be empty"); + + System.out.println("✓ Multiple formats test passed"); + System.out.println(" Markdown length: " + doc.getMarkdown().length()); + System.out.println(" HTML length: " + doc.getHtml().length()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testScrapeWithMetadata() { + // Test that metadata is properly extracted + System.out.println("Testing scrape with metadata extraction..."); + + Document doc = client.scrape("https://example.com", + ScrapeOptions.builder() + .formats(List.of("markdown")) + .build()); + + // Assertions + assertNotNull(doc.getMetadata(), "Metadata should not be null"); + assertNotNull(doc.getMetadata().get("sourceURL"), "Source URL should be in metadata"); + assertTrue(doc.getMetadata().get("sourceURL").toString().contains("example.com"), + "Source URL should contain example.com"); + + System.out.println("✓ Metadata extraction test passed"); + System.out.println(" Source URL: " + doc.getMetadata().get("sourceURL")); + if (doc.getMetadata().get("title") != null) { + System.out.println(" Title: " + doc.getMetadata().get("title")); + } + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testScrapeWithOnlyMainContent() { + // Test scraping with onlyMainContent option + System.out.println("Testing scrape with onlyMainContent option..."); + + Document doc = client.scrape("https://example.com", + ScrapeOptions.builder() + .formats(List.of("markdown")) + .onlyMainContent(true) + .build()); + + // Assertions + assertNotNull(doc, "Document should not be null"); + assertNotNull(doc.getMarkdown(), "Markdown content should not be null"); + assertFalse(doc.getMarkdown().isEmpty(), "Markdown should not be empty"); + + System.out.println("✓ Only main content test passed"); + System.out.println(" Content length: " + 
doc.getMarkdown().length()); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testScrapeWithTimeout() { + // Test scraping with custom timeout + System.out.println("Testing scrape with custom timeout..."); + + Document doc = client.scrape("https://example.com", + ScrapeOptions.builder() + .formats(List.of("markdown")) + .timeout(10000) // 10 seconds + .build()); + + // Assertions + assertNotNull(doc, "Document should not be null"); + assertNotNull(doc.getMarkdown(), "Markdown should not be null"); + + System.out.println("✓ Timeout configuration test passed"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testScrapeInvalidUrl() { + // Test that invalid URLs are handled properly + System.out.println("Testing scrape with invalid URL..."); + + assertThrows(FirecrawlException.class, () -> { + client.scrape("not-a-valid-url", + ScrapeOptions.builder() + .formats(List.of("markdown")) + .build()); + }, "Should throw FirecrawlException for invalid URL"); + + System.out.println("✓ Invalid URL handling test passed"); + } + + @Test + @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*") + void testScrapeWithWaitFor() { + // Test scraping with waitFor option (useful for dynamic content) + System.out.println("Testing scrape with waitFor option..."); + + Document doc = client.scrape("https://example.com", + ScrapeOptions.builder() + .formats(List.of("markdown")) + .waitFor(1000) // Wait 1 second for page to load + .build()); + + // Assertions + assertNotNull(doc, "Document should not be null"); + assertNotNull(doc.getMarkdown(), "Markdown should not be null"); + + System.out.println("✓ WaitFor option test passed"); + } +} diff --git a/apps/java-sdk/src/test/java/com/firecrawl/SearchTest.java b/apps/java-sdk/src/test/java/com/firecrawl/SearchTest.java new file mode 100644 index 0000000000..46c977f797 --- /dev/null +++ 
package com.firecrawl;

import com.firecrawl.client.FirecrawlClient;
import com.firecrawl.models.*;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;

import java.util.List;
import java.util.Map;
import java.util.Objects;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Comprehensive Search Tests
 *
 * Tests the search functionality with various configurations.
 * Based on Node.js SDK patterns and tested against live firecrawl.dev.
 *
 * Run with: FIRECRAWL_API_KEY=fc-xxx gradle test --tests "com.firecrawl.SearchTest"
 */
class SearchTest {

    private static FirecrawlClient client;

    @BeforeAll
    static void setup() {
        // Only build a client when an API key is present; each test is also
        // gated on the same variable via @EnabledIfEnvironmentVariable, so a
        // null client is never dereferenced when the key is missing.
        String apiKey = System.getenv("FIRECRAWL_API_KEY");
        if (apiKey != null && !apiKey.isBlank()) {
            client = FirecrawlClient.fromEnv();
        }
    }

    /**
     * Null-safe lookup of a field on a raw search-result map.
     *
     * Search results expose optional fields (title, description — and, per the
     * API contract, even url is not guaranteed by the map type), so callers
     * must not call toString() on the raw value. Returns "" when absent.
     */
    private static String field(Map result, String key) {
        return Objects.toString(result.get(key), "");
    }

    @Test
    @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*")
    void testSearchMinimal() {
        System.out.println("\n=== Test: Search - Minimal Request ===");

        SearchData results = client.search("What is Firecrawl?");

        assertNotNull(results, "Search results should not be null");
        assertNotNull(results.getWeb(), "Web results should not be null");
        // FIX: assertFalse(isEmpty()) instead of assertTrue(!isEmpty()) —
        // identical check, idiomatic JUnit with a clearer failure report.
        assertFalse(results.getWeb().isEmpty(), "Should have at least one web result");

        // Verify result structure
        Map firstResult = results.getWeb().get(0);
        assertNotNull(firstResult.get("url"), "Result should have URL");
        assertTrue(firstResult.get("url").toString().startsWith("http"),
                "URL should be valid");

        System.out.println("✓ Search completed successfully");
        System.out.println(" Web results: " + results.getWeb().size());
        System.out.println(" Sample result: " + firstResult.get("url"));
        if (firstResult.get("title") != null) {
            System.out.println(" Title: " + firstResult.get("title"));
        }
    }

    @Test
    @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*")
    void testSearchWithLimit() {
        System.out.println("\n=== Test: Search with Limit ===");

        SearchData results = client.search("artificial intelligence",
                SearchOptions.builder()
                        .limit(5)
                        .build());

        assertNotNull(results.getWeb(), "Web results should not be null");
        assertTrue(results.getWeb().size() <= 5,
                "Should respect limit of 5: got " + results.getWeb().size());

        System.out.println("✓ Search with limit completed");
        System.out.println(" Requested limit: 5");
        System.out.println(" Actual results: " + results.getWeb().size());
    }

    @Test
    @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*")
    void testSearchWithMultipleSources() {
        System.out.println("\n=== Test: Search with Multiple Sources ===");

        SearchData results = client.search("Firecrawl web scraping",
                SearchOptions.builder()
                        .sources(List.of("web", "news"))
                        .limit(3)
                        .build());

        assertNotNull(results.getWeb(), "Web results should not be null");
        assertTrue(results.getWeb().size() <= 3, "Web results should respect limit");

        System.out.println("✓ Multi-source search completed");
        System.out.println(" Web results: " + results.getWeb().size());
        // News results may legitimately be absent for this query.
        if (results.getNews() != null) {
            System.out.println(" News results: " + results.getNews().size());
        } else {
            System.out.println(" News results: 0");
        }
    }

    @Test
    @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*")
    void testSearchResultStructure() {
        System.out.println("\n=== Test: Verify Search Result Structure ===");

        SearchData results = client.search("test query",
                SearchOptions.builder()
                        .limit(1)
                        .build());

        assertNotNull(results.getWeb(), "Web results should not be null");

        if (!results.getWeb().isEmpty()) {
            Map result = results.getWeb().get(0);

            assertNotNull(result.get("url"), "Result must have URL");
            assertTrue(result.get("url") instanceof String, "URL should be string");
            assertTrue(result.get("url").toString().startsWith("http"),
                    "URL should be valid");

            // Title and description may be null but if present should be strings
            if (result.get("title") != null) {
                assertTrue(result.get("title") instanceof String,
                        "Title should be string");
            }
            if (result.get("description") != null) {
                assertTrue(result.get("description") instanceof String,
                        "Description should be string");
            }

            System.out.println("✓ Result structure verified");
            System.out.println(" URL: ✓");
            System.out.println(" Title: " + (result.get("title") != null ? "✓" : "null"));
            System.out.println(" Description: " + (result.get("description") != null ? "✓" : "null"));
        }
    }

    @Test
    @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*")
    void testSearchWithLocation() {
        System.out.println("\n=== Test: Search with Location ===");

        SearchData results = client.search("restaurants near me",
                SearchOptions.builder()
                        .location("US")
                        .limit(5)
                        .build());

        assertNotNull(results.getWeb(), "Web results should not be null");

        System.out.println("✓ Search with location completed");
        System.out.println(" Location: US");
        System.out.println(" Results: " + results.getWeb().size());
    }

    @Test
    @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*")
    void testSearchWithTimeFilter() {
        System.out.println("\n=== Test: Search with Time Filter ===");

        SearchData results = client.search("latest AI news",
                SearchOptions.builder()
                        .tbs("qdr:m") // Past month
                        .limit(5)
                        .build());

        assertNotNull(results.getWeb(), "Web results should not be null");

        System.out.println("✓ Search with time filter completed");
        System.out.println(" Time filter: Past month (qdr:m)");
        System.out.println(" Results: " + results.getWeb().size());
    }

    @Test
    @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*")
    void testSearchWithScrapeOptions() {
        System.out.println("\n=== Test: Search with Scrape Options ===");

        SearchData results = client.search("Firecrawl documentation",
                SearchOptions.builder()
                        .limit(2)
                        .scrapeOptions(ScrapeOptions.builder()
                                .formats(List.of("markdown"))
                                .onlyMainContent(true)
                                .build())
                        .build());

        assertNotNull(results.getWeb(), "Web results should not be null");

        // When scrapeOptions with markdown format are provided, results should include markdown content
        if (!results.getWeb().isEmpty()) {
            Map first = results.getWeb().get(0);
            Object markdown = first.get("markdown");
            assertNotNull(markdown, "Scraped result should contain markdown content when formats=[markdown]");
            assertFalse(markdown.toString().isEmpty(), "Markdown content should not be empty");
        }

        System.out.println("✓ Search with scrape options completed");
        System.out.println(" Results: " + results.getWeb().size());
        System.out.println(" Scrape formats: markdown");
    }

    @Test
    @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*")
    void testSearchFirecrawlSpecific() {
        System.out.println("\n=== Test: Search for Firecrawl ===");

        SearchData results = client.search("Firecrawl web scraping API",
                SearchOptions.builder()
                        .limit(10)
                        .build());

        assertNotNull(results.getWeb(), "Web results should not be null");
        assertFalse(results.getWeb().isEmpty(), "Should find Firecrawl results");

        // Verify results contain Firecrawl-related content.
        // FIX: url was dereferenced with result.get("url").toString() without a
        // null guard (unlike title/description below) — a result missing "url"
        // would throw NPE instead of simply not matching. field() is null-safe.
        boolean hasFirecrawlContent = results.getWeb().stream()
                .anyMatch(result -> {
                    String url = field(result, "url").toLowerCase();
                    String title = field(result, "title").toLowerCase();
                    String desc = field(result, "description").toLowerCase();

                    return url.contains("firecrawl") ||
                            title.contains("firecrawl") ||
                            desc.contains("firecrawl");
                });

        assertTrue(hasFirecrawlContent, "Results should mention Firecrawl");

        System.out.println("✓ Firecrawl search completed");
        System.out.println(" Total results: " + results.getWeb().size());
        System.out.println(" Results mentioning Firecrawl: ✓");

        // Print sample results
        System.out.println(" Sample results:");
        results.getWeb().stream()
                .limit(3)
                .forEach(result -> {
                    System.out.println(" - " + result.get("title"));
                    System.out.println(" " + result.get("url"));
                });
    }

    @Test
    @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*")
    void testSearchComprehensive() {
        System.out.println("\n=== Test: Search with All Options ===");

        SearchData results = client.search("web scraping tools",
                SearchOptions.builder()
                        .sources(List.of("web"))
                        .limit(5)
                        .tbs("qdr:y") // Past year
                        .location("US")
                        .timeout(30000)
                        .scrapeOptions(ScrapeOptions.builder()
                                .formats(List.of("markdown"))
                                .onlyMainContent(true)
                                .waitFor(1000)
                                .build())
                        .build());

        assertNotNull(results.getWeb(), "Web results should not be null");
        assertTrue(results.getWeb().size() <= 5, "Should respect limit");

        System.out.println("✓ Comprehensive search completed");
        System.out.println(" Configuration:");
        System.out.println(" - Sources: web");
        System.out.println(" - Limit: 5");
        System.out.println(" - Time filter: Past year");
        System.out.println(" - Location: US");
        System.out.println(" - Timeout: 30000ms");
        System.out.println(" - Scrape: markdown, main content only");
        System.out.println(" Results:");
        System.out.println(" - Web results: " + results.getWeb().size());
    }

    @Test
    @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*")
    void testSearchContentVerification() {
        System.out.println("\n=== Test: Search Content Verification ===");

        SearchData results = client.search("Python programming language",
                SearchOptions.builder()
                        .limit(5)
                        .build());

        assertNotNull(results.getWeb(), "Web results should not be null");
        assertFalse(results.getWeb().isEmpty(), "Should have results");

        // Verify results are relevant to the query: match against the
        // concatenation of url, title and description (null-safe).
        boolean hasRelevantContent = results.getWeb().stream()
                .anyMatch(result -> {
                    String text = String.format("%s %s %s",
                            result.get("url"),
                            result.get("title"),
                            result.get("description")
                    ).toLowerCase();
                    return text.contains("python");
                });

        assertTrue(hasRelevantContent, "Results should be relevant to query");

        System.out.println("✓ Content verification passed");
        System.out.println(" Query: Python programming language");
        System.out.println(" Relevant results found: ✓");
    }

    @Test
    @EnabledIfEnvironmentVariable(named = "FIRECRAWL_API_KEY", matches = ".*\\S.*")
    void testSearchIgnoreInvalidURLs() {
        System.out.println("\n=== Test: Search with Ignore Invalid URLs ===");

        SearchData results = client.search("technology news",
                SearchOptions.builder()
                        .limit(5)
                        .ignoreInvalidURLs(true)
                        .build());

        assertNotNull(results.getWeb(), "Web results should not be null");

        // Verify all URLs are valid.
        // FIX: null-safe field access — a result without "url" now fails the
        // startsWith assertion (a meaningful test failure) instead of crashing
        // the stream with a NullPointerException.
        boolean allValidUrls = results.getWeb().stream()
                .allMatch(result -> {
                    String url = field(result, "url");
                    return url.startsWith("http://") || url.startsWith("https://");
                });

        assertTrue(allValidUrls, "All URLs should be valid HTTP(S)");

        System.out.println("✓ Search with URL validation completed");
        System.out.println(" Results: " + results.getWeb().size());
        System.out.println(" All URLs valid: ✓");
    }
}