From ca58f5ea6e243e73a6308e31716fdd6fb8388baa Mon Sep 17 00:00:00 2001 From: "firecrawl-spring[bot]" <254786068+firecrawl-spring[bot]@users.noreply.github.com> Date: Mon, 16 Mar 2026 10:44:32 -0400 Subject: [PATCH 1/3] feat(api): add scrapeZDR/searchZDR enum support with backward compat (#3156) Add getScrapeZDR/getSearchZDR helpers that resolve the effective ZDR mode from either the new enum fields (scrapeZDR/searchZDR) or the legacy boolean fields (forceZDR/allowZDR). This enables safe deployment alongside the DB migration since ACUC cache may serve stale boolean-format flags for up to 10 minutes. - Add new enum fields to TeamFlags alongside existing boolean fields - Replace all direct forceZDR/allowZDR reads with helper functions - Search endpoints use getSearchZDR, all others use getScrapeZDR - Update tests to use new enum format Co-authored-by: firecrawl-spring[bot] <254786068+firecrawl-spring[bot]@users.noreply.github.com> Co-authored-by: micahstairs --- .../src/__tests__/snips/v1/billing.test.ts | 8 ++--- apps/api/src/__tests__/snips/v1/zdr.test.ts | 21 ++------------ .../src/__tests__/snips/v2/billing.test.ts | 8 ++--- apps/api/src/__tests__/snips/v2/zdr.test.ts | 21 ++------------ apps/api/src/controllers/v0/crawl-cancel.ts | 3 +- apps/api/src/controllers/v0/crawl-status.ts | 3 +- apps/api/src/controllers/v0/crawl.ts | 3 +- apps/api/src/controllers/v0/keyAuth.ts | 3 +- apps/api/src/controllers/v0/scrape.ts | 3 +- apps/api/src/controllers/v0/search.ts | 3 +- apps/api/src/controllers/v1/batch-scrape.ts | 3 +- apps/api/src/controllers/v1/crawl.ts | 3 +- apps/api/src/controllers/v1/deep-research.ts | 3 +- apps/api/src/controllers/v1/extract.ts | 7 +++-- .../src/controllers/v1/generate-llmstxt.ts | 3 +- apps/api/src/controllers/v1/map.ts | 5 ++-- apps/api/src/controllers/v1/scrape-status.ts | 5 ++-- apps/api/src/controllers/v1/scrape.ts | 3 +- apps/api/src/controllers/v1/search.ts | 5 ++-- apps/api/src/controllers/v1/types.ts | 2 ++ apps/api/src/controllers/v1/x402-search.ts | 7 +++-- apps/api/src/controllers/v2/agent.ts | 7 +++-- apps/api/src/controllers/v2/batch-scrape.ts | 3 +- apps/api/src/controllers/v2/crawl.ts | 3 +- apps/api/src/controllers/v2/extract.ts | 7 +++-- apps/api/src/controllers/v2/map.ts | 3 +- apps/api/src/controllers/v2/scrape-status.ts | 5 ++-- apps/api/src/controllers/v2/scrape.ts | 3 +- apps/api/src/controllers/v2/search.ts | 5 ++-- apps/api/src/controllers/v2/types.ts | 2 ++ apps/api/src/controllers/v2/x402-search.ts | 7 +++-- apps/api/src/lib/crawl-redis.ts | 3 +- apps/api/src/lib/map-utils.ts | 3 +- apps/api/src/lib/permissions.ts | 6 ++-- apps/api/src/lib/zdr-helpers.ts | 29 +++++++++++++++++++ apps/api/src/search/scrape.ts | 3 +- 36 files changed, 122 insertions(+), 89 deletions(-) create mode 100644 apps/api/src/lib/zdr-helpers.ts diff --git a/apps/api/src/__tests__/snips/v1/billing.test.ts b/apps/api/src/__tests__/snips/v1/billing.test.ts index 325af196c1..6c6c347b46 100644 --- a/apps/api/src/__tests__/snips/v1/billing.test.ts +++ b/apps/api/src/__tests__/snips/v1/billing.test.ts @@ -394,7 +394,7 @@ describeIf(TEST_PRODUCTION)("Billing tests", () => { name: "billing/bills ZDR scrape correctly", credits: 100, flags: { - allowZDR: true, + scrapeZDR: "allowed", }, }); @@ -462,7 +462,7 @@ describeIf(TEST_PRODUCTION)("Billing tests", () => { name: "billing/bills ZDR batch scrape correctly", credits: 100, flags: { - allowZDR: true, + scrapeZDR: "allowed", }, }); @@ -531,7 +531,7 @@ describeIf(TEST_PRODUCTION)("Billing tests", () => { name: "billing/bills ZDR crawl correctly", credits: 200, flags: { - allowZDR: true, + scrapeZDR: "allowed", }, }); @@ -595,7 +595,7 @@ describeIf(TEST_PRODUCTION)("Billing tests", () => { name: "billing/bills ZDR scrape correctly", credits: 100, flags: { - allowZDR: true, + scrapeZDR: "allowed", zdrCost: 0, }, }); diff --git a/apps/api/src/__tests__/snips/v1/zdr.test.ts b/apps/api/src/__tests__/snips/v1/zdr.test.ts index 4787751d11..ab10290cac 100644 --- a/apps/api/src/__tests__/snips/v1/zdr.test.ts +++ b/apps/api/src/__tests__/snips/v1/zdr.test.ts @@ -24,12 +24,7 @@ describeIf(TEST_PRODUCTION)("Zero Data Retention", () => { name: `zdr/${scope}/scrape`, credits: 10000, flags: { - allowZDR: true, - ...(scope === "Team-scoped" - ? { - forceZDR: true, - } - : {}), + scrapeZDR: scope === "Team-scoped" ? "forced" : "allowed", }, }); @@ -66,12 +61,7 @@ describeIf(TEST_PRODUCTION)("Zero Data Retention", () => { name: `zdr/${scope}/crawl`, credits: 10000, flags: { - allowZDR: true, - ...(scope === "Team-scoped" - ? { - forceZDR: true, - } - : {}), + scrapeZDR: scope === "Team-scoped" ? "forced" : "allowed", }, }); @@ -122,12 +112,7 @@ describeIf(TEST_PRODUCTION)("Zero Data Retention", () => { name: `zdr/${scope}/batch-scrape`, credits: 10000, flags: { - allowZDR: true, - ...(scope === "Team-scoped" - ? { - forceZDR: true, - } - : {}), + scrapeZDR: scope === "Team-scoped" ? "forced" : "allowed", }, }); diff --git a/apps/api/src/__tests__/snips/v2/billing.test.ts b/apps/api/src/__tests__/snips/v2/billing.test.ts index 470cf7a16e..71f8c3b1bd 100644 --- a/apps/api/src/__tests__/snips/v2/billing.test.ts +++ b/apps/api/src/__tests__/snips/v2/billing.test.ts @@ -369,7 +369,7 @@ describeIf(TEST_PRODUCTION)("Billing tests", () => { name: "billing/bills ZDR scrape correctly", credits: 100, flags: { - allowZDR: true, + scrapeZDR: "allowed", }, }); @@ -439,7 +439,7 @@ describeIf(TEST_PRODUCTION)("Billing tests", () => { name: "billing/bills ZDR batch scrape correctly", credits: 100, flags: { - allowZDR: true, + scrapeZDR: "allowed", }, }); @@ -510,7 +510,7 @@ describeIf(TEST_PRODUCTION)("Billing tests", () => { name: "billing/bills ZDR crawl correctly", credits: 200, flags: { - allowZDR: true, + scrapeZDR: "allowed", }, }); @@ -576,7 +576,7 @@ describeIf(TEST_PRODUCTION)("Billing tests", () => { name: "billing/bills ZDR scrape correctly", credits: 100, flags: { - allowZDR: true, + scrapeZDR: "allowed", zdrCost: 0, }, }); diff --git a/apps/api/src/__tests__/snips/v2/zdr.test.ts b/apps/api/src/__tests__/snips/v2/zdr.test.ts index 9ed7cce140..3b57a61e4b 100644 --- a/apps/api/src/__tests__/snips/v2/zdr.test.ts +++ b/apps/api/src/__tests__/snips/v2/zdr.test.ts @@ -24,12 +24,7 @@ describeIf(TEST_PRODUCTION)("Zero Data Retention", () => { name: `zdr/${scope}/scrape`, credits: 10000, flags: { - allowZDR: true, - ...(scope === "Team-scoped" - ? { - forceZDR: true, - } - : {}), + scrapeZDR: scope === "Team-scoped" ? "forced" : "allowed", }, }); @@ -66,12 +61,7 @@ describeIf(TEST_PRODUCTION)("Zero Data Retention", () => { name: `zdr/${scope}/crawl`, credits: 10000, flags: { - allowZDR: true, - ...(scope === "Team-scoped" - ? { - forceZDR: true, - } - : {}), + scrapeZDR: scope === "Team-scoped" ? "forced" : "allowed", }, }); @@ -120,12 +110,7 @@ describeIf(TEST_PRODUCTION)("Zero Data Retention", () => { name: `zdr/${scope}/batch-scrape`, credits: 10000, flags: { - allowZDR: true, - ...(scope === "Team-scoped" - ? { - forceZDR: true, - } - : {}), + scrapeZDR: scope === "Team-scoped" ? "forced" : "allowed", }, }); diff --git a/apps/api/src/controllers/v0/crawl-cancel.ts b/apps/api/src/controllers/v0/crawl-cancel.ts index 11ea5b9f6d..91fc109273 100644 --- a/apps/api/src/controllers/v0/crawl-cancel.ts +++ b/apps/api/src/controllers/v0/crawl-cancel.ts @@ -7,6 +7,7 @@ import * as Sentry from "@sentry/node"; import { configDotenv } from "dotenv"; import { redisEvictConnection } from "../../../src/services/redis"; import { crawlGroup } from "../../services/worker/nuq"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; configDotenv(); export async function crawlCancelController(req: Request, res: Response) { @@ -18,7 +19,7 @@ export async function crawlCancelController(req: Request, res: Response) { const { team_id } = auth; - if (auth.chunk?.flags?.forceZDR) { + if (getScrapeZDR(auth.chunk?.flags) === "forced") { return res.status(400).json({ error: "Your team has zero data retention enabled. This is not supported on the v0 API. Please update your code to use the v1 API.", diff --git a/apps/api/src/controllers/v0/crawl-status.ts b/apps/api/src/controllers/v0/crawl-status.ts index 7262c4d58f..3c18b3169d 100644 --- a/apps/api/src/controllers/v0/crawl-status.ts +++ b/apps/api/src/controllers/v0/crawl-status.ts @@ -13,6 +13,7 @@ import type { DBScrape, PseudoJob } from "../v1/crawl-status"; import { getJobFromGCS } from "../../lib/gcs-jobs"; import { scrapeQueue, NuQJob } from "../../services/worker/nuq"; import { includesFormat } from "../../lib/format-utils"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; configDotenv(); async function getJobs( @@ -96,7 +97,7 @@ export async function crawlStatusController(req: Request, res: Response) { return res.status(auth.status).json({ error: auth.error }); } - if (auth.chunk?.flags?.forceZDR) { + if (getScrapeZDR(auth.chunk?.flags) === "forced") { return res.status(400).json({ error: "Your team has zero data retention enabled. This is not supported on the v0 API. Please update your code to use the v1 API.", diff --git a/apps/api/src/controllers/v0/crawl.ts b/apps/api/src/controllers/v0/crawl.ts index 70f8d990e2..58b12d5445 100644 --- a/apps/api/src/controllers/v0/crawl.ts +++ b/apps/api/src/controllers/v0/crawl.ts @@ -35,6 +35,7 @@ import { fromV0ScrapeOptions } from "../v2/types"; import { isSelfHosted } from "../../lib/deployment"; import { crawlGroup } from "../../services/worker/nuq"; import { logRequest } from "../../services/logging/log_job"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; export async function crawlController(req: Request, res: Response) { try { @@ -45,7 +46,7 @@ export async function crawlController(req: Request, res: Response) { const { team_id, chunk } = auth; - if (chunk?.flags?.forceZDR) { + if (getScrapeZDR(chunk?.flags) === "forced") { return res.status(400).json({ error: "Your team has zero data retention enabled. This is not supported on the v0 API. Please update your code to use the v1 API.", diff --git a/apps/api/src/controllers/v0/keyAuth.ts b/apps/api/src/controllers/v0/keyAuth.ts index bbf6361389..fd8e331276 100644 --- a/apps/api/src/controllers/v0/keyAuth.ts +++ b/apps/api/src/controllers/v0/keyAuth.ts @@ -4,6 +4,7 @@ import { Request, Response } from "express"; import { authenticateUser } from "../auth"; import { redisEvictConnection } from "../../../src/services/redis"; import { logger } from "../../lib/logger"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; export const keyAuthController = async (req: Request, res: Response) => { try { @@ -13,7 +14,7 @@ export const keyAuthController = async (req: Request, res: Response) => { return res.status(auth.status).json({ error: auth.error }); } - if (auth.chunk?.flags?.forceZDR) { + if (getScrapeZDR(auth.chunk?.flags) === "forced") { return res.status(400).json({ error: "Your team has zero data retention enabled. This is not supported on the v0 API. Please update your code to use the v1 API.", diff --git a/apps/api/src/controllers/v0/scrape.ts b/apps/api/src/controllers/v0/scrape.ts index 22e25054f4..c89484b999 100644 --- a/apps/api/src/controllers/v0/scrape.ts +++ b/apps/api/src/controllers/v0/scrape.ts @@ -25,6 +25,7 @@ import { ScrapeJobTimeoutError } from "../../lib/error"; import { scrapeQueue } from "../../services/worker/nuq"; import { getErrorContactMessage } from "../../lib/deployment"; import { logRequest } from "../../services/logging/log_job"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; async function scrapeHelper( jobId: string, @@ -185,7 +186,7 @@ export async function scrapeController(req: Request, res: Response) { const { team_id, chunk } = auth; - if (chunk?.flags?.forceZDR) { + if (getScrapeZDR(chunk?.flags) === "forced") { return res.status(400).json({ error: "Your team has zero data retention enabled. This is not supported on the v0 API. Please update your code to use the v1 API.", diff --git a/apps/api/src/controllers/v0/search.ts b/apps/api/src/controllers/v0/search.ts index d8dc46aaee..d9955b3986 100644 --- a/apps/api/src/controllers/v0/search.ts +++ b/apps/api/src/controllers/v0/search.ts @@ -24,6 +24,7 @@ import { fromV0Combo } from "../v2/types"; import { ScrapeJobTimeoutError } from "../../lib/error"; import { scrapeQueue } from "../../services/worker/nuq"; import { defaultOrigin } from "../../lib/default-values"; +import { getSearchZDR } from "../../lib/zdr-helpers"; async function searchHelper( jobId: string, @@ -176,7 +177,7 @@ export async function searchController(req: Request, res: Response) { } const { team_id, chunk } = auth; - if (chunk?.flags?.forceZDR) { + if (getSearchZDR(chunk?.flags) === "forced") { return res.status(400).json({ error: "Your team has zero data retention enabled. This is not supported on the v0 API. Please update your code to use the v1 API.", diff --git a/apps/api/src/controllers/v1/batch-scrape.ts b/apps/api/src/controllers/v1/batch-scrape.ts index 3f74fc6b36..6b9cd0b67f 100644 --- a/apps/api/src/controllers/v1/batch-scrape.ts +++ b/apps/api/src/controllers/v1/batch-scrape.ts @@ -28,6 +28,7 @@ import { fromV1ScrapeOptions } from "../v2/types"; import { checkPermissions } from "../../lib/permissions"; import { crawlGroup } from "../../services/worker/nuq"; import { logRequest } from "../../services/logging/log_job"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; export async function batchScrapeController( req: RequestWithAuth<{}, BatchScrapeResponse, BatchScrapeRequest>, @@ -49,7 +50,7 @@ export async function batchScrapeController( } const zeroDataRetention = - req.acuc?.flags?.forceZDR || req.body.zeroDataRetention; + getScrapeZDR(req.acuc?.flags) === "forced" || req.body.zeroDataRetention; const id = req.body.appendToId ?? uuidv7(); const logger = _logger.child({ diff --git a/apps/api/src/controllers/v1/crawl.ts b/apps/api/src/controllers/v1/crawl.ts index 65fdb21971..55a5986a34 100644 --- a/apps/api/src/controllers/v1/crawl.ts +++ b/apps/api/src/controllers/v1/crawl.ts @@ -20,6 +20,7 @@ import { fromV1ScrapeOptions } from "../v2/types"; import { checkPermissions } from "../../lib/permissions"; import { crawlGroup } from "../../services/worker/nuq"; import { logRequest } from "../../services/logging/log_job"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; export async function crawlController( req: RequestWithAuth<{}, CrawlResponse, CrawlRequest>, @@ -37,7 +38,7 @@ export async function crawlController( } const zeroDataRetention = - req.acuc?.flags?.forceZDR || req.body.zeroDataRetention; + getScrapeZDR(req.acuc?.flags) === "forced" || req.body.zeroDataRetention; const id = uuidv7(); const logger = _logger.child({ diff --git a/apps/api/src/controllers/v1/deep-research.ts b/apps/api/src/controllers/v1/deep-research.ts index d72a525748..f6c45b3c74 100644 --- a/apps/api/src/controllers/v1/deep-research.ts +++ b/apps/api/src/controllers/v1/deep-research.ts @@ -6,6 +6,7 @@ import * as Sentry from "@sentry/node"; import { saveDeepResearch } from "../../lib/deep-research/deep-research-redis"; import { z } from "zod"; import { logRequest } from "../../services/logging/log_job"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; const deepResearchRequestSchema = z .object({ @@ -81,7 +82,7 @@ export async function deepResearchController( req: RequestWithAuth<{}, DeepResearchResponse, DeepResearchRequest>, res: Response, ) { - if (req.acuc?.flags?.forceZDR) { + if (getScrapeZDR(req.acuc?.flags) === "forced") { return res.status(400).json({ success: false, error: diff --git a/apps/api/src/controllers/v1/extract.ts b/apps/api/src/controllers/v1/extract.ts index 1d0e947caf..f4882df0da 100644 --- a/apps/api/src/controllers/v1/extract.ts +++ b/apps/api/src/controllers/v1/extract.ts @@ -20,6 +20,7 @@ import { } from "../v2/types"; import { createWebhookSender, WebhookEvent } from "../../services/webhook"; import { logRequest } from "../../services/logging/log_job"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; import { config } from "../../config"; async function oldExtract( @@ -105,7 +106,7 @@ export async function extractController( const originalRequest = { ...req.body }; req.body = extractRequestSchema.parse(req.body); - if (req.acuc?.flags?.forceZDR) { + if (getScrapeZDR(req.acuc?.flags) === "forced") { return res.status(400).json({ success: false, error: @@ -138,7 +139,7 @@ export async function extractController( team_id: req.auth.team_id, subId: req.acuc?.sub_id, extractId, - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getScrapeZDR(req.acuc?.flags) === "forced", }); await logRequest({ @@ -193,7 +194,7 @@ export async function extractController( showLLMUsage: req.body.__experimental_llmUsage, showSources: req.body.__experimental_showSources || req.body.showSources, showCostTracking: req.body.__experimental_showCostTracking, - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getScrapeZDR(req.acuc?.flags) === "forced", }); await addExtractJobToQueue(extractId, jobData); diff --git a/apps/api/src/controllers/v1/generate-llmstxt.ts b/apps/api/src/controllers/v1/generate-llmstxt.ts index 2fe3c83bca..50210abce0 100644 --- a/apps/api/src/controllers/v1/generate-llmstxt.ts +++ b/apps/api/src/controllers/v1/generate-llmstxt.ts @@ -10,6 +10,7 @@ import { getGenerateLlmsTxtQueue } from "../../services/queue-service"; import * as Sentry from "@sentry/node"; import { saveGeneratedLlmsTxt } from "../../lib/generate-llmstxt/generate-llmstxt-redis"; import { logRequest } from "../../services/logging/log_job"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; type GenerateLLMsTextResponse = | ErrorResponse @@ -28,7 +29,7 @@ export async function generateLLMsTextController( req: RequestWithAuth<{}, GenerateLLMsTextResponse, GenerateLLMsTextRequest>, res: Response, ) { - if (req.acuc?.flags?.forceZDR) { + if (getScrapeZDR(req.acuc?.flags) === "forced") { return res.status(400).json({ success: false, error: diff --git a/apps/api/src/controllers/v1/map.ts b/apps/api/src/controllers/v1/map.ts index f65481e02a..5026311a1e 100644 --- a/apps/api/src/controllers/v1/map.ts +++ b/apps/api/src/controllers/v1/map.ts @@ -30,6 +30,7 @@ import { } from "../../services/index"; import { MapTimeoutError } from "../../lib/error"; import { checkPermissions } from "../../lib/permissions"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; configDotenv(); const redis = new Redis(config.REDIS_URL!); @@ -126,7 +127,7 @@ export async function getMapResults({ let links: string[] = [url]; let mapResults: MapDocument[] = []; - const zeroDataRetention = flags?.forceZDR || false; + const zeroDataRetention = getScrapeZDR(flags) === "forced" || false; const sc: StoredCrawl = { originUrl: url, @@ -369,7 +370,7 @@ export async function mapController( const originalRequest = req.body; req.body = mapRequestSchema.parse(req.body); - if (req.acuc?.flags?.forceZDR) { + if (getScrapeZDR(req.acuc?.flags) === "forced") { return res.status(400).json({ success: false, error: diff --git a/apps/api/src/controllers/v1/scrape-status.ts b/apps/api/src/controllers/v1/scrape-status.ts index 3cce861ce2..9b7ebc2115 100644 --- a/apps/api/src/controllers/v1/scrape-status.ts +++ b/apps/api/src/controllers/v1/scrape-status.ts @@ -2,6 +2,7 @@ import { Response } from "express"; import { supabaseGetScrapeByIdOnlyData } from "../../lib/supabase-jobs"; import { getJob } from "./crawl-status"; import { logger as _logger } from "../../lib/logger"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; export async function scrapeStatusController(req: any, res: any) { const logger = _logger.child({ @@ -10,10 +11,10 @@ export async function scrapeStatusController(req: any, res: any) { teamId: req.auth.team_id, jobId: req.params.jobId, scrapeId: req.params.jobId, - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getScrapeZDR(req.acuc?.flags) === "forced", }); - if (req.acuc?.flags?.forceZDR) { + if (getScrapeZDR(req.acuc?.flags) === "forced") { return res.status(400).json({ success: false, error: diff --git a/apps/api/src/controllers/v1/scrape.ts b/apps/api/src/controllers/v1/scrape.ts index e7e1d2c240..2e22d174ec 100644 --- a/apps/api/src/controllers/v1/scrape.ts +++ b/apps/api/src/controllers/v1/scrape.ts @@ -22,6 +22,7 @@ import { AbortManagerThrownError } from "../../scraper/scrapeURL/lib/abortManage import { logRequest } from "../../services/logging/log_job"; import { getErrorContactMessage } from "../../lib/deployment"; import { captureExceptionWithZdrCheck } from "../../services/sentry"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; export async function scrapeController( req: RequestWithAuth<{}, ScrapeResponse, ScrapeRequest>, @@ -45,7 +46,7 @@ export async function scrapeController( } const zeroDataRetention = - req.acuc?.flags?.forceZDR || req.body.zeroDataRetention; + getScrapeZDR(req.acuc?.flags) === "forced" || req.body.zeroDataRetention; const logger = _logger.child({ method: "scrapeController", diff --git a/apps/api/src/controllers/v1/search.ts b/apps/api/src/controllers/v1/search.ts index d144ea7f57..ce3313b778 100644 --- a/apps/api/src/controllers/v1/search.ts +++ b/apps/api/src/controllers/v1/search.ts @@ -26,6 +26,7 @@ import { filterDocumentsWithContent, } from "../../search/transform"; import { fromV1ScrapeOptions } from "../v2/types"; +import { getSearchZDR } from "../../lib/zdr-helpers"; // Used for deep research export async function searchAndScrapeSearchResult( @@ -90,11 +91,11 @@ export async function searchController( teamId: req.auth.team_id, module: "search", method: "searchController", - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getSearchZDR(req.acuc?.flags) === "forced", searchQuery: req.body.query.slice(0, 100), }); - if (req.acuc?.flags?.forceZDR) { + if (getSearchZDR(req.acuc?.flags) === "forced") { return res.status(400).json({ success: false, error: diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index 045c77b53e..6e230c4986 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -1279,6 +1279,8 @@ export type TeamFlags = { unblockedDomains?: string[]; forceZDR?: boolean; allowZDR?: boolean; + scrapeZDR?: "disabled" | "allowed" | "forced"; + searchZDR?: "disabled" | "allowed" | "forced"; zdrCost?: number; checkRobotsOnScrape?: boolean; crawlTtlHours?: number; diff --git a/apps/api/src/controllers/v1/x402-search.ts b/apps/api/src/controllers/v1/x402-search.ts index 805b2519a4..b1234257c0 100644 --- a/apps/api/src/controllers/v1/x402-search.ts +++ b/apps/api/src/controllers/v1/x402-search.ts @@ -27,6 +27,7 @@ import { captureExceptionWithZdrCheck, } from "../../services/sentry"; import { getJobPriority } from "../../lib/job-priority"; +import { getSearchZDR } from "../../lib/zdr-helpers"; interface DocumentWithCostTracking { document: Document; @@ -51,7 +52,7 @@ async function scrapeX402SearchResult( const costTracking = new CostTracking(); - const zeroDataRetention = flags?.forceZDR ?? false; + const zeroDataRetention = getSearchZDR(flags) === "forced"; applyZdrScope(zeroDataRetention); try { @@ -185,10 +186,10 @@ export async function x402SearchController( teamId: req.auth.team_id, module: "x402-search", method: "x402SearchController", - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getSearchZDR(req.acuc?.flags) === "forced", }); - if (req.acuc?.flags?.forceZDR) { + if (getSearchZDR(req.acuc?.flags) === "forced") { return res.status(400).json({ success: false, error: diff --git a/apps/api/src/controllers/v2/agent.ts b/apps/api/src/controllers/v2/agent.ts index 85334ea663..34136a277f 100644 --- a/apps/api/src/controllers/v2/agent.ts +++ b/apps/api/src/controllers/v2/agent.ts @@ -10,6 +10,7 @@ import { logger as _logger } from "../../lib/logger"; import { logRequest } from "../../services/logging/log_job"; import { config } from "../../config"; import { supabase_service } from "../../services/supabase"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; export async function agentController( req: RequestWithAuth<{}, AgentResponse, AgentRequest>, @@ -24,13 +25,13 @@ export async function agentController( team_id: req.auth.team_id, module: "api/v2", method: "agentController", - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getScrapeZDR(req.acuc?.flags) === "forced", }); const originalRequest = { ...req.body }; req.body = agentRequestSchema.parse(req.body); - if (req.acuc?.flags?.forceZDR) { + if (getScrapeZDR(req.acuc?.flags) === "forced") { return res.status(400).json({ success: false, error: @@ -42,7 +43,7 @@ export async function agentController( request: req.body, originalRequest, subId: req.acuc?.sub_id, - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getScrapeZDR(req.acuc?.flags) === "forced", }); if (!config.EXTRACT_V3_BETA_URL) { diff --git a/apps/api/src/controllers/v2/batch-scrape.ts b/apps/api/src/controllers/v2/batch-scrape.ts index 00081a5168..9620ca8b90 100644 --- a/apps/api/src/controllers/v2/batch-scrape.ts +++ b/apps/api/src/controllers/v2/batch-scrape.ts @@ -29,6 +29,7 @@ import { checkPermissions } from "../../lib/permissions"; import { crawlGroup } from "../../services/worker/nuq"; import { logRequest } from "../../services/logging/log_job"; import type { BillingMetadata } from "../../services/billing/types"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; export async function batchScrapeController( req: RequestWithAuth<{}, BatchScrapeResponse, BatchScrapeRequest>, @@ -50,7 +51,7 @@ export async function batchScrapeController( } const zeroDataRetention = - req.acuc?.flags?.forceZDR || (req.body.zeroDataRetention ?? false); + getScrapeZDR(req.acuc?.flags) === "forced" || (req.body.zeroDataRetention ?? false); if ( req.body.__agentInterop && diff --git a/apps/api/src/controllers/v2/crawl.ts b/apps/api/src/controllers/v2/crawl.ts index 2d0059b10d..1dc4881a8d 100644 --- a/apps/api/src/controllers/v2/crawl.ts +++ b/apps/api/src/controllers/v2/crawl.ts @@ -22,6 +22,7 @@ import { checkPermissions } from "../../lib/permissions"; import { buildPromptWithWebsiteStructure } from "../../lib/map-utils"; import { crawlGroup } from "../../services/worker/nuq"; import { logRequest } from "../../services/logging/log_job"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; export async function crawlController( req: RequestWithAuth<{}, CrawlResponse, CrawlRequest>, @@ -39,7 +40,7 @@ export async function crawlController( } const zeroDataRetention = - req.acuc?.flags?.forceZDR || req.body.zeroDataRetention; + getScrapeZDR(req.acuc?.flags) === "forced" || req.body.zeroDataRetention; const id = uuidv7(); const logger = _logger.child({ diff --git a/apps/api/src/controllers/v2/extract.ts b/apps/api/src/controllers/v2/extract.ts index 44b69a5b7e..d7155a3122 100644 --- a/apps/api/src/controllers/v2/extract.ts +++ b/apps/api/src/controllers/v2/extract.ts @@ -13,6 +13,7 @@ import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; import { logger as _logger } from "../../lib/logger"; import { logRequest } from "../../services/logging/log_job"; import { config } from "../../config"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; /** * Extracts data from the provided URLs based on the request parameters. @@ -28,7 +29,7 @@ export async function extractController( const originalRequest = { ...req.body }; req.body = extractRequestSchema.parse(req.body); - if (req.acuc?.flags?.forceZDR) { + if (getScrapeZDR(req.acuc?.flags) === "forced") { return res.status(400).json({ success: false, error: @@ -45,7 +46,7 @@ export async function extractController( team_id: req.auth.team_id, subId: req.acuc?.sub_id, extractId, - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getScrapeZDR(req.acuc?.flags) === "forced", }); if (req.body.agent?.model === "v3-beta") { @@ -100,7 +101,7 @@ export async function extractController( showLLMUsage: req.body.__experimental_llmUsage, showSources: req.body.__experimental_showSources || req.body.showSources, showCostTracking: req.body.__experimental_showCostTracking, - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getScrapeZDR(req.acuc?.flags) === "forced", }); await addExtractJobToQueue(extractId, { diff --git a/apps/api/src/controllers/v2/map.ts b/apps/api/src/controllers/v2/map.ts index ac88d64120..a46871b4db 100644 --- a/apps/api/src/controllers/v2/map.ts +++ b/apps/api/src/controllers/v2/map.ts @@ -14,6 +14,7 @@ import { checkPermissions } from "../../lib/permissions"; import { getMapResults, MapResult } from "../../lib/map-utils"; import { v7 as uuidv7 } from "uuid"; import { isBaseDomain, extractBaseDomain } from "../../lib/url-utils"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; configDotenv(); @@ -26,7 +27,7 @@ export async function mapController( teamId: req.auth.team_id, module: "api/v2", method: "mapController", - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getScrapeZDR(req.acuc?.flags) === "forced", }); // Get timing data from middleware (includes all middleware processing time) const middlewareStartTime = diff --git a/apps/api/src/controllers/v2/scrape-status.ts b/apps/api/src/controllers/v2/scrape-status.ts index 2fa51cbfc9..758f354ef1 100644 --- a/apps/api/src/controllers/v2/scrape-status.ts +++ b/apps/api/src/controllers/v2/scrape-status.ts @@ -1,6 +1,7 @@ import { supabaseGetScrapeByIdOnlyData } from "../../lib/supabase-jobs"; import { getJob } from "./crawl-status"; import { logger as _logger } from "../../lib/logger"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; export async function scrapeStatusController(req: any, res: any) { const uuidReg = @@ -18,10 +19,10 @@ export async function scrapeStatusController(req: any, res: any) { teamId: req.auth.team_id, jobId: req.params.jobId, scrapeId: req.params.jobId, - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getScrapeZDR(req.acuc?.flags) === "forced", }); - if (req.acuc?.flags?.forceZDR) { + if (getScrapeZDR(req.acuc?.flags) === "forced") { return res.status(400).json({ success: false, error: diff --git a/apps/api/src/controllers/v2/scrape.ts b/apps/api/src/controllers/v2/scrape.ts index b03f5c036d..afa1af9463 100644 --- a/apps/api/src/controllers/v2/scrape.ts +++ b/apps/api/src/controllers/v2/scrape.ts @@ -23,6 +23,7 @@ import { logRequest } from "../../services/logging/log_job"; import { getErrorContactMessage } from "../../lib/deployment"; import { captureExceptionWithZdrCheck } from "../../services/sentry"; import type { BillingMetadata } from "../../services/billing/types"; +import { getScrapeZDR } from "../../lib/zdr-helpers"; const AGENT_INTEROP_CONCURRENCY_BOOST = 3; @@ -83,7 +84,7 @@ export async function scrapeController( } const zeroDataRetention = - req.acuc?.flags?.forceZDR || (req.body.zeroDataRetention ?? false); + getScrapeZDR(req.acuc?.flags) === "forced" || (req.body.zeroDataRetention ?? false); const billing: BillingMetadata = req.body.__agentInterop ? { endpoint: "agent" as const, jobId } : { endpoint: "scrape" as const, jobId }; diff --git a/apps/api/src/controllers/v2/search.ts b/apps/api/src/controllers/v2/search.ts index 6e557c4dcd..85f4e77e61 100644 --- a/apps/api/src/controllers/v2/search.ts +++ b/apps/api/src/controllers/v2/search.ts @@ -19,6 +19,7 @@ import { } from "../../services/sentry"; import { executeSearch } from "../../search/execute"; import type { BillingMetadata } from "../../services/billing/types"; +import { getSearchZDR } from "../../lib/zdr-helpers"; export async function searchController( req: RequestWithAuth<{}, SearchResponse, SearchRequest>, @@ -34,10 +35,10 @@ export async function searchController( teamId: req.auth.team_id, module: "api/v2", method: "searchController", - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getSearchZDR(req.acuc?.flags) === "forced", }); - if (req.acuc?.flags?.forceZDR) { + if (getSearchZDR(req.acuc?.flags) === "forced") { return res.status(400).json({ success: false, error: diff --git a/apps/api/src/controllers/v2/types.ts b/apps/api/src/controllers/v2/types.ts index 30332c56de..5fd359bf14 100644 --- a/apps/api/src/controllers/v2/types.ts +++ b/apps/api/src/controllers/v2/types.ts @@ -1287,6 +1287,8 @@ export type TeamFlags = { unblockedDomains?: string[]; forceZDR?: boolean; allowZDR?: boolean; + scrapeZDR?: "disabled" | "allowed" | "forced"; + searchZDR?: "disabled" | "allowed" | "forced"; zdrCost?: number; checkRobotsOnScrape?: boolean; crawlTtlHours?: number; diff --git a/apps/api/src/controllers/v2/x402-search.ts b/apps/api/src/controllers/v2/x402-search.ts index 6a1a793a4e..03d43c2000 100644 --- a/apps/api/src/controllers/v2/x402-search.ts +++ b/apps/api/src/controllers/v2/x402-search.ts @@ -32,6 +32,7 @@ import { applyZdrScope, captureExceptionWithZdrCheck, } from "../../services/sentry"; +import { getSearchZDR } from "../../lib/zdr-helpers"; interface DocumentWithCostTracking { document: Document; @@ -61,7 +62,7 @@ async function startX420ScrapeJob( ): Promise { const jobId = uuidv7(); - const zeroDataRetention = flags?.forceZDR ?? false; + const zeroDataRetention = getSearchZDR(flags) === "forced"; applyZdrScope(zeroDataRetention); logger.info("Adding scrape job [x402]", { @@ -210,10 +211,10 @@ export async function x402SearchController( teamId: req.auth.team_id, module: "api/v2", method: "x402SearchController", - zeroDataRetention: req.acuc?.flags?.forceZDR, + zeroDataRetention: getSearchZDR(req.acuc?.flags) === "forced", }); - if (req.acuc?.flags?.forceZDR) { + if (getSearchZDR(req.acuc?.flags) === "forced") { return res.status(400).json({ success: false, error: diff --git a/apps/api/src/lib/crawl-redis.ts b/apps/api/src/lib/crawl-redis.ts index 77b4f6332a..0093b92c19 100644 --- a/apps/api/src/lib/crawl-redis.ts +++ b/apps/api/src/lib/crawl-redis.ts @@ -6,6 +6,7 @@ import { logger as _logger } from "./logger"; import { getAdjustedMaxDepth } from "../scraper/WebScraper/utils/maxDepthUtils"; import type { Logger } from "winston"; import { withSpan, setSpanAttributes } from "./otel-tracer"; +import { getScrapeZDR } from "./zdr-helpers"; export type StoredCrawl = { originUrl?: string; @@ -559,7 +560,7 @@ export function crawlToCrawler( regexOnFullURL: sc.crawlerOptions?.regexOnFullURL ?? false, maxDiscoveryDepth: sc.crawlerOptions?.maxDiscoveryDepth, currentDiscoveryDepth: crawlerOptions?.currentDiscoveryDepth ?? 0, - zeroDataRetention: (teamFlags?.forceZDR || sc.zeroDataRetention) ?? false, + zeroDataRetention: (getScrapeZDR(teamFlags) === "forced" || sc.zeroDataRetention) ?? false, location: sc.scrapeOptions?.location, headers: sc.scrapeOptions?.headers, }); diff --git a/apps/api/src/lib/map-utils.ts b/apps/api/src/lib/map-utils.ts index 5a0a511e82..43bbc8b66b 100644 --- a/apps/api/src/lib/map-utils.ts +++ b/apps/api/src/lib/map-utils.ts @@ -7,6 +7,7 @@ import { MAX_MAP_LIMIT, } from "../controllers/v2/types"; import { crawlToCrawler, StoredCrawl } from "./crawl-redis"; +import { getScrapeZDR } from "./zdr-helpers"; import { checkAndUpdateURLForMap, isSameDomain, @@ -130,7 +131,7 @@ export async function getMapResults({ const id = providedId ?? uuidv7(); let mapResults: MapDocument[] = []; - const zeroDataRetention = flags?.forceZDR || false; + const zeroDataRetention = getScrapeZDR(flags) === "forced" || false; const sc: StoredCrawl = { originUrl: url, diff --git a/apps/api/src/lib/permissions.ts b/apps/api/src/lib/permissions.ts index 3a0d6a5c19..8bca74fb1d 100644 --- a/apps/api/src/lib/permissions.ts +++ b/apps/api/src/lib/permissions.ts @@ -1,4 +1,5 @@ import { TeamFlags } from "../controllers/v2/types"; +import { getScrapeZDR } from "./zdr-helpers"; type LocationOptions = { country?: string }; @@ -16,8 +17,9 @@ export function checkPermissions( request: APIRequest, flags?: TeamFlags, ): { error?: string } { - // zdr perms - if (request.zeroDataRetention && !flags?.allowZDR) { + // zdr perms — scrapeZDR must be 'allowed' or 'forced' for request-scoped ZDR + const scrapeMode = getScrapeZDR(flags); + if (request.zeroDataRetention && scrapeMode !== "allowed" && scrapeMode !== "forced") { return { error: `Zero Data Retention (ZDR) is not enabled for your team. Contact ${SUPPORT_EMAIL} to enable this feature.`, }; diff --git a/apps/api/src/lib/zdr-helpers.ts b/apps/api/src/lib/zdr-helpers.ts new file mode 100644 index 0000000000..1033a0266f --- /dev/null +++ b/apps/api/src/lib/zdr-helpers.ts @@ -0,0 +1,29 @@ +import type { TeamFlags } from "../controllers/v2/types"; + +type ZDRMode = "disabled" | "allowed" | "forced"; + +/** + * Resolves the effective ZDR mode for scrape endpoints from team flags. + * + * Handles both the new enum format (scrapeZDR) and the legacy boolean + * format (forceZDR/allowZDR) for backward compatibility during the + * cache migration window. + */ +export function getScrapeZDR(flags: TeamFlags | undefined): ZDRMode { + if (flags?.scrapeZDR === "forced" || flags?.forceZDR) return "forced"; + if (flags?.scrapeZDR === "allowed" || flags?.allowZDR) return "allowed"; + return "disabled"; +} + +/** + * Resolves the effective ZDR mode for search endpoints from team flags. + * + * Handles both the new enum format (searchZDR) and the legacy boolean + * format (forceZDR/allowZDR) for backward compatibility during the + * cache migration window. + */ +export function getSearchZDR(flags: TeamFlags | undefined): ZDRMode { + if (flags?.searchZDR === "forced" || flags?.forceZDR) return "forced"; + if (flags?.searchZDR === "allowed" || flags?.allowZDR) return "allowed"; + return "disabled"; +} diff --git a/apps/api/src/search/scrape.ts b/apps/api/src/search/scrape.ts index 41081098bb..03c9b05082 100644 --- a/apps/api/src/search/scrape.ts +++ b/apps/api/src/search/scrape.ts @@ -9,6 +9,7 @@ import { processJobInternal } from "../services/worker/scrape-worker"; import { ScrapeJobData } from "../types"; import { SearchV2Response } from "../lib/entities"; import type { BillingMetadata } from "../services/billing/types"; +import { getScrapeZDR } from "../lib/zdr-helpers"; export interface DocumentWithCostTracking { document: Document; @@ -48,7 +49,7 @@ async function scrapeSearchResultDirect( ): Promise { const jobId = uuidv7(); const zeroDataRetention = - flags?.forceZDR || (options.zeroDataRetention ?? false); + getScrapeZDR(flags) === "forced" || (options.zeroDataRetention ?? false); logger.debug("Starting direct scrape for search result", { scrapeId: jobId, From 67c9dbd3ac900cc4094cf15083fe28e40dd39ed8 Mon Sep 17 00:00:00 2001 From: "firecrawl-spring[bot]" <254786068+firecrawl-spring[bot]@users.noreply.github.com> Date: Mon, 16 Mar 2026 14:46:02 +0000 Subject: [PATCH 2/3] fix(scrape): use selectedRow instead of data[0] for GCS fetch and cacheInfo (#3151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The index engine's row selection logic (lines 294-303) may choose a row other than data[0] — e.g. when recent error entries (403s) precede a valid 2xx entry but haven't hit the errorCountToRegister threshold of 3. In that case, data[newest200Index] is selected, but the GCS fetch and cacheInfo timestamp were still referencing data[0], causing the wrong document to be fetched and the wrong cache age to be reported. Co-authored-by: firecrawl-spring[bot] <254786068+firecrawl-spring[bot]@users.noreply.github.com> Co-authored-by: devhims --- apps/api/src/scraper/scrapeURL/engines/index/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/api/src/scraper/scrapeURL/engines/index/index.ts b/apps/api/src/scraper/scrapeURL/engines/index/index.ts index 908f75cf4a..2bb91843cd 100644 --- a/apps/api/src/scraper/scrapeURL/engines/index/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/index/index.ts @@ -324,7 +324,7 @@ export async function scrapeURLWithIndex( const checkpoint2 = Date.now(); - const id = data[0].id; + const id = selectedRow.id; const doc = await getIndexFromGCS( id + ".json", @@ -386,7 +386,7 @@ export async function scrapeURLWithIndex( contentType: doc.contentType, cacheInfo: { - created_at: new Date(data[0].created_at), + created_at: new Date(selectedRow.created_at), }, postprocessorsUsed: doc.postprocessorsUsed, From 9cdaae43a8fa69d4572af9ac16f0d2b05bd1efec Mon Sep 17 00:00:00 2001 From: tomsideguide Date: Mon, 16 Mar 2026 15:28:20 +0000 Subject: [PATCH 3/3] feat(api): allow index with enhanced mode (#3158) --- apps/api/src/lib/native-logging.ts | 2 +- .../api/src/scraper/scrapeURL/engines/index.ts | 18 ++++++++++++------ .../scraper/scrapeURL/engines/index/index.ts | 4 +++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/apps/api/src/lib/native-logging.ts b/apps/api/src/lib/native-logging.ts index a643ab3139..5b25bbc34f 100644 --- a/apps/api/src/lib/native-logging.ts +++ b/apps/api/src/lib/native-logging.ts @@ -3,7 +3,7 @@ import type { Logger } from "winston"; const NATIVE_LOGS_SEPARATOR = "\n__native_logs__:"; /** Matches the NativeLogEntry struct from Rust (@mendable/firecrawl-rs). */ -export interface NativeLogEntry { +interface NativeLogEntry { level: string; target: string; message: string; diff --git a/apps/api/src/scraper/scrapeURL/engines/index.ts b/apps/api/src/scraper/scrapeURL/engines/index.ts index 3524f403d9..312c49f963 100644 --- a/apps/api/src/scraper/scrapeURL/engines/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/index.ts @@ -213,7 +213,7 @@ const engineOptions: { location: true, skipTlsVerification: true, useFastMode: true, - stealthProxy: false, + stealthProxy: true, branding: false, disableAdblock: true, }, @@ -270,7 +270,7 @@ const engineOptions: { mobile: true, skipTlsVerification: true, useFastMode: true, - stealthProxy: false, + stealthProxy: true, branding: false, disableAdblock: false, }, @@ -467,8 +467,7 @@ export function shouldUseIndex(meta: Meta) { meta.options.maxAge !== 0 && (meta.options.headers === undefined || Object.keys(meta.options.headers).length === 0) && - (meta.options.actions === undefined || meta.options.actions.length === 0) && - meta.options.proxy !== "stealth" + (meta.options.actions === undefined || meta.options.actions.length === 0) ); } @@ -561,7 +560,11 @@ export async function buildFallbackList(meta: Meta): Promise< } } - if (selectedEngines.some(x => engineOptions[x.engine].quality > 0)) { + if ( + selectedEngines.some( + x => engineOptions[x.engine].quality > 0 && !x.engine.startsWith("index"), + ) + ) { selectedEngines = selectedEngines.filter( x => engineOptions[x.engine].quality > 0, ); @@ -628,7 +631,10 @@ export async function scrapeURLWithEngine( }); const featureFlags = new Set(meta.featureFlags); - if (engineOptions[engine].features.stealthProxy) { + if ( + engineOptions[engine].features.stealthProxy && + !engine.startsWith("index") // don't force stealth proxy for index + ) { featureFlags.add("stealthProxy"); } diff --git a/apps/api/src/scraper/scrapeURL/engines/index/index.ts b/apps/api/src/scraper/scrapeURL/engines/index/index.ts index 2bb91843cd..31ba1afd56 100644 --- a/apps/api/src/scraper/scrapeURL/engines/index/index.ts +++ b/apps/api/src/scraper/scrapeURL/engines/index/index.ts @@ -151,6 +151,7 @@ export async function sendDocumentToIndex(meta: Meta, document: Document) { location_languages: meta.options.location?.languages ?? null, status: document.metadata.statusCode, is_precrawl: meta.internalOptions.isPreCrawl === true, + is_stealth: meta.featureFlags.has("stealthProxy"), wait_time_ms: meta.options.waitFor > 0 ? meta.options.waitFor : null, ...urlSplitsHash.slice(0, 10).reduce( (a, x, i) => ({ @@ -259,7 +260,7 @@ export async function scrapeURLWithIndex( const checkpoint1 = Date.now(); const { data, error } = await index_supabase_service.rpc( - "index_get_recent_3", + "index_get_recent_4", { p_url_hash: urlHash, p_max_age_ms: maxAge, @@ -275,6 +276,7 @@ export async function scrapeURLWithIndex( ? meta.options.location?.languages : null, p_wait_time_ms: meta.options.waitFor, + p_is_stealth: meta.featureFlags.has("stealthProxy"), p_min_age_ms: meta.options.minAge ?? null, }, );