From a1752e8697aabc6dd4e6c02b73efe467aee7b530 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 8 May 2026 17:28:38 -0700 Subject: [PATCH 01/11] improvement(resolver): lazy resolution for underlying fields greater than 10MB --- apps/docs/content/docs/en/blocks/function.mdx | 8 + .../docs/en/execution/api-deployment.mdx | 19 ++ .../app/api/workflows/[id]/execute/route.ts | 171 ++++++++++++++---- .../executions/[executionId]/cancel/route.ts | 27 ++- apps/sim/background/webhook-execution.ts | 2 +- apps/sim/executor/execution/block-executor.ts | 12 +- apps/sim/executor/orchestrators/loop.ts | 20 +- apps/sim/executor/orchestrators/node.ts | 19 +- apps/sim/executor/orchestrators/parallel.ts | 17 +- apps/sim/executor/utils/output-filter.ts | 4 + apps/sim/executor/utils/subflow-utils.ts | 2 +- apps/sim/executor/variables/resolver.test.ts | 42 +++++ .../variables/resolvers/block.test.ts | 35 ++++ .../sim/executor/variables/resolvers/block.ts | 2 + .../executor/variables/resolvers/loop.test.ts | 42 ++++- apps/sim/executor/variables/resolvers/loop.ts | 60 ++++-- .../variables/resolvers/parallel.test.ts | 47 +++++ .../executor/variables/resolvers/parallel.ts | 48 +++-- .../executor/variables/resolvers/reference.ts | 26 +++ .../lib/api/contracts/execution-payloads.ts | 21 +++ apps/sim/lib/api/contracts/index.ts | 1 + apps/sim/lib/core/utils/response-format.ts | 10 + apps/sim/lib/execution/event-buffer.test.ts | 35 ++++ apps/sim/lib/execution/event-buffer.ts | 127 ++++++++++++- apps/sim/lib/execution/payloads/cache.ts | 85 +++++++++ apps/sim/lib/execution/payloads/hydration.ts | 29 +++ .../lib/execution/payloads/large-value-ref.ts | 86 +++++++++ .../lib/execution/payloads/serializer.test.ts | 103 +++++++++++ apps/sim/lib/execution/payloads/serializer.ts | 124 +++++++++++++ apps/sim/lib/execution/payloads/store.ts | 149 +++++++++++++++ .../execution/execution-file-manager.ts | 1 - .../utils/user-file-base64.server.test.ts | 43 +++++ 
.../uploads/utils/user-file-base64.server.ts | 31 +++- .../lib/workflows/executor/execution-core.ts | 8 + .../executor/human-in-the-loop-manager.ts | 57 +++++- .../executor/queued-workflow-execution.ts | 94 ++++++++-- apps/sim/lib/workflows/streaming/streaming.ts | 59 +++++- apps/sim/lib/workflows/utils.ts | 4 +- apps/sim/proxy.ts | 2 +- packages/ts-sdk/README.md | 17 ++ packages/ts-sdk/src/index.ts | 11 ++ 41 files changed, 1569 insertions(+), 131 deletions(-) create mode 100644 apps/sim/lib/api/contracts/execution-payloads.ts create mode 100644 apps/sim/lib/execution/payloads/cache.ts create mode 100644 apps/sim/lib/execution/payloads/hydration.ts create mode 100644 apps/sim/lib/execution/payloads/large-value-ref.ts create mode 100644 apps/sim/lib/execution/payloads/serializer.test.ts create mode 100644 apps/sim/lib/execution/payloads/serializer.ts create mode 100644 apps/sim/lib/execution/payloads/store.ts create mode 100644 apps/sim/lib/uploads/utils/user-file-base64.server.test.ts diff --git a/apps/docs/content/docs/en/blocks/function.mdx b/apps/docs/content/docs/en/blocks/function.mdx index fecc0b590fb..90b0f72d26c 100644 --- a/apps/docs/content/docs/en/blocks/function.mdx +++ b/apps/docs/content/docs/en/blocks/function.mdx @@ -185,6 +185,14 @@ plt.show() ## Best Practices +### Large Inputs and Payload Limits + +Function blocks receive their code, parameters, resolved references, and previous block context in an internal execution request. Sim can safely reference oversized workflow outputs, such as large `loop.results` or `parallel.results`, when you select a smaller nested field like ``. + +Avoid passing a full large object into a Function block when you only need one field. For example, prefer `` over `` when the API response is large. If the complete function request body is still larger than the platform limit, execution can fail before your code starts. 
+ +For large generated data, write the result to a file or table with `outputPath`, `outputSandboxPath`, or `outputTable` instead of returning the entire payload inline. + - **Keep functions focused**: Write functions that do one thing well to improve maintainability and debugging - **Handle errors gracefully**: Use try/catch blocks to handle potential errors and provide meaningful error messages - **Test edge cases**: Ensure your code handles unusual inputs, null values, and boundary conditions correctly diff --git a/apps/docs/content/docs/en/execution/api-deployment.mdx b/apps/docs/content/docs/en/execution/api-deployment.mdx index b74a886271c..31a5e47d585 100644 --- a/apps/docs/content/docs/en/execution/api-deployment.mdx +++ b/apps/docs/content/docs/en/execution/api-deployment.mdx @@ -215,6 +215,25 @@ while (true) { +#### Oversized outputs + +Workflow execution responses are capped by platform request and response limits. When an internal output, log field, streamed field, or async status payload contains a value that is too large to inline, Sim may replace that nested value with a versioned reference: + +```json +{ + "__simLargeValueRef": true, + "version": 1, + "id": "lv_abc123DEF456", + "kind": "array", + "size": 12582912, + "key": "execution-values/workspace/workflow/execution/lv_abc123DEF456.json", + "executionId": "exec_xyz", + "preview": { "length": 25000 } +} +``` + +The `version` field is part of the external API contract. Treat the reference as an opaque placeholder for a value that could not be safely embedded in the response. `id`, `key`, and `executionId` are not fetch URLs; use `selectedOutputs` to request a smaller nested field, reduce the data passed between blocks, or return the data from a Response block when your workflow intentionally owns the HTTP response body. + ### Asynchronous For long-running workflows, async mode returns a job ID immediately so you don't need to hold the connection open. 
Add the `X-Execution-Mode: async` header to your request. The API returns HTTP 202 with a job ID and status URL. Poll the status URL until the job completes. diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index b0f0a0b1d4d..cd1f12e1894 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -36,6 +36,7 @@ import { registerManualExecutionAborter, unregisterManualExecutionAborter, } from '@/lib/execution/manual-cancellation' +import { compactBlockLogs, compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { preprocessExecution } from '@/lib/execution/preprocessing' import { LoggingSession } from '@/lib/logs/execution/logging-session' import { @@ -65,7 +66,7 @@ import type { IterationContext, SerializableExecutionState, } from '@/executor/execution/types' -import type { NormalizedBlockOutput, StreamingExecution } from '@/executor/types' +import type { BlockLog, NormalizedBlockOutput, StreamingExecution } from '@/executor/types' import { getExecutionErrorStatus, hasExecutionResult } from '@/executor/utils/errors' import { Serializer } from '@/serializer' import { CORE_TRIGGER_TYPES, type CoreTriggerType } from '@/stores/logs/filters/types' @@ -75,6 +76,20 @@ const logger = createLogger('WorkflowExecuteAPI') export const runtime = 'nodejs' export const dynamic = 'force-dynamic' +async function compactRoutePayload( + value: T, + context: { + workspaceId?: string + workflowId?: string + executionId?: string + userId?: string + preserveUserFileBase64?: boolean + preserveRoot?: boolean + } +): Promise { + return compactExecutionPayload(value, { ...context, requireDurable: true }) +} + function resolveOutputIds( selectedOutputs: string[] | undefined, blocks: Record @@ -719,6 +734,14 @@ async function handleExecutePost( }) await handlePostExecutionPauseState({ result, workflowId, executionId, loggingSession }) + const 
compactResultOutput = await compactRoutePayload(result.output, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + preserveUserFileBase64: true, + preserveRoot: true, + }) if ( result.status === 'cancelled' && @@ -734,7 +757,7 @@ async function handleExecutePost( return NextResponse.json( { success: false, - output: result.output, + output: compactResultOutput, error: timeoutErrorMessage, metadata: result.metadata ? { @@ -756,16 +779,23 @@ async function handleExecutePost( })) as NormalizedBlockOutput) : result.output - const resultWithBase64 = { ...result, output: outputWithBase64 } - - if (auth.authType !== AuthType.INTERNAL_JWT && workflowHasResponseBlock(resultWithBase64)) { - return createHttpResponseFromBlock(resultWithBase64) + if (auth.authType !== AuthType.INTERNAL_JWT && workflowHasResponseBlock(result)) { + return createHttpResponseFromBlock({ ...result, output: outputWithBase64 }) } + const compactOutput = await compactRoutePayload(outputWithBase64, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + preserveUserFileBase64: true, + preserveRoot: true, + }) + const filteredResult = { success: result.success, executionId, - output: outputWithBase64, + output: compactOutput, error: result.error, metadata: result.metadata ? { @@ -784,11 +814,21 @@ async function handleExecutePost( const executionResult = hasExecutionResult(error) ? error.executionResult : undefined const status = getExecutionErrorStatus(error) + const compactErrorOutput = executionResult?.output + ? await compactRoutePayload(executionResult.output, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + preserveUserFileBase64: true, + preserveRoot: true, + }) + : undefined return NextResponse.json( { success: false, - output: executionResult?.output, + output: compactErrorOutput, error: executionResult?.error || errorMessage || 'Execution failed', metadata: executionResult?.metadata ? 
{ @@ -838,6 +878,9 @@ async function handleExecutePost( timeoutMs: preprocessResult.executionTimeout?.sync, }, executionId, + workspaceId, + workflowId, + userId: actorUserId, executeFn: async ({ onStream, onBlockComplete, abortSignal }) => executeWorkflow( streamWorkflow, @@ -872,7 +915,12 @@ async function handleExecutePost( let isStreamClosed = false let isManualAbortRegistered = false - const eventWriter = createExecutionEventWriter(executionId) + const eventWriter = createExecutionEventWriter(executionId, { + workspaceId, + workflowId, + userId: actorUserId, + preserveUserFileBase64: includeFileBase64, + }) const metaInitialized = await initializeExecutionStreamMeta(executionId, { userId: actorUserId, workflowId, @@ -898,16 +946,18 @@ async function handleExecutePost( terminalStatus?: TerminalExecutionStreamStatus ) => { const isBuffered = event.type !== 'stream:chunk' && event.type !== 'stream:done' + let eventToSend = event if (isBuffered) { const entry = terminalStatus ? await eventWriter.writeTerminal(event, terminalStatus) : await eventWriter.write(event) - event.eventId = entry.eventId + eventToSend = entry.event + eventToSend.eventId = entry.eventId terminalEventPublished ||= Boolean(terminalStatus) } if (!isStreamClosed) { try { - controller.enqueue(encodeSSEEvent(event)) + controller.enqueue(encodeSSEEvent(eventToSend)) } catch { isStreamClosed = true } @@ -971,7 +1021,26 @@ async function handleExecutePost( iterationContext?: IterationContext, childWorkflowContext?: ChildWorkflowContext ) => { - const hasError = callbackData.output?.error + const compactCallbackData = { + ...callbackData, + input: await compactRoutePayload(callbackData.input, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + preserveUserFileBase64: includeFileBase64, + preserveRoot: true, + }), + output: await compactRoutePayload(callbackData.output, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + preserveUserFileBase64: includeFileBase64, 
+ preserveRoot: true, + }), + } + const hasError = compactCallbackData.output?.error const childWorkflowData = childWorkflowContext ? { childWorkflowBlockId: childWorkflowContext.parentBlockId, @@ -988,7 +1057,7 @@ async function handleExecutePost( blockId, blockName, blockType, - error: callbackData.output.error, + error: compactCallbackData.output.error, }) await sendEvent({ type: 'block:error', @@ -999,12 +1068,12 @@ async function handleExecutePost( blockId, blockName, blockType, - input: callbackData.input, - error: callbackData.output.error, - durationMs: callbackData.executionTime || 0, - startedAt: callbackData.startedAt, - executionOrder: callbackData.executionOrder, - endedAt: callbackData.endedAt, + input: compactCallbackData.input, + error: compactCallbackData.output.error, + durationMs: compactCallbackData.executionTime || 0, + startedAt: compactCallbackData.startedAt, + executionOrder: compactCallbackData.executionOrder, + endedAt: compactCallbackData.endedAt, ...(iterationContext && { iterationCurrent: iterationContext.iterationCurrent, iterationTotal: iterationContext.iterationTotal, @@ -1033,12 +1102,12 @@ async function handleExecutePost( blockId, blockName, blockType, - input: callbackData.input, - output: callbackData.output, - durationMs: callbackData.executionTime || 0, - startedAt: callbackData.startedAt, - executionOrder: callbackData.executionOrder, - endedAt: callbackData.endedAt, + input: compactCallbackData.input, + output: compactCallbackData.output, + durationMs: compactCallbackData.executionTime || 0, + startedAt: compactCallbackData.startedAt, + executionOrder: compactCallbackData.executionOrder, + endedAt: compactCallbackData.endedAt, ...(iterationContext && { iterationCurrent: iterationContext.iterationCurrent, iterationTotal: iterationContext.iterationTotal, @@ -1171,6 +1240,13 @@ async function handleExecutePost( }) await handlePostExecutionPauseState({ result, workflowId, executionId, loggingSession }) + const 
compactTerminalLogs = await compactBlockLogs(result.logs, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + requireDurable: true, + }) if (result.status === 'cancelled') { if (timeoutController.isTimedOut() && timeoutController.timeoutMs) { @@ -1191,7 +1267,7 @@ async function handleExecutePost( data: { error: timeoutErrorMessage, duration: result.metadata?.duration || 0, - finalBlockLogs: result.logs, + finalBlockLogs: compactTerminalLogs, }, }, 'error' @@ -1208,7 +1284,7 @@ async function handleExecutePost( workflowId, data: { duration: result.metadata?.duration || 0, - finalBlockLogs: result.logs, + finalBlockLogs: compactTerminalLogs, }, }, 'cancelled' @@ -1224,6 +1300,21 @@ async function handleExecutePost( maxBytes: base64MaxBytes, }) : result.output + const compactSseOutput = await compactRoutePayload(sseOutput, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + preserveUserFileBase64: true, + preserveRoot: true, + }) + const compactFinalBlockLogs = await compactBlockLogs(result.logs, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + requireDurable: true, + }) if (result.status === 'paused') { finalMetaStatus = 'complete' @@ -1234,11 +1325,11 @@ async function handleExecutePost( executionId, workflowId, data: { - output: sseOutput, + output: compactSseOutput, duration: result.metadata?.duration || 0, startTime: result.metadata?.startTime || startTime.toISOString(), endTime: result.metadata?.endTime || new Date().toISOString(), - finalBlockLogs: result.logs, + finalBlockLogs: compactFinalBlockLogs, }, }, 'complete' @@ -1253,11 +1344,11 @@ async function handleExecutePost( workflowId, data: { success: result.success, - output: sseOutput, + output: compactSseOutput, duration: result.metadata?.duration || 0, startTime: result.metadata?.startTime || startTime.toISOString(), endTime: result.metadata?.endTime || new Date().toISOString(), - finalBlockLogs: result.logs, + finalBlockLogs: 
compactFinalBlockLogs, }, }, 'complete' @@ -1274,6 +1365,22 @@ async function handleExecutePost( reqLogger.error(`SSE execution failed: ${errorMessage}`, { isTimeout }) const executionResult = hasExecutionResult(error) ? error.executionResult : undefined + let compactErrorLogs: BlockLog[] | undefined + try { + compactErrorLogs = executionResult?.logs + ? await compactBlockLogs(executionResult.logs, { + workspaceId, + workflowId, + executionId, + userId: actorUserId, + requireDurable: true, + }) + : undefined + } catch (compactionError) { + reqLogger.warn('Failed to compact SSE error logs, omitting oversized error details', { + error: toError(compactionError).message, + }) + } finalMetaStatus = 'error' await sendEvent( @@ -1285,7 +1392,7 @@ async function handleExecutePost( data: { error: executionResult?.error || errorMessage, duration: executionResult?.metadata?.duration || 0, - finalBlockLogs: executionResult?.logs, + finalBlockLogs: compactErrorLogs, }, }, 'error' diff --git a/apps/sim/app/api/workflows/[id]/executions/[executionId]/cancel/route.ts b/apps/sim/app/api/workflows/[id]/executions/[executionId]/cancel/route.ts index 841b92c36fd..d7214b14962 100644 --- a/apps/sim/app/api/workflows/[id]/executions/[executionId]/cancel/route.ts +++ b/apps/sim/app/api/workflows/[id]/executions/[executionId]/cancel/route.ts @@ -51,14 +51,19 @@ async function completePausedCancellationWithRetry(executionId: string): Promise async function ensurePausedCancellationEventPublished( executionId: string, - workflowId: string + workflowId: string, + context: { workspaceId?: string; userId?: string } = {} ): Promise { const metaState = await readExecutionMetaState(executionId) if (metaState.status === 'found' && metaState.meta.status === 'cancelled') { return true } - const writer = createExecutionEventWriter(executionId) + const writer = createExecutionEventWriter(executionId, { + workspaceId: context.workspaceId, + workflowId, + userId: context.userId, + }) try { await 
writer.writeTerminal( { @@ -184,7 +189,11 @@ export const POST = withRouteHandler( if (pausedCancellationStarted) { pausedCancellationPublished = await ensurePausedCancellationEventPublished( executionId, - workflowId + workflowId, + { + workspaceId: workflowAuthorization.workflow?.workspaceId ?? undefined, + userId: auth.userId, + } ) pausedCancellationPublishFailed = !pausedCancellationPublished if (pausedCancellationPublished) { @@ -194,14 +203,22 @@ export const POST = withRouteHandler( if (pendingPausedCancellation === 'cancelled') { pausedCancellationPublished = await ensurePausedCancellationEventPublished( executionId, - workflowId + workflowId, + { + workspaceId: workflowAuthorization.workflow?.workspaceId ?? undefined, + userId: auth.userId, + } ) pausedCancellationPublishFailed = !pausedCancellationPublished pausedCancelled = pausedCancellationPublished } else if (pendingPausedCancellation === 'cancelling') { pausedCancellationPublished = await ensurePausedCancellationEventPublished( executionId, - workflowId + workflowId, + { + workspaceId: workflowAuthorization.workflow?.workspaceId ?? 
undefined, + userId: auth.userId, + } ) pausedCancellationPublishFailed = !pausedCancellationPublished if (pausedCancellationPublished) { diff --git a/apps/sim/background/webhook-execution.ts b/apps/sim/background/webhook-execution.ts index bfd515695a3..1753813d849 100644 --- a/apps/sim/background/webhook-execution.ts +++ b/apps/sim/background/webhook-execution.ts @@ -578,7 +578,7 @@ async function executeWebhookJobInternal( snapshot, callbacks: {}, loggingSession, - includeFileBase64: true, + includeFileBase64: false, base64MaxBytes: undefined, abortSignal: timeoutController.signal, }) diff --git a/apps/sim/executor/execution/block-executor.ts b/apps/sim/executor/execution/block-executor.ts index f1506bb33fc..02b3ab58d22 100644 --- a/apps/sim/executor/execution/block-executor.ts +++ b/apps/sim/executor/execution/block-executor.ts @@ -3,6 +3,7 @@ import { toError } from '@sim/utils/errors' import { redactApiKeys } from '@/lib/core/security/redaction' import { normalizeStringArray } from '@/lib/core/utils/arrays' import { getBaseUrl } from '@/lib/core/utils/urls' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { containsUserFileWithMetadata, hydrateUserFilesWithBase64, @@ -189,7 +190,7 @@ export class BlockExecutor { normalizedOutput = this.normalizeOutput(output) } - if (containsUserFileWithMetadata(normalizedOutput)) { + if (ctx.includeFileBase64 === true && containsUserFileWithMetadata(normalizedOutput)) { normalizedOutput = (await hydrateUserFilesWithBase64(normalizedOutput, { requestId: ctx.metadata.requestId, executionId: ctx.executionId, @@ -197,6 +198,15 @@ export class BlockExecutor { })) as NormalizedBlockOutput } + normalizedOutput = (await compactExecutionPayload(normalizedOutput, { + workspaceId: ctx.workspaceId, + workflowId: ctx.workflowId, + executionId: ctx.executionId, + userId: ctx.userId, + preserveUserFileBase64: ctx.includeFileBase64 === true, + requireDurable: true, + })) as NormalizedBlockOutput + const 
endedAt = new Date().toISOString() const duration = performance.now() - startTime diff --git a/apps/sim/executor/orchestrators/loop.ts b/apps/sim/executor/orchestrators/loop.ts index 1c089ac3cb3..cbf30bfa18b 100644 --- a/apps/sim/executor/orchestrators/loop.ts +++ b/apps/sim/executor/orchestrators/loop.ts @@ -3,6 +3,7 @@ import { toError } from '@sim/utils/errors' import { generateRequestId } from '@/lib/core/utils/request' import { isExecutionCancelled, isRedisCancellationEnabled } from '@/lib/execution/cancellation' import { executeInIsolatedVM } from '@/lib/execution/isolated-vm' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { buildLoopIndexCondition, DEFAULTS, EDGE, PARALLEL } from '@/executor/constants' import type { DAG } from '@/executor/dag/builder' import type { EdgeManager } from '@/executor/execution/edge-manager' @@ -36,7 +37,8 @@ export interface LoopContinuationResult { shouldContinue: boolean shouldExit: boolean selectedRoute: LoopRoute - aggregatedResults?: NormalizedBlockOutput[][] + aggregatedResults?: unknown + totalIterations?: number } export class LoopOrchestrator { @@ -313,8 +315,19 @@ export class LoopOrchestrator { scope: LoopScope ): Promise { const results = scope.allIterationOutputs - const output = { results } + const totalIterations = results.length + const output = (await compactExecutionPayload( + { results }, + { + workspaceId: ctx.workspaceId, + workflowId: ctx.workflowId, + executionId: ctx.executionId, + userId: ctx.userId, + requireDurable: true, + } + )) as { results: unknown } this.state.setBlockOutput(loopId, output, DEFAULTS.EXECUTION_TIME) + scope.allIterationOutputs = [] await emitSubflowSuccessEvents(ctx, loopId, 'loop', output, this.contextExtensions) @@ -322,7 +335,8 @@ export class LoopOrchestrator { shouldContinue: false, shouldExit: true, selectedRoute: EDGE.LOOP_EXIT, - aggregatedResults: results, + aggregatedResults: output.results, + totalIterations, } } diff --git 
a/apps/sim/executor/orchestrators/node.ts b/apps/sim/executor/orchestrators/node.ts index 9844e93fb57..be10a69751b 100644 --- a/apps/sim/executor/orchestrators/node.ts +++ b/apps/sim/executor/orchestrators/node.ts @@ -1,4 +1,5 @@ import { createLogger } from '@sim/logger' +import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' import { EDGE } from '@/executor/constants' import type { DAG, DAGNode } from '@/executor/dag/builder' import type { BlockExecutor } from '@/executor/execution/block-executor' @@ -10,6 +11,20 @@ import { extractBaseBlockId } from '@/executor/utils/subflow-utils' const logger = createLogger('NodeExecutionOrchestrator') +function getResultCount(value: unknown): number { + if (isLargeValueRef(value)) { + const preview = value.preview + if ( + preview && + typeof preview === 'object' && + typeof (preview as Record).length === 'number' + ) { + return (preview as { length: number }).length + } + } + return Array.isArray(value) ? value.length : 0 +} + export interface NodeExecutionResult { nodeId: string output: NormalizedBlockOutput @@ -130,7 +145,9 @@ export class NodeExecutionOrchestrator { shouldContinue: false, shouldExit: true, selectedRoute: continuationResult.selectedRoute, - totalIterations: continuationResult.aggregatedResults?.length || 0, + totalIterations: + continuationResult.totalIterations ?? 
+ getResultCount(continuationResult.aggregatedResults), } } diff --git a/apps/sim/executor/orchestrators/parallel.ts b/apps/sim/executor/orchestrators/parallel.ts index 7cc10abbee7..741d69d002e 100644 --- a/apps/sim/executor/orchestrators/parallel.ts +++ b/apps/sim/executor/orchestrators/parallel.ts @@ -1,5 +1,6 @@ import { createLogger } from '@sim/logger' import { toError } from '@sim/utils/errors' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { DEFAULTS } from '@/executor/constants' import type { DAG } from '@/executor/dag/builder' import type { ParallelScope } from '@/executor/execution/state' @@ -29,7 +30,7 @@ export interface ParallelBranchMetadata { export interface ParallelAggregationResult { allBranchesComplete: boolean - results?: NormalizedBlockOutput[][] + results?: unknown completedBranches?: number totalBranches?: number } @@ -312,14 +313,24 @@ export class ParallelOrchestrator { } results.push(branchOutputs ?? []) } - const output = { results } + const output = (await compactExecutionPayload( + { results }, + { + workspaceId: ctx.workspaceId, + workflowId: ctx.workflowId, + executionId: ctx.executionId, + userId: ctx.userId, + requireDurable: true, + } + )) as { results: unknown } this.state.setBlockOutput(parallelId, output) + scope.branchOutputs = new Map() await emitSubflowSuccessEvents(ctx, parallelId, 'parallel', output, this.contextExtensions) return { allBranchesComplete: true, - results, + results: output.results, completedBranches: scope.totalBranches, totalBranches: scope.totalBranches, } diff --git a/apps/sim/executor/utils/output-filter.ts b/apps/sim/executor/utils/output-filter.ts index 5da00faba53..95c3cab5397 100644 --- a/apps/sim/executor/utils/output-filter.ts +++ b/apps/sim/executor/utils/output-filter.ts @@ -1,3 +1,4 @@ +import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' import { filterHiddenOutputKeys } from '@/lib/logs/execution/trace-spans/trace-spans' import { 
getBlock } from '@/blocks' import { isHiddenFromDisplay } from '@/blocks/types' @@ -27,6 +28,9 @@ export function filterOutputForLog( if (typeof output !== 'object' || output === null || Array.isArray(output)) { return output as NormalizedBlockOutput } + if (isLargeValueRef(output)) { + return output as NormalizedBlockOutput + } const blockConfig = blockType ? getBlock(blockType) : undefined const filtered: NormalizedBlockOutput = {} const additionalHiddenKeys = options?.additionalHiddenKeys ?? [] diff --git a/apps/sim/executor/utils/subflow-utils.ts b/apps/sim/executor/utils/subflow-utils.ts index 01765360787..282c75ea74c 100644 --- a/apps/sim/executor/utils/subflow-utils.ts +++ b/apps/sim/executor/utils/subflow-utils.ts @@ -408,7 +408,7 @@ export async function emitSubflowSuccessEvents( ctx: ExecutionContext, blockId: string, blockType: 'loop' | 'parallel', - output: { results: any[] }, + output: { results: unknown }, contextExtensions: ContextExtensions | null ): Promise { const now = new Date().toISOString() diff --git a/apps/sim/executor/variables/resolver.test.ts b/apps/sim/executor/variables/resolver.test.ts index 8f255269661..8ac37e6cf25 100644 --- a/apps/sim/executor/variables/resolver.test.ts +++ b/apps/sim/executor/variables/resolver.test.ts @@ -84,6 +84,48 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) + it('resolves named loop result bracket paths in function code', () => { + const loopBlock = createBlock('loop-1', 'Loop 1', 'loop') + const functionBlock = createBlock('function', 'Function', BlockType.FUNCTION, { + language: 'javascript', + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [loopBlock, functionBlock], + connections: [], + loops: { 'loop-1': { nodes: ['producer'] } }, + parallels: {}, + } + const state = new ExecutionState() + state.setBlockOutput('loop-1', { + results: [[{ id: 'a' }], [{ id: 'b' }]], + }) + const ctx = { + 
blockStates: state.getBlockStates(), + blockLogs: [], + environmentVariables: {}, + workflowVariables: {}, + decisions: { router: new Map(), condition: new Map() }, + loopExecutions: new Map(), + executedBlocks: new Set(), + activeExecutionPath: new Set(), + completedLoops: new Set(), + metadata: {}, + } as ExecutionContext + const resolver = new VariableResolver(workflow, {}, state) + + const result = resolver.resolveInputsForFunctionBlock( + ctx, + 'function', + { code: 'return ' }, + functionBlock + ) + + expect(result.resolvedInputs.code).toBe('return globalThis["__blockRef_0"]') + expect(result.displayInputs.code).toBe('return "b"') + expect(result.contextVariables).toEqual({ __blockRef_0: 'b' }) + }) + it('resolves Python block references through globals lookup', () => { const { block, ctx, resolver } = createResolver('python') diff --git a/apps/sim/executor/variables/resolvers/block.test.ts b/apps/sim/executor/variables/resolvers/block.test.ts index 5b9ed37fc3b..1092e34f331 100644 --- a/apps/sim/executor/variables/resolvers/block.test.ts +++ b/apps/sim/executor/variables/resolvers/block.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it, vi } from 'vitest' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { ExecutionState } from '@/executor/execution/state' import { BlockResolver } from './block' import { RESOLVED_EMPTY, type ResolutionContext } from './reference' @@ -247,6 +248,40 @@ describe('BlockResolver', () => { expect(resolver.resolve('', ctx)).toBe('alice@test.com') }) + it('should resolve nested scalar paths inside compacted block references', async () => { + const workflow = createTestWorkflow([{ id: 'source' }]) + const resolver = new BlockResolver(workflow) + const compacted = await compactExecutionPayload( + { + user: { profile: { name: 'Alice' } }, + items: Array.from({ length: 100 }, (_, index) => ({ id: index })), + }, + { thresholdBytes: 64 } + ) + const ctx = createTestContext('current', { source: 
compacted }) + + expect(resolver.resolve('', ctx)).toBe('Alice') + expect(resolver.resolve('', ctx)).toBe(1) + expect(() => resolver.resolve('', ctx)).toThrow('too large to inline') + }) + + it('should reject full container references that contain compacted children', async () => { + const workflow = createTestWorkflow([{ id: 'source' }]) + const resolver = new BlockResolver(workflow) + const compacted = await compactExecutionPayload( + { + metadata: { id: 'event-1' }, + attachment: { body: 'x'.repeat(2048) }, + }, + { thresholdBytes: 256, preserveRoot: true } + ) + const ctx = createTestContext('current', { source: compacted }) + + expect(resolver.resolve('', ctx)).toBe('event-1') + expect(() => resolver.resolve('', ctx)).toThrow('too large to inline') + expect(() => resolver.resolve('', ctx)).toThrow('too large to inline') + }) + it.concurrent('should resolve array index in path', () => { const workflow = createTestWorkflow([{ id: 'source' }]) const resolver = new BlockResolver(workflow) diff --git a/apps/sim/executor/variables/resolvers/block.ts b/apps/sim/executor/variables/resolvers/block.ts index e1a5be03f7a..b1fcb0bbf7f 100644 --- a/apps/sim/executor/variables/resolvers/block.ts +++ b/apps/sim/executor/variables/resolvers/block.ts @@ -1,3 +1,4 @@ +import { assertNoLargeValueRefs } from '@/lib/execution/payloads/large-value-ref' import { isReference, normalizeName, @@ -82,6 +83,7 @@ export class BlockResolver implements Resolver { })! 
if (result.value !== undefined) { + assertNoLargeValueRefs(result.value) return result.value } diff --git a/apps/sim/executor/variables/resolvers/loop.test.ts b/apps/sim/executor/variables/resolvers/loop.test.ts index 3d3b643b516..3da6e189d0e 100644 --- a/apps/sim/executor/variables/resolvers/loop.test.ts +++ b/apps/sim/executor/variables/resolvers/loop.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it } from 'vitest' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import type { LoopScope } from '@/executor/execution/state' import { InvalidFieldError } from '@/executor/utils/block-reference' import { LoopResolver } from './loop' @@ -232,6 +233,9 @@ describe('LoopResolver', () => { const ctx = createTestContext('block-1', loopScope) expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow( + 'Available fields: index' + ) }) it.concurrent('should handle iteration index 0 correctly', () => { @@ -361,7 +365,7 @@ describe('LoopResolver', () => { expect(resolver.resolve('', ctx)).toBe(4) }) - it.concurrent('should return undefined for index when block is outside the loop', () => { + it.concurrent('should throw for contextual fields when block is outside the loop', () => { const workflow = createTestWorkflow({ 'loop-1': { nodes: ['block-1'] } }, [ { id: 'loop-1', name: 'Loop 1' }, ]) @@ -370,7 +374,8 @@ describe('LoopResolver', () => { const loopExecutions = new Map([['loop-1', loopScope]]) const ctx = createTestContext('block-outside', undefined, loopExecutions) - expect(resolver.resolve('', ctx)).toBeUndefined() + expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow('Available fields: results') }) it.concurrent('should resolve result from anywhere after loop completes', () => { @@ -399,6 +404,25 @@ describe('LoopResolver', () => { expect(resolver.resolve('', ctx)).toEqual([{ response: 'a' }]) 
expect(resolver.resolve('', ctx)).toBe('b') + expect(resolver.resolve('', ctx)).toBe('b') + }) + + it('should resolve nested paths inside compacted result references', async () => { + const workflow = createTestWorkflow({ 'loop-1': { nodes: ['block-1'] } }, [ + { id: 'loop-1', name: 'Loop 1' }, + ]) + const resolver = new LoopResolver(workflow) + const compacted = await compactExecutionPayload( + { results: [[{ response: 'a' }], [{ response: 'b', payload: 'x'.repeat(2048) }]] }, + { thresholdBytes: 256 } + ) + const ctx = createTestContext('block-outside', undefined, new Map(), { + 'loop-1': compacted, + }) + + expect(resolver.resolve('', ctx)).toBe('b') + expect(resolver.resolve('', ctx)).toBe('b') + expect(() => resolver.resolve('', ctx)).toThrow('too large to inline') }) it.concurrent('should resolve forEach properties via named reference', () => { @@ -427,6 +451,20 @@ describe('LoopResolver', () => { const ctx = createTestContext('block-1', undefined, loopExecutions) expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow('Available fields: index') + }) + + it.concurrent('should list only results for unknown fields outside a named loop', () => { + const workflow = createTestWorkflow({ 'loop-1': { nodes: ['block-1'] } }, [ + { id: 'loop-1', name: 'Loop 1' }, + ]) + const resolver = new LoopResolver(workflow) + const loopScope = createLoopScope({ iteration: 0 }) + const loopExecutions = new Map([['loop-1', loopScope]]) + const ctx = createTestContext('block-outside', undefined, loopExecutions) + + expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow('Available fields: results') }) it.concurrent('should not resolve named ref when no matching block exists', () => { diff --git a/apps/sim/executor/variables/resolvers/loop.ts b/apps/sim/executor/variables/resolvers/loop.ts index 8df57668825..db38e05abbe 100644 --- 
a/apps/sim/executor/variables/resolvers/loop.ts +++ b/apps/sim/executor/variables/resolvers/loop.ts @@ -1,4 +1,5 @@ import { createLogger } from '@sim/logger' +import { assertNoLargeValueRefs } from '@/lib/execution/payloads/large-value-ref' import { isReference, normalizeName, parseReferencePath, REFERENCE } from '@/executor/constants' import { InvalidFieldError } from '@/executor/utils/block-reference' import { @@ -10,10 +11,14 @@ import { navigatePath, type ResolutionContext, type Resolver, + splitLeadingBracketPath, } from '@/executor/variables/resolvers/reference' import type { SerializedWorkflow } from '@/serializer/types' const logger = createLogger('LoopResolver') +const LOOP_OUTPUT_FIELDS = ['results'] as const +const LOOP_CONTEXT_FIELDS = ['index'] as const +const FOR_EACH_LOOP_CONTEXT_FIELDS = ['index', 'currentItem', 'items'] as const export class LoopResolver implements Resolver { private loopNameToId: Map @@ -76,34 +81,30 @@ export class LoopResolver implements Resolver { } if (rest.length > 0) { - const property = rest[0] + const { property, pathParts: bracketPathParts } = splitLeadingBracketPath(rest[0]) if (LoopResolver.OUTPUT_PROPERTIES.has(property)) { if (!targetLoopId) { return undefined } - return this.resolveOutput(targetLoopId, rest.slice(1), context) + return this.resolveOutput(targetLoopId, [...bracketPathParts, ...rest.slice(1)], context) } + const isContextual = + isGenericRef || + (targetLoopId !== undefined && + this.isBlockInLoopOrDescendant(context.currentNodeId, targetLoopId)) + if (!LoopResolver.KNOWN_PROPERTIES.has(property)) { - const isForEach = targetLoopId - ? this.isForEachLoop(targetLoopId) - : context.loopScope?.items !== undefined - const availableFields = isForEach - ? 
['index', 'currentItem', 'items', 'result'] - : ['index', 'result'] - throw new InvalidFieldError(firstPart, property, availableFields) + throw new InvalidFieldError( + firstPart, + rest[0], + this.getAvailableFields(targetLoopId, context) + ) } - if (!isGenericRef && targetLoopId) { - if (!this.isBlockInLoopOrDescendant(context.currentNodeId, targetLoopId)) { - logger.warn('Block is not inside the referenced loop', { - reference, - blockId: context.currentNodeId, - loopId: targetLoopId, - }) - return undefined - } + if (!isContextual) { + throw new InvalidFieldError(firstPart, rest[0], [...LOOP_OUTPUT_FIELDS]) } } @@ -130,7 +131,9 @@ export class LoopResolver implements Resolver { return obj } - const [property, ...pathParts] = rest + const [rawProperty, ...remainingPathParts] = rest + const { property, pathParts: bracketPathParts } = splitLeadingBracketPath(rawProperty) + const pathParts = [...bracketPathParts, ...remainingPathParts] let value: any switch (property) { @@ -163,6 +166,7 @@ export class LoopResolver implements Resolver { if (pathParts.length > 0) { return navigatePath(value, pathParts) } + assertNoLargeValueRefs(value) return value } @@ -234,4 +238,22 @@ export class LoopResolver implements Resolver { const loopConfig = this.workflow.loops?.[originalId] return loopConfig?.loopType === 'forEach' } + + private getAvailableFields( + targetLoopId: string | undefined, + context: ResolutionContext + ): string[] { + const isContextual = + targetLoopId === undefined || + this.isBlockInLoopOrDescendant(context.currentNodeId, targetLoopId) + + if (!isContextual) { + return [...LOOP_OUTPUT_FIELDS] + } + + const isForEach = targetLoopId + ? this.isForEachLoop(targetLoopId) + : context.loopScope?.items !== undefined + return isForEach ? 
[...FOR_EACH_LOOP_CONTEXT_FIELDS] : [...LOOP_CONTEXT_FIELDS] + } } diff --git a/apps/sim/executor/variables/resolvers/parallel.test.ts b/apps/sim/executor/variables/resolvers/parallel.test.ts index cec6294f391..e509249fec4 100644 --- a/apps/sim/executor/variables/resolvers/parallel.test.ts +++ b/apps/sim/executor/variables/resolvers/parallel.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it } from 'vitest' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { InvalidFieldError } from '@/executor/utils/block-reference' import { ParallelResolver } from './parallel' import type { ResolutionContext } from './reference' @@ -313,6 +314,9 @@ describe('ParallelResolver', () => { const ctx = createTestContext('block-1₍0₎') expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow( + 'Available fields: index' + ) }) it.concurrent('should return undefined when block is not in any parallel', () => { @@ -428,6 +432,26 @@ describe('ParallelResolver', () => { expect(resolver.resolve('', ctx)).toEqual([{ response: 'a' }]) expect(resolver.resolve('', ctx)).toBe('b') + expect(resolver.resolve('', ctx)).toBe('b') + }) + + it('should resolve nested paths inside compacted result references', async () => { + const workflow = createTestWorkflow( + { 'parallel-1': { nodes: ['block-1'], distribution: ['a', 'b'] } }, + [{ id: 'parallel-1', name: 'Parallel 1' }] + ) + const resolver = new ParallelResolver(workflow) + const compacted = await compactExecutionPayload( + { results: [[{ response: 'a' }], [{ response: 'b', payload: 'x'.repeat(2048) }]] }, + { thresholdBytes: 256 } + ) + const ctx = createTestContext('block-outside', new Map(), { + 'parallel-1': compacted, + }) + + expect(resolver.resolve('', ctx)).toBe('b') + expect(resolver.resolve('', ctx)).toBe('b') + expect(() => resolver.resolve('', ctx)).toThrow('too large to inline') }) it.concurrent('should resolve result with empty 
currentNodeId', () => { @@ -489,6 +513,29 @@ describe('ParallelResolver', () => { const ctx = createTestContext('block-1₍0₎') expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow( + 'Available fields: index, currentItem, items' + ) + }) + + it.concurrent('should list only results for contextual fields outside a named parallel', () => { + const workflow = createTestWorkflow( + { + 'parallel-1': { + nodes: ['block-1'], + distribution: ['a'], + parallelType: 'collection', + }, + }, + [{ id: 'parallel-1', name: 'Parallel 1' }] + ) + const resolver = new ParallelResolver(workflow) + const ctx = createTestContext('block-outside', new Map()) + + expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow('Available fields: results') + expect(() => resolver.resolve('', ctx)).toThrow(InvalidFieldError) + expect(() => resolver.resolve('', ctx)).toThrow('Available fields: results') }) it.concurrent('should not resolve named ref when no matching block exists', () => { diff --git a/apps/sim/executor/variables/resolvers/parallel.ts b/apps/sim/executor/variables/resolvers/parallel.ts index 7afeedece97..712d72b244f 100644 --- a/apps/sim/executor/variables/resolvers/parallel.ts +++ b/apps/sim/executor/variables/resolvers/parallel.ts @@ -1,4 +1,5 @@ import { createLogger } from '@sim/logger' +import { assertNoLargeValueRefs } from '@/lib/execution/payloads/large-value-ref' import { isReference, normalizeName, parseReferencePath, REFERENCE } from '@/executor/constants' import { InvalidFieldError } from '@/executor/utils/block-reference' import { @@ -11,10 +12,14 @@ import { navigatePath, type ResolutionContext, type Resolver, + splitLeadingBracketPath, } from '@/executor/variables/resolvers/reference' import type { SerializedParallel, SerializedWorkflow } from '@/serializer/types' const logger = createLogger('ParallelResolver') +const PARALLEL_OUTPUT_FIELDS = 
['results'] as const +const PARALLEL_CONTEXT_FIELDS = ['index'] as const +const COLLECTION_PARALLEL_CONTEXT_FIELDS = ['index', 'currentItem', 'items'] as const export class ParallelResolver implements Resolver { private parallelNameToId: Map @@ -74,8 +79,15 @@ export class ParallelResolver implements Resolver { ) } - if (rest.length > 0 && ParallelResolver.OUTPUT_PROPERTIES.has(rest[0])) { - return this.resolveOutput(targetParallelId, rest.slice(1), context) + if (rest.length > 0) { + const { property, pathParts: bracketPathParts } = splitLeadingBracketPath(rest[0]) + if (ParallelResolver.OUTPUT_PROPERTIES.has(property)) { + return this.resolveOutput( + targetParallelId, + [...bracketPathParts, ...rest.slice(1)], + context + ) + } } // Look up config using the original (non-cloned) ID @@ -86,15 +98,11 @@ export class ParallelResolver implements Resolver { return undefined } - if (!isGenericRef) { - if (!this.isBlockInParallelOrDescendant(context.currentNodeId, originalParallelId)) { - logger.warn('Block is not inside the referenced parallel', { - reference, - blockId: context.currentNodeId, - parallelId: targetParallelId, - }) - return undefined - } + const isContextual = + isGenericRef || this.isBlockInParallelOrDescendant(context.currentNodeId, originalParallelId) + + if (rest.length > 0 && !isContextual) { + throw new InvalidFieldError(firstPart, rest[0], [...PARALLEL_OUTPUT_FIELDS]) } const branchIndex = extractBranchIndex(context.currentNodeId) @@ -116,15 +124,12 @@ export class ParallelResolver implements Resolver { return result } - const property = rest[0] - const pathParts = rest.slice(1) + const [rawProperty, ...remainingPathParts] = rest + const { property, pathParts: bracketPathParts } = splitLeadingBracketPath(rawProperty) + const pathParts = [...bracketPathParts, ...remainingPathParts] if (!ParallelResolver.KNOWN_PROPERTIES.has(property)) { - const isCollection = parallelConfig.parallelType === 'collection' - const availableFields = isCollection - ? 
['index', 'currentItem', 'items', 'result'] - : ['index', 'result'] - throw new InvalidFieldError(firstPart, property, availableFields) + throw new InvalidFieldError(firstPart, rawProperty, this.getAvailableFields(parallelConfig)) } let value: unknown @@ -236,6 +241,7 @@ export class ParallelResolver implements Resolver { if (pathParts.length > 0) { return navigatePath(value, pathParts) } + assertNoLargeValueRefs(value) return value } @@ -278,4 +284,10 @@ export class ParallelResolver implements Resolver { return [] } + + private getAvailableFields(parallelConfig: SerializedParallel): string[] { + return parallelConfig.parallelType === 'collection' + ? [...COLLECTION_PARALLEL_CONTEXT_FIELDS] + : [...PARALLEL_CONTEXT_FIELDS] + } } diff --git a/apps/sim/executor/variables/resolvers/reference.ts b/apps/sim/executor/variables/resolvers/reference.ts index 35d32272739..389ecaaea83 100644 --- a/apps/sim/executor/variables/resolvers/reference.ts +++ b/apps/sim/executor/variables/resolvers/reference.ts @@ -1,3 +1,5 @@ +import { materializeLargeValueRefSyncOrThrow } from '@/lib/execution/payloads/cache' +import { assertNoLargeValueRefs, isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' import type { ExecutionState, LoopScope } from '@/executor/execution/state' import type { ExecutionContext } from '@/executor/types' export interface ResolutionContext { @@ -20,6 +22,19 @@ export interface Resolver { */ export const RESOLVED_EMPTY = Symbol('RESOLVED_EMPTY') +export function splitLeadingBracketPath(part: string): { property: string; pathParts: string[] } { + const bracketMatch = part.match(/^([^[]+)((?:\[\d+\])+)$/) + if (!bracketMatch) { + return { property: part, pathParts: [] } + } + + const indices = bracketMatch[2].match(/\[(\d+)\]/g) ?? [] + return { + property: bracketMatch[1], + pathParts: indices.map((indexMatch) => indexMatch.slice(1, -1)), + } +} + /** * Navigate through nested object properties using a path array. 
* Supports dot notation and array indices. @@ -31,6 +46,10 @@ export const RESOLVED_EMPTY = Symbol('RESOLVED_EMPTY') export function navigatePath(obj: any, path: string[]): any { let current = obj for (const part of path) { + if (isLargeValueRef(current)) { + current = materializeLargeValueRefSyncOrThrow(current) + } + if (current === null || current === undefined) { return undefined } @@ -42,6 +61,9 @@ export function navigatePath(obj: any, path: string[]): any { typeof current === 'object' && current !== null ? (current as Record)[prop] : undefined + if (isLargeValueRef(current)) { + current = materializeLargeValueRefSyncOrThrow(current) + } if (current === undefined || current === null) { return undefined } @@ -52,6 +74,9 @@ export function navigatePath(obj: any, path: string[]): any { if (current === null || current === undefined) { return undefined } + if (isLargeValueRef(current)) { + current = materializeLargeValueRefSyncOrThrow(current) + } const idx = Number.parseInt(indexMatch.slice(1, -1), 10) current = Array.isArray(current) ? 
current[idx] : undefined } @@ -66,5 +91,6 @@ export function navigatePath(obj: any, path: string[]): any { : undefined } } + assertNoLargeValueRefs(current) return current } diff --git a/apps/sim/lib/api/contracts/execution-payloads.ts b/apps/sim/lib/api/contracts/execution-payloads.ts new file mode 100644 index 00000000000..4eaac6358ab --- /dev/null +++ b/apps/sim/lib/api/contracts/execution-payloads.ts @@ -0,0 +1,21 @@ +import { z } from 'zod' +import { + LARGE_VALUE_KINDS, + LARGE_VALUE_REF_MARKER, + LARGE_VALUE_REF_VERSION, +} from '@/lib/execution/payloads/large-value-ref' + +export const largeValueRefSchema = z + .object({ + [LARGE_VALUE_REF_MARKER]: z.literal(true), + version: z.literal(LARGE_VALUE_REF_VERSION), + id: z.string().regex(/^lv_[A-Za-z0-9_-]{12}$/, 'Invalid large value reference ID'), + kind: z.enum(LARGE_VALUE_KINDS), + size: z.number().int().positive(), + key: z.string().optional(), + executionId: z.string().optional(), + preview: z.unknown().optional(), + }) + .strict() + +export type LargeValueRefResponse = z.output diff --git a/apps/sim/lib/api/contracts/index.ts b/apps/sim/lib/api/contracts/index.ts index 062c01a5156..c8a079637d2 100644 --- a/apps/sim/lib/api/contracts/index.ts +++ b/apps/sim/lib/api/contracts/index.ts @@ -11,6 +11,7 @@ export * from './credential-sets' export * from './credentials' export * from './demo-requests' export * from './environment' +export * from './execution-payloads' export * from './file-uploads' export * from './folders' export * from './hotspots' diff --git a/apps/sim/lib/core/utils/response-format.ts b/apps/sim/lib/core/utils/response-format.ts index 7223f17e0fa..7512cc50b59 100644 --- a/apps/sim/lib/core/utils/response-format.ts +++ b/apps/sim/lib/core/utils/response-format.ts @@ -1,4 +1,6 @@ import { createLogger } from '@sim/logger' +import { materializeLargeValueRefSyncOrThrow } from '@/lib/execution/payloads/cache' +import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' const 
logger = createLogger('ResponseFormatUtils') @@ -196,6 +198,10 @@ function traverseObjectPathInternal(obj: any, path: string): any { const parts = path.split('.') for (const part of parts) { + if (isLargeValueRef(current)) { + current = materializeLargeValueRefSyncOrThrow(current) + } + if (current?.[part] !== undefined) { current = current[part] } else { @@ -203,6 +209,10 @@ function traverseObjectPathInternal(obj: any, path: string): any { } } + if (isLargeValueRef(current)) { + return current + } + return current } diff --git a/apps/sim/lib/execution/event-buffer.test.ts b/apps/sim/lib/execution/event-buffer.test.ts index 7e03ab8954d..d0b4024f3aa 100644 --- a/apps/sim/lib/execution/event-buffer.test.ts +++ b/apps/sim/lib/execution/event-buffer.test.ts @@ -287,6 +287,41 @@ describe('execution event buffer', () => { expect(mockRedis.hset).toHaveBeenCalledWith('meta', { status: 'complete' }) }) + it('preserves requested UserFile base64 when buffering terminal events', async () => { + mockRedis.incrby.mockResolvedValue(100) + const base64 = Buffer.from('hello').toString('base64') + const writer = createExecutionEventWriter('exec-1', { preserveUserFileBase64: true }) + + await writer.writeTerminal( + { + type: 'execution:completed', + timestamp: new Date().toISOString(), + executionId: 'exec-1', + workflowId: 'wf-1', + data: { + success: true, + duration: 1, + output: { + file: { + id: 'file-1', + name: 'small.txt', + size: 5, + type: 'text/plain', + context: 'execution', + base64, + }, + }, + }, + }, + 'complete' + ) + + const eventData = persistedEntries[0].event.data as { + output: { file: { base64?: string } } + } + expect(eventData.output.file.base64).toBe(base64) + }) + it('retries active meta initialization before giving up', async () => { mockRedis.hset.mockRejectedValueOnce(new Error('meta write failed')).mockResolvedValueOnce(1) diff --git a/apps/sim/lib/execution/event-buffer.ts b/apps/sim/lib/execution/event-buffer.ts index 02f5d750b18..914c095e55a 100644 
--- a/apps/sim/lib/execution/event-buffer.ts +++ b/apps/sim/lib/execution/event-buffer.ts @@ -2,6 +2,9 @@ import { createLogger } from '@sim/logger' import { toError } from '@sim/utils/errors' import { env } from '@/lib/core/config/env' import { getRedisClient } from '@/lib/core/config/redis' +import { LARGE_VALUE_THRESHOLD_BYTES } from '@/lib/execution/payloads/large-value-ref' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' +import type { LargeValueStoreContext } from '@/lib/execution/payloads/store' import type { ExecutionEvent } from '@/lib/workflows/executor/execution-events' const logger = createLogger('ExecutionEventBuffer') @@ -11,6 +14,7 @@ const TTL_SECONDS = 60 * 60 // 1 hour const EVENT_LIMIT = 1000 const RESERVE_BATCH = 100 const FLUSH_INTERVAL_MS = 15 +const FLUSH_MAX_RETRY_INTERVAL_MS = 1000 const FLUSH_MAX_BATCH = 200 const MAX_PENDING_EVENTS = 1000 const ACTIVE_META_ATTEMPTS = 3 @@ -53,6 +57,50 @@ function isExecutionStreamStatus(value: string | undefined): value is ExecutionS return value === 'active' || value === 'complete' || value === 'error' || value === 'cancelled' } +function getJsonSize(value: unknown): number | null { + try { + return Buffer.byteLength(JSON.stringify(value), 'utf8') + } catch { + return null + } +} + +function trimFinalBlockLogsForEventData(data: unknown): unknown { + if (!data || typeof data !== 'object' || Array.isArray(data)) return data + + const record = data as Record + const finalBlockLogs = record.finalBlockLogs + if (!Array.isArray(finalBlockLogs)) return data + const originalSize = getJsonSize(data) + if (originalSize !== null && originalSize <= LARGE_VALUE_THRESHOLD_BYTES) return data + + const total = finalBlockLogs.length + let logs = finalBlockLogs + let trimmed: Record = { + ...record, + finalBlockLogs: logs, + finalBlockLogsTruncated: true, + finalBlockLogsTotal: total, + } + + while (logs.length > 0) { + const size = getJsonSize(trimmed) + if (size !== null && size <= 
LARGE_VALUE_THRESHOLD_BYTES) { + return trimmed + } + + logs = logs.length === 1 ? [] : logs.slice(Math.ceil(logs.length / 2)) + trimmed = { + ...record, + finalBlockLogs: logs, + finalBlockLogsTruncated: true, + finalBlockLogsTotal: total, + } + } + + return trimmed +} + export interface ExecutionStreamMeta { status: ExecutionStreamStatus userId?: string @@ -97,6 +145,37 @@ export interface ExecutionEventWriter { close: () => Promise } +export interface ExecutionEventWriterContext extends LargeValueStoreContext { + requireDurablePayloads?: boolean + preserveUserFileBase64?: boolean +} + +async function compactEventForBuffer( + event: ExecutionEvent, + context: ExecutionEventWriterContext = {} +): Promise { + if (!('data' in event)) { + return event + } + + const compactedData = await compactExecutionPayload(event.data, { + ...context, + executionId: context.executionId ?? event.executionId, + requireDurable: context.requireDurablePayloads, + preserveUserFileBase64: context.preserveUserFileBase64, + preserveRoot: true, + }) + const eventData = trimFinalBlockLogsForEventData(compactedData) + const eventDataSize = getJsonSize(eventData) + if (eventDataSize !== null && eventDataSize > LARGE_VALUE_THRESHOLD_BYTES) { + throw new Error( + `Execution event data remains too large after compaction (${eventDataSize} bytes)` + ) + } + + return { ...event, data: eventData } as ExecutionEvent +} + const memoryExecutionStreams = new Map() function canUseMemoryEventBuffer(): boolean { @@ -169,13 +248,17 @@ function readMemoryEvents(executionId: string, afterEventId: number): ExecutionE } } -function createMemoryExecutionEventWriter(executionId: string): ExecutionEventWriter { +function createMemoryExecutionEventWriter( + executionId: string, + context: ExecutionEventWriterContext = {} +): ExecutionEventWriter { const writeMemoryEvent = async (event: ExecutionEvent) => { const stream = getMemoryStream(executionId) + const compactEvent = await compactEventForBuffer(event, context) 
const entry = { eventId: stream.nextEventId++, executionId, - event, + event: compactEvent, } stream.events.push(entry) if (stream.events.length > EVENT_LIMIT) { @@ -450,12 +533,15 @@ export async function readExecutionEventsState( } } -export function createExecutionEventWriter(executionId: string): ExecutionEventWriter { +export function createExecutionEventWriter( + executionId: string, + context: ExecutionEventWriterContext = {} +): ExecutionEventWriter { const redis = getRedisClient() if (!redis) { if (canUseMemoryEventBuffer()) { logger.info('createExecutionEventWriter: using in-memory event buffer', { executionId }) - return createMemoryExecutionEventWriter(executionId) + return createMemoryExecutionEventWriter(executionId, context) } logger.warn( 'createExecutionEventWriter: Redis client unavailable, events will not be buffered', @@ -477,13 +563,23 @@ export function createExecutionEventWriter(executionId: string): ExecutionEventW let nextEventId = 0 let maxReservedId = 0 let flushTimer: ReturnType | null = null + let consecutiveFlushFailures = 0 + + const getFlushDelayMs = () => { + if (consecutiveFlushFailures === 0) return FLUSH_INTERVAL_MS + const backoff = Math.min( + FLUSH_INTERVAL_MS * 2 ** Math.min(consecutiveFlushFailures, 6), + FLUSH_MAX_RETRY_INTERVAL_MS + ) + return backoff + Math.floor(Math.random() * FLUSH_INTERVAL_MS) + } - const scheduleFlush = () => { + const scheduleFlush = (delayMs = FLUSH_INTERVAL_MS) => { if (flushTimer) return flushTimer = setTimeout(() => { flushTimer = null void flushPending() - }, FLUSH_INTERVAL_MS) + }, delayMs) } const reserveIds = async (minCount: number) => { @@ -524,11 +620,14 @@ export function createExecutionEventWriter(executionId: string): ExecutionEventW terminalStatus ?? 
'', ...zaddArgs ) + consecutiveFlushFailures = 0 return true } catch (error) { + consecutiveFlushFailures += 1 logger.warn('Failed to flush execution events', { executionId, batchSize: batch.length, + consecutiveFailures: consecutiveFlushFailures, error: toError(error).message, stack: error instanceof Error ? error.stack : undefined, }) @@ -566,7 +665,7 @@ export function createExecutionEventWriter(executionId: string): ExecutionEventW flushPromise = null } if (!ok) { - if (scheduleOnFailure && pending.length > 0) scheduleFlush() + if (scheduleOnFailure && pending.length > 0) scheduleFlush(getFlushDelayMs()) return false } } @@ -577,7 +676,12 @@ export function createExecutionEventWriter(executionId: string): ExecutionEventW await reserveIds(1) } const eventId = nextEventId++ - const entry: ExecutionEventEntry = { eventId, executionId, event } + const compactEvent = await compactEventForBuffer(event, { + ...context, + executionId, + requireDurablePayloads: true, + }) + const entry: ExecutionEventEntry = { eventId, executionId, event: compactEvent } pending.push(entry) if (pending.length >= FLUSH_MAX_BATCH) { await flushPending() @@ -618,7 +722,12 @@ export function createExecutionEventWriter(executionId: string): ExecutionEventW await reserveIds(1) } const eventId = nextEventId++ - const entry: ExecutionEventEntry = { eventId, executionId, event } + const compactEvent = await compactEventForBuffer(event, { + ...context, + executionId, + requireDurablePayloads: true, + }) + const entry: ExecutionEventEntry = { eventId, executionId, event: compactEvent } pending.push(entry) const ok = await flushPending(false, status) if (!ok) { diff --git a/apps/sim/lib/execution/payloads/cache.ts b/apps/sim/lib/execution/payloads/cache.ts new file mode 100644 index 00000000000..0a25955cc98 --- /dev/null +++ b/apps/sim/lib/execution/payloads/cache.ts @@ -0,0 +1,85 @@ +import { + getLargeValueMaterializationError, + isLargeValueRef, + type LargeValueRef, +} from 
'@/lib/execution/payloads/large-value-ref'

/** Lifetime of a cached entry, added to `Date.now()` when the entry is stored. */
const FALLBACK_TTL_MS = 15 * 60 * 1000
/** Ceiling for the summed `size` of all entries held in `inMemoryValues`. */
const MAX_IN_MEMORY_BYTES = 256 * 1024 * 1024

/** One cached value together with the bookkeeping needed to expire and evict it. */
interface CachedLargeValue {
  value: unknown
  size: number
  expiresAt: number
}

// Map iteration follows insertion order, so the first key is always the oldest entry.
const inMemoryValues = new Map<string, CachedLargeValue>()
// Running total of `size` over every entry currently in `inMemoryValues`.
let inMemoryBytes = 0

/**
 * Drops the entry for `id` (if any) and releases its bytes from the running total.
 * Every removal goes through here so the map and `inMemoryBytes` cannot diverge.
 */
function removeCachedValue(id: string): void {
  const entry = inMemoryValues.get(id)
  if (!entry) return
  inMemoryValues.delete(id)
  inMemoryBytes -= entry.size
}

/**
 * Removes every entry whose `expiresAt` is at or before `now`.
 *
 * @param now - Reference timestamp in milliseconds; defaults to the current time.
 */
function cleanupExpiredValues(now = Date.now()): void {
  for (const [id, entry] of inMemoryValues) {
    if (entry.expiresAt <= now) {
      removeCachedValue(id)
    }
  }
}

/**
 * Stores `value` under `id` in the in-process cache, evicting the oldest entries
 * until the new one fits under `MAX_IN_MEMORY_BYTES`.
 *
 * Re-caching an id that is already present replaces the entry and refreshes both its
 * expiry and its eviction position. The call is a no-op when `size` is negative, not a
 * finite number, or larger than the whole cache budget.
 *
 * @param id - Identifier the value can later be looked up by.
 * @param value - The value to keep in memory.
 * @param size - Byte size charged against the cache budget for this value.
 */
export function cacheLargeValue(id: string, value: unknown, size: number): void {
  // A NaN or negative size would corrupt `inMemoryBytes` and silently disable eviction.
  if (!Number.isFinite(size) || size < 0 || size > MAX_IN_MEMORY_BYTES) {
    return
  }

  cleanupExpiredValues()

  // Release the previous entry for this id first; `Map.set` alone would overwrite the
  // value while the old size stayed counted, evicting live entries prematurely.
  removeCachedValue(id)

  while (inMemoryBytes + size > MAX_IN_MEMORY_BYTES && inMemoryValues.size > 0) {
    const oldestId = inMemoryValues.keys().next().value
    if (oldestId === undefined) break
    removeCachedValue(oldestId)
  }

  inMemoryValues.set(id, {
    value,
    size,
    expiresAt: Date.now() + FALLBACK_TTL_MS,
  })
  inMemoryBytes += size
}

/**
 * Looks up the cached value for `ref.id` after purging expired entries.
 *
 * @returns The cached value, or `undefined` when nothing is cached under that id.
 */
export function materializeLargeValueRefSync(ref: LargeValueRef): unknown {
  cleanupExpiredValues()
  return inMemoryValues.get(ref.id)?.value
}

/**
 * Same lookup as `materializeLargeValueRefSync`, but a miss is an error.
 *
 * @throws The error built by `getLargeValueMaterializationError(ref)` when the lookup
 *   yields `undefined`.
 */
export function materializeLargeValueRefSyncOrThrow(ref: LargeValueRef): unknown {
  const materialized = materializeLargeValueRefSync(ref)
  if (materialized === undefined) {
    throw getLargeValueMaterializationError(ref)
  }
  return materialized
}

/**
 * Returns a copy of `value` in which every large-value ref, at any depth, has been
 * replaced by its cached value (recursively, so refs inside cached values are expanded
 * too). Primitives are returned as-is; arrays and objects are rebuilt.
 *
 * @param value - Arbitrary data that may contain large-value refs.
 * @param seen - Objects currently being expanded higher up the recursion; used only to
 *   stop on cycles. Callers normally omit it.
 * @returns The expanded structure. An object that contains itself is returned
 *   unexpanded at the point where the cycle closes.
 * @throws Whatever `materializeLargeValueRefSyncOrThrow` throws for a ref that is not cached.
 */
export function materializeLargeValueRefsSync(
  value: unknown,
  seen = new WeakSet<object>()
): unknown {
  if (isLargeValueRef(value)) {
    return materializeLargeValueRefsSync(materializeLargeValueRefSyncOrThrow(value), seen)
  }

  if (!value || typeof value !== 'object') {
    return value
  }

  if (seen.has(value)) {
    return value
  }

  // Track only the current ancestor chain. The cache returns the same object for a
  // repeated ref id, so a permanent visited set would hand back the second occurrence
  // raw, with its nested refs still unexpanded.
  seen.add(value)
  try {
    if (Array.isArray(value)) {
      return value.map((item) => materializeLargeValueRefsSync(item, seen))
    }

    return Object.fromEntries(
      Object.entries(value).map(([key, entryValue]) => [
        key,
        materializeLargeValueRefsSync(entryValue, seen),
      ])
    )
  } finally {
    seen.delete(value)
  }
}
diff --git a/apps/sim/lib/execution/payloads/hydration.ts b/apps/sim/lib/execution/payloads/hydration.ts new file mode 100644 index 00000000000..ab2f635373e --- /dev/null +++ b/apps/sim/lib/execution/payloads/hydration.ts @@ -0,0 +1,29 @@ +import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' +import { materializeLargeValueRef } from '@/lib/execution/payloads/store' + +export async function warmLargeValueRefs( + value: unknown, + seen = new WeakSet() +): Promise { + if (!value || typeof value !== 'object') { + return + } + + if (isLargeValueRef(value)) { + const materialized = await materializeLargeValueRef(value) + await warmLargeValueRefs(materialized, seen) + return + } + + if (seen.has(value)) { + return + } + seen.add(value) + + if (Array.isArray(value)) { + await Promise.all(value.map((item) => warmLargeValueRefs(item, seen))) + return + } + + await Promise.all(Object.values(value).map((entryValue) => warmLargeValueRefs(entryValue, seen))) +} diff --git a/apps/sim/lib/execution/payloads/large-value-ref.ts b/apps/sim/lib/execution/payloads/large-value-ref.ts new file mode 100644 index 00000000000..89eb4b599c8 --- /dev/null +++ b/apps/sim/lib/execution/payloads/large-value-ref.ts @@ -0,0 +1,86 @@ +export const LARGE_VALUE_REF_MARKER = '__simLargeValueRef' + +export const LARGE_VALUE_THRESHOLD_BYTES = 8 * 1024 * 1024 +export const LARGE_VALUE_REF_VERSION = 1 + +export const LARGE_VALUE_KINDS = ['array', 'object', 'string', 'json'] as const + +export type LargeValueKind = (typeof LARGE_VALUE_KINDS)[number] + +export interface LargeValueRef { + [LARGE_VALUE_REF_MARKER]: true + version: typeof LARGE_VALUE_REF_VERSION + id: string + kind: LargeValueKind + size: number + key?: string + executionId?: string + preview?: unknown +} + +const LARGE_VALUE_ID_PATTERN = /^lv_[A-Za-z0-9_-]{12}$/ +export function isLargeValueRef(value: unknown): value is LargeValueRef { + if (!value || typeof value !== 'object') return false + + const candidate = value as 
Record + const key = candidate.key + const executionId = candidate.executionId + + return ( + candidate[LARGE_VALUE_REF_MARKER] === true && + candidate.version === LARGE_VALUE_REF_VERSION && + typeof candidate.id === 'string' && + LARGE_VALUE_ID_PATTERN.test(candidate.id) && + typeof candidate.kind === 'string' && + (LARGE_VALUE_KINDS as readonly string[]).includes(candidate.kind) && + typeof candidate.size === 'number' && + Number.isFinite(candidate.size) && + candidate.size > 0 && + (key === undefined || typeof key === 'string') && + (executionId === undefined || typeof executionId === 'string') + ) +} + +export function containsLargeValueRef( + value: unknown, + seen = new WeakSet() +): LargeValueRef | null { + if (!value || typeof value !== 'object') return null + if (isLargeValueRef(value)) return value + if (seen.has(value)) return null + + seen.add(value) + + if (Array.isArray(value)) { + for (const item of value) { + const ref = containsLargeValueRef(item, seen) + if (ref) return ref + } + return null + } + + for (const entryValue of Object.values(value)) { + const ref = containsLargeValueRef(entryValue, seen) + if (ref) return ref + } + + return null +} + +export function getLargeValueMaterializationError(ref: LargeValueRef): Error { + return new Error( + `This execution value is too large to inline (${formatLargeValueSize(ref.size)}). 
Select a nested field or reduce the amount of data passed between blocks.` + ) +} + +function formatLargeValueSize(bytes: number): string { + const megabytes = bytes / (1024 * 1024) + return `${megabytes.toFixed(1)} MB` +} + +export function assertNoLargeValueRefs(value: unknown): void { + const ref = containsLargeValueRef(value) + if (ref) { + throw getLargeValueMaterializationError(ref) + } +} diff --git a/apps/sim/lib/execution/payloads/serializer.test.ts b/apps/sim/lib/execution/payloads/serializer.test.ts new file mode 100644 index 00000000000..c8a429ccc78 --- /dev/null +++ b/apps/sim/lib/execution/payloads/serializer.test.ts @@ -0,0 +1,103 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { + getLargeValueMaterializationError, + isLargeValueRef, +} from '@/lib/execution/payloads/large-value-ref' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' +import type { UserFile } from '@/executor/types' +import { navigatePath } from '@/executor/variables/resolvers/reference' + +describe('compactExecutionPayload', () => { + it('keeps small JSON payloads inline', async () => { + const value = { result: { id: 'event-1', text: 'hello' } } + + await expect(compactExecutionPayload(value, { thresholdBytes: 1024 })).resolves.toEqual(value) + }) + + it('strips UserFile base64 by default while preserving metadata', async () => { + const file: UserFile = { + id: 'file-1', + name: 'large.txt', + url: 'https://example.com/file', + size: 11 * 1024 * 1024, + type: 'text/plain', + key: 'execution/workflow/execution/large.txt', + context: 'execution', + base64: 'Zm9v', + } + + const compacted = await compactExecutionPayload( + { event: { files: [file] } }, + { thresholdBytes: 1024 } + ) + + expect(compacted).toEqual({ + event: { + files: [ + { + id: 'file-1', + name: 'large.txt', + url: 'https://example.com/file', + size: 11 * 1024 * 1024, + type: 'text/plain', + key: 'execution/workflow/execution/large.txt', + 
context: 'execution', + }, + ], + }, + }) + }) + + it('stores oversized arrays as refs and allows nested path navigation in-process', async () => { + const results = Array.from({ length: 100 }, (_, index) => [{ event: { id: `event-${index}` } }]) + const compacted = await compactExecutionPayload({ results }, { thresholdBytes: 256 }) + + expect(isLargeValueRef(compacted.results)).toBe(true) + expect(navigatePath(compacted, ['results', '1', '0', 'event', 'id'])).toBe('event-1') + }) + + it('does not double-spill existing refs', async () => { + const compacted = await compactExecutionPayload( + { results: [[{ payload: 'x'.repeat(2048) }]] }, + { thresholdBytes: 256 } + ) + + const compactedAgain = await compactExecutionPayload(compacted, { thresholdBytes: 256 }) + + expect(compactedAgain).toEqual(compacted) + }) + + it('rejects durable compaction when storage context is incomplete', async () => { + await expect( + compactExecutionPayload( + { payload: 'x'.repeat(2048) }, + { thresholdBytes: 256, requireDurable: true } + ) + ).rejects.toThrow('Cannot persist large execution value') + }) + + it('does not treat loosely marker-shaped user data as a large-value ref', () => { + expect( + isLargeValueRef({ + __simLargeValueRef: true, + id: 'user-supplied', + }) + ).toBe(false) + }) + + it('omits opaque ref IDs from user-facing materialization errors', () => { + const error = getLargeValueMaterializationError({ + __simLargeValueRef: true, + version: 1, + id: 'lv_CQcekP8gSJI5', + kind: 'string', + size: 23_259_101, + }) + + expect(error.message).toContain('This execution value is too large to inline (22.2 MB)') + expect(error.message).not.toContain('lv_CQcekP8gSJI5') + }) +}) diff --git a/apps/sim/lib/execution/payloads/serializer.ts b/apps/sim/lib/execution/payloads/serializer.ts new file mode 100644 index 00000000000..04450f3362c --- /dev/null +++ b/apps/sim/lib/execution/payloads/serializer.ts @@ -0,0 +1,124 @@ +import { isUserFileWithMetadata } from 
'@/lib/core/utils/user-file' +import { + isLargeValueRef, + LARGE_VALUE_THRESHOLD_BYTES, +} from '@/lib/execution/payloads/large-value-ref' +import { type LargeValueStoreContext, storeLargeValue } from '@/lib/execution/payloads/store' +import type { BlockLog } from '@/executor/types' + +export interface CompactExecutionPayloadOptions extends LargeValueStoreContext { + thresholdBytes?: number + preserveUserFileBase64?: boolean + preserveRoot?: boolean +} + +interface CompactState { + seen: WeakSet +} + +function getJsonAndSize(value: unknown): { json: string; size: number } | null { + try { + const json = JSON.stringify(value) + if (json === undefined) { + return null + } + return { + json, + size: Buffer.byteLength(json, 'utf8'), + } + } catch { + return null + } +} + +function stripUserFileBase64(value: T): Omit { + const { base64: _base64, ...rest } = value + return rest +} + +async function compactValue( + value: unknown, + options: CompactExecutionPayloadOptions, + state: CompactState, + depth = 0 +): Promise { + if (!value || typeof value !== 'object') { + const measured = getJsonAndSize(value) + if (measured && measured.size > (options.thresholdBytes ?? LARGE_VALUE_THRESHOLD_BYTES)) { + return options.preserveRoot && depth === 0 + ? value + : storeLargeValue(value, measured.json, measured.size, options) + } + return value + } + + if (isLargeValueRef(value)) { + return value + } + + if (isUserFileWithMetadata(value) && !options.preserveUserFileBase64) { + return stripUserFileBase64(value) + } + + if (state.seen.has(value)) { + return value + } + state.seen.add(value) + + const compacted = Array.isArray(value) + ? await Promise.all(value.map((item) => compactValue(item, options, state, depth + 1))) + : Object.fromEntries( + await Promise.all( + Object.entries(value).map(async ([key, entryValue]) => [ + key, + key === 'finalBlockLogs' && Array.isArray(entryValue) + ? 
await compactBlockLogs(entryValue as BlockLog[], options) + : await compactValue(entryValue, options, state, depth + 1), + ]) + ) + ) + + const measured = getJsonAndSize(compacted) + if (measured && measured.size > (options.thresholdBytes ?? LARGE_VALUE_THRESHOLD_BYTES)) { + return options.preserveRoot && depth === 0 + ? compacted + : storeLargeValue(compacted, measured.json, measured.size, options) + } + + return compacted +} + +export async function compactExecutionPayload( + value: T, + options: CompactExecutionPayloadOptions = {} +): Promise { + return (await compactValue(value, options, { seen: new WeakSet() })) as T +} + +export async function compactBlockLogs( + logs: BlockLog[] | undefined, + options: CompactExecutionPayloadOptions = {} +): Promise { + if (!logs) { + return logs + } + + return Promise.all( + logs.map(async (log) => { + const compactedLog = { ...log } + if ('input' in compactedLog) { + compactedLog.input = await compactExecutionPayload(compactedLog.input, options) + } + if ('output' in compactedLog) { + compactedLog.output = await compactExecutionPayload(compactedLog.output, options) + } + if ('childTraceSpans' in compactedLog) { + compactedLog.childTraceSpans = await compactExecutionPayload( + compactedLog.childTraceSpans, + options + ) + } + return compactedLog + }) + ) +} diff --git a/apps/sim/lib/execution/payloads/store.ts b/apps/sim/lib/execution/payloads/store.ts new file mode 100644 index 00000000000..4dd8c902fb3 --- /dev/null +++ b/apps/sim/lib/execution/payloads/store.ts @@ -0,0 +1,149 @@ +import { createLogger } from '@sim/logger' +import { toError } from '@sim/utils/errors' +import { generateShortId } from '@sim/utils/id' +import { cacheLargeValue, materializeLargeValueRefSync } from '@/lib/execution/payloads/cache' +import { + LARGE_VALUE_REF_VERSION, + type LargeValueKind, + type LargeValueRef, +} from '@/lib/execution/payloads/large-value-ref' +import { generateExecutionFileKey } from '@/lib/uploads/contexts/execution/utils' + 
+const logger = createLogger('LargeExecutionPayloadStore') + +export interface LargeValueStoreContext { + workspaceId?: string + workflowId?: string + executionId?: string + userId?: string + requireDurable?: boolean +} + +function getKind(value: unknown): LargeValueKind { + if (typeof value === 'string') return 'string' + if (Array.isArray(value)) return 'array' + if (value && typeof value === 'object') return 'object' + return 'json' +} + +function getPreview(value: unknown): unknown { + if (typeof value === 'string') { + return value.length > 256 ? `${value.slice(0, 256)}...` : value + } + if (Array.isArray(value)) { + return { length: value.length } + } + if (value && typeof value === 'object') { + return { keys: Object.keys(value).slice(0, 20) } + } + return value +} + +function isValidLargeValueKey(ref: LargeValueRef): boolean { + if (!ref.key) return false + if (!ref.key.startsWith('execution/')) return false + if (!ref.key.endsWith(`/large-value-${ref.id}.json`)) return false + if (ref.executionId && !ref.key.includes(`/${ref.executionId}/`)) return false + return true +} + +async function persistValue( + id: string, + json: string, + context: LargeValueStoreContext +): Promise { + const { workspaceId, workflowId, executionId, userId } = context + if (!workspaceId || !workflowId || !executionId) { + if (context.requireDurable) { + throw new Error( + 'Cannot persist large execution value without workspace, workflow, and execution IDs' + ) + } + return undefined + } + + const key = generateExecutionFileKey( + { workspaceId, workflowId, executionId }, + `large-value-${id}.json` + ) + + try { + const { StorageService } = await import('@/lib/uploads') + const fileInfo = await StorageService.uploadFile({ + file: Buffer.from(json, 'utf8'), + fileName: key, + contentType: 'application/json', + context: 'execution', + preserveKey: true, + customKey: key, + metadata: { + originalName: `large-value-${id}.json`, + uploadedAt: new Date().toISOString(), + purpose: 
'execution-large-value', + workspaceId, + ...(userId ? { userId } : {}), + }, + }) + return fileInfo.key + } catch (error) { + if (context.requireDurable) { + throw new Error(`Failed to persist large execution value: ${toError(error).message}`) + } + logger.warn('Failed to persist large execution value, keeping in memory only', { + id, + error: toError(error).message, + }) + return undefined + } +} + +export async function storeLargeValue( + value: unknown, + json: string, + size: number, + context: LargeValueStoreContext +): Promise { + const id = `lv_${generateShortId(12)}` + const key = await persistValue(id, json, context) + cacheLargeValue(id, value, size) + + return { + __simLargeValueRef: true, + version: LARGE_VALUE_REF_VERSION, + id, + kind: getKind(value), + size, + key, + executionId: context.executionId, + preview: getPreview(value), + } +} + +export async function materializeLargeValueRef(ref: LargeValueRef): Promise { + const cached = materializeLargeValueRefSync(ref) + if (cached !== undefined) { + return cached + } + + if (!ref.key || !isValidLargeValueKey(ref)) { + return undefined + } + + try { + const { StorageService } = await import('@/lib/uploads') + const buffer = await StorageService.downloadFile({ + key: ref.key, + context: 'execution', + }) + const value = JSON.parse(buffer.toString('utf8')) + cacheLargeValue(ref.id, value, ref.size) + return value + } catch (error) { + logger.warn('Failed to materialize persisted large execution value', { + id: ref.id, + key: ref.key, + error, + }) + return undefined + } +} diff --git a/apps/sim/lib/uploads/contexts/execution/execution-file-manager.ts b/apps/sim/lib/uploads/contexts/execution/execution-file-manager.ts index 6c237668c73..4665b6fc228 100644 --- a/apps/sim/lib/uploads/contexts/execution/execution-file-manager.ts +++ b/apps/sim/lib/uploads/contexts/execution/execution-file-manager.ts @@ -114,7 +114,6 @@ export async function uploadExecutionFile( url: presignedUrl, key: fileInfo.key, context: 
'execution', - base64: fileBuffer.toString('base64'), } logger.info(`Successfully uploaded execution file: ${fileName} (${fileBuffer.length} bytes)`, { diff --git a/apps/sim/lib/uploads/utils/user-file-base64.server.test.ts b/apps/sim/lib/uploads/utils/user-file-base64.server.test.ts new file mode 100644 index 00000000000..5e016a0c77e --- /dev/null +++ b/apps/sim/lib/uploads/utils/user-file-base64.server.test.ts @@ -0,0 +1,43 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { hydrateUserFilesWithBase64 } from '@/lib/uploads/utils/user-file-base64.server' +import type { UserFile } from '@/executor/types' + +describe('hydrateUserFilesWithBase64', () => { + it('strips existing base64 when it exceeds maxBytes', async () => { + const file: UserFile = { + id: 'file-1', + name: 'large.txt', + key: 'execution/workspace/workflow/execution/large.txt', + url: 'https://example.com/large.txt', + size: 5, + type: 'text/plain', + context: 'execution', + base64: Buffer.from('hello').toString('base64'), + } + + const hydrated = await hydrateUserFilesWithBase64({ file }, { maxBytes: 1 }) + + expect(hydrated.file).not.toHaveProperty('base64') + }) + + it('keeps existing base64 when it is within maxBytes', async () => { + const base64 = Buffer.from('hello').toString('base64') + const file: UserFile = { + id: 'file-1', + name: 'small.txt', + key: 'execution/workspace/workflow/execution/small.txt', + url: 'https://example.com/small.txt', + size: 5, + type: 'text/plain', + context: 'execution', + base64, + } + + const hydrated = await hydrateUserFilesWithBase64({ file }, { maxBytes: 10 }) + + expect(hydrated.file.base64).toBe(base64) + }) +}) diff --git a/apps/sim/lib/uploads/utils/user-file-base64.server.ts b/apps/sim/lib/uploads/utils/user-file-base64.server.ts index 3aa2f219eb1..85d52ea77a3 100644 --- a/apps/sim/lib/uploads/utils/user-file-base64.server.ts +++ b/apps/sim/lib/uploads/utils/user-file-base64.server.ts @@ -3,11 +3,15 @@ 
import { createLogger } from '@sim/logger' import { getRedisClient } from '@/lib/core/config/redis' import { getMaxExecutionTimeout } from '@/lib/core/execution-limits' import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' +import { LARGE_VALUE_THRESHOLD_BYTES } from '@/lib/execution/payloads/large-value-ref' import { bufferToBase64 } from '@/lib/uploads/utils/file-utils' import { downloadFileFromStorage, downloadFileFromUrl } from '@/lib/uploads/utils/file-utils.server' import type { UserFile } from '@/executor/types' -const DEFAULT_MAX_BASE64_BYTES = 10 * 1024 * 1024 +const INLINE_BASE64_JSON_OVERHEAD_BYTES = 512 * 1024 +const DEFAULT_MAX_BASE64_BYTES = Math.floor( + (LARGE_VALUE_THRESHOLD_BYTES - INLINE_BASE64_JSON_OVERHEAD_BYTES) * 0.75 +) const DEFAULT_TIMEOUT_MS = getMaxExecutionTimeout() const DEFAULT_CACHE_TTL_SECONDS = 300 const REDIS_KEY_PREFIX = 'user-file:base64:' @@ -118,16 +122,30 @@ function getFullCacheKey(executionId: string | undefined, file: UserFile): strin return `${REDIS_KEY_PREFIX}${fileKey}` } +function stripBase64(file: UserFile): UserFile { + const { base64: _base64, ...rest } = file + return rest +} + async function resolveBase64( file: UserFile, options: Base64HydrationOptions, logger: Logger ): Promise { + const requestedMaxBytes = options.maxBytes ?? DEFAULT_MAX_BASE64_BYTES + const maxBytes = Math.min(requestedMaxBytes, DEFAULT_MAX_BASE64_BYTES) + if (file.base64) { + const base64Bytes = Buffer.byteLength(file.base64, 'base64') + if (base64Bytes > maxBytes) { + logger.warn( + `[${options.requestId}] Skipping existing base64 for ${file.name} (decoded ${base64Bytes} exceeds ${maxBytes})` + ) + return null + } return file.base64 } - const maxBytes = options.maxBytes ?? DEFAULT_MAX_BASE64_BYTES const allowUnknownSize = options.allowUnknownSize ?? false const timeoutMs = options.timeoutMs ?? 
DEFAULT_TIMEOUT_MS const hasStableStorageKey = Boolean(file.key) @@ -192,12 +210,19 @@ async function hydrateUserFile( ): Promise { const cached = await state.cache.get(file) if (cached) { + const maxBytes = Math.min( + options.maxBytes ?? DEFAULT_MAX_BASE64_BYTES, + DEFAULT_MAX_BASE64_BYTES + ) + if (Buffer.byteLength(cached, 'base64') > maxBytes) { + return stripBase64(file) + } return { ...file, base64: cached } } const base64 = await resolveBase64(file, options, logger) if (!base64) { - return file + return stripBase64(file) } await state.cache.set(file, base64, state.cacheTtlSeconds) diff --git a/apps/sim/lib/workflows/executor/execution-core.ts b/apps/sim/lib/workflows/executor/execution-core.ts index 22b58c5e707..d47e6f02af1 100644 --- a/apps/sim/lib/workflows/executor/execution-core.ts +++ b/apps/sim/lib/workflows/executor/execution-core.ts @@ -10,6 +10,7 @@ import { z } from 'zod' import { isPlainRecord } from '@/lib/core/utils/records' import { getPersonalAndWorkspaceEnv } from '@/lib/environment/utils' import { clearExecutionCancellation } from '@/lib/execution/cancellation' +import { warmLargeValueRefs } from '@/lib/execution/payloads/hydration' import type { LoggingSession } from '@/lib/logs/execution/logging-session' import { buildTraceSpans } from '@/lib/logs/execution/trace-spans/trace-spans' import { @@ -582,6 +583,13 @@ export async function executeWorkflowCore( callChain: metadata.callChain, } + if (snapshot.state) { + await warmLargeValueRefs(snapshot.state) + } + if (runFromBlock?.sourceSnapshot) { + await warmLargeValueRefs(runFromBlock.sourceSnapshot) + } + for (const variable of Object.values(workflowVariables)) { if ( isPlainRecord(variable) && diff --git a/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts b/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts index 330fe93e14c..305dcbba0d2 100644 --- a/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts +++ 
b/apps/sim/lib/workflows/executor/human-in-the-loop-manager.ts @@ -13,6 +13,7 @@ import { resetExecutionStreamBuffer, type TerminalExecutionStreamStatus, } from '@/lib/execution/event-buffer' +import { compactBlockLogs, compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { preprocessExecution } from '@/lib/execution/preprocessing' import { LoggingSession } from '@/lib/logs/execution/logging-session' import { executeWorkflowCore } from '@/lib/workflows/executor/execution-core' @@ -25,6 +26,7 @@ import type { SerializableExecutionState, } from '@/executor/execution/types' import type { + BlockLog, ExecutionResult, PauseKind, PausePoint, @@ -964,7 +966,12 @@ export class PauseResumeManager { throw new Error(RUN_BUFFER_UNAVAILABLE_ERROR) } - const eventWriter = createExecutionEventWriter(resumeExecutionId) + const eventWriter = createExecutionEventWriter(resumeExecutionId, { + workspaceId: metadata.workspaceId, + workflowId, + userId: metadata.userId, + preserveUserFileBase64: true, + }) const metaInitialized = await initializeExecutionStreamMeta(resumeExecutionId, { userId: metadata.userId, workflowId, @@ -1181,6 +1188,23 @@ export class PauseResumeManager { } } + const compactResultLogs = await compactBlockLogs(result.logs, { + workspaceId: baseSnapshot.metadata.workspaceId, + workflowId, + executionId: resumeExecutionId, + userId: metadata.userId, + requireDurable: true, + }) + const compactResultOutput = await compactExecutionPayload(result.output, { + workspaceId: baseSnapshot.metadata.workspaceId, + workflowId, + executionId: resumeExecutionId, + userId: metadata.userId, + preserveUserFileBase64: true, + preserveRoot: true, + requireDurable: true, + }) + if ( result.status === 'cancelled' && timeoutController?.isTimedOut() && @@ -1203,7 +1227,7 @@ export class PauseResumeManager { data: { error: timeoutErrorMessage, duration: result.metadata?.duration || 0, - finalBlockLogs: result.logs, + finalBlockLogs: compactResultLogs, }, }, 'error' 
@@ -1218,7 +1242,7 @@ export class PauseResumeManager { workflowId, data: { duration: result.metadata?.duration || 0, - finalBlockLogs: result.logs, + finalBlockLogs: compactResultLogs, }, }, 'cancelled' @@ -1232,11 +1256,11 @@ export class PauseResumeManager { executionId: resumeExecutionId, workflowId, data: { - output: result.output, + output: compactResultOutput, duration: result.metadata?.duration || 0, startTime: result.metadata?.startTime || new Date().toISOString(), endTime: result.metadata?.endTime || new Date().toISOString(), - finalBlockLogs: result.logs, + finalBlockLogs: compactResultLogs, }, }, 'complete' @@ -1251,11 +1275,11 @@ export class PauseResumeManager { workflowId, data: { success: result.success, - output: result.output, + output: compactResultOutput, duration: result.metadata?.duration || 0, startTime: result.metadata?.startTime || new Date().toISOString(), endTime: result.metadata?.endTime || new Date().toISOString(), - finalBlockLogs: result.logs, + finalBlockLogs: compactResultLogs, }, }, 'complete' @@ -1264,6 +1288,23 @@ export class PauseResumeManager { } catch (execError) { executionError = execError const execErrorResult = hasExecutionResult(execError) ? execError.executionResult : undefined + let compactErrorLogs: BlockLog[] | undefined + try { + compactErrorLogs = execErrorResult?.logs + ? 
await compactBlockLogs(execErrorResult.logs, { + workspaceId: baseSnapshot.metadata.workspaceId, + workflowId, + executionId: resumeExecutionId, + userId: metadata.userId, + requireDurable: true, + }) + : undefined + } catch (compactionError) { + logger.warn('Failed to compact resume error logs, omitting oversized error details', { + resumeExecutionId, + error: toError(compactionError).message, + }) + } finalMetaStatus = 'error' await writeBufferedEvent( { @@ -1274,7 +1315,7 @@ export class PauseResumeManager { data: { error: toError(execError).message, duration: 0, - finalBlockLogs: execErrorResult?.logs, + finalBlockLogs: compactErrorLogs, }, }, 'error' diff --git a/apps/sim/lib/workflows/executor/queued-workflow-execution.ts b/apps/sim/lib/workflows/executor/queued-workflow-execution.ts index 0106e823195..ad20414cbb1 100644 --- a/apps/sim/lib/workflows/executor/queued-workflow-execution.ts +++ b/apps/sim/lib/workflows/executor/queued-workflow-execution.ts @@ -7,6 +7,7 @@ import { initializeExecutionStreamMeta, type TerminalExecutionStreamStatus, } from '@/lib/execution/event-buffer' +import { compactBlockLogs, compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { LoggingSession } from '@/lib/logs/execution/logging-session' import { buildTraceSpans } from '@/lib/logs/execution/trace-spans/trace-spans' import { @@ -117,7 +118,14 @@ export async function executeQueuedWorkflowJob( const { metadata } = payload const { executionId, requestId, workflowId, triggerType } = metadata const loggingSession = new LoggingSession(workflowId, executionId, triggerType, requestId) - const eventWriter = payload.streamEvents ? createExecutionEventWriter(executionId) : null + const eventWriter = payload.streamEvents + ? 
createExecutionEventWriter(executionId, { + workspaceId: metadata.workspaceId, + workflowId, + userId: metadata.userId, + preserveUserFileBase64: payload.includeFileBase64, + }) + : null let eventWriterClosed = false if (payload.streamEvents) { @@ -180,6 +188,22 @@ export async function executeQueuedWorkflowJob( runFromBlock: payload.runFromBlock, abortSignal: timeoutController.signal, }) + const compactTerminalOutput = await compactExecutionPayload(result.output, { + workspaceId: metadata.workspaceId, + workflowId, + executionId, + userId: metadata.userId, + preserveUserFileBase64: true, + preserveRoot: true, + requireDurable: true, + }) + const compactTerminalLogs = await compactBlockLogs(result.logs, { + workspaceId: metadata.workspaceId, + workflowId, + executionId, + userId: metadata.userId, + requireDurable: true, + }) if ( result.status === 'cancelled' && @@ -202,7 +226,7 @@ export async function executeQueuedWorkflowJob( data: { error: timeoutErrorMessage, duration: result.metadata?.duration || 0, - finalBlockLogs: result.logs, + finalBlockLogs: compactTerminalLogs, }, }, }) @@ -215,9 +239,9 @@ export async function executeQueuedWorkflowJob( 'cancelled', { success: false, - output: result.output, + output: compactTerminalOutput, error: timeoutErrorMessage, - logs: result.logs, + logs: compactTerminalLogs, metadata: result.metadata ? 
{ duration: result.metadata.duration, @@ -240,6 +264,22 @@ export async function executeQueuedWorkflowJob( maxBytes: payload.base64MaxBytes, }) : result.output + const compactOutput = await compactExecutionPayload(outputWithBase64, { + workspaceId: metadata.workspaceId, + workflowId, + executionId, + userId: metadata.userId, + preserveUserFileBase64: true, + preserveRoot: true, + requireDurable: true, + }) + const compactLogs = await compactBlockLogs(result.logs, { + workspaceId: metadata.workspaceId, + workflowId, + executionId, + userId: metadata.userId, + requireDurable: true, + }) if (eventWriter) { if (result.status === 'cancelled') { @@ -254,7 +294,7 @@ export async function executeQueuedWorkflowJob( workflowId, data: { duration: result.metadata?.duration || 0, - finalBlockLogs: result.logs, + finalBlockLogs: compactLogs, }, }, }) @@ -269,11 +309,11 @@ export async function executeQueuedWorkflowJob( executionId, workflowId, data: { - output: outputWithBase64, + output: compactOutput, duration: result.metadata?.duration || 0, startTime: result.metadata?.startTime || metadata.startTime, endTime: result.metadata?.endTime || new Date().toISOString(), - finalBlockLogs: result.logs, + finalBlockLogs: compactLogs, }, }, }) @@ -289,11 +329,11 @@ export async function executeQueuedWorkflowJob( workflowId, data: { success: result.success, - output: outputWithBase64, + output: compactOutput, duration: result.metadata?.duration || 0, startTime: result.metadata?.startTime || metadata.startTime, endTime: result.metadata?.endTime || new Date().toISOString(), - finalBlockLogs: result.logs, + finalBlockLogs: compactLogs, }, }, }) @@ -311,9 +351,9 @@ export async function executeQueuedWorkflowJob( : 'success', { success: result.success, - output: outputWithBase64, + output: compactOutput, error: result.error, - logs: result.logs, + logs: compactLogs, metadata: result.metadata ? 
{ duration: result.metadata.duration, @@ -348,6 +388,32 @@ export async function executeQueuedWorkflowJob( } const executionResult = hasExecutionResult(error) ? error.executionResult : undefined + let compactErrorLogs: BlockLog[] | undefined + let compactErrorOutput: NormalizedBlockOutput = {} + try { + compactErrorLogs = executionResult?.logs + ? await compactBlockLogs(executionResult.logs, { + workspaceId: metadata.workspaceId, + workflowId, + executionId, + userId: metadata.userId, + requireDurable: true, + }) + : undefined + compactErrorOutput = await compactExecutionPayload(executionResult?.output ?? {}, { + workspaceId: metadata.workspaceId, + workflowId, + executionId, + userId: metadata.userId, + preserveRoot: true, + requireDurable: true, + }) + } catch (compactionError) { + logger.warn('Failed to compact queued error payload, omitting oversized error details', { + executionId, + error: toError(compactionError).message, + }) + } if (eventWriter) { eventWriterClosed = await publishTerminalExecutionEvent({ @@ -362,7 +428,7 @@ export async function executeQueuedWorkflowJob( data: { error: toError(error).message, duration: 0, - finalBlockLogs: executionResult?.logs, + finalBlockLogs: compactErrorLogs, }, }, }) @@ -375,9 +441,9 @@ export async function executeQueuedWorkflowJob( 'failed', { success: false, - output: executionResult?.output ?? {}, + output: compactErrorOutput, error: executionResult?.error || toError(error).message, - logs: executionResult?.logs, + logs: compactErrorLogs, metadata: executionResult?.metadata ? 
{ duration: executionResult.metadata.duration, diff --git a/apps/sim/lib/workflows/streaming/streaming.ts b/apps/sim/lib/workflows/streaming/streaming.ts index 759bf4d28e5..b72c085e44f 100644 --- a/apps/sim/lib/workflows/streaming/streaming.ts +++ b/apps/sim/lib/workflows/streaming/streaming.ts @@ -6,6 +6,7 @@ import { traverseObjectPath, } from '@/lib/core/utils/response-format' import { encodeSSE } from '@/lib/core/utils/sse' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import { buildTraceSpans } from '@/lib/logs/execution/trace-spans/trace-spans' import { processStreamingBlockLogs } from '@/lib/tokenization' import { @@ -45,6 +46,9 @@ export interface StreamingResponseOptions { requestId: string streamConfig: StreamingConfig executionId?: string + workspaceId?: string + workflowId?: string + userId?: string executeFn: StreamingExecutorFn } @@ -78,8 +82,18 @@ async function buildMinimalResult( completedBlockIds: Set, requestId: string, includeFileBase64: boolean, - base64MaxBytes: number | undefined + base64MaxBytes: number | undefined, + executionId?: string, + context: Pick = {} ): Promise<{ success: boolean; error?: string; output: Record }> { + const durableContext = { + workspaceId: context.workspaceId, + workflowId: context.workflowId, + executionId, + userId: context.userId, + requireDurable: Boolean(context.workspaceId && context.workflowId && executionId), + } + const minimalResult = { success: result.success, error: result.error, @@ -88,12 +102,20 @@ async function buildMinimalResult( if (result.status === 'paused') { minimalResult.output = result.output || {} - return minimalResult + return compactExecutionPayload(minimalResult, { + ...durableContext, + preserveUserFileBase64: includeFileBase64, + preserveRoot: true, + }) } if (!selectedOutputs?.length) { minimalResult.output = result.output || {} - return minimalResult + return compactExecutionPayload(minimalResult, { + ...durableContext, + preserveUserFileBase64: 
includeFileBase64, + preserveRoot: true, + }) } if (!result.output || !result.logs) { @@ -138,7 +160,11 @@ async function buildMinimalResult( ;(minimalResult.output[blockId] as Record)[path] = value } - return minimalResult + return compactExecutionPayload(minimalResult, { + ...durableContext, + preserveUserFileBase64: includeFileBase64, + preserveRoot: true, + }) } function updateLogsWithStreamedContent( @@ -191,6 +217,13 @@ export async function createStreamingResponse( options: StreamingResponseOptions ): Promise { const { requestId, streamConfig, executionId, executeFn } = options + const durableContext = { + workspaceId: options.workspaceId, + workflowId: options.workflowId, + executionId, + userId: options.userId, + requireDurable: Boolean(options.workspaceId && options.workflowId && executionId), + } const timeoutController = createTimeoutAbortController(streamConfig.timeoutMs) return new ReadableStream({ @@ -285,10 +318,14 @@ export async function createStreamingResponse( maxBytes: base64MaxBytes, }) : outputValue + const compactHydratedOutput = await compactExecutionPayload(hydratedOutput, { + ...durableContext, + preserveUserFileBase64: includeFileBase64, + }) const formattedOutput = - typeof hydratedOutput === 'string' - ? hydratedOutput - : JSON.stringify(hydratedOutput, null, 2) + typeof compactHydratedOutput === 'string' + ? compactHydratedOutput + : JSON.stringify(compactHydratedOutput, null, 2) sendChunk(blockId, formattedOutput) } } @@ -336,7 +373,13 @@ export async function createStreamingResponse( state.completedBlockIds, requestId, streamConfig.includeFileBase64 ?? 
true, - streamConfig.base64MaxBytes + streamConfig.base64MaxBytes, + executionId, + { + workspaceId: options.workspaceId, + workflowId: options.workflowId, + userId: options.userId, + } ) controller.enqueue( diff --git a/apps/sim/lib/workflows/utils.ts b/apps/sim/lib/workflows/utils.ts index 318d6249d6a..30afa6d81d3 100644 --- a/apps/sim/lib/workflows/utils.ts +++ b/apps/sim/lib/workflows/utils.ts @@ -6,6 +6,7 @@ import { authorizeWorkflowByWorkspacePermission } from '@sim/workflow-authz' import { and, asc, eq, inArray, isNull, max, min, sql } from 'drizzle-orm' import { NextResponse } from 'next/server' import { getSession } from '@/lib/auth' +import { materializeLargeValueRefsSync } from '@/lib/execution/payloads/cache' import { getNextWorkflowColor } from '@/lib/workflows/colors' import { buildDefaultWorkflowArtifacts } from '@/lib/workflows/defaults' import { saveWorkflowToNormalizedTables } from '@/lib/workflows/persistence/utils' @@ -319,13 +320,14 @@ export const createHttpResponseFromBlock = ( executionResult: Pick ): NextResponse => { const { data = {}, status = 200, headers = {} } = executionResult.output + const responseData = materializeLargeValueRefsSync(data) const responseHeaders = new Headers({ 'Content-Type': 'application/json', ...headers, }) - return NextResponse.json(data, { + return NextResponse.json(responseData, { status: status, headers: responseHeaders, }) diff --git a/apps/sim/proxy.ts b/apps/sim/proxy.ts index 5a2e2796269..ed642956360 100644 --- a/apps/sim/proxy.ts +++ b/apps/sim/proxy.ts @@ -203,6 +203,6 @@ export const config = { '/signup', '/invite/:path*', // Match invitation routes // Catch-all for other pages, excluding static assets and public directories - '/((?!_next/static|_next/image|ingest|favicon.ico|logo/|static/|footer/|social/|enterprise/|favicon/|twitter/|robots.txt|sitemap.xml).*)', + 
'/((?!api/|api$|_next/static|_next/image|ingest|favicon.ico|logo/|static/|footer/|social/|enterprise/|favicon/|twitter/|robots.txt|sitemap.xml).*)', ], } diff --git a/packages/ts-sdk/README.md b/packages/ts-sdk/README.md index 44d21d0c9ed..cc8b558bcae 100644 --- a/packages/ts-sdk/README.md +++ b/packages/ts-sdk/README.md @@ -226,6 +226,23 @@ interface WorkflowExecutionResult { } ``` +### LargeValueRef + +Oversized execution values may be returned as a versioned reference inside `output`, `logs`, streaming events, or async job status responses. + +```typescript +interface LargeValueRef { + __simLargeValueRef: true; + version: 1; + id: string; + kind: 'array' | 'object' | 'string' | 'json'; + size: number; + key?: string; + executionId?: string; + preview?: unknown; +} +``` + ### WorkflowStatus ```typescript diff --git a/packages/ts-sdk/src/index.ts b/packages/ts-sdk/src/index.ts index 31f7a34f263..be9dc831ce4 100644 --- a/packages/ts-sdk/src/index.ts +++ b/packages/ts-sdk/src/index.ts @@ -5,6 +5,17 @@ export interface SimStudioConfig { baseUrl?: string } +export interface LargeValueRef { + __simLargeValueRef: true + version: 1 + id: string + kind: 'array' | 'object' | 'string' | 'json' + size: number + key?: string + executionId?: string + preview?: unknown +} + export interface WorkflowExecutionResult { success: boolean output?: any From 2c6d0c393f5ed0db79199c8d03339ffc0596f12f Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 11 May 2026 12:20:44 -0700 Subject: [PATCH 02/11] progress --- apps/docs/content/docs/en/blocks/function.mdx | 2 + apps/docs/content/docs/en/blocks/parallel.mdx | 14 +- .../docs/en/execution/api-deployment.mdx | 2 +- apps/realtime/src/database/operations.ts | 5 + .../app/api/workflows/[id]/execute/route.ts | 2 + .../subflow-editor/subflow-editor.tsx | 32 +++ .../editor/hooks/use-subflow-editor.ts | 31 ++- apps/sim/executor/constants.ts | 1 + apps/sim/executor/execution/block-executor.ts | 10 +- 
apps/sim/executor/execution/edge-manager.ts | 4 + apps/sim/executor/execution/executor.ts | 5 +- apps/sim/executor/execution/state.ts | 4 + apps/sim/executor/orchestrators/loop.ts | 48 ++-- apps/sim/executor/orchestrators/node.ts | 16 ++ .../executor/orchestrators/parallel.test.ts | 3 +- apps/sim/executor/orchestrators/parallel.ts | 232 ++++++++++++------ apps/sim/executor/types.ts | 4 + apps/sim/executor/utils/block-reference.ts | 41 +++- apps/sim/executor/utils/parallel-expansion.ts | 22 +- apps/sim/executor/utils/subflow-utils.ts | 77 +++++- apps/sim/executor/variables/resolver.test.ts | 64 ++--- apps/sim/executor/variables/resolver.ts | 167 ++++++++----- .../sim/executor/variables/resolvers/block.ts | 132 +++++++++- apps/sim/executor/variables/resolvers/loop.ts | 54 +++- .../executor/variables/resolvers/parallel.ts | 62 ++++- .../resolvers/reference-async.server.ts | 101 ++++++++ .../executor/variables/resolvers/reference.ts | 7 + apps/sim/hooks/use-collaborative-workflow.ts | 45 +++- apps/sim/hooks/use-undo-redo.ts | 13 + apps/sim/lib/execution/event-buffer.test.ts | 34 ++- apps/sim/lib/execution/event-buffer.ts | 34 ++- apps/sim/lib/execution/payloads/serializer.ts | 38 +++ apps/sim/lib/execution/redis-budget.server.ts | 136 ++++++++++ apps/sim/lib/execution/resource-errors.ts | 45 ++++ .../uploads/utils/user-file-base64.server.ts | 40 +++ .../executor/queued-workflow-execution.ts | 1 + .../search-replace/subflow-fields.ts | 22 +- apps/sim/lib/workflows/streaming/streaming.ts | 1 + apps/sim/serializer/types.ts | 1 + .../stores/workflows/workflow/store.test.ts | 37 ++- apps/sim/stores/workflows/workflow/store.ts | 29 ++- apps/sim/stores/workflows/workflow/types.ts | 1 + apps/sim/stores/workflows/workflow/utils.ts | 12 + packages/workflow-types/src/workflow.ts | 3 + 44 files changed, 1416 insertions(+), 218 deletions(-) create mode 100644 apps/sim/executor/variables/resolvers/reference-async.server.ts create mode 100644 
apps/sim/lib/execution/redis-budget.server.ts create mode 100644 apps/sim/lib/execution/resource-errors.ts diff --git a/apps/docs/content/docs/en/blocks/function.mdx b/apps/docs/content/docs/en/blocks/function.mdx index 90b0f72d26c..2df481b280b 100644 --- a/apps/docs/content/docs/en/blocks/function.mdx +++ b/apps/docs/content/docs/en/blocks/function.mdx @@ -189,6 +189,8 @@ plt.show() Function blocks receive their code, parameters, resolved references, and previous block context in an internal execution request. Sim can safely reference oversized workflow outputs, such as large `loop.results` or `parallel.results`, when you select a smaller nested field like ``. +File outputs are metadata-first by default. Referencing ``, ``, or similar metadata does not hydrate file contents. Referencing `` explicitly hydrates that file's base64 content at resolver time and can fail if the file exceeds the configured inline limit. + Avoid passing a full large object into a Function block when you only need one field. For example, prefer `` over `` when the API response is large. If the complete function request body is still larger than the platform limit, execution can fail before your code starts. For large generated data, write the result to a file or table with `outputPath`, `outputSandboxPath`, or `outputTable` instead of returning the entire payload inline. diff --git a/apps/docs/content/docs/en/blocks/parallel.mdx b/apps/docs/content/docs/en/blocks/parallel.mdx index f3207d901bc..22f2b330865 100644 --- a/apps/docs/content/docs/en/blocks/parallel.mdx +++ b/apps/docs/content/docs/en/blocks/parallel.mdx @@ -34,6 +34,7 @@ Choose between two types of parallel execution: Use this when you need to run the same operation multiple times concurrently. + If the total count is larger than the batch size, Sim runs the work in serial batches while preserving the original result order. 
``` Example: Run 5 parallel instances @@ -57,7 +58,7 @@ Choose between two types of parallel execution: /> - Each instance processes one item from the collection simultaneously. + Each instance processes one item from the collection. Large collections run in serial batches while preserving each item's original index. ``` Example: Process ["task1", "task2", "task3"] in parallel @@ -140,6 +141,12 @@ const allResults = ; // Returns: [result1, result2, result3, ...] ``` +For large result sets, reference only the entry or field you need, such as ``. Sim keeps aggregate results indexable and hydrates stored entries when an indexed path is explicitly referenced. + +### Batch Size + +Parallel blocks run up to 20 branches at a time by default. Increase the total count or collection size to process more work; Sim will execute the next batch after the current batch finishes. You can lower the batch size to reduce concurrency for rate-limited APIs. + ### Instance Isolation Each parallel instance runs independently: @@ -157,7 +164,7 @@ Each parallel instance runs independently: While parallel execution is faster, be mindful of: - API rate limits when making concurrent requests - Memory usage with large datasets - - Maximum of 20 concurrent instances to prevent resource exhaustion + - Maximum of 20 concurrent instances per batch to prevent resource exhaustion ## Parallel vs Loop @@ -186,6 +193,9 @@ Understanding when to use each:
  • Collection: Array or object to distribute (collection-based)
  • +
  • + Batch size: Number of branches to run concurrently, from 1 to 20 +
  • diff --git a/apps/docs/content/docs/en/execution/api-deployment.mdx b/apps/docs/content/docs/en/execution/api-deployment.mdx index 31a5e47d585..d27fbfa3fc2 100644 --- a/apps/docs/content/docs/en/execution/api-deployment.mdx +++ b/apps/docs/content/docs/en/execution/api-deployment.mdx @@ -232,7 +232,7 @@ Workflow execution responses are capped by platform request and response limits. } ``` -The `version` field is part of the external API contract. Treat the reference as an opaque placeholder for a value that could not be safely embedded in the response. `id`, `key`, and `executionId` are not fetch URLs; use `selectedOutputs` to request a smaller nested field, reduce the data passed between blocks, or return the data from a Response block when your workflow intentionally owns the HTTP response body. +The `version` field is part of the external API contract. Treat the reference as an opaque placeholder for a value that could not be safely embedded in the response. `id`, `key`, and `executionId` are not fetch URLs; use `selectedOutputs` to request a smaller nested field, reduce the data passed between blocks, or return the data from a Response block when your workflow intentionally owns the HTTP response body. File outputs are metadata-first; request `.base64` only when you need inline file content. 
### Asynchronous diff --git a/apps/realtime/src/database/operations.ts b/apps/realtime/src/database/operations.ts index 14fa8639eaf..3cb001d2340 100644 --- a/apps/realtime/src/database/operations.ts +++ b/apps/realtime/src/database/operations.ts @@ -742,6 +742,7 @@ async function handleBlocksOperationTx( config: { parallelType: 'fixed', count: DEFAULT_PARALLEL_COUNT, + batchSize: 20, nodes: [], }, }) @@ -1700,6 +1701,10 @@ async function handleSubflowOperationTx( blockData.parallelType = payload.config.parallelType } + if (payload.config.batchSize !== undefined) { + blockData.batchSize = payload.config.batchSize + } + await tx .update(workflowBlocks) .set({ diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index cd1f12e1894..b21f0d0bc4a 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -775,6 +775,7 @@ async function handleExecutePost( ? ((await hydrateUserFilesWithBase64(result.output, { requestId, executionId, + userId: actorUserId, maxBytes: base64MaxBytes, })) as NormalizedBlockOutput) : result.output @@ -1297,6 +1298,7 @@ async function handleExecutePost( ? 
await hydrateUserFilesWithBase64(result.output, { requestId, executionId, + userId: actorUserId, maxBytes: base64MaxBytes, }) : result.output diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx index 94c49837f5c..5b51485258b 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx @@ -53,6 +53,7 @@ export function SubflowEditor({ isCountMode, isConditionMode, inputValue, + batchSizeValue, editorValue, typeOptions, showTagDropdown, @@ -61,6 +62,8 @@ export function SubflowEditor({ handleSubflowTypeChange, handleSubflowIterationsChange, handleSubflowIterationsSave, + handleParallelBatchSizeChange, + handleParallelBatchSizeSave, handleSubflowEditorChange, handleSubflowTagSelect, highlightWithReferences, @@ -80,6 +83,7 @@ export function SubflowEditor({ activeSearchTarget.canonicalSubBlockId === fieldId) const isTypeHighlighted = isSearchHighlighted(WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.type) const isConfigHighlighted = isSearchHighlighted(configSearchFieldId) + const isBatchSizeHighlighted = isSearchHighlighted(WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize) return (
    @@ -197,6 +201,34 @@ export function SubflowEditor({
    )} + + {currentBlock.type === 'parallel' && ( +
    + + e.key === 'Enter' && handleParallelBatchSizeSave()} + disabled={!userCanEdit} + className='mb-1' + /> +
    + Run 1 to 20 parallel branches at a time. +
    +
    + )} diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts index 08428f5d17c..2d0f5692957 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts @@ -40,7 +40,7 @@ const SUBFLOW_CONFIG = { typeLabels: { count: 'Parallel Count', collection: 'Parallel Each' }, typeKey: 'parallelType' as const, storeKey: 'parallels' as const, - maxIterations: 20, + maxIterations: 1000, configKeys: { iterations: 'count' as const, items: 'distribution' as const, @@ -62,6 +62,7 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId const textareaRef = useRef(null) const editorContainerRef = useRef(null) const [tempInputValue, setTempInputValue] = useState(null) + const [tempBatchSizeValue, setTempBatchSizeValue] = useState(null) const [showTagDropdown, setShowTagDropdown] = useState(false) const [cursorPosition, setCursorPosition] = useState(0) @@ -97,6 +98,7 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId const { collaborativeUpdateLoopType, collaborativeUpdateParallelType, + collaborativeUpdateParallelBatchSize, collaborativeUpdateIterationCount, collaborativeUpdateIterationCollection, } = useCollaborativeWorkflow() @@ -260,6 +262,25 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId collaborativeUpdateIterationCount, ]) + const handleParallelBatchSizeChange = useCallback((e: React.ChangeEvent) => { + const sanitizedValue = e.target.value.replace(/[^0-9]/g, '') + const numValue = Number.parseInt(sanitizedValue) + if (!Number.isNaN(numValue)) { + setTempBatchSizeValue(Math.min(20, numValue).toString()) + } 
else { + setTempBatchSizeValue(sanitizedValue) + } + }, []) + + const handleParallelBatchSizeSave = useCallback(() => { + if (!currentBlockId || currentBlock?.type !== 'parallel') return + const value = Number.parseInt(tempBatchSizeValue ?? '20') + if (!Number.isNaN(value)) { + collaborativeUpdateParallelBatchSize(currentBlockId, Math.min(20, Math.max(1, value))) + } + setTempBatchSizeValue(null) + }, [tempBatchSizeValue, currentBlockId, currentBlock, collaborativeUpdateParallelBatchSize]) + /** * Handle editor value change (collection/condition) */ @@ -342,11 +363,16 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId : '' const iterations = configIterations + const parallelBatchSize = + isSubflow && currentBlock?.type === 'parallel' + ? ((nodeConfig as any)?.batchSize ?? (blockData as any)?.batchSize ?? 20) + : 20 const collectionString = typeof configCollection === 'string' ? configCollection : JSON.stringify(configCollection) || '' const conditionString = typeof configCondition === 'string' ? configCondition : '' const inputValue = tempInputValue ?? iterations.toString() + const batchSizeValue = tempBatchSizeValue ?? parallelBatchSize.toString() const editorValue = isConditionMode ? 
conditionString : collectionString // Type options for combobox @@ -366,6 +392,7 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId isCountMode, isConditionMode, inputValue, + batchSizeValue, editorValue, typeOptions, showTagDropdown, @@ -377,6 +404,8 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId handleSubflowTypeChange, handleSubflowIterationsChange, handleSubflowIterationsSave, + handleParallelBatchSizeChange, + handleParallelBatchSizeSave, handleSubflowEditorChange, handleSubflowTagSelect, highlightWithReferences, diff --git a/apps/sim/executor/constants.ts b/apps/sim/executor/constants.ts index 4a347873d76..9c797c4b2fd 100644 --- a/apps/sim/executor/constants.ts +++ b/apps/sim/executor/constants.ts @@ -67,6 +67,7 @@ export const EDGE = { LOOP_CONTINUE: 'loop_continue', LOOP_CONTINUE_ALT: 'loop-continue-source', LOOP_EXIT: 'loop_exit', + PARALLEL_CONTINUE: 'parallel_continue', PARALLEL_EXIT: 'parallel_exit', ERROR: 'error', SOURCE: 'source', diff --git a/apps/sim/executor/execution/block-executor.ts b/apps/sim/executor/execution/block-executor.ts index 02b3ab58d22..7ade4862853 100644 --- a/apps/sim/executor/execution/block-executor.ts +++ b/apps/sim/executor/execution/block-executor.ts @@ -127,7 +127,12 @@ export class BlockExecutor { resolvedInputs: fnInputs, displayInputs, contextVariables, - } = this.resolver.resolveInputsForFunctionBlock(ctx, node.id, block.config.params, block) + } = await this.resolver.resolveInputsForFunctionBlock( + ctx, + node.id, + block.config.params, + block + ) resolvedInputs = { ...fnInputs, [FUNCTION_BLOCK_CONTEXT_VARS_KEY]: contextVariables, @@ -137,7 +142,7 @@ export class BlockExecutor { } inputsForLog = displayInputs } else { - resolvedInputs = this.resolver.resolveInputs(ctx, node.id, block.config.params, block) + resolvedInputs = await this.resolver.resolveInputs(ctx, node.id, block.config.params, block) inputsForLog = resolvedInputs } @@ -194,6 +199,7 @@ 
export class BlockExecutor { normalizedOutput = (await hydrateUserFilesWithBase64(normalizedOutput, { requestId: ctx.metadata.requestId, executionId: ctx.executionId, + userId: ctx.userId, maxBytes: ctx.base64MaxBytes, })) as NormalizedBlockOutput } diff --git a/apps/sim/executor/execution/edge-manager.ts b/apps/sim/executor/execution/edge-manager.ts index 7bedea3a5a2..63a0748c8c6 100644 --- a/apps/sim/executor/execution/edge-manager.ts +++ b/apps/sim/executor/execution/edge-manager.ts @@ -230,6 +230,10 @@ export class EdgeManager { return handle === EDGE.PARALLEL_EXIT } + if (output.selectedRoute === EDGE.PARALLEL_CONTINUE) { + return false + } + if (!handle) { return true } diff --git a/apps/sim/executor/execution/executor.ts b/apps/sim/executor/execution/executor.ts index a141e017fb1..218719ce65a 100644 --- a/apps/sim/executor/execution/executor.ts +++ b/apps/sim/executor/execution/executor.ts @@ -34,6 +34,7 @@ import { extractParallelIdFromSentinel, } from '@/executor/utils/subflow-utils' import { VariableResolver } from '@/executor/variables/resolver' +import { navigatePathAsync } from '@/executor/variables/resolvers/reference-async.server' import type { SerializedWorkflow } from '@/serializer/types' import type { SubflowType } from '@/stores/workflows/workflow/types' @@ -213,7 +214,9 @@ export class DAGExecutor { } private buildExecutionPipeline(context: ExecutionContext, dag: DAG, state: ExecutionState) { - const resolver = new VariableResolver(this.workflow, this.workflowVariables, state) + const resolver = new VariableResolver(this.workflow, this.workflowVariables, state, { + navigatePathAsync, + }) const allHandlers = createBlockHandlers() const blockExecutor = new BlockExecutor(allHandlers, resolver, this.contextExtensions, state) const edgeManager = new EdgeManager(dag) diff --git a/apps/sim/executor/execution/state.ts b/apps/sim/executor/execution/state.ts index f9a664ca309..eefe09338c2 100644 --- a/apps/sim/executor/execution/state.ts +++ 
b/apps/sim/executor/execution/state.ts @@ -21,6 +21,10 @@ export interface LoopScope { export interface ParallelScope { parallelId: string totalBranches: number + batchSize?: number + currentBatchStart?: number + currentBatchSize?: number + accumulatedOutputs?: Map branchOutputs: Map items?: any[] /** Error message if parallel validation failed (e.g., exceeded max branches) */ diff --git a/apps/sim/executor/orchestrators/loop.ts b/apps/sim/executor/orchestrators/loop.ts index cbf30bfa18b..911427dfd91 100644 --- a/apps/sim/executor/orchestrators/loop.ts +++ b/apps/sim/executor/orchestrators/loop.ts @@ -3,7 +3,8 @@ import { toError } from '@sim/utils/errors' import { generateRequestId } from '@/lib/core/utils/request' import { isExecutionCancelled, isRedisCancellationEnabled } from '@/lib/execution/cancellation' import { executeInIsolatedVM } from '@/lib/execution/isolated-vm' -import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' +import { compactSubflowResults } from '@/lib/execution/payloads/serializer' +import { isLikelyReferenceSegment } from '@/lib/workflows/sanitization/references' import { buildLoopIndexCondition, DEFAULTS, EDGE, PARALLEL } from '@/executor/constants' import type { DAG } from '@/executor/dag/builder' import type { EdgeManager } from '@/executor/execution/edge-manager' @@ -11,7 +12,7 @@ import type { LoopScope } from '@/executor/execution/state' import type { BlockStateController, ContextExtensions } from '@/executor/execution/types' import type { ExecutionContext, NormalizedBlockOutput } from '@/executor/types' import type { LoopConfigWithNodes } from '@/executor/types/loop' -import { replaceValidReferences } from '@/executor/utils/reference-validation' +import { createReferencePattern } from '@/executor/utils/reference-validation' import { addSubflowErrorLog, buildParallelSentinelEndId, @@ -21,7 +22,7 @@ import { emitEmptySubflowEvents, emitSubflowSuccessEvents, extractBaseBlockId, - resolveArrayInput, + 
resolveArrayInputAsync, validateMaxCount, } from '@/executor/utils/subflow-utils' import type { VariableResolver } from '@/executor/variables/resolver' @@ -31,6 +32,23 @@ const logger = createLogger('LoopOrchestrator') const LOOP_CONDITION_TIMEOUT_MS = 5000 +async function replaceLoopConditionReferences( + condition: string, + replacer: (match: string) => Promise +): Promise { + const pattern = createReferencePattern() + let cursor = 0 + let result = '' + for (const match of condition.matchAll(pattern)) { + const fullMatch = match[0] + const index = match.index ?? 0 + result += condition.slice(cursor, index) + result += isLikelyReferenceSegment(fullMatch) ? await replacer(fullMatch) : fullMatch + cursor = index + fullMatch.length + } + return result + condition.slice(cursor) +} + export type LoopRoute = typeof EDGE.LOOP_CONTINUE | typeof EDGE.LOOP_EXIT export interface LoopContinuationResult { @@ -135,7 +153,7 @@ export class LoopOrchestrator { } let items: any[] try { - items = resolveArrayInput(ctx, loopConfig.forEachItems, this.resolver) + items = await resolveArrayInputAsync(ctx, loopConfig.forEachItems, this.resolver) } catch (error) { const errorMessage = `ForEach loop resolution failed: ${toError(error).message}` logger.error(errorMessage, { loopId, forEachItems: loopConfig.forEachItems }) @@ -316,16 +334,14 @@ export class LoopOrchestrator { ): Promise { const results = scope.allIterationOutputs const totalIterations = results.length - const output = (await compactExecutionPayload( - { results }, - { - workspaceId: ctx.workspaceId, - workflowId: ctx.workflowId, - executionId: ctx.executionId, - userId: ctx.userId, - requireDurable: true, - } - )) as { results: unknown } + const compactedResults = await compactSubflowResults(results, { + workspaceId: ctx.workspaceId, + workflowId: ctx.workflowId, + executionId: ctx.executionId, + userId: ctx.userId, + requireDurable: true, + }) + const output = { results: compactedResults } this.state.setBlockOutput(loopId, 
output, DEFAULTS.EXECUTION_TIME) scope.allIterationOutputs = [] @@ -694,8 +710,8 @@ export class LoopOrchestrator { workflowVariables: ctx.workflowVariables, }) - const evaluatedCondition = replaceValidReferences(condition, (match) => { - const resolved = this.resolver.resolveSingleReference(ctx, '', match, scope) + const evaluatedCondition = await replaceLoopConditionReferences(condition, async (match) => { + const resolved = await this.resolver.resolveSingleReference(ctx, '', match, scope) logger.debug('Resolved variable reference in loop condition', { reference: match, resolvedValue: resolved, diff --git a/apps/sim/executor/orchestrators/node.ts b/apps/sim/executor/orchestrators/node.ts index be10a69751b..4db656c3254 100644 --- a/apps/sim/executor/orchestrators/node.ts +++ b/apps/sim/executor/orchestrators/node.ts @@ -191,6 +191,14 @@ export class NodeExecutionOrchestrator { if (sentinelType === 'end') { const result = await this.parallelOrchestrator.aggregateParallelResults(ctx, parallelId) + if (!result.allBranchesComplete) { + return { + results: [], + sentinelEnd: true, + selectedRoute: EDGE.PARALLEL_CONTINUE, + totalBranches: result.totalBranches, + } + } return { results: result.results || [], sentinelEnd: true, @@ -275,6 +283,14 @@ export class NodeExecutionOrchestrator { this.loopOrchestrator.restoreLoopEdges(loopId) } } + + if ( + node.metadata.isParallelSentinel && + node.metadata.sentinelType === 'end' && + output.selectedRoute === EDGE.PARALLEL_CONTINUE + ) { + this.state.deleteBlockState(node.id) + } } private findParallelIdForNode(nodeId: string): string | undefined { diff --git a/apps/sim/executor/orchestrators/parallel.test.ts b/apps/sim/executor/orchestrators/parallel.test.ts index f0262b92e9e..e0b73cd38b7 100644 --- a/apps/sim/executor/orchestrators/parallel.test.ts +++ b/apps/sim/executor/orchestrators/parallel.test.ts @@ -99,9 +99,8 @@ describe('ParallelOrchestrator', () => { const ctx = createContext() const initializePromise = 
orchestrator.initializeParallelScope(ctx, 'parallel-1') - await Promise.resolve() + await vi.waitFor(() => expect(onBlockStart).toHaveBeenCalledTimes(1)) - expect(onBlockStart).toHaveBeenCalledTimes(1) expect(onBlockComplete).not.toHaveBeenCalled() releaseStart?.() diff --git a/apps/sim/executor/orchestrators/parallel.ts b/apps/sim/executor/orchestrators/parallel.ts index 741d69d002e..db1dbcc1392 100644 --- a/apps/sim/executor/orchestrators/parallel.ts +++ b/apps/sim/executor/orchestrators/parallel.ts @@ -1,25 +1,26 @@ import { createLogger } from '@sim/logger' import { toError } from '@sim/utils/errors' -import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' +import { compactSubflowResults } from '@/lib/execution/payloads/serializer' import { DEFAULTS } from '@/executor/constants' import type { DAG } from '@/executor/dag/builder' import type { ParallelScope } from '@/executor/execution/state' import type { BlockStateWriter, ContextExtensions } from '@/executor/execution/types' import type { ExecutionContext, NormalizedBlockOutput } from '@/executor/types' import type { ParallelConfigWithNodes } from '@/executor/types/parallel' -import { ParallelExpander } from '@/executor/utils/parallel-expansion' +import { type ClonedSubflowInfo, ParallelExpander } from '@/executor/utils/parallel-expansion' import { addSubflowErrorLog, emitEmptySubflowEvents, emitSubflowSuccessEvents, extractBranchIndex, - resolveArrayInput, + resolveArrayInputAsync, validateMaxCount, } from '@/executor/utils/subflow-utils' import type { VariableResolver } from '@/executor/variables/resolver' import type { SerializedParallel } from '@/serializer/types' const logger = createLogger('ParallelOrchestrator') +const DEFAULT_PARALLEL_BATCH_SIZE = 20 export interface ParallelBranchMetadata { branchIndex: number @@ -65,7 +66,7 @@ export class ParallelOrchestrator { let isEmpty = false try { - const resolved = this.resolveBranchCount(ctx, parallelConfig, parallelId) + const resolved 
= await this.resolveBranchCount(ctx, parallelConfig, parallelId) branchCount = resolved.branchCount items = resolved.items isEmpty = resolved.isEmpty ?? false @@ -84,7 +85,7 @@ export class ParallelOrchestrator { const branchError = validateMaxCount( branchCount, - DEFAULTS.MAX_PARALLEL_BRANCHES, + DEFAULTS.MAX_FOREACH_ITEMS, 'Parallel branch count' ) if (branchError) { @@ -123,60 +124,26 @@ export class ParallelOrchestrator { return scope } + const batchSize = this.resolveBatchSize(parallelConfig.batchSize) + const currentBatchSize = Math.min(batchSize, branchCount) + const batchItems = items?.slice(0, currentBatchSize) const { entryNodes, clonedSubflows } = this.expander.expandParallel( this.dag, parallelId, - branchCount, - items + currentBatchSize, + batchItems, + { branchIndexOffset: 0, totalBranches: branchCount } ) - // Register cloned subflows in the parent map so iteration context resolves correctly. - // Build a per-branch clone map so nested clones point to the cloned parent, not the original. 
- if (clonedSubflows.length > 0 && ctx.subflowParentMap) { - const branchCloneMaps = new Map>() - for (const clone of clonedSubflows) { - let map = branchCloneMaps.get(clone.outerBranchIndex) - if (!map) { - map = new Map() - branchCloneMaps.set(clone.outerBranchIndex, map) - } - map.set(clone.originalId, clone.clonedId) - } - - for (const clone of clonedSubflows) { - const originalEntry = ctx.subflowParentMap.get(clone.originalId) - if (originalEntry) { - const cloneMap = branchCloneMaps.get(clone.outerBranchIndex) - const clonedParentId = cloneMap?.get(originalEntry.parentId) - if (clonedParentId) { - // Parent was also cloned — this is the original (branch 0) inside the cloned parent - ctx.subflowParentMap.set(clone.clonedId, { - parentId: clonedParentId, - parentType: originalEntry.parentType, - branchIndex: 0, - }) - } else { - // Parent was not cloned — direct child of the expanding parallel - ctx.subflowParentMap.set(clone.clonedId, { - parentId: parallelId, - parentType: 'parallel', - branchIndex: clone.outerBranchIndex, - }) - } - } else { - // Not in parent map — direct child of the expanding parallel - ctx.subflowParentMap.set(clone.clonedId, { - parentId: parallelId, - parentType: 'parallel', - branchIndex: clone.outerBranchIndex, - }) - } - } - } + this.registerClonedSubflows(ctx, parallelId, clonedSubflows) const scope: ParallelScope = { parallelId, totalBranches: branchCount, + batchSize, + currentBatchStart: 0, + currentBatchSize, + accumulatedOutputs: new Map(), branchOutputs: new Map(), items, } @@ -197,6 +164,8 @@ export class ParallelOrchestrator { logger.info('Parallel scope initialized', { parallelId, branchCount, + batchSize, + currentBatchSize, entryNodeCount: entryNodes.length, newEntryNodes: newEntryNodes.length, }) @@ -204,16 +173,16 @@ export class ParallelOrchestrator { return scope } - private resolveBranchCount( + private async resolveBranchCount( ctx: ExecutionContext, config: SerializedParallel, parallelId: string - ): { 
branchCount: number; items?: any[]; isEmpty?: boolean } { + ): Promise<{ branchCount: number; items?: any[]; isEmpty?: boolean }> { if (config.parallelType === 'count') { return { branchCount: config.count ?? 1 } } - const items = this.resolveDistributionItems(ctx, config) + const items = await this.resolveDistributionItems(ctx, config) if (items.length === 0) { logger.info('Parallel has empty distribution, skipping parallel body', { parallelId }) return { branchCount: 0, items: [], isEmpty: true } @@ -252,7 +221,10 @@ export class ParallelOrchestrator { ctx.parallelExecutions.set(parallelId, scope) } - private resolveDistributionItems(ctx: ExecutionContext, config: SerializedParallel): any[] { + private async resolveDistributionItems( + ctx: ExecutionContext, + config: SerializedParallel + ): Promise { if ( config.distribution === undefined || config.distribution === null || @@ -262,7 +234,63 @@ export class ParallelOrchestrator { 'Parallel collection distribution is empty. Provide an array or a reference that resolves to a collection.' ) } - return resolveArrayInput(ctx, config.distribution, this.resolver) + return resolveArrayInputAsync(ctx, config.distribution, this.resolver) + } + + private resolveBatchSize(batchSize: unknown): number { + const parsed = + typeof batchSize === 'number' ? 
batchSize : Number.parseInt(String(batchSize), 10) + if (Number.isNaN(parsed)) { + return DEFAULT_PARALLEL_BATCH_SIZE + } + return Math.max(1, Math.min(DEFAULTS.MAX_PARALLEL_BRANCHES, parsed)) + } + + private registerClonedSubflows( + ctx: ExecutionContext, + parallelId: string, + clonedSubflows: ClonedSubflowInfo[] + ): void { + if (clonedSubflows.length === 0 || !ctx.subflowParentMap) { + return + } + + const branchCloneMaps = new Map>() + for (const clone of clonedSubflows) { + let map = branchCloneMaps.get(clone.outerBranchIndex) + if (!map) { + map = new Map() + branchCloneMaps.set(clone.outerBranchIndex, map) + } + map.set(clone.originalId, clone.clonedId) + } + + for (const clone of clonedSubflows) { + const originalEntry = ctx.subflowParentMap.get(clone.originalId) + if (originalEntry) { + const cloneMap = branchCloneMaps.get(clone.outerBranchIndex) + const clonedParentId = cloneMap?.get(originalEntry.parentId) + if (clonedParentId) { + ctx.subflowParentMap.set(clone.clonedId, { + parentId: clonedParentId, + parentType: originalEntry.parentType, + branchIndex: 0, + }) + } else { + ctx.subflowParentMap.set(clone.clonedId, { + parentId: parallelId, + parentType: 'parallel', + branchIndex: clone.outerBranchIndex, + }) + } + } else { + ctx.subflowParentMap.set(clone.clonedId, { + parentId: parallelId, + parentType: 'parallel', + branchIndex: clone.outerBranchIndex, + }) + } + } } /** @@ -283,7 +311,8 @@ export class ParallelOrchestrator { return } - const branchIndex = extractBranchIndex(nodeId) + const branchIndex = + this.dag.nodes.get(nodeId)?.metadata.branchIndex ?? extractBranchIndex(nodeId) if (branchIndex === null) { logger.warn('Could not extract branch index from node ID', { nodeId }) return @@ -305,26 +334,43 @@ export class ParallelOrchestrator { return { allBranchesComplete: false } } + const accumulatedOutputs = + scope.accumulatedOutputs ?? 
new Map() + for (const [branchIndex, outputs] of scope.branchOutputs.entries()) { + accumulatedOutputs.set(branchIndex, outputs) + } + scope.accumulatedOutputs = accumulatedOutputs + scope.branchOutputs = new Map() + + const nextBatchStart = + (scope.currentBatchStart ?? 0) + (scope.currentBatchSize ?? scope.totalBranches) + if (nextBatchStart < scope.totalBranches) { + await this.scheduleNextBatch(ctx, scope, nextBatchStart) + return { + allBranchesComplete: false, + completedBranches: accumulatedOutputs.size, + totalBranches: scope.totalBranches, + } + } + const results: NormalizedBlockOutput[][] = [] for (let i = 0; i < scope.totalBranches; i++) { - const branchOutputs = scope.branchOutputs.get(i) + const branchOutputs = accumulatedOutputs.get(i) if (!branchOutputs) { logger.warn('Missing branch output during parallel aggregation', { parallelId, branch: i }) } results.push(branchOutputs ?? []) } - const output = (await compactExecutionPayload( - { results }, - { - workspaceId: ctx.workspaceId, - workflowId: ctx.workflowId, - executionId: ctx.executionId, - userId: ctx.userId, - requireDurable: true, - } - )) as { results: unknown } + const compactedResults = await compactSubflowResults(results, { + workspaceId: ctx.workspaceId, + workflowId: ctx.workflowId, + executionId: ctx.executionId, + userId: ctx.userId, + requireDurable: true, + }) + const output = { results: compactedResults } this.state.setBlockOutput(parallelId, output) - scope.branchOutputs = new Map() + scope.accumulatedOutputs = new Map() await emitSubflowSuccessEvents(ctx, parallelId, 'parallel', output, this.contextExtensions) @@ -335,13 +381,61 @@ export class ParallelOrchestrator { totalBranches: scope.totalBranches, } } + + private async scheduleNextBatch( + ctx: ExecutionContext, + scope: ParallelScope, + nextBatchStart: number + ): Promise { + const batchSize = scope.batchSize ?? 
DEFAULT_PARALLEL_BATCH_SIZE + const remaining = scope.totalBranches - nextBatchStart + const currentBatchSize = Math.min(batchSize, remaining) + const batchItems = scope.items?.slice(nextBatchStart, nextBatchStart + currentBatchSize) + + const { entryNodes, clonedSubflows } = this.expander.expandParallel( + this.dag, + scope.parallelId, + currentBatchSize, + batchItems, + { branchIndexOffset: nextBatchStart, totalBranches: scope.totalBranches } + ) + + this.registerClonedSubflows(ctx, scope.parallelId, clonedSubflows) + this.resetBatchExecutionState(scope.parallelId) + + scope.currentBatchStart = nextBatchStart + scope.currentBatchSize = currentBatchSize + + if (!ctx.pendingDynamicNodes) { + ctx.pendingDynamicNodes = [] + } + ctx.pendingDynamicNodes.push(...entryNodes) + + logger.info('Scheduled next parallel batch', { + parallelId: scope.parallelId, + nextBatchStart, + currentBatchSize, + totalBranches: scope.totalBranches, + }) + } + + private resetBatchExecutionState(parallelId: string): void { + for (const [nodeId, node] of this.dag.nodes.entries()) { + if (node.metadata.parallelId !== parallelId || !node.metadata.isParallelBranch) { + continue + } + this.state.unmarkExecuted(nodeId) + this.state.deleteBlockState(nodeId) + } + } + extractBranchMetadata(nodeId: string): ParallelBranchMetadata | null { const node = this.dag.nodes.get(nodeId) if (!node?.metadata.isParallelBranch) { return null } - const branchIndex = extractBranchIndex(nodeId) + const branchIndex = node.metadata.branchIndex ?? 
extractBranchIndex(nodeId) if (branchIndex === null) { return null } diff --git a/apps/sim/executor/types.ts b/apps/sim/executor/types.ts index 151a9c96693..2fac2636436 100644 --- a/apps/sim/executor/types.ts +++ b/apps/sim/executor/types.ts @@ -344,6 +344,10 @@ export interface ExecutionContext { { parallelId: string totalBranches: number + batchSize?: number + currentBatchStart?: number + currentBatchSize?: number + accumulatedOutputs?: Map branchOutputs: Map parallelType?: 'count' | 'collection' items?: any[] diff --git a/apps/sim/executor/utils/block-reference.ts b/apps/sim/executor/utils/block-reference.ts index edf909a6d3b..0ffe6956d4f 100644 --- a/apps/sim/executor/utils/block-reference.ts +++ b/apps/sim/executor/utils/block-reference.ts @@ -1,6 +1,10 @@ import { USER_FILE_ACCESSIBLE_PROPERTIES } from '@/lib/workflows/types' import { normalizeName } from '@/executor/constants' -import { navigatePath } from '@/executor/variables/resolvers/reference' +import { + type AsyncPathNavigator, + navigatePath, + type ResolutionContext, +} from '@/executor/variables/resolvers/reference' /** * A single schema node encountered while walking an `OutputSchema`. 
Captures @@ -238,3 +242,38 @@ export function resolveBlockReference( return { value, blockId } } + +export async function resolveBlockReferenceAsync( + blockName: string, + pathParts: string[], + context: BlockReferenceContext, + resolutionContext: ResolutionContext, + navigatePathAsync: AsyncPathNavigator +): Promise { + const normalizedName = normalizeName(blockName) + const blockId = context.blockNameMapping[normalizedName] + + if (!blockId) { + return undefined + } + + const blockOutput = context.blockData[blockId] + if (blockOutput === undefined) { + return { value: undefined, blockId } + } + + if (pathParts.length === 0) { + return { value: blockOutput, blockId } + } + + const value = await navigatePathAsync(blockOutput, pathParts, resolutionContext) + + const schema = context.blockOutputSchemas?.[blockId] + if (value === undefined && schema) { + if (!isPathInSchema(schema, pathParts)) { + throw new InvalidFieldError(blockName, pathParts.join('.'), getSchemaFieldNames(schema)) + } + } + + return { value, blockId } +} diff --git a/apps/sim/executor/utils/parallel-expansion.ts b/apps/sim/executor/utils/parallel-expansion.ts index 6d59af91c8d..9bf9e518c31 100644 --- a/apps/sim/executor/utils/parallel-expansion.ts +++ b/apps/sim/executor/utils/parallel-expansion.ts @@ -36,7 +36,8 @@ export class ParallelExpander { dag: DAG, parallelId: string, branchCount: number, - distributionItems?: any[] + distributionItems?: any[], + options: { branchIndexOffset?: number; totalBranches?: number } = {} ): ExpansionResult { const config = dag.parallelConfigs.get(parallelId) if (!config) { @@ -64,6 +65,8 @@ export class ParallelExpander { const regularSet = new Set(regularBlocks) const allBranchNodes: string[] = [] + const branchIndexOffset = options.branchIndexOffset ?? 0 + const branchTotal = options.totalBranches ?? 
branchCount for (const blockId of regularBlocks) { const templateId = buildBranchNodeId(blockId, 0) @@ -76,10 +79,16 @@ export class ParallelExpander { for (let i = 0; i < branchCount; i++) { const branchNodeId = buildBranchNodeId(blockId, i) + const globalBranchIndex = branchIndexOffset + i allBranchNodes.push(branchNodeId) if (i === 0) { - this.updateBranchMetadata(templateNode, i, branchCount, distributionItems?.[i]) + this.updateBranchMetadata( + templateNode, + globalBranchIndex, + branchTotal, + distributionItems?.[i] + ) continue } @@ -87,7 +96,8 @@ export class ParallelExpander { templateNode, blockId, i, - branchCount, + globalBranchIndex, + branchTotal, distributionItems?.[i] ) dag.nodes.set(branchNodeId, branchNode) @@ -120,6 +130,7 @@ export class ParallelExpander { // Branches 1..N clone the entire subflow graph (recursively for deep nesting) for (let i = 1; i < branchCount; i++) { + const globalBranchIndex = branchIndexOffset + i const cloned = this.cloneNestedSubflow(dag, subflowId, i, clonedSubflows) entryNodes.push(cloned.startId) @@ -127,7 +138,7 @@ export class ParallelExpander { clonedSubflows.push({ clonedId: cloned.clonedId, originalId: subflowId, - outerBranchIndex: i, + outerBranchIndex: globalBranchIndex, }) } } @@ -161,11 +172,12 @@ export class ParallelExpander { private cloneTemplateNode( template: DAGNode, originalBlockId: string, + localBranchIndex: number, branchIndex: number, branchTotal: number, distributionItem?: any ): DAGNode { - const branchNodeId = buildBranchNodeId(originalBlockId, branchIndex) + const branchNodeId = buildBranchNodeId(originalBlockId, localBranchIndex) const blockClone: SerializedBlock = { ...template.block, id: branchNodeId, diff --git a/apps/sim/executor/utils/subflow-utils.ts b/apps/sim/executor/utils/subflow-utils.ts index 282c75ea74c..f03a6a5c5cd 100644 --- a/apps/sim/executor/utils/subflow-utils.ts +++ b/apps/sim/executor/utils/subflow-utils.ts @@ -249,7 +249,82 @@ export function resolveArrayInput( if 
(resolver) { try { - const resolved = resolver.resolveInputs(ctx, 'subflow_items', { items }).items + const resolved = (resolver.resolveInputs(ctx, 'subflow_items', { items }) as any).items + if (Array.isArray(resolved)) { + return resolved + } + throw new Error(`Resolved items is not an array`) + } catch (error) { + if (error instanceof Error && error.message.startsWith('Resolved items')) { + throw error + } + throw new Error(`Failed to resolve items: ${toError(error).message}`) + } + } + + return [] +} + +/** + * Async variant used by execution paths that may need durable large-value or + * explicit UserFile.base64 materialization while resolving collection inputs. + */ +export async function resolveArrayInputAsync( + ctx: ExecutionContext, + items: any, + resolver: VariableResolver | null +): Promise { + if (Array.isArray(items)) { + return items + } + + if (typeof items === 'object' && items !== null) { + return Object.entries(items) + } + + if (typeof items === 'string') { + if (items.startsWith(REFERENCE.START) && items.endsWith(REFERENCE.END) && resolver) { + try { + const resolved = await resolver.resolveSingleReference(ctx, '', items) + if (Array.isArray(resolved)) { + return resolved + } + if (typeof resolved === 'object' && resolved !== null) { + return Object.entries(resolved) + } + if (resolved === null) { + return [] + } + throw new Error(`Reference "${items}" did not resolve to an array or object`) + } catch (error) { + if (error instanceof Error && error.message.startsWith('Reference "')) { + throw error + } + throw new Error(`Failed to resolve reference "${items}": ${toError(error).message}`) + } + } + + try { + const normalized = items.replace(/'/g, '"') + const parsed = JSON.parse(normalized) + if (Array.isArray(parsed)) { + return parsed + } + if (typeof parsed === 'object' && parsed !== null) { + return Object.entries(parsed) + } + throw new Error(`Parsed value is not an array or object`) + } catch (error) { + if (error instanceof Error && 
error.message.startsWith('Parsed value')) { + throw error + } + throw new Error(`Failed to parse items as JSON: "${items}"`) + } + } + + if (resolver) { + try { + const resolved = (await resolver.resolveInputs(ctx, 'subflow_items', { items })).items if (Array.isArray(resolved)) { return resolved } diff --git a/apps/sim/executor/variables/resolver.test.ts b/apps/sim/executor/variables/resolver.test.ts index 8ac37e6cf25..f938165c01e 100644 --- a/apps/sim/executor/variables/resolver.test.ts +++ b/apps/sim/executor/variables/resolver.test.ts @@ -61,18 +61,18 @@ function createResolver(language = 'javascript') { } describe('VariableResolver function block inputs', () => { - it('returns empty inputs when params are missing', () => { + it('returns empty inputs when params are missing', async () => { const { block, ctx, resolver } = createResolver() - const result = resolver.resolveInputsForFunctionBlock(ctx, 'function', undefined, block) + const result = await resolver.resolveInputsForFunctionBlock(ctx, 'function', undefined, block) expect(result).toEqual({ resolvedInputs: {}, displayInputs: {}, contextVariables: {} }) }) - it('resolves JavaScript block references through globalThis context variables', () => { + it('resolves JavaScript block references through globalThis context variables', async () => { const { block, ctx, resolver } = createResolver('javascript') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'return ' }, @@ -84,7 +84,7 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('resolves named loop result bracket paths in function code', () => { + it('resolves named loop result bracket paths in function code', async () => { const loopBlock = createBlock('loop-1', 'Loop 1', 'loop') const functionBlock = createBlock('function', 'Function', BlockType.FUNCTION, { language: 
'javascript', @@ -114,7 +114,7 @@ describe('VariableResolver function block inputs', () => { } as ExecutionContext const resolver = new VariableResolver(workflow, {}, state) - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'return ' }, @@ -126,10 +126,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'b' }) }) - it('resolves Python block references through globals lookup', () => { + it('resolves Python block references through globals lookup', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'return ' }, @@ -141,10 +141,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('breaks JavaScript string literals around quoted block references', () => { + it('breaks JavaScript string literals around quoted block references', async () => { const { block, ctx, resolver } = createResolver('javascript') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: "const rawEmail = '';\nreturn rawEmail" }, @@ -158,10 +158,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('uses template interpolation for JavaScript template literal block references', () => { + it('uses template interpolation for JavaScript template literal block references', async () => { const { block, ctx, resolver } = createResolver('javascript') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'return `value: `' }, 
@@ -175,10 +175,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('keeps JavaScript block references inside template expressions executable', () => { + it('keeps JavaScript block references inside template expressions executable', async () => { const { block, ctx, resolver } = createResolver('javascript') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'return `${String()}`' }, @@ -190,10 +190,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('ignores JavaScript comment quotes before later block references', () => { + it('ignores JavaScript comment quotes before later block references', async () => { const { block, ctx, resolver } = createResolver('javascript') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: "// don't confuse quote tracking\nreturn " }, @@ -207,10 +207,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('breaks Python string literals around quoted block references', () => { + it('breaks Python string literals around quoted block references', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: "raw_email = ''\nreturn raw_email" }, @@ -224,10 +224,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('breaks Python triple-double-quoted strings around block references', () => { + it('breaks Python triple-double-quoted strings 
around block references', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'prompt = """\nSummary: \n"""\nreturn prompt' }, @@ -243,10 +243,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('ignores escaped triple-double quotes before later Python block references', () => { + it('ignores escaped triple-double quotes before later Python block references', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'prompt = """Escaped delimiter: \\"\\"\\"\nSummary: \n"""' }, @@ -262,10 +262,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('breaks Python triple-single-quoted strings around block references', () => { + it('breaks Python triple-single-quoted strings around block references', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: "prompt = '''\nSummary: \n'''\nreturn prompt" }, @@ -281,10 +281,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('ignores Python comment quotes before later block references', () => { + it('ignores Python comment quotes before later block references', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 
"# don't confuse quote tracking\nreturn " }, @@ -298,10 +298,10 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'hello world' }) }) - it('uses separate Python context variables for repeated mutable references', () => { + it('uses separate Python context variables for repeated mutable references', async () => { const { block, ctx, resolver } = createResolver('python') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'a = \nb = \nreturn b' }, @@ -320,10 +320,10 @@ describe('VariableResolver function block inputs', () => { }) }) - it('uses shell-safe expansions for block references', () => { + it('uses shell-safe expansions for block references', async () => { const { block, ctx, resolver } = createResolver('shell') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: 'echo suffix && echo ""' }, @@ -340,10 +340,10 @@ describe('VariableResolver function block inputs', () => { }) }) - it('ignores shell comment quotes when formatting later block references', () => { + it('ignores shell comment quotes when formatting later block references', async () => { const { block, ctx, resolver } = createResolver('shell') - const result = resolver.resolveInputsForFunctionBlock( + const result = await resolver.resolveInputsForFunctionBlock( ctx, 'function', { code: "# don't confuse quote tracking\necho " }, diff --git a/apps/sim/executor/variables/resolver.ts b/apps/sim/executor/variables/resolver.ts index c0ab54d23d9..e73a400d661 100644 --- a/apps/sim/executor/variables/resolver.ts +++ b/apps/sim/executor/variables/resolver.ts @@ -1,14 +1,16 @@ import { createLogger } from '@sim/logger' import { toError } from '@sim/utils/errors' +import { isLikelyReferenceSegment } from '@/lib/workflows/sanitization/references' import { 
BlockType } from '@/executor/constants' import type { ExecutionState, LoopScope } from '@/executor/execution/state' import type { ExecutionContext } from '@/executor/types' -import { createEnvVarPattern, replaceValidReferences } from '@/executor/utils/reference-validation' +import { createEnvVarPattern, createReferencePattern } from '@/executor/utils/reference-validation' import { BlockResolver } from '@/executor/variables/resolvers/block' import { EnvResolver } from '@/executor/variables/resolvers/env' import { LoopResolver } from '@/executor/variables/resolvers/loop' import { ParallelResolver } from '@/executor/variables/resolvers/parallel' import { + type AsyncPathNavigator, RESOLVED_EMPTY, type ResolutionContext, type Resolver, @@ -23,6 +25,42 @@ export const FUNCTION_BLOCK_DISPLAY_CODE_KEY = '_runtimeDisplayCode' const logger = createLogger('VariableResolver') +async function replaceValidReferencesAsync( + template: string, + replacer: (match: string, index: number, template: string) => Promise +): Promise { + const pattern = createReferencePattern() + let cursor = 0 + let result = '' + for (const match of template.matchAll(pattern)) { + const fullMatch = match[0] + const index = match.index ?? 0 + result += template.slice(cursor, index) + result += isLikelyReferenceSegment(fullMatch) + ? await replacer(fullMatch, index, template) + : fullMatch + cursor = index + fullMatch.length + } + return result + template.slice(cursor) +} + +async function replaceEnvVarsAsync( + template: string, + replacer: (match: string) => Promise +): Promise { + const pattern = createEnvVarPattern() + let cursor = 0 + let result = '' + for (const match of template.matchAll(pattern)) { + const fullMatch = match[0] + const index = match.index ?? 
0 + result += template.slice(cursor, index) + result += await replacer(fullMatch) + cursor = index + fullMatch.length + } + return result + template.slice(cursor) +} + type ShellQuoteContext = 'single' | 'double' | null type CodeStringQuoteContext = ShellQuoteContext | 'triple-single' | 'triple-double' | 'template' type CodeScanMode = @@ -43,12 +81,13 @@ export class VariableResolver { constructor( workflow: SerializedWorkflow, workflowVariables: Record, - private state: ExecutionState + private state: ExecutionState, + options: { navigatePathAsync?: AsyncPathNavigator } = {} ) { - this.blockResolver = new BlockResolver(workflow) + this.blockResolver = new BlockResolver(workflow, options.navigatePathAsync) this.resolvers = [ - new LoopResolver(workflow), - new ParallelResolver(workflow), + new LoopResolver(workflow, options.navigatePathAsync), + new ParallelResolver(workflow, options.navigatePathAsync), new WorkflowResolver(workflowVariables), new EnvResolver(), this.blockResolver, @@ -64,16 +103,16 @@ export class VariableResolver { * should inject contextVariables into the function execution request body so the * isolated VM can access them as global variables. 
*/ - resolveInputsForFunctionBlock( + async resolveInputsForFunctionBlock( ctx: ExecutionContext, currentNodeId: string, params: Record | null | undefined, block: SerializedBlock - ): { + ): Promise<{ resolvedInputs: Record displayInputs: Record contextVariables: Record - } { + }> { const contextVariables: Record = {} const resolved: Record = {} const display: Record = {} @@ -85,7 +124,7 @@ export class VariableResolver { for (const [key, value] of Object.entries(params)) { if (key === 'code') { if (typeof value === 'string') { - const code = this.resolveCodeWithContextVars( + const code = await this.resolveCodeWithContextVars( ctx, currentNodeId, value, @@ -100,7 +139,7 @@ export class VariableResolver { const displayItems: any[] = [] for (const item of value) { if (item && typeof item === 'object' && typeof item.content === 'string') { - const code = this.resolveCodeWithContextVars( + const code = await this.resolveCodeWithContextVars( ctx, currentNodeId, item.content, @@ -124,11 +163,11 @@ export class VariableResolver { resolved[key] = resolvedItems display[key] = displayItems } else { - resolved[key] = this.resolveValue(ctx, currentNodeId, value, undefined, block) + resolved[key] = await this.resolveValue(ctx, currentNodeId, value, undefined, block) display[key] = resolved[key] } } else { - resolved[key] = this.resolveValue(ctx, currentNodeId, value, undefined, block) + resolved[key] = await this.resolveValue(ctx, currentNodeId, value, undefined, block) display[key] = resolved[key] } } @@ -136,12 +175,12 @@ export class VariableResolver { return { resolvedInputs: resolved, displayInputs: display, contextVariables } } - resolveInputs( + async resolveInputs( ctx: ExecutionContext, currentNodeId: string, params: Record, block?: SerializedBlock - ): Record { + ): Promise> { if (!params) { return {} } @@ -152,15 +191,21 @@ export class VariableResolver { try { const parsed = JSON.parse(params.conditions) if (Array.isArray(parsed)) { - resolved.conditions = 
parsed.map((cond: any) => ({ - ...cond, - value: - typeof cond.value === 'string' - ? this.resolveTemplateWithoutConditionFormatting(ctx, currentNodeId, cond.value) - : cond.value, - })) + resolved.conditions = await Promise.all( + parsed.map(async (cond: any) => ({ + ...cond, + value: + typeof cond.value === 'string' + ? await this.resolveTemplateWithoutConditionFormatting( + ctx, + currentNodeId, + cond.value + ) + : cond.value, + })) + ) } else { - resolved.conditions = this.resolveValue( + resolved.conditions = await this.resolveValue( ctx, currentNodeId, params.conditions, @@ -173,7 +218,7 @@ export class VariableResolver { error: parseError, conditions: params.conditions, }) - resolved.conditions = this.resolveValue( + resolved.conditions = await this.resolveValue( ctx, currentNodeId, params.conditions, @@ -187,17 +232,17 @@ export class VariableResolver { if (isConditionBlock && key === 'conditions') { continue } - resolved[key] = this.resolveValue(ctx, currentNodeId, value, undefined, block) + resolved[key] = await this.resolveValue(ctx, currentNodeId, value, undefined, block) } return resolved } - resolveSingleReference( + async resolveSingleReference( ctx: ExecutionContext, currentNodeId: string, reference: string, loopScope?: LoopScope - ): any { + ): Promise { if (typeof reference === 'string') { const trimmed = reference.trim() if (/^<[^<>]+>$/.test(trimmed)) { @@ -208,7 +253,7 @@ export class VariableResolver { loopScope, } - const result = this.resolveReference(trimmed, resolutionContext) + const result = await this.resolveReference(trimmed, resolutionContext) if (result === RESOLVED_EMPTY) { return null } @@ -219,29 +264,31 @@ export class VariableResolver { return this.resolveValue(ctx, currentNodeId, reference, loopScope) } - private resolveValue( + private async resolveValue( ctx: ExecutionContext, currentNodeId: string, value: any, loopScope?: LoopScope, block?: SerializedBlock - ): any { + ): Promise { if (value === null || value === undefined) 
{ return value } if (Array.isArray(value)) { - return value.map((v) => this.resolveValue(ctx, currentNodeId, v, loopScope, block)) + return Promise.all( + value.map((v) => this.resolveValue(ctx, currentNodeId, v, loopScope, block)) + ) } if (typeof value === 'object') { - return Object.entries(value).reduce( - (acc, [key, val]) => ({ - ...acc, - [key]: this.resolveValue(ctx, currentNodeId, val, loopScope, block), - }), - {} + const entries = await Promise.all( + Object.entries(value).map(async ([key, val]) => [ + key, + await this.resolveValue(ctx, currentNodeId, val, loopScope, block), + ]) ) + return Object.fromEntries(entries) } if (typeof value === 'string') { @@ -256,14 +303,14 @@ export class VariableResolver { * items, workflow variables, env vars) are still inlined as literals so they remain * available without any extra passing mechanism. */ - private resolveCodeWithContextVars( + private async resolveCodeWithContextVars( ctx: ExecutionContext, currentNodeId: string, template: string, loopScope: LoopScope | undefined, block: SerializedBlock, contextVarAccumulator: Record - ): { resolvedCode: string; displayCode: string } { + ): Promise<{ resolvedCode: string; displayCode: string }> { const resolutionContext: ResolutionContext = { executionContext: ctx, executionState: this.state, @@ -279,14 +326,14 @@ export class VariableResolver { let displayResult = '' let displayCursor = 0 - let result = replaceValidReferences(template, (match, index) => { + let result = await replaceValidReferencesAsync(template, async (match, index) => { if (replacementError) return match displayResult += template.slice(displayCursor, index) displayCursor = index + match.length try { if (this.blockResolver.canResolve(match)) { - const resolved = this.resolveReference(match, resolutionContext) + const resolved = await this.resolveReference(match, resolutionContext) if (resolved === undefined) { displayResult += match return match @@ -314,7 +361,7 @@ export class VariableResolver { 
return replacement } - const resolved = this.resolveReference(match, resolutionContext) + const resolved = await this.resolveReference(match, resolutionContext) if (resolved === undefined) { displayResult += match return match @@ -342,12 +389,12 @@ export class VariableResolver { throw replacementError } - result = result.replace(createEnvVarPattern(), (match) => { - const resolved = this.resolveReference(match, resolutionContext) + result = await replaceEnvVarsAsync(result, async (match) => { + const resolved = await this.resolveReference(match, resolutionContext) return typeof resolved === 'string' ? resolved : match }) - displayResult = displayResult.replace(createEnvVarPattern(), (match) => { - const resolved = this.resolveReference(match, resolutionContext) + displayResult = await replaceEnvVarsAsync(displayResult, async (match) => { + const resolved = await this.resolveReference(match, resolutionContext) return typeof resolved === 'string' ? resolved : match }) @@ -669,13 +716,13 @@ export class VariableResolver { return previous === undefined || /\s|[;&|()<>]/.test(previous) } - private resolveTemplate( + private async resolveTemplate( ctx: ExecutionContext, currentNodeId: string, template: string, loopScope?: LoopScope, block?: SerializedBlock - ): string { + ): Promise { const resolutionContext: ResolutionContext = { executionContext: ctx, executionState: this.state, @@ -693,11 +740,11 @@ export class VariableResolver { | undefined) : undefined - let result = replaceValidReferences(template, (match) => { + let result = await replaceValidReferencesAsync(template, async (match) => { if (replacementError) return match try { - const resolved = this.resolveReference(match, resolutionContext) + const resolved = await this.resolveReference(match, resolutionContext) if (resolved === undefined) { return match } @@ -720,19 +767,19 @@ export class VariableResolver { throw replacementError } - result = result.replace(createEnvVarPattern(), (match) => { - const 
resolved = this.resolveReference(match, resolutionContext) + result = await replaceEnvVarsAsync(result, async (match) => { + const resolved = await this.resolveReference(match, resolutionContext) return typeof resolved === 'string' ? resolved : match }) return result } - private resolveTemplateWithoutConditionFormatting( + private async resolveTemplateWithoutConditionFormatting( ctx: ExecutionContext, currentNodeId: string, template: string, loopScope?: LoopScope - ): string { + ): Promise { const resolutionContext: ResolutionContext = { executionContext: ctx, executionState: this.state, @@ -742,11 +789,11 @@ export class VariableResolver { let replacementError: Error | null = null - let result = replaceValidReferences(template, (match) => { + let result = await replaceValidReferencesAsync(template, async (match) => { if (replacementError) return match try { - const resolved = this.resolveReference(match, resolutionContext) + const resolved = await this.resolveReference(match, resolutionContext) if (resolved === undefined) { return match } @@ -779,17 +826,19 @@ export class VariableResolver { throw replacementError } - result = result.replace(createEnvVarPattern(), (match) => { - const resolved = this.resolveReference(match, resolutionContext) + result = await replaceEnvVarsAsync(result, async (match) => { + const resolved = await this.resolveReference(match, resolutionContext) return typeof resolved === 'string' ? resolved : match }) return result } - private resolveReference(reference: string, context: ResolutionContext): any { + private async resolveReference(reference: string, context: ResolutionContext): Promise { for (const resolver of this.resolvers) { if (resolver.canResolve(reference)) { - const result = resolver.resolve(reference, context) + const result = resolver.resolveAsync + ? 
await resolver.resolveAsync(reference, context) + : resolver.resolve(reference, context) return result } } diff --git a/apps/sim/executor/variables/resolvers/block.ts b/apps/sim/executor/variables/resolvers/block.ts index b1fcb0bbf7f..c17810a4f55 100644 --- a/apps/sim/executor/variables/resolvers/block.ts +++ b/apps/sim/executor/variables/resolvers/block.ts @@ -10,9 +10,11 @@ import { InvalidFieldError, type OutputSchema, resolveBlockReference, + resolveBlockReferenceAsync, } from '@/executor/utils/block-reference' import { formatLiteralForCode } from '@/executor/utils/code-formatting' import { + type AsyncPathNavigator, navigatePath, RESOLVED_EMPTY, type ResolutionContext, @@ -24,7 +26,10 @@ export class BlockResolver implements Resolver { private nameToBlockId: Map private blockById: Map - constructor(private workflow: SerializedWorkflow) { + constructor( + private workflow: SerializedWorkflow, + private navigatePathAsync?: AsyncPathNavigator + ) { this.nameToBlockId = new Map() this.blockById = new Map() for (const block of workflow.blocks) { @@ -87,7 +92,7 @@ export class BlockResolver implements Resolver { return result.value } - const backwardsCompat = this.handleBackwardsCompat(block, output, pathParts) + const backwardsCompat = this.handleBackwardsCompatSync(block, output, pathParts) if (backwardsCompat !== undefined) { return backwardsCompat } @@ -95,7 +100,7 @@ export class BlockResolver implements Resolver { return RESOLVED_EMPTY } catch (error) { if (error instanceof InvalidFieldError) { - const fallback = this.handleBackwardsCompat(block, output, pathParts) + const fallback = this.handleBackwardsCompatSync(block, output, pathParts) if (fallback !== undefined) { return fallback } @@ -104,7 +109,74 @@ export class BlockResolver implements Resolver { } } - private handleBackwardsCompat( + async resolveAsync(reference: string, context: ResolutionContext): Promise { + if (!this.navigatePathAsync) { + return this.resolve(reference, context) + } + const parts 
= parseReferencePath(reference) + if (parts.length === 0) { + return undefined + } + const [blockName, ...pathParts] = parts + + const blockId = this.findBlockIdByName(blockName) + if (!blockId) { + return undefined + } + + const block = this.blockById.get(blockId)! + const output = this.getBlockOutput(blockId, context) + + const blockData: Record = {} + const blockOutputSchemas: Record = {} + + if (output !== undefined) { + blockData[blockId] = output + } + + const outputSchema = getBlockSchema(block) + + if (outputSchema && Object.keys(outputSchema).length > 0) { + blockOutputSchemas[blockId] = outputSchema + } + + try { + const blockReferenceContext = { + blockNameMapping: Object.fromEntries(this.nameToBlockId), + blockData, + blockOutputSchemas, + } + const result = (await resolveBlockReferenceAsync( + blockName, + pathParts, + blockReferenceContext, + context, + this.navigatePathAsync + ))! + + if (result.value !== undefined) { + assertNoLargeValueRefs(result.value) + return result.value + } + + const backwardsCompat = await this.handleBackwardsCompat(block, output, pathParts, context) + if (backwardsCompat !== undefined) { + return backwardsCompat + } + + return RESOLVED_EMPTY + } catch (error) { + if (error instanceof InvalidFieldError) { + const fallback = await this.handleBackwardsCompat(block, output, pathParts, context) + if (fallback !== undefined) { + return fallback + } + } + throw error + } + } + + private handleBackwardsCompatSync( block: SerializedBlock, output: unknown, pathParts: string[] @@ -128,6 +200,56 @@ export class BlockResolver implements Resolver { } } + const outputRecord = output as Record | undefined + if ( + (block.metadata?.id === 'workflow' || block.metadata?.id === 'workflow_input') && + pathParts[0] === 'result' && + pathParts[1] === 'response' && + outputRecord?.result !== undefined && + typeof outputRecord.result === 'object' && + outputRecord.result !== null && + (outputRecord.result as Record)?.response === undefined + ) { + 
const adjustedPathParts = ['result', ...pathParts.slice(2)] + const fallbackResult = navigatePath(output, adjustedPathParts) + if (fallbackResult !== undefined) { + return fallbackResult + } + } + + return undefined + } + + private async handleBackwardsCompat( + block: SerializedBlock, + output: unknown, + pathParts: string[], + context: ResolutionContext + ): Promise { + const navigatePathAsync = this.navigatePathAsync + if (!navigatePathAsync) { + return this.handleBackwardsCompatSync(block, output, pathParts) + } + + if (output === undefined || pathParts.length === 0) { + return undefined + } + + if ( + block.metadata?.id === 'response' && + pathParts[0] === 'response' && + (output as Record)?.response === undefined + ) { + const adjustedPathParts = pathParts.slice(1) + if (adjustedPathParts.length === 0) { + return output + } + const fallbackResult = await navigatePathAsync(output, adjustedPathParts, context) + if (fallbackResult !== undefined) { + return fallbackResult + } + } + const isWorkflowBlock = block.metadata?.id === 'workflow' || block.metadata?.id === 'workflow_input' const outputRecord = output as Record | undefined> @@ -138,7 +260,7 @@ export class BlockResolver implements Resolver { outputRecord?.result?.response === undefined ) { const adjustedPathParts = ['result', ...pathParts.slice(2)] - const fallbackResult = navigatePath(output, adjustedPathParts) + const fallbackResult = await navigatePathAsync(output, adjustedPathParts, context) if (fallbackResult !== undefined) { return fallbackResult } diff --git a/apps/sim/executor/variables/resolvers/loop.ts b/apps/sim/executor/variables/resolvers/loop.ts index db38e05abbe..64e977b7536 100644 --- a/apps/sim/executor/variables/resolvers/loop.ts +++ b/apps/sim/executor/variables/resolvers/loop.ts @@ -8,6 +8,7 @@ import { stripOuterBranchSuffix, } from '@/executor/utils/subflow-utils' import { + type AsyncPathNavigator, navigatePath, type ResolutionContext, type Resolver, @@ -23,7 +24,10 @@ const 
FOR_EACH_LOOP_CONTEXT_FIELDS = ['index', 'currentItem', 'items'] as const export class LoopResolver implements Resolver { private loopNameToId: Map - constructor(private workflow: SerializedWorkflow) { + constructor( + private workflow: SerializedWorkflow, + private navigatePathAsync?: AsyncPathNavigator + ) { this.loopNameToId = new Map() for (const block of workflow.blocks) { if (workflow.loops[block.id] && block.metadata?.name) { @@ -48,6 +52,27 @@ export class LoopResolver implements Resolver { } resolve(reference: string, context: ResolutionContext): any { + return this.resolveInternal(reference, context, false) + } + + async resolveAsync(reference: string, context: ResolutionContext): Promise { + if (!this.navigatePathAsync) { + return this.resolve(reference, context) + } + return this.resolveInternal(reference, context, true) + } + + private async resolveInternal( + reference: string, + context: ResolutionContext, + useAsyncPath: true + ): Promise + private resolveInternal(reference: string, context: ResolutionContext, useAsyncPath: false): any + private resolveInternal( + reference: string, + context: ResolutionContext, + useAsyncPath: boolean + ): any | Promise { const parts = parseReferencePath(reference) if (parts.length === 0) { logger.warn('Invalid loop reference', { reference }) @@ -87,7 +112,9 @@ export class LoopResolver implements Resolver { if (!targetLoopId) { return undefined } - return this.resolveOutput(targetLoopId, [...bracketPathParts, ...rest.slice(1)], context) + return useAsyncPath + ? this.resolveOutputAsync(targetLoopId, [...bracketPathParts, ...rest.slice(1)], context) + : this.resolveOutput(targetLoopId, [...bracketPathParts, ...rest.slice(1)], context) } const isContextual = @@ -151,7 +178,9 @@ export class LoopResolver implements Resolver { } if (pathParts.length > 0) { - return navigatePath(value, pathParts) + return useAsyncPath && this.navigatePathAsync + ? 
this.navigatePathAsync(value, pathParts, context) + : navigatePath(value, pathParts) } return value @@ -170,6 +199,25 @@ export class LoopResolver implements Resolver { return value } + private async resolveOutputAsync( + loopId: string, + pathParts: string[], + context: ResolutionContext + ): Promise { + const output = context.executionState.getBlockOutput(loopId) + if (!output || typeof output !== 'object') { + return undefined + } + const value = (output as Record).results + if (pathParts.length > 0) { + return this.navigatePathAsync + ? this.navigatePathAsync(value, pathParts, context) + : navigatePath(value, pathParts) + } + assertNoLargeValueRefs(value) + return value + } + private findInnermostLoopForBlock(blockId: string): string | undefined { const baseId = stripCloneSuffixes(blockId) const loops = this.workflow.loops || {} diff --git a/apps/sim/executor/variables/resolvers/parallel.ts b/apps/sim/executor/variables/resolvers/parallel.ts index 712d72b244f..b0414f16da1 100644 --- a/apps/sim/executor/variables/resolvers/parallel.ts +++ b/apps/sim/executor/variables/resolvers/parallel.ts @@ -9,6 +9,7 @@ import { stripOuterBranchSuffix, } from '@/executor/utils/subflow-utils' import { + type AsyncPathNavigator, navigatePath, type ResolutionContext, type Resolver, @@ -24,7 +25,10 @@ const COLLECTION_PARALLEL_CONTEXT_FIELDS = ['index', 'currentItem', 'items'] as export class ParallelResolver implements Resolver { private parallelNameToId: Map - constructor(private workflow: SerializedWorkflow) { + constructor( + private workflow: SerializedWorkflow, + private navigatePathAsync?: AsyncPathNavigator + ) { this.parallelNameToId = new Map() for (const block of workflow.blocks) { if (workflow.parallels?.[block.id] && block.metadata?.name) { @@ -49,6 +53,27 @@ export class ParallelResolver implements Resolver { } resolve(reference: string, context: ResolutionContext): any { + return this.resolveInternal(reference, context, false) + } + + async resolveAsync(reference: 
string, context: ResolutionContext): Promise { + if (!this.navigatePathAsync) { + return this.resolve(reference, context) + } + return this.resolveInternal(reference, context, true) + } + + private async resolveInternal( + reference: string, + context: ResolutionContext, + useAsyncPath: true + ): Promise + private resolveInternal(reference: string, context: ResolutionContext, useAsyncPath: false): any + private resolveInternal( + reference: string, + context: ResolutionContext, + useAsyncPath: boolean + ): any | Promise { const parts = parseReferencePath(reference) if (parts.length === 0) { logger.warn('Invalid parallel reference', { reference }) @@ -82,11 +107,13 @@ export class ParallelResolver implements Resolver { if (rest.length > 0) { const { property, pathParts: bracketPathParts } = splitLeadingBracketPath(rest[0]) if (ParallelResolver.OUTPUT_PROPERTIES.has(property)) { - return this.resolveOutput( - targetParallelId, - [...bracketPathParts, ...rest.slice(1)], - context - ) + return useAsyncPath + ? this.resolveOutputAsync( + targetParallelId, + [...bracketPathParts, ...rest.slice(1)], + context + ) + : this.resolveOutput(targetParallelId, [...bracketPathParts, ...rest.slice(1)], context) } } @@ -147,7 +174,9 @@ export class ParallelResolver implements Resolver { } if (pathParts.length > 0) { - return navigatePath(value, pathParts) + return useAsyncPath && this.navigatePathAsync + ? this.navigatePathAsync(value, pathParts, context) + : navigatePath(value, pathParts) } return value @@ -245,6 +274,25 @@ export class ParallelResolver implements Resolver { return value } + private async resolveOutputAsync( + parallelId: string, + pathParts: string[], + context: ResolutionContext + ): Promise { + const output = context.executionState.getBlockOutput(parallelId) + if (!output || typeof output !== 'object') { + return undefined + } + const value = (output as Record).results + if (pathParts.length > 0) { + return this.navigatePathAsync + ? 
this.navigatePathAsync(value, pathParts, context) + : navigatePath(value, pathParts) + } + assertNoLargeValueRefs(value) + return value + } + private getDistributionItems(parallelConfig: SerializedParallel): unknown[] { const rawItems = parallelConfig.distribution ?? [] diff --git a/apps/sim/executor/variables/resolvers/reference-async.server.ts b/apps/sim/executor/variables/resolvers/reference-async.server.ts new file mode 100644 index 00000000000..c481c42e119 --- /dev/null +++ b/apps/sim/executor/variables/resolvers/reference-async.server.ts @@ -0,0 +1,101 @@ +import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' +import { materializeLargeValueRefSyncOrThrow } from '@/lib/execution/payloads/cache' +import { assertNoLargeValueRefs, isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' +import { materializeLargeValueRef } from '@/lib/execution/payloads/store' +import { hydrateUserFileWithBase64 } from '@/lib/uploads/utils/user-file-base64.server' +import type { ResolutionContext } from '@/executor/variables/resolvers/reference' + +async function materializeLargeValueRefOrThrow(value: unknown): Promise { + if (!isLargeValueRef(value)) { + return value + } + const materialized = await materializeLargeValueRef(value) + if (materialized === undefined) { + return materializeLargeValueRefSyncOrThrow(value) + } + return materialized +} + +async function hydrateExplicitBase64( + file: unknown, + context: ResolutionContext +): Promise { + if (!isUserFileWithMetadata(file)) { + return undefined + } + const hydrated = await hydrateUserFileWithBase64(file, { + requestId: context.executionContext.metadata.requestId, + executionId: context.executionContext.executionId, + userId: context.executionContext.userId, + maxBytes: context.executionContext.base64MaxBytes, + }) + if (!hydrated.base64) { + throw new Error( + `Base64 content for ${file.name} is unavailable or exceeds the configured inline limit.` + ) + } + return hydrated.base64 +} + +/** + * 
Server-side path navigation used during execution. It can hydrate persisted + * large values and UserFile.base64 only when the requested path explicitly asks + * for base64. + */ +export async function navigatePathAsync( + obj: any, + path: string[], + context: ResolutionContext +): Promise { + let current = obj + for (const part of path) { + current = await materializeLargeValueRefOrThrow(current) + + if (current === null || current === undefined) { + return undefined + } + + if (part === 'base64') { + const base64 = await hydrateExplicitBase64(current, context) + if (base64 !== undefined) { + current = base64 + continue + } + } + + const arrayMatch = part.match(/^([^[]+)(\[.+)$/) + if (arrayMatch) { + const [, prop, bracketsPart] = arrayMatch + current = + typeof current === 'object' && current !== null + ? (current as Record)[prop] + : undefined + current = await materializeLargeValueRefOrThrow(current) + if (current === undefined || current === null) { + return undefined + } + + const indices = bracketsPart.match(/\[(\d+)\]/g) + if (indices) { + for (const indexMatch of indices) { + current = await materializeLargeValueRefOrThrow(current) + if (current === null || current === undefined) { + return undefined + } + const idx = Number.parseInt(indexMatch.slice(1, -1), 10) + current = Array.isArray(current) ? current[idx] : undefined + } + } + } else if (/^\d+$/.test(part)) { + const index = Number.parseInt(part, 10) + current = Array.isArray(current) ? current[index] : undefined + } else { + current = + typeof current === 'object' && current !== null + ? 
(current as Record)[part] + : undefined + } + } + assertNoLargeValueRefs(current) + return current +} diff --git a/apps/sim/executor/variables/resolvers/reference.ts b/apps/sim/executor/variables/resolvers/reference.ts index 389ecaaea83..7524b8f86a1 100644 --- a/apps/sim/executor/variables/resolvers/reference.ts +++ b/apps/sim/executor/variables/resolvers/reference.ts @@ -12,8 +12,15 @@ export interface ResolutionContext { export interface Resolver { canResolve(reference: string): boolean resolve(reference: string, context: ResolutionContext): any + resolveAsync?(reference: string, context: ResolutionContext): Promise } +export type AsyncPathNavigator = ( + obj: any, + path: string[], + context: ResolutionContext +) => Promise + /** * Sentinel value indicating a reference was resolved to a known block * that produced no output (e.g., the block exists in the workflow but diff --git a/apps/sim/hooks/use-collaborative-workflow.ts b/apps/sim/hooks/use-collaborative-workflow.ts index 10585e1f8a9..80b4c53c71c 100644 --- a/apps/sim/hooks/use-collaborative-workflow.ts +++ b/apps/sim/hooks/use-collaborative-workflow.ts @@ -337,6 +337,9 @@ export function useCollaborativeWorkflow() { if (config.count !== undefined) { useWorkflowStore.getState().updateParallelCount(payload.id, config.count) } + if (config.batchSize !== undefined) { + useWorkflowStore.getState().updateParallelBatchSize(payload.id, config.batchSize) + } if (config.distribution !== undefined) { useWorkflowStore .getState() @@ -1728,6 +1731,7 @@ export function useCollaborativeWorkflow() { let newCount = currentBlock.data?.count || 5 let newDistribution = currentBlock.data?.collection || '' + const batchSize = currentBlock.data?.batchSize || 20 if (parallelType === 'count') { newDistribution = '' @@ -1742,6 +1746,7 @@ export function useCollaborativeWorkflow() { count: newCount, distribution: newDistribution, parallelType, + batchSize, } executeQueuedOperation( @@ -1752,6 +1757,7 @@ export function 
useCollaborativeWorkflow() { useWorkflowStore.getState().updateParallelType(parallelId, parallelType) useWorkflowStore.getState().updateParallelCount(parallelId, newCount) useWorkflowStore.getState().updateParallelCollection(parallelId, newDistribution) + useWorkflowStore.getState().updateParallelBatchSize(parallelId, batchSize) } ) }, @@ -1789,13 +1795,15 @@ export function useCollaborativeWorkflow() { } else { const currentDistribution = currentBlock.data?.collection || '' const currentParallelType = currentBlock.data?.parallelType || 'count' + const batchSize = currentBlock.data?.batchSize || 20 const config = { id: nodeId, nodes: childNodes, - count: Math.max(1, Math.min(20, count)), // Clamp between 1-20 for parallels + count: Math.max(1, Math.min(1000, count)), distribution: currentDistribution, parallelType: currentParallelType, + batchSize, } executeQueuedOperation( @@ -1860,6 +1868,7 @@ export function useCollaborativeWorkflow() { } else { const currentCount = currentBlock.data?.count || 5 const currentParallelType = currentBlock.data?.parallelType || 'count' + const batchSize = currentBlock.data?.batchSize || 20 const config = { id: nodeId, @@ -1867,6 +1876,7 @@ export function useCollaborativeWorkflow() { count: currentCount, distribution: collection, parallelType: currentParallelType, + batchSize, } executeQueuedOperation( @@ -1880,6 +1890,38 @@ export function useCollaborativeWorkflow() { [executeQueuedOperation] ) + const collaborativeUpdateParallelBatchSize = useCallback( + (parallelId: string, batchSize: number) => { + const currentBlock = useWorkflowStore.getState().blocks[parallelId] + if (!currentBlock || currentBlock.type !== 'parallel') return + + const childNodes = Object.values(useWorkflowStore.getState().blocks) + .filter((b) => b.data?.parentId === parallelId) + .map((b) => b.id) + const currentCount = currentBlock.data?.count || 5 + const currentDistribution = currentBlock.data?.collection || '' + const currentParallelType = 
currentBlock.data?.parallelType || 'count' + const clampedBatchSize = Math.max(1, Math.min(20, batchSize)) + + const config = { + id: parallelId, + nodes: childNodes, + count: currentCount, + distribution: currentDistribution, + parallelType: currentParallelType, + batchSize: clampedBatchSize, + } + + executeQueuedOperation( + SUBFLOW_OPERATIONS.UPDATE, + OPERATION_TARGETS.SUBFLOW, + { id: parallelId, type: 'parallel', config }, + () => useWorkflowStore.getState().updateParallelBatchSize(parallelId, clampedBatchSize) + ) + }, + [executeQueuedOperation] + ) + const collaborativeUpdateVariable = useCallback( (variableId: string, field: 'name' | 'value' | 'type', value: any) => { executeQueuedOperation( @@ -2137,6 +2179,7 @@ export function useCollaborativeWorkflow() { // Collaborative loop/parallel operations collaborativeUpdateLoopType, collaborativeUpdateParallelType, + collaborativeUpdateParallelBatchSize, // Unified iteration operations collaborativeUpdateIterationCount, diff --git a/apps/sim/hooks/use-undo-redo.ts b/apps/sim/hooks/use-undo-redo.ts index 86fe2a6bcce..025c087de0e 100644 --- a/apps/sim/hooks/use-undo-redo.ts +++ b/apps/sim/hooks/use-undo-redo.ts @@ -617,7 +617,9 @@ export function useUndoRedo() { const currentCount = currentBlock.data?.count || 5 const currentParallelType = currentBlock.data?.parallelType || 'count' const currentDistribution = currentBlock.data?.collection || '' + const currentBatchSize = currentBlock.data?.batchSize || 20 const nextCount = Number.parseInt(String(update.after), 10) + const nextBatchSize = Number.parseInt(String(update.after), 10) const config = { id: update.blockId, nodes: childNodes, @@ -630,6 +632,10 @@ export function useUndoRedo() { ? update.after : currentDistribution, parallelType: currentParallelType, + batchSize: + update.fieldId === WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize + ? 
nextBatchSize + : currentBatchSize, } addToQueue({ @@ -650,6 +656,13 @@ export function useUndoRedo() { return } + if (update.fieldId === WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize) { + if (!Number.isNaN(nextBatchSize)) { + useWorkflowStore.getState().updateParallelBatchSize(update.blockId, nextBatchSize) + } + return + } + useWorkflowStore.getState().updateParallelCollection(update.blockId, String(update.after)) }, [activeWorkflowId, addToQueue, userId] diff --git a/apps/sim/lib/execution/event-buffer.test.ts b/apps/sim/lib/execution/event-buffer.test.ts index d0b4024f3aa..da7de9404ca 100644 --- a/apps/sim/lib/execution/event-buffer.test.ts +++ b/apps/sim/lib/execution/event-buffer.test.ts @@ -57,7 +57,7 @@ describe('execution event buffer', () => { mockRedis.zremrangebyrank.mockResolvedValue(0) mockRedis.eval.mockImplementation( async ( - _script: string, + script: string, _keyCount: number, _eventsKey: string, _seqKey: string, @@ -68,6 +68,12 @@ describe('execution event buffer', () => { terminalStatus: string, ...args: (string | number)[] ) => { + if (script.includes('execution_redis_bytes')) { + return [1, 'ok', 0, 0] + } + if (script.includes('DECRBY')) { + return 1 + } for (let i = 0; i < args.length; i += 2) { persistedEntries.push(JSON.parse(args[i + 1] as string) as ExecutionEventEntry) } @@ -152,7 +158,10 @@ describe('execution event buffer', () => { () => Promise.resolve(), ] - mockRedis.eval.mockImplementation(async (_script: string, ...args: unknown[]) => { + mockRedis.eval.mockImplementation(async (script: string, ...args: unknown[]) => { + if (script.includes('execution_redis_bytes')) { + return [1, 'ok', 0, 0] + } const batchEntries: ExecutionEventEntry[] = [] const zaddArgs = args.slice(8) as (string | number)[] for (let i = 0; i < zaddArgs.length; i += 2) { @@ -237,7 +246,10 @@ describe('execution event buffer', () => { it('flushes replay events after a recovered final replay flush without terminal meta', async () => { 
mockRedis.incrby.mockResolvedValue(100) let flushAttempt = 0 - mockRedis.eval.mockImplementation(async (_script: string, ...args: unknown[]) => { + mockRedis.eval.mockImplementation(async (script: string, ...args: unknown[]) => { + if (script.includes('execution_redis_bytes')) { + return [1, 'ok', 0, 0] + } const zaddArgs = args.slice(8) as (string | number)[] if (flushAttempt > 0) { for (let i = 0; i < zaddArgs.length; i += 2) { @@ -287,6 +299,22 @@ describe('execution event buffer', () => { expect(mockRedis.hset).toHaveBeenCalledWith('meta', { status: 'complete' }) }) + it('surfaces execution memory limit errors when the Redis budget is exceeded', async () => { + mockRedis.incrby.mockResolvedValue(100) + mockRedis.eval.mockImplementationOnce(async () => [ + 0, + 'execution_redis_bytes', + 64 * 1024 * 1024, + ]) + + const writer = createExecutionEventWriter('exec-1') + + await expect(writer.writeTerminal(makeEvent('terminal'), 'complete')).rejects.toThrow( + 'Execution memory limit exceeded' + ) + expect(persistedEntries).toEqual([]) + }) + it('preserves requested UserFile base64 when buffering terminal events', async () => { mockRedis.incrby.mockResolvedValue(100) const base64 = Buffer.from('hello').toString('base64') diff --git a/apps/sim/lib/execution/event-buffer.ts b/apps/sim/lib/execution/event-buffer.ts index 914c095e55a..81aebee8170 100644 --- a/apps/sim/lib/execution/event-buffer.ts +++ b/apps/sim/lib/execution/event-buffer.ts @@ -5,6 +5,12 @@ import { getRedisClient } from '@/lib/core/config/redis' import { LARGE_VALUE_THRESHOLD_BYTES } from '@/lib/execution/payloads/large-value-ref' import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import type { LargeValueStoreContext } from '@/lib/execution/payloads/store' +import { + type ExecutionRedisBudgetReservation, + releaseExecutionRedisBytes, + reserveExecutionRedisBytes, +} from '@/lib/execution/redis-budget.server' +import { isExecutionResourceLimitError } from 
'@/lib/execution/resource-errors' import type { ExecutionEvent } from '@/lib/workflows/executor/execution-events' const logger = createLogger('ExecutionEventBuffer') @@ -65,6 +71,10 @@ function getJsonSize(value: unknown): number | null { } } +function getExecutionEventEntryJson(entry: ExecutionEventEntry): string { + return JSON.stringify(entry) +} + function trimFinalBlockLogsForEventData(data: unknown): unknown { if (!data || typeof data !== 'object' || Array.isArray(data)) return data @@ -602,12 +612,27 @@ export function createExecutionEventWriter( if (pending.length === 0) return true const batch = pending pending = [] + let reservedBudget: ExecutionRedisBudgetReservation | null = null + let budgetReserved = false try { const key = getEventsKey(executionId) const zaddArgs: (string | number)[] = [] + let batchBytes = 0 for (const entry of batch) { - zaddArgs.push(entry.eventId, JSON.stringify(entry)) + const entryJson = getExecutionEventEntryJson(entry) + batchBytes += Buffer.byteLength(entryJson, 'utf8') + zaddArgs.push(entry.eventId, entryJson) + } + reservedBudget = { + executionId, + userId: context.userId, + category: 'event_buffer', + operation: terminalStatus ? 
'write_terminal_events' : 'write_events', + bytes: batchBytes, + logger, } + await reserveExecutionRedisBytes(redis, reservedBudget) + budgetReserved = true await redis.eval( FLUSH_EVENTS_SCRIPT, 3, @@ -623,6 +648,13 @@ export function createExecutionEventWriter( consecutiveFlushFailures = 0 return true } catch (error) { + if (budgetReserved && reservedBudget) { + await releaseExecutionRedisBytes(redis, reservedBudget) + } + if (isExecutionResourceLimitError(error)) { + pending = batch.concat(pending) + throw error + } consecutiveFlushFailures += 1 logger.warn('Failed to flush execution events', { executionId, diff --git a/apps/sim/lib/execution/payloads/serializer.ts b/apps/sim/lib/execution/payloads/serializer.ts index 04450f3362c..d892b2a3226 100644 --- a/apps/sim/lib/execution/payloads/serializer.ts +++ b/apps/sim/lib/execution/payloads/serializer.ts @@ -88,6 +88,20 @@ async function compactValue( return compacted } +async function forceStoreValue( + value: unknown, + options: CompactExecutionPayloadOptions +): Promise { + if (isLargeValueRef(value)) { + return value + } + const measured = getJsonAndSize(value) + if (!measured) { + return value + } + return storeLargeValue(value, measured.json, measured.size, options) +} + export async function compactExecutionPayload( value: T, options: CompactExecutionPayloadOptions = {} @@ -95,6 +109,30 @@ export async function compactExecutionPayload( return (await compactValue(value, options, { seen: new WeakSet() })) as T } +/** + * Compacts subflow result aggregates while preserving indexable `results`. 
+ */ +export async function compactSubflowResults( + results: T[], + options: CompactExecutionPayloadOptions = {} +): Promise { + const entryOptions = { ...options, preserveRoot: false } + let compactedResults = (await Promise.all( + results.map((result) => compactExecutionPayload(result, entryOptions)) + )) as T[] + + const aggregate = getJsonAndSize({ results: compactedResults }) + if (aggregate && aggregate.size <= (options.thresholdBytes ?? LARGE_VALUE_THRESHOLD_BYTES)) { + return compactedResults + } + + compactedResults = (await Promise.all( + compactedResults.map((result) => forceStoreValue(result, options)) + )) as T[] + + return compactedResults +} + export async function compactBlockLogs( logs: BlockLog[] | undefined, options: CompactExecutionPayloadOptions = {} diff --git a/apps/sim/lib/execution/redis-budget.server.ts b/apps/sim/lib/execution/redis-budget.server.ts new file mode 100644 index 00000000000..1e78199029e --- /dev/null +++ b/apps/sim/lib/execution/redis-budget.server.ts @@ -0,0 +1,136 @@ +import { createLogger, type Logger } from '@sim/logger' +import { toError } from '@sim/utils/errors' +import type { getRedisClient } from '@/lib/core/config/redis' +import { ExecutionResourceLimitError } from '@/lib/execution/resource-errors' + +type RedisClient = NonNullable> + +const logger = createLogger('ExecutionRedisBudget') +const REDIS_BUDGET_PREFIX = 'execution:redis-budget:' +const MAX_SINGLE_REDIS_WRITE_BYTES = 8 * 1024 * 1024 +const MAX_EXECUTION_REDIS_BYTES = 64 * 1024 * 1024 +const MAX_USER_REDIS_BYTES = 256 * 1024 * 1024 +const REDIS_BUDGET_TTL_SECONDS = 60 * 60 + +const RESERVE_REDIS_BYTES_SCRIPT = ` +local bytes = tonumber(ARGV[1]) +local execution_limit = tonumber(ARGV[2]) +local user_limit = tonumber(ARGV[3]) +local ttl_seconds = tonumber(ARGV[4]) +local execution_current = tonumber(redis.call('GET', KEYS[1]) or '0') +if execution_limit > 0 and execution_current + bytes > execution_limit then + return {0, 'execution_redis_bytes', 
execution_current} +end +local user_current = 0 +if #KEYS >= 2 then + user_current = tonumber(redis.call('GET', KEYS[2]) or '0') + if user_limit > 0 and user_current + bytes > user_limit then + return {0, 'user_redis_bytes', user_current} + end +end +redis.call('INCRBY', KEYS[1], bytes) +redis.call('EXPIRE', KEYS[1], ttl_seconds) +if #KEYS >= 2 then + redis.call('INCRBY', KEYS[2], bytes) + redis.call('EXPIRE', KEYS[2], ttl_seconds) +end +return {1, 'ok', execution_current + bytes, user_current + bytes} +` + +const RELEASE_REDIS_BYTES_SCRIPT = ` +local bytes = tonumber(ARGV[1]) +for i = 1, #KEYS do + local next_value = redis.call('DECRBY', KEYS[i], bytes) + if next_value <= 0 then + redis.call('DEL', KEYS[i]) + end +end +return 1 +` + +export type ExecutionRedisBudgetCategory = 'event_buffer' | 'base64_cache' + +export interface ExecutionRedisBudgetReservation { + executionId: string + userId?: string + category: ExecutionRedisBudgetCategory + bytes: number + operation: string + logger?: Logger +} + +export function getExecutionRedisBudgetLimits() { + return { + maxSingleWriteBytes: MAX_SINGLE_REDIS_WRITE_BYTES, + maxExecutionBytes: MAX_EXECUTION_REDIS_BYTES, + maxUserBytes: MAX_USER_REDIS_BYTES, + ttlSeconds: REDIS_BUDGET_TTL_SECONDS, + } +} + +function getBudgetKeys(reservation: ExecutionRedisBudgetReservation): string[] { + const keys = [`${REDIS_BUDGET_PREFIX}execution:${reservation.executionId}`] + if (reservation.userId) { + keys.push(`${REDIS_BUDGET_PREFIX}user:${reservation.userId}`) + } + return keys +} + +export async function reserveExecutionRedisBytes( + redis: RedisClient, + reservation: ExecutionRedisBudgetReservation +): Promise { + if (reservation.bytes <= 0) return + + const limits = getExecutionRedisBudgetLimits() + if (reservation.bytes > limits.maxSingleWriteBytes) { + throw new ExecutionResourceLimitError({ + resource: 'redis_key_bytes', + attemptedBytes: reservation.bytes, + limitBytes: limits.maxSingleWriteBytes, + }) + } + + const keys = 
getBudgetKeys(reservation) + const result = (await redis.eval( + RESERVE_REDIS_BYTES_SCRIPT, + keys.length, + ...keys, + reservation.bytes, + limits.maxExecutionBytes, + limits.maxUserBytes, + limits.ttlSeconds + )) as [number, string, number | string | null] + + const [allowed, resource, current] = result + if (allowed === 1) return + + throw new ExecutionResourceLimitError({ + resource: resource === 'user_redis_bytes' ? 'user_redis_bytes' : 'execution_redis_bytes', + attemptedBytes: reservation.bytes, + currentBytes: Number(current ?? 0), + limitBytes: resource === 'user_redis_bytes' ? limits.maxUserBytes : limits.maxExecutionBytes, + }) +} + +export async function releaseExecutionRedisBytes( + redis: RedisClient, + reservation: ExecutionRedisBudgetReservation +): Promise { + if (reservation.bytes <= 0) return + + try { + const keys = getBudgetKeys(reservation) + await redis.eval(RELEASE_REDIS_BYTES_SCRIPT, keys.length, ...keys, reservation.bytes) + } catch (error) { + const log = reservation.logger ?? 
logger + log.warn('Failed to release execution Redis budget reservation', { + executionId: reservation.executionId, + userId: reservation.userId, + category: reservation.category, + operation: reservation.operation, + bytes: reservation.bytes, + error: toError(error).message, + }) + } +} diff --git a/apps/sim/lib/execution/resource-errors.ts b/apps/sim/lib/execution/resource-errors.ts new file mode 100644 index 00000000000..3cd2f61bad9 --- /dev/null +++ b/apps/sim/lib/execution/resource-errors.ts @@ -0,0 +1,45 @@ +export const EXECUTION_RESOURCE_LIMIT_CODE = 'execution_resource_limit_exceeded' as const + +export type ExecutionResourceLimitResource = + | 'redis_key_bytes' + | 'execution_redis_bytes' + | 'user_redis_bytes' + | 'execution_payload_bytes' + +export interface ExecutionResourceLimitDetails { + resource: ExecutionResourceLimitResource + attemptedBytes: number + limitBytes: number + currentBytes?: number + statusCode?: number +} + +export class ExecutionResourceLimitError extends Error { + readonly code = EXECUTION_RESOURCE_LIMIT_CODE + readonly statusCode: number + readonly resource: ExecutionResourceLimitResource + readonly attemptedBytes: number + readonly limitBytes: number + readonly currentBytes?: number + + constructor(details: ExecutionResourceLimitDetails) { + super('Execution memory limit exceeded. Reduce payload size and try again.') + this.name = 'ExecutionResourceLimitError' + this.resource = details.resource + this.attemptedBytes = details.attemptedBytes + this.limitBytes = details.limitBytes + this.currentBytes = details.currentBytes + this.statusCode = details.statusCode ?? (details.resource === 'user_redis_bytes' ? 
429 : 413) + } +} + +export function isExecutionResourceLimitError( + error: unknown +): error is ExecutionResourceLimitError { + return ( + error instanceof ExecutionResourceLimitError || + (typeof error === 'object' && + error !== null && + (error as { code?: unknown }).code === EXECUTION_RESOURCE_LIMIT_CODE) + ) +} diff --git a/apps/sim/lib/uploads/utils/user-file-base64.server.ts b/apps/sim/lib/uploads/utils/user-file-base64.server.ts index 85d52ea77a3..2a922172e04 100644 --- a/apps/sim/lib/uploads/utils/user-file-base64.server.ts +++ b/apps/sim/lib/uploads/utils/user-file-base64.server.ts @@ -4,6 +4,12 @@ import { getRedisClient } from '@/lib/core/config/redis' import { getMaxExecutionTimeout } from '@/lib/core/execution-limits' import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' import { LARGE_VALUE_THRESHOLD_BYTES } from '@/lib/execution/payloads/large-value-ref' +import { + type ExecutionRedisBudgetReservation, + releaseExecutionRedisBytes, + reserveExecutionRedisBytes, +} from '@/lib/execution/redis-budget.server' +import { isExecutionResourceLimitError } from '@/lib/execution/resource-errors' import { bufferToBase64 } from '@/lib/uploads/utils/file-utils' import { downloadFileFromStorage, downloadFileFromUrl } from '@/lib/uploads/utils/file-utils.server' import type { UserFile } from '@/executor/types' @@ -30,6 +36,7 @@ interface HydrationState { export interface Base64HydrationOptions { requestId?: string executionId?: string + userId?: string logger?: Logger maxBytes?: number allowUnknownSize?: boolean @@ -82,10 +89,31 @@ function createBase64Cache(options: Base64HydrationOptions, logger: Logger): Bas } }, async set(file: UserFile, value: string, ttlSeconds: number) { + const budgetReservation: ExecutionRedisBudgetReservation | null = executionId + ? 
{ + executionId, + userId: options.userId, + category: 'base64_cache', + operation: 'set_base64_cache', + bytes: Buffer.byteLength(value, 'utf8'), + logger, + } + : null + let budgetReserved = false try { const key = getFullCacheKey(executionId, file) + if (budgetReservation) { + await reserveExecutionRedisBytes(redis, budgetReservation) + budgetReserved = true + } await redis.set(key, value, 'EX', ttlSeconds) } catch (error) { + if (budgetReserved && budgetReservation) { + await releaseExecutionRedisBytes(redis, budgetReservation) + } + if (isExecutionResourceLimitError(error)) { + throw error + } logger.warn(`[${options.requestId}] Redis set failed, skipping cache`, error) } }, @@ -278,6 +306,18 @@ export async function hydrateUserFilesWithBase64( return (await hydrateValue(value, options, state, logger)) as T } +/** + * Hydrates a single UserFile object when a resolver explicitly asks for base64. + */ +export async function hydrateUserFileWithBase64( + file: UserFile, + options: Base64HydrationOptions +): Promise { + const logger = getHydrationLogger(options) + const state = createHydrationState(options, logger) + return hydrateUserFile(file, options, state, logger) +} + function isPlainObject(value: unknown): value is Record { if (!value || typeof value !== 'object') { return false diff --git a/apps/sim/lib/workflows/executor/queued-workflow-execution.ts b/apps/sim/lib/workflows/executor/queued-workflow-execution.ts index ad20414cbb1..ac3007adf39 100644 --- a/apps/sim/lib/workflows/executor/queued-workflow-execution.ts +++ b/apps/sim/lib/workflows/executor/queued-workflow-execution.ts @@ -261,6 +261,7 @@ export async function executeQueuedWorkflowJob( ? 
await hydrateUserFilesWithBase64(result.output, { requestId, executionId, + userId: metadata.userId, maxBytes: payload.base64MaxBytes, }) : result.output diff --git a/apps/sim/lib/workflows/search-replace/subflow-fields.ts b/apps/sim/lib/workflows/search-replace/subflow-fields.ts index c87b982efb2..1826ecee442 100644 --- a/apps/sim/lib/workflows/search-replace/subflow-fields.ts +++ b/apps/sim/lib/workflows/search-replace/subflow-fields.ts @@ -5,6 +5,7 @@ export const WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS = { iterations: 'subflowIterations', items: 'subflowItems', condition: 'subflowCondition', + batchSize: 'subflowBatchSize', } as const export type WorkflowSearchSubflowFieldId = @@ -18,6 +19,7 @@ interface WorkflowSearchSubflowBlock { loopType?: string parallelType?: string count?: unknown + batchSize?: unknown collection?: unknown whileCondition?: unknown doWhileCondition?: unknown @@ -113,6 +115,14 @@ export function getWorkflowSearchSubflowFields( editable: true, valueKind: parallelType === 'count' ? 'number' : 'text', }, + { + id: WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize, + title: 'Parallel Batch Size', + type: 'short-input', + value: String(block.data?.batchSize ?? 20), + editable: true, + valueKind: 'number', + }, ] } @@ -146,7 +156,10 @@ export function parseWorkflowSearchSubflowReplacement({ }): | { success: true; value: WorkflowSearchSubflowEditableValue } | { success: false; reason: string } { - if (fieldId !== WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.iterations) { + if ( + fieldId !== WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.iterations && + fieldId !== WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize + ) { return { success: true, value: replacement } } @@ -156,11 +169,14 @@ export function parseWorkflowSearchSubflowReplacement({ } const count = Number.parseInt(trimmed, 10) - const max = blockType === 'parallel' ? 20 : 1000 + const max = fieldId === WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize ? 
20 : 1000 if (count < 1 || count > max) { return { success: false, - reason: `Subflow iteration count must be between 1 and ${max}`, + reason: + fieldId === WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize + ? `Parallel batch size must be between 1 and ${max}` + : `Subflow iteration count must be between 1 and ${max}`, } } diff --git a/apps/sim/lib/workflows/streaming/streaming.ts b/apps/sim/lib/workflows/streaming/streaming.ts index b72c085e44f..910ac68db5e 100644 --- a/apps/sim/lib/workflows/streaming/streaming.ts +++ b/apps/sim/lib/workflows/streaming/streaming.ts @@ -315,6 +315,7 @@ export async function createStreamingResponse( ? await hydrateUserFilesWithBase64(outputValue, { requestId, executionId, + userId: options.userId, maxBytes: base64MaxBytes, }) : outputValue diff --git a/apps/sim/serializer/types.ts b/apps/sim/serializer/types.ts index 8192014a4ae..8d7bc56e4ed 100644 --- a/apps/sim/serializer/types.ts +++ b/apps/sim/serializer/types.ts @@ -58,4 +58,5 @@ export interface SerializedParallel { distribution?: any[] | Record | string // Items to distribute or expression to evaluate count?: number // Number of parallel executions for count-based parallel parallelType?: 'count' | 'collection' // Explicit parallel type to avoid inference bugs + batchSize?: number // Maximum number of branches to run concurrently per batch } diff --git a/apps/sim/stores/workflows/workflow/store.test.ts b/apps/sim/stores/workflows/workflow/store.test.ts index dc24da784e6..7286cf7627f 100644 --- a/apps/sim/stores/workflows/workflow/store.test.ts +++ b/apps/sim/stores/workflows/workflow/store.test.ts @@ -576,7 +576,7 @@ describe('workflow store', () => { expect(parsedDistribution).toHaveLength(3) }) - it('should clamp parallel count between 1 and 20', () => { + it('should clamp parallel count between 1 and 1000', () => { const { updateParallelCount } = useWorkflowStore.getState() addBlock( @@ -592,13 +592,46 @@ describe('workflow store', () => { updateParallelCount('parallel1', 100) 
let state = useWorkflowStore.getState() - expect(state.blocks.parallel1?.data?.count).toBe(20) + expect(state.blocks.parallel1?.data?.count).toBe(100) + + updateParallelCount('parallel1', 1001) + state = useWorkflowStore.getState() + expect(state.blocks.parallel1?.data?.count).toBe(1000) updateParallelCount('parallel1', 0) state = useWorkflowStore.getState() expect(state.blocks.parallel1?.data?.count).toBe(1) }) + it('should clamp parallel batch size between 1 and 20', () => { + const { updateParallelBatchSize } = useWorkflowStore.getState() + + addBlock( + 'parallel1', + 'parallel', + 'Test Parallel', + { x: 0, y: 0 }, + { + count: 5, + batchSize: 20, + collection: '', + } + ) + + updateParallelBatchSize('parallel1', 7) + let state = useWorkflowStore.getState() + expect(state.blocks.parallel1?.data?.batchSize).toBe(7) + expect(state.parallels.parallel1.batchSize).toBe(7) + + updateParallelBatchSize('parallel1', 50) + state = useWorkflowStore.getState() + expect(state.blocks.parallel1?.data?.batchSize).toBe(20) + + updateParallelBatchSize('parallel1', 0) + state = useWorkflowStore.getState() + expect(state.blocks.parallel1?.data?.batchSize).toBe(1) + }) + it('should regenerate parallels when updateParallelType is called', () => { const { updateParallelType } = useWorkflowStore.getState() diff --git a/apps/sim/stores/workflows/workflow/store.ts b/apps/sim/stores/workflows/workflow/store.ts index 888bf069bef..f05cd9468f5 100644 --- a/apps/sim/stores/workflows/workflow/store.ts +++ b/apps/sim/stores/workflows/workflow/store.ts @@ -26,6 +26,7 @@ import type { WorkflowStore, } from '@/stores/workflows/workflow/types' import { + clampParallelBatchSize, findAllDescendantNodes, generateLoopBlocks, generateParallelBlocks, @@ -1163,7 +1164,7 @@ export const useWorkflowStore = create()( ...block, data: { ...block.data, - count: Math.max(1, Math.min(20, count)), // Clamp between 1-20 + count: Math.max(1, Math.min(1000, count)), }, }, } @@ -1180,6 +1181,32 @@ export const 
useWorkflowStore = create()( // Note: Socket.IO handles real-time sync automatically }, + updateParallelBatchSize: (parallelId: string, batchSize: number) => { + const block = get().blocks[parallelId] + if (!block || block.type !== 'parallel') return + + const newBlocks = { + ...get().blocks, + [parallelId]: { + ...block, + data: { + ...block.data, + batchSize: clampParallelBatchSize(batchSize), + }, + }, + } + + const newState = { + blocks: newBlocks, + edges: [...get().edges], + loops: { ...get().loops }, + parallels: generateParallelBlocks(newBlocks), + } + + set(newState) + get().updateLastSaved() + }, + updateParallelCollection: (parallelId: string, collection: string) => { const block = get().blocks[parallelId] if (!block || block.type !== 'parallel') return diff --git a/apps/sim/stores/workflows/workflow/types.ts b/apps/sim/stores/workflows/workflow/types.ts index c209cfd0eef..1f32f318764 100644 --- a/apps/sim/stores/workflows/workflow/types.ts +++ b/apps/sim/stores/workflows/workflow/types.ts @@ -84,6 +84,7 @@ export interface WorkflowActions { setLoopWhileCondition: (loopId: string, condition: string) => void setLoopDoWhileCondition: (loopId: string, condition: string) => void updateParallelCount: (parallelId: string, count: number) => void + updateParallelBatchSize: (parallelId: string, batchSize: number) => void updateParallelCollection: (parallelId: string, collection: string) => void updateParallelType: (parallelId: string, parallelType: 'count' | 'collection') => void generateLoopBlocks: () => Record diff --git a/apps/sim/stores/workflows/workflow/utils.ts b/apps/sim/stores/workflows/workflow/utils.ts index 26c2f642a85..a7077dc0903 100644 --- a/apps/sim/stores/workflows/workflow/utils.ts +++ b/apps/sim/stores/workflows/workflow/utils.ts @@ -6,6 +6,16 @@ import type { Edge } from 'reactflow' import type { BlockState, Loop, Parallel } from '@/stores/workflows/workflow/types' const DEFAULT_LOOP_ITERATIONS = 5 +const DEFAULT_PARALLEL_BATCH_SIZE = 20 
+const MAX_PARALLEL_BATCH_SIZE = 20 + +export function clampParallelBatchSize(batchSize: unknown): number { + const parsed = typeof batchSize === 'number' ? batchSize : Number.parseInt(String(batchSize), 10) + if (Number.isNaN(parsed)) { + return DEFAULT_PARALLEL_BATCH_SIZE + } + return Math.max(1, Math.min(MAX_PARALLEL_BATCH_SIZE, parsed)) +} /** * Check if adding an edge would create a cycle in the graph. @@ -111,6 +121,7 @@ export function convertParallelBlockToParallel( validatedParallelType === 'collection' ? parallelBlock.data?.collection || '' : undefined const count = parallelBlock.data?.count || 5 + const batchSize = clampParallelBatchSize(parallelBlock.data?.batchSize) return { id: parallelBlockId, @@ -118,6 +129,7 @@ export function convertParallelBlockToParallel( distribution, count, parallelType: validatedParallelType, + batchSize, enabled: parallelBlock.enabled, } } diff --git a/packages/workflow-types/src/workflow.ts b/packages/workflow-types/src/workflow.ts index 006bd2ccab6..06b9692ddba 100644 --- a/packages/workflow-types/src/workflow.ts +++ b/packages/workflow-types/src/workflow.ts @@ -25,6 +25,7 @@ export interface ParallelConfig { nodes: string[] distribution?: unknown[] | Record | string parallelType?: 'count' | 'collection' + batchSize?: number } export interface Subflow { @@ -52,6 +53,7 @@ export interface BlockData { whileCondition?: string doWhileCondition?: string parallelType?: 'collection' | 'count' + batchSize?: number type?: string canonicalModes?: Record } @@ -178,6 +180,7 @@ export interface Parallel { distribution?: any[] | Record | string count?: number parallelType?: 'count' | 'collection' + batchSize?: number enabled: boolean locked?: boolean } From 37c2d463dad8599f23d24631d3675841a8c57b55 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 11 May 2026 16:55:40 -0700 Subject: [PATCH 03/11] feat(parallel): batching --- .../docs/de/api-reference/getting-started.mdx | 10 +- .../docs/en/api-reference/getting-started.mdx | 10 
+- .../content/docs/en/api-reference/python.mdx | 29 +- .../docs/en/api-reference/typescript.mdx | 31 +- apps/docs/content/docs/en/blocks/function.mdx | 43 +- apps/docs/content/docs/en/blocks/parallel.mdx | 2 +- .../docs/en/execution/api-deployment.mdx | 4 +- .../docs/es/api-reference/getting-started.mdx | 10 +- .../docs/fr/api-reference/getting-started.mdx | 10 +- .../docs/ja/api-reference/getting-started.mdx | 10 +- .../docs/zh/api-reference/getting-started.mdx | 10 +- .../content/docs/zh/api-reference/python.mdx | 25 +- .../docs/zh/api-reference/typescript.mdx | 27 +- apps/sim/app/api/chat/[identifier]/route.ts | 3 + apps/sim/app/api/form/[identifier]/route.ts | 3 + apps/sim/app/api/function/execute/route.ts | 265 ++++++++++-- .../[executionId]/[contextId]/route.ts | 4 + .../app/api/workflows/[id]/execute/route.ts | 9 + .../subflow-editor/subflow-editor.tsx | 2 +- .../editor/hooks/use-subflow-editor.ts | 6 +- .../components/structured-output.tsx | 140 +++++-- .../preview-editor/preview-editor.tsx | 4 +- apps/sim/executor/constants.ts | 3 +- apps/sim/executor/execution/block-executor.ts | 4 + apps/sim/executor/execution/executor.ts | 48 +++ .../execution/snapshot-serializer.test.ts | 70 ++++ .../executor/execution/snapshot-serializer.ts | 15 +- apps/sim/executor/execution/types.ts | 4 + .../function/function-handler.test.ts | 3 + .../handlers/function/function-handler.ts | 7 +- apps/sim/executor/orchestrators/loop.ts | 60 +-- .../executor/orchestrators/parallel.test.ts | 44 ++ apps/sim/executor/orchestrators/parallel.ts | 55 ++- apps/sim/executor/types.ts | 2 + apps/sim/executor/utils/block-reference.ts | 8 +- .../executor/utils/parallel-expansion.test.ts | 59 +++ apps/sim/executor/utils/parallel-expansion.ts | 47 ++- apps/sim/executor/utils/subflow-utils.test.ts | 16 +- apps/sim/executor/utils/subflow-utils.ts | 33 +- apps/sim/executor/variables/resolver.test.ts | 226 ++++++++++ apps/sim/executor/variables/resolver.ts | 168 +++++++- 
.../sim/executor/variables/resolvers/block.ts | 26 +- apps/sim/executor/variables/resolvers/loop.ts | 14 +- .../variables/resolvers/parallel.test.ts | 42 +- .../executor/variables/resolvers/parallel.ts | 31 +- .../resolvers/reference-async.server.ts | 37 +- .../executor/variables/resolvers/reference.ts | 17 +- .../executor/variables/resolvers/workflow.ts | 2 +- apps/sim/hooks/use-collaborative-workflow.ts | 4 +- .../lib/api/contracts/execution-payloads.ts | 10 + apps/sim/lib/api/contracts/hotspots.ts | 3 + apps/sim/lib/execution/isolated-vm-worker.cjs | 72 +++- apps/sim/lib/execution/payloads/cache.ts | 66 ++- apps/sim/lib/execution/payloads/hydration.ts | 16 +- .../lib/execution/payloads/large-value-ref.ts | 19 +- .../payloads/materialization.server.ts | 294 +++++++++++++ .../lib/execution/payloads/serializer.test.ts | 13 + apps/sim/lib/execution/payloads/store.test.ts | 385 ++++++++++++++++++ apps/sim/lib/execution/payloads/store.ts | 53 ++- .../utils/user-file-base64.server.test.ts | 76 +++- .../uploads/utils/user-file-base64.server.ts | 77 ++-- .../lib/workflows/executor/execution-core.ts | 28 +- .../search-replace/replacements.test.ts | 11 +- .../search-replace/subflow-fields.ts | 11 +- apps/sim/lib/workflows/streaming/streaming.ts | 6 + .../stores/workflows/workflow/store.test.ts | 8 +- apps/sim/stores/workflows/workflow/store.ts | 4 +- apps/sim/tools/function/execute.test.ts | 3 + apps/sim/tools/function/execute.ts | 3 + apps/sim/tools/function/types.ts | 3 + packages/python-sdk/README.md | 15 +- packages/python-sdk/simstudio/__init__.py | 26 +- packages/python-sdk/tests/test_client.py | 18 +- packages/ts-sdk/README.md | 18 +- packages/ts-sdk/src/index.ts | 1 + 75 files changed, 2485 insertions(+), 456 deletions(-) create mode 100644 apps/sim/executor/execution/snapshot-serializer.test.ts create mode 100644 apps/sim/lib/execution/payloads/materialization.server.ts create mode 100644 apps/sim/lib/execution/payloads/store.test.ts diff --git 
a/apps/docs/content/docs/de/api-reference/getting-started.mdx b/apps/docs/content/docs/de/api-reference/getting-started.mdx index fa9fad0baa3..25c8cfdbf2e 100644 --- a/apps/docs/content/docs/de/api-reference/getting-started.mdx +++ b/apps/docs/content/docs/de/api-reference/getting-started.mdx @@ -109,20 +109,22 @@ curl -X POST https://www.sim.ai/api/workflows/{workflowId}/execute \ -d '{"inputs": {}, "async": true}' ``` -This returns immediately with a `taskId`: +This returns immediately with a `jobId` and `statusUrl`: ```json { "success": true, - "taskId": "job_abc123", - "status": "queued" + "jobId": "job_abc123", + "statusUrl": "https://www.sim.ai/api/jobs/job_abc123", + "message": "Workflow execution started", + "async": true } ``` Poll the [Get Job Status](/api-reference/workflows/getJobStatus) endpoint until the status is `completed` or `failed`: ```bash -curl https://www.sim.ai/api/jobs/{taskId} \ +curl https://www.sim.ai/api/jobs/{jobId} \ -H "X-API-Key: YOUR_API_KEY" ``` diff --git a/apps/docs/content/docs/en/api-reference/getting-started.mdx b/apps/docs/content/docs/en/api-reference/getting-started.mdx index dced7aca61c..038998853cf 100644 --- a/apps/docs/content/docs/en/api-reference/getting-started.mdx +++ b/apps/docs/content/docs/en/api-reference/getting-started.mdx @@ -109,20 +109,22 @@ curl -X POST https://www.sim.ai/api/workflows/{workflowId}/execute \ -d '{"inputs": {}, "async": true}' ``` -This returns immediately with a `taskId`: +This returns immediately with a `jobId` and `statusUrl`: ```json { "success": true, - "taskId": "job_abc123", - "status": "queued" + "jobId": "job_abc123", + "statusUrl": "https://www.sim.ai/api/jobs/job_abc123", + "message": "Workflow execution started", + "async": true } ``` Poll the [Get Job Status](/api-reference/workflows/getJobStatus) endpoint until the status is `completed` or `failed`: ```bash -curl https://www.sim.ai/api/jobs/{taskId} \ +curl https://www.sim.ai/api/jobs/{jobId} \ -H "X-API-Key: YOUR_API_KEY" 
``` diff --git a/apps/docs/content/docs/en/api-reference/python.mdx b/apps/docs/content/docs/en/api-reference/python.mdx index 903bac51f1c..d70bb50e3aa 100644 --- a/apps/docs/content/docs/en/api-reference/python.mdx +++ b/apps/docs/content/docs/en/api-reference/python.mdx @@ -80,7 +80,7 @@ result = client.execute_workflow( **Returns:** `WorkflowExecutionResult | AsyncExecutionResult` -When `async_execution=True`, returns immediately with a task ID for polling. Otherwise, waits for completion. +When `async_execution=True`, returns immediately with a `job_id` and `status_url` for polling. Otherwise, waits for completion. ##### get_workflow_status() @@ -117,20 +117,20 @@ if is_ready: Get the status of an async job execution. ```python -status = client.get_job_status("task-id-from-async-execution") +status = client.get_job_status("job-id-from-async-execution") print("Status:", status["status"]) # 'queued', 'processing', 'completed', 'failed' if status["status"] == "completed": print("Output:", status["output"]) ``` **Parameters:** -- `task_id` (str): The task ID returned from async execution +- `task_id` (str): The job ID returned from async execution **Returns:** `Dict[str, Any]` **Response fields:** - `success` (bool): Whether the request was successful -- `taskId` (str): The task ID +- `taskId` (str): The job ID - `status` (str): One of `'queued'`, `'processing'`, `'completed'`, `'failed'`, `'cancelled'` - `metadata` (dict): Contains `startedAt`, `completedAt`, and `duration` - `output` (any, optional): The workflow output (when completed) @@ -270,10 +270,11 @@ class WorkflowExecutionResult: @dataclass class AsyncExecutionResult: success: bool - task_id: str - status: str # 'queued' - created_at: str - links: Dict[str, str] # e.g., {"status": "/api/jobs/{taskId}"} + job_id: str + status_url: str + execution_id: Optional[str] = None + message: str = "" + async_execution: bool = True ``` ### WorkflowStatus @@ -493,17 +494,17 @@ def execute_async(): ) # Check if result 
is an async execution - if hasattr(result, 'task_id'): - print(f"Task ID: {result.task_id}") - print(f"Status endpoint: {result.links['status']}") + if hasattr(result, 'job_id'): + print(f"Job ID: {result.job_id}") + print(f"Status endpoint: {result.status_url}") # Poll for completion - status = client.get_job_status(result.task_id) + status = client.get_job_status(result.job_id) while status["status"] in ["queued", "processing"]: print(f"Current status: {status['status']}") time.sleep(2) # Wait 2 seconds - status = client.get_job_status(result.task_id) + status = client.get_job_status(result.job_id) if status["status"] == "completed": print("Workflow completed!") @@ -764,7 +765,7 @@ import { FAQ } from '@/components/ui/faq' ` -When `async: true`, returns immediately with a task ID for polling. Otherwise, waits for completion. +When `async: true`, returns immediately with a `jobId` and `statusUrl` for polling. Otherwise, waits for completion. ##### getWorkflowStatus() @@ -131,7 +131,7 @@ if (isReady) { Get the status of an async job execution. 
```typescript -const status = await client.getJobStatus('task-id-from-async-execution'); +const status = await client.getJobStatus('job-id-from-async-execution'); console.log('Status:', status.status); // 'queued', 'processing', 'completed', 'failed' if (status.status === 'completed') { console.log('Output:', status.output); @@ -139,13 +139,13 @@ if (status.status === 'completed') { ``` **Parameters:** -- `taskId` (string): The task ID returned from async execution +- `jobId` (string): The job ID returned from async execution **Returns:** `Promise` **Response fields:** - `success` (boolean): Whether the request was successful -- `taskId` (string): The task ID +- `taskId` (string): The job ID - `status` (string): One of `'queued'`, `'processing'`, `'completed'`, `'failed'`, `'cancelled'` - `metadata` (object): Contains `startedAt`, `completedAt`, and `duration` - `output` (any, optional): The workflow output (when completed) @@ -278,12 +278,11 @@ interface WorkflowExecutionResult { ```typescript interface AsyncExecutionResult { success: boolean; - taskId: string; - status: 'queued'; - createdAt: string; - links: { - status: string; // e.g., "/api/jobs/{taskId}" - }; + jobId: string; + statusUrl: string; + executionId?: string; + message: string; + async: true; } ``` @@ -767,17 +766,17 @@ async function executeAsync() { }); // Check if result is an async execution - if ('taskId' in result) { - console.log('Task ID:', result.taskId); - console.log('Status endpoint:', result.links.status); + if ('jobId' in result) { + console.log('Job ID:', result.jobId); + console.log('Status endpoint:', result.statusUrl); // Poll for completion - let status = await client.getJobStatus(result.taskId); + let status = await client.getJobStatus(result.jobId); while (status.status === 'queued' || status.status === 'processing') { console.log('Current status:', status.status); await new Promise(resolve => setTimeout(resolve, 2000)); // Wait 2 seconds - status = await 
client.getJobStatus(result.taskId); + status = await client.getJobStatus(result.jobId); } if (status.status === 'completed') { @@ -1022,7 +1021,7 @@ import { FAQ } from '@/components/ui/faq' `. +Function blocks receive their code, parameters, resolved references, and previous block context in an internal execution request. Sim can safely reference oversized workflow outputs, such as large `loop.results` or `parallel.results`, when you select a smaller nested field like ``. Larger values are stored in execution storage and passed around as small references until code explicitly reads them. -File outputs are metadata-first by default. Referencing ``, ``, or similar metadata does not hydrate file contents. Referencing `` explicitly hydrates that file's base64 content at resolver time and can fail if the file exceeds the configured inline limit. +File outputs are metadata-first by default. Referencing ``, ``, or similar metadata does not hydrate file contents. In JavaScript functions without imports, a direct base64 reference like `` is automatically rewritten to a lazy server-side read so the base64 string does not cross the Function request body. -Avoid passing a full large object into a Function block when you only need one field. For example, prefer `` over `` when the API response is large. If the complete function request body is still larger than the platform limit, execution can fail before your code starts. +You can also call the helper explicitly: + +```javascript +const file = ; +const base64 = await sim.files.readBase64(file); +``` + +`sim.files.readBase64(file)`, `sim.files.readText(file)`, `sim.files.readBase64Chunk(file, { offset, length })`, and `sim.files.readTextChunk(file, { offset, length })` read from server-side execution storage under memory caps. `sim.values.read(ref)` can explicitly read a large execution value reference. These helpers are available only in JavaScript functions without imports. 
JavaScript with imports, Python, and shell do not support these lazy helpers yet. + +Very large full reads can still fail by design; use chunk helpers or return a file when you need to handle more data. + +Use text chunks for text-like files such as logs, CSV, JSONL, and markdown: + +```javascript +const file = ; +const firstMegabyte = await sim.files.readTextChunk(file, { + offset: 0, + length: 1024 * 1024, +}); + +return firstMegabyte.split('\n').slice(0, 10); +``` + +Use base64 chunks for binary files such as images, PDFs, audio, archives, or APIs that expect base64 input: + +```javascript +const file = ; +const firstMegabyteBase64 = await sim.files.readBase64Chunk(file, { + offset: 0, + length: 1024 * 1024, +}); + +return { name: file.name, chunk: firstMegabyteBase64 }; +``` + +Chunk `offset` and `length` are byte-based. For Unicode text, a chunk can split a multi-byte character at the boundary; use text chunks for approximate text processing and prefer smaller structured references when exact parsing matters. + +Avoid passing a full large object into a Function block when you only need one field. For example, prefer `` over `` when the API response is large. If a JavaScript Function without imports references a large execution value, Sim automatically reads it through `sim.values.read(...)` at runtime under memory caps. For large generated data, write the result to a file or table with `outputPath`, `outputSandboxPath`, or `outputTable` instead of returning the entire payload inline. diff --git a/apps/docs/content/docs/en/blocks/parallel.mdx b/apps/docs/content/docs/en/blocks/parallel.mdx index 22f2b330865..24fccc7ebf3 100644 --- a/apps/docs/content/docs/en/blocks/parallel.mdx +++ b/apps/docs/content/docs/en/blocks/parallel.mdx @@ -141,7 +141,7 @@ const allResults = ; // Returns: [result1, result2, result3, ...] ``` -For large result sets, reference only the entry or field you need, such as ``. 
Sim keeps aggregate results indexable and hydrates stored entries when an indexed path is explicitly referenced. +For large result sets, reference only the entry or field you need, such as ``. Sim keeps aggregate results indexable by storing oversized entries in execution storage and hydrating them only when an indexed server-side path is explicitly referenced. ### Batch Size diff --git a/apps/docs/content/docs/en/execution/api-deployment.mdx b/apps/docs/content/docs/en/execution/api-deployment.mdx index d27fbfa3fc2..b7f1de3fbf9 100644 --- a/apps/docs/content/docs/en/execution/api-deployment.mdx +++ b/apps/docs/content/docs/en/execution/api-deployment.mdx @@ -226,13 +226,13 @@ Workflow execution responses are capped by platform request and response limits. "id": "lv_abc123DEF456", "kind": "array", "size": 12582912, - "key": "execution-values/workspace/workflow/execution/lv_abc123DEF456.json", + "key": "execution/workspace-id/workflow-id/exec_xyz/large-value-lv_abc123DEF456.json", "executionId": "exec_xyz", "preview": { "length": 25000 } } ``` -The `version` field is part of the external API contract. Treat the reference as an opaque placeholder for a value that could not be safely embedded in the response. `id`, `key`, and `executionId` are not fetch URLs; use `selectedOutputs` to request a smaller nested field, reduce the data passed between blocks, or return the data from a Response block when your workflow intentionally owns the HTTP response body. File outputs are metadata-first; request `.base64` only when you need inline file content. +The `version` field is part of the external API contract. Treat the reference as an opaque placeholder for a value that could not be safely embedded in the response. `id`, `key`, and `executionId` are not fetch URLs; `key` points to execution-scoped server storage. 
Use `selectedOutputs` to request a smaller nested field, reduce the data passed between blocks, or return the data from a Response block when your workflow intentionally owns the HTTP response body. File outputs are metadata-first; request `.base64` only when you need inline file content. JavaScript Function blocks can explicitly read large files or value refs with the `sim.files` and `sim.values` helpers under memory caps. ### Asynchronous diff --git a/apps/docs/content/docs/es/api-reference/getting-started.mdx b/apps/docs/content/docs/es/api-reference/getting-started.mdx index dced7aca61c..038998853cf 100644 --- a/apps/docs/content/docs/es/api-reference/getting-started.mdx +++ b/apps/docs/content/docs/es/api-reference/getting-started.mdx @@ -109,20 +109,22 @@ curl -X POST https://www.sim.ai/api/workflows/{workflowId}/execute \ -d '{"inputs": {}, "async": true}' ``` -This returns immediately with a `taskId`: +This returns immediately with a `jobId` and `statusUrl`: ```json { "success": true, - "taskId": "job_abc123", - "status": "queued" + "jobId": "job_abc123", + "statusUrl": "https://www.sim.ai/api/jobs/job_abc123", + "message": "Workflow execution started", + "async": true } ``` Poll the [Get Job Status](/api-reference/workflows/getJobStatus) endpoint until the status is `completed` or `failed`: ```bash -curl https://www.sim.ai/api/jobs/{taskId} \ +curl https://www.sim.ai/api/jobs/{jobId} \ -H "X-API-Key: YOUR_API_KEY" ``` diff --git a/apps/docs/content/docs/fr/api-reference/getting-started.mdx b/apps/docs/content/docs/fr/api-reference/getting-started.mdx index dced7aca61c..038998853cf 100644 --- a/apps/docs/content/docs/fr/api-reference/getting-started.mdx +++ b/apps/docs/content/docs/fr/api-reference/getting-started.mdx @@ -109,20 +109,22 @@ curl -X POST https://www.sim.ai/api/workflows/{workflowId}/execute \ -d '{"inputs": {}, "async": true}' ``` -This returns immediately with a `taskId`: +This returns immediately with a `jobId` and `statusUrl`: ```json { 
"success": true, - "taskId": "job_abc123", - "status": "queued" + "jobId": "job_abc123", + "statusUrl": "https://www.sim.ai/api/jobs/job_abc123", + "message": "Workflow execution started", + "async": true } ``` Poll the [Get Job Status](/api-reference/workflows/getJobStatus) endpoint until the status is `completed` or `failed`: ```bash -curl https://www.sim.ai/api/jobs/{taskId} \ +curl https://www.sim.ai/api/jobs/{jobId} \ -H "X-API-Key: YOUR_API_KEY" ``` diff --git a/apps/docs/content/docs/ja/api-reference/getting-started.mdx b/apps/docs/content/docs/ja/api-reference/getting-started.mdx index dced7aca61c..038998853cf 100644 --- a/apps/docs/content/docs/ja/api-reference/getting-started.mdx +++ b/apps/docs/content/docs/ja/api-reference/getting-started.mdx @@ -109,20 +109,22 @@ curl -X POST https://www.sim.ai/api/workflows/{workflowId}/execute \ -d '{"inputs": {}, "async": true}' ``` -This returns immediately with a `taskId`: +This returns immediately with a `jobId` and `statusUrl`: ```json { "success": true, - "taskId": "job_abc123", - "status": "queued" + "jobId": "job_abc123", + "statusUrl": "https://www.sim.ai/api/jobs/job_abc123", + "message": "Workflow execution started", + "async": true } ``` Poll the [Get Job Status](/api-reference/workflows/getJobStatus) endpoint until the status is `completed` or `failed`: ```bash -curl https://www.sim.ai/api/jobs/{taskId} \ +curl https://www.sim.ai/api/jobs/{jobId} \ -H "X-API-Key: YOUR_API_KEY" ``` diff --git a/apps/docs/content/docs/zh/api-reference/getting-started.mdx b/apps/docs/content/docs/zh/api-reference/getting-started.mdx index dced7aca61c..038998853cf 100644 --- a/apps/docs/content/docs/zh/api-reference/getting-started.mdx +++ b/apps/docs/content/docs/zh/api-reference/getting-started.mdx @@ -109,20 +109,22 @@ curl -X POST https://www.sim.ai/api/workflows/{workflowId}/execute \ -d '{"inputs": {}, "async": true}' ``` -This returns immediately with a `taskId`: +This returns immediately with a `jobId` and 
`statusUrl`: ```json { "success": true, - "taskId": "job_abc123", - "status": "queued" + "jobId": "job_abc123", + "statusUrl": "https://www.sim.ai/api/jobs/job_abc123", + "message": "Workflow execution started", + "async": true } ``` Poll the [Get Job Status](/api-reference/workflows/getJobStatus) endpoint until the status is `completed` or `failed`: ```bash -curl https://www.sim.ai/api/jobs/{taskId} \ +curl https://www.sim.ai/api/jobs/{jobId} \ -H "X-API-Key: YOUR_API_KEY" ``` diff --git a/apps/docs/content/docs/zh/api-reference/python.mdx b/apps/docs/content/docs/zh/api-reference/python.mdx index c44973c8660..608942d1baf 100644 --- a/apps/docs/content/docs/zh/api-reference/python.mdx +++ b/apps/docs/content/docs/zh/api-reference/python.mdx @@ -117,20 +117,20 @@ if is_ready: 获取异步任务执行的状态。 ```python -status = client.get_job_status("task-id-from-async-execution") +status = client.get_job_status("job-id-from-async-execution") print("Status:", status["status"]) # 'queued', 'processing', 'completed', 'failed' if status["status"] == "completed": print("Output:", status["output"]) ``` **参数:** -- `task_id` (str): 异步执行返回的任务 ID +- `job_id` (str): 异步执行返回的作业 ID **返回值:** `Dict[str, Any]` **响应字段:** - `success` (bool): 请求是否成功 -- `taskId` (str): 任务 ID +- `taskId` (str): 作业 ID - `status` (str): 可能的值包括 `'queued'`, `'processing'`, `'completed'`, `'failed'`, `'cancelled'` - `metadata` (dict): 包含 `startedAt`, `completedAt` 和 `duration` - `output` (any, optional): 工作流输出(完成时) @@ -271,10 +271,11 @@ class WorkflowExecutionResult: @dataclass class AsyncExecutionResult: success: bool - task_id: str - status: str # 'queued' - created_at: str - links: Dict[str, str] # e.g., {"status": "/api/jobs/{taskId}"} + job_id: str + status_url: str + execution_id: Optional[str] = None + message: str = "" + async_execution: bool = True ``` ### WorkflowStatus @@ -494,17 +495,17 @@ def execute_async(): ) # Check if result is an async execution - if hasattr(result, 'task_id'): - print(f"Task ID: 
{result.task_id}") - print(f"Status endpoint: {result.links['status']}") + if hasattr(result, 'job_id'): + print(f"Job ID: {result.job_id}") + print(f"Status endpoint: {result.status_url}") # Poll for completion - status = client.get_job_status(result.task_id) + status = client.get_job_status(result.job_id) while status["status"] in ["queued", "processing"]: print(f"Current status: {status['status']}") time.sleep(2) # Wait 2 seconds - status = client.get_job_status(result.task_id) + status = client.get_job_status(result.job_id) if status["status"] == "completed": print("Workflow completed!") diff --git a/apps/docs/content/docs/zh/api-reference/typescript.mdx b/apps/docs/content/docs/zh/api-reference/typescript.mdx index 0f038db92dd..fac3bdffb73 100644 --- a/apps/docs/content/docs/zh/api-reference/typescript.mdx +++ b/apps/docs/content/docs/zh/api-reference/typescript.mdx @@ -138,7 +138,7 @@ if (isReady) { 获取异步任务执行的状态。 ```typescript -const status = await client.getJobStatus('task-id-from-async-execution'); +const status = await client.getJobStatus('job-id-from-async-execution'); console.log('Status:', status.status); // 'queued', 'processing', 'completed', 'failed' if (status.status === 'completed') { console.log('Output:', status.output); @@ -146,13 +146,13 @@ if (status.status === 'completed') { ``` **参数:** -- `taskId`(字符串):异步执行返回的任务 ID +- `jobId`(字符串):异步执行返回的作业 ID **返回值:** `Promise` **响应字段:** - `success`(布尔值):请求是否成功 -- `taskId`(字符串):任务 ID +- `taskId`(字符串):作业 ID - `status`(字符串):以下之一 `'queued'`、`'processing'`、`'completed'`、`'failed'`、`'cancelled'` - `metadata`(对象):包含 `startedAt`、`completedAt` 和 `duration` - `output`(任意类型,可选):工作流输出(完成时) @@ -286,12 +286,11 @@ interface WorkflowExecutionResult { ```typescript interface AsyncExecutionResult { success: boolean; - taskId: string; - status: 'queued'; - createdAt: string; - links: { - status: string; // e.g., "/api/jobs/{taskId}" - }; + jobId: string; + statusUrl: string; + executionId?: string; + message: string; + async: 
true; } ``` @@ -797,17 +796,17 @@ async function executeAsync() { }); // Check if result is an async execution - if ('taskId' in result) { - console.log('Task ID:', result.taskId); - console.log('Status endpoint:', result.links.status); + if ('jobId' in result) { + console.log('Job ID:', result.jobId); + console.log('Status endpoint:', result.statusUrl); // Poll for completion - let status = await client.getJobStatus(result.taskId); + let status = await client.getJobStatus(result.jobId); while (status.status === 'queued' || status.status === 'processing') { console.log('Current status:', status.status); await new Promise(resolve => setTimeout(resolve, 2000)); // Wait 2 seconds - status = await client.getJobStatus(result.taskId); + status = await client.getJobStatus(result.jobId); } if (status.status === 'completed') { diff --git a/apps/sim/app/api/chat/[identifier]/route.ts b/apps/sim/app/api/chat/[identifier]/route.ts index a6dff447355..f35d950a21c 100644 --- a/apps/sim/app/api/chat/[identifier]/route.ts +++ b/apps/sim/app/api/chat/[identifier]/route.ts @@ -274,6 +274,9 @@ export const POST = withRouteHandler( workflowTriggerType: 'chat', }, executionId, + workspaceId, + workflowId: deployment.workflowId, + userId: workspaceOwnerId, executeFn: async ({ onStream, onBlockComplete, abortSignal }) => executeWorkflow( workflowForExecution, diff --git a/apps/sim/app/api/form/[identifier]/route.ts b/apps/sim/app/api/form/[identifier]/route.ts index b91c6ef932a..d5ed51c4af7 100644 --- a/apps/sim/app/api/form/[identifier]/route.ts +++ b/apps/sim/app/api/form/[identifier]/route.ts @@ -227,6 +227,9 @@ export const POST = withRouteHandler( workflowTriggerType: 'api', }, executionId, + workspaceId, + workflowId: deployment.workflowId, + userId: workspaceOwnerId, executeFn: async ({ onStream, onBlockComplete, abortSignal }) => executeWorkflow( workflowForExecution, diff --git a/apps/sim/app/api/function/execute/route.ts b/apps/sim/app/api/function/execute/route.ts index 
fcfda730c4b..b7137a383dc 100644 --- a/apps/sim/app/api/function/execute/route.ts +++ b/apps/sim/app/api/function/execute/route.ts @@ -12,8 +12,18 @@ import { isE2bEnabled } from '@/lib/core/config/feature-flags' import { generateRequestId } from '@/lib/core/utils/request' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' import { executeInE2B, executeShellInE2B } from '@/lib/execution/e2b' -import { executeInIsolatedVM } from '@/lib/execution/isolated-vm' +import { executeInIsolatedVM, type IsolatedVMBrokerHandler } from '@/lib/execution/isolated-vm' import { CodeLanguage, DEFAULT_CODE_LANGUAGE, isValidCodeLanguage } from '@/lib/execution/languages' +import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' +import { + MAX_FUNCTION_INLINE_BYTES, + MAX_INLINE_MATERIALIZATION_BYTES, + readLargeValueRefFromStorage, + readUserFileContent, + unavailableLargeValueError, +} from '@/lib/execution/payloads/materialization.server' +import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' +import { isExecutionResourceLimitError } from '@/lib/execution/resource-errors' import { uploadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager' import { getWorkflowById } from '@/lib/workflows/utils' import { escapeRegExp, normalizeName, REFERENCE } from '@/executor/constants' @@ -684,6 +694,125 @@ function serializeForShellEnv(value: unknown, nullValue = ''): string { } } +interface FunctionRouteExecutionContext { + workflowId?: string + workspaceId?: string + executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean + userId?: string + requestId: string +} + +function asRecord(value: unknown): Record { + return value && typeof value === 'object' && !Array.isArray(value) + ? 
(value as Record) + : {} +} + +function getPositiveNumber(value: unknown): number | undefined { + if (typeof value !== 'number' || !Number.isFinite(value) || value <= 0) { + return undefined + } + return value +} + +function clampInlineBytes(value: unknown, limit = MAX_FUNCTION_INLINE_BYTES): number { + const requested = getPositiveNumber(value) + return Math.min(requested ?? limit, limit) +} + +function getBrokerFileArgs(args: unknown): { + file: unknown + maxBytes: number + offset?: number + length?: number +} { + const record = asRecord(args) + const options = asRecord(record.options) + return { + file: record.file, + maxBytes: clampInlineBytes(options.maxBytes), + offset: getPositiveNumber(options.offset), + length: getPositiveNumber(options.length), + } +} + +function createFunctionRuntimeBrokers( + context: FunctionRouteExecutionContext +): Record { + const base = { + requestId: context.requestId, + workflowId: context.workflowId, + workspaceId: context.workspaceId, + executionId: context.executionId, + largeValueExecutionIds: context.largeValueExecutionIds, + allowLargeValueWorkflowScope: context.allowLargeValueWorkflowScope, + userId: context.userId, + logger, + } + + const readFile = async (args: unknown, encoding: 'base64' | 'text', chunked = false) => { + const fileArgs = getBrokerFileArgs(args) + return readUserFileContent(fileArgs.file, { + ...base, + encoding, + maxBytes: fileArgs.maxBytes, + chunked, + offset: chunked ? fileArgs.offset : undefined, + length: chunked ? 
fileArgs.length : undefined, + }) + } + + return { + 'sim.files.readBase64': (args) => readFile(args, 'base64'), + 'sim.files.readText': (args) => readFile(args, 'text'), + 'sim.files.readBase64Chunk': (args) => readFile(args, 'base64', true), + 'sim.files.readTextChunk': (args) => readFile(args, 'text', true), + 'sim.values.read': async (args) => { + const record = asRecord(args) + const options = asRecord(record.options) + const ref = record.ref + if (!isLargeValueRef(ref)) { + throw new Error('Expected a large execution value reference.') + } + if (!context.executionId) { + throw new Error('Large execution values require an execution context.') + } + const value = await readLargeValueRefFromStorage(ref, { + ...base, + maxBytes: clampInlineBytes(options.maxBytes, MAX_INLINE_MATERIALIZATION_BYTES), + }) + if (value === undefined) { + throw unavailableLargeValueError(ref) + } + return value + }, + } +} + +async function compactFunctionRouteBody( + body: T, + context: FunctionRouteExecutionContext +): Promise { + return compactExecutionPayload(body, { + workflowId: context.workflowId, + workspaceId: context.workspaceId, + executionId: context.executionId, + userId: context.userId, + preserveRoot: true, + requireDurable: Boolean(context.workspaceId && context.workflowId && context.executionId), + }) +} + +async function functionJsonResponse( + body: T, + context: FunctionRouteExecutionContext, + init?: ResponseInit +) { + return NextResponse.json(await compactFunctionRouteBody(body, context), init) +} + async function maybeExportSandboxFileToWorkspace(args: { authUserId: string workflowId?: string @@ -792,6 +921,7 @@ export const POST = withRouteHandler(async (req: NextRequest) => { let userCodeStartLine = 3 // Default value for error reporting let resolvedCode = '' // Store resolved code for error reporting let sourceCodeForErrors: string | undefined + let routeContext: FunctionRouteExecutionContext | undefined try { const auth = await checkInternalAuth(req) @@ 
-823,6 +953,9 @@ export const POST = withRouteHandler(async (req: NextRequest) => { workflowVariables = {}, contextVariables: preResolvedContextVariables = {}, workflowId, + executionId, + largeValueExecutionIds, + allowLargeValueWorkflowScope = false, workspaceId, isCustomTool = false, _sandboxFiles, @@ -837,9 +970,20 @@ export const POST = withRouteHandler(async (req: NextRequest) => { paramsCount: Object.keys(executionParams).length, timeout, workflowId, + executionId, isCustomTool, }) + routeContext = { + workflowId, + workspaceId, + executionId, + largeValueExecutionIds, + allowLargeValueWorkflowScope, + userId: auth.userId, + requestId, + } + const lang = isValidCodeLanguage(language) ? language : DEFAULT_CODE_LANGUAGE let contextVariables: Record = {} @@ -927,12 +1071,13 @@ export const POST = withRouteHandler(async (req: NextRequest) => { }) if (shellError) { - return NextResponse.json( + return functionJsonResponse( { success: false, error: shellError, output: { result: null, stdout: cleanStdout(shellStdout), executionTime }, }, + routeContext, { status: 500 } ) } @@ -953,10 +1098,13 @@ export const POST = withRouteHandler(async (req: NextRequest) => { if (fileExportResponse) return fileExportResponse } - return NextResponse.json({ - success: true, - output: { result: shellResult ?? null, stdout: cleanStdout(shellStdout), executionTime }, - }) + return functionJsonResponse( + { + success: true, + output: { result: shellResult ?? 
null, stdout: cleanStdout(shellStdout), executionTime }, + }, + routeContext + ) } if (lang === CodeLanguage.Python && !isE2bEnabled) { @@ -1054,12 +1202,13 @@ export const POST = withRouteHandler(async (req: NextRequest) => { errorDisplayCode, prologueLineCount + importLineCount ) - return NextResponse.json( + return functionJsonResponse( { success: false, error: formattedError, output: { result: null, stdout: cleanedOutput, executionTime }, }, + routeContext, { status: 500 } ) } @@ -1080,10 +1229,13 @@ export const POST = withRouteHandler(async (req: NextRequest) => { if (fileExportResponse) return fileExportResponse } - return NextResponse.json({ - success: true, - output: { result: e2bResult ?? null, stdout: cleanStdout(stdout), executionTime }, - }) + return functionJsonResponse( + { + success: true, + output: { result: e2bResult ?? null, stdout: cleanStdout(stdout), executionTime }, + }, + routeContext + ) } let prologueLineCount = 0 @@ -1137,12 +1289,13 @@ export const POST = withRouteHandler(async (req: NextRequest) => { errorDisplayCode, prologueLineCount ) - return NextResponse.json( + return functionJsonResponse( { success: false, error: formattedError, output: { result: null, stdout: cleanedOutput, executionTime }, }, + routeContext, { status: 500 } ) } @@ -1163,10 +1316,13 @@ export const POST = withRouteHandler(async (req: NextRequest) => { if (fileExportResponse) return fileExportResponse } - return NextResponse.json({ - success: true, - output: { result: e2bResult ?? null, stdout: cleanStdout(stdout), executionTime }, - }) + return functionJsonResponse( + { + success: true, + output: { result: e2bResult ?? 
null, stdout: cleanStdout(stdout), executionTime }, + }, + routeContext + ) } const executionMethod = 'isolated-vm' @@ -1194,16 +1350,19 @@ export const POST = withRouteHandler(async (req: NextRequest) => { prependedLineCount = paramKeys.length } - const isolatedResult = await executeInIsolatedVM({ - code: codeToExecute, - params: executionParams, - envVars, - contextVariables, - timeoutMs: timeout, - requestId, - ownerKey: `user:${auth.userId}`, - ownerWeight: 1, - }) + const isolatedResult = await executeInIsolatedVM( + { + code: codeToExecute, + params: executionParams, + envVars, + contextVariables, + timeoutMs: timeout, + requestId, + ownerKey: `user:${auth.userId}`, + ownerWeight: 1, + }, + { brokers: createFunctionRuntimeBrokers(routeContext) } + ) const executionTime = Date.now() - startTime @@ -1255,7 +1414,7 @@ export const POST = withRouteHandler(async (req: NextRequest) => { errorType: enhancedError.name, }) - return NextResponse.json( + return functionJsonResponse( { success: false, error: userFriendlyErrorMessage, @@ -1272,6 +1431,7 @@ export const POST = withRouteHandler(async (req: NextRequest) => { stack: enhancedError.stack, }, }, + routeContext, { status: isSystemError ? 
500 : 422 } ) } @@ -1281,12 +1441,51 @@ export const POST = withRouteHandler(async (req: NextRequest) => { executionTime, }) - return NextResponse.json({ - success: true, - output: { result: isolatedResult.result, stdout: cleanStdout(stdout), executionTime }, - }) + return functionJsonResponse( + { + success: true, + output: { result: isolatedResult.result, stdout: cleanStdout(stdout), executionTime }, + }, + routeContext + ) } catch (error: any) { const executionTime = Date.now() - startTime + if (isExecutionResourceLimitError(error)) { + logger.warn(`[${requestId}] Function execution exceeded resource limits`, { + resource: error.resource, + attemptedBytes: error.attemptedBytes, + limitBytes: error.limitBytes, + executionTime, + }) + if (routeContext) { + return functionJsonResponse( + { + success: false, + error: error.message, + output: { + result: null, + stdout: cleanStdout(stdout), + executionTime, + }, + }, + routeContext, + { status: error.statusCode } + ) + } + return NextResponse.json( + { + success: false, + error: error.message, + output: { + result: null, + stdout: cleanStdout(stdout), + executionTime, + }, + }, + { status: error.statusCode } + ) + } + logger.error(`[${requestId}] Function execution failed`, { error: error.message || 'Unknown error', stack: error.stack, @@ -1328,6 +1527,10 @@ export const POST = withRouteHandler(async (req: NextRequest) => { }, } + if (routeContext) { + return functionJsonResponse(errorResponse, routeContext, { status: 500 }) + } + return NextResponse.json(errorResponse, { status: 500 }) } }) diff --git a/apps/sim/app/api/resume/[workflowId]/[executionId]/[contextId]/route.ts b/apps/sim/app/api/resume/[workflowId]/[executionId]/[contextId]/route.ts index ff70c6f1898..47f2f381168 100644 --- a/apps/sim/app/api/resume/[workflowId]/[executionId]/[contextId]/route.ts +++ b/apps/sim/app/api/resume/[workflowId]/[executionId]/[contextId]/route.ts @@ -180,6 +180,10 @@ export const POST = withRouteHandler( timeoutMs: 
preprocessResult.executionTimeout?.sync, }, executionId: enqueueResult.resumeExecutionId, + workspaceId: workflow.workspaceId || undefined, + workflowId, + userId: enqueueResult.userId, + allowLargeValueWorkflowScope: true, executeFn: async ({ onStream, onBlockComplete, abortSignal }) => PauseResumeManager.startResumeExecution({ ...resumeArgs, diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index b21f0d0bc4a..6538e7eb397 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -774,7 +774,10 @@ async function handleExecutePost( const outputWithBase64 = includeFileBase64 ? ((await hydrateUserFilesWithBase64(result.output, { requestId, + workspaceId, + workflowId, executionId, + allowLargeValueWorkflowScope: Boolean(resolvedRunFromBlock?.sourceSnapshot), userId: actorUserId, maxBytes: base64MaxBytes, })) as NormalizedBlockOutput) @@ -882,6 +885,7 @@ async function handleExecutePost( workspaceId, workflowId, userId: actorUserId, + allowLargeValueWorkflowScope: Boolean(resolvedRunFromBlock?.sourceSnapshot), executeFn: async ({ onStream, onBlockComplete, abortSignal }) => executeWorkflow( streamWorkflow, @@ -900,6 +904,8 @@ async function handleExecutePost( base64MaxBytes, abortSignal, executionMode: 'stream', + stopAfterBlockId, + runFromBlock: resolvedRunFromBlock, }, executionId ), @@ -1297,7 +1303,10 @@ async function handleExecutePost( const sseOutput = includeFileBase64 ? 
await hydrateUserFilesWithBase64(result.output, { requestId, + workspaceId, + workflowId, executionId, + allowLargeValueWorkflowScope: Boolean(resolvedRunFromBlock?.sourceSnapshot), userId: actorUserId, maxBytes: base64MaxBytes, }) diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx index 8d4baec5de1..af6bbc2d922 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx @@ -159,7 +159,7 @@ export function SubflowEditor({ className='mb-1' />
    - Enter a number between 1 and {subflowConfig.maxIterations} + Enter a whole number greater than 0.
    ) : ( diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts index 2d0f5692957..90eefe3d8b2 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts @@ -29,7 +29,6 @@ const SUBFLOW_CONFIG = { }, typeKey: 'loopType' as const, storeKey: 'loops' as const, - maxIterations: 1000, configKeys: { iterations: 'iterations' as const, items: 'forEachItems' as const, @@ -40,7 +39,6 @@ const SUBFLOW_CONFIG = { typeLabels: { count: 'Parallel Count', collection: 'Parallel Each' }, typeKey: 'parallelType' as const, storeKey: 'parallels' as const, - maxIterations: 1000, configKeys: { iterations: 'count' as const, items: 'distribution' as const, @@ -229,7 +227,7 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId const numValue = Number.parseInt(sanitizedValue) if (!Number.isNaN(numValue)) { - setTempInputValue(Math.min(subflowConfig.maxIterations, numValue).toString()) + setTempInputValue(numValue.toString()) } else { setTempInputValue(sanitizedValue) } @@ -245,7 +243,7 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId const value = Number.parseInt(tempInputValue ?? 
'5') if (!Number.isNaN(value)) { - const newValue = Math.min(subflowConfig.maxIterations, Math.max(1, value)) + const newValue = Math.max(1, value) collaborativeUpdateIterationCount( currentBlockId, currentBlock.type as 'loop' | 'parallel', diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/terminal/components/output-panel/components/structured-output.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/terminal/components/output-panel/components/structured-output.tsx index 6cc83299279..57e8318fbf3 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/terminal/components/output-panel/components/structured-output.tsx +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/terminal/components/output-panel/components/structured-output.tsx @@ -14,6 +14,7 @@ import { import { List, type RowComponentProps, useListRef } from 'react-window' import { Badge, ChevronDown } from '@/components/emcn' import { cn } from '@/lib/core/utils/cn' +import { isLargeValueRef, type LargeValueRef } from '@/lib/execution/payloads/large-value-ref' type ValueType = 'null' | 'undefined' | 'array' | 'string' | 'number' | 'boolean' | 'object' type BadgeVariant = 'green' | 'blue' | 'orange' | 'purple' | 'gray' | 'red' @@ -74,6 +75,33 @@ const STYLES = { } as const const EMPTY_MATCH_INDICES: number[] = [] +const USER_FILE_BASE64_PLACEHOLDER = '[TRUNCATED]' + +function formatLargeValueSize(bytes: number): string { + return `${(bytes / (1024 * 1024)).toFixed(1)} MB` +} + +function getLargeValueDisplayValue(ref: LargeValueRef): unknown { + return ref.preview ?? `[Large value: ${formatLargeValueSize(ref.size)}]` +} + +function getDisplayValue(value: unknown): unknown { + return isLargeValueRef(value) ? 
getLargeValueDisplayValue(value) : value +} + +function isDisplayedUserFileMetadata(value: unknown): value is Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) return false + const candidate = value as Record + const url = typeof candidate.url === 'string' ? candidate.url : '' + return ( + typeof candidate.id === 'string' && + typeof candidate.name === 'string' && + url.length > 0 && + typeof candidate.size === 'number' && + typeof candidate.type === 'string' && + (candidate.id.startsWith('file_') || url.includes('/api/files/serve/')) + ) +} function getTypeLabel(value: unknown): ValueType { if (value === null) return 'null' @@ -109,23 +137,39 @@ function extractErrorMessage(data: unknown): string { } function buildEntries(value: unknown, basePath: string): NodeEntry[] { - if (Array.isArray(value)) { - return value.map((item, i) => ({ key: String(i), value: item, path: `${basePath}[${i}]` })) + const displayValue = getDisplayValue(value) + + if (Array.isArray(displayValue)) { + return displayValue.map((item, i) => ({ + key: String(i), + value: item, + path: `${basePath}[${i}]`, + })) } - return Object.entries(value as Record).map(([k, v]) => ({ + const entries = Object.entries(displayValue as Record).map(([k, v]) => ({ key: k, value: v, path: `${basePath}.${k}`, })) + if (isDisplayedUserFileMetadata(displayValue) && !('base64' in displayValue)) { + entries.push({ + key: 'base64', + value: USER_FILE_BASE64_PLACEHOLDER, + path: `${basePath}.base64`, + }) + } + return entries } function getCollapsedSummary(value: unknown): string | null { - if (Array.isArray(value)) { - const len = value.length + const displayValue = getDisplayValue(value) + + if (Array.isArray(displayValue)) { + const len = displayValue.length return `${len} item${len !== 1 ? 
's' : ''}` } - if (typeof value === 'object' && value !== null) { - const count = Object.keys(value).length + if (typeof displayValue === 'object' && displayValue !== null) { + const count = buildEntries(displayValue, '').length return `${count} key${count !== 1 ? 's' : ''}` } return null @@ -133,10 +177,11 @@ function getCollapsedSummary(value: unknown): string | null { function computeInitialPaths(data: unknown, isError: boolean): Set { if (isError) return new Set(['root.error']) - if (!data || typeof data !== 'object') return new Set() - const entries = Array.isArray(data) - ? data.map((_, i) => `root[${i}]`) - : Object.keys(data).map((k) => `root.${k}`) + const displayData = getDisplayValue(data) + if (!displayData || typeof displayData !== 'object') return new Set() + const entries = Array.isArray(displayData) + ? displayData.map((_, i) => `root[${i}]`) + : Object.keys(displayData).map((k) => `root.${k}`) return new Set(entries) } @@ -184,13 +229,14 @@ function collectAllMatchPaths(data: unknown, query: string, basePath: string, de if (!query || depth > CONFIG.MAX_SEARCH_DEPTH) return [] const matches: string[] = [] + const displayData = getDisplayValue(data) - if (isPrimitive(data)) { - addPrimitiveMatches(data, `${basePath}.value`, query, matches) + if (isPrimitive(displayData)) { + addPrimitiveMatches(displayData, `${basePath}.value`, query, matches) return matches } - for (const entry of buildEntries(data, basePath)) { + for (const entry of buildEntries(displayData, basePath)) { if (isPrimitive(entry.value)) { addPrimitiveMatches(entry.value, entry.path, query, matches) } else { @@ -317,9 +363,10 @@ const StructuredNode = memo(function StructuredNode({ isError = false, }: StructuredNodeProps) { const searchContext = useContext(SearchContext) - const type = getTypeLabel(value) - const isPrimitiveValue = isPrimitive(value) - const isEmptyValue = !isPrimitiveValue && isEmpty(value) + const displayValue = getDisplayValue(value) + const type = 
getTypeLabel(displayValue) + const isPrimitiveValue = isPrimitive(displayValue) + const isEmptyValue = !isPrimitiveValue && isEmpty(displayValue) const isExpanded = expandedPaths.has(path) const handleToggle = useCallback(() => onToggle(path), [onToggle, path]) @@ -335,17 +382,17 @@ const StructuredNode = memo(function StructuredNode({ ) const childEntries = useMemo( - () => (isPrimitiveValue || isEmptyValue ? [] : buildEntries(value, path)), - [value, isPrimitiveValue, isEmptyValue, path] + () => (isPrimitiveValue || isEmptyValue ? [] : buildEntries(displayValue, path)), + [displayValue, isPrimitiveValue, isEmptyValue, path] ) const collapsedSummary = useMemo( - () => (isPrimitiveValue ? null : getCollapsedSummary(value)), - [value, isPrimitiveValue] + () => (isPrimitiveValue ? null : getCollapsedSummary(displayValue)), + [displayValue, isPrimitiveValue] ) const badgeVariant = isError ? 'red' : BADGE_VARIANTS[type] - const valueText = isPrimitiveValue ? formatPrimitive(value) : '' + const valueText = isPrimitiveValue ? formatPrimitive(displayValue) : '' const matchIndices = searchContext?.pathToMatchIndices.get(path) ?? EMPTY_MATCH_INDICES return ( @@ -472,16 +519,17 @@ function flattenTree( } function processNode(key: string, value: unknown, path: string, depth: number): void { - const valueType = getTypeLabel(value) - const isPrimitiveValue = isPrimitive(value) - const isEmptyValue = !isPrimitiveValue && isEmpty(value) + const displayValue = getDisplayValue(value) + const valueType = getTypeLabel(displayValue) + const isPrimitiveValue = isPrimitive(displayValue) + const isEmptyValue = !isPrimitiveValue && isEmpty(displayValue) const isExpanded = expandedPaths.has(path) - const collapsedSummary = isPrimitiveValue ? null : getCollapsedSummary(value) + const collapsedSummary = isPrimitiveValue ? 
null : getCollapsedSummary(displayValue) rows.push({ path, key, - value, + value: displayValue, depth, type: 'header', valueType, @@ -497,42 +545,43 @@ function flattenTree( rows.push({ path: `${path}.value`, key: '', - value, + value: displayValue, depth: depth + 1, type: 'value', valueType, isExpanded: false, isError: false, collapsedSummary: null, - displayText: formatPrimitive(value), + displayText: formatPrimitive(displayValue), matchIndices: pathToMatchIndices.get(path) ?? [], }) } else if (isEmptyValue) { rows.push({ path: `${path}.empty`, key: '', - value, + value: displayValue, depth: depth + 1, type: 'empty', valueType, isExpanded: false, isError: false, collapsedSummary: null, - displayText: Array.isArray(value) ? '[]' : '{}', + displayText: Array.isArray(displayValue) ? '[]' : '{}', matchIndices: [], }) } else { - for (const entry of buildEntries(value, path)) { + for (const entry of buildEntries(displayValue, path)) { processNode(entry.key, entry.value, entry.path, depth + 1) } } } } - if (isPrimitive(data)) { - processNode('value', data, 'root.value', 0) - } else if (data && typeof data === 'object') { - for (const entry of buildEntries(data, 'root')) { + const displayData = getDisplayValue(data) + if (isPrimitive(displayData)) { + processNode('value', displayData, 'root.value', 0) + } else if (displayData && typeof displayData === 'object') { + for (const entry of buildEntries(displayData, 'root')) { processNode(entry.key, entry.value, entry.path, 0) } } @@ -549,22 +598,24 @@ function countVisibleRows(data: unknown, expandedPaths: Set, isError: bo let count = 0 function countNode(value: unknown, path: string): void { + const displayValue = getDisplayValue(value) count++ if (!expandedPaths.has(path)) return - if (isPrimitive(value) || isEmpty(value)) { + if (isPrimitive(displayValue) || isEmpty(displayValue)) { count++ } else { - for (const entry of buildEntries(value, path)) { + for (const entry of buildEntries(displayValue, path)) { 
countNode(entry.value, entry.path) } } } - if (isPrimitive(data)) { - countNode(data, 'root.value') - } else if (data && typeof data === 'object') { - for (const entry of buildEntries(data, 'root')) { + const displayData = getDisplayValue(data) + if (isPrimitive(displayData)) { + countNode(displayData, 'root.value') + } else if (displayData && typeof displayData === 'object') { + for (const entry of buildEntries(displayData, 'root')) { countNode(entry.value, entry.path) } } @@ -782,8 +833,9 @@ export const StructuredOutput = memo(function StructuredOutput({ }, []) const rootEntries = useMemo(() => { - if (isPrimitive(data)) return [{ key: 'value', value: data, path: 'root.value' }] - return buildEntries(data, 'root') + const displayData = getDisplayValue(data) + if (isPrimitive(displayData)) return [{ key: 'value', value: displayData, path: 'root.value' }] + return buildEntries(displayData, 'root') }, [data]) const searchContextValue = useMemo(() => { diff --git a/apps/sim/app/workspace/[workspaceId]/w/components/preview/components/preview-editor/preview-editor.tsx b/apps/sim/app/workspace/[workspaceId]/w/components/preview/components/preview-editor/preview-editor.tsx index 735408dae4b..e9d82206878 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/components/preview/components/preview-editor/preview-editor.tsx +++ b/apps/sim/app/workspace/[workspaceId]/w/components/preview/components/preview-editor/preview-editor.tsx @@ -572,14 +572,12 @@ const SUBFLOW_CONFIG = { while: 'While Loop', doWhile: 'Do While Loop', }, - maxIterations: 1000, }, parallel: { typeLabels: { count: 'Parallel Count', collection: 'Parallel Each', }, - maxIterations: 20, }, } as const @@ -685,7 +683,7 @@ function SubflowConfigDisplay({ block, loop, parallel }: SubflowConfigDisplayPro className='mb-1' />
    - Enter a number between 1 and {config.maxIterations} + Enter a whole number greater than 0.
    ) : ( diff --git a/apps/sim/executor/constants.ts b/apps/sim/executor/constants.ts index 924c3dfdd67..71fa8dea3f1 100644 --- a/apps/sim/executor/constants.ts +++ b/apps/sim/executor/constants.ts @@ -159,8 +159,7 @@ export const DEFAULTS = { BLOCK_TYPE: 'unknown', BLOCK_TITLE: 'Untitled Block', WORKFLOW_NAME: 'Workflow', - MAX_LOOP_ITERATIONS: 1000, - MAX_FOREACH_ITEMS: 1000, + DEFAULT_LOOP_ITERATIONS: 1000, MAX_PARALLEL_BRANCHES: 20, MAX_NESTING_DEPTH: 10, /** Maximum child workflow depth for propagating SSE callbacks (block:started, block:completed). */ diff --git a/apps/sim/executor/execution/block-executor.ts b/apps/sim/executor/execution/block-executor.ts index 7ade4862853..a8fd36c63a7 100644 --- a/apps/sim/executor/execution/block-executor.ts +++ b/apps/sim/executor/execution/block-executor.ts @@ -198,7 +198,11 @@ export class BlockExecutor { if (ctx.includeFileBase64 === true && containsUserFileWithMetadata(normalizedOutput)) { normalizedOutput = (await hydrateUserFilesWithBase64(normalizedOutput, { requestId: ctx.metadata.requestId, + workspaceId: ctx.workspaceId, + workflowId: ctx.workflowId, executionId: ctx.executionId, + largeValueExecutionIds: ctx.largeValueExecutionIds, + allowLargeValueWorkflowScope: ctx.allowLargeValueWorkflowScope, userId: ctx.userId, maxBytes: ctx.base64MaxBytes, })) as NormalizedBlockOutput diff --git a/apps/sim/executor/execution/executor.ts b/apps/sim/executor/execution/executor.ts index 218719ce65a..4866ebeba81 100644 --- a/apps/sim/executor/execution/executor.ts +++ b/apps/sim/executor/execution/executor.ts @@ -18,6 +18,7 @@ import { LoopOrchestrator } from '@/executor/orchestrators/loop' import { NodeExecutionOrchestrator } from '@/executor/orchestrators/node' import { ParallelOrchestrator } from '@/executor/orchestrators/parallel' import type { BlockState, ExecutionContext, ExecutionResult } from '@/executor/types' +import { ParallelExpander } from '@/executor/utils/parallel-expansion' import { computeExecutionSets, type 
RunFromBlockContext, @@ -79,6 +80,8 @@ export class DAGExecutor { triggerBlockId, savedIncomingEdges, }) + this.restoreSnapshotParallelBatches(dag, this.contextExtensions.snapshotState) + this.restoreSavedIncomingEdges(dag, savedIncomingEdges) const { context, state } = this.createExecutionContext(workflowId, triggerBlockId) context.subflowParentMap = this.buildSubflowParentMap(dag) @@ -213,6 +216,41 @@ export class DAGExecutor { return await engine.run() } + private restoreSavedIncomingEdges(dag: DAG, savedIncomingEdges?: Record): void { + if (!savedIncomingEdges) return + + for (const [nodeId, incomingEdges] of Object.entries(savedIncomingEdges)) { + const node = dag.nodes.get(nodeId) + if (node) { + node.incomingEdges = new Set(incomingEdges) + } + } + } + + private restoreSnapshotParallelBatches( + dag: DAG, + snapshotState?: SerializableExecutionState + ): void { + if (!snapshotState?.parallelExecutions) return + + const expander = new ParallelExpander() + for (const [parallelId, scope] of Object.entries(snapshotState.parallelExecutions)) { + const currentBatchSize = Number(scope.currentBatchSize ?? 0) + if (!Number.isFinite(currentBatchSize) || currentBatchSize <= 0) continue + + const currentBatchStart = Number(scope.currentBatchStart ?? 0) + const totalBranches = Number(scope.totalBranches ?? currentBatchStart + currentBatchSize) + const items = Array.isArray(scope.items) + ? 
scope.items.slice(currentBatchStart, currentBatchStart + currentBatchSize) + : undefined + + expander.expandParallel(dag, parallelId, currentBatchSize, items, { + branchIndexOffset: currentBatchStart, + totalBranches, + }) + } + } + private buildExecutionPipeline(context: ExecutionContext, dag: DAG, state: ExecutionState) { const resolver = new VariableResolver(this.workflow, this.workflowVariables, state, { navigatePathAsync, @@ -274,6 +312,8 @@ export class DAGExecutor { workflowId, workspaceId: this.contextExtensions.workspaceId, executionId: this.contextExtensions.executionId, + largeValueExecutionIds: this.contextExtensions.largeValueExecutionIds, + allowLargeValueWorkflowScope: this.contextExtensions.allowLargeValueWorkflowScope, userId: this.contextExtensions.userId, isDeployedContext: this.contextExtensions.isDeployedContext, enforceCredentialAccess: this.contextExtensions.enforceCredentialAccess, @@ -320,10 +360,18 @@ export class DAGExecutor { branchOutputs: scope.branchOutputs ? new Map(Object.entries(scope.branchOutputs).map(([k, v]) => [Number(k), v])) : new Map(), + accumulatedOutputs: scope.accumulatedOutputs + ? new Map( + Object.entries(scope.accumulatedOutputs).map(([k, v]) => [Number(k), v]) + ) + : new Map(), }, ]) ) : new Map(), + parallelBlockMapping: snapshotState?.parallelBlockMapping + ? new Map(Object.entries(snapshotState.parallelBlockMapping)) + : new Map(), executedBlocks: state.getExecutedBlocks(), activeExecutionPath: snapshotState?.activeExecutionPath ? 
new Set(snapshotState.activeExecutionPath) diff --git a/apps/sim/executor/execution/snapshot-serializer.test.ts b/apps/sim/executor/execution/snapshot-serializer.test.ts new file mode 100644 index 00000000000..9aa273d2bbd --- /dev/null +++ b/apps/sim/executor/execution/snapshot-serializer.test.ts @@ -0,0 +1,70 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { serializePauseSnapshot } from '@/executor/execution/snapshot-serializer' +import type { ExecutionContext } from '@/executor/types' + +function createContext(overrides: Partial = {}): ExecutionContext { + return { + workflowId: 'workflow-1', + workspaceId: 'workspace-1', + executionId: 'execution-1', + userId: 'user-1', + blockStates: new Map(), + executedBlocks: new Set(), + blockLogs: [], + metadata: { + requestId: 'request-1', + executionId: 'execution-1', + workflowId: 'workflow-1', + workspaceId: 'workspace-1', + userId: 'user-1', + triggerType: 'manual', + useDraftState: true, + startTime: '2026-01-01T00:00:00.000Z', + }, + environmentVariables: {}, + decisions: { + router: new Map(), + condition: new Map(), + }, + completedLoops: new Set(), + activeExecutionPath: new Set(), + ...overrides, + } as ExecutionContext +} + +describe('serializePauseSnapshot', () => { + it('serializes batched parallel accumulated outputs for cross-process resume', () => { + const context = createContext({ + parallelExecutions: new Map([ + [ + 'parallel-1', + { + parallelId: 'parallel-1', + totalBranches: 3, + branchOutputs: new Map([[2, [{ output: 'current-batch' }]]]), + accumulatedOutputs: new Map([ + [0, [{ output: 'batch-0' }]], + [1, [{ output: 'batch-1' }]], + ]), + }, + ], + ]), + }) + + const snapshot = serializePauseSnapshot(context, ['next-block']) + const serialized = JSON.parse(snapshot.snapshot) + + expect(serialized.state.parallelExecutions?.['parallel-1']).toMatchObject({ + branchOutputs: { + 2: [{ output: 'current-batch' }], + }, + accumulatedOutputs: { + 0: [{ 
output: 'batch-0' }], + 1: [{ output: 'batch-1' }], + }, + }) + }) +}) diff --git a/apps/sim/executor/execution/snapshot-serializer.ts b/apps/sim/executor/execution/snapshot-serializer.ts index 76c2a3dba5f..fbac5b893c2 100644 --- a/apps/sim/executor/execution/snapshot-serializer.ts +++ b/apps/sim/executor/execution/snapshot-serializer.ts @@ -35,16 +35,19 @@ function serializeParallelExecutions( if (!parallelExecutions) return undefined const result: Record = {} for (const [parallelId, scope] of parallelExecutions.entries()) { - let branchOutputs: any - if (scope.branchOutputs instanceof Map) { - branchOutputs = Object.fromEntries(scope.branchOutputs) - } else { - branchOutputs = scope.branchOutputs ?? {} - } + const branchOutputs = + scope.branchOutputs instanceof Map + ? Object.fromEntries(scope.branchOutputs) + : (scope.branchOutputs ?? {}) + const accumulatedOutputs = + scope.accumulatedOutputs instanceof Map + ? Object.fromEntries(scope.accumulatedOutputs) + : (scope.accumulatedOutputs ?? 
{}) result[parallelId] = { ...scope, branchOutputs, + accumulatedOutputs, } } return result diff --git a/apps/sim/executor/execution/types.ts b/apps/sim/executor/execution/types.ts index 1ed3db20a32..0180d9e0ad7 100644 --- a/apps/sim/executor/execution/types.ts +++ b/apps/sim/executor/execution/types.ts @@ -35,6 +35,8 @@ export interface ExecutionMetadata { parallels?: Record deploymentVersionId?: string } + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean callChain?: string[] correlation?: AsyncExecutionCorrelation executionMode?: 'sync' | 'stream' | 'async' @@ -143,6 +145,8 @@ export interface ExecutionCallbacks { export interface ContextExtensions { workspaceId?: string executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean userId?: string stream?: boolean selectedOutputs?: string[] diff --git a/apps/sim/executor/handlers/function/function-handler.test.ts b/apps/sim/executor/handlers/function/function-handler.test.ts index b288940850d..aafd49faea5 100644 --- a/apps/sim/executor/handlers/function/function-handler.test.ts +++ b/apps/sim/executor/handlers/function/function-handler.test.ts @@ -81,6 +81,7 @@ describe('FunctionBlockHandler', () => { _context: { workflowId: mockContext.workflowId, workspaceId: mockContext.workspaceId, + executionId: mockContext.executionId, userId: mockContext.userId, isDeployedContext: mockContext.isDeployedContext, enforceCredentialAccess: mockContext.enforceCredentialAccess, @@ -121,6 +122,7 @@ describe('FunctionBlockHandler', () => { _context: { workflowId: mockContext.workflowId, workspaceId: mockContext.workspaceId, + executionId: mockContext.executionId, userId: mockContext.userId, isDeployedContext: mockContext.isDeployedContext, enforceCredentialAccess: mockContext.enforceCredentialAccess, @@ -154,6 +156,7 @@ describe('FunctionBlockHandler', () => { _context: { workflowId: mockContext.workflowId, workspaceId: mockContext.workspaceId, + executionId: 
mockContext.executionId, userId: mockContext.userId, isDeployedContext: mockContext.isDeployedContext, enforceCredentialAccess: mockContext.enforceCredentialAccess, diff --git a/apps/sim/executor/handlers/function/function-handler.ts b/apps/sim/executor/handlers/function/function-handler.ts index 53fa8b4451b..ec08996ba5b 100644 --- a/apps/sim/executor/handlers/function/function-handler.ts +++ b/apps/sim/executor/handlers/function/function-handler.ts @@ -49,7 +49,7 @@ export class FunctionBlockHandler implements BlockHandler { readCodeContent(inputs[FUNCTION_BLOCK_DISPLAY_CODE_KEY]) ?? readCodeContent((block.config?.params as Record | undefined)?.code) - const { blockData, blockNameMapping, blockOutputSchemas } = collectBlockData(ctx) + const { blockNameMapping, blockOutputSchemas } = collectBlockData(ctx) const contextVariables = normalizeRecord(inputs[FUNCTION_BLOCK_CONTEXT_VARS_KEY]) @@ -60,13 +60,16 @@ export class FunctionBlockHandler implements BlockHandler { timeout: inputs.timeout || DEFAULT_EXECUTION_TIMEOUT_MS, envVars: normalizeStringRecord(ctx.environmentVariables), workflowVariables: normalizeWorkflowVariables(ctx.workflowVariables), - blockData, + blockData: {}, blockNameMapping, blockOutputSchemas, contextVariables, _context: { workflowId: ctx.workflowId, workspaceId: ctx.workspaceId, + executionId: ctx.executionId, + largeValueExecutionIds: ctx.largeValueExecutionIds, + allowLargeValueWorkflowScope: ctx.allowLargeValueWorkflowScope, userId: ctx.userId, isDeployedContext: ctx.isDeployedContext, enforceCredentialAccess: ctx.enforceCredentialAccess, diff --git a/apps/sim/executor/orchestrators/loop.ts b/apps/sim/executor/orchestrators/loop.ts index 911427dfd91..2087bf09c45 100644 --- a/apps/sim/executor/orchestrators/loop.ts +++ b/apps/sim/executor/orchestrators/loop.ts @@ -23,7 +23,6 @@ import { emitSubflowSuccessEvents, extractBaseBlockId, resolveArrayInputAsync, - validateMaxCount, } from '@/executor/utils/subflow-utils' import type { 
VariableResolver } from '@/executor/variables/resolver' import type { SerializedLoop } from '@/serializer/types' @@ -107,25 +106,7 @@ export class LoopOrchestrator { switch (loopType) { case 'for': { scope.loopType = 'for' - const requestedIterations = loopConfig.iterations || DEFAULTS.MAX_LOOP_ITERATIONS - - const iterationError = validateMaxCount( - requestedIterations, - DEFAULTS.MAX_LOOP_ITERATIONS, - 'For loop iterations' - ) - if (iterationError) { - logger.error(iterationError, { loopId, requestedIterations }) - await this.addLoopErrorLog(ctx, loopId, loopType, iterationError, { - iterations: requestedIterations, - }) - scope.maxIterations = 0 - scope.validationError = iterationError - scope.condition = buildLoopIndexCondition(0) - ctx.loopExecutions?.set(loopId, scope) - throw new Error(iterationError) - } - + const requestedIterations = loopConfig.iterations || DEFAULTS.DEFAULT_LOOP_ITERATIONS scope.maxIterations = requestedIterations scope.condition = buildLoopIndexCondition(scope.maxIterations) break @@ -168,25 +149,6 @@ export class LoopOrchestrator { throw new Error(errorMessage) } - const sizeError = validateMaxCount( - items.length, - DEFAULTS.MAX_FOREACH_ITEMS, - 'ForEach loop collection size' - ) - if (sizeError) { - logger.error(sizeError, { loopId, collectionSize: items.length }) - await this.addLoopErrorLog(ctx, loopId, loopType, sizeError, { - forEachItems: loopConfig.forEachItems, - collectionSize: items.length, - }) - scope.items = [] - scope.maxIterations = 0 - scope.validationError = sizeError - scope.condition = buildLoopIndexCondition(0) - ctx.loopExecutions?.set(loopId, scope) - throw new Error(sizeError) - } - scope.items = items scope.maxIterations = items.length scope.item = items[0] @@ -204,25 +166,7 @@ export class LoopOrchestrator { if (loopConfig.doWhileCondition) { scope.condition = loopConfig.doWhileCondition } else { - const requestedIterations = loopConfig.iterations || DEFAULTS.MAX_LOOP_ITERATIONS - - const iterationError = 
validateMaxCount( - requestedIterations, - DEFAULTS.MAX_LOOP_ITERATIONS, - 'Do-While loop iterations' - ) - if (iterationError) { - logger.error(iterationError, { loopId, requestedIterations }) - await this.addLoopErrorLog(ctx, loopId, loopType, iterationError, { - iterations: requestedIterations, - }) - scope.maxIterations = 0 - scope.validationError = iterationError - scope.condition = buildLoopIndexCondition(0) - ctx.loopExecutions?.set(loopId, scope) - throw new Error(iterationError) - } - + const requestedIterations = loopConfig.iterations || DEFAULTS.DEFAULT_LOOP_ITERATIONS scope.maxIterations = requestedIterations scope.condition = buildLoopIndexCondition(scope.maxIterations) } diff --git a/apps/sim/executor/orchestrators/parallel.test.ts b/apps/sim/executor/orchestrators/parallel.test.ts index e0b73cd38b7..8db279e50cc 100644 --- a/apps/sim/executor/orchestrators/parallel.test.ts +++ b/apps/sim/executor/orchestrators/parallel.test.ts @@ -129,4 +129,48 @@ describe('ParallelOrchestrator', () => { isEmpty: true, }) }) + + it('records resumed later-batch outputs under restored global branch indexes', () => { + const dag = createDag() + dag.nodes.set('task-1', { + id: 'task-1', + block: { + id: 'task-1', + position: { x: 0, y: 0 }, + config: { tool: '', params: {} }, + inputs: {}, + outputs: {}, + metadata: { id: 'function', name: 'Task 1' }, + enabled: true, + }, + incomingEdges: new Set(), + outgoingEdges: new Set(), + metadata: { branchIndex: 0 }, + }) + const orchestrator = new ParallelOrchestrator(dag, createState(), null, {}) + const ctx = createContext({ + parallelBlockMapping: new Map([ + ['task-1', { originalBlockId: 'task', parallelId: 'parallel-1', iterationIndex: 20 }], + ]), + parallelExecutions: new Map([ + [ + 'parallel-1', + { + parallelId: 'parallel-1', + totalBranches: 25, + currentBatchStart: 20, + currentBatchSize: 5, + accumulatedOutputs: new Map([[0, [{ output: 'previous' }]]]), + branchOutputs: new Map(), + }, + ], + ]), + }) + + 
orchestrator.handleParallelBranchCompletion(ctx, 'parallel-1', 'task-1', { output: 'resumed' }) + + const scope = ctx.parallelExecutions?.get('parallel-1') + expect(scope?.branchOutputs.get(20)).toEqual([{ output: 'resumed' }]) + expect(scope?.branchOutputs.has(0)).toBe(false) + }) }) diff --git a/apps/sim/executor/orchestrators/parallel.ts b/apps/sim/executor/orchestrators/parallel.ts index db1dbcc1392..6c89554195d 100644 --- a/apps/sim/executor/orchestrators/parallel.ts +++ b/apps/sim/executor/orchestrators/parallel.ts @@ -14,7 +14,6 @@ import { emitSubflowSuccessEvents, extractBranchIndex, resolveArrayInputAsync, - validateMaxCount, } from '@/executor/utils/subflow-utils' import type { VariableResolver } from '@/executor/variables/resolver' import type { SerializedParallel } from '@/serializer/types' @@ -83,21 +82,6 @@ export class ParallelOrchestrator { throw new Error(errorMessage) } - const branchError = validateMaxCount( - branchCount, - DEFAULTS.MAX_FOREACH_ITEMS, - 'Parallel branch count' - ) - if (branchError) { - logger.error(branchError, { parallelId, branchCount }) - await this.addParallelErrorLog(ctx, parallelId, branchError, { - distribution: parallelConfig.distribution, - branchCount, - }) - this.setErrorScope(ctx, parallelId, branchError) - throw new Error(branchError) - } - if (isEmpty || branchCount === 0) { const scope: ParallelScope = { parallelId, @@ -127,7 +111,7 @@ export class ParallelOrchestrator { const batchSize = this.resolveBatchSize(parallelConfig.batchSize) const currentBatchSize = Math.min(batchSize, branchCount) const batchItems = items?.slice(0, currentBatchSize) - const { entryNodes, clonedSubflows } = this.expander.expandParallel( + const { entryNodes, clonedSubflows, allBranchNodes } = this.expander.expandParallel( this.dag, parallelId, currentBatchSize, @@ -136,6 +120,7 @@ export class ParallelOrchestrator { ) this.registerClonedSubflows(ctx, parallelId, clonedSubflows) + this.registerBranchMappings(ctx, parallelId, 
allBranchNodes) const scope: ParallelScope = { parallelId, @@ -311,8 +296,11 @@ export class ParallelOrchestrator { return } + const mappedBranch = ctx.parallelBlockMapping?.get(nodeId) const branchIndex = - this.dag.nodes.get(nodeId)?.metadata.branchIndex ?? extractBranchIndex(nodeId) + mappedBranch?.parallelId === parallelId + ? mappedBranch.iterationIndex + : (this.dag.nodes.get(nodeId)?.metadata.branchIndex ?? extractBranchIndex(nodeId)) if (branchIndex === null) { logger.warn('Could not extract branch index from node ID', { nodeId }) return @@ -392,7 +380,7 @@ export class ParallelOrchestrator { const currentBatchSize = Math.min(batchSize, remaining) const batchItems = scope.items?.slice(nextBatchStart, nextBatchStart + currentBatchSize) - const { entryNodes, clonedSubflows } = this.expander.expandParallel( + const { entryNodes, clonedSubflows, allBranchNodes } = this.expander.expandParallel( this.dag, scope.parallelId, currentBatchSize, @@ -401,6 +389,7 @@ export class ParallelOrchestrator { ) this.registerClonedSubflows(ctx, scope.parallelId, clonedSubflows) + this.registerBranchMappings(ctx, scope.parallelId, allBranchNodes) this.resetBatchExecutionState(scope.parallelId) scope.currentBatchStart = nextBatchStart @@ -429,6 +418,34 @@ export class ParallelOrchestrator { } } + private registerBranchMappings( + ctx: ExecutionContext, + parallelId: string, + branchNodeIds: string[] + ): void { + if (branchNodeIds.length === 0) { + return + } + + if (!ctx.parallelBlockMapping) { + ctx.parallelBlockMapping = new Map() + } + + for (const nodeId of branchNodeIds) { + const node = this.dag.nodes.get(nodeId) + const branchIndex = node?.metadata.branchIndex ?? extractBranchIndex(nodeId) + if (branchIndex === null || branchIndex === undefined) { + continue + } + + ctx.parallelBlockMapping.set(nodeId, { + originalBlockId: node?.metadata.originalBlockId ?? 
nodeId, + parallelId, + iterationIndex: branchIndex, + }) + } + } + extractBranchMetadata(nodeId: string): ParallelBranchMetadata | null { const node = this.dag.nodes.get(nodeId) if (!node?.metadata.isParallelBranch) { diff --git a/apps/sim/executor/types.ts b/apps/sim/executor/types.ts index fe25abb4671..d2569706085 100644 --- a/apps/sim/executor/types.ts +++ b/apps/sim/executor/types.ts @@ -290,6 +290,8 @@ export interface ExecutionContext { workflowId: string workspaceId?: string executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean userId?: string isDeployedContext?: boolean enforceCredentialAccess?: boolean diff --git a/apps/sim/executor/utils/block-reference.ts b/apps/sim/executor/utils/block-reference.ts index 0ffe6956d4f..082a9339782 100644 --- a/apps/sim/executor/utils/block-reference.ts +++ b/apps/sim/executor/utils/block-reference.ts @@ -208,7 +208,11 @@ function getSchemaFieldNames(schema: OutputSchema | undefined): string[] { export function resolveBlockReference( blockName: string, pathParts: string[], - context: BlockReferenceContext + context: BlockReferenceContext, + options: { + allowLargeValueRefs?: boolean + executionContext?: ResolutionContext['executionContext'] + } = {} ): BlockReferenceResult | undefined { const normalizedName = normalizeName(blockName) const blockId = context.blockNameMapping[normalizedName] @@ -231,7 +235,7 @@ export function resolveBlockReference( return { value: blockOutput, blockId } } - const value = navigatePath(blockOutput, pathParts) + const value = navigatePath(blockOutput, pathParts, options) const schema = context.blockOutputSchemas?.[blockId] if (value === undefined && schema) { diff --git a/apps/sim/executor/utils/parallel-expansion.test.ts b/apps/sim/executor/utils/parallel-expansion.test.ts index bcb2fbeb5c5..f31970cfc9e 100644 --- a/apps/sim/executor/utils/parallel-expansion.test.ts +++ b/apps/sim/executor/utils/parallel-expansion.test.ts @@ -207,6 +207,65 @@ 
describe('Nested parallel expansion + edge resolution', () => { expect(readyAfterClonedInnerEnd).toContain(outerEndId) }) + it('uses global branch indexes for nested subflow clones in later batches', () => { + const outerParallelId = 'outer-parallel' + const innerParallelId = 'inner-parallel' + const functionId = 'func-1' + + const workflow: SerializedWorkflow = { + version: '1', + blocks: [ + createBlock('start', BlockType.STARTER), + createBlock(outerParallelId, BlockType.PARALLEL), + createBlock(innerParallelId, BlockType.PARALLEL), + createBlock(functionId, BlockType.FUNCTION), + ], + connections: [ + { source: 'start', target: outerParallelId }, + { + source: outerParallelId, + target: innerParallelId, + sourceHandle: 'parallel-start-source', + }, + { + source: innerParallelId, + target: functionId, + sourceHandle: 'parallel-start-source', + }, + ], + loops: {}, + parallels: { + [innerParallelId]: { + id: innerParallelId, + nodes: [functionId], + count: 1, + parallelType: 'count', + }, + [outerParallelId]: { + id: outerParallelId, + nodes: [innerParallelId], + count: 4, + parallelType: 'count', + }, + }, + } + + const builder = new DAGBuilder() + const dag = builder.build(workflow) + const expander = new ParallelExpander() + const result = expander.expandParallel(dag, outerParallelId, 2, undefined, { + branchIndexOffset: 2, + totalBranches: 4, + }) + + expect(result.entryNodes).not.toContain(buildParallelSentinelStartId(innerParallelId)) + expect(result.clonedSubflows.map((clone) => clone.outerBranchIndex)).toEqual([2, 3]) + expect(result.clonedSubflows.map((clone) => clone.clonedId)).toEqual([ + `${innerParallelId}__obranch-2`, + `${innerParallelId}__obranch-3`, + ]) + }) + it('3-level nesting: pre-expansion clone IDs do not collide with runtime expansion', () => { const p1 = 'p1' const p2 = 'p2' diff --git a/apps/sim/executor/utils/parallel-expansion.ts b/apps/sim/executor/utils/parallel-expansion.ts index 9bf9e518c31..fc1ff1926fe 100644 --- 
a/apps/sim/executor/utils/parallel-expansion.ts +++ b/apps/sim/executor/utils/parallel-expansion.ts @@ -29,9 +29,6 @@ export interface ExpansionResult { } export class ParallelExpander { - /** Monotonically increasing counter for generating unique pre-expansion clone IDs. */ - private cloneSeq = 0 - expandParallel( dag: DAG, parallelId: string, @@ -124,14 +121,15 @@ export class ParallelExpander { ? buildParallelSentinelEndId(subflowId) : buildSentinelEndId(subflowId) - // Branch 0 uses original nodes - if (dag.nodes.has(startId)) entryNodes.push(startId) - if (dag.nodes.has(endId)) terminalNodes.push(endId) - - // Branches 1..N clone the entire subflow graph (recursively for deep nesting) - for (let i = 1; i < branchCount; i++) { + for (let i = 0; i < branchCount; i++) { const globalBranchIndex = branchIndexOffset + i - const cloned = this.cloneNestedSubflow(dag, subflowId, i, clonedSubflows) + if (globalBranchIndex === 0) { + if (dag.nodes.has(startId)) entryNodes.push(startId) + if (dag.nodes.has(endId)) terminalNodes.push(endId) + continue + } + + const cloned = this.cloneNestedSubflow(dag, subflowId, globalBranchIndex, clonedSubflows) entryNodes.push(cloned.startId) terminalNodes.push(cloned.endId) @@ -290,14 +288,23 @@ export class ParallelExpander { /** * Generates a unique clone ID for pre-expansion cloning. * - * Pre-expansion clones use `{originalId}__clone{N}__obranch-{branchIndex}` instead + * Pre-expansion clones use `{originalId}__clone{hash}__obranch-{branchIndex}` instead * of the plain `{originalId}__obranch-{branchIndex}` used by runtime expansion. - * The `__clone{N}` segment (from a monotonic counter) prevents naming collisions - * when the original (branch-0) subflow later expands at runtime and creates - * `{child}__obranch-{branchIndex}`. + * The clone segment prevents naming collisions when the original (branch-0) + * subflow later expands at runtime and creates `{child}__obranch-{branchIndex}`. 
+ * Keeping it deterministic lets pause/resume rebuild the same active branch IDs. */ - private buildPreCloneId(originalId: string, outerBranchIndex: number): string { - return `${originalId}__clone${this.cloneSeq++}__obranch-${outerBranchIndex}` + private buildPreCloneIdForParent( + originalId: string, + outerBranchIndex: number, + parentCloneId: string + ): string { + let hash = 0 + const input = `${parentCloneId}:${originalId}:${outerBranchIndex}` + for (let i = 0; i < input.length; i++) { + hash = (hash * 31 + input.charCodeAt(i)) >>> 0 + } + return `${originalId}__clone${hash}__obranch-${outerBranchIndex}` } /** @@ -305,8 +312,8 @@ export class ParallelExpander { * * The top-level subflow gets a standard `__obranch-{N}` clone ID (needed by * `findEffectiveContainerId` at runtime). All deeper children — both containers - * and regular blocks — receive unique `__clone{N}__obranch-{M}` IDs via - * {@link buildPreCloneId} to avoid collisions with runtime expansion. + * and regular blocks — receive deterministic `__clone{N}__obranch-{M}` IDs to + * avoid collisions with runtime expansion. 
*/ private cloneNestedSubflow( dag: DAG, @@ -369,7 +376,7 @@ export class ParallelExpander { const isNestedLoop = dag.loopConfigs.has(blockId) if (isNestedParallel || isNestedLoop) { - const nestedClonedId = this.buildPreCloneId(blockId, outerBranchIndex) + const nestedClonedId = this.buildPreCloneIdForParent(blockId, outerBranchIndex, clonedId) clonedBlockIds.push(nestedClonedId) const innerResult = this.cloneSubflowGraph( @@ -389,7 +396,7 @@ export class ParallelExpander { outerBranchIndex, }) } else { - const clonedBlockId = this.buildPreCloneId(blockId, outerBranchIndex) + const clonedBlockId = this.buildPreCloneIdForParent(blockId, outerBranchIndex, clonedId) clonedBlockIds.push(clonedBlockId) if (isParallel) { diff --git a/apps/sim/executor/utils/subflow-utils.test.ts b/apps/sim/executor/utils/subflow-utils.test.ts index 18f7e2097d8..7f9a8787c58 100644 --- a/apps/sim/executor/utils/subflow-utils.test.ts +++ b/apps/sim/executor/utils/subflow-utils.test.ts @@ -4,7 +4,7 @@ import { describe, expect, it, vi } from 'vitest' import type { ExecutionContext } from '@/executor/types' import type { VariableResolver } from '@/executor/variables/resolver' -import { resolveArrayInput } from './subflow-utils' +import { findEffectiveContainerId, resolveArrayInput } from './subflow-utils' describe('resolveArrayInput', () => { const fakeCtx = {} as unknown as ExecutionContext @@ -84,3 +84,17 @@ describe('resolveArrayInput', () => { expect(() => resolveArrayInput(fakeCtx, 'not json', null)).toThrow() }) }) + +describe('findEffectiveContainerId', () => { + it('finds pre-cloned nested subflow IDs with clone sequence suffixes', () => { + const executionMap = new Map([ + ['inner-parallel', {}], + ['inner-parallel__obranch-2', {}], + ['inner-parallel__clone3__obranch-2', {}], + ]) + + expect( + findEffectiveContainerId('inner-parallel', 'leaf__clone7__obranch-2₍0₎', executionMap) + ).toBe('inner-parallel__clone3__obranch-2') + }) +}) diff --git 
a/apps/sim/executor/utils/subflow-utils.ts b/apps/sim/executor/utils/subflow-utils.ts index f03a6a5c5cd..d29559db00e 100644 --- a/apps/sim/executor/utils/subflow-utils.ts +++ b/apps/sim/executor/utils/subflow-utils.ts @@ -154,10 +154,30 @@ export function findEffectiveContainerId( // and cloned variants coexist in the map; the clone is the correct scope. const match = currentNodeId.match(OUTER_BRANCH_PATTERN) if (match) { - const candidateId = buildClonedSubflowId(originalId, Number.parseInt(match[1], 10)) + const branchIndex = Number.parseInt(match[1], 10) + const cloneSuffix = `__obranch-${branchIndex}` + if (currentNodeId.includes('__clone')) { + for (const scopeId of executionMap.keys()) { + if ( + scopeId.includes('__clone') && + scopeId.endsWith(cloneSuffix) && + stripOuterBranchSuffix(scopeId) === originalId + ) { + return scopeId + } + } + } + + const candidateId = buildClonedSubflowId(originalId, branchIndex) if (executionMap.has(candidateId)) { return candidateId } + + for (const scopeId of executionMap.keys()) { + if (scopeId.endsWith(cloneSuffix) && stripOuterBranchSuffix(scopeId) === originalId) { + return scopeId + } + } } // Return original ID — for branch-0 (non-cloned) or when scope is missing. @@ -178,17 +198,6 @@ export function normalizeNodeId(nodeId: string): string { return nodeId } -/** - * Validates that a count doesn't exceed a maximum limit. - * Returns an error message if validation fails, undefined otherwise. - */ -export function validateMaxCount(count: number, max: number, itemType: string): string | undefined { - if (count > max) { - return `${itemType} (${count}) exceeds maximum allowed (${max}). Execution blocked.` - } - return undefined -} - /** * Resolves array input at runtime. Handles arrays, objects, references, and JSON strings. * Used by both loop forEach and parallel distribution resolution. 
diff --git a/apps/sim/executor/variables/resolver.test.ts b/apps/sim/executor/variables/resolver.test.ts index 6d50aa36993..828c8205c9b 100644 --- a/apps/sim/executor/variables/resolver.test.ts +++ b/apps/sim/executor/variables/resolver.test.ts @@ -18,6 +18,7 @@ function createBlock(id: string, name: string, type: string, params = {}): Seria outputs: { result: 'string', items: 'json', + file: 'file', }, enabled: true, } @@ -39,6 +40,16 @@ function createResolver(language = 'javascript') { state.setBlockOutput('producer', { result: 'hello world', items: ['a', 'b'], + file: { + id: 'file-1', + name: 'image.png', + url: 'https://example.com/image.png', + key: 'execution/workspace-1/workflow-1/execution-1/image.png', + context: 'execution', + size: 12 * 1024 * 1024, + type: 'image/png', + base64: 'large-inline-base64', + }, }) const ctx = { blockStates: state.getBlockStates(), @@ -126,6 +137,221 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables).toEqual({ __blockRef_0: 'b' }) }) + it('rewrites JavaScript file base64 references to lazy runtime reads', async () => { + const { block, ctx, resolver } = createResolver('javascript') + + const result = await resolver.resolveInputsForFunctionBlock( + ctx, + 'function', + { code: 'const base64 = ;\nreturn base64' }, + block + ) + + expect(result.resolvedInputs.code).toBe( + 'const base64 = await sim.files.readBase64(globalThis["__blockRef_0"]);\nreturn base64' + ) + expect(result.displayInputs.code).toBe('const base64 = ;\nreturn base64') + expect(result.contextVariables.__blockRef_0).toMatchObject({ + id: 'file-1', + name: 'image.png', + }) + expect(result.contextVariables.__blockRef_0).not.toHaveProperty('base64') + }) + + it('uses existing inline base64 for keyless files instead of lazy storage reads', async () => { + const { block, ctx, resolver } = createResolver('javascript') + const state = new ExecutionState() + state.setBlockOutput('producer', { + file: { + id: 'file-keyless', 
+ name: 'inline.txt', + key: '', + url: 'https://example.com/inline.txt', + size: 5, + type: 'text/plain', + base64: 'aGVsbG8=', + }, + }) + + const keylessResolver = new VariableResolver( + { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + }, + {}, + state + ) + + const result = await keylessResolver.resolveInputsForFunctionBlock( + ctx, + 'function', + { code: 'return ' }, + block + ) + + expect(result.resolvedInputs.code).toBe('return globalThis["__blockRef_0"]') + expect(result.contextVariables.__blockRef_0).toBe('aGVsbG8=') + }) + + it('rewrites JavaScript large value refs to lazy runtime reads', async () => { + const { block, ctx, resolver } = createResolver('javascript') + const state = new ExecutionState() + state.setBlockOutput('producer', { + result: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 12 * 1024 * 1024, + key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + const result = await largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { code: 'return ' }, + block + ) + + expect(result.resolvedInputs.code).toBe( + 'return await sim.values.read(globalThis["__blockRef_0"])' + ) + expect(result.contextVariables.__blockRef_0).toMatchObject({ + __simLargeValueRef: true, + id: 'lv_ABCDEFGHIJKL', + }) + }) + + it('fails whole large value refs for Function runtimes without lazy helpers', async () => { + const { block, ctx } = createResolver('python') + const state = new ExecutionState() + 
state.setBlockOutput('producer', { + result: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 12 * 1024 * 1024, + key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + await expect( + largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { code: 'return ' }, + block + ) + ).rejects.toThrow('This execution value is too large to inline') + }) + + it('fails whole large value refs for JavaScript with imports', async () => { + const { block, ctx } = createResolver('javascript') + const state = new ExecutionState() + state.setBlockOutput('producer', { + result: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 12 * 1024 * 1024, + key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + await expect( + largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { code: "import x from 'x'\nreturn " }, + block + ) + ).rejects.toThrow('This execution value is too large to inline') + }) + + it('fails nested large value refs for Function runtimes without lazy helpers', async () => { + const { block, ctx } = createResolver('python') + const 
state = new ExecutionState() + state.setBlockOutput('producer', { + result: { + rows: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'array', + size: 12 * 1024 * 1024, + key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + await expect( + largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { code: 'return ' }, + block + ) + ).rejects.toThrow('This execution value is too large to inline') + }) + it('resolves Python block references through globals lookup', async () => { const { block, ctx, resolver } = createResolver('python') diff --git a/apps/sim/executor/variables/resolver.ts b/apps/sim/executor/variables/resolver.ts index e73a400d661..4f35c3dd6c4 100644 --- a/apps/sim/executor/variables/resolver.ts +++ b/apps/sim/executor/variables/resolver.ts @@ -1,7 +1,14 @@ import { createLogger } from '@sim/logger' import { toError } from '@sim/utils/errors' +import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' +import { + assertNoLargeValueRefs, + containsLargeValueRef, + getLargeValueMaterializationError, + isLargeValueRef, +} from '@/lib/execution/payloads/large-value-ref' import { isLikelyReferenceSegment } from '@/lib/workflows/sanitization/references' -import { BlockType } from '@/executor/constants' +import { BlockType, parseReferencePath, REFERENCE } from '@/executor/constants' import type { ExecutionState, LoopScope } from '@/executor/execution/state' import type { ExecutionContext } from '@/executor/types' import { createEnvVarPattern, createReferencePattern } from 
'@/executor/utils/reference-validation' @@ -316,6 +323,7 @@ export class VariableResolver { executionState: this.state, currentNodeId, loopScope, + allowLargeValueRefs: true, } const language = (block.config?.params as Record | undefined)?.language as @@ -333,6 +341,19 @@ export class VariableResolver { try { if (this.blockResolver.canResolve(match)) { + const lazyBase64 = await this.resolveLazyFileBase64Reference( + match, + resolutionContext, + language, + template, + index, + contextVarAccumulator + ) + if (lazyBase64) { + displayResult += lazyBase64.display + return lazyBase64.replacement + } + const resolved = await this.resolveReference(match, resolutionContext) if (resolved === undefined) { displayResult += match @@ -345,13 +366,33 @@ export class VariableResolver { // with language-specific runtime access to that stored value. const varName = `__blockRef_${Object.keys(contextVarAccumulator).length}` contextVarAccumulator[varName] = effectiveValue - const replacement = this.formatContextVariableReference( - varName, - language, - template, - index, - effectiveValue - ) + let replacement: string + if (isLargeValueRef(effectiveValue)) { + const lazyReplacement = this.formatLazyLargeValueReference( + varName, + language, + template, + index + ) + if (!lazyReplacement) { + throw getLargeValueMaterializationError(effectiveValue) + } + replacement = lazyReplacement + } else if ( + containsLargeValueRef(effectiveValue) && + !this.canUseJavaScriptRuntimeHelpers(language, template) + ) { + assertNoLargeValueRefs(effectiveValue) + throw new Error('This execution value is too large to inline.') + } else { + replacement = this.formatContextVariableReference( + varName, + language, + template, + index, + effectiveValue + ) + } displayResult += this.formatDisplayValueForCodeContext( effectiveValue, language, @@ -369,6 +410,35 @@ export class VariableResolver { const effectiveValue = resolved === RESOLVED_EMPTY ? 
null : resolved + if (isLargeValueRef(effectiveValue)) { + const varName = `__blockRef_${Object.keys(contextVarAccumulator).length}` + contextVarAccumulator[varName] = effectiveValue + const lazyReplacement = this.formatLazyLargeValueReference( + varName, + language, + template, + index + ) + if (lazyReplacement) { + displayResult += this.formatDisplayValueForCodeContext( + effectiveValue, + language, + template, + index + ) + return lazyReplacement + } + throw getLargeValueMaterializationError(effectiveValue) + } + + if ( + containsLargeValueRef(effectiveValue) && + !this.canUseJavaScriptRuntimeHelpers(language, template) + ) { + assertNoLargeValueRefs(effectiveValue) + throw new Error('This execution value is too large to inline.') + } + // Non-block reference (loop, parallel, workflow, env): embed as literal const replacement = this.blockResolver.formatValueForBlock( effectiveValue, @@ -401,6 +471,88 @@ export class VariableResolver { return { resolvedCode: result, displayCode: displayResult } } + private async resolveLazyFileBase64Reference( + reference: string, + context: ResolutionContext, + language: string | undefined, + template: string, + matchIndex: number, + contextVarAccumulator: Record + ): Promise<{ replacement: string; display: string } | null> { + if (!this.canUseJavaScriptRuntimeHelpers(language, template)) { + return null + } + + const parts = parseReferencePath(reference) + if (parts.length < 3 || parts.at(-1) !== 'base64') { + return null + } + + const fileReference = `${REFERENCE.START}${parts.slice(0, -1).join(REFERENCE.PATH_DELIMITER)}${REFERENCE.END}` + const file = await this.resolveReference(fileReference, context) + if (!isUserFileWithMetadata(file)) { + return null + } + if (!file.key) { + return null + } + + const varName = `__blockRef_${Object.keys(contextVarAccumulator).length}` + const { base64: _base64, ...fileMetadata } = file + contextVarAccumulator[varName] = fileMetadata + const fileExpression = 
`globalThis[${JSON.stringify(varName)}]` + const lazyExpression = `await sim.files.readBase64(${fileExpression})` + + return { + replacement: this.formatJavaScriptAsyncExpression(lazyExpression, template, matchIndex), + display: reference, + } + } + + private formatLazyLargeValueReference( + varName: string, + language: string | undefined, + template: string, + matchIndex: number + ): string | null { + if (!this.canUseJavaScriptRuntimeHelpers(language, template)) { + return null + } + + const expression = `await sim.values.read(globalThis[${JSON.stringify(varName)}])` + return this.formatJavaScriptAsyncExpression(expression, template, matchIndex, { + stringifyInStringContext: true, + }) + } + + private formatJavaScriptAsyncExpression( + expression: string, + template: string, + matchIndex: number, + options: { stringifyInStringContext?: boolean } = {} + ): string { + const quoteContext = this.getCodeStringQuoteContext(template, matchIndex, 'javascript') + const stringExpression = options.stringifyInStringContext + ? 
`JSON.stringify(${expression})` + : expression + + if (quoteContext === 'template') { + return `\${${stringExpression}}` + } + if (quoteContext === 'single' || quoteContext === 'double') { + const quote = this.getCodeStringQuoteToken(quoteContext) + return `${quote} + ${stringExpression} + ${quote}` + } + return expression + } + + private canUseJavaScriptRuntimeHelpers(language: string | undefined, template: string): boolean { + if (language !== 'javascript') { + return false + } + return !/(^|\n)\s*import\s/.test(template) && !/require\s*\(\s*['"`]/.test(template) + } + private formatContextVariableReference( varName: string, language: string | undefined, diff --git a/apps/sim/executor/variables/resolvers/block.ts b/apps/sim/executor/variables/resolvers/block.ts index c17810a4f55..7a03093e6a1 100644 --- a/apps/sim/executor/variables/resolvers/block.ts +++ b/apps/sim/executor/variables/resolvers/block.ts @@ -81,14 +81,24 @@ export class BlockResolver implements Resolver { } try { - const result = resolveBlockReference(blockName, pathParts, { - blockNameMapping: Object.fromEntries(this.nameToBlockId), - blockData, - blockOutputSchemas, - })! + const result = resolveBlockReference( + blockName, + pathParts, + { + blockNameMapping: Object.fromEntries(this.nameToBlockId), + blockData, + blockOutputSchemas, + }, + { + allowLargeValueRefs: context.allowLargeValueRefs, + executionContext: context.executionContext, + } + )! if (result.value !== undefined) { - assertNoLargeValueRefs(result.value) + if (!context.allowLargeValueRefs) { + assertNoLargeValueRefs(result.value) + } return result.value } @@ -155,7 +165,9 @@ export class BlockResolver implements Resolver { ))! 
if (result.value !== undefined) { - assertNoLargeValueRefs(result.value) + if (!context.allowLargeValueRefs) { + assertNoLargeValueRefs(result.value) + } return result.value } diff --git a/apps/sim/executor/variables/resolvers/loop.ts b/apps/sim/executor/variables/resolvers/loop.ts index 64e977b7536..3b0a3e1b611 100644 --- a/apps/sim/executor/variables/resolvers/loop.ts +++ b/apps/sim/executor/variables/resolvers/loop.ts @@ -180,7 +180,7 @@ export class LoopResolver implements Resolver { if (pathParts.length > 0) { return useAsyncPath && this.navigatePathAsync ? this.navigatePathAsync(value, pathParts, context) - : navigatePath(value, pathParts) + : navigatePath(value, pathParts, { executionContext: context.executionContext }) } return value @@ -193,9 +193,11 @@ export class LoopResolver implements Resolver { } const value = (output as Record).results if (pathParts.length > 0) { - return navigatePath(value, pathParts) + return navigatePath(value, pathParts, { executionContext: context.executionContext }) + } + if (!context.allowLargeValueRefs) { + assertNoLargeValueRefs(value) } - assertNoLargeValueRefs(value) return value } @@ -212,9 +214,11 @@ export class LoopResolver implements Resolver { if (pathParts.length > 0) { return this.navigatePathAsync ? 
this.navigatePathAsync(value, pathParts, context) - : navigatePath(value, pathParts) + : navigatePath(value, pathParts, { executionContext: context.executionContext }) + } + if (!context.allowLargeValueRefs) { + assertNoLargeValueRefs(value) } - assertNoLargeValueRefs(value) return value } diff --git a/apps/sim/executor/variables/resolvers/parallel.test.ts b/apps/sim/executor/variables/resolvers/parallel.test.ts index e509249fec4..3d4764acd4a 100644 --- a/apps/sim/executor/variables/resolvers/parallel.test.ts +++ b/apps/sim/executor/variables/resolvers/parallel.test.ts @@ -77,11 +77,16 @@ function createParallelScope(items: any[]) { function createTestContext( currentNodeId: string, parallelExecutions?: Map, - blockOutputs?: Record + blockOutputs?: Record, + parallelBlockMapping?: Map ): ResolutionContext { return { executionContext: { + workflowId: 'workflow-1', + workspaceId: 'workspace-1', + executionId: 'execution-1', parallelExecutions: parallelExecutions ?? new Map(), + parallelBlockMapping, }, executionState: { getBlockOutput: (id: string) => blockOutputs?.[id], @@ -159,6 +164,34 @@ describe('ParallelResolver', () => { expect(resolver.resolve('', createTestContext('block-1₍2₎'))).toBe(2) }) + it.concurrent('uses runtime branch mapping for batched local branch node IDs', () => { + const workflow = createTestWorkflow({ + 'parallel-1': { nodes: ['block-1'], distribution: ['a', 'b', 'c', 'd'] }, + }) + const resolver = new ParallelResolver(workflow) + const parallelScope = createParallelScope(['a', 'b', 'c', 'd']) + const parallelExecutions = new Map([['parallel-1', parallelScope]]) + const parallelBlockMapping = new Map([ + [ + 'block-1₍0₎', + { + originalBlockId: 'block-1', + parallelId: 'parallel-1', + iterationIndex: 2, + }, + ], + ]) + const ctx = createTestContext( + 'block-1₍0₎', + parallelExecutions, + undefined, + parallelBlockMapping + ) + + expect(resolver.resolve('', ctx)).toBe(2) + expect(resolver.resolve('', ctx)).toBe('c') + }) + 
it.concurrent('should return undefined when branch index cannot be extracted', () => { const workflow = createTestWorkflow({ 'parallel-1': { nodes: ['block-1'], distribution: ['a', 'b'] }, @@ -443,7 +476,12 @@ describe('ParallelResolver', () => { const resolver = new ParallelResolver(workflow) const compacted = await compactExecutionPayload( { results: [[{ response: 'a' }], [{ response: 'b', payload: 'x'.repeat(2048) }]] }, - { thresholdBytes: 256 } + { + thresholdBytes: 256, + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + } ) const ctx = createTestContext('block-outside', new Map(), { 'parallel-1': compacted, diff --git a/apps/sim/executor/variables/resolvers/parallel.ts b/apps/sim/executor/variables/resolvers/parallel.ts index b0414f16da1..538fc69780d 100644 --- a/apps/sim/executor/variables/resolvers/parallel.ts +++ b/apps/sim/executor/variables/resolvers/parallel.ts @@ -4,6 +4,7 @@ import { isReference, normalizeName, parseReferencePath, REFERENCE } from '@/exe import { InvalidFieldError } from '@/executor/utils/block-reference' import { extractBranchIndex, + extractOuterBranchIndex, findEffectiveContainerId, stripCloneSuffixes, stripOuterBranchSuffix, @@ -132,7 +133,7 @@ export class ParallelResolver implements Resolver { throw new InvalidFieldError(firstPart, rest[0], [...PARALLEL_OUTPUT_FIELDS]) } - const branchIndex = extractBranchIndex(context.currentNodeId) + const branchIndex = this.resolveBranchIndex(targetParallelId, context) if (branchIndex === null) { return undefined } @@ -176,12 +177,26 @@ export class ParallelResolver implements Resolver { if (pathParts.length > 0) { return useAsyncPath && this.navigatePathAsync ? 
this.navigatePathAsync(value, pathParts, context) - : navigatePath(value, pathParts) + : navigatePath(value, pathParts, { executionContext: context.executionContext }) } return value } + private resolveBranchIndex(targetParallelId: string, context: ResolutionContext): number | null { + const mapping = context.executionContext.parallelBlockMapping?.get(context.currentNodeId) + if (mapping?.parallelId === targetParallelId) { + return mapping.iterationIndex + } + + const outerBranchIndex = extractOuterBranchIndex(context.currentNodeId) + if (outerBranchIndex !== undefined) { + return outerBranchIndex + } + + return extractBranchIndex(context.currentNodeId) + } + private findInnermostParallelForBlock(blockId: string): string | undefined { const baseId = stripCloneSuffixes(blockId) const parallels = this.workflow.parallels @@ -268,9 +283,11 @@ export class ParallelResolver implements Resolver { } const value = (output as Record).results if (pathParts.length > 0) { - return navigatePath(value, pathParts) + return navigatePath(value, pathParts, { executionContext: context.executionContext }) + } + if (!context.allowLargeValueRefs) { + assertNoLargeValueRefs(value) } - assertNoLargeValueRefs(value) return value } @@ -287,9 +304,11 @@ export class ParallelResolver implements Resolver { if (pathParts.length > 0) { return this.navigatePathAsync ? 
this.navigatePathAsync(value, pathParts, context) - : navigatePath(value, pathParts) + : navigatePath(value, pathParts, { executionContext: context.executionContext }) + } + if (!context.allowLargeValueRefs) { + assertNoLargeValueRefs(value) } - assertNoLargeValueRefs(value) return value } diff --git a/apps/sim/executor/variables/resolvers/reference-async.server.ts b/apps/sim/executor/variables/resolvers/reference-async.server.ts index c481c42e119..78dca4a3712 100644 --- a/apps/sim/executor/variables/resolvers/reference-async.server.ts +++ b/apps/sim/executor/variables/resolvers/reference-async.server.ts @@ -1,17 +1,30 @@ import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' -import { materializeLargeValueRefSyncOrThrow } from '@/lib/execution/payloads/cache' -import { assertNoLargeValueRefs, isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' +import { + assertNoLargeValueRefs, + getLargeValueMaterializationError, + isLargeValueRef, +} from '@/lib/execution/payloads/large-value-ref' import { materializeLargeValueRef } from '@/lib/execution/payloads/store' import { hydrateUserFileWithBase64 } from '@/lib/uploads/utils/user-file-base64.server' import type { ResolutionContext } from '@/executor/variables/resolvers/reference' -async function materializeLargeValueRefOrThrow(value: unknown): Promise { +async function materializeLargeValueRefOrThrow( + value: unknown, + context: ResolutionContext +): Promise { if (!isLargeValueRef(value)) { return value } - const materialized = await materializeLargeValueRef(value) + const materialized = await materializeLargeValueRef(value, { + workspaceId: context.executionContext.workspaceId, + workflowId: context.executionContext.workflowId, + executionId: context.executionContext.executionId, + largeValueExecutionIds: context.executionContext.largeValueExecutionIds, + allowLargeValueWorkflowScope: context.executionContext.allowLargeValueWorkflowScope, + userId: context.executionContext.userId, + }) if 
(materialized === undefined) { - return materializeLargeValueRefSyncOrThrow(value) + throw getLargeValueMaterializationError(value) } return materialized } @@ -25,7 +38,11 @@ async function hydrateExplicitBase64( } const hydrated = await hydrateUserFileWithBase64(file, { requestId: context.executionContext.metadata.requestId, + workspaceId: context.executionContext.workspaceId, + workflowId: context.executionContext.workflowId, executionId: context.executionContext.executionId, + largeValueExecutionIds: context.executionContext.largeValueExecutionIds, + allowLargeValueWorkflowScope: context.executionContext.allowLargeValueWorkflowScope, userId: context.executionContext.userId, maxBytes: context.executionContext.base64MaxBytes, }) @@ -49,7 +66,7 @@ export async function navigatePathAsync( ): Promise { let current = obj for (const part of path) { - current = await materializeLargeValueRefOrThrow(current) + current = await materializeLargeValueRefOrThrow(current, context) if (current === null || current === undefined) { return undefined @@ -70,7 +87,7 @@ export async function navigatePathAsync( typeof current === 'object' && current !== null ? 
(current as Record)[prop] : undefined - current = await materializeLargeValueRefOrThrow(current) + current = await materializeLargeValueRefOrThrow(current, context) if (current === undefined || current === null) { return undefined } @@ -78,7 +95,7 @@ export async function navigatePathAsync( const indices = bracketsPart.match(/\[(\d+)\]/g) if (indices) { for (const indexMatch of indices) { - current = await materializeLargeValueRefOrThrow(current) + current = await materializeLargeValueRefOrThrow(current, context) if (current === null || current === undefined) { return undefined } @@ -96,6 +113,8 @@ export async function navigatePathAsync( : undefined } } - assertNoLargeValueRefs(current) + if (!context.allowLargeValueRefs) { + assertNoLargeValueRefs(current) + } return current } diff --git a/apps/sim/executor/variables/resolvers/reference.ts b/apps/sim/executor/variables/resolvers/reference.ts index 7524b8f86a1..70a49a4d11b 100644 --- a/apps/sim/executor/variables/resolvers/reference.ts +++ b/apps/sim/executor/variables/resolvers/reference.ts @@ -7,6 +7,7 @@ export interface ResolutionContext { executionState: ExecutionState currentNodeId: string loopScope?: LoopScope + allowLargeValueRefs?: boolean } export interface Resolver { @@ -50,11 +51,15 @@ export function splitLeadingBracketPath(part: string): { property: string; pathP * navigatePath({a: {b: {c: 1}}}, ['a', 'b', 'c']) => 1 * navigatePath({items: [{name: 'test'}]}, ['items', '0', 'name']) => 'test' */ -export function navigatePath(obj: any, path: string[]): any { +export function navigatePath( + obj: any, + path: string[], + options: { allowLargeValueRefs?: boolean; executionContext?: ExecutionContext } = {} +): any { let current = obj for (const part of path) { if (isLargeValueRef(current)) { - current = materializeLargeValueRefSyncOrThrow(current) + current = materializeLargeValueRefSyncOrThrow(current, options.executionContext) } if (current === null || current === undefined) { @@ -69,7 +74,7 @@ export 
function navigatePath(obj: any, path: string[]): any { ? (current as Record)[prop] : undefined if (isLargeValueRef(current)) { - current = materializeLargeValueRefSyncOrThrow(current) + current = materializeLargeValueRefSyncOrThrow(current, options.executionContext) } if (current === undefined || current === null) { return undefined @@ -82,7 +87,7 @@ export function navigatePath(obj: any, path: string[]): any { return undefined } if (isLargeValueRef(current)) { - current = materializeLargeValueRefSyncOrThrow(current) + current = materializeLargeValueRefSyncOrThrow(current, options.executionContext) } const idx = Number.parseInt(indexMatch.slice(1, -1), 10) current = Array.isArray(current) ? current[idx] : undefined @@ -98,6 +103,8 @@ export function navigatePath(obj: any, path: string[]): any { : undefined } } - assertNoLargeValueRefs(current) + if (!options.allowLargeValueRefs) { + assertNoLargeValueRefs(current) + } return current } diff --git a/apps/sim/executor/variables/resolvers/workflow.ts b/apps/sim/executor/variables/resolvers/workflow.ts index f11612e2ee2..ad2c667949e 100644 --- a/apps/sim/executor/variables/resolvers/workflow.ts +++ b/apps/sim/executor/variables/resolvers/workflow.ts @@ -57,7 +57,7 @@ export class WorkflowResolver implements Resolver { // If there are additional path parts, navigate deeper if (pathParts.length > 0) { - return navigatePath(value, pathParts) + return navigatePath(value, pathParts, { executionContext: context.executionContext }) } return value diff --git a/apps/sim/hooks/use-collaborative-workflow.ts b/apps/sim/hooks/use-collaborative-workflow.ts index 80b4c53c71c..f4442fb617b 100644 --- a/apps/sim/hooks/use-collaborative-workflow.ts +++ b/apps/sim/hooks/use-collaborative-workflow.ts @@ -1781,7 +1781,7 @@ export function useCollaborativeWorkflow() { const config = { id: nodeId, nodes: childNodes, - iterations: Math.max(1, Math.min(1000, count)), // Clamp between 1-1000 for loops + iterations: Math.max(1, count), loopType: 
currentLoopType, forEachItems: currentCollection, } @@ -1800,7 +1800,7 @@ export function useCollaborativeWorkflow() { const config = { id: nodeId, nodes: childNodes, - count: Math.max(1, Math.min(1000, count)), + count: Math.max(1, count), distribution: currentDistribution, parallelType: currentParallelType, batchSize, diff --git a/apps/sim/lib/api/contracts/execution-payloads.ts b/apps/sim/lib/api/contracts/execution-payloads.ts index 4eaac6358ab..485918dc4ac 100644 --- a/apps/sim/lib/api/contracts/execution-payloads.ts +++ b/apps/sim/lib/api/contracts/execution-payloads.ts @@ -1,5 +1,6 @@ import { z } from 'zod' import { + isLargeValueStorageKey, LARGE_VALUE_KINDS, LARGE_VALUE_REF_MARKER, LARGE_VALUE_REF_VERSION, @@ -17,5 +18,14 @@ export const largeValueRefSchema = z preview: z.unknown().optional(), }) .strict() + .superRefine((value, ctx) => { + if (value.key && !isLargeValueStorageKey(value.key, value.id, value.executionId)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + path: ['key'], + message: 'Large value reference key must point to execution-scoped server storage', + }) + } + }) export type LargeValueRefResponse = z.output diff --git a/apps/sim/lib/api/contracts/hotspots.ts b/apps/sim/lib/api/contracts/hotspots.ts index 099170bc8be..4a3b92d371d 100644 --- a/apps/sim/lib/api/contracts/hotspots.ts +++ b/apps/sim/lib/api/contracts/hotspots.ts @@ -102,6 +102,9 @@ export const functionExecuteContract = defineRouteContract({ workflowVariables: unknownRecordSchema.optional().default({}), contextVariables: unknownRecordSchema.optional().default({}), workflowId: z.string().optional(), + executionId: z.string().optional(), + largeValueExecutionIds: z.array(z.string()).optional(), + allowLargeValueWorkflowScope: z.boolean().optional(), workspaceId: z.string().optional(), userId: z.string().optional(), isCustomTool: z.boolean().optional().default(false), diff --git a/apps/sim/lib/execution/isolated-vm-worker.cjs b/apps/sim/lib/execution/isolated-vm-worker.cjs 
index 18828eebc60..44ef92142cf 100644 --- a/apps/sim/lib/execution/isolated-vm-worker.cjs +++ b/apps/sim/lib/execution/isolated-vm-worker.cjs @@ -180,6 +180,7 @@ async function executeCode(request, executionId) { let logCallback = null let errorCallback = null let fetchCallback = null + let brokerCallback = null const externalCopies = [] try { @@ -243,6 +244,27 @@ async function executeCode(request, executionId) { }) await jail.set('__fetchRef', fetchCallback) + brokerCallback = new ivm.Reference(async (brokerName, argsJson) => { + return new Promise((resolve) => { + const brokerId = ++brokerIdCounter + const timeout = setTimeout(() => { + if (pendingBrokerCalls.has(brokerId)) { + pendingBrokerCalls.delete(brokerId) + resolve(JSON.stringify({ error: `Broker "${brokerName}" timed out` })) + } + }, BROKER_TIMEOUT_MS) + pendingBrokerCalls.set(brokerId, { resolve, timeout, executionId }) + if (process.send && process.connected) { + process.send({ type: 'broker', brokerId, executionId, brokerName, argsJson }) + } else { + clearTimeout(timeout) + pendingBrokerCalls.delete(brokerId) + resolve(JSON.stringify({ error: 'Parent process disconnected' })) + } + }) + }) + await jail.set('__brokerRef', brokerCallback) + const bootstrap = ` // Set up console object const console = { @@ -299,10 +321,57 @@ async function executeCode(request, executionId) { }; } + const sim = (() => { + const broker = __brokerRef; + async function callSimBroker(name, args) { + let argsJson; + try { + argsJson = args === undefined ? 
undefined : JSON.stringify(args); + } catch { + throw new Error('sim helper arguments must be JSON-serializable'); + } + if (argsJson && argsJson.length > ${MAX_FETCH_OPTIONS_JSON_CHARS}) { + throw new Error('sim helper arguments exceed maximum payload size'); + } + const responseJson = await broker.apply(undefined, [name, argsJson], { result: { promise: true } }); + let response; + try { + response = JSON.parse(responseJson); + } catch { + throw new Error('Invalid sim helper response'); + } + if (typeof response.error === 'string') { + throw new Error(response.error || 'Sim helper call failed'); + } + return response.resultJson === undefined || response.resultJson === null + ? null + : JSON.parse(response.resultJson); + } + + return Object.freeze({ + files: Object.freeze({ + readBase64: (file, options) => callSimBroker('sim.files.readBase64', { file, options }), + readText: (file, options) => callSimBroker('sim.files.readText', { file, options }), + readBase64Chunk: (file, options) => callSimBroker('sim.files.readBase64Chunk', { file, options }), + readTextChunk: (file, options) => callSimBroker('sim.files.readTextChunk', { file, options }), + }), + values: Object.freeze({ + read: (ref, options) => callSimBroker('sim.values.read', { ref, options }), + }), + }); + })(); + Object.defineProperty(global, 'sim', { + value: sim, + writable: false, + configurable: false, + enumerable: true + }); + // Prevent access to dangerous globals with stronger protection const undefined_globals = [ 'Isolate', 'Context', 'Script', 'Module', 'Callback', 'Reference', - 'ExternalCopy', 'process', 'require', 'module', 'exports', '__dirname', '__filename' + 'ExternalCopy', 'process', 'require', 'module', 'exports', '__dirname', '__filename', + '__brokerRef', '__broker', '__callSimBroker' ]; for (const name of undefined_globals) { try { @@ -439,6 +508,7 @@ async function executeCode(request, executionId) { bootstrapScript, ...externalCopies, fetchCallback, + brokerCallback, errorCallback, 
logCallback, context, diff --git a/apps/sim/lib/execution/payloads/cache.ts b/apps/sim/lib/execution/payloads/cache.ts index 0a25955cc98..c1b6ec216ee 100644 --- a/apps/sim/lib/execution/payloads/cache.ts +++ b/apps/sim/lib/execution/payloads/cache.ts @@ -7,7 +7,18 @@ import { const FALLBACK_TTL_MS = 15 * 60 * 1000 const MAX_IN_MEMORY_BYTES = 256 * 1024 * 1024 -const inMemoryValues = new Map() +interface LargeValueCacheScope { + workspaceId?: string + workflowId?: string + executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean +} + +const inMemoryValues = new Map< + string, + { value: unknown; size: number; expiresAt: number; scope?: LargeValueCacheScope } +>() let inMemoryBytes = 0 function cleanupExpiredValues(now = Date.now()): void { @@ -19,7 +30,12 @@ function cleanupExpiredValues(now = Date.now()): void { } } -export function cacheLargeValue(id: string, value: unknown, size: number): void { +export function cacheLargeValue( + id: string, + value: unknown, + size: number, + scope?: LargeValueCacheScope +): void { if (size > MAX_IN_MEMORY_BYTES) { return } @@ -37,18 +53,56 @@ export function cacheLargeValue(id: string, value: unknown, size: number): void inMemoryValues.set(id, { value, size, + scope, expiresAt: Date.now() + FALLBACK_TTL_MS, }) inMemoryBytes += size } -export function materializeLargeValueRefSync(ref: LargeValueRef): unknown { +function scopeMatchesRef( + ref: LargeValueRef, + cachedScope: LargeValueCacheScope | undefined, + callerScope?: LargeValueCacheScope +): boolean { + if (!cachedScope?.executionId) { + return false + } + if (ref.executionId && ref.executionId !== cachedScope.executionId) { + return false + } + if (!callerScope) { + return Boolean(ref.key) && (!ref.executionId || ref.executionId === cachedScope.executionId) + } + + const allowedExecutionIds = new Set([ + callerScope.executionId, + ...(callerScope.largeValueExecutionIds ?? 
[]), + ]) + const workflowScopeAllowed = + callerScope.allowLargeValueWorkflowScope && + callerScope.workspaceId === cachedScope.workspaceId && + callerScope.workflowId === cachedScope.workflowId + + return allowedExecutionIds.has(cachedScope.executionId) || Boolean(workflowScopeAllowed) +} + +export function materializeLargeValueRefSync( + ref: LargeValueRef, + callerScope?: LargeValueCacheScope +): unknown { cleanupExpiredValues() - return inMemoryValues.get(ref.id)?.value + const cached = inMemoryValues.get(ref.id) + if (!cached || !scopeMatchesRef(ref, cached.scope, callerScope)) { + return undefined + } + return cached.value } -export function materializeLargeValueRefSyncOrThrow(ref: LargeValueRef): unknown { - const materialized = materializeLargeValueRefSync(ref) +export function materializeLargeValueRefSyncOrThrow( + ref: LargeValueRef, + callerScope?: LargeValueCacheScope +): unknown { + const materialized = materializeLargeValueRefSync(ref, callerScope) if (materialized === undefined) { throw getLargeValueMaterializationError(ref) } diff --git a/apps/sim/lib/execution/payloads/hydration.ts b/apps/sim/lib/execution/payloads/hydration.ts index ab2f635373e..bfc825280ae 100644 --- a/apps/sim/lib/execution/payloads/hydration.ts +++ b/apps/sim/lib/execution/payloads/hydration.ts @@ -1,8 +1,12 @@ import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' -import { materializeLargeValueRef } from '@/lib/execution/payloads/store' +import { + type LargeValueStoreContext, + materializeLargeValueRef, +} from '@/lib/execution/payloads/store' export async function warmLargeValueRefs( value: unknown, + context: LargeValueStoreContext = {}, seen = new WeakSet() ): Promise { if (!value || typeof value !== 'object') { @@ -10,8 +14,8 @@ export async function warmLargeValueRefs( } if (isLargeValueRef(value)) { - const materialized = await materializeLargeValueRef(value) - await warmLargeValueRefs(materialized, seen) + const materialized = await 
materializeLargeValueRef(value, context) + await warmLargeValueRefs(materialized, context, seen) return } @@ -21,9 +25,11 @@ export async function warmLargeValueRefs( seen.add(value) if (Array.isArray(value)) { - await Promise.all(value.map((item) => warmLargeValueRefs(item, seen))) + await Promise.all(value.map((item) => warmLargeValueRefs(item, context, seen))) return } - await Promise.all(Object.values(value).map((entryValue) => warmLargeValueRefs(entryValue, seen))) + await Promise.all( + Object.values(value).map((entryValue) => warmLargeValueRefs(entryValue, context, seen)) + ) } diff --git a/apps/sim/lib/execution/payloads/large-value-ref.ts b/apps/sim/lib/execution/payloads/large-value-ref.ts index 89eb4b599c8..d770f6ed37d 100644 --- a/apps/sim/lib/execution/payloads/large-value-ref.ts +++ b/apps/sim/lib/execution/payloads/large-value-ref.ts @@ -19,25 +19,36 @@ export interface LargeValueRef { } const LARGE_VALUE_ID_PATTERN = /^lv_[A-Za-z0-9_-]{12}$/ + +export function isLargeValueStorageKey(key: string, id: string, executionId?: string): boolean { + if (!key.startsWith('execution/')) return false + if (!key.endsWith(`/large-value-${id}.json`)) return false + if (executionId && !key.includes(`/${executionId}/`)) return false + return true +} + export function isLargeValueRef(value: unknown): value is LargeValueRef { if (!value || typeof value !== 'object') return false const candidate = value as Record + const id = candidate.id const key = candidate.key const executionId = candidate.executionId return ( candidate[LARGE_VALUE_REF_MARKER] === true && candidate.version === LARGE_VALUE_REF_VERSION && - typeof candidate.id === 'string' && - LARGE_VALUE_ID_PATTERN.test(candidate.id) && + typeof id === 'string' && + LARGE_VALUE_ID_PATTERN.test(id) && typeof candidate.kind === 'string' && (LARGE_VALUE_KINDS as readonly string[]).includes(candidate.kind) && typeof candidate.size === 'number' && Number.isFinite(candidate.size) && candidate.size > 0 && - (key === 
undefined || typeof key === 'string') && - (executionId === undefined || typeof executionId === 'string') + (executionId === undefined || typeof executionId === 'string') && + (key === undefined || + (typeof key === 'string' && + isLargeValueStorageKey(key, id, executionId as string | undefined))) ) } diff --git a/apps/sim/lib/execution/payloads/materialization.server.ts b/apps/sim/lib/execution/payloads/materialization.server.ts new file mode 100644 index 00000000000..5e337e35914 --- /dev/null +++ b/apps/sim/lib/execution/payloads/materialization.server.ts @@ -0,0 +1,294 @@ +import { createLogger, type Logger } from '@sim/logger' +import { toError } from '@sim/utils/errors' +import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' +import { + getLargeValueMaterializationError, + isLargeValueRef, + isLargeValueStorageKey, + type LargeValueRef, +} from '@/lib/execution/payloads/large-value-ref' +import { ExecutionResourceLimitError } from '@/lib/execution/resource-errors' +import type { StorageContext } from '@/lib/uploads' +import { bufferToBase64, inferContextFromKey } from '@/lib/uploads/utils/file-utils' +import { downloadFileFromStorage } from '@/lib/uploads/utils/file-utils.server' +import type { UserFile } from '@/executor/types' + +const logger = createLogger('ExecutionPayloadMaterialization') + +export const MAX_DURABLE_LARGE_VALUE_BYTES = 64 * 1024 * 1024 +export const MAX_INLINE_MATERIALIZATION_BYTES = 16 * 1024 * 1024 +export const MAX_FUNCTION_FILE_BYTES = 64 * 1024 * 1024 +export const MAX_FUNCTION_INLINE_BYTES = 10 * 1024 * 1024 + +export interface ExecutionMaterializationContext { + workflowId?: string + workspaceId?: string + executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean + userId?: string + requestId?: string + logger?: Logger +} + +export interface MaterializeLargeValueOptions extends ExecutionMaterializationContext { + maxBytes?: number +} + +export interface ReadUserFileContentOptions 
extends ExecutionMaterializationContext { + maxBytes?: number + maxSourceBytes?: number + offset?: number + length?: number + chunked?: boolean + encoding: 'base64' | 'text' +} + +function getLogger(options: ExecutionMaterializationContext): Logger { + return options.logger ?? logger +} + +export function assertDurableLargeValueSize(size: number): void { + if (size > MAX_DURABLE_LARGE_VALUE_BYTES) { + throw new ExecutionResourceLimitError({ + resource: 'execution_payload_bytes', + attemptedBytes: size, + limitBytes: MAX_DURABLE_LARGE_VALUE_BYTES, + }) + } +} + +export function assertInlineMaterializationSize(size: number, maxBytes?: number): void { + const limit = maxBytes ?? MAX_INLINE_MATERIALIZATION_BYTES + if (size > limit) { + throw new ExecutionResourceLimitError({ + resource: 'execution_payload_bytes', + attemptedBytes: size, + limitBytes: limit, + }) + } +} + +export function isValidLargeValueKey(ref: LargeValueRef): boolean { + return Boolean(ref.key && isLargeValueStorageKey(ref.key, ref.id, ref.executionId)) +} + +export function assertLargeValueRefAccess( + ref: LargeValueRef, + context: ExecutionMaterializationContext +): void { + if (!context.executionId) { + throw new Error('Large execution value requires an execution context.') + } + const allowedExecutionIds = new Set([ + context.executionId, + ...(context.largeValueExecutionIds ?? []), + ]) + + const parts = ref.key?.split('/') ?? 
[] + const [, workspaceId, workflowId, executionId] = parts + + if (!ref.key) { + if (ref.executionId && !allowedExecutionIds.has(ref.executionId)) { + throw new Error('Large execution value is not available in this execution.') + } + return + } + if (!context.workspaceId || !context.workflowId) { + throw new Error('Large execution value requires workspace and workflow context.') + } + const workflowScopeAllowed = + context.allowLargeValueWorkflowScope && + context.workspaceId === workspaceId && + context.workflowId === workflowId + if (ref.executionId && !allowedExecutionIds.has(ref.executionId) && !workflowScopeAllowed) { + throw new Error('Large execution value is not available in this execution.') + } + if (!allowedExecutionIds.has(executionId) && !workflowScopeAllowed) { + throw new Error('Large execution value is not available in this execution.') + } + if (context.workspaceId && workspaceId !== context.workspaceId) { + throw new Error('Large execution value is not available in this execution.') + } + if (context.workflowId && workflowId !== context.workflowId) { + throw new Error('Large execution value is not available in this execution.') + } +} + +export async function readLargeValueRefFromStorage( + ref: LargeValueRef, + options: MaterializeLargeValueOptions = {} +): Promise { + const log = getLogger(options) + if (!isLargeValueRef(ref) || !ref.key || !isValidLargeValueKey(ref)) { + return undefined + } + + assertLargeValueRefAccess(ref, options) + assertInlineMaterializationSize(ref.size, options.maxBytes) + + try { + const { StorageService } = await import('@/lib/uploads') + const buffer = await StorageService.downloadFile({ + key: ref.key, + context: 'execution', + }) + if (buffer.length > (options.maxBytes ?? MAX_INLINE_MATERIALIZATION_BYTES)) { + throw new ExecutionResourceLimitError({ + resource: 'execution_payload_bytes', + attemptedBytes: buffer.length, + limitBytes: options.maxBytes ?? 
MAX_INLINE_MATERIALIZATION_BYTES, + }) + } + return JSON.parse(buffer.toString('utf8')) + } catch (error) { + if (error instanceof ExecutionResourceLimitError) { + throw error + } + log.warn('Failed to materialize persisted large execution value', { + id: ref.id, + key: ref.key, + error: toError(error).message, + }) + return undefined + } +} + +function normalizeRange(buffer: Buffer, options: ReadUserFileContentOptions): Buffer { + const offset = Math.max(0, Math.floor(options.offset ?? 0)) + const maxLength = options.maxBytes ?? MAX_FUNCTION_INLINE_BYTES + const requestedLength = options.length === undefined ? maxLength : Math.floor(options.length) + const length = Math.max(0, Math.min(requestedLength, maxLength)) + return buffer.subarray(offset, offset + length) +} + +function getExecutionKeyParts(key: string): + | { + workspaceId: string + workflowId: string + executionId: string + } + | undefined { + const parts = key.split('/') + if (parts[0] !== 'execution' || parts.length < 5) { + return undefined + } + + return { + workspaceId: parts[1], + workflowId: parts[2], + executionId: parts[3], + } +} + +function assertExecutionFileScope(key: string, options: ExecutionMaterializationContext): void { + const parts = getExecutionKeyParts(key) + if (!parts) { + throw new Error('File is not available in this execution.') + } + + const allowedExecutionIds = new Set([ + options.executionId, + ...(options.largeValueExecutionIds ?? 
[]), + ]) + const workflowScopeAllowed = + options.allowLargeValueWorkflowScope && + options.workspaceId === parts.workspaceId && + options.workflowId === parts.workflowId + if ( + !options.executionId || + (!allowedExecutionIds.has(parts.executionId) && !workflowScopeAllowed) + ) { + throw new Error('File is not available in this execution.') + } + + if (options.workspaceId && parts.workspaceId !== options.workspaceId) { + throw new Error('File is not available in this execution.') + } + + if (options.workflowId && parts.workflowId !== options.workflowId) { + throw new Error('File is not available in this execution.') + } +} + +function getVerifiedStorageContext(file: UserFile): StorageContext { + if (!file.key) { + throw new Error('File content requires a storage key.') + } + + const inferredContext = inferContextFromKey(file.key) + if (file.context && file.context !== inferredContext) { + throw new Error('File context does not match its storage key.') + } + + return inferredContext +} + +export async function assertUserFileContentAccess( + file: UserFile, + options: ExecutionMaterializationContext +): Promise { + const context = getVerifiedStorageContext(file) + + if (context === 'execution') { + assertExecutionFileScope(file.key, options) + } + + if (!options.userId) { + throw new Error('File access requires an authenticated user.') + } + + const { verifyFileAccess } = await import('@/app/api/files/authorization') + const hasAccess = await verifyFileAccess(file.key, options.userId, undefined, context, false) + if (!hasAccess) { + throw new Error('File is not available in this execution.') + } +} + +export async function readUserFileContent( + file: unknown, + options: ReadUserFileContentOptions +): Promise { + if (!isUserFileWithMetadata(file)) { + throw new Error('Expected a file object with metadata.') + } + + await assertUserFileContentAccess(file, options) + + const maxSourceBytes = options.maxSourceBytes ?? 
MAX_FUNCTION_FILE_BYTES + if (Number.isFinite(file.size) && file.size > maxSourceBytes) { + throw new ExecutionResourceLimitError({ + resource: 'execution_payload_bytes', + attemptedBytes: file.size, + limitBytes: maxSourceBytes, + }) + } + + let buffer: Buffer | null = null + const log = getLogger(options) + const requestId = options.requestId ?? 'unknown' + + buffer = await downloadFileFromStorage(file, requestId, log) + + if (!buffer) { + throw new Error(`File content for ${file.name} is unavailable.`) + } + if (buffer.length > maxSourceBytes) { + throw new ExecutionResourceLimitError({ + resource: 'execution_payload_bytes', + attemptedBytes: buffer.length, + limitBytes: maxSourceBytes, + }) + } + + const shouldSlice = + options.chunked || options.offset !== undefined || options.length !== undefined + const selected = shouldSlice ? normalizeRange(buffer, options) : buffer + assertInlineMaterializationSize(selected.length, options.maxBytes ?? MAX_FUNCTION_INLINE_BYTES) + + return options.encoding === 'base64' ? 
bufferToBase64(selected) : selected.toString('utf8') +} + +export function unavailableLargeValueError(ref: LargeValueRef): Error { + return getLargeValueMaterializationError(ref) +} diff --git a/apps/sim/lib/execution/payloads/serializer.test.ts b/apps/sim/lib/execution/payloads/serializer.test.ts index c8a429ccc78..0119201ecb8 100644 --- a/apps/sim/lib/execution/payloads/serializer.test.ts +++ b/apps/sim/lib/execution/payloads/serializer.test.ts @@ -88,6 +88,19 @@ describe('compactExecutionPayload', () => { ).toBe(false) }) + it('rejects ref-shaped user data with non-execution storage keys', () => { + expect( + isLargeValueRef({ + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 1024, + key: 'https://example.com/large-value-lv_ABCDEFGHIJKL.json', + }) + ).toBe(false) + }) + it('omits opaque ref IDs from user-facing materialization errors', () => { const error = getLargeValueMaterializationError({ __simLargeValueRef: true, diff --git a/apps/sim/lib/execution/payloads/store.test.ts b/apps/sim/lib/execution/payloads/store.test.ts new file mode 100644 index 00000000000..13bb05cdb9a --- /dev/null +++ b/apps/sim/lib/execution/payloads/store.test.ts @@ -0,0 +1,385 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' +import { cacheLargeValue, materializeLargeValueRefSync } from '@/lib/execution/payloads/cache' +import { + MAX_DURABLE_LARGE_VALUE_BYTES, + readLargeValueRefFromStorage, + readUserFileContent, +} from '@/lib/execution/payloads/materialization.server' +import { materializeLargeValueRef, storeLargeValue } from '@/lib/execution/payloads/store' +import { EXECUTION_RESOURCE_LIMIT_CODE } from '@/lib/execution/resource-errors' + +const { mockDownloadFile, mockUploadFile, mockVerifyFileAccess } = vi.hoisted(() => ({ + mockDownloadFile: vi.fn(), + mockUploadFile: vi.fn(), + mockVerifyFileAccess: vi.fn(), +})) + +vi.mock('@/lib/uploads', () => ({ + StorageService: { + 
uploadFile: mockUploadFile, + downloadFile: mockDownloadFile, + }, +})) + +vi.mock('@/app/api/files/authorization', () => ({ + verifyFileAccess: mockVerifyFileAccess, +})) + +describe('large execution payload store', () => { + beforeEach(() => { + vi.clearAllMocks() + mockUploadFile.mockImplementation(async ({ customKey }) => ({ key: customKey })) + mockVerifyFileAccess.mockResolvedValue(true) + }) + + it('stores oversized JSON in execution object storage and returns a small ref', async () => { + const value = { payload: 'x'.repeat(2048) } + const json = JSON.stringify(value) + + const ref = await storeLargeValue(value, json, Buffer.byteLength(json, 'utf8'), { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + userId: 'user-1', + requireDurable: true, + }) + + expect(ref).toMatchObject({ + __simLargeValueRef: true, + version: 1, + kind: 'object', + size: Buffer.byteLength(json, 'utf8'), + executionId: 'execution-1', + }) + expect(ref.key).toBe(`execution/workspace-1/workflow-1/execution-1/large-value-${ref.id}.json`) + expect(mockUploadFile).toHaveBeenCalledWith( + expect.objectContaining({ + contentType: 'application/json', + context: 'execution', + preserveKey: true, + customKey: ref.key, + }) + ) + }) + + it('fails durable writes before producing refs when execution context is missing', async () => { + const value = { payload: 'x'.repeat(2048) } + const json = JSON.stringify(value) + + await expect( + storeLargeValue(value, json, Buffer.byteLength(json, 'utf8'), { requireDurable: true }) + ).rejects.toThrow('Cannot persist large execution value') + + expect(mockUploadFile).not.toHaveBeenCalled() + }) + + it('fails durable writes when storage upload fails', async () => { + const value = { payload: 'x'.repeat(2048) } + const json = JSON.stringify(value) + mockUploadFile.mockRejectedValueOnce(new Error('storage down')) + + await expect( + storeLargeValue(value, json, Buffer.byteLength(json, 'utf8'), { + workspaceId: 
'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + requireDurable: true, + }) + ).rejects.toThrow('Failed to persist large execution value: storage down') + }) + + it('materializes object-storage refs through the server helper', async () => { + mockDownloadFile.mockResolvedValueOnce(Buffer.from(JSON.stringify({ ok: true }), 'utf8')) + + await expect( + materializeLargeValueRef( + { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 11, + key: 'execution/workflow-1/workflow-2/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + { + workspaceId: 'workflow-1', + workflowId: 'workflow-2', + executionId: 'execution-1', + } + ) + ).resolves.toEqual({ ok: true }) + }) + + it('bounds durable large-value writes', async () => { + const size = MAX_DURABLE_LARGE_VALUE_BYTES + 1 + + await expect( + storeLargeValue('x', JSON.stringify('x'), size, { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + requireDurable: true, + }) + ).rejects.toMatchObject({ code: EXECUTION_RESOURCE_LIMIT_CODE }) + }) + + it('bounds explicit server-side materialization', async () => { + await expect( + readLargeValueRefFromStorage( + { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 2048, + key: 'execution/workflow-1/workflow-2/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + { + workspaceId: 'workflow-1', + workflowId: 'workflow-2', + executionId: 'execution-1', + maxBytes: 1024, + } + ) + ).rejects.toMatchObject({ code: EXECUTION_RESOURCE_LIMIT_CODE }) + }) + + it('does not materialize durable refs without caller execution context', async () => { + await expect( + materializeLargeValueRef({ + __simLargeValueRef: true, + version: 1, + id: 'lv_NOCTXVALUE12', + kind: 'object', + size: 11, + key: 'execution/workflow-1/workflow-2/execution-1/large-value-lv_NOCTXVALUE12.json', + executionId: 
'execution-1', + }) + ).resolves.toBeUndefined() + + expect(mockDownloadFile).not.toHaveBeenCalled() + }) + + it('checks caller execution context before returning cached large values', async () => { + const value = { payload: 'cached' } + const json = JSON.stringify(value) + const ref = await storeLargeValue(value, json, Buffer.byteLength(json, 'utf8'), { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + userId: 'user-1', + requireDurable: true, + }) + + await expect( + materializeLargeValueRef(ref, { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'other-execution', + userId: 'user-1', + }) + ).rejects.toThrow('Large execution value is not available in this execution.') + }) + + it('rejects durable refs whose key does not match caller execution context', async () => { + await expect( + readLargeValueRefFromStorage( + { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 11, + key: 'execution/workflow-1/workflow-2/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + { workspaceId: 'workflow-1', workflowId: 'workflow-2', executionId: 'other-execution' } + ) + ).rejects.toThrow('Large execution value is not available in this execution.') + + expect(mockDownloadFile).not.toHaveBeenCalled() + }) + + it('allows prior-execution durable refs only when workflow-scoped reads are explicitly enabled', async () => { + mockDownloadFile.mockResolvedValueOnce(Buffer.from(JSON.stringify({ ok: true }), 'utf8')) + + await expect( + readLargeValueRefFromStorage( + { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 11, + key: 'execution/workspace-1/workflow-1/source-execution/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'source-execution', + }, + { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'resume-execution', + allowLargeValueWorkflowScope: true, + } + ) + 
).resolves.toEqual({ ok: true }) + }) + + it('does not materialize forged keyless refs from another cached execution', () => { + cacheLargeValue('lv_FORGEDCACHE1', { secret: true }, 16, { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'source-execution', + }) + + const forged = { + __simLargeValueRef: true, + version: 1, + id: 'lv_FORGEDCACHE1', + kind: 'object', + size: 16, + executionId: 'other-execution', + } as const + + expect( + materializeLargeValueRefSync(forged, { + workspaceId: 'workspace-2', + workflowId: 'workflow-2', + executionId: 'other-execution', + }) + ).toBeUndefined() + }) + + it('rejects durable refs when caller omits workspace and workflow context', async () => { + await expect( + readLargeValueRefFromStorage( + { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 11, + key: 'execution/workflow-1/workflow-2/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + { executionId: 'execution-1' } + ) + ).rejects.toThrow('Large execution value requires workspace and workflow context.') + + expect(mockDownloadFile).not.toHaveBeenCalled() + }) + + it('rejects execution files with forged public contexts before storage download', async () => { + await expect( + readUserFileContent( + { + id: 'file_1', + name: 'secret.txt', + url: '/api/files/serve/execution/workspace-1/workflow-1/execution-1/secret.txt', + key: 'execution/workspace-1/workflow-1/execution-1/secret.txt', + context: 'profile-pictures', + size: 32, + type: 'text/plain', + }, + { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + userId: 'user-1', + encoding: 'text', + } + ) + ).rejects.toThrow('File context does not match its storage key.') + + expect(mockVerifyFileAccess).not.toHaveBeenCalled() + expect(mockDownloadFile).not.toHaveBeenCalled() + }) + + it('rejects URL-only file objects instead of reading internal URLs directly', async () => { + await 
expect( + readUserFileContent( + { + id: 'file_1', + name: 'secret.txt', + url: '/api/files/serve/execution/workspace-1/workflow-1/execution-1/secret.txt?context=execution', + key: '', + size: 32, + type: 'text/plain', + }, + { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + userId: 'user-1', + encoding: 'text', + } + ) + ).rejects.toThrow('File content requires a storage key.') + + expect(mockVerifyFileAccess).not.toHaveBeenCalled() + expect(mockDownloadFile).not.toHaveBeenCalled() + }) + + it('throws instead of truncating non-chunked file reads over the inline cap', async () => { + const workspaceId = '11111111-1111-4111-8111-111111111111' + const workflowId = '22222222-2222-4222-8222-222222222222' + const executionId = '33333333-3333-4333-8333-333333333333' + mockDownloadFile.mockResolvedValueOnce(Buffer.from('hello world', 'utf8')) + + await expect( + readUserFileContent( + { + id: 'file_1', + name: 'hello.txt', + url: `/api/files/serve/execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + key: `execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + context: 'execution', + size: 11, + type: 'text/plain', + }, + { + workspaceId, + workflowId, + executionId, + userId: 'user-1', + encoding: 'text', + maxBytes: 5, + } + ) + ).rejects.toMatchObject({ code: EXECUTION_RESOURCE_LIMIT_CODE }) + }) + + it('allows explicit chunked file reads to slice within the inline cap', async () => { + const workspaceId = '11111111-1111-4111-8111-111111111111' + const workflowId = '22222222-2222-4222-8222-222222222222' + const executionId = '33333333-3333-4333-8333-333333333333' + mockDownloadFile.mockResolvedValueOnce(Buffer.from('hello world', 'utf8')) + + await expect( + readUserFileContent( + { + id: 'file_1', + name: 'hello.txt', + url: `/api/files/serve/execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + key: `execution/${workspaceId}/${workflowId}/${executionId}/hello.txt`, + context: 
'execution', + size: 11, + type: 'text/plain', + }, + { + workspaceId, + workflowId, + executionId, + userId: 'user-1', + encoding: 'text', + maxBytes: 5, + chunked: true, + } + ) + ).resolves.toBe('hello') + }) +}) diff --git a/apps/sim/lib/execution/payloads/store.ts b/apps/sim/lib/execution/payloads/store.ts index 4dd8c902fb3..222b743e498 100644 --- a/apps/sim/lib/execution/payloads/store.ts +++ b/apps/sim/lib/execution/payloads/store.ts @@ -7,6 +7,12 @@ import { type LargeValueKind, type LargeValueRef, } from '@/lib/execution/payloads/large-value-ref' +import { + assertDurableLargeValueSize, + assertLargeValueRefAccess, + isValidLargeValueKey, + readLargeValueRefFromStorage, +} from '@/lib/execution/payloads/materialization.server' import { generateExecutionFileKey } from '@/lib/uploads/contexts/execution/utils' const logger = createLogger('LargeExecutionPayloadStore') @@ -15,6 +21,8 @@ export interface LargeValueStoreContext { workspaceId?: string workflowId?: string executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean userId?: string requireDurable?: boolean } @@ -39,14 +47,6 @@ function getPreview(value: unknown): unknown { return value } -function isValidLargeValueKey(ref: LargeValueRef): boolean { - if (!ref.key) return false - if (!ref.key.startsWith('execution/')) return false - if (!ref.key.endsWith(`/large-value-${ref.id}.json`)) return false - if (ref.executionId && !ref.key.includes(`/${ref.executionId}/`)) return false - return true -} - async function persistValue( id: string, json: string, @@ -103,9 +103,10 @@ export async function storeLargeValue( size: number, context: LargeValueStoreContext ): Promise { + assertDurableLargeValueSize(size) const id = `lv_${generateShortId(12)}` const key = await persistValue(id, json, context) - cacheLargeValue(id, value, size) + cacheLargeValue(id, value, size, context) return { __simLargeValueRef: true, @@ -119,8 +120,17 @@ export async function storeLargeValue( } } 
-export async function materializeLargeValueRef(ref: LargeValueRef): Promise { - const cached = materializeLargeValueRefSync(ref) +export async function materializeLargeValueRef( + ref: LargeValueRef, + context?: LargeValueStoreContext +): Promise { + if (!context?.executionId) { + return undefined + } + + assertLargeValueRefAccess(ref, context) + + const cached = materializeLargeValueRefSync(ref, context) if (cached !== undefined) { return cached } @@ -130,13 +140,22 @@ export async function materializeLargeValueRef(ref: LargeValueRef): Promise ({ + mockDownloadFile: vi.fn(), + mockVerifyFileAccess: vi.fn(), +})) + +vi.mock('@/lib/core/config/redis', () => ({ + getRedisClient: () => null, +})) + +vi.mock('@/lib/uploads', () => ({ + StorageService: { + downloadFile: mockDownloadFile, + }, +})) + +vi.mock('@/lib/uploads/contexts/execution/execution-file-manager', () => ({ + downloadExecutionFile: mockDownloadFile, +})) + +vi.mock('@/lib/uploads/utils/file-utils.server', () => ({ + downloadFileFromStorage: mockDownloadFile, +})) + +vi.mock('@/app/api/files/authorization', () => ({ + verifyFileAccess: mockVerifyFileAccess, +})) + describe('hydrateUserFilesWithBase64', () => { + beforeEach(() => { + vi.clearAllMocks() + mockVerifyFileAccess.mockResolvedValue(true) + }) + it('strips existing base64 when it exceeds maxBytes', async () => { const file: UserFile = { id: 'file-1', @@ -40,4 +72,46 @@ describe('hydrateUserFilesWithBase64', () => { expect(hydrated.file.base64).toBe(base64) }) + + it('does not hydrate URL-only internal file objects', async () => { + const file: UserFile = { + id: 'file-1', + name: 'private.txt', + key: '', + url: '/api/files/serve/execution/workspace/workflow/execution/private.txt?context=execution', + size: 5, + type: 'text/plain', + } + + const hydrated = await hydrateUserFilesWithBase64({ file }, { maxBytes: 10, userId: 'user-1' }) + + expect(hydrated.file).not.toHaveProperty('base64') + }) + + it('hydrates prior-execution files when 
workflow-scoped reads are enabled', async () => { + mockDownloadFile.mockResolvedValueOnce(Buffer.from('hello', 'utf8')) + const file: UserFile = { + id: 'file-1', + name: 'prior.txt', + key: 'execution/workspace/workflow/source-execution/prior.txt', + url: '/api/files/serve/execution/workspace/workflow/source-execution/prior.txt?context=execution', + size: 5, + type: 'text/plain', + context: 'execution', + } + + const hydrated = await hydrateUserFilesWithBase64( + { file }, + { + workspaceId: 'workspace', + workflowId: 'workflow', + executionId: 'resume-execution', + allowLargeValueWorkflowScope: true, + userId: 'user-1', + maxBytes: 10, + } + ) + + expect(hydrated.file.base64).toBe(Buffer.from('hello').toString('base64')) + }) }) diff --git a/apps/sim/lib/uploads/utils/user-file-base64.server.ts b/apps/sim/lib/uploads/utils/user-file-base64.server.ts index 2a922172e04..299490b18e9 100644 --- a/apps/sim/lib/uploads/utils/user-file-base64.server.ts +++ b/apps/sim/lib/uploads/utils/user-file-base64.server.ts @@ -1,24 +1,24 @@ import type { Logger } from '@sim/logger' import { createLogger } from '@sim/logger' import { getRedisClient } from '@/lib/core/config/redis' -import { getMaxExecutionTimeout } from '@/lib/core/execution-limits' import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' import { LARGE_VALUE_THRESHOLD_BYTES } from '@/lib/execution/payloads/large-value-ref' +import { + assertUserFileContentAccess, + readUserFileContent, +} from '@/lib/execution/payloads/materialization.server' import { type ExecutionRedisBudgetReservation, releaseExecutionRedisBytes, reserveExecutionRedisBytes, } from '@/lib/execution/redis-budget.server' import { isExecutionResourceLimitError } from '@/lib/execution/resource-errors' -import { bufferToBase64 } from '@/lib/uploads/utils/file-utils' -import { downloadFileFromStorage, downloadFileFromUrl } from '@/lib/uploads/utils/file-utils.server' import type { UserFile } from '@/executor/types' const 
INLINE_BASE64_JSON_OVERHEAD_BYTES = 512 * 1024 const DEFAULT_MAX_BASE64_BYTES = Math.floor( (LARGE_VALUE_THRESHOLD_BYTES - INLINE_BASE64_JSON_OVERHEAD_BYTES) * 0.75 ) -const DEFAULT_TIMEOUT_MS = getMaxExecutionTimeout() const DEFAULT_CACHE_TTL_SECONDS = 300 const REDIS_KEY_PREFIX = 'user-file:base64:' @@ -35,7 +35,11 @@ interface HydrationState { export interface Base64HydrationOptions { requestId?: string + workspaceId?: string + workflowId?: string executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean userId?: string logger?: Logger maxBytes?: number @@ -175,7 +179,6 @@ async function resolveBase64( } const allowUnknownSize = options.allowUnknownSize ?? false - const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS const hasStableStorageKey = Boolean(file.key) if (Number.isFinite(file.size) && file.size > maxBytes) { @@ -194,40 +197,24 @@ async function resolveBase64( return null } - let buffer: Buffer | null = null const requestId = options.requestId ?? 
'unknown' - - if (file.key) { - try { - buffer = await downloadFileFromStorage(file, requestId, logger) - } catch (error) { - logger.warn( - `[${requestId}] Failed to download ${file.name} from storage, trying URL fallback`, - error - ) - } - } - - if (!buffer && file.url) { - try { - buffer = await downloadFileFromUrl(file.url, timeoutMs) - } catch (error) { - logger.warn(`[${requestId}] Failed to download ${file.name} from URL`, error) - } - } - - if (!buffer) { - return null - } - - if (buffer.length > maxBytes) { - logger.warn( - `[${options.requestId}] Skipping base64 for ${file.name} (downloaded ${buffer.length} exceeds ${maxBytes})` - ) + try { + return await readUserFileContent(file, { + requestId, + workspaceId: options.workspaceId, + workflowId: options.workflowId, + executionId: options.executionId, + largeValueExecutionIds: options.largeValueExecutionIds, + allowLargeValueWorkflowScope: options.allowLargeValueWorkflowScope, + userId: options.userId, + encoding: 'base64', + maxBytes, + maxSourceBytes: maxBytes, + }) + } catch (error) { + logger.warn(`[${requestId}] Failed to hydrate base64 for ${file.name}`, error) return null } - - return bufferToBase64(buffer) } async function hydrateUserFile( @@ -236,6 +223,24 @@ async function hydrateUserFile( state: HydrationState, logger: Logger ): Promise { + if (!file.base64) { + try { + await assertUserFileContentAccess(file, { + requestId: options.requestId, + workspaceId: options.workspaceId, + workflowId: options.workflowId, + executionId: options.executionId, + largeValueExecutionIds: options.largeValueExecutionIds, + allowLargeValueWorkflowScope: options.allowLargeValueWorkflowScope, + userId: options.userId, + logger, + }) + } catch (error) { + logger.warn(`[${options.requestId ?? 
'unknown'}] Skipping unauthorized file base64`, error) + return stripBase64(file) + } + } + const cached = await state.cache.get(file) if (cached) { const maxBytes = Math.min( diff --git a/apps/sim/lib/workflows/executor/execution-core.ts b/apps/sim/lib/workflows/executor/execution-core.ts index d47e6f02af1..c099ce3151b 100644 --- a/apps/sim/lib/workflows/executor/execution-core.ts +++ b/apps/sim/lib/workflows/executor/execution-core.ts @@ -553,10 +553,20 @@ export async function executeWorkflowCore( return persistencePromise } + const largeValueExecutionIds = Array.from( + new Set([executionId, ...(metadata.largeValueExecutionIds ?? [])].filter(Boolean)) + ) + const allowLargeValueWorkflowScope = + metadata.allowLargeValueWorkflowScope === true || + metadata.resumeFromSnapshot === true || + Boolean(runFromBlock?.sourceSnapshot) + const contextExtensions: ContextExtensions = { stream: !!onStream, selectedOutputs, executionId, + largeValueExecutionIds, + allowLargeValueWorkflowScope, workspaceId: providedWorkspaceId, userId, isDeployedContext: !metadata.isClientSession, @@ -584,10 +594,24 @@ export async function executeWorkflowCore( } if (snapshot.state) { - await warmLargeValueRefs(snapshot.state) + await warmLargeValueRefs(snapshot.state, { + workspaceId: providedWorkspaceId, + workflowId, + executionId, + largeValueExecutionIds, + allowLargeValueWorkflowScope, + userId, + }) } if (runFromBlock?.sourceSnapshot) { - await warmLargeValueRefs(runFromBlock.sourceSnapshot) + await warmLargeValueRefs(runFromBlock.sourceSnapshot, { + workspaceId: providedWorkspaceId, + workflowId, + executionId, + largeValueExecutionIds, + allowLargeValueWorkflowScope, + userId, + }) } for (const variable of Object.values(workflowVariables)) { diff --git a/apps/sim/lib/workflows/search-replace/replacements.test.ts b/apps/sim/lib/workflows/search-replace/replacements.test.ts index 8dd90198605..3ea5963acaa 100644 --- a/apps/sim/lib/workflows/search-replace/replacements.test.ts +++ 
b/apps/sim/lib/workflows/search-replace/replacements.test.ts @@ -1201,6 +1201,13 @@ describe('buildWorkflowSearchReplacePlan', () => { expect(countPlan.conflicts).toEqual([]) expect(countPlan.subflowUpdates).toEqual([ + expect.objectContaining({ + blockId: 'parallel-1', + blockType: 'parallel', + fieldId: WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize, + previousValue: '20', + nextValue: 3, + }), expect.objectContaining({ blockId: 'parallel-1', blockType: 'parallel', @@ -1569,8 +1576,8 @@ describe('buildWorkflowSearchReplacePlan', () => { expect(plan.subflowUpdates).toEqual([]) expect(plan.conflicts).toEqual([ { - matchId: matches[0].id, - reason: 'Subflow iteration count must be between 1 and 20', + matchId: 'subflow-text:parallel-1:subflowBatchSize:0:0', + reason: 'Parallel batch size must be between 1 and 20', }, ]) }) diff --git a/apps/sim/lib/workflows/search-replace/subflow-fields.ts b/apps/sim/lib/workflows/search-replace/subflow-fields.ts index 1826ecee442..6f46d9039e8 100644 --- a/apps/sim/lib/workflows/search-replace/subflow-fields.ts +++ b/apps/sim/lib/workflows/search-replace/subflow-fields.ts @@ -169,14 +169,17 @@ export function parseWorkflowSearchSubflowReplacement({ } const count = Number.parseInt(trimmed, 10) - const max = fieldId === WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize ? 20 : 1000 - if (count < 1 || count > max) { + const maxBatchSize = 20 + if ( + count < 1 || + (fieldId === WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize && count > maxBatchSize) + ) { return { success: false, reason: fieldId === WORKFLOW_SEARCH_SUBFLOW_FIELD_IDS.batchSize - ? `Parallel batch size must be between 1 and ${max}` - : `Subflow iteration count must be between 1 and ${max}`, + ? 
`Parallel batch size must be between 1 and ${maxBatchSize}` + : 'Subflow iteration count must be greater than 0', } } diff --git a/apps/sim/lib/workflows/streaming/streaming.ts b/apps/sim/lib/workflows/streaming/streaming.ts index 08b99fe1ae8..3336f17a9c2 100644 --- a/apps/sim/lib/workflows/streaming/streaming.ts +++ b/apps/sim/lib/workflows/streaming/streaming.ts @@ -46,6 +46,8 @@ export interface StreamingResponseOptions { requestId: string streamConfig: StreamingConfig executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean workspaceId?: string workflowId?: string userId?: string @@ -314,7 +316,11 @@ export async function createStreamingResponse( const hydratedOutput = includeFileBase64 ? await hydrateUserFilesWithBase64(outputValue, { requestId, + workspaceId: options.workspaceId, + workflowId: options.workflowId, executionId, + largeValueExecutionIds: options.largeValueExecutionIds, + allowLargeValueWorkflowScope: options.allowLargeValueWorkflowScope, userId: options.userId, maxBytes: base64MaxBytes, }) diff --git a/apps/sim/stores/workflows/workflow/store.test.ts b/apps/sim/stores/workflows/workflow/store.test.ts index 7286cf7627f..720fee128b8 100644 --- a/apps/sim/stores/workflows/workflow/store.test.ts +++ b/apps/sim/stores/workflows/workflow/store.test.ts @@ -500,7 +500,7 @@ describe('workflow store', () => { expect(state.loops.loop1.forEachItems).toBe('["item1", "item2", "item3"]') }) - it('should clamp loop count between 1 and 1000', () => { + it('should allow loop counts above 1000 and clamp only to at least 1', () => { const { updateLoopCount } = useWorkflowStore.getState() addBlock( @@ -517,7 +517,7 @@ describe('workflow store', () => { updateLoopCount('loop1', 1500) let state = useWorkflowStore.getState() - expect(state.blocks.loop1?.data?.count).toBe(1000) + expect(state.blocks.loop1?.data?.count).toBe(1500) updateLoopCount('loop1', 0) state = useWorkflowStore.getState() @@ -576,7 +576,7 @@ 
describe('workflow store', () => { expect(parsedDistribution).toHaveLength(3) }) - it('should clamp parallel count between 1 and 1000', () => { + it('should allow parallel counts above 1000 and clamp only to at least 1', () => { const { updateParallelCount } = useWorkflowStore.getState() addBlock( @@ -596,7 +596,7 @@ describe('workflow store', () => { updateParallelCount('parallel1', 1001) state = useWorkflowStore.getState() - expect(state.blocks.parallel1?.data?.count).toBe(1000) + expect(state.blocks.parallel1?.data?.count).toBe(1001) updateParallelCount('parallel1', 0) state = useWorkflowStore.getState() diff --git a/apps/sim/stores/workflows/workflow/store.ts b/apps/sim/stores/workflows/workflow/store.ts index f05cd9468f5..e6fd406b80e 100644 --- a/apps/sim/stores/workflows/workflow/store.ts +++ b/apps/sim/stores/workflows/workflow/store.ts @@ -996,7 +996,7 @@ export const useWorkflowStore = create()( ...block, data: { ...block.data, - count: Math.max(1, Math.min(1000, count)), // Clamp between 1-1000 + count: Math.max(1, count), }, }, } @@ -1164,7 +1164,7 @@ export const useWorkflowStore = create()( ...block, data: { ...block.data, - count: Math.max(1, Math.min(1000, count)), + count: Math.max(1, count), }, }, } diff --git a/apps/sim/tools/function/execute.test.ts b/apps/sim/tools/function/execute.test.ts index 73eb21de9e6..b174634e57f 100644 --- a/apps/sim/tools/function/execute.test.ts +++ b/apps/sim/tools/function/execute.test.ts @@ -66,6 +66,7 @@ describe('Function Execute Tool', () => { outputTable: undefined, timeout: 5000, workflowId: undefined, + executionId: undefined, workspaceId: undefined, userId: undefined, }) @@ -101,6 +102,7 @@ describe('Function Execute Tool', () => { outputSandboxPath: undefined, outputTable: undefined, workflowId: undefined, + executionId: undefined, workspaceId: undefined, userId: undefined, }) @@ -128,6 +130,7 @@ describe('Function Execute Tool', () => { outputSandboxPath: undefined, outputTable: undefined, workflowId: 
undefined, + executionId: undefined, workspaceId: undefined, userId: undefined, }) diff --git a/apps/sim/tools/function/execute.ts b/apps/sim/tools/function/execute.ts index 4d096ce7cf4..6821131b30a 100644 --- a/apps/sim/tools/function/execute.ts +++ b/apps/sim/tools/function/execute.ts @@ -137,6 +137,9 @@ export const functionExecuteTool: ToolConfig _context?: { workflowId?: string + executionId?: string + largeValueExecutionIds?: string[] + allowLargeValueWorkflowScope?: boolean userId?: string workspaceId?: string } diff --git a/packages/python-sdk/README.md b/packages/python-sdk/README.md index e193e951c13..2690f635a17 100644 --- a/packages/python-sdk/README.md +++ b/packages/python-sdk/README.md @@ -115,17 +115,17 @@ result = client.execute_workflow_sync("workflow-id", {"data": "some input"}, tim **Returns:** `WorkflowExecutionResult` -##### get_job_status(task_id) +##### get_job_status(job_id) Get the status of an async job. ```python -status = client.get_job_status("task-id-from-async-execution") +status = client.get_job_status("job-id-from-async-execution") print("Job status:", status) ``` **Parameters:** -- `task_id` (str): The task ID returned from async execution +- `job_id` (str): The job ID returned from async execution **Returns:** `dict` @@ -248,10 +248,11 @@ class SimStudioError(Exception): @dataclass class AsyncExecutionResult: success: bool - task_id: str - status: str # 'queued' - created_at: str - links: Dict[str, str] + job_id: str + status_url: str + execution_id: Optional[str] = None + message: str = "" + async_execution: bool = True ``` ### RateLimitInfo diff --git a/packages/python-sdk/simstudio/__init__.py b/packages/python-sdk/simstudio/__init__.py index ec242338ec5..0e2609e2f26 100644 --- a/packages/python-sdk/simstudio/__init__.py +++ b/packages/python-sdk/simstudio/__init__.py @@ -49,10 +49,11 @@ class WorkflowStatus: class AsyncExecutionResult: """Result of an async workflow execution.""" success: bool - task_id: str - status: str # 
'queued' - created_at: str - links: Dict[str, str] + job_id: str + status_url: str + execution_id: Optional[str] = None + message: str = "" + async_execution: bool = True @dataclass @@ -237,13 +238,14 @@ def execute_workflow( result_data = response.json() # Check if this is an async execution response (202 status) - if response.status_code == 202 and 'taskId' in result_data: + if response.status_code == 202 and 'jobId' in result_data: return AsyncExecutionResult( success=result_data.get('success', True), - task_id=result_data['taskId'], - status=result_data.get('status', 'queued'), - created_at=result_data.get('createdAt', ''), - links=result_data.get('links', {}) + job_id=result_data['jobId'], + status_url=result_data['statusUrl'], + execution_id=result_data.get('executionId'), + message=result_data.get('message', ''), + async_execution=result_data.get('async', True) ) return WorkflowExecutionResult( @@ -374,12 +376,12 @@ def close(self) -> None: """Close the underlying HTTP session.""" self._session.close() - def get_job_status(self, task_id: str) -> Dict[str, Any]: + def get_job_status(self, job_id: str) -> Dict[str, Any]: """ Get the status of an async job. 
Args: - task_id: The task ID returned from async execution + job_id: The job ID returned from async execution Returns: Dictionary containing the job status @@ -387,7 +389,7 @@ def get_job_status(self, task_id: str) -> Dict[str, Any]: Raises: SimStudioError: If getting the status fails """ - url = f"{self.base_url}/api/jobs/{task_id}" + url = f"{self.base_url}/api/jobs/{job_id}" try: response = self._session.get(url) diff --git a/packages/python-sdk/tests/test_client.py b/packages/python-sdk/tests/test_client.py index 8dfdee99b61..814ad7610ef 100644 --- a/packages/python-sdk/tests/test_client.py +++ b/packages/python-sdk/tests/test_client.py @@ -95,17 +95,18 @@ def test_context_manager(mock_close): @patch('simstudio.requests.Session.post') -def test_async_execution_returns_task_id(mock_post): +def test_async_execution_returns_job_id(mock_post): """Test async execution returns AsyncExecutionResult.""" mock_response = Mock() mock_response.ok = True mock_response.status_code = 202 mock_response.json.return_value = { "success": True, - "taskId": "task-123", - "status": "queued", - "createdAt": "2024-01-01T00:00:00Z", - "links": {"status": "/api/jobs/task-123"} + "jobId": "job-123", + "statusUrl": "https://test.sim.ai/api/jobs/job-123", + "executionId": "execution-123", + "message": "Workflow execution started", + "async": True } mock_response.headers.get.return_value = None mock_post.return_value = mock_response @@ -118,9 +119,10 @@ def test_async_execution_returns_task_id(mock_post): ) assert result.success is True - assert result.task_id == "task-123" - assert result.status == "queued" - assert result.links["status"] == "/api/jobs/task-123" + assert result.job_id == "job-123" + assert result.status_url == "https://test.sim.ai/api/jobs/job-123" + assert result.execution_id == "execution-123" + assert result.async_execution is True call_args = mock_post.call_args assert call_args[1]["headers"]["X-Execution-Mode"] == "async" diff --git a/packages/ts-sdk/README.md 
b/packages/ts-sdk/README.md index cc8b558bcae..0ce547f6e51 100644 --- a/packages/ts-sdk/README.md +++ b/packages/ts-sdk/README.md @@ -125,17 +125,17 @@ const result = await client.executeWorkflowSync('workflow-id', { data: 'some inp **Returns:** `Promise` -##### getJobStatus(taskId) +##### getJobStatus(jobId) Get the status of an async job. ```typescript -const status = await client.getJobStatus('task-id-from-async-execution'); +const status = await client.getJobStatus('job-id-from-async-execution'); console.log('Job status:', status); ``` **Parameters:** -- `taskId` (string): The task ID returned from async execution +- `jobId` (string): The job ID returned from async execution **Returns:** `Promise` @@ -229,6 +229,7 @@ interface WorkflowExecutionResult { ### LargeValueRef Oversized execution values may be returned as a versioned reference inside `output`, `logs`, streaming events, or async job status responses. +The `key` field is an opaque execution-scoped server storage pointer, not a client-readable download URL. ```typescript interface LargeValueRef { @@ -267,12 +268,11 @@ class SimStudioError extends Error { ```typescript interface AsyncExecutionResult { success: boolean; - taskId: string; - status: 'queued'; - createdAt: string; - links: { - status: string; - }; + jobId: string; + statusUrl: string; + executionId?: string; + message: string; + async: true; } ``` diff --git a/packages/ts-sdk/src/index.ts b/packages/ts-sdk/src/index.ts index be9dc831ce4..ffed7ca1e7b 100644 --- a/packages/ts-sdk/src/index.ts +++ b/packages/ts-sdk/src/index.ts @@ -11,6 +11,7 @@ export interface LargeValueRef { id: string kind: 'array' | 'object' | 'string' | 'json' size: number + /** Opaque execution-scoped server storage key. This is not a download URL. 
*/ key?: string executionId?: string preview?: unknown From 2b6c8129803a1a0da44ffe957e31929d9a5427a6 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 11 May 2026 17:04:15 -0700 Subject: [PATCH 04/11] codegen to allow inline substitution --- apps/sim/executor/variables/resolver.test.ts | 19 +++++++++++++++++-- apps/sim/executor/variables/resolver.ts | 4 ++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/apps/sim/executor/variables/resolver.test.ts b/apps/sim/executor/variables/resolver.test.ts index 828c8205c9b..d6ee9acffdd 100644 --- a/apps/sim/executor/variables/resolver.test.ts +++ b/apps/sim/executor/variables/resolver.test.ts @@ -148,7 +148,7 @@ describe('VariableResolver function block inputs', () => { ) expect(result.resolvedInputs.code).toBe( - 'const base64 = await sim.files.readBase64(globalThis["__blockRef_0"]);\nreturn base64' + 'const base64 = (await sim.files.readBase64(globalThis["__blockRef_0"]));\nreturn base64' ) expect(result.displayInputs.code).toBe('const base64 = ;\nreturn base64') expect(result.contextVariables.__blockRef_0).toMatchObject({ @@ -158,6 +158,21 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables.__blockRef_0).not.toHaveProperty('base64') }) + it('wraps lazy JavaScript file base64 reads before member access', async () => { + const { block, ctx, resolver } = createResolver('javascript') + + const result = await resolver.resolveInputsForFunctionBlock( + ctx, + 'function', + { code: 'return .length' }, + block + ) + + expect(result.resolvedInputs.code).toBe( + 'return (await sim.files.readBase64(globalThis["__blockRef_0"])).length' + ) + }) + it('uses existing inline base64 for keyless files instead of lazy storage reads', async () => { const { block, ctx, resolver } = createResolver('javascript') const state = new ExecutionState() @@ -231,7 +246,7 @@ describe('VariableResolver function block inputs', () => { ) expect(result.resolvedInputs.code).toBe( - 'return 
await sim.values.read(globalThis["__blockRef_0"])' + 'return (await sim.values.read(globalThis["__blockRef_0"]))' ) expect(result.contextVariables.__blockRef_0).toMatchObject({ __simLargeValueRef: true, diff --git a/apps/sim/executor/variables/resolver.ts b/apps/sim/executor/variables/resolver.ts index 4f35c3dd6c4..4b4234b4eed 100644 --- a/apps/sim/executor/variables/resolver.ts +++ b/apps/sim/executor/variables/resolver.ts @@ -501,7 +501,7 @@ export class VariableResolver { const { base64: _base64, ...fileMetadata } = file contextVarAccumulator[varName] = fileMetadata const fileExpression = `globalThis[${JSON.stringify(varName)}]` - const lazyExpression = `await sim.files.readBase64(${fileExpression})` + const lazyExpression = `(await sim.files.readBase64(${fileExpression}))` return { replacement: this.formatJavaScriptAsyncExpression(lazyExpression, template, matchIndex), @@ -519,7 +519,7 @@ export class VariableResolver { return null } - const expression = `await sim.values.read(globalThis[${JSON.stringify(varName)}])` + const expression = `(await sim.values.read(globalThis[${JSON.stringify(varName)}]))` return this.formatJavaScriptAsyncExpression(expression, template, matchIndex, { stringifyInStringContext: true, }) From 92f26f9947c072b7ce773daa78ff102c5eb695f7 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 11 May 2026 17:36:43 -0700 Subject: [PATCH 05/11] address comments --- .../components/structured-output.tsx | 17 +-- .../executor/orchestrators/parallel.test.ts | 71 +++++++++ apps/sim/executor/orchestrators/parallel.ts | 9 +- .../executor/utils/parallel-expansion.test.ts | 2 +- apps/sim/executor/utils/parallel-expansion.ts | 10 +- apps/sim/executor/utils/subflow-utils.test.ts | 52 ++++--- apps/sim/executor/utils/subflow-utils.ts | 82 +--------- apps/sim/executor/variables/resolver.test.ts | 80 ++++++++++ apps/sim/executor/variables/resolver.ts | 143 +++++++++++++++++- .../variables/resolvers/block.test.ts | 10 +- 
.../executor/variables/resolvers/loop.test.ts | 10 +- apps/sim/lib/core/utils/user-file.ts | 21 +++ apps/sim/lib/execution/payloads/cache.ts | 46 +++++- .../lib/execution/payloads/serializer.test.ts | 17 ++- apps/sim/lib/execution/payloads/store.test.ts | 34 ++++- apps/sim/lib/execution/payloads/store.ts | 19 ++- 16 files changed, 474 insertions(+), 149 deletions(-) diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/terminal/components/output-panel/components/structured-output.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/terminal/components/output-panel/components/structured-output.tsx index 57e8318fbf3..3144de9d5e4 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/terminal/components/output-panel/components/structured-output.tsx +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/terminal/components/output-panel/components/structured-output.tsx @@ -14,6 +14,7 @@ import { import { List, type RowComponentProps, useListRef } from 'react-window' import { Badge, ChevronDown } from '@/components/emcn' import { cn } from '@/lib/core/utils/cn' +import { isUserFileDisplayMetadata } from '@/lib/core/utils/user-file' import { isLargeValueRef, type LargeValueRef } from '@/lib/execution/payloads/large-value-ref' type ValueType = 'null' | 'undefined' | 'array' | 'string' | 'number' | 'boolean' | 'object' @@ -89,20 +90,6 @@ function getDisplayValue(value: unknown): unknown { return isLargeValueRef(value) ? getLargeValueDisplayValue(value) : value } -function isDisplayedUserFileMetadata(value: unknown): value is Record { - if (!value || typeof value !== 'object' || Array.isArray(value)) return false - const candidate = value as Record - const url = typeof candidate.url === 'string' ? 
candidate.url : '' - return ( - typeof candidate.id === 'string' && - typeof candidate.name === 'string' && - url.length > 0 && - typeof candidate.size === 'number' && - typeof candidate.type === 'string' && - (candidate.id.startsWith('file_') || url.includes('/api/files/serve/')) - ) -} - function getTypeLabel(value: unknown): ValueType { if (value === null) return 'null' if (value === undefined) return 'undefined' @@ -151,7 +138,7 @@ function buildEntries(value: unknown, basePath: string): NodeEntry[] { value: v, path: `${basePath}.${k}`, })) - if (isDisplayedUserFileMetadata(displayValue) && !('base64' in displayValue)) { + if (isUserFileDisplayMetadata(displayValue) && !('base64' in displayValue)) { entries.push({ key: 'base64', value: USER_FILE_BASE64_PLACEHOLDER, diff --git a/apps/sim/executor/orchestrators/parallel.test.ts b/apps/sim/executor/orchestrators/parallel.test.ts index 8db279e50cc..85de7846fb8 100644 --- a/apps/sim/executor/orchestrators/parallel.test.ts +++ b/apps/sim/executor/orchestrators/parallel.test.ts @@ -6,6 +6,7 @@ import type { DAG } from '@/executor/dag/builder' import type { BlockStateWriter, ContextExtensions } from '@/executor/execution/types' import { ParallelOrchestrator } from '@/executor/orchestrators/parallel' import type { ExecutionContext } from '@/executor/types' +import { buildBranchNodeId } from '@/executor/utils/subflow-utils' function createDag(): DAG { return { @@ -173,4 +174,74 @@ describe('ParallelOrchestrator', () => { expect(scope?.branchOutputs.get(20)).toEqual([{ output: 'resumed' }]) expect(scope?.branchOutputs.has(0)).toBe(false) }) + + it('resets only incoming batch branch state when scheduling later batches', async () => { + const dag = createDag() + const incomingBranchId = buildBranchNodeId('task-1', 0) + const previousBranchId = buildBranchNodeId('task-1', 1) + dag.nodes.set(incomingBranchId, { + id: incomingBranchId, + block: { + id: 'task-1', + position: { x: 0, y: 0 }, + config: { tool: '', params: {} }, + 
inputs: {}, + outputs: {}, + metadata: { id: 'function', name: 'Task 1' }, + enabled: true, + }, + incomingEdges: new Set(), + outgoingEdges: new Set(), + metadata: { parallelId: 'parallel-1', isParallelBranch: true, branchIndex: 0 }, + }) + dag.nodes.set(previousBranchId, { + id: previousBranchId, + block: { + id: 'task-1', + position: { x: 0, y: 0 }, + config: { tool: '', params: {} }, + inputs: {}, + outputs: {}, + metadata: { id: 'function', name: 'Task 1' }, + enabled: true, + }, + incomingEdges: new Set(), + outgoingEdges: new Set(), + metadata: { parallelId: 'parallel-1', isParallelBranch: true, branchIndex: 1 }, + }) + const state = createState() + const orchestrator = new ParallelOrchestrator(dag, state, null, {}) + + await ( + orchestrator as unknown as { + scheduleNextBatch( + ctx: ExecutionContext, + scope: NonNullable extends Map< + string, + infer Scope + > + ? Scope + : never, + nextBatchStart: number + ): Promise + } + ).scheduleNextBatch( + createContext(), + { + parallelId: 'parallel-1', + totalBranches: 3, + batchSize: 1, + currentBatchStart: 0, + currentBatchSize: 2, + accumulatedOutputs: new Map([[1, [{ output: 'previous' }]]]), + branchOutputs: new Map(), + }, + 2 + ) + + expect(state.deleteBlockState).toHaveBeenCalledWith(incomingBranchId) + expect(state.deleteBlockState).not.toHaveBeenCalledWith(previousBranchId) + expect(state.unmarkExecuted).toHaveBeenCalledWith(incomingBranchId) + expect(state.unmarkExecuted).not.toHaveBeenCalledWith(previousBranchId) + }) }) diff --git a/apps/sim/executor/orchestrators/parallel.ts b/apps/sim/executor/orchestrators/parallel.ts index 6c89554195d..e2574aa4876 100644 --- a/apps/sim/executor/orchestrators/parallel.ts +++ b/apps/sim/executor/orchestrators/parallel.ts @@ -390,7 +390,7 @@ export class ParallelOrchestrator { this.registerClonedSubflows(ctx, scope.parallelId, clonedSubflows) this.registerBranchMappings(ctx, scope.parallelId, allBranchNodes) - this.resetBatchExecutionState(scope.parallelId) + 
this.resetBatchExecutionState(allBranchNodes) scope.currentBatchStart = nextBatchStart scope.currentBatchSize = currentBatchSize @@ -408,9 +408,10 @@ export class ParallelOrchestrator { }) } - private resetBatchExecutionState(parallelId: string): void { - for (const [nodeId, node] of this.dag.nodes.entries()) { - if (node.metadata.parallelId !== parallelId || !node.metadata.isParallelBranch) { + private resetBatchExecutionState(branchNodeIds: string[]): void { + for (const nodeId of branchNodeIds) { + const node = this.dag.nodes.get(nodeId) + if (!node?.metadata.isParallelBranch) { continue } this.state.unmarkExecuted(nodeId) diff --git a/apps/sim/executor/utils/parallel-expansion.test.ts b/apps/sim/executor/utils/parallel-expansion.test.ts index f31970cfc9e..67f0e865aef 100644 --- a/apps/sim/executor/utils/parallel-expansion.test.ts +++ b/apps/sim/executor/utils/parallel-expansion.test.ts @@ -310,7 +310,7 @@ describe('Nested parallel expansion + edge resolution', () => { // P3 should also be cloned (inside P2__obranch-1) with a __clone prefix const p3Clone = p1Result.clonedSubflows.find((c) => c.originalId === p3)! 
expect(p3Clone).toBeDefined() - expect(p3Clone.clonedId).toMatch(/^p3__clone\d+__obranch-1$/) + expect(p3Clone.clonedId).toMatch(/^p3__clone[0-9a-f]{24}__obranch-1$/) expect(stripCloneSuffixes(p3Clone.clonedId)).toBe('p3') // Step 2: Expand P2 (original, branch 0 of P1) — this creates P3__obranch-1 at runtime diff --git a/apps/sim/executor/utils/parallel-expansion.ts b/apps/sim/executor/utils/parallel-expansion.ts index fc1ff1926fe..64ddcac73fe 100644 --- a/apps/sim/executor/utils/parallel-expansion.ts +++ b/apps/sim/executor/utils/parallel-expansion.ts @@ -1,4 +1,5 @@ import { createLogger } from '@sim/logger' +import { sha256Hex } from '@sim/security/hash' import { EDGE } from '@/executor/constants' import type { DAG, DAGNode } from '@/executor/dag/builder' import type { SerializedBlock } from '@/serializer/types' @@ -288,7 +289,7 @@ export class ParallelExpander { /** * Generates a unique clone ID for pre-expansion cloning. * - * Pre-expansion clones use `{originalId}__clone{hash}__obranch-{branchIndex}` instead + * Pre-expansion clones use `{originalId}__clone{digest}__obranch-{branchIndex}` instead * of the plain `{originalId}__obranch-{branchIndex}` used by runtime expansion. * The clone segment prevents naming collisions when the original (branch-0) * subflow later expands at runtime and creates `{child}__obranch-{branchIndex}`. 
@@ -299,12 +300,9 @@ export class ParallelExpander { outerBranchIndex: number, parentCloneId: string ): string { - let hash = 0 const input = `${parentCloneId}:${originalId}:${outerBranchIndex}` - for (let i = 0; i < input.length; i++) { - hash = (hash * 31 + input.charCodeAt(i)) >>> 0 - } - return `${originalId}__clone${hash}__obranch-${outerBranchIndex}` + const digest = sha256Hex(input).slice(0, 24) + return `${originalId}__clone${digest}__obranch-${outerBranchIndex}` } /** diff --git a/apps/sim/executor/utils/subflow-utils.test.ts b/apps/sim/executor/utils/subflow-utils.test.ts index 7f9a8787c58..478319d6ca9 100644 --- a/apps/sim/executor/utils/subflow-utils.test.ts +++ b/apps/sim/executor/utils/subflow-utils.test.ts @@ -4,84 +4,86 @@ import { describe, expect, it, vi } from 'vitest' import type { ExecutionContext } from '@/executor/types' import type { VariableResolver } from '@/executor/variables/resolver' -import { findEffectiveContainerId, resolveArrayInput } from './subflow-utils' +import { findEffectiveContainerId, resolveArrayInputAsync } from './subflow-utils' -describe('resolveArrayInput', () => { +describe('resolveArrayInputAsync', () => { const fakeCtx = {} as unknown as ExecutionContext - it('returns arrays as-is', () => { - expect(resolveArrayInput(fakeCtx, [1, 2, 3], null)).toEqual([1, 2, 3]) + it('returns arrays as-is', async () => { + await expect(resolveArrayInputAsync(fakeCtx, [1, 2, 3], null)).resolves.toEqual([1, 2, 3]) }) - it('converts plain objects to entries', () => { - expect(resolveArrayInput(fakeCtx, { a: 1, b: 2 }, null)).toEqual([ + it('converts plain objects to entries', async () => { + await expect(resolveArrayInputAsync(fakeCtx, { a: 1, b: 2 }, null)).resolves.toEqual([ ['a', 1], ['b', 2], ]) }) - it('returns empty array when a pure reference resolves to null (skipped block)', () => { + it('returns empty array when a pure reference resolves to null (skipped block)', async () => { // `resolveSingleReference` returns `null` for a 
reference that points at a // block that exists in the workflow but did not execute on this path. // A loop/parallel over such a reference should run zero iterations rather // than fail the workflow. const resolver = { - resolveSingleReference: vi.fn().mockReturnValue(null), + resolveSingleReference: vi.fn().mockResolvedValue(null), } as unknown as VariableResolver - const result = resolveArrayInput(fakeCtx, '', resolver) + const result = await resolveArrayInputAsync(fakeCtx, '', resolver) expect(result).toEqual([]) expect(resolver.resolveSingleReference).toHaveBeenCalled() }) - it('returns the array from a pure reference that resolved to an array', () => { + it('returns the array from a pure reference that resolved to an array', async () => { const resolver = { - resolveSingleReference: vi.fn().mockReturnValue([1, 2, 3]), + resolveSingleReference: vi.fn().mockResolvedValue([1, 2, 3]), } as unknown as VariableResolver - expect(resolveArrayInput(fakeCtx, '', resolver)).toEqual([1, 2, 3]) + await expect(resolveArrayInputAsync(fakeCtx, '', resolver)).resolves.toEqual([ + 1, 2, 3, + ]) }) - it('converts resolved objects to entries', () => { + it('converts resolved objects to entries', async () => { const resolver = { - resolveSingleReference: vi.fn().mockReturnValue({ x: 1, y: 2 }), + resolveSingleReference: vi.fn().mockResolvedValue({ x: 1, y: 2 }), } as unknown as VariableResolver - expect(resolveArrayInput(fakeCtx, '', resolver)).toEqual([ + await expect(resolveArrayInputAsync(fakeCtx, '', resolver)).resolves.toEqual([ ['x', 1], ['y', 2], ]) }) - it('throws when a pure reference resolves to a non-array, non-object, non-null value', () => { + it('throws when a pure reference resolves to a non-array, non-object, non-null value', async () => { const resolver = { - resolveSingleReference: vi.fn().mockReturnValue(42), + resolveSingleReference: vi.fn().mockResolvedValue(42), } as unknown as VariableResolver - expect(() => resolveArrayInput(fakeCtx, '', resolver)).toThrow( 
+ await expect(resolveArrayInputAsync(fakeCtx, '', resolver)).rejects.toThrow( /did not resolve to an array or object/ ) }) - it('throws when a pure reference resolves to undefined (unknown block)', () => { + it('throws when a pure reference resolves to undefined (unknown block)', async () => { // `undefined` means the reference could not be matched to any block at // all (typo / deleted block). This must still fail loudly. const resolver = { - resolveSingleReference: vi.fn().mockReturnValue(undefined), + resolveSingleReference: vi.fn().mockResolvedValue(undefined), } as unknown as VariableResolver - expect(() => resolveArrayInput(fakeCtx, '', resolver)).toThrow( + await expect(resolveArrayInputAsync(fakeCtx, '', resolver)).rejects.toThrow( /did not resolve to an array or object/ ) }) - it('parses a JSON array string', () => { - expect(resolveArrayInput(fakeCtx, '[1, 2, 3]', null)).toEqual([1, 2, 3]) + it('parses a JSON array string', async () => { + await expect(resolveArrayInputAsync(fakeCtx, '[1, 2, 3]', null)).resolves.toEqual([1, 2, 3]) }) - it('throws on a string that is neither a reference nor valid JSON array/object', () => { - expect(() => resolveArrayInput(fakeCtx, 'not json', null)).toThrow() + it('throws on a string that is neither a reference nor valid JSON array/object', async () => { + await expect(resolveArrayInputAsync(fakeCtx, 'not json', null)).rejects.toThrow() }) }) diff --git a/apps/sim/executor/utils/subflow-utils.ts b/apps/sim/executor/utils/subflow-utils.ts index d29559db00e..7f363365627 100644 --- a/apps/sim/executor/utils/subflow-utils.ts +++ b/apps/sim/executor/utils/subflow-utils.ts @@ -96,7 +96,7 @@ export function isBranchNodeId(nodeId: string): boolean { const OUTER_BRANCH_PATTERN = /__obranch-(\d+)/ const OUTER_BRANCH_STRIP_PATTERN = /__obranch-\d+/g -const CLONE_SEQ_STRIP_PATTERN = /__clone\d+/g +const CLONE_DIGEST_STRIP_PATTERN = /__clone[0-9a-f]+/gi /** * Extracts the outer branch index from a cloned subflow ID. 
@@ -114,7 +114,7 @@ export function extractOuterBranchIndex(clonedId: string): number | undefined { */ export function stripCloneSuffixes(nodeId: string): string { return extractBaseBlockId( - nodeId.replace(OUTER_BRANCH_STRIP_PATTERN, '').replace(CLONE_SEQ_STRIP_PATTERN, '') + nodeId.replace(OUTER_BRANCH_STRIP_PATTERN, '').replace(CLONE_DIGEST_STRIP_PATTERN, '') ) } @@ -130,7 +130,7 @@ export function buildClonedSubflowId(originalId: string, branchIndex: number): s * returning the original workflow-level subflow ID. */ export function stripOuterBranchSuffix(id: string): string { - return id.replace(OUTER_BRANCH_STRIP_PATTERN, '').replace(CLONE_SEQ_STRIP_PATTERN, '') + return id.replace(OUTER_BRANCH_STRIP_PATTERN, '').replace(CLONE_DIGEST_STRIP_PATTERN, '') } /** @@ -198,82 +198,6 @@ export function normalizeNodeId(nodeId: string): string { return nodeId } -/** - * Resolves array input at runtime. Handles arrays, objects, references, and JSON strings. - * Used by both loop forEach and parallel distribution resolution. - * Throws an error if resolution fails. 
- */ -export function resolveArrayInput( - ctx: ExecutionContext, - items: any, - resolver: VariableResolver | null -): any[] { - if (Array.isArray(items)) { - return items - } - - if (typeof items === 'object' && items !== null) { - return Object.entries(items) - } - - if (typeof items === 'string') { - if (items.startsWith(REFERENCE.START) && items.endsWith(REFERENCE.END) && resolver) { - try { - const resolved = resolver.resolveSingleReference(ctx, '', items) - if (Array.isArray(resolved)) { - return resolved - } - if (typeof resolved === 'object' && resolved !== null) { - return Object.entries(resolved) - } - if (resolved === null) { - return [] - } - throw new Error(`Reference "${items}" did not resolve to an array or object`) - } catch (error) { - if (error instanceof Error && error.message.startsWith('Reference "')) { - throw error - } - throw new Error(`Failed to resolve reference "${items}": ${toError(error).message}`) - } - } - - try { - const normalized = items.replace(/'/g, '"') - const parsed = JSON.parse(normalized) - if (Array.isArray(parsed)) { - return parsed - } - if (typeof parsed === 'object' && parsed !== null) { - return Object.entries(parsed) - } - throw new Error(`Parsed value is not an array or object`) - } catch (error) { - if (error instanceof Error && error.message.startsWith('Parsed value')) { - throw error - } - throw new Error(`Failed to parse items as JSON: "${items}"`) - } - } - - if (resolver) { - try { - const resolved = (resolver.resolveInputs(ctx, 'subflow_items', { items }) as any).items - if (Array.isArray(resolved)) { - return resolved - } - throw new Error(`Resolved items is not an array`) - } catch (error) { - if (error instanceof Error && error.message.startsWith('Resolved items')) { - throw error - } - throw new Error(`Failed to resolve items: ${toError(error).message}`) - } - } - - return [] -} - /** * Async variant used by execution paths that may need durable large-value or * explicit UserFile.base64 materialization 
while resolving collection inputs. diff --git a/apps/sim/executor/variables/resolver.test.ts b/apps/sim/executor/variables/resolver.test.ts index d6ee9acffdd..e92eac9439e 100644 --- a/apps/sim/executor/variables/resolver.test.ts +++ b/apps/sim/executor/variables/resolver.test.ts @@ -328,6 +328,86 @@ describe('VariableResolver function block inputs', () => { ).rejects.toThrow('This execution value is too large to inline') }) + it('keeps JavaScript lazy helpers enabled when import appears in comments or strings', async () => { + const { block, ctx } = createResolver('javascript') + const state = new ExecutionState() + state.setBlockOutput('producer', { + result: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 12 * 1024 * 1024, + key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + const result = await largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { + code: "/** @import { Foo } from 'foo' */\nconst text = \"import bar from 'bar'\"\nreturn ", + }, + block + ) + + expect(result.resolvedInputs.code).toBe( + '/** @import { Foo } from \'foo\' */\nconst text = "import bar from \'bar\'"\nreturn (await sim.values.read(globalThis["__blockRef_0"]))' + ) + }) + + it('keeps JavaScript lazy helpers enabled for dynamic import expressions', async () => { + const { block, ctx } = createResolver('javascript') + const state = new ExecutionState() + state.setBlockOutput('producer', { + result: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'object', + size: 12 * 1024 * 1024, 
+ key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + const result = await largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { code: "const mod = import('foo')\nreturn " }, + block + ) + + expect(result.resolvedInputs.code).toBe( + 'const mod = import(\'foo\')\nreturn (await sim.values.read(globalThis["__blockRef_0"]))' + ) + }) + it('fails nested large value refs for Function runtimes without lazy helpers', async () => { const { block, ctx } = createResolver('python') const state = new ExecutionState() diff --git a/apps/sim/executor/variables/resolver.ts b/apps/sim/executor/variables/resolver.ts index 4b4234b4eed..80bb193fde4 100644 --- a/apps/sim/executor/variables/resolver.ts +++ b/apps/sim/executor/variables/resolver.ts @@ -550,7 +550,148 @@ export class VariableResolver { if (language !== 'javascript') { return false } - return !/(^|\n)\s*import\s/.test(template) && !/require\s*\(\s*['"`]/.test(template) + return !this.hasJavaScriptModuleDependencySyntax(template) + } + + private hasJavaScriptModuleDependencySyntax(template: string): boolean { + const modes: CodeScanMode[] = [{ type: 'normal' }] + + for (let i = 0; i < template.length; i++) { + const char = template[i] + const next = template[i + 1] + const mode = modes[modes.length - 1] + + if (mode.type === 'line-comment') { + if (char === '\n') modes.pop() + continue + } + + if (mode.type === 'block-comment') { + if (char === '*' && next === '/') { + modes.pop() + i++ + } + continue + } + + if (mode.type === 'single' || mode.type === 'double') { + const quote = mode.type 
=== 'single' ? "'" : '"' + if (char === '\\') { + i++ + continue + } + if (char === quote || char === '\n') modes.pop() + continue + } + + if (mode.type === 'template') { + if (char === '\\') { + i++ + continue + } + if (char === '`') { + modes.pop() + continue + } + if (char === '$' && next === '{') { + modes.push({ type: 'template-expression', depth: 1 }) + i++ + } + continue + } + + const isCodeMode = mode.type === 'normal' || mode.type === 'template-expression' + if (!isCodeMode) continue + + if (char === '/' && next === '/') { + modes.push({ type: 'line-comment' }) + i++ + continue + } + if (char === '/' && next === '*') { + modes.push({ type: 'block-comment' }) + i++ + continue + } + if (char === "'") { + modes.push({ type: 'single' }) + continue + } + if (char === '"') { + modes.push({ type: 'double' }) + continue + } + if (char === '`') { + modes.push({ type: 'template' }) + continue + } + + if (mode.type === 'template-expression') { + if (char === '{') { + mode.depth += 1 + continue + } + if (char === '}') { + mode.depth -= 1 + if (mode.depth === 0) modes.pop() + continue + } + } + + if (this.startsWithStaticImport(template, i) || this.startsWithRequireCall(template, i)) { + return true + } + } + + return false + } + + private startsWithStaticImport(template: string, index: number): boolean { + if (!this.matchesKeywordAt(template, index, 'import')) { + return false + } + const nextIndex = this.skipWhitespace(template, index + 'import'.length) + if (nextIndex === index + 'import'.length) { + return false + } + return template[nextIndex] !== '(' + } + + private startsWithRequireCall(template: string, index: number): boolean { + if (!this.matchesKeywordAt(template, index, 'require')) { + return false + } + const openParenIndex = this.skipWhitespace(template, index + 'require'.length) + if (template[openParenIndex] !== '(') { + return false + } + const argumentIndex = this.skipWhitespace(template, openParenIndex + 1) + return ( + template[argumentIndex] === 
"'" || + template[argumentIndex] === '"' || + template[argumentIndex] === '`' + ) + } + + private matchesKeywordAt(template: string, index: number, keyword: string): boolean { + if (!template.startsWith(keyword, index)) { + return false + } + const before = index > 0 ? template[index - 1] : '' + const after = template[index + keyword.length] ?? '' + return !this.isJavaScriptIdentifierChar(before) && !this.isJavaScriptIdentifierChar(after) + } + + private skipWhitespace(template: string, index: number): number { + let cursor = index + while (cursor < template.length && /\s/.test(template[cursor])) { + cursor++ + } + return cursor + } + + private isJavaScriptIdentifierChar(char: string): boolean { + return /[A-Za-z0-9_$]/.test(char) } private formatContextVariableReference( diff --git a/apps/sim/executor/variables/resolvers/block.test.ts b/apps/sim/executor/variables/resolvers/block.test.ts index 1092e34f331..a739dc28734 100644 --- a/apps/sim/executor/variables/resolvers/block.test.ts +++ b/apps/sim/executor/variables/resolvers/block.test.ts @@ -175,6 +175,9 @@ function createTestContext( return { executionContext: { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', blockStates: contextBlockStates ?? 
new Map(), }, executionState: state, @@ -256,7 +259,12 @@ describe('BlockResolver', () => { user: { profile: { name: 'Alice' } }, items: Array.from({ length: 100 }, (_, index) => ({ id: index })), }, - { thresholdBytes: 64 } + { + thresholdBytes: 64, + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + } ) const ctx = createTestContext('current', { source: compacted }) diff --git a/apps/sim/executor/variables/resolvers/loop.test.ts b/apps/sim/executor/variables/resolvers/loop.test.ts index 3da6e189d0e..48576ffc67a 100644 --- a/apps/sim/executor/variables/resolvers/loop.test.ts +++ b/apps/sim/executor/variables/resolvers/loop.test.ts @@ -62,6 +62,9 @@ function createTestContext( ): ResolutionContext { return { executionContext: { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', loopExecutions: loopExecutions ?? new Map(), }, executionState: { @@ -414,7 +417,12 @@ describe('LoopResolver', () => { const resolver = new LoopResolver(workflow) const compacted = await compactExecutionPayload( { results: [[{ response: 'a' }], [{ response: 'b', payload: 'x'.repeat(2048) }]] }, - { thresholdBytes: 256 } + { + thresholdBytes: 256, + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + } ) const ctx = createTestContext('block-outside', undefined, new Map(), { 'loop-1': compacted, diff --git a/apps/sim/lib/core/utils/user-file.ts b/apps/sim/lib/core/utils/user-file.ts index 0069eb4fbae..deee12cbf04 100644 --- a/apps/sim/lib/core/utils/user-file.ts +++ b/apps/sim/lib/core/utils/user-file.ts @@ -42,6 +42,27 @@ export function isUserFileWithMetadata(value: unknown): value is UserFile { return typeof candidate.size === 'number' && typeof candidate.type === 'string' } +/** + * Checks if a value matches the display-safe UserFile metadata shape after internal fields are stripped. 
+ */ +export function isUserFileDisplayMetadata(value: unknown): value is Record { + if (!value || typeof value !== 'object' || Array.isArray(value)) { + return false + } + + const candidate = value as Record + const url = typeof candidate.url === 'string' ? candidate.url : '' + + return ( + typeof candidate.id === 'string' && + typeof candidate.name === 'string' && + url.length > 0 && + typeof candidate.size === 'number' && + typeof candidate.type === 'string' && + (candidate.id.startsWith('file_') || url.includes('/api/files/serve/')) + ) +} + /** * Filters a UserFile object to only include display fields. * Used for both UI display and log sanitization. diff --git a/apps/sim/lib/execution/payloads/cache.ts b/apps/sim/lib/execution/payloads/cache.ts index c1b6ec216ee..507a8dd4ccc 100644 --- a/apps/sim/lib/execution/payloads/cache.ts +++ b/apps/sim/lib/execution/payloads/cache.ts @@ -17,10 +17,21 @@ interface LargeValueCacheScope { const inMemoryValues = new Map< string, - { value: unknown; size: number; expiresAt: number; scope?: LargeValueCacheScope } + { + value: unknown + size: number + expiresAt: number + scope?: LargeValueCacheScope + recoverable: boolean + } >() let inMemoryBytes = 0 +export function clearLargeValueCacheForTests(): void { + inMemoryValues.clear() + inMemoryBytes = 0 +} + function cleanupExpiredValues(now = Date.now()): void { for (const [id, entry] of inMemoryValues.entries()) { if (entry.expiresAt <= now) { @@ -34,29 +45,48 @@ export function cacheLargeValue( id: string, value: unknown, size: number, - scope?: LargeValueCacheScope -): void { + scope?: LargeValueCacheScope, + options: { recoverable?: boolean } = {} +): boolean { if (size > MAX_IN_MEMORY_BYTES) { - return + return false } cleanupExpiredValues() + const existing = inMemoryValues.get(id) + if (existing) { + inMemoryValues.delete(id) + inMemoryBytes -= existing.size + } + while (inMemoryBytes + size > MAX_IN_MEMORY_BYTES && inMemoryValues.size > 0) { - const oldestId = 
inMemoryValues.keys().next().value - if (!oldestId) break - const oldest = inMemoryValues.get(oldestId) - inMemoryValues.delete(oldestId) + const oldestRecoverableId = Array.from(inMemoryValues.entries()).find( + ([, entry]) => entry.recoverable + )?.[0] + if (!oldestRecoverableId) break + const oldest = inMemoryValues.get(oldestRecoverableId) + inMemoryValues.delete(oldestRecoverableId) inMemoryBytes -= oldest?.size ?? 0 } + if (inMemoryBytes + size > MAX_IN_MEMORY_BYTES) { + if (existing) { + inMemoryValues.set(id, existing) + inMemoryBytes += existing.size + } + return false + } + inMemoryValues.set(id, { value, size, scope, + recoverable: options.recoverable ?? false, expiresAt: Date.now() + FALLBACK_TTL_MS, }) inMemoryBytes += size + return true } function scopeMatchesRef( diff --git a/apps/sim/lib/execution/payloads/serializer.test.ts b/apps/sim/lib/execution/payloads/serializer.test.ts index 0119201ecb8..453c1637ece 100644 --- a/apps/sim/lib/execution/payloads/serializer.test.ts +++ b/apps/sim/lib/execution/payloads/serializer.test.ts @@ -10,6 +10,12 @@ import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import type { UserFile } from '@/executor/types' import { navigatePath } from '@/executor/variables/resolvers/reference' +const TEST_EXECUTION_CONTEXT = { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', +} + describe('compactExecutionPayload', () => { it('keeps small JSON payloads inline', async () => { const value = { result: { id: 'event-1', text: 'hello' } } @@ -53,10 +59,17 @@ describe('compactExecutionPayload', () => { it('stores oversized arrays as refs and allows nested path navigation in-process', async () => { const results = Array.from({ length: 100 }, (_, index) => [{ event: { id: `event-${index}` } }]) - const compacted = await compactExecutionPayload({ results }, { thresholdBytes: 256 }) + const compacted = await compactExecutionPayload( + { results }, + { thresholdBytes: 256, 
...TEST_EXECUTION_CONTEXT } + ) expect(isLargeValueRef(compacted.results)).toBe(true) - expect(navigatePath(compacted, ['results', '1', '0', 'event', 'id'])).toBe('event-1') + expect( + navigatePath(compacted, ['results', '1', '0', 'event', 'id'], { + executionContext: TEST_EXECUTION_CONTEXT, + }) + ).toBe('event-1') }) it('does not double-spill existing refs', async () => { diff --git a/apps/sim/lib/execution/payloads/store.test.ts b/apps/sim/lib/execution/payloads/store.test.ts index 13bb05cdb9a..d9c052654ef 100644 --- a/apps/sim/lib/execution/payloads/store.test.ts +++ b/apps/sim/lib/execution/payloads/store.test.ts @@ -2,7 +2,11 @@ * @vitest-environment node */ import { beforeEach, describe, expect, it, vi } from 'vitest' -import { cacheLargeValue, materializeLargeValueRefSync } from '@/lib/execution/payloads/cache' +import { + cacheLargeValue, + clearLargeValueCacheForTests, + materializeLargeValueRefSync, +} from '@/lib/execution/payloads/cache' import { MAX_DURABLE_LARGE_VALUE_BYTES, readLargeValueRefFromStorage, @@ -31,6 +35,7 @@ vi.mock('@/app/api/files/authorization', () => ({ describe('large execution payload store', () => { beforeEach(() => { vi.clearAllMocks() + clearLargeValueCacheForTests() mockUploadFile.mockImplementation(async ({ customKey }) => ({ key: customKey })) mockVerifyFileAccess.mockResolvedValue(true) }) @@ -254,6 +259,33 @@ describe('large execution payload store', () => { ).toBeUndefined() }) + it('does not evict unrecoverable in-memory refs for recoverable cache entries', () => { + const scope = { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + } + const unrecoverableId = 'lv_UNRECOVER001' + const unrecoverableRef = { + __simLargeValueRef: true, + version: 1, + id: unrecoverableId, + kind: 'object', + size: 200 * 1024 * 1024, + executionId: scope.executionId, + } as const + + expect(cacheLargeValue(unrecoverableId, { retained: true }, unrecoverableRef.size, scope)).toBe( + true + ) + expect( + 
cacheLargeValue('lv_RECOVER00001', { recoverable: true }, 70 * 1024 * 1024, scope, { + recoverable: true, + }) + ).toBe(false) + expect(materializeLargeValueRefSync(unrecoverableRef, scope)).toEqual({ retained: true }) + }) + it('rejects durable refs when caller omits workspace and workflow context', async () => { await expect( readLargeValueRefFromStorage( diff --git a/apps/sim/lib/execution/payloads/store.ts b/apps/sim/lib/execution/payloads/store.ts index 222b743e498..2256813b941 100644 --- a/apps/sim/lib/execution/payloads/store.ts +++ b/apps/sim/lib/execution/payloads/store.ts @@ -106,7 +106,10 @@ export async function storeLargeValue( assertDurableLargeValueSize(size) const id = `lv_${generateShortId(12)}` const key = await persistValue(id, json, context) - cacheLargeValue(id, value, size, context) + const cached = cacheLargeValue(id, value, size, context, { recoverable: Boolean(key) }) + if (!key && !cached) { + throw new Error('Cannot retain large execution value without durable storage') + } return { __simLargeValueRef: true, @@ -152,10 +155,16 @@ export async function materializeLargeValueRef( if (value === undefined) { return undefined } - cacheLargeValue(ref.id, value, ref.size, { - ...context, - executionId: ref.executionId ?? context.executionId, - }) + cacheLargeValue( + ref.id, + value, + ref.size, + { + ...context, + executionId: ref.executionId ?? 
context.executionId, + }, + { recoverable: true } + ) return value } catch (error) { logger.warn('Failed to materialize persisted large execution value', { From f9d4725dec3f2ffda5223085bd61fc093bcbdc5a Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 11 May 2026 18:44:42 -0700 Subject: [PATCH 06/11] ui inconsistencies --- apps/realtime/src/database/operations.ts | 71 +++++++----- .../subflow-editor/subflow-editor.tsx | 10 +- .../editor/hooks/use-subflow-editor.ts | 103 +++++++++--------- apps/sim/executor/utils/parallel-expansion.ts | 6 +- apps/sim/hooks/use-collaborative-workflow.ts | 21 +++- apps/sim/lib/api/contracts/workflows.ts | 2 + .../lib/workflows/persistence/utils.test.ts | 37 ++++++- packages/workflow-persistence/src/load.ts | 16 +++ .../src/subflow-helpers.ts | 12 ++ 9 files changed, 182 insertions(+), 96 deletions(-) diff --git a/apps/realtime/src/database/operations.ts b/apps/realtime/src/database/operations.ts index 3cb001d2340..38a98b14bb3 100644 --- a/apps/realtime/src/database/operations.ts +++ b/apps/realtime/src/database/operations.ts @@ -40,6 +40,7 @@ const db = socketDb const DEFAULT_LOOP_ITERATIONS = 5 const DEFAULT_PARALLEL_COUNT = 5 +const DEFAULT_PARALLEL_BATCH_SIZE = 20 /** Minimal block shape needed for protection and descendant checks */ interface DbBlockRef { @@ -740,9 +741,9 @@ async function handleBlocksOperationTx( workflowId, type: 'parallel', config: { - parallelType: 'fixed', + parallelType: 'count', count: DEFAULT_PARALLEL_COUNT, - batchSize: 20, + batchSize: DEFAULT_PARALLEL_BATCH_SIZE, nodes: [], }, }) @@ -1621,11 +1622,23 @@ async function handleSubflowOperationTx( logger.debug(`Updating subflow ${payload.id} with config:`, payload.config) - // Update the subflow configuration + // Read-modify-write merge so partial config payloads never wipe other fields + // (e.g. 
an iteration-only update from one client should not drop batchSize set by another) + const existingSubflow = await tx + .select({ config: workflowSubflows.config }) + .from(workflowSubflows) + .where( + and(eq(workflowSubflows.id, payload.id), eq(workflowSubflows.workflowId, workflowId)) + ) + .limit(1) + + const existingConfig = (existingSubflow[0]?.config as Record) || {} + const mergedConfig = { ...existingConfig, ...payload.config } + const updateResult = await tx .update(workflowSubflows) .set({ - config: payload.config, + config: mergedConfig, updatedAt: new Date(), }) .where( @@ -1678,31 +1691,35 @@ async function handleSubflowOperationTx( }) .where(and(eq(workflowBlocks.id, payload.id), eq(workflowBlocks.workflowId, workflowId))) } else if (payload.type === 'parallel') { - // Update the parallel block's data properties - const blockData = { - ...payload.config, - width: 500, - height: 300, - type: 'subflowNode', - } - - // Include count if provided - if (payload.config.count !== undefined) { - blockData.count = payload.config.count - } - - // Include collection if provided - if (payload.config.distribution !== undefined) { - blockData.collection = payload.config.distribution - } + const existingBlock = await tx + .select({ data: workflowBlocks.data }) + .from(workflowBlocks) + .where(and(eq(workflowBlocks.id, payload.id), eq(workflowBlocks.workflowId, workflowId))) + .limit(1) - // Include parallelType if provided - if (payload.config.parallelType !== undefined) { - blockData.parallelType = payload.config.parallelType - } + const existingData = (existingBlock[0]?.data as any) || {} - if (payload.config.batchSize !== undefined) { - blockData.batchSize = payload.config.batchSize + const blockData: any = { + ...existingData, + type: 'subflowNode', + width: existingData.width ?? 500, + height: existingData.height ?? 300, + count: + payload.config.count !== undefined + ? payload.config.count + : (existingData.count ?? 
DEFAULT_PARALLEL_COUNT), + parallelType: + payload.config.parallelType !== undefined + ? payload.config.parallelType + : (existingData.parallelType ?? 'count'), + collection: + payload.config.distribution !== undefined + ? payload.config.distribution + : (existingData.collection ?? ''), + batchSize: + payload.config.batchSize !== undefined + ? payload.config.batchSize + : (existingData.batchSize ?? DEFAULT_PARALLEL_BATCH_SIZE), } await tx diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx index af6bbc2d922..4805266b950 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/subflow-editor/subflow-editor.tsx @@ -61,9 +61,9 @@ export function SubflowEditor({ editorContainerRef, handleSubflowTypeChange, handleSubflowIterationsChange, - handleSubflowIterationsSave, + handleSubflowIterationsBlur, handleParallelBatchSizeChange, - handleParallelBatchSizeSave, + handleParallelBatchSizeBlur, handleSubflowEditorChange, handleSubflowTagSelect, highlightWithReferences, @@ -153,8 +153,7 @@ export function SubflowEditor({ type='text' value={inputValue} onChange={handleSubflowIterationsChange} - onBlur={handleSubflowIterationsSave} - onKeyDown={(e) => e.key === 'Enter' && handleSubflowIterationsSave()} + onBlur={handleSubflowIterationsBlur} disabled={!userCanEdit} className='mb-1' /> @@ -219,8 +218,7 @@ export function SubflowEditor({ type='text' value={batchSizeValue} onChange={handleParallelBatchSizeChange} - onBlur={handleParallelBatchSizeSave} - onKeyDown={(e) => e.key === 'Enter' && handleParallelBatchSizeSave()} + onBlur={handleParallelBatchSizeBlur} 
disabled={!userCanEdit} className='mb-1' /> diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts index 90eefe3d8b2..915e7fb77dd 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts +++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/hooks/use-subflow-editor.ts @@ -59,10 +59,17 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId const textareaRef = useRef(null) const editorContainerRef = useRef(null) - const [tempInputValue, setTempInputValue] = useState(null) - const [tempBatchSizeValue, setTempBatchSizeValue] = useState(null) const [showTagDropdown, setShowTagDropdown] = useState(false) const [cursorPosition, setCursorPosition] = useState(0) + /** + * In-flight string buffers for the numeric inputs. These let the user + * temporarily clear or mid-type the field (e.g. backspace to empty before + * typing a new value) without React snapping the value back from the store. + * Persistence still happens on every keystroke that parses to a number; + * the buffer is cleared on blur so the input rebinds to the store value. + */ + const [iterationsBuffer, setIterationsBuffer] = useState(null) + const [batchSizeBuffer, setBatchSizeBuffer] = useState(null) const isSubflow = currentBlock && (currentBlock.type === 'loop' || currentBlock.type === 'parallel') @@ -218,66 +225,54 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId ) /** - * Handle iterations input change + * Persist iterations on every keystroke that parses to a number. The + * visible string is buffered so transient states (empty, "0", partial typing) + * render correctly without snapping back to the persisted value. 
*/ const handleSubflowIterationsChange = useCallback( (e: React.ChangeEvent) => { - if (!subflowConfig) return + if (!currentBlockId || !isSubflow || !subflowConfig || !currentBlock) return const sanitizedValue = e.target.value.replace(/[^0-9]/g, '') + setIterationsBuffer(sanitizedValue) const numValue = Number.parseInt(sanitizedValue) - - if (!Number.isNaN(numValue)) { - setTempInputValue(numValue.toString()) - } else { - setTempInputValue(sanitizedValue) - } + if (Number.isNaN(numValue)) return + collaborativeUpdateIterationCount( + currentBlockId, + currentBlock.type as 'loop' | 'parallel', + Math.max(1, numValue) + ) }, - [subflowConfig] + [currentBlockId, isSubflow, subflowConfig, currentBlock, collaborativeUpdateIterationCount] ) /** - * Save iterations value + * Clears the iterations buffer on blur so the field re-binds to the + * canonical store value (e.g. if the user left it empty, it snaps back + * to the last persisted count). */ - const handleSubflowIterationsSave = useCallback(() => { - if (!currentBlockId || !isSubflow || !subflowConfig || !currentBlock) return - const value = Number.parseInt(tempInputValue ?? 
'5') - - if (!Number.isNaN(value)) { - const newValue = Math.max(1, value) - collaborativeUpdateIterationCount( - currentBlockId, - currentBlock.type as 'loop' | 'parallel', - newValue - ) - } - setTempInputValue(null) - }, [ - tempInputValue, - currentBlockId, - isSubflow, - subflowConfig, - currentBlock, - collaborativeUpdateIterationCount, - ]) - - const handleParallelBatchSizeChange = useCallback((e: React.ChangeEvent) => { - const sanitizedValue = e.target.value.replace(/[^0-9]/g, '') - const numValue = Number.parseInt(sanitizedValue) - if (!Number.isNaN(numValue)) { - setTempBatchSizeValue(Math.min(20, numValue).toString()) - } else { - setTempBatchSizeValue(sanitizedValue) - } + const handleSubflowIterationsBlur = useCallback(() => { + setIterationsBuffer(null) }, []) - const handleParallelBatchSizeSave = useCallback(() => { - if (!currentBlockId || currentBlock?.type !== 'parallel') return - const value = Number.parseInt(tempBatchSizeValue ?? '20') - if (!Number.isNaN(value)) { - collaborativeUpdateParallelBatchSize(currentBlockId, Math.min(20, Math.max(1, value))) - } - setTempBatchSizeValue(null) - }, [tempBatchSizeValue, currentBlockId, currentBlock, collaborativeUpdateParallelBatchSize]) + /** + * Persist parallel batch size on every keystroke that parses to a number, + * clamped to 1..20. Buffered the same way as iterations. 
+ */ + const handleParallelBatchSizeChange = useCallback( + (e: React.ChangeEvent) => { + if (!currentBlockId || currentBlock?.type !== 'parallel') return + const sanitizedValue = e.target.value.replace(/[^0-9]/g, '') + setBatchSizeBuffer(sanitizedValue) + const numValue = Number.parseInt(sanitizedValue) + if (Number.isNaN(numValue)) return + collaborativeUpdateParallelBatchSize(currentBlockId, Math.min(20, Math.max(1, numValue))) + }, + [currentBlockId, currentBlock, collaborativeUpdateParallelBatchSize] + ) + + const handleParallelBatchSizeBlur = useCallback(() => { + setBatchSizeBuffer(null) + }, []) /** * Handle editor value change (collection/condition) @@ -369,8 +364,8 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId typeof configCollection === 'string' ? configCollection : JSON.stringify(configCollection) || '' const conditionString = typeof configCondition === 'string' ? configCondition : '' - const inputValue = tempInputValue ?? iterations.toString() - const batchSizeValue = tempBatchSizeValue ?? parallelBatchSize.toString() + const inputValue = iterationsBuffer ?? iterations.toString() + const batchSizeValue = batchSizeBuffer ?? parallelBatchSize.toString() const editorValue = isConditionMode ? 
conditionString : collectionString // Type options for combobox @@ -401,9 +396,9 @@ export function useSubflowEditor(currentBlock: BlockState | null, currentBlockId // Handlers handleSubflowTypeChange, handleSubflowIterationsChange, - handleSubflowIterationsSave, + handleSubflowIterationsBlur, handleParallelBatchSizeChange, - handleParallelBatchSizeSave, + handleParallelBatchSizeBlur, handleSubflowEditorChange, handleSubflowTagSelect, highlightWithReferences, diff --git a/apps/sim/executor/utils/parallel-expansion.ts b/apps/sim/executor/utils/parallel-expansion.ts index 64ddcac73fe..f98c9f49e57 100644 --- a/apps/sim/executor/utils/parallel-expansion.ts +++ b/apps/sim/executor/utils/parallel-expansion.ts @@ -212,7 +212,11 @@ export class ParallelExpander { const baseTargetId = extractBaseBlockId(edge.target) if (!blocksSet.has(baseTargetId)) continue - for (let i = 1; i < branchCount; i++) { + // Include branch 0 so per-batch re-expansion restores the template's + // incoming-edge bookkeeping that earlier batches consumed during + // edge processing. Without this, identifyBoundaryNodes mis-classifies + // chained children as entry nodes after the first batch. 
+ for (let i = 0; i < branchCount; i++) { const sourceNodeId = buildBranchNodeId(blockId, i) const targetNodeId = buildBranchNodeId(baseTargetId, i) const sourceNode = dag.nodes.get(sourceNodeId) diff --git a/apps/sim/hooks/use-collaborative-workflow.ts b/apps/sim/hooks/use-collaborative-workflow.ts index f4442fb617b..a4b58338886 100644 --- a/apps/sim/hooks/use-collaborative-workflow.ts +++ b/apps/sim/hooks/use-collaborative-workflow.ts @@ -1774,23 +1774,32 @@ export function useCollaborativeWorkflow() { .filter((b) => b.data?.parentId === nodeId) .map((b) => b.id) + const clampedCount = Math.max(1, count) + if (iterationType === 'loop') { const currentLoopType = currentBlock.data?.loopType || 'for' - const currentCollection = currentBlock.data?.collection || '' + const existingLoop = useWorkflowStore.getState().loops[nodeId] + const nextForEachItems = existingLoop?.forEachItems ?? currentBlock.data?.collection ?? '' + const nextWhileCondition = + existingLoop?.whileCondition ?? currentBlock.data?.whileCondition ?? '' + const nextDoWhileCondition = + existingLoop?.doWhileCondition ?? currentBlock.data?.doWhileCondition ?? 
'' const config = { id: nodeId, nodes: childNodes, - iterations: Math.max(1, count), + iterations: clampedCount, loopType: currentLoopType, - forEachItems: currentCollection, + forEachItems: nextForEachItems, + whileCondition: nextWhileCondition, + doWhileCondition: nextDoWhileCondition, } executeQueuedOperation( SUBFLOW_OPERATIONS.UPDATE, OPERATION_TARGETS.SUBFLOW, { id: nodeId, type: 'loop', config }, - () => useWorkflowStore.getState().updateLoopCount(nodeId, count) + () => useWorkflowStore.getState().updateLoopCount(nodeId, clampedCount) ) } else { const currentDistribution = currentBlock.data?.collection || '' @@ -1800,7 +1809,7 @@ export function useCollaborativeWorkflow() { const config = { id: nodeId, nodes: childNodes, - count: Math.max(1, count), + count: clampedCount, distribution: currentDistribution, parallelType: currentParallelType, batchSize, @@ -1810,7 +1819,7 @@ export function useCollaborativeWorkflow() { SUBFLOW_OPERATIONS.UPDATE, OPERATION_TARGETS.SUBFLOW, { id: nodeId, type: 'parallel', config }, - () => useWorkflowStore.getState().updateParallelCount(nodeId, count) + () => useWorkflowStore.getState().updateParallelCount(nodeId, clampedCount) ) } }, diff --git a/apps/sim/lib/api/contracts/workflows.ts b/apps/sim/lib/api/contracts/workflows.ts index af55e5ef708..46e5095c933 100644 --- a/apps/sim/lib/api/contracts/workflows.ts +++ b/apps/sim/lib/api/contracts/workflows.ts @@ -20,6 +20,7 @@ const workflowBlockDataSchema = z.object({ whileCondition: z.string().optional(), doWhileCondition: z.string().optional(), parallelType: z.enum(['collection', 'count']).optional(), + batchSize: z.number().optional(), type: z.string().optional(), canonicalModes: z.record(z.string(), z.enum(['basic', 'advanced'])).optional(), }) @@ -90,6 +91,7 @@ const workflowParallelSchema = z.object({ .optional(), count: z.number().optional(), parallelType: z.enum(['count', 'collection']).optional(), + batchSize: z.number().optional(), enabled: z.boolean().optional(), locked: 
z.boolean().optional(), }) diff --git a/apps/sim/lib/workflows/persistence/utils.test.ts b/apps/sim/lib/workflows/persistence/utils.test.ts index 82997c4f518..e6b9dbb086a 100644 --- a/apps/sim/lib/workflows/persistence/utils.test.ts +++ b/apps/sim/lib/workflows/persistence/utils.test.ts @@ -179,6 +179,7 @@ const mockBlocksFromDb = [ name: 'Parallel Container', position: { x: 600, y: 50 }, height: 250, + count: 3, data: { width: 500, height: 300, parallelType: 'count', count: 3 }, }), mockWorkflowId @@ -225,7 +226,10 @@ const mockSubflowsFromDb = [ config: { id: 'parallel-1', nodes: ['block-3'], + count: 5, distribution: ['item1', 'item2'], + parallelType: 'count', + batchSize: 1, }, }, ] @@ -260,7 +264,8 @@ const mockWorkflowState = createWorkflowState({ name: 'Parallel Container', position: { x: 600, y: 50 }, height: 250, - data: { width: 500, height: 300, parallelType: 'count', count: 3 }, + count: 3, + data: { width: 500, height: 300, parallelType: 'count', count: 3, batchSize: 1 }, }), 'block-3': createApiBlock({ id: 'block-3', @@ -292,6 +297,8 @@ const mockWorkflowState = createWorkflowState({ id: 'parallel-1', nodes: ['block-3'], distribution: ['item1', 'item2'], + parallelType: 'count', + batchSize: 1, }, }, }) @@ -418,8 +425,16 @@ describe('Database Helpers', () => { count: 5, distribution: ['item1', 'item2'], parallelType: 'count', + batchSize: 1, enabled: true, }) + expect(result?.blocks['parallel-1'].data).toEqual( + expect.objectContaining({ + count: 5, + parallelType: 'count', + batchSize: 1, + }) + ) }) it('should return null when no blocks are found', async () => { @@ -709,6 +724,20 @@ describe('Database Helpers', () => { workflowId: mockWorkflowId, type: 'loop', }) + expect(capturedSubflowInserts).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + id: 'parallel-1', + workflowId: mockWorkflowId, + type: 'parallel', + config: expect.objectContaining({ + count: 3, + parallelType: 'count', + batchSize: 1, + }), + }), + ]) + ) }) 
it('should regenerate missing loop and parallel definitions from block data', async () => { @@ -748,7 +777,11 @@ describe('Database Helpers', () => { expect(capturedSubflowInserts).toEqual( expect.arrayContaining([ expect.objectContaining({ id: 'loop-1', type: 'loop' }), - expect.objectContaining({ id: 'parallel-1', type: 'parallel' }), + expect.objectContaining({ + id: 'parallel-1', + type: 'parallel', + config: expect.objectContaining({ batchSize: 1 }), + }), ]) ) }) diff --git a/packages/workflow-persistence/src/load.ts b/packages/workflow-persistence/src/load.ts index 3f6f8d2de39..288e9217e8a 100644 --- a/packages/workflow-persistence/src/load.ts +++ b/packages/workflow-persistence/src/load.ts @@ -4,6 +4,7 @@ import type { BlockState, Loop, Parallel } from '@sim/workflow-types/workflow' import { SUBFLOW_TYPES } from '@sim/workflow-types/workflow' import { and, eq, isNull } from 'drizzle-orm' import type { Edge } from 'reactflow' +import { clampParallelBatchSize } from './subflow-helpers' import type { DbOrTx, NormalizedWorkflowData } from './types' const logger = createLogger('WorkflowPersistenceLoad') @@ -141,9 +142,24 @@ export async function loadWorkflowFromNormalizedTablesRaw( (config as Parallel).parallelType === 'collection' ? (config as Parallel).parallelType : 'count', + batchSize: clampParallelBatchSize((config as Parallel).batchSize), enabled: blocksMap[subflow.id]?.enabled ?? true, } parallels[subflow.id] = parallel + + if (blocksMap[subflow.id]) { + const block = blocksMap[subflow.id] + blocksMap[subflow.id] = { + ...block, + data: { + ...block.data, + count: parallel.count, + collection: parallel.distribution ?? block.data?.collection ?? 
'', + parallelType: parallel.parallelType, + batchSize: parallel.batchSize, + }, + } + } } else { logger.warn(`Unknown subflow type: ${subflow.type} for subflow ${subflow.id}`) } diff --git a/packages/workflow-persistence/src/subflow-helpers.ts b/packages/workflow-persistence/src/subflow-helpers.ts index b0f552f1977..cf0c92b370b 100644 --- a/packages/workflow-persistence/src/subflow-helpers.ts +++ b/packages/workflow-persistence/src/subflow-helpers.ts @@ -1,6 +1,16 @@ import type { BlockState, Loop, Parallel } from '@sim/workflow-types/workflow' const DEFAULT_LOOP_ITERATIONS = 5 +const DEFAULT_PARALLEL_BATCH_SIZE = 20 +const MAX_PARALLEL_BATCH_SIZE = 20 + +export function clampParallelBatchSize(batchSize: unknown): number { + const parsed = typeof batchSize === 'number' ? batchSize : Number.parseInt(String(batchSize), 10) + if (Number.isNaN(parsed)) { + return DEFAULT_PARALLEL_BATCH_SIZE + } + return Math.max(1, Math.min(MAX_PARALLEL_BATCH_SIZE, parsed)) +} export function findChildNodes(containerId: string, blocks: Record): string[] { return Object.values(blocks) @@ -50,6 +60,7 @@ export function convertParallelBlockToParallel( validatedParallelType === 'collection' ? 
parallelBlock.data?.collection || '' : undefined const count = parallelBlock.data?.count || 5 + const batchSize = clampParallelBatchSize(parallelBlock.data?.batchSize) return { id: parallelBlockId, @@ -57,6 +68,7 @@ export function convertParallelBlockToParallel( distribution, count, parallelType: validatedParallelType, + batchSize, enabled: parallelBlock.enabled, } } From 27f6a68ce5f72eb8d3e0922bc2ea53c1a7ddac27 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 11 May 2026 18:54:37 -0700 Subject: [PATCH 07/11] cleanup redundant code --- .../app/api/workflows/[id]/execute/route.ts | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index 6538e7eb397..9d042cea756 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -1247,7 +1247,14 @@ async function handleExecutePost( }) await handlePostExecutionPauseState({ result, workflowId, executionId, loggingSession }) - const compactTerminalLogs = await compactBlockLogs(result.logs, { + + /** + * Compact block logs once and reuse across cancelled/timeout/paused/complete + * SSE events. Walks all block logs and durably serializes large values to + * object storage, so doing it twice would double the latency and storage + * load on the happy path. 
+ */ + const compactedBlockLogs = await compactBlockLogs(result.logs, { workspaceId, workflowId, executionId, @@ -1274,7 +1281,7 @@ async function handleExecutePost( data: { error: timeoutErrorMessage, duration: result.metadata?.duration || 0, - finalBlockLogs: compactTerminalLogs, + finalBlockLogs: compactedBlockLogs, }, }, 'error' @@ -1291,7 +1298,7 @@ async function handleExecutePost( workflowId, data: { duration: result.metadata?.duration || 0, - finalBlockLogs: compactTerminalLogs, + finalBlockLogs: compactedBlockLogs, }, }, 'cancelled' @@ -1319,13 +1326,6 @@ async function handleExecutePost( preserveUserFileBase64: true, preserveRoot: true, }) - const compactFinalBlockLogs = await compactBlockLogs(result.logs, { - workspaceId, - workflowId, - executionId, - userId: actorUserId, - requireDurable: true, - }) if (result.status === 'paused') { finalMetaStatus = 'complete' @@ -1340,7 +1340,7 @@ async function handleExecutePost( duration: result.metadata?.duration || 0, startTime: result.metadata?.startTime || startTime.toISOString(), endTime: result.metadata?.endTime || new Date().toISOString(), - finalBlockLogs: compactFinalBlockLogs, + finalBlockLogs: compactedBlockLogs, }, }, 'complete' @@ -1359,7 +1359,7 @@ async function handleExecutePost( duration: result.metadata?.duration || 0, startTime: result.metadata?.startTime || startTime.toISOString(), endTime: result.metadata?.endTime || new Date().toISOString(), - finalBlockLogs: compactFinalBlockLogs, + finalBlockLogs: compactedBlockLogs, }, }, 'complete' From 2de66c56dd91965e65f5d1459862038f4fb88a4d Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Mon, 11 May 2026 19:06:11 -0700 Subject: [PATCH 08/11] address more comments --- .../executor/orchestrators/parallel.test.ts | 62 +++++++++++++++++++ apps/sim/executor/orchestrators/parallel.ts | 24 +++++++ apps/sim/lib/execution/isolated-vm-worker.cjs | 51 ++++++++++++--- 3 files changed, 128 insertions(+), 9 deletions(-) diff --git 
a/apps/sim/executor/orchestrators/parallel.test.ts b/apps/sim/executor/orchestrators/parallel.test.ts index 85de7846fb8..96aa1ae684d 100644 --- a/apps/sim/executor/orchestrators/parallel.test.ts +++ b/apps/sim/executor/orchestrators/parallel.test.ts @@ -8,6 +8,14 @@ import { ParallelOrchestrator } from '@/executor/orchestrators/parallel' import type { ExecutionContext } from '@/executor/types' import { buildBranchNodeId } from '@/executor/utils/subflow-utils' +const { mockCompactSubflowResults } = vi.hoisted(() => ({ + mockCompactSubflowResults: vi.fn(async (results: unknown) => results), +})) + +vi.mock('@/lib/execution/payloads/serializer', () => ({ + compactSubflowResults: mockCompactSubflowResults, +})) + function createDag(): DAG { return { nodes: new Map(), @@ -76,6 +84,7 @@ function createContext(overrides: Partial = {}): ExecutionCont describe('ParallelOrchestrator', () => { beforeEach(() => { vi.clearAllMocks() + mockCompactSubflowResults.mockImplementation(async (results: unknown) => results) }) it('awaits empty-subflow lifecycle callbacks before returning the empty scope', async () => { @@ -244,4 +253,57 @@ describe('ParallelOrchestrator', () => { expect(state.unmarkExecuted).toHaveBeenCalledWith(incomingBranchId) expect(state.unmarkExecuted).not.toHaveBeenCalledWith(previousBranchId) }) + + it('compacts accumulated outputs before scheduling later batches', async () => { + const dag = createDag() + const templateBranchId = buildBranchNodeId('task-1', 0) + dag.nodes.set(templateBranchId, { + id: templateBranchId, + block: { + id: 'task-1', + position: { x: 0, y: 0 }, + config: { tool: '', params: {} }, + inputs: {}, + outputs: {}, + metadata: { id: 'function', name: 'Task 1' }, + enabled: true, + }, + incomingEdges: new Set(), + outgoingEdges: new Set(), + metadata: { parallelId: 'parallel-1', isParallelBranch: true, branchIndex: 0 }, + }) + const orchestrator = new ParallelOrchestrator(dag, createState(), null, {}) + const previousOutputs = [{ output: 
'previous' }] + const incomingOutputs = [{ output: 'incoming' }] + const compactedPrevious = [{ output: 'compacted-previous' }] + const compactedIncoming = [{ output: 'compacted-incoming' }] + mockCompactSubflowResults.mockResolvedValueOnce([compactedPrevious, compactedIncoming]) + const scope = { + parallelId: 'parallel-1', + totalBranches: 3, + batchSize: 1, + currentBatchStart: 0, + currentBatchSize: 2, + accumulatedOutputs: new Map([[0, previousOutputs]]), + branchOutputs: new Map([[1, incomingOutputs]]), + } + const ctx = createContext({ + parallelExecutions: new Map([['parallel-1', scope]]), + }) + + const result = await orchestrator.aggregateParallelResults(ctx, 'parallel-1') + + expect(result).toMatchObject({ allBranchesComplete: false, completedBranches: 2 }) + expect(mockCompactSubflowResults).toHaveBeenCalledWith( + [previousOutputs, incomingOutputs], + expect.objectContaining({ + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + requireDurable: true, + }) + ) + expect(scope.accumulatedOutputs.get(0)).toBe(compactedPrevious) + expect(scope.accumulatedOutputs.get(1)).toBe(compactedIncoming) + }) }) diff --git a/apps/sim/executor/orchestrators/parallel.ts b/apps/sim/executor/orchestrators/parallel.ts index e2574aa4876..aa9d0ad8c6e 100644 --- a/apps/sim/executor/orchestrators/parallel.ts +++ b/apps/sim/executor/orchestrators/parallel.ts @@ -333,6 +333,30 @@ export class ParallelOrchestrator { const nextBatchStart = (scope.currentBatchStart ?? 0) + (scope.currentBatchSize ?? scope.totalBranches) if (nextBatchStart < scope.totalBranches) { + /** + * Compact accumulated outputs before scheduling the next batch. Each + * block output is already individually compacted by `block-executor`, but + * many below-threshold branch results can still exceed the aggregate + * threshold over time. 
Re-running the existing subflow compactor over the + * accumulated entries forces aggregate-size spills while existing + * LargeValueRefs stay stable. + */ + if (accumulatedOutputs.size > 0) { + const accumulatedBranchIndexes = Array.from(accumulatedOutputs.keys()).sort((a, b) => a - b) + const accumulatedResults = accumulatedBranchIndexes.map( + (idx) => accumulatedOutputs.get(idx) ?? [] + ) + const compactedAccumulated = await compactSubflowResults(accumulatedResults, { + workspaceId: ctx.workspaceId, + workflowId: ctx.workflowId, + executionId: ctx.executionId, + userId: ctx.userId, + requireDurable: true, + }) + accumulatedBranchIndexes.forEach((branchIdx, position) => { + accumulatedOutputs.set(branchIdx, compactedAccumulated[position]) + }) + } await this.scheduleNextBatch(ctx, scope, nextBatchStart) return { allBranchesComplete: false, diff --git a/apps/sim/lib/execution/isolated-vm-worker.cjs b/apps/sim/lib/execution/isolated-vm-worker.cjs index 44ef92142cf..a924beb8dfe 100644 --- a/apps/sim/lib/execution/isolated-vm-worker.cjs +++ b/apps/sim/lib/execution/isolated-vm-worker.cjs @@ -27,6 +27,21 @@ const SANDBOX_BUNDLE_FILES = { const bundleSourceCache = new Map() const activeIsolates = new Map() +/** + * Sends an IPC request and reports only actual delivery failures. + * Node queues messages under backpressure, so the boolean return value is not + * a failure signal. + */ +function sendIpcRequest(message, onError) { + try { + process.send(message, (err) => { + if (err) onError(err) + }) + } catch (error) { + onError(error instanceof Error ? 
error : new Error(String(error))) + } +} + function getBundleSource(bundleName) { const cached = bundleSourceCache.get(bundleName) if (cached) return cached @@ -233,13 +248,19 @@ async function executeCode(request, executionId) { } }, FETCH_TIMEOUT_MS) pendingFetches.set(fetchId, { resolve, timeout }) - if (process.send && process.connected) { - process.send({ type: 'fetch', fetchId, requestId, url, optionsJson }) - } else { + if (!process.send || !process.connected) { clearTimeout(timeout) pendingFetches.delete(fetchId) resolve(JSON.stringify({ error: 'Parent process disconnected' })) + return } + sendIpcRequest({ type: 'fetch', fetchId, requestId, url, optionsJson }, (err) => { + const pending = pendingFetches.get(fetchId) + if (!pending) return + clearTimeout(pending.timeout) + pendingFetches.delete(fetchId) + pending.resolve(JSON.stringify({ error: `Fetch IPC send failed: ${err.message}` })) + }) }) }) await jail.set('__fetchRef', fetchCallback) @@ -254,13 +275,19 @@ async function executeCode(request, executionId) { } }, BROKER_TIMEOUT_MS) pendingBrokerCalls.set(brokerId, { resolve, timeout, executionId }) - if (process.send && process.connected) { - process.send({ type: 'broker', brokerId, executionId, brokerName, argsJson }) - } else { + if (!process.send || !process.connected) { clearTimeout(timeout) pendingBrokerCalls.delete(brokerId) resolve(JSON.stringify({ error: 'Parent process disconnected' })) + return } + sendIpcRequest({ type: 'broker', brokerId, executionId, brokerName, argsJson }, (err) => { + const pending = pendingBrokerCalls.get(brokerId) + if (!pending) return + clearTimeout(pending.timeout) + pendingBrokerCalls.delete(brokerId) + pending.resolve(JSON.stringify({ error: `Broker IPC send failed: ${err.message}` })) + }) }) }) await jail.set('__brokerRef', brokerCallback) @@ -732,13 +759,19 @@ async function executeTask(request, executionId) { } }, BROKER_TIMEOUT_MS) pendingBrokerCalls.set(brokerId, { resolve, timeout, executionId }) - if 
(process.send && process.connected) { - process.send({ type: 'broker', brokerId, executionId, brokerName, argsJson }) - } else { + if (!process.send || !process.connected) { clearTimeout(timeout) pendingBrokerCalls.delete(brokerId) resolve(JSON.stringify({ error: 'Parent process disconnected' })) + return } + sendIpcRequest({ type: 'broker', brokerId, executionId, brokerName, argsJson }, (err) => { + const pending = pendingBrokerCalls.get(brokerId) + if (!pending) return + clearTimeout(pending.timeout) + pendingBrokerCalls.delete(brokerId) + pending.resolve(JSON.stringify({ error: `Broker IPC send failed: ${err.message}` })) + }) }) }) releaseables.push(brokerRef) From 8d2ce64a0a7ffe59375045e449fb24a50662a686 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Tue, 12 May 2026 11:26:36 -0700 Subject: [PATCH 09/11] address comments --- apps/sim/executor/variables/resolver.test.ts | 158 ++++++++++- apps/sim/executor/variables/resolver.ts | 49 ++-- apps/sim/lib/execution/event-buffer.test.ts | 103 ++++--- apps/sim/lib/execution/event-buffer.ts | 224 +++++++++++++-- apps/sim/lib/execution/redis-budget.server.ts | 8 +- .../utils/user-file-base64.server.test.ts | 140 ++++++++- .../uploads/utils/user-file-base64.server.ts | 266 +++++++++++++++--- 7 files changed, 816 insertions(+), 132 deletions(-) diff --git a/apps/sim/executor/variables/resolver.test.ts b/apps/sim/executor/variables/resolver.test.ts index e92eac9439e..6058a2053e8 100644 --- a/apps/sim/executor/variables/resolver.test.ts +++ b/apps/sim/executor/variables/resolver.test.ts @@ -211,6 +211,123 @@ describe('VariableResolver function block inputs', () => { expect(result.contextVariables.__blockRef_0).toBe('aGVsbG8=') }) + it('rewrites loop current item base64 references to lazy runtime reads', async () => { + const functionBlock = createBlock('function', 'Function', BlockType.FUNCTION, { + language: 'javascript', + }) + const loopBlock = createBlock('loop-1', 'Loop 1', 'loop') + const workflow: 
SerializedWorkflow = { + version: '1', + blocks: [loopBlock, functionBlock], + connections: [], + loops: { 'loop-1': { id: 'loop-1', nodes: ['function'], iterations: 1 } }, + parallels: {}, + } + const state = new ExecutionState() + const file = { + id: 'file-loop', + name: 'loop.png', + url: 'https://example.com/loop.png', + key: 'execution/workspace-1/workflow-1/execution-1/loop.png', + context: 'execution', + size: 12 * 1024 * 1024, + type: 'image/png', + base64: 'large-inline-base64', + } + const ctx = { + ...createResolver().ctx, + loopExecutions: new Map([ + [ + 'loop-1', + { + iteration: 0, + currentIterationOutputs: new Map(), + allIterationOutputs: [], + item: file, + items: [file], + }, + ], + ]), + } as ExecutionContext + const resolver = new VariableResolver(workflow, {}, state) + + const result = await resolver.resolveInputsForFunctionBlock( + ctx, + 'function', + { code: 'return .length' }, + functionBlock + ) + + expect(result.resolvedInputs.code).toBe( + 'return (await sim.files.readBase64(globalThis["__blockRef_0"])).length' + ) + expect(result.contextVariables.__blockRef_0).toMatchObject({ id: 'file-loop' }) + expect(result.contextVariables.__blockRef_0).not.toHaveProperty('base64') + }) + + it('rewrites parallel current item base64 references to lazy runtime reads', async () => { + const functionBlock = createBlock('function', 'Function', BlockType.FUNCTION, { + language: 'javascript', + }) + const parallelBlock = createBlock('parallel-1', 'Parallel 1', 'parallel') + const workflow: SerializedWorkflow = { + version: '1', + blocks: [parallelBlock, functionBlock], + connections: [], + loops: {}, + parallels: { + 'parallel-1': { + id: 'parallel-1', + nodes: ['function'], + parallelType: 'collection', + distribution: [], + }, + }, + } + const state = new ExecutionState() + const file = { + id: 'file-parallel', + name: 'parallel.png', + url: 'https://example.com/parallel.png', + key: 'execution/workspace-1/workflow-1/execution-1/parallel.png', + 
context: 'execution', + size: 12 * 1024 * 1024, + type: 'image/png', + base64: 'large-inline-base64', + } + const ctx = { + ...createResolver().ctx, + parallelExecutions: new Map([ + [ + 'parallel-1', + { + parallelId: 'parallel-1', + totalBranches: 1, + branchOutputs: new Map(), + items: [{ file }], + }, + ], + ]), + parallelBlockMapping: new Map([ + ['function', { originalBlockId: 'function', parallelId: 'parallel-1', iterationIndex: 0 }], + ]), + } as ExecutionContext + const resolver = new VariableResolver(workflow, {}, state) + + const result = await resolver.resolveInputsForFunctionBlock( + ctx, + 'function', + { code: 'return .length' }, + functionBlock + ) + + expect(result.resolvedInputs.code).toBe( + 'return (await sim.files.readBase64(globalThis["__blockRef_0"])).length' + ) + expect(result.contextVariables.__blockRef_0).toMatchObject({ id: 'file-parallel' }) + expect(result.contextVariables.__blockRef_0).not.toHaveProperty('base64') + }) + it('rewrites JavaScript large value refs to lazy runtime reads', async () => { const { block, ctx, resolver } = createResolver('javascript') const state = new ExecutionState() @@ -444,7 +561,46 @@ describe('VariableResolver function block inputs', () => { { code: 'return ' }, block ) - ).rejects.toThrow('This execution value is too large to inline') + ).rejects.toThrow('This execution value contains nested large values') + }) + + it('fails nested large value refs for JavaScript instead of leaking ref markers', async () => { + const { block, ctx } = createResolver('javascript') + const state = new ExecutionState() + state.setBlockOutput('producer', { + result: { + rows: { + __simLargeValueRef: true, + version: 1, + id: 'lv_ABCDEFGHIJKL', + kind: 'array', + size: 12 * 1024 * 1024, + key: 'execution/workspace-1/workflow-1/execution-1/large-value-lv_ABCDEFGHIJKL.json', + executionId: 'execution-1', + }, + }, + }) + const workflow: SerializedWorkflow = { + version: '1', + blocks: [createBlock('producer', 'Producer', 
BlockType.API), block], + connections: [], + loops: {}, + parallels: {}, + } + const largeResolver = new VariableResolver(workflow, {}, state) + const largeCtx = { + ...ctx, + blockStates: state.getBlockStates(), + } as ExecutionContext + + await expect( + largeResolver.resolveInputsForFunctionBlock( + largeCtx, + 'function', + { code: 'return .rows.length' }, + block + ) + ).rejects.toThrow('This execution value contains nested large values') }) it('resolves Python block references through globals lookup', async () => { diff --git a/apps/sim/executor/variables/resolver.ts b/apps/sim/executor/variables/resolver.ts index 80bb193fde4..33ac7e181b0 100644 --- a/apps/sim/executor/variables/resolver.ts +++ b/apps/sim/executor/variables/resolver.ts @@ -2,7 +2,6 @@ import { createLogger } from '@sim/logger' import { toError } from '@sim/utils/errors' import { isUserFileWithMetadata } from '@/lib/core/utils/user-file' import { - assertNoLargeValueRefs, containsLargeValueRef, getLargeValueMaterializationError, isLargeValueRef, @@ -32,6 +31,12 @@ export const FUNCTION_BLOCK_DISPLAY_CODE_KEY = '_runtimeDisplayCode' const logger = createLogger('VariableResolver') +function getNestedLargeValueMaterializationError(): Error { + return new Error( + 'This execution value contains nested large values. Reference the nested field directly so it can be lazy-loaded.' 
+ ) +} + async function replaceValidReferencesAsync( template: string, replacer: (match: string, index: number, template: string) => Promise @@ -340,20 +345,20 @@ export class VariableResolver { displayCursor = index + match.length try { - if (this.blockResolver.canResolve(match)) { - const lazyBase64 = await this.resolveLazyFileBase64Reference( - match, - resolutionContext, - language, - template, - index, - contextVarAccumulator - ) - if (lazyBase64) { - displayResult += lazyBase64.display - return lazyBase64.replacement - } + const lazyBase64 = await this.resolveLazyFileBase64Reference( + match, + resolutionContext, + language, + template, + index, + contextVarAccumulator + ) + if (lazyBase64) { + displayResult += lazyBase64.display + return lazyBase64.replacement + } + if (this.blockResolver.canResolve(match)) { const resolved = await this.resolveReference(match, resolutionContext) if (resolved === undefined) { displayResult += match @@ -378,12 +383,8 @@ export class VariableResolver { throw getLargeValueMaterializationError(effectiveValue) } replacement = lazyReplacement - } else if ( - containsLargeValueRef(effectiveValue) && - !this.canUseJavaScriptRuntimeHelpers(language, template) - ) { - assertNoLargeValueRefs(effectiveValue) - throw new Error('This execution value is too large to inline.') + } else if (containsLargeValueRef(effectiveValue)) { + throw getNestedLargeValueMaterializationError() } else { replacement = this.formatContextVariableReference( varName, @@ -431,12 +432,8 @@ export class VariableResolver { throw getLargeValueMaterializationError(effectiveValue) } - if ( - containsLargeValueRef(effectiveValue) && - !this.canUseJavaScriptRuntimeHelpers(language, template) - ) { - assertNoLargeValueRefs(effectiveValue) - throw new Error('This execution value is too large to inline.') + if (containsLargeValueRef(effectiveValue)) { + throw getNestedLargeValueMaterializationError() } // Non-block reference (loop, parallel, workflow, env): embed as literal 
diff --git a/apps/sim/lib/execution/event-buffer.test.ts b/apps/sim/lib/execution/event-buffer.test.ts index da7de9404ca..c76365d74cf 100644 --- a/apps/sim/lib/execution/event-buffer.test.ts +++ b/apps/sim/lib/execution/event-buffer.test.ts @@ -8,6 +8,7 @@ import type { ExecutionEvent } from '@/lib/workflows/executor/execution-events' const { mockGetRedisClient, mockRedis, persistedEntries } = vi.hoisted(() => { const persistedEntries: ExecutionEventEntry[] = [] const mockRedis = { + get: vi.fn(), incrby: vi.fn(), hset: vi.fn(), expire: vi.fn(), @@ -30,6 +31,7 @@ import { flushExecutionStreamReplayBuffer, initializeExecutionStreamMeta, readExecutionEventsState, + resetExecutionStreamBuffer, } from '@/lib/execution/event-buffer' function makeEvent(blockId: string): ExecutionEvent { @@ -47,42 +49,42 @@ function makeEvent(blockId: string): ExecutionEvent { } } +function parseFlushEvalArgs(args: unknown[]): { + terminalStatus: string + zaddArgs: (string | number)[] +} { + const keyCount = Number(args[0]) + return { + terminalStatus: String(args[keyCount + 4] ?? 
''), + zaddArgs: args.slice(keyCount + 9) as (string | number)[], + } +} + describe('execution event buffer', () => { beforeEach(() => { vi.clearAllMocks() persistedEntries.length = 0 mockGetRedisClient.mockReturnValue(mockRedis) + mockRedis.get.mockResolvedValue(null) mockRedis.hgetall.mockResolvedValue({}) mockRedis.zrangebyscore.mockResolvedValue([]) mockRedis.zremrangebyrank.mockResolvedValue(0) - mockRedis.eval.mockImplementation( - async ( - script: string, - _keyCount: number, - _eventsKey: string, - _seqKey: string, - _metaKey: string, - _ttl: number, - _eventLimit: number, - _updatedAt: string, - terminalStatus: string, - ...args: (string | number)[] - ) => { - if (script.includes('execution_redis_bytes')) { - return [1, 'ok', 0, 0] - } - if (script.includes('DECRBY')) { - return 1 - } - for (let i = 0; i < args.length; i += 2) { - persistedEntries.push(JSON.parse(args[i + 1] as string) as ExecutionEventEntry) + mockRedis.eval.mockImplementation(async (script: string, ...args: unknown[]) => { + if (script.includes('ZADD')) { + const { terminalStatus, zaddArgs } = parseFlushEvalArgs(args) + for (let i = 0; i < zaddArgs.length; i += 2) { + persistedEntries.push(JSON.parse(zaddArgs[i + 1] as string) as ExecutionEventEntry) } if (terminalStatus) { await mockRedis.hset('meta', { status: terminalStatus }) } - return persistedEntries[0]?.eventId ?? false + return [1, persistedEntries[0]?.eventId ?? 
false, 0] } - ) + if (script.includes('DECRBY')) { + return 1 + } + return [1, 'ok', 0, 0] + }) mockRedis.pipeline.mockImplementation(() => ({ zadd: vi.fn((_key: string, ...args: (string | number)[]) => { for (let i = 0; i < args.length; i += 2) { @@ -159,17 +161,14 @@ describe('execution event buffer', () => { ] mockRedis.eval.mockImplementation(async (script: string, ...args: unknown[]) => { - if (script.includes('execution_redis_bytes')) { - return [1, 'ok', 0, 0] - } const batchEntries: ExecutionEventEntry[] = [] - const zaddArgs = args.slice(8) as (string | number)[] + const { zaddArgs } = parseFlushEvalArgs(args) for (let i = 0; i < zaddArgs.length; i += 2) { batchEntries.push(JSON.parse(zaddArgs[i + 1] as string) as ExecutionEventEntry) } await (execCalls.shift() ?? (() => Promise.resolve()))() persistedEntries.push(...batchEntries) - return persistedEntries[0]?.eventId ?? false + return [1, persistedEntries[0]?.eventId ?? false, 0] }) mockRedis.pipeline.mockImplementation(() => { const batchEntries: ExecutionEventEntry[] = [] @@ -247,10 +246,7 @@ describe('execution event buffer', () => { mockRedis.incrby.mockResolvedValue(100) let flushAttempt = 0 mockRedis.eval.mockImplementation(async (script: string, ...args: unknown[]) => { - if (script.includes('execution_redis_bytes')) { - return [1, 'ok', 0, 0] - } - const zaddArgs = args.slice(8) as (string | number)[] + const { zaddArgs } = parseFlushEvalArgs(args) if (flushAttempt > 0) { for (let i = 0; i < zaddArgs.length; i += 2) { persistedEntries.push(JSON.parse(zaddArgs[i + 1] as string) as ExecutionEventEntry) @@ -259,7 +255,7 @@ describe('execution event buffer', () => { if (flushAttempt++ === 0) { throw new Error('first flush failed') } - return persistedEntries[0]?.eventId ?? false + return [1, persistedEntries[0]?.eventId ?? 
false, 0] }) mockRedis.pipeline.mockImplementation(() => ({ zadd: vi.fn((_key: string, ...args: (string | number)[]) => { @@ -299,6 +295,47 @@ describe('execution event buffer', () => { expect(mockRedis.hset).toHaveBeenCalledWith('meta', { status: 'complete' }) }) + it('budgets only net event bytes after pruning during flush', async () => { + mockRedis.incrby.mockResolvedValue(100) + let netBudgetBytes = 0 + mockRedis.eval.mockImplementation(async (script: string, ...args: unknown[]) => { + const keyCount = Number(args[0]) + netBudgetBytes = Number(args[keyCount + 5]) + const { zaddArgs } = parseFlushEvalArgs(args) + for (let i = 0; i < zaddArgs.length; i += 2) { + persistedEntries.push(JSON.parse(zaddArgs[i + 1] as string) as ExecutionEventEntry) + } + return [1, persistedEntries[0]?.eventId ?? false, 123] + }) + + const writer = createExecutionEventWriter('exec-1') + await writer.writeTerminal(makeEvent('terminal'), 'complete') + + expect(netBudgetBytes).toBeGreaterThan(0) + }) + + it('releases retained event budget when resetting the stream buffer', async () => { + mockRedis.get.mockResolvedValueOnce(41) + mockRedis.hgetall.mockResolvedValueOnce({ userId: 'user-1' }) + let releasedBytes = 0 + mockRedis.eval.mockImplementationOnce(async (script: string, ...args: unknown[]) => { + expect(script).toContain('retained_bytes') + expect(args.slice(0, 5)).toEqual([ + 4, + 'execution:stream:exec-1:events', + 'execution:stream:exec-1:meta', + 'execution:redis-budget:execution:exec-1', + 'execution:redis-budget:user:user-1', + ]) + releasedBytes = 256 + return releasedBytes + }) + + await expect(resetExecutionStreamBuffer('exec-1')).resolves.toBe(true) + + expect(releasedBytes).toBe(256) + }) + it('surfaces execution memory limit errors when the Redis budget is exceeded', async () => { mockRedis.incrby.mockResolvedValue(100) mockRedis.eval.mockImplementationOnce(async () => [ diff --git a/apps/sim/lib/execution/event-buffer.ts b/apps/sim/lib/execution/event-buffer.ts index 
81aebee8170..96f667560c5 100644 --- a/apps/sim/lib/execution/event-buffer.ts +++ b/apps/sim/lib/execution/event-buffer.ts @@ -7,10 +7,13 @@ import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' import type { LargeValueStoreContext } from '@/lib/execution/payloads/store' import { type ExecutionRedisBudgetReservation, - releaseExecutionRedisBytes, - reserveExecutionRedisBytes, + getExecutionRedisBudgetKeys, + getExecutionRedisBudgetLimits, } from '@/lib/execution/redis-budget.server' -import { isExecutionResourceLimitError } from '@/lib/execution/resource-errors' +import { + ExecutionResourceLimitError, + isExecutionResourceLimitError, +} from '@/lib/execution/resource-errors' import type { ExecutionEvent } from '@/lib/workflows/executor/execution-events' const logger = createLogger('ExecutionEventBuffer') @@ -27,7 +30,87 @@ const ACTIVE_META_ATTEMPTS = 3 const FINALIZE_FLUSH_ATTEMPTS = 2 const FLUSH_EVENTS_SCRIPT = ` local terminal_status = ARGV[4] -for i = 5, #ARGV, 2 do +local batch_bytes = tonumber(ARGV[5]) +local execution_limit = tonumber(ARGV[6]) +local user_limit = tonumber(ARGV[7]) +local budget_ttl_seconds = tonumber(ARGV[8]) +local event_limit = tonumber(ARGV[2]) +local new_count = 0 +local new_bytes = 0 +local new_entries = {} +for i = 9, #ARGV, 2 do + local entry = ARGV[i + 1] + if not redis.call('ZSCORE', KEYS[1], entry) then + new_count = new_count + 1 + new_bytes = new_bytes + string.len(entry) + table.insert(new_entries, entry) + end +end +local current_count = redis.call('ZCARD', KEYS[1]) +local prune_count = current_count + new_count - event_limit +local pruned = {} +if prune_count < 0 then + prune_count = 0 +end +local existing_prune_count = math.min(prune_count, current_count) +local new_prune_count = prune_count - existing_prune_count +if existing_prune_count > 0 then + pruned = redis.call('ZRANGE', KEYS[1], 0, existing_prune_count - 1) +end +local pruned_bytes = 0 +for _, entry in ipairs(pruned) do + pruned_bytes = 
pruned_bytes + string.len(entry) +end +for i = 1, new_prune_count do + local entry = new_entries[i] + if entry then + pruned_bytes = pruned_bytes + string.len(entry) + end +end +local net_bytes = new_bytes - pruned_bytes +if net_bytes > 0 then + local execution_current = tonumber(redis.call('GET', KEYS[4]) or '0') + if execution_limit > 0 and execution_current + net_bytes > execution_limit then + return {0, 'execution_redis_bytes', execution_current, pruned_bytes} + end + local user_current = 0 + if #KEYS >= 5 then + user_current = tonumber(redis.call('GET', KEYS[5]) or '0') + if user_limit > 0 and user_current + net_bytes > user_limit then + return {0, 'user_redis_bytes', user_current, pruned_bytes} + end + end + redis.call('INCRBY', KEYS[4], net_bytes) + redis.call('EXPIRE', KEYS[4], budget_ttl_seconds) + if #KEYS >= 5 then + redis.call('INCRBY', KEYS[5], net_bytes) + redis.call('EXPIRE', KEYS[5], budget_ttl_seconds) + end +elseif net_bytes < 0 then + local release_bytes = -net_bytes + local execution_next = redis.call('DECRBY', KEYS[4], release_bytes) + if execution_next <= 0 then + redis.call('DEL', KEYS[4]) + else + redis.call('EXPIRE', KEYS[4], budget_ttl_seconds) + end + if #KEYS >= 5 then + local user_next = redis.call('DECRBY', KEYS[5], release_bytes) + if user_next <= 0 then + redis.call('DEL', KEYS[5]) + else + redis.call('EXPIRE', KEYS[5], budget_ttl_seconds) + end + end +else + if redis.call('EXISTS', KEYS[4]) == 1 then + redis.call('EXPIRE', KEYS[4], budget_ttl_seconds) + end + if #KEYS >= 5 and redis.call('EXISTS', KEYS[5]) == 1 then + redis.call('EXPIRE', KEYS[5], budget_ttl_seconds) + end +end +for i = 9, #ARGV, 2 do redis.call('ZADD', KEYS[1], ARGV[i], ARGV[i + 1]) end redis.call('EXPIRE', KEYS[1], tonumber(ARGV[1])) @@ -42,7 +125,34 @@ if oldest[2] then redis.call('HSET', KEYS[3], 'earliestEventId', tostring(math.floor(tonumber(oldest[2]))), 'updatedAt', ARGV[3]) redis.call('EXPIRE', KEYS[3], tonumber(ARGV[1])) end -return oldest[2] or false 
+return {1, oldest[2] or false, pruned_bytes} +` +const RESET_STREAM_SCRIPT = ` +local entries = redis.call('ZRANGE', KEYS[1], 0, -1) +local retained_bytes = 0 +for _, entry in ipairs(entries) do + retained_bytes = retained_bytes + string.len(entry) +end +redis.call('DEL', KEYS[1], KEYS[2]) +redis.call('HSET', KEYS[2], 'replayStartEventId', ARGV[1], 'updatedAt', ARGV[2]) +redis.call('EXPIRE', KEYS[2], tonumber(ARGV[3])) +if retained_bytes > 0 then + local execution_next = redis.call('DECRBY', KEYS[3], retained_bytes) + if execution_next <= 0 then + redis.call('DEL', KEYS[3]) + else + redis.call('EXPIRE', KEYS[3], tonumber(ARGV[4])) + end + if #KEYS >= 4 then + local user_next = redis.call('DECRBY', KEYS[4], retained_bytes) + if user_next <= 0 then + redis.call('DEL', KEYS[4]) + else + redis.call('EXPIRE', KEYS[4], tonumber(ARGV[4])) + end + end +end +return retained_bytes ` function getEventsKey(executionId: string) { @@ -75,6 +185,21 @@ function getExecutionEventEntryJson(entry: ExecutionEventEntry): string { return JSON.stringify(entry) } +function getFlushScriptResult(value: unknown): { + allowed: boolean + resource?: string + currentBytes?: number +} { + if (Array.isArray(value)) { + return { + allowed: Number(value[0]) === 1, + resource: typeof value[1] === 'string' ? value[1] : undefined, + currentBytes: Number(value[2] ?? 0), + } + } + return { allowed: true } +} + function trimFinalBlockLogsForEventData(data: unknown): unknown { if (!data || typeof data !== 'object' || Array.isArray(data)) return data @@ -350,12 +475,28 @@ export async function resetExecutionStreamBuffer(executionId: string): Promise 0)) const replayStartEventId = Number.isFinite(currentSequence) ? 
currentSequence + 1 : 1 const metaKey = getMetaKey(executionId) - await redis.del(getEventsKey(executionId), metaKey) - await redis.hset(metaKey, { - replayStartEventId: String(replayStartEventId), - updatedAt: new Date().toISOString(), - }) - await redis.expire(metaKey, TTL_SECONDS) + const meta = (await redis.hgetall(metaKey).catch(() => ({}))) as Record + const userId = typeof meta.userId === 'string' ? meta.userId : undefined + const budgetReservation: ExecutionRedisBudgetReservation = { + executionId, + userId, + category: 'event_buffer', + operation: 'reset_events', + bytes: 0, + logger, + } + const budgetKeys = getExecutionRedisBudgetKeys(budgetReservation) + await redis.eval( + RESET_STREAM_SCRIPT, + 2 + budgetKeys.length, + getEventsKey(executionId), + metaKey, + ...budgetKeys, + String(replayStartEventId), + new Date().toISOString(), + TTL_SECONDS, + getExecutionRedisBudgetLimits().ttlSeconds + ) return true } catch (error) { logger.warn('Failed to reset execution stream buffer', { @@ -612,8 +753,6 @@ export function createExecutionEventWriter( if (pending.length === 0) return true const batch = pending pending = [] - let reservedBudget: ExecutionRedisBudgetReservation | null = null - let budgetReserved = false try { const key = getEventsKey(executionId) const zaddArgs: (string | number)[] = [] @@ -623,7 +762,7 @@ export function createExecutionEventWriter( batchBytes += Buffer.byteLength(entryJson, 'utf8') zaddArgs.push(entry.eventId, entryJson) } - reservedBudget = { + const budgetReservation: ExecutionRedisBudgetReservation = { executionId, userId: context.userId, category: 'event_buffer', @@ -631,26 +770,51 @@ export function createExecutionEventWriter( bytes: batchBytes, logger, } - await reserveExecutionRedisBytes(redis, reservedBudget) - budgetReserved = true - await redis.eval( - FLUSH_EVENTS_SCRIPT, - 3, - key, - getSeqKey(executionId), - getMetaKey(executionId), - TTL_SECONDS, - EVENT_LIMIT, - new Date().toISOString(), - terminalStatus ?? 
'', - ...zaddArgs + const limits = getExecutionRedisBudgetLimits() + if (batchBytes > limits.maxSingleWriteBytes) { + throw new ExecutionResourceLimitError({ + resource: 'redis_key_bytes', + attemptedBytes: batchBytes, + limitBytes: limits.maxSingleWriteBytes, + }) + } + const budgetKeys = getExecutionRedisBudgetKeys(budgetReservation) + const flushResult = getFlushScriptResult( + await redis.eval( + FLUSH_EVENTS_SCRIPT, + 3 + budgetKeys.length, + key, + getSeqKey(executionId), + getMetaKey(executionId), + ...budgetKeys, + TTL_SECONDS, + EVENT_LIMIT, + new Date().toISOString(), + terminalStatus ?? '', + batchBytes, + limits.maxExecutionBytes, + limits.maxUserBytes, + limits.ttlSeconds, + ...zaddArgs + ) ) + if (!flushResult.allowed) { + throw new ExecutionResourceLimitError({ + resource: + flushResult.resource === 'user_redis_bytes' + ? 'user_redis_bytes' + : 'execution_redis_bytes', + attemptedBytes: batchBytes, + currentBytes: flushResult.currentBytes ?? 0, + limitBytes: + flushResult.resource === 'user_redis_bytes' + ? 
limits.maxUserBytes + : limits.maxExecutionBytes, + }) + } consecutiveFlushFailures = 0 return true } catch (error) { - if (budgetReserved && reservedBudget) { - await releaseExecutionRedisBytes(redis, reservedBudget) - } if (isExecutionResourceLimitError(error)) { pending = batch.concat(pending) throw error diff --git a/apps/sim/lib/execution/redis-budget.server.ts b/apps/sim/lib/execution/redis-budget.server.ts index 1e78199029e..ddf58b1772d 100644 --- a/apps/sim/lib/execution/redis-budget.server.ts +++ b/apps/sim/lib/execution/redis-budget.server.ts @@ -68,7 +68,9 @@ export function getExecutionRedisBudgetLimits() { } } -function getBudgetKeys(reservation: ExecutionRedisBudgetReservation): string[] { +export function getExecutionRedisBudgetKeys( + reservation: ExecutionRedisBudgetReservation +): string[] { const keys = [`${REDIS_BUDGET_PREFIX}execution:${reservation.executionId}`] if (reservation.userId) { keys.push(`${REDIS_BUDGET_PREFIX}user:${reservation.userId}`) @@ -91,7 +93,7 @@ export async function reserveExecutionRedisBytes( }) } - const keys = getBudgetKeys(reservation) + const keys = getExecutionRedisBudgetKeys(reservation) const result = (await redis.eval( RESERVE_REDIS_BYTES_SCRIPT, keys.length, @@ -120,7 +122,7 @@ export async function releaseExecutionRedisBytes( if (reservation.bytes <= 0) return try { - const keys = getBudgetKeys(reservation) + const keys = getExecutionRedisBudgetKeys(reservation) await redis.eval(RELEASE_REDIS_BYTES_SCRIPT, keys.length, ...keys, reservation.bytes) } catch (error) { const log = reservation.logger ?? 
logger diff --git a/apps/sim/lib/uploads/utils/user-file-base64.server.test.ts b/apps/sim/lib/uploads/utils/user-file-base64.server.test.ts index b65d0795a06..00fe9ab75d4 100644 --- a/apps/sim/lib/uploads/utils/user-file-base64.server.test.ts +++ b/apps/sim/lib/uploads/utils/user-file-base64.server.test.ts @@ -2,16 +2,34 @@ * @vitest-environment node */ import { beforeEach, describe, expect, it, vi } from 'vitest' -import { hydrateUserFilesWithBase64 } from '@/lib/uploads/utils/user-file-base64.server' +import { + cleanupExecutionBase64Cache, + hydrateUserFilesWithBase64, +} from '@/lib/uploads/utils/user-file-base64.server' import type { UserFile } from '@/executor/types' -const { mockDownloadFile, mockVerifyFileAccess } = vi.hoisted(() => ({ - mockDownloadFile: vi.fn(), - mockVerifyFileAccess: vi.fn(), -})) +const { mockDownloadFile, mockGetRedisClient, mockRedis, mockVerifyFileAccess } = vi.hoisted(() => { + const mockRedis = { + get: vi.fn(), + set: vi.fn(), + hget: vi.fn(), + hset: vi.fn(), + hgetall: vi.fn(), + expire: vi.fn(), + scan: vi.fn(), + del: vi.fn(), + eval: vi.fn(), + } + return { + mockDownloadFile: vi.fn(), + mockGetRedisClient: vi.fn(), + mockRedis, + mockVerifyFileAccess: vi.fn(), + } +}) vi.mock('@/lib/core/config/redis', () => ({ - getRedisClient: () => null, + getRedisClient: mockGetRedisClient, })) vi.mock('@/lib/uploads', () => ({ @@ -35,6 +53,16 @@ vi.mock('@/app/api/files/authorization', () => ({ describe('hydrateUserFilesWithBase64', () => { beforeEach(() => { vi.clearAllMocks() + mockGetRedisClient.mockReturnValue(null) + mockRedis.get.mockResolvedValue(null) + mockRedis.set.mockResolvedValue('OK') + mockRedis.hget.mockResolvedValue(null) + mockRedis.hset.mockResolvedValue(1) + mockRedis.hgetall.mockResolvedValue({}) + mockRedis.expire.mockResolvedValue(1) + mockRedis.scan.mockResolvedValue(['0', []]) + mockRedis.del.mockResolvedValue(1) + mockRedis.eval.mockResolvedValue([1, 'ok', 0, 0]) mockVerifyFileAccess.mockResolvedValue(true) }) 
@@ -114,4 +142,104 @@ describe('hydrateUserFilesWithBase64', () => { expect(hydrated.file.base64).toBe(Buffer.from('hello').toString('base64')) }) + + it('releases reserved Redis budget when cleaning up execution cache entries', async () => { + mockGetRedisClient.mockReturnValue(mockRedis) + const rawEntry = JSON.stringify({ bytes: 12, userId: 'user-1' }) + mockRedis.hgetall.mockResolvedValueOnce({ + 'key:file-1': rawEntry, + }) + mockRedis.eval.mockImplementation(async (script: string, ...args: unknown[]) => { + if (script.includes('HGET') && script.includes('HDEL') && script.includes('DECRBY')) { + expect(args).toEqual([ + 4, + 'user-file:base64-budget:exec:exec-1', + 'user-file:base64:exec:exec-1:key:file-1', + 'execution:redis-budget:execution:exec-1', + 'execution:redis-budget:user:user-1', + 'key:file-1', + rawEntry, + 12, + 60 * 60, + ]) + return [1, 1] + } + return 1 + }) + + await cleanupExecutionBase64Cache('exec-1') + + expect(mockRedis.eval).toHaveBeenCalledOnce() + }) + + it('releases indexed budget entries even when cache keys already expired', async () => { + mockGetRedisClient.mockReturnValue(mockRedis) + mockRedis.hgetall.mockResolvedValueOnce({ + 'key:file-1': JSON.stringify({ bytes: 7, userId: 'user-1' }), + }) + mockRedis.eval.mockResolvedValueOnce([1, 0]) + + await cleanupExecutionBase64Cache('exec-1') + + expect(mockRedis.eval).toHaveBeenCalledOnce() + }) + + it('writes execution cache and budget index through one delta-aware script', async () => { + mockGetRedisClient.mockReturnValue(mockRedis) + mockDownloadFile.mockResolvedValueOnce(Buffer.from('hello world!', 'utf8')) + let reservedBytes = 0 + mockRedis.eval.mockImplementation(async (script: string, ...args: unknown[]) => { + if (script.includes('HGET') && script.includes('HSET') && script.includes('SET')) { + const keyCount = Number(args[0]) + const valueBytes = Number(args[keyCount + 5]) + reservedBytes = valueBytes - 10 + return [1, 'ok', reservedBytes, reservedBytes] + } + return 1 + 
}) + const file: UserFile = { + id: 'file-1', + name: 'delta.txt', + key: 'execution/workspace/workflow/exec-1/delta.txt', + url: '/api/files/serve/execution/workspace/workflow/exec-1/delta.txt?context=execution', + size: 12, + type: 'text/plain', + context: 'execution', + } + + const hydrated = await hydrateUserFilesWithBase64( + { file }, + { + workspaceId: 'workspace', + workflowId: 'workflow', + executionId: 'exec-1', + userId: 'user-1', + maxBytes: 20, + } + ) + + expect(hydrated.file.base64).toBe(Buffer.from('hello world!').toString('base64')) + expect(reservedBytes).toBe(Buffer.from('hello world!').toString('base64').length - 10) + expect(mockRedis.eval).toHaveBeenCalledWith( + expect.stringContaining('HGET'), + 4, + 'user-file:base64:exec:exec-1:key:execution/workspace/workflow/exec-1/delta.txt', + 'user-file:base64-budget:exec:exec-1', + 'execution:redis-budget:execution:exec-1', + 'execution:redis-budget:user:user-1', + Buffer.from('hello world!').toString('base64'), + 60 * 60, + 'key:execution/workspace/workflow/exec-1/delta.txt', + JSON.stringify({ + bytes: Buffer.from('hello world!').toString('base64').length, + userId: 'user-1', + }), + Buffer.from('hello world!').toString('base64').length, + 64 * 1024 * 1024, + 256 * 1024 * 1024, + 60 * 60 + ) + expect(mockRedis.hget).not.toHaveBeenCalled() + expect(mockRedis.set).not.toHaveBeenCalled() + }) }) diff --git a/apps/sim/lib/uploads/utils/user-file-base64.server.ts b/apps/sim/lib/uploads/utils/user-file-base64.server.ts index 299490b18e9..8d5e7b048d1 100644 --- a/apps/sim/lib/uploads/utils/user-file-base64.server.ts +++ b/apps/sim/lib/uploads/utils/user-file-base64.server.ts @@ -9,10 +9,13 @@ import { } from '@/lib/execution/payloads/materialization.server' import { type ExecutionRedisBudgetReservation, - releaseExecutionRedisBytes, - reserveExecutionRedisBytes, + getExecutionRedisBudgetKeys, + getExecutionRedisBudgetLimits, } from '@/lib/execution/redis-budget.server' -import { 
isExecutionResourceLimitError } from '@/lib/execution/resource-errors' +import { + ExecutionResourceLimitError, + isExecutionResourceLimitError, +} from '@/lib/execution/resource-errors' import type { UserFile } from '@/executor/types' const INLINE_BASE64_JSON_OVERHEAD_BYTES = 512 * 1024 @@ -21,6 +24,113 @@ const DEFAULT_MAX_BASE64_BYTES = Math.floor( ) const DEFAULT_CACHE_TTL_SECONDS = 300 const REDIS_KEY_PREFIX = 'user-file:base64:' +const REDIS_BUDGET_KEY_PREFIX = 'user-file:base64-budget:' +const CLEANUP_BASE64_CACHE_ENTRY_SCRIPT = ` +local file_key = ARGV[1] +local expected_entry = ARGV[2] +local bytes = tonumber(ARGV[3]) +local budget_ttl_seconds = tonumber(ARGV[4]) +local current_entry = redis.call('HGET', KEYS[1], file_key) +if not current_entry or current_entry ~= expected_entry then + return {0, 0} +end +local deleted = redis.call('DEL', KEYS[2]) +redis.call('HDEL', KEYS[1], file_key) +if bytes and bytes > 0 then + local execution_next = redis.call('DECRBY', KEYS[3], bytes) + if execution_next <= 0 then + redis.call('DEL', KEYS[3]) + else + redis.call('EXPIRE', KEYS[3], budget_ttl_seconds) + end + if #KEYS >= 4 then + local user_next = redis.call('DECRBY', KEYS[4], bytes) + if user_next <= 0 then + redis.call('DEL', KEYS[4]) + else + redis.call('EXPIRE', KEYS[4], budget_ttl_seconds) + end + end +end +if redis.call('HLEN', KEYS[1]) == 0 then + redis.call('DEL', KEYS[1]) +end +return {1, deleted} +` +const SET_BASE64_CACHE_SCRIPT = ` +local value = ARGV[1] +local cache_ttl_seconds = tonumber(ARGV[2]) +local file_key = ARGV[3] +local next_entry = ARGV[4] +local next_bytes = tonumber(ARGV[5]) +local execution_limit = tonumber(ARGV[6]) +local user_limit = tonumber(ARGV[7]) +local budget_ttl_seconds = tonumber(ARGV[8]) +local previous_entry = redis.call('HGET', KEYS[2], file_key) +local previous_bytes = 0 +if previous_entry then + local parsed_previous_bytes = string.match(previous_entry, '"bytes"%s*:%s*(%d+)') + if parsed_previous_bytes then + previous_bytes = 
tonumber(parsed_previous_bytes) + end +end +local execution_current_raw = redis.call('GET', KEYS[3]) +local execution_current = tonumber(execution_current_raw or '0') +local execution_delta = next_bytes - previous_bytes +if not execution_current_raw then + execution_delta = next_bytes +end +if execution_delta > 0 and execution_limit > 0 and execution_current + execution_delta > execution_limit then + return {0, 'execution_redis_bytes', execution_current} +end +local user_delta = 0 +local user_current = 0 +local user_current_raw = nil +if #KEYS >= 4 then + user_current_raw = redis.call('GET', KEYS[4]) + user_current = tonumber(user_current_raw or '0') + user_delta = next_bytes - previous_bytes + if not user_current_raw then + user_delta = next_bytes + end + if user_delta > 0 and user_limit > 0 and user_current + user_delta > user_limit then + return {0, 'user_redis_bytes', user_current} + end +end +if execution_delta > 0 then + redis.call('INCRBY', KEYS[3], execution_delta) +elseif execution_delta < 0 and execution_current_raw then + local execution_next = redis.call('DECRBY', KEYS[3], -execution_delta) + if execution_next <= 0 then + redis.call('DEL', KEYS[3]) + end +end +if redis.call('EXISTS', KEYS[3]) == 1 then + redis.call('EXPIRE', KEYS[3], budget_ttl_seconds) +end +if #KEYS >= 4 then + if user_delta > 0 then + redis.call('INCRBY', KEYS[4], user_delta) + elseif user_delta < 0 and user_current_raw then + local user_next = redis.call('DECRBY', KEYS[4], -user_delta) + if user_next <= 0 then + redis.call('DEL', KEYS[4]) + end + end + if redis.call('EXISTS', KEYS[4]) == 1 then + redis.call('EXPIRE', KEYS[4], budget_ttl_seconds) + end +end +redis.call('SET', KEYS[1], value, 'EX', cache_ttl_seconds) +redis.call('HSET', KEYS[2], file_key, next_entry) +redis.call('EXPIRE', KEYS[2], cache_ttl_seconds) +return {1, 'ok', execution_delta, user_delta} +` + +interface Base64BudgetEntry { + bytes: number + userId?: string +} interface Base64Cache { get(file: UserFile): 
Promise @@ -93,28 +203,59 @@ function createBase64Cache(options: Base64HydrationOptions, logger: Logger): Bas } }, async set(file: UserFile, value: string, ttlSeconds: number) { - const budgetReservation: ExecutionRedisBudgetReservation | null = executionId - ? { - executionId, - userId: options.userId, - category: 'base64_cache', - operation: 'set_base64_cache', - bytes: Buffer.byteLength(value, 'utf8'), - logger, - } - : null - let budgetReserved = false + const key = getFullCacheKey(executionId, file) + const valueBytes = Buffer.byteLength(value, 'utf8') try { - const key = getFullCacheKey(executionId, file) - if (budgetReservation) { - await reserveExecutionRedisBytes(redis, budgetReservation) - budgetReserved = true + if (!executionId) { + await redis.set(key, value, 'EX', ttlSeconds) + return } - await redis.set(key, value, 'EX', ttlSeconds) - } catch (error) { - if (budgetReserved && budgetReservation) { - await releaseExecutionRedisBytes(redis, budgetReservation) + + const limits = getExecutionRedisBudgetLimits() + if (valueBytes > limits.maxSingleWriteBytes) { + throw new ExecutionResourceLimitError({ + resource: 'redis_key_bytes', + attemptedBytes: valueBytes, + limitBytes: limits.maxSingleWriteBytes, + }) + } + const cacheTtlSeconds = Math.max(ttlSeconds, limits.ttlSeconds) + const budgetReservation: ExecutionRedisBudgetReservation = { + executionId, + userId: options.userId, + category: 'base64_cache', + operation: 'set_base64_cache', + bytes: valueBytes, + logger, + } + const budgetKeys = getExecutionRedisBudgetKeys(budgetReservation) + const result = (await redis.eval( + SET_BASE64_CACHE_SCRIPT, + 2 + budgetKeys.length, + key, + getBudgetIndexKey(executionId), + ...budgetKeys, + value, + cacheTtlSeconds, + getFileCacheKey(file), + serializeBudgetEntry({ bytes: valueBytes, userId: options.userId }), + valueBytes, + limits.maxExecutionBytes, + limits.maxUserBytes, + limits.ttlSeconds + )) as [number, string, number | string | null] + const [allowed, 
resource, current] = result + if (allowed !== 1) { + throw new ExecutionResourceLimitError({ + resource: + resource === 'user_redis_bytes' ? 'user_redis_bytes' : 'execution_redis_bytes', + attemptedBytes: valueBytes, + currentBytes: Number(current ?? 0), + limitBytes: + resource === 'user_redis_bytes' ? limits.maxUserBytes : limits.maxExecutionBytes, + }) } + } catch (error) { if (isExecutionResourceLimitError(error)) { throw error } @@ -154,6 +295,62 @@ function getFullCacheKey(executionId: string | undefined, file: UserFile): strin return `${REDIS_KEY_PREFIX}${fileKey}` } +function getBudgetIndexKey(executionId: string): string { + return `${REDIS_BUDGET_KEY_PREFIX}exec:${executionId}` +} + +function serializeBudgetEntry(entry: Base64BudgetEntry): string { + return JSON.stringify(entry) +} + +function parseBudgetEntry(value: unknown): Base64BudgetEntry | null { + if (typeof value !== 'string') { + return null + } + try { + const parsed = JSON.parse(value) as Partial + if (typeof parsed.bytes !== 'number' || !Number.isFinite(parsed.bytes) || parsed.bytes <= 0) { + return null + } + return { + bytes: parsed.bytes, + userId: typeof parsed.userId === 'string' ? 
parsed.userId : undefined, + } + } catch { + return null + } +} + +async function cleanupBudgetEntry( + redis: NonNullable>, + executionId: string, + fileKey: string, + rawEntry: string, + entry: Base64BudgetEntry +): Promise<{ claimed: boolean; deletedCount: number }> { + const limits = getExecutionRedisBudgetLimits() + const budgetReservation: ExecutionRedisBudgetReservation = { + executionId, + userId: entry.userId, + category: 'base64_cache', + operation: 'cleanup_base64_cache', + bytes: entry.bytes, + } + const budgetKeys = getExecutionRedisBudgetKeys(budgetReservation) + const result = (await redis.eval( + CLEANUP_BASE64_CACHE_ENTRY_SCRIPT, + 2 + budgetKeys.length, + getBudgetIndexKey(executionId), + `${REDIS_KEY_PREFIX}exec:${executionId}:${fileKey}`, + ...budgetKeys, + fileKey, + rawEntry, + entry.bytes, + limits.ttlSeconds + )) as [number, number] + return { claimed: Number(result[0]) === 1, deletedCount: Number(result[1] ?? 0) } +} + function stripBase64(file: UserFile): UserFile { const { base64: _base64, ...rest } = file return rest @@ -364,22 +561,25 @@ export async function cleanupExecutionBase64Cache(executionId: string): Promise< return } - const pattern = `${REDIS_KEY_PREFIX}exec:${executionId}:*` const logger = createLogger('UserFileBase64') try { - let cursor = '0' + const budgetEntries = await redis.hgetall(getBudgetIndexKey(executionId)) let deletedCount = 0 - - do { - const [nextCursor, keys] = await redis.scan(cursor, 'MATCH', pattern, 'COUNT', 100) - cursor = nextCursor - - if (keys.length > 0) { - await redis.del(...keys) - deletedCount += keys.length + for (const [fileKey, rawEntry] of Object.entries(budgetEntries ?? 
{})) { + const budgetEntry = parseBudgetEntry(rawEntry) + if (!budgetEntry) continue + const cleanupResult = await cleanupBudgetEntry( + redis, + executionId, + fileKey, + rawEntry, + budgetEntry + ) + if (cleanupResult.claimed) { + deletedCount += cleanupResult.deletedCount } - } while (cursor !== '0') + } if (deletedCount > 0) { logger.info(`Cleaned up ${deletedCount} base64 cache entries for execution ${executionId}`) From 0790766c7b04ee900e496e4799263328b796e47a Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Tue, 12 May 2026 11:35:25 -0700 Subject: [PATCH 10/11] replace helper --- apps/sim/app/api/function/execute/route.ts | 4 +- apps/sim/lib/execution/payloads/store.test.ts | 44 +++++++++++++++++++ apps/sim/lib/execution/payloads/store.ts | 5 ++- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/apps/sim/app/api/function/execute/route.ts b/apps/sim/app/api/function/execute/route.ts index b7137a383dc..1b2d5f844e1 100644 --- a/apps/sim/app/api/function/execute/route.ts +++ b/apps/sim/app/api/function/execute/route.ts @@ -18,11 +18,11 @@ import { isLargeValueRef } from '@/lib/execution/payloads/large-value-ref' import { MAX_FUNCTION_INLINE_BYTES, MAX_INLINE_MATERIALIZATION_BYTES, - readLargeValueRefFromStorage, readUserFileContent, unavailableLargeValueError, } from '@/lib/execution/payloads/materialization.server' import { compactExecutionPayload } from '@/lib/execution/payloads/serializer' +import { materializeLargeValueRef } from '@/lib/execution/payloads/store' import { isExecutionResourceLimitError } from '@/lib/execution/resource-errors' import { uploadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager' import { getWorkflowById } from '@/lib/workflows/utils' @@ -779,7 +779,7 @@ function createFunctionRuntimeBrokers( if (!context.executionId) { throw new Error('Large execution values require an execution context.') } - const value = await readLargeValueRefFromStorage(ref, { + const value = await 
materializeLargeValueRef(ref, { ...base, maxBytes: clampInlineBytes(options.maxBytes, MAX_INLINE_MATERIALIZATION_BYTES), }) diff --git a/apps/sim/lib/execution/payloads/store.test.ts b/apps/sim/lib/execution/payloads/store.test.ts index d9c052654ef..089f8284f07 100644 --- a/apps/sim/lib/execution/payloads/store.test.ts +++ b/apps/sim/lib/execution/payloads/store.test.ts @@ -286,6 +286,50 @@ describe('large execution payload store', () => { expect(materializeLargeValueRefSync(unrecoverableRef, scope)).toEqual({ retained: true }) }) + it('materializes keyless cached refs through the async helper', async () => { + const scope = { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + } + const ref = { + __simLargeValueRef: true, + version: 1, + id: 'lv_KEYLESSCACHE', + kind: 'object', + size: 32, + executionId: scope.executionId, + } as const + cacheLargeValue(ref.id, { retained: true }, ref.size, scope) + + await expect(materializeLargeValueRef(ref, scope)).resolves.toEqual({ retained: true }) + expect(mockDownloadFile).not.toHaveBeenCalled() + }) + + it('enforces maxBytes before returning cached refs', async () => { + const scope = { + workspaceId: 'workspace-1', + workflowId: 'workflow-1', + executionId: 'execution-1', + } + const ref = { + __simLargeValueRef: true, + version: 1, + id: 'lv_CACHEDMAXBYTE', + kind: 'object', + size: 2048, + executionId: scope.executionId, + } as const + cacheLargeValue(ref.id, { retained: true }, ref.size, scope) + + await expect(materializeLargeValueRef(ref, { ...scope, maxBytes: 1024 })).rejects.toMatchObject( + { + code: EXECUTION_RESOURCE_LIMIT_CODE, + } + ) + expect(mockDownloadFile).not.toHaveBeenCalled() + }) + it('rejects durable refs when caller omits workspace and workflow context', async () => { await expect( readLargeValueRefFromStorage( diff --git a/apps/sim/lib/execution/payloads/store.ts b/apps/sim/lib/execution/payloads/store.ts index 2256813b941..cf1eb00367b 100644 --- 
a/apps/sim/lib/execution/payloads/store.ts +++ b/apps/sim/lib/execution/payloads/store.ts @@ -9,6 +9,7 @@ import { } from '@/lib/execution/payloads/large-value-ref' import { assertDurableLargeValueSize, + assertInlineMaterializationSize, assertLargeValueRefAccess, isValidLargeValueKey, readLargeValueRefFromStorage, @@ -25,6 +26,7 @@ export interface LargeValueStoreContext { allowLargeValueWorkflowScope?: boolean userId?: string requireDurable?: boolean + maxBytes?: number } function getKind(value: unknown): LargeValueKind { @@ -132,6 +134,7 @@ export async function materializeLargeValueRef( } assertLargeValueRefAccess(ref, context) + assertInlineMaterializationSize(ref.size, context.maxBytes) const cached = materializeLargeValueRefSync(ref, context) if (cached !== undefined) { @@ -150,7 +153,7 @@ export async function materializeLargeValueRef( largeValueExecutionIds: context.largeValueExecutionIds, allowLargeValueWorkflowScope: context.allowLargeValueWorkflowScope, userId: context.userId, - maxBytes: ref.size, + maxBytes: context.maxBytes ?? 
ref.size, }) if (value === undefined) { return undefined From 71acd650f191aee86eeec6342592ba845ccc31fa Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Tue, 12 May 2026 11:46:56 -0700 Subject: [PATCH 11/11] fix tests --- apps/sim/lib/execution/event-buffer.test.ts | 24 +++++++++++++++------ apps/sim/lib/execution/event-buffer.ts | 13 ++++++----- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/apps/sim/lib/execution/event-buffer.test.ts b/apps/sim/lib/execution/event-buffer.test.ts index c76365d74cf..cd1753570ec 100644 --- a/apps/sim/lib/execution/event-buffer.test.ts +++ b/apps/sim/lib/execution/event-buffer.test.ts @@ -60,6 +60,14 @@ function parseFlushEvalArgs(args: unknown[]): { } } +function isFlushScript(script: string): boolean { + return script.includes("redis.call('ZADD'") && script.includes('new_count') +} + +function isResetScript(script: string): boolean { + return script.includes('retained_bytes') && script.includes('replayStartEventId') +} + describe('execution event buffer', () => { beforeEach(() => { vi.clearAllMocks() @@ -70,7 +78,7 @@ describe('execution event buffer', () => { mockRedis.zrangebyscore.mockResolvedValue([]) mockRedis.zremrangebyrank.mockResolvedValue(0) mockRedis.eval.mockImplementation(async (script: string, ...args: unknown[]) => { - if (script.includes('ZADD')) { + if (isFlushScript(script)) { const { terminalStatus, zaddArgs } = parseFlushEvalArgs(args) for (let i = 0; i < zaddArgs.length; i += 2) { persistedEntries.push(JSON.parse(zaddArgs[i + 1] as string) as ExecutionEventEntry) @@ -80,6 +88,9 @@ describe('execution event buffer', () => { } return [1, persistedEntries[0]?.eventId ?? 
false, 0] } + if (isResetScript(script)) { + return 0 + } if (script.includes('DECRBY')) { return 1 } @@ -338,11 +349,12 @@ describe('execution event buffer', () => { it('surfaces execution memory limit errors when the Redis budget is exceeded', async () => { mockRedis.incrby.mockResolvedValue(100) - mockRedis.eval.mockImplementationOnce(async () => [ - 0, - 'execution_redis_bytes', - 64 * 1024 * 1024, - ]) + mockRedis.eval.mockImplementation(async (script: string) => { + if (isFlushScript(script)) { + return [0, 'execution_redis_bytes', 64 * 1024 * 1024] + } + return [1, 'ok', 0, 0] + }) const writer = createExecutionEventWriter('exec-1') diff --git a/apps/sim/lib/execution/event-buffer.ts b/apps/sim/lib/execution/event-buffer.ts index 96f667560c5..d04f9ea63cf 100644 --- a/apps/sim/lib/execution/event-buffer.ts +++ b/apps/sim/lib/execution/event-buffer.ts @@ -453,7 +453,12 @@ export async function flushExecutionStreamReplayBuffer( } export async function resetExecutionStreamBuffer(executionId: string): Promise { - if (canUseMemoryEventBuffer()) { + const redis = getRedisClient() + if (!redis) { + if (!canUseMemoryEventBuffer()) { + logger.warn('resetExecutionStreamBuffer: Redis client unavailable', { executionId }) + return false + } const stream = getMemoryStream(executionId) stream.events = [] stream.meta = { @@ -465,12 +470,6 @@ export async function resetExecutionStreamBuffer(executionId: string): Promise 0)) const replayStartEventId = Number.isFinite(currentSequence) ? currentSequence + 1 : 1