diff --git a/packages/durabletask-js/src/worker/orchestration-executor.ts b/packages/durabletask-js/src/worker/orchestration-executor.ts index c5eea00..ac21732 100644 --- a/packages/durabletask-js/src/worker/orchestration-executor.ts +++ b/packages/durabletask-js/src/worker/orchestration-executor.ts @@ -792,16 +792,20 @@ export class OrchestrationExecutor { task.recordFailure(errorMessage, failureDetails); const retryResult = await task.shouldRetry(ctx._currentUtcDatetime); - if (retryResult !== false) { + // Only retry when the handler explicitly returns true or a finite number. + // Using a positive check (=== true || finite number) instead of !== false + // ensures that undefined/null (e.g., from a missing return statement) is + // treated as "don't retry" rather than causing an infinite retry loop. + if (retryResult === true || (typeof retryResult === "number" && Number.isFinite(retryResult))) { WorkerLogs.retryingTask(this._logger, ctx._instanceId, task.taskName, task.attemptCount); task.incrementAttemptCount(); if (typeof retryResult === "number") { if (retryResult <= 0) { - // Handler returned true — retry immediately + // Zero or negative delay — retry immediately ctx.rescheduleRetryTask(task); } else { - // Handler returned a delay in milliseconds — use a timer + // Positive delay in milliseconds — use a timer ctx.createRetryTimer(task, retryResult); } } else { diff --git a/packages/durabletask-js/test/orchestration_executor.spec.ts b/packages/durabletask-js/test/orchestration_executor.spec.ts index 7b2795e..fb8200d 100644 --- a/packages/durabletask-js/test/orchestration_executor.spec.ts +++ b/packages/durabletask-js/test/orchestration_executor.spec.ts @@ -1127,6 +1127,135 @@ describe("Orchestration Executor", () => { const completeAction = getAndValidateSingleCompleteOrchestrationAction(result); expect(completeAction?.getOrchestrationstatus()).toEqual(pb.OrchestrationStatus.ORCHESTRATION_STATUS_FAILED); }); + + it("should fail the task (not retry) when retry handler returns undefined", async () => { + const { result: startResult, replay } = await startOrchestration( + async function* (ctx: OrchestrationContext): any { + // Cast to bypass TypeScript — simulates a JavaScript consumer or a handler + // with a code path that omits a return statement + const retryHandler = (async (_retryCtx: any) => { + // Intentionally missing return statement + }) as any; + return yield ctx.callActivity("flakyActivity", undefined, { retry: retryHandler }); + }, + ); + + expect(startResult.actions[0].hasScheduletask()).toBe(true); + + // Activity fails → handler returns undefined → should NOT retry, task should fail + const result = await replay( + [newTaskScheduledEvent(1, "flakyActivity")], + [newTaskFailedEvent(1, new Error("Activity error"))], + ); + const completeAction = getAndValidateSingleCompleteOrchestrationAction(result); + expect(completeAction?.getOrchestrationstatus()).toEqual(pb.OrchestrationStatus.ORCHESTRATION_STATUS_FAILED); + }); + + it("should fail the task (not retry) when retry handler returns null", async () => { + const { result: startResult, replay } = await startOrchestration( + async function* (ctx: OrchestrationContext): any { + const retryHandler = async (_retryCtx: any) => { + return null as any; + }; + return yield ctx.callActivity("flakyActivity", undefined, { retry: retryHandler }); + }, + ); + + expect(startResult.actions[0].hasScheduletask()).toBe(true); + + // Activity fails → handler returns null → should NOT retry, task should fail + const result = await replay( + [newTaskScheduledEvent(1, "flakyActivity")], + [newTaskFailedEvent(1, new Error("Activity error"))], + ); + const completeAction = getAndValidateSingleCompleteOrchestrationAction(result); + expect(completeAction?.getOrchestrationstatus()).toEqual(pb.OrchestrationStatus.ORCHESTRATION_STATUS_FAILED); + }); + + it("should create a retry timer when retry handler returns a positive delay", async () => { + const { result: startResult, replay } = await startOrchestration( + async function* (ctx: OrchestrationContext): any { + const retryHandler = async (_retryCtx: any) => { + return 5000; // Retry after 5 seconds + }; + return yield ctx.callActivity("flakyActivity", undefined, { retry: retryHandler }); + }, + ); + + expect(startResult.actions[0].hasScheduletask()).toBe(true); + + // Activity fails → handler returns 5000 → should create a timer + const result = await replay( + [newTaskScheduledEvent(1, "flakyActivity")], + [newTaskFailedEvent(1, new Error("Transient failure"))], + ); + expect(result.actions.length).toBe(1); + expect(result.actions[0].hasCreatetimer()).toBe(true); + }); + + it("should retry immediately when retry handler returns 0", async () => { + const { result: startResult, replay } = await startOrchestration( + async function* (ctx: OrchestrationContext): any { + const retryHandler = async (_retryCtx: any) => { + return 0; // Retry immediately via zero delay + }; + return yield ctx.callActivity("flakyActivity", undefined, { retry: retryHandler }); + }, + ); + + expect(startResult.actions[0].hasScheduletask()).toBe(true); + + // Activity fails → handler returns 0 → should reschedule immediately (no timer) + const result = await replay( + [newTaskScheduledEvent(1, "flakyActivity")], + [newTaskFailedEvent(1, new Error("Transient failure"))], + ); + expect(result.actions.length).toBe(1); + expect(result.actions[0].hasScheduletask()).toBe(true); + expect(result.actions[0].getScheduletask()?.getName()).toBe("flakyActivity"); + }); + + it("should fail the task when retry handler returns NaN", async () => { + const { result: startResult, replay } = await startOrchestration( + async function* (ctx: OrchestrationContext): any { + const retryHandler = async (_retryCtx: any) => { + return NaN; + }; + return yield ctx.callActivity("flakyActivity", undefined, { retry: retryHandler }); + }, + ); + + expect(startResult.actions[0].hasScheduletask()).toBe(true); + + // Activity fails → handler returns NaN → should NOT retry + const result = await replay( + [newTaskScheduledEvent(1, "flakyActivity")], + [newTaskFailedEvent(1, new Error("Activity error"))], + ); + const completeAction = getAndValidateSingleCompleteOrchestrationAction(result); + expect(completeAction?.getOrchestrationstatus()).toEqual(pb.OrchestrationStatus.ORCHESTRATION_STATUS_FAILED); + }); + + it("should fail the task when retry handler returns Infinity", async () => { + const { result: startResult, replay } = await startOrchestration( + async function* (ctx: OrchestrationContext): any { + const retryHandler = async (_retryCtx: any) => { + return Infinity; + }; + return yield ctx.callActivity("flakyActivity", undefined, { retry: retryHandler }); + }, + ); + + expect(startResult.actions[0].hasScheduletask()).toBe(true); + + // Activity fails → handler returns Infinity → should NOT retry + const result = await replay( + [newTaskScheduledEvent(1, "flakyActivity")], + [newTaskFailedEvent(1, new Error("Activity error"))], + ); + const completeAction = getAndValidateSingleCompleteOrchestrationAction(result); + expect(completeAction?.getOrchestrationstatus()).toEqual(pb.OrchestrationStatus.ORCHESTRATION_STATUS_FAILED); + }); }); it("should complete immediately when whenAll is called with an empty task array", async () => { diff --git a/test/e2e-azuremanaged/orchestration.spec.ts b/test/e2e-azuremanaged/orchestration.spec.ts index 47792a2..cee61b7 100644 --- a/test/e2e-azuremanaged/orchestration.spec.ts +++ b/test/e2e-azuremanaged/orchestration.spec.ts @@ -657,6 +657,144 @@ describe("Durable Task Scheduler (DTS) E2E Tests", () => { expect(invoked).toBe(true); }, 31000); + // ==================== Retry Handler Tests ==================== + + it("should fail (not retry infinitely) when retry handler returns undefined", async () => { + // Issue: A retry handler with a missing return statement returns undefined. + // Before the fix, `undefined !== false` was truthy, so the executor treated it + // as "retry", causing an infinite retry loop. The fix uses a positive check: + // only `true` or a finite number triggers a retry. + let attemptCount = 0; + + const failingActivity = async (_: ActivityContext) => { + attemptCount++; + throw new Error(`Failure on attempt ${attemptCount}`); + }; + + const orchestrator: TOrchestrator = async function* (ctx: OrchestrationContext): any { + // Cast to any to simulate a JavaScript consumer or handler with a missing return + const retryHandler = (async (_retryCtx: any) => { + // Intentionally no return statement — returns undefined + }) as any; + const result = yield ctx.callActivity(failingActivity, undefined, { retry: retryHandler }); + return result; + }; + + taskHubWorker.addActivity(failingActivity); + taskHubWorker.addOrchestrator(orchestrator); + await taskHubWorker.start(); + + const id = await taskHubClient.scheduleNewOrchestration(orchestrator); + const state = await taskHubClient.waitForOrchestrationCompletion(id, undefined, 30); + + expect(state).toBeDefined(); + // Should fail after exactly 1 attempt — the handler returned undefined so no retry + expect(state?.runtimeStatus).toEqual(OrchestrationStatus.ORCHESTRATION_STATUS_FAILED); + expect(state?.failureDetails).toBeDefined(); + expect(attemptCount).toBe(1); + }, 31000); + + it("should fail (not retry infinitely) when retry handler returns null", async () => { + let attemptCount = 0; + + const failingActivity = async (_: ActivityContext) => { + attemptCount++; + throw new Error(`Failure on attempt ${attemptCount}`); + }; + + const orchestrator: TOrchestrator = async function* (ctx: OrchestrationContext): any { + const retryHandler = async (_retryCtx: any) => { + return null as any; // Explicitly returning null + }; + const result = yield ctx.callActivity(failingActivity, undefined, { retry: retryHandler }); + return result; + }; + + taskHubWorker.addActivity(failingActivity); + taskHubWorker.addOrchestrator(orchestrator); + await taskHubWorker.start(); + + const id = await taskHubClient.scheduleNewOrchestration(orchestrator); + const state = await taskHubClient.waitForOrchestrationCompletion(id, undefined, 30); + + expect(state).toBeDefined(); + expect(state?.runtimeStatus).toEqual(OrchestrationStatus.ORCHESTRATION_STATUS_FAILED); + expect(state?.failureDetails).toBeDefined(); + expect(attemptCount).toBe(1); + }, 31000); + + it("should retry and succeed when retry handler returns true", async () => { + let attemptCount = 0; + + const flakyActivity = async (_: ActivityContext, input: number) => { + attemptCount++; + if (attemptCount < 3) { + throw new Error(`Transient failure on attempt ${attemptCount}`); + } + return input * 2; + }; + + const orchestrator: TOrchestrator = async function* (ctx: OrchestrationContext, input: number): any { + const retryHandler = async (retryCtx: any) => retryCtx.lastAttemptNumber < 5; + const result = yield ctx.callActivity(flakyActivity, input, { retry: retryHandler }); + return result; + }; + + taskHubWorker.addActivity(flakyActivity); + taskHubWorker.addOrchestrator(orchestrator); + await taskHubWorker.start(); + + const id = await taskHubClient.scheduleNewOrchestration(orchestrator, 21); + const state = await taskHubClient.waitForOrchestrationCompletion(id, undefined, 30); + + expect(state).toBeDefined(); + expect(state?.runtimeStatus).toEqual(OrchestrationStatus.ORCHESTRATION_STATUS_COMPLETED); + expect(state?.failureDetails).toBeUndefined(); + expect(state?.serializedOutput).toEqual(JSON.stringify(42)); + expect(attemptCount).toBe(3); + }, 31000); + + it("should retry with delay when retry handler returns a positive number", async () => { + let attemptCount = 0; + const attemptTimes: number[] = []; + + const flakyActivity = async (_: ActivityContext) => { + attemptCount++; + attemptTimes.push(Date.now()); + if (attemptCount < 2) { + throw new Error(`Transient failure on attempt ${attemptCount}`); + } + return "success"; + }; + + const orchestrator: TOrchestrator = async function* (ctx: OrchestrationContext): any { + const retryHandler = async (_retryCtx: any) => { + return 1000; // Retry after 1 second + }; + const result = yield ctx.callActivity(flakyActivity, undefined, { retry: retryHandler }); + return result; + }; + + taskHubWorker.addActivity(flakyActivity); + taskHubWorker.addOrchestrator(orchestrator); + await taskHubWorker.start(); + + const id = await taskHubClient.scheduleNewOrchestration(orchestrator); + const state = await taskHubClient.waitForOrchestrationCompletion(id, undefined, 30); + + expect(state).toBeDefined(); + expect(state?.runtimeStatus).toEqual(OrchestrationStatus.ORCHESTRATION_STATUS_COMPLETED); + expect(state?.failureDetails).toBeUndefined(); + expect(state?.serializedOutput).toEqual(JSON.stringify("success")); + expect(attemptCount).toBe(2); + + // Verify there was at least ~1s delay between attempts + if (attemptTimes.length >= 2) { + const delay = attemptTimes[1] - attemptTimes[0]; + expect(delay).toBeGreaterThanOrEqual(900); // Allow some tolerance + } + }, 31000); + // // ==================== newGuid Tests ==================== it("should generate deterministic GUIDs with newGuid", async () => {