@@ -16,6 +16,7 @@ DECLARE
1616 v_when_exhausted text ;
1717 v_task_exhausted boolean ; -- True if task has exhausted retries
1818 v_flow_slug_for_deps text ; -- Used for decrementing remaining_deps on plain skip
19+ v_prev_step_status text ; -- Previous step status for transition-based decrement
1920begin
2021
2122-- If run is already failed, no retries allowed
@@ -47,6 +48,45 @@ IF EXISTS (SELECT 1 FROM pgflow.runs WHERE pgflow.runs.run_id = fail_task.run_id
4748 RETURN;
4849END IF;
4950
-- Late callback guard.
-- A failure callback can arrive after the step has already left 'started'
-- (skipped, completed or failed). In that case the step/run state must not be
-- mutated: archive this task's queue message if it has one, return the task
-- row as it currently is, and exit.
--
-- The status read here is stored in v_prev_step_status, which is declared in
-- the function's outer DECLARE section. It is deliberately NOT redeclared in
-- this sub-block: a sub-block variable with the same name masks the outer one,
-- so the outer variable would still be NULL after this sub-block ends.
DECLARE
  v_flow_slug text;  -- flow_slug of the run; first argument to pgmq.archive
BEGIN
  -- Read the step status before any of the updates further down run.
  SELECT ss.status INTO v_prev_step_status
  FROM pgflow.step_states AS ss
  WHERE ss.run_id = fail_task.run_id
    AND ss.step_slug = fail_task.step_slug;

  -- A NULL status means no step_states row was found; fall through to the
  -- normal failure handling below.
  IF v_prev_step_status IS NOT NULL AND v_prev_step_status != 'started' THEN
    SELECT r.flow_slug INTO v_flow_slug
    FROM pgflow.runs AS r
    WHERE r.run_id = fail_task.run_id;

    -- Archive the task message if present. This is an ungrouped aggregate, so
    -- it yields one row even when no task matches; HAVING drops that row so
    -- pgmq.archive is not called with an empty aggregate.
    PERFORM pgmq.archive(v_flow_slug, ARRAY_AGG(st.message_id))
    FROM pgflow.step_tasks AS st
    WHERE st.run_id = fail_task.run_id
      AND st.step_slug = fail_task.step_slug
      AND st.task_index = fail_task.task_index
      AND st.message_id IS NOT NULL
    HAVING COUNT(st.message_id) > 0;

    -- Return the current task row without mutations.
    RETURN QUERY
    SELECT *
    FROM pgflow.step_tasks
    WHERE pgflow.step_tasks.run_id = fail_task.run_id
      AND pgflow.step_tasks.step_slug = fail_task.step_slug
      AND pgflow.step_tasks.task_index = fail_task.task_index;
    RETURN;
  END IF;
END;
89+
5090WITH run_lock AS (
5191 SELECT * FROM pgflow .runs
5292 WHERE pgflow .runs .run_id = fail_task .run_id
@@ -58,6 +98,10 @@ step_lock AS (
5898 AND pgflow .step_states .step_slug = fail_task .step_slug
5999 FOR UPDATE
60100),
101+ prev_step_status AS (
102+ -- Capture previous status BEFORE any updates (must be separate CTE for correct visibility)
103+ SELECT status FROM step_lock
104+ ),
61105flow_info AS (
62106 SELECT r .flow_slug
63107 FROM pgflow .runs r
@@ -152,9 +196,12 @@ run_update AS (
152196 WHEN (select status from maybe_fail_step) = ' failed' THEN now()
153197 ELSE NULL
154198 END,
155- -- Decrement remaining_steps when step was skipped (not failed, run continues)
199+ -- Decrement remaining_steps only on FIRST transition to skipped
200+ -- (not when step was already skipped and a second task fails)
156201 remaining_steps = CASE
157- WHEN (select status from maybe_fail_step) = ' skipped' THEN pgflow .runs .remaining_steps - 1
202+ WHEN (select status from maybe_fail_step) = ' skipped'
203+ AND (select status from prev_step_status) != ' skipped'
204+ THEN pgflow .runs .remaining_steps - 1
158205 ELSE pgflow .runs .remaining_steps
159206 END
160207 WHERE pgflow .runs .run_id = fail_task .run_id
@@ -193,6 +240,17 @@ END IF;
193240
194241-- Handle step skipping (when_exhausted = 'skip' or 'skip-cascade')
195242 IF v_task_exhausted AND v_step_skipped THEN
-- Archive the queue messages of every task of this step that is still
-- 'queued' or 'started' and has a message_id.
-- Grouping by flow_slug supplies pgmq.archive's first argument alongside the
-- aggregated message ids. When no task matches there are no groups, so
-- pgmq.archive is not called at all.
PERFORM pgmq.archive(r.flow_slug, ARRAY_AGG(st.message_id))
FROM pgflow.step_tasks AS st
INNER JOIN pgflow.runs AS r
  ON r.run_id = st.run_id
WHERE st.run_id = fail_task.run_id
  AND st.step_slug = fail_task.step_slug
  AND st.status IN ('queued', 'started')
  AND st.message_id IS NOT NULL
GROUP BY r.flow_slug
HAVING COUNT(st.message_id) > 0;
253+
196254 -- Send broadcast event for step skipped
197255 PERFORM realtime .send (
198256 jsonb_build_object(
0 commit comments