From 5c1788cd9044909071fcd35433902c770475091e Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 24 Mar 2026 11:05:19 -0500 Subject: [PATCH] Fix fence completion race in virtio-gpu worker When fences complete out of order (e.g., an immediate-retire for fence N+1 arrives before the timeline signal for fence N), the unconditional insert() would overwrite the higher fence_id with the lower one. This causes fence N+1 to appear incomplete forever, hanging the guest. Use entry().or_insert() with a max check so only strictly higher fence_ids update the completed_fences map. Signed-off-by: Adam Ford --- src/devices/src/virtio/gpu/virtio_gpu.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/devices/src/virtio/gpu/virtio_gpu.rs b/src/devices/src/virtio/gpu/virtio_gpu.rs index dc4bc99d6..ebca60486 100644 --- a/src/devices/src/virtio/gpu/virtio_gpu.rs +++ b/src/devices/src/virtio/gpu/virtio_gpu.rs @@ -203,10 +203,13 @@ impl VirtioGpu { i += 1; } } - // Update the last completed fence for this context - fence_state - .completed_fences - .insert(ring, completed_fence.fence_id); + // Update the last completed fence for this context. + // Use max() to avoid a race where an out-of-order completion + // (e.g., immediate-retire for fence N+1 followed by timeline + // signal for fence N) would overwrite a higher fence_id with + // a lower one, causing fence N+1 to be stuck forever. + let entry = fence_state.completed_fences.entry(ring).or_insert(0); + *entry = (*entry).max(completed_fence.fence_id); }) }