From 977ebcbb6449b129e5e7a4695f3a658d87f5aa41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20Cuervo?= Date: Mon, 11 May 2026 14:58:16 -0400 Subject: [PATCH 1/2] Fix Vulkan texture retirement --- builtin-programs/gpu/textures.folk | 65 +++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 10 deletions(-) diff --git a/builtin-programs/gpu/textures.folk b/builtin-programs/gpu/textures.folk index e74e4074..ab417073 100644 --- a/builtin-programs/gpu/textures.folk +++ b/builtin-programs/gpu/textures.folk @@ -87,9 +87,11 @@ defineVulkanHandleType $gpuc VkImageView defineVulkanHandleType $gpuc VkSampler $gpuc struct GpuTextureBlock { bool _Atomic alive; + bool _Atomic retiring; int width; int height; + int retireAfterFrame; GpuTextureHandle handle; @@ -118,6 +120,9 @@ $gpuc code { struct DeferredTextureEntry deferredQueue[DEFERRED_QUEUE_CAP]; int _Atomic deferredQueueCount = 0; pthread_mutex_t deferredQueueMutex = PTHREAD_MUTEX_INITIALIZER; + + #define TEXTURE_RETIRE_GRACE_FRAMES 2 + int textureFrameEpoch = 0; } $gpuc proc textureManagerInit {} void { $[vktry volkInitialize()] @@ -520,6 +525,8 @@ $gpuc proc createGpuTexture {int width int height int format} GpuTextureBlock* { block->width = width; block->height = height; + block->retiring = false; + block->retireAfterFrame = 0; createImage(width, height, (VkFormat) format, VK_IMAGE_TILING_OPTIMAL, @@ -568,13 +575,9 @@ $gpuc proc createGpuTexture {int width int height int format} GpuTextureBlock* { return block; } -# In-flight texture upload ring, per worker thread. Lets -# copyImageToGpuTexture submit a staging-buffer->image copy without -# waiting for it to complete; the staging buffer is reclaimed lazily -# on a later call once the GPU is done with it. We only block when -# the ring fills up (i.e. we've pipelined enough uploads that the -# oldest one still isn't done), which is the natural backpressure -# point for a single worker. +# Per-worker reusable texture upload slots. copyImageToGpuTexture waits +# for each upload before publishing the descriptor so a texture can't be +# freed while its upload command buffer is still using the image. $gpuc code { #define INFLIGHT_UPLOADS 8 struct InflightUpload { @@ -605,6 +608,8 @@ $gpuc code { #endif vmaDestroyBuffer(vmaGetAllocator(), slot->stagingBuffer, slot->stagingBufferAllocation); + slot->stagingBuffer = VK_NULL_HANDLE; + slot->stagingBufferAllocation = NULL; slot->inUse = false; } if (slot->fence == VK_NULL_HANDLE) { @@ -726,6 +731,16 @@ $gpuc proc copyImageToGpuTexture {Image im} GpuTextureHandle { } upload->inUse = true; + vkWaitForFences(device, 1, &upload->fence, VK_TRUE, UINT64_MAX); +#ifdef TRACY_ENABLE + TracyCFree(upload->stagingBufferAllocation); +#endif + vmaDestroyBuffer(vmaGetAllocator(), + upload->stagingBuffer, upload->stagingBufferAllocation); + upload->stagingBuffer = VK_NULL_HANDLE; + upload->stagingBufferAllocation = NULL; + upload->inUse = false; + addToTextureDescriptorSet(block->handle); return block->handle; @@ -757,8 +772,17 @@ $gpuc proc freeGpuTexture {GpuTextureHandle gim} void { # Actually destroy a texture's GPU resources. Must only be called # on the GPU thread between frames when the GPU is idle. $gpuc code { + static void retireGpuTexture(GpuTextureHandle gim) { + GpuTextureBlock* block = &gpuTextures[gim]; + if (gim == 0 || !block->alive || block->retiring) return; + + block->retiring = true; + block->retireAfterFrame = textureFrameEpoch + TEXTURE_RETIRE_GRACE_FRAMES; + } + static void destroyGpuTextureResources(GpuTextureHandle gim) { GpuTextureBlock* block = &gpuTextures[gim]; + if (gim == 0 || !block->alive) return; // Point this descriptor slot at the placeholder texture (slot 0) // so later frames don't reference a destroyed image. @@ -783,18 +807,37 @@ $gpuc code { #ifdef TRACY_ENABLE TracyCFree(block->textureImageAllocation); #endif - vmaDestroyImage(vmaGetAllocator(), block->textureImage, block->textureImageAllocation); vkDestroySampler(device, block->textureSampler, NULL); vkDestroyImageView(device, block->textureImageView, NULL); + vmaDestroyImage(vmaGetAllocator(), block->textureImage, block->textureImageAllocation); free(block->description); + block->description = NULL; + block->textureImage = VK_NULL_HANDLE; + block->textureImageAllocation = NULL; + block->textureImageView = VK_NULL_HANDLE; + block->textureSampler = VK_NULL_HANDLE; + block->retiring = false; + block->retireAfterFrame = 0; block->alive = false; } + + static void destroyRetiredGpuTextures() { + for (GpuTextureHandle gim = 1; gim < getMaxTextures(); gim++) { + GpuTextureBlock* block = &gpuTextures[gim]; + if (block->alive && block->retiring && + block->retireAfterFrame <= textureFrameEpoch) { + destroyGpuTextureResources(gim); + } + } + } } # Called on the GPU thread between frames, after the previous # frame's fence has been waited on (GPU is idle). $gpuc proc processDeferredTextureOps {} void { + textureFrameEpoch++; + pthread_mutex_lock(&deferredQueueMutex); int count = deferredQueueCount; struct DeferredTextureEntry localQueue[DEFERRED_QUEUE_CAP]; @@ -808,10 +851,12 @@ $gpuc proc processDeferredTextureOps {} void { writeTextureDescriptor(localQueue[i].handle); break; case DEFERRED_FREE: - destroyGpuTextureResources(localQueue[i].handle); + retireGpuTexture(localQueue[i].handle); break; } } + + destroyRetiredGpuTextures(); } $gpuc proc initPlaceholderTexture {} void { @@ -829,7 +874,7 @@ $gpuc proc initPlaceholderTexture {} void { debugIm.data[i+0] = 255; debugIm.data[i+1] = 0; debugIm.data[i+2] = 255; - debugIm.data[i+3] = 0; + debugIm.data[i+3] = 255; } } GpuTextureHandle han = copyImageToGpuTexture(debugIm); From 0c520689e7010002f7800940172f69cfa5fd574c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20Cuervo?= Date: Mon, 11 May 2026 18:59:54 -0400 Subject: [PATCH 2/2] Fix Vulkan texture publication ordering --- builtin-programs/gpu/canvases.folk | 9 +++++-- builtin-programs/gpu/draw.folk | 32 +++++++++++++++++++----- builtin-programs/gpu/textures.folk | 39 ++++++++++++------------------ 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/builtin-programs/gpu/canvases.folk b/builtin-programs/gpu/canvases.folk index 6ffb5667..8e0f7de0 100644 --- a/builtin-programs/gpu/canvases.folk +++ b/builtin-programs/gpu/canvases.folk @@ -286,7 +286,7 @@ When the GPU library is /gpuLib/ &\ with settle $settle } - Wish the GPU runs frame prelude handler [list apply {{gpuCanvasLib} { + Wish the GPU runs frame prelude handler [list apply {{gpuCanvasLib gpuTextureLib} { upvar missingPipelines missingPipelines upvar mostRecentDrawListsByTexture mostRecentDrawListsByTexture if {![info exists missingCanvases]} { @@ -400,6 +400,11 @@ When the GPU library is /gpuLib/ &\ } } } + # Image texture wishes can publish draw commands while their + # descriptor writes are still queued. Drain before recording + # canvas command buffers so those handles are drawable. + $gpuTextureLib drainDeferredTextureOps + dict for {id drawLists} $drawListsByTexture { if {[dict exists $mostRecentDrawListsByTexture $id] && ($drawLists eq $mostRecentDrawListsByTexture($id))} { @@ -424,7 +429,7 @@ When the GPU library is /gpuLib/ &\ foreach ref $acquiredRefs { StatementRelease! $ref } - }} $gpuCanvasLib] + }} $gpuCanvasLib $gpuTextureLib] When /someone/ wishes /p/ has a canvas { Wish $p has a canvas with width 1024 height 1024 settle 3ms diff --git a/builtin-programs/gpu/draw.folk b/builtin-programs/gpu/draw.folk index d48291a4..c59db073 100644 --- a/builtin-programs/gpu/draw.folk +++ b/builtin-programs/gpu/draw.folk @@ -12,6 +12,7 @@ if {[info exists this] && $::tcl_platform(os) eq "darwin"} { When the GPU library is /gpuLib/ &\ the GPU Vulkan handle type definer is /defineVulkanHandleType/ &\ + the GPU texture library is /gpuTextureLib/ &\ the GPU pipeline library is /pipelineLib/ { fn defineVulkanHandleType @@ -59,7 +60,7 @@ $cc code [subst { uint32_t imageIndex; VkSemaphore imageAvailableSemaphore; - VkSemaphore renderFinishedSemaphore; + VkSemaphore* renderFinishedSemaphores; VkFence inFlightFence; [expr { $useGlfw ? "GLFWwindow* window;" : "" }] } DisplayState; @@ -343,7 +344,10 @@ $cc proc initDisplay {char* display uint32_t width uint32_t height uint32_t refr fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; $[vktry {vkCreateSemaphore(device, &semaphoreInfo, NULL, &ds->imageAvailableSemaphore)}] - $[vktry {vkCreateSemaphore(device, &semaphoreInfo, NULL, &ds->renderFinishedSemaphore)}] + ds->renderFinishedSemaphores = calloc(ds->swapchainImageCount, sizeof(VkSemaphore)); + for (uint32_t i = 0; i < ds->swapchainImageCount; i++) { + $[vktry {vkCreateSemaphore(device, &semaphoreInfo, NULL, &ds->renderFinishedSemaphores[i])}] + } $[vktry {vkCreateFence(device, &fenceInfo, NULL, &ds->inFlightFence)}] } @@ -358,7 +362,13 @@ $cc proc drawStart {DisplayState* ds} void { vkResetFences(device, 1, &ds->inFlightFence); - vkAcquireNextImageKHR(device, ds->swapchain, UINT64_MAX, ds->imageAvailableSemaphore, VK_NULL_HANDLE, &ds->imageIndex); + VkResult acquireResult = vkAcquireNextImageKHR(device, ds->swapchain, UINT64_MAX, + ds->imageAvailableSemaphore, VK_NULL_HANDLE, + &ds->imageIndex); + if (acquireResult != VK_SUCCESS && acquireResult != VK_SUBOPTIMAL_KHR) { + FOLK_ERROR("Failed vkAcquireNextImageKHR: %s (%d)\n", + VkResultToString(acquireResult), acquireResult); + } vkResetCommandBuffer(commandBuffer, 0); @@ -445,7 +455,7 @@ $cc proc drawEnd {} void { vkCmdEndRenderPass(commandBuffer); $[vktry {vkEndCommandBuffer(commandBuffer)}] - VkSemaphore signalSemaphores[] = {ds->renderFinishedSemaphore}; + VkSemaphore signalSemaphores[] = {ds->renderFinishedSemaphores[ds->imageIndex]}; { VkSubmitInfo submitInfo = {0}; submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; @@ -466,6 +476,7 @@ $cc proc drawEnd {} void { $[vktry {vkQueueSubmit(*graphicsQueue_ptr(), 1, &submitInfo, ds->inFlightFence)}] pthread_mutex_unlock(graphicsQueueMutex_ptr()); } + { VkPresentInfoKHR presentInfo = {0}; presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; @@ -479,8 +490,12 @@ $cc proc drawEnd {} void { presentInfo.pResults = NULL; pthread_mutex_lock(graphicsQueueMutex_ptr()); - vkQueuePresentKHR(*presentQueue_ptr(), &presentInfo); + VkResult presentResult = vkQueuePresentKHR(*presentQueue_ptr(), &presentInfo); pthread_mutex_unlock(graphicsQueueMutex_ptr()); + if (presentResult != VK_SUCCESS && presentResult != VK_SUBOPTIMAL_KHR) { + FOLK_ERROR("Failed vkQueuePresentKHR: %s (%d)\n", + VkResultToString(presentResult), presentResult); + } } // Wait for this display's submission to complete before we @@ -937,7 +952,8 @@ When /wisher/ wishes the GPU compiles pipeline /name/ /source/ { set missingPipelines [dict create] while true { - # Run prelude handlers once per frame (textures, canvases). + # Advance texture retirement once per frame, then run canvas redraw work. + $gpuTextureLib beginTextureFrame ForEach! /someone/ wishes the GPU runs frame prelude handler /hd/ { {*}$hd } @@ -1004,6 +1020,10 @@ while true { } } + # Make textures published while building the display list drawable + # before we record display command buffers. + $gpuTextureLib drainDeferredTextureOps + # Draw to each active display. dict for {displayName ds} $displays { if {$ds eq "missing"} { continue } diff --git a/builtin-programs/gpu/textures.folk b/builtin-programs/gpu/textures.folk index ab417073..6321c8ac 100644 --- a/builtin-programs/gpu/textures.folk +++ b/builtin-programs/gpu/textures.folk @@ -22,6 +22,8 @@ $gpuc code { VmaAllocator vmaGetAllocator(); } $gpuc include +$gpuc include +$gpuc include $gpuc include $gpuc extend $gpuLib @@ -575,9 +577,8 @@ $gpuc proc createGpuTexture {int width int height int format} GpuTextureBlock* { return block; } -# Per-worker reusable texture upload slots. copyImageToGpuTexture waits -# for each upload before publishing the descriptor so a texture can't be -# freed while its upload command buffer is still using the image. +# Per-worker reusable texture upload slots. Staging buffers stay alive +# until their slot is reused and the previous upload fence has signaled. $gpuc code { #define INFLIGHT_UPLOADS 8 struct InflightUpload { @@ -626,6 +627,8 @@ $gpuc code { allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; allocInfo.commandBufferCount = 1; vkAllocateCommandBuffers(device, &allocInfo, &slot->cmdBuffer); + } else { + vkResetCommandBuffer(slot->cmdBuffer, 0); } return slot; } @@ -731,16 +734,6 @@ $gpuc proc copyImageToGpuTexture {Image im} GpuTextureHandle { } upload->inUse = true; - vkWaitForFences(device, 1, &upload->fence, VK_TRUE, UINT64_MAX); -#ifdef TRACY_ENABLE - TracyCFree(upload->stagingBufferAllocation); -#endif - vmaDestroyBuffer(vmaGetAllocator(), - upload->stagingBuffer, upload->stagingBufferAllocation); - upload->stagingBuffer = VK_NULL_HANDLE; - upload->stagingBufferAllocation = NULL; - upload->inUse = false; - addToTextureDescriptorSet(block->handle); return block->handle; @@ -833,11 +826,8 @@ $gpuc code { } } -# Called on the GPU thread between frames, after the previous -# frame's fence has been waited on (GPU is idle). -$gpuc proc processDeferredTextureOps {} void { - textureFrameEpoch++; - +# Called on the GPU thread before recording work that may sample textures. +$gpuc proc drainDeferredTextureOps {} void { pthread_mutex_lock(&deferredQueueMutex); int count = deferredQueueCount; struct DeferredTextureEntry localQueue[DEFERRED_QUEUE_CAP]; @@ -859,6 +849,13 @@ $gpuc proc processDeferredTextureOps {} void { destroyRetiredGpuTextures(); } +# Called once per GPU frame so retired textures age exactly once, +# even though descriptor work may be drained multiple times. +$gpuc proc beginTextureFrame {} void { + textureFrameEpoch++; + drainDeferredTextureOps(); +} + $gpuc proc initPlaceholderTexture {} void { // Set up a placeholder texture in slot 0 that can always be drawn // that we can swap in when textures get invalidated. @@ -884,7 +881,7 @@ $gpuc proc initPlaceholderTexture {} void { // drain the queued DEFERRED_ADD for slot 0 (which is now redundant // but harmless). initializeDescriptorSet(han); - processDeferredTextureOps(); + drainDeferredTextureOps(); } set gpuTextureLib [$gpuc compile] @@ -893,10 +890,6 @@ $gpuTextureLib textureManagerInit Claim the GPU texture library is $gpuTextureLib -Wish the GPU runs frame prelude handler [list apply {{gpuTextureLib} { - $gpuTextureLib processDeferredTextureOps -}} $gpuTextureLib] - When /someone/ wishes the GPU loads image /im/ as texture { set gtex [$gpuTextureLib copyImageToGpuTexture $im] Claim the GPU has loaded image $im as texture $gtex \