Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions builtin-programs/gpu/canvases.folk
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ When the GPU library is /gpuLib/ &\
with settle $settle
}

Wish the GPU runs frame prelude handler [list apply {{gpuCanvasLib} {
Wish the GPU runs frame prelude handler [list apply {{gpuCanvasLib gpuTextureLib} {
upvar missingPipelines missingPipelines
upvar mostRecentDrawListsByTexture mostRecentDrawListsByTexture
if {![info exists missingCanvases]} {
Expand Down Expand Up @@ -400,6 +400,11 @@ When the GPU library is /gpuLib/ &\
} }
}

# Image texture wishes can publish draw commands while their
# descriptor writes are still queued. Drain before recording
# canvas command buffers so those handles are drawable.
$gpuTextureLib drainDeferredTextureOps

dict for {id drawLists} $drawListsByTexture {
if {[dict exists $mostRecentDrawListsByTexture $id] &&
($drawLists eq $mostRecentDrawListsByTexture($id))} {
Expand All @@ -424,7 +429,7 @@ When the GPU library is /gpuLib/ &\
foreach ref $acquiredRefs {
StatementRelease! $ref
}
}} $gpuCanvasLib]
}} $gpuCanvasLib $gpuTextureLib]

When /someone/ wishes /p/ has a canvas {
Wish $p has a canvas with width 1024 height 1024 settle 3ms
Expand Down
32 changes: 26 additions & 6 deletions builtin-programs/gpu/draw.folk
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ if {[info exists this] && $::tcl_platform(os) eq "darwin"} {

When the GPU library is /gpuLib/ &\
the GPU Vulkan handle type definer is /defineVulkanHandleType/ &\
the GPU texture library is /gpuTextureLib/ &\
the GPU pipeline library is /pipelineLib/ {

fn defineVulkanHandleType
Expand Down Expand Up @@ -59,7 +60,7 @@ $cc code [subst {
uint32_t imageIndex;

VkSemaphore imageAvailableSemaphore;
VkSemaphore renderFinishedSemaphore;
VkSemaphore* renderFinishedSemaphores;
VkFence inFlightFence;
[expr { $useGlfw ? "GLFWwindow* window;" : "" }]
} DisplayState;
Expand Down Expand Up @@ -343,7 +344,10 @@ $cc proc initDisplay {char* display uint32_t width uint32_t height uint32_t refr
fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT;

$[vktry {vkCreateSemaphore(device, &semaphoreInfo, NULL, &ds->imageAvailableSemaphore)}]
$[vktry {vkCreateSemaphore(device, &semaphoreInfo, NULL, &ds->renderFinishedSemaphore)}]
ds->renderFinishedSemaphores = calloc(ds->swapchainImageCount, sizeof(VkSemaphore));
for (uint32_t i = 0; i < ds->swapchainImageCount; i++) {
$[vktry {vkCreateSemaphore(device, &semaphoreInfo, NULL, &ds->renderFinishedSemaphores[i])}]
}
$[vktry {vkCreateFence(device, &fenceInfo, NULL, &ds->inFlightFence)}]
}

Expand All @@ -358,7 +362,13 @@ $cc proc drawStart {DisplayState* ds} void {

vkResetFences(device, 1, &ds->inFlightFence);

vkAcquireNextImageKHR(device, ds->swapchain, UINT64_MAX, ds->imageAvailableSemaphore, VK_NULL_HANDLE, &ds->imageIndex);
VkResult acquireResult = vkAcquireNextImageKHR(device, ds->swapchain, UINT64_MAX,
ds->imageAvailableSemaphore, VK_NULL_HANDLE,
&ds->imageIndex);
if (acquireResult != VK_SUCCESS && acquireResult != VK_SUBOPTIMAL_KHR) {
FOLK_ERROR("Failed vkAcquireNextImageKHR: %s (%d)\n",
VkResultToString(acquireResult), acquireResult);
}

vkResetCommandBuffer(commandBuffer, 0);

Expand Down Expand Up @@ -445,7 +455,7 @@ $cc proc drawEnd {} void {
vkCmdEndRenderPass(commandBuffer);
$[vktry {vkEndCommandBuffer(commandBuffer)}]

VkSemaphore signalSemaphores[] = {ds->renderFinishedSemaphore};
VkSemaphore signalSemaphores[] = {ds->renderFinishedSemaphores[ds->imageIndex]};
{
VkSubmitInfo submitInfo = {0};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
Expand All @@ -466,6 +476,7 @@ $cc proc drawEnd {} void {
$[vktry {vkQueueSubmit(*graphicsQueue_ptr(), 1, &submitInfo, ds->inFlightFence)}]
pthread_mutex_unlock(graphicsQueueMutex_ptr());
}

{
VkPresentInfoKHR presentInfo = {0};
presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
Expand All @@ -479,8 +490,12 @@ $cc proc drawEnd {} void {
presentInfo.pResults = NULL;

pthread_mutex_lock(graphicsQueueMutex_ptr());
vkQueuePresentKHR(*presentQueue_ptr(), &presentInfo);
VkResult presentResult = vkQueuePresentKHR(*presentQueue_ptr(), &presentInfo);
pthread_mutex_unlock(graphicsQueueMutex_ptr());
if (presentResult != VK_SUCCESS && presentResult != VK_SUBOPTIMAL_KHR) {
FOLK_ERROR("Failed vkQueuePresentKHR: %s (%d)\n",
VkResultToString(presentResult), presentResult);
}
}

// Wait for this display's submission to complete before we
Expand Down Expand Up @@ -937,7 +952,8 @@ When /wisher/ wishes the GPU compiles pipeline /name/ /source/ {

set missingPipelines [dict create]
while true {
# Run prelude handlers once per frame (textures, canvases).
# Advance texture retirement once per frame, then run canvas redraw work.
$gpuTextureLib beginTextureFrame
ForEach! /someone/ wishes the GPU runs frame prelude handler /hd/ {
{*}$hd
}
Expand Down Expand Up @@ -1004,6 +1020,10 @@ while true {
}
}

# Make textures published while building the display list drawable
# before we record display command buffers.
$gpuTextureLib drainDeferredTextureOps

# Draw to each active display.
dict for {displayName ds} $displays {
if {$ds eq "missing"} { continue }
Expand Down
74 changes: 56 additions & 18 deletions builtin-programs/gpu/textures.folk
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ $gpuc code {
VmaAllocator vmaGetAllocator();
}
$gpuc include <pthread.h>
$gpuc include <stdlib.h>
$gpuc include <string.h>
$gpuc include <stdatomic.h>

$gpuc extend $gpuLib
Expand Down Expand Up @@ -87,9 +89,11 @@ defineVulkanHandleType $gpuc VkImageView
defineVulkanHandleType $gpuc VkSampler
$gpuc struct GpuTextureBlock {
bool _Atomic alive;
bool _Atomic retiring;

int width;
int height;
int retireAfterFrame;

GpuTextureHandle handle;

Expand Down Expand Up @@ -118,6 +122,9 @@ $gpuc code {
struct DeferredTextureEntry deferredQueue[DEFERRED_QUEUE_CAP];
int _Atomic deferredQueueCount = 0;
pthread_mutex_t deferredQueueMutex = PTHREAD_MUTEX_INITIALIZER;

#define TEXTURE_RETIRE_GRACE_FRAMES 2
int textureFrameEpoch = 0;
}
$gpuc proc textureManagerInit {} void {
$[vktry volkInitialize()]
Expand Down Expand Up @@ -520,6 +527,8 @@ $gpuc proc createGpuTexture {int width int height int format} GpuTextureBlock* {

block->width = width;
block->height = height;
block->retiring = false;
block->retireAfterFrame = 0;

createImage(width, height,
(VkFormat) format, VK_IMAGE_TILING_OPTIMAL,
Expand Down Expand Up @@ -568,13 +577,8 @@ $gpuc proc createGpuTexture {int width int height int format} GpuTextureBlock* {
return block;
}

# In-flight texture upload ring, per worker thread. Lets
# copyImageToGpuTexture submit a staging-buffer->image copy without
# waiting for it to complete; the staging buffer is reclaimed lazily
# on a later call once the GPU is done with it. We only block when
# the ring fills up (i.e. we've pipelined enough uploads that the
# oldest one still isn't done), which is the natural backpressure
# point for a single worker.
# Per-worker reusable texture upload slots. Staging buffers stay alive
# until their slot is reused and the previous upload fence has signaled.
$gpuc code {
#define INFLIGHT_UPLOADS 8
struct InflightUpload {
Expand Down Expand Up @@ -605,6 +609,8 @@ $gpuc code {
#endif
vmaDestroyBuffer(vmaGetAllocator(),
slot->stagingBuffer, slot->stagingBufferAllocation);
slot->stagingBuffer = VK_NULL_HANDLE;
slot->stagingBufferAllocation = NULL;
slot->inUse = false;
}
if (slot->fence == VK_NULL_HANDLE) {
Expand All @@ -621,6 +627,8 @@ $gpuc code {
allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
allocInfo.commandBufferCount = 1;
vkAllocateCommandBuffers(device, &allocInfo, &slot->cmdBuffer);
} else {
vkResetCommandBuffer(slot->cmdBuffer, 0);
}
return slot;
}
Expand Down Expand Up @@ -757,8 +765,17 @@ $gpuc proc freeGpuTexture {GpuTextureHandle gim} void {
# Actually destroy a texture's GPU resources. Must only be called
# on the GPU thread between frames when the GPU is idle.
$gpuc code {
static void retireGpuTexture(GpuTextureHandle gim) {
GpuTextureBlock* block = &gpuTextures[gim];
if (gim == 0 || !block->alive || block->retiring) return;

block->retiring = true;
block->retireAfterFrame = textureFrameEpoch + TEXTURE_RETIRE_GRACE_FRAMES;
}

static void destroyGpuTextureResources(GpuTextureHandle gim) {
GpuTextureBlock* block = &gpuTextures[gim];
if (gim == 0 || !block->alive) return;

// Point this descriptor slot at the placeholder texture (slot 0)
// so later frames don't reference a destroyed image.
Expand All @@ -783,18 +800,34 @@ $gpuc code {
#ifdef TRACY_ENABLE
TracyCFree(block->textureImageAllocation);
#endif
vmaDestroyImage(vmaGetAllocator(), block->textureImage, block->textureImageAllocation);
vkDestroySampler(device, block->textureSampler, NULL);
vkDestroyImageView(device, block->textureImageView, NULL);
vmaDestroyImage(vmaGetAllocator(), block->textureImage, block->textureImageAllocation);

free(block->description);
block->description = NULL;
block->textureImage = VK_NULL_HANDLE;
block->textureImageAllocation = NULL;
block->textureImageView = VK_NULL_HANDLE;
block->textureSampler = VK_NULL_HANDLE;
block->retiring = false;
block->retireAfterFrame = 0;
block->alive = false;
}

static void destroyRetiredGpuTextures() {
for (GpuTextureHandle gim = 1; gim < getMaxTextures(); gim++) {
GpuTextureBlock* block = &gpuTextures[gim];
if (block->alive && block->retiring &&
block->retireAfterFrame <= textureFrameEpoch) {
destroyGpuTextureResources(gim);
}
}
}
}

# Called on the GPU thread between frames, after the previous
# frame's fence has been waited on (GPU is idle).
$gpuc proc processDeferredTextureOps {} void {
# Called on the GPU thread before recording work that may sample textures.
$gpuc proc drainDeferredTextureOps {} void {
pthread_mutex_lock(&deferredQueueMutex);
int count = deferredQueueCount;
struct DeferredTextureEntry localQueue[DEFERRED_QUEUE_CAP];
Expand All @@ -808,10 +841,19 @@ $gpuc proc processDeferredTextureOps {} void {
writeTextureDescriptor(localQueue[i].handle);
break;
case DEFERRED_FREE:
destroyGpuTextureResources(localQueue[i].handle);
retireGpuTexture(localQueue[i].handle);
break;
}
}

destroyRetiredGpuTextures();
}

# Called once per GPU frame so retired textures age exactly once,
# even though descriptor work may be drained multiple times.
$gpuc proc beginTextureFrame {} void {
textureFrameEpoch++;
drainDeferredTextureOps();
}

$gpuc proc initPlaceholderTexture {} void {
Expand All @@ -829,7 +871,7 @@ $gpuc proc initPlaceholderTexture {} void {
debugIm.data[i+0] = 255;
debugIm.data[i+1] = 0;
debugIm.data[i+2] = 255;
debugIm.data[i+3] = 0;
debugIm.data[i+3] = 255;
}
}
GpuTextureHandle han = copyImageToGpuTexture(debugIm);
Expand All @@ -839,7 +881,7 @@ $gpuc proc initPlaceholderTexture {} void {
// drain the queued DEFERRED_ADD for slot 0 (which is now redundant
// but harmless).
initializeDescriptorSet(han);
processDeferredTextureOps();
drainDeferredTextureOps();
}

set gpuTextureLib [$gpuc compile]
Expand All @@ -848,10 +890,6 @@ $gpuTextureLib textureManagerInit

Claim the GPU texture library is $gpuTextureLib

Wish the GPU runs frame prelude handler [list apply {{gpuTextureLib} {
$gpuTextureLib processDeferredTextureOps
}} $gpuTextureLib]

When /someone/ wishes the GPU loads image /im/ as texture {
set gtex [$gpuTextureLib copyImageToGpuTexture $im]
Claim the GPU has loaded image $im as texture $gtex \
Expand Down