Skip to content

Commit 62ccdc6

Browse files
Metal: Support vertex stride 0 and null buffers, and fix resource release race (#38)
* Fix race on resource release + fixes for unleashed * Add nullBuffer and stride=0 support * Use array instead of vector for vertex slot tracking. * Remove extra comment * Fix slotUsed initialization --------- Co-authored-by: tanmaysachan <tnmysachan@gmail.com>
1 parent c2bdf2d commit 62ccdc6

2 files changed

Lines changed: 40 additions & 13 deletions

File tree

plume_metal.cpp

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,6 @@ namespace plume {
2626
static constexpr size_t PUSH_CONSTANT_MAX_INDEX = 15;
2727
static constexpr size_t VERTEX_BUFFER_MAX_INDEX = 30;
2828

29-
static constexpr uint32_t COLOR_COUNT = DESCRIPTOR_SET_MAX_INDEX;
30-
static constexpr uint32_t DEPTH_INDEX = COLOR_COUNT;
31-
static constexpr uint32_t STENCIL_INDEX = DEPTH_INDEX + 1;
32-
static constexpr uint32_t ATTACHMENT_COUNT = STENCIL_INDEX + 1;
33-
3429
// MARK: - Prototypes
3530

3631
MTL::PixelFormat mapPixelFormat(RenderFormat format);
@@ -1440,9 +1435,17 @@ namespace plume {
14401435
// Set index right after push constants, clamp at Metal's limit of 31
14411436
const uint32_t vertexBufferIndex = std::min(PUSH_CONSTANT_MAX_INDEX + 1 + inputSlot.index, VERTEX_BUFFER_MAX_INDEX);
14421437
MTL::VertexBufferLayoutDescriptor *layout = vertexDescriptor->layouts()->object(vertexBufferIndex);
1443-
layout->setStride(inputSlot.stride);
1444-
layout->setStepFunction(mapVertexStepFunction(inputSlot.classification));
1445-
layout->setStepRate((layout->stepFunction() == MTL::VertexStepFunctionPerInstance) ? inputSlot.stride : 1);
1438+
if (inputSlot.stride == 0) {
1439+
// Metal does not support stride 0, we must provide a
1440+
// substitute "null" buffer to match behaviour of other robust APIs.
1441+
layout->setStride(1);
1442+
layout->setStepFunction(MTL::VertexStepFunctionConstant);
1443+
layout->setStepRate(0);
1444+
} else {
1445+
layout->setStride(inputSlot.stride);
1446+
layout->setStepFunction(mapVertexStepFunction(inputSlot.classification));
1447+
layout->setStepRate((layout->stepFunction() == MTL::VertexStepFunctionPerInstance) ? inputSlot.stride : 1);
1448+
}
14461449
}
14471450

14481451
for (uint32_t i = 0; i < desc.inputElementsCount; i++) {
@@ -1610,6 +1613,10 @@ namespace plume {
16101613
}
16111614

16121615
MetalDescriptorSet::~MetalDescriptorSet() {
1616+
for (const auto resource: toReleaseOnDestruction) {
1617+
resource->release();
1618+
}
1619+
16131620
for (const auto &entry : resourceEntries) {
16141621
if (entry.resource != nullptr) {
16151622
entry.resource->release();
@@ -1705,8 +1712,7 @@ namespace plume {
17051712

17061713
if (dtype != MTL::DataTypeSampler) {
17071714
if (resourceEntries[descriptorIndex].resource != nullptr) {
1708-
resourceEntries[descriptorIndex].resource->release();
1709-
resourceEntries[descriptorIndex].resource = nullptr;
1715+
toReleaseOnDestruction.push_back(resourceEntries[descriptorIndex].resource);
17101716
}
17111717
}
17121718

@@ -2216,7 +2222,7 @@ namespace plume {
22162222
pushConstants[rangeIndex].binding = range.binding;
22172223
pushConstants[rangeIndex].set = range.set;
22182224
pushConstants[rangeIndex].offset = range.offset;
2219-
pushConstants[rangeIndex].size = range.size;
2225+
pushConstants[rangeIndex].size = alignUp(range.size);
22202226
pushConstants[rangeIndex].stageFlags = range.stageFlags;
22212227

22222228
dirtyComputeState.pushConstants = 1;
@@ -2267,7 +2273,7 @@ namespace plume {
22672273
pushConstants[rangeIndex].binding = range.binding;
22682274
pushConstants[rangeIndex].set = range.set;
22692275
pushConstants[rangeIndex].offset = range.offset;
2270-
pushConstants[rangeIndex].size = range.size;
2276+
pushConstants[rangeIndex].size = alignUp(range.size);
22712277
pushConstants[rangeIndex].stageFlags = range.stageFlags;
22722278

22732279
dirtyGraphicsState.pushConstants = 1;
@@ -2322,9 +2328,14 @@ namespace plume {
23222328
// Check for changes in bindings
23232329
for (uint32_t i = 0; i < viewCount; i++) {
23242330
const MetalBuffer* interfaceBuffer = static_cast<const MetalBuffer*>(views[i].buffer.ref);
2325-
const uint64_t newOffset = views[i].buffer.offset;
2331+
uint64_t newOffset = views[i].buffer.offset;
23262332
const uint32_t newIndex = startSlot + i;
23272333

2334+
if (interfaceBuffer == nullptr) {
2335+
interfaceBuffer = static_cast<const MetalBuffer*>(queue->device->nullBuffer.get());
2336+
newOffset = 0;
2337+
}
2338+
23282339
vertexBuffers[i] = interfaceBuffer->mtl;
23292340
vertexBufferOffsets[i] = newOffset;
23302341
vertexBufferIndices[i] = newIndex;
@@ -2962,10 +2973,21 @@ namespace plume {
29622973
}
29632974

29642975
if (dirtyGraphicsState.vertexBuffers) {
2976+
std::array<bool, 31> slotUsed{};
2977+
29652978
for (uint32_t i = 0; i < viewCount; i++) {
29662979
// Bind right after the push constants, up till the max vertex buffer index
29672980
const uint32_t bindIndex = std::min(PUSH_CONSTANT_MAX_INDEX + 1 + vertexBufferIndices[i], VERTEX_BUFFER_MAX_INDEX);
29682981
activeRenderEncoder->setVertexBuffer(vertexBuffers[i], vertexBufferOffsets[i], bindIndex);
2982+
slotUsed[bindIndex] = true;
2983+
}
2984+
2985+
// Bind all unbound slots to the null buffer
2986+
auto nullBuffer = static_cast<const MetalBuffer*>(queue->device->nullBuffer.get());
2987+
for (uint32_t i = PUSH_CONSTANT_MAX_INDEX + 1; i <= VERTEX_BUFFER_MAX_INDEX; i++) {
2988+
if (!slotUsed[i]) {
2989+
activeRenderEncoder->setVertexBuffer(nullBuffer->mtl, 0, i);
2990+
}
29692991
}
29702992

29712993
stateCache.lastVertexBuffers = vertexBuffers;
@@ -3262,6 +3284,8 @@ namespace plume {
32623284
capabilities.dynamicDepthBias = true;
32633285
capabilities.uma = mtl->hasUnifiedMemory();
32643286
capabilities.queryPools = false;
3287+
3288+
nullBuffer = createBuffer(RenderBufferDesc::DefaultBuffer(16, RenderBufferFlag::VERTEX));
32653289
}
32663290

32673291
MetalDevice::~MetalDevice() {

plume_metal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ namespace plume {
209209
std::vector<Descriptor> descriptors;
210210
MetalArgumentBuffer argumentBuffer;
211211
std::vector<ResourceEntry> resourceEntries;
212+
std::vector<MTL::Resource *> toReleaseOnDestruction;
212213

213214
MetalDescriptorSet(MetalDevice *device, const RenderDescriptorSetDesc &desc);
214215
MetalDescriptorSet(MetalDevice *device, uint32_t entryCount);
@@ -608,6 +609,8 @@ namespace plume {
608609
// Blit functionality
609610
MTL::BlitPassDescriptor *sharedBlitDescriptor = nullptr;
610611

612+
std::unique_ptr<RenderBuffer> nullBuffer;
613+
611614
explicit MetalDevice(MetalInterface *renderInterface, const std::string &preferredDeviceName);
612615
~MetalDevice() override;
613616
std::unique_ptr<RenderDescriptorSet> createDescriptorSet(const RenderDescriptorSetDesc &desc) override;

0 commit comments

Comments
 (0)