Skip to content

Commit aa3a822

Browse files
committed
Add MetalDriver global vertex and fragment data caching
1 parent 7256873 commit aa3a822

1 file changed

Lines changed: 29 additions & 21 deletions

File tree

h3d/impl/MetalDriver.hx

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ private class MetalNative {
115115
// Empty-bodied declaration taking a render encoder handle and an integer cull mode. NOTE(review): presumably replaced by a native binding at link time — confirm against the class metadata.
public static function set_cull_mode(encoder:Dynamic, cullMode:Int):Void {}
116116
// Empty-bodied declaration taking a render encoder handle and a `wireframe` flag. NOTE(review): presumably selects line vs. filled rasterization natively — confirm.
public static function set_triangle_fill_mode(encoder:Dynamic, wireframe:Bool):Void {}
117117
// Empty-bodied declaration: binds `buffer` to the vertex stage at slot `index`, with `offset` passed alongside (callers pass a byte offset computed as slot * (globalsSize << 4)). NOTE(review): native behavior not visible here — confirm.
public static function set_vertex_buffer(encoder:Dynamic, buffer:Dynamic, offset:Int, index:Int):Void {}
118+
// Empty-bodied declaration taking raw bytes, a length and a slot index for the vertex stage. NOTE(review): added by this change but not called in any visible hunk — confirm it is used, or remove it.
public static function set_vertex_bytes(encoder:Dynamic, data:hl.Bytes, length:Int, index:Int):Void {}
119+
// Empty-bodied declaration taking raw bytes, a length and a slot index for the fragment stage. NOTE(review): added by this change but not called in any visible hunk — confirm it is used, or remove it.
public static function set_fragment_bytes(encoder:Dynamic, data:hl.Bytes, length:Int, index:Int):Void {}
118120
// Empty-bodied declaration taking a render encoder handle, a texture handle and a slot index. NOTE(review): presumably binds the texture to the fragment stage natively — confirm.
public static function set_fragment_texture(encoder:Dynamic, texture:Dynamic, index:Int):Void {}
119121
// Empty-bodied declaration: binds `buffer` to the fragment stage at slot `index`, with `offset` passed alongside (callers pass a byte offset computed as slot * (globalsSize << 4)). NOTE(review): native behavior not visible here — confirm.
public static function set_fragment_buffer(encoder:Dynamic, buffer:Dynamic, offset:Int, index:Int):Void {}
120122
// Empty-bodied declaration taking a primitive type code, a first-vertex index and a vertex count. NOTE(review): presumably issues a non-indexed draw natively — confirm.
public static function draw_primitives(encoder:Dynamic, primitiveType:Int, vertexStart:Int, vertexCount:Int):Void {}
@@ -219,6 +221,11 @@ class MetalDriver extends Driver {
219221
static inline var MAX_FRAMES_IN_FLIGHT = 3;
220222
var currentFrameIndex : Int = 0;
221223
var drawCallIndex : Int = 0; // Track draw calls within current frame for buffer offsets
224+
// Cached globals data for per-draw upload (ensures each draw call gets its own buffer slot)
225+
var cachedVertexGlobalsData : hl.Bytes;
226+
var cachedVertexGlobalsBytes : Int = 0;
227+
var cachedFragmentGlobalsData : hl.Bytes;
228+
var cachedFragmentGlobalsBytes : Int = 0;
222229
var defStencil : h3d.mat.Stencil;
223230

224231
// Bindless texturing support
@@ -697,7 +704,7 @@ class MetalDriver extends Driver {
697704
compiled.vertex.paramsSize = shader.vertex.paramsSize;
698705
compiled.vertex.texturesCount = shader.vertex.texturesCount;
699706
if (compiled.vertex.globalsSize > 0) {
700-
compiled.vertex.globals = MetalNative.create_buffer(compiled.vertex.globalsSize << 4, 2);
707+
compiled.vertex.globals = MetalNative.create_buffer((compiled.vertex.globalsSize << 4) * 1024, 2);
701708
}
702709

703710
if (compiled.vertex.paramsSize > 0) {
@@ -755,7 +762,7 @@ class MetalDriver extends Driver {
755762
compiled.fragment.globalsSize = shader.fragment.globalsSize;
756763
compiled.fragment.paramsSize = shader.fragment.paramsSize;
757764
compiled.fragment.texturesCount = shader.fragment.texturesCount;
758-
if (compiled.fragment.globalsSize > 0) compiled.fragment.globals = MetalNative.create_buffer(compiled.fragment.globalsSize << 4, 2);
765+
// Allocate the fragment globals buffer (1024 per-draw slots of globalsSize vec4s, 16 bytes each) only when the
// shader declares fragment globals. Without this guard a shader with globalsSize == 0 requests a zero-length
// buffer, and the draw path, which only tests `globals != null`, may then try to bind it. The vertex stage uses the same guard.
if (compiled.fragment.globalsSize > 0) compiled.fragment.globals = MetalNative.create_buffer((compiled.fragment.globalsSize << 4) * 1024, 2);
759766
if (compiled.fragment.paramsSize > 0) {
760767
// paramsSize is in vec4 units, each vec4 = 16 bytes, so multiply by 16 (shift left 4)
761768
var singleDrawSize = compiled.fragment.paramsSize << 4;
@@ -837,7 +844,7 @@ class MetalDriver extends Driver {
837844
compiled.vertex.texturesCount = shader.compute.texturesCount;
838845

839846
if (compiled.vertex.globalsSize > 0) {
840-
compiled.vertex.globals = MetalNative.create_buffer(compiled.vertex.globalsSize << 4, 2);
847+
compiled.vertex.globals = MetalNative.create_buffer((compiled.vertex.globalsSize << 4) * 1024, 2);
841848
}
842849

843850
if (compiled.vertex.paramsSize > 0) {
@@ -1217,7 +1224,9 @@ class MetalDriver extends Driver {
12171224
var vertexBufferIndex = currentShader.hasPerInstanceInputs ? 2 : 1;
12181225
if (currentShader.vertex != null) {
12191226
if (currentShader.vertex.globals != null) {
1220-
MetalNative.set_vertex_buffer(currentRenderEncoder, currentShader.vertex.globals, 0, vertexBufferIndex);
1227+
// The vertex globals buffer is allocated with 1024 per-draw slots, so wrap the slot index to keep the byte
// offset inside the buffer when a frame issues more than 1024 draws (an unwrapped index would upload and bind past the end).
// NOTE(review): wrapping reuses slots within a frame; enlarge the buffer if earlier draws may still be pending.
var vgOffset = (drawCallIndex % 1024) * (currentShader.vertex.globalsSize << 4);
1228+
if (cachedVertexGlobalsData != null) MetalNative.upload_buffer_data(currentShader.vertex.globals, cachedVertexGlobalsData, cachedVertexGlobalsBytes, vgOffset);
1229+
MetalNative.set_vertex_buffer(currentRenderEncoder, currentShader.vertex.globals, vgOffset, vertexBufferIndex);
12211230
vertexBufferIndex++;
12221231
}
12231232
if (currentShader.vertex.paramsBuffers != null && currentShader.vertex.paramsBuffers.length > 0) {
@@ -1233,7 +1242,9 @@ class MetalDriver extends Driver {
12331242
if (currentShader.fragment != null) {
12341243
var fragmentBufferIndex = 0; // Fragment buffers can start at 0
12351244
if (currentShader.fragment.globals != null) {
1236-
MetalNative.set_fragment_buffer(currentRenderEncoder, currentShader.fragment.globals, 0, fragmentBufferIndex);
1245+
// The fragment globals buffer is allocated with 1024 per-draw slots, so wrap the slot index to keep the byte
// offset inside the buffer when a frame issues more than 1024 draws (an unwrapped index would upload and bind past the end).
// NOTE(review): wrapping reuses slots within a frame; enlarge the buffer if earlier draws may still be pending.
var fgOffset = (drawCallIndex % 1024) * (currentShader.fragment.globalsSize << 4);
1246+
if (cachedFragmentGlobalsData != null) MetalNative.upload_buffer_data(currentShader.fragment.globals, cachedFragmentGlobalsData, cachedFragmentGlobalsBytes, fgOffset);
1247+
MetalNative.set_fragment_buffer(currentRenderEncoder, currentShader.fragment.globals, fgOffset, fragmentBufferIndex);
12371248
fragmentBufferIndex++;
12381249
}
12391250
if (currentShader.fragment.paramsBuffers != null && currentShader.fragment.paramsBuffers.length > 0) {
@@ -1312,7 +1323,9 @@ class MetalDriver extends Driver {
13121323
var vertexBufferIndex = currentShader.hasPerInstanceInputs ? 2 : 1;
13131324
if (currentShader.vertex != null) {
13141325
if (currentShader.vertex.globals != null) {
1315-
MetalNative.set_vertex_buffer(currentRenderEncoder, currentShader.vertex.globals, 0, vertexBufferIndex);
1326+
// The vertex globals buffer is allocated with 1024 per-draw slots, so wrap the slot index to keep the byte
// offset inside the buffer when a frame issues more than 1024 draws (an unwrapped index would upload and bind past the end).
// NOTE(review): wrapping reuses slots within a frame; enlarge the buffer if earlier draws may still be pending.
var vgOffset = (drawCallIndex % 1024) * (currentShader.vertex.globalsSize << 4);
1327+
if (cachedVertexGlobalsData != null) MetalNative.upload_buffer_data(currentShader.vertex.globals, cachedVertexGlobalsData, cachedVertexGlobalsBytes, vgOffset);
1328+
MetalNative.set_vertex_buffer(currentRenderEncoder, currentShader.vertex.globals, vgOffset, vertexBufferIndex);
13161329
vertexBufferIndex++;
13171330
}
13181331
if (currentShader.vertex.paramsBuffers != null && currentShader.vertex.paramsBuffers.length > 0) {
@@ -1327,7 +1340,9 @@ class MetalDriver extends Driver {
13271340
if (currentShader.fragment != null) {
13281341
var fragmentBufferIndex = 0;
13291342
if (currentShader.fragment.globals != null) {
1330-
MetalNative.set_fragment_buffer(currentRenderEncoder, currentShader.fragment.globals, 0, fragmentBufferIndex);
1343+
// The fragment globals buffer is allocated with 1024 per-draw slots, so wrap the slot index to keep the byte
// offset inside the buffer when a frame issues more than 1024 draws (an unwrapped index would upload and bind past the end).
// NOTE(review): wrapping reuses slots within a frame; enlarge the buffer if earlier draws may still be pending.
var fgOffset = (drawCallIndex % 1024) * (currentShader.fragment.globalsSize << 4);
1344+
if (cachedFragmentGlobalsData != null) MetalNative.upload_buffer_data(currentShader.fragment.globals, cachedFragmentGlobalsData, cachedFragmentGlobalsBytes, fgOffset);
1345+
MetalNative.set_fragment_buffer(currentRenderEncoder, currentShader.fragment.globals, fgOffset, fragmentBufferIndex);
13311346
fragmentBufferIndex++;
13321347
}
13331348
if (currentShader.fragment.paramsBuffers != null && currentShader.fragment.paramsBuffers.length > 0) {
@@ -1704,22 +1719,15 @@ class MetalDriver extends Driver {
17041719

17051720
switch (which) {
17061721
case Globals:
1707-
// Upload global uniforms to vertex and fragment shaders
1708-
if (currentShader.vertex != null && currentShader.vertex.globals != null && currentShader.vertex.globalsSize > 0) {
1709-
var data = hl.Bytes.getArray(buffers.vertex.globals.toData());
1710-
if (data != null) {
1711-
var bytes = currentShader.vertex.globalsSize << 4; // Size in vec4s, convert to bytes (16 bytes per vec4)
1712-
MetalNative.upload_buffer_data(currentShader.vertex.globals, data, bytes, 0);
1713-
}
1722+
// Cache globals data; actual upload happens in draw() at drawCallIndex offset
1723+
if (currentShader.vertex != null && currentShader.vertex.globalsSize > 0) {
1724+
cachedVertexGlobalsData = hl.Bytes.getArray(buffers.vertex.globals.toData());
1725+
cachedVertexGlobalsBytes = currentShader.vertex.globalsSize << 4;
17141726
}
1715-
if (currentShader.fragment != null && currentShader.fragment.globals != null && currentShader.fragment.globalsSize > 0) {
1716-
var data = hl.Bytes.getArray(buffers.fragment.globals.toData());
1717-
if (data != null) {
1718-
var bytes = currentShader.fragment.globalsSize << 4;
1719-
MetalNative.upload_buffer_data(currentShader.fragment.globals, data, bytes, 0);
1720-
}
1727+
if (currentShader.fragment != null && currentShader.fragment.globalsSize > 0) {
1728+
cachedFragmentGlobalsData = hl.Bytes.getArray(buffers.fragment.globals.toData());
1729+
cachedFragmentGlobalsBytes = currentShader.fragment.globalsSize << 4;
17211730
}
1722-
17231731
case Params:
17241732
// Upload shader parameters to vertex and fragment shaders using triple buffering with per-draw-call offsets
17251733
if (currentShader.vertex != null && currentShader.vertex.paramsBuffers != null && currentShader.vertex.paramsSize > 0) {

0 commit comments

Comments
 (0)