From 81ffe74c6902b313fd73d21852ea58449a90544e Mon Sep 17 00:00:00 2001 From: MaxNiftyNine <83783498+MaxNiftyNine@users.noreply.github.com> Date: Sun, 22 Feb 2026 16:41:15 -0500 Subject: [PATCH] Compute Shader Support --- include/gx2/draw.h | 18 ++++++ include/gx2/shaders.h | 90 +++++++++++++++++++++++++++++ include/gx2/texture.h | 4 ++ libraries/libgfd/include/gfd.h | 17 ++++++ libraries/libgfd/src/gfd.c | 6 +- libraries/libwhb/include/whb/gfx.h | 7 +++ libraries/libwhb/src/gfx_shader.c | 92 ++++++++++++++++++++++++++++++ 7 files changed, 232 insertions(+), 2 deletions(-) mode change 100644 => 100755 include/gx2/draw.h mode change 100644 => 100755 include/gx2/shaders.h mode change 100644 => 100755 include/gx2/texture.h mode change 100644 => 100755 libraries/libgfd/include/gfd.h mode change 100644 => 100755 libraries/libgfd/src/gfd.c mode change 100644 => 100755 libraries/libwhb/include/whb/gfx.h mode change 100644 => 100755 libraries/libwhb/src/gfx_shader.c diff --git a/include/gx2/draw.h b/include/gx2/draw.h old mode 100644 new mode 100755 index a1c96da67..66f0747ed --- a/include/gx2/draw.h +++ b/include/gx2/draw.h @@ -12,6 +12,21 @@ extern "C" { #endif +typedef struct GX2DispatchParams GX2DispatchParams; + +struct GX2DispatchParams /* Argument block taken by GX2DispatchCompute: X/Y/Z group counts (per the field names) followed by one padding word; the asserts below pin the layout to 0x10 bytes. */ +{ + uint32_t numGroupsX; + uint32_t numGroupsY; + uint32_t numGroupsZ; + uint32_t _padding; /* fills 0x0C..0x0F; nothing visible in this patch reads it */ +}; +WUT_CHECK_OFFSET(GX2DispatchParams, 0x00, numGroupsX); +WUT_CHECK_OFFSET(GX2DispatchParams, 0x04, numGroupsY); +WUT_CHECK_OFFSET(GX2DispatchParams, 0x08, numGroupsZ); +WUT_CHECK_OFFSET(GX2DispatchParams, 0x0C, _padding); +WUT_CHECK_SIZE(GX2DispatchParams, 0x10); + void GX2SetAttribBuffer(uint32_t index, uint32_t size, @@ -59,6 +74,9 @@ GX2DrawIndexedImmediateEx(GX2PrimitiveMode mode, void GX2SetPrimitiveRestartIndex(uint32_t index); +void +GX2DispatchCompute(GX2DispatchParams *dispatchParams); /* NOTE(review): the parameter is a non-const pointer - confirm whether the callee writes through it before tightening to const. */ + #ifdef __cplusplus } #endif diff --git a/include/gx2/shaders.h b/include/gx2/shaders.h old mode 100644 new mode 100755 index c27d03a96..f8adb9c08
--- a/include/gx2/shaders.h +++ b/include/gx2/shaders.h @@ -18,6 +18,7 @@ extern "C" { typedef struct GX2AttribVar GX2AttribVar; typedef struct GX2AttribStream GX2AttribStream; typedef struct GX2FetchShader GX2FetchShader; +typedef struct GX2ComputeShader GX2ComputeShader; typedef struct GX2GeometryShader GX2GeometryShader; typedef struct GX2LoopVar GX2LoopVar; typedef struct GX2PixelShader GX2PixelShader; @@ -336,6 +337,57 @@ WUT_CHECK_OFFSET(GX2GeometryShader, 0x90, streamOutStride); WUT_CHECK_OFFSET(GX2GeometryShader, 0xA0, gx2rBuffer); WUT_CHECK_SIZE(GX2GeometryShader, 0xB0); +struct GX2ComputeShader /* Compute shader descriptor: 12 register words, program size and pointer, count/pointer pairs for uniform blocks, uniform vars, initial values, loop vars and sampler vars, three workgroup size fields, two mode fields and a GX2RBuffer. Layout is pinned by the WUT_CHECK_* asserts that follow. */ +{ + uint32_t regs[12]; /* 12 words = 0x30 bytes, so size lands at 0x30 */ + + uint32_t size; + void *program; /* asserted offsets assume 4-byte pointers: program at 0x34, next field at 0x38 */ + + uint32_t uniformBlockCount; + GX2UniformBlock *uniformBlocks; + + uint32_t uniformVarCount; + GX2UniformVar *uniformVars; + + uint32_t initialValueCount; + GX2UniformInitialValue *initialValues; + + uint32_t loopVarCount; + GX2LoopVar *loopVars; + + uint32_t samplerVarCount; + GX2SamplerVar *samplerVars; + + uint32_t workgroupSizeX; + uint32_t workgroupSizeY; + uint32_t workgroupSizeZ; + BOOL over64Mode; /* NOTE(review): meaning is not visible in this patch - document once confirmed */ + uint32_t numWavesPerSimd; + + GX2RBuffer gx2rBuffer; /* spans 0x74..0x83 given the 0x84 total size asserted below */ +}; +WUT_CHECK_OFFSET(GX2ComputeShader, 0x00, regs); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x30, size); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x34, program); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x38, uniformBlockCount); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x3C, uniformBlocks); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x40, uniformVarCount); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x44, uniformVars); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x48, initialValueCount); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x4C, initialValues); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x50, loopVarCount); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x54, loopVars); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x58, samplerVarCount); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x5C, samplerVars); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x60, workgroupSizeX); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x64, workgroupSizeY);
+WUT_CHECK_OFFSET(GX2ComputeShader, 0x68, workgroupSizeZ); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x6C, over64Mode); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x70, numWavesPerSimd); +WUT_CHECK_OFFSET(GX2ComputeShader, 0x74, gx2rBuffer); +WUT_CHECK_SIZE(GX2ComputeShader, 0x84); + struct GX2AttribStream { uint32_t location; @@ -388,6 +440,9 @@ GX2SetPixelShader(const GX2PixelShader *shader); void GX2SetGeometryShader(const GX2GeometryShader *shader); +void +GX2SetComputeShader(const GX2ComputeShader *shader); + void GX2SetVertexSampler(const GX2Sampler *sampler, uint32_t id); @@ -400,6 +455,10 @@ void GX2SetGeometrySampler(const GX2Sampler *sampler, uint32_t id); +void +GX2SetComputeSampler(const GX2Sampler *sampler, + uint32_t id); + void GX2SetVertexUniformReg(uint32_t offset, uint32_t count, @@ -425,6 +484,11 @@ GX2SetGeometryUniformBlock(uint32_t location, uint32_t size, const void *data); +void +GX2SetComputeUniformBlock(uint32_t location, + uint32_t size, + const void *data); + void GX2SetShaderModeEx(GX2ShaderMode mode, uint32_t numVsGpr, @@ -502,6 +566,19 @@ GX2GetVertexUniformBlock(const GX2VertexShader *shader, return NULL; } +static inline GX2UniformBlock * /* Linear search of shader->uniformBlocks for an entry whose name equals the argument under strcmp; returns a pointer to the first match, or NULL when none of the uniformBlockCount entries match. */ +GX2GetComputeUniformBlock(const GX2ComputeShader *shader, + const char *name) +{ + for (uint32_t i = 0; i < shader->uniformBlockCount; ++i) { + if (strcmp(name, shader->uniformBlocks[i].name) == 0) { + return &shader->uniformBlocks[i]; + } + } + + return NULL; +} + static inline GX2UniformVar * GX2GetGeometryUniformVar(const GX2GeometryShader *shader, const char *name) @@ -541,6 +618,19 @@ GX2GetVertexUniformVar(const GX2VertexShader *shader, return NULL; } +static inline GX2UniformVar * /* Linear search of shader->uniformVars for an entry whose name equals the argument under strcmp; returns a pointer to the first match, or NULL when none of the uniformVarCount entries match. */ +GX2GetComputeUniformVar(const GX2ComputeShader *shader, + const char *name) +{ + for (uint32_t i = 0; i < shader->uniformVarCount; ++i) { + if (strcmp(name, shader->uniformVars[i].name) == 0) { + return &shader->uniformVars[i]; + } + } + + return NULL; +} + static inline void GX2SetShaderMode(GX2ShaderMode mode) { diff --git
a/include/gx2/texture.h b/include/gx2/texture.h old mode 100644 new mode 100755 index 978d9d16b..bf9c85e9f --- a/include/gx2/texture.h +++ b/include/gx2/texture.h @@ -49,6 +49,10 @@ void GX2SetGeometryTexture(const GX2Texture *texture, uint32_t unit); +void +GX2SetComputeTexture(const GX2Texture *texture, /* same parameter list as the GX2SetGeometryTexture declaration just above */ + uint32_t unit); + #ifdef __cplusplus } #endif diff --git a/libraries/libgfd/include/gfd.h b/libraries/libgfd/include/gfd.h old mode 100644 new mode 100755 index 046cacd39..072bab59b --- a/libraries/libgfd/include/gfd.h +++ b/libraries/libgfd/include/gfd.h @@ -111,6 +111,23 @@ WUT_CHECK_SIZE(GFDRelocationHeader, 0x28); char * GFDGetLastErrorString(); +uint32_t +GFDGetComputeShaderCount(const void *file); + +uint32_t +GFDGetComputeShaderHeaderSize(uint32_t index, + const void *file); + +uint32_t +GFDGetComputeShaderProgramSize(uint32_t index, + const void *file); + +BOOL /* Per the gfd.c hunk in this patch: returns FALSE when program fails _GFDCheckShaderAlign, otherwise returns whatever _GFDGetGenericBlock returns for the compute header/program block pair. The definition was previously commented out in gfd.c and is enabled by this patch. */ +GFDGetComputeShader(GX2ComputeShader *shader, + void *program, + uint32_t index, + const void *file); + uint32_t GFDGetGeometryShaderCount(const void *file); diff --git a/libraries/libgfd/src/gfd.c b/libraries/libgfd/src/gfd.c old mode 100644 new mode 100755 index faa1003eb..5197f205e --- a/libraries/libgfd/src/gfd.c +++ b/libraries/libgfd/src/gfd.c @@ -559,13 +559,16 @@ GFDGetComputeShaderProgramSize(uint32_t index, file); } -/* BOOL GFDGetComputeShader(GX2ComputeShader *shader, void *program, uint32_t index, const void *file) { + if (!_GFDCheckShaderAlign(program)) { + return FALSE; + } + return _GFDGetGenericBlock(GFD_BLOCK_COMPUTE_SHADER_HEADER, shader, GFD_BLOCK_COMPUTE_SHADER_PROGRAM, @@ -577,7 +580,6 @@ GFDGetComputeShader(GX2ComputeShader *shader, index, file); } -*/ uint32_t GFDGetGeometryShaderCount(const void *file) diff --git a/libraries/libwhb/include/whb/gfx.h b/libraries/libwhb/include/whb/gfx.h old mode 100644 new mode 100755 index 7cde90e97..337c962ae --- a/libraries/libwhb/include/whb/gfx.h +++ b/libraries/libwhb/include/whb/gfx.h @@ -67,6 +67,13 @@
WHBGfxLoadGFDVertexShader(uint32_t index, BOOL WHBGfxFreeVertexShader(GX2VertexShader *shader); +GX2ComputeShader * /* Loads compute shader number index from the GFD data at file; returns the new shader, or NULL on any failure. Release the result with WHBGfxFreeComputeShader. */ +WHBGfxLoadGFDComputeShader(uint32_t index, + const void *file); + +BOOL /* Destroys the shader program buffer if one exists, frees the header, and always returns TRUE. */ +WHBGfxFreeComputeShader(GX2ComputeShader *shader); + BOOL WHBGfxLoadGFDShaderGroup(WHBGfxShaderGroup *group, uint32_t index, diff --git a/libraries/libwhb/src/gfx_shader.c b/libraries/libwhb/src/gfx_shader.c old mode 100644 new mode 100755 index 0aef9349a..f301c0c91 --- a/libraries/libwhb/src/gfx_shader.c +++ b/libraries/libwhb/src/gfx_shader.c @@ -192,6 +192,98 @@ WHBGfxFreeVertexShader(GX2VertexShader *shader) return TRUE; } +GX2ComputeShader * /* Steps: validate index against GFDGetComputeShaderCount, query header and program sizes (zero is treated as failure), allocate the header from the MEM2 gfx heap, create and lock a GX2R buffer for the program, fill both via GFDGetComputeShader, unlock, then invalidate the program range. Every failure is logged with WHBLogPrintf and funnels through the error label. */ +WHBGfxLoadGFDComputeShader(uint32_t index, + const void *file) +{ + uint32_t headerSize, programSize; + GX2ComputeShader *shader = NULL; + void *program = NULL; + + if (index >= GFDGetComputeShaderCount(file)) { + WHBLogPrintf("%s: index %u >= %u GFDGetComputeShaderCount(file)", + __FUNCTION__, + index, + GFDGetComputeShaderCount(file)); + goto error; + } + + headerSize = GFDGetComputeShaderHeaderSize(index, file); + if (!headerSize) { + WHBLogPrintf("%s: headerSize == 0", __FUNCTION__); + goto error; + } + + programSize = GFDGetComputeShaderProgramSize(index, file); + if (!programSize) { + WHBLogPrintf("%s: programSize == 0", __FUNCTION__); + goto error; + } + + shader = (GX2ComputeShader *)GfxHeapAllocMEM2(headerSize, 64); /* header storage sized by the GFD header block; the second argument 64 is presumably the alignment - confirm against GfxHeapAllocMEM2 */ + if (!shader) { + WHBLogPrintf("%s: GfxHeapAllocMEM2(%u, 64) failed", __FUNCTION__, + headerSize); + goto error; + } + + shader->gx2rBuffer.flags = GX2R_RESOURCE_BIND_SHADER_PROGRAM | + GX2R_RESOURCE_USAGE_CPU_READ | + GX2R_RESOURCE_USAGE_CPU_WRITE | + GX2R_RESOURCE_USAGE_GPU_READ; + shader->gx2rBuffer.elemSize = programSize; /* one element covering the whole program */ + shader->gx2rBuffer.elemCount = 1; + shader->gx2rBuffer.buffer = NULL; /* the error path treats a non-NULL buffer as the signal that there is a GX2R buffer to destroy */ + if (!GX2RCreateBuffer(&shader->gx2rBuffer)) { + WHBLogPrintf("%s: GX2RCreateBuffer failed with programSize = %u", + __FUNCTION__, programSize); + goto error; + } + + program = GX2RLockBufferEx(&shader->gx2rBuffer, 0);
+ if (!program) { + WHBLogPrintf("%s: GX2RLockBufferEx failed", __FUNCTION__); + goto error; + } + + if (!GFDGetComputeShader(shader, program, index, file)) { + WHBLogPrintf("%s: GFDGetComputeShader failed", __FUNCTION__); + GX2RUnlockBufferEx(&shader->gx2rBuffer, + GX2R_RESOURCE_DISABLE_CPU_INVALIDATE | + GX2R_RESOURCE_DISABLE_GPU_INVALIDATE); /* failed load: unlock with both invalidate steps disabled, then clean up */ + goto error; + } + + GX2RUnlockBufferEx(&shader->gx2rBuffer, 0); + + // For some reason we still need to manually invalidate the buffers, + // even though GX2RUnlockBuffer SHOULD be doing that for us + GX2Invalidate(GX2_INVALIDATE_MODE_CPU_SHADER, shader->program, shader->size); + return shader; + +error: /* common failure exit: destroy the program buffer if present, free the header, return NULL */ + if (shader) { + if (shader->gx2rBuffer.buffer) { + GX2RDestroyBufferEx(&shader->gx2rBuffer, 0); + } + + GfxHeapFreeMEM2(shader); + } + + return NULL; +} + +BOOL /* Releases a shader returned by WHBGfxLoadGFDComputeShader. shader is dereferenced without a NULL check, so callers must pass a valid pointer. */ +WHBGfxFreeComputeShader(GX2ComputeShader *shader) +{ + if (shader->gx2rBuffer.buffer) { + GX2RDestroyBufferEx(&shader->gx2rBuffer, 0); + } + + GfxHeapFreeMEM2(shader); + return TRUE; +} + BOOL WHBGfxLoadGFDShaderGroup(WHBGfxShaderGroup *group, uint32_t index,