diff --git a/code/ddsutils/ddsutils.cpp b/code/ddsutils/ddsutils.cpp
index 3f42e532dcc..3b4a4dbd574 100644
--- a/code/ddsutils/ddsutils.cpp
+++ b/code/ddsutils/ddsutils.cpp
@@ -223,7 +223,7 @@ static size_t compute_dds_size(const DDS_HEADER &dds_header, bool converting = f
 	if (dds_header.ddspf.dwFlags & DDPF_FOURCC) {
 		// size of data block (4x4)
-		d_size += ((d_width + 3) / 4) * ((d_height + 3) / 4) * d_depth * ((dds_header.ddspf.dwFourCC == FOURCC_DXT1) ? 8 : 16);
+		d_size += dds_compressed_mip_size(d_width, d_height, (dds_header.ddspf.dwFourCC == FOURCC_DXT1) ? 8 : 16) * d_depth;
 	} else {
 		d_size += d_width * d_height * d_depth * (dds_header.ddspf.dwRGBBitCount / 8);
 	}
diff --git a/code/ddsutils/ddsutils.h b/code/ddsutils/ddsutils.h
index 0d6a6ca6392..e262204a397 100644
--- a/code/ddsutils/ddsutils.h
+++ b/code/ddsutils/ddsutils.h
@@ -273,6 +273,27 @@ typedef struct {
 } DDS_HEADER_DXT10;
 #pragma pack()
 
+// Block size in bytes for a 4x4 texel block of a compressed DDS format.
+// comp_type is one of the DDS_DXT*/DDS_CUBEMAP_DXT* constants.
+inline int dds_block_size(int comp_type) {
+	switch (comp_type) {
+	case DDS_DXT1:
+	case DDS_CUBEMAP_DXT1:
+		return 8;
+	case DDS_DXT3: case DDS_CUBEMAP_DXT3:
+	case DDS_DXT5: case DDS_CUBEMAP_DXT5:
+	case DDS_BC7:
+		return 16;
+	default:
+		return 0;
+	}
+}
+
+// Size in bytes of one mip level of a block-compressed texture.
+inline size_t dds_compressed_mip_size(int w, int h, int block_size) {
+	return static_cast<size_t>(((w + 3) / 4) * ((h + 3) / 4) * block_size);
+}
+
 #define DDS_OFFSET 4+sizeof(DDS_HEADER) //place where the data starts -- should be 128
 #define DX10_OFFSET DDS_OFFSET+sizeof(DDS_HEADER_DXT10) // Unless a DX10 header is present
diff --git a/code/globalincs/pstypes.h b/code/globalincs/pstypes.h
index 90b80c7a8fb..4e8160ce6ed 100644
--- a/code/globalincs/pstypes.h
+++ b/code/globalincs/pstypes.h
@@ -592,6 +592,11 @@ inline void* memset_if_trivial_else_error(ImDrawListSplitter* memset_data, int c
 	return ptr_memcpy(memcpy_dest, memcpy_src, count);
 }
 
+inline void *memcpy_if_trivial_else_error(void *memcpy_dest, const void *memcpy_src, size_t count)
+{
+	return ptr_memcpy(memcpy_dest, memcpy_src, count);
+}
+
 // MEMMOVE!
 const auto ptr_memmove = std::memmove;
 #define memmove memmove_if_trivial_else_error
diff --git a/code/graphics/2d.cpp b/code/graphics/2d.cpp
index 2a9adbdbdd9..8fa6440d05b 100644
--- a/code/graphics/2d.cpp
+++ b/code/graphics/2d.cpp
@@ -1309,6 +1309,10 @@ void gr_close()
 
 	graphics::paths::PathRenderer::shutdown();
 
+	// Free bitmaps before destroying the graphics backend, since
+	// gf_bm_free_data needs the backend (texture manager, GL context, etc.)
+	bm_close();
+
 	switch (gr_screen.mode) {
 		case GR_OPENGL:
 #ifdef WITH_OPENGL
@@ -1324,13 +1328,11 @@ void gr_close()
 
 		case GR_STUB:
 			break;
-
+
 		default:
 			Int3();		// Invalid graphics mode
 	}
 
-	bm_close();
-
 	Gr_inited = 0;
 }
@@ -2924,6 +2926,16 @@ void gr_flip(bool execute_scripting)
 		}
 	}
 
+	if (Cmdline_graphics_debug_output) {
+		output_uniform_debug_data();
+	}
+
+	// IMPORTANT: No rendering may happen after this point until gf_flip()/gr_setup_frame().
+	// gr_reset_immediate_buffer() resets the write offset to 0, so any subsequent immediate
+	// buffer write would overwrite vertex data that already-recorded draw commands reference.
+	// In Vulkan (deferred submission), the GPU reads the final buffer state at submit time,
+	// so overwrites here silently corrupt earlier draws. OpenGL's immediate execution hides
+	// this, but it is still logically wrong for any deferred-submission backend.
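+	//
+	// A minimal sketch of the failure mode this guards against (using the
+	// immediate-draw helper that appears elsewhere in this patch; arguments elided):
+	//   gr_render_primitives_2d_immediate(...);  // draw A records vertices at offset 0
+	//   gr_reset_immediate_buffer();             // write offset rewinds to 0
+	//   gr_render_primitives_2d_immediate(...);  // draw B overwrites draw A's vertex data
+	// Under deferred submission, both recorded draws read draw B's data at submit time.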
 	gr_reset_immediate_buffer();
 
 	// Do per frame operations on the matrix state
@@ -2933,10 +2945,6 @@ void gr_flip(bool execute_scripting)
 
 	mouse_reset_deltas();
 
-	if (Cmdline_graphics_debug_output) {
-		output_uniform_debug_data();
-	}
-
 	// Use this opportunity for retiring the uniform buffers
 	uniform_buffer_managers_retire_buffers();
diff --git a/code/graphics/2d.h b/code/graphics/2d.h
index b701bdeefc6..65a5d4eea24 100644
--- a/code/graphics/2d.h
+++ b/code/graphics/2d.h
@@ -214,6 +214,8 @@ enum shader_type {
 
 	SDR_TYPE_IRRADIANCE_MAP_GEN,
 
+	SDR_TYPE_SHADOW_MAP,
+
 	NUM_SHADER_TYPES
 };
@@ -262,7 +264,6 @@ struct vertex_format_data
 		POSITION4,
 		POSITION3,
 		POSITION2,
-		SCREEN_POS,
 		COLOR3,
 		COLOR4,
 		COLOR4F,
@@ -339,7 +340,8 @@ enum class gr_capability {
 	CAPABILITY_PERSISTENT_BUFFER_MAPPING,
 	CAPABILITY_BPTC,
 	CAPABILITY_LARGE_SHADER,
-	CAPABILITY_INSTANCED_RENDERING
+	CAPABILITY_INSTANCED_RENDERING,
+	CAPABILITY_QUERIES_REUSABLE
 };
 
 struct gr_capability_def {
@@ -934,6 +936,10 @@ typedef struct screen {
 
 	std::function gf_override_fog;
 
+	// ImGui backend integration
+	std::function<void()> gf_imgui_new_frame;
+	std::function<void()> gf_imgui_render_draw_data;
+
 	//OpenXR functions
 	std::function()> gf_openxr_get_extensions;
 	std::function gf_openxr_test_capabilities;
@@ -1189,6 +1195,9 @@ inline void gr_post_process_restore_zbuffer()
 
 #define gr_override_fog GR_CALL(gr_screen.gf_override_fog)
 
+#define gr_imgui_new_frame GR_CALL(gr_screen.gf_imgui_new_frame)
+#define gr_imgui_render_draw_data GR_CALL(gr_screen.gf_imgui_render_draw_data)
+
 inline void gr_render_primitives(material* material_info,
 	primitive_type prim_type,
 	vertex_layout* layout,
diff --git a/code/graphics/matrix.cpp b/code/graphics/matrix.cpp
index c2331e329b2..ed3326e1f1a 100644
--- a/code/graphics/matrix.cpp
+++ b/code/graphics/matrix.cpp
@@ -53,9 +53,18 @@ static void create_perspective_projection_matrix(matrix4 *out, float left, float
 	out->a1d[5] = 2.0f * near_dist / (top - bottom);
 	out->a1d[8] = (right + left) / (right - left);
 	out->a1d[9] = (top + bottom) / (top - bottom);
-	out->a1d[10] = -(far_dist + near_dist) / (far_dist - near_dist);
 	out->a1d[11] = -1.0f;
-	out->a1d[14] = -2.0f * far_dist * near_dist / (far_dist - near_dist);
+
+	if (gr_screen.mode == GR_VULKAN) {
+		// Vulkan NDC Z range is [0, 1] (OpenGL is [-1, 1])
+		// Y-flip is handled by negative viewport height (VK_KHR_maintenance1)
+		out->a1d[10] = -far_dist / (far_dist - near_dist);
+		out->a1d[14] = -far_dist * near_dist / (far_dist - near_dist);
+	} else {
+		// OpenGL NDC Z range is [-1, 1]
+		out->a1d[10] = -(far_dist + near_dist) / (far_dist - near_dist);
+		out->a1d[14] = -2.0f * far_dist * near_dist / (far_dist - near_dist);
+	}
 }
 
 static void create_orthographic_projection_matrix(matrix4* out, float left, float right, float bottom, float top, float near_dist, float far_dist)
@@ -64,11 +73,20 @@ static void create_orthographic_projection_matrix(matrix4* out, float left, floa
 	out->a1d[0] = 2.0f / (right - left);
 	out->a1d[5] = 2.0f / (top - bottom);
-	out->a1d[10] = -2.0f / (far_dist - near_dist);
 	out->a1d[12] = -(right + left) / (right - left);
 	out->a1d[13] = -(top + bottom) / (top - bottom);
-	out->a1d[14] = -(far_dist + near_dist) / (far_dist - near_dist);
 	out->a1d[15] = 1.0f;
+
+	if (gr_screen.mode == GR_VULKAN) {
+		// Vulkan NDC Z range is [0, 1] (OpenGL is [-1, 1])
+		// Y-flip is handled by negative viewport height (VK_KHR_maintenance1)
+		out->a1d[10] = -1.0f / (far_dist - near_dist);
+		out->a1d[14] = -near_dist / (far_dist - near_dist);
+	} else {
+		// OpenGL NDC Z range is [-1, 1]
+		
out->a1d[10] = -2.0f / (far_dist - near_dist); + out->a1d[14] = -(far_dist + near_dist) / (far_dist - near_dist); + } } void gr_start_instance_matrix(const vec3d *offset, const matrix *rotation) @@ -272,7 +290,11 @@ void gr_end_2d_matrix() Assert( htl_2d_matrix_depth == 1 ); // reset viewport to what it was originally set to by the proj matrix - gr_set_viewport(gr_screen.offset_x, (gr_screen.max_h - gr_screen.offset_y - gr_screen.clip_height), gr_screen.clip_width, gr_screen.clip_height); + if (gr_screen.rendering_to_texture != -1) { + gr_set_viewport(gr_screen.offset_x, gr_screen.offset_y, gr_screen.clip_width, gr_screen.clip_height); + } else { + gr_set_viewport(gr_screen.offset_x, (gr_screen.max_h - gr_screen.offset_y - gr_screen.clip_height), gr_screen.clip_width, gr_screen.clip_height); + } gr_projection_matrix = gr_last_projection_matrix; diff --git a/code/graphics/opengl/gropengl.cpp b/code/graphics/opengl/gropengl.cpp index 632000ff7e8..394d6eca0b6 100644 --- a/code/graphics/opengl/gropengl.cpp +++ b/code/graphics/opengl/gropengl.cpp @@ -43,6 +43,8 @@ #include "osapi/osregistry.h" #include "pngutils/pngutils.h" +#include "backends/imgui_impl_opengl3.h" + #include // minimum GL version we can reliably support is 3.2 @@ -972,6 +974,16 @@ int opengl_init_display_device() return 0; } +void gr_opengl_imgui_new_frame() +{ + ImGui_ImplOpenGL3_NewFrame(); +} + +void gr_opengl_imgui_render_draw_data() +{ + ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); +} + void gr_opengl_init_function_pointers() { gr_screen.gf_flip = gr_opengl_flip; @@ -1104,6 +1116,9 @@ void gr_opengl_init_function_pointers() gr_screen.gf_override_fog = gr_opengl_override_fog; + gr_screen.gf_imgui_new_frame = gr_opengl_imgui_new_frame; + gr_screen.gf_imgui_render_draw_data = gr_opengl_imgui_render_draw_data; + gr_screen.gf_openxr_get_extensions = gr_opengl_openxr_get_extensions; gr_screen.gf_openxr_test_capabilities = gr_opengl_openxr_test_capabilities; gr_screen.gf_openxr_create_session = gr_opengl_openxr_create_session; @@ -1502,6 +1517,8 @@ bool gr_opengl_is_capable(gr_capability capability) return !Cmdline_no_large_shaders; case gr_capability::CAPABILITY_INSTANCED_RENDERING: return GLAD_GL_ARB_vertex_attrib_binding; + case gr_capability::CAPABILITY_QUERIES_REUSABLE: + return true; } diff --git a/code/graphics/opengl/gropengldeferred.cpp b/code/graphics/opengl/gropengldeferred.cpp index b3acf8faf84..3b16f36f9cd 100644 --- a/code/graphics/opengl/gropengldeferred.cpp +++ b/code/graphics/opengl/gropengldeferred.cpp @@ -1,7 +1,7 @@ #include "gropengldeferred.h" -#include "globalincs/vmallocator.h" +#include "graphics/util/primitives.h" #include "ShaderProgram.h" #include "gropengldraw.h" @@ -700,69 +700,12 @@ void gr_opengl_draw_deferred_light_sphere(const vec3d *position) } -void gr_opengl_deferred_light_cylinder_init(int segments) // Generate a VBO of a cylinder of radius and height 1.0f, based on code at http://www.ogre3d.org/tikiwiki/ManualSphereMeshes +void gr_opengl_deferred_light_cylinder_init(int segments) { - unsigned int nVertex = (segments + 1) * 2 * 3 + 6; // Can someone verify this? 
- unsigned int nIndex = deferred_light_cylinder_icount = 12 * (segments + 1) - 6; //This too - float *Vertices = (float*)vm_malloc(sizeof(float) * nVertex); - float *pVertex = Vertices; - ushort *Indices = (ushort*)vm_malloc(sizeof(ushort) * nIndex); - ushort *pIndex = Indices; - - float fDeltaSegAngle = (2.0f * PI / segments); - unsigned short wVerticeIndex = 0 ; - - *pVertex++ = 0.0f; - *pVertex++ = 0.0f; - *pVertex++ = 0.0f; - wVerticeIndex ++; - *pVertex++ = 0.0f; - *pVertex++ = 0.0f; - *pVertex++ = 1.0f; - wVerticeIndex ++; - - for( int ring = 0; ring <= 1; ring++ ) { - float z0 = (float)ring; - - // Generate the group of segments for the current ring - for(int seg = 0; seg <= segments; seg++) { - float x0 = sinf(seg * fDeltaSegAngle); - float y0 = cosf(seg * fDeltaSegAngle); - - // Add one vertex to the strip which makes up the cylinder - *pVertex++ = x0; - *pVertex++ = y0; - *pVertex++ = z0; - - if (!ring) { - *pIndex++ = wVerticeIndex + (ushort)segments + 1; - *pIndex++ = wVerticeIndex; - *pIndex++ = wVerticeIndex + (ushort)segments; - *pIndex++ = wVerticeIndex + (ushort)segments + 1; - *pIndex++ = wVerticeIndex + 1; - *pIndex++ = wVerticeIndex; - if(seg != segments) - { - *pIndex++ = wVerticeIndex + 1; - *pIndex++ = wVerticeIndex; - *pIndex++ = 0; - } - wVerticeIndex ++; - } - else - { - if(seg != segments) - { - *pIndex++ = wVerticeIndex + 1; - *pIndex++ = wVerticeIndex; - *pIndex++ = 1; - wVerticeIndex ++; - } - } - }; // end for seg - } // end for ring + auto mesh = graphics::util::generate_cylinder_mesh(segments); - deferred_light_cylinder_vcount = wVerticeIndex; + deferred_light_cylinder_vcount = static_cast(mesh.vertex_count); + deferred_light_cylinder_icount = mesh.index_count; glGetError(); @@ -771,17 +714,12 @@ void gr_opengl_deferred_light_cylinder_init(int segments) // Generate a VBO of a // make sure we have one if (deferred_light_cylinder_vbo) { glBindBuffer(GL_ARRAY_BUFFER, deferred_light_cylinder_vbo); - glBufferData(GL_ARRAY_BUFFER, nVertex * sizeof(float), Vertices, GL_STATIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, mesh.vertices.size() * sizeof(float), mesh.vertices.data(), GL_STATIC_DRAW); // just in case if ( opengl_check_for_errors() ) { glDeleteBuffers(1, &deferred_light_cylinder_vbo); deferred_light_cylinder_vbo = 0; - - vm_free(Indices); - Indices = nullptr; - vm_free(Vertices); - Vertices = nullptr; return; } @@ -793,71 +731,25 @@ void gr_opengl_deferred_light_cylinder_init(int segments) // Generate a VBO of a // make sure we have one if (deferred_light_cylinder_ibo) { glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, deferred_light_cylinder_ibo); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, nIndex * sizeof(ushort), Indices, GL_STATIC_DRAW); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, mesh.indices.size() * sizeof(ushort), mesh.indices.data(), GL_STATIC_DRAW); // just in case if ( opengl_check_for_errors() ) { glDeleteBuffers(1, &deferred_light_cylinder_ibo); deferred_light_cylinder_ibo = 0; - - vm_free(Indices); - Indices = nullptr; - vm_free(Vertices); - Vertices = nullptr; return; } glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } - - vm_free(Indices); - Indices = nullptr; - vm_free(Vertices); - Vertices = nullptr; } -void gr_opengl_deferred_light_sphere_init(int rings, int segments) // Generate a VBO of a sphere of radius 1.0f, based on code at http://www.ogre3d.org/tikiwiki/ManualSphereMeshes +void gr_opengl_deferred_light_sphere_init(int rings, int segments) { - unsigned int nVertex = (rings + 1) * (segments+1) * 3; - unsigned int nIndex = deferred_light_sphere_icount = 6 * 
rings * (segments + 1); - float *Vertices = (float*)vm_malloc(sizeof(float) * nVertex); - float *pVertex = Vertices; - ushort *Indices = (ushort*)vm_malloc(sizeof(ushort) * nIndex); - ushort *pIndex = Indices; - - float fDeltaRingAngle = (PI / rings); - float fDeltaSegAngle = (2.0f * PI / segments); - unsigned short wVerticeIndex = 0 ; - - // Generate the group of rings for the sphere - for( int ring = 0; ring <= rings; ring++ ) { - float r0 = sinf (ring * fDeltaRingAngle); - float y0 = cosf (ring * fDeltaRingAngle); - - // Generate the group of segments for the current ring - for(int seg = 0; seg <= segments; seg++) { - float x0 = r0 * sinf(seg * fDeltaSegAngle); - float z0 = r0 * cosf(seg * fDeltaSegAngle); - - // Add one vertex to the strip which makes up the sphere - *pVertex++ = x0; - *pVertex++ = y0; - *pVertex++ = z0; - - if (ring != rings) { - // each vertex (except the last) has six indices pointing to it - *pIndex++ = wVerticeIndex + (ushort)segments + 1; - *pIndex++ = wVerticeIndex; - *pIndex++ = wVerticeIndex + (ushort)segments; - *pIndex++ = wVerticeIndex + (ushort)segments + 1; - *pIndex++ = wVerticeIndex + 1; - *pIndex++ = wVerticeIndex; - wVerticeIndex ++; - } - }; // end for seg - } // end for ring + auto mesh = graphics::util::generate_sphere_mesh(rings, segments); - deferred_light_sphere_vcount = wVerticeIndex; + deferred_light_sphere_vcount = static_cast(mesh.vertex_count); + deferred_light_sphere_icount = mesh.index_count; glGetError(); @@ -866,17 +758,12 @@ void gr_opengl_deferred_light_sphere_init(int rings, int segments) // Generate a // make sure we have one if (deferred_light_sphere_vbo) { glBindBuffer(GL_ARRAY_BUFFER, deferred_light_sphere_vbo); - glBufferData(GL_ARRAY_BUFFER, nVertex * sizeof(float), Vertices, GL_STATIC_DRAW); + glBufferData(GL_ARRAY_BUFFER, mesh.vertices.size() * sizeof(float), mesh.vertices.data(), GL_STATIC_DRAW); // just in case if ( opengl_check_for_errors() ) { glDeleteBuffers(1, &deferred_light_sphere_vbo); deferred_light_sphere_vbo = 0; - - vm_free(Vertices); - Vertices = nullptr; - vm_free(Indices); - Indices = nullptr; return; } @@ -888,27 +775,17 @@ void gr_opengl_deferred_light_sphere_init(int rings, int segments) // Generate a // make sure we have one if (deferred_light_sphere_ibo) { glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, deferred_light_sphere_ibo); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, nIndex * sizeof(ushort), Indices, GL_STATIC_DRAW); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, mesh.indices.size() * sizeof(ushort), mesh.indices.data(), GL_STATIC_DRAW); // just in case if ( opengl_check_for_errors() ) { glDeleteBuffers(1, &deferred_light_sphere_ibo); deferred_light_sphere_ibo = 0; - - vm_free(Vertices); - Vertices = nullptr; - vm_free(Indices); - Indices = nullptr; return; } glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } - - vm_free(Vertices); - Vertices = nullptr; - vm_free(Indices); - Indices = nullptr; } void opengl_draw_sphere() diff --git a/code/graphics/opengl/gropengltexture.cpp b/code/graphics/opengl/gropengltexture.cpp index 76f540f78c8..1996c863e32 100644 --- a/code/graphics/opengl/gropengltexture.cpp +++ b/code/graphics/opengl/gropengltexture.cpp @@ -415,29 +415,25 @@ static int opengl_texture_set_level(int bitmap_handle, int bitmap_type, int bmap } // check for compressed image types - auto block_size = 0; + auto block_size = dds_block_size(bm_is_compressed(bitmap_handle)); switch (bm_is_compressed(bitmap_handle)) { case DDS_DXT1: case DDS_CUBEMAP_DXT1: intFormat = GL_COMPRESSED_RGB_S3TC_DXT1_EXT; - block_size = 8; break; 
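+		// block_size is now computed once above via dds_block_size() (ddsutils.h);
+		// this switch only selects the GL internal format.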
case DDS_DXT3: case DDS_CUBEMAP_DXT3: intFormat = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; - block_size = 16; break; case DDS_DXT5: case DDS_CUBEMAP_DXT5: intFormat = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; - block_size = 16; break; case DDS_BC7: intFormat = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB; - block_size = 16; break; } @@ -457,8 +453,7 @@ static int opengl_texture_set_level(int bitmap_handle, int bitmap_type, int bmap auto mipmap_h = bmap_h; for (auto i = 0; i < mipmap_levels + base_level; i++) { - // size of data block (4x4) - dsize = ((mipmap_h + 3) / 4) * ((mipmap_w + 3) / 4) * block_size; + dsize = dds_compressed_mip_size(mipmap_w, mipmap_h, block_size); if (i >= base_level) { glCompressedTexSubImage3D(tSlot->texture_target, i - base_level, 0, 0, tSlot->array_index, mipmap_w, @@ -584,8 +579,7 @@ static int opengl_texture_set_level(int bitmap_handle, int bitmap_type, int bmap // check if it's a compressed cubemap first if (block_size > 0) { for (auto level = 0; level < mipmap_levels + base_level; level++) { - // size of data block (4x4) - dsize = ((mipmap_h + 3) / 4) * ((mipmap_w + 3) / 4) * block_size; + dsize = dds_compressed_mip_size(mipmap_w, mipmap_h, block_size); if (level >= base_level) { // We skipped ahead to the base level so we can start uploading frames now diff --git a/code/graphics/opengl/gropengltnl.cpp b/code/graphics/opengl/gropengltnl.cpp index defcc3bcd2e..01837de1d4c 100644 --- a/code/graphics/opengl/gropengltnl.cpp +++ b/code/graphics/opengl/gropengltnl.cpp @@ -74,7 +74,6 @@ static opengl_vertex_bind GL_array_binding_data[] = { vertex_format_data::POSITION4, 4, GL_FLOAT, GL_FALSE, opengl_vert_attrib::POSITION }, { vertex_format_data::POSITION3, 3, GL_FLOAT, GL_FALSE, opengl_vert_attrib::POSITION }, { vertex_format_data::POSITION2, 2, GL_FLOAT, GL_FALSE, opengl_vert_attrib::POSITION }, - { vertex_format_data::SCREEN_POS, 2, GL_INT, GL_FALSE, opengl_vert_attrib::POSITION }, { vertex_format_data::COLOR3, 3, GL_UNSIGNED_BYTE, GL_TRUE, opengl_vert_attrib::COLOR }, { vertex_format_data::COLOR4, 4, GL_UNSIGNED_BYTE, GL_TRUE, opengl_vert_attrib::COLOR }, { vertex_format_data::COLOR4F, 4, GL_FLOAT, GL_FALSE, opengl_vert_attrib::COLOR }, @@ -1290,4 +1289,4 @@ void opengl_bind_vertex_layout_multiple(vertex_layout &layout, const SCP_vector< i++; } GL_state.Array.BindElementBuffer(indexBuffer); -} \ No newline at end of file +} diff --git a/code/graphics/render.cpp b/code/graphics/render.cpp index b05d44dcbd8..6ea62b7bc34 100644 --- a/code/graphics/render.cpp +++ b/code/graphics/render.cpp @@ -36,13 +36,14 @@ static void gr_flash_internal(int r, int g, int b, int a, bool alpha_flash) render_material.set_blend_mode(ALPHA_BLEND_ALPHA_ADDITIVE); } - int glVertices[8] = { x1, y1, x1, y2, x2, y1, x2, y2 }; + float glVertices[8] = { (float)x1, (float)y1, (float)x1, (float)y2, + (float)x2, (float)y1, (float)x2, (float)y2 }; vertex_layout vert_def; - vert_def.add_vertex_component(vertex_format_data::SCREEN_POS, sizeof(int) * 2, 0); + vert_def.add_vertex_component(vertex_format_data::POSITION2, sizeof(float) * 2, 0); - gr_render_primitives_2d_immediate(&render_material, PRIM_TYPE_TRISTRIP, &vert_def, 4, glVertices, sizeof(int) * 8); + gr_render_primitives_2d_immediate(&render_material, PRIM_TYPE_TRISTRIP, &vert_def, 4, glVertices, sizeof(float) * 8); } void gr_flash(int r, int g, int b) { diff --git a/code/graphics/shaders/batched.frag b/code/graphics/shaders/batched.frag new file mode 100644 index 00000000000..95d67c52f1c --- /dev/null +++ b/code/graphics/shaders/batched.frag @@ -0,0 +1,44 @@ 
+#version 450 +#extension GL_ARB_separate_shader_objects : enable + +#include "gamma.sdr" + +// Inputs from vertex shader +layout (location = 0) in vec4 fragTexCoord; +layout (location = 1) in vec4 fragColor; + +// Output +layout (location = 0) out vec4 fragOut0; + +// Texture sampler array (binding 1 in Material set) +layout (set = 1, binding = 1) uniform sampler2DArray baseMap; + +// Uniform buffer: GenericData (binding 0 in PerDraw set) +// Must match the layout used by vulkan_set_default_material_uniforms() +layout (set = 2, binding = 0, std140) uniform genericData { + mat4 modelMatrix; + + vec4 color; + + vec4 clipEquation; + + int baseMapIndex; + int alphaTexture; + int noTexturing; + int srgb; + + float intensity; + float alphaThreshold; + uint clipEnabled; +}; + +void main() +{ + float y = fragTexCoord.y / fragTexCoord.w; + vec4 baseColor = texture(baseMap, vec3(fragTexCoord.x, y, fragTexCoord.z)); + + baseColor.rgb = srgb_to_linear(baseColor.rgb); + vec4 blendColor = vec4(srgb_to_linear(fragColor.rgb), fragColor.a); + + fragOut0 = baseColor * blendColor * intensity; +} diff --git a/code/graphics/shaders/batched.vert b/code/graphics/shaders/batched.vert new file mode 100644 index 00000000000..af6ff487f7b --- /dev/null +++ b/code/graphics/shaders/batched.vert @@ -0,0 +1,43 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Vertex inputs +layout (location = 0) in vec4 vertPosition; +layout (location = 1) in vec4 vertColor; +layout (location = 2) in vec4 vertTexCoord; + +// Outputs to fragment shader +layout (location = 0) out vec4 fragTexCoord; +layout (location = 1) out vec4 fragColor; + +// Uniform buffer: Matrices (binding 1 in PerDraw set) +layout (set = 2, binding = 1, std140) uniform matrixData { + mat4 modelViewMatrix; + mat4 projMatrix; +}; + +// Uniform buffer: GenericData (binding 0 in PerDraw set) +// Must match the layout used by vulkan_set_default_material_uniforms() +layout (set = 2, binding = 0, std140) uniform genericData { + mat4 modelMatrix; + + vec4 color; + + vec4 clipEquation; + + int baseMapIndex; + int alphaTexture; + int noTexturing; + int srgb; + + float intensity; + float alphaThreshold; + uint clipEnabled; +}; + +void main() +{ + fragColor = vertColor * color; + gl_Position = projMatrix * modelViewMatrix * vertPosition; + fragTexCoord = vertTexCoord; +} diff --git a/code/graphics/shaders/bloom-comp.frag b/code/graphics/shaders/bloom-comp.frag new file mode 100644 index 00000000000..7f086a92c43 --- /dev/null +++ b/code/graphics/shaders/bloom-comp.frag @@ -0,0 +1,26 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2D bloomed; + +layout(std140, set = 2, binding = 0) uniform genericData { + float bloom_intensity; + int levels; +}; + +void main() +{ + vec4 color_out = vec4(0.0, 0.0, 0.0, 1.0); + float factor = 0.0; + for (int mipmap = 0; mipmap < levels; ++mipmap) { + float scale = 1.0 / exp2(float(mipmap)); + factor += scale; + color_out.rgb += textureLod(bloomed, fragTexCoord, float(mipmap)).rgb * scale; + } + color_out.rgb /= factor; + color_out.rgb *= bloom_intensity; + fragOut0 = color_out; +} diff --git a/code/graphics/shaders/blur.frag b/code/graphics/shaders/blur.frag new file mode 100644 index 00000000000..9b0e1e6f1f1 --- /dev/null +++ b/code/graphics/shaders/blur.frag @@ -0,0 +1,39 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in 
vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2D tex; + +layout(std140, set = 2, binding = 0) uniform genericData { + float texSize; + int level; + int direction; // 0 = horizontal, 1 = vertical +}; + +void main() +{ + float BlurWeights[6]; + BlurWeights[0] = 0.1362; + BlurWeights[1] = 0.1297; + BlurWeights[2] = 0.1120; + BlurWeights[3] = 0.0877; + BlurWeights[4] = 0.0623; + BlurWeights[5] = 0.0402; + + vec4 sum = textureLod(tex, fragTexCoord, float(level)) * BlurWeights[0]; + + for (int i = 1; i < 6; i++) { + float offset = float(i) * texSize; + if (direction == 0) { + sum += textureLod(tex, vec2(clamp(fragTexCoord.x - offset, 0.0, 1.0), fragTexCoord.y), float(level)) * BlurWeights[i]; + sum += textureLod(tex, vec2(clamp(fragTexCoord.x + offset, 0.0, 1.0), fragTexCoord.y), float(level)) * BlurWeights[i]; + } else { + sum += textureLod(tex, vec2(fragTexCoord.x, clamp(fragTexCoord.y - offset, 0.0, 1.0)), float(level)) * BlurWeights[i]; + sum += textureLod(tex, vec2(fragTexCoord.x, clamp(fragTexCoord.y + offset, 0.0, 1.0)), float(level)) * BlurWeights[i]; + } + } + + fragOut0 = sum; +} diff --git a/code/graphics/shaders/brightpass.frag b/code/graphics/shaders/brightpass.frag new file mode 100644 index 00000000000..a500484dd13 --- /dev/null +++ b/code/graphics/shaders/brightpass.frag @@ -0,0 +1,13 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2D tex; + +void main() +{ + vec4 color = texture(tex, fragTexCoord); + fragOut0 = vec4(max(vec3(0.0), color.rgb - vec3(1.0)), 1.0); +} diff --git a/code/graphics/shaders/compiled/batched.frag.spv b/code/graphics/shaders/compiled/batched.frag.spv new file mode 100644 index 00000000000..61284464c66 Binary files /dev/null and b/code/graphics/shaders/compiled/batched.frag.spv differ diff --git a/code/graphics/shaders/compiled/batched.vert.spv b/code/graphics/shaders/compiled/batched.vert.spv new file mode 100644 index 00000000000..23a26209d2f Binary files /dev/null and b/code/graphics/shaders/compiled/batched.vert.spv differ diff --git a/code/graphics/shaders/compiled/bloom-comp.frag.spv b/code/graphics/shaders/compiled/bloom-comp.frag.spv new file mode 100644 index 00000000000..9e5a6fed6ec Binary files /dev/null and b/code/graphics/shaders/compiled/bloom-comp.frag.spv differ diff --git a/code/graphics/shaders/compiled/blur.frag.spv b/code/graphics/shaders/compiled/blur.frag.spv new file mode 100644 index 00000000000..de08d4caddf Binary files /dev/null and b/code/graphics/shaders/compiled/blur.frag.spv differ diff --git a/code/graphics/shaders/compiled/brightpass.frag.spv b/code/graphics/shaders/compiled/brightpass.frag.spv new file mode 100644 index 00000000000..c37e86c0105 Binary files /dev/null and b/code/graphics/shaders/compiled/brightpass.frag.spv differ diff --git a/code/graphics/shaders/compiled/copy.frag.spv b/code/graphics/shaders/compiled/copy.frag.spv new file mode 100644 index 00000000000..fe1524f0abb Binary files /dev/null and b/code/graphics/shaders/compiled/copy.frag.spv differ diff --git a/code/graphics/shaders/compiled/copy.vert.spv b/code/graphics/shaders/compiled/copy.vert.spv new file mode 100644 index 00000000000..6615f86174b Binary files /dev/null and b/code/graphics/shaders/compiled/copy.vert.spv differ diff --git a/code/graphics/shaders/compiled/decal.frag.spv b/code/graphics/shaders/compiled/decal.frag.spv new file 
mode 100644 index 00000000000..5a9948ef4b4 Binary files /dev/null and b/code/graphics/shaders/compiled/decal.frag.spv differ diff --git a/code/graphics/shaders/compiled/decal.vert.spv b/code/graphics/shaders/compiled/decal.vert.spv new file mode 100644 index 00000000000..cbb0c35a6b8 Binary files /dev/null and b/code/graphics/shaders/compiled/decal.vert.spv differ diff --git a/code/graphics/shaders/compiled/default-material.frag.spv b/code/graphics/shaders/compiled/default-material.frag.spv index 72854a6895e..cff7399e866 100644 Binary files a/code/graphics/shaders/compiled/default-material.frag.spv and b/code/graphics/shaders/compiled/default-material.frag.spv differ diff --git a/code/graphics/shaders/compiled/default-material.frag.spv.glsl b/code/graphics/shaders/compiled/default-material.frag.spv.glsl index 060a12e4634..765adbcb51c 100644 --- a/code/graphics/shaders/compiled/default-material.frag.spv.glsl +++ b/code/graphics/shaders/compiled/default-material.frag.spv.glsl @@ -28,24 +28,30 @@ void main() discard; } bool _66 = _39.srgb == 1; - vec3 _146; + vec3 _160; if (_66) { - _146 = pow(_48.xyz, vec3(2.2000000476837158203125)); + _160 = pow(_48.xyz, vec3(2.2000000476837158203125)); } else { - _146 = _48.xyz; + _160 = _48.xyz; } - vec4 _148; + vec4 _153 = _48; + _153.x = _160.x; + vec4 _155 = _153; + _155.y = _160.y; + vec4 _157 = _155; + _157.z = _160.z; + vec4 _162; if (_66) { - _148 = vec4(pow(fragColor.xyz, vec3(2.2000000476837158203125)), fragColor.w); + _162 = vec4(pow(fragColor.xyz, vec3(2.2000000476837158203125)), fragColor.w); } else { - _148 = fragColor; + _162 = fragColor; } - fragOut0 = mix(mix(vec4(_146.x, _146.y, _146.z, _48.w) * _148, vec4(_148.xyz, _146.x * _148.w), vec4(float(_39.alphaTexture))), _148, vec4(float(_39.noTexturing))) * _39.intensity; + fragOut0 = mix(mix(_157 * _162, vec4(_162.xyz, _160.x * _162.w), vec4(float(_39.alphaTexture))), _162, vec4(float(_39.noTexturing))) * _39.intensity; } diff --git a/code/graphics/shaders/compiled/default-material.vert.spv b/code/graphics/shaders/compiled/default-material.vert.spv index b1ad969cb55..c1b6f8a1327 100644 Binary files a/code/graphics/shaders/compiled/default-material.vert.spv and b/code/graphics/shaders/compiled/default-material.vert.spv differ diff --git a/code/graphics/shaders/compiled/default-material.vert.spv.glsl b/code/graphics/shaders/compiled/default-material.vert.spv.glsl index 60decc4f843..edee7689429 100644 --- a/code/graphics/shaders/compiled/default-material.vert.spv.glsl +++ b/code/graphics/shaders/compiled/default-material.vert.spv.glsl @@ -37,5 +37,9 @@ void main() { gl_ClipDistance[0] = dot(_22.clipEquation, _22.modelMatrix * vertPosition); } + else + { + gl_ClipDistance[0] = 1.0; + } } diff --git a/code/graphics/shaders/compiled/deferred.frag.spv b/code/graphics/shaders/compiled/deferred.frag.spv new file mode 100644 index 00000000000..ecb5965e678 Binary files /dev/null and b/code/graphics/shaders/compiled/deferred.frag.spv differ diff --git a/code/graphics/shaders/compiled/deferred.vert.spv b/code/graphics/shaders/compiled/deferred.vert.spv new file mode 100644 index 00000000000..23c8ff90183 Binary files /dev/null and b/code/graphics/shaders/compiled/deferred.vert.spv differ diff --git a/code/graphics/shaders/compiled/effect-distort.frag.spv b/code/graphics/shaders/compiled/effect-distort.frag.spv new file mode 100644 index 00000000000..e793527f180 Binary files /dev/null and b/code/graphics/shaders/compiled/effect-distort.frag.spv differ diff --git 
a/code/graphics/shaders/compiled/effect-distort.vert.spv b/code/graphics/shaders/compiled/effect-distort.vert.spv new file mode 100644 index 00000000000..3f42350fc04 Binary files /dev/null and b/code/graphics/shaders/compiled/effect-distort.vert.spv differ diff --git a/code/graphics/shaders/compiled/effect.frag.spv b/code/graphics/shaders/compiled/effect.frag.spv new file mode 100644 index 00000000000..e1eecf2fd2a Binary files /dev/null and b/code/graphics/shaders/compiled/effect.frag.spv differ diff --git a/code/graphics/shaders/compiled/effect.vert.spv b/code/graphics/shaders/compiled/effect.vert.spv new file mode 100644 index 00000000000..43f06ae6e33 Binary files /dev/null and b/code/graphics/shaders/compiled/effect.vert.spv differ diff --git a/code/graphics/shaders/compiled/fog.frag.spv b/code/graphics/shaders/compiled/fog.frag.spv new file mode 100644 index 00000000000..996332f461a Binary files /dev/null and b/code/graphics/shaders/compiled/fog.frag.spv differ diff --git a/code/graphics/shaders/compiled/fog.vert.spv b/code/graphics/shaders/compiled/fog.vert.spv new file mode 100644 index 00000000000..878b3c8537a Binary files /dev/null and b/code/graphics/shaders/compiled/fog.vert.spv differ diff --git a/code/graphics/shaders/compiled/fxaa.frag.spv b/code/graphics/shaders/compiled/fxaa.frag.spv new file mode 100644 index 00000000000..39985b7a8c0 Binary files /dev/null and b/code/graphics/shaders/compiled/fxaa.frag.spv differ diff --git a/code/graphics/shaders/compiled/fxaapre.frag.spv b/code/graphics/shaders/compiled/fxaapre.frag.spv new file mode 100644 index 00000000000..cc1e8422ae6 Binary files /dev/null and b/code/graphics/shaders/compiled/fxaapre.frag.spv differ diff --git a/code/graphics/shaders/compiled/irradiance.frag.spv b/code/graphics/shaders/compiled/irradiance.frag.spv new file mode 100644 index 00000000000..9d3270b7c5d Binary files /dev/null and b/code/graphics/shaders/compiled/irradiance.frag.spv differ diff --git a/code/graphics/shaders/compiled/irradiance.vert.spv b/code/graphics/shaders/compiled/irradiance.vert.spv new file mode 100644 index 00000000000..8eb7a619684 Binary files /dev/null and b/code/graphics/shaders/compiled/irradiance.vert.spv differ diff --git a/code/graphics/shaders/compiled/lightshafts.frag.spv b/code/graphics/shaders/compiled/lightshafts.frag.spv new file mode 100644 index 00000000000..154fa4a2085 Binary files /dev/null and b/code/graphics/shaders/compiled/lightshafts.frag.spv differ diff --git a/code/graphics/shaders/compiled/main.frag.spv b/code/graphics/shaders/compiled/main.frag.spv new file mode 100644 index 00000000000..94ecff4a20f Binary files /dev/null and b/code/graphics/shaders/compiled/main.frag.spv differ diff --git a/code/graphics/shaders/compiled/main.vert.spv b/code/graphics/shaders/compiled/main.vert.spv new file mode 100644 index 00000000000..dca60fe555d Binary files /dev/null and b/code/graphics/shaders/compiled/main.vert.spv differ diff --git a/code/graphics/shaders/compiled/msaa-resolve.frag.spv b/code/graphics/shaders/compiled/msaa-resolve.frag.spv new file mode 100644 index 00000000000..5cb47c31106 Binary files /dev/null and b/code/graphics/shaders/compiled/msaa-resolve.frag.spv differ diff --git a/code/graphics/shaders/compiled/msaa-resolve.vert.spv b/code/graphics/shaders/compiled/msaa-resolve.vert.spv new file mode 100644 index 00000000000..b2a5812bbcd Binary files /dev/null and b/code/graphics/shaders/compiled/msaa-resolve.vert.spv differ diff --git a/code/graphics/shaders/compiled/nanovg.frag.spv 
b/code/graphics/shaders/compiled/nanovg.frag.spv new file mode 100644 index 00000000000..6aac1246a68 Binary files /dev/null and b/code/graphics/shaders/compiled/nanovg.frag.spv differ diff --git a/code/graphics/shaders/compiled/nanovg.vert.spv b/code/graphics/shaders/compiled/nanovg.vert.spv new file mode 100644 index 00000000000..cd6e3e6f894 Binary files /dev/null and b/code/graphics/shaders/compiled/nanovg.vert.spv differ diff --git a/code/graphics/shaders/compiled/passthrough.frag.spv b/code/graphics/shaders/compiled/passthrough.frag.spv new file mode 100644 index 00000000000..69defe28db5 Binary files /dev/null and b/code/graphics/shaders/compiled/passthrough.frag.spv differ diff --git a/code/graphics/shaders/compiled/passthrough.vert.spv b/code/graphics/shaders/compiled/passthrough.vert.spv new file mode 100644 index 00000000000..a77c3e9552e Binary files /dev/null and b/code/graphics/shaders/compiled/passthrough.vert.spv differ diff --git a/code/graphics/shaders/compiled/post.frag.spv b/code/graphics/shaders/compiled/post.frag.spv new file mode 100644 index 00000000000..856b6ff259d Binary files /dev/null and b/code/graphics/shaders/compiled/post.frag.spv differ diff --git a/code/graphics/shaders/compiled/postprocess.vert.spv b/code/graphics/shaders/compiled/postprocess.vert.spv new file mode 100644 index 00000000000..a358072a582 Binary files /dev/null and b/code/graphics/shaders/compiled/postprocess.vert.spv differ diff --git a/code/graphics/shaders/compiled/rocketui.frag.spv b/code/graphics/shaders/compiled/rocketui.frag.spv new file mode 100644 index 00000000000..1f31bd5795d Binary files /dev/null and b/code/graphics/shaders/compiled/rocketui.frag.spv differ diff --git a/code/graphics/shaders/compiled/rocketui.vert.spv b/code/graphics/shaders/compiled/rocketui.vert.spv new file mode 100644 index 00000000000..51fb825b264 Binary files /dev/null and b/code/graphics/shaders/compiled/rocketui.vert.spv differ diff --git a/code/graphics/shaders/compiled/shadow.frag.spv b/code/graphics/shaders/compiled/shadow.frag.spv new file mode 100644 index 00000000000..f71b3020c3d Binary files /dev/null and b/code/graphics/shaders/compiled/shadow.frag.spv differ diff --git a/code/graphics/shaders/compiled/shadow.vert.spv b/code/graphics/shaders/compiled/shadow.vert.spv new file mode 100644 index 00000000000..0c7a534df87 Binary files /dev/null and b/code/graphics/shaders/compiled/shadow.vert.spv differ diff --git a/code/graphics/shaders/compiled/shield-impact.frag.spv b/code/graphics/shaders/compiled/shield-impact.frag.spv new file mode 100644 index 00000000000..889d40fe4f2 Binary files /dev/null and b/code/graphics/shaders/compiled/shield-impact.frag.spv differ diff --git a/code/graphics/shaders/compiled/shield-impact.vert.spv b/code/graphics/shaders/compiled/shield-impact.vert.spv new file mode 100644 index 00000000000..44bb6d40d1a Binary files /dev/null and b/code/graphics/shaders/compiled/shield-impact.vert.spv differ diff --git a/code/graphics/shaders/compiled/tonemapping.frag.spv b/code/graphics/shaders/compiled/tonemapping.frag.spv new file mode 100644 index 00000000000..6b3a709a584 Binary files /dev/null and b/code/graphics/shaders/compiled/tonemapping.frag.spv differ diff --git a/code/graphics/shaders/compiled/video.frag.spv b/code/graphics/shaders/compiled/video.frag.spv new file mode 100644 index 00000000000..e8f46402d9b Binary files /dev/null and b/code/graphics/shaders/compiled/video.frag.spv differ diff --git a/code/graphics/shaders/compiled/video.vert.spv 
b/code/graphics/shaders/compiled/video.vert.spv new file mode 100644 index 00000000000..6325e800f97 Binary files /dev/null and b/code/graphics/shaders/compiled/video.vert.spv differ diff --git a/code/graphics/shaders/compiled/volumetric-fog.frag.spv b/code/graphics/shaders/compiled/volumetric-fog.frag.spv new file mode 100644 index 00000000000..1cea7646c04 Binary files /dev/null and b/code/graphics/shaders/compiled/volumetric-fog.frag.spv differ diff --git a/code/graphics/shaders/compiled/volumetric-fog.vert.spv b/code/graphics/shaders/compiled/volumetric-fog.vert.spv new file mode 100644 index 00000000000..984f1c749b8 Binary files /dev/null and b/code/graphics/shaders/compiled/volumetric-fog.vert.spv differ diff --git a/code/graphics/shaders/compiled/vulkan.frag.spv b/code/graphics/shaders/compiled/vulkan.frag.spv deleted file mode 100644 index d267d4906c2..00000000000 Binary files a/code/graphics/shaders/compiled/vulkan.frag.spv and /dev/null differ diff --git a/code/graphics/shaders/compiled/vulkan.frag.spv.glsl b/code/graphics/shaders/compiled/vulkan.frag.spv.glsl deleted file mode 100644 index baa87038095..00000000000 --- a/code/graphics/shaders/compiled/vulkan.frag.spv.glsl +++ /dev/null @@ -1,10 +0,0 @@ -#version 150 - -out vec4 outColor; -in vec3 fragColor; - -void main() -{ - outColor = vec4(fragColor, 1.0); -} - diff --git a/code/graphics/shaders/compiled/vulkan.vert.spv b/code/graphics/shaders/compiled/vulkan.vert.spv deleted file mode 100644 index c3e37b6e672..00000000000 Binary files a/code/graphics/shaders/compiled/vulkan.vert.spv and /dev/null differ diff --git a/code/graphics/shaders/compiled/vulkan.vert.spv.glsl b/code/graphics/shaders/compiled/vulkan.vert.spv.glsl deleted file mode 100644 index 6e396118336..00000000000 --- a/code/graphics/shaders/compiled/vulkan.vert.spv.glsl +++ /dev/null @@ -1,13 +0,0 @@ -#version 150 - -const vec2 _20[3] = vec2[](vec2(0.0, -0.5), vec2(0.5), vec2(-0.5, 0.5)); -const vec3 _29[3] = vec3[](vec3(1.0, 0.0, 0.0), vec3(0.0, 1.0, 0.0), vec3(0.0, 0.0, 1.0)); - -out vec3 fragColor; - -void main() -{ - gl_Position = vec4(_20[gl_VertexID], 0.0, 1.0); - fragColor = _29[gl_VertexID]; -} - diff --git a/code/graphics/shaders/compiled/vulkan_structs.frag.h b/code/graphics/shaders/compiled/vulkan_structs.frag.h deleted file mode 100644 index 37f6fcd2e46..00000000000 --- a/code/graphics/shaders/compiled/vulkan_structs.frag.h +++ /dev/null @@ -1,6 +0,0 @@ - -#pragma once - -#include -#include - diff --git a/code/graphics/shaders/compiled/vulkan_structs.vert.h b/code/graphics/shaders/compiled/vulkan_structs.vert.h deleted file mode 100644 index 37f6fcd2e46..00000000000 --- a/code/graphics/shaders/compiled/vulkan_structs.vert.h +++ /dev/null @@ -1,6 +0,0 @@ - -#pragma once - -#include -#include - diff --git a/code/graphics/shaders/copy.frag b/code/graphics/shaders/copy.frag new file mode 100644 index 00000000000..3e53b9182e5 --- /dev/null +++ b/code/graphics/shaders/copy.frag @@ -0,0 +1,12 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2D tex; + +void main() +{ + fragOut0 = texture(tex, fragTexCoord); +} diff --git a/code/graphics/shaders/copy.vert b/code/graphics/shaders/copy.vert new file mode 100644 index 00000000000..ced8a6a7a62 --- /dev/null +++ b/code/graphics/shaders/copy.vert @@ -0,0 +1,11 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) out vec2 
fragTexCoord; + +void main() +{ + vec2 pos = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + fragTexCoord = pos; + gl_Position = vec4(pos * 2.0 - 1.0, 0.0, 1.0); +} diff --git a/code/graphics/shaders/decal.frag b/code/graphics/shaders/decal.frag new file mode 100644 index 00000000000..5983d2ee7b1 --- /dev/null +++ b/code/graphics/shaders/decal.frag @@ -0,0 +1,180 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Decal fragment shader — screen-space decal projection into G-buffer +// Port of OpenGL decal-f.sdr to Vulkan +// +// Technique based on: +// http://martindevans.me/game-development/2015/02/27/Drawing-Stuff-On-Other-Stuff-With-Deferred-Screenspace-Decals/ + +// G-buffer outputs: 6 locations matching the G-buffer render pass +// Attachments 1, 3, 5 are write-masked to 0 by the pipeline blend state +layout (location = 0) out vec4 fragOut0; // [0] Color/Diffuse +layout (location = 1) out vec4 fragOut1; // [1] Position (masked) +layout (location = 2) out vec4 fragOut2; // [2] Normal +layout (location = 3) out vec4 fragOut3; // [3] Specular (masked) +layout (location = 4) out vec4 fragOut4; // [4] Emissive +layout (location = 5) out vec4 fragOut5; // [5] Composite (masked) + +layout (location = 0) flat in mat4 invModelMatrix; // locations 0-3 +layout (location = 4) flat in vec3 decalDirection; +layout (location = 5) flat in float normal_angle_cutoff; +layout (location = 6) flat in float angle_fade_start; +layout (location = 7) flat in float alpha_scale; + +// Set 1 = Material, Binding 1 = texture array (diffuse/glow/normal in slots 0/1/2) +layout (set = 1, binding = 1) uniform sampler2DArray decalTextures; + +// Set 1 = Material, Binding 4 = depth copy (same slot as soft particle depth) +layout (set = 1, binding = 4) uniform sampler2D gDepthBuffer; + +// Set 1 = Material, Binding 6 = normal copy (distortion map slot, unused during G-buffer pass) +layout (set = 1, binding = 6) uniform sampler2D gNormalBuffer; + +// Set 1 = Material, Binding 2 = DecalGlobals UBO +layout (set = 1, binding = 2, std140) uniform decalGlobalData { + mat4 viewMatrix; + mat4 projMatrix; + mat4 invViewMatrix; + mat4 invProjMatrix; + + vec2 viewportSize; +}; + +// Set 2 = PerDraw, Binding 3 = DecalInfo UBO +layout (set = 2, binding = 3, std140) uniform decalInfoData { + int diffuse_index; + int glow_index; + int normal_index; + int diffuse_blend_mode; + + int glow_blend_mode; +}; + +#include "gamma.sdr" +#include "lighting.sdr" +#include "normals.sdr" + +vec3 computeViewPosition(vec2 textureCoord) { + vec4 clipSpaceLocation; + vec2 normalizedCoord = textureCoord / viewportSize; + + clipSpaceLocation.x = normalizedCoord.x * 2.0 - 1.0; + // Vulkan negative viewport (y=H, height=-H) inverts the Y mapping: + // pixel_y = H/2 * (1 - NDC_y), so NDC_y = 1 - 2*pixel_y/H + clipSpaceLocation.y = 1.0 - normalizedCoord.y * 2.0; + // Vulkan depth is [0,1] — use directly (no *2-1 like OpenGL) + clipSpaceLocation.z = texelFetch(gDepthBuffer, ivec2(textureCoord), 0).r; + clipSpaceLocation.w = 1.0; + + vec4 homogenousLocation = invProjMatrix * clipSpaceLocation; + + return homogenousLocation.xyz / homogenousLocation.w; +} + +vec3 getPixelNormal(vec3 frag_position, vec2 tex_coord, inout float alpha, out vec3 binormal, out vec3 tangent) { + vec3 normal; + + if (normal_index < 0) { + // No decal normal map: read scene normal from the copy texture + // for more accurate angle rejection (matching OpenGL USE_NORMAL_MAP path) + normal = texelFetch(gNormalBuffer, ivec2(tex_coord), 0).xyz; + binormal = 
vec3(0.0); + tangent = vec3(0.0); + } else { + // Has decal normal map: use screen-space derivatives for tangent frame + vec3 pos_dx = dFdx(frag_position); + vec3 pos_dy = dFdy(frag_position); + normal = normalize(cross(pos_dx, pos_dy)); + binormal = normalize(pos_dx); + tangent = normalize(pos_dy); + } + + // Calculate angle between surface normal and decal direction + float angle = acos(clamp(dot(normal, decalDirection), -1.0, 1.0)); + + if (angle > normal_angle_cutoff) { + discard; + } + + // Smooth alpha transition near edges + alpha = alpha * (1.0 - smoothstep(angle_fade_start, normal_angle_cutoff, angle)); + + return normal; +} + +vec2 getDecalTexCoord(vec3 view_pos, inout float alpha) { + vec4 object_pos = invModelMatrix * invViewMatrix * vec4(view_pos, 1.0); + + bvec3 invalidComponents = greaterThan(abs(object_pos.xyz), vec3(0.5)); + bvec4 nanComponents = isnan(object_pos); + + if (any(invalidComponents) || any(nanComponents)) { + discard; + } + + // Fade out near top/bottom of decal box + alpha = alpha * (1.0 - smoothstep(0.4, 0.5, abs(object_pos.z))); + + return object_pos.xy + 0.5; +} + +void main() { + vec3 frag_position = computeViewPosition(gl_FragCoord.xy); + + float alpha = alpha_scale; + + vec2 tex_coord = getDecalTexCoord(frag_position, alpha); + + vec3 binormal; + vec3 tangent; + vec3 normal = getPixelNormal(frag_position, gl_FragCoord.xy, alpha, binormal, tangent); + + vec4 diffuse_out = vec4(0.0); + vec4 emissive_out = vec4(0.0); + vec3 normal_out = vec3(0.0); + + if (diffuse_index >= 0) { + vec4 color = texture(decalTextures, vec3(tex_coord, float(diffuse_index))); + color.rgb = srgb_to_linear(color.rgb); + + if (diffuse_blend_mode == 0) { + diffuse_out = vec4(color.rgb, color.a * alpha); + } else { + diffuse_out = vec4(color.rgb * alpha, 1.0); + } + } + + if (glow_index >= 0) { + vec4 color = texture(decalTextures, vec3(tex_coord, float(glow_index))); + color.rgb = srgb_to_linear(color.rgb) * GLOW_MAP_SRGB_MULTIPLIER; + color.rgb *= GLOW_MAP_INTENSITY; + + if (glow_blend_mode == 0) { + emissive_out = vec4(color.rgb + emissive_out.rgb * emissive_out.a, color.a * alpha); + } else { + emissive_out.rgb += color.rgb * alpha; + } + } + + if (normal_index >= 0) { + vec3 decalNormal = unpackNormal(texture(decalTextures, vec3(tex_coord, float(normal_index))).ag); + + mat3 tangentToView; + tangentToView[0] = tangent; + tangentToView[1] = binormal; + tangentToView[2] = normal; + + normal_out = tangentToView * decalNormal * alpha; + } + + // Active outputs (blend enabled by pipeline) + fragOut0 = diffuse_out; // [0] Color + fragOut2 = vec4(normal_out, 0.0); // [2] Normal + fragOut4 = emissive_out; // [4] Emissive + + // Masked outputs (write mask = 0 in pipeline, zero cost) + fragOut1 = vec4(0.0); // [1] Position + fragOut3 = vec4(0.0); // [3] Specular + fragOut5 = vec4(0.0); // [5] Composite +} diff --git a/code/graphics/shaders/decal.vert b/code/graphics/shaders/decal.vert new file mode 100644 index 00000000000..ddbd67876de --- /dev/null +++ b/code/graphics/shaders/decal.vert @@ -0,0 +1,60 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Decal vertex shader — screen-space decal projection +// Port of OpenGL decal-v.sdr to Vulkan + +// Binding 0: box vertex positions +layout (location = 0) in vec4 vertPosition; + +// Binding 1: per-instance model matrix (mat4 = 4 vec4s at locations 8-11) +layout (location = 8) in vec4 vertModelMatrix0; +layout (location = 9) in vec4 vertModelMatrix1; +layout (location = 10) in vec4 vertModelMatrix2; +layout (location = 
11) in vec4 vertModelMatrix3; + +layout (location = 0) flat out mat4 invModelMatrix; // locations 0-3 +layout (location = 4) flat out vec3 decalDirection; +layout (location = 5) flat out float normal_angle_cutoff; +layout (location = 6) flat out float angle_fade_start; +layout (location = 7) flat out float alpha_scale; + +// Set 1 = Material, Binding 2 = DecalGlobals UBO +layout (set = 1, binding = 2, std140) uniform decalGlobalData { + mat4 viewMatrix; + mat4 projMatrix; + mat4 invViewMatrix; + mat4 invProjMatrix; + + vec2 viewportSize; +}; + +// Set 2 = PerDraw, Binding 3 = DecalInfo UBO +layout (set = 2, binding = 3, std140) uniform decalInfoData { + int diffuse_index; + int glow_index; + int normal_index; + int diffuse_blend_mode; + + int glow_blend_mode; +}; + +void main() { + // Reconstruct per-instance model matrix from 4 vec4 columns + mat4 vertModelMatrix = mat4(vertModelMatrix0, vertModelMatrix1, vertModelMatrix2, vertModelMatrix3); + + // Extract packed data from matrix column w-components + normal_angle_cutoff = vertModelMatrix[0][3]; + angle_fade_start = vertModelMatrix[1][3]; + alpha_scale = vertModelMatrix[2][3]; + + // Clean the matrix (zero out the packed w-components) + mat4 modelMatrix = vertModelMatrix; + modelMatrix[0][3] = 0.0; + modelMatrix[1][3] = 0.0; + modelMatrix[2][3] = 0.0; + + invModelMatrix = inverse(modelMatrix); + decalDirection = mat3(viewMatrix) * modelMatrix[2].xyz; + gl_Position = projMatrix * viewMatrix * modelMatrix * vertPosition; +} diff --git a/code/graphics/shaders/default-material.frag b/code/graphics/shaders/default-material.frag index 8ea9de01f4e..4736783c4d8 100644 --- a/code/graphics/shaders/default-material.frag +++ b/code/graphics/shaders/default-material.frag @@ -8,7 +8,7 @@ layout (location = 1) in vec4 fragColor; layout (location = 0) out vec4 fragOut0; -layout (binding = 1, std140) uniform genericData { +layout (set = 2, binding = 0, std140) uniform genericData { mat4 modelMatrix; vec4 color; @@ -22,10 +22,10 @@ layout (binding = 1, std140) uniform genericData { float intensity; float alphaThreshold; - bool clipEnabled; + uint clipEnabled; }; -layout(binding = 2) uniform sampler2DArray baseMap; +layout (set = 1, binding = 1) uniform sampler2DArray baseMap; void main() { diff --git a/code/graphics/shaders/default-material.vert b/code/graphics/shaders/default-material.vert index 2529adaf8a5..1e7d2f3d608 100644 --- a/code/graphics/shaders/default-material.vert +++ b/code/graphics/shaders/default-material.vert @@ -8,12 +8,12 @@ layout (location = 2) in vec4 vertTexCoord; layout (location = 0) out vec4 fragTexCoord; layout (location = 1) out vec4 fragColor; -layout (binding = 0, std140) uniform matrixData { +layout (set = 2, binding = 1, std140) uniform matrixData { mat4 modelViewMatrix; mat4 projMatrix; }; -layout (binding = 1, std140) uniform genericData { +layout (set = 2, binding = 0, std140) uniform genericData { mat4 modelMatrix; vec4 color; @@ -27,7 +27,7 @@ layout (binding = 1, std140) uniform genericData { float intensity; float alphaThreshold; - bool clipEnabled; + uint clipEnabled; }; void main() @@ -36,7 +36,12 @@ void main() fragColor = vertColor * color; gl_Position = projMatrix * modelViewMatrix * vertPosition; - if (clipEnabled) { + // Vulkan has no glEnable(GL_CLIP_DISTANCE0) equivalent — clip distances are + // always evaluated when declared in the shader. Must write a positive value + // when clipping is disabled to prevent undefined-value vertex culling. 
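+	// (Reading an unwritten gl_ClipDistance is undefined in SPIR-V, and a vertex
+	// with any negative clip distance is culled, hence the explicit else-branch
+	// write of 1.0 below.)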
+ if (clipEnabled != 0u) { gl_ClipDistance[0] = dot(clipEquation, modelMatrix * vertPosition); + } else { + gl_ClipDistance[0] = 1.0; } } diff --git a/code/graphics/shaders/deferred.frag b/code/graphics/shaders/deferred.frag new file mode 100644 index 00000000000..8deee7ab302 --- /dev/null +++ b/code/graphics/shaders/deferred.frag @@ -0,0 +1,353 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable +#include "lighting.sdr" +#include "gamma.sdr" + +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2D sTextures[16]; +// sTextures[0] = ColorBuffer +// sTextures[1] = NormalBuffer +// sTextures[2] = PositionBuffer +// sTextures[3] = SpecBuffer + +layout(set = 0, binding = 0, std140) uniform lightData { + vec3 diffuseLightColor; + float coneAngle; + + vec3 lightDir; + float coneInnerAngle; + + vec3 coneDir; + float dualCone; + + vec3 scale; + float lightRadius; + + int lightType; + int enable_shadows; + float sourceRadius; + + float pad0; +}; + +layout(set = 0, binding = 1, std140) uniform globalDeferredData { + mat4 shadow_mv_matrix; + mat4 shadow_proj_matrix[4]; + + mat4 inv_view_matrix; + + float veryneardist; + float neardist; + float middist; + float fardist; + + float invScreenWidth; + float invScreenHeight; + + float nearPlane; + + int use_env_map; +}; + +layout(set = 0, binding = 2) uniform sampler2DArray shadowMap; +layout(set = 0, binding = 3) uniform samplerCube sEnvmap; +layout(set = 0, binding = 4) uniform samplerCube sIrrmap; + +layout(set = 2, binding = 1, std140) uniform matrixData { + mat4 modelViewMatrix; + mat4 projMatrix; +}; + +// ===== Variance Shadow Mapping (ported from shadows.sdr) ===== + +const float VARIANCE_SHADOW_SCALE = 1000000.0; + +vec2 sampleShadowMap(vec2 uv, vec2 offset_uv, int cascade, float shadowMapSizeInv) +{ + return texture(shadowMap, vec3(uv + offset_uv * shadowMapSizeInv, float(cascade))).xy; +} + +float computeShadowFactor(float shadowDepth, vec2 moments, float bias) +{ + float shadow = 1.0; + if((moments.x - bias) > shadowDepth) + { + float variance = moments.y * VARIANCE_SHADOW_SCALE - moments.x * moments.x; + float mD = moments.x - bias - shadowDepth; + shadow = variance / (variance + mD * mD); + shadow = clamp(shadow, 0.0, 1.0); + } + return shadow; +} + +float samplePoissonPCF(float shadowDepth, int cascade, vec4 shadowUV[4], bool use_simple_pass) +{ + if(cascade > 3 || cascade < 0) return 1.0; + + vec2 poissonDisc[16] = vec2[]( + vec2(-0.76275, -0.3432573), + vec2(-0.5226235, -0.8277544), + vec2(-0.3780261, 0.01528688), + vec2(-0.7742821, 0.4245702), + vec2(0.04196143, -0.02622231), + vec2(-0.2974772, -0.4722782), + vec2(-0.516093, 0.71495), + vec2(-0.3257416, 0.3910343), + vec2(0.2705966, 0.6670476), + vec2(0.4918377, 0.1853267), + vec2(0.4428544, -0.6251478), + vec2(-0.09204347, 0.9267113), + vec2(0.391505, -0.2558275), + vec2(0.05605913, -0.7570801), + vec2(0.81772, -0.02475523), + vec2(0.6890262, 0.5191521) + ); + + float maxUVOffset[4]; + maxUVOffset[0] = 1.0/300.0; + maxUVOffset[1] = 1.0/250.0; + maxUVOffset[2] = 1.0/200.0; + maxUVOffset[3] = 1.0/200.0; + + if (use_simple_pass) { + float visibility = 1.0f; + for (int i=0; i<16; i++) { + vec2 shadow_sample = sampleShadowMap(shadowUV[cascade].xy, poissonDisc[i], cascade, maxUVOffset[cascade]); + if( ((shadow_sample.x - 0.002f) > shadowDepth) ) { + visibility -= (1.0f/16.0f); + } + } + return visibility; + } else { + vec2 sum = vec2(0.0f); + for (int i=0; i<16; i++) { + sum += sampleShadowMap(shadowUV[cascade].xy, poissonDisc[i], cascade, 
maxUVOffset[cascade]); + } + return computeShadowFactor(shadowDepth, sum*(1.0f/16.0f), 0.1f); + } +} + +float getShadowValue(float depth, float shadowDepth, vec4 shadowUV[4]) +{ + int cascade = 4; + cascade -= int(step(depth, fardist)); + cascade -= int(step(depth, middist)); + cascade -= int(step(depth, neardist)); + cascade -= int(step(depth, veryneardist)); + float cascade_start_dist[5]; + cascade_start_dist[0] = 0.0; + cascade_start_dist[1] = veryneardist; + cascade_start_dist[2] = neardist; + cascade_start_dist[3] = middist; + cascade_start_dist[4] = fardist; + if(cascade > 3 || cascade < 0) return 1.0; + + bool use_simple_pass; + if (fardist < 50.0f) { + use_simple_pass = true; + } else { + use_simple_pass = false; + } + + float dist_threshold = (cascade_start_dist[cascade+1] - cascade_start_dist[cascade])*0.2; + if(cascade_start_dist[cascade+1] - dist_threshold > depth) + return samplePoissonPCF(shadowDepth, cascade, shadowUV, use_simple_pass); + return mix(samplePoissonPCF(shadowDepth, cascade, shadowUV, use_simple_pass), + samplePoissonPCF(shadowDepth, cascade+1, shadowUV, use_simple_pass), + smoothstep(cascade_start_dist[cascade+1] - dist_threshold, cascade_start_dist[cascade+1], depth)); +} + +vec4 transformToShadowMap(mat4 proj, int i, vec4 pos) +{ + vec4 shadow_proj = proj * pos; + // Vulkan shadow projection maps to [0,1] depth, but XY is still [-1,1] + // Transform XY from [-1,1] to [0,1] + shadow_proj.xy = shadow_proj.xy * 0.5 + 0.5; + shadow_proj.w = shadow_proj.z; // depth for shadow comparison + shadow_proj.z = float(i); // cascade index for array layer + return shadow_proj; +} + +// ===== Light calculations ===== + +// Nearest point sphere and tube light calculations taken from +// "Real Shading in Unreal Engine 4" by Brian Karis, Epic Games +// Part of SIGGRAPH 2013 Course: Physically Based Shading in Theory and Practice + +vec3 ExpandLightSize(in vec3 lightDirIn, in vec3 reflectDir) { + vec3 centerToRay = max(dot(lightDirIn, reflectDir),sourceRadius) * reflectDir - lightDirIn; + return lightDirIn + centerToRay * clamp(sourceRadius/length(centerToRay), 0.0, 1.0); +} + +void GetLightInfo(vec3 position, in float alpha, in vec3 reflectDir, out vec3 lightDirOut, out float attenuation, out float area_normalisation) +{ + if (lightType == LT_DIRECTIONAL) { + lightDirOut = normalize(lightDir); + attenuation = 1.0; + area_normalisation = 1.0; + } else { + vec3 lightPosition = modelViewMatrix[3].xyz; + if (lightType == LT_POINT) { + lightDirOut = lightPosition - position.xyz; + float dist = length(lightDirOut); + + lightDirOut = ExpandLightSize(lightDirOut, reflectDir); + dist = length(lightDirOut); + float alpha_adjust = clamp(alpha + (sourceRadius/(2*dist)), 0.0, 1.0); + area_normalisation = alpha/alpha_adjust; + area_normalisation *= area_normalisation; + + if(dist > lightRadius) { + discard; + } + attenuation = 1.0 - clamp(sqrt(dist / lightRadius), 0.0, 1.0); + } + else if (lightType == LT_TUBE) { + vec3 beamVec = vec3(modelViewMatrix * vec4(0.0, 0.0, -scale.z, 0.0)); + vec3 beamDir = normalize(beamVec); + vec3 adjustedLightPos = lightPosition - (beamDir * lightRadius); + vec3 adjustedbeamVec = beamVec - 2.0 * lightRadius * beamDir; + float beamLength = length(adjustedbeamVec); + vec3 sourceDir = adjustedLightPos - position.xyz; + + vec3 a_t = reflectDir; + vec3 b_t = beamDir; + vec3 b_0 = sourceDir; + vec3 c = cross(a_t, b_t); + vec3 d = b_0; + vec3 r = d - a_t * dot(d, a_t) - c * dot(d,c); + float tubeneardist = dot(r, r)/dot(b_t, r); + lightDirOut = sourceDir - beamDir * 
clamp(tubeneardist, 0.0, beamLength); + + lightDirOut = ExpandLightSize(lightDirOut, reflectDir); + float dist = length(lightDirOut); + float alpha_adjust = min(alpha + (sourceRadius/(2*dist)), 1.0); + area_normalisation = alpha/alpha_adjust; + + if(dist > lightRadius) { + discard; + } + attenuation = 1.0 - clamp(sqrt(dist / lightRadius), 0.0, 1.0); + } + else if (lightType == LT_CONE) { + lightDirOut = lightPosition - position.xyz; + float coneDot = dot(normalize(-lightDirOut), coneDir); + float dist = length(lightDirOut); + attenuation = 1.0 - clamp(sqrt(dist / lightRadius), 0.0, 1.0); + area_normalisation = 1.0; + + if(dualCone > 0.5) { + if(abs(coneDot) < coneAngle) { + discard; + } else { + attenuation *= smoothstep(coneAngle, coneInnerAngle, abs(coneDot)); + } + } else { + if (coneDot < coneAngle) { + discard; + } else { + attenuation *= smoothstep(coneAngle, coneInnerAngle, coneDot); + } + } + } + attenuation *= attenuation; + lightDirOut = normalize(lightDirOut); + } +} + +// ===== Environment Map Lighting ===== +// Ported from deferred-f.sdr ComputeEnvLight() + +void ComputeEnvLight(float alpha, float ao, vec3 light_dir, vec3 eyeDir, + vec3 normal, vec4 baseColor, vec4 specColor, out vec3 envLight) +{ + const float ENV_REZ = 512.0; + const float REZ_BIAS = log2(ENV_REZ * sqrt(3.0)); + + float alphaSqr = alpha * alpha; + float rough_bias = 0.5 * log2(2.0 / alphaSqr - 1.0); + float mip_bias = REZ_BIAS - rough_bias; + + // Sample specular environment map with roughness-based mip bias + vec3 env_light_dir = vec3(modelViewMatrix * vec4(light_dir, 0.0)); + vec4 specEnvColour = srgb_to_linear(textureLod(sEnvmap, env_light_dir, mip_bias)); + + vec3 halfVec = normal; + + // Fresnel using Schlick approximation + vec3 fresnel = mix(specColor.rgb, FresnelSchlick(halfVec, eyeDir, specColor.rgb), specColor.a); + + // Pseudo-IBL geometry term (k = alpha^2 / 2) + float k = alphaSqr / 2.0; + float NdotL = max(dot(light_dir, normal), 0.0); + float g1vNL = GeometrySchlickGGX(NdotL, k); + + vec3 specEnvLighting = specEnvColour.rgb * fresnel * g1vNL; + + // Diffuse from irradiance map + vec3 kD = vec3(1.0) - fresnel; + kD *= (vec3(1.0) - specColor.rgb); + vec3 diffEnvColor = srgb_to_linear(texture(sIrrmap, vec3(modelViewMatrix * vec4(normal, 0.0))).rgb); + vec3 diffEnvLighting = kD * baseColor.rgb * diffEnvColor * ao; + + envLight = (specEnvLighting + diffEnvLighting) * baseColor.a; +} + +void main() +{ + vec2 screenPos = gl_FragCoord.xy * vec2(invScreenWidth, invScreenHeight); + vec4 position_buffer = texture(sTextures[2], screenPos); + vec3 position = position_buffer.xyz; + + if(abs(dot(position, position)) < nearPlane * nearPlane) + discard; + + vec4 diffuse = texture(sTextures[0], screenPos); + vec3 diffColor = diffuse.rgb; + vec4 normalData = texture(sTextures[1], screenPos); + vec3 normal = normalize(normalData.xyz); + float gloss = normalData.a; + float roughness = clamp(1.0f - gloss, 0.0f, 1.0f); + float alpha = roughness * roughness; + vec3 eyeDir = normalize(-position); + vec3 reflectDir = reflect(-eyeDir, normal); + vec4 specColor = texture(sTextures[3], screenPos); + + vec4 fragmentColor = vec4(1.0); + + if (lightType == LT_AMBIENT) { + float ao = position_buffer.w; + fragmentColor.rgb = diffuseLightColor * diffColor * ao; + if (use_env_map != 0) { + vec3 envLight; + ComputeEnvLight(alpha, ao, reflectDir, eyeDir, normal, diffuse, specColor, envLight); + fragmentColor.rgb += envLight; + } + } + else { + float fresnel = specColor.a; + + vec3 lightDirCalc; + float attenuation; + float 
area_normalisation; + GetLightInfo(position, alpha, reflectDir, lightDirCalc, attenuation, area_normalisation); + + // Shadow attenuation for directional lights + if (enable_shadows != 0 && lightType == LT_DIRECTIONAL) { + vec4 fragShadowPos = shadow_mv_matrix * inv_view_matrix * vec4(position, 1.0); + vec4 fragShadowUV[4]; + for (int i = 0; i < 4; i++) { + fragShadowUV[i] = transformToShadowMap(shadow_proj_matrix[i], i, fragShadowPos); + } + float shadowVal = getShadowValue(-position.z, fragShadowPos.z, fragShadowUV); + attenuation *= shadowVal; + } + + vec3 halfVec = normalize(lightDirCalc + eyeDir); + float NdotL = clamp(dot(normal, lightDirCalc), 0.0, 1.0); + fragmentColor.rgb = computeLighting(specColor.rgb, diffColor, lightDirCalc, normal.xyz, halfVec, eyeDir, roughness, fresnel, NdotL).rgb * diffuseLightColor * attenuation * area_normalisation; + } + + fragOut0 = max(fragmentColor, vec4(0.0)); +} diff --git a/code/graphics/shaders/deferred.vert b/code/graphics/shaders/deferred.vert new file mode 100644 index 00000000000..d897c199a06 --- /dev/null +++ b/code/graphics/shaders/deferred.vert @@ -0,0 +1,41 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable +#include "lighting.sdr" + +layout(location = 0) in vec4 vertPosition; + +layout(set = 0, binding = 0, std140) uniform lightData { + vec3 diffuseLightColor; + float coneAngle; + + vec3 lightDir; + float coneInnerAngle; + + vec3 coneDir; + float dualCone; + + vec3 scale; + float lightRadius; + + int lightType; + int enable_shadows; + float sourceRadius; + + float pad0; +}; + +layout(set = 2, binding = 1, std140) uniform matrixData { + mat4 modelViewMatrix; + mat4 projMatrix; +}; + +void main() +{ + if (lightType == LT_DIRECTIONAL || lightType == LT_AMBIENT) { + // Fullscreen triangle from gl_VertexIndex (same as postprocess.vert) + vec2 pos = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + gl_Position = vec4(pos * 2.0 - 1.0, 0.0, 1.0); + } else { + gl_Position = projMatrix * modelViewMatrix * vec4(vertPosition.xyz * scale, 1.0); + } +} diff --git a/code/graphics/shaders/effect-distort.frag b/code/graphics/shaders/effect-distort.frag new file mode 100644 index 00000000000..265cf0b117a --- /dev/null +++ b/code/graphics/shaders/effect-distort.frag @@ -0,0 +1,38 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec4 fragTexCoord; +layout(location = 1) in vec4 fragColor; +layout(location = 2) in float fragOffset; + +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2DArray baseMap; +layout(set = 1, binding = 5) uniform sampler2D frameBuffer; +layout(set = 1, binding = 6) uniform sampler2D distMap; + +layout(set = 2, binding = 0, std140) uniform GenericData { + float window_width; + float window_height; + float use_offset; + float pad; +}; + +void main() +{ + vec2 depthCoord = vec2(gl_FragCoord.x / window_width, gl_FragCoord.y / window_height); + + // Sample distortion offset from ping-pong distortion texture + vec2 distortion = texture(distMap, fragTexCoord.xy + vec2(0.0, fragOffset)).rg; + + // Get particle alpha from base texture (multiply by vertex alpha only, not RGB) + vec4 fragmentColor = texture(baseMap, fragTexCoord.xyz) * fragColor.a; + + // Scale distortion by particle luminance + float alpha = clamp(dot(fragmentColor.rgb, vec3(0.3333)) * 10.0, 0.0, 1.0); + distortion = ((distortion - 0.5) * 0.01) * alpha; + + // Sample scene color at distorted UV, blend via particle alpha + fragOut0 = texture(frameBuffer, depthCoord + 
distortion); + fragOut0.a = alpha; +} diff --git a/code/graphics/shaders/effect-distort.vert b/code/graphics/shaders/effect-distort.vert new file mode 100644 index 00000000000..e9d7d566986 --- /dev/null +++ b/code/graphics/shaders/effect-distort.vert @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec3 vertPosition; +layout(location = 1) in vec4 vertColor; +layout(location = 2) in vec4 vertTexCoord; +layout(location = 6) in float vertRadius; + +layout(location = 0) out vec4 fragTexCoord; +layout(location = 1) out vec4 fragColor; +layout(location = 2) out float fragOffset; + +layout(set = 2, binding = 0, std140) uniform GenericData { + float window_width; + float window_height; + float use_offset; + float pad; +}; + +layout(set = 2, binding = 1, std140) uniform Matrices { + mat4 modelViewMatrix; + mat4 projMatrix; +}; + +void main() +{ + fragTexCoord = vertTexCoord; + fragColor = vertColor; + fragOffset = vertRadius * use_offset; + gl_Position = projMatrix * modelViewMatrix * vec4(vertPosition, 1.0); +} diff --git a/code/graphics/shaders/effect.frag b/code/graphics/shaders/effect.frag new file mode 100644 index 00000000000..98926e4f72d --- /dev/null +++ b/code/graphics/shaders/effect.frag @@ -0,0 +1,58 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +#include "gamma.sdr" + +layout(location = 0) in vec4 fragPosition; +layout(location = 1) in vec4 fragTexCoord; +layout(location = 2) in vec4 fragColor; +layout(location = 3) in float fragRadius; + +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2DArray baseMap; +layout(set = 1, binding = 4) uniform sampler2D depthMap; + +layout(set = 2, binding = 0, std140) uniform genericData { + float window_width; + float window_height; + float nearZ; + float farZ; + int linear_depth; + int srgb; + int blend_alpha; +}; + +void main() +{ + vec4 fragmentColor = texture(baseMap, fragTexCoord.xyz); + fragmentColor.rgb = mix(fragmentColor.rgb, srgb_to_linear(fragmentColor.rgb), float(srgb)); + fragmentColor *= mix(fragColor, vec4(srgb_to_linear(fragColor.rgb), fragColor.a), float(srgb)); + vec2 offset = vec2(fragRadius * abs(0.5 - fragTexCoord.x) * 2.0, fragRadius * abs(0.5 - fragTexCoord.y) * 2.0); + float offset_len = length(offset); + if ( offset_len > fragRadius ) { + fragOut0 = vec4(0.0, 0.0, 0.0, 0.0); + return; + } + vec2 depthCoord = vec2(gl_FragCoord.x / window_width, gl_FragCoord.y / window_height ); + vec4 sceneDepth = texture(depthMap, depthCoord); + float sceneDepthLinear; + float fragDepthLinear; + if ( linear_depth == 1 ) { + // Background pixels have position (0,0,0) from G-buffer clear; + // treat as infinitely far so particles remain visible against background + sceneDepthLinear = sceneDepth.z != 0.0 ? -sceneDepth.z : farZ; + fragDepthLinear = -fragPosition.z; + } else { + sceneDepthLinear = ( 2.0 * farZ * nearZ ) / ( farZ + nearZ - sceneDepth.x * (farZ-nearZ) ); + fragDepthLinear = ( 2.0 * farZ * nearZ ) / ( farZ + nearZ - gl_FragCoord.z * (farZ-nearZ) ); + } + // assume UV of 0.5, 0.5 is the centroid of this sphere volume + float depthOffset = sqrt((fragRadius*fragRadius) - (offset_len*offset_len)); + float frontDepth = fragDepthLinear - depthOffset; + float backDepth = fragDepthLinear + depthOffset; + float intensity = smoothstep(max(nearZ, frontDepth), backDepth, sceneDepthLinear); + fragmentColor.rgb *= (srgb == 1) ? 1.5 : 1.0; + fragmentColor = (blend_alpha == 1) ? 
vec4(fragmentColor.rgb, fragmentColor.a * intensity) : vec4(fragmentColor.rgb * intensity, fragmentColor.a); + fragOut0 = max(fragmentColor, vec4(0.0)); +} diff --git a/code/graphics/shaders/effect.vert b/code/graphics/shaders/effect.vert new file mode 100644 index 00000000000..47eebf983a8 --- /dev/null +++ b/code/graphics/shaders/effect.vert @@ -0,0 +1,26 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec4 vertPosition; +layout(location = 1) in vec4 vertColor; +layout(location = 2) in vec4 vertTexCoord; +layout(location = 6) in float vertRadius; + +layout(location = 0) out vec4 fragPosition; +layout(location = 1) out vec4 fragTexCoord; +layout(location = 2) out vec4 fragColor; +layout(location = 3) out float fragRadius; + +layout(set = 2, binding = 1, std140) uniform matrixData { + mat4 modelViewMatrix; + mat4 projMatrix; +}; + +void main() +{ + fragRadius = vertRadius; + gl_Position = projMatrix * modelViewMatrix * vertPosition; + fragPosition = modelViewMatrix * vertPosition; + fragTexCoord = vec4(vertTexCoord.xyz, 0.0); + fragColor = vertColor; +} diff --git a/code/graphics/shaders/fog.frag b/code/graphics/shaders/fog.frag new file mode 100644 index 00000000000..271e31da20f --- /dev/null +++ b/code/graphics/shaders/fog.frag @@ -0,0 +1,45 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Scene fog fragment shader — port of fog-f.sdr to Vulkan +// Applies distance-based exponential fog to the lit composite image. + +#include "gamma.sdr" + +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2D tex; // composite (lit scene) +layout(set = 1, binding = 4) uniform sampler2D depth_tex; // scene depth copy + +layout(std140, set = 2, binding = 0) uniform genericData { + vec3 fog_color; + float fog_start; + + float fog_density; + float zNear; + float zFar; + + float pad0; +}; + +void main() +{ + vec4 color_in = texture(tex, fragTexCoord.xy); + + float depth_val = texture(depth_tex, fragTexCoord.xy).x; + // Vulkan depth range [0,1] — linearize directly (no 2*d-1 transform) + float view_depth = zNear * zFar / (zFar - depth_val * (zFar - zNear)); + + // Cap infinite depth: Vulkan's formula yields infinity at d=1.0 due to + // float precision with extreme zFar. OpenGL's formula gives finite zFar + // instead. Capping to zFar makes both renderers apply full fog to + // background pixels. + if (isinf(view_depth)) view_depth = zFar; + + float fog_dist = clamp(1 - pow(fog_density, view_depth - fog_start), 0.0, 1.0); + vec3 finalFogColor = srgb_to_linear(fog_color); + + fragOut0.rgb = mix(color_in.rgb, finalFogColor, fog_dist); + fragOut0.a = 1.0; +} diff --git a/code/graphics/shaders/fog.vert b/code/graphics/shaders/fog.vert new file mode 100644 index 00000000000..193fb877b2e --- /dev/null +++ b/code/graphics/shaders/fog.vert @@ -0,0 +1,14 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Fullscreen triangle vertex shader for scene fog pass. +// Same as postprocess.vert — uses gl_VertexIndex, no vertex buffer needed. 
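+// For gl_VertexIndex = 0,1,2 the bit trick in main() yields pos = (0,0), (2,0),
+// (0,2), i.e. clip positions (-1,-1), (3,-1), (-1,3): a single oversized triangle
+// that, once clipped to the viewport, covers uv exactly over [0,1]^2.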
+
+layout(location = 0) out vec2 fragTexCoord;
+
+void main()
+{
+ vec2 pos = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2);
+ fragTexCoord = pos;
+ gl_Position = vec4(pos * 2.0 - 1.0, 0.0, 1.0);
+}
diff --git a/code/graphics/shaders/fxaa.frag b/code/graphics/shaders/fxaa.frag
new file mode 100644
index 00000000000..08795afa3f2
--- /dev/null
+++ b/code/graphics/shaders/fxaa.frag
@@ -0,0 +1,155 @@
+#version 450
+#extension GL_ARB_separate_shader_objects : enable
+
+// FXAA 3.11 - Medium Quality (Preset 26)
+// Ported from NVIDIA FXAA 3.11 by Timothy Lottes
+// Uses pre-computed luma in alpha channel (from FXAA prepass)
+
+layout(location = 0) in vec2 fragTexCoord;
+layout(location = 0) out vec4 fragOut0;
+
+layout(set = 1, binding = 1) uniform sampler2D tex;
+
+layout(std140, set = 2, binding = 0) uniform genericData {
+ float rt_w;
+ float rt_h;
+ float pad0;
+ float pad1;
+};
+
+// Quality parameters for Medium preset
+const float EDGE_THRESHOLD = 1.0 / 12.0;
+const float EDGE_THRESHOLD_MIN = 1.0 / 24.0;
+const float SUBPIX = 0.33;
+
+// Search step offsets for preset 26 (9 steps)
+const float QUALITY_P[9] = float[9](1.0, 1.5, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 8.0);
+
+// Luma lives in the alpha channel (written by the fxaapre prepass); reading .w
+// here keeps every neighbor/search comparison consistent with lumaM below.
+float FxaaLuma(vec4 rgba) { return rgba.w; }
+
+void main()
+{
+ vec2 rcpFrame = vec2(1.0 / rt_w, 1.0 / rt_h);
+ vec2 posM = fragTexCoord;
+
+ // Sample center pixel (luma pre-computed in alpha by prepass)
+ vec4 rgbyM = textureLod(tex, posM, 0.0);
+ float lumaM = rgbyM.w;
+
+ // Sample 4-connected neighbors (same pre-computed alpha luma as the center)
+ float lumaS = FxaaLuma(textureLodOffset(tex, posM, 0.0, ivec2( 0, 1)));
+ float lumaE = FxaaLuma(textureLodOffset(tex, posM, 0.0, ivec2( 1, 0)));
+ float lumaN = FxaaLuma(textureLodOffset(tex, posM, 0.0, ivec2( 0,-1)));
+ float lumaW = FxaaLuma(textureLodOffset(tex, posM, 0.0, ivec2(-1, 0)));
+
+ // Local contrast range
+ float rangeMax = max(max(max(lumaS, lumaE), max(lumaN, lumaW)), lumaM);
+ float rangeMin = min(min(min(lumaS, lumaE), min(lumaN, lumaW)), lumaM);
+ float range = rangeMax - rangeMin;
+
+ // Early exit for low-contrast regions
+ if (range < max(EDGE_THRESHOLD_MIN, rangeMax * EDGE_THRESHOLD)) {
+ fragOut0 = rgbyM;
+ return;
+ }
+
+ // Sample diagonal neighbors
+ float lumaNW = FxaaLuma(textureLodOffset(tex, posM, 0.0, ivec2(-1,-1)));
+ float lumaSE = FxaaLuma(textureLodOffset(tex, posM, 0.0, ivec2( 1, 1)));
+ float lumaNE = FxaaLuma(textureLodOffset(tex, posM, 0.0, ivec2( 1,-1)));
+ float lumaSW = FxaaLuma(textureLodOffset(tex, posM, 0.0, ivec2(-1, 1)));
+
+ // Subpixel blending factor
+ float lumaNS = lumaN + lumaS;
+ float lumaWE = lumaW + lumaE;
+ float subpixNSWE = lumaNS + lumaWE;
+ float subpixNWSWNESE = (lumaNW + lumaSW) + (lumaNE + lumaSE);
+ float subpixA = subpixNSWE * 2.0 + subpixNWSWNESE;
+ float subpixB = (subpixA * (1.0 / 12.0)) - lumaM;
+ float subpixC = clamp(abs(subpixB) / range, 0.0, 1.0);
+ float subpixD = ((-2.0) * subpixC) + 3.0;
+ float subpixE = subpixC * subpixC;
+ float subpixF = subpixD * subpixE;
+ float subpixH = subpixF * subpixF * SUBPIX;
+
+ // Edge orientation detection (horizontal vs vertical)
+ float edgeHorz1 = (-2.0 * lumaM) + lumaNS;
+ float edgeVert1 = (-2.0 * lumaM) + lumaWE;
+ float edgeHorz2 = (-2.0 * lumaE) + (lumaNE + lumaSE);
+ float edgeVert2 = (-2.0 * lumaN) + (lumaNW + lumaNE);
+ float edgeHorz3 = (-2.0 * lumaW) + (lumaNW + lumaSW);
+ float edgeVert3 = (-2.0 * lumaS) + (lumaSW + lumaSE);
+ float edgeHorz = abs(edgeHorz3) + (abs(edgeHorz1) * 2.0) + abs(edgeHorz2);
+ float edgeVert = abs(edgeVert3) +
(abs(edgeVert1) * 2.0) + abs(edgeVert2); + bool horzSpan = edgeHorz >= edgeVert; + + // Select edge perpendicular direction + float lengthSign = horzSpan ? rcpFrame.y : rcpFrame.x; + float lumaN2 = horzSpan ? lumaN : lumaW; + float lumaS2 = horzSpan ? lumaS : lumaE; + + float gradientN = lumaN2 - lumaM; + float gradientS = lumaS2 - lumaM; + float lumaNN = lumaN2 + lumaM; + float lumaSS = lumaS2 + lumaM; + bool pairN = abs(gradientN) >= abs(gradientS); + float gradient = max(abs(gradientN), abs(gradientS)); + if (pairN) lengthSign = -lengthSign; + + // Setup search along the edge + vec2 posB = posM; + vec2 offNP; + offNP.x = (!horzSpan) ? 0.0 : rcpFrame.x; + offNP.y = ( horzSpan) ? 0.0 : rcpFrame.y; + if (!horzSpan) posB.x += lengthSign * 0.5; + if ( horzSpan) posB.y += lengthSign * 0.5; + + vec2 posN = posB - offNP * QUALITY_P[0]; + vec2 posP = posB + offNP * QUALITY_P[0]; + + float lumaEndN = FxaaLuma(textureLod(tex, posN, 0.0)); + float lumaEndP = FxaaLuma(textureLod(tex, posP, 0.0)); + + if (!pairN) lumaNN = lumaSS; + float gradientScaled = gradient * 0.25; + bool lumaMLTZero = (lumaM - lumaNN * 0.5) < 0.0; + lumaEndN -= lumaNN * 0.5; + lumaEndP -= lumaNN * 0.5; + + bool doneN = abs(lumaEndN) >= gradientScaled; + bool doneP = abs(lumaEndP) >= gradientScaled; + + // Search loop (preset 26: 9 steps) + for (int i = 1; i < 9 && (!doneN || !doneP); i++) { + if (!doneN) { + posN -= offNP * QUALITY_P[i]; + lumaEndN = FxaaLuma(textureLod(tex, posN, 0.0)) - lumaNN * 0.5; + doneN = abs(lumaEndN) >= gradientScaled; + } + if (!doneP) { + posP += offNP * QUALITY_P[i]; + lumaEndP = FxaaLuma(textureLod(tex, posP, 0.0)) - lumaNN * 0.5; + doneP = abs(lumaEndP) >= gradientScaled; + } + } + + // Compute final pixel offset + float dstN = horzSpan ? (posM.x - posN.x) : (posM.y - posN.y); + float dstP = horzSpan ? (posP.x - posM.x) : (posP.y - posM.y); + bool directionN = dstN < dstP; + float dst = min(dstN, dstP); + float spanLength = dstP + dstN; + + bool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero; + bool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero; + bool goodSpan = directionN ? goodSpanN : goodSpanP; + float pixelOffset = goodSpan ? 
((dst * (-1.0 / spanLength)) + 0.5) : 0.0; + float pixelOffsetSubpix = max(pixelOffset, subpixH); + + // Apply offset and sample + vec2 finalPos = posM; + if (!horzSpan) finalPos.x += pixelOffsetSubpix * lengthSign; + if ( horzSpan) finalPos.y += pixelOffsetSubpix * lengthSign; + + fragOut0 = textureLod(tex, finalPos, 0.0); +} diff --git a/code/graphics/shaders/fxaapre.frag b/code/graphics/shaders/fxaapre.frag new file mode 100644 index 00000000000..e46598ccb11 --- /dev/null +++ b/code/graphics/shaders/fxaapre.frag @@ -0,0 +1,14 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2D tex; + +void main() +{ + vec4 color = texture(tex, fragTexCoord); + // Store computed luma in alpha channel for FXAA main pass + fragOut0 = vec4(color.rgb, dot(color.rgb, vec3(0.299, 0.587, 0.114))); +} diff --git a/code/graphics/shaders/irradiance.frag b/code/graphics/shaders/irradiance.frag new file mode 100644 index 00000000000..58917beaf92 --- /dev/null +++ b/code/graphics/shaders/irradiance.frag @@ -0,0 +1,188 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable +#include "gamma.sdr" + +#define PI 3.1415926535897932384626433832795 + +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform samplerCube envmap; + +layout(set = 2, binding = 0, std140) uniform genericData { + int face; +}; + +// Iteratively optimised points to give low discrepancy distribution on arbitrary hemisphere. +const vec3 points[128] = vec3[128]( +vec3(-0.2268, 0.6185, 0.7523), +vec3( 0.9958, -0.0871, -0.0277), +vec3( 0.7132, -0.6099, 0.3455), +vec3(-0.9243, -0.3644, 0.1137), +vec3( 0.4580, -0.3977, -0.7950), +vec3(-0.2989, -0.7422, -0.5998), +vec3(-0.7263, -0.5965, 0.3416), +vec3(-0.7906, 0.4955, 0.3597), +vec3(-0.5006, -0.5116, 0.6984), +vec3( 0.3924, 0.8514, 0.3479), +vec3(-0.5916, -0.8009, 0.0925), +vec3( 0.5076, 0.2359, -0.8287), +vec3(-0.6712, 0.5976, -0.4386), +vec3( 0.5255, -0.5998, -0.6034), +vec3( 0.2083, 0.7588, -0.6172), +vec3( 0.8282, 0.0486, 0.5584), +vec3( 0.2563, -0.6217, -0.7401), +vec3(-0.4601, 0.2289, -0.8579), +vec3( 0.6840, 0.5803, -0.4421), +vec3(-0.0167, 0.9512, -0.3080), +vec3( 0.2259, 0.3423, 0.9120), +vec3( 0.3726, -0.7409, 0.5587), +vec3( 0.0440, -0.4696, 0.8818), +vec3(-0.5011, -0.6169, -0.6069), +vec3( 0.2801, 0.8534, 0.4396), +vec3(-0.2464, -0.9090, 0.3360), +vec3(-0.7297, 0.1734, 0.6614), +vec3(-0.2619, 0.2954, 0.9188), +vec3( 0.1519, -0.5555, -0.8176), +vec3(-0.6161, -0.7542, 0.2274), +vec3( 0.3067, -0.8999, -0.3100), +vec3( 0.4223, -0.6110, 0.6696), +vec3(-0.7079, 0.5325, 0.4640), +vec3( 0.9522, -0.3022, 0.0443), +vec3(-0.3054, -0.5260, -0.7937), +vec3(-0.7502, 0.1605, -0.6414), +vec3( 0.9070, -0.0741, -0.4145), +vec3(-0.1592, 0.8002, -0.5783), +vec3( 0.7672, 0.4286, 0.4772), +vec3(-0.9340, 0.2468, -0.2584), +vec3( 0.9905, -0.1254, 0.0563), +vec3(-0.7776, -0.4144, -0.4729), +vec3(-0.7503, 0.6316, -0.1952), +vec3(-0.3313, -0.2916, -0.8973), +vec3( 0.1147, -0.7738, 0.6230), +vec3( 0.1481, 0.3008, -0.9421), +vec3(-0.8300, 0.0980, 0.5491), +vec3(-0.1374, 0.9904, 0.0163), +vec3( 0.0996, -0.9950, -0.0014), +vec3( 0.7040, 0.0177, -0.7100), +vec3(-0.3455, 0.8607, -0.3740), +vec3(-0.6005, 0.2897, 0.7453), +vec3( 0.3546, -0.8403, -0.4100), +vec3(-0.5317, 0.5463, -0.6472), +vec3( 0.4780, -0.1825, -0.8592), +vec3( 0.3033, -0.1953, 0.9327), +vec3(-0.8416, -0.1018, -0.5304), +vec3( 
0.2834, -0.6792, -0.6771), +vec3( 0.9488, 0.1900, -0.2522), +vec3( 0.3215, 0.7569, -0.5690), +vec3(-0.7391, 0.5883, -0.3281), +vec3( 0.4351, -0.2649, 0.8605), +vec3( 0.0829, -0.8264, 0.5570), +vec3(-0.6900, 0.5097, 0.5139), +vec3( 0.6869, -0.5626, 0.4601), +vec3(-0.9385, -0.1405, -0.3153), +vec3(-0.2377, 0.7921, 0.5622), +vec3( 0.3259, -0.1633, -0.9312), +vec3( 0.4753, 0.8652, -0.1597), +vec3( 0.9267, 0.0849, -0.3661), +vec3(-0.8777, -0.4241, -0.2231), +vec3( 0.5195, -0.8463, -0.1177), +vec3(-0.2962, -0.5401, 0.7878), +vec3( 0.9439, 0.3300, -0.0130), +vec3(-0.9481, 0.2886, 0.1333), +vec3(-0.0272, -0.5312, 0.8468), +vec3(-0.3538, 0.6180, -0.7021), +vec3(-0.9996, 0.0160, -0.0217), +vec3( 0.2800, -0.3399, 0.8978), +vec3( 0.2090, 0.9773, 0.0338), +vec3( 0.5249, 0.0450, 0.8500), +vec3( 0.8165, 0.3275, -0.4755), +vec3( 0.1568, -0.6439, -0.7489), +vec3( 0.3034, 0.6192, -0.7243), +vec3(-0.7233, -0.6519, -0.2278), +vec3(-0.8259, -0.4757, 0.3027), +vec3( 0.2690, -0.7823, 0.5618), +vec3( 0.3309, 0.4864, -0.8087), +vec3(-0.3891, 0.4036, 0.8281), +vec3(-0.0919, 0.1208, 0.9884), +vec3(-0.2482, -0.5348, 0.8077), +vec3( 0.8195, 0.3333, 0.4661), +vec3( 0.7890, -0.6104, 0.0702), +vec3(-0.4653, 0.8046, 0.3688), +vec3(-0.0413, -0.2247, -0.9736), +vec3( 0.1926, 0.2839, -0.9393), +vec3( 0.9205, -0.2443, -0.3048), +vec3(-0.4891, 0.8229, 0.2891), +vec3(-0.9010, -0.4189, -0.1128), +vec3( 0.0706, 0.2715, 0.9598), +vec3(-0.2682, 0.4639, -0.8443), +vec3(-0.6389, -0.0442, 0.7680), +vec3(-0.2588, 0.9613, -0.0949), +vec3( 0.5469, -0.4194, 0.7246), +vec3( 0.8399, 0.3814, 0.3860), +vec3( 0.8060, -0.3544, -0.4741), +vec3(-0.7109, -0.4466, 0.5432), +vec3(-0.2986, -0.9381, -0.1753), +vec3( 0.3983, -0.3642, -0.8418), +vec3( 0.4518, 0.8744, 0.1770), +vec3( 0.8792, -0.4098, 0.2432), +vec3(-0.7904, 0.6106, -0.0494), +vec3( 0.1667, -0.2852, -0.9438), +vec3( 0.5566, 0.6694, 0.4920), +vec3( 0.1761, 0.9638, 0.2000), +vec3( 0.6951, 0.7188, 0.0090), +vec3(-0.2659, -0.0544, 0.9625), +vec3(-0.0370, 0.6667, 0.7444), +vec3( 0.1119, -0.5816, 0.8058), +vec3(-0.1706, -0.9310, -0.3226), +vec3( 0.5573, -0.8143, 0.1624), +vec3( 0.3899, -0.9183, 0.0682), +vec3(-0.2921, -0.7937, 0.5336), +vec3( 0.8488, 0.4895, 0.1997), +vec3(-0.6887, 0.1527, 0.7088), +vec3(-0.3092, 0.0011, -0.9510), +vec3( 0.3066, 0.6124, 0.7287), +vec3( 0.7305, 0.6698, 0.1333) +); + +void main() { + // Irradiance map is 16x16 — remap fragment position to (-1, 1) + float xpos = 2.0 * gl_FragCoord.x / 16.0 - 1.0; + float ypos = 2.0 * gl_FragCoord.y / 16.0 - 1.0; + vec3 fragDir; + vec4 fragCol = vec4(0.0); + + switch (face) { + case 0: // +X + fragDir = vec3(1.0, -ypos, -xpos); + break; + case 1: // -X + fragDir = vec3(-1.0, -ypos, xpos); + break; + case 2: // +Y + fragDir = vec3(xpos, 1.0, ypos); + break; + case 3: // -Y + fragDir = vec3(xpos, -1.0, -ypos); + break; + case 4: // +Z + fragDir = vec3(xpos, -ypos, 1.0); + break; + case 5: // -Z + fragDir = vec3(-xpos, -ypos, -1.0); + break; + } + fragDir = normalize(fragDir); + + for (int i = 0; i < 128; ++i) + { + // Pre-calculated sampling: fixed directions mirrored onto correct hemisphere. + // ~50% of envmap texture samples are shared between adjacent fragments, + // improving texture cache hit rate. 
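+ // The fold below is branch-free mirroring: (aPdotD - PdotD) is 0 when
+ // points[i] already lies in the hemisphere around fragDir, and -2*PdotD
+ // when it does not, so sampDir is either points[i] itself or its
+ // reflection points[i] - 2*dot(points[i], fragDir)*fragDir through the
+ // plane perpendicular to fragDir. Every sample therefore lands on the
+ // fragDir hemisphere, cosine-weighted via aPdotD.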
+ float PdotD = dot(points[i], fragDir); + float aPdotD = abs(PdotD); + vec3 sampDir = points[i] + (aPdotD - PdotD) * fragDir; + fragCol += aPdotD * vec4(srgb_to_linear(texture(envmap, sampDir)).rgb, 1.0); + } + fragOut0 = linear_to_srgb(PI * fragCol * (1.0 / 128.0)); +} diff --git a/code/graphics/shaders/irradiance.vert b/code/graphics/shaders/irradiance.vert new file mode 100644 index 00000000000..bbb80f1843d --- /dev/null +++ b/code/graphics/shaders/irradiance.vert @@ -0,0 +1,14 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Fullscreen triangle vertex shader for irradiance map generation. +// Identical to postprocess.vert — draw with vkCmdDraw(3, 1, 0, 0). + +layout(location = 0) out vec2 fragTexCoord; + +void main() +{ + vec2 pos = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + fragTexCoord = pos; + gl_Position = vec4(pos * 2.0 - 1.0, 0.0, 1.0); +} diff --git a/code/graphics/shaders/lightshafts.frag b/code/graphics/shaders/lightshafts.frag new file mode 100644 index 00000000000..b77419c000d --- /dev/null +++ b/code/graphics/shaders/lightshafts.frag @@ -0,0 +1,47 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Lightshafts (god rays) post-processing shader +// Raymarches from each fragment toward the sun position, +// accumulating brightness from depth==1.0 (sky) pixels. + +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2D scene; // Depth texture + +const int SAMPLE_NUM = 50; + +layout(std140, set = 2, binding = 0) uniform genericData { + vec2 sun_pos; + float density; + float weight; + + float falloff; + float intensity; + float cp_intensity; + + float pad0; +}; + +void main() +{ + vec2 step = fragTexCoord.st - sun_pos.xy; + vec2 pos = fragTexCoord.st; + step *= 1.0 / float(SAMPLE_NUM) * density; + + float decay = 1.0; + vec4 sum = vec4(0.0); + + // Raymarch from fragment toward sun, accumulating bright sky pixels + for (int i = 0; i < SAMPLE_NUM; i++) { + pos.st -= step; + vec4 tex_sample = texture(scene, pos); + if (tex_sample.r == 1.0) // Depth == 1.0 means far plane (sky) + sum += decay * weight; + decay *= falloff; + } + + fragOut0 = sum * intensity; + fragOut0.a = 1.0; +} diff --git a/code/graphics/shaders/main.frag b/code/graphics/shaders/main.frag new file mode 100644 index 00000000000..8e5bcfd6d26 --- /dev/null +++ b/code/graphics/shaders/main.frag @@ -0,0 +1,387 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Include shared lighting and gamma functions from legacy shader directory +#include "lighting.sdr" +#include "gamma.sdr" + +// Model shader flags (from model_shader_flags.h) +const int MODEL_SDR_FLAG_LIGHT = (1 << 0); +const int MODEL_SDR_FLAG_DEFERRED = (1 << 1); +const int MODEL_SDR_FLAG_HDR = (1 << 2); +const int MODEL_SDR_FLAG_DIFFUSE = (1 << 3); +const int MODEL_SDR_FLAG_GLOW = (1 << 4); +const int MODEL_SDR_FLAG_SPEC = (1 << 5); +const int MODEL_SDR_FLAG_NORMAL = (1 << 6); +const int MODEL_SDR_FLAG_AMBIENT = (1 << 7); +const int MODEL_SDR_FLAG_MISC = (1 << 8); +const int MODEL_SDR_FLAG_TEAMCOLOR = (1 << 9); +const int MODEL_SDR_FLAG_FOG = (1 << 10); +const int MODEL_SDR_FLAG_SHADOWS = (1 << 12); +const int MODEL_SDR_FLAG_ALPHA_MULT = (1 << 14); + +#define MAX_LIGHTS 8 + +struct model_light { + vec4 position; + + vec3 diffuse_color; + int light_type; + + vec3 direction; + float attenuation; + + float ml_sourceRadius; +}; + +layout(set = 1, binding = 0, std140) uniform modelData { + mat4 
modelViewMatrix; + mat4 modelMatrix; + mat4 viewMatrix; + mat4 projMatrix; + mat4 textureMatrix; + mat4 shadow_mv_matrix; + mat4 shadow_proj_matrix[4]; + + vec4 color; + + model_light lights[MAX_LIGHTS]; + + float outlineWidth; + float fogStart; + float fogScale; + int buffer_matrix_offset; + + vec4 clip_equation; + + float thruster_scale; + int use_clip_plane; + int n_lights; + float defaultGloss; + + vec3 ambientFactor; + int desaturate; + + vec3 diffuseFactor; + int blend_alpha; + + vec3 emissionFactor; + int alphaGloss; + + int gammaSpec; + int envGloss; + int effect_num; + int sBasemapIndex; + + vec4 fogColor; + + vec3 base_color; + float anim_timer; + + vec3 stripe_color; + float vpwidth; + + float vpheight; + int team_glow_enabled; + float znear; + float zfar; + + float veryneardist; + float neardist; + float middist; + float fardist; + + int sGlowmapIndex; + int sSpecmapIndex; + int sNormalmapIndex; + int sAmbientmapIndex; + + int sMiscmapIndex; + float alphaMult; + int flags; + float _pad0; +}; + +// Textures - Material set (set 1), binding 1 as descriptor array +// Indices: 0=Base, 1=Glow, 2=Spec, 3=Normal, 4=Height, 5=Ambient, 6=Misc +layout(set = 1, binding = 1) uniform sampler2DArray materialTextures[16]; + +// Inputs from vertex shader +layout(location = 0) in vec4 fragPosition; +layout(location = 1) in vec3 fragNormal; +layout(location = 2) in vec4 fragTexCoord; +layout(location = 3) in vec3 fragTangent; +layout(location = 4) in vec3 fragBitangent; +layout(location = 5) in vec3 fragTangentNormal; +layout(location = 6) in float fragFogDist; + +// Output +layout(location = 0) out vec4 fragOut0; +layout(location = 1) out vec4 fragOut1; +layout(location = 2) out vec4 fragOut2; +layout(location = 3) out vec4 fragOut3; +layout(location = 4) out vec4 fragOut4; + +vec3 FresnelLazarovEnv(vec3 specColor, vec3 view, vec3 normal, float gloss) +{ + return specColor + (vec3(1.0) - specColor) * pow(1.0 - clamp(dot(view, normal), 0.0, 1.0), 5.0) / (4.0 - 3.0 * gloss); +} + +void GetLightInfo(int i, out vec3 lightDir, out float attenuation) +{ + lightDir = normalize(lights[i].position.xyz); + attenuation = 1.0; + if (lights[i].light_type != LT_DIRECTIONAL) { + // Positional light source + float dist = distance(lights[i].position.xyz, fragPosition.xyz); + lightDir = (lights[i].position.xyz - fragPosition.xyz); + + if (lights[i].light_type == LT_TUBE) { // Tube light + float beamlength = length(lights[i].direction); + vec3 beamDir = normalize(lights[i].direction); + // Get nearest point on line + float neardist = dot(fragPosition.xyz - lights[i].position.xyz, beamDir); + // Move back from the endpoint of the beam along the beam by the distance we calculated + vec3 nearest = lights[i].position.xyz - beamDir * abs(neardist); + lightDir = nearest - fragPosition.xyz; + dist = length(lightDir); + } + + lightDir = normalize(lightDir); + attenuation = 1.0 / (1.0 + lights[i].attenuation * dist); + } +} + +vec3 CalculateLighting(vec3 normal, vec3 diffuseMaterial, vec3 specularMaterial, float gloss, float fresnel, float shadow, float aoFactor) +{ + vec3 eyeDir = normalize(-fragPosition.xyz); + vec3 lightAmbient = ambientFactor * aoFactor; + vec3 lightDiffuse = vec3(0.0, 0.0, 0.0); + vec3 lightSpecular = vec3(0.0, 0.0, 0.0); + for (int i = 0; i < n_lights; ++i) { + if (i > 0) { + shadow = 1.0; + } + float roughness = clamp(1.0f - gloss, 0.0f, 1.0f); + float alpha = roughness * roughness; + vec3 lightDir; + float attenuation; + // gather light params + GetLightInfo(i, lightDir, attenuation); + vec3 
halfVec = normalize(lightDir + eyeDir); + float NdotL = clamp(dot(normal, lightDir), 0.0f, 1.0f); + // Ambient, Diffuse, and Specular + lightDiffuse += (lights[i].diffuse_color.rgb * diffuseFactor * NdotL * attenuation) * shadow; + lightSpecular += lights[i].diffuse_color.rgb * computeLighting(specularMaterial, diffuseMaterial, lightDir, normal, halfVec, eyeDir, roughness, fresnel, NdotL) * attenuation * shadow; + } + return diffuseMaterial * lightAmbient + lightSpecular; +} + +void main() +{ + vec3 eyeDir = normalize(-fragPosition.xyz); + vec2 texCoord = fragTexCoord.xy; + mat3 tangentMatrix = mat3(fragTangent, fragBitangent, fragTangentNormal); + + // setup our baseline values for base, emissive, fresnel, gloss, AO and normal + vec4 baseColor = color; + vec4 emissiveColor = vec4(0.0, 0.0, 0.0, 1.0); + float fresnelFactor = 0.0; + float glossData = defaultGloss; + vec2 aoFactors = vec2(1.0, 1.0); + vec3 unitNormal = normalize(fragNormal); + vec3 normal = unitNormal; + + // Ambient occlusion map + if ((flags & MODEL_SDR_FLAG_AMBIENT) != 0) { + // red channel is ambient occlusion factor, green is cavity occlusion factor + aoFactors = texture(materialTextures[5], vec3(texCoord, float(sAmbientmapIndex))).xy; + } + + // Normal map - convert from DXT5nm + if ((flags & MODEL_SDR_FLAG_NORMAL) != 0) { + vec2 normalSample; + normal.rg = normalSample = (texture(materialTextures[3], vec3(texCoord, float(sNormalmapIndex))).ag * 2.0) - 1.0; + normal.b = clamp(sqrt(1.0 - dot(normal.rg, normal.rg)), 0.0001, 1.0); + normal = tangentMatrix * normal; + float norm = length(normal); + // prevent breaking of normal maps + if (norm > 0.0) + normal /= norm; + else + normal = unitNormal; + } + + vec2 distort = vec2(0.0, 0.0); + + if (effect_num >= 0) { + distort = vec2(cos(fragPosition.x*fragPosition.w*0.005+anim_timer*20.0)*sin(fragPosition.y*fragPosition.w*0.005),sin(fragPosition.x*fragPosition.w*0.005+anim_timer*20.0)*cos(fragPosition.y*fragPosition.w*0.005))*0.03; + } + + // Diffuse map + if ((flags & MODEL_SDR_FLAG_DIFFUSE) != 0) { + vec2 diffuseTexCoord = texCoord; + if (effect_num == 2) { + diffuseTexCoord = texCoord + distort*(1.0-anim_timer); + } + baseColor = texture(materialTextures[0], vec3(diffuseTexCoord, float(sBasemapIndex))); + + if ((flags & MODEL_SDR_FLAG_HDR) != 0) { + baseColor.rgb = srgb_to_linear(baseColor.rgb); + } + + if ((flags & MODEL_SDR_FLAG_ALPHA_MULT) != 0) { + baseColor.a *= alphaMult; + } + + if (blend_alpha == 0 && baseColor.a < 0.95) discard; // if alpha blending is not on, discard transparent pixels + // premultiply alpha if blend_alpha is 1. assume that our blend function is srcColor + (1-Alpha)*destColor. 
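+ // e.g. with baseColor.a = 0.25 the source becomes 0.25*rgb, and
+ // srcColor + (1-Alpha)*destColor = 0.25*rgb + 0.75*dest, which is the
+ // standard "over" operator expressed with a ONE source blend factor.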
+ // if blend_alpha is 2, assume blend func is additive and don't modify color + if (blend_alpha == 1) baseColor.rgb = baseColor.rgb * baseColor.a; + } + + // Anti-glint "trick" based on Valve's "Advanced VR Rendering" talk at GDC2015 + vec2 normDx = dFdx(unitNormal.xy); + vec2 normDy = dFdy(unitNormal.xy); + float glossGeo = 1.0f - pow(clamp(max(dot(normDx,normDx), dot(normDy,normDy)),0.0,1.0),0.33); + glossData = min(glossData, glossGeo); + + // Now that we have a base color and min gloss value, compute the spec color + vec4 specColor = vec4(baseColor.rgb * SPEC_FACTOR_NO_SPEC_MAP, glossData); + + if ((flags & MODEL_SDR_FLAG_SPEC) != 0) { + specColor = texture(materialTextures[2], vec3(texCoord, float(sSpecmapIndex))); + if ((flags & MODEL_SDR_FLAG_ALPHA_MULT) != 0) { + specColor *= alphaMult; + } + + if (alphaGloss != 0) glossData = specColor.a; + if (gammaSpec != 0) { + specColor.rgb = max(specColor.rgb, vec3(0.03f)); + fresnelFactor = 1.0; + } + + if ((flags & MODEL_SDR_FLAG_HDR) != 0) { + specColor.rgb = srgb_to_linear(specColor.rgb); + } + } + + baseColor.rgb *= aoFactors.y; + specColor.rgb *= aoFactors.y; + + vec4 teamMask = vec4(0.0); + vec3 team_color_glow = vec3(0.0); + + // Misc map / team colors + if ((flags & MODEL_SDR_FLAG_MISC) != 0) { + if ((flags & MODEL_SDR_FLAG_TEAMCOLOR) != 0) { + teamMask = texture(materialTextures[6], vec3(texCoord, float(sMiscmapIndex))); + + vec3 color_offset = vec3(-0.5) * (teamMask.x + teamMask.y); + + vec3 team_color = base_color * teamMask.x + stripe_color * teamMask.y + color_offset; + team_color_glow = (base_color * teamMask.b) + (stripe_color * teamMask.a); + + if ((flags & MODEL_SDR_FLAG_HDR) != 0) { + baseColor.rgb = linear_to_srgb(baseColor.rgb); + specColor.rgb = linear_to_srgb(specColor.rgb); + } + + baseColor.rgb += team_color; + baseColor.rgb = max(baseColor.rgb, vec3(0.0)); + specColor.rgb += team_color; + specColor.rgb = max(specColor.rgb, vec3(0.03)); + + if ((flags & MODEL_SDR_FLAG_HDR) != 0) { + baseColor.rgb = srgb_to_linear(baseColor.rgb); + specColor.rgb = srgb_to_linear(specColor.rgb); + } + } + } + + // Lights aren't applied when we are rendering to the G-buffers since that gets handled later + if ((flags & MODEL_SDR_FLAG_DEFERRED) == 0) { + if ((flags & MODEL_SDR_FLAG_LIGHT) != 0) { + float shadow = 1.0; + // TODO: Shadow mapping support via shadow_map texture + baseColor.rgb = CalculateLighting(normal, baseColor.rgb, specColor.rgb, glossData, fresnelFactor, shadow, aoFactors.x); + } else { + if ((flags & MODEL_SDR_FLAG_SPEC) != 0) { + baseColor.rgb += pow(1.0 - clamp(dot(eyeDir, normal), 0.0, 1.0), 5.0 * clamp(glossData, 0.01, 1.0)) * specColor.rgb; + } + } + } + + // Glow map + if ((flags & MODEL_SDR_FLAG_GLOW) != 0) { + vec3 glowColor = texture(materialTextures[1], vec3(texCoord, float(sGlowmapIndex))).rgb; + if ((flags & MODEL_SDR_FLAG_MISC) != 0) { + if ((flags & MODEL_SDR_FLAG_TEAMCOLOR) != 0) { + float glowColorLuminance = dot(glowColor, vec3(0.299, 0.587, 0.114)); + glowColor = (team_glow_enabled != 0) ? 
mix(max(team_color_glow, vec3(0.0)), glowColor, clamp(glowColorLuminance - teamMask.b - teamMask.a, 0.0, 1.0)) : glowColor; + } + } + if ((flags & MODEL_SDR_FLAG_HDR) != 0) { + glowColor = srgb_to_linear(glowColor) * GLOW_MAP_SRGB_MULTIPLIER; + } + emissiveColor.rgb += glowColor * GLOW_MAP_INTENSITY; + } + + if ((flags & MODEL_SDR_FLAG_ALPHA_MULT) != 0) { + emissiveColor *= alphaMult; + } + + // Fog + if ((flags & MODEL_SDR_FLAG_FOG) != 0) { + vec3 finalFogColor = fogColor.rgb; + if ((flags & MODEL_SDR_FLAG_HDR) != 0) { + finalFogColor = srgb_to_linear(finalFogColor); + } + if ((flags & MODEL_SDR_FLAG_DIFFUSE) != 0) { + if (blend_alpha == 1) finalFogColor *= baseColor.a; + } + // Apply fog to both emissive and base color for forward rendering + baseColor.rgb = mix(emissiveColor.rgb + baseColor.rgb, finalFogColor, fragFogDist); + emissiveColor.rgb = vec3(0.0); + specColor.rgb *= fragFogDist; + } + + // Desaturation + if ((flags & MODEL_SDR_FLAG_DIFFUSE) != 0) { + if (desaturate == 1) { + baseColor.rgb = color.rgb * dot(vec3(1.0), baseColor.rgb) * 0.3333333; + } + } + + // Ship effects + if (effect_num == 0) { + float shinefactor = 1.0/(1.0 + pow(abs((fract(abs(texCoord.x))-anim_timer) * 1000.0), 2.0)) * 1000.0; + emissiveColor.rgb += vec3(shinefactor); + baseColor.a = baseColor.a * clamp(shinefactor * (fract(abs(texCoord.x))-anim_timer) * -10000.0,0.0,1.0); + } else if (effect_num == 1) { + float shinefactor = 1.0/(1.0 + pow(abs(fragPosition.y-anim_timer), 2.0)); + emissiveColor.rgb += vec3(shinefactor); + if ((flags & MODEL_SDR_FLAG_LIGHT) == 0) { + baseColor.a = clamp((fragPosition.y-anim_timer) * 10000.0,0.0,1.0); + } + } else if (effect_num == 2) { + vec2 screenPos = gl_FragCoord.xy * vec2(vpwidth,vpheight); + baseColor.a = baseColor.a; + float cloak_interp = (sin(fragPosition.x*fragPosition.w*0.005+anim_timer*20.0)*sin(fragPosition.y*fragPosition.w*0.005)*0.5)-0.5; + // Note: framebuffer sampling not yet implemented for Vulkan cloaking effect + } + + // emissive colors won't be added later when we are using forward rendering so we need to do that here + if ((flags & MODEL_SDR_FLAG_DEFERRED) == 0) { + baseColor.rgb += emissiveColor.rgb; + } + + fragOut0 = baseColor; + + if ((flags & MODEL_SDR_FLAG_DEFERRED) != 0) { + fragOut1 = vec4(fragPosition.xyz, aoFactors.x); + fragOut2 = vec4(normal, glossData); + fragOut3 = vec4(specColor.rgb, fresnelFactor); + fragOut4 = emissiveColor; + } +} diff --git a/code/graphics/shaders/main.vert b/code/graphics/shaders/main.vert new file mode 100644 index 00000000000..9a711989f59 --- /dev/null +++ b/code/graphics/shaders/main.vert @@ -0,0 +1,174 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Vertex inputs - match FSO vertex layout (VertexAttributeLocation enum) +layout(location = 0) in vec4 vertPosition; +layout(location = 1) in vec4 vertColor; // Not used by model shader, declared for pipeline compatibility +layout(location = 2) in vec4 vertTexCoord; +layout(location = 3) in vec3 vertNormal; +layout(location = 4) in vec4 vertTangent; +layout(location = 5) in float vertModelID; + +// Model shader flags (from model_shader_flags.h) +const int MODEL_SDR_FLAG_LIGHT = (1 << 0); +const int MODEL_SDR_FLAG_FOG = (1 << 10); +const int MODEL_SDR_FLAG_TRANSFORM = (1 << 11); +const int MODEL_SDR_FLAG_THRUSTER = (1 << 13); + +#define MAX_LIGHTS 8 + +struct model_light { + vec4 position; + + vec3 diffuse_color; + int light_type; + + vec3 direction; + float attenuation; + + float ml_sourceRadius; +}; + +layout(set = 1, binding = 0, std140) 
uniform modelData { + mat4 modelViewMatrix; + mat4 modelMatrix; + mat4 viewMatrix; + mat4 projMatrix; + mat4 textureMatrix; + mat4 shadow_mv_matrix; + mat4 shadow_proj_matrix[4]; + + vec4 color; + + model_light lights[MAX_LIGHTS]; + + float outlineWidth; + float fogStart; + float fogScale; + int buffer_matrix_offset; + + vec4 clip_equation; + + float thruster_scale; + int use_clip_plane; + int n_lights; + float defaultGloss; + + vec3 ambientFactor; + int desaturate; + + vec3 diffuseFactor; + int blend_alpha; + + vec3 emissionFactor; + int alphaGloss; + + int gammaSpec; + int envGloss; + int effect_num; + int sBasemapIndex; + + vec4 fogColor; + + vec3 base_color; + float anim_timer; + + vec3 stripe_color; + float vpwidth; + + float vpheight; + int team_glow_enabled; + float znear; + float zfar; + + float veryneardist; + float neardist; + float middist; + float fardist; + + int sGlowmapIndex; + int sSpecmapIndex; + int sNormalmapIndex; + int sAmbientmapIndex; + + int sMiscmapIndex; + float alphaMult; + int flags; + float _pad0; +}; + +// Transform buffer for batched submodel rendering (set 1, binding 3) +// Contains per-submodel transform matrices indexed by vertModelID + buffer_matrix_offset. +// The visibility flag is stored in transform[3].w: >= 0.9 means invisible. +layout(set = 1, binding = 3, std430) readonly buffer TransformBuffer { + mat4 transforms[]; +} transformBuf; + +// Outputs to fragment shader +layout(location = 0) out vec4 outPosition; +layout(location = 1) out vec3 outNormal; +layout(location = 2) out vec4 outTexCoord; +layout(location = 3) out vec3 outTangent; +layout(location = 4) out vec3 outBitangent; +layout(location = 5) out vec3 outTangentNormal; +layout(location = 6) out float outFogDist; + +void main() +{ + mat4 orient = mat4(1.0); + bool clipModel = false; + + // Batched submodel transforms: read per-submodel matrix from the SSBO + if ((flags & MODEL_SDR_FLAG_TRANSFORM) != 0) { + int id = int(vertModelID); + orient = transformBuf.transforms[buffer_matrix_offset + id]; + clipModel = (orient[3].w >= 0.9); + orient[3].w = 1.0; + } + + vec4 texCoord = textureMatrix * vertTexCoord; + vec4 vertex = vertPosition; + + // Thruster scale + if ((flags & MODEL_SDR_FLAG_THRUSTER) != 0) { + if (vertex.z < -1.5) { + vertex.z *= thruster_scale; + } + } + + // Transform the normal into eye space and normalize the result. 
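+ // Note: mat3(modelViewMatrix) is only a valid normal matrix while the
+ // model-view transform contains rotation/translation/uniform scale; a
+ // non-uniform scale would require the inverse-transpose instead.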
+ vec3 normal = normalize(mat3(modelViewMatrix) * mat3(orient) * vertNormal); + vec4 position = modelViewMatrix * orient * vertex; + + gl_Position = projMatrix * position; + + // Clip invisible submodels by moving vertices off-screen + if ((flags & MODEL_SDR_FLAG_TRANSFORM) != 0 && clipModel) { + gl_Position = vec4(-2.0, -2.0, -2.0, 1.0); + } + + // Setup stuff for normal maps and envmaps + vec3 t = normalize(mat3(modelViewMatrix) * mat3(orient) * vertTangent.xyz); + vec3 b = cross(normal, t) * vertTangent.w; + outTangent = t; + outBitangent = b; + outTangentNormal = normal; + + // Fog + if ((flags & MODEL_SDR_FLAG_FOG) != 0) { + outFogDist = clamp((gl_Position.z - fogStart) * 0.75 * fogScale, 0.0, 1.0); + } else { + outFogDist = 0.0; + } + + // Clip plane + if (use_clip_plane != 0) { + gl_ClipDistance[0] = dot(clip_equation, modelMatrix * orient * vertex); + } else { + gl_ClipDistance[0] = 1.0; + } + + outPosition = position; + outNormal = normal; + outTexCoord = texCoord; +} diff --git a/code/graphics/shaders/msaa-resolve.frag b/code/graphics/shaders/msaa-resolve.frag new file mode 100644 index 00000000000..b2af184bc59 --- /dev/null +++ b/code/graphics/shaders/msaa-resolve.frag @@ -0,0 +1,82 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// MSAA resolve fragment shader — depth-weighted resolve from multisampled G-buffer +// to non-MSAA G-buffer. Ported from OpenGL msaa-f.sdr. + +layout(location = 0) in vec2 fragTexCoord; + +layout(location = 0) out vec4 fragOut0; // color +layout(location = 1) out vec4 fragOut1; // position +layout(location = 2) out vec4 fragOut2; // normal +layout(location = 3) out vec4 fragOut3; // specular +layout(location = 4) out vec4 fragOut4; // emissive + +// All 6 MSAA input textures via Material Set (Set 1) Binding 1 texture array. +// Elements 0-5 hold MSAA image views; 6-15 are fallback (unused by this shader). +// [0]=color, [1]=position, [2]=normal, [3]=specular, [4]=emissive, [5]=depth +layout(set = 1, binding = 1) uniform sampler2DMS msaaTex[6]; + +// GenericData UBO at PerDraw Set (Set 2) Binding 0 +layout(std140, set = 2, binding = 0) uniform genericData { + int samples; + float fov; +}; + +const float voxelDepth = 2.5; +const float voxelDepthFalloff = 2.5; + +// Runtime fallback median distance — simple max loop (no sorting networks needed) +float getMedianDist(ivec2 texel) { + float minDist = -1000000.0; + for (int i = 0; i < samples; i++) { + minDist = max(minDist, texelFetch(msaaTex[1], texel, i).z); + } + return minDist; +} + +void main() +{ + vec2 texSize = vec2(textureSize(msaaTex[0])); + ivec2 texel = ivec2(texSize * fragTexCoord); + + float texelWidthFactor = tan(fov / texSize.y); + float dist = getMedianDist(texel); + + float weight = 0.0; + vec4 color = vec4(0.0); + vec4 pos = vec4(0.0); + vec4 normal = vec4(0.0); + vec4 specular = vec4(0.0); + vec4 emissive = vec4(0.0); + float depth = 0.0; + + for (int i = 0; i < samples; i++) { + vec4 localPos = texelFetch(msaaTex[1], texel, i); + // Calculate local weight from distance voxel. If the distance is 0 + // (no model), set weight to 1 to allow background emissive through. + // If median distance is 0, only process samples that are also 0. 
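+ // dist is getMedianDist()'s result, which is in fact the maximum
+ // view-space z over the samples, i.e. the surface nearest the camera
+ // (z is negative in front of it). The two smoothsteps below open a
+ // depth window around dist, scaled by texelWidthFactor and the
+ // voxelDepth constants, so only samples within a few "voxel" depths of
+ // that nearest surface are averaged into the resolved texel.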
+ float localWeight = max( + step(-0.001, dist) * step(-0.001, localPos.z), + smoothstep(dist + dist * texelWidthFactor * (voxelDepth + voxelDepthFalloff), + dist + dist * texelWidthFactor * voxelDepth, localPos.z) * + smoothstep(dist - dist * texelWidthFactor * voxelDepth, + dist + dist * texelWidthFactor * (voxelDepth + voxelDepthFalloff), localPos.z) + ); + + pos += localPos * localWeight; + color += texelFetch(msaaTex[0], texel, i) * localWeight; + normal += texelFetch(msaaTex[2], texel, i) * localWeight; + specular += texelFetch(msaaTex[3], texel, i) * localWeight; + emissive += texelFetch(msaaTex[4], texel, i) * localWeight; + depth += texelFetch(msaaTex[5], texel, i).x * localWeight; + weight += localWeight; + } + + fragOut0 = color / weight; + fragOut1 = pos / weight; + fragOut2 = vec4(normalize(normal.xyz), normal.a / weight); + fragOut3 = specular / weight; + fragOut4 = emissive / weight; + gl_FragDepth = depth / weight; +} diff --git a/code/graphics/shaders/msaa-resolve.vert b/code/graphics/shaders/msaa-resolve.vert new file mode 100644 index 00000000000..fffe023d39c --- /dev/null +++ b/code/graphics/shaders/msaa-resolve.vert @@ -0,0 +1,14 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Fullscreen triangle vertex shader for MSAA resolve pass. +// Reuses the same fullscreen triangle pattern as postprocess.vert. + +layout(location = 0) out vec2 fragTexCoord; + +void main() +{ + vec2 pos = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + fragTexCoord = pos; + gl_Position = vec4(pos * 2.0 - 1.0, 0.0, 1.0); +} diff --git a/code/graphics/shaders/nanovg.frag b/code/graphics/shaders/nanovg.frag new file mode 100644 index 00000000000..4467d6591f6 --- /dev/null +++ b/code/graphics/shaders/nanovg.frag @@ -0,0 +1,91 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout (location = 0) in vec2 ftcoord; +layout (location = 1) in vec2 fpos; + +layout (location = 0) out vec4 outColor; + +// Set 2 = PerDraw, Binding 2 = NanoVGData +layout (set = 2, binding = 2, std140) uniform NanoVGUniformData { + mat3 scissorMat; + mat3 paintMat; + vec4 innerCol; + vec4 outerCol; + vec2 scissorExt; + vec2 scissorScale; + vec2 extent; + float radius; + float feather; + float strokeMult; + float strokeThr; + int texType; + int type; + vec2 viewSize; + int texArrayIndex; +}; + +// Set 1 = Material, Binding 1 = texture array +layout (set = 1, binding = 1) uniform sampler2DArray nvg_tex; + +float sdroundrect(vec2 pt, vec2 ext, float rad) { + vec2 ext2 = ext - vec2(rad, rad); + vec2 d = abs(pt) - ext2; + return min(max(d.x, d.y), 0.0) + length(max(d, 0.0)) - rad; +} + +float scissorMask(vec2 p) { + vec2 sc = (abs((scissorMat * vec3(p, 1.0)).xy) - scissorExt); + sc = vec2(0.5, 0.5) - sc * scissorScale; + return clamp(sc.x, 0.0, 1.0) * clamp(sc.y, 0.0, 1.0); +} + +#ifdef EDGE_AA +float strokeMask() { + return min(1.0, (1.0 - abs(ftcoord.x * 2.0 - 1.0)) * strokeMult) * min(1.0, ftcoord.y); +} +#endif + +void main() +{ + vec4 result; + float scissor = scissorMask(fpos); + +#ifdef EDGE_AA + float strokeAlpha = strokeMask(); +#else + float strokeAlpha = 1.0; +#endif + +#ifdef EDGE_AA + if (strokeAlpha < strokeThr) { + discard; + } +#endif + + if (type == 0) { // Gradient + vec2 pt = (paintMat * vec3(fpos, 1.0)).xy; + float d = clamp((sdroundrect(pt, extent, radius) + feather * 0.5) / feather, 0.0, 1.0); + vec4 color = mix(innerCol, outerCol, d); + color *= strokeAlpha * scissor; + result = color; + } else if (type == 1) { // Image + vec2 pt = (paintMat * vec3(fpos, 
1.0)).xy / extent; + vec4 color = texture(nvg_tex, vec3(pt, float(texArrayIndex))); + if (texType == 1) color = vec4(color.xyz * color.w, color.w); + if (texType == 2) color = vec4(color.r); + color *= innerCol; + color *= strokeAlpha * scissor; + result = color; + } else if (type == 2) { // Stencil fill + result = vec4(1, 1, 1, 1); + } else if (type == 3) { // Textured tris + vec4 color = texture(nvg_tex, vec3(ftcoord, float(texArrayIndex))); + if (texType == 1) color = vec4(color.xyz * color.w, color.w); + if (texType == 2) color = vec4(color.x); + color *= scissor; + result = color * innerCol; + } + + outColor = result; +} diff --git a/code/graphics/shaders/nanovg.vert b/code/graphics/shaders/nanovg.vert new file mode 100644 index 00000000000..2a44359a3a2 --- /dev/null +++ b/code/graphics/shaders/nanovg.vert @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout (location = 0) in vec4 vertPosition; +layout (location = 2) in vec4 vertTexCoord; + +layout (location = 0) out vec2 ftcoord; +layout (location = 1) out vec2 fpos; + +// Set 2 = PerDraw, Binding 2 = NanoVGData +layout (set = 2, binding = 2, std140) uniform NanoVGUniformData { + mat3 scissorMat; + mat3 paintMat; + vec4 innerCol; + vec4 outerCol; + vec2 scissorExt; + vec2 scissorScale; + vec2 extent; + float radius; + float feather; + float strokeMult; + float strokeThr; + int texType; + int type; + vec2 viewSize; + int texArrayIndex; +}; + +void main() +{ + ftcoord = vertTexCoord.xy; + fpos = vertPosition.xy; + gl_Position = vec4(2.0 * vertPosition.x / viewSize.x - 1.0, + 1.0 - 2.0 * vertPosition.y / viewSize.y, + 0.0, 1.0); +} diff --git a/code/graphics/shaders/passthrough.frag b/code/graphics/shaders/passthrough.frag new file mode 100644 index 00000000000..beba39e2f91 --- /dev/null +++ b/code/graphics/shaders/passthrough.frag @@ -0,0 +1,42 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +#include "gamma.sdr" + +// Inputs from vertex shader +layout (location = 0) in vec4 fragTexCoord; +layout (location = 1) in vec4 fragColor; + +// Output +layout (location = 0) out vec4 fragOut0; + +// Texture sampler (binding 1 in Material set texture array) +layout (set = 1, binding = 1) uniform sampler2DArray baseMap; + +// Uniform buffer: GenericData (binding 0 in PerDraw set) +// Must match the layout used by vulkan_set_default_material_uniforms() +layout (set = 2, binding = 0, std140) uniform genericData { + mat4 modelMatrix; + + vec4 color; + + vec4 clipEquation; + + int baseMapIndex; + int alphaTexture; + int noTexturing; + int srgb; + + float intensity; + float alphaThreshold; + uint clipEnabled; +}; + +void main() +{ + vec4 baseColor = texture(baseMap, vec3(fragTexCoord.xy, float(baseMapIndex))); + + baseColor.rgb = (srgb == 1) ? srgb_to_linear(baseColor.rgb) : baseColor.rgb; + vec4 blendColor = (srgb == 1) ? 
vec4(srgb_to_linear(fragColor.rgb), fragColor.a) : fragColor; + fragOut0 = mix(baseColor * blendColor, blendColor, float(noTexturing)); +} diff --git a/code/graphics/shaders/passthrough.vert b/code/graphics/shaders/passthrough.vert new file mode 100644 index 00000000000..33c75892294 --- /dev/null +++ b/code/graphics/shaders/passthrough.vert @@ -0,0 +1,24 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Vertex inputs - only position and texcoord required +// Color is passed via uniform or defaults to white +layout (location = 0) in vec4 vertPosition; +layout (location = 2) in vec4 vertTexCoord; + +// Outputs to fragment shader +layout (location = 0) out vec4 fragTexCoord; +layout (location = 1) out vec4 fragColor; + +// Uniform buffer: Matrices (binding 1 in PerDraw set) +layout (set = 2, binding = 1, std140) uniform matrixData { + mat4 modelViewMatrix; + mat4 projMatrix; +}; + +void main() +{ + fragTexCoord = vertTexCoord; + fragColor = vec4(1.0); // Default white - color modulation via uniform if needed + gl_Position = projMatrix * modelViewMatrix * vertPosition; +} diff --git a/code/graphics/shaders/post.frag b/code/graphics/shaders/post.frag new file mode 100644 index 00000000000..7a1260d8c50 --- /dev/null +++ b/code/graphics/shaders/post.frag @@ -0,0 +1,129 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Post-processing effects shader (Vulkan port of post-f.sdr) +// Uses runtime effectFlags instead of compile-time #ifdef flags + +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2D tex; + +// Effect flag bits (match post_effect_t index in post_processing.tbl) +const int FLAG_DISTORT_NOISE = (1 << 0); +const int FLAG_SATURATION = (1 << 1); +const int FLAG_BRIGHTNESS = (1 << 2); +const int FLAG_CONTRAST = (1 << 3); +const int FLAG_GRAIN = (1 << 4); +const int FLAG_STRIPES = (1 << 5); +const int FLAG_CUTOFF = (1 << 6); +const int FLAG_DITH = (1 << 7); +const int FLAG_TINT = (1 << 8); + +layout(std140, set = 2, binding = 0) uniform genericData { + float timer; + float noise_amount; + float saturation; + float brightness; + + float contrast; + float film_grain; + float tv_stripes; + float cutoff; + + vec3 tint; + float dither; + + vec3 custom_effect_vec3_a; + float custom_effect_float_a; + + vec3 custom_effect_vec3_b; + float custom_effect_float_b; + + int effectFlags; +}; + +void main() +{ + vec2 distort = vec2(0.0, 0.0); + + // Distort noise + if ((effectFlags & FLAG_DISTORT_NOISE) != 0) { + float distort_factor = timer * sin(fragTexCoord.x * fragTexCoord.y * 100.0 + timer); + distort_factor = mod(distort_factor, 8.0) * mod(distort_factor, 4.0); + distort = vec2(mod(distort_factor, noise_amount), mod(distort_factor, noise_amount + 0.002)); + } + + vec4 color_in = texture(tex, fragTexCoord.xy + distort); + vec4 color_out; + + // Saturation + if ((effectFlags & FLAG_SATURATION) != 0) { + vec4 color_grayscale = color_in; + color_grayscale.rgb = vec3(dot(color_in.rgb, vec3(0.299, 0.587, 0.184))); + color_out = mix(color_in, color_grayscale, 1.0 - saturation); + } else { + color_out = color_in; + } + + // Brightness + if ((effectFlags & FLAG_BRIGHTNESS) != 0) { + color_out.rgb = color_out.rgb * vec3(brightness); + } + + // Contrast + if ((effectFlags & FLAG_CONTRAST) != 0) { + color_out.rgb = color_out.rgb + vec3(0.5 - 0.5 * contrast); + } + + // Film grain + if ((effectFlags & FLAG_GRAIN) != 0) { + float x = fragTexCoord.x * fragTexCoord.y * timer * 1000.0; + x 
= mod(x, 13.0) * mod(x, 123.0); + float dx = mod(x, 0.01); + vec3 result = color_out.rgb + color_out.rgb * clamp(0.1 + dx * 100.0, 0.0, 1.0); + color_out.rgb = mix(color_out.rgb, result, film_grain); + } + + // TV stripes + if ((effectFlags & FLAG_STRIPES) != 0) { + vec2 sc; + sc.x = sin(fragTexCoord.y * 2048.0); + sc.y = cos(fragTexCoord.y * 2048.0); + vec3 stripes = color_out.rgb + color_out.rgb * vec3(sc.x, sc.y, sc.x) * 0.8; + color_out.rgb = mix(color_out.rgb, stripes, tv_stripes); + } + + // Cutoff + if ((effectFlags & FLAG_CUTOFF) != 0) { + if (cutoff > 0.0) { + vec4 color_greyscale; + color_greyscale.rgb = vec3(dot(color_in.rgb, vec3(0.299, 0.587, 0.184))); + vec4 normalized_col; + float col_length = length(color_out.rgb); + if (col_length > 1.0) { + normalized_col = color_out / col_length; + } else { + normalized_col = color_out; + } + vec3 unit_grey = vec3(0.5773); + float sat = dot(normalized_col.rgb, unit_grey); + color_out = mix(color_greyscale, color_out, sat * cutoff); + } + } + + // Dithering + if ((effectFlags & FLAG_DITH) != 0) { + float downsampling_factor = 4.0; + float bias = 0.5; + color_out.rgb = floor(color_out.rgb * downsampling_factor + bias) / downsampling_factor; + } + + // Tint + if ((effectFlags & FLAG_TINT) != 0) { + color_out.rgb += tint; + } + + color_out.a = 1.0; + fragOut0 = color_out; +} diff --git a/code/graphics/shaders/postprocess.vert b/code/graphics/shaders/postprocess.vert new file mode 100644 index 00000000000..bba72d1324e --- /dev/null +++ b/code/graphics/shaders/postprocess.vert @@ -0,0 +1,20 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Fullscreen triangle vertex shader for post-processing passes. +// Uses gl_VertexIndex to generate a single triangle covering the entire screen. +// No vertex buffer required — draw with vkCmdDraw(3, 1, 0, 0). + +layout(location = 0) out vec2 fragTexCoord; + +void main() +{ + // Generate fullscreen triangle vertices from vertex index: + // 0: (-1, -1) uv (0, 0) — top-left in Vulkan NDC + // 1: ( 3, -1) uv (2, 0) — oversize right + // 2: (-1, 3) uv (0, 2) — oversize bottom + // After viewport clipping, UV [0,1] maps to screen corners. 
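+// Compared with a two-triangle quad, the single oversized triangle also avoids the diagonal seam, where 2x2 shading quads straddling the shared edge would be rasterized twice.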
+ vec2 pos = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + fragTexCoord = pos; + gl_Position = vec4(pos * 2.0 - 1.0, 0.0, 1.0); +} diff --git a/code/graphics/shaders/rocketui.frag b/code/graphics/shaders/rocketui.frag new file mode 100644 index 00000000000..e8b411e6b96 --- /dev/null +++ b/code/graphics/shaders/rocketui.frag @@ -0,0 +1,49 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Inputs from vertex shader +layout (location = 0) in vec2 fragTexCoord; +layout (location = 1) in vec4 fragColor; +layout (location = 2) in vec2 fragScreenPosition; + +// Output +layout (location = 0) out vec4 fragOut0; + +// Texture sampler array (binding 1 in Material set) +layout (set = 1, binding = 1) uniform sampler2DArray baseMap; + +// Uniform buffer: GenericData/RocketUI (binding 0 in PerDraw set) +layout (set = 2, binding = 0, std140) uniform genericData { + mat4 projMatrix; + + vec2 offset; + int textured; + int baseMapIndex; + + float horizontalSwipeOffset; + float pad[3]; +}; + +void main() +{ + if (fragScreenPosition.x > horizontalSwipeOffset) { + discard; + } + + float distance = horizontalSwipeOffset - fragScreenPosition.x; + + vec4 color; + if (textured != 0) { + color = texture(baseMap, vec3(fragTexCoord, float(baseMapIndex))) * fragColor; + } else { + color = fragColor; + } + + // Hard-coded for now but can be easily made configurable should that be needed at some point + if (distance < 10.0) { + // Only change the colors but not the alpha channel to preserve the transparent part of text + color.xyz = vec3(1.0); + } + + fragOut0 = color; +} diff --git a/code/graphics/shaders/rocketui.vert b/code/graphics/shaders/rocketui.vert new file mode 100644 index 00000000000..cacb176187d --- /dev/null +++ b/code/graphics/shaders/rocketui.vert @@ -0,0 +1,35 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Vertex inputs (note: rocketui uses vec2 for position) +layout (location = 0) in vec2 vertPosition; +layout (location = 1) in vec4 vertColor; +layout (location = 2) in vec2 vertTexCoord; + +// Outputs to fragment shader +layout (location = 0) out vec2 fragTexCoord; +layout (location = 1) out vec4 fragColor; +layout (location = 2) out vec2 fragScreenPosition; + +// Uniform buffer: GenericData/RocketUI (binding 0 in PerDraw set) +layout (set = 2, binding = 0, std140) uniform genericData { + mat4 projMatrix; + + vec2 offset; + int textured; + int baseMapIndex; + + float horizontalSwipeOffset; + float pad[3]; +}; + +void main() +{ + fragTexCoord = vertTexCoord; + fragColor = vertColor; + + vec4 position = vec4(vertPosition + offset, 0.0, 1.0); + + fragScreenPosition = position.xy; + gl_Position = projMatrix * position; +} diff --git a/code/graphics/shaders/shadow.frag b/code/graphics/shaders/shadow.frag new file mode 100644 index 00000000000..1fc0c687b3a --- /dev/null +++ b/code/graphics/shaders/shadow.frag @@ -0,0 +1,12 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) out vec4 fragColor; + +void main() +{ + float depth = gl_FragCoord.z; + // Variance Shadow Mapping: store (depth, depth^2 * scale_inv, 0, 1) + // VARIANCE_SHADOW_SCALE = 1000000.0 in shadows.sdr + fragColor = vec4(depth, depth * depth * (1.0 / 1000000.0), 0.0, 1.0); +} diff --git a/code/graphics/shaders/shadow.vert b/code/graphics/shaders/shadow.vert new file mode 100644 index 00000000000..eed92f8754b --- /dev/null +++ b/code/graphics/shaders/shadow.vert @@ -0,0 +1,127 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable 
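+// Required so the vertex stage can write gl_Layer (cascade routing without a geometry shader):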
+#extension GL_ARB_shader_viewport_layer_array : enable + +// Vertex inputs - match FSO vertex layout (VertexAttributeLocation enum) +layout(location = 0) in vec4 vertPosition; +layout(location = 5) in float vertModelID; + +// Model shader flags (from model_shader_flags.h) +const int MODEL_SDR_FLAG_TRANSFORM = (1 << 11); +const int MODEL_SDR_FLAG_THRUSTER = (1 << 13); + +layout(set = 1, binding = 0, std140) uniform modelData { + mat4 modelViewMatrix; + mat4 modelMatrix; + mat4 viewMatrix; + mat4 projMatrix; + mat4 textureMatrix; + mat4 shadow_mv_matrix; + mat4 shadow_proj_matrix[4]; + + vec4 color; + + // lights[8] — 8 * 48 bytes = 384 bytes + // We don't use lights in shadow shader but must match UBO layout + vec4 _light_pad[24]; + + float outlineWidth; + float fogStart; + float fogScale; + int buffer_matrix_offset; + + vec4 clip_equation; + + float thruster_scale; + int use_clip_plane; + int n_lights; + float defaultGloss; + + vec3 ambientFactor; + int desaturate; + + vec3 diffuseFactor; + int blend_alpha; + + vec3 emissionFactor; + int alphaGloss; + + int gammaSpec; + int envGloss; + int effect_num; + int sBasemapIndex; + + vec4 fogColor; + + vec3 base_color; + float anim_timer; + + vec3 stripe_color; + float vpwidth; + + float vpheight; + int team_glow_enabled; + float znear; + float zfar; + + float veryneardist; + float neardist; + float middist; + float fardist; + + int sGlowmapIndex; + int sSpecmapIndex; + int sNormalmapIndex; + int sAmbientmapIndex; + + int sMiscmapIndex; + float alphaMult; + int flags; + float _pad0; +}; + +// Transform buffer for batched submodel rendering (set 1, binding 3) +layout(set = 1, binding = 3, std430) readonly buffer TransformBuffer { + mat4 transforms[]; +} transformBuf; + +void main() +{ + mat4 orient = mat4(1.0); + bool clipModel = false; + + // Batched submodel transforms + if ((flags & MODEL_SDR_FLAG_TRANSFORM) != 0) { + int id = int(vertModelID); + orient = transformBuf.transforms[buffer_matrix_offset + id]; + clipModel = (orient[3].w >= 0.9); + orient[3].w = 1.0; + } + + vec4 vertex = vertPosition; + + // Thruster scale + if ((flags & MODEL_SDR_FLAG_THRUSTER) != 0) { + if (vertex.z < -1.5) { + vertex.z *= thruster_scale; + } + } + + // modelViewMatrix = light_view * model_transform (set by gr_set_view_matrix) + vec4 lightViewPos = modelViewMatrix * orient * vertex; + gl_Position = shadow_proj_matrix[gl_InstanceIndex] * lightViewPos; + + // Clamp depth to [0, w] for Vulkan + gl_Position.z = clamp(gl_Position.z, 0.0, gl_Position.w); + + // Route to cascade layer via instanced rendering + gl_Layer = gl_InstanceIndex; + + // No clip plane in shadow pass + gl_ClipDistance[0] = 1.0; + + // Clip invisible submodels + if ((flags & MODEL_SDR_FLAG_TRANSFORM) != 0 && clipModel) { + gl_Position = vec4(-2.0, -2.0, -2.0, 1.0); + } +} diff --git a/code/graphics/shaders/shield-impact.frag b/code/graphics/shaders/shield-impact.frag new file mode 100644 index 00000000000..662d5869d45 --- /dev/null +++ b/code/graphics/shaders/shield-impact.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +#include "gamma.sdr" + +const float EMISSIVE_GAIN = 2.0; + +layout(location = 0) in vec4 fragImpactUV; +layout(location = 1) in float fragNormOffset; + +layout(location = 0) out vec4 fragOut0; + +layout (set = 1, binding = 1) uniform sampler2DArray shieldMap; + +layout (set = 2, binding = 0, std140) uniform genericData { + mat4 shieldModelViewMatrix; + mat4 shieldProjMatrix; + + vec3 hitNormal; + int srgb; + + vec4 color; + + int 
shieldMapIndex; +}; + +void main() +{ + if (fragNormOffset < 0.0) discard; + if (fragImpactUV.x < 0.0 || fragImpactUV.x > 1.0 || fragImpactUV.y < 0.0 || fragImpactUV.y > 1.0) discard; + vec4 shieldColor = texture(shieldMap, vec3(fragImpactUV.xy, float(shieldMapIndex))); + shieldColor.rgb = (srgb == 1) ? srgb_to_linear(shieldColor.rgb) * EMISSIVE_GAIN : shieldColor.rgb; + vec4 blendColor = color; + blendColor.rgb = (srgb == 1) ? srgb_to_linear(blendColor.rgb) * EMISSIVE_GAIN : blendColor.rgb; + fragOut0 = shieldColor * blendColor; +} diff --git a/code/graphics/shaders/shield-impact.vert b/code/graphics/shaders/shield-impact.vert new file mode 100644 index 00000000000..71d4b055fb9 --- /dev/null +++ b/code/graphics/shaders/shield-impact.vert @@ -0,0 +1,34 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(location = 0) in vec4 vertPosition; +layout(location = 3) in vec3 vertNormal; + +layout(location = 0) out vec4 fragImpactUV; +layout(location = 1) out float fragNormOffset; + +layout (set = 2, binding = 1, std140) uniform matrixData { + mat4 modelViewMatrix; + mat4 projMatrix; +}; + +layout (set = 2, binding = 0, std140) uniform genericData { + mat4 shieldModelViewMatrix; + mat4 shieldProjMatrix; + + vec3 hitNormal; + int srgb; + + vec4 color; + + int shieldMapIndex; +}; + +void main() +{ + gl_Position = projMatrix * modelViewMatrix * vertPosition; + fragNormOffset = dot(hitNormal, vertNormal); + fragImpactUV = shieldProjMatrix * shieldModelViewMatrix * vertPosition; + fragImpactUV += 1.0; + fragImpactUV *= 0.5; +} diff --git a/code/graphics/shaders/tonemapping.frag b/code/graphics/shaders/tonemapping.frag new file mode 100644 index 00000000000..c04099182eb --- /dev/null +++ b/code/graphics/shaders/tonemapping.frag @@ -0,0 +1,173 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +#include "gamma.sdr" + +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +layout(set = 1, binding = 1) uniform sampler2D sceneTex; + +layout(set = 2, binding = 0, std140) uniform genericData { + float exposure; + int tonemapper; + float x0; + float y0; + float x1; + float toe_B; + float toe_lnA; + float sh_B; + float sh_lnA; + float sh_offsetX; + float sh_offsetY; + int linearOut; +}; + +// Tonemapping operators — matched to OpenGL tonemapping-f.sdr implementations + +vec3 linear_tonemap(vec3 color) { + return clamp(color, 0.0, 1.0); +} + +vec3 uc2_tonemap(vec3 color) { + float A = 0.15; + float B = 0.50; + float C = 0.10; + float D = 0.20; + float E = 0.02; + float F = 0.30; + float W = 11.2; + color = ((color * (A * color + C * B) + D * E) / (color * (A * color + B) + D * F)) - E / F; + float white = ((W * (A * W + C * B) + D * E) / (W * (A * W + B) + D * F)) - E / F; + color /= white; + return color; +} + +vec3 aces_tonemap(vec3 color) { + mat3 m1 = mat3( + 0.59719, 0.07600, 0.02840, + 0.35458, 0.90834, 0.13383, + 0.04823, 0.01566, 0.83777 + ); + mat3 m2 = mat3( + 1.60475, -0.10208, -0.00327, + -0.53108, 1.10813, -0.07276, + -0.07367, -0.00605, 1.07602 + ); + vec3 v = m1 * color; + vec3 a = v * (v + 0.0245786) - 0.000090537; + vec3 b = v * (0.983729 * v + 0.4329510) + 0.238081; + return clamp(m2 * (a / b), 0.0, 1.0); +} + +vec3 aces_approx_tonemap(vec3 color) { + color *= 0.6; + float a = 2.51; + float b = 0.03; + float c = 2.43; + float d = 0.59; + float e = 0.14; + return clamp((color * (a * color + b)) / (color * (c * color + d) + e), 0.0, 1.0); +} + +vec3 cineon_tonemap(vec3 color) { + // optimized filmic operator by Jim 
Hejl and Richard Burgess-Dawson + // linear to sRGB conversion embedded in shader + color = max(vec3(0.0), color - 0.004); + return (color * (6.2 * color + 0.5)) / (color * (6.2 * color + 1.7) + 0.06); +} + +vec3 reinhard_jodie_tonemap(vec3 color) { + float luma = dot(color, vec3(0.2126, 0.7152, 0.0722)); + float toneMappedLuma = luma / (1.0 + luma); + color *= toneMappedLuma / luma; + return color; +} + +vec3 reinhard_extended_tonemap(vec3 color) { + float max_white = 1.0; + vec3 numerator = color * (1.0 + (color / vec3(max_white * max_white))); + return numerator / (1.0 + color); +} + +// Piecewise Power Curve helpers — matched to OpenGL shoulder sign convention +float ppc_toe(float x) { + return exp(toe_lnA + toe_B * log(x)); +} + +float ppc_linear(float x) { + return y0 + (x - x0); +} + +float ppc_shoulder(float x) { + // Scale is -1 so reverse subtraction to save a mult + x = sh_offsetX - x; + x = exp(sh_lnA + sh_B * log(x)); + x = sh_offsetY - x; + return x; +} + +float ppc_eval(float x_in) { + if (x_in <= x0) { + return ppc_toe(x_in); + } else if (x_in <= x1) { + return ppc_linear(x_in); + } else if (x_in < sh_offsetX) { + return ppc_shoulder(x_in); + } else { + return sh_offsetY; + } +} + +vec3 ppc_tonemap(vec3 color) { + float luma = dot(color, vec3(0.2126, 0.7152, 0.0722)); + if (luma <= 0.0) return vec3(0.0); + float luma_tone; + if (luma <= x0) { + luma_tone = ppc_toe(luma); + } else if (luma <= x1) { + luma_tone = ppc_linear(luma); + } else if (luma < sh_offsetX) { + luma_tone = ppc_shoulder(luma); + } else { + luma_tone = sh_offsetY; + } + return color * luma_tone / luma; +} + +vec3 ppc_rgb_tonemap(vec3 color) { + return vec3(ppc_eval(color.r), ppc_eval(color.g), ppc_eval(color.b)); +} + +void main() +{ + vec3 color = texture(sceneTex, fragTexCoord).rgb; + color *= exposure; + + // Apply selected tonemapper + if (tonemapper == 0) { + color = linear_tonemap(color); + } else if (tonemapper == 1) { + color = uc2_tonemap(color); + } else if (tonemapper == 2) { + color = aces_tonemap(color); + } else if (tonemapper == 3) { + color = aces_approx_tonemap(color); + } else if (tonemapper == 4) { + color = cineon_tonemap(color); + } else if (tonemapper == 5) { + color = reinhard_jodie_tonemap(color); + } else if (tonemapper == 6) { + color = reinhard_extended_tonemap(color); + } else if (tonemapper == 7) { + color = ppc_tonemap(color); + } else if (tonemapper == 8) { + color = ppc_rgb_tonemap(color); + } + + if (linearOut == 0) { + color = linear_to_srgb(color); + } + + fragOut0 = vec4(color, 1.0); +} diff --git a/code/graphics/shaders/video.frag b/code/graphics/shaders/video.frag new file mode 100644 index 00000000000..ac23e9a768f --- /dev/null +++ b/code/graphics/shaders/video.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Inputs from vertex shader +layout (location = 0) in vec4 fragTexCoord; + +// Output +layout (location = 0) out vec4 fragOut0; + +// YUV textures use the texture array at binding 1 in Material set +// Array indices: 0 = Y, 1 = U, 2 = V +layout (set = 1, binding = 1) uniform sampler2DArray textures[16]; + +// Uniform buffer: MovieData (binding 4 in PerDraw set) +layout (set = 2, binding = 4, std140) uniform movieData { + float alpha; + float pad[3]; +}; + +void main() +{ + // Sample YUV from texture array slots 0, 1, 2 + float y = texture(textures[0], vec3(fragTexCoord.st, 0.0)).r; + float u = texture(textures[1], vec3(fragTexCoord.st, 0.0)).r; + float v = texture(textures[2], vec3(fragTexCoord.st, 0.0)).r; + vec3 val = 
vec3(y - 0.0625, u - 0.5, v - 0.5); + fragOut0.r = dot(val, vec3(1.1640625, 0.0, 1.59765625)); + fragOut0.g = dot(val, vec3(1.1640625, -0.390625, -0.8125)); + fragOut0.b = dot(val, vec3(1.1640625, 2.015625, 0.0)); + fragOut0.a = alpha; +} diff --git a/code/graphics/shaders/video.vert b/code/graphics/shaders/video.vert new file mode 100644 index 00000000000..a5e133feee7 --- /dev/null +++ b/code/graphics/shaders/video.vert @@ -0,0 +1,21 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Vertex inputs +layout (location = 0) in vec4 vertPosition; +layout (location = 2) in vec4 vertTexCoord; + +// Outputs to fragment shader +layout (location = 0) out vec4 fragTexCoord; + +// Uniform buffer: Matrices (binding 1 in PerDraw set) +layout (set = 2, binding = 1, std140) uniform matrixData { + mat4 modelViewMatrix; + mat4 projMatrix; +}; + +void main() +{ + fragTexCoord = vertTexCoord; + gl_Position = projMatrix * modelViewMatrix * vertPosition; +} diff --git a/code/graphics/shaders/volumetric-fog.frag b/code/graphics/shaders/volumetric-fog.frag new file mode 100644 index 00000000000..b528b1805e0 --- /dev/null +++ b/code/graphics/shaders/volumetric-fog.frag @@ -0,0 +1,163 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Volumetric nebula fragment shader — port of volumetric-f.sdr to Vulkan +// Raymarches through a 3D volume texture to render volumetric nebulae. +// #ifdef variants replaced with runtime UBO flags (doEdgeSmoothing, useNoise). + +layout(location = 0) in vec2 fragTexCoord; +layout(location = 0) out vec4 fragOut0; + +// Binding 1 is a 16-element sampler2D array in the Material descriptor set layout. +// We use elements [0]=composite and [1]=emissive. +layout(set = 1, binding = 1) uniform sampler2D tex2D[16]; +#define composite tex2D[0] +#define emissive tex2D[1] +layout(set = 1, binding = 4) uniform sampler2D depth; // scene depth copy +layout(set = 1, binding = 5) uniform sampler3D volume_tex; // 3D nebula volume +layout(set = 1, binding = 6) uniform sampler3D noise_volume_tex; // 3D noise volume + +layout(std140, set = 2, binding = 0) uniform genericData { + mat4 p_inv; + mat4 v_inv; + vec3 camera; + float zNear; + vec3 globalLightDirection; + float zFar; + vec3 globalLightDiffuse; + float stepsize; + vec3 nebPos; + float opacitydistance; + vec3 nebSize; + float alphalim; + vec3 nebulaColor; + float udfScale; + float emissiveSpreadFactor; + float emissiveIntensity; + float emissiveFalloff; + float henyeyGreensteinCoeff; + vec3 noiseColor; + int directionalLightSampleSteps; + float directionalLightStepSize; + float noiseColorScale1; + float noiseColorScale2; + float noiseIntensity; + float aspect; + float fov; + int doEdgeSmoothing; + int useNoise; +}; + +const float sqrt4pi_inv = inversesqrt(4.0 * 3.14159); +const float beer_powder_norm = 3.0/2.0 * sqrt(3.0); + +// Henyey-Greenstein phase function (assumes scatter vectors both point away from scatter point) +float henyey_greenstein(float cosTheta) { + float radicant = 1.0 + henyeyGreensteinCoeff * henyeyGreensteinCoeff + 2.0 * henyeyGreensteinCoeff * cosTheta; + return sqrt4pi_inv * (1.0 - henyeyGreensteinCoeff * henyeyGreensteinCoeff) / pow(radicant, 3.0 / 2.0); +} + +void main() +{ + vec4 eyeDirection = p_inv * vec4(fragTexCoord.xy * 2.0 - 1.0, -1, 1); + eyeDirection.w = 0; + vec3 rayDirection = normalize((v_inv * eyeDirection).xyz); + + vec4 color_in = texture(composite, fragTexCoord.xy); + + vec3 lCorner = nebPos - nebSize * 0.5; + vec3 rCorner = nebPos + nebSize * 0.5; + + vec3 t1 = 
(lCorner - camera) / rayDirection; + vec3 t2 = (rCorner - camera) / rayDirection; + + vec3 tMin = min(t1, t2); + vec3 tMax = max(t1, t2); + + vec2 fragcoordAngle = (fragTexCoord.xy - 0.5) * fov; + fragcoordAngle.x *= aspect; + // Vulkan depth range [0,1] — linearize directly (no 2*d-1 transform) + float depth_val = texture(depth, fragTexCoord.xy).x; + float linearDepth = zNear * zFar / (zFar - depth_val * (zFar - zNear)); + float fragDepth = linearDepth * sqrt(1.0 + tan(fragcoordAngle.x) * tan(fragcoordAngle.x) + tan(fragcoordAngle.y) * tan(fragcoordAngle.y)); + + // t at which the ray enters/leaves the nebula cube + float maxtMin = max(0, max(tMin.x, max(tMin.y, tMin.z))); + float mintMax = min(fragDepth, min(tMax.x, min(tMax.y, tMax.z))); + + // Cumulative one-minus-alpha, distance, and color + float cumOMAlpha = 1; + float cumnebdist = 0; + vec3 cumcolor = vec3(0, 0, 0); + + // Pre-compute texture gradients (approximate, shared for all steps) + vec3 initialPos = (camera + rayDirection * maxtMin) / nebSize + 0.5; + vec3 gradX = dFdx(initialPos); + vec3 gradY = dFdy(initialPos); + + vec3 sidestep = 1.0 / vec3(textureSize(volume_tex, 0)); + + for (float stept = maxtMin; stept < mintMax;) { + // Step setup + vec3 position = camera + rayDirection * stept - nebPos; + vec3 sampleposition = position / nebSize + 0.5; + vec4 volume_sample = textureGrad(volume_tex, sampleposition, gradX, gradY); + + float stepcolor_alpha = volume_sample.a; + + // Edge smoothing: average 3D texel with corner neighbors to reduce jaggies + if (doEdgeSmoothing != 0 && cumOMAlpha > 0.8) { + stepcolor_alpha = stepcolor_alpha / 2.0 + ( + textureGrad(volume_tex, sampleposition + vec3(sidestep.x, sidestep.y, sidestep.z), gradX, gradY).a + + textureGrad(volume_tex, sampleposition + vec3(sidestep.x, sidestep.y, -sidestep.z), gradX, gradY).a + + textureGrad(volume_tex, sampleposition + vec3(sidestep.x, -sidestep.y, sidestep.z), gradX, gradY).a + + textureGrad(volume_tex, sampleposition + vec3(sidestep.x, -sidestep.y, -sidestep.z), gradX, gradY).a + + textureGrad(volume_tex, sampleposition + vec3(-sidestep.x, sidestep.y, sidestep.z), gradX, gradY).a + + textureGrad(volume_tex, sampleposition + vec3(-sidestep.x, sidestep.y, -sidestep.z), gradX, gradY).a + + textureGrad(volume_tex, sampleposition + vec3(-sidestep.x, -sidestep.y, sidestep.z), gradX, gradY).a + + textureGrad(volume_tex, sampleposition + vec3(-sidestep.x, -sidestep.y, -sidestep.z), gradX, gradY).a) / 16.0; + } + + float stepsize_current = min(max(stepsize, step(stepcolor_alpha, 0.01) * volume_sample.x * udfScale), mintMax - stept); + + float stepalpha = -(pow(alphalim, 1.0 / (opacitydistance / stepsize_current)) - 1.0) * stepcolor_alpha; + // All following computations only needed if alpha is non-zero + if (stepcolor_alpha > 0.01) { + // Diffuse color (with optional noise mixing) + vec3 stepcolor_neb; + if (useNoise != 0) { + stepcolor_neb = mix(nebulaColor, noiseColor, + smoothstep(0, 1, (textureGrad(noise_volume_tex, position / noiseColorScale1, gradX, gradY).r + textureGrad(noise_volume_tex, position / noiseColorScale2, gradX, gradY).g) / 2.0 * noiseIntensity)); + } else { + stepcolor_neb = nebulaColor; + } + vec3 stepcolor_diffuse = stepcolor_neb * henyey_greenstein(dot(rayDirection, globalLightDirection)); + float directionalLightStep = 4.0 / float(directionalLightSampleSteps); + float directionalLightDepth = 0.1; + // Sample toward sun to determine lighting + for (int dlstep = 1; dlstep <= directionalLightSampleSteps; dlstep++) { + vec3 dlsteppos = (position 
- globalLightDirection * (dlstep * directionalLightStepSize)) / nebSize + 0.5; + float dlstepalpha = textureGrad(volume_tex, dlsteppos, gradX, gradY).a * step(0, dlsteppos.x) * step(dlsteppos.x, 1) * step(0, dlsteppos.y) * step(dlsteppos.y, 1) * step(0, dlsteppos.z) * step(dlsteppos.z, 1); + directionalLightDepth += dlstepalpha * directionalLightStep; + } + stepcolor_diffuse *= beer_powder_norm * (1 - exp(-directionalLightDepth * 2.0)) * exp(-directionalLightDepth); + + // Emissive contribution (LOD based on cumulative nebula distance) + cumnebdist += stepcolor_alpha * stepsize_current; + vec3 emissive_lod = textureLod(emissive, fragTexCoord.xy, clamp(cumnebdist * emissiveSpreadFactor, 0, float(textureQueryLevels(emissive) - 1))).rgb; + vec3 stepcolor_emissive = clamp(emissive_lod.rgb * pow(alphalim, 1.0 / (opacitydistance / ((fragDepth - stept) * emissiveFalloff + 0.01))) * emissiveIntensity, 0, 1); + + // Combine diffuse and emissive + vec3 stepcolor = clamp(stepcolor_diffuse + stepcolor_emissive, 0, 1); + cumcolor += stepalpha * cumOMAlpha * stepcolor; + } + + cumOMAlpha *= 1.0 - stepalpha; + stept += stepsize_current; + + if (cumOMAlpha < alphalim) + break; + } + + fragOut0 = vec4(cumOMAlpha * color_in.rgb + ((1.0 - cumOMAlpha) * cumcolor), 1); +} diff --git a/code/graphics/shaders/volumetric-fog.vert b/code/graphics/shaders/volumetric-fog.vert new file mode 100644 index 00000000000..52a641e9c06 --- /dev/null +++ b/code/graphics/shaders/volumetric-fog.vert @@ -0,0 +1,14 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +// Fullscreen triangle vertex shader for volumetric fog pass. +// Same as postprocess.vert — uses gl_VertexIndex, no vertex buffer needed. + +layout(location = 0) out vec2 fragTexCoord; + +void main() +{ + vec2 pos = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2); + fragTexCoord = pos; + gl_Position = vec4(pos * 2.0 - 1.0, 0.0, 1.0); +} diff --git a/code/graphics/shaders/vulkan.frag b/code/graphics/shaders/vulkan.frag deleted file mode 100644 index 84daf5e0d0e..00000000000 --- a/code/graphics/shaders/vulkan.frag +++ /dev/null @@ -1,10 +0,0 @@ -#version 450 -#extension GL_ARB_separate_shader_objects : enable - -layout(location = 0) in vec3 fragColor; - -layout(location = 0) out vec4 outColor; - -void main() { - outColor = vec4(fragColor, 1.0); -} diff --git a/code/graphics/shaders/vulkan.vert b/code/graphics/shaders/vulkan.vert deleted file mode 100644 index ec247650646..00000000000 --- a/code/graphics/shaders/vulkan.vert +++ /dev/null @@ -1,21 +0,0 @@ -#version 450 -#extension GL_ARB_separate_shader_objects : enable - -layout(location = 0) out vec3 fragColor; - -vec2 positions[3] = vec2[]( - vec2(0.0, -0.5), - vec2(0.5, 0.5), - vec2(-0.5, 0.5) -); - -vec3 colors[3] = vec3[]( - vec3(1.0, 0.0, 0.0), - vec3(0.0, 1.0, 0.0), - vec3(0.0, 0.0, 1.0) -); - -void main() { - gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0); - fragColor = colors[gl_VertexIndex]; -} diff --git a/code/graphics/shadows.cpp b/code/graphics/shadows.cpp index 7e76994bb2b..1f319f190da 100644 --- a/code/graphics/shadows.cpp +++ b/code/graphics/shadows.cpp @@ -99,11 +99,19 @@ void shadows_construct_light_proj(light_frustum_info *shadow_data) shadow_data->proj_matrix.a1d[0] = 2.0f / ( shadow_data->max.xyz.x - shadow_data->min.xyz.x ); shadow_data->proj_matrix.a1d[5] = 2.0f / ( shadow_data->max.xyz.y - shadow_data->min.xyz.y ); - shadow_data->proj_matrix.a1d[10] = -2.0f / ( shadow_data->max.xyz.z - shadow_data->min.xyz.z ); shadow_data->proj_matrix.a1d[12] = 
-(shadow_data->max.xyz.x + shadow_data->min.xyz.x) / ( shadow_data->max.xyz.x - shadow_data->min.xyz.x ); shadow_data->proj_matrix.a1d[13] = -(shadow_data->max.xyz.y + shadow_data->min.xyz.y) / ( shadow_data->max.xyz.y - shadow_data->min.xyz.y ); - shadow_data->proj_matrix.a1d[14] = -(shadow_data->max.xyz.z + shadow_data->min.xyz.z) / ( shadow_data->max.xyz.z - shadow_data->min.xyz.z ); shadow_data->proj_matrix.a1d[15] = 1.0f; + + if (gr_screen.mode == GR_VULKAN) { + // Vulkan uses [0, 1] depth range + shadow_data->proj_matrix.a1d[10] = -1.0f / ( shadow_data->max.xyz.z - shadow_data->min.xyz.z ); + shadow_data->proj_matrix.a1d[14] = -shadow_data->min.xyz.z / ( shadow_data->max.xyz.z - shadow_data->min.xyz.z ); + } else { + // OpenGL uses [-1, 1] depth range + shadow_data->proj_matrix.a1d[10] = -2.0f / ( shadow_data->max.xyz.z - shadow_data->min.xyz.z ); + shadow_data->proj_matrix.a1d[14] = -(shadow_data->max.xyz.z + shadow_data->min.xyz.z) / ( shadow_data->max.xyz.z - shadow_data->min.xyz.z ); + } } void shadows_debug_show_frustum(matrix* orient, vec3d *pos, float fov, float aspect, float z_near, float z_far) diff --git a/code/graphics/util/primitives.cpp b/code/graphics/util/primitives.cpp new file mode 100644 index 00000000000..1e0b2a282e8 --- /dev/null +++ b/code/graphics/util/primitives.cpp @@ -0,0 +1,125 @@ +#include "graphics/util/primitives.h" + +#include <cmath> + +namespace graphics { +namespace util { + +generated_mesh generate_sphere_mesh(int rings, int segments) +{ + generated_mesh mesh; + + unsigned int nVertex = (rings + 1) * (segments + 1) * 3; + unsigned int nIndex = 6 * rings * (segments + 1); + + mesh.vertices.reserve(nVertex); + mesh.indices.reserve(nIndex); + + float fDeltaRingAngle = (PI / rings); + float fDeltaSegAngle = (2.0f * PI / segments); + unsigned short wVerticeIndex = 0; + + // Generate the group of rings for the sphere + for (int ring = 0; ring <= rings; ring++) { + float r0 = sinf(ring * fDeltaRingAngle); + float y0 = cosf(ring * fDeltaRingAngle); + + // Generate the group of segments for the current ring + for (int seg = 0; seg <= segments; seg++) { + float x0 = r0 * sinf(seg * fDeltaSegAngle); + float z0 = r0 * cosf(seg * fDeltaSegAngle); + + // Add one vertex to the strip which makes up the sphere + mesh.vertices.push_back(x0); + mesh.vertices.push_back(y0); + mesh.vertices.push_back(z0); + + if (ring != rings) { + // each vertex (except the last) has six indices pointing to it + mesh.indices.push_back(wVerticeIndex + (ushort)segments + 1); + mesh.indices.push_back(wVerticeIndex); + mesh.indices.push_back(wVerticeIndex + (ushort)segments); + mesh.indices.push_back(wVerticeIndex + (ushort)segments + 1); + mesh.indices.push_back(wVerticeIndex + 1); + mesh.indices.push_back(wVerticeIndex); + wVerticeIndex++; + } + } + } + + mesh.vertex_count = wVerticeIndex; + mesh.index_count = nIndex; + + return mesh; +} + +generated_mesh generate_cylinder_mesh(int segments) +{ + generated_mesh mesh; + + unsigned int nVertex = (segments + 1) * 2 * 3 + 6; + unsigned int nIndex = 12 * (segments + 1) - 6; + + mesh.vertices.reserve(nVertex); + mesh.indices.reserve(nIndex); + + float fDeltaSegAngle = (2.0f * PI / segments); + unsigned short wVerticeIndex = 0; + + // Bottom cap center vertex + mesh.vertices.push_back(0.0f); + mesh.vertices.push_back(0.0f); + mesh.vertices.push_back(0.0f); + wVerticeIndex++; + + // Top cap center vertex + mesh.vertices.push_back(0.0f); + mesh.vertices.push_back(0.0f); + mesh.vertices.push_back(1.0f); + wVerticeIndex++; + + for (int ring = 0; ring <= 1;
ring++) { + float z0 = (float)ring; + + // Generate the group of segments for the current ring + for (int seg = 0; seg <= segments; seg++) { + float x0 = sinf(seg * fDeltaSegAngle); + float y0 = cosf(seg * fDeltaSegAngle); + + // Add one vertex to the strip which makes up the cylinder + mesh.vertices.push_back(x0); + mesh.vertices.push_back(y0); + mesh.vertices.push_back(z0); + + if (!ring) { + mesh.indices.push_back(wVerticeIndex + (ushort)segments + 1); + mesh.indices.push_back(wVerticeIndex); + mesh.indices.push_back(wVerticeIndex + (ushort)segments); + mesh.indices.push_back(wVerticeIndex + (ushort)segments + 1); + mesh.indices.push_back(wVerticeIndex + 1); + mesh.indices.push_back(wVerticeIndex); + if (seg != segments) { + mesh.indices.push_back(wVerticeIndex + 1); + mesh.indices.push_back(wVerticeIndex); + mesh.indices.push_back(0); + } + wVerticeIndex++; + } else { + if (seg != segments) { + mesh.indices.push_back(wVerticeIndex + 1); + mesh.indices.push_back(wVerticeIndex); + mesh.indices.push_back(1); + wVerticeIndex++; + } + } + } + } + + mesh.vertex_count = wVerticeIndex; + mesh.index_count = nIndex; + + return mesh; +} + +} +} diff --git a/code/graphics/util/primitives.h b/code/graphics/util/primitives.h new file mode 100644 index 00000000000..b4054b6ec14 --- /dev/null +++ b/code/graphics/util/primitives.h @@ -0,0 +1,37 @@ +#pragma once + +#include "globalincs/pstypes.h" + +namespace graphics { +namespace util { + +struct generated_mesh { + SCP_vector<float> vertices; // position-only, 3 floats per vertex + SCP_vector<ushort> indices; + unsigned int vertex_count; // number of unique vertices generated + unsigned int index_count; // number of indices +}; + +/** + * @brief Generate a unit sphere mesh (radius 1.0) suitable for deferred light volumes + * + * Based on http://www.ogre3d.org/tikiwiki/ManualSphereMeshes + * + * @param rings Number of horizontal rings + * @param segments Number of vertical segments + * @return generated_mesh containing position-only vertices and triangle indices + */ +generated_mesh generate_sphere_mesh(int rings, int segments); + +/** + * @brief Generate a unit cylinder mesh (radius 1.0, height 1.0) suitable for deferred light volumes + * + * Based on http://www.ogre3d.org/tikiwiki/ManualSphereMeshes + * + * @param segments Number of radial segments + * @return generated_mesh containing position-only vertices and triangle indices + */ +generated_mesh generate_cylinder_mesh(int segments); + +} +} diff --git a/code/graphics/util/uniform_structs.h b/code/graphics/util/uniform_structs.h index 391e6d36fa5..a37cfc058a9 100644 --- a/code/graphics/util/uniform_structs.h +++ b/code/graphics/util/uniform_structs.h @@ -32,7 +32,7 @@ struct deferred_global_data { float invScreenHeight; float nearPlane; - float pad; + int use_env_map; }; /** @@ -227,7 +227,7 @@ struct tonemapping_data { float sh_lnA; float sh_offsetX; float sh_offsetY; - float pad[1]; + int linearOut; // 1 = skip sRGB conversion (Vulkan runtime flag, replaces compile-time LINEAR_OUT) }; struct smaa_data { @@ -329,7 +329,8 @@ struct volumetric_fog_data { float aspect; float fov; - float pad[2]; + int doEdgeSmoothing; + int useNoise; }; struct msaa_data { diff --git a/code/graphics/vulkan/VulkanBuffer.cpp b/code/graphics/vulkan/VulkanBuffer.cpp new file mode 100644 index 00000000000..1989a1da7de --- /dev/null +++ b/code/graphics/vulkan/VulkanBuffer.cpp @@ -0,0 +1,784 @@ +#include "VulkanBuffer.h" +#include "VulkanDeletionQueue.h" +#include "VulkanDraw.h" + +#include "globalincs/pstypes.h" + +namespace graphics {
+namespace vulkan { + +namespace { +VulkanBufferManager* g_bufferManager = nullptr; +} + +VulkanBufferManager* getBufferManager() +{ + Assertion(g_bufferManager != nullptr, "Vulkan BufferManager not initialized!"); + return g_bufferManager; +} + +void setBufferManager(VulkanBufferManager* manager) +{ + g_bufferManager = manager; +} + +VulkanBufferManager::VulkanBufferManager() = default; + +VulkanBufferManager::~VulkanBufferManager() +{ + if (m_initialized) { + shutdown(); + } +} + +bool VulkanBufferManager::createOneShotBuffer(vk::Flags<vk::BufferUsageFlagBits> usage, const void* data, size_t size, vk::Buffer& buf, VulkanAllocation& alloc) const +{ + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = size; + bufferInfo.usage = usage; + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + + try { + buf = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create buffer: %s\n", e.what())); + return false; + } + + if (!m_memoryManager->allocateBufferMemory(buf, MemoryUsage::CpuToGpu, alloc)) { + m_device.destroyBuffer(buf); + buf = nullptr; + mprintf(("Failed to allocate buffer memory!\n")); + return false; + } + + void* mapped = m_memoryManager->mapMemory(alloc); + if (mapped) { + memcpy(mapped, data, size); + m_memoryManager->flushMemory(alloc, 0, size); + m_memoryManager->unmapMemory(alloc); + } else { + m_memoryManager->freeAllocation(alloc); + m_device.destroyBuffer(buf); + buf = nullptr; + + mprintf(("Failed to map buffer memory!\n")); + return false; + } + return true; +} + +// ========== Frame bump allocator ========== + +bool VulkanBufferManager::createFrameAllocBuffer(FrameBumpAllocator& alloc, size_t size) +{ + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = size; + bufferInfo.usage = vk::BufferUsageFlagBits::eVertexBuffer + | vk::BufferUsageFlagBits::eIndexBuffer + | vk::BufferUsageFlagBits::eUniformBuffer + | vk::BufferUsageFlagBits::eStorageBuffer + | vk::BufferUsageFlagBits::eTransferDst; + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + + try { + alloc.buffer = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create frame allocator buffer: %s\n", e.what())); + return false; + } + + if (!m_memoryManager->allocateBufferMemory(alloc.buffer, MemoryUsage::CpuToGpu, alloc.allocation)) { + m_device.destroyBuffer(alloc.buffer); + alloc.buffer = nullptr; + mprintf(("Failed to allocate frame allocator buffer memory!\n")); + return false; + } + + alloc.mappedPtr = m_memoryManager->mapMemory(alloc.allocation); + if (!alloc.mappedPtr) { + m_memoryManager->freeAllocation(alloc.allocation); + m_device.destroyBuffer(alloc.buffer); + alloc.buffer = nullptr; + alloc.allocation = {}; + mprintf(("Failed to map frame allocator buffer!\n")); + return false; + } + + alloc.capacity = size; + alloc.cursor = 0; + return true; +} + +void VulkanBufferManager::initFrameAllocators() +{ + for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { + Verify(createFrameAllocBuffer(m_frameAllocs[i], FRAME_ALLOC_INITIAL_SIZE)); + } + mprintf(("Frame bump allocators initialized: %u x %zuKB\n", + MAX_FRAMES_IN_FLIGHT, FRAME_ALLOC_INITIAL_SIZE / 1024)); +} + +void VulkanBufferManager::shutdownFrameAllocators() +{ + for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { + auto& alloc = m_frameAllocs[i]; + if (alloc.mappedPtr) { + m_memoryManager->unmapMemory(alloc.allocation); + alloc.mappedPtr = nullptr; + } + if (alloc.buffer) { + m_device.destroyBuffer(alloc.buffer); + alloc.buffer = nullptr; + } + if (alloc.allocation.memory !=
VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(alloc.allocation); + alloc.allocation = {}; + } + alloc.capacity = 0; + alloc.cursor = 0; + } +} + +size_t VulkanBufferManager::bumpAllocate(size_t size) +{ + auto& alloc = m_frameAllocs[m_currentFrame]; + + // Align cursor up to UBO alignment (satisfies UBO/SSBO/vertex alignment) + size_t alignedOffset = (alloc.cursor + m_uboAlignment - 1) & ~(static_cast<size_t>(m_uboAlignment) - 1); + + if (alignedOffset + size > alloc.capacity) { + growFrameAllocator(); + // After growth, cursor is 0 so alignedOffset is 0 + alignedOffset = 0; + Assertion(size <= alloc.capacity, "Frame allocator growth failed to provide enough capacity"); + } + + alloc.cursor = alignedOffset + size; + return alignedOffset; +} + +void VulkanBufferManager::growFrameAllocator() +{ + auto& alloc = m_frameAllocs[m_currentFrame]; + + // Double capacity until sufficient + size_t newCapacity = alloc.capacity > 0 ? alloc.capacity * 2 : FRAME_ALLOC_INITIAL_SIZE; + // Ensure at least the current cursor position can fit (handles pathological single-alloc case) + while (newCapacity < alloc.cursor) { + newCapacity *= 2; + } + + mprintf(("Growing frame allocator %u: %zuKB -> %zuKB\n", + m_currentFrame, alloc.capacity / 1024, newCapacity / 1024)); + + // Queue old buffer for deferred destruction - the deletion queue's FRAMES_TO_WAIT=2 + // ensures the old buffer survives through current frame's GPU execution. + // Existing handles with frameAllocBuffer pointing to the old buffer remain valid. + auto* deletionQueue = getDeletionQueue(); + if (alloc.mappedPtr) { + m_memoryManager->unmapMemory(alloc.allocation); + } + deletionQueue->queueBuffer(alloc.buffer, alloc.allocation); + + // Create new buffer + alloc = {}; + Verify(createFrameAllocBuffer(alloc, newCapacity)); +} + +// ========== Init / Shutdown ========== + +bool VulkanBufferManager::init(vk::Device device, + VulkanMemoryManager* memoryManager, + uint32_t graphicsQueueFamily, + uint32_t transferQueueFamily, + uint32_t minUboAlignment) +{ + if (m_initialized) { + mprintf(("VulkanBufferManager::init called when already initialized!\n")); + return false; + } + + if (!device || !memoryManager) { + mprintf(("VulkanBufferManager::init called with null device or memory manager!\n")); + return false; + } + + m_device = device; + m_memoryManager = memoryManager; + m_graphicsQueueFamily = graphicsQueueFamily; + m_transferQueueFamily = transferQueueFamily; + m_currentFrame = 0; + m_uboAlignment = minUboAlignment > 0 ?
minUboAlignment : 256; + + // Create fallback color buffer with white (1,1,1,1) for shaders expecting vertColor + float whiteColor[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; + if (!createOneShotBuffer(vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eTransferDst, whiteColor, sizeof(whiteColor), m_fallbackColorBuffer, m_fallbackColorAllocation)) { + mprintf(("VulkanBufferManager::init could not create fallback color buffer\n")); + return false; + } + + float zeroTexCoord[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; + if (!createOneShotBuffer(vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eTransferDst, zeroTexCoord, sizeof(zeroTexCoord), m_fallbackTexCoordBuffer, m_fallbackTexCoordAllocation)) { + mprintf(("VulkanBufferManager::init could not create fallback texcoord buffer\n")); + return false; + } + + // Create fallback uniform buffer (zeros) for uninitialized descriptor set bindings + // Without this, descriptor set UBO bindings left unwritten after pool reset + // contain undefined data, causing intermittent rendering failures + float dummy_ubo[FALLBACK_UNIFORM_BUFFER_SIZE] = {}; + if (!createOneShotBuffer(vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer, dummy_ubo, sizeof(dummy_ubo), m_fallbackUniformBuffer, m_fallbackUniformAllocation)) { + mprintf(("VulkanBufferManager::init could not create fallback uniform buffer\n")); + return false; + } + + initFrameAllocators(); + + m_initialized = true; + mprintf(("Vulkan Buffer Manager initialized (frame bump allocator, UBO alignment=%u, %u frames)\n", + m_uboAlignment, MAX_FRAMES_IN_FLIGHT)); + return true; +} + +void VulkanBufferManager::shutdown() +{ + if (!m_initialized) { + return; + } + + // Destroy fallback color buffer + if (m_fallbackColorBuffer) { + m_device.destroyBuffer(m_fallbackColorBuffer); + m_fallbackColorBuffer = nullptr; + } + if (m_fallbackColorAllocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_fallbackColorAllocation); + m_fallbackColorAllocation = {}; + } + + // Destroy fallback texcoord buffer + if (m_fallbackTexCoordBuffer) { + m_device.destroyBuffer(m_fallbackTexCoordBuffer); + m_fallbackTexCoordBuffer = nullptr; + } + if (m_fallbackTexCoordAllocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_fallbackTexCoordAllocation); + m_fallbackTexCoordAllocation = {}; + } + + // Destroy fallback uniform buffer + if (m_fallbackUniformBuffer) { + m_device.destroyBuffer(m_fallbackUniformBuffer); + m_fallbackUniformBuffer = nullptr; + } + if (m_fallbackUniformAllocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_fallbackUniformAllocation); + m_fallbackUniformAllocation = {}; + } + + // Free all remaining static buffers + for (auto& bufferObj : m_buffers) { + if (bufferObj.valid) { + if (!bufferObj.isStreaming() && bufferObj.buffer) { + m_device.destroyBuffer(bufferObj.buffer); + } + if (!bufferObj.isStreaming() && bufferObj.allocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(bufferObj.allocation); + } + bufferObj.valid = false; + } + } + + shutdownFrameAllocators(); + + m_buffers.clear(); + m_freeIndices.clear(); + m_activeBufferCount = 0; + m_totalBufferMemory = 0; + m_initialized = false; + + mprintf(("Vulkan Buffer Manager shutdown\n")); +} + +void VulkanBufferManager::setCurrentFrame(uint32_t frameIndex) +{ + m_currentFrame = frameIndex % MAX_FRAMES_IN_FLIGHT; + // Reset bump cursor — safe because the GPU fence for this frame-in-flight + // was already waited on before setCurrentFrame is called. 
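+	// Any streaming handle whose frameAllocFrame no longer matches the current frame is now stale and must be re-uploaded before use (see getFrameBaseOffset).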
+ m_frameAllocs[m_currentFrame].cursor = 0; +} + +// ========== Buffer usage / memory helpers ========== + +vk::BufferUsageFlags VulkanBufferManager::getVkUsageFlags(BufferType type) const +{ + vk::BufferUsageFlags flags = vk::BufferUsageFlagBits::eTransferDst; + + switch (type) { + case BufferType::Vertex: + flags |= vk::BufferUsageFlagBits::eVertexBuffer; + break; + case BufferType::Index: + flags |= vk::BufferUsageFlagBits::eIndexBuffer; + break; + case BufferType::Uniform: + flags |= vk::BufferUsageFlagBits::eUniformBuffer; + break; + } + + return flags; +} + +MemoryUsage VulkanBufferManager::getMemoryUsage(BufferUsageHint hint) const +{ + switch (hint) { + case BufferUsageHint::Static: + // Static data goes to device-local memory for best GPU performance + // For simplicity, we use CpuToGpu which allows host writes + // A more optimized path would use staging buffers for truly static data + return MemoryUsage::CpuToGpu; + + case BufferUsageHint::Dynamic: + case BufferUsageHint::Streaming: + // Frequently updated data needs to be host visible + return MemoryUsage::CpuToGpu; + + case BufferUsageHint::PersistentMapping: + // Persistent mapping requires host visible memory + return MemoryUsage::CpuOnly; + + default: + return MemoryUsage::CpuToGpu; + } +} + +// ========== Buffer create / delete ========== + +gr_buffer_handle VulkanBufferManager::createBuffer(BufferType type, BufferUsageHint usage) +{ + Verify(m_initialized); + + VulkanBufferObject bufferObj; + bufferObj.type = type; + bufferObj.usage = usage; + bufferObj.valid = true; + // Note: actual buffer creation is deferred until data is uploaded + + int index; + if (!m_freeIndices.empty()) { + // Reuse a freed slot + index = m_freeIndices.back(); + m_freeIndices.pop_back(); + m_buffers[index] = bufferObj; + } else { + // Add new slot + index = static_cast<int>(m_buffers.size()); + m_buffers.push_back(bufferObj); + } + + ++m_activeBufferCount; + return gr_buffer_handle(index); +} + +void VulkanBufferManager::deleteBuffer(gr_buffer_handle handle) +{ + Verify(m_initialized && isValidHandle(handle)); + + VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + Verify(bufferObj.valid); + + if (!bufferObj.isStreaming()) { + // Queue static buffer for deferred destruction + auto* deletionQueue = getDeletionQueue(); + if (bufferObj.buffer) { + deletionQueue->queueBuffer(bufferObj.buffer, bufferObj.allocation); + m_totalBufferMemory -= bufferObj.dataSize; + } + bufferObj.buffer = nullptr; + bufferObj.allocation = {}; + bufferObj.dataSize = 0; + } else { + // Streaming buffers have no per-buffer resources — just mark invalid + } + + --m_activeBufferCount; + bufferObj.valid = false; + + // Add to free list for reuse + m_freeIndices.push_back(handle.value()); +} + +// ========== createOrResizeBuffer (static only) ========== + +bool VulkanBufferManager::createOrResizeBuffer(VulkanBufferObject& bufferObj, size_t size) +{ + Assertion(!bufferObj.isStreaming(), "createOrResizeBuffer called on streaming buffer!"); + + // If buffer exists and is large enough, no-op + if (bufferObj.buffer && bufferObj.dataSize >= size) { + return true; + } + + // Save old buffer info for data copy + vk::Buffer oldBuffer = bufferObj.buffer; + VulkanAllocation oldAllocation = bufferObj.allocation; + size_t oldDataSize = bufferObj.dataSize; + + // Create new buffer + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = size; + bufferInfo.usage = getVkUsageFlags(bufferObj.type); + + // Handle queue family sharing + uint32_t queueFamilies[] = {m_graphicsQueueFamily,
m_transferQueueFamily}; + if (m_graphicsQueueFamily != m_transferQueueFamily) { + bufferInfo.sharingMode = vk::SharingMode::eConcurrent; + bufferInfo.queueFamilyIndexCount = 2; + bufferInfo.pQueueFamilyIndices = queueFamilies; + } else { + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + } + + try { + bufferObj.buffer = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create Vulkan buffer: %s\n", e.what())); + bufferObj.buffer = oldBuffer; + return false; + } + + // Allocate memory + MemoryUsage memUsage = getMemoryUsage(bufferObj.usage); + if (!m_memoryManager->allocateBufferMemory(bufferObj.buffer, memUsage, bufferObj.allocation)) { + m_device.destroyBuffer(bufferObj.buffer); + bufferObj.buffer = oldBuffer; + bufferObj.allocation = oldAllocation; + return false; + } + + // Copy existing data from old buffer + if (oldBuffer && oldDataSize > 0) { + void* oldMapped = m_memoryManager->mapMemory(oldAllocation); + void* newMapped = m_memoryManager->mapMemory(bufferObj.allocation); + Verify(oldMapped); + Verify(newMapped); + + size_t copySize = std::min(oldDataSize, size); + memcpy(newMapped, oldMapped, copySize); + m_memoryManager->flushMemory(bufferObj.allocation, 0, copySize); + + m_memoryManager->unmapMemory(oldAllocation); + m_memoryManager->unmapMemory(bufferObj.allocation); + } + + // Queue old buffer for deferred destruction + if (oldBuffer) { + auto* deletionQueue = getDeletionQueue(); + deletionQueue->queueBuffer(oldBuffer, oldAllocation); + m_totalBufferMemory -= oldDataSize; + } + + bufferObj.dataSize = size; + m_totalBufferMemory += size; + + return true; +} + +// ========== Buffer data updates ========== + +void VulkanBufferManager::updateBufferData(gr_buffer_handle handle, size_t size, const void* data) +{ + Verify(m_initialized && isValidHandle(handle)); + + if (size == 0) { + mprintf(("WARNING: updateBufferData called with size 0\n")); + return; + } + + VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + Verify(bufferObj.valid); + + if (bufferObj.isStreaming()) { + auto& alloc = m_frameAllocs[m_currentFrame]; + + if (data) { + // Pattern A: full replacement — allocate and copy + size_t offset = bumpAllocate(size); + memcpy(static_cast<uint8_t*>(alloc.mappedPtr) + offset, data, size); + m_memoryManager->flushMemory(alloc.allocation, offset, size); + + bufferObj.frameAllocBuffer = alloc.buffer; + bufferObj.frameAllocOffset = offset; + bufferObj.dataSize = size; + bufferObj.frameAllocFrame = m_currentFrame; + } else { + // Pattern B: pre-alloc for offset writes (null data) + if (bufferObj.frameAllocFrame != m_currentFrame || size > bufferObj.dataSize) { + // First allocation this frame, or need more space + size_t offset = bumpAllocate(size); + bufferObj.frameAllocBuffer = alloc.buffer; + bufferObj.frameAllocOffset = offset; + bufferObj.dataSize = size; + bufferObj.frameAllocFrame = m_currentFrame; + } + // Otherwise: same frame and size fits — keep current allocation + } + } else { + // Static / PersistentMapping path + Verify(createOrResizeBuffer(bufferObj, size)); + + // A null data pointer just allocates/resizes the buffer without writing + if (data) { + void* mapped = m_memoryManager->mapMemory(bufferObj.allocation); + Verify(mapped); + memcpy(mapped, data, size); + m_memoryManager->flushMemory(bufferObj.allocation, 0, size); + m_memoryManager->unmapMemory(bufferObj.allocation); + } + } +} + +void VulkanBufferManager::updateBufferDataOffset(gr_buffer_handle handle, size_t offset, size_t size, const void* data) +{
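+	// Partial update. For streaming buffers this writes into the current frame's bump allocation, auto-allocating below when updateBufferData was skipped this frame.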
+ Verify(m_initialized && isValidHandle(handle)); + + VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + Verify(bufferObj.valid); + + if (bufferObj.isStreaming()) { + // Auto-allocate if not yet allocated this frame. This happens when + // the caller skips updateBufferData (e.g. gr_add_to_immediate_buffer + // when the data fits the existing buffer size). + if (bufferObj.frameAllocFrame != m_currentFrame) { + size_t allocSize = std::max(bufferObj.dataSize, offset + size); + Verify(allocSize > 0); + auto& fa = m_frameAllocs[m_currentFrame]; + size_t allocOffset = bumpAllocate(allocSize); + bufferObj.frameAllocBuffer = fa.buffer; + bufferObj.frameAllocOffset = allocOffset; + bufferObj.dataSize = allocSize; + bufferObj.frameAllocFrame = m_currentFrame; + } + + Verify(offset + size <= bufferObj.dataSize); + + auto& alloc = m_frameAllocs[m_currentFrame]; + size_t totalOffset = bufferObj.frameAllocOffset + offset; + memcpy(static_cast<uint8_t*>(alloc.mappedPtr) + totalOffset, data, size); + m_memoryManager->flushMemory(alloc.allocation, totalOffset, size); + } else { + // Static path + Verify(bufferObj.buffer); + Verify(offset + size <= bufferObj.dataSize); + + // Map, update region, and unmap + void* mapped = m_memoryManager->mapMemory(bufferObj.allocation); + Verify(mapped); + memcpy(static_cast<uint8_t*>(mapped) + offset, data, size); + m_memoryManager->flushMemory(bufferObj.allocation, offset, size); + m_memoryManager->unmapMemory(bufferObj.allocation); + } +} + +// ========== Map / Flush ========== + +void* VulkanBufferManager::mapBuffer(gr_buffer_handle handle) +{ + if (!m_initialized || !isValidHandle(handle)) { + return nullptr; + } + + VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + if (!bufferObj.valid) { + return nullptr; + } + + if (bufferObj.isStreaming()) { + Verify(bufferObj.frameAllocFrame == m_currentFrame); + auto& alloc = m_frameAllocs[m_currentFrame]; + return static_cast<uint8_t*>(alloc.mappedPtr) + bufferObj.frameAllocOffset; + } + + // Static / PersistentMapping + if (!bufferObj.buffer) { + return nullptr; + } + + // Only persistent mapping buffers should stay mapped + if (bufferObj.usage != BufferUsageHint::PersistentMapping) { + mprintf(("WARNING: mapBuffer called on non-persistent buffer\n")); + } + + // Map the entire buffer + void* mapped = m_memoryManager->mapMemory(bufferObj.allocation); + return mapped; +} + +void VulkanBufferManager::flushMappedBuffer(gr_buffer_handle handle, size_t offset, size_t size) +{ + Verify(m_initialized && isValidHandle(handle)); + + VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + Verify(bufferObj.valid); + + if (bufferObj.isStreaming()) { + // Adjust offset for current frame's allocation + Verify(bufferObj.frameAllocFrame == m_currentFrame); + auto& alloc = m_frameAllocs[m_currentFrame]; + m_memoryManager->flushMemory(alloc.allocation, bufferObj.frameAllocOffset + offset, size); + } else { + m_memoryManager->flushMemory(bufferObj.allocation, offset, size); + } +} + +// ========== Uniform buffer binding ========== + +void VulkanBufferManager::bindUniformBuffer(uniform_block_type blockType, size_t offset, size_t size, gr_buffer_handle buffer) +{ + // Resolve the full offset NOW (frame base + caller offset) so the binding + // captures the correct allocation. The vk::Buffer is still looked up at + // draw time (via handle) to survive buffer recreation.
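+	// Worked example: a streaming UBO bump-allocated at frame offset 4096 with caller offset 256 resolves to a binding offset of 4352 (values illustrative).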
+ size_t resolvedOffset = getFrameBaseOffset(buffer) + offset; + + auto* drawManager = getDrawManager(); + drawManager->setPendingUniformBinding(blockType, buffer, + static_cast<uint32_t>(resolvedOffset), + static_cast<uint32_t>(size)); +} + +// ========== Buffer queries ========== + +vk::Buffer VulkanBufferManager::getVkBuffer(gr_buffer_handle handle) const +{ + if (!isValidHandle(handle)) { + return nullptr; + } + + const VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + if (!bufferObj.valid) { + return nullptr; + } + + if (bufferObj.isStreaming()) { + // Streaming buffers return the frame allocator buffer they were uploaded to + Verify(bufferObj.frameAllocFrame == m_currentFrame); + return bufferObj.frameAllocBuffer; + } else { + return bufferObj.buffer; + } +} + +size_t VulkanBufferManager::getBufferSize(gr_buffer_handle handle) const +{ + if (!isValidHandle(handle)) { + return 0; + } + + const VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + if (!bufferObj.valid) { + return 0; + } + + return bufferObj.dataSize; +} + +size_t VulkanBufferManager::getFrameBaseOffset(gr_buffer_handle handle) const +{ + if (!isValidHandle(handle)) { + return 0; + } + + const VulkanBufferObject& bufferObj = m_buffers[handle.value()]; + if (!bufferObj.valid) { + return 0; + } + + if (bufferObj.isStreaming()) { + // Return the bump allocator offset for the most recent upload this frame. + // Stale handle detection: if frameAllocFrame != m_currentFrame, this buffer + // was not uploaded this frame and the offset would be meaningless (the bump + // allocator has been reset). This indicates a buffer marked Streaming/Dynamic + // is being bound for rendering without being uploaded first. + Verify(bufferObj.frameAllocFrame == m_currentFrame); + return bufferObj.frameAllocOffset; + } else { + return 0; + } +} + +bool VulkanBufferManager::isValidHandle(gr_buffer_handle handle) const +{ + if (!handle.isValid()) { + return false; + } + if (static_cast<size_t>(handle.value()) >= m_buffers.size()) { + return false; + } + return m_buffers[handle.value()].valid; +} + +VulkanBufferObject* VulkanBufferManager::getBufferObject(gr_buffer_handle handle) +{ + if (!isValidHandle(handle)) { + return nullptr; + } + return &m_buffers[handle.value()]; +} + +const VulkanBufferObject* VulkanBufferManager::getBufferObject(gr_buffer_handle handle) const +{ + if (!isValidHandle(handle)) { + return nullptr; + } + return &m_buffers[handle.value()]; +} + +// ========== gr_screen function pointer implementations ========== + +gr_buffer_handle vulkan_create_buffer(BufferType type, BufferUsageHint usage) +{ + auto* bufferManager = getBufferManager(); + return bufferManager->createBuffer(type, usage); +} + +void vulkan_delete_buffer(gr_buffer_handle handle) +{ + auto* bufferManager = getBufferManager(); + bufferManager->deleteBuffer(handle); +} + +void vulkan_update_buffer_data(gr_buffer_handle handle, size_t size, const void* data) +{ + auto* bufferManager = getBufferManager(); + bufferManager->updateBufferData(handle, size, data); +} + +void vulkan_update_buffer_data_offset(gr_buffer_handle handle, size_t offset, size_t size, const void* data) +{ + auto* bufferManager = getBufferManager(); + bufferManager->updateBufferDataOffset(handle, offset, size, data); +} + +void* vulkan_map_buffer(gr_buffer_handle handle) +{ + auto* bufferManager = getBufferManager(); + void* result = bufferManager->mapBuffer(handle); + Verify(result); + return result; +} + +void vulkan_flush_mapped_buffer(gr_buffer_handle handle, size_t offset, size_t size) +{ + auto*
+	auto* bufferManager = getBufferManager();
+	bufferManager->flushMappedBuffer(handle, offset, size);
+}
+
+void vulkan_bind_uniform_buffer(uniform_block_type blockType, size_t offset, size_t size, gr_buffer_handle buffer)
+{
+	auto* bufferManager = getBufferManager();
+	bufferManager->bindUniformBuffer(blockType, offset, size, buffer);
+}
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanBuffer.h b/code/graphics/vulkan/VulkanBuffer.h
new file mode 100644
index 00000000000..b0d06b7ab02
--- /dev/null
+++ b/code/graphics/vulkan/VulkanBuffer.h
@@ -0,0 +1,305 @@
+#pragma once
+
+#include "graphics/2d.h"
+#include "VulkanConstants.h"
+#include "VulkanMemory.h"
+
+#include <cstdint>
+
+namespace graphics {
+namespace vulkan {
+
+/**
+ * @brief Per-frame bump allocator for streaming/dynamic buffers
+ *
+ * Two of these exist (one per frame-in-flight). At frame start the cursor
+ * resets to 0. Each streaming upload bumps the cursor forward.
+ * The buffer is persistently mapped for the lifetime of the allocator.
+ */
+struct FrameBumpAllocator {
+	vk::Buffer buffer;
+	VulkanAllocation allocation = {};
+	void* mappedPtr = nullptr;
+	size_t capacity = 0;
+	size_t cursor = 0;
+};
+
+/**
+ * @brief Internal representation of a Vulkan buffer
+ *
+ * Static buffers own their own VkBuffer. Streaming/Dynamic buffers are
+ * sub-allocated from a shared FrameBumpAllocator each frame.
+ */
+struct VulkanBufferObject {
+	BufferType type = BufferType::Vertex;
+	BufferUsageHint usage = BufferUsageHint::Static;
+	bool valid = false;
+	size_t dataSize = 0; // Usable data size. Static: total VkBuffer allocation. Streaming: current frame allocation.
+
+	// Static buffer fields (unused for streaming)
+	vk::Buffer buffer = nullptr;
+	VulkanAllocation allocation = {};
+
+	// Frame bump allocator sub-allocation (streaming/dynamic only)
+	vk::Buffer frameAllocBuffer;           // VkBuffer at upload time (may be old allocator buffer after growth)
+	size_t frameAllocOffset = 0;           // Byte offset within the frame allocator buffer
+	uint32_t frameAllocFrame = UINT32_MAX; // Frame index when last allocated
+
+	bool isStreaming() const {
+		return usage == BufferUsageHint::Streaming || usage == BufferUsageHint::Dynamic;
+	}
+};
+
+/**
+ * @brief Manages GPU buffer creation, updates, and destruction
+ *
+ * Streaming/Dynamic buffers are sub-allocated from a global per-frame bump
+ * allocator (two large VkBuffers, one per frame-in-flight). Static buffers
+ * keep their own VkBuffer. PersistentMapping buffers are handled separately.
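+ *
+ * Illustrative per-frame flow (a sketch of intended use, not a normative
+ * contract; the handle value is hypothetical):
+ * @code
+ *   mgr.setCurrentFrame(frameIndex);               // resets the bump cursor
+ *   mgr.updateBufferData(streamHandle, sz, data);  // bump-allocates + copies
+ *   vk::Buffer b = mgr.getVkBuffer(streamHandle);  // frame allocator VkBuffer
+ *   size_t base = mgr.getFrameBaseOffset(streamHandle); // offset for bind/draw
+ * @endcode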
+ */
+class VulkanBufferManager {
+public:
+	VulkanBufferManager();
+	~VulkanBufferManager();
+
+	// Non-copyable
+	VulkanBufferManager(const VulkanBufferManager&) = delete;
+	VulkanBufferManager& operator=(const VulkanBufferManager&) = delete;
+
+	/**
+	 * @brief Initialize the buffer manager
+	 * @param device The Vulkan logical device
+	 * @param memoryManager The memory manager for allocations
+	 * @param graphicsQueueFamily Graphics queue family index
+	 * @param transferQueueFamily Transfer queue family index
+	 * @param minUboAlignment Minimum uniform buffer offset alignment from device limits
+	 * @return true on success
+	 */
+	bool init(vk::Device device,
+		VulkanMemoryManager* memoryManager,
+		uint32_t graphicsQueueFamily,
+		uint32_t transferQueueFamily,
+		uint32_t minUboAlignment);
+
+	/**
+	 * @brief Shutdown and free all buffers
+	 */
+	void shutdown();
+
+	/**
+	 * @brief Set the current frame index and reset the bump allocator cursor
+	 * Must be called at the start of each frame before any buffer updates
+	 * @param frameIndex The current frame index (0 to MAX_FRAMES_IN_FLIGHT-1)
+	 */
+	void setCurrentFrame(uint32_t frameIndex);
+
+	/**
+	 * @brief Get the current frame index
+	 */
+	uint32_t getCurrentFrame() const { return m_currentFrame; }
+
+	/**
+	 * @brief Get the Vulkan logical device
+	 */
+	vk::Device getDevice() const { return m_device; }
+
+	/**
+	 * @brief Create a new buffer
+	 * @param type The buffer type (Vertex, Index, Uniform)
+	 * @param usage Usage hint for optimization
+	 * @return Handle to the created buffer, or invalid handle on failure
+	 */
+	gr_buffer_handle createBuffer(BufferType type, BufferUsageHint usage);
+
+	/**
+	 * @brief Delete a buffer
+	 * @param handle The buffer to delete
+	 */
+	void deleteBuffer(gr_buffer_handle handle);
+
+	/**
+	 * @brief Update buffer data (full replacement)
+	 * @param handle The buffer to update
+	 * @param size Size of data in bytes
+	 * @param data Pointer to data
+	 */
+	void updateBufferData(gr_buffer_handle handle, size_t size, const void* data);
+
+	/**
+	 * @brief Update buffer data at an offset
+	 * @param handle The buffer to update
+	 * @param offset Offset in bytes
+	 * @param size Size of data in bytes
+	 * @param data Pointer to data
+	 */
+	void updateBufferDataOffset(gr_buffer_handle handle, size_t offset, size_t size, const void* data);
+
+	/**
+	 * @brief Map buffer for CPU access
+	 * @param handle The buffer to map
+	 * @return Pointer to mapped memory, or nullptr on failure
+	 */
+	void* mapBuffer(gr_buffer_handle handle);
+
+	/**
+	 * @brief Flush a range of a mapped buffer
+	 * @param handle The buffer to flush
+	 * @param offset Offset in bytes
+	 * @param size Size of range in bytes
+	 */
+	void flushMappedBuffer(gr_buffer_handle handle, size_t offset, size_t size);
+
+	/**
+	 * @brief Bind uniform buffer to a binding slot
+	 * @param blockType The uniform block type
+	 * @param offset Offset within the buffer
+	 * @param size Size of the bound range
+	 * @param buffer The buffer to bind
+	 */
+	void bindUniformBuffer(uniform_block_type blockType, size_t offset, size_t size, gr_buffer_handle buffer);
+
+	/**
+	 * @brief Get the Vulkan buffer handle for the current frame
+	 * @param handle The buffer handle
+	 * @return The VkBuffer, or VK_NULL_HANDLE if invalid
+	 */
+	vk::Buffer getVkBuffer(gr_buffer_handle handle) const;
+
+	/**
+	 * @brief Get buffer size
+	 * For streaming buffers, returns the current frame allocation size.
+	 * For static buffers, returns the total buffer size.
+	 * @param handle The buffer handle
+	 * @return Size in bytes, or 0 if invalid
+	 */
+	size_t getBufferSize(gr_buffer_handle handle) const;
+
+	/**
+	 * @brief Get the base offset for the current frame's allocation
+	 * For streaming buffers, returns the bump allocator offset.
+	 * For static buffers, returns 0.
+	 * @param handle The buffer handle
+	 * @return Byte offset for current frame's allocation
+	 */
+	size_t getFrameBaseOffset(gr_buffer_handle handle) const;
+
+	/**
+	 * @brief Check if a handle is valid
+	 */
+	bool isValidHandle(gr_buffer_handle handle) const;
+
+	/**
+	 * @brief Get statistics
+	 */
+	size_t getBufferCount() const { return m_activeBufferCount; }
+	size_t getTotalBufferMemory() const { return m_totalBufferMemory; }
+
+	/**
+	 * @brief Get the constant white color buffer for fallback vertex colors
+	 * This buffer contains vec4(1,1,1,1) for shaders expecting vertColor
+	 */
+	vk::Buffer getFallbackColorBuffer() const { return m_fallbackColorBuffer; }
+
+	/**
+	 * @brief Get the constant zero texcoord buffer for fallback vertex texcoords
+	 * This buffer contains vec4(0,0,0,0) for shaders expecting vertTexCoord
+	 */
+	vk::Buffer getFallbackTexCoordBuffer() const { return m_fallbackTexCoordBuffer; }
+
+	/**
+	 * @brief Get the fallback uniform buffer for uninitialized descriptor bindings
+	 * This buffer contains zeros and is used to pre-fill all UBO descriptor bindings
+	 * to avoid undefined behavior from uninitialized descriptors after pool reset
+	 */
+	vk::Buffer getFallbackUniformBuffer() const { return m_fallbackUniformBuffer; }
+
+	/**
+	 * @brief Get the size of the fallback uniform buffer
+	 */
+	size_t getFallbackUniformBufferSize() const { return FALLBACK_UNIFORM_BUFFER_SIZE; }
+
+private:
+	/**
+	 * @brief Create a one-shot buffer (used in initialization only).
+	 */
+	bool createOneShotBuffer(vk::BufferUsageFlags usage, const void* data, size_t size, vk::Buffer& buf, VulkanAllocation& alloc) const;
+
+	/**
+	 * @brief Convert BufferType to Vulkan usage flags
+	 */
+	vk::BufferUsageFlags getVkUsageFlags(BufferType type) const;
+
+	/**
+	 * @brief Convert BufferUsageHint to memory usage
+	 */
+	MemoryUsage getMemoryUsage(BufferUsageHint hint) const;
+
+	/**
+	 * @brief Create or resize a static buffer
+	 * Streaming buffers must NOT call this — they use the frame bump allocator.
+	 */
+	bool createOrResizeBuffer(VulkanBufferObject& bufferObj, size_t size);
+
+	/**
+	 * @brief Get buffer object from handle
+	 */
+	VulkanBufferObject* getBufferObject(gr_buffer_handle handle);
+	const VulkanBufferObject* getBufferObject(gr_buffer_handle handle) const;
+
+	// Frame bump allocator
+	static constexpr size_t FRAME_ALLOC_INITIAL_SIZE = 4 * 1024 * 1024;
+
+	bool createFrameAllocBuffer(FrameBumpAllocator& alloc, size_t size);
+	void initFrameAllocators();
+	void shutdownFrameAllocators();
+	size_t bumpAllocate(size_t size);
+	void growFrameAllocator();
+
+	FrameBumpAllocator m_frameAllocs[MAX_FRAMES_IN_FLIGHT];
+	uint32_t m_uboAlignment = 256;
+
+	vk::Device m_device;
+	VulkanMemoryManager* m_memoryManager = nullptr;
+
+	uint32_t m_graphicsQueueFamily = 0;
+	uint32_t m_transferQueueFamily = 0;
+	uint32_t m_currentFrame = 0;
+
+	SCP_vector<VulkanBufferObject> m_buffers;
+	SCP_vector<int> m_freeIndices; // Recycled buffer indices
+
+	// Fallback color buffer containing white (1,1,1,1) for vertex data without colors
+	vk::Buffer m_fallbackColorBuffer;
+	VulkanAllocation m_fallbackColorAllocation;
+
+	// Fallback texcoord buffer containing (0,0,0,0) for vertex data without texcoords
+	vk::Buffer m_fallbackTexCoordBuffer;
+	VulkanAllocation m_fallbackTexCoordAllocation;
+
+	// Fallback uniform buffer (zeros) for uninitialized descriptor set UBO bindings
+	static constexpr size_t FALLBACK_UNIFORM_BUFFER_SIZE = 4096;
+	vk::Buffer m_fallbackUniformBuffer;
+	VulkanAllocation m_fallbackUniformAllocation;
+
+	size_t m_activeBufferCount = 0;
+	size_t m_totalBufferMemory = 0;
+
+	bool m_initialized = false;
+};
+
+// Global buffer manager instance (set during renderer init)
+VulkanBufferManager* getBufferManager();
+void setBufferManager(VulkanBufferManager* manager);
+
+// ========== gr_screen function pointer implementations ==========
+
+gr_buffer_handle vulkan_create_buffer(BufferType type, BufferUsageHint usage);
+void vulkan_delete_buffer(gr_buffer_handle handle);
+void vulkan_update_buffer_data(gr_buffer_handle handle, size_t size, const void* data);
+void vulkan_update_buffer_data_offset(gr_buffer_handle handle, size_t offset, size_t size, const void* data);
+void* vulkan_map_buffer(gr_buffer_handle handle);
+void vulkan_flush_mapped_buffer(gr_buffer_handle handle, size_t offset, size_t size);
+void vulkan_bind_uniform_buffer(uniform_block_type blockType, size_t offset, size_t size, gr_buffer_handle buffer);
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanConstants.h b/code/graphics/vulkan/VulkanConstants.h
new file mode 100644
index 00000000000..b199039bc6b
--- /dev/null
+++ b/code/graphics/vulkan/VulkanConstants.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <cstdint>
+
+namespace graphics {
+namespace vulkan {
+
+static constexpr uint32_t MAX_FRAMES_IN_FLIGHT = 2;
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanDeferred.cpp b/code/graphics/vulkan/VulkanDeferred.cpp
new file mode 100644
index 00000000000..e1baca720ed
--- /dev/null
+++ b/code/graphics/vulkan/VulkanDeferred.cpp
@@ -0,0 +1,1227 @@
+
+#include "VulkanDeferred.h"
+
+#include <array>
+
+#include "VulkanRenderer.h"
+#include "VulkanBuffer.h"
+#include "VulkanTexture.h"
+#include "VulkanDescriptorManager.h"
+#include "VulkanPipeline.h"
+#include "VulkanState.h"
+#include "VulkanDraw.h"
+#include "VulkanPostProcessing.h"
+#include "gr_vulkan.h"
+
+#include "cmdline/cmdline.h"
+#include "graphics/2d.h"
+#include "graphics/matrix.h"
+#include "graphics/material.h"
"graphics/grinternal.h" +#include "graphics/shadows.h" +#include "lighting/lighting.h" +#include "mission/missionparse.h" +#include "nebula/neb.h" +#include "nebula/volumetrics.h" +#include "render/3d.h" + +namespace graphics { +namespace vulkan { + +namespace { + +static bool s_vulkanOverrideFog = false; + +} // anonymous namespace + +// ========== Deferred Lighting ========== + +void vulkan_deferred_lighting_begin(bool clearNonColorBufs) +{ + if (!light_deferred_enabled()) { + return; + } + + auto* pp = getPostProcessor(); + if (!pp || !pp->isGbufInitialized()) { + return; + } + + auto* renderer = getRendererInstance(); + if (!renderer->isSceneRendering()) { + return; + } + + auto* stateTracker = getStateTracker(); + vk::CommandBuffer cmd = stateTracker->getCommandBuffer(); + + const bool msaaActive = (Cmdline_msaa_enabled > 0 && pp->isMsaaInitialized()); + + // End the current G-buffer render pass to perform the color→emissive copy. + // All 6 color attachments transition to eShaderReadOnlyOptimal (finalLayout). + cmd.endRenderPass(); + + // Copy scene color → non-MSAA emissive (pre-deferred content becomes emissive). + // Skip both post-barriers — conditional MSAA/non-MSAA code below handles transitions. + copyImageToImage(cmd, + pp->getSceneColorImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eTransferSrcOptimal, + pp->getGbufEmissiveImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eTransferDstOptimal, + pp->getSceneExtent()); + + if (msaaActive) { + // --- MSAA path --- + // Transition scene color: eTransferSrcOptimal → eShaderReadOnlyOptimal + // (will be sampled inside MSAA pass to fill emissive) + // Transition non-MSAA emissive: eTransferDstOptimal → eShaderReadOnlyOptimal (preserved for later) + { + std::array barriers; + + barriers[0].srcAccessMask = vk::AccessFlagBits::eTransferRead; + barriers[0].dstAccessMask = vk::AccessFlagBits::eShaderRead; + barriers[0].oldLayout = vk::ImageLayout::eTransferSrcOptimal; + barriers[0].newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].image = pp->getSceneColorImage(); + barriers[0].subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + + barriers[1].srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barriers[1].dstAccessMask = {}; + barriers[1].oldLayout = vk::ImageLayout::eTransferDstOptimal; + barriers[1].newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].image = pp->getGbufEmissiveImage(); + barriers[1].subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + {}, nullptr, nullptr, barriers); + } + + // Transition MSAA images to expected initial layouts + pp->transitionMsaaGbufForBegin(cmd); + + // Begin MSAA G-buffer render pass (eClear — clears all attachments) + { + auto extent = pp->getSceneExtent(); + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = pp->getMsaaGbufRenderPass(); + rpBegin.framebuffer = pp->getMsaaGbufFramebuffer(); + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = extent; + std::array clearValues{}; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); + clearValues[1].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); + 
+			clearValues[2].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f});
+			clearValues[3].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f});
+			clearValues[4].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f});
+			clearValues[5].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+			rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+			rpBegin.pClearValues = clearValues.data();
+			cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+			stateTracker->setRenderPass(pp->getMsaaGbufRenderPass(), 0);
+			stateTracker->setColorAttachmentCount(VulkanPostProcessor::MSAA_COLOR_ATTACHMENT_COUNT);
+			stateTracker->setCurrentSampleCount(renderer->getMsaaSampleCount());
+		}
+
+		// Fill MSAA emissive with pre-deferred scene content (starfield, backgrounds).
+		// Draw a fullscreen tri sampling non-MSAA scene color, writing to all attachments.
+		// Only emissive (attachment 4) matters — the other attachments will be overwritten
+		// by model rendering. Use per-attachment color write mask to write only att 4.
+		{
+			auto* pipelineMgr = getPipelineManager();
+
+			PipelineConfig config;
+			config.shaderType = SDR_TYPE_COPY;
+			config.primitiveType = PRIM_TYPE_TRIS;
+			config.depthMode = ZBUFFER_TYPE_NONE;
+			config.blendMode = ALPHA_BLEND_NONE;
+			config.cullEnabled = false;
+			config.depthWriteEnabled = false;
+			config.renderPass = pp->getMsaaGbufRenderPass();
+			config.sampleCount = renderer->getMsaaSampleCount();
+			config.colorAttachmentCount = VulkanPostProcessor::MSAA_COLOR_ATTACHMENT_COUNT;
+
+			// Per-attachment blend: only write to attachment 4 (emissive)
+			config.perAttachmentBlendEnabled = true;
+			for (uint32_t i = 0; i < config.colorAttachmentCount; ++i) {
+				config.attachmentBlends[i].blendMode = ALPHA_BLEND_NONE;
+				config.attachmentBlends[i].writeMask = {false, false, false, false};
+			}
+			config.attachmentBlends[4].writeMask = {true, true, true, true};
+
+			vertex_layout emptyLayout;
+			vk::Pipeline pipeline = pipelineMgr->getPipeline(config, emptyLayout);
+			if (pipeline) {
+				// Use drawFullscreenTriangle pattern but inline since we're already in a render pass
+				auto* descriptorMgr = getDescriptorManager();
+				auto* bufferMgr = getBufferManager();
+				auto* texMgr = getTextureManager();
+
+				cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
+
+				auto extent = pp->getSceneExtent();
+				vk::Viewport viewport;
+				viewport.x = 0.0f;
+				viewport.y = 0.0f;
+				viewport.width = static_cast<float>(extent.width);
+				viewport.height = static_cast<float>(extent.height);
+				viewport.minDepth = 0.0f;
+				viewport.maxDepth = 1.0f;
+				cmd.setViewport(0, viewport);
+				vk::Rect2D scissor;
+				scissor.offset = vk::Offset2D(0, 0);
+				scissor.extent = extent;
+				cmd.setScissor(0, scissor);
+
+				// Bind descriptors with scene color as source
+				auto fallbackBuf = bufferMgr->getFallbackUniformBuffer();
+				auto fallbackBufSize = static_cast<vk::DeviceSize>(bufferMgr->getFallbackUniformBufferSize());
+				auto fallbackView = texMgr->getFallbackTextureView2D();
+				auto fallbackSampler = texMgr->getDefaultSampler();
+
+				DescriptorWriter writer;
+				writer.reset(descriptorMgr->getDevice());
+
+				vk::DescriptorSet globalSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Global);
+				Verify(globalSet);
+				writer.writeUniformBuffer(globalSet, 0, fallbackBuf, 0, fallbackBufSize);
+				writer.writeUniformBuffer(globalSet, 1, fallbackBuf, 0, fallbackBufSize);
+				writer.writeTexture(globalSet, 2, fallbackView, fallbackSampler);
+				auto fallbackCubeView = texMgr->getFallbackCubeView();
+				writer.writeTexture(globalSet, 3, fallbackCubeView, fallbackSampler);
+				writer.writeTexture(globalSet, 4, fallbackCubeView, fallbackSampler);
+				writer.flush();
+				cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics,
+					pipelineMgr->getPipelineLayout(),
+					static_cast<uint32_t>(DescriptorSetIndex::Global), globalSet, {});
+
+				vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material);
+				Verify(materialSet);
+				writer.writeUniformBuffer(materialSet, 0, fallbackBuf, 0, fallbackBufSize);
+				writer.writeUniformBuffer(materialSet, 2, fallbackBuf, 0, fallbackBufSize);
+				writer.writeStorageBuffer(materialSet, 3, fallbackBuf, 0, fallbackBufSize);
+
+				// Build texture array with scene color at slot 0, fallback at slots 1-15
+				std::array<vk::DescriptorImageInfo, VulkanDescriptorManager::MAX_TEXTURE_BINDINGS> texImages;
+				texImages[0].sampler = pp->getSceneColorSampler();
+				texImages[0].imageView = pp->getSceneColorView();
+				texImages[0].imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+				for (uint32_t slot = 1; slot < VulkanDescriptorManager::MAX_TEXTURE_BINDINGS; ++slot) {
+					texImages[slot].sampler = fallbackSampler;
+					texImages[slot].imageView = fallbackView;
+					texImages[slot].imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+				}
+				writer.writeTextureArray(materialSet, 1, texImages.data(), static_cast<uint32_t>(texImages.size()));
+				writer.writeTexture(materialSet, 4, fallbackView, fallbackSampler);
+				writer.writeTexture(materialSet, 5, fallbackView, fallbackSampler);
+				writer.writeTexture(materialSet, 6, fallbackView, fallbackSampler);
+				writer.flush();
+				cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics,
+					pipelineMgr->getPipelineLayout(),
+					static_cast<uint32_t>(DescriptorSetIndex::Material), materialSet, {});
+
+				vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw);
+				Verify(perDrawSet);
+				for (uint32_t b = 0; b < 5; ++b) {
+					writer.writeUniformBuffer(perDrawSet, b, fallbackBuf, 0, fallbackBufSize);
+				}
+				writer.flush();
+				cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics,
+					pipelineMgr->getPipelineLayout(),
+					static_cast<uint32_t>(DescriptorSetIndex::PerDraw), perDrawSet, {});
+
+				cmd.draw(3, 1, 0, 0);
+			}
+		}
+	} else {
+		// --- Non-MSAA path (original) ---
+		// Transition scene color back to eColorAttachmentOptimal.
+		// Transition emissive to eShaderReadOnlyOptimal (where transitionGbufForResume expects it).
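+		// Informal layout timeline for this path, summarizing the copy above and
+		// the barriers below (SRO = eShaderReadOnlyOptimal):
+		//   scene color: SRO → eTransferSrcOptimal (copy src) → eColorAttachmentOptimal
+		//   emissive:    SRO → eTransferDstOptimal (copy dst) → SRO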
+		{
+			std::array<vk::ImageMemoryBarrier, 2> barriers;
+
+			barriers[0].srcAccessMask = vk::AccessFlagBits::eTransferRead;
+			barriers[0].dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+			barriers[0].oldLayout = vk::ImageLayout::eTransferSrcOptimal;
+			barriers[0].newLayout = vk::ImageLayout::eColorAttachmentOptimal;
+			barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barriers[0].image = pp->getSceneColorImage();
+			barriers[0].subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+			barriers[1].srcAccessMask = vk::AccessFlagBits::eTransferWrite;
+			barriers[1].dstAccessMask = {};
+			barriers[1].oldLayout = vk::ImageLayout::eTransferDstOptimal;
+			barriers[1].newLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+			barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barriers[1].image = pp->getGbufEmissiveImage();
+			barriers[1].subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+			cmd.pipelineBarrier(
+				vk::PipelineStageFlagBits::eTransfer,
+				vk::PipelineStageFlagBits::eColorAttachmentOutput,
+				{}, nullptr, nullptr, barriers);
+		}
+
+		// Transition G-buffer attachments 1-5 from eShaderReadOnlyOptimal → eColorAttachmentOptimal
+		pp->transitionGbufForResume(cmd);
+
+		// Resume G-buffer render pass with eLoad
+		{
+			auto extent = pp->getSceneExtent();
+			vk::RenderPassBeginInfo rpBegin;
+			rpBegin.renderPass = pp->getGbufRenderPassLoad();
+			rpBegin.framebuffer = pp->getGbufFramebuffer();
+			rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+			rpBegin.renderArea.extent = extent;
+			std::array<vk::ClearValue, 7> clearValues{};
+			clearValues[6].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+			rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+			rpBegin.pClearValues = clearValues.data();
+			cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+			stateTracker->setRenderPass(pp->getGbufRenderPassLoad(), 0);
+		}
+
+		// Optionally clear non-color G-buffer attachments
+		if (clearNonColorBufs) {
+			vk::ClearAttachment clearAtt;
+			clearAtt.aspectMask = vk::ImageAspectFlagBits::eColor;
+			clearAtt.clearValue.color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f});
+
+			auto extent = pp->getSceneExtent();
+			vk::ClearRect clearRect;
+			clearRect.rect.offset = vk::Offset2D(0, 0);
+			clearRect.rect.extent = extent;
+			clearRect.baseArrayLayer = 0;
+			clearRect.layerCount = 1;
+
+			for (uint32_t att : {1u, 2u, 3u, 5u}) {
+				clearAtt.colorAttachment = att;
+				cmd.clearAttachments(clearAtt, clearRect);
+			}
+		}
+	}
+
+	Deferred_lighting = true;
+}
+
+void vulkan_deferred_lighting_msaa()
+{
+	if (Cmdline_msaa_enabled <= 0) {
+		return;
+	}
+
+	auto* pp = getPostProcessor();
+	if (!pp || !pp->isMsaaInitialized()) {
+		return;
+	}
+
+	auto* stateTracker = getStateTracker();
+	vk::CommandBuffer cmd = stateTracker->getCommandBuffer();
+
+	// End MSAA G-buffer render pass.
+	// With finalLayout == subpass layout, all attachments stay in their subpass layouts:
+	// colors remain eColorAttachmentOptimal, depth remains eDepthStencilAttachmentOptimal.
+	cmd.endRenderPass();
+
+	// Reset sample count to 1x (resolve and subsequent passes are non-MSAA)
+	stateTracker->setCurrentSampleCount(vk::SampleCountFlagBits::e1);
+
+	// Explicit barriers: transition all 6 MSAA images to eShaderReadOnlyOptimal
+	// for sampling by the resolve shader. We use explicit barriers instead of
+	// render pass finalLayout transitions to ensure the validation layer tracks
+	// the layout changes correctly.
+	{
+		std::array<vk::ImageMemoryBarrier, 6> barriers;
+
+		// 5 color images: eColorAttachmentOptimal → eShaderReadOnlyOptimal
+		vk::Image msaaImages[5] = {
+			pp->getMsaaColorImage(),
+			pp->getMsaaPositionImage(),
+			pp->getMsaaNormalImage(),
+			pp->getMsaaSpecularImage(),
+			pp->getMsaaEmissiveImage(),
+		};
+		for (int i = 0; i < 5; ++i) {
+			barriers[i].srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+			barriers[i].dstAccessMask = vk::AccessFlagBits::eShaderRead;
+			barriers[i].oldLayout = vk::ImageLayout::eColorAttachmentOptimal;
+			barriers[i].newLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+			barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barriers[i].image = msaaImages[i];
+			barriers[i].subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+		}
+
+		// Depth: eDepthStencilAttachmentOptimal → eShaderReadOnlyOptimal
+		barriers[5].srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite;
+		barriers[5].dstAccessMask = vk::AccessFlagBits::eShaderRead;
+		barriers[5].oldLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+		barriers[5].newLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		barriers[5].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barriers[5].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barriers[5].image = pp->getMsaaDepthImage();
+		barriers[5].subresourceRange = {vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1};
+
+		cmd.pipelineBarrier(
+			vk::PipelineStageFlagBits::eColorAttachmentOutput | vk::PipelineStageFlagBits::eLateFragmentTests,
+			vk::PipelineStageFlagBits::eFragmentShader,
+			{}, nullptr, nullptr, barriers);
+	}
+
+	// Begin resolve render pass (non-MSAA, writes to standard G-buffer images)
+	{
+		auto extent = pp->getSceneExtent();
+		vk::RenderPassBeginInfo rpBegin;
+		rpBegin.renderPass = pp->getMsaaResolveRenderPass();
+		rpBegin.framebuffer = pp->getMsaaResolveFramebuffer();
+		rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+		rpBegin.renderArea.extent = extent;
+		// 6 attachments: 5 color + depth. loadOp=eDontCare for all (fully overwritten).
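+		// (With eDontCare, Vulkan ignores these clear values; they are supplied
+		// only so clearValueCount covers every attachment index.)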
+		std::array<vk::ClearValue, 6> clearValues{};
+		clearValues[5].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+		rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+		rpBegin.pClearValues = clearValues.data();
+		cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+
+		auto* pipelineMgr = getPipelineManager();
+		auto* descriptorMgr = getDescriptorManager();
+		auto* bufferMgr = getBufferManager();
+		auto* texMgr = getTextureManager();
+
+		PipelineConfig config;
+		config.shaderType = SDR_TYPE_MSAA_RESOLVE;
+		config.primitiveType = PRIM_TYPE_TRIS;
+		config.depthMode = ZBUFFER_TYPE_FULL;
+		config.blendMode = ALPHA_BLEND_NONE;
+		config.cullEnabled = false;
+		config.depthWriteEnabled = true;
+		config.renderPass = pp->getMsaaResolveRenderPass();
+		config.colorAttachmentCount = 5;
+
+		vertex_layout emptyLayout;
+		vk::Pipeline pipeline = pipelineMgr->getPipeline(config, emptyLayout);
+		if (pipeline) {
+			cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
+
+			vk::Viewport viewport;
+			viewport.x = 0.0f;
+			viewport.y = 0.0f;
+			viewport.width = static_cast<float>(extent.width);
+			viewport.height = static_cast<float>(extent.height);
+			viewport.minDepth = 0.0f;
+			viewport.maxDepth = 1.0f;
+			cmd.setViewport(0, viewport);
+			vk::Rect2D scissor;
+			scissor.offset = vk::Offset2D(0, 0);
+			scissor.extent = extent;
+			cmd.setScissor(0, scissor);
+
+			auto fallbackBuf = bufferMgr->getFallbackUniformBuffer();
+			auto fallbackBufSize = static_cast<vk::DeviceSize>(bufferMgr->getFallbackUniformBufferSize());
+			auto fallbackView = texMgr->getFallbackTextureView2D();
+			auto fallbackSampler = texMgr->getDefaultSampler();
+
+			DescriptorWriter writer;
+			writer.reset(descriptorMgr->getDevice());
+
+			// Global set (fallback — resolve shader doesn't use global bindings)
+			vk::DescriptorSet globalSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Global);
+			Verify(globalSet);
+			writer.writeUniformBuffer(globalSet, 0, fallbackBuf, 0, fallbackBufSize);
+			writer.writeUniformBuffer(globalSet, 1, fallbackBuf, 0, fallbackBufSize);
+			writer.writeTexture(globalSet, 2, fallbackView, fallbackSampler);
+			auto fallbackCubeView = texMgr->getFallbackCubeView();
+			writer.writeTexture(globalSet, 3, fallbackCubeView, fallbackSampler);
+			writer.writeTexture(globalSet, 4, fallbackCubeView, fallbackSampler);
+			writer.flush();
+			writer.reset(descriptorMgr->getDevice());
+			cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics,
+				pipelineMgr->getPipelineLayout(),
+				static_cast<uint32_t>(DescriptorSetIndex::Global), globalSet, {});
+
+			// Material set: All 6 MSAA textures in binding 1 array (elements 0-5)
+			// [0]=color, [1]=position, [2]=normal, [3]=specular, [4]=emissive, [5]=depth
+			vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material);
+			Verify(materialSet);
+			writer.writeUniformBuffer(materialSet, 0, fallbackBuf, 0, fallbackBufSize);
+			writer.writeUniformBuffer(materialSet, 2, fallbackBuf, 0, fallbackBufSize);
+			writer.writeStorageBuffer(materialSet, 3, fallbackBuf, 0, fallbackBufSize);
+
+			// Build texture array: elements 0-5 are MSAA textures, 6-15 are fallback
+			vk::Sampler nearestSampler = texMgr->getSampler(
+				vk::Filter::eNearest, vk::Filter::eNearest,
+				vk::SamplerAddressMode::eClampToEdge, false, 0.0f, false);
+
+			std::array<vk::DescriptorImageInfo, VulkanDescriptorManager::MAX_TEXTURE_BINDINGS> texImages;
+			// MSAA textures at slots 0-5
+			texImages[0] = {nearestSampler, pp->getMsaaColorView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+			texImages[1] = {nearestSampler, pp->getMsaaPositionView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+			texImages[2] = {nearestSampler, pp->getMsaaNormalView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+			texImages[3] = {nearestSampler, pp->getMsaaSpecularView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+			texImages[4] = {nearestSampler, pp->getMsaaEmissiveView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+			texImages[5] = {nearestSampler, pp->getMsaaDepthView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+			// Remaining slots must also be multisampled (validation checks ALL
+			// elements even though the shader only accesses 0-5). Reuse the
+			// MSAA color view — content doesn't matter, only sample count.
+			for (uint32_t slot = 6; slot < VulkanDescriptorManager::MAX_TEXTURE_BINDINGS; ++slot) {
+				texImages[slot] = {nearestSampler, pp->getMsaaColorView(), vk::ImageLayout::eShaderReadOnlyOptimal};
+			}
+			writer.writeTextureArray(materialSet, 1, texImages.data(), static_cast<uint32_t>(texImages.size()));
+
+			// Fallback for single-sampler bindings 4-6
+			writer.writeTexture(materialSet, 4, fallbackView, fallbackSampler);
+			writer.writeTexture(materialSet, 5, fallbackView, fallbackSampler);
+			writer.writeTexture(materialSet, 6, fallbackView, fallbackSampler);
+			writer.flush();
+			writer.reset(descriptorMgr->getDevice());
+			cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics,
+				pipelineMgr->getPipelineLayout(),
+				static_cast<uint32_t>(DescriptorSetIndex::Material), materialSet, {});
+
+			// PerDraw set: GenericData UBO with {samples, fov} at binding 0
+			vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw);
+			Verify(perDrawSet);
+			// Write resolve data to per-frame UBO slot
+			struct MsaaResolveData {
+				int samples;
+				float fov;
+			} resolveData;
+			resolveData.samples = Cmdline_msaa_enabled;
+			resolveData.fov = g3_get_hfov(Proj_fov);
+
+			uint32_t frame = bufferMgr->getCurrentFrame();
+			uint32_t slotOffset = frame * 256;
+			memcpy(static_cast<char*>(pp->getMsaaResolveUBOMapped()) + slotOffset,
+				&resolveData, sizeof(resolveData));
+
+			writer.writeUniformBuffer(perDrawSet, 0,
+				pp->getMsaaResolveUBO(), slotOffset, 256);
+
+			// Fallback for remaining PerDraw UBO bindings (1-4)
+			for (uint32_t b = 1; b <= 4; ++b) {
+				writer.writeUniformBuffer(perDrawSet, b, fallbackBuf, 0, fallbackBufSize);
+			}
+			writer.flush();
+			cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics,
+				pipelineMgr->getPipelineLayout(),
+				static_cast<uint32_t>(DescriptorSetIndex::PerDraw), perDrawSet, {});
+
+			cmd.draw(3, 1, 0, 0);
+		}
+
+		cmd.endRenderPass();
+	}
+
+	// Transition MSAA images back to their resting layout (eColorAttachmentOptimal /
+	// eDepthStencilAttachmentOptimal) so they match the validation layer's global
+	// tracking state for the next frame. The post-G-buffer barriers moved them to
+	// eShaderReadOnlyOptimal for the resolve pass; now we restore them.
+	{
+		std::array<vk::ImageMemoryBarrier, 6> restoreBarriers;
+
+		vk::Image msaaImages[5] = {
+			pp->getMsaaColorImage(),
+			pp->getMsaaPositionImage(),
+			pp->getMsaaNormalImage(),
+			pp->getMsaaSpecularImage(),
+			pp->getMsaaEmissiveImage(),
+		};
+		for (int i = 0; i < 5; ++i) {
+			restoreBarriers[i].srcAccessMask = vk::AccessFlagBits::eShaderRead;
+			restoreBarriers[i].dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+			restoreBarriers[i].oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+			restoreBarriers[i].newLayout = vk::ImageLayout::eColorAttachmentOptimal;
+			restoreBarriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			restoreBarriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			restoreBarriers[i].image = msaaImages[i];
+			restoreBarriers[i].subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+		}
+
+		restoreBarriers[5].srcAccessMask = vk::AccessFlagBits::eShaderRead;
+		restoreBarriers[5].dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite;
+		restoreBarriers[5].oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		restoreBarriers[5].newLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+		restoreBarriers[5].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		restoreBarriers[5].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		restoreBarriers[5].image = pp->getMsaaDepthImage();
+		restoreBarriers[5].subresourceRange = {vk::ImageAspectFlagBits::eDepth, 0, 1, 0, 1};
+
+		cmd.pipelineBarrier(
+			vk::PipelineStageFlagBits::eFragmentShader,
+			vk::PipelineStageFlagBits::eColorAttachmentOutput | vk::PipelineStageFlagBits::eEarlyFragmentTests,
+			{}, nullptr, nullptr, restoreBarriers);
+	}
+
+	// After resolve, the non-MSAA G-buffer has properly resolved data.
+	// Color attachments 0-4 are in eShaderReadOnlyOptimal (from resolve pass finalLayout).
+	// Depth is in eDepthStencilAttachmentOptimal.
+	// Subsequent deferred_lighting_end/finish operate on the non-MSAA G-buffer unchanged.
+
+	// Transition scene color from eShaderReadOnlyOptimal → eColorAttachmentOptimal
+	// (deferred_lighting_end resumes the non-MSAA gbuf pass and needs scene color writable)
+	{
+		vk::ImageMemoryBarrier barrier;
+		barrier.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+		barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+		barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal;
+		barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.image = pp->getSceneColorImage();
+		barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+		cmd.pipelineBarrier(
+			vk::PipelineStageFlagBits::eColorAttachmentOutput,
+			vk::PipelineStageFlagBits::eColorAttachmentOutput,
+			{}, nullptr, nullptr, barrier);
+	}
+
+	// Composite is not part of the resolve framebuffer, so its layout is
+	// indeterminate (UNDEFINED on first frame, eTransferSrcOptimal from
+	// previous frame's composite→scene copy, etc.). Use oldLayout=eUndefined
+	// to transition it regardless of current state — content will be fully
+	// overwritten by emissive→composite copy in deferred_lighting_finish().
+	{
+		vk::ImageMemoryBarrier barrier;
+		barrier.srcAccessMask = {};
+		barrier.dstAccessMask = {};
+		barrier.oldLayout = vk::ImageLayout::eUndefined;
+		barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.image = pp->getGbufCompositeImage();
+		barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+		cmd.pipelineBarrier(
+			vk::PipelineStageFlagBits::eTopOfPipe,
+			vk::PipelineStageFlagBits::eColorAttachmentOutput,
+			{}, nullptr, nullptr, barrier);
+	}
+
+	// Transition G-buffer attachments 1-5 for resume
+	// (all now in eShaderReadOnlyOptimal: 1-4 from resolve finalLayout, 5 from above)
+	pp->transitionGbufForResume(cmd);
+
+	// Resume the non-MSAA G-buffer render pass with eLoad
+	{
+		auto extent = pp->getSceneExtent();
+		vk::RenderPassBeginInfo rpBegin;
+		rpBegin.renderPass = pp->getGbufRenderPassLoad();
+		rpBegin.framebuffer = pp->getGbufFramebuffer();
+		rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+		rpBegin.renderArea.extent = extent;
+		std::array<vk::ClearValue, 7> clearValues{};
+		clearValues[6].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+		rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+		rpBegin.pClearValues = clearValues.data();
+		cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+		stateTracker->setRenderPass(pp->getGbufRenderPassLoad(), 0);
+		stateTracker->setColorAttachmentCount(VulkanPostProcessor::GBUF_COLOR_ATTACHMENT_COUNT);
+	}
+}
+
+void vulkan_deferred_lighting_end()
+{
+	if (!Deferred_lighting) {
+		return;
+	}
+
+	Deferred_lighting = false;
+
+	// After this, rendering goes back to writing only attachment 0.
+	// The pipeline still has 6 blend states (matching the G-buffer render pass)
+	// but the shader only outputs to location 0. Attachments 1-5 are untouched.
+}
+
+void vulkan_deferred_lighting_finish()
+{
+	if (!light_deferred_enabled()) {
+		return;
+	}
+
+	auto* pp = getPostProcessor();
+	if (!pp || !pp->isGbufInitialized()) {
+		return;
+	}
+
+	auto* renderer = getRendererInstance();
+	if (!renderer->isSceneRendering()) {
+		return;
+	}
+
+	auto* stateTracker = getStateTracker();
+	vk::CommandBuffer cmd = stateTracker->getCommandBuffer();
+
+	// 1. End G-buffer render pass
+	//    All 6 color attachments → eShaderReadOnlyOptimal
+	//    Depth → eDepthStencilAttachmentOptimal
+	cmd.endRenderPass();
+
+	// 2. Copy emissive → composite (the emissive data becomes the base for light accumulation)
+	//    Emissive → eShaderReadOnlyOptimal (done), composite → eColorAttachmentOptimal (for light accum)
+	copyImageToImage(cmd,
+		pp->getGbufEmissiveImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eShaderReadOnlyOptimal,
+		pp->getGbufCompositeImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eColorAttachmentOptimal,
+		pp->getSceneExtent());
+
+	// 3. Render deferred lights (begins + ends light accum render pass internally)
+	//    After this, composite is in eShaderReadOnlyOptimal
+	pp->renderDeferredLights(cmd);
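+	// Fog combinations handled below (informal summary of the flag logic):
+	//   full neb only    → renderSceneFog
+	//   volumetrics only → renderVolumetricFog
+	//   both             → renderSceneFog, copy scene→composite, renderVolumetricFog
+	//   neither          → copy composite→scene color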
+	// 4. Fog rendering (between light accumulation and forward rendering)
+	//    Matches OpenGL flow in opengl_deferred_lighting_finish()
+	bool bDrawFullNeb = The_mission.flags[Mission::Mission_Flags::Fullneb]
+		&& Neb2_render_mode != NEB2_RENDER_NONE && !s_vulkanOverrideFog;
+	bool bDrawNebVolumetrics = The_mission.volumetrics
+		&& The_mission.volumetrics->get_enabled() && !s_vulkanOverrideFog;
+
+	bool fogRendered = false;
+	if (bDrawFullNeb) {
+		// Scene fog reads composite + depth → writes scene color
+		pp->renderSceneFog(cmd);
+		fogRendered = true;
+
+		if (bDrawNebVolumetrics) {
+			// Copy scene color → composite so volumetric reads the fogged result
+			copyImageToImage(cmd,
+				pp->getSceneColorImage(), vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eColorAttachmentOptimal,
+				pp->getGbufCompositeImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eShaderReadOnlyOptimal,
+				pp->getSceneExtent());
+		}
+	}
+	if (bDrawNebVolumetrics) {
+		// Volumetric fog reads composite + emissive + depth + 3D volumes → writes scene color
+		pp->renderVolumetricFog(cmd);
+		fogRendered = true;
+	}
+
+	if (!fogRendered) {
+		// No fog — copy composite → scene color (existing behavior)
+		// Skip src post-barrier (composite not used again in this path)
+		copyImageToImage(cmd,
+			pp->getGbufCompositeImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eTransferSrcOptimal,
+			pp->getSceneColorImage(), vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eColorAttachmentOptimal,
+			pp->getSceneExtent());
+	}
+
+	// 5. Switch to scene render pass for forward transparent objects
+	//    After light accumulation, use the 2-attachment scene render pass instead
+	//    of the 6-attachment G-buffer pass. Forward-rendered transparent objects
+	//    only write to fragOut0 — using the G-buffer pass would leave undefined
+	//    values at attachment locations 1-5.
+	renderer->setUseGbufRenderPass(false);
+	stateTracker->setColorAttachmentCount(1);
+
+	// Resume scene render pass (loadOp=eLoad) with depth preserved
+	{
+		auto extent = pp->getSceneExtent();
+		vk::RenderPassBeginInfo rpBegin;
+		rpBegin.renderPass = pp->getSceneRenderPassLoad();
+		rpBegin.framebuffer = pp->getSceneFramebuffer();
+		rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+		rpBegin.renderArea.extent = extent;
+		std::array<vk::ClearValue, 2> clearValues;
+		clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f});
+		clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+		rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+		rpBegin.pClearValues = clearValues.data();
+		cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+		stateTracker->setRenderPass(pp->getSceneRenderPassLoad(), 0);
+	}
+}
+
+void vulkan_override_fog(bool set_override)
+{
+	s_vulkanOverrideFog = set_override;
+}
+
+// ========== Shadow Map Rendering ==========
+
+} // namespace vulkan
+} // namespace graphics
+
+extern bool Glowpoint_override;
+extern bool gr_htl_projection_matrix_set;
+
+namespace graphics {
+namespace vulkan {
+
+namespace {
+static bool Glowpoint_override_save = false;
+} // anonymous namespace
+
+void vulkan_shadow_map_start(matrix4* shadow_view_matrix, const matrix* light_matrix, vec3d* eye_pos)
+{
+	if (Shadow_quality == ShadowQuality::Disabled || !getRendererInstance()->supportsShaderViewportLayerOutput()) {
+		return;
+	}
+
+	// Shadows require the G-buffer render pass (deferred lighting).
+	// In contexts without deferred lighting (e.g. tech room), the active
+	// render pass is the swap chain or 2-attachment scene pass — ending it
+	// and resuming the G-buffer pass would break rendering.
+	if (!getRendererInstance()->isUsingGbufRenderPass()) {
+		return;
+	}
+
+	auto* pp = getPostProcessor();
+	if (!pp) {
+		return;
+	}
+
+	// Lazy-init shadow resources
+	if (!pp->isShadowInitialized()) {
+		if (!pp->initShadowPass()) {
+			return;
+		}
+	}
+
+	auto* stateTracker = getStateTracker();
+	vk::CommandBuffer cmd = stateTracker->getCommandBuffer();
+
+	// End the current G-buffer render pass
+	cmd.endRenderPass();
+
+	// Shadow render pass is always non-MSAA (1x sample count)
+	stateTracker->setCurrentSampleCount(vk::SampleCountFlagBits::e1);
+
+	// Begin shadow render pass (eClear for both color and depth)
+	{
+		int shadowSize = pp->getShadowTextureSize();
+		vk::RenderPassBeginInfo rpBegin;
+		rpBegin.renderPass = pp->getShadowRenderPass();
+		rpBegin.framebuffer = pp->getShadowFramebuffer();
+		rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+		rpBegin.renderArea.extent = vk::Extent2D(static_cast<uint32_t>(shadowSize), static_cast<uint32_t>(shadowSize));
+
+		std::array<vk::ClearValue, 2> clearValues;
+		clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f});
+		clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+		rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+		rpBegin.pClearValues = clearValues.data();
+
+		cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+		stateTracker->setRenderPass(pp->getShadowRenderPass(), 0);
+		stateTracker->setColorAttachmentCount(1);
+	}
+
+	// Set viewport and scissor to shadow texture size
+	{
+		int shadowSize = pp->getShadowTextureSize();
+		vk::Viewport viewport;
+		viewport.x = 0.0f;
+		viewport.y = 0.0f;
+		viewport.width = static_cast<float>(shadowSize);
+		viewport.height = static_cast<float>(shadowSize);
+		viewport.minDepth = 0.0f;
+		viewport.maxDepth = 1.0f;
+		cmd.setViewport(0, viewport);
+
+		vk::Rect2D scissor;
+		scissor.offset = vk::Offset2D(0, 0);
+		scissor.extent = vk::Extent2D(static_cast<uint32_t>(shadowSize), static_cast<uint32_t>(shadowSize));
+		cmd.setScissor(0, scissor);
+	}
+
+	Rendering_to_shadow_map = true;
+	Glowpoint_override_save = Glowpoint_override;
+	Glowpoint_override = true;
+
+	gr_htl_projection_matrix_set = true;
+
+	gr_set_view_matrix(eye_pos, light_matrix);
+
+	*shadow_view_matrix = gr_view_matrix;
+}
+
+void vulkan_shadow_map_end()
+{
+	if (!Rendering_to_shadow_map) {
+		return;
+	}
+
+	auto* pp = getPostProcessor();
+	auto* stateTracker = getStateTracker();
+	vk::CommandBuffer cmd = stateTracker->getCommandBuffer();
+
+	gr_end_view_matrix();
+	Rendering_to_shadow_map = false;
+
+	gr_zbuffer_set(ZBUFFER_TYPE_FULL);
+
+	Glowpoint_override = Glowpoint_override_save;
+	gr_htl_projection_matrix_set = false;
+
+	// End shadow render pass (color transitions to eShaderReadOnlyOptimal via finalLayout)
+	cmd.endRenderPass();
+
+	const bool msaaActive = (Cmdline_msaa_enabled > 0 && pp->isMsaaInitialized());
+
+	if (msaaActive) {
+		// Resume MSAA G-buffer render pass
+		pp->transitionMsaaGbufForResume(cmd);
+
+		auto extent = pp->getSceneExtent();
+		vk::RenderPassBeginInfo rpBegin;
+		rpBegin.renderPass = pp->getMsaaGbufRenderPassLoad();
+		rpBegin.framebuffer = pp->getMsaaGbufFramebuffer();
+		rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+		rpBegin.renderArea.extent = extent;
+		std::array<vk::ClearValue, 6> clearValues{};
+		clearValues[5].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+		rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+		rpBegin.pClearValues = clearValues.data();
+		cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+		stateTracker->setRenderPass(pp->getMsaaGbufRenderPassLoad(), 0);
+		stateTracker->setColorAttachmentCount(VulkanPostProcessor::MSAA_COLOR_ATTACHMENT_COUNT);
+		stateTracker->setCurrentSampleCount(getRendererInstance()->getMsaaSampleCount());
+	} else {
+		// Transition scene color: eShaderReadOnlyOptimal → eColorAttachmentOptimal
+		// (Scene color was in eShaderReadOnlyOptimal from ending G-buffer pass before shadow start)
+		{
+			vk::ImageMemoryBarrier barrier;
+			barrier.srcAccessMask = {};
+			barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+			barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+			barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal;
+			barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barrier.image = pp->getSceneColorImage();
+			barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+			cmd.pipelineBarrier(
+				vk::PipelineStageFlagBits::eTopOfPipe,
+				vk::PipelineStageFlagBits::eColorAttachmentOutput,
+				{}, nullptr, nullptr, barrier);
+		}
+
+		// Transition G-buffer attachments 1-5 for resume
+		pp->transitionGbufForResume(cmd);
+
+		// Resume G-buffer render pass with eLoad
+		auto extent = pp->getSceneExtent();
+		vk::RenderPassBeginInfo rpBegin;
+		rpBegin.renderPass = pp->getGbufRenderPassLoad();
+		rpBegin.framebuffer = pp->getGbufFramebuffer();
+		rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+		rpBegin.renderArea.extent = extent;
+
+		std::array<vk::ClearValue, 7> clearValues{};
+		clearValues[6].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+		rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+		rpBegin.pClearValues = clearValues.data();
+
+		cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+		stateTracker->setRenderPass(pp->getGbufRenderPassLoad(), 0);
+		stateTracker->setColorAttachmentCount(VulkanPostProcessor::GBUF_COLOR_ATTACHMENT_COUNT);
+	}
+
+	// Restore viewport and scissor to scene size
+	{
+		vk::Viewport viewport;
+		viewport.x = static_cast<float>(gr_screen.offset_x);
+		viewport.y = static_cast<float>(gr_screen.offset_y);
+		viewport.width = static_cast<float>(gr_screen.clip_width);
+		viewport.height = static_cast<float>(gr_screen.clip_height);
+		viewport.minDepth = 0.0f;
+		viewport.maxDepth = 1.0f;
+		cmd.setViewport(0, viewport);
+
+		vk::Rect2D scissor;
+		scissor.offset = vk::Offset2D(gr_screen.offset_x, gr_screen.offset_y);
+		scissor.extent = vk::Extent2D(static_cast<uint32_t>(gr_screen.clip_width), static_cast<uint32_t>(gr_screen.clip_height));
+		cmd.setScissor(0, scissor);
+	}
+}
+
+// ========== Decal Pass ==========
+
+void vulkan_start_decal_pass()
+{
+	auto* renderer = getRendererInstance();
+	auto* pp = getPostProcessor();
+	auto* stateTracker = getStateTracker();
+
+	if (!renderer->isSceneRendering() || !pp || !pp->isGbufInitialized()) {
+		return;
+	}
+
+	vk::CommandBuffer cmd = stateTracker->getCommandBuffer();
+
+	// End the G-buffer render pass (transitions all color attachments to eShaderReadOnlyOptimal)
+	cmd.endRenderPass();
+
+	// Copy scene depth → samplable depth copy (for fragment depth reconstruction)
+	pp->copySceneDepth(cmd);
+
+	// Copy G-buffer normal → samplable normal copy (for angle rejection)
+	pp->copyGbufNormal(cmd);
+
+	// Transition scene color: eShaderReadOnlyOptimal → eColorAttachmentOptimal
+	{
+		vk::ImageMemoryBarrier barrier;
+		barrier.srcAccessMask = {};
+		barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+		barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal;
+		barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.image = pp->getSceneColorImage();
+		barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+		cmd.pipelineBarrier(
+			vk::PipelineStageFlagBits::eTopOfPipe,
+			vk::PipelineStageFlagBits::eColorAttachmentOutput,
+			{}, nullptr, nullptr, barrier);
+	}
+
+	// Transition G-buffer attachments 1-5 for render pass resume
+	pp->transitionGbufForResume(cmd);
+
+	// Resume G-buffer render pass with eLoad
+	{
+		auto extent = pp->getSceneExtent();
+		vk::RenderPassBeginInfo rpBegin;
+		rpBegin.renderPass = pp->getGbufRenderPassLoad();
+		rpBegin.framebuffer = pp->getGbufFramebuffer();
+		rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+		rpBegin.renderArea.extent = extent;
+
+		std::array<vk::ClearValue, 7> clearValues{};
+		clearValues[6].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+		rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+		rpBegin.pClearValues = clearValues.data();
+
+		cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+		stateTracker->setRenderPass(pp->getGbufRenderPassLoad(), 0);
+		stateTracker->setColorAttachmentCount(VulkanPostProcessor::GBUF_COLOR_ATTACHMENT_COUNT);
+	}
+
+	// Restore viewport (Y-flipped for Vulkan scene rendering)
+	auto extent = pp->getSceneExtent();
+	stateTracker->setViewport(0.0f,
+		static_cast<float>(extent.height),
+		static_cast<float>(extent.width),
+		-static_cast<float>(extent.height));
+}
+
+void vulkan_stop_decal_pass()
+{
+	// No-op — decals draw within the resumed G-buffer render pass
+}
+
+void vulkan_render_decals(decal_material* material_info,
+	primitive_type prim_type,
+	vertex_layout* layout,
+	int num_elements,
+	const indexed_vertex_source& buffers,
+	const gr_buffer_handle& instance_buffer,
+	int num_instances)
+{
+	if (!material_info || !layout || num_instances <= 0) {
+		return;
+	}
+
+	auto* stateTracker = getStateTracker();
+	auto* pipelineManager = getPipelineManager();
+	auto* descManager = getDescriptorManager();
+	auto* bufferManager = getBufferManager();
+	auto* drawManager = getDrawManager();
+	auto* texManager = getTextureManager();
+	auto* pp = getPostProcessor();
+
+	// Set up matrices
+	gr_matrix_set_uniforms();
+
+	// Build pipeline config for decal rendering
+	PipelineConfig config;
+	config.shaderType = SDR_TYPE_DECAL;
+	config.primitiveType = prim_type;
+	config.depthMode = material_info->get_depth_mode();
+	config.depthWriteEnabled = false;
+	config.cullEnabled = false;
+	config.frontFaceCW = false;
+	config.blendMode = material_info->get_blend_mode();
+	config.renderPass = stateTracker->getCurrentRenderPass();
+	config.colorAttachmentCount = stateTracker->getColorAttachmentCount();
+
+	// Per-attachment blend: active attachments (0=color, 2=normal, 4=emissive) get
+	// the material's blend mode with RGB-only write mask. Inactive attachments get
+	// write mask = 0 to avoid corrupting G-buffer data.
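+	// Resulting per-attachment blend state (informal summary of the assignments below):
+	//   att 0 (color):    material blend mode 0, write RGB only
+	//   att 2 (normal):   additive,              write RGB only
+	//   att 4 (emissive): material blend mode 2, write RGB only
+	//   all others:       write mask disabled (G-buffer data untouched)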
+	config.perAttachmentBlendEnabled = true;
+	for (uint32_t i = 0; i < config.colorAttachmentCount; ++i) {
+		config.attachmentBlends[i].blendMode = ALPHA_BLEND_NONE;
+		config.attachmentBlends[i].writeMask = {false, false, false, false};
+	}
+	// Attachment 0: color/diffuse — use material blend mode 0
+	config.attachmentBlends[0].blendMode = material_info->get_blend_mode(0);
+	config.attachmentBlends[0].writeMask = {true, true, true, false};
+	// Attachment 2: normal — always additive
+	config.attachmentBlends[2].blendMode = ALPHA_BLEND_ADDITIVE;
+	config.attachmentBlends[2].writeMask = {true, true, true, false};
+	// Attachment 4: emissive — use material blend mode 2
+	config.attachmentBlends[4].blendMode = material_info->get_blend_mode(2);
+	config.attachmentBlends[4].writeMask = {true, true, true, false};
+
+	// Get or create pipeline
+	vk::Pipeline pipeline = pipelineManager->getPipeline(config, *layout);
+	if (!pipeline) {
+		mprintf(("vulkan_render_decals: Failed to get pipeline!\n"));
+		return;
+	}
+
+	stateTracker->bindPipeline(pipeline, pipelineManager->getPipelineLayout());
+
+	// Get fallback resources
+	vk::Buffer fallbackUBO = bufferManager->getFallbackUniformBuffer();
+	vk::DeviceSize fallbackUBOSize = static_cast<vk::DeviceSize>(bufferManager->getFallbackUniformBufferSize());
+	vk::Sampler fallbackSampler = texManager->getDefaultSampler();
+	vk::ImageView fallbackView = texManager->getFallback2DArrayView();
+	vk::ImageView fallbackView2D = texManager->getFallbackTextureView2D();
+
+	// Helper: write real pending UBO or fallback
+	auto writeUBOOrFallback = [&](DescriptorWriter& w, vk::DescriptorSet set,
+		uint32_t binding, size_t blockIdx) {
+		const auto& pending = drawManager->getPendingUniformBinding(blockIdx);
+		if (pending.valid) {
+			vk::Buffer buf = bufferManager->getVkBuffer(pending.bufferHandle);
+			if (buf) {
+				w.writeUniformBuffer(set, binding, buf, pending.offset, pending.size);
+				return;
+			}
+		}
+		w.writeUniformBuffer(set, binding, fallbackUBO, 0, fallbackUBOSize);
+	};
+
+	DescriptorWriter writer;
+	writer.reset(descManager->getDevice());
+
+	// Set 0: Global
+	vk::DescriptorSet globalSet = descManager->allocateFrameSet(DescriptorSetIndex::Global);
+	Verify(globalSet);
+	writer.writeUniformBuffer(globalSet, 0, fallbackUBO, 0, fallbackUBOSize);
+	writer.writeUniformBuffer(globalSet, 1, fallbackUBO, 0, fallbackUBOSize);
+	writer.writeTexture(globalSet, 2, fallbackView, fallbackSampler);
+	vk::ImageView fallbackCubeView = texManager->getFallbackCubeView();
+	writer.writeTexture(globalSet, 3, fallbackCubeView, fallbackSampler);
+	writer.writeTexture(globalSet, 4, fallbackCubeView, fallbackSampler);
+	writer.flush();
+	stateTracker->bindDescriptorSet(DescriptorSetIndex::Global, globalSet);
+
+	// Set 1: Material
+	vk::DescriptorSet materialSet = descManager->allocateFrameSet(DescriptorSetIndex::Material);
+	Verify(materialSet);
+	writer.writeUniformBuffer(materialSet, 0, fallbackUBO, 0, fallbackUBOSize);
+	writer.writeStorageBuffer(materialSet, 3, fallbackUBO, 0, fallbackUBOSize);
+
+	// Binding 1: decal textures (diffuse, glow, normal as texture array)
+	drawManager->bindMaterialTextures(material_info, materialSet, &writer);
+
+	// Binding 2: DecalGlobals UBO
+	writeUBOOrFallback(writer, materialSet, 2,
+		static_cast<size_t>(uniform_block_type::DecalGlobals));
+
+	// Binding 4: scene depth copy (for fragment depth reconstruction)
+	{
+		vk::Sampler nearestSampler = texManager->getSampler(
+			vk::Filter::eNearest, vk::Filter::eNearest,
+			vk::SamplerAddressMode::eClampToEdge, false, 0.0f, false);
+		vk::ImageView depthView = pp->getSceneDepthCopyView();
+		if (depthView && nearestSampler) {
+			writer.writeTexture(materialSet, 4, depthView, nearestSampler);
+		} else {
+			writer.writeTexture(materialSet, 4, fallbackView2D, fallbackSampler);
+		}
+	}
+
+	// Binding 5: scene color (fallback — not used by decals)
+	writer.writeTexture(materialSet, 5, fallbackView2D, fallbackSampler);
+
+	// Binding 6: G-buffer normal copy (for angle rejection)
+	{
+		vk::Sampler nearestSampler = texManager->getSampler(
+			vk::Filter::eNearest, vk::Filter::eNearest,
+			vk::SamplerAddressMode::eClampToEdge, false, 0.0f, false);
+		vk::ImageView normalView = pp->getGbufNormalCopyView();
+		if (normalView && nearestSampler) {
+			writer.writeTexture(materialSet, 6, normalView, nearestSampler);
+		} else {
+			writer.writeTexture(materialSet, 6, fallbackView2D, fallbackSampler);
+		}
+	}
+
+	writer.flush();
+	stateTracker->bindDescriptorSet(DescriptorSetIndex::Material, materialSet);
+
+	// Set 2: PerDraw
+	vk::DescriptorSet perDrawSet = descManager->allocateFrameSet(DescriptorSetIndex::PerDraw);
+	Verify(perDrawSet);
+	// Pre-initialize all bindings with fallback, then overwrite real ones
+	for (uint32_t b = 0; b < 5; ++b) {
+		writer.writeUniformBuffer(perDrawSet, b, fallbackUBO, 0, fallbackUBOSize);
+	}
+
+	// Binding 1: Matrices UBO (overwrite fallback if valid)
+	{
+		size_t idx = static_cast<size_t>(uniform_block_type::Matrices);
+		const auto& binding = drawManager->getPendingUniformBinding(idx);
+		if (binding.valid) {
+			vk::Buffer buf = bufferManager->getVkBuffer(binding.bufferHandle);
+			if (buf) {
+				writer.writeUniformBuffer(perDrawSet, 1, buf, binding.offset, binding.size);
+			}
+		}
+	}
+
+	// Binding 3: DecalInfo UBO (overwrite fallback if valid)
+	{
+		size_t idx = static_cast<size_t>(uniform_block_type::DecalInfo);
+		const auto& binding = drawManager->getPendingUniformBinding(idx);
+		if (binding.valid) {
+			vk::Buffer buf = bufferManager->getVkBuffer(binding.bufferHandle);
+			if (buf) {
+				writer.writeUniformBuffer(perDrawSet, 3, buf, binding.offset, binding.size);
+			}
+		}
+	}
+
+	writer.flush();
+	stateTracker->bindDescriptorSet(DescriptorSetIndex::PerDraw, perDrawSet);
+
+	// Bind vertex buffers: binding 0 = box VBO, binding 1 = instance buffer
+	vk::Buffer boxVBO = bufferManager->getVkBuffer(buffers.Vbuffer_handle);
+	vk::Buffer boxIBO = bufferManager->getVkBuffer(buffers.Ibuffer_handle);
+	vk::Buffer instBuf = bufferManager->getVkBuffer(instance_buffer);
+
+	if (!boxVBO || !boxIBO || !instBuf) {
+		mprintf(("vulkan_render_decals: Missing buffer(s)!\n"));
+		return;
+	}
+
+	stateTracker->bindVertexBuffer(0, boxVBO, 0);
+
+	// Instance buffer needs frame base offset for streaming buffers
+	size_t instFrameOffset = bufferManager->getFrameBaseOffset(instance_buffer);
+	stateTracker->bindVertexBuffer(1, instBuf, static_cast<vk::DeviceSize>(instFrameOffset));
+
+	stateTracker->bindIndexBuffer(boxIBO, 0, vk::IndexType::eUint32);
+
+	// Flush dynamic state and draw
+	stateTracker->applyDynamicState();
+
+	auto cmdBuffer = stateTracker->getCommandBuffer();
+	cmdBuffer.drawIndexed(
+		static_cast<uint32_t>(num_elements),  // index count
+		static_cast<uint32_t>(num_instances), // instance count
+		0,                                    // first index
+		0,                                    // vertex offset
+		0                                     // first instance
+	);
+}
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanDeferred.h b/code/graphics/vulkan/VulkanDeferred.h
new file mode 100644
index 00000000000..c3731308bff
--- /dev/null
+++ b/code/graphics/vulkan/VulkanDeferred.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include "globalincs/pstypes.h"
+#include "graphics/2d.h" +#include "graphics/material.h" + +struct matrix; +struct matrix4; +struct vec3d; + +namespace graphics { +namespace vulkan { + +// Deferred lighting pipeline entry points (gr_screen.gf_* implementations) +void vulkan_deferred_lighting_begin(bool clearNonColorBufs); +void vulkan_deferred_lighting_msaa(); +void vulkan_deferred_lighting_end(); +void vulkan_deferred_lighting_finish(); + +// Fog control +void vulkan_override_fog(bool set_override); + +// Shadow map rendering +void vulkan_shadow_map_start(matrix4* shadow_view_matrix, const matrix* light_matrix, vec3d* eye_pos); +void vulkan_shadow_map_end(); + +// Decal pass +void vulkan_start_decal_pass(); +void vulkan_stop_decal_pass(); +void vulkan_render_decals(decal_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int num_elements, + const indexed_vertex_source& buffers, + const gr_buffer_handle& instance_buffer, + int num_instances); + +} // namespace vulkan +} // namespace graphics diff --git a/code/graphics/vulkan/VulkanDeletionQueue.cpp b/code/graphics/vulkan/VulkanDeletionQueue.cpp new file mode 100644 index 00000000000..daec3b4cb78 --- /dev/null +++ b/code/graphics/vulkan/VulkanDeletionQueue.cpp @@ -0,0 +1,195 @@ + +#include "VulkanDeletionQueue.h" + +namespace graphics { +namespace vulkan { + +namespace { +VulkanDeletionQueue* g_deletionQueue = nullptr; +} + +VulkanDeletionQueue* getDeletionQueue() +{ + Assertion(g_deletionQueue != nullptr, "Vulkan DeletionQueue not initialized!"); + return g_deletionQueue; +} + +void setDeletionQueue(VulkanDeletionQueue* queue) +{ + g_deletionQueue = queue; +} + +VulkanDeletionQueue::~VulkanDeletionQueue() +{ + shutdown(); +} + +void VulkanDeletionQueue::init(vk::Device device, VulkanMemoryManager* memoryManager) +{ + m_device = device; + m_memoryManager = memoryManager; + m_initialized = true; +} + +void VulkanDeletionQueue::shutdown() +{ + if (!m_initialized) { + return; + } + + flushAll(); + m_initialized = false; +} + +void VulkanDeletionQueue::queueBuffer(vk::Buffer buffer, VulkanAllocation allocation) +{ + Assertion(m_initialized, "VulkanDeletionQueue::queueBuffer called before initialization!"); + if (!buffer) { + return; + } + + PendingDestruction pending; + pending.resource = PendingBuffer{buffer, allocation}; + pending.framesRemaining = FRAMES_TO_WAIT; + m_pendingDestructions.push_back(pending); +} + +void VulkanDeletionQueue::queueImage(vk::Image image, VulkanAllocation allocation) +{ + Assertion(m_initialized, "VulkanDeletionQueue::queueImage called before initialization!"); + if (!image) { + return; + } + + PendingDestruction pending; + pending.resource = PendingImage{image, allocation}; + pending.framesRemaining = FRAMES_TO_WAIT; + m_pendingDestructions.push_back(pending); +} + +void VulkanDeletionQueue::queueImageView(vk::ImageView imageView) +{ + Assertion(m_initialized, "VulkanDeletionQueue::queueImageView called before initialization!"); + if (!imageView) { + return; + } + + PendingDestruction pending; + pending.resource = imageView; + pending.framesRemaining = FRAMES_TO_WAIT; + m_pendingDestructions.push_back(pending); +} + +void VulkanDeletionQueue::queueFramebuffer(vk::Framebuffer framebuffer) +{ + Assertion(m_initialized, "VulkanDeletionQueue::queueFramebuffer called before initialization!"); + if (!framebuffer) { + return; + } + + PendingDestruction pending; + pending.resource = framebuffer; + pending.framesRemaining = FRAMES_TO_WAIT; + m_pendingDestructions.push_back(pending); +} + +void 
+{
+	Assertion(m_initialized, "VulkanDeletionQueue::queueRenderPass called before initialization!");
+	if (!renderPass) {
+		return;
+	}
+
+	PendingDestruction pending;
+	pending.resource = renderPass;
+	pending.framesRemaining = FRAMES_TO_WAIT;
+	m_pendingDestructions.push_back(pending);
+}
+
+void VulkanDeletionQueue::queueSampler(vk::Sampler sampler)
+{
+	Assertion(m_initialized, "VulkanDeletionQueue::queueSampler called before initialization!");
+	if (!sampler) {
+		return;
+	}
+
+	PendingDestruction pending;
+	pending.resource = sampler;
+	pending.framesRemaining = FRAMES_TO_WAIT;
+	m_pendingDestructions.push_back(pending);
+}
+
+void VulkanDeletionQueue::processDestructions()
+{
+	Assertion(m_initialized, "VulkanDeletionQueue::processDestructions called before initialization!");
+	if (m_pendingDestructions.empty()) {
+		return;
+	}
+
+	auto it = m_pendingDestructions.begin();
+	while (it != m_pendingDestructions.end()) {
+		if (it->framesRemaining > 0) {
+			it->framesRemaining--;
+			++it;
+		} else {
+			destroyResource(it->resource);
+			it = m_pendingDestructions.erase(it);
+		}
+	}
+}
+
+void VulkanDeletionQueue::flushAll()
+{
+	if (!m_initialized) {
+		return;
+	}
+
+	for (const auto& pending : m_pendingDestructions) {
+		destroyResource(pending.resource);
+	}
+	m_pendingDestructions.clear();
+}
+
+void VulkanDeletionQueue::destroyResource(const PendingResource& resource)
+{
+	std::visit([this](auto&& res) -> void {
+		using T = std::decay_t<decltype(res)>;
+
+		if constexpr (std::is_same_v<T, PendingBuffer>) {
+			if (res.buffer) {
+				m_device.destroyBuffer(res.buffer);
+			}
+			if (res.allocation.memory != VK_NULL_HANDLE && m_memoryManager) {
+				VulkanAllocation alloc = res.allocation; // Copy for non-const ref
+				m_memoryManager->freeAllocation(alloc);
+			}
+		} else if constexpr (std::is_same_v<T, PendingImage>) {
+			if (res.image) {
+				m_device.destroyImage(res.image);
+			}
+			if (res.allocation.memory != VK_NULL_HANDLE && m_memoryManager) {
+				VulkanAllocation alloc = res.allocation; // Copy for non-const ref
+				m_memoryManager->freeAllocation(alloc);
+			}
+		} else if constexpr (std::is_same_v<T, vk::ImageView>) {
+			if (res) {
+				m_device.destroyImageView(res);
+			}
+		} else if constexpr (std::is_same_v<T, vk::Framebuffer>) {
+			if (res) {
+				m_device.destroyFramebuffer(res);
+			}
+		} else if constexpr (std::is_same_v<T, vk::RenderPass>) {
+			if (res) {
+				m_device.destroyRenderPass(res);
+			}
+		} else if constexpr (std::is_same_v<T, vk::Sampler>) {
+			if (res) {
+				m_device.destroySampler(res);
+			}
+		}
+	}, resource);
+}
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanDeletionQueue.h b/code/graphics/vulkan/VulkanDeletionQueue.h
new file mode 100644
index 00000000000..76b44c4a64d
--- /dev/null
+++ b/code/graphics/vulkan/VulkanDeletionQueue.h
@@ -0,0 +1,118 @@
+
+#pragma once
+
+#include "globalincs/pstypes.h"
+#include "VulkanConstants.h"
+#include "VulkanMemory.h"
+
+#include <cstdint>
+#include <variant>
+
+namespace graphics {
+namespace vulkan {
+
+/**
+ * @brief Unified deferred resource deletion queue for Vulkan
+ *
+ * Resources that may still be referenced by in-flight command buffers are
+ * queued here instead of being destroyed immediately. After waiting the
+ * configured number of frames, they are safely destroyed.
+ *
+ * This prevents validation errors like "can't be called on VkImageView that
+ * is currently in use by VkDescriptorSet".
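+ *
+ * Illustrative usage (a sketch using only the methods declared below):
+ *
+ *   // when replacing a resource that may still be in flight:
+ *   getDeletionQueue()->queueBuffer(oldBuffer, oldAllocation);
+ *   // once per frame, after the frame index advances:
+ *   getDeletionQueue()->processDestructions();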
+ */
+class VulkanDeletionQueue {
+public:
+	static constexpr uint32_t FRAMES_TO_WAIT = MAX_FRAMES_IN_FLIGHT;
+
+	VulkanDeletionQueue() = default;
+	~VulkanDeletionQueue();
+
+	void init(vk::Device device, VulkanMemoryManager* memoryManager);
+	void shutdown();
+
+	/**
+	 * @brief Queue a buffer for deferred destruction
+	 */
+	void queueBuffer(vk::Buffer buffer, VulkanAllocation allocation);
+
+	/**
+	 * @brief Queue an image for deferred destruction
+	 */
+	void queueImage(vk::Image image, VulkanAllocation allocation);
+
+	/**
+	 * @brief Queue an image view for deferred destruction
+	 */
+	void queueImageView(vk::ImageView imageView);
+
+	/**
+	 * @brief Queue a framebuffer for deferred destruction
+	 */
+	void queueFramebuffer(vk::Framebuffer framebuffer);
+
+	/**
+	 * @brief Queue a render pass for deferred destruction
+	 */
+	void queueRenderPass(vk::RenderPass renderPass);
+
+	/**
+	 * @brief Queue a sampler for deferred destruction
+	 */
+	void queueSampler(vk::Sampler sampler);
+
+	/**
+	 * @brief Process pending destructions - call once per frame
+	 *
+	 * Decrements frame counters and destroys resources that have waited
+	 * enough frames.
+	 */
+	void processDestructions();
+
+	/**
+	 * @brief Flush all pending destructions immediately
+	 *
+	 * Used during shutdown when we know the device is idle.
+	 */
+	void flushAll();
+
+private:
+	struct PendingBuffer {
+		vk::Buffer buffer;
+		VulkanAllocation allocation;
+	};
+
+	struct PendingImage {
+		vk::Image image;
+		VulkanAllocation allocation;
+	};
+
+	using PendingResource = std::variant<
+		PendingBuffer,
+		PendingImage,
+		vk::ImageView,
+		vk::Framebuffer,
+		vk::RenderPass,
+		vk::Sampler
+	>;
+
+	struct PendingDestruction {
+		PendingResource resource;
+		uint32_t framesRemaining;
+	};
+
+	void destroyResource(const PendingResource& resource);
+
+	vk::Device m_device;
+	VulkanMemoryManager* m_memoryManager = nullptr;
+	SCP_vector<PendingDestruction> m_pendingDestructions;
+	bool m_initialized = false;
+};
+
+// Global deletion queue instance
+VulkanDeletionQueue* getDeletionQueue();
+void setDeletionQueue(VulkanDeletionQueue* queue);
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanDescriptorManager.cpp b/code/graphics/vulkan/VulkanDescriptorManager.cpp
new file mode 100644
index 00000000000..177b2d40608
--- /dev/null
+++ b/code/graphics/vulkan/VulkanDescriptorManager.cpp
@@ -0,0 +1,355 @@
+#include "VulkanDescriptorManager.h"
+
+namespace graphics {
+namespace vulkan {
+
+// Global descriptor manager pointer
+static VulkanDescriptorManager* g_descriptorManager = nullptr;
+
+VulkanDescriptorManager* getDescriptorManager()
+{
+	Assertion(g_descriptorManager != nullptr, "Vulkan DescriptorManager not initialized!");
+	return g_descriptorManager;
+}
+
+void setDescriptorManager(VulkanDescriptorManager* manager)
+{
+	g_descriptorManager = manager;
+}
+
+bool VulkanDescriptorManager::init(vk::Device device)
+{
+	if (m_initialized) {
+		return true;
+	}
+
+	m_device = device;
+
+	createSetLayouts();
+	createDescriptorPools();
+
+	m_initialized = true;
+	mprintf(("VulkanDescriptorManager: Initialized\n"));
+	return true;
+}
+
+void VulkanDescriptorManager::shutdown()
+{
+	if (!m_initialized) {
+		return;
+	}
+
+	// Wait for device idle before destroying
+	m_device.waitIdle();
+
+	// Destroy pools (automatically frees allocated sets)
+	for (auto& poolChain : m_framePools) {
+		poolChain.clear();
+	}
+
+	// Destroy layouts
+	for (auto& layout : m_setLayouts) {
+		layout.reset();
+	}
+
+	m_initialized = false;
+	mprintf(("VulkanDescriptorManager: Shutdown complete\n"));
+}
+
+vk::DescriptorSetLayout VulkanDescriptorManager::getSetLayout(DescriptorSetIndex setIndex) const
+{
+	return m_setLayouts[static_cast<size_t>(setIndex)].get();
+}
+
+SCP_vector<vk::DescriptorSetLayout> VulkanDescriptorManager::getAllSetLayouts() const
+{
+	SCP_vector<vk::DescriptorSetLayout> layouts;
+	layouts.reserve(static_cast<size_t>(DescriptorSetIndex::Count));
+
+	for (const auto& layout : m_setLayouts) {
+		layouts.push_back(layout.get());
+	}
+
+	return layouts;
+}
+
+vk::DescriptorSet VulkanDescriptorManager::allocateFrameSet(DescriptorSetIndex setIndex)
+{
+	if (!m_initialized) {
+		return {};
+	}
+
+	vk::DescriptorSetLayout layout = m_setLayouts[static_cast<size_t>(setIndex)].get();
+	auto& pools = m_framePools[m_currentFrame];
+
+	// Try allocating from the last pool in the list
+	if (!pools.empty()) {
+		vk::DescriptorSetAllocateInfo allocInfo;
+		allocInfo.descriptorPool = pools.back().get();
+		allocInfo.descriptorSetCount = 1;
+		allocInfo.pSetLayouts = &layout;
+
+		try {
+			auto sets = m_device.allocateDescriptorSets(allocInfo);
+			return sets[0];
+		} catch (const vk::OutOfPoolMemoryError&) {
+			// Pool exhausted, fall through to create a new one
+		} catch (const vk::FragmentedPoolError&) {
+			// Pool fragmented, fall through to create a new one
+		}
+	}
+
+	// Create a new pool and retry
+	pools.push_back(createFramePool());
+	mprintf(("VulkanDescriptorManager: Grew frame %u pool count to %zu\n",
+		m_currentFrame, pools.size()));
+
+	vk::DescriptorSetAllocateInfo allocInfo;
+	allocInfo.descriptorPool = pools.back().get();
+	allocInfo.descriptorSetCount = 1;
+	allocInfo.pSetLayouts = &layout;
+
+	try {
+		auto sets = m_device.allocateDescriptorSets(allocInfo);
+		return sets[0];
+	} catch (const vk::SystemError& e) {
+		mprintf(("VulkanDescriptorManager: Failed to allocate frame descriptor set after pool growth: %s\n", e.what()));
+		return {};
+	}
+}
+
+void VulkanDescriptorManager::beginFrame()
+{
+	if (!m_initialized) {
+		return;
+	}
+
+	auto& pools = m_framePools[m_currentFrame];
+
+	// Reset all pools for the current frame
+	for (auto& pool : pools) {
+		m_device.resetDescriptorPool(pool.get());
+	}
+
+	// If we grew beyond the initial pool, shrink back to 1 to reclaim memory
+	// (the single pool will grow again next frame if needed)
+	if (pools.size() > 1) {
+		vk::UniqueDescriptorPool first = std::move(pools[0]);
+		pools.clear();
+		pools.push_back(std::move(first));
+	}
+}
+
+void VulkanDescriptorManager::endFrame()
+{
+	// Advance to next frame
+	m_currentFrame = (m_currentFrame + 1) % MAX_FRAMES_IN_FLIGHT;
+}
+
+bool VulkanDescriptorManager::getUniformBlockBinding(uniform_block_type blockType,
+	DescriptorSetIndex& setIndex, uint32_t& binding)
+{
+	// Map uniform_block_type to descriptor set and binding
+	// Based on the descriptor layout design in the plan
+	switch (blockType) {
+	case uniform_block_type::Lights:
+		setIndex = DescriptorSetIndex::Global;
+		binding = 0;
+		return true;
+
+	case uniform_block_type::DeferredGlobals:
+		setIndex = DescriptorSetIndex::Global;
+		binding = 1;
+		return true;
+
+	case uniform_block_type::ModelData:
+		setIndex = DescriptorSetIndex::Material;
+		binding = 0;
+		return true;
+
+	case uniform_block_type::DecalGlobals:
+		setIndex = DescriptorSetIndex::Material;
+		binding = 2;
+		return true;
+
+	case uniform_block_type::GenericData:
+		setIndex = DescriptorSetIndex::PerDraw;
+		binding = 0;
+		return true;
+
+	case uniform_block_type::Matrices:
+		setIndex = DescriptorSetIndex::PerDraw;
+		binding = 1;
+		return true;
+
+	case uniform_block_type::NanoVGData:
+		setIndex = DescriptorSetIndex::PerDraw;
+		binding = 2;
+		return true;
+
+	case uniform_block_type::DecalInfo:
+		setIndex = DescriptorSetIndex::PerDraw;
+		binding = 3;
+		return true;
+
+	case uniform_block_type::MovieData:
+		setIndex = DescriptorSetIndex::PerDraw;
+		binding = 4;
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+void VulkanDescriptorManager::createSetLayouts()
+{
+	// Set 0: Global (per-frame data)
+	// NOTE: Using regular UBOs for now; dynamic UBOs need offset tracking
+	{
+		SCP_vector<DescriptorBindingInfo> bindings = {
+			// Binding 0: Lights UBO
+			{ 0, vk::DescriptorType::eUniformBuffer, 1,
+				vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 1: DeferredGlobals UBO
+			{ 1, vk::DescriptorType::eUniformBuffer, 1,
+				vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 2: Shadow map texture
+			{ 2, vk::DescriptorType::eCombinedImageSampler, 1,
+				vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 3: Environment map (samplerCube)
+			{ 3, vk::DescriptorType::eCombinedImageSampler, 1,
+				vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 4: Irradiance map (samplerCube)
+			{ 4, vk::DescriptorType::eCombinedImageSampler, 1,
+				vk::ShaderStageFlagBits::eFragment },
+		};
+		m_setLayouts[static_cast<size_t>(DescriptorSetIndex::Global)] = createSetLayout(bindings);
+	}
+
+	// Set 1: Material (per-batch data)
+	{
+		SCP_vector<DescriptorBindingInfo> bindings = {
+			// Binding 0: ModelData UBO
+			{ 0, vk::DescriptorType::eUniformBuffer, 1,
+				vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 1: Texture array (diffuse, glow, spec, normal, ambient, misc, etc.)
+			{ 1, vk::DescriptorType::eCombinedImageSampler, MAX_TEXTURE_BINDINGS,
+				vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 2: DecalGlobals UBO
+			{ 2, vk::DescriptorType::eUniformBuffer, 1,
+				vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 3: Transform buffer SSBO (for batched submodel transforms)
+			{ 3, vk::DescriptorType::eStorageBuffer, 1,
+				vk::ShaderStageFlagBits::eVertex },
+
+			// Binding 4: Depth map (sampler2D for soft particles)
+			{ 4, vk::DescriptorType::eCombinedImageSampler, 1,
+				vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 5: Scene color / frameBuffer (distortion effects)
+			{ 5, vk::DescriptorType::eCombinedImageSampler, 1,
+				vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 6: Distortion map (distortion effects)
+			{ 6, vk::DescriptorType::eCombinedImageSampler, 1,
+				vk::ShaderStageFlagBits::eFragment },
+		};
+		m_setLayouts[static_cast<size_t>(DescriptorSetIndex::Material)] = createSetLayout(bindings);
+	}
+
+	// Set 2: Per-Draw (per-draw-call data)
+	{
+		SCP_vector<DescriptorBindingInfo> bindings = {
+			// Binding 0: GenericData UBO
+			{ 0, vk::DescriptorType::eUniformBuffer, 1,
+				vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 1: Matrices UBO
+			{ 1, vk::DescriptorType::eUniformBuffer, 1,
+				vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 2: NanoVGData UBO
+			{ 2, vk::DescriptorType::eUniformBuffer, 1,
+				vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 3: DecalInfo UBO
+			{ 3, vk::DescriptorType::eUniformBuffer, 1,
+				vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment },
+
+			// Binding 4: MovieData UBO
+			{ 4, vk::DescriptorType::eUniformBuffer, 1,
+				vk::ShaderStageFlagBits::eFragment },
+		};
+		m_setLayouts[static_cast<size_t>(DescriptorSetIndex::PerDraw)] = createSetLayout(bindings);
+	}
+
+	mprintf(("VulkanDescriptorManager: Created %zu descriptor set layouts\n",
+		static_cast<size_t>(DescriptorSetIndex::Count)));
+}
+
+vk::UniqueDescriptorPool VulkanDescriptorManager::createFramePool()
+{
+	// Pool sizes per chunk - supports roughly 340 draw calls (3 sets each).
+	// If more are needed, additional pools are created automatically.
+	constexpr uint32_t MAX_SETS_PER_POOL = 1024;
+	constexpr uint32_t MAX_UNIFORM_BUFFERS = MAX_SETS_PER_POOL * 9;  // budget of 9 UBO descriptors per set
+	constexpr uint32_t MAX_SAMPLERS = MAX_SETS_PER_POOL * 16;        // budget of 16 image samplers per set (one full material texture array)
+
+	SCP_vector<vk::DescriptorPoolSize> poolSizes = {
+		{ vk::DescriptorType::eUniformBuffer, MAX_UNIFORM_BUFFERS },
+		{ vk::DescriptorType::eCombinedImageSampler, MAX_SAMPLERS },
+		{ vk::DescriptorType::eStorageBuffer, MAX_SETS_PER_POOL },
+	};
+
+	vk::DescriptorPoolCreateInfo poolInfo;
+	poolInfo.maxSets = MAX_SETS_PER_POOL;
+	poolInfo.poolSizeCount = static_cast<uint32_t>(poolSizes.size());
+	poolInfo.pPoolSizes = poolSizes.data();
+
+	return m_device.createDescriptorPoolUnique(poolInfo);
}
+
+void VulkanDescriptorManager::createDescriptorPools()
+{
+	// Create one initial pool per frame (more will be added on demand)
+	for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) {
+		m_framePools[i].push_back(createFramePool());
+	}
+
+	mprintf(("VulkanDescriptorManager: Created %u frame pool chains\n",
+		MAX_FRAMES_IN_FLIGHT));
+}
+
+vk::UniqueDescriptorSetLayout VulkanDescriptorManager::createSetLayout(
+	const SCP_vector<DescriptorBindingInfo>& bindings)
+{
+	SCP_vector<vk::DescriptorSetLayoutBinding> vkBindings;
+	vkBindings.reserve(bindings.size());
+
+	for (const auto& info : bindings) {
+		vk::DescriptorSetLayoutBinding binding;
+		binding.binding = info.binding;
+		binding.descriptorType = info.type;
+		binding.descriptorCount = info.count;
+		binding.stageFlags = info.stages;
+		binding.pImmutableSamplers = nullptr;
+		vkBindings.push_back(binding);
+	}
+
+	vk::DescriptorSetLayoutCreateInfo layoutInfo;
+	layoutInfo.bindingCount = static_cast<uint32_t>(vkBindings.size());
+	layoutInfo.pBindings = vkBindings.data();
+
+	return m_device.createDescriptorSetLayoutUnique(layoutInfo);
+}
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanDescriptorManager.h b/code/graphics/vulkan/VulkanDescriptorManager.h
new file mode 100644
index 00000000000..ccda398ce3b
--- /dev/null
+++ b/code/graphics/vulkan/VulkanDescriptorManager.h
@@ -0,0 +1,265 @@
+#pragma once
+
+#include "globalincs/pstypes.h"
+#include "graphics/2d.h"
+#include "VulkanConstants.h"
+
+#include <array>
+#include <cstring>
+
+namespace graphics {
+namespace vulkan {
+
+/**
+ * @brief Stack-allocated batch writer for descriptor set updates.
+ *
+ * Accumulates WriteDescriptorSet entries with stable backing storage,
+ * then submits them all in a single vkUpdateDescriptorSets call.
+ * All storage is on the stack — no heap allocations.
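+ *
+ * Illustrative usage (a sketch; `set`, `ubo`, `view` and `uboSize` stand in
+ * for handles obtained elsewhere):
+ *
+ *   DescriptorWriter writer;
+ *   writer.reset(device);
+ *   writer.writeUniformBuffer(set, 0, ubo, 0, uboSize);
+ *   writer.writeTexture(set, 2, view, sampler);
+ *   writer.flush(); // one vkUpdateDescriptorSets call for both writes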
+ */
+class DescriptorWriter {
+public:
+	static constexpr uint32_t MAX_WRITES = 32;
+	static constexpr uint32_t MAX_BUFFER_INFOS = 20;
+	static constexpr uint32_t MAX_IMAGE_INFOS = 24;
+
+	void reset(vk::Device device) {
+		m_device = device;
+		m_writeCount = 0;
+		m_bufferInfoCount = 0;
+		m_imageInfoCount = 0;
+	}
+
+	void writeUniformBuffer(vk::DescriptorSet set, uint32_t binding,
+		vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize range) {
+		Verify(buffer);
+		Verify(m_writeCount < MAX_WRITES && m_bufferInfoCount < MAX_BUFFER_INFOS);
+		auto& buf = m_bufferInfos[m_bufferInfoCount++];
+		buf.buffer = buffer;
+		buf.offset = offset;
+		buf.range = range;
+
+		auto& w = m_writes[m_writeCount++];
+		w = vk::WriteDescriptorSet();
+		w.dstSet = set;
+		w.dstBinding = binding;
+		w.descriptorCount = 1;
+		w.descriptorType = vk::DescriptorType::eUniformBuffer;
+		w.pBufferInfo = &buf;
+	}
+
+	void writeStorageBuffer(vk::DescriptorSet set, uint32_t binding,
+		vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize range) {
+		Verify(buffer);
+		Verify(m_writeCount < MAX_WRITES && m_bufferInfoCount < MAX_BUFFER_INFOS);
+		auto& buf = m_bufferInfos[m_bufferInfoCount++];
+		buf.buffer = buffer;
+		buf.offset = offset;
+		buf.range = range;
+
+		auto& w = m_writes[m_writeCount++];
+		w = vk::WriteDescriptorSet();
+		w.dstSet = set;
+		w.dstBinding = binding;
+		w.descriptorCount = 1;
+		w.descriptorType = vk::DescriptorType::eStorageBuffer;
+		w.pBufferInfo = &buf;
+	}
+
+	void writeTexture(vk::DescriptorSet set, uint32_t binding,
+		vk::ImageView imageView, vk::Sampler sampler,
+		vk::ImageLayout layout = vk::ImageLayout::eShaderReadOnlyOptimal) {
+		Verify(m_writeCount < MAX_WRITES && m_imageInfoCount < MAX_IMAGE_INFOS);
+		auto& img = m_imageInfos[m_imageInfoCount++];
+		img.imageView = imageView;
+		img.sampler = sampler;
+		img.imageLayout = layout;
+
+		auto& w = m_writes[m_writeCount++];
+		w = vk::WriteDescriptorSet();
+		w.dstSet = set;
+		w.dstBinding = binding;
+		w.descriptorCount = 1;
+		w.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		w.pImageInfo = &img;
+	}
+
+	void writeTextureArray(vk::DescriptorSet set, uint32_t binding,
+		const vk::DescriptorImageInfo* images, uint32_t count) {
+		if (count == 0) {
+			return;
+		}
+		Verify(m_writeCount < MAX_WRITES && m_imageInfoCount + count <= MAX_IMAGE_INFOS);
+		auto* dst = &m_imageInfos[m_imageInfoCount];
+		memcpy(dst, images, count * sizeof(vk::DescriptorImageInfo));
+		m_imageInfoCount += count;
+
+		auto& w = m_writes[m_writeCount++];
+		w = vk::WriteDescriptorSet();
+		w.dstSet = set;
+		w.dstBinding = binding;
+		w.descriptorCount = count;
+		w.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		w.pImageInfo = dst;
+	}
+
+	void flush() {
+		if (m_writeCount > 0) {
+			m_device.updateDescriptorSets(m_writeCount, m_writes.data(), 0, nullptr);
+		}
+		m_writeCount = 0;
+		m_bufferInfoCount = 0;
+		m_imageInfoCount = 0;
+	}
+
+private:
+	vk::Device m_device;
+	std::array<vk::WriteDescriptorSet, MAX_WRITES> m_writes;
+	std::array<vk::DescriptorBufferInfo, MAX_BUFFER_INFOS> m_bufferInfos;
+	std::array<vk::DescriptorImageInfo, MAX_IMAGE_INFOS> m_imageInfos;
+	uint32_t m_writeCount = 0;
+	uint32_t m_bufferInfoCount = 0;
+	uint32_t m_imageInfoCount = 0;
+};
+
+/**
+ * @brief Descriptor set indices for the 3-tier layout
+ *
+ * Set 0: Global - per-frame data (lights, deferred globals, shadow maps)
+ * Set 1: Material - per-material data (model data, textures)
+ * Set 2: Per-Draw - per-draw-call data (generic data, matrices, etc.)
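+ *
+ * The tiers are ordered by update frequency on purpose: Set 0 is bound once
+ * per frame, Set 1 per material batch, and Set 2 before each draw. With a
+ * shared pipeline layout, Vulkan's set compatibility rules keep the
+ * lower-numbered sets bound when only a higher-numbered set is re-bound.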
+ */
+enum class DescriptorSetIndex : uint32_t {
+	Global = 0,
+	Material = 1,
+	PerDraw = 2,
+
+	Count = 3
+};
+
+/**
+ * @brief Descriptor binding info for a single binding point
+ */
+struct DescriptorBindingInfo {
+	uint32_t binding;
+	vk::DescriptorType type;
+	uint32_t count;
+	vk::ShaderStageFlags stages;
+};
+
+/**
+ * @brief Manages Vulkan descriptor sets, pools, and layouts
+ *
+ * Provides descriptor set allocation and update functionality.
+ * Uses per-frame pools for transient descriptors.
+ */
+class VulkanDescriptorManager {
+public:
+	static constexpr uint32_t MAX_TEXTURE_BINDINGS = 16; // Texture array size
+
+	VulkanDescriptorManager() = default;
+	~VulkanDescriptorManager() = default;
+
+	// Non-copyable
+	VulkanDescriptorManager(const VulkanDescriptorManager&) = delete;
+	VulkanDescriptorManager& operator=(const VulkanDescriptorManager&) = delete;
+
+	/**
+	 * @brief Initialize descriptor manager
+	 * @param device Vulkan logical device
+	 * @return true on success
+	 */
+	bool init(vk::Device device);
+
+	/**
+	 * @brief Shutdown and release resources
+	 */
+	void shutdown();
+
+	/**
+	 * @brief Get descriptor set layout for a given set index
+	 */
+	vk::DescriptorSetLayout getSetLayout(DescriptorSetIndex setIndex) const;
+
+	/**
+	 * @brief Get all descriptor set layouts (for pipeline layout creation)
+	 * @return Vector of layouts in order (Global, Material, PerDraw)
+	 */
+	SCP_vector<vk::DescriptorSetLayout> getAllSetLayouts() const;
+
+	/**
+	 * @brief Allocate a descriptor set from the per-frame pool
+	 * @param setIndex Which set type to allocate
+	 * @return Allocated descriptor set, or null handle on failure
+	 */
+	vk::DescriptorSet allocateFrameSet(DescriptorSetIndex setIndex);
+
+	/**
+	 * @brief Begin a new frame - reset current frame's pool
+	 */
+	void beginFrame();
+
+	/**
+	 * @brief End current frame - advance to next pool
+	 */
+	void endFrame();
+
+	/**
+	 * @brief Get current frame index
+	 */
+	uint32_t getCurrentFrame() const { return m_currentFrame; }
+
+	/**
+	 * @brief Get the Vulkan device (for DescriptorWriter)
+	 */
+	vk::Device getDevice() const { return m_device; }
+
+	/**
+	 * @brief Map uniform_block_type to descriptor set and binding
+	 * @param blockType The uniform block type
	 * @param setIndex Output: which descriptor set
+	 * @param binding Output: which binding within the set
+	 * @return true if mapping exists
+	 */
+	static bool getUniformBlockBinding(uniform_block_type blockType,
+		DescriptorSetIndex& setIndex, uint32_t& binding);
+
+private:
+	/**
+	 * @brief Create all descriptor set layouts
+	 */
+	void createSetLayouts();
+
+	/**
+	 * @brief Create descriptor pools
+	 */
+	void createDescriptorPools();
+
+	/**
+	 * @brief Create a single descriptor set layout
+	 */
+	vk::UniqueDescriptorSetLayout createSetLayout(const SCP_vector<DescriptorBindingInfo>& bindings);
+
+	/**
+	 * @brief Create a new descriptor pool with standard sizes
+	 */
+	vk::UniqueDescriptorPool createFramePool();
+
+	vk::Device m_device;
+
+	// Descriptor set layouts (one per set type)
+	std::array<vk::UniqueDescriptorSetLayout, static_cast<size_t>(DescriptorSetIndex::Count)> m_setLayouts;
+
+	// Per-frame descriptor pools (growable - new pools added on demand)
+	std::array<SCP_vector<vk::UniqueDescriptorPool>, MAX_FRAMES_IN_FLIGHT> m_framePools;
+
+	uint32_t m_currentFrame = 0;
+	bool m_initialized = false;
+};
+
+// Global descriptor manager access
+VulkanDescriptorManager* getDescriptorManager();
+void setDescriptorManager(VulkanDescriptorManager* manager);
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanDraw.cpp b/code/graphics/vulkan/VulkanDraw.cpp
new file mode 100644
index 00000000000..0b0df77c366
--- /dev/null
+++ b/code/graphics/vulkan/VulkanDraw.cpp
@@ -0,0 +1,2327 @@
+#include "VulkanDraw.h"
+
+#include <algorithm>
+
+#include "VulkanState.h"
+#include "VulkanBuffer.h"
+#include "VulkanPipeline.h"
+#include "VulkanShader.h"
+#include "VulkanTexture.h"
+#include "VulkanRenderer.h"
+#include "VulkanPostProcessing.h"
+#include "VulkanDescriptorManager.h"
+#include "VulkanDeletionQueue.h"
+#include "VulkanMemory.h"
+#include "VulkanConstants.h"
+#include "gr_vulkan.h"
+#include "VulkanVertexFormat.h"
+#include "bmpman/bmpman.h"
+#include "ddsutils/ddsutils.h"
+#include "graphics/grinternal.h"
+#include "graphics/material.h"
+#include "graphics/matrix.h"
+#include "graphics/util/primitives.h"
+#include "graphics/util/uniform_structs.h"
+#include "lighting/lighting.h"
+#include "graphics/util/UniformBuffer.h"
+#include "graphics/shaders/compiled/default-material_structs.vert.h"
+
+namespace graphics {
+namespace vulkan {
+
+// Texture slot mapping - material texture types to descriptor binding indices
+// Binding 1 in Material set is a texture array with up to 16 textures
+static constexpr uint32_t TEXTURE_BINDING_BASE_MAP = 0;
+static constexpr uint32_t TEXTURE_BINDING_GLOW_MAP = 1;
+static constexpr uint32_t TEXTURE_BINDING_SPEC_MAP = 2;
+static constexpr uint32_t TEXTURE_BINDING_NORMAL_MAP = 3;
+static constexpr uint32_t TEXTURE_BINDING_HEIGHT_MAP = 4;
+static constexpr uint32_t TEXTURE_BINDING_AMBIENT_MAP = 5;
+static constexpr uint32_t TEXTURE_BINDING_MISC_MAP = 6;
+
+// Convert FSO texture addressing mode to Vulkan sampler address mode
+static vk::SamplerAddressMode convertTextureAddressing(int mode)
+{
+	switch (mode) {
+	case TMAP_ADDRESS_MIRROR:
+		return vk::SamplerAddressMode::eMirroredRepeat;
+	case TMAP_ADDRESS_CLAMP:
+		return vk::SamplerAddressMode::eClampToEdge;
+	case TMAP_ADDRESS_WRAP:
+	default:
+		return vk::SamplerAddressMode::eRepeat;
+	}
+}
+
+// Global draw manager pointer
+static VulkanDrawManager* g_drawManager = nullptr;
+
+// ========== Transform buffer for batched submodel rendering ==========
+// Per-frame sub-allocating buffer. Multiple draw lists may upload transforms
+// in a single frame (e.g. space view + HUD targeting). Because Vulkan defers
+// command submission until flip(), each upload must be preserved — we append
+// rather than overwrite, and bind the SSBO with the per-upload byte offset.
+
+// SSBO descriptor offsets must be aligned to minStorageBufferOffsetAlignment.
+// The Vulkan spec guarantees this value is <= 256, so 256 is always safe.
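+// The append path below rounds the write cursor up with the usual
+// power-of-two trick, e.g. for an alignment of 256:
+//   alignedOffset = (offset + 255) & ~size_t{255};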
+static constexpr size_t SSBO_OFFSET_ALIGNMENT = 256;
+
+struct TransformBufferState {
+	vk::Buffer buffer;
+	VulkanAllocation allocation;
+	size_t capacity = 0;         // allocated bytes
+	size_t writeOffset = 0;      // append cursor (resets each frame)
+	size_t lastUploadOffset = 0; // byte offset of most recent upload
+	size_t lastUploadSize = 0;   // byte size of most recent upload
+};
+static TransformBufferState g_transformBuffers[MAX_FRAMES_IN_FLIGHT];
+static uint32_t g_lastTransformWriteFrame = UINT32_MAX;
+
+void vulkan_update_transform_buffer(void* data, size_t size)
+{
+	if (!data || size == 0) {
+		return;
+	}
+
+	auto* descManager = getDescriptorManager();
+	uint32_t frameIdx = descManager->getCurrentFrame();
+	auto& tb = g_transformBuffers[frameIdx];
+
+	// Reset write cursor on first call of each frame
+	if (g_lastTransformWriteFrame != frameIdx) {
+		tb.writeOffset = 0;
+		g_lastTransformWriteFrame = frameIdx;
+	}
+
+	// Align the write offset for SSBO descriptor binding
+	size_t alignedOffset = (tb.writeOffset + SSBO_OFFSET_ALIGNMENT - 1) & ~(SSBO_OFFSET_ALIGNMENT - 1);
+	size_t needed = alignedOffset + size;
+
+	auto* memManager = getMemoryManager();
+
+	// Resize if needed, preserving data already written this frame
+	if (needed > tb.capacity) {
+		size_t newCapacity = std::max(needed * 2, static_cast<size_t>(4096));
+
+		auto* bufferManager = getBufferManager();
+		vk::Device device = bufferManager->getDevice();
+
+		vk::BufferCreateInfo bufferInfo;
+		bufferInfo.size = static_cast<vk::DeviceSize>(newCapacity);
+		bufferInfo.usage = vk::BufferUsageFlagBits::eStorageBuffer;
+		bufferInfo.sharingMode = vk::SharingMode::eExclusive;
+
+		vk::Buffer newBuffer;
+		VulkanAllocation newAllocation;
+
+		try {
+			newBuffer = device.createBuffer(bufferInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("vulkan_update_transform_buffer: Failed to create buffer: %s\n", e.what()));
+			return;
+		}
+
+		Verify(memManager->allocateBufferMemory(newBuffer, MemoryUsage::CpuToGpu, newAllocation));
+
+		// Copy data already written this frame from old buffer
+		if (tb.buffer && tb.writeOffset > 0) {
+			void* oldMapped = memManager->mapMemory(tb.allocation);
+			void* newMapped = memManager->mapMemory(newAllocation);
+			Verify(oldMapped);
+			Verify(newMapped);
+			memcpy(newMapped, oldMapped, tb.writeOffset);
+			memManager->unmapMemory(tb.allocation);
+			memManager->unmapMemory(newAllocation);
+		}
+
+		// Defer destruction of old buffer
+		if (tb.buffer) {
+			auto* deletionQueue = getDeletionQueue();
+			deletionQueue->queueBuffer(tb.buffer, tb.allocation);
+		}
+
+		tb.buffer = newBuffer;
+		tb.allocation = newAllocation;
+		tb.capacity = newCapacity;
+	}
+
+	// Upload new data at the aligned offset
+	void* mapped = memManager->mapMemory(tb.allocation);
+	Verify(mapped);
+	memcpy(static_cast<char*>(mapped) + alignedOffset, data, size);
+	memManager->flushMemory(tb.allocation, alignedOffset, size);
+	memManager->unmapMemory(tb.allocation);
+
+	tb.lastUploadOffset = alignedOffset;
+	tb.lastUploadSize = size;
+	tb.writeOffset = alignedOffset + size;
+}
+
+VulkanDrawManager* getDrawManager()
+{
+	Assertion(g_drawManager != nullptr, "Vulkan DrawManager not initialized!");
+	return g_drawManager;
+}
+
+void setDrawManager(VulkanDrawManager* manager)
+{
+	g_drawManager = manager;
+}
+
+bool VulkanDrawManager::init(vk::Device device)
+{
+	if (m_initialized) {
+		return true;
+	}
+
+	m_device = device;
+
+	initSphereBuffers();
+
+	m_initialized = true;
+	mprintf(("VulkanDrawManager: Initialized\n"));
+	return true;
+}
+
+void VulkanDrawManager::shutdown()
+{
+	if (!m_initialized) {
+		return;
+	}
+
+	// Destroy transform SSBO buffers (static globals, not tracked by deletion queue)
+	auto* bufferManager = getBufferManager();
+	auto* memManager = getMemoryManager();
+	if (bufferManager && memManager) {
+		vk::Device device = bufferManager->getDevice();
+		for (auto& tb : g_transformBuffers) {
+			if (tb.buffer) {
+				device.destroyBuffer(tb.buffer);
+				memManager->freeAllocation(tb.allocation);
+				tb.buffer = nullptr;
+				tb.capacity = 0;
+				tb.writeOffset = 0;
+			}
+		}
+	}
+
+	shutdownSphereBuffers();
+
+	m_initialized = false;
+	mprintf(("VulkanDrawManager: Shutdown complete\n"));
+}
+
+void VulkanDrawManager::clear()
+{
+	auto* stateTracker = getStateTracker();
+
+	// Use the current clip/scissor region for clearing, matching OpenGL behavior.
+	// In OpenGL, glClear() respects the scissor test - if a clip region is set,
+	// only that region is cleared. Without this, HUD code that does
+	// gr_set_clip(panel) + gr_clear() would wipe the entire screen in Vulkan.
+	vk::ClearAttachment clearAttachment;
+	clearAttachment.aspectMask = vk::ImageAspectFlagBits::eColor;
+	clearAttachment.colorAttachment = 0;
+	clearAttachment.clearValue.color = stateTracker->getClearColor();
+
+	vk::ClearRect clearRect;
+	if (stateTracker->isScissorEnabled()) {
+		// Respect the current clip region (matches OpenGL scissor behavior)
+		clearRect.rect.offset = vk::Offset2D(gr_screen.offset_x + gr_screen.clip_left,
+			gr_screen.offset_y + gr_screen.clip_top);
+		clearRect.rect.extent = vk::Extent2D(static_cast<uint32_t>(gr_screen.clip_width),
+			static_cast<uint32_t>(gr_screen.clip_height));
+	} else {
+		clearRect.rect.offset = vk::Offset2D(0, 0);
+		clearRect.rect.extent = vk::Extent2D(static_cast<uint32_t>(gr_screen.max_w),
+			static_cast<uint32_t>(gr_screen.max_h));
+	}
+	clearRect.baseArrayLayer = 0;
+	clearRect.layerCount = 1;
+
+	auto cmdBuffer = stateTracker->getCommandBuffer();
+	cmdBuffer.clearAttachments(1, &clearAttachment, 1, &clearRect);
+}
+
+void VulkanDrawManager::setClearColor(int r, int g, int b)
+{
+	auto* stateTracker = getStateTracker();
+
+	float fr = static_cast<float>(r) / 255.0f;
+	float fg = static_cast<float>(g) / 255.0f;
+	float fb = static_cast<float>(b) / 255.0f;
+
+	// Apply HDR gamma if needed
+	if (High_dynamic_range) {
+		const float SRGB_GAMMA = 2.2f;
+		fr = powf(fr, SRGB_GAMMA);
+		fg = powf(fg, SRGB_GAMMA);
+		fb = powf(fb, SRGB_GAMMA);
+	}
+
+	stateTracker->setClearColor(fr, fg, fb, 1.0f);
+
+	// Also update gr_screen for compatibility
+	gr_screen.current_clear_color.red = static_cast<ubyte>(r);
+	gr_screen.current_clear_color.green = static_cast<ubyte>(g);
+	gr_screen.current_clear_color.blue = static_cast<ubyte>(b);
+	gr_screen.current_clear_color.alpha = 255;
+}
+
+void VulkanDrawManager::setClip(int x, int y, int w, int h, int resize_mode)
+{
+	auto* stateTracker = getStateTracker();
+
+	// Clamp values
+	if (x < 0) x = 0;
+	if (y < 0) y = 0;
+
+	int to_resize = (resize_mode != GR_RESIZE_NONE && resize_mode != GR_RESIZE_REPLACE &&
+		(gr_screen.custom_size || (gr_screen.rendering_to_texture != -1)));
+
+	int max_w = (to_resize) ? gr_screen.max_w_unscaled : gr_screen.max_w;
+	int max_h = (to_resize) ? gr_screen.max_h_unscaled : gr_screen.max_h;
+
+	if ((gr_screen.rendering_to_texture != -1) && to_resize) {
+		gr_unsize_screen_pos(&max_w, &max_h);
+	}
+
+	if (resize_mode != GR_RESIZE_REPLACE) {
+		if (x >= max_w) x = max_w - 1;
+		if (y >= max_h) y = max_h - 1;
+		if (x + w > max_w) w = max_w - x;
+		if (y + h > max_h) h = max_h - y;
+		if (w > max_w) w = max_w;
+		if (h > max_h) h = max_h;
+	}
+
+	// Store unscaled values
+	gr_screen.offset_x_unscaled = x;
+	gr_screen.offset_y_unscaled = y;
+	gr_screen.clip_left_unscaled = 0;
+	gr_screen.clip_right_unscaled = w - 1;
+	gr_screen.clip_top_unscaled = 0;
+	gr_screen.clip_bottom_unscaled = h - 1;
+	gr_screen.clip_width_unscaled = w;
+	gr_screen.clip_height_unscaled = h;
+
+	if (to_resize) {
+		gr_resize_screen_pos(&x, &y, &w, &h, resize_mode);
+	} else {
+		gr_unsize_screen_pos(&gr_screen.offset_x_unscaled, &gr_screen.offset_y_unscaled);
+		gr_unsize_screen_pos(&gr_screen.clip_right_unscaled, &gr_screen.clip_bottom_unscaled);
+		gr_unsize_screen_pos(&gr_screen.clip_width_unscaled, &gr_screen.clip_height_unscaled);
+	}
+
+	// Update gr_screen clip state (scaled values)
+	gr_screen.offset_x = x;
+	gr_screen.offset_y = y;
+	gr_screen.clip_left = 0;
+	gr_screen.clip_top = 0;
+	gr_screen.clip_right = w - 1;
+	gr_screen.clip_bottom = h - 1;
+	gr_screen.clip_width = w;
+	gr_screen.clip_height = h;
+
+	gr_screen.clip_aspect = i2fl(w) / i2fl(h);
+	gr_screen.clip_center_x = (gr_screen.clip_left + gr_screen.clip_right) * 0.5f;
+	gr_screen.clip_center_y = (gr_screen.clip_top + gr_screen.clip_bottom) * 0.5f;
+
+	// Check if full screen (disable scissor)
+	if ((x == 0) && (y == 0) && (w == max_w) && (h == max_h)) {
+		stateTracker->setScissorEnabled(false);
+		return;
+	}
+
+	// Enable scissor test
+	stateTracker->setScissorEnabled(true);
+	stateTracker->setScissor(x, y, static_cast<uint32_t>(w), static_cast<uint32_t>(h));
+}
+
+void VulkanDrawManager::resetClip()
+{
+	auto* stateTracker = getStateTracker();
+
+	int max_w = gr_screen.max_w;
+	int max_h = gr_screen.max_h;
+
+	gr_screen.offset_x = gr_screen.offset_x_unscaled = 0;
+	gr_screen.offset_y = gr_screen.offset_y_unscaled = 0;
+	gr_screen.clip_left = gr_screen.clip_left_unscaled = 0;
+	gr_screen.clip_top = gr_screen.clip_top_unscaled = 0;
+	gr_screen.clip_right = gr_screen.clip_right_unscaled = max_w - 1;
+	gr_screen.clip_bottom = gr_screen.clip_bottom_unscaled = max_h - 1;
+	gr_screen.clip_width = gr_screen.clip_width_unscaled = max_w;
+	gr_screen.clip_height = gr_screen.clip_height_unscaled = max_h;
+
+	if (gr_screen.custom_size) {
+		gr_unsize_screen_pos(&gr_screen.clip_right_unscaled, &gr_screen.clip_bottom_unscaled);
+		gr_unsize_screen_pos(&gr_screen.clip_width_unscaled, &gr_screen.clip_height_unscaled);
+	}
+
+	gr_screen.clip_aspect = i2fl(max_w) / i2fl(max_h);
+	gr_screen.clip_center_x = (gr_screen.clip_left + gr_screen.clip_right) * 0.5f;
+	gr_screen.clip_center_y = (gr_screen.clip_top + gr_screen.clip_bottom) * 0.5f;
+
+	stateTracker->setScissorEnabled(false);
+}
+
+int VulkanDrawManager::zbufferGet()
+{
+	if (!gr_global_zbuffering) {
+		return GR_ZBUFF_NONE;
+	}
+	return m_zbufferMode;
+}
+
+int VulkanDrawManager::zbufferSet(int mode)
+{
+	auto* stateTracker = getStateTracker();
+
+	int prev = m_zbufferMode;
+	m_zbufferMode = mode;
+
+	// Update FSO global state
+	if (mode == GR_ZBUFF_NONE) {
+		gr_zbuffering = 0;
+	} else {
+		gr_zbuffering = 1;
+	}
+	gr_zbuffering_mode = mode;
+
+	gr_zbuffer_type zbufType;
+	switch (mode) {
+	case GR_ZBUFF_NONE:
+		zbufType = ZBUFFER_TYPE_NONE;
+		break;
+	case GR_ZBUFF_READ:
+		zbufType = ZBUFFER_TYPE_READ;
+		break;
+	case GR_ZBUFF_WRITE:
+		zbufType = ZBUFFER_TYPE_WRITE;
+		break;
+	case GR_ZBUFF_FULL:
+	default:
+		zbufType = ZBUFFER_TYPE_FULL;
+		break;
+	}
+	stateTracker->setZBufferMode(zbufType);
+
+	return prev;
+}
+
+void VulkanDrawManager::zbufferClear(int mode)
+{
+	auto* stateTracker = getStateTracker();
+
+	if (mode) {
+		// Enable zbuffering and clear
+		gr_zbuffering = 1;
+		gr_zbuffering_mode = GR_ZBUFF_FULL;
+		gr_global_zbuffering = 1;
+		m_zbufferMode = GR_ZBUFF_FULL;
+		stateTracker->setZBufferMode(ZBUFFER_TYPE_FULL);
+
+		// Clear depth buffer
+		vk::ClearAttachment clearAttachment;
+		clearAttachment.aspectMask = vk::ImageAspectFlagBits::eDepth;
+		clearAttachment.clearValue.depthStencil.depth = 1.0f;
+		clearAttachment.clearValue.depthStencil.stencil = 0;
+
+		vk::ClearRect clearRect;
+		clearRect.rect.offset = vk::Offset2D(0, 0);
+		clearRect.rect.extent = vk::Extent2D(static_cast<uint32_t>(gr_screen.max_w),
+			static_cast<uint32_t>(gr_screen.max_h));
+		clearRect.baseArrayLayer = 0;
+		clearRect.layerCount = 1;
+
+		stateTracker->getCommandBuffer().clearAttachments(1, &clearAttachment, 1, &clearRect);
+	} else {
+		// Disable zbuffering
+		gr_zbuffering = 0;
+		gr_zbuffering_mode = GR_ZBUFF_NONE;
+		gr_global_zbuffering = 0;
+		m_zbufferMode = GR_ZBUFF_NONE;
+		stateTracker->setZBufferMode(ZBUFFER_TYPE_NONE);
+	}
+}
+
+int VulkanDrawManager::stencilSet(int mode)
+{
+	auto* stateTracker = getStateTracker();
+
+	int prev = m_stencilMode;
+	m_stencilMode = mode;
+	gr_stencil_mode = mode;
+
+	stateTracker->setStencilMode(mode);
+
+	// Set stencil reference based on mode
+	if (mode == GR_STENCIL_READ || mode == GR_STENCIL_WRITE) {
+		stateTracker->setStencilReference(1);
+	} else {
+		stateTracker->setStencilReference(0);
+	}
+
+	return prev;
+}
+
+void VulkanDrawManager::stencilClear()
+{
+	auto* stateTracker = getStateTracker();
+
+	// Clear stencil buffer
+	vk::ClearAttachment clearAttachment;
+	clearAttachment.aspectMask = vk::ImageAspectFlagBits::eStencil;
+	clearAttachment.clearValue.depthStencil.depth = 1.0f;
+	clearAttachment.clearValue.depthStencil.stencil = 0;
+
+	vk::ClearRect clearRect;
+	clearRect.rect.offset = vk::Offset2D(0, 0);
+	clearRect.rect.extent = vk::Extent2D(static_cast<uint32_t>(gr_screen.max_w),
+		static_cast<uint32_t>(gr_screen.max_h));
+	clearRect.baseArrayLayer = 0;
+	clearRect.layerCount = 1;
+
+	stateTracker->getCommandBuffer().clearAttachments(1, &clearAttachment, 1, &clearRect);
+}
+
+int VulkanDrawManager::setCull(int cull)
+{
+	auto* stateTracker = getStateTracker();
+
+	int prev = m_cullEnabled ? 1 : 0;
+	m_cullEnabled = (cull != 0);
+
+	stateTracker->setCullMode(m_cullEnabled);
+
+	return prev;
+}
+
+void VulkanDrawManager::renderPrimitives(material* material_info, primitive_type prim_type,
+	vertex_layout* layout, int offset, int n_verts,
+	gr_buffer_handle buffer_handle, size_t buffer_offset)
+{
+	if (!material_info || !layout || n_verts <= 0) {
+		return;
+	}
+
+	m_frameStats.renderPrimitiveCalls++;
+
+	// Apply material state and bind pipeline
+	if (!applyMaterial(material_info, prim_type, layout)) {
+		return;
+	}
+
+	// Bind vertex buffer
+	bindVertexBuffer(buffer_handle, buffer_offset);
+
+	// Issue draw call
+	draw(prim_type, offset, n_verts);
+}
+
+void VulkanDrawManager::renderPrimitivesBatched(batched_bitmap_material* material_info,
+	primitive_type prim_type, vertex_layout* layout,
+	int offset, int n_verts, gr_buffer_handle buffer_handle)
+{
+	if (!material_info || !layout || n_verts <= 0) {
+		return;
+	}
+
+	m_frameStats.renderBatchedCalls++;
+
+	// Apply base material state and bind pipeline
+	if (!applyMaterial(material_info, prim_type, layout)) {
+		return;
+	}
+
+	// Bind vertex buffer
+	bindVertexBuffer(buffer_handle, 0);
+
+	// Issue draw call
+	draw(prim_type, offset, n_verts);
+}
+
+void VulkanDrawManager::renderPrimitivesParticle(particle_material* material_info,
+	primitive_type prim_type, vertex_layout* layout,
+	int offset, int n_verts, gr_buffer_handle buffer_handle)
+{
+	if (!material_info || !layout || n_verts <= 0) {
+		return;
+	}
+
+	m_frameStats.renderParticleCalls++;
+
+	if (!applyMaterial(material_info, prim_type, layout)) {
+		return;
+	}
+	bindVertexBuffer(buffer_handle, 0);
+	draw(prim_type, offset, n_verts);
+}
+
+void VulkanDrawManager::renderPrimitivesDistortion(distortion_material* material_info,
+	primitive_type prim_type, vertex_layout* layout,
+	int n_verts, gr_buffer_handle buffer_handle)
+{
+	if (!material_info || !layout || n_verts <= 0) {
+		return;
+	}
+
+	if (!applyMaterial(material_info, prim_type, layout)) {
+		return;
+	}
+	bindVertexBuffer(buffer_handle, 0);
+	draw(prim_type, 0, n_verts);
+}
+
+void VulkanDrawManager::renderMovie(movie_material* material_info, primitive_type prim_type,
+	vertex_layout* layout, int n_verts, gr_buffer_handle buffer_handle)
+{
+	if (!material_info || !layout || n_verts <= 0) {
+		return;
+	}
+
+	m_frameStats.renderMovieCalls++;
+
+	if (!applyMaterial(material_info, prim_type, layout)) {
+		return;
+	}
+	bindVertexBuffer(buffer_handle, 0);
+	draw(prim_type, 0, n_verts);
+}
+
+void VulkanDrawManager::renderNanoVG(nanovg_material* material_info, primitive_type prim_type,
+	vertex_layout* layout, int offset, int n_verts,
+	gr_buffer_handle buffer_handle)
+{
+	if (!material_info || !layout || n_verts <= 0) {
+		return;
+	}
+
+	m_frameStats.renderNanoVGCalls++;
+
+	if (!applyMaterial(material_info, prim_type, layout)) {
+		return;
+	}
+	bindVertexBuffer(buffer_handle, 0);
+	draw(prim_type, offset, n_verts);
+}
+
+void VulkanDrawManager::renderRocketPrimitives(interface_material* material_info,
+	primitive_type prim_type, vertex_layout* layout,
+	int n_indices, gr_buffer_handle vertex_buffer,
+	gr_buffer_handle index_buffer)
+{
+	if (!material_info || !layout || n_indices <= 0) {
+		return;
+	}
+
+	m_frameStats.renderRocketCalls++;
+
+	if (!applyMaterial(material_info, prim_type, layout)) {
+		return;
+	}
+	bindVertexBuffer(vertex_buffer, 0);
+	bindIndexBuffer(index_buffer);
+	drawIndexed(prim_type, n_indices, 0, 0);
+}
+
+void VulkanDrawManager::renderModel(model_material* material_info, indexed_vertex_source* vert_source,
+	vertex_buffer* bufferp, size_t texi)
+{
+	if (!material_info || !vert_source || !bufferp) {
+		return;
+	}
+
+	m_frameStats.renderModelCalls++;
+
+	// Validate buffers
+	if (!vert_source->Vbuffer_handle.isValid() || !vert_source->Ibuffer_handle.isValid()) {
+		nprintf(("Vulkan", "VulkanDrawManager: renderModel called with invalid buffer handles\n"));
+		return;
+	}
+
+	if (texi >= bufferp->tex_buf.size()) {
+		nprintf(("Vulkan", "VulkanDrawManager: renderModel texi out of range\n"));
+		return;
+	}
+
+	auto* stateTracker = getStateTracker();
+
+	// Get buffer data for this texture/draw
+	buffer_data* datap = &bufferp->tex_buf[texi];
+
+	if (datap->n_verts == 0) {
+		return; // Nothing to draw
+	}
+
+	// Apply model material state and bind pipeline
+	// Model rendering always uses triangles
+	if (!applyMaterial(material_info, PRIM_TYPE_TRIS, &bufferp->layout)) {
+		return;
+	}
+
+	// Bind vertex buffer with the model's vertex offset
+	auto* bufferManager = getBufferManager();
+
+	vk::Buffer vbuffer = bufferManager->getVkBuffer(vert_source->Vbuffer_handle);
+	vk::Buffer ibuffer = bufferManager->getVkBuffer(vert_source->Ibuffer_handle);
+
+	Assertion(vbuffer, "VulkanDrawManager::renderModel got null vertex buffer from valid handle!");
+	Assertion(ibuffer, "VulkanDrawManager::renderModel got null index buffer from valid handle!");
+
+	// Bind vertex buffer at offset 0 (start of heap buffer), matching OpenGL behavior.
+	// The Base_vertex_offset in drawIndexed handles the heap allocation offset.
+	stateTracker->bindVertexBuffer(0, vbuffer, 0);
+
+	// Determine index type based on VB_FLAG_LARGE_INDEX flag
+	vk::IndexType indexType = (datap->flags & VB_FLAG_LARGE_INDEX) ?
+		vk::IndexType::eUint32 : vk::IndexType::eUint16;
+
+	// Bind index buffer at the model's heap allocation offset.
+	// The firstIndex (from datap->index_offset) handles per-mesh offset within the model.
+	stateTracker->bindIndexBuffer(ibuffer, static_cast<vk::DeviceSize>(vert_source->Index_offset), indexType);
+
+	// Base vertex offset: accounts for heap allocation position + per-mesh vertex offset.
+	// This matches OpenGL's glDrawElementsBaseVertex usage.
+	int32_t baseVertex = static_cast<int32_t>(vert_source->Base_vertex_offset + bufferp->vertex_num_offset);
+
+	// Calculate first index.
+	// The index_offset in buffer_data is in bytes; convert it to an index count.
+	uint32_t firstIndex;
+	if (indexType == vk::IndexType::eUint32) {
+		firstIndex = static_cast<uint32_t>(datap->index_offset / sizeof(uint32_t));
+	} else {
+		firstIndex = static_cast<uint32_t>(datap->index_offset / sizeof(uint16_t));
+	}
+
+	// Issue indexed draw call
+	m_frameStats.drawIndexedCalls++;
+	m_frameStats.totalIndices += datap->n_verts;
+
+	// Flush any dirty dynamic state before draw
+	stateTracker->applyDynamicState();
+
+	// Shadow map rendering uses 4 instances (one per cascade), routed via gl_InstanceIndex → gl_Layer
+	uint32_t instanceCount = Rendering_to_shadow_map ? 4 : 1;
+
+	auto cmdBuffer = stateTracker->getCommandBuffer();
+	cmdBuffer.drawIndexed(
+		static_cast<uint32_t>(datap->n_verts), // index count
+		instanceCount,                         // instance count
+		firstIndex,                            // first index
+		baseVertex,                            // vertex offset
+		0                                      // first instance
+	);
+}
+
+void VulkanDrawManager::setFillMode(int mode)
+{
+	m_fillMode = mode;
+}
+
+int VulkanDrawManager::setColorBuffer(int mode)
+{
+	int prev = m_colorBufferEnabled ? 1 : 0;
+	m_colorBufferEnabled = (mode != 0);
+	return prev;
+}
+
+void VulkanDrawManager::setTextureAddressing(int mode)
+{
+	m_textureAddressing = mode;
+}
+
+void VulkanDrawManager::setDepthBiasEnabled(bool enabled)
+{
+	m_depthBiasEnabled = enabled;
+}
+
+void VulkanDrawManager::setDepthTextureOverride(vk::ImageView view, vk::Sampler sampler)
+{
+	m_depthTextureOverride = view;
+	m_depthSamplerOverride = sampler;
+}
+
+void VulkanDrawManager::clearDepthTextureOverride()
+{
+	m_depthTextureOverride = nullptr;
+	m_depthSamplerOverride = nullptr;
+}
+
+void VulkanDrawManager::setSceneColorOverride(vk::ImageView view, vk::Sampler sampler)
+{
+	m_sceneColorOverride = view;
+	m_sceneColorSamplerOverride = sampler;
+}
+
+void VulkanDrawManager::setDistMapOverride(vk::ImageView view, vk::Sampler sampler)
+{
+	m_distMapOverride = view;
+	m_distMapSamplerOverride = sampler;
+}
+
+void VulkanDrawManager::clearDistortionOverrides()
+{
+	m_sceneColorOverride = nullptr;
+	m_sceneColorSamplerOverride = nullptr;
+	m_distMapOverride = nullptr;
+	m_distMapSamplerOverride = nullptr;
+}
+
+void VulkanDrawManager::clearStates()
+{
+	auto* stateTracker = getStateTracker();
+
+	// Match OpenGL's gr_opengl_clear_states() behavior:
+	// gr_zbias(0), gr_zbuffer_set(ZBUFFER_TYPE_READ), gr_set_cull(0),
+	// gr_set_fill_mode(GR_FILL_MODE_SOLID)
+	m_zbufferMode = GR_ZBUFF_READ;
+	m_stencilMode = GR_STENCIL_NONE;
+	m_cullEnabled = false;
+	m_fillMode = GR_FILL_MODE_SOLID;
+	m_colorBufferEnabled = true;
+	m_textureAddressing = TMAP_ADDRESS_WRAP;
+	m_depthBiasEnabled = false;
+
+	gr_zbuffering = 1;
+	gr_zbuffering_mode = GR_ZBUFF_READ;
+	gr_global_zbuffering = 1;
+	gr_stencil_mode = GR_STENCIL_NONE;
+
+	stateTracker->setZBufferMode(ZBUFFER_TYPE_READ);
+	stateTracker->setStencilMode(GR_STENCIL_NONE);
+	stateTracker->setCullMode(false);
+	stateTracker->setScissorEnabled(false);
+	stateTracker->setDepthBias(0.0f, 0.0f);
+	stateTracker->setLineWidth(1.0f);
+
+	// Clear pending uniform bindings
+	clearPendingUniformBindings();
+
+	// NOTE: Do NOT call resetClip() here. OpenGL's gr_opengl_clear_states() does
+	// not reset the clip region, and callers (e.g. model_render_immediate) rely on
+	// the clip/offset state surviving through clear_states for subsequent 2D draws.
+}
+
+void VulkanDrawManager::setPendingUniformBinding(uniform_block_type blockType, gr_buffer_handle bufferHandle,
+	vk::DeviceSize offset, vk::DeviceSize size)
+{
+	size_t index = static_cast<size_t>(blockType);
+	if (index >= NUM_UNIFORM_BLOCK_TYPES) {
+		return;
+	}
+
+	m_pendingUniformBindings[index].bufferHandle = bufferHandle;
+	m_pendingUniformBindings[index].offset = offset;
+	m_pendingUniformBindings[index].size = size;
+	m_pendingUniformBindings[index].valid = bufferHandle.isValid();
+}
+
+void VulkanDrawManager::clearPendingUniformBindings()
+{
+	for (auto& binding : m_pendingUniformBindings) {
+		binding.valid = false;
+		binding.bufferHandle = gr_buffer_handle();
+		binding.offset = 0;
+		binding.size = 0;
+	}
+}
+
+void VulkanDrawManager::resetFrameStats()
+{
+	m_frameStats = {};
+}
+
+void VulkanDrawManager::printFrameStats()
+{
+	// Print summary every frame for the first 200 frames, then every 60 frames
+	bool shouldPrint = (m_frameStatsFrameNum < 200) || (m_frameStatsFrameNum % 60 == 0);
+
+	if (shouldPrint) {
+		mprintf(("FRAME %d STATS: draws=%d indexed=%d verts=%d idxs=%d | applyMat=%d/%d fails | noPipeline=%d sdrNeg1=%d\n",
+			m_frameStatsFrameNum,
+			m_frameStats.drawCalls,
+			m_frameStats.drawIndexedCalls,
+			m_frameStats.totalVertices,
+			m_frameStats.totalIndices,
+			m_frameStats.applyMaterialFailures,
+			m_frameStats.applyMaterialCalls,
+			m_frameStats.noPipelineSkips,
+			m_frameStats.shaderHandleNeg1));
+		mprintf(("  CALLS: prim=%d batch=%d model=%d particle=%d nanovg=%d rocket=%d movie=%d\n",
+			m_frameStats.renderPrimitiveCalls,
+			m_frameStats.renderBatchedCalls,
+			m_frameStats.renderModelCalls,
+			m_frameStats.renderParticleCalls,
+			m_frameStats.renderNanoVGCalls,
+			m_frameStats.renderRocketCalls,
+			m_frameStats.renderMovieCalls));
+	}
+
+	m_frameStatsFrameNum++;
+}
+
+
+PipelineConfig VulkanDrawManager::buildPipelineConfig(material* mat, primitive_type prim_type)
+{
+	PipelineConfig config;
+
+	// Get shader info from material
+	int shaderHandle = mat->get_shader_handle();
+	auto* shaderManager = getShaderManager();
+	if (shaderHandle >= 0) {
+		const auto* shaderModule = shaderManager->getShaderByHandle(shaderHandle);
+		if (shaderModule) {
+			config.shaderType = shaderModule->type;
+		}
+	}
+
+	// Primitive type
+	config.primitiveType = prim_type;
+
+	// Depth mode
+	config.depthMode = mat->get_depth_mode();
+
+	// Blend mode
+	config.blendMode = mat->get_blend_mode();
+
+	// Cull mode
+	config.cullEnabled = mat->get_cull_mode();
+
+	// Override shader for shadow map rendering
+	if (Rendering_to_shadow_map && config.shaderType == SDR_TYPE_MODEL) {
+		config.shaderType = SDR_TYPE_SHADOW_MAP;
+	}
+
+	// Front face winding: match OpenGL, which defaults to CCW and only switches to CW
+	// for model rendering (opengl_tnl_set_model_material sets GL_CW).
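+	// (GL equivalent for reference: glFrontFace(GL_CW) when drawing models,
+	// glFrontFace(GL_CCW) otherwise.)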
+	config.frontFaceCW = (config.shaderType == SDR_TYPE_MODEL || config.shaderType == SDR_TYPE_SHADOW_MAP);
+
+	// Depth write
+	config.depthWriteEnabled = (config.depthMode == ZBUFFER_TYPE_FULL ||
+		config.depthMode == ZBUFFER_TYPE_WRITE);
+
+	// Stencil state
+	config.stencilEnabled = mat->is_stencil_enabled();
+	if (config.stencilEnabled) {
+		config.stencilFunc = mat->get_stencil_func().compare;
+		config.stencilMask = mat->get_stencil_func().mask;
+		config.frontStencilOp = mat->get_front_stencil_op();
+		config.backStencilOp = mat->get_back_stencil_op();
+	}
+
+	// Color write mask
+	config.colorWriteMask = mat->get_color_mask();
+
+	// Override color write mask if color buffer writes are disabled
+	if (!m_colorBufferEnabled) {
+		config.colorWriteMask = {false, false, false, false};
+	}
+
+	// Fill mode and depth bias from draw manager state
+	config.fillMode = m_fillMode;
+	config.depthBiasEnabled = m_depthBiasEnabled;
+
+	// Get current render pass, attachment count, and sample count from state tracker
+	auto* stateTracker = getStateTracker();
+	config.renderPass = stateTracker->getCurrentRenderPass();
+	config.colorAttachmentCount = stateTracker->getColorAttachmentCount();
+	config.sampleCount = stateTracker->getCurrentSampleCount();
+
+	return config;
+}
+
+bool VulkanDrawManager::bindMaterialTextures(material* mat, vk::DescriptorSet materialSet,
+	DescriptorWriter* writer)
+{
+	auto* texManager = getTextureManager();
+
+	if (!materialSet) {
+		return false;
+	}
+
+	// Get sampler matching current texture addressing mode and fallback texture
+	vk::SamplerAddressMode addressMode = convertTextureAddressing(m_textureAddressing);
+	vk::Sampler sampler = texManager->getSampler(
+		vk::Filter::eLinear, vk::Filter::eLinear, addressMode, true, 0.0f, true);
+	// OpenGL skips applying texture addressing for AABITMAP, INTERFACE, and CUBEMAP
+	// types - they always stay clamped. We need a clamp sampler for those cases.
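+	// (For example, a font AABITMAP sampled with wrap addressing would bleed
+	// opposite-edge texels into glyph borders at the atlas edges; a
+	// hypothetical but representative failure mode.)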
+	vk::Sampler clampSampler = texManager->getSampler(
+		vk::Filter::eLinear, vk::Filter::eLinear, vk::SamplerAddressMode::eClampToEdge, true, 0.0f, true);
+	vk::ImageView fallbackView = texManager->getFallback2DArrayView();
+
+	// Check for movie material - needs special YUV texture handling
+	auto* movieMat = dynamic_cast<movie_material*>(mat);
+	if (movieMat) {
+		// Movie materials use 3 YUV textures in the texture array at indices 0, 1, 2
+		std::array<vk::DescriptorImageInfo, 3> textureInfos;
+
+		// Initialize all slots with fallback
+		for (auto& info : textureInfos) {
+			info.sampler = sampler;
+			info.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+			info.imageView = fallbackView;
+		}
+
+		auto loadYuvTexture = [&](int handle, uint32_t slot) {
+			if (handle < 0 || slot >= textureInfos.size()) return;
+			auto* texSlot = texManager->getTextureSlot(handle);
+			if (!texSlot || !texSlot->imageView) {
+				// Load on demand - YUV planes are 8bpp grayscale
+				bitmap* bmp = bm_lock(handle, 8, BMP_TEX_OTHER);
+				if (bmp) {
+					texManager->bm_data(handle, bmp);
+					bm_unlock(handle);
+					texSlot = texManager->getTextureSlot(handle);
+				}
+			}
+			if (texSlot && texSlot->imageView) {
+				textureInfos[slot].imageView = texSlot->imageView;
+			}
+		};
+
+		loadYuvTexture(movieMat->getYtex(), 0); // Y at index 0
+		loadYuvTexture(movieMat->getUtex(), 1); // U at index 1
+		loadYuvTexture(movieMat->getVtex(), 2); // V at index 2
+
+		writer->writeTextureArray(materialSet, 1, textureInfos.data(), static_cast<uint32_t>(textureInfos.size()));
+		return true;
+	}
+
+	// Build texture info array for all material texture slots
+	std::array<vk::DescriptorImageInfo, VulkanDescriptorManager::MAX_TEXTURE_BINDINGS> textureInfos;
+
+	// Initialize all slots with fallback texture (1x1 white)
+	for (auto& info : textureInfos) {
+		info.sampler = sampler;
+		info.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		info.imageView = fallbackView; // Fallback texture for unbound slots
+	}
+
+	// Helper to set texture at a specific slot - loads on-demand if not present
+	static int texLogCount = 0;
+
+	// Get material's expected texture type for the base map
+	int materialTextureType = mat->get_texture_type();
+
+	auto setTexture = [&](int textureHandle, uint32_t slot, bool isBaseMap = false) {
+		if (textureHandle < 0 || slot >= textureInfos.size()) {
+			return;
+		}
+
+		// Determine bitmap type - match OpenGL's gr_opengl_tcache_set logic:
+		// Override material texture type with bitmap's own type if not NORMAL
+		int bitmapType = isBaseMap ? materialTextureType : TCACHE_TYPE_NORMAL;
+		int overrideType = bm_get_tcache_type(textureHandle);
+		if (overrideType != TCACHE_TYPE_NORMAL) {
+			bitmapType = overrideType;
+		}
+
+		// OpenGL skips applying texture addressing for AABITMAP, INTERFACE, and
+		// CUBEMAP types - they always stay clamped (gropengltexture.cpp:1140-1141).
+		// Match that behavior by using a clamp sampler for these types.
+ if (bitmapType == TCACHE_TYPE_AABITMAP || bitmapType == TCACHE_TYPE_INTERFACE + || bitmapType == TCACHE_TYPE_CUBEMAP) { + textureInfos[slot].sampler = clampSampler; + } + + auto* texSlot = texManager->getTextureSlot(textureHandle); + + // If texture isn't loaded, try to load it on-demand (like OpenGL does) + if (!texSlot || !texSlot->imageView) { + // Determine bpp and flags - matches OpenGL's opengl_determine_bpp_and_flags + ushort lockFlags = 0; + int bpp = 16; + + switch (bitmapType) { + case TCACHE_TYPE_AABITMAP: + lockFlags = BMP_AABITMAP; + bpp = 8; + break; + case TCACHE_TYPE_INTERFACE: + case TCACHE_TYPE_XPARENT: + lockFlags = BMP_TEX_XPARENT; + if (bm_get_type(textureHandle) == BM_TYPE_PCX) { + bpp = 16; + } else { + bpp = 32; + } + break; + case TCACHE_TYPE_COMPRESSED: + switch (bm_is_compressed(textureHandle)) { + case DDS_DXT1: + bpp = 24; + lockFlags = BMP_TEX_DXT1; + break; + case DDS_DXT3: + bpp = 32; + lockFlags = BMP_TEX_DXT3; + break; + case DDS_DXT5: + bpp = 32; + lockFlags = BMP_TEX_DXT5; + break; + default: + bpp = 32; + lockFlags = BMP_TEX_OTHER; + break; + } + break; + case TCACHE_TYPE_NORMAL: + default: + lockFlags = BMP_TEX_OTHER; + if (bm_get_type(textureHandle) == BM_TYPE_PCX) { + bpp = 16; // PCX locking only works with bpp=16 + } else { + if (bm_has_alpha_channel(textureHandle)) { + bpp = 32; + } else { + bpp = 24; + } + } + break; + } + + // Lock bitmap with appropriate flags + bitmap* bmp = bm_lock(textureHandle, bpp, lockFlags); + if (bmp) { + // Upload texture + texManager->bm_data(textureHandle, bmp); + bm_unlock(textureHandle); + + // Re-get the slot after upload + texSlot = texManager->getTextureSlot(textureHandle); + + if (texLogCount < 20) { + mprintf(("bindMaterialTextures: loaded tex %d (type=%d bpp=%d lockFlags=0x%x bmType=%d), slot=%p\n", + textureHandle, bitmapType, bpp, lockFlags, static_cast(bm_get_type(textureHandle)), texSlot)); + texLogCount++; + } + } + } + + if (texSlot && texSlot->imageView) { + textureInfos[slot].imageView = texSlot->imageView; + } else { + if (texLogCount < 20) { + mprintf(("bindMaterialTextures: slot %u handle %d FAILED to load\n", + slot, textureHandle)); + texLogCount++; + } + } + }; + + // Bind material textures to their slots + // Base map uses material's texture type (may be AABITMAP for fonts) + setTexture(mat->get_texture_map(TM_BASE_TYPE), TEXTURE_BINDING_BASE_MAP, true); + setTexture(mat->get_texture_map(TM_GLOW_TYPE), TEXTURE_BINDING_GLOW_MAP); + + // Specular - prefer spec_gloss if available + int specMap = mat->get_texture_map(TM_SPEC_GLOSS_TYPE); + if (specMap < 0) { + specMap = mat->get_texture_map(TM_SPECULAR_TYPE); + } + setTexture(specMap, TEXTURE_BINDING_SPEC_MAP); + + setTexture(mat->get_texture_map(TM_NORMAL_TYPE), TEXTURE_BINDING_NORMAL_MAP); + setTexture(mat->get_texture_map(TM_HEIGHT_TYPE), TEXTURE_BINDING_HEIGHT_MAP); + setTexture(mat->get_texture_map(TM_AMBIENT_TYPE), TEXTURE_BINDING_AMBIENT_MAP); + setTexture(mat->get_texture_map(TM_MISC_TYPE), TEXTURE_BINDING_MISC_MAP); + + // Update the texture array in the descriptor set + // All slots now have valid views (either actual texture or fallback) + writer->writeTextureArray(materialSet, 1, textureInfos.data(), static_cast(textureInfos.size())); + + return true; +} + +bool VulkanDrawManager::applyMaterial(material* mat, primitive_type prim_type, vertex_layout* layout) +{ + auto* stateTracker = getStateTracker(); + auto* pipelineManager = getPipelineManager(); + auto* descManager = getDescriptorManager(); + auto* bufferManager = 
getBufferManager(); + + if (!mat || !layout) { + return false; + } + + // Helper to get vk::Buffer from handle at draw time (survives buffer recreation) + auto getBuffer = [bufferManager](const PendingUniformBinding& binding) -> vk::Buffer { + return bufferManager->getVkBuffer(binding.bufferHandle); + }; + + // Offset is already fully resolved at bind time (includes frame base offset) + // to prevent stale lastWriteStreamOffset if the buffer is updated between bind and draw. + auto getResolvedOffset = [](const PendingUniformBinding& binding) -> vk::DeviceSize { + return binding.offset; + }; + + m_frameStats.applyMaterialCalls++; + + // Build pipeline configuration from material + PipelineConfig config = buildPipelineConfig(mat, prim_type); + + // Track shader handle issues + if (mat->get_shader_handle() < 0) { + m_frameStats.shaderHandleNeg1++; + } + + // Check if we have a valid render pass + if (!config.renderPass) { + m_frameStats.applyMaterialFailures++; + mprintf(("VulkanDrawManager: applyMaterial FAIL - no render pass (shaderType=%d)\n", + static_cast(config.shaderType))); + return false; + } + + // Get or create pipeline + vk::Pipeline pipeline = pipelineManager->getPipeline(config, *layout); + if (!pipeline) { + m_frameStats.applyMaterialFailures++; + mprintf(("VulkanDrawManager: applyMaterial FAIL - no pipeline (shaderType=%d handle=%d)\n", + static_cast(config.shaderType), mat->get_shader_handle())); + return false; + } + + // Bind pipeline with layout + stateTracker->bindPipeline(pipeline, pipelineManager->getPipelineLayout()); + + // Bind fallback vertex buffers for attributes the layout doesn't provide but the shader needs + if (pipelineManager->needsFallbackAttribute(*layout, config.shaderType, VertexAttributeLocation::Color)) { + vk::Buffer fallbackColor = bufferManager->getFallbackColorBuffer(); + if (fallbackColor) { + stateTracker->bindVertexBuffer(FALLBACK_COLOR_BINDING, fallbackColor, 0); + } + } + if (pipelineManager->needsFallbackAttribute(*layout, config.shaderType, VertexAttributeLocation::TexCoord)) { + vk::Buffer fallbackTexCoord = bufferManager->getFallbackTexCoordBuffer(); + if (fallbackTexCoord) { + stateTracker->bindVertexBuffer(FALLBACK_TEXCOORD_BINDING, fallbackTexCoord, 0); + } + } + + // Allocate and bind descriptor sets for this draw. + // Vulkan requires all bindings in a descriptor set to be valid before use. + // After pool reset, descriptors contain undefined data. We MUST pre-initialize + // ALL bindings with fallback values, then overwrite with actual pending data. + // All writes are batched into a single vkUpdateDescriptorSets call. 
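+ // (Updating a set after it has been recorded into a pending command buffer is only
+ // legal with the descriptor-indexing UPDATE_AFTER_BIND binding flags, which this
+ // code does not rely on; writing every binding up front and flushing once keeps
+ // the host-side cost to a single vkUpdateDescriptorSets call.)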
+ {
+ DescriptorWriter writer;
+ writer.reset(descManager->getDevice());
+
+ // Get fallback resources for uninitialized bindings
+ vk::Buffer fallbackUBO = bufferManager->getFallbackUniformBuffer();
+ vk::DeviceSize fallbackUBOSize = static_cast<vk::DeviceSize>(bufferManager->getFallbackUniformBufferSize());
+ auto* texManager = getTextureManager();
+ vk::Sampler fallbackSampler = texManager->getDefaultSampler();
+ vk::ImageView fallbackView = texManager->getFallback2DArrayView();
+
+ // Helper: write a pending UBO or fallback if the buffer is null/invalid
+ auto writeUBOOrFallback = [&](DescriptorWriter& w, vk::DescriptorSet set,
+ uint32_t binding, size_t blockIdx) {
+ if (m_pendingUniformBindings[blockIdx].valid) {
+ vk::Buffer buf = getBuffer(m_pendingUniformBindings[blockIdx]);
+ if (buf) {
+ w.writeUniformBuffer(set, binding, buf,
+ getResolvedOffset(m_pendingUniformBindings[blockIdx]),
+ m_pendingUniformBindings[blockIdx].size);
+ return;
+ }
+ }
+ w.writeUniformBuffer(set, binding, fallbackUBO, 0, fallbackUBOSize);
+ };
+
+ // Set 0: Global - bindings: 0=Lights UBO, 1=DeferredGlobals UBO, 2=Shadow tex, 3=Env cube, 4=Irr cube
+ vk::DescriptorSet globalSet = descManager->allocateFrameSet(DescriptorSetIndex::Global);
+ Verify(globalSet);
+ // UBO bindings: write real pending buffer or fallback (one write per binding)
+ for (size_t i = 0; i < NUM_UNIFORM_BLOCK_TYPES; ++i) {
+ uniform_block_type blockType = static_cast<uniform_block_type>(i);
+ DescriptorSetIndex setIndex;
+ uint32_t binding;
+ if (VulkanDescriptorManager::getUniformBlockBinding(blockType, setIndex, binding) &&
+ setIndex == DescriptorSetIndex::Global) {
+ writeUBOOrFallback(writer, globalSet, binding, i);
+ }
+ }
+ // Texture bindings
+ writer.writeTexture(globalSet, 2, fallbackView, fallbackSampler);
+ vk::ImageView fallbackCubeView = texManager->getFallbackCubeView();
+ writer.writeTexture(globalSet, 3, fallbackCubeView, fallbackSampler);
+ writer.writeTexture(globalSet, 4, fallbackCubeView, fallbackSampler);
+ writer.flush();
+ stateTracker->bindDescriptorSet(DescriptorSetIndex::Global, globalSet);
+
+ // Set 1: Material - bindings: 0=ModelData UBO, 1=Texture array, 2=DecalGlobals UBO,
+ // 3=Transform SSBO, 4=depth, 5=scene color, 6=dist map
+ vk::DescriptorSet materialSet = descManager->allocateFrameSet(DescriptorSetIndex::Material);
+ Verify(materialSet);
+ // UBO bindings: write real pending buffer or fallback (one write per binding)
+ for (size_t i = 0; i < NUM_UNIFORM_BLOCK_TYPES; ++i) {
+ uniform_block_type blockType = static_cast<uniform_block_type>(i);
+ DescriptorSetIndex setIndex;
+ uint32_t binding;
+ if (VulkanDescriptorManager::getUniformBlockBinding(blockType, setIndex, binding) &&
+ setIndex == DescriptorSetIndex::Material) {
+ writeUBOOrFallback(writer, materialSet, binding, i);
+ }
+ }
+ // Binding 3: Transform buffer SSBO — real if available, else fallback
+ {
+ uint32_t tfIdx = descManager->getCurrentFrame();
+ auto& tf = g_transformBuffers[tfIdx];
+ if (tf.buffer && tf.lastUploadSize > 0) {
+ writer.writeStorageBuffer(materialSet, 3, tf.buffer,
+ static_cast<vk::DeviceSize>(tf.lastUploadOffset),
+ static_cast<vk::DeviceSize>(tf.lastUploadSize));
+ } else {
+ writer.writeStorageBuffer(materialSet, 3, fallbackUBO, 0, fallbackUBOSize);
+ }
+ }
+ // Binding 4: depth map for soft particles
+ {
+ vk::ImageView depthView = m_depthTextureOverride ? m_depthTextureOverride
+ : texManager->getFallbackTextureView2D();
+ vk::Sampler depthSampler = m_depthSamplerOverride ?
m_depthSamplerOverride + : texManager->getDefaultSampler(); + writer.writeTexture(materialSet, 4, depthView, depthSampler); + } + // Binding 5: scene color / frameBuffer for distortion + { + vk::ImageView sceneView = m_sceneColorOverride ? m_sceneColorOverride + : texManager->getFallbackTextureView2D(); + vk::Sampler sceneSampler = m_sceneColorSamplerOverride ? m_sceneColorSamplerOverride + : texManager->getDefaultSampler(); + writer.writeTexture(materialSet, 5, sceneView, sceneSampler); + } + // Binding 6: distortion map + { + vk::ImageView distView = m_distMapOverride ? m_distMapOverride + : texManager->getFallbackTextureView2D(); + vk::Sampler distSampler = m_distMapSamplerOverride ? m_distMapSamplerOverride + : texManager->getDefaultSampler(); + writer.writeTexture(materialSet, 6, distView, distSampler); + } + // Binding 1: Texture array + bindMaterialTextures(mat, materialSet, &writer); + writer.flush(); + stateTracker->bindDescriptorSet(DescriptorSetIndex::Material, materialSet); + + // Set 2: PerDraw - bindings: 0=GenericData, 1=Matrices, 2=NanoVGData, 3=DecalInfo, 4=MovieData + vk::DescriptorSet perDrawSet = descManager->allocateFrameSet(DescriptorSetIndex::PerDraw); + Verify(perDrawSet); + // UBO bindings: write real pending buffer or fallback (one write per binding) + for (size_t i = 0; i < NUM_UNIFORM_BLOCK_TYPES; ++i) { + uniform_block_type blockType = static_cast(i); + DescriptorSetIndex setIndex; + uint32_t binding; + if (VulkanDescriptorManager::getUniformBlockBinding(blockType, setIndex, binding) && + setIndex == DescriptorSetIndex::PerDraw) { + writeUBOOrFallback(writer, perDrawSet, binding, i); + } + } + writer.flush(); + stateTracker->bindDescriptorSet(DescriptorSetIndex::PerDraw, perDrawSet); + } + + // Update tracked state for FSO compatibility + stateTracker->setZBufferMode(mat->get_depth_mode()); + stateTracker->setCullMode(mat->get_cull_mode()); + + if (mat->is_stencil_enabled()) { + stateTracker->setStencilMode(GR_STENCIL_READ); + stateTracker->setStencilReference(mat->get_stencil_func().ref); + } else { + stateTracker->setStencilMode(GR_STENCIL_NONE); + } + + // Set depth bias if needed + stateTracker->setDepthBias(static_cast(mat->get_depth_bias()), 0.0f); + + return true; +} + +void VulkanDrawManager::bindVertexBuffer(gr_buffer_handle handle, size_t offset) +{ + auto* bufferManager = getBufferManager(); + auto* stateTracker = getStateTracker(); + + if (!handle.isValid()) { + return; + } + + vk::Buffer buffer = bufferManager->getVkBuffer(handle); + if (buffer) { + // Add frame base offset for ring buffer support + // This maps the caller's offset into the current frame's span + size_t frameOffset = bufferManager->getFrameBaseOffset(handle); + size_t totalOffset = frameOffset + offset; + stateTracker->bindVertexBuffer(0, buffer, static_cast(totalOffset)); + } +} + +void VulkanDrawManager::bindIndexBuffer(gr_buffer_handle handle) +{ + auto* bufferManager = getBufferManager(); + auto* stateTracker = getStateTracker(); + + if (!handle.isValid()) { + return; + } + + vk::Buffer buffer = bufferManager->getVkBuffer(handle); + if (buffer) { + // Add frame base offset for ring buffer support (mirrors bindVertexBuffer) + size_t frameOffset = bufferManager->getFrameBaseOffset(handle); + stateTracker->bindIndexBuffer(buffer, static_cast(frameOffset), vk::IndexType::eUint32); + } +} + +void VulkanDrawManager::draw(primitive_type prim_type, int first_vertex, int vertex_count) +{ + auto* stateTracker = getStateTracker(); + + Assertion(stateTracker->getCurrentPipeline(), + 
"draw() called with no bound pipeline! prim_type=%d first_vertex=%d vertex_count=%d", + static_cast(prim_type), first_vertex, vertex_count); + if (!stateTracker->getCurrentPipeline()) { + m_frameStats.noPipelineSkips++; + return; + } + + m_frameStats.drawCalls++; + m_frameStats.totalVertices += vertex_count; + + // Flush any dirty dynamic state (viewport, scissor, depth bias, stencil ref) + // before issuing the draw command. applyMaterial sets these AFTER bindPipeline, + // so they may be dirty even when the pipeline didn't change. + stateTracker->applyDynamicState(); + + auto cmdBuffer = stateTracker->getCommandBuffer(); + cmdBuffer.draw(static_cast(vertex_count), + 1, + static_cast(first_vertex), + 0); +} + +void VulkanDrawManager::drawIndexed(primitive_type prim_type, int index_count, int first_index, int vertex_offset) +{ + auto* stateTracker = getStateTracker(); + + Assertion(stateTracker->getCurrentPipeline(), + "drawIndexed() called with no bound pipeline! prim_type=%d index_count=%d first_index=%d vertex_offset=%d", + static_cast(prim_type), index_count, first_index, vertex_offset); + if (!stateTracker->getCurrentPipeline()) { + m_frameStats.noPipelineSkips++; + return; + } + + m_frameStats.drawIndexedCalls++; + m_frameStats.totalIndices += index_count; + + // Flush any dirty dynamic state before draw + stateTracker->applyDynamicState(); + + auto cmdBuffer = stateTracker->getCommandBuffer(); + cmdBuffer.drawIndexed(static_cast(index_count), + 1, + static_cast(first_index), + vertex_offset, + 0); +} + +void VulkanDrawManager::initSphereBuffers() +{ + auto* bufferManager = getBufferManager(); + + auto mesh = graphics::util::generate_sphere_mesh(16, 16); + + m_sphereIndexCount = mesh.index_count; + + m_sphereVBO = bufferManager->createBuffer(BufferType::Vertex, BufferUsageHint::Static); + bufferManager->updateBufferData(m_sphereVBO, mesh.vertices.size() * sizeof(float), mesh.vertices.data()); + + m_sphereIBO = bufferManager->createBuffer(BufferType::Index, BufferUsageHint::Static); + bufferManager->updateBufferData(m_sphereIBO, mesh.indices.size() * sizeof(ushort), mesh.indices.data()); + + m_sphereVertexLayout.add_vertex_component(vertex_format_data::POSITION3, sizeof(float) * 3, 0); + + mprintf(("VulkanDrawManager: Sphere mesh created (%u vertices, %u indices)\n", + mesh.vertex_count, mesh.index_count)); +} + +void VulkanDrawManager::shutdownSphereBuffers() +{ + auto* bufferManager = getBufferManager(); + + if (m_sphereVBO.isValid()) { + bufferManager->deleteBuffer(m_sphereVBO); + m_sphereVBO = gr_buffer_handle::invalid(); + } + if (m_sphereIBO.isValid()) { + bufferManager->deleteBuffer(m_sphereIBO); + m_sphereIBO = gr_buffer_handle::invalid(); + } +} + +void VulkanDrawManager::drawSphere(material* material_def) +{ + if (!material_def || m_sphereIndexCount == 0) { + return; + } + + auto* stateTracker = getStateTracker(); + + auto* bufferManager = getBufferManager(); + + if (!applyMaterial(material_def, PRIM_TYPE_TRIS, &m_sphereVertexLayout)) { + return; + } + + // Bind sphere vertex buffer + vk::Buffer vbo = bufferManager->getVkBuffer(m_sphereVBO); + if (!vbo) { + return; + } + stateTracker->bindVertexBuffer(0, vbo, 0); + + // Bind sphere index buffer with uint16 indices (matching the ushort mesh data) + vk::Buffer ibo = bufferManager->getVkBuffer(m_sphereIBO); + if (!ibo) { + return; + } + stateTracker->bindIndexBuffer(ibo, 0, vk::IndexType::eUint16); + + drawIndexed(PRIM_TYPE_TRIS, static_cast(m_sphereIndexCount), 0, 0); +} + +} // namespace vulkan +} // namespace graphics + +// 
GL_alpha_threshold is defined in gropengl.cpp +extern float GL_alpha_threshold; + +// PostProcessing_override is defined in globalincs/systemvars.cpp +extern bool PostProcessing_override; + +namespace graphics { +namespace vulkan { + +// ========== gr_screen function pointer implementations ========== +// These free functions are assigned to gr_screen.gf_* in gr_vulkan.cpp. + +namespace { + +// Helper to set up GenericData uniform for default material shader +// Similar to opengl_shader_set_default_material() in gropenglshader.cpp +void vulkan_set_default_material_uniforms(material* material_info) +{ + if (!material_info) { + return; + } + + // Get uniform buffer for GenericData + auto buffer = gr_get_uniform_buffer(uniform_block_type::GenericData, 1, sizeof(genericData_default_material_vert)); + auto* data = buffer.aligner().addTypedElement(); + + // Get base map from material + int base_map = material_info->get_texture_map(TM_BASE_TYPE); + bool textured = (base_map >= 0); + bool alpha = (material_info->get_texture_type() == TCACHE_TYPE_AABITMAP); + + // Texturing flags + if (textured) { + data->noTexturing = 0; + // Get array index for animated texture arrays + auto* texSlot = getTextureManager()->getTextureSlot(base_map); + data->baseMapIndex = texSlot ? static_cast(texSlot->arrayIndex) : 0; + } else { + data->noTexturing = 1; + data->baseMapIndex = 0; + } + + // Alpha texture flag + data->alphaTexture = alpha ? 1 : 0; + + // HDR / intensity settings + if (High_dynamic_range) { + data->srgb = 1; + data->intensity = material_info->get_color_scale(); + } else { + data->srgb = 0; + data->intensity = 1.0f; + } + + // Alpha threshold + data->alphaThreshold = GL_alpha_threshold; + + // Color from material + vec4 clr = material_info->get_color(); + data->color.a1d[0] = clr.xyzw.x; + data->color.a1d[1] = clr.xyzw.y; + data->color.a1d[2] = clr.xyzw.z; + data->color.a1d[3] = clr.xyzw.w; + + // Clip plane + const auto& clip_plane = material_info->get_clip_plane(); + if (clip_plane.enabled) { + data->clipEnabled = 1; + + data->clipEquation.a1d[0] = clip_plane.normal.xyz.x; + data->clipEquation.a1d[1] = clip_plane.normal.xyz.y; + data->clipEquation.a1d[2] = clip_plane.normal.xyz.z; + // Calculate 'd' value: d = -dot(normal, position) + data->clipEquation.a1d[3] = -(clip_plane.normal.xyz.x * clip_plane.position.xyz.x + + clip_plane.normal.xyz.y * clip_plane.position.xyz.y + + clip_plane.normal.xyz.z * clip_plane.position.xyz.z); + + // Model matrix (identity for now, material doesn't provide one) + vm_matrix4_set_identity(&data->modelMatrix); + } else { + data->clipEnabled = 0; + vm_matrix4_set_identity(&data->modelMatrix); + data->clipEquation.a1d[0] = 0.0f; + data->clipEquation.a1d[1] = 0.0f; + data->clipEquation.a1d[2] = 0.0f; + data->clipEquation.a1d[3] = 0.0f; + } + + buffer.submitData(); + gr_bind_uniform_buffer(uniform_block_type::GenericData, buffer.getBufferOffset(0), + sizeof(genericData_default_material_vert), buffer.bufferHandle()); +} + +} // anonymous namespace + +int vulkan_zbuffer_get() +{ + auto* drawManager = getDrawManager(); + return drawManager->zbufferGet(); +} + +int vulkan_zbuffer_set(int mode) +{ + auto* drawManager = getDrawManager(); + return drawManager->zbufferSet(mode); +} + +void vulkan_zbuffer_clear(int mode) +{ + auto* drawManager = getDrawManager(); + drawManager->zbufferClear(mode); +} + +int vulkan_stencil_set(int mode) +{ + auto* drawManager = getDrawManager(); + return drawManager->stencilSet(mode); +} + +void vulkan_stencil_clear() +{ + auto* drawManager = 
getDrawManager(); + drawManager->stencilClear(); +} + +void vulkan_set_fill_mode(int mode) +{ + auto* drawManager = getDrawManager(); + // GR_FILL_MODE_WIRE = 1, GR_FILL_MODE_SOLID = 2 + drawManager->setFillMode(mode); +} + +void vulkan_clear() +{ + auto* drawManager = getDrawManager(); + drawManager->clear(); +} + +void vulkan_reset_clip() +{ + auto* drawManager = getDrawManager(); + drawManager->resetClip(); +} + +void vulkan_set_clear_color(int r, int g, int b) +{ + auto* drawManager = getDrawManager(); + drawManager->setClearColor(r, g, b); +} + +void vulkan_set_clip(int x, int y, int w, int h, int resize_mode) +{ + auto* drawManager = getDrawManager(); + drawManager->setClip(x, y, w, h, resize_mode); +} + +int vulkan_set_cull(int cull) +{ + auto* drawManager = getDrawManager(); + return drawManager->setCull(cull); +} + +int vulkan_set_color_buffer(int mode) +{ + auto* drawManager = getDrawManager(); + return drawManager->setColorBuffer(mode); +} + +void vulkan_set_texture_addressing(int mode) +{ + auto* drawManager = getDrawManager(); + drawManager->setTextureAddressing(mode); +} + +void vulkan_set_line_width(float width) +{ + auto* stateTracker = getStateTracker(); + if (width <= 1.0f) { + stateTracker->setLineWidth(width); + } + gr_screen.line_width = width; +} + +void vulkan_clear_states() +{ + auto* drawManager = getDrawManager(); + drawManager->clearStates(); +} + +void vulkan_scene_texture_begin() +{ + auto* renderer = getRendererInstance(); + + // Switch to HDR scene render pass when post-processing is enabled + auto* pp = getPostProcessor(); + if (pp && pp->isInitialized() && Gr_post_processing_enabled && !PostProcessing_override) { + renderer->beginSceneRendering(); + High_dynamic_range = true; + } else { + // Fallback: just clear within the current swap chain pass + auto* stateTracker = getStateTracker(); + auto cmdBuffer = stateTracker->getCommandBuffer(); + + vk::ClearAttachment clearAttachments[2]; + clearAttachments[0].aspectMask = vk::ImageAspectFlagBits::eColor; + clearAttachments[0].colorAttachment = 0; + clearAttachments[0].clearValue.color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + + clearAttachments[1].aspectMask = vk::ImageAspectFlagBits::eDepth; + clearAttachments[1].clearValue.depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + + vk::ClearRect clearRect; + clearRect.rect.offset = vk::Offset2D(0, 0); + clearRect.rect.extent = vk::Extent2D(static_cast(gr_screen.max_w), + static_cast(gr_screen.max_h)); + clearRect.baseArrayLayer = 0; + clearRect.layerCount = 1; + + cmdBuffer.clearAttachments(2, clearAttachments, 1, &clearRect); + } +} + +void vulkan_scene_texture_end() +{ + auto* renderer = getRendererInstance(); + + // If we were rendering to the HDR scene target, switch back to swap chain + if (renderer->isSceneRendering()) { + renderer->endSceneRendering(); + } + + High_dynamic_range = false; +} + +void vulkan_copy_effect_texture() +{ + auto* renderer = getRendererInstance(); + + // Only copy if we're actively rendering the HDR scene + if (!renderer->isSceneRendering()) { + return; + } + + renderer->copyEffectTexture(); +} + +void vulkan_draw_sphere(material* material_def, float /*rad*/) +{ + auto* drawManager = getDrawManager(); + drawManager->drawSphere(material_def); +} + +void vulkan_render_shield_impact(shield_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + gr_buffer_handle buffer_handle, + int n_verts) +{ + auto* drawManager = getDrawManager(); + + // Compute impact projection matrices + float radius = 
material_info->get_impact_radius(); + vec3d min_v, max_v; + min_v.xyz.x = min_v.xyz.y = min_v.xyz.z = -radius; + max_v.xyz.x = max_v.xyz.y = max_v.xyz.z = radius; + + matrix4 impact_projection; + vm_matrix4_set_orthographic(&impact_projection, &max_v, &min_v); + + matrix impact_orient = material_info->get_impact_orient(); + vec3d impact_pos = material_info->get_impact_pos(); + + matrix4 impact_transform; + vm_matrix4_set_inverse_transform(&impact_transform, &impact_orient, &impact_pos); + + // Set shield impact uniform data (GenericData UBO) + auto buffer = gr_get_uniform_buffer(uniform_block_type::GenericData, 1, + sizeof(graphics::generic_data::shield_impact_data)); + auto* data = buffer.aligner().addTypedElement(); + data->hitNormal = impact_orient.vec.fvec; + data->shieldProjMatrix = impact_projection; + data->shieldModelViewMatrix = impact_transform; + data->shieldMapIndex = 0; // Vulkan binds textures individually, always layer 0 + data->srgb = High_dynamic_range ? 1 : 0; + data->color = material_info->get_color(); + buffer.submitData(); + gr_bind_uniform_buffer(uniform_block_type::GenericData, buffer.getBufferOffset(0), + sizeof(graphics::generic_data::shield_impact_data), buffer.bufferHandle()); + + // Set matrix uniforms + gr_matrix_set_uniforms(); + + // Draw the shield mesh + drawManager->renderPrimitives(material_info, prim_type, layout, 0, n_verts, buffer_handle, 0); +} + +void vulkan_render_model(model_material* material_info, + indexed_vertex_source* vert_source, + vertex_buffer* bufferp, + size_t texi) +{ + // ModelData UBO (matrices, lights, material params) is already bound by the model + // rendering pipeline (model_draw_list::render_buffer) before this function is called. + // Do NOT call vulkan_set_default_material_uniforms here - that would set GenericData + // uniforms for SDR_TYPE_DEFAULT_MATERIAL, but models use SDR_TYPE_MODEL with ModelData. + + auto* drawManager = getDrawManager(); + drawManager->renderModel(material_info, vert_source, bufferp, texi); +} + +void vulkan_render_primitives(material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int offset, + int n_verts, + gr_buffer_handle buffer_handle, + size_t buffer_offset) +{ + // Set up uniform buffers before rendering (like OpenGL does) + gr_matrix_set_uniforms(); + vulkan_set_default_material_uniforms(material_info); + + auto* drawManager = getDrawManager(); + drawManager->renderPrimitives(material_info, prim_type, layout, offset, n_verts, buffer_handle, buffer_offset); +} + +void vulkan_render_primitives_particle(particle_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int offset, + int n_verts, + gr_buffer_handle buffer_handle) +{ + auto* renderer = getRendererInstance(); + auto* drawManager = getDrawManager(); + auto* pp = getPostProcessor(); + + // In deferred mode, once the G-buffer pass has ended the position texture + // (view-space XYZ) is in eShaderReadOnlyOptimal and free to sample. 
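+ // Outside that window (forward rendering, or while the G-buffer pass is still
+ // recording) the position texture is not safely sampleable, so the fallback
+ // below copies the hardware depth buffer into a sampleable image instead.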
+ bool usePosTexture = light_deferred_enabled()
+ && !renderer->isUsingGbufRenderPass()
+ && pp && pp->isGbufInitialized();
+
+ if (!usePosTexture) {
+ // Non-deferred path: copy hardware depth buffer
+ renderer->copySceneDepthForParticles();
+ }
+
+ // Set up matrices
+ gr_matrix_set_uniforms();
+
+ // Set effect_data GenericData UBO (matching OpenGL's opengl_tnl_set_material_particle)
+ {
+ auto buffer = gr_get_uniform_buffer(uniform_block_type::GenericData, 1,
+ sizeof(graphics::generic_data::effect_data));
+ auto* data = buffer.aligner().addTypedElement<graphics::generic_data::effect_data>();
+
+ data->window_width = static_cast<float>(gr_screen.max_w);
+ data->window_height = static_cast<float>(gr_screen.max_h);
+ data->nearZ = Min_draw_distance;
+ data->farZ = Max_draw_distance;
+ data->srgb = High_dynamic_range ? 1 : 0;
+ data->blend_alpha = material_info->get_blend_mode() != ALPHA_BLEND_ADDITIVE ? 1 : 0;
+ // In deferred mode, bind the G-buffer position texture (view-space XYZ)
+ // so linear_depth=1 reads .z directly (matches OpenGL behavior).
+ // Otherwise use the NDC conversion path with the hardware depth copy.
+ data->linear_depth = usePosTexture ? 1 : 0;
+
+ buffer.submitData();
+ gr_bind_uniform_buffer(uniform_block_type::GenericData, buffer.getBufferOffset(0),
+ sizeof(graphics::generic_data::effect_data), buffer.bufferHandle());
+ }
+
+ // Set depth texture override
+ if (usePosTexture) {
+ // Deferred path: bind G-buffer position texture directly
+ auto* texMgr = getTextureManager();
+ drawManager->setDepthTextureOverride(
+ pp->getGbufPositionView(),
+ texMgr->getSampler(vk::Filter::eNearest, vk::Filter::eNearest,
+ vk::SamplerAddressMode::eClampToEdge, false, 0.0f, false));
+ } else if (renderer->isSceneDepthCopied() && pp) {
+ // Non-deferred path: bind the hardware depth copy
+ auto* texMgr = getTextureManager();
+ drawManager->setDepthTextureOverride(
+ pp->getSceneDepthCopyView(),
+ texMgr->getSampler(vk::Filter::eNearest, vk::Filter::eNearest,
+ vk::SamplerAddressMode::eClampToEdge, false, 0.0f, false));
+ }
+
+ drawManager->renderPrimitivesParticle(material_info, prim_type, layout, offset, n_verts, buffer_handle);
+
+ // Clear the override
+ drawManager->clearDepthTextureOverride();
+}
+
+void vulkan_render_primitives_distortion(distortion_material* material_info,
+ primitive_type prim_type,
+ vertex_layout* layout,
+ int offset,
+ int n_verts,
+ gr_buffer_handle buffer_handle)
+{
+ auto* drawManager = getDrawManager();
+ auto* pp = getPostProcessor();
+
+ // Set up matrices
+ gr_matrix_set_uniforms();
+
+ // Set effect_distort_data GenericData UBO (16 bytes — NOT genericData_default_material_vert!)
+ {
+ auto buffer = gr_get_uniform_buffer(uniform_block_type::GenericData, 1,
+ sizeof(graphics::generic_data::effect_distort_data));
+ auto* data = buffer.aligner().addTypedElement<graphics::generic_data::effect_distort_data>();
+
+ data->window_width = static_cast<float>(gr_screen.max_w);
+ data->window_height = static_cast<float>(gr_screen.max_h);
+ data->use_offset = material_info->get_thruster_rendering() ?
1.0f : 0.0f; + + buffer.submitData(); + gr_bind_uniform_buffer(uniform_block_type::GenericData, buffer.getBufferOffset(0), + sizeof(graphics::generic_data::effect_distort_data), buffer.bufferHandle()); + } + + // Set scene color override (binding 5) — snapshot of scene color for distortion sampling + if (pp && pp->getSceneEffectView()) { + drawManager->setSceneColorOverride( + pp->getSceneEffectView(), pp->getSceneEffectSampler()); + } + + // Set distortion map override (binding 6) — ping-pong noise texture for thrusters + if (material_info->get_thruster_rendering() && pp && pp->getDistortionTextureView()) { + drawManager->setDistMapOverride( + pp->getDistortionTextureView(), pp->getDistortionSampler()); + } + + drawManager->renderPrimitivesDistortion(material_info, prim_type, layout, n_verts, buffer_handle); + + // Clear overrides so subsequent draws use fallback textures + drawManager->clearDistortionOverrides(); +} + +void vulkan_render_movie(movie_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int n_verts, + gr_buffer_handle buffer, + size_t buffer_offset) +{ + gr_matrix_set_uniforms(); + vulkan_set_default_material_uniforms(material_info); + + auto* drawManager = getDrawManager(); + drawManager->renderMovie(material_info, prim_type, layout, n_verts, buffer); +} + +void vulkan_render_nanovg(nanovg_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int offset, + int n_verts, + gr_buffer_handle buffer_handle) +{ + // NanoVG shader reads from NanoVGData UBO (set 2 binding 2), not GenericData. + // The NanoVGRenderer binds NanoVGData before calling gr_render_nanovg(). + + // NanoVG uses its own software scissor (scissorMat/scissorExt in the fragment shader). + // Disable hardware scissor to match nanovg_gl.h which calls glDisable(GL_SCISSOR_TEST). + // Without this, NanoVG draws get clipped by gr_set_clip's hardware scissor. + auto* stateTracker = getStateTracker(); + bool savedScissorEnabled = stateTracker->isScissorEnabled(); + stateTracker->setScissorEnabled(false); + + auto* drawManager = getDrawManager(); + drawManager->renderNanoVG(material_info, prim_type, layout, offset, n_verts, buffer_handle); + + // Restore scissor state + stateTracker->setScissorEnabled(savedScissorEnabled); +} + +void vulkan_render_primitives_batched(batched_bitmap_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int offset, + int n_verts, + gr_buffer_handle buffer_handle) +{ + gr_matrix_set_uniforms(); + vulkan_set_default_material_uniforms(material_info); + + auto* drawManager = getDrawManager(); + drawManager->renderPrimitivesBatched(material_info, prim_type, layout, offset, n_verts, buffer_handle); +} + +void vulkan_render_rocket_primitives(interface_material* material_info, + primitive_type prim_type, + vertex_layout* layout, + int n_indices, + gr_buffer_handle vertex_buffer, + gr_buffer_handle index_buffer) +{ + // Set up 2D orthographic projection (matches OpenGL's gr_opengl_render_rocket_primitives) + gr_set_2d_matrix(); + + // Fill GenericData UBO with rocketui_data layout (NOT default material layout). + // The rocketui shader reads projMatrix, offset, textured, baseMapIndex, and + // horizontalSwipeOffset from GenericData — a completely different layout than + // the default material shader's genericData. 
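+ // Because all of these layouts share the one GenericData binding, a mismatch
+ // between the struct written here and the layout the bound shader declares is
+ // not caught by validation; the shader would silently read garbage, so the
+ // CPU-side structs must stay in sync with the shaders.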
+ { + auto buffer = gr_get_uniform_buffer(uniform_block_type::GenericData, 1, + sizeof(graphics::generic_data::rocketui_data)); + auto* data = buffer.aligner().addTypedElement(); + + data->projMatrix = gr_projection_matrix; + + const vec2d& offset = material_info->get_offset(); + data->offset = offset; + data->textured = material_info->is_textured() ? 1 : 0; + data->baseMapIndex = 0; // Vulkan texture array: always layer 0 + data->horizontalSwipeOffset = material_info->get_horizontal_swipe(); + + buffer.submitData(); + gr_bind_uniform_buffer(uniform_block_type::GenericData, buffer.getBufferOffset(0), + sizeof(graphics::generic_data::rocketui_data), buffer.bufferHandle()); + } + + // Matrices UBO is still needed for descriptor set completeness + gr_matrix_set_uniforms(); + + auto* drawManager = getDrawManager(); + drawManager->renderRocketPrimitives(material_info, prim_type, layout, n_indices, vertex_buffer, index_buffer); + + gr_end_2d_matrix(); +} + +void vulkan_calculate_irrmap() +{ + if (ENVMAP < 0 || gr_screen.irrmap_render_target < 0) { + return; + } + + auto* renderer = getRendererInstance(); + auto* stateTracker = getStateTracker(); + auto* texManager = getTextureManager(); + auto* descManager = getDescriptorManager(); + auto* bufferManager = getBufferManager(); + auto* pipelineManager = getPipelineManager(); + auto* pp = getPostProcessor(); + + if (!renderer || !stateTracker || !texManager || !descManager || !bufferManager || !pipelineManager || !pp) { + return; + } + + // Get envmap cubemap view + auto* envSlot = bm_get_slot(ENVMAP, true); + if (!envSlot || !envSlot->gr_info) { + return; + } + auto* envTs = static_cast(envSlot->gr_info); + vk::ImageView envmapView = envTs->isCubemap ? envTs->cubeImageView : envTs->imageView; + if (!envmapView) { + return; + } + + // Get irrmap render target (cubemap with per-face framebuffers) + auto* irrSlot = bm_get_slot(gr_screen.irrmap_render_target, true); + if (!irrSlot || !irrSlot->gr_info) { + return; + } + auto* irrTs = static_cast(irrSlot->gr_info); + if (!irrTs->isCubemap || !irrTs->renderPass) { + return; + } + + vk::CommandBuffer cmd = stateTracker->getCommandBuffer(); + + // End the current render pass (G-buffer or scene) + cmd.endRenderPass(); + + // Create pipeline for irradiance map generation + PipelineConfig config; + config.shaderType = SDR_TYPE_IRRADIANCE_MAP_GEN; + config.vertexLayoutHash = 0; + config.primitiveType = PRIM_TYPE_TRIS; + config.depthMode = ZBUFFER_TYPE_NONE; + config.blendMode = ALPHA_BLEND_NONE; + config.cullEnabled = false; + config.depthWriteEnabled = false; + config.renderPass = irrTs->renderPass; + + vertex_layout emptyLayout; + vk::Pipeline pipeline = pipelineManager->getPipeline(config, emptyLayout); + if (!pipeline) { + mprintf(("vulkan_calculate_irrmap: Failed to get pipeline!\n")); + return; + } + + vk::PipelineLayout pipelineLayout = pipelineManager->getPipelineLayout(); + + // Create a small host-visible UBO for the 6 face indices + // minUniformBufferOffsetAlignment is typically 256 bytes + const uint32_t UBO_SLOT_SIZE = 256; // Safe alignment for all GPUs + const uint32_t UBO_TOTAL_SIZE = 6 * UBO_SLOT_SIZE; + + vk::Device device = bufferManager->getDevice(); + auto* memManager = getMemoryManager(); + + vk::BufferCreateInfo uboBufInfo; + uboBufInfo.size = UBO_TOTAL_SIZE; + uboBufInfo.usage = vk::BufferUsageFlagBits::eUniformBuffer; + uboBufInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer faceUBO; + VulkanAllocation faceUBOAlloc; + try { + faceUBO = device.createBuffer(uboBufInfo); + } 
catch (const vk::SystemError& e) {
+ mprintf(("vulkan_calculate_irrmap: Failed to create face UBO: %s\n", e.what()));
+ return;
+ }
+
+ if (!memManager->allocateBufferMemory(faceUBO, MemoryUsage::CpuToGpu, faceUBOAlloc)) {
+ device.destroyBuffer(faceUBO);
+ return;
+ }
+
+ // Map and write face indices
+ auto* mapped = static_cast<uint8_t*>(memManager->mapMemory(faceUBOAlloc));
+ if (!mapped) {
+ device.destroyBuffer(faceUBO);
+ memManager->freeAllocation(faceUBOAlloc);
+ return;
+ }
+ memset(mapped, 0, UBO_TOTAL_SIZE);
+ for (int i = 0; i < 6; i++) {
+ *reinterpret_cast<int*>(mapped + i * UBO_SLOT_SIZE) = i;
+ }
+ memManager->unmapMemory(faceUBOAlloc);
+
+ // Get fallback resources
+ vk::Buffer fallbackUBO = bufferManager->getFallbackUniformBuffer();
+ vk::DeviceSize fallbackUBOSize = static_cast<vk::DeviceSize>(bufferManager->getFallbackUniformBufferSize());
+ vk::Sampler defaultSampler = texManager->getDefaultSampler();
+ vk::ImageView fallbackView = texManager->getFallback2DArrayView();
+ vk::ImageView fallbackView2D = texManager->getFallbackTextureView2D();
+ vk::ImageView fallbackCubeView = texManager->getFallbackCubeView();
+
+ vk::Extent2D irrExtent(irrTs->width, irrTs->height);
+
+ for (int face = 0; face < 6; face++) {
+ vk::Framebuffer fb = irrTs->cubeFaceFramebuffers[face];
+ if (!fb) {
+ continue;
+ }
+
+ // Begin render pass for this face (loadOp=eClear, finalLayout=eShaderReadOnlyOptimal)
+ vk::RenderPassBeginInfo rpBegin;
+ rpBegin.renderPass = irrTs->renderPass;
+ rpBegin.framebuffer = fb;
+ rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+ rpBegin.renderArea.extent = irrExtent;
+
+ vk::ClearValue clearValue;
+ clearValue.color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f});
+ rpBegin.clearValueCount = 1;
+ rpBegin.pClearValues = &clearValue;
+
+ cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+ cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
+
+ // Set viewport and scissor
+ vk::Viewport viewport;
+ viewport.x = 0.0f;
+ viewport.y = 0.0f;
+ viewport.width = static_cast<float>(irrExtent.width);
+ viewport.height = static_cast<float>(irrExtent.height);
+ viewport.minDepth = 0.0f;
+ viewport.maxDepth = 1.0f;
+ cmd.setViewport(0, viewport);
+
+ vk::Rect2D scissor;
+ scissor.offset = vk::Offset2D(0, 0);
+ scissor.extent = irrExtent;
+ cmd.setScissor(0, scissor);
+
+ DescriptorWriter writer;
+ writer.reset(device);
+
+ // Set 0: Global (all fallback)
+ vk::DescriptorSet globalSet = descManager->allocateFrameSet(DescriptorSetIndex::Global);
+ Verify(globalSet);
+ writer.writeUniformBuffer(globalSet, 0, fallbackUBO, 0, fallbackUBOSize);
+ writer.writeUniformBuffer(globalSet, 1, fallbackUBO, 0, fallbackUBOSize);
+ writer.writeTexture(globalSet, 2, fallbackView, defaultSampler);
+ writer.writeTexture(globalSet, 3, fallbackCubeView, defaultSampler);
+ writer.writeTexture(globalSet, 4, fallbackCubeView, defaultSampler);
+ writer.flush();
+
+ // Set 1: Material (envmap cubemap at binding 1)
+ vk::DescriptorSet materialSet = descManager->allocateFrameSet(DescriptorSetIndex::Material);
+ Verify(materialSet);
+ writer.writeUniformBuffer(materialSet, 0, fallbackUBO, 0, fallbackUBOSize);
+ writer.writeUniformBuffer(materialSet, 2, fallbackUBO, 0, fallbackUBOSize);
+ writer.writeStorageBuffer(materialSet, 3, fallbackUBO, 0, fallbackUBOSize);
+
+ // Binding 1: envmap cubemap (element 0) + fallback for rest of array
+ {
+ std::array<vk::DescriptorImageInfo, VulkanDescriptorManager::MAX_TEXTURE_BINDINGS> texImages;
+ texImages[0].sampler = defaultSampler;
+ texImages[0].imageView = envmapView;
+ texImages[0].imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+ for (uint32_t slot = 1; slot
< VulkanDescriptorManager::MAX_TEXTURE_BINDINGS; ++slot) {
+ texImages[slot].sampler = defaultSampler;
+ texImages[slot].imageView = fallbackView2D;
+ texImages[slot].imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+ }
+ writer.writeTextureArray(materialSet, 1, texImages.data(), static_cast<uint32_t>(texImages.size()));
+ }
+ writer.writeTexture(materialSet, 4, fallbackView2D, defaultSampler);
+ writer.writeTexture(materialSet, 5, fallbackView2D, defaultSampler);
+ writer.writeTexture(materialSet, 6, fallbackView2D, defaultSampler);
+ writer.flush();
+
+ // Set 2: PerDraw (face UBO at binding 0)
+ vk::DescriptorSet perDrawSet = descManager->allocateFrameSet(DescriptorSetIndex::PerDraw);
+ Verify(perDrawSet);
+ writer.writeUniformBuffer(perDrawSet, 0, faceUBO,
+ static_cast<vk::DeviceSize>(face) * UBO_SLOT_SIZE, UBO_SLOT_SIZE);
+ for (uint32_t b = 1; b <= 4; ++b) {
+ writer.writeUniformBuffer(perDrawSet, b, fallbackUBO, 0, fallbackUBOSize);
+ }
+ writer.flush();
+
+ // Bind all descriptor sets
+ cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout,
+ 0, {globalSet, materialSet, perDrawSet}, {});
+
+ // Draw fullscreen triangle
+ cmd.draw(3, 1, 0, 0);
+ cmd.endRenderPass();
+ }
+
+ // Queue UBO for deferred destruction (safe to destroy after frame submission)
+ getDeletionQueue()->queueBuffer(faceUBO, faceUBOAlloc);
+
+ // Resume the scene/G-buffer render pass
+ bool useGbuf = renderer->isSceneRendering() && pp->isGbufInitialized() && light_deferred_enabled();
+ if (useGbuf) {
+ // Transition G-buffer attachments for resume
+ {
+ vk::ImageMemoryBarrier barrier;
+ barrier.srcAccessMask = {};
+ barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+ barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+ barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal;
+ barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+ barrier.image = pp->getSceneColorImage();
+ barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+ cmd.pipelineBarrier(
+ vk::PipelineStageFlagBits::eTopOfPipe,
+ vk::PipelineStageFlagBits::eColorAttachmentOutput,
+ {}, nullptr, nullptr, barrier);
+ }
+
+ pp->transitionGbufForResume(cmd);
+
+ auto extent = pp->getSceneExtent();
+ vk::RenderPassBeginInfo rpBegin;
+ rpBegin.renderPass = pp->getGbufRenderPassLoad();
+ rpBegin.framebuffer = pp->getGbufFramebuffer();
+ rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+ rpBegin.renderArea.extent = extent;
+
+ // 6 G-buffer color attachments plus depth at index 6 (count assumed to match
+ // VulkanPostProcessor::GBUF_COLOR_ATTACHMENT_COUNT)
+ std::array<vk::ClearValue, 7> clearValues{};
+ clearValues[6].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+ rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+ rpBegin.pClearValues = clearValues.data();
+
+ cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+ stateTracker->setRenderPass(pp->getGbufRenderPassLoad(), 0);
+ stateTracker->setColorAttachmentCount(VulkanPostProcessor::GBUF_COLOR_ATTACHMENT_COUNT);
+ } else {
+ // Resume simple scene render pass
+ auto extent = pp->getSceneExtent();
+ vk::RenderPassBeginInfo rpBegin;
+ rpBegin.renderPass = pp->getSceneRenderPassLoad();
+ rpBegin.framebuffer = pp->getSceneFramebuffer();
+ rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+ rpBegin.renderArea.extent = extent;
+
+ std::array<vk::ClearValue, 2> clearValues;
+ clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f});
+ clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0);
+ rpBegin.clearValueCount = static_cast<uint32_t>(clearValues.size());
+ rpBegin.pClearValues = clearValues.data();
+
+ cmd.beginRenderPass(rpBegin,
vk::SubpassContents::eInline);
+ stateTracker->setRenderPass(pp->getSceneRenderPassLoad(), 0);
+ }
+
+ // Restore viewport and scissor
+ {
+ vk::Viewport viewport;
+ viewport.x = static_cast<float>(gr_screen.offset_x);
+ viewport.y = static_cast<float>(gr_screen.offset_y);
+ viewport.width = static_cast<float>(gr_screen.clip_width);
+ viewport.height = static_cast<float>(gr_screen.clip_height);
+ viewport.minDepth = 0.0f;
+ viewport.maxDepth = 1.0f;
+ cmd.setViewport(0, viewport);
+
+ vk::Rect2D scissor;
+ scissor.offset = vk::Offset2D(gr_screen.offset_x, gr_screen.offset_y);
+ scissor.extent = vk::Extent2D(static_cast<uint32_t>(gr_screen.clip_width),
+ static_cast<uint32_t>(gr_screen.clip_height));
+ cmd.setScissor(0, scissor);
+ }
+
+ mprintf(("vulkan_calculate_irrmap: Generated irradiance cubemap (%ux%u)\n", irrTs->width, irrTs->height));
+}
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanDraw.h b/code/graphics/vulkan/VulkanDraw.h
new file mode 100644
index 00000000000..ce086d82290
--- /dev/null
+++ b/code/graphics/vulkan/VulkanDraw.h
@@ -0,0 +1,457 @@
+#pragma once
+
+#include "globalincs/pstypes.h"
+#include "graphics/2d.h"
+#include "graphics/material.h"
+#include "VulkanPipeline.h"
+
+#include <array>
+#include <vulkan/vulkan.hpp>
+
+namespace graphics {
+namespace vulkan {
+
+class DescriptorWriter;
+
+/**
+ * @brief Tracks a pending uniform buffer binding
+ * Stores handle instead of raw vk::Buffer to survive buffer recreation.
+ * The offset is fully resolved at bind time (includes frame base offset)
+ * to prevent stale lastWriteStreamOffset if the buffer is updated between bind and draw.
+ */
+struct PendingUniformBinding {
+ gr_buffer_handle bufferHandle; // FSO buffer handle - lookup vk::Buffer at draw time
+ vk::DeviceSize offset = 0; // Fully resolved offset (frame base + caller offset)
+ vk::DeviceSize size = 0;
+ bool valid = false;
+};
+
+/**
+ * @brief Handles Vulkan draw command recording
+ *
+ * Provides functions to record draw commands to the command buffer,
+ * including primitive rendering, batched rendering, and special effects.
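+ *
+ * The manager also mirrors the legacy gr_* state (z-buffer, stencil, cull and
+ * fill mode) so the gr_screen entry points can report previous modes, while the
+ * corresponding Vulkan pipeline state is rebuilt per draw from PipelineConfig.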
+ */ +class VulkanDrawManager { +public: + VulkanDrawManager() = default; + ~VulkanDrawManager() = default; + + // Non-copyable + VulkanDrawManager(const VulkanDrawManager&) = delete; + VulkanDrawManager& operator=(const VulkanDrawManager&) = delete; + + /** + * @brief Initialize draw manager + */ + bool init(vk::Device device); + + /** + * @brief Shutdown and release resources + */ + void shutdown(); + + // ========== Clear Operations ========== + + /** + * @brief Clear the color buffer + */ + void clear(); + + /** + * @brief Set clear color + */ + void setClearColor(int r, int g, int b); + + // ========== Clipping ========== + + /** + * @brief Set clip region (scissor) + */ + void setClip(int x, int y, int w, int h, int resize_mode); + + /** + * @brief Reset clip to full screen + */ + void resetClip(); + + // ========== Z-Buffer ========== + + /** + * @brief Get current zbuffer mode + */ + int zbufferGet(); + + /** + * @brief Set zbuffer mode + * @return Previous mode + */ + int zbufferSet(int mode); + + /** + * @brief Clear zbuffer + */ + void zbufferClear(int mode); + + // ========== Stencil ========== + + /** + * @brief Set stencil mode + * @return Previous mode + */ + int stencilSet(int mode); + + /** + * @brief Clear stencil buffer + */ + void stencilClear(); + + // ========== Culling ========== + + /** + * @brief Set cull mode + * @return Previous mode + */ + int setCull(int cull); + + // ========== Primitive Rendering ========== + + /** + * @brief Render primitives with material + */ + void renderPrimitives(material* material_info, primitive_type prim_type, + vertex_layout* layout, int offset, int n_verts, + gr_buffer_handle buffer_handle, size_t buffer_offset); + + /** + * @brief Render batched bitmaps + */ + void renderPrimitivesBatched(batched_bitmap_material* material_info, + primitive_type prim_type, vertex_layout* layout, + int offset, int n_verts, gr_buffer_handle buffer_handle); + + /** + * @brief Render particles + */ + void renderPrimitivesParticle(particle_material* material_info, + primitive_type prim_type, vertex_layout* layout, + int offset, int n_verts, gr_buffer_handle buffer_handle); + + /** + * @brief Render distortion effect + */ + void renderPrimitivesDistortion(distortion_material* material_info, + primitive_type prim_type, vertex_layout* layout, + int n_verts, gr_buffer_handle buffer_handle); + + /** + * @brief Render movie frame + */ + void renderMovie(movie_material* material_info, primitive_type prim_type, + vertex_layout* layout, int n_verts, gr_buffer_handle buffer_handle); + + /** + * @brief Render NanoVG UI + */ + void renderNanoVG(nanovg_material* material_info, primitive_type prim_type, + vertex_layout* layout, int offset, int n_verts, + gr_buffer_handle buffer_handle); + + /** + * @brief Render Rocket UI primitives (indexed) + */ + void renderRocketPrimitives(interface_material* material_info, + primitive_type prim_type, vertex_layout* layout, + int n_indices, gr_buffer_handle vertex_buffer, + gr_buffer_handle index_buffer); + + /** + * @brief Render 3D model with indexed geometry + * @param material_info Model material settings + * @param vert_source Indexed vertex source with buffer handles + * @param bufferp Vertex buffer with layout and texture info + * @param texi Index into tex_buf array for this draw + */ + void renderModel(model_material* material_info, indexed_vertex_source* vert_source, + vertex_buffer* bufferp, size_t texi); + + /** + * @brief Draw a unit sphere with the given material + * Used for debug visualization and deferred light 
volumes + */ + void drawSphere(material* material_def); + + // ========== Render State ========== + + /** + * @brief Set polygon fill mode (GR_FILL_MODE_SOLID / GR_FILL_MODE_WIRE) + */ + void setFillMode(int mode); + + /** + * @brief Set color buffer write enable + * @return Previous state (1 = was enabled, 0 = was disabled) + */ + int setColorBuffer(int mode); + + /** + * @brief Set texture addressing mode (TMAP_ADDRESS_WRAP/MIRROR/CLAMP) + */ + void setTextureAddressing(int mode); + + /** + * @brief Enable or disable depth bias in pipeline + */ + void setDepthBiasEnabled(bool enabled); + + /** + * @brief Set depth texture override for soft particle rendering + * + * When set, applyMaterial() binds this texture to Material set binding 4 + * instead of the fallback white texture. Must be set before the render call + * and cleared afterwards. + */ + void setDepthTextureOverride(vk::ImageView view, vk::Sampler sampler); + + /** + * @brief Clear depth texture override (reverts to fallback) + */ + void clearDepthTextureOverride(); + + /** + * @brief Set scene color texture override for binding 5 (distortion effects) + */ + void setSceneColorOverride(vk::ImageView view, vk::Sampler sampler); + + /** + * @brief Set distortion map texture override for binding 6 (distortion effects) + */ + void setDistMapOverride(vk::ImageView view, vk::Sampler sampler); + + /** + * @brief Clear distortion texture overrides (bindings 5 and 6, reverts to fallback) + */ + void clearDistortionOverrides(); + + /** + * @brief Get current texture addressing mode + */ + int getTextureAddressing() const { return m_textureAddressing; } + + /** + * @brief Clear all graphics states to defaults + */ + void clearStates(); + + // ========== Uniform Buffers ========== + + /** + * @brief Set a pending uniform buffer binding + * @param blockType The uniform block type + * @param bufferHandle The FSO buffer handle (looked up at bind time) + * @param offset Offset within the buffer + * @param size Size of the bound range + */ + void setPendingUniformBinding(uniform_block_type blockType, gr_buffer_handle bufferHandle, + vk::DeviceSize offset, vk::DeviceSize size); + + /** + * @brief Clear all pending uniform bindings + */ + void clearPendingUniformBindings(); + + /** + * @brief Get a pending uniform binding by block type index + */ + const PendingUniformBinding& getPendingUniformBinding(size_t index) const { + Assertion(index < NUM_UNIFORM_BLOCK_TYPES, "getPendingUniformBinding: index %zu out of range!", index); + return m_pendingUniformBindings[index]; + } + + /** + * @brief Bind material textures to descriptor set (public for decal rendering) + * @param writer If non-null, texture array write is batched into writer instead of flushed immediately + */ + bool bindMaterialTextures(material* mat, vk::DescriptorSet materialSet, + DescriptorWriter* writer); + + /** + * @brief Reset per-frame diagnostic counters (called at start of frame) + */ + void resetFrameStats(); + + /** + * @brief Print per-frame diagnostic summary (called at end of frame) + */ + void printFrameStats(); + +private: + /** + * @brief Apply material state and bind pipeline + * @return true if pipeline was successfully bound + */ + bool applyMaterial(material* mat, primitive_type prim_type, vertex_layout* layout); + + /** + * @brief Build pipeline config from material + */ + PipelineConfig buildPipelineConfig(material* mat, primitive_type prim_type); + + /** + * @brief Bind vertex buffer from handle + */ + void bindVertexBuffer(gr_buffer_handle handle, size_t offset = 
0);
+
+ /**
+ * @brief Bind index buffer from handle
+ */
+ void bindIndexBuffer(gr_buffer_handle handle);
+
+ /**
+ * @brief Issue draw call
+ */
+ void draw(primitive_type prim_type, int first_vertex, int vertex_count);
+
+ /**
+ * @brief Issue indexed draw call
+ */
+ void drawIndexed(primitive_type prim_type, int index_count, int first_index, int vertex_offset);
+
+ /**
+ * @brief Create sphere VBO/IBO from shared mesh generator
+ */
+ void initSphereBuffers();
+
+ /**
+ * @brief Destroy sphere VBO/IBO
+ */
+ void shutdownSphereBuffers();
+
+ vk::Device m_device;
+
+ // Current render state
+ int m_zbufferMode = GR_ZBUFF_FULL;
+ int m_stencilMode = GR_STENCIL_NONE;
+ bool m_cullEnabled = true;
+ int m_fillMode = GR_FILL_MODE_SOLID;
+ bool m_colorBufferEnabled = true;
+ int m_textureAddressing = TMAP_ADDRESS_WRAP;
+ bool m_depthBiasEnabled = false;
+
+ // Pending uniform buffer bindings (indexed by uniform_block_type)
+ static constexpr size_t NUM_UNIFORM_BLOCK_TYPES = static_cast<size_t>(uniform_block_type::NUM_BLOCK_TYPES);
+ std::array<PendingUniformBinding, NUM_UNIFORM_BLOCK_TYPES> m_pendingUniformBindings;
+
+ // Per-frame diagnostic counters
+ struct FrameStats {
+ int drawCalls = 0;
+ int drawIndexedCalls = 0;
+ int applyMaterialCalls = 0;
+ int applyMaterialFailures = 0;
+ int noPipelineSkips = 0;
+ int shaderHandleNeg1 = 0;
+ int totalVertices = 0;
+ int totalIndices = 0;
+
+ // Per-function call counters
+ int renderPrimitiveCalls = 0;
+ int renderBatchedCalls = 0;
+ int renderModelCalls = 0;
+ int renderParticleCalls = 0;
+ int renderNanoVGCalls = 0;
+ int renderRocketCalls = 0;
+ int renderMovieCalls = 0;
+ };
+ FrameStats m_frameStats;
+ int m_frameStatsFrameNum = 0;
+
+ // Depth texture override for soft particle rendering
+ // Set before applyMaterial() so binding 4 gets the real depth texture instead of fallback
+ vk::ImageView m_depthTextureOverride;
+ vk::Sampler m_depthSamplerOverride;
+
+ // Scene color override for distortion rendering (binding 5)
+ vk::ImageView m_sceneColorOverride;
+ vk::Sampler m_sceneColorSamplerOverride;
+
+ // Distortion map override for distortion rendering (binding 6)
+ vk::ImageView m_distMapOverride;
+ vk::Sampler m_distMapSamplerOverride;
+
+ // Pre-built sphere mesh for draw_sphere / deferred light volumes
+ gr_buffer_handle m_sphereVBO;
+ gr_buffer_handle m_sphereIBO;
+ unsigned int m_sphereIndexCount = 0;
+ vertex_layout m_sphereVertexLayout;
+
+ bool m_initialized = false;
+};
+
+// Global draw manager access
+VulkanDrawManager* getDrawManager();
+void setDrawManager(VulkanDrawManager* manager);
+
+// ========== gr_screen function pointer implementations ==========
+// These free functions implement gr_screen.gf_* function pointers.
+// They are assigned in gr_vulkan.cpp::init_function_pointers().
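+// Note: all of them route through the global manager singletons (getDrawManager()
+// and friends), so they assume the Vulkan backend is initialized and a frame's
+// command buffer is currently being recorded.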
+ +// Clear operations +void vulkan_clear(); +void vulkan_set_clear_color(int r, int g, int b); + +// Clipping +void vulkan_set_clip(int x, int y, int w, int h, int resize_mode); +void vulkan_reset_clip(); + +// Z-buffer +int vulkan_zbuffer_get(); +int vulkan_zbuffer_set(int mode); +void vulkan_zbuffer_clear(int mode); + +// Stencil +int vulkan_stencil_set(int mode); +void vulkan_stencil_clear(); + +// Render state +int vulkan_set_cull(int cull); +int vulkan_set_color_buffer(int mode); +void vulkan_set_fill_mode(int mode); +void vulkan_set_texture_addressing(int mode); +void vulkan_set_line_width(float width); +void vulkan_clear_states(); + +// Scene texture +void vulkan_scene_texture_begin(); +void vulkan_scene_texture_end(); +void vulkan_copy_effect_texture(); + +// 3D primitives +void vulkan_draw_sphere(material* material_def, float rad); +void vulkan_render_shield_impact(shield_material* material_info, primitive_type prim_type, + vertex_layout* layout, gr_buffer_handle buffer_handle, int n_verts); +void vulkan_render_model(model_material* material_info, indexed_vertex_source* vert_source, + vertex_buffer* bufferp, size_t texi); +void vulkan_render_primitives(material* material_info, primitive_type prim_type, + vertex_layout* layout, int offset, int n_verts, gr_buffer_handle buffer_handle, size_t buffer_offset); +void vulkan_render_primitives_particle(particle_material* material_info, + primitive_type prim_type, vertex_layout* layout, int offset, int n_verts, gr_buffer_handle buffer_handle); +void vulkan_render_primitives_distortion(distortion_material* material_info, + primitive_type prim_type, vertex_layout* layout, int offset, int n_verts, gr_buffer_handle buffer_handle); +void vulkan_render_primitives_batched(batched_bitmap_material* material_info, + primitive_type prim_type, vertex_layout* layout, int offset, int n_verts, gr_buffer_handle buffer_handle); +void vulkan_render_movie(movie_material* material_info, primitive_type prim_type, + vertex_layout* layout, int n_verts, gr_buffer_handle buffer, size_t buffer_offset); +void vulkan_render_nanovg(nanovg_material* material_info, primitive_type prim_type, + vertex_layout* layout, int offset, int n_verts, gr_buffer_handle buffer_handle); +void vulkan_render_rocket_primitives(interface_material* material_info, + primitive_type prim_type, vertex_layout* layout, int n_indices, + gr_buffer_handle vertex_buffer, gr_buffer_handle index_buffer); + +// Transform buffer for batched submodel rendering +void vulkan_update_transform_buffer(void* data, size_t size); + +// Environment mapping +void vulkan_calculate_irrmap(); + +} // namespace vulkan +} // namespace graphics diff --git a/code/graphics/vulkan/VulkanMemory.cpp b/code/graphics/vulkan/VulkanMemory.cpp new file mode 100644 index 00000000000..1ce3208790b --- /dev/null +++ b/code/graphics/vulkan/VulkanMemory.cpp @@ -0,0 +1,326 @@ +#include "VulkanMemory.h" + +#include "globalincs/pstypes.h" + +namespace graphics { +namespace vulkan { + +namespace { +VulkanMemoryManager* g_memoryManager = nullptr; +} + +VulkanMemoryManager* getMemoryManager() +{ + return g_memoryManager; +} + +void setMemoryManager(VulkanMemoryManager* manager) +{ + g_memoryManager = manager; +} + +VulkanMemoryManager::VulkanMemoryManager() = default; + +VulkanMemoryManager::~VulkanMemoryManager() +{ + if (m_initialized) { + shutdown(); + } +} + +bool VulkanMemoryManager::init(vk::PhysicalDevice physicalDevice, vk::Device device) +{ + if (m_initialized) { + mprintf(("VulkanMemoryManager::init called when already 
initialized!\n")); + return false; + } + + m_physicalDevice = physicalDevice; + m_device = device; + m_memoryProperties = physicalDevice.getMemoryProperties(); + + mprintf(("Vulkan Memory Manager initialized\n")); + mprintf((" Memory heaps: %u\n", m_memoryProperties.memoryHeapCount)); + for (uint32_t i = 0; i < m_memoryProperties.memoryHeapCount; ++i) { + const auto& heap = m_memoryProperties.memoryHeaps[i]; + mprintf((" Heap %u: %zu MB%s\n", + i, + static_cast<size_t>(heap.size / (1024 * 1024)), + (heap.flags & vk::MemoryHeapFlagBits::eDeviceLocal) ? " (device local)" : "")); + } + + mprintf((" Memory types: %u\n", m_memoryProperties.memoryTypeCount)); + for (uint32_t i = 0; i < m_memoryProperties.memoryTypeCount; ++i) { + const auto& type = m_memoryProperties.memoryTypes[i]; + SCP_string flags; + if (type.propertyFlags & vk::MemoryPropertyFlagBits::eDeviceLocal) + flags += "DeviceLocal "; + if (type.propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible) + flags += "HostVisible "; + if (type.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent) + flags += "HostCoherent "; + if (type.propertyFlags & vk::MemoryPropertyFlagBits::eHostCached) + flags += "HostCached "; + mprintf((" Type %u: heap %u, flags: %s\n", i, type.heapIndex, flags.c_str())); + } + + m_initialized = true; + return true; +} + +void VulkanMemoryManager::shutdown() +{ + if (!m_initialized) { + return; + } + + if (m_allocationCount > 0) { + mprintf(("WARNING: VulkanMemoryManager shutdown with %zu allocations still active!\n", m_allocationCount)); + } + + m_physicalDevice = nullptr; + m_device = nullptr; + m_allocationCount = 0; + m_totalAllocatedBytes = 0; + m_initialized = false; +} + +bool VulkanMemoryManager::findMemoryType(uint32_t memoryTypeBits, + vk::MemoryPropertyFlags requiredFlags, + vk::MemoryPropertyFlags preferredFlags, + uint32_t& memoryTypeIndex) +{ + // First try to find a memory type with both required and preferred flags + for (uint32_t i = 0; i < m_memoryProperties.memoryTypeCount; ++i) { + if ((memoryTypeBits & (1u << i)) && + (m_memoryProperties.memoryTypes[i].propertyFlags & (requiredFlags | preferredFlags)) == + (requiredFlags | preferredFlags)) { + memoryTypeIndex = i; + return true; + } + } + + // Fall back to just required flags + for (uint32_t i = 0; i < m_memoryProperties.memoryTypeCount; ++i) { + if ((memoryTypeBits & (1u << i)) && + (m_memoryProperties.memoryTypes[i].propertyFlags & requiredFlags) == requiredFlags) { + memoryTypeIndex = i; + return true; + } + } + + return false; +} + +void VulkanMemoryManager::getMemoryFlags(MemoryUsage usage, + vk::MemoryPropertyFlags& requiredFlags, + vk::MemoryPropertyFlags& preferredFlags) +{ + switch (usage) { + case MemoryUsage::GpuOnly: + requiredFlags = vk::MemoryPropertyFlagBits::eDeviceLocal; + preferredFlags = {}; + break; + + case MemoryUsage::CpuToGpu: + requiredFlags = vk::MemoryPropertyFlagBits::eHostVisible; + preferredFlags = vk::MemoryPropertyFlagBits::eDeviceLocal; + break; + + case MemoryUsage::GpuToCpu: + requiredFlags = vk::MemoryPropertyFlagBits::eHostVisible; + preferredFlags = vk::MemoryPropertyFlagBits::eHostCached; + break; + + case MemoryUsage::CpuOnly: + requiredFlags = vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent; + preferredFlags = {}; + break; + } +} + +bool VulkanMemoryManager::allocateBufferMemory(vk::Buffer buffer, MemoryUsage usage, VulkanAllocation& allocation) +{ + if (!m_initialized) { + mprintf(("VulkanMemoryManager::allocateBufferMemory called before initialization!\n")); + return 
false; + } + + vk::MemoryRequirements memReqs = m_device.getBufferMemoryRequirements(buffer); + + vk::MemoryPropertyFlags requiredFlags, preferredFlags; + getMemoryFlags(usage, requiredFlags, preferredFlags); + + uint32_t memoryTypeIndex; + if (!findMemoryType(memReqs.memoryTypeBits, requiredFlags, preferredFlags, memoryTypeIndex)) { + mprintf(("Failed to find suitable memory type for buffer!\n")); + return false; + } + + vk::MemoryAllocateInfo allocInfo; + allocInfo.allocationSize = memReqs.size; + allocInfo.memoryTypeIndex = memoryTypeIndex; + + try { + allocation.memory = m_device.allocateMemory(allocInfo); + allocation.offset = 0; + allocation.size = memReqs.size; + allocation.memoryTypeIndex = memoryTypeIndex; + allocation.mappedPtr = nullptr; + allocation.dedicated = true; // Simple allocator always does dedicated allocations + + m_device.bindBufferMemory(buffer, allocation.memory, 0); + + ++m_allocationCount; + m_totalAllocatedBytes += allocation.size; + + return true; + } catch (const vk::SystemError& e) { + mprintf(("Failed to allocate buffer memory: %s\n", e.what())); + return false; + } +} + +bool VulkanMemoryManager::allocateImageMemory(vk::Image image, MemoryUsage usage, VulkanAllocation& allocation) +{ + if (!m_initialized) { + mprintf(("VulkanMemoryManager::allocateImageMemory called before initialization!\n")); + return false; + } + + vk::MemoryRequirements memReqs = m_device.getImageMemoryRequirements(image); + + vk::MemoryPropertyFlags requiredFlags, preferredFlags; + getMemoryFlags(usage, requiredFlags, preferredFlags); + + uint32_t memoryTypeIndex; + if (!findMemoryType(memReqs.memoryTypeBits, requiredFlags, preferredFlags, memoryTypeIndex)) { + mprintf(("Failed to find suitable memory type for image!\n")); + return false; + } + + vk::MemoryAllocateInfo allocInfo; + allocInfo.allocationSize = memReqs.size; + allocInfo.memoryTypeIndex = memoryTypeIndex; + + try { + allocation.memory = m_device.allocateMemory(allocInfo); + allocation.offset = 0; + allocation.size = memReqs.size; + allocation.memoryTypeIndex = memoryTypeIndex; + allocation.mappedPtr = nullptr; + allocation.dedicated = true; + + m_device.bindImageMemory(image, allocation.memory, 0); + + ++m_allocationCount; + m_totalAllocatedBytes += allocation.size; + + return true; + } catch (const vk::SystemError& e) { + mprintf(("Failed to allocate image memory: %s\n", e.what())); + return false; + } +} + +void VulkanMemoryManager::freeAllocation(VulkanAllocation& allocation) +{ + if (!m_initialized || allocation.memory == VK_NULL_HANDLE) { + return; + } + + // Unmap if mapped + if (allocation.mappedPtr != nullptr) { + unmapMemory(allocation); + } + + m_device.freeMemory(allocation.memory); + + --m_allocationCount; + m_totalAllocatedBytes -= allocation.size; + + allocation.memory = VK_NULL_HANDLE; + allocation.offset = 0; + allocation.size = 0; + allocation.mappedPtr = nullptr; +} + +void* VulkanMemoryManager::mapMemory(VulkanAllocation& allocation) +{ + if (!m_initialized || allocation.memory == VK_NULL_HANDLE) { + return nullptr; + } + + if (allocation.mappedPtr != nullptr) { + // Already mapped + return allocation.mappedPtr; + } + + // Check if memory is host visible + const auto& memType = m_memoryProperties.memoryTypes[allocation.memoryTypeIndex]; + if (!(memType.propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible)) { + mprintf(("Attempted to map non-host-visible memory!\n")); + return nullptr; + } + + try { + allocation.mappedPtr = m_device.mapMemory(allocation.memory, allocation.offset, allocation.size); + 
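+		// The returned pointer remains valid until unmapMemory(); a typical
+		// caller pattern with this class looks like (sketch of intended use):
+		//   void* p = getMemoryManager()->mapMemory(alloc);
+		//   memcpy(p, src, bytes);
+		//   getMemoryManager()->flushMemory(alloc, 0, VK_WHOLE_SIZE); // no-op if host coherent
+		//   getMemoryManager()->unmapMemory(alloc);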
return allocation.mappedPtr; + } catch (const vk::SystemError& e) { + mprintf(("Failed to map memory: %s\n", e.what())); + return nullptr; + } +} + +void VulkanMemoryManager::unmapMemory(VulkanAllocation& allocation) +{ + if (!m_initialized || allocation.memory == VK_NULL_HANDLE || allocation.mappedPtr == nullptr) { + return; + } + + m_device.unmapMemory(allocation.memory); + allocation.mappedPtr = nullptr; +} + +void VulkanMemoryManager::flushMemory(const VulkanAllocation& allocation, VkDeviceSize offset, VkDeviceSize size) +{ + if (!m_initialized || allocation.memory == VK_NULL_HANDLE) { + return; + } + + // Check if memory is host coherent - if so, no flush needed + const auto& memType = m_memoryProperties.memoryTypes[allocation.memoryTypeIndex]; + if (memType.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent) { + return; // Coherent memory doesn't need explicit flushing + } + + vk::MappedMemoryRange range; + range.memory = allocation.memory; + range.offset = allocation.offset + offset; + range.size = (size == VK_WHOLE_SIZE) ? allocation.size : size; + + m_device.flushMappedMemoryRanges(range); +} + +void VulkanMemoryManager::invalidateMemory(const VulkanAllocation& allocation, VkDeviceSize offset, VkDeviceSize size) +{ + if (!m_initialized || allocation.memory == VK_NULL_HANDLE) { + return; + } + + // Check if memory is host coherent - if so, no invalidate needed + const auto& memType = m_memoryProperties.memoryTypes[allocation.memoryTypeIndex]; + if (memType.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent) { + return; // Coherent memory doesn't need explicit invalidation + } + + vk::MappedMemoryRange range; + range.memory = allocation.memory; + range.offset = allocation.offset + offset; + range.size = (size == VK_WHOLE_SIZE) ? allocation.size : size; + + m_device.invalidateMappedMemoryRanges(range); +} + +} // namespace vulkan +} // namespace graphics diff --git a/code/graphics/vulkan/VulkanMemory.h b/code/graphics/vulkan/VulkanMemory.h new file mode 100644 index 00000000000..7d6cbc921f7 --- /dev/null +++ b/code/graphics/vulkan/VulkanMemory.h @@ -0,0 +1,159 @@ +#pragma once + +#include "globalincs/pstypes.h" + +#include <vulkan/vulkan.hpp> + +namespace graphics { +namespace vulkan { + +// Forward declarations +class VulkanRenderer; + +/** + * @brief Memory allocation info returned when allocating GPU memory + */ +struct VulkanAllocation { + VkDeviceMemory memory = VK_NULL_HANDLE; + VkDeviceSize offset = 0; + VkDeviceSize size = 0; + void* mappedPtr = nullptr; // Non-null if memory is mapped + uint32_t memoryTypeIndex = 0; + bool dedicated = false; // True if this is a dedicated allocation +}; + +/** + * @brief Flags for memory allocation requirements + */ +enum class MemoryUsage { + GpuOnly, // Device local, not host visible (fastest for GPU) + CpuToGpu, // Host visible, preferably device local (for uploads) + GpuToCpu, // Host visible, preferably cached (for readbacks) + CpuOnly // Host visible and coherent (for staging) +}; + +/** + * @brief Memory manager for Vulkan GPU memory allocations + * + * This is a simple allocator that creates one VkDeviceMemory per allocation. + * It's designed to be easily replaceable with VMA (Vulkan Memory Allocator) + * in the future for better memory efficiency.
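+ *
+ * For reference, the VMA equivalent of allocateBufferMemory() collapses to a
+ * single call (sketch only, assuming the stock VMA 3.x API; `allocator` is a
+ * hypothetical VmaAllocator owned by the renderer):
+ *
+ *   VmaAllocationCreateInfo ai{};
+ *   ai.usage = VMA_MEMORY_USAGE_AUTO;
+ *   vmaCreateBuffer(allocator, &bufferInfo, &ai, &buffer, &vmaAllocation, nullptr);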
+ */ +class VulkanMemoryManager { +public: + VulkanMemoryManager(); + ~VulkanMemoryManager(); + + // Non-copyable + VulkanMemoryManager(const VulkanMemoryManager&) = delete; + VulkanMemoryManager& operator=(const VulkanMemoryManager&) = delete; + + /** + * @brief Initialize the memory manager + * @param physicalDevice The physical device to query memory properties from + * @param device The logical device for allocations + * @return true on success + */ + bool init(vk::PhysicalDevice physicalDevice, vk::Device device); + + /** + * @brief Shutdown and free all allocations + */ + void shutdown(); + + /** + * @brief Allocate memory for a buffer + * @param buffer The buffer to allocate memory for + * @param usage The intended memory usage pattern + * @param[out] allocation Output allocation info + * @return true on success + */ + bool allocateBufferMemory(vk::Buffer buffer, MemoryUsage usage, VulkanAllocation& allocation); + + /** + * @brief Allocate memory for an image + * @param image The image to allocate memory for + * @param usage The intended memory usage pattern + * @param[out] allocation Output allocation info + * @return true on success + */ + bool allocateImageMemory(vk::Image image, MemoryUsage usage, VulkanAllocation& allocation); + + /** + * @brief Free a previous allocation + * @param allocation The allocation to free + */ + void freeAllocation(VulkanAllocation& allocation); + + /** + * @brief Map memory for CPU access + * @param allocation The allocation to map + * @return Pointer to mapped memory, or nullptr on failure + */ + void* mapMemory(VulkanAllocation& allocation); + + /** + * @brief Unmap previously mapped memory + * @param allocation The allocation to unmap + */ + void unmapMemory(VulkanAllocation& allocation); + + /** + * @brief Flush mapped memory to make writes visible to GPU + * @param allocation The allocation containing the range to flush + * @param offset Offset within the allocation + * @param size Size of the range to flush (VK_WHOLE_SIZE for entire allocation) + */ + void flushMemory(const VulkanAllocation& allocation, VkDeviceSize offset, VkDeviceSize size); + + /** + * @brief Invalidate mapped memory to make GPU writes visible to CPU + * @param allocation The allocation containing the range to invalidate + * @param offset Offset within the allocation + * @param size Size of the range to invalidate (VK_WHOLE_SIZE for entire allocation) + */ + void invalidateMemory(const VulkanAllocation& allocation, VkDeviceSize offset, VkDeviceSize size); + + /** + * @brief Get memory statistics + */ + size_t getAllocationCount() const { return m_allocationCount; } + size_t getTotalAllocatedBytes() const { return m_totalAllocatedBytes; } + +private: + /** + * @brief Find a suitable memory type index + * @param memoryTypeBits Bitmask of acceptable memory types + * @param requiredFlags Required memory property flags + * @param preferredFlags Preferred memory property flags (optional) + * @param[out] memoryTypeIndex Output memory type index + * @return true if a suitable memory type was found + */ + bool findMemoryType(uint32_t memoryTypeBits, + vk::MemoryPropertyFlags requiredFlags, + vk::MemoryPropertyFlags preferredFlags, + uint32_t& memoryTypeIndex); + + /** + * @brief Convert usage enum to Vulkan memory property flags + */ + void getMemoryFlags(MemoryUsage usage, + vk::MemoryPropertyFlags& requiredFlags, + vk::MemoryPropertyFlags& preferredFlags); + + vk::PhysicalDevice m_physicalDevice; + vk::Device m_device; + vk::PhysicalDeviceMemoryProperties m_memoryProperties; + + 
size_t m_allocationCount = 0; + size_t m_totalAllocatedBytes = 0; + + bool m_initialized = false; +}; + +// Global memory manager instance (set during renderer init) +VulkanMemoryManager* getMemoryManager(); +void setMemoryManager(VulkanMemoryManager* manager); + +} // namespace vulkan +} // namespace graphics diff --git a/code/graphics/vulkan/VulkanPipeline.cpp b/code/graphics/vulkan/VulkanPipeline.cpp new file mode 100644 index 00000000000..55fba465623 --- /dev/null +++ b/code/graphics/vulkan/VulkanPipeline.cpp @@ -0,0 +1,460 @@ +#include "VulkanPipeline.h" +#include "VulkanRenderState.h" + +#include "cfile/cfile.h" + +namespace graphics { +namespace vulkan { + +// Global pipeline manager pointer +static VulkanPipelineManager* g_pipelineManager = nullptr; + +VulkanPipelineManager* getPipelineManager() +{ + Assertion(g_pipelineManager != nullptr, "Vulkan PipelineManager not initialized!"); + return g_pipelineManager; +} + +void setPipelineManager(VulkanPipelineManager* manager) +{ + g_pipelineManager = manager; +} + +bool PipelineConfig::operator==(const PipelineConfig& other) const +{ + return shaderType == other.shaderType && + vertexLayoutHash == other.vertexLayoutHash && + primitiveType == other.primitiveType && + depthMode == other.depthMode && + blendMode == other.blendMode && + cullEnabled == other.cullEnabled && + frontFaceCW == other.frontFaceCW && + depthWriteEnabled == other.depthWriteEnabled && + stencilEnabled == other.stencilEnabled && + stencilFunc == other.stencilFunc && + stencilMask == other.stencilMask && + frontStencilOp.stencilFailOperation == other.frontStencilOp.stencilFailOperation && + frontStencilOp.depthFailOperation == other.frontStencilOp.depthFailOperation && + frontStencilOp.successOperation == other.frontStencilOp.successOperation && + backStencilOp.stencilFailOperation == other.backStencilOp.stencilFailOperation && + backStencilOp.depthFailOperation == other.backStencilOp.depthFailOperation && + backStencilOp.successOperation == other.backStencilOp.successOperation && + colorWriteMask.x == other.colorWriteMask.x && + colorWriteMask.y == other.colorWriteMask.y && + colorWriteMask.z == other.colorWriteMask.z && + colorWriteMask.w == other.colorWriteMask.w && + fillMode == other.fillMode && + depthBiasEnabled == other.depthBiasEnabled && + renderPass == other.renderPass && + subpass == other.subpass && + colorAttachmentCount == other.colorAttachmentCount && + sampleCount == other.sampleCount && + perAttachmentBlendEnabled == other.perAttachmentBlendEnabled && + [&]() { + if (!perAttachmentBlendEnabled) return true; + for (uint32_t i = 0; i < colorAttachmentCount; ++i) { + if (attachmentBlends[i].blendMode != other.attachmentBlends[i].blendMode || + attachmentBlends[i].writeMask.x != other.attachmentBlends[i].writeMask.x || + attachmentBlends[i].writeMask.y != other.attachmentBlends[i].writeMask.y || + attachmentBlends[i].writeMask.z != other.attachmentBlends[i].writeMask.z || + attachmentBlends[i].writeMask.w != other.attachmentBlends[i].writeMask.w) + return false; + } + return true; + }(); +} + +size_t PipelineConfig::hash() const +{ + size_t h = 0; + + // Combine all fields into hash + h ^= std::hash<int>()(static_cast<int>(shaderType)) << 0; + h ^= std::hash<size_t>()(vertexLayoutHash) << 8; + h ^= std::hash<int>()(static_cast<int>(primitiveType)) << 12; + h ^= std::hash<int>()(static_cast<int>(depthMode)) << 16; + h ^= std::hash<int>()(static_cast<int>(blendMode)) << 20; + h ^= std::hash<bool>()(cullEnabled) << 24; + h ^= std::hash<bool>()(frontFaceCW) << 25; + h ^= std::hash<bool>()(depthWriteEnabled) << 26; + h ^= 
std::hash<bool>()(stencilEnabled) << 27; + h ^= std::hash<int>()(static_cast<int>(stencilFunc)) << 28; + h ^= std::hash<uint32_t>()(stencilMask) << 31; + h ^= std::hash<int>()(static_cast<int>(frontStencilOp.stencilFailOperation)) << 33; + h ^= std::hash<int>()(static_cast<int>(frontStencilOp.depthFailOperation)) << 35; + h ^= std::hash<int>()(static_cast<int>(frontStencilOp.successOperation)) << 37; + h ^= std::hash<int>()(static_cast<int>(backStencilOp.stencilFailOperation)) << 39; + h ^= std::hash<int>()(static_cast<int>(backStencilOp.depthFailOperation)) << 41; + h ^= std::hash<int>()(static_cast<int>(backStencilOp.successOperation)) << 43; + h ^= std::hash<int>()((colorWriteMask.x ? 1 : 0) | (colorWriteMask.y ? 2 : 0) | + (colorWriteMask.z ? 4 : 0) | (colorWriteMask.w ? 8 : 0)) << 44; + h ^= std::hash<int>()(fillMode) << 45; + h ^= std::hash<bool>()(depthBiasEnabled) << 46; + h ^= std::hash<uintptr_t>()(reinterpret_cast<uintptr_t>(static_cast<VkRenderPass>(renderPass))) << 47; + h ^= std::hash<uint32_t>()(subpass) << 51; + h ^= std::hash<uint32_t>()(colorAttachmentCount) << 55; + h ^= std::hash<int>()(static_cast<int>(sampleCount)) << 56; + h ^= std::hash<bool>()(perAttachmentBlendEnabled) << 57; + if (perAttachmentBlendEnabled) { + for (uint32_t i = 0; i < colorAttachmentCount; ++i) { + h ^= std::hash<int>()(static_cast<int>(attachmentBlends[i].blendMode)) << (i * 3 + 2); + h ^= std::hash<int>()((attachmentBlends[i].writeMask.x ? 1 : 0) | + (attachmentBlends[i].writeMask.y ? 2 : 0) | + (attachmentBlends[i].writeMask.z ? 4 : 0) | + (attachmentBlends[i].writeMask.w ? 8 : 0)) << (i * 3 + 5); + } + } + + return h; +} + +bool VulkanPipelineManager::init(vk::Device device, VulkanShaderManager* shaderManager, + VulkanDescriptorManager* descriptorManager) +{ + if (m_initialized) { + return true; + } + + m_device = device; + m_shaderManager = shaderManager; + m_descriptorManager = descriptorManager; + + // Create empty pipeline cache + vk::PipelineCacheCreateInfo cacheInfo; + m_pipelineCache = m_device.createPipelineCacheUnique(cacheInfo); + + // Create common pipeline layout + createPipelineLayout(); + + m_initialized = true; + mprintf(("VulkanPipelineManager: Initialized\n")); + return true; +} + +void VulkanPipelineManager::shutdown() +{ + if (!m_initialized) { + return; + } + + // Wait for device idle + m_device.waitIdle(); + + // Clear all pipelines + m_pipelines.clear(); + m_pipelineLayout.reset(); + m_pipelineCache.reset(); + m_vertexFormatCache.clear(); + + m_initialized = false; + mprintf(("VulkanPipelineManager: Shutdown complete\n")); +} + +vk::Pipeline VulkanPipelineManager::getPipeline(const PipelineConfig& config, const vertex_layout& vertexLayout) +{ + Assertion(m_initialized, "VulkanPipelineManager::getPipeline called before initialization!"); + + // Update vertex layout hash in config + PipelineConfig fullConfig = config; + fullConfig.vertexLayoutHash = vertexLayout.hash(); + + // Check cache + auto it = m_pipelines.find(fullConfig); + if (it != m_pipelines.end()) { + return it->second.get(); + } + + // Create new pipeline + auto pipeline = createPipeline(fullConfig, vertexLayout); + if (!pipeline) { + return {}; + } + + vk::Pipeline result = pipeline.get(); + m_pipelines[fullConfig] = std::move(pipeline); + + nprintf(("Vulkan", "VulkanPipelineManager: Created pipeline for shader type %d (hash 0x%zx)\n", + static_cast<int>(config.shaderType), fullConfig.hash())); + + return result; +} + +bool VulkanPipelineManager::loadPipelineCache(const SCP_string& filename) +{ + // Try to load cache file + CFILE* fp = cfopen(filename.c_str(), "rb", CF_TYPE_CACHE); + if (!fp) { + nprintf(("Vulkan", "VulkanPipelineManager: No pipeline cache file found: %s\n", filename.c_str())); + 
return false; + } + + // Get file size + int fileSize = cfilelength(fp); + if (fileSize <= 0) { + cfclose(fp); + return false; + } + + // Read cache data + SCP_vector<uint8_t> cacheData(fileSize); + if (cfread(cacheData.data(), 1, fileSize, fp) != fileSize) { + cfclose(fp); + return false; + } + cfclose(fp); + + // Create new pipeline cache with data + vk::PipelineCacheCreateInfo cacheInfo; + cacheInfo.initialDataSize = cacheData.size(); + cacheInfo.pInitialData = cacheData.data(); + + try { + auto newCache = m_device.createPipelineCacheUnique(cacheInfo); + m_pipelineCache = std::move(newCache); + mprintf(("VulkanPipelineManager: Loaded pipeline cache: %s (%d bytes)\n", + filename.c_str(), fileSize)); + return true; + } catch (const vk::SystemError& e) { + mprintf(("VulkanPipelineManager: Failed to load pipeline cache: %s\n", e.what())); + return false; + } +} + +bool VulkanPipelineManager::savePipelineCache(const SCP_string& filename) +{ + if (!m_pipelineCache) { + return false; + } + + // Get cache data + auto cacheData = m_device.getPipelineCacheData(m_pipelineCache.get()); + if (cacheData.empty()) { + return false; + } + + // Write to file + CFILE* fp = cfopen(filename.c_str(), "wb", CF_TYPE_CACHE); + if (!fp) { + mprintf(("VulkanPipelineManager: Could not create cache file: %s\n", filename.c_str())); + return false; + } + + bool success = (cfwrite(cacheData.data(), 1, static_cast<int>(cacheData.size()), fp) == + static_cast<int>(cacheData.size())); + cfclose(fp); + + if (success) { + mprintf(("VulkanPipelineManager: Saved pipeline cache: %s (%zu bytes)\n", + filename.c_str(), cacheData.size())); + } + + return success; +} + +bool VulkanPipelineManager::needsFallbackAttribute(const vertex_layout& vertexLayout, shader_type shaderType, + VertexAttributeLocation location) +{ + // Empty layouts (fullscreen triangle etc.) 
don't use fallbacks + if (vertexLayout.get_num_vertex_components() == 0) return false; + + const VertexInputConfig& config = m_vertexFormatCache.getVertexInputConfig(vertexLayout); + uint32_t bit = 1u << static_cast<uint32_t>(location); + + // Layout natively provides this attribute — no fallback needed + if (config.providedInputMask & bit) return false; + + // Fallback needed only if the shader actually consumes this attribute + const VulkanShaderModule* shader = m_shaderManager->getShaderByType(shaderType); + if (shader && shader->vertexInputMask != 0) { + return (shader->vertexInputMask & bit) != 0; + } + return true; +} + +void VulkanPipelineManager::createPipelineLayout() +{ + // Get descriptor set layouts from descriptor manager + auto setLayouts = m_descriptorManager->getAllSetLayouts(); + + // Optional: Define push constant range for frequently-changing data + // For now, we rely entirely on uniform buffers + // vk::PushConstantRange pushConstantRange; + // pushConstantRange.stageFlags = vk::ShaderStageFlagBits::eVertex; + // pushConstantRange.offset = 0; + // pushConstantRange.size = sizeof(mat4); // Example: MVP matrix + + vk::PipelineLayoutCreateInfo layoutInfo; + layoutInfo.setLayoutCount = static_cast<uint32_t>(setLayouts.size()); + layoutInfo.pSetLayouts = setLayouts.data(); + layoutInfo.pushConstantRangeCount = 0; + layoutInfo.pPushConstantRanges = nullptr; + + m_pipelineLayout = m_device.createPipelineLayoutUnique(layoutInfo); + + mprintf(("VulkanPipelineManager: Created pipeline layout with %zu descriptor sets\n", + setLayouts.size())); +} + +vk::UniquePipeline VulkanPipelineManager::createPipeline(const PipelineConfig& config, + const vertex_layout& vertexLayout) +{ + // Ensure shader is loaded (lazy creation on first use) + m_shaderManager->maybeCreateShader(config.shaderType, 0); + + // Get shader modules + const VulkanShaderModule* shader = m_shaderManager->getShaderByType(config.shaderType); + if (!shader || !shader->valid) { + mprintf(("VulkanPipelineManager: Shader not available for type %d\n", + static_cast<int>(config.shaderType))); + return {}; + } + + // Debug: Log which shader and vertex layout is being used + mprintf(("VulkanPipelineManager: Creating pipeline for shader type %d (%s)\n", + static_cast<int>(config.shaderType), shader->description.c_str())); + mprintf((" Vertex layout has %zu components:\n", vertexLayout.get_num_vertex_components())); + for (size_t i = 0; i < vertexLayout.get_num_vertex_components(); ++i) { + const vertex_format_data* comp = vertexLayout.get_vertex_component(i); + mprintf((" [%zu] format=%d offset=%zu stride=%zu\n", i, + static_cast<int>(comp->format_type), comp->offset, comp->stride)); + } + + // Shader stages + SCP_vector<vk::PipelineShaderStageCreateInfo> shaderStages; + + vk::PipelineShaderStageCreateInfo vertStage; + vertStage.stage = vk::ShaderStageFlagBits::eVertex; + vertStage.module = shader->vertexModule.get(); + vertStage.pName = "main"; + shaderStages.push_back(vertStage); + + vk::PipelineShaderStageCreateInfo fragStage; + fragStage.stage = vk::ShaderStageFlagBits::eFragment; + fragStage.module = shader->fragmentModule.get(); + fragStage.pName = "main"; + shaderStages.push_back(fragStage); + + // Vertex input state — filter out attributes the shader doesn't consume. + // The vertex format cache may add fallback color/texcoord attributes that + // shaders like NanoVG don't declare; the SPIR-V compiler strips unused + // inputs, so we must match the pipeline to the actual shader inputs. 
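+	// Example: a shader that only declares locations 0 (position) and 2
+	// (texcoord) has vertexInputMask == 0b101, so a fallback color attribute
+	// the cache injected at location 1 is stripped below before pipeline
+	// creation. (Location numbers here are illustrative, not normative.)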
+ VertexInputConfig vertexInputConfig = m_vertexFormatCache.getVertexInputConfig(vertexLayout); + if (shader->vertexInputMask != 0) { + uint32_t mask = shader->vertexInputMask; + auto& attrs = vertexInputConfig.attributes; + SCP_unordered_set<uint32_t> usedBindings; + + // Remove attributes at locations the shader doesn't use + attrs.erase(std::remove_if(attrs.begin(), attrs.end(), + [mask](const vk::VertexInputAttributeDescription& a) { + return (mask & (1u << a.location)) == 0; + }), attrs.end()); + + // Collect bindings still referenced by remaining attributes + for (auto& a : attrs) { + usedBindings.insert(a.binding); + } + + // Remove orphaned bindings + auto& binds = vertexInputConfig.bindings; + binds.erase(std::remove_if(binds.begin(), binds.end(), + [&usedBindings](const vk::VertexInputBindingDescription& b) { + return usedBindings.count(b.binding) == 0; + }), binds.end()); + + vertexInputConfig.updatePointers(); + } + + // Input assembly + vk::PipelineInputAssemblyStateCreateInfo inputAssembly; + inputAssembly.topology = convertPrimitiveType(config.primitiveType); + inputAssembly.primitiveRestartEnable = VK_FALSE; + + // Viewport state (dynamic) + vk::PipelineViewportStateCreateInfo viewportState; + viewportState.viewportCount = 1; + viewportState.pViewports = nullptr; // Dynamic + viewportState.scissorCount = 1; + viewportState.pScissors = nullptr; // Dynamic + + // Rasterization state + vk::PipelineRasterizationStateCreateInfo rasterizer = createRasterizationState( + config.cullEnabled, config.fillMode, config.frontFaceCW, config.depthBiasEnabled); + + // Multisample state + vk::PipelineMultisampleStateCreateInfo multisampling; + multisampling.rasterizationSamples = config.sampleCount; + multisampling.sampleShadingEnable = VK_FALSE; + + // Depth stencil state + vk::PipelineDepthStencilStateCreateInfo depthStencil = createDepthStencilState( + config.depthMode, + config.stencilEnabled, + config.stencilFunc, + config.stencilEnabled ? &config.frontStencilOp : nullptr, + config.stencilEnabled ? 
&config.backStencilOp : nullptr, + config.stencilMask); + + // Override depth write if specified + if (!config.depthWriteEnabled) { + depthStencil.depthWriteEnable = VK_FALSE; + } + + // Color blend state + SCP_vector<vk::PipelineColorBlendAttachmentState> colorBlendAttachments; + for (uint32_t i = 0; i < config.colorAttachmentCount; ++i) { + if (config.perAttachmentBlendEnabled) { + colorBlendAttachments.push_back(createColorBlendAttachment( + config.attachmentBlends[i].blendMode, config.attachmentBlends[i].writeMask)); + } else { + colorBlendAttachments.push_back(createColorBlendAttachment(config.blendMode, config.colorWriteMask)); + } + } + + vk::PipelineColorBlendStateCreateInfo colorBlending; + colorBlending.logicOpEnable = VK_FALSE; + colorBlending.attachmentCount = static_cast<uint32_t>(colorBlendAttachments.size()); + colorBlending.pAttachments = colorBlendAttachments.data(); + + // Dynamic state + std::array<vk::DynamicState, 5> dynamicStates = { + vk::DynamicState::eViewport, + vk::DynamicState::eScissor, + vk::DynamicState::eLineWidth, + vk::DynamicState::eDepthBias, + vk::DynamicState::eStencilReference, + }; + + vk::PipelineDynamicStateCreateInfo dynamicState; + dynamicState.dynamicStateCount = static_cast<uint32_t>(dynamicStates.size()); + dynamicState.pDynamicStates = dynamicStates.data(); + + // Create pipeline + vk::GraphicsPipelineCreateInfo pipelineInfo; + pipelineInfo.stageCount = static_cast<uint32_t>(shaderStages.size()); + pipelineInfo.pStages = shaderStages.data(); + pipelineInfo.pVertexInputState = &vertexInputConfig.createInfo; + pipelineInfo.pInputAssemblyState = &inputAssembly; + pipelineInfo.pViewportState = &viewportState; + pipelineInfo.pRasterizationState = &rasterizer; + pipelineInfo.pMultisampleState = &multisampling; + pipelineInfo.pDepthStencilState = &depthStencil; + pipelineInfo.pColorBlendState = &colorBlending; + pipelineInfo.pDynamicState = &dynamicState; + pipelineInfo.layout = m_pipelineLayout.get(); + pipelineInfo.renderPass = config.renderPass; + pipelineInfo.subpass = config.subpass; + pipelineInfo.basePipelineHandle = nullptr; + pipelineInfo.basePipelineIndex = -1; + + try { + auto result = m_device.createGraphicsPipelineUnique(m_pipelineCache.get(), pipelineInfo); + return std::move(result.value); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPipelineManager: Failed to create pipeline: %s\n", e.what())); + return {}; + } +} + +} // namespace vulkan +} // namespace graphics diff --git a/code/graphics/vulkan/VulkanPipeline.h b/code/graphics/vulkan/VulkanPipeline.h new file mode 100644 index 00000000000..5a55314bf3c --- /dev/null +++ b/code/graphics/vulkan/VulkanPipeline.h @@ -0,0 +1,192 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "graphics/2d.h" +#include "graphics/material.h" + +#include "VulkanShader.h" +#include "VulkanVertexFormat.h" +#include "VulkanDescriptorManager.h" + +#include <vulkan/vulkan.hpp> + +namespace graphics { +namespace vulkan { + +/** + * @brief Pipeline configuration key + * + * All state that affects pipeline creation. Two configurations with the + * same values will produce identical pipelines. 
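+ *
+ * Sketch of intended use (names other than those declared in this header
+ * are illustrative):
+ *
+ *   PipelineConfig cfg;
+ *   cfg.shaderType = material_info->get_shader_type();
+ *   cfg.blendMode  = material_info->get_blend_mode();
+ *   cfg.renderPass = currentRenderPass;
+ *   vk::Pipeline pipe = getPipelineManager()->getPipeline(cfg, *layout);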
+ */ +struct PipelineConfig { + // Shader identification + shader_type shaderType = SDR_TYPE_NONE; + + // Vertex format + size_t vertexLayoutHash = 0; + + // Render state + primitive_type primitiveType = PRIM_TYPE_TRIS; + gr_zbuffer_type depthMode = ZBUFFER_TYPE_NONE; + gr_alpha_blend blendMode = ALPHA_BLEND_NONE; + bool cullEnabled = true; + bool frontFaceCW = false; // Match OpenGL default (CCW); models override to CW + bool depthWriteEnabled = true; + + // Stencil state + bool stencilEnabled = false; + ComparisionFunction stencilFunc = ComparisionFunction::Always; + uint32_t stencilMask = 0xFF; + material::StencilOp frontStencilOp; + material::StencilOp backStencilOp; + + // Fill mode (0 = solid, 1 = wireframe) + int fillMode = 0; + + // Depth bias + bool depthBiasEnabled = false; + + // Color write mask + bvec4 colorWriteMask = {true, true, true, true}; + + // Render pass compatibility + vk::RenderPass renderPass; + uint32_t subpass = 0; + + // Color attachment count (for multiple render targets) + uint32_t colorAttachmentCount = 1; + + // MSAA sample count (default e1 = no multisampling) + vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1; + + // Per-attachment blend (used by decal rendering to write-mask unused G-buffer attachments) + bool perAttachmentBlendEnabled = false; + struct AttachmentBlend { + gr_alpha_blend blendMode = ALPHA_BLEND_NONE; + bvec4 writeMask = {true, true, true, true}; + }; + static constexpr uint32_t MAX_COLOR_ATTACHMENTS = 8; + AttachmentBlend attachmentBlends[MAX_COLOR_ATTACHMENTS]; + + bool operator==(const PipelineConfig& other) const; + size_t hash() const; +}; + +struct PipelineConfigHasher { + size_t operator()(const PipelineConfig& config) const { + return config.hash(); + } +}; + +/** + * @brief Manages Vulkan graphics pipelines + * + * Creates and caches pipelines based on configuration. Uses VkPipelineCache + * for driver-level caching and an application-level cache for fast lookups. + */ +class VulkanPipelineManager { +public: + VulkanPipelineManager() = default; + ~VulkanPipelineManager() = default; + + // Non-copyable + VulkanPipelineManager(const VulkanPipelineManager&) = delete; + VulkanPipelineManager& operator=(const VulkanPipelineManager&) = delete; + + /** + * @brief Initialize the pipeline manager + * @param device Vulkan logical device + * @param shaderManager Shader manager for loading shader modules + * @param descriptorManager Descriptor manager for set layouts + * @return true on success + */ + bool init(vk::Device device, VulkanShaderManager* shaderManager, + VulkanDescriptorManager* descriptorManager); + + /** + * @brief Shutdown and release resources + */ + void shutdown(); + + /** + * @brief Get or create a pipeline for the given configuration + * @param config Pipeline configuration + * @param vertexLayout Vertex layout for the pipeline + * @return Pipeline handle, or null handle on failure + */ + vk::Pipeline getPipeline(const PipelineConfig& config, const vertex_layout& vertexLayout); + + /** + * @brief Get the common pipeline layout + * + * All pipelines share the same pipeline layout (descriptor set layouts + * and push constant ranges). 
+ */ + vk::PipelineLayout getPipelineLayout() const { return m_pipelineLayout.get(); } + + /** + * @brief Load pipeline cache from file + * @param filename Cache file path + * @return true if cache was loaded + */ + bool loadPipelineCache(const SCP_string& filename); + + /** + * @brief Save pipeline cache to file + * @param filename Cache file path + * @return true if cache was saved + */ + bool savePipelineCache(const SCP_string& filename); + + /** + * @brief Get number of cached pipelines + */ + size_t getPipelineCount() const { return m_pipelines.size(); } + + /** + * @brief Check if a draw needs a fallback buffer for a given vertex attribute + * @param vertexLayout The vertex layout to check + * @param shaderType The shader being used (checked against vertexInputMask) + * @param location The vertex attribute location to check + * @return true if the layout doesn't provide this attribute AND the shader consumes it + */ + bool needsFallbackAttribute(const vertex_layout& vertexLayout, shader_type shaderType, + VertexAttributeLocation location); + +private: + /** + * @brief Create the common pipeline layout + */ + void createPipelineLayout(); + + /** + * @brief Create a new pipeline + */ + vk::UniquePipeline createPipeline(const PipelineConfig& config, const vertex_layout& vertexLayout); + + vk::Device m_device; + VulkanShaderManager* m_shaderManager = nullptr; + VulkanDescriptorManager* m_descriptorManager = nullptr; + + // Common pipeline layout (shared by all pipelines) + vk::UniquePipelineLayout m_pipelineLayout; + + // Driver-level pipeline cache + vk::UniquePipelineCache m_pipelineCache; + + // Application-level pipeline cache: config -> pipeline + SCP_unordered_map<PipelineConfig, vk::UniquePipeline, PipelineConfigHasher> m_pipelines; + + // Vertex format cache + VulkanVertexFormatCache m_vertexFormatCache; + + bool m_initialized = false; +}; + +// Global pipeline manager access +VulkanPipelineManager* getPipelineManager(); +void setPipelineManager(VulkanPipelineManager* manager); + +} // namespace vulkan +} // namespace graphics diff --git a/code/graphics/vulkan/VulkanPostProcessing.cpp b/code/graphics/vulkan/VulkanPostProcessing.cpp new file mode 100644 index 00000000000..fe79b8625b2 --- /dev/null +++ b/code/graphics/vulkan/VulkanPostProcessing.cpp @@ -0,0 +1,5385 @@ +#include "VulkanPostProcessing.h" + +#include + +#include "cmdline/cmdline.h" +#include "gr_vulkan.h" +#include "VulkanRenderer.h" +#include "VulkanBuffer.h" +#include "VulkanDeletionQueue.h" +#include "VulkanTexture.h" +#include "VulkanPipeline.h" +#include "VulkanState.h" +#include "VulkanDraw.h" +#include "VulkanDescriptorManager.h" +#include "graphics/util/uniform_structs.h" +#include "graphics/util/primitives.h" +#include "graphics/post_processing.h" +#include "graphics/grinternal.h" +#include "graphics/light.h" +#include "graphics/matrix.h" +#include "graphics/shadows.h" +#include "graphics/2d.h" +#include "bmpman/bmpman.h" +#include "io/timer.h" +#include "lighting/lighting_profiles.h" +#include "lighting/lighting.h" +#include "math/floating.h" +#include "math/vecmat.h" +#include "render/3d.h" +#include "tracing/tracing.h" +#include "utils/Random.h" +#include "nebula/neb.h" +#include "nebula/volumetrics.h" +#include "mission/missionparse.h" + +extern float Sun_spot; +extern int Game_subspace_effect; +extern SCP_vector<light> Lights; +extern int Num_lights; + +namespace graphics { +namespace vulkan { + +// Global post-processor pointer +static VulkanPostProcessor* g_postProcessor = nullptr; + +VulkanPostProcessor* getPostProcessor() +{ + return g_postProcessor; +} + +void 
setPostProcessor(VulkanPostProcessor* pp) +{ + g_postProcessor = pp; +} + +bool VulkanPostProcessor::init(vk::Device device, vk::PhysicalDevice physDevice, + VulkanMemoryManager* memMgr, vk::Extent2D extent, + vk::Format depthFormat) +{ + if (m_initialized) { + return true; + } + + m_device = device; + m_memoryManager = memMgr; + m_extent = extent; + m_depthFormat = depthFormat; + + // Verify RGBA16F support for color attachment + sampling + { + vk::FormatProperties props = physDevice.getFormatProperties(vk::Format::eR16G16B16A16Sfloat); + if (!(props.optimalTilingFeatures & vk::FormatFeatureFlagBits::eColorAttachment) || + !(props.optimalTilingFeatures & vk::FormatFeatureFlagBits::eSampledImage)) { + mprintf(("VulkanPostProcessor: RGBA16F not supported for color attachment + sampling!\n")); + return false; + } + } + + // Create HDR scene color target (RGBA16F) + // eTransferSrc needed for copy_effect_texture (mid-scene snapshot) + // eTransferDst needed for deferred_lighting_finish (emissive→color copy) + if (!createImage(extent.width, extent.height, vk::Format::eR16G16B16A16Sfloat, + vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled + | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst, + vk::ImageAspectFlagBits::eColor, + m_sceneColor.image, m_sceneColor.view, m_sceneColor.allocation)) { + mprintf(("VulkanPostProcessor: Failed to create scene color image!\n")); + return false; + } + m_sceneColor.format = vk::Format::eR16G16B16A16Sfloat; + m_sceneColor.width = extent.width; + m_sceneColor.height = extent.height; + + // Create scene depth target + vk::ImageAspectFlags depthAspect = vk::ImageAspectFlagBits::eDepth; + if (depthFormat == vk::Format::eD24UnormS8Uint || depthFormat == vk::Format::eD32SfloatS8Uint) { + depthAspect |= vk::ImageAspectFlagBits::eStencil; + } + + if (!createImage(extent.width, extent.height, depthFormat, + vk::ImageUsageFlagBits::eDepthStencilAttachment + | vk::ImageUsageFlagBits::eSampled + | vk::ImageUsageFlagBits::eTransferSrc, + vk::ImageAspectFlagBits::eDepth, // View uses depth-only aspect + m_sceneDepth.image, m_sceneDepth.view, m_sceneDepth.allocation)) { + mprintf(("VulkanPostProcessor: Failed to create scene depth image!\n")); + shutdown(); + return false; + } + m_sceneDepth.format = depthFormat; + m_sceneDepth.width = extent.width; + m_sceneDepth.height = extent.height; + + // Create effect/composite texture (RGBA16F, snapshot of scene color for distortion/soft particles) + if (!createImage(extent.width, extent.height, vk::Format::eR16G16B16A16Sfloat, + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled, + vk::ImageAspectFlagBits::eColor, + m_sceneEffect.image, m_sceneEffect.view, m_sceneEffect.allocation)) { + mprintf(("VulkanPostProcessor: Failed to create scene effect image!\n")); + shutdown(); + return false; + } + m_sceneEffect.format = vk::Format::eR16G16B16A16Sfloat; + m_sceneEffect.width = extent.width; + m_sceneEffect.height = extent.height; + + // Create scene depth copy (samplable copy for soft particles) + // Same depth format, usage: eTransferDst (copy target) + eSampled (fragment shader reads) + if (!createImage(extent.width, extent.height, depthFormat, + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled, + vk::ImageAspectFlagBits::eDepth, + m_sceneDepthCopy.image, m_sceneDepthCopy.view, m_sceneDepthCopy.allocation)) { + mprintf(("VulkanPostProcessor: Failed to create scene depth copy image!\n")); + shutdown(); + return false; + } + 
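+	// Why a separate copy: an image cannot (in general) be sampled while it is
+	// bound as the active depth attachment, so before soft-particle draws the
+	// backend snapshots depth into m_sceneDepthCopy, roughly (sketch only; the
+	// real copy is recorded elsewhere in this backend):
+	//
+	//   vk::ImageCopy region; // full extent, eDepth aspect
+	//   cmd.copyImage(m_sceneDepth.image, vk::ImageLayout::eTransferSrcOptimal,
+	//                 m_sceneDepthCopy.image, vk::ImageLayout::eTransferDstOptimal, region);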
m_sceneDepthCopy.format = depthFormat; + m_sceneDepthCopy.width = extent.width; + m_sceneDepthCopy.height = extent.height; + + // Create HDR scene render pass + // Attachment 0: Color (RGBA16F) + // loadOp=eClear: clear to black each frame + // finalLayout=eShaderReadOnlyOptimal: ready for post-processing sampling + // Attachment 1: Depth + // loadOp=eClear: clear to far plane + // finalLayout=eDepthStencilAttachmentOptimal + { + std::array<vk::AttachmentDescription, 2> attachments; + + // Color + attachments[0].format = vk::Format::eR16G16B16A16Sfloat; + attachments[0].samples = vk::SampleCountFlagBits::e1; + attachments[0].loadOp = vk::AttachmentLoadOp::eClear; + attachments[0].storeOp = vk::AttachmentStoreOp::eStore; + attachments[0].stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + attachments[0].stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + attachments[0].initialLayout = vk::ImageLayout::eUndefined; + attachments[0].finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + // Depth — storeOp=eStore required for: + // 1. copy_effect_texture mid-scene interruption (depth must survive render pass end/resume) + // 2. lightshafts pass (samples scene depth after render pass ends) + attachments[1].format = depthFormat; + attachments[1].samples = vk::SampleCountFlagBits::e1; + attachments[1].loadOp = vk::AttachmentLoadOp::eClear; + attachments[1].storeOp = vk::AttachmentStoreOp::eStore; + attachments[1].stencilLoadOp = vk::AttachmentLoadOp::eClear; + attachments[1].stencilStoreOp = vk::AttachmentStoreOp::eStore; + attachments[1].initialLayout = vk::ImageLayout::eUndefined; + attachments[1].finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + + vk::AttachmentReference colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::AttachmentReference depthRef; + depthRef.attachment = 1; + depthRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + subpass.pDepthStencilAttachment = &depthRef; + + // Dependency: external → subpass 0 + // Includes eTransfer in srcStageMask so this render pass is compatible with + // m_sceneRenderPassLoad (which follows copy_effect_texture transfer ops). + // Vulkan requires render passes sharing a framebuffer to have identical dependencies. 
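+	// The mid-scene interruption this supports looks like (sketch):
+	//   begin m_sceneRenderPass      -> clears color + depth, draws
+	//   end pass, copy color to m_sceneEffect (transfer reads/writes)
+	//   begin m_sceneRenderPassLoad  -> loadOp=eLoad resumes the frame
+	// The eTransfer bit in srcStageMask below orders that copy against the
+	// resumed pass.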
+ vk::SubpassDependency dependency; + dependency.srcSubpass = VK_SUBPASS_EXTERNAL; + dependency.dstSubpass = 0; + dependency.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput + | vk::PipelineStageFlagBits::eEarlyFragmentTests + | vk::PipelineStageFlagBits::eTransfer; + dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput + | vk::PipelineStageFlagBits::eEarlyFragmentTests; + dependency.srcAccessMask = vk::AccessFlagBits::eTransferRead; + dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite + | vk::AccessFlagBits::eDepthStencilAttachmentWrite + | vk::AccessFlagBits::eDepthStencilAttachmentRead; + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = static_cast<uint32_t>(attachments.size()); + rpInfo.pAttachments = attachments.data(); + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dependency; + + try { + m_sceneRenderPass = m_device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create scene render pass: %s\n", e.what())); + shutdown(); + return false; + } + } + + // Create scene render pass with loadOp=eLoad (for resuming after copy_effect_texture) + // Compatible with m_sceneRenderPass (same formats/samples) so shares the same framebuffer + { + std::array<vk::AttachmentDescription, 2> attachments; + + // Color — load existing content, keep final layout for post-processing + attachments[0].format = vk::Format::eR16G16B16A16Sfloat; + attachments[0].samples = vk::SampleCountFlagBits::e1; + attachments[0].loadOp = vk::AttachmentLoadOp::eLoad; + attachments[0].storeOp = vk::AttachmentStoreOp::eStore; + attachments[0].stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + attachments[0].stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + attachments[0].initialLayout = vk::ImageLayout::eColorAttachmentOptimal; + attachments[0].finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + // Depth — load existing content + attachments[1].format = depthFormat; + attachments[1].samples = vk::SampleCountFlagBits::e1; + attachments[1].loadOp = vk::AttachmentLoadOp::eLoad; + attachments[1].storeOp = vk::AttachmentStoreOp::eStore; + attachments[1].stencilLoadOp = vk::AttachmentLoadOp::eLoad; + attachments[1].stencilStoreOp = vk::AttachmentStoreOp::eStore; + attachments[1].initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + attachments[1].finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + + vk::AttachmentReference colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::AttachmentReference depthRef; + depthRef.attachment = 1; + depthRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + subpass.pDepthStencilAttachment = &depthRef; + + // Must match m_sceneRenderPass dependency exactly for render pass compatibility + vk::SubpassDependency dependency; + dependency.srcSubpass = VK_SUBPASS_EXTERNAL; + dependency.dstSubpass = 0; + dependency.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput + | vk::PipelineStageFlagBits::eEarlyFragmentTests + | vk::PipelineStageFlagBits::eTransfer; + dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput + | vk::PipelineStageFlagBits::eEarlyFragmentTests; + dependency.srcAccessMask = vk::AccessFlagBits::eTransferRead; + 
dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite + | vk::AccessFlagBits::eDepthStencilAttachmentWrite + | vk::AccessFlagBits::eDepthStencilAttachmentRead; + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = static_cast<uint32_t>(attachments.size()); + rpInfo.pAttachments = attachments.data(); + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dependency; + + try { + m_sceneRenderPassLoad = m_device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create scene load render pass: %s\n", e.what())); + shutdown(); + return false; + } + } + + // Create scene framebuffer + { + std::array<vk::ImageView, 2> fbAttachments = {m_sceneColor.view, m_sceneDepth.view}; + + vk::FramebufferCreateInfo fbInfo; + fbInfo.renderPass = m_sceneRenderPass; + fbInfo.attachmentCount = static_cast<uint32_t>(fbAttachments.size()); + fbInfo.pAttachments = fbAttachments.data(); + fbInfo.width = extent.width; + fbInfo.height = extent.height; + fbInfo.layers = 1; + + try { + m_sceneFramebuffer = m_device.createFramebuffer(fbInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create scene framebuffer: %s\n", e.what())); + shutdown(); + return false; + } + } + + // Create linear sampler for post-processing texture reads + { + vk::SamplerCreateInfo samplerInfo; + samplerInfo.magFilter = vk::Filter::eLinear; + samplerInfo.minFilter = vk::Filter::eLinear; + samplerInfo.mipmapMode = vk::SamplerMipmapMode::eLinear; + samplerInfo.addressModeU = vk::SamplerAddressMode::eClampToEdge; + samplerInfo.addressModeV = vk::SamplerAddressMode::eClampToEdge; + samplerInfo.addressModeW = vk::SamplerAddressMode::eClampToEdge; + samplerInfo.mipLodBias = 0.0f; + samplerInfo.anisotropyEnable = VK_FALSE; + samplerInfo.compareEnable = VK_FALSE; + samplerInfo.minLod = 0.0f; + samplerInfo.maxLod = 0.0f; + samplerInfo.borderColor = vk::BorderColor::eFloatOpaqueBlack; + + try { + m_linearSampler = m_device.createSampler(samplerInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create sampler: %s\n", e.what())); + shutdown(); + return false; + } + } + + // Create mipmap sampler for bloom textures (supports textureLod) + { + vk::SamplerCreateInfo samplerInfo; + samplerInfo.magFilter = vk::Filter::eLinear; + samplerInfo.minFilter = vk::Filter::eLinear; + samplerInfo.mipmapMode = vk::SamplerMipmapMode::eLinear; + samplerInfo.addressModeU = vk::SamplerAddressMode::eClampToEdge; + samplerInfo.addressModeV = vk::SamplerAddressMode::eClampToEdge; + samplerInfo.addressModeW = vk::SamplerAddressMode::eClampToEdge; + samplerInfo.mipLodBias = 0.0f; + samplerInfo.anisotropyEnable = VK_FALSE; + samplerInfo.compareEnable = VK_FALSE; + samplerInfo.minLod = 0.0f; + samplerInfo.maxLod = static_cast<float>(MAX_MIP_BLUR_LEVELS); + samplerInfo.borderColor = vk::BorderColor::eFloatOpaqueBlack; + + try { + m_mipmapSampler = m_device.createSampler(samplerInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create mipmap sampler: %s\n", e.what())); + shutdown(); + return false; + } + } + + // Create persistent UBO for tonemapping parameters + { + vk::BufferCreateInfo bufInfo; + bufInfo.size = sizeof(graphics::generic_data::tonemapping_data); + bufInfo.usage = vk::BufferUsageFlagBits::eUniformBuffer; + bufInfo.sharingMode = vk::SharingMode::eExclusive; + + try { + m_tonemapUBO = m_device.createBuffer(bufInfo); + } catch (const vk::SystemError& e) { + 
mprintf(("VulkanPostProcessor: Failed to create tonemap UBO: %s\n", e.what())); + shutdown(); + return false; + } + + if (!m_memoryManager->allocateBufferMemory(m_tonemapUBO, MemoryUsage::CpuToGpu, m_tonemapUBOAlloc)) { + mprintf(("VulkanPostProcessor: Failed to allocate tonemap UBO memory!\n")); + m_device.destroyBuffer(m_tonemapUBO); + m_tonemapUBO = nullptr; + shutdown(); + return false; + } + + // Write default passthrough tonemapping data (linear, exposure=1.0) + auto* mapped = static_cast(m_memoryManager->mapMemory(m_tonemapUBOAlloc)); + if (mapped) { + memset(mapped, 0, sizeof(graphics::generic_data::tonemapping_data)); + mapped->exposure = 1.0f; + mapped->tonemapper = 0; // Linear + m_memoryManager->unmapMemory(m_tonemapUBOAlloc); + } + } + + // Initialize bloom resources (non-fatal if it fails) + if (!initBloom()) { + mprintf(("VulkanPostProcessor: Bloom initialization failed (non-fatal)\n")); + } + + // Initialize LDR targets for tonemapping + FXAA (non-fatal if it fails) + if (!initLDRTargets()) { + mprintf(("VulkanPostProcessor: LDR target initialization failed (non-fatal)\n")); + } + + // Initialize distortion ping-pong textures (non-fatal if it fails) + { + bool distOk = true; + for (int i = 0; i < 2; i++) { + if (!createImage(32, 32, vk::Format::eR8G8B8A8Unorm, + vk::ImageUsageFlagBits::eTransferSrc + | vk::ImageUsageFlagBits::eTransferDst + | vk::ImageUsageFlagBits::eSampled, + vk::ImageAspectFlagBits::eColor, + m_distortionTex[i].image, m_distortionTex[i].view, + m_distortionTex[i].allocation)) { + mprintf(("VulkanPostProcessor: Failed to create distortion texture %d\n", i)); + distOk = false; + break; + } + m_distortionTex[i].format = vk::Format::eR8G8B8A8Unorm; + m_distortionTex[i].width = 32; + m_distortionTex[i].height = 32; + } + + if (distOk) { + // Create LINEAR/REPEAT sampler for distortion textures + vk::SamplerCreateInfo samplerInfo; + samplerInfo.magFilter = vk::Filter::eLinear; + samplerInfo.minFilter = vk::Filter::eLinear; + samplerInfo.mipmapMode = vk::SamplerMipmapMode::eNearest; + samplerInfo.addressModeU = vk::SamplerAddressMode::eRepeat; + samplerInfo.addressModeV = vk::SamplerAddressMode::eRepeat; + samplerInfo.addressModeW = vk::SamplerAddressMode::eRepeat; + samplerInfo.mipLodBias = 0.0f; + samplerInfo.anisotropyEnable = VK_FALSE; + samplerInfo.compareEnable = VK_FALSE; + samplerInfo.minLod = 0.0f; + samplerInfo.maxLod = 0.0f; + samplerInfo.borderColor = vk::BorderColor::eFloatOpaqueBlack; + + try { + m_distortionSampler = m_device.createSampler(samplerInfo); + m_distortionInitialized = true; + mprintf(("VulkanPostProcessor: Distortion textures initialized\n")); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create distortion sampler: %s\n", e.what())); + } + } + } + + // Initialize G-buffer for deferred lighting (non-fatal) + if (!initGBuffer()) { + mprintf(("VulkanPostProcessor: G-buffer initialization failed (non-fatal)\n")); + } + + // Initialize MSAA resources if MSAA is enabled and G-buffer is ready + if (m_gbufInitialized && Cmdline_msaa_enabled > 0) { + if (!initMSAA()) { + mprintf(("VulkanPostProcessor: MSAA initialization failed (non-fatal, disabling MSAA)\n")); + Cmdline_msaa_enabled = 0; + } + } + + m_initialized = true; + mprintf(("VulkanPostProcessor: Initialized (%ux%u, RGBA16F scene color)\n", + extent.width, extent.height)); + return true; +} + +void VulkanPostProcessor::shutdown() +{ + if (m_device) { + m_device.waitIdle(); + + shutdownFogPass(); + shutdownShadowPass(); + shutdownMSAA(); + 
shutdownLightVolumes(); + shutdownGBuffer(); + shutdownLDRTargets(); + shutdownBloom(); + + if (m_mipmapSampler) { + m_device.destroySampler(m_mipmapSampler); + m_mipmapSampler = nullptr; + } + + if (m_tonemapUBO) { + m_device.destroyBuffer(m_tonemapUBO); + m_tonemapUBO = nullptr; + } + if (m_tonemapUBOAlloc.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_tonemapUBOAlloc); + } + + if (m_linearSampler) { + m_device.destroySampler(m_linearSampler); + m_linearSampler = nullptr; + } + if (m_sceneFramebuffer) { + m_device.destroyFramebuffer(m_sceneFramebuffer); + m_sceneFramebuffer = nullptr; + } + if (m_sceneRenderPassLoad) { + m_device.destroyRenderPass(m_sceneRenderPassLoad); + m_sceneRenderPassLoad = nullptr; + } + if (m_sceneRenderPass) { + m_device.destroyRenderPass(m_sceneRenderPass); + m_sceneRenderPass = nullptr; + } + + // Destroy scene effect/composite target + if (m_sceneEffect.view) { + m_device.destroyImageView(m_sceneEffect.view); + m_sceneEffect.view = nullptr; + } + if (m_sceneEffect.image) { + m_device.destroyImage(m_sceneEffect.image); + m_sceneEffect.image = nullptr; + } + if (m_sceneEffect.allocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_sceneEffect.allocation); + } + + // Destroy scene color target + if (m_sceneColor.view) { + m_device.destroyImageView(m_sceneColor.view); + m_sceneColor.view = nullptr; + } + if (m_sceneColor.image) { + m_device.destroyImage(m_sceneColor.image); + m_sceneColor.image = nullptr; + } + if (m_sceneColor.allocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_sceneColor.allocation); + } + + // Destroy scene depth target + if (m_sceneDepth.view) { + m_device.destroyImageView(m_sceneDepth.view); + m_sceneDepth.view = nullptr; + } + if (m_sceneDepth.image) { + m_device.destroyImage(m_sceneDepth.image); + m_sceneDepth.image = nullptr; + } + if (m_sceneDepth.allocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_sceneDepth.allocation); + } + + // Destroy scene depth copy target + if (m_sceneDepthCopy.view) { + m_device.destroyImageView(m_sceneDepthCopy.view); + m_sceneDepthCopy.view = nullptr; + } + if (m_sceneDepthCopy.image) { + m_device.destroyImage(m_sceneDepthCopy.image); + m_sceneDepthCopy.image = nullptr; + } + if (m_sceneDepthCopy.allocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_sceneDepthCopy.allocation); + } + + // Destroy distortion textures + if (m_distortionSampler) { + m_device.destroySampler(m_distortionSampler); + m_distortionSampler = nullptr; + } + for (int i = 0; i < 2; i++) { + if (m_distortionTex[i].view) { + m_device.destroyImageView(m_distortionTex[i].view); + m_distortionTex[i].view = nullptr; + } + if (m_distortionTex[i].image) { + m_device.destroyImage(m_distortionTex[i].image); + m_distortionTex[i].image = nullptr; + } + if (m_distortionTex[i].allocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_distortionTex[i].allocation); + } + } + m_distortionInitialized = false; + } + + m_initialized = false; +} + +void VulkanPostProcessor::updateTonemappingUBO() +{ + if (!m_tonemapUBO || !m_memoryManager) { + return; + } + + namespace ltp = lighting_profiles; + + auto* mapped = static_cast<graphics::generic_data::tonemapping_data*>( + m_memoryManager->mapMemory(m_tonemapUBOAlloc)); + if (mapped) { + auto ppc = ltp::current_piecewise_intermediates(); + mapped->exposure = ltp::current_exposure(); + mapped->tonemapper = static_cast<int>(ltp::current_tonemapper()); + mapped->x0 = ppc.x0; + mapped->y0 = ppc.y0; + mapped->x1 = ppc.x1; + mapped->toe_B = 
+		mapped->toe_lnA = ppc.toe_lnA;
+		mapped->sh_B = ppc.sh_B;
+		mapped->sh_lnA = ppc.sh_lnA;
+		mapped->sh_offsetX = ppc.sh_offsetX;
+		mapped->sh_offsetY = ppc.sh_offsetY;
+		mapped->linearOut = 0; // Apply sRGB conversion (HDR → swap chain)
+		m_memoryManager->unmapMemory(m_tonemapUBOAlloc);
+	}
+}
+
+// ===== G-Buffer (Deferred Lighting) Implementation =====
+
+bool VulkanPostProcessor::initGBuffer()
+{
+	if (m_gbufInitialized) {
+		return true;
+	}
+
+	const uint32_t w = m_extent.width;
+	const uint32_t h = m_extent.height;
+	const vk::ImageUsageFlags gbufUsage =
+		vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled
+		| vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst;
+
+	// Create G-buffer images (position, normal, specular, emissive, composite)
+	struct GbufTarget {
+		RenderTarget* target;
+		vk::Format format;
+		const char* name;
+	};
+
+	GbufTarget targets[] = {
+		{&m_gbufPosition, vk::Format::eR16G16B16A16Sfloat, "position"},
+		{&m_gbufNormal, vk::Format::eR16G16B16A16Sfloat, "normal"},
+		{&m_gbufSpecular, vk::Format::eR8G8B8A8Unorm, "specular"},
+		{&m_gbufEmissive, vk::Format::eR16G16B16A16Sfloat, "emissive"},
+		{&m_gbufComposite, vk::Format::eR16G16B16A16Sfloat, "composite"},
+	};
+
+	for (auto& t : targets) {
+		if (!createImage(w, h, t.format, gbufUsage, vk::ImageAspectFlagBits::eColor,
+				t.target->image, t.target->view, t.target->allocation)) {
+			mprintf(("VulkanPostProcessor: Failed to create G-buffer %s image!\n", t.name));
+			shutdownGBuffer();
+			return false;
+		}
+		t.target->format = t.format;
+		t.target->width = w;
+		t.target->height = h;
+	}
+
+	// Create samplable copy of G-buffer normal (for decal angle rejection)
+	{
+		vk::ImageUsageFlags copyUsage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst;
+		if (!createImage(w, h, vk::Format::eR16G16B16A16Sfloat, copyUsage,
+				vk::ImageAspectFlagBits::eColor,
+				m_gbufNormalCopy.image, m_gbufNormalCopy.view, m_gbufNormalCopy.allocation)) {
+			mprintf(("VulkanPostProcessor: Failed to create G-buffer normal copy!\n"));
+			shutdownGBuffer();
+			return false;
+		}
+		m_gbufNormalCopy.format = vk::Format::eR16G16B16A16Sfloat;
+		m_gbufNormalCopy.width = w;
+		m_gbufNormalCopy.height = h;
+	}
+
+	// Create G-buffer render pass (eClear) — 6 color + depth
+	// Attachment order: [0]=color, [1]=position, [2]=normal, [3]=specular, [4]=emissive, [5]=composite, [6]=depth
+	{
+		std::array<vk::AttachmentDescription, 7> attachments;
+
+		// Formats for the 6 color attachments
+		vk::Format colorFormats[6] = {
+			vk::Format::eR16G16B16A16Sfloat, // 0: color (scene color)
+			vk::Format::eR16G16B16A16Sfloat, // 1: position
+			vk::Format::eR16G16B16A16Sfloat, // 2: normal
+			vk::Format::eR8G8B8A8Unorm,      // 3: specular
+			vk::Format::eR16G16B16A16Sfloat, // 4: emissive
+			vk::Format::eR16G16B16A16Sfloat, // 5: composite
+		};
+
+		for (uint32_t i = 0; i < 6; ++i) {
+			attachments[i].format = colorFormats[i];
+			attachments[i].samples = vk::SampleCountFlagBits::e1;
+			attachments[i].loadOp = vk::AttachmentLoadOp::eClear;
+			attachments[i].storeOp = vk::AttachmentStoreOp::eStore;
+			attachments[i].stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+			attachments[i].stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+			attachments[i].initialLayout = vk::ImageLayout::eUndefined;
+			attachments[i].finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		}
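+		// All six color attachments leave the pass in eShaderReadOnlyOptimal so
+		// the light accumulation and post passes can sample them without extra
+		// barriers; the depth attachment (below) stays in attachment layout.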
+		// Depth
+		attachments[6].format = m_depthFormat;
+		attachments[6].samples = vk::SampleCountFlagBits::e1;
+		attachments[6].loadOp = vk::AttachmentLoadOp::eClear;
+		attachments[6].storeOp = vk::AttachmentStoreOp::eStore;
+		attachments[6].stencilLoadOp = vk::AttachmentLoadOp::eClear;
+		attachments[6].stencilStoreOp = vk::AttachmentStoreOp::eStore;
+		attachments[6].initialLayout = vk::ImageLayout::eUndefined;
+		attachments[6].finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		std::array<vk::AttachmentReference, 6> colorRefs;
+		for (uint32_t i = 0; i < 6; ++i) {
+			colorRefs[i].attachment = i;
+			colorRefs[i].layout = vk::ImageLayout::eColorAttachmentOptimal;
+		}
+
+		vk::AttachmentReference depthRef;
+		depthRef.attachment = 6;
+		depthRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		vk::SubpassDescription subpass;
+		subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics;
+		subpass.colorAttachmentCount = 6;
+		subpass.pColorAttachments = colorRefs.data();
+		subpass.pDepthStencilAttachment = &depthRef;
+
+		// Dependency matching the scene render pass (for render pass compatibility)
+		vk::SubpassDependency dependency;
+		dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
+		dependency.dstSubpass = 0;
+		dependency.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests
+			| vk::PipelineStageFlagBits::eTransfer;
+		dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests;
+		dependency.srcAccessMask = vk::AccessFlagBits::eTransferRead
+			| vk::AccessFlagBits::eTransferWrite;
+		dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentRead;
+
+		vk::RenderPassCreateInfo rpInfo;
+		rpInfo.attachmentCount = static_cast<uint32_t>(attachments.size());
+		rpInfo.pAttachments = attachments.data();
+		rpInfo.subpassCount = 1;
+		rpInfo.pSubpasses = &subpass;
+		rpInfo.dependencyCount = 1;
+		rpInfo.pDependencies = &dependency;
+
+		try {
+			m_gbufRenderPass = m_device.createRenderPass(rpInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create G-buffer render pass: %s\n", e.what()));
+			shutdownGBuffer();
+			return false;
+		}
+	}
+
+	// Create G-buffer render pass (eLoad) — for resuming after mid-pass copies
+	{
+		std::array<vk::AttachmentDescription, 7> attachments;
+
+		vk::Format colorFormats[6] = {
+			vk::Format::eR16G16B16A16Sfloat,
+			vk::Format::eR16G16B16A16Sfloat,
+			vk::Format::eR16G16B16A16Sfloat,
+			vk::Format::eR8G8B8A8Unorm,
+			vk::Format::eR16G16B16A16Sfloat,
+			vk::Format::eR16G16B16A16Sfloat,
+		};
+
+		for (uint32_t i = 0; i < 6; ++i) {
+			attachments[i].format = colorFormats[i];
+			attachments[i].samples = vk::SampleCountFlagBits::e1;
+			attachments[i].loadOp = vk::AttachmentLoadOp::eLoad;
+			attachments[i].storeOp = vk::AttachmentStoreOp::eStore;
+			attachments[i].stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+			attachments[i].stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+			attachments[i].initialLayout = vk::ImageLayout::eColorAttachmentOptimal;
+			attachments[i].finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		}
+
+		// Depth
+		attachments[6].format = m_depthFormat;
+		attachments[6].samples = vk::SampleCountFlagBits::e1;
+		attachments[6].loadOp = vk::AttachmentLoadOp::eLoad;
+		attachments[6].storeOp = vk::AttachmentStoreOp::eStore;
+		attachments[6].stencilLoadOp = vk::AttachmentLoadOp::eLoad;
+		attachments[6].stencilStoreOp = vk::AttachmentStoreOp::eStore;
+		attachments[6].initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+		attachments[6].finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
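+		// This variant declares initialLayout == the layout the attachments are
+		// actually in when the pass is resumed; transitionGbufForResume() is what
+		// puts color attachments 1-5 back into eColorAttachmentOptimal first.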
+		std::array<vk::AttachmentReference, 6> colorRefs;
+		for (uint32_t i = 0; i < 6; ++i) {
+			colorRefs[i].attachment = i;
+			colorRefs[i].layout = vk::ImageLayout::eColorAttachmentOptimal;
+		}
+
+		vk::AttachmentReference depthRef;
+		depthRef.attachment = 6;
+		depthRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		vk::SubpassDescription subpass;
+		subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics;
+		subpass.colorAttachmentCount = 6;
+		subpass.pColorAttachments = colorRefs.data();
+		subpass.pDepthStencilAttachment = &depthRef;
+
+		// Must match eClear pass dependency for compatibility
+		vk::SubpassDependency dependency;
+		dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
+		dependency.dstSubpass = 0;
+		dependency.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests
+			| vk::PipelineStageFlagBits::eTransfer;
+		dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests;
+		dependency.srcAccessMask = vk::AccessFlagBits::eTransferRead
+			| vk::AccessFlagBits::eTransferWrite;
+		dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentRead;
+
+		vk::RenderPassCreateInfo rpInfo;
+		rpInfo.attachmentCount = static_cast<uint32_t>(attachments.size());
+		rpInfo.pAttachments = attachments.data();
+		rpInfo.subpassCount = 1;
+		rpInfo.pSubpasses = &subpass;
+		rpInfo.dependencyCount = 1;
+		rpInfo.pDependencies = &dependency;
+
+		try {
+			m_gbufRenderPassLoad = m_device.createRenderPass(rpInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create G-buffer load render pass: %s\n", e.what()));
+			shutdownGBuffer();
+			return false;
+		}
+	}
+
+	// Create G-buffer framebuffer (6 color + depth)
+	{
+		std::array<vk::ImageView, 7> fbAttachments = {
+			m_sceneColor.view,    // 0: color (shared with scene framebuffer)
+			m_gbufPosition.view,  // 1: position
+			m_gbufNormal.view,    // 2: normal
+			m_gbufSpecular.view,  // 3: specular
+			m_gbufEmissive.view,  // 4: emissive
+			m_gbufComposite.view, // 5: composite
+			m_sceneDepth.view,    // 6: depth (shared with scene framebuffer)
+		};
+
+		vk::FramebufferCreateInfo fbInfo;
+		fbInfo.renderPass = m_gbufRenderPass;
+		fbInfo.attachmentCount = static_cast<uint32_t>(fbAttachments.size());
+		fbInfo.pAttachments = fbAttachments.data();
+		fbInfo.width = w;
+		fbInfo.height = h;
+		fbInfo.layers = 1;
+
+		try {
+			m_gbufFramebuffer = m_device.createFramebuffer(fbInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create G-buffer framebuffer: %s\n", e.what()));
+			shutdownGBuffer();
+			return false;
+		}
+	}
+
+	m_gbufInitialized = true;
+	mprintf(("VulkanPostProcessor: G-buffer initialized (%ux%u, 6 color + depth)\n", w, h));
+	return true;
+}
+
+void VulkanPostProcessor::shutdownGBuffer()
+{
+	if (!m_device) {
+		return;
+	}
+
+	if (m_gbufFramebuffer) {
+		m_device.destroyFramebuffer(m_gbufFramebuffer);
+		m_gbufFramebuffer = nullptr;
+	}
+	if (m_gbufRenderPassLoad) {
+		m_device.destroyRenderPass(m_gbufRenderPassLoad);
+		m_gbufRenderPassLoad = nullptr;
+	}
+	if (m_gbufRenderPass) {
+		m_device.destroyRenderPass(m_gbufRenderPass);
+		m_gbufRenderPass = nullptr;
+	}
+
+	RenderTarget* gbufTargets[] = {
+		&m_gbufPosition, &m_gbufNormal, &m_gbufSpecular,
+		&m_gbufEmissive, &m_gbufComposite, &m_gbufNormalCopy,
+	};
+	for (auto* rt : gbufTargets) {
+		if (rt->view) {
+			m_device.destroyImageView(rt->view);
+			rt->view = nullptr;
+		}
+		if (rt->image) {
+			m_device.destroyImage(rt->image);
+			rt->image = nullptr;
+		}
+		if (rt->allocation.memory != VK_NULL_HANDLE) {
+			m_memoryManager->freeAllocation(rt->allocation);
+		}
+	}
+
+	m_gbufInitialized = false;
+}
+
+void VulkanPostProcessor::transitionGbufForResume(vk::CommandBuffer cmd)
+{
+	if (!m_gbufInitialized) {
+		return;
+	}
+
+	// After ending the G-buffer render pass, color attachments 1-5 are in
+	// eShaderReadOnlyOptimal (from finalLayout). The eLoad pass expects
+	// eColorAttachmentOptimal. Transition them in a single barrier batch.
+	vk::Image gbufImages[5] = {
+		m_gbufPosition.image,
+		m_gbufNormal.image,
+		m_gbufSpecular.image,
+		m_gbufEmissive.image,
+		m_gbufComposite.image,
+	};
+
+	std::array<vk::ImageMemoryBarrier, 5> barriers;
+	for (int i = 0; i < 5; ++i) {
+		barriers[i].srcAccessMask = {};
+		barriers[i].dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+		barriers[i].oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		barriers[i].newLayout = vk::ImageLayout::eColorAttachmentOptimal;
+		barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barriers[i].image = gbufImages[i];
+		barriers[i].subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor;
+		barriers[i].subresourceRange.baseMipLevel = 0;
+		barriers[i].subresourceRange.levelCount = 1;
+		barriers[i].subresourceRange.baseArrayLayer = 0;
+		barriers[i].subresourceRange.layerCount = 1;
+	}
+
+	cmd.pipelineBarrier(
+		vk::PipelineStageFlagBits::eColorAttachmentOutput,
+		vk::PipelineStageFlagBits::eColorAttachmentOutput,
+		{}, nullptr, nullptr, barriers);
+}
+
+// ===== MSAA G-Buffer =====
+
+bool VulkanPostProcessor::initMSAA()
+{
+	if (m_msaaInitialized) {
+		return true;
+	}
+
+	auto* renderer = getRendererInstance();
+	vk::SampleCountFlagBits msaaSamples = renderer->getMsaaSampleCount();
+	if (msaaSamples == vk::SampleCountFlagBits::e1) {
+		return false;
+	}
+
+	const uint32_t w = m_extent.width;
+	const uint32_t h = m_extent.height;
+	const vk::ImageUsageFlags msaaUsage =
+		vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled;
+
+	// Create MSAA color images (5 total: color, position, normal, specular, emissive)
+	struct MsaaTarget {
+		RenderTarget* target;
+		vk::Format format;
+		const char* name;
+	};
+
+	MsaaTarget targets[] = {
+		{&m_msaaColor, vk::Format::eR16G16B16A16Sfloat, "msaa-color"},
+		{&m_msaaPosition, vk::Format::eR16G16B16A16Sfloat, "msaa-position"},
+		{&m_msaaNormal, vk::Format::eR16G16B16A16Sfloat, "msaa-normal"},
+		{&m_msaaSpecular, vk::Format::eR8G8B8A8Unorm, "msaa-specular"},
+		{&m_msaaEmissive, vk::Format::eR16G16B16A16Sfloat, "msaa-emissive"},
+	};
+
+	for (auto& t : targets) {
+		if (!createImage(w, h, t.format, msaaUsage, vk::ImageAspectFlagBits::eColor,
+				t.target->image, t.target->view, t.target->allocation, msaaSamples)) {
+			mprintf(("VulkanPostProcessor: Failed to create %s image!\n", t.name));
+			shutdownMSAA();
+			return false;
+		}
+		t.target->format = t.format;
+		t.target->width = w;
+		t.target->height = h;
+	}
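+	// These MS targets are consumed by the shader-based resolve pass further
+	// down rather than by transfer-style resolves, so attachment + sampled
+	// usage is sufficient; no transfer bits are needed here.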
+	// Create MSAA depth image
+	{
+		vk::ImageCreateInfo imageInfo;
+		imageInfo.imageType = vk::ImageType::e2D;
+		imageInfo.format = m_depthFormat;
+		imageInfo.extent = vk::Extent3D(w, h, 1);
+		imageInfo.mipLevels = 1;
+		imageInfo.arrayLayers = 1;
+		imageInfo.samples = msaaSamples;
+		imageInfo.tiling = vk::ImageTiling::eOptimal;
+		imageInfo.usage = vk::ImageUsageFlagBits::eDepthStencilAttachment | vk::ImageUsageFlagBits::eSampled;
+		imageInfo.sharingMode = vk::SharingMode::eExclusive;
+		imageInfo.initialLayout = vk::ImageLayout::eUndefined;
+
+		try {
+			m_msaaDepthImage = m_device.createImage(imageInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create MSAA depth image: %s\n", e.what()));
+			shutdownMSAA();
+			return false;
+		}
+
+		if (!m_memoryManager->allocateImageMemory(m_msaaDepthImage, MemoryUsage::GpuOnly, m_msaaDepthAlloc)) {
+			mprintf(("VulkanPostProcessor: Failed to allocate MSAA depth memory!\n"));
+			m_device.destroyImage(m_msaaDepthImage);
+			m_msaaDepthImage = nullptr;
+			shutdownMSAA();
+			return false;
+		}
+
+		vk::ImageViewCreateInfo viewInfo;
+		viewInfo.image = m_msaaDepthImage;
+		viewInfo.viewType = vk::ImageViewType::e2D;
+		viewInfo.format = m_depthFormat;
+		viewInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eDepth;
+		viewInfo.subresourceRange.baseMipLevel = 0;
+		viewInfo.subresourceRange.levelCount = 1;
+		viewInfo.subresourceRange.baseArrayLayer = 0;
+		viewInfo.subresourceRange.layerCount = 1;
+
+		try {
+			m_msaaDepthView = m_device.createImageView(viewInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create MSAA depth view: %s\n", e.what()));
+			shutdownMSAA();
+			return false;
+		}
+	}
+
+	// MSAA G-buffer render pass (eClear) — 5 color + depth
+	// Attachment order: [0]=color(MS), [1]=pos(MS), [2]=norm(MS), [3]=spec(MS), [4]=emissive(MS), [5]=depth(MS)
+	{
+		std::array<vk::AttachmentDescription, 6> attachments;
+
+		vk::Format colorFormats[5] = {
+			vk::Format::eR16G16B16A16Sfloat, // 0: color
+			vk::Format::eR16G16B16A16Sfloat, // 1: position
+			vk::Format::eR16G16B16A16Sfloat, // 2: normal
+			vk::Format::eR8G8B8A8Unorm,      // 3: specular
+			vk::Format::eR16G16B16A16Sfloat, // 4: emissive
+		};
+
+		for (uint32_t i = 0; i < 5; ++i) {
+			attachments[i].format = colorFormats[i];
+			attachments[i].samples = msaaSamples;
+			attachments[i].loadOp = vk::AttachmentLoadOp::eClear;
+			attachments[i].storeOp = vk::AttachmentStoreOp::eStore;
+			attachments[i].stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+			attachments[i].stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+			attachments[i].initialLayout = vk::ImageLayout::eColorAttachmentOptimal;
+			attachments[i].finalLayout = vk::ImageLayout::eColorAttachmentOptimal;
+		}
+
+		// Depth (MS)
+		attachments[5].format = m_depthFormat;
+		attachments[5].samples = msaaSamples;
+		attachments[5].loadOp = vk::AttachmentLoadOp::eClear;
+		attachments[5].storeOp = vk::AttachmentStoreOp::eStore;
+		attachments[5].stencilLoadOp = vk::AttachmentLoadOp::eClear;
+		attachments[5].stencilStoreOp = vk::AttachmentStoreOp::eStore;
+		attachments[5].initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+		attachments[5].finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		std::array<vk::AttachmentReference, 5> colorRefs;
+		for (uint32_t i = 0; i < 5; ++i) {
+			colorRefs[i].attachment = i;
+			colorRefs[i].layout = vk::ImageLayout::eColorAttachmentOptimal;
+		}
+
+		vk::AttachmentReference depthRef;
+		depthRef.attachment = 5;
+		depthRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		vk::SubpassDescription subpass;
+		subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics;
+		subpass.colorAttachmentCount = 5;
+		subpass.pColorAttachments = colorRefs.data();
+		subpass.pDepthStencilAttachment = &depthRef;
+
+		vk::SubpassDependency dependency;
+		dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
+		dependency.dstSubpass = 0;
+		dependency.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests
+			| vk::PipelineStageFlagBits::eTransfer;
+		dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests;
+		dependency.srcAccessMask = vk::AccessFlagBits::eTransferRead
+			| vk::AccessFlagBits::eTransferWrite;
+		dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentRead;
+
+		vk::RenderPassCreateInfo rpInfo;
+		rpInfo.attachmentCount = static_cast<uint32_t>(attachments.size());
+		rpInfo.pAttachments = attachments.data();
+		rpInfo.subpassCount = 1;
+		rpInfo.pSubpasses = &subpass;
+		rpInfo.dependencyCount = 1;
+		rpInfo.pDependencies = &dependency;
+
+		try {
+			m_msaaGbufRenderPass = m_device.createRenderPass(rpInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create MSAA G-buffer render pass: %s\n", e.what()));
+			shutdownMSAA();
+			return false;
+		}
+	}
+
+	// MSAA G-buffer render pass (eLoad) — emissive preserving variant
+	// All attachments eLoad except we accept eColorAttachmentOptimal as initial layout
+	{
+		std::array<vk::AttachmentDescription, 6> attachments;
+
+		vk::Format colorFormats[5] = {
+			vk::Format::eR16G16B16A16Sfloat,
+			vk::Format::eR16G16B16A16Sfloat,
+			vk::Format::eR16G16B16A16Sfloat,
+			vk::Format::eR8G8B8A8Unorm,
+			vk::Format::eR16G16B16A16Sfloat,
+		};
+
+		for (uint32_t i = 0; i < 5; ++i) {
+			attachments[i].format = colorFormats[i];
+			attachments[i].samples = msaaSamples;
+			attachments[i].loadOp = vk::AttachmentLoadOp::eLoad;
+			attachments[i].storeOp = vk::AttachmentStoreOp::eStore;
+			attachments[i].stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+			attachments[i].stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+			attachments[i].initialLayout = vk::ImageLayout::eColorAttachmentOptimal;
+			attachments[i].finalLayout = vk::ImageLayout::eColorAttachmentOptimal;
+		}
+
+		attachments[5].format = m_depthFormat;
+		attachments[5].samples = msaaSamples;
+		attachments[5].loadOp = vk::AttachmentLoadOp::eLoad;
+		attachments[5].storeOp = vk::AttachmentStoreOp::eStore;
+		attachments[5].stencilLoadOp = vk::AttachmentLoadOp::eLoad;
+		attachments[5].stencilStoreOp = vk::AttachmentStoreOp::eStore;
+		attachments[5].initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+		attachments[5].finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		std::array<vk::AttachmentReference, 5> colorRefs;
+		for (uint32_t i = 0; i < 5; ++i) {
+			colorRefs[i].attachment = i;
+			colorRefs[i].layout = vk::ImageLayout::eColorAttachmentOptimal;
+		}
+
+		vk::AttachmentReference depthRef;
+		depthRef.attachment = 5;
+		depthRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		vk::SubpassDescription subpass;
+		subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics;
+		subpass.colorAttachmentCount = 5;
+		subpass.pColorAttachments = colorRefs.data();
+		subpass.pDepthStencilAttachment = &depthRef;
+
+		vk::SubpassDependency dependency;
+		dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
+		dependency.dstSubpass = 0;
+		dependency.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests
+			| vk::PipelineStageFlagBits::eTransfer;
+		dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests;
+		dependency.srcAccessMask = vk::AccessFlagBits::eTransferRead
+			| vk::AccessFlagBits::eTransferWrite;
+		dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentRead;
+		vk::RenderPassCreateInfo rpInfo;
+		rpInfo.attachmentCount = static_cast<uint32_t>(attachments.size());
+		rpInfo.pAttachments = attachments.data();
+		rpInfo.subpassCount = 1;
+		rpInfo.pSubpasses = &subpass;
+		rpInfo.dependencyCount = 1;
+		rpInfo.pDependencies = &dependency;
+
+		try {
+			m_msaaGbufRenderPassLoad = m_device.createRenderPass(rpInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create MSAA G-buffer load render pass: %s\n", e.what()));
+			shutdownMSAA();
+			return false;
+		}
+	}
+
+	// MSAA G-buffer framebuffer (5 color + depth)
+	{
+		std::array<vk::ImageView, 6> fbAttachments = {
+			m_msaaColor.view,
+			m_msaaPosition.view,
+			m_msaaNormal.view,
+			m_msaaSpecular.view,
+			m_msaaEmissive.view,
+			m_msaaDepthView,
+		};
+
+		vk::FramebufferCreateInfo fbInfo;
+		fbInfo.renderPass = m_msaaGbufRenderPass;
+		fbInfo.attachmentCount = static_cast<uint32_t>(fbAttachments.size());
+		fbInfo.pAttachments = fbAttachments.data();
+		fbInfo.width = w;
+		fbInfo.height = h;
+		fbInfo.layers = 1;
+
+		try {
+			m_msaaGbufFramebuffer = m_device.createFramebuffer(fbInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create MSAA G-buffer framebuffer: %s\n", e.what()));
+			shutdownMSAA();
+			return false;
+		}
+	}
+
+	// Emissive copy render pass — 1 MS color attachment for upsampling non-MSAA → MSAA
+	{
+		vk::AttachmentDescription att;
+		att.format = vk::Format::eR16G16B16A16Sfloat;
+		att.samples = msaaSamples;
+		att.loadOp = vk::AttachmentLoadOp::eDontCare;
+		att.storeOp = vk::AttachmentStoreOp::eStore;
+		att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+		att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+		att.initialLayout = vk::ImageLayout::eUndefined;
+		att.finalLayout = vk::ImageLayout::eColorAttachmentOptimal;
+
+		vk::AttachmentReference colorRef;
+		colorRef.attachment = 0;
+		colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal;
+
+		vk::SubpassDescription subpass;
+		subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics;
+		subpass.colorAttachmentCount = 1;
+		subpass.pColorAttachments = &colorRef;
+
+		vk::SubpassDependency dependency;
+		dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
+		dependency.dstSubpass = 0;
+		dependency.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader;
+		dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput;
+		dependency.srcAccessMask = vk::AccessFlagBits::eShaderRead;
+		dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+
+		vk::RenderPassCreateInfo rpInfo;
+		rpInfo.attachmentCount = 1;
+		rpInfo.pAttachments = &att;
+		rpInfo.subpassCount = 1;
+		rpInfo.pSubpasses = &subpass;
+		rpInfo.dependencyCount = 1;
+		rpInfo.pDependencies = &dependency;
+
+		try {
+			m_msaaEmissiveCopyRenderPass = m_device.createRenderPass(rpInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create MSAA emissive copy render pass: %s\n", e.what()));
+			shutdownMSAA();
+			return false;
+		}
+	}
+
+	// Emissive copy framebuffer (MSAA emissive as sole attachment)
+	{
+		vk::ImageView att = m_msaaEmissive.view;
+		vk::FramebufferCreateInfo fbInfo;
+		fbInfo.renderPass = m_msaaEmissiveCopyRenderPass;
+		fbInfo.attachmentCount = 1;
+		fbInfo.pAttachments = &att;
+		fbInfo.width = w;
+		fbInfo.height = h;
+		fbInfo.layers = 1;
+
+		try {
+			m_msaaEmissiveCopyFramebuffer = m_device.createFramebuffer(fbInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create MSAA emissive copy framebuffer: %s\n", e.what()));
+			shutdownMSAA();
+			return false;
+		}
+	}
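+	// The emissive copy pass pairs with the emissive-preserving eLoad variant
+	// above: it writes the non-MSAA emissive back into the MS image so a
+	// resumed G-buffer pass still sees the accumulated emissive.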
+	// MSAA Resolve render pass — 5 non-MSAA color + depth (via gl_FragDepth)
+	// Writes to the non-MSAA G-buffer images. loadOp=eDontCare (fully overwritten).
+	{
+		std::array<vk::AttachmentDescription, 6> attachments;
+
+		vk::Format colorFormats[5] = {
+			vk::Format::eR16G16B16A16Sfloat, // 0: color
+			vk::Format::eR16G16B16A16Sfloat, // 1: position
+			vk::Format::eR16G16B16A16Sfloat, // 2: normal
+			vk::Format::eR8G8B8A8Unorm,      // 3: specular
+			vk::Format::eR16G16B16A16Sfloat, // 4: emissive
+		};
+
+		for (uint32_t i = 0; i < 5; ++i) {
+			attachments[i].format = colorFormats[i];
+			attachments[i].samples = vk::SampleCountFlagBits::e1;
+			attachments[i].loadOp = vk::AttachmentLoadOp::eDontCare;
+			attachments[i].storeOp = vk::AttachmentStoreOp::eStore;
+			attachments[i].stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+			attachments[i].stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+			attachments[i].initialLayout = vk::ImageLayout::eUndefined;
+			attachments[i].finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		}
+
+		// Depth (non-MSAA, written via gl_FragDepth)
+		attachments[5].format = m_depthFormat;
+		attachments[5].samples = vk::SampleCountFlagBits::e1;
+		attachments[5].loadOp = vk::AttachmentLoadOp::eDontCare;
+		attachments[5].storeOp = vk::AttachmentStoreOp::eStore;
+		attachments[5].stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+		attachments[5].stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+		attachments[5].initialLayout = vk::ImageLayout::eUndefined;
+		attachments[5].finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		std::array<vk::AttachmentReference, 5> colorRefs;
+		for (uint32_t i = 0; i < 5; ++i) {
+			colorRefs[i].attachment = i;
+			colorRefs[i].layout = vk::ImageLayout::eColorAttachmentOptimal;
+		}
+
+		vk::AttachmentReference depthRef;
+		depthRef.attachment = 5;
+		depthRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		vk::SubpassDescription subpass;
+		subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics;
+		subpass.colorAttachmentCount = 5;
+		subpass.pColorAttachments = colorRefs.data();
+		subpass.pDepthStencilAttachment = &depthRef;
+
+		vk::SubpassDependency dependency;
+		dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
+		dependency.dstSubpass = 0;
+		dependency.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader;
+		dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eEarlyFragmentTests;
+		dependency.srcAccessMask = vk::AccessFlagBits::eShaderRead;
+		dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite
+			| vk::AccessFlagBits::eDepthStencilAttachmentWrite;
+
+		vk::RenderPassCreateInfo rpInfo;
+		rpInfo.attachmentCount = static_cast<uint32_t>(attachments.size());
+		rpInfo.pAttachments = attachments.data();
+		rpInfo.subpassCount = 1;
+		rpInfo.pSubpasses = &subpass;
+		rpInfo.dependencyCount = 1;
+		rpInfo.pDependencies = &dependency;
+
+		try {
+			m_msaaResolveRenderPass = m_device.createRenderPass(rpInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create MSAA resolve render pass: %s\n", e.what()));
+			shutdownMSAA();
+			return false;
+		}
+	}
+
+	// MSAA Resolve framebuffer — references non-MSAA G-buffer images
+	// Attachment order: [0]=scene color, [1]=position, [2]=normal, [3]=specular, [4]=emissive, [5]=depth
+	{
+		std::array<vk::ImageView, 6> fbAttachments = {
+			m_sceneColor.view,
+			m_gbufPosition.view,
+			m_gbufNormal.view,
+			m_gbufSpecular.view,
+			m_gbufEmissive.view,
+			m_sceneDepth.view,
+		};
+
+		vk::FramebufferCreateInfo fbInfo;
+		fbInfo.renderPass = m_msaaResolveRenderPass;
+		fbInfo.attachmentCount = static_cast<uint32_t>(fbAttachments.size());
+		fbInfo.pAttachments = fbAttachments.data();
+		fbInfo.width = w;
+		fbInfo.height = h;
+		fbInfo.layers = 1;
+
+		try {
+			m_msaaResolveFramebuffer = m_device.createFramebuffer(fbInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create MSAA resolve framebuffer: %s\n", e.what()));
+			shutdownMSAA();
+			return false;
+		}
+	}
+
+	// Create per-frame MSAA resolve UBO (persistently mapped)
+	// Two 256-byte slots (one per frame in flight) hold {int samples; float fov;} data.
+	{
+		vk::BufferCreateInfo bufInfo;
+		bufInfo.size = MAX_FRAMES_IN_FLIGHT * 256;
+		bufInfo.usage = vk::BufferUsageFlagBits::eUniformBuffer;
+		bufInfo.sharingMode = vk::SharingMode::eExclusive;
+
+		try {
+			m_msaaResolveUBO = m_device.createBuffer(bufInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create MSAA resolve UBO: %s\n", e.what()));
+			shutdownMSAA();
+			return false;
+		}
+
+		if (!m_memoryManager->allocateBufferMemory(m_msaaResolveUBO, MemoryUsage::CpuToGpu, m_msaaResolveUBOAlloc)) {
+			mprintf(("VulkanPostProcessor: Failed to allocate MSAA resolve UBO memory!\n"));
+			m_device.destroyBuffer(m_msaaResolveUBO);
+			m_msaaResolveUBO = nullptr;
+			shutdownMSAA();
+			return false;
+		}
+
+		m_msaaResolveUBOMapped = m_memoryManager->mapMemory(m_msaaResolveUBOAlloc);
+		if (!m_msaaResolveUBOMapped) {
+			mprintf(("VulkanPostProcessor: Failed to map MSAA resolve UBO!\n"));
+			shutdownMSAA();
+			return false;
+		}
+	}
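+	// With MAX_FRAMES_IN_FLIGHT == 2 that is two 256-byte slots: frame f
+	// writes its {samples, fov} values at byte offset f * 256, and that
+	// frame's descriptor points at the same offset, so in-flight frames
+	// never stomp each other's data.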
+	// Transition MSAA images to the render pass's initial layout at creation time.
+	// The validation layer tracks framebuffer attachment layouts from creation,
+	// so we must match the eClear render pass's initialLayout exactly.
+	{
+		auto* texMgr = getTextureManager();
+
+		RenderTarget* colorTargets[] = {
+			&m_msaaColor, &m_msaaPosition, &m_msaaNormal,
+			&m_msaaSpecular, &m_msaaEmissive,
+		};
+		for (auto* t : colorTargets) {
+			texMgr->transitionImageLayout(t->image, t->format,
+				vk::ImageLayout::eUndefined, vk::ImageLayout::eColorAttachmentOptimal);
+		}
+
+		texMgr->transitionImageLayout(m_msaaDepthImage, m_depthFormat,
+			vk::ImageLayout::eUndefined, vk::ImageLayout::eDepthStencilAttachmentOptimal);
+	}
+
+	m_msaaInitialized = true;
+	mprintf(("VulkanPostProcessor: MSAA initialized (%ux%u, %dx samples, 5 color + depth)\n",
+		w, h, Cmdline_msaa_enabled));
+	return true;
+}
+
+void VulkanPostProcessor::shutdownMSAA()
+{
+	if (!m_device) {
+		return;
+	}
+
+	// Destroy MSAA resolve UBO
+	if (m_msaaResolveUBOMapped) {
+		m_memoryManager->unmapMemory(m_msaaResolveUBOAlloc);
+		m_msaaResolveUBOMapped = nullptr;
+	}
+	if (m_msaaResolveUBO) {
+		m_device.destroyBuffer(m_msaaResolveUBO);
+		m_msaaResolveUBO = nullptr;
+	}
+	if (m_msaaResolveUBOAlloc.memory != VK_NULL_HANDLE) {
+		m_memoryManager->freeAllocation(m_msaaResolveUBOAlloc);
+	}
+
+	if (m_msaaResolveFramebuffer) {
+		m_device.destroyFramebuffer(m_msaaResolveFramebuffer);
+		m_msaaResolveFramebuffer = nullptr;
+	}
+	if (m_msaaResolveRenderPass) {
+		m_device.destroyRenderPass(m_msaaResolveRenderPass);
+		m_msaaResolveRenderPass = nullptr;
+	}
+	if (m_msaaEmissiveCopyFramebuffer) {
+		m_device.destroyFramebuffer(m_msaaEmissiveCopyFramebuffer);
+		m_msaaEmissiveCopyFramebuffer = nullptr;
+	}
+	if (m_msaaEmissiveCopyRenderPass) {
+		m_device.destroyRenderPass(m_msaaEmissiveCopyRenderPass);
+		m_msaaEmissiveCopyRenderPass = nullptr;
+	}
+	if (m_msaaGbufFramebuffer) {
+		m_device.destroyFramebuffer(m_msaaGbufFramebuffer);
+		m_msaaGbufFramebuffer = nullptr;
+	}
+	if (m_msaaGbufRenderPassLoad) {
+		m_device.destroyRenderPass(m_msaaGbufRenderPassLoad);
+		m_msaaGbufRenderPassLoad = nullptr;
+	}
+	if (m_msaaGbufRenderPass) {
+		m_device.destroyRenderPass(m_msaaGbufRenderPass);
+		m_msaaGbufRenderPass = nullptr;
+	}
+
+	// Destroy MSAA depth
+	if (m_msaaDepthView) {
+		m_device.destroyImageView(m_msaaDepthView);
+		m_msaaDepthView = nullptr;
+	}
+	if (m_msaaDepthImage) {
+		m_device.destroyImage(m_msaaDepthImage);
+		m_msaaDepthImage = nullptr;
+	}
+	if (m_msaaDepthAlloc.memory != VK_NULL_HANDLE) {
+		m_memoryManager->freeAllocation(m_msaaDepthAlloc);
+	}
+
+	// Destroy MSAA color targets
+	RenderTarget* msaaTargets[] = {
+		&m_msaaColor, &m_msaaPosition, &m_msaaNormal,
+		&m_msaaSpecular, &m_msaaEmissive,
+	};
+	for (auto* rt : msaaTargets) {
+		if (rt->view) {
+			m_device.destroyImageView(rt->view);
+			rt->view = nullptr;
+		}
+		if (rt->image) {
+			m_device.destroyImage(rt->image);
+			rt->image = nullptr;
+		}
+		if (rt->allocation.memory != VK_NULL_HANDLE) {
+			m_memoryManager->freeAllocation(rt->allocation);
+		}
+	}
+
+	m_msaaInitialized = false;
+}
+
+void VulkanPostProcessor::transitionMsaaGbufForResume(vk::CommandBuffer /*cmd*/)
+{
+	// No-op: MSAA render passes use finalLayout == subpass layout (no implicit
+	// transition at endRenderPass), so color attachments remain in
+	// eColorAttachmentOptimal — exactly what the eLoad pass expects.
+}
+
+void VulkanPostProcessor::transitionMsaaGbufForBegin(vk::CommandBuffer /*cmd*/)
+{
+	// No-op: MSAA images are always in eColorAttachmentOptimal /
+	// eDepthStencilAttachmentOptimal between frames. Init-time transitions
+	// set this layout, and the post-resolve barriers in
+	// vulkan_deferred_lighting_msaa restore it after each frame's resolve pass.
+}
+
+// ===== Light Accumulation (Deferred Lighting) =====
+
+bool VulkanPostProcessor::initLightVolumes()
+{
+	if (m_lightVolumesInitialized) {
+		return true;
+	}
+
+	// Generate sphere mesh (16 rings x 16 segments)
+	{
+		auto mesh = graphics::util::generate_sphere_mesh(16, 16);
+		m_sphereMesh.vertexCount = mesh.vertex_count;
+		m_sphereMesh.indexCount = mesh.index_count;
+
+		// Create VBO
+		vk::BufferCreateInfo vboInfo;
+		vboInfo.size = mesh.vertices.size() * sizeof(float);
+		vboInfo.usage = vk::BufferUsageFlagBits::eVertexBuffer;
+		vboInfo.sharingMode = vk::SharingMode::eExclusive;
+
+		try {
+			m_sphereMesh.vbo = m_device.createBuffer(vboInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create sphere VBO: %s\n", e.what()));
+			return false;
+		}
+
+		if (!m_memoryManager->allocateBufferMemory(m_sphereMesh.vbo, MemoryUsage::CpuToGpu, m_sphereMesh.vboAlloc)) {
+			m_device.destroyBuffer(m_sphereMesh.vbo);
+			m_sphereMesh.vbo = nullptr;
+			return false;
+		}
+
+		auto* mapped = m_memoryManager->mapMemory(m_sphereMesh.vboAlloc);
+		if (mapped) {
+			memcpy(mapped, mesh.vertices.data(), mesh.vertices.size() * sizeof(float));
+			m_memoryManager->unmapMemory(m_sphereMesh.vboAlloc);
+		}
+
+		// Create IBO
+		vk::BufferCreateInfo iboInfo;
+		iboInfo.size = mesh.indices.size() * sizeof(ushort);
+		iboInfo.usage = vk::BufferUsageFlagBits::eIndexBuffer;
+		iboInfo.sharingMode = vk::SharingMode::eExclusive;
+
+		try {
+			m_sphereMesh.ibo = m_device.createBuffer(iboInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create sphere IBO: %s\n", e.what()));
+			return false;
+		}
+
+		if (!m_memoryManager->allocateBufferMemory(m_sphereMesh.ibo, MemoryUsage::CpuToGpu, m_sphereMesh.iboAlloc)) {
+			m_device.destroyBuffer(m_sphereMesh.ibo);
+			m_sphereMesh.ibo = nullptr;
+			return false;
+		}
+
+		mapped = m_memoryManager->mapMemory(m_sphereMesh.iboAlloc);
+		if (mapped) {
+			memcpy(mapped, mesh.indices.data(), mesh.indices.size() * sizeof(ushort));
+			m_memoryManager->unmapMemory(m_sphereMesh.iboAlloc);
+		}
+	}
+
+	// Generate cylinder mesh (16 segments)
+	{
+		auto mesh = graphics::util::generate_cylinder_mesh(16);
+		m_cylinderMesh.vertexCount = mesh.vertex_count;
+		m_cylinderMesh.indexCount = mesh.index_count;
+
+		vk::BufferCreateInfo vboInfo;
+		vboInfo.size = mesh.vertices.size() * sizeof(float);
+		vboInfo.usage = vk::BufferUsageFlagBits::eVertexBuffer;
+		vboInfo.sharingMode = vk::SharingMode::eExclusive;
+
+		try {
+			m_cylinderMesh.vbo = m_device.createBuffer(vboInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create cylinder VBO: %s\n", e.what()));
+			return false;
+		}
+
+		if (!m_memoryManager->allocateBufferMemory(m_cylinderMesh.vbo, MemoryUsage::CpuToGpu, m_cylinderMesh.vboAlloc)) {
+			m_device.destroyBuffer(m_cylinderMesh.vbo);
+			m_cylinderMesh.vbo = nullptr;
+			return false;
+		}
+
+		auto* mapped = m_memoryManager->mapMemory(m_cylinderMesh.vboAlloc);
+		if (mapped) {
+			memcpy(mapped, mesh.vertices.data(), mesh.vertices.size() * sizeof(float));
+			m_memoryManager->unmapMemory(m_cylinderMesh.vboAlloc);
+		}
+
+		vk::BufferCreateInfo iboInfo;
+		iboInfo.size = mesh.indices.size() * sizeof(ushort);
+		iboInfo.usage = vk::BufferUsageFlagBits::eIndexBuffer;
+		iboInfo.sharingMode = vk::SharingMode::eExclusive;
+
+		try {
+			m_cylinderMesh.ibo = m_device.createBuffer(iboInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create cylinder IBO: %s\n", e.what()));
+			return false;
+		}
+
+		if (!m_memoryManager->allocateBufferMemory(m_cylinderMesh.ibo, MemoryUsage::CpuToGpu, m_cylinderMesh.iboAlloc)) {
+			m_device.destroyBuffer(m_cylinderMesh.ibo);
+			m_cylinderMesh.ibo = nullptr;
+			return false;
+		}
+
+		mapped = m_memoryManager->mapMemory(m_cylinderMesh.iboAlloc);
+		if (mapped) {
+			memcpy(mapped, mesh.indices.data(), mesh.indices.size() * sizeof(ushort));
+			m_memoryManager->unmapMemory(m_cylinderMesh.iboAlloc);
+		}
+	}
+
+	// Create deferred UBO for light data (per-frame, host-visible)
+	{
+		vk::BufferCreateInfo bufInfo;
+		bufInfo.size = DEFERRED_UBO_SIZE;
+		bufInfo.usage = vk::BufferUsageFlagBits::eUniformBuffer;
+		bufInfo.sharingMode = vk::SharingMode::eExclusive;
+
+		try {
+			m_deferredUBO = m_device.createBuffer(bufInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create deferred UBO: %s\n", e.what()));
+			return false;
+		}
+
+		if (!m_memoryManager->allocateBufferMemory(m_deferredUBO, MemoryUsage::CpuToGpu, m_deferredUBOAlloc)) {
+			m_device.destroyBuffer(m_deferredUBO);
+			m_deferredUBO = nullptr;
+			return false;
+		}
+	}
+
+	m_lightVolumesInitialized = true;
+	mprintf(("VulkanPostProcessor: Light volumes initialized (sphere: %u verts/%u idx, cylinder: %u verts/%u idx)\n",
+		m_sphereMesh.vertexCount, m_sphereMesh.indexCount,
+		m_cylinderMesh.vertexCount, m_cylinderMesh.indexCount));
+	return true;
+}
+
+void VulkanPostProcessor::shutdownLightVolumes()
+{
+	if (!m_device) {
+		return;
+	}
+
+	auto destroyMesh = [&](LightVolumeMesh& mesh) {
+		if (mesh.vbo) { m_device.destroyBuffer(mesh.vbo); mesh.vbo = nullptr; }
+		if (mesh.vboAlloc.memory != VK_NULL_HANDLE) { m_memoryManager->freeAllocation(mesh.vboAlloc); }
+		if (mesh.ibo) { m_device.destroyBuffer(mesh.ibo); mesh.ibo = nullptr; }
+		if (mesh.iboAlloc.memory != VK_NULL_HANDLE) { m_memoryManager->freeAllocation(mesh.iboAlloc); }
+		mesh.vertexCount = 0;
+		mesh.indexCount = 0;
+	};
+
+	destroyMesh(m_sphereMesh);
+	destroyMesh(m_cylinderMesh);
+
+	if (m_deferredUBO) {
+		m_device.destroyBuffer(m_deferredUBO);
+		m_deferredUBO = nullptr;
+	}
+	if (m_deferredUBOAlloc.memory != VK_NULL_HANDLE) {
+		m_memoryManager->freeAllocation(m_deferredUBOAlloc);
+	}
+
+	if (m_lightAccumFramebuffer) {
+		m_device.destroyFramebuffer(m_lightAccumFramebuffer);
+		m_lightAccumFramebuffer = nullptr;
+	}
+	if (m_lightAccumRenderPass) {
+		m_device.destroyRenderPass(m_lightAccumRenderPass);
+		m_lightAccumRenderPass = nullptr;
+	}
+
+	m_lightVolumesInitialized = false;
+}
+
+bool VulkanPostProcessor::initLightAccumPass()
+{
+	// Light accumulation render pass: single RGBA16F color attachment
+	// loadOp=eLoad (preserves emissive copy), storeOp=eStore
+	// initialLayout=eColorAttachmentOptimal, finalLayout=eShaderReadOnlyOptimal
+	{
+		vk::AttachmentDescription att;
+		att.format = vk::Format::eR16G16B16A16Sfloat;
+		att.samples = vk::SampleCountFlagBits::e1;
+		att.loadOp = vk::AttachmentLoadOp::eLoad;
+		att.storeOp = vk::AttachmentStoreOp::eStore;
+		att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+		att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+		att.initialLayout = vk::ImageLayout::eColorAttachmentOptimal;
+		att.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		vk::AttachmentReference colorRef;
+		colorRef.attachment = 0;
+		colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal;
+
+		vk::SubpassDescription subpass;
+		subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics;
+		subpass.colorAttachmentCount = 1;
+		subpass.pColorAttachments = &colorRef;
+
+		vk::SubpassDependency dep;
+		dep.srcSubpass = VK_SUBPASS_EXTERNAL;
+		dep.dstSubpass = 0;
+		dep.srcStageMask = vk::PipelineStageFlagBits::eTransfer
+			| vk::PipelineStageFlagBits::eColorAttachmentOutput;
+		dep.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput
+			| vk::PipelineStageFlagBits::eFragmentShader;
+		dep.srcAccessMask = vk::AccessFlagBits::eTransferWrite
+			| vk::AccessFlagBits::eColorAttachmentWrite;
+		dep.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead
+			| vk::AccessFlagBits::eColorAttachmentWrite
+			| vk::AccessFlagBits::eShaderRead;
+
+		vk::RenderPassCreateInfo rpInfo;
+		rpInfo.attachmentCount = 1;
+		rpInfo.pAttachments = &att;
+		rpInfo.subpassCount = 1;
+		rpInfo.pSubpasses = &subpass;
+		rpInfo.dependencyCount = 1;
+		rpInfo.pDependencies = &dep;
+
+		try {
+			m_lightAccumRenderPass = m_device.createRenderPass(rpInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create light accum render pass: %s\n", e.what()));
+			return false;
+		}
+	}
+
+	// Framebuffer using composite image as sole color attachment
+	{
+		vk::ImageView attachments[] = { m_gbufComposite.view };
+
+		vk::FramebufferCreateInfo fbInfo;
+		fbInfo.renderPass = m_lightAccumRenderPass;
+		fbInfo.attachmentCount = 1;
+		fbInfo.pAttachments = attachments;
+		fbInfo.width = m_extent.width;
+		fbInfo.height = m_extent.height;
+		fbInfo.layers = 1;
+
+		try {
+			m_lightAccumFramebuffer = m_device.createFramebuffer(fbInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create light accum framebuffer: %s\n", e.what()));
+			return false;
+		}
+	}
+
+	return true;
+}
+
+namespace ltp = lighting_profiles;
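+// Shared packing step for every light type: zero the light's slice of the
+// deferred UBO, then fill in the fields common to all lights. Callers patch
+// in the type-specific fields (direction, cone, radius/scale) afterwards.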
+static graphics::deferred_light_data* prepare_light_uniforms(light& l, uint8_t* dest, const ltp::profile* lp)
+{
+	auto* light_data = reinterpret_cast<graphics::deferred_light_data*>(dest);
+	memset(light_data, 0, sizeof(graphics::deferred_light_data));
+
+	light_data->lightType = static_cast<int>(l.type);
+
+	float intensity =
+		(Lighting_mode == lighting_mode::COCKPIT) ? lp->cockpit_light_intensity_modifier.handle(l.intensity) : l.intensity;
+
+	vec3d diffuse;
+	diffuse.xyz.x = l.r * intensity;
+	diffuse.xyz.y = l.g * intensity;
+	diffuse.xyz.z = l.b * intensity;
+
+	light_data->diffuseLightColor = diffuse;
+	light_data->enable_shadows = 0;
+	light_data->sourceRadius = l.source_radius;
+	return light_data;
+}
+
+void VulkanPostProcessor::renderDeferredLights(vk::CommandBuffer cmd)
+{
+	TRACE_SCOPE(tracing::ApplyLights);
+
+	if (!m_gbufInitialized) {
+		return;
+	}
+
+	// Lazy-init light volumes and accumulation pass on first use
+	if (!m_lightVolumesInitialized) {
+		if (!initLightVolumes() || !initLightAccumPass()) {
+			return;
+		}
+	}
+
+	auto* pipelineMgr = getPipelineManager();
+	auto* descriptorMgr = getDescriptorManager();
+	auto* bufferMgr = getBufferManager();
+	auto* texMgr = getTextureManager();
+
+	if (!pipelineMgr || !descriptorMgr || !bufferMgr || !texMgr) {
+		return;
+	}
+
+	// Sort lights by type (same stable sort as OpenGL)
+	std::stable_sort(Lights.begin(), Lights.end(), light_compare_by_type);
+
+	// Categorize lights
+	SCP_vector<light> full_frame_lights;
+	SCP_vector<light> sphere_lights;
+	SCP_vector<light> cylinder_lights;
+	for (auto& l : Lights) {
+		switch (l.type) {
+		case Light_Type::Directional:
+			full_frame_lights.push_back(l);
+			break;
+		case Light_Type::Cone:
+		case Light_Type::Point:
+			sphere_lights.push_back(l);
+			break;
+		case Light_Type::Tube:
+			cylinder_lights.push_back(l);
+			break;
+		case Light_Type::Ambient:
+			break;
+		}
+	}
+
+	// Add ambient light
+	{
+		light& l = full_frame_lights.emplace_back();
+		memset(&l, 0, sizeof(light));
+		vec3d ambient;
+		gr_get_ambient_light(&ambient);
+		l.r = ambient.xyz.x;
+		l.g = ambient.xyz.y;
+		l.b = ambient.xyz.z;
+		l.type = Light_Type::Ambient;
+		l.intensity = 1.f;
+		l.source_radius = 0.f;
+	}
+
+	size_t total_lights = full_frame_lights.size() + sphere_lights.size() + cylinder_lights.size();
+	if (total_lights == 0) {
+		return;
+	}
+
+	// Map UBO and pack data
+	auto* uboMapped = static_cast<uint8_t*>(m_memoryManager->mapMemory(m_deferredUBOAlloc));
+	if (!uboMapped) {
+		return;
+	}
+
+	// Determine alignment requirement
+	uint32_t uboAlign = getRendererInstance()->getMinUniformBufferOffsetAlignment();
+	auto alignUp = [uboAlign](uint32_t v) -> uint32_t {
+		return (v + uboAlign - 1) & ~(uboAlign - 1);
+	};
+
+	// Layout in UBO:
+	// [0]: deferred_global_data (header)
+	// [aligned offset 1..N]: deferred_light_data per light
+	// [aligned offset N+1..2N]: matrix_uniforms per light
+	uint32_t globalDataSize = alignUp(static_cast<uint32_t>(sizeof(graphics::deferred_global_data)));
+	uint32_t lightDataSize = alignUp(static_cast<uint32_t>(sizeof(graphics::deferred_light_data)));
+	uint32_t matrixDataSize = alignUp(static_cast<uint32_t>(sizeof(graphics::matrix_uniforms)));
+
+	uint32_t lightDataOffset = globalDataSize;
+	uint32_t matrixDataOffset = lightDataOffset + static_cast<uint32_t>(total_lights) * lightDataSize;
+	uint32_t totalUBOSize = matrixDataOffset + static_cast<uint32_t>(total_lights) * matrixDataSize;
+
+	if (totalUBOSize > DEFERRED_UBO_SIZE) {
+		mprintf(("VulkanPostProcessor: Deferred UBO overflow (%u > %u), skipping lights\n", totalUBOSize, DEFERRED_UBO_SIZE));
+		m_memoryManager->unmapMemory(m_deferredUBOAlloc);
+		return;
+	}
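+	// Worked example (illustrative sizes): with uboAlign = 256 and, say,
+	// sizeof(graphics::deferred_light_data) = 200, alignUp(200) == 256, so
+	// light i's data lives at lightDataOffset + i * 256 and its matrices at
+	// matrixDataOffset + i * alignUp(sizeof(matrix_uniforms)). The per-draw
+	// descriptor offsets written below must land on these same aligned slots.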
+	// Pack global header
+	auto lp = ltp::current();
+	// Determine if environment maps are available
+	bool envMapAvailable = (ENVMAP > 0);
+	tcache_slot_vulkan* envMapSlot = nullptr;
+	tcache_slot_vulkan* irrMapSlot = nullptr;
+	if (envMapAvailable) {
+		envMapSlot = texMgr->getTextureSlot(ENVMAP);
+		if (!envMapSlot || !envMapSlot->imageView || !envMapSlot->isCubemap) {
+			envMapAvailable = false;
+		}
+	}
+	if (envMapAvailable && IRRMAP > 0) {
+		irrMapSlot = texMgr->getTextureSlot(IRRMAP);
+		if (!irrMapSlot || !irrMapSlot->imageView || !irrMapSlot->isCubemap) {
+			irrMapSlot = nullptr; // Fall back to fallback cube for irrmap
+		}
+	}
+
+	{
+		auto* header = reinterpret_cast<graphics::deferred_global_data*>(uboMapped);
+		memset(header, 0, sizeof(graphics::deferred_global_data));
+		header->invScreenWidth = 1.0f / gr_screen.max_w;
+		header->invScreenHeight = 1.0f / gr_screen.max_h;
+		header->nearPlane = gr_near_plane;
+		header->use_env_map = envMapAvailable ? 1 : 0;
+
+		if (m_shadowInitialized && Shadow_quality != ShadowQuality::Disabled) {
+			header->shadow_mv_matrix = Shadow_view_matrix_light;
+			for (size_t i = 0; i < MAX_SHADOW_CASCADES; ++i) {
+				header->shadow_proj_matrix[i] = Shadow_proj_matrix[i];
+			}
+			header->veryneardist = Shadow_cascade_distances[0];
+			header->neardist = Shadow_cascade_distances[1];
+			header->middist = Shadow_cascade_distances[2];
+			header->fardist = Shadow_cascade_distances[3];
+			vm_inverse_matrix4(&header->inv_view_matrix, &Shadow_view_matrix_render);
+		}
+	}
+
+	// Pack per-light data
+	size_t lightIdx = 0;
+	bool first_directional = true;
+
+	for (auto& l : full_frame_lights) {
+		auto* ld = prepare_light_uniforms(l, uboMapped + lightDataOffset + lightIdx * lightDataSize, lp);
+
+		if (l.type == Light_Type::Directional) {
+			if (m_shadowInitialized && Shadow_quality != ShadowQuality::Disabled) {
+				ld->enable_shadows = first_directional ? 1 : 0;
+			}
+
+			if (first_directional) {
+				first_directional = false;
+			}
+
+			vec4 light_dir;
+			light_dir.xyzw.x = -l.vec.xyz.x;
+			light_dir.xyzw.y = -l.vec.xyz.y;
+			light_dir.xyzw.z = -l.vec.xyz.z;
+			light_dir.xyzw.w = 0.0f;
+			vec4 view_dir;
+			vm_vec_transform(&view_dir, &light_dir, &gr_view_matrix);
+			ld->lightDir.xyz.x = view_dir.xyzw.x;
+			ld->lightDir.xyz.y = view_dir.xyzw.y;
+			ld->lightDir.xyz.z = view_dir.xyzw.z;
+		}
+
+		// Matrix: env texture matrix for full-frame lights
+		auto* md = reinterpret_cast<graphics::matrix_uniforms*>(uboMapped + matrixDataOffset + lightIdx * matrixDataSize);
+		memset(md, 0, sizeof(graphics::matrix_uniforms));
+		md->modelViewMatrix = gr_env_texture_matrix;
+		++lightIdx;
+	}
+
+	for (auto& l : sphere_lights) {
+		auto* ld = prepare_light_uniforms(l, uboMapped + lightDataOffset + lightIdx * lightDataSize, lp);
+
+		if (l.type == Light_Type::Cone) {
+			ld->dualCone = (l.flags & LF_DUAL_CONE) ? 1.0f : 0.0f;
+			ld->coneAngle = l.cone_angle;
+			ld->coneInnerAngle = l.cone_inner_angle;
+			ld->coneDir = l.vec2;
+		}
+		float rad = (Lighting_mode == lighting_mode::COCKPIT)
+			? lp->cockpit_light_radius_modifier.handle(MAX(l.rada, l.radb))
+			: MAX(l.rada, l.radb);
+		ld->lightRadius = rad;
+		ld->scale.xyz.x = rad * 1.05f;
+		ld->scale.xyz.y = rad * 1.05f;
+		ld->scale.xyz.z = rad * 1.05f;
+
+		// Matrix: model-view + projection for light volume
+		auto* md = reinterpret_cast<graphics::matrix_uniforms*>(uboMapped + matrixDataOffset + lightIdx * matrixDataSize);
+		g3_start_instance_matrix(&l.vec, &vmd_identity_matrix, true);
+		md->modelViewMatrix = gr_model_view_matrix;
+		md->projMatrix = gr_projection_matrix;
+		g3_done_instance(true);
+		++lightIdx;
+	}
+
+	for (auto& l : cylinder_lights) {
+		auto* ld = prepare_light_uniforms(l, uboMapped + lightDataOffset + lightIdx * lightDataSize, lp);
+		float rad = (Lighting_mode == lighting_mode::COCKPIT)
+			? lp->cockpit_light_radius_modifier.handle(l.radb)
+			: l.radb;
+		ld->lightRadius = rad;
+		ld->lightType = LT_TUBE;
+
+		vec3d a;
+		vm_vec_sub(&a, &l.vec, &l.vec2);
+		auto length = vm_vec_mag(&a);
+		length += ld->lightRadius * 2.0f;
+
+		ld->scale.xyz.x = rad * 1.05f;
+		ld->scale.xyz.y = rad * 1.05f;
+		ld->scale.xyz.z = length;
+
+		// Matrix: oriented instance matrix for cylinder
+		auto* md = reinterpret_cast<graphics::matrix_uniforms*>(uboMapped + matrixDataOffset + lightIdx * matrixDataSize);
+		vec3d dir, newPos;
+		matrix orient;
+		vm_vec_normalized_dir(&dir, &l.vec, &l.vec2);
+		vm_vector_2_matrix_norm(&orient, &dir, nullptr, nullptr);
+		vm_vec_scale_sub(&newPos, &l.vec2, &dir, l.radb);
+
+		g3_start_instance_matrix(&newPos, &orient, true);
+		md->modelViewMatrix = gr_model_view_matrix;
+		md->projMatrix = gr_projection_matrix;
+		g3_done_instance(true);
+		++lightIdx;
+	}
+
+	m_memoryManager->unmapMemory(m_deferredUBOAlloc);
+
+	// Both fullscreen and volume lights use the same vertex layout (POSITION3).
+	// For fullscreen lights the shader ignores vertex data and generates positions
+	// from gl_VertexIndex, but Vulkan requires all declared vertex inputs to have
+	// matching pipeline attributes and bound buffers.
+	vertex_layout volLayout;
+	volLayout.add_vertex_component(vertex_format_data::POSITION3, sizeof(float) * 3, 0);
+
+	PipelineConfig lightConfig;
+	lightConfig.shaderType = SDR_TYPE_DEFERRED_LIGHTING;
+	lightConfig.vertexLayoutHash = volLayout.hash();
+	lightConfig.primitiveType = PRIM_TYPE_TRIS;
+	lightConfig.depthMode = ZBUFFER_TYPE_NONE;
+	lightConfig.blendMode = ALPHA_BLEND_ADDITIVE;
+	lightConfig.cullEnabled = false;
+	lightConfig.depthWriteEnabled = false;
+	lightConfig.renderPass = m_lightAccumRenderPass;
+
+	vk::Pipeline lightPipeline = pipelineMgr->getPipeline(lightConfig, volLayout);
+	if (!lightPipeline) {
+		return;
+	}
+
+	vk::PipelineLayout pipelineLayout = pipelineMgr->getPipelineLayout();
+
+	// Prepare G-buffer texture infos for material descriptor set
+	vk::DescriptorImageInfo gbufTexInfos[4];
+	gbufTexInfos[0].sampler = m_linearSampler;
+	gbufTexInfos[0].imageView = m_sceneColor.view; // ColorBuffer
+	gbufTexInfos[0].imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+	gbufTexInfos[1].sampler = m_linearSampler;
+	gbufTexInfos[1].imageView = m_gbufNormal.view; // NormalBuffer
+	gbufTexInfos[1].imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+	gbufTexInfos[2].sampler = m_linearSampler;
+	gbufTexInfos[2].imageView = m_gbufPosition.view; // PositionBuffer
+	gbufTexInfos[2].imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+	gbufTexInfos[3].sampler = m_linearSampler;
+	gbufTexInfos[3].imageView = m_gbufSpecular.view; // SpecBuffer
+	gbufTexInfos[3].imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+	// Fallback buffer and textures for unused descriptor bindings
+	auto fallbackBuf = bufferMgr->getFallbackUniformBuffer();
+	vk::DescriptorBufferInfo fallbackBufInfo;
+	fallbackBufInfo.buffer = fallbackBuf;
+	fallbackBufInfo.offset = 0;
+	fallbackBufInfo.range = 4096;
+
+	vk::ImageView fallbackView = texMgr->getFallbackTextureView2D();
+	vk::Sampler defaultSampler = texMgr->getDefaultSampler();
+
+	// Begin light accumulation render pass
+	{
+		vk::RenderPassBeginInfo rpBegin;
+		rpBegin.renderPass = m_lightAccumRenderPass;
+		rpBegin.framebuffer = m_lightAccumFramebuffer;
+		rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+		rpBegin.renderArea.extent = m_extent;
+
+		cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+	}
+
+	// Set viewport and scissor
+	vk::Viewport viewport;
+	viewport.x = 0.0f;
+	viewport.y = 0.0f;
+	viewport.width = static_cast<float>(m_extent.width);
+	viewport.height = static_cast<float>(m_extent.height);
+	viewport.minDepth = 0.0f;
+	viewport.maxDepth = 1.0f;
+	cmd.setViewport(0, viewport);
+
+	vk::Rect2D scissor;
+	scissor.offset = vk::Offset2D(0, 0);
+	scissor.extent = m_extent;
+	cmd.setScissor(0, scissor);
+
+	// Helper lambda to allocate + write descriptor sets for a single light draw
+	auto bindLightDescriptors = [&](size_t li) {
+		// Global set (Set 0): light UBO at binding 0, globals UBO at binding 1
+		vk::DescriptorSet globalSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Global);
+		if (!globalSet) return false;
+
+		vk::DescriptorBufferInfo lightBufInfo;
+		lightBufInfo.buffer = m_deferredUBO;
+		lightBufInfo.offset = lightDataOffset + li * lightDataSize;
+		lightBufInfo.range = sizeof(graphics::deferred_light_data);
+
+		vk::DescriptorBufferInfo globalBufInfo;
+		globalBufInfo.buffer = m_deferredUBO;
+		globalBufInfo.offset = 0;
+		globalBufInfo.range = sizeof(graphics::deferred_global_data);
+
+		// Shadow map at binding 2
+		vk::DescriptorImageInfo shadowTexInfo;
+		if (m_shadowInitialized && m_shadowColor.view) {
+			shadowTexInfo.sampler = m_linearSampler;
+			shadowTexInfo.imageView = m_shadowColor.view;
+			shadowTexInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		} else {
+			shadowTexInfo.sampler = defaultSampler;
+			shadowTexInfo.imageView = fallbackView;
+			shadowTexInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		}
+
+		// Env map at binding 3
+		vk::ImageView fallbackCubeView = texMgr->getFallbackCubeView();
+		vk::DescriptorImageInfo envTexInfo;
+		if (envMapAvailable && envMapSlot) {
+			envTexInfo.sampler = defaultSampler;
+			envTexInfo.imageView = envMapSlot->imageView;
+			envTexInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		} else {
+			envTexInfo.sampler = defaultSampler;
+			envTexInfo.imageView = fallbackCubeView;
+			envTexInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		}
+
+		// Irradiance map at binding 4
+		vk::DescriptorImageInfo irrTexInfo;
+		if (envMapAvailable && irrMapSlot) {
+			irrTexInfo.sampler = defaultSampler;
+			irrTexInfo.imageView = irrMapSlot->imageView;
+			irrTexInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		} else {
+			irrTexInfo.sampler = defaultSampler;
+			irrTexInfo.imageView = fallbackCubeView;
+			irrTexInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		}
+
+		std::array<vk::WriteDescriptorSet, 5> globalWrites;
+		globalWrites[0].dstSet = globalSet;
+		globalWrites[0].dstBinding = 0;
+		globalWrites[0].dstArrayElement = 0;
+		globalWrites[0].descriptorCount = 1;
+		globalWrites[0].descriptorType = vk::DescriptorType::eUniformBuffer;
+		globalWrites[0].pBufferInfo = &lightBufInfo;
+
+		globalWrites[1].dstSet = globalSet;
+		globalWrites[1].dstBinding = 1;
+		globalWrites[1].dstArrayElement = 0;
+		globalWrites[1].descriptorCount = 1;
+		globalWrites[1].descriptorType = vk::DescriptorType::eUniformBuffer;
+		globalWrites[1].pBufferInfo = &globalBufInfo;
+
+		globalWrites[2].dstSet = globalSet;
+		globalWrites[2].dstBinding = 2;
+		globalWrites[2].dstArrayElement = 0;
+		globalWrites[2].descriptorCount = 1;
+		globalWrites[2].descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		globalWrites[2].pImageInfo = &shadowTexInfo;
+
+		globalWrites[3].dstSet = globalSet;
+		globalWrites[3].dstBinding = 3;
+		globalWrites[3].dstArrayElement = 0;
+		globalWrites[3].descriptorCount = 1;
+		globalWrites[3].descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		globalWrites[3].pImageInfo = &envTexInfo;
+		globalWrites[4].dstSet = globalSet;
+		globalWrites[4].dstBinding = 4;
+		globalWrites[4].dstArrayElement = 0;
+		globalWrites[4].descriptorCount = 1;
+		globalWrites[4].descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		globalWrites[4].pImageInfo = &irrTexInfo;
+
+		m_device.updateDescriptorSets(globalWrites, {});
+
+		// Material set (Set 1): G-buffer textures at binding 1[0..3]
+		vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material);
+		if (!materialSet) return false;
+
+		// ModelData UBO at binding 0 (fallback)
+		vk::WriteDescriptorSet modelWrite;
+		modelWrite.dstSet = materialSet;
+		modelWrite.dstBinding = 0;
+		modelWrite.dstArrayElement = 0;
+		modelWrite.descriptorCount = 1;
+		modelWrite.descriptorType = vk::DescriptorType::eUniformBuffer;
+		modelWrite.pBufferInfo = &fallbackBufInfo;
+
+		// G-buffer textures at binding 1 elements 0-3
+		vk::WriteDescriptorSet gbufTexWrite;
+		gbufTexWrite.dstSet = materialSet;
+		gbufTexWrite.dstBinding = 1;
+		gbufTexWrite.dstArrayElement = 0;
+		gbufTexWrite.descriptorCount = 4;
+		gbufTexWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		gbufTexWrite.pImageInfo = gbufTexInfos;
+
+		// Fill remaining texture array elements with fallback
+		// (element count assumed here: the remainder of a 16-entry binding-1
+		// texture array, of which elements 0-3 were written above)
+		std::array<vk::DescriptorImageInfo, 12> fallbackImages;
+		for (auto& fi : fallbackImages) {
+			fi.sampler = defaultSampler;
+			fi.imageView = fallbackView;
+			fi.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		}
+
+		vk::WriteDescriptorSet fallbackTexWrite;
+		fallbackTexWrite.dstSet = materialSet;
+		fallbackTexWrite.dstBinding = 1;
+		fallbackTexWrite.dstArrayElement = 4;
+		fallbackTexWrite.descriptorCount = static_cast<uint32_t>(fallbackImages.size());
+		fallbackTexWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		fallbackTexWrite.pImageInfo = fallbackImages.data();
+
+		// DecalGlobals at binding 2 (fallback)
+		vk::WriteDescriptorSet decalWrite;
+		decalWrite.dstSet = materialSet;
+		decalWrite.dstBinding = 2;
+		decalWrite.dstArrayElement = 0;
+		decalWrite.descriptorCount = 1;
+		decalWrite.descriptorType = vk::DescriptorType::eUniformBuffer;
+		decalWrite.pBufferInfo = &fallbackBufInfo;
+
+		// Transform SSBO at binding 3 (fallback)
+		vk::WriteDescriptorSet ssboWrite;
+		ssboWrite.dstSet = materialSet;
+		ssboWrite.dstBinding = 3;
+		ssboWrite.dstArrayElement = 0;
+		ssboWrite.descriptorCount = 1;
+		ssboWrite.descriptorType = vk::DescriptorType::eStorageBuffer;
+		ssboWrite.pBufferInfo = &fallbackBufInfo;
+
+		// Bindings 4-6: depth, scene color, distortion fallbacks
+		vk::DescriptorImageInfo fallbackImg;
+		fallbackImg.sampler = defaultSampler;
+		fallbackImg.imageView = fallbackView;
+		fallbackImg.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		std::array<vk::WriteDescriptorSet, 3> texFallbackWrites;
+		for (uint32_t b = 4; b <= 6; ++b) {
+			auto& w = texFallbackWrites[b - 4];
+			w.dstSet = materialSet;
+			w.dstBinding = b;
+			w.dstArrayElement = 0;
+			w.descriptorCount = 1;
+			w.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+			w.pImageInfo = &fallbackImg;
+		}
+
+		SCP_vector<vk::WriteDescriptorSet> matWrites = {
+			modelWrite, gbufTexWrite, fallbackTexWrite, decalWrite, ssboWrite,
+			texFallbackWrites[0], texFallbackWrites[1], texFallbackWrites[2]
+		};
+		m_device.updateDescriptorSets(matWrites, {});
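+		// Per light this rebuilds the full Global/Material/PerDraw set trio,
+		// even though the Material set contents (G-buffer views + fallbacks)
+		// are identical for every draw; only the UBO offsets in Sets 0 and 2
+		// actually vary between lights.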
matrixDataSize; + matrixBufInfo.range = sizeof(graphics::matrix_uniforms); + + // GenericData at binding 0 (fallback) + vk::WriteDescriptorSet genWrite; + genWrite.dstSet = perDrawSet; + genWrite.dstBinding = 0; + genWrite.dstArrayElement = 0; + genWrite.descriptorCount = 1; + genWrite.descriptorType = vk::DescriptorType::eUniformBuffer; + genWrite.pBufferInfo = &fallbackBufInfo; + + vk::WriteDescriptorSet matWrite; + matWrite.dstSet = perDrawSet; + matWrite.dstBinding = 1; + matWrite.dstArrayElement = 0; + matWrite.descriptorCount = 1; + matWrite.descriptorType = vk::DescriptorType::eUniformBuffer; + matWrite.pBufferInfo = &matrixBufInfo; + + // Bindings 2-4: NanoVG, Decal, Movie (fallback) + std::array pdFallbacks; + for (uint32_t b = 2; b <= 4; ++b) { + auto& w = pdFallbacks[b - 2]; + w.dstSet = perDrawSet; + w.dstBinding = b; + w.dstArrayElement = 0; + w.descriptorCount = 1; + w.descriptorType = vk::DescriptorType::eUniformBuffer; + w.pBufferInfo = &fallbackBufInfo; + } + + SCP_vector pdWrites = {genWrite, matWrite, pdFallbacks[0], pdFallbacks[1], pdFallbacks[2]}; + m_device.updateDescriptorSets(pdWrites, {}); + + // Bind all 3 descriptor sets + std::array sets = { globalSet, materialSet, perDrawSet }; + cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, 0, sets, {}); + + return true; + }; + + cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, lightPipeline); + + // Draw full-frame lights (directional + ambient) + // Bind sphere VBO as dummy — shader ignores vertex data for these light types. + lightIdx = 0; + if (!full_frame_lights.empty()) { + cmd.bindVertexBuffers(0, m_sphereMesh.vbo, vk::DeviceSize(0)); + for (size_t i = 0; i < full_frame_lights.size(); ++i) { + if (bindLightDescriptors(lightIdx)) { + cmd.draw(3, 1, 0, 0); + } + ++lightIdx; + } + } + + // Draw sphere lights (point + cone) + if (!sphere_lights.empty()) { + cmd.bindVertexBuffers(0, m_sphereMesh.vbo, vk::DeviceSize(0)); + cmd.bindIndexBuffer(m_sphereMesh.ibo, 0, vk::IndexType::eUint16); + for (size_t i = 0; i < sphere_lights.size(); ++i) { + if (bindLightDescriptors(lightIdx)) { + cmd.drawIndexed(m_sphereMesh.indexCount, 1, 0, 0, 0); + } + ++lightIdx; + } + } + + // Draw cylinder lights (tube) + if (!cylinder_lights.empty()) { + cmd.bindVertexBuffers(0, m_cylinderMesh.vbo, vk::DeviceSize(0)); + cmd.bindIndexBuffer(m_cylinderMesh.ibo, 0, vk::IndexType::eUint16); + for (size_t i = 0; i < cylinder_lights.size(); ++i) { + if (bindLightDescriptors(lightIdx)) { + cmd.drawIndexed(m_cylinderMesh.indexCount, 1, 0, 0, 0); + } + ++lightIdx; + } + } + + // End render pass (composite → eShaderReadOnlyOptimal) + cmd.endRenderPass(); +} + +// ===== Bloom Pipeline Implementation ===== + +// Local UBO struct for blur shader (extends blur_data with runtime direction parameter) +struct BlurUBOData { + float texSize; + int level; + int direction; // 0 = horizontal, 1 = vertical + int pad; +}; + +bool VulkanPostProcessor::initBloom() +{ + m_bloomWidth = m_extent.width / 2; + m_bloomHeight = m_extent.height / 2; + + const uint32_t mipLevels = MAX_MIP_BLUR_LEVELS; + + // Create 2 bloom textures (RGBA16F, half-res, 4 mip levels each) + for (int i = 0; i < 2; i++) { + vk::ImageCreateInfo imageInfo; + imageInfo.imageType = vk::ImageType::e2D; + imageInfo.format = vk::Format::eR16G16B16A16Sfloat; + imageInfo.extent.width = m_bloomWidth; + imageInfo.extent.height = m_bloomHeight; + imageInfo.extent.depth = 1; + imageInfo.mipLevels = mipLevels; + imageInfo.arrayLayers = 1; + imageInfo.samples = vk::SampleCountFlagBits::e1; + 
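For context on the per-light offsets consumed by bindLightDescriptors above (lightDataOffset + li * lightDataSize and matrixDataOffset + li * matrixDataSize): each per-light slice must start on a minUniformBufferOffsetAlignment boundary. A minimal sketch of deriving such strides, assuming a hypothetical alignUp helper; the actual packing code lives outside this hunk:

// Hypothetical sketch, not this patch's code. minUniformBufferOffsetAlignment
// is guaranteed by the Vulkan spec to be a power of two, so the mask trick is valid.
inline vk::DeviceSize alignUp(vk::DeviceSize v, vk::DeviceSize a)
{
	return (v + a - 1) & ~(a - 1);
}
// vk::DeviceSize a = props.limits.minUniformBufferOffsetAlignment;
// vk::DeviceSize lightDataSize  = alignUp(sizeof(graphics::deferred_light_data), a);
// vk::DeviceSize matrixDataSize = alignUp(sizeof(graphics::matrix_uniforms), a);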
imageInfo.tiling = vk::ImageTiling::eOptimal; + imageInfo.usage = vk::ImageUsageFlagBits::eColorAttachment + | vk::ImageUsageFlagBits::eSampled + | vk::ImageUsageFlagBits::eTransferSrc + | vk::ImageUsageFlagBits::eTransferDst; + imageInfo.sharingMode = vk::SharingMode::eExclusive; + imageInfo.initialLayout = vk::ImageLayout::eUndefined; + + try { + m_bloomTex[i].image = m_device.createImage(imageInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create bloom image %d: %s\n", i, e.what())); + return false; + } + + if (!m_memoryManager->allocateImageMemory(m_bloomTex[i].image, MemoryUsage::GpuOnly, m_bloomTex[i].allocation)) { + mprintf(("VulkanPostProcessor: Failed to allocate bloom image %d memory!\n", i)); + return false; + } + + // Full image view (all mip levels, for textureLod sampling) + vk::ImageViewCreateInfo fullViewInfo; + fullViewInfo.image = m_bloomTex[i].image; + fullViewInfo.viewType = vk::ImageViewType::e2D; + fullViewInfo.format = vk::Format::eR16G16B16A16Sfloat; + fullViewInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + fullViewInfo.subresourceRange.baseMipLevel = 0; + fullViewInfo.subresourceRange.levelCount = mipLevels; + fullViewInfo.subresourceRange.baseArrayLayer = 0; + fullViewInfo.subresourceRange.layerCount = 1; + + try { + m_bloomTex[i].fullView = m_device.createImageView(fullViewInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create bloom %d full view: %s\n", i, e.what())); + return false; + } + + // Per-mip image views (for framebuffer attachment) + for (uint32_t mip = 0; mip < mipLevels; mip++) { + vk::ImageViewCreateInfo mipViewInfo = fullViewInfo; + mipViewInfo.subresourceRange.baseMipLevel = mip; + mipViewInfo.subresourceRange.levelCount = 1; + + try { + m_bloomTex[i].mipViews[mip] = m_device.createImageView(mipViewInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create bloom %d mip %u view: %s\n", i, mip, e.what())); + return false; + } + } + } + + // Create bloom render pass (color-only RGBA16F, loadOp=eDontCare for overwriting) + { + vk::AttachmentDescription att; + att.format = vk::Format::eR16G16B16A16Sfloat; + att.samples = vk::SampleCountFlagBits::e1; + att.loadOp = vk::AttachmentLoadOp::eDontCare; + att.storeOp = vk::AttachmentStoreOp::eStore; + att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + att.initialLayout = vk::ImageLayout::eUndefined; + att.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::AttachmentReference colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + + vk::SubpassDependency dep; + dep.srcSubpass = VK_SUBPASS_EXTERNAL; + dep.dstSubpass = 0; + dep.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.dstStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.srcAccessMask = vk::AccessFlagBits::eShaderRead + | vk::AccessFlagBits::eColorAttachmentWrite; + dep.dstAccessMask = vk::AccessFlagBits::eShaderRead + | vk::AccessFlagBits::eColorAttachmentWrite; + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = 1; + rpInfo.pAttachments = &att; + rpInfo.subpassCount = 1; + 
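The VK_SUBPASS_EXTERNAL dependency above orders the previous pass's fragment-shader reads and color writes against this pass's. Expressed as a standalone pipeline barrier (a sketch of the equivalent synchronization, not what the patch emits), it would be:

// Sketch: the same read-after-write / write-after-read hazard covered by the
// subpass dependency above, written as an explicit barrier between the passes.
vk::MemoryBarrier mb;
mb.srcAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eColorAttachmentWrite;
mb.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eColorAttachmentWrite;
cmd.pipelineBarrier(
	vk::PipelineStageFlagBits::eFragmentShader | vk::PipelineStageFlagBits::eColorAttachmentOutput,
	vk::PipelineStageFlagBits::eFragmentShader | vk::PipelineStageFlagBits::eColorAttachmentOutput,
	{}, mb, {}, {});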
rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dep; + + try { + m_bloomRenderPass = m_device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create bloom render pass: %s\n", e.what())); + return false; + } + } + + // Create bloom composite render pass (loadOp=eLoad for additive compositing onto scene color) + { + vk::AttachmentDescription att; + att.format = vk::Format::eR16G16B16A16Sfloat; + att.samples = vk::SampleCountFlagBits::e1; + att.loadOp = vk::AttachmentLoadOp::eLoad; + att.storeOp = vk::AttachmentStoreOp::eStore; + att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + att.initialLayout = vk::ImageLayout::eColorAttachmentOptimal; + att.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::AttachmentReference colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + + vk::SubpassDependency dep; + dep.srcSubpass = VK_SUBPASS_EXTERNAL; + dep.dstSubpass = 0; + dep.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.dstStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.srcAccessMask = vk::AccessFlagBits::eShaderRead + | vk::AccessFlagBits::eColorAttachmentWrite; + dep.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead + | vk::AccessFlagBits::eColorAttachmentWrite; + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = 1; + rpInfo.pAttachments = &att; + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dep; + + try { + m_bloomCompositeRenderPass = m_device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create bloom composite render pass: %s\n", e.what())); + return false; + } + } + + // Create per-mip framebuffers for bloom textures + for (int i = 0; i < 2; i++) { + for (uint32_t mip = 0; mip < mipLevels; mip++) { + uint32_t mipW = std::max(1u, m_bloomWidth >> mip); + uint32_t mipH = std::max(1u, m_bloomHeight >> mip); + + vk::FramebufferCreateInfo fbInfo; + fbInfo.renderPass = m_bloomRenderPass; + fbInfo.attachmentCount = 1; + fbInfo.pAttachments = &m_bloomTex[i].mipViews[mip]; + fbInfo.width = mipW; + fbInfo.height = mipH; + fbInfo.layers = 1; + + try { + m_bloomTex[i].mipFramebuffers[mip] = m_device.createFramebuffer(fbInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create bloom %d mip %u framebuffer: %s\n", i, mip, e.what())); + return false; + } + } + } + + // Create scene color framebuffer for bloom composite (wraps m_sceneColor as attachment) + { + vk::FramebufferCreateInfo fbInfo; + fbInfo.renderPass = m_bloomCompositeRenderPass; + fbInfo.attachmentCount = 1; + fbInfo.pAttachments = &m_sceneColor.view; + fbInfo.width = m_extent.width; + fbInfo.height = m_extent.height; + fbInfo.layers = 1; + + try { + m_sceneColorBloomFB = m_device.createFramebuffer(fbInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create scene color bloom framebuffer: %s\n", e.what())); + return false; + } + } + + // Create bloom UBO buffer (slot-based allocation for per-draw data) + { 
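A note on the slot constants used below: every slot is later bound with an offset of slot * BLOOM_UBO_SLOT_SIZE, so the slot size must be a multiple of the device's minUniformBufferOffsetAlignment and at least as large as the biggest per-draw struct. A hypothetical runtime check along these lines (not part of the patch) would catch a misconfigured slot size:

// Hypothetical validation sketch; `limits` would come from
// physicalDevice.getProperties().limits. Not part of this patch.
static void validateBloomSlotLayout(const vk::PhysicalDeviceLimits& limits)
{
	Assertion(BLOOM_UBO_SLOT_SIZE % limits.minUniformBufferOffsetAlignment == 0,
		"Bloom UBO slot size must be a multiple of minUniformBufferOffsetAlignment!");
	Assertion(BLOOM_UBO_SLOT_SIZE >= sizeof(BlurUBOData),
		"Bloom UBO slot size is smaller than the largest per-draw struct!");
}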
+ vk::BufferCreateInfo bufInfo; + bufInfo.size = BLOOM_UBO_MAX_SLOTS * BLOOM_UBO_SLOT_SIZE; + bufInfo.usage = vk::BufferUsageFlagBits::eUniformBuffer; + bufInfo.sharingMode = vk::SharingMode::eExclusive; + + try { + m_bloomUBO = m_device.createBuffer(bufInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create bloom UBO: %s\n", e.what())); + return false; + } + + if (!m_memoryManager->allocateBufferMemory(m_bloomUBO, MemoryUsage::CpuToGpu, m_bloomUBOAlloc)) { + mprintf(("VulkanPostProcessor: Failed to allocate bloom UBO memory!\n")); + m_device.destroyBuffer(m_bloomUBO); + m_bloomUBO = nullptr; + return false; + } + + } + + m_bloomInitialized = true; + mprintf(("VulkanPostProcessor: Bloom initialized (%ux%u, %d mip levels)\n", + m_bloomWidth, m_bloomHeight, MAX_MIP_BLUR_LEVELS)); + return true; +} + +void VulkanPostProcessor::shutdownBloom() +{ + if (!m_bloomInitialized) { + return; + } + + if (m_bloomUBO) { + m_device.destroyBuffer(m_bloomUBO); + m_bloomUBO = nullptr; + } + if (m_bloomUBOAlloc.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_bloomUBOAlloc); + } + + if (m_sceneColorBloomFB) { + m_device.destroyFramebuffer(m_sceneColorBloomFB); + m_sceneColorBloomFB = nullptr; + } + + for (int i = 0; i < 2; i++) { + for (uint32_t mip = 0; mip < MAX_MIP_BLUR_LEVELS; mip++) { + if (m_bloomTex[i].mipFramebuffers[mip]) { + m_device.destroyFramebuffer(m_bloomTex[i].mipFramebuffers[mip]); + m_bloomTex[i].mipFramebuffers[mip] = nullptr; + } + if (m_bloomTex[i].mipViews[mip]) { + m_device.destroyImageView(m_bloomTex[i].mipViews[mip]); + m_bloomTex[i].mipViews[mip] = nullptr; + } + } + if (m_bloomTex[i].fullView) { + m_device.destroyImageView(m_bloomTex[i].fullView); + m_bloomTex[i].fullView = nullptr; + } + if (m_bloomTex[i].image) { + m_device.destroyImage(m_bloomTex[i].image); + m_bloomTex[i].image = nullptr; + } + if (m_bloomTex[i].allocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_bloomTex[i].allocation); + } + } + + if (m_bloomCompositeRenderPass) { + m_device.destroyRenderPass(m_bloomCompositeRenderPass); + m_bloomCompositeRenderPass = nullptr; + } + if (m_bloomRenderPass) { + m_device.destroyRenderPass(m_bloomRenderPass); + m_bloomRenderPass = nullptr; + } + + m_bloomInitialized = false; +} + +void VulkanPostProcessor::generateMipmaps(vk::CommandBuffer cmd, vk::Image image, + uint32_t width, uint32_t height, uint32_t mipLevels) +{ + // Transition mip 0 from eShaderReadOnlyOptimal (after brightpass) to eTransferSrcOptimal + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferRead; + barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.newLayout = vk::ImageLayout::eTransferSrcOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eTransfer, + {}, {}, {}, barrier); + } + + vulkan_generate_mipmap_chain(cmd, image, width, height, mipLevels); +} + +void VulkanPostProcessor::drawFullscreenTriangle(vk::CommandBuffer cmd, vk::RenderPass renderPass, + 
vk::Framebuffer framebuffer, vk::Extent2D extent, + int shaderType, + vk::ImageView textureView, vk::Sampler sampler, + const void* uboData, size_t uboSize, + int blendMode) +{ + auto* pipelineMgr = getPipelineManager(); + auto* descriptorMgr = getDescriptorManager(); + auto* bufferMgr = getBufferManager(); + auto* texMgr = getTextureManager(); + + if (!pipelineMgr || !descriptorMgr || !bufferMgr || !texMgr) { + return; + } + + // Get/create pipeline for this shader + render pass combination + PipelineConfig config; + config.shaderType = static_cast(shaderType); + config.vertexLayoutHash = 0; + config.primitiveType = PRIM_TYPE_TRIS; + config.depthMode = ZBUFFER_TYPE_NONE; + config.blendMode = static_cast(blendMode); + config.cullEnabled = false; + config.depthWriteEnabled = false; + config.renderPass = renderPass; + + vertex_layout emptyLayout; + vk::Pipeline pipeline = pipelineMgr->getPipeline(config, emptyLayout); + if (!pipeline) { + return; + } + + vk::PipelineLayout pipelineLayout = pipelineMgr->getPipelineLayout(); + + // Begin render pass + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = renderPass; + rpBegin.framebuffer = framebuffer; + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = extent; + + cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); + + // Set viewport and scissor + vk::Viewport viewport; + viewport.x = 0.0f; + viewport.y = 0.0f; + viewport.width = static_cast(extent.width); + viewport.height = static_cast(extent.height); + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + cmd.setViewport(0, viewport); + + vk::Rect2D scissor; + scissor.offset = vk::Offset2D(0, 0); + scissor.extent = extent; + cmd.setScissor(0, scissor); + + // Allocate Material descriptor set (Set 1) + vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material); + Verify(materialSet); + + { + // Source texture at binding 1 element 0 + vk::DescriptorImageInfo imageInfo; + imageInfo.sampler = sampler; + imageInfo.imageView = textureView; + imageInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::WriteDescriptorSet texWrite; + texWrite.dstSet = materialSet; + texWrite.dstBinding = 1; + texWrite.dstArrayElement = 0; + texWrite.descriptorCount = 1; + texWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler; + texWrite.pImageInfo = &imageInfo; + + // Fallback UBO for binding 0 (ModelData) and binding 2 (DecalGlobals) + auto fallbackBuf = bufferMgr->getFallbackUniformBuffer(); + vk::DescriptorBufferInfo fallbackBufInfo; + fallbackBufInfo.buffer = fallbackBuf; + fallbackBufInfo.offset = 0; + fallbackBufInfo.range = 4096; + + vk::WriteDescriptorSet modelWrite; + modelWrite.dstSet = materialSet; + modelWrite.dstBinding = 0; + modelWrite.dstArrayElement = 0; + modelWrite.descriptorCount = 1; + modelWrite.descriptorType = vk::DescriptorType::eUniformBuffer; + modelWrite.pBufferInfo = &fallbackBufInfo; + + vk::WriteDescriptorSet decalWrite; + decalWrite.dstSet = materialSet; + decalWrite.dstBinding = 2; + decalWrite.dstArrayElement = 0; + decalWrite.descriptorCount = 1; + decalWrite.descriptorType = vk::DescriptorType::eUniformBuffer; + decalWrite.pBufferInfo = &fallbackBufInfo; + + // Fill remaining texture array elements with fallback (use 2D view since + // post-processing shaders declare sampler2D, not sampler2DArray) + vk::ImageView fallbackView = texMgr->getFallbackTextureView2D(); + vk::Sampler defaultSampler = 
texMgr->getDefaultSampler(); + + std::array fallbackImages; + for (auto& fi : fallbackImages) { + fi.sampler = defaultSampler; + fi.imageView = fallbackView; + fi.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + } + + vk::WriteDescriptorSet fallbackTexWrite; + fallbackTexWrite.dstSet = materialSet; + fallbackTexWrite.dstBinding = 1; + fallbackTexWrite.dstArrayElement = 1; + fallbackTexWrite.descriptorCount = static_cast(fallbackImages.size()); + fallbackTexWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler; + fallbackTexWrite.pImageInfo = fallbackImages.data(); + + // Binding 3: Transform SSBO (fallback to zero UBO) + vk::WriteDescriptorSet ssboWrite; + ssboWrite.dstSet = materialSet; + ssboWrite.dstBinding = 3; + ssboWrite.dstArrayElement = 0; + ssboWrite.descriptorCount = 1; + ssboWrite.descriptorType = vk::DescriptorType::eStorageBuffer; + ssboWrite.pBufferInfo = &fallbackBufInfo; + + // Binding 4: Depth map (fallback to 2D white texture) + vk::DescriptorImageInfo depthMapFallback; + depthMapFallback.sampler = defaultSampler; + depthMapFallback.imageView = fallbackView; + depthMapFallback.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::WriteDescriptorSet depthMapWrite; + depthMapWrite.dstSet = materialSet; + depthMapWrite.dstBinding = 4; + depthMapWrite.dstArrayElement = 0; + depthMapWrite.descriptorCount = 1; + depthMapWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler; + depthMapWrite.pImageInfo = &depthMapFallback; + + // Binding 5: Scene color / frameBuffer (fallback to 2D white texture) + vk::DescriptorImageInfo sceneColorFallback; + sceneColorFallback.sampler = defaultSampler; + sceneColorFallback.imageView = fallbackView; + sceneColorFallback.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::WriteDescriptorSet sceneColorWrite; + sceneColorWrite.dstSet = materialSet; + sceneColorWrite.dstBinding = 5; + sceneColorWrite.dstArrayElement = 0; + sceneColorWrite.descriptorCount = 1; + sceneColorWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler; + sceneColorWrite.pImageInfo = &sceneColorFallback; + + // Binding 6: Distortion map (fallback to 2D white texture) + vk::DescriptorImageInfo distMapFallback; + distMapFallback.sampler = defaultSampler; + distMapFallback.imageView = fallbackView; + distMapFallback.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::WriteDescriptorSet distMapWrite; + distMapWrite.dstSet = materialSet; + distMapWrite.dstBinding = 6; + distMapWrite.dstArrayElement = 0; + distMapWrite.descriptorCount = 1; + distMapWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler; + distMapWrite.pImageInfo = &distMapFallback; + + std::array writes = {texWrite, modelWrite, decalWrite, fallbackTexWrite, ssboWrite, depthMapWrite, sceneColorWrite, distMapWrite}; + m_device.updateDescriptorSets(writes, {}); + } + + // Allocate PerDraw descriptor set (Set 2) + vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw); + Verify(perDrawSet); + + { + vk::DescriptorBufferInfo uboInfo; + + if (uboData && uboSize > 0 && m_bloomUBOMapped) { + // Write UBO data to current bloom UBO slot + Assertion(m_bloomUBOCursor < BLOOM_UBO_MAX_SLOTS, "Bloom UBO slot overflow!"); + uint32_t slotOffset = m_bloomUBOCursor * static_cast(BLOOM_UBO_SLOT_SIZE); + memcpy(static_cast(m_bloomUBOMapped) + slotOffset, uboData, uboSize); + m_bloomUBOCursor++; + + uboInfo.buffer = m_bloomUBO; + uboInfo.offset = slotOffset; + uboInfo.range = BLOOM_UBO_SLOT_SIZE; + } else { + // No UBO 
data — use fallback zero buffer + uboInfo.buffer = bufferMgr->getFallbackUniformBuffer(); + uboInfo.offset = 0; + uboInfo.range = 4096; + } + + vk::WriteDescriptorSet write; + write.dstSet = perDrawSet; + write.dstBinding = 0; + write.dstArrayElement = 0; + write.descriptorCount = 1; + write.descriptorType = vk::DescriptorType::eUniformBuffer; + write.pBufferInfo = &uboInfo; + + // Fallback for remaining per-draw bindings (1-4: Matrices, NanoVGData, DecalInfo, MovieData) + auto fallbackBuf = bufferMgr->getFallbackUniformBuffer(); + vk::DescriptorBufferInfo fallbackInfo; + fallbackInfo.buffer = fallbackBuf; + fallbackInfo.offset = 0; + fallbackInfo.range = 4096; + + SCP_vector writes; + writes.push_back(write); + for (uint32_t b = 1; b <= 4; ++b) { + vk::WriteDescriptorSet fw; + fw.dstSet = perDrawSet; + fw.dstBinding = b; + fw.dstArrayElement = 0; + fw.descriptorCount = 1; + fw.descriptorType = vk::DescriptorType::eUniformBuffer; + fw.pBufferInfo = &fallbackInfo; + writes.push_back(fw); + } + + m_device.updateDescriptorSets(writes, {}); + } + + // Bind descriptor sets (Set 0 already bound from frame setup) + cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout, + static_cast(DescriptorSetIndex::Material), + {materialSet, perDrawSet}, {}); + + cmd.draw(3, 1, 0, 0); + cmd.endRenderPass(); +} + +void VulkanPostProcessor::executeBloom(vk::CommandBuffer cmd) +{ + if (!m_bloomInitialized || gr_bloom_intensity() <= 0) { + return; + } + + // Map bloom UBO for writing per-draw data + m_bloomUBOMapped = m_memoryManager->mapMemory(m_bloomUBOAlloc); + if (!m_bloomUBOMapped) { + return; + } + m_bloomUBOCursor = 0; + + // 1. Bright pass: extract pixels brighter than 1.0 from scene color → bloom_tex[0] mip 0 + drawFullscreenTriangle(cmd, m_bloomRenderPass, + m_bloomTex[0].mipFramebuffers[0], + vk::Extent2D(m_bloomWidth, m_bloomHeight), + SDR_TYPE_POST_PROCESS_BRIGHTPASS, + m_sceneColor.view, m_linearSampler, + nullptr, 0, // Brightpass has no UBO + ALPHA_BLEND_NONE); + + // 2. Generate mipmaps for bloom_tex[0] (fill mips 1-3 from mip 0) + generateMipmaps(cmd, m_bloomTex[0].image, m_bloomWidth, m_bloomHeight, MAX_MIP_BLUR_LEVELS); + + // 3. Blur iterations (2 iterations of vertical + horizontal ping-pong) + for (int iteration = 0; iteration < 2; iteration++) { + for (int pass = 0; pass < 2; pass++) { + // pass 0 = vertical (tex[0] → tex[1]), pass 1 = horizontal (tex[1] → tex[0]) + int srcIdx = pass; + int dstIdx = 1 - pass; + int direction = (pass == 0) ? 1 : 0; // 1=vertical, 0=horizontal + + for (int mip = 0; mip < MAX_MIP_BLUR_LEVELS; mip++) { + uint32_t mipW = std::max(1u, m_bloomWidth >> mip); + uint32_t mipH = std::max(1u, m_bloomHeight >> mip); + + BlurUBOData blurData; + blurData.texSize = (direction == 0) ? 1.0f / static_cast(mipW) + : 1.0f / static_cast(mipH); + blurData.level = mip; + blurData.direction = direction; + blurData.pad = 0; + + drawFullscreenTriangle(cmd, m_bloomRenderPass, + m_bloomTex[dstIdx].mipFramebuffers[mip], + vk::Extent2D(mipW, mipH), + SDR_TYPE_POST_PROCESS_BLUR, + m_bloomTex[srcIdx].fullView, m_mipmapSampler, + &blurData, sizeof(blurData), + ALPHA_BLEND_NONE); + } + } + } + + // 4. 
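One detail worth calling out from the blur loop above: despite its name, BlurUBOData::texSize holds the reciprocal of the blurred dimension, i.e. the UV step for one texel along the blur axis at the current mip. A sketch of how a shader would consume it (assumed shader-side logic, written in C++ form; not taken from this patch):

// Assumed consumption of texSize (sketch). For a blur tap at an offset of
// tapOffsetTexels texels, the sample coordinate along the blur axis is:
inline float blurSampleCoord(float uv, int tapOffsetTexels, float texSize)
{
	return uv + static_cast<float>(tapOffsetTexels) * texSize; // texSize == 1/dimension
}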
Transition scene color for bloom composite (eShaderReadOnlyOptimal → eColorAttachmentOptimal) + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eShaderRead; + barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead + | vk::AccessFlagBits::eColorAttachmentWrite; + barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = m_sceneColor.image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eFragmentShader, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + {}, {}, {}, barrier); + } + + // 5. Bloom composite: additively blend blurred bloom onto scene color + graphics::generic_data::bloom_composition_data compData; + compData.bloom_intensity = gr_bloom_intensity() / 100.0f; + compData.levels = MAX_MIP_BLUR_LEVELS; + compData.pad[0] = 0.0f; + compData.pad[1] = 0.0f; + + drawFullscreenTriangle(cmd, m_bloomCompositeRenderPass, + m_sceneColorBloomFB, + m_extent, + SDR_TYPE_POST_PROCESS_BLOOM_COMP, + m_bloomTex[0].fullView, m_mipmapSampler, + &compData, sizeof(compData), + ALPHA_BLEND_ADDITIVE); + + // Scene_color is now in eShaderReadOnlyOptimal (from bloom composite render pass finalLayout) + + // Unmap bloom UBO + m_memoryManager->unmapMemory(m_bloomUBOAlloc); + m_bloomUBOMapped = nullptr; +} + +// ===== LDR Targets + FXAA Pipeline Implementation ===== + +bool VulkanPostProcessor::initLDRTargets() +{ + // Create Scene_ldr (RGBA8, full resolution) — tonemapped LDR output + if (!createImage(m_extent.width, m_extent.height, vk::Format::eR8G8B8A8Unorm, + vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled, + vk::ImageAspectFlagBits::eColor, + m_sceneLdr.image, m_sceneLdr.view, m_sceneLdr.allocation)) { + mprintf(("VulkanPostProcessor: Failed to create Scene_ldr image!\n")); + return false; + } + m_sceneLdr.format = vk::Format::eR8G8B8A8Unorm; + m_sceneLdr.width = m_extent.width; + m_sceneLdr.height = m_extent.height; + + // Create Scene_luminance (RGBA8, full resolution) — LDR with luma in alpha for FXAA + if (!createImage(m_extent.width, m_extent.height, vk::Format::eR8G8B8A8Unorm, + vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled, + vk::ImageAspectFlagBits::eColor, + m_sceneLuminance.image, m_sceneLuminance.view, m_sceneLuminance.allocation)) { + mprintf(("VulkanPostProcessor: Failed to create Scene_luminance image!\n")); + return false; + } + m_sceneLuminance.format = vk::Format::eR8G8B8A8Unorm; + m_sceneLuminance.width = m_extent.width; + m_sceneLuminance.height = m_extent.height; + + // Create LDR render pass (color-only RGBA8, loadOp=eDontCare, finalLayout=eShaderReadOnlyOptimal) + { + vk::AttachmentDescription att; + att.format = vk::Format::eR8G8B8A8Unorm; + att.samples = vk::SampleCountFlagBits::e1; + att.loadOp = vk::AttachmentLoadOp::eDontCare; + att.storeOp = vk::AttachmentStoreOp::eStore; + att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + att.initialLayout = vk::ImageLayout::eUndefined; + att.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::AttachmentReference 
colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + + vk::SubpassDependency dep; + dep.srcSubpass = VK_SUBPASS_EXTERNAL; + dep.dstSubpass = 0; + dep.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.dstStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.srcAccessMask = vk::AccessFlagBits::eShaderRead + | vk::AccessFlagBits::eColorAttachmentWrite; + dep.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead + | vk::AccessFlagBits::eColorAttachmentWrite; + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = 1; + rpInfo.pAttachments = &att; + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = &subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dep; + + try { + m_ldrRenderPass = m_device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create LDR render pass: %s\n", e.what())); + return false; + } + } + + // Create framebuffers + { + vk::FramebufferCreateInfo fbInfo; + fbInfo.renderPass = m_ldrRenderPass; + fbInfo.attachmentCount = 1; + fbInfo.pAttachments = &m_sceneLdr.view; + fbInfo.width = m_extent.width; + fbInfo.height = m_extent.height; + fbInfo.layers = 1; + + try { + m_sceneLdrFB = m_device.createFramebuffer(fbInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create Scene_ldr framebuffer: %s\n", e.what())); + return false; + } + + fbInfo.pAttachments = &m_sceneLuminance.view; + try { + m_sceneLuminanceFB = m_device.createFramebuffer(fbInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create Scene_luminance framebuffer: %s\n", e.what())); + return false; + } + } + + // Create LDR load render pass (loadOp=eLoad for additive blending onto existing content) + { + vk::AttachmentDescription att; + att.format = vk::Format::eR8G8B8A8Unorm; + att.samples = vk::SampleCountFlagBits::e1; + att.loadOp = vk::AttachmentLoadOp::eLoad; + att.storeOp = vk::AttachmentStoreOp::eStore; + att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + att.initialLayout = vk::ImageLayout::eColorAttachmentOptimal; + att.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::AttachmentReference colorRef; + colorRef.attachment = 0; + colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorRef; + + vk::SubpassDependency dep; + dep.srcSubpass = VK_SUBPASS_EXTERNAL; + dep.dstSubpass = 0; + dep.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.dstStageMask = vk::PipelineStageFlagBits::eFragmentShader + | vk::PipelineStageFlagBits::eColorAttachmentOutput; + dep.srcAccessMask = vk::AccessFlagBits::eShaderRead + | vk::AccessFlagBits::eColorAttachmentWrite; + dep.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead + | vk::AccessFlagBits::eColorAttachmentWrite; + + vk::RenderPassCreateInfo rpInfo; + rpInfo.attachmentCount = 1; + rpInfo.pAttachments = &att; + rpInfo.subpassCount = 1; + rpInfo.pSubpasses = 
&subpass; + rpInfo.dependencyCount = 1; + rpInfo.pDependencies = &dep; + + try { + m_ldrLoadRenderPass = m_device.createRenderPass(rpInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanPostProcessor: Failed to create LDR load render pass: %s\n", e.what())); + return false; + } + } + + m_ldrInitialized = true; + mprintf(("VulkanPostProcessor: LDR targets initialized (%ux%u, RGBA8)\n", + m_extent.width, m_extent.height)); + return true; +} + +void VulkanPostProcessor::shutdownLDRTargets() +{ + if (!m_ldrInitialized) { + return; + } + + if (m_sceneLuminanceFB) { + m_device.destroyFramebuffer(m_sceneLuminanceFB); + m_sceneLuminanceFB = nullptr; + } + if (m_sceneLdrFB) { + m_device.destroyFramebuffer(m_sceneLdrFB); + m_sceneLdrFB = nullptr; + } + if (m_ldrLoadRenderPass) { + m_device.destroyRenderPass(m_ldrLoadRenderPass); + m_ldrLoadRenderPass = nullptr; + } + if (m_ldrRenderPass) { + m_device.destroyRenderPass(m_ldrRenderPass); + m_ldrRenderPass = nullptr; + } + + // Scene_luminance + if (m_sceneLuminance.view) { + m_device.destroyImageView(m_sceneLuminance.view); + m_sceneLuminance.view = nullptr; + } + if (m_sceneLuminance.image) { + m_device.destroyImage(m_sceneLuminance.image); + m_sceneLuminance.image = nullptr; + } + if (m_sceneLuminance.allocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_sceneLuminance.allocation); + } + + // Scene_ldr + if (m_sceneLdr.view) { + m_device.destroyImageView(m_sceneLdr.view); + m_sceneLdr.view = nullptr; + } + if (m_sceneLdr.image) { + m_device.destroyImage(m_sceneLdr.image); + m_sceneLdr.image = nullptr; + } + if (m_sceneLdr.allocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_sceneLdr.allocation); + } + + m_ldrInitialized = false; +} + +void VulkanPostProcessor::executeTonemap(vk::CommandBuffer cmd) +{ + if (!m_ldrInitialized) { + return; + } + + namespace ltp = lighting_profiles; + + // Map bloom UBO for the tonemapping draw's UBO slot + m_bloomUBOMapped = m_memoryManager->mapMemory(m_bloomUBOAlloc); + if (!m_bloomUBOMapped) { + return; + } + + // Reset cursor if bloom didn't run this frame (bloom resets to 0 when it runs) + if (gr_bloom_intensity() <= 0 || !m_bloomInitialized) { + m_bloomUBOCursor = 0; + } + + // Build tonemapping data directly from lighting profiles + graphics::generic_data::tonemapping_data tmData; + memset(&tmData, 0, sizeof(tmData)); + auto ppc = ltp::current_piecewise_intermediates(); + tmData.exposure = ltp::current_exposure(); + tmData.tonemapper = static_cast<int>(ltp::current_tonemapper()); + tmData.x0 = ppc.x0; + tmData.y0 = ppc.y0; + tmData.x1 = ppc.x1; + tmData.toe_B = ppc.toe_B; + tmData.toe_lnA = ppc.toe_lnA; + tmData.sh_B = ppc.sh_B; + tmData.sh_lnA = ppc.sh_lnA; + tmData.sh_offsetX = ppc.sh_offsetX; + tmData.sh_offsetY = ppc.sh_offsetY; + + // HDR scene → Scene_ldr via tonemapping shader + drawFullscreenTriangle(cmd, m_ldrRenderPass, + m_sceneLdrFB, m_extent, + SDR_TYPE_POST_PROCESS_TONEMAPPING, + m_sceneColor.view, m_linearSampler, + &tmData, sizeof(tmData), + ALPHA_BLEND_NONE); + + m_memoryManager->unmapMemory(m_bloomUBOAlloc); + m_bloomUBOMapped = nullptr; +} + +void VulkanPostProcessor::executeFXAA(vk::CommandBuffer cmd) +{ + if (!m_ldrInitialized || !gr_is_fxaa_mode(Gr_aa_mode)) { + return; + } + + m_bloomUBOMapped = m_memoryManager->mapMemory(m_bloomUBOAlloc); + if (!m_bloomUBOMapped) { + return; + } + + // FXAA prepass: Scene_ldr → Scene_luminance (compute luma in alpha) + drawFullscreenTriangle(cmd, m_ldrRenderPass, + m_sceneLuminanceFB, m_extent, +
SDR_TYPE_POST_PROCESS_FXAA_PREPASS, + m_sceneLdr.view, m_linearSampler, + nullptr, 0, + ALPHA_BLEND_NONE); + + // FXAA main pass: Scene_luminance → Scene_ldr + graphics::generic_data::fxaa_data fxaaData; + fxaaData.rt_w = static_cast<float>(m_extent.width); + fxaaData.rt_h = static_cast<float>(m_extent.height); + fxaaData.pad[0] = 0.0f; + fxaaData.pad[1] = 0.0f; + + drawFullscreenTriangle(cmd, m_ldrRenderPass, + m_sceneLdrFB, m_extent, + SDR_TYPE_POST_PROCESS_FXAA, + m_sceneLuminance.view, m_linearSampler, + &fxaaData, sizeof(fxaaData), + ALPHA_BLEND_NONE); + + m_memoryManager->unmapMemory(m_bloomUBOAlloc); + m_bloomUBOMapped = nullptr; +} + +bool VulkanPostProcessor::executePostEffects(vk::CommandBuffer cmd) +{ + m_postEffectsApplied = false; + + if (!m_ldrInitialized || !graphics::Post_processing_manager) { + return false; + } + + const auto& postEffects = graphics::Post_processing_manager->getPostEffects(); + if (postEffects.empty()) { + return false; + } + + // Compute effect flags from current state + int effectFlags = 0; + for (size_t idx = 0; idx < postEffects.size(); idx++) { + if (postEffects[idx].always_on || (postEffects[idx].intensity != postEffects[idx].default_intensity)) { + effectFlags |= (1 << idx); + } + } + + if (effectFlags == 0) { + return false; + } + + m_bloomUBOMapped = m_memoryManager->mapMemory(m_bloomUBOAlloc); + if (!m_bloomUBOMapped) { + return false; + } + + // Build the extended post_data UBO with effectFlags appended + struct PostEffectsUBOData { + graphics::generic_data::post_data base; + int effectFlags; + int pad[3]; + }; + + PostEffectsUBOData uboData; + memset(&uboData, 0, sizeof(uboData)); + uboData.base.timer = static_cast<float>(timer_get_milliseconds() % 100 + 1); + uboData.effectFlags = effectFlags; + + // Fill effect parameters + for (size_t idx = 0; idx < postEffects.size(); idx++) { + if (!(effectFlags & (1 << idx))) { + continue; + } + float value = postEffects[idx].intensity; + switch (postEffects[idx].uniform_type) { + case graphics::PostEffectUniformType::NoiseAmount: + uboData.base.noise_amount = value; + break; + case graphics::PostEffectUniformType::Saturation: + uboData.base.saturation = value; + break; + case graphics::PostEffectUniformType::Brightness: + uboData.base.brightness = value; + break; + case graphics::PostEffectUniformType::Contrast: + uboData.base.contrast = value; + break; + case graphics::PostEffectUniformType::FilmGrain: + uboData.base.film_grain = value; + break; + case graphics::PostEffectUniformType::TvStripes: + uboData.base.tv_stripes = value; + break; + case graphics::PostEffectUniformType::Cutoff: + uboData.base.cutoff = value; + break; + case graphics::PostEffectUniformType::Dither: + uboData.base.dither = value; + break; + case graphics::PostEffectUniformType::Tint: + uboData.base.tint = postEffects[idx].rgb; + break; + case graphics::PostEffectUniformType::CustomEffectVEC3A: + uboData.base.custom_effect_vec3_a = postEffects[idx].rgb; + break; + case graphics::PostEffectUniformType::CustomEffectFloatA: + uboData.base.custom_effect_float_a = value; + break; + case graphics::PostEffectUniformType::CustomEffectVEC3B: + uboData.base.custom_effect_vec3_b = postEffects[idx].rgb; + break; + case graphics::PostEffectUniformType::CustomEffectFloatB: + uboData.base.custom_effect_float_b = value; + break; + default: + break; + } + } + + // Post-effects: Scene_ldr → Scene_luminance (reusing luminance target as temp) + drawFullscreenTriangle(cmd, m_ldrRenderPass, + m_sceneLuminanceFB, m_extent, + SDR_TYPE_POST_PROCESS_MAIN, + m_sceneLdr.view,
m_linearSampler, + &uboData, sizeof(uboData), + ALPHA_BLEND_NONE); + + m_memoryManager->unmapMemory(m_bloomUBOAlloc); + m_bloomUBOMapped = nullptr; + + m_postEffectsApplied = true; + return true; +} + +void VulkanPostProcessor::executeLightshafts(vk::CommandBuffer cmd) +{ + if (!m_ldrInitialized || !graphics::Post_processing_manager) { + return; + } + + if (Game_subspace_effect || !gr_sunglare_enabled() || !gr_lightshafts_enabled()) { + return; + } + + // Find a global light with glare facing the camera + int n_lights = light_get_global_count(); + float sun_x = 0.0f, sun_y = 0.0f; + bool found = false; + + for (int idx = 0; idx < n_lights; idx++) { + vec3d light_dir; + light_get_global_dir(&light_dir, idx); + + if (!light_has_glare(idx)) { + continue; + } + + float dot = vm_vec_dot(&light_dir, &Eye_matrix.vec.fvec); + if (dot > 0.7f) { + sun_x = asinf_safe(vm_vec_dot(&light_dir, &Eye_matrix.vec.rvec)) / PI * 1.5f + 0.5f; + sun_y = asinf_safe(vm_vec_dot(&light_dir, &Eye_matrix.vec.uvec)) / PI * 1.5f * gr_screen.clip_aspect + 0.5f; + found = true; + break; + } + } + + if (!found) { + return; + } + + // Transition scene depth from eDepthStencilAttachmentOptimal to eShaderReadOnlyOptimal for sampling + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + barrier.oldLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = m_sceneDepth.image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eDepth; + if (m_depthFormat == vk::Format::eD24UnormS8Uint || m_depthFormat == vk::Format::eD32SfloatS8Uint) { + barrier.subresourceRange.aspectMask |= vk::ImageAspectFlagBits::eStencil; + } + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eLateFragmentTests, + vk::PipelineStageFlagBits::eFragmentShader, + {}, {}, {}, barrier); + } + + // Transition Scene_ldr to eColorAttachmentOptimal for loadOp=eLoad render pass + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eShaderRead; + barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite; + barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = m_sceneLdr.image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eFragmentShader, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + {}, {}, {}, barrier); + } + + // Build lightshaft UBO data + auto& ls_params = graphics::Post_processing_manager->getLightshaftParams(); + + graphics::generic_data::lightshaft_data lsData; + lsData.sun_pos.x = sun_x; + lsData.sun_pos.y = sun_y; + lsData.density = ls_params.density; + lsData.weight = ls_params.weight; + lsData.falloff = 
ls_params.falloff; + lsData.intensity = Sun_spot * ls_params.intensity; + lsData.cp_intensity = Sun_spot * ls_params.cpintensity; + lsData.pad[0] = 0.0f; + + m_bloomUBOMapped = m_memoryManager->mapMemory(m_bloomUBOAlloc); + if (!m_bloomUBOMapped) { + return; + } + + // Additive blend lightshafts onto Scene_ldr + drawFullscreenTriangle(cmd, m_ldrLoadRenderPass, + m_sceneLdrFB, m_extent, + SDR_TYPE_POST_PROCESS_LIGHTSHAFTS, + m_sceneDepth.view, m_linearSampler, + &lsData, sizeof(lsData), + ALPHA_BLEND_ADDITIVE); + + m_memoryManager->unmapMemory(m_bloomUBOAlloc); + m_bloomUBOMapped = nullptr; +} + +void VulkanPostProcessor::copyEffectTexture(vk::CommandBuffer cmd) +{ + // Called mid-scene, outside a render pass. + // Scene color is in eShaderReadOnlyOptimal (from the ended scene render pass). + // Copies scene color → effect texture so distortion/soft particle shaders can sample it. + copyImageToImage(cmd, + m_sceneColor.image, vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eColorAttachmentOptimal, + m_sceneEffect.image, vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal, + m_extent); +} + +void VulkanPostProcessor::copySceneDepth(vk::CommandBuffer cmd) +{ + // Called mid-scene, outside a render pass. + // Copies scene depth → depth copy texture so soft particle shaders can sample it. + // Scene depth is in eDepthStencilAttachmentOptimal (from the ended scene render pass). + copyImageToImage(cmd, + m_sceneDepth.image, vk::ImageLayout::eDepthStencilAttachmentOptimal, vk::ImageLayout::eDepthStencilAttachmentOptimal, + m_sceneDepthCopy.image, vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal, + m_extent, + vk::ImageAspectFlagBits::eDepth); +} + +void VulkanPostProcessor::copyGbufNormal(vk::CommandBuffer cmd) +{ + // Called mid-scene, outside a render pass. + // Copies G-buffer normal → normal copy so decal shader can sample it for angle rejection. + // G-buffer normal is in eShaderReadOnlyOptimal (from the ended G-buffer render pass). + // Normal goes back to eShaderReadOnlyOptimal (transitionGbufForResume handles the rest). + copyImageToImage(cmd, + m_gbufNormal.image, vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eShaderReadOnlyOptimal, + m_gbufNormalCopy.image, vk::ImageLayout::eUndefined, vk::ImageLayout::eShaderReadOnlyOptimal, + m_extent); +} + +void VulkanPostProcessor::updateDistortion(vk::CommandBuffer cmd, float frametime) +{ + if (!m_distortionInitialized) { + return; + } + + m_distortionTimer += frametime; + if (m_distortionTimer < 0.03f) { + return; + } + m_distortionTimer = 0.0f; + + int dst = !m_distortionSwitch; // Write target + int src = m_distortionSwitch; // Read source + + // On first update, images are still in eUndefined layout + vk::ImageLayout srcOldLayout = m_distortionFirstUpdate + ? vk::ImageLayout::eUndefined : vk::ImageLayout::eShaderReadOnlyOptimal; + vk::AccessFlags srcOldAccess = m_distortionFirstUpdate + ? 
vk::AccessFlags{} : vk::AccessFlagBits::eShaderRead; + + // Transition both distortion textures for transfer operations + { + std::array<vk::ImageMemoryBarrier, 2> barriers; + + // dst: eShaderReadOnlyOptimal (or eUndefined on first use) → eTransferDstOptimal + barriers[0].srcAccessMask = srcOldAccess; + barriers[0].dstAccessMask = vk::AccessFlagBits::eTransferWrite; + barriers[0].oldLayout = srcOldLayout; + barriers[0].newLayout = vk::ImageLayout::eTransferDstOptimal; + barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].image = m_distortionTex[dst].image; + barriers[0].subresourceRange = vk::ImageSubresourceRange( + vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); + + // src: eShaderReadOnlyOptimal (or eUndefined on first use) → eTransferSrcOptimal + barriers[1].srcAccessMask = srcOldAccess; + barriers[1].dstAccessMask = vk::AccessFlagBits::eTransferRead; + barriers[1].oldLayout = srcOldLayout; + barriers[1].newLayout = vk::ImageLayout::eTransferSrcOptimal; + barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].image = m_distortionTex[src].image; + barriers[1].subresourceRange = vk::ImageSubresourceRange( + vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eFragmentShader, + vk::PipelineStageFlagBits::eTransfer, + {}, {}, {}, barriers); + } + + // Clear dest to mid-gray (0.5, 0.5, 0.0, 1.0) = no distortion + { + vk::ClearColorValue clearColor; + clearColor.setFloat32({0.5f, 0.5f, 0.0f, 1.0f}); + vk::ImageSubresourceRange range(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); + cmd.clearColorImage(m_distortionTex[dst].image, + vk::ImageLayout::eTransferDstOptimal, clearColor, range); + } + + // Blit: scroll old data right by 1 pixel + // src columns 0-30 → dst columns 1-31 (with LINEAR filtering) + { + vk::ImageBlit blit; + blit.srcSubresource = vk::ImageSubresourceLayers( + vk::ImageAspectFlagBits::eColor, 0, 0, 1); + blit.srcOffsets[0] = vk::Offset3D(0, 0, 0); + blit.srcOffsets[1] = vk::Offset3D(31, 32, 1); + blit.dstSubresource = vk::ImageSubresourceLayers( + vk::ImageAspectFlagBits::eColor, 0, 0, 1); + blit.dstOffsets[0] = vk::Offset3D(1, 0, 0); + blit.dstOffsets[1] = vk::Offset3D(32, 32, 1); + + cmd.blitImage( + m_distortionTex[src].image, vk::ImageLayout::eTransferSrcOptimal, + m_distortionTex[dst].image, vk::ImageLayout::eTransferDstOptimal, + blit, vk::Filter::eLinear); + } + + // Generate random noise and copy to column 0 of dst + // OpenGL draws 33 GL_POINTS at x=0 with random R,G values — we write 32 pixels + { + // Create a small host-visible staging buffer for 32 RGBA8 pixels (128 bytes) + vk::BufferCreateInfo bufInfo; + bufInfo.size = 32 * 4; + bufInfo.usage = vk::BufferUsageFlagBits::eTransferSrc; + bufInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer stagingBuf; + VulkanAllocation stagingAlloc; + try { + stagingBuf = m_device.createBuffer(bufInfo); + } catch (const vk::SystemError&) { + // Non-fatal: skip noise injection this frame + goto skip_noise; + } + + Verify(m_memoryManager->allocateBufferMemory(stagingBuf, MemoryUsage::CpuOnly, stagingAlloc)); + + { + auto* pixels = static_cast<uint8_t*>(m_memoryManager->mapMemory(stagingAlloc)); + Verify(pixels); + for (int i = 0; i < 32; i++) { + pixels[i * 4 + 0] = static_cast<uint8_t>(::util::Random::next(256)); // R + pixels[i * 4 + 1] = static_cast<uint8_t>(::util::Random::next(256)); // G + pixels[i * 4 + 2] = 255; // B + pixels[i * 4 + 3] = 255; // A +
m_memoryManager->unmapMemory(stagingAlloc); + + // Copy staging buffer → column 0 of dst (1 pixel wide, 32 pixels tall) + vk::BufferImageCopy region; + region.bufferOffset = 0; + region.bufferRowLength = 0; // Tightly packed + region.bufferImageHeight = 0; + region.imageSubresource = vk::ImageSubresourceLayers( + vk::ImageAspectFlagBits::eColor, 0, 0, 1); + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(1, 32, 1); + + cmd.copyBufferToImage(stagingBuf, m_distortionTex[dst].image, + vk::ImageLayout::eTransferDstOptimal, region); + } + + // Schedule staging buffer for deferred destruction (GPU may still be reading) + auto* delQueue = getDeletionQueue(); + if (delQueue) { + delQueue->queueBuffer(stagingBuf, stagingAlloc); + } else { + m_device.destroyBuffer(stagingBuf); + m_memoryManager->freeAllocation(stagingAlloc); + } + } + +skip_noise: + // Transition both textures back to eShaderReadOnlyOptimal + { + std::array<vk::ImageMemoryBarrier, 2> barriers; + + // dst: eTransferDstOptimal → eShaderReadOnlyOptimal + barriers[0].srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barriers[0].dstAccessMask = vk::AccessFlagBits::eShaderRead; + barriers[0].oldLayout = vk::ImageLayout::eTransferDstOptimal; + barriers[0].newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[0].image = m_distortionTex[dst].image; + barriers[0].subresourceRange = vk::ImageSubresourceRange( + vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); + + // src: eTransferSrcOptimal → eShaderReadOnlyOptimal + barriers[1].srcAccessMask = vk::AccessFlagBits::eTransferRead; + barriers[1].dstAccessMask = vk::AccessFlagBits::eShaderRead; + barriers[1].oldLayout = vk::ImageLayout::eTransferSrcOptimal; + barriers[1].newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barriers[1].image = m_distortionTex[src].image; + barriers[1].subresourceRange = vk::ImageSubresourceRange( + vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1); + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + {}, {}, {}, barriers); + } + + m_distortionSwitch = !m_distortionSwitch; + m_distortionFirstUpdate = false; +} + +vk::ImageView VulkanPostProcessor::getDistortionTextureView() const +{ + if (!m_distortionInitialized) { + return nullptr; + } + // Return the most recently written texture (matching OpenGL's + // Distortion_texture[!Distortion_switch] binding for thrusters). + // After updateDistortion toggles the switch, m_distortionSwitch points + // to the old read source. The write target was !old_switch = new switch. + // So the most recently written texture is m_distortionTex[m_distortionSwitch]. + return m_distortionTex[m_distortionSwitch].view; +} + +void VulkanPostProcessor::blitToSwapChain(vk::CommandBuffer cmd) +{ + // If LDR targets exist, executeTonemap()+executeFXAA() already ran. + // Blit from the latest post-processing result with passthrough settings. + // Otherwise, fall back to direct HDR→swap chain tonemapping.
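The selection described in the comment above reduces to a three-way choice. As a standalone sketch (the helper below is illustrative only; blitToSwapChain inlines the same decision as a ternary):

// Illustrative only, not part of the patch: the blit-source decision as a helper.
static vk::ImageView selectBlitSource(bool postEffectsApplied, bool ldrInitialized,
	vk::ImageView luminanceView, vk::ImageView ldrView, vk::ImageView hdrView)
{
	if (postEffectsApplied) {
		return luminanceView; // post-effects pass wrote here last
	}
	if (ldrInitialized) {
		return ldrView; // tonemap/FXAA output
	}
	return hdrView; // raw HDR; the blit shader applies tonemapping itself
}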
+ bool useLdr = m_ldrInitialized; + + if (!useLdr) { + // Update tonemapping parameters from engine lighting profile + updateTonemappingUBO(); + } + + auto* pipelineMgr = getPipelineManager(); + auto* descriptorMgr = getDescriptorManager(); + auto* stateTracker = getStateTracker(); + auto* bufferMgr = getBufferManager(); + + if (!pipelineMgr || !descriptorMgr || !stateTracker || !bufferMgr) { + return; + } + + // Build pipeline config for tonemapping (fullscreen, no depth, no blending) + // sRGB conversion is controlled by the linearOut UBO field, not shader variants + PipelineConfig config; + config.shaderType = SDR_TYPE_POST_PROCESS_TONEMAPPING; + config.vertexLayoutHash = 0; // Empty vertex layout + config.primitiveType = PRIM_TYPE_TRIS; + config.depthMode = ZBUFFER_TYPE_NONE; + config.blendMode = ALPHA_BLEND_NONE; + config.cullEnabled = false; + config.depthWriteEnabled = false; + config.renderPass = stateTracker->getCurrentRenderPass(); + + // Get or create the pipeline + vertex_layout emptyLayout; // No vertex components + vk::Pipeline pipeline = pipelineMgr->getPipeline(config, emptyLayout); + if (!pipeline) { + mprintf(("VulkanPostProcessor: Failed to get tonemapping pipeline!\n")); + return; + } + + vk::PipelineLayout pipelineLayout = pipelineMgr->getPipelineLayout(); + stateTracker->bindPipeline(pipeline, pipelineLayout); + + // Set viewport (non-flipped for post-processing — textures are already + // in the correct Vulkan orientation, no Y-flip needed) + stateTracker->setViewport(0.0f, 0.0f, + static_cast(m_extent.width), + static_cast(m_extent.height)); + + stateTracker->applyDynamicState(); + + // Allocate and write Material descriptor set (Set 1) with source texture + vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material); + Verify(materialSet); + + { + // Bind source texture based on post-processing chain state: + // - Post-effects ran: read Scene_luminance (post-effects output) + // - LDR only (tonemap/FXAA): read Scene_ldr + // - No LDR: read Scene_color (raw HDR, tonemapping applied by this shader) + vk::DescriptorImageInfo imageInfo; + imageInfo.sampler = m_linearSampler; + imageInfo.imageView = m_postEffectsApplied ? m_sceneLuminance.view + : useLdr ? 
m_sceneLdr.view + : m_sceneColor.view; + imageInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::WriteDescriptorSet write; + write.dstSet = materialSet; + write.dstBinding = 1; + write.dstArrayElement = 0; + write.descriptorCount = 1; + write.descriptorType = vk::DescriptorType::eCombinedImageSampler; + write.pImageInfo = &imageInfo; + + // Pre-initialize binding 0 (ModelData UBO) with fallback zero buffer + auto fallbackBuffer = bufferMgr->getFallbackUniformBuffer(); + vk::DescriptorBufferInfo bufferInfo; + bufferInfo.buffer = fallbackBuffer; + bufferInfo.offset = 0; + bufferInfo.range = 4096; + + vk::WriteDescriptorSet uboWrite; + uboWrite.dstSet = materialSet; + uboWrite.dstBinding = 0; + uboWrite.dstArrayElement = 0; + uboWrite.descriptorCount = 1; + uboWrite.descriptorType = vk::DescriptorType::eUniformBuffer; + uboWrite.pBufferInfo = &bufferInfo; + + // Pre-initialize binding 2 (DecalGlobals UBO) with fallback + vk::WriteDescriptorSet decalWrite; + decalWrite.dstSet = materialSet; + decalWrite.dstBinding = 2; + decalWrite.dstArrayElement = 0; + decalWrite.descriptorCount = 1; + decalWrite.descriptorType = vk::DescriptorType::eUniformBuffer; + decalWrite.pBufferInfo = &bufferInfo; + + // Fill remaining texture array elements with fallback (use 2D view since + // post-processing shaders declare sampler2D, not sampler2DArray) + auto* texMgr = getTextureManager(); + vk::ImageView fallbackView = texMgr->getFallbackTextureView2D(); + vk::Sampler defaultSampler = texMgr->getDefaultSampler(); + + std::array fallbackImages; + for (auto& fi : fallbackImages) { + fi.sampler = defaultSampler; + fi.imageView = fallbackView; + fi.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + } + + vk::WriteDescriptorSet fallbackTexWrite; + fallbackTexWrite.dstSet = materialSet; + fallbackTexWrite.dstBinding = 1; + fallbackTexWrite.dstArrayElement = 1; + fallbackTexWrite.descriptorCount = static_cast(fallbackImages.size()); + fallbackTexWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler; + fallbackTexWrite.pImageInfo = fallbackImages.data(); + + // Binding 3: Transform SSBO (fallback to zero UBO) + vk::WriteDescriptorSet ssboWrite; + ssboWrite.dstSet = materialSet; + ssboWrite.dstBinding = 3; + ssboWrite.dstArrayElement = 0; + ssboWrite.descriptorCount = 1; + ssboWrite.descriptorType = vk::DescriptorType::eStorageBuffer; + ssboWrite.pBufferInfo = &bufferInfo; + + // Binding 4: Depth map (fallback to 2D white texture) + vk::DescriptorImageInfo depthMapFallback; + depthMapFallback.sampler = defaultSampler; + depthMapFallback.imageView = fallbackView; + depthMapFallback.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::WriteDescriptorSet depthMapWrite; + depthMapWrite.dstSet = materialSet; + depthMapWrite.dstBinding = 4; + depthMapWrite.dstArrayElement = 0; + depthMapWrite.descriptorCount = 1; + depthMapWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler; + depthMapWrite.pImageInfo = &depthMapFallback; + + // Binding 5: Scene color / frameBuffer (fallback to 2D white texture) + vk::DescriptorImageInfo sceneColorFallback; + sceneColorFallback.sampler = defaultSampler; + sceneColorFallback.imageView = fallbackView; + sceneColorFallback.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::WriteDescriptorSet sceneColorWrite; + sceneColorWrite.dstSet = materialSet; + sceneColorWrite.dstBinding = 5; + sceneColorWrite.dstArrayElement = 0; + sceneColorWrite.descriptorCount = 1; + sceneColorWrite.descriptorType = 
+
+		// Binding 3: Transform SSBO (fallback to zero UBO)
+		vk::WriteDescriptorSet ssboWrite;
+		ssboWrite.dstSet = materialSet;
+		ssboWrite.dstBinding = 3;
+		ssboWrite.dstArrayElement = 0;
+		ssboWrite.descriptorCount = 1;
+		ssboWrite.descriptorType = vk::DescriptorType::eStorageBuffer;
+		ssboWrite.pBufferInfo = &bufferInfo;
+
+		// Binding 4: Depth map (fallback to 2D white texture)
+		vk::DescriptorImageInfo depthMapFallback;
+		depthMapFallback.sampler = defaultSampler;
+		depthMapFallback.imageView = fallbackView;
+		depthMapFallback.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		vk::WriteDescriptorSet depthMapWrite;
+		depthMapWrite.dstSet = materialSet;
+		depthMapWrite.dstBinding = 4;
+		depthMapWrite.dstArrayElement = 0;
+		depthMapWrite.descriptorCount = 1;
+		depthMapWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		depthMapWrite.pImageInfo = &depthMapFallback;
+
+		// Binding 5: Scene color / frameBuffer (fallback to 2D white texture)
+		vk::DescriptorImageInfo sceneColorFallback;
+		sceneColorFallback.sampler = defaultSampler;
+		sceneColorFallback.imageView = fallbackView;
+		sceneColorFallback.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		vk::WriteDescriptorSet sceneColorWrite;
+		sceneColorWrite.dstSet = materialSet;
+		sceneColorWrite.dstBinding = 5;
+		sceneColorWrite.dstArrayElement = 0;
+		sceneColorWrite.descriptorCount = 1;
+		sceneColorWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		sceneColorWrite.pImageInfo = &sceneColorFallback;
+
+		// Binding 6: Distortion map (fallback to 2D white texture)
+		vk::DescriptorImageInfo distMapFallback;
+		distMapFallback.sampler = defaultSampler;
+		distMapFallback.imageView = fallbackView;
+		distMapFallback.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		vk::WriteDescriptorSet distMapWrite;
+		distMapWrite.dstSet = materialSet;
+		distMapWrite.dstBinding = 6;
+		distMapWrite.dstArrayElement = 0;
+		distMapWrite.descriptorCount = 1;
+		distMapWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		distMapWrite.pImageInfo = &distMapFallback;
+
+		std::array<vk::WriteDescriptorSet, 8> writes = {write, uboWrite, decalWrite, fallbackTexWrite,
+			ssboWrite, depthMapWrite, sceneColorWrite, distMapWrite};
+		m_device.updateDescriptorSets(writes, {});
+	}
+
+	stateTracker->bindDescriptorSet(DescriptorSetIndex::Material, materialSet);
+
+	// Allocate and write PerDraw descriptor set (Set 2) with the tonemapping UBO.
+	// The fallback (zero) UBO would not work here: exposure=0 renders black, so
+	// the shader needs a valid graphics::generic_data::tonemapping_data (at
+	// minimum exposure=1.0 and tonemapper=0, i.e. linear).
+	vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw);
+	Verify(perDrawSet);
+
+	{
+		// When blitting LDR, use passthrough tonemapping (exposure=1, linear)
+		// Otherwise, use the real tonemapping UBO (already updated above)
+		if (useLdr) {
+			auto* mapped = static_cast<graphics::generic_data::tonemapping_data*>(
+				m_memoryManager->mapMemory(m_tonemapUBOAlloc));
+			Verify(mapped);
+			memset(mapped, 0, sizeof(graphics::generic_data::tonemapping_data));
+			mapped->exposure = 1.0f;
+			mapped->tonemapper = 0; // Linear passthrough
+			mapped->linearOut = 1; // Skip sRGB — LDR input already has sRGB applied
+			m_memoryManager->unmapMemory(m_tonemapUBOAlloc);
+		}
+
+		vk::DescriptorBufferInfo uboInfo;
+		uboInfo.buffer = m_tonemapUBO;
+		uboInfo.offset = 0;
+		uboInfo.range = sizeof(graphics::generic_data::tonemapping_data);
+
+		vk::WriteDescriptorSet write;
+		write.dstSet = perDrawSet;
+		write.dstBinding = 0;
+		write.dstArrayElement = 0;
+		write.descriptorCount = 1;
+		write.descriptorType = vk::DescriptorType::eUniformBuffer;
+		write.pBufferInfo = &uboInfo;
+
+		// Pre-initialize other bindings with fallback
+		auto fallbackBuffer = bufferMgr->getFallbackUniformBuffer();
+		vk::DescriptorBufferInfo fallbackInfo;
+		fallbackInfo.buffer = fallbackBuffer;
+		fallbackInfo.offset = 0;
+		fallbackInfo.range = 4096;
+
+		SCP_vector<vk::WriteDescriptorSet> writes;
+		writes.push_back(write);
+
+		// Bindings 1-4: Matrices, NanoVGData, DecalInfo, MovieData
+		for (uint32_t b = 1; b <= 4; ++b) {
+			vk::WriteDescriptorSet fw;
+			fw.dstSet = perDrawSet;
+			fw.dstBinding = b;
+			fw.dstArrayElement = 0;
+			fw.descriptorCount = 1;
+			fw.descriptorType = vk::DescriptorType::eUniformBuffer;
+			fw.pBufferInfo = &fallbackInfo;
+			writes.push_back(fw);
+		}
+
+		m_device.updateDescriptorSets(writes, {});
+	}
+
+	stateTracker->bindDescriptorSet(DescriptorSetIndex::PerDraw, perDrawSet);
+
+	// Draw fullscreen triangle (3 vertices from gl_VertexIndex, no vertex buffer)
+	cmd.draw(3, 1, 0, 0);
+}
+
+// ===== Shadow Map Implementation =====
+
+bool VulkanPostProcessor::initShadowPass()
+{
+	if (m_shadowInitialized) {
+		return true;
+	}
+
+	if (Shadow_quality == ShadowQuality::Disabled) {
+		return false;
+	}
+
+	int size;
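+	// Map the user-facing shadow quality setting to a square shadow map edge
+	// length; each quality step doubles it. The sizes mirror what the OpenGL
+	// backend allocates for its shadow framebuffer.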
+	switch (Shadow_quality) {
+	case ShadowQuality::Low:    size = 512;  break;
+	case ShadowQuality::Medium: size = 1024; break;
+	case ShadowQuality::High:   size = 2048; break;
+	case ShadowQuality::Ultra:  size = 4096; break;
+	default:                    size = 512;  break;
+	}
+
+	mprintf(("VulkanPostProcessor: Creating %dx%d shadow map (4 cascades)\n", size, size));
+
+	const uint32_t layers = 4;
+
+	// Create shadow color image (RGBA16F, 2D array, 4 layers)
+	{
+		vk::ImageCreateInfo imageInfo;
+		imageInfo.imageType = vk::ImageType::e2D;
+		imageInfo.format = vk::Format::eR16G16B16A16Sfloat;
+		imageInfo.extent = vk::Extent3D(static_cast<uint32_t>(size), static_cast<uint32_t>(size), 1);
+		imageInfo.mipLevels = 1;
+		imageInfo.arrayLayers = layers;
+		imageInfo.samples = vk::SampleCountFlagBits::e1;
+		imageInfo.tiling = vk::ImageTiling::eOptimal;
+		imageInfo.usage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled;
+		imageInfo.sharingMode = vk::SharingMode::eExclusive;
+		imageInfo.initialLayout = vk::ImageLayout::eUndefined;
+
+		try {
+			m_shadowColor.image = m_device.createImage(imageInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create shadow color image: %s\n", e.what()));
+			return false;
+		}
+
+		if (!m_memoryManager->allocateImageMemory(m_shadowColor.image, MemoryUsage::GpuOnly, m_shadowColor.allocation)) {
+			m_device.destroyImage(m_shadowColor.image);
+			m_shadowColor.image = nullptr;
+			return false;
+		}
+
+		vk::ImageViewCreateInfo viewInfo;
+		viewInfo.image = m_shadowColor.image;
+		viewInfo.viewType = vk::ImageViewType::e2DArray;
+		viewInfo.format = vk::Format::eR16G16B16A16Sfloat;
+		viewInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor;
+		viewInfo.subresourceRange.baseMipLevel = 0;
+		viewInfo.subresourceRange.levelCount = 1;
+		viewInfo.subresourceRange.baseArrayLayer = 0;
+		viewInfo.subresourceRange.layerCount = layers;
+
+		try {
+			m_shadowColor.view = m_device.createImageView(viewInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create shadow color view: %s\n", e.what()));
+			return false;
+		}
+
+		m_shadowColor.format = vk::Format::eR16G16B16A16Sfloat;
+		m_shadowColor.width = static_cast<uint32_t>(size);
+		m_shadowColor.height = static_cast<uint32_t>(size);
+	}
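+
+	// All four cascades live in the layers of this single 2D-array image; the
+	// shadow pass renders through a layered framebuffer (layers = 4), so draws
+	// can be routed to their cascade by layer index (presumably gl_Layer from
+	// the shadow geometry stage, as in the OpenGL backend). RGBA16F holds the
+	// VSM moments rather than raw depth, which is why a color target
+	// accompanies the D32 depth attachment.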
+
+	// Create shadow depth image (D32F, 2D array, 4 layers)
+	{
+		vk::ImageCreateInfo imageInfo;
+		imageInfo.imageType = vk::ImageType::e2D;
+		imageInfo.format = vk::Format::eD32Sfloat;
+		imageInfo.extent = vk::Extent3D(static_cast<uint32_t>(size), static_cast<uint32_t>(size), 1);
+		imageInfo.mipLevels = 1;
+		imageInfo.arrayLayers = layers;
+		imageInfo.samples = vk::SampleCountFlagBits::e1;
+		imageInfo.tiling = vk::ImageTiling::eOptimal;
+		imageInfo.usage = vk::ImageUsageFlagBits::eDepthStencilAttachment;
+		imageInfo.sharingMode = vk::SharingMode::eExclusive;
+		imageInfo.initialLayout = vk::ImageLayout::eUndefined;
+
+		try {
+			m_shadowDepth.image = m_device.createImage(imageInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create shadow depth image: %s\n", e.what()));
+			return false;
+		}
+
+		if (!m_memoryManager->allocateImageMemory(m_shadowDepth.image, MemoryUsage::GpuOnly, m_shadowDepth.allocation)) {
+			m_device.destroyImage(m_shadowDepth.image);
+			m_shadowDepth.image = nullptr;
+			return false;
+		}
+
+		vk::ImageViewCreateInfo viewInfo;
+		viewInfo.image = m_shadowDepth.image;
+		viewInfo.viewType = vk::ImageViewType::e2DArray;
+		viewInfo.format = vk::Format::eD32Sfloat;
+		viewInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eDepth;
+		viewInfo.subresourceRange.baseMipLevel = 0;
+		viewInfo.subresourceRange.levelCount = 1;
+		viewInfo.subresourceRange.baseArrayLayer = 0;
+		viewInfo.subresourceRange.layerCount = layers;
+
+		try {
+			m_shadowDepth.view = m_device.createImageView(viewInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create shadow depth view: %s\n", e.what()));
+			return false;
+		}
+
+		m_shadowDepth.format = vk::Format::eD32Sfloat;
+		m_shadowDepth.width = static_cast<uint32_t>(size);
+		m_shadowDepth.height = static_cast<uint32_t>(size);
+	}
+
+	// Create shadow render pass: 1 color (RGBA16F) + 1 depth (D32F), both eClear
+	{
+		std::array<vk::AttachmentDescription, 2> attachments;
+
+		// Color attachment (RGBA16F) — stores VSM depth variance
+		attachments[0].format = vk::Format::eR16G16B16A16Sfloat;
+		attachments[0].samples = vk::SampleCountFlagBits::e1;
+		attachments[0].loadOp = vk::AttachmentLoadOp::eClear;
+		attachments[0].storeOp = vk::AttachmentStoreOp::eStore;
+		attachments[0].stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+		attachments[0].stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+		attachments[0].initialLayout = vk::ImageLayout::eUndefined;
+		attachments[0].finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		// Depth attachment (D32F)
+		attachments[1].format = vk::Format::eD32Sfloat;
+		attachments[1].samples = vk::SampleCountFlagBits::e1;
+		attachments[1].loadOp = vk::AttachmentLoadOp::eClear;
+		attachments[1].storeOp = vk::AttachmentStoreOp::eDontCare;
+		attachments[1].stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+		attachments[1].stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+		attachments[1].initialLayout = vk::ImageLayout::eUndefined;
+		attachments[1].finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		vk::AttachmentReference colorRef;
+		colorRef.attachment = 0;
+		colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal;
+
+		vk::AttachmentReference depthRef;
+		depthRef.attachment = 1;
+		depthRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal;
+
+		vk::SubpassDescription subpass;
+		subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics;
+		subpass.colorAttachmentCount = 1;
+		subpass.pColorAttachments = &colorRef;
+		subpass.pDepthStencilAttachment = &depthRef;
+
+		vk::SubpassDependency dep;
+		dep.srcSubpass = VK_SUBPASS_EXTERNAL;
+		dep.dstSubpass = 0;
+		dep.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput | vk::PipelineStageFlagBits::eEarlyFragmentTests;
+		dep.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput | vk::PipelineStageFlagBits::eEarlyFragmentTests;
+		dep.srcAccessMask = {};
+		dep.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite | vk::AccessFlagBits::eDepthStencilAttachmentWrite;
+
+		vk::RenderPassCreateInfo rpInfo;
+		rpInfo.attachmentCount = static_cast<uint32_t>(attachments.size());
+		rpInfo.pAttachments = attachments.data();
+		rpInfo.subpassCount = 1;
+		rpInfo.pSubpasses = &subpass;
+		rpInfo.dependencyCount = 1;
+		rpInfo.pDependencies = &dep;
+
+		try {
+			m_shadowRenderPass = m_device.createRenderPass(rpInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create shadow render pass: %s\n", e.what()));
+			return false;
+		}
+	}
+
+	// Create layered framebuffer (all 4 layers at once)
+	{
+		std::array<vk::ImageView, 2> fbAttachments = {
+			m_shadowColor.view,
+			m_shadowDepth.view,
+		};
+
+		vk::FramebufferCreateInfo fbInfo;
+		fbInfo.renderPass = m_shadowRenderPass;
+		fbInfo.attachmentCount = static_cast<uint32_t>(fbAttachments.size());
+		fbInfo.pAttachments = fbAttachments.data();
+		fbInfo.width = static_cast<uint32_t>(size);
+		fbInfo.height = static_cast<uint32_t>(size);
+		fbInfo.layers = layers;
+
+		try {
+			m_shadowFramebuffer = m_device.createFramebuffer(fbInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create shadow framebuffer: %s\n", e.what()));
+			return false;
+		}
+	}
+
+	m_shadowTextureSize = size;
+	m_shadowInitialized = true;
+	mprintf(("VulkanPostProcessor: Shadow map initialized (%dx%d, 4 cascades)\n", size, size));
+	return true;
+}
+
+void VulkanPostProcessor::shutdownShadowPass()
+{
+	if (!m_shadowInitialized) {
+		return;
+	}
+
+	if (m_shadowFramebuffer) {
+		m_device.destroyFramebuffer(m_shadowFramebuffer);
+		m_shadowFramebuffer = nullptr;
+	}
+	if (m_shadowRenderPass) {
+		m_device.destroyRenderPass(m_shadowRenderPass);
+		m_shadowRenderPass = nullptr;
+	}
+
+	if (m_shadowColor.view) {
+		m_device.destroyImageView(m_shadowColor.view);
+		m_shadowColor.view = nullptr;
+	}
+	if (m_shadowColor.image) {
+		m_device.destroyImage(m_shadowColor.image);
+		m_shadowColor.image = nullptr;
+	}
+	if (m_shadowColor.allocation.memory != VK_NULL_HANDLE) {
+		m_memoryManager->freeAllocation(m_shadowColor.allocation);
+	}
+
+	if (m_shadowDepth.view) {
+		m_device.destroyImageView(m_shadowDepth.view);
+		m_shadowDepth.view = nullptr;
+	}
+	if (m_shadowDepth.image) {
+		m_device.destroyImage(m_shadowDepth.image);
+		m_shadowDepth.image = nullptr;
+	}
+	if (m_shadowDepth.allocation.memory != VK_NULL_HANDLE) {
+		m_memoryManager->freeAllocation(m_shadowDepth.allocation);
+	}
+
+	m_shadowTextureSize = 0;
+	m_shadowInitialized = false;
+}
+
+bool VulkanPostProcessor::createImage(uint32_t width, uint32_t height, vk::Format format,
+	vk::ImageUsageFlags usage, vk::ImageAspectFlags aspect,
+	vk::Image& outImage, vk::ImageView& outView,
+	VulkanAllocation& outAllocation,
+	vk::SampleCountFlagBits sampleCount)
+{
+	// Create image
+	vk::ImageCreateInfo imageInfo;
+	imageInfo.imageType = vk::ImageType::e2D;
+	imageInfo.format = format;
+	imageInfo.extent.width = width;
+	imageInfo.extent.height = height;
+	imageInfo.extent.depth = 1;
+	imageInfo.mipLevels = 1;
+	imageInfo.arrayLayers = 1;
+	imageInfo.samples = sampleCount;
+	imageInfo.tiling = vk::ImageTiling::eOptimal;
+	imageInfo.usage = usage;
+	imageInfo.sharingMode = vk::SharingMode::eExclusive;
+	imageInfo.initialLayout = vk::ImageLayout::eUndefined;
+
+	try {
+		outImage = m_device.createImage(imageInfo);
+	} catch (const vk::SystemError& e) {
+		mprintf(("VulkanPostProcessor: Failed to create image: %s\n", e.what()));
+		return false;
+	}
+
+	// Allocate memory
+	if (!m_memoryManager->allocateImageMemory(outImage, MemoryUsage::GpuOnly, outAllocation)) {
+		mprintf(("VulkanPostProcessor: Failed to allocate image memory!\n"));
+		m_device.destroyImage(outImage);
+		outImage = nullptr;
+		return false;
+	}
+
+	// Create image view (plain 2D, not array)
+	vk::ImageViewCreateInfo viewInfo;
+	viewInfo.image = outImage;
+	viewInfo.viewType = vk::ImageViewType::e2D;
+	viewInfo.format = format;
+	viewInfo.subresourceRange.aspectMask = aspect;
+	viewInfo.subresourceRange.baseMipLevel = 0;
+	viewInfo.subresourceRange.levelCount = 1;
+	viewInfo.subresourceRange.baseArrayLayer = 0;
+	viewInfo.subresourceRange.layerCount = 1;
+
+	try {
+		outView = m_device.createImageView(viewInfo);
+	} catch (const vk::SystemError& e) {
+		mprintf(("VulkanPostProcessor: Failed to create image view: %s\n", e.what()));
+		m_device.destroyImage(outImage);
+		m_memoryManager->freeAllocation(outAllocation);
+		outImage = nullptr;
+		return false;
+	}
+
+	return true;
+}
+
+// ========== Fog / Volumetric Nebula ==========
+
+bool VulkanPostProcessor::initFogPass()
+{
+	if (m_fogInitialized) {
+		return true;
+	}
+
+	// Create fog render pass: 1 RGBA16F color attachment, loadOp=eDontCare (writing every pixel),
+	// initialLayout/finalLayout = eColorAttachmentOptimal (scene color stays as render target)
+	{
+		vk::AttachmentDescription att;
+		att.format = vk::Format::eR16G16B16A16Sfloat;
+		att.samples = vk::SampleCountFlagBits::e1;
+		att.loadOp = vk::AttachmentLoadOp::eDontCare;
+		att.storeOp = vk::AttachmentStoreOp::eStore;
+		att.stencilLoadOp = vk::AttachmentLoadOp::eDontCare;
+		att.stencilStoreOp = vk::AttachmentStoreOp::eDontCare;
+		att.initialLayout = vk::ImageLayout::eColorAttachmentOptimal;
+		att.finalLayout = vk::ImageLayout::eColorAttachmentOptimal;
+
+		vk::AttachmentReference colorRef;
+		colorRef.attachment = 0;
+		colorRef.layout = vk::ImageLayout::eColorAttachmentOptimal;
+
+		vk::SubpassDescription subpass;
+		subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics;
+		subpass.colorAttachmentCount = 1;
+		subpass.pColorAttachments = &colorRef;
+
+		vk::SubpassDependency dep;
+		dep.srcSubpass = VK_SUBPASS_EXTERNAL;
+		dep.dstSubpass = 0;
+		dep.srcStageMask = vk::PipelineStageFlagBits::eFragmentShader
+			| vk::PipelineStageFlagBits::eColorAttachmentOutput;
+		dep.dstStageMask = vk::PipelineStageFlagBits::eFragmentShader
+			| vk::PipelineStageFlagBits::eColorAttachmentOutput;
+		dep.srcAccessMask = vk::AccessFlagBits::eShaderRead
+			| vk::AccessFlagBits::eColorAttachmentWrite;
+		dep.dstAccessMask = vk::AccessFlagBits::eShaderRead
+			| vk::AccessFlagBits::eColorAttachmentWrite;
+
+		vk::RenderPassCreateInfo rpInfo;
+		rpInfo.attachmentCount = 1;
+		rpInfo.pAttachments = &att;
+		rpInfo.subpassCount = 1;
+		rpInfo.pSubpasses = &subpass;
+		rpInfo.dependencyCount = 1;
+		rpInfo.pDependencies = &dep;
+
+		try {
+			m_fogRenderPass = m_device.createRenderPass(rpInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create fog render pass: %s\n", e.what()));
+			return false;
+		}
+	}
+
+	// Create fog framebuffer (scene color as attachment)
+	{
+		vk::FramebufferCreateInfo fbInfo;
+		fbInfo.renderPass = m_fogRenderPass;
+		fbInfo.attachmentCount = 1;
+		fbInfo.pAttachments = &m_sceneColor.view;
+		fbInfo.width = m_extent.width;
+		fbInfo.height = m_extent.height;
+		fbInfo.layers = 1;
+
+		try {
+			m_fogFramebuffer = m_device.createFramebuffer(fbInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create fog framebuffer: %s\n", e.what()));
+			return false;
+		}
+	}
+
+	m_fogInitialized = true;
+	mprintf(("VulkanPostProcessor: Fog pass initialized\n"));
+	return true;
+}
+
+void VulkanPostProcessor::shutdownFogPass()
+{
+	if (m_emissiveMipmappedFullView) {
+		m_device.destroyImageView(m_emissiveMipmappedFullView);
+		m_emissiveMipmappedFullView = nullptr;
+	}
+	if (m_emissiveMipmapped.view) {
+		m_device.destroyImageView(m_emissiveMipmapped.view);
+		m_emissiveMipmapped.view = nullptr;
+	}
+	if (m_emissiveMipmapped.image) {
+		m_device.destroyImage(m_emissiveMipmapped.image);
+		m_emissiveMipmapped.image = nullptr;
+	}
+	if (m_emissiveMipmapped.allocation.memory != VK_NULL_HANDLE) {
+		m_memoryManager->freeAllocation(m_emissiveMipmapped.allocation);
+	}
+	m_emissiveMipmappedInitialized = false;
+
+	if (m_fogFramebuffer) {
+		m_device.destroyFramebuffer(m_fogFramebuffer);
+		m_fogFramebuffer = nullptr;
+	}
+	if (m_fogRenderPass) {
+		m_device.destroyRenderPass(m_fogRenderPass);
+		m_fogRenderPass = nullptr;
+	}
+	m_fogInitialized = false;
+}
+
+void VulkanPostProcessor::renderSceneFog(vk::CommandBuffer cmd)
+{
+	GR_DEBUG_SCOPE("Scene Fog");
+
+	if (!m_fogInitialized) {
+		if (!initFogPass()) {
+			return;
+		}
+	}
+
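+	// Rough shape of this pass, assuming the SDR_TYPE_SCENE_FOG shader follows
+	// the usual exponential fog model (illustrative sketch, not the shader
+	// source):
+	//   dist  = view distance reconstructed from the depth copy (binding 4)
+	//   f     = exp(-fog_density * max(dist - fog_start, 0.0))
+	//   color = mix(fog_color, lit_scene, f)
+	// The lit scene comes from the composite bound at binding 1, and the
+	// result overwrites scene color through m_fogFramebuffer.
+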
+	auto* pipelineMgr = getPipelineManager();
+	auto* descriptorMgr = getDescriptorManager();
+	auto* bufferMgr = getBufferManager();
+	auto* texMgr = getTextureManager();
+
+	if (!pipelineMgr || !descriptorMgr || !bufferMgr || !texMgr) {
+		return;
+	}
+
+	// Copy scene depth for fog sampling
+	copySceneDepth(cmd);
+
+	// Transition scene color: eShaderReadOnlyOptimal -> eColorAttachmentOptimal
+	{
+		vk::ImageMemoryBarrier barrier;
+		barrier.srcAccessMask = {};
+		barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+		barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal;
+		barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.image = m_sceneColor.image;
+		barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+		cmd.pipelineBarrier(
+			vk::PipelineStageFlagBits::eTopOfPipe,
+			vk::PipelineStageFlagBits::eColorAttachmentOutput,
+			{}, nullptr, nullptr, barrier);
+	}
+
+	// Map bloom UBO for fog UBO data
+	m_bloomUBOMapped = m_memoryManager->mapMemory(m_bloomUBOAlloc);
+	Verify(m_bloomUBOMapped);
+
+	// Fill fog UBO
+	graphics::generic_data::fog_data fogData;
+	{
+		float fog_near, fog_far, fog_density;
+		neb2_get_adjusted_fog_values(&fog_near, &fog_far, &fog_density);
+		unsigned char r, g, b;
+		neb2_get_fog_color(&r, &g, &b);
+
+		fogData.fog_start = fog_near;
+		fogData.fog_density = fog_density;
+		fogData.fog_color.xyz.x = r / 255.f;
+		fogData.fog_color.xyz.y = g / 255.f;
+		fogData.fog_color.xyz.z = b / 255.f;
+		fogData.zNear = Min_draw_distance;
+		fogData.zFar = Max_draw_distance;
+	}
+
+	// Custom descriptor writes to bind depth copy at binding 4
+	PipelineConfig config;
+	config.shaderType = SDR_TYPE_SCENE_FOG;
+	config.vertexLayoutHash = 0;
+	config.primitiveType = PRIM_TYPE_TRIS;
+	config.depthMode = ZBUFFER_TYPE_NONE;
+	config.blendMode = ALPHA_BLEND_NONE;
+	config.cullEnabled = false;
+	config.depthWriteEnabled = false;
+	config.renderPass = m_fogRenderPass;
+
+	vertex_layout emptyLayout;
+	vk::Pipeline pipeline = pipelineMgr->getPipeline(config, emptyLayout);
+	if (!pipeline) {
+		m_memoryManager->unmapMemory(m_bloomUBOAlloc);
+		m_bloomUBOMapped = nullptr;
+		return;
+	}
+
+	vk::PipelineLayout pipelineLayout = pipelineMgr->getPipelineLayout();
+
+	// Begin render pass
+	vk::RenderPassBeginInfo rpBegin;
+	rpBegin.renderPass = m_fogRenderPass;
+	rpBegin.framebuffer = m_fogFramebuffer;
+	rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+	rpBegin.renderArea.extent = m_extent;
+
+	cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+	cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
+
+	vk::Viewport viewport;
+	viewport.x = 0.0f;
+	viewport.y = 0.0f;
+	viewport.width = static_cast<float>(m_extent.width);
+	viewport.height = static_cast<float>(m_extent.height);
+	viewport.minDepth = 0.0f;
+	viewport.maxDepth = 1.0f;
+	cmd.setViewport(0, viewport);
+
+	vk::Rect2D scissor;
+	scissor.offset = vk::Offset2D(0, 0);
+	scissor.extent = m_extent;
+	cmd.setScissor(0, scissor);
+
+	// Allocate Material descriptor set (Set 1)
+	vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material);
+	Verify(materialSet);
+
+	{
+		auto fallbackBuf = bufferMgr->getFallbackUniformBuffer();
+		vk::DescriptorBufferInfo fallbackBufInfo;
+		fallbackBufInfo.buffer = fallbackBuf;
+		fallbackBufInfo.offset = 0;
+		fallbackBufInfo.range = 4096;
+
+		vk::Sampler defaultSampler = texMgr->getDefaultSampler();
+		vk::ImageView fallbackView = texMgr->getFallbackTextureView2D();
+
+		// Binding 0: ModelData UBO (fallback)
+		vk::WriteDescriptorSet modelWrite;
+		modelWrite.dstSet = materialSet;
+		modelWrite.dstBinding = 0;
+		modelWrite.dstArrayElement = 0;
+		modelWrite.descriptorCount = 1;
+		modelWrite.descriptorType = vk::DescriptorType::eUniformBuffer;
+		modelWrite.pBufferInfo = &fallbackBufInfo;
+
+		// Binding 1: composite (lit result) at element [0]
+		vk::DescriptorImageInfo compositeInfo;
+		compositeInfo.sampler = m_linearSampler;
+		compositeInfo.imageView = m_gbufComposite.view;
+		compositeInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		vk::WriteDescriptorSet texWrite;
+		texWrite.dstSet = materialSet;
+		texWrite.dstBinding = 1;
+		texWrite.dstArrayElement = 0;
+		texWrite.descriptorCount = 1;
+		texWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		texWrite.pImageInfo = &compositeInfo;
+
+		// Fill remaining texture array elements with fallback
+		std::array fallbackImages;
+		for (auto& fi : fallbackImages) {
+			fi.sampler = defaultSampler;
+			fi.imageView = fallbackView;
+			fi.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		}
+
+		vk::WriteDescriptorSet fallbackTexWrite;
+		fallbackTexWrite.dstSet = materialSet;
+		fallbackTexWrite.dstBinding = 1;
+		fallbackTexWrite.dstArrayElement = 1;
+		fallbackTexWrite.descriptorCount = static_cast<uint32_t>(fallbackImages.size());
+		fallbackTexWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		fallbackTexWrite.pImageInfo = fallbackImages.data();
+
+		// Binding 2: DecalGlobals UBO (fallback)
+		vk::WriteDescriptorSet decalWrite;
+		decalWrite.dstSet = materialSet;
+		decalWrite.dstBinding = 2;
+		decalWrite.dstArrayElement = 0;
+		decalWrite.descriptorCount = 1;
+		decalWrite.descriptorType = vk::DescriptorType::eUniformBuffer;
+		decalWrite.pBufferInfo = &fallbackBufInfo;
+
+		// Binding 3: Transform SSBO (fallback)
+		vk::WriteDescriptorSet ssboWrite;
+		ssboWrite.dstSet = materialSet;
+		ssboWrite.dstBinding = 3;
+		ssboWrite.dstArrayElement = 0;
+		ssboWrite.descriptorCount = 1;
+		ssboWrite.descriptorType = vk::DescriptorType::eStorageBuffer;
+		ssboWrite.pBufferInfo = &fallbackBufInfo;
+
+		// Binding 4: Depth copy (actual depth, not fallback)
+		vk::DescriptorImageInfo depthInfo;
+		depthInfo.sampler = m_linearSampler;
+		depthInfo.imageView = m_sceneDepthCopy.view;
+		depthInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		vk::WriteDescriptorSet depthWrite;
+		depthWrite.dstSet = materialSet;
+		depthWrite.dstBinding = 4;
+		depthWrite.dstArrayElement = 0;
+		depthWrite.descriptorCount = 1;
+		depthWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		depthWrite.pImageInfo = &depthInfo;
+
+		// Bindings 5, 6: Fallback texture
+		vk::DescriptorImageInfo sceneColorFallback;
+		sceneColorFallback.sampler = defaultSampler;
+		sceneColorFallback.imageView = fallbackView;
+		sceneColorFallback.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		vk::WriteDescriptorSet bind5Write;
+		bind5Write.dstSet = materialSet;
+		bind5Write.dstBinding = 5;
+		bind5Write.dstArrayElement = 0;
+		bind5Write.descriptorCount = 1;
+		bind5Write.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		bind5Write.pImageInfo = &sceneColorFallback;
+
+		vk::WriteDescriptorSet bind6Write;
+		bind6Write.dstSet = materialSet;
+		bind6Write.dstBinding = 6;
+		bind6Write.dstArrayElement = 0;
+		bind6Write.descriptorCount = 1;
+		bind6Write.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		bind6Write.pImageInfo = &sceneColorFallback;
+
+		std::array<vk::WriteDescriptorSet, 8> writes = {
+			texWrite, modelWrite, decalWrite, fallbackTexWrite,
+			ssboWrite, depthWrite, bind5Write, bind6Write
+		};
+		m_device.updateDescriptorSets(writes, {});
+	}
+
+	// Allocate PerDraw descriptor set (Set 2) with fog UBO
+	vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw);
+	Verify(perDrawSet);
+
+	{
+		Assertion(m_bloomUBOCursor < BLOOM_UBO_MAX_SLOTS, "Fog UBO slot overflow!");
+		uint32_t slotOffset = m_bloomUBOCursor * static_cast<uint32_t>(BLOOM_UBO_SLOT_SIZE);
+		memcpy(static_cast<uint8_t*>(m_bloomUBOMapped) + slotOffset, &fogData, sizeof(fogData));
+		m_bloomUBOCursor++;
+
+		vk::DescriptorBufferInfo uboInfo;
+		uboInfo.buffer = m_bloomUBO;
+		uboInfo.offset = slotOffset;
+		uboInfo.range = BLOOM_UBO_SLOT_SIZE;
+
+		vk::WriteDescriptorSet write;
+		write.dstSet = perDrawSet;
+		write.dstBinding = 0;
+		write.dstArrayElement = 0;
+		write.descriptorCount = 1;
+		write.descriptorType = vk::DescriptorType::eUniformBuffer;
+		write.pBufferInfo = &uboInfo;
+
+		auto fallbackBuf = bufferMgr->getFallbackUniformBuffer();
+		vk::DescriptorBufferInfo fallbackInfo;
+		fallbackInfo.buffer = fallbackBuf;
+		fallbackInfo.offset = 0;
+		fallbackInfo.range = 4096;
+
+		SCP_vector<vk::WriteDescriptorSet> writes;
+		writes.push_back(write);
+		for (uint32_t b = 1; b <= 4; ++b) {
+			vk::WriteDescriptorSet fw;
+			fw.dstSet = perDrawSet;
+			fw.dstBinding = b;
+			fw.dstArrayElement = 0;
+			fw.descriptorCount = 1;
+			fw.descriptorType = vk::DescriptorType::eUniformBuffer;
+			fw.pBufferInfo = &fallbackInfo;
+			writes.push_back(fw);
+		}
+
+		m_device.updateDescriptorSets(writes, {});
+	}
+
+	// Bind descriptor sets and draw
+	cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout,
+		static_cast<uint32_t>(DescriptorSetIndex::Material),
+		{materialSet, perDrawSet}, {});
+
+	cmd.draw(3, 1, 0, 0);
+	cmd.endRenderPass();
+
+	// Scene color is now in eColorAttachmentOptimal (fog render pass finalLayout)
+
+	m_memoryManager->unmapMemory(m_bloomUBOAlloc);
+	m_bloomUBOMapped = nullptr;
+}
+
+void VulkanPostProcessor::renderVolumetricFog(vk::CommandBuffer cmd)
+{
+	GR_DEBUG_SCOPE("Volumetric Nebulae");
+	TRACE_SCOPE(tracing::Volumetrics);
+
+	if (!m_fogInitialized) {
+		if (!initFogPass()) {
+			return;
+		}
+	}
+
+	auto* pipelineMgr = getPipelineManager();
+	auto* descriptorMgr = getDescriptorManager();
+	auto* bufferMgr = getBufferManager();
+	auto* texMgr = getTextureManager();
+
+	if (!pipelineMgr || !descriptorMgr || !bufferMgr || !texMgr) {
+		return;
+	}
+
+	const volumetric_nebula& neb = *The_mission.volumetrics;
+	Assertion(neb.isVolumeBitmapValid(), "Volumetric nebula was not properly initialized!");
+
+	// Get 3D texture handles
+	int volHandle = neb.getVolumeBitmapHandle();
+	auto* volSlot = texMgr->getTextureSlot(volHandle);
+	if (!volSlot || !volSlot->imageView) {
+		mprintf(("VulkanPostProcessor::renderVolumetricFog: Volume texture not available\n"));
+		return;
+	}
+
+	bool noiseActive = neb.getNoiseActive();
+	tcache_slot_vulkan* noiseSlot = nullptr;
+	if (noiseActive) {
+		int noiseHandle = neb.getNoiseVolumeBitmapHandle();
+		noiseSlot = texMgr->getTextureSlot(noiseHandle);
+	}
+
+	// Prepare mipmapped emissive copy for LOD sampling
+	if (!m_emissiveMipmappedInitialized) {
+		m_emissiveMipLevels = 1;
+		uint32_t dim = std::max(m_extent.width, m_extent.height);
+		while (dim > 1) {
+			dim >>= 1;
+			m_emissiveMipLevels++;
+		}
+
+		vk::ImageCreateInfo imgInfo;
+		imgInfo.imageType = vk::ImageType::e2D;
+		imgInfo.format = vk::Format::eR16G16B16A16Sfloat;
+		imgInfo.extent = vk::Extent3D(m_extent.width, m_extent.height, 1);
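+		// The loop above computed the full chain down to 1x1, i.e.
+		// floor(log2(max(width, height))) + 1 levels; a 1920x1080 scene,
+		// for example, yields 11 levels (1920 -> 960 -> ... -> 1).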
+		imgInfo.mipLevels = m_emissiveMipLevels;
+		imgInfo.arrayLayers = 1;
+		imgInfo.samples = vk::SampleCountFlagBits::e1;
+		imgInfo.tiling = vk::ImageTiling::eOptimal;
+		imgInfo.usage = vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst
+			| vk::ImageUsageFlagBits::eSampled;
+		imgInfo.sharingMode = vk::SharingMode::eExclusive;
+		imgInfo.initialLayout = vk::ImageLayout::eUndefined;
+
+		try {
+			m_emissiveMipmapped.image = m_device.createImage(imgInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create mipmapped emissive: %s\n", e.what()));
+			return;
+		}
+
+		Verify(m_memoryManager->allocateImageMemory(m_emissiveMipmapped.image, MemoryUsage::GpuOnly, m_emissiveMipmapped.allocation));
+
+		// Create full-mip-chain view for LOD sampling
+		vk::ImageViewCreateInfo viewInfo;
+		viewInfo.image = m_emissiveMipmapped.image;
+		viewInfo.viewType = vk::ImageViewType::e2D;
+		viewInfo.format = vk::Format::eR16G16B16A16Sfloat;
+		viewInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor;
+		viewInfo.subresourceRange.baseMipLevel = 0;
+		viewInfo.subresourceRange.levelCount = m_emissiveMipLevels;
+		viewInfo.subresourceRange.baseArrayLayer = 0;
+		viewInfo.subresourceRange.layerCount = 1;
+
+		try {
+			m_emissiveMipmappedFullView = m_device.createImageView(viewInfo);
+		} catch (const vk::SystemError& e) {
+			mprintf(("VulkanPostProcessor: Failed to create mipmapped emissive view: %s\n", e.what()));
+			return;
+		}
+
+		m_emissiveMipmapped.format = vk::Format::eR16G16B16A16Sfloat;
+		m_emissiveMipmapped.width = m_extent.width;
+		m_emissiveMipmapped.height = m_extent.height;
+		m_emissiveMipmappedInitialized = true;
+	}
+
+	// Copy G-buffer emissive (mip 0) to mipmapped emissive, then generate mips.
+	// dstMipLevels transitions ALL mip levels to eTransferDstOptimal in the pre-barrier.
+	// Skip dst post-barrier (stays in eTransferDstOptimal for generateMipmaps).
+	copyImageToImage(cmd,
+		m_gbufEmissive.image, vk::ImageLayout::eShaderReadOnlyOptimal, vk::ImageLayout::eShaderReadOnlyOptimal,
+		m_emissiveMipmapped.image, vk::ImageLayout::eUndefined, vk::ImageLayout::eTransferDstOptimal,
+		m_extent,
+		vk::ImageAspectFlagBits::eColor,
+		m_emissiveMipLevels);
+
+	// Generate mipmaps via blit chain (expects dst in eTransferDstOptimal).
+	// After return, all mips are in eShaderReadOnlyOptimal.
+	generateMipmaps(cmd, m_emissiveMipmapped.image, m_extent.width, m_extent.height, m_emissiveMipLevels);
+
+	// Copy scene depth (if not already done this frame). copySceneDepth() is
+	// harmless to repeat apart from re-transitioning the depth buffer: if scene
+	// fog already ran, the copy is redundant; for standalone volumetrics it is
+	// required.
+	copySceneDepth(cmd);
+
+	// Transition scene color → eColorAttachmentOptimal for the fog render pass.
+	// oldLayout=eUndefined is safe: render pass has loadOp=eDontCare (overwrites every pixel).
+	// Scene color may be in eShaderReadOnlyOptimal (volumetric-only) or
+	// eColorAttachmentOptimal (after scene fog + copySceneColorToComposite).
+	{
+		vk::ImageMemoryBarrier barrier;
+		barrier.srcAccessMask = {};
+		barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite;
+		barrier.oldLayout = vk::ImageLayout::eUndefined;
+		barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal;
+		barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barrier.image = m_sceneColor.image;
+		barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1};
+
+		cmd.pipelineBarrier(
+			vk::PipelineStageFlagBits::eTopOfPipe,
+			vk::PipelineStageFlagBits::eColorAttachmentOutput,
+			{}, nullptr, nullptr, barrier);
+	}
+
+	// Map bloom UBO for volumetric fog UBO data
+	m_bloomUBOMapped = m_memoryManager->mapMemory(m_bloomUBOAlloc);
+	Verify(m_bloomUBOMapped);
+
+	// Fill volumetric fog UBO
+	graphics::generic_data::volumetric_fog_data volData;
+	{
+		gr_set_proj_matrix(Proj_fov, gr_screen.clip_aspect, Min_draw_distance, Max_draw_distance);
+		gr_set_view_matrix(&Eye_position, &Eye_matrix);
+		vm_inverse_matrix4(&volData.p_inv, &gr_projection_matrix);
+		vm_inverse_matrix4(&volData.v_inv, &gr_view_matrix);
+		gr_end_view_matrix();
+		gr_end_proj_matrix();
+
+		volData.zNear = Min_draw_distance;
+		volData.zFar = Max_draw_distance;
+		volData.cameraPos = Eye_position;
+
+		// Find first directional light for global light direction/color
+		vec3d global_light_dir = ZERO_VECTOR;
+		vec3d global_light_diffuse = ZERO_VECTOR;
+		for (const auto& l : Lights) {
+			if (l.type == Light_Type::Directional) {
+				global_light_dir = l.vec;
+				global_light_diffuse.xyz.x = l.r * l.intensity;
+				global_light_diffuse.xyz.y = l.g * l.intensity;
+				global_light_diffuse.xyz.z = l.b * l.intensity;
+				break;
+			}
+		}
+
+		volData.globalLightDirection = global_light_dir;
+		volData.globalLightDiffuse = global_light_diffuse;
+		volData.nebPos = neb.getPos();
+		volData.nebSize = neb.getSize();
+		volData.stepsize = neb.getStepsize();
+		volData.opacitydistance = neb.getOpacityDistance();
+		volData.alphalimit = neb.getAlphaLim();
+		auto nebColor = neb.getNebulaColor();
+		volData.nebColor[0] = std::get<0>(nebColor);
+		volData.nebColor[1] = std::get<1>(nebColor);
+		volData.nebColor[2] = std::get<2>(nebColor);
+		volData.udfScale = neb.getUDFScale();
+		volData.emissiveSpreadFactor = neb.getEmissiveSpread();
+		volData.emissiveIntensity = neb.getEmissiveIntensity();
+		volData.emissiveFalloff = neb.getEmissiveFalloff();
+		volData.henyeyGreensteinCoeff = neb.getHenyeyGreensteinCoeff();
+		volData.directionalLightSampleSteps = neb.getGlobalLightSteps();
+		volData.directionalLightStepSize = neb.getGlobalLightStepsize();
+		auto noiseColor = neb.getNoiseColor();
+		volData.noiseColor[0] = std::get<0>(noiseColor);
+		volData.noiseColor[1] = std::get<1>(noiseColor);
+		volData.noiseColor[2] = std::get<2>(noiseColor);
+		auto noiseScale = neb.getNoiseColorScale();
+		volData.noiseColorScale1 = std::get<0>(noiseScale);
+		volData.noiseColorScale2 = std::get<1>(noiseScale);
+		volData.noiseColorIntensity = neb.getNoiseColorIntensity();
+		volData.aspect = gr_screen.clip_aspect;
+		volData.fov = g3_get_hfov(Proj_fov);
+		volData.doEdgeSmoothing = neb.getEdgeSmoothing() ? 1 : 0;
+		volData.useNoise = noiseActive ? 1 : 0;
+	}
+
+	// We need a custom descriptor write here because the volumetric shader uses
+	// sampler3D at bindings 5 and 6, which differs from the default
+	// drawFullscreenTriangle fallbacks (sampler2D). So we replicate the
+	// drawFullscreenTriangle pattern but customize the material set.
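+	// (Vulkan requires a bound image view's dimensionality to match the
+	// sampler declared in the shader, so leaving the usual 2D fallbacks at
+	// bindings 5/6 would be invalid once the sampler3D declarations read
+	// them; the inactive-noise case therefore falls back to a dedicated 3D
+	// view below.)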
+
+	PipelineConfig config;
+	config.shaderType = SDR_TYPE_VOLUMETRIC_FOG;
+	config.vertexLayoutHash = 0;
+	config.primitiveType = PRIM_TYPE_TRIS;
+	config.depthMode = ZBUFFER_TYPE_NONE;
+	config.blendMode = ALPHA_BLEND_NONE;
+	config.cullEnabled = false;
+	config.depthWriteEnabled = false;
+	config.renderPass = m_fogRenderPass;
+
+	vertex_layout emptyLayout;
+	vk::Pipeline pipeline = pipelineMgr->getPipeline(config, emptyLayout);
+	if (!pipeline) {
+		m_memoryManager->unmapMemory(m_bloomUBOAlloc);
+		m_bloomUBOMapped = nullptr;
+		return;
+	}
+
+	vk::PipelineLayout pipelineLayout = pipelineMgr->getPipelineLayout();
+
+	// Begin render pass
+	vk::RenderPassBeginInfo rpBegin;
+	rpBegin.renderPass = m_fogRenderPass;
+	rpBegin.framebuffer = m_fogFramebuffer;
+	rpBegin.renderArea.offset = vk::Offset2D(0, 0);
+	rpBegin.renderArea.extent = m_extent;
+
+	cmd.beginRenderPass(rpBegin, vk::SubpassContents::eInline);
+	cmd.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
+
+	vk::Viewport viewport;
+	viewport.x = 0.0f;
+	viewport.y = 0.0f;
+	viewport.width = static_cast<float>(m_extent.width);
+	viewport.height = static_cast<float>(m_extent.height);
+	viewport.minDepth = 0.0f;
+	viewport.maxDepth = 1.0f;
+	cmd.setViewport(0, viewport);
+
+	vk::Rect2D scissor;
+	scissor.offset = vk::Offset2D(0, 0);
+	scissor.extent = m_extent;
+	cmd.setScissor(0, scissor);
+
+	// Allocate Material descriptor set (Set 1)
+	vk::DescriptorSet materialSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::Material);
+	Verify(materialSet);
+
+	{
+		auto fallbackBuf = bufferMgr->getFallbackUniformBuffer();
+		vk::DescriptorBufferInfo fallbackBufInfo;
+		fallbackBufInfo.buffer = fallbackBuf;
+		fallbackBufInfo.offset = 0;
+		fallbackBufInfo.range = 4096;
+
+		vk::Sampler defaultSampler = texMgr->getDefaultSampler();
+		vk::ImageView fallbackView = texMgr->getFallbackTextureView2D();
+		vk::ImageView fallback3DView = texMgr->getFallback3DView();
+
+		// Binding 0: ModelData UBO (fallback)
+		vk::WriteDescriptorSet modelWrite;
+		modelWrite.dstSet = materialSet;
+		modelWrite.dstBinding = 0;
+		modelWrite.dstArrayElement = 0;
+		modelWrite.descriptorCount = 1;
+		modelWrite.descriptorType = vk::DescriptorType::eUniformBuffer;
+		modelWrite.pBufferInfo = &fallbackBufInfo;
+
+		// Binding 1: Texture array — [0]=composite, [1]=emissive, rest=fallback
+		std::array texArrayInfos;
+		texArrayInfos[0].sampler = m_linearSampler;
+		texArrayInfos[0].imageView = m_gbufComposite.view;
+		texArrayInfos[0].imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		texArrayInfos[1].sampler = m_mipmapSampler;
+		texArrayInfos[1].imageView = m_emissiveMipmappedFullView;
+		texArrayInfos[1].imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		for (size_t i = 2; i < texArrayInfos.size(); i++) {
+			texArrayInfos[i].sampler = defaultSampler;
+			texArrayInfos[i].imageView = fallbackView;
+			texArrayInfos[i].imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+		}
+
+		vk::WriteDescriptorSet texWrite;
+		texWrite.dstSet = materialSet;
+		texWrite.dstBinding = 1;
+		texWrite.dstArrayElement = 0;
+		texWrite.descriptorCount = static_cast<uint32_t>(texArrayInfos.size());
+		texWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		texWrite.pImageInfo = texArrayInfos.data();
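+
+		// Element [1] pairs the full-mip-chain view with m_mipmapSampler on
+		// purpose: the volumetric shader reads pre-blurred emissive light at
+		// explicit LODs (presumably textureLod), and a view or sampler capped
+		// at mip 0 would clamp every lookup to the base level.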
+
+		// Binding 2: DecalGlobals UBO (fallback)
+		vk::WriteDescriptorSet decalWrite;
+		decalWrite.dstSet = materialSet;
+		decalWrite.dstBinding = 2;
+		decalWrite.dstArrayElement = 0;
+		decalWrite.descriptorCount = 1;
+		decalWrite.descriptorType = vk::DescriptorType::eUniformBuffer;
+		decalWrite.pBufferInfo = &fallbackBufInfo;
+
+		// Binding 3: Transform SSBO (fallback)
+		vk::WriteDescriptorSet ssboWrite;
+		ssboWrite.dstSet = materialSet;
+		ssboWrite.dstBinding = 3;
+		ssboWrite.dstArrayElement = 0;
+		ssboWrite.descriptorCount = 1;
+		ssboWrite.descriptorType = vk::DescriptorType::eStorageBuffer;
+		ssboWrite.pBufferInfo = &fallbackBufInfo;
+
+		// Binding 4: Depth copy
+		vk::DescriptorImageInfo depthInfo;
+		depthInfo.sampler = m_linearSampler;
+		depthInfo.imageView = m_sceneDepthCopy.view;
+		depthInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		vk::WriteDescriptorSet depthWrite;
+		depthWrite.dstSet = materialSet;
+		depthWrite.dstBinding = 4;
+		depthWrite.dstArrayElement = 0;
+		depthWrite.descriptorCount = 1;
+		depthWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		depthWrite.pImageInfo = &depthInfo;
+
+		// Binding 5: 3D volume texture
+		vk::DescriptorImageInfo volumeInfo;
+		volumeInfo.sampler = m_linearSampler;
+		volumeInfo.imageView = volSlot->imageView;
+		volumeInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		vk::WriteDescriptorSet volumeWrite;
+		volumeWrite.dstSet = materialSet;
+		volumeWrite.dstBinding = 5;
+		volumeWrite.dstArrayElement = 0;
+		volumeWrite.descriptorCount = 1;
+		volumeWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		volumeWrite.pImageInfo = &volumeInfo;
+
+		// Binding 6: 3D noise texture (or fallback 3D if noise inactive)
+		vk::DescriptorImageInfo noiseInfo;
+		noiseInfo.sampler = m_linearSampler;
+		if (noiseSlot && noiseSlot->imageView) {
+			noiseInfo.imageView = noiseSlot->imageView;
+		} else {
+			noiseInfo.imageView = fallback3DView;
+		}
+		noiseInfo.imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal;
+
+		vk::WriteDescriptorSet noiseWrite;
+		noiseWrite.dstSet = materialSet;
+		noiseWrite.dstBinding = 6;
+		noiseWrite.dstArrayElement = 0;
+		noiseWrite.descriptorCount = 1;
+		noiseWrite.descriptorType = vk::DescriptorType::eCombinedImageSampler;
+		noiseWrite.pImageInfo = &noiseInfo;
+
+		std::array<vk::WriteDescriptorSet, 7> writes = {
+			modelWrite, texWrite, decalWrite, ssboWrite,
+			depthWrite, volumeWrite, noiseWrite
+		};
+		m_device.updateDescriptorSets(writes, {});
+	}
+
+	// Allocate PerDraw descriptor set (Set 2) with volumetric fog UBO
+	vk::DescriptorSet perDrawSet = descriptorMgr->allocateFrameSet(DescriptorSetIndex::PerDraw);
+	Verify(perDrawSet);
+
+	{
+		Assertion(m_bloomUBOCursor < BLOOM_UBO_MAX_SLOTS, "Fog UBO slot overflow!");
+		uint32_t slotOffset = m_bloomUBOCursor * static_cast<uint32_t>(BLOOM_UBO_SLOT_SIZE);
+		memcpy(static_cast<uint8_t*>(m_bloomUBOMapped) + slotOffset, &volData, sizeof(volData));
+		m_bloomUBOCursor++;
+
+		vk::DescriptorBufferInfo uboInfo;
+		uboInfo.buffer = m_bloomUBO;
+		uboInfo.offset = slotOffset;
+		uboInfo.range = BLOOM_UBO_SLOT_SIZE;
+
+		vk::WriteDescriptorSet write;
+		write.dstSet = perDrawSet;
+		write.dstBinding = 0;
+		write.dstArrayElement = 0;
+		write.descriptorCount = 1;
+		write.descriptorType = vk::DescriptorType::eUniformBuffer;
+		write.pBufferInfo = &uboInfo;
+
+		auto fallbackBuf = bufferMgr->getFallbackUniformBuffer();
+		vk::DescriptorBufferInfo fallbackInfo;
+		fallbackInfo.buffer = fallbackBuf;
+		fallbackInfo.offset = 0;
+		fallbackInfo.range = 4096;
+
+		SCP_vector<vk::WriteDescriptorSet> writes;
+		writes.push_back(write);
+		for (uint32_t b = 1; b <= 4; ++b) {
+			vk::WriteDescriptorSet fw;
+			fw.dstSet = perDrawSet;
+			fw.dstBinding = b;
+			fw.dstArrayElement = 0;
+			fw.descriptorCount = 1;
+			fw.descriptorType = vk::DescriptorType::eUniformBuffer;
+			fw.pBufferInfo = &fallbackInfo;
+			writes.push_back(fw);
+		}
+
+		m_device.updateDescriptorSets(writes, {});
+	}
+
+	// Bind descriptor sets and draw
+	cmd.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipelineLayout,
+		static_cast<uint32_t>(DescriptorSetIndex::Material),
+		{materialSet, perDrawSet}, {});
+
+	cmd.draw(3, 1, 0, 0);
+	cmd.endRenderPass();
+
+	// Scene color is now in eColorAttachmentOptimal (fog render pass finalLayout)
+
+	m_memoryManager->unmapMemory(m_bloomUBOAlloc);
+	m_bloomUBOMapped = nullptr;
+}
+
+void copyImageToImage(
+	vk::CommandBuffer cmd,
+	vk::Image src, vk::ImageLayout srcOldLayout, vk::ImageLayout srcNewLayout,
+	vk::Image dst, vk::ImageLayout dstOldLayout, vk::ImageLayout dstNewLayout,
+	vk::Extent2D extent,
+	vk::ImageAspectFlags aspect,
+	uint32_t dstMipLevels)
+{
+	// Derive access mask and pipeline stage from a layout.
+	// 'leaving' = true for srcAccessMask (flushing writes before transition),
+	// false for dstAccessMask (making data available after transition).
+	auto layoutInfo = [](vk::ImageLayout layout, bool leaving)
+		-> std::pair<vk::AccessFlags, vk::PipelineStageFlags> {
+		switch (layout) {
+		case vk::ImageLayout::eUndefined:
+			return {{}, vk::PipelineStageFlagBits::eTopOfPipe};
+		case vk::ImageLayout::eShaderReadOnlyOptimal:
+			return {leaving ? vk::AccessFlags{} : vk::AccessFlags{vk::AccessFlagBits::eShaderRead},
+				vk::PipelineStageFlagBits::eFragmentShader};
+		case vk::ImageLayout::eColorAttachmentOptimal:
+			return {leaving ? vk::AccessFlagBits::eColorAttachmentWrite
+			                : (vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite),
+				vk::PipelineStageFlagBits::eColorAttachmentOutput};
+		case vk::ImageLayout::eDepthStencilAttachmentOptimal:
+			return {leaving ? vk::AccessFlagBits::eDepthStencilAttachmentWrite
+			                : (vk::AccessFlagBits::eDepthStencilAttachmentRead | vk::AccessFlagBits::eDepthStencilAttachmentWrite),
+				leaving ? vk::PipelineStageFlagBits::eLateFragmentTests
+				        : vk::PipelineStageFlagBits::eEarlyFragmentTests};
+		case vk::ImageLayout::eTransferSrcOptimal:
+			return {vk::AccessFlagBits::eTransferRead, vk::PipelineStageFlagBits::eTransfer};
+		case vk::ImageLayout::eTransferDstOptimal:
+			return {vk::AccessFlagBits::eTransferWrite, vk::PipelineStageFlagBits::eTransfer};
+		default:
+			Assertion(false, "copyImageToImage: unsupported layout %d", static_cast<int>(layout));
+			return {{}, vk::PipelineStageFlagBits::eAllCommands};
+		}
+	};
+
+	// 1. Pre-barriers: transition src → eTransferSrcOptimal, dst → eTransferDstOptimal
+	{
+		auto [srcAccess, srcStage] = layoutInfo(srcOldLayout, true);
+		auto [dstAccess, dstStage] = layoutInfo(dstOldLayout, true);
+
+		std::array<vk::ImageMemoryBarrier, 2> barriers;
+
+		barriers[0].srcAccessMask = srcAccess;
+		barriers[0].dstAccessMask = vk::AccessFlagBits::eTransferRead;
+		barriers[0].oldLayout = srcOldLayout;
+		barriers[0].newLayout = vk::ImageLayout::eTransferSrcOptimal;
+		barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barriers[0].image = src;
+		barriers[0].subresourceRange = {aspect, 0, 1, 0, 1};
+
+		barriers[1].srcAccessMask = dstAccess;
+		barriers[1].dstAccessMask = vk::AccessFlagBits::eTransferWrite;
+		barriers[1].oldLayout = dstOldLayout;
+		barriers[1].newLayout = vk::ImageLayout::eTransferDstOptimal;
+		barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+		barriers[1].image = dst;
+		barriers[1].subresourceRange = {aspect, 0, dstMipLevels, 0, 1};
+
+		cmd.pipelineBarrier(
+			srcStage | dstStage,
+			vk::PipelineStageFlagBits::eTransfer,
+			{}, nullptr, nullptr, barriers);
+	}
+
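+	// Worked example for the emissive copy in renderVolumetricFog(): the
+	// source arrives as eShaderReadOnlyOptimal, so layoutInfo() yields no
+	// access flags to flush (prior reads only need an execution dependency)
+	// and eFragmentShader as the stage to wait on; the destination arrives as
+	// eUndefined, so its contents are discardable and its pre-barrier is a
+	// pure layout transition from eTopOfPipe.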
+	// 2. Copy (always mip 0, layer 0)
+	{
+		vk::ImageCopy region;
+		region.srcSubresource = {aspect, 0, 0, 1};
+		region.dstSubresource = {aspect, 0, 0, 1};
+		region.extent = vk::Extent3D(extent.width, extent.height, 1);
+
+		cmd.copyImage(
+			src, vk::ImageLayout::eTransferSrcOptimal,
+			dst, vk::ImageLayout::eTransferDstOptimal,
+			region);
+	}
+
+	// 3. Post-barriers: transition src → srcNewLayout, dst → dstNewLayout
+	// Skip rule: if newLayout matches the transfer layout, skip that barrier
+	{
+		bool skipSrc = (srcNewLayout == vk::ImageLayout::eTransferSrcOptimal);
+		bool skipDst = (dstNewLayout == vk::ImageLayout::eTransferDstOptimal);
+
+		if (skipSrc && skipDst) {
+			return;
+		}
+
+		std::array<vk::ImageMemoryBarrier, 2> barriers;
+		uint32_t count = 0;
+		vk::PipelineStageFlags postDstStage = {};
+
+		if (!skipSrc) {
+			auto [access, stage] = layoutInfo(srcNewLayout, false);
+			barriers[count].srcAccessMask = vk::AccessFlagBits::eTransferRead;
+			barriers[count].dstAccessMask = access;
+			barriers[count].oldLayout = vk::ImageLayout::eTransferSrcOptimal;
+			barriers[count].newLayout = srcNewLayout;
+			barriers[count].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barriers[count].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barriers[count].image = src;
+			barriers[count].subresourceRange = {aspect, 0, 1, 0, 1};
+			count++;
+			postDstStage |= stage;
+		}
+
+		if (!skipDst) {
+			auto [access, stage] = layoutInfo(dstNewLayout, false);
+			barriers[count].srcAccessMask = vk::AccessFlagBits::eTransferWrite;
+			barriers[count].dstAccessMask = access;
+			barriers[count].oldLayout = vk::ImageLayout::eTransferDstOptimal;
+			barriers[count].newLayout = dstNewLayout;
+			barriers[count].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barriers[count].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+			barriers[count].image = dst;
+			barriers[count].subresourceRange = {aspect, 0, dstMipLevels, 0, 1};
+			count++;
+			postDstStage |= stage;
+		}
+
+		cmd.pipelineBarrier(
+			vk::PipelineStageFlagBits::eTransfer,
+			postDstStage,
+			{}, nullptr, nullptr,
+			vk::ArrayProxy<const vk::ImageMemoryBarrier>(count, barriers.data()));
+	}
+}
+
+// No-op: In OpenGL, begin/end push/pop an FBO and run the post-processing
+// pipeline. In Vulkan, this is handled by vulkan_scene_texture_begin/end,
+// which manage the HDR render pass and post-processing passes. These
+// functions are not actively called by the engine.
+void vulkan_post_process_begin() {}
+void vulkan_post_process_end() {}
+
+// No-op: In OpenGL, save/restore swap the depth attachment between
+// Scene_depth_texture and Cockpit_depth_texture to isolate cockpit
+// depth from the main scene. In Vulkan, the render pass loadOp=eClear
+// clears depth at the start of each scene pass, and separate cockpit
+// depth isolation is not yet implemented. Called from ship.cpp during
+// cockpit rendering but degrades gracefully as a no-op (the cockpit just
+// shares the scene depth buffer).
+void vulkan_post_process_save_zbuffer() {}
+void vulkan_post_process_restore_zbuffer() {}
+
+void vulkan_post_process_set_effect(const char* name, int value, const vec3d* rgb)
+{
+	if (!Gr_post_processing_enabled || !graphics::Post_processing_manager) {
+		return;
+	}
+	if (name == nullptr) {
+		return;
+	}
+
+	auto& ls_params = graphics::Post_processing_manager->getLightshaftParams();
+	if (!stricmp("lightshafts", name)) {
+		ls_params.intensity = value / 100.0f;
+		ls_params.on = !!value;
+		return;
+	}
+
+	auto& postEffects = graphics::Post_processing_manager->getPostEffects();
+	for (size_t idx = 0; idx < postEffects.size(); idx++) {
+		if (!stricmp(postEffects[idx].name.c_str(), name)) {
+			postEffects[idx].intensity = (value / postEffects[idx].div) + postEffects[idx].add;
+			if ((rgb != nullptr) && !(vmd_zero_vector == *rgb)) {
+				postEffects[idx].rgb = *rgb;
+			}
+			break;
+		}
+	}
+}
+
+void vulkan_post_process_set_defaults()
+{
+	if (!graphics::Post_processing_manager) {
+		return;
+	}
+
+	auto& postEffects = graphics::Post_processing_manager->getPostEffects();
+	for (auto& effect : postEffects) {
+		effect.intensity = effect.default_intensity;
+	}
+}
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanPostProcessing.h b/code/graphics/vulkan/VulkanPostProcessing.h
new file mode 100644
index 00000000000..65535c1df4d
--- /dev/null
+++ b/code/graphics/vulkan/VulkanPostProcessing.h
@@ -0,0 +1,722 @@
+#pragma once
+
+#include "globalincs/pstypes.h"
+#include "VulkanMemory.h"
+
+#include <vulkan/vulkan.hpp>
+
+namespace graphics {
+namespace vulkan {
+
+/**
+ * @brief Manages the Vulkan post-processing pipeline
+ *
+ * Owns offscreen render targets (HDR scene color + depth), render passes,
+ * and executes post-processing passes (tonemapping, bloom, FXAA, etc.)
+ * between the 3D scene rendering and the final swap chain presentation.
+ */
+class VulkanPostProcessor {
+public:
+	VulkanPostProcessor() = default;
+	~VulkanPostProcessor() = default;
+
+	// Non-copyable
+	VulkanPostProcessor(const VulkanPostProcessor&) = delete;
+	VulkanPostProcessor& operator=(const VulkanPostProcessor&) = delete;
+
+	/**
+	 * @brief Initialize post-processing resources
+	 * @param device Vulkan logical device
+	 * @param physDevice Physical device (for format checks)
+	 * @param memMgr Memory manager for allocations
+	 * @param extent Scene rendering resolution
+	 * @param depthFormat Depth format (matches main depth buffer)
+	 * @return true on success
+	 */
+	bool init(vk::Device device, vk::PhysicalDevice physDevice,
+		VulkanMemoryManager* memMgr, vk::Extent2D extent,
+		vk::Format depthFormat);
+
+	/**
+	 * @brief Shutdown and free all post-processing resources
+	 */
+	void shutdown();
+
+	/**
+	 * @brief Get the HDR scene render pass (for 3D scene rendering)
+	 *
+	 * This render pass has RGBA16F color + depth attachments with loadOp=eClear.
+	 * Used between scene_texture_begin() and scene_texture_end().
+	 */
+	vk::RenderPass getSceneRenderPass() const { return m_sceneRenderPass; }
+
+	/**
+	 * @brief Get the HDR scene render pass with loadOp=eLoad
+	 *
+	 * Compatible with getSceneRenderPass() (same formats/samples) so uses
+	 * the same framebuffer. Used to resume scene rendering after
+	 * copy_effect_texture interrupts the pass.
+ */ + vk::RenderPass getSceneRenderPassLoad() const { return m_sceneRenderPassLoad; } + + /** + * @brief Get the HDR scene framebuffer + */ + vk::Framebuffer getSceneFramebuffer() const { return m_sceneFramebuffer; } + + /** + * @brief Get the scene rendering extent + */ + vk::Extent2D getSceneExtent() const { return m_extent; } + + /** + * @brief Execute post-processing passes and draw result to swap chain + * + * Called after the HDR scene render pass ends and before the resumed + * swap chain render pass begins. Runs tonemapping (and later bloom, + * FXAA, etc.) then draws a fullscreen triangle to blit the result + * into the swap chain. + * + * The caller is responsible for: + * 1. Ending the HDR scene render pass before calling this + * 2. Beginning the resumed swap chain render pass before calling this + * (the blit draws INTO the resumed pass) + * + * @param cmd Active command buffer + */ + void blitToSwapChain(vk::CommandBuffer cmd); + + /** + * @brief Execute bloom post-processing passes + * + * Called after the HDR scene render pass ends and before the resumed + * swap chain render pass begins. Manages its own render passes internally. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void executeBloom(vk::CommandBuffer cmd); + + /** + * @brief Execute tonemapping pass (HDR scene → LDR) + * + * Called after bloom and before FXAA. Renders to Scene_ldr (RGBA8). + * Must be called outside a render pass. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void executeTonemap(vk::CommandBuffer cmd); + + /** + * @brief Execute FXAA anti-aliasing passes + * + * Called after tonemapping. Runs prepass (LDR→luminance) then + * FXAA main pass (luminance→LDR). Must be called outside a render pass. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void executeFXAA(vk::CommandBuffer cmd); + + /** + * @brief Execute post-processing effects (saturation, brightness, etc.) + * + * Called after FXAA and before the final blit. Reads Scene_ldr, writes + * Scene_luminance (reused as temp target). Must be called outside a render pass. + * + * @param cmd Active command buffer (must be outside a render pass) + * @return true if effects were applied (blit should read Scene_luminance) + */ + bool executePostEffects(vk::CommandBuffer cmd); + + /** + * @brief Execute lightshafts (god rays) pass + * + * Called after FXAA and before post-effects. Additively blends god rays + * onto Scene_ldr based on sun position and depth buffer sampling. + * Must be called outside a render pass. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void executeLightshafts(vk::CommandBuffer cmd); + + /** + * @brief Update distortion ping-pong textures + * + * Called every frame from endSceneRendering(). Internally tracks a ~30ms + * timer. When triggered, scrolls old distortion data right by 1 pixel and + * injects random noise at the left edge (matching OpenGL's + * gr_opengl_update_distortion()). Must be called outside a render pass. + * + * @param cmd Active command buffer (must be outside a render pass) + * @param frametime Time since last frame in seconds + */ + void updateDistortion(vk::CommandBuffer cmd, float frametime); + + /** + * @brief Get the current distortion texture view for thruster sampling + * + * Returns the most recently written distortion texture (the one thrusters + * should read from). Returns nullptr if distortion textures aren't initialized. 
+ */ + vk::ImageView getDistortionTextureView() const; + + /** + * @brief Get the distortion texture sampler (LINEAR, REPEAT) + */ + vk::Sampler getDistortionSampler() const { return m_distortionSampler; } + + /** + * @brief Copy scene color to effect texture for distortion/soft particle sampling + * + * Must be called outside a render pass. Transitions scene color through + * eTransferSrcOptimal and back to eColorAttachmentOptimal (ready for resumed + * scene render pass). Transitions effect texture to eShaderReadOnlyOptimal. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void copyEffectTexture(vk::CommandBuffer cmd); + + /** + * @brief Copy G-buffer normal to samplable copy for decal angle rejection + * + * Must be called outside a render pass. Transitions G-buffer normal through + * eTransferSrcOptimal and back to eShaderReadOnlyOptimal. Transitions + * normal copy to eShaderReadOnlyOptimal for fragment shader sampling. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void copyGbufNormal(vk::CommandBuffer cmd); + + /** + * @brief Copy scene depth to samplable depth copy for soft particle rendering + * + * Must be called outside a render pass. Transitions scene depth through + * eTransferSrcOptimal and back to eDepthStencilAttachmentOptimal. Transitions + * depth copy to eShaderReadOnlyOptimal for fragment shader sampling. + * + * @param cmd Active command buffer (must be outside a render pass) + */ + void copySceneDepth(vk::CommandBuffer cmd); + + /** + * @brief Check if LDR targets are available (tonemapping + FXAA ready) + */ + bool hasLDRTargets() const { return m_ldrInitialized; } + + /** + * @brief Get the scene color image (for layout transitions outside post-processor) + */ + vk::Image getSceneColorImage() const { return m_sceneColor.image; } + + /** + * @brief Get the scene color image view (for post-processing texture binding) + */ + vk::ImageView getSceneColorView() const { return m_sceneColor.view; } + + /** + * @brief Get the scene color sampler + */ + vk::Sampler getSceneColorSampler() const { return m_linearSampler; } + + /** + * @brief Get the effect/composite texture view (snapshot of scene color) + * + * Available for sampling after copyEffectTexture() has been called. + * Used by distortion and soft particle shaders. + */ + vk::ImageView getSceneEffectView() const { return m_sceneEffect.view; } + + /** + * @brief Get the scene depth copy view (for soft particle depth sampling) + * + * Available for sampling after copySceneDepth() has been called. 
+	 */
+	vk::ImageView getSceneDepthCopyView() const { return m_sceneDepthCopy.view; }
+
+	/**
+	 * @brief Get the effect texture sampler (linear, clamp-to-edge)
+	 */
+	vk::Sampler getSceneEffectSampler() const { return m_linearSampler; }
+
+	/**
+	 * @brief Check if post-processing is initialized
+	 */
+	bool isInitialized() const { return m_initialized; }
+
+	// ========== G-Buffer (deferred lighting) ==========
+
+	/**
+	 * @brief Get the G-buffer render pass (6 color + depth, loadOp=eClear)
+	 */
+	vk::RenderPass getGbufRenderPass() const { return m_gbufRenderPass; }
+
+	/**
+	 * @brief Get the G-buffer render pass with loadOp=eLoad (resume after mid-pass copy)
+	 */
+	vk::RenderPass getGbufRenderPassLoad() const { return m_gbufRenderPassLoad; }
+
+	/**
+	 * @brief Get the G-buffer framebuffer (6 color + depth)
+	 */
+	vk::Framebuffer getGbufFramebuffer() const { return m_gbufFramebuffer; }
+
+	/**
+	 * @brief Check if G-buffer resources are initialized
+	 */
+	bool isGbufInitialized() const { return m_gbufInitialized; }
+
+	// G-buffer image views (for future light pass texture sampling)
+	vk::ImageView getGbufPositionView() const { return m_gbufPosition.view; }
+	vk::ImageView getGbufNormalView() const { return m_gbufNormal.view; }
+	vk::ImageView getGbufSpecularView() const { return m_gbufSpecular.view; }
+	vk::ImageView getGbufEmissiveView() const { return m_gbufEmissive.view; }
+	vk::ImageView getGbufCompositeView() const { return m_gbufComposite.view; }
+
+	// G-buffer images (for copy operations)
+	vk::Image getGbufEmissiveImage() const { return m_gbufEmissive.image; }
+	vk::Image getGbufCompositeImage() const { return m_gbufComposite.image; }
+	vk::Image getGbufNormalImage() const { return m_gbufNormal.image; }
+
+	// G-buffer normal copy (for decal angle rejection sampling)
+	vk::ImageView getGbufNormalCopyView() const { return m_gbufNormalCopy.view; }
+
+	/**
+	 * @brief Transition G-buffer color attachments 1-5 for render pass resume
+	 *
+	 * After ending the G-buffer render pass, all color attachments are in
+	 * eShaderReadOnlyOptimal. The eLoad pass expects eColorAttachmentOptimal.
+	 * The caller handles attachment 0 (scene color); this transitions the rest.
+	 *
+	 * @param cmd Active command buffer (must be outside a render pass)
+	 */
+	void transitionGbufForResume(vk::CommandBuffer cmd);
+
+	// ========== Deferred Light Accumulation ==========
+
+	/**
+	 * @brief Render deferred lights into the composite buffer
+	 *
+	 * Reads G-buffer textures, renders light volumes (fullscreen, sphere, cylinder)
+	 * with additive blending into the composite attachment. Manages its own render
+	 * pass internally.
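+	 *
+	 * Expected ordering, as a sketch assembled from the docs above (not a
+	 * guaranteed call sequence):
+	 * @code
+	 *   // ... end G-buffer render pass (color attachments now eShaderReadOnlyOptimal) ...
+	 *   pp->renderDeferredLights(cmd);  // samples the G-buffer, accumulates into composite
+	 *   // fog / tonemapping passes then consume the composite further down the chain
+	 * @endcode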
+	 *
+	 * @param cmd Active command buffer (must be outside a render pass)
+	 */
+	void renderDeferredLights(vk::CommandBuffer cmd);
+
+	/**
+	 * @brief Get the light accumulation render pass
+	 */
+	vk::RenderPass getLightAccumRenderPass() const { return m_lightAccumRenderPass; }
+
+	/**
+	 * @brief Get the light accumulation framebuffer
+	 */
+	vk::Framebuffer getLightAccumFramebuffer() const { return m_lightAccumFramebuffer; }
+
+	// ========== Shadow Map ==========
+
+	/**
+	 * @brief Initialize shadow map resources (lazy, called on first use)
+	 * @return true on success
+	 */
+	bool initShadowPass();
+
+	/**
+	 * @brief Shutdown shadow map resources
+	 */
+	void shutdownShadowPass();
+
+	/**
+	 * @brief Check if shadow map resources are initialized
+	 */
+	bool isShadowInitialized() const { return m_shadowInitialized; }
+
+	/**
+	 * @brief Get shadow map texture size (square)
+	 */
+	int getShadowTextureSize() const { return m_shadowTextureSize; }
+
+	/**
+	 * @brief Get shadow color image view (2D array, 4 layers) for descriptor binding
+	 */
+	vk::ImageView getShadowColorView() const { return m_shadowColor.view; }
+
+	/**
+	 * @brief Get shadow color image (for layout transitions)
+	 */
+	vk::Image getShadowColorImage() const { return m_shadowColor.image; }
+
+	/**
+	 * @brief Get shadow depth image (for layout transitions)
+	 */
+	vk::Image getShadowDepthImage() const { return m_shadowDepth.image; }
+
+	/**
+	 * @brief Get shadow render pass
+	 */
+	vk::RenderPass getShadowRenderPass() const { return m_shadowRenderPass; }
+
+	/**
+	 * @brief Get shadow framebuffer
+	 */
+	vk::Framebuffer getShadowFramebuffer() const { return m_shadowFramebuffer; }
+
+	/**
+	 * @brief Get shadow map sampler (linear, clamp-to-edge)
+	 */
+	vk::Sampler getShadowSampler() const { return m_linearSampler; }
+
+	// ========== MSAA (deferred lighting) ==========
+
+	/**
+	 * @brief Check if MSAA G-buffer resources are initialized
+	 */
+	bool isMsaaInitialized() const { return m_msaaInitialized; }
+
+	/**
+	 * @brief Get the MSAA G-buffer render pass (eClear variant)
+	 */
+	vk::RenderPass getMsaaGbufRenderPass() const { return m_msaaGbufRenderPass; }
+
+	/**
+	 * @brief Get the MSAA G-buffer render pass (eLoad variant, emissive preserving)
+	 */
+	vk::RenderPass getMsaaGbufRenderPassLoad() const { return m_msaaGbufRenderPassLoad; }
+
+	/**
+	 * @brief Get the MSAA G-buffer framebuffer
+	 */
+	vk::Framebuffer getMsaaGbufFramebuffer() const { return m_msaaGbufFramebuffer; }
+
+	/**
+	 * @brief Get the MSAA resolve render pass (writes to non-MSAA G-buffer)
+	 */
+	vk::RenderPass getMsaaResolveRenderPass() const { return m_msaaResolveRenderPass; }
+
+	/**
+	 * @brief Get the MSAA resolve framebuffer (non-MSAA G-buffer images)
+	 */
+	vk::Framebuffer getMsaaResolveFramebuffer() const { return m_msaaResolveFramebuffer; }
+
+	/**
+	 * @brief Get the emissive copy render pass (for upsampling to MSAA)
+	 */
+	vk::RenderPass getMsaaEmissiveCopyRenderPass() const { return m_msaaEmissiveCopyRenderPass; }
+
+	/**
+	 * @brief Get the emissive copy framebuffer (MSAA emissive target)
+	 */
+	vk::Framebuffer getMsaaEmissiveCopyFramebuffer() const { return m_msaaEmissiveCopyFramebuffer; }
+
+	/**
+	 * @brief Get MSAA image views for resolve shader binding
+	 */
+	vk::ImageView getMsaaColorView() const { return m_msaaColor.view; }
+	vk::ImageView getMsaaPositionView() const { return m_msaaPosition.view; }
+	vk::ImageView getMsaaNormalView() const { return m_msaaNormal.view; }
+	vk::ImageView getMsaaSpecularView() const { return m_msaaSpecular.view; }
+	vk::ImageView getMsaaEmissiveView() const { return m_msaaEmissive.view; }
+	vk::ImageView getMsaaDepthView() const { return m_msaaDepthView; }
+	vk::Image getMsaaColorImage() const { return m_msaaColor.image; }
+	vk::Image getMsaaPositionImage() const { return m_msaaPosition.image; }
+	vk::Image getMsaaNormalImage() const { return m_msaaNormal.image; }
+	vk::Image getMsaaSpecularImage() const { return m_msaaSpecular.image; }
+	vk::Image getMsaaEmissiveImage() const { return m_msaaEmissive.image; }
+	vk::Image getMsaaDepthImage() const { return m_msaaDepthImage; }
+
+	/**
+	 * @brief Get MSAA resolve UBO buffer and mapped pointer
+	 *
+	 * Per-frame slots (one per MAX_FRAMES_IN_FLIGHT) hold {samples, fov} data.
+	 * Persistently mapped. Caller writes to the current frame's slot.
+	 */
+	vk::Buffer getMsaaResolveUBO() const { return m_msaaResolveUBO; }
+	void* getMsaaResolveUBOMapped() const { return m_msaaResolveUBOMapped; }
+
+	/**
+	 * @brief Transition MSAA images to expected layout before eClear render pass
+	 *
+	 * Uses oldLayout=eUndefined so it works regardless of current layout (first
+	 * frame: UNDEFINED, subsequent: eShaderReadOnlyOptimal from resolve).
+	 * Content is discarded — caller must use eClear loadOp.
+	 */
+	void transitionMsaaGbufForBegin(vk::CommandBuffer cmd);
+
+	/**
+	 * @brief MSAA color attachment count (5 — no composite in MSAA pass)
+	 */
+	static constexpr uint32_t MSAA_COLOR_ATTACHMENT_COUNT = 5;
+
+	/**
+	 * @brief Transition MSAA G-buffer color attachments for render pass resume
+	 */
+	void transitionMsaaGbufForResume(vk::CommandBuffer cmd);
+
+	// ========== Fog / Volumetric Nebula ==========
+
+	/**
+	 * @brief Render scene fog into scene color
+	 *
+	 * Reads composite (lit result) + depth copy -> writes scene color.
+	 * Must be called outside a render pass. After return, scene color
+	 * is in eColorAttachmentOptimal.
+	 *
+	 * @param cmd Active command buffer (must be outside a render pass)
+	 */
+	void renderSceneFog(vk::CommandBuffer cmd);
+
+	/**
+	 * @brief Render volumetric nebula fog into scene color
+	 *
+	 * Reads composite + mipmapped emissive + depth copy + 3D volume textures
+	 * -> writes scene color. Must be called outside a render pass.
+	 * After return, scene color is in eColorAttachmentOptimal.
+	 *
+	 * @param cmd Active command buffer (must be outside a render pass)
+	 */
+	void renderVolumetricFog(vk::CommandBuffer cmd);
+
+private:
+	void updateTonemappingUBO();
+
+	bool createImage(uint32_t width, uint32_t height, vk::Format format,
+		vk::ImageUsageFlags usage, vk::ImageAspectFlags aspect,
+		vk::Image& outImage, vk::ImageView& outView,
+		VulkanAllocation& outAllocation,
+		vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1);
+
+	// G-buffer methods (deferred lighting)
+	bool initGBuffer();
+	void shutdownGBuffer();
+
+	// Light volume methods (deferred lighting)
+	bool initLightVolumes();
+	void shutdownLightVolumes();
+	bool initLightAccumPass();
+
+	// LDR target methods
+	bool initLDRTargets();
+	void shutdownLDRTargets();
+
+	// Bloom pipeline methods
+	bool initBloom();
+	void shutdownBloom();
+	void generateMipmaps(vk::CommandBuffer cmd, vk::Image image,
+		uint32_t width, uint32_t height, uint32_t mipLevels);
+	void drawFullscreenTriangle(vk::CommandBuffer cmd, vk::RenderPass renderPass,
+		vk::Framebuffer framebuffer, vk::Extent2D extent,
+		int shaderType,
+		vk::ImageView textureView, vk::Sampler sampler,
+		const void* uboData, size_t uboSize,
+		int blendMode);
+
+	struct RenderTarget {
+		vk::Image image;
+		vk::ImageView view;
+		VulkanAllocation allocation;
+		vk::Format format = vk::Format::eUndefined;
+		uint32_t width = 0;
+		uint32_t height = 0;
+	};
+
+	RenderTarget m_sceneColor;     // RGBA16F HDR scene color
+	RenderTarget m_sceneDepth;     // Depth buffer for scene
+	RenderTarget m_sceneDepthCopy; // Samplable copy of scene depth (for soft particles)
+	RenderTarget m_sceneEffect;    // RGBA16F effect/composite (snapshot of scene color)
+
+	// Scene render pass and framebuffer
+	vk::RenderPass m_sceneRenderPass;     // loadOp=eClear (initial scene begin)
+	vk::RenderPass m_sceneRenderPassLoad; // loadOp=eLoad (resume after copy_effect_texture)
+	vk::Framebuffer m_sceneFramebuffer;   // Shared by both scene render passes (compatible)
+
+	// Sampler for post-processing texture reads (maxLod=0)
+	vk::Sampler m_linearSampler;
+	// Sampler with mipmap support for bloom textures
+	vk::Sampler m_mipmapSampler;
+
+	// Persistent UBO for tonemapping shader parameters
+	vk::Buffer m_tonemapUBO;
+	VulkanAllocation m_tonemapUBOAlloc;
+
+	// ---- Bloom resources ----
+	static constexpr int MAX_MIP_BLUR_LEVELS = 4;
+	static constexpr size_t BLOOM_UBO_SLOT_SIZE = 256; // >= minUniformBufferOffsetAlignment
+
+	struct BloomTarget {
+		vk::Image image;
+		VulkanAllocation allocation;
+		vk::ImageView fullView;                      // All mip levels (for textureLod sampling)
+		vk::ImageView mipViews[MAX_MIP_BLUR_LEVELS]; // Per-mip views (for framebuffer attachment)
+		vk::Framebuffer mipFramebuffers[MAX_MIP_BLUR_LEVELS];
+	};
+
+	BloomTarget m_bloomTex[2];                 // Half-res RGBA16F, 4 mip levels
+	uint32_t m_bloomWidth = 0;                 // Half of scene width
+	uint32_t m_bloomHeight = 0;                // Half of scene height
+	vk::RenderPass m_bloomRenderPass;          // Color-only RGBA16F, loadOp=eDontCare
+	vk::RenderPass m_bloomCompositeRenderPass; // Color-only RGBA16F, loadOp=eLoad (additive to scene)
+	vk::Framebuffer m_sceneColorBloomFB;       // Scene_color as color attachment for bloom composite
+
+	// Per-draw UBO for bloom passes (each draw uses different offset)
+	vk::Buffer m_bloomUBO;
+	VulkanAllocation m_bloomUBOAlloc;
+	void* m_bloomUBOMapped = nullptr;
+	uint32_t m_bloomUBOCursor = 0; // Current slot index (reset per frame)
+	static constexpr uint32_t BLOOM_UBO_MAX_SLOTS = 24;
+
+	bool m_bloomInitialized = false;
+
+	// ---- LDR / FXAA resources ----
+	RenderTarget m_sceneLdr;       // RGBA8 LDR after tonemapping
+	RenderTarget m_sceneLuminance; // RGBA8 LDR with luma in alpha (for FXAA)
+	vk::RenderPass m_ldrRenderPass;     // Color-only RGBA8, loadOp=eDontCare
+	vk::RenderPass m_ldrLoadRenderPass; // Color-only RGBA8, loadOp=eLoad (for additive blending)
+	vk::Framebuffer m_sceneLdrFB;
+	vk::Framebuffer m_sceneLuminanceFB;
+	bool m_ldrInitialized = false;
+	bool m_postEffectsApplied = false; // Set per-frame by executePostEffects
+
+public:
+	// Attachment layout: [0]=color, [1]=position, [2]=normal, [3]=specular, [4]=emissive, [5]=composite, [6]=depth
+	static constexpr uint32_t GBUF_COLOR_ATTACHMENT_COUNT = 6;
+
+private:
+	// ---- G-Buffer (deferred lighting) ----
+	RenderTarget m_gbufPosition;   // RGBA16F - view-space position (xyz) + AO (w)
+	RenderTarget m_gbufNormal;     // RGBA16F - view-space normal (xyz) + gloss (w)
+	RenderTarget m_gbufNormalCopy; // RGBA16F - samplable copy of G-buffer normal (for decals)
+	RenderTarget m_gbufSpecular;   // RGBA8 - specular color (rgb) + fresnel (a)
+	RenderTarget m_gbufEmissive;   // RGBA16F - emissive / pre-lit color
+	RenderTarget m_gbufComposite;  // RGBA16F - light accumulation scratch buffer
+	vk::RenderPass m_gbufRenderPass;     // loadOp=eClear (initial)
+	vk::RenderPass m_gbufRenderPassLoad; // loadOp=eLoad (resume after mid-pass copy)
+	vk::Framebuffer m_gbufFramebuffer;
+	bool m_gbufInitialized = false;
+
+	// ---- Light accumulation (deferred lighting) ----
+	vk::RenderPass m_lightAccumRenderPass;   // Single RGBA16F color, loadOp=eLoad, additive blend
+	vk::Framebuffer m_lightAccumFramebuffer; // Composite image as attachment 0
+
+	// Light volume meshes (sphere + cylinder for positional lights)
+	struct LightVolumeMesh {
+		vk::Buffer vbo;
+		VulkanAllocation vboAlloc;
+		vk::Buffer ibo;
+		VulkanAllocation iboAlloc;
+		uint32_t vertexCount = 0;
+		uint32_t indexCount = 0;
+	};
+	LightVolumeMesh m_sphereMesh;
+	LightVolumeMesh m_cylinderMesh;
+
+	// Per-frame UBO for deferred light data (lights + globals + matrices)
+	vk::Buffer m_deferredUBO;
+	VulkanAllocation m_deferredUBOAlloc;
+	static constexpr uint32_t DEFERRED_UBO_SIZE = 256 * 1024; // 256KB for light data
+
+	bool m_lightVolumesInitialized = false;
+
+	// ---- MSAA G-buffer ----
+	RenderTarget m_msaaColor;    // RGBA16F (MS)
+	RenderTarget m_msaaPosition; // RGBA16F (MS)
+	RenderTarget m_msaaNormal;   // RGBA16F (MS)
+	RenderTarget m_msaaSpecular; // RGBA8 (MS)
+	RenderTarget m_msaaEmissive; // RGBA16F (MS)
+	vk::Image m_msaaDepthImage;
+	vk::ImageView m_msaaDepthView;
+	VulkanAllocation m_msaaDepthAlloc;
+	vk::RenderPass m_msaaGbufRenderPass;     // eClear, 5 MS color + MS depth
+	vk::RenderPass m_msaaGbufRenderPassLoad; // eLoad (emissive preserved), 5 MS color + MS depth
+	vk::Framebuffer m_msaaGbufFramebuffer;
+	vk::RenderPass m_msaaResolveRenderPass;  // 5 non-MSAA color + depth (via gl_FragDepth)
+	vk::Framebuffer m_msaaResolveFramebuffer;
+	vk::RenderPass m_msaaEmissiveCopyRenderPass; // 1 MS color att (for upsample)
+	vk::Framebuffer m_msaaEmissiveCopyFramebuffer;
+	// Per-frame UBO for MSAA resolve shader data (samples, fov)
+	vk::Buffer m_msaaResolveUBO;
+	VulkanAllocation m_msaaResolveUBOAlloc;
+	void* m_msaaResolveUBOMapped = nullptr;
+	bool m_msaaInitialized = false;
+	bool initMSAA();
+	void shutdownMSAA();
+
+	// ---- Shadow map (cascaded VSM) ----
+	RenderTarget m_shadowColor; // RGBA16F, 2D array (4 layers)
+	RenderTarget m_shadowDepth; // D32F, 2D array (4 layers)
+	vk::RenderPass m_shadowRenderPass;
+	vk::Framebuffer m_shadowFramebuffer;
+	int m_shadowTextureSize = 0;
+	bool m_shadowInitialized = false;
+
+	// ---- Fog resources ----
+	vk::RenderPass m_fogRenderPass;   // Color-only RGBA16F, loadOp=eDontCare, finalLayout=eColorAttachmentOptimal
+	vk::Framebuffer m_fogFramebuffer; // Scene color as color attachment
+	bool m_fogInitialized = false;
+	bool initFogPass();
+	void shutdownFogPass();
+
+	// Mipmapped emissive copy for volumetric fog LOD sampling
+	RenderTarget m_emissiveMipmapped; // RGBA16F with full mip chain
+	uint32_t m_emissiveMipLevels = 0;
+	vk::ImageView m_emissiveMipmappedFullView; // View with all mip levels
+	bool m_emissiveMipmappedInitialized = false;
+
+	// ---- Distortion ping-pong textures (32x32 RGBA8) ----
+	RenderTarget m_distortionTex[2];
+	int m_distortionSwitch = 0;      // Which texture is the current read source
+	float m_distortionTimer = 0.0f;  // Accumulator for ~30ms update interval
+	vk::Sampler m_distortionSampler; // LINEAR filter, REPEAT wrapping
+	bool m_distortionInitialized = false;
+	bool m_distortionFirstUpdate = true; // First update needs eUndefined old layout
+
+	vk::Device m_device;
+	VulkanMemoryManager* m_memoryManager = nullptr;
+	vk::Extent2D m_extent;
+	vk::Format m_depthFormat = vk::Format::eUndefined;
+
+	bool m_initialized = false;
+};
+
+// Global post-processor access
+VulkanPostProcessor* getPostProcessor();
+void setPostProcessor(VulkanPostProcessor* pp);
+
+// gr_screen function pointer implementations for post-processing
+void vulkan_post_process_begin();
+void vulkan_post_process_end();
+void vulkan_post_process_save_zbuffer();
+void vulkan_post_process_restore_zbuffer();
+void vulkan_post_process_set_effect(const char* name, int value, const vec3d* rgb);
+void vulkan_post_process_set_defaults();
+
+/**
+ * @brief Copy one image to another with automatic barrier management
+ *
+ * Handles pre-barriers (src→eTransferSrcOptimal, dst→eTransferDstOptimal),
+ * the copy command, and post-barriers (eTransferSrc→srcNewLayout, eTransferDst→dstNewLayout).
+ * Access masks and pipeline stages are derived from the layouts automatically.
+ *
+ * Skip rule: if srcNewLayout == eTransferSrcOptimal, the src post-barrier is skipped
+ * (image stays in transfer source layout). Same for dst + eTransferDstOptimal.
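+ *
+ * A hypothetical call matching the copyEffectTexture() description above
+ * (image/extent names are assumptions, not part of this patch):
+ * @code
+ *   copyImageToImage(cmd,
+ *       sceneColorImage, vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::eColorAttachmentOptimal,
+ *       effectImage,     vk::ImageLayout::eUndefined,              vk::ImageLayout::eShaderReadOnlyOptimal,
+ *       sceneExtent);
+ * @endcode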
+ *
+ * @param cmd Active command buffer (must be outside a render pass)
+ * @param src Source image
+ * @param srcOldLayout Current layout of source image
+ * @param srcNewLayout Desired layout of source image after copy
+ * @param dst Destination image
+ * @param dstOldLayout Current layout of destination image
+ * @param dstNewLayout Desired layout of destination image after copy
+ * @param extent Copy region (width x height)
+ * @param aspect Image aspect (eColor or eDepth)
+ * @param dstMipLevels Number of mip levels in dst subresource range (for pre-barrier)
+ */
+void copyImageToImage(
+	vk::CommandBuffer cmd,
+	vk::Image src, vk::ImageLayout srcOldLayout, vk::ImageLayout srcNewLayout,
+	vk::Image dst, vk::ImageLayout dstOldLayout, vk::ImageLayout dstNewLayout,
+	vk::Extent2D extent,
+	vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eColor,
+	uint32_t dstMipLevels = 1);
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanQuery.cpp b/code/graphics/vulkan/VulkanQuery.cpp
new file mode 100644
index 00000000000..2bb0c767bca
--- /dev/null
+++ b/code/graphics/vulkan/VulkanQuery.cpp
@@ -0,0 +1,311 @@
+
+#include "VulkanQuery.h"
+#include "VulkanState.h"
+
+namespace graphics {
+namespace vulkan {
+
+static VulkanQueryManager* g_queryManager = nullptr;
+
+VulkanQueryManager* getQueryManager()
+{
+	return g_queryManager;
+}
+
+void setQueryManager(VulkanQueryManager* mgr)
+{
+	g_queryManager = mgr;
+}
+
+bool VulkanQueryManager::init(vk::Device device, float timestampPeriod,
+	vk::CommandPool commandPool, vk::Queue queue)
+{
+	m_device = device;
+	m_timestampPeriod = timestampPeriod;
+
+	vk::QueryPoolCreateInfo poolInfo;
+	poolInfo.queryType = vk::QueryType::eTimestamp;
+	poolInfo.queryCount = POOL_CAPACITY;
+
+	m_queryPool = m_device.createQueryPool(poolInfo);
+	if (!m_queryPool) {
+		mprintf(("Vulkan: Failed to create timestamp query pool!\n"));
+		return false;
+	}
+
+	// Reset the entire pool via a one-shot command buffer so all queries
+	// start in the "unavailable" state required by the spec.
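+	// (Recording the reset on a throwaway command buffer avoids requiring
+	// vkResetQueryPool, the host-side reset that only exists with
+	// VK_EXT_host_query_reset or Vulkan 1.2.)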
+	vk::CommandBufferAllocateInfo allocInfo;
+	allocInfo.commandPool = commandPool;
+	allocInfo.level = vk::CommandBufferLevel::ePrimary;
+	allocInfo.commandBufferCount = 1;
+
+	auto cmdBuffers = m_device.allocateCommandBuffers(allocInfo);
+	auto cmd = cmdBuffers.front();
+
+	vk::CommandBufferBeginInfo beginInfo;
+	beginInfo.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit;
+	cmd.begin(beginInfo);
+	cmd.resetQueryPool(m_queryPool, 0, POOL_CAPACITY);
+	cmd.end();
+
+	vk::SubmitInfo submitInfo;
+	submitInfo.commandBufferCount = 1;
+	submitInfo.pCommandBuffers = &cmd;
+	queue.submit(submitInfo, nullptr);
+	queue.waitIdle();
+
+	m_device.freeCommandBuffers(commandPool, cmdBuffers);
+
+	m_slots.clear();
+	m_slots.resize(POOL_CAPACITY);
+	for (uint32_t idx = 0; idx < POOL_CAPACITY; ++idx) {
+		m_freeSlots.push(idx);
+	}
+
+	m_resetList.clear();
+	m_inflightResets.clear();
+	m_pendingWrites.clear();
+	m_deferredFreeSlots.clear();
+	m_lastFrameSubmitted = true;
+
+	mprintf(("Vulkan: Created timestamp query pool (capacity %u, period %.1f ns/tick)\n",
+		POOL_CAPACITY, m_timestampPeriod));
+
+	return true;
+}
+
+void VulkanQueryManager::shutdown()
+{
+	if (m_device && m_queryPool) {
+		m_device.destroyQueryPool(m_queryPool);
+		m_queryPool = nullptr;
+	}
+	m_slots.clear();
+	while (!m_freeSlots.empty()) {
+		m_freeSlots.pop();
+	}
+	m_resetList.clear();
+	m_inflightResets.clear();
+	m_pendingWrites.clear();
+	m_deferredFreeSlots.clear();
+	m_device = nullptr;
+}
+
+void VulkanQueryManager::beginFrame(vk::CommandBuffer commandBuffer)
+{
+	// If the previous frame's command buffer was abandoned (no flip/submit),
+	// the resets and writes we recorded never executed on the GPU.
+	if (!m_lastFrameSubmitted) {
+		// Orphaned writes: the vkCmdWriteTimestamp never executed, so the
+		// slot is still in its pre-write state. Mark as orphaned so that
+		// queryValueAvailable returns true and getQueryValue returns 0,
+		// letting the tracing drain proceed to deleteQueryObject.
+		for (auto idx : m_pendingWrites) {
+			m_slots[idx].orphaned = true;
+			m_slots[idx].wasReset = true;
+		}
+		m_pendingWrites.clear();
+		// Orphaned resets: the vkCmdResetQueryPool never executed. Override
+		// wasReset back to false for slots whose reset was ALSO on the
+		// abandoned command buffer, and re-schedule the reset.
+		for (auto idx : m_inflightResets) {
+			m_slots[idx].wasReset = false;
+			m_resetList.push_back(idx);
+		}
+	}
+	m_inflightResets.clear();
+
+	// Record resets for this frame. Only slots returned via deleteQueryObject
+	// are in this list. Must happen outside render passes (vkCmdResetQueryPool).
+	for (auto idx : m_resetList) {
+		Assertion(!m_slots[idx].inUse,
+			"Query slot %u in resetList but inUse=true!", idx);
+		commandBuffer.resetQueryPool(m_queryPool, idx, 1);
+		m_slots[idx].wasReset = true;
+		m_inflightResets.push_back(idx);
+	}
+	m_resetList.clear();
+
+	// Slots that were deleted while awaiting reset can now return to the free pool.
+	// The vkCmdResetQueryPool recorded above makes them safe for new writes on
+	// this same command buffer.
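+	// (A slot deleted before its reset executed is parked in both resetList and
+	// deferredFreeSlots; this beginFrame records its reset above and can then
+	// hand it back to the free pool below in the same call.)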
+	if (!m_deferredFreeSlots.empty()) {
+		for (auto idx : m_deferredFreeSlots) {
+			m_freeSlots.push(idx);
+		}
+		m_deferredFreeSlots.clear();
+	}
+
+	// Report and reset exhaustion counter from previous frame
+	if (m_exhaustionMessageCount > 0) {
+		mprintf(("Vulkan: Query pool exhaustion — %u queries dropped last frame (free: %u)\n",
+			m_exhaustionMessageCount, static_cast<uint32_t>(m_freeSlots.size())));
+		m_exhaustionMessageCount = 0;
+	}
+
+	m_lastFrameSubmitted = false;
+}
+
+void VulkanQueryManager::notifySubmission()
+{
+	m_lastFrameSubmitted = true;
+	m_inflightResets.clear();
+
+	// Confirm all pending writes were submitted to the GPU.
+	for (auto idx : m_pendingWrites) {
+		m_slots[idx].submitted = true;
+	}
+	m_pendingWrites.clear();
+}
+
+int VulkanQueryManager::createQueryObject()
+{
+	if (!m_freeSlots.empty()) {
+		auto idx = m_freeSlots.front();
+		m_freeSlots.pop();
+		m_slots[idx].inUse = true;
+		return static_cast<int>(idx);
+	} else {
+		if (m_exhaustionMessageCount == 0) {
+			uint32_t inUseCount = 0, pendingResetCount = 0;
+			for (const auto& s : m_slots) {
+				if (s.inUse) inUseCount++;
+			}
+			pendingResetCount = static_cast<uint32_t>(m_resetList.size() + m_inflightResets.size() + m_deferredFreeSlots.size());
+			mprintf(("Vulkan: Query pool exhausted (%u slots: %u in-use, %u pending-reset, %u pending-write)\n",
+				POOL_CAPACITY, inUseCount, pendingResetCount, static_cast<uint32_t>(m_pendingWrites.size())));
+		}
+		m_exhaustionMessageCount++;
+		return -1;
+	}
+}
+
+void VulkanQueryManager::queryValue(int obj, QueryType type)
+{
+	Assertion(obj >= 0 && obj < static_cast<int>(m_slots.size()),
+		"Query object index %d is invalid!", obj);
+	auto& slot = m_slots[obj];
+
+	switch (type) {
+	case QueryType::Timestamp: {
+		// Slots must be reset by beginFrame before a new write.
+		Assertion(slot.wasReset,
+			"Query slot %d written before reset! wasReset=%d inUse=%d",
+			obj, (int)slot.wasReset, (int)slot.inUse);
+
+		getStateTracker()->getCommandBuffer().writeTimestamp(
+			vk::PipelineStageFlagBits::eBottomOfPipe,
+			m_queryPool, static_cast<uint32_t>(obj));
+
+		slot.submitted = false;
+		slot.wasReset = false;
+		m_pendingWrites.push_back(static_cast<uint32_t>(obj));
+		break;
+	}
+	default:
+		UNREACHABLE("Unhandled QueryType value!");
+		break;
+	}
+}
+
+bool VulkanQueryManager::queryValueAvailable(int obj)
+{
+	Assertion(obj >= 0 && obj < static_cast<int>(m_slots.size()),
+		"Query object index %d is invalid!", obj);
+	auto& slot = m_slots[obj];
+
+	if (!slot.inUse || slot.orphaned) {
+		return true;
+	}
+
+	// Written on current frame but not yet submitted (flip hasn't happened).
+	// Return false so process_gpu_events skips this and tries next frame.
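+	// Note: the probe below relies on getQueryPoolResults returning
+	// vk::Result::eNotReady (not eSuccess) while the GPU has not yet written
+	// the timestamp, since eWait is deliberately not passed here.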
+	if (!slot.submitted) {
+		return false;
+	}
+
+	uint64_t dummy;
+	auto result = m_device.getQueryPoolResults(
+		m_queryPool,
+		static_cast<uint32_t>(obj), 1,
+		sizeof(uint64_t), &dummy, sizeof(uint64_t),
+		vk::QueryResultFlagBits::e64);
+
+	return (result == vk::Result::eSuccess);
+}
+
+std::uint64_t VulkanQueryManager::getQueryValue(int obj)
+{
+	Assertion(obj >= 0 && obj < static_cast<int>(m_slots.size()),
+		"Query object index %d is invalid!", obj);
+	auto& slot = m_slots[obj];
+
+	if (!slot.inUse || slot.orphaned) {
+		return 0;
+	}
+
+	if (!slot.submitted) {
+		return 0;
+	}
+
+	uint64_t ticks;
+	auto result = m_device.getQueryPoolResults(
+		m_queryPool,
+		static_cast<uint32_t>(obj), 1,
+		sizeof(uint64_t), &ticks, sizeof(uint64_t),
+		vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait);
+	Assertion(result == vk::Result::eSuccess, "Failed to read query %d result!", obj);
+
+	return static_cast<std::uint64_t>(static_cast<double>(ticks) * static_cast<double>(m_timestampPeriod));
+}
+
+void VulkanQueryManager::deleteQueryObject(int obj)
+{
+	Assertion(obj >= 0 && obj < static_cast<int>(m_slots.size()),
+		"Query object index %d is invalid!", obj);
+	auto& slot = m_slots[obj];
+
+	slot.inUse = false;
+	slot.orphaned = false;
+
+	if (!slot.wasReset) {
+		m_resetList.push_back(static_cast<uint32_t>(obj));
+		m_deferredFreeSlots.push_back(static_cast<uint32_t>(obj));
+	} else {
+		m_freeSlots.push(static_cast<uint32_t>(obj));
+	}
+}
+
+// Free function wrappers for gr_screen function pointers
+int vulkan_create_query_object()
+{
+	return getQueryManager()->createQueryObject();
+}
+
+void vulkan_query_value(int obj, QueryType type)
+{
+	if (obj < 0) return;
+	getQueryManager()->queryValue(obj, type);
+}
+
+bool vulkan_query_value_available(int obj)
+{
+	if (obj < 0) return true;
+	return getQueryManager()->queryValueAvailable(obj);
+}
+
+std::uint64_t vulkan_get_query_value(int obj)
+{
+	if (obj < 0) return 0;
+	return getQueryManager()->getQueryValue(obj);
+}
+
+void vulkan_delete_query_object(int obj)
+{
+	if (obj < 0) return;
+	getQueryManager()->deleteQueryObject(obj);
+}
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanQuery.h b/code/graphics/vulkan/VulkanQuery.h
new file mode 100644
index 00000000000..fba6498a195
--- /dev/null
+++ b/code/graphics/vulkan/VulkanQuery.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include "graphics/2d.h"
+
+#include <vulkan/vulkan.hpp>
+
+namespace graphics {
+namespace vulkan {
+
+class VulkanQueryManager {
+  public:
+	bool init(vk::Device device, float timestampPeriod,
+		vk::CommandPool commandPool, vk::Queue queue);
+	void shutdown();
+
+	void beginFrame(vk::CommandBuffer commandBuffer);
+	void notifySubmission();
+
+	int createQueryObject();
+	void queryValue(int obj, QueryType type);
+	bool queryValueAvailable(int obj);
+	std::uint64_t getQueryValue(int obj);
+	void deleteQueryObject(int obj);
+
+  private:
+	static const uint32_t POOL_CAPACITY = 4096;
+
+	struct QuerySlot {
+		bool inUse = false;     // true after createQueryObject, false after deleteQueryObject
+		bool submitted = false; // true after notifySubmission confirms the write was submitted
+		bool wasReset = true;   // true after reset (init or beginFrame), false after write
+		bool orphaned = false;  // true if write was on an abandoned command buffer
+	};
+
+	vk::Device m_device;
+	vk::QueryPool m_queryPool;
+	SCP_vector<QuerySlot> m_slots;
+	SCP_queue<uint32_t> m_freeSlots;       // available slot indices
+	SCP_vector<uint32_t> m_resetList;      // slots to reset in next beginFrame
+	SCP_vector<uint32_t> m_inflightResets; // resets recorded but not yet confirmed submitted
+	SCP_vector<uint32_t> m_pendingWrites;  // writes recorded but not yet confirmed submitted
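+	// Slot state summary (derived from the flags above): free -> inUse+wasReset
+	// (ready for a write) -> write pending -> submitted (result readable) ->
+	// deleted -> reset scheduled -> free again. 'orphaned' short-circuits reads
+	// for slots whose frame was abandoned before submission.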
+	SCP_vector<uint32_t> m_deferredFreeSlots; // deleted slots waiting for reset before returning to freeSlots
+	float m_timestampPeriod = 0.0f;
+	bool m_lastFrameSubmitted = true;      // false after beginFrame, true after notifySubmission
+	uint32_t m_exhaustionMessageCount = 0; // throttle exhaustion log spam
+};
+
+VulkanQueryManager* getQueryManager();
+void setQueryManager(VulkanQueryManager* mgr);
+
+// Free functions for gr_screen function pointers
+int vulkan_create_query_object();
+void vulkan_query_value(int obj, QueryType type);
+bool vulkan_query_value_available(int obj);
+std::uint64_t vulkan_get_query_value(int obj);
+void vulkan_delete_query_object(int obj);
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/RenderFrame.cpp b/code/graphics/vulkan/VulkanRenderFrame.cpp
similarity index 64%
rename from code/graphics/vulkan/RenderFrame.cpp
rename to code/graphics/vulkan/VulkanRenderFrame.cpp
index ae113fd1031..1e7ca842829 100644
--- a/code/graphics/vulkan/RenderFrame.cpp
+++ b/code/graphics/vulkan/VulkanRenderFrame.cpp
@@ -1,10 +1,10 @@
-#include "RenderFrame.h"
+#include "VulkanRenderFrame.h"
 
 namespace graphics {
 namespace vulkan {
 
-RenderFrame::RenderFrame(vk::Device device, vk::SwapchainKHR swapChain, vk::Queue graphicsQueue, vk::Queue presentQueue)
+VulkanRenderFrame::VulkanRenderFrame(vk::Device device, vk::SwapchainKHR swapChain, vk::Queue graphicsQueue, vk::Queue presentQueue)
 	: m_device(device), m_swapChain(swapChain), m_graphicsQueue(graphicsQueue), m_presentQueue(presentQueue)
 {
 	constexpr vk::SemaphoreCreateInfo semaphoreCreateInfo;
@@ -14,7 +14,7 @@ RenderFrame::RenderFrame(vk::Device device, vk::SwapchainKHR swapChain, vk::Queu
 	m_renderingFinishedSemaphore = device.createSemaphoreUnique(semaphoreCreateInfo);
 	m_frameInFlightFence = device.createFenceUnique(fenceCreateInfo);
 }
-void RenderFrame::waitForFinish()
+void VulkanRenderFrame::waitForFinish()
 {
 	if (!m_inFlight) {
 		return;
@@ -34,32 +34,40 @@ void RenderFrame::waitForFinish()
 	// Our fence has been signaled so we are no longer in flight and ready to be reused
 	m_inFlight = false;
 }
-void RenderFrame::onFrameFinished(std::function<void()> finishFunc)
+void VulkanRenderFrame::onFrameFinished(std::function<void()> finishFunc)
 {
 	m_frameFinishedCallbacks.push_back(std::move(finishFunc));
}
-uint32_t RenderFrame::acquireSwapchainImage()
+SwapChainStatus VulkanRenderFrame::acquireSwapchainImage(uint32_t& outImageIndex)
 {
 	Assertion(!m_inFlight, "Cannot acquire swapchain image when frame is still in flight.");
 
 	uint32_t imageIndex;
-	vk::Result res = m_device.acquireNextImageKHR(m_swapChain,
-		std::numeric_limits<uint64_t>::max(),
-		m_imageAvailableSemaphore.get(),
-		nullptr,
-		&imageIndex);
-	// TODO: This should handle at least VK_SUBOPTIMAL_KHR, which means that the swap chain is no longer
-	// optimal and should be recreated.
-	(void)res;
+	vk::Result res;
+	try {
+		res = m_device.acquireNextImageKHR(m_swapChain,
+			std::numeric_limits<uint64_t>::max(),
+			m_imageAvailableSemaphore.get(),
+			nullptr,
+			&imageIndex);
+	} catch (vk::OutOfDateKHRError&) {
+		return SwapChainStatus::eOutOfDate;
+	}
 
 	m_swapChainIdx = imageIndex;
+	outImageIndex = imageIndex;
 
-	return imageIndex;
+	if (res == vk::Result::eSuboptimalKHR) {
+		return SwapChainStatus::eSuboptimal;
+	}
+	return SwapChainStatus::eSuccess;
 }
 
-void RenderFrame::submitAndPresent(const std::vector<vk::CommandBuffer>& cmdBuffers)
+SwapChainStatus VulkanRenderFrame::submitAndPresent(const SCP_vector<vk::CommandBuffer>& cmdBuffers)
 {
 	Assertion(!m_inFlight, "Cannot submit a frame for presentation when it is still in flight.");
 
+	// Wait at color attachment output stage — the first use of the swap chain image
+	// is loadOp=eClear at the start of the render pass, which is a color attachment write.
 	const std::array<vk::PipelineStageFlags, 1> waitStages = {vk::PipelineStageFlagBits::eColorAttachmentOutput};
 
 	const std::array<vk::Semaphore, 1> waitSemaphores = {m_imageAvailableSemaphore.get()};
@@ -77,7 +85,7 @@ void RenderFrame::submitAndPresent(const std::vector<vk::CommandBuffer>& cmdBuff
 
 	m_graphicsQueue.submit(submitInfo, m_frameInFlightFence.get());
 
-	// This frame is now officially in flight
+	// This frame is now officially in flight (fence pending even if present fails)
 	m_inFlight = true;
 
 	vk::PresentInfoKHR presentInfo;
@@ -90,10 +98,21 @@ void RenderFrame::submitAndPresent(const std::vector<vk::CommandBuffer>& cmdBuff
 	presentInfo.pImageIndices = &m_swapChainIdx;
 	presentInfo.pResults = nullptr;
 
-	vk::Result res = m_presentQueue.presentKHR(presentInfo);
-	// TODO: This should handle at least VK_SUBOPTIMAL_KHR, which means that the swap chain is no longer
-	// optimal and should be recreated.
-	(void)res;
+	vk::Result res;
+	try {
+		res = m_presentQueue.presentKHR(presentInfo);
+	} catch (vk::OutOfDateKHRError&) {
+		return SwapChainStatus::eOutOfDate;
+	}
+
+	if (res == vk::Result::eSuboptimalKHR) {
+		return SwapChainStatus::eSuboptimal;
+	}
+	return SwapChainStatus::eSuccess;
+}
+void VulkanRenderFrame::updateSwapChain(vk::SwapchainKHR swapChain)
+{
+	m_swapChain = swapChain;
 }
 
 } // namespace vulkan
diff --git a/code/graphics/vulkan/RenderFrame.h b/code/graphics/vulkan/VulkanRenderFrame.h
similarity index 56%
rename from code/graphics/vulkan/RenderFrame.h
rename to code/graphics/vulkan/VulkanRenderFrame.h
index a5c2ad51c33..d9243e99452 100644
--- a/code/graphics/vulkan/RenderFrame.h
+++ b/code/graphics/vulkan/VulkanRenderFrame.h
@@ -7,17 +7,25 @@
 namespace graphics {
 namespace vulkan {
 
-class RenderFrame {
+enum class SwapChainStatus {
+	eSuccess,
+	eSuboptimal, // Swap chain works but should be recreated
+	eOutOfDate,  // Must recreate before next use
+};
+
+class VulkanRenderFrame {
  public:
-	RenderFrame(vk::Device device, vk::SwapchainKHR swapChain, vk::Queue graphicsQueue, vk::Queue presentQueue);
+	VulkanRenderFrame(vk::Device device, vk::SwapchainKHR swapChain, vk::Queue graphicsQueue, vk::Queue presentQueue);
 
 	void waitForFinish();
 
-	uint32_t acquireSwapchainImage();
+	SwapChainStatus acquireSwapchainImage(uint32_t& outImageIndex);
 
 	void onFrameFinished(std::function<void()> finishFunc);
 
-	void submitAndPresent(const std::vector<vk::CommandBuffer>& cmdBuffers);
+	SwapChainStatus submitAndPresent(const SCP_vector<vk::CommandBuffer>& cmdBuffers);
+
+	void updateSwapChain(vk::SwapchainKHR swapChain);
 
 private:
	vk::Device m_device;
diff --git a/code/graphics/vulkan/VulkanRenderState.cpp b/code/graphics/vulkan/VulkanRenderState.cpp
new file mode 100644
index 00000000000..f9d256f5939
--- /dev/null
+++ b/code/graphics/vulkan/VulkanRenderState.cpp
@@ -0,0 +1,273 @@
+#include "VulkanRenderState.h"
+
+namespace graphics {
+namespace vulkan {
+
+void convertBlendMode(gr_alpha_blend mode, vk::BlendFactor& srcFactor, vk::BlendFactor& dstFactor)
+{
+	// Based on SetAlphaBlendMode in gropenglstate.cpp
+	switch (mode) {
+	case ALPHA_BLEND_NONE:
+		srcFactor = vk::BlendFactor::eOne;
+		dstFactor = vk::BlendFactor::eZero;
+		break;
+	case ALPHA_BLEND_ALPHA_ADDITIVE:
+		srcFactor = vk::BlendFactor::eSrcAlpha;
+		dstFactor = vk::BlendFactor::eOne;
+		break;
+	case ALPHA_BLEND_ALPHA_BLEND_ALPHA:
+		srcFactor = vk::BlendFactor::eSrcAlpha;
+		dstFactor = vk::BlendFactor::eOneMinusSrcAlpha;
+		break;
+	case ALPHA_BLEND_ALPHA_BLEND_SRC_COLOR:
+		srcFactor = vk::BlendFactor::eSrcColor;
+		dstFactor = vk::BlendFactor::eOneMinusSrcColor;
+		break;
+	case ALPHA_BLEND_ADDITIVE:
+		srcFactor = vk::BlendFactor::eOne;
+		dstFactor = vk::BlendFactor::eOne;
+		break;
+	case ALPHA_BLEND_PREMULTIPLIED:
+		srcFactor = vk::BlendFactor::eOne;
+		dstFactor = vk::BlendFactor::eOneMinusSrcAlpha;
+		break;
+	default:
+		srcFactor = vk::BlendFactor::eOne;
+		dstFactor = vk::BlendFactor::eZero;
+		break;
+	}
+}
+
+void convertDepthMode(gr_zbuffer_type type, vk::CompareOp& compareOp, bool& writeEnable)
+{
+	// Based on SetZbufferType in gropenglstate.cpp
+	switch (type) {
+	case ZBUFFER_TYPE_NONE:
+		compareOp = vk::CompareOp::eAlways;
+		writeEnable = false;
+		break;
+	case ZBUFFER_TYPE_READ:
+		compareOp = vk::CompareOp::eLess;
+		writeEnable = false;
+		break;
+	case ZBUFFER_TYPE_WRITE:
+		compareOp = vk::CompareOp::eAlways;
+		writeEnable = true;
+		break;
+	case ZBUFFER_TYPE_FULL:
+		compareOp = vk::CompareOp::eLess;
+		writeEnable = true;
+		break;
+	default:
+		compareOp = vk::CompareOp::eAlways;
+		writeEnable = false;
+		break;
+	}
+}
+
+vk::CompareOp convertStencilCompare(ComparisionFunction func)
+{
+	switch (func) {
+	case ComparisionFunction::Never:
+		return vk::CompareOp::eNever;
+	case ComparisionFunction::Less:
+		return vk::CompareOp::eLess;
+	case ComparisionFunction::Equal:
+		return vk::CompareOp::eEqual;
+	case ComparisionFunction::LessOrEqual:
+		return vk::CompareOp::eLessOrEqual;
+	case ComparisionFunction::Greater:
+		return vk::CompareOp::eGreater;
+	case ComparisionFunction::NotEqual:
+		return vk::CompareOp::eNotEqual;
+	case ComparisionFunction::GreaterOrEqual:
+		return vk::CompareOp::eGreaterOrEqual;
+	case ComparisionFunction::Always:
+	default:
+		return vk::CompareOp::eAlways;
+	}
+}
+
+vk::StencilOp convertStencilOp(StencilOperation op)
+{
+	switch (op) {
+	case StencilOperation::Keep:
+		return vk::StencilOp::eKeep;
+	case StencilOperation::Zero:
+		return vk::StencilOp::eZero;
+	case StencilOperation::Replace:
+		return vk::StencilOp::eReplace;
+	case StencilOperation::Increment:
+		return vk::StencilOp::eIncrementAndClamp;
+	case StencilOperation::Decrement:
+		return vk::StencilOp::eDecrementAndClamp;
+	case StencilOperation::Invert:
+		return vk::StencilOp::eInvert;
+	case StencilOperation::IncrementWrap:
+		return vk::StencilOp::eIncrementAndWrap;
+	case StencilOperation::DecrementWrap:
+		return vk::StencilOp::eDecrementAndWrap;
+	default:
+		return vk::StencilOp::eKeep;
+	}
+}
+
+vk::PrimitiveTopology convertPrimitiveType(primitive_type type)
+{
+	switch (type) {
+	case PRIM_TYPE_POINTS:
+		return vk::PrimitiveTopology::ePointList;
+	case PRIM_TYPE_LINES:
+		return vk::PrimitiveTopology::eLineList;
+	case PRIM_TYPE_LINESTRIP:
+		return vk::PrimitiveTopology::eLineStrip;
+	case PRIM_TYPE_TRIS:
+		return vk::PrimitiveTopology::eTriangleList;
+	case PRIM_TYPE_TRISTRIP:
+		return vk::PrimitiveTopology::eTriangleStrip;
+	case PRIM_TYPE_TRIFAN:
+		return vk::PrimitiveTopology::eTriangleFan;
+	default:
+		return vk::PrimitiveTopology::eTriangleList;
+	}
+}
+
+vk::CullModeFlags convertCullMode(bool cullEnabled)
+{
+	return cullEnabled ? vk::CullModeFlagBits::eBack : vk::CullModeFlagBits::eNone;
+}
+
+bool isBlendingEnabled(gr_alpha_blend mode)
+{
+	return mode != ALPHA_BLEND_NONE;
+}
+
+vk::PipelineColorBlendAttachmentState createColorBlendAttachment(gr_alpha_blend mode, const bvec4& colorWriteMask)
+{
+	vk::PipelineColorBlendAttachmentState attachment;
+
+	attachment.blendEnable = isBlendingEnabled(mode) ? VK_TRUE : VK_FALSE;
+
+	vk::BlendFactor srcFactor, dstFactor;
+	convertBlendMode(mode, srcFactor, dstFactor);
+
+	attachment.srcColorBlendFactor = srcFactor;
+	attachment.dstColorBlendFactor = dstFactor;
+	attachment.colorBlendOp = vk::BlendOp::eAdd;
+
+	// Alpha blend - same as color for most modes
+	attachment.srcAlphaBlendFactor = srcFactor;
+	attachment.dstAlphaBlendFactor = dstFactor;
+	attachment.alphaBlendOp = vk::BlendOp::eAdd;
+
+	// Color write mask from material
+	vk::ColorComponentFlags writeMask;
+	if (colorWriteMask.x) writeMask |= vk::ColorComponentFlagBits::eR;
+	if (colorWriteMask.y) writeMask |= vk::ColorComponentFlagBits::eG;
+	if (colorWriteMask.z) writeMask |= vk::ColorComponentFlagBits::eB;
+	if (colorWriteMask.w) writeMask |= vk::ColorComponentFlagBits::eA;
+	attachment.colorWriteMask = writeMask;
+
+	return attachment;
+}
+
+vk::PipelineDepthStencilStateCreateInfo createDepthStencilState(
+	gr_zbuffer_type depthMode,
+	bool stencilEnabled,
+	ComparisionFunction stencilFunc,
+	const material::StencilOp* frontOp,
+	const material::StencilOp* backOp,
+	uint32_t stencilMask)
+{
+	vk::PipelineDepthStencilStateCreateInfo info;
+
+	// Depth settings
+	vk::CompareOp depthCompare;
+	bool depthWrite;
+	convertDepthMode(depthMode, depthCompare, depthWrite);
+
+	info.depthTestEnable = (depthMode != ZBUFFER_TYPE_NONE) ? VK_TRUE : VK_FALSE;
+	info.depthWriteEnable = depthWrite ? VK_TRUE : VK_FALSE;
+	info.depthCompareOp = depthCompare;
+	info.depthBoundsTestEnable = VK_FALSE;
+	info.minDepthBounds = 0.0f;
+	info.maxDepthBounds = 1.0f;
+
+	// Stencil settings
+	info.stencilTestEnable = stencilEnabled ? VK_TRUE : VK_FALSE;
+
+	if (stencilEnabled) {
+		// Front face stencil
+		info.front.compareOp = convertStencilCompare(stencilFunc);
+		info.front.compareMask = 0xFF;
+		info.front.writeMask = stencilMask;
+		info.front.reference = 0; // Set dynamically
+
+		if (frontOp) {
+			info.front.failOp = convertStencilOp(frontOp->stencilFailOperation);
+			info.front.depthFailOp = convertStencilOp(frontOp->depthFailOperation);
+			info.front.passOp = convertStencilOp(frontOp->successOperation);
+		} else {
+			info.front.failOp = vk::StencilOp::eKeep;
+			info.front.depthFailOp = vk::StencilOp::eKeep;
+			info.front.passOp = vk::StencilOp::eKeep;
+		}
+
+		// Back face stencil
+		info.back.compareOp = convertStencilCompare(stencilFunc);
+		info.back.compareMask = 0xFF;
+		info.back.writeMask = stencilMask;
+		info.back.reference = 0;
+
+		if (backOp) {
+			info.back.failOp = convertStencilOp(backOp->stencilFailOperation);
+			info.back.depthFailOp = convertStencilOp(backOp->depthFailOperation);
+			info.back.passOp = convertStencilOp(backOp->successOperation);
+		} else {
+			info.back.failOp = vk::StencilOp::eKeep;
+			info.back.depthFailOp = vk::StencilOp::eKeep;
+			info.back.passOp = vk::StencilOp::eKeep;
+		}
+	}
+
+	return info;
+}
+
+vk::PipelineRasterizationStateCreateInfo createRasterizationState(
+	bool cullEnabled,
+	int fillMode,
+	bool frontFaceCW,
+	bool depthBiasEnabled)
+{
+	vk::PipelineRasterizationStateCreateInfo info;
+
+	info.depthClampEnable = VK_FALSE;
+	info.rasterizerDiscardEnable = VK_FALSE;
+
+	// Fill mode
+	switch (fillMode) {
+	case GR_FILL_MODE_WIRE:
+		info.polygonMode = vk::PolygonMode::eLine;
+		break;
+	case GR_FILL_MODE_SOLID:
+	default:
+		info.polygonMode = vk::PolygonMode::eFill;
+		break;
+	}
+
+	info.cullMode = convertCullMode(cullEnabled);
+	info.frontFace = frontFaceCW ? vk::FrontFace::eClockwise : vk::FrontFace::eCounterClockwise;
+
+	// Depth bias - actual values set dynamically via vkCmdSetDepthBias
+	info.depthBiasEnable = depthBiasEnabled ? VK_TRUE : VK_FALSE;
+	info.depthBiasConstantFactor = 0.0f;
+	info.depthBiasClamp = 0.0f;
+	info.depthBiasSlopeFactor = 0.0f;
+
+	info.lineWidth = 1.0f;
+
+	return info;
+}
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanRenderState.h b/code/graphics/vulkan/VulkanRenderState.h
new file mode 100644
index 00000000000..7e3c7e91900
--- /dev/null
+++ b/code/graphics/vulkan/VulkanRenderState.h
@@ -0,0 +1,103 @@
+#pragma once
+
+#include "globalincs/pstypes.h"
+#include "graphics/2d.h"
+#include "graphics/material.h"
+
+#include <vulkan/vulkan.hpp>
+
+namespace graphics {
+namespace vulkan {
+
+/**
+ * @brief Convert FSO alpha blend mode to Vulkan blend factors
+ * @param mode FSO blend mode
+ * @param srcFactor Output source blend factor
+ * @param dstFactor Output destination blend factor
+ */
+void convertBlendMode(gr_alpha_blend mode, vk::BlendFactor& srcFactor, vk::BlendFactor& dstFactor);
+
+/**
+ * @brief Convert FSO depth buffer type to Vulkan compare op and write mask
+ * @param type FSO zbuffer type
+ * @param compareOp Output compare operation
+ * @param writeEnable Output depth write enable
+ */
+void convertDepthMode(gr_zbuffer_type type, vk::CompareOp& compareOp, bool& writeEnable);
+
+/**
+ * @brief Convert FSO stencil comparison function to Vulkan compare op
+ * @param func FSO comparison function
+ * @return Vulkan compare operation
+ */
+vk::CompareOp convertStencilCompare(ComparisionFunction func);
+
+/**
+ * @brief Convert FSO stencil operation to Vulkan stencil op
+ * @param op FSO stencil operation
+ * @return Vulkan stencil operation
+ */
+vk::StencilOp convertStencilOp(StencilOperation op);
+
+/**
+ * @brief Convert FSO primitive type to Vulkan topology
+ * @param type FSO primitive type
+ * @return Vulkan primitive topology
+ */
+vk::PrimitiveTopology convertPrimitiveType(primitive_type type);
+
+/**
+ * @brief Convert FSO cull mode to Vulkan cull mode
+ * @param cullEnabled Whether culling is enabled
+ * @return Vulkan cull mode flags
+ */
+vk::CullModeFlags convertCullMode(bool cullEnabled);
+
+/**
+ * @brief Check if a blend mode requires blending to be enabled
+ * @param mode FSO blend mode
+ * @return true if blending should be enabled
+ */
+bool isBlendingEnabled(gr_alpha_blend mode);
+
+/**
+ * @brief Create a complete color blend attachment state
+ * @param mode FSO blend mode
+ * @param colorWriteMask Per-channel (RGBA) color write mask
+ * @return Vulkan color blend attachment state
+ */
+vk::PipelineColorBlendAttachmentState createColorBlendAttachment(gr_alpha_blend mode,
+	const bvec4& colorWriteMask = {true, true, true, true});
+
+/**
+ * @brief Create depth stencil state create info
+ * @param depthMode FSO depth buffer mode
+ * @param stencilEnabled Whether stencil testing is enabled
+ * @param stencilFunc Stencil comparison function
+ * @param frontOp Front face stencil operations
+ * @param backOp Back face stencil operations
+ * @param stencilMask Stencil write mask
+ * @return Vulkan depth stencil state create info
+ */
+vk::PipelineDepthStencilStateCreateInfo createDepthStencilState(
+	gr_zbuffer_type depthMode,
+	bool stencilEnabled = false,
+	ComparisionFunction stencilFunc = ComparisionFunction::Always,
+	const material::StencilOp* frontOp = nullptr,
+	const material::StencilOp* backOp = nullptr,
+	uint32_t stencilMask = 0xFF);
+
+/**
+ * @brief Create rasterization state create info
+ * @param cullEnabled Whether back-face culling is enabled
+ * @param fillMode Polygon fill mode (GR_FILL_MODE_WIRE for wireframe, otherwise solid fill)
+ * @param frontFaceCW Front face winding (true = CW, false = CCW)
+ * @param depthBiasEnabled Whether depth bias is enabled (actual values set dynamically)
+ * @return Vulkan rasterization state create info
+ */
+vk::PipelineRasterizationStateCreateInfo createRasterizationState(
+	bool cullEnabled = true,
+	int fillMode = 0,
+	bool frontFaceCW = true,
+	bool depthBiasEnabled = false);
+
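+// Example of assembling pipeline state with these helpers (values are
+// hypothetical and shown for orientation only):
+//
+//   auto blend  = createColorBlendAttachment(ALPHA_BLEND_ALPHA_BLEND_ALPHA);
+//   auto depth  = createDepthStencilState(ZBUFFER_TYPE_FULL);
+//   auto raster = createRasterizationState(true /*cull*/, GR_FILL_MODE_SOLID);
+//   // blend/depth/raster then feed vk::GraphicsPipelineCreateInfo
+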
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanRenderer.cpp b/code/graphics/vulkan/VulkanRenderer.cpp
index 50b6da7e220..86ad2f9f4b2 100644
--- a/code/graphics/vulkan/VulkanRenderer.cpp
+++ b/code/graphics/vulkan/VulkanRenderer.cpp
@@ -1,12 +1,20 @@
 #include "VulkanRenderer.h"
 
+#include "VulkanMemory.h"
+#include "VulkanBuffer.h"
+#include "VulkanTexture.h"
+#include "bmpman/bmpman.h"
+#include "cmdline/cmdline.h"
 #include "globalincs/version.h"
+#include "graphics/grinternal.h"
+#include "graphics/post_processing.h"
 
 #include "backends/imgui_impl_sdl.h"
 #include "backends/imgui_impl_vulkan.h"
 #include "def_files/def_files.h"
 #include "graphics/2d.h"
+#include "lighting/lighting.h"
 #include "libs/renderdoc/renderdoc.h"
 #include "mod_table/mod_table.h"
 
@@ -16,6 +24,8 @@
 
 VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
 
+extern float flFrametime;
+
 namespace graphics {
 namespace vulkan {
 
@@ -51,7 +61,8 @@ const SCP_vector<const char*> RequiredDeviceExtensions = {
 
 bool checkDeviceExtensionSupport(PhysicalDeviceValues& values)
 {
-	values.extensions = values.device.enumerateDeviceExtensionProperties();
+	auto exts = values.device.enumerateDeviceExtensionProperties();
+	values.extensions.assign(exts.begin(), exts.end());
 
 	std::set<std::string> requiredExtensions(RequiredDeviceExtensions.cbegin(), RequiredDeviceExtensions.cend());
 	for (const auto& extension : values.extensions) {
@@ -64,8 +75,10 @@ bool checkDeviceExtensionSupport(PhysicalDeviceValues& values)
 bool checkSwapChainSupport(PhysicalDeviceValues& values, const vk::UniqueSurfaceKHR& surface)
 {
 	values.surfaceCapabilities = values.device.getSurfaceCapabilitiesKHR(surface.get());
-	values.surfaceFormats = values.device.getSurfaceFormatsKHR(surface.get());
-	values.presentModes = values.device.getSurfacePresentModesKHR(surface.get());
+	auto fmts = values.device.getSurfaceFormatsKHR(surface.get());
+	values.surfaceFormats.assign(fmts.begin(), fmts.end());
+	auto modes = values.device.getSurfacePresentModesKHR(surface.get());
+	values.presentModes.assign(modes.begin(), modes.end());
 
 	return !values.surfaceFormats.empty() && !values.presentModes.empty();
 }
@@ -188,9 +201,11 @@ void printPhysicalDevice(const PhysicalDeviceValues& values)
 
 vk::SurfaceFormatKHR chooseSurfaceFormat(const PhysicalDeviceValues& values)
 {
+	// Use a non-sRGB (UNORM) format to match OpenGL's default framebuffer behavior.
+	// The FSO shaders handle gamma correction manually in the fragment shader and
+	// post-processing pipeline, so hardware sRGB conversion would double-correct.
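+	// (For reference: the shaders emit gamma-encoded output themselves, roughly
+	// pow(linearColor, 1/2.2); storing that into an sRGB image would encode it
+	// a second time on write.)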
 	for (const auto& availableFormat : values.surfaceFormats) {
-		// Simple check is enough for now
-		if (availableFormat.format == vk::Format::eB8G8R8A8Srgb &&
+		if (availableFormat.format == vk::Format::eB8G8R8A8Unorm &&
 			availableFormat.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear) {
 			return availableFormat;
 		}
@@ -201,21 +216,34 @@ vk::SurfaceFormatKHR chooseSurfaceFormat(const PhysicalDeviceValues& values)
 
 vk::PresentModeKHR choosePresentMode(const PhysicalDeviceValues& values)
 {
+	vk::PresentModeKHR chosen = vk::PresentModeKHR::eFifo; // guaranteed to be supported
+
 	// Depending on if we want Vsync or not, choose the best mode
 	for (const auto& availablePresentMode : values.presentModes) {
 		if (Gr_enable_vsync) {
 			if (availablePresentMode == vk::PresentModeKHR::eMailbox) {
-				return availablePresentMode;
+				chosen = availablePresentMode;
+				break;
 			}
 		} else {
 			if (availablePresentMode == vk::PresentModeKHR::eImmediate) {
-				return availablePresentMode;
+				chosen = availablePresentMode;
+				break;
 			}
 		}
 	}
 
-	// Guaranteed to be supported
-	return vk::PresentModeKHR::eFifo;
+	const char* name = "Unknown";
+	switch (chosen) {
+	case vk::PresentModeKHR::eImmediate:   name = "Immediate"; break;
+	case vk::PresentModeKHR::eMailbox:     name = "Mailbox"; break;
+	case vk::PresentModeKHR::eFifo:        name = "FIFO (vsync)"; break;
+	case vk::PresentModeKHR::eFifoRelaxed: name = "FIFO Relaxed"; break;
+	default: break;
+	}
+	mprintf(("Vulkan: Present mode: %s (Gr_enable_vsync=%d)\n", name, Gr_enable_vsync ? 1 : 0));
+
+	return chosen;
 }
 
 vk::Extent2D chooseSwapChainExtent(const PhysicalDeviceValues& values, uint32_t width, uint32_t height)
@@ -270,25 +298,153 @@ bool VulkanRenderer::initialize()
 		return false;
 	}
 
+	// Validate MSAA sample count against device limits
+	if (Cmdline_msaa_enabled > 0) {
+		auto limits = deviceValues.properties.limits;
+		vk::SampleCountFlags supported = limits.framebufferColorSampleCounts & limits.framebufferDepthSampleCounts;
+
+		// Map requested count to vk::SampleCountFlagBits
+		vk::SampleCountFlagBits requested = vk::SampleCountFlagBits::e1;
+		switch (Cmdline_msaa_enabled) {
+		case 4:  requested = vk::SampleCountFlagBits::e4; break;
+		case 8:  requested = vk::SampleCountFlagBits::e8; break;
+		case 16: requested = vk::SampleCountFlagBits::e16; break;
+		default:
+			mprintf(("Vulkan: Unsupported MSAA count %d, disabling MSAA\n", Cmdline_msaa_enabled));
+			Cmdline_msaa_enabled = 0;
+			break;
+		}
+
+		if (Cmdline_msaa_enabled > 0) {
+			if (supported & requested) {
+				m_msaaSampleCount = requested;
+				mprintf(("Vulkan: MSAA enabled with %dx sample count\n", Cmdline_msaa_enabled));
+			} else {
+				// Clamp down to highest supported
+				vk::SampleCountFlagBits fallback = vk::SampleCountFlagBits::e1;
+				int fallbackCount = 0;
+				if ((supported & vk::SampleCountFlagBits::e8) && Cmdline_msaa_enabled >= 8) {
+					fallback = vk::SampleCountFlagBits::e8; fallbackCount = 8;
+				} else if (supported & vk::SampleCountFlagBits::e4) {
+					fallback = vk::SampleCountFlagBits::e4; fallbackCount = 4;
+				}
+
+				if (fallbackCount > 0) {
+					mprintf(("Vulkan: Requested MSAA %dx not supported, falling back to %dx\n",
+						Cmdline_msaa_enabled, fallbackCount));
+					Cmdline_msaa_enabled = fallbackCount;
+					m_msaaSampleCount = fallback;
+				} else {
+					mprintf(("Vulkan: No suitable MSAA support, disabling MSAA\n"));
+					Cmdline_msaa_enabled = 0;
+				}
+			}
+		}
+	}
+
 	if (!createLogicalDevice(deviceValues)) {
 		mprintf(("Failed to create logical device.\n"));
 		return false;
 	}
 
+	createCommandPool(deviceValues);
+
 	if (!createSwapChain(deviceValues)) {
 		mprintf(("Failed to create swap chain.\n"));
 		return false;
 	}
 
+	createDepthResources();
 	createRenderPass();
-	createGraphicsPipeline();
 	createFrameBuffers();
+
 	createPresentSyncObjects();
-	createCommandPool(deviceValues);
+
+	// Initialize texture manager (needs command pool for uploads)
+	m_textureManager = std::unique_ptr<VulkanTextureManager>(new VulkanTextureManager());
+	if (!m_textureManager->init(m_device.get(), m_physicalDevice, m_memoryManager.get(),
+			m_graphicsCommandPool.get(), m_graphicsQueue)) {
+		mprintf(("Failed to initialize Vulkan texture manager!\n"));
+		return false;
+	}
+	setTextureManager(m_textureManager.get());
+
+	// Initialize shader manager
+	m_shaderManager = std::unique_ptr<VulkanShaderManager>(new VulkanShaderManager());
+	if (!m_shaderManager->init(m_device.get())) {
+		mprintf(("Failed to initialize Vulkan shader manager!\n"));
+		return false;
+	}
+	setShaderManager(m_shaderManager.get());
+
+	// Initialize descriptor manager
+	m_descriptorManager = std::unique_ptr<VulkanDescriptorManager>(new VulkanDescriptorManager());
+	if (!m_descriptorManager->init(m_device.get())) {
+		mprintf(("Failed to initialize Vulkan descriptor manager!\n"));
+		return false;
+	}
+	setDescriptorManager(m_descriptorManager.get());
+
+	// Initialize pipeline manager
+	m_pipelineManager = std::unique_ptr<VulkanPipelineManager>(new VulkanPipelineManager());
+	if (!m_pipelineManager->init(m_device.get(), m_shaderManager.get(), m_descriptorManager.get())) {
+		mprintf(("Failed to initialize Vulkan pipeline manager!\n"));
+		return false;
+	}
+	setPipelineManager(m_pipelineManager.get());
+	m_pipelineManager->loadPipelineCache("vulkan_pipeline.cache");
+
+	// Initialize state tracker
+	m_stateTracker = std::unique_ptr<VulkanStateTracker>(new VulkanStateTracker());
+	if (!m_stateTracker->init(m_device.get())) {
+		mprintf(("Failed to initialize Vulkan state tracker!\n"));
+		return false;
+	}
+	setStateTracker(m_stateTracker.get());
+
+	// Initialize draw manager
+	m_drawManager = std::unique_ptr<VulkanDrawManager>(new VulkanDrawManager());
+	if (!m_drawManager->init(m_device.get())) {
+		mprintf(("Failed to initialize Vulkan draw manager!\n"));
+		return false;
+	}
+	setDrawManager(m_drawManager.get());
+
+	// Initialize post-processing
+	m_postProcessor = std::unique_ptr<VulkanPostProcessor>(new VulkanPostProcessor());
+	if (!m_postProcessor->init(m_device.get(), m_physicalDevice, m_memoryManager.get(),
+			m_swapChainExtent, m_depthFormat)) {
+		mprintf(("Warning: Failed to initialize Vulkan post-processor, post-processing will be disabled\n"));
+		m_postProcessor.reset();
+	} else {
+		setPostProcessor(m_postProcessor.get());
+	}
+
+	// Initialize shared post-processing manager (bloom/lightshaft settings, post-effect table)
+	// This is renderer-agnostic; OpenGL creates it in opengl_post_process_init().
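+	// (If parse_table() fails below, the manager is kept with default settings;
+	// the failure is reported as a warning only.)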
+	if (!graphics::Post_processing_manager) {
+		graphics::Post_processing_manager.reset(new graphics::PostProcessingManager());
+		if (!graphics::Post_processing_manager->parse_table()) {
+			mprintf(("Warning: Unable to read post-processing table\n"));
+		}
+	}
+
+	// Initialize query manager for GPU timestamp profiling
+	m_queryManager = std::unique_ptr<VulkanQueryManager>(new VulkanQueryManager());
+	if (!m_queryManager->init(m_device.get(), m_physicalDevice.getProperties().limits.timestampPeriod,
+			m_graphicsCommandPool.get(), m_graphicsQueue)) {
+		mprintf(("Warning: Failed to initialize Vulkan query manager, GPU profiling will be disabled\n"));
+		m_queryManager.reset();
+	} else {
+		setQueryManager(m_queryManager.get());
+	}
 
 	// Prepare the rendering state by acquiring our first swap chain image
 	acquireNextSwapChainImage();
 
+	// Initialize ImGui Vulkan rendering backend
+	initImGui();
+
 	return true;
 }
 
@@ -356,7 +512,7 @@ bool VulkanRenderer::initializeInstance()
 		return false;
 	}
 
-	std::vector<const char*> extensions;
+	SCP_vector<const char*> extensions;
 	extensions.resize(count);
 
 	if (!SDL_Vulkan_GetInstanceExtensions(window, &count, extensions.data())) {
@@ -386,10 +542,14 @@ bool VulkanRenderer::initializeInstance()
 			extensions.push_back(VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
 			m_debugReportEnabled = true;
 		}
+		if (!stricmp(ext.extensionName, VK_EXT_DEBUG_UTILS_EXTENSION_NAME)) {
+			extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
+			m_debugUtilsEnabled = true;
+		}
 		}
 	}
 
-	std::vector<const char*> layers;
+	SCP_vector<const char*> layers;
 	const auto supportedLayers = vk::enumerateInstanceLayerProperties();
 	mprintf(("Instance layers:\n"));
 	for (const auto& layer : supportedLayers) {
@@ -401,7 +561,9 @@ bool VulkanRenderer::initializeInstance()
 			VK_VERSION_PATCH(layer.specVersion),
 			layer.implementationVersion));
 		if (FSO_DEBUG || Cmdline_graphics_debug_output) {
-			if (!stricmp(layer.layerName, "VK_LAYER_LUNARG_core_validation")) {
+			if (!stricmp(layer.layerName, "VK_LAYER_KHRONOS_validation")) {
+				layers.push_back("VK_LAYER_KHRONOS_validation");
+			} else if (!stricmp(layer.layerName, "VK_LAYER_LUNARG_core_validation")) {
 				layers.push_back("VK_LAYER_LUNARG_core_validation");
 			}
 		}
@@ -490,7 +652,8 @@ bool VulkanRenderer::pickPhysicalDevice(PhysicalDeviceValues& deviceValues)
 		vals.device = dev;
 		vals.properties = dev.getProperties2().properties;
 		vals.features = dev.getFeatures2().features;
-		vals.queueProperties = dev.getQueueFamilyProperties();
+		auto qprops = dev.getQueueFamilyProperties();
+		vals.queueProperties.assign(qprops.begin(), qprops.end());
 		return vals;
 	});
@@ -525,7 +688,7 @@ bool VulkanRenderer::createLogicalDevice(const PhysicalDeviceValues& deviceValue
 {
 	float queuePriority = 1.0f;
 
-	std::vector<vk::DeviceQueueCreateInfo> queueInfos;
+	SCP_vector<vk::DeviceQueueCreateInfo> queueInfos;
 	const std::set<uint32_t> familyIndices{deviceValues.graphicsQueueIndex.index,
 		deviceValues.transferQueueIndex.index,
 		deviceValues.presentQueueIndex.index};
@@ -535,24 +698,72 @@ bool VulkanRenderer::createLogicalDevice(const PhysicalDeviceValues& deviceValue
 		queueInfos.emplace_back(vk::DeviceQueueCreateFlags(), index, 1, &queuePriority);
 	}
 
+	// Build extension list: required + optional
+	SCP_vector<const char*> enabledExtensions(RequiredDeviceExtensions.begin(), RequiredDeviceExtensions.end());
+
+	// Check for VK_EXT_shader_viewport_index_layer (needed for shadow cascade routing)
+	m_supportsShaderViewportLayerOutput = false;
+	for (const auto& ext : deviceValues.extensions) {
+		if (strcmp(ext.extensionName, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME) == 0) {
+			m_supportsShaderViewportLayerOutput = true;
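+			// (With this extension the vertex shader can write gl_Layer directly,
+			// letting shadow cascades target array layers without a geometry
+			// shader stage.)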
enabledExtensions.push_back(VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME); + mprintf(("Vulkan: Enabling %s (shadow cascade support)\n", VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME)); + break; + } + } + vk::DeviceCreateInfo deviceCreate; deviceCreate.pQueueCreateInfos = queueInfos.data(); deviceCreate.queueCreateInfoCount = static_cast(queueInfos.size()); deviceCreate.pEnabledFeatures = &deviceValues.features; - deviceCreate.ppEnabledExtensionNames = RequiredDeviceExtensions.data(); - deviceCreate.enabledExtensionCount = static_cast(RequiredDeviceExtensions.size()); + deviceCreate.ppEnabledExtensionNames = enabledExtensions.data(); + deviceCreate.enabledExtensionCount = static_cast(enabledExtensions.size()); m_device = deviceValues.device.createDeviceUnique(deviceCreate); + // Load device-level function pointers for the dynamic dispatcher + VULKAN_HPP_DEFAULT_DISPATCHER.init(m_device.get()); + // Create queues m_graphicsQueue = m_device->getQueue(deviceValues.graphicsQueueIndex.index, 0); m_transferQueue = m_device->getQueue(deviceValues.transferQueueIndex.index, 0); m_presentQueue = m_device->getQueue(deviceValues.presentQueueIndex.index, 0); + // Store physical device and queue family indices for later use + m_physicalDevice = deviceValues.device; + m_graphicsQueueFamilyIndex = deviceValues.graphicsQueueIndex.index; + m_transferQueueFamilyIndex = deviceValues.transferQueueIndex.index; + m_presentQueueFamilyIndex = deviceValues.presentQueueIndex.index; + + // Initialize memory manager + m_memoryManager = std::unique_ptr(new VulkanMemoryManager()); + if (!m_memoryManager->init(m_physicalDevice, m_device.get())) { + mprintf(("Failed to initialize Vulkan memory manager!\n")); + return false; + } + setMemoryManager(m_memoryManager.get()); + + // Initialize deletion queue for deferred resource destruction + m_deletionQueue = std::unique_ptr(new VulkanDeletionQueue()); + m_deletionQueue->init(m_device.get(), m_memoryManager.get()); + setDeletionQueue(m_deletionQueue.get()); + + // Initialize buffer manager + m_bufferManager = std::unique_ptr(new VulkanBufferManager()); + if (!m_bufferManager->init(m_device.get(), m_memoryManager.get(), + m_graphicsQueueFamilyIndex, m_transferQueueFamilyIndex, + getMinUniformBufferOffsetAlignment())) { + mprintf(("Failed to initialize Vulkan buffer manager!\n")); + return false; + } + setBufferManager(m_bufferManager.get()); + // Set initial frame index for buffer manager + m_bufferManager->setCurrentFrame(m_currentFrame); + return true; } -bool VulkanRenderer::createSwapChain(const PhysicalDeviceValues& deviceValues) +bool VulkanRenderer::createSwapChain(const PhysicalDeviceValues& deviceValues, vk::SwapchainKHR oldSwapchain) { // Choose one more than the minimum to avoid driver synchronization if it is not done with a thread yet uint32_t imageCount = deviceValues.surfaceCapabilities.minImageCount + 1; @@ -570,7 +781,9 @@ bool VulkanRenderer::createSwapChain(const PhysicalDeviceValues& deviceValues) createInfo.imageColorSpace = surfaceFormat.colorSpace; createInfo.imageExtent = chooseSwapChainExtent(deviceValues, gr_screen.max_w, gr_screen.max_h); createInfo.imageArrayLayers = 1; - createInfo.imageUsage = vk::ImageUsageFlagBits::eColorAttachment; + createInfo.imageUsage = vk::ImageUsageFlagBits::eColorAttachment + | vk::ImageUsageFlagBits::eTransferSrc + | vk::ImageUsageFlagBits::eTransferDst; const uint32_t queueFamilyIndices[] = {deviceValues.graphicsQueueIndex.index, deviceValues.presentQueueIndex.index}; if (deviceValues.graphicsQueueIndex.index 
!= deviceValues.presentQueueIndex.index) {

@@ -579,20 +792,24 @@ bool VulkanRenderer::createSwapChain(const PhysicalDeviceValues& deviceValues)
 		createInfo.pQueueFamilyIndices = queueFamilyIndices;
 	} else {
 		createInfo.imageSharingMode = vk::SharingMode::eExclusive;
-		createInfo.queueFamilyIndexCount = 0;     // Optional
-		createInfo.pQueueFamilyIndices = nullptr; // Optional
 	}

 	createInfo.preTransform = deviceValues.surfaceCapabilities.currentTransform;
 	createInfo.compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eOpaque;
 	createInfo.presentMode = choosePresentMode(deviceValues);
 	createInfo.clipped = true;
-	createInfo.oldSwapchain = nullptr;
+	createInfo.oldSwapchain = oldSwapchain;
+
+	auto newSwapChain = m_device->createSwapchainKHRUnique(createInfo);
+
+	// Clear old resources before replacing the swap chain
+	m_swapChainFramebuffers.clear();
+	m_swapChainImageViews.clear();

-	m_swapChain = m_device->createSwapchainKHRUnique(createInfo);
+	m_swapChain = std::move(newSwapChain);

-	std::vector<vk::Image> swapChainImages = m_device->getSwapchainImagesKHR(m_swapChain.get());
-	m_swapChainImages = SCP_vector<vk::Image>(swapChainImages.begin(), swapChainImages.end());
+	auto swapChainImages = m_device->getSwapchainImagesKHR(m_swapChain.get());
+	m_swapChainImages.assign(swapChainImages.begin(), swapChainImages.end());

 	m_swapChainImageFormat = surfaceFormat.format;
 	m_swapChainExtent = createInfo.imageExtent;

@@ -617,29 +834,68 @@
 		m_swapChainImageViews.push_back(m_device->createImageViewUnique(viewCreateInfo));
 	}

-	return true;
-}

-vk::UniqueShaderModule VulkanRenderer::loadShader(const SCP_string& name)
-{
-	const auto def_file = defaults_get_file(name.c_str());
+	// Transition new images eUndefined → ePresentSrcKHR so the render pass
+	// can use initialLayout=ePresentSrcKHR from the start.
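+	// This is a pure layout transition: the access masks below stay empty, which is
+	// safe here because the submit ends with a queue waitIdle(), so nothing can race
+	// the images before their first real use.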
+ { + vk::CommandBufferAllocateInfo allocInfo; + allocInfo.commandPool = m_graphicsCommandPool.get(); + allocInfo.level = vk::CommandBufferLevel::ePrimary; + allocInfo.commandBufferCount = 1; + + auto cmdBuffers = m_device->allocateCommandBuffers(allocInfo); + auto cmd = cmdBuffers.front(); + + vk::CommandBufferBeginInfo beginInfo; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit; + cmd.begin(beginInfo); + + for (auto& image : m_swapChainImages) { + vk::ImageMemoryBarrier barrier; + barrier.oldLayout = vk::ImageLayout::eUndefined; + barrier.newLayout = vk::ImageLayout::ePresentSrcKHR; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + barrier.srcAccessMask = {}; + barrier.dstAccessMask = {}; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eBottomOfPipe, + {}, nullptr, nullptr, barrier); + } + + cmd.end(); - vk::ShaderModuleCreateInfo createInfo; - createInfo.codeSize = def_file.size; - createInfo.pCode = static_cast(def_file.data); + vk::SubmitInfo submitInfo; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmd; + m_graphicsQueue.submit(submitInfo, nullptr); + m_graphicsQueue.waitIdle(); - return m_device->createShaderModuleUnique(createInfo); + m_device->freeCommandBuffers(m_graphicsCommandPool.get(), cmdBuffers); + } + + return true; } void VulkanRenderer::createFrameBuffers() { m_swapChainFramebuffers.reserve(m_swapChainImageViews.size()); for (const auto& imageView : m_swapChainImageViews) { + // Attachment 0: color, Attachment 1: depth (shared across all framebuffers) const vk::ImageView attachments[] = { imageView.get(), + m_depthImageView.get(), }; vk::FramebufferCreateInfo framebufferInfo; framebufferInfo.renderPass = m_renderPass.get(); - framebufferInfo.attachmentCount = 1; + framebufferInfo.attachmentCount = 2; framebufferInfo.pAttachments = attachments; framebufferInfo.width = m_swapChainExtent.width; framebufferInfo.height = m_swapChainExtent.height; @@ -648,168 +904,139 @@ void VulkanRenderer::createFrameBuffers() m_swapChainFramebuffers.push_back(m_device->createFramebufferUnique(framebufferInfo)); } } +vk::Format VulkanRenderer::findDepthFormat() +{ + // Prefer D32_SFLOAT for best precision, fall back to D32_SFLOAT_S8 or D24_UNORM_S8 + const vk::Format candidates[] = { + vk::Format::eD32Sfloat, + vk::Format::eD32SfloatS8Uint, + vk::Format::eD24UnormS8Uint, + }; + + for (auto format : candidates) { + auto props = m_physicalDevice.getFormatProperties(format); + if (props.optimalTilingFeatures & vk::FormatFeatureFlagBits::eDepthStencilAttachment) { + return format; + } + } + + // Should never happen on any real GPU + Error(LOCATION, "Failed to find supported depth format!"); + return vk::Format::eD32Sfloat; +} +void VulkanRenderer::createDepthResources() +{ + m_depthFormat = findDepthFormat(); + + // Create depth image + vk::ImageCreateInfo imageInfo; + imageInfo.imageType = vk::ImageType::e2D; + imageInfo.format = m_depthFormat; + imageInfo.extent.width = m_swapChainExtent.width; + imageInfo.extent.height = m_swapChainExtent.height; + imageInfo.extent.depth = 1; + imageInfo.mipLevels = 1; + imageInfo.arrayLayers = 1; + imageInfo.samples = 
vk::SampleCountFlagBits::e1; + imageInfo.tiling = vk::ImageTiling::eOptimal; + imageInfo.usage = vk::ImageUsageFlagBits::eDepthStencilAttachment; + imageInfo.sharingMode = vk::SharingMode::eExclusive; + imageInfo.initialLayout = vk::ImageLayout::eUndefined; + + m_depthImage = m_device->createImageUnique(imageInfo); + + // Allocate GPU memory for the depth image + m_memoryManager->allocateImageMemory(m_depthImage.get(), MemoryUsage::GpuOnly, m_depthImageMemory); + + // Create depth image view + vk::ImageViewCreateInfo viewInfo; + viewInfo.image = m_depthImage.get(); + viewInfo.viewType = vk::ImageViewType::e2D; + viewInfo.format = m_depthFormat; + viewInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eDepth; + viewInfo.subresourceRange.baseMipLevel = 0; + viewInfo.subresourceRange.levelCount = 1; + viewInfo.subresourceRange.baseArrayLayer = 0; + viewInfo.subresourceRange.layerCount = 1; + + m_depthImageView = m_device->createImageViewUnique(viewInfo); + + mprintf(("Vulkan: Created depth buffer (%dx%d, format %d)\n", + m_swapChainExtent.width, m_swapChainExtent.height, static_cast(m_depthFormat))); +} void VulkanRenderer::createRenderPass() { + // Attachment 0: Color - clear each frame + // UI screens draw their own full-screen backgrounds; 3D clears via scene_texture_begin. + // Popups that need previous frame content use gr_save_screen/gr_restore_screen. vk::AttachmentDescription colorAttachment; colorAttachment.format = m_swapChainImageFormat; colorAttachment.samples = vk::SampleCountFlagBits::e1; - colorAttachment.loadOp = vk::AttachmentLoadOp::eClear; colorAttachment.storeOp = vk::AttachmentStoreOp::eStore; - colorAttachment.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; colorAttachment.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; - colorAttachment.initialLayout = vk::ImageLayout::eUndefined; colorAttachment.finalLayout = vk::ImageLayout::ePresentSrcKHR; + // Attachment 1: Depth + vk::AttachmentDescription depthAttachment; + depthAttachment.format = m_depthFormat; + depthAttachment.samples = vk::SampleCountFlagBits::e1; + depthAttachment.loadOp = vk::AttachmentLoadOp::eClear; + depthAttachment.storeOp = vk::AttachmentStoreOp::eDontCare; + depthAttachment.stencilLoadOp = vk::AttachmentLoadOp::eClear; + depthAttachment.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + depthAttachment.initialLayout = vk::ImageLayout::eUndefined; + depthAttachment.finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + vk::AttachmentReference colorAttachRef; colorAttachRef.attachment = 0; colorAttachRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + vk::AttachmentReference depthAttachRef; + depthAttachRef.attachment = 1; + depthAttachRef.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + vk::SubpassDescription subpass; subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; subpass.colorAttachmentCount = 1; subpass.pColorAttachments = &colorAttachRef; + subpass.pDepthStencilAttachment = &depthAttachRef; vk::SubpassDependency dependency; dependency.srcSubpass = VK_SUBPASS_EXTERNAL; dependency.dstSubpass = 0; + dependency.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput + | vk::PipelineStageFlagBits::eEarlyFragmentTests; + dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput + | vk::PipelineStageFlagBits::eEarlyFragmentTests; + dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite + | vk::AccessFlagBits::eDepthStencilAttachmentWrite; - dependency.srcStageMask = 
vk::PipelineStageFlagBits::eColorAttachmentOutput; - - dependency.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput; - dependency.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + std::array attachments = {colorAttachment, depthAttachment}; vk::RenderPassCreateInfo renderPassInfo; - renderPassInfo.attachmentCount = 1; - renderPassInfo.pAttachments = &colorAttachment; + renderPassInfo.attachmentCount = static_cast(attachments.size()); + renderPassInfo.pAttachments = attachments.data(); renderPassInfo.subpassCount = 1; renderPassInfo.pSubpasses = &subpass; renderPassInfo.dependencyCount = 1; renderPassInfo.pDependencies = &dependency; m_renderPass = m_device->createRenderPassUnique(renderPassInfo); -} -void VulkanRenderer::createGraphicsPipeline() -{ - auto vertShaderMod = loadShader("vulkan.vert.spv"); - vk::PipelineShaderStageCreateInfo vertStageCreate; - vertStageCreate.stage = vk::ShaderStageFlagBits::eVertex; - vertStageCreate.module = vertShaderMod.get(); - vertStageCreate.pName = "main"; - - auto fragShaderMod = loadShader("vulkan.frag.spv"); - vk::PipelineShaderStageCreateInfo fragStageCreate; - fragStageCreate.stage = vk::ShaderStageFlagBits::eFragment; - fragStageCreate.module = fragShaderMod.get(); - fragStageCreate.pName = "main"; - - std::array shaderStages = {vertStageCreate, fragStageCreate}; - - vk::PipelineVertexInputStateCreateInfo vertInCreate; - vertInCreate.vertexBindingDescriptionCount = 0; - vertInCreate.vertexAttributeDescriptionCount = 0; - - vk::PipelineInputAssemblyStateCreateInfo inputAssembly; - inputAssembly.topology = vk::PrimitiveTopology::eTriangleList; - inputAssembly.primitiveRestartEnable = false; - - vk::Viewport viewport; - viewport.x = 0.0f; - viewport.y = 0.0f; - viewport.width = i2fl(gr_screen.max_w); - viewport.height = i2fl(gr_screen.max_h); - viewport.minDepth = 0.0f; - viewport.maxDepth = 1.0f; - - vk::Rect2D scissor; - scissor.offset.x = 0; - scissor.offset.y = 0; - scissor.extent = m_swapChainExtent; - - vk::PipelineViewportStateCreateInfo viewportState; - viewportState.viewportCount = 1; - viewportState.pViewports = &viewport; - viewportState.scissorCount = 1; - viewportState.pScissors = &scissor; - - vk::PipelineRasterizationStateCreateInfo rasterizer; - rasterizer.depthClampEnable = false; - rasterizer.rasterizerDiscardEnable = false; - rasterizer.polygonMode = vk::PolygonMode::eFill; - rasterizer.lineWidth = 1.0f; - rasterizer.cullMode |= vk::CullModeFlagBits::eBack; - rasterizer.frontFace = vk::FrontFace::eClockwise; - rasterizer.depthBiasEnable = false; - rasterizer.depthBiasConstantFactor = 0.0f; - rasterizer.depthBiasClamp = 0.0f; - rasterizer.depthBiasSlopeFactor = 0.0f; - - vk::PipelineMultisampleStateCreateInfo multisampling; - multisampling.sampleShadingEnable = false; - multisampling.rasterizationSamples = vk::SampleCountFlagBits::e1; - multisampling.minSampleShading = 1.0f; - multisampling.pSampleMask = nullptr; - multisampling.alphaToCoverageEnable = false; - multisampling.alphaToOneEnable = false; - - vk::PipelineColorBlendAttachmentState colorBlendAttachment; - colorBlendAttachment.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | - vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA; - colorBlendAttachment.blendEnable = false; - colorBlendAttachment.srcColorBlendFactor = vk::BlendFactor::eOne; // Optional - colorBlendAttachment.dstColorBlendFactor = vk::BlendFactor::eZero; // Optional - colorBlendAttachment.colorBlendOp = vk::BlendOp::eAdd; // Optional - 
colorBlendAttachment.srcAlphaBlendFactor = vk::BlendFactor::eOne; // Optional - colorBlendAttachment.dstAlphaBlendFactor = vk::BlendFactor::eZero; // Optional - colorBlendAttachment.alphaBlendOp = vk::BlendOp::eAdd; // Optional - - vk::PipelineColorBlendStateCreateInfo colorBlending; - colorBlending.logicOpEnable = false; - colorBlending.logicOp = vk::LogicOp::eCopy; - colorBlending.attachmentCount = 1; - colorBlending.pAttachments = &colorBlendAttachment; - colorBlending.blendConstants[0] = 0.0f; - colorBlending.blendConstants[1] = 0.0f; - colorBlending.blendConstants[2] = 0.0f; - colorBlending.blendConstants[3] = 0.0f; - - vk::DynamicState dynamicStates[] = { - vk::DynamicState::eViewport, - vk::DynamicState::eLineWidth, - }; - vk::PipelineDynamicStateCreateInfo dynamicStateInfo; - dynamicStateInfo.dynamicStateCount = 2; - dynamicStateInfo.pDynamicStates = dynamicStates; - - vk::PipelineLayoutCreateInfo pipelineLayout; - pipelineLayout.setLayoutCount = 0; - pipelineLayout.pSetLayouts = nullptr; - pipelineLayout.pushConstantRangeCount = 0; - pipelineLayout.pPushConstantRanges = nullptr; - - m_pipelineLayout = m_device->createPipelineLayoutUnique(pipelineLayout); - - vk::GraphicsPipelineCreateInfo pipelineInfo; - pipelineInfo.stageCount = 2; - pipelineInfo.pStages = shaderStages.data(); - pipelineInfo.pVertexInputState = &vertInCreate; - pipelineInfo.pInputAssemblyState = &inputAssembly; - pipelineInfo.pViewportState = &viewportState; - pipelineInfo.pRasterizationState = &rasterizer; - pipelineInfo.pMultisampleState = &multisampling; - pipelineInfo.pDepthStencilState = nullptr; - pipelineInfo.pColorBlendState = &colorBlending; - pipelineInfo.pDynamicState = nullptr; - pipelineInfo.layout = m_pipelineLayout.get(); - pipelineInfo.renderPass = m_renderPass.get(); - pipelineInfo.subpass = 0; - pipelineInfo.basePipelineHandle = nullptr; - pipelineInfo.basePipelineIndex = -1; - - m_graphicsPipeline = m_device->createGraphicsPipelineUnique(nullptr, pipelineInfo).value; + // Create a second render pass with loadOp=eLoad for resuming the swap chain + // after post-processing. Same formats/samples = render-pass-compatible with m_renderPass. 
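+	// (Render pass compatibility ignores load/store ops and image layouts, so pipelines
+	// created against m_renderPass can be bound inside this pass as well.)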
+ colorAttachment.loadOp = vk::AttachmentLoadOp::eLoad; + colorAttachment.initialLayout = vk::ImageLayout::ePresentSrcKHR; + + depthAttachment.loadOp = vk::AttachmentLoadOp::eClear; + depthAttachment.initialLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal; + + attachments = {colorAttachment, depthAttachment}; + + m_renderPassLoad = m_device->createRenderPassUnique(renderPassInfo); } void VulkanRenderer::createCommandPool(const PhysicalDeviceValues& values) { @@ -822,7 +1049,7 @@ void VulkanRenderer::createCommandPool(const PhysicalDeviceValues& values) void VulkanRenderer::createPresentSyncObjects() { for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { - m_frames[i].reset(new RenderFrame(m_device.get(), m_swapChain.get(), m_graphicsQueue, m_presentQueue)); + m_frames[i].reset(new VulkanRenderFrame(m_device.get(), m_swapChain.get(), m_graphicsQueue, m_presentQueue)); } m_swapChainImageRenderImage.resize(m_swapChainImages.size(), nullptr); @@ -831,7 +1058,43 @@ void VulkanRenderer::acquireNextSwapChainImage() { m_frames[m_currentFrame]->waitForFinish(); - m_currentSwapChainImage = m_frames[m_currentFrame]->acquireSwapchainImage(); + // Recreate swap chain if flagged from a previous frame + if (m_swapChainNeedsRecreation) { + // Wait for minimized window (0x0 extent) before recreating + while (true) { + if (recreateSwapChain()) { + break; + } + // Window is minimized — wait and pump events until surface is valid again + os_sleep(100); + SDL_PumpEvents(); + } + } + + uint32_t imageIndex = 0; + auto status = m_frames[m_currentFrame]->acquireSwapchainImage(imageIndex); + + if (status == SwapChainStatus::eOutOfDate) { + // Must recreate immediately and retry + while (true) { + if (recreateSwapChain()) { + break; + } + os_sleep(100); + SDL_PumpEvents(); + } + status = m_frames[m_currentFrame]->acquireSwapchainImage(imageIndex); + if (status == SwapChainStatus::eOutOfDate) { + // If still failing after recreation, flag for next frame + m_swapChainNeedsRecreation = true; + } + } + + if (status == SwapChainStatus::eSuboptimal) { + m_swapChainNeedsRecreation = true; + } + + m_currentSwapChainImage = imageIndex; // Ensure that this image is no longer in use if (m_swapChainImageRenderImage[m_currentSwapChainImage]) { @@ -840,61 +1103,769 @@ void VulkanRenderer::acquireNextSwapChainImage() // Reserve the image as in use m_swapChainImageRenderImage[m_currentSwapChainImage] = m_frames[m_currentFrame].get(); } -void VulkanRenderer::drawScene(vk::Framebuffer destinationFb, vk::CommandBuffer cmdBuffer) +void VulkanRenderer::setupFrame() { + if (m_frameInProgress) { + Warning(LOCATION, "VulkanRenderer::setupFrame called while frame already in progress!"); + return; + } + + // Free completed texture upload command buffers + Assertion(m_textureManager, "Vulkan TextureManager not initialized in setupFrame!"); + m_textureManager->frameStart(); + + // Allocate command buffer for this frame + vk::CommandBufferAllocateInfo cmdBufferAlloc; + cmdBufferAlloc.commandPool = m_graphicsCommandPool.get(); + cmdBufferAlloc.level = vk::CommandBufferLevel::ePrimary; + cmdBufferAlloc.commandBufferCount = 1; + + auto cmdBufs = m_device->allocateCommandBuffers(cmdBufferAlloc); + m_currentCommandBuffers.assign(cmdBufs.begin(), cmdBufs.end()); + m_currentCommandBuffer = m_currentCommandBuffers.front(); + + // Begin command buffer vk::CommandBufferBeginInfo beginInfo; beginInfo.flags |= vk::CommandBufferUsageFlagBits::eOneTimeSubmit; + m_currentCommandBuffer.begin(beginInfo); + + Assertion(m_descriptorManager, "Vulkan 
DescriptorManager not initialized in setupFrame!"); + m_descriptorManager->beginFrame(); + + Assertion(m_stateTracker, "Vulkan StateTracker not initialized in setupFrame!"); + m_stateTracker->beginFrame(m_currentCommandBuffer); + + // Reset timestamp queries that were written last frame (must be outside render pass) + if (m_queryManager) { + m_queryManager->beginFrame(m_currentCommandBuffer); + } - cmdBuffer.begin(beginInfo); + // Reset per-frame flags + m_sceneDepthCopiedThisFrame = false; + // Reset per-frame draw statistics + Assertion(m_drawManager, "Vulkan DrawManager not initialized in setupFrame!"); + m_drawManager->resetFrameStats(); + + // Begin render pass vk::RenderPassBeginInfo renderPassBegin; renderPassBegin.renderPass = m_renderPass.get(); - renderPassBegin.framebuffer = destinationFb; + renderPassBegin.framebuffer = m_swapChainFramebuffers[m_currentSwapChainImage].get(); renderPassBegin.renderArea.offset.x = 0; renderPassBegin.renderArea.offset.y = 0; renderPassBegin.renderArea.extent = m_swapChainExtent; - vk::ClearValue clearColor; - clearColor.color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); - - renderPassBegin.clearValueCount = 1; - renderPassBegin.pClearValues = &clearColor; + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); // Clear to black each frame + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); // Clear depth to far plane - cmdBuffer.beginRenderPass(renderPassBegin, vk::SubpassContents::eInline); + renderPassBegin.clearValueCount = static_cast(clearValues.size()); + renderPassBegin.pClearValues = clearValues.data(); - cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, m_graphicsPipeline.get()); + m_currentCommandBuffer.beginRenderPass(renderPassBegin, vk::SubpassContents::eInline); - cmdBuffer.draw(3, 1, 0, 0); + // Set up state tracker for FSO draws + m_stateTracker->setRenderPass(m_renderPass.get(), 0); + // Negative viewport height for OpenGL-compatible Y-up NDC (VK_KHR_maintenance1) + m_stateTracker->setViewport(0.0f, + static_cast(m_swapChainExtent.height), + static_cast(m_swapChainExtent.width), + -static_cast(m_swapChainExtent.height)); - cmdBuffer.endRenderPass(); - - cmdBuffer.end(); + m_frameInProgress = true; } + void VulkanRenderer::flip() { - vk::CommandBufferAllocateInfo cmdBufferAlloc; - cmdBufferAlloc.commandPool = m_graphicsCommandPool.get(); - cmdBufferAlloc.level = vk::CommandBufferLevel::ePrimary; - cmdBufferAlloc.commandBufferCount = 1; + if (!m_frameInProgress) { + nprintf(("Vulkan", "VulkanRenderer::flip called without frame in progress, skipping\n")); + return; + } - // Uses the non-unique version since we can't get the buffers into the lambda below otherwise. 
Only C++14 can do - // that - auto allocatedBuffers = m_device->allocateCommandBuffers(cmdBufferAlloc); - auto& cmdBuffer = allocatedBuffers.front(); + // Print per-frame diagnostic summary before ending + Assertion(m_drawManager, "Vulkan DrawManager not initialized in flip!"); + m_drawManager->printFrameStats(); - drawScene(m_swapChainFramebuffers[m_currentSwapChainImage].get(), cmdBuffer); - m_frames[m_currentFrame]->onFrameFinished([this, allocatedBuffers]() mutable { - m_device->freeCommandBuffers(m_graphicsCommandPool.get(), allocatedBuffers); - allocatedBuffers.clear(); + // End render pass + m_currentCommandBuffer.endRenderPass(); + m_stateTracker->endFrame(); + m_descriptorManager->endFrame(); + + // End command buffer + m_currentCommandBuffer.end(); + + // Set up cleanup callback for command buffers + auto buffersToFree = m_currentCommandBuffers; + m_frames[m_currentFrame]->onFrameFinished([this, buffersToFree]() mutable { + m_device->freeCommandBuffers(m_graphicsCommandPool.get(), buffersToFree); }); - m_frames[m_currentFrame]->submitAndPresent(allocatedBuffers); + // Submit and present + auto presentStatus = m_frames[m_currentFrame]->submitAndPresent(m_currentCommandBuffers); + + if (presentStatus == SwapChainStatus::eSuboptimal || presentStatus == SwapChainStatus::eOutOfDate) { + m_swapChainNeedsRecreation = true; + } + + // Notify query manager that this frame's command buffer was submitted + if (m_queryManager) { + m_queryManager->notifySubmission(); + } + + // Track which swap chain image was just presented so saveScreen() can read it + m_previousSwapChainImage = m_currentSwapChainImage; + + // Clear current command buffer reference + m_currentCommandBuffer = nullptr; + m_currentCommandBuffers.clear(); + m_frameInProgress = false; // Advance counters to prepare for the next frame m_currentFrame = (m_currentFrame + 1) % MAX_FRAMES_IN_FLIGHT; + ++m_frameNumber; + + // Set the frame index for the buffer manager immediately after incrementing + // This ensures any buffer operations that happen before setupFrame() use the correct frame + m_bufferManager->setCurrentFrame(m_currentFrame); acquireNextSwapChainImage(); + + // Process deferred resource deletions AFTER the fence wait in + // acquireNextSwapChainImage, so we know the previous frame's commands + // (including async upload CBs) have completed before destroying resources. 
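+	// For example, a texture freed during the frame is only actually destroyed here,
+	// once the fence wait has proven the GPU finished every command that could
+	// still reference it.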
+ m_deletionQueue->processDestructions(); } + +bool VulkanRenderer::readbackFramebuffer(ubyte** outPixels, uint32_t* outWidth, uint32_t* outHeight) +{ + *outPixels = nullptr; + *outWidth = 0; + *outHeight = 0; + + if (m_previousSwapChainImage == UINT32_MAX) { + mprintf(("VulkanRenderer::readbackFramebuffer - no previous frame available\n")); + return false; + } + + if (!m_frameInProgress) { + mprintf(("VulkanRenderer::readbackFramebuffer - no frame in progress\n")); + return false; + } + + auto prevImage = m_swapChainImages[m_previousSwapChainImage]; + uint32_t w = m_swapChainExtent.width; + uint32_t h = m_swapChainExtent.height; + vk::DeviceSize bufferSize = static_cast(w) * h * 4; + + // End the current render pass so we can record transfer commands + m_currentCommandBuffer.endRenderPass(); + + // --- One-shot command buffer to copy previous frame to staging buffer --- + + vk::CommandBufferAllocateInfo cmdAlloc; + cmdAlloc.commandPool = m_graphicsCommandPool.get(); + cmdAlloc.level = vk::CommandBufferLevel::ePrimary; + cmdAlloc.commandBufferCount = 1; + + auto cmdBuffers = m_device->allocateCommandBuffers(cmdAlloc); + auto cmd = cmdBuffers.front(); + + vk::CommandBufferBeginInfo beginInfo; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit; + cmd.begin(beginInfo); + + // Transition previous swap chain image for transfer read + vk::ImageMemoryBarrier preBarrier; + preBarrier.oldLayout = vk::ImageLayout::ePresentSrcKHR; + preBarrier.newLayout = vk::ImageLayout::eTransferSrcOptimal; + preBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + preBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + preBarrier.image = prevImage; + preBarrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + preBarrier.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + preBarrier.dstAccessMask = vk::AccessFlagBits::eTransferRead; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eTransfer, + {}, nullptr, nullptr, preBarrier); + + // Create staging buffer for readback + vk::BufferCreateInfo bufferCreateInfo; + bufferCreateInfo.size = bufferSize; + bufferCreateInfo.usage = vk::BufferUsageFlagBits::eTransferDst; + bufferCreateInfo.sharingMode = vk::SharingMode::eExclusive; + + auto stagingBuffer = m_device->createBuffer(bufferCreateInfo); + + VulkanAllocation stagingAlloc{}; + if (!m_memoryManager->allocateBufferMemory(stagingBuffer, MemoryUsage::GpuToCpu, stagingAlloc)) { + mprintf(("VulkanRenderer::readbackFramebuffer - failed to allocate staging buffer\n")); + m_device->destroyBuffer(stagingBuffer); + cmd.end(); + m_device->freeCommandBuffers(m_graphicsCommandPool.get(), cmdBuffers); + + // Re-begin render pass so the frame can continue + vk::RenderPassBeginInfo renderPassBegin; + renderPassBegin.renderPass = m_renderPass.get(); + renderPassBegin.framebuffer = m_swapChainFramebuffers[m_currentSwapChainImage].get(); + renderPassBegin.renderArea.offset.x = 0; + renderPassBegin.renderArea.offset.y = 0; + renderPassBegin.renderArea.extent = m_swapChainExtent; + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + renderPassBegin.clearValueCount = static_cast(clearValues.size()); + renderPassBegin.pClearValues = clearValues.data(); + m_currentCommandBuffer.beginRenderPass(renderPassBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_renderPass.get(), 0); + m_stateTracker->setViewport(0.0f, 
+ static_cast(m_swapChainExtent.height), + static_cast(m_swapChainExtent.width), + -static_cast(m_swapChainExtent.height)); + return false; + } + + // Copy image to staging buffer + vk::BufferImageCopy region; + region.bufferOffset = 0; + region.bufferRowLength = 0; // tightly packed + region.bufferImageHeight = 0; // tightly packed + region.imageSubresource = {vk::ImageAspectFlagBits::eColor, 0, 0, 1}; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(w, h, 1); + + cmd.copyImageToBuffer(prevImage, vk::ImageLayout::eTransferSrcOptimal, stagingBuffer, region); + + // Transition previous swap chain image back + vk::ImageMemoryBarrier postBarrier; + postBarrier.oldLayout = vk::ImageLayout::eTransferSrcOptimal; + postBarrier.newLayout = vk::ImageLayout::ePresentSrcKHR; + postBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + postBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + postBarrier.image = prevImage; + postBarrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + postBarrier.srcAccessMask = vk::AccessFlagBits::eTransferRead; + postBarrier.dstAccessMask = {}; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eBottomOfPipe, + {}, nullptr, nullptr, postBarrier); + + cmd.end(); + + // Submit one-shot command buffer and wait + auto fence = m_device->createFence({}); + + vk::SubmitInfo submitInfo; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmd; + m_graphicsQueue.submit(submitInfo, fence); + + auto waitResult = m_device->waitForFences(fence, VK_TRUE, UINT64_MAX); + if (waitResult != vk::Result::eSuccess) { + mprintf(("VulkanRenderer::readbackFramebuffer - fence wait failed\n")); + } + + m_device->destroyFence(fence); + m_device->freeCommandBuffers(m_graphicsCommandPool.get(), cmdBuffers); + + // Read back pixels from staging buffer (raw BGRA matching swap chain format) + bool success = false; + auto* mappedPtr = static_cast(m_memoryManager->mapMemory(stagingAlloc)); + + if (mappedPtr) { + auto* pixels = static_cast(vm_malloc(static_cast(bufferSize))); + if (pixels) { + memcpy(pixels, mappedPtr, bufferSize); + *outPixels = pixels; + *outWidth = w; + *outHeight = h; + success = true; + } + m_memoryManager->unmapMemory(stagingAlloc); + } + + // Free staging buffer + m_device->destroyBuffer(stagingBuffer); + m_memoryManager->freeAllocation(stagingAlloc); + + // Re-begin render pass on main command buffer + vk::RenderPassBeginInfo renderPassBegin; + renderPassBegin.renderPass = m_renderPass.get(); + renderPassBegin.framebuffer = m_swapChainFramebuffers[m_currentSwapChainImage].get(); + renderPassBegin.renderArea.offset.x = 0; + renderPassBegin.renderArea.offset.y = 0; + renderPassBegin.renderArea.extent = m_swapChainExtent; + + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + + renderPassBegin.clearValueCount = static_cast(clearValues.size()); + renderPassBegin.pClearValues = clearValues.data(); + + m_currentCommandBuffer.beginRenderPass(renderPassBegin, vk::SubpassContents::eInline); + + m_stateTracker->setRenderPass(m_renderPass.get(), 0); + m_stateTracker->setViewport(0.0f, + static_cast(m_swapChainExtent.height), + static_cast(m_swapChainExtent.width), + -static_cast(m_swapChainExtent.height)); + + return success; +} + +uint32_t VulkanRenderer::getMinUniformBufferOffsetAlignment() const +{ + if (!m_physicalDevice) { + // Fallback to common value if device 
not initialized + return 256; + } + + auto properties = m_physicalDevice.getProperties(); + return static_cast(properties.limits.minUniformBufferOffsetAlignment); +} + +uint32_t VulkanRenderer::getMaxUniformBufferSize() const +{ + if (!m_physicalDevice) { + return 65536; + } + + auto properties = m_physicalDevice.getProperties(); + return properties.limits.maxUniformBufferRange; +} + +float VulkanRenderer::getMaxAnisotropy() const +{ + if (!m_physicalDevice) { + return 1.0f; + } + + auto properties = m_physicalDevice.getProperties(); + return properties.limits.maxSamplerAnisotropy; +} + +bool VulkanRenderer::isTextureCompressionBCSupported() const +{ + if (!m_physicalDevice) { + return false; + } + + auto features = m_physicalDevice.getFeatures(); + return features.textureCompressionBC == VK_TRUE; +} + +void VulkanRenderer::waitIdle() +{ + if (m_device) { + m_device->waitIdle(); + } +} + +void VulkanRenderer::waitForFrame(uint64_t frameNumber) +{ + // Fast path: if enough frames have elapsed, the work is definitely done + if (m_frameNumber >= frameNumber + MAX_FRAMES_IN_FLIGHT) { + return; + } + + // Wait on the specific frame's fence + uint32_t frameIndex = static_cast(frameNumber % MAX_FRAMES_IN_FLIGHT); + m_frames[frameIndex]->waitForFinish(); +} + +VkCommandBuffer VulkanRenderer::getVkCurrentCommandBuffer() const +{ + return static_cast(m_currentCommandBuffer); +} + +void VulkanRenderer::createImGuiDescriptorPool() +{ + vk::DescriptorPoolSize poolSize; + poolSize.type = vk::DescriptorType::eCombinedImageSampler; + poolSize.descriptorCount = 100; + + vk::DescriptorPoolCreateInfo poolInfo; + poolInfo.flags = vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet; + poolInfo.maxSets = 100; + poolInfo.poolSizeCount = 1; + poolInfo.pPoolSizes = &poolSize; + + m_imguiDescriptorPool = m_device->createDescriptorPoolUnique(poolInfo); +} + +void VulkanRenderer::initImGui() +{ + createImGuiDescriptorPool(); + + ImGui_ImplVulkan_InitInfo initInfo = {}; + initInfo.Instance = static_cast(*m_vkInstance); + initInfo.PhysicalDevice = static_cast(m_physicalDevice); + initInfo.Device = static_cast(*m_device); + initInfo.QueueFamily = m_graphicsQueueFamilyIndex; + initInfo.Queue = static_cast(m_graphicsQueue); + initInfo.PipelineCache = VK_NULL_HANDLE; + initInfo.DescriptorPool = static_cast(*m_imguiDescriptorPool); + initInfo.Subpass = 0; + initInfo.MinImageCount = 2; + initInfo.ImageCount = static_cast(m_swapChainImages.size()); + initInfo.MSAASamples = VK_SAMPLE_COUNT_1_BIT; + initInfo.Allocator = nullptr; + initInfo.CheckVkResultFn = nullptr; + + ImGui_ImplVulkan_Init(&initInfo, static_cast(*m_renderPass)); + + // Upload font textures via one-time command buffer + { + vk::CommandBufferAllocateInfo allocInfo; + allocInfo.commandPool = m_graphicsCommandPool.get(); + allocInfo.level = vk::CommandBufferLevel::ePrimary; + allocInfo.commandBufferCount = 1; + + auto cmdBuffers = m_device->allocateCommandBuffers(allocInfo); + auto cmd = cmdBuffers.front(); + + vk::CommandBufferBeginInfo beginInfo; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit; + cmd.begin(beginInfo); + + ImGui_ImplVulkan_CreateFontsTexture(static_cast(cmd)); + + cmd.end(); + + vk::SubmitInfo submitInfo; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmd; + m_graphicsQueue.submit(submitInfo, nullptr); + m_graphicsQueue.waitIdle(); + + m_device->freeCommandBuffers(m_graphicsCommandPool.get(), cmdBuffers); + ImGui_ImplVulkan_DestroyFontUploadObjects(); + } + + mprintf(("Vulkan: ImGui backend initialized 
successfully\n")); +} + +void VulkanRenderer::shutdownImGui() +{ + ImGui_ImplVulkan_Shutdown(); + m_imguiDescriptorPool.reset(); + mprintf(("Vulkan: ImGui backend shut down\n")); +} + +void VulkanRenderer::beginSceneRendering() +{ + if (!m_postProcessor || !m_postProcessor->isInitialized()) { + return; + } + if (m_sceneRendering) { + return; + } + + // End the current swap chain render pass + m_currentCommandBuffer.endRenderPass(); + + // Use G-buffer render pass when deferred lighting is enabled and G-buffer is ready + m_useGbufRenderPass = m_postProcessor->isGbufInitialized() && light_deferred_enabled(); + + // Begin the HDR scene render pass (or G-buffer render pass for deferred) + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = m_postProcessor->getSceneExtent(); + + if (m_useGbufRenderPass) { + rpBegin.renderPass = m_postProcessor->getGbufRenderPass(); + rpBegin.framebuffer = m_postProcessor->getGbufFramebuffer(); + + // 7 clear values: 6 color + depth + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); // color + clearValues[1].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); // position + clearValues[2].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); // normal + clearValues[3].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); // specular + clearValues[4].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); // emissive + clearValues[5].color.setFloat32({0.0f, 0.0f, 0.0f, 0.0f}); // composite + clearValues[6].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_postProcessor->getGbufRenderPass(), 0); + m_stateTracker->setColorAttachmentCount(VulkanPostProcessor::GBUF_COLOR_ATTACHMENT_COUNT); + } else { + rpBegin.renderPass = m_postProcessor->getSceneRenderPass(); + rpBegin.framebuffer = m_postProcessor->getSceneFramebuffer(); + + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_postProcessor->getSceneRenderPass(), 0); + m_stateTracker->setColorAttachmentCount(1); + } + + // Negative viewport height for Y-flip (same as swap chain pass) + auto extent = m_postProcessor->getSceneExtent(); + m_stateTracker->setViewport(0.0f, + static_cast(extent.height), + static_cast(extent.width), + -static_cast(extent.height)); + + m_sceneRendering = true; +} + +void VulkanRenderer::endSceneRendering() +{ + if (!m_postProcessor || !m_postProcessor->isInitialized()) { + return; + } + if (!m_sceneRendering) { + return; + } + + // End HDR scene render pass (transitions scene color to eShaderReadOnlyOptimal) + m_currentCommandBuffer.endRenderPass(); + + // Update distortion ping-pong textures (every ~30ms, matching OpenGL) + if (Gr_framebuffer_effects.any_set()) { + m_postProcessor->updateDistortion(m_currentCommandBuffer, flFrametime); + } + + // Execute post-processing passes (all between HDR scene pass and swap chain pass) + m_postProcessor->executeBloom(m_currentCommandBuffer); + m_postProcessor->executeTonemap(m_currentCommandBuffer); + m_postProcessor->executeFXAA(m_currentCommandBuffer); 
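+	// (Stage order mirrors the OpenGL post pipeline; each pass samples the previous
+	// pass's output.)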
+ m_postProcessor->executeLightshafts(m_currentCommandBuffer); + m_postProcessor->executePostEffects(m_currentCommandBuffer); + + // Begin the resumed swap chain render pass (loadOp=eLoad to preserve pre-scene content) + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderPass = m_renderPassLoad.get(); + rpBegin.framebuffer = m_swapChainFramebuffers[m_currentSwapChainImage].get(); + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = m_swapChainExtent; + + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + + // Update state tracker for the resumed swap chain pass + m_stateTracker->setRenderPass(m_renderPassLoad.get(), 0); + m_stateTracker->setColorAttachmentCount(1); + // Non-flipped viewport for post-processing blit (HDR texture is already correct orientation) + m_stateTracker->setViewport(0.0f, 0.0f, + static_cast(m_swapChainExtent.width), + static_cast(m_swapChainExtent.height)); + + // Blit the HDR scene to swap chain through post-processing + m_postProcessor->blitToSwapChain(m_currentCommandBuffer); + + // Restore Y-flipped viewport for HUD rendering + m_stateTracker->setViewport(0.0f, + static_cast(m_swapChainExtent.height), + static_cast(m_swapChainExtent.width), + -static_cast(m_swapChainExtent.height)); + + m_sceneRendering = false; + m_useGbufRenderPass = false; +} + +void VulkanRenderer::copyEffectTexture() +{ + if (!m_sceneRendering || !m_postProcessor || !m_postProcessor->isInitialized()) { + return; + } + + // End the current scene render pass + // This transitions scene color to eShaderReadOnlyOptimal (the render pass's finalLayout) + // For G-buffer: all 6 color attachments transition to eShaderReadOnlyOptimal + m_currentCommandBuffer.endRenderPass(); + + // Copy scene color → effect texture (handles scene color transitions) + m_postProcessor->copyEffectTexture(m_currentCommandBuffer); + + // If G-buffer is active, transition attachments 1-5 for render pass resume + if (m_useGbufRenderPass) { + m_postProcessor->transitionGbufForResume(m_currentCommandBuffer); + } + + // Resume the scene render pass with loadOp=eLoad to preserve existing content + // Scene color is now in eColorAttachmentOptimal (copyEffectTexture transitions it back) + // Depth is still in eDepthStencilAttachmentOptimal (untouched by the copy) + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = m_postProcessor->getSceneExtent(); + + if (m_useGbufRenderPass) { + rpBegin.renderPass = m_postProcessor->getGbufRenderPassLoad(); + rpBegin.framebuffer = m_postProcessor->getGbufFramebuffer(); + // Clear values ignored for eLoad but array must cover all attachments + std::array clearValues{}; + clearValues[6].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_postProcessor->getGbufRenderPassLoad(), 0); + } else { + rpBegin.renderPass = m_postProcessor->getSceneRenderPassLoad(); + rpBegin.framebuffer = m_postProcessor->getSceneFramebuffer(); + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); 
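+		// As in the G-buffer path above: with loadOp=eLoad these values are ignored,
+		// but the array still covers both attachments.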
+ clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_postProcessor->getSceneRenderPassLoad(), 0); + } + + // Restore Y-flipped viewport for scene rendering + auto extent = m_postProcessor->getSceneExtent(); + m_stateTracker->setViewport(0.0f, + static_cast(extent.height), + static_cast(extent.width), + -static_cast(extent.height)); +} + +void VulkanRenderer::copySceneDepthForParticles() +{ + if (m_sceneDepthCopiedThisFrame || !m_sceneRendering || !m_postProcessor || !m_postProcessor->isInitialized()) { + return; + } + + // End the current scene render pass + // This transitions: color → eShaderReadOnlyOptimal, depth → eDepthStencilAttachmentOptimal + // For G-buffer: all 6 color attachments → eShaderReadOnlyOptimal + m_currentCommandBuffer.endRenderPass(); + + // Copy scene depth → samplable depth copy (handles all depth image transitions) + m_postProcessor->copySceneDepth(m_currentCommandBuffer); + + // Transition scene color: eShaderReadOnlyOptimal → eColorAttachmentOptimal + // (needed for the resumed render pass with loadOp=eLoad, which expects + // initialLayout=eColorAttachmentOptimal; copySceneDepth only touches depth) + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + barrier.oldLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.newLayout = vk::ImageLayout::eColorAttachmentOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = m_postProcessor->getSceneColorImage(); + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + + m_currentCommandBuffer.pipelineBarrier( + vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + {}, {}, {}, barrier); + } + + // If G-buffer is active, transition attachments 1-5 for render pass resume + if (m_useGbufRenderPass) { + m_postProcessor->transitionGbufForResume(m_currentCommandBuffer); + } + + // Resume the scene render pass with loadOp=eLoad + vk::RenderPassBeginInfo rpBegin; + rpBegin.renderArea.offset = vk::Offset2D(0, 0); + rpBegin.renderArea.extent = m_postProcessor->getSceneExtent(); + + if (m_useGbufRenderPass) { + rpBegin.renderPass = m_postProcessor->getGbufRenderPassLoad(); + rpBegin.framebuffer = m_postProcessor->getGbufFramebuffer(); + std::array clearValues{}; + clearValues[6].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); + m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_postProcessor->getGbufRenderPassLoad(), 0); + } else { + rpBegin.renderPass = m_postProcessor->getSceneRenderPassLoad(); + rpBegin.framebuffer = m_postProcessor->getSceneFramebuffer(); + std::array clearValues; + clearValues[0].color.setFloat32({0.0f, 0.0f, 0.0f, 1.0f}); + clearValues[1].depthStencil = vk::ClearDepthStencilValue(1.0f, 0); + rpBegin.clearValueCount = static_cast(clearValues.size()); + rpBegin.pClearValues = clearValues.data(); 
+ m_currentCommandBuffer.beginRenderPass(rpBegin, vk::SubpassContents::eInline); + m_stateTracker->setRenderPass(m_postProcessor->getSceneRenderPassLoad(), 0); + } + + // Restore Y-flipped viewport for scene rendering + auto extent = m_postProcessor->getSceneExtent(); + m_stateTracker->setViewport(0.0f, + static_cast(extent.height), + static_cast(extent.width), + -static_cast(extent.height)); + + m_sceneDepthCopiedThisFrame = true; +} + +bool VulkanRenderer::recreateSwapChain() +{ + mprintf(("Vulkan: Recreating swap chain...\n")); + + // Wait for all frames to finish so no resources are in use + for (uint32_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { + m_frames[i]->waitForFinish(); + } + m_device->waitIdle(); + + // Re-query surface state (may have changed due to resize/compositor) + PhysicalDeviceValues freshValues; + freshValues.device = m_physicalDevice; + freshValues.surfaceCapabilities = m_physicalDevice.getSurfaceCapabilitiesKHR(m_vkSurface.get()); + auto fmts = m_physicalDevice.getSurfaceFormatsKHR(m_vkSurface.get()); + freshValues.surfaceFormats.assign(fmts.begin(), fmts.end()); + auto modes = m_physicalDevice.getSurfacePresentModesKHR(m_vkSurface.get()); + freshValues.presentModes.assign(modes.begin(), modes.end()); + freshValues.graphicsQueueIndex = {true, m_graphicsQueueFamilyIndex}; + freshValues.presentQueueIndex = {true, m_presentQueueFamilyIndex}; + + // Check for 0x0 extent (minimized window) — caller should retry later + auto extent = chooseSwapChainExtent(freshValues, gr_screen.max_w, gr_screen.max_h); + if (extent.width == 0 || extent.height == 0) { + mprintf(("Vulkan: Surface extent is 0x0 (minimized), deferring swap chain recreation\n")); + return false; + } + + // Recreate swap chain, image views, and framebuffers + // (createSwapChain clears old resources and transitions new images internally) + createSwapChain(freshValues, m_swapChain.get()); + createFrameBuffers(); + + // Update VulkanRenderFrame handles to point to the new swap chain + for (auto& frame : m_frames) { + frame->updateSwapChain(m_swapChain.get()); + } + + // Reset swap chain image tracking + m_swapChainImageRenderImage.clear(); + m_swapChainImageRenderImage.resize(m_swapChainImages.size(), nullptr); + m_previousSwapChainImage = UINT32_MAX; + + m_swapChainNeedsRecreation = false; + + mprintf(("Vulkan: Swap chain recreated successfully (%ux%u, %zu images)\n", + m_swapChainExtent.width, m_swapChainExtent.height, m_swapChainImages.size())); + + return true; +} + void VulkanRenderer::shutdown() { // Wait for all frames to complete to ensure no drawing is in progress when we destroy the device @@ -903,6 +1874,91 @@ void VulkanRenderer::shutdown() } // For good measure, also wait until the device is idle m_device->waitIdle(); + + // Shutdown ImGui Vulkan backend before destroying any Vulkan objects + shutdownImGui(); + + // Shutdown managers in reverse order of initialization + if (m_queryManager) { + setQueryManager(nullptr); + m_queryManager->shutdown(); + m_queryManager.reset(); + } + + if (m_postProcessor) { + setPostProcessor(nullptr); + m_postProcessor->shutdown(); + m_postProcessor.reset(); + } + + // Clean up shared post-processing manager + if (graphics::Post_processing_manager) { + graphics::Post_processing_manager->clear(); + graphics::Post_processing_manager = nullptr; + } + + if (m_drawManager) { + setDrawManager(nullptr); + m_drawManager->shutdown(); + m_drawManager.reset(); + } + + if (m_stateTracker) { + setStateTracker(nullptr); + m_stateTracker->shutdown(); + m_stateTracker.reset(); + } + + 
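+	// Save the pipeline cache before its manager (and the VkPipelineCache it owns)
+	// is destroyed, so the next run can warm-start pipeline creation from disk.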
+	if (m_pipelineManager) {
+		m_pipelineManager->savePipelineCache("vulkan_pipeline.cache");
+		setPipelineManager(nullptr);
+		m_pipelineManager->shutdown();
+		m_pipelineManager.reset();
+	}
+
+	if (m_descriptorManager) {
+		setDescriptorManager(nullptr);
+		m_descriptorManager->shutdown();
+		m_descriptorManager.reset();
+	}
+
+	if (m_shaderManager) {
+		setShaderManager(nullptr);
+		m_shaderManager->shutdown();
+		m_shaderManager.reset();
+	}
+
+	if (m_textureManager) {
+		setTextureManager(nullptr);
+		m_textureManager->shutdown();
+		m_textureManager.reset();
+	}
+
+	if (m_bufferManager) {
+		setBufferManager(nullptr);
+		m_bufferManager->shutdown();
+		m_bufferManager.reset();
+	}
+
+	// Destroy depth resources before memory manager
+	m_depthImageView.reset();
+	m_depthImage.reset();
+	if (m_memoryManager && m_depthImageMemory.memory) {
+		m_memoryManager->freeAllocation(m_depthImageMemory);
+	}
+
+	// Deletion queue must be flushed before memory manager shutdown
+	if (m_deletionQueue) {
+		setDeletionQueue(nullptr);
+		m_deletionQueue->shutdown();
+		m_deletionQueue.reset();
+	}
+
+	if (m_memoryManager) {
+		setMemoryManager(nullptr);
+		m_memoryManager->shutdown();
+		m_memoryManager.reset();
+	}
 }

 } // namespace vulkan
diff --git a/code/graphics/vulkan/VulkanRenderer.h b/code/graphics/vulkan/VulkanRenderer.h
index c2d53f21f42..bd6737de2b3 100644
--- a/code/graphics/vulkan/VulkanRenderer.h
+++ b/code/graphics/vulkan/VulkanRenderer.h
@@ -2,7 +2,18 @@
 #include "osapi/osapi.h"

-#include "RenderFrame.h"
+#include "VulkanMemory.h"
+#include "VulkanBuffer.h"
+#include "VulkanTexture.h"
+#include "VulkanShader.h"
+#include "VulkanDescriptorManager.h"
+#include "VulkanPipeline.h"
+#include "VulkanState.h"
+#include "VulkanDraw.h"
+#include "VulkanDeletionQueue.h"
+#include "VulkanPostProcessing.h"
+#include "VulkanQuery.h"
+#include "VulkanRenderFrame.h"

 #include

@@ -26,13 +37,13 @@ struct PhysicalDeviceValues {
 	vk::PhysicalDeviceProperties properties;
 	vk::PhysicalDeviceFeatures features;
-	std::vector<vk::ExtensionProperties> extensions;
+	SCP_vector<vk::ExtensionProperties> extensions;

 	vk::SurfaceCapabilitiesKHR surfaceCapabilities;
-	std::vector<vk::SurfaceFormatKHR> surfaceFormats;
-	std::vector<vk::PresentModeKHR> presentModes;
+	SCP_vector<vk::SurfaceFormatKHR> surfaceFormats;
+	SCP_vector<vk::PresentModeKHR> presentModes;

-	std::vector<vk::QueueFamilyProperties> queueProperties;
+	SCP_vector<vk::QueueFamilyProperties> queueProperties;

 	QueueIndex graphicsQueueIndex;
 	QueueIndex transferQueueIndex;
 	QueueIndex presentQueueIndex;

@@ -44,13 +55,147 @@ class VulkanRenderer {
 	bool initialize();

+	/**
+	 * @brief Setup for a new frame - begins command buffer and render pass
+	 * Called at the START of each frame before any draw calls
+	 */
+	void setupFrame();
+
+	/**
+	 * @brief End frame - ends render pass, submits, and presents
+	 * Called at the END of each frame after all draw calls
+	 */
 	void flip();

 	void shutdown();

-  private:
-	static constexpr uint32_t MAX_FRAMES_IN_FLIGHT = 2;

+	/**
+	 * @brief Read back the previous frame's framebuffer to CPU memory
+	 *
+	 * Copies the previously presented swap chain image to a vm_malloc'd
+	 * pixel buffer. Pixels come back in the swap chain's native B8G8R8A8
+	 * (BGRA) order, so the caller performs any BGRA→RGBA swizzle and must
+	 * vm_free the returned buffer.
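+	 *
+	 * Typical use (illustrative; assumes a VulkanRenderer* renderer):
+	 * @code
+	 *   ubyte* pixels = nullptr;
+	 *   uint32_t w = 0, h = 0;
+	 *   if (renderer->readbackFramebuffer(&pixels, &w, &h)) {
+	 *       // pixels holds w * h * 4 bytes in B8G8R8A8 order
+	 *       vm_free(pixels);
+	 *   }
+	 * @endcode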
+	 *
+	 * @param[out] outPixels  Receives the vm_malloc'd pixel buffer (B8G8R8A8 order)
+	 * @param[out] outWidth   Receives the image width
+	 * @param[out] outHeight  Receives the image height
+	 * @return true on success, false on failure
+	 */
+	bool readbackFramebuffer(ubyte** outPixels, uint32_t* outWidth, uint32_t* outHeight);
+
+	/**
+	 * @brief Get the minimum uniform buffer offset alignment requirement
+	 * @return The alignment in bytes (typically 64 or 256)
+	 */
+	uint32_t getMinUniformBufferOffsetAlignment() const;
+
+	/**
+	 * @brief Get the current frame number (total frames rendered)
+	 */
+	uint64_t getCurrentFrameNumber() const { return m_frameNumber; }
+
+	/**
+	 * @brief Wait for a specific frame's GPU work to complete
+	 *
+	 * Waits on that frame's fence rather than stalling the entire device.
+	 * No-op if the frame has already completed.
+	 */
+	void waitForFrame(uint64_t frameNumber);
+
+	/**
+	 * @brief Wait for all GPU work to complete
+	 */
+	void waitIdle();
+
+	/**
+	 * @brief Get the current command buffer as a raw Vulkan handle (for ImGui)
+	 */
+	VkCommandBuffer getVkCurrentCommandBuffer() const;
+
+	/**
+	 * @brief Check if VK_EXT_debug_utils is enabled
+	 */
+	bool isDebugUtilsEnabled() const { return m_debugUtilsEnabled; }
+
+	/**
+	 * @brief Get the maximum uniform buffer range
+	 */
+	uint32_t getMaxUniformBufferSize() const;
+
+	/**
+	 * @brief Get the maximum sampler anisotropy
+	 */
+	float getMaxAnisotropy() const;
+
+	/**
+	 * @brief Check if BC texture compression is supported
+	 */
+	bool isTextureCompressionBCSupported() const;
+
+	/**
+	 * @brief Check if vertex shader layer output is supported (for shadow cascades)
+	 */
+	bool supportsShaderViewportLayerOutput() const { return m_supportsShaderViewportLayerOutput; }
+
+	/**
+	 * @brief Switch from swap chain pass to HDR scene pass
+	 *
+	 * Called by vulkan_scene_texture_begin(). Ends the current swap chain
+	 * render pass and begins the HDR scene render pass.
+	 */
+	void beginSceneRendering();
+
+	/**
+	 * @brief Switch from HDR scene pass back to swap chain
+	 *
+	 * Called by vulkan_scene_texture_end(). Ends the HDR scene render pass,
+	 * runs post-processing, and begins the resumed swap chain render pass.
+	 */
+	void endSceneRendering();
+
+	/**
+	 * @brief Copy scene color to effect texture mid-scene
+	 *
+	 * Called by vulkan_copy_effect_texture(). Ends the current scene render
+	 * pass, copies scene color → effect texture, then resumes the scene
+	 * render pass with loadOp=eLoad to preserve existing content.
+	 */
+	void copyEffectTexture();
+
+	/**
+	 * @brief Copy scene depth mid-scene for soft particle sampling
+	 *
+	 * Called lazily from the first particle draw per frame. Ends the current
+	 * scene render pass, copies depth → samplable copy, then resumes the
+	 * scene render pass with loadOp=eLoad. No-op if already copied this frame.
+	 */
+	void copySceneDepthForParticles();
+
+	/**
+	 * @brief Check if scene depth copy is available for sampling this frame
+	 */
+	bool isSceneDepthCopied() const { return m_sceneDepthCopiedThisFrame; }
+
+	/**
+	 * @brief Check if we're currently rendering to the HDR scene target
+	 */
+	bool isSceneRendering() const { return m_sceneRendering; }
+
+	/**
+	 * @brief Set whether the G-buffer render pass is active
+	 *
+	 * Called by deferred_lighting_finish() to switch from G-buffer to
+	 * scene render pass mid-frame for forward transparent rendering.
+ */ + void setUseGbufRenderPass(bool use) { m_useGbufRenderPass = use; } + bool isUsingGbufRenderPass() const { return m_useGbufRenderPass; } + + /** + * @brief Get the validated MSAA sample count for deferred lighting + */ + vk::SampleCountFlagBits getMsaaSampleCount() const { return m_msaaSampleCount; } + private: bool initDisplayDevice() const; bool initializeInstance(); @@ -61,24 +206,28 @@ class VulkanRenderer { bool createLogicalDevice(const PhysicalDeviceValues& deviceValues); - bool createSwapChain(const PhysicalDeviceValues& deviceValues); - - vk::UniqueShaderModule loadShader(const SCP_string& name); - - void createGraphicsPipeline(); + bool createSwapChain(const PhysicalDeviceValues& deviceValues, vk::SwapchainKHR oldSwapchain = nullptr); void createRenderPass(); void createFrameBuffers(); + void createDepthResources(); + + vk::Format findDepthFormat(); + void createCommandPool(const PhysicalDeviceValues& values); void createPresentSyncObjects(); - void drawScene(vk::Framebuffer destinationFb, vk::CommandBuffer cmdBuffer); - void acquireNextSwapChainImage(); + bool recreateSwapChain(); + + void createImGuiDescriptorPool(); + void initImGui(); + void shutdownImGui(); + std::unique_ptr m_graphicsOps; vk::UniqueInstance m_vkInstance; @@ -98,22 +247,73 @@ class VulkanRenderer { SCP_vector m_swapChainImages; SCP_vector m_swapChainImageViews; SCP_vector m_swapChainFramebuffers; - SCP_vector m_swapChainImageRenderImage; + SCP_vector m_swapChainImageRenderImage; uint32_t m_currentSwapChainImage = 0; + uint32_t m_previousSwapChainImage = UINT32_MAX; // For saveScreen() readback of previous frame + + // Depth buffer + vk::UniqueImage m_depthImage; + vk::UniqueImageView m_depthImageView; + VulkanAllocation m_depthImageMemory; + vk::Format m_depthFormat = vk::Format::eUndefined; - vk::UniqueRenderPass m_renderPass; - vk::UniquePipelineLayout m_pipelineLayout; - vk::UniquePipeline m_graphicsPipeline; + vk::UniqueRenderPass m_renderPass; // Swap chain pass with loadOp=eClear + vk::UniqueRenderPass m_renderPassLoad; // Swap chain pass with loadOp=eLoad (resumed after post-processing) + vk::UniqueDescriptorPool m_imguiDescriptorPool; uint32_t m_currentFrame = 0; - std::array, MAX_FRAMES_IN_FLIGHT> m_frames; + uint64_t m_frameNumber = 0; // Total frames rendered (for sync tracking) + std::array, MAX_FRAMES_IN_FLIGHT> m_frames; vk::UniqueCommandPool m_graphicsCommandPool; + // Current frame command buffer (valid between setupFrame and flip) + vk::CommandBuffer m_currentCommandBuffer; + SCP_vector m_currentCommandBuffers; // For cleanup + bool m_frameInProgress = false; + + // Swap chain recreation + bool m_swapChainNeedsRecreation = false; + + // Physical device info (needed for memory manager) + vk::PhysicalDevice m_physicalDevice; + uint32_t m_graphicsQueueFamilyIndex = 0; + uint32_t m_transferQueueFamilyIndex = 0; + uint32_t m_presentQueueFamilyIndex = 0; + + // Memory, buffer, and texture management + std::unique_ptr m_memoryManager; + std::unique_ptr m_bufferManager; + std::unique_ptr m_textureManager; + std::unique_ptr m_deletionQueue; + + // Shader, descriptor, and pipeline management + std::unique_ptr m_shaderManager; + std::unique_ptr m_descriptorManager; + std::unique_ptr m_pipelineManager; + + // State tracking and draw management + std::unique_ptr m_stateTracker; + std::unique_ptr m_drawManager; + + // Query management (GPU timestamp profiling) + std::unique_ptr m_queryManager; + + // Post-processing + std::unique_ptr m_postProcessor; + bool m_sceneRendering = false; + bool 
m_sceneDepthCopiedThisFrame = false; + bool m_useGbufRenderPass = false; // True when scene uses G-buffer (deferred lighting) + + bool m_supportsShaderViewportLayerOutput = false; // VK_EXT_shader_viewport_index_layer + vk::SampleCountFlagBits m_msaaSampleCount = vk::SampleCountFlagBits::e1; // Validated MSAA sample count + #if SDL_SUPPORTS_VULKAN bool m_debugReportEnabled = false; + bool m_debugUtilsEnabled = false; #endif + }; } // namespace vulkan diff --git a/code/graphics/vulkan/VulkanShader.cpp b/code/graphics/vulkan/VulkanShader.cpp new file mode 100644 index 00000000000..b91e3ff23c3 --- /dev/null +++ b/code/graphics/vulkan/VulkanShader.cpp @@ -0,0 +1,294 @@ +#include "VulkanShader.h" +#include "VulkanVertexFormat.h" + +#include "def_files/def_files.h" + +namespace graphics { +namespace vulkan { + +// Global shader manager pointer +static VulkanShaderManager* g_shaderManager = nullptr; + +VulkanShaderManager* getShaderManager() +{ + Assertion(g_shaderManager != nullptr, "Vulkan ShaderManager not initialized!"); + return g_shaderManager; +} + +void setShaderManager(VulkanShaderManager* manager) +{ + g_shaderManager = manager; +} + +// ========== gr_screen function pointer implementations ========== + +int vulkan_maybe_create_shader(shader_type shader_t, unsigned int flags) +{ + auto* shaderManager = getShaderManager(); + return shaderManager->maybeCreateShader(shader_t, flags); +} + +void vulkan_recompile_all_shaders(const std::function& progressCallback) +{ + auto* shaderManager = getShaderManager(); + shaderManager->recompileAllShaders(progressCallback); +} + +// Shader type definitions - maps shader_type to SPIR-V filenames +// Vertex input location bits +// Values reflect what actually survives SPIR-V dead-code elimination, not just what's +// declared in GLSL. Used to filter out fallback vertex attributes the shader doesn't consume. 
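+// Example: a shader that only consumes position and texcoord advertises
+// (VTX_POSITION | VTX_TEXCOORD), so fallback bindings for color/normal/etc.
+// are dropped when its pipeline is created.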
+static constexpr uint32_t VTX_POSITION = 1u << static_cast<uint32_t>(VertexAttributeLocation::Position);
+static constexpr uint32_t VTX_COLOR = 1u << static_cast<uint32_t>(VertexAttributeLocation::Color);
+static constexpr uint32_t VTX_TEXCOORD = 1u << static_cast<uint32_t>(VertexAttributeLocation::TexCoord);
+static constexpr uint32_t VTX_NORMAL = 1u << static_cast<uint32_t>(VertexAttributeLocation::Normal);
+static constexpr uint32_t VTX_TANGENT = 1u << static_cast<uint32_t>(VertexAttributeLocation::Tangent);
+static constexpr uint32_t VTX_MODELID = 1u << static_cast<uint32_t>(VertexAttributeLocation::ModelId);
+static constexpr uint32_t VTX_RADIUS = 1u << static_cast<uint32_t>(VertexAttributeLocation::Radius);
+static constexpr uint32_t VTX_UVEC = 1u << static_cast<uint32_t>(VertexAttributeLocation::Uvec);
+static constexpr uint32_t VTX_MATRIX = (15u << static_cast<uint32_t>(VertexAttributeLocation::ModelMatrix)); // Four consecutive locations
+static constexpr uint32_t VTX_NONE = 0;
+
+// Filenames match the compiled SPIR-V files: {basename}.{stage}.spv
+const VulkanShaderTypeInfo VULKAN_SHADER_TYPES[] = {
+	{ SDR_TYPE_MODEL, "main", "main", "Model rendering", VTX_POSITION | VTX_TEXCOORD | VTX_NORMAL | VTX_TANGENT | VTX_MODELID },
+	{ SDR_TYPE_EFFECT_PARTICLE, "effect", "effect", "Particle effects", VTX_POSITION | VTX_COLOR | VTX_TEXCOORD | VTX_RADIUS },
+	{ SDR_TYPE_EFFECT_DISTORTION, "effect-distort", "effect-distort", "Distortion effects", VTX_POSITION | VTX_COLOR | VTX_TEXCOORD | VTX_RADIUS },
+	{ SDR_TYPE_POST_PROCESS_MAIN, "postprocess", "post", "Post-processing main", VTX_NONE },
+	{ SDR_TYPE_POST_PROCESS_BLUR, "postprocess", "blur", "Gaussian blur", VTX_NONE },
+	{ SDR_TYPE_POST_PROCESS_BLOOM_COMP, "postprocess", "bloom-comp", "Bloom composition", VTX_NONE },
+	{ SDR_TYPE_POST_PROCESS_BRIGHTPASS, "postprocess", "brightpass", "Bright pass filter", VTX_NONE },
+	{ SDR_TYPE_POST_PROCESS_FXAA, "postprocess", "fxaa", "FXAA anti-aliasing", VTX_NONE },
+	{ SDR_TYPE_POST_PROCESS_FXAA_PREPASS, "postprocess", "fxaapre", "FXAA luma prepass", VTX_NONE },
+	{ SDR_TYPE_POST_PROCESS_LIGHTSHAFTS, "postprocess", "lightshafts", "Light shafts", VTX_NONE },
+	{ SDR_TYPE_POST_PROCESS_TONEMAPPING, "postprocess", "tonemapping", "Tonemapping", VTX_NONE },
+	{ SDR_TYPE_DEFERRED_LIGHTING, "deferred", "deferred", "Deferred lighting", VTX_POSITION },
+	{ SDR_TYPE_VIDEO_PROCESS, "video", "video", "Video playback", VTX_POSITION | VTX_TEXCOORD },
+	{ SDR_TYPE_PASSTHROUGH_RENDER, "passthrough", "passthrough", "Passthrough rendering", VTX_POSITION | VTX_TEXCOORD },
+	{ SDR_TYPE_SHIELD_DECAL, "shield-impact", "shield-impact", "Shield impact", VTX_POSITION | VTX_NORMAL },
+	{ SDR_TYPE_BATCHED_BITMAP, "batched", "batched", "Batched bitmaps", VTX_POSITION | VTX_COLOR | VTX_TEXCOORD },
+	{ SDR_TYPE_DEFAULT_MATERIAL, "default-material", "default-material", "Default material", VTX_POSITION | VTX_COLOR | VTX_TEXCOORD },
+	{ SDR_TYPE_NANOVG, "nanovg", "nanovg", "NanoVG UI", VTX_POSITION | VTX_TEXCOORD },
+	{ SDR_TYPE_DECAL, "decal", "decal", "Decals", VTX_POSITION | VTX_MATRIX },
+	{ SDR_TYPE_SCENE_FOG, "fog", "fog", "Scene fog", VTX_NONE },
+	{ SDR_TYPE_VOLUMETRIC_FOG, "volumetric-fog", "volumetric-fog", "Volumetric fog", VTX_NONE },
+	{ SDR_TYPE_ROCKET_UI, "rocketui", "rocketui", "Rocket UI", VTX_POSITION | VTX_COLOR | VTX_TEXCOORD },
+	{ SDR_TYPE_COPY, "copy", "copy", "Texture copy", VTX_NONE },
+	{ SDR_TYPE_MSAA_RESOLVE, "msaa-resolve", "msaa-resolve", "MSAA resolve", VTX_NONE },
+	{ SDR_TYPE_IRRADIANCE_MAP_GEN, "irradiance", "irradiance", "Irradiance map generation", VTX_NONE },
+	{ SDR_TYPE_SHADOW_MAP, "shadow", "shadow", "Shadow map generation", VTX_POSITION | VTX_MODELID },
+};
+
+const size_t VULKAN_SHADER_TYPES_COUNT = sizeof(VULKAN_SHADER_TYPES) / sizeof(VULKAN_SHADER_TYPES[0]);
+
+bool VulkanShaderManager::init(vk::Device device)
+{
+	if (m_initialized) {
+		return true;
+	}
+
+	m_device = device;
+	m_initialized = true;
+
+	mprintf(("VulkanShaderManager: Initialized\n"));
+	return true;
+}
+
+void VulkanShaderManager::shutdown()
+{
+	if (!m_initialized) {
+		return;
+	}
+
+	// Clear all shaders (unique_ptrs will clean up)
+	m_shaders.clear();
+	m_shaderMap.clear();
+	m_freeSlots.clear();
+
+	m_initialized = false;
+	mprintf(("VulkanShaderManager: Shutdown complete\n"));
+}
+
+int VulkanShaderManager::maybeCreateShader(shader_type type, unsigned int /*flags*/)
+{
+	if (!m_initialized) {
+		return -1;
+	}
+
+	// Flags are ignored — Vulkan uses pre-compiled SPIR-V with runtime UBO flags
+	int key = static_cast<int>(type);
+	auto it = m_shaderMap.find(key);
+	if (it != m_shaderMap.end()) {
+		return static_cast<int>(it->second);
+	}
+
+	return loadShader(type);
+}
+
+void VulkanShaderManager::recompileAllShaders(const std::function<void(size_t, size_t)>& progressCallback)
+{
+	if (!m_initialized) {
+		return;
+	}
+
+	size_t total = m_shaders.size();
+	size_t current = 0;
+
+	for (auto& shader : m_shaders) {
+		if (shader.valid) {
+			// Re-load this shader
+			shader_type type = shader.type;
+
+			// Release old modules
+			shader.vertexModule.reset();
+			shader.fragmentModule.reset();
+			shader.valid = false;
+
+			const VulkanShaderTypeInfo* typeInfo = getShaderTypeInfo(type);
+			if (typeInfo) {
+				SCP_string vertFile = SCP_string(typeInfo->vertexFile) + ".vert";
+				shader.vertexModule = loadSpirvModule(vertFile);
+				shader.vertexInputMask = typeInfo->vertexInputMask;
+
+				SCP_string fragFile = SCP_string(typeInfo->fragmentFile) + ".frag";
+				shader.fragmentModule = loadSpirvModule(fragFile);
+
+				shader.valid = shader.vertexModule && shader.fragmentModule;
+			}
+		}
+
+		++current;
+		if (progressCallback) {
+			progressCallback(current, total);
+		}
+	}
+
+	mprintf(("VulkanShaderManager: Recompiled %zu shaders\n", total));
+}
+
+const VulkanShaderModule* VulkanShaderManager::getShader(int handle) const
+{
+	if (handle < 0 || static_cast<size_t>(handle) >= m_shaders.size()) {
+		return nullptr;
+	}
+
+	const VulkanShaderModule& shader = m_shaders[handle];
+	return shader.valid ? &shader : nullptr;
+}
+
+const VulkanShaderModule* VulkanShaderManager::getShaderByType(shader_type type) const
+{
+	int key = static_cast<int>(type);
+	auto it = m_shaderMap.find(key);
+	if (it == m_shaderMap.end()) {
+		return nullptr;
+	}
+
+	return getShader(static_cast<int>(it->second));
+}
+
+bool VulkanShaderManager::isShaderTypeSupported(shader_type type) const
+{
+	return getShaderTypeInfo(type) != nullptr;
+}
+
+vk::UniqueShaderModule VulkanShaderManager::loadSpirvModule(const SCP_string& filename)
+{
+	// Try to load from def_files
+	SCP_string fullName = filename + ".spv";
+
+	const auto def_file = defaults_get_file(fullName.c_str());
+	if (def_file.data == nullptr || def_file.size == 0) {
+		mprintf(("VulkanShaderManager: Could not load SPIR-V file: %s\n", fullName.c_str()));
+		return {};
+	}
+
+	// Validate SPIR-V magic number
+	if (def_file.size < 4) {
+		mprintf(("VulkanShaderManager: SPIR-V file too small: %s\n", fullName.c_str()));
+		return {};
+	}
+
+	const uint32_t* spirvData = static_cast<const uint32_t*>(def_file.data);
+	if (spirvData[0] != 0x07230203) {
+		mprintf(("VulkanShaderManager: Invalid SPIR-V magic number in: %s\n", fullName.c_str()));
+		return {};
+	}
+
+	vk::ShaderModuleCreateInfo createInfo;
+	createInfo.codeSize = def_file.size;
+	createInfo.pCode = spirvData;
+
+	try {
+		auto module = m_device.createShaderModuleUnique(createInfo);
+		mprintf(("VulkanShaderManager: Loaded SPIR-V: %s (size=%zu)\n", fullName.c_str(), def_file.size));
+		return module;
+	} catch (const vk::SystemError& e) {
+		mprintf(("VulkanShaderManager: Failed to create shader module from %s: %s\n",
+			fullName.c_str(), e.what()));
+		return {};
+	}
+}
+
+int VulkanShaderManager::loadShader(shader_type type)
+{
+	const VulkanShaderTypeInfo* typeInfo = getShaderTypeInfo(type);
+	if (!typeInfo) {
+		mprintf(("VulkanShaderManager: Unknown shader type: %d\n", static_cast<int>(type)));
+		return -1;
+	}
+
+	VulkanShaderModule shader;
+	shader.type = type;
+	shader.description = typeInfo->description;
+
+	// Load vertex shader
+	SCP_string vertFile = SCP_string(typeInfo->vertexFile) + ".vert";
+	shader.vertexModule = loadSpirvModule(vertFile);
+	shader.vertexInputMask = typeInfo->vertexInputMask;
+
+	// Load fragment shader
+	SCP_string fragFile = SCP_string(typeInfo->fragmentFile) + ".frag";
+	shader.fragmentModule = loadSpirvModule(fragFile);
+
+	// Check if essential modules loaded
+	shader.valid = shader.vertexModule && shader.fragmentModule;
+
+	if (!shader.valid) {
+		mprintf(("VulkanShaderManager: Failed to load shader type %d\n", static_cast<int>(type)));
+	}
+
+	// Find or allocate slot
+	size_t index;
+	if (!m_freeSlots.empty()) {
+		index = m_freeSlots.back();
+		m_freeSlots.pop_back();
+		m_shaders[index] = std::move(shader);
+	} else {
+		index = m_shaders.size();
+		m_shaders.push_back(std::move(shader));
+	}
+
+	// Add to lookup map
+	m_shaderMap[static_cast<int>(type)] = index;
+
+	if (m_shaders[index].valid) {
+		nprintf(("Vulkan", "VulkanShaderManager: Created shader %zu: %s\n",
+			index, typeInfo->description));
+	}
+
+	return static_cast<int>(index);
+}
+
+const VulkanShaderTypeInfo* VulkanShaderManager::getShaderTypeInfo(shader_type type) const
+{
+	for (size_t i = 0; i < VULKAN_SHADER_TYPES_COUNT; ++i) {
+		if (VULKAN_SHADER_TYPES[i].type == type) {
+			return &VULKAN_SHADER_TYPES[i];
+		}
+	}
+	return nullptr;
+}
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanShader.h b/code/graphics/vulkan/VulkanShader.h
new file mode 100644
index 00000000000..79d0704a86c
--- /dev/null
+++ 
b/code/graphics/vulkan/VulkanShader.h @@ -0,0 +1,182 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "graphics/2d.h" + +#include +#include + +namespace graphics { +namespace vulkan { + +/** + * @brief Holds SPIR-V shader modules for a single shader program + * + * Corresponds to an OpenGL shader program (vertex + fragment). + * Vulkan uses pre-compiled SPIR-V with no runtime variants — all conditional + * behavior is handled via UBO runtime flags (not compile-time defines). + */ +struct VulkanShaderModule { + vk::UniqueShaderModule vertexModule; + vk::UniqueShaderModule fragmentModule; + + shader_type type = SDR_TYPE_NONE; + + SCP_string description; + bool valid = false; + + // Bitmask of vertex input locations this shader declares (bit N = location N). + // Used at pipeline creation to filter out fallback attributes the shader + // doesn't consume. Copied from VulkanShaderTypeInfo at load time. + uint32_t vertexInputMask = 0; +}; + +/** + * @brief Shader type definition - maps shader type to SPIR-V filenames + * + * Based on opengl_shader_type_t from gropenglshader.h + */ +struct VulkanShaderTypeInfo { + shader_type type; + const char* vertexFile; // Vertex shader SPIR-V filename (without .spv) + const char* fragmentFile; // Fragment shader SPIR-V filename + const char* description; + uint32_t vertexInputMask; // Bitmask of vertex input locations (bit N = location N) +}; + +/** + * @brief Manages Vulkan shader modules (SPIR-V loading and caching) + * + * Provides the implementation for gr_screen.gf_maybe_create_shader and + * gr_screen.gf_recompile_all_shaders function pointers. + * + * Unlike OpenGL, Vulkan shaders are pre-compiled to SPIR-V with no + * runtime variant support. The flags parameter in maybeCreateShader is + * accepted for API compatibility but ignored — all conditional behavior + * is handled via UBO runtime flags in the shader code. + */ +class VulkanShaderManager { +public: + VulkanShaderManager() = default; + ~VulkanShaderManager() = default; + + // Non-copyable + VulkanShaderManager(const VulkanShaderManager&) = delete; + VulkanShaderManager& operator=(const VulkanShaderManager&) = delete; + + /** + * @brief Initialize the shader manager + * @param device Vulkan logical device + * @return true on success + */ + bool init(vk::Device device); + + /** + * @brief Shutdown and release all shader modules + */ + void shutdown(); + + /** + * @brief Get or create a shader program + * + * Implements gr_screen.gf_maybe_create_shader. + * The flags parameter is ignored — Vulkan uses pre-compiled SPIR-V + * with runtime UBO flags instead of compile-time variants. 
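+	 * Repeated calls with the same type return the cached handle instead of
+	 * loading the SPIR-V again.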
+ * + * @param type Shader type + * @param flags Ignored (accepted for API compatibility) + * @return Shader handle (index), or -1 on failure + */ + int maybeCreateShader(shader_type type, unsigned int flags); + + /** + * @brief Recompile all loaded shaders + * + * Implements gr_screen.gf_recompile_all_shaders + * + * @param progressCallback Called with (current, total) progress + */ + void recompileAllShaders(const std::function& progressCallback); + + /** + * @brief Get a shader by handle + * @param handle Shader handle from maybeCreateShader + * @return Pointer to shader module, or nullptr if invalid + */ + const VulkanShaderModule* getShader(int handle) const; + + /** + * @brief Get a shader by handle (alias for getShader) + */ + const VulkanShaderModule* getShaderByHandle(int handle) const { return getShader(handle); } + + /** + * @brief Get a shader by type + * @param type Shader type + * @return Pointer to shader module, or nullptr if not found + */ + const VulkanShaderModule* getShaderByType(shader_type type) const; + + /** + * @brief Get total number of loaded shaders + */ + size_t getShaderCount() const { return m_shaders.size(); } + + /** + * @brief Check if a shader type is supported + * @param type Shader type to check + * @return true if the shader type has SPIR-V files defined + */ + bool isShaderTypeSupported(shader_type type) const; + +private: + /** + * @brief Load a SPIR-V shader module from embedded files + * @param filename Base filename (e.g., "model.vert") + * @return Shader module, or empty unique_ptr on failure + */ + vk::UniqueShaderModule loadSpirvModule(const SCP_string& filename); + + /** + * @brief Load a shader for the given type + * @param type Shader type + * @return Index of new shader, or -1 on failure + */ + int loadShader(shader_type type); + + /** + * @brief Get shader type info for a shader type + * @param type Shader type + * @return Pointer to type info, or nullptr if not found + */ + const VulkanShaderTypeInfo* getShaderTypeInfo(shader_type type) const; + + vk::Device m_device; + + // Shader lookup: type -> index in m_shaders + SCP_unordered_map m_shaderMap; + + // All loaded shaders + SCP_vector m_shaders; + + // Free list for shader slot reuse + SCP_vector m_freeSlots; + + bool m_initialized = false; +}; + +// Global shader type definitions +extern const VulkanShaderTypeInfo VULKAN_SHADER_TYPES[]; +extern const size_t VULKAN_SHADER_TYPES_COUNT; + +// Global shader manager access +VulkanShaderManager* getShaderManager(); +void setShaderManager(VulkanShaderManager* manager); + +// ========== gr_screen function pointer implementations ========== + +int vulkan_maybe_create_shader(shader_type shader_t, unsigned int flags); +void vulkan_recompile_all_shaders(const std::function& progressCallback); + +} // namespace vulkan +} // namespace graphics diff --git a/code/graphics/vulkan/VulkanState.cpp b/code/graphics/vulkan/VulkanState.cpp new file mode 100644 index 00000000000..744df9f482e --- /dev/null +++ b/code/graphics/vulkan/VulkanState.cpp @@ -0,0 +1,349 @@ +#include "VulkanState.h" +#include "VulkanDraw.h" + +namespace graphics { +namespace vulkan { + +// Global state tracker pointer +static VulkanStateTracker* g_stateTracker = nullptr; + +VulkanStateTracker* getStateTracker() +{ + Assertion(g_stateTracker != nullptr, "Vulkan StateTracker not initialized!"); + return g_stateTracker; +} + +void setStateTracker(VulkanStateTracker* tracker) +{ + g_stateTracker = tracker; +} + +bool VulkanStateTracker::init(vk::Device device) +{ + if (m_initialized) { + 
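+		// Repeated init is tolerated as a no-op success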
return true; + } + + m_device = device; + + // Initialize default viewport + m_viewport.x = 0.0f; + m_viewport.y = 0.0f; + m_viewport.width = static_cast(gr_screen.max_w); + m_viewport.height = static_cast(gr_screen.max_h); + m_viewport.minDepth = 0.0f; + m_viewport.maxDepth = 1.0f; + + // Initialize default scissor + m_scissor.offset.x = 0; + m_scissor.offset.y = 0; + m_scissor.extent.width = gr_screen.max_w; + m_scissor.extent.height = gr_screen.max_h; + + // Initialize clear color (dark blue for debugging - shows clears are working) + m_clearColor.float32[0] = 0.0f; + m_clearColor.float32[1] = 0.0f; + m_clearColor.float32[2] = 0.3f; + m_clearColor.float32[3] = 1.0f; + + m_initialized = true; + mprintf(("VulkanStateTracker: Initialized\n")); + return true; +} + +void VulkanStateTracker::shutdown() +{ + if (!m_initialized) { + return; + } + + m_cmdBuffer = nullptr; + m_currentPipeline = nullptr; + m_currentRenderPass = nullptr; + + m_initialized = false; + mprintf(("VulkanStateTracker: Shutdown complete\n")); +} + +void VulkanStateTracker::beginFrame(vk::CommandBuffer cmdBuffer) +{ + mprintf(("VulkanStateTracker::beginFrame - cmdBuffer=%p\n", + static_cast(static_cast(cmdBuffer)))); + + m_cmdBuffer = cmdBuffer; + + // Reset state for new frame + m_currentPipeline = nullptr; + m_currentRenderPass = nullptr; + + for (auto& set : m_boundDescriptorSets) { + set = nullptr; + } + + // Mark all dynamic state as dirty + m_viewportDirty = true; + m_scissorDirty = true; + m_depthBiasDirty = true; + m_stencilRefDirty = true; + m_lineWidthDirty = true; +} + +void VulkanStateTracker::endFrame() +{ + mprintf(("VulkanStateTracker::endFrame - clearing cmdBuffer (was %p)\n", + static_cast(static_cast(m_cmdBuffer)))); + m_cmdBuffer = nullptr; +} + +void VulkanStateTracker::setRenderPass(vk::RenderPass renderPass, uint32_t subpass) +{ + m_currentRenderPass = renderPass; + m_currentSubpass = subpass; + + // Pipeline needs to be rebound when render pass changes + m_currentPipeline = nullptr; + + // Dynamic state must be re-applied after a render pass change. + // Vulkan doesn't preserve dynamic state across render pass instances, + // and mid-frame render passes (e.g. light accumulation) may have set + // different viewport/scissor values directly on the command buffer. + m_viewportDirty = true; + m_scissorDirty = true; +} + +void VulkanStateTracker::setViewport(float x, float y, float width, float height, float minDepth, float maxDepth) +{ + if (m_viewport.x != x || m_viewport.y != y || + m_viewport.width != width || m_viewport.height != height || + m_viewport.minDepth != minDepth || m_viewport.maxDepth != maxDepth) { + m_viewport.x = x; + m_viewport.y = y; + m_viewport.width = width; + m_viewport.height = height; + m_viewport.minDepth = minDepth; + m_viewport.maxDepth = maxDepth; + m_viewportDirty = true; + + // When scissor is disabled, applyDynamicState derives the scissor rect + // from the viewport dimensions. So a viewport change invalidates that + // computed scissor and must trigger a re-flush. 
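+		// (applyDynamicState() performs that derivation when it flushes.)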
+		if (!m_scissorEnabled) {
+			m_scissorDirty = true;
+		}
+	}
+}
+
+void VulkanStateTracker::setScissor(int32_t x, int32_t y, uint32_t width, uint32_t height)
+{
+	if (m_scissor.offset.x != x || m_scissor.offset.y != y ||
+		m_scissor.extent.width != width || m_scissor.extent.height != height) {
+		m_scissor.offset.x = x;
+		m_scissor.offset.y = y;
+		m_scissor.extent.width = width;
+		m_scissor.extent.height = height;
+		m_scissorDirty = true;
+	}
+}
+
+void VulkanStateTracker::setScissorEnabled(bool enabled)
+{
+	if (m_scissorEnabled != enabled) {
+		m_scissorEnabled = enabled;
+		m_scissorDirty = true;
+	}
+}
+
+void VulkanStateTracker::setDepthBias(float constantFactor, float slopeFactor)
+{
+	if (m_depthBiasConstant != constantFactor || m_depthBiasSlope != slopeFactor) {
+		m_depthBiasConstant = constantFactor;
+		m_depthBiasSlope = slopeFactor;
+		m_depthBiasDirty = true;
+	}
+}
+
+void VulkanStateTracker::setStencilReference(uint32_t reference)
+{
+	if (m_stencilReference != reference) {
+		m_stencilReference = reference;
+		m_stencilRefDirty = true;
+	}
+}
+
+void VulkanStateTracker::setLineWidth(float width)
+{
+	if (m_lineWidth != width) {
+		m_lineWidth = width;
+		m_lineWidthDirty = true;
+	}
+}
+
+void VulkanStateTracker::bindPipeline(vk::Pipeline pipeline, vk::PipelineLayout layout)
+{
+	if (m_currentPipeline != pipeline && pipeline && m_cmdBuffer) {
+		m_cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
+		m_currentPipeline = pipeline;
+		m_currentPipelineLayout = layout;
+
+		// After binding new pipeline, need to re-apply dynamic state
+		applyDynamicState();
+
+		// Clear bound descriptor sets since they need to be rebound with new layout
+		for (auto& set : m_boundDescriptorSets) {
+			set = nullptr;
+		}
+	}
+}
+
+void VulkanStateTracker::bindDescriptorSet(DescriptorSetIndex setIndex, vk::DescriptorSet set,
+	const SCP_vector<uint32_t>& dynamicOffsets)
+{
+	Assertion(m_cmdBuffer, "bindDescriptorSet called without active command buffer!");
+	Assertion(m_currentPipelineLayout, "bindDescriptorSet called without bound pipeline layout!");
+	Assertion(set, "bindDescriptorSet called with null descriptor set!");
+
+	uint32_t index = static_cast<uint32_t>(setIndex);
+
+	if (m_boundDescriptorSets[index] != set) {
+		m_cmdBuffer.bindDescriptorSets(
+			vk::PipelineBindPoint::eGraphics,
+			m_currentPipelineLayout,
+			index,
+			1, &set,
+			static_cast<uint32_t>(dynamicOffsets.size()),
+			dynamicOffsets.empty() ? nullptr : dynamicOffsets.data());
+
+		m_boundDescriptorSets[index] = set;
+	}
+}
+
+void VulkanStateTracker::bindVertexBuffer(uint32_t binding, vk::Buffer buffer, vk::DeviceSize offset)
+{
+	Assertion(m_cmdBuffer, "bindVertexBuffer called without active command buffer!");
+	Assertion(buffer, "bindVertexBuffer called with null buffer!");
+	m_cmdBuffer.bindVertexBuffers(binding, 1, &buffer, &offset);
+}
+
+void VulkanStateTracker::bindIndexBuffer(vk::Buffer buffer, vk::DeviceSize offset, vk::IndexType indexType)
+{
+	Assertion(m_cmdBuffer, "bindIndexBuffer called without active command buffer!");
+	Assertion(buffer, "bindIndexBuffer called with null buffer!");
+	m_cmdBuffer.bindIndexBuffer(buffer, offset, indexType);
+}
+
+void VulkanStateTracker::setClearColor(float r, float g, float b, float a)
+{
+	m_clearColor.float32[0] = r;
+	m_clearColor.float32[1] = g;
+	m_clearColor.float32[2] = b;
+	m_clearColor.float32[3] = a;
+}
+
+void VulkanStateTracker::applyDynamicState()
+{
+	Assertion(m_cmdBuffer, "applyDynamicState called without active command buffer!");
+
+	if (m_viewportDirty) {
+		m_cmdBuffer.setViewport(0, 1, &m_viewport);
+		m_viewportDirty = false;
+	}
+
+	if (m_scissorDirty) {
+		if (m_scissorEnabled) {
+			m_cmdBuffer.setScissor(0, 1, &m_scissor);
+		} else {
+			// Set scissor to full viewport when disabled.
+			// Handle negative viewport height (VK_KHR_maintenance1 Y-flip):
+			// when height < 0, the viewport covers [y+height, y] in framebuffer Y.
+			vk::Rect2D fullScissor;
+			float vy = m_viewport.y;
+			float vh = m_viewport.height;
+			if (vh < 0.0f) {
+				vy = vy + vh;
+				vh = -vh;
+			}
+			fullScissor.offset.x = static_cast<int32_t>(m_viewport.x);
+			fullScissor.offset.y = static_cast<int32_t>(vy);
+			fullScissor.extent.width = static_cast<uint32_t>(m_viewport.width);
+			fullScissor.extent.height = static_cast<uint32_t>(vh);
+			m_cmdBuffer.setScissor(0, 1, &fullScissor);
+		}
+		m_scissorDirty = false;
+	}
+
+	if (m_depthBiasDirty) {
+		m_cmdBuffer.setDepthBias(m_depthBiasConstant, 0.0f, m_depthBiasSlope);
+		m_depthBiasDirty = false;
+	}
+
+	if (m_stencilRefDirty) {
+		m_cmdBuffer.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, m_stencilReference);
+		m_stencilRefDirty = false;
+	}
+
+	if (m_lineWidthDirty) {
+		m_cmdBuffer.setLineWidth(m_lineWidth);
+		m_lineWidthDirty = false;
+	}
+}
+
+} // namespace vulkan
+} // namespace graphics
+
+// GL_alpha_threshold is defined in gropengl.cpp
+extern float GL_alpha_threshold;
+
+namespace graphics {
+namespace vulkan {
+
+// ========== gr_screen function pointer implementations ==========
+
+void vulkan_zbias(int bias)
+{
+	auto* stateTracker = getStateTracker();
+	auto* drawManager = getDrawManager();
+
+	if (bias) {
+		drawManager->setDepthBiasEnabled(true);
+		if (bias < 0) {
+			stateTracker->setDepthBias(1.0f, static_cast<float>(-bias));
+		} else {
+			stateTracker->setDepthBias(0.0f, static_cast<float>(-bias));
+		}
+	} else {
+		drawManager->setDepthBiasEnabled(false);
+		stateTracker->setDepthBias(0.0f, 0.0f);
+	}
+}
+
+int vulkan_alpha_mask_set(int mode, float alpha)
+{
+	if (mode) {
+		GL_alpha_threshold = alpha;
+	} else {
+		GL_alpha_threshold = 0.0f;
+	}
+	return mode;
+}
+
+void vulkan_set_viewport(int x, int y, int width, int height)
+{
+	auto* stateTracker = getStateTracker();
+	if (gr_screen.rendering_to_texture == -1) {
+		// Screen rendering: use negative viewport height for OpenGL-compatible Y-up NDC
+		// (VK_KHR_maintenance1, core since Vulkan 1.1)
+		stateTracker->setViewport(
+			static_cast<float>(x),
+			static_cast<float>(gr_screen.max_h - y),
+			static_cast<float>(width),
+			static_cast<float>(-height));
+	} else {
+		// RTT: standard positive viewport (RTT projection matrix handles Y-flip)
+		stateTracker->setViewport(
+			static_cast<float>(x), static_cast<float>(y),
+			static_cast<float>(width), static_cast<float>(height));
+	}
+}
+
+} // namespace vulkan
+} // namespace graphics
diff --git a/code/graphics/vulkan/VulkanState.h b/code/graphics/vulkan/VulkanState.h
new file mode 100644
index 00000000000..b7a85e7fad5
--- /dev/null
+++ b/code/graphics/vulkan/VulkanState.h
@@ -0,0 +1,257 @@
+#pragma once
+
+#include "globalincs/pstypes.h"
+#include "graphics/2d.h"
+#include "graphics/material.h"
+
+#include "VulkanPipeline.h"
+#include "VulkanDescriptorManager.h"
+
+#include
+
+namespace graphics {
+namespace vulkan {
+
+/**
+ * @brief Tracks current Vulkan render state
+ *
+ * Unlike OpenGL where state is set globally, Vulkan requires explicit
+ * command buffer recording. This class tracks what state has been set
+ * and what needs to be updated before draw calls.
+ */
+class VulkanStateTracker {
+public:
+	VulkanStateTracker() = default;
+	~VulkanStateTracker() = default;
+
+	// Non-copyable
+	VulkanStateTracker(const VulkanStateTracker&) = delete;
+	VulkanStateTracker& operator=(const VulkanStateTracker&) = delete;
+
+	/**
+	 * @brief Initialize state tracker
+	 */
+	bool init(vk::Device device);
+
+	/**
+	 * @brief Shutdown and release resources
+	 */
+	void shutdown();
+
+	/**
+	 * @brief Begin recording for a new frame
+	 * @param cmdBuffer Command buffer to record to
+	 */
+	void beginFrame(vk::CommandBuffer cmdBuffer);
+
+	/**
+	 * @brief End frame recording
+	 */
+	void endFrame();
+
+	/**
+	 * @brief Set the current render pass
+	 */
+	void setRenderPass(vk::RenderPass renderPass, uint32_t subpass = 0);
+
+	/**
+	 * @brief Get current render pass
+	 */
+	vk::RenderPass getCurrentRenderPass() const { return m_currentRenderPass; }
+
+	// ========== Dynamic State ==========
+
+	/**
+	 * @brief Set viewport (dynamic state)
+	 */
+	void setViewport(float x, float y, float width, float height, float minDepth = 0.0f, float maxDepth = 1.0f);
+
+	/**
+	 * @brief Set scissor rectangle (dynamic state)
+	 */
+	void setScissor(int32_t x, int32_t y, uint32_t width, uint32_t height);
+
+	/**
+	 * @brief Enable or disable scissor test
+	 */
+	void setScissorEnabled(bool enabled);
+
+	/**
+	 * @brief Set depth bias (dynamic state)
+	 */
+	void setDepthBias(float constantFactor, float slopeFactor);
+
+	/**
+	 * @brief Set stencil reference value (dynamic state)
+	 */
+	void setStencilReference(uint32_t reference);
+
+	/**
+	 * @brief Set line width (dynamic state)
+	 */
+	void setLineWidth(float width);
+
+	// ========== Pipeline State ==========
+
+	/**
+	 * @brief Bind a pipeline
+	 */
+	void bindPipeline(vk::Pipeline pipeline, vk::PipelineLayout layout);
+
+	/**
+	 * @brief Get currently bound pipeline
+	 */
+	vk::Pipeline getCurrentPipeline() const { return m_currentPipeline; }
+
+	/**
+	 * @brief Get current pipeline layout
+	 */
+	vk::PipelineLayout getCurrentPipelineLayout() const { return m_currentPipelineLayout; }
+
+	// ========== Descriptor State ==========
+
+	/**
+	 * @brief Bind descriptor set
+	 */
+	void bindDescriptorSet(DescriptorSetIndex setIndex, vk::DescriptorSet set,
+		const SCP_vector<uint32_t>& dynamicOffsets = {});
+
+	// ========== Buffer Binding ==========
+
+	/**
+	 * @brief Bind vertex buffer
+	 */
+	void bindVertexBuffer(uint32_t binding, vk::Buffer buffer, vk::DeviceSize offset = 0);
+
+	/**
+	 * @brief Bind index buffer
+	 */
+	void bindIndexBuffer(vk::Buffer buffer, vk::DeviceSize offset, vk::IndexType indexType);
+
+	// ========== State Queries ==========
+
/** + * @brief Get current command buffer. + * Asserts if no command buffer is active — rendering outside a frame is always a bug. + */ + vk::CommandBuffer getCommandBuffer() const { + Assertion(m_cmdBuffer, "No active command buffer — rendering outside a frame?"); + return m_cmdBuffer; + } + + /** + * @brief Check if scissor test is enabled + */ + bool isScissorEnabled() const { return m_scissorEnabled; } + + // ========== Clear Operations ========== + + /** + * @brief Set clear color for next clear operation + */ + void setClearColor(float r, float g, float b, float a); + + /** + * @brief Get current clear color + */ + const vk::ClearColorValue& getClearColor() const { return m_clearColor; } + + // ========== Render State Tracking ========== + + /** + * @brief Set current zbuffer mode (for tracking) + */ + void setZBufferMode(gr_zbuffer_type mode) { m_zbufferMode = mode; } + gr_zbuffer_type getZBufferMode() const { return m_zbufferMode; } + + /** + * @brief Set current stencil mode (for tracking) + */ + void setStencilMode(int mode) { m_stencilMode = mode; } + int getStencilMode() const { return m_stencilMode; } + + /** + * @brief Set current cull mode (for tracking) + */ + void setCullMode(bool enabled) { m_cullEnabled = enabled; } + bool getCullMode() const { return m_cullEnabled; } + + /** + * @brief Set color attachment count for current render pass + */ + void setColorAttachmentCount(uint32_t count) { m_colorAttachmentCount = count; } + uint32_t getColorAttachmentCount() const { return m_colorAttachmentCount; } + + /** + * @brief Set current MSAA sample count for pipeline creation + */ + void setCurrentSampleCount(vk::SampleCountFlagBits count) { m_currentSampleCount = count; } + vk::SampleCountFlagBits getCurrentSampleCount() const { return m_currentSampleCount; } + + /** + * @brief Apply pending dynamic state to command buffer + * + * Must be called before every draw command to ensure dirty dynamic state + * (viewport, scissor, depth bias, stencil ref, line width) is flushed. + * applyMaterial() sets depth bias/stencil AFTER bindPipeline(), so if + * the pipeline didn't change, those changes would be lost without this. 
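+	 *
+	 * Illustrative call order per draw (names as used in this class):
+	 * @code
+	 *   tracker->bindPipeline(pipeline, layout); // flushes state only if pipeline changed
+	 *   // material code may set depth bias / stencil reference here
+	 *   tracker->applyDynamicState();            // flush whatever is still dirty
+	 *   cmdBuffer.drawIndexed(...);
+	 * @endcode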
+ */ + void applyDynamicState(); + +private: + + vk::Device m_device; + vk::CommandBuffer m_cmdBuffer; + + // Current render pass state + vk::RenderPass m_currentRenderPass; + uint32_t m_currentSubpass = 0; + + // Current pipeline state + vk::Pipeline m_currentPipeline; + vk::PipelineLayout m_currentPipelineLayout; + + // Descriptor sets + std::array(DescriptorSetIndex::Count)> m_boundDescriptorSets; + + // Dynamic state + vk::Viewport m_viewport; + vk::Rect2D m_scissor; + bool m_scissorEnabled = false; + float m_depthBiasConstant = 0.0f; + float m_depthBiasSlope = 0.0f; + uint32_t m_stencilReference = 0; + float m_lineWidth = 1.0f; + + // Dirty flags for dynamic state + bool m_viewportDirty = true; + bool m_scissorDirty = true; + bool m_depthBiasDirty = false; + bool m_stencilRefDirty = false; + bool m_lineWidthDirty = false; + + // Clear values + vk::ClearColorValue m_clearColor; + + // Render state tracking (for FSO compatibility) + gr_zbuffer_type m_zbufferMode = ZBUFFER_TYPE_NONE; + int m_stencilMode = 0; + bool m_cullEnabled = true; + uint32_t m_colorAttachmentCount = 1; + vk::SampleCountFlagBits m_currentSampleCount = vk::SampleCountFlagBits::e1; + + bool m_initialized = false; +}; + +// Global state tracker access +VulkanStateTracker* getStateTracker(); +void setStateTracker(VulkanStateTracker* tracker); + +// ========== gr_screen function pointer implementations ========== + +void vulkan_zbias(int bias); +int vulkan_alpha_mask_set(int mode, float alpha); +void vulkan_set_viewport(int x, int y, int width, int height); + +} // namespace vulkan +} // namespace graphics diff --git a/code/graphics/vulkan/VulkanTexture.cpp b/code/graphics/vulkan/VulkanTexture.cpp new file mode 100644 index 00000000000..b3624260eda --- /dev/null +++ b/code/graphics/vulkan/VulkanTexture.cpp @@ -0,0 +1,2434 @@ +#include "VulkanTexture.h" +#include "VulkanBuffer.h" +#include "VulkanDeletionQueue.h" + +#include "bmpman/bmpman.h" +#include "ddsutils/ddsutils.h" +#include "globalincs/systemvars.h" + +namespace graphics { +namespace vulkan { + +namespace { +VulkanTextureManager* g_textureManager = nullptr; +} + +VulkanTextureManager* getTextureManager() +{ + Assertion(g_textureManager != nullptr, "Vulkan TextureManager not initialized!"); + return g_textureManager; +} + +void setTextureManager(VulkanTextureManager* manager) +{ + g_textureManager = manager; +} + +// tcache_slot_vulkan implementation + +void tcache_slot_vulkan::reset() +{ + image = nullptr; + imageView = nullptr; + allocation = VulkanAllocation(); + format = vk::Format::eUndefined; + currentLayout = vk::ImageLayout::eUndefined; + width = 0; + height = 0; + mipLevels = 1; + arrayLayers = 1; + bpp = 0; + bitmapHandle = -1; + arrayIndex = 0; + used = false; + framebuffer = nullptr; + framebufferView = nullptr; + renderPass = nullptr; + isRenderTarget = false; + is3D = false; + depth = 1; + isCubemap = false; + for (auto& v : cubeFaceViews) v = nullptr; + for (auto& fb : cubeFaceFramebuffers) fb = nullptr; + cubeImageView = nullptr; + uScale = 1.0f; + vScale = 1.0f; +} + +// VulkanTextureManager implementation + +VulkanTextureManager::VulkanTextureManager() = default; + +VulkanTextureManager::~VulkanTextureManager() +{ + if (m_initialized) { + shutdown(); + } +} + +bool VulkanTextureManager::init(vk::Device device, vk::PhysicalDevice physicalDevice, + VulkanMemoryManager* memoryManager, + vk::CommandPool commandPool, vk::Queue graphicsQueue) +{ + if (m_initialized) { + mprintf(("VulkanTextureManager::init called when already initialized!\n")); + 
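+		// Unlike the state tracker, repeat init here is treated as an error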
return false; + } + + m_device = device; + m_physicalDevice = physicalDevice; + m_memoryManager = memoryManager; + m_commandPool = commandPool; + m_graphicsQueue = graphicsQueue; + + // Query device limits + auto properties = physicalDevice.getProperties(); + m_maxTextureSize = properties.limits.maxImageDimension2D; + m_maxAnisotropy = properties.limits.maxSamplerAnisotropy; + + mprintf(("Vulkan Texture Manager initialized\n")); + mprintf((" Max texture size: %u\n", m_maxTextureSize)); + mprintf((" Max anisotropy: %.1f\n", m_maxAnisotropy)); + + // Create default sampler + vk::SamplerCreateInfo samplerInfo; + samplerInfo.magFilter = vk::Filter::eLinear; + samplerInfo.minFilter = vk::Filter::eLinear; + // Use ClampToEdge by default to match OpenGL's behavior for UI/interface textures. + // OpenGL creates all textures with GL_CLAMP_TO_EDGE and only switches to GL_REPEAT + // for 3D model textures at bind time (excluding AABITMAP, INTERFACE, CUBEMAP types). + // Using eRepeat here causes visible 1-pixel seams on UI bitmaps where edge texels + // blend with the opposite edge via linear filtering. + samplerInfo.addressModeU = vk::SamplerAddressMode::eClampToEdge; + samplerInfo.addressModeV = vk::SamplerAddressMode::eClampToEdge; + samplerInfo.addressModeW = vk::SamplerAddressMode::eClampToEdge; + samplerInfo.anisotropyEnable = (m_maxAnisotropy > 1.0f); + samplerInfo.maxAnisotropy = m_maxAnisotropy; + samplerInfo.borderColor = vk::BorderColor::eIntOpaqueBlack; + samplerInfo.unnormalizedCoordinates = false; + samplerInfo.compareEnable = false; + samplerInfo.compareOp = vk::CompareOp::eAlways; + samplerInfo.mipmapMode = vk::SamplerMipmapMode::eLinear; + samplerInfo.mipLodBias = 0.0f; + samplerInfo.minLod = 0.0f; + samplerInfo.maxLod = VK_LOD_CLAMP_NONE; + + try { + m_defaultSampler = m_device.createSampler(samplerInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create default sampler: %s\n", e.what())); + return false; + } + + // Create 1x1 white fallback textures for unbound descriptor slots + if (!createFallbackTexture(m_fallback2DArrayTexture, m_fallback2DArrayAllocation, + m_fallback2DArrayView, ImageViewType::Array2D)) { + return false; + } + if (!createFallbackTexture(m_fallbackTexture2D, m_fallbackTexture2DAllocation, + m_fallbackTextureView2D, ImageViewType::Plain2D)) { + return false; + } + if (!createFallbackTexture(m_fallbackCubeTexture, m_fallbackCubeAllocation, + m_fallbackCubeView, ImageViewType::Cube, 6, true)) { + return false; + } + if (!createFallbackTexture(m_fallback3DTexture, m_fallback3DAllocation, + m_fallback3DView, ImageViewType::Volume3D, 1, false, vk::ImageType::e3D)) { + return false; + } + + m_initialized = true; + return true; +} + +void VulkanTextureManager::shutdown() +{ + if (!m_initialized) { + return; + } + + // Destroy fallback 3D texture + if (m_fallback3DView) { + m_device.destroyImageView(m_fallback3DView); + m_fallback3DView = nullptr; + } + if (m_fallback3DTexture) { + m_device.destroyImage(m_fallback3DTexture); + m_fallback3DTexture = nullptr; + } + if (m_fallback3DAllocation.memory != VK_NULL_HANDLE) { + m_memoryManager->freeAllocation(m_fallback3DAllocation); + } + + // Destroy fallback cubemap + if (m_fallbackCubeView) { + m_device.destroyImageView(m_fallbackCubeView); + m_fallbackCubeView = nullptr; + } + if (m_fallbackCubeTexture) { + m_device.destroyImage(m_fallbackCubeTexture); + m_fallbackCubeTexture = nullptr; + } + if (m_fallbackCubeAllocation.memory != VK_NULL_HANDLE) { + 
m_memoryManager->freeAllocation(m_fallbackCubeAllocation);
+	}
+
+	// Destroy fallback textures
+	if (m_fallbackTextureView2D) {
+		m_device.destroyImageView(m_fallbackTextureView2D);
+		m_fallbackTextureView2D = nullptr;
+	}
+	if (m_fallbackTexture2D) {
+		m_device.destroyImage(m_fallbackTexture2D);
+		m_fallbackTexture2D = nullptr;
+	}
+	if (m_fallbackTexture2DAllocation.memory != VK_NULL_HANDLE) {
+		m_memoryManager->freeAllocation(m_fallbackTexture2DAllocation);
+	}
+	if (m_fallback2DArrayView) {
+		m_device.destroyImageView(m_fallback2DArrayView);
+		m_fallback2DArrayView = nullptr;
+	}
+	if (m_fallback2DArrayTexture) {
+		m_device.destroyImage(m_fallback2DArrayTexture);
+		m_fallback2DArrayTexture = nullptr;
+	}
+	if (m_fallback2DArrayAllocation.memory != VK_NULL_HANDLE) {
+		m_memoryManager->freeAllocation(m_fallback2DArrayAllocation);
+	}
+
+	// Destroy samplers
+	if (m_defaultSampler) {
+		m_device.destroySampler(m_defaultSampler);
+		m_defaultSampler = nullptr;
+	}
+
+	for (auto& pair : m_samplerCache) {
+		m_device.destroySampler(pair.second);
+	}
+	m_samplerCache.clear();
+
+	m_initialized = false;
+	mprintf(("Vulkan Texture Manager shutdown\n"));
+}
+
+void VulkanTextureManager::bm_init(bitmap_slot* slot)
+{
+	if (!m_initialized || !slot) {
+		return;
+	}
+
+	// Allocate Vulkan-specific data
+	if (slot->gr_info == nullptr) {
+		slot->gr_info = new tcache_slot_vulkan();
+	} else {
+		static_cast<tcache_slot_vulkan*>(slot->gr_info)->reset();
+	}
+}
+
+void VulkanTextureManager::bm_create(bitmap_slot* slot)
+{
+	if (!m_initialized || !slot) {
+		return;
+	}
+
+	// Ensure gr_info is allocated
+	if (slot->gr_info == nullptr) {
+		slot->gr_info = new tcache_slot_vulkan();
+	}
+}
+
+void VulkanTextureManager::bm_free_data(bitmap_slot* slot, bool release)
+{
+	if (!m_initialized || !slot || !slot->gr_info) {
+		return;
+	}
+
+	auto* ts = static_cast<tcache_slot_vulkan*>(slot->gr_info);
+	auto* deletionQueue = getDeletionQueue();
+
+	// For shared animation texture arrays: check if any other frame still needs the image.
+	// We compute base frame from slot data (bitmapHandle - arrayIndex) rather than calling
+	// bm_get_base_frame(), because during shutdown/mission-unload the bitmap entries may
+	// already be cleaned up, causing bm_get_base_frame() to return -1. That would skip
+	// ref-counting and every frame slot would independently queue the same shared resources
+	// for destruction (double-free).
+	if (ts->arrayLayers > 1 && ts->bitmapHandle >= 0) {
+		ts->used = false;
+
+		int baseFrame = ts->bitmapHandle - static_cast<int>(ts->arrayIndex);
+		int numFrames = static_cast<int>(ts->arrayLayers);
+		vk::Image sharedImage = ts->image;
+
+		bool anyInUse = false;
+		for (int f = baseFrame; f < baseFrame + numFrames; f++) {
+			if (f == ts->bitmapHandle) {
+				continue; // skip self (already marked unused)
+			}
+			auto* fSlot = bm_get_slot(f, true);
+			if (fSlot && fSlot->gr_info) {
+				auto* fTs = static_cast<tcache_slot_vulkan*>(fSlot->gr_info);
+				if (fTs->used && fTs->image == sharedImage) {
+					anyInUse = true;
+					break;
+				}
+			}
+		}
+		if (anyInUse) {
+			// Other frames still use the shared image — just detach this slot
+			ts->image = nullptr;
+			ts->imageView = nullptr;
+			ts->allocation = VulkanAllocation{};
+			ts->reset();
+			if (release) {
+				delete ts;
+				slot->gr_info = nullptr;
+			}
+			return;
+		}
+		// No frames in use — fall through to destroy the shared image
+	}
+
+	// Queue resources for deferred destruction to avoid destroying
+	// resources that may still be referenced by in-flight command buffers
+
+	// Cubemap per-face framebuffers and views (must be before ts->framebuffer
+	// since framebuffer may alias cubeFaceFramebuffers[0])
+	for (auto& fb : ts->cubeFaceFramebuffers) {
+		if (fb) {
+			deletionQueue->queueFramebuffer(fb);
+			fb = nullptr;
+		}
+	}
+	for (auto& v : ts->cubeFaceViews) {
+		if (v) {
+			deletionQueue->queueImageView(v);
+			v = nullptr;
+		}
+	}
+	if (ts->cubeImageView) {
+		deletionQueue->queueImageView(ts->cubeImageView);
+		ts->cubeImageView = nullptr;
+	}
+	// If framebuffer was aliased to cubeFaceFramebuffers[0], it's already cleaned up
+	if (ts->isCubemap) {
+		ts->framebuffer = nullptr;
+	}
+
+	if (ts->framebuffer) {
+		deletionQueue->queueFramebuffer(ts->framebuffer);
+		ts->framebuffer = nullptr;
+	}
+
+	if (ts->renderPass) {
+		deletionQueue->queueRenderPass(ts->renderPass);
+		ts->renderPass = nullptr;
+	}
+
+	if (ts->imageView) {
+		deletionQueue->queueImageView(ts->imageView);
+		ts->imageView = nullptr;
+	}
+
+	if (ts->framebufferView) {
+		deletionQueue->queueImageView(ts->framebufferView);
+		ts->framebufferView = nullptr;
+	}
+
+	if (ts->image) {
+		deletionQueue->queueImage(ts->image, ts->allocation);
+		ts->image = nullptr;
+		ts->allocation = VulkanAllocation{}; // Clear to prevent double-free
+	}
+
+	ts->reset();
+
+	if (release) {
+		delete ts;
+		slot->gr_info = nullptr;
+	}
+}
+
+bool VulkanTextureManager::uploadAnimationFrames(int handle, bitmap* bm, int compType,
+	int baseFrame, int numFrames)
+{
+	mprintf(("VulkanTexture: Uploading animation array: base=%d numFrames=%d triggered by handle=%d\n",
+		baseFrame, numFrames, handle));
+
+	// Get dimensions and format from the triggering frame's bitmap
+	uint32_t width = static_cast<uint32_t>(bm->w);
+	uint32_t height = static_cast<uint32_t>(bm->h);
+	uint32_t arrayLayerCount = static_cast<uint32_t>(numFrames);
+
+	bool isCompressed = (compType == DDS_DXT1 || compType == DDS_DXT3 ||
+		compType == DDS_DXT5 || compType == DDS_BC7);
+
+	// Determine format
+	vk::Format format;
+	if (isCompressed) {
+		format = bppToVkFormat(bm->bpp, true, compType);
+	} else {
+		format = bppToVkFormat(bm->bpp);
+	}
+	if (format == vk::Format::eUndefined) {
+		mprintf(("VulkanTexture: uploadAnimationFrames: unsupported format bpp=%d compType=%d\n",
+			bm->bpp, compType));
+		return false;
+	}
+
+	// Calculate per-layer data size
+	size_t blockSize = 0;
+	size_t layerDataSize = 0;
+	uint32_t mipLevels = 1;
+
+	if (isCompressed) {
+		blockSize = dds_block_size(compType);
+		mipLevels = static_cast<uint32_t>(bm_get_num_mipmaps(handle));
+		if (mipLevels < 1) {
+			mipLevels = 1;
+		}
+
+		// Calculate total data size per layer (all mips)
+		uint32_t mipW = width;
+		uint32_t mipH = height;
+		for (uint32_t i = 0; i < mipLevels; i++) {
+			layerDataSize += dds_compressed_mip_size(mipW, mipH, blockSize);
+			mipW = std::max(1u, mipW / 2);
+			mipH = std::max(1u, mipH / 2);
+		}
+	} else {
+		size_t dstBytesPerPixel = (bm->bpp == 24) ? 4 : (bm->bpp / 8);
+		layerDataSize = width * height * dstBytesPerPixel;
+	}
+
+	size_t totalDataSize = layerDataSize * arrayLayerCount;
+
+	// Create multi-layer image
+	vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled;
+	vk::Image image;
+	VulkanAllocation allocation;
+
+	if (!createImage(width, height, mipLevels, format, vk::ImageTiling::eOptimal,
+		usage, MemoryUsage::GpuOnly, image, allocation, arrayLayerCount)) {
+		mprintf(("VulkanTexture: uploadAnimationFrames: failed to create %ux%u x%d array image\n",
+			width, height, numFrames));
+		return false;
+	}
+
+	// Create multi-layer image view
+	vk::ImageView imageView = createImageView(image, format,
+		vk::ImageAspectFlagBits::eColor, mipLevels, ImageViewType::Array2D, arrayLayerCount);
+	if (!imageView) {
+		mprintf(("VulkanTexture: uploadAnimationFrames: failed to create image view\n"));
+		m_device.destroyImage(image);
+		m_memoryManager->freeAllocation(allocation);
+		return false;
+	}
+
+	// Create staging buffer for all layers
+	vk::BufferCreateInfo bufferInfo;
+	bufferInfo.size = totalDataSize;
+	bufferInfo.usage = vk::BufferUsageFlagBits::eTransferSrc;
+	bufferInfo.sharingMode = vk::SharingMode::eExclusive;
+
+	vk::Buffer stagingBuffer;
+	VulkanAllocation stagingAllocation;
+
+	try {
+		stagingBuffer = m_device.createBuffer(bufferInfo);
+	} catch (const vk::SystemError& e) {
+		mprintf(("VulkanTexture: uploadAnimationFrames: failed to create staging buffer: %s\n", e.what()));
+		m_device.destroyImageView(imageView);
+		m_device.destroyImage(image);
+		m_memoryManager->freeAllocation(allocation);
+		return false;
+	}
+
+	if (!m_memoryManager->allocateBufferMemory(stagingBuffer, MemoryUsage::CpuOnly, stagingAllocation)) {
+		m_device.destroyBuffer(stagingBuffer);
+		m_device.destroyImageView(imageView);
+		m_device.destroyImage(image);
+		m_memoryManager->freeAllocation(allocation);
+		return false;
+	}
+
+	void* mapped = m_memoryManager->mapMemory(stagingAllocation);
+	if (!mapped) {
+		m_memoryManager->freeAllocation(stagingAllocation);
+		m_device.destroyBuffer(stagingBuffer);
+		m_device.destroyImageView(imageView);
+		m_device.destroyImage(image);
+		m_memoryManager->freeAllocation(allocation);
+		return false;
+	}
+
+	// Build per-layer copy regions and upload each frame's data
+	SCP_vector<vk::BufferImageCopy> copyRegions;
+
+	// Use the same lock parameters that were used for the triggering frame.
+	// bm->flags contains the lock flags (BMP_AABITMAP, BMP_TEX_OTHER, BMP_TEX_DXT*, etc.)
+	// bm->bpp contains the requested bpp. Using these ensures all frames are locked
+	// consistently (e.g., 8bpp for aabitmaps, 32bpp for RGBA textures).
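+	// Worked example (illustrative): a 10-frame 64x64 ANI locked at 32bpp gives
+	// layerDataSize = 64*64*4 = 16384 bytes, and frame k lands at staging-buffer
+	// offset k * 16384 in the loop below.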
+ int lockBpp = bm->bpp; + ushort lockFlags = bm->flags; + + // Set guard flag to make recursive bm_data calls no-ops + m_uploadingAnimation = true; + + for (int frame = baseFrame; frame < baseFrame + numFrames; frame++) { + int layerIndex = frame - baseFrame; + size_t layerOffset = layerIndex * layerDataSize; + uint8_t* dst = static_cast(mapped) + layerOffset; + + bitmap* frameBm; + bool needUnlock = false; + + if (frame == handle) { + // This is the frame that triggered us — use the passed bitmap directly + frameBm = bm; + } else { + // Lock this frame to get its data + frameBm = bm_lock(frame, lockBpp, lockFlags); + if (!frameBm) { + mprintf(("VulkanTexture: uploadAnimationFrames: failed to lock frame %d\n", frame)); + // Fill with zeros to avoid undefined data + memset(dst, 0, layerDataSize); + // Build copy regions anyway + if (isCompressed) { + uint32_t mipW = width, mipH = height; + size_t mipOffset = layerOffset; + for (uint32_t m = 0; m < mipLevels; m++) { + vk::BufferImageCopy region; + region.bufferOffset = static_cast(mipOffset); + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = m; + region.imageSubresource.baseArrayLayer = static_cast(layerIndex); + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(mipW, mipH, 1); + copyRegions.push_back(region); + uint32_t blocksW = (mipW + 3) / 4; + uint32_t blocksH = (mipH + 3) / 4; + mipOffset += blocksW * blocksH * blockSize; + mipW = std::max(1u, mipW / 2); + mipH = std::max(1u, mipH / 2); + } + } else { + vk::BufferImageCopy region; + region.bufferOffset = static_cast(layerOffset); + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = static_cast(layerIndex); + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(width, height, 1); + copyRegions.push_back(region); + } + continue; + } + needUnlock = true; + } + + // Copy frame data to staging buffer + if (isCompressed) { + memcpy(dst, reinterpret_cast(frameBm->data), layerDataSize); + + // Build per-mip copy regions for this layer + uint32_t mipW = width, mipH = height; + size_t mipOffset = layerOffset; + for (uint32_t m = 0; m < mipLevels; m++) { + uint32_t blocksW = (mipW + 3) / 4; + uint32_t blocksH = (mipH + 3) / 4; + size_t mipSize = blocksW * blocksH * blockSize; + + vk::BufferImageCopy region; + region.bufferOffset = static_cast(mipOffset); + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = m; + region.imageSubresource.baseArrayLayer = static_cast(layerIndex); + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(mipW, mipH, 1); + copyRegions.push_back(region); + + mipOffset += mipSize; + mipW = std::max(1u, mipW / 2); + mipH = std::max(1u, mipH / 2); + } + } else if (frameBm->bpp == 24) { + // Convert BGR (3 bytes) to BGRA (4 bytes) + const uint8_t* src = reinterpret_cast(frameBm->data); + size_t pixelCount = width * height; + for (size_t i = 0; i < pixelCount; ++i) { + dst[0] = src[0]; // B + dst[1] = src[1]; // G + dst[2] = src[2]; // R + dst[3] = 255; // A + src += 3; + dst 
+= 4; + } + + vk::BufferImageCopy region; + region.bufferOffset = static_cast(layerOffset); + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = static_cast(layerIndex); + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(width, height, 1); + copyRegions.push_back(region); + } else { + memcpy(dst, reinterpret_cast(frameBm->data), layerDataSize); + + vk::BufferImageCopy region; + region.bufferOffset = static_cast(layerOffset); + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = static_cast(layerIndex); + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(width, height, 1); + copyRegions.push_back(region); + } + + if (needUnlock) { + bm_unlock(frame); + } + } + + m_uploadingAnimation = false; + + // Flush staging buffer + m_memoryManager->flushMemory(stagingAllocation, 0, totalDataSize); + m_memoryManager->unmapMemory(stagingAllocation); + + // Record transitions + copy and submit async + vk::CommandBuffer cmd = beginSingleTimeCommands(); + recordUploadCommands(cmd, image, stagingBuffer, format, width, height, + mipLevels, vk::ImageLayout::eUndefined, false, copyRegions, + arrayLayerCount); + submitUploadAsync(cmd, stagingBuffer, stagingAllocation); + + // Store shared image in ALL frame slots + for (int frame = baseFrame; frame < baseFrame + numFrames; frame++) { + int layerIndex = frame - baseFrame; + auto* frameSlot = bm_get_slot(frame, true); + if (!frameSlot) { + continue; + } + if (!frameSlot->gr_info) { + bm_init(frameSlot); + } + auto* ts = static_cast(frameSlot->gr_info); + + // Defer destruction of any existing image in this slot + if (ts->image && ts->arrayLayers <= 1) { + auto* deletionQueue = getDeletionQueue(); + if (ts->imageView) { + deletionQueue->queueImageView(ts->imageView); + } + deletionQueue->queueImage(ts->image, ts->allocation); + } + + ts->image = image; + ts->imageView = imageView; + ts->allocation = allocation; + ts->width = width; + ts->height = height; + ts->format = format; + ts->mipLevels = mipLevels; + ts->bpp = bm->bpp; + ts->arrayLayers = arrayLayerCount; + ts->arrayIndex = static_cast(layerIndex); + ts->bitmapHandle = frame; + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + ts->used = true; + ts->uScale = 1.0f; + ts->vScale = 1.0f; + } + + mprintf(("VulkanTexture: Animation array uploaded: %ux%u x%d layers, %zu bytes total\n", + width, height, numFrames, totalDataSize)); + return true; +} + +bool VulkanTextureManager::uploadCubemap(int handle, bitmap* bm, int compType) +{ + mprintf(("VulkanTexture: Uploading cubemap: handle=%d w=%d h=%d compType=%d\n", + handle, bm->w, bm->h, compType)); + + auto* slot = bm_get_slot(handle, true); + if (!slot) { + return false; + } + if (!slot->gr_info) { + bm_init(slot); + } + auto* ts = static_cast(slot->gr_info); + + uint32_t faceW = static_cast(bm->w); + uint32_t faceH = static_cast(bm->h); + + // Map cubemap DDS compression types to base types + int baseCompType = compType; + if (compType == DDS_CUBEMAP_DXT1) baseCompType = DDS_DXT1; + else if (compType == DDS_CUBEMAP_DXT3) baseCompType = DDS_DXT3; + else if (compType == DDS_CUBEMAP_DXT5) 
baseCompType = DDS_DXT5; + + bool isCompressed = (baseCompType == DDS_DXT1 || baseCompType == DDS_DXT3 || + baseCompType == DDS_DXT5 || baseCompType == DDS_BC7); + + vk::Format format; + if (isCompressed) { + format = bppToVkFormat(bm->bpp, true, baseCompType); + } else { + format = bppToVkFormat(bm->bpp); + } + if (format == vk::Format::eUndefined) { + mprintf(("VulkanTexture: uploadCubemap: unsupported format\n")); + return false; + } + + uint32_t mipLevels = 1; + size_t blockSize = 0; + + if (isCompressed) { + blockSize = (baseCompType == DDS_DXT1) ? 8 : 16; + mipLevels = static_cast(bm_get_num_mipmaps(handle)); + if (mipLevels < 1) mipLevels = 1; + } + + // Calculate per-face data size (all mip levels for one face) + size_t perFaceSize = 0; + if (isCompressed) { + uint32_t mipW = faceW, mipH = faceH; + for (uint32_t m = 0; m < mipLevels; m++) { + uint32_t blocksW = (mipW + 3) / 4; + uint32_t blocksH = (mipH + 3) / 4; + perFaceSize += blocksW * blocksH * blockSize; + mipW = std::max(1u, mipW / 2); + mipH = std::max(1u, mipH / 2); + } + } else { + size_t dstBpp = (bm->bpp == 24) ? 4 : (bm->bpp / 8); + perFaceSize = faceW * faceH * dstBpp; + } + + size_t totalDataSize = perFaceSize * 6; + + // Defer destruction of existing resources + if (ts->image) { + auto* deletionQueue = getDeletionQueue(); + if (ts->imageView) { + deletionQueue->queueImageView(ts->imageView); + ts->imageView = nullptr; + } + deletionQueue->queueImage(ts->image, ts->allocation); + ts->image = nullptr; + ts->allocation = VulkanAllocation{}; + } + + // Create cubemap image (6 layers, eCubeCompatible) + vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled; + if (!createImage(faceW, faceH, mipLevels, format, vk::ImageTiling::eOptimal, + usage, MemoryUsage::GpuOnly, ts->image, ts->allocation, 6, true)) { + mprintf(("VulkanTexture: uploadCubemap: failed to create cubemap image\n")); + return false; + } + + // Create cubemap image view (samplerCube) + ts->imageView = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, + mipLevels, ImageViewType::Cube, 6); + if (!ts->imageView) { + mprintf(("VulkanTexture: uploadCubemap: failed to create cube image view\n")); + m_device.destroyImage(ts->image); + ts->image = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return false; + } + + // Create staging buffer + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = totalDataSize; + bufferInfo.usage = vk::BufferUsageFlagBits::eTransferSrc; + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer stagingBuffer; + VulkanAllocation stagingAllocation; + + try { + stagingBuffer = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanTexture: uploadCubemap: failed to create staging buffer: %s\n", e.what())); + return false; + } + + if (!m_memoryManager->allocateBufferMemory(stagingBuffer, MemoryUsage::CpuOnly, stagingAllocation)) { + m_device.destroyBuffer(stagingBuffer); + return false; + } + + void* mapped = m_memoryManager->mapMemory(stagingAllocation); + if (!mapped) { + m_memoryManager->freeAllocation(stagingAllocation); + m_device.destroyBuffer(stagingBuffer); + return false; + } + + // Copy data to staging buffer + // DDS cubemap data layout: face0[mip0..mipN], face1[mip0..mipN], ..., face5[mip0..mipN] + if (isCompressed) { + memcpy(mapped, reinterpret_cast(bm->data), totalDataSize); + } else if (bm->bpp == 24) { + // Convert BGR to BGRA for all 6 faces + const uint8_t* src = reinterpret_cast(bm->data); + 
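+ // Illustrative byte layout for this expansion (reference only): a 24bpp source
+ // pixel stream B0 G0 R0 B1 G1 R1 ... becomes B0 G0 R0 FF B1 G1 R1 FF ... in the
+ // staging buffer, i.e. one 0xFF alpha byte is appended per pixel.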
uint8_t* dst = static_cast(mapped); + size_t pixelCount = faceW * faceH * 6; + for (size_t i = 0; i < pixelCount; ++i) { + dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = 255; + src += 3; dst += 4; + } + } else { + memcpy(mapped, reinterpret_cast(bm->data), totalDataSize); + } + + // Build per-face, per-mip copy regions + SCP_vector copyRegions; + size_t bufferOffset = 0; + for (uint32_t face = 0; face < 6; face++) { + uint32_t mipW = faceW, mipH = faceH; + for (uint32_t mip = 0; mip < mipLevels; mip++) { + vk::BufferImageCopy region; + region.bufferOffset = static_cast(bufferOffset); + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = mip; + region.imageSubresource.baseArrayLayer = face; + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(mipW, mipH, 1); + copyRegions.push_back(region); + + if (isCompressed) { + uint32_t blocksW = (mipW + 3) / 4; + uint32_t blocksH = (mipH + 3) / 4; + bufferOffset += blocksW * blocksH * blockSize; + } else { + size_t dstBpp = (bm->bpp == 24) ? 4 : (bm->bpp / 8); + bufferOffset += mipW * mipH * dstBpp; + } + mipW = std::max(1u, mipW / 2); + mipH = std::max(1u, mipH / 2); + } + } + + m_memoryManager->flushMemory(stagingAllocation, 0, totalDataSize); + m_memoryManager->unmapMemory(stagingAllocation); + + // Record transitions + copy and submit async + vk::CommandBuffer cmd = beginSingleTimeCommands(); + recordUploadCommands(cmd, ts->image, stagingBuffer, format, faceW, faceH, + mipLevels, vk::ImageLayout::eUndefined, false, copyRegions, 6); + submitUploadAsync(cmd, stagingBuffer, stagingAllocation); + + // Update slot info + ts->width = faceW; + ts->height = faceH; + ts->format = format; + ts->mipLevels = mipLevels; + ts->bpp = bm->bpp; + ts->arrayLayers = 6; + ts->bitmapHandle = handle; + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + ts->used = true; + ts->isCubemap = true; + ts->uScale = 1.0f; + ts->vScale = 1.0f; + + mprintf(("VulkanTexture: Cubemap uploaded: %ux%u, %u mips, format=%d\n", + faceW, faceH, mipLevels, static_cast(format))); + return true; +} + +bool VulkanTextureManager::upload3DTexture(int handle, bitmap* bm, int texDepth) +{ + auto* slot = bm_get_slot(handle, true); + if (!slot) { + return false; + } + + if (!slot->gr_info) { + bm_init(slot); + } + + auto* ts = static_cast(slot->gr_info); + + uint32_t width = static_cast(bm->w); + uint32_t height = static_cast(bm->h); + uint32_t depth3D = static_cast(texDepth); + + // 3D textures are always 32bpp RGBA uncompressed, single mip + vk::Format format = vk::Format::eR8G8B8A8Unorm; + size_t dataSize = width * height * depth3D * 4; + + // Defer destruction of existing resources + if (ts->image) { + auto* deletionQueue = getDeletionQueue(); + if (ts->imageView) { + deletionQueue->queueImageView(ts->imageView); + ts->imageView = nullptr; + } + deletionQueue->queueImage(ts->image, ts->allocation); + ts->image = nullptr; + ts->allocation = VulkanAllocation{}; + } + + // Create 3D image + if (!createImage(width, height, 1, format, vk::ImageTiling::eOptimal, + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled, + MemoryUsage::GpuOnly, ts->image, ts->allocation, + 1, false, depth3D, vk::ImageType::e3D)) { + mprintf(("Failed to create 3D texture image!\n")); + return false; + } + + // Create 3D image view + ts->imageView = createImageView(ts->image, format, 
vk::ImageAspectFlagBits::eColor, 1, ImageViewType::Volume3D); + if (!ts->imageView) { + mprintf(("Failed to create 3D texture image view!\n")); + m_device.destroyImage(ts->image); + ts->image = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return false; + } + + // Create staging buffer + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = dataSize; + bufferInfo.usage = vk::BufferUsageFlagBits::eTransferSrc; + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer stagingBuffer; + VulkanAllocation stagingAllocation; + + try { + stagingBuffer = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create staging buffer for 3D texture: %s\n", e.what())); + return false; + } + + if (!m_memoryManager->allocateBufferMemory(stagingBuffer, MemoryUsage::CpuOnly, stagingAllocation)) { + m_device.destroyBuffer(stagingBuffer); + return false; + } + + // Copy data to staging buffer + void* mapped = m_memoryManager->mapMemory(stagingAllocation); + Verify(mapped); + memcpy(mapped, reinterpret_cast(bm->data), dataSize); + m_memoryManager->flushMemory(stagingAllocation, 0, dataSize); + m_memoryManager->unmapMemory(stagingAllocation); + + // Record transitions + copy and submit + vk::CommandBuffer cmd = beginSingleTimeCommands(); + + // Transition: eUndefined → eTransferDstOptimal + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.oldLayout = vk::ImageLayout::eUndefined; + barrier.newLayout = vk::ImageLayout::eTransferDstOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = ts->image; + barrier.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eTransfer, + {}, nullptr, nullptr, barrier); + + // Copy buffer to 3D image + vk::BufferImageCopy region; + region.bufferOffset = 0; + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(width, height, depth3D); + + cmd.copyBufferToImage(stagingBuffer, ts->image, vk::ImageLayout::eTransferDstOptimal, region); + + // Transition: eTransferDstOptimal → eShaderReadOnlyOptimal + barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal; + barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + cmd.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + {}, nullptr, nullptr, barrier); + + submitUploadAsync(cmd, stagingBuffer, stagingAllocation); + + // Update slot info + ts->width = width; + ts->height = height; + ts->depth = depth3D; + ts->is3D = true; + ts->format = format; + ts->mipLevels = 1; + ts->bpp = 32; + ts->bitmapHandle = handle; + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + ts->used = true; + ts->uScale = 1.0f; + ts->vScale = 1.0f; + + mprintf(("VulkanTexture: 3D texture uploaded: %ux%ux%u, format=%d\n", + width, height, depth3D, static_cast(format))); + return true; +} + +bool VulkanTextureManager::bm_data(int handle, bitmap* bm, int 
compType) +{ + static int callCount = 0; + if (callCount < 20) { + mprintf(("VulkanTextureManager::bm_data #%d: handle=%d bm=%p bm->data=%p compType=%d\n", + callCount++, handle, bm, bm ? reinterpret_cast<void*>(bm->data) : nullptr, compType)); + } + + if (!m_initialized || !bm || !bm->data) { + return false; + } + + // Guard: nested bm_lock→bm_data calls during animation upload are no-ops + if (m_uploadingAnimation) { + return true; + } + + // Detect animated texture arrays + int numFrames = 0; + int baseFrame = bm_get_base_frame(handle, &numFrames); + if (baseFrame < 0) { + return false; + } + + if (numFrames > 1) { + // Check if the shared image already exists (earlier frame created it) + auto* baseSlot = bm_get_slot(baseFrame, true); + if (baseSlot) { + if (!baseSlot->gr_info) { + bm_init(baseSlot); + } + auto* baseTs = static_cast<tcache_slot_vulkan*>(baseSlot->gr_info); + if (baseTs->image && baseTs->arrayLayers == static_cast<uint32_t>(numFrames)) { + // Share existing image with this frame's slot + auto* slot = bm_get_slot(handle, true); + if (!slot) { + return false; + } + if (!slot->gr_info) { + bm_init(slot); + } + auto* ts = static_cast<tcache_slot_vulkan*>(slot->gr_info); + ts->image = baseTs->image; + ts->imageView = baseTs->imageView; + ts->allocation = baseTs->allocation; + ts->width = baseTs->width; + ts->height = baseTs->height; + ts->format = baseTs->format; + ts->mipLevels = baseTs->mipLevels; + ts->bpp = baseTs->bpp; + ts->arrayLayers = baseTs->arrayLayers; + ts->arrayIndex = static_cast<uint32_t>(handle - baseFrame); + ts->bitmapHandle = handle; + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + ts->used = true; + return true; + } + } + // First frame requested — create array and upload all frames + return uploadAnimationFrames(handle, bm, compType, baseFrame, numFrames); + } + + // Detect cubemap textures + bool isCubemapUpload = (bm->flags & BMP_TEX_CUBEMAP) != 0; + if (!isCubemapUpload) { + // Also check compression type for cubemap DDS variants + isCubemapUpload = (compType == DDS_CUBEMAP_DXT1 || compType == DDS_CUBEMAP_DXT3 || + compType == DDS_CUBEMAP_DXT5); + } + + if (isCubemapUpload) { + return uploadCubemap(handle, bm, compType); + } + + // Detect 3D textures (volumetric data) + if (bm->d > 1) { + return upload3DTexture(handle, bm, bm->d); + } + + auto* slot = bm_get_slot(handle, true); + if (!slot) { + return false; + } + + // Ensure slot is initialized + if (!slot->gr_info) { + bm_init(slot); + } + + auto* ts = static_cast<tcache_slot_vulkan*>(slot->gr_info); + + uint32_t width = static_cast<uint32_t>(bm->w); + uint32_t height = static_cast<uint32_t>(bm->h); + uint32_t mipLevels = 1; + bool autoGenerateMips = false; + bool isCompressed = (compType == DDS_DXT1 || compType == DDS_DXT3 || + compType == DDS_DXT5 || compType == DDS_BC7); + + static int fmtLogCount = 0; + if (fmtLogCount < 30) { + mprintf(("VulkanTextureManager::bm_data: handle=%d w=%d h=%d bpp=%d true_bpp=%d flags=0x%x compType=%d\n", + handle, bm->w, bm->h, bm->bpp, bm->true_bpp, bm->flags, compType)); + fmtLogCount++; + } + + // Determine format and data size + vk::Format format; + size_t dataSize; + size_t blockSize = 0; + SCP_vector<vk::BufferImageCopy> copyRegions; + + if (isCompressed) { + format = bppToVkFormat(bm->bpp, true, compType); + if (format == vk::Format::eUndefined) { + mprintf(("VulkanTextureManager::bm_data: Unsupported compression type %d\n", compType)); + return false; + } + + blockSize = dds_block_size(compType); + + // Get pre-baked mipmap count from DDS file + mipLevels = static_cast<uint32_t>(bm_get_num_mipmaps(handle)); + if (mipLevels < 1) { + mipLevels = 1; + } + + // Calculate total data size for all mip levels and build
copy regions + dataSize = 0; + uint32_t mipW = width; + uint32_t mipH = height; + for (uint32_t i = 0; i < mipLevels; i++) { + size_t mipSize = dds_compressed_mip_size(mipW, mipH, blockSize); + + vk::BufferImageCopy region; + region.bufferOffset = static_cast(dataSize); + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = i; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(mipW, mipH, 1); + copyRegions.push_back(region); + + dataSize += mipSize; + mipW = std::max(1u, mipW / 2); + mipH = std::max(1u, mipH / 2); + } + } else { + format = bppToVkFormat(bm->bpp); + if (format == vk::Format::eUndefined) { + mprintf(("VulkanTextureManager::bm_data: Unsupported bpp %d\n", bm->bpp)); + return false; + } + + // 24bpp textures uploaded as 32bpp (Vulkan doesn't support 24bpp optimal tiling) + size_t dstBytesPerPixel = (bm->bpp == 24) ? 4 : (bm->bpp / 8); + dataSize = width * height * dstBytesPerPixel; + + // Auto-generate mipmaps for textures whose files originally had them. + // This only triggers for uncompressed textures that were originally DDS + // with mipmaps but got decompressed by a non-DDS lock path. + if (width > 4 && height > 4) { + int numMipmaps = bm_get_num_mipmaps(handle); + if (numMipmaps > 1) { + vk::FormatProperties fmtProps = m_physicalDevice.getFormatProperties(format); + if ((fmtProps.optimalTilingFeatures & vk::FormatFeatureFlagBits::eSampledImageFilterLinear) && + (fmtProps.optimalTilingFeatures & vk::FormatFeatureFlagBits::eBlitSrc) && + (fmtProps.optimalTilingFeatures & vk::FormatFeatureFlagBits::eBlitDst)) { + mipLevels = calculateMipLevels(width, height); + autoGenerateMips = true; + } + } + } + } + + // If texture already exists with same dimensions, just update data + if (ts->image && ts->width == width && ts->height == height && ts->format == format) { + // Update existing texture - would use staging buffer + // For now, recreate + } + + // Defer destruction of existing resources — they may still be referenced + // by in-flight render or upload command buffers + if (ts->image) { + if (ts->arrayLayers > 1) { + // Shared animation image — just clear references, don't destroy + // (the image is shared with other frame slots) + ts->imageView = nullptr; + ts->image = nullptr; + ts->allocation = VulkanAllocation{}; + } else { + auto* deletionQueue = getDeletionQueue(); + if (ts->imageView) { + deletionQueue->queueImageView(ts->imageView); + ts->imageView = nullptr; + } + deletionQueue->queueImage(ts->image, ts->allocation); + ts->image = nullptr; + ts->allocation = VulkanAllocation{}; + } + } + + // Create image + vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled; + if (autoGenerateMips) { + usage |= vk::ImageUsageFlagBits::eTransferSrc; // Needed for vkCmdBlitImage mipmap generation + } + + if (!createImage(width, height, mipLevels, format, vk::ImageTiling::eOptimal, + usage, MemoryUsage::GpuOnly, ts->image, ts->allocation)) { + mprintf(("Failed to create texture image!\n")); + return false; + } + + // Create image view (sampler2DArray for regular textures) + ts->imageView = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, mipLevels, ImageViewType::Array2D); + if (!ts->imageView) { + mprintf(("Failed to create texture image view!\n")); + m_device.destroyImage(ts->image); + 
ts->image = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return false; + } + + // Create staging buffer + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = dataSize; + bufferInfo.usage = vk::BufferUsageFlagBits::eTransferSrc; + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer stagingBuffer; + VulkanAllocation stagingAllocation; + + try { + stagingBuffer = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create staging buffer: %s\n", e.what())); + return false; + } + + if (!m_memoryManager->allocateBufferMemory(stagingBuffer, MemoryUsage::CpuOnly, stagingAllocation)) { + m_device.destroyBuffer(stagingBuffer); + return false; + } + + // Copy data to staging buffer + void* mapped = m_memoryManager->mapMemory(stagingAllocation); + Verify(mapped); + if (isCompressed) { + // Compressed data: copy raw block data directly (includes all mip levels) + memcpy(mapped, reinterpret_cast(bm->data), dataSize); + } else if (bm->bpp == 24) { + // Convert BGR (3 bytes) to BGRA (4 bytes), adding alpha=255 + const uint8_t* src = reinterpret_cast(bm->data); + uint8_t* dst = static_cast(mapped); + size_t pixelCount = width * height; + for (size_t i = 0; i < pixelCount; ++i) { + dst[0] = src[0]; // B + dst[1] = src[1]; // G + dst[2] = src[2]; // R + dst[3] = 255; // A + src += 3; + dst += 4; + } + } else { + memcpy(mapped, reinterpret_cast(bm->data), dataSize); + } + m_memoryManager->flushMemory(stagingAllocation, 0, dataSize); + m_memoryManager->unmapMemory(stagingAllocation); + + // Record transitions + copy (+ optional mipmap generation) and submit async + vk::CommandBuffer cmd = beginSingleTimeCommands(); + recordUploadCommands(cmd, ts->image, stagingBuffer, format, width, height, + mipLevels, vk::ImageLayout::eUndefined, autoGenerateMips, copyRegions); + submitUploadAsync(cmd, stagingBuffer, stagingAllocation); + + // Update slot info + ts->width = width; + ts->height = height; + ts->format = format; + ts->mipLevels = mipLevels; + ts->bpp = bm->bpp; + ts->bitmapHandle = handle; + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + ts->used = true; + ts->uScale = 1.0f; + ts->vScale = 1.0f; + + return true; +} + +int VulkanTextureManager::bm_make_render_target(int handle, int* width, int* height, + int* bpp, int* mm_lvl, int flags) +{ + if (!m_initialized || !width || !height) { + return 0; + } + + // Clamp to max size + if (static_cast(*width) > m_maxTextureSize) { + *width = static_cast(m_maxTextureSize); + } + if (static_cast(*height) > m_maxTextureSize) { + *height = static_cast(m_maxTextureSize); + } + + auto* slot = bm_get_slot(handle, true); + if (!slot) { + return 0; + } + + if (!slot->gr_info) { + bm_init(slot); + } + + auto* ts = static_cast(slot->gr_info); + + // Free any existing resources + bm_free_data(slot, false); + + uint32_t w = static_cast(*width); + uint32_t h = static_cast(*height); + uint32_t mipLevels = 1; + + if (flags & BMP_FLAG_RENDER_TARGET_MIPMAP) { + mipLevels = calculateMipLevels(w, h); + } + + bool isCubemapRT = (flags & BMP_FLAG_CUBEMAP) != 0; + uint32_t arrayLayers = isCubemapRT ? 
6 : 1; + vk::Format format = vk::Format::eR8G8B8A8Unorm; + + // Create image for render target + vk::ImageUsageFlags usage = vk::ImageUsageFlagBits::eColorAttachment | + vk::ImageUsageFlagBits::eSampled | + vk::ImageUsageFlagBits::eTransferSrc; + + if (flags & BMP_FLAG_RENDER_TARGET_MIPMAP) { + usage |= vk::ImageUsageFlagBits::eTransferDst; // For mipmap generation + } + + if (!createImage(w, h, mipLevels, format, vk::ImageTiling::eOptimal, + usage, MemoryUsage::GpuOnly, ts->image, ts->allocation, arrayLayers, isCubemapRT)) { + mprintf(("Failed to create render target image!\n")); + return 0; + } + + if (isCubemapRT) { + // Cubemap render target: create cube view for sampling + per-face 2D views for framebuffer + ts->imageView = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, + mipLevels, ImageViewType::Cube, 6); + if (!ts->imageView) { + m_device.destroyImage(ts->image); + ts->image = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return 0; + } + + // Create per-face 2D views for framebuffer attachments + for (uint32_t face = 0; face < 6; face++) { + ts->cubeFaceViews[face] = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, + 1, ImageViewType::Plain2D, 1, face); + if (!ts->cubeFaceViews[face]) { + mprintf(("Failed to create cubemap face %u view!\n", face)); + // Clean up previously created views + for (uint32_t j = 0; j < face; j++) { + m_device.destroyImageView(ts->cubeFaceViews[j]); + ts->cubeFaceViews[j] = nullptr; + } + m_device.destroyImageView(ts->imageView); + m_device.destroyImage(ts->image); + ts->image = nullptr; + ts->imageView = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return 0; + } + } + } else { + // Regular render target: array view for shader compatibility + ts->imageView = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, mipLevels, ImageViewType::Array2D); + if (!ts->imageView) { + m_device.destroyImage(ts->image); + ts->image = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return 0; + } + + // For mipmapped render targets, create a single-mip view for framebuffer use + // (framebuffer attachments must have levelCount == 1) + if (mipLevels > 1) { + ts->framebufferView = createImageView(ts->image, format, vk::ImageAspectFlagBits::eColor, 1, ImageViewType::Array2D); + if (!ts->framebufferView) { + m_device.destroyImageView(ts->imageView); + m_device.destroyImage(ts->image); + ts->image = nullptr; + ts->imageView = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return 0; + } + } + } + + // Create render pass for this target + vk::AttachmentDescription colorAttachment; + colorAttachment.format = format; + colorAttachment.samples = vk::SampleCountFlagBits::e1; + colorAttachment.loadOp = vk::AttachmentLoadOp::eClear; + colorAttachment.storeOp = vk::AttachmentStoreOp::eStore; + colorAttachment.stencilLoadOp = vk::AttachmentLoadOp::eDontCare; + colorAttachment.stencilStoreOp = vk::AttachmentStoreOp::eDontCare; + colorAttachment.initialLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + colorAttachment.finalLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + vk::AttachmentReference colorAttachmentRef; + colorAttachmentRef.attachment = 0; + colorAttachmentRef.layout = vk::ImageLayout::eColorAttachmentOptimal; + + vk::SubpassDescription subpass; + subpass.pipelineBindPoint = vk::PipelineBindPoint::eGraphics; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorAttachmentRef; + + vk::RenderPassCreateInfo renderPassInfo; + 
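+ // (Note: with initialLayout == finalLayout == eShaderReadOnlyOptimal, the render pass
+ // itself transitions the attachment to eColorAttachmentOptimal for the subpass and back,
+ // so the image must already be in eShaderReadOnlyOptimal when the pass begins; the
+ // explicit layout transition after framebuffer creation below establishes that.)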
renderPassInfo.attachmentCount = 1; + renderPassInfo.pAttachments = &colorAttachment; + renderPassInfo.subpassCount = 1; + renderPassInfo.pSubpasses = &subpass; + + try { + ts->renderPass = m_device.createRenderPass(renderPassInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create render pass: %s\n", e.what())); + m_device.destroyImageView(ts->imageView); + m_device.destroyImage(ts->image); + ts->image = nullptr; + ts->imageView = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return 0; + } + + if (isCubemapRT) { + // Create per-face framebuffers + for (uint32_t face = 0; face < 6; face++) { + vk::FramebufferCreateInfo framebufferInfo; + framebufferInfo.renderPass = ts->renderPass; + framebufferInfo.attachmentCount = 1; + framebufferInfo.pAttachments = &ts->cubeFaceViews[face]; + framebufferInfo.width = w; + framebufferInfo.height = h; + framebufferInfo.layers = 1; + + try { + ts->cubeFaceFramebuffers[face] = m_device.createFramebuffer(framebufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create cubemap face %u framebuffer: %s\n", face, e.what())); + return 0; + } + } + // Default framebuffer points to face 0 + ts->framebuffer = ts->cubeFaceFramebuffers[0]; + } else { + // Create framebuffer + // Use framebufferView (single-mip) if available, otherwise imageView + vk::ImageView fbAttachment = ts->framebufferView ? ts->framebufferView : ts->imageView; + vk::FramebufferCreateInfo framebufferInfo; + framebufferInfo.renderPass = ts->renderPass; + framebufferInfo.attachmentCount = 1; + framebufferInfo.pAttachments = &fbAttachment; + framebufferInfo.width = w; + framebufferInfo.height = h; + framebufferInfo.layers = 1; + + try { + ts->framebuffer = m_device.createFramebuffer(framebufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create framebuffer: %s\n", e.what())); + m_device.destroyRenderPass(ts->renderPass); + m_device.destroyImageView(ts->imageView); + m_device.destroyImage(ts->image); + ts->image = nullptr; + ts->imageView = nullptr; + ts->renderPass = nullptr; + m_memoryManager->freeAllocation(ts->allocation); + return 0; + } + } + + // Transition image to eShaderReadOnlyOptimal so it's in a valid layout + // if sampled before being rendered into (render pass expects this initial layout) + transitionImageLayout(ts->image, format, vk::ImageLayout::eUndefined, + vk::ImageLayout::eShaderReadOnlyOptimal, mipLevels, arrayLayers); + + // Update slot info + ts->width = w; + ts->height = h; + ts->format = format; + ts->mipLevels = mipLevels; + ts->bpp = 32; + ts->arrayLayers = arrayLayers; + ts->bitmapHandle = handle; + ts->isRenderTarget = true; + ts->isCubemap = isCubemapRT; + ts->used = true; + ts->uScale = 1.0f; + ts->vScale = 1.0f; + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + + if (bpp) { + *bpp = 32; + } + if (mm_lvl) { + *mm_lvl = static_cast(mipLevels); + } + + mprintf(("Created Vulkan render target: %ux%u\n", w, h)); + return 1; +} + +int VulkanTextureManager::bm_set_render_target(int handle, int face) +{ + if (!m_initialized) { + return 0; + } + + // handle < 0 means reset to default framebuffer + if (handle < 0) { + m_currentRenderTarget = -1; + return 1; + } + + auto* slot = bm_get_slot(handle, true); + if (!slot || !slot->gr_info) { + return 0; + } + + auto* ts = static_cast(slot->gr_info); + if (!ts->isRenderTarget || !ts->framebuffer) { + return 0; + } + + // For cubemap render targets, select the face framebuffer + if (ts->isCubemap && face >= 0 && face < 6) { + ts->framebuffer = 
ts->cubeFaceFramebuffers[face]; + } + + m_currentRenderTarget = handle; + + return 1; +} + +void VulkanTextureManager::update_texture(int bitmap_handle, int bpp, const ubyte* data, + int width, int height) +{ + if (!m_initialized || !data) { + return; + } + + auto* slot = bm_get_slot(bitmap_handle, true); + if (!slot || !slot->gr_info) { + return; + } + + auto* ts = static_cast(slot->gr_info); + if (!ts->image) { + return; + } + + uint32_t w = static_cast(width); + uint32_t h = static_cast(height); + + // Verify dimensions match existing texture + if (ts->width != w || ts->height != h) { + mprintf(("VulkanTextureManager::update_texture: Size mismatch (%ux%u vs %ux%u)\n", + w, h, ts->width, ts->height)); + return; + } + + // Use bppToVkFormat to determine format, matching how bm_data creates textures + vk::Format format = bppToVkFormat(bpp); + if (format == vk::Format::eUndefined) { + mprintf(("VulkanTextureManager::update_texture: Unsupported bpp %d\n", bpp)); + return; + } + + // Calculate staging buffer size (24bpp is uploaded as 32bpp BGRA) + size_t srcBytesPerPixel = bpp / 8; + size_t dstBytesPerPixel = (bpp == 24) ? 4 : srcBytesPerPixel; + size_t dataSize = w * h * dstBytesPerPixel; + + // Create staging buffer + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = dataSize; + bufferInfo.usage = vk::BufferUsageFlagBits::eTransferSrc; + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer stagingBuffer; + VulkanAllocation stagingAllocation; + + try { + stagingBuffer = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("VulkanTextureManager::update_texture: Failed to create staging buffer: %s\n", e.what())); + return; + } + + Verify(m_memoryManager->allocateBufferMemory(stagingBuffer, MemoryUsage::CpuOnly, stagingAllocation)); + + // Copy data to staging buffer + void* mapped = m_memoryManager->mapMemory(stagingAllocation); + Verify(mapped); + if (bpp == 24) { + // Convert BGR (3 bytes) to BGRA (4 bytes), adding alpha=255 + const uint8_t* src = data; + uint8_t* dst = static_cast(mapped); + size_t pixelCount = w * h; + for (size_t i = 0; i < pixelCount; ++i) { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = 255; + src += 3; + dst += 4; + } + } else { + memcpy(mapped, data, dataSize); + } + m_memoryManager->flushMemory(stagingAllocation, 0, dataSize); + m_memoryManager->unmapMemory(stagingAllocation); + + // Record transitions + copy into a single command buffer and submit async + vk::CommandBuffer cmd = beginSingleTimeCommands(); + recordUploadCommands(cmd, ts->image, stagingBuffer, format, w, h, + ts->mipLevels, ts->currentLayout); + submitUploadAsync(cmd, stagingBuffer, stagingAllocation); + + // Update layout tracking + ts->currentLayout = vk::ImageLayout::eShaderReadOnlyOptimal; +} + +void VulkanTextureManager::get_bitmap_from_texture(void* data_out, int bitmap_num) +{ + if (!m_initialized || !data_out) { + return; + } + + // TODO: Implement texture readback + (void)bitmap_num; +} + +vk::Sampler VulkanTextureManager::getSampler(vk::Filter magFilter, vk::Filter minFilter, + vk::SamplerAddressMode addressMode, + bool enableAnisotropy, float maxAnisotropy, + bool enableMipmaps) +{ + // Create a key from sampler state + uint64_t key = 0; + key |= static_cast(magFilter) << 0; + key |= static_cast(minFilter) << 4; + key |= static_cast(addressMode) << 8; + key |= static_cast(enableAnisotropy) << 16; + key |= static_cast(enableMipmaps) << 17; + key |= static_cast(maxAnisotropy * 10) << 24; + + auto it = 
m_samplerCache.find(key); + if (it != m_samplerCache.end()) { + return it->second; + } + + // Create new sampler + vk::SamplerCreateInfo samplerInfo; + samplerInfo.magFilter = magFilter; + samplerInfo.minFilter = minFilter; + samplerInfo.addressModeU = addressMode; + samplerInfo.addressModeV = addressMode; + samplerInfo.addressModeW = addressMode; + samplerInfo.anisotropyEnable = enableAnisotropy && (m_maxAnisotropy > 1.0f); + samplerInfo.maxAnisotropy = std::max(1.0f, std::min(maxAnisotropy > 0.0f ? maxAnisotropy : m_maxAnisotropy, m_maxAnisotropy)); + samplerInfo.borderColor = vk::BorderColor::eIntOpaqueBlack; + samplerInfo.unnormalizedCoordinates = false; + samplerInfo.compareEnable = false; + samplerInfo.compareOp = vk::CompareOp::eAlways; + samplerInfo.mipmapMode = enableMipmaps ? vk::SamplerMipmapMode::eLinear : vk::SamplerMipmapMode::eNearest; + samplerInfo.mipLodBias = 0.0f; + samplerInfo.minLod = 0.0f; + samplerInfo.maxLod = enableMipmaps ? VK_LOD_CLAMP_NONE : 0.0f; + + try { + vk::Sampler sampler = m_device.createSampler(samplerInfo); + m_samplerCache[key] = sampler; + return sampler; + } catch (const vk::SystemError& e) { + mprintf(("Failed to create sampler: %s\n", e.what())); + return m_defaultSampler; + } +} + +vk::Sampler VulkanTextureManager::getDefaultSampler() +{ + return m_defaultSampler; +} + +vk::ImageView VulkanTextureManager::getFallback2DArrayView() +{ + return m_fallback2DArrayView; +} + +vk::ImageView VulkanTextureManager::getFallbackTextureView2D() +{ + return m_fallbackTextureView2D; +} + +vk::ImageView VulkanTextureManager::getFallbackCubeView() +{ + return m_fallbackCubeView; +} + +vk::ImageView VulkanTextureManager::getFallback3DView() +{ + return m_fallback3DView; +} + +tcache_slot_vulkan* VulkanTextureManager::getTextureSlot(int handle) +{ + auto* slot = bm_get_slot(handle, true); + if (!slot || !slot->gr_info) { + return nullptr; + } + return static_cast(slot->gr_info); +} + +bool VulkanTextureManager::isTextureValid(int handle) +{ + auto* ts = getTextureSlot(handle); + return ts && ts->image && ts->imageView && ts->used; +} + +vk::Format VulkanTextureManager::bppToVkFormat(int bpp, bool compressed, int compressionType) +{ + if (compressed) { + // DDS compression types + switch (compressionType) { + case DDS_DXT1: + return vk::Format::eBc1RgbaUnormBlock; + case DDS_DXT3: + return vk::Format::eBc2UnormBlock; + case DDS_DXT5: + return vk::Format::eBc3UnormBlock; + case DDS_BC7: + return vk::Format::eBc7UnormBlock; + default: + return vk::Format::eUndefined; + } + } + + switch (bpp) { + case 8: + return vk::Format::eR8Unorm; + case 16: + // OpenGL uses GL_UNSIGNED_SHORT_1_5_5_5_REV with GL_BGRA (A1R5G5B5) + return vk::Format::eA1R5G5B5UnormPack16; + case 24: + // 24bpp (BGR) is almost never supported for optimal tiling in Vulkan. + // We convert to 32bpp BGRA at upload time, so return the 32bpp format. 
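+ // (For reference, the full mapping this function implements: 8 -> eR8Unorm,
+ // 16 -> eA1R5G5B5UnormPack16, 24/32 -> eB8G8R8A8Unorm; compressed: DXT1 -> eBc1RgbaUnormBlock,
+ // DXT3 -> eBc2UnormBlock, DXT5 -> eBc3UnormBlock, BC7 -> eBc7UnormBlock.)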
+ return vk::Format::eB8G8R8A8Unorm; + case 32: + // FSO uses BGRA format (BMP_AARRGGBB = BGRA in memory) + return vk::Format::eB8G8R8A8Unorm; + default: + return vk::Format::eUndefined; + } +} + +void VulkanTextureManager::transitionImageLayout(vk::Image image, vk::Format format, + vk::ImageLayout oldLayout, + vk::ImageLayout newLayout, + uint32_t mipLevels, + uint32_t arrayLayers) +{ + vk::CommandBuffer commandBuffer = beginSingleTimeCommands(); + + vk::ImageMemoryBarrier barrier; + barrier.oldLayout = oldLayout; + barrier.newLayout = newLayout; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + // Detect depth/stencil formats and use the correct aspect mask + if (format == vk::Format::eD32Sfloat || format == vk::Format::eD16Unorm) { + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eDepth; + } else if (format == vk::Format::eD24UnormS8Uint || format == vk::Format::eD32SfloatS8Uint || + format == vk::Format::eD16UnormS8Uint) { + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; + } else { + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + } + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = mipLevels; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + + vk::PipelineStageFlags sourceStage; + vk::PipelineStageFlags destinationStage; + + if (oldLayout == vk::ImageLayout::eUndefined && + newLayout == vk::ImageLayout::eTransferDstOptimal) { + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite; + sourceStage = vk::PipelineStageFlagBits::eTopOfPipe; + destinationStage = vk::PipelineStageFlagBits::eTransfer; + } else if (oldLayout == vk::ImageLayout::eTransferDstOptimal && + newLayout == vk::ImageLayout::eShaderReadOnlyOptimal) { + barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + sourceStage = vk::PipelineStageFlagBits::eTransfer; + destinationStage = vk::PipelineStageFlagBits::eFragmentShader; + } else if (oldLayout == vk::ImageLayout::eUndefined && + newLayout == vk::ImageLayout::eShaderReadOnlyOptimal) { + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + sourceStage = vk::PipelineStageFlagBits::eTopOfPipe; + destinationStage = vk::PipelineStageFlagBits::eFragmentShader; + } else if (oldLayout == vk::ImageLayout::eUndefined && + newLayout == vk::ImageLayout::eColorAttachmentOptimal) { + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite; + sourceStage = vk::PipelineStageFlagBits::eTopOfPipe; + destinationStage = vk::PipelineStageFlagBits::eColorAttachmentOutput; + } else if (oldLayout == vk::ImageLayout::eUndefined && + newLayout == vk::ImageLayout::eDepthStencilAttachmentOptimal) { + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite; + sourceStage = vk::PipelineStageFlagBits::eTopOfPipe; + destinationStage = vk::PipelineStageFlagBits::eEarlyFragmentTests; + } else { + // Generic transition + barrier.srcAccessMask = vk::AccessFlagBits::eMemoryWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eMemoryRead; + sourceStage = vk::PipelineStageFlagBits::eAllCommands; + destinationStage = vk::PipelineStageFlagBits::eAllCommands; + } + + commandBuffer.pipelineBarrier(sourceStage, 
destinationStage, {}, + nullptr, nullptr, barrier); + + endSingleTimeCommands(commandBuffer); +} + +void vulkan_generate_mipmap_chain(vk::CommandBuffer cmd, vk::Image image, + uint32_t width, uint32_t height, + uint32_t mipLevels, uint32_t arrayLayers) +{ + if (mipLevels <= 1) { + return; + } + + // Generate each mip level via blit from the previous level + for (uint32_t i = 1; i < mipLevels; i++) { + uint32_t srcW = std::max(1u, width >> (i - 1)); + uint32_t srcH = std::max(1u, height >> (i - 1)); + uint32_t dstW = std::max(1u, width >> i); + uint32_t dstH = std::max(1u, height >> i); + + // Transition mip i from eUndefined to eTransferDstOptimal + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.oldLayout = vk::ImageLayout::eUndefined; + barrier.newLayout = vk::ImageLayout::eTransferDstOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = i; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer, + {}, {}, {}, barrier); + } + + // Blit from mip i-1 to mip i + vk::ImageBlit blit; + blit.srcSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + blit.srcSubresource.mipLevel = i - 1; + blit.srcSubresource.baseArrayLayer = 0; + blit.srcSubresource.layerCount = arrayLayers; + blit.srcOffsets[0] = vk::Offset3D(0, 0, 0); + blit.srcOffsets[1] = vk::Offset3D(static_cast(srcW), static_cast(srcH), 1); + + blit.dstSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + blit.dstSubresource.mipLevel = i; + blit.dstSubresource.baseArrayLayer = 0; + blit.dstSubresource.layerCount = arrayLayers; + blit.dstOffsets[0] = vk::Offset3D(0, 0, 0); + blit.dstOffsets[1] = vk::Offset3D(static_cast(dstW), static_cast(dstH), 1); + + cmd.blitImage(image, vk::ImageLayout::eTransferSrcOptimal, + image, vk::ImageLayout::eTransferDstOptimal, + blit, vk::Filter::eLinear); + + // Transition mip i to eTransferSrcOptimal (source for next blit) + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferRead; + barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal; + barrier.newLayout = vk::ImageLayout::eTransferSrcOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = i; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer, + {}, {}, {}, barrier); + } + } + + // Final transition: all mips to eShaderReadOnlyOptimal + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eTransferRead; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + barrier.oldLayout = vk::ImageLayout::eTransferSrcOptimal; + barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + 
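+ // (All mips are already in eTransferSrcOptimal at this point: mip 0 was transitioned
+ // before the chain ran, and every generated mip was transitioned after serving as a
+ // blit destination, so a single barrier covers the whole chain.)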
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = mipLevels; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + {}, {}, {}, barrier); + } +} + +void VulkanTextureManager::frameStart() +{ + processPendingCommandBuffers(); +} + +bool VulkanTextureManager::createImage(uint32_t width, uint32_t height, uint32_t mipLevels, + vk::Format format, vk::ImageTiling tiling, + vk::ImageUsageFlags usage, MemoryUsage memUsage, + vk::Image& image, VulkanAllocation& allocation, + uint32_t arrayLayers, bool cubemap, + uint32_t imageDepth, vk::ImageType imageType) +{ + vk::ImageCreateInfo imageInfo; + imageInfo.imageType = imageType; + imageInfo.extent.width = width; + imageInfo.extent.height = height; + imageInfo.extent.depth = imageDepth; + imageInfo.mipLevels = mipLevels; + imageInfo.arrayLayers = arrayLayers; + imageInfo.format = format; + imageInfo.tiling = tiling; + imageInfo.initialLayout = vk::ImageLayout::eUndefined; + imageInfo.usage = usage; + imageInfo.sharingMode = vk::SharingMode::eExclusive; + imageInfo.samples = vk::SampleCountFlagBits::e1; + + if (cubemap) { + imageInfo.flags |= vk::ImageCreateFlagBits::eCubeCompatible; + Assertion(arrayLayers == 6, "Cubemap images must have exactly 6 array layers!"); + } + + try { + image = m_device.createImage(imageInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create image: %s\n", e.what())); + return false; + } + + if (!m_memoryManager->allocateImageMemory(image, memUsage, allocation)) { + m_device.destroyImage(image); + image = nullptr; + return false; + } + + return true; +} + +vk::ImageView VulkanTextureManager::createImageView(vk::Image image, vk::Format format, + vk::ImageAspectFlags aspectFlags, + uint32_t mipLevels, + ImageViewType viewType, + uint32_t layerCount, + uint32_t baseArrayLayer) +{ + vk::ImageViewCreateInfo viewInfo; + viewInfo.image = image; + switch (viewType) { + case ImageViewType::Cube: + viewInfo.viewType = vk::ImageViewType::eCube; + break; + case ImageViewType::Array2D: + viewInfo.viewType = vk::ImageViewType::e2DArray; + break; + case ImageViewType::Volume3D: + viewInfo.viewType = vk::ImageViewType::e3D; + break; + case ImageViewType::Plain2D: + default: + viewInfo.viewType = vk::ImageViewType::e2D; + break; + } + viewInfo.format = format; + viewInfo.subresourceRange.aspectMask = aspectFlags; + viewInfo.subresourceRange.baseMipLevel = 0; + viewInfo.subresourceRange.levelCount = mipLevels; + viewInfo.subresourceRange.baseArrayLayer = baseArrayLayer; + viewInfo.subresourceRange.layerCount = layerCount; + + try { + return m_device.createImageView(viewInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create image view: %s\n", e.what())); + return nullptr; + } +} + +bool VulkanTextureManager::createFallbackTexture(vk::Image& outImage, VulkanAllocation& outAlloc, + vk::ImageView& outView, ImageViewType viewType, + uint32_t arrayLayers, bool cubemap, + vk::ImageType imageType) +{ + if (!createImage(1, 1, 1, vk::Format::eR8G8B8A8Unorm, vk::ImageTiling::eOptimal, + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled, + MemoryUsage::GpuOnly, outImage, outAlloc, arrayLayers, cubemap, 1, imageType)) { + mprintf(("Failed to create fallback 
texture image!\n")); + return false; + } + + outView = createImageView(outImage, vk::Format::eR8G8B8A8Unorm, + vk::ImageAspectFlagBits::eColor, 1, viewType, arrayLayers); + if (!outView) { + mprintf(("Failed to create fallback texture view!\n")); + m_device.destroyImage(outImage); + m_memoryManager->freeAllocation(outAlloc); + return false; + } + + // Upload white pixels via staging buffer + SCP_vector<uint32_t> whitePixels(arrayLayers, 0xFFFFFFFF); + vk::DeviceSize bufferSize = arrayLayers * sizeof(uint32_t); + + vk::BufferCreateInfo bufferInfo; + bufferInfo.size = bufferSize; + bufferInfo.usage = vk::BufferUsageFlagBits::eTransferSrc; + bufferInfo.sharingMode = vk::SharingMode::eExclusive; + + vk::Buffer stagingBuffer; + VulkanAllocation stagingAlloc; + try { + stagingBuffer = m_device.createBuffer(bufferInfo); + } catch (const vk::SystemError& e) { + mprintf(("Failed to create fallback staging buffer: %s\n", e.what())); + m_device.destroyImageView(outView); + m_device.destroyImage(outImage); + m_memoryManager->freeAllocation(outAlloc); + return false; + } + + if (!m_memoryManager->allocateBufferMemory(stagingBuffer, MemoryUsage::CpuToGpu, stagingAlloc)) { + m_device.destroyBuffer(stagingBuffer); + m_device.destroyImageView(outView); + m_device.destroyImage(outImage); + m_memoryManager->freeAllocation(outAlloc); + return false; + } + + void* mapped = m_device.mapMemory(stagingAlloc.memory, stagingAlloc.offset, bufferSize); + memcpy(mapped, whitePixels.data(), bufferSize); + m_device.unmapMemory(stagingAlloc.memory); + + SCP_vector<vk::BufferImageCopy> regions; + for (uint32_t i = 0; i < arrayLayers; i++) { + vk::BufferImageCopy region; + region.bufferOffset = i * sizeof(uint32_t); + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = i; + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(1, 1, 1); + regions.push_back(region); + } + + vk::CommandBuffer cmd = beginSingleTimeCommands(); + recordUploadCommands(cmd, outImage, stagingBuffer, vk::Format::eR8G8B8A8Unorm, + 1, 1, 1, vk::ImageLayout::eUndefined, false, regions, arrayLayers); + endSingleTimeCommands(cmd); + + m_device.destroyBuffer(stagingBuffer); + m_memoryManager->freeAllocation(stagingAlloc); + + return true; +} + +vk::CommandBuffer VulkanTextureManager::beginSingleTimeCommands() +{ + vk::CommandBufferAllocateInfo allocInfo; + allocInfo.level = vk::CommandBufferLevel::ePrimary; + allocInfo.commandPool = m_commandPool; + allocInfo.commandBufferCount = 1; + + vk::CommandBuffer commandBuffer = m_device.allocateCommandBuffers(allocInfo)[0]; + + vk::CommandBufferBeginInfo beginInfo; + beginInfo.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit; + + commandBuffer.begin(beginInfo); + + return commandBuffer; +} + +void VulkanTextureManager::endSingleTimeCommands(vk::CommandBuffer commandBuffer) +{ + commandBuffer.end(); + + vk::SubmitInfo submitInfo; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + + m_graphicsQueue.submit(submitInfo, nullptr); + m_graphicsQueue.waitIdle(); + + m_device.freeCommandBuffers(m_commandPool, commandBuffer); +} + +void VulkanTextureManager::recordUploadCommands(vk::CommandBuffer cmd, vk::Image image, + vk::Buffer stagingBuffer, vk::Format format, + uint32_t width, uint32_t height, + uint32_t mipLevels, vk::ImageLayout oldLayout, + bool generateMips, + const
SCP_vector<vk::BufferImageCopy>& regions, + uint32_t arrayLayers) +{ + (void)format; // May be needed for depth/stencil transitions in the future + + // Barrier 1: oldLayout -> eTransferDstOptimal (all mip levels, all layers) + { + vk::ImageMemoryBarrier barrier; + barrier.oldLayout = oldLayout; + barrier.newLayout = vk::ImageLayout::eTransferDstOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = mipLevels; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + + if (oldLayout == vk::ImageLayout::eUndefined) { + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite; + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, + vk::PipelineStageFlagBits::eTransfer, + {}, nullptr, nullptr, barrier); + } else { + barrier.srcAccessMask = vk::AccessFlagBits::eMemoryWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite; + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + {}, nullptr, nullptr, barrier); + } + } + + if (!regions.empty()) { + // Pre-baked mip levels: copy all regions (one per mip level) from the staging buffer + cmd.copyBufferToImage(stagingBuffer, image, vk::ImageLayout::eTransferDstOptimal, + static_cast<uint32_t>(regions.size()), regions.data()); + } else { + // Single mip-0 copy + vk::BufferImageCopy region; + region.bufferOffset = 0; + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eColor; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset = vk::Offset3D(0, 0, 0); + region.imageExtent = vk::Extent3D(width, height, 1); + + cmd.copyBufferToImage(stagingBuffer, image, vk::ImageLayout::eTransferDstOptimal, region); + } + + if (generateMips && mipLevels > 1 && regions.empty()) { + // Generate mipmaps via blit chain: upload mip 0, then downsample each level + + // Transition mip 0 from eTransferDstOptimal to eTransferSrcOptimal + { + vk::ImageMemoryBarrier barrier; + barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferRead; + barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal; + barrier.newLayout = vk::ImageLayout::eTransferSrcOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer, + {}, {}, {}, barrier); + } + + vulkan_generate_mipmap_chain(cmd, image, width, height, mipLevels, arrayLayers); + } else { + // Simple transition: all mips from eTransferDstOptimal to eShaderReadOnlyOptimal + { + vk::ImageMemoryBarrier barrier; + barrier.oldLayout = vk::ImageLayout::eTransferDstOptimal; + barrier.newLayout = vk::ImageLayout::eShaderReadOnlyOptimal; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex =
VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = mipLevels; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = arrayLayers; + barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + + cmd.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eFragmentShader, + {}, nullptr, nullptr, barrier); + } + } +} + +void VulkanTextureManager::submitUploadAsync(vk::CommandBuffer cmd, vk::Buffer stagingBuffer, + VulkanAllocation stagingAllocation) +{ + cmd.end(); + + vk::SubmitInfo submitInfo; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmd; + + m_graphicsQueue.submit(submitInfo, nullptr); + + // Defer staging buffer destruction (2 frames matches MAX_FRAMES_IN_FLIGHT) + auto* deletionQueue = getDeletionQueue(); + deletionQueue->queueBuffer(stagingBuffer, stagingAllocation); + + // Defer command buffer free + m_pendingCommandBuffers.push_back({cmd, VulkanDeletionQueue::FRAMES_TO_WAIT}); +} + +void VulkanTextureManager::processPendingCommandBuffers() +{ + auto it = m_pendingCommandBuffers.begin(); + while (it != m_pendingCommandBuffers.end()) { + if (it->framesRemaining == 0) { + m_device.freeCommandBuffers(m_commandPool, it->cb); + it = m_pendingCommandBuffers.erase(it); + } else { + it->framesRemaining--; + ++it; + } + } +} + +uint32_t VulkanTextureManager::calculateMipLevels(uint32_t width, uint32_t height) +{ + return static_cast<uint32_t>(std::floor(std::log2(std::max(width, height)))) + 1; +} + +// ========== gr_screen function pointer implementations ========== + +int vulkan_preload(int bitmap_num, int /*is_aabitmap*/) +{ + auto* texManager = getTextureManager(); + + // Check if texture is already loaded + auto* slot = texManager->getTextureSlot(bitmap_num); + if (slot && slot->imageView) { + return 1; // Already loaded + } + + // Determine lock parameters based on compression type. + // For compressed DDS textures, lock with the matching DXT/BC7 flags to get + // raw compressed data with all pre-baked mipmap levels.
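+ // (Sketch of the mapping implemented by the switch below: DXT1 locks at 24bpp with
+ // BMP_TEX_DXT1; DXT3/DXT5/BC7 lock at 32bpp with their matching BMP_TEX_* flag; the
+ // DDS_CUBEMAP_* variants lock with BMP_TEX_CUBEMAP; anything else falls back to a
+ // 32bpp uncompressed lock with BMP_TEX_XPARENT.)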
+ int compType = bm_is_compressed(bitmap_num); + int lockBpp = 32; + ubyte lockFlags = BMP_TEX_XPARENT; + + switch (compType) { + case DDS_DXT1: + lockBpp = 24; + lockFlags = BMP_TEX_DXT1; + break; + case DDS_DXT3: + lockBpp = 32; + lockFlags = BMP_TEX_DXT3; + break; + case DDS_DXT5: + lockBpp = 32; + lockFlags = BMP_TEX_DXT5; + break; + case DDS_BC7: + lockBpp = 32; + lockFlags = BMP_TEX_BC7; + break; + case DDS_CUBEMAP_DXT1: + lockBpp = 24; + lockFlags = BMP_TEX_CUBEMAP; + break; + case DDS_CUBEMAP_DXT3: + case DDS_CUBEMAP_DXT5: + lockBpp = 32; + lockFlags = BMP_TEX_CUBEMAP; + break; + default: + // Uncompressed — use 32bpp decompressed + compType = 0; + break; + } + + bitmap* bmp = bm_lock(bitmap_num, static_cast(lockBpp), lockFlags); + if (!bmp) { + static int warnCount = 0; + if (warnCount < 10) { + mprintf(("vulkan_preload: Failed to lock bitmap %d (compType=%d)\n", bitmap_num, compType)); + warnCount++; + } + return 0; + } + + // Upload the texture + bool success = texManager->bm_data(bitmap_num, bmp, compType); + + // Unlock bitmap + bm_unlock(bitmap_num); + + if (success) { + static int successCount = 0; + if (successCount < 10) { + mprintf(("vulkan_preload: Successfully uploaded texture %d (compressed=%d)\n", + bitmap_num, compType)); + successCount++; + } + } + + return success ? 1 : 0; +} + +void vulkan_bm_create(bitmap_slot* slot) +{ + auto* texManager = getTextureManager(); + texManager->bm_create(slot); +} + +void vulkan_bm_free_data(bitmap_slot* slot, bool release) +{ + auto* texManager = getTextureManager(); + texManager->bm_free_data(slot, release); +} + +void vulkan_bm_init(bitmap_slot* slot) +{ + auto* texManager = getTextureManager(); + texManager->bm_init(slot); +} + +bool vulkan_bm_data(int handle, bitmap* bm) +{ + auto* texManager = getTextureManager(); + return texManager->bm_data(handle, bm); +} + +void vulkan_bm_page_in_start() +{ + // Intentional no-op. The OpenGL implementation (opengl_preload_init) is also + // effectively empty — its only code is commented out. Vulkan textures are + // loaded on demand and don't need a page-in session setup. +} + +int vulkan_bm_make_render_target(int handle, int* width, int* height, int* bpp, int* mm_lvl, int flags) +{ + auto* texManager = getTextureManager(); + return texManager->bm_make_render_target(handle, width, height, bpp, mm_lvl, flags); +} + +int vulkan_bm_set_render_target(int handle, int face) +{ + auto* texManager = getTextureManager(); + return texManager->bm_set_render_target(handle, face); +} + +void vulkan_update_texture(int bitmap_handle, int bpp, const ubyte* data, int width, int height) +{ + auto* texManager = getTextureManager(); + texManager->update_texture(bitmap_handle, bpp, data, width, height); +} + +void vulkan_get_bitmap_from_texture(void* data_out, int bitmap_num) +{ + auto* texManager = getTextureManager(); + texManager->get_bitmap_from_texture(data_out, bitmap_num); +} + +} // namespace vulkan +} // namespace graphics diff --git a/code/graphics/vulkan/VulkanTexture.h b/code/graphics/vulkan/VulkanTexture.h new file mode 100644 index 00000000000..25845f852d6 --- /dev/null +++ b/code/graphics/vulkan/VulkanTexture.h @@ -0,0 +1,364 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "VulkanMemory.h" + +#define BMPMAN_INTERNAL +#include "bmpman/bm_internal.h" + +#include + +namespace graphics { +namespace vulkan { + +/** + * @brief Vulkan-specific texture data stored in bitmap slots + * + * Extends gr_bitmap_info to store Vulkan image handles and metadata. 
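+ * Access pattern used throughout this file (sketch): the manager stores one of these
+ * in bitmap_slot::gr_info, so a lookup is
+ *   static_cast<tcache_slot_vulkan*>(bm_get_slot(handle, true)->gr_info)
+ * once bm_init() has run for the slot.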
+ * This is the Vulkan equivalent of tcache_slot_opengl. + */ +class tcache_slot_vulkan : public gr_bitmap_info { +public: + vk::Image image; + vk::ImageView imageView; + VulkanAllocation allocation; + vk::Format format = vk::Format::eUndefined; + vk::ImageLayout currentLayout = vk::ImageLayout::eUndefined; + + uint32_t width = 0; + uint32_t height = 0; + uint32_t mipLevels = 1; + uint32_t arrayLayers = 1; + int bpp = 0; + + int bitmapHandle = -1; + uint32_t arrayIndex = 0; + bool used = false; + + // For render targets + vk::Framebuffer framebuffer; + vk::ImageView framebufferView; // Single-mip view for framebuffer (when mipLevels > 1) + vk::RenderPass renderPass; // Render pass compatible with this target + bool isRenderTarget = false; + + // 3D texture support + bool is3D = false; + uint32_t depth = 1; + + // Cubemap support + bool isCubemap = false; + vk::ImageView cubeFaceViews[6] = {}; // Per-face 2D views for render-to-cubemap + vk::Framebuffer cubeFaceFramebuffers[6] = {}; // Per-face framebuffers for render-to-cubemap + vk::ImageView cubeImageView; // Cube view for sampling (viewType=eCube, layerCount=6) + + // Texture scaling (for non-power-of-two handling) + float uScale = 1.0f; + float vScale = 1.0f; + + tcache_slot_vulkan() { reset(); } + ~tcache_slot_vulkan() override = default; + + void reset(); +}; + +/** + * @brief Manages Vulkan textures, samplers, and render targets + */ +class VulkanTextureManager { +public: + VulkanTextureManager(); + ~VulkanTextureManager(); + + // Non-copyable + VulkanTextureManager(const VulkanTextureManager&) = delete; + VulkanTextureManager& operator=(const VulkanTextureManager&) = delete; + + /** + * @brief Initialize the texture manager + */ + bool init(vk::Device device, vk::PhysicalDevice physicalDevice, + VulkanMemoryManager* memoryManager, + vk::CommandPool commandPool, vk::Queue graphicsQueue); + + /** + * @brief Shutdown and free all textures + */ + void shutdown(); + + // Bitmap management functions (implement gr_screen function pointers) + + /** + * @brief Initialize a bitmap slot for Vulkan + */ + void bm_init(bitmap_slot* slot); + + /** + * @brief Create Vulkan resources for a bitmap slot + */ + void bm_create(bitmap_slot* slot); + + /** + * @brief Free Vulkan resources for a bitmap slot + */ + void bm_free_data(bitmap_slot* slot, bool release); + + /** + * @brief Upload bitmap data to GPU + * @param compType Compression type (DDS_DXT1/3/5, DDS_BC7) or 0 for uncompressed + */ + bool bm_data(int handle, bitmap* bm, int compType = 0); + + /** + * @brief Create a render target + */ + int bm_make_render_target(int handle, int* width, int* height, int* bpp, int* mm_lvl, int flags); + + /** + * @brief Set active render target + */ + int bm_set_render_target(int handle, int face); + + /** + * @brief Update texture data + */ + void update_texture(int bitmap_handle, int bpp, const ubyte* data, int width, int height); + + /** + * @brief Read texture data back to CPU + */ + void get_bitmap_from_texture(void* data_out, int bitmap_num); + + // Sampler management + + /** + * @brief Get or create a sampler with specified parameters + */ + vk::Sampler getSampler(vk::Filter magFilter, vk::Filter minFilter, + vk::SamplerAddressMode addressMode, + bool enableAnisotropy, float maxAnisotropy, + bool enableMipmaps); + + /** + * @brief Get default sampler for standard textures + */ + vk::Sampler getDefaultSampler(); + + /** + * @brief Get fallback white texture image view (2D_ARRAY) for unbound material texture slots + */ + vk::ImageView 
getFallback2DArrayView(); + + /** + * @brief Get fallback white texture image view (2D) for post-processing sampler2D slots + */ + vk::ImageView getFallbackTextureView2D(); + + /** + * @brief Get fallback white cubemap image view (Cube) for unbound samplerCube slots + */ + vk::ImageView getFallbackCubeView(); + + /** + * @brief Get fallback white 3D texture image view for unbound sampler3D slots + */ + vk::ImageView getFallback3DView(); + + // Texture access + + /** + * @brief Get texture slot data + */ + tcache_slot_vulkan* getTextureSlot(int handle); + + /** + * @brief Check if texture is valid and ready for use + */ + bool isTextureValid(int handle); + + // Utility functions + + /** + * @brief Convert FSO bitmap format to Vulkan format + */ + static vk::Format bppToVkFormat(int bpp, bool compressed = false, int compressionType = 0); + + /** + * @brief Transition image layout + */ + void transitionImageLayout(vk::Image image, vk::Format format, + vk::ImageLayout oldLayout, vk::ImageLayout newLayout, + uint32_t mipLevels = 1, uint32_t arrayLayers = 1); + + /** + * @brief Called at start of frame + */ + void frameStart(); + +private: + /** + * @brief Create a Vulkan image + * @param cubemap If true, sets eCubeCompatible flag (requires arrayLayers=6) + * @param imageType Vulkan image type (e2D, e3D, etc.) + */ + bool createImage(uint32_t width, uint32_t height, uint32_t mipLevels, + vk::Format format, vk::ImageTiling tiling, + vk::ImageUsageFlags usage, MemoryUsage memUsage, + vk::Image& image, VulkanAllocation& allocation, + uint32_t arrayLayers = 1, bool cubemap = false, + uint32_t imageDepth = 1, + vk::ImageType imageType = vk::ImageType::e2D); + + enum class ImageViewType { Array2D, Plain2D, Cube, Volume3D }; + + /** + * @brief Create an image view + * @param viewType Controls view type: Array2D=sampler2DArray, Plain2D=sampler2D, Cube=samplerCube + */ + vk::ImageView createImageView(vk::Image image, vk::Format format, + vk::ImageAspectFlags aspectFlags, + uint32_t mipLevels, + ImageViewType viewType = ImageViewType::Array2D, + uint32_t layerCount = 1, + uint32_t baseArrayLayer = 0); + + /** + * @brief Create a 1x1 white fallback texture (image + view + upload) + */ + bool createFallbackTexture(vk::Image& outImage, VulkanAllocation& outAlloc, + vk::ImageView& outView, ImageViewType viewType, + uint32_t arrayLayers = 1, bool cubemap = false, + vk::ImageType imageType = vk::ImageType::e2D); + + /** + * @brief Begin single-time command buffer + */ + vk::CommandBuffer beginSingleTimeCommands(); + + /** + * @brief End and submit single-time command buffer (synchronous, blocks on waitIdle) + */ + void endSingleTimeCommands(vk::CommandBuffer commandBuffer); + + /** + * @brief Record layout transitions and buffer-to-image copy into a command buffer + */ + void recordUploadCommands(vk::CommandBuffer cmd, vk::Image image, vk::Buffer stagingBuffer, + vk::Format format, uint32_t width, uint32_t height, + uint32_t mipLevels, vk::ImageLayout oldLayout, + bool generateMips = false, + const SCP_vector<vk::BufferImageCopy>& regions = {}, + uint32_t arrayLayers = 1); + + /** + * @brief Submit an upload command buffer asynchronously and defer resource cleanup + * + * Submits without waitIdle. Queues staging buffer and command buffer for + * deferred destruction/free after enough frames have elapsed.
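+ * e.g. with FRAMES_TO_WAIT = 2 (matching MAX_FRAMES_IN_FLIGHT), a staging buffer submitted during frame N is not destroyed before frame N+2, by which point the in-flight frame fences have cycled and the GPU is done reading it.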
+ */ + void submitUploadAsync(vk::CommandBuffer cmd, vk::Buffer stagingBuffer, + VulkanAllocation stagingAllocation); + + /** + * @brief Free command buffers whose GPU work has completed + */ + void processPendingCommandBuffers(); + + /** + * @brief Calculate number of mipmap levels + */ + static uint32_t calculateMipLevels(uint32_t width, uint32_t height); + + /** + * @brief Upload all frames of an animation as layers of a single texture array + */ + bool uploadAnimationFrames(int handle, bitmap* bm, int compType, + int baseFrame, int numFrames); + + /** + * @brief Upload a cubemap DDS texture (6 faces) as a single cubemap image + */ + bool uploadCubemap(int handle, bitmap* bm, int compType); + + /** + * @brief Upload a 3D texture (volumetric data) as a single 3D image + */ + bool upload3DTexture(int handle, bitmap* bm, int texDepth); + + // Guard flag to prevent recursion when bm_lock calls bm_data during animation upload + bool m_uploadingAnimation = false; + + // Deferred command buffer free list + struct PendingCommandBuffer { + vk::CommandBuffer cb; + uint32_t framesRemaining; + }; + SCP_vector<PendingCommandBuffer> m_pendingCommandBuffers; + + vk::Device m_device; + vk::PhysicalDevice m_physicalDevice; + VulkanMemoryManager* m_memoryManager = nullptr; + vk::CommandPool m_commandPool; + vk::Queue m_graphicsQueue; + + // Cached samplers (key: packed sampler state) + SCP_unordered_map<uint64_t, vk::Sampler> m_samplerCache; + vk::Sampler m_defaultSampler; + + // Fallback 1x1 white textures for unbound texture slots + vk::Image m_fallback2DArrayTexture; + vk::ImageView m_fallback2DArrayView; // 2D_ARRAY view (for material texture arrays) + VulkanAllocation m_fallback2DArrayAllocation; + + vk::Image m_fallbackTexture2D; + vk::ImageView m_fallbackTextureView2D; // 2D view (for post-processing sampler2D) + VulkanAllocation m_fallbackTexture2DAllocation; + + // Fallback 1x1x6 white cubemap for unbound samplerCube slots + vk::Image m_fallbackCubeTexture; + vk::ImageView m_fallbackCubeView; // Cube view (for samplerCube) + VulkanAllocation m_fallbackCubeAllocation; + + // Fallback 1x1x1 white 3D texture for unbound sampler3D slots + vk::Image m_fallback3DTexture; + vk::ImageView m_fallback3DView; // 3D view (for sampler3D) + VulkanAllocation m_fallback3DAllocation; + + // Device limits + uint32_t m_maxTextureSize = 4096; + float m_maxAnisotropy = 1.0f; + + // Current render target state + int m_currentRenderTarget = -1; + + bool m_initialized = false; +}; + +// Global texture manager instance +VulkanTextureManager* getTextureManager(); +void setTextureManager(VulkanTextureManager* manager); + +/** + * @brief Generate mip levels 1..mipLevels-1 via blit chain from the previous level. + * + * Prerequisite: mip 0 must already be in eTransferSrcOptimal. + * Result: ALL mip levels transitioned to eShaderReadOnlyOptimal.
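+ * e.g. a 256x256 base image (9 levels total) gets levels 1..8 generated; each blit writes level i (in eTransferDstOptimal) from level i-1, then a barrier moves level i to eTransferSrcOptimal so the next blit can read it.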
+ */ +void vulkan_generate_mipmap_chain(vk::CommandBuffer cmd, vk::Image image, + uint32_t width, uint32_t height, + uint32_t mipLevels, uint32_t arrayLayers = 1); + +// ========== gr_screen function pointer implementations ========== + +int vulkan_preload(int bitmap_num, int is_aabitmap); +void vulkan_bm_create(bitmap_slot* slot); +void vulkan_bm_free_data(bitmap_slot* slot, bool release); +void vulkan_bm_init(bitmap_slot* slot); +bool vulkan_bm_data(int handle, bitmap* bm); +void vulkan_bm_page_in_start(); +int vulkan_bm_make_render_target(int handle, int* width, int* height, int* bpp, int* mm_lvl, int flags); +int vulkan_bm_set_render_target(int handle, int face); +void vulkan_update_texture(int bitmap_handle, int bpp, const ubyte* data, int width, int height); +void vulkan_get_bitmap_from_texture(void* data_out, int bitmap_num); + +} // namespace vulkan +} // namespace graphics diff --git a/code/graphics/vulkan/VulkanVertexFormat.cpp b/code/graphics/vulkan/VulkanVertexFormat.cpp new file mode 100644 index 00000000000..714000b41b0 --- /dev/null +++ b/code/graphics/vulkan/VulkanVertexFormat.cpp @@ -0,0 +1,184 @@ +#include "VulkanVertexFormat.h" + +namespace graphics { +namespace vulkan { + +// Vertex format mapping table +// Maps FSO vertex_format_data::vertex_format to Vulkan formats +// Based on GL_array_binding_data in gropengltnl.cpp +const VertexFormatMapping VERTEX_FORMAT_MAPPINGS[] = { + // Position formats + { vertex_format_data::POSITION4, vk::Format::eR32G32B32A32Sfloat, VertexAttributeLocation::Position, 4, 16 }, + { vertex_format_data::POSITION3, vk::Format::eR32G32B32Sfloat, VertexAttributeLocation::Position, 3, 12 }, + { vertex_format_data::POSITION2, vk::Format::eR32G32Sfloat, VertexAttributeLocation::Position, 2, 8 }, + + // Color formats + { vertex_format_data::COLOR3, vk::Format::eR8G8B8Unorm, VertexAttributeLocation::Color, 3, 3 }, + { vertex_format_data::COLOR4, vk::Format::eR8G8B8A8Unorm, VertexAttributeLocation::Color, 4, 4 }, + { vertex_format_data::COLOR4F, vk::Format::eR32G32B32A32Sfloat, VertexAttributeLocation::Color, 4, 16 }, + + // Texture coordinate formats + { vertex_format_data::TEX_COORD2, vk::Format::eR32G32Sfloat, VertexAttributeLocation::TexCoord, 2, 8 }, + { vertex_format_data::TEX_COORD4, vk::Format::eR32G32B32A32Sfloat, VertexAttributeLocation::TexCoord, 4, 16 }, + + // Normal/tangent formats + { vertex_format_data::NORMAL, vk::Format::eR32G32B32Sfloat, VertexAttributeLocation::Normal, 3, 12 }, + { vertex_format_data::TANGENT, vk::Format::eR32G32B32A32Sfloat, VertexAttributeLocation::Tangent, 4, 16 }, + + // Instance/particle formats + { vertex_format_data::MODEL_ID, vk::Format::eR32Sfloat, VertexAttributeLocation::ModelId, 1, 4 }, + { vertex_format_data::RADIUS, vk::Format::eR32Sfloat, VertexAttributeLocation::Radius, 1, 4 }, + { vertex_format_data::UVEC, vk::Format::eR32G32B32Sfloat, VertexAttributeLocation::Uvec, 3, 12 }, + + // Matrix format (mat4 = 4 vec4s, uses locations 8-11) + { vertex_format_data::MATRIX4, vk::Format::eR32G32B32A32Sfloat, VertexAttributeLocation::ModelMatrix, 16, 64 }, +}; + +const size_t VERTEX_FORMAT_MAPPINGS_COUNT = sizeof(VERTEX_FORMAT_MAPPINGS) / sizeof(VERTEX_FORMAT_MAPPINGS[0]); + +const VertexFormatMapping* getVertexFormatMapping(vertex_format_data::vertex_format format) +{ + for (size_t i = 0; i < VERTEX_FORMAT_MAPPINGS_COUNT; ++i) { + if (VERTEX_FORMAT_MAPPINGS[i].format == format) { + return &VERTEX_FORMAT_MAPPINGS[i]; + } + } + return nullptr; +} + +void VertexInputConfig::updatePointers() +{ + 
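+	// createInfo stores raw pointers into the two vectors; any reallocation (e.g. push_back) invalidates them, so they are refreshed here once the lists are final.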
createInfo.vertexBindingDescriptionCount = static_cast<uint32_t>(bindings.size()); + createInfo.pVertexBindingDescriptions = bindings.empty() ? nullptr : bindings.data(); + createInfo.vertexAttributeDescriptionCount = static_cast<uint32_t>(attributes.size()); + createInfo.pVertexAttributeDescriptions = attributes.empty() ? nullptr : attributes.data(); +} + +const VertexInputConfig& VulkanVertexFormatCache::getVertexInputConfig(const vertex_layout& layout) +{ + size_t hash = layout.hash(); + + auto it = m_cache.find(hash); + if (it != m_cache.end()) { + return it->second; + } + + // Create new configuration + auto result = m_cache.emplace(hash, createVertexInputConfig(layout)); + return result.first->second; +} + +void VulkanVertexFormatCache::clear() +{ + m_cache.clear(); +} + +VertexInputConfig VulkanVertexFormatCache::createVertexInputConfig(const vertex_layout& layout) +{ + VertexInputConfig config; + + // Track which bindings we've already added + SCP_unordered_map<int, uint32_t> bufferBindings; // buffer_number -> binding index + + size_t numComponents = layout.get_num_vertex_components(); + + for (size_t i = 0; i < numComponents; ++i) { + const vertex_format_data* component = layout.get_vertex_component(i); + const VertexFormatMapping* mapping = getVertexFormatMapping(component->format_type); + + if (!mapping) { + mprintf(("VulkanVertexFormat: Unknown vertex format %d\n", static_cast<int>(component->format_type))); + continue; + } + + // Track which locations the layout natively provides + uint32_t loc = static_cast<uint32_t>(mapping->location); + config.providedInputMask |= (1u << loc); + + // Get or create binding for this buffer + uint32_t bindingIndex; + auto bindingIt = bufferBindings.find(component->buffer_number); + if (bindingIt == bufferBindings.end()) { + bindingIndex = static_cast<uint32_t>(config.bindings.size()); + bufferBindings[component->buffer_number] = bindingIndex; + + vk::VertexInputBindingDescription binding; + binding.binding = bindingIndex; + binding.stride = static_cast<uint32_t>(component->stride); + binding.inputRate = (component->divisor > 0) ? + vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex; + config.bindings.push_back(binding); + } else { + bindingIndex = bindingIt->second; + } + + // Handle MATRIX4 specially - it needs 4 attribute locations + if (component->format_type == vertex_format_data::MATRIX4) { + // mat4 requires 4 vec4 attributes at consecutive locations + for (uint32_t row = 0; row < 4; ++row) { + vk::VertexInputAttributeDescription attr; + attr.location = static_cast<uint32_t>(mapping->location) + row; + attr.binding = bindingIndex; + attr.format = vk::Format::eR32G32B32A32Sfloat; + attr.offset = static_cast<uint32_t>(component->offset) + (row * 16); + config.attributes.push_back(attr); + } + // Mark all 4 matrix locations as provided + config.providedInputMask |= (1u << (loc + 1)) | (1u << (loc + 2)) | (1u << (loc + 3)); + } else { + vk::VertexInputAttributeDescription attr; + attr.location = static_cast<uint32_t>(mapping->location); + attr.binding = bindingIndex; + attr.format = mapping->vkFormat; + attr.offset = static_cast<uint32_t>(component->offset); + config.attributes.push_back(attr); + } + } + + // Only add fallback bindings when the layout has actual vertex components. + // Empty layouts (e.g. fullscreen triangles) generate vertices in the shader + // and don't need any vertex input bindings.
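+	// e.g. a POSITION3 + TEX_COORD2 layout yields providedInputMask = 0b101 (locations 0 and 2); a shader that also consumes Color (location 1) then relies on the instanced fallback binding added below.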
+ uint32_t colorBit = 1u << static_cast<uint32_t>(VertexAttributeLocation::Color); + if (!(config.providedInputMask & colorBit) && numComponents > 0) { + // Add binding for fallback color buffer (instanced so one value applies to all vertices) + vk::VertexInputBindingDescription colorBinding; + colorBinding.binding = FALLBACK_COLOR_BINDING; + colorBinding.stride = 16; // vec4 = 16 bytes + colorBinding.inputRate = vk::VertexInputRate::eInstance; // Same color for all vertices + config.bindings.push_back(colorBinding); + + vk::VertexInputAttributeDescription colorAttr; + colorAttr.location = static_cast<uint32_t>(VertexAttributeLocation::Color); + colorAttr.binding = FALLBACK_COLOR_BINDING; + colorAttr.format = vk::Format::eR32G32B32A32Sfloat; + colorAttr.offset = 0; + config.attributes.push_back(colorAttr); + } + + // If no texcoord attribute, add a fallback providing (0,0,0,0) + // In OpenGL, missing vertex attributes default to (0,0,0,1); Vulkan requires explicit input + uint32_t texCoordBit = 1u << static_cast<uint32_t>(VertexAttributeLocation::TexCoord); + if (!(config.providedInputMask & texCoordBit) && numComponents > 0) { + // Add binding for fallback texcoord buffer (instanced so one value applies to all vertices) + vk::VertexInputBindingDescription texCoordBinding; + texCoordBinding.binding = FALLBACK_TEXCOORD_BINDING; + texCoordBinding.stride = 16; // vec4 = 16 bytes + texCoordBinding.inputRate = vk::VertexInputRate::eInstance; + config.bindings.push_back(texCoordBinding); + + vk::VertexInputAttributeDescription texCoordAttr; + texCoordAttr.location = static_cast<uint32_t>(VertexAttributeLocation::TexCoord); + texCoordAttr.binding = FALLBACK_TEXCOORD_BINDING; + texCoordAttr.format = vk::Format::eR32G32B32A32Sfloat; + texCoordAttr.offset = 0; + config.attributes.push_back(texCoordAttr); + } + + // Update the createInfo pointers + config.updatePointers(); + + return config; +} + +} // namespace vulkan +} // namespace graphics diff --git a/code/graphics/vulkan/VulkanVertexFormat.h b/code/graphics/vulkan/VulkanVertexFormat.h new file mode 100644 index 00000000000..359825e7490 --- /dev/null +++ b/code/graphics/vulkan/VulkanVertexFormat.h @@ -0,0 +1,115 @@ +#pragma once + +#include "globalincs/pstypes.h" +#include "graphics/2d.h" + +#include <vulkan/vulkan.hpp> + +namespace graphics { +namespace vulkan { + +/** + * @brief Vertex attribute locations matching GLSL shader expectations + * + * These must match the layout(location = N) declarations in SPIR-V shaders.
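+ * e.g. a shader consuming position and texcoord would declare (illustrative names): layout(location = 0) in vec4 vertPosition; layout(location = 2) in vec4 vertTexCoord;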
* Based on opengl_vert_attrib::attrib_id from gropenglshader.h + */ +enum class VertexAttributeLocation : uint32_t { + Position = 0, // vec2, vec3, or vec4 + Color = 1, // vec3/vec4 (normalized u8 or float) + TexCoord = 2, // vec2 or vec4 + Normal = 3, // vec3 + Tangent = 4, // vec4 + ModelId = 5, // float + Radius = 6, // float + Uvec = 7, // vec3 + ModelMatrix = 8, // mat4 (uses locations 8-11) +}; + +/** + * @brief Mapping from FSO vertex_format to Vulkan format and location + */ +struct VertexFormatMapping { + vertex_format_data::vertex_format format; + vk::Format vkFormat; + VertexAttributeLocation location; + uint32_t componentCount; + uint32_t sizeInBytes; +}; + +/** + * @brief Get the Vulkan format mapping for a given vertex format + * @param format The FSO vertex format type + * @return Pointer to mapping info, or nullptr if not found + */ +const VertexFormatMapping* getVertexFormatMapping(vertex_format_data::vertex_format format); + +// Reserved binding indices for fallback buffers when vertex data is missing attributes +static constexpr uint32_t FALLBACK_COLOR_BINDING = 15; +static constexpr uint32_t FALLBACK_TEXCOORD_BINDING = 14; + +/** + * @brief Cached vertex input configuration + */ +struct VertexInputConfig { + SCP_vector<vk::VertexInputBindingDescription> bindings; + SCP_vector<vk::VertexInputAttributeDescription> attributes; + vk::PipelineVertexInputStateCreateInfo createInfo; + + // Bitmask of vertex input locations natively provided by the layout (bit N = location N). + // Does NOT include fallback attributes. Compare with shader's vertexInputMask to + // determine which fallbacks are actually needed: shaderMask & ~providedInputMask. + uint32_t providedInputMask = 0; + + // Update createInfo pointers after vector modifications + void updatePointers(); +}; + +/** + * @brief Manages vertex format to Vulkan vertex input state conversion + * + * Converts FSO vertex_layout objects to Vulkan VkPipelineVertexInputStateCreateInfo. + * Caches configurations to avoid repeated conversions.
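+ * Keyed by vertex_layout::hash(), so layouts with identical components resolve to a single cached configuration.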
+ */ +class VulkanVertexFormatCache { +public: + VulkanVertexFormatCache() = default; + ~VulkanVertexFormatCache() = default; + + // Non-copyable + VulkanVertexFormatCache(const VulkanVertexFormatCache&) = delete; + VulkanVertexFormatCache& operator=(const VulkanVertexFormatCache&) = delete; + + /** + * @brief Get Vulkan vertex input state for a given layout + * @param layout The FSO vertex layout + * @return Reference to cached vertex input configuration + */ + const VertexInputConfig& getVertexInputConfig(const vertex_layout& layout); + + /** + * @brief Clear all cached configurations + */ + void clear(); + + /** + * @brief Get number of cached configurations + */ + size_t getCacheSize() const { return m_cache.size(); } + +private: + /** + * @brief Create a new vertex input configuration for a layout + */ + VertexInputConfig createVertexInputConfig(const vertex_layout& layout); + + // Cache: layout hash -> vertex input config + SCP_unordered_map<size_t, VertexInputConfig> m_cache; +}; + +// Global vertex format mapping table +extern const VertexFormatMapping VERTEX_FORMAT_MAPPINGS[]; +extern const size_t VERTEX_FORMAT_MAPPINGS_COUNT; + +} // namespace vulkan +} // namespace graphics diff --git a/code/graphics/vulkan/gr_vulkan.cpp b/code/graphics/vulkan/gr_vulkan.cpp index 833fccd152f..6b87dd28f4e 100644 --- a/code/graphics/vulkan/gr_vulkan.cpp +++ b/code/graphics/vulkan/gr_vulkan.cpp @@ -1,22 +1,476 @@ #include "gr_vulkan.h" - #include "VulkanRenderer.h" -#include "vulkan_stubs.h" +#include "VulkanBuffer.h" +#include "VulkanTexture.h" +#include "VulkanShader.h" +#include "VulkanDescriptorManager.h" +#include "VulkanPipeline.h" +#include "VulkanQuery.h" +#include "VulkanState.h" +#include "VulkanDraw.h" +#include "VulkanDeferred.h" +#include "VulkanPostProcessing.h" #include "backends/imgui_impl_sdl.h" #include "backends/imgui_impl_vulkan.h" -#include "mod_table/mod_table.h" +#include "osapi/osapi.h" + +#include "bmpman/bmpman.h" +#include "cfile/cfile.h" +#include "cmdline/cmdline.h" +#include "graphics/2d.h" +#include "graphics/matrix.h" +#include "graphics/material.h" +#include "graphics/post_processing.h" +#include "graphics/grinternal.h" +#include "lighting/lighting.h" +#include "pngutils/pngutils.h" namespace graphics { namespace vulkan { namespace { + std::unique_ptr<VulkanRenderer> renderer_instance; + +// Sync object for tracking frame completion +struct VulkanSyncObject { + uint64_t frameNumber; +}; + +// ========== Renderer-level functions ========== + +void vulkan_setup_frame() +{ + auto* renderer = getRendererInstance(); + renderer->setupFrame(); +} + +void vulkan_flip() +{ + renderer_instance->flip(); +} + +bool vulkan_is_capable(gr_capability capability) +{ + switch (capability) { + case gr_capability::CAPABILITY_ENVIRONMENT_MAP: + return true; + case gr_capability::CAPABILITY_NORMAL_MAP: + return Cmdline_normal != 0; + case gr_capability::CAPABILITY_HEIGHT_MAP: + return Cmdline_height != 0; + case gr_capability::CAPABILITY_SOFT_PARTICLES: + return Gr_post_processing_enabled; + case gr_capability::CAPABILITY_DISTORTION: + return Gr_post_processing_enabled; + case gr_capability::CAPABILITY_POST_PROCESSING: + return Gr_post_processing_enabled; + case gr_capability::CAPABILITY_DEFERRED_LIGHTING: + return light_deferred_enabled(); + case gr_capability::CAPABILITY_SHADOWS: + return getRendererInstance()->supportsShaderViewportLayerOutput(); + case gr_capability::CAPABILITY_THICK_OUTLINE: + return false; + case gr_capability::CAPABILITY_BATCHED_SUBMODELS: + return true; + case gr_capability::CAPABILITY_TIMESTAMP_QUERY:
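+		// Reported only once the query manager has been created successfully.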
return getQueryManager() != nullptr; + case gr_capability::CAPABILITY_SEPARATE_BLEND_FUNCTIONS: + // Vulkan supports per-attachment blend by spec + return true; + case gr_capability::CAPABILITY_PERSISTENT_BUFFER_MAPPING: + // Vulkan has persistently mappable host-visible memory + return true; + case gr_capability::CAPABILITY_BPTC: + return getRendererInstance()->isTextureCompressionBCSupported(); + case gr_capability::CAPABILITY_LARGE_SHADER: + // Always true for Vulkan: we use pre-compiled SPIR-V uber-shaders with + // runtime branching on modelData.flags. The variant approach would require + // compiling exponentially many SPIR-V permutations. Unbound texture slots + // are handled via fallback descriptors, so there's no driver issue. + return true; + case gr_capability::CAPABILITY_INSTANCED_RENDERING: + return true; + case gr_capability::CAPABILITY_QUERIES_REUSABLE: + // Vulkan queries require explicit reset between read and write. + // The backend manages this lifecycle internally via deleteQueryObject. + return false; + } + return false; +} + +bool vulkan_get_property(gr_property prop, void* dest) +{ + auto* renderer = getRendererInstance(); + + switch (prop) { + case gr_property::UNIFORM_BUFFER_OFFSET_ALIGNMENT: + *reinterpret_cast(dest) = static_cast(renderer->getMinUniformBufferOffsetAlignment()); + return true; + case gr_property::UNIFORM_BUFFER_MAX_SIZE: + *reinterpret_cast(dest) = static_cast(renderer->getMaxUniformBufferSize()); + return true; + case gr_property::MAX_ANISOTROPY: + *reinterpret_cast(dest) = renderer->getMaxAnisotropy(); + return true; + default: + return false; + } +} + +void vulkan_push_debug_group(const char* name) +{ + auto* renderer = getRendererInstance(); + if (!renderer->isDebugUtilsEnabled()) { + return; + } + + auto* stateTracker = getStateTracker(); + + vk::DebugUtilsLabelEXT label; + label.pLabelName = name; + label.color = {{ 1.0f, 1.0f, 1.0f, 1.0f }}; + stateTracker->getCommandBuffer().beginDebugUtilsLabelEXT(label); +} + +void vulkan_pop_debug_group() +{ + auto* renderer = getRendererInstance(); + if (!renderer->isDebugUtilsEnabled()) { + return; + } + + auto* stateTracker = getStateTracker(); + stateTracker->getCommandBuffer().endDebugUtilsLabelEXT(); +} + +void vulkan_imgui_new_frame() +{ + ImGui_ImplVulkan_NewFrame(); +} + +void vulkan_imgui_render_draw_data() +{ + auto* renderer = getRendererInstance(); + if (renderer) { + ImGui_ImplVulkan_RenderDrawData(ImGui::GetDrawData(), renderer->getVkCurrentCommandBuffer()); + } +} + +gr_sync vulkan_sync_fence() +{ + auto* renderer = getRendererInstance(); + auto* sync = new VulkanSyncObject(); + sync->frameNumber = renderer->getCurrentFrameNumber(); + return static_cast(sync); +} + +bool vulkan_sync_wait(gr_sync sync, uint64_t /*timeoutns*/) +{ + if (!sync) { + return true; + } + + auto* renderer = getRendererInstance(); + auto* syncObj = static_cast(sync); + + // Wait on the specific frame's fence (no-op if already complete) + renderer->waitForFrame(syncObj->frameNumber); + return true; +} + +void vulkan_sync_delete(gr_sync sync) +{ + if (sync) { + delete static_cast(sync); + } } +// ========== Screen capture (save/restore, screenshots) ========== + +static ubyte* Vulkan_saved_screen = nullptr; +static int Vulkan_saved_screen_id = -1; + +int vulkan_save_screen() +{ + if (Vulkan_saved_screen) { + // Already have a saved screen + return -1; + } + + ubyte* pixels = nullptr; + uint32_t w, h; + if (!renderer_instance->readbackFramebuffer(&pixels, &w, &h)) { + return -1; + } + + int bmpId = bm_create(32, 
static_cast<int>(w), static_cast<int>(h), pixels, 0); + if (bmpId < 0) { + vm_free(pixels); + return -1; + } + + Vulkan_saved_screen = pixels; + Vulkan_saved_screen_id = bmpId; + return Vulkan_saved_screen_id; +} + +void vulkan_restore_screen(int bmp_id) +{ + gr_reset_clip(); + + if (!Vulkan_saved_screen) { + gr_clear(); + return; + } + + Assert((bmp_id < 0) || (bmp_id == Vulkan_saved_screen_id)); + + if (Vulkan_saved_screen_id < 0) { + return; + } + + gr_set_bitmap(Vulkan_saved_screen_id); + gr_bitmap(0, 0, GR_RESIZE_NONE); +} + +void vulkan_free_screen(int bmp_id) +{ + if (!Vulkan_saved_screen) { + return; + } + + vm_free(Vulkan_saved_screen); + Vulkan_saved_screen = nullptr; + + Assert((bmp_id < 0) || (bmp_id == Vulkan_saved_screen_id)); + + if (Vulkan_saved_screen_id >= 0) { + bm_release(Vulkan_saved_screen_id); + Vulkan_saved_screen_id = -1; + } +} + +// Swizzle BGRA→RGBA in-place for PNG output (swap chain is B8G8R8A8) +static void swizzle_bgra_to_rgba(ubyte* pixels, size_t pixelCount) +{ + for (size_t i = 0; i < pixelCount; i++) { + size_t off = i * 4; + std::swap(pixels[off + 0], pixels[off + 2]); + } +} + +void vulkan_print_screen(const char* filename) +{ + ubyte* pixels = nullptr; + uint32_t w, h; + if (!renderer_instance->readbackFramebuffer(&pixels, &w, &h)) { + return; + } + + swizzle_bgra_to_rgba(pixels, static_cast<size_t>(w) * h); + + char tmp[MAX_PATH_LEN]; + snprintf(tmp, MAX_PATH_LEN - 1, "screenshots/%s.png", filename); + + _mkdir(os_get_config_path("screenshots").c_str()); + + if (!png_write_bitmap(os_get_config_path(tmp).c_str(), w, h, false, pixels)) { + ReleaseWarning(LOCATION, "Failed to write screenshot to \"%s\".", os_get_config_path(tmp).c_str()); + } + + vm_free(pixels); +} + +SCP_string vulkan_blob_screen() +{ + ubyte* pixels = nullptr; + uint32_t w, h; + if (!renderer_instance->readbackFramebuffer(&pixels, &w, &h)) { + return ""; + } + + swizzle_bgra_to_rgba(pixels, static_cast<size_t>(w) * h); + + SCP_string result = png_b64_bitmap(w, h, false, pixels); + + vm_free(pixels); + + return "data:image/png;base64," + result; +} + +// get_region: intentional no-op. The only caller is neb2_pre_render() in +// NEB2_RENDER_POF mode, which renders a 32x32 background thumbnail into a +// CPU buffer that is never actually read — the pixel data, ex_scale, and +// ey_scale it computes have no consumers. Modern nebula rendering uses +// NEB2_RENDER_HTL (fog color + gr_clear) and doesn't need get_region at all. +void vulkan_get_region(int /*front*/, int /*w*/, int /*h*/, ubyte* /*data*/) {} + +void stub_dump_envmap(const char* /*filename*/) {} + +std::unique_ptr<os::Viewport> stub_create_viewport(const os::ViewPortProperties& /*props*/) +{ + return std::unique_ptr<os::Viewport>(); +} +void stub_use_viewport(os::Viewport* /*view*/) {} +SCP_vector<const char*> stub_openxr_get_extensions() { return {}; } +bool stub_openxr_test_capabilities() { return false; } +bool stub_openxr_create_session() { return false; } +int64_t stub_openxr_get_swapchain_format(const SCP_vector<int64_t>& /*allowed*/) { return 0; } +bool stub_openxr_acquire_swapchain_buffers() { return false; } +bool stub_openxr_flip() { return false; } + +// ========== Function pointer table ========== +// Implementations are defined in their respective files: +// VulkanDraw.cpp, VulkanBuffer.cpp, VulkanTexture.cpp, VulkanShader.cpp, VulkanState.cpp + +void init_function_pointers() +{ + // function pointers...
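+	// gr_screen is the engine-wide dispatch table: every gr_* call routes through these pointers, so this block is what actually moves the engine onto the Vulkan backend.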
+ gr_screen.gf_setup_frame = vulkan_setup_frame; + gr_screen.gf_set_clip = vulkan_set_clip; + gr_screen.gf_reset_clip = vulkan_reset_clip; + + gr_screen.gf_clear = vulkan_clear; + + gr_screen.gf_print_screen = vulkan_print_screen; + gr_screen.gf_blob_screen = vulkan_blob_screen; + + gr_screen.gf_zbuffer_get = vulkan_zbuffer_get; + gr_screen.gf_zbuffer_set = vulkan_zbuffer_set; + gr_screen.gf_zbuffer_clear = vulkan_zbuffer_clear; + + gr_screen.gf_stencil_set = vulkan_stencil_set; + gr_screen.gf_stencil_clear = vulkan_stencil_clear; + + gr_screen.gf_alpha_mask_set = vulkan_alpha_mask_set; + + gr_screen.gf_save_screen = vulkan_save_screen; + gr_screen.gf_restore_screen = vulkan_restore_screen; + gr_screen.gf_free_screen = vulkan_free_screen; + + gr_screen.gf_get_region = vulkan_get_region; + + // now for the bitmap functions + gr_screen.gf_bm_free_data = vulkan_bm_free_data; + gr_screen.gf_bm_create = vulkan_bm_create; + gr_screen.gf_bm_init = vulkan_bm_init; + gr_screen.gf_bm_page_in_start = vulkan_bm_page_in_start; + gr_screen.gf_bm_data = vulkan_bm_data; + gr_screen.gf_bm_make_render_target = vulkan_bm_make_render_target; + gr_screen.gf_bm_set_render_target = vulkan_bm_set_render_target; + + gr_screen.gf_set_cull = vulkan_set_cull; + gr_screen.gf_set_color_buffer = vulkan_set_color_buffer; + + gr_screen.gf_set_clear_color = vulkan_set_clear_color; + + gr_screen.gf_preload = vulkan_preload; + + gr_screen.gf_set_texture_addressing = vulkan_set_texture_addressing; + gr_screen.gf_zbias = vulkan_zbias; + gr_screen.gf_set_fill_mode = vulkan_set_fill_mode; + + gr_screen.gf_create_buffer = vulkan_create_buffer; + gr_screen.gf_delete_buffer = vulkan_delete_buffer; + + gr_screen.gf_update_transform_buffer = vulkan_update_transform_buffer; + gr_screen.gf_update_buffer_data = vulkan_update_buffer_data; + gr_screen.gf_update_buffer_data_offset = vulkan_update_buffer_data_offset; + gr_screen.gf_map_buffer = vulkan_map_buffer; + gr_screen.gf_flush_mapped_buffer = vulkan_flush_mapped_buffer; + + gr_screen.gf_post_process_set_effect = vulkan_post_process_set_effect; + gr_screen.gf_post_process_set_defaults = vulkan_post_process_set_defaults; + + gr_screen.gf_post_process_begin = vulkan_post_process_begin; + gr_screen.gf_post_process_end = vulkan_post_process_end; + gr_screen.gf_post_process_save_zbuffer = vulkan_post_process_save_zbuffer; + gr_screen.gf_post_process_restore_zbuffer = vulkan_post_process_restore_zbuffer; + + gr_screen.gf_scene_texture_begin = vulkan_scene_texture_begin; + gr_screen.gf_scene_texture_end = vulkan_scene_texture_end; + gr_screen.gf_copy_effect_texture = vulkan_copy_effect_texture; + + gr_screen.gf_deferred_lighting_begin = vulkan_deferred_lighting_begin; + gr_screen.gf_deferred_lighting_msaa = vulkan_deferred_lighting_msaa; + gr_screen.gf_deferred_lighting_end = vulkan_deferred_lighting_end; + gr_screen.gf_deferred_lighting_finish = vulkan_deferred_lighting_finish; + + gr_screen.gf_calculate_irrmap = vulkan_calculate_irrmap; + gr_screen.gf_dump_envmap = stub_dump_envmap; + gr_screen.gf_override_fog = vulkan_override_fog; + + gr_screen.gf_imgui_new_frame = vulkan_imgui_new_frame; + gr_screen.gf_imgui_render_draw_data = vulkan_imgui_render_draw_data; + + gr_screen.gf_set_line_width = vulkan_set_line_width; + + gr_screen.gf_sphere = vulkan_draw_sphere; + + gr_screen.gf_shadow_map_start = vulkan_shadow_map_start; + gr_screen.gf_shadow_map_end = vulkan_shadow_map_end; + + gr_screen.gf_start_decal_pass = vulkan_start_decal_pass; + gr_screen.gf_stop_decal_pass = 
vulkan_stop_decal_pass; + gr_screen.gf_render_decals = vulkan_render_decals; + + gr_screen.gf_render_shield_impact = vulkan_render_shield_impact; + + gr_screen.gf_maybe_create_shader = vulkan_maybe_create_shader; + gr_screen.gf_recompile_all_shaders = vulkan_recompile_all_shaders; + + gr_screen.gf_clear_states = vulkan_clear_states; + + gr_screen.gf_update_texture = vulkan_update_texture; + gr_screen.gf_get_bitmap_from_texture = vulkan_get_bitmap_from_texture; + + gr_screen.gf_render_model = vulkan_render_model; + gr_screen.gf_render_primitives = vulkan_render_primitives; + gr_screen.gf_render_primitives_particle = vulkan_render_primitives_particle; + gr_screen.gf_render_primitives_distortion = vulkan_render_primitives_distortion; + gr_screen.gf_render_movie = vulkan_render_movie; + gr_screen.gf_render_nanovg = vulkan_render_nanovg; + gr_screen.gf_render_primitives_batched = vulkan_render_primitives_batched; + gr_screen.gf_render_rocket_primitives = vulkan_render_rocket_primitives; + + gr_screen.gf_is_capable = vulkan_is_capable; + gr_screen.gf_get_property = vulkan_get_property; + + gr_screen.gf_push_debug_group = vulkan_push_debug_group; + gr_screen.gf_pop_debug_group = vulkan_pop_debug_group; + + gr_screen.gf_create_query_object = vulkan_create_query_object; + gr_screen.gf_query_value = vulkan_query_value; + gr_screen.gf_query_value_available = vulkan_query_value_available; + gr_screen.gf_get_query_value = vulkan_get_query_value; + gr_screen.gf_delete_query_object = vulkan_delete_query_object; + + gr_screen.gf_create_viewport = stub_create_viewport; + gr_screen.gf_use_viewport = stub_use_viewport; + + gr_screen.gf_bind_uniform_buffer = vulkan_bind_uniform_buffer; + + gr_screen.gf_sync_fence = vulkan_sync_fence; + gr_screen.gf_sync_wait = vulkan_sync_wait; + gr_screen.gf_sync_delete = vulkan_sync_delete; + + gr_screen.gf_set_viewport = vulkan_set_viewport; + + gr_screen.gf_openxr_get_extensions = stub_openxr_get_extensions; + gr_screen.gf_openxr_test_capabilities = stub_openxr_test_capabilities; + gr_screen.gf_openxr_create_session = stub_openxr_create_session; + gr_screen.gf_openxr_get_swapchain_format = stub_openxr_get_swapchain_format; + gr_screen.gf_openxr_acquire_swapchain_buffers = stub_openxr_acquire_swapchain_buffers; + gr_screen.gf_openxr_flip = stub_openxr_flip; +} + +} // anonymous namespace + void initialize_function_pointers() { - init_stub_pointers(); + init_function_pointers(); } bool initialize(std::unique_ptr<os::GraphicsOperations>&& graphicsOps) @@ -26,12 +480,27 @@ bool initialize(std::unique_ptr<os::GraphicsOperations>&& graphicsOps) return false; } - gr_screen.gf_flip = []() { - renderer_instance->flip(); - }; + // Initialize ImGui SDL2 backend for input handling. + // The Vulkan rendering backend (ImGui_ImplVulkan) is initialized + // inside VulkanRenderer::initImGui() after all Vulkan objects are ready. + SDL_Window* window = os::getSDLMainWindow(); + if (window) { + ImGui_ImplSDL2_InitForVulkan(window); + } + + gr_screen.gf_flip = vulkan_flip; + + // Initialize matrices and viewport (matching OpenGL backend initialization) + gr_reset_matrices(); + gr_setup_viewport(); + + // Start first frame so a command buffer is active before the first draw calls. + // The engine draws the title screen during game_init(), before the main loop's + // first gr_flip() → setupFrame(). Without this, any gr_clear/gr_bitmap before + // the first flip would hit a null command buffer. Matches OpenGL init behavior.
+ gr_setup_frame(); - // Nothing else is finished so always fail here - mprintf(("Vulkan support is not finished yet so graphics initialization will always fail...\n")); + mprintf(("Vulkan: Initialization complete\n")); return true; } diff --git a/code/graphics/vulkan/vulkan_stubs.cpp b/code/graphics/vulkan/vulkan_stubs.cpp deleted file mode 100644 index a757ed553ef..00000000000 --- a/code/graphics/vulkan/vulkan_stubs.cpp +++ /dev/null @@ -1,395 +0,0 @@ -#include "vulkan_stubs.h" - -#include "graphics/2d.h" - -#define BMPMAN_INTERNAL -#include "bmpman/bm_internal.h" - -namespace graphics { -namespace vulkan { - -namespace { - -gr_buffer_handle stub_create_buffer(BufferType, BufferUsageHint) -{ - return gr_buffer_handle::invalid(); -} - -void stub_setup_frame() {} - -void stub_delete_buffer(gr_buffer_handle /*handle*/) {} - -int stub_preload(int /*bitmap_num*/, int /*is_aabitmap*/) { return 0; } - -int stub_save_screen() { return 1; } - -int stub_zbuffer_get() { return 0; } - -int stub_zbuffer_set(int /*mode*/) { return 0; } - -void gr_set_fill_mode_stub(int /*mode*/) {} - -void stub_clear() {} - -void stub_free_screen(int /*id*/) {} - -void stub_get_region(int /*front*/, int /*w*/, int /*h*/, ubyte* /*data*/) {} - -void stub_print_screen(const char* /*filename*/) {} - -SCP_string stub_blob_screen() { return ""; } - -void stub_reset_clip() {} - -void stub_restore_screen(int /*id*/) {} - -void stub_update_buffer_data(gr_buffer_handle /*handle*/, size_t /*size*/, const void* /*data*/) {} - -void stub_update_buffer_data_offset(gr_buffer_handle /*handle*/, - size_t /*offset*/, - size_t /*size*/, - const void* /*data*/) -{ -} - -void stub_update_transform_buffer(void* /*data*/, size_t /*size*/) {} - -void stub_set_clear_color(int /*r*/, int /*g*/, int /*b*/) {} - -void stub_set_clip(int /*x*/, int /*y*/, int /*w*/, int /*h*/, int /*resize_mode*/) {} - -int stub_set_cull(int /*cull*/) { return 0; } - -int stub_set_color_buffer(int /*mode*/) { return 0; } - -void stub_set_texture_addressing(int /*mode*/) {} - -void stub_zbias_stub(int /*bias*/) {} - -void stub_zbuffer_clear(int /*mode*/) {} - -int stub_stencil_set(int /*mode*/) { return 0; } - -void stub_stencil_clear() {} - -int stub_alpha_mask_set(int /*mode*/, float /*alpha*/) { return 0; } - -void stub_post_process_set_effect(const char* /*name*/, int /*x*/, const vec3d* /*rgb*/) {} - -void stub_post_process_set_defaults() {} - -void stub_post_process_save_zbuffer() {} - -void stub_post_process_begin() {} - -void stub_post_process_end() {} - -void stub_scene_texture_begin() {} - -void stub_scene_texture_end() {} - -void stub_copy_effect_texture() {} - -void stub_deferred_lighting_begin(bool /*clearNonColorBufs*/) {} - -void stub_deferred_lighting_msaa() {} - -void stub_deferred_lighting_end() {} - -void stub_deferred_lighting_finish() {} - -void stub_set_line_width(float /*width*/) {} - -void stub_draw_sphere(material* /*material_def*/, float /*rad*/) {} - -void stub_clear_states() {} - -void stub_update_texture(int /*bitmap_handle*/, int /*bpp*/, const ubyte* /*data*/, int /*width*/, int /*height*/) {} - -void stub_get_bitmap_from_texture(void* /*data_out*/, int /*bitmap_num*/) {} - -int stub_bm_make_render_target(int /*n*/, int* /*width*/, int* /*height*/, int* /*bpp*/, int* /*mm_lvl*/, int /*flags*/) -{ - return 0; -} - -int stub_bm_set_render_target(int /*n*/, int /*face*/) { return 0; } - -void stub_bm_create(bitmap_slot* /*slot*/) {} - -void stub_bm_free_data(bitmap_slot* /*slot*/, bool /*release*/) {} - -void stub_bm_init(bitmap_slot* 
/*slot*/) {} - -void stub_bm_page_in_start() {} - -bool stub_bm_data(int /*n*/, bitmap* /*bm*/) { return true; } - -int stub_maybe_create_shader(shader_type /*shader_t*/, unsigned int /*flags*/) { return -1; } - -void stub_shadow_map_start(matrix4* /*shadow_view_matrix*/, const matrix* /*light_matrix*/, vec3d* /*eye_pos*/) {} - -void stub_shadow_map_end() {} - -void stub_start_decal_pass() {} -void stub_stop_decal_pass() {} -void stub_render_decals(decal_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*num_elements*/, - const indexed_vertex_source& /*buffers*/, - const gr_buffer_handle& /*instance_buffer*/, - int /*num_instances*/) {} - -void stub_render_shield_impact(shield_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - gr_buffer_handle /*buffer_handle*/, - int /*n_verts*/) -{ -} - -void stub_render_model(model_material* /*material_info*/, - indexed_vertex_source* /*vert_source*/, - vertex_buffer* /*bufferp*/, - size_t /*texi*/) -{ -} - -void stub_render_primitives(material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*offset*/, - int /*n_verts*/, - gr_buffer_handle /*buffer_handle*/, - size_t /*buffer_offset*/) -{ -} - -void stub_render_primitives_particle(particle_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*offset*/, - int /*n_verts*/, - gr_buffer_handle /*buffer_handle*/) -{ -} - -void stub_render_primitives_distortion(distortion_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*offset*/, - int /*n_verts*/, - gr_buffer_handle /*buffer_handle*/) -{ -} -void stub_render_movie(movie_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*n_verts*/, - gr_buffer_handle /*buffer*/, - size_t /*buffer_offset*/) -{ -} - -void stub_render_nanovg(nanovg_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*offset*/, - int /*n_verts*/, - gr_buffer_handle /*buffer_handle*/) -{ -} - -void stub_render_primitives_batched(batched_bitmap_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*offset*/, - int /*n_verts*/, - gr_buffer_handle /*buffer_handle*/) -{ -} - -void stub_render_rocket_primitives(interface_material* /*material_info*/, - primitive_type /*prim_type*/, - vertex_layout* /*layout*/, - int /*n_indices*/, - gr_buffer_handle /*vertex_buffer*/, - gr_buffer_handle /*index_buffer*/) -{ -} - -bool stub_is_capable(gr_capability /*capability*/) { return false; } -bool stub_get_property(gr_property p, void* dest) -{ - if (p == gr_property::UNIFORM_BUFFER_OFFSET_ALIGNMENT) { - // This is required by the startup code of the uniform buffer manager - *reinterpret_cast<int*>(dest) = 4; - return true; - } - return false; -}; - -void stub_push_debug_group(const char*) {} - -void stub_pop_debug_group() {} - -int stub_create_query_object() { return -1; } - -void stub_query_value(int /*obj*/, QueryType /*type*/) {} - -bool stub_query_value_available(int /*obj*/) { return false; } - -std::uint64_t stub_get_query_value(int /*obj*/) { return 0; } - -void stub_delete_query_object(int /*obj*/) {} - -SCP_vector<const char*> stub_openxr_get_extensions() { return {}; } - -bool stub_openxr_test_capabilities() { return false; } - -bool stub_openxr_create_session() { return false; } - -int64_t stub_openxr_get_swapchain_format(const SCP_vector<int64_t>& /*allowed*/) { return 0; } -
-bool stub_openxr_acquire_swapchain_buffers() { return false; } - -bool stub_openxr_flip() { return false; } - -} // namespace - -void init_stub_pointers() -{ - // function pointers... - gr_screen.gf_setup_frame = stub_setup_frame; - gr_screen.gf_set_clip = stub_set_clip; - gr_screen.gf_reset_clip = stub_reset_clip; - - gr_screen.gf_clear = stub_clear; - - gr_screen.gf_print_screen = stub_print_screen; - gr_screen.gf_blob_screen = stub_blob_screen; - - gr_screen.gf_zbuffer_get = stub_zbuffer_get; - gr_screen.gf_zbuffer_set = stub_zbuffer_set; - gr_screen.gf_zbuffer_clear = stub_zbuffer_clear; - - gr_screen.gf_stencil_set = stub_stencil_set; - gr_screen.gf_stencil_clear = stub_stencil_clear; - - gr_screen.gf_alpha_mask_set = stub_alpha_mask_set; - - gr_screen.gf_save_screen = stub_save_screen; - gr_screen.gf_restore_screen = stub_restore_screen; - gr_screen.gf_free_screen = stub_free_screen; - - gr_screen.gf_get_region = stub_get_region; - - // now for the bitmap functions - gr_screen.gf_bm_free_data = stub_bm_free_data; - gr_screen.gf_bm_create = stub_bm_create; - gr_screen.gf_bm_init = stub_bm_init; - gr_screen.gf_bm_page_in_start = stub_bm_page_in_start; - gr_screen.gf_bm_data = stub_bm_data; - gr_screen.gf_bm_make_render_target = stub_bm_make_render_target; - gr_screen.gf_bm_set_render_target = stub_bm_set_render_target; - - gr_screen.gf_set_cull = stub_set_cull; - gr_screen.gf_set_color_buffer = stub_set_color_buffer; - - gr_screen.gf_set_clear_color = stub_set_clear_color; - - gr_screen.gf_preload = stub_preload; - - gr_screen.gf_set_texture_addressing = stub_set_texture_addressing; - gr_screen.gf_zbias = stub_zbias_stub; - gr_screen.gf_set_fill_mode = gr_set_fill_mode_stub; - - gr_screen.gf_create_buffer = stub_create_buffer; - gr_screen.gf_delete_buffer = stub_delete_buffer; - - gr_screen.gf_update_transform_buffer = stub_update_transform_buffer; - gr_screen.gf_update_buffer_data = stub_update_buffer_data; - gr_screen.gf_update_buffer_data_offset = stub_update_buffer_data_offset; - gr_screen.gf_map_buffer = [](gr_buffer_handle) -> void* { return nullptr; }; - gr_screen.gf_flush_mapped_buffer = [](gr_buffer_handle, size_t, size_t) {}; - - gr_screen.gf_post_process_set_effect = stub_post_process_set_effect; - gr_screen.gf_post_process_set_defaults = stub_post_process_set_defaults; - - gr_screen.gf_post_process_begin = stub_post_process_begin; - gr_screen.gf_post_process_end = stub_post_process_end; - gr_screen.gf_post_process_save_zbuffer = stub_post_process_save_zbuffer; - gr_screen.gf_post_process_restore_zbuffer = []() {}; - - gr_screen.gf_scene_texture_begin = stub_scene_texture_begin; - gr_screen.gf_scene_texture_end = stub_scene_texture_end; - gr_screen.gf_copy_effect_texture = stub_copy_effect_texture; - - gr_screen.gf_deferred_lighting_begin = stub_deferred_lighting_begin; - gr_screen.gf_deferred_lighting_msaa = stub_deferred_lighting_msaa; - gr_screen.gf_deferred_lighting_end = stub_deferred_lighting_end; - gr_screen.gf_deferred_lighting_finish = stub_deferred_lighting_finish; - - gr_screen.gf_set_line_width = stub_set_line_width; - - gr_screen.gf_sphere = stub_draw_sphere; - - gr_screen.gf_shadow_map_start = stub_shadow_map_start; - gr_screen.gf_shadow_map_end = stub_shadow_map_end; - - gr_screen.gf_start_decal_pass = stub_start_decal_pass; - gr_screen.gf_stop_decal_pass = stub_stop_decal_pass; - gr_screen.gf_render_decals = stub_render_decals; - - gr_screen.gf_render_shield_impact = stub_render_shield_impact; - - gr_screen.gf_maybe_create_shader = stub_maybe_create_shader; 
- - gr_screen.gf_clear_states = stub_clear_states; - - gr_screen.gf_update_texture = stub_update_texture; - gr_screen.gf_get_bitmap_from_texture = stub_get_bitmap_from_texture; - - gr_screen.gf_render_model = stub_render_model; - gr_screen.gf_render_primitives = stub_render_primitives; - gr_screen.gf_render_primitives_particle = stub_render_primitives_particle; - gr_screen.gf_render_primitives_distortion = stub_render_primitives_distortion; - gr_screen.gf_render_movie = stub_render_movie; - gr_screen.gf_render_nanovg = stub_render_nanovg; - gr_screen.gf_render_primitives_batched = stub_render_primitives_batched; - gr_screen.gf_render_rocket_primitives = stub_render_rocket_primitives; - - gr_screen.gf_is_capable = stub_is_capable; - gr_screen.gf_get_property = stub_get_property; - - gr_screen.gf_push_debug_group = stub_push_debug_group; - gr_screen.gf_pop_debug_group = stub_pop_debug_group; - - gr_screen.gf_create_query_object = stub_create_query_object; - gr_screen.gf_query_value = stub_query_value; - gr_screen.gf_query_value_available = stub_query_value_available; - gr_screen.gf_get_query_value = stub_get_query_value; - gr_screen.gf_delete_query_object = stub_delete_query_object; - - gr_screen.gf_create_viewport = [](const os::ViewPortProperties&) { return std::unique_ptr<os::Viewport>(); }; - gr_screen.gf_use_viewport = [](os::Viewport*) {}; - - gr_screen.gf_bind_uniform_buffer = [](uniform_block_type, size_t, size_t, gr_buffer_handle) {}; - - gr_screen.gf_sync_fence = []() -> gr_sync { return nullptr; }; - gr_screen.gf_sync_wait = [](gr_sync /*sync*/, uint64_t /*timeoutns*/) { return true; }; - gr_screen.gf_sync_delete = [](gr_sync /*sync*/) {}; - - gr_screen.gf_set_viewport = [](int /*x*/, int /*y*/, int /*width*/, int /*height*/) {}; - - gr_screen.gf_openxr_get_extensions = stub_openxr_get_extensions; - gr_screen.gf_openxr_test_capabilities = stub_openxr_test_capabilities; - gr_screen.gf_openxr_create_session = stub_openxr_create_session; - gr_screen.gf_openxr_get_swapchain_format = stub_openxr_get_swapchain_format; - gr_screen.gf_openxr_acquire_swapchain_buffers = stub_openxr_acquire_swapchain_buffers; - gr_screen.gf_openxr_flip = stub_openxr_flip; -} - -} // namespace vulkan -} // namespace graphics diff --git a/code/graphics/vulkan/vulkan_stubs.h b/code/graphics/vulkan/vulkan_stubs.h deleted file mode 100644 index caff77f7655..00000000000 --- a/code/graphics/vulkan/vulkan_stubs.h +++ /dev/null @@ -1,8 +0,0 @@ - -namespace graphics { -namespace vulkan { - -void init_stub_pointers(); - -} -} // namespace graphics diff --git a/code/lab/labv2_internal.h b/code/lab/labv2_internal.h index 0a2042512f7..d237e7d86b2 100644 --- a/code/lab/labv2_internal.h +++ b/code/lab/labv2_internal.h @@ -2,7 +2,6 @@ #include "lab/manager/lab_manager.h" #include "imconfig.h" #include "imgui.h" -#include "backends/imgui_impl_opengl3.h" #include "backends/imgui_impl_sdl.h" #include "extensions/imgui_sugar.hpp" diff --git a/code/lab/manager/lab_manager.cpp b/code/lab/manager/lab_manager.cpp index b43ebc234db..842189047dd 100644 --- a/code/lab/manager/lab_manager.cpp +++ b/code/lab/manager/lab_manager.cpp @@ -112,8 +112,7 @@ void LabManager::resetGraphicsSettings() { } void LabManager::onFrame(float frametime) { - if (gr_screen.mode == GR_OPENGL) - ImGui_ImplOpenGL3_NewFrame(); + gr_imgui_new_frame(); ImGui_ImplSDL2_NewFrame(gr_screen.max_w, gr_screen.max_h); ImGui::NewFrame(); @@ -389,8 +388,7 @@ void LabManager::onFrame(float frametime) { if (Cmdline_show_imgui_debug) ImGui::ShowDemoWindow(); ImGui::Render(); - if
(gr_screen.mode == GR_OPENGL) - ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); + gr_imgui_render_draw_data(); if (CloseThis) close(); diff --git a/code/options/Ingame_Options_internal.h b/code/options/Ingame_Options_internal.h index 1d43d9b62a8..aa9982c5008 100644 --- a/code/options/Ingame_Options_internal.h +++ b/code/options/Ingame_Options_internal.h @@ -2,7 +2,6 @@ #include "options/manager/ingame_options_manager.h" #include "imconfig.h" #include "imgui.h" -#include "backends/imgui_impl_opengl3.h" #include "backends/imgui_impl_sdl.h" #include "extensions/imgui_sugar.hpp" diff --git a/code/options/manager/ingame_options_manager.cpp b/code/options/manager/ingame_options_manager.cpp index bd14f6dbb6e..320d22f8a2d 100644 --- a/code/options/manager/ingame_options_manager.cpp +++ b/code/options/manager/ingame_options_manager.cpp @@ -138,8 +138,7 @@ void OptConfigurator::offer_save_options_popup() // The main Imgui rendering happens here as well as any i/o checking void OptConfigurator::onFrame() { - if (gr_screen.mode == GR_OPENGL) - ImGui_ImplOpenGL3_NewFrame(); + gr_imgui_new_frame(); ImGui_ImplSDL2_NewFrame(gr_screen.max_w, gr_screen.max_h); ImGui::NewFrame(); @@ -178,8 +177,7 @@ void OptConfigurator::onFrame() { if (Cmdline_show_imgui_debug) ImGui::ShowDemoWindow(); ImGui::Render(); - if (gr_screen.mode == GR_OPENGL) - ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); + gr_imgui_render_draw_data(); if (CloseThis) { close(); diff --git a/code/shaders.cmake b/code/shaders.cmake index 4c7763a2e62..8d865159d51 100644 --- a/code/shaders.cmake +++ b/code/shaders.cmake @@ -6,8 +6,64 @@ set(LEGACY_SHADER_DIR "${CMAKE_CURRENT_SOURCE_DIR}/def_files/data/effects") set(SHADERS ${SHADER_DIR}/default-material.frag ${SHADER_DIR}/default-material.vert - ${SHADER_DIR}/vulkan.frag - ${SHADER_DIR}/vulkan.vert + ${SHADER_DIR}/passthrough.frag + ${SHADER_DIR}/passthrough.vert + ${SHADER_DIR}/batched.frag + ${SHADER_DIR}/batched.vert + ${SHADER_DIR}/video.frag + ${SHADER_DIR}/video.vert + ${SHADER_DIR}/rocketui.frag + ${SHADER_DIR}/rocketui.vert + ${SHADER_DIR}/main.frag + ${SHADER_DIR}/main.vert + ${SHADER_DIR}/nanovg.frag + ${SHADER_DIR}/nanovg.vert + ${SHADER_DIR}/decal.frag + ${SHADER_DIR}/decal.vert + ${SHADER_DIR}/postprocess.vert + ${SHADER_DIR}/tonemapping.frag + ${SHADER_DIR}/brightpass.frag + ${SHADER_DIR}/blur.frag + ${SHADER_DIR}/bloom-comp.frag + ${SHADER_DIR}/fxaapre.frag + ${SHADER_DIR}/fxaa.frag + ${SHADER_DIR}/post.frag + ${SHADER_DIR}/lightshafts.frag + ${SHADER_DIR}/effect.vert + ${SHADER_DIR}/effect.frag + ${SHADER_DIR}/effect-distort.vert + ${SHADER_DIR}/effect-distort.frag + ${SHADER_DIR}/deferred.vert + ${SHADER_DIR}/deferred.frag + ${SHADER_DIR}/shadow.vert + ${SHADER_DIR}/shadow.frag + ${SHADER_DIR}/irradiance.vert + ${SHADER_DIR}/irradiance.frag + ${SHADER_DIR}/fog.vert + ${SHADER_DIR}/fog.frag + ${SHADER_DIR}/volumetric-fog.vert + ${SHADER_DIR}/volumetric-fog.frag + ${SHADER_DIR}/copy.frag + ${SHADER_DIR}/copy.vert + ${SHADER_DIR}/shield-impact.frag + ${SHADER_DIR}/shield-impact.vert + ${SHADER_DIR}/msaa-resolve.vert + ${SHADER_DIR}/msaa-resolve.frag +) + +# Shaders shared with the OpenGL backend. These get GLSL decompilation (.spv.glsl) +# and the decompiled GLSL is embedded for runtime use. +# All other shaders are Vulkan-only: SPIR-V compilation and embedding only. +set(SHADERS_GL_SHARED + ${SHADER_DIR}/default-material.frag + ${SHADER_DIR}/default-material.vert +) + +# Shaders that need C++ struct header generation from SPIR-V reflection. 
+# Generated structs are included via shader_structs.h for compile-time layout validation. +set(SHADERS_NEED_STRUCT_GEN + ${SHADER_DIR}/default-material.frag + ${SHADER_DIR}/default-material.vert ) target_sources(code PRIVATE ${SHADERS}) @@ -31,6 +87,9 @@ foreach (_shader ${SHADERS}) get_filename_component(_baseShaderName "${_shader}" NAME_WE) get_filename_component(_shaderExt "${_shader}" EXT) + list(FIND SHADERS_GL_SHARED "${_shader}" _isGlShared) + list(FIND SHADERS_NEED_STRUCT_GEN "${_shader}" _needStructs) + if (TARGET glslc) set(_depFileDir "${CMAKE_CURRENT_BINARY_DIR}/shaders") set(_depFile "${_depFileDir}/${_fileName}.spv.d") @@ -45,34 +104,54 @@ foreach (_shader ${SHADERS}) COMMAND ${CMAKE_COMMAND} -E make_directory "${_depFileDir}" COMMAND glslc "${_shader}" -o "${_spirvFile}" --target-env=vulkan1.0 -O -g "-I${SHADER_DIR}" "-I${LEGACY_SHADER_DIR}" -MD -MF "${_depFile}" -MT "${_relativeSpirvPath}" -Werror -x glsl - MAIN_DEPENDENCY "${shader}" + MAIN_DEPENDENCY "${_shader}" COMMENT "Compiling shader ${_fileName}" ${DEPFILE_PARAM} ) target_embed_files(code FILES "${_spirvFile}" RELATIVE_TO "${_shaderOutputDir}" PATH_TYPE_PREFIX "data/effects") - set(_glslOutput "${_spirvFile}.glsl") - set(_structOutput "${_shaderOutputDir}/${_baseShaderName}_structs${_shaderExt}.h") - - list(APPEND _structHeaderList "${_structOutput}") - - add_custom_command(OUTPUT "${_glslOutput}" "${_structOutput}" - COMMAND shadertool --glsl "--glsl-output=${_glslOutput}" --structs "--structs-output=${_structOutput}" ${_spirvFile} - MAIN_DEPENDENCY "${_spirvFile}" - COMMENT "Processing shader ${_spirvFile}" - ) - - target_embed_files(code FILES "${_glslOutput}" RELATIVE_TO "${_shaderOutputDir}" PATH_TYPE_PREFIX "data/effects") + # Build shadertool arguments based on what this shader needs + set(_glslOutput) + set(_shadertoolArgs) + set(_shadertoolOutputs) + + if (_isGlShared GREATER -1) + set(_glslOutput "${_spirvFile}.glsl") + list(APPEND _shadertoolArgs --glsl "--glsl-output=${_glslOutput}") + list(APPEND _shadertoolOutputs "${_glslOutput}") + endif() + if (_needStructs GREATER -1) + set(_structOutput "${_shaderOutputDir}/${_baseShaderName}_structs${_shaderExt}.h") + list(APPEND _shadertoolArgs --structs "--structs-output=${_structOutput}") + list(APPEND _shadertoolOutputs "${_structOutput}") + list(APPEND _structHeaderList "${_structOutput}") + endif() + + if (_shadertoolArgs) + add_custom_command(OUTPUT ${_shadertoolOutputs} + COMMAND shadertool ${_shadertoolArgs} ${_spirvFile} + MAIN_DEPENDENCY "${_spirvFile}" + COMMENT "Processing shader ${_spirvFile}" + ) + endif() + + if (_glslOutput) + target_embed_files(code FILES "${_glslOutput}" RELATIVE_TO "${_shaderOutputDir}" PATH_TYPE_PREFIX "data/effects") + endif() else() + # No shader compiler available — use pre-compiled files from VCS target_embed_files(code FILES "${_spirvFile}" RELATIVE_TO "${_shaderOutputDir}" PATH_TYPE_PREFIX "data/effects") - set(_glslOutput "${_spirvFile}.glsl") - set(_structOutput "${_shaderOutputDir}/${_baseShaderName}_structs${_shaderExt}.h") - - list(APPEND _structHeaderList "${_structOutput}") + if (_needStructs GREATER -1) + set(_structOutput "${_shaderOutputDir}/${_baseShaderName}_structs${_shaderExt}.h") + list(APPEND _structHeaderList "${_structOutput}") + endif() - target_embed_files(code FILES "${_glslOutput}" RELATIVE_TO "${_shaderOutputDir}" PATH_TYPE_PREFIX "data/effects") + if (_isGlShared GREATER -1) + set(_glslOutput "${_spirvFile}.glsl") + target_embed_files(code FILES "${_glslOutput}" RELATIVE_TO "${_shaderOutputDir}" 
PATH_TYPE_PREFIX "data/effects")
+        endif()
     endif()
 endforeach ()
diff --git a/code/source_groups.cmake b/code/source_groups.cmake
index 2f23cbd95fd..1f7aaa56c71 100644
--- a/code/source_groups.cmake
+++ b/code/source_groups.cmake
@@ -560,6 +560,8 @@ add_file_folder("Graphics\\\\SoftwareGr\\\\Font"
 add_file_folder("Graphics\\\\Util"
 	graphics/util/GPUMemoryHeap.cpp
 	graphics/util/GPUMemoryHeap.h
+	graphics/util/primitives.h
+	graphics/util/primitives.cpp
 	graphics/util/uniform_structs.h
 	graphics/util/UniformAligner.h
 	graphics/util/UniformAligner.cpp
@@ -573,12 +575,38 @@ if (FSO_BUILD_WITH_VULKAN)
 	add_file_folder("Graphics\\\\Vulkan"
 		graphics/vulkan/gr_vulkan.cpp
 		graphics/vulkan/gr_vulkan.h
-		graphics/vulkan/RenderFrame.cpp
-		graphics/vulkan/RenderFrame.h
-		graphics/vulkan/vulkan_stubs.cpp
-		graphics/vulkan/vulkan_stubs.h
+		graphics/vulkan/VulkanBuffer.cpp
+		graphics/vulkan/VulkanBuffer.h
+		graphics/vulkan/VulkanDeferred.cpp
+		graphics/vulkan/VulkanDeferred.h
+		graphics/vulkan/VulkanDeletionQueue.cpp
+		graphics/vulkan/VulkanDeletionQueue.h
+		graphics/vulkan/VulkanDescriptorManager.cpp
+		graphics/vulkan/VulkanDescriptorManager.h
+		graphics/vulkan/VulkanDraw.cpp
+		graphics/vulkan/VulkanDraw.h
+		graphics/vulkan/VulkanMemory.cpp
+		graphics/vulkan/VulkanMemory.h
+		graphics/vulkan/VulkanPipeline.cpp
+		graphics/vulkan/VulkanPipeline.h
+		graphics/vulkan/VulkanPostProcessing.cpp
+		graphics/vulkan/VulkanPostProcessing.h
+		graphics/vulkan/VulkanQuery.cpp
+		graphics/vulkan/VulkanQuery.h
 		graphics/vulkan/VulkanRenderer.cpp
 		graphics/vulkan/VulkanRenderer.h
+		graphics/vulkan/VulkanRenderFrame.cpp
+		graphics/vulkan/VulkanRenderFrame.h
+		graphics/vulkan/VulkanRenderState.cpp
+		graphics/vulkan/VulkanRenderState.h
+		graphics/vulkan/VulkanShader.cpp
+		graphics/vulkan/VulkanShader.h
+		graphics/vulkan/VulkanState.cpp
+		graphics/vulkan/VulkanState.h
+		graphics/vulkan/VulkanTexture.cpp
+		graphics/vulkan/VulkanTexture.h
+		graphics/vulkan/VulkanVertexFormat.cpp
+		graphics/vulkan/VulkanVertexFormat.h
 	)
 endif()
diff --git a/code/tracing/tracing.cpp b/code/tracing/tracing.cpp
index e88c666c47d..71298784e14 100644
--- a/code/tracing/tracing.cpp
+++ b/code/tracing/tracing.cpp
@@ -1,6 +1,7 @@
 #include "tracing/tracing.h"
 
 #include "graphics/2d.h"
+#include "globalincs/systemvars.h"
 #include "parse/parselo.h"
 #include "io/timer.h"
@@ -57,9 +58,12 @@ std::unique_ptr mainFrameTimer;
 std::unique_ptr frameProfiler;
 SCP_vector<int> query_objects;
 
-// The GPU timestamp queries use an internal free list to reduce the number of graphics API calls
+// Free list for backends where queries are immediately reusable (OpenGL).
+// When queries are NOT reusable (Vulkan), the free list is bypassed and
+// handles are returned to the backend.
 SCP_queue<int> free_query_objects;
 bool do_gpu_queries = true;
+bool queries_reusable = true;
 
 int get_query_object() {
 	if (!free_query_objects.empty()) {
@@ -69,7 +73,12 @@ int get_query_object() {
 	}
 
 	auto id = gr_create_query_object();
-	query_objects.push_back(id);
+	if (queries_reusable) {
+		// Track for bulk cleanup at shutdown. When not reusable, the backend
+		// owns the lifecycle — handles are returned via gr_delete_query_object
+		// and the backend's own shutdown destroys the pool.
+		query_objects.push_back(id);
+	}
 
 	return id;
 }
@@ -83,7 +92,12 @@ int get_gpu_timestamp_query() {
 }
 
 void free_query_object(int obj) {
-	free_query_objects.push(obj);
+	if (queries_reusable) {
+		free_query_objects.push(obj);
+	} else {
+		// Backend manages reset lifecycle internally — hand it back.
+ gr_delete_query_object(obj); + } } struct gpu_trace_event { @@ -231,6 +245,7 @@ void init() { } do_gpu_queries = gr_is_capable(gr_capability::CAPABILITY_TIMESTAMP_QUERY); + queries_reusable = gr_is_capable(gr_capability::CAPABILITY_QUERIES_REUSABLE); if (do_gpu_queries) { gpu_start_query = get_gpu_timestamp_query(); @@ -261,11 +276,23 @@ SCP_string get_frame_profile_output() { } void shutdown() { - while (!gpu_events.empty()) { - process_events(); + if (queries_reusable) { + while (!gpu_events.empty()) { + process_events(); - // Don't do busy waiting... - os_sleep(5); + // Don't do busy waiting... + os_sleep(5); + } + } else { + // Discard remaining GPU events — no more frames will + // be submitted, so unsubmitted queries can never become + // available. + while (!gpu_events.empty()) { + auto& first = gpu_events.front(); + gr_delete_query_object(first.gpu_begin_query); + gr_delete_query_object(first.gpu_end_query); + gpu_events.pop(); + } } for (auto query : query_objects) { @@ -273,6 +300,10 @@ void shutdown() { } query_objects.clear(); + while (!free_query_objects.empty()) { + free_query_objects.pop(); + } + mainFrameTimer = nullptr; traceEventWriter = nullptr;
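
A note on the struct-generation pipeline above: the generated headers (named ${_baseShaderName}_structs${_shaderExt}.h by the rule in the CMake hunk, e.g. default-material_structs.vert.h) are pulled in through shader_structs.h so that engine code can check, at compile time, that its C++ uniform structs still match the shader's layout. A minimal sketch of how such a check can look; the struct and field names below are hypothetical stand-ins, not actual shadertool output:

#include <cstddef>

// Pretend this block was generated into default-material_structs.vert.h
// by shadertool from the shader's uniform block (names are invented).
namespace generated {
struct matrix_uniforms {
	float modelViewMatrix[16]; // column-major mat4
	float projMatrix[16];
};
} // namespace generated

// Engine-side struct that is written into the uniform buffer.
struct engine_matrix_uniforms {
	float modelViewMatrix[16];
	float projMatrix[16];
};

// Layout drift between shader and C++ now fails the build instead of
// silently corrupting the uniform buffer at runtime.
static_assert(sizeof(engine_matrix_uniforms) == sizeof(generated::matrix_uniforms),
              "uniform block size mismatch");
static_assert(offsetof(engine_matrix_uniforms, projMatrix) ==
                  offsetof(generated::matrix_uniforms, projMatrix),
              "projMatrix offset mismatch");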
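
A note on the tracing change: the free list is only sound when a query handle can be reissued as soon as its result has been read back, which holds for OpenGL query objects. A Vulkan query slot must instead be reset on the GPU timeline before it can be begun again, so the backend has to own the lifecycle; this is also why shutdown() discards unsubmitted events, since with no further frames there is no command buffer in which those resets and result copies could ever execute. A rough sketch of that constraint; TimestampPool and its methods are hypothetical, while vkCmdResetQueryPool is the real Vulkan entry point:

#include <vulkan/vulkan.h>

#include <cstdint>
#include <vector>

// Hypothetical backend-side pool illustrating deferred query reuse.
class TimestampPool {
	VkQueryPool m_pool = VK_NULL_HANDLE;
	std::vector<uint32_t> m_pendingReset; // slots handed back via free_query_object()

public:
	void returnQuery(uint32_t slot) {
		// The slot is NOT reusable yet; it still carries its old result state.
		m_pendingReset.push_back(slot);
	}

	// Recorded into the next frame's command buffer before any new queries begin.
	void flushResets(VkCommandBuffer cmd) {
		for (uint32_t slot : m_pendingReset) {
			vkCmdResetQueryPool(cmd, m_pool, slot, 1);
		}
		// Only after this executes on the GPU are the slots reissuable.
		m_pendingReset.clear();
	}
};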