Skip to content

Commit d00056e

Browse files
authored
perf(lambda-rs): Cache missing per-instance vertex buffer slots (#191)
## Summary

Cache missing per-instance vertex buffer slots in the render encoder so instancing validation no longer rescans every pipeline slot on every draw. This reduces validation overhead in instanced render passes from `O(D * S)` to `O(S + B + D)`, where `D` is draw calls, `S` is per-instance slots, and `B` is vertex buffer binds.

## Related Issues

## Changes

- Cache missing per-instance slots when `RenderPassEncoder::set_pipeline` is called.
- Update the cache incrementally in `RenderPassEncoder::set_vertex_buffer`.
- Replace per-draw full scans with a constant-time cached missing-slot check in `draw` and `draw_indexed`.
- Centralize the missing-instance-binding error message in render validation helpers.
- Add encoder tests covering both post-pipeline binding and prebound-slot cases.
- Add rustdoc for cached instancing validation behavior and the instanced test helper.

## Type of Change

- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] Feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [x] Documentation (updates to docs, specs, tutorials, or comments)
- [ ] Refactor (code change that neither fixes a bug nor adds a feature)
- [x] Performance (change that improves performance)
- [x] Test (adding or updating tests)
- [ ] Build/CI (changes to build process or CI configuration)

## Affected Crates

- [x] `lambda-rs`
- [ ] `lambda-rs-platform`
- [ ] `lambda-rs-args`
- [ ] `lambda-rs-logging`
- [ ] Other:

## Checklist

- [ ] Code follows the repository style guidelines (`cargo +nightly fmt --all`)
- [ ] Code passes clippy (`cargo clippy --workspace --all-targets -- -D warnings`)
- [ ] Tests pass (`cargo test --workspace`)
- [x] New code includes appropriate documentation
- [x] Public API changes are documented
- [ ] Breaking changes are noted in this PR description

## Testing

**Commands run:**

```bash
cargo fmt --package lambda-rs
cargo test -p lambda-rs render_pass_encoder_ -- --nocapture
cargo test -p lambda-rs validate_instance_ -- --nocapture
```

**Manual verification steps (if applicable):**

1. Confirmed instanced draws fail with the existing validation error until the required per-instance slot is bound.
2. Confirmed binding the required instance slot after `set_pipeline` allows the draw to proceed.
3. Confirmed prebinding the instance slot before `set_pipeline` is respected by the cached validation state.

## Screenshots/Recordings

Not applicable.

## Platform Testing

- [x] macOS
- [ ] Windows
- [ ] Linux

## Additional Notes
2 parents 5b362a0 + 4c72dfe commit d00056e

File tree

2 files changed

+292
-25
lines changed

2 files changed

+292
-25
lines changed

crates/lambda-rs/src/render/encoder.rs

Lines changed: 263 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,10 @@
2929
//! ```
3030
3131
use std::{
32-
collections::HashSet,
32+
collections::{
33+
BTreeSet,
34+
HashSet,
35+
},
3336
ops::Range,
3437
};
3538

@@ -165,6 +168,11 @@ impl std::fmt::Debug for CommandEncoder {
165168
/// The encoder borrows the command encoder for the duration of the pass and
166169
/// performs validation on all operations.
167170
///
171+
/// When instancing validation is enabled, the encoder caches which per-instance
172+
/// vertex buffer slots are still missing for the active pipeline. This keeps
173+
/// repeated `draw*` validation constant-time even when a pass issues many
174+
/// draws.
175+
///
168176
/// # Type Parameters
169177
/// * `'pass` - The lifetime of the render pass, tied to the borrowed encoder
170178
/// and attachments.
@@ -185,7 +193,11 @@ pub struct RenderPassEncoder<'pass> {
185193
destination_depth_format: Option<DepthFormat>,
186194

187195
// Validation state (compiled out in release without features)
188-
#[cfg(any(debug_assertions, feature = "render-validation-encoder"))]
196+
#[cfg(any(
197+
debug_assertions,
198+
feature = "render-validation-encoder",
199+
feature = "render-validation-instancing"
200+
))]
189201
current_pipeline: Option<CurrentPipeline>,
190202
#[cfg(any(debug_assertions, feature = "render-validation-encoder"))]
191203
bound_index_buffer: Option<BoundIndexBuffer>,
@@ -206,11 +218,15 @@ pub struct RenderPassEncoder<'pass> {
206218
}
207219

208220
/// Tracks the currently bound pipeline for validation.
209-
#[cfg(any(debug_assertions, feature = "render-validation-encoder"))]
210-
#[derive(Clone)]
221+
#[cfg(any(
222+
debug_assertions,
223+
feature = "render-validation-encoder",
224+
feature = "render-validation-instancing"
225+
))]
211226
struct CurrentPipeline {
212227
label: String,
213-
per_instance_slots: Vec<bool>,
228+
#[cfg(any(debug_assertions, feature = "render-validation-instancing"))]
229+
missing_instance_slots: BTreeSet<u32>,
214230
}
215231

216232
/// Tracks the currently bound index buffer for validation.
@@ -264,7 +280,11 @@ impl<'pass> RenderPassEncoder<'pass> {
264280
sample_count: pass.sample_count(),
265281
destination_color_format: destination_info.color_format,
266282
destination_depth_format: destination_info.depth_format,
267-
#[cfg(any(debug_assertions, feature = "render-validation-encoder"))]
283+
#[cfg(any(
284+
debug_assertions,
285+
feature = "render-validation-encoder",
286+
feature = "render-validation-instancing"
287+
))]
268288
current_pipeline: None,
269289
#[cfg(any(debug_assertions, feature = "render-validation-encoder"))]
270290
bound_index_buffer: None,
@@ -289,6 +309,10 @@ impl<'pass> RenderPassEncoder<'pass> {
289309
///
290310
/// Returns an error if the pipeline is incompatible with the current pass
291311
/// configuration (e.g., color target mismatch).
312+
///
313+
/// When instancing validation is enabled, this also computes the currently
314+
/// missing per-instance vertex buffer slots once so subsequent draw
315+
/// validation can reuse cached state.
292316
pub fn set_pipeline(
293317
&mut self,
294318
pipeline: &RenderPipeline,
@@ -366,12 +390,31 @@ impl<'pass> RenderPassEncoder<'pass> {
366390
}
367391

368392
// Track current pipeline for draw validation
369-
#[cfg(any(debug_assertions, feature = "render-validation-encoder"))]
393+
#[cfg(any(
394+
debug_assertions,
395+
feature = "render-validation-encoder",
396+
feature = "render-validation-instancing"
397+
))]
370398
{
371399
let label = pipeline.pipeline().label().unwrap_or("unnamed").to_string();
372400
self.current_pipeline = Some(CurrentPipeline {
373401
label,
374-
per_instance_slots: pipeline.per_instance_slots().clone(),
402+
#[cfg(any(
403+
debug_assertions,
404+
feature = "render-validation-instancing"
405+
))]
406+
missing_instance_slots: pipeline
407+
.per_instance_slots()
408+
.iter()
409+
.enumerate()
410+
.filter_map(|(slot, is_instance)| {
411+
if !is_instance || self.bound_vertex_slots.contains(&(slot as u32))
412+
{
413+
return None;
414+
}
415+
Some(slot as u32)
416+
})
417+
.collect(),
375418
});
376419
}
377420

@@ -471,10 +514,16 @@ impl<'pass> RenderPassEncoder<'pass> {
471514
}
472515

473516
/// Bind a vertex buffer to a slot.
517+
///
518+
/// When instancing validation is enabled, this updates the cached set of
519+
/// missing per-instance slots for the active pipeline.
474520
pub fn set_vertex_buffer(&mut self, slot: u32, buffer: &Buffer) {
475521
#[cfg(any(debug_assertions, feature = "render-validation-instancing"))]
476522
{
477523
self.bound_vertex_slots.insert(slot);
524+
if let Some(current_pipeline) = self.current_pipeline.as_mut() {
525+
current_pipeline.missing_instance_slots.remove(&slot);
526+
}
478527
}
479528

480529
self.pass.set_vertex_buffer(slot, buffer.raw());
@@ -547,13 +596,14 @@ impl<'pass> RenderPassEncoder<'pass> {
547596
#[cfg(any(debug_assertions, feature = "render-validation-instancing"))]
548597
{
549598
if let Some(ref pipeline) = self.current_pipeline {
550-
validation::validate_instance_bindings(
551-
&pipeline.label,
552-
&pipeline.per_instance_slots,
553-
&self.bound_vertex_slots,
554-
)
555-
.map_err(RenderPassError::Validation)?;
556-
599+
if let Some(slot) = pipeline.missing_instance_slots.iter().next() {
600+
return Err(RenderPassError::Validation(
601+
validation::missing_instance_binding_message(
602+
&pipeline.label,
603+
*slot,
604+
),
605+
));
606+
}
557607
validation::validate_instance_range("Draw", &instances)
558608
.map_err(RenderPassError::Validation)?;
559609
}
@@ -615,13 +665,14 @@ impl<'pass> RenderPassEncoder<'pass> {
615665
#[cfg(any(debug_assertions, feature = "render-validation-instancing"))]
616666
{
617667
if let Some(ref pipeline) = self.current_pipeline {
618-
validation::validate_instance_bindings(
619-
&pipeline.label,
620-
&pipeline.per_instance_slots,
621-
&self.bound_vertex_slots,
622-
)
623-
.map_err(RenderPassError::Validation)?;
624-
668+
if let Some(slot) = pipeline.missing_instance_slots.iter().next() {
669+
return Err(RenderPassError::Validation(
670+
validation::missing_instance_binding_message(
671+
&pipeline.label,
672+
*slot,
673+
),
674+
));
675+
}
625676
validation::validate_instance_range("DrawIndexed", &instances)
626677
.map_err(RenderPassError::Validation)?;
627678
}
@@ -738,6 +789,11 @@ mod tests {
738789
TextureBuilder,
739790
TextureFormat,
740791
},
792+
vertex::{
793+
ColorFormat,
794+
VertexAttribute,
795+
VertexElement,
796+
},
741797
viewport::Viewport,
742798
};
743799

@@ -768,6 +824,55 @@ mod tests {
768824
return (vs, fs);
769825
}
770826

827+
/// Build a minimal pipeline that declares one per-instance vertex buffer.
828+
///
829+
/// This helper exists for encoder tests that need instancing validation
830+
/// without depending on additional render state. The returned pipeline uses
831+
/// the shared triangle shaders and declares slot `0` as a per-instance
832+
/// buffer so tests can exercise cached instance-slot tracking.
833+
///
834+
/// # Arguments
835+
/// - `gpu`: The test GPU used to allocate the instance buffer and create the
836+
/// pipeline.
837+
/// - `pass`: The render pass the pipeline must be compatible with.
838+
///
839+
/// # Returns
840+
/// Returns a `RenderPipeline` configured with one per-instance vertex buffer
841+
/// declared at slot `0` (binding a buffer to that slot is left to the caller).
842+
fn build_instanced_test_pipeline(
843+
gpu: &crate::render::gpu::Gpu,
844+
pass: &RenderPass,
845+
) -> RenderPipeline {
846+
let (vs, fs) = compile_triangle_shaders();
847+
let instance_buffer = BufferBuilder::new()
848+
.with_label("encoder-test-instance-layout")
849+
.with_usage(Usage::VERTEX)
850+
.with_properties(Properties::CPU_VISIBLE)
851+
.with_buffer_type(BufferType::Vertex)
852+
.build(gpu, vec![[0.0f32; 3]])
853+
.expect("build instance layout buffer");
854+
let instance_attributes = vec![VertexAttribute {
855+
location: 0,
856+
offset: 0,
857+
element: VertexElement {
858+
format: ColorFormat::Rgb32Sfloat,
859+
offset: 0,
860+
},
861+
}];
862+
863+
return RenderPipelineBuilder::new()
864+
.with_label("instanced-pipeline")
865+
.with_instance_buffer(instance_buffer, instance_attributes)
866+
.build(
867+
gpu,
868+
TextureFormat::Rgba8Unorm,
869+
DepthFormat::Depth24Plus,
870+
pass,
871+
&vs,
872+
Some(&fs),
873+
);
874+
}
875+
771876
/// Ensures the `Display` implementation for `RenderPassError` forwards the
772877
/// underlying message without modification.
773878
#[test]
@@ -1049,4 +1154,140 @@ mod tests {
10491154
let cb = encoder.finish();
10501155
gpu.submit(std::iter::once(cb));
10511156
}
1157+
1158+
/// Ensures instancing validation caches missing slots when the pipeline is
1159+
/// set, then clears them incrementally as matching vertex buffers are bound.
1160+
#[test]
1161+
fn render_pass_encoder_tracks_missing_instance_slots_incrementally() {
1162+
let Some(gpu) = crate::render::gpu::create_test_gpu("lambda-encoder-test")
1163+
else {
1164+
return;
1165+
};
1166+
1167+
let pass = RenderPassBuilder::new().with_label("instanced-pass").build(
1168+
&gpu,
1169+
TextureFormat::Rgba8Unorm,
1170+
DepthFormat::Depth24Plus,
1171+
);
1172+
let pipeline = build_instanced_test_pipeline(&gpu, &pass);
1173+
1174+
let resolve = TextureBuilder::new_2d(TextureFormat::Rgba8Unorm)
1175+
.with_size(4, 4)
1176+
.for_render_target()
1177+
.build(&gpu)
1178+
.expect("build resolve texture");
1179+
1180+
let mut encoder = platform::command::CommandEncoder::new(
1181+
gpu.platform(),
1182+
Some("lambda-instanced-encoder"),
1183+
);
1184+
1185+
let mut attachments = RenderColorAttachments::for_offscreen_pass(
1186+
pass.uses_color(),
1187+
pass.sample_count(),
1188+
None,
1189+
resolve.view_ref(),
1190+
);
1191+
1192+
let mut rp = RenderPassEncoder::new(
1193+
&mut encoder,
1194+
&pass,
1195+
RenderPassDestinationInfo {
1196+
color_format: Some(TextureFormat::Rgba8Unorm),
1197+
depth_format: None,
1198+
},
1199+
&mut attachments,
1200+
None,
1201+
);
1202+
1203+
rp.set_pipeline(&pipeline).expect("set instanced pipeline");
1204+
1205+
let missing_before_bind = rp.draw(0..3, 0..1);
1206+
if cfg!(any(
1207+
debug_assertions,
1208+
feature = "render-validation-instancing"
1209+
)) {
1210+
let err =
1211+
missing_before_bind.expect_err("draw must require instance binding");
1212+
assert!(matches!(err, RenderPassError::Validation(_)));
1213+
assert!(err.to_string().contains("slot 0"));
1214+
} else {
1215+
missing_before_bind.expect("draw ok without instancing validation");
1216+
}
1217+
1218+
let instance_buffer = BufferBuilder::new()
1219+
.with_label("encoder-test-instance-binding")
1220+
.with_usage(Usage::VERTEX)
1221+
.with_properties(Properties::CPU_VISIBLE)
1222+
.with_buffer_type(BufferType::Vertex)
1223+
.build(&gpu, vec![[1.0f32; 3]])
1224+
.expect("build instance binding buffer");
1225+
rp.set_vertex_buffer(0, &instance_buffer);
1226+
rp.draw(0..3, 0..1)
1227+
.expect("draw succeeds after required instance slot is bound");
1228+
1229+
drop(rp);
1230+
let cb = encoder.finish();
1231+
gpu.submit(std::iter::once(cb));
1232+
}
1233+
1234+
/// Ensures the cached instancing state honors vertex buffers that were bound
1235+
/// before the active pipeline was selected.
1236+
#[test]
1237+
fn render_pass_encoder_instancing_cache_respects_prebound_slots() {
1238+
let Some(gpu) = crate::render::gpu::create_test_gpu("lambda-encoder-test")
1239+
else {
1240+
return;
1241+
};
1242+
1243+
let pass = RenderPassBuilder::new()
1244+
.with_label("instanced-prebound-pass")
1245+
.build(&gpu, TextureFormat::Rgba8Unorm, DepthFormat::Depth24Plus);
1246+
let pipeline = build_instanced_test_pipeline(&gpu, &pass);
1247+
1248+
let resolve = TextureBuilder::new_2d(TextureFormat::Rgba8Unorm)
1249+
.with_size(4, 4)
1250+
.for_render_target()
1251+
.build(&gpu)
1252+
.expect("build resolve texture");
1253+
1254+
let mut encoder = platform::command::CommandEncoder::new(
1255+
gpu.platform(),
1256+
Some("lambda-instanced-prebound-encoder"),
1257+
);
1258+
1259+
let mut attachments = RenderColorAttachments::for_offscreen_pass(
1260+
pass.uses_color(),
1261+
pass.sample_count(),
1262+
None,
1263+
resolve.view_ref(),
1264+
);
1265+
1266+
let mut rp = RenderPassEncoder::new(
1267+
&mut encoder,
1268+
&pass,
1269+
RenderPassDestinationInfo {
1270+
color_format: Some(TextureFormat::Rgba8Unorm),
1271+
depth_format: None,
1272+
},
1273+
&mut attachments,
1274+
None,
1275+
);
1276+
1277+
let instance_buffer = BufferBuilder::new()
1278+
.with_label("encoder-test-prebound-instance-buffer")
1279+
.with_usage(Usage::VERTEX)
1280+
.with_properties(Properties::CPU_VISIBLE)
1281+
.with_buffer_type(BufferType::Vertex)
1282+
.build(&gpu, vec![[1.0f32; 3]])
1283+
.expect("build prebound instance buffer");
1284+
rp.set_vertex_buffer(0, &instance_buffer);
1285+
rp.set_pipeline(&pipeline).expect("set instanced pipeline");
1286+
rp.draw(0..3, 0..1)
1287+
.expect("prebound instance slot satisfies cached validation");
1288+
1289+
drop(rp);
1290+
let cb = encoder.finish();
1291+
gpu.submit(std::iter::once(cb));
1292+
}
10521293
}

0 commit comments

Comments
 (0)