-
Notifications
You must be signed in to change notification settings - Fork 830
Mark wave intrinsics as convergent to prevent loop miscompilation #8204
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need a pass test (under |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| // RUN: %dxc -T cs_6_6 -E main %s | FileCheck %s | ||
|
|
||
| // Regression test for a bug where the optimizer (JumpThreading) would | ||
| // restructure a while-loop containing wave intrinsics, moving | ||
| // WaveActiveCountBits outside the loop. This changes the set of active | ||
| // lanes at the wave op call site, producing incorrect results on SIMT | ||
| // hardware. | ||
|
|
||
| // Verify that WaveAllBitCount (opcode 135) appears BEFORE the loop's | ||
| // back-edge phi, ensuring it stays inside the loop body. | ||
|
|
||
| // CHECK: call i32 @dx.op.waveReadLaneFirst | ||
| // CHECK: call i32 @dx.op.waveAllOp | ||
| // CHECK: call i1 @dx.op.waveIsFirstLane | ||
| // CHECK: phi i32 | ||
| // CHECK: br i1 | ||
|
|
||
| RWStructuredBuffer<uint> Output : register(u1); | ||
|
|
||
| cbuffer Constants : register(b0) { | ||
| uint Width; | ||
| uint Height; | ||
| uint NumMaterials; | ||
| }; | ||
|
|
||
| [numthreads(32, 1, 1)] | ||
| void main(uint3 DTid : SV_DispatchThreadID) { | ||
| uint x = DTid.x; | ||
| uint y = DTid.y; | ||
|
|
||
| if (x >= Width || y >= Height) | ||
| return; | ||
|
|
||
| // Compute a material ID per lane (simple hash). | ||
| uint materialID = ((x * 7) + (y * 13)) % NumMaterials; | ||
|
|
||
| // Binning loop: each iteration peels off one material group. | ||
| // WaveReadLaneFirst picks a material, matching lanes count themselves | ||
| // with WaveActiveCountBits, and the first lane in the group writes | ||
| // the count. Non-matching lanes loop back for the next material. | ||
| bool go = true; | ||
| while (go) { | ||
| uint firstMat = WaveReadLaneFirst(materialID); | ||
| if (firstMat == materialID) { | ||
| uint count = WaveActiveCountBits(true); | ||
| if (WaveIsFirstLane()) { | ||
| InterlockedAdd(Output[firstMat], count); | ||
| } | ||
| go = false; | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change modifies attributes that show up in final DXIL. We should be cautious with changes like that, since it could break drivers not expecting it. We need to decide whether we want to expose the convergent attribute in next DXIL version and probably filter attributes on DXIL ops for prior shader models in DxilFinalizeModule (defined in DxilPreparePasses.cpp).