-
Notifications
You must be signed in to change notification settings - Fork 830
Initial HLK test for GroupSharedLimit #8160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
JoeCitizen
wants to merge
5
commits into
microsoft:main
Choose a base branch
from
JoeCitizen:user/jackell/GroupSharedMemory_HLK
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
bb0f4bc
Initial HLK test for GroupSharedLimit
JoeCitizen 94c999d
Make a copy of required D3D12 structures and add MS/AS execution tests
JoeCitizen 1ceb7d4
fix up variable naming
JoeCitizen 17229ae
Address PR #8160 review comments
JoeCitizen 4f0b3cb
Address additional PR #8160 review comments
JoeCitizen File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -209,6 +209,9 @@ class ExecutionTest { | |
| TEST_METHOD(WaveIntrinsicsInPSTest); | ||
| TEST_METHOD(WaveSizeTest); | ||
| TEST_METHOD(WaveSizeRangeTest); | ||
| TEST_METHOD(GroupSharedLimitTest); | ||
| TEST_METHOD(GroupSharedLimitASTest); | ||
| TEST_METHOD(GroupSharedLimitMSTest); | ||
| TEST_METHOD(PartialDerivTest); | ||
| TEST_METHOD(DerivativesTest); | ||
| TEST_METHOD(ComputeSampleTest); | ||
|
|
@@ -10619,6 +10622,315 @@ void ExecutionTest::WaveSizeRangeTest() { | |
| m_support); | ||
| } | ||
|
|
||
| // Helper: creates a Shader Model 6.10 device via D3D12SDK->createDevice. FailIfRequirementsNotMet starts as true only when _HLK_CONF is defined and may be overridden by the runtime parameter of the same name; its negation is forwarded to createDevice as SkipUnsupported. | ||
| // Returns true if the device was created. Returns false otherwise, logging an error first when FailIfRequirementsNotMet is set, so callers should simply return. | ||
| static bool CreateGSMLimitTestDevice(D3D12SDKSelector *D3D12SDK, | ||
| CComPtr<ID3D12Device> &Device) { | ||
| bool FailIfRequirementsNotMet = false; | ||
| #ifdef _HLK_CONF | ||
| FailIfRequirementsNotMet = true; | ||
| #endif | ||
| WEX::TestExecution::RuntimeParameters::TryGetValue( | ||
| L"FailIfRequirementsNotMet", FailIfRequirementsNotMet); | ||
|
|
||
| const bool SkipUnsupported = !FailIfRequirementsNotMet; | ||
| if (!D3D12SDK->createDevice(&Device, D3D_SHADER_MODEL_6_10, | ||
| SkipUnsupported)) { | ||
| if (FailIfRequirementsNotMet) | ||
| LogErrorFmt(L"Device creation failed, resulting in test failure, since " | ||
| L"FailIfRequirementsNotMet is set."); | ||
| return false; | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
| // Helper: runs the shader op named OpName from ShaderOpSet through st::RunShaderOpTestAfterParse. The callback checks that the resource name starts with UAVBuffer0, replaces the text of shader number ShaderIndex with ShaderText, and zero-fills GsmDwords uint32 values of initial data. | ||
| // Afterwards the UAVBuffer0 readback is verified to hold the sequential uint values [0, GsmDwords), i.e. element I must equal I. NOTE(review): the readback size is not checked against GsmDwords before indexing; confirm the op definition sizes the buffer accordingly. | ||
| static void RunGSMLimitShaderAndVerify( | ||
| ID3D12Device *Device, dxc::SpecificDllLoader &Support, LPCSTR OpName, | ||
| const char *ShaderText, UINT GsmDwords, UINT ShaderIndex, | ||
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet) { | ||
| std::shared_ptr<st::ShaderOpTestResult> Test = st::RunShaderOpTestAfterParse( | ||
| Device, Support, OpName, | ||
| [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *Op) { | ||
| VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10))); | ||
| Op->Shaders.at(ShaderIndex).Text = ShaderText; | ||
| Data.resize(sizeof(uint32_t) * GsmDwords); | ||
| memset(Data.data(), 0, Data.size()); | ||
| }, | ||
| ShaderOpSet); | ||
|
|
||
| MappedData DataUav; | ||
| Test->Test->GetReadBackData("UAVBuffer0", &DataUav); | ||
| const uint32_t *OutData = (const uint32_t *)DataUav.data(); | ||
|
|
||
| for (UINT I = 0; I < GsmDwords; I++) { | ||
| VERIFY_ARE_EQUAL(OutData[I], I); | ||
| } | ||
| } | ||
|
| // Execution test for the GroupSharedLimit attribute (CS limit per the log text below): creates an SM 6.10 device, logs the device maximum, then runs three sub-tests through the GroupSharedLimitTest shader op; each shader fills a groupshared array with its indices and thread 0 copies it to the UAV, which is verified to hold 0..N-1. | ||
||
| void ExecutionTest::GroupSharedLimitTest() { | ||
| WEX::TestExecution::SetVerifyOutput VerifySettings( | ||
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); | ||
|
|
||
| CComPtr<ID3D12Device> Device; | ||
| if (!CreateGSMLimitTestDevice(&*D3D12SDK, Device)) | ||
| return; | ||
|
|
||
| const UINT MaxGSMCS = getMaxGroupSharedMemoryCS(Device); | ||
| LogCommentFmt(L"Device MaxGroupSharedMemoryPerGroupCS: %u bytes", MaxGSMCS); | ||
|
|
||
| // Parse ShaderOpArith.xml once into a ShaderOpSet that every sub-test below reuses. | ||
| CComPtr<IStream> Stream; | ||
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = | ||
| std::make_shared<st::ShaderOpSet>(); | ||
| readHlslDataIntoNewStream(L"ShaderOpArith.xml", &Stream, m_support); | ||
| st::ParseShaderOpSetFromStream(Stream, ShaderOpSet.get()); | ||
|
|
||
| // Test 1: a GroupSharedLimit equal to the usage (the boundary of limit >= usage) should succeed. | ||
| // Use 4096 DWORDs (16384 bytes) of TGSM with a limit of 16384 bytes. The C++ GSM_DWORDS constant must match the #define in the shader text. | ||
| { | ||
| static const UINT GSM_DWORDS = 4096; | ||
|
|
||
| LogCommentFmt(L"Test 1: GroupSharedLimit == usage (16384 bytes). " | ||
| L"Shader should compile and execute successfully."); | ||
|
|
||
| static const char Shader[] = | ||
| R"( | ||
| #define GSM_DWORDS 4096 | ||
| #define NUM_THREADS 64 | ||
| groupshared uint g_shared[GSM_DWORDS]; // 16384 bytes | ||
| RWStructuredBuffer<uint> g_output : register(u0); | ||
|
|
||
| [GroupSharedLimit(16384)] | ||
| [numthreads(NUM_THREADS, 1, 1)] | ||
| void main(uint GI : SV_GroupIndex) { | ||
| for (uint i = GI; i < GSM_DWORDS; i += NUM_THREADS) | ||
| g_shared[i] = i; | ||
| GroupMemoryBarrierWithGroupSync(); | ||
| if (GI == 0) { | ||
| for (uint j = 0; j < GSM_DWORDS; j++) | ||
| g_output[j] = g_shared[j]; | ||
| } | ||
| })"; | ||
|
|
||
| RunGSMLimitShaderAndVerify(Device, m_support, "GroupSharedLimitTest", | ||
| Shader, GSM_DWORDS, 0, ShaderOpSet); | ||
| LogCommentFmt(L"Test 1 passed: GroupSharedLimit == usage succeeded."); | ||
| } | ||
|
|
||
| // Test 2: GroupSharedLimit and usage are both larger than the default. | ||
| // Use 9216 DWORDs (36864 bytes) of TGSM, which exceeds the default 32768, | ||
| // but set GroupSharedLimit to 36864 so it should succeed. Only a skip comment is logged when MaxGSMCS is below 36864 bytes. | ||
| static const UINT GSM_BYTES_TEST2 = 36864; | ||
| if (MaxGSMCS < GSM_BYTES_TEST2) { | ||
| LogCommentFmt(L"Test 2 skipped: device max GSM (%u) < %u bytes", MaxGSMCS, | ||
| GSM_BYTES_TEST2); | ||
| } else { | ||
| static const UINT GSM_DWORDS = GSM_BYTES_TEST2 / sizeof(uint32_t); | ||
|
|
||
| LogCommentFmt(L"Test 2: GroupSharedLimit (%u) and usage (%u bytes), " | ||
| L"both above default (32768). " | ||
| L"Shader should compile and execute successfully.", | ||
| GSM_BYTES_TEST2, GSM_BYTES_TEST2); | ||
|
|
||
| static const char Shader[] = | ||
| R"( | ||
| #define GSM_DWORDS 9216 | ||
| #define NUM_THREADS 64 | ||
| groupshared uint g_shared[GSM_DWORDS]; // 36864 bytes | ||
| RWStructuredBuffer<uint> g_output : register(u0); | ||
|
|
||
| [GroupSharedLimit(36864)] | ||
| [numthreads(NUM_THREADS, 1, 1)] | ||
| void main(uint GI : SV_GroupIndex) { | ||
| for (uint i = GI; i < GSM_DWORDS; i += NUM_THREADS) | ||
| g_shared[i] = i; | ||
| GroupMemoryBarrierWithGroupSync(); | ||
| if (GI == 0) { | ||
| for (uint j = 0; j < GSM_DWORDS; j++) | ||
| g_output[j] = g_shared[j]; | ||
| } | ||
| })"; | ||
|
|
||
| RunGSMLimitShaderAndVerify(Device, m_support, "GroupSharedLimitTest", | ||
| Shader, GSM_DWORDS, 0, ShaderOpSet); | ||
| LogCommentFmt(L"Test 2 passed: GroupSharedLimit > default succeeded."); | ||
| } | ||
|
|
||
| // Test 3: No GroupSharedLimit attribute, usage exactly at the default (32768 bytes). | ||
| // The shader should fall back to the default limit and succeed. | ||
| { | ||
| static const UINT GSM_DWORDS = 8192; | ||
|
|
||
| LogCommentFmt(L"Test 3: No GroupSharedLimit, usage (32768 bytes) <= " | ||
| L"default limit. Shader should succeed."); | ||
|
|
||
| static const char Shader[] = | ||
| R"( | ||
| #define GSM_DWORDS 8192 | ||
| #define NUM_THREADS 64 | ||
| groupshared uint g_shared[GSM_DWORDS]; // 32768 bytes (default max) | ||
| RWStructuredBuffer<uint> g_output : register(u0); | ||
|
|
||
| [numthreads(NUM_THREADS, 1, 1)] | ||
| void main(uint GI : SV_GroupIndex) { | ||
| for (uint i = GI; i < GSM_DWORDS; i += NUM_THREADS) | ||
| g_shared[i] = i; | ||
| GroupMemoryBarrierWithGroupSync(); | ||
| if (GI == 0) { | ||
| for (uint j = 0; j < GSM_DWORDS; j++) | ||
| g_output[j] = g_shared[j]; | ||
| } | ||
| })"; | ||
|
|
||
| RunGSMLimitShaderAndVerify(Device, m_support, "GroupSharedLimitTest", | ||
| Shader, GSM_DWORDS, 0, ShaderOpSet); | ||
| LogCommentFmt(L"Test 3 passed: No attribute with default usage succeeded."); | ||
| } | ||
| } | ||
|
| // Amplification-shader variant of the GroupSharedLimit execution test: creates an SM 6.10 device, reports Skipped when doesDeviceSupportMeshShaders returns false, logs the AS groupshared maximum, and runs the GroupSharedLimitASTest shader op with a 16384-byte groupshared array under [GroupSharedLimit(16384)]. NOTE(review): MaxGSMAS is only logged; it does not gate the test. | ||
||
| void ExecutionTest::GroupSharedLimitASTest() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shared logic with GroupSharedLimitMSTest looks ripe for some refactoring/shared helpers. |
||
| WEX::TestExecution::SetVerifyOutput VerifySettings( | ||
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); | ||
|
|
||
| CComPtr<ID3D12Device> Device; | ||
| if (!CreateGSMLimitTestDevice(&*D3D12SDK, Device)) | ||
| return; | ||
|
|
||
| if (!doesDeviceSupportMeshShaders(Device)) { | ||
| LogCommentFmt(L"Device does not support mesh shaders, skipping."); | ||
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); | ||
| return; | ||
| } | ||
|
|
||
| const UINT MaxGSMAS = getMaxGroupSharedMemoryAS(Device); | ||
| LogCommentFmt(L"Device MaxGroupSharedMemoryPerGroupAS: %u bytes", MaxGSMAS); | ||
|
|
||
| CComPtr<IStream> Stream; | ||
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = | ||
| std::make_shared<st::ShaderOpSet>(); | ||
| readHlslDataIntoNewStream(L"ShaderOpArith.xml", &Stream, m_support); | ||
| st::ParseShaderOpSetFromStream(Stream, ShaderOpSet.get()); | ||
|
|
||
| // Test: the AS fills groupshared memory with its indices, thread 0 copies it to the UAV, then DispatchMesh launches a mesh shader that sets zero output counts. The C++ GSM_DWORDS constant must match the #define in the shader text. | ||
| { | ||
| static const UINT GSM_DWORDS = 4096; | ||
|
|
||
| LogCommentFmt(L"AS Test: GroupSharedLimit == usage (16384 bytes). " | ||
| L"Amplification shader should compile and execute."); | ||
|
|
||
| static const char Shader[] = | ||
| R"( | ||
| struct Payload { uint dummy; }; | ||
|
|
||
| #define GSM_DWORDS 4096 | ||
| groupshared uint g_shared[GSM_DWORDS]; // 16384 bytes | ||
| RWStructuredBuffer<uint> g_output : register(u0); | ||
|
|
||
| [GroupSharedLimit(16384)] | ||
| [numthreads(64, 1, 1)] | ||
| void ASMain(uint GI : SV_GroupIndex) { | ||
| for (uint i = GI; i < GSM_DWORDS; i += 64) | ||
| g_shared[i] = i; | ||
| GroupMemoryBarrierWithGroupSync(); | ||
| if (GI == 0) { | ||
| for (uint j = 0; j < GSM_DWORDS; j++) | ||
| g_output[j] = g_shared[j]; | ||
| } | ||
| Payload payload; | ||
| payload.dummy = 0; | ||
| DispatchMesh(1, 1, 1, payload); | ||
| } | ||
|
|
||
| struct MeshOutput { | ||
| float4 pos : SV_Position; | ||
| }; | ||
|
|
||
| [OutputTopology("triangle")] | ||
| [numthreads(1, 1, 1)] | ||
| void MSMain(in payload Payload p, | ||
| out vertices MeshOutput verts[3], | ||
| out indices uint3 tris[1]) { | ||
| SetMeshOutputCounts(0, 0); | ||
| } | ||
|
|
||
| float4 PSMain() : SV_Target { return float4(0,0,0,0); } | ||
| )"; | ||
|
|
||
| RunGSMLimitShaderAndVerify(Device, m_support, "GroupSharedLimitASTest", | ||
| Shader, GSM_DWORDS, 0, ShaderOpSet); | ||
| LogCommentFmt( | ||
| L"AS Test passed: GroupSharedLimit in amplification shader succeeded."); | ||
| } | ||
| } | ||
|
| // Mesh-shader variant of the GroupSharedLimit execution test: creates an SM 6.10 device, reports Skipped when doesDeviceSupportMeshShaders returns false, logs the MS groupshared maximum, and runs the GroupSharedLimitMSTest shader op with a 16384-byte groupshared array under [GroupSharedLimit(16384)]. NOTE(review): MaxGSMMS is only logged; it does not gate the test. | ||
||
| void ExecutionTest::GroupSharedLimitMSTest() { | ||
| WEX::TestExecution::SetVerifyOutput VerifySettings( | ||
| WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); | ||
|
|
||
| CComPtr<ID3D12Device> Device; | ||
| if (!CreateGSMLimitTestDevice(&*D3D12SDK, Device)) | ||
| return; | ||
|
|
||
| if (!doesDeviceSupportMeshShaders(Device)) { | ||
| LogCommentFmt(L"Device does not support mesh shaders, skipping."); | ||
| WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); | ||
| return; | ||
| } | ||
|
|
||
| const UINT MaxGSMMS = getMaxGroupSharedMemoryMS(Device); | ||
| LogCommentFmt(L"Device MaxGroupSharedMemoryPerGroupMS: %u bytes", MaxGSMMS); | ||
|
|
||
| CComPtr<IStream> Stream; | ||
| std::shared_ptr<st::ShaderOpSet> ShaderOpSet = | ||
| std::make_shared<st::ShaderOpSet>(); | ||
| readHlslDataIntoNewStream(L"ShaderOpArith.xml", &Stream, m_support); | ||
| st::ParseShaderOpSetFromStream(Stream, ShaderOpSet.get()); | ||
|
|
||
| // Test: the MS sets zero output counts, fills groupshared memory with its indices, and thread 0 copies it to the UAV. The C++ GSM_DWORDS constant must match the #define in the shader text. | ||
| { | ||
| static const UINT GSM_DWORDS = 4096; | ||
|
|
||
| LogCommentFmt(L"MS Test: GroupSharedLimit == usage (16384 bytes). " | ||
| L"Mesh shader should compile and execute."); | ||
|
|
||
| static const char Shader[] = | ||
| R"( | ||
| #define GSM_DWORDS 4096 | ||
| groupshared uint g_shared[GSM_DWORDS]; // 16384 bytes | ||
| RWStructuredBuffer<uint> g_output : register(u0); | ||
|
|
||
| struct MeshOutput { | ||
| float4 pos : SV_Position; | ||
| }; | ||
|
|
||
| [GroupSharedLimit(16384)] | ||
| [OutputTopology("triangle")] | ||
| [numthreads(64, 1, 1)] | ||
| void MSMain(uint GI : SV_GroupIndex, | ||
| out vertices MeshOutput verts[3], | ||
| out indices uint3 tris[1]) { | ||
| SetMeshOutputCounts(0, 0); | ||
| for (uint i = GI; i < GSM_DWORDS; i += 64) | ||
| g_shared[i] = i; | ||
| GroupMemoryBarrierWithGroupSync(); | ||
| if (GI == 0) { | ||
| for (uint j = 0; j < GSM_DWORDS; j++) | ||
| g_output[j] = g_shared[j]; | ||
| } | ||
| } | ||
|
|
||
| float4 PSMain() : SV_Target { return float4(0,0,0,0); } | ||
| )"; | ||
|
|
||
| RunGSMLimitShaderAndVerify(Device, m_support, "GroupSharedLimitMSTest", | ||
| Shader, GSM_DWORDS, 0, ShaderOpSet); | ||
| LogCommentFmt( | ||
| L"MS Test passed: GroupSharedLimit in mesh shader succeeded."); | ||
| } | ||
| } | ||
|
|
||
| // Atomic operation testing | ||
|
|
||
| // Atomic tests take a single integer index as input and contort it into some | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For tests 1, 2, and 3 there seems to be a lot of duplicated code. Would it be worth factoring out the common logic into a helper?