Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
357 changes: 357 additions & 0 deletions tools/clang/unittests/HLSLExec/ExecutionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ class ExecutionTest {
TEST_METHOD(WaveIntrinsicsInPSTest);
TEST_METHOD(WaveSizeTest);
TEST_METHOD(WaveSizeRangeTest);
TEST_METHOD(GroupSharedLimitTest);
TEST_METHOD(GroupSharedLimitASTest);
TEST_METHOD(GroupSharedLimitMSTest);
TEST_METHOD(PartialDerivTest);
TEST_METHOD(DerivativesTest);
TEST_METHOD(ComputeSampleTest);
Expand Down Expand Up @@ -10619,6 +10622,360 @@ void ExecutionTest::WaveSizeRangeTest() {
m_support);
}

void ExecutionTest::GroupSharedLimitTest() {
WEX::TestExecution::SetVerifyOutput VerifySettings(
WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);

CComPtr<ID3D12Device> Device;
// GroupSharedLimit requires SM 6.10 (DXIL 1.10).
if (!createDevice(&Device, D3D_SHADER_MODEL_6_10,
Comment thread
JoeCitizen marked this conversation as resolved.
Outdated
Comment thread
JoeCitizen marked this conversation as resolved.
Outdated
/*skipUnsupported*/ false)) {
return;
}

UINT MaxGSMCS = getMaxGroupSharedMemoryCS(Device);
Comment thread
JoeCitizen marked this conversation as resolved.
Outdated
LogCommentFmt(L"Device MaxGroupSharedMemoryPerGroupCS: %u bytes", MaxGSMCS);

// Read shader config
CComPtr<IStream> Stream;
std::shared_ptr<st::ShaderOpSet> ShaderOpSet =
std::make_shared<st::ShaderOpSet>();
readHlslDataIntoNewStream(L"ShaderOpArith.xml", &Stream, m_support);
st::ParseShaderOpSetFromStream(Stream, ShaderOpSet.get());

// Test 1: GroupSharedLimit that is >= usage should succeed.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For test 1,2,3 there seems to be a lot of duplicated code. Would it be worth factoring out common logic into a helper?

// Use 4096 DWORDs (16384 bytes) of TGSM with a limit of 16384 bytes.
{
static const UINT GSM_DWORDS = 4096;
static const UINT NUM_THREADS = 64;

LogCommentFmt(L"Test 1: GroupSharedLimit == usage (16384 bytes). "
L"Shader should compile and execute successfully.");

// All threads cooperatively fill g_shared, then thread 0 copies
// the entire contents to the output buffer for verification.
static const char Shader[] =
R"(
#define GSM_DWORDS 4096
#define NUM_THREADS 64
groupshared uint g_shared[GSM_DWORDS]; // 16384 bytes
RWStructuredBuffer<uint> g_output : register(u0);

[GroupSharedLimit(16384)]
[numthreads(NUM_THREADS, 1, 1)]
void main(uint GI : SV_GroupIndex) {
// Each thread writes multiple elements to fill the array.
for (uint i = GI; i < GSM_DWORDS; i += NUM_THREADS)
g_shared[i] = i;
GroupMemoryBarrierWithGroupSync();
// Thread 0 copies all of groupshared memory to the output.
if (GI == 0) {
for (uint j = 0; j < GSM_DWORDS; j++)
g_output[j] = g_shared[j];
}
})";

std::shared_ptr<st::ShaderOpTestResult> Test =
st::RunShaderOpTestAfterParse(
Device, m_support, "GroupSharedLimitTest",
[&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *Op) {
VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10)));
Op->Shaders.at(0).Text = Shader;
Data.resize(sizeof(uint32_t) * GSM_DWORDS);
memset(Data.data(), 0, Data.size());
},
ShaderOpSet);

MappedData DataUav;
Test->Test->GetReadBackData("UAVBuffer0", &DataUav);
uint32_t *OutData = (uint32_t *)DataUav.data();

for (UINT I = 0; I < GSM_DWORDS; I++) {
VERIFY_ARE_EQUAL(OutData[I], I);
}
LogCommentFmt(L"Test 1 passed: GroupSharedLimit == usage succeeded.");
}

// Test 2: GroupSharedLimit > usage (raising the ceiling above default).
Comment thread
JoeCitizen marked this conversation as resolved.
Outdated
// Use 9216 DWORDs (36864 bytes) of TGSM, which exceeds the default 32768,
// but set limit to 36864 so it should succeed.
Comment thread
JoeCitizen marked this conversation as resolved.
Outdated
if (MaxGSMCS < 36864) {
Comment thread
JoeCitizen marked this conversation as resolved.
Outdated
LogCommentFmt(L"Test 2 skipped: device max GSM (%u) < 36864 bytes",
MaxGSMCS);
} else {
static const UINT GSM_DWORDS = 9216;
static const UINT NUM_THREADS = 64;

LogCommentFmt(L"Test 2: GroupSharedLimit (36864) > usage (36864 bytes), "
Comment thread
JoeCitizen marked this conversation as resolved.
Outdated
L"both above default (32768). "
L"Shader should compile and execute successfully.");

static const char Shader[] =
R"(
#define GSM_DWORDS 9216
#define NUM_THREADS 64
groupshared uint g_shared[GSM_DWORDS]; // 36864 bytes
RWStructuredBuffer<uint> g_output : register(u0);

[GroupSharedLimit(36864)]
[numthreads(NUM_THREADS, 1, 1)]
void main(uint GI : SV_GroupIndex) {
for (uint i = GI; i < GSM_DWORDS; i += NUM_THREADS)
g_shared[i] = i;
GroupMemoryBarrierWithGroupSync();
if (GI == 0) {
for (uint j = 0; j < GSM_DWORDS; j++)
g_output[j] = g_shared[j];
}
})";

std::shared_ptr<st::ShaderOpTestResult> Test =
st::RunShaderOpTestAfterParse(
Device, m_support, "GroupSharedLimitTest",
[&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *Op) {
VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10)));
Op->Shaders.at(0).Text = Shader;
Data.resize(sizeof(uint32_t) * GSM_DWORDS);
memset(Data.data(), 0, Data.size());
},
ShaderOpSet);

MappedData DataUav;
Test->Test->GetReadBackData("UAVBuffer0", &DataUav);
uint32_t *OutData = (uint32_t *)DataUav.data();

for (UINT I = 0; I < GSM_DWORDS; I++) {
VERIFY_ARE_EQUAL(OutData[I], I);
}
LogCommentFmt(L"Test 2 passed: GroupSharedLimit > default succeeded.");
}

// Test 3: No GroupSharedLimit attribute, usage within default (32768 bytes).
// The shader should use default limit and succeed.
{
static const UINT GSM_DWORDS = 8192;
static const UINT NUM_THREADS = 64;

LogCommentFmt(L"Test 3: No GroupSharedLimit, usage (32768 bytes) <= "
L"default limit. Shader should succeed.");

static const char Shader[] =
R"(
#define GSM_DWORDS 8192
#define NUM_THREADS 64
groupshared uint g_shared[GSM_DWORDS]; // 32768 bytes (default max)
RWStructuredBuffer<uint> g_output : register(u0);

[numthreads(NUM_THREADS, 1, 1)]
void main(uint GI : SV_GroupIndex) {
for (uint i = GI; i < GSM_DWORDS; i += NUM_THREADS)
g_shared[i] = i;
GroupMemoryBarrierWithGroupSync();
if (GI == 0) {
for (uint j = 0; j < GSM_DWORDS; j++)
g_output[j] = g_shared[j];
}
})";

std::shared_ptr<st::ShaderOpTestResult> Test =
st::RunShaderOpTestAfterParse(
Device, m_support, "GroupSharedLimitTest",
[&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *Op) {
VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10)));
Op->Shaders.at(0).Text = Shader;
Data.resize(sizeof(uint32_t) * GSM_DWORDS);
memset(Data.data(), 0, Data.size());
},
ShaderOpSet);

MappedData DataUav;
Test->Test->GetReadBackData("UAVBuffer0", &DataUav);
uint32_t *OutData = (uint32_t *)DataUav.data();

for (UINT I = 0; I < GSM_DWORDS; I++) {
VERIFY_ARE_EQUAL(OutData[I], I);
}
LogCommentFmt(L"Test 3 passed: No attribute with default usage succeeded.");
}
}

void ExecutionTest::GroupSharedLimitASTest() {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shared logic with GroupSharedLimitMSTest looks ripe for some refactoring/shared helpers.

WEX::TestExecution::SetVerifyOutput VerifySettings(
WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);

CComPtr<ID3D12Device> Device;
if (!createDevice(&Device, D3D_SHADER_MODEL_6_10,
/*skipUnsupported*/ false)) {
return;
}

if (!doesDeviceSupportMeshShaders(Device)) {
LogCommentFmt(L"Device does not support mesh shaders, skipping.");
WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
return;
}

UINT MaxGSMAS = getMaxGroupSharedMemoryAS(Device);
LogCommentFmt(L"Device MaxGroupSharedMemoryPerGroupAS: %u bytes", MaxGSMAS);

CComPtr<IStream> Stream;
std::shared_ptr<st::ShaderOpSet> ShaderOpSet =
std::make_shared<st::ShaderOpSet>();
readHlslDataIntoNewStream(L"ShaderOpArith.xml", &Stream, m_support);
st::ParseShaderOpSetFromStream(Stream, ShaderOpSet.get());

// Test: AS shader fills groupshared memory and writes to UAV.
{
static const UINT GSM_DWORDS = 4096;

LogCommentFmt(L"AS Test: GroupSharedLimit == usage (16384 bytes). "
L"Amplification shader should compile and execute.");

static const char Shader[] =
R"(
struct Payload { uint dummy; };

#define GSM_DWORDS 4096
groupshared uint g_shared[GSM_DWORDS]; // 16384 bytes
RWStructuredBuffer<uint> g_output : register(u0);

[GroupSharedLimit(16384)]
[numthreads(64, 1, 1)]
void ASMain(uint GI : SV_GroupIndex) {
for (uint i = GI; i < GSM_DWORDS; i += 64)
g_shared[i] = i;
GroupMemoryBarrierWithGroupSync();
if (GI == 0) {
for (uint j = 0; j < GSM_DWORDS; j++)
g_output[j] = g_shared[j];
}
Payload payload;
payload.dummy = 0;
DispatchMesh(1, 1, 1, payload);
}

struct MeshOutput {
float4 pos : SV_Position;
};

[OutputTopology("triangle")]
[numthreads(1, 1, 1)]
void MSMain(in payload Payload p,
out vertices MeshOutput verts[3],
out indices uint3 tris[1]) {
SetMeshOutputCounts(0, 0);
}

float4 PSMain() : SV_Target { return float4(0,0,0,0); }
)";

std::shared_ptr<st::ShaderOpTestResult> Test =
st::RunShaderOpTestAfterParse(
Device, m_support, "GroupSharedLimitASTest",
[&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *Op) {
VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10)));
Op->Shaders.at(0).Text = Shader;
Data.resize(sizeof(uint32_t) * GSM_DWORDS);
memset(Data.data(), 0, Data.size());
},
ShaderOpSet);

MappedData DataUav;
Test->Test->GetReadBackData("UAVBuffer0", &DataUav);
uint32_t *OutData = (uint32_t *)DataUav.data();

for (UINT I = 0; I < GSM_DWORDS; I++) {
VERIFY_ARE_EQUAL(OutData[I], I);
}
LogCommentFmt(
L"AS Test passed: GroupSharedLimit in amplification shader succeeded.");
}
}

void ExecutionTest::GroupSharedLimitMSTest() {
WEX::TestExecution::SetVerifyOutput VerifySettings(
WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);

CComPtr<ID3D12Device> Device;
if (!createDevice(&Device, D3D_SHADER_MODEL_6_10,
/*skipUnsupported*/ false)) {
return;
}

if (!doesDeviceSupportMeshShaders(Device)) {
LogCommentFmt(L"Device does not support mesh shaders, skipping.");
WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
return;
}

UINT MaxGSMMS = getMaxGroupSharedMemoryMS(Device);
LogCommentFmt(L"Device MaxGroupSharedMemoryPerGroupMS: %u bytes", MaxGSMMS);

CComPtr<IStream> Stream;
std::shared_ptr<st::ShaderOpSet> ShaderOpSet =
std::make_shared<st::ShaderOpSet>();
readHlslDataIntoNewStream(L"ShaderOpArith.xml", &Stream, m_support);
st::ParseShaderOpSetFromStream(Stream, ShaderOpSet.get());

// Test: MS shader fills groupshared memory and writes to UAV.
{
static const UINT GSM_DWORDS = 4096;

LogCommentFmt(L"MS Test: GroupSharedLimit == usage (16384 bytes). "
L"Mesh shader should compile and execute.");

static const char Shader[] =
R"(
#define GSM_DWORDS 4096
groupshared uint g_shared[GSM_DWORDS]; // 16384 bytes
RWStructuredBuffer<uint> g_output : register(u0);

struct MeshOutput {
float4 pos : SV_Position;
};

[GroupSharedLimit(16384)]
[OutputTopology("triangle")]
[numthreads(64, 1, 1)]
void MSMain(uint GI : SV_GroupIndex,
out vertices MeshOutput verts[3],
out indices uint3 tris[1]) {
SetMeshOutputCounts(0, 0);
for (uint i = GI; i < GSM_DWORDS; i += 64)
g_shared[i] = i;
GroupMemoryBarrierWithGroupSync();
if (GI == 0) {
for (uint j = 0; j < GSM_DWORDS; j++)
g_output[j] = g_shared[j];
}
}

float4 PSMain() : SV_Target { return float4(0,0,0,0); }
)";

std::shared_ptr<st::ShaderOpTestResult> Test =
st::RunShaderOpTestAfterParse(
Device, m_support, "GroupSharedLimitMSTest",
[&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *Op) {
VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10)));
Op->Shaders.at(0).Text = Shader;
Data.resize(sizeof(uint32_t) * GSM_DWORDS);
memset(Data.data(), 0, Data.size());
},
ShaderOpSet);

MappedData DataUav;
Test->Test->GetReadBackData("UAVBuffer0", &DataUav);
uint32_t *OutData = (uint32_t *)DataUav.data();

for (UINT I = 0; I < GSM_DWORDS; I++) {
VERIFY_ARE_EQUAL(OutData[I], I);
}
LogCommentFmt(
L"MS Test passed: GroupSharedLimit in mesh shader succeeded.");
}
}

// Atomic operation testing

// Atomic tests take a single integer index as input and contort it into some
Expand Down
Loading