diff --git a/tools/clang/unittests/HLSLExec/CMakeLists.txt b/tools/clang/unittests/HLSLExec/CMakeLists.txt index 8282fd5282..216799a464 100644 --- a/tools/clang/unittests/HLSLExec/CMakeLists.txt +++ b/tools/clang/unittests/HLSLExec/CMakeLists.txt @@ -10,6 +10,7 @@ add_clang_library(ExecHLSLTests SHARED ShaderOpTest.cpp TableParameterHandler.cpp LongVectors.cpp + LinearAlgebra.cpp HlslExecTestUtils.cpp ExecHLSLTests.rc ) diff --git a/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h new file mode 100644 index 0000000000..37becc8f39 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/HLSLTestDataTypes.h @@ -0,0 +1,606 @@ +#ifndef HLSLTESTDATATYPES_H +#define HLSLTESTDATATYPES_H + +#include +#include +#include +#include +#include + +#include +#include + +#include "HlslTestUtils.h" +#include "dxc/Support/Global.h" + +// Shared HLSL type wrappers for use in execution tests. +// These types bridge the gap between C++ and HLSL type representations. +namespace HLSLTestDataTypes { + +// A helper struct because C++ bools are 1 byte and HLSL bools are 4 bytes. +// Take int32_t as a constructor argument and convert it to bool when needed. +// Comparisons cast to a bool because we only care if the bool representation is +// true or false. 
+struct HLSLBool_t { + HLSLBool_t() : Val(0) {} + HLSLBool_t(int32_t Val) : Val(Val) {} + HLSLBool_t(bool Val) : Val(Val) {} + + bool operator==(const HLSLBool_t &Other) const { + return static_cast(Val) == static_cast(Other.Val); + } + + bool operator!=(const HLSLBool_t &Other) const { + return static_cast(Val) != static_cast(Other.Val); + } + + bool operator<(const HLSLBool_t &Other) const { return Val < Other.Val; } + + bool operator>(const HLSLBool_t &Other) const { return Val > Other.Val; } + + bool operator<=(const HLSLBool_t &Other) const { return Val <= Other.Val; } + + bool operator>=(const HLSLBool_t &Other) const { return Val >= Other.Val; } + + HLSLBool_t operator*(const HLSLBool_t &Other) const { + return HLSLBool_t(Val * Other.Val); + } + + HLSLBool_t operator+(const HLSLBool_t &Other) const { + return HLSLBool_t(Val + Other.Val); + } + + HLSLBool_t operator-(const HLSLBool_t &Other) const { + return HLSLBool_t(Val - Other.Val); + } + + HLSLBool_t operator/(const HLSLBool_t &Other) const { + return HLSLBool_t(Val / Other.Val); + } + + HLSLBool_t operator%(const HLSLBool_t &Other) const { + return HLSLBool_t(Val % Other.Val); + } + + HLSLBool_t operator&&(const HLSLBool_t &Other) const { + return HLSLBool_t(Val && Other.Val); + } + + HLSLBool_t operator||(const HLSLBool_t &Other) const { + return HLSLBool_t(Val || Other.Val); + } + + bool AsBool() const { return static_cast(Val); } + + operator bool() const { return AsBool(); } + operator int16_t() const { return (int16_t)(AsBool()); } + operator int32_t() const { return (int32_t)(AsBool()); } + operator int64_t() const { return (int64_t)(AsBool()); } + operator uint16_t() const { return (uint16_t)(AsBool()); } + operator uint32_t() const { return (uint32_t)(AsBool()); } + operator uint64_t() const { return (uint64_t)(AsBool()); } + operator float() const { return (float)(AsBool()); } + operator double() const { return (double)(AsBool()); } + + // So we can construct std::wstrings using std::wostream + 
friend std::wostream &operator<<(std::wostream &Os, const HLSLBool_t &Obj) { + Os << static_cast(Obj.Val); + return Os; + } + + // So we can construct std::strings using std::ostream + friend std::ostream &operator<<(std::ostream &Os, const HLSLBool_t &Obj) { + Os << static_cast(Obj.Val); + return Os; + } + + int32_t Val = 0; +}; + +// No native float16 type in C++ until C++23 . So we use uint16_t to represent +// it. Simple little wrapping struct to help handle the right behavior. +struct HLSLHalf_t { + HLSLHalf_t() : Val(0) {} + HLSLHalf_t(const float F) { + Val = DirectX::PackedVector::XMConvertFloatToHalf(F); + } + HLSLHalf_t(const double D) { + float F; + if (D >= std::numeric_limits::max()) + F = std::numeric_limits::max(); + else if (D <= std::numeric_limits::lowest()) + F = std::numeric_limits::lowest(); + else + F = static_cast(D); + + Val = DirectX::PackedVector::XMConvertFloatToHalf(F); + } + HLSLHalf_t(const uint32_t U) { + float F = static_cast(U); + Val = DirectX::PackedVector::XMConvertFloatToHalf(F); + } + + // PackedVector::HALF is a uint16. Make sure we don't ever accidentally + // convert one of these to a HLSLHalf_t by arithmetically converting it to a + // float. + HLSLHalf_t(DirectX::PackedVector::HALF) = delete; + + static double GetULP(HLSLHalf_t A) { + DXASSERT(!std::isnan(A) && !std::isinf(A), + "ULP of NaN or infinity is undefined"); + + HLSLHalf_t Next = A; + ++Next.Val; + + double NextD = Next; + double AD = A; + return NextD - AD; + } + + static HLSLHalf_t FromHALF(DirectX::PackedVector::HALF Half) { + HLSLHalf_t H; + H.Val = Half; + return H; + } + + // Implicit conversion to float for use with things like std::acos, std::tan, + // etc + operator float() const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val); + } + + bool operator==(const HLSLHalf_t &Other) const { + // Convert to floats to properly handle the '0 == -0' case which must + // compare to true but have different uint16_t values. + // That is, 0 == -0 is true. 
We store Val as a uint16_t. + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return A == B; + } + + bool operator<(const HLSLHalf_t &Other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val) < + DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + } + + bool operator>(const HLSLHalf_t &Other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val) > + DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + } + + // Used by tolerance checks in the tests. + bool operator>(float F) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + return A > F; + } + + bool operator<(float F) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + return A < F; + } + + bool operator<=(const HLSLHalf_t &Other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val) <= + DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + } + + bool operator>=(const HLSLHalf_t &Other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val) >= + DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + } + + bool operator!=(const HLSLHalf_t &Other) const { return Val != Other.Val; } + + HLSLHalf_t operator*(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A * B)); + } + + HLSLHalf_t operator+(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return FromHALF((DirectX::PackedVector::XMConvertFloatToHalf(A + B))); + } + + HLSLHalf_t operator-(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = 
DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A - B)); + } + + HLSLHalf_t operator/(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A / B)); + } + + HLSLHalf_t operator%(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + const float C = std::fmod(A, B); + return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(C)); + } + + // So we can construct std::wstrings using std::wostream + friend std::wostream &operator<<(std::wostream &Os, const HLSLHalf_t &Obj) { + Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val); + return Os; + } + + // So we can construct std::wstrings using std::wostream + friend std::ostream &operator<<(std::ostream &Os, const HLSLHalf_t &Obj) { + Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val); + return Os; + } + + // HALF is an alias to uint16_t + DirectX::PackedVector::HALF Val = 0; +}; + +// Normalized type wrappers for SNorm [-1,1] and UNorm [0,1] interpretations. +// Thin wrappers over floating-point types to enable type-distinct input sets. +// The Signed parameter distinguishes SNorm (true) from UNorm (false). 
+template struct HLSLNorm_t { + BaseT Val; + + HLSLNorm_t() : Val() {} + HLSLNorm_t(BaseT V) : Val(V) {} + + operator BaseT() const { return Val; } + + HLSLNorm_t operator*(const HLSLNorm_t &O) const { + return HLSLNorm_t(Val * O.Val); + } + HLSLNorm_t operator+(const HLSLNorm_t &O) const { + return HLSLNorm_t(Val + O.Val); + } + HLSLNorm_t operator-(const HLSLNorm_t &O) const { + return HLSLNorm_t(Val - O.Val); + } + + bool operator==(const HLSLNorm_t &O) const { return Val == O.Val; } + bool operator!=(const HLSLNorm_t &O) const { return !(Val == O.Val); } + bool operator<(const HLSLNorm_t &O) const { return Val < O.Val; } + bool operator>(const HLSLNorm_t &O) const { return Val > O.Val; } + bool operator<=(const HLSLNorm_t &O) const { return Val <= O.Val; } + bool operator>=(const HLSLNorm_t &O) const { return Val >= O.Val; } + + friend std::ostream &operator<<(std::ostream &Os, const HLSLNorm_t &Obj) { + Os << Obj.Val; + return Os; + } + friend std::wostream &operator<<(std::wostream &Os, const HLSLNorm_t &Obj) { + Os << Obj.Val; + return Os; + } +}; + +using SNormF16_t = HLSLNorm_t; +using UNormF16_t = HLSLNorm_t; +using SNormF32_t = HLSLNorm_t; +using UNormF32_t = HLSLNorm_t; +using SNormF64_t = HLSLNorm_t; +using UNormF64_t = HLSLNorm_t; + +// FP8 E4M3 type wrapper (1 sign, 4 exponent, 3 mantissa, bias 7). +// Range: [-448, 448]. No Inf; only NaN (0x7F/0xFF). 
struct F8E4M3_t {
  // Raw encoded byte: bit 7 = sign, bits 6..3 = biased exponent,
  // bits 2..0 = mantissa.
  uint8_t Val;

  F8E4M3_t() : Val(0) {}
  F8E4M3_t(float F) { Val = FloatToF8E4M3(F); }

  operator float() const { return F8E4M3ToFloat(Val); }

  // Arithmetic is carried out in float and the result is re-encoded.
  F8E4M3_t operator*(const F8E4M3_t &O) const {
    return F8E4M3_t(float(*this) * float(O));
  }
  F8E4M3_t operator+(const F8E4M3_t &O) const {
    return F8E4M3_t(float(*this) + float(O));
  }
  F8E4M3_t operator-(const F8E4M3_t &O) const {
    return F8E4M3_t(float(*this) - float(O));
  }

  // Equality compares the encoded byte; ordering compares decoded values.
  bool operator==(const F8E4M3_t &O) const { return Val == O.Val; }
  bool operator!=(const F8E4M3_t &O) const { return Val != O.Val; }
  bool operator<(const F8E4M3_t &O) const { return float(*this) < float(O); }
  bool operator>(const F8E4M3_t &O) const { return float(*this) > float(O); }
  bool operator<=(const F8E4M3_t &O) const { return float(*this) <= float(O); }
  bool operator>=(const F8E4M3_t &O) const { return float(*this) >= float(O); }

  friend std::ostream &operator<<(std::ostream &Os, const F8E4M3_t &Obj) {
    Os << float(Obj);
    return Os;
  }
  friend std::wostream &operator<<(std::wostream &Os, const F8E4M3_t &Obj) {
    Os << float(Obj);
    return Os;
  }

private:
  // Decodes an E4M3 byte to float.
  static float F8E4M3ToFloat(uint8_t V) {
    uint8_t Sign = (V >> 7) & 1;
    uint8_t Exp = (V >> 3) & 0xF;
    uint8_t Mant = V & 0x7;

    // The only NaN encodings are exponent and mantissa all ones.
    if (Exp == 0xF && Mant == 0x7)
      return std::numeric_limits<float>::quiet_NaN();

    float Result;
    if (Exp == 0)
      // Subnormal: Mant * 2^-9.
      Result = std::ldexp(static_cast<float>(Mant), -9);
    else
      // Normal: (1 + Mant/8) * 2^(Exp - 7).
      Result = std::ldexp(1.0f + static_cast<float>(Mant) / 8.0f, Exp - 7);

    return Sign ? -Result : Result;
  }

  // Encodes a float as E4M3. Magnitudes of 448 or more (including infinity)
  // saturate to +/-448; NaN maps to 0x7F. Rounding uses std::round, so ties
  // go away from zero.
  static uint8_t FloatToF8E4M3(float F) {
    if (std::isnan(F))
      return 0x7F;

    uint8_t Sign = 0;
    if (F < 0.0f) {
      Sign = 1;
      F = -F;
    }
    const uint8_t SignBit = static_cast<uint8_t>(Sign << 7);

    if (F == 0.0f)
      return SignBit;

    // Bit pattern of the largest finite magnitude (E=15, M=6 -> 448).
    const uint8_t MaxFinite = static_cast<uint8_t>(SignBit | (0xF << 3) | 0x6);

    // Clamp to max representable.
    if (F >= 448.0f)
      return MaxFinite;

    int Exp;
    float Frac = std::frexp(F, &Exp);
    int BiasedExp = Exp + 6;

    if (BiasedExp <= 0) {
      // Subnormal range: F < 2^-6, encoded as Mant * 2^-9.
      int Mant = static_cast<int>(std::round(F * 512.0f));
      // Rounding can reach 8, which is exactly the smallest normal (2^-6).
      // Its bit pattern (exponent 1, mantissa 0) is 0x08, so the count is
      // OR-ed in as-is instead of being clamped down to the largest
      // subnormal.
      if (Mant > 8)
        Mant = 8;
      if (Mant < 0)
        Mant = 0;
      return static_cast<uint8_t>(SignBit | static_cast<uint8_t>(Mant));
    }

    float Significand = 2.0f * Frac;
    int Mant = static_cast<int>(std::round((Significand - 1.0f) * 8.0f));

    // Mantissa rounded up to 2.0: carry into the exponent.
    if (Mant >= 8) {
      Mant = 0;
      BiasedExp++;
    }

    // Anything past E=15, M=6 would be NaN or out of range: saturate.
    if (BiasedExp > 15 || (BiasedExp == 15 && Mant > 6))
      return MaxFinite;

    return static_cast<uint8_t>(SignBit |
                                (static_cast<uint8_t>(BiasedExp) << 3) |
                                static_cast<uint8_t>(Mant));
  }
};

// FP8 E5M2 type wrapper (1 sign, 5 exponent, 2 mantissa, bias 15).
// Range: [-57344, 57344]. Has Inf and NaN (like IEEE 754).
struct F8E5M2_t {
  // Raw encoded byte: bit 7 = sign, bits 6..2 = biased exponent,
  // bits 1..0 = mantissa.
  uint8_t Val;

  F8E5M2_t() : Val(0) {}
  F8E5M2_t(float F) { Val = FloatToF8E5M2(F); }

  operator float() const { return F8E5M2ToFloat(Val); }

  // Arithmetic is carried out in float and the result is re-encoded.
  F8E5M2_t operator*(const F8E5M2_t &O) const {
    return F8E5M2_t(float(*this) * float(O));
  }
  F8E5M2_t operator+(const F8E5M2_t &O) const {
    return F8E5M2_t(float(*this) + float(O));
  }
  F8E5M2_t operator-(const F8E5M2_t &O) const {
    return F8E5M2_t(float(*this) - float(O));
  }

  // Equality compares the encoded byte; ordering compares decoded values.
  bool operator==(const F8E5M2_t &O) const { return Val == O.Val; }
  bool operator!=(const F8E5M2_t &O) const { return Val != O.Val; }
  bool operator<(const F8E5M2_t &O) const { return float(*this) < float(O); }
  bool operator>(const F8E5M2_t &O) const { return float(*this) > float(O); }
  bool operator<=(const F8E5M2_t &O) const { return float(*this) <= float(O); }
  bool operator>=(const F8E5M2_t &O) const { return float(*this) >= float(O); }

  friend std::ostream &operator<<(std::ostream &Os, const F8E5M2_t &Obj) {
    Os << float(Obj);
    return Os;
  }
  friend std::wostream &operator<<(std::wostream &Os, const F8E5M2_t &Obj) {
    Os << float(Obj);
    return Os;
  }

private:
  // Decodes an E5M2 byte to float.
  static float F8E5M2ToFloat(uint8_t V) {
    uint8_t Sign = (V >> 7) & 1;
    uint8_t Exp = (V >> 2) & 0x1F;
    uint8_t Mant = V & 0x3;

    // All-ones exponent: zero mantissa is infinity, anything else is NaN.
    if (Exp == 0x1F) {
      if (Mant == 0)
        return Sign ? -std::numeric_limits<float>::infinity()
                    : std::numeric_limits<float>::infinity();
      return std::numeric_limits<float>::quiet_NaN();
    }

    float Result;
    if (Exp == 0)
      // Subnormal: Mant * 2^-16.
      Result = std::ldexp(static_cast<float>(Mant), -16);
    else
      // Normal: (1 + Mant/4) * 2^(Exp - 15).
      Result = std::ldexp(1.0f + static_cast<float>(Mant) / 4.0f, Exp - 15);

    return Sign ? -Result : Result;
  }

  // Encodes a float as E5M2. Infinity maps to infinity, finite magnitudes of
  // 57344 or more saturate to +/-57344, and NaN maps to 0x7F. Rounding uses
  // std::round, so ties go away from zero.
  static uint8_t FloatToF8E5M2(float F) {
    if (std::isnan(F))
      return 0x7F;

    uint8_t Sign = 0;
    if (F < 0.0f) {
      Sign = 1;
      F = -F;
    }
    const uint8_t SignBit = static_cast<uint8_t>(Sign << 7);
    const uint8_t Infinity = static_cast<uint8_t>(SignBit | (0x1F << 2));

    if (std::isinf(F))
      return Infinity;

    if (F == 0.0f)
      return SignBit;

    // Clamp to max representable (E=30, M=3 -> 57344).
    if (F >= 57344.0f)
      return static_cast<uint8_t>(SignBit | (0x1E << 2) | 0x3);

    int Exp;
    float Frac = std::frexp(F, &Exp);
    int BiasedExp = Exp + 14;

    if (BiasedExp <= 0) {
      // Subnormal range: F < 2^-14, encoded as Mant * 2^-16.
      int Mant = static_cast<int>(std::round(F * 65536.0f));
      // Rounding can reach 4, which is exactly the smallest normal (2^-14).
      // Its bit pattern (exponent 1, mantissa 0) is 0x04, so the count is
      // OR-ed in as-is instead of being clamped down to the largest
      // subnormal.
      if (Mant > 4)
        Mant = 4;
      if (Mant < 0)
        Mant = 0;
      return static_cast<uint8_t>(SignBit | static_cast<uint8_t>(Mant));
    }

    float Significand = 2.0f * Frac;
    int Mant = static_cast<int>(std::round((Significand - 1.0f) * 4.0f));

    // Mantissa rounded up to 2.0: carry into the exponent.
    if (Mant >= 4) {
      Mant = 0;
      BiasedExp++;
    }

    if (BiasedExp >= 31)
      return Infinity;

    return static_cast<uint8_t>(SignBit |
                                (static_cast<uint8_t>(BiasedExp) << 2) |
                                static_cast<uint8_t>(Mant));
  }
};

//
// Shared type traits and validation infrastructure.
+// + +template constexpr bool isFloatingPointType() { + return std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v; +} + +enum class ValidationType { + Epsilon, + Ulp, +}; + +struct ValidationConfig { + double Tolerance = 0.0; + ValidationType Type = ValidationType::Epsilon; + + static ValidationConfig Epsilon(double Tolerance) { + return ValidationConfig{Tolerance, ValidationType::Epsilon}; + } + + static ValidationConfig Ulp(double Tolerance) { + return ValidationConfig{Tolerance, ValidationType::Ulp}; + } +}; + +// Default validation: ULP for floating point, exact for integers. +template struct DefaultValidation { + ValidationConfig Validation; + + DefaultValidation() { + if constexpr (isFloatingPointType()) + Validation = ValidationConfig::Ulp(1.0f); + } +}; + +// Strict validation: exact match by default. +struct StrictValidation { + ValidationConfig Validation; +}; + +// +// Value comparison overloads used by both LongVector and LinearAlgebra tests. +// + +template +inline bool doValuesMatch(T A, T B, double Tolerance, ValidationType) { + if (Tolerance == 0.0) + return A == B; + + T Diff = A > B ? A - B : B - A; + return Diff <= Tolerance; +} + +inline bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, double, ValidationType) { + return A == B; +} + +inline bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance, + ValidationType VType) { + switch (VType) { + case ValidationType::Epsilon: + return CompareHalfEpsilon(A.Val, B.Val, static_cast(Tolerance)); + case ValidationType::Ulp: + return CompareHalfULP(A.Val, B.Val, static_cast(Tolerance)); + default: + hlsl_test::LogErrorFmt( + L"Invalid ValidationType. 
Expecting Epsilon or ULP."); + return false; + } +} + +inline bool doValuesMatch(float A, float B, double Tolerance, + ValidationType VType) { + switch (VType) { + case ValidationType::Epsilon: + return CompareFloatEpsilon(A, B, static_cast(Tolerance)); + case ValidationType::Ulp: { + const int IntTolerance = static_cast(Tolerance); + return CompareFloatULP(A, B, IntTolerance); + } + default: + hlsl_test::LogErrorFmt( + L"Invalid ValidationType. Expecting Epsilon or ULP."); + return false; + } +} + +inline bool doValuesMatch(double A, double B, double Tolerance, + ValidationType VType) { + switch (VType) { + case ValidationType::Epsilon: + return CompareDoubleEpsilon(A, B, Tolerance); + case ValidationType::Ulp: { + const int64_t IntTolerance = static_cast(Tolerance); + return CompareDoubleULP(A, B, IntTolerance); + } + default: + hlsl_test::LogErrorFmt( + L"Invalid ValidationType. Expecting Epsilon or ULP."); + return false; + } +} + +} // namespace HLSLTestDataTypes + +#endif // HLSLTESTDATATYPES_H diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp new file mode 100644 index 0000000000..1178ac4f36 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LinearAlgebra.cpp @@ -0,0 +1,576 @@ +#ifndef NOMINMAX +#define NOMINMAX 1 +#endif + +#define INLINE_TEST_METHOD_MARKUP +#include + +#include "LinearAlgebraTestData.h" + +#include "ShaderOpTest.h" +#include "dxc/Support/Global.h" + +#include "HlslTestUtils.h" + +#include "HlslExecTestUtils.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace LinearAlgebra { + +// +// Operation Types +// + +enum class OpType : unsigned { +#define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2) SYMBOL, +#include "LinearAlgebraOps.def" + NumOpTypes +}; + +struct Operation { + size_t Arity; + const char *Define; + const char *ShaderName; + InputSet InputSets[2]; + OpType Type; +}; + +static constexpr Operation Operations[] = { 
+#define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2) \ + {ARITY, \ + DEFINE, \ + SHADER_NAME, \ + {InputSet::INPUT_SET_1, InputSet::INPUT_SET_2}, \ + OpType::SYMBOL}, +#include "LinearAlgebraOps.def" +}; + +constexpr const Operation &getOperation(OpType Op) { + if (Op < OpType::NumOpTypes) + return Operations[unsigned(Op)]; + std::abort(); +} + +// +// Data Types +// + +struct DataType { + const char *HLSLTypeString; + const char *CompTypeString; + bool Is16Bit; + size_t HLSLSizeInBytes; +}; + +template const DataType &getDataType() { + static_assert(sizeof(T) == 0, "Unknown data type"); +} + +#define DATA_TYPE(TYPE, HLSL_STRING, COMP_TYPE, HLSL_SIZE, IS_16BIT) \ + template <> const DataType &getDataType() { \ + static DataType DT{HLSL_STRING, COMP_TYPE, IS_16BIT, HLSL_SIZE}; \ + return DT; \ + } + +DATA_TYPE(HLSLHalf_t, "float16_t", "ComponentType::F16", 2, true) +DATA_TYPE(float, "float", "ComponentType::F32", 4, false) +DATA_TYPE(double, "double", "ComponentType::F64", 8, false) +DATA_TYPE(int32_t, "int", "ComponentType::I32", 4, false) +DATA_TYPE(uint32_t, "uint", "ComponentType::U32", 4, false) + +#undef DATA_TYPE + +using HLSLTestDataTypes::DefaultValidation; +using HLSLTestDataTypes::doValuesMatch; +using HLSLTestDataTypes::HLSLHalf_t; +using HLSLTestDataTypes::isFloatingPointType; +using HLSLTestDataTypes::StrictValidation; +using HLSLTestDataTypes::ValidationConfig; +using HLSLTestDataTypes::ValidationType; + +template +bool doMatricesMatch(const std::vector &Actual, + const std::vector &Expected, size_t M, size_t N, + const ValidationConfig &Config, bool VerboseLogging) { + DXASSERT(Actual.size() == Expected.size(), + "Actual and Expected must be the same size"); + + if (VerboseLogging) + hlsl_test::LogCommentFmt(L"Verifying %zux%zu matrix (%zu elements)", M, N, + Actual.size()); + + std::vector MismatchedIndexes; + for (size_t I = 0; I < Actual.size(); I++) { + if (!doValuesMatch(Actual[I], Expected[I], Config.Tolerance, 
Config.Type)) + MismatchedIndexes.push_back(I); + } + + if (MismatchedIndexes.empty()) + return true; + + for (size_t Index : MismatchedIndexes) { + std::wstringstream Wss(L""); + Wss << std::setprecision(15); + // Assumes row-major layout for (row,col) decomposition. + Wss << L"Mismatch at (" << Index / N << L"," << Index % N << L")"; + Wss << L" Actual:" << Actual[Index]; + Wss << L" Expected:" << Expected[Index]; + hlsl_test::LogErrorFmt(Wss.str().c_str()); + } + + return false; +} + +// +// Matrix dimensions for test iteration. +// + +struct MatrixDims { + size_t Rows; + size_t Cols; +}; + +std::vector getMatrixSizesToTest() { + return {{2, 2}, {4, 4}, {4, 8}, {8, 4}, {8, 8}}; +} + +// +// Build compiler options. +// + +std::string getCompilerOptionsString(const Operation &Op, + const DataType &ElemType, size_t Rows, + size_t Cols, size_t KDim = 0) { + std::stringstream Options; + + if (ElemType.Is16Bit) + Options << " -enable-16bit-types"; + + Options << " -D" << Op.Define; + Options << " -DELEM_TYPE=" << ElemType.HLSLTypeString; + Options << " -DOUT_TYPE=" << ElemType.HLSLTypeString; + Options << " -DCOMP_TYPE=" << ElemType.CompTypeString; + Options << " -DROWS=" << Rows; + Options << " -DCOLS=" << Cols; + + if (KDim > 0) + Options << " -DK_DIM=" << KDim; + + Options << " -DMATRIX_LAYOUT=0"; // 0 = RowMajor, 1 = ColMajor + + return Options.str(); +} + +// +// Shader buffer helpers. 
+// + +template +void fillShaderBuffer(std::vector &ShaderBuffer, + const std::vector &Data) { + const size_t DataSize = sizeof(T) * Data.size(); + DXASSERT_NOMSG(ShaderBuffer.size() >= DataSize); + + if constexpr (std::is_same_v) { + auto *Ptr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t I = 0; I < Data.size(); I++) + Ptr[I] = Data[I].Val; + return; + } + + auto *Ptr = reinterpret_cast(ShaderBuffer.data()); + for (size_t I = 0; I < Data.size(); I++) + Ptr[I] = Data[I]; +} + +template +void readShaderBuffer(const MappedData &ShaderBuffer, std::vector &OutData, + size_t NumElements) { + if constexpr (std::is_same_v) { + auto *Ptr = + static_cast(ShaderBuffer.data()); + for (size_t I = 0; I < NumElements; I++) + OutData.push_back(HLSLHalf_t::FromHALF(Ptr[I])); + return; + } + + auto *Ptr = static_cast(ShaderBuffer.data()); + for (size_t I = 0; I < NumElements; I++) + OutData.push_back(Ptr[I]); +} + +// +// Input building helpers. Following LongVector::buildTestInput pattern. +// + +template using InputSets = std::vector>; + +template +std::vector buildTestInput(InputSet Set, size_t NumElements) { + const std::vector &RawData = getInputSet(Set); + + std::vector Result; + Result.reserve(NumElements); + for (size_t I = 0; I < NumElements; ++I) + Result.push_back(RawData[I % RawData.size()]); + + return Result; +} + +// Build an identity matrix of the given dimensions using the Identity InputSet +// for the diagonal value. +template +std::vector buildIdentityMatrix(size_t Rows, size_t Cols) { + const T One = getInputSet(InputSet::Identity)[0]; + const T Zero = One - One; + std::vector Result(Rows * Cols, Zero); + size_t MinDim = Rows < Cols ? 
Rows : Cols; + for (size_t I = 0; I < MinDim; ++I) + Result[I * Cols + I] = One; + return Result; +} + +template +InputSets buildTestInputs(const Operation &Op, size_t Rows, size_t Cols, + size_t KDim) { + InputSets Inputs; + const size_t NumElements = Rows * Cols; + + if (Op.Arity >= 1) + Inputs.push_back(buildTestInput(Op.InputSets[0], NumElements)); + + if (Op.Arity >= 2) { + // For binary ops the second input may be an identity matrix. + if (Op.InputSets[1] == InputSet::Identity) + Inputs.push_back(buildIdentityMatrix(KDim, Cols)); + else + Inputs.push_back(buildTestInput(Op.InputSets[1], KDim * Cols)); + } + + return Inputs; +} + +// +// Core GPU test runner. Returns the output buffer or nullopt if skipped. +// + +template +std::optional> +runLinAlgTest(ID3D12Device *D3DDevice, bool VerboseLogging, const Operation &Op, + const InputSets &Inputs, size_t Rows, size_t Cols, size_t KDim, + size_t ExpectedOutputSize) { + + const DataType &ElemType = getDataType(); + + std::string CompilerOptions = + getCompilerOptionsString(Op, ElemType, Rows, Cols, KDim); + + if (VerboseLogging) + hlsl_test::LogCommentFmt(L"Compiler Options: %S", CompilerOptions.c_str()); + + dxc::SpecificDllLoader DxilDllLoader; + CComPtr TestXML; + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &TestXML, DxilDllLoader); + auto ShaderOpSet = std::make_shared(); + st::ParseShaderOpSetFromStream(TestXML, ShaderOpSet.get()); + + std::shared_ptr TestResult = + st::RunShaderOpTestAfterParse( + D3DDevice, DxilDllLoader, Op.ShaderName, + [&](LPCSTR Name, std::vector &ShaderData, + st::ShaderOp *ShaderOp) { + if (VerboseLogging) + hlsl_test::LogCommentFmt( + L"LinAlg RunShaderOpTest CallBack. 
Resource Name: %S", Name); + + if (_stricmp(Name, "OutputMatrix") == 0) { + ShaderOp->Shaders.at(0).Arguments = CompilerOptions.c_str(); + return; + } + + for (size_t I = 0; I < 2; ++I) { + std::string BufferName = "InputMatrix"; + BufferName += (char)('1' + I); + if (_stricmp(Name, BufferName.c_str()) == 0) { + if (I < Inputs.size() && !Inputs[I].empty()) + fillShaderBuffer(ShaderData, Inputs[I]); + return; + } + } + + LOG_ERROR_FMT_THROW( + L"LinAlg RunShaderOpTest CallBack. Unexpected Resource: %S", + Name); + }, + std::move(ShaderOpSet)); + + MappedData ShaderOutData; + TestResult->Test->GetReadBackData("OutputMatrix", &ShaderOutData); + + std::vector OutData; + readShaderBuffer(ShaderOutData, OutData, ExpectedOutputSize); + + return OutData; +} + +// +// runAndVerify - runs the GPU test and verifies results. +// + +template +void runAndVerify(ID3D12Device *D3DDevice, bool VerboseLogging, + const Operation &Op, const InputSets &Inputs, + const std::vector &Expected, + const ValidationConfig &Config, size_t Rows, size_t Cols, + size_t KDim) { + + auto Actual = runLinAlgTest(D3DDevice, VerboseLogging, Op, Inputs, Rows, + Cols, KDim, Expected.size()); + + if (!Actual) { + hlsl_test::LogCommentFmt(L"Test was skipped."); + return; + } + + VERIFY_IS_TRUE( + doMatricesMatch(*Actual, Expected, Rows, Cols, Config, VerboseLogging)); +} + +// +// Op definitions. Each op carries a ValidationConfig. +// Specializations are expected to have a ValidationConfig member. +// + +template struct Op; + +// ExpectedBuilder - specializations compute expected output from inputs. +template struct ExpectedBuilder; + +// FillMatrix: splat a scalar value across the entire matrix. 
+template struct Op : StrictValidation {}; + +template struct ExpectedBuilder { + static std::vector buildExpected(Op &, + const InputSets &, size_t Rows, + size_t Cols, size_t) { + const T FillVal = getInputSet(InputSet::Fill)[0]; + return std::vector(Rows * Cols, FillVal); + } + + // FillMatrix input is special: just the scalar fill value. + static InputSets buildInputs(const Operation &, size_t, size_t, size_t) { + return {{getInputSet(InputSet::Fill)[0]}}; + } +}; + +// MatrixStore: load and store round-trip. +template +struct Op : DefaultValidation {}; + +template struct ExpectedBuilder { + static std::vector buildExpected(Op &, + const InputSets &Inputs, size_t, + size_t, size_t) { + return Inputs[0]; + } +}; + +// MatrixAccumulate: accumulate into zero-initialized output. +template +struct Op : DefaultValidation {}; + +template struct ExpectedBuilder { + static std::vector buildExpected(Op &, + const InputSets &Inputs, size_t, + size_t, size_t) { + return Inputs[0]; + } +}; + +// MatrixMul: multiply input matrix by identity. +template struct Op : DefaultValidation {}; + +template struct ExpectedBuilder { + static std::vector buildExpected(Op &, + const InputSets &Inputs, size_t, + size_t, size_t) { + // Multiplying by identity: result should equal Input1. + return Inputs[0]; + } +}; + +// +// dispatchTest - orchestrates building inputs, computing expected results, +// and running the test across multiple matrix sizes. +// Follows the same pattern as LongVector::dispatchTest. +// + +template +void dispatchTest(ID3D12Device *D3DDevice, bool VerboseLogging) { + + const std::vector Sizes = getMatrixSizesToTest(); + constexpr const Operation &CurOp = getOperation(OP); + Op OpConfig; + + for (const MatrixDims &Dims : Sizes) { + const size_t Rows = Dims.Rows; + const size_t Cols = Dims.Cols; + // TODO: K dimension currently equals Cols for simplicity (square inner + // dimension). Add non-square K sizes for better multiply coverage. 
+ const size_t KDim = (CurOp.Arity >= 2) ? Cols : 0; + + // FillMatrix has special input handling (scalar, not a matrix). + InputSets Inputs; + if constexpr (OP == OpType::FillMatrix) + Inputs = ExpectedBuilder::buildInputs(CurOp, Rows, Cols, KDim); + else + Inputs = buildTestInputs(CurOp, Rows, Cols, KDim); + + auto Expected = ExpectedBuilder::buildExpected(OpConfig, Inputs, + Rows, Cols, KDim); + + runAndVerify(D3DDevice, VerboseLogging, CurOp, Inputs, Expected, + OpConfig.Validation, Rows, Cols, KDim); + } +} + +} // namespace LinearAlgebra + +using namespace LinearAlgebra; + +// +// TAEF test entry point macro. +// +#define LINALG_TEST(Op, DataType) \ + TEST_METHOD(Op##_##DataType) { runTest(); } + +// +// Common test class for linear algebra tests. +// Follows the same pattern as LongVector::TestClassCommon. +// +class LinAlgTestClassCommon { +public: + bool setupClass() { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + if (!Initialized) { + Initialized = true; + + D3D12SDK = D3D12SDKSelector(); + + WEX::TestExecution::RuntimeParameters::TryGetValue(L"VerboseLogging", + VerboseLogging); + if (VerboseLogging) + hlsl_test::LogCommentFmt(L"Verbose logging is enabled for this test."); + else + hlsl_test::LogCommentFmt(L"Verbose logging is disabled for this test."); + + bool FailIfRequirementsNotMet = false; +#ifdef _HLK_CONF + FailIfRequirementsNotMet = true; +#endif + WEX::TestExecution::RuntimeParameters::TryGetValue( + L"FailIfRequirementsNotMet", FailIfRequirementsNotMet); + + const bool SkipUnsupported = !FailIfRequirementsNotMet; + // Linear algebra requires at least SM 6.10 device support. 
+ if (!D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_10, + SkipUnsupported)) { + if (FailIfRequirementsNotMet) + hlsl_test::LogErrorFmt( + L"Device Creation failed, resulting in test failure, since " + L"FailIfRequirementsNotMet is set."); + + return false; + } + } + + return true; + } + + bool setupMethod() { + if (D3DDevice && D3DDevice->GetDeviceRemovedReason() != S_OK) { + hlsl_test::LogCommentFmt(L"Device was lost!"); + D3DDevice.Release(); + } + + if (!D3DDevice) { + hlsl_test::LogCommentFmt(L"Creating device"); + + const bool SkipUnsupported = false; + VERIFY_IS_TRUE(D3D12SDK->createDevice(&D3DDevice, D3D_SHADER_MODEL_6_10, + SkipUnsupported)); + } + + return true; + } + + template void runTest() { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + dispatchTest(D3DDevice, VerboseLogging); + } + +protected: + CComPtr D3DDevice; + +private: + bool Initialized = false; + std::optional D3D12SDK; + bool VerboseLogging = false; +}; + +// +// TAEF Test Class +// +class DxilConf_SM610_LinearAlgebra : public LinAlgTestClassCommon { +public: + BEGIN_TEST_CLASS(DxilConf_SM610_LinearAlgebra) + TEST_CLASS_PROPERTY("Kits.TestName", + "D3D12 - Shader Model 6.10 - Linear Algebra Tests") + TEST_CLASS_PROPERTY("Kits.TestId", "f00df946-9877-4453-8844-b1f4c8977953") + TEST_CLASS_PROPERTY("Kits.Description", + "Validates SM 6.10 linear algebra matrix operations") + TEST_CLASS_PROPERTY( + "Kits.Specification", + "Device.Graphics.D3D12.DXILCore.ShaderModel610.CoreRequirement") + // Priority 2: SM 6.10 linalg not yet supported in automation. Test runners + // (lit, hcttest.cmd) only run tests with Priority < 2. See eede01664. 
+ TEST_METHOD_PROPERTY(L"Priority", L"2") + END_TEST_CLASS() + + TEST_CLASS_SETUP(setupClass) { return LinAlgTestClassCommon::setupClass(); } + TEST_METHOD_SETUP(setupMethod) { + return LinAlgTestClassCommon::setupMethod(); + } + + // FillMatrix (Splat) + LINALG_TEST(FillMatrix, float); + LINALG_TEST(FillMatrix, HLSLHalf_t); + LINALG_TEST(FillMatrix, int32_t); + LINALG_TEST(FillMatrix, uint32_t); + + // MatrixStore (Load + Store round-trip) + LINALG_TEST(MatrixStore, float); + LINALG_TEST(MatrixStore, HLSLHalf_t); + LINALG_TEST(MatrixStore, int32_t); + LINALG_TEST(MatrixStore, uint32_t); + + // MatrixAccumulate (InterlockedAccumulate) + LINALG_TEST(MatrixAccumulate, float); + LINALG_TEST(MatrixAccumulate, HLSLHalf_t); + + // MatrixMul (Multiply) + LINALG_TEST(MatrixMul, float); + LINALG_TEST(MatrixMul, HLSLHalf_t); +}; diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def b/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def new file mode 100644 index 0000000000..9773eba608 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LinearAlgebraOps.def @@ -0,0 +1,26 @@ +// +// LinearAlgebraOps.def - X-Macro definitions for linear algebra operations +// +// This file defines the operations tested by the LinearAlgebra execution tests. +// Each OP entry maps an operation symbol to its preprocessor define, arity +// (number of input matrices), the ShaderOp name, and which InputSets to use. 
+// +// OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2) +// SYMBOL - C++ enum name for the operation +// ARITY - Number of input matrices required (0 = scalar-only, 1, 2) +// DEFINE - Preprocessor define passed to the shader to select the op +// SHADER_NAME - Name of the ShaderOp in ShaderOpArith.xml +// INPUT_SET_1 - InputSet for first matrix (or Fill for scalar ops) +// INPUT_SET_2 - InputSet for second matrix (or unused placeholder) +// + +#ifndef OP +#define OP(SYMBOL, ARITY, DEFINE, SHADER_NAME, INPUT_SET_1, INPUT_SET_2) +#endif + +OP(FillMatrix, 0, "FUNC_FILL_MATRIX", "LinAlgOp", Fill, Fill) +OP(MatrixStore, 1, "FUNC_MATRIX_STORE", "LinAlgOp", Seed, Seed) +OP(MatrixAccumulate, 1, "FUNC_MATRIX_ACCUMULATE", "LinAlgOp", Seed, Seed) +OP(MatrixMul, 2, "FUNC_MATRIX_MUL", "LinAlgOp", Seed, Identity) + +#undef OP diff --git a/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h b/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h new file mode 100644 index 0000000000..a4bcdc0097 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LinearAlgebraTestData.h @@ -0,0 +1,220 @@ +#ifndef LINEARALGEBRATESTDATA_H +#define LINEARALGEBRATESTDATA_H + +#include + +#include + +#include "HLSLTestDataTypes.h" + +namespace LinearAlgebra { + +// +// Input data sets for matrix operations. +// Follows the same InputSet / BEGIN_INPUT_SETS pattern as LongVectorTestData.h. +// + +enum class InputSet { Seed, Fill, Identity }; + +template const std::vector &getInputSet(InputSet InputSet) { + static_assert(sizeof(T) == 0, "No InputSet for this type"); +} + +#define BEGIN_INPUT_SETS(TYPE) \ + template <> \ + inline const std::vector &getInputSet(InputSet InputSet) { \ + using T = TYPE; \ + switch (InputSet) { + +#define INPUT_SET(SET, ...) 
\ + case SET: { \ + static std::vector Data = {__VA_ARGS__}; \ + return Data; \ + } + +#define END_INPUT_SETS() \ + default: \ + break; \ + } \ + VERIFY_FAIL("Missing input set"); \ + std::abort(); \ + } + +using HLSLTestDataTypes::F8E4M3_t; +using HLSLTestDataTypes::F8E5M2_t; +using HLSLTestDataTypes::HLSLHalf_t; +using HLSLTestDataTypes::SNormF16_t; +using HLSLTestDataTypes::SNormF32_t; +using HLSLTestDataTypes::SNormF64_t; +using HLSLTestDataTypes::UNormF16_t; +using HLSLTestDataTypes::UNormF32_t; +using HLSLTestDataTypes::UNormF64_t; + +BEGIN_INPUT_SETS(HLSLHalf_t) +INPUT_SET(InputSet::Seed, HLSLHalf_t(1.0f), HLSLHalf_t(2.0f), HLSLHalf_t(3.0f), + HLSLHalf_t(4.0f), HLSLHalf_t(5.0f), HLSLHalf_t(6.0f), + HLSLHalf_t(7.0f), HLSLHalf_t(8.0f), HLSLHalf_t(9.0f), + HLSLHalf_t(10.0f), HLSLHalf_t(11.0f), HLSLHalf_t(12.0f), + HLSLHalf_t(13.0f), HLSLHalf_t(14.0f)) +INPUT_SET(InputSet::Fill, HLSLHalf_t(42.0f)) +INPUT_SET(InputSet::Identity, HLSLHalf_t(1.0f)) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(float) +INPUT_SET(InputSet::Seed, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f, 13.0f, 14.0f) +INPUT_SET(InputSet::Fill, 42.0f) +INPUT_SET(InputSet::Identity, 1.0f) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(double) +INPUT_SET(InputSet::Seed, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, + 11.0, 12.0, 13.0, 14.0) +INPUT_SET(InputSet::Fill, 42.0) +INPUT_SET(InputSet::Identity, 1.0) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(int32_t) +INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) +INPUT_SET(InputSet::Fill, 42) +INPUT_SET(InputSet::Identity, 1) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(uint32_t) +INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) +INPUT_SET(InputSet::Fill, 42) +INPUT_SET(InputSet::Identity, 1) +END_INPUT_SETS() + +// --- Additional scalar types (pre-staged for upcoming SM 6.10 ComponentTypes) +// --- + +BEGIN_INPUT_SETS(int8_t) +INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) 
+INPUT_SET(InputSet::Fill, 42) +INPUT_SET(InputSet::Identity, 1) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(uint8_t) +INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) +INPUT_SET(InputSet::Fill, 42) +INPUT_SET(InputSet::Identity, 1) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(int16_t) +INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) +INPUT_SET(InputSet::Fill, 42) +INPUT_SET(InputSet::Identity, 1) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(uint16_t) +INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) +INPUT_SET(InputSet::Fill, 42) +INPUT_SET(InputSet::Identity, 1) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(int64_t) +INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) +INPUT_SET(InputSet::Fill, 42) +INPUT_SET(InputSet::Identity, 1) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(uint64_t) +INPUT_SET(InputSet::Seed, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14) +INPUT_SET(InputSet::Fill, 42) +INPUT_SET(InputSet::Identity, 1) +END_INPUT_SETS() + +// --- Normalized types (pre-staged for SM 6.10 SNorm/UNorm ComponentTypes) --- + +BEGIN_INPUT_SETS(SNormF16_t) +INPUT_SET(InputSet::Seed, SNormF16_t(HLSLHalf_t(-0.9f)), + SNormF16_t(HLSLHalf_t(-0.7f)), SNormF16_t(HLSLHalf_t(-0.5f)), + SNormF16_t(HLSLHalf_t(-0.3f)), SNormF16_t(HLSLHalf_t(-0.1f)), + SNormF16_t(HLSLHalf_t(0.1f)), SNormF16_t(HLSLHalf_t(0.2f)), + SNormF16_t(HLSLHalf_t(0.3f)), SNormF16_t(HLSLHalf_t(0.4f)), + SNormF16_t(HLSLHalf_t(0.5f)), SNormF16_t(HLSLHalf_t(0.6f)), + SNormF16_t(HLSLHalf_t(0.7f)), SNormF16_t(HLSLHalf_t(0.8f)), + SNormF16_t(HLSLHalf_t(0.9f))) +INPUT_SET(InputSet::Fill, SNormF16_t(HLSLHalf_t(0.5f))) +INPUT_SET(InputSet::Identity, SNormF16_t(HLSLHalf_t(1.0f))) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(UNormF16_t) +INPUT_SET(InputSet::Seed, UNormF16_t(HLSLHalf_t(0.05f)), + UNormF16_t(HLSLHalf_t(0.1f)), UNormF16_t(HLSLHalf_t(0.15f)), + UNormF16_t(HLSLHalf_t(0.2f)), UNormF16_t(HLSLHalf_t(0.25f)), + UNormF16_t(HLSLHalf_t(0.3f)), 
UNormF16_t(HLSLHalf_t(0.35f)), + UNormF16_t(HLSLHalf_t(0.4f)), UNormF16_t(HLSLHalf_t(0.45f)), + UNormF16_t(HLSLHalf_t(0.5f)), UNormF16_t(HLSLHalf_t(0.55f)), + UNormF16_t(HLSLHalf_t(0.6f)), UNormF16_t(HLSLHalf_t(0.7f)), + UNormF16_t(HLSLHalf_t(0.8f))) +INPUT_SET(InputSet::Fill, UNormF16_t(HLSLHalf_t(0.5f))) +INPUT_SET(InputSet::Identity, UNormF16_t(HLSLHalf_t(1.0f))) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(SNormF32_t) +INPUT_SET(InputSet::Seed, SNormF32_t(-0.9f), SNormF32_t(-0.7f), + SNormF32_t(-0.5f), SNormF32_t(-0.3f), SNormF32_t(-0.1f), + SNormF32_t(0.1f), SNormF32_t(0.2f), SNormF32_t(0.3f), + SNormF32_t(0.4f), SNormF32_t(0.5f), SNormF32_t(0.6f), + SNormF32_t(0.7f), SNormF32_t(0.8f), SNormF32_t(0.9f)) +INPUT_SET(InputSet::Fill, SNormF32_t(0.5f)) +INPUT_SET(InputSet::Identity, SNormF32_t(1.0f)) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(UNormF32_t) +INPUT_SET(InputSet::Seed, UNormF32_t(0.05f), UNormF32_t(0.1f), + UNormF32_t(0.15f), UNormF32_t(0.2f), UNormF32_t(0.25f), + UNormF32_t(0.3f), UNormF32_t(0.35f), UNormF32_t(0.4f), + UNormF32_t(0.45f), UNormF32_t(0.5f), UNormF32_t(0.55f), + UNormF32_t(0.6f), UNormF32_t(0.7f), UNormF32_t(0.8f)) +INPUT_SET(InputSet::Fill, UNormF32_t(0.5f)) +INPUT_SET(InputSet::Identity, UNormF32_t(1.0f)) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(SNormF64_t) +INPUT_SET(InputSet::Seed, SNormF64_t(-0.9), SNormF64_t(-0.7), SNormF64_t(-0.5), + SNormF64_t(-0.3), SNormF64_t(-0.1), SNormF64_t(0.1), SNormF64_t(0.2), + SNormF64_t(0.3), SNormF64_t(0.4), SNormF64_t(0.5), SNormF64_t(0.6), + SNormF64_t(0.7), SNormF64_t(0.8), SNormF64_t(0.9)) +INPUT_SET(InputSet::Fill, SNormF64_t(0.5)) +INPUT_SET(InputSet::Identity, SNormF64_t(1.0)) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(UNormF64_t) +INPUT_SET(InputSet::Seed, UNormF64_t(0.05), UNormF64_t(0.1), UNormF64_t(0.15), + UNormF64_t(0.2), UNormF64_t(0.25), UNormF64_t(0.3), UNormF64_t(0.35), + UNormF64_t(0.4), UNormF64_t(0.45), UNormF64_t(0.5), UNormF64_t(0.55), + UNormF64_t(0.6), UNormF64_t(0.7), UNormF64_t(0.8)) 
+INPUT_SET(InputSet::Fill, UNormF64_t(0.5)) +INPUT_SET(InputSet::Identity, UNormF64_t(1.0)) +END_INPUT_SETS() + +// --- FP8 types (pre-staged for SM 6.10 packed ComponentTypes) --- + +BEGIN_INPUT_SETS(F8E4M3_t) +INPUT_SET(InputSet::Seed, F8E4M3_t(1.0f), F8E4M3_t(1.5f), F8E4M3_t(2.0f), + F8E4M3_t(2.5f), F8E4M3_t(3.0f), F8E4M3_t(4.0f), F8E4M3_t(5.0f), + F8E4M3_t(6.0f), F8E4M3_t(7.0f), F8E4M3_t(8.0f), F8E4M3_t(0.5f), + F8E4M3_t(0.25f), F8E4M3_t(0.75f), F8E4M3_t(10.0f)) +INPUT_SET(InputSet::Fill, F8E4M3_t(2.0f)) +INPUT_SET(InputSet::Identity, F8E4M3_t(1.0f)) +END_INPUT_SETS() + +BEGIN_INPUT_SETS(F8E5M2_t) +INPUT_SET(InputSet::Seed, F8E5M2_t(1.0f), F8E5M2_t(1.5f), F8E5M2_t(2.0f), + F8E5M2_t(3.0f), F8E5M2_t(4.0f), F8E5M2_t(5.0f), F8E5M2_t(6.0f), + F8E5M2_t(7.0f), F8E5M2_t(8.0f), F8E5M2_t(0.5f), F8E5M2_t(0.25f), + F8E5M2_t(0.75f), F8E5M2_t(10.0f), F8E5M2_t(12.0f)) +INPUT_SET(InputSet::Fill, F8E5M2_t(2.0f)) +INPUT_SET(InputSet::Identity, F8E5M2_t(1.0f)) +END_INPUT_SETS() + +#undef BEGIN_INPUT_SETS +#undef INPUT_SET +#undef END_INPUT_SETS + +} // namespace LinearAlgebra + +#endif // LINEARALGEBRATESTDATA_H diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 4126d861ac..cd58e05814 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -5,237 +5,16 @@ #include #include -#include #include #include -#include -#include - -#include "dxc/Support/Global.h" +#include "HLSLTestDataTypes.h" namespace LongVector { -// A helper struct because C++ bools are 1 byte and HLSL bools are 4 bytes. -// Take int32_t as a constuctor argument and convert it to bool when needed. -// Comparisons cast to a bool because we only care if the bool representation is -// true or false. 
-struct HLSLBool_t { - HLSLBool_t() : Val(0) {} - HLSLBool_t(int32_t Val) : Val(Val) {} - HLSLBool_t(bool Val) : Val(Val) {} - - bool operator==(const HLSLBool_t &Other) const { - return static_cast(Val) == static_cast(Other.Val); - } - - bool operator!=(const HLSLBool_t &Other) const { - return static_cast(Val) != static_cast(Other.Val); - } - - bool operator<(const HLSLBool_t &Other) const { return Val < Other.Val; } - - bool operator>(const HLSLBool_t &Other) const { return Val > Other.Val; } - - bool operator<=(const HLSLBool_t &Other) const { return Val <= Other.Val; } - - bool operator>=(const HLSLBool_t &Other) const { return Val >= Other.Val; } - - HLSLBool_t operator*(const HLSLBool_t &Other) const { - return HLSLBool_t(Val * Other.Val); - } - - HLSLBool_t operator+(const HLSLBool_t &Other) const { - return HLSLBool_t(Val + Other.Val); - } - - HLSLBool_t operator-(const HLSLBool_t &Other) const { - return HLSLBool_t(Val - Other.Val); - } - - HLSLBool_t operator/(const HLSLBool_t &Other) const { - return HLSLBool_t(Val / Other.Val); - } - - HLSLBool_t operator%(const HLSLBool_t &Other) const { - return HLSLBool_t(Val % Other.Val); - } - - HLSLBool_t operator&&(const HLSLBool_t &Other) const { - return HLSLBool_t(Val && Other.Val); - } - - HLSLBool_t operator||(const HLSLBool_t &Other) const { - return HLSLBool_t(Val || Other.Val); - } - - bool AsBool() const { return static_cast(Val); } - - operator bool() const { return AsBool(); } - operator int16_t() const { return (int16_t)(AsBool()); } - operator int32_t() const { return (int32_t)(AsBool()); } - operator int64_t() const { return (int64_t)(AsBool()); } - operator uint16_t() const { return (uint16_t)(AsBool()); } - operator uint32_t() const { return (uint32_t)(AsBool()); } - operator uint64_t() const { return (uint64_t)(AsBool()); } - operator float() const { return (float)(AsBool()); } - operator double() const { return (double)(AsBool()); } - - // So we can construct std::wstrings using std::wostream - 
friend std::wostream &operator<<(std::wostream &Os, const HLSLBool_t &Obj) { - Os << static_cast(Obj.Val); - return Os; - } - - // So we can construct std::strings using std::ostream - friend std::ostream &operator<<(std::ostream &Os, const HLSLBool_t &Obj) { - Os << static_cast(Obj.Val); - return Os; - } - - int32_t Val = 0; -}; - -// No native float16 type in C++ until C++23 . So we use uint16_t to represent -// it. Simple little wrapping struct to help handle the right behavior. -struct HLSLHalf_t { - HLSLHalf_t() : Val(0) {} - HLSLHalf_t(const float F) { - Val = DirectX::PackedVector::XMConvertFloatToHalf(F); - } - HLSLHalf_t(const double D) { - float F; - if (D >= std::numeric_limits::max()) - F = std::numeric_limits::max(); - else if (D <= std::numeric_limits::lowest()) - F = std::numeric_limits::lowest(); - else - F = static_cast(D); - - Val = DirectX::PackedVector::XMConvertFloatToHalf(F); - } - HLSLHalf_t(const uint32_t U) { - float F = static_cast(U); - Val = DirectX::PackedVector::XMConvertFloatToHalf(F); - } - - // PackedVector::HALF is a uint16. Make sure we don't ever accidentally - // convert one of these to a HLSLHalf_t by arithmetically converting it to a - // float. - HLSLHalf_t(DirectX::PackedVector::HALF) = delete; - - static double GetULP(HLSLHalf_t A) { - DXASSERT(!std::isnan(A) && !std::isinf(A), - "ULP of NaN or infinity is undefined"); - - HLSLHalf_t Next = A; - ++Next.Val; - - double NextD = Next; - double AD = A; - return NextD - AD; - } - - static HLSLHalf_t FromHALF(DirectX::PackedVector::HALF Half) { - HLSLHalf_t H; - H.Val = Half; - return H; - } - - // Implicit conversion to float for use with things like std::acos, std::tan, - // etc - operator float() const { - return DirectX::PackedVector::XMConvertHalfToFloat(Val); - } - - bool operator==(const HLSLHalf_t &Other) const { - // Convert to floats to properly handle the '0 == -0' case which must - // compare to true but have different uint16_t values. - // That is, 0 == -0 is true. 
We store Val as a uint16_t. - const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); - const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); - return A == B; - } - - bool operator<(const HLSLHalf_t &Other) const { - return DirectX::PackedVector::XMConvertHalfToFloat(Val) < - DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); - } - - bool operator>(const HLSLHalf_t &Other) const { - return DirectX::PackedVector::XMConvertHalfToFloat(Val) > - DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); - } - - // Used by tolerance checks in the tests. - bool operator>(float F) const { - const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); - return A > F; - } - - bool operator<(float F) const { - const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); - return A < F; - } - - bool operator<=(const HLSLHalf_t &Other) const { - return DirectX::PackedVector::XMConvertHalfToFloat(Val) <= - DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); - } - - bool operator>=(const HLSLHalf_t &Other) const { - return DirectX::PackedVector::XMConvertHalfToFloat(Val) >= - DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); - } - - bool operator!=(const HLSLHalf_t &Other) const { return Val != Other.Val; } - - HLSLHalf_t operator*(const HLSLHalf_t &Other) const { - const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); - const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); - return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A * B)); - } - - HLSLHalf_t operator+(const HLSLHalf_t &Other) const { - const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); - const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); - return FromHALF((DirectX::PackedVector::XMConvertFloatToHalf(A + B))); - } - - HLSLHalf_t operator-(const HLSLHalf_t &Other) const { - const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); - const float B = 
DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); - return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A - B)); - } - - HLSLHalf_t operator/(const HLSLHalf_t &Other) const { - const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); - const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); - return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(A / B)); - } - - HLSLHalf_t operator%(const HLSLHalf_t &Other) const { - const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); - const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); - const float C = std::fmod(A, B); - return FromHALF(DirectX::PackedVector::XMConvertFloatToHalf(C)); - } - - // So we can construct std::wstrings using std::wostream - friend std::wostream &operator<<(std::wostream &Os, const HLSLHalf_t &Obj) { - Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val); - return Os; - } - - // So we can construct std::wstrings using std::wostream - friend std::ostream &operator<<(std::ostream &Os, const HLSLHalf_t &Obj) { - Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val); - return Os; - } - - // HALF is an alias to uint16_t - DirectX::PackedVector::HALF Val = 0; -}; +// Import shared HLSL type wrappers into LongVector namespace. 
+using HLSLTestDataTypes::HLSLBool_t; +using HLSLTestDataTypes::HLSLHalf_t; enum class InputSet { #define INPUT_SET(SYMBOL) SYMBOL, @@ -247,7 +26,8 @@ template const std::vector &getInputSet(InputSet InputSet) { } #define BEGIN_INPUT_SETS(TYPE) \ - template <> const std::vector &getInputSet(InputSet InputSet) { \ + template <> \ + inline const std::vector &getInputSet(InputSet InputSet) { \ using T = TYPE; \ switch (InputSet) { diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index dbb8a8d672..e3e5d8b0fc 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -64,10 +64,12 @@ DATA_TYPE(double, "double", 8) #undef DATA_TYPE -template constexpr bool isFloatingPointType() { - return std::is_same_v || std::is_same_v || - std::is_same_v; -} +using HLSLTestDataTypes::DefaultValidation; +using HLSLTestDataTypes::doValuesMatch; +using HLSLTestDataTypes::isFloatingPointType; +using HLSLTestDataTypes::StrictValidation; +using HLSLTestDataTypes::ValidationConfig; +using HLSLTestDataTypes::ValidationType; // // Operation Types @@ -186,72 +188,6 @@ void logLongVector(const std::vector &Values, const std::wstring &Name) { hlsl_test::LogCommentFmt(Wss.str().c_str()); } -enum class ValidationType { - Epsilon, - Ulp, -}; - -template -bool doValuesMatch(T A, T B, double Tolerance, ValidationType) { - if (Tolerance == 0.0) - return A == B; - - T Diff = A > B ? 
A - B : B - A; - return Diff <= Tolerance; -} - -bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, double, ValidationType) { - return A == B; -} - -bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance, - ValidationType ValidationType) { - switch (ValidationType) { - case ValidationType::Epsilon: - return CompareHalfEpsilon(A.Val, B.Val, static_cast(Tolerance)); - case ValidationType::Ulp: - return CompareHalfULP(A.Val, B.Val, static_cast(Tolerance)); - default: - hlsl_test::LogErrorFmt( - L"Invalid ValidationType. Expecting Epsilon or ULP."); - return false; - } -} - -bool doValuesMatch(float A, float B, double Tolerance, - ValidationType ValidationType) { - switch (ValidationType) { - case ValidationType::Epsilon: - return CompareFloatEpsilon(A, B, static_cast(Tolerance)); - case ValidationType::Ulp: { - // Tolerance is in ULPs. Convert to int for the comparison. - const int IntTolerance = static_cast(Tolerance); - return CompareFloatULP(A, B, IntTolerance); - }; - default: - hlsl_test::LogErrorFmt( - L"Invalid ValidationType. Expecting Epsilon or ULP."); - return false; - } -} - -bool doValuesMatch(double A, double B, double Tolerance, - ValidationType ValidationType) { - switch (ValidationType) { - case ValidationType::Epsilon: - return CompareDoubleEpsilon(A, B, Tolerance); - case ValidationType::Ulp: { - // Tolerance is in ULPs. Convert to int64_t for the comparison. - const int64_t IntTolerance = static_cast(Tolerance); - return CompareDoubleULP(A, B, IntTolerance); - }; - default: - hlsl_test::LogErrorFmt( - L"Invalid ValidationType. 
Expecting Epsilon or ULP."); - return false; - } -} - template bool doVectorsMatch(const std::vector &ActualValues, const std::vector &ExpectedValues, double Tolerance, @@ -563,19 +499,6 @@ InputSets buildTestInputs(size_t VectorSize, const InputSet OpInputSets[3], return Inputs; } -struct ValidationConfig { - double Tolerance = 0.0; - ValidationType Type = ValidationType::Epsilon; - - static ValidationConfig Epsilon(double Tolerance) { - return ValidationConfig{Tolerance, ValidationType::Epsilon}; - } - - static ValidationConfig Ulp(double Tolerance) { - return ValidationConfig{Tolerance, ValidationType::Ulp}; - } -}; - template void runAndVerify( ID3D12Device *D3DDevice, bool VerboseLogging, const Operation &Operation, @@ -614,23 +537,6 @@ template struct Op; // member functions. template struct ExpectedBuilder; -// Default Validation configuration - ULP for floating point types, exact -// matches for everything else. -template struct DefaultValidation { - ValidationConfig ValidationConfig; - - DefaultValidation() { - if constexpr (isFloatingPointType()) - ValidationConfig = ValidationConfig::Ulp(1.0f); - } -}; - -// Strict Validation - Defaults to exact matches. -// Tolerance can be set to a non-zero value to allow for a wider range. 
-struct StrictValidation { - ValidationConfig ValidationConfig; -}; - // Macros to build up common patterns of Op definitions #define OP_1(OP, VALIDATION, IMPL) \ @@ -1264,7 +1170,7 @@ template struct ExpectedBuilder { AbsoluteEpsilon += computeAbsoluteEpsilon((SumPos + SumNeg), ULPTolerance); - Op.ValidationConfig = ValidationConfig::Epsilon(AbsoluteEpsilon); + Op.Validation = ValidationConfig::Epsilon(AbsoluteEpsilon); std::vector Expected; Expected.push_back(static_cast(DotProduct)); @@ -1777,7 +1683,7 @@ void dispatchTest(ID3D12Device *D3DDevice, bool VerboseLogging, auto Expected = ExpectedBuilder::buildExpected(Op, Inputs); runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected, - Op.ValidationConfig); + Op.Validation); } } @@ -1802,7 +1708,7 @@ void dispatchWaveOpTest(ID3D12Device *D3DDevice, bool VerboseLogging, auto Expected = ExpectedBuilder::buildExpected(Op, Inputs, WaveSize); runAndVerify(D3DDevice, VerboseLogging, Operation, Inputs, Expected, - Op.ValidationConfig, AdditionalCompilerOptions); + Op.Validation, AdditionalCompilerOptions); } } diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index b7edba9561..ebad1bb790 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -4615,4 +4615,149 @@ void MSMain(uint GID : SV_GroupIndex, ]]> + + + + + + + + + + + + + + + + UAV(u0), UAV(u1), UAV(u2) + + + + + + + + + + + (0); + + dx::linalg::Matrix Mat = + dx::linalg::Matrix::Splat(FillVal); + + Mat.Store(g_OutputMatrix, 0, ByteStride, + dx::linalg::MatrixLayout::RowMajor); + } + #endif + + #ifdef FUNC_MATRIX_STORE + // Test MatrixStore: Load a matrix from InputMatrix1 and store it + // to OutputMatrix. Verifies the load-store round trip. 
+ void TestMatrixStore() { + dx::linalg::Matrix Mat = + dx::linalg::Matrix::Load( + g_InputMatrix1, 0, ByteStride, + dx::linalg::MatrixLayout::RowMajor); + + Mat.Store(g_OutputMatrix, 0, ByteStride, + dx::linalg::MatrixLayout::RowMajor); + } + #endif + + #ifdef FUNC_MATRIX_ACCUMULATE + // Test MatrixAccumulate: Load a matrix from InputMatrix1, then + // InterlockedAccumulate it to OutputMatrix (which is pre-initialized). + void TestMatrixAccumulate() { + dx::linalg::Matrix Mat = + dx::linalg::Matrix::Load( + g_InputMatrix1, 0, ByteStride, + dx::linalg::MatrixLayout::RowMajor); + + Mat.InterlockedAccumulate(g_OutputMatrix, 0, ByteStride, + dx::linalg::MatrixLayout::RowMajor); + } + #endif + + #ifdef FUNC_MATRIX_MUL + // Test MatrixMul: Load two matrices and multiply them. + // MatA is ROWS x K_DIM (Use::A), MatB is K_DIM x COLS (Use::B). + // Result accumulator is ROWS x COLS. + void TestMatrixMul() { + static const uint StrideA = K_DIM * sizeof(ELEM_TYPE); + static const uint StrideB = COLS * sizeof(ELEM_TYPE); + + dx::linalg::Matrix MatA = + dx::linalg::Matrix::Load( + g_InputMatrix1, 0, StrideA, + dx::linalg::MatrixLayout::RowMajor); + + dx::linalg::Matrix MatB = + dx::linalg::Matrix::Load( + g_InputMatrix2, 0, StrideB, + dx::linalg::MatrixLayout::RowMajor); + + dx::linalg::Matrix Result = + dx::linalg::Multiply(MatA, MatB); + + Result.Store(g_OutputMatrix, 0, ByteStride, + dx::linalg::MatrixLayout::RowMajor); + } + #endif + + [numthreads(1, 1, 1)] + void main(uint GI : SV_GroupIndex) { + #ifdef FUNC_FILL_MATRIX + TestFillMatrix(); + #elif defined(FUNC_MATRIX_STORE) + TestMatrixStore(); + #elif defined(FUNC_MATRIX_ACCUMULATE) + TestMatrixAccumulate(); + #elif defined(FUNC_MATRIX_MUL) + TestMatrixMul(); + #endif + }; + ]]> + +